Skip to content
Snippets Groups Projects
Commit 822ce8e3 authored by Marcel Koch
Browse files

allow for more than one accumulation call

parent 0f8daff7
No related branches found
No related tags found
No related merge requests found
......@@ -56,28 +56,33 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
# write accum expr as "expr + r"
modified_accum_insn = []
replace_accum_insn = dict()
vng = knl.get_var_name_generator()
idg = knl.get_instruction_id_generator()
for insn in write_insns:
if isinstance(insn, lp.CallInstruction) and isinstance(insn.expression.function, PDELabAccumulationFunction):
vec_name, expr = write_insn_to_vec_instance[insn.id]
expr_accum = insn.expression.parameters[-1]
# finde iname, der auf x endet
# finde micro inames
iname_ix = next((i for i in insn.within_inames if i.startswith('micro') and i.endswith("_x")))
iname_iy = next((i for i in insn.within_inames if i.startswith('micro') and i.endswith("_y")))
# erstelle a[iy] und b
new_vec_temporaries['a_iy'] = DuneTemporaryVariable('a', dtype=np.float64, shape=(2, 4,), managed=True,
scope=lp.temp_var_scope.PRIVATE, dim_tags=('c', 'vec',))
new_vec_temporaries['b'] = DuneTemporaryVariable('b', dtype=np.float64, shape=(4,), managed=True,
scope=lp.temp_var_scope.PRIVATE, dim_tags=('vec',))
identifier_a = vng('a')
identifier_b = vng('b')
new_vec_temporaries[identifier_a] = DuneTemporaryVariable(identifier_a, dtype=np.float64, shape=(2, 4,), managed=True,
scope=lp.temp_var_scope.PRIVATE, dim_tags=('c', 'vec',))
new_vec_temporaries[identifier_b] = DuneTemporaryVariable(identifier_b, dtype=np.float64, shape=(4,), managed=True,
scope=lp.temp_var_scope.PRIVATE, dim_tags=('vec',))
a = prim.Subscript(prim.Variable('a'), (prim.Variable(iname_iy), prim.Variable(iname_inner)))
b = prim.Subscript(prim.Variable('b'), (prim.Variable(iname_inner),))
var_a = prim.Subscript(prim.Variable(identifier_a), (prim.Variable(iname_iy), prim.Variable(iname_inner)))
var_b = prim.Subscript(prim.Variable(identifier_b), (prim.Variable(iname_inner),))
# init a
modified_accum_insn.append(lp.Assignment(assignee=substitute(a, {iname_iy: prim.Variable(iname_iy+'_head')}),
id_init_a = idg('insn_init_'+identifier_a)
modified_accum_insn.append(lp.Assignment(assignee=substitute(var_a, {iname_iy: prim.Variable(iname_iy+'_head')}),
expression=0,
id='insn_init_a',
id=id_init_a,
within_inames=(insn.within_inames-frozenset({iname_ix, iname_iy,
iname_outer}))
|frozenset({iname_iy+'_head'}),
......@@ -86,55 +91,60 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
# setze werte für a und b
expr_b = substitute(expr_accum, {iname_ix: 1})
expr_a = prim.Sum((substitute(expr_accum, {iname_ix: 0}), a))
expr_a = prim.Sum((substitute(expr_accum, {iname_ix: 0}), var_a))
modified_accum_insn.append(lp.Assignment(assignee=b,
id_set_a = idg('insn_'+identifier_a)
id_set_b = idg('insn_'+identifier_b)
modified_accum_insn.append(lp.Assignment(assignee=var_b,
expression=expr_b,
id='insn_b',
id=id_set_b,
depends_on=insn.depends_on,
within_inames=insn.within_inames-frozenset({iname_ix}),
)
)
modified_accum_insn.append(lp.Assignment(assignee=a,
modified_accum_insn.append(lp.Assignment(assignee=var_a,
expression=expr_a,
id='insn_a_iy',
depends_on=insn.depends_on|frozenset({'insn_init_a'}),
id=id_set_a,
depends_on=insn.depends_on|frozenset({id_init_a}),
within_inames=insn.within_inames-frozenset({iname_ix}),
)
)
# r+=a[iy]
id_accum = idg('insn_mod_accum')
r_vec = prim.Subscript(prim.Variable(vec_name),(prim.Variable(iname_inner),))
expr_accum_mod = prim.Sum((a, prim.Call(prim.Variable('permute4d<-1,0,1,2>'), (b,)), r_vec))
expr_accum_mod = prim.Sum((var_a, prim.Call(prim.Variable('permute4d<-1,0,1,2>'), (var_b,)), r_vec))
replace_accum_insn[insn.id] = lp.Assignment(assignee=r_vec,
expression=expr_accum_mod,
id='insn_mod_accum',
depends_on=insn.depends_on|frozenset({'insn_b', 'insn_init_a',
'insn_a_iy'}),
id=id_accum,
depends_on=insn.depends_on|frozenset({id_set_a, id_set_b,
id_init_a}),
within_inames=insn.within_inames-frozenset({iname_ix})
)
# a[iy] = permute
expr_permute = prim.Call(prim.Variable('permute4d<3,-1,-1,-1>'), (b,))
modified_accum_insn.append(lp.Assignment(assignee=a,
id_permute = idg('insn_permute')
expr_permute = prim.Call(prim.Variable('permute4d<3,-1,-1,-1>'), (var_b,))
modified_accum_insn.append(lp.Assignment(assignee=var_a,
expression=expr_permute,
id='insn_permute',
id=id_permute,
depends_on=replace_accum_insn[insn.id].depends_on
|frozenset({replace_accum_insn[insn.id].id, "insn_b"}),
|frozenset({replace_accum_insn[insn.id].id}),
within_inames=insn.within_inames-frozenset({iname_ix})
)
)
# tail handling
id_accum_tail = idg('insn_accum_tail')
subst_map = {iname_inner: 0, iname_outer: get_option("number_of_blocks")/4, iname_iy: prim.Variable(iname_iy+'_tail'),
iname_ix: 0}
expr_tail = prim.Call(expr.function, tuple(substitute(p, subst_map) for p in expr.parameters[:-1])
+ (prim.Subscript(prim.Variable('a'), (prim.Variable(iname_iy+'_tail'), 0)),))
+ (prim.Subscript(prim.Variable(identifier_a), (prim.Variable(iname_iy+'_tail'), 0)),))
modified_accum_insn.append(lp.CallInstruction(assignees=(),
expression=expr_tail,
id='insn_tail',
id=id_accum_tail,
depends_on=frozenset({replace_accum_insn[insn.id].id,
'insn_permute', 'insn_a_iy', 'insn_a_init'}),
id_permute, id_set_a, id_init_a}),
within_inames=(insn.within_inames - frozenset({iname_inner, iname_outer,
iname_ix, iname_iy}))
| frozenset({iname_iy+'_tail'})))
......@@ -159,7 +169,8 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
code = "{}.load(&{}({}, {}));".format(vec, expr.function.name, expr.parameters[0], index)
within_inames = insn.within_inames|insn.reduction_inames()
load_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_load')
# load_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_load')
load_id = idg('insn_'+vec+'_load')
load_insns.append(lp.CInstruction(iname_exprs=[], code=code,
within_inames=within_inames,
#assignees=(lp.Variable(vec), ), # sonst denkt looy das müsste ein array von Vec4d sein...
......@@ -178,7 +189,8 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
# add store instruction
code = "{}.store(&{}.container()({}, {}));".format(vec, expr.function.accumobj, expr.parameters[0], index)
store_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_store')
# store_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_store')
store_id = idg('insn_'+vec+'_store')
store_insns.append(lp.CInstruction(iname_exprs=[], code=code,
within_inames=insn.within_inames-frozenset({iname_ix}),
depends_on=insn.depends_on
......@@ -193,7 +205,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
else:
if insn.id in replace_accum_insn:
new_insn = replace_accum_insn[insn.id].copy(depends_on=replace_accum_insn[insn.id].depends_on
|read_dependencies[insn.id])
| read_dependencies[insn.id])
else:
subst_map = dict()
for vec, expr in read_insn_to_vec_instance[insn.id]:
......@@ -214,7 +226,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
new_insn = insn
if insn in read_insns:
new_insn = new_insn.copy(expression=NodeSubstitutor()(new_insn.expression),
depends_on=new_insn.depends_on|read_dependencies[insn.id])
depends_on=new_insn.depends_on | read_dependencies[insn.id])
if insn in write_insns:
new_insn = new_insn.copy(assignee=NodeSubstitutor()(new_insn.assignee))
new_insns.append(new_insn)
......@@ -230,8 +242,7 @@ def find_accumulation_inames(knl):
if isinstance(insn, lp.CallInstruction) and isinstance(insn.expression.function, PDELabAccumulationFunction):
inames |= insn.within_inames
inames = set((i for i in inames if i.startswith('micro')))
inames = set((i for i in inames if not i.endswith('_x')))
inames = set((i for i in inames if i.startswith('micro') and not i.endswith('_x')))
return inames
......@@ -246,5 +257,4 @@ def vectorize_micro_elements(knl):
knl = lp.duplicate_inames(knl, iname, Not(All()), suffix='_head')
knl = lp.split_iname(knl,"subel_x",4, inner_tag='vec')
knl = add_vcl_vector(knl,'subel_x_inner', 'subel_x_outer')
print(knl)
return knl
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment