diff --git a/python/dune/perftool/blockstructured/vectorization.py b/python/dune/perftool/blockstructured/vectorization.py index 7123809c5fc06e32f9a31559f204f8132ee386eb..cef42b962b8a1c7876ccc7ae2914796f0acdbc28 100644 --- a/python/dune/perftool/blockstructured/vectorization.py +++ b/python/dune/perftool/blockstructured/vectorization.py @@ -56,28 +56,33 @@ def add_vcl_vector(knl, iname_inner, iname_outer): # write accum expr as "expr + r" modified_accum_insn = [] replace_accum_insn = dict() + vng = knl.get_var_name_generator() + idg = knl.get_instruction_id_generator() for insn in write_insns: if isinstance(insn, lp.CallInstruction) and isinstance(insn.expression.function, PDELabAccumulationFunction): vec_name, expr = write_insn_to_vec_instance[insn.id] - expr_accum = insn.expression.parameters[-1] - # finde iname, der auf x endet + + # finde micro inames iname_ix = next((i for i in insn.within_inames if i.startswith('micro') and i.endswith("_x"))) iname_iy = next((i for i in insn.within_inames if i.startswith('micro') and i.endswith("_y"))) # erstelle a[iy] und b - new_vec_temporaries['a_iy'] = DuneTemporaryVariable('a', dtype=np.float64, shape=(2, 4,), managed=True, - scope=lp.temp_var_scope.PRIVATE, dim_tags=('c', 'vec',)) - new_vec_temporaries['b'] = DuneTemporaryVariable('b', dtype=np.float64, shape=(4,), managed=True, - scope=lp.temp_var_scope.PRIVATE, dim_tags=('vec',)) + identifier_a = vng('a') + identifier_b = vng('b') + new_vec_temporaries[identifier_a] = DuneTemporaryVariable(identifier_a, dtype=np.float64, shape=(2, 4,), managed=True, + scope=lp.temp_var_scope.PRIVATE, dim_tags=('c', 'vec',)) + new_vec_temporaries[identifier_b] = DuneTemporaryVariable(identifier_b, dtype=np.float64, shape=(4,), managed=True, + scope=lp.temp_var_scope.PRIVATE, dim_tags=('vec',)) - a = prim.Subscript(prim.Variable('a'), (prim.Variable(iname_iy), prim.Variable(iname_inner))) - b = prim.Subscript(prim.Variable('b'), (prim.Variable(iname_inner),)) + var_a = prim.Subscript(prim.Variable(identifier_a), (prim.Variable(iname_iy), prim.Variable(iname_inner))) + var_b = prim.Subscript(prim.Variable(identifier_b), (prim.Variable(iname_inner),)) # init a - modified_accum_insn.append(lp.Assignment(assignee=substitute(a, {iname_iy: prim.Variable(iname_iy+'_head')}), + id_init_a = idg('insn_init_'+identifier_a) + modified_accum_insn.append(lp.Assignment(assignee=substitute(var_a, {iname_iy: prim.Variable(iname_iy+'_head')}), expression=0, - id='insn_init_a', + id=id_init_a, within_inames=(insn.within_inames-frozenset({iname_ix, iname_iy, iname_outer})) |frozenset({iname_iy+'_head'}), @@ -86,55 +91,60 @@ def add_vcl_vector(knl, iname_inner, iname_outer): # setze werte für a und b expr_b = substitute(expr_accum, {iname_ix: 1}) - expr_a = prim.Sum((substitute(expr_accum, {iname_ix: 0}), a)) + expr_a = prim.Sum((substitute(expr_accum, {iname_ix: 0}), var_a)) - modified_accum_insn.append(lp.Assignment(assignee=b, + id_set_a = idg('insn_'+identifier_a) + id_set_b = idg('insn_'+identifier_b) + modified_accum_insn.append(lp.Assignment(assignee=var_b, expression=expr_b, - id='insn_b', + id=id_set_b, depends_on=insn.depends_on, within_inames=insn.within_inames-frozenset({iname_ix}), ) ) - modified_accum_insn.append(lp.Assignment(assignee=a, + modified_accum_insn.append(lp.Assignment(assignee=var_a, expression=expr_a, - id='insn_a_iy', - depends_on=insn.depends_on|frozenset({'insn_init_a'}), + id=id_set_a, + depends_on=insn.depends_on|frozenset({id_init_a}), within_inames=insn.within_inames-frozenset({iname_ix}), ) ) # r+=a[iy] + id_accum = idg('insn_mod_accum') r_vec = prim.Subscript(prim.Variable(vec_name),(prim.Variable(iname_inner),)) - expr_accum_mod = prim.Sum((a, prim.Call(prim.Variable('permute4d<-1,0,1,2>'), (b,)), r_vec)) + expr_accum_mod = prim.Sum((var_a, prim.Call(prim.Variable('permute4d<-1,0,1,2>'), (var_b,)), r_vec)) replace_accum_insn[insn.id] = lp.Assignment(assignee=r_vec, expression=expr_accum_mod, - id='insn_mod_accum', - depends_on=insn.depends_on|frozenset({'insn_b', 'insn_init_a', - 'insn_a_iy'}), + id=id_accum, + depends_on=insn.depends_on|frozenset({id_set_a, id_set_b, + id_init_a}), within_inames=insn.within_inames-frozenset({iname_ix}) ) # a[iy] = permute - expr_permute = prim.Call(prim.Variable('permute4d<3,-1,-1,-1>'), (b,)) - modified_accum_insn.append(lp.Assignment(assignee=a, + id_permute = idg('insn_permute') + expr_permute = prim.Call(prim.Variable('permute4d<3,-1,-1,-1>'), (var_b,)) + modified_accum_insn.append(lp.Assignment(assignee=var_a, expression=expr_permute, - id='insn_permute', + id=id_permute, depends_on=replace_accum_insn[insn.id].depends_on - |frozenset({replace_accum_insn[insn.id].id, "insn_b"}), + |frozenset({replace_accum_insn[insn.id].id}), within_inames=insn.within_inames-frozenset({iname_ix}) ) ) # tail handling + id_accum_tail = idg('insn_accum_tail') subst_map = {iname_inner: 0, iname_outer: get_option("number_of_blocks")/4, iname_iy: prim.Variable(iname_iy+'_tail'), iname_ix: 0} expr_tail = prim.Call(expr.function, tuple(substitute(p, subst_map) for p in expr.parameters[:-1]) - + (prim.Subscript(prim.Variable('a'), (prim.Variable(iname_iy+'_tail'), 0)),)) + + (prim.Subscript(prim.Variable(identifier_a), (prim.Variable(iname_iy+'_tail'), 0)),)) modified_accum_insn.append(lp.CallInstruction(assignees=(), expression=expr_tail, - id='insn_tail', + id=id_accum_tail, depends_on=frozenset({replace_accum_insn[insn.id].id, - 'insn_permute', 'insn_a_iy', 'insn_a_init'}), + id_permute, id_set_a, id_init_a}), within_inames=(insn.within_inames - frozenset({iname_inner, iname_outer, iname_ix, iname_iy})) | frozenset({iname_iy+'_tail'}))) @@ -159,7 +169,8 @@ def add_vcl_vector(knl, iname_inner, iname_outer): code = "{}.load(&{}({}, {}));".format(vec, expr.function.name, expr.parameters[0], index) within_inames = insn.within_inames|insn.reduction_inames() - load_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_load') + # load_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_load') + load_id = idg('insn_'+vec+'_load') load_insns.append(lp.CInstruction(iname_exprs=[], code=code, within_inames=within_inames, #assignees=(lp.Variable(vec), ), # sonst denkt looy das müsste ein array von Vec4d sein... @@ -178,7 +189,8 @@ def add_vcl_vector(knl, iname_inner, iname_outer): # add store instruction code = "{}.store(&{}.container()({}, {}));".format(vec, expr.function.accumobj, expr.parameters[0], index) - store_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_store') + # store_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_store') + store_id = idg('insn_'+vec+'_store') store_insns.append(lp.CInstruction(iname_exprs=[], code=code, within_inames=insn.within_inames-frozenset({iname_ix}), depends_on=insn.depends_on @@ -193,7 +205,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer): else: if insn.id in replace_accum_insn: new_insn = replace_accum_insn[insn.id].copy(depends_on=replace_accum_insn[insn.id].depends_on - |read_dependencies[insn.id]) + | read_dependencies[insn.id]) else: subst_map = dict() for vec, expr in read_insn_to_vec_instance[insn.id]: @@ -214,7 +226,7 @@ def add_vcl_vector(knl, iname_inner, iname_outer): new_insn = insn if insn in read_insns: new_insn = new_insn.copy(expression=NodeSubstitutor()(new_insn.expression), - depends_on=new_insn.depends_on|read_dependencies[insn.id]) + depends_on=new_insn.depends_on | read_dependencies[insn.id]) if insn in write_insns: new_insn = new_insn.copy(assignee=NodeSubstitutor()(new_insn.assignee)) new_insns.append(new_insn) @@ -230,8 +242,7 @@ def find_accumulation_inames(knl): if isinstance(insn, lp.CallInstruction) and isinstance(insn.expression.function, PDELabAccumulationFunction): inames |= insn.within_inames - inames = set((i for i in inames if i.startswith('micro'))) - inames = set((i for i in inames if not i.endswith('_x'))) + inames = set((i for i in inames if i.startswith('micro') and not i.endswith('_x'))) return inames @@ -246,5 +257,4 @@ def vectorize_micro_elements(knl): knl = lp.duplicate_inames(knl, iname, Not(All()), suffix='_head') knl = lp.split_iname(knl,"subel_x",4, inner_tag='vec') knl = add_vcl_vector(knl,'subel_x_inner', 'subel_x_outer') - print(knl) return knl