From ebad5755515c7ce7a721c05fd7e0d1f956304ad8 Mon Sep 17 00:00:00 2001 From: Marcel Koch <marcel.koch@uni-muenster.de> Date: Fri, 26 Jan 2018 15:11:56 +0100 Subject: [PATCH] fix cherry picks --- .../perftool/blockstructured/vectorization.py | 65 +++++++++---------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/python/dune/perftool/blockstructured/vectorization.py b/python/dune/perftool/blockstructured/vectorization.py index 6eba316b..18f8fb02 100644 --- a/python/dune/perftool/blockstructured/vectorization.py +++ b/python/dune/perftool/blockstructured/vectorization.py @@ -49,14 +49,14 @@ def add_vcl_vector(knl, iname_inner, iname_outer): new_vec_temporaries[vec] = DuneTemporaryVariable(vec, dtype=np.float64, shape=(4,), managed=True, scope=lp.temp_var_scope.PRIVATE, dim_tags=('vec',)) - # write accum expr as "expr + r" modified_accum_insn = [] replace_accum_insn = dict() vng = knl.get_var_name_generator() idg = knl.get_instruction_id_generator() for insn in write_insns: if isinstance(insn, lp.Assignment): - expr_without_r = prim.Sum(tuple(e for e in insn.expression.children if not e ==insn.assignee)) + # write accum expr as "r = expr + r" + expr_without_r = prim.Sum(tuple(e for e in insn.expression.children if not e == insn.assignee)) if expr_without_r == insn.expression: continue # finde micro inames @@ -109,12 +109,11 @@ def add_vcl_vector(knl, iname_inner, iname_outer): # r+=a[iy] id_accum = idg('insn_mod_accum') expr_accum = prim.Sum((var_a, prim.Call(prim.Variable('permute4d<-1,0,1,2>'), (var_b,)), - substitute(insn.assignee,{iname_ix:0}))) + substitute(insn.assignee, {iname_ix:0}))) replace_accum_insn[insn.id] = lp.Assignment(assignee=substitute(insn.assignee,{iname_ix:0}), expression=expr_accum, - id='insn_mod_accum', - depends_on=insn.depends_on|frozenset({'insn_b', 'insn_init_a', - 'insn_a_iy'}), + id=id_accum, + depends_on=insn.depends_on|frozenset({id_set_a,id_init_a,id_set_b}), within_inames=insn.within_inames-frozenset({iname_ix}) ) # a[iy] = permute @@ -189,41 +188,39 @@ def add_vcl_vector(knl, iname_inner, iname_outer): # flat index without vec iname strides = tuple(tag.stride for tag in knl.temporary_variables[alias].dim_tags) - index = prim.Sum(tuple(prim.Product(z) for z in zip(substitute(expr, {iname_inner:0}).index_tuple, strides))) - - # add store instruction - code = "{}.store({}+ {});".format(vec, alias, index) - #store_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_store') - store_id = idg('insn_'+vec+'_store') - store_insns.append(lp.CInstruction(iname_exprs=[], code=code,read_variables=frozenset({alias}), - within_inames=insn.within_inames, - depends_on=insn.depends_on - | frozenset({insn.id}) - | read_dependencies[insn.id], - id=store_id)) + index = prim.Sum(tuple(prim.Product(z) for z in zip(substitute(expr, {iname_inner: 0, iname_ix: 0}).index_tuple, strides))) + + # add store instruction + code = "{}.store({} + {});".format(vec, alias, index) + #store_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_store') + store_id = idg('insn_'+vec+'_store') + store_insns.append(lp.CInstruction(iname_exprs=[], code=code,read_variables=frozenset({alias}), + within_inames=insn.within_inames, + depends_on=insn.depends_on + | frozenset({insn.id}) + | read_dependencies[insn.id], + id=store_id)) # exchange alias for vector new_insns = [] for insn in knl.instructions: + insn = replace_accum_insn.get(insn.id, insn) if insn.id not in read_insn_to_vec_instance.keys() | write_insn_to_vec_instance.keys(): new_insns.append(insn) else: - if insn.id in replace_accum_insn: - new_insn = replace_accum_insn[insn.id].copy(depends_on=replace_accum_insn[insn.id].depends_on - | read_dependencies[insn.id]) - else: - subst_map = dict() - for vec, expr in read_insn_to_vec_instance[insn.id]: - subst_map[expr] = prim.Subscript(prim.Variable(vec), (prim.Variable(iname_inner),)) - - new_insn = insn - - if insn in read_insns: - new_insn = new_insn.copy(expression=substitute(new_insn.expression, subst_map), - depends_on=new_insn.depends_on | read_dependencies[insn.id]) - if insn in write_insns: - new_insn = new_insn.copy(assignee=substitute(new_insn.assignee, subst_map)) - new_insns.append(new_insn) + subst_map = dict() + for vec, expr in read_insn_to_vec_instance[insn.id]: + subst_map[expr] = prim.Subscript(prim.Variable(vec), (prim.Variable(iname_inner),)) + + new_insn = insn + + if insn in read_insns: + new_insn = new_insn.copy(expression=substitute(new_insn.expression, subst_map), + depends_on=new_insn.depends_on | read_dependencies[insn.id]) + if insn in write_insns: + new_insn = new_insn.copy(assignee=substitute(new_insn.assignee, subst_map)) + + new_insns.append(new_insn) from loopy.kernel.creation import resolve_dependencies return resolve_dependencies(knl.copy(instructions=new_insns+load_insns+store_insns+modified_accum_insn, -- GitLab