From ebad5755515c7ce7a721c05fd7e0d1f956304ad8 Mon Sep 17 00:00:00 2001
From: Marcel Koch <marcel.koch@uni-muenster.de>
Date: Fri, 26 Jan 2018 15:11:56 +0100
Subject: [PATCH] fix cherry picks

---
 .../perftool/blockstructured/vectorization.py | 65 +++++++++----------
 1 file changed, 31 insertions(+), 34 deletions(-)

diff --git a/python/dune/perftool/blockstructured/vectorization.py b/python/dune/perftool/blockstructured/vectorization.py
index 6eba316b..18f8fb02 100644
--- a/python/dune/perftool/blockstructured/vectorization.py
+++ b/python/dune/perftool/blockstructured/vectorization.py
@@ -49,14 +49,14 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
         new_vec_temporaries[vec] = DuneTemporaryVariable(vec, dtype=np.float64, shape=(4,), managed=True,
                                                          scope=lp.temp_var_scope.PRIVATE, dim_tags=('vec',))
 
-    # write accum expr as "expr + r"
     modified_accum_insn = []
     replace_accum_insn = dict()
     vng = knl.get_var_name_generator()
     idg = knl.get_instruction_id_generator()
     for insn in write_insns:
         if isinstance(insn, lp.Assignment):
-            expr_without_r = prim.Sum(tuple(e for e in insn.expression.children if not e ==insn.assignee))
+            # write accum expr as "r = expr + r"
+            expr_without_r = prim.Sum(tuple(e for e in insn.expression.children if not e == insn.assignee))
             if expr_without_r == insn.expression:
                 continue
             # finde micro inames
@@ -109,12 +109,11 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
             # r+=a[iy]
             id_accum = idg('insn_mod_accum')
             expr_accum = prim.Sum((var_a, prim.Call(prim.Variable('permute4d<-1,0,1,2>'), (var_b,)),
-                                   substitute(insn.assignee,{iname_ix:0})))
+                                   substitute(insn.assignee, {iname_ix:0})))
             replace_accum_insn[insn.id] = lp.Assignment(assignee=substitute(insn.assignee,{iname_ix:0}),
                                                         expression=expr_accum,
-                                                        id='insn_mod_accum',
-                                                        depends_on=insn.depends_on|frozenset({'insn_b', 'insn_init_a',
-                                                                                              'insn_a_iy'}),
+                                                        id=id_accum,
+                                                        depends_on=insn.depends_on|frozenset({id_set_a,id_init_a,id_set_b}),
                                                         within_inames=insn.within_inames-frozenset({iname_ix})
                                                         )
             # a[iy] = permute
@@ -189,41 +188,39 @@ def add_vcl_vector(knl, iname_inner, iname_outer):
 
             # flat index without vec iname
             strides = tuple(tag.stride for tag in knl.temporary_variables[alias].dim_tags)
-            index = prim.Sum(tuple(prim.Product(z) for z in zip(substitute(expr, {iname_inner:0}).index_tuple, strides)))
-
-        # add store instruction
-        code = "{}.store({}+ {});".format(vec, alias, index)
-        #store_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_store')
-        store_id = idg('insn_'+vec+'_store')
-        store_insns.append(lp.CInstruction(iname_exprs=[], code=code,read_variables=frozenset({alias}),
-                                           within_inames=insn.within_inames,
-                                           depends_on=insn.depends_on
-                                                      | frozenset({insn.id})
-                                                      | read_dependencies[insn.id],
-                                           id=store_id))
+            index = prim.Sum(tuple(prim.Product(z) for z in zip(substitute(expr, {iname_inner: 0, iname_ix: 0}).index_tuple, strides)))
+
+            # add store instruction
+            code = "{}.store({} + {});".format(vec, alias, index)
+            #store_id = knl.make_unique_instruction_id(insns=knl.instructions+load_insns, based_on='insn_'+vec+'_store')
+            store_id = idg('insn_'+vec+'_store')
+            store_insns.append(lp.CInstruction(iname_exprs=[], code=code,read_variables=frozenset({alias}),
+                                               within_inames=insn.within_inames,
+                                               depends_on=insn.depends_on
+                                                          | frozenset({insn.id})
+                                                          | read_dependencies[insn.id],
+                                               id=store_id))
 
     # exchange alias for vector
     new_insns = []
     for insn in knl.instructions:
+        insn = replace_accum_insn.get(insn.id, insn)
         if insn.id not in read_insn_to_vec_instance.keys() | write_insn_to_vec_instance.keys():
             new_insns.append(insn)
         else:
-            if insn.id in replace_accum_insn:
-                new_insn = replace_accum_insn[insn.id].copy(depends_on=replace_accum_insn[insn.id].depends_on
-                                                                       | read_dependencies[insn.id])
-            else:
-                subst_map = dict()
-                for vec, expr in read_insn_to_vec_instance[insn.id]:
-                    subst_map[expr] = prim.Subscript(prim.Variable(vec), (prim.Variable(iname_inner),))
-
-                new_insn = insn
-
-                if insn in read_insns:
-                    new_insn = new_insn.copy(expression=substitute(new_insn.expression, subst_map),
-                                             depends_on=new_insn.depends_on | read_dependencies[insn.id])
-                if insn in write_insns:
-                    new_insn = new_insn.copy(assignee=substitute(new_insn.assignee, subst_map))
-                new_insns.append(new_insn)
+            subst_map = dict()
+            for vec, expr in read_insn_to_vec_instance[insn.id]:
+                subst_map[expr] = prim.Subscript(prim.Variable(vec), (prim.Variable(iname_inner),))
+
+            new_insn = insn
+
+            if insn in read_insns:
+                new_insn = new_insn.copy(expression=substitute(new_insn.expression, subst_map),
+                                         depends_on=new_insn.depends_on | read_dependencies[insn.id])
+            if insn in write_insns:
+                new_insn = new_insn.copy(assignee=substitute(new_insn.assignee, subst_map))
+
+            new_insns.append(new_insn)
 
     from loopy.kernel.creation import resolve_dependencies
     return resolve_dependencies(knl.copy(instructions=new_insns+load_insns+store_insns+modified_accum_insn,
-- 
GitLab