From 161d10b4f94dbbef2a43c52d905d44ff96a2f9c0 Mon Sep 17 00:00:00 2001
From: Marcel Koch <marcel.koch@uni-muenster.de>
Date: Thu, 14 Feb 2019 12:10:52 +0100
Subject: [PATCH] Make the (unvectorized) tail work with vectorization

---
 .../codegen/blockstructured/vectorization.py  | 28 +++++++++----------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py
index 9afe9baf..95298bb1 100644
--- a/python/dune/codegen/blockstructured/vectorization.py
+++ b/python/dune/codegen/blockstructured/vectorization.py
@@ -2,7 +2,7 @@ import loopy as lp
 import numpy as np
 import pymbolic.primitives as prim
 
-from loopy.match import Tagged, Id, Writes, Or, Iname
+from loopy.match import Tagged, Id, Writes, And, Or, Iname, All
 from islpy import BasicSet
 
 from dune.codegen.generation import get_global_context_value
@@ -172,8 +172,8 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
 
 def add_vcl_access(knl, iname_inner):
     from loopy.match import Reads, Tagged
-    accum_insns = set((insn.id for insn in lp.find_instructions(knl, Tagged('accum'))))
-    read_insns = set((insn.id for insn in lp.find_instructions(knl, Reads('*alias'))))
+    accum_insns = set((insn.id for insn in lp.find_instructions(knl, And((Tagged('accum'), Iname(iname_inner))))))
+    read_insns = set((insn.id for insn in lp.find_instructions(knl, And((Reads('*alias'), Iname(iname_inner))))))
     vectorized_insns = accum_insns | read_insns
 
     from loopy.symbolic import CombineMapper
@@ -260,37 +260,35 @@ def add_vcl_access(knl, iname_inner):
     dim = world_dimension()
     dim_names = ["x", "y", "z"] + [str(i) for i in range(4, dim + 1)]
     # remove CInstructions since loopy extract expects to get only assignments
-    knl_without_cinsn = knl.copy(instructions=[insn for insn in knl.instructions
-                                               if not isinstance(insn, lp.CInstruction)])
+    knl_with_subst_insns = knl.copy(instructions=[insn for insn in lp.find_instructions(knl, Iname(iname_inner))
+                                                  if not isinstance(insn, lp.CInstruction)])
     for alias in vector_alias:
         # Rename lhs which would match the substitution rule since loopy doesn't want substitutions as lhs
         new_insns = []
-        for insn in knl_without_cinsn.instructions:
+        for insn in knl_with_subst_insns.instructions:
             if isinstance(insn, lp.Assignment) and isinstance(insn.assignee, prim.Subscript):
                 if insn.assignee.aggregate.name == alias:
                     new_insns.append(insn.copy(assignee=prim.Subscript(prim.Variable('dummy_' + alias),
                                                                        insn.assignee.index_tuple)))
-                    pass
                 else:
                     new_insns.append(insn)
             else:
                 new_insns.append(insn)
-        knl_without_cinsn = knl_without_cinsn.copy(instructions=new_insns)
+        knl_with_subst_insns = knl_with_subst_insns.copy(instructions=new_insns)
 
         # substitution rule for alias[ex_outer,ex_inner, ey, ix, iy] -> vec[ex_inner]
         parameters = 'ex_o,ex_i,' + ','.join(['e' + d for d in dim_names[1:dim]]) + \
                      ',ix,' + ','.join(['i' + d for d in dim_names[1:dim]])
-        knl_without_cinsn = lp.extract_subst(knl_without_cinsn, alias + '_subst', '{}[{}]'.format(alias, parameters),
+        knl_with_subst_insns = lp.extract_subst(knl_with_subst_insns, alias + '_subst', '{}[{}]'.format(alias, parameters),
                                              parameters=parameters)
-        new_subst = knl_without_cinsn.substitutions.copy()
+        new_subst = knl_with_subst_insns.substitutions.copy()
         rule = new_subst[alias + '_subst']
         rule.expression = prim.Subscript(prim.Variable(alias.replace('alias', 'vec')), (prim.Variable('ex_i'),))
-        knl_without_cinsn = knl_without_cinsn.copy(substitutions=new_subst)
+        knl_with_subst_insns = knl_with_subst_insns.copy(substitutions=new_subst)
 
-    from loopy.match import All
-    knl_without_cinsn = lp.expand_subst(knl_without_cinsn, All())
-    knl = knl_without_cinsn.copy(instructions=knl_without_cinsn.instructions + [insn for insn in knl.instructions
-                                                                                if isinstance(insn, lp.CInstruction)])
+    knl_with_subst_insns = lp.expand_subst(knl_with_subst_insns, Iname(iname_inner))
+    knl = knl.copy(instructions=knl_with_subst_insns.instructions +
+                                [insn for insn in knl.instructions if insn.id not in knl_with_subst_insns.id_to_insn])
 
     # add store and load dependencies and set right accumulation assignee
     new_insns = []
-- 
GitLab