From 7ddc18e124cff4760b8ba8b87575104cffe8fc7b Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Thu, 17 Nov 2016 16:06:13 +0100
Subject: [PATCH] sumfact_mass_numdiff compiles successfully

---
 .../loopy/transformations/collect_rotate.py   | 19 +++++++++++++++++--
 python/dune/perftool/pdelab/localoperator.py  |  5 +++--
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/python/dune/perftool/loopy/transformations/collect_rotate.py b/python/dune/perftool/loopy/transformations/collect_rotate.py
index bb8b2076..f7184efd 100644
--- a/python/dune/perftool/loopy/transformations/collect_rotate.py
+++ b/python/dune/perftool/loopy/transformations/collect_rotate.py
@@ -77,6 +77,7 @@ def collect_vector_data_rotate(knl, insns, inames):
                                              shape=(vec_size,),
                                              dim_tags="c",
                                              base_storage=quantity + '_base_storage',
+                                             scope=lp.temp_var_scope.PRIVATE,
                                              )
 
         replacemap_arr[quantity] = prim.Subscript(prim.Variable(arrname), (prim.Variable('rotate_index'),))
@@ -94,10 +95,19 @@ def collect_vector_data_rotate(knl, insns, inames):
     for insn in write_insns:
         if isinstance(insn, lp.Assignment):
             new_insns.append(insn.copy(assignee=replacemap_arr[get_pymbolic_basename(insn.assignee)],
+                                       depends_on_is_final=True,
                                        )
                              )
         elif isinstance(insn, lp.CInstruction):
-            pass
+            # Rip apart the code and change the assignee
+            assignee, expression = insn.code.split("=")
+            assignee = assignee.strip()
+            assert assignee in replacemap_arr
+
+            code = "{} ={}".format(str(replacemap_arr[assignee]), expression)
+            new_insns.append(insn.copy(code=code,
+                                       depends_on_is_final=True,
+                                       ))
         else:
             raise NotImplementedError
 
@@ -108,6 +118,7 @@ def collect_vector_data_rotate(knl, insns, inames):
     # Insert a flat consecutive counter 'total_index'
     temporaries['total_index'] = lp.TemporaryVariable('total_index',  # name
                                                       dtype=np.int32,
+                                                      scope=lp.temp_var_scope.PRIVATE,
                                                       )
     new_insns.append(lp.Assignment(prim.Variable("total_index"),  # assignee
                                    0,  # expression
@@ -127,6 +138,7 @@ def collect_vector_data_rotate(knl, insns, inames):
     # Insert a rotating index, that counts 0 , .. , vecsize - 1
     temporaries['rotate_index'] = lp.TemporaryVariable('rotate_index',  # name
                                                        dtype=np.int32,
+                                                       scope=lp.temp_var_scope.PRIVATE,
                                                        )
     new_insns.append(lp.Assignment(prim.Variable("rotate_index"),  # assignee
                                    0,  # expression
@@ -170,10 +182,13 @@ def collect_vector_data_rotate(knl, insns, inames):
         knl = add_vector_view(knl, lhsname)
         lhsname = get_vector_view_name(lhsname)
 
-        new_insns.append(lp.Assignment(prim.Subscript(prim.Variable(lhsname), (prim.FloorDiv(prim.Variable("total_size"), vec_size), prim.Variable(new_iname))),
+        new_insns.append(lp.Assignment(prim.Subscript(prim.Variable(lhsname), (prim.FloorDiv(prim.Variable("total_index"), vec_size), prim.Variable(new_iname))),
                                        substitute(insn.expression, replacemap_vec),
+                                       depends_on=frozenset({"continue_stmt"}),
+                                       depends_on_is_final=True,
                                        within_inames=frozenset(inames + (new_iname,)),
                                        within_inames_is_final=True,
+                                       id=insn.id,
                                        )
                          )
 
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index 73a73367..cd64a827 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -501,8 +501,6 @@ def generate_kernel(integrals):
     from loopy import make_reduction_inames_unique
     kernel = make_reduction_inames_unique(kernel)
 
-    kernel = preprocess_kernel(kernel)
-
     # Apply the transformations that were gathered during tree traversals
     for trafo in transformations:
         kernel = trafo[0](kernel, *trafo[1])
@@ -514,6 +512,9 @@ def generate_kernel(integrals):
     preambles = [(i, p) for i, p in enumerate(retrieve_cache_items("preamble"))]
     kernel = kernel.copy(preambles=preambles)
 
+    # Run the loopy preprocessing only after the transformations and preambles are applied
+    kernel = preprocess_kernel(kernel)
+
     # All items with the kernel tags can be destroyed once a kernel has been generated
     from dune.perftool.generation import delete_cache_items
     delete_cache_items("(not file) and (not clazz)")
-- 
GitLab