From 6f0041b59306bcee04fb9d02b596f75443f1b861 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Wed, 7 Dec 2016 13:28:25 +0100
Subject: [PATCH] Last fixes to combined vectorization

---
 dune/perftool/sumfact/transposereg.hh                        | 3 ++-
 python/dune/perftool/loopy/transformations/collect_rotate.py | 4 ++--
 python/dune/perftool/sumfact/sumfact.py                      | 4 +++-
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/dune/perftool/sumfact/transposereg.hh b/dune/perftool/sumfact/transposereg.hh
index 7efecee7..81cdbbc8 100644
--- a/dune/perftool/sumfact/transposereg.hh
+++ b/dune/perftool/sumfact/transposereg.hh
@@ -1,11 +1,12 @@
 #ifndef DUNE_PERFTOOL_SUMFACT_TRANSPOSEREG_HH
 #define DUNE_PERFTOOL_SUMFACT_TRANSPOSEREG_HH
 
+#include<dune/perftool/vectorclass/vectorclass.h>
+
 /**
  * Transpose a 4x4 matrix given by 4 vector registers (efficiently)
  */
 
-
 void transpose_reg(Vec4d& a0, Vec4d& a1, Vec4d& a2, Vec4d& a3)
 {
   Vec4d b0,b1,b2,b3;
diff --git a/python/dune/perftool/loopy/transformations/collect_rotate.py b/python/dune/perftool/loopy/transformations/collect_rotate.py
index f585c79e..409b4c2a 100644
--- a/python/dune/perftool/loopy/transformations/collect_rotate.py
+++ b/python/dune/perftool/loopy/transformations/collect_rotate.py
@@ -61,7 +61,8 @@ def collect_vector_data_rotate(knl):
 
     new_insns = []
     all_writers = []
-    rotating = False
+    tags = frozenset().union(*tuple(i.tags for i in insns))
+    rotating = "gradvec" in tags
 
     #
     # Inspect the given instructions for dependent quantities
@@ -156,7 +157,6 @@ def collect_vector_data_rotate(knl):
 
                 # Add substitution rules
                 for expr in quantities[quantity]:
-                    rotating = True
                     assert isinstance(expr, prim.Subscript)
                     last_index = expr.index[-1]
                     assert last_index in tuple(range(4))
diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py
index d6f11c77..665fe313 100644
--- a/python/dune/perftool/sumfact/sumfact.py
+++ b/python/dune/perftool/sumfact/sumfact.py
@@ -153,8 +153,10 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
             shape = shape + (4,)
             dim_tags = dim_tags + ",c"
             index = (index,)
+            vectag = frozenset({"gradvec"})
         else:
             index = ()
+            vectag = frozenset()
 
         temp = initialize_buffer(buffer,
                                  base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices),
@@ -183,7 +185,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                                   expression=expression,
                                   forced_iname_deps=frozenset(quadrature_inames() + visitor.inames),
                                   forced_iname_deps_is_final=True,
-                                  tags=frozenset({"quadvec"}),
+                                  tags=frozenset({"quadvec"}).union(vectag),
                                   depends_on=frozenset({deps})
                                   )
 
-- 
GitLab