From 24391f126f982101e0c4260a23b1db334fdbce0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20He=C3=9F?= <rene.hess@iwr.uni-heidelberg.de>
Date: Fri, 19 Oct 2018 16:36:11 +0200
Subject: [PATCH] Choose vectorization strategy based on the cost of the
 permuted matrix sequence

---
 python/dune/codegen/sumfact/symbolic.py | 17 ++++++++++++-----
 test/sumfact/poisson/poisson_dg_3d.mini |  1 +
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/python/dune/codegen/sumfact/symbolic.py b/python/dune/codegen/sumfact/symbolic.py
index 24f92455..8ea27280 100644
--- a/python/dune/codegen/sumfact/symbolic.py
+++ b/python/dune/codegen/sumfact/symbolic.py
@@ -376,7 +376,10 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
     @property
     def parallel_key(self):
         """ A key that identifies parallellizable kernels. """
-        return tuple(m.basis_size for m in self.permuted_matrix_sequence) + (self.stage, self.buffer, self.interface.within_inames)
+        # TODO: For now we do not vectorize SumfactKernels that differ in
+        # their quadrature_permutation. This should eventually be handled
+        # in the same way as upper/lower vectorization.
+        return self.quadrature_permutation + tuple(m.basis_size for m in self.permuted_matrix_sequence) + (self.stage, self.buffer, self.interface.within_inames)
 
     @property
     def cache_key(self):
@@ -576,8 +579,10 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
     def operations(self):
         """ The total number of floating point operations for the kernel
         to be carried out """
-        from dune.codegen.sumfact.permutation import flop_cost
-        return flop_cost(self.permuted_matrix_sequence)
+        from dune.codegen.sumfact.permutation import flop_cost, sumfact_permutation_strategy, permute_forward
+        perm = sumfact_permutation_strategy(self)
+        permuted_matrix_sequence_cost = permute_forward(self.matrix_sequence, perm)
+        return flop_cost(permuted_matrix_sequence_cost)
 
 
 # Extract the argument list and store it on the class. This needs to be done
@@ -881,5 +886,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable)
     def operations(self):
         """ The total number of floating point operations for the kernel
         to be carried out """
-        from dune.codegen.sumfact.permutation import flop_cost
-        return flop_cost(self.matrix_sequence)
+        from dune.codegen.sumfact.permutation import flop_cost, sumfact_permutation_strategy, permute_forward
+        perm = sumfact_permutation_strategy(self)
+        permuted_matrix_sequence_cost = permute_forward(self.matrix_sequence, perm)
+        return flop_cost(permuted_matrix_sequence_cost)
diff --git a/test/sumfact/poisson/poisson_dg_3d.mini b/test/sumfact/poisson/poisson_dg_3d.mini
index f0b4ef26..91556ff0 100644
--- a/test/sumfact/poisson/poisson_dg_3d.mini
+++ b/test/sumfact/poisson/poisson_dg_3d.mini
@@ -15,6 +15,7 @@ extension = vtu
 
 [formcompiler]
 compare_l2errorsquared = 1e-4, 5e-6 | expand deg
+debug_interpolate_input = 1
 
 [formcompiler.r]
 numerical_jacobian = 1, 0 | expand num
-- 
GitLab