From 3a6fc602a06960ff535e980636bd3d32b4f52737 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Wed, 22 Nov 2017 10:13:18 +0100
Subject: [PATCH] Allow explicit control of the diagonal vectorization strategy

---
 python/dune/perftool/options.py               |  3 +++
 python/dune/perftool/sumfact/vectorization.py | 21 +++++++++++++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py
index ffd4551d..ed0e007a 100644
--- a/python/dune/perftool/options.py
+++ b/python/dune/perftool/options.py
@@ -59,6 +59,9 @@ class PerftoolOptionsArray(ImmutableRecord):
     vectorize_slice = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization")
     vectorize_diagonal = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization")
     vectorize_greedy = PerftoolOption(default=False, helpstr="the heuristic currently in use (to produce paper numbers)")
+    vectorize_horizontal = PerftoolOption(default=None, helpstr="an explicit value for horizontal vectorization")
+    vectorize_vertical = PerftoolOption(default=None, helpstr="an explicit value for vertical vectorization")
+    vectorize_padding = PerftoolOption(default=None, helpstr="an explicit value for padding in vectorization")
     turn_off_diagonal_jacobian = PerftoolOption(default=False, helpstr="Do not use diagonal_jacobian transformation on the ufl tree and cast result of jacobianInverseTransposed into a FieldMatrix.")
     architecture = PerftoolOption(default="haswell", helpstr="The architecture to optimize for. Possible values: haswell|knl")
     grid_offset = PerftoolOption(default=False, helpstr="Set to true if you want a yasp grid where the lower left corner is not in the origin.")
diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index 303092f1..d6918246 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -124,12 +124,25 @@ def horizontal_vectorization_strategy(sumfacts, width, allow_padding=1):
 
 
 def diagonal_vectorization_strategy(sumfacts, width):
+    # Read explicitly set values; None means use the width-based default below
+    horizontal = get_option("vectorize_horizontal")
+    vertical = get_option("vectorize_vertical")
+    padding = get_option("vectorize_padding")
+
     if width == 4:
-        horizontal, vertical = 2, 2
-        padding = 0
+        if horizontal is None:
+            horizontal = 2
+        if vertical is None:
+            vertical = 2
+        if padding is None:
+            padding = 0
     elif width == 8:
-        horizontal, vertical = 4, 2
-        padding = 1
+        if horizontal is None:
+            horizontal = 4
+        if vertical is None:
+            vertical = 2
+        if padding is None:
+            padding = 1
     else:
         raise NotImplementedError
 
-- 
GitLab