From 402f9e8ca4bd32e4e9d823fbf6add4c16eff675e Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Wed, 6 Dec 2017 15:47:56 +0100
Subject: [PATCH] More adaptations

---
 .../knl/poisson_dg/knl_poisson_dg.mini        |  4 +-
 applications/knl/poisson_dg/verify.mini       |  4 +-
 .../knl_poisson_dg_tensor.mini                |  4 +-
 .../knl/poisson_dg_tensor/verify.mini         |  4 +-
 applications/poisson_dg/poisson_dg.mini       |  2 +-
 applications/poisson_dg_tensor/CMakeLists.txt |  2 -
 .../poisson_dg_tensor/poisson_dg_tensor.mini  |  2 +-
 .../poisson_dg_tensor/sliced/CMakeLists.txt   |  5 --
 .../poisson_dg_tensor/sliced/sliced.mini      | 49 -------------------
 applications/stokes_dg/stokes_dg.mini         |  2 +-
 python/dune/perftool/sumfact/vectorization.py |  8 +--
 test/sumfact/mass/sliced.mini                 |  4 +-
 test/sumfact/poisson/diagonal.mini            |  4 +-
 test/sumfact/poisson/sliced.mini              |  4 +-
 14 files changed, 29 insertions(+), 69 deletions(-)
 delete mode 100644 applications/poisson_dg_tensor/sliced/CMakeLists.txt
 delete mode 100644 applications/poisson_dg_tensor/sliced/sliced.mini

diff --git a/applications/knl/poisson_dg/knl_poisson_dg.mini b/applications/knl/poisson_dg/knl_poisson_dg.mini
index 0d8380d8..78b2fe03 100644
--- a/applications/knl/poisson_dg/knl_poisson_dg.mini
+++ b/applications/knl/poisson_dg/knl_poisson_dg.mini
@@ -39,7 +39,9 @@ extension = vtu
 fastdg = 1
 sumfact = 1
 vectorization_quadloop = 1
-vectorize_diagonal = 1
+vectorization_strategy = explicit
+vectorization_horizontal = 4
+vectorization_vertical = 2
 instrumentation_level = 2, 3, 4 | expand
 opcounter = 1, 0 | expand opcount
 time_opcounter = 0, 1 | expand opcount
diff --git a/applications/knl/poisson_dg/verify.mini b/applications/knl/poisson_dg/verify.mini
index 07c5074b..d6dcdc85 100644
--- a/applications/knl/poisson_dg/verify.mini
+++ b/applications/knl/poisson_dg/verify.mini
@@ -12,7 +12,9 @@ extension = vtu
 fastdg = 1
 sumfact = 1
 vectorization_quadloop = 1
-vectorize_diagonal = 1
+vectorization_strategy = explicit
+vectorization_horizontal = 4
+vectorization_vertical = 2
 quadrature_order = 6
 architecture = knl
 
diff --git a/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini b/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini
index 95c6368f..d7dd0166 100644
--- a/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini
+++ b/applications/knl/poisson_dg_tensor/knl_poisson_dg_tensor.mini
@@ -39,7 +39,9 @@ extension = vtu
 fastdg = 1
 sumfact = 1
 vectorization_quadloop = 1
-vectorize_diagonal = 1
+vectorization_strategy = explicit
+vectorization_horizontal = 4
+vectorization_vertical = 2
 instrumentation_level = 2, 3, 4 | expand
 opcounter = 1, 0 | expand opcount
 time_opcounter = 0, 1 | expand opcount
diff --git a/applications/knl/poisson_dg_tensor/verify.mini b/applications/knl/poisson_dg_tensor/verify.mini
index 029acbe5..c2447c07 100644
--- a/applications/knl/poisson_dg_tensor/verify.mini
+++ b/applications/knl/poisson_dg_tensor/verify.mini
@@ -12,7 +12,9 @@ extension = vtu
 fastdg = 1
 sumfact = 1
 vectorization_quadloop = 1
-vectorize_diagonal = 1
+vectorization_strategy = explicit
+vectorization_horizontal = 4
+vectorization_vertical = 2
 quadrature_order = 6
 architecture = knl
 
diff --git a/applications/poisson_dg/poisson_dg.mini b/applications/poisson_dg/poisson_dg.mini
index 2ba1c2cd..7bf6144b 100644
--- a/applications/poisson_dg/poisson_dg.mini
+++ b/applications/poisson_dg/poisson_dg.mini
@@ -39,7 +39,7 @@ extension = vtu
 fastdg = 1
 sumfact = 1
 vectorization_quadloop = 1
-vectorize_greedy = 1
+vectorization_strategy = explicit
 instrumentation_level = 2, 3, 4 | expand
 opcounter = 1, 0 | expand opcount
 time_opcounter = 0, 1 | expand opcount
diff --git a/applications/poisson_dg_tensor/CMakeLists.txt b/applications/poisson_dg_tensor/CMakeLists.txt
index 7e1a5438..27ae6ff2 100644
--- a/applications/poisson_dg_tensor/CMakeLists.txt
+++ b/applications/poisson_dg_tensor/CMakeLists.txt
@@ -4,8 +4,6 @@ dune_add_formcompiler_system_test(UFLFILE poisson_dg_tensor.ufl
                                   NO_TESTS
                                   )
 
-add_subdirectory(sliced)
-
 dune_add_formcompiler_system_test(UFLFILE poisson_dg_tensor.ufl
                                   BASENAME verify_app_poisson_dg_tensor
                                   INIFILE verify.mini
diff --git a/applications/poisson_dg_tensor/poisson_dg_tensor.mini b/applications/poisson_dg_tensor/poisson_dg_tensor.mini
index d0c7251a..80986df2 100644
--- a/applications/poisson_dg_tensor/poisson_dg_tensor.mini
+++ b/applications/poisson_dg_tensor/poisson_dg_tensor.mini
@@ -39,7 +39,7 @@ extension = vtu
 fastdg = 1
 sumfact = 1
 vectorization_quadloop = 1
-vectorize_greedy = 1
+vectorization_strategy = explicit
 instrumentation_level = 2, 3, 4 | expand
 opcounter = 1, 0 | expand opcount
 time_opcounter = 0, 1 | expand opcount
diff --git a/applications/poisson_dg_tensor/sliced/CMakeLists.txt b/applications/poisson_dg_tensor/sliced/CMakeLists.txt
deleted file mode 100644
index fb516b3c..00000000
--- a/applications/poisson_dg_tensor/sliced/CMakeLists.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-dune_add_formcompiler_system_test(UFLFILE ../poisson_dg_tensor.ufl
-                                  BASENAME app_poisson_dg_tensor_sliced
-                                  INIFILE sliced.mini
-                                  NO_TESTS
-                                  )
diff --git a/applications/poisson_dg_tensor/sliced/sliced.mini b/applications/poisson_dg_tensor/sliced/sliced.mini
deleted file mode 100644
index 7522320a..00000000
--- a/applications/poisson_dg_tensor/sliced/sliced.mini
+++ /dev/null
@@ -1,49 +0,0 @@
-__name = app_poisson_dg_tensor_sliced_{__exec_suffix}
-__exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}_level{formcompiler.instrumentation_level}
-
-opcount_suffix = opcount, nonopcount | expand opcount
-{opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude
-
-# Calculate the size of the grid to equlibritate it to 100 MB/rank
-# Input parameters
-dim = 3
-mbperrank = 100
-ranks = 32
-floatingbytes = 8
-
-# Metaini Calculations
-memperrank = {mbperrank} * 1048576 | eval
-dofsperdir = {formcompiler.ufl_variants.degree} + 1 | eval
-celldofs = {dofsperdir} ** {dim} | eval
-cellsperrank = {memperrank} / ({floatingbytes} * {celldofs}) | eval
-cellsperdir = {cellsperrank} ** (1/{dim}) | eval | toint
-firstdircells = {ranks} * {cellsperdir} | eval
-dimminusone = {dim} - 1 | eval
-ones = 1 | repeat {dimminusone}
-otherdircells = {cellsperdir} | repeat {dimminusone}
-
-# Setup the grid!
-extension = 1.0 | repeat {dim}
-cells = {firstdircells} {otherdircells}
-partitioning = {ranks} {ones}
-
-# Set up the timing identifier
-identifier = poisson_dg_tensor_deg{formcompiler.ufl_variants.degree}
-
-[wrapper.vtkcompare]
-name = {__name}
-extension = vtu
-
-[formcompiler]
-fastdg = 1
-sumfact = 1
-vectorization_quadloop = 1
-vectorize_slice = 1
-instrumentation_level = 2, 3, 4 | expand
-opcounter = 1, 0 | expand opcount
-time_opcounter = 0, 1 | expand opcount
-quadrature_order = {formcompiler.ufl_variants.degree} * 2 | eval
-
-[formcompiler.ufl_variants]
-cell = hexahedron
-degree = 3, 7 | expand
diff --git a/applications/stokes_dg/stokes_dg.mini b/applications/stokes_dg/stokes_dg.mini
index 2727253b..f6ae1d0f 100644
--- a/applications/stokes_dg/stokes_dg.mini
+++ b/applications/stokes_dg/stokes_dg.mini
@@ -40,7 +40,7 @@ extension = vtu
 fastdg = 1
 sumfact = 1
 vectorization_quadloop = 1
-vectorize_greedy = 1
+vectorization_strategy = explicit
 instrumentation_level = 2, 3, 4 | expand
 opcounter = 1, 0 | expand opcount
 time_opcounter = 0, 1 | expand opcount
diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index 1c2a7098..253b3528 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -226,9 +226,11 @@ def fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp, already=
     while horizontal <= width:
         # Iterate over the possible combinations of sum factorization kernels
         # taking into account all the permutations of kernels. This also includes
-        # combinations which use a padding of 1.
-        for combo in it.chain(it.permutations(candidates, horizontal),
-                              it.permutations(candidates, horizontal - 1)):
+        # combinations which use a padding of 1 - but only when horizontal >= 4.
+        generators = [it.permutations(candidates, horizontal)]
+        if horizontal >= 4:
+            generators.append(it.permutations(candidates, horizontal - 1))
+        for combo in it.chain(*generators):
             # The chosen kernels must be part of the kernels for recursion
             # to work correctly
             if sf_to_decide not in combo:
diff --git a/test/sumfact/mass/sliced.mini b/test/sumfact/mass/sliced.mini
index 712d2d5e..90dab43e 100644
--- a/test/sumfact/mass/sliced.mini
+++ b/test/sumfact/mass/sliced.mini
@@ -11,7 +11,9 @@ extension = vtu
 
 [formcompiler]
 numerical_jacobian = 1
-vectorize_slice = 1
+vectorization_strategy = explicit
+vectorization_horizontal = 1
+vectorization_vertical = 4
 sumfact = 1
 
 [formcompiler.ufl_variants]
diff --git a/test/sumfact/poisson/diagonal.mini b/test/sumfact/poisson/diagonal.mini
index d2687d64..d3744184 100644
--- a/test/sumfact/poisson/diagonal.mini
+++ b/test/sumfact/poisson/diagonal.mini
@@ -11,7 +11,9 @@ extension = vtu
 sumfact = 1
 compare_l2errorsquared = 1e-5
 vectorization_quadloop = 1
-vectorize_diagonal = 1
+vectorization_strategy = explicit
+vectorization_horizontal = 2
+vectorization_vertical = 2
 quadrature_order = 6, 6, 6
 fastdg = 1
 
diff --git a/test/sumfact/poisson/sliced.mini b/test/sumfact/poisson/sliced.mini
index 39e9e915..858b8c6b 100644
--- a/test/sumfact/poisson/sliced.mini
+++ b/test/sumfact/poisson/sliced.mini
@@ -11,7 +11,9 @@ extension = vtu
 sumfact = 1
 compare_l2errorsquared = 1e-5
 vectorization_quadloop = 1
-vectorize_slice = 1
+vectorization_strategy = explicit
+vectorization_horizontal = 1
+vectorization_vertical = 4
 quadrature_order = 6
 
 [formcompiler.ufl_variants]
-- 
GitLab