From a0886f08262174d68c9002836283d814d1022380 Mon Sep 17 00:00:00 2001
From: Marcel Koch <marcel.koch@uni-muenster.de>
Date: Fri, 15 Feb 2019 11:26:50 +0100
Subject: [PATCH] make tail vectorization an option

---
 python/dune/codegen/blockstructured/vectorization.py | 11 +++++++----
 python/dune/codegen/options.py                       |  1 +
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py
index 10c8f148..c78ae24f 100644
--- a/python/dune/codegen/blockstructured/vectorization.py
+++ b/python/dune/codegen/blockstructured/vectorization.py
@@ -483,10 +483,13 @@ def vectorize_micro_elements(knl):
             outer_iname = vec_iname + '_outer'
 
             tail_size = iname_bound % vcl_size
-            tail_vcl_size = vcl_size
-            while tail_vcl_size > tail_size:
-                tail_vcl_size = tail_vcl_size // 2
-            vectorize_tail = tail_vcl_size > 1
+            if get_form_option('vectorization_blockstructured_tail'):
+                tail_vcl_size = vcl_size
+                while tail_vcl_size > tail_size:
+                    tail_vcl_size = tail_vcl_size // 2
+                vectorize_tail = tail_vcl_size > 1
+            else:
+                vectorize_tail = False
 
             # manually add tail, since split_iname with slabs tries to vectorize the tail
             if tail_size > 0:
diff --git a/python/dune/codegen/options.py b/python/dune/codegen/options.py
index bb6bbafa..bf80a72a 100644
--- a/python/dune/codegen/options.py
+++ b/python/dune/codegen/options.py
@@ -101,6 +101,7 @@ class CodegenFormOptionsArray(ImmutableRecord):
     blockstructured = CodegenOption(default=False, helpstr="Use block structure")
     number_of_blocks = CodegenOption(default=1, helpstr="Number of sub blocks in one direction")
     vectorization_blockstructured = CodegenOption(default=False, helpstr="Vectorize block structuring")
+    vectorization_blockstructured_tail = CodegenOption(default=True, helpstr="Try to fully vectorize block structuring even when 'number_of_blocks' is not divisible by vector length.")
     adjoint = CodegenOption(default=False, helpstr="Generate adjoint operator")
     control = CodegenOption(default=False, helpstr="Generate operator of derivative w.r.t. the control variable")
     objective_function = CodegenOption(default=None, helpstr="Name of form representing the objective function in UFL file")
-- 
GitLab