From 9e9392e155dc355945119b3e9e60fb5cc3cfcafa Mon Sep 17 00:00:00 2001
From: Marcel Koch <marcel.koch@uni-muenster.de>
Date: Fri, 15 Feb 2019 10:57:12 +0100
Subject: [PATCH] find suitable vcl size

---
 python/dune/codegen/blockstructured/vectorization.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py
index 5accb340..f6ec204e 100644
--- a/python/dune/codegen/blockstructured/vectorization.py
+++ b/python/dune/codegen/blockstructured/vectorization.py
@@ -483,7 +483,11 @@ def vectorize_micro_elements(knl):
             outer_iname = vec_iname + '_outer'
 
             tail_size = iname_bound % vcl_size
-            vectorize_tail = ((vcl_size // 2) > 1) and (tail_size >= (vcl_size // 2))
+            tail_vcl_size = vcl_size
+            # never halve below 1: tail_size is 0 when vcl_size divides iname_bound
+            while tail_vcl_size > max(tail_size, 1):
+                tail_vcl_size = tail_vcl_size // 2
+            vectorize_tail = tail_vcl_size > 1
 
             # manually add tail, since split_iname with slabs tries to vectorize the tail
             if tail_size > 0:
@@ -512,7 +516,7 @@ def vectorize_micro_elements(knl):
             knl = add_vcl_access(knl, inner_iname, vcl_size, level)
 
             if tail_size > 0 and vectorize_tail:
-                knl = _do_vectorization(knl, tail_iname, tail_size, vcl_size // 2, level + 1)
+                knl = _do_vectorization(knl, tail_iname, tail_size, tail_vcl_size, level + 1)
 
             return knl
 
-- 
GitLab