From 9e9392e155dc355945119b3e9e60fb5cc3cfcafa Mon Sep 17 00:00:00 2001 From: Marcel Koch <marcel.koch@uni-muenster.de> Date: Fri, 15 Feb 2019 10:57:12 +0100 Subject: [PATCH] find suitable vcl size --- python/dune/codegen/blockstructured/vectorization.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py index 5accb340..f6ec204e 100644 --- a/python/dune/codegen/blockstructured/vectorization.py +++ b/python/dune/codegen/blockstructured/vectorization.py @@ -483,7 +483,11 @@ def vectorize_micro_elements(knl): outer_iname = vec_iname + '_outer' tail_size = iname_bound % vcl_size - vectorize_tail = ((vcl_size // 2) > 1) and (tail_size >= (vcl_size // 2)) + tail_vcl_size = vcl_size + while tail_vcl_size > tail_size: + tail_vcl_size = tail_vcl_size // 2 + assert(tail_vcl_size > 0) + vectorize_tail = tail_vcl_size > 1 # manually add tail, since split_iname with slabs tries to vectorize the tail if tail_size > 0: @@ -512,7 +516,7 @@ def vectorize_micro_elements(knl): knl = add_vcl_access(knl, inner_iname, vcl_size, level) if tail_size > 0 and vectorize_tail: - knl = _do_vectorization(knl, tail_iname, tail_size, vcl_size // 2, level + 1) + knl = _do_vectorization(knl, tail_iname, tail_size, tail_vcl_size, level + 1) return knl -- GitLab