diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py index 5accb340eeba612f95e45152d0c7a70679c685bd..f6ec204e011ebc04b196eebf047a3ea6422d9c44 100644 --- a/python/dune/codegen/blockstructured/vectorization.py +++ b/python/dune/codegen/blockstructured/vectorization.py @@ -483,7 +483,11 @@ def vectorize_micro_elements(knl): outer_iname = vec_iname + '_outer' tail_size = iname_bound % vcl_size - vectorize_tail = ((vcl_size // 2) > 1) and (tail_size >= (vcl_size // 2)) + tail_vcl_size = vcl_size + while tail_vcl_size > tail_size: + tail_vcl_size = tail_vcl_size // 2 + assert(tail_vcl_size > 0) + vectorize_tail = tail_vcl_size > 1 # manually add tail, since split_iname with slabs tries to vectorize the tail if tail_size > 0: @@ -512,7 +516,7 @@ def vectorize_micro_elements(knl): knl = add_vcl_access(knl, inner_iname, vcl_size, level) if tail_size > 0 and vectorize_tail: - knl = _do_vectorization(knl, tail_iname, tail_size, vcl_size // 2, level + 1) + knl = _do_vectorization(knl, tail_iname, tail_size, tail_vcl_size, level + 1) return knl