diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py index 10c8f148c73715f1c07568c476c7cbc55c4aa93c..c78ae24fa4acca4751332f2a13be0d71dc0af18f 100644 --- a/python/dune/codegen/blockstructured/vectorization.py +++ b/python/dune/codegen/blockstructured/vectorization.py @@ -483,10 +483,13 @@ def vectorize_micro_elements(knl): outer_iname = vec_iname + '_outer' tail_size = iname_bound % vcl_size - tail_vcl_size = vcl_size - while tail_vcl_size > tail_size: - tail_vcl_size = tail_vcl_size // 2 - vectorize_tail = tail_vcl_size > 1 + if get_form_option('vectorization_blockstructured_tail'): + tail_vcl_size = vcl_size + while tail_vcl_size > tail_size: + tail_vcl_size = tail_vcl_size // 2 + vectorize_tail = tail_vcl_size > 1 + else: + vectorize_tail = False # manually add tail, since split_iname with slabs tries to vectorize the tail if tail_size > 0: diff --git a/python/dune/codegen/options.py b/python/dune/codegen/options.py index bb6bbafaf09dd0b0ae7330b174dfa5805769d09f..bf80a72a94d7facee6bfb019863883bf3c287feb 100644 --- a/python/dune/codegen/options.py +++ b/python/dune/codegen/options.py @@ -101,6 +101,7 @@ class CodegenFormOptionsArray(ImmutableRecord): blockstructured = CodegenOption(default=False, helpstr="Use block structure") number_of_blocks = CodegenOption(default=1, helpstr="Number of sub blocks in one direction") vectorization_blockstructured = CodegenOption(default=False, helpstr="Vectorize block structuring") + vectorization_blockstructured_tail = CodegenOption(default=True, helpstr="Try to fully vectorize block structuring even when 'number_of_blocks' is not divisible by vector length.") adjoint = CodegenOption(default=False, helpstr="Generate adjoint operator") control = CodegenOption(default=False, helpstr="Generate operator of derivative w.r.t. the control variable") objective_function = CodegenOption(default=None, helpstr="Name of form representing the objective function in UFL file")