diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py
index 22942d662f27e0921a042adb8edb92e46a5a5ffa..cfdfc7945ec506327c81cc5dcddb0a9f480c8dd5 100644
--- a/python/dune/codegen/blockstructured/vectorization.py
+++ b/python/dune/codegen/blockstructured/vectorization.py
@@ -1,6 +1,7 @@
 import loopy as lp
 import numpy as np
 import pymbolic.primitives as prim
+from dune.codegen.blockstructured.tools import sub_element_inames
 
 from loopy.match import Tagged, Id, Writes, Reads, And, Or, Iname, All, Not
 from islpy import BasicSet
@@ -468,13 +469,18 @@ def realize_tail(knl, inner_iname, outer_iname, outer_bound, tail_iname, vcl_siz
                    temporary_variables=dict(**knl.temporary_variables, **temporaries_to_duplicate))
 
     common_inames = knl.all_inames()
-    for insn in insns_to_duplicate:
+    for insn in new_insns:
         common_inames = common_inames & (insn.within_inames | insn.reduction_inames())
 
+    if get_form_option('vectorization_blockstructured_tail_ordering') == 'blocked':
+        # TODO: determine the right inames more robustly than by appending '_0' * level
+        macro_inames = frozenset((iname + '_0' * level) for iname in sub_element_inames())
+        common_inames = common_inames - macro_inames
+
     additional_inames_to_duplicate = frozenset()
-    for insn in insns_to_duplicate:
-        additional_inames_to_duplicate = additional_inames_to_duplicate | ((insn.within_inames |
-                                                                            insn.reduction_inames()) - common_inames)
+    for insn in new_insns:
+        insn_inames = insn.within_inames | insn.reduction_inames()
+        additional_inames_to_duplicate = additional_inames_to_duplicate | (insn_inames - common_inames)
 
     knl = lp.duplicate_inames(knl, tuple(additional_inames_to_duplicate),
                               Or(tuple((Id(insn.id) for insn in new_insns))))
diff --git a/python/dune/codegen/options.py b/python/dune/codegen/options.py
index bf80a72a94d7facee6bfb019863883bf3c287feb..00fc2e48941a2dd52dcdd3beec5a7ef49795d02a 100644
--- a/python/dune/codegen/options.py
+++ b/python/dune/codegen/options.py
@@ -101,7 +101,8 @@ class CodegenFormOptionsArray(ImmutableRecord):
     blockstructured = CodegenOption(default=False, helpstr="Use block structure")
     number_of_blocks = CodegenOption(default=1, helpstr="Number of sub blocks in one direction")
     vectorization_blockstructured = CodegenOption(default=False, helpstr="Vectorize block structuring")
-    vectorization_blockstructured_tail = CodegenOption(default=True, helpstr="Try to fully vectorize block structuring even when 'nunmber_of_blocks' is not divisible by vector length.")
+    vectorization_blockstructured_tail = CodegenOption(default=True, helpstr="Try to fully vectorize block structuring even when 'nunmber_of_blocks' is not divisible by vector length")
+    vectorization_blockstructured_tail_ordering = CodegenOption(default='consecutive', helpstr="Ordering of the tail w.r.t the vectorized loop. Possible values: consecutive|blocked")
     adjoint = CodegenOption(default=False, helpstr="Generate adjoint operator")
     control = CodegenOption(default=False, helpstr="Generate operator of derivative w.r.t. the control variable")
     objective_function = CodegenOption(default=None, helpstr="Name of form representing the objective function in UFL file")