diff --git a/python/dune/codegen/blockstructured/vectorization.py b/python/dune/codegen/blockstructured/vectorization.py
index ac3a58db7bb8f66c40e154ade21c937504bdffce..c3a4b81f907bfc3c9eec8496ec8cd238dd56e4e7 100644
--- a/python/dune/codegen/blockstructured/vectorization.py
+++ b/python/dune/codegen/blockstructured/vectorization.py
@@ -2,7 +2,7 @@ import loopy as lp
 import numpy as np
 import pymbolic.primitives as prim
 
-from loopy.match import Tagged, Id, Writes, And, Or, Iname, All
+from loopy.match import Tagged, Id, Writes, Reads, And, Or, Iname, All
 from islpy import BasicSet
 
 from dune.codegen.generation import get_global_context_value
@@ -15,25 +15,25 @@ from dune.codegen.pdelab.geometry import world_dimension
 from dune.codegen.tools import get_pymbolic_basename
 
 
-def add_vcl_temporaries(knl):
+def add_vcl_temporaries(knl, vcl_size):
     vector_alias = [a for a in knl.arg_dict if a.endswith('alias')]
 
     # add new temporaries for vectors
     # hope one read insn doesn't have two different reads from the same temporary
     new_vec_temporaries = dict()
     new_insns = []
-    init_iname = 'init_vec'
+    init_iname = 'init_vec{}'.format(vcl_size)
     from islpy import BasicSet
     init_domain = BasicSet("{{ [{0}] : 0<={0}<{1} }}".format(init_iname, get_vcl_type_size(dtype_floatingpoint())))
     for alias in vector_alias:
-        vector_name = alias.replace('alias', 'vec')
+        vector_name = alias.replace('alias', 'vec{}'.format(vcl_size))
         new_vec_temporaries[vector_name] = DuneTemporaryVariable(vector_name, dtype=np.float64,
-                                                                 shape=(get_vcl_type_size(np.float64),), managed=True,
+                                                                 shape=(vcl_size,), managed=True,
                                                                  scope=lp.temp_var_scope.PRIVATE, dim_tags=('vec',))
         # write once to the vector such that loopy won't complain
         new_insns.append(lp.Assignment(assignee=prim.Subscript(prim.Variable(vector_name), prim.Variable(init_iname)),
                                        expression=0, within_inames=frozenset({init_iname}),
-                                       id='init_' + vector_name))
+                                       id='init_{}'.format(vector_name)))
 
     from loopy.kernel.data import VectorizeTag
     return knl.copy(instructions=knl.instructions + new_insns, domains=knl.domains + [init_domain],
@@ -41,9 +41,8 @@ def add_vcl_temporaries(knl):
                     iname_to_tag=dict(**knl.iname_to_tag, **{init_iname: VectorizeTag()}))
 
 
-def add_vcl_accum_insns(knl, iname_inner, iname_outer):
+def add_vcl_accum_insns(knl, iname_inner, iname_outer, vcl_size):
     nptype = dtype_floatingpoint()
-    vcl_size = get_vcl_type_size(np.float64)
 
     accum_insns = lp.find_instructions(knl, And((Tagged('accum'), Iname(iname_inner))))
     accum_ids = [insn.id for insn in accum_insns]
@@ -59,7 +58,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
             expr_without_r = prim.Sum(tuple(e for e in insn.expression.children if not e == insn.assignee))
 
             inames_micro = set((i for i in insn.within_inames if i.startswith('micro')))
-            iname_ix = next((i for i in inames_micro if i.endswith("_x")))
+            iname_ix = next((i for i in inames_micro if '_x' in i))
 
             # need inames for head and tail handling a priori
             from loopy.match import Not, All
@@ -76,8 +75,8 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
             inames_tail = frozenset((var.name for var in replace_tail_inames.values()))
 
             # erstelle a[iy] und b
-            identifier_left = vng('left_node')
-            identifier_right = vng('right_node')
+            identifier_left = vng('left_node_vec{}'.format(vcl_size))
+            identifier_right = vng('right_node_vec{}'.format(vcl_size))
             new_vec_temporaries[identifier_left] = DuneTemporaryVariable(identifier_left, dtype=np.float64,
                                                                          shape=(2,) * (world_dimension() - 1) +
                                                                                (vcl_size,),
@@ -100,7 +99,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
                                            id=id_init_a,
                                            within_inames=(insn.within_inames - frozenset({iname_outer}) -
                                                           inames_micro) | inames_head,
-                                           tags=frozenset({'head'})))
+                                           tags=frozenset({'head_vec{}'.format(vcl_size)})))
 
             # setze werte für a und b
             expr_right = substitute(expr_without_r, {iname_ix: 1})
@@ -131,7 +130,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
                                            depends_on=insn.depends_on | frozenset({id_set_left,
                                                                                    id_init_a, id_set_right}),
                                            within_inames=insn.within_inames - frozenset({iname_ix}),
-                                           tags=frozenset({'accum'})))
+                                           tags=frozenset({'accum_vec{}'.format(vcl_size)})))
             # a[iy] = permute
             id_permute = idg('{}_permute'.format(insn.id))
             expr_permute = prim.Call(VCLPermute(nptype, vcl_size, (vcl_size - 1,) + (-1,) * (vcl_size - 1)),
@@ -162,7 +161,7 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
                                                        frozenset(write_to_tail_ids)),
                                            within_inames=(insn.within_inames - frozenset({iname_inner, iname_outer}) -
                                                           inames_micro) | inames_tail,
-                                           tags=frozenset({'tail'})))
+                                           tags=frozenset({'tail_vec{}'.format(vcl_size)})))
         else:
             if insn.id.endswith('tail') and insn.id.replace('_tail', '') in accum_ids:
                 accum_id = insn.id.replace('_tail', '')
@@ -174,12 +173,16 @@ def add_vcl_accum_insns(knl, iname_inner, iname_outer):
                     temporary_variables=dict(**knl.temporary_variables, **new_vec_temporaries))
 
 
-def add_vcl_access(knl, iname_inner):
+def add_vcl_access(knl, iname_inner, vcl_size):
     from loopy.match import Reads, Tagged
-    accum_insns = set((insn.id for insn in lp.find_instructions(knl, And((Tagged('accum'), Iname(iname_inner))))))
+    accum_insns = set((insn.id for insn in lp.find_instructions(knl, And((Tagged('accum_vec{}'.format(vcl_size)),
+                                                                          Iname(iname_inner))))))
     read_insns = set((insn.id for insn in lp.find_instructions(knl, And((Reads('*alias'), Iname(iname_inner))))))
     vectorized_insns = accum_insns | read_insns
 
+    alias_suffix = 'alias'
+    vector_sufix = 'vec{}'.format(vcl_size)
+
     from loopy.symbolic import CombineMapper
     from loopy.symbolic import IdentityMapper
 
@@ -195,7 +198,7 @@ def add_vcl_access(knl, iname_inner):
         map_loopy_function_identifier = map_constant
 
         def map_subscript(self, expr):
-            if expr.aggregate.name.endswith('alias'):
+            if expr.aggregate.name.endswith(alias_suffix):
                 return expr.aggregate, expr.index_tuple
             else:
                 return tuple()
@@ -213,7 +216,7 @@ def add_vcl_access(knl, iname_inner):
 
         alias, index = aic(insn.expression)
         name_alias = alias.name
-        name_vec = name_alias.replace('alias', 'vec')
+        name_vec = name_alias.replace(alias_suffix, vector_sufix)
         vectorized_insn_to_vector_names[id] = (name_alias, name_vec)
 
         # compute index without vec iname
@@ -240,7 +243,7 @@ def add_vcl_access(knl, iname_inner):
 
         alias, index = aic(insn.expression)
         name_alias = alias.name
-        name_vec = name_alias.replace('alias', 'vec')
+        name_vec = name_alias.replace(alias_suffix, vector_sufix)
         vectorized_insn_to_vector_names[id] = (name_alias, name_vec)
 
         # flat index without vec iname
@@ -260,7 +263,7 @@ def add_vcl_access(knl, iname_inner):
                                                           write_ids)))
 
     # replace alias with vcl vector, except for accumulation assignee
-    vector_alias = [a for a in knl.arg_dict if a.endswith('alias')]
+    vector_alias = [a for a in knl.arg_dict if a.endswith(alias_suffix)]
     dim = world_dimension()
     dim_names = ["x", "y", "z"] + [str(i) for i in range(4, dim + 1)]
     # remove CInstructions since loopy extract expects to get only assignments
@@ -287,7 +290,8 @@ def add_vcl_access(knl, iname_inner):
                                              parameters=parameters)
         new_subst = knl_with_subst_insns.substitutions.copy()
         rule = new_subst[alias + '_subst']
-        rule.expression = prim.Subscript(prim.Variable(alias.replace('alias', 'vec')), (prim.Variable('ex_i'),))
+        rule.expression = prim.Subscript(prim.Variable(alias.replace(alias_suffix, vector_sufix)),
+                                         (prim.Variable('ex_i'),))
         knl_with_subst_insns = knl_with_subst_insns.copy(substitutions=new_subst)
 
     knl_with_subst_insns = lp.expand_subst(knl_with_subst_insns, Iname(iname_inner))
@@ -308,7 +312,7 @@ def add_vcl_access(knl, iname_inner):
                 try:
                     assignee_vec = next((expr for expr in insn.expression.children
                                          if isinstance(expr, prim.Subscript) and
-                                         expr.aggregate.name.replace('vec', 'alias') ==
+                                         expr.aggregate.name.replace(vector_sufix, alias_suffix) ==
                                          assignee_alias.aggregate.name.replace('dummy_', '')))
                 except StopIteration:
                     from dune.codegen.error import CodegenVectorizationError
@@ -431,10 +435,10 @@ def vectorize_micro_elements(knl):
     if vec_iname in knl.all_inames() and get_global_context_value('integral_type') == 'cell':
         vcl_size = get_vcl_type_size(np.float64)
 
-        knl = add_iname_array(knl, vec_iname)
+        has_tail = get_form_option('number_of_blocks') % vcl_size > 0
 
         # manually add tail, since split_iname with slabs tries to vectorize the tail
-        if get_form_option('number_of_blocks') % vcl_size > 0:
+        if has_tail:
             vectorizable_bound = (get_form_option('number_of_blocks') // vcl_size) * vcl_size
             from loopy.kernel.tools import DomainChanger
             domch = DomainChanger(knl, (vec_iname,))
@@ -448,12 +452,14 @@ def vectorize_micro_elements(knl):
 
         knl = lp.tag_inames(knl, [(vec_iname + '_inner', 'vec')])
 
+        knl = add_iname_array(knl, vec_iname)
+
         array_alias = [a for a in knl.arg_dict.keys() if a.endswith('alias') or a.endswith('tail')]
         iname_vector = [a for a in knl.temporary_variables.keys() if a.endswith('vec')]
         knl = lp.split_array_axis(knl, array_alias + iname_vector, 0, vcl_size)
         knl = lp.tag_array_axes(knl, iname_vector, ('c', 'vec'))
 
-        knl = add_vcl_temporaries(knl)
-        knl = add_vcl_accum_insns(knl, vec_iname + '_inner', vec_iname + '_outer')
-        knl = add_vcl_access(knl, vec_iname + '_inner')
+        knl = add_vcl_temporaries(knl, vcl_size)
+        knl = add_vcl_accum_insns(knl, vec_iname + '_inner', vec_iname + '_outer', vcl_size)
+        knl = add_vcl_access(knl, vec_iname + '_inner', vcl_size)
     return knl