diff --git a/python/dune/perftool/loopy/transformations/collect_rotate.py b/python/dune/perftool/loopy/transformations/collect_rotate.py
index 78070a10be4fef8c535a9c3dad6f0a02fe323edf..d99e530dbc4107691663c6bfd865bab274965caa 100644
--- a/python/dune/perftool/loopy/transformations/collect_rotate.py
+++ b/python/dune/perftool/loopy/transformations/collect_rotate.py
@@ -30,10 +30,21 @@ def rotate_function_mangler(knl, func, arg_dtypes):
         # This is not 100% within the loopy philosophy, as we are
         # passing the vector registers as references and have them
         # changed. Loopy assumes this function to be read-only.
+        include_file("dune/perftool/sumfact/transposereg.hh", filetag="operatorfile")
         vcl = lp.types.NumpyType(get_vcl_type(np.float64, register_size=256))
         return lp.CallMangleInfo("transpose_reg", (), (vcl, vcl, vcl, vcl))
 
 
+class VectorIndices(object):  # Records which 'vec_index_inc<N>' counter variables were requested through get()
+    def __init__(self):
+        self.needed = set()  # (name, increment) tuples; being a set, repeated requests for one increment collapse
+
+    def get(self, increment):  # Register the counter for this increment and return a prim.Variable naming it
+        name = "vec_index_inc{}".format(increment)  # the increment is encoded in the variable name
+        self.needed.add((name, increment))  # remembered so the caller can later iterate over .needed
+        return prim.Variable(name)
+
+
 def collect_vector_data_rotate(knl):
     #
     # Process/Assert/Standardize the input
@@ -51,6 +62,7 @@ def collect_vector_data_rotate(knl):
     # Determine the vector lane width
     # TODO infer the numpy type here
     vec_size = get_vcl_type_size(np.float64)
+    vector_indices = VectorIndices()
 
     # Add an iname to the kernel which will be used for vectorization
     new_iname = "quad_vec_{}".format("_".join(inames))
@@ -61,8 +73,6 @@ def collect_vector_data_rotate(knl):
 
     new_insns = []
     all_writers = []
-    tags = frozenset().union(*tuple(i.tags for i in insns))
-    rotating = "gradvec" in tags
 
     #
     # Inspect the given instructions for dependent quantities
@@ -168,10 +178,9 @@ def collect_vector_data_rotate(knl):
 
                 # 1. Rotating the input data
                 knl = add_vector_view(knl, quantity, flatview=True)
-                include_file("dune/perftool/sumfact/transposereg.hh", filetag="operatorfile")
                 new_insns.append(lp.CallInstruction((),  # assignees
                                                     prim.Call(prim.Variable("transpose_reg"),
-                                                              tuple(prim.Subscript(prim.Variable(get_vector_view_name(quantity)), (prim.Variable("vec_index") + i, prim.Variable(new_iname))) for i in range(4))),
+                                                              tuple(prim.Subscript(prim.Variable(get_vector_view_name(quantity)), (vector_indices.get(vec_size) + i, prim.Variable(new_iname))) for i in range(4))),
                                                     depends_on=frozenset({'continue_stmt'}),
                                                     within_inames=common_inames.union(inames).union(frozenset({new_iname})),
                                                     within_inames_is_final=True,
@@ -183,14 +192,14 @@ def collect_vector_data_rotate(knl):
                     assert isinstance(expr, prim.Subscript)
                     last_index = expr.index[-1]
                     replacemap_vec[expr] = prim.Subscript(prim.Variable(get_vector_view_name(quantity)),
-                                                          (prim.Variable("vec_index") + last_index, prim.Variable(new_iname)),
+                                                          (vector_indices.get(vec_size) + last_index, prim.Variable(new_iname)),
                                                           )
             else:
                 # Add a vector view to this quantity
                 expr, = quantities[quantity]
                 knl = add_vector_view(knl, quantity, flatview=True)
                 replacemap_vec[expr] = prim.Subscript(prim.Variable(get_vector_view_name(quantity)),
-                                                      (prim.Variable("vec_index"), prim.Variable(new_iname)),
+                                                      (vector_indices.get(1), prim.Variable(new_iname)),
                                                       )
 
     other_insns = [i for i in knl.instructions if i.id not in [j.id for j in insns + new_insns]]
@@ -219,26 +228,6 @@ def collect_vector_data_rotate(knl):
                                    id="update_total_index",
                                    ))
 
-    # Insert a flat consecutive counter 'vec_index', which is increased after a vector chunk is handled
-    temporaries['vec_index'] = lp.TemporaryVariable('vec_index',  # name
-                                                    dtype=np.int32,
-                                                    scope=lp.temp_var_scope.PRIVATE,
-                                                    )
-    new_insns.append(lp.Assignment(prim.Variable("vec_index"),  # assignee
-                                   0,  # expression
-                                   within_inames=common_inames,
-                                   within_inames_is_final=True,
-                                   id="assign_vec_index",
-                                   ))
-    new_insns.append(lp.Assignment(prim.Variable("vec_index"),  # assignee
-                                   prim.Sum((prim.Variable("vec_index"), vec_size if rotating else 1)),  # expression
-                                   within_inames=common_inames.union(inames),
-                                   within_inames_is_final=True,
-                                   depends_on=frozenset({Tagged("vec_write"), "assign_vec_index"}),
-                                   depends_on_is_final=True,
-                                   id="update_vec_index",
-                                   ))
-
     # Insert a rotating index, that counts 0 , .. , vecsize - 1
     temporaries['rotate_index'] = lp.TemporaryVariable('rotate_index',  # name
                                                        dtype=np.int32,
@@ -291,16 +280,19 @@ def collect_vector_data_rotate(knl):
         lhsname = get_pymbolic_basename(insn.assignee)
         knl = add_vector_view(knl, lhsname, pad_to=vec_size, flatview=True)
         lhsname = get_vector_view_name(lhsname)
+        rotating = "gradvec" in insn.tags
 
         if rotating:
             assert isinstance(insn.assignee, prim.Subscript)
             last_index = insn.assignee.index[-1]
             assert last_index in tuple(range(4))
+            vec_index_size = vec_size
         else:
             last_index = 0
+            vec_index_size = 1
 
         new_insns.append(lp.Assignment(prim.Subscript(prim.Variable(lhsname),
-                                                      (prim.Variable("vec_index") + last_index, prim.Variable(new_iname)),
+                                                      (vector_indices.get(vec_index_size) + last_index, prim.Variable(new_iname)),
                                                       ),
                                        substitute(insn.expression, replacemap_vec),
                                        depends_on=frozenset({"continue_stmt"}),
@@ -316,12 +308,33 @@ def collect_vector_data_rotate(knl):
         if rotating and "{}_rotateback".format(lhsname) not in [i.id for i in new_insns]:
             new_insns.append(lp.CallInstruction((),  # assignees
                                                 prim.Call(prim.Variable("transpose_reg"),
-                                                          tuple(prim.Subscript(prim.Variable(lhsname), (prim.Variable("vec_index") + i, prim.Variable(new_iname))) for i in range(4))),
+                                                          tuple(prim.Subscript(prim.Variable(lhsname), (vector_indices.get(vec_size) + i, prim.Variable(new_iname))) for i in range(4))),
                                                 depends_on=frozenset({Tagged("vec_write")}),
                                                 within_inames=common_inames.union(inames).union(frozenset({new_iname})),
                                                 within_inames_is_final=True,
                                                 id="{}_rotateback".format(lhsname),
                                                 ))
 
+    # Add the necessary vector indices
+    for name, increment in vector_indices.needed:
+        temporaries[name] = lp.TemporaryVariable(name,  # name
+                                                 dtype=np.int32,
+                                                 scope=lp.temp_var_scope.PRIVATE,
+                                                 )
+        new_insns.append(lp.Assignment(prim.Variable(name),  # assignee
+                                       0,  # expression
+                                       within_inames=common_inames,
+                                       within_inames_is_final=True,
+                                       id="assign_{}".format(name),
+                                       ))
+        new_insns.append(lp.Assignment(prim.Variable(name),  # assignee
+                                       prim.Sum((prim.Variable(name), increment)),  # expression
+                                       within_inames=common_inames.union(inames),
+                                       within_inames_is_final=True,
+                                       depends_on=frozenset({Tagged("vec_write"), "assign_{}".format(name)}),
+                                       depends_on_is_final=True,
+                                       id="update_{}".format(name),
+                                       ))
+
     from loopy.kernel.creation import resolve_dependencies
     return resolve_dependencies(knl.copy(instructions=new_insns + other_insns))
diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py
index 210677bf7b369f926394c3e1d537437b88bbe116..2c05e07a54e3bdb034c656f545606c3167a5a356 100644
--- a/python/dune/perftool/sumfact/sumfact.py
+++ b/python/dune/perftool/sumfact/sumfact.py
@@ -174,7 +174,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                         expression=0,
                         forced_iname_deps=frozenset(quadrature_inames() + visitor.inames),
                         forced_iname_deps_is_final=True,
-                        tags=frozenset(["quadvec"])
+                        tags=frozenset(["quadvec", "gradvec"])
                         )
 
         # Replace gradient iname with correct index for assignement