diff --git a/python/dune/perftool/generation/cache.py b/python/dune/perftool/generation/cache.py
index e4c3fb592dd3eb4e32a51e44fc377d49e969aa10..f0823bb5b14e9f6475ec4feae3df37ca713cffe9 100644
--- a/python/dune/perftool/generation/cache.py
+++ b/python/dune/perftool/generation/cache.py
@@ -120,7 +120,7 @@ class _RegisteredFunction(object):
         return self._get_content(cache_key)
 
     def remove_by_value(self, val):
-        self._memoize_cache = {k:v for k, v in self._memoize_cache.items() if v != val}
+        self._memoize_cache = {k: v for k, v in self._memoize_cache.items() if v != val}
 
 
 def generator_factory(**factory_kwargs):
diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py
index 62077833cb2cdca267dc11ec27952a0712991c0e..fda761c73f08db88c8fc8712512e881bbf6999a6 100644
--- a/python/dune/perftool/sumfact/basis.py
+++ b/python/dune/perftool/sumfact/basis.py
@@ -69,7 +69,6 @@ def pymbolic_trialfunction_gradient(element, restriction, component, visitor):
     dim = formdata.geometric_dimension
     buffers = []
     insn_dep = None
-    ret = False
     for i in range(dim):
         a_matrices = [theta_matrix] * dim
         a_matrices[i] = dtheta_matrix
@@ -98,28 +97,19 @@ def pymbolic_trialfunction_gradient(element, restriction, component, visitor):
         # Add a sum factorization kernel that implements the
         # evaluation of the gradients of basis functions at quadrature
         # points (stage 1)
-        if index:
-            assert len(visitor.indices) == 1
-            var, insn_dep = sum_factorization_kernel(a_matrices,
-                                                     buffer,
-                                                     1,
-                                                     preferred_position=i,
-                                                     insn_dep=insn_dep,
-                                                     )
-            ret = True
-        else:
-            var, insn_dep = sum_factorization_kernel(a_matrices,
-                                                     buffer,
-                                                     1,
-                                                     preferred_position=i,
-                                                     insn_dep=insn_dep,
-                                                     )
-            buffers.append(var)
+        var, insn_dep = sum_factorization_kernel(a_matrices,
+                                                 buffer,
+                                                 1,
+                                                 preferred_position=i,
+                                                 insn_dep=insn_dep,
+                                                 )
+        buffers.append(var)
 
     # Check whether we want to return early with something that has the indexing
     # already handled! This happens with vectorization when the index coincides
     # with the position in the vector register.
-    if ret:
+    if index:
+        assert len(visitor.indices) == 1
         indices = visitor.indices
         visitor.indices = None
         return maybe_wrap_subscript(var, tuple(prim.Variable(i) for i in quadrature_inames()) + indices)
@@ -179,8 +169,13 @@ def pymbolic_trialfunction(element, restriction, component, visitor):
                                       insn_dep=frozenset({Writes(input)}),
                                       )
 
+    if index:
+        index = (index,)
+    else:
+        index = ()
+
     return prim.Subscript(var,
-                          tuple(prim.Variable(i) for i in quadrature_inames())
+                          tuple(prim.Variable(i) for i in quadrature_inames() + index)
                           )
 
 
diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py
index 5787913e3bc48b0816620d3298820afcf817048a..57d255883b9ea164f07c8df7e689a5a6960538d4 100644
--- a/python/dune/perftool/sumfact/sumfact.py
+++ b/python/dune/perftool/sumfact/sumfact.py
@@ -237,7 +237,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
         transform(nest_quadrature_loops, visitor.inames)
 
 
-@generator_factory(item_tags=("sumfactkernel",), context_tags=("kernel",), cache_key_generator=lambda a, b, s, **kw: (a,b,s))
+@generator_factory(item_tags=("sumfactkernel",), context_tags=("kernel",), cache_key_generator=lambda a, b, s, **kw: (a, b, s))
 def sum_factorization_kernel(a_matrices, buf, stage, insn_dep=frozenset({}), additional_inames=frozenset({}), preferred_position=None):
     """
     Calculate a sum factorization matrix product.