diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py
index 4b704c449a8ea31249faa70011c2fd8db1558746..d4aea877bdfd46157f3447baaf02869a2dfe6131 100644
--- a/python/dune/perftool/sumfact/realization.py
+++ b/python/dune/perftool/sumfact/realization.py
@@ -205,7 +205,8 @@ def _realize_sum_factorization_kernel(sf):
         # In case of direct output we directly accumulate the result
         # of the Sumfactorization into some global data structure.
         if l == len(matrix_sequence) - 1 and get_form_option('fastdg') and sf.stage == 3:
-            insn_args["forced_iname_deps"] = insn_args["forced_iname_deps"].union(frozenset({vec_iname[0].name}))
+            if sf.vectorized:
+                insn_args["forced_iname_deps"] = insn_args["forced_iname_deps"].union(frozenset({vec_iname[0].name}))
             insn_dep = sf.output.realize_direct(matprod, output_inames, out_shape, insn_args)
         else:
             # Issue the reduction instruction that implements the multiplication
diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py
index 4ee4960ec15ba6ece4287ae3c1cb26e2f7a8eb5e..44cf0e0bf530867a167127e3a00bef1cf5a3bc22 100644
--- a/python/dune/perftool/sumfact/symbolic.py
+++ b/python/dune/perftool/sumfact/symbolic.py
@@ -10,7 +10,7 @@ from dune.perftool.sumfact.quadrature import quadrature_inames
 from dune.perftool.sumfact.tabulation import BasisTabulationMatrixBase, BasisTabulationMatrixArray
 from dune.perftool.loopy.target import dtype_floatingpoint
 from dune.perftool.loopy.vcl import ExplicitVCLCast, VCLLowerUpperLoad
-from dune.perftool.tools import get_leaf
+from dune.perftool.tools import get_leaf, maybe_wrap_subscript
 
 from pytools import ImmutableRecord, product