diff --git a/python/dune/perftool/sumfact/amatrix.py b/python/dune/perftool/sumfact/amatrix.py
index 135fcc3d4cf50bf8e5e53822c29c8997516c9d2b..c13ab899aab3c75fdbc6997f39409e506a9019c4 100644
--- a/python/dune/perftool/sumfact/amatrix.py
+++ b/python/dune/perftool/sumfact/amatrix.py
@@ -59,22 +59,24 @@ class AMatrix(ImmutableRecord):
 
 
 class LargeAMatrix(ImmutableRecord):
-    def __init__(self, rows, cols, transpose, derivative):
+    def __init__(self, rows, cols, transpose, derivative, face):
         assert isinstance(derivative, tuple)
         ImmutableRecord.__init__(self,
                                  rows=rows,
                                  cols=cols,
                                  transpose=transpose,
                                  derivative=derivative,
+                                 face=face,
                                  )
 
     @property
     def name(self):
-        name = "ThetaLarge{}_{}".format("T" if self.transpose else "",
-                                        "_".join(tuple("d" if d else "" for d in self.derivative))
-                                        )
+        name = "ThetaLarge{}{}_{}".format("face{}_".format(self.face) if self.face is not None else "",
+                                          "T" if self.transpose else "",
+                                          "_".join(tuple("d" if d else "" for d in self.derivative))
+                                          )
         for i, d in enumerate(self.derivative):
-            define_theta(name, (self.rows, self.cols), self.transpose, d, additional_indices=(i,))
+            define_theta(name, (self.rows, self.cols), self.transpose, d, face=self.face, additional_indices=(i,))
 
         return loopy_class_member(name,
                                   classtag="operator",
diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py
index fe9ef21b484d6567a5deca882c6da9b5a68b307c..f5f5ebdf613678254fa6d2ea9770f15976e94a1c 100644
--- a/python/dune/perftool/sumfact/basis.py
+++ b/python/dune/perftool/sumfact/basis.py
@@ -172,7 +172,7 @@ def pymbolic_trialfunction(element, restriction, component, visitor):
         index = ()
 
     return prim.Subscript(var,
-                          tuple(prim.Variable(i) for i in quadrature_inames() + index)
+                          tuple(prim.Variable(i) for i in quadrature_inames()) + index
                           )
 
 
diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index a7e8eae021ed6b5d606b567874def31a39b73d91..d4942550ed39e7007c66bbec3066f3c8bd7ace10 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -79,6 +79,7 @@ def decide_stage_vectorization_strategy(sumfacts, stage):
                                  cols=next(iter(stage_sumfacts)).a_matrices[i].cols,
                                  transpose=next(iter(stage_sumfacts)).a_matrices[i].transpose,
                                  derivative=tuple(derivative),
+                                 face=next(iter(stage_sumfacts)).a_matrices[i].face,
                                  )
             large_a_matrices.append(large)
 
diff --git a/test/sumfact/poisson/poisson_dg.mini b/test/sumfact/poisson/poisson_dg.mini
index b1b82b57d364b3c649762952060e404e947be3de..747a5b7dac42a836c8a4eb0acdfc59b418f58581 100644
--- a/test/sumfact/poisson/poisson_dg.mini
+++ b/test/sumfact/poisson/poisson_dg.mini
@@ -1,8 +1,9 @@
 __name = sumfact_poisson_dg_{__exec_suffix}
-__exec_suffix = {diff_suffix}_{quadvec_suffix}
+__exec_suffix = {diff_suffix}_{quadvec_suffix}_{gradvec_suffix}
 
 diff_suffix = numdiff, symdiff | expand num
 quadvec_suffix = quadvec, nonquadvec | expand quad
+gradvec_suffix = gradvec, nongradvec | expand grad
 
 cells = 16 16
 extension = 1. 1.
@@ -17,3 +18,4 @@ sumfact = 1
 exact_solution_expression = g
 compare_l2errorsquared = 1e-4
 vectorize_quad = 1, 0 | expand quad
+vectorize_grads = 1, 0 | expand grad