diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py
index 627c8fc0fd0e0a22a3c1dd1f9919ba50c80823dd..393a22756dc87a3b3a4ec06ccd57d0ac964d3ece 100644
--- a/python/dune/perftool/sumfact/symbolic.py
+++ b/python/dune/perftool/sumfact/symbolic.py
@@ -202,6 +202,11 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
     # Watch out for the documentation to see which key is used unter what circumstances
     #
 
+    @property
+    def parallel_key(self):
+        """ Key identifying parallelizable kernels: basis sizes of matrix_sequence, then stage and buffer. """
+        return tuple(m.basis_size for m in self.matrix_sequence) + (self.stage, self.buffer)
+
     @property
     def cache_key(self):
         """ The cache key that can be used in generation magic
@@ -214,8 +219,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
     def input_key(self):
         """ A cache key for the input coefficients
         Any two sum factorization kernels having the same input_key
-        work on the same input coefficient (and are suitable for simultaneous
-        treatment because of that)
+        work on the same input coefficient
         """
         return (self.input, self.restriction, self.accumvar, self.trial_element_index)
 
diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index 95286389498b4697cc2450b464b37761f521f358..7c9c549546f9243fb9449019e1aa76151eee9891 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -257,6 +257,21 @@ def level1_optimal_vectorization_strategy(sumfacts, width):
 
 
 def level2_optimal_vectorization_strategy(sumfacts, width, qp):
+    # Collect the distinct parallel_key values; kernels sharing a key form one group
+    keys = frozenset(sf.parallel_key for sf in sumfacts)
+
+    # Compute a level3 strategy per group and accumulate them via add_to_frozendict
+    sfdict = frozendict()
+
+    for key in keys:
+        key_sumfacts = frozenset(sf for sf in sumfacts if sf.parallel_key == key)
+        key_strategy = level3_optimal_vectorization_strategy(key_sumfacts, width, qp)
+        sfdict = add_to_frozendict(sfdict, key_strategy)
+
+    return sfdict
+
+
+def level3_optimal_vectorization_strategy(sumfacts, width, qp):
     # Find the sets of simultaneously realizable kernels (thats an equivalence relation)
     keys = frozenset(sf.input_key for sf in sumfacts)