diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index 8d0081dbf4739005c7fb9c5ffc9fa7aa48806c93..ad01abbc8360704e1842be9c503b0117063fdaee 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -288,33 +288,35 @@ def _level2_optimal_vectorization_strategy_generator(sumfacts, width, qp, alread
     inoutkey_sumfacts = [tuple(sorted(filter(lambda sf: sf.inout_key == key, sumfacts))) for key in keys]
 
     for parallel in (1, 2):
+        if parallel > len(keys):
+            continue
+
         if parallel == 2 and next(iter(sumfacts)).stage == 3:
             continue
-        for which in filter(lambda w: w == tuple(sorted(w)),
-                            it.permutations(range(len(keys)), parallel)):
-            horizontal = 1
-            while horizontal <= width // parallel:
-                combo = sum((inoutkey_sumfacts[part][:horizontal] for part in which), ())
-
-                vecdict = get_vectorization_dict(combo, width // (horizontal * parallel), horizontal * parallel, qp)
-                horizontal *= 2
-
-                if vecdict is None:
-                    # This particular choice was rejected for some reason.
-                    # Possible reasons:
-                    # * the quadrature point tuple not being suitable
-                    #   for this vectorization strategy
-                    # * there are not enough horizontal kernels
-                    continue
-
-                # Go into recursion to also vectorize all kernels not in this combo
-                for opp in _level2_optimal_vectorization_strategy_generator(list_diff(sumfacts, combo),
-                                                                            width,
-                                                                            qp,
-                                                                            add_to_frozendict(already, vecdict),
-                                                                            ):
-                    yielded = True
-                    yield opp
+
+        horizontal = 1
+        while horizontal <= width // parallel:
+            combo = sum((inoutkey_sumfacts[part][:horizontal] for part in range(parallel)), ())
+
+            vecdict = get_vectorization_dict(combo, width // (horizontal * parallel), horizontal * parallel, qp)
+            horizontal *= 2
+
+            if vecdict is None:
+                # This particular choice was rejected for some reason.
+                # Possible reasons:
+                # * the quadrature point tuple not being suitable
+                #   for this vectorization strategy
+                # * there are not enough horizontal kernels
+                continue
+
+            # Go into recursion to also vectorize all kernels not in this combo
+            for opp in _level2_optimal_vectorization_strategy_generator(list_diff(sumfacts, combo),
+                                                                        width,
+                                                                        qp,
+                                                                        add_to_frozendict(already, vecdict),
+                                                                        ):
+                yielded = True
+                yield opp
 
     # If we did not yield on this recursion level, yield what we got so far
     if not yielded: