diff --git a/python/dune/perftool/loopy/transformations/vectorize_quad.py b/python/dune/perftool/loopy/transformations/vectorize_quad.py
index f82f482929a5eaa8f742bbe388c8eeff5bce4ceb..f1b3ce12100731e87eff358a0cc3bcef49db6beb 100644
--- a/python/dune/perftool/loopy/transformations/vectorize_quad.py
+++ b/python/dune/perftool/loopy/transformations/vectorize_quad.py
@@ -197,6 +197,11 @@ def _vectorize_quadrature_loop(knl, inames, suffix):
                 # 1. Rotating the input data
                 knl = add_vector_view(knl, quantity)
                 if horizontal > 1:
+                    # Pitfall: In the case of generating jacobians, the input needs to be rotated exactly once.
+                    predicates = frozenset()
+                    if common_inames:
+                        predicates = frozenset({prim.Comparison(prim.Sum(tuple(prim.Variable(i) for i in common_inames)), "==", 0)})
+
                     new_insns.append(lp.CallInstruction((),  # assignees
                                                         prim.Call(TransposeReg(vertical=vertical, horizontal=horizontal),
                                                                   tuple(prim.Subscript(prim.Variable(get_vector_view_name(quantity)),
@@ -207,6 +212,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix):
                                                         within_inames_is_final=True,
                                                         id="{}_rotate{}".format(quantity, suffix),
                                                         tags=frozenset({"sumfact_stage2"}),
+                                                        predicates=predicates,
                                                         ))
 
                 # Add substitution rules
@@ -263,7 +269,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix):
                                                       (vector_indices.get(horizontal) + last_index, prim.Variable(vec_iname)),
                                                       ),
                                        substitute(insn.expression, replacemap),
-                                       depends_on=frozenset({"continue_stmt{}".format(suffix)}),
+                                       depends_on=frozenset({"continue_stmt{}".format(suffix), lp.match.Tagged("sumfact_stage1")}),
                                        depends_on_is_final=True,
                                        within_inames=common_inames.union(frozenset({outer_iname, vec_iname})),
                                        within_inames_is_final=True,
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index 3fb213c87e98883e4f474c4145090b40aff1eec4..4daeb45a7627f7bda1d83c178e83a20a5362607c 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -281,7 +281,7 @@ def determine_accumulation_space(info, number):
 
 
 def boundary_predicates(expr, measure, subdomain_id):
-    predicates = frozenset([])
+    predicates = []
 
     if subdomain_id not in ['everywhere', 'otherwise']:
         # Get the original form and inspect the present measures
@@ -301,9 +301,18 @@ def boundary_predicates(expr, measure, subdomain_id):
             visitor = get_visitor(measure, subdomain_id)
             subdomain_data = visitor(subdomain_data, do_predicates=True)
 
-        predicates = predicates.union([prim.Comparison(subdomain_data, '==', subdomain_id)])
+        p = prim.Comparison(subdomain_data, '==', subdomain_id)
 
-    return predicates
+        # Try to find conditions that are always 0 or always 1
+        from pymbolic.mapper.evaluator import evaluate
+        try:
+            eval = evaluate(p)
+            if not eval:
+                predicates.append(False)
+        except:
+            predicates.append(p)
+
+    return frozenset(predicates)
 
 
 class PDELabAccumulationInfo(ImmutableRecord):
@@ -392,6 +401,8 @@ def generate_accumulation_instruction(expr, visitor):
     accumvar = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction())
 
     predicates = boundary_predicates(expr, visitor.measure, visitor.subdomain_id)
+    if False in predicates:
+        return
 
     rank = 1 if ansatz_lfs.lfs is None else 2
 
@@ -755,14 +766,17 @@ def generate_residual_kernels(form, original_form):
         for measure in set(i.integral_type() for i in form.integrals()):
             logger.info("generate_residual_kernels: measure {}".format(measure))
             with global_context(integral_type=measure):
-                enum_pattern()
-                pattern_baseclass()
-                enum_alpha()
-
                 from dune.perftool.pdelab.signatures import assembler_routine_name
                 with global_context(kernel=assembler_routine_name()):
                     kernel = [k for k in get_backend(interface="generate_kernels_per_integral")(form.integrals_by_type(measure))]
 
+                # The integrals might vanish due to unfulfillable boundary conditions.
+                # We only generate the local operator enums/base classes if they did not.
+                if kernel:
+                    enum_pattern()
+                    pattern_baseclass()
+                    enum_alpha()
+
                 # Maybe add numerical differentiation
                 if get_form_option("numerical_jacobian"):
                     # Include headers for numerical methods
@@ -897,10 +911,6 @@ def generate_control_kernels(forms):
         for measure in set(i.integral_type() for form in forms for i in form.integrals()):
             logger.info("generate_control_kernels: measure {}".format(measure))
             with global_context(integral_type=measure):
-                enum_pattern()
-                pattern_baseclass()
-                enum_alpha()
-
                 from dune.perftool.pdelab.signatures import assembler_routine_name
                 with global_context(kernel=assembler_routine_name()):
                     # TODO: Sumfactorization not yet implemented
@@ -910,6 +920,13 @@ def generate_control_kernels(forms):
                     forms_measure = [form.integrals_by_type(measure) for form in forms]
                     kernel = [k for k in control_generate_kernels_per_integral(forms_measure)]
 
+                    # The integrals might vanish due to unfulfillable boundary conditions.
+                    # We only generate the local operator enums/base classes if they did not.
+                    if kernel:
+                        enum_pattern()
+                        pattern_baseclass()
+                        enum_alpha()
+
             operator_kernels[(measure, 'residual')] = kernel
 
         return operator_kernels
diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py
index dff7cfccd8a0282a2b0cf87e8c4a0ea3c709a010..c943d80c8597522a97438ceb3dcffa2991553cb7 100644
--- a/python/dune/perftool/sumfact/accumulation.py
+++ b/python/dune/perftool/sumfact/accumulation.py
@@ -381,9 +381,6 @@ def generate_accumulation_instruction(expr, visitor):
     test_info = visitor.test_info
     trial_info = visitor.trial_info
 
-    # Cache all stage 1 sum factorization kernels used in this expression
-    SumfactCollectMapper()(expr)
-
     # Number of basis functions per direction
     leaf_element = test_info.element
     from ufl import MixedElement
@@ -402,6 +399,11 @@ def generate_accumulation_instruction(expr, visitor):
                                      visitor.measure,
                                      visitor.subdomain_id,
                                      )
+    if False in predicates:
+        return
+
+    # Cache all stage 1 sum factorization kernels used in this expression
+    SumfactCollectMapper()(expr)
 
     insn_dep = None
 
diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py
index 2c5f8a3c3b44e1dd473816464b7856432bdc37b4..12f7115662533b6f279e6e340130de95b64992d9 100644
--- a/python/dune/perftool/sumfact/realization.py
+++ b/python/dune/perftool/sumfact/realization.py
@@ -137,6 +137,9 @@ class BufferSwitcher(object):
 
 
 def realize_sumfact_kernel_function(sf):
+    # Remove anything kernel related from caches
+    delete_cache_items("kernel_default")
+
     # Get a buffer switcher instance
     buffer = BufferSwitcher()
     insn_dep = frozenset()
diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py
index 85601c1e84e44bb15f998e227da0b980e63789d9..435975e41af8c29c0aaf078450f0046dd721137a 100644
--- a/python/dune/perftool/sumfact/symbolic.py
+++ b/python/dune/perftool/sumfact/symbolic.py
@@ -155,7 +155,7 @@ class VectorSumfactKernelOutput(SumfactKernelInterfaceBase):
         outputs = set(self.interfaces)
 
         trial_element, = set(o.trial_element for o in self.interfaces)
-        trial_element_index, = set(o.trial_element_index for o in self.interfaces)
+        trial_element_index = set(o.trial_element_index for o in self.interfaces).pop()
         from dune.perftool.sumfact.accumulation import accum_iname
         element = get_leaf(trial_element, trial_element_index) if trial_element is not None else None
         inames = tuple(accum_iname(element, mat.rows, i)
@@ -368,7 +368,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
     @property
     def parallel_key(self):
         """ A key that identifies parallellizable kernels. """
-        return tuple(m.basis_size for m in self.permuted_matrix_sequence) + (self.stage, self.buffer)
+        return tuple(m.basis_size for m in self.permuted_matrix_sequence) + (self.stage, self.buffer, self.interface.within_inames)
 
     @property
     def cache_key(self):