diff --git a/python/dune/perftool/loopy/transformations/collect_rotate.py b/python/dune/perftool/loopy/transformations/collect_rotate.py
index 167de058f85b773978d4d6cef101101df72bf840..0ecf0134581909c47bcd8f159bb577d2763ffe29 100644
--- a/python/dune/perftool/loopy/transformations/collect_rotate.py
+++ b/python/dune/perftool/loopy/transformations/collect_rotate.py
@@ -241,7 +241,7 @@ def collect_vector_data_rotate(knl):
             tags = set(get_pymbolic_tag(expr) for expr in quantity_exprs)
             if tags and tags.pop() == "operator_precomputed":
                 expr, = quantity_exprs
-                shape=(ceildiv(product(s for s in arg.shape), vec_size), vec_size)
+                shape = (ceildiv(product(s for s in arg.shape), vec_size), vec_size)
                 name = loopy_class_member(quantity,
                                           shape=shape,
                                           dim_tags="f,vec",
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index 4e7227e12b1925ee59588023685471460046c3e6..d27275516494af74799a6397b89a6f9dc9ec770e 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -281,7 +281,7 @@ def boundary_predicates(expr, measure, subdomain_id, visitor):
 
         from ufl.classes import Expr
         if isinstance(subdomain_data, Expr):
-            cond = visitor(subdomain_data)
+            cond = visitor(subdomain_data, do_predicates=True)
         else:
             # Determine the name of the parameter function
             cond = get_global_context_value("data").object_names[id(subdomain_data)]
diff --git a/python/dune/perftool/sumfact/__init__.py b/python/dune/perftool/sumfact/__init__.py
index 36d18d54abc1d6bc3c5fa9b19f7cce2142662c29..beb96fd9e6e7679920a96c006ea7bfe5e6737e7b 100644
--- a/python/dune/perftool/sumfact/__init__.py
+++ b/python/dune/perftool/sumfact/__init__.py
@@ -59,6 +59,6 @@ class SumFactInterface(PDELabInterface):
 
     def pymbolic_spatial_coordinate(self):
         import dune.perftool.sumfact.geometry
-        ret, indices = get_backend(interface="spatial_coordinate", selector=option_switch("diagonal_transformation_matrix"))(self.visitor.indices)
+        ret, indices = get_backend(interface="spatial_coordinate", selector=option_switch("diagonal_transformation_matrix"))(self.visitor.indices, self.visitor.do_predicates)
         self.visitor.indices = indices
         return ret
diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py
index afced02f64c4a0a772d94c983c1941f8189777f1..db18095b209d5f4a3d42bf8cf38b92536bc4b819 100644
--- a/python/dune/perftool/sumfact/accumulation.py
+++ b/python/dune/perftool/sumfact/accumulation.py
@@ -77,6 +77,11 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
     # When doing sum factorization we want to split the test function
     assert(accterm.argument.expr is not None)
 
+    # For vector valued functions the first index is the dimension index
+    coeff_func_index = None
+    if isinstance(accterm.argument.expr, uc.Indexed):
+        coeff_func_index = accterm.argument.expr.ufl_operands[1].indices()[0]._value
+
     # Do the tree traversal to get a pymbolic expression representing this expression
     pymbolic_expr = visitor(accterm.term)
     if pymbolic_expr == 0:
@@ -84,30 +89,37 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
 
     dim = world_dimension()
     mod_arg_expr = accterm.argument.expr
-    from ufl.classes import FunctionView, Argument
-    while (not isinstance(mod_arg_expr, FunctionView)) and (not isinstance(mod_arg_expr, Argument)):
+    while (not isinstance(mod_arg_expr, uc.FunctionView)) and (not isinstance(mod_arg_expr, uc.Argument)):
         mod_arg_expr = mod_arg_expr.ufl_operands[0]
+
     degree = mod_arg_expr.ufl_element()._degree
     basis_size = degree + 1
 
-    # Extract index information
+    # The gradient index is the last
     grad_index = None
     if accterm.argument.reference_grad:
-        grad_index = accterm.argument.expr.ufl_operands[1][0]._value
+        grad_index = accterm.argument.expr.ufl_operands[1][-1]._value
 
+    # Could be 0 so compare to None
     accum_index = None
-    if visitor.indices:
-        accum_index = visitor.indices[0]
+    if coeff_func_index is not None:
+        accum_index = coeff_func_index
     if accterm.argument.index and len(accterm.argument.index) > 1:
-        accum_index = accterm.argument.index[1]._value
+        accum_index = accterm.argument.index[0]._value
+    if isinstance(accum_index, int):
+        accum_index = (accum_index,)
 
     jacobian_inames = tuple()
     if accterm.is_jacobian:
         jacobian_inames = visitor.inames
 
+    # Only accumulate boundary conditions on parts where it is defined
+    from dune.perftool.pdelab.localoperator import boundary_predicates
+    predicates = boundary_predicates(accterm.term, measure, subdomain_id, visitor)
+
     def emit_sumfact_kernel(restriction, insn_dep):
-        test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure, idims=(accum_index,))
-        ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure, idims=(accum_index,))
+        test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure, idims=accum_index)
+        ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure, idims=accum_index)
         accum = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction())
 
         # Construct the matrix sequence for this sum factorization
@@ -126,6 +138,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                            accumvar=accum,
                            within_inames=jacobian_inames,
                            input=AlreadyAssembledInput(index=accum_index),
+                           coeff_func_index=coeff_func_index,
                            )
 
         from dune.perftool.sumfact.vectorization import attach_vectorization_info
@@ -152,7 +165,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                         expression=0,
                         forced_iname_deps=frozenset(quadrature_inames() + jacobian_inames),
                         forced_iname_deps_is_final=True,
-                        tags=frozenset(["quadvec", "gradvec"])
+                        tags=frozenset(["quadvec", "gradvec"]),
                         )
 
         # Write timing stuff for jacobian (for alpha methods it is done at the end of stage 1)
@@ -180,7 +193,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                                   forced_iname_deps=frozenset(quadrature_inames() + jacobian_inames),
                                   forced_iname_deps_is_final=True,
                                   tags=frozenset({"quadvec"}).union(vectag),
-                                  depends_on=frozenset({deps}).union(timer_dep).union(frozenset({lp.match.Tagged("sumfact_stage1")}))
+                                  depends_on=frozenset({deps}).union(timer_dep).union(frozenset({lp.match.Tagged("sumfact_stage1")})),
                                   )
 
         if insn_dep is None:
@@ -241,6 +254,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                         forced_iname_deps=frozenset(inames + vecinames + jacobian_inames),
                         forced_iname_deps_is_final=True,
                         depends_on=insn_dep,
+                        predicates=predicates
                         )
 
         # Mark the transformation that moves the quadrature loop
diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py
index f50c478a804bc22911593d7c235f682d5b5c22a4..4b3eba3928efe40cb1d4fd335bde7f578cbdef24 100644
--- a/python/dune/perftool/sumfact/basis.py
+++ b/python/dune/perftool/sumfact/basis.py
@@ -150,7 +150,7 @@ def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, v
         coeff_func_index = None
         grad_index, = visitor_indices
     else:
-        grad_index, coeff_func_index = visitor_indices
+        coeff_func_index, grad_index = visitor_indices
 
     # Construct the matrix sequence for this sum factorization
     matrix_sequence = construct_basis_matrix_sequence(derivative=grad_index,
@@ -183,7 +183,6 @@ def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, v
 
 @kernel_cached
 def pymbolic_coefficient(element, restriction, component, coeff_func, visitor_indices):
-    assert visitor_indices is None
     # Basis functions per direction
     basis_size = _basis_functions_per_direction(element, component)
 
@@ -192,7 +191,13 @@ def pymbolic_coefficient(element, restriction, component, coeff_func, visitor_in
                                                       facemod=get_facemod(restriction),
                                                       basis_size=basis_size)
 
+    coeff_func_index = None
+    if visitor_indices:
+        assert len(visitor_indices) == 1
+        coeff_func_index = visitor_indices[0]
+
     inp = LFSSumfactKernelInput(coeff_func=coeff_func,
+                                coeff_func_index=coeff_func_index,
                                 element=element,
                                 component=component,
                                 restriction=restriction,
diff --git a/python/dune/perftool/sumfact/geometry.py b/python/dune/perftool/sumfact/geometry.py
index fce0ec9b8b403ed2b5439f70b418421823ff1804..c991aeaa8113f218242c6d07e9f0b41479ce5acc 100644
--- a/python/dune/perftool/sumfact/geometry.py
+++ b/python/dune/perftool/sumfact/geometry.py
@@ -65,9 +65,10 @@ class GeoCornersInput(SumfactKernelInputBase, ImmutableRecord):
                     tags=frozenset({"sumfact_stage{}".format(sf.stage)}),
                     )
 
+
 @kernel_cached
 @backend(interface="spatial_coordinate", name="default")
-def pymbolic_spatial_coordinate_multilinear(visitor_indices):
+def pymbolic_spatial_coordinate_multilinear(visitor_indices, do_predicates):
     assert len(visitor_indices) == 1
 
     # Construct the matrix sequence for the evaluation of the global coordinate.
@@ -126,7 +127,7 @@ def name_meshwidth():
 
 @kernel_cached
 @backend(interface="spatial_coordinate", name="diagonal_transformation_matrix")
-def pymbolic_spatial_coordinate_axiparallel(visitor_indices):
+def pymbolic_spatial_coordinate_axiparallel(visitor_indices, do_predicates):
     assert len(visitor_indices) == 1
     index, = visitor_indices
 
@@ -142,7 +143,13 @@ def pymbolic_spatial_coordinate_axiparallel(visitor_indices):
     lowcorner = name_lowerleft_corner()
     meshwidth = name_meshwidth()
 
-    if index == face:
+    # If we have to decide which boundary condition to take for this
+    # intersection we always take the boundary condition of the center
+    # of the intersection. We assume that there are no cells with more
+    # than one boundary condition.
+    if do_predicates:
+        x = 0.5
+    elif index == face:
         x = 0
     else:
         iindex = index
diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py
index 9a3011cba1b3223e5af7b081a89cecd9d6326549..44ef5ecb5e2cccc3e8fde593f10ac66abc3448f5 100644
--- a/python/dune/perftool/sumfact/symbolic.py
+++ b/python/dune/perftool/sumfact/symbolic.py
@@ -38,6 +38,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
                  insn_dep=frozenset(),
                  input=None,
                  accumvar=None,
+                 coeff_func_index=None,
                  ):
         """Create a sum factorization kernel
 
@@ -91,23 +92,18 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
             pre-initialized with the input or you have to provide
             direct_input (FastDGGridOperator).
         stage: 1 or 3
-        insn_dep: An instruction ID that the first issued instruction
-            should depend upon. All following ones will depend on each
-            other.
-        within_inames: Instructions will be executed within those
-            inames (needed for stage 3 in jacobians).
         preferred_position: Will be used in the dry run to order kernels
             when doing vectorization e.g. (dx u,dy u,dz u, u).
         restriction: Restriction for faces values.
-        padding: a set of slots in the input temporary to be padded
-        index: The slot in the input temporary dedicated to this kernel
-        coeff_func: The function to call to get the input coefficient
+        within_inames: Instructions will be executed within those
+            inames (needed for stage 3 in jacobians).
+        insn_dep: An instruction ID that the first issued instruction
+            should depend upon. All following ones will depend on each
+            other.
+        input: An SumfactKernelInputBase instance describing the input of the kernel
+        accumvar: The accumulation variable to accumulate into
         coeff_func_index: Index to get the right input coefficient
             (needed for systems of PDEs)
-        element: The UFL element
-        component: The treepath to the correct component of above element
-        accumvar: The accumulation variable to accumulate into
-        input: An SumfactKernelInputBase instance describing the input of the kernel
         """
         # Assert the inputs!
         assert isinstance(matrix_sequence, tuple)
@@ -162,7 +158,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
         Any two sum factorization kernels having the same cache_key
         are realized simulatenously!
         """
-        return (self.matrix_sequence, self.restriction, self.stage, self.buffer)
+        return (self.matrix_sequence, self.restriction, self.stage, self.buffer, self.coeff_func_index)
 
     @property
     def input_key(self):
diff --git a/python/dune/perftool/ufl/modified_terminals.py b/python/dune/perftool/ufl/modified_terminals.py
index 322e31991421689fcaf0d6e03d4b462d997ca4a1..f9d2ef7d5d77600c5312dba0959e51267861ea1c 100644
--- a/python/dune/perftool/ufl/modified_terminals.py
+++ b/python/dune/perftool/ufl/modified_terminals.py
@@ -42,6 +42,7 @@ class ModifiedArgument(Record):
         else:
             return self.index
 
+
 class ModifiedTerminalTracker(MultiFunction):
     """ A multifunction base that defines handler for
     grad, reference_grad, positive_restricted and negative_restricted.
diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py
index 9ec9afd1bc65990126f9c225335d975a305733c5..c95b753715392a1d7ea1a587678af474cb08ba6b 100644
--- a/python/dune/perftool/ufl/visitor.py
+++ b/python/dune/perftool/ufl/visitor.py
@@ -44,12 +44,12 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
         # Call base class constructors
         super(UFL2LoopyVisitor, self).__init__()
 
-    def __call__(self, o):
+    def __call__(self, o, do_predicates=False):
         # Reset some state variables that are reinitialized for each accumulation term
         self.indices = None
         self._indices_backup = []
         self.inames = ()
-
+        self.do_predicates = do_predicates
         return self.call(o)
 
     call = MultiFunction.__call__
diff --git a/test/stokes/stokes_dg_quadrilateral.ufl b/test/stokes/stokes_dg_quadrilateral.ufl
index e3c63e85a3ace9ae58545f9839147e752429ce7d..0995ec6ece110d49fc6ca99d0c05ab79e8143229 100644
--- a/test/stokes/stokes_dg_quadrilateral.ufl
+++ b/test/stokes/stokes_dg_quadrilateral.ufl
@@ -20,18 +20,18 @@ eps = -1.0
 sigma = 1.0
 
 r = inner(grad(u), grad(v))*dx \
+  - p*div(v)*dx \
+  - q*div(u)*dx \
   + inner(avg(grad(u))*n, jump(v))*dS \
-  - eps * inner(avg(grad(v))*n, jump(u))*dS \
-  - inner(grad(u)*n, v)*ds \
-  + eps * inner(grad(v)*n, u-g_v)*ds \
   + sigma * inner(jump(u), jump(v))*dS \
-  + sigma * inner(u-g_v, v)*ds \
-  - p*div(v)*dx \
+  - eps * inner(avg(grad(v))*n, jump(u))*dS \
   - avg(p)*inner(jump(v), n)*dS \
-  + p*inner(v, n)*ds \
-  - q*div(u)*dx \
   - avg(q)*inner(jump(u), n)*dS \
+  - inner(grad(u)*n, v)*ds \
+  + p*inner(v, n)*ds \
   + q*inner(u, n)*ds \
+  + eps * inner(grad(v)*n, u-g_v)*ds \
+  + sigma * inner(u-g_v, v)*ds \
   - q*inner(g_v, n)*ds
 
 forms = [r]
diff --git a/test/sumfact/stokes/stokes_dg.ufl b/test/sumfact/stokes/stokes_dg.ufl
index 9d498628e252d340f0386a14ed8c64353ce72998..0995ec6ece110d49fc6ca99d0c05ab79e8143229 100644
--- a/test/sumfact/stokes/stokes_dg.ufl
+++ b/test/sumfact/stokes/stokes_dg.ufl
@@ -19,39 +19,19 @@ n = FacetNormal(cell)('+')
 eps = -1.0
 sigma = 1.0
 
-# r = inner(grad(u), grad(v))*dx \
-#   + inner(avg(grad(u))*n, jump(v))*dS \
-#   - eps * inner(avg(grad(v))*n, jump(u))*dS \
-#   - inner(grad(u)*n, v)*ds \
-#   + eps * inner(grad(v)*n, u-g_v)*ds \
-#   + sigma * inner(jump(u), jump(v))*dS \
-#   + sigma * inner(u-g_v, v)*ds \
-#   - p*div(v)*dx \
-#   - avg(p)*inner(jump(v), n)*dS \
-#   + p*inner(v, n)*ds \
-#   - q*div(u)*dx \
-#   - avg(q)*inner(jump(u), n)*dS \
-#   + q*inner(u, n)*ds \
-#   - q*inner(g_v, n)*ds
-
-# r = inner(grad(u), grad(v))*dx \
-#   + eps * inner(grad(v)*n, u-g_v)*ds \
-#   - p*div(v)*dx \
-#   - q*div(u)*dx \
-#   - inner(grad(u)*n, v)*ds \
-#   + sigma * inner(u-g_v, v)*ds \
-#   - avg(q)*inner(jump(u), n)*dS \
-#   - eps * inner(avg(grad(v))*n, jump(u))*dS \
-#   + q*inner(u, n)*ds \
-#   - q*inner(g_v, n)*ds
-# # + p*inner(v, n)*ds \
-# # + inner(avg(grad(u))*n, jump(v))*dS \
-# # + sigma * inner(jump(u), jump(v))*dS \
-# # - avg(p)*inner(jump(v), n)*dS \
-# # + p*inner(v, n)*ds \
-
-r = q*inner(u, n)*ds \
-  + p*inner(v, n)*ds
-
+r = inner(grad(u), grad(v))*dx \
+  - p*div(v)*dx \
+  - q*div(u)*dx \
+  + inner(avg(grad(u))*n, jump(v))*dS \
+  + sigma * inner(jump(u), jump(v))*dS \
+  - eps * inner(avg(grad(v))*n, jump(u))*dS \
+  - avg(p)*inner(jump(v), n)*dS \
+  - avg(q)*inner(jump(u), n)*dS \
+  - inner(grad(u)*n, v)*ds \
+  + p*inner(v, n)*ds \
+  + q*inner(u, n)*ds \
+  + eps * inner(grad(v)*n, u-g_v)*ds \
+  + sigma * inner(u-g_v, v)*ds \
+  - q*inner(g_v, n)*ds
 
 forms = [r]