diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index b7ad0e727a7bccc5d23e07fa002c718754ba5c67..ce55b3597363fa62daaa4239cf51cf95e0cc056a 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -19,6 +19,8 @@ from dune.perftool.cgen.clazz import (AccessModifier,
                                       ClassMember,
                                       )
 from dune.perftool import Restriction
+from pymbolic.primitives import Variable
+from pytools import Record
 
 
 def name_form(formdata, data):
@@ -236,8 +238,156 @@ def assembly_routine_signature(formdata):
     assert False
 
 
+class AccumulationSpace(Record):
+    def __init__(self,
+                 lfs=None,
+                 index=None,
+                 restriction=None,
+                 element=None,
+                 ):
+        Record.__init__(self,
+                        lfs=lfs,
+                        index=index,
+                        restriction=restriction,
+                        element=element,
+                        )
+
+    def get_args(self):
+        if self.lfs is None:
+            return ()
+        else:
+            return (self.lfs, self.index)
+
+    def get_restriction(self):
+        if self.restriction is None:
+            return ()
+        else:
+            return (self.restriction,)
+
+
+def determine_accumulation_space(expr, number):
+    from dune.perftool.ufl.modified_terminals import extract_modified_arguments
+    args = extract_modified_arguments(expr, argnumber=number)
+
+    # If this is a residual term we return a dummy object
+    if len(args) == 0:
+        return AccumulationSpace()
+
+    assert(len(args) == 1)
+    ma, = args
+
+    # Extract information on the finite element
+    from ufl.functionview import select_subelement
+    subel = select_subelement(ma.argexpr.ufl_element(), ma.component)
+
+    # And generate a local function space for it!
+    from dune.perftool.pdelab.spaces import name_lfs, name_lfs_bound, lfs_child, lfs_iname
+    lfs = name_lfs(ma.argexpr.ufl_element(), ma.restriction, ma.component)
+    from dune.perftool.generation import valuearg
+    from loopy.types import NumpyType
+    valuearg(lfs, dtype=NumpyType("str"))
+
+    if len(subel.value_shape()) != 0:
+        from dune.perftool.pdelab.geometry import dimension_iname
+        idims = tuple(dimension_iname(context='arg', count=number) for i in range(len(subel.value_shape())))
+        lfs = lfs_child(lfs, idims, shape=subel.value_shape(), symmetry=subel.symmetry)
+        subel = subel.sub_elements()[0]
+
+    lfsi = Variable(lfs_iname(subel, ma.restriction, count=number))
+
+    # If the LFS is not yet a pymbolic expression, make it one
+    from pymbolic.primitives import Expression
+    if not isinstance(lfs, Expression):
+        lfs = Variable(lfs)
+
+    return AccumulationSpace(lfs=lfs,
+                             index=lfsi,
+                             restriction=ma.restriction,
+                             )
+
+
+def boundary_predicates():
+    #     # Maybe wrap this instruction into a condiditional. This mostly happens with mixed boundary conditions
+    #     if self.subdomain_id not in ['everywhere', 'otherwise']:
+    #         # We need to reconstruct the subdomain_data parameter of the measure
+    #         # I am *totally* confused as to why this information is not at hand anyway,
+    #         # but conversation with Martin pointed me to dolfin.fem.assembly where this
+    #         # is done in preprocessing with the limitation of only one possible type of
+    #         # modified measure per integral type.
+    #
+    #         # Get the original form and inspect the present measures
+    #         from dune.perftool.generation import get_global_context_value
+    #         original_form = get_global_context_value("formdata").original_form
+    #
+    #         sd = original_form.subdomain_data()
+    #         assert len(sd) == 1
+    #         subdomains, = list(sd.values())
+    #         domain, = list(sd.keys())
+    #         for k in list(subdomains.keys()):
+    #             if subdomains[k] is None:
+    #                     del subdomains[k]
+    #
+    #         # Finally extract the original subdomain_data (which needs to be present!)
+    #         assert self.measure in subdomains
+    #         subdomain_data = subdomains[self.measure]
+    #
+    #         # Determine the name of the parameter function
+    #         name = get_global_context_value("data").object_names[id(subdomain_data)]
+    #
+    #         # Trigger the generation of code for this thing in the parameter class
+    #         from ufl.checks import is_cellwise_constant
+    #         cellwise_constant = is_cellwise_constant(o)
+    #         from dune.perftool.pdelab.parameter import intersection_parameter_function
+    #         intersection_parameter_function(name, subdomain_data, cellwise_constant, t='int')
+    #
+    #         predicates = predicates.union(['{} == {}'.format(name, self.subdomain_id)])
+    return frozenset([])
+
+
+def generate_accumulation_instruction(visitor, accterm):
+    # First we do the tree traversal to get a pymbolic expression representing this expression
+    pymbolic_expr = visitor(accterm.term)
+
+    # It may happen that an entire accumulation term vanishes. We do nothing in that case
+    if pymbolic_expr == 0:
+        return
+
+    # We also traverse the test function to get its pymbolic equivalent
+    test_expr = visitor(accterm.argument.expr)
+
+    # Combine expression and test function
+    from pymbolic.primitives import Product
+    pymbolic_expr = Product((pymbolic_expr, test_expr))
+
+    # Collect the lfs and lfs indices for the accumulate call
+    test_lfs = determine_accumulation_space(accterm.argument.expr, 0)
+    # In the jacobian case, also determine the space for the ansatz space
+    ansatz_lfs = determine_accumulation_space(accterm.term, 1)
+
+    from dune.perftool.pdelab.argument import name_accumulation_variable
+    accumvar = name_accumulation_variable((ansatz_lfs.get_restriction() + test_lfs.get_restriction()))
+
+    predicates = boundary_predicates()
+
+    rank = 1 if ansatz_lfs.lfs is None else 2
+
+    from dune.perftool.pdelab.argument import PDELabAccumulationFunction
+    from pymbolic.primitives import Call
+    expr = Call(PDELabAccumulationFunction(accumvar, rank),
+                (ansatz_lfs.get_args() + test_lfs.get_args() + (pymbolic_expr,))
+                )
+
+    from dune.perftool.generation import instruction
+    from dune.perftool.pdelab.quadrature import quadrature_iname
+    instruction(assignees=(),
+                expression=expr,
+                forced_iname_deps=frozenset(visitor.inames).union(frozenset({quadrature_iname()})),
+                forced_iname_deps_is_final=True,
+                predicates=predicates
+                )
+
+
 def generate_kernel(integrals):
-    subst_rules = []
     for integral in integrals:
         integrand = integral.integrand()
         measure = integral.integral_type()
@@ -249,9 +399,28 @@ def generate_kernel(integrals):
             from dune.perftool.ufl.transformations.axiparallel import diagonal_jacobian
             integrand = diagonal_jacobian(integrand)
 
+        # Gather dimension indices
         from dune.perftool.ufl.dimensionindex import dimension_index_mapping
         dimension_indices = dimension_index_mapping(integrand)
 
+        # Generate code for the LFS trees present in the form
+        from dune.perftool.ufl.modified_terminals import extract_modified_arguments
+        test_ma = extract_modified_arguments(integrand,)
+        trial_ma = extract_modified_arguments(integrand, coeffcount=0)
+        apply_ma = extract_modified_arguments(integrand, coeffcount=1)
+
+        import itertools
+        for ma in itertools.chain(test_ma, trial_ma, apply_ma):
+            if measure == 'exterior_facet':
+                ma.restriction = Restriction.NEGATIVE
+
+            from dune.perftool.pdelab.spaces import traverse_lfs_tree
+            traverse_lfs_tree(ma)
+
+        from dune.perftool.options import set_option
+        set_option('print_transformations', True)
+        set_option('print_transformations_dir', '.')
+
         # Now split the given integrand into accumulation expressions
         from dune.perftool.ufl.transformations.extract_accumulation_terms import split_into_accumulation_terms
         accterms = split_into_accumulation_terms(integrand)
@@ -262,8 +431,7 @@ def generate_kernel(integrals):
 
         # Iterate over the terms and generate a kernel
         for term in accterms:
-            visitor(term)
-            subst_rules.extend(visitor.substitution_rules)
+            generate_accumulation_instruction(visitor, term)
 
     # Extract the information, which is needed to create a loopy kernel.
     # First extracting it, might be useful to alter it before kernel generation.
@@ -282,7 +450,7 @@ def generate_kernel(integrals):
     # Create the kernel
     from loopy import make_kernel, preprocess_kernel
     kernel = make_kernel(domains,
-                         instructions + subst_rules,
+                         instructions,
                          arguments,
                          temporary_variables=temporaries,
                          function_manglers=manglers,
diff --git a/python/dune/perftool/ufl/transformations/extract_accumulation_terms.py b/python/dune/perftool/ufl/transformations/extract_accumulation_terms.py
index 94413d1a6fdbcc3abf1d5139e851454426e6b6f3..98922bfad056998676a8a7bd28d68385ff50133e 100644
--- a/python/dune/perftool/ufl/transformations/extract_accumulation_terms.py
+++ b/python/dune/perftool/ufl/transformations/extract_accumulation_terms.py
@@ -1,56 +1,63 @@
 """
 This module defines an UFL transformation, that takes a UFL expression
-and transforms it into a sum of terms that all contain the correct number
-of test function terms (which is equal to the form rank).
+and transforms it into a sum of accumulation terms.
 """
-from __future__ import absolute_import
-
 from dune.perftool.ufl.modified_terminals import extract_modified_arguments
 from dune.perftool.ufl.transformations import ufl_transformation
 from dune.perftool.ufl.transformations.replace import replace_expression
+from dune.perftool.ufl.transformations.identitypropagation import identity_propagation
 
-from ufl.algorithms import MultiFunction
-from ufl.classes import Zero
+from ufl.classes import Zero, Identity, Indexed, IntValue, MultiIndex
+from ufl.core.multiindex import indices
 
-import itertools
+from pytools import Record
 
 
-class _ReplacementDict(dict):
-    def __init__(self, good=[], bad=[]):
-        dict.__init__(self)
-        for a in bad:
-            self[a] = Zero(shape=a.ufl_shape, free_indices=a.ufl_free_indices, index_dimensions=a.ufl_index_dimensions)
-        for a in good:
-            self[a] = a
+class AccumulationTerm(Record):
+    def __init__(self, term, argument):
+        Record.__init__(self, term=term, argument=argument)
 
 
-@ufl_transformation(name="accterms2", extraction_lambda=lambda l: l)
+@ufl_transformation(name="accterms", extraction_lambda=lambda l: [at.term for at in l])
 def split_into_accumulation_terms(expr):
-    mod_args = extract_modified_arguments(expr)
-
-    accumulation_terms = []
-
-    # Treat the case of a rank 1 form:
-    if len(list(filter(lambda ma: ma.argexpr.number() == 1, mod_args))) == 0:
-        for arg in mod_args:
-            # Do the replacement on the expression
-            accumulation_terms.append(replace_expression(expr,
-                                                         replacemap=_ReplacementDict(good=(arg.expr,),
-                                                                                     bad=[ma.expr for ma in mod_args],
-                                                                                     )
-                                                         )
-                                      )
-    # and now the case of a rank 2 form:
-    else:
-        for arg1, arg2 in itertools.product(filter(lambda ma: ma.argexpr.number() == 0, mod_args),
-                                            filter(lambda ma: ma.argexpr.number() == 1, mod_args)
-                                            ):
-            accumulation_terms.append(replace_expression(expr,
-                                                         replacemap=_ReplacementDict(good=(arg1.expr, arg2.expr),
-                                                                                     bad=[ma.expr for ma in mod_args],
-                                                                                     )
-                                                         )
-                                      )
-
-    # and return the result
-    return accumulation_terms
+    ret = []
+
+    # Extract a list of modified terminals for the test function
+    # One accumulation instruction will be generated for each of these.
+    test_args = extract_modified_arguments(expr, argnumber=0)
+
+    # Extract a list of modified terminals for the ansatz function
+    # in jacobian forms. Only the restriction of those terminals will
+    # be used to generate new accumulation terms!
+    all_jacobian_args = extract_modified_arguments(expr, argnumber=1)
+
+    for test_arg in test_args:
+        # Do this as a multi step replacement procedure to avoid UFL nagging
+        # about too much stuff in reconstructing the expressions
+
+        # 1) We first cut the expression to the relevant modified test_function
+        # Build a replacement dictionary
+        replacement = {ma.expr: Zero() for ma in test_args}
+        replacement[test_arg.expr] = test_arg.expr
+        replace_expr = replace_expression(expr, replacemap=replacement)
+
+        # 2) Cut the test function itself from the expression
+        if test_arg.reference_grad:
+            newi = indices(1)
+            replacement = {test_arg.expr: Indexed(Identity(2), MultiIndex(newi + test_arg.index._indices))}
+        else:
+            replacement = {test_arg.expr: IntValue(1)}
+        replace_expr = replace_expression(replace_expr, replacemap=replacement)
+
+        # 3) Collapse any identity nodes that may have been introduced by replacing vectors
+        replace_expr = identity_propagation(replace_expr)
+
+        # 4) Further split according to trial function in jacobian terms
+        if all_jacobian_args:
+            for jac_arg in all_jacobian_args:
+                pass
+        else:
+            if not isinstance(replace_expr, Zero):
+                ret.append(AccumulationTerm(replace_expr, test_arg))
+
+    return ret
diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py
index ccc6cb24091e6b48b53d0a86292e34e93dd42f23..9860add42c88d2723b6c4338a062aa5572fa76a8 100644
--- a/python/dune/perftool/ufl/visitor.py
+++ b/python/dune/perftool/ufl/visitor.py
@@ -44,128 +44,8 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
         self.argshape = 0
         self.transpose_necessary = False
         self.inames = []
-        self.substitution_rules = []
-
-        # Initialize the local function spaces that we might need for this term
-        # We therefore gather a list of modified trial functions too.
-        from dune.perftool.ufl.modified_terminals import extract_modified_arguments
-        test_ma = extract_modified_arguments(o,)
-        trial_ma = extract_modified_arguments(o, coeffcount=0)
-        apply_ma = extract_modified_arguments(o, coeffcount=1)
-
-        # All test and trial functions on boundary integrals are technically restricted
-        import itertools
-        for ma in itertools.chain(test_ma, trial_ma, apply_ma):
-            if self.measure == 'exterior_facet':
-                ma.restriction = Restriction.NEGATIVE
-
-            from dune.perftool.pdelab.spaces import traverse_lfs_tree
-            traverse_lfs_tree(ma)
-
-        # Determine the rank of the term
-        self.rank = len(test_ma)
-
-        # First we do the tree traversal to get a pymbolic expression representing this expression
-        pymbolic_expr = self.call(o)
-
-        # It may happen that an entire accumulation term vanishes. We do nothing in that case
-        if pymbolic_expr == 0:
-            return
-
-        # Collect the arguments for the accumulate function
-        accumargs = [None] * (2 * len(test_ma))
-        residual_shape = [None] * len(test_ma)
-        arg_restr = [None] * len(test_ma)
-
-        for ma in test_ma:
-            count = ma.argexpr.number()
-            if len(test_ma) == 2:
-                icount = (count + 1) % 2
-            else:
-                icount = 0
-
-            # Extract the subelement of the (mixed) finite element
-            from ufl.functionview import select_subelement
-            subel = select_subelement(ma.argexpr.ufl_element(), ma.component)
-
-            # And generate a local function space for it!
-            lfs = name_lfs(ma.argexpr.ufl_element(), ma.restriction, ma.component)
-            from dune.perftool.generation import valuearg
-            from loopy.types import NumpyType
-            valuearg(lfs, dtype=NumpyType("str"))
-
-            if len(subel.value_shape()) != 0:
-                from dune.perftool.pdelab.geometry import dimension_iname
-                from dune.perftool.pdelab.basis import lfs_child
-                idims = tuple(dimension_iname(context='arg', count=i) for i in range(len(subel.value_shape())))
-                lfs = lfs_child(lfs, idims, shape=subel.value_shape(), symmetry=subel.symmetry)
-                residual_shape[icount] = name_lfs_bound(name_leaf_lfs(subel.sub_elements()[0], ma.restriction))
-                subel = subel.sub_elements()[0]
-            else:
-                residual_shape[icount] = name_lfs_bound(lfs)
-
-            lfsi = lfs_iname(subel, ma.restriction, count=count)
-
-            # If the LFS is not yet a pymbolic expression, make it one
-            from pymbolic.primitives import Expression
-            if not isinstance(lfs, Expression):
-                lfs = Variable(lfs)
-
-            accumargs[2 * icount] = lfs
-            accumargs[2 * icount + 1] = Variable(lfsi)
-
-            arg_restr[icount] = ma.restriction
-
-        from dune.perftool.pdelab.argument import name_accumulation_variable
-        accumvar = name_accumulation_variable(arg_restr)
-
-        predicates = frozenset({})
-
-        # Maybe wrap this instruction into a condiditional. This mostly happens with mixed boundary conditions
-        if self.subdomain_id not in ['everywhere', 'otherwise']:
-            # We need to reconstruct the subdomain_data parameter of the measure
-            # I am *totally* confused as to why this information is not at hand anyway,
-            # but conversation with Martin pointed me to dolfin.fem.assembly where this
-            # is done in preprocessing with the limitation of only one possible type of
-            # modified measure per integral type.
-
-            # Get the original form and inspect the present measures
-            from dune.perftool.generation import get_global_context_value
-            original_form = get_global_context_value("formdata").original_form
-
-            sd = original_form.subdomain_data()
-            assert len(sd) == 1
-            subdomains, = list(sd.values())
-            domain, = list(sd.keys())
-            for k in list(subdomains.keys()):
-                if subdomains[k] is None:
-                    del subdomains[k]
-
-            # Finally extract the original subdomain_data (which needs to be present!)
-            assert self.measure in subdomains
-            subdomain_data = subdomains[self.measure]
-
-            # Determine the name of the parameter function
-            name = get_global_context_value("data").object_names[id(subdomain_data)]
-
-            # Trigger the generation of code for this thing in the parameter class
-            from ufl.checks import is_cellwise_constant
-            cellwise_constant = is_cellwise_constant(o)
-            from dune.perftool.pdelab.parameter import intersection_parameter_function
-            intersection_parameter_function(name, subdomain_data, cellwise_constant, t='int')
-
-            predicates = predicates.union(['{} == {}'.format(name, self.subdomain_id)])
-
-        from dune.perftool.pdelab.argument import PDELabAccumulationFunction
-        from pymbolic.primitives import Call
-        expr = Call(PDELabAccumulationFunction(accumvar, len(test_ma)), tuple(a for a in accumargs) + (pymbolic_expr,))
 
-        instruction(assignees=(),
-                    expression=expr,
-                    forced_iname_deps=frozenset(self.inames).union(frozenset({quadrature_iname()})),
-                    forced_iname_deps_is_final=True,
-                    predicates=predicates
-                    )
+        return self.call(o)
 
     #
     # Form argument/coefficients handlers: