diff --git a/python/dune/perftool/interactive.py b/python/dune/perftool/interactive.py
index 91548045cadbff5ddbb34867ae17309ef5c0c119..e4ea980470bb67fb3616c101e9e533b90333fcae 100644
--- a/python/dune/perftool/interactive.py
+++ b/python/dune/perftool/interactive.py
@@ -15,7 +15,7 @@ except:
 
 
 def clear():
-    os.system('cls' if os.name == 'nt' else 'clear')
+    system('cls' if os.name == 'nt' else 'clear')
 
 
 def kernel_name(v):
diff --git a/python/dune/perftool/pdelab/argument.py b/python/dune/perftool/pdelab/argument.py
index d62f884dda2db26ce6d7f7c32c60c8136dc9b6d5..a55a485c28a8f358824efa6644588b314d091157 100644
--- a/python/dune/perftool/pdelab/argument.py
+++ b/python/dune/perftool/pdelab/argument.py
@@ -14,7 +14,6 @@ from dune.perftool.generation import (domain,
                                       valuearg,
                                       get_global_context_value
                                       )
-from dune.perftool.ufl.modified_terminals import ModifiedArgumentDescriptor
 from dune.perftool.pdelab import (name_index,
                                   restricted_name,
                                   )
diff --git a/python/dune/perftool/pdelab/driver.py b/python/dune/perftool/pdelab/driver.py
index d0b403633da5c79610b261be51a8242446f44a73..303425bcd794f5d2c12d3d07c9dd4b784af6b31f 100644
--- a/python/dune/perftool/pdelab/driver.py
+++ b/python/dune/perftool/pdelab/driver.py
@@ -1293,7 +1293,7 @@ def compare_L2_squared():
     accum_error = name_accumulated_L2_error()
     fail = name_test_fail_variable()
     return ["std::cout << \"l2errorsquared: \" << {} << std::endl;".format(accum_error),
-            "if ({}>{})".format(accum_error, get_option("compare_l2errorsquared")),
+            "if (std::isnan({0}) or std::abs({0})>{1})".format(accum_error, get_option("compare_l2errorsquared")),
             "  {} = true;".format(fail)]
 
 
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index 2c49b7e3c5a5d531938e15ee2c03e364cb617266..2dfbe9be93206b1bf527f83ddefaf84c6dc44d32 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -6,8 +6,10 @@ from dune.perftool.generation import (base_class,
                                       class_basename,
                                       class_member,
                                       constructor_parameter,
+                                      domain,
                                       dump_accumulate_timer,
                                       global_context,
+                                      iname,
                                       include_file,
                                       initializer_list,
                                       post_include,
@@ -19,6 +21,8 @@ from dune.perftool.cgen.clazz import (AccessModifier,
                                       ClassMember,
                                       )
 from dune.perftool import Restriction
+from pymbolic.primitives import Variable
+from pytools import Record
 
 
 def name_form(formdata, data):
@@ -236,8 +240,180 @@ def assembly_routine_signature(formdata):
     assert False
 
 
+class AccumulationSpace(Record):
+    def __init__(self,
+                 lfs=None,
+                 index=None,
+                 restriction=None,
+                 element=None,
+                 ):
+        Record.__init__(self,
+                        lfs=lfs,
+                        index=index,
+                        restriction=restriction,
+                        element=element,
+                        )
+
+    def get_args(self):
+        if self.lfs is None:
+            return ()
+        else:
+            return (self.lfs, self.index)
+
+    def get_restriction(self):
+        if self.restriction is None:
+            return ()
+        else:
+            return (self.restriction,)
+
+
+def determine_accumulation_space(expr, number, measure):
+    from dune.perftool.ufl.modified_terminals import extract_modified_arguments
+    args = extract_modified_arguments(expr, argnumber=number)
+
+    if measure == 'exterior_facet':
+        for ma in args:
+            ma.restriction = Restriction.NEGATIVE
+
+    # If this is a residual term we return a dummy object
+    if len(args) == 0:
+        return AccumulationSpace()
+
+    # There should be but one modified argument, as the splitting eliminated all others.
+    assert(len(args) == 1)
+    ma, = args
+
+    # Extract information on the finite element
+    from ufl.functionview import select_subelement
+    subel = select_subelement(ma.argexpr.ufl_element(), ma.component)
+
+    # And generate a local function space for it!
+    from dune.perftool.pdelab.spaces import name_lfs, name_lfs_bound, lfs_child, lfs_iname
+    lfs = name_lfs(ma.argexpr.ufl_element(), ma.restriction, ma.component)
+    from dune.perftool.generation import valuearg
+    from loopy.types import NumpyType
+    valuearg(lfs, dtype=NumpyType("str"))
+
+    if len(subel.value_shape()) != 0:
+        from dune.perftool.pdelab.geometry import dimension_iname
+        idims = tuple(dimension_iname(context='arg', count=i) for i in range(len(subel.value_shape())))
+        lfs = lfs_child(lfs, idims, shape=subel.value_shape(), symmetry=subel.symmetry)
+        subel = subel.sub_elements()[0]
+
+    lfsi = Variable(lfs_iname(subel, ma.restriction, count=number))
+
+    # If the LFS is not yet a pymbolic expression, make it one
+    from pymbolic.primitives import Expression
+    if not isinstance(lfs, Expression):
+        lfs = Variable(lfs)
+
+    return AccumulationSpace(lfs=lfs,
+                             index=lfsi,
+                             restriction=ma.restriction,
+                             )
+
+
+def boundary_predicates(expr, measure, subdomain_id):
+    predicates = frozenset([])
+
+    if subdomain_id not in ['everywhere', 'otherwise']:
+        # We need to reconstruct the subdomain_data parameter of the measure
+        # I am *totally* confused as to why this information is not at hand anyway,
+        # but conversation with Martin pointed me to dolfin.fem.assembly where this
+        # is done in preprocessing with the limitation of only one possible type of
+        # modified measure per integral type.
+
+        # Get the original form and inspect the present measures
+        from dune.perftool.generation import get_global_context_value
+        original_form = get_global_context_value("formdata").original_form
+
+        sd = original_form.subdomain_data()
+        assert len(sd) == 1
+        subdomains, = list(sd.values())
+        domain, = list(sd.keys())
+        for k in list(subdomains.keys()):
+            if subdomains[k] is None:
+                    del subdomains[k]
+
+        # Finally extract the original subdomain_data (which needs to be present!)
+        assert measure in subdomains
+        subdomain_data = subdomains[measure]
+
+        # Determine the name of the parameter function
+        name = get_global_context_value("data").object_names[id(subdomain_data)]
+
+        # Trigger the generation of code for this thing in the parameter class
+        from ufl.checks import is_cellwise_constant
+        cellwise_constant = is_cellwise_constant(expr)
+        from dune.perftool.pdelab.parameter import intersection_parameter_function
+        intersection_parameter_function(name, subdomain_data, cellwise_constant, t='int')
+
+        predicates = predicates.union(['{} == {}'.format(name, subdomain_id)])
+
+    return predicates
+
+
+@iname
+def grad_iname(index, dim):
+    from dune.perftool.pdelab import name_index
+    name = name_index(index)
+    domain(name, dim)
+    return name
+
+
+def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
+    # First we do the tree traversal to get a pymbolic expression representing this expression
+    pymbolic_expr = visitor(accterm.term)
+
+    # If this is a gradient, we generate an iname
+    additional_inames = frozenset({})
+    if accterm.argument.index:
+        from ufl.domain import find_geometric_dimension
+        dim = find_geometric_dimension(accterm.argument.expr)
+        for i in accterm.argument.index._indices:
+            if i not in visitor.dimension_indices:
+                additional_inames = additional_inames.union(frozenset({grad_iname(i, dim)}))
+
+    # It may happen that an entire accumulation term vanishes. We do nothing in that case
+    if pymbolic_expr == 0:
+        return
+
+    # We also traverse the test function to get its pymbolic equivalent
+    test_expr = visitor(accterm.argument.expr)
+
+    # Combine expression and test function
+    from pymbolic.primitives import Product
+    pymbolic_expr = Product((pymbolic_expr, test_expr))
+
+    # Collect the lfs and lfs indices for the accumulate call
+    test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure)
+    # In the jacobian case, also determine the space for the ansatz space
+    ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure)
+
+    from dune.perftool.pdelab.argument import name_accumulation_variable
+    accumvar = name_accumulation_variable((ansatz_lfs.get_restriction() + test_lfs.get_restriction()))
+
+    predicates = boundary_predicates(accterm.term, measure, subdomain_id)
+
+    rank = 1 if ansatz_lfs.lfs is None else 2
+
+    from dune.perftool.pdelab.argument import PDELabAccumulationFunction
+    from pymbolic.primitives import Call
+    expr = Call(PDELabAccumulationFunction(accumvar, rank),
+                (ansatz_lfs.get_args() + test_lfs.get_args() + (pymbolic_expr,))
+                )
+
+    from dune.perftool.generation import instruction
+    from dune.perftool.pdelab.quadrature import quadrature_iname
+    instruction(assignees=(),
+                expression=expr,
+                forced_iname_deps=additional_inames.union(frozenset(visitor.inames).union(frozenset({quadrature_iname()}))),
+                forced_iname_deps_is_final=True,
+                predicates=predicates
+                )
+
+
 def generate_kernel(integrals):
-    subst_rules = []
     for integral in integrals:
         integrand = integral.integrand()
         measure = integral.integral_type()
@@ -249,21 +425,41 @@ def generate_kernel(integrals):
             from dune.perftool.ufl.transformations.axiparallel import diagonal_jacobian
             integrand = diagonal_jacobian(integrand)
 
+        # Gather dimension indices
         from dune.perftool.ufl.dimensionindex import dimension_index_mapping
         dimension_indices = dimension_index_mapping(integrand)
 
+        # Generate code for the LFS trees present in the form
+        from dune.perftool.ufl.modified_terminals import extract_modified_arguments
+        test_ma = extract_modified_arguments(integrand, argnumber=0)
+        trial_ma = extract_modified_arguments(integrand, coeffcount=0)
+        apply_ma = extract_modified_arguments(integrand, coeffcount=1)
+
+        import itertools
+        for ma in itertools.chain(test_ma, trial_ma, apply_ma):
+            if measure == 'exterior_facet':
+                ma.restriction = Restriction.NEGATIVE
+
+            from dune.perftool.pdelab.spaces import traverse_lfs_tree
+            traverse_lfs_tree(ma)
+
         # Now split the given integrand into accumulation expressions
         from dune.perftool.ufl.transformations.extract_accumulation_terms import split_into_accumulation_terms
         accterms = split_into_accumulation_terms(integrand)
 
-        # Get a transformer instance for this kernel
-        from dune.perftool.ufl.visitor import UFL2LoopyVisitor
-        visitor = UFL2LoopyVisitor(measure, subdomain_id, dimension_indices)
-
         # Iterate over the terms and generate a kernel
         for term in accterms:
-            visitor(term)
-            subst_rules.extend(visitor.substitution_rules)
+            # Adjust the index map for the visitor
+            from copy import deepcopy
+            indexmap = deepcopy(dimension_indices)
+            for i, j in term.indexmap.items():
+                if i in indexmap:
+                    indexmap[j] = indexmap[i]
+
+            # Get a transformer instance for this kernel
+            from dune.perftool.ufl.visitor import UFL2LoopyVisitor
+            visitor = UFL2LoopyVisitor(measure, indexmap)
+            generate_accumulation_instruction(visitor, term, measure, subdomain_id)
 
     # Extract the information, which is needed to create a loopy kernel.
     # First extracting it, might be useful to alter it before kernel generation.
@@ -282,7 +478,7 @@ def generate_kernel(integrals):
     # Create the kernel
     from loopy import make_kernel, preprocess_kernel
     kernel = make_kernel(domains,
-                         instructions + subst_rules,
+                         instructions,
                          arguments,
                          temporary_variables=temporaries,
                          function_manglers=manglers,
diff --git a/python/dune/perftool/pdelab/spaces.py b/python/dune/perftool/pdelab/spaces.py
index 3812cc1fa49a52071748bc8788d1446fd6994166..e439425f36e8964d407814a2042337578b12dc34 100644
--- a/python/dune/perftool/pdelab/spaces.py
+++ b/python/dune/perftool/pdelab/spaces.py
@@ -161,8 +161,8 @@ def type_gfs(element, basetype=None, index_stack=None):
 
 
 def traverse_lfs_tree(arg):
-    from dune.perftool.ufl.modified_terminals import ModifiedArgumentDescriptor
-    assert isinstance(arg, ModifiedArgumentDescriptor)
+    from dune.perftool.ufl.modified_terminals import ModifiedArgument
+    assert isinstance(arg, ModifiedArgument)
 
     # First we need to determine the basename as given in the signature of
     # this kernel method!
diff --git a/python/dune/perftool/ufl/modified_terminals.py b/python/dune/perftool/ufl/modified_terminals.py
index 97e4c2c75e1a8171cfbf9bb6f192763e8f983b65..33319b273143455b599bcc1665ea4ff81d35182f 100644
--- a/python/dune/perftool/ufl/modified_terminals.py
+++ b/python/dune/perftool/ufl/modified_terminals.py
@@ -3,6 +3,30 @@
 from ufl.algorithms import MultiFunction
 from dune.perftool import Restriction
 from ufl.classes import MultiIndex
+from pytools import Record
+
+
+class ModifiedArgument(Record):
+    def __init__(self,
+                 expr=None,
+                 argexpr=None,
+                 grad=False,
+                 index=None,
+                 reference_grad=False,
+                 restriction=Restriction.NONE,
+                 component=MultiIndex(()),
+                 reference=False,
+                 ):
+        Record.__init__(self,
+                        expr=expr,
+                        argexpr=argexpr,
+                        grad=grad,
+                        index=index,
+                        reference_grad=reference_grad,
+                        restriction=restriction,
+                        component=component,
+                        reference=reference,
+                        )
 
 
 class ModifiedTerminalTracker(MultiFunction):
@@ -10,6 +34,9 @@ class ModifiedTerminalTracker(MultiFunction):
     grad, reference_grad, positive_restricted and negative_restricted.
     The appearance of those classes changes the internal state of the MF.
     """
+
+    call = MultiFunction.__call__
+
     def __init__(self):
         MultiFunction.__init__(self)
         self.grad = False
@@ -59,57 +86,33 @@ class ModifiedTerminalTracker(MultiFunction):
         return ret
 
 
-class ModifiedArgumentDescriptor(MultiFunction):
-    def __init__(self, e):
-        MultiFunction.__init__(self)
-
-        self.grad = False
-        self.reference = False
-        self.reference_grad = False
+class ModifiedArgumentAnalysis(ModifiedTerminalTracker):
+    def __init__(self):
         self.index = None
-        self.restriction = Restriction.NONE
-        self.component = MultiIndex(())
-        self.expr = e
-
-        self.__call__(e)
-        self.__call__ = None
+        ModifiedTerminalTracker.__init__(self)
 
-    def __eq__(self, other):
-        return self.expr == other.expr
-
-    def grad(self, o):
-        self.grad = True
-        self(o.ufl_operands[0])
-
-    def reference_grad(self, o):
-        self.reference_grad = True
-        self(o.ufl_operands[0])
-
-    def reference_value(self, o):
-        self.reference = True
-        self(o.ufl_operands[0])
-
-    def positive_restricted(self, o):
-        self.restriction = Restriction.POSITIVE
-        self(o.ufl_operands[0])
-
-    def negative_restricted(self, o):
-        self.restriction = Restriction.NEGATIVE
-        self(o.ufl_operands[0])
+    def __call__(self, o):
+        self.call_expr = o
+        return self.call(o)
 
     def indexed(self, o):
         self.index = o.ufl_operands[1]
-        self(o.ufl_operands[0])
+        return self.call(o.ufl_operands[0])
 
-    def function_view(self, o):
-        self.component = o.ufl_operands[1]
-        self(o.ufl_operands[0])
+    def form_argument(self, o):
+        return ModifiedArgument(expr=self.call_expr,
+                                argexpr=o,
+                                index=self.index,
+                                restriction=self.restriction,
+                                component=self.component,
+                                grad=self.grad,
+                                reference_grad=self.reference_grad,
+                                reference=self.reference,
+                                )
 
-    def argument(self, o):
-        self.argexpr = o
 
-    def coefficient(self, o):
-        self.argexpr = o
+def analyse_modified_argument(expr):
+    return ModifiedArgumentAnalysis()(expr)
 
 
 class _ModifiedArgumentExtractor(MultiFunction):
@@ -123,7 +126,7 @@ class _ModifiedArgumentExtractor(MultiFunction):
         if ret:
             # This indicates that this entire expression was a modified thing...
             self.modified_arguments.add(ret)
-        return tuple(ModifiedArgumentDescriptor(ma) for ma in self.modified_arguments)
+        return tuple(analyse_modified_argument(ma) for ma in self.modified_arguments)
 
     def expr(self, o):
         for op in o.ufl_operands:
@@ -144,7 +147,7 @@ class _ModifiedArgumentExtractor(MultiFunction):
     reference_value = pass_on
 
     def argument(self, o):
-        if self.argnumber is None or o.number() == self.argnumber:
+        if o.number() == self.argnumber:
             return o
 
     def coefficient(self, o):
diff --git a/python/dune/perftool/ufl/transformations/extract_accumulation_terms.py b/python/dune/perftool/ufl/transformations/extract_accumulation_terms.py
index 94413d1a6fdbcc3abf1d5139e851454426e6b6f3..0429c1c942849170a6b93e5910964385c869ac5a 100644
--- a/python/dune/perftool/ufl/transformations/extract_accumulation_terms.py
+++ b/python/dune/perftool/ufl/transformations/extract_accumulation_terms.py
@@ -1,56 +1,93 @@
 """
 This module defines an UFL transformation, that takes a UFL expression
-and transforms it into a sum of terms that all contain the correct number
-of test function terms (which is equal to the form rank).
+and transforms it into a sum of accumulation terms.
 """
-from __future__ import absolute_import
-
 from dune.perftool.ufl.modified_terminals import extract_modified_arguments
 from dune.perftool.ufl.transformations import ufl_transformation
 from dune.perftool.ufl.transformations.replace import replace_expression
+from dune.perftool.ufl.transformations.identitypropagation import identity_propagation
+from dune.perftool.ufl.transformations.reindexing import reindexing
+from dune.perftool.ufl.modified_terminals import analyse_modified_argument, ModifiedArgument
+
+from ufl.classes import Zero, Identity, Indexed, IntValue, MultiIndex
+from ufl.core.multiindex import indices
+
+from pytools import Record
+
+
+class AccumulationTerm(Record):
+    def __init__(self,
+                 term,
+                 argument,
+                 indexmap={},
+                 ):
+        assert isinstance(argument, ModifiedArgument)
+        Record.__init__(self,
+                        term=term,
+                        argument=argument,
+                        indexmap=indexmap,
+                        )
+
+
+@ufl_transformation(name="accterms", extraction_lambda=lambda l: [at.term for at in l])
+def split_into_accumulation_terms(expr, indexmap={}):
+    ret = []
+
+    # Extract a list of modified terminals for the test function
+    # One accumulation instruction will be generated for each of these.
+    test_args = extract_modified_arguments(expr, argnumber=0)
+
+    # Extract a list of modified terminals for the ansatz function
+    # in jacobian forms.
+    all_jacobian_args = extract_modified_arguments(expr, argnumber=1)
+
+    for test_arg in test_args:
+        # Do this as a multi step replacement procedure to avoid UFL nagging
+        # about too much stuff in reconstructing the expressions
+
+        # 1) We first cut the expression to the relevant modified test_function
+        # Build a replacement dictionary
+        replacement = {ma.expr: Zero(shape=ma.expr.ufl_shape,
+                                     free_indices=ma.expr.ufl_free_indices,
+                                     index_dimensions=ma.expr.ufl_index_dimensions)
+                       for ma in test_args}
+        replacement[test_arg.expr] = test_arg.expr
+        replace_expr = replace_expression(expr, replacemap=replacement)
+
+        # 2) Cut the test function itself from the expression
+        indexmap = {}
+        if test_arg.index:
+            newi = indices(len(test_arg.index))
+            identities = tuple(Indexed(Identity(2), MultiIndex((i,) + (j,))) for i, j in zip(newi, test_arg.index._indices))
+            indexmap = {i: j for i, j in zip(test_arg.index._indices, newi)}
+            from dune.perftool.ufl.flatoperators import construct_binary_operator
+            from ufl.classes import Product
+            replacement = {test_arg.expr: construct_binary_operator(identities, Product)}
+            test_arg = analyse_modified_argument(reindexing(test_arg.expr, replacemap=indexmap))
+        else:
+            replacement = {test_arg.expr: IntValue(1)}
+        replace_expr = replace_expression(replace_expr, replacemap=replacement)
+
+        # 3) Collapse any identity nodes that may have been introduced by replacing vectors
+        replace_expr = identity_propagation(replace_expr)
+
+        # 4) Further split according to trial function in jacobian terms
+        if all_jacobian_args:
+            # Update the list!
+            jac_args = extract_modified_arguments(replace_expr, argnumber=1)
+            for jac_arg in jac_args:
+                # TODO Some jacobian terms can be joined
+                replacement = {ma.expr: Zero(shape=ma.expr.ufl_shape,
+                                             free_indices=ma.expr.ufl_free_indices,
+                                             index_dimensions=ma.expr.ufl_index_dimensions)
+                               for ma in jac_args}
+                replacement[jac_arg.expr] = jac_arg.expr
+                jac_expr = replace_expression(replace_expr, replacemap=replacement)
+
+                if not isinstance(jac_expr, Zero):
+                    ret.append(AccumulationTerm(jac_expr, test_arg, indexmap))
+        else:
+            if not isinstance(replace_expr, Zero):
+                ret.append(AccumulationTerm(replace_expr, test_arg, indexmap))
 
-from ufl.algorithms import MultiFunction
-from ufl.classes import Zero
-
-import itertools
-
-
-class _ReplacementDict(dict):
-    def __init__(self, good=[], bad=[]):
-        dict.__init__(self)
-        for a in bad:
-            self[a] = Zero(shape=a.ufl_shape, free_indices=a.ufl_free_indices, index_dimensions=a.ufl_index_dimensions)
-        for a in good:
-            self[a] = a
-
-
-@ufl_transformation(name="accterms2", extraction_lambda=lambda l: l)
-def split_into_accumulation_terms(expr):
-    mod_args = extract_modified_arguments(expr)
-
-    accumulation_terms = []
-
-    # Treat the case of a rank 1 form:
-    if len(list(filter(lambda ma: ma.argexpr.number() == 1, mod_args))) == 0:
-        for arg in mod_args:
-            # Do the replacement on the expression
-            accumulation_terms.append(replace_expression(expr,
-                                                         replacemap=_ReplacementDict(good=(arg.expr,),
-                                                                                     bad=[ma.expr for ma in mod_args],
-                                                                                     )
-                                                         )
-                                      )
-    # and now the case of a rank 2 form:
-    else:
-        for arg1, arg2 in itertools.product(filter(lambda ma: ma.argexpr.number() == 0, mod_args),
-                                            filter(lambda ma: ma.argexpr.number() == 1, mod_args)
-                                            ):
-            accumulation_terms.append(replace_expression(expr,
-                                                         replacemap=_ReplacementDict(good=(arg1.expr, arg2.expr),
-                                                                                     bad=[ma.expr for ma in mod_args],
-                                                                                     )
-                                                         )
-                                      )
-
-    # and return the result
-    return accumulation_terms
+    return ret
diff --git a/python/dune/perftool/ufl/transformations/identitypropagation.py b/python/dune/perftool/ufl/transformations/identitypropagation.py
new file mode 100644
index 0000000000000000000000000000000000000000..aeb21a10790b81cdad8562e671d00e41ead62bdc
--- /dev/null
+++ b/python/dune/perftool/ufl/transformations/identitypropagation.py
@@ -0,0 +1,72 @@
+"""
+A transformation to help the form splitting algorithm split
+vector and tensor expressions. In a nutshell does:
+\sum_i f(i)I(i,k) => f(k)
+"""
+
+from dune.perftool.ufl.transformations import ufl_transformation
+
+from ufl.algorithms import MultiFunction
+from ufl.classes import Identity, Index, IntValue, MultiIndex
+
+
+class GetIndexMap(MultiFunction):
+    call = MultiFunction.__call__
+
+    def __call__(self, o):
+        self.free_indices = frozenset(Index(i) for i in o.ufl_free_indices)
+        self.replacemap = {}
+        self.call(o)
+        return self.replacemap
+
+    def expr(self, o):
+        for op in o.ufl_operands:
+            self.call(op)
+
+    def indexed(self, o):
+        op, i = o.ufl_operands
+        if isinstance(op, Identity):
+            free_index = self.free_indices.intersection(frozenset(i))
+            assert(len(free_index) == 1)
+            ind, = frozenset(i) - free_index
+            free_index, = free_index
+            self.replacemap[ind] = free_index
+        else:
+            self.call(op)
+
+
+class IdentityPropagation(MultiFunction):
+    call = MultiFunction.__call__
+
+    def __call__(self, expr):
+        self.replacemap = GetIndexMap()(expr)
+        return self.call(expr)
+
+    def expr(self, o):
+        return self.reuse_if_untouched(o, *tuple(self.call(op) for op in o.ufl_operands))
+
+    def indexed(self, o):
+        op, i = o.ufl_operands
+        if isinstance(op, Identity):
+            return IntValue(1)
+        else:
+            return self.reuse_if_untouched(o, self.call(op), self.call(i))
+
+    def multi_index(self, o):
+        return MultiIndex(tuple(self.replacemap.get(i, i) for i in o))
+
+    def index_sum(self, o):
+        op, i = o.ufl_operands
+
+        if i[0] in self.replacemap:
+            return self.call(op)
+        else:
+            return self.reuse_if_untouched(o, self.call(op), self.call(i))
+
+
+@ufl_transformation(name='identity')
+def identity_propagation(expr):
+    if expr.ufl_free_indices:
+        return IdentityPropagation()(expr)
+    else:
+        return expr
diff --git a/python/dune/perftool/ufl/transformations/reindexing.py b/python/dune/perftool/ufl/transformations/reindexing.py
index 2329462d94b860fbac3d9dbb38b772745b16d81e..99973ae12813e3e0e813753c0f9261d69f9517a4 100644
--- a/python/dune/perftool/ufl/transformations/reindexing.py
+++ b/python/dune/perftool/ufl/transformations/reindexing.py
@@ -42,9 +42,9 @@ class ReindexingMapper(MultiFunction):
 
     call = MultiFunction.__call__
 
-    def __init__(self):
+    def __init__(self, replacemap):
         MultiFunction.__init__(self)
-        self.replacement_map = {}
+        self.replacement_map = replacemap
         self.multi_index_cache = {}
 
     def expr(self, o):
@@ -60,5 +60,5 @@ class ReindexingMapper(MultiFunction):
 
 
 @ufl_transformation(name="reindexing")
-def reindexing(e):
-    return ReindexingMapper()(e)
+def reindexing(e, replacemap={}):
+    return ReindexingMapper(replacemap)(e)
diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py
index ccc6cb24091e6b48b53d0a86292e34e93dd42f23..70d77358bdb105eeb5d09b7635214190d2517510 100644
--- a/python/dune/perftool/ufl/visitor.py
+++ b/python/dune/perftool/ufl/visitor.py
@@ -27,10 +27,9 @@ from ufl.algorithms import MultiFunction
 
 
 class UFL2LoopyVisitor(ModifiedTerminalTracker):
-    def __init__(self, measure, subdomain_id, dimension_indices):
+    def __init__(self, measure, dimension_indices):
         # Some variables describing the integral measure of this integral
         self.measure = measure
-        self.subdomain_id = subdomain_id
         self.dimension_indices = dimension_indices
 
         # Call base class constructors
@@ -44,128 +43,8 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
         self.argshape = 0
         self.transpose_necessary = False
         self.inames = []
-        self.substitution_rules = []
-
-        # Initialize the local function spaces that we might need for this term
-        # We therefore gather a list of modified trial functions too.
-        from dune.perftool.ufl.modified_terminals import extract_modified_arguments
-        test_ma = extract_modified_arguments(o,)
-        trial_ma = extract_modified_arguments(o, coeffcount=0)
-        apply_ma = extract_modified_arguments(o, coeffcount=1)
-
-        # All test and trial functions on boundary integrals are technically restricted
-        import itertools
-        for ma in itertools.chain(test_ma, trial_ma, apply_ma):
-            if self.measure == 'exterior_facet':
-                ma.restriction = Restriction.NEGATIVE
-
-            from dune.perftool.pdelab.spaces import traverse_lfs_tree
-            traverse_lfs_tree(ma)
-
-        # Determine the rank of the term
-        self.rank = len(test_ma)
-
-        # First we do the tree traversal to get a pymbolic expression representing this expression
-        pymbolic_expr = self.call(o)
-
-        # It may happen that an entire accumulation term vanishes. We do nothing in that case
-        if pymbolic_expr == 0:
-            return
-
-        # Collect the arguments for the accumulate function
-        accumargs = [None] * (2 * len(test_ma))
-        residual_shape = [None] * len(test_ma)
-        arg_restr = [None] * len(test_ma)
-
-        for ma in test_ma:
-            count = ma.argexpr.number()
-            if len(test_ma) == 2:
-                icount = (count + 1) % 2
-            else:
-                icount = 0
-
-            # Extract the subelement of the (mixed) finite element
-            from ufl.functionview import select_subelement
-            subel = select_subelement(ma.argexpr.ufl_element(), ma.component)
-
-            # And generate a local function space for it!
-            lfs = name_lfs(ma.argexpr.ufl_element(), ma.restriction, ma.component)
-            from dune.perftool.generation import valuearg
-            from loopy.types import NumpyType
-            valuearg(lfs, dtype=NumpyType("str"))
-
-            if len(subel.value_shape()) != 0:
-                from dune.perftool.pdelab.geometry import dimension_iname
-                from dune.perftool.pdelab.basis import lfs_child
-                idims = tuple(dimension_iname(context='arg', count=i) for i in range(len(subel.value_shape())))
-                lfs = lfs_child(lfs, idims, shape=subel.value_shape(), symmetry=subel.symmetry)
-                residual_shape[icount] = name_lfs_bound(name_leaf_lfs(subel.sub_elements()[0], ma.restriction))
-                subel = subel.sub_elements()[0]
-            else:
-                residual_shape[icount] = name_lfs_bound(lfs)
-
-            lfsi = lfs_iname(subel, ma.restriction, count=count)
-
-            # If the LFS is not yet a pymbolic expression, make it one
-            from pymbolic.primitives import Expression
-            if not isinstance(lfs, Expression):
-                lfs = Variable(lfs)
-
-            accumargs[2 * icount] = lfs
-            accumargs[2 * icount + 1] = Variable(lfsi)
-
-            arg_restr[icount] = ma.restriction
-
-        from dune.perftool.pdelab.argument import name_accumulation_variable
-        accumvar = name_accumulation_variable(arg_restr)
-
-        predicates = frozenset({})
-
-        # Maybe wrap this instruction into a condiditional. This mostly happens with mixed boundary conditions
-        if self.subdomain_id not in ['everywhere', 'otherwise']:
-            # We need to reconstruct the subdomain_data parameter of the measure
-            # I am *totally* confused as to why this information is not at hand anyway,
-            # but conversation with Martin pointed me to dolfin.fem.assembly where this
-            # is done in preprocessing with the limitation of only one possible type of
-            # modified measure per integral type.
-
-            # Get the original form and inspect the present measures
-            from dune.perftool.generation import get_global_context_value
-            original_form = get_global_context_value("formdata").original_form
-
-            sd = original_form.subdomain_data()
-            assert len(sd) == 1
-            subdomains, = list(sd.values())
-            domain, = list(sd.keys())
-            for k in list(subdomains.keys()):
-                if subdomains[k] is None:
-                    del subdomains[k]
-
-            # Finally extract the original subdomain_data (which needs to be present!)
-            assert self.measure in subdomains
-            subdomain_data = subdomains[self.measure]
-
-            # Determine the name of the parameter function
-            name = get_global_context_value("data").object_names[id(subdomain_data)]
-
-            # Trigger the generation of code for this thing in the parameter class
-            from ufl.checks import is_cellwise_constant
-            cellwise_constant = is_cellwise_constant(o)
-            from dune.perftool.pdelab.parameter import intersection_parameter_function
-            intersection_parameter_function(name, subdomain_data, cellwise_constant, t='int')
-
-            predicates = predicates.union(['{} == {}'.format(name, self.subdomain_id)])
-
-        from dune.perftool.pdelab.argument import PDELabAccumulationFunction
-        from pymbolic.primitives import Call
-        expr = Call(PDELabAccumulationFunction(accumvar, len(test_ma)), tuple(a for a in accumargs) + (pymbolic_expr,))
 
-        instruction(assignees=(),
-                    expression=expr,
-                    forced_iname_deps=frozenset(self.inames).union(frozenset({quadrature_iname()})),
-                    forced_iname_deps_is_final=True,
-                    predicates=predicates
-                    )
+        return self.call(o)
 
     #
     # Form argument/coefficients handlers: