diff --git a/python/dune/perftool/pdelab/__init__.py b/python/dune/perftool/pdelab/__init__.py
index dcc17068303501fc01d96e842089454907342c3b..3d3f3f99cc0abbbabb12c5b766dbea19f3eed7ba 100644
--- a/python/dune/perftool/pdelab/__init__.py
+++ b/python/dune/perftool/pdelab/__init__.py
@@ -31,7 +31,7 @@ from dune.perftool.pdelab.quadrature import (pymbolic_quadrature_weight,
                                              )
 from dune.perftool.pdelab.spaces import (lfs_inames,
                                          )
-from dune.perftool.pdelab.tensors import pymbolic_list_tensor
+from dune.perftool.pdelab.tensors import pymbolic_list_tensor, pymbolic_identity
 
 
 class PDELabInterface(object):
@@ -92,6 +92,9 @@ class PDELabInterface(object):
     def pymbolic_list_tensor(self, o, visitor):
         return pymbolic_list_tensor(o, visitor)
 
+    def pymbolic_identity(self, o, visitor):
+        return pymbolic_identity(o, visitor)
+
     #
     # Geometry related generator functions
     #
diff --git a/python/dune/perftool/pdelab/parameter.py b/python/dune/perftool/pdelab/parameter.py
index 2d8162495cfbdc2f03d85ac31418363d0a1c571a..8e8af15b399e0012dab5016c5ae5185f14eb83f3 100644
--- a/python/dune/perftool/pdelab/parameter.py
+++ b/python/dune/perftool/pdelab/parameter.py
@@ -13,10 +13,7 @@ from dune.perftool.generation import (class_basename,
 from dune.perftool.pdelab.geometry import (name_cell,
                                            name_intersection,
                                            )
-from dune.perftool.pdelab.quadrature import (pymbolic_quadrature_position,
-                                             pymbolic_quadrature_position_in_cell,
-                                             quadrature_preamble,
-                                             )
+from dune.perftool.pdelab.quadrature import quadrature_preamble
 from dune.perftool.tools import get_pymbolic_basename
 from dune.perftool.cgen.clazz import AccessModifier
 from dune.perftool.pdelab.localoperator import (class_type_from_cache,
diff --git a/python/dune/perftool/pdelab/tensors.py b/python/dune/perftool/pdelab/tensors.py
index 315e3ceccee0d4d203bef033c901043cff4efa34..6c7e4fc5f88eaaa5d019c6c2c26371e6b672b550 100644
--- a/python/dune/perftool/pdelab/tensors.py
+++ b/python/dune/perftool/pdelab/tensors.py
@@ -1,7 +1,9 @@
 """ Code generation for explicitly specified tensors """
 
 from dune.perftool.generation import (get_counted_variable,
+                                      domain,
                                       kernel_cached,
+                                      iname,
                                       instruction,
                                       temporary_variable,
                                       )
@@ -33,3 +35,31 @@ def pymbolic_list_tensor(expr, visitor):
                        )
     define_list_tensor(name, expr, visitor)
     return prim.Variable(name)
+
+
+@iname
+def identity_iname(name, bound):
+    name = "id_{}_{}".format(name, bound)
+    domain(name, bound)
+    return name
+
+
+def define_identity(name, expr, visitor):
+    i = identity_iname("i", expr.ufl_shape[0])
+    j = identity_iname("j", expr.ufl_shape[1])
+    instruction(assignee=prim.Subscript(prim.Variable(name), (prim.Variable(i), prim.Variable(j))),
+                expression=prim.If(prim.Comparison(prim.Variable(i),"==",prim.Variable(j)),1,0),
+                forced_iname_deps_is_final=True,
+                )
+
+
+@kernel_cached
+def pymbolic_identity(expr, visitor):
+    name = get_counted_variable("identity")
+    temporary_variable(name,
+                       shape=expr.ufl_shape,
+                       shape_impl=('fm',),
+                       dtype=np.float64,
+                       )
+    define_identity(name, expr, visitor)
+    return prim.Variable(name)
diff --git a/python/dune/perftool/ufl/extract_accumulation_terms.py b/python/dune/perftool/ufl/extract_accumulation_terms.py
index b5d473b9a3a90cfe72acecc89dcbb0289e41e54a..d1615aca972ed0dc191fa17fbe1790fac5c86dfb 100644
--- a/python/dune/perftool/ufl/extract_accumulation_terms.py
+++ b/python/dune/perftool/ufl/extract_accumulation_terms.py
@@ -12,7 +12,7 @@ from dune.perftool.ufl.transformations.reindexing import reindexing
 from dune.perftool.ufl.modified_terminals import analyse_modified_argument, ModifiedArgument
 from dune.perftool.pdelab.restriction import Restriction
 
-from ufl.classes import Zero, Identity, Indexed, IntValue, MultiIndex, Product
+from ufl.classes import Zero, Identity, Indexed, IntValue, MultiIndex, Product, IndexSum
 from ufl.core.multiindex import indices
 
 from pytools import Record
@@ -107,6 +107,7 @@ def split_into_accumulation_terms(expr):
         replacement = {}
         indexmap = {}
         newi = None
+        backmap = {}
         # Get all appearances of test functions with their indices
         indexed_test_args = extract_modified_arguments(replace_expr, argnumber=0, do_index=True)
         for indexed_test_arg in indexed_test_args:
@@ -115,20 +116,53 @@ def split_into_accumulation_terms(expr):
                 # -> (m,n) in the example above
                 if newi is None:
                     newi = indices(len(indexed_test_arg.index))
-                # Replace indexed test function with a product of identities.
-                identities = tuple(Indexed(Identity(2), MultiIndex((i,) + (j,)))
-                                   for i, j in zip(newi, indexed_test_arg.index._indices))
-                replacement.update({indexed_test_arg.expr:
-                                    construct_binary_operator(identities, Product)})
-                indexmap.update({i: j for i, j in zip(indexed_test_arg.index._indices, newi)})
-                indexed_test_arg = analyse_modified_argument(reindexing(indexed_test_arg.expr,
-                                                                        replacemap=indexmap))
+
+                # This handles the special case with two identical
+                # indices on an test function. E.g. in Stokes on an
+                # axiparallel grid you get a term:
+                #
+                # -(\sum_i K_{i,i} (\nabla v)_{i,i}) w
+                #   = \sum_k \sum_l (-K_{k,k} w I_{k,l} (\nabla v)_{k,l})
+                #
+                # and we want to split
+                #
+                # -K_{k,k} w I_{k,l} corresponding to (\nabla v)_{k,l}.
+                #
+                # This is done by:
+                # - Replacing (\nabla v)_{i,i} with I_{k,i}*(\nabla
+                #   v)_{k,l}. Here (\nabla v)_{k,l} serves as a
+                #   placeholder and will be replaced later on.
+                # - Propagating the identity in step 4.
+                # - Replacing (\nabla v)_{k,l} by I_{k,l} after step 4.
+                if len(set(indexed_test_arg.index._indices)) < len(indexed_test_arg.index._indices):
+                    if len(indexed_test_arg.index._indices)>2:
+                        raise NotImplementedError("Test argument with more than three indices and double occurence ist not implemted.")
+                    mod_index_map = {indexed_test_arg.index: MultiIndex((newi[0], newi[1]))}
+                    mod_indexed_test_arg = replace_expression(indexed_test_arg.expr,
+                                                              replacemap = mod_index_map)
+                    rep = Product(Indexed(Identity(2),
+                                          MultiIndex((newi[0],indexed_test_arg.index[0]))),
+                                  mod_indexed_test_arg)
+                    backmap.update({mod_indexed_test_arg:
+                                    Indexed(Identity(2), MultiIndex((newi[0],newi[1])))})
+                    replacement.update({indexed_test_arg.expr: rep})
+                    indexmap.update({indexed_test_arg.index[0]: newi[0]})
+                else:
+                    # Replace indexed test function with a product of identities.
+                    identities = tuple(Indexed(Identity(2), MultiIndex((i,) + (j,)))
+                                       for i, j in zip(newi, indexed_test_arg.index._indices))
+                    replacement.update({indexed_test_arg.expr:
+                                        construct_binary_operator(identities, Product)})
+
+                    indexmap.update({i: j for i, j in zip(indexed_test_arg.index._indices, newi)})
             else:
                 replacement.update({indexed_test_arg.expr: IntValue(1)})
         replace_expr = replace_expression(replace_expr, replacemap=replacement)
 
-        # 4) Collapse any identity nodes that may have been introduced by replacing vectors
+        # 4) Collapse any identity nodes that may have been introduced
+        # by replacing vectors and maybe replace placeholder from last step
         replace_expr = identity_propagation(replace_expr)
+        replace_expr = replace_expression(replace_expr, replacemap=backmap)
 
         # 5) Further split according to trial function in jacobian terms
         #
diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py
index 503bfd83959e3d3efb7ae597e11b352c4b5f5952..a5cf1a9d076d95ae484eb1693f6ff19b891ee0ba 100644
--- a/python/dune/perftool/ufl/visitor.py
+++ b/python/dune/perftool/ufl/visitor.py
@@ -234,6 +234,9 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
     # Those handlers would be valid in any code going from UFL to pymbolic
     #
 
+    def identity(self, o):
+        return self.interface.pymbolic_identity(o, self)
+
     def product(self, o):
         return Product(tuple(self.call(op) for op in o.ufl_operands))