diff --git a/python/dune/perftool/compile.py b/python/dune/perftool/compile.py
index 231792238a7e4e7c60494a241775e178881c3ec2..f77f9d308233cd1742eb988e7027e529a3ef479e 100644
--- a/python/dune/perftool/compile.py
+++ b/python/dune/perftool/compile.py
@@ -41,13 +41,11 @@ def read_ufl(uflfile):
 
     # apply some transformations unconditionally!
     from dune.perftool.ufl.transformations import transform_form
-    # from dune.perftool.ufl.transformations.splitarguments import split_arguments
     from dune.perftool.ufl.transformations.indexpushdown import pushdown_indexed
     from dune.perftool.ufl.transformations.reindexing import reindexing
 
     form = transform_form(form, pushdown_indexed)
     form = transform_form(form, reindexing)
-#    form = transform_form(form, split_arguments)
 
     formdata.preprocessed_form = form
 
diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py
index f13d8e98c575ac11e6afae1e1dc5bf5078259a66..a113cb6a203fead3a704d4ce74f2b46bb08aa16a 100644
--- a/python/dune/perftool/generation/loopy.py
+++ b/python/dune/perftool/generation/loopy.py
@@ -57,7 +57,9 @@ def _temporary_type(shape_impl, shape, first=True):
     if shape_impl[0] == 'fv':
         return "Dune::FieldVector<{}, {}>".format(_temporary_type(shape_impl[1:], shape[1:], first=False), shape[0])
     if shape_impl[0] == 'fm':
-        raise NotImplementedError
+        # For now, no field matrices of weird stuff...
+        assert len(shape) == 2
+        return "Dune::FieldMatrix<double, {}, {}>".format(shape[0], shape[1])
 
 
 @preamble
@@ -73,6 +75,9 @@ def default_declaration(name, shape=(), shape_impl=()):
         return '{} {}({});'.format(t, name, shape[0])
     if shape_impl[0] == 'fv':
         return '{} {}(0.0);'.format(t, name)
+    if shape_impl[0] == 'fm':
+        assert len(shape) == 2
+        return '{} {}(0.0);'.format(t, name)
 
 
 class _TemporaryCounter:
diff --git a/python/dune/perftool/loopy/transformer.py b/python/dune/perftool/loopy/transformer.py
index 6302591164b7fc6632837807c83f56da773a9faa..664d652ef688a75d0065c37b8256469066bde26b 100644
--- a/python/dune/perftool/loopy/transformer.py
+++ b/python/dune/perftool/loopy/transformer.py
@@ -25,6 +25,7 @@ from dune.perftool.generation import (domain,
                                       )
 
 from dune.perftool.pdelab.basis import (lfs_iname,
+                                        name_leaf_lfs,
                                         name_lfs,
                                         name_lfs_bound,
                                         traverse_lfs_tree,
@@ -52,8 +53,11 @@ def get_outerloop():
 
 
 class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapper):
-    def __init__(self, measure):
+    def __init__(self, measure, subdomain_id):
         self.measure = measure
+        self.subdomain_id = subdomain_id
+        self.argshape = 0
+        self.redinames = ()
         super(UFL2LoopyVisitor, self).__init__()
 
     def _assign(self, o):
@@ -86,7 +90,7 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp
             assignee_index_placeholder = IndexPlaceholderExtraction()(o).pop()
             assignee_index = IndexPlaceholderRemoval(duplicate_inames=True)(assignee_index_placeholder)
             assignee = Subscript(assignee, (assignee_index,))
-            temp_shape = (name_lfs_bound(name_lfs(assignee_index_placeholder.element, assignee_index_placeholder.restriction)),)
+            temp_shape = (name_lfs_bound(name_leaf_lfs(assignee_index_placeholder.element, assignee_index_placeholder.restriction)),)
 
         # Now introduce duplicate inames for the argument loop if necessary
         replaced_iname_deps = [IndexPlaceholderRemoval(duplicate_inames=not merge_into_main_loopnest, wrap_in_variable=False)(i) for i in iname_deps]
@@ -114,20 +118,125 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp
         return retvar
 
     def __call__(self, o):
+        # Initialize the local function spaces that we might need for this term
+        # We therefore gather a list of modified trial functions too.
+        from dune.perftool.ufl.modified_terminals import extract_modified_arguments
+        test_ma = extract_modified_arguments(o)
+        trial_ma = extract_modified_arguments(o, trialfunction=True, testfunction=False)
+
+        # All test and trial functions on boundary integrals are technically restricted
+        import itertools
+        for ma in itertools.chain(test_ma, trial_ma):
+            if self.measure == 'exterior_facet':
+                ma.restriction = Restriction.NEGATIVE
+
+            traverse_lfs_tree(ma)
+
         # Determine the rank of the term
-        from dune.perftool.ufl.rank import ufl_rank
-        self.rank = ufl_rank(o)
+        self.rank = len(test_ma)
 
         # And initialize a list of found inames
         self.inames = []
 
         # First we do the tree traversal to get a pymbolic expression representing this expression
-        ret = self.call(o)
+        pymbolic_expr = self.call(o)
 
-        if isinstance(ret, Variable):
-            return ret
-        else:
-            return self._assign(ret)
+        # It may happen that an entire accumulation term vanishes. We do nothing in that case
+        if pymbolic_expr == 0:
+            return
+
+        # We assign the result to a temporary variable to ease things a bit
+        if not isinstance(pymbolic_expr, Variable):
+            pymbolic_expr = self._assign(pymbolic_expr)
+
+        # Transform the IndexPlaceholders into real inames
+        self.inames = [IndexPlaceholderRemoval(wrap_in_variable=False)(i) for i in self.inames]
+
+        # Collect the arguments for the accumulate function
+        accumargs = [None] * (2 * len(test_ma))
+        residual_shape = [None] * len(test_ma)
+        arg_restr = [None] * len(test_ma)
+
+        for ma in test_ma:
+            count = ma.argexpr.number()
+            if len(test_ma) == 2:
+                icount = (count + 1) % 2
+            else:
+                icount = 0
+
+            # Extract the subelement of the (mixed) finite element
+            from ufl.functionview import select_subelement
+            subel = select_subelement(ma.argexpr.ufl_element(), ma.component)
+
+            # And generate a local function space for it!
+            lfs = name_lfs(ma.argexpr.ufl_element(), ma.restriction, ma.component)
+            if len(subel.value_shape()) != 0:
+                from dune.perftool.pdelab.geometry import dimension_iname
+                from dune.perftool.pdelab.basis import lfs_child
+                lfs = lfs_child(lfs, dimension_iname(count=count, context='arg'))
+                residual_shape[icount] = name_lfs_bound(name_leaf_lfs(subel.sub_elements()[0], ma.restriction))
+            else:
+                residual_shape[icount] = name_lfs_bound(lfs)
+
+            lfsi = lfs_iname(subel, ma.restriction, count=count)
+
+            accumargs[2 * icount] = lfs
+            accumargs[2 * icount + 1] = lfsi
+
+            arg_restr[icount] = ma.restriction
+
+        from dune.perftool.pdelab.argument import name_accumulation_variable
+        accumvar = name_accumulation_variable(arg_restr)
+
+        from dune.perftool.pdelab.quadrature import name_factor
+        factor = name_factor()
+
+        # Generate the code snippet for this accumulation instruction
+        code = "{}.accumulate({}, {}*{});".format(accumvar,
+                                                  ", ".join(accumargs),
+                                                  pymbolic_expr.name,
+                                                  factor,
+                                                  )
+
+        # Maybe wrap this instruction into a condiditional. This mostly happens with mixed boundary conditions
+        if self.subdomain_id not in ['everywhere', 'otherwise']:
+            # We need to reconstruct the subdomain_data parameter of the measure
+            # I am *totally* confused as to why this information is not at hand anyway,
+            # but conversation with Martin pointed me to dolfin.fem.assembly where this
+            # is done in preprocessing with the limitation of only one possible type of
+            # modified measure per integral type.
+
+            # Get the original form and inspect the present measures
+            from dune.perftool.generation import get_global_context_value
+            original_form = get_global_context_value("formdata").original_form
+
+            sd = original_form.subdomain_data()
+            assert len(sd) == 1
+            subdomains, = list(sd.values())
+            domain, = list(sd.keys())
+            for k in list(subdomains.keys()):
+                if subdomains[k] is None:
+                    del subdomains[k]
+
+            # Finally extract the original subdomain_data (which needs to be present!)
+            assert self.measure in subdomains
+            subdomain_data = subdomains[self.measure]
+
+            # Determine the name of the parameter function
+            name = get_global_context_value("namedata")[id(subdomain_data)]
+
+            # Trigger the generation of code for this thing in the parameter class
+            from dune.perftool.pdelab.parameter import intersection_parameter_function
+            intersection_parameter_function(name, subdomain_data, t='int')
+
+            code = "if ({} == {})\n  {}".format(name, self.subdomain_id, code)
+
+        # Finally, issue the instruction
+        instruction(code=code,
+                    read_variables=frozenset({factor, pymbolic_expr.name}),
+                    forced_iname_deps=frozenset(self.inames).union(frozenset({quadrature_iname()})),
+                    forced_iname_deps_is_final=True,
+                    )
 
     def argument(self, o):
         # Correct the restriction on boundary integrals
@@ -135,15 +244,38 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp
         if self.measure == 'exterior_facet':
             restriction = Restriction.NEGATIVE
 
+        # Select the correct subtree of the finite element
+        from ufl.functionview import select_subelement
+        element = select_subelement(o.ufl_element(), self.component)
+        leaf_element = element
+
         # Have the issued instruction depend on the iname for this localfunction space
-        self.inames.append(IndexPlaceholder(o.element(), restriction, o.number()))
+        self.inames.append(IndexPlaceholder(element, restriction, o.number()))
+
+        # Now treat the case of this being a vector finite element
+        if element.num_sub_elements() > 0:
+            # I cannot handle general mixed elements here...
+            from ufl import VectorElement
+            assert isinstance(element, VectorElement)
+
+            # Determine whether this is a non-scalar subargument. This information is later
+            # used by the Indexed-Mapper to make some indices loop indices!
+            self.argshape = len(element.value_shape())
+
+            # If this is a vector element, we need add an additional accumulation loop iname
+            from dune.perftool.pdelab.geometry import dimension_iname
+            for i in range(self.argshape):
+                self.inames.append(dimension_iname(count=i, context='arg'))
+
+            # For the purpose of basis evaluation, we need to take the leaf element
+            leaf_element = element.sub_elements()[0]
 
         if self.grad:
             from dune.perftool.pdelab.argument import name_testfunction_gradient
-            return Subscript(Variable(name_testfunction_gradient(o, restriction)), (IndexPlaceholder(o.element(), restriction, o.number()),))
+            return Subscript(Variable(name_testfunction_gradient(leaf_element, restriction)), (IndexPlaceholder(element, restriction, o.number()),))
         else:
             from dune.perftool.pdelab.argument import name_testfunction
-            return Subscript(Variable(name_testfunction(o, restriction)), (IndexPlaceholder(o.element(), restriction, o.number()),))
+            return Subscript(Variable(name_testfunction(leaf_element, restriction)), (IndexPlaceholder(element, restriction, o.number()),))
 
     def coefficient(self, o):
         # If this is a trialfunction, we do something entirely different
@@ -155,10 +287,10 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp
 
             if self.grad:
                 from dune.perftool.pdelab.argument import name_trialfunction_gradient
-                return Variable(name_trialfunction_gradient(o, restriction))
+                return Variable(name_trialfunction_gradient(o.ufl_element(), restriction, self.component))
             else:
                 from dune.perftool.pdelab.argument import name_trialfunction
-                return Variable(name_trialfunction(o, restriction))
+                return Variable(name_trialfunction(o.ufl_element(), restriction, self.component))
 
         # so this is a parameter function
         else:
@@ -182,129 +314,50 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp
             # And return a symbol
             return Variable(name)
 
+    def indexed(self, o):
+        """ Although indexed is something that we want to handle in the pymbolic mapper we
+        need to handle it here, as we have to give special treatment to indices into vector
+        elements: they become a loop index! """
+        # Set the generated index for the argument handler to have it available
+        self.last_index = self.call(o.ufl_operands[1])
+        aggr = self.call(o.ufl_operands[0])
+
+        use_indices = self.last_index[self.argshape:]
+        self.argshape = 0
+        if isinstance(aggr, Subscript):
+            return Subscript(aggr.aggregate, aggr.index + use_indices)
+        else:
+            return Subscript(aggr, use_indices)
+
     def index_sum(self, o):
-        from loopy import Reduction
+        # Get the iname for the reduction index
+        ind = o.ufl_operands[1][0]
+        self.redinames = self.redinames + (index_sum_iname(ind),)
+        shape = o.ufl_operands[0].ufl_index_dimensions[0]
+        from dune.perftool.pdelab import name_index
+        domain(name_index(ind), shape)
+
+        # If the left operand is an index sum to, we do it in one reduction
+        from ufl.classes import IndexSum
+        if isinstance(o.ufl_operands[0], IndexSum):
+            return self.call(o.ufl_operands[0])
+        else:
+            from loopy import Reduction
 
-        oldinames = self.inames
-        self.inames = []
+            oldinames = self.inames
+            self.inames = []
 
-        red_inames = ()
-        # Define an iname for each of the indices in the multiindex
-        for i, ind in enumerate(o.ufl_operands[1].indices()):
-            red_inames = red_inames + (index_sum_iname(ind),)
-            shape = o.ufl_operands[0].ufl_index_dimensions[i]
-            from dune.perftool.pdelab import name_index
-            domain(name_index(ind), shape)
+            # Recurse to get the summation expression
+            term = self.call(o.ufl_operands[0])
+            ret = self._assign(Reduction("sum", self.redinames, term))
 
-        # Recurse to get the summation expression
-        term = self.call(o.ufl_operands[0])
-        ret = self._assign(Reduction("sum", red_inames, term))
+            self.inames = self.inames + oldinames
 
-        self.inames = self.inames + oldinames
+            # Reset the reduction inames for future indexsums
+            self.redinames = ()
 
-        return ret
+            return ret
 
 
 def transform_accumulation_term(term, measure, subdomain_id):
-    # Initialize the local function spaces that we might need for this term
-    # We therefore gather a list of modified trial functions too.
-    from dune.perftool.ufl.modified_terminals import extract_modified_arguments
-    test_ma = extract_modified_arguments(term)
-    trial_ma = extract_modified_arguments(term, trialfunction=True, testfunction=False)
-
-    # All test and trial functions on boundary integrals are technically restricted
-    import itertools
-    for ma in itertools.chain(test_ma, trial_ma):
-        if measure == 'exterior_facet':
-            ma.restriction = Restriction.NEGATIVE
-
-        traverse_lfs_tree(ma)
-
-    pymbolic_expr = UFL2LoopyVisitor(measure)(term)
-
-    # It may happen that an entire accumulation term vanishes!
-    if pymbolic_expr == 0:
-        return
-
-    # The data that is used to collect the arguments for the accumulate function
-    accumargs = [None] * (2 * len(test_ma))
-    residual_shape = [None] * len(test_ma)
-    arg_restr = [None] * len(test_ma)
-    accum_inames = [None] * len(test_ma)
-
-    for ma in test_ma:
-        count = ma.argexpr.number()
-        if len(test_ma) == 2:
-            icount = (count + 1) % 2
-        else:
-            icount = 0
-
-        lfs = name_lfs(ma.argexpr.element(), ma.restriction)
-        lfsi = lfs_iname(ma.argexpr.element(), ma.restriction, count=count)
-
-        accumargs[2 * icount] = lfs
-        accumargs[2 * icount + 1] = lfsi
-
-        accum_inames[count] = lfsi
-        arg_restr[icount] = ma.restriction
-        residual_shape[icount] = name_lfs_bound(lfs)
-
-    from dune.perftool.pdelab.argument import name_accumulation_variable
-    accumvar = name_accumulation_variable(arg_restr)
-
-    # The residual/the jacobian should be represented through a loopy global argument
-    # TODO this seems still a bit hacky, esp. w.r.t. systems
-    for rs in residual_shape:
-        import numpy
-        valuearg(rs, dtype=numpy.int32)
-    globalarg(accumvar, shape=tuple(residual_shape))
-
-    from dune.perftool.pdelab.quadrature import name_factor
-    factor = name_factor()
-
-    # Generate the code snippet for this accumulation instruction
-    code = "{}.accumulate({}, {}*{});".format(accumvar,
-                                              ", ".join(accumargs),
-                                              pymbolic_expr.name,
-                                              factor,
-                                              )
-
-    # Maybe wrap this instruction into a condiditional. This mostly happens with mixed boundary conditions
-    if subdomain_id not in ['everywhere', 'otherwise']:
-        # We need to reconstruct the subdomain_data parameter of the measure
-        # I am *totally* confused as to why this information is not at hand anyway,
-        # but conversation with Martin pointed me to dolfin.fem.assembly where this
-        # is done in preprocessing with the limitation of only one possible type of
-        # modified measure per integral type.
-
-        # Get the original form and inspect the present measures
-        from dune.perftool.generation import get_global_context_value
-        original_form = get_global_context_value("formdata").original_form
-        sd = original_form.subdomain_data()
-        assert len(sd) == 1
-        subdomains, = list(sd.values())
-        domain, = list(sd.keys())
-        for k in list(subdomains.keys()):
-            if subdomains[k] is None:
-                del subdomains[k]
-
-        # Finally extract the original subdomain_data (which needs to be present!)
-        assert measure in subdomains
-        subdomain_data = subdomains[measure]
-
-        # Determine the name of the parameter function
-        name = get_global_context_value("namedata")[id(subdomain_data)]
-
-        # Trigger the generation of code for this thing in the parameter class
-        from dune.perftool.pdelab.parameter import intersection_parameter_function
-        intersection_parameter_function(name, subdomain_data, t='int')
-
-        code = "if ({} == {})\n  {}".format(name, subdomain_id, code)
-
-    # Finally, issue the instruction
-    instruction(code=code,
-                assignees=frozenset({accumvar}),
-                read_variables=frozenset({accumvar, factor, pymbolic_expr.name}),
-                forced_iname_deps=frozenset(accum_inames).union(frozenset({quadrature_iname()})),
-                forced_iname_deps_is_final=True,
-                )
+    return UFL2LoopyVisitor(measure, subdomain_id)(term)
diff --git a/python/dune/perftool/pdelab/__init__.py b/python/dune/perftool/pdelab/__init__.py
index 0f57601058a0d6cc54e221f0049c90b8974c33db..50d41728a10ef4af2f0b34cb4a4b92db6da508b0 100644
--- a/python/dune/perftool/pdelab/__init__.py
+++ b/python/dune/perftool/pdelab/__init__.py
@@ -1,6 +1,7 @@
 """ The pdelab specific parts of the code generation process """
 
 from dune.perftool.generation import (preamble,
+                                      pymbolic_expr,
                                       symbol,
                                       )
 
@@ -17,6 +18,17 @@ def name_index(index):
         assert len(index) == 1
         # return str(index._indices[0])
         return "i_{}".format(index._indices[0].count())
+    raise NotImplementedError
+
+
+@pymbolic_expr
+def pymbolic_index(index):
+    from ufl.classes import FixedIndex
+    if isinstance(index, FixedIndex):
+        return index._value
+    else:
+        from pymbolic.primitives import Variable
+        return Variable(name_index(index))
 
 
 def restricted_name(name, restriction):
diff --git a/python/dune/perftool/pdelab/argument.py b/python/dune/perftool/pdelab/argument.py
index bcb9ec3717432f0264b8554c34d7350a9d4db500..720a46218ddf1aff087ea18cf979b4069f2d71fe 100644
--- a/python/dune/perftool/pdelab/argument.py
+++ b/python/dune/perftool/pdelab/argument.py
@@ -17,29 +17,31 @@ from pymbolic.primitives import Call, Subscript, Variable
 
 
 @symbol
-def name_testfunction_gradient(expr, restriction):
-    return name_basis_gradient(expr.element(), restriction)
+def name_testfunction_gradient(element, restriction):
+    return name_basis_gradient(element, restriction)
 
 
 @symbol
-def name_testfunction(expr, restriction):
+def name_testfunction(element, restriction):
     it = get_global_context_value("integral_type")
     if it == 'exterior_facet':
         restriction = Restriction.NEGATIVE
-    return name_basis(expr.element(), restriction)
+    return name_basis(element, restriction)
 
 
 @symbol
-def name_trialfunction_gradient(expr, restriction):
-    name = restricted_name("gradu", restriction)
-    evaluate_trialfunction_gradient(expr.element(), name, restriction)
+def name_trialfunction_gradient(element, restriction, component):
+    rawname = "gradu" + "_".join(str(c) for c in component)
+    name = restricted_name(rawname, restriction)
+    evaluate_trialfunction_gradient(element, name, restriction, component)
     return name
 
 
 @symbol
-def name_trialfunction(expr, restriction):
-    name = restricted_name("u", restriction)
-    evaluate_trialfunction(expr.element(), name, restriction)
+def name_trialfunction(element, restriction, component):
+    rawname = "u" + "_".join(str(c) for c in component)
+    name = restricted_name(rawname, restriction)
+    evaluate_trialfunction(element, name, restriction, component)
     return name
 
 
diff --git a/python/dune/perftool/pdelab/basis.py b/python/dune/perftool/pdelab/basis.py
index 303e860f800275dc1feb450c35ba629f7263b28f..b993e1e234cb4be159922b65db2c5f89489d7a8e 100644
--- a/python/dune/perftool/pdelab/basis.py
+++ b/python/dune/perftool/pdelab/basis.py
@@ -64,28 +64,69 @@ def name_lfs_bound(lfs):
     return bound
 
 
+@preamble
+def using_indices():
+    return "using namespace Dune::Indices;"
+
+
+@preamble
+def define_lfs(name, father, child):
+    using_indices()
+    return "auto {} = child({}, _{});".format(name, father, child)
+
+
+def lfs_child(lfs, child):
+    return "{}.child({})".format(lfs, child)
+
+
 @generator_factory(cache_key_generator=lambda e, r, **kw: (e, r))
-def name_lfs(element, restriction, prefix=None):
+def name_leaf_lfs(leaf_element, restriction, val=None):
+    """ This function just caches leaf lfs names based on the
+    element. The resulting local function spaces are useful only
+    for size information. OTOH, they are available with just the
+    leaf element available (as seen in basis evaluation).
+    """
+    # This generator function should be prepoluted by the lfs tree traversal,
+    # so val should always be None when we actually want the result
+    assert val
+
+    return val
+
+
+@generator_factory(cache_key_generator=lambda e, r, c, **kw: (e, r, c))
+def name_lfs(element, restriction, component, prefix=None):
     # Omitting the prefix is only valid upon a second call, which will
     # result in a cache hit.
     assert prefix
 
-    # Additionally, element is expected to be a ufl finite element
-    from ufl import FiniteElementBase
-    assert isinstance(element, FiniteElementBase)
+    def _name_lfs(prefix, component):
+        name = prefix
+        if len(component) > 0:
+            name = name + '_' + '_'.join(str(i) for i in component)
+        return name
+
+    name = _name_lfs(prefix, component)
+    if len(component) > 0:
+        father = _name_lfs(prefix, component[:-1])
+        # If this localfunction space is the child of another one, trigger
+        # the extraction preamble. Necessary before going into recursion
+        # for having the correct (top-down) order of preambles
+        define_lfs(name, father, component[-1])
 
     # Recurse into the given element to define all other local function spaces!
     from ufl import MixedElement
-    if isinstance(element, MixedElement):
-        for i, subelem in enumerate(element.sub_elements()):
-            # TODO in this case, we need to trigger the extraction mechanism
-            # as preambles in our code.
-            name_lfs(subelem, restriction, prefix + "_" + str(i))
+    from ufl.functionview import select_subelement
+    from ufl.classes import FixedIndex
+    subel = select_subelement(element, component)
+    if isinstance(subel, MixedElement):
+        for i in range(subel.num_sub_elements()):
+            name_lfs(element, restriction, component + (FixedIndex(i),), prefix=prefix)
 
-    # Now trigger the creation of all those other symbols/preables necessary for this lfs
+    # Cache the name for the subelement
+    name_leaf_lfs(subel, restriction, val=name)
 
     # Now return the prefix!
-    return prefix
+    return name
 
 
 @generator_factory(cache_key_generator=lambda e, **kw: e)
@@ -101,9 +142,10 @@ def type_gfs(element, basetype=None, index_stack=None):
 
     # Recurse into the given element to define all other local function spaces!
     from ufl import MixedElement
+    from ufl.classes import FixedIndex
     if isinstance(element, MixedElement):
         for i, subelem in enumerate(element.sub_elements()):
-            type_gfs(subelem, basetype=basetype, index_stack=index_stack + (i,))
+            type_gfs(subelem, basetype=basetype, index_stack=index_stack + (FixedIndex(i),))
 
     if len(index_stack) == 0:
         return basetype
@@ -126,13 +168,14 @@ def traverse_lfs_tree(arg):
     # Now start recursively extracting local function spaces and fill the cache with
     # all those values. That way we can later get a correct local function space with
     # just the ufl finite element.
-    name_lfs(arg.argexpr.element(), arg.restriction, prefix=lfs_basename)
-    type_gfs(arg.argexpr.element(), basetype=gfs_basename, index_stack=())
+    from ufl.classes import MultiIndex
+    name_lfs(arg.argexpr.ufl_element(), arg.restriction, MultiIndex(()), prefix=lfs_basename)
+    type_gfs(arg.argexpr.ufl_element(), basetype=gfs_basename, index_stack=())
 
 
 @generator_factory(item_tags=("iname",), cache_key_generator=lambda e, r, c: (e, c))
 def _lfs_iname(element, restriction, context):
-    lfs = name_lfs(element, restriction)
+    lfs = name_leaf_lfs(element, restriction)
     bound = name_lfs_bound(lfs)
 
     name = "{}_{}_index".format(lfs, context)
@@ -167,7 +210,7 @@ def lfs_iname(element, restriction, count=None, context=''):
 @preamble
 def declare_cache_temporary(element, restriction, name, which):
     t_cache = type_localbasis_cache(element)
-    lfs = name_lfs(element, restriction)
+    lfs = name_leaf_lfs(element, restriction)
     return "typename {}::{}ReturnType {};".format(t_cache,
                                                   which,
                                                   name,
@@ -175,11 +218,11 @@ def declare_cache_temporary(element, restriction, name, which):
 
 
 @cached
-def evaluate_basis(element, name, restriction):
-    lfs = name_lfs(element, restriction)
+def evaluate_basis(leaf_element, name, restriction):
+    lfs = name_leaf_lfs(leaf_element, restriction)
     temporary_variable(name, shape=(name_lfs_bound(lfs),), decl_method=None)
-    declare_cache_temporary(element, restriction, name, which='Function')
-    cache = name_localbasis_cache(element)
+    declare_cache_temporary(leaf_element, restriction, name, which='Function')
+    cache = name_localbasis_cache(leaf_element)
     qp = name_quadrature_position_in_cell(restriction)
     instruction(inames=(quadrature_iname(),
                         ),
@@ -194,18 +237,20 @@ def evaluate_basis(element, name, restriction):
 
 
 @symbol
-def name_basis(element, restriction):
+def name_basis(leaf_element, restriction):
+    assert leaf_element.num_sub_elements() == 0
+    # TODO name mangling!
     name = restricted_name("phi", restriction)
-    evaluate_basis(element, name, restriction)
+    evaluate_basis(leaf_element, name, restriction)
     return name
 
 
 @cached
-def evaluate_reference_gradient(element, name, restriction):
-    lfs = name_lfs(element, restriction)
+def evaluate_reference_gradient(leaf_element, name, restriction):
+    lfs = name_leaf_lfs(leaf_element, restriction)
     temporary_variable(name, shape=(name_lfs_bound(lfs), name_dimension()), decl_method=None)
-    declare_cache_temporary(element, restriction, name, which='Jacobian')
-    cache = name_localbasis_cache(element)
+    declare_cache_temporary(leaf_element, restriction, name, which='Jacobian')
+    cache = name_localbasis_cache(leaf_element)
     qp = name_quadrature_position_in_cell(restriction)
     instruction(inames=(quadrature_iname(),
                         ),
@@ -220,19 +265,21 @@ def evaluate_reference_gradient(element, name, restriction):
 
 
 @symbol
-def name_reference_gradient(element, restriction):
+def name_reference_gradient(leaf_element, restriction):
+    assert leaf_element.num_sub_elements() == 0
+    # TODO name mangling!
     name = restricted_name("js", restriction)
-    evaluate_reference_gradient(element, name, restriction)
+    evaluate_reference_gradient(leaf_element, name, restriction)
     return name
 
 
 @cached
-def evaluate_basis_gradient(element, name, restriction):
-    lfs = name_lfs(element, restriction)
+def evaluate_basis_gradient(leaf_element, name, restriction):
+    lfs = name_leaf_lfs(leaf_element, restriction)
     temporary_variable(name, shape=(name_lfs_bound(lfs), name_dimension()), shape_impl=('vec', 'fv'))
     jac = name_jacobian_inverse_transposed(restriction)
-    index = lfs_iname(element, restriction, context='transformgrads')
-    reference_gradients = name_reference_gradient(element, restriction)
+    index = lfs_iname(leaf_element, restriction, context='transformgrads')
+    reference_gradients = name_reference_gradient(leaf_element, restriction)
     instruction(inames=(index,
                         quadrature_iname(),
                         ),
@@ -248,18 +295,27 @@ def evaluate_basis_gradient(element, name, restriction):
 
 
 @symbol
-def name_basis_gradient(element, restriction):
+def name_basis_gradient(leaf_element, restriction):
+    assert leaf_element.num_sub_elements() == 0
+    # TODO name mangling!
     name = restricted_name("gradphi", restriction)
-    evaluate_basis_gradient(element, name, restriction)
+    evaluate_basis_gradient(leaf_element, name, restriction)
     return name
 
 
 @cached
-def evaluate_trialfunction(element, name, restriction):
+def evaluate_trialfunction(element, name, restriction, component):
+    from ufl.functionview import select_subelement
+    sub_element = select_subelement(element, component)
+
+    # Right now, no chance of getting velocity field...
+    assert len(sub_element.value_shape()) == 0
+#     element = sub_element
+
     temporary_variable(name, shape=())
-    lfs = name_lfs(element, restriction)
+    lfs = name_lfs(element, restriction, component)
     index = lfs_iname(element, restriction, context='trial')
-    basis = name_basis(element, restriction)
+    basis = name_basis(sub_element, restriction)
     from dune.perftool.pdelab.argument import pymbolic_coefficient
     coeff = pymbolic_coefficient(lfs, index, restriction)
     reduction_expr = Product((coeff, Subscript(Variable(basis), Variable(index))))
@@ -271,17 +327,41 @@ def evaluate_trialfunction(element, name, restriction):
 
 
 @cached
-def evaluate_trialfunction_gradient(element, name, restriction):
-    temporary_variable(name, shape=(name_dimension(),), shape_impl=('fv',))
-    lfs = name_lfs(element, restriction)
-    index = lfs_iname(element, restriction, context='trialgrad')
-    basis = name_basis_gradient(element, restriction)
-    idim = dimension_iname()
+def evaluate_trialfunction_gradient(element, name, restriction, component):
+    # First we determine the rank of the tensor we are talking about
+    from ufl.functionview import select_subelement
+    sub_element = select_subelement(element, component)
+    rank = len(sub_element.value_shape()) + 1
+    assert rank in (1, 2)
+
+    # We do then set some variables accordingly
+    shape = (name_dimension(),) * rank
+    if rank == 1:
+        shape_impl = ('fv',)
+    else:
+        shape_impl = ('fm',)
+    idims = tuple(dimension_iname(count=i) for i in range(rank))
+    leaf_element = sub_element
+    from ufl import VectorElement
+    if isinstance(sub_element, VectorElement):
+        leaf_element = sub_element.sub_elements()[0]
+
+    # and proceed to call the necessary generator functions
+    temporary_variable(name, shape=shape, shape_impl=shape_impl)
+    lfs = name_lfs(element, restriction, component)
+    index = lfs_iname(leaf_element, restriction, context='trialgrad')
+    basis = name_basis_gradient(leaf_element, restriction)
+
+    if isinstance(sub_element, VectorElement):
+        lfs = lfs_child(lfs, idims[0])
+
     from dune.perftool.pdelab.argument import pymbolic_coefficient
     coeff = pymbolic_coefficient(lfs, index, restriction)
-    reduction_expr = Product((coeff, Subscript(Variable(basis), (Variable(index), Variable(idim)))))
+    assignee = Subscript(Variable(name), tuple(Variable(i) for i in idims))
+
+    reduction_expr = Product((coeff, Subscript(Variable(basis), (Variable(index), Variable(idims[-1])))))
     instruction(expression=Reduction("sum", index, reduction_expr, allow_simultaneous=True),
-                assignee=Subscript(Variable(name), Variable(idim)),
-                forced_iname_deps=frozenset({quadrature_iname(), idim}),
+                assignee=assignee,
+                forced_iname_deps=frozenset({quadrature_iname()}).union(frozenset(idims)),
                 forced_iname_deps_is_final=True,
                 )
diff --git a/python/dune/perftool/pymbolic/placeholder.py b/python/dune/perftool/pymbolic/placeholder.py
index c96765b1c42a985316645d0160ea06c781dd3c96..16bb78d9e3f7818f4b52f90b61b5b83c96d1f325 100644
--- a/python/dune/perftool/pymbolic/placeholder.py
+++ b/python/dune/perftool/pymbolic/placeholder.py
@@ -24,7 +24,7 @@ class IndexPlaceholderRemoval(IdentityMapper):
 
     def map_foreign(self, o):
         # How do we map constants here? map_constant was not correct
-        if isinstance(o, int) or isinstance(o, float):
+        if isinstance(o, int) or isinstance(o, float) or isinstance(o, str):
             return o
 
         # There might be tuples of indices where only one is a placeholder,
diff --git a/python/dune/perftool/pymbolic/uflmapper.py b/python/dune/perftool/pymbolic/uflmapper.py
index 19d6f2500e12fea0b32de37f41b277143c2686ad..e9894aaa2a70449dc21bcbdb92a880c19e629df8 100644
--- a/python/dune/perftool/pymbolic/uflmapper.py
+++ b/python/dune/perftool/pymbolic/uflmapper.py
@@ -24,27 +24,14 @@ class UFL2PymbolicMapper(MultiFunction):
         return Product(tuple(self.call(op) for op in get_operands(o)))
 
     def multi_index(self, o):
-        from dune.perftool.pdelab import name_index
-        return tuple(Variable(name_index(op)) for op in o.indices())
+        from dune.perftool.pdelab import pymbolic_index
+        return tuple(pymbolic_index(op) for op in o.indices())
 
     def index(self, o):
         # One might as well take the uflname as string here, but I apply this function
         from dune.perftool.pdelab import name_index
         return Variable(name_index(o))
 
-    def fixed_index(self, o):
-        return o._value
-
-    def indexed(self, o):
-        aggr = self.call(o.ufl_operands[0])
-        ind = self.call(o.ufl_operands[1])
-
-        # If the left operand is already a subscripted pymbolic expression, we need to do tuple addition on the indices
-        if isinstance(aggr, Subscript):
-            return Subscript(aggr.aggregate, aggr.index + ind)
-        else:
-            return Subscript(aggr, ind)
-
     def float_value(self, o):
         return o.value()
 
diff --git a/python/dune/perftool/ufl/execution.py b/python/dune/perftool/ufl/execution.py
index 4f5cf5e28f4a0b4414b98194b38d2798df100b75..7b5d4a2b289cb8cfc72f2f39485b731b849c8c14 100644
--- a/python/dune/perftool/ufl/execution.py
+++ b/python/dune/perftool/ufl/execution.py
@@ -6,7 +6,7 @@ UFL.
 import ufl
 
 from ufl import *
-from ufl.split_functions import split
+
 
 # Monkey patch to the FiniteElementBase
 # We want to subclass finite elements in order to allow meta data
@@ -36,10 +36,17 @@ class Coefficient(ufl.Coefficient):
         ufl.Coefficient.__init__(self, element, count)
 
 
+split = ufl.split_functions.split2
+
+
 def Coefficients(element):
     return split(Coefficient(element))
 
 
+def TestFunctions(element):
+    return split(TestFunction(element))
+
+
 def TrialFunctions(element):
     return split(TrialFunction(element))
 
diff --git a/python/dune/perftool/ufl/modified_terminals.py b/python/dune/perftool/ufl/modified_terminals.py
index 6beed959b0c0cd372114be603a8eeaae2190f122..0d489dcda91148c7c5a3bf184a87106a412b649a 100644
--- a/python/dune/perftool/ufl/modified_terminals.py
+++ b/python/dune/perftool/ufl/modified_terminals.py
@@ -2,6 +2,7 @@
 
 from ufl.algorithms import MultiFunction
 from dune.perftool import Restriction
+from ufl.classes import MultiIndex
 
 
 class ModifiedTerminalTracker(MultiFunction):
@@ -14,6 +15,7 @@ class ModifiedTerminalTracker(MultiFunction):
         self.grad = False
         self.reference_grad = False
         self.restriction = Restriction.NONE
+        self.component = MultiIndex(())
 
     def positive_restricted(self, o):
         assert self.restriction == Restriction.NONE
@@ -43,6 +45,12 @@ class ModifiedTerminalTracker(MultiFunction):
         self.reference_grad = False
         return ret
 
+    def function_view(self, o):
+        self.component = o.ufl_operands[1]
+        ret = self.call(o.ufl_operands[0])
+        self.component = MultiIndex(())
+        return ret
+
 
 class ModifiedArgumentDescriptor(MultiFunction):
     def __init__(self, e):
@@ -52,6 +60,7 @@ class ModifiedArgumentDescriptor(MultiFunction):
         self.reference_grad = False
         self.index = None
         self.restriction = Restriction.NONE
+        self.component = MultiIndex(())
         self.expr = e
 
         self.__call__(e)
@@ -80,6 +89,10 @@ class ModifiedArgumentDescriptor(MultiFunction):
         self.index = o.ufl_operands[1]
         self(o.ufl_operands[0])
 
+    def function_view(self, o):
+        self.component = o.ufl_operands[1]
+        self(o.ufl_operands[0])
+
     def argument(self, o):
         self.argexpr = o
 
@@ -107,25 +120,16 @@ class _ModifiedArgumentExtractor(MultiFunction):
             if ret:
                 self.modified_arguments.add(ret)
 
-    def indexed(self, o):
+    def pass_on(self, o):
         if self.call(o.ufl_operands[0]):
             return o
 
-    def positive_restricted(self, o):
-        if self.call(o.ufl_operands[0]):
-            return o
-
-    def negative_restricted(self, o):
-        if self.call(o.ufl_operands[0]):
-            return o
-
-    def grad(self, o):
-        if self.call(o.ufl_operands[0]):
-            return o
-
-    def reference_grad(self, o):
-        if self.call(o.ufl_operands[0]):
-            return o
+    indexed = pass_on
+    positive_restricted = pass_on
+    negative_restricted = pass_on
+    grad = pass_on
+    reference_grad = pass_on
+    function_view = pass_on
 
     def argument(self, o):
         if self.testfunction:
diff --git a/test/stokes/stokes.ufl b/test/stokes/stokes.ufl
index bf52d7b9895095455e0ee298e12e7543d4f89fb8..7fb10134e96dec3ee25f631d52e61ec531c1de1d 100644
--- a/test/stokes/stokes.ufl
+++ b/test/stokes/stokes.ufl
@@ -3,11 +3,12 @@ P2 = VectorElement("Lagrange", cell, 2)
 P1 = FiniteElement("Lagrange", cell, 1)
 TH = P2 * P1
 
-(v, q) = TestFunctions(TH)
-(u, p) = TrialFunctions(TH)
+v, q = TestFunctions(TH)
+u, p = TrialFunctions(TH)
 
-f = Coefficient(P2)
+#r = inner(grad(v), grad(u))*dx
+r = (inner(grad(v), grad(u)) - div(v)*p + q*div(u))*dx
+#r = q*div(u)*dx
+#r = p*div(v)*dx
 
-form = (inner(grad(v), grad(u)) - div(v)*p + q*div(u))*dx - dot(v, f)*dx
-
-forms = [form]
+forms = [r]