diff --git a/python/dune/perftool/generation/context.py b/python/dune/perftool/generation/context.py
index 89eb7b59c0157956c44898346835fba222369a52..3766971dd6d469778f71ee74cdc7cd210ed99722 100644
--- a/python/dune/perftool/generation/context.py
+++ b/python/dune/perftool/generation/context.py
@@ -29,5 +29,5 @@ def global_context(**kwargs):
     return _GlobalContext(**kwargs)
 
 
-def get_global_context_value(key):
-    return _global_context_cache[key]
+def get_global_context_value(key, default=None):
+    return _global_context_cache.get(key, default)
diff --git a/python/dune/perftool/interactive.py b/python/dune/perftool/interactive.py
index e4ea980470bb67fb3616c101e9e533b90333fcae..8b7fb0cd3be913568b063b6683f8403e91ae515b 100644
--- a/python/dune/perftool/interactive.py
+++ b/python/dune/perftool/interactive.py
@@ -1,11 +1,12 @@
 from __future__ import print_function
 from functools import partial
-from os import system
 
 from dune.perftool.generation import global_context
 from dune.perftool.loopy.transformations import get_loopy_transformations
 from dune.perftool.pdelab.localoperator import assembly_routine_signature, AssemblyMethod
 
+import os
+
 
 # Use the builtin 'input' in python2 and 'raw_input' in python3
 try:
@@ -15,7 +16,7 @@ except:
 
 
 def clear():
-    system('cls' if os.name == 'nt' else 'clear')
+    os.system('cls' if os.name == 'nt' else 'clear')
 
 
 def kernel_name(v):
diff --git a/python/dune/perftool/sumfact/__init__.py b/python/dune/perftool/sumfact/__init__.py
index 82516df9832f307c7b9048ae4960d089afd8c6ba..c4de2b6162c47b6a58a8a02368abf4e3ff122217 100644
--- a/python/dune/perftool/sumfact/__init__.py
+++ b/python/dune/perftool/sumfact/__init__.py
@@ -2,12 +2,21 @@ from dune.perftool.sumfact.quadrature import (quadrature_inames,
                                               quadrature_weight,
                                               )
 
-from dune.perftool.sumfact.basis import pymbolic_trialfunction
+from dune.perftool.sumfact.basis import (lfs_inames,
+                                         pymbolic_basis,
+                                         pymbolic_trialfunction,
+                                         )
 
 from dune.perftool.pdelab import PDELabInterface
 
 
 class SumFactInterface(PDELabInterface):
+    def lfs_inames(self, element, restriction, number=None, context=''):
+        return lfs_inames(element, restriction, number, context)
+
+    def pymbolic_basis(self, element, restriction, number):
+        return pymbolic_basis(element, restriction, number)
+
     def pymbolic_trialfunction(self, element, restriction, component):
         return pymbolic_trialfunction(element, restriction, component)
 
diff --git a/python/dune/perftool/sumfact/amatrix.py b/python/dune/perftool/sumfact/amatrix.py
index b6ab9d3da719fa30aca523bca210d8e2c76e241b..119fdea3ab2595382ad113beb5e06487c5a68c6b 100644
--- a/python/dune/perftool/sumfact/amatrix.py
+++ b/python/dune/perftool/sumfact/amatrix.py
@@ -226,13 +226,8 @@ def type_theta():
 
 
 @class_member("operator")
-def define_theta(name, transpose):
+def define_theta(name, shape, transpose):
     theta_type = type_theta()
-    if transpose:
-        shape = (basis_functions_per_direction(), quadrature_points_per_direction())
-    else:
-        shape = (quadrature_points_per_direction(), basis_functions_per_direction())
-    globalarg(name, shape=shape, dtype=numpy.float64, dim_tags="f,f")
     initializer_list(name, [str(axis) for axis in shape], classtag="operator")
     construct_theta(name, transpose)
     return "{} {};".format(theta_type, name)
@@ -240,11 +235,15 @@ def define_theta(name, transpose):
 
 def name_theta():
     name = "Theta"
-    define_theta(name, False)
+    shape = (quadrature_points_per_direction(), basis_functions_per_direction())
+    globalarg(name, shape=shape, dtype=numpy.float64, dim_tags="f,f")
+    define_theta(name, shape, False)
     return name
 
 
 def name_theta_transposed():
     name = "ThetaT"
-    define_theta(name, True)
+    shape = (basis_functions_per_direction(), quadrature_points_per_direction())
+    globalarg(name, shape=shape, dtype=numpy.float64, dim_tags="f,f")
+    define_theta(name, shape, True)
     return name
diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py
index 08e51ed87ac0ced7184cbc7e3ba5c965bd19beb7..d64b2f62dd3400309e337b8828dd8f093f5a54a5 100644
--- a/python/dune/perftool/sumfact/basis.py
+++ b/python/dune/perftool/sumfact/basis.py
@@ -5,6 +5,9 @@ multiplication withthe test function is part of the sum factorization kernel.
 """
 from dune.perftool.generation import (backend,
                                       cached,
+                                      get_global_context_value,
+                                      instruction,
+                                      temporary_variable,
                                       )
 from dune.perftool.sumfact.amatrix import (AMatrix,
                                            basis_functions_per_direction,
@@ -13,9 +16,12 @@ from dune.perftool.sumfact.amatrix import (AMatrix,
                                            )
 from dune.perftool.sumfact.sumfact import (setup_theta,
                                            sum_factorization_kernel,
+                                           sumfact_iname,
                                            )
 from dune.perftool.sumfact.quadrature import quadrature_inames
 from dune.perftool.loopy.buffer import initialize_buffer
+from dune.perftool.pdelab.driver import FEM_name_mangling
+from dune.perftool.pdelab.restriction import restricted_name
 
 from pytools import product
 
@@ -42,23 +48,38 @@ def pymbolic_trialfunction(element, restriction, component):
     return p.Subscript(p.Variable(var), tuple(p.Variable(i) for i in quadrature_inames()))
 
 
-def lfs_inames(leaf_element):
-    return ()
+@backend(interface="lfs_inames", name="sumfact")
+def lfs_inames(element, restriction, number=1, context=''):
+    assert number == 1
+    formdata = get_global_context_value('formdata')
+    dim = formdata.geometric_dimension
+    return tuple(sumfact_iname(basis_functions_per_direction(),'lfsdim{}'.format(d)) for d in range(dim))
 
 
 @backend(interface="evaluate_basis")
 @cached
-def evaluate_basis(leaf_element, name, restriction):
+def evaluate_basis(element, name, restriction):
     temporary_variable(name, shape=())
     theta = name_theta()
     quad_inames = quadrature_inames()
-    lfs_inames = lfs_inames()
-    assert(len(quad_inames) == len(lfs_inames))
+    inames = lfs_inames(element, restriction)
+    assert(len(quad_inames) == len(inames))
 
     instruction(expression=p.Product(tuple(p.Subscript(p.Variable(theta), (p.Variable(i), p.Variable(j)))
-                                           for (i,j) in zip(quad_inames, lfs_inames))
+                                           for (i,j) in zip(quad_inames, inames))
                                      ),
                 assignee=p.Variable(name),
-                forced_iname_deps=frozenset(quad_inames + lfs_inames),
+                forced_iname_deps=frozenset(quad_inames + inames),
                 forced_iname_deps_is_final=True,
                 )
+
+
+def pymbolic_basis(element, restriction, number):
+    assert number == 1
+    assert element.num_sub_elements() == 0
+
+    name = "phi_{}".format(FEM_name_mangling(element))
+    name = restricted_name(name, restriction)
+    evaluate_basis(element, name, restriction)
+
+    return p.Variable(name)
\ No newline at end of file
diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py
index 1186d4153e0797f290ab30f6dc6968cbbe629b39..3d52a87915999f32fb8033c1f975cc7097c34dc2 100644
--- a/python/dune/perftool/sumfact/sumfact.py
+++ b/python/dune/perftool/sumfact/sumfact.py
@@ -1,11 +1,11 @@
-from dune.perftool.pdelab.argument import (name_coefficientcontainer,
+from dune.perftool.pdelab.argument import (name_accumulation_variable,
+                                           name_coefficientcontainer,
                                            pymbolic_coefficient,
                                            )
 from dune.perftool.generation import (backend,
                                       domain,
                                       function_mangler,
                                       get_counter,
-                                      get_global_context_value,
                                       globalarg,
                                       iname,
                                       instruction,
@@ -88,7 +88,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
     temporary_variable(name, shape=(quadrature_points_per_direction(),) * dim, managed=True)
     instruction(assignee=Subscript(Variable(name), tuple(Variable(i) for i in quadrature_inames())),
                 expression=pymbolic_expr,
-                forced_iname_deps=frozenset(quadrature_inames()),
+                forced_iname_deps=frozenset(quadrature_inames() + visitor.inames),
                 forced_iname_deps_is_final=True,
                 depends_on=frozenset({stage_insn(1)}),
                 )
@@ -105,7 +105,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                       num=2
                       )
 
-    result = sum_factorization_kernel(a_matrices, "reffub", 2)
+    result = sum_factorization_kernel(a_matrices, "reffub", 2, additional_inames=visitor.inames)
 
     # Now write all this into the correct residual
     lfs = name_lfs(accterm.argument.argexpr.ufl_element(),
@@ -113,21 +113,22 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                    accterm.argument.component,
                    )
     inames = tuple(sumfact_iname(mat.rows, 'accum') for mat in a_matrices)
-    globalarg("r",
+    accum = name_accumulation_variable()
+    globalarg(accum,
               shape=(basis_functions_per_direction(),) * dim,
               argtype=AccumulationArg,
               lfs=lfs,
               )
 
     instruction(expression=Subscript(Variable(result), tuple(Variable(i) for i in inames)),
-                assignee=Subscript(Variable('r'), tuple(Variable(i) for i in inames)),
-                forced_iname_deps=frozenset(inames),
+                assignee=Subscript(Variable(accum), tuple(Variable(i) for i in inames)),
+                forced_iname_deps=frozenset(inames + visitor.inames),
                 forced_iname_deps_is_final=True,
                 depends_on=frozenset({stage_insn(3)}),
                 )
 
 
-def sum_factorization_kernel(a_matrices, buffer, stage, insn_dep=frozenset({})):
+def sum_factorization_kernel(a_matrices, buffer, stage, insn_dep=frozenset({}), additional_inames=frozenset({})):
     """
     Calculate a sum factorization matrix product.
 
@@ -183,7 +184,7 @@ def sum_factorization_kernel(a_matrices, buffer, stage, insn_dep=frozenset({})):
         # at the same time store the instruction ID for the next instruction to depend on
         insn_dep = frozenset({instruction(assignee=Subscript(Variable(out), (Variable(i), Variable(j))),
                                           expression=Reduction("sum", k, prod),
-                                          forced_iname_deps=frozenset({i, j}),
+                                          forced_iname_deps=frozenset({i, j}).union(additional_inames),
                                           forced_iname_deps_is_final=True,
                                           depends_on=insn_dep.union(frozenset({stage_insn(stage)})),
                                           )