diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py
index 7acc9a0eb4a4ea3ab7203e04608e70a6fd5bb99a..8afffba8a65d09ab4dd78ccb87f8e896738c0f47 100644
--- a/python/dune/perftool/generation/loopy.py
+++ b/python/dune/perftool/generation/loopy.py
@@ -46,90 +46,14 @@ def domain(iname, shape):
     return "{{ [{0}] : 0<={0}<{1} }}".format(iname, shape)
 
 
-def _temporary_type(shape_impl, shape, first=True):
-    if len(shape_impl) == 0:
-        return 'double'
-    if shape_impl[0] == 'arr':
-        if not first or len(set(shape_impl)) != 1:
-            raise ValueError("We do not allow mixing of C++ containers and plain C arrays, for reasons of mental sanity")
-        return 'double'
-    if shape_impl[0] == 'vec':
-        return "std::vector<{}>".format(_temporary_type(shape_impl[1:], shape[1:], first=False))
-    if shape_impl[0] == 'fv':
-        return "Dune::FieldVector<{}, {}>".format(_temporary_type(shape_impl[1:], shape[1:], first=False), shape[0])
-    if shape_impl[0] == 'fm':
-        # For now, no field matrices of weird stuff...
-        assert len(shape) == 2
-        return "Dune::FieldMatrix<double, {}, {}>".format(shape[0], shape[1])
-
-
-@preamble
-def default_declaration(name, shape=(), shape_impl=()):
-    # Determine the C++ type to use for this temporary.
-    t = _temporary_type(shape_impl, shape)
-    if len(shape_impl) == 0:
-        # This is a scalar, just return it!
-        return '{} {}(0.0);'.format(t, name)
-    if shape_impl[0] == 'arr':
-        return '{} {}{};'.format(t, name, ''.join('[{}]'.format(s) for s in shape))
-    if shape_impl[0] == 'vec':
-        return '{} {}({});'.format(t, name, shape[0])
-    if shape_impl[0] == 'fv':
-        return '{} {}(0.0);'.format(t, name)
-    if shape_impl[0] == 'fm':
-        assert len(shape) == 2
-        return '{} {}(0.0);'.format(t, name)
-
-
-def declaration_with_base_storage(base_storage, storage_shape):
-    # We currently consider the base storage to be linear
-    assert(len(storage_shape) == 1)
-
-    @preamble
-    def base_storage_decl(name, shape=(), shape_impl=()):
-        default_declaration(base_storage, shape=storage_shape, shape_impl=('arr',))
-        return 'double* {} = {};'.format(name, base_storage)
-
-    return base_storage_decl
-
-
 def get_temporary_name():
     return 'expr_{}'.format(str(get_counter('__temporary').zfill(4)))
 
 
-class UnmanagedTemporaryVariable(loopy.TemporaryVariable):
-    """
-    A TemporaryVariable class that does not have knowledge of data layout.
-    Is used when the data layout is under control of the underlying C++ type.
-    Indexed access will result in multiple operator[]s. Only known to
-    the DuneTarget.
-    """
-    pass
-
-
-@generator_factory(item_tags=("temporary",), cache_key_generator=no_caching)
-def temporary_variable(name, managed=False, **kwargs):
-    if 'dtype' not in kwargs:
-        kwargs['dtype'] = numpy.float64
-
-    # Check if a specified base_storage has already been initialized
-    if kwargs.get('base_storage', None):
-        assert(kwargs.get('decl_method', None) is None)
-        assert('storage_shape' in kwargs)
-        decl_method = declaration_with_base_storage(kwargs['base_storage'], kwargs['storage_shape'])
-    else:
-        decl_method = kwargs.pop('decl_method', default_declaration)
-
-    shape = kwargs.get('shape', ())
-    shape_impl = kwargs.pop('shape_impl', ('arr',) * len(shape))
-
-    if decl_method is not None:
-        decl_method(name, shape, shape_impl)
-
-    if managed:
-        return loopy.TemporaryVariable(name, **kwargs)
-    else:
-        return UnmanagedTemporaryVariable(name, **kwargs)
+@generator_factory(item_tags=("temporary",), cache_key_generator=lambda n, **kw: n)
+def temporary_variable(name, **kwargs):
+    from dune.perftool.loopy.temporary import DuneTemporaryVariable
+    return DuneTemporaryVariable(name, **kwargs)
 
 
 # Now define generators for instructions. To ease dependency handling of instructions
diff --git a/python/dune/perftool/loopy/target.py b/python/dune/perftool/loopy/target.py
index b1c738b359885a1c8e0434b7420799e4959c6770..8854688164e7805d1de90df31aa157b85c0efca6 100644
--- a/python/dune/perftool/loopy/target.py
+++ b/python/dune/perftool/loopy/target.py
@@ -1,3 +1,5 @@
+from dune.perftool.loopy.temporary import DuneTemporaryVariable
+
 from loopy.target import (TargetBase,
                           ASTBuilderBase,
                           DummyHostASTBuilder,
@@ -14,8 +16,8 @@ _registry = {'float32': 'float',
 
 class MyMapper(ExpressionToCExpressionMapper):
     def map_subscript(self, expr, type_context):
-        from dune.perftool.generation.loopy import UnmanagedTemporaryVariable
-        if isinstance(self.find_array(expr), UnmanagedTemporaryVariable):
+        temporary = self.find_array(expr)
+        if isinstance(temporary, DuneTemporaryVariable) and not temporary.managed:
             # If there is but one index, we do not need to handle this
             from pymbolic.primitives import Subscript, Variable
             if isinstance(expr.index, (Variable, int)):
@@ -34,11 +36,15 @@ class DuneASTBuilder(CASTBuilder):
     def get_expression_to_c_expression_mapper(self, codegen_state):
         return MyMapper(codegen_state)
 
-    def get_temporary_decls(self, codegen_state, schedule_index):
-        # Currently we do *not* want to build the temporary declarations
-        # through loopy, as this would involve lots of work on a proper
-        # type system!
-        return []
+    def get_temporary_decl(self, knl, schedule_index, temp_var, decl_info):
+        # If this is not a DuneTemporaryVariable, it was introduced by loopy
+        # and it should be totally under loopys control: Call the base class implementation!
+        if not isinstance(temp_var, DuneTemporaryVariable) or temp_var.managed:
+            return CASTBuilder.get_temporary_decl(self, knl, schedule_index, temp_var, decl_info)
+
+        from cgen import Line
+        if temp_var.decl_method:
+            return Line(temp_var.decl_method(temp_var.name, temp_var.shape, temp_var.shape_impl))
 
 
 class DuneTarget(TargetBase):
diff --git a/python/dune/perftool/loopy/temporary.py b/python/dune/perftool/loopy/temporary.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8d614dd080191722d9a61b4379371e2c0019827
--- /dev/null
+++ b/python/dune/perftool/loopy/temporary.py
@@ -0,0 +1,56 @@
+"""
+Class for temporary variables that allows us to use very non-standard types.
+"""
+
+from loopy import TemporaryVariable
+
+import numpy
+
+
+def _temporary_type(shape_impl, shape, first=True):
+    if len(shape_impl) == 0:
+        return 'double'
+    if shape_impl[0] == 'arr':
+        if not first or len(set(shape_impl)) != 1:
+            raise ValueError("We do not allow mixing of C++ containers and plain C arrays, for reasons of mental sanity")
+        return 'double'
+    if shape_impl[0] == 'vec':
+        return "std::vector<{}>".format(_temporary_type(shape_impl[1:], shape[1:], first=False))
+    if shape_impl[0] == 'fv':
+        return "Dune::FieldVector<{}, {}>".format(_temporary_type(shape_impl[1:], shape[1:], first=False), shape[0])
+    if shape_impl[0] == 'fm':
+        # For now, no field matrices of weird stuff...
+        assert len(shape) == 2
+        return "Dune::FieldMatrix<double, {}, {}>".format(shape[0], shape[1])
+
+
+def default_declaration(name, shape=(), shape_impl=()):
+    # Determine the C++ type to use for this temporary.
+    t = _temporary_type(shape_impl, shape)
+    if len(shape_impl) == 0:
+        # This is a scalar, just return it!
+        return '{} {}(0.0);'.format(t, name)
+    if shape_impl[0] == 'arr':
+        return '{} {}{};'.format(t, name, ''.join('[{}]'.format(s) for s in shape))
+    if shape_impl[0] == 'vec':
+        return '{} {}({});'.format(t, name, shape[0])
+    if shape_impl[0] == 'fv':
+        return '{} {}(0.0);'.format(t, name)
+    if shape_impl[0] == 'fm':
+        assert len(shape) == 2
+        return '{} {}(0.0);'.format(t, name)
+
+
+class DuneTemporaryVariable(TemporaryVariable):
+
+    allowed_extra_kwargs = TemporaryVariable.allowed_extra_kwargs + ["managed", "shape_impl", "decl_method"]
+
+    def __init__(self, name, managed=False, shape_impl=None, decl_method=default_declaration, **kwargs):
+        self.managed = managed
+        self.decl_method = decl_method
+        self.shape_impl = shape_impl
+        if shape_impl is None:
+            self.shape_impl = ("arr",) * len(kwargs.get('shape', ()))
+        kwargs.setdefault('dtype', numpy.float64)
+
+        TemporaryVariable.__init__(self, name, managed=self.managed, shape_impl=self.shape_impl, decl_method=self.decl_method, **kwargs)
diff --git a/python/dune/perftool/pdelab/__init__.py b/python/dune/perftool/pdelab/__init__.py
index 370efeecbd64001d94cfa7ff56437b81dce488ee..2ac5432e61419d95d75e624952cac09cc107bdec 100644
--- a/python/dune/perftool/pdelab/__init__.py
+++ b/python/dune/perftool/pdelab/__init__.py
@@ -22,8 +22,8 @@ from dune.perftool.pdelab.index import (name_index,
 from dune.perftool.pdelab.parameter import (cell_parameter_function,
                                             intersection_parameter_function,
                                             )
-from dune.perftool.pdelab.quadrature import (name_quadrature_weights,
-                                             quadrature_iname,
+from dune.perftool.pdelab.quadrature import (pymbolic_quadrature_weight,
+                                             quadrature_inames,
                                              )
 from dune.perftool.pdelab.spaces import (lfs_iname,
                                          )
@@ -103,8 +103,9 @@ class PDELabInterface(object):
     # Quadrature related generator functions
     #
 
-    def quadrature_iname(self):
-        return quadrature_iname()
+    def pymbolic_quadrature_weight(self):
+        return pymbolic_quadrature_weight()
 
-    def name_quadrature_weights(self):
-        return name_quadrature_weights()
+    # TODO Should this be part of interface or not?
+    def quadrature_inames(self):
+        return quadrature_inames()
diff --git a/python/dune/perftool/pdelab/argument.py b/python/dune/perftool/pdelab/argument.py
index afcb0e956c1597d526251dcf12efdc86def82d8f..5b72c9b95e54e5796b86c324d6b4c257a15afb06 100644
--- a/python/dune/perftool/pdelab/argument.py
+++ b/python/dune/perftool/pdelab/argument.py
@@ -169,11 +169,22 @@ def type_residual():
     return "R"
 
 
-def name_accumulation_variable(restrictions=(Restriction.NONE,)):
+def name_accumulation_variable(restrictions=None):
     ft = get_global_context_value("form_type")
+    measure = get_global_context_value("integral_type")
     if ft == 'residual' or ft == 'jacobian_apply':
+        if restrictions is None:
+            if measure == "cell":
+                restrictions = (Restriction.NONE,)
+            else:
+                restrictions = (Restriction.OUTSIDE,)
         return name_residual(*restrictions)
     if ft == 'jacobian':
+        if restrictions is None:
+            if measure == "cell":
+                restrictions = (Restriction.NONE, Restriction.NONE)
+            else:
+                restrictions = (Restriction.OUTSIDE, Restriction.OUTSIDE)
         return name_jacobian(*restrictions)
     assert False
 
diff --git a/python/dune/perftool/pdelab/basis.py b/python/dune/perftool/pdelab/basis.py
index ad587091096fff8a51debf2002332da2412890bf..17d967b1c2ee36e732fe790fba7ebe92f3ec8624 100644
--- a/python/dune/perftool/pdelab/basis.py
+++ b/python/dune/perftool/pdelab/basis.py
@@ -1,9 +1,9 @@
 """ Generators for basis evaluations """
 
-from dune.perftool.generation import (backend,
-                                      cached,
+from dune.perftool.generation import (cached,
                                       class_member,
                                       generator_factory,
+                                      get_backend,
                                       include_file,
                                       instruction,
                                       preamble,
@@ -50,25 +50,25 @@ def name_localbasis_cache(element):
     return name
 
 
-@preamble
-def declare_cache_temporary(element, restriction, name, which):
+def declare_cache_temporary(element, restriction, which):
     t_cache = type_localbasis_cache(element)
     lfs = name_leaf_lfs(element, restriction)
-    return "typename {}::{}ReturnType {};".format(t_cache,
-                                                  which,
-                                                  name,
-                                                  )
+
+    def decl(name, shape, shape_impl):
+        return "typename {}::{}ReturnType {};".format(t_cache,
+                                                      which,
+                                                      name,
+                                                      )
+    return decl
 
 
 @cached
 def evaluate_basis(leaf_element, name, restriction):
     lfs = name_leaf_lfs(leaf_element, restriction)
-    temporary_variable(name, shape=(name_lfs_bound(lfs),), decl_method=None)
-    declare_cache_temporary(leaf_element, restriction, name, which='Function')
+    temporary_variable(name, shape=(name_lfs_bound(lfs),), decl_method=declare_cache_temporary(leaf_element, restriction, 'Function'))
     cache = name_localbasis_cache(leaf_element)
-    qp = name_quadrature_position_in_cell(restriction)
-    instruction(inames=(quadrature_iname(),
-                        ),
+    qp = get_backend("qp_in_cell")(restriction)
+    instruction(inames=get_backend("quad_inames")(),
                 code='{} = {}.evaluateFunction({}, {}.finiteElement().localBasis());'.format(name,
                                                                                              cache,
                                                                                              qp,
@@ -91,12 +91,10 @@ def name_basis(leaf_element, restriction):
 @cached
 def evaluate_reference_gradient(leaf_element, name, restriction):
     lfs = name_leaf_lfs(leaf_element, restriction)
-    temporary_variable(name, shape=(name_lfs_bound(lfs), 1, name_dimension()), decl_method=None)
-    declare_cache_temporary(leaf_element, restriction, name, which='Jacobian')
+    temporary_variable(name, shape=(name_lfs_bound(lfs), 1, name_dimension()), decl_method=declare_cache_temporary(leaf_element, restriction, 'Jacobian'))
     cache = name_localbasis_cache(leaf_element)
     qp = name_quadrature_position_in_cell(restriction)
-    instruction(inames=(quadrature_iname(),
-                        ),
+    instruction(inames=get_backend("quad_inames")(),
                 code='{} = {}.evaluateJacobian({}, {}.finiteElement().localBasis());'.format(name,
                                                                                              cache,
                                                                                              qp,
@@ -124,7 +122,6 @@ def shape_as_pymbolic(shape):
     return tuple(_shape_as_pymbolic(s) for s in shape)
 
 
-@backend(interface="eval_coefficient")
 @cached
 def evaluate_coefficient(element, name, container, restriction, component):
     from ufl.functionview import select_subelement
@@ -156,7 +153,7 @@ def evaluate_coefficient(element, name, container, restriction, component):
     reduction_expr = Product((coeff, Subscript(Variable(basis), Variable(index))))
     instruction(expression=Reduction("sum", index, reduction_expr, allow_simultaneous=True),
                 assignee=assignee,
-                forced_iname_deps=frozenset({quadrature_iname()}).union(frozenset(idims)),
+                forced_iname_deps=frozenset(get_backend("quad_inames")()).union(frozenset(idims)),
                 forced_iname_deps_is_final=True,
                 )
 
@@ -195,6 +192,6 @@ def evaluate_coefficient_gradient(element, name, container, restriction, compone
 
     instruction(expression=Reduction("sum", index, reduction_expr, allow_simultaneous=True),
                 assignee=assignee,
-                forced_iname_deps=frozenset({quadrature_iname()}).union(frozenset(idims)),
+                forced_iname_deps=frozenset(get_backend("quad_inames")()).union(frozenset(idims)),
                 forced_iname_deps_is_final=True,
                 )
diff --git a/python/dune/perftool/pdelab/geometry.py b/python/dune/perftool/pdelab/geometry.py
index 74a699d06a1c0d0a816bc4d83ab39bf0d5446bf5..f9782c8fb7aa08eaf2c24001e5d42b35e7da8d4b 100644
--- a/python/dune/perftool/pdelab/geometry.py
+++ b/python/dune/perftool/pdelab/geometry.py
@@ -2,6 +2,7 @@ from dune.perftool.ufl.modified_terminals import Restriction
 from dune.perftool.pdelab.restriction import restricted_name
 from dune.perftool.generation import (cached,
                                       domain,
+                                      get_backend,
                                       get_global_context_value,
                                       iname,
                                       include_file,
@@ -281,7 +282,7 @@ def define_jacobian_inverse_transposed(name, restriction):
     dim = name_dimension()
     temporary_variable(name, decl_method=define_jacobian_inverse_transposed_temporary(restriction), shape=(dim, dim))
     geo = name_cell_geometry(restriction)
-    pos = name_quadrature_position_in_cell(restriction)
+    pos = get_backend("qp_in_cell")(restriction)
     return quadrature_preamble("{} = {}.jacobianInverseTransposed({});".format(name,
                                                                                geo,
                                                                                pos,
@@ -300,7 +301,7 @@ def name_jacobian_inverse_transposed(restriction):
 def define_jacobian_determinant(name):
     temporary_variable(name, shape=())
     geo = name_geometry()
-    pos = name_quadrature_position()
+    pos = get_backend("quad_pos")()
     code = "{} = {}.integrationElement({});".format(name,
                                                     geo,
                                                     pos,
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index 156721f4c41eb0ec61b027d4dc33518ec9a47666..48998b88d7ba14d14e70f62cba9436f0d670eeb1 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -403,13 +403,13 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                 (ansatz_lfs.get_args() + test_lfs.get_args() + (pymbolic_expr,))
                 )
 
-    from dune.perftool.generation import get_backend, instruction
+    from dune.perftool.generation import instruction
     from dune.perftool.options import option_switch
-    quad_inames = get_backend(interface="quadinames", selector=option_switch("sumfac"))()
+    quad_inames = visitor.interface.quadrature_inames()
 
     instruction(assignees=(),
                 expression=expr,
-                forced_iname_deps=additional_inames.union(frozenset(visitor.inames).union(quad_inames)),
+                forced_iname_deps=additional_inames.union(frozenset(visitor.inames).union(frozenset(quad_inames))),
                 forced_iname_deps_is_final=True,
                 predicates=predicates
                 )
@@ -459,8 +459,12 @@ def generate_kernel(integrals):
                     indexmap[j] = indexmap[i]
 
             # Get a transformer instance for this kernel
-            from dune.perftool.pdelab import PDELabInterface
-            interface = PDELabInterface()
+            if get_option('sumfact'):
+                from dune.perftool.sumfact import SumFactInterface
+                interface = SumFactInterface()
+            else:
+                from dune.perftool.pdelab import PDELabInterface
+                interface = PDELabInterface()
             from dune.perftool.ufl.visitor import UFL2LoopyVisitor
             visitor = UFL2LoopyVisitor(interface, measure, indexmap)
             generate_accumulation_instruction(visitor, term, measure, subdomain_id)
@@ -504,12 +508,6 @@ def generate_kernel(integrals):
     from dune.perftool.loopy.stages import finalize_stage_instructions
     kernel = finalize_stage_instructions(kernel)
 
-    # Loopy might have introduced some temporary variables during preprocessing. As I want to have my own
-    # temporary declaration code right now, I call the declaration preamble manually.
-    for added_tv in set(kernel.temporary_variables.keys()) - set(temporaries.keys()):
-        from dune.perftool.generation.loopy import default_declaration
-        default_declaration(added_tv)
-
     # Now add the preambles to the kernel
     preambles = [(i, p) for i, p in enumerate(retrieve_cache_items("preamble"))]
     kernel = kernel.copy(preambles=preambles)
diff --git a/python/dune/perftool/pdelab/parameter.py b/python/dune/perftool/pdelab/parameter.py
index 1c716dc2e976dad05a3bd2d7b94b0feb7fdb9944..466f382ae953f648394617b39e508f1b49834123 100644
--- a/python/dune/perftool/pdelab/parameter.py
+++ b/python/dune/perftool/pdelab/parameter.py
@@ -10,6 +10,7 @@ from dune.perftool.generation import (cached,
                                       temporary_variable
                                       )
 from dune.perftool.pdelab.geometry import (name_cell,
+                                           name_dimension,
                                            name_intersection,
                                            )
 from dune.perftool.pdelab.quadrature import (name_quadrature_position,
@@ -209,8 +210,6 @@ def cell_parameter_function(name, expr, restriction, cellwise_constant, t='doubl
     shape_impl = ('fv',) * len(shape)
     define_parameter_function_class_member(name, expr, t, shape, True)
     if cellwise_constant:
-        from dune.perftool.generation.loopy import default_declaration
-        default_declaration(name, shape, shape_impl)
         evaluate_cellwise_constant_parameter_function(name, restriction)
     else:
         temporary_variable(name, shape=shape, shape_impl=shape_impl)
@@ -223,8 +222,6 @@ def intersection_parameter_function(name, expr, cellwise_constant, t='double'):
     shape_impl = ('fv',) * len(shape)
     define_parameter_function_class_member(name, expr, t, shape, False)
     if cellwise_constant:
-        from dune.perftool.generation.loopy import default_declaration
-        default_declaration(name, shape, shape_impl)
         evaluate_intersectionwise_constant_parameter_function(name)
     else:
         temporary_variable(name, shape=shape, shape_impl=shape_impl)
diff --git a/python/dune/perftool/pdelab/quadrature.py b/python/dune/perftool/pdelab/quadrature.py
index 23ab380ed913137eabe2f7550456c4ff51e09e16..918a0620d82e7fdcd0a1d3910823390e3b24a2f5 100644
--- a/python/dune/perftool/pdelab/quadrature.py
+++ b/python/dune/perftool/pdelab/quadrature.py
@@ -4,6 +4,7 @@ from dune.perftool.generation import (backend,
                                       cached,
                                       class_member,
                                       domain,
+                                      get_backend,
                                       get_global_context_value,
                                       globalarg,
                                       iname,
@@ -17,6 +18,8 @@ from dune.perftool.pdelab.localoperator import lop_template_range_field
 from dune.perftool.options import get_option
 from dune.perftool.ufl.modified_terminals import Restriction
 
+from pymbolic.primitives import Variable, Subscript
+
 
 @preamble
 def define_quadrature_rule(name):
@@ -55,13 +58,13 @@ def quadrature_iname():
     return "q"
 
 
-@backend(interface="quadinames")
+@backend(interface="quad_inames")
 def quadrature_inames():
-    return frozenset({quadrature_iname()})
+    return (quadrature_iname(),)
 
 
 def quadrature_preamble(code, **kw):
-    return instruction(inames=quadrature_iname(), code=code, **kw)
+    return instruction(inames=get_backend(interface="quad_inames")(), code=code, **kw)
 
 
 def name_quadrature_point():
@@ -119,6 +122,7 @@ def name_quadrature_points():
     return name
 
 
+@backend(interface="quad_pos")
 def pymbolic_quadrature_position():
     quad_points = name_quadrature_points()
     quad_iname = quadrature_iname()
@@ -130,6 +134,7 @@ def name_quadrature_position():
     return str(pymbolic_quadrature_position())
 
 
+@backend(interface="qp_in_cell")
 def name_quadrature_position_in_cell(restriction):
     if restriction == Restriction.NONE:
         return name_quadrature_position()
@@ -153,6 +158,10 @@ def typedef_quadrature_weights(name):
     dim = _local_dim()
     return "using {} = typename Dune::QuadraturePoint<{}, {}>::Field;".format(name, range_field, dim)
 
+def pymbolic_quadrature_weight():
+    vec = name_quadrature_weights()
+    return Subscript(Variable(vec),
+                     tuple(Variable(i) for i in quadrature_inames()))
 
 def type_quadrature_weights(name):
     name = name.upper()
@@ -182,7 +191,6 @@ def name_quadrature_weights():
 
 def _estimate_quadrature_order():
     """Estimate quadrature order using polynomial degree estimation from UFL"""
-
     # According to UFL documentation estimate_total_polynomial_degree
     # should only be called on preprocessed forms.
     form = get_global_context_value("formdata").preprocessed_form
diff --git a/python/dune/perftool/pdelab/spaces.py b/python/dune/perftool/pdelab/spaces.py
index 07403a8641ba8701ccd3a89a8c9c619ef8167df9..0e7b0ce29818b72082a55ba795844d2ebf18747f 100644
--- a/python/dune/perftool/pdelab/spaces.py
+++ b/python/dune/perftool/pdelab/spaces.py
@@ -212,6 +212,24 @@ def lfs_iname(element, restriction, count=None, context=''):
     return _lfs_iname(element, restriction, context)
 
 
+class LFSLocalIndex(FunctionIdentifier):
+    def __init__(self, lfs):
+        self.lfs = lfs
+
+    def __getinitargs__(self):
+        return (self.lfs,)
+
+    @property
+    def name(self):
+        return '{}.local_index'.format(self.lfs)
+
+
+@function_mangler
+def lfs_localindex_mangler(target, func, dtypes):
+    if isinstance(func, LFSLocalIndex):
+        return CallMangleInfo(func.name, (NumpyType(numpy.int32),), (NumpyType(numpy.int32),))
+
+
 def name_testfunctionspace(restriction):
     return restricted_name("lfsv", restriction)
 
diff --git a/python/dune/perftool/sumfact/__init__.py b/python/dune/perftool/sumfact/__init__.py
index 3f081643c4230fa3168db58beef408640e2a5290..e263c742444e51ccbb301eade94ffcd60472756d 100644
--- a/python/dune/perftool/sumfact/__init__.py
+++ b/python/dune/perftool/sumfact/__init__.py
@@ -1,6 +1,18 @@
-# Trigger some imports that are needed to have all backend implementations visible
-# to the selection mechanisms
-import dune.perftool.sumfact.amatrix
-import dune.perftool.sumfact.sumfact
+from dune.perftool.sumfact.quadrature import (quadrature_inames,
+                                              quadrature_weight,
+                                              )
 
-from dune.perftool.sumfact.sumfact import start_sumfactorization
+from dune.perftool.sumfact.sumfact import pymbolic_trialfunction
+
+from dune.perftool.pdelab import PDELabInterface
+
+
+class SumFactInterface(PDELabInterface):
+    def pymbolic_trialfunction(self, element, restriction, component):
+        return pymbolic_trialfunction(element, restriction, component)
+
+    def quadrature_inames(self):
+        return quadrature_inames()
+
+    def pymbolic_quadrature_weight(self):
+        return quadrature_weight()
diff --git a/python/dune/perftool/sumfact/amatrix.py b/python/dune/perftool/sumfact/amatrix.py
index cf9f33c41b63c2fd5c3929ba87d77ea1c1e09007..8dfc180cdb10d6efac8d6089335487ea847f25b7 100644
--- a/python/dune/perftool/sumfact/amatrix.py
+++ b/python/dune/perftool/sumfact/amatrix.py
@@ -125,6 +125,7 @@ def define_oned_quadrature_weights(name):
 
 def name_oned_quadrature_weights():
     name = "qw"
+    globalarg(name, shape=(quadrature_points_per_direction(),), dtype=NumpyType(numpy.float64))
     define_oned_quadrature_weights(name)
     return name
 
@@ -138,6 +139,7 @@ def define_oned_quadrature_points(name):
 
 def name_oned_quadrature_points():
     name = "qp"
+    globalarg(name, shape=(quadrature_points_per_direction(),), dtype=NumpyType(numpy.float64))
     define_oned_quadrature_points(name)
     return name
 
diff --git a/python/dune/perftool/sumfact/quadrature.py b/python/dune/perftool/sumfact/quadrature.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea7104a4c8316f7aff23fd3f4a7f9f6dbdc040e6
--- /dev/null
+++ b/python/dune/perftool/sumfact/quadrature.py
@@ -0,0 +1,114 @@
+from dune.perftool.generation import (backend,
+                                      domain,
+                                      function_mangler,
+                                      get_global_context_value,
+                                      iname,
+                                      instruction,
+                                      temporary_variable,
+                                      )
+
+from dune.perftool.sumfact.amatrix import (name_number_of_basis_functions_per_direction,
+                                           name_oned_quadrature_points,
+                                           name_oned_quadrature_weights,
+                                           )
+from dune.perftool.pdelab.argument import name_accumulation_variable
+from dune.perftool.pdelab.geometry import dimension_iname
+
+from loopy import CallMangleInfo
+from loopy.symbolic import FunctionIdentifier
+from loopy.types import NumpyType
+
+from pymbolic.primitives import (Call,
+                                 Product,
+                                 Subscript,
+                                 Variable,
+                                 )
+
+import numpy
+
+
+class BaseWeight(FunctionIdentifier):
+    def __init__(self, accumobj):
+        self.accumobj = accumobj
+
+    def __getinitargs__(self):
+        return (self.accumobj,)
+
+    @property
+    def name(self):
+        return '{}.weight'.format(self.accumobj)
+
+
+@function_mangler
+def base_weight_function_mangler(target, func, dtypes):
+    if isinstance(func, BaseWeight):
+        return CallMangleInfo(func.name, (NumpyType(numpy.float64),), ())
+
+
+@iname
+def sumfact_quad_iname(d, context):
+    name = "quad_{}_{}".format(context, d)
+    domain(name, name_number_of_basis_functions_per_direction())
+    return name
+
+
+@backend(interface="quad_inames", name="sumfact")
+def quadrature_inames(context=''):
+    formdata = get_global_context_value('formdata')
+    dim = formdata.geometric_dimension
+    return tuple(sumfact_quad_iname(d, context) for d in range(dim))
+
+
+def define_recursive_quadrature_weight(name, dir):
+    iname = quadrature_inames()[dir]
+    temporary_variable(name, shape=(), shape_impl=())
+    instruction(expression=Product((recursive_quadrature_weight(dir + 1),
+                                    Subscript(Variable(name_oned_quadrature_weights()),
+                                              (Variable(iname),)
+                                              ))
+                                   ),
+                assignee=Variable(name),
+                forced_iname_deps=frozenset(quadrature_inames()[dir:]),
+                forced_iname_deps_is_final=True,
+                )
+
+
+def recursive_quadrature_weight(dir=0):
+    formdata = get_global_context_value('formdata')
+    dim = formdata.geometric_dimension
+    if dir == dim:
+        return Call(BaseWeight(name_accumulation_variable()), ())
+    else:
+        name = 'weight_{}'.format(dir)
+        define_recursive_quadrature_weight(name, dir)
+        return Variable(name)
+
+
+def quadrature_weight():
+    return recursive_quadrature_weight()
+
+
+def define_quadrature_position(name):
+    formdata = get_global_context_value('formdata')
+    dim = formdata.geometric_dimension
+    for i in range(dim):
+        instruction(expression=Subscript(Variable(name_oned_quadrature_points()), (Variable(quadrature_inames()[i]),)),
+                    assignee=Subscript(Variable(name), (i,)),
+                    forced_iname_deps=frozenset(quadrature_inames()),
+                    forced_iname_deps_is_final=True,
+                    )
+
+
+@backend(interface="quad_pos", name="sumfact")
+def name_quadrature_position():
+    formdata = get_global_context_value('formdata')
+    dim = formdata.geometric_dimension
+    name = 'pos'
+    temporary_variable(name, shape=(dim,), shape_impl=("fv",))
+    define_quadrature_position(name)
+    return name
+
+
+@backend(interface="qp_in_cell", name="sumfact")
+def name_quadrature_position_in_cell(restriction):
+    return name_quadrature_position()
diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py
index 5a3e364d2705281f8184ceb1ab4b013956bba50a..37be6d7d9b7e297181914f54af42a7111608178a 100644
--- a/python/dune/perftool/sumfact/sumfact.py
+++ b/python/dune/perftool/sumfact/sumfact.py
@@ -1,4 +1,6 @@
-from dune.perftool.pdelab.argument import pymbolic_coefficient
+from dune.perftool.pdelab.argument import (name_coefficientcontainer,
+                                           pymbolic_coefficient,
+                                           )
 from dune.perftool.generation import (backend,
                                       domain,
                                       get_counter,
@@ -23,7 +25,7 @@ from pymbolic.primitives import (Call,
                                  Subscript,
                                  Variable,
                                  )
-
+from dune.perftool.sumfact.quadrature import quadrature_inames
 from loopy import Reduction
 
 from pytools import product
@@ -42,7 +44,7 @@ def sumfact_iname(bound, _type):
     return name
 
 
-def setup_theta(element, container, restriction, component, a_matrices):
+def setup_theta(element, restriction, component, a_matrices):
     number_basis = product(mat.cols for mat in a_matrices)
     shape = (number_basis,)
     inp = get_buffer_temporary("buffer",
@@ -52,6 +54,7 @@ def setup_theta(element, container, restriction, component, a_matrices):
     # Write initial coefficients into buffer
     basisiname = sumfact_iname(number_basis, "basis")
     lfs = name_lfs(element, restriction, component)
+    container = name_coefficientcontainer(restriction)
     coeff = pymbolic_coefficient(container, lfs, basisiname)
     assignee = Subscript(Variable(inp), (Variable(basisiname),))
     return instruction(assignee=assignee,
@@ -60,9 +63,7 @@ def setup_theta(element, container, restriction, component, a_matrices):
                        )
 
 
-# TODO this code is WIP and mainly used for experiments.
-@backend(interface="eval_coefficient", name="sumfact")
-def start_sumfactorization(element, name, container, restriction, component):
+def pymbolic_trialfunction(element, restriction, component):
     theta = name_theta()
     rows = quadrature_points_per_direction()
     cols = basis_functions_per_direction()
@@ -75,16 +76,17 @@ def start_sumfactorization(element, name, container, restriction, component):
                       num=2
                       )
 
-    insn_dep = setup_theta(element, container, restriction, component, a_matrices)
+    insn_dep = setup_theta(element, restriction, component, a_matrices)
+    var = sum_factorization_kernel(a_matrices, "buffer", 0, frozenset({insn_dep}))
 
-    sum_factorization_kernel(a_matrices, "buffer", 0, frozenset({insn_dep}))
+    return Subscript(Variable(var), tuple(Variable(i) for i in quadrature_inames()))
 
-    # Do stage 3 (for f=u => mass matrix)
-    theta_transposed = name_theta_transposed()
-    a_matrix_transposed = AMatrix(theta_transposed, cols, rows)
-    a_matrices_transposed = (a_matrix_transposed, a_matrix_transposed)
 
-    return sum_factorization_kernel(a_matrices_transposed, "buffer", 2)
+#     # Do stage 3 (for f=u => mass matrix)
+#     theta_transposed = name_theta_transposed()
+#     a_matrix_transposed = AMatrix(theta_transposed, cols, rows)
+#     a_matrices_transposed = (a_matrix_transposed, a_matrix_transposed)
+#     var = sum_factorization_kernel(a_matrices_transposed, "buffer", 2)
 
 
 def sum_factorization_kernel(a_matrices, buffer, stage, insn_dep=frozenset({})):
diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py
index e05411a07378215de24b3269722fec58cebb7581..7a7ff15a1eee44714b3d77ac15bad4f04d3a977a 100644
--- a/python/dune/perftool/ufl/visitor.py
+++ b/python/dune/perftool/ufl/visitor.py
@@ -256,8 +256,7 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
             return Variable(self.interface.name_unit_inner_normal())
 
     def quadrature_weight(self, o):
-        self.inames.append(self.interface.quadrature_iname())
-        return Subscript(Variable(self.interface.name_quadrature_weights()), (Variable(self.interface.quadrature_iname()),))
+        return self.interface.pymbolic_quadrature_weight()
 
     def jacobian_determinant(self, o):
         return Variable(self.interface.name_jacobian_determinant())
diff --git a/python/loopy b/python/loopy
index a08c8677fbd3aaded4ad63210d24439aac18f2d0..8eb69d1d973f501e9c01ac97d9a462faa1d18665 160000
--- a/python/loopy
+++ b/python/loopy
@@ -1 +1 @@
-Subproject commit a08c8677fbd3aaded4ad63210d24439aac18f2d0
+Subproject commit 8eb69d1d973f501e9c01ac97d9a462faa1d18665