diff --git a/python/dune/perftool/pdelab_preambles.py b/python/dune/perftool/pdelab_preambles.py
index ed9d6fad49043804e094f416cce0593bff6885f0..4f426a9f532917df982e6edcd3cafa6b2a42bac4 100644
--- a/python/dune/perftool/pdelab_preambles.py
+++ b/python/dune/perftool/pdelab_preambles.py
@@ -1,4 +1,5 @@
-from dune.perftool.preambles import dune_preambel, dune_symbol
+from dune.perftool.preambles import dune_preamble, dune_symbol, loop_domain, loopy_iname, temporary_variable, loopy_c_instruction, loopy_expr_instruction
+from dune.perftool.pdelab_names import name
 
 @dune_symbol
 def test_function_name(arg, grad):
@@ -12,10 +13,59 @@ def trial_function_name(arg, grad):
 def index_name(index):
     return str(index._indices[0])
 
-@dune_symbol
-def argument_index(arg):
-    return "arg{}".format(chr(ord("i") + arg.number()))
+@loop_domain
+def argument_loop_domain(name):
+    return name
+
+@loopy_iname
+def argument_iname(arg):
+    name = "arg{}".format(chr(ord("i") + arg.number()))  # "argi" for number 0, "argj" for 1, ...; this local shadows the imported `name` helper
+    argument_loop_domain(name)  # called for its side effect only; presumably registers the matching loop domain in the cache
+    return name
 
 @dune_symbol
-def dimension(arg):
+def dimension():
     return "dim"
+
+@loop_domain
+def quadrature_loop_domain(name):
+    return name
+
+@loopy_iname
+def quadrature_iname():
+    quadrature_loop_domain("q")  # return value ignored; the call is made so a loop domain for "q" gets cached
+    return "q"  # fixed iname of the quadrature loop
+
+from dune.perftool.preambles import _dune_decorator_factory, CInstructionCacheItem
+quadrature_preamble = _dune_decorator_factory(cache_item_type=CInstructionCacheItem, inames=quadrature_iname())  # NOTE: quadrature_iname() is evaluated once at import time; its result is forwarded as `inames` to CInstructionCacheItem
+
+@dune_symbol
+def quadrature_rule():
+    return "rule"
+
+@quadrature_preamble(assignees=name("quadrature_factor"))
+def define_quadrature_factor():
+    rule = quadrature_rule()  # symbol name of the quadrature rule ("rule")
+    return "auto {} = {}->weight();".format(name("quadrature_factor"), rule)  # C snippet assigning `<rule>->weight()` to the factor variable
+
+@temporary_variable
+def quadrature_factor():
+    define_quadrature_factor()  # side effect only: make sure the instruction that assigns the factor is cached too
+    return name("quadrature_factor")  # variable name; the temporary_variable decorator wraps it in a loopy TemporaryVariable
+
+@loop_domain(upperbound=dimension())
+def dimension_loop_domain(name):
+    return name
+
+@loopy_iname
+def dimension_iname(index):
+    dimension_loop_domain(index_name(index))  # side effect only: cache a loop domain bounded by dimension() for this index
+    return index_name(index)  # the iname is str() of the first entry of index._indices
+
+@dune_symbol
+def residual_name():
+    return "r"
+
+@loopy_expr_instruction
+def accumulation_instruction(expr):
+    return expr
\ No newline at end of file
diff --git a/python/dune/perftool/preambles.py b/python/dune/perftool/preambles.py
index a11eed6de55507db3d38f6c6458182010d4ee4cd..ad902b907cfb990cc29cf8bfb7f04b4a6c9f2541 100644
--- a/python/dune/perftool/preambles.py
+++ b/python/dune/perftool/preambles.py
@@ -2,40 +2,70 @@
 a complex requirement structure. This includes:
 * preambles triggering the creation of other preambles
 * a caching mechanism to avoid duplicated preambles where harmful
-"""
-
-
-class UFL2LoopyDataCache(dict):
-    """ The cache data structure
-
-    The data is stored as key value pairs of the following form:
-    (function, cache_tuple) -> (content, priority_tag, preamble)
 
-    The parameters are:
-    function : the function that generates the preamble code snippet
-    cache_tuple : A frozen (sub)set of arguments to the function.
-                  The map from function arguments to cache tuple controls
-                  the amount of caching.
-    content : The content to store. No assumptions made.
-    priority_tag : Will later decide the ordering of the preambles.
-    preamble : A bool whether this cache entry does generate a loopy preamble.
-               This is usually not the case, when you generate something
-               like symbol names. Those are inserted into other snippets,
-               but not on their own right.
-    """
-    def __init__(self):
-        self.counter = 0
-
-    def register(self, cachekey, content):
-        self[cachekey] = (content, self.counter)
-        self.counter = self.counter + 1
+TODO rename to generation_cache. I will use it for much more than preambles.
+"""
 
-    def extract_preambles(self):
-        return [(p[1], p[0]) for p in self.values() if p[0].generate_preambel]
+from pytools import Record
+
+# Base class for all cache items.
+class UFL2LoopyCacheItem(Record):
+    __slots__ = ["content", "returnValue"]
+
+    def __init__(self, content):
+        self.content = content  # payload kept in the cache and later collected by the cache_* helpers
+        self.returnValue = content  # what the decorated function hands back to its caller
+
+class NoReturnCacheItem(UFL2LoopyCacheItem):
+    def __init__(self, content):
+        UFL2LoopyCacheItem.__init__(self, content)
+        self.returnValue = None
+
+class LoopyInameCacheItem(UFL2LoopyCacheItem):
+    pass
+
+class SymbolCacheItem(UFL2LoopyCacheItem):
+    pass
+
+class PreambleCacheItem(NoReturnCacheItem):
+    counter = 0  # class-wide running number, incremented once per created preamble item
+    def __init__(self, content):
+        NoReturnCacheItem.__init__(self, content)  # sets returnValue to None
+        self.content = (PreambleCacheItem.counter, content)  # pair the snippet with its creation index
+        PreambleCacheItem.counter = PreambleCacheItem.counter + 1
+
+class TemporaryVariableCacheItem(UFL2LoopyCacheItem):
+    import numpy  # class-body import: only needed for the default dtype below
+    def __init__(self, content, dtype=numpy.float64):
+        UFL2LoopyCacheItem.__init__(self, content)
+        from loopy import TemporaryVariable
+        self.content = TemporaryVariable(content, dtype)  # cached payload: the loopy declaration of the variable
+        self.returnValue = content  # callers get the plain variable name back (was misspelled `returnVariable`)
+
+class InstructionCacheItem(NoReturnCacheItem):
+    def __init__(self, content):
+        NoReturnCacheItem.__init__(self, content)
+
+class CInstructionCacheItem(InstructionCacheItem):
+    def __init__(self, content, inames=[], assignees=[]):  # NOTE(review): mutable defaults are shared between calls; fine only as long as nothing mutates them
+        InstructionCacheItem.__init__(self, content)  # returnValue becomes None via NoReturnCacheItem
+        from loopy import CInstruction
+        self.content = CInstruction(inames, content, assignees=assignees)  # replace the raw code string by the loopy instruction
+
+class ExpressionInstructionCacheItem(InstructionCacheItem):
+    pass
+
+class LoopDomainCacheItem(UFL2LoopyCacheItem):
+    def __init__(self, content, upperbound=None):
+        if upperbound:  # explicit bound given, e.g. "{ [i] : 0<=i<dim }"
+            self.content = "{{ [{0}] : 0<={0}<{1} }}".format(content, upperbound)
+        else:  # no bound: fall back to the symbol "<iname>_n", e.g. "{ [q] : 0<=q<q_n }"
+            self.content = "{{ [{0}] : 0<={0}<{0}_n }}".format(content)
+        self.returnValue = content  # callers get the bare iname back, not the domain string
 
 
-# have one such cache on the module level. It is easier than handing around an instance of it.
-_cache = UFL2LoopyDataCache()
+# Have one cache at the module level. It is easier than handing around an instance of it.
+_cache = {}
 
 
 def freeze(data):
@@ -67,29 +97,43 @@ def freeze(data):
     # we don't know how to handle this object, so we give up
     raise TypeError('Cannot freeze non-hashable object {} of type {}'.format(data, type(data)))
 
+class _NoCachingCounter(object):
+    counter = 0  # stored on the class, so the sequence is shared by all instances
+    def get(self):
+        _NoCachingCounter.counter = _NoCachingCounter.counter + 1
+        return _NoCachingCounter.counter
+
+
+def no_caching(*a):
+    return _NoCachingCounter().get()  # get() is an instance method: calling it on the class itself raised TypeError
+
 
 class _RegisteredFunction(object):
     """ The data structure for a function that accesses UFL2LoopyDataCache """
     def __init__(self, func,
                  cache_key_generator=lambda *a : freeze(a),
-                 generate_preamble=False,
-                 return_content=False
+                 cache_item_type=UFL2LoopyCacheItem,
+                 **kwargs
                 ):
         self.func = func
         self.cache_key_generator = cache_key_generator
-        self.generate_preamble = generate_preamble
-        self.return_content = return_content
+        self.cache_item_type = cache_item_type
+        self.kwargs = kwargs
+
+        assert issubclass(cache_item_type, UFL2LoopyCacheItem)
 
     def __call__(self, *args):
+        # Get the cache key from the given arguments
         cache_key = (self, self.cache_key_generator(*args))
+        # check whether we have a cache hit
         if cache_key in _cache:
-            if self.return_content:
-                return _cache[cache_key][0]
+            # and return the result depending on the cache item type
+            return _cache[cache_key].returnValue
         else:
-            content = self.func(*args)
-            _cache.register(cache_key, content)
-            if self.return_content:
-                return content
+            # evaluate the original function and wrap it in a cache item
+            content = self.cache_item_type(self.func(*args), **self.kwargs)
+            _cache[cache_key] = content
+            return content.returnValue
 
 
 def _dune_decorator_factory(**factory_kwargs):
@@ -109,14 +153,10 @@ def _dune_decorator_factory(**factory_kwargs):
     cache_key_generator : function
         A function that maps the arguments to the function to an immutable cache key.
         Defaults to generate_cache_tuple.
-    generate_preambel : bool
-        Whether a code snippet should be generated.
-        This is usually not the case, when you generate something
-        like symbol names. Those are inserted into other snippets,
-        but not on their own right.
-    return_content : bool
-        Whether the content of the cache should be returned on subsequent function calls.
-        If the cache is only used for requirements tracking, this is not needed.
+    cache_item_type : type of UFL2LoopyCacheItem
+        The type to wrap the contents in when storing them in the cache.
+
+    Any excess keyword arguments will be forwarded to the CacheItem!
     """
     def _dec(*args, **kwargs):
         # Modify the kwargs according to the factorys kwargs
@@ -130,9 +170,23 @@ def _dune_decorator_factory(**factory_kwargs):
 
     return _dec
 
+def cache_preambles():
+    return [item.content for item in _cache.values() if isinstance(item, PreambleCacheItem)]  # (counter, snippet) pairs
+
+def cache_instructions():
+    return [item.content for item in _cache.values() if isinstance(item, InstructionCacheItem)]  # includes all subclasses
+
+def cache_temporaries():
+    return dict((item.returnValue, item.content) for item in _cache.values() if isinstance(item, TemporaryVariableCacheItem))
 
-# A decorator for a dune preambel
-dune_preambel = _dune_decorator_factory(generate_preamble=True)
+def cache_loop_domains():
+    return [item.content for item in _cache.values() if isinstance(item, LoopDomainCacheItem)]  # domain strings
 
-# A decorator for a dune symbol name
-dune_symbol = _dune_decorator_factory(return_content=True)
+# Define some decorators that will be useful!
+loop_domain = _dune_decorator_factory(cache_item_type=LoopDomainCacheItem)
+dune_preamble = _dune_decorator_factory(cache_item_type=PreambleCacheItem)
+loopy_iname = _dune_decorator_factory(cache_item_type=LoopyInameCacheItem)
+dune_symbol = _dune_decorator_factory(cache_item_type=SymbolCacheItem)
+temporary_variable = _dune_decorator_factory(cache_item_type=TemporaryVariableCacheItem)
+loopy_c_instruction = _dune_decorator_factory(cache_item_type=CInstructionCacheItem)
+loopy_expr_instruction = _dune_decorator_factory(cache_item_type=ExpressionInstructionCacheItem)
diff --git a/python/dune/perftool/target.py b/python/dune/perftool/target.py
index b783f9c9c53a1342a7a942baebb8d4aa25bbb71b..ce420b128efaf8e308a1278ead45f9782ec9935a 100644
--- a/python/dune/perftool/target.py
+++ b/python/dune/perftool/target.py
@@ -4,28 +4,22 @@ import six
 from loopy.target import TargetBase
 from loopy.target.c.codegen.expression import LoopyCCodeMapper
 
+class AllToDouble(dict):
+    """ This imitates a dict that maps everything to double and logs the requested keys """
+    def __getitem__(self, key):
+        self.__setitem__(key, numpy.float64)  # record the requested key so it is visible when the dict is inspected later
+        return numpy.float64  # NOTE(review): relies on numpy being imported by this module -- not visible in this hunk, confirm
+
+
 _registry = {'float32': 'float',
-             'int32' : 'int'}
+             'int32' : 'int',
+             'float64' : 'double'}
 
 class MyMapper(LoopyCCodeMapper):
     var_subst_map = {}
 
-def dune_function_manglers():
-    # OpenCL example impls: target/opencl/__init__.py:108
-    return []
-
-def dune_symbol_manglers():
-    # OpenCL example impls: target/opencl/__init__.py:137
-    return []
-
 class DuneTarget(TargetBase):
 
-    def function_manglers(self):
-        return super(DuneTarget, self).function_manglers() + dune_function_manglers()
-
-    def symbol_manglers(self):
-        return super(DuneTarget, self).symbol_manglers() + dune_symbol_manglers()
-
     def get_or_register_dtype(self, names, dtype=None):
         return dtype
 
@@ -44,7 +38,6 @@ class DuneTarget(TargetBase):
         return MyMapper(codegen_state)
 
     def generate_code(self, kernel, codegen_state, impl_arg_info):
-        print "Somebody wants me to generate code!"
         from cgen import Block
         body = Block()
 
diff --git a/python/dune/perftool/transformer.py b/python/dune/perftool/transformer.py
index 5053bec202131cabbcc0d196566b8bcd7e110f53..ec393f18d628d07ed82e5b637fe877c7e3600864 100644
--- a/python/dune/perftool/transformer.py
+++ b/python/dune/perftool/transformer.py
@@ -3,66 +3,43 @@
 from ufl.algorithms import MultiFunction
 from pymbolic.primitives import Variable, Subscript, Sum, Product
 from loopy.kernel.data import ExpressionInstruction, CInstruction
+import loopy
+import numpy
 
 # For now, import all implemented preambles, restrict later
 from dune.perftool.pdelab_preambles import *
-
+from dune.perftool.restriction import Restriction
 
 class UFLVisitor(MultiFunction):
-    def __init__(self):
-        MultiFunction.__init__(self)
-        # Have a cache for the loop domains
-        self.loop_domains = {}
-        self.loop_instructions = []
-        self.temporary_variables = {}
-
+    def __call__(self, o):
         # Make this multifunction stateful: Store information similar to uflacs' modified terminal
         self.grad = False
         self.reference_grad = False
         self.index = None
+        self.restriction = Restriction.NONE
 
-        # We do always have a quadrature loop:
-        # We need an additional loop domain
-        self.loop_domains.setdefault("quadrature", "{ [q] : 0<=q<qn }")
-
-        # We need instructions to evaluate weight and position
-        # Add a temporary variable for the integration factor
-        self.temporary_variables["fac"] = loopy.TemporaryVariable("fac", dtype=numpy.float32)
-
-        # Add the CInstruction's needed to correctly set up the quadrature.
-        self.loop_instructions.append(
-            CInstruction(
-                "q",
-                code="fac = r->weight();",
-                assignees="fac"
-            )
-        )
+        # Have a shortcut for the base class call operator
+        self.call = lambda *a : MultiFunction.__call__(self, *a)
 
+        # Have a list of argument indices that this term depends on.
         self.argument_indices = []
 
-    def add_accumulation(self, loopyexpr):
-        # TODO how to determine the accumulation loop details here?
+        loopyexpr = self.call(o)
+
         # TODO no jacobian support yet!
         assert len(self.argument_indices) == 1
 
         assignee = Subscript(Variable("r"), Variable(self.argument_indices[0]))
-        loopyexpr = Sum((assignee, Product((loopyexpr, Variable("fac")))))
-
-        self.loop_instructions.append(
-            ExpressionInstruction(
-                assignee=assignee,
-                expression=loopyexpr
-            )
-        )
+        loopyexpr = Sum((assignee, Product((Variable(quadrature_factor()), loopyexpr))))
 
-        # Reset the argument indices.
-        self.argument_indices = []
+        instruction = ExpressionInstruction(assignee=assignee, expression=loopyexpr)
+        accumulation_instruction(instruction)
 
     def grad(self, o):
         assert(len(o.operands()) == 1)
 
         self.grad = True
-        ret = self(o.operands()[0])
+        ret = self.call(o.operands()[0])
         self.grad = False
         return ret
 
@@ -70,8 +47,7 @@ class UFLVisitor(MultiFunction):
         assert(len(o.operands()) == 0)
 
         # We do need an argument loop domain for this argument
-        argindex = argument_index(o)
-        self.loop_domains.setdefault(o, "{{[{0}] : 0 <= {0} < {0}n}}".format(argindex))
+        argindex = argument_iname(o)
 
         # Generate the variable name for the test function
         name = test_function_name(o, self.grad)
@@ -89,16 +65,8 @@ class UFLVisitor(MultiFunction):
         index = index_name(self.index)
         return Subscript(Variable(name), Variable(index))
 
-    def index_sum(self, o):
-        # Add a loop domain based on the index
-        index = index_name(o.operands()[1])
-        self.loop_domains.setdefault(index, "{{[{0}] : 0 <= {0} < {1}}}".format(index, o.dimension()))
-        # Get the expression
-        loopyexpr = self(o.operands()[0])
-        self.add_accumulation(loopyexpr)
-
     def product(self, o):
-        return Product(tuple(self(op) for op in o.operands()))
+        return Product(tuple(self.call(op) for op in o.operands()))
 
     def multi_index(self, o):
         # I don't think we should ever find a multi index, because its father should take care of it.
@@ -107,6 +75,30 @@ class UFLVisitor(MultiFunction):
     def indexed(self, o):
         # TODO in the long run, this is a stack of indices.
         self.index = o.operands()[1]
-        ret = self(o.operands()[0])
+        ret = self.call(o.operands()[0])
         self.index = None
         return ret
+
+
+class TopSumSeparation(MultiFunction):
+    """ A multifunction that separates the toplevel sum """
+    def __init__(self):
+        MultiFunction.__init__(self)
+        self.visitor = UFLVisitor()  # every summand is handed to this visitor
+
+    def expr(self, o):
+        print("Call TopSumSeparation.expr with {}".format(o))  # parenthesised form prints the same on Python 2 and is valid Python 3
+        self.visitor(o)
+
+    def sum(self, o):
+        for op in o.operands():  # dispatch each summand on its own
+            self(op)
+
+    def index_sum(self, o):
+        print("Call TopSumSeparation.index_sum with {}".format(o))
+        dimension_iname(o.operands()[1])  # side effect only: cache iname and loop domain of the summation index
+        self(o.operands()[0])  # continue with the summand
+
+
+def transform_expression(expr):
+    return TopSumSeparation()(expr)  # builds a fresh TopSumSeparation (and with it a fresh UFLVisitor) on every call