diff --git a/python/dune/perftool/pdelab_preambles.py b/python/dune/perftool/pdelab_preambles.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed9d6fad49043804e094f416cce0593bff6885f0
--- /dev/null
+++ b/python/dune/perftool/pdelab_preambles.py
@@ -0,0 +1,21 @@
+from dune.perftool.preambles import dune_preambel, dune_symbol
+
+@dune_symbol
+def test_function_name(arg, grad):
+    return "{}a{}".format("grad_" if grad else "", arg.number())
+
+@dune_symbol
+def trial_function_name(arg, grad):
+    return "{}c{}".format("grad_" if grad else "", arg.count())
+
+@dune_symbol
+def index_name(index):
+    return str(index._indices[0])
+
+@dune_symbol
+def argument_index(arg):
+    return "arg{}".format(chr(ord("i") + arg.number()))
+
+@dune_symbol
+def dimension(arg):
+    return "dim"
diff --git a/python/dune/perftool/transformer.py b/python/dune/perftool/transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5053bec202131cabbcc0d196566b8bcd7e110f53
--- /dev/null
+++ b/python/dune/perftool/transformer.py
@@ -0,0 +1,112 @@
+# Generate a loop kernel from that cell integral. This will map the UFL IR
+# to the loopy IR.
+from ufl.algorithms import MultiFunction
+from pymbolic.primitives import Variable, Subscript, Sum, Product
+from loopy.kernel.data import ExpressionInstruction, CInstruction
+
+# For now, import all implemented preambles, restrict later
+from dune.perftool.pdelab_preambles import *
+
+
+class UFLVisitor(MultiFunction):
+    def __init__(self):
+        MultiFunction.__init__(self)
+        # Have a cache for the loop domains
+        self.loop_domains = {}
+        self.loop_instructions = []
+        self.temporary_variables = {}
+
+        # Make this multifunction stateful: Store information similar to uflacs' modified terminal
+        self.grad = False
+        self.reference_grad = False
+        self.index = None
+
+        # We do always have a quadrature loop:
+        # We need an additional loop domain
+        self.loop_domains.setdefault("quadrature", "{ [q] : 0<=q<qn }")
+
+        # We need instructions to evaluate weight and position
+        # Add a temporary variable for the integration factor
+        self.temporary_variables["fac"] = loopy.TemporaryVariable("fac", dtype=numpy.float32)
+
+        # Add the CInstruction's needed to correctly set up the quadrature.
+        self.loop_instructions.append(
+            CInstruction(
+                "q",
+                code="fac = r->weight();",
+                assignees="fac"
+            )
+        )
+
+        self.argument_indices = []
+
+    def add_accumulation(self, loopyexpr):
+        # TODO how to determine the accumulation loop details here?
+        # TODO no jacobian support yet!
+        assert len(self.argument_indices) == 1
+
+        assignee = Subscript(Variable("r"), Variable(self.argument_indices[0]))
+        loopyexpr = Sum((assignee, Product((loopyexpr, Variable("fac")))))
+
+        self.loop_instructions.append(
+            ExpressionInstruction(
+                assignee=assignee,
+                expression=loopyexpr
+            )
+        )
+
+        # Reset the argument indices.
+        self.argument_indices = []
+
+    def grad(self, o):
+        assert(len(o.operands()) == 1)
+
+        self.grad = True
+        ret = self(o.operands()[0])
+        self.grad = False
+        return ret
+
+    def argument(self, o):
+        assert(len(o.operands()) == 0)
+
+        # We do need an argument loop domain for this argument
+        argindex = argument_index(o)
+        self.loop_domains.setdefault(o, "{{[{0}] : 0 <= {0} < {0}n}}".format(argindex))
+
+        # Generate the variable name for the test function
+        name = test_function_name(o, self.grad)
+        index = index_name(self.index)
+
+        # Register that index as argument loop index for later
+        self.argument_indices.append(argindex)
+
+        return Subscript(Variable(name), Variable(index))
+
+    def coefficient(self, o):
+        assert(len(o.operands()) == 0)
+
+        name = trial_function_name(o, self.grad)
+        index = index_name(self.index)
+        return Subscript(Variable(name), Variable(index))
+
+    def index_sum(self, o):
+        # Add a loop domain based on the index
+        index = index_name(o.operands()[1])
+        self.loop_domains.setdefault(index, "{{[{0}] : 0 <= {0} < {1}}}".format(index, o.dimension()))
+        # Get the expression
+        loopyexpr = self(o.operands()[0])
+        self.add_accumulation(loopyexpr)
+
+    def product(self, o):
+        return Product(tuple(self(op) for op in o.operands()))
+
+    def multi_index(self, o):
+        # I don't think we should ever find a multi index, because its father should take care of it.
+        raise ValueError
+
+    def indexed(self, o):
+        # TODO in the long run, this is a stack of indices.
+        self.index = o.operands()[1]
+        ret = self(o.operands()[0])
+        self.index = None
+        return ret