diff --git a/python/dune/perftool/pdelab/__init__.py b/python/dune/perftool/pdelab/__init__.py
index f53b6ff6297efdde79344c49579e466ccd99d817..7f1ab8d17feb6849944233278826dc529fe9fbe9 100644
--- a/python/dune/perftool/pdelab/__init__.py
+++ b/python/dune/perftool/pdelab/__init__.py
@@ -32,7 +32,10 @@ from dune.perftool.pdelab.quadrature import (pymbolic_quadrature_weight,
                                              )
 from dune.perftool.pdelab.spaces import (lfs_inames,
                                          )
-from dune.perftool.pdelab.tensors import pymbolic_list_tensor, pymbolic_identity
+from dune.perftool.pdelab.tensors import (pymbolic_list_tensor,
+                                          pymbolic_identity,
+                                          pymbolic_matrix_inverse,
+                                          )
 
 
 class PDELabInterface(object):
@@ -112,6 +115,9 @@ class PDELabInterface(object):
     def pymbolic_identity(self, o):
         return pymbolic_identity(o)
 
+    def pymbolic_matrix_inverse(self, o):
+        return pymbolic_matrix_inverse(o, self.visitor)
+
     #
     # Geometry related generator functions
     #
diff --git a/python/dune/perftool/pdelab/tensors.py b/python/dune/perftool/pdelab/tensors.py
index 5e962b9921686ee36b08d0c245272e642d1ffba4..3d89dd70995ffa12cde249ca77cac57b3d2149e9 100644
--- a/python/dune/perftool/pdelab/tensors.py
+++ b/python/dune/perftool/pdelab/tensors.py
@@ -11,6 +11,7 @@ from dune.perftool.generation import (get_counted_variable,
 import pymbolic.primitives as prim
 import numpy as np
 import loopy as lp
+import itertools as it
 
 
 def define_list_tensor(name, expr, visitor, stack=()):
@@ -66,3 +67,42 @@ def pymbolic_identity(expr):
                        )
     define_identity(name, expr)
     return prim.Variable(name)
+
+
+def define_assembled_tensor(name, expr, visitor):
+    temporary_variable(name,
+                       shape=expr.ufl_shape,
+                       shape_impl=('fm',))
+    for indices in it.product(*tuple(range(i) for i in expr.ufl_shape)):
+        visitor.indices = indices
+        instruction(assignee=prim.Subscript(prim.Variable(name), indices),
+                    expression=visitor.call(expr),
+                    forced_iname_deps=frozenset(visitor.interface.quadrature_inames()),
+                    depends_on=frozenset({lp.match.Tagged("sumfact_stage1")}),
+                    tags=frozenset({"quad"}),
+                    )
+
+
+@kernel_cached
+def name_assembled_tensor(o, visitor):
+    name = get_counted_variable("assembled_tensor")
+    define_assembled_tensor(name, o, visitor)
+    return name
+
+
+@kernel_cached
+def pymbolic_matrix_inverse(o, visitor):
+    indices = visitor.indices
+    visitor.indices = None
+    name = name_assembled_tensor(o.ufl_operands[0], visitor)
+
+    instruction(code="{}.invert();".format(name),
+                within_inames=frozenset(visitor.interface.quadrature_inames()),
+                depends_on=frozenset({lp.match.Writes(name),
+                                      lp.match.Tagged("sumfact_stage1"),
+                                      }),
+                tags=frozenset({"quad"}),
+                )
+
+    visitor.indices = indices
+    return prim.Variable(name)
diff --git a/python/dune/perftool/ufl/preprocess.py b/python/dune/perftool/ufl/preprocess.py
index bf6b1af86e7424b79d7da6d1602ee7d0e965a0d6..175d0c00ddb57db3bfcf5a84aba62e7d2a997030 100644
--- a/python/dune/perftool/ufl/preprocess.py
+++ b/python/dune/perftool/ufl/preprocess.py
@@ -3,6 +3,10 @@
 from dune.perftool.generation import run_hook, ReturnArg
 import ufl.classes as uc
 import ufl.algorithms.apply_function_pullbacks as afp
+import ufl.algorithms.apply_algebra_lowering as aal
+import ufl.algorithms.apply_derivatives as ad
+
+from dune.perftool.options import get_form_option
 
 from pytools import memoize
 
@@ -35,7 +39,8 @@ def preprocess_form(form):
                                                           uc.FacetNormal,
                                                           uc.JacobianDeterminant,
                                                           uc.JacobianInverse,
-                                                          )
+                                                          ),
+                                 do_estimate_degrees=not get_form_option("quadrature_order"),
                                  )
 
     formdata.preprocessed_form = apply_default_transformations(formdata.preprocessed_form)
@@ -58,3 +63,11 @@ def apply_default_transformations(form):
     form = transform_form(form, pushdown_indexed)
 
     return form
+
+
+# Monkey patch UFL, such that we invert matrices in C++ instead of Python.
+# The latter only works for very small matrices. If this causes a problem at
+# some point, we should guard this monkey patch with an option.
+
+aal.LowerCompoundAlgebra.inverse = lambda s, o, A: s.reuse_if_untouched(o, A)
+ad.GenericDerivativeRuleset.inverse = lambda s, o, A: -uc.Dot(uc.Dot(o, A), o)
diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py
index 57bd5b41bc8e37a67bbaf632cf4411168fd0b611..987d29d034caf1ce191960c8a1429bbf3c8e5ba4 100644
--- a/python/dune/perftool/ufl/visitor.py
+++ b/python/dune/perftool/ufl/visitor.py
@@ -267,6 +267,9 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
     def identity(self, o):
         return self.interface.pymbolic_identity(o)
 
+    def inverse(self, o):
+        return self.interface.pymbolic_matrix_inverse(o)
+
     #
     # Handlers for arithmetic operators and functions
     # Those handlers would be valid in any code going from UFL to pymbolic