diff --git a/python/dune/perftool/generation/__init__.py b/python/dune/perftool/generation/__init__.py
index 3751e64a0f55b5206c72a4ff38e4bc0d9bde2a18..ae313551db63794fc6691aecc85b8fecc1d059ee 100644
--- a/python/dune/perftool/generation/__init__.py
+++ b/python/dune/perftool/generation/__init__.py
@@ -20,6 +20,7 @@ from dune.perftool.generation.cpp import (base_class,
 
 from dune.perftool.generation.loopy import (constantarg,
                                             domain,
+                                            get_temporary_name,
                                             globalarg,
                                             iname,
                                             instruction,
diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py
index 61e04f062620a488c4b95cbbe1b5d19bdea07f1b..7a0a11c9ee2aa0b70014098b2eabf18b2e227f1f 100644
--- a/python/dune/perftool/generation/loopy.py
+++ b/python/dune/perftool/generation/loopy.py
@@ -74,11 +74,14 @@ class _TemporaryCounter:
     counter = 0
 
 
+def get_temporary_name():
+    """Return the next 'expr_NNNN' temporary name and advance the counter."""
+    _TemporaryCounter.counter += 1
+    return 'expr_{}'.format(str(_TemporaryCounter.counter - 1).zfill(4))
+
+
 @generator_factory(item_tags=("temporary",), cache_key_generator=no_caching)
-def temporary_variable(name=None, **kwargs):
-    if name is None:
-        name = 'expr_{}'.format(str(_TemporaryCounter.counter).zfill(4))
-        _TemporaryCounter.counter = _TemporaryCounter.counter + 1
+def temporary_variable(name, **kwargs):
     if 'dtype' not in kwargs:
         kwargs['dtype'] = numpy.float64
 
diff --git a/python/dune/perftool/loopy/transformer.py b/python/dune/perftool/loopy/transformer.py
index 7e414082efd7b23418e6f38dcc5e59888b325459..349b473493e713434d1fd860a8f2dd5a407d2c33 100644
--- a/python/dune/perftool/loopy/transformer.py
+++ b/python/dune/perftool/loopy/transformer.py
@@ -10,6 +10,7 @@ from dune.perftool.ufl.modified_terminals import ModifiedTerminalTracker
 from dune.perftool.pymbolic.uflmapper import UFL2PymbolicMapper
 from dune.perftool.pdelab.geometry import GeometryMapper
 from dune.perftool.generation import (domain,
+                                      get_temporary_name,
                                       global_context,
                                       globalarg,
                                       iname,
@@ -41,8 +42,12 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp
         super(UFL2LoopyVisitor, self).__init__()
 
     def _assign(self, o):
+        # A plain integer needs no temporary variable: return it unchanged
+        if isinstance(o, int):
+            return o
+
         # Assign a name to the temporary variable we want our result in
-        temp = temporary_variable().name
+        temp = get_temporary_name()
 
         # Now we assign this expression to a new temporary variable
         insn_id = instruction(assignee=Variable(temp),
@@ -55,6 +60,9 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp
         from dune.perftool.generation import retrieve_cache_items
         temp = filter(lambda i: i.id == insn_id, retrieve_cache_items("instruction"))[0].assignee_name
 
+        # Now that we know its exact name, declare the temporary
+        temporary_variable(temp)
+
         return Variable(temp)
 
     def __call__(self, o):
@@ -156,6 +164,10 @@ def transform_accumulation_term(term, measure, subdomain_id):
 
     pymbolic_expr = UFL2LoopyVisitor(measure)(term)
 
+    # An entire accumulation term may vanish (evaluate to 0); then return early
+    if pymbolic_expr == 0:
+        return
+
     # The data that is used to collect the arguments for the accumulate function
     accumargs = [None] * (2 * len(test_ma))
     residual_shape = [None] * len(test_ma)
diff --git a/python/dune/perftool/pdelab/basis.py b/python/dune/perftool/pdelab/basis.py
index 5bf7fe5ba9f9083a978f0e729b6c24a10f0fc406..518630c6cfd101fe4e8d46ed4030411deac0a1e6 100644
--- a/python/dune/perftool/pdelab/basis.py
+++ b/python/dune/perftool/pdelab/basis.py
@@ -263,8 +263,8 @@ def evaluate_trialfunction(element, name, restriction):
     from dune.perftool.pdelab.argument import pymbolic_coefficient
     coeff = pymbolic_coefficient(lfs, index, restriction)
     reduction_expr = Product((coeff, Subscript(Variable(basis), Variable(index))))
-    instruction(expression=Reduction("sum", index, reduction_expr),
-                assignee=frozenset({name}),
+    instruction(expression=Reduction("sum", index, reduction_expr, allow_simultaneous=True),
+                assignee=Variable(name),
                 forced_iname_deps=frozenset({quadrature_iname()}),
                 forced_iname_deps_is_final=True,
                 )
@@ -280,7 +280,7 @@ def evaluate_trialfunction_gradient(element, name, restriction):
     from dune.perftool.pdelab.argument import pymbolic_coefficient
     coeff = pymbolic_coefficient(lfs, index, restriction)
     reduction_expr = Product((coeff, Subscript(Variable(basis), (Variable(index), Variable(idim)))))
-    instruction(expression=Reduction("sum", index, reduction_expr),
+    instruction(expression=Reduction("sum", index, reduction_expr, allow_simultaneous=True),
                 assignee=Subscript(Variable(name), Variable(idim)),
                 forced_iname_deps=frozenset({quadrature_iname(), idim}),
                 forced_iname_deps_is_final=True,
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index bb38056a8ff67a0599da008e0c7c6be4fdad27be..1162f9e843b6ceb4544e0b1eb0814ee29fe9636a 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -188,6 +188,10 @@ def generate_kernel(integral):
                          function_manglers=manglers,
                          target=DuneTarget()
                          )
+
+    from loopy import make_reduction_inames_unique
+    kernel = make_reduction_inames_unique(kernel)
+
     kernel = preprocess_kernel(kernel)
 
     # Loopy might have introduced some temporary variables during preprocessing. As I want to have my own
diff --git a/python/loopy b/python/loopy
index c3950ff5c8fce47f208ffe59374222a66a80b64d..6b32fb790fb7c4947da03f6c8f1a3694ee90da92 160000
--- a/python/loopy
+++ b/python/loopy
@@ -1 +1 @@
-Subproject commit c3950ff5c8fce47f208ffe59374222a66a80b64d
+Subproject commit 6b32fb790fb7c4947da03f6c8f1a3694ee90da92
diff --git a/test/laplace/CMakeLists.txt b/test/laplace/CMakeLists.txt
index ae7f7d55edf3871c5bbfa256983e864028175089..d430d22135fdc9f0321b0462fecd1803eac2ad7e 100644
--- a/test/laplace/CMakeLists.txt
+++ b/test/laplace/CMakeLists.txt
@@ -29,3 +29,5 @@ add_generated_executable(UFLFILE laplace_dg.ufl
 
 dune_add_system_test(TARGET laplace_dg_symdiff
                      INIFILE laplace_dg_symdiff.mini)
+
+add_executable(dgref reference_main.cc)
diff --git a/test/laplace/laplace_dg.ufl b/test/laplace/laplace_dg.ufl
index 3c89fbb7648410f3117a781bc41ca12333deff57..5d540e01966983fecf1c33c6f93a9e1523ad61b8 100644
--- a/test/laplace/laplace_dg.ufl
+++ b/test/laplace/laplace_dg.ufl
@@ -10,9 +10,9 @@ gamma = 1.0
 theta = 1.0
 
 r = inner(grad(u), grad(v))*dx \
-  - inner(n, avg(grad(u)))*jump(v)*dS \
+  + inner(n, avg(grad(u)))*jump(v)*dS \
   + gamma*jump(u)*jump(v)*dS \
-  - theta*jump(u)*inner(avg(grad(v)), n)*dS \
+  + theta*jump(u)*inner(avg(grad(v)), n)*dS \
   - inner(n, grad(u))*v*ds \
   + gamma*u*v*ds \
   - theta*u*inner(grad(v), n)*ds