diff --git a/python/dune/perftool/loopy/stages.py b/python/dune/perftool/loopy/stages.py
index 1048ca7a59b18034a1be3fb6645598d3326feb79..a0a2b2c48fc9c26e84aa329a8adf63bbd8903c66 100644
--- a/python/dune/perftool/loopy/stages.py
+++ b/python/dune/perftool/loopy/stages.py
@@ -1,8 +1,14 @@
 """ loopy instructions to mark stages of computations """
 
-from dune.perftool.generation import noop_instruction
+from dune.perftool.generation import (generator_factory,
+                                      noop_instruction,
+                                      )
 
+from loopy import add_dependency
+from loopy.match import Id
 
+
+@generator_factory(item_tags=("stage",), cache_key_generator=lambda n, **kw: n)
 def stage_insn(n, **kwargs):
     assert 'id' not in kwargs
 
@@ -11,9 +17,18 @@ def stage_insn(n, **kwargs):
 
     # Chain dependencies of stage instructions
     if n > 0:
-        kwargs['depends_on'] = kwargs.get('depends_on', frozenset([])).union(frozenset([stage_insn(n-1, **kwargs)]))
+        kwargs['depends_on'] = kwargs.get('depends_on', frozenset([])).union(frozenset([stage_insn(n - 1, **kwargs)]))
 
     # Actually issue the instruction
     noop_instruction(id=id, **kwargs)
 
     return id
+
+
+def finalize_stage_instructions(kernel):  # Return `kernel` with extra dependencies: stage i+1 is made to depend on every non-stage instruction that depends on stage i.
+    for i in range(len(stage_insn._memoize_cache)):  # one pass per entry in stage_insn's memoization cache; presumably one entry per issued stage - TODO confirm against generator_factory
+        deps = frozenset({insn.id for insn in kernel.instructions if stage_insn(i) in insn.depends_on and not insn.id.startswith('stage_insn_')})  # ids of instructions depending on stage i, excluding ids with the 'stage_insn_' prefix (assumed to be the stage no-ops themselves - id construction is not visible here)
+        for dep_id in deps:  # deps was collected before the loop, so rebinding `kernel` below does not affect the iteration
+            kernel = add_dependency(kernel, Id(stage_insn(i+1)), dep_id)  # NOTE(review): for the highest i, stage_insn(i+1) names a stage that may not be present in `kernel` - confirm the last stage never has dependents
+
+    return kernel
\ No newline at end of file
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index 3fbb2595bc71898592cb180ee5f073cbdb27d2ea..3c008a173b98bcfcd06fb77f2948d4b6a76e2462 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -496,6 +496,10 @@ def generate_kernel(integrals):
     from dune.perftool.loopy.duplicate import heuristic_duplication
     kernel = heuristic_duplication(kernel)
 
+    # Finalize our stages mechanism
+    from dune.perftool.loopy.stages import finalize_stage_instructions
+    kernel = finalize_stage_instructions(kernel)
+
     # This is also silly, but 1D DG Schemes never need the geometry, so the quadrature
     # statement actually introduces a preamble at a stage where preambles cannot be generated
     # anymore. TODO think about how to avoid this!!!