diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py
index 777f8dab972edba7ee55398f7a2d41c37d3c03b4..794e67e1dc1a7947f82399554e9678114c017101 100644
--- a/python/dune/perftool/sumfact/realization.py
+++ b/python/dune/perftool/sumfact/realization.py
@@ -275,7 +275,7 @@ def realize_sumfact_kernel_function(sf):
     # Construct a loopy kernel object
     from dune.perftool.pdelab.localoperator import extract_kernel_from_cache
     args = ("const char* buffer0", "const char* buffer1") + sf.interface.signature_args
-    signature = "void {}({}) const".format(sf.function_name, ", ".join(args))
+    signature = "void {}({}) const __attribute__((always_inline))".format(sf.function_name, ", ".join(args))
     kernel = extract_kernel_from_cache("kernel_default", sf.function_name, [signature], add_timings=False)
     delete_cache_items("kernel_default")
     return kernel