From ffe28867d30644ffad83c960265968083d085246 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Thu, 31 Aug 2017 16:06:15 +0200
Subject: [PATCH] [bugfix] fix instrumentation for multiple quadrature loops

---
 python/dune/perftool/generation/loopy.py    | 6 +++---
 python/dune/perftool/sumfact/realization.py | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py
index fb3dbaba..96734233 100644
--- a/python/dune/perftool/generation/loopy.py
+++ b/python/dune/perftool/generation/loopy.py
@@ -88,7 +88,7 @@ def temporary_variable(name, **kwargs):
 
 @generator_factory(item_tags=("instruction", "cinstruction"),
                    context_tags="kernel",
-                   cache_key_generator=lambda *a, **kw: kw['code'],
+                   cache_key_generator=lambda *a, **kw: kw['id'],
                    )
 def c_instruction_impl(**kw):
     kw.setdefault('assignees', [])
@@ -101,7 +101,7 @@ def c_instruction_impl(**kw):
 
 @generator_factory(item_tags=("instruction", "exprinstruction"),
                    context_tags="kernel",
-                   cache_key_generator=lambda *a, **kw: (kw['assignee'], kw['expression']),
+                   cache_key_generator=lambda *a, **kw: kw['id'],
                    )
 def expr_instruction_impl(**kw):
     if 'assignees' in kw:
@@ -120,7 +120,7 @@ def call_instruction_impl(**kw):
 
 def _insn_cache_key(code=None, expression=None, **kwargs):
     if code is not None:
-        return code
+        return (code, kwargs.get('within_inames'))
     if expression is not None:
         if 'assignees' in kwargs:
             return (kwargs['assignees'], expression)
diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py
index 5be6fc82..bda7bd13 100644
--- a/python/dune/perftool/sumfact/realization.py
+++ b/python/dune/perftool/sumfact/realization.py
@@ -265,7 +265,7 @@ def _realize_sum_factorization_kernel(sf):
                                           forced_iname_deps=frozenset([iname for iname in out_inames]).union(frozenset(sf.within_inames)),
                                           forced_iname_deps_is_final=True,
                                           depends_on=insn_dep,
-                                          tags=frozenset({"sumfact_stage{}".format(sf.stage)}),
+                                          tags=frozenset({"sumfact_stage{}_within{}".format(sf.stage, "_".join(sf.within_inames))}),
                                           predicates=sf.predicates,
                                           )
                               })
@@ -273,7 +273,7 @@ def _realize_sum_factorization_kernel(sf):
     # Measure times and count operations in c++ code
     if get_option("instrumentation_level") >= 4:
         stop_insn = frozenset({instruction(code="HP_TIMER_STOP({});".format(timer_name),
-                                           depends_on=frozenset({lp.match.Tagged("sumfact_stage{}".format(sf.stage))}),
+                                           depends_on=frozenset({lp.match.Tagged("sumfact_stage{}_within{}".format(sf.stage, "_".join(sf.within_inames)))}),
                                            within_inames=frozenset(sf.within_inames))})
         if sf.stage == 1:
             qp_timer_name = assembler_routine_name() + '_kernel' + '_quadratureloop'
-- 
GitLab