From ffe28867d30644ffad83c960265968083d085246 Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Thu, 31 Aug 2017 16:06:15 +0200 Subject: [PATCH] [bugfix] fix instrumentation for multiple quadrature loops --- python/dune/perftool/generation/loopy.py | 6 +++--- python/dune/perftool/sumfact/realization.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py index fb3dbaba..96734233 100644 --- a/python/dune/perftool/generation/loopy.py +++ b/python/dune/perftool/generation/loopy.py @@ -88,7 +88,7 @@ def temporary_variable(name, **kwargs): @generator_factory(item_tags=("instruction", "cinstruction"), context_tags="kernel", - cache_key_generator=lambda *a, **kw: kw['code'], + cache_key_generator=lambda *a, **kw: kw['id'], ) def c_instruction_impl(**kw): kw.setdefault('assignees', []) @@ -101,7 +101,7 @@ def c_instruction_impl(**kw): @generator_factory(item_tags=("instruction", "exprinstruction"), context_tags="kernel", - cache_key_generator=lambda *a, **kw: (kw['assignee'], kw['expression']), + cache_key_generator=lambda *a, **kw: (kw['id']), ) def expr_instruction_impl(**kw): if 'assignees' in kw: @@ -120,7 +120,7 @@ def call_instruction_impl(**kw): def _insn_cache_key(code=None, expression=None, **kwargs): if code is not None: - return code + return (code, kwargs.get('within_inames', None)) if expression is not None: if 'assignees' in kwargs: return (kwargs['assignees'], expression) diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py index 5be6fc82..bda7bd13 100644 --- a/python/dune/perftool/sumfact/realization.py +++ b/python/dune/perftool/sumfact/realization.py @@ -265,7 +265,7 @@ def _realize_sum_factorization_kernel(sf): forced_iname_deps=frozenset([iname for iname in out_inames]).union(frozenset(sf.within_inames)), forced_iname_deps_is_final=True, depends_on=insn_dep, - tags=frozenset({"sumfact_stage{}".format(sf.stage)}), + tags=frozenset({"sumfact_stage{}_within{}".format(sf.stage, "_".join(sf.within_inames))}), predicates=sf.predicates, ) }) @@ -273,7 +273,7 @@ def _realize_sum_factorization_kernel(sf): # Measure times and count operations in c++ code if get_option("instrumentation_level") >= 4: stop_insn = frozenset({instruction(code="HP_TIMER_STOP({});".format(timer_name), - depends_on=frozenset({lp.match.Tagged("sumfact_stage{}".format(sf.stage))}), + depends_on=frozenset({lp.match.Tagged("sumfact_stage{}_within{}".format(sf.stage, "_".join(sf.within_inames)))}), within_inames=frozenset(sf.within_inames))}) if sf.stage == 1: qp_timer_name = assembler_routine_name() + '_kernel' + '_quadratureloop' -- GitLab