From 2490f72a14a0a4b7eacad26e9f07a5cab3ab48d2 Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Tue, 24 Apr 2018 13:58:17 +0200 Subject: [PATCH] Fix some instrumentation ordering --- .../loopy/transformations/instrumentation.py | 15 ++++++++++++--- python/dune/perftool/pdelab/localoperator.py | 4 ++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/python/dune/perftool/loopy/transformations/instrumentation.py b/python/dune/perftool/loopy/transformations/instrumentation.py index 89b08b6f..2771ba14 100644 --- a/python/dune/perftool/loopy/transformations/instrumentation.py +++ b/python/dune/perftool/loopy/transformations/instrumentation.py @@ -22,7 +22,7 @@ def _union(a): return frozenset.union(*a) -def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', operator=False): +def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', operator=False, depends_on=frozenset()): """ Transform loopy kernel to contain instrumentation code Arguments: @@ -32,6 +32,9 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o identifier : The name of the counter to start and stop level : The instrumentation level this measurement is defined at filetag : The tag of the file that should contain the counter definitions + depends_on: Additional dependencies to add to the start instruction. This is used to correct + currently wrong behaviour of the transformation in cases where a lot of structure + of the instrumentation is known a priori. """ # If the instrumentation level is not high enough, this is a no-op if level > get_option("instrumentation_level"): @@ -53,6 +56,7 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o insn_inames = _intersect(tuple(i.within_inames for i in insns)) other_inames = _union(tuple(i.within_inames for i in lp.find_instructions(knl, lp.match.Not(match)))) within = _intersect((insn_inames, other_inames)) + uniontags = _intersect(tuple(i.tags for i in insns)) # Get a unique identifer - note that the same timer could be started and stopped several times # within one kernel... @@ -67,8 +71,9 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o "HP_TIMER_START({});".format(identifier), id=start_id, within_inames=within, - depends_on=start_depends, + depends_on=depends_on.union(start_depends), boostable_into=frozenset(), + tags=uniontags, ) # Add dependencies on the timing instructions @@ -82,6 +87,7 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o within_inames=within, depends_on=frozenset(i.id for i in insns), boostable_into=frozenset(), + tags=uniontags, ) # Find all the instructions that should depend on stop @@ -98,4 +104,7 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o other_insns = list(filter(lambda i: i.id not in [j.id for j in rewritten_insns], knl.instructions)) # Add all the modified instructions into the kernel object - return knl.copy(instructions=rewritten_insns + other_insns + [start_insn, stop_insn]) + knl = knl.copy(instructions=rewritten_insns + other_insns + [start_insn, stop_insn]) + + from loopy.kernel.creation import resolve_dependencies + return resolve_dependencies(knl) diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index 04dad983..f38b76d7 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -569,8 +569,8 @@ def extract_kernel_from_cache(tag, name, signature, wrap_in_cgen=True, add_timin if add_timings and get_form_option("sumfact"): from dune.perftool.pdelab.signatures import assembler_routine_name kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage1"), "{}_kernel_stage1".format(assembler_routine_name()), 4) - kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage2"), "{}_kernel_quadratureloop".format(assembler_routine_name()), 4) - kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage3"), "{}_kernel_stage3".format(assembler_routine_name()), 4) + kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage2"), "{}_kernel_quadratureloop".format(assembler_routine_name()), 4, depends_on=frozenset({lp.match.Tagged("sumfact_stage1")})) + kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage3"), "{}_kernel_stage3".format(assembler_routine_name()), 4, depends_on=frozenset({lp.match.Tagged("sumfact_stage2")})) if wrap_in_cgen: # Wrap the kernel in something which can generate code -- GitLab