Skip to content
Snippets Groups Projects
Commit 2490f72a authored by Dominic Kempf
Browse files

Fix some instrumentation ordering

parent f3d19543
No related branches found
No related tags found
No related merge requests found
......@@ -22,7 +22,7 @@ def _union(a):
return frozenset.union(*a)
def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', operator=False):
def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', operator=False, depends_on=frozenset()):
""" Transform loopy kernel to contain instrumentation code
Arguments:
......@@ -32,6 +32,9 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o
identifier : The name of the counter to start and stop
level : The instrumentation level this measurement is defined at
filetag : The tag of the file that should contain the counter definitions
depends_on: Additional dependencies to add to the start instruction. This is used to correct
currently wrong behaviour of the transformation in cases where a lot of structure
of the instrumentation is known a priori.
"""
# If the instrumentation level is not high enough, this is a no-op
if level > get_option("instrumentation_level"):
......@@ -53,6 +56,7 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o
insn_inames = _intersect(tuple(i.within_inames for i in insns))
other_inames = _union(tuple(i.within_inames for i in lp.find_instructions(knl, lp.match.Not(match))))
within = _intersect((insn_inames, other_inames))
uniontags = _intersect(tuple(i.tags for i in insns))
# Get a unique identifier - note that the same timer could be started and stopped several times
# within one kernel...
......@@ -67,8 +71,9 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o
"HP_TIMER_START({});".format(identifier),
id=start_id,
within_inames=within,
depends_on=start_depends,
depends_on=depends_on.union(start_depends),
boostable_into=frozenset(),
tags=uniontags,
)
# Add dependencies on the timing instructions
......@@ -82,6 +87,7 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o
within_inames=within,
depends_on=frozenset(i.id for i in insns),
boostable_into=frozenset(),
tags=uniontags,
)
# Find all the instructions that should depend on stop
......@@ -98,4 +104,7 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o
other_insns = list(filter(lambda i: i.id not in [j.id for j in rewritten_insns], knl.instructions))
# Add all the modified instructions into the kernel object
return knl.copy(instructions=rewritten_insns + other_insns + [start_insn, stop_insn])
knl = knl.copy(instructions=rewritten_insns + other_insns + [start_insn, stop_insn])
from loopy.kernel.creation import resolve_dependencies
return resolve_dependencies(knl)
......@@ -569,8 +569,8 @@ def extract_kernel_from_cache(tag, name, signature, wrap_in_cgen=True, add_timin
if add_timings and get_form_option("sumfact"):
from dune.perftool.pdelab.signatures import assembler_routine_name
kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage1"), "{}_kernel_stage1".format(assembler_routine_name()), 4)
kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage2"), "{}_kernel_quadratureloop".format(assembler_routine_name()), 4)
kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage3"), "{}_kernel_stage3".format(assembler_routine_name()), 4)
kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage2"), "{}_kernel_quadratureloop".format(assembler_routine_name()), 4, depends_on=frozenset({lp.match.Tagged("sumfact_stage1")}))
kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage3"), "{}_kernel_stage3".format(assembler_routine_name()), 4, depends_on=frozenset({lp.match.Tagged("sumfact_stage2")}))
if wrap_in_cgen:
# Wrap the kernel in something which can generate code
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment