Skip to content
Snippets Groups Projects
Commit 84e20b93 authored by Dominic Kempf
Browse files

Correct instrumentation in stage 3

Some kernels were not properly instrumented before
parent f283e3cc
No related branches found
No related tags found
No related merge requests found
......@@ -45,6 +45,17 @@ def realize_sum_factorization_kernel(sf, **kwargs):
context_tags=("kernel",),
cache_key_generator=lambda s, **kw: s.cache_key)
def _realize_sum_factorization_kernel(sf):
insn_dep = sf.insn_dep
# Measure times and count operations in c++ code
if get_option("instrumentation_level") >= 4:
timer_name = assembler_routine_name() + '_kernel' + '_stage{}'.format(sf.stage)
post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
dump_accumulate_timer(timer_name)
insn_dep = frozenset({instruction(code="HP_TIMER_START({});".format(timer_name),
depends_on=insn_dep,
within_inames=frozenset(sf.within_inames))})
# Set up the input for stage 1
if sf.stage == 1 and not get_option("fastdg"):
assert sf.coeff_func
......@@ -65,7 +76,8 @@ def _realize_sum_factorization_kernel(sf):
assignee = prim.Subscript(prim.Variable(input_setup), (prim.Variable(basisiname),) + (sf.horizontal_index(inputsf),))
instruction(assignee=assignee,
expression=coeff,
depends_on=inputsf.insn_dep,
depends_on=inputsf.insn_dep.union(insn_dep),
tags=frozenset({"sumfact_stage{}".format(sf.stage)}),
)
if sf.vectorized:
......@@ -74,9 +86,6 @@ def _realize_sum_factorization_kernel(sf):
else:
_write_input(sf)
# Add a dependency on the input variable
insn_dep = sf.insn_dep
if sf.input:
insn_dep = insn_dep.union(frozenset({lp.match.Writes(sf.input)}))
# Construct the direct_input for the FastDG case
......@@ -98,15 +107,6 @@ def _realize_sum_factorization_kernel(sf):
ctags = ctags + ",vec"
vec_shape = (sf.vector_width,)
# Measure times and count operations in c++ code
if get_option("instrumentation_level") >= 4:
timer_name = assembler_routine_name() + '_kernel' + '_stage{}'.format(sf.stage)
post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
dump_accumulate_timer(timer_name)
insn_dep = frozenset({instruction(code="HP_TIMER_START({});".format(timer_name),
depends_on=insn_dep,
within_inames=frozenset(sf.within_inames))})
# Put a barrier before the sumfactorization kernel
insn_dep = frozenset({barrier(depends_on=insn_dep,
within_inames=frozenset(sf.within_inames),
......@@ -255,13 +255,14 @@ def _realize_sum_factorization_kernel(sf):
forced_iname_deps=frozenset([iname for iname in out_inames]).union(frozenset(sf.within_inames)),
forced_iname_deps_is_final=True,
depends_on=insn_dep,
tags=frozenset({"sumfact_stage{}".format(sf.stage)}),
)
})
# Measure times and count operations in c++ code
if get_option("instrumentation_level") >= 4:
insn_dep = frozenset({instruction(code="HP_TIMER_STOP({});".format(timer_name),
depends_on=insn_dep,
depends_on=frozenset({lp.match.Tagged("sumfact_stage{}".format(sf.stage))}),
within_inames=frozenset(sf.within_inames))})
if sf.stage == 1:
qp_timer_name = assembler_routine_name() + '_kernel' + '_quadratureloop'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment