diff --git a/python/dune/perftool/generation/__init__.py b/python/dune/perftool/generation/__init__.py index 40d92670b3524be708d41ad75e5e40aa051bc6ab..7801d1a1bd9982cb474ef1e750fe2a48d1721e6f 100644 --- a/python/dune/perftool/generation/__init__.py +++ b/python/dune/perftool/generation/__init__.py @@ -32,6 +32,7 @@ from dune.perftool.generation.loopy import (constantarg, globalarg, iname, instruction, + noop_instruction, pymbolic_expr, silenced_warning, temporary_variable, diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py index 0fbbfbfc222599371c8feaff98754b275d19c3d9..2b95668fc6db5cd5b8d2be57f14adc5ac40322f9 100644 --- a/python/dune/perftool/generation/loopy.py +++ b/python/dune/perftool/generation/loopy.py @@ -196,3 +196,8 @@ def instruction(code=None, expression=None, **kwargs): # return the ID, as it is the only useful information to the user return id + + +@generator_factory(item_tags=("instruction",), cache_key_generator=lambda **kw: kw['id']) +def noop_instruction(**kwargs): + return loopy.NoOpInstruction(**kwargs) \ No newline at end of file diff --git a/python/dune/perftool/loopy/stages.py b/python/dune/perftool/loopy/stages.py new file mode 100644 index 0000000000000000000000000000000000000000..1048ca7a59b18034a1be3fb6645598d3326feb79 --- /dev/null +++ b/python/dune/perftool/loopy/stages.py @@ -0,0 +1,19 @@ +""" loopy instructions to mark stages of computations """ + +from dune.perftool.generation import noop_instruction + + +def stage_insn(n, **kwargs): + assert 'id' not in kwargs + + # Get an ID for this instruction + id = 'stage_insn_{}'.format(n) + + # Chain dependencies of stage instructions + if n > 0: + kwargs['depends_on'] = kwargs.get('depends_on', frozenset([])).union(frozenset([stage_insn(n-1, **kwargs)])) + + # Actually issue the instruction + noop_instruction(id=id, **kwargs) + + return id diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py index 8585f15357cd2a8e03b956c73a967c151c67d67b..0d839d614fefa3f10aaeec39a74a72ecf196556c 100644 --- a/python/dune/perftool/sumfact/sumfact.py +++ b/python/dune/perftool/sumfact/sumfact.py @@ -59,7 +59,7 @@ def start_sumfactorization(): return sum_factorization_kernel(a_matrices, inp, "buffer") -def sum_factorization_kernel(a_matrices, inp, buffer): +def sum_factorization_kernel(a_matrices, inp, buffer, stage=0): """ Calculate a sum factorization matrix product. @@ -76,6 +76,10 @@ def sum_factorization_kernel(a_matrices, inp, buffer): buffer: A string identifying the flip flop buffer in use for intermediate results. """ + # Get the stage instruction + from dune.perftool.loopy.stages import stage_insn + insn_dep = stage_insn(stage) + for l, a_matrix in enumerate(a_matrices): # Get a temporary that interprets the base storage of the input # as a column-major matrix. In later iteration of the amatrix loop @@ -108,10 +112,12 @@ def sum_factorization_kernel(a_matrices, inp, buffer): )) # Issue the reduction instruction that implements the multiplication - instruction(assignee=Subscript(Variable(out), (Variable(i), Variable(j))), - expression=Reduction("sum", k, prod), - forced_iname_deps=frozenset({i, j}), - forced_iname_deps_is_final=True, - ) + # at the same time store the instruction ID for the next instruction to depend on + insn_dep = instruction(assignee=Subscript(Variable(out), (Variable(i), Variable(j))), + expression=Reduction("sum", k, prod), + forced_iname_deps=frozenset({i, j}), + forced_iname_deps_is_final=True, + depends_on=frozenset({insn_dep}), + ) return out