diff --git a/python/dune/perftool/loopy/stages.py b/python/dune/perftool/loopy/stages.py
index 1048ca7a59b18034a1be3fb6645598d3326feb79..a0a2b2c48fc9c26e84aa329a8adf63bbd8903c66 100644
--- a/python/dune/perftool/loopy/stages.py
+++ b/python/dune/perftool/loopy/stages.py
@@ -1,8 +1,14 @@
 """ loopy instructions to mark stages of computations """
 
-from dune.perftool.generation import noop_instruction
+from dune.perftool.generation import (generator_factory,
+                                      noop_instruction,
+                                      )
 
+from loopy import add_dependency
+from loopy.match import Id
 
+
+@generator_factory(item_tags=("stage",), cache_key_generator=lambda n, **kw: n)
 def stage_insn(n, **kwargs):
     assert 'id' not in kwargs
 
@@ -11,9 +17,37 @@ def stage_insn(n, **kwargs):
 
     # Chain dependencies of stage instructions
     if n > 0:
-        kwargs['depends_on'] = kwargs.get('depends_on', frozenset([])).union(frozenset([stage_insn(n-1, **kwargs)]))
+        kwargs['depends_on'] = kwargs.get('depends_on', frozenset([])).union(frozenset([stage_insn(n - 1, **kwargs)]))
 
     # Actually issue the instruction
     noop_instruction(id=id, **kwargs)
 
     return id
+
+
+def finalize_stage_instructions(kernel):
+    """Make every stage marker wait for the work of the preceding stage.
+
+    For each stage i that has a successor, every instruction of kernel that
+    depends on stage_insn(i) -- except the stage markers themselves, whose ids
+    start with 'stage_insn_' -- is added as a dependency of stage_insn(i + 1).
+
+    Returns the kernel with the additional dependencies.
+    """
+    # NOTE(review): assumes the memoize cache holds exactly one entry per stage
+    # generated so far -- confirm against generator_factory.
+    num_stages = len(stage_insn._memoize_cache)
+
+    # The last stage has no successor. Stopping one short avoids requesting
+    # stage_insn(num_stages), which would generate a marker after the kernel
+    # has been built and match no instruction in it.
+    for i in range(num_stages - 1):
+        current = stage_insn(i)
+        successor = stage_insn(i + 1)
+        deps = frozenset(insn.id for insn in kernel.instructions
+                         if current in insn.depends_on and
+                         not insn.id.startswith('stage_insn_'))
+        for dep_id in deps:
+            kernel = add_dependency(kernel, Id(successor), dep_id)
+
+    return kernel
\ No newline at end of file
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index 3fbb2595bc71898592cb180ee5f073cbdb27d2ea..3c008a173b98bcfcd06fb77f2948d4b6a76e2462 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -496,6 +496,10 @@ def generate_kernel(integrals):
     from dune.perftool.loopy.duplicate import heuristic_duplication
     kernel = heuristic_duplication(kernel)
 
+    # Finalize our stages mechanism
+    from dune.perftool.loopy.stages import finalize_stage_instructions
+    kernel = finalize_stage_instructions(kernel)
+
     # This is also silly, but 1D DG Schemes never need the geometry, so the quadrature
     # statement actually introduces a preamble at a stage where preambles cannot be generated
     # anymore. TODO think about how to avoid this!!!