# Patch summary: make_groups_conflicting() now applies its conflicting-groups
# transformation only when get_option("assure_statement_ordering") is truthy;
# otherwise it returns the kernel unchanged. The option itself is added to
# PerftoolOptionsArray with default=False.
# NOTE(review): line breaks, indentation and blank context lines below were
# reconstructed from a whitespace-collapsed copy so that they agree with the
# hunk headers; the exact position of blank context lines around the added
# import is an assumption -- confirm against the original commit before applying.
diff --git a/python/dune/perftool/loopy/transformations/disjointgroups.py b/python/dune/perftool/loopy/transformations/disjointgroups.py
index 2f0a64f0f19a58926457b9e4a421d18604563b7d..1f22c67d9600520e0d26ae11ee0a72db043b708c 100644
--- a/python/dune/perftool/loopy/transformations/disjointgroups.py
+++ b/python/dune/perftool/loopy/transformations/disjointgroups.py
@@ -1,6 +1,12 @@
 """ A helper transformation that makes all groups conflicting """
 
+from dune.perftool.options import get_option
 
 def make_groups_conflicting(knl):
-    groups = frozenset().union(*tuple(i.groups for i in knl.instructions))
-    return knl.copy(instructions=[i.copy(conflicts_with_groups=groups - i.groups) for i in knl.instructions])
+    # As this transformation introduces a performance bug that basically
+    # kills our CI, we only apply it if really needed - meaning in production.
+    if get_option("assure_statement_ordering"):
+        groups = frozenset().union(*tuple(i.groups for i in knl.instructions))
+        return knl.copy(instructions=[i.copy(conflicts_with_groups=groups - i.groups) for i in knl.instructions])
+    else:
+        return knl
diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py
index 789f7b86c0dc8fc6f91f81961796f34df120ff5a..9ae717c5b2f881bdaa25c5f78007455833d10178 100644
--- a/python/dune/perftool/options.py
+++ b/python/dune/perftool/options.py
@@ -64,6 +64,7 @@ class PerftoolOptionsArray(ImmutableRecord):
     architecture = PerftoolOption(default="haswell", helpstr="The architecture to optimize for. Possible values: haswell|knl")
     grid_offset = PerftoolOption(default=False, helpstr="Set to true if you want a yasp grid where the lower left corner is not in the origin.")
     simplify = PerftoolOption(default=True, helpstr="Whether to simplify expressions using sympy")
+    assure_statement_ordering = PerftoolOption(default=False, helpstr="Whether special care should be taken for a good statement ordering in sumfact kernels, runs into a loopy scheduler performance bug, but is necessary for production.")
 
     # Arguments that are mainly to be set by logic depending on other options
     max_vector_width = PerftoolOption(default=256, helpstr=None)