diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index 3cecd9626e808188577fc41e4a266ffd388ebe76..05a69b3663e529f502f60069e1c7c156bae33c6e 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -20,7 +20,7 @@ from dune.perftool.sumfact.tabulation import (BasisTabulationMatrixArray, ) from dune.perftool.error import PerftoolVectorizationError from dune.perftool.options import get_option -from dune.perftool.tools import add_to_frozendict, round_to_multiple +from dune.perftool.tools import add_to_frozendict, round_to_multiple, list_diff from pytools import product from frozendict import frozendict @@ -260,12 +260,16 @@ def fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp, already= yield already return + # Ensure a deterministic order of the given sumfact kernels. This is necessary for the + # fromlist strategy to pick correct strategies across different program runs + sumfacts = sorted(sumfacts, key=lambda sf: repr(sf)) + - # Otherwise we pick a random sum factorization kernel and construct all the vectorization + # Otherwise we pick the first sum factorization kernel in that sorted order and construct all the vectorization # opportunities realizing this particular kernel and go into recursion. 
- sf_to_decide = next(iter(sumfacts)) + sf_to_decide = sumfacts[0] # Have "unvectorized" as an option, although it is not good - for opp in fixed_quad_vectorization_opportunity_generator(sumfacts.difference({sf_to_decide}), + for opp in fixed_quad_vectorization_opportunity_generator(list_diff(sumfacts, [sf_to_decide]), width, qp, add_to_frozendict(already, @@ -298,7 +302,7 @@ def fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp, already= continue # Go into recursion to also vectorize all kernels not in this combo - for opp in fixed_quad_vectorization_opportunity_generator(sumfacts.difference(combo), + for opp in fixed_quad_vectorization_opportunity_generator(list_diff(sumfacts, combo), width, qp, add_to_frozendict(already, vecdict), diff --git a/python/dune/perftool/tools.py b/python/dune/perftool/tools.py index d358f3ab45ef8231a3a952a881167e7108c14704..e302f28c0e63d03d013c181dc0eb4f831ec1648c 100644 --- a/python/dune/perftool/tools.py +++ b/python/dune/perftool/tools.py @@ -74,3 +74,11 @@ def add_to_frozendict(fd, valdict): t = dict(fd) t.update(valdict) return frozendict.frozendict(t) + + +def list_diff(l1, l2): + l = [] + for item in l1: + if item not in l2: + l.append(item) + return l