diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py index 627c8fc0fd0e0a22a3c1dd1f9919ba50c80823dd..393a22756dc87a3b3a4ec06ccd57d0ac964d3ece 100644 --- a/python/dune/perftool/sumfact/symbolic.py +++ b/python/dune/perftool/sumfact/symbolic.py @@ -202,6 +202,11 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): # Watch out for the documentation to see which key is used unter what circumstances # + @property + def parallel_key(self): + """ A key that identifies parallelizable kernels. """ + return tuple(m.basis_size for m in self.matrix_sequence) + (self.stage, self.buffer) + @property def cache_key(self): """ The cache key that can be used in generation magic @@ -214,8 +219,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): def input_key(self): """ A cache key for the input coefficients Any two sum factorization kernels having the same input_key - work on the same input coefficient (and are suitable for simultaneous - treatment because of that) + work on the same input coefficient """ return (self.input, self.restriction, self.accumvar, self.trial_element_index) diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index 95286389498b4697cc2450b464b37761f521f358..7c9c549546f9243fb9449019e1aa76151eee9891 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -257,6 +257,21 @@ def level1_optimal_vectorization_strategy(sumfacts, width): def level2_optimal_vectorization_strategy(sumfacts, width, qp): + # Find the sets of simultaneously realizable kernels + keys = frozenset(sf.parallel_key for sf in sumfacts) + + # Find minimums for each of these sets + sfdict = frozendict() + + for key in keys: + key_sumfacts = frozenset(sf for sf in sumfacts if sf.parallel_key == key) + key_strategy = level3_optimal_vectorization_strategy(key_sumfacts, width, qp) + sfdict = 
add_to_frozendict(sfdict, key_strategy) + + return sfdict + + +def level3_optimal_vectorization_strategy(sumfacts, width, qp): # Find the sets of simultaneously realizable kernels (thats an equivalence relation) keys = frozenset(sf.input_key for sf in sumfacts)