From 880adbdc802d87796f4901a4f14935d3b7e14c2c Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Mon, 12 Feb 2018 15:10:44 +0100 Subject: [PATCH] Add one level of indirection into the vectorization generator --- python/dune/perftool/sumfact/symbolic.py | 8 ++++++-- python/dune/perftool/sumfact/vectorization.py | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py index 627c8fc0..393a2275 100644 --- a/python/dune/perftool/sumfact/symbolic.py +++ b/python/dune/perftool/sumfact/symbolic.py @@ -202,6 +202,11 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): # Watch out for the documentation to see which key is used unter what circumstances # + @property + def parallel_key(self): + """ A key that identifies parallelizable kernels. """ + return tuple(m.basis_size for m in self.matrix_sequence) + (self.stage, self.buffer) + @property def cache_key(self): """ The cache key that can be used in generation magic @@ -214,8 +219,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): def input_key(self): """ A cache key for the input coefficients Any two sum factorization kernels having the same input_key - work on the same input coefficient (and are suitable for simultaneous - treatment because of that) + work on the same input coefficient """ return (self.input, self.restriction, self.accumvar, self.trial_element_index) diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index 95286389..7c9c5495 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -257,6 +257,21 @@ def level1_optimal_vectorization_strategy(sumfacts, width): def level2_optimal_vectorization_strategy(sumfacts, width, qp): + # Find the sets of simultaneously realizable kernels + keys = 
frozenset(sf.parallel_key for sf in sumfacts) + + # Find minimums for each of these sets + sfdict = frozendict() + + for key in keys: + key_sumfacts = frozenset(sf for sf in sumfacts if sf.parallel_key == key) + key_strategy = level3_optimal_vectorization_strategy(key_sumfacts, width, qp) + sfdict = add_to_frozendict(sfdict, key_strategy) + + return sfdict + + +def level3_optimal_vectorization_strategy(sumfacts, width, qp): # Find the sets of simultaneously realizable kernels (thats an equivalence relation) keys = frozenset(sf.input_key for sf in sumfacts) -- GitLab