diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py
index 5f328bd66fbb19fb74df2b91db212360163e11c9..02eb474754f1233e8cfbe86f57c474db4d2ed9d2 100644
--- a/python/dune/perftool/sumfact/accumulation.py
+++ b/python/dune/perftool/sumfact/accumulation.py
@@ -104,6 +104,9 @@ class AccumulationOutput(SumfactKernelOutputBase, ImmutableRecord):
                                  trial_element_index=trial_element_index,
                                  )
 
+    def __repr__(self):
+        return ImmutableRecord.__repr__(self)
+
     @property
     def within_inames(self):
         if self.trial_element is None:
diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py
index 22b0d64fe597c33595e9010f028e89caaeab832f..a66d744440595b8f8d8fbf32855dddf2bfb57612 100644
--- a/python/dune/perftool/sumfact/symbolic.py
+++ b/python/dune/perftool/sumfact/symbolic.py
@@ -33,12 +33,18 @@ class SumfactKernelInputBase(object):
     def realize_direct(self, inames):
         raise NotImplementedError
 
+    def __repr__(self):
+        return "SumfactKernelInputBase()"
+
 
 class VectorSumfactKernelInput(SumfactKernelInputBase):
     def __init__(self, inputs):
         assert(isinstance(inputs, tuple))
         self.inputs = inputs
 
+    def __repr__(self):
+        return "_".join(repr(i) for i in self.inputs)
+
     @property
     def direct_input_is_possible(self):
         return all(i.direct_input_is_possible for i in self.inputs)
@@ -87,11 +93,17 @@ class SumfactKernelOutputBase(object):
     def realize_direct(self, result, inames, shape, args):
         raise NotImplementedError
 
+    def __repr__(self):
+        return "SumfactKernelOutputBase()"
+
 
 class VectorSumfactKernelOutput(SumfactKernelOutputBase):
     def __init__(self, outputs):
         self.outputs = outputs
 
+    def __repr__(self):
+        return "_".join(repr(o) for o in self.outputs)
+
     def _add_hadd(self, o, result):
         hadd_function = "horizontal_add"
         if len(set(self.outputs)) > 1:
@@ -293,7 +305,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
         work on the same input coefficient (stage 1) or accumulate into the same thing
         (stage 3)
         """
-        return (self.input, self.output)
+        return (repr(self.input), repr(self.output))
 
     @property
     def group_name(self):
diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index 2ea28eec3f786c4485d3c0913f7e3279d82e666f..97a197c94b9b33d14b342ae2c05d1392e3f0707a 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -285,7 +285,8 @@ def _level2_optimal_vectorization_strategy_generator(sumfacts, width, qp, alread
 
     # Find the number of input coefficients we can work on
     keys = frozenset(sf.inout_key for sf in sumfacts)
-    inoutkey_sumfacts = [tuple(sorted(filter(lambda sf: sf.inout_key == key, sumfacts))) for key in keys]
+
+    inoutkey_sumfacts = [tuple(sorted(filter(lambda sf: sf.inout_key == key, sumfacts))) for key in sorted(keys)]
 
     for parallel in (1, 2):
         if parallel > len(keys):