From 989e016340bdc94294c36da55cbe5a028a7e7e1a Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Wed, 28 Feb 2018 14:05:41 +0100
Subject: [PATCH] Make vectorization opportunities deterministic again

By implementing repr such that instance addresses are
not part of sorting anymore
---
 python/dune/perftool/sumfact/accumulation.py  |  3 +++
 python/dune/perftool/sumfact/symbolic.py      | 14 +++++++++++++-
 python/dune/perftool/sumfact/vectorization.py |  3 ++-
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py
index 5f328bd6..02eb4747 100644
--- a/python/dune/perftool/sumfact/accumulation.py
+++ b/python/dune/perftool/sumfact/accumulation.py
@@ -104,6 +104,9 @@ class AccumulationOutput(SumfactKernelOutputBase, ImmutableRecord):
                                  trial_element_index=trial_element_index,
                                  )
 
+    def __repr__(self):
+        return ImmutableRecord.__repr__(self)
+
     @property
     def within_inames(self):
         if self.trial_element is None:
diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py
index 22b0d64f..a66d7444 100644
--- a/python/dune/perftool/sumfact/symbolic.py
+++ b/python/dune/perftool/sumfact/symbolic.py
@@ -33,12 +33,18 @@ class SumfactKernelInputBase(object):
     def realize_direct(self, inames):
         raise NotImplementedError
 
+    def __repr__(self):
+        return "SumfactKernelInputBase()"
+
 
 class VectorSumfactKernelInput(SumfactKernelInputBase):
     def __init__(self, inputs):
         assert(isinstance(inputs, tuple))
         self.inputs = inputs
 
+    def __repr__(self):
+        return "_".join(repr(i) for i in self.inputs)
+
     @property
     def direct_input_is_possible(self):
         return all(i.direct_input_is_possible for i in self.inputs)
@@ -87,11 +93,17 @@ class SumfactKernelOutputBase(object):
     def realize_direct(self, result, inames, shape, args):
         raise NotImplementedError
 
+    def __repr__(self):
+        return "SumfactKernelOutputBase()"
+
 
 class VectorSumfactKernelOutput(SumfactKernelOutputBase):
     def __init__(self, outputs):
         self.outputs = outputs
 
+    def __repr__(self):
+        return "_".join(repr(o) for o in self.outputs)
+
     def _add_hadd(self, o, result):
         hadd_function = "horizontal_add"
         if len(set(self.outputs)) > 1:
@@ -293,7 +305,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
         work on the same input coefficient (stage 1) or accumulate
         into the same thing (stage 3)
         """
-        return (self.input, self.output)
+        return (repr(self.input), repr(self.output))
 
     @property
     def group_name(self):
diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index 2ea28eec..97a197c9 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -285,7 +285,8 @@ def _level2_optimal_vectorization_strategy_generator(sumfacts, width, qp, alread
 
     # Find the number of input coefficients we can work on
     keys = frozenset(sf.inout_key for sf in sumfacts)
-    inoutkey_sumfacts = [tuple(sorted(filter(lambda sf: sf.inout_key == key, sumfacts))) for key in keys]
+
+    inoutkey_sumfacts = [tuple(sorted(filter(lambda sf: sf.inout_key == key, sumfacts))) for key in sorted(keys)]
 
     for parallel in (1, 2):
         if parallel > len(keys):
-- 
GitLab