From 81aa44584776ff8f7f3e41e5fdc5246eb476091c Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Thu, 30 Mar 2017 16:42:38 +0200
Subject: [PATCH] fixup

---
 python/dune/perftool/loopy/symbolic.py      |  4 ++--
 python/dune/perftool/sumfact/realization.py | 16 ++++++++--------
 python/dune/perftool/sumfact/sumfact.py     |  2 +-
 python/dune/perftool/ufl/visitor.py         | 10 +++++++---
 4 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/python/dune/perftool/loopy/symbolic.py b/python/dune/perftool/loopy/symbolic.py
index 91dadf16..cb877c7e 100644
--- a/python/dune/perftool/loopy/symbolic.py
+++ b/python/dune/perftool/loopy/symbolic.py
@@ -23,7 +23,7 @@ class SumfactKernel(ImmutableRecord, prim.Variable):
                  stage=1,
                  preferred_position=None,
                  restriction=0,
-                 within_inames=frozenset(),
+                 within_inames=(),
                  input=None,
                  padding=frozenset(),
                  index=None,
@@ -41,7 +41,7 @@ class SumfactKernel(ImmutableRecord, prim.Variable):
         if not isinstance(restriction, tuple):
             restriction = (restriction, 0)
 
-        assert isinstance(within_inames, frozenset)
+        assert isinstance(within_inames, tuple)
 
         ImmutableRecord.__init__(self,
                                  a_matrices=a_matrices,
diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py
index 7b84f4f6..739514e8 100644
--- a/python/dune/perftool/sumfact/realization.py
+++ b/python/dune/perftool/sumfact/realization.py
@@ -16,6 +16,7 @@ from dune.perftool.generation import (barrier,
 from dune.perftool.loopy.buffer import (get_buffer_temporary,
                                         switch_base_storage,
                                         )
+from dune.perftool.pdelab.geometry import world_dimension
 from dune.perftool.options import get_option
 from dune.perftool.pdelab.signatures import assembler_routine_name
 from dune.perftool.sumfact.permutation import (_sf_permutation_strategy,
@@ -66,11 +67,11 @@ def realize_sum_factorization_kernel(sf, insn_dep=frozenset(), outshape=None, di
         dump_accumulate_timer(timer_name)
         insn_dep = frozenset({instruction(code="HP_TIMER_START({});".format(timer_name),
                                           depends_on=insn_dep,
-                                          within_inames=sf.within_inames)})
+                                          within_inames=frozenset(sf.within_inames))})
 
     # Put a barrier before the sumfactorization kernel
     insn_dep = frozenset({barrier(depends_on=insn_dep,
-                                  within_inames=sf.within_inames,
+                                  within_inames=frozenset(sf.within_inames),
                                   )})
 
     # Decide in which order we want to process directions in the
@@ -195,10 +196,9 @@ def realize_sum_factorization_kernel(sf, insn_dep=frozenset(), outshape=None, di
                           dtype=np.float64,
                           shape=output_shape + output_shape,
                           dim_tags=novec_ftags + "," + novec_ftags)
-                # TODO the next line should get its inames from
-                # elsewhere. This is *NOT* robust (but works right
-                # now)
-                _ansatz_inames = tuple(Variable(sf.within_inames[i]) for i in range(world_dimension()))
+                # TODO: It is at least questionable whether relying on the *order* of the inames here
+                #       for indexing is a good idea. Then again, it is hard to find an alternative.
+                _ansatz_inames = tuple(prim.Variable(i) for i in sf.within_inames)
                 assignee = prim.Subscript(prim.Variable(direct_output), _ansatz_inames + output_inames)
 
             # In case of vectorization we need to apply a horizontal add
@@ -215,7 +215,7 @@ def realize_sum_factorization_kernel(sf, insn_dep=frozenset(), outshape=None, di
         # at the same time store the instruction ID for the next instruction to depend on
         insn_dep = frozenset({instruction(assignee=assignee,
                                           expression=matprod,
-                                          forced_iname_deps=frozenset([iname for iname in out_inames]).union(sf.within_inames),
+                                          forced_iname_deps=frozenset([iname for iname in out_inames]).union(frozenset(sf.within_inames)),
                                           forced_iname_deps_is_final=True,
                                           depends_on=insn_dep,
                                           )
@@ -225,7 +225,7 @@ def realize_sum_factorization_kernel(sf, insn_dep=frozenset(), outshape=None, di
     if get_option("instrumentation_level") >= 4:
         insn_dep = frozenset({instruction(code="HP_TIMER_STOP({});".format(timer_name),
                                           depends_on=insn_dep,
-                                          within_inames=sf.within_inames)})
+                                          within_inames=frozenset(sf.within_inames))})
         if sf.stage == 1:
             qp_timer_name = assembler_routine_name() + '_kernel' + '_quadratureloop'
             post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py
index d46bf6b9..cc05067b 100644
--- a/python/dune/perftool/sumfact/sumfact.py
+++ b/python/dune/perftool/sumfact/sumfact.py
@@ -152,7 +152,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                            restriction=(accterm.argument.restriction, restriction),
                            stage=3,
                            preferred_position=i if accterm.new_indices else None,
-                           within_inames=frozenset(visitor.inames),
+                           within_inames=visitor.inames,
                            )
 
         # TODO: Move this away!
diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py
index 96f1cb85..38b7eee6 100644
--- a/python/dune/perftool/ufl/visitor.py
+++ b/python/dune/perftool/ufl/visitor.py
@@ -98,7 +98,8 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
             shape = len(element.value_shape())
             indices = indices[shape:]
             for i in range(len(element.value_shape())):
-                self.inames = self.inames + (self.interface.dimension_iname(context='arg', count=i),)
+                if self.interface.dimension_iname(context='arg', count=i) not in self.inames:
+                    self.inames = self.inames + (self.interface.dimension_iname(context='arg', count=i),)
 
             # For the purpose of basis evaluation, we need to take the leaf element
             leaf_element = element.sub_elements()[0]
@@ -108,7 +109,9 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
 
         # Have the issued instruction depend on the iname for this localfunction space
         inames = self.interface.lfs_inames(leaf_element, restriction, o.number())
-        self.inames = self.inames + inames
+        for iname in inames:
+            if iname not in self.inames:
+                self.inames = self.inames + (iname,)
 
         if self.reference_grad:
             return maybe_wrap_subscript(self.interface.pymbolic_reference_gradient(leaf_element, restriction, o.number()), indices)
@@ -215,7 +218,8 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
             return index._value
         else:
             if index in self.dimension_indices:
-                self.inames = self.inames + (self.dimension_indices[index],)
+                if self.dimension_indices[index] not in self.inames:
+                    self.inames = self.inames + (self.dimension_indices[index],)
                 return Variable(self.dimension_indices[index])
             else:
                 return Variable(self.interface.name_index(index))
-- 
GitLab