From 81aa44584776ff8f7f3e41e5fdc5246eb476091c Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Thu, 30 Mar 2017 16:42:38 +0200 Subject: [PATCH] fixup --- python/dune/perftool/loopy/symbolic.py | 4 ++-- python/dune/perftool/sumfact/realization.py | 16 ++++++++-------- python/dune/perftool/sumfact/sumfact.py | 2 +- python/dune/perftool/ufl/visitor.py | 10 +++++++--- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/python/dune/perftool/loopy/symbolic.py b/python/dune/perftool/loopy/symbolic.py index 91dadf16..cb877c7e 100644 --- a/python/dune/perftool/loopy/symbolic.py +++ b/python/dune/perftool/loopy/symbolic.py @@ -23,7 +23,7 @@ class SumfactKernel(ImmutableRecord, prim.Variable): stage=1, preferred_position=None, restriction=0, - within_inames=frozenset(), + within_inames=(), input=None, padding=frozenset(), index=None, @@ -41,7 +41,7 @@ class SumfactKernel(ImmutableRecord, prim.Variable): if not isinstance(restriction, tuple): restriction = (restriction, 0) - assert isinstance(within_inames, frozenset) + assert isinstance(within_inames, tuple) ImmutableRecord.__init__(self, a_matrices=a_matrices, diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py index 7b84f4f6..739514e8 100644 --- a/python/dune/perftool/sumfact/realization.py +++ b/python/dune/perftool/sumfact/realization.py @@ -16,6 +16,7 @@ from dune.perftool.generation import (barrier, from dune.perftool.loopy.buffer import (get_buffer_temporary, switch_base_storage, ) +from dune.perftool.pdelab.geometry import world_dimension from dune.perftool.options import get_option from dune.perftool.pdelab.signatures import assembler_routine_name from dune.perftool.sumfact.permutation import (_sf_permutation_strategy, @@ -66,11 +67,11 @@ def realize_sum_factorization_kernel(sf, insn_dep=frozenset(), outshape=None, di dump_accumulate_timer(timer_name) insn_dep = 
frozenset({instruction(code="HP_TIMER_START({});".format(timer_name), depends_on=insn_dep, - within_inames=sf.within_inames)}) + within_inames=frozenset(sf.within_inames))}) # Put a barrier before the sumfactorization kernel insn_dep = frozenset({barrier(depends_on=insn_dep, - within_inames=sf.within_inames, + within_inames=frozenset(sf.within_inames), )}) # Decide in which order we want to process directions in the @@ -195,10 +196,9 @@ def realize_sum_factorization_kernel(sf, insn_dep=frozenset(), outshape=None, di dtype=np.float64, shape=output_shape + output_shape, dim_tags=novec_ftags + "," + novec_ftags) - # TODO the next line should get its inames from - # elsewhere. This is *NOT* robust (but works right - # now) - _ansatz_inames = tuple(Variable(sf.within_inames[i]) for i in range(world_dimension())) + # TODO: It is at least questionable whether using the *order* of the inames in here + # for indexing is a good idea. Then again, it is hard to find an alternative. + _ansatz_inames = tuple(prim.Variable(i) for i in sf.within_inames) assignee = prim.Subscript(prim.Variable(direct_output), _ansatz_inames + output_inames) # In case of vectorization we need to apply a horizontal add @@ -215,7 +215,7 @@ def realize_sum_factorization_kernel(sf, insn_dep=frozenset(), outshape=None, di # at the same time store the instruction ID for the next instruction to depend on insn_dep = frozenset({instruction(assignee=assignee, expression=matprod, - forced_iname_deps=frozenset([iname for iname in out_inames]).union(sf.within_inames), + forced_iname_deps=frozenset([iname for iname in out_inames]).union(frozenset(sf.within_inames)), forced_iname_deps_is_final=True, depends_on=insn_dep, ) @@ -225,7 +225,7 @@ def realize_sum_factorization_kernel(sf, insn_dep=frozenset(), outshape=None, di if get_option("instrumentation_level") >= 4: insn_dep = frozenset({instruction(code="HP_TIMER_STOP({});".format(timer_name), depends_on=insn_dep, - within_inames=sf.within_inames)}) + 
within_inames=frozenset(sf.within_inames))}) if sf.stage == 1: qp_timer_name = assembler_routine_name() + '_kernel' + '_quadratureloop' post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile') diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py index d46bf6b9..cc05067b 100644 --- a/python/dune/perftool/sumfact/sumfact.py +++ b/python/dune/perftool/sumfact/sumfact.py @@ -152,7 +152,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): restriction=(accterm.argument.restriction, restriction), stage=3, preferred_position=i if accterm.new_indices else None, - within_inames=frozenset(visitor.inames), + within_inames=visitor.inames, ) # TODO: Move this away! diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py index 96f1cb85..38b7eee6 100644 --- a/python/dune/perftool/ufl/visitor.py +++ b/python/dune/perftool/ufl/visitor.py @@ -98,7 +98,8 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): shape = len(element.value_shape()) indices = indices[shape:] for i in range(len(element.value_shape())): - self.inames = self.inames + (self.interface.dimension_iname(context='arg', count=i),) + if self.interface.dimension_iname(context='arg', count=i) not in self.inames: + self.inames = self.inames + (self.interface.dimension_iname(context='arg', count=i),) # For the purpose of basis evaluation, we need to take the leaf element leaf_element = element.sub_elements()[0] @@ -108,7 +109,9 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): # Have the issued instruction depend on the iname for this localfunction space inames = self.interface.lfs_inames(leaf_element, restriction, o.number()) - self.inames = self.inames + inames + for iname in inames: + if iname not in self.inames: + self.inames = self.inames + (iname,) if self.reference_grad: return maybe_wrap_subscript(self.interface.pymbolic_reference_gradient(leaf_element, restriction, o.number()), indices) 
@@ -215,7 +218,8 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): return index._value else: if index in self.dimension_indices: - self.inames = self.inames + (self.dimension_indices[index],) + if self.dimension_indices[index] not in self.inames: + self.inames = self.inames + (self.dimension_indices[index],) return Variable(self.dimension_indices[index]) else: return Variable(self.interface.name_index(index)) -- GitLab