diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py index e65e9485ff2d270f7866c44039b2e00c0924e3f5..c8d6ee97854fa62db6204103b1d3e32dd65ec00b 100644 --- a/python/dune/perftool/generation/loopy.py +++ b/python/dune/perftool/generation/loopy.py @@ -56,9 +56,9 @@ def temporary_variable(name, **kwargs): cache_key_generator=lambda *a, **kw: kw['code'], ) def c_instruction_impl(**kw): - kw['insn_deps'] = kw.pop('deps', None) kw.setdefault('assignees', []) - inames = kw.pop('inames') + inames = kw.pop('inames', kw.get('forced_iname_deps', [])) + return loopy.CInstruction(inames, **kw) @@ -66,14 +66,14 @@ def c_instruction_impl(**kw): cache_key_generator=lambda *a, **kw: kw['expression'], ) def expr_instruction_impl(**kw): - return loopy.ExpressionInstruction(id=kw['id'], assignee=kw['assignee'], expression=kw['expression']) + return loopy.ExpressionInstruction(**kw) class _IDCounter: count = 0 -def _insn_cache_key(inames, code=None, expr=None, deps=[]): +def _insn_cache_key(inames, code=None, expr=None, **kwargs): if code: return code if expr: @@ -84,6 +84,7 @@ def _insn_cache_key(inames, code=None, expr=None, deps=[]): def instruction(code=None, expression=None, **kwargs): assert code or expression assert not (code and expression) + assert 'id' not in kwargs # Get an ID for this instruction id = 'insn' + str(_IDCounter.count).zfill(4) diff --git a/python/dune/perftool/loopy/transformer.py b/python/dune/perftool/loopy/transformer.py index 727f31b4dc13eb1fe14928d04ea5743962d1f77f..2f290ac9e2190a4dda9c21ba7fef9f93c5848565 100644 --- a/python/dune/perftool/loopy/transformer.py +++ b/python/dune/perftool/loopy/transformer.py @@ -117,7 +117,19 @@ def transform_accumulation_term(term): expr_tv_name = "expr_" + str(get_count()).zfill(4) expr_tv = temporary_variable(expr_tv_name) from pymbolic.primitives import Variable - instruction(assignee=Variable(expr_tv_name), expression=pymbolic_expr) + + # This is a bit hacky now. 
To correctly determine the iname dependencies of + # the accumulation term, we inspect it manually. This is necessary, as loopy's + # automatic detection would also consider those inames dependencies which are + # duplicates of the lfs inames. + from dune.perftool.pymbolic.inameset import get_index_inames + acc_inames = get_index_inames(pymbolic_expr).union(frozenset({quadrature_iname()})) + + insn_id = instruction(assignee=Variable(expr_tv_name), + expression=pymbolic_expr, + forced_iname_deps=acc_inames, + forced_iname_deps_is_final=True, + ) # The data that is used to collect the arguments for the accumulate function accumargs = [] @@ -140,17 +152,15 @@ def transform_accumulation_term(term): shape = tuple(v for k, v in sorted(residual_shape.items(), key=lambda (k, v): k)) globalarg(residual, shape=shape) - from dune.perftool.generation import retrieve_cache_items - inames = retrieve_cache_items("iname") - from dune.perftool.pdelab.quadrature import name_factor factor = name_factor() - instruction(inames=inames, - code="{}.accumulate({}, {}*{})".format(residual, + instruction(code="{}.accumulate({}, {}*{})".format(residual, ", ".join(accumargs), expr_tv_name, factor, ), - assignees=residual, - read_variables=(factor, expr_tv_name), + assignees=frozenset({residual}), + read_variables=frozenset({residual, factor, expr_tv_name}), + forced_iname_deps=acc_inames, + forced_iname_deps_is_final=True, ) diff --git a/python/dune/perftool/pdelab/basis.py b/python/dune/perftool/pdelab/basis.py index 9a44bb64c6f9079d5bc26451d8c4f11745db220f..4cdff4d76b98642404662ca2d992b6b93357fa3a 100644 --- a/python/dune/perftool/pdelab/basis.py +++ b/python/dune/perftool/pdelab/basis.py @@ -89,21 +89,37 @@ def traverse_lfs_tree(arg): @iname -def _lfs_iname(element, argcount): +def _lfs_iname(element, argcount, context): name = name_lfs(element) bound = name_lfs_bound(name) if argcount != 0: name = 'lfsu' - name = name + '_index' + if context: + context = '_' + context + + name = name + 
context + '_index' domain(name, bound) return name -def lfs_iname(element, argcount=0): - return _lfs_iname(element, argcount) +def lfs_iname(element, argcount=0, context=''): + """ Get the iname to iterate over the local function space given by element + + Arguments: + ---------- + element: ufl.FiniteElementBase + The finite element this local function space belongs to + argcount: int + Used to realize double nesting in case of jacobians + context: str + Some generation methods will require you to duplicate an iname for + a given purpose, see the 'Loops and dependencies' section of the loopy docs: + https://documen.tician.de/loopy/tutorial.html#loops-and-dependencies + """ + return _lfs_iname(element, argcount, context) @cached @@ -117,7 +133,7 @@ def evaluate_basis(element, name): qp, name, ), - assignees=name, + assignees=frozenset({name}), ) @@ -139,7 +155,7 @@ def evaluate_reference_gradient(element, name): qp, name, ), - assignees=name, + assignees=frozenset({name}), ) @@ -154,7 +170,7 @@ def evaluate_basis_gradient(element, name): # TODO this is of course not yet correct temporary_variable(name, shape=(name_lfs_bound(element), name_dimension())) jac = name_jacobian_inverse_transposed() - index = lfs_iname(element) + index = lfs_iname(element, context='transformgrads') reference_gradients = name_reference_gradient(element) instruction(inames=(index, quadrature_iname(), @@ -165,9 +181,7 @@ def evaluate_basis_gradient(element, name): name, ), assignees=name, - read_variables=(reference_gradients, - ), - + read_variables=frozenset({reference_gradients}), ) @@ -192,9 +206,8 @@ def evaluate_trialfunction(element, name): basis, index ), - assignees=name, - read_variables=(basis, - ), + assignees=frozenset({name}), + read_variables=frozenset({basis}), ) @@ -204,7 +217,7 @@ def evaluate_trialfunction_gradient(element, name): # TODO this is of course not yet correct temporary_variable(name, shape=(name_dimension(),)) lfs = name_lfs(element) - index = lfs_iname(element) + index 
= lfs_iname(element, context='trialgrad') basis = name_basis_gradient(element) instruction(inames=(quadrature_iname(), index, @@ -214,7 +227,8 @@ def evaluate_trialfunction_gradient(element, name): index, basis ), - assignees=name, - read_variables=(basis, - ), + assignees=frozenset({name}), + read_variables=frozenset({basis}), + forced_iname_deps=frozenset({quadrature_iname(), index}), + forced_iname_deps_is_final=True, ) diff --git a/python/dune/perftool/pymbolic/inameset.py b/python/dune/perftool/pymbolic/inameset.py new file mode 100644 index 0000000000000000000000000000000000000000..ba1a9d7ee94f35f3fb10a10c15445a45e1468770 --- /dev/null +++ b/python/dune/perftool/pymbolic/inameset.py @@ -0,0 +1,17 @@ +from pymbolic.mapper import CombineMapper +from pymbolic.primitives import Variable + + +class INameMapper(CombineMapper): + def map_subscript(self, e): + if isinstance(e.index, Variable): + return frozenset([str(e.index)]) + else: + return frozenset([str(i) for i in e.index]) + + def combine(self, values): + return frozenset().union(*values) + + +def get_index_inames(e): + return INameMapper()(e)