From 029c2ebac3db9bf19fe214c0b59bd0a0c05ac8f8 Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Mon, 18 Apr 2016 17:45:40 +0200 Subject: [PATCH] Some more changes -> correct dependencies for laplacian! --- python/dune/perftool/generation/loopy.py | 9 +++-- python/dune/perftool/loopy/transformer.py | 26 ++++++++---- python/dune/perftool/pdelab/basis.py | 48 +++++++++++++++-------- python/dune/perftool/pymbolic/inameset.py | 17 ++++++++ 4 files changed, 71 insertions(+), 29 deletions(-) create mode 100644 python/dune/perftool/pymbolic/inameset.py diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py index e65e9485..c8d6ee97 100644 --- a/python/dune/perftool/generation/loopy.py +++ b/python/dune/perftool/generation/loopy.py @@ -56,9 +56,9 @@ def temporary_variable(name, **kwargs): cache_key_generator=lambda *a, **kw: kw['code'], ) def c_instruction_impl(**kw): - kw['insn_deps'] = kw.pop('deps', None) kw.setdefault('assignees', []) - inames = kw.pop('inames') + inames = kw.pop('inames', kw.get('forced_iname_deps', [])) + return loopy.CInstruction(inames, **kw) @@ -66,14 +66,14 @@ def c_instruction_impl(**kw): cache_key_generator=lambda *a, **kw: kw['expression'], ) def expr_instruction_impl(**kw): - return loopy.ExpressionInstruction(id=kw['id'], assignee=kw['assignee'], expression=kw['expression']) + return loopy.ExpressionInstruction(**kw) class _IDCounter: count = 0 -def _insn_cache_key(inames, code=None, expr=None, deps=[]): +def _insn_cache_key(inames, code=None, expr=None, **kwargs): if code: return code if expr: @@ -84,6 +84,7 @@ def _insn_cache_key(inames, code=None, expr=None, deps=[]): def instruction(code=None, expression=None, **kwargs): assert code or expression assert not (code and expression) + assert 'id' not in kwargs # Get an ID for this instruction id = 'insn' + str(_IDCounter.count).zfill(4) diff --git a/python/dune/perftool/loopy/transformer.py b/python/dune/perftool/loopy/transformer.py index 727f31b4..2f290ac9 100644 --- a/python/dune/perftool/loopy/transformer.py +++ b/python/dune/perftool/loopy/transformer.py @@ -117,7 +117,19 @@ def transform_accumulation_term(term): expr_tv_name = "expr_" + str(get_count()).zfill(4) expr_tv = temporary_variable(expr_tv_name) from pymbolic.primitives import Variable - instruction(assignee=Variable(expr_tv_name), expression=pymbolic_expr) + + # This is a bit hacky now. To correctly determine the iname dependencies of + # the accumulation term, we inspect it manually. This is necessary, as loopys + # automatic detection would also considers those inames dependencies which are + # duplicates of the lfs inames. + from dune.perftool.pymbolic.inameset import get_index_inames + acc_inames = get_index_inames(pymbolic_expr).union(frozenset({quadrature_iname()})) + + insn_id = instruction(assignee=Variable(expr_tv_name), + expression=pymbolic_expr, + forced_iname_deps=acc_inames, + forced_iname_deps_is_final=True, + ) # The data that is used to collect the arguments for the accumulate function accumargs = [] @@ -140,17 +152,15 @@ def transform_accumulation_term(term): shape = tuple(v for k, v in sorted(residual_shape.items(), key=lambda (k, v): k)) globalarg(residual, shape=shape) - from dune.perftool.generation import retrieve_cache_items - inames = retrieve_cache_items("iname") - from dune.perftool.pdelab.quadrature import name_factor factor = name_factor() - instruction(inames=inames, - code="{}.accumulate({}, {}*{})".format(residual, + instruction(code="{}.accumulate({}, {}*{})".format(residual, ", ".join(accumargs), expr_tv_name, factor, ), - assignees=residual, - read_variables=(factor, expr_tv_name), + assignees=frozenset({residual}), + read_variables=frozenset({residual, factor, expr_tv_name}), + forced_iname_deps=acc_inames, + forced_iname_deps_is_final=True, ) diff --git a/python/dune/perftool/pdelab/basis.py b/python/dune/perftool/pdelab/basis.py index 9a44bb64..4cdff4d7 100644 --- a/python/dune/perftool/pdelab/basis.py +++ b/python/dune/perftool/pdelab/basis.py @@ -89,21 +89,37 @@ def traverse_lfs_tree(arg): @iname -def _lfs_iname(element, argcount): +def _lfs_iname(element, argcount, context): name = name_lfs(element) bound = name_lfs_bound(name) if argcount != 0: name = 'lfsu' - name = name + '_index' + if context: + context = '_' + context + + name = name + context + '_index' domain(name, bound) return name -def lfs_iname(element, argcount=0): - return _lfs_iname(element, argcount) +def lfs_iname(element, argcount=0, context=''): + """ Get the iname to iterate over the local function space given by element + + Arguments: + ---------- + element: ufl.FiniteElementBase + The finite element this local function space belongs to + argcount: int + Use to realize double nesting in case of jacobians + context: str + Some generation methods will require you to duplicate an iname for + a given purpose, see the 'Loops and dependencies' of the loopy docs: + https://documen.tician.de/loopy/tutorial.html#loops-and-dependencies + """ + return _lfs_iname(element, argcount, context) @cached @@ -117,7 +133,7 @@ def evaluate_basis(element, name): qp, name, ), - assignees=name, + assignees=frozenset({name}), ) @@ -139,7 +155,7 @@ def evaluate_reference_gradient(element, name): qp, name, ), - assignees=name, + assignees=frozenset({name}), ) @@ -154,7 +170,7 @@ def evaluate_basis_gradient(element, name): # TODO this is of course not yet correct temporary_variable(name, shape=(name_lfs_bound(element), name_dimension())) jac = name_jacobian_inverse_transposed() - index = lfs_iname(element) + index = lfs_iname(element, context='transformgrads') reference_gradients = name_reference_gradient(element) instruction(inames=(index, quadrature_iname(), @@ -165,9 +181,7 @@ def evaluate_basis_gradient(element, name): name, ), assignees=name, - read_variables=(reference_gradients, - ), - + read_variables=frozenset({reference_gradients}), ) @@ -192,9 +206,8 @@ def evaluate_trialfunction(element, name): basis, index ), - assignees=name, - read_variables=(basis, - ), + assignees=frozenset({name}), + read_variables=frozenset({basis}), ) @@ -204,7 +217,7 @@ def evaluate_trialfunction_gradient(element, name): # TODO this is of course not yet correct temporary_variable(name, shape=(name_dimension(),)) lfs = name_lfs(element) - index = lfs_iname(element) + index = lfs_iname(element, context='trialgrad') basis = name_basis_gradient(element) instruction(inames=(quadrature_iname(), index, @@ -214,7 +227,8 @@ def evaluate_trialfunction_gradient(element, name): index, basis ), - assignees=name, - read_variables=(basis, - ), + assignees=frozenset({name}), + read_variables=frozenset({basis}), + forced_iname_deps=frozenset({quadrature_iname(), index}), + forced_iname_deps_is_final=True, ) diff --git a/python/dune/perftool/pymbolic/inameset.py b/python/dune/perftool/pymbolic/inameset.py new file mode 100644 index 00000000..ba1a9d7e --- /dev/null +++ b/python/dune/perftool/pymbolic/inameset.py @@ -0,0 +1,17 @@ +from pymbolic.mapper import CombineMapper +from pymbolic.primitives import Variable + + +class INameMapper(CombineMapper): + def map_subscript(self, e): + if isinstance(e.index, Variable): + return frozenset([str(e.index)]) + else: + return frozenset([str(i) for i in e.index]) + + def combine(self, values): + return frozenset().union(*values) + + +def get_index_inames(e): + return INameMapper()(e) -- GitLab