From bea763e55677e55d79f8fcb5041b9e492f9a9fc4 Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Fri, 6 May 2016 17:24:59 +0200 Subject: [PATCH] Poisson DG is working! + Shaped temporaries --- python/dune/perftool/loopy/transformer.py | 94 +++++++++++++++----- python/dune/perftool/pdelab/basis.py | 20 ++--- python/dune/perftool/pdelab/localoperator.py | 8 ++ python/dune/perftool/pymbolic/placeholder.py | 64 +++++++++++++ 4 files changed, 156 insertions(+), 30 deletions(-) create mode 100644 python/dune/perftool/pymbolic/placeholder.py diff --git a/python/dune/perftool/loopy/transformer.py b/python/dune/perftool/loopy/transformer.py index 70cea9f9..d5efa83b 100644 --- a/python/dune/perftool/loopy/transformer.py +++ b/python/dune/perftool/loopy/transformer.py @@ -8,6 +8,10 @@ from __future__ import absolute_import from dune.perftool import Restriction from dune.perftool.ufl.modified_terminals import ModifiedTerminalTracker from dune.perftool.pymbolic.uflmapper import UFL2PymbolicMapper +from dune.perftool.pymbolic.placeholder import (IndexPlaceholder, + IndexPlaceholderExtraction, + IndexPlaceholderRemoval, + ) from dune.perftool.pdelab.geometry import GeometryMapper from dune.perftool.generation import (domain, get_temporary_name, @@ -35,24 +39,63 @@ def index_sum_iname(i): return name_index(i) +_outerloop = None + + +def set_outerloop(v): + global _outerloop + _outerloop = v + + +def get_outerloop(): + return _outerloop + + class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapper): def __init__(self, measure): - self.iname_stack = [quadrature_iname()] self.measure = measure super(UFL2LoopyVisitor, self).__init__() def _assign(self, o): # In some corner cases we do not even need a temporary variable - if isinstance(o, int): + if isinstance(o, int) or isinstance(o, float): return o # Assign a name to the temporary variable we want our result in temp = get_temporary_name() + temp_shape = () + + # Determine which inames this assignment depends on and whether it should + # be merged into the main accumulation loop. Right now we apply the following + # procedure: All instructions that depend on all argument loop indices are + # merged into the main loop nest. Those instructions that depend on some + # argument loop indices but not all are merged into the kernel by fixing the + # loop ordering of the main loop (or are pulled outside if this already happened). + assignee = Variable(temp) + iname_deps = self.inames + merge_into_main_loopnest = True + + if self.rank == 2 and len(set(iname_deps)) == 1: + if get_outerloop() is None: + set_outerloop(iname_deps[0].number) + if iname_deps[0].number != get_outerloop(): + merge_into_main_loopnest = False + + # Change the assignee! + if not merge_into_main_loopnest: + assignee_index_placeholder = IndexPlaceholderExtraction()(o).pop() + assignee_index = IndexPlaceholderRemoval(duplicate_inames=True)(assignee_index_placeholder) + assignee = Subscript(assignee, (assignee_index,)) + temp_shape = (name_lfs_bound(name_lfs(assignee_index_placeholder.element, assignee_index_placeholder.restriction)),) + + # Now introduce duplicate inames for the argument loop if necessary + replaced_iname_deps = [IndexPlaceholderRemoval(duplicate_inames=not merge_into_main_loopnest, wrap_in_variable=False)(i) for i in iname_deps] + replaced_expr = IndexPlaceholderRemoval(duplicate_inames=not merge_into_main_loopnest)(o) # Now we assign this expression to a new temporary variable - insn_id = instruction(assignee=Variable(temp), - expression=o, - forced_iname_deps=frozenset({i for i in self.iname_stack}), + insn_id = instruction(assignee=assignee, + expression=replaced_expr, + forced_iname_deps=frozenset({i for i in replaced_iname_deps}).union(frozenset({quadrature_iname()})), forced_iname_deps_is_final=True, ) @@ -60,12 +103,24 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp from dune.perftool.generation import retrieve_cache_items temp = filter(lambda i: i.id == insn_id, retrieve_cache_items("instruction"))[0].assignee_name + retvar = Variable(temp) + if not merge_into_main_loopnest: + retvar_index = IndexPlaceholderRemoval()(assignee_index_placeholder) + retvar = Subscript(retvar, (retvar_index,)) + # Now that we know its exact name, declare the temporary - temporary_variable(temp) + temporary_variable(temp, shape=temp_shape) - return Variable(temp) + return retvar def __call__(self, o): + # Determine the rank of the term + from dune.perftool.ufl.rank import ufl_rank + self.rank = ufl_rank(o) + + # And initialize a list of found inames + self.inames = [] + # First we do the tree traversal to get a pymbolic expression representing this expression ret = self.call(o) @@ -81,15 +136,14 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp restriction = Restriction.NEGATIVE # Have the issued instruction depend on the iname for this localfunction space - from dune.perftool.pdelab.basis import lfs_iname - self.iname_stack.append(lfs_iname(o.element(), restriction, argcount=o.number())) + self.inames.append(IndexPlaceholder(o.element(), restriction, o.number())) if self.grad: from dune.perftool.pdelab.argument import name_testfunction_gradient - return Subscript(Variable(name_testfunction_gradient(o, restriction)), (Variable(lfs_iname(o.element(), restriction, argcount=o.number())),)) + return Subscript(Variable(name_testfunction_gradient(o, restriction)), (IndexPlaceholder(o.element(), restriction, o.number()),)) else: from dune.perftool.pdelab.argument import name_testfunction - return Subscript(Variable(name_testfunction(o, restriction)), (Variable(lfs_iname(o.element(), restriction, argcount=o.number())),)) + return Subscript(Variable(name_testfunction(o, restriction)), (IndexPlaceholder(o.element(), restriction, o.number()),)) def coefficient(self, o): # If this is a trialfunction, we do something entirely different @@ -132,17 +186,22 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp from loopy import Reduction from dune.perftool.ufl.shape import determine_shape - inames = () + oldinames = self.inames + self.inames = [] + + red_inames = () # Define an iname for each of the indices in the multiindex for i in o.ufl_operands[1].indices(): - inames = inames + (index_sum_iname(i),) + red_inames = red_inames + (index_sum_iname(i),) shape = determine_shape(o.ufl_operands[0], i) from dune.perftool.pdelab import name_index domain(name_index(i), shape) # Recurse to get the summation expression term = self.call(o.ufl_operands[0]) - ret = self._assign(Reduction("sum", inames, term)) + ret = self._assign(Reduction("sum", red_inames, term)) + + self.inames = self.inames + oldinames return ret @@ -182,9 +241,7 @@ def transform_accumulation_term(term, measure, subdomain_id): icount = 0 lfs = name_lfs(ma.argexpr.element(), ma.restriction) -# force_duplicate_iname = (count == 1) -# lfsi = lfs_iname(ma.argexpr.element(), ma.restriction, force_duplicate_iname=force_duplicate_iname) - lfsi = lfs_iname(ma.argexpr.element(), ma.restriction, argcount=count) + lfsi = lfs_iname(ma.argexpr.element(), ma.restriction, count=count) accumargs[2 * icount] = lfs accumargs[2 * icount + 1] = lfsi @@ -196,9 +253,6 @@ def transform_accumulation_term(term, measure, subdomain_id): from dune.perftool.pdelab.argument import name_accumulation_variable accumvar = name_accumulation_variable(arg_restr) -# if len(test_ma) == 2: -# from IPython import embed; embed() - # The residual/the jacobian should be represented through a loopy global argument # TODO this seems still a bit hacky, esp. w.r.t. systems for rs in residual_shape: diff --git a/python/dune/perftool/pdelab/basis.py b/python/dune/perftool/pdelab/basis.py index e94b0554..303e860f 100644 --- a/python/dune/perftool/pdelab/basis.py +++ b/python/dune/perftool/pdelab/basis.py @@ -132,19 +132,16 @@ def traverse_lfs_tree(arg): @generator_factory(item_tags=("iname",), cache_key_generator=lambda e, r, c: (e, c)) def _lfs_iname(element, restriction, context): - name = name_lfs(element, restriction) - bound = name_lfs_bound(name) - - if context: - context = '_' + context + lfs = name_lfs(element, restriction) + bound = name_lfs_bound(lfs) - name = name + context + '_index' + name = "{}_{}_index".format(lfs, context) domain(name, bound) return name -def lfs_iname(element, restriction, context='', argcount=0): +def lfs_iname(element, restriction, count=None, context=''): """ Get the iname to iterate over the local function space given by element Arguments: @@ -158,9 +155,12 @@ def lfs_iname(element, restriction, context='', argcount=0): a given purpose, see the 'Loops and dependencies' of the loopy docs: https://documen.tician.de/loopy/tutorial.html#loops-and-dependencies """ - if argcount != 0: - context = '{}{}_arg{}'.format(context, '_' if len(context) else '', argcount) - + assert not ((context == '') and (count is None)) + if count is not None: + if context != '': + context = "{}_{}".format(count, context) + else: + context = str(count) return _lfs_iname(element, restriction, context) diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index 1162f9e8..8335f8b0 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -284,6 +284,10 @@ def generate_localoperator_kernels(formdata, namedata): with global_context(form_type='residual'): # Generate the necessary residual methods for integral in form.integrals(): + # Reset the outer loop + from dune.perftool.loopy.transformer import set_outerloop + set_outerloop(None) + with global_context(integral_type=integral.integral_type()): enum_pattern() pattern_baseclass() @@ -316,6 +320,10 @@ def generate_localoperator_kernels(formdata, namedata): with global_context(form_type="jacobian"): for integral in jacform.integrals(): + # Reset the outer loop + from dune.perftool.loopy.transformer import set_outerloop + set_outerloop(None) + with global_context(integral_type=integral.integral_type()): kernel = generate_kernel(integral) operator_kernels[(integral.integral_type(), 'jacobian')] = kernel diff --git a/python/dune/perftool/pymbolic/placeholder.py b/python/dune/perftool/pymbolic/placeholder.py new file mode 100644 index 00000000..c96765b1 --- /dev/null +++ b/python/dune/perftool/pymbolic/placeholder.py @@ -0,0 +1,64 @@ +from pymbolic.mapper import Collector, IdentityMapper +from pymbolic.primitives import Variable + + +class IndexPlaceholder(object): + def __init__(self, element, restriction, number, context=''): + self.element = element + self.restriction = restriction + self.number = number + self.context = context + + def __hash__(self): + return hash((self.element, self.restriction, self.number, self.context)) + + def __eq__(self, o): + return (self.element == o.element) and (self.restriction == o.restriction) and (self.number == o.number) and (self.context == o.context) + + +class IndexPlaceholderRemoval(IdentityMapper): + def __init__(self, wrap_in_variable=True, duplicate_inames=False): + self.duplicate_inames = duplicate_inames + self.wrap_in_variable = wrap_in_variable + super(IndexPlaceholderRemoval, self).__init__() + + def map_foreign(self, o): + # How do we map constants here? map_constant was not correct + if isinstance(o, int) or isinstance(o, float): + return o + + # There might be tuples of indices where only one is a placeholder, + # so we recurse manually into the tuple. + if isinstance(o, tuple): + return tuple(self.rec(op) for op in o) + + # We only handle IndexPlaceholder instances from now on + assert isinstance(o, IndexPlaceholder) + + context = o.context + from dune.perftool.loopy.transformer import get_outerloop + if (self.duplicate_inames) and (o.number != get_outerloop()): + context = 'dup' + + from dune.perftool.pdelab.basis import lfs_iname + i = lfs_iname(o.element, o.restriction, count=o.number, context=context) + if self.wrap_in_variable: + return Variable(i) + else: + return i + + def map_reduction(self, o): + from loopy import Reduction + o.expr = self.rec(o.expr) + return o + + +class IndexPlaceholderExtraction(Collector): + def map_foreign(self, o): + if isinstance(o, int) or isinstance(o, float): + return set() + assert isinstance(o, tuple) + return set(i for i in o if isinstance(i, IndexPlaceholder)) + + def map_reduction(self, o): + return self.rec(o.expr) -- GitLab