diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py index 506958e458107553745ada42bb475d4de0282139..ed8b7c00766351b9aa774b2a1f04b79d1d4f7f72 100644 --- a/python/dune/perftool/generation/loopy.py +++ b/python/dune/perftool/generation/loopy.py @@ -131,6 +131,13 @@ def expr_instruction_impl(**kw): return loopy.ExpressionInstruction(**kw) +@generator_factory(item_tags=("instruction", "callinstruction"), + cache_key_generator=lambda *a, **kw: kw['expression'], + ) +def call_instruction_impl(**kw): + return loopy.CallInstruction(**kw) + + class _IDCounter: count = 0 @@ -157,7 +164,10 @@ def instruction(code=None, expression=None, **kwargs): if code: c_instruction_impl(id=id, code=code, **kwargs) if expression: - expr_instruction_impl(id=id, expression=expression, **kwargs) + if 'assignees' in kwargs and len(kwargs['assignees']) == 0: + call_instruction_impl(id=id, expression=expression, **kwargs) + else: + expr_instruction_impl(id=id, expression=expression, **kwargs) # return the ID, as it is the only useful information to the user return id diff --git a/python/dune/perftool/loopy/functions.py b/python/dune/perftool/loopy/functions.py index 4637c4ed191eeb1acaa001e7634e8285e7664de7..ec06459e4167968a61f46bcac1b2f8d2b07b8947 100644 --- a/python/dune/perftool/loopy/functions.py +++ b/python/dune/perftool/loopy/functions.py @@ -24,13 +24,33 @@ def coefficient_mangler(target, func, dtypes): class PDELabAccumulationFunction(FunctionIdentifier): - def __init__(self, accumobj): + def __init__(self, accumobj, rank): self.accumobj = accumobj + self.rank = rank + + assert rank in (1, 2) def __getinitargs__(self): - return (self.accumobj,) + return (self.accumobj, self.rank) def accumulation_mangler(target, func, dtypes): if isinstance(func, PDELabAccumulationFunction): - return CallMangleInfo('{}.accumulate'.format(func.accumobj), (), ()) + if func.rank == 1: + return CallMangleInfo('{}.accumulate'.format(func.accumobj), + (), + (NumpyType(str), + NumpyType(numpy.int32), + NumpyType(numpy.float64), + ) + ) + if func.rank == 2: + return CallMangleInfo('{}.accumulate'.format(func.accumobj), + (), + (NumpyType(str), + NumpyType(numpy.int32), + NumpyType(str), + NumpyType(numpy.int32), + NumpyType(numpy.float64), + ) + ) diff --git a/python/dune/perftool/loopy/target.py b/python/dune/perftool/loopy/target.py index d31b54c76d3e418c2f89a1f1a216c1ca59bbfdec..41cb9a7c10aaa226958713a0d56db7bd70bcf2fb 100644 --- a/python/dune/perftool/loopy/target.py +++ b/python/dune/perftool/loopy/target.py @@ -5,7 +5,6 @@ from loopy.target import (TargetBase, from loopy.target.c import CASTBuilder from loopy.target.c.codegen.expression import ExpressionToCMapper - _registry = {'float32': 'float', 'int32': 'int', 'float64': 'double', diff --git a/python/dune/perftool/loopy/transformer.py b/python/dune/perftool/loopy/transformer.py index 5af4377c0c5197b0cd3abaf16efcfeababde5269..843e73e4df092a18cefb958017c62a99e07c8683 100644 --- a/python/dune/perftool/loopy/transformer.py +++ b/python/dune/perftool/loopy/transformer.py @@ -41,18 +41,6 @@ def index_sum_iname(i): return name_index(i) -_outerloop = None - - -def set_outerloop(v): - global _outerloop - _outerloop = v - - -def get_outerloop(): - return _outerloop - - class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapper): def __init__(self, measure, subdomain_id): # Some variables describing the integral measure of this integral @@ -62,71 +50,13 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp # Call base class constructors super(UFL2LoopyVisitor, self).__init__() - # Some state variables that need to be persistent over multiple calls - self.index_placeholder_removal_mapper = IndexPlaceholderRemoval() - - def _assign(self, o): - # In some corner cases we do not even need a temporary variable - if isinstance(o, int) or isinstance(o, float): - return o - - # Assign a name to the temporary variable we want our result in - temp = get_temporary_name() - temp_shape = () - - # Determine which inames this assignment depends on and whether it should - # be merged into the main accumulation loop. Right now we apply the following - # procedure: All instructions that depend on all argument loop indices are - # merged into the main loop nest. Those instructions that depend on some - # argument loop indices but not all are merged into the kernel by fixing the - # loop ordering of the main loop (or are pulled outside if this already happened). - assignee = Variable(temp) - iname_deps = self.inames - merge_into_main_loopnest = True - - if self.rank == 2 and len(set(iname_deps)) == 1: - if get_outerloop() is None: - set_outerloop(iname_deps[0].number) - if iname_deps[0].number != get_outerloop(): - merge_into_main_loopnest = False - - # Change the assignee! - if not merge_into_main_loopnest: - assignee_index_placeholder = LFSIndexPlaceholderExtraction()(o).pop() - assignee_index = self.index_placeholder_removal_mapper(assignee_index_placeholder, duplicate_inames=True) - assignee = Subscript(assignee, (assignee_index,)) - temp_shape = (name_lfs_bound(name_leaf_lfs(assignee_index_placeholder.element, assignee_index_placeholder.restriction)),) - - # Now introduce duplicate inames for the argument loop if necessary - replaced_iname_deps = [self.index_placeholder_removal_mapper(i, duplicate_inames=not merge_into_main_loopnest, wrap_in_variable=False) for i in iname_deps] - replaced_expr = self.index_placeholder_removal_mapper(o, duplicate_inames=not merge_into_main_loopnest) - - # Now we assign this expression to a new temporary variable - insn_id = instruction(assignee=assignee, - expression=replaced_expr, - forced_iname_deps=frozenset({i for i in replaced_iname_deps}).union(frozenset({quadrature_iname()})), - forced_iname_deps_is_final=True, - ) - - # Actually, if we have a cache hit, we need to change our temporary - from dune.perftool.generation import retrieve_cache_items - temp = filter(lambda i: i.id == insn_id, retrieve_cache_items("instruction"))[0].assignee_name - - retvar = Variable(temp) - if not merge_into_main_loopnest: - retvar_index = self.index_placeholder_removal_mapper(assignee_index_placeholder) - retvar = Subscript(retvar, (retvar_index,)) - - # Now that we know its exact name, declare the temporary - temporary_variable(temp, shape=temp_shape) - - return retvar - def __call__(self, o): # Reset some state variables that are reinitialized for each accumulation term self.argshape = 0 self.redinames = () self.inames = [] + self.dimension_index_aliases = [] + self.substitution_rules = [] # Initialize the local function spaces that we might need for this term # We therefore gather a list of modified trial functions too. @@ -152,13 +82,6 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp if pymbolic_expr == 0: return - # We assign the result to a temporary variable to ease things a bit - if not isinstance(pymbolic_expr, Variable): - pymbolic_expr = self._assign(pymbolic_expr) - - # Transform the IndexPlaceholders into real inames - self.inames = [self.index_placeholder_removal_mapper(i, wrap_in_variable=False) for i in self.inames] - # Collect the arguments for the accumulate function accumargs = [None] * (2 * len(test_ma)) residual_shape = [None] * len(test_ma) @@ -177,6 +100,10 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp # And generate a local function space for it! lfs = name_lfs(ma.argexpr.ufl_element(), ma.restriction, ma.component) + from dune.perftool.generation import valuearg + from loopy.types import NumpyType + valuearg(lfs, dtype=NumpyType("str")) + if len(subel.value_shape()) != 0: from dune.perftool.pdelab.geometry import dimension_iname from dune.perftool.pdelab.basis import lfs_child @@ -188,8 +115,8 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp lfsi = lfs_iname(subel, ma.restriction, count=count) - accumargs[2 * icount] = lfs - accumargs[2 * icount + 1] = lfsi + accumargs[2 * icount] = Variable(lfs) + accumargs[2 * icount + 1] = Variable(lfsi) arg_restr[icount] = ma.restriction @@ -199,13 +126,6 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp from dune.perftool.pdelab.quadrature import name_factor factor = name_factor() - # Generate the code snippet for this accumulation instruction - code = "{}.accumulate({}, {}*{});".format(accumvar, - ", ".join(accumargs), - pymbolic_expr.name, - factor, - ) - predicates = frozenset({}) # Maybe wrap this instruction into a condiditional. This mostly happens with mixed boundary conditions @@ -241,9 +161,13 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp predicates = predicates.union(['{} == {}'.format(name, self.subdomain_id)]) - # Finally, issue the instruction - instruction(code=code, - read_variables=frozenset({factor, pymbolic_expr.name}), + from dune.perftool.loopy.functions import PDELabAccumulationFunction + from pymbolic.primitives import Call, Product + expr = Product((pymbolic_expr, Variable(factor))) + expr = Call(PDELabAccumulationFunction(accumvar, len(test_ma)), tuple(a for a in accumargs) + (expr,)) + + instruction(assignees=frozenset({}), + expression=expr, forced_iname_deps=frozenset(self.inames).union(frozenset({quadrature_iname()})), forced_iname_deps_is_final=True, predicates=predicates @@ -279,14 +203,15 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp leaf_element = element.sub_elements()[0] # Have the issued instruction depend on the iname for this localfunction space - self.inames.append(LFSIndexPlaceholder(leaf_element, restriction, o.number())) + iname = lfs_iname(leaf_element, restriction, o.number()) + self.inames.append(iname) if self.grad: from dune.perftool.pdelab.argument import name_testfunction_gradient - return Subscript(Variable(name_testfunction_gradient(leaf_element, restriction)), (LFSIndexPlaceholder(leaf_element, restriction, o.number()),)) + return Subscript(Variable(name_testfunction_gradient(leaf_element, restriction)), (Variable(iname),)) else: from dune.perftool.pdelab.argument import name_testfunction - return Subscript(Variable(name_testfunction(leaf_element, restriction)), (LFSIndexPlaceholder(leaf_element, restriction, o.number()),)) + return Subscript(Variable(name_testfunction(leaf_element, restriction)), (Variable(iname),)) def coefficient(self, o): # If this is a trialfunction, we do something entirely different @@ -336,8 +261,8 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp use_indices = self.last_index[self.argshape:] for i in range(self.argshape): - from dune.perftool.pdelab.geometry import dimension_iname - self.index_placeholder_removal_mapper.index_replacement_map[self.last_index[i].expr] = Variable(dimension_iname(context='arg')) + self.dimension_index_aliases.append(i) +# self.index_placeholder_removal_mapper.index_replacement_map[self.last_index[i].expr] = Variable(dimension_iname(context='arg')) self.argshape = 0 if isinstance(aggr, Subscript): @@ -369,37 +294,46 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapp else: from loopy import Reduction - oldinames = self.inames - self.inames = [] - # Recurse to get the summation expression term = self.call(o.ufl_operands[0]) - from dune.perftool.pymbolic.inameset import get_index_inames - used_inames = frozenset({self.index_placeholder_removal_mapper.index_replacement_map.get(i, i).name for i in get_index_inames(term, as_variables=True)}) - self.inames = [i for i in used_inames.intersection(frozenset({i for i in oldinames}))] + self.inames - - # Now filter all those reduction inames that are marked for removal - implicit_inames = [i.name for i in self.index_placeholder_removal_mapper.index_replacement_map] - self.redinames = tuple(i for i in self.redinames if i not in implicit_inames) + self.redinames = tuple(i for i in self.redinames if i not in self.dimension_index_aliases) if len(self.redinames) > 0: - ret = self._assign(Reduction("sum", self.redinames, term)) + ret = Reduction("sum", self.redinames, term) + name = get_temporary_name() + # Generate a substitution rule for this one. + from loopy import SubstitutionRule + self.substitution_rules.append(SubstitutionRule(name, + (), + ret + ) + ) + + ret = Variable(name) else: ret = term - self.inames = self.inames + oldinames - # Reset the reduction inames for future indexsums self.redinames = () return ret + def _index_or_fixed_index(self, index): + from ufl.classes import FixedIndex + if isinstance(index, FixedIndex): + return index._value + else: + from pymbolic.primitives import Variable + from dune.perftool.pdelab import name_index + if index in self.dimension_index_aliases: + from dune.perftool.pdelab.geometry import dimension_iname + return Variable(dimension_iname(context='arg')) + else: + return Variable(name_index(index)) + def multi_index(self, o): - from dune.perftool.pdelab import pymbolic_index - return tuple(IndexPlaceholder(pymbolic_index(op)) for op in o.indices()) + return tuple(self._index_or_fixed_index(i) for i in o) def index(self, o): - # One might as well take the uflname as string here, but I apply this function - from dune.perftool.pdelab import name_index - return IndexPlaceholder(Variable(name_index(o))) + return self._index_or_fixed_index(o) diff --git a/python/dune/perftool/pdelab/__init__.py b/python/dune/perftool/pdelab/__init__.py index 50d41728a10ef4af2f0b34cb4a4b92db6da508b0..be3efaf4a60c51b712aec668cb272090f7935674 100644 --- a/python/dune/perftool/pdelab/__init__.py +++ b/python/dune/perftool/pdelab/__init__.py @@ -1,7 +1,6 @@ """ The pdelab specific parts of the code generation process """ from dune.perftool.generation import (preamble, - pymbolic_expr, symbol, ) @@ -21,7 +20,6 @@ def name_index(index): raise NotImplementedError -@pymbolic_expr def pymbolic_index(index): from ufl.classes import FixedIndex if isinstance(index, FixedIndex): diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index 86f719037ebae1776d71e613af9ac1aac1497f33..ef2165e7060a6dc487bec341dc2b14dec60c8718 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -154,6 +154,7 @@ def assembly_routine_signature(): def generate_kernel(integrals): + subst_rules = [] for integral in integrals: integrand = integral.integrand() measure = integral.integral_type() @@ -171,6 +172,7 @@ def generate_kernel(integrals): # Iterate over the terms and generate a kernel for term in accterms: visitor(term) + subst_rules.extend(visitor.substitution_rules) # Extract the information, which is needed to create a loopy kernel. # First extracting it, might be useful to alter it before kernel generation. @@ -187,7 +189,7 @@ def generate_kernel(integrals): # Create the kernel from loopy import make_kernel, preprocess_kernel kernel = make_kernel(domains, - instructions, + instructions + subst_rules, arguments, temporary_variables=temporaries, function_manglers=[accumulation_mangler, coefficient_mangler], @@ -213,7 +215,6 @@ def generate_kernel(integrals): from dune.perftool.generation import delete_cache_items delete_cache_items("(not file) and (not clazz)") - # Return the actual code (might instead return kernels...) return kernel @@ -225,7 +226,7 @@ class AssemblyMethod(ClassMember): content.append('{') if kernel is not None: for i, p in kernel.preambles: - content.append(p) + content.append(' ' + p) content.extend(l for l in generate_body(kernel).split('\n')[1:-1]) content.append('}') ClassMember.__init__(self, content) @@ -286,10 +287,6 @@ def generate_localoperator_kernels(formdata, namedata): # Generate the necessary residual methods for measure in set(i.integral_type() for i in form.integrals()): with global_context(integral_type=measure): - # Reset the outer loop - from dune.perftool.loopy.transformer import set_outerloop - set_outerloop(None) - enum_pattern() pattern_baseclass() enum_alpha() @@ -321,10 +318,6 @@ def generate_localoperator_kernels(formdata, namedata): with global_context(form_type="jacobian"): for measure in set(i.integral_type() for i in jacform.integrals()): - # Reset the outer loop - from dune.perftool.loopy.transformer import set_outerloop - set_outerloop(None) - with global_context(integral_type=measure): kernel = generate_kernel(jacform.integrals_by_type(measure)) operator_kernels[(measure, 'jacobian')] = kernel diff --git a/python/dune/perftool/pymbolic/inameset.py b/python/dune/perftool/pymbolic/inameset.py deleted file mode 100644 index 18a8503614578d3cd83f3d800e1c26274c98589f..0000000000000000000000000000000000000000 --- a/python/dune/perftool/pymbolic/inameset.py +++ /dev/null @@ -1,37 +0,0 @@ -from pymbolic.mapper import CombineMapper -from pymbolic.primitives import Variable - - -class INameMapper(CombineMapper): - def _map_index(self, i): - if isinstance(i, Variable): - return frozenset([str(i)]) - from dune.perftool.pymbolic.placeholder import IndexPlaceholder, LFSIndexPlaceholder - if isinstance(i, IndexPlaceholder): - return frozenset([str(i.expr.name)]) - if isinstance(i, LFSIndexPlaceholder): - return frozenset({}) - - def map_subscript(self, e): - if isinstance(e.index, tuple): - return self.combine(tuple(self._map_index(i) for i in e.index)) - else: - return self._map_index(e.index) - - def map_constant(self, e): - return frozenset({}) - - def map_algebraic_leaf(self, e): - return frozenset({}) - - def combine(self, values): - return frozenset().union(*values) - - -def get_index_inames(e, as_variables=False): - iset = INameMapper()(e) - - if as_variables: - return frozenset(Variable(i) for i in iset) - else: - return iset diff --git a/python/dune/perftool/pymbolic/placeholder.py b/python/dune/perftool/pymbolic/placeholder.py deleted file mode 100644 index 05e4787f76268d631e4f4fdb8f91e3056991ec9f..0000000000000000000000000000000000000000 --- a/python/dune/perftool/pymbolic/placeholder.py +++ /dev/null @@ -1,91 +0,0 @@ -from pymbolic.mapper import Collector, IdentityMapper -from pymbolic.primitives import Variable - - -class IndexPlaceholderBase(object): - pass - - -class IndexPlaceholder(IndexPlaceholderBase): - def __init__(self, expr): - self.expr = expr - - def __hash__(self): - return hash(self.expr) - - def __eq__(self, o): - return (type(o) == IndexPlaceholder) and (self.expr == o.expr) - - -class LFSIndexPlaceholder(IndexPlaceholderBase): - def __init__(self, element, restriction, number, context=''): - self.element = element - self.restriction = restriction - self.number = number - self.context = context - - def __hash__(self): - return hash((self.element, self.restriction, self.number, self.context)) - - def __eq__(self, o): - return (self.element == o.element) and (self.restriction == o.restriction) and (self.number == o.number) and (self.context == o.context) - - -class IndexPlaceholderRemoval(IdentityMapper): - def __init__(self): - # Initialize base class - super(IndexPlaceholderRemoval, self).__init__() - - # Initialize state variables that are persistent over multiple calls - self.index_replacement_map = {} - - def __call__(self, o, wrap_in_variable=True, duplicate_inames=False): - self.duplicate_inames = duplicate_inames - self.wrap_in_variable = wrap_in_variable - return self.rec(o) - - def map_foreign(self, o): - # How do we map constants here? map_constant was not correct - if isinstance(o, int) or isinstance(o, float) or isinstance(o, str): - return o - - # There might be tuples of indices where only one is a placeholder, - # so we recurse manually into the tuple. - if isinstance(o, tuple): - return tuple(self.rec(op) for op in o) - - # We only handle IndexPlaceholder instances from now on - assert isinstance(o, IndexPlaceholderBase) - - if isinstance(o, LFSIndexPlaceholder): - context = o.context - from dune.perftool.loopy.transformer import get_outerloop - if (self.duplicate_inames) and (o.number != get_outerloop()): - context = 'dup' - - from dune.perftool.pdelab.basis import lfs_iname - i = lfs_iname(o.element, o.restriction, count=o.number, context=context) - - if isinstance(o, IndexPlaceholder): - i = self.index_replacement_map.get(o.expr, o.expr) - - if self.wrap_in_variable and not isinstance(i, Variable): - return Variable(i) - else: - return i - - def map_reduction(self, o): - from loopy import Reduction - o.expr = self.rec(o.expr) - return o - - -class LFSIndexPlaceholderExtraction(Collector): - def map_foreign(self, o): - if isinstance(o, int) or isinstance(o, float): - return set() - assert isinstance(o, tuple) - return set(i for i in o if isinstance(i, LFSIndexPlaceholder)) - - def map_reduction(self, o): - return self.rec(o.expr) diff --git a/python/dune/perftool/ufl/delegate.py b/python/dune/perftool/ufl/delegate.py deleted file mode 100644 index 09a6fc44acf4953b2d3bdf5f22417ab8871754bd..0000000000000000000000000000000000000000 --- a/python/dune/perftool/ufl/delegate.py +++ /dev/null @@ -1,23 +0,0 @@ -""" Implement a design pattern for a multifunction that delegates to another -multifunction and gets back control if the delegate does not define a handler. -This avoids writing isinstance-if-orgies in handlers -""" - - -def delegate(Delegate, *args, **kwargs): - assert(isinstance(Delegate, type)) - assert(Delegate.expr == Delegate.undefined) - - class MyDelegate(Delegate): - def __init__(self, *a, **ka): - Delegate.__init__(self, *a, **ka) - - def expr(s, *a, **ka): - s._back(*a, **ka) - - def _handler(s, *a, **ka): - delegate_instance = MyDelegate(*args, **kwargs) - delegate_instance._back = s - return delegate_instance(*a, **ka) - - return _handler diff --git a/python/dune/perftool/ufl/modified_terminals.py b/python/dune/perftool/ufl/modified_terminals.py index 0d489dcda91148c7c5a3bf184a87106a412b649a..27543551b6b218c299e4b82502a4e00c2cf1af47 100644 --- a/python/dune/perftool/ufl/modified_terminals.py +++ b/python/dune/perftool/ufl/modified_terminals.py @@ -146,17 +146,3 @@ class _ModifiedArgumentExtractor(MultiFunction): def extract_modified_arguments(expr, **kwargs): return _ModifiedArgumentExtractor()(expr, **kwargs) - - -class _ModifiedArgumentNumber(MultiFunction): - """ return the number() of a modified argument """ - def expr(self, o): - return self(o.ufl_operands[0]) - - def argument(self, o): - return o.number() - - -def modified_argument_number(expr): - """ Given an expression, return the number() of the argument in it """ - return _ModifiedArgumentNumber()(expr) diff --git a/python/dune/perftool/ufl/rank.py b/python/dune/perftool/ufl/rank.py deleted file mode 100644 index 973b2c071fd422e41675d33972c03f2e1f53eaae..0000000000000000000000000000000000000000 --- a/python/dune/perftool/ufl/rank.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import absolute_import -from ufl.algorithms import MultiFunction - - -class _UFLRank(MultiFunction): - def __call__(self, expr): - return len(MultiFunction.__call__(self, expr)) - - def expr(self, o): - return set(a for op in o.ufl_operands for a in MultiFunction.__call__(self, op)) - - def argument(self, o): - return (o.number(),) - - -def ufl_rank(o): - return _UFLRank()(o)