diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index b7ad0e727a7bccc5d23e07fa002c718754ba5c67..ce55b3597363fa62daaa4239cf51cf95e0cc056a 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -19,6 +19,8 @@ from dune.perftool.cgen.clazz import (AccessModifier, ClassMember, ) from dune.perftool import Restriction +from pymbolic.primitives import Variable +from pytools import Record def name_form(formdata, data): @@ -236,8 +238,156 @@ def assembly_routine_signature(formdata): assert False +class AccumulationSpace(Record): + def __init__(self, + lfs=None, + index=None, + restriction=None, + element=None, + ): + Record.__init__(self, + lfs=lfs, + index=index, + restriction=restriction, + element=element, + ) + + def get_args(self): + if self.lfs is None: + return () + else: + return (self.lfs, self.index) + + def get_restriction(self): + if self.restriction is None: + return () + else: + return (self.restriction,) + + +def determine_accumulation_space(expr, number): + from dune.perftool.ufl.modified_terminals import extract_modified_arguments + args = extract_modified_arguments(expr, argnumber=number) + + # If this is a residual term we return a dummy object + if len(args) == 0: + return AccumulationSpace() + + assert(len(args) == 1) + ma, = args + + # Extract information on the finite element + from ufl.functionview import select_subelement + subel = select_subelement(ma.argexpr.ufl_element(), ma.component) + + # And generate a local function space for it! + from dune.perftool.pdelab.spaces import name_lfs, name_lfs_bound, lfs_child, lfs_iname + lfs = name_lfs(ma.argexpr.ufl_element(), ma.restriction, ma.component) + from dune.perftool.generation import valuearg + from loopy.types import NumpyType + valuearg(lfs, dtype=NumpyType("str")) + + if len(subel.value_shape()) != 0: + from dune.perftool.pdelab.geometry import dimension_iname + idims = tuple(dimension_iname(context='arg', count=number) for i in range(len(subel.value_shape()))) + lfs = lfs_child(lfs, idims, shape=subel.value_shape(), symmetry=subel.symmetry) + subel = subel.sub_elements()[0] + + lfsi = Variable(lfs_iname(subel, ma.restriction, count=number)) + + # If the LFS is not yet a pymbolic expression, make it one + from pymbolic.primitives import Expression + if not isinstance(lfs, Expression): + lfs = Variable(lfs) + + return AccumulationSpace(lfs=lfs, + index=lfsi, + restriction=ma.restriction, + ) + + +def boundary_predicates(): + # # Maybe wrap this instruction into a condiditional. This mostly happens with mixed boundary conditions + # if self.subdomain_id not in ['everywhere', 'otherwise']: + # # We need to reconstruct the subdomain_data parameter of the measure + # # I am *totally* confused as to why this information is not at hand anyway, + # # but conversation with Martin pointed me to dolfin.fem.assembly where this + # # is done in preprocessing with the limitation of only one possible type of + # # modified measure per integral type. + # + # # Get the original form and inspect the present measures + # from dune.perftool.generation import get_global_context_value + # original_form = get_global_context_value("formdata").original_form + # + # sd = original_form.subdomain_data() + # assert len(sd) == 1 + # subdomains, = list(sd.values()) + # domain, = list(sd.keys()) + # for k in list(subdomains.keys()): + # if subdomains[k] is None: + # del subdomains[k] + # + # # Finally extract the original subdomain_data (which needs to be present!) + # assert self.measure in subdomains + # subdomain_data = subdomains[self.measure] + # + # # Determine the name of the parameter function + # name = get_global_context_value("data").object_names[id(subdomain_data)] + # + # # Trigger the generation of code for this thing in the parameter class + # from ufl.checks import is_cellwise_constant + # cellwise_constant = is_cellwise_constant(o) + # from dune.perftool.pdelab.parameter import intersection_parameter_function + # intersection_parameter_function(name, subdomain_data, cellwise_constant, t='int') + # + # predicates = predicates.union(['{} == {}'.format(name, self.subdomain_id)]) + return frozenset([]) + + +def generate_accumulation_instruction(visitor, accterm): + # First we do the tree traversal to get a pymbolic expression representing this expression + pymbolic_expr = visitor(accterm.term) + + # It may happen that an entire accumulation term vanishes. We do nothing in that case + if pymbolic_expr == 0: + return + + # We also traverse the test function to get its pymbolic equivalent + test_expr = visitor(accterm.argument.expr) + + # Combine expression and test function + from pymbolic.primitives import Product + pymbolic_expr = Product((pymbolic_expr, test_expr)) + + # Collect the lfs and lfs indices for the accumulate call + test_lfs = determine_accumulation_space(accterm.argument.expr, 0) + # In the jacobian case, also determine the space for the ansatz space + ansatz_lfs = determine_accumulation_space(accterm.term, 1) + + from dune.perftool.pdelab.argument import name_accumulation_variable + accumvar = name_accumulation_variable((ansatz_lfs.get_restriction() + test_lfs.get_restriction())) + + predicates = boundary_predicates() + + rank = 1 if ansatz_lfs.lfs is None else 2 + + from dune.perftool.pdelab.argument import PDELabAccumulationFunction + from pymbolic.primitives import Call + expr = Call(PDELabAccumulationFunction(accumvar, rank), + (ansatz_lfs.get_args() + test_lfs.get_args() + (pymbolic_expr,)) + ) + + from dune.perftool.generation import instruction + from dune.perftool.pdelab.quadrature import quadrature_iname + instruction(assignees=(), + expression=expr, + forced_iname_deps=frozenset(visitor.inames).union(frozenset({quadrature_iname()})), + forced_iname_deps_is_final=True, + predicates=predicates + ) + + def generate_kernel(integrals): - subst_rules = [] for integral in integrals: integrand = integral.integrand() measure = integral.integral_type() @@ -249,9 +399,28 @@ def generate_kernel(integrals): from dune.perftool.ufl.transformations.axiparallel import diagonal_jacobian integrand = diagonal_jacobian(integrand) + # Gather dimension indices from dune.perftool.ufl.dimensionindex import dimension_index_mapping dimension_indices = dimension_index_mapping(integrand) + # Generate code for the LFS trees present in the form + from dune.perftool.ufl.modified_terminals import extract_modified_arguments + test_ma = extract_modified_arguments(integrand,) + trial_ma = extract_modified_arguments(integrand, coeffcount=0) + apply_ma = extract_modified_arguments(integrand, coeffcount=1) + + import itertools + for ma in itertools.chain(test_ma, trial_ma, apply_ma): + if measure == 'exterior_facet': + ma.restriction = Restriction.NEGATIVE + + from dune.perftool.pdelab.spaces import traverse_lfs_tree + traverse_lfs_tree(ma) + + from dune.perftool.options import set_option + set_option('print_transformations', True) + set_option('print_transformations_dir', '.') + # Now split the given integrand into accumulation expressions from dune.perftool.ufl.transformations.extract_accumulation_terms import split_into_accumulation_terms accterms = split_into_accumulation_terms(integrand) @@ -262,8 +431,7 @@ def generate_kernel(integrals): # Iterate over the terms and generate a kernel for term in accterms: - visitor(term) - subst_rules.extend(visitor.substitution_rules) + generate_accumulation_instruction(visitor, term) # Extract the information, which is needed to create a loopy kernel. # First extracting it, might be useful to alter it before kernel generation. @@ -282,7 +450,7 @@ def generate_kernel(integrals): # Create the kernel from loopy import make_kernel, preprocess_kernel kernel = make_kernel(domains, - instructions + subst_rules, + instructions, arguments, temporary_variables=temporaries, function_manglers=manglers, diff --git a/python/dune/perftool/ufl/transformations/extract_accumulation_terms.py b/python/dune/perftool/ufl/transformations/extract_accumulation_terms.py index 94413d1a6fdbcc3abf1d5139e851454426e6b6f3..98922bfad056998676a8a7bd28d68385ff50133e 100644 --- a/python/dune/perftool/ufl/transformations/extract_accumulation_terms.py +++ b/python/dune/perftool/ufl/transformations/extract_accumulation_terms.py @@ -1,56 +1,63 @@ """ This module defines an UFL transformation, that takes a UFL expression -and transforms it into a sum of terms that all contain the correct number -of test function terms (which is equal to the form rank). +and transforms it into a sum of accumulation terms. """ -from __future__ import absolute_import - from dune.perftool.ufl.modified_terminals import extract_modified_arguments from dune.perftool.ufl.transformations import ufl_transformation from dune.perftool.ufl.transformations.replace import replace_expression +from dune.perftool.ufl.transformations.identitypropagation import identity_propagation -from ufl.algorithms import MultiFunction -from ufl.classes import Zero +from ufl.classes import Zero, Identity, Indexed, IntValue, MultiIndex +from ufl.core.multiindex import indices -import itertools +from pytools import Record -class _ReplacementDict(dict): - def __init__(self, good=[], bad=[]): - dict.__init__(self) - for a in bad: - self[a] = Zero(shape=a.ufl_shape, free_indices=a.ufl_free_indices, index_dimensions=a.ufl_index_dimensions) - for a in good: - self[a] = a +class AccumulationTerm(Record): + def __init__(self, term, argument): + Record.__init__(self, term=term, argument=argument) -@ufl_transformation(name="accterms2", extraction_lambda=lambda l: l) +@ufl_transformation(name="accterms", extraction_lambda=lambda l: [at.term for at in l]) def split_into_accumulation_terms(expr): - mod_args = extract_modified_arguments(expr) - - accumulation_terms = [] - - # Treat the case of a rank 1 form: - if len(list(filter(lambda ma: ma.argexpr.number() == 1, mod_args))) == 0: - for arg in mod_args: - # Do the replacement on the expression - accumulation_terms.append(replace_expression(expr, - replacemap=_ReplacementDict(good=(arg.expr,), - bad=[ma.expr for ma in mod_args], - ) - ) - ) - # and now the case of a rank 2 form: - else: - for arg1, arg2 in itertools.product(filter(lambda ma: ma.argexpr.number() == 0, mod_args), - filter(lambda ma: ma.argexpr.number() == 1, mod_args) - ): - accumulation_terms.append(replace_expression(expr, - replacemap=_ReplacementDict(good=(arg1.expr, arg2.expr), - bad=[ma.expr for ma in mod_args], - ) - ) - ) - - # and return the result - return accumulation_terms + ret = [] + + # Extract a list of modified terminals for the test function + # One accumulation instruction will be generated for each of these. + test_args = extract_modified_arguments(expr, argnumber=0) + + # Extract a list of modified terminals for the ansatz function + # in jacobian forms. Only the restriction of those terminals will + # be used to generate new accumulation terms! + all_jacobian_args = extract_modified_arguments(expr, argnumber=1) + + for test_arg in test_args: + # Do this as a multi step replacement procedure to avoid UFL nagging + # about too much stuff in reconstructing the expressions + + # 1) We first cut the expression to the relevant modified test_function + # Build a replacement dictionary + replacement = {ma.expr: Zero() for ma in test_args} + replacement[test_arg.expr] = test_arg.expr + replace_expr = replace_expression(expr, replacemap=replacement) + + # 2) Cut the test function itself from the expression + if test_arg.reference_grad: + newi = indices(1) + replacement = {test_arg.expr: Indexed(Identity(2), MultiIndex(newi + test_arg.index._indices))} + else: + replacement = {test_arg.expr: IntValue(1)} + replace_expr = replace_expression(replace_expr, replacemap=replacement) + + # 3) Collapse any identity nodes that may have been introduced by replacing vectors + replace_expr = identity_propagation(replace_expr) + + # 4) Further split according to trial function in jacobian terms + if all_jacobian_args: + for jac_arg in all_jacobian_args: + pass + else: + if not isinstance(replace_expr, Zero): + ret.append(AccumulationTerm(replace_expr, test_arg)) + + return ret diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py index ccc6cb24091e6b48b53d0a86292e34e93dd42f23..9860add42c88d2723b6c4338a062aa5572fa76a8 100644 --- a/python/dune/perftool/ufl/visitor.py +++ b/python/dune/perftool/ufl/visitor.py @@ -44,128 +44,8 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): self.argshape = 0 self.transpose_necessary = False self.inames = [] - self.substitution_rules = [] - - # Initialize the local function spaces that we might need for this term - # We therefore gather a list of modified trial functions too. - from dune.perftool.ufl.modified_terminals import extract_modified_arguments - test_ma = extract_modified_arguments(o,) - trial_ma = extract_modified_arguments(o, coeffcount=0) - apply_ma = extract_modified_arguments(o, coeffcount=1) - - # All test and trial functions on boundary integrals are technically restricted - import itertools - for ma in itertools.chain(test_ma, trial_ma, apply_ma): - if self.measure == 'exterior_facet': - ma.restriction = Restriction.NEGATIVE - - from dune.perftool.pdelab.spaces import traverse_lfs_tree - traverse_lfs_tree(ma) - - # Determine the rank of the term - self.rank = len(test_ma) - - # First we do the tree traversal to get a pymbolic expression representing this expression - pymbolic_expr = self.call(o) - - # It may happen that an entire accumulation term vanishes. We do nothing in that case - if pymbolic_expr == 0: - return - - # Collect the arguments for the accumulate function - accumargs = [None] * (2 * len(test_ma)) - residual_shape = [None] * len(test_ma) - arg_restr = [None] * len(test_ma) - - for ma in test_ma: - count = ma.argexpr.number() - if len(test_ma) == 2: - icount = (count + 1) % 2 - else: - icount = 0 - - # Extract the subelement of the (mixed) finite element - from ufl.functionview import select_subelement - subel = select_subelement(ma.argexpr.ufl_element(), ma.component) - - # And generate a local function space for it! - lfs = name_lfs(ma.argexpr.ufl_element(), ma.restriction, ma.component) - from dune.perftool.generation import valuearg - from loopy.types import NumpyType - valuearg(lfs, dtype=NumpyType("str")) - - if len(subel.value_shape()) != 0: - from dune.perftool.pdelab.geometry import dimension_iname - from dune.perftool.pdelab.basis import lfs_child - idims = tuple(dimension_iname(context='arg', count=i) for i in range(len(subel.value_shape()))) - lfs = lfs_child(lfs, idims, shape=subel.value_shape(), symmetry=subel.symmetry) - residual_shape[icount] = name_lfs_bound(name_leaf_lfs(subel.sub_elements()[0], ma.restriction)) - subel = subel.sub_elements()[0] - else: - residual_shape[icount] = name_lfs_bound(lfs) - - lfsi = lfs_iname(subel, ma.restriction, count=count) - - # If the LFS is not yet a pymbolic expression, make it one - from pymbolic.primitives import Expression - if not isinstance(lfs, Expression): - lfs = Variable(lfs) - - accumargs[2 * icount] = lfs - accumargs[2 * icount + 1] = Variable(lfsi) - - arg_restr[icount] = ma.restriction - - from dune.perftool.pdelab.argument import name_accumulation_variable - accumvar = name_accumulation_variable(arg_restr) - - predicates = frozenset({}) - - # Maybe wrap this instruction into a condiditional. This mostly happens with mixed boundary conditions - if self.subdomain_id not in ['everywhere', 'otherwise']: - # We need to reconstruct the subdomain_data parameter of the measure - # I am *totally* confused as to why this information is not at hand anyway, - # but conversation with Martin pointed me to dolfin.fem.assembly where this - # is done in preprocessing with the limitation of only one possible type of - # modified measure per integral type. - - # Get the original form and inspect the present measures - from dune.perftool.generation import get_global_context_value - original_form = get_global_context_value("formdata").original_form - - sd = original_form.subdomain_data() - assert len(sd) == 1 - subdomains, = list(sd.values()) - domain, = list(sd.keys()) - for k in list(subdomains.keys()): - if subdomains[k] is None: - del subdomains[k] - - # Finally extract the original subdomain_data (which needs to be present!) - assert self.measure in subdomains - subdomain_data = subdomains[self.measure] - - # Determine the name of the parameter function - name = get_global_context_value("data").object_names[id(subdomain_data)] - - # Trigger the generation of code for this thing in the parameter class - from ufl.checks import is_cellwise_constant - cellwise_constant = is_cellwise_constant(o) - from dune.perftool.pdelab.parameter import intersection_parameter_function - intersection_parameter_function(name, subdomain_data, cellwise_constant, t='int') - - predicates = predicates.union(['{} == {}'.format(name, self.subdomain_id)]) - - from dune.perftool.pdelab.argument import PDELabAccumulationFunction - from pymbolic.primitives import Call - expr = Call(PDELabAccumulationFunction(accumvar, len(test_ma)), tuple(a for a in accumargs) + (pymbolic_expr,)) - instruction(assignees=(), - expression=expr, - forced_iname_deps=frozenset(self.inames).union(frozenset({quadrature_iname()})), - forced_iname_deps_is_final=True, - predicates=predicates - ) + return self.call(o) # # Form argument/coefficients handlers: