Merge branch 'feature/rework-accumulation-splitting' into 'master'

Rework accumulation splitting See merge request !127

Merge branch 'feature/rework-accumulation-splitting' into 'master'
884654cc · Dominic Kempf · a7e18eee · 9423c01a · 884654cc · 884654cc
Commit 884654cc authored 8 years ago by Dominic Kempf
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -309,39 +309,23 @@ def grad_iname(index, dim):

 @backend(interface="accum_insn")
 def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
+    # When we do not do sumfactorization we do not split the test function
+    assert(accterm.argument.expr is None)
+
    # First we do the tree traversal to get a pymbolic expression representing this expression
    pymbolic_expr = visitor(accterm.term)

-    # If this is a gradient, we generate an iname
-    additional_inames = frozenset({})
-    if accterm.new_indices is not None:
-        from ufl.domain import find_geometric_dimension
-        dim = find_geometric_dimension(accterm.argument.expr)
-        for i in accterm.new_indices:
-            if i not in visitor.dimension_indices:
-                additional_inames = additional_inames.union(frozenset({grad_iname(i, dim)}))
-
    # It may happen that an entire accumulation term vanishes. We do nothing in that case
    if pymbolic_expr == 0:
        return

-    # We also traverse the test function to get its pymbolic equivalent
-    if accterm.new_indices is not None:
-        from ufl.classes import Indexed, MultiIndex
-        accum_expr = Indexed(accterm.argument.expr, MultiIndex(accterm.new_indices))
-    else:
-        accum_expr = accterm.argument.expr
-    test_expr = visitor(accum_expr)
-
-    # Combine expression and test function
-    from pymbolic.primitives import Product
-    pymbolic_expr = Product((pymbolic_expr, test_expr))
-
    # Collect the lfs and lfs indices for the accumulate call
-    test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure)
+    test_lfs = determine_accumulation_space(accterm.term, 0, measure)
+
    # In the jacobian case, also determine the space for the ansatz space
    ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure)

+    # Collect the lfs and lfs indices for the accumulate call
    from dune.perftool.pdelab.argument import name_accumulation_variable
    accumvar = name_accumulation_variable((test_lfs.get_restriction() + ansatz_lfs.get_restriction()))

@@ -361,7 +345,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):

    instruction(assignees=(),
                expression=expr,
-                forced_iname_deps=additional_inames.union(frozenset(visitor.inames).union(frozenset(quad_inames))),
+                forced_iname_deps=frozenset(visitor.inames).union(frozenset(quad_inames)),
                forced_iname_deps_is_final=True,
                predicates=predicates
                )
@@ -380,10 +364,6 @@ def visit_integrals(integrals):
                from dune.perftool.ufl.transformations.axiparallel import diagonal_jacobian
                integrand = diagonal_jacobian(integrand)

-        # Gather dimension indices
-        from dune.perftool.ufl.dimensionindex import dimension_index_mapping
-        dimension_indices = dimension_index_mapping(integrand)
-
        # Generate code for the LFS trees present in the form
        from dune.perftool.ufl.modified_terminals import extract_modified_arguments
        test_ma = extract_modified_arguments(integrand, argnumber=0)
@@ -398,18 +378,24 @@ def visit_integrals(integrals):
            from dune.perftool.pdelab.spaces import traverse_lfs_tree
            traverse_lfs_tree(ma)

-        # Now split the given integrand into accumulation expressions
+        # Now split the given integrand into accumulation
+        # expressions. If we do sumfactorization we cut the test
+        # argument from the rest of the expression. This gives the
+        # right input for the sumfactorization kernel of stage 3.
        from dune.perftool.ufl.extract_accumulation_terms import split_into_accumulation_terms
-        accterms = split_into_accumulation_terms(integrand)
+        if get_option('sumfact'):
+            accterms = split_into_accumulation_terms(integrand, cut_test_arg=True, split_gradients=True)
+        else:
+            accterms = split_into_accumulation_terms(integrand)

        # Iterate over the terms and generate a kernel
        for accterm in accterms:
-            # Adjust the index map for the visitor
-            from copy import deepcopy
-            indexmap = deepcopy(dimension_indices)
-            for i, j in accterm.indexmap.items():
-                if i in indexmap:
-                    indexmap[j] = indexmap[i]
+            # Get dimension indices
+            indexmap = {}
+            from dune.perftool.ufl.dimensionindex import dimension_index_mapping
+            if accterm.argument.expr is not None:
+                indexmap.update(dimension_index_mapping(accterm.indexed_test_arg()))
+            indexmap.update(dimension_index_mapping(accterm.term))

            # Get a transformer instance for this kernel
            if get_option('sumfact'):
@@ -428,7 +414,7 @@ def visit_integrals(integrals):
                    set_subst_rule(name, expr)

            # Ensure CSE on detjac * quadrature weight
-            domain = accterm.argument.argexpr.ufl_domain()
+            domain = accterm.term.ufl_domain()
            if measure == "cell":
                set_subst_rule("integration_factor_cell1",
                               uc.QuadratureWeight(domain) * uc.Abs(uc.JacobianDeterminant(domain)))

--- a/python/dune/perftool/ufl/extract_accumulation_terms.py
+++ b/python/dune/perftool/ufl/extract_accumulation_terms.py
-"""
-This module defines an UFL transformation, that takes a UFL expression
-and transforms it into a list of accumulation terms.
-"""
+"""Transform an UFL expression into list of accumulation terms."""
+
 from dune.perftool.ufl.flatoperators import construct_binary_operator
 from dune.perftool.ufl.modified_terminals import extract_modified_arguments
 from dune.perftool.ufl.transformations import ufl_transformation
 from dune.perftool.ufl.transformations.replace import replace_expression
 from dune.perftool.ufl.transformations.identitypropagation import identity_propagation
+from dune.perftool.ufl.transformations.order_gradient_restriction import order_gradient_restriction
 from dune.perftool.ufl.transformations.zeropropagation import zero_propagation
-from dune.perftool.ufl.transformations.reindexing import reindexing
-from dune.perftool.ufl.modified_terminals import analyse_modified_argument, ModifiedArgument
+from dune.perftool.ufl.modified_terminals import ModifiedArgument
 from dune.perftool.pdelab.restriction import Restriction

 from ufl.classes import Zero, Identity, Indexed, IntValue, MultiIndex, Product, IndexSum
@@ -19,51 +17,79 @@ from pytools import Record


 class AccumulationTerm(Record):
+    """Store informations about accumulation terms
+
+    Arguments:
+    ----------
+    term: The UFL expression we want to accumulate
+    argument: Corresponding test function (in case we split it) without indices
+    new_indices: Indices of the test function
+    """
    def __init__(self,
                 term,
-                 argument,
-                 indexmap={},
+                 argument=None,
                 new_indices=None
                 ):
-        assert isinstance(argument, ModifiedArgument)
+        assert (isinstance(argument, ModifiedArgument) or argument is None)
        Record.__init__(self,
                        term=term,
                        argument=argument,
-                        indexmap=indexmap,
                        new_indices=new_indices
                        )

+    def indexed_test_arg(self):
+        """Return test argument of this accumulation term with its indices"""
+        if self.new_indices is None:
+            return self.argument.expr
+        else:
+            return Indexed(self.argument.expr, MultiIndex(self.new_indices))

-@ufl_transformation(name="accterms", extraction_lambda=lambda l: [at.term for at in l])
-def split_into_accumulation_terms(expr):
-    """Split an UFL expression into several accumulation parts and return a list
-
-    For a residual evaluation we split for different test functions
-    and according to the restriction (sefl/neighbor at skeletons). For
-    the jacobians we also need to split according to the ansatz
-    functions (and their restriction).

-    Note: This function is not an UFL transformation. Nonetheless it
-    has the @ufl_transformation decorator for debugging purposes.
+def split_into_accumulation_terms(expr, cut_test_arg=False, split_gradients=False):
+    """Split an expression into a list of AccumulationTerms

    Arguments:
    ----------
-    expr: UFL expression we want to split
+    expr: The UFL expression we split into AccumulationTerms
+    cut_test_arg: Wheter we want to return AccumulationTerms where the
+        test function is cut from the rest of the expression.
+    """
+    expr = order_gradient_restriction(expr)
+
+    expression_list = split_expression(expr, split_gradients=split_gradients)
+    acc_term_list = []
+
+    for e in expression_list:
+        if cut_test_arg:
+            acc_term_list.append(cut_accumulation_term(e))
+        else:
+            acc_term_list.append(AccumulationTerm(e, ModifiedArgument(expr=None)))
+    return acc_term_list
+
+
+@ufl_transformation(name="splitt_expression", extraction_lambda=lambda l: [e for e in l])
+def split_expression(expr, split_gradients=False):
+    """Split expression into a list of expressions
+
+    Note: This is not an ufl transformation since it does not return
+    an expression. It is nonetheless decorated with ufl_transformation
+    since the decorator can handle lists of expressions and it can be
+    useful for debugging to get the corresponding trees.
    """
    # Store AccumulationTerms in this list
    ret = []

-    # Extract a list of modified terminals for the test function
-    # One accumulation instruction will be generated for each of these.
-    test_args = extract_modified_arguments(expr, argnumber=0, do_index=False)
-
-    # Extract a list of modified terminals for the ansatz function
-    # in jacobian forms.
-    all_jacobian_args = extract_modified_arguments(expr, argnumber=1, do_index=False)
+    # Extract a list of modified terminals for the test function. We
+    # will split the expression into one part for each moidified argument.
+    test_args = extract_modified_arguments(expr,
+                                           argnumber=0,
+                                           do_index=False,
+                                           do_gradient=split_gradients)

    for test_arg in test_args:
-        # Do this as a multi step replacement procedure to avoid UFL nagging
-        # about too much stuff in reconstructing the expressions
+        # Do this as a multi step replacement procedure to avoid UFL
+        # nagging about too much stuff in reconstructing the
+        # expressions

        # 1) We first cut the expression to the relevant modified test_function
        # by replacing all other test functions with zero
@@ -77,94 +103,13 @@ def split_into_accumulation_terms(expr):
        # 2) Propagate indexed zeros to simplify expression
        replace_expr = zero_propagation(replace_expr)

-        # 3) Cut the test function itself from the expression
-        #
-        # This is done by replacing the test function with an
-        # appropriate product of identity matrices. This way we can
-        # make sure that the indices of the result will be right. This
-        # is best explained by an example:
-        #
-        # Suppose we have the following expression:
-        #
-        # \sum_{i,j} a_{i,j} (\nabla v)_{i,j} + \sum_{k,l} b_{k,l} (\nable v)_{k,l}
-        #
-        # If we want to cut the gradient of the test function v we
-        # need to make sure, that both sums have the right indices:
-        #
-        # \sum_{m,n} (a_{m,n} + b_{m,n}) (\nabla v)_{m,n}
-        #
-        # and we extract (a_{m,n} + b_{m,n}). We achieve that by the
-        # following replacements:
-        #
-        # (\nabla v)_{i,j} -> I_{m,i} I_{n,j}
-        # (\nabla v)_{k,l} -> I_{m,k} I_{n,l}
-        #
-        # Resulting in:
-        #
-        # \sum_{i,j} a_{i,j} I_{m,i} I_{n,j} + \sum_{k,l} b_{k,l} I_{m,k} I_{n,l}
-        #
-        # In step 4 this will collaps to: a_{m,n} + b_{m,n}
-        replacement = {}
-        indexmap = {}
-        newi = None
-        backmap = {}
-        # Get all appearances of test functions with their indices
-        indexed_test_args = extract_modified_arguments(replace_expr, argnumber=0, do_index=True)
-        for indexed_test_arg in indexed_test_args:
-            if indexed_test_arg.index:
-                # If the test function is indexed, create a new multiindex of this shape
-                # -> (m,n) in the example above
-                if newi is None:
-                    newi = indices(len(indexed_test_arg.index))
-
-                # This handles the special case with two identical
-                # indices on an test function. E.g. in Stokes on an
-                # axiparallel grid you get a term:
-                #
-                # -(\sum_i K_{i,i} (\nabla v)_{i,i}) w
-                #   = \sum_k \sum_l (-K_{k,k} w I_{k,l} (\nabla v)_{k,l})
-                #
-                # and we want to split
-                #
-                # -K_{k,k} w I_{k,l} corresponding to (\nabla v)_{k,l}.
-                #
-                # This is done by:
-                # - Replacing (\nabla v)_{i,i} with I_{k,i}*(\nabla
-                #   v)_{k,l}. Here (\nabla v)_{k,l} serves as a
-                #   placeholder and will be replaced later on.
-                # - Propagating the identity in step 4.
-                # - Replacing (\nabla v)_{k,l} by I_{k,l} after step 4.
-                if len(set(indexed_test_arg.index._indices)) < len(indexed_test_arg.index._indices):
-                    if len(indexed_test_arg.index._indices) > 2:
-                        raise NotImplementedError("Test argument with more than three indices and double occurence ist not implemented.")
-                    mod_index_map = {indexed_test_arg.index: MultiIndex((newi[0], newi[1]))}
-                    mod_indexed_test_arg = replace_expression(indexed_test_arg.expr,
-                                                              replacemap=mod_index_map)
-                    rep = Product(Indexed(Identity(2),
-                                          MultiIndex((newi[0], indexed_test_arg.index[0]))),
-                                  mod_indexed_test_arg)
-                    backmap.update({mod_indexed_test_arg:
-                                    Indexed(Identity(2), MultiIndex((newi[0], newi[1])))})
-                    replacement.update({indexed_test_arg.expr: rep})
-                    indexmap.update({indexed_test_arg.index[0]: newi[0]})
-                else:
-                    # Replace indexed test function with a product of identities.
-                    identities = tuple(Indexed(Identity(2), MultiIndex((i,) + (j,)))
-                                       for i, j in zip(newi, indexed_test_arg.index._indices))
-                    replacement.update({indexed_test_arg.expr:
-                                        construct_binary_operator(identities, Product)})
-
-                    indexmap.update({i: j for i, j in zip(indexed_test_arg.index._indices, newi)})
-            else:
-                replacement.update({indexed_test_arg.expr: IntValue(1)})
-        replace_expr = replace_expression(replace_expr, replacemap=replacement)
-
-        # 4) Collapse any identity nodes that may have been introduced
-        # by replacing vectors and maybe replace placeholder from last step
-        replace_expr = identity_propagation(replace_expr)
-        replace_expr = replace_expression(replace_expr, replacemap=backmap)
+        # Test if we have a jacobian by extracting aguments with argnumber=1
+        all_jacobian_args = extract_modified_arguments(expr,
+                                                       argnumber=1,
+                                                       do_index=False,
+                                                       do_gradient=split_gradients)

-        # 5) Further split according to trial function in jacobian terms
+        # 3) Further split according to trial function in jacobian terms
        #
        # Note: We need to split according to the FunctionView. For
        # example in Stokes with test functions v and q we have to
@@ -179,7 +124,7 @@ def split_into_accumulation_terms(expr):
                                                    do_index=False,
                                                    do_gradient=False)
            for trial_arg in trial_args:
-                # 5.1) Restrict to this trial argument
+                # 3.1) Restrict to this trial argument
                replacement = {ma.expr: Zero(shape=ma.expr.ufl_shape,
                                             free_indices=ma.expr.ufl_free_indices,
                                             index_dimensions=ma.expr.ufl_index_dimensions)
@@ -187,11 +132,11 @@ def split_into_accumulation_terms(expr):
                replacement[trial_arg.expr] = trial_arg.expr
                jac_expr = replace_expression(replace_expr, replacemap=replacement)

-                # 5.2) Propagate indexed zeros to simplify expression
+                # 3.2) Propagate indexed zeros to simplify expression
                jac_expr = zero_propagation(jac_expr)

-                # 5.3) Accumulate according to restriction
-                indexed_jac_args = extract_modified_arguments(jac_expr, argnumber=1, do_index=True)
+                # 3.3) Split according to restriction
+                indexed_jac_args = extract_modified_arguments(jac_expr, argnumber=1, do_index=True, do_gradient=False)
                for restriction in (Restriction.NONE, Restriction.POSITIVE, Restriction.NEGATIVE):
                    replacement = {ma.expr: Zero(shape=ma.expr.ufl_shape,
                                                 free_indices=ma.expr.ufl_free_indices,
@@ -201,10 +146,111 @@ def split_into_accumulation_terms(expr):

                    acc_expr = replace_expression(jac_expr, replacemap=replacement)

-                    if not isinstance(jac_expr, Zero):
-                        ret.append(AccumulationTerm(acc_expr, test_arg, indexmap, newi))
+                    if not isinstance(acc_expr, Zero):
+                        ret.append(acc_expr)
        else:
            if not isinstance(replace_expr, Zero):
-                ret.append(AccumulationTerm(replace_expr, test_arg, indexmap, newi))
+                ret.append(replace_expr)

    return ret
+
+
+@ufl_transformation(name="cut_accum", extraction_lambda=lambda l: [l.term])
+def cut_accumulation_term(expr):
+    """Cut test function from expression and return AccumulationTerm
+
+    Note: This assumes that there is only one test function in the
+    expression. You need to make sure to split your expression into
+    appropriate parts before calling this!
+    """
+    # If there are multiple test arguments or gradients and
+    # non-gradients something wen wrong!
+    test_args = extract_modified_arguments(expr, argnumber=0, do_index=False, do_gradient=True)
+    assert len(test_args) == 1
+    test_arg = test_args[0]
+
+    # 1) Cut the test function itself from the expression
+    #
+    # This is done by replacing the test function with an
+    # appropriate product of identity matrices. This way we can
+    # make sure that the indices of the result will be right. This
+    # is best explained by an example:
+    #
+    # Suppose we have the following expression:
+    #
+    # \sum_{i,j} a_{i,j} (\nabla v)_{i,j} + \sum_{k,l} b_{k,l} (\nable v)_{k,l}
+    #
+    # If we want to cut the gradient of the test function v we
+    # need to make sure, that both sums have the right indices:
+    #
+    # \sum_{m,n} (a_{m,n} + b_{m,n}) (\nabla v)_{m,n}
+    #
+    # and we extract (a_{m,n} + b_{m,n}). We achieve that by the
+    # following replacements:
+    #
+    # (\nabla v)_{i,j} -> I_{m,i} I_{n,j}
+    # (\nabla v)_{k,l} -> I_{m,k} I_{n,l}
+    #
+    # Resulting in:
+    #
+    # \sum_{i,j} a_{i,j} I_{m,i} I_{n,j} + \sum_{k,l} b_{k,l} I_{m,k} I_{n,l}
+    #
+    # In step 2 this will collaps to: a_{m,n} + b_{m,n}
+    replacement = {}
+    newi = None
+    backmap = {}
+    # Get all appearances of the test function with their indices
+    indexed_test_args = extract_modified_arguments(expr, argnumber=0, do_index=True)
+    for indexed_test_arg in indexed_test_args:
+        if indexed_test_arg.index:
+            # If the test function is indexed, create a new multiindex of this shape
+            # -> (m,n) in the example above
+            if newi is None:
+                newi = indices(len(indexed_test_arg.index))
+
+            # This handles the special case with two identical
+            # indices on an test function. E.g. in Stokes on an
+            # axiparallel grid you get a term:
+            #
+            # -(\sum_i K_{i,i} (\nabla v)_{i,i}) w
+            #   = \sum_k \sum_l (-K_{k,k} w I_{k,l} (\nabla v)_{k,l})
+            #
+            # and we want to split
+            #
+            # -K_{k,k} w I_{k,l} corresponding to (\nabla v)_{k,l}.
+            #
+            # This is done by:
+            # - Replacing (\nabla v)_{i,i} with I_{k,i}*(\nabla
+            #   v)_{k,l}. Here (\nabla v)_{k,l} serves as a
+            #   placeholder and will be replaced later on.
+            # - Propagating the identity in step 4.
+            # - Replacing (\nabla v)_{k,l} by I_{k,l} after step 4.
+            if len(set(indexed_test_arg.index._indices)) < len(indexed_test_arg.index._indices):
+                if len(indexed_test_arg.index._indices) > 2:
+                    raise NotImplementedError("Test argument with more than three indices and double occurence ist not implemented.")
+                mod_index_map = {indexed_test_arg.index: MultiIndex((newi[0], newi[1]))}
+                mod_indexed_test_arg = replace_expression(indexed_test_arg.expr,
+                                                          replacemap=mod_index_map)
+                rep = Product(Indexed(Identity(2),
+                                      MultiIndex((newi[0], indexed_test_arg.index[0]))),
+                              mod_indexed_test_arg)
+                backmap.update({mod_indexed_test_arg:
+                                Indexed(Identity(2), MultiIndex((newi[0], newi[1])))})
+                replacement.update({indexed_test_arg.expr: rep})
+            else:
+                # Replace indexed test function with a product of identities.
+                identities = tuple(Indexed(Identity(2), MultiIndex((i,) + (j,)))
+                                   for i, j in zip(newi, indexed_test_arg.index._indices))
+                replacement.update({indexed_test_arg.expr:
+                                    construct_binary_operator(identities, Product)})
+
+        else:
+            replacement.update({indexed_test_arg.expr: IntValue(1)})
+    expr = replace_expression(expr, replacemap=replacement)
+
+    # 2) Collapse any identity nodes that may have been introduced
+    # by replacing vectors and maybe replace placeholder from last step
+    expr = identity_propagation(expr)
+    expr = replace_expression(expr, replacemap=backmap)
+
+    return AccumulationTerm(expr, test_arg, newi)
--- a/python/dune/perftool/ufl/transformations/__init__.py
+++ b/python/dune/perftool/ufl/transformations/__init__.py
-""" Define the general infrastructure for debuggable UFL transformations"""
+"""Infrastructure for printing trees of UFL expressions."""


 class UFLTransformationWrapper(object):
@@ -47,7 +47,6 @@ class UFLTransformationWrapper(object):

        # We do also assume that the transformation returns an ufl expression or a list there of
        ret_for_print = self.extractExpressionListFromResult(ret)
-
        assert isinstance(ret_for_print, list) and all(isinstance(e, Expr) for e in ret_for_print)

        # Maybe output the returned expression

--- a/python/dune/perftool/ufl/transformations/order_gradient_restriction.py
+++ b/python/dune/perftool/ufl/transformations/order_gradient_restriction.py
+"""Move PositiveRestricted/NegativeRestricted nodes below ReferenceGrad nodes"""
+
+from ufl.algorithms import MultiFunction
+from ufl.classes import ReferenceGrad, PositiveRestricted, NegativeRestricted
+
+from dune.perftool.ufl.transformations import ufl_transformation
+
+
+class OrderGradientRestriction(MultiFunction):
+    call = MultiFunction.__call__
+
+    def __call__(self, o):
+        return self.call(o)
+
+    def expr(self, o):
+        return self.reuse_if_untouched(o, *tuple(self(op) for op in o.ufl_operands))
+
+    def positive_restricted(self, o):
+        if isinstance(o.ufl_operands[0], ReferenceGrad):
+            return ReferenceGrad(PositiveRestricted(o.ufl_operands[0].ufl_operands[0]))
+        return self.reuse_if_untouched(o, *tuple(self(op) for op in o.ufl_operands))
+
+    def negative_restricted(self, o):
+        if isinstance(o.ufl_operands[0], ReferenceGrad):
+            return ReferenceGrad(NegativeRestricted(o.ufl_operands[0].ufl_operands[0]))
+        return self.reuse_if_untouched(o, *tuple(self(op) for op in o.ufl_operands))
+
+
+@ufl_transformation(name="order_gradient_restriction")
+def order_gradient_restriction(e):
+    """Move PositiveRestricted/NegativeRestricted nodes below ReferenceGrad nodes"""
+    return OrderGradientRestriction()(e)
--- a/test/stokes/stokes_quadrilateral.mini
+++ b/test/stokes/stokes_quadrilateral.mini
@@ -13,4 +13,4 @@ extension = vtu
 [formcompiler]
 numerical_jacobian = 1, 0 | expand num
 exact_solution_expression = g
-compare_l2errorsquared = 1e-11
+compare_l2errorsquared = 1e-10