Cleanup accumulation.py

93af3b0b · Dominic Kempf · 0a3f6ad3 · 93af3b0b
Commit 93af3b0b authored 7 years ago by Dominic Kempf
--- a/python/dune/perftool/sumfact/accumulation.py
+++ b/python/dune/perftool/sumfact/accumulation.py
@@ -67,7 +67,7 @@ class AlreadyAssembledInput(SumfactKernelInputBase, ImmutableRecord):

 @backend(interface="accum_insn", name="sumfact")
 def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
-    # When doing sumfactorization we want to split the test function
+    # When doing sum factorization we want to split the test function
    assert(accterm.argument.expr is not None)

    # Do the tree traversal to get a pymbolic expression representing this expression
@@ -75,16 +75,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
    if pymbolic_expr == 0:
        return

-    # If this is a gradient, we find the gradient iname
    dim = world_dimension()
-    additional_inames = frozenset({})
-    if accterm.new_indices is not None:
-        for i in accterm.new_indices:
-            if i not in visitor.dimension_indices:
-                from dune.perftool.pdelab.localoperator import grad_iname
-                additional_inames = additional_inames.union(frozenset({grad_iname(i, dim)}))
-
-    # Get the degree of the element corresponding to this modified argument
    mod_arg_expr = accterm.argument.expr
    from ufl.classes import FunctionView, Argument
    while (not isinstance(mod_arg_expr, FunctionView)) and (not isinstance(mod_arg_expr, Argument)):
@@ -92,52 +83,40 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
    degree = mod_arg_expr.ufl_element()._degree
    basis_size = degree + 1

-    def emit_sumfact_kernel(indices, restriction, insn_dep):
-        # Not implemented
-        if indices:
-            assert len(indices) <= 2
-
-        # Figure out the name of the accumulation variable
-        accum_idims = None
-        if indices is not None:
-            accum_idims = (indices[0],)
-        test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure, idims=accum_idims)
-        ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure, idims=accum_idims)
-        accum = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction())
+    # Extract index information
+    grad_index = None
+    if accterm.argument.reference_grad:
+        grad_index = accterm.argument.expr.ufl_operands[1][0]._value

-        # TODO: Adjust for stokes sumfact symdiff
-        jacobian_inames = tuple()
-        if accterm.is_jacobian:
-            jacobian_inames = visitor.inames
+    accum_index = None
+    if visitor.indices:
+        accum_index = visitor.indices[0]

-        # Determine the derivative direction
-        derivative = None
-        if accterm.new_indices:
-            derivative = indices[-1]
-        if isinstance(accterm.argument.expr, uc.Indexed):
-            derivative = accterm.argument.expr.ufl_operands[1][0]._value
+    jacobian_inames = tuple()
+    if accterm.is_jacobian:
+        jacobian_inames = visitor.inames
+
+    def emit_sumfact_kernel(restriction, insn_dep):
+        test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure, idims=(accum_index,))
+        ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure, idims=(accum_index,))
+        accum = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction())

        # Construct the matrix sequence for this sum factorization
        matrix_sequence = construct_basis_matrix_sequence(
            transpose=True,
-            derivative=derivative,
+            derivative=grad_index,
            facedir=get_facedir(accterm.argument.restriction),
            facemod=get_facemod(accterm.argument.restriction),
            basis_size=basis_size)

-        # Avoid caching issues by passing the coeff_func_index
-        coeff_func_index = None
-        if indices and len(indices) == 2:
-            coeff_func_index = indices[0]
-
        # TODO: Adapt preferred position for stokes sumfact symdiff
        sf = SumfactKernel(matrix_sequence=matrix_sequence,
                           restriction=(accterm.argument.restriction, restriction),
                           stage=3,
-                           preferred_position=indices[-1] if accterm.new_indices else None,
+                           preferred_position=grad_index,
                           accumvar=accum,
                           within_inames=jacobian_inames,
-                           input=AlreadyAssembledInput(index=coeff_func_index),
+                           input=AlreadyAssembledInput(index=accum_index),
                           )

        from dune.perftool.sumfact.vectorization import attach_vectorization_info
@@ -167,19 +146,6 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
                        tags=frozenset(["quadvec", "gradvec"])
                        )

-        # Replace gradient iname with correct index for assignement
-        replace_dict = {}
-
-        # If we have two indices the first belongs to the dimension
-        # and the second one to the derivative. We handle all these
-        # indices by hand and replace them accordingly.
-        if indices and len(indices) == 2:
-                replace_dict['idim_arg0'] = indices[0]
-        for iname in additional_inames:
-            replace_dict[prim.Variable(iname)] = indices[-1]
-        from dune.perftool.loopy.symbolic import substitute
-        expression = substitute(pymbolic_expr, replace_dict)
-
        # Write timing stuff for jacobian (for alpha methods it is done at the end of stage 1)
        timer_dep = frozenset()
        if get_option("instrumentation_level") >= 4:
@@ -194,14 +160,14 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
        from loopy.match import Or, Writes
        from loopy.symbolic import DependencyMapper
        from dune.perftool.tools import get_pymbolic_basename
-        deps = Or(tuple(Writes(get_pymbolic_basename(expr)) for expr in DependencyMapper()(expression)))
+        deps = Or(tuple(Writes(get_pymbolic_basename(expr)) for expr in DependencyMapper()(pymbolic_expr)))

        # Issue an instruction in the quadrature loop that fills the buffer
        # with the evaluation of the contribution at all quadrature points
        assignee = prim.Subscript(lp.TaggedVariable(temp, vsf.tag),
                                  vsf.quadrature_index(sf))
        contrib_dep = instruction(assignee=assignee,
-                                  expression=expression,
+                                  expression=pymbolic_expr,
                                  forced_iname_deps=frozenset(quadrature_inames() + jacobian_inames),
                                  forced_iname_deps_is_final=True,
                                  tags=frozenset({"quadvec"}).union(vectag),
@@ -243,7 +209,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
        # Determine the expression to accumulate with. This depends on the vectorization strategy!
        result = prim.Subscript(result, tuple(prim.Variable(i) for i in inames))
        vecinames = ()
-        # TODO: evaluate whether the following line would be okay with vsf.vectorized
+
        if vsf.vectorized:
            iname = accum_iname((accterm.argument.restriction, restriction), vsf.vector_width, "vec")
            vecinames = (iname,)
@@ -285,10 +251,4 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):

    insn_dep = None
    for restriction in jac_restrictions:
-        if accterm.new_indices:
-            shape = (world_dimension(),) * len(accterm.new_indices)
-            # Iterate over all combinations of indices for this shape
-            for indices in itertools.product(*map(range, shape)):
-                insn_dep = emit_sumfact_kernel(indices, restriction, insn_dep)
-        else:
-            insn_dep = emit_sumfact_kernel(None, restriction, insn_dep)
+        insn_dep = emit_sumfact_kernel(restriction, insn_dep)