Stage 1 not totally wrong

d5af1e6f · René Heß · b14a7f01 · d5af1e6f · d5af1e6f
Commit d5af1e6f authored 8 years ago by René Heß
--- a/python/dune/perftool/sumfact/basis.py
+++ b/python/dune/perftool/sumfact/basis.py
@@ -6,6 +6,7 @@ multiplication with the test function is part of the sum factorization kernel.
 from dune.perftool.generation import (backend,
                                      cached,
                                      domain,
+                                      get_counter,
                                      get_global_context_value,
                                      iname,
                                      instruction,
@@ -31,21 +32,95 @@ from pytools import product
 import pymbolic.primitives as prim
+def name_sumfact_base_buffer():
+    count = get_counter('sumfact_base_buffer')
+    name = "buffer_{}".format(str(count))
-@cached
-def pymbolic_trialfunction_gradient(element, restriction, component):
+@cached
+def sumfact_evaluate_coefficient_gradient(element, name, restriction, component):
+    # First we determine the rank of the tensor we are talking about
+    from ufl.functionview import select_subelement
+    sub_element = select_subelement(element, component)
+    rank = len(sub_element.value_shape()) + 1
+    # We do then set some variables accordingly
+    shape = sub_element.value_shape() + (element.cell().geometric_dimension(),)
+    shape_impl = ('arr',) * rank
+    from dune.perftool.pdelab.geometry import dimension_iname
+    idims = tuple(dimension_iname(count=i) for i in range(rank))
+    leaf_element = sub_element
+    from ufl import VectorElement, TensorElement
+    if isinstance(sub_element, (VectorElement, TensorElement)):
+        leaf_element = sub_element.sub_elements()[0]
+    # and proceed to call the necessary generator functions
+    temporary_variable(name, shape=shape, shape_impl=shape_impl)
+    from dune.perftool.pdelab.spaces import name_lfs
+    lfs = name_lfs(element, restriction, component)
+    from dune.perftool.pdelab.basis import pymbolic_reference_gradient
+    basis = pymbolic_reference_gradient(leaf_element, restriction, 0, context='trialgrad')
+    from dune.perftool.tools import get_pymbolic_indices
+    index, _ = get_pymbolic_indices(basis)
+    if isinstance(sub_element, (VectorElement, TensorElement)):
+        lfs = lfs_child(lfs, idims[:-1], shape=shape_as_pymbolic(shape[:-1]), symmetry=element.symmetry())
+    # Calculate values with sumfactorization
    theta = name_theta()
    dtheta = name_dtheta()
+    rows = quadrature_points_per_direction()
+    cols = basis_functions_per_direction()
+    theta_matrix = AMatrix(theta, rows, cols)
+    dtheta_matrix = AMatrix(dtheta, rows, cols)
+    # TODO:
+    # - This only covers rank 1
+    # - Avoid setting up whole gradient if only one component is needed?
+    # Get geometric dimension
+    formdata = get_global_context_value('formdata')
+    dim = formdata.geometric_dimension
+    buffers = []
+    for i in range(dim):
+        a_matrices = [theta_matrix] * dim
+        a_matrices[i] = dtheta_matrix
+        a_matrices = tuple(a_matrices)
+        # Do stage 1
+        buffer_name = name_sumfact_base_buffer()
+        initialize_buffer(buffer_name,
+                          base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices),
+                          num=2
+                         )
+        insn_dep = setup_theta(element, restriction, component, a_matrices, buffer_name)
+        var, _ = sum_factorization_kernel(a_matrices,
+                                            buffer_name,
+                                            insn_dep=frozenset({insn_dep}),
+                                           )
+        buffers.append(var)
+    for i, buf in enumerate(buffers):
+        # Write solution from sumfactorization to gradient variable
+        from pymbolic.primitives import Subscript, Variable
+        from dune.perftool.generation import get_backend
+        assignee = Subscript(Variable(name), i)
+        expression = Subscript(Variable(buf), tuple(Variable(i) for i in quadrature_inames()))
+        instruction(assignee=assignee,
+                    expression=expression,
+                    forced_iname_deps=frozenset(get_backend("quad_inames")()).union(frozenset(idims)),
+                    forced_iname_deps_is_final=True,
+                    )
-    # palpo TODO -> copied from argument.py
+@cached
+def pymbolic_trialfunction_gradient(element, restriction, component):
    rawname = "gradu" + "_".join(str(c) for c in component)
    name = restricted_name(rawname, restriction)
    from dune.perftool.pdelab.argument import name_coefficientcontainer
    container = name_coefficientcontainer(restriction)
-    from dune.perftool.pdelab.argument import evaluate_coefficient_gradient
+    sumfact_evaluate_coefficient_gradient(element, name, restriction, component)
-    evaluate_coefficient_gradient(element, name, container, restriction, component)
    from pymbolic.primitives import Variable
    return Variable(name)
@@ -58,14 +133,15 @@ def pymbolic_trialfunction(element, restriction, component):
    a_matrix = AMatrix(theta, rows, cols)
    # TODO: this is 2D only
    a_matrices = (a_matrix, a_matrix)
+    buffer_name = name_sumfact_base_buffer()
    # Do stage 1
-    initialize_buffer("buffer",
+    initialize_buffer(buffer_name,
                      base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices),
                      num=2
                      )
-    insn_dep = setup_theta(element, restriction, component, a_matrices)
+    insn_dep = setup_theta(element, restriction, component, a_matrices, buffer_name)
    var, _ = sum_factorization_kernel(a_matrices,
                                      "buffer",
                                      insn_dep=frozenset({insn_dep}),

--- a/python/dune/perftool/sumfact/sumfact.py
+++ b/python/dune/perftool/sumfact/sumfact.py
@@ -55,10 +55,10 @@ def sumfact_iname(bound, _type):
    return name
-def setup_theta(element, restriction, component, a_matrices):
+def setup_theta(element, restriction, component, a_matrices, buffer_name):
    number_basis = product(mat.cols for mat in a_matrices)
    shape = (number_basis,)
-    inp = get_buffer_temporary("buffer",
+    inp = get_buffer_temporary(buffer_name,
                               shape=shape)
    silenced_warning('read_no_write({})'.format(inp))