[WIP] Kernel generation is still missing

e85b4c7b · René Heß · 9ffb3188 · e85b4c7b · e85b4c7b
Commit e85b4c7b authored 8 years ago by René Heß
--- a/python/dune/perftool/sumfact/amatrix.py
+++ b/python/dune/perftool/sumfact/amatrix.py
@@ -15,6 +15,7 @@ from dune.perftool.pdelab.localoperator import (name_domain_field,
                                                lop_template_range_field,
                                                )

+from dune.perftool.sumfact.sumfact import AMatrix, sumfact

 @class_member("operator")
 def define_alignment(name):
@@ -235,7 +236,6 @@ def evaluate_coefficient_gradient_sumfact(element, name, container, restriction,
    # n = name_number_of_basis_functions_per_direction()
    m = quadrature_points_per_direction()
    n = basis_functions_per_direction()
-    from dune.perftool.sumfact.sumfact import AMatrix, sumfact
    a_matrix = AMatrix(theta, m, n)
    a_matrices = (a_matrix, a_matrix)
    inp = name_coefficientcontainer(Restriction.NONE) + ".data()"

--- a/python/dune/perftool/sumfact/sumfact.py
+++ b/python/dune/perftool/sumfact/sumfact.py
@@ -6,12 +6,8 @@ from dune.perftool.generation import (class_member,
                                      no_caching,
                                      preamble,
                                      )
+from dune.perftool.pdelab.localoperator import lop_template_range_field

-from dune.perftool.sumfact.amatrix import name_alignment
-
-from dune.perftool.pdelab.localoperator import (
-    lop_template_range_field,
-)

 class AMatrix(object):
    def __init__(self, a_matrix, m, n):
@@ -33,6 +29,7 @@ class _GlobalBufferCounter(object):

 @constructor_block("operator")
 def construct_buffer(name, a_matrices):
+    # TODO Free memory in destructor!
    range_field = lop_template_range_field()

    # Upper bound for the number of elements in the buffer. Note: This
@@ -41,6 +38,7 @@ def construct_buffer(name, a_matrices):
    for mat in a_matrices:
        max_size *= max(mat.n, mat.m)

+    from dune.perftool.sumfact.amatrix import name_alignment
    alignment = name_alignment()
    return ["{} = ({}*) aligned_malloc({}*sizeof({}), {});".format(name, range_field, max_size, range_field, alignment),
            "if ({}==0) throw std::bad_alloc();".format(name)]
@@ -64,7 +62,7 @@ def name_buffer(a_matrices):
    return name


-
+# TODO Avoid caching?
 class _GlobalSumfactCounter(object):
    counter = 0

@@ -75,7 +73,7 @@ class _GlobalSumfactCounter(object):

 @iname
 def _sumfact_rows_iname(a_matrix, count):
-    # TODO: Avoid caching? See geometry.py.
+    # TODO: Avoid caching?
    # count = _GlobalSumfactCounter().get()
    name = "sf_cols_"+str(count)
    domain(name, a_matrix.m)
@@ -89,7 +87,7 @@ def sumfact_rows_iname(a_matrix):

 @iname
 def _sumfact_collumns_iname(input_collumns, count):
-    # TODO: Avoid caching? See geometry.py.
+    # TODO: Avoid caching?
    # count = _GlobalSumfactCounter().get()
    name = "sf_rows_"+str(count)
    domain(name, int(input_collumns))
@@ -102,11 +100,19 @@ def sumfact_collumns_iname(input_collumns):
    return name


-#@preamble(cache_key_generator=no_caching)
 def _sumfact_kernel(a_matrix, inp, out, input_collumns,
                    first, element, name, container, restriction, component):
    """Sumfactorization kernel computing out = a_matrix * inp

+    Rotation step is directly incuded. This can be done by treating
+    inp as a matrix in column major storage and out as a row major
+    matrix. With Theta of size m_lxn_l and the abbreviation
+    r=input_collumns the code should do the following:
+
+    [i=0,...,m_l-1]:
+      [j=0,...,r-1]:
+        out[j+i \cdot r] = \sum_{k=0}^{n_l-1} (Theta[i,k] * inp[k+j\cdot n_l])
+
    Arguments:
    ----------
    a_matrix: AMatrix object of size m_l x n_l
@@ -114,109 +120,8 @@ def _sumfact_kernel(a_matrix, inp, out, input_collumns,
    out: Write solut to this vector. Rotation step included.
    input_rows: Current number of rows if you treat inp as matrix.
    """
-    inames = (sumfact_rows_iname(a_matrix), sumfact_collumns_iname(input_collumns))
-    print(inames)
-
-    if first:
-        assert(element)
-        assert(name)
-        assert(container)
-        # assert(restriction)
-        assert(component)
-
-
-
-
-        # First we determine the rank of the tensor we are talking about
-        from ufl.functionview import select_subelement
-        sub_element = select_subelement(element, component)
-        rank = len(sub_element.value_shape()) + 1
-
-        # We do then set some variables accordingly
-        shape = sub_element.value_shape() + (element.cell().geometric_dimension(),)
-        shape_impl = ('arr',) * rank
-
-        from dune.perftool.pdelab.geometry import dimension_iname
-        # idims = tuple(dimension_iname(count=i) for i in range(rank))
-        # TODO rename
-        idims = tuple((sumfact_rows_iname(a_matrix),))
-        leaf_element = sub_element
-        from ufl import VectorElement, TensorElement
-        if isinstance(sub_element, (VectorElement, TensorElement)):
-            leaf_element = sub_element.sub_elements()[0]
-
-        # and proceed to call the necessary generator functions
-        from dune.perftool.generation.loopy import temporary_variable
-        temporary_variable(name, shape=shape, shape_impl=shape_impl)
-        from dune.perftool.pdelab.spaces import name_lfs
-        lfs = name_lfs(element, restriction, component)
-        from dune.perftool.pdelab.spaces import lfs_iname
-        index = lfs_iname(leaf_element, restriction, context='trialgrad')
-        from dune.perftool.pdelab.basis import name_reference_gradient
-        basis = name_reference_gradient(leaf_element, restriction)
-
-        if isinstance(sub_element, (VectorElement, TensorElement)):
-            lfs = lfs_child(lfs, idims[:-1], shape=shape_as_pymbolic(shape[:-1]), symmetry=element.symmetry)
-
-        from dune.perftool.pdelab.argument import pymbolic_coefficient
-        coeff = pymbolic_coefficient(container, lfs, index)
-        from pymbolic.primitives import Product, Subscript, Variable
-        assignee = Subscript(Variable(name), tuple(Variable(i) for i in idims))
-
-        from loopy import Reduction
-        reduction_expr = Product((coeff, Subscript(Variable(basis), (Variable(index), 0, Variable(idims[-1])))))
-
-        from dune.perftool.pdelab.quadrature import quadrature_iname
-        instruction(expression=Reduction("sum", index, reduction_expr, allow_simultaneous=True),
-                    assignee=assignee,
-                    forced_iname_deps=frozenset({quadrature_iname()}).union(frozenset(idims)),
-                    forced_iname_deps_is_final=True,
-                    )
-
-        # from dune.perftool.pdelab.spaces import name_lfs
-        # lfs = name_lfs(element, restriction, component)
-
-        # from ufl.functionview import select_subelement
-        # sub_element = select_subelement(element, component)
-        # leaf_element = sub_element
-        # from ufl import VectorElement, TensorElement
-        # if isinstance(sub_element, (VectorElement, TensorElement)):
-        #     leaf_element = sub_element.sub_elements()[0]
-
-        # from dune.perftool.pdelab.spaces import lfs_iname
-        # index = lfs_iname(leaf_element, restriction, context='trial')
-
-        # from dune.perftool.pdelab.argument import pymbolic_coefficient
-        # coeff = pymbolic_coefficient(container, lfs, index)
-        # print(coeff)
-        # from pymbolic.primitives import Product, Subscript, Variable
-        # from loopy import Reduction
-        # reduction_expr = Product((Subscript(Variable(a_matrix.a_matrix), (Variable(inames[0]), Variable(inames[1]))), coeff))
-
-
-        # from dune.perftool.generation.loopy import temporary_variable
-        # temporary_variable(out, shape=(out), decl_method=None)
-        # # declare_cache_temporary(leaf_element, restriction, name, which='Jacobian')
-
-        # assignee = Variable(out)
-        # index = lfs_iname(leaf_element, restriction, context='trialgrad')
-        # instruction(expression=Reduction("sum", index, reduction_expr, allow_simultaneous=True),
-        #             assignee=assignee,
-        #             forced_iname_deps=frozenset(inames),
-        #             forced_iname_deps_is_final=True,
-        #             )
-
-    else:
-        return
-
-    # assignee = Subscript(Variable(), tuple(Variable(i) for i in ))
-    # reduction_expr = Product((coeff
-    # instruction(code = "std::cout << {};".format(inames[0]),
-    #             inames = inames,
-    #             )
-
-
-    # return "//// palpo"
+    # TODO Update documentation
+    None


 def sumfact_kernel(a_matrix, inp, out, input_collumns, first=False,
@@ -266,7 +171,7 @@ def sumfact(element, name, container, restriction, component, a_matrices, inp, o
    #
    # input_collumns = \prod_{i=1}^{l} n_i
    #
-    # and update it in the sumfact_kernel call!
+    # and update input_collumns in the sumfact_kernel call!
    input_collumns = 1
    for mat in a_matrices:
        input_collumns *= mat.n