Skip to content
Snippets Groups Projects
Commit e85b4c7b authored by René Heß's avatar René Heß
Browse files

[WIP] Kernel generation is still missing

parent 9ffb3188
No related branches found
No related tags found
No related merge requests found
......@@ -15,6 +15,7 @@ from dune.perftool.pdelab.localoperator import (name_domain_field,
lop_template_range_field,
)
from dune.perftool.sumfact.sumfact import AMatrix, sumfact
@class_member("operator")
def define_alignment(name):
......@@ -235,7 +236,6 @@ def evaluate_coefficient_gradient_sumfact(element, name, container, restriction,
# n = name_number_of_basis_functions_per_direction()
m = quadrature_points_per_direction()
n = basis_functions_per_direction()
from dune.perftool.sumfact.sumfact import AMatrix, sumfact
a_matrix = AMatrix(theta, m, n)
a_matrices = (a_matrix, a_matrix)
inp = name_coefficientcontainer(Restriction.NONE) + ".data()"
......
......@@ -6,12 +6,8 @@ from dune.perftool.generation import (class_member,
no_caching,
preamble,
)
from dune.perftool.pdelab.localoperator import lop_template_range_field
from dune.perftool.sumfact.amatrix import name_alignment
from dune.perftool.pdelab.localoperator import (
lop_template_range_field,
)
class AMatrix(object):
def __init__(self, a_matrix, m, n):
......@@ -33,6 +29,7 @@ class _GlobalBufferCounter(object):
@constructor_block("operator")
def construct_buffer(name, a_matrices):
# TODO Free memory in destructor!
range_field = lop_template_range_field()
# Upper bound for the number of elements in the buffer. Note: This
......@@ -41,6 +38,7 @@ def construct_buffer(name, a_matrices):
for mat in a_matrices:
max_size *= max(mat.n, mat.m)
from dune.perftool.sumfact.amatrix import name_alignment
alignment = name_alignment()
return ["{} = ({}*) aligned_malloc({}*sizeof({}), {});".format(name, range_field, max_size, range_field, alignment),
"if ({}==0) throw std::bad_alloc();".format(name)]
......@@ -64,7 +62,7 @@ def name_buffer(a_matrices):
return name
# TODO Avoid caching?
class _GlobalSumfactCounter(object):
counter = 0
......@@ -75,7 +73,7 @@ class _GlobalSumfactCounter(object):
@iname
def _sumfact_rows_iname(a_matrix, count):
# TODO: Avoid caching? See geometry.py.
# TODO: Avoid caching?
# count = _GlobalSumfactCounter().get()
name = "sf_cols_"+str(count)
domain(name, a_matrix.m)
......@@ -89,7 +87,7 @@ def sumfact_rows_iname(a_matrix):
@iname
def _sumfact_collumns_iname(input_collumns, count):
# TODO: Avoid caching? See geometry.py.
# TODO: Avoid caching?
# count = _GlobalSumfactCounter().get()
name = "sf_rows_"+str(count)
domain(name, int(input_collumns))
......@@ -102,11 +100,19 @@ def sumfact_collumns_iname(input_collumns):
return name
#@preamble(cache_key_generator=no_caching)
def _sumfact_kernel(a_matrix, inp, out, input_collumns,
first, element, name, container, restriction, component):
"""Sumfactorization kernel computing out = a_matrix * inp
Rotation step is directly incuded. This can be done by treating
inp as a matrix in column major storage and out as a row major
matrix. With Theta of size m_lxn_l and the abbreviation
r=input_collumns the code should do the following:
[i=0,...,m_l-1]:
[j=0,...,r-1]:
out[j+i \cdot r] = \sum_{k=0}^{n_l-1} (Theta[i,k] * inp[k+j\cdot n_l])
Arguments:
----------
a_matrix: AMatrix object of size m_l x n_l
......@@ -114,109 +120,8 @@ def _sumfact_kernel(a_matrix, inp, out, input_collumns,
out: Write solut to this vector. Rotation step included.
input_rows: Current number of rows if you treat inp as matrix.
"""
inames = (sumfact_rows_iname(a_matrix), sumfact_collumns_iname(input_collumns))
print(inames)
if first:
assert(element)
assert(name)
assert(container)
# assert(restriction)
assert(component)
# First we determine the rank of the tensor we are talking about
from ufl.functionview import select_subelement
sub_element = select_subelement(element, component)
rank = len(sub_element.value_shape()) + 1
# We do then set some variables accordingly
shape = sub_element.value_shape() + (element.cell().geometric_dimension(),)
shape_impl = ('arr',) * rank
from dune.perftool.pdelab.geometry import dimension_iname
# idims = tuple(dimension_iname(count=i) for i in range(rank))
# TODO rename
idims = tuple((sumfact_rows_iname(a_matrix),))
leaf_element = sub_element
from ufl import VectorElement, TensorElement
if isinstance(sub_element, (VectorElement, TensorElement)):
leaf_element = sub_element.sub_elements()[0]
# and proceed to call the necessary generator functions
from dune.perftool.generation.loopy import temporary_variable
temporary_variable(name, shape=shape, shape_impl=shape_impl)
from dune.perftool.pdelab.spaces import name_lfs
lfs = name_lfs(element, restriction, component)
from dune.perftool.pdelab.spaces import lfs_iname
index = lfs_iname(leaf_element, restriction, context='trialgrad')
from dune.perftool.pdelab.basis import name_reference_gradient
basis = name_reference_gradient(leaf_element, restriction)
if isinstance(sub_element, (VectorElement, TensorElement)):
lfs = lfs_child(lfs, idims[:-1], shape=shape_as_pymbolic(shape[:-1]), symmetry=element.symmetry)
from dune.perftool.pdelab.argument import pymbolic_coefficient
coeff = pymbolic_coefficient(container, lfs, index)
from pymbolic.primitives import Product, Subscript, Variable
assignee = Subscript(Variable(name), tuple(Variable(i) for i in idims))
from loopy import Reduction
reduction_expr = Product((coeff, Subscript(Variable(basis), (Variable(index), 0, Variable(idims[-1])))))
from dune.perftool.pdelab.quadrature import quadrature_iname
instruction(expression=Reduction("sum", index, reduction_expr, allow_simultaneous=True),
assignee=assignee,
forced_iname_deps=frozenset({quadrature_iname()}).union(frozenset(idims)),
forced_iname_deps_is_final=True,
)
# from dune.perftool.pdelab.spaces import name_lfs
# lfs = name_lfs(element, restriction, component)
# from ufl.functionview import select_subelement
# sub_element = select_subelement(element, component)
# leaf_element = sub_element
# from ufl import VectorElement, TensorElement
# if isinstance(sub_element, (VectorElement, TensorElement)):
# leaf_element = sub_element.sub_elements()[0]
# from dune.perftool.pdelab.spaces import lfs_iname
# index = lfs_iname(leaf_element, restriction, context='trial')
# from dune.perftool.pdelab.argument import pymbolic_coefficient
# coeff = pymbolic_coefficient(container, lfs, index)
# print(coeff)
# from pymbolic.primitives import Product, Subscript, Variable
# from loopy import Reduction
# reduction_expr = Product((Subscript(Variable(a_matrix.a_matrix), (Variable(inames[0]), Variable(inames[1]))), coeff))
# from dune.perftool.generation.loopy import temporary_variable
# temporary_variable(out, shape=(out), decl_method=None)
# # declare_cache_temporary(leaf_element, restriction, name, which='Jacobian')
# assignee = Variable(out)
# index = lfs_iname(leaf_element, restriction, context='trialgrad')
# instruction(expression=Reduction("sum", index, reduction_expr, allow_simultaneous=True),
# assignee=assignee,
# forced_iname_deps=frozenset(inames),
# forced_iname_deps_is_final=True,
# )
else:
return
# assignee = Subscript(Variable(), tuple(Variable(i) for i in ))
# reduction_expr = Product((coeff
# instruction(code = "std::cout << {};".format(inames[0]),
# inames = inames,
# )
# return "//// palpo"
# TODO Update documentation
None
def sumfact_kernel(a_matrix, inp, out, input_collumns, first=False,
......@@ -266,7 +171,7 @@ def sumfact(element, name, container, restriction, component, a_matrices, inp, o
#
# input_collumns = \prod_{i=1}^{l} n_i
#
# and update it in the sumfact_kernel call!
# and update input_collumns in the sumfact_kernel call!
input_collumns = 1
for mat in a_matrices:
input_collumns *= mat.n
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment