Skip to content
Snippets Groups Projects
Commit 10149193 authored by Dominic Kempf's avatar Dominic Kempf
Browse files

Merge branch 'feature/sumfact-poisson' into 'master'

Feature/sumfact poisson

See merge request !59
parents 6e868bbc 1288f744
No related branches found
No related tags found
No related merge requests found
Showing with 398 additions and 97 deletions
""" Some error classes for dune-perftool """
class PerftoolError(Exception):
pass
......
......@@ -4,7 +4,9 @@ from dune.perftool.sumfact.quadrature import (quadrature_inames,
from dune.perftool.sumfact.basis import (lfs_inames,
pymbolic_basis,
pymbolic_reference_gradient,
pymbolic_trialfunction,
pymbolic_trialfunction_gradient,
)
from dune.perftool.pdelab import PDELabInterface
......@@ -17,6 +19,12 @@ class SumFactInterface(PDELabInterface):
def pymbolic_basis(self, element, restriction, number):
return pymbolic_basis(element, restriction, number)
def pymbolic_reference_gradient(self, element, restriction, number):
return pymbolic_reference_gradient(element, restriction, number)
def pymbolic_trialfunction_gradient(self, element, restriction, component):
return pymbolic_trialfunction_gradient(element, restriction, component)
def pymbolic_trialfunction(self, element, restriction, component):
return pymbolic_trialfunction(element, restriction, component)
......
......@@ -193,7 +193,7 @@ def sort_quadrature_points_weights():
@constructor_block("operator")
def construct_theta(name, transpose):
def construct_theta(name, transpose, derivative):
# Make sure that the quadrature points are sorted
sort_quadrature_points_weights()
......@@ -204,11 +204,13 @@ def construct_theta(name, transpose):
polynomials = name_polynomials()
qp = name_oned_quadrature_points()
access = "j,i" if transpose else "i,j"
# access = "j,i" if transpose else "i,j"
basispol = "dp" if derivative else "p"
polynomial_access = "i,{}[j]".format(qp) if transpose else "j,{}[i]".format(qp)
return ["for (std::size_t i=0; i<{}; i++){{".format(shape[0]),
" for (std::size_t j=0; j<{}; j++){{".format(shape[1]),
" {}.colmajoraccess({}) = {}.p(j,{}[i]);".format(name, access, polynomials, qp),
" {}.colmajoraccess(i,j) = {}.{}({});".format(name, polynomials, basispol, polynomial_access),
" }",
"}"]
......@@ -228,10 +230,10 @@ def type_theta():
@class_member("operator")
def define_theta(name, shape, transpose):
def define_theta(name, shape, transpose, derivative):
theta_type = type_theta()
initializer_list(name, [str(axis) for axis in shape], classtag="operator")
construct_theta(name, transpose)
construct_theta(name, transpose, derivative)
return "{} {};".format(theta_type, name)
......@@ -239,7 +241,7 @@ def name_theta():
name = "Theta"
shape = (quadrature_points_per_direction(), basis_functions_per_direction())
globalarg(name, shape=shape, dtype=numpy.float64, dim_tags="f,f")
define_theta(name, shape, False)
define_theta(name, shape, False, False)
return name
......@@ -247,5 +249,21 @@ def name_theta_transposed():
name = "ThetaT"
shape = (basis_functions_per_direction(), quadrature_points_per_direction())
globalarg(name, shape=shape, dtype=numpy.float64, dim_tags="f,f")
define_theta(name, shape, True)
define_theta(name, shape, True, False)
return name
def name_dtheta():
name = "dTheta"
shape = (quadrature_points_per_direction(), basis_functions_per_direction())
globalarg(name, shape=shape, dtype=numpy.float64, dim_tags="f,f")
define_theta(name, shape, False, True)
return name
def name_dtheta_transposed():
name = "dThetaT"
shape = (basis_functions_per_direction(), quadrature_points_per_direction())
globalarg(name, shape=shape, dtype=numpy.float64, dim_tags="f,f")
define_theta(name, shape, True, True)
return name
......@@ -6,6 +6,7 @@ multiplication with the test function is part of the sum factorization kernel.
from dune.perftool.generation import (backend,
cached,
domain,
get_counter,
get_global_context_value,
iname,
instruction,
......@@ -14,6 +15,7 @@ from dune.perftool.generation import (backend,
from dune.perftool.sumfact.amatrix import (AMatrix,
basis_functions_per_direction,
ColMajorAccess,
name_dtheta,
name_theta,
quadrature_points_per_direction,
)
......@@ -31,6 +33,100 @@ from pytools import product
import pymbolic.primitives as prim
def name_sumfact_base_buffer():
count = get_counter('sumfact_base_buffer')
name = "buffer_{}".format(str(count))
return name
@cached
def sumfact_evaluate_coefficient_gradient(element, name, restriction, component):
# First we determine the rank of the tensor we are talking about
from ufl.functionview import select_subelement
sub_element = select_subelement(element, component)
rank = len(sub_element.value_shape()) + 1
# We do then set some variables accordingly
shape = sub_element.value_shape() + (element.cell().geometric_dimension(),)
shape_impl = ('arr',) * rank
from dune.perftool.pdelab.geometry import dimension_iname
idims = tuple(dimension_iname(count=i) for i in range(rank))
leaf_element = sub_element
from ufl import VectorElement, TensorElement
if isinstance(sub_element, (VectorElement, TensorElement)):
leaf_element = sub_element.sub_elements()[0]
# and proceed to call the necessary generator functions
temporary_variable(name, shape=shape, shape_impl=shape_impl)
from dune.perftool.pdelab.spaces import name_lfs
lfs = name_lfs(element, restriction, component)
from dune.perftool.pdelab.basis import pymbolic_reference_gradient
basis = pymbolic_reference_gradient(leaf_element, restriction, 0, context='trialgrad')
from dune.perftool.tools import get_pymbolic_indices
index, _ = get_pymbolic_indices(basis)
if isinstance(sub_element, (VectorElement, TensorElement)):
lfs = lfs_child(lfs, idims[:-1], shape=shape_as_pymbolic(shape[:-1]), symmetry=element.symmetry())
# Calculate values with sumfactorization
theta = name_theta()
dtheta = name_dtheta()
rows = quadrature_points_per_direction()
cols = basis_functions_per_direction()
theta_matrix = AMatrix(theta, rows, cols)
dtheta_matrix = AMatrix(dtheta, rows, cols)
# TODO:
# - This only covers rank 1
# - Avoid setting up whole gradient if only one component is needed?
# Get geometric dimension
formdata = get_global_context_value('formdata')
dim = formdata.geometric_dimension
buffers = []
for i in range(dim):
a_matrices = [theta_matrix] * dim
a_matrices[i] = dtheta_matrix
a_matrices = tuple(a_matrices)
# Do stage 1
buffer_name = name_sumfact_base_buffer()
initialize_buffer(buffer_name,
base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices),
num=2
)
insn_dep = setup_theta(element, restriction, component, a_matrices, buffer_name)
var, _ = sum_factorization_kernel(a_matrices,
buffer_name,
insn_dep=frozenset({insn_dep}),
)
buffers.append(var)
for i, buf in enumerate(buffers):
# Write solution from sumfactorization to gradient variable
from pymbolic.primitives import Subscript, Variable
from dune.perftool.generation import get_backend
assignee = Subscript(Variable(name), i)
expression = Subscript(Variable(buf), tuple(Variable(i) for i in quadrature_inames()))
instruction(assignee=assignee,
expression=expression,
forced_iname_deps=frozenset(get_backend("quad_inames")()).union(frozenset(idims)),
forced_iname_deps_is_final=True,
)
@cached
def pymbolic_trialfunction_gradient(element, restriction, component):
rawname = "gradu" + "_".join(str(c) for c in component)
name = restricted_name(rawname, restriction)
from dune.perftool.pdelab.argument import name_coefficientcontainer
container = name_coefficientcontainer(restriction)
sumfact_evaluate_coefficient_gradient(element, name, restriction, component)
from pymbolic.primitives import Variable
return Variable(name)
@cached
def pymbolic_trialfunction(element, restriction, component):
theta = name_theta()
......@@ -39,16 +135,17 @@ def pymbolic_trialfunction(element, restriction, component):
a_matrix = AMatrix(theta, rows, cols)
# TODO: this is 2D only
a_matrices = (a_matrix, a_matrix)
buffer_name = name_sumfact_base_buffer()
# Do stage 1
initialize_buffer("buffer",
initialize_buffer(buffer_name,
base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices),
num=2
)
insn_dep = setup_theta(element, restriction, component, a_matrices)
insn_dep = setup_theta(element, restriction, component, a_matrices, buffer_name)
var, _ = sum_factorization_kernel(a_matrices,
"buffer",
buffer_name,
insn_dep=frozenset({insn_dep}),
)
......@@ -102,3 +199,53 @@ def pymbolic_basis(element, restriction, number):
evaluate_basis(element, name, restriction)
return prim.Variable(name)
@backend(interface="evaluate_grad")
@cached
def evaluate_reference_gradient(element, name, restriction):
# from dune.perftool.pdelab.basis import name_leaf_lfs
# lfs = name_leaf_lfs(element, restriction)
# from dune.perftool.pdelab.spaces import name_lfs_bound
from dune.perftool.pdelab.geometry import name_dimension
temporary_variable(
name,
shape=(name_dimension(),))
quad_inames = quadrature_inames()
inames = lfs_inames(element, restriction)
assert(len(quad_inames) == len(inames))
theta = name_theta()
dtheta = name_dtheta()
# Get geometric dimension
formdata = get_global_context_value('formdata')
dim = formdata.geometric_dimension
for i in range(dim):
calls = [prim.Call(ColMajorAccess(theta), (prim.Variable(i), prim.Variable(j)))
for (i, j) in zip(quad_inames, inames)]
calls[i] = prim.Call(ColMajorAccess(dtheta), (prim.Variable(quad_inames[i]), prim.Variable(inames[i])))
calls = tuple(calls)
# assignee = prim.Subscript(prim.Variable(name), tuple(prim.Variable(0)))
assignee = prim.Subscript(prim.Variable(name), (i,))
# assignee = prim.Variable(name)
expression = prim.Product(calls)
instruction(assignee=assignee,
expression=expression,
forced_iname_deps=frozenset(quad_inames + inames),
forced_iname_deps_is_final=True,
)
def pymbolic_reference_gradient(element, restriction, number):
assert number == 1
# TODO: Change name?
name = "js_{}".format(FEM_name_mangling(element))
name = restricted_name(name, restriction)
evaluate_reference_gradient(element, name, restriction)
return prim.Variable(name)
......@@ -7,7 +7,7 @@ from dune.perftool.generation import (backend,
temporary_variable,
)
from dune.perftool.sumfact.amatrix import (name_number_of_basis_functions_per_direction,
from dune.perftool.sumfact.amatrix import (name_number_of_quadrature_points_per_direction,
name_oned_quadrature_points,
name_oned_quadrature_weights,
)
......@@ -71,7 +71,7 @@ def pymbolic_base_weight():
@iname
def sumfact_quad_iname(d, context):
name = "quad_{}_{}".format(context, d)
domain(name, name_number_of_basis_functions_per_direction())
domain(name, name_number_of_quadrature_points_per_direction())
return name
......
import copy
from pymbolic.mapper.substitutor import substitute
from dune.perftool.pdelab.argument import (name_accumulation_variable,
name_coefficientcontainer,
pymbolic_coefficient,
......@@ -55,10 +59,10 @@ def sumfact_iname(bound, _type):
return name
def setup_theta(element, restriction, component, a_matrices):
def setup_theta(element, restriction, component, a_matrices, buffer_name):
number_basis = product(mat.cols for mat in a_matrices)
shape = (number_basis,)
inp = get_buffer_temporary("buffer",
inp = get_buffer_temporary(buffer_name,
shape=shape)
silenced_warning('read_no_write({})'.format(inp))
......@@ -74,8 +78,9 @@ def setup_theta(element, restriction, component, a_matrices):
def name_test_function_contribution(test):
count = get_counter('sumfact_contribution_counter')
grad = "grad_" if test.reference_grad else ""
return restricted_name("contrib_{}phi".format(grad), test.restriction)
return restricted_name("contrib_{}phi_{}".format(grad, str(count)), test.restriction)
@backend(interface="accum_insn", name="sumfact")
......@@ -89,73 +94,113 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
formdata = get_global_context_value('formdata')
dim = formdata.geometric_dimension
# Get the a matrices needed for this accumulation term
theta_transposed = name_theta_transposed()
rows = basis_functions_per_direction()
cols = quadrature_points_per_direction()
a_matrix = AMatrix(theta_transposed, rows, cols)
a_matrices = (a_matrix, a_matrix)
# Get a matrix that holds the result of the pymbolic_expr at all quadrature points
buffer = name_test_function_contribution(accterm.argument)
temp = initialize_buffer(buffer,
base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices),
num=2
).get_temporary(shape=(quadrature_points_per_direction(),) * dim)
# Issue an instruction in the quadrature loop that fills the buffer
# with the evaluation of the contribution at all quadrature points
insn_dep = instruction(assignee=Subscript(Variable(temp), tuple(Variable(i) for i in quadrature_inames())),
expression=pymbolic_expr,
forced_iname_deps=frozenset(quadrature_inames() + visitor.inames),
forced_iname_deps_is_final=True,
)
# Add a sum factorization kernel that implements the multiplication
# with the test function (step 3)
result, insn_dep = sum_factorization_kernel(a_matrices,
buffer,
insn_dep=frozenset({insn_dep}),
additional_inames=frozenset(visitor.inames),
)
inames = tuple(sumfact_iname(mat.rows, 'accum') for mat in a_matrices)
# Collect the lfs and lfs indices for the accumulate call
test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure)
test_lfs.index = flatten_index(tuple(Variable(i) for i in inames),
(basis_functions_per_direction(),) * dim
)
# In the jacobian case, also determine the space for the ansatz space
ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure)
rank = 2 if visitor.inames else 1
if rank == 2:
ansatz_lfs.index = flatten_index(tuple(Variable(i) for i in visitor.inames),
(basis_functions_per_direction(),) * dim
)
# Construct the expression representing "{r,jac}.accumulate(..)"
accum = name_accumulation_variable()
expr = Call(PDELabAccumulationFunction(accum, rank),
(ansatz_lfs.get_args() +
test_lfs.get_args() +
(Subscript(Variable(result), tuple(Variable(i) for i in inames)),)
)
)
instruction(assignees=(),
expression=expr,
forced_iname_deps=frozenset(inames + visitor.inames),
forced_iname_deps_is_final=True,
depends_on=insn_dep,
)
# Mark the transformation that moves the quadrature loop inside the trialfunction loops for application
transform(nest_quadrature_loops, visitor.inames)
def sum_factorization_kernel(a_matrices, buffer, insn_dep=frozenset({}), additional_inames=frozenset({})):
# Collect buffers we need
buffers = []
# If this is a gradient, we find the gradient iname
additional_inames = frozenset({})
if accterm.argument.index:
for i in accterm.argument.index._indices:
if i not in visitor.dimension_indices:
from dune.perftool.pdelab.localoperator import grad_iname
additional_inames = additional_inames.union(frozenset({grad_iname(i, dim)}))
for i in range(dim):
buffers.append(name_test_function_contribution(accterm.argument))
else:
buffers.append(name_test_function_contribution(accterm.argument))
# TODO covers only 2D
for i, buf in enumerate(buffers):
# Get the a matrices needed for this accumulation term
theta_transposed = name_theta_transposed()
rows = basis_functions_per_direction()
cols = quadrature_points_per_direction()
theta_matrix = AMatrix(theta_transposed, rows, cols)
# If this is a gradient we need different matrices
if accterm.argument.index:
from dune.perftool.sumfact.amatrix import name_dtheta_transposed
dtheta_transposed = name_dtheta_transposed()
rows = basis_functions_per_direction()
cols = quadrature_points_per_direction()
dtheta_matrix = AMatrix(dtheta_transposed, rows, cols)
a_matrices = [theta_matrix] * dim
a_matrices[i] = dtheta_matrix
a_matrices = tuple(a_matrices)
else:
a_matrices = (theta_matrix, theta_matrix)
temp = initialize_buffer(buf,
base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices),
num=2
).get_temporary(shape=(quadrature_points_per_direction(),) * dim,
dim_tags="f,f")
# Replace gradient iname with correct index for assignement
replace_dict = {}
expression = copy.deepcopy(pymbolic_expr)
for iname in additional_inames:
replace_dict[Variable(iname)] = i
expression = substitute(expression, replace_dict)
# Issue an instruction in the quadrature loop that fills the buffer
# with the evaluation of the contribution at all quadrature points
assignee = Subscript(Variable(temp), tuple(Variable(i) for i in quadrature_inames()))
insn_dep = instruction(assignee=assignee,
expression=expression,
forced_iname_deps=frozenset(quadrature_inames() + visitor.inames),
forced_iname_deps_is_final=True,
)
# Add a sum factorization kernel that implements the multiplication
# with the test function (step 3)
result, insn_dep = sum_factorization_kernel(a_matrices,
buf,
insn_dep=frozenset({insn_dep}),
additional_inames=frozenset(visitor.inames),
)
inames = tuple(sumfact_iname(mat.rows, 'accum') for mat in a_matrices)
# Collect the lfs and lfs indices for the accumulate call
test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure)
test_lfs.index = flatten_index(tuple(Variable(i) for i in inames),
(basis_functions_per_direction(),) * dim,
order="f"
)
# In the jacobian case, also determine the space for the ansatz space
ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure)
rank = 2 if visitor.inames else 1
if rank == 2:
ansatz_lfs.index = flatten_index(tuple(Variable(i) for i in visitor.inames),
(basis_functions_per_direction(),) * dim,
order="f"
)
# Construct the expression representing "{r,jac}.accumulate(..)"
accum = name_accumulation_variable()
expr = Call(PDELabAccumulationFunction(accum, rank),
(ansatz_lfs.get_args() +
test_lfs.get_args() +
(Subscript(Variable(result), tuple(Variable(i) for i in inames)),)
)
)
instruction(assignees=(),
expression=expr,
forced_iname_deps=frozenset(inames + visitor.inames),
forced_iname_deps_is_final=True,
depends_on=insn_dep,
)
# Mark the transformation that moves the quadrature loop inside the trialfunction loops for application
transform(nest_quadrature_loops, visitor.inames)
def sum_factorization_kernel(a_matrices, buf, insn_dep=frozenset({}), additional_inames=frozenset({})):
"""
Calculate a sum factorization matrix product.
......@@ -168,7 +213,7 @@ def sum_factorization_kernel(a_matrices, buffer, insn_dep=frozenset({}), additio
a_matrices: An iterable of AMatrix instances
The list of tensors to be applied to the input.
Order of application is from 0 up.
buffer: A string identifying the flip flop buffer in use
buf: A string identifying the flip flop buffer in use
for intermediate results. The memory is expected to be
pre-initialized with the input.
insn_dep: an instruction ID that the first issued instruction
......@@ -180,21 +225,27 @@ def sum_factorization_kernel(a_matrices, buffer, insn_dep=frozenset({}), additio
# as a column-major matrix. In later iteration of the amatrix loop
# this reinterprets the output of the previous iteration.
inp_shape = (a_matrix.cols, product(mat.rows for mat in a_matrices[:l]) * product(mat.cols for mat in a_matrices[l + 1:]))
inp = get_buffer_temporary(buffer,
inp = get_buffer_temporary(buf,
shape=inp_shape,
dim_tags="f,f")
# The input temporary will only be read from, so we need to silence the loopy warning
silenced_warning('read_no_write({})'.format(inp))
switch_base_storage(buffer)
switch_base_storage(buf)
# Get a temporary that interprets the base storage of the output
# as row-major matrix
out_shape = (a_matrix.rows, product(mat.rows for mat in a_matrices[:l]) * product(mat.cols for mat in a_matrices[l + 1:]))
out = get_buffer_temporary(buffer,
# Do not rotate in last step
out_tags = "c,c"
if l == len(a_matrices) - 1:
out_tags = "f,f"
out = get_buffer_temporary(buf,
shape=out_shape,
dim_tags="c,c")
dim_tags=out_tags)
# Get the inames needed for one matrix-matrix multiplication
i = sumfact_iname(out_shape[0], "row")
......@@ -217,4 +268,8 @@ def sum_factorization_kernel(a_matrices, buffer, insn_dep=frozenset({}), additio
)
})
out = get_buffer_temporary(buf,
shape=out_shape,
dim_tags="c,c")
return out, insn_dep
# # 1. Poisson Test Case: source f, bc g
# dune_add_formcompiler_system_test(UFLFILE poisson.ufl
# BASENAME sumfact_poisson
# INIFILE poisson.mini
# )
#
# 1. Poisson Test Case with order 1
dune_add_formcompiler_system_test(UFLFILE poisson_order1.ufl
BASENAME sumfact_poisson_order1
INIFILE poisson_order1.mini
)
# 1. Poisson Test Case with order 2
dune_add_formcompiler_system_test(UFLFILE poisson_order2.ufl
BASENAME sumfact_poisson_order2
INIFILE poisson_order2.mini
)
# # 2. Poisson Test Case: DG, f + pure dirichlet
#dune_add_formcompiler_system_test(UFLFILE poisson_dg.ufl
# dune_add_formcompiler_system_test(UFLFILE poisson_dg.ufl
# BASENAME sumfact_poisson_dg
# INIFILE poisson_dg.mini
# )
__name = sumfact_poisson_dg_only_volume_{__exec_suffix}
__exec_suffix = {__num_suffix}_{__sumfact_suffix}
__num_suffix = numdiff, symdiff |expand num
__sumfact_suffix = normal, sumfact | expand sumf
cells = 1 1
extension = 1. 1.
printresidual = 1
printmatrix = 1
[wrapper.vtkcompare]
name = {__name}
extension = vtu
[formcompiler]
numerical_jacobian = 1, 0 | expand num
sumfact = 0, 1 | expand sumf
print_transformations = 1
print_transformations_dir = .
cell = "quadrilateral"
f = Expression("return -2.0*x.size();", cell=cell)
g = Expression("return x.two_norm2();", on_intersection=True, cell=cell)
V = FiniteElement("DG", cell, 1)
u = TrialFunction(V)
v = TestFunction(V)
n = FacetNormal(cell)('+')
gamma = 1.0
theta = 1.0
r = inner(grad(u), grad(v))*dx \
- f*v*dx
forms = [r]
\ No newline at end of file
__name = sumfact_poisson_{__exec_suffix}
__name = sumfact_poisson_order1_{__exec_suffix}
__exec_suffix = numdiff, symdiff | expand num
cells = 1 1
cells = 8 8
extension = 1. 1.
[wrapper.vtkcompare]
......@@ -12,5 +12,5 @@ extension = vtu
[formcompiler]
numerical_jacobian = 1, 0 | expand num
exact_solution_expression = g
compare_l2errorsquared = 1e-6
compare_l2errorsquared = 1e-4
sumfact = 1
__name = sumfact_poisson_order2_{__exec_suffix}
__exec_suffix = numdiff, symdiff | expand num
cells = 8 8
extension = 1. 1.
[wrapper.vtkcompare]
name = {__name}
reference = poisson_ref
extension = vtu
[formcompiler]
numerical_jacobian = 1, 0 | expand num
exact_solution_expression = g
compare_l2errorsquared = 1e-8
sumfact = 1
cell = "quadrilateral"
f = Expression("Dune::FieldVector<double,2> c(0.5); c-= x; return 4.*(1.-c.two_norm2())*std::exp(-1.*c.two_norm2());", cell=cell)
g = Expression("Dune::FieldVector<double,2> c(0.5); c-= x; return std::exp(-1.*c.two_norm2());", cell=cell)
V = FiniteElement("CG", cell, 2, dirichlet_expression=g)
u = TrialFunction(V)
v = TestFunction(V)
forms = [(inner(grad(u), grad(v)) - f*v)*dx]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment