Newer
Older
form_option_context,
)
from dune.codegen.generation import (accumulation_mixin,
constructor_parameter,
domain,
dump_accumulate_timer,
end_of_file,
function_mangler,
generator_factory,
get_global_context_value,
global_context,
iname,
include_file,
initializer_list,
post_include,
retrieve_cache_functions,
retrieve_cache_items,
ReturnArg,
run_hook,
template_parameter,
Dominic Kempf
committed
from dune.codegen.loopy.target import type_floatingpoint
from dune.codegen.ufl.modified_terminals import Restriction
from pymbolic.primitives import Variable
from pytools import Record, ImmutableRecord
Dominic Kempf
committed
import cgen
def lop_template_ansatz_gfs():
name = "GFSU"
constructor_parameter("const {}&".format(name), name_ansatz_gfs_constructor_param(), classtag="operator")
return name
def name_ansatz_gfs_constructor_param():
return "gfsu"
def lop_template_test_gfs():
name = "GFSV"
constructor_parameter("const {}&".format(name), name_test_gfs_constructor_param(), classtag="operator")
return name
def name_test_gfs_constructor_param():
return "gfsv"
def lop_domain_field(name):
# TODO: Rethink for not Galerkin Method
gfs = lop_template_ansatz_gfs()
return "using {} = typename {}::Traits::GridView::ctype;".format(name, gfs)
def name_domain_field():
name = "DF"
lop_domain_field(name)
return name
def lop_template_gfs(ma):
from ufl.classes import Argument, Coefficient
if isinstance(ma.argexpr, Argument):
if ma.argexpr.number() == 0:
return lop_template_test_gfs()
if ma.argexpr.number() == 1:
return lop_template_ansatz_gfs()
if isinstance(ma.argexpr, Coefficient):
# Index 0 is reserved for trialfunction, index 1 is reserved for jacobian apply function
assert ma.argexpr.count() < 2
return lop_template_ansatz_gfs()
assert False
def name_initree_constructor_param():
return "iniParams"
param_name = name_initree_constructor_param()
include_file('dune/common/parametertree.hh', filetag="operatorfile")
constructor_parameter("const Dune::ParameterTree&", param_name, classtag="operator")
initializer_list(name, [param_name], classtag="operator")
return "const Dune::ParameterTree& {};".format(name)
return "enum {{ doPattern{} = true }};".format(which)
from dune.codegen.generation import get_global_context_value
integral_type = get_global_context_value("integral_type")
from dune.codegen.pdelab.signatures import ufl_measure_to_pdelab_measure
return _enum_pattern(ufl_measure_to_pdelab_measure(integral_type))
def _pattern_baseclass(measure):
return base_class('Dune::PDELab::Full{}Pattern'.format(measure), classtag="operator")
def pattern_baseclass():
from dune.codegen.generation import get_global_context_value
integral_type = get_global_context_value("integral_type")
from dune.codegen.pdelab.signatures import ufl_measure_to_pdelab_measure
return _pattern_baseclass(ufl_measure_to_pdelab_measure(integral_type))
return "enum {{ doAlpha{} = true }};".format(which)
from dune.codegen.generation import get_global_context_value
integral_type = get_global_context_value("integral_type")
from dune.codegen.pdelab.signatures import ufl_measure_to_pdelab_measure
return _enum_alpha(ufl_measure_to_pdelab_measure(integral_type))
@class_member(classtag="operator")
def enum_skeleton_twosided():
return "enum { doSkeletonTwoSided = true };"
def localoperator_basename(form_ident):
return get_form_option("classname", form_ident)
def name_gridfunction_member(coeff, restriction, diffOrder=0):
# We reuse the grid function for volume integrals in skeleton integrals
if restriction == Restriction.POSITIVE:
restriction = Restriction.NONE
restr = "_n" if restriction == Restriction.NEGATIVE else ""
name = "local_gridfunction_coeff{}_diff{}{}".format(coeff.count(), diffOrder, restr)
define_gridfunction_member(name, coeff, restriction, diffOrder)
return name
def name_gridfunction_constructor_argument(coeff):
_type = type_gridfunction_template_parameter(coeff)
name = "gridfunction_coeff{}_".format(coeff.count())
constructor_parameter("const {}&".format(_type), name, classtag="operator")
return name
@class_member(classtag="operator")
def define_gridfunction_member(name, coeff, restriction, diffOrder):
_type = type_gridfunction_template_parameter(coeff)
param = name_gridfunction_constructor_argument(coeff)
if diffOrder > 0:
other = name_gridfunction_member(coeff, restriction, diffOrder - 1)
init = "derivative({})".format(other)
initializer_list(name, [init], classtag="operator")
return "mutable decltype({}) {};".format(init, name)
init = "localFunction({})".format(param)
initializer_list(name, [init], classtag="operator")
return "mutable typename {}::LocalFunction {};".format(_type, name)
@template_parameter(classtag="operator")
def type_gridfunction_template_parameter(coeff):
return "GRIDFUNCTION_COEFF{}".format(coeff.count())
def class_type_from_cache(classtag):
from dune.codegen.generation import retrieve_cache_items
# get the basename
basename = [i for i in retrieve_cache_items(condition="{} and basename".format(classtag))]
assert len(basename) == 1
basename = basename[0]
# get the template parameters
tparams = [i for i in retrieve_cache_items(condition="{} and template_param".format(classtag))]
tparam_str = ''
if len(tparams) > 0:
tparam_str = '<{}>'.format(', '.join(t for t in tparams))
return basename, basename + tparam_str
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
class AccumulationSpace(Record):
def __init__(self,
lfs=None,
index=None,
restriction=None,
element=None,
):
Record.__init__(self,
lfs=lfs,
index=index,
restriction=restriction,
element=element,
)
def get_args(self):
if self.lfs is None:
return ()
else:
return (self.lfs, self.index)
def get_restriction(self):
if self.restriction is None:
return ()
else:
return (self.restriction,)
# TODO maybe move this onto the visitor as a helper function?
def determine_accumulation_space(info, number):
if info is None:
return AccumulationSpace()
assert info is not None
element = info.element
subel = element
from ufl import MixedElement
if isinstance(element, MixedElement):
subel = element.extract_component(info.element_index)[1]
# And generate a local function space for it!
from dune.codegen.pdelab.spaces import name_lfs, name_lfs_bound, lfs_iname
lfs = name_lfs(element, info.restriction, info.element_index)
from loopy.types import NumpyType
valuearg(lfs, dtype=NumpyType("str"))
if get_form_option("blockstructured"):
from dune.codegen.blockstructured.tools import micro_index_to_macro_index
from dune.codegen.blockstructured.spaces import lfs_inames
Marcel Koch
committed
lfsi = micro_index_to_macro_index(subel, lfs_inames(subel, info.restriction, count=number))
lfsi = Variable(lfs_iname(subel, info.restriction, count=number))
# If the LFS is not yet a pymbolic expression, make it one
from pymbolic.primitives import Expression
if not isinstance(lfs, Expression):
lfs = Variable(lfs)
return AccumulationSpace(lfs=lfs,
index=lfsi,
restriction=info.restriction,
def boundary_predicates(measure, subdomain_id):
predicates = []
if subdomain_id not in ['everywhere', 'otherwise']:
# Get the original form and inspect the present measures
from dune.codegen.generation import get_global_context_value
data = get_global_context_value("data")
original_form = data.object_by_name[get_form_option("form")]
subdomains = []
for integral in original_form.integrals():
if integral.integral_type() == measure:
subdomains.append(integral.subdomain_data())
subdomain_data, = set(subdomains)
from ufl.classes import Expr
if isinstance(subdomain_data, Expr):
visitor = get_visitor(measure, subdomain_id)
subdomain_data = visitor(subdomain_data, do_predicates=True)
# Often, boundary predicates are just nested conditionals. We try to find
# out which of these are unfulfillable in the first place.
def possible_values(expr):
if isinstance(expr, prim.If):
return set(possible_values(expr.then)).union(possible_values(expr.else_))
else:
return set({expr})
if subdomain_id not in possible_values(subdomain_data):
return frozenset({False})
p = prim.Comparison(subdomain_data, '==', subdomain_id)
# Try to find conditions that are always 0 or always 1
from pymbolic.mapper.evaluator import evaluate
try:
eval = evaluate(p)
if not eval:
except:
predicates.append(p)
return frozenset(predicates)
@accumulation_mixin("base")
class AccumulationMixinBase(object):
def get_accumulation_info(self, expr):
raise NotImplementedError
def list_accumulation_infos(self, expr):
raise NotImplementedError
def generate_accumulation_instruction(self, expr):
raise NotImplementedError
@accumulation_mixin("generic")
class GenericAccumulationMixin(AccumulationMixinBase):
def get_accumulation_info(self, expr):
restriction = self.restriction
if self.measure == 'exterior_facet':
restriction = Restriction.POSITIVE
return get_accumulation_info(expr, restriction, self.indices, self)
def list_accumulation_infos(self, expr):
return list_accumulation_infos(expr, self)
def generate_accumulation_instruction(self, expr):
return generate_accumulation_instruction(expr, self)
class PDELabAccumulationInfo(ImmutableRecord):
def __init__(self,
element=None,
element_index=0,
restriction=None,
inames=(),
):
ImmutableRecord.__init__(self,
element=element,
element_index=element_index,
restriction=restriction,
inames=inames,
)
def __eq__(self, other):
return type(self) == type(other) and self.element_index == other.element_index and self.restriction == other.restriction
def __hash__(self):
return (self.element_index, self.restriction)
def _list_infos(expr, number, visitor):
from dune.codegen.ufl.modified_terminals import extract_modified_arguments
ma = extract_modified_arguments(expr, argnumber=number)
if len(ma) == 0:
if number == 1:
yield None
return
element = ma[0].argexpr.ufl_element()
if visitor.measure == "cell":
restrictions = (Restriction.NONE,)
elif visitor.measure == "exterior_facet":
restrictions = (Restriction.POSITIVE,)
elif visitor.measure == "interior_facet":
restrictions = (Restriction.POSITIVE, Restriction.NEGATIVE)
for res in restrictions:
for ei in range(element.value_size()):
yield PDELabAccumulationInfo(element_index=ei, restriction=res)
def list_accumulation_infos(expr, visitor):
testgen = _list_infos(expr, 0, visitor)
trialgen = _list_infos(expr, 1, visitor)
import itertools
return itertools.product(testgen, trialgen)
@kernel_cached
def get_accumulation_info(expr, restriction, indices, visitor):
element = expr.ufl_element()
leaf_element = element
element_index = 0
from ufl import MixedElement
if isinstance(expr.ufl_element(), MixedElement):
element_index = indices[0]
leaf_element = element.extract_component(element_index)[1]
inames = visitor.lfs_inames(leaf_element,
restriction,
expr.number()
)
return PDELabAccumulationInfo(element=expr.ufl_element(),
element_index=element_index,
restriction=restriction,
inames=inames,
)
def generate_accumulation_instruction(expr, visitor):
# Collect the lfs and lfs indices for the accumulate call
test_lfs = determine_accumulation_space(visitor.test_info, 0)
# In the jacobian case, also determine the space for the ansatz space
ansatz_lfs = determine_accumulation_space(visitor.trial_info, 1)
# Collect the lfs and lfs indices for the accumulate call
from dune.codegen.pdelab.argument import name_accumulation_variable
accumvar = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction())
predicates = boundary_predicates(visitor.measure, visitor.subdomain_id)
rank = 1 if ansatz_lfs.lfs is None else 2
from dune.codegen.pdelab.argument import PDELabAccumulationFunction
from pymbolic.primitives import Call
accexpr = Call(PDELabAccumulationFunction(accumvar, rank),
(test_lfs.get_args() + ansatz_lfs.get_args() + (expr,))
)
quad_inames = visitor.quadrature_inames()
lfs_inames = frozenset(visitor.test_info.inames)
if visitor.trial_info:
lfs_inames = lfs_inames.union(visitor.trial_info.inames)
instruction(assignees=(),
expression=accexpr,
forced_iname_deps=lfs_inames.union(frozenset(quad_inames)),
forced_iname_deps_is_final=True,
predicates=predicates
)
def get_visitor(measure, subdomain_id):
# Construct the visitor taking into account geometry mixins
mixins = get_form_option("geometry_mixins").split(",")
VisitorType = construct_from_mixins(base=UFL2LoopyVisitor, mixins=mixins, mixintype="geometry", name="UFLVisitor")
# Mix quadrature mixins in
mixins = get_form_option("quadrature_mixins").split(",")
VisitorType = construct_from_mixins(base=VisitorType, mixins=mixins, mixintype="quadrature", name="UFLVisitor")
# Mix basis mixins in
mixins = get_form_option("basis_mixins").split(",")
VisitorType = construct_from_mixins(base=VisitorType, mixins=mixins, mixintype="basis", name="UFLVisitor")
# Mix accumulation mixins in
mixins = get_form_option("accumulation_mixins").split(",")
VisitorType = construct_from_mixins(base=VisitorType, mixins=mixins, mixintype="accumulation", name="UFLVisitor")
def visit_integral(integral):
integrand = integral.integrand()
measure = integral.integral_type()
subdomain_id = integral.subdomain_id()
# Avoid even visiting the integral, if it is noop
predicates = boundary_predicates(measure, subdomain_id)
if False in predicates:
return
# Start the visiting process!
visitor = get_visitor(measure, subdomain_id)
with global_context(visitor=visitor):
visitor.accumulate(integrand)
run_hook(name="after_visit",
args=(visitor,),
)
def generate_kernel(integrals):
# Visit all integrals once to collect information (dry-run)!
logger.debug('generate_kernel: visit_integrals (dry run)')
with global_context(dry_run=True):
for integral in integrals:
visit_integral(integral)
# Now perform some checks on what should be done
from dune.codegen.sumfact.vectorization import decide_vectorization_strategy
logger.debug('generate_kernel: decide_vectorization_strategy')
decide_vectorization_strategy()
# Delete the cache contents and do the real thing!
logger.debug('generate_kernel: visit_integrals (no dry run)')
delete_cache_items("kernel_default")
for integral in integrals:
visit_integral(integral)
from dune.codegen.pdelab.signatures import kernel_name, assembly_routine_signature
name = kernel_name()
signature = assembly_routine_signature()
knl = extract_kernel_from_cache("kernel_default", name, signature)
delete_cache_items("kernel_default")
# Reset the quadrature degree
from dune.codegen.sumfact.tabulation import set_quadrature_points
set_quadrature_points(None)
# Clean the cache from any data collected after the dry run
delete_cache_items("dryrundata")
# Run preprocessing from custom user code
knl = run_hook(name="loopy_kernel",
args=(ReturnArg(knl),),
)
Dominic Kempf
committed
def generate_kernels_per_integral(integrals):
if get_form_option("sumfact"):
from dune.codegen.sumfact.switch import sumfact_generate_kernels_per_integral
for k in sumfact_generate_kernels_per_integral(integrals):
yield k
else:
yield generate_kernel(integrals)
Dominic Kempf
committed
def extract_kernel_from_cache(tag, name, signature, wrap_in_cgen=True, add_timings=True):
# Now extract regular loopy kernel components
domains = [i for i in retrieve_cache_items("{} and domain".format(tag))]
if not domains:
domains = ["{[stupid] : 0<=stupid<1}"]
instructions = [i for i in retrieve_cache_items("{} and instruction".format(tag))]
substrules = [i for i in retrieve_cache_items("{} and substrule".format(tag)) if i is not None]
temporaries = {i.name: i for i in retrieve_cache_items("{} and temporary".format(tag))}
arguments = [i for i in retrieve_cache_items("{} and argument".format(tag))]
silenced = [l for l in retrieve_cache_items("{} and silenced_warning".format(tag))]
transformations = [t for t in retrieve_cache_items("{} and transformation".format(tag))]
# Construct an options object
from loopy import Options
opt = Options(ignore_boostable_into=True,
check_dep_resolution=False,
enforce_variable_access_ordered="no_check",
kernel = make_kernel(domains,
arguments,
temporary_variables=temporaries,
target=DuneTarget(),
options=opt,
silenced_warnings=silenced,
lang_version=(2017, 2, 1),
from loopy import make_reduction_inames_unique
kernel = make_reduction_inames_unique(kernel)
# Apply the transformations that were gathered during tree traversals
for trafo in transformations:
# Apply performance transformations
from dune.codegen.loopy.transformations.performance import performance_transformations
kernel = performance_transformations(kernel, signature)
# Maybe apply vectorization strategies
if get_form_option("vectorization_quadloop") and get_form_option("sumfact"):
from dune.codegen.loopy.transformations.vectorize_quad import vectorize_quadrature_loop
kernel = vectorize_quadrature_loop(kernel)
if get_form_option("vectorization_blockstructured"):
from dune.codegen.blockstructured.vectorization import vectorize_micro_elements
Marcel Koch
committed
kernel = vectorize_micro_elements(kernel)
# Now add the preambles to the kernel
preambles = [(i, p) for i, p in enumerate(retrieve_cache_items("{} and preamble".format(tag)))]
kernel = kernel.copy(preambles=preambles)
# Remove inames that have become obsolete
kernel = lp.remove_unused_inames(kernel)
# Do the loopy preprocessing!
kernel = preprocess_kernel(kernel)
# *REALLY* ignore boostability. This is - so far - necessary due to a mystery bug.
kernel = kernel.copy(instructions=[i.copy(boostable=False, boostable_into=frozenset()) for i in kernel.instructions])
from dune.codegen.loopy.transformations.matchfma import match_fused_multiply_add
# Add instrumentation to the kernel
from dune.codegen.loopy.transformations.instrumentation import add_instrumentation
if add_timings and get_form_option("sumfact"):
from dune.codegen.pdelab.signatures import assembler_routine_name
kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage1"), "{}_kernel_stage1".format(assembler_routine_name()), 4)
kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage2"), "{}_kernel_quadratureloop".format(assembler_routine_name()), 4, depends_on=frozenset({lp.match.Tagged("sumfact_stage1")}))
kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage3"), "{}_kernel_stage3".format(assembler_routine_name()), 4, depends_on=frozenset({lp.match.Tagged("sumfact_stage2")}))
Dominic Kempf
committed
if wrap_in_cgen:
# Wrap the kernel in something which can generate code
if signature is None:
from dune.codegen.pdelab.signatures import assembly_routine_signature
signature = assembly_routine_signature()
kernel = LoopyKernelMethod(signature, kernel, add_timings=add_timings)
Dominic Kempf
committed
def name_time_dumper_os():
return "os"
def name_time_dumper_reset():
return "reset"
def name_time_dumper_ident():
return "ident"
@generator_factory(item_tags=("cached",), cache_key_generator=lambda **kw: None)
def name_example_kernel(name=None):
return name
class TimerMethod(ClassMember):
def __init__(self):
os = name_time_dumper_os()
reset = name_time_dumper_reset()
ident = name_time_dumper_ident()
knl = name_example_kernel()
assert(knl is not None)
"void dump_timers(Stream& {}, std::string {}, bool {})".format(os, ident, reset),
dump_timers = [i for i in retrieve_cache_items(condition='dump_timers')]
content.extend(map(lambda x: ' ' + x, dump_timers))
content.append("}")
ClassMember.__init__(self, content)
class RegisterLikwidMethod(ClassMember):
def __init__(self):
knl = name_example_kernel()
assert(knl is not None)
content = ["void register_likwid_timers()"
"{"]
register_liwkid_timers = [i for i in retrieve_cache_items(condition='register_likwid_timers')]
content.extend(map(lambda x: ' ' + x, register_liwkid_timers))
content += ["}"]
ClassMember.__init__(self, content)
class RegisterSSCMarksMethod(ClassMember):
def __init__(self):
knl = name_example_kernel()
assert(knl is not None)
content = ["void dump_ssc_marks()"
"{"]
register_liwkid_timers = [i for i in retrieve_cache_items(condition='register_ssc_marks')]
content.extend(map(lambda x: ' ' + x, register_liwkid_timers))
content += ["}"]
ClassMember.__init__(self, content)
class LoopyKernelMethod(ClassMember):
def __init__(self, signature, kernel, add_timings=True, initializer_list=[]):
from loopy import generate_body
from cgen import LiteralLines, Block
# Add initializer list if this is a constructor
if initializer_list:
content[-1] = content[-1] + " :"
for init in initializer_list[:-1]:
content.append(" " * 4 + init + ",")
content.append(" " * 4 + initializer_list[-1])
if add_timings and get_option('instrumentation_level') >= 3:
from dune.codegen.pdelab.signatures import assembler_routine_name
timer_name = assembler_routine_name() + '_kernel'
name_example_kernel(name=timer_name)
if get_option('use_likwid'):
from dune.codegen.pdelab.driver.timings import init_likwid_timer
include_file("likwid.h", filetag="operatorfile")
init_likwid_timer(timer_name)
content.append(' ' + 'LIKWID_MARKER_START(\"{}\");'.format(timer_name))
register_liwkid_timer(timer_name)
from dune.codegen.pdelab.driver.timings import get_region_marks, ssc_macro
post_include(ssc_macro(), filetag='operatorfile')
marks = get_region_marks(timer_name, driver=False)
content.append(' ' + '__SSC_MARK(0x{});'.format(marks[0]))
dump_ssc_marks(timer_name)
else:
post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
content.append(' ' + 'HP_TIMER_START({});'.format(timer_name))
dump_accumulate_timer(timer_name)
if add_timings and get_option("instrumentation_level") >= 4:
setuptimer = '{}_kernel_setup'.format(assembler_routine_name())
if get_option('use_likwid'):
from dune.codegen.pdelab.driver.timings import init_likwid_timer
init_likwid_timer(setuptimer)
content.append(' ' + 'LIKWID_MARKER_START(\"{}\");'.format(setuptimer))
register_liwkid_timer(setuptimer)
elif get_option('use_sde'):
from dune.codegen.pdelab.driver.timings import get_region_marks
setup_marks = get_region_marks(setuptimer, driver=False)
content.append(' ' + '__SSC_MARK(0x{});'.format(setup_marks[0]))
dump_ssc_marks(setuptimer)
else:
post_include('HP_DECLARE_TIMER({});'.format(setuptimer), filetag='operatorfile')
content.append(' HP_TIMER_START({});'.format(setuptimer))
dump_accumulate_timer(setuptimer)
# Add kernel preamble
for i, p in kernel.preambles:
content.append(' ' + p)
if add_timings and get_option('instrumentation_level') >= 4:
if get_option('use_likwid'):
content.append(' ' + 'LIKWID_MARKER_STOP(\"{}\");'.format(setuptimer))
elif get_option('use_sde'):
content.append(' ' + '__SSC_MARK(0x{});'.format(setup_marks[1]))
else:
content.append(' ' + 'HP_TIMER_STOP({});'.format(setuptimer))
content.extend(l for l in generate_body(kernel).split('\n')[1:-1])
if add_timings and get_option('instrumentation_level') >= 3:
if get_option('use_likwid'):
content.append(' ' + 'LIKWID_MARKER_STOP(\"{}\");'.format(timer_name))
elif get_option('use_sde'):
content.append(' ' + '__SSC_MARK(0x{});'.format(marks[1]))
else:
content.append(' ' + 'HP_TIMER_STOP({});'.format(timer_name))
ClassMember.__init__(self, content, name=kernel.name if kernel is not None else "")
from dune.codegen.generation import retrieve_cache_items
# Sort the given member functions by their name to help debugging by fixing
# the order
members = sorted(members, key=lambda m: m.name)
# Generate the name by concatenating basename and template parameters
basename, fullname = class_type_from_cache(tag)
base_classes = [bc for bc in retrieve_cache_items('{} and baseclass'.format(tag))]
constructor_params = [bc for bc in retrieve_cache_items('{} and constructor_param'.format(tag))]
il = [i for i in retrieve_cache_items('{} and initializer'.format(tag))]
pm = [m for m in retrieve_cache_items('{} and member'.format(tag))]
tparams = [i for i in retrieve_cache_items('{} and template_param'.format(tag))]
# Construct the constructor
constructor_knl = extract_kernel_from_cache(tag, "constructor_kernel", None, wrap_in_cgen=False, add_timings=False)
constructor_knl = constructor_knl.copy(target=DuneTarget(declare_temporaries=False))
signature = "{}({})".format(basename, ", ".join(next(iter(p.generate(with_semicolon=False))) for p in constructor_params))
constructor = LoopyKernelMethod([signature], constructor_knl, add_timings=False, initializer_list=il)
from loopy import get_one_scheduled_kernel
constructor_knl = get_one_scheduled_kernel(constructor_knl)
# Take any temporary declarations from the kernel and make them class members
target = DuneTarget()
from loopy.codegen import CodeGenerationState
codegen_state = CodeGenerationState(kernel=constructor_knl,
implemented_data_info=None,
implemented_domain=None,
implemented_predicates=frozenset(),
seen_dtypes=frozenset(),
seen_functions=frozenset(),
seen_atomic_dtypes=frozenset(),
var_subst_map={},
allow_complex=False,
is_generating_device_code=True,
)
Dominic Kempf
committed
decls = [cgen.Line("\n " + next(iter(d.generate()))) for d in target.get_device_ast_builder().get_temporary_decls(codegen_state, 0)]
return Class(basename, base_classes=base_classes, members=[constructor] + members + pm + decls, tparam_decls=tparams)
def local_operator_default_settings(operator, form):
# Manage includes and base classes that we always need
include_file('dune/pdelab/gridfunctionspace/gridfunctionspace.hh', filetag="operatorfile")
include_file('dune/pdelab/localoperator/idefault.hh', filetag="operatorfile")
include_file('dune/pdelab/localoperator/flags.hh', filetag="operatorfile")
include_file('dune/pdelab/localoperator/pattern.hh', filetag="operatorfile")
post_include("#pragma GCC diagnostic push", filetag="operatorfile")
post_include("#pragma GCC diagnostic ignored \"-Wsign-compare\"", filetag="operatorfile")
post_include("#pragma GCC diagnostic ignored \"-Wunused-variable\"", filetag="operatorfile")
post_include("#pragma GCC diagnostic ignored \"-Wunused-but-set-variable\"", filetag="operatorfile")
end_of_file("#pragma GCC diagnostic pop", filetag="operatorfile")
# Trigger this one once early on to assure that template
# parameters are set in the right order
localoperator_basename(operator)
lop_template_ansatz_gfs()
lop_template_test_gfs()
# Make sure there is always the same constructor arguments, even if some of them are
# not strictly needed. Also ensure the order.
name_initree_member()
# Iterate over the needed grid functions in correct order
for c in sorted(filter(lambda c: c.count() > 2, form.coefficients()), key=lambda c: c.count()):
name_gridfunction_constructor_argument(c)
# Add right base classes for stationary/instationary operators
base_class('Dune::PDELab::LocalOperatorDefaultFlags', classtag="operator")
if not is_stationary():
Dominic Kempf
committed
rf = type_floatingpoint()
base_class('Dune::PDELab::InstationaryLocalOperatorDefaultMethods<{}>'
.format(rf), classtag="operator")
# *always* add the volume pattern, PDELab cannot handle matrices without diagonal blocks
with global_context(integral_type="cell"):
enum_pattern()
pattern_baseclass()
if get_form_option("block_preconditioner_diagonal") or get_form_option("block_preconditioner_pointdiagonal"):
enum_skeleton_twosided()
def measure_is_enabled(measure):
option_dict = {"cell": "enable_volume",
"interior_facet": "enable_skeleton",
"exterior_facet": "enable_boundary",
}
return get_form_option(option_dict[measure])
def generate_residual_kernels(form, original_form):
if not get_form_option("generate_residuals"):
return {}
if get_form_option("block_preconditioner_pointdiagonal"):
from ufl import derivative
jacform = derivative(original_form, original_form.coefficients()[0])
from dune.codegen.ufl.preprocess import preprocess_form
jacform = preprocess_form(jacform).preprocessed_form
from dune.codegen.ufl.transformations.blockpreconditioner import diagonal_block_jacobian
form = diagonal_block_jacobian(jacform)
with global_context(form_type='residual'):
# Generate the necessary residual methods
for measure in set(i.integral_type() for i in form.integrals()):
logger.info("generate_residual_kernels: measure {}".format(measure))
if not measure_is_enabled(measure):
continue
with global_context(integral_type=measure):
from dune.codegen.pdelab.signatures import assembler_routine_name
with global_context(kernel=assembler_routine_name()):
kernel = [k for k in generate_kernels_per_integral(form.integrals_by_type(measure))]
# The integrals might vanish due to unfulfillable boundary conditions.
# We only generate the local operator enums/base classes if they did not.
if kernel:
enum_pattern()
pattern_baseclass()
enum_alpha()
# Maybe add numerical differentiation
if get_form_option("numerical_jacobian"):
# Include headers for numerical methods
include_file("dune/pdelab/localoperator/defaultimp.hh", filetag="operatorfile")
# Numerical jacobian base class
_, loptype = class_type_from_cache("operator")
from dune.codegen.pdelab.signatures import ufl_measure_to_pdelab_measure
which = ufl_measure_to_pdelab_measure(measure)
base_class("Dune::PDELab::NumericalJacobian{}<{}>".format(which, loptype), classtag="operator")
# Numerical jacobian initializer list
ini = name_initree_member()
ini_constructor = name_initree_constructor_param()
initializer_list("Dune::PDELab::NumericalJacobian{}<{}>".format(which, loptype),
["{}.get<double>(\"numerical_epsilon.{}\", 1e-9)".format(ini_constructor, ini, which.lower())],
classtag="operator",
)
# In the case of matrix free operator evaluation we need jacobian apply methods
if get_form_option("generate_jacobian_apply"):
if is_linear(original_form):
# Numeical jacobian apply base class
base_class("Dune::PDELab::NumericalJacobianApply{}<{}>".format(which, loptype), classtag="operator")
# Numerical jacobian apply initializer list
initializer_list("Dune::PDELab::NumericalJacobianApply{}<{}>".format(which, loptype),
["{}.get<double>(\"numerical_epsilon.{}\", 1e-9)".format(ini_constructor, ini, which.lower())],
)
else:
# Numerical nonlinear jacobian apply base class
base_class("Dune::PDELab::NumericalNonlinearJacobianApply{}<{}>".format(which, loptype), classtag="operator")
# Numerical nonlinear jacobian apply initializer list
initializer_list("Dune::PDELab::NumericalNonlinearJacobianApply{}<{}>".format(which, loptype),
["{}.get<double>(\"numerical_epsilon.{}\", 1e-9)".format(ini_constructor, ini, which.lower())],
def generate_jacobian_kernels(form, original_form):
logger = logging.getLogger(__name__)
from ufl import derivative
jacform = derivative(original_form, original_form.coefficients()[0])
jacform = preprocess_form(jacform).preprocessed_form
if get_form_option("block_preconditioner_diagonal"):
from dune.codegen.ufl.transformations.blockpreconditioner import diagonal_block_jacobian
jacform = diagonal_block_jacobian(jacform)
if get_form_option("block_preconditioner_offdiagonal"):
from dune.codegen.ufl.transformations.blockpreconditioner import offdiagonal_block_jacobian
jacform = offdiagonal_block_jacobian(jacform)
if get_form_option("generate_jacobian_apply"):
# The apply vector has reserved index 1 so we directly use Coefficient class from ufl
from ufl import Coefficient
apply_coefficient = Coefficient(form.coefficients()[0].ufl_element(), 1)
# Create application of jacobian on vector
from ufl import action
jac_apply_form = action(jacform, apply_coefficient)