from __future__ import absolute_import from os.path import splitext from dune.perftool.options import get_option from dune.perftool.generation import (backend, base_class, class_basename, class_member, constructor_parameter, domain, dump_accumulate_timer, generator_factory, get_backend, get_global_context_value, global_context, iname, include_file, initializer_list, post_include, retrieve_cache_functions, retrieve_cache_items, set_subst_rule, template_parameter, ) from dune.perftool.cgen.clazz import (AccessModifier, BaseClass, ClassMember, ) from dune.perftool.ufl.modified_terminals import Restriction import dune.perftool.loopy.mangler from pymbolic.primitives import Variable import pymbolic.primitives as prim from pytools import Record import ufl.classes as uc import loopy as lp import cgen def name_form(formdata, data): # Check wether the formdata has a name in UFL try: name = data.object_names[id(formdata.original_form)] return name except: for index, form in enumerate(data.forms): if formdata.preprocessed_form.equals(form): name = str(index) return name # If the form has no name and can not be found in data.forms something went wrong assert False def name_localoperator_file(formdata, data): from dune.perftool.options import get_option if len(data.forms) == 1: filename = get_option("operator_file") else: suffix = '_' + name_form(formdata, data) basename, extension = splitext(get_option("operator_file")) filename = basename + suffix + extension return filename @template_parameter(classtag="operator") def lop_template_ansatz_gfs(): return "GFSU" @template_parameter(classtag="operator") def lop_template_test_gfs(): return "GFSV" @template_parameter(classtag="operator") def lop_template_range_field(): return "RF" @class_member(classtag="operator") def lop_domain_field(name): # TODO: Rethink for not Galerkin Method gfs = lop_template_ansatz_gfs() return "using {} = typename {}::Traits::GridView::ctype;".format(name, gfs) def name_domain_field(): name = "DF" lop_domain_field(name) return name def lop_template_gfs(ma): from ufl.classes import Argument, Coefficient if isinstance(ma.argexpr, Argument): if ma.argexpr.number() == 0: return lop_template_test_gfs() if ma.argexpr.number() == 1: return lop_template_ansatz_gfs() if isinstance(ma.argexpr, Coefficient): # Index 0 is reserved for trialfunction, index 1 is reserved for jacobian apply function assert ma.argexpr.count() < 2 return lop_template_ansatz_gfs() assert False def name_initree_constructor_param(): return "iniParams" @class_member(classtag="operator") def define_initree(name): param_name = name_initree_constructor_param() include_file('dune/common/parametertree.hh', filetag="operatorfile") constructor_parameter("const Dune::ParameterTree&", param_name, classtag="operator") initializer_list(name, [param_name], classtag="operator") return "const Dune::ParameterTree& {};".format(name) @class_member(classtag="operator") def _enum_pattern(which): return "enum {{ doPattern{} = true }};".format(which) def enum_pattern(): from dune.perftool.generation import get_global_context_value integral_type = get_global_context_value("integral_type") from dune.perftool.pdelab.signatures import ufl_measure_to_pdelab_measure return _enum_pattern(ufl_measure_to_pdelab_measure(integral_type)) def _pattern_baseclass(measure): return base_class('Dune::PDELab::Full{}Pattern'.format(measure), classtag="operator") def pattern_baseclass(): from dune.perftool.generation import get_global_context_value integral_type = get_global_context_value("integral_type") from dune.perftool.pdelab.signatures import ufl_measure_to_pdelab_measure return _pattern_baseclass(ufl_measure_to_pdelab_measure(integral_type)) @class_member(classtag="operator") def _enum_alpha(which): return "enum {{ doAlpha{} = true }};".format(which) def enum_alpha(): from dune.perftool.generation import get_global_context_value integral_type = get_global_context_value("integral_type") from dune.perftool.pdelab.signatures import ufl_measure_to_pdelab_measure return _enum_alpha(ufl_measure_to_pdelab_measure(integral_type)) def name_initree_member(): define_initree("_iniParams") return "_iniParams" @class_basename(classtag="operator") def localoperator_basename(formdata, data): form_name = name_form(formdata, data) return "LocalOperator" + form_name.capitalize() def class_type_from_cache(classtag): from dune.perftool.generation import retrieve_cache_items # get the basename basename = [i for i in retrieve_cache_items(condition="{} and basename".format(classtag))] assert len(basename) == 1 basename = basename[0] # get the template parameters tparams = [i for i in retrieve_cache_items(condition="{} and template_param".format(classtag))] tparam_str = '' if len(tparams) > 0: tparam_str = '<{}>'.format(', '.join(t for t in tparams)) return basename, basename + tparam_str class AccumulationSpace(Record): def __init__(self, lfs=None, index=None, restriction=None, element=None, ): Record.__init__(self, lfs=lfs, index=index, restriction=restriction, element=element, ) def get_args(self): if self.lfs is None: return () else: return (self.lfs, self.index) def get_restriction(self): if self.restriction is None: return () else: return (self.restriction,) def determine_accumulation_space(expr, number, measure): from dune.perftool.ufl.modified_terminals import extract_modified_arguments args = extract_modified_arguments(expr, argnumber=number) if measure == 'exterior_facet': for ma in args: ma.restriction = Restriction.NEGATIVE # If this is a residual term we return a dummy object if len(args) == 0: return AccumulationSpace() ma = next(iter(args)) # Extract information on the finite element from ufl.functionview import select_subelement subel = select_subelement(ma.argexpr.ufl_element(), ma.component) # And generate a local function space for it! from dune.perftool.pdelab.spaces import name_lfs, name_lfs_bound, lfs_child, lfs_iname lfs = name_lfs(ma.argexpr.ufl_element(), ma.restriction, ma.component) from dune.perftool.generation import valuearg from loopy.types import NumpyType valuearg(lfs, dtype=NumpyType("str")) if len(subel.value_shape()) != 0: from dune.perftool.pdelab.geometry import dimension_iname idims = tuple(dimension_iname(context='arg', count=i) for i in range(len(subel.value_shape()))) lfs = lfs_child(lfs, idims, shape=subel.value_shape(), symmetry=subel.symmetry()) subel = subel.sub_elements()[0] lfsi = Variable(lfs_iname(subel, ma.restriction, count=number)) # If the LFS is not yet a pymbolic expression, make it one from pymbolic.primitives import Expression if not isinstance(lfs, Expression): lfs = Variable(lfs) return AccumulationSpace(lfs=lfs, index=lfsi, restriction=ma.restriction, ) def boundary_predicates(expr, measure, subdomain_id, visitor): predicates = frozenset([]) if subdomain_id not in ['everywhere', 'otherwise']: # We need to reconstruct the subdomain_data parameter of the measure # I am *totally* confused as to why this information is not at hand anyway, # but conversation with Martin pointed me to dolfin.fem.assembly where this # is done in preprocessing with the limitation of only one possible type of # modified measure per integral type. # Get the original form and inspect the present measures from dune.perftool.generation import get_global_context_value original_form = get_global_context_value("formdata").original_form sd = original_form.subdomain_data() assert len(sd) == 1 subdomains, = list(sd.values()) domain, = list(sd.keys()) for k in list(subdomains.keys()): if subdomains[k] is None: del subdomains[k] # Finally extract the original subdomain_data (which needs to be present!) assert measure in subdomains subdomain_data = subdomains[measure] from ufl.classes import Expr if isinstance(subdomain_data, Expr): cond = visitor(subdomain_data) else: # Determine the name of the parameter function cond = get_global_context_value("data").object_names[id(subdomain_data)] # Trigger the generation of code for this thing in the parameter class from ufl.checks import is_cellwise_constant cellwise_constant = is_cellwise_constant(expr) from dune.perftool.pdelab.parameter import intersection_parameter_function intersection_parameter_function(cond, subdomain_data, cellwise_constant, t='int32') cond = prim.Variable(cond) predicates = predicates.union([prim.Comparison(cond, '==', subdomain_id)]) return predicates @iname def grad_iname(index, dim): from dune.perftool.pdelab.index import name_index name = name_index(index) domain(name, dim) return name @backend(interface="accum_insn") def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): # First we do the tree traversal to get a pymbolic expression representing this expression pymbolic_expr = visitor(accterm.term) # If this is a gradient, we generate an iname additional_inames = frozenset({}) if accterm.new_indices is not None: from ufl.domain import find_geometric_dimension dim = find_geometric_dimension(accterm.argument.expr) for i in accterm.new_indices: if i not in visitor.dimension_indices: additional_inames = additional_inames.union(frozenset({grad_iname(i, dim)})) # It may happen that an entire accumulation term vanishes. We do nothing in that case if pymbolic_expr == 0: return # We also traverse the test function to get its pymbolic equivalent if accterm.new_indices is not None: from ufl.classes import Indexed, MultiIndex accum_expr = Indexed(accterm.argument.expr, MultiIndex(accterm.new_indices)) else: accum_expr = accterm.argument.expr test_expr = visitor(accum_expr) # Combine expression and test function from pymbolic.primitives import Product pymbolic_expr = Product((pymbolic_expr, test_expr)) # Collect the lfs and lfs indices for the accumulate call test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure) # In the jacobian case, also determine the space for the ansatz space ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure) from dune.perftool.pdelab.argument import name_accumulation_variable accumvar = name_accumulation_variable((test_lfs.get_restriction() + ansatz_lfs.get_restriction())) predicates = boundary_predicates(accterm.term, measure, subdomain_id, visitor) rank = 1 if ansatz_lfs.lfs is None else 2 from dune.perftool.pdelab.argument import PDELabAccumulationFunction from pymbolic.primitives import Call expr = Call(PDELabAccumulationFunction(accumvar, rank), (test_lfs.get_args() + ansatz_lfs.get_args() + (pymbolic_expr,)) ) from dune.perftool.generation import instruction from dune.perftool.options import option_switch quad_inames = visitor.interface.quadrature_inames() instruction(assignees=(), expression=expr, forced_iname_deps=additional_inames.union(frozenset(visitor.inames).union(frozenset(quad_inames))), forced_iname_deps_is_final=True, predicates=predicates ) def visit_integrals(integrals): for integral in integrals: integrand = integral.integrand() measure = integral.integral_type() subdomain_id = integral.subdomain_id() subdomain_data = integral.subdomain_data() # Maybe make the jacobian inverse diagonal! if get_option('diagonal_transformation_matrix'): if not get_option('turn_off_diagonal_jacobian'): from dune.perftool.ufl.transformations.axiparallel import diagonal_jacobian integrand = diagonal_jacobian(integrand) # Generate code for the LFS trees present in the form from dune.perftool.ufl.modified_terminals import extract_modified_arguments test_ma = extract_modified_arguments(integrand, argnumber=0) trial_ma = extract_modified_arguments(integrand, coeffcount=0) apply_ma = extract_modified_arguments(integrand, coeffcount=1) import itertools for ma in itertools.chain(test_ma, trial_ma, apply_ma): if measure == 'exterior_facet': ma.restriction = Restriction.NEGATIVE from dune.perftool.pdelab.spaces import traverse_lfs_tree traverse_lfs_tree(ma) # Now split the given integrand into accumulation expressions from dune.perftool.ufl.extract_accumulation_terms import split_into_accumulation_terms accterms = split_into_accumulation_terms(integrand) # Iterate over the terms and generate a kernel for accterm in accterms: # Get dimension indices from dune.perftool.ufl.dimensionindex import dimension_index_mapping indexmap = dimension_index_mapping(accterm.test_arg()) # For jacobian there can also be dimension indices in the expression indexmap.update(dimension_index_mapping(accterm.term)) # Get a transformer instance for this kernel if get_option('sumfact'): from dune.perftool.sumfact import SumFactInterface interface = SumFactInterface() else: from dune.perftool.pdelab import PDELabInterface interface = PDELabInterface() from dune.perftool.ufl.visitor import UFL2LoopyVisitor visitor = UFL2LoopyVisitor(interface, measure, indexmap) # Inspect the UFL file for manual common subexpression elimination stuff! data = get_global_context_value("data") for name, expr in data.object_by_name.items(): if name.startswith("cse"): set_subst_rule(name, expr) # Ensure CSE on detjac * quadrature weight domain = accterm.argument.argexpr.ufl_domain() if measure == "cell": set_subst_rule("integration_factor_cell1", uc.QuadratureWeight(domain) * uc.Abs(uc.JacobianDeterminant(domain))) set_subst_rule("integration_factor_cell2", uc.Abs(uc.JacobianDeterminant(domain)) * uc.QuadratureWeight(domain)) else: set_subst_rule("integration_factor_facet1", uc.FacetJacobianDeterminant(domain) * uc.QuadratureWeight(domain)) set_subst_rule("integration_factor_facet2", uc.QuadratureWeight(domain) * uc.FacetJacobianDeterminant(domain)) get_backend(interface="accum_insn")(visitor, accterm, measure, subdomain_id) def generate_kernel(integrals): # Visit all integrals once to collect information (dry-run)! with global_context(dry_run=True): visit_integrals(integrals) # Now perform some checks on what should be done from dune.perftool.sumfact.vectorization import decide_vectorization_strategy decide_vectorization_strategy() # Delete the cache contents and do the real thing! from dune.perftool.generation import delete_cache_items delete_cache_items("kernel_default") visit_integrals(integrals) knl = extract_kernel_from_cache("kernel_default") delete_cache_items("kernel_default") # Clean the cache from any data collected after the dry run delete_cache_items("dryrundata") return knl @backend(interface="generate_kernels_per_integral") def generate_kernels_per_integral(integrals): yield generate_kernel(integrals) def extract_kernel_from_cache(tag, wrap_in_cgen=True): # Now extract regular loopy kernel components from dune.perftool.loopy.target import DuneTarget domains = [i for i in retrieve_cache_items("{} and domain".format(tag))] if not domains: domains = ["{[stupid] : 0<=stupid<1}"] instructions = [i for i in retrieve_cache_items("{} and instruction".format(tag))] substrules = [i for i in retrieve_cache_items("{} and substrule".format(tag)) if i is not None] temporaries = {i.name: i for i in retrieve_cache_items("{} and temporary".format(tag))} arguments = [i for i in retrieve_cache_items("{} and argument".format(tag))] silenced = [l for l in retrieve_cache_items("{} and silenced_warning".format(tag))] transformations = [t for t in retrieve_cache_items("{} and transformation".format(tag))] # Construct an options object from loopy import Options opt = Options(ignore_boostable_into=True, check_dep_resolution=False, ) # Find a name for the kernel if wrap_in_cgen: from dune.perftool.pdelab.signatures import kernel_name name = kernel_name() else: name = "constructor_kernel" # Create the kernel from loopy import make_kernel, preprocess_kernel kernel = make_kernel(domains, instructions + substrules, arguments, temporary_variables=temporaries, target=DuneTarget(), options=opt, silenced_warnings=silenced, name=name, ) from loopy import make_reduction_inames_unique kernel = make_reduction_inames_unique(kernel) # Apply the transformations that were gathered during tree traversals for trafo in transformations: kernel = trafo[0](kernel, *trafo[1]) # Precompute all the substrules for sr in kernel.substitutions: tmpname = "precompute_{}".format(sr) kernel = lp.precompute(kernel, sr, temporary_name=tmpname, ) # Vectorization strategies are actually very likely to eliminate the # precomputation temporary. To avoid the temporary elimination warning # we need to explicitly disable it. kernel = kernel.copy(silenced_warnings=kernel.silenced_warnings + ["temp_to_write({})".format(tmpname)]) from dune.perftool.loopy import heuristic_duplication kernel = heuristic_duplication(kernel) # Maybe apply vectorization strategies if get_option("vectorize_quad"): if get_option("sumfact"): from dune.perftool.loopy.transformations.collect_rotate import collect_vector_data_rotate kernel = collect_vector_data_rotate(kernel) else: raise NotImplementedError("Only vectorizing sumfactorized code right now!") # Now add the preambles to the kernel preambles = [(i, p) for i, p in enumerate(retrieve_cache_items("{} and preamble".format(tag)))] kernel = kernel.copy(preambles=preambles) # Remove inames that have become obsolete kernel = lp.remove_unused_inames(kernel) # Do the loopy preprocessing! kernel = preprocess_kernel(kernel) # *REALLY* ignore boostability. This is - so far - necessary due to a mystery bug. kernel = kernel.copy(instructions=[i.copy(boostable=False, boostable_into=frozenset()) for i in kernel.instructions]) from dune.perftool.loopy.transformations.matchfma import match_fused_multiply_add kernel = match_fused_multiply_add(kernel) if wrap_in_cgen: # Wrap the kernel in something which can generate code from dune.perftool.pdelab.signatures import assembly_routine_signature signature = assembly_routine_signature() kernel = LoopyKernelMethod(signature, kernel) return kernel def name_time_dumper_os(): return "os" def name_time_dumper_reset(): return "reset" def name_time_dumper_ident(): return "ident" @generator_factory(item_tags=("cached",), cache_key_generator=lambda **kw: None) def name_example_kernel(name=None): return name class TimerMethod(ClassMember): def __init__(self): os = name_time_dumper_os() reset = name_time_dumper_reset() ident = name_time_dumper_ident() knl = name_example_kernel() assert(knl is not None) content = ["template <typename Stream>", "void dump_timers(Stream& {}, std::string {}, bool {})".format(os, ident, reset), "{"] dump_timers = [i for i in retrieve_cache_items(condition='dump_timers')] content.extend(map(lambda x: ' ' + x, dump_timers)) content.append("}") ClassMember.__init__(self, content) class LoopyKernelMethod(ClassMember): def __init__(self, signature, kernel, add_timings=True, initializer_list=[]): from loopy import generate_body from cgen import LiteralLines, Block content = signature # Add initializer list if this is a constructor if initializer_list: content[-1] = content[-1] + " :" for init in initializer_list[:-1]: content.append(" " * 4 + init + ",") content.append(" " * 4 + initializer_list[-1]) content.append('{') if kernel is not None: # Add kernel preamble for i, p in kernel.preambles: content.append(' ' + p) # Start timer if add_timings and get_option('instrumentation_level') >= 3: from dune.perftool.pdelab.signatures import assembler_routine_name timer_name = assembler_routine_name() + '_kernel' name_example_kernel(name=timer_name) post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile') content.append(' ' + 'HP_TIMER_START({});'.format(timer_name)) dump_accumulate_timer(timer_name) # Add kernel body content.extend(l for l in generate_body(kernel).split('\n')[1:-1]) # Stop timer if add_timings and get_option('instrumentation_level') >= 3: content.append(' ' + 'HP_TIMER_STOP({});'.format(timer_name)) content.append('}') ClassMember.__init__(self, content) def cgen_class_from_cache(tag, members=[]): from dune.perftool.generation import retrieve_cache_items # Generate the name by concatenating basename and template parameters basename, fullname = class_type_from_cache(tag) base_classes = [bc for bc in retrieve_cache_items('{} and baseclass'.format(tag))] constructor_params = [bc for bc in retrieve_cache_items('{} and constructor_param'.format(tag))] il = [i for i in retrieve_cache_items('{} and initializer'.format(tag))] pm = [m for m in retrieve_cache_items('{} and member'.format(tag))] tparams = [i for i in retrieve_cache_items('{} and template_param'.format(tag))] # Construct the constructor constructor_knl = extract_kernel_from_cache(tag, wrap_in_cgen=False) from dune.perftool.loopy.target import DuneTarget constructor_knl = constructor_knl.copy(target=DuneTarget(declare_temporaries=False)) signature = "{}({})".format(basename, ", ".join(next(iter(p.generate(with_semicolon=False))) for p in constructor_params)) constructor = LoopyKernelMethod([signature], constructor_knl, add_timings=False, initializer_list=il) # Take any temporary declarations from the kernel and make them class members target = DuneTarget() from loopy.codegen import CodeGenerationState codegen_state = CodeGenerationState(kernel=constructor_knl, implemented_data_info=None, implemented_domain=None, implemented_predicates=frozenset(), seen_dtypes=frozenset(), seen_functions=frozenset(), seen_atomic_dtypes=frozenset(), var_subst_map={}, allow_complex=False, is_generating_device_code=True, ) decls = [cgen.Line("\n " + next(iter(d.generate()))) for d in target.get_device_ast_builder().get_temporary_decls(codegen_state, 0)] from dune.perftool.cgen import Class return Class(basename, base_classes=base_classes, members=[constructor] + members + pm + decls, tparam_decls=tparams) def generate_localoperator_kernels(formdata, data): # Extract the relevant attributes of the form data form = formdata.preprocessed_form # Reset the generation cache from dune.perftool.generation import delete_cache_items delete_cache_items() # Manage includes and base classes that we always need include_file('dune/pdelab/gridfunctionspace/gridfunctionspace.hh', filetag="operatorfile") include_file('dune/pdelab/localoperator/idefault.hh', filetag="operatorfile") include_file('dune/pdelab/localoperator/flags.hh', filetag="operatorfile") include_file('dune/pdelab/localoperator/pattern.hh', filetag="operatorfile") # Trigger this one once early on to assure that template # parameters are set in the right order localoperator_basename(formdata, data) lop_template_ansatz_gfs() lop_template_test_gfs() lop_template_range_field() from dune.perftool.pdelab.parameter import parameterclass_basename parameterclass_basename(formdata, data) # Make sure there is always the same constructor arguments (even if parameter class is empty) from dune.perftool.pdelab.localoperator import name_initree_member name_initree_member() from dune.perftool.pdelab.parameter import name_paramclass name_paramclass() # Add right base classes for stationary/instationary operators base_class('Dune::PDELab::LocalOperatorDefaultFlags', classtag="operator") from dune.perftool.pdelab.driver import is_stationary if not is_stationary(): rf = lop_template_range_field() base_class('Dune::PDELab::InstationaryLocalOperatorDefaultMethods<{}>' .format(rf), classtag="operator") # Create set time method in parameter class from dune.perftool.pdelab.parameter import define_set_time_method define_set_time_method() # Have a data structure collect the generated kernels operator_kernels = {} with global_context(form_type='residual'): # Generate the necessary residual methods for measure in set(i.integral_type() for i in form.integrals()): with global_context(integral_type=measure): enum_pattern() pattern_baseclass() enum_alpha() from dune.perftool.pdelab.signatures import assembler_routine_name with global_context(kernel=assembler_routine_name()): kernel = [k for k in get_backend(interface="generate_kernels_per_integral")(form.integrals_by_type(measure))] # Maybe add numerical differentiation if get_option("numerical_jacobian"): # Include headers for numerical methods include_file("dune/pdelab/localoperator/defaultimp.hh", filetag="operatorfile") # Numerical jacobian base class _, loptype = class_type_from_cache("operator") from dune.perftool.pdelab.signatures import ufl_measure_to_pdelab_measure which = ufl_measure_to_pdelab_measure(measure) base_class("Dune::PDELab::NumericalJacobian{}<{}>".format(which, loptype), classtag="operator") # Numerical jacobian initializer list ini = name_initree_member() ini_constructor = name_initree_constructor_param() initializer_list("Dune::PDELab::NumericalJacobian{}<{}>".format(which, loptype), ["{}.get<double>(\"numerical_epsilon.{}\", 1e-9)".format(ini_constructor, ini, which.lower())], classtag="operator", ) # In the case of matrix free operator evaluation we need jacobian apply methods if get_option("matrix_free"): from dune.perftool.pdelab.driver import is_linear if is_linear(formdata.original_form): # Numeical jacobian apply base class base_class("Dune::PDELab::NumericalJacobianApply{}<{}>".format(which, loptype), classtag="operator") # Numerical jacobian apply initializer list initializer_list("Dune::PDELab::NumericalJacobianApply{}<{}>".format(which, loptype), ["{}.get<double>(\"numerical_epsilon.{}\", 1e-9)".format(ini_constructor, ini, which.lower())], classtag="operator", ) else: # Numerical nonlinear jacobian apply base class base_class("Dune::PDELab::NumericalNonlinearJacobianApply{}<{}>".format(which, loptype), classtag="operator") # Numerical nonlinear jacobian apply initializer list initializer_list("Dune::PDELab::NumericalNonlinearJacobianApply{}<{}>".format(which, loptype), ["{}.get<double>(\"numerical_epsilon.{}\", 1e-9)".format(ini_constructor, ini, which.lower())], classtag="operator", ) operator_kernels[(measure, 'residual')] = kernel # Generate the necessary jacobian methods if not get_option("numerical_jacobian"): from ufl import derivative jacform = derivative(formdata.original_form, formdata.original_form.coefficients()[0]) from dune.perftool.ufl.preprocess import preprocess_form jacform = preprocess_form(jacform).preprocessed_form with global_context(form_type="jacobian"): for measure in set(i.integral_type() for i in jacform.integrals()): with global_context(integral_type=measure): with global_context(kernel=assembler_routine_name()): kernel = [k for k in get_backend(interface="generate_kernels_per_integral")(jacform.integrals_by_type(measure))] operator_kernels[(measure, 'jacobian')] = kernel # Generate dummy functions for those kernels, that vanished in the differentiation process # We *could* solve this problem by using lambda_* terms but we do not really want that, so # we use empty jacobian assembly methods instead alpha_measures = set(i.integral_type() for i in form.integrals()) jacobian_measures = set(i.integral_type() for i in jacform.integrals()) for it in alpha_measures - jacobian_measures: with global_context(integral_type=it): from dune.perftool.pdelab.signatures import assembly_routine_signature operator_kernels[(it, 'jacobian')] = [LoopyKernelMethod(assembly_routine_signature(), kernel=None)] # Jacobian apply methods for matrix-free computations if get_option("matrix_free"): # The apply vector has reserved index 1 so we directly use Coefficient class from ufl from ufl import Coefficient apply_coefficient = Coefficient(form.coefficients()[0].ufl_element(), 1) # Create application of jacobian on vector from ufl import action jac_apply_form = action(jacform, apply_coefficient) # Create kernel for jacobian application with global_context(form_type="jacobian_apply"): for measure in set(i.integral_type() for i in jac_apply_form.integrals()): with global_context(integral_type=measure): with global_context(kernel=assembler_routine_name()): kernel = [k for k in get_backend(interface="generate_kernels_per_integral")(jac_apply_form.integrals_by_type(measure))] operator_kernels[(measure, 'jacobian_apply')] = kernel # Generate dummy functions for those kernels, that vanished in the differentiation process # We *could* solve this problem by using lambda_* terms but we do not really want that, so # we use empty jacobian assembly methods instead alpha_measures = set(i.integral_type() for i in form.integrals()) jacobian_apply_measures = set(i.integral_type() for i in jac_apply_form.integrals()) for it in alpha_measures - jacobian_apply_measures: with global_context(integral_type=it): from dune.perftool.pdelab.signatures import assembly_routine_signature operator_kernels[(it, 'jacobian_apply')] = [LoopyKernelMethod(assembly_routine_signature(), kernel=None)] # Return the set of generated kernels return operator_kernels def generate_localoperator_file(formdata, kernels, filename): operator_methods = [] for k in kernels.values(): operator_methods.extend(k) if get_option('instrumentation_level') >= 3: include_file('dune/perftool/common/timer.hh', filetag='operatorfile') operator_methods.append(TimerMethod()) elif get_option('opcounter'): include_file('dune/perftool/common/timer.hh', filetag='operatorfile') # Write the file! from dune.perftool.file import generate_file param = cgen_class_from_cache("parameterclass") # TODO take the name of this thing from the UFL file lop = cgen_class_from_cache("operator", members=operator_methods) generate_file(filename, "operatorfile", [param, lop]) def generate_localoperator_basefile(formdatas, data): filename = get_option("operator_file") for formdata in formdatas: lop_filename = name_localoperator_file(formdata, data) include_file(lop_filename, filetag="operatorbasefile") from dune.perftool.file import generate_file generate_file(filename, "operatorbasefile", [])