from __future__ import absolute_import

from os.path import splitext
import logging

from dune.perftool.options import (get_form_option,
                                   get_option,
                                   option_switch)
from dune.perftool.generation import (backend,
                                      base_class,
                                      class_basename,
                                      class_member,
                                      constructor_parameter,
                                      domain,
                                      dump_accumulate_timer,
                                      end_of_file,
                                      function_mangler,
                                      generator_factory,
                                      get_backend,
                                      get_global_context_value,
                                      global_context,
                                      iname,
                                      include_file,
                                      initializer_list,
                                      post_include,
                                      retrieve_cache_functions,
                                      retrieve_cache_items,
                                      template_parameter,
                                      )
from dune.perftool.cgen.clazz import (AccessModifier,
                                      BaseClass,
                                      ClassMember,
                                      )
from dune.perftool.ufl.modified_terminals import Restriction

import dune.perftool.loopy.mangler

from pymbolic.primitives import Variable
import pymbolic.primitives as prim
from pytools import Record, ImmutableRecord

import ufl.classes as uc

import loopy as lp
import cgen


@template_parameter(classtag="operator")
def lop_template_ansatz_gfs():
    name = "GFSU"
    constructor_parameter("const {}&".format(name), name_ansatz_gfs_constructor_param(), classtag="operator")
    return name


def name_ansatz_gfs_constructor_param():
    return "gfsu"


@template_parameter(classtag="operator")
def lop_template_test_gfs():
    name = "GFSV"
    constructor_parameter("const {}&".format(name), name_test_gfs_constructor_param(), classtag="operator")
    return name


def name_test_gfs_constructor_param():
    return "gfsv"


@template_parameter(classtag="operator")
def lop_template_range_field():
    return "RF"


@class_member(classtag="operator")
def lop_domain_field(name):
    # TODO: Rethink for not Galerkin Method
    gfs = lop_template_ansatz_gfs()
    return "using {} = typename {}::Traits::GridView::ctype;".format(name, gfs)


def name_domain_field():
    name = "DF"
    lop_domain_field(name)
    return name


def lop_template_gfs(ma):
    from ufl.classes import Argument, Coefficient
    if isinstance(ma.argexpr, Argument):
        if ma.argexpr.number() == 0:
            return lop_template_test_gfs()
        if ma.argexpr.number() == 1:
            return lop_template_ansatz_gfs()
    if isinstance(ma.argexpr, Coefficient):
        # Index 0 is reserved for trialfunction, index 1 is reserved for jacobian apply function
        assert ma.argexpr.count() < 2
        return lop_template_ansatz_gfs()
    assert False


def name_initree_constructor_param():
    return "iniParams"


@class_member(classtag="operator")
def define_initree(name):
    param_name = name_initree_constructor_param()
    include_file('dune/common/parametertree.hh', filetag="operatorfile")
    constructor_parameter("const Dune::ParameterTree&", param_name, classtag="operator")
    initializer_list(name, [param_name], classtag="operator")

    return "const Dune::ParameterTree& {};".format(name)


@class_member(classtag="operator")
def _enum_pattern(which):
    return "enum {{ doPattern{} = true }};".format(which)


def enum_pattern():
    from dune.perftool.generation import get_global_context_value
    integral_type = get_global_context_value("integral_type")
    from dune.perftool.pdelab.signatures import ufl_measure_to_pdelab_measure
    return _enum_pattern(ufl_measure_to_pdelab_measure(integral_type))


def _pattern_baseclass(measure):
    return base_class('Dune::PDELab::Full{}Pattern'.format(measure), classtag="operator")


def pattern_baseclass():
    from dune.perftool.generation import get_global_context_value
    integral_type = get_global_context_value("integral_type")
    from dune.perftool.pdelab.signatures import ufl_measure_to_pdelab_measure
    return _pattern_baseclass(ufl_measure_to_pdelab_measure(integral_type))


@class_member(classtag="operator")
def _enum_alpha(which):
    return "enum {{ doAlpha{} = true }};".format(which)


def enum_alpha():
    from dune.perftool.generation import get_global_context_value
    integral_type = get_global_context_value("integral_type")
    from dune.perftool.pdelab.signatures import ufl_measure_to_pdelab_measure
    return _enum_alpha(ufl_measure_to_pdelab_measure(integral_type))


def name_initree_member():
    define_initree("_iniParams")
    return "_iniParams"


@class_basename(classtag="operator")
def localoperator_basename(form_ident):
    return get_form_option("classname", form_ident)


def class_type_from_cache(classtag):
    from dune.perftool.generation import retrieve_cache_items

    # get the basename
    basename = [i for i in retrieve_cache_items(condition="{} and basename".format(classtag))]
    assert len(basename) == 1
    basename = basename[0]

    # get the template parameters
    tparams = [i for i in retrieve_cache_items(condition="{} and template_param".format(classtag))]
    tparam_str = ''
    if len(tparams) > 0:
        tparam_str = '<{}>'.format(', '.join(t for t in tparams))

    return basename, basename + tparam_str


class AccumulationSpace(Record):
    def __init__(self,
                 lfs=None,
                 index=None,
                 restriction=None,
                 element=None,
                 ):
        Record.__init__(self,
                        lfs=lfs,
                        index=index,
                        restriction=restriction,
                        element=element,
                        )

    def get_args(self):
        if self.lfs is None:
            return ()
        else:
            return (self.lfs, self.index)

    def get_restriction(self):
        if self.restriction is None:
            return ()
        else:
            return (self.restriction,)


# TODO maybe move this onto the visitor as a helper function?
def determine_accumulation_space(info, number):
    if info is None:
        return AccumulationSpace()

    assert info is not None
    element = info.element
    subel = element
    from ufl import MixedElement
    if isinstance(element, MixedElement):
        subel = element.extract_component(info.element_index)[1]

    # And generate a local function space for it!
    from dune.perftool.pdelab.spaces import name_lfs, name_lfs_bound, lfs_iname
    lfs = name_lfs(element, info.restriction, info.element_index)
    from dune.perftool.generation import valuearg
    from loopy.types import NumpyType
    valuearg(lfs, dtype=NumpyType("str"))

    if get_form_option("blockstructured"):
        from dune.perftool.blockstructured.tools import micro_index_to_macro_index
        from dune.perftool.blockstructured.spaces import lfs_inames
        lfsi = micro_index_to_macro_index(subel, lfs_inames(subel, info.restriction, count=number)[0])
    else:
        from dune.perftool.pdelab.spaces import lfs_inames
        lfsi = Variable(lfs_iname(subel, info.restriction, count=number))

    # If the LFS is not yet a pymbolic expression, make it one
    from pymbolic.primitives import Expression
    if not isinstance(lfs, Expression):
        lfs = Variable(lfs)

    return AccumulationSpace(lfs=lfs,
                             index=lfsi,
                             restriction=info.restriction,
                             )


def boundary_predicates(expr, measure, subdomain_id):
    predicates = frozenset([])

    if subdomain_id not in ['everywhere', 'otherwise']:
        # We need to reconstruct the subdomain_data parameter of the measure
        # I am *totally* confused as to why this information is not at hand anyway,
        # but conversation with Martin pointed me to dolfin.fem.assembly where this
        # is done in preprocessing with the limitation of only one possible type of
        # modified measure per integral type.
        # Get the original form and inspect the present measures
        from dune.perftool.generation import get_global_context_value
        data = get_global_context_value("data")
        original_form = data.object_by_name[get_form_option("form")]

        sd = original_form.subdomain_data()
        assert len(sd) == 1
        subdomains, = list(sd.values())
        domain, = list(sd.keys())
        for k in list(subdomains.keys()):
            if subdomains[k] is None:
                del subdomains[k]

        # Finally extract the original subdomain_data (which needs to be present!)
        assert measure in subdomains
        subdomain_data = subdomains[measure]

        from ufl.classes import Expr
        if isinstance(subdomain_data, Expr):
            visitor = get_visitor(measure, subdomain_id)
            cond = visitor(subdomain_data, do_predicates=True)
        else:
            raise NotImplementedError("Only UFL expressions allowed in subdomain_data right now.")

        predicates = predicates.union([prim.Comparison(cond, '==', subdomain_id)])

    return predicates


class PDELabAccumulationInfo(ImmutableRecord):
    def __init__(self,
                 element=None,
                 element_index=0,
                 restriction=None,
                 inames=(),
                 ):
        ImmutableRecord.__init__(self,
                                 element=element,
                                 element_index=element_index,
                                 restriction=restriction,
                                 inames=inames,
                                 )

    def __eq__(self, other):
        return type(self) == type(other) and self.element_index == other.element_index and self.restriction == other.restriction

    def __hash__(self):
        # __hash__ must return an integer, so hash the identifying tuple
        return hash((self.element_index, self.restriction))


def _list_infos(expr, number, visitor):
    from dune.perftool.ufl.modified_terminals import extract_modified_arguments
    ma = extract_modified_arguments(expr, argnumber=number)
    if len(ma) == 0:
        if number == 1:
            yield None
        return
    element = ma[0].argexpr.ufl_element()

    from dune.perftool.ufl.modified_terminals import Restriction
    if visitor.measure == "cell":
        restrictions = (Restriction.NONE,)
    elif visitor.measure == "exterior_facet":
        restrictions = (Restriction.NEGATIVE,)
    elif visitor.measure == "interior_facet":
        restrictions = (Restriction.NEGATIVE, Restriction.POSITIVE)

    for res in restrictions:
        for ei in range(element.value_size()):
            yield PDELabAccumulationInfo(element_index=ei, restriction=res)


def list_accumulation_infos(expr, visitor):
    testgen = _list_infos(expr, 0, visitor)
    trialgen = _list_infos(expr, 1, visitor)

    import itertools
    return itertools.product(testgen, trialgen)


def get_accumulation_info(expr, visitor):
    element = expr.ufl_element()
    leaf_element = element
    element_index = 0
    from ufl import MixedElement
    if isinstance(expr.ufl_element(), MixedElement):
        element_index = visitor.indices[0]
        leaf_element = element.extract_component(element_index)[1]

    restriction = visitor.restriction
    if visitor.measure == 'exterior_facet':
        from dune.perftool.pdelab.restriction import Restriction
        restriction = Restriction.NEGATIVE

    inames = visitor.interface.lfs_inames(leaf_element,
                                          restriction,
                                          expr.number()
                                          )

    return PDELabAccumulationInfo(element=expr.ufl_element(),
                                  element_index=element_index,
                                  restriction=restriction,
                                  inames=inames,
                                  )


def generate_accumulation_instruction(expr, visitor):
    # Collect the lfs and lfs indices for the accumulate call
    test_lfs = determine_accumulation_space(visitor.test_info, 0)
    # In the jacobian case, also determine the space for the ansatz space
    ansatz_lfs = determine_accumulation_space(visitor.trial_info, 1)

    # Collect the accumulation variable for the accumulate call
    from dune.perftool.pdelab.argument import name_accumulation_variable
    accumvar = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction())

    predicates = boundary_predicates(expr, visitor.measure, visitor.subdomain_id)

    rank = 1 if ansatz_lfs.lfs is None else 2

    from dune.perftool.pdelab.argument import PDELabAccumulationFunction
    from pymbolic.primitives import Call
    accexpr = Call(PDELabAccumulationFunction(accumvar, rank),
                   (test_lfs.get_args() + ansatz_lfs.get_args() + (expr,))
                   )

    from dune.perftool.generation import instruction
    from dune.perftool.options import option_switch
    quad_inames = visitor.interface.quadrature_inames()
    lfs_inames = frozenset(visitor.test_info.inames)
    if visitor.trial_info:
        lfs_inames = lfs_inames.union(visitor.trial_info.inames)

    instruction(assignees=(),
                expression=accexpr,
                forced_iname_deps=lfs_inames.union(frozenset(quad_inames)),
                forced_iname_deps_is_final=True,
                predicates=predicates
                )


def get_visitor(measure, subdomain_id):
    # Get a transformer instance for this kernel
    if get_form_option('sumfact'):
        from dune.perftool.sumfact import SumFactInterface
        interface = SumFactInterface()
    elif get_form_option('blockstructured'):
        from dune.perftool.blockstructured import BlockStructuredInterface
        interface = BlockStructuredInterface()
    else:
        from dune.perftool.pdelab import PDELabInterface
        interface = PDELabInterface()

    from dune.perftool.ufl.visitor import UFL2LoopyVisitor
    return UFL2LoopyVisitor(interface, measure, subdomain_id)


def visit_integral(integral):
    integrand = integral.integrand()
    measure = integral.integral_type()
    subdomain_id = integral.subdomain_id()

    # Start the visiting process!
    visitor = get_visitor(measure, subdomain_id)
    visitor.accumulate(integrand)


def generate_kernel(integrals):
    logger = logging.getLogger(__name__)

    # Visit all integrals once to collect information (dry-run)!
    logger.debug('generate_kernel: visit_integrals (dry run)')
    with global_context(dry_run=True):
        for integral in integrals:
            visit_integral(integral)

    # Now perform some checks on what should be done
    from dune.perftool.sumfact.vectorization import decide_vectorization_strategy
    logger.debug('generate_kernel: decide_vectorization_strategy')
    decide_vectorization_strategy()

    # Delete the cache contents and do the real thing!
    logger.debug('generate_kernel: visit_integrals (no dry run)')
    from dune.perftool.generation import delete_cache_items
    delete_cache_items("kernel_default")
    for integral in integrals:
        visit_integral(integral)

    knl = extract_kernel_from_cache("kernel_default")
    delete_cache_items("kernel_default")

    # Reset the quadrature degree
    from dune.perftool.sumfact.tabulation import set_quadrature_points
    set_quadrature_points(None)

    # Clean the cache from any data collected after the dry run
    delete_cache_items("dryrundata")

    return knl


@backend(interface="generate_kernels_per_integral")
def generate_kernels_per_integral(integrals):
    yield generate_kernel(integrals)


def extract_kernel_from_cache(tag, wrap_in_cgen=True):
    # Now extract regular loopy kernel components
    from dune.perftool.loopy.target import DuneTarget
    domains = [i for i in retrieve_cache_items("{} and domain".format(tag))]

    if not domains:
        domains = ["{[stupid] : 0<=stupid<1}"]

    instructions = [i for i in retrieve_cache_items("{} and instruction".format(tag))]
    substrules = [i for i in retrieve_cache_items("{} and substrule".format(tag)) if i is not None]
    temporaries = {i.name: i for i in retrieve_cache_items("{} and temporary".format(tag))}
    arguments = [i for i in retrieve_cache_items("{} and argument".format(tag))]
    silenced = [l for l in retrieve_cache_items("{} and silenced_warning".format(tag))]
    transformations = [t for t in retrieve_cache_items("{} and transformation".format(tag))]

    # Construct an options object
    from loopy import Options
    opt = Options(ignore_boostable_into=True,
                  check_dep_resolution=False,
                  )

    # Find a name for the kernel
    if wrap_in_cgen:
        from dune.perftool.pdelab.signatures import kernel_name
        name = kernel_name()
    else:
        name = "constructor_kernel"

    # Create the kernel
    from loopy import make_kernel, preprocess_kernel
    kernel = make_kernel(domains,
                         instructions + substrules,
                         arguments,
                         temporary_variables=temporaries,
                         target=DuneTarget(),
                         options=opt,
                         silenced_warnings=silenced,
                         name=name,
                         )

    from loopy import make_reduction_inames_unique
    kernel = make_reduction_inames_unique(kernel)

    from dune.perftool.loopy.transformations.disjointgroups import make_groups_conflicting
    kernel = make_groups_conflicting(kernel)

    # Apply the transformations that were gathered during tree traversals
    for trafo in transformations:
        kernel = trafo[0](kernel, *trafo[1])

    # Precompute all the substrules
    for sr in kernel.substitutions:
        tmpname = "precompute_{}".format(sr)
        kernel = lp.precompute(kernel,
                               sr,
                               temporary_name=tmpname,
                               )
        # Vectorization strategies are actually very likely to eliminate the
        # precomputation temporary. To avoid the temporary elimination warning
        # we need to explicitly disable it.
        kernel = kernel.copy(silenced_warnings=kernel.silenced_warnings + ["temp_to_write({})".format(tmpname)])

    from dune.perftool.loopy import heuristic_duplication
    kernel = heuristic_duplication(kernel)

    # Maybe apply vectorization strategies
    if get_form_option("vectorization_quadloop"):
        if get_form_option("sumfact"):
            from dune.perftool.loopy.transformations.vectorize_quad import vectorize_quadrature_loop
            kernel = vectorize_quadrature_loop(kernel)
        else:
            raise NotImplementedError("Only vectorizing sumfactorized code right now!")

    # Now add the preambles to the kernel
    preambles = [(i, p) for i, p in enumerate(retrieve_cache_items("{} and preamble".format(tag)))]
    kernel = kernel.copy(preambles=preambles)

    # Remove inames that have become obsolete
    kernel = lp.remove_unused_inames(kernel)

    # Do the loopy preprocessing!
    kernel = preprocess_kernel(kernel)

    # *REALLY* ignore boostability. This is - so far - necessary due to a mystery bug.
    kernel = kernel.copy(instructions=[i.copy(boostable=False, boostable_into=frozenset()) for i in kernel.instructions])

    from dune.perftool.loopy.transformations.matchfma import match_fused_multiply_add
    kernel = match_fused_multiply_add(kernel)

    if wrap_in_cgen:
        # Wrap the kernel in something which can generate code
        from dune.perftool.pdelab.signatures import assembly_routine_signature
        signature = assembly_routine_signature()
        kernel = LoopyKernelMethod(signature, kernel)

    return kernel


def name_time_dumper_os():
    return "os"


def name_time_dumper_reset():
    return "reset"


def name_time_dumper_ident():
    return "ident"


@generator_factory(item_tags=("cached",), cache_key_generator=lambda **kw: None)
def name_example_kernel(name=None):
    return name


class TimerMethod(ClassMember):
    def __init__(self):
        os = name_time_dumper_os()
        reset = name_time_dumper_reset()
        ident = name_time_dumper_ident()
        knl = name_example_kernel()
        assert(knl is not None)

        content = ["template <typename Stream>",
                   "void dump_timers(Stream& {}, std::string {}, bool {})".format(os, ident, reset),
                   "{"]
        dump_timers = [i for i in retrieve_cache_items(condition='dump_timers')]
        content.extend(map(lambda x: '  ' + x, dump_timers))
        content.append("}")
        ClassMember.__init__(self, content)


class LoopyKernelMethod(ClassMember):
    def __init__(self, signature, kernel, add_timings=True, initializer_list=[]):
        from loopy import generate_body
        from cgen import LiteralLines, Block
        content = signature

        # Add initializer list if this is a constructor
        if initializer_list:
            content[-1] = content[-1] + " :"
            for init in initializer_list[:-1]:
                content.append(" " * 4 + init + ",")
            content.append(" " * 4 + initializer_list[-1])

        content.append('{')

        if kernel is not None:
            # Start timer
            if add_timings and get_option('instrumentation_level') >= 3:
                from dune.perftool.pdelab.signatures import assembler_routine_name
                timer_name = assembler_routine_name() + '_kernel'
                name_example_kernel(name=timer_name)
                post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
                content.append('  ' + 'HP_TIMER_START({});'.format(timer_name))
                dump_accumulate_timer(timer_name)

                if add_timings and get_option("instrumentation_level") >= 4:
                    setuptimer = '{}_kernel_setup'.format(assembler_routine_name())
                    post_include('HP_DECLARE_TIMER({});'.format(setuptimer), filetag='operatorfile')
                    content.append('  HP_TIMER_START({});'.format(setuptimer))
                    dump_accumulate_timer(setuptimer)

            # Add kernel preamble
            for i, p in kernel.preambles:
                content.append('  ' + p)

            # Add kernel body
            content.extend(l for l in generate_body(kernel).split('\n')[1:-1])

            # Stop timer
            if add_timings and get_option('instrumentation_level') >= 3:
                content.append('  ' + 'HP_TIMER_STOP({});'.format(timer_name))

        content.append('}')
        ClassMember.__init__(self, content)


def cgen_class_from_cache(tag, members=[]):
    from dune.perftool.generation import retrieve_cache_items

    # Generate the name by concatenating basename and template parameters
    basename, fullname = class_type_from_cache(tag)

    base_classes = [bc for bc in retrieve_cache_items('{} and baseclass'.format(tag))]
    constructor_params = [bc for bc in retrieve_cache_items('{} and constructor_param'.format(tag))]
    il = [i for i in retrieve_cache_items('{} and initializer'.format(tag))]
    pm = [m for m in retrieve_cache_items('{} and member'.format(tag))]
    tparams = [i for i in retrieve_cache_items('{} and template_param'.format(tag))]

    # Construct the constructor
    constructor_knl = extract_kernel_from_cache(tag, wrap_in_cgen=False)
    from dune.perftool.loopy.target import DuneTarget
    constructor_knl = constructor_knl.copy(target=DuneTarget(declare_temporaries=False))
    signature = "{}({})".format(basename, ", ".join(next(iter(p.generate(with_semicolon=False))) for p in constructor_params))
    constructor = LoopyKernelMethod([signature], constructor_knl, add_timings=False, initializer_list=il)

    # Take any temporary declarations from the kernel and make them class members
    target = DuneTarget()
    from loopy.codegen import CodeGenerationState
    codegen_state = CodeGenerationState(kernel=constructor_knl,
                                        implemented_data_info=None,
                                        implemented_domain=None,
                                        implemented_predicates=frozenset(),
                                        seen_dtypes=frozenset(),
                                        seen_functions=frozenset(),
                                        seen_atomic_dtypes=frozenset(),
                                        var_subst_map={},
                                        allow_complex=False,
                                        is_generating_device_code=True,
                                        )
    decls = [cgen.Line("\n  " + next(iter(d.generate()))) for d in target.get_device_ast_builder().get_temporary_decls(codegen_state, 0)]

    from dune.perftool.cgen import Class
    return Class(basename, base_classes=base_classes, members=[constructor] + members + pm + decls, tparam_decls=tparams)


def generate_localoperator_kernels(operator):
    logger = logging.getLogger(__name__)

    data = get_global_context_value("data")
    original_form = data.object_by_name[get_form_option("form")]

    from dune.perftool.ufl.preprocess import preprocess_form
    form = preprocess_form(original_form).preprocessed_form

    # Reset the generation cache
    from dune.perftool.generation import delete_cache_items
    delete_cache_items()

    # Manage includes and base classes that we always need
    include_file('dune/pdelab/gridfunctionspace/gridfunctionspace.hh', filetag="operatorfile")
    include_file('dune/pdelab/localoperator/idefault.hh', filetag="operatorfile")
    include_file('dune/pdelab/localoperator/flags.hh', filetag="operatorfile")
    include_file('dune/pdelab/localoperator/pattern.hh', filetag="operatorfile")

    post_include("#pragma GCC diagnostic push", filetag="operatorfile")
    post_include("#pragma GCC diagnostic ignored \"-Wsign-compare\"", filetag="operatorfile")
    post_include("#pragma GCC diagnostic ignored \"-Wunused-variable\"", filetag="operatorfile")
    post_include("#pragma GCC diagnostic ignored \"-Wunused-but-set-variable\"", filetag="operatorfile")
    end_of_file("#pragma GCC diagnostic pop", filetag="operatorfile")

    # Trigger this one once early on to assure that template
    # parameters are set in the right order
    localoperator_basename(operator)
    lop_template_ansatz_gfs()
    lop_template_test_gfs()
    lop_template_range_field()

    # Make sure the constructor arguments are always the same (even if the parameter class is empty)
    from dune.perftool.pdelab.localoperator import name_initree_member
    name_initree_member()

    # Set some options!
    from dune.perftool.pdelab.driver import isQuadrilateral
    if isQuadrilateral(form.coefficients()[0].ufl_element().cell()):
        from dune.perftool.options import set_form_option
        # For Yasp grids the jacobian of the transformation is diagonal and constant on each cell
        set_form_option('diagonal_transformation_matrix', True)
        set_form_option('constant_transformation_matrix', True)

    # Add the right base classes for stationary/instationary operators
    base_class('Dune::PDELab::LocalOperatorDefaultFlags', classtag="operator")
    from dune.perftool.pdelab.driver import is_stationary
    if not is_stationary():
        rf = lop_template_range_field()
        base_class('Dune::PDELab::InstationaryLocalOperatorDefaultMethods<{}>'.format(rf), classtag="operator")

    # Have a data structure collect the generated kernels
    operator_kernels = {}

    logger.info("generate_localoperator_kernels: create residual methods")
    with global_context(form_type='residual'):
        # Generate the necessary residual methods
        for measure in set(i.integral_type() for i in form.integrals()):
            logger.info("generate_localoperator_kernels: measure {}".format(measure))
            with global_context(integral_type=measure):
                enum_pattern()
                pattern_baseclass()
                enum_alpha()

                from dune.perftool.pdelab.signatures import assembler_routine_name
                with global_context(kernel=assembler_routine_name()):
                    kernel = [k for k in get_backend(interface="generate_kernels_per_integral")(form.integrals_by_type(measure))]

                # Maybe add numerical differentiation
                if get_form_option("numerical_jacobian"):
                    # Include headers for numerical methods
                    include_file("dune/pdelab/localoperator/defaultimp.hh", filetag="operatorfile")

                    # Numerical jacobian base class
                    _, loptype = class_type_from_cache("operator")
                    from dune.perftool.pdelab.signatures import ufl_measure_to_pdelab_measure
                    which = ufl_measure_to_pdelab_measure(measure)
                    base_class("Dune::PDELab::NumericalJacobian{}<{}>".format(which, loptype), classtag="operator")

                    # Numerical jacobian initializer list
                    ini = name_initree_member()
                    ini_constructor = name_initree_constructor_param()
                    initializer_list("Dune::PDELab::NumericalJacobian{}<{}>".format(which, loptype),
                                     ["{}.get<double>(\"numerical_epsilon.{}\", 1e-9)".format(ini_constructor, which.lower())],
                                     classtag="operator",
                                     )

                    # In the case of matrix free operator evaluation we need jacobian apply methods
                    if get_form_option("matrix_free"):
                        from dune.perftool.pdelab.driver import is_linear
                        if is_linear(original_form):
                            # Numerical jacobian apply base class
                            base_class("Dune::PDELab::NumericalJacobianApply{}<{}>".format(which, loptype), classtag="operator")

                            # Numerical jacobian apply initializer list
                            initializer_list("Dune::PDELab::NumericalJacobianApply{}<{}>".format(which, loptype),
                                             ["{}.get<double>(\"numerical_epsilon.{}\", 1e-9)".format(ini_constructor, which.lower())],
                                             classtag="operator",
                                             )
                        else:
                            # Numerical nonlinear jacobian apply base class
                            base_class("Dune::PDELab::NumericalNonlinearJacobianApply{}<{}>".format(which, loptype), classtag="operator")

                            # Numerical nonlinear jacobian apply initializer list
                            initializer_list("Dune::PDELab::NumericalNonlinearJacobianApply{}<{}>".format(which, loptype),
                                             ["{}.get<double>(\"numerical_epsilon.{}\", 1e-9)".format(ini_constructor, which.lower())],
                                             classtag="operator",
                                             )

                operator_kernels[(measure, 'residual')] = kernel

    # Generate the necessary jacobian methods
    if not get_form_option("numerical_jacobian"):
        logger.info("generate_localoperator_kernels: create jacobian methods")
        from ufl import derivative
        jacform = derivative(original_form, original_form.coefficients()[0])

        from dune.perftool.ufl.preprocess import preprocess_form
        jacform = preprocess_form(jacform).preprocessed_form

        with global_context(form_type="jacobian"):
            if get_form_option("generate_jacobians"):
                for measure in set(i.integral_type() for i in jacform.integrals()):
                    logger.info("generate_localoperator_kernels: measure {}".format(measure))
                    with global_context(integral_type=measure):
                        with global_context(kernel=assembler_routine_name()):
                            kernel = [k for k in get_backend(interface="generate_kernels_per_integral")(jacform.integrals_by_type(measure))]
                        operator_kernels[(measure, 'jacobian')] = kernel

            # Generate dummy functions for those kernels that vanished in the differentiation process.
            # We *could* solve this problem by using lambda_* terms, but we do not really want that,
            # so we use empty jacobian assembly methods instead.
            alpha_measures = set(i.integral_type() for i in form.integrals())
            jacobian_measures = set(i.integral_type() for i in jacform.integrals())
            for it in alpha_measures - jacobian_measures:
                with global_context(integral_type=it):
                    from dune.perftool.pdelab.signatures import assembly_routine_signature
                    operator_kernels[(it, 'jacobian')] = [LoopyKernelMethod(assembly_routine_signature(), kernel=None)]

        # Jacobian apply methods for matrix-free computations
        if get_form_option("matrix_free"):
            # The apply vector has reserved index 1, so we directly use the Coefficient class from ufl
            from ufl import Coefficient
            apply_coefficient = Coefficient(form.coefficients()[0].ufl_element(), 1)

            # Create application of jacobian on vector
            from ufl import action
            jac_apply_form = action(jacform, apply_coefficient)

            # Create kernel for jacobian application
            with global_context(form_type="jacobian_apply"):
                for measure in set(i.integral_type() for i in jac_apply_form.integrals()):
                    with global_context(integral_type=measure):
                        with global_context(kernel=assembler_routine_name()):
                            kernel = [k for k in get_backend(interface="generate_kernels_per_integral")(jac_apply_form.integrals_by_type(measure))]
                        operator_kernels[(measure, 'jacobian_apply')] = kernel

                # Generate dummy functions for those kernels that vanished in the differentiation process.
                # We *could* solve this problem by using lambda_* terms, but we do not really want that,
                # so we use empty jacobian assembly methods instead.
                alpha_measures = set(i.integral_type() for i in form.integrals())
                jacobian_apply_measures = set(i.integral_type() for i in jac_apply_form.integrals())
                for it in alpha_measures - jacobian_apply_measures:
                    with global_context(integral_type=it):
                        from dune.perftool.pdelab.signatures import assembly_routine_signature
                        operator_kernels[(it, 'jacobian_apply')] = [LoopyKernelMethod(assembly_routine_signature(), kernel=None)]

    # Return the set of generated kernels
    return operator_kernels


def generate_localoperator_file(kernels, filename):
    operator_methods = []

    for k in kernels.values():
        operator_methods.extend(k)

    if get_option('instrumentation_level') >= 3:
        include_file('dune/perftool/common/timer.hh', filetag='operatorfile')
        operator_methods.append(TimerMethod())
    elif get_option('opcounter'):
        include_file('dune/perftool/common/timer.hh', filetag='operatorfile')

    # Write the file!
    from dune.perftool.file import generate_file
    # TODO take the name of this thing from the UFL file
    lop = cgen_class_from_cache("operator", members=operator_methods)
    generate_file(filename, "operatorfile", [lop])
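

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of this module's API): the two
# public entry points above are meant to be driven by the code generation
# driver, which sets up the global "data" context and the form options before
# calling into this module. The wiring below is an assumption about that
# driver; the form identifier "poisson" and the output filename are
# placeholder names, and the exact context keys are handled in
# dune.perftool.compile rather than here.
#
#     with global_context(data=data):
#         kernels = generate_localoperator_kernels("poisson")
#         generate_localoperator_file(kernels, "poisson_localoperator.hh")
#
# generate_localoperator_kernels returns a dict mapping (measure, form_type)
# pairs to lists of LoopyKernelMethod objects, which generate_localoperator_file
# then wraps into a single cgen class and writes to disk.
# ---------------------------------------------------------------------------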