""" Sum factorized geometry evaluations """ from dune.codegen.generation import (class_member, domain, geometry_mixin, get_counted_variable, iname, include_file, instruction, kernel_cached, preamble, silenced_warning, temporary_variable, get_global_context_value, globalarg, valuearg, ) from dune.codegen.loopy.flatten import flatten_index from dune.codegen.loopy.target import type_floatingpoint from dune.codegen.options import get_form_option, get_option from dune.codegen.pdelab.geometry import (enforce_boundary_restriction, local_dimension, world_dimension, name_cell_geometry, name_geometry, GenericPDELabGeometryMixin, AxiparallelGeometryMixin, EquidistantGeometryMixin, ) from dune.codegen.pdelab.localoperator import (name_ansatz_gfs_constructor_param, lop_template_ansatz_gfs, ) from dune.codegen.pdelab.restriction import restricted_name from dune.codegen.sumfact.accumulation import basis_sf_kernels, sumfact_iname from dune.codegen.sumfact.basis import construct_basis_matrix_sequence from dune.codegen.sumfact.permutation import (permute_backward, permute_forward, sumfact_cost_permutation_strategy, sumfact_quadrature_permutation_strategy, ) from dune.codegen.sumfact.quadrature import additional_inames from dune.codegen.sumfact.symbolic import SumfactKernelInterfaceBase, SumfactKernel from dune.codegen.tools import get_pymbolic_basename, ImmutableCuttingRecord from dune.codegen.ufl.modified_terminals import Restriction from loopy.match import Writes import pymbolic.primitives as prim import numpy as np import loopy as lp class SumfactGeometryMixinBase(GenericPDELabGeometryMixin): def nonsumfact_fallback(self): return None @geometry_mixin("sumfact_multilinear") class SumfactMultiLinearGeometryMixin(SumfactGeometryMixinBase): def nonsumfact_fallback(self): return "generic" def _jacobian_inverse(self): return "jit" def jacobian_inverse(self, o): # This differs from the generic one by not falling back to the # transpose of the jacobian inverse. restriction = enforce_boundary_restriction(self) assert(len(self.indices) == 2) i, j = self.indices self.indices = None name = restricted_name(self._jacobian_inverse(), restriction) self.define_jacobian_inverse(name, restriction) return prim.Subscript(prim.Variable(name), (i, j)) def _jacobian_determinant(self): return "detjac" def define_jacobian_determinant(self, o): restriction = Restriction.NONE if self.measure == "cell" else Restriction.POSITIVE self.define_jacobian_inverse(self._jacobian_inverse(), restriction) return prim.Variable(self._jacobian_determinant()) def define_jacobian_inverse(self, name, restriction): """Return jacobian inverse of the geometry mapping (of a cell) At the moment this only works for geometry mappings of cells and not for intersection. We only consider this case as it greatly simplifies code generation for unstructured grids by making the grid edge consistent and avoiding the face-twist problem. """ # Calculate the jacobian of the geometry mapping using sum factorization dim = world_dimension() names_jacobian = [] for j in range(dim): for i in range(dim): name_jacobian = restricted_name("jacobian_geometry_{}{}".format(i, j), restriction) temporary_variable(name_jacobian) assignee = prim.Variable(name_jacobian) expression = _name_jacobian(i, j, restriction, self) inames = self.quadrature_inames() + additional_inames(self) instruction(assignee=assignee, expression=expression, within_inames=frozenset(inames), depends_on=frozenset({lp.match.Tagged("sumfact_stage1")}) ) names_jacobian.append(name_jacobian) # The sum factorization kernels from the geometry evaluation of the # jacobians will never appear in the expression for the input of # stage 3. This way the SumfactCollectMapper will never see them # and they will be marked as inactive. Here we explicitly mark the # as used. basis_sf_kernels(expression.aggregate) # Calculate the inverse of the jacobian of the geometry mapping and the # determinant by calling a c++ function. Note: The result will be column # major -> fortran style. name_detjac = self._jacobian_determinant() temporary_variable(name_detjac, shape=()) ftags = "f,f" temporary_variable(name, shape=(dim, dim), dim_tags=ftags, managed=True) include_file('dune/codegen/sumfact/invertgeometry.hh', filetag='operatorfile') code = "{} = invert_and_return_determinant({}, {});".format(name_detjac, ", ".join(names_jacobian), name) silenced_warning("read_no_write({})".format(name_detjac)) inames = self.quadrature_inames() + additional_inames(self) return instruction(code=code, assignees=frozenset([name, name_detjac]), read_variables=frozenset(names_jacobian), inames=frozenset(inames), ) def facet_jacobian_determinant(self, o): """Absolute value of determinant of jacobian of facet geometry mapping Calculate the absolute value of the determinant of the jacobian of the geometry mapping from the reference cell of the intersection to the intersection: |\det(\nabla \mu_F)| This is done using the surface metric tensor lemma from the lecture notes of Jean-Luc Guermond: |\det(\nabla \mu_F)| = \| \tilde{n} \| |\det(\nabla \mu_{T_s})| Here \tilde{n} is the outer normal defined by: \tilde{n} = (\nabla \mu_{T_s})^{-T} \hat{n}_s where \hat{n}_s is the unit outer normal of the corresponding face of the reference cell. """ detjac = self.jacobian_determinant(None) norm_of_outer_normal = normalize(self.outer_normal(), world_dimension()) return prim.Product((detjac, norm_of_outer_normal)) def outer_normal(self): """ This is the *unnormalized* outer normal """ name = "outer_normal" facedir_s = self.get_facedir(Restriction.POSITIVE) facemod_s = self.get_facemod(Restriction.POSITIVE) temporary_variable(name, shape=(world_dimension(),)) for i in range(world_dimension()): assignee = prim.Subscript(prim.Variable(name), i) self.indices = (facedir_s, i) ji = self.jacobian_inverse(None) # Note: 2*facemod_s-1 because of # -1 if facemod_s = 0 # +1 if facemod_s = 1 expression = ji * (2 * facemod_s - 1) inames = self.quadrature_inames() + additional_inames(self) instruction(assignee=assignee, expression=expression, within_inames=frozenset(inames), ) return prim.Variable(name) def facet_normal(self, o): index, = self.indices self.indices = None outer_normal = self.outer_normal() norm = normalize(outer_normal, world_dimension()) return prim.Subscript(outer_normal, (index,)) / norm def spatial_coordinate(self, o): """Calculate global coordinates of quadrature points for multilinear geometry mappings The evalualation is done using sum factorization techniques. On facets we use the geometry mapping of the self cell to get a global evaluation of the quadrature points. Avoiding the geometry mapping of the face itself greatly simplifies the generation of code for unstructured grids. Instead of looking at all the possible embeddings of the reference element of the face into the reference element of the cell we can make the grid edge consistent and choose a suitable convention for the order of directions in our sum factorization kernels. """ assert len(self.indices) == 1 restriction = enforce_boundary_restriction(self) # Generate sum factorization kernel and add vectorization info matrix_sequence = construct_basis_matrix_sequence(facedir=self.get_facedir(restriction), facemod=self.get_facemod(restriction), basis_size=(2,) * world_dimension()) inp = GeoCornersInput(matrix_sequence=matrix_sequence, direction=self.indices[0], restriction=restriction, ) sf = SumfactKernel(matrix_sequence=matrix_sequence, interface=inp, ) from dune.codegen.sumfact.vectorization import attach_vectorization_info vsf = attach_vectorization_info(sf) # If this sum factorization kernel was not used in the dry run we # just return 0 if vsf == 0: self.indices = None return 0 # Add a sum factorization kernel that implements the evaluation of # the basis functions at quadrature points (stage 1) from dune.codegen.sumfact.realization import realize_sum_factorization_kernel var, _ = realize_sum_factorization_kernel(vsf) # The results of this function is already the right component of the # spatial coordinate and we don't need to index this in the visitor self.indices = None return prim.Subscript(var, vsf.quadrature_index(sf, self)) @geometry_mixin("sumfact_axiparallel") class SumfactAxiParallelGeometryMixin(SumfactGeometryMixinBase, AxiparallelGeometryMixin): def nonsumfact_fallback(self): return "axiparallel" def facet_normal(self, o): i, = self.indices self.indices = None assert isinstance(i, int) # Use facemod_s and facedir_s if i == self.get_facedir(Restriction.POSITIVE): if self.get_facemod(Restriction.POSITIVE): return 1 else: return -1 else: return 0 @geometry_mixin("sumfact_equidistant") class SumfactEqudistantGeometryMixin(EquidistantGeometryMixin, SumfactAxiParallelGeometryMixin): def nonsumfact_fallback(self): return "equidistant" def facet_jacobian_determinant(self, o): name = "fdetjac" self.define_facet_jacobian_determinant(name) facedir = self.get_facedir(Restriction.POSITIVE) globalarg(name, shape=(world_dimension(),)) return prim.Subscript(prim.Variable(name), (facedir,)) @class_member(classtag="operator") def define_facet_jacobian_determinant(self, name): self._define_facet_jacobian_determinant_eval(name) gfst = lop_template_ansatz_gfs() return "std::array<typename {}::Traits::GridView::template Codim<0>::Geometry::ctype, {}> {};".format(gfst, world_dimension(), name) @kernel_cached(kernel="operator") def _define_facet_jacobian_determinant_eval(self, name): gfs = name_ansatz_gfs_constructor_param() rft = type_floatingpoint() code = ["{", " auto e = *({}.gridView().template begin<0>());".format(gfs), " int dir=0;", " for(auto is = {0}.gridView().ibegin(e); is != {0}.gridView().iend(e); ++(++is), ++dir)".format(gfs), " {}[dir] = is->geometry().integrationElement(Dune::FieldVector<{}, {}>());".format(name, rft, world_dimension() - 1), "}", ] instruction(code="\n".join(code), kernel="operator", ) def spatial_coordinate(self, o): index, = self.indices # Urgh: *SOMEHOW* construct a face direction. This is not breaking in the unstructured # case, because we do not enter this code path... from dune.codegen.pdelab.restriction import Restriction restriction = Restriction.NONE if self.measure == "interior_facet": restriction = Restriction.POSITIVE face = self.get_facedir(restriction) lowcorner = name_lowerleft_corner() meshwidth = name_meshwidth() # If we have to decide which boundary condition to take for this # intersection we always take the boundary condition of the center # of the intersection. We assume that there are no cells with more # than one boundary condition. if self.do_predicates: x = 0.5 elif index == face: x = 0 else: iindex = index if face is not None and index > face: iindex = iindex - 1 x = self.quadrature_position(iindex) self.indices = None return prim.Subscript(prim.Variable(lowcorner), (index,)) + x * prim.Subscript(prim.Variable(meshwidth), (index,)) @iname def global_corner_iname(restriction): name = get_counted_variable(restricted_name("global_corneriname", restriction)) domain(name, 2 ** world_dimension()) return name class GeoCornersInput(SumfactKernelInterfaceBase, ImmutableCuttingRecord): def __init__(self, matrix_sequence=None, direction=None, restriction=None): """Base class for sum-factorized evaluation of geometry mappings At the moment we only do this for cells and not faces. For intersections we do this corresponding reference elements of the neigboring cells. Each spatial component needs a seperate sum factorization kernel. The argument 'direction' specifies the component (x-component: 0, y-component: 1, z-component: 2). """ # Note: The function sumfact_quadrature_permutation_strategy does not # work anymore after the visiting process since get_facedir and # get_facemod are not well defined. But we need the # quadrature_permutation to generate the name of the sumfact # kernel. This means we need to store the value here instead of # recalculating it in the property. dim = world_dimension() quadrature_permutation = sumfact_quadrature_permutation_strategy(dim, restriction) matrix_sequence = permute_forward(matrix_sequence, quadrature_permutation) # Note: Do not put matrix_sequence into the Record. That screws up the vectorization strategy! ImmutableCuttingRecord.__init__(self, direction=direction, restriction=restriction, _quadrature_permutation=quadrature_permutation, _permuted_matrix_sequence=matrix_sequence, ) def get_keyword_arguments(self): """Get dictionary of keyword arguments needed to initialize this class Extract keyword arguments from the ImmutableRecord and modify accordingly. You need to set the correct matrix sequence before using this dict to create an interface. """ dict = self.get_copy_kwargs() del dict['_permuted_matrix_sequence'] del dict['_quadrature_permutation'] dict['matrix_sequence'] = None return dict @property def quadrature_permutation(self): return self._quadrature_permutation @property def cost_permutation(self): return sumfact_cost_permutation_strategy(self._permuted_matrix_sequence, self.stage) @property def stage(self): return 1 @property def direct_is_possible(self): return False def setup_input(self, sf, insn_dep, index=0): # Inames for interating over the coefficients (in this case the # coordinate of the component 'sefl.direction' of the corners). We take # them from the cost permuted matrix sequence. In order to get the # inames in order x,y,... we need to take the permutation back. shape_cost_permuted = tuple(mat.basis_size for mat in sf.matrix_sequence_cost_permuted) shape_ordered = permute_backward(shape_cost_permuted, self.cost_permutation) shape_ordered = permute_backward(shape_ordered, self.quadrature_permutation) inames_cost_permuted = tuple(sumfact_iname(length, "corner_setup_inames_" + str(k)) for k, length in enumerate(shape_cost_permuted)) inames_ordered = permute_backward(inames_cost_permuted, self.cost_permutation) inames_ordered = permute_backward(inames_ordered, self.quadrature_permutation) # Flat indices needed to access pdelab corner method flat_index_ordered = flatten_index(tuple(prim.Variable(i) for i in inames_ordered), shape_ordered, order="f") flat_index_cost_permuted = flatten_index(tuple(prim.Variable(i) for i in inames_cost_permuted), shape_cost_permuted, order="f") # The array that will be passed to the sum factorization kernel # function should contain the coefficients in the cost permuted order! from dune.codegen.sumfact.realization import name_buffer_storage name = "input_{}".format(sf.buffer) ftags = ",".join(["f"] * (sf.length + 1)) temporary_variable(name, shape=(sf.vector_width,) + shape_cost_permuted, custom_base_storage=name_buffer_storage(sf.buffer, 0), managed=True, dim_tags=ftags, ) if self.restriction == 0: geo = name_geometry() else: geo = name_cell_geometry(self.restriction) # NB: We need to realize this as a C instruction, because the corner # method does return a non-scalar, which does not fit into the current # loopy philosophy for function calls. This problem will be solved once # #11 is resolved. Admittedly, the code looks *really* ugly until that happens. code = "{}[{}*({})+{}] = {}.corner({})[{}];".format(name, sf.vector_width, str(flat_index_cost_permuted), index, geo, str(flat_index_ordered), self.direction, ) insn = instruction(code=code, within_inames=frozenset(inames_cost_permuted), assignees=(name,), tags=frozenset({"sumfact_stage{}".format(sf.stage)}), ) return insn_dep.union(frozenset({insn})) def realize_input(self, sf, inames, shape, vec_iname, vec_shape, buf, ftags): # Get a temporary that interprets the base storage of the input # as a column-major matrix. In later iteration of the matrix loop # this reinterprets the output of the previous iteration. inp = buf.get_temporary(sf, "buff_step0_in", shape=shape + vec_shape, dim_tags=ftags, ) # The input temporary will only be read from, so we need to silence # the loopy warning silenced_warning('read_no_write({})'.format(inp)) return prim.Subscript(prim.Variable(inp), inames + vec_iname) @preamble def define_corner(name, low): geo = name_geometry() return "auto {} = {}.corner({});".format(name, geo, 0 if low else 2 ** local_dimension() - 1) @class_member(classtag="operator") def define_mesh_width(name): rft = type_floatingpoint() define_mesh_width_eval(name) return "Dune::FieldVector<{}, {}> {};".format(rft, world_dimension(), name) def define_mesh_width_eval(name): from dune.codegen.pdelab.localoperator import name_ansatz_gfs_constructor_param gfs = name_ansatz_gfs_constructor_param() code = ["{", " auto e = *({}.gridView().template begin<0>());".format(gfs), " {} = e.geometry().corner((1<<{}) - 1);".format(name, world_dimension()), " {} -= e.geometry().corner(0);".format(name), "}", ] instruction(code="\n".join(code), kernel="operator") def name_lowerleft_corner(): name = "lowerleft_corner" globalarg(name, shape=(world_dimension(),)) define_corner(name, True) return name def name_meshwidth(): name = "meshwidth" globalarg(name, shape=(world_dimension(),)) define_mesh_width(name) return name @kernel_cached def _name_jacobian(i, j, restriction, visitor): """Return the (i, j) component of the jacobian of the geometry mapping Evaluation of the derivative of the geometry mapping is done using sum factorization. Note: At the moment this only works for the mappings from reference cells to the cell and not for the geometry mappings of intersections. """ # Create matrix sequence with derivative in j direction matrix_sequence = construct_basis_matrix_sequence(derivative=j, facedir=visitor.get_facedir(restriction), facemod=visitor.get_facemod(restriction), basis_size=(2,) * world_dimension()) # Sum factorization input for the i'th component of the geometry mapping inp = GeoCornersInput(matrix_sequence=matrix_sequence, direction=i, restriction=restriction) sf = SumfactKernel(matrix_sequence=matrix_sequence, interface=inp, ) from dune.codegen.sumfact.vectorization import attach_vectorization_info vsf = attach_vectorization_info(sf) # If this sum factorization kernel was not used in the dry run we # just return 0 if vsf == 0: visitor.indices = None return 0 # Add a sum factorization kernel that implements the evaluation of # the basis functions at quadrature points (stage 1) from dune.codegen.sumfact.realization import realize_sum_factorization_kernel var, _ = realize_sum_factorization_kernel(vsf) assert(visitor.indices is None) return prim.Subscript(var, vsf.quadrature_index(sf, visitor)) def normalize(expr, dim): return prim.Call(prim.Variable("sqrt"), (prim.Sum(tuple(expr[i] * expr[i] for i in range(dim))),))