diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index 97b2cf71304f5c4ebc87c9cfa8ab5d8506fc65f2..2a46f7c9e0606194bbc627a9049f3a18388cc3c1 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -210,7 +210,7 @@ class AccumulationSpace(Record): return (self.restriction,) -def determine_accumulation_space(expr, number, measure): +def determine_accumulation_space(expr, number, measure, idims=None): from dune.perftool.ufl.modified_terminals import extract_modified_arguments args = extract_modified_arguments(expr, argnumber=number) @@ -236,7 +236,8 @@ def determine_accumulation_space(expr, number, measure): if len(subel.value_shape()) != 0: from dune.perftool.pdelab.geometry import dimension_iname - idims = tuple(dimension_iname(context='arg', count=i) for i in range(len(subel.value_shape()))) + if idims is None: + idims = tuple(dimension_iname(context='arg', count=i) for i in range(len(subel.value_shape()))) lfs = lfs_child(lfs, idims, shape=subel.value_shape(), symmetry=subel.symmetry()) subel = subel.sub_elements()[0] @@ -312,7 +313,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): # When we do not do sumfactorization we do not split the test function assert(accterm.argument.expr is None) - # First we do the tree traversal to get a pymbolic expression representing this expression + # Do the tree traversal to get a pymbolic expression representing this expression pymbolic_expr = visitor(accterm.term) # It may happen that an entire accumulation term vanishes. We do nothing in that case diff --git a/python/dune/perftool/pdelab/spaces.py b/python/dune/perftool/pdelab/spaces.py index 710f86b0dbfd5093ab21c05459992848c42c90ea..d639fb9df4f528e966f0026ca2f71c647b604a77 100644 --- a/python/dune/perftool/pdelab/spaces.py +++ b/python/dune/perftool/pdelab/spaces.py @@ -79,7 +79,12 @@ def lfs_child(lfs, children, shape=None, symmetry=False): indices = (Sum((Product((n - 1, i)), Product((.5, i, 1 - i)), j)),) else: - children = tuple(Variable(c) for c in children) + # If the index is not an int we need to make a variable out of it. + # + # Note: ints occur in the sumfactorisation case with + # vector valued functions (eg. Stokes) + if not isinstance(children[0], int): + children = tuple(Variable(c) for c in children) indices = (Sum(tuple(Product((Product(tuple(shape[j] for j in range(i))), child)) for i, child in enumerate(children))),) return Call(LFSChild(lfs), indices) diff --git a/python/dune/perftool/sumfact/__init__.py b/python/dune/perftool/sumfact/__init__.py index f7f108b311b2f17379e5e719730f75e46ff5c9f2..fa71ec4eb79b52cb963f27190ef2fc70d4208f28 100644 --- a/python/dune/perftool/sumfact/__init__.py +++ b/python/dune/perftool/sumfact/__init__.py @@ -30,22 +30,22 @@ class SumFactInterface(PDELabInterface): return pymbolic_reference_gradient(element, restriction, number) def pymbolic_trialfunction_gradient(self, element, restriction, component, visitor=None): - ret, indices = pymbolic_coefficient_gradient(element, restriction, component, name_coefficientcontainer, visitor) + ret, indices = pymbolic_coefficient_gradient(element, restriction, component, name_coefficientcontainer, visitor.indices) visitor.indices = indices return ret def pymbolic_trialfunction(self, element, restriction, component, visitor=None): - ret, indices = pymbolic_coefficient(element, restriction, component, name_coefficientcontainer, visitor) + ret, indices = pymbolic_coefficient(element, restriction, component, name_coefficientcontainer, visitor.indices) visitor.indices = indices return ret def pymbolic_apply_function_gradient(self, element, restriction, component, visitor=None): - ret, indices = pymbolic_coefficient_gradient(element, restriction, component, name_applycontainer, visitor) + ret, indices = pymbolic_coefficient_gradient(element, restriction, component, name_applycontainer, visitor.indices) visitor.indices = indices return ret def pymbolic_apply_function(self, element, restriction, component, visitor=None): - ret, indices = pymbolic_coefficient(element, restriction, component, name_applycontainer, visitor) + ret, indices = pymbolic_coefficient(element, restriction, component, name_applycontainer, visitor.indices) visitor.indices = indices return ret diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py index bbfab3957e568550f2b25a758a9f2a32c3d48a01..0bb56c833ccff61be97addda200a0db81e07d3ff 100644 --- a/python/dune/perftool/sumfact/accumulation.py +++ b/python/dune/perftool/sumfact/accumulation.py @@ -1,3 +1,5 @@ +import itertools + from dune.perftool.pdelab.argument import (name_accumulation_variable, PDELabAccumulationFunction, ) @@ -65,14 +67,17 @@ def name_test_function_contribution(test): @backend(interface="accum_insn", name="sumfact") def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): - pymbolic_expr = visitor(accterm.term) + # When doing sumfactorization we want to split the test function + assert(accterm.argument.expr is not None) + + # Do the tree traversal to get a pymbolic expression representing this expression + pymbolic_expr = visitor(accterm.term) if pymbolic_expr == 0: return - dim = world_dimension() - # If this is a gradient, we find the gradient iname + dim = world_dimension() additional_inames = frozenset({}) if accterm.new_indices is not None: for i in accterm.new_indices: @@ -80,25 +85,55 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): from dune.perftool.pdelab.localoperator import grad_iname additional_inames = additional_inames.union(frozenset({grad_iname(i, dim)})) - # Figure out the name of the accumulation variable - test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure) - ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure) - accum = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction()) + # Get the degree of the element corresponding to this modified argument + mod_arg_expr = accterm.argument.expr + from ufl.classes import FunctionView, Argument + while (not isinstance(mod_arg_expr, FunctionView)) and (not isinstance(mod_arg_expr, Argument)) : + # If this has more than one operand we are potentially doing dangerous stuff + assert len(mod_arg_expr.ufl_operands) == 1 + mod_arg_expr = mod_arg_expr.ufl_operands[0] + degree = mod_arg_expr.ufl_element()._degree + basis_size = degree + 1 + + def emit_sumfact_kernel(indices, restriction, insn_dep): + # Not implemented + if indices: + assert len(indices) <= 2 + + # Figure out the name of the accumulation variable + accum_idims = None + if indices is not None: + accum_idims = (indices[0],) + test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure, idims=accum_idims) + ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure, idims=accum_idims) + accum = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction()) + + # TODO: Adjust for stokes sumfact symdiff + jacobian_inames = tuple() + if accterm.is_jacobian: + jacobian_inames = visitor.inames - def emit_sumfact_kernel(i, restriction, insn_dep): # Construct the matrix sequence for this sum factorization - matrix_sequence = construct_basis_matrix_sequence(transpose=True, - derivative=i if accterm.new_indices else None, - facedir=get_facedir(accterm.argument.restriction), - facemod=get_facemod(accterm.argument.restriction), - ) - + matrix_sequence = construct_basis_matrix_sequence( + transpose=True, + derivative=indices[-1] if accterm.new_indices else None, + facedir=get_facedir(accterm.argument.restriction), + facemod=get_facemod(accterm.argument.restriction), + basis_size=basis_size) + + # Avoid caching issues by passing the coeff_func_index + coeff_func_index = None + if indices and len(indices) == 2: + coeff_func_index = indices[0] + + # TODO: Adapt preferred position for stokes sumfact symdiff sf = SumfactKernel(matrix_sequence=matrix_sequence, restriction=(accterm.argument.restriction, restriction), stage=3, - preferred_position=i if accterm.new_indices else None, - within_inames=visitor.inames, + preferred_position=indices[-1] if accterm.new_indices else None, accumvar=accum, + within_inames=jacobian_inames, + coeff_func_index=coeff_func_index, ) from dune.perftool.sumfact.vectorization import attach_vectorization_info @@ -123,15 +158,21 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): assignee = prim.Subscript(lp.TaggedVariable(temp, vsf.tag), pad) instruction(assignee=assignee, expression=0, - forced_iname_deps=frozenset(quadrature_inames() + visitor.inames), + forced_iname_deps=frozenset(quadrature_inames() + jacobian_inames), forced_iname_deps_is_final=True, tags=frozenset(["quadvec", "gradvec"]) ) # Replace gradient iname with correct index for assignement replace_dict = {} + + # If we have two indices the first belongs to the dimension + # and the second one to the derivative. We handle all these + # indices by hand and replace them accordingly. + if indices and len(indices) == 2: + replace_dict['idim_arg0'] = indices[0] for iname in additional_inames: - replace_dict[prim.Variable(iname)] = i + replace_dict[prim.Variable(iname)] = indices[-1] from dune.perftool.loopy.symbolic import substitute expression = substitute(pymbolic_expr, replace_dict) @@ -141,9 +182,9 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): timer_name = assembler_routine_name() + '_kernel' + '_quadratureloop' post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile') dump_accumulate_timer(timer_name) - if(visitor.inames): + if(jacobian_inames): timer_dep = frozenset({instruction(code="HP_TIMER_START({});".format(timer_name), - within_inames=frozenset(visitor.inames))}) + within_inames=frozenset(jacobian_inames))}) # Determine dependencies from loopy.match import Or, Writes @@ -157,7 +198,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): vsf.quadrature_index(sf)) contrib_dep = instruction(assignee=assignee, expression=expression, - forced_iname_deps=frozenset(quadrature_inames() + visitor.inames), + forced_iname_deps=frozenset(quadrature_inames() + jacobian_inames), forced_iname_deps_is_final=True, tags=frozenset({"quadvec"}).union(vectag), depends_on=frozenset({deps}).union(timer_dep) @@ -169,23 +210,22 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): if get_option("instrumentation_level") >= 4: insn_dep = frozenset({instruction(code="HP_TIMER_STOP({});".format(timer_name), depends_on=insn_dep, - within_inames=frozenset(visitor.inames))}) + within_inames=frozenset(jacobian_inames))}) inames = tuple(accum_iname((accterm.argument.restriction, restriction), mat.rows, i) for i, mat in enumerate(vsf.matrix_sequence)) # Collect the lfs and lfs indices for the accumulate call test_lfs.index = flatten_index(tuple(prim.Variable(i) for i in inames), - (basis_functions_per_direction(),) * dim, + (basis_size,) * dim, order="f" ) # In the jacobian case, also determine the space for the ansatz space - rank = 2 if visitor.inames else 1 - if rank == 2: + if accterm.is_jacobian: # TODO the next line should get its inames from # elsewhere. This is *NOT* robust (but works right now) - ansatz_lfs.index = flatten_index(tuple(prim.Variable(visitor.inames[i]) + ansatz_lfs.index = flatten_index(tuple(prim.Variable(jacobian_inames[i]) for i in range(world_dimension())), (basis_functions_per_direction(),) * dim, order="f" @@ -210,6 +250,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): ) if not get_option("fastdg"): + rank = 2 if accterm.is_jacobian else 1 expr = prim.Call(PDELabAccumulationFunction(accum, rank), (test_lfs.get_args() + ansatz_lfs.get_args() + @@ -218,27 +259,32 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): ) instruction(assignees=(), expression=expr, - forced_iname_deps=frozenset(inames + visitor.inames + vecinames), + forced_iname_deps=frozenset(inames + vecinames + jacobian_inames), forced_iname_deps_is_final=True, depends_on=insn_dep, ) # Mark the transformation that moves the quadrature loop # inside the trialfunction loops for application - transform(nest_quadrature_loops, visitor.inames) + if accterm.is_jacobian: + transform(nest_quadrature_loops, jacobian_inames) return insn_dep # Extract the restrictions on argument-1: jac_restrictions = frozenset(tuple(ma.restriction for ma in - extract_modified_arguments(accterm.term, argnumber=1, do_index=True))) + extract_modified_arguments(accterm.term, + argnumber=1, + do_index=True))) if not jac_restrictions: jac_restrictions = frozenset({0}) insn_dep = None for restriction in jac_restrictions: if accterm.new_indices: - for i in range(world_dimension()): - insn_dep = emit_sumfact_kernel(i, restriction, insn_dep) + shape = (world_dimension(),) * len(accterm.new_indices) + # Iterate over all combinations of indices for this shape + for indices in itertools.product(*map(range, shape)): + insn_dep = emit_sumfact_kernel(indices, restriction, insn_dep) else: insn_dep = emit_sumfact_kernel(None, restriction, insn_dep) diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py index 49e154ec0143a599d2b8977aeb8ddf614b9a99d4..445a52d989f4db4f12db1daa8060ea52e823b21b 100644 --- a/python/dune/perftool/sumfact/basis.py +++ b/python/dune/perftool/sumfact/basis.py @@ -3,6 +3,7 @@ NB: Basis evaluation is only needed for the trial function argument in jacobians, as the multiplication with the test function is part of the sum factorization kernel. """ +import itertools from dune.perftool.generation import (backend, domain, @@ -50,11 +51,26 @@ def name_sumfact_base_buffer(): return name +def _basis_functions_per_direction(element, component): + """Number of basis functions per direction of a given component of an element""" + assert len(component.indices()) <= 1 + if len(component.indices()) == 0: + degree = element.degree() + else: + index = component.indices()[0]._value + degree = element.sub_elements()[index].degree() + basis_size = degree + 1 + return basis_size + + @kernel_cached -def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, visitor): +def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, visitor_indices): rawname = "gradu" + "_".join(str(c) for c in component) name = restricted_name(rawname, restriction) + # Number of basis functions per direction + basis_size = _basis_functions_per_direction(element, component) + # Get a temporary for the gradient from ufl.functionview import select_subelement sub_element = select_subelement(element, component) @@ -67,20 +83,30 @@ def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, v # if the positioning within a SIMD vectors coincides with the index! direct_indexing_is_possible = True - expressions = [] + expressions = {} insn_dep = frozenset() - for i in range(world_dimension()): + for indices in itertools.product(*map(range, shape)): + # Do not consider derivatives of rank > 2 + assert len(indices) <= 2 + # Construct the matrix sequence for this sum factorization - matrix_sequence = construct_basis_matrix_sequence(derivative=i, + matrix_sequence = construct_basis_matrix_sequence(derivative=indices[-1], facedir=get_facedir(restriction), facemod=get_facemod(restriction), + basis_size=basis_size, ) + # Index needed to acces the right coefficient container for vector valued functions + coeff_func_index = None + if len(indices) == 2: + coeff_func_index = indices[0] + # The sum factorization kernel object gathering all relevant information sf = SumfactKernel(matrix_sequence=matrix_sequence, restriction=restriction, - preferred_position=i, + preferred_position=indices[-1], coeff_func=coeff_func, + coeff_func_index=coeff_func_index, element=element, component=component, ) @@ -88,7 +114,7 @@ def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, v from dune.perftool.sumfact.vectorization import attach_vectorization_info vsf = attach_vectorization_info(sf) - if i != vsf.vec_index(sf): + if indices[-1] != vsf.vec_index(sf): direct_indexing_is_possible = False # Add a sum factorization kernel that implements the @@ -97,33 +123,37 @@ def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, v from dune.perftool.sumfact.realization import realize_sum_factorization_kernel var, insn_dep = realize_sum_factorization_kernel(vsf.copy(insn_dep=vsf.insn_dep.union(insn_dep))) - expressions.append(prim.Subscript(var, vsf.quadrature_index(sf))) + expressions.update({indices: prim.Subscript(var, vsf.quadrature_index(sf))}) # Check whether we want to return early with something that has the indexing # already handled! This happens with vectorization when the index coincides # with the position in the vector register. if direct_indexing_is_possible: - assert len(visitor.indices) == 1 - return maybe_wrap_subscript(var, vsf.quadrature_index(sf, visitor.indices)), None + assert len(visitor_indices) == 1 + return maybe_wrap_subscript(var, vsf.quadrature_index(sf, visitor_indices)), None # TODO this should be quite conditional!!! - for i, expr in enumerate(expressions): + for indices in itertools.product(*map(range, shape)): # Write solution from sumfactorization to gradient variable - assignee = prim.Subscript(prim.Variable(name), i) + assignee = prim.Subscript(prim.Variable(name), indices) instruction(assignee=assignee, - expression=expr, + expression=expressions[indices], forced_iname_deps=frozenset(get_backend("quad_inames")()), forced_iname_deps_is_final=True, ) - return prim.Variable(name), visitor.indices + return prim.Variable(name), visitor_indices @kernel_cached -def pymbolic_coefficient(element, restriction, component, coeff_func, visitor): +def pymbolic_coefficient(element, restriction, component, coeff_func, visitor_indices): + # Basis functions per direction + basis_size = _basis_functions_per_direction(element, component) + # Construct the matrix sequence for this sum factorization matrix_sequence = construct_basis_matrix_sequence(facedir=get_facedir(restriction), - facemod=get_facemod(restriction),) + facemod=get_facemod(restriction), + basis_size=basis_size) sf = SumfactKernel(matrix_sequence=matrix_sequence, restriction=restriction, @@ -142,7 +172,7 @@ def pymbolic_coefficient(element, restriction, component, coeff_func, visitor): return prim.Subscript(var, vsf.quadrature_index(sf) - ), visitor.indices + ), visitor_indices @iname @@ -175,8 +205,12 @@ def evaluate_basis(element, name, restriction): # Add the missing direction on facedirs by evaluating at either 0 or 1 if facedir is not None: + # TODO: Does not work for systems! + from tabulation import polynomial_degree + degree = polynomial_degree() + facemod = get_facemod(restriction) - prod = prod + (prim.Call(PolynomialLookup(name_polynomials(), False), + prod = prod + (prim.Call(PolynomialLookup(name_polynomials(degree), False), (prim.Variable(inames[facedir]), facemod)),) # Issue the product @@ -222,8 +256,12 @@ def evaluate_reference_gradient(element, name, restriction): if i != facedir: prod.append(BasisTabulationMatrix(derivative=d == i).pymbolic((prim.Variable(quadinamemapping[i]), prim.Variable(inames[i])))) if facedir is not None: + # TODO: Does not work for systems! + from tabulation import polynomial_degree + degree = polynomial_degree() + facemod = get_facemod(restriction) - prod.append(prim.Call(PolynomialLookup(name_polynomials(), facedir == d), + prod.append(prim.Call(PolynomialLookup(name_polynomials(degree), facedir == d), (prim.Variable(inames[facedir]), facemod)),) assignee = prim.Subscript(prim.Variable(name), (d,)) diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py index 89c5791e0edb1d08eb51bff020a910d2b4f7c8ac..95e8961a32d5d7e456a51ccf32d9abd6d6b11adb 100644 --- a/python/dune/perftool/sumfact/realization.py +++ b/python/dune/perftool/sumfact/realization.py @@ -2,6 +2,8 @@ The code that triggers the creation of the necessary code constructs to realize a sum factorization kernel """ +from ufl.functionview import select_subelement +from ufl import VectorElement, TensorElement from dune.perftool.generation import (barrier, dump_accumulate_timer, @@ -17,8 +19,9 @@ from dune.perftool.loopy.buffer import (get_buffer_temporary, switch_base_storage, ) from dune.perftool.pdelab.argument import pymbolic_coefficient +from dune.perftool.pdelab.basis import shape_as_pymbolic from dune.perftool.pdelab.geometry import world_dimension -from dune.perftool.pdelab.spaces import name_lfs, name_lfs_bound +from dune.perftool.pdelab.spaces import name_lfs, name_lfs_bound, lfs_child, name_leaf_lfs from dune.perftool.options import get_option from dune.perftool.pdelab.signatures import assembler_routine_name from dune.perftool.sumfact.permutation import (sumfact_permutation_strategy, @@ -71,9 +74,31 @@ def _realize_sum_factorization_kernel(sf): def _write_input(inputsf, index=0): # Write initial coefficients into buffer lfs = name_lfs(inputsf.element, inputsf.restriction, inputsf.component) + + sub_element = select_subelement(inputsf.element, inputsf.component) + shape = sub_element.value_shape() + (inputsf.element.cell().geometric_dimension(),) + + if isinstance(sub_element, (VectorElement, TensorElement)): + # Could be 0 but shouldn't be None + assert inputsf.coeff_func_index is not None + + lfs_pym = lfs_child(lfs, + (inputsf.coeff_func_index,), + shape=shape_as_pymbolic(shape[:-1]), + symmetry=inputsf.element.symmetry()) + + leaf_element = sub_element + if isinstance(sub_element, (VectorElement, TensorElement)): + leaf_element = sub_element.sub_elements()[0] + + lfs = name_leaf_lfs(leaf_element, inputsf.restriction) + basisiname = sumfact_iname(name_lfs_bound(lfs), "basis") container = inputsf.coeff_func(inputsf.restriction) - coeff = pymbolic_coefficient(container, lfs, basisiname) + if isinstance(sub_element, (VectorElement, TensorElement)): + coeff = pymbolic_coefficient(container, lfs_pym, basisiname) + else: + coeff = pymbolic_coefficient(container, lfs, basisiname) assignee = prim.Subscript(prim.Variable(input_setup), (prim.Variable(basisiname),) + (index,)) instruction(assignee=assignee, expression=coeff, diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py index 0cbb6b814b13b3f60bacc065ea539ed0a8a37af5..17b5d8e4e46e8ecd4b3350a2eaed3609a583603a 100644 --- a/python/dune/perftool/sumfact/symbolic.py +++ b/python/dune/perftool/sumfact/symbolic.py @@ -29,6 +29,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): input=None, insn_dep=frozenset(), coeff_func=None, + coeff_func_index=None, element=None, component=None, accumvar=None, @@ -97,6 +98,8 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): padding: a set of slots in the input temporary to be padded index: The slot in the input temporary dedicated to this kernel coeff_func: The function to call to get the input coefficient + coeff_func_index: Index to get the right input coefficient + (needed for systems of PDEs) element: The UFL element component: The treepath to the correct component of above element accumvar: The accumulation variable to accumulate into @@ -159,7 +162,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): work on the same input coefficient (and are suitable for simultaneous treatment because of that) """ - return (self.restriction, self.stage, self.coeff_func, self.element, self.component, self.accumvar) + return (self.restriction, self.stage, self.coeff_func, self.coeff_func_index, self.element, self.component, self.accumvar) # # Some convenience methods to extract information about the sum factorization kernel @@ -354,7 +357,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) work on the same input coefficient (and are suitable for simultaneous treatment because of that) """ - return (self.restriction, self.stage, self.coeff_func, self.element, self.component, self.accumvar) + return (self.restriction, self.stage, self.coeff_func, self.coeff_func_index, self.element, self.component, self.accumvar) # # Deduce all data fields of normal sum factorization kernels from the underlying kernels @@ -384,6 +387,11 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) assert len(set(k.coeff_func for k in self.kernels)) == 1 return self.kernels[0].coeff_func + @property + def coeff_func_index(self): + assert len(set(k.coeff_func_index for k in self.kernels)) == 1 + return self.kernels[0].coeff_func_index + @property def element(self): assert len(set(k.element for k in self.kernels)) == 1 diff --git a/python/dune/perftool/sumfact/tabulation.py b/python/dune/perftool/sumfact/tabulation.py index 466690a4a9fa3fc414ba790c93a098e398abfad5..f3b575de221a780e4ed4dadfb27a576ee7130d03 100644 --- a/python/dune/perftool/sumfact/tabulation.py +++ b/python/dune/perftool/sumfact/tabulation.py @@ -86,13 +86,13 @@ class BasisTabulationMatrix(BasisTabulationMatrixBase, ImmutableRecord): return self.basis_size def pymbolic(self, indices): - name = "{}{}Theta{}{}_qp{}_dof_{}".format("face{}_".format(self.face) if self.face is not None else "", - "d" if self.derivative else "", - "T" if self.transpose else "", - "_slice{}".format(self.slice_index) if self.slice_size is not None else "", - self.quadrature_size, - self.basis_size, - ) + name = "{}{}Theta{}{}_qp{}_dof{}".format("face{}_".format(self.face) if self.face is not None else "", + "d" if self.derivative else "", + "T" if self.transpose else "", + "_slice{}".format(self.slice_index) if self.slice_size is not None else "", + self.quadrature_size, + self.basis_size, + ) define_theta(name, self) return prim.Subscript(prim.Variable(name), indices) @@ -162,7 +162,7 @@ class BasisTabulationMatrixArray(BasisTabulationMatrixBase): def pymbolic(self, indices): assert len(indices) == 3 - # Check whether we can realize this by broadcasting the values of a simple tabulation + # Check whether we can realize this by broadcasting the values of a simple tabulation if len(set(self.tabs)) == 1: theta = self.tabs[0].pymbolic(indices[:-1]) return prim.Call(ExplicitVCLCast(np.float64, vector_width=len(self.tabs)), (theta,)) @@ -254,10 +254,9 @@ def name_oned_quadrature_points(): @class_member(classtag="operator") -def typedef_polynomials(name): +def typedef_polynomials(name, degree): range_field = lop_template_range_field() domain_field = name_domain_field() - degree = polynomial_degree() include_file("dune/pdelab/finiteelementmap/qkdg.hh", filetag="operatorfile") @@ -272,21 +271,21 @@ def typedef_polynomials(name): degree) -def type_polynomials(): - name = "Polynomials1D" - typedef_polynomials(name) +def type_polynomials(degree): + name = "Polynomials1D_Degree{}".format(degree) + typedef_polynomials(name, degree) return name @class_member(classtag="operator") -def define_polynomials(name): - polynomials_type = type_polynomials() +def define_polynomials(name, degree): + polynomials_type = type_polynomials(degree) return "{} {};".format(polynomials_type, name) -def name_polynomials(): - name = "poly" - define_polynomials(name) +def name_polynomials(degree): + name = "poly_degree{}".format(degree) + define_polynomials(name, degree) return name @@ -330,7 +329,8 @@ def define_theta(name, tabmat, additional_indices=(), width=None): qp = name_oned_quadrature_points() qw = name_oned_quadrature_weights() sort_quadrature_points_weights(qp, qw) - polynomials = name_polynomials() + degree = tabmat.basis_size-1 + polynomials = name_polynomials(degree) shape = (tabmat.rows, tabmat.cols) dim_tags = "f,f" @@ -367,13 +367,14 @@ def define_theta(name, tabmat, additional_indices=(), width=None): ) -def construct_basis_matrix_sequence(transpose=False, derivative=None, facedir=None, facemod=None): +def construct_basis_matrix_sequence(transpose=False, derivative=None, facedir=None, facemod=None, basis_size=None): dim = world_dimension() result = [None] * dim for i in range(dim): quadrature_size = quadrature_points_per_direction() - basis_size = basis_functions_per_direction() + if basis_size is None: + basis_size = basis_functions_per_direction() onface = None if facedir == i: diff --git a/python/dune/perftool/ufl/extract_accumulation_terms.py b/python/dune/perftool/ufl/extract_accumulation_terms.py index b9f976a13dc0cfdd60923b4c611d7671ee185476..fb6e496b0ba2cb0e4fb739207d3a212d30f64865 100644 --- a/python/dune/perftool/ufl/extract_accumulation_terms.py +++ b/python/dune/perftool/ufl/extract_accumulation_terms.py @@ -31,10 +31,21 @@ class AccumulationTerm(Record): new_indices=None ): assert (isinstance(argument, ModifiedArgument) or argument is None) + + # Test if this expression belongs to a jacobian method + all_jacobian_args = extract_modified_arguments(term, + argnumber=1, + do_index=False, + do_gradient=False) + is_jacobian = False + if all_jacobian_args: + is_jacobian = True + Record.__init__(self, term=term, argument=argument, - new_indices=new_indices + new_indices=new_indices, + is_jacobian=is_jacobian ) def indexed_test_arg(self): diff --git a/test/sumfact/stokes/stokes.mini b/test/sumfact/stokes/stokes.mini index cb38db6aef434a385c25e6c7a9ea7eb97c4e6da5..5c1ccf5ea9e7a1b3570631af82095c66fb03b9fd 100644 --- a/test/sumfact/stokes/stokes.mini +++ b/test/sumfact/stokes/stokes.mini @@ -13,5 +13,8 @@ extension = vtu [formcompiler] numerical_jacobian = 1, 0 | expand num exact_solution_expression = g -compare_l2errorsquared = 1e-10 +compare_l2errorsquared = 1e-12 sumfact = 1 + +# Disable symdiff test for now +{__exec_suffix} == symdiff | exclude