diff --git a/python/dune/perftool/loopy/transformations/collect_rotate.py b/python/dune/perftool/loopy/transformations/collect_rotate.py index 167de058f85b773978d4d6cef101101df72bf840..0ecf0134581909c47bcd8f159bb577d2763ffe29 100644 --- a/python/dune/perftool/loopy/transformations/collect_rotate.py +++ b/python/dune/perftool/loopy/transformations/collect_rotate.py @@ -241,7 +241,7 @@ def collect_vector_data_rotate(knl): tags = set(get_pymbolic_tag(expr) for expr in quantity_exprs) if tags and tags.pop() == "operator_precomputed": expr, = quantity_exprs - shape=(ceildiv(product(s for s in arg.shape), vec_size), vec_size) + shape = (ceildiv(product(s for s in arg.shape), vec_size), vec_size) name = loopy_class_member(quantity, shape=shape, dim_tags="f,vec", diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index 4e7227e12b1925ee59588023685471460046c3e6..d27275516494af74799a6397b89a6f9dc9ec770e 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -281,7 +281,7 @@ def boundary_predicates(expr, measure, subdomain_id, visitor): from ufl.classes import Expr if isinstance(subdomain_data, Expr): - cond = visitor(subdomain_data) + cond = visitor(subdomain_data, do_predicates=True) else: # Determine the name of the parameter function cond = get_global_context_value("data").object_names[id(subdomain_data)] diff --git a/python/dune/perftool/sumfact/__init__.py b/python/dune/perftool/sumfact/__init__.py index 36d18d54abc1d6bc3c5fa9b19f7cce2142662c29..beb96fd9e6e7679920a96c006ea7bfe5e6737e7b 100644 --- a/python/dune/perftool/sumfact/__init__.py +++ b/python/dune/perftool/sumfact/__init__.py @@ -59,6 +59,6 @@ class SumFactInterface(PDELabInterface): def pymbolic_spatial_coordinate(self): import dune.perftool.sumfact.geometry - ret, indices = get_backend(interface="spatial_coordinate", selector=option_switch("diagonal_transformation_matrix"))(self.visitor.indices) + ret, indices = get_backend(interface="spatial_coordinate", selector=option_switch("diagonal_transformation_matrix"))(self.visitor.indices, self.visitor.do_predicates) self.visitor.indices = indices return ret diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py index afced02f64c4a0a772d94c983c1941f8189777f1..db18095b209d5f4a3d42bf8cf38b92536bc4b819 100644 --- a/python/dune/perftool/sumfact/accumulation.py +++ b/python/dune/perftool/sumfact/accumulation.py @@ -77,6 +77,11 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): # When doing sum factorization we want to split the test function assert(accterm.argument.expr is not None) + # For vector valued functions the first index is the dimension index + coeff_func_index = None + if isinstance(accterm.argument.expr, uc.Indexed): + coeff_func_index = accterm.argument.expr.ufl_operands[1].indices()[0]._value + # Do the tree traversal to get a pymbolic expression representing this expression pymbolic_expr = visitor(accterm.term) if pymbolic_expr == 0: @@ -84,30 +89,37 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): dim = world_dimension() mod_arg_expr = accterm.argument.expr - from ufl.classes import FunctionView, Argument - while (not isinstance(mod_arg_expr, FunctionView)) and (not isinstance(mod_arg_expr, Argument)): + while (not isinstance(mod_arg_expr, uc.FunctionView)) and (not isinstance(mod_arg_expr, uc.Argument)): mod_arg_expr = mod_arg_expr.ufl_operands[0] + degree = mod_arg_expr.ufl_element()._degree basis_size = degree + 1 - # Extract index information + # The gradient index is the last grad_index = None if accterm.argument.reference_grad: - grad_index = accterm.argument.expr.ufl_operands[1][0]._value + grad_index = accterm.argument.expr.ufl_operands[1][-1]._value + # Could be 0 so compare to None accum_index = None - if visitor.indices: - accum_index = visitor.indices[0] + if coeff_func_index is not None: + accum_index = coeff_func_index if accterm.argument.index and len(accterm.argument.index) > 1: - accum_index = accterm.argument.index[1]._value + accum_index = accterm.argument.index[0]._value + if isinstance(accum_index, int): + accum_index = (accum_index,) jacobian_inames = tuple() if accterm.is_jacobian: jacobian_inames = visitor.inames + # Only accumulate boundary conditions on parts where it is defined + from dune.perftool.pdelab.localoperator import boundary_predicates + predicates = boundary_predicates(accterm.term, measure, subdomain_id, visitor) + def emit_sumfact_kernel(restriction, insn_dep): - test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure, idims=(accum_index,)) - ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure, idims=(accum_index,)) + test_lfs = determine_accumulation_space(accterm.argument.expr, 0, measure, idims=accum_index) + ansatz_lfs = determine_accumulation_space(accterm.term, 1, measure, idims=accum_index) accum = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction()) # Construct the matrix sequence for this sum factorization @@ -126,6 +138,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): accumvar=accum, within_inames=jacobian_inames, input=AlreadyAssembledInput(index=accum_index), + coeff_func_index=coeff_func_index, ) from dune.perftool.sumfact.vectorization import attach_vectorization_info @@ -152,7 +165,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): expression=0, forced_iname_deps=frozenset(quadrature_inames() + jacobian_inames), forced_iname_deps_is_final=True, - tags=frozenset(["quadvec", "gradvec"]) + tags=frozenset(["quadvec", "gradvec"]), ) # Write timing stuff for jacobian (for alpha methods it is done at the end of stage 1) @@ -180,7 +193,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): forced_iname_deps=frozenset(quadrature_inames() + jacobian_inames), forced_iname_deps_is_final=True, tags=frozenset({"quadvec"}).union(vectag), - depends_on=frozenset({deps}).union(timer_dep).union(frozenset({lp.match.Tagged("sumfact_stage1")})) + depends_on=frozenset({deps}).union(timer_dep).union(frozenset({lp.match.Tagged("sumfact_stage1")})), ) if insn_dep is None: @@ -241,6 +254,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): forced_iname_deps=frozenset(inames + vecinames + jacobian_inames), forced_iname_deps_is_final=True, depends_on=insn_dep, + predicates=predicates ) # Mark the transformation that moves the quadrature loop diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py index f50c478a804bc22911593d7c235f682d5b5c22a4..4b3eba3928efe40cb1d4fd335bde7f578cbdef24 100644 --- a/python/dune/perftool/sumfact/basis.py +++ b/python/dune/perftool/sumfact/basis.py @@ -150,7 +150,7 @@ def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, v coeff_func_index = None grad_index, = visitor_indices else: - grad_index, coeff_func_index = visitor_indices + coeff_func_index, grad_index = visitor_indices # Construct the matrix sequence for this sum factorization matrix_sequence = construct_basis_matrix_sequence(derivative=grad_index, @@ -183,7 +183,6 @@ def pymbolic_coefficient_gradient(element, restriction, component, coeff_func, v @kernel_cached def pymbolic_coefficient(element, restriction, component, coeff_func, visitor_indices): - assert visitor_indices is None # Basis functions per direction basis_size = _basis_functions_per_direction(element, component) @@ -192,7 +191,13 @@ def pymbolic_coefficient(element, restriction, component, coeff_func, visitor_in facemod=get_facemod(restriction), basis_size=basis_size) + coeff_func_index = None + if visitor_indices: + assert len(visitor_indices) == 1 + coeff_func_index = visitor_indices[0] + inp = LFSSumfactKernelInput(coeff_func=coeff_func, + coeff_func_index=coeff_func_index, element=element, component=component, restriction=restriction, diff --git a/python/dune/perftool/sumfact/geometry.py b/python/dune/perftool/sumfact/geometry.py index fce0ec9b8b403ed2b5439f70b418421823ff1804..c991aeaa8113f218242c6d07e9f0b41479ce5acc 100644 --- a/python/dune/perftool/sumfact/geometry.py +++ b/python/dune/perftool/sumfact/geometry.py @@ -65,9 +65,10 @@ class GeoCornersInput(SumfactKernelInputBase, ImmutableRecord): tags=frozenset({"sumfact_stage{}".format(sf.stage)}), ) + @kernel_cached @backend(interface="spatial_coordinate", name="default") -def pymbolic_spatial_coordinate_multilinear(visitor_indices): +def pymbolic_spatial_coordinate_multilinear(visitor_indices, do_predicates): assert len(visitor_indices) == 1 # Construct the matrix sequence for the evaluation of the global coordinate. @@ -126,7 +127,7 @@ def name_meshwidth(): @kernel_cached @backend(interface="spatial_coordinate", name="diagonal_transformation_matrix") -def pymbolic_spatial_coordinate_axiparallel(visitor_indices): +def pymbolic_spatial_coordinate_axiparallel(visitor_indices, do_predicates): assert len(visitor_indices) == 1 index, = visitor_indices @@ -142,7 +143,13 @@ def pymbolic_spatial_coordinate_axiparallel(visitor_indices): lowcorner = name_lowerleft_corner() meshwidth = name_meshwidth() - if index == face: + # If we have to decide which boundary condition to take for this + # intersection we always take the boundary condition of the center + # of the intersection. We assume that there are no cells with more + # than one boundary condition. + if do_predicates: + x = 0.5 + elif index == face: x = 0 else: iindex = index diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py index 9a3011cba1b3223e5af7b081a89cecd9d6326549..44ef5ecb5e2cccc3e8fde593f10ac66abc3448f5 100644 --- a/python/dune/perftool/sumfact/symbolic.py +++ b/python/dune/perftool/sumfact/symbolic.py @@ -38,6 +38,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): insn_dep=frozenset(), input=None, accumvar=None, + coeff_func_index=None, ): """Create a sum factorization kernel @@ -91,23 +92,18 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): pre-initialized with the input or you have to provide direct_input (FastDGGridOperator). stage: 1 or 3 - insn_dep: An instruction ID that the first issued instruction - should depend upon. All following ones will depend on each - other. - within_inames: Instructions will be executed within those - inames (needed for stage 3 in jacobians). preferred_position: Will be used in the dry run to order kernels when doing vectorization e.g. (dx u,dy u,dz u, u). restriction: Restriction for faces values. - padding: a set of slots in the input temporary to be padded - index: The slot in the input temporary dedicated to this kernel - coeff_func: The function to call to get the input coefficient + within_inames: Instructions will be executed within those + inames (needed for stage 3 in jacobians). + insn_dep: An instruction ID that the first issued instruction + should depend upon. All following ones will depend on each + other. + input: An SumfactKernelInputBase instance describing the input of the kernel + accumvar: The accumulation variable to accumulate into coeff_func_index: Index to get the right input coefficient (needed for systems of PDEs) - element: The UFL element - component: The treepath to the correct component of above element - accumvar: The accumulation variable to accumulate into - input: An SumfactKernelInputBase instance describing the input of the kernel """ # Assert the inputs! assert isinstance(matrix_sequence, tuple) @@ -162,7 +158,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): Any two sum factorization kernels having the same cache_key are realized simulatenously! """ - return (self.matrix_sequence, self.restriction, self.stage, self.buffer) + return (self.matrix_sequence, self.restriction, self.stage, self.buffer, self.coeff_func_index) @property def input_key(self): diff --git a/python/dune/perftool/ufl/modified_terminals.py b/python/dune/perftool/ufl/modified_terminals.py index 322e31991421689fcaf0d6e03d4b462d997ca4a1..f9d2ef7d5d77600c5312dba0959e51267861ea1c 100644 --- a/python/dune/perftool/ufl/modified_terminals.py +++ b/python/dune/perftool/ufl/modified_terminals.py @@ -42,6 +42,7 @@ class ModifiedArgument(Record): else: return self.index + class ModifiedTerminalTracker(MultiFunction): """ A multifunction base that defines handler for grad, reference_grad, positive_restricted and negative_restricted. diff --git a/python/dune/perftool/ufl/visitor.py b/python/dune/perftool/ufl/visitor.py index 9ec9afd1bc65990126f9c225335d975a305733c5..c95b753715392a1d7ea1a587678af474cb08ba6b 100644 --- a/python/dune/perftool/ufl/visitor.py +++ b/python/dune/perftool/ufl/visitor.py @@ -44,12 +44,12 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): # Call base class constructors super(UFL2LoopyVisitor, self).__init__() - def __call__(self, o): + def __call__(self, o, do_predicates=False): # Reset some state variables that are reinitialized for each accumulation term self.indices = None self._indices_backup = [] self.inames = () - + self.do_predicates = do_predicates return self.call(o) call = MultiFunction.__call__ diff --git a/test/stokes/stokes_dg_quadrilateral.ufl b/test/stokes/stokes_dg_quadrilateral.ufl index e3c63e85a3ace9ae58545f9839147e752429ce7d..0995ec6ece110d49fc6ca99d0c05ab79e8143229 100644 --- a/test/stokes/stokes_dg_quadrilateral.ufl +++ b/test/stokes/stokes_dg_quadrilateral.ufl @@ -20,18 +20,18 @@ eps = -1.0 sigma = 1.0 r = inner(grad(u), grad(v))*dx \ + - p*div(v)*dx \ + - q*div(u)*dx \ + inner(avg(grad(u))*n, jump(v))*dS \ - - eps * inner(avg(grad(v))*n, jump(u))*dS \ - - inner(grad(u)*n, v)*ds \ - + eps * inner(grad(v)*n, u-g_v)*ds \ + sigma * inner(jump(u), jump(v))*dS \ - + sigma * inner(u-g_v, v)*ds \ - - p*div(v)*dx \ + - eps * inner(avg(grad(v))*n, jump(u))*dS \ - avg(p)*inner(jump(v), n)*dS \ - + p*inner(v, n)*ds \ - - q*div(u)*dx \ - avg(q)*inner(jump(u), n)*dS \ + - inner(grad(u)*n, v)*ds \ + + p*inner(v, n)*ds \ + q*inner(u, n)*ds \ + + eps * inner(grad(v)*n, u-g_v)*ds \ + + sigma * inner(u-g_v, v)*ds \ - q*inner(g_v, n)*ds forms = [r] diff --git a/test/sumfact/stokes/stokes_dg.ufl b/test/sumfact/stokes/stokes_dg.ufl index 9d498628e252d340f0386a14ed8c64353ce72998..0995ec6ece110d49fc6ca99d0c05ab79e8143229 100644 --- a/test/sumfact/stokes/stokes_dg.ufl +++ b/test/sumfact/stokes/stokes_dg.ufl @@ -19,39 +19,19 @@ n = FacetNormal(cell)('+') eps = -1.0 sigma = 1.0 -# r = inner(grad(u), grad(v))*dx \ -# + inner(avg(grad(u))*n, jump(v))*dS \ -# - eps * inner(avg(grad(v))*n, jump(u))*dS \ -# - inner(grad(u)*n, v)*ds \ -# + eps * inner(grad(v)*n, u-g_v)*ds \ -# + sigma * inner(jump(u), jump(v))*dS \ -# + sigma * inner(u-g_v, v)*ds \ -# - p*div(v)*dx \ -# - avg(p)*inner(jump(v), n)*dS \ -# + p*inner(v, n)*ds \ -# - q*div(u)*dx \ -# - avg(q)*inner(jump(u), n)*dS \ -# + q*inner(u, n)*ds \ -# - q*inner(g_v, n)*ds - -# r = inner(grad(u), grad(v))*dx \ -# + eps * inner(grad(v)*n, u-g_v)*ds \ -# - p*div(v)*dx \ -# - q*div(u)*dx \ -# - inner(grad(u)*n, v)*ds \ -# + sigma * inner(u-g_v, v)*ds \ -# - avg(q)*inner(jump(u), n)*dS \ -# - eps * inner(avg(grad(v))*n, jump(u))*dS \ -# + q*inner(u, n)*ds \ -# - q*inner(g_v, n)*ds -# # + p*inner(v, n)*ds \ -# # + inner(avg(grad(u))*n, jump(v))*dS \ -# # + sigma * inner(jump(u), jump(v))*dS \ -# # - avg(p)*inner(jump(v), n)*dS \ -# # + p*inner(v, n)*ds \ - -r = q*inner(u, n)*ds \ - + p*inner(v, n)*ds - +r = inner(grad(u), grad(v))*dx \ + - p*div(v)*dx \ + - q*div(u)*dx \ + + inner(avg(grad(u))*n, jump(v))*dS \ + + sigma * inner(jump(u), jump(v))*dS \ + - eps * inner(avg(grad(v))*n, jump(u))*dS \ + - avg(p)*inner(jump(v), n)*dS \ + - avg(q)*inner(jump(u), n)*dS \ + - inner(grad(u)*n, v)*ds \ + + p*inner(v, n)*ds \ + + q*inner(u, n)*ds \ + + eps * inner(grad(v)*n, u-g_v)*ds \ + + sigma * inner(u-g_v, v)*ds \ + - q*inner(g_v, n)*ds forms = [r]