diff --git a/python/dune/perftool/loopy/transformations/vectorize_quad.py b/python/dune/perftool/loopy/transformations/vectorize_quad.py index f82f482929a5eaa8f742bbe388c8eeff5bce4ceb..f1b3ce12100731e87eff358a0cc3bcef49db6beb 100644 --- a/python/dune/perftool/loopy/transformations/vectorize_quad.py +++ b/python/dune/perftool/loopy/transformations/vectorize_quad.py @@ -197,6 +197,11 @@ def _vectorize_quadrature_loop(knl, inames, suffix): # 1. Rotating the input data knl = add_vector_view(knl, quantity) if horizontal > 1: + # Pitfall: In the case of generating jacobians, the input needs to be rotated exactly once. + predicates = frozenset() + if common_inames: + predicates = frozenset({prim.Comparison(prim.Sum(tuple(prim.Variable(i) for i in common_inames)), "==", 0)}) + new_insns.append(lp.CallInstruction((), # assignees prim.Call(TransposeReg(vertical=vertical, horizontal=horizontal), tuple(prim.Subscript(prim.Variable(get_vector_view_name(quantity)), @@ -207,6 +212,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix): within_inames_is_final=True, id="{}_rotate{}".format(quantity, suffix), tags=frozenset({"sumfact_stage2"}), + predicates=predicates, )) # Add substitution rules @@ -263,7 +269,7 @@ def _vectorize_quadrature_loop(knl, inames, suffix): (vector_indices.get(horizontal) + last_index, prim.Variable(vec_iname)), ), substitute(insn.expression, replacemap), - depends_on=frozenset({"continue_stmt{}".format(suffix)}), + depends_on=frozenset({"continue_stmt{}".format(suffix), lp.match.Tagged("sumfact_stage1")}), depends_on_is_final=True, within_inames=common_inames.union(frozenset({outer_iname, vec_iname})), within_inames_is_final=True, diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index 3fb213c87e98883e4f474c4145090b40aff1eec4..4daeb45a7627f7bda1d83c178e83a20a5362607c 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -281,7 +281,7 @@ def determine_accumulation_space(info, number): def boundary_predicates(expr, measure, subdomain_id): - predicates = frozenset([]) + predicates = [] if subdomain_id not in ['everywhere', 'otherwise']: # Get the original form and inspect the present measures @@ -301,9 +301,18 @@ def boundary_predicates(expr, measure, subdomain_id): visitor = get_visitor(measure, subdomain_id) subdomain_data = visitor(subdomain_data, do_predicates=True) - predicates = predicates.union([prim.Comparison(subdomain_data, '==', subdomain_id)]) + p = prim.Comparison(subdomain_data, '==', subdomain_id) - return predicates + # Try to find conditions that are always 0 or always 1 + from pymbolic.mapper.evaluator import evaluate + try: + eval = evaluate(p) + if not eval: + predicates.append(False) + except: + predicates.append(p) + + return frozenset(predicates) class PDELabAccumulationInfo(ImmutableRecord): @@ -392,6 +401,8 @@ def generate_accumulation_instruction(expr, visitor): accumvar = name_accumulation_variable(test_lfs.get_restriction() + ansatz_lfs.get_restriction()) predicates = boundary_predicates(expr, visitor.measure, visitor.subdomain_id) + if False in predicates: + return rank = 1 if ansatz_lfs.lfs is None else 2 @@ -755,14 +766,17 @@ def generate_residual_kernels(form, original_form): for measure in set(i.integral_type() for i in form.integrals()): logger.info("generate_residual_kernels: measure {}".format(measure)) with global_context(integral_type=measure): - enum_pattern() - pattern_baseclass() - enum_alpha() - from dune.perftool.pdelab.signatures import assembler_routine_name with global_context(kernel=assembler_routine_name()): kernel = [k for k in get_backend(interface="generate_kernels_per_integral")(form.integrals_by_type(measure))] + # The integrals might vanish due to unfulfillable boundary conditions. + # We only generate the local operator enums/base classes if they did not. + if kernel: + enum_pattern() + pattern_baseclass() + enum_alpha() + # Maybe add numerical differentiation if get_form_option("numerical_jacobian"): # Include headers for numerical methods @@ -897,10 +911,6 @@ def generate_control_kernels(forms): for measure in set(i.integral_type() for form in forms for i in form.integrals()): logger.info("generate_control_kernels: measure {}".format(measure)) with global_context(integral_type=measure): - enum_pattern() - pattern_baseclass() - enum_alpha() - from dune.perftool.pdelab.signatures import assembler_routine_name with global_context(kernel=assembler_routine_name()): # TODO: Sumfactorization not yet implemented @@ -910,6 +920,13 @@ def generate_control_kernels(forms): forms_measure = [form.integrals_by_type(measure) for form in forms] kernel = [k for k in control_generate_kernels_per_integral(forms_measure)] + # The integrals might vanish due to unfulfillable boundary conditions. + # We only generate the local operator enums/base classes if they did not. + if kernel: + enum_pattern() + pattern_baseclass() + enum_alpha() + operator_kernels[(measure, 'residual')] = kernel return operator_kernels diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py index dff7cfccd8a0282a2b0cf87e8c4a0ea3c709a010..c943d80c8597522a97438ceb3dcffa2991553cb7 100644 --- a/python/dune/perftool/sumfact/accumulation.py +++ b/python/dune/perftool/sumfact/accumulation.py @@ -381,9 +381,6 @@ def generate_accumulation_instruction(expr, visitor): test_info = visitor.test_info trial_info = visitor.trial_info - # Cache all stage 1 sum factorization kernels used in this expression - SumfactCollectMapper()(expr) - # Number of basis functions per direction leaf_element = test_info.element from ufl import MixedElement @@ -402,6 +399,11 @@ def generate_accumulation_instruction(expr, visitor): visitor.measure, visitor.subdomain_id, ) + if False in predicates: + return + + # Cache all stage 1 sum factorization kernels used in this expression + SumfactCollectMapper()(expr) insn_dep = None diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py index 2c5f8a3c3b44e1dd473816464b7856432bdc37b4..12f7115662533b6f279e6e340130de95b64992d9 100644 --- a/python/dune/perftool/sumfact/realization.py +++ b/python/dune/perftool/sumfact/realization.py @@ -137,6 +137,9 @@ class BufferSwitcher(object): def realize_sumfact_kernel_function(sf): + # Remove anything kernel related from caches + delete_cache_items("kernel_default") + # Get a buffer switcher instance buffer = BufferSwitcher() insn_dep = frozenset() diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py index 85601c1e84e44bb15f998e227da0b980e63789d9..435975e41af8c29c0aaf078450f0046dd721137a 100644 --- a/python/dune/perftool/sumfact/symbolic.py +++ b/python/dune/perftool/sumfact/symbolic.py @@ -155,7 +155,7 @@ class VectorSumfactKernelOutput(SumfactKernelInterfaceBase): outputs = set(self.interfaces) trial_element, = set(o.trial_element for o in self.interfaces) - trial_element_index, = set(o.trial_element_index for o in self.interfaces) + trial_element_index = set(o.trial_element_index for o in self.interfaces).pop() from dune.perftool.sumfact.accumulation import accum_iname element = get_leaf(trial_element, trial_element_index) if trial_element is not None else None inames = tuple(accum_iname(element, mat.rows, i) @@ -368,7 +368,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): @property def parallel_key(self): """ A key that identifies parallellizable kernels. """ - return tuple(m.basis_size for m in self.permuted_matrix_sequence) + (self.stage, self.buffer) + return tuple(m.basis_size for m in self.permuted_matrix_sequence) + (self.stage, self.buffer, self.interface.within_inames) @property def cache_key(self):