diff --git a/python/dune/codegen/sumfact/symbolic.py b/python/dune/codegen/sumfact/symbolic.py
index 24f924555b0573d895ed7f574aa6d744de44f6f3..8ea272809312e6dda945cc51e05ded2eb62c116e 100644
--- a/python/dune/codegen/sumfact/symbolic.py
+++ b/python/dune/codegen/sumfact/symbolic.py
@@ -376,7 +376,10 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
     @property
     def parallel_key(self):
         """ A key that identifies parallellizable kernels. """
-        return tuple(m.basis_size for m in self.permuted_matrix_sequence) + (self.stage, self.buffer, self.interface.within_inames)
+        # TODO: For now we do not vectorize SumfactKernels with different
+        # quadrature_permutation. This should be handled like upper/lower
+        # vectorization
+        return self.quadrature_permutation + tuple(m.basis_size for m in self.permuted_matrix_sequence) + (self.stage, self.buffer, self.interface.within_inames)
 
     @property
     def cache_key(self):
@@ -576,8 +579,10 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
     def operations(self):
         """ The total number of floating point operations for the kernel
         to be carried out """
-        from dune.codegen.sumfact.permutation import flop_cost
-        return flop_cost(self.permuted_matrix_sequence)
+        from dune.codegen.sumfact.permutation import flop_cost, sumfact_permutation_strategy, permute_forward
+        perm = sumfact_permutation_strategy(self)
+        permuted_matrix_sequence_cost = permute_forward(self.matrix_sequence, perm)
+        return flop_cost(permuted_matrix_sequence_cost)
 
 
 # Extract the argument list and store it on the class. This needs to be done
@@ -881,5 +886,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable)
     def operations(self):
         """ The total number of floating point operations for the kernel
         to be carried out """
-        from dune.codegen.sumfact.permutation import flop_cost
-        return flop_cost(self.matrix_sequence)
+        from dune.codegen.sumfact.permutation import flop_cost, sumfact_permutation_strategy, permute_forward
+        perm = sumfact_permutation_strategy(self)
+        permuted_matrix_sequence_cost = permute_forward(self.matrix_sequence, perm)
+        return flop_cost(permuted_matrix_sequence_cost)
diff --git a/test/sumfact/poisson/poisson_dg_3d.mini b/test/sumfact/poisson/poisson_dg_3d.mini
index f0b4ef26f73509e9dea47a1cca366c83a51bfb93..91556ff0ca2f70d49e9ac89cfaad206040ebda03 100644
--- a/test/sumfact/poisson/poisson_dg_3d.mini
+++ b/test/sumfact/poisson/poisson_dg_3d.mini
@@ -15,6 +15,7 @@ extension = vtu
 
 [formcompiler]
 compare_l2errorsquared = 1e-4, 5e-6 | expand deg
+debug_interpolate_input = 1
 
 [formcompiler.r]
 numerical_jacobian = 1, 0 | expand num