From 24391f126f982101e0c4260a23b1db334fdbce0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20He=C3=9F?= <rene.hess@iwr.uni-heidelberg.de> Date: Fri, 19 Oct 2018 16:36:11 +0200 Subject: [PATCH] Choose vectorization strategy based on cost permuted matrix sequence --- python/dune/codegen/sumfact/symbolic.py | 17 ++++++++++++----- test/sumfact/poisson/poisson_dg_3d.mini | 1 + 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/python/dune/codegen/sumfact/symbolic.py b/python/dune/codegen/sumfact/symbolic.py index 24f92455..8ea27280 100644 --- a/python/dune/codegen/sumfact/symbolic.py +++ b/python/dune/codegen/sumfact/symbolic.py @@ -376,7 +376,10 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): @property def parallel_key(self): """ A key that identifies parallellizable kernels. """ - return tuple(m.basis_size for m in self.permuted_matrix_sequence) + (self.stage, self.buffer, self.interface.within_inames) + # TODO: For now we do not vectorize SumfactKernels with different + # quadrature_permutation. This should be handled like upper/lower + # vectorization + return self.quadrature_permutation + tuple(m.basis_size for m in self.permuted_matrix_sequence) + (self.stage, self.buffer, self.interface.within_inames) @property def cache_key(self): @@ -576,8 +579,10 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): def operations(self): """ The total number of floating point operations for the kernel to be carried out """ - from dune.codegen.sumfact.permutation import flop_cost - return flop_cost(self.permuted_matrix_sequence) + from dune.codegen.sumfact.permutation import flop_cost, sumfact_permutation_strategy, permute_forward + perm = sumfact_permutation_strategy(self) + permuted_matrix_sequence_cost = permute_forward(self.matrix_sequence, perm) + return flop_cost(permuted_matrix_sequence_cost) # Extract the argument list and store it on the class. This needs to be done @@ -881,5 +886,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) def operations(self): """ The total number of floating point operations for the kernel to be carried out """ - from dune.codegen.sumfact.permutation import flop_cost - return flop_cost(self.matrix_sequence) + from dune.codegen.sumfact.permutation import flop_cost, sumfact_permutation_strategy, permute_forward + perm = sumfact_permutation_strategy(self) + permuted_matrix_sequence_cost = permute_forward(self.matrix_sequence, perm) + return flop_cost(permuted_matrix_sequence_cost) diff --git a/test/sumfact/poisson/poisson_dg_3d.mini b/test/sumfact/poisson/poisson_dg_3d.mini index f0b4ef26..91556ff0 100644 --- a/test/sumfact/poisson/poisson_dg_3d.mini +++ b/test/sumfact/poisson/poisson_dg_3d.mini @@ -15,6 +15,7 @@ extension = vtu [formcompiler] compare_l2errorsquared = 1e-4, 5e-6 | expand deg +debug_interpolate_input = 1 [formcompiler.r] numerical_jacobian = 1, 0 | expand num -- GitLab