diff --git a/python/dune/perftool/sumfact/geometry.py b/python/dune/perftool/sumfact/geometry.py index ffea761f515bd776fcd18ca0328274f00a8efec2..c8ff6c38d9b906e126f2571d065b2cea6c852a38 100644 --- a/python/dune/perftool/sumfact/geometry.py +++ b/python/dune/perftool/sumfact/geometry.py @@ -45,13 +45,15 @@ class GeoCornersInput(SumfactKernelInputBase, ImmutableRecord): # NB: We need to realize this as a C instruction, because the corner # method does return a non-scalar, which does not fit into the current # loopy philosophy for function calls. This problem will be solved once - # #11 is resolved. - code = "{}[{}] = {}.corner({})[{}];".format(name, - ciname, - geo, - ciname, - self.dir, - ) + # #11 is resolved. Admittedly, the code looks *really* ugly until that happens. + code = "{}[{}*{}+{}] = {}.corner({})[{}];".format(name, + sf.vector_width, + ciname, + index, + geo, + ciname, + self.dir, + ) instruction(code=code, within_inames=frozenset({ciname}), diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py index 90818e301475406ec4a85bb8a7e0f4a76434d4d5..a8a42ed170ea97eead8741183d4145f0d5b7ba3e 100644 --- a/python/dune/perftool/sumfact/symbolic.py +++ b/python/dune/perftool/sumfact/symbolic.py @@ -355,15 +355,6 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) """ return (self.matrix_sequence, self.restriction, self.stage, self.buffer) - @property - def input_key(self): - """ A cache key for the input coefficients - Any two sum factorization kernels having the same input_key - work on the same input coefficient (and are suitable for simultaneous - treatment because of that) - """ - return (self.restriction, self.stage, self.coeff_func, self.coeff_func_index, self.element, self.component, self.accumvar) - # # Deduce all data fields of normal sum factorization kernels from the underlying kernels # @@ -419,7 +410,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) @property def cache_key(self): - return tuple(k.cache_key for k in self.kernels) + return (tuple(k.cache_key for k in self.kernels), self.buffer) @property def input_key(self): diff --git a/python/dune/perftool/sumfact/tabulation.py b/python/dune/perftool/sumfact/tabulation.py index 90271e6552a0cca72a8b8df17178d8e48d4c4bca..6b04195775a6814acf6c9edb9e6de4bbb1dcd46e 100644 --- a/python/dune/perftool/sumfact/tabulation.py +++ b/python/dune/perftool/sumfact/tabulation.py @@ -173,8 +173,8 @@ class BasisTabulationMatrixArray(BasisTabulationMatrixBase): name = "ThetaLarge{}{}_{}_qp{}_dof{}".format("face{}_".format(self.face) if self.face is not None else "", "T" if self.transpose else "", "_".join(abbrevs), - quadrature_points_per_direction(), - basis_functions_per_direction(), + self.tabs[0].quadrature_size, + self.tabs[0].basis_size, ) for i, tab in enumerate(self.tabs): define_theta(name, tab, additional_indices=(i,), width=self.width)