diff --git a/python/dune/codegen/sumfact/accumulation.py b/python/dune/codegen/sumfact/accumulation.py index b32541f69169a23d0c626d280a53f0ec91698122..3264a62c96844b372b7366b4260ef985f9f76904 100644 --- a/python/dune/codegen/sumfact/accumulation.py +++ b/python/dune/codegen/sumfact/accumulation.py @@ -171,7 +171,7 @@ class AccumulationOutput(SumfactKernelInterfaceBase, ImmutableRecord): from dune.codegen.sumfact.basis import lfs_inames return lfs_inames(get_leaf(self.trial_element, self.trial_element_index), self.restriction) - def realize_input(self, inames, shape, vec_iname, vec_shape, buffer, ftags, l): + def realize_input(self, inames, shape, vec_iname, vec_shape, buf, ftags): # TODO: This should happen in stage 2 and not in stage 3 shape = permute_backward(shape, self.cost_permutation) inames = permute_backward(inames, self.cost_permutation) @@ -179,10 +179,10 @@ class AccumulationOutput(SumfactKernelInterfaceBase, ImmutableRecord): # Get a temporary that interprets the base storage of the input # as a column-major matrix. In later iteration of the matrix loop # this reinterprets the output of the previous iteration. - inp = buffer.get_temporary("buff_step{}_in".format(l), - shape=shape + vec_shape, - dim_tags=ftags, - ) + inp = buf.get_temporary("buff_step0_in", + shape=shape + vec_shape, + dim_tags=ftags, + ) # The input temporary will only be read from, so we need to silence # the loopy warning diff --git a/python/dune/codegen/sumfact/basis.py b/python/dune/codegen/sumfact/basis.py index 067f31e7384c43d9e31875cf705234fbd8b35a91..a26bae5bfc61717f06fe1bbd4d0abc99f28240cf 100644 --- a/python/dune/codegen/sumfact/basis.py +++ b/python/dune/codegen/sumfact/basis.py @@ -175,17 +175,17 @@ class LFSSumfactKernelInput(SumfactKernelInterfaceBase, ImmutableRecord): return insn_dep.union(frozenset({insn})) - def realize_input(self, inames, shape, vec_iname, vec_shape, buffer, ftags, l): + def realize_input(self, inames, shape, vec_iname, vec_shape, buf, ftags): # Note: Here we do not need to reverse any permutation since this is # already done in the setup_input method above! # Get a temporary that interprets the base storage of the input # as a column-major matrix. In later iteration of the matrix loop # this reinterprets the output of the previous iteration. - inp = buffer.get_temporary("buff_step{}_in".format(l), - shape=shape + vec_shape, - dim_tags=ftags, - ) + inp = buf.get_temporary("buff_step0_in", + shape=shape + vec_shape, + dim_tags=ftags, + ) # The input temporary will only be read from, so we need to silence # the loopy warning diff --git a/python/dune/codegen/sumfact/geometry.py b/python/dune/codegen/sumfact/geometry.py index cd0d34b7a07d0e1cf341fb0a629dc3500f1684e2..7cd4bf0245f297ee52c6ee52bc02575f4b5f5839 100644 --- a/python/dune/codegen/sumfact/geometry.py +++ b/python/dune/codegen/sumfact/geometry.py @@ -189,14 +189,14 @@ class GeoCornersInput(SumfactKernelInterfaceBase, ImmutableRecord): return insn_dep.union(frozenset({insn})) - def realize_input(self, inames, shape, vec_iname, vec_shape, buffer, ftags, l): + def realize_input(self, inames, shape, vec_iname, vec_shape, buf, ftags): # Get a temporary that interprets the base storage of the input # as a column-major matrix. In later iteration of the matrix loop # this reinterprets the output of the previous iteration. - inp = buffer.get_temporary("buff_step{}_in".format(l), - shape=shape + vec_shape, - dim_tags=ftags, - ) + inp = buf.get_temporary("buff_step0_in", + shape=shape + vec_shape, + dim_tags=ftags, + ) # The input temporary will only be read from, so we need to silence # the loopy warning diff --git a/python/dune/codegen/sumfact/permutation.py b/python/dune/codegen/sumfact/permutation.py index 1f775eba4e8ddf4cc5df8c5a8295e4794ea24d87..7f37dfeae795031ec81b292b96e65649f8fc78cc 100644 --- a/python/dune/codegen/sumfact/permutation.py +++ b/python/dune/codegen/sumfact/permutation.py @@ -145,6 +145,9 @@ def sumfact_quadrature_permutation_strategy(dim, restriction): if restriction == Restriction.POSITIVE: return _order_on_self(restriction) else: + # Still do normal direction first. The other two directions need to + # be done in reverse order to go through the quadrature points in + # the same order as on self (draw cubes!). assert restriction == Restriction.NEGATIVE l = list(_order_on_self(restriction)) return (l[0], l[2], l[1]) diff --git a/python/dune/codegen/sumfact/realization.py b/python/dune/codegen/sumfact/realization.py index d1f10d6f9c659fd8c719e19a59658245e5c27b39..010118b10a1c2f1d68109f16b8f5471d03da9224 100644 --- a/python/dune/codegen/sumfact/realization.py +++ b/python/dune/codegen/sumfact/realization.py @@ -201,7 +201,6 @@ def realize_sumfact_kernel_function(sf): vec_shape, buffer, ftags, - l, ) else: # Get a temporary that interprets the base storage of the input diff --git a/python/dune/codegen/sumfact/symbolic.py b/python/dune/codegen/sumfact/symbolic.py index 25b3a79578364f9be94374e03399881e06dc9ae9..96db55bb2f83744d56617cd1af028aaf98f41afd 100644 --- a/python/dune/codegen/sumfact/symbolic.py +++ b/python/dune/codegen/sumfact/symbolic.py @@ -35,23 +35,43 @@ class SumfactKernelInterfaceBase(object): In stage 1, this represents the input object, in stage 3 the output object. """ def setup_input(self, sf, insn_dep, index=0): - """Create and fill an input array for sumfact kernel function (non fastdg) + """Create and fill an input array for a stage 1 sumfact kernel function (non fastdg) - This happens before the function call. + This happens before the function call. The input will be quadrature + (for unstructured grids) and cost permuted. + + Parameters + ---------- + sf : SumfactKernel or VectorizedSumfactKernel + insn_dep : frozenset + Instructions this setup depends on. + index : int + Vectorization index, SIMD lane. - TODO: Add note about permutation - TODO: Document input arguments """ raise NotImplementedError - def realize_input(self, inames, shape, vec_iname, vec_shape, buffer, ftags, l): + def realize_input(self, inames, shape, vec_iname, vec_shape, buf, ftags): """Interpret the input of sumfact kernel function in the right way (non fastdgg) This happens inside the sumfact kernel function. - TODO: Cleanup arguments - TODO: Add note about permutation - TODO: Document input arguments + Stage 1: Input is already permuted the right way in setup_input. + + Stage 3: TODO -> Check permutation in accumulation.py + + Parameters + ---------- + inames : tuple of pymbolic.primitives.Variable + shape : tuple of int + vec_iname : tuple of pymbolic.primitives.Variable + In case of vectorized kernel provide vectorization iname. + vec_shape : tuple of int + In case of vectorized kernel provide the number of vectorized kernels. + buf : dune.codegen.sumfact.realization.BufferSwitcher + Provides the input variable. + ftags : str + dim_tags needed to access input variable correctly. """ raise NotImplementedError @@ -66,7 +86,7 @@ class SumfactKernelInterfaceBase(object): raise NotImplementedError def accumulate_output(self, sf, result, insn_dep, inames=None, additional_inames=()): - """Generate accumulate instruction after sumfact kernel function (non fastdg) + """Generate accumulate instruction after a stage 3 sumfact kernel function (non fastdg) This happens after the function call. @@ -80,8 +100,13 @@ class SumfactKernelInterfaceBase(object): This happens inside the sumfact kernel function. + Stage 1: Reverse cost permutation, output should only be quadrature + permuted. + + Stage 3: Reverse cost and quadrature permutation. The output will be + sorted according to dof/residual vector. + TODO: Cleanup arguments - TODO: Add note about permutation TODO: Document input arguments """ inames = permute_backward(inames, self.cost_permutation) @@ -223,16 +248,16 @@ class VectorSumfactKernelInput(SumfactKernelInterfaceBase): dep = dep.union(inp.setup_input(sf, dep, index=i)) return dep - def realize_input(self, inames, shape, vec_iname, vec_shape, buffer, ftags, l): + def realize_input(self, inames, shape, vec_iname, vec_shape, buf, ftags): # TODO: vector_cost_permutation not used! # Get a temporary that interprets the base storage of the input # as a column-major matrix. In later iteration of the matrix loop # this reinterprets the output of the previous iteration. - inp = buffer.get_temporary("buff_step{}_in".format(l), - shape=shape + vec_shape, - dim_tags=ftags, - ) + inp = buf.get_temporary("buff_step0_in", + shape=shape + vec_shape, + dim_tags=ftags, + ) # The input temporary will only be read from, so we need to silence # the loopy warning @@ -332,7 +357,7 @@ class VectorSumfactKernelOutput(SumfactKernelInterfaceBase): return prim.Call(prim.Variable(hadd_function), (result,)) - def realize_input(self, inames, shape, vec_iname, vec_shape, buffer, ftags, l): + def realize_input(self, inames, shape, vec_iname, vec_shape, buf, ftags): # The input for stage 3 is quadrature permuted. The inames and shape # passed to this method are quadrature and cost permuted. This means we # need to take the cost permutation back to get the right inames and @@ -342,10 +367,10 @@ class VectorSumfactKernelOutput(SumfactKernelInterfaceBase): # Get a temporary that interprets the base storage of the input as a # column-major matrix. - inp = buffer.get_temporary("buff_step{}_in".format(l), - shape=shape + vec_shape, - dim_tags=ftags, - ) + inp = buf.get_temporary("buff_step0_in", + shape=shape + vec_shape, + dim_tags=ftags, + ) # The input temporary will only be read from, so we need to silence # the loopy warning