From 431484543c064b6bfcb61239facae6e21db1d011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20He=C3=9F?= <rene.hess@iwr.uni-heidelberg.de> Date: Thu, 22 Nov 2018 13:11:18 +0100 Subject: [PATCH] [skip ci] Rename sumfact interface methods --- python/dune/codegen/sumfact/accumulation.py | 6 +- python/dune/codegen/sumfact/basis.py | 2 +- python/dune/codegen/sumfact/realization.py | 4 +- python/dune/codegen/sumfact/symbolic.py | 72 ++++++++++++++++++--- 4 files changed, 68 insertions(+), 16 deletions(-) diff --git a/python/dune/codegen/sumfact/accumulation.py b/python/dune/codegen/sumfact/accumulation.py index 34599f55..b43d7906 100644 --- a/python/dune/codegen/sumfact/accumulation.py +++ b/python/dune/codegen/sumfact/accumulation.py @@ -154,7 +154,7 @@ class AccumulationOutput(SumfactKernelInterfaceBase, ImmutableRecord): from dune.codegen.sumfact.basis import lfs_inames return lfs_inames(get_leaf(self.trial_element, self.trial_element_index), self.restriction) - def realize(self, sf, result, insn_dep, inames=None, additional_inames=()): + def setup_output(self, sf, result, insn_dep, inames=None, additional_inames=()): trial_leaf_element = get_leaf(self.trial_element, self.trial_element_index) if self.trial_element is not None else None basis_size = tuple(mat.basis_size for mat in sf.matrix_sequence_quadrature_permuted) @@ -212,7 +212,7 @@ class AccumulationOutput(SumfactKernelInterfaceBase, ImmutableRecord): return frozenset({dep}) - def realize_direct(self, result, inames, shape, which=0, **args): + def realize_direct_output(self, result, inames, shape, which=0, **args): direct_output = "fastdg{}".format(which) ftags = ",".join(["f"] * len(shape)) @@ -563,4 +563,4 @@ def generate_accumulation_instruction(expr, visitor): result, insn_dep = realize_sum_factorization_kernel(vsf.copy(insn_dep=vsf.insn_dep.union(insn_dep))) if not get_form_option("fastdg"): - insn_dep = vsf.interface.realize(vsf, result, insn_dep) + insn_dep = vsf.interface.setup_output(vsf, result, insn_dep) diff --git a/python/dune/codegen/sumfact/basis.py b/python/dune/codegen/sumfact/basis.py index 1db2b4bf..78bf364b 100644 --- a/python/dune/codegen/sumfact/basis.py +++ b/python/dune/codegen/sumfact/basis.py @@ -161,7 +161,7 @@ class LFSSumfactKernelInput(SumfactKernelInterfaceBase, ImmutableRecord): return insn_dep.union(frozenset({insn})) - def realize_direct(self, shape, inames, which=0): + def realize_direct_input(self, shape, inames, which=0): # If the input comes directly from a global data structure inames are # ordered x,y,z,... # diff --git a/python/dune/codegen/sumfact/realization.py b/python/dune/codegen/sumfact/realization.py index 306f6c75..73adc444 100644 --- a/python/dune/codegen/sumfact/realization.py +++ b/python/dune/codegen/sumfact/realization.py @@ -192,7 +192,7 @@ def realize_sumfact_kernel_function(sf): input_inames = (k_expr,) + tuple(prim.Variable(j) for j in out_inames[1:]) if l == 0 and sf.stage == 1 and sf.interface.direct_is_possible: - input_summand = sf.interface.realize_direct(inp_shape, input_inames) + input_summand = sf.interface.realize_direct_input(inp_shape, input_inames) elif l == 0: # TODO: Simplify arguments! input_summand = sf.interface.realize_input(inp_shape, @@ -244,7 +244,7 @@ def realize_sumfact_kernel_function(sf): if sf.vectorized: insn_args["forced_iname_deps"] = frozenset({vec_iname[0].name}) - insn_dep = sf.interface.realize_direct(matprod, output_inames, out_shape, **insn_args) + insn_dep = sf.interface.realize_direct_output(matprod, output_inames, out_shape, **insn_args) elif l == len(matrix_sequence) - 1: # TODO: Move permutations to interface! output_inames = permute_backward(output_inames, sf.cost_permutation) diff --git a/python/dune/codegen/sumfact/symbolic.py b/python/dune/codegen/sumfact/symbolic.py index e51606dc..440de43a 100644 --- a/python/dune/codegen/sumfact/symbolic.py +++ b/python/dune/codegen/sumfact/symbolic.py @@ -33,18 +33,70 @@ class SumfactKernelInterfaceBase(object): """ A base class for the input/output of a sum factorization kernel In stage 1, this represents the input object, in stage 3 the output object. """ - def realize(self, *a, **kw): + def setup_input(self, sf, insn_dep, index=0): + """Create and fill an input array for sumfact kernel function (non fastdg) + + This happens before the function call. + + TODO: Add note about permutation + TODO: Document input arguments + """ + raise NotImplementedError + + def realize_input(self, shape, inames, vec_shape, vec_iname, buffer, ftags, l): + """Interpret the input of sumfact kernel function in the right way (non fastdgg) + + This happens inside the sumfact kernel function. + + TODO: Cleanup input + TODO: Add note about permutation + TODO: Document input arguments + """ + raise NotImplementedError + + def realize_direct_input(self, shape, inames, which=0): + """Interpret the input of sumfact kernel function in the right way (fastdg) + + This happens inside the sumfact kernel function. + + TODO: Add note about permutation + TODO: Document input arguments + """ raise NotImplementedError - def realize_direct(self, *a, **kw): + def realize_direct_output(self, result, iname, shape, which=0, **args): + """Accumulate results directly in the sumfact kernel function (fastdg) + + This happens inside the sumfact kernel function. + + TODO: Add note about permutation + TODO: Document input arguments + """ + + def setup_output(self, sf, result, insn_dep, inames=None, additional_inames=()): + """Generate accumulate instruction after sumfact kernel function (non fastdg) + + This happens after the function call. + + TODO: Add note about permutation + TODO: Document input arguments + """ raise NotImplementedError @property def quadrature_permutation(self): + """Order of local coordinate axis + + On unstructured grids we sometimes need to go through the directions in + different order to make sure that we visit the (global) quadrature + points on self and neighbor in the same order. + """ raise NotImplementedError @property def cost_permutation(self): + """Permutation that minimizes flops + """ raise NotImplementedError @property @@ -141,7 +193,7 @@ class VectorSumfactKernelInput(SumfactKernelInterfaceBase): dep = dep.union(inp.setup_input(sf, dep, index=i)) return dep - def realize_direct(self, shape, inames): + def realize_direct_input(self, shape, inames): # TODO: vector_cost_permutation not used! # Check whether the input exhibits a favorable structure @@ -154,15 +206,15 @@ class VectorSumfactKernelInput(SumfactKernelInterfaceBase): # All input coefficients use the exact same input coefficient. # We implement this by broadcasting it into a SIMD register return prim.Call(ExplicitVCLCast(dtype_floatingpoint()), - (self.interfaces[0].realize_direct(shape, inames),) + (self.interfaces[0].realize_direct_input(shape, inames),) ) elif len(total) == 2 and len(lower) == 1 and len(upper) == 1: # The lower and the upper part of the SIMD register use # the same input coefficient, we combine the SIMD register # from two shorter SIMD types return prim.Call(VCLLowerUpperLoad(dtype_floatingpoint()), - (self.interfaces[0].realize_direct(shape, inames), - self.interfaces[len(self.interfaces) // 2].realize_direct(shape, inames, which=1), + (self.interfaces[0].realize_direct_input(shape, inames), + self.interfaces[len(self.interfaces) // 2].realize_direct_input(shape, inames, which=1), ) ) else: @@ -241,7 +293,7 @@ class VectorSumfactKernelOutput(SumfactKernelInterfaceBase): return prim.Call(prim.Variable(hadd_function), (result,)) - def realize(self, sf, result, insn_dep): + def setup_output(self, sf, result, insn_dep): # TODO: vector_cost_permutation not used! outputs = set(self.interfaces) @@ -258,11 +310,11 @@ class VectorSumfactKernelOutput(SumfactKernelInterfaceBase): deps = frozenset() for o in outputs: hadd_result = self._add_hadd(o, maybe_wrap_subscript(result, tuple(prim.Variable(iname) for iname in inames + (veciname,)))) - deps = deps.union(o.realize(sf, hadd_result, insn_dep, inames=inames, additional_inames=(veciname,))) + deps = deps.union(o.setup_output(sf, hadd_result, insn_dep, inames=inames, additional_inames=(veciname,))) return deps - def realize_direct(self, result, inames, shape, **args): + def realize_direct_output(self, result, inames, shape, **args): # TODO: vector_cost_permutation not used! outputs = set(self.interfaces) @@ -279,7 +331,7 @@ class VectorSumfactKernelOutput(SumfactKernelInterfaceBase): for o in outputs: hadd_result = self._add_hadd(o, result) which = tuple(remove_duplicates(self.interfaces)).index(o) - deps = deps.union(o.realize_direct(hadd_result, inames, shape, which=which, **args)) + deps = deps.union(o.realize_direct_output(hadd_result, inames, shape, which=which, **args)) return deps -- GitLab