From 61adf2f4b221652708fcd92e09fe7e2016c41584 Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Mon, 26 Mar 2018 13:20:31 +0200 Subject: [PATCH] Remove remnants of previous implementation --- python/dune/perftool/sumfact/basis.py | 1 - python/dune/perftool/sumfact/realization.py | 208 +------------------- 2 files changed, 4 insertions(+), 205 deletions(-) diff --git a/python/dune/perftool/sumfact/basis.py b/python/dune/perftool/sumfact/basis.py index 18f1730a..ffb2eef9 100644 --- a/python/dune/perftool/sumfact/basis.py +++ b/python/dune/perftool/sumfact/basis.py @@ -104,7 +104,6 @@ class LFSSumfactKernelInput(SumfactKernelInputBase, ImmutableRecord): arg = "fastdg{}".format(which) from dune.perftool.sumfact.accumulation import _dof_offset - from dune.perftool.sumfact.realization import alias_data_array globalarg(arg, shape=shape, dim_tags=",".join("f" * len(shape)), diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py index e85f834c..4ea2e8a5 100644 --- a/python/dune/perftool/sumfact/realization.py +++ b/python/dune/perftool/sumfact/realization.py @@ -89,11 +89,6 @@ def realize_sum_factorization_kernel(sf, **kwargs): return _realize_sum_factorization_kernel(sf, **kwargs) -@preamble -def alias_data_array(name, data): - return "auto {} = {}.data();".format(name, data) - - def name_buffer_storage(buff, which): name = "{}_{}".format(buff, which) return name @@ -182,26 +177,17 @@ def _realize_sum_factorization_kernel(sf): return lp.TaggedVariable(out, sf.tag), insn_dep -def buffer_decl(buffer, dtype): - def _buffer_decl(name, *a): - from dune.perftool.loopy.target import numpy_to_cpp_dtype - _type = numpy_to_cpp_dtype(dtype) - return "{0} *{1} = ({0} *){2};".format(_type, name, buffer) - - return _buffer_decl - - class BufferSwitcher(object): - def __init__(self, buffers=("buffer0", "buffer1")): - self.buffers = buffers + def __init__(self): self.current = 0 def get_temporary(self, name=None, **kwargs): - bs = self.buffers[self.current] + assert name + bs = "buffer{}".format(self.current) globalarg(bs) temporary_variable(name, managed=True, - custom_base_storage=self.buffers[self.current], + custom_base_storage=bs, **kwargs ) @@ -362,189 +348,3 @@ def realize_sumfact_kernel_function(sf): kernel = extract_kernel_from_cache("kernel_default", name, [signature], add_timings=False) delete_cache_items("kernel_default") return kernel - - -# @generator_factory(item_tags=("sumfactkernel",), -# context_tags=("kernel",), -# cache_key_generator=lambda s, **kw: s.cache_key) -# def old_realize_sum_factorization_kernel(sf): -# insn_dep = sf.insn_dep -# -# # Measure times and count operations in c++ code -# if get_option("instrumentation_level") >= 4: -# if sf.stage == 1: -# setuptimer = '{}_kernel_setup'.format(assembler_routine_name()) -# insn_dep = insn_dep.union(frozenset({instruction(code='HP_TIMER_STOP({});'.format(setuptimer), -# within_inames=frozenset(sf.within_inames), -# depends_on=insn_dep)})) -# -# timer_name = assembler_routine_name() + '_kernel' + '_stage{}'.format(sf.stage) -# post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile') -# dump_accumulate_timer(timer_name) -# insn_dep = insn_dep.union(frozenset({instruction(code="HP_TIMER_START({});".format(timer_name), -# within_inames=frozenset(sf.within_inames), -# depends_on=insn_dep, -# ), -# })) -# -# if not sf.input.direct_input_is_possible: -# insn_dep = insn_dep.union(sf.input.realize(sf, insn_dep)) -# -# # Prepare some dim_tags/shapes for later use -# ftags = ",".join(["f"] * sf.length) -# novec_ftags = ftags -# ctags = ",".join(["c"] * sf.length) -# vec_shape = () -# if sf.vectorized: -# ftags = ftags + ",vec" -# ctags = ctags + ",vec" -# vec_shape = (sf.vector_width,) -# -# # Decide in which order we want to process directions in the -# # sumfactorization. A clever ordering can lead to a reduced -# # complexity. This will e.g. happen at faces where we only have -# # one quadratue point m_l=1 if l is the normal direction of the -# # face. -# # -# # Rule of thumb: small m's early and large n's late. -# perm = sumfact_permutation_strategy(sf) -# -# # Permute matrix sequence -# matrix_sequence = permute_forward(sf.matrix_sequence, perm) -# -# # Product of all matrices -# for l, matrix in enumerate(matrix_sequence): -# # Compute the correct shapes of in- and output matrices of this matrix-matrix multiplication -# # and get inames that realize the product. -# inp_shape = (matrix.cols,) + tuple(mat.cols for mat in matrix_sequence[l + 1:]) + tuple(mat.rows for mat in matrix_sequence[:l]) -# out_shape = (matrix.rows,) + tuple(mat.cols for mat in matrix_sequence[l + 1:]) + tuple(mat.rows for mat in matrix_sequence[:l]) -# out_inames = tuple(sumfact_iname(length, "out_inames_" + str(k)) for k, length in enumerate(out_shape)) -# vec_iname = () -# if matrix.vectorized: -# iname = sumfact_iname(sf.vector_width, "vec") -# vec_iname = (prim.Variable(iname),) -# transform(lp.tag_inames, [(iname, "vec")]) -# -# # A trivial reduction is implemented as a product, otherwise we run into -# # a code generation corner case producing way too complicated code. This -# # could be fixed upstream, but the loopy code realizing reductions is not -# # trivial and the priority is kind of low. -# if matrix.cols != 1: -# k = sumfact_iname(matrix.cols, "red") -# k_expr = prim.Variable(k) -# else: -# k_expr = 0 -# -# # Setup the input of the sum factorization kernel. In the -# # first matrix multiplication this can be taken from -# # * an input temporary (default) -# # * a global data structure (if FastDGGridOperator is in use) -# # * a value from a global data structure, broadcasted to a vector type (vectorized + FastDGGridOperator) -# input_inames = (k_expr,) + tuple(prim.Variable(j) for j in out_inames[1:]) -# if l == 0 and sf.input.direct_input_is_possible: -# # See comment below -# input_inames = permute_backward(input_inames, perm) -# inp_shape = permute_backward(inp_shape, perm) -# -# input_summand = sf.input.realize_direct(inp_shape, input_inames) -# else: -# # If we did permute the order of a matrices above we also -# # permuted the order of out_inames. Unfortunately the -# # order of our input is from 0 to d-1. This means we need -# # to permute _back_ to get the right coefficients. -# if l == 0: -# inp_shape = permute_backward(inp_shape, perm) -# input_inames = permute_backward(input_inames, perm) -# -# # Get a temporary that interprets the base storage of the input -# # as a column-major matrix. In later iteration of the matrix loop -# # this reinterprets the output of the previous iteration. -# inp = get_buffer_temporary(sf.buffer, -# shape=inp_shape + vec_shape, -# dim_tags=ftags) -# -# # The input temporary will only be read from, so we need to silence the loopy warning -# silenced_warning('read_no_write({})'.format(inp)) -# -# input_summand = prim.Subscript(prim.Variable(inp), -# input_inames + vec_iname) -# -# switch_base_storage(sf.buffer) -# -# # Get a temporary that interprets the base storage of the output. -# # -# # Note: In this step the reordering of the fastest directions -# # is happening. The new direction (out_inames[0]) and the -# # corresponding shape (out_shape[0]) goes to the end (slowest -# # direction) and everything stays column major (ftags->fortran -# # style). -# # -# # If we are in the last step we reverse the permutation. -# output_shape = tuple(out_shape[1:]) + (out_shape[0],) -# if l == len(matrix_sequence) - 1: -# output_shape = permute_backward(output_shape, perm) -# out = get_buffer_temporary(sf.buffer, -# shape=output_shape + vec_shape, -# dim_tags=ftags) -# -# # Write the matrix-matrix multiplication expression -# matprod = prim.Product((matrix.pymbolic((prim.Variable(out_inames[0]), k_expr) + vec_iname), -# input_summand)) -# -# # ... which may be a reduction, if k>0 -# if matrix.cols != 1: -# matprod = lp.Reduction("sum", k, matprod) -# -# # Here we also move the new direction (out_inames[0]) to the -# # end and reverse permutation -# output_inames = tuple(prim.Variable(i) for i in out_inames[1:]) + (prim.Variable(out_inames[0]),) -# if l == len(matrix_sequence) - 1: -# output_inames = permute_backward(output_inames, perm) -# -# tag = "sumfact_stage{}".format(sf.stage) -# if sf.stage == 3: -# tag = "{}_{}".format(tag, "_".join(sf.within_inames)) -# -# # Collect the key word arguments for the loopy instruction -# insn_args = {"forced_iname_deps": frozenset([i for i in out_inames]).union(frozenset(sf.within_inames)), -# "forced_iname_deps_is_final": True, -# "depends_on": insn_dep, -# "tags": frozenset({tag}), -# "predicates": sf.predicates, -# "groups": frozenset({sf.group_name}), -# } -# -# # In case of direct output we directly accumulate the result -# # of the Sumfactorization into some global data structure. -# if l == len(matrix_sequence) - 1 and get_form_option('fastdg') and sf.stage == 3: -# if sf.vectorized: -# insn_args["forced_iname_deps"] = insn_args["forced_iname_deps"].union(frozenset({vec_iname[0].name})) -# insn_dep = sf.output.realize_direct(matprod, output_inames, out_shape, insn_args) -# else: -# # Issue the reduction instruction that implements the multiplication -# # at the same time store the instruction ID for the next instruction to depend on -# insn_dep = frozenset({instruction(assignee=prim.Subscript(prim.Variable(out), output_inames + vec_iname), -# expression=matprod, -# **insn_args -# ) -# }) -# -# # Measure times and count operations in c++ code -# if get_option("instrumentation_level") >= 4: -# stop_insn = frozenset({instruction(code="HP_TIMER_STOP({});".format(timer_name), -# depends_on=frozenset({lp.match.Tagged(tag)}), -# within_inames=frozenset(sf.within_inames))}) -# if sf.stage == 1: -# qp_timer_name = assembler_routine_name() + '_kernel' + '_quadratureloop' -# post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile') -# dump_accumulate_timer(timer_name) -# frozenset({instruction(code="HP_TIMER_START({});".format(qp_timer_name), -# depends_on=stop_insn)}) -# -# out = get_buffer_temporary(sf.buffer, -# shape=sf.output_shape, -# dim_tags=sf.output_dimtags, -# ) -# silenced_warning('read_no_write({})'.format(out)) -# -# return lp.TaggedVariable(out, sf.tag), insn_dep -- GitLab