Skip to content
Snippets Groups Projects
Commit 61adf2f4 authored by Dominic Kempf's avatar Dominic Kempf
Browse files

Remove remnants of previous implementation

parent 4a2b40ab
No related branches found
No related tags found
No related merge requests found
......@@ -104,7 +104,6 @@ class LFSSumfactKernelInput(SumfactKernelInputBase, ImmutableRecord):
arg = "fastdg{}".format(which)
from dune.perftool.sumfact.accumulation import _dof_offset
from dune.perftool.sumfact.realization import alias_data_array
globalarg(arg,
shape=shape,
dim_tags=",".join("f" * len(shape)),
......
......@@ -89,11 +89,6 @@ def realize_sum_factorization_kernel(sf, **kwargs):
return _realize_sum_factorization_kernel(sf, **kwargs)
@preamble
def alias_data_array(name, data):
    """Return a C++ statement that binds ``name`` to ``data.data()``.

    The statement has the form ``auto <name> = <data>.data();``.
    """
    statement = "auto {} = {}.data();"
    return statement.format(name, data)
def name_buffer_storage(buff, which):
    """Return the storage name built from ``buff`` and ``which``.

    The two arguments are formatted and joined by a single underscore,
    e.g. ``("buffer", 0)`` yields ``"buffer_0"``.
    """
    return "{}_{}".format(buff, which)
......@@ -182,26 +177,17 @@ def _realize_sum_factorization_kernel(sf):
return lp.TaggedVariable(out, sf.tag), insn_dep
def buffer_decl(buffer, dtype):
    """Create a callback producing a typed C++ pointer declaration.

    The returned callable takes a variable name (any further positional
    arguments are ignored) and returns a statement of the form
    ``<type> *<name> = (<type> *)<buffer>;`` where ``<type>`` is obtained
    by passing ``dtype`` to ``numpy_to_cpp_dtype``.
    """
    def _buffer_decl(name, *a):
        # Imported lazily, at call time, exactly as in the original code.
        from dune.perftool.loopy.target import numpy_to_cpp_dtype

        cpp_type = numpy_to_cpp_dtype(dtype)
        declaration = "{0} *{1} = ({0} *){2};"
        return declaration.format(cpp_type, name, buffer)

    return _buffer_decl
class BufferSwitcher(object):
def __init__(self, buffers=("buffer0", "buffer1")):
self.buffers = buffers
def __init__(self):
self.current = 0
def get_temporary(self, name=None, **kwargs):
bs = self.buffers[self.current]
assert name
bs = "buffer{}".format(self.current)
globalarg(bs)
temporary_variable(name,
managed=True,
custom_base_storage=self.buffers[self.current],
custom_base_storage=bs,
**kwargs
)
......@@ -362,189 +348,3 @@ def realize_sumfact_kernel_function(sf):
kernel = extract_kernel_from_cache("kernel_default", name, [signature], add_timings=False)
delete_cache_items("kernel_default")
return kernel
# @generator_factory(item_tags=("sumfactkernel",),
# context_tags=("kernel",),
# cache_key_generator=lambda s, **kw: s.cache_key)
# def old_realize_sum_factorization_kernel(sf):
# insn_dep = sf.insn_dep
#
# # Measure times and count operations in c++ code
# if get_option("instrumentation_level") >= 4:
# if sf.stage == 1:
# setuptimer = '{}_kernel_setup'.format(assembler_routine_name())
# insn_dep = insn_dep.union(frozenset({instruction(code='HP_TIMER_STOP({});'.format(setuptimer),
# within_inames=frozenset(sf.within_inames),
# depends_on=insn_dep)}))
#
# timer_name = assembler_routine_name() + '_kernel' + '_stage{}'.format(sf.stage)
# post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
# dump_accumulate_timer(timer_name)
# insn_dep = insn_dep.union(frozenset({instruction(code="HP_TIMER_START({});".format(timer_name),
# within_inames=frozenset(sf.within_inames),
# depends_on=insn_dep,
# ),
# }))
#
# if not sf.input.direct_input_is_possible:
# insn_dep = insn_dep.union(sf.input.realize(sf, insn_dep))
#
# # Prepare some dim_tags/shapes for later use
# ftags = ",".join(["f"] * sf.length)
# novec_ftags = ftags
# ctags = ",".join(["c"] * sf.length)
# vec_shape = ()
# if sf.vectorized:
# ftags = ftags + ",vec"
# ctags = ctags + ",vec"
# vec_shape = (sf.vector_width,)
#
# # Decide in which order we want to process directions in the
# # sumfactorization. A clever ordering can lead to a reduced
# # complexity. This will e.g. happen at faces where we only have
# # one quadrature point m_l=1 if l is the normal direction of the
# # face.
# #
# # Rule of thumb: small m's early and large n's late.
# perm = sumfact_permutation_strategy(sf)
#
# # Permute matrix sequence
# matrix_sequence = permute_forward(sf.matrix_sequence, perm)
#
# # Product of all matrices
# for l, matrix in enumerate(matrix_sequence):
# # Compute the correct shapes of in- and output matrices of this matrix-matrix multiplication
# # and get inames that realize the product.
# inp_shape = (matrix.cols,) + tuple(mat.cols for mat in matrix_sequence[l + 1:]) + tuple(mat.rows for mat in matrix_sequence[:l])
# out_shape = (matrix.rows,) + tuple(mat.cols for mat in matrix_sequence[l + 1:]) + tuple(mat.rows for mat in matrix_sequence[:l])
# out_inames = tuple(sumfact_iname(length, "out_inames_" + str(k)) for k, length in enumerate(out_shape))
# vec_iname = ()
# if matrix.vectorized:
# iname = sumfact_iname(sf.vector_width, "vec")
# vec_iname = (prim.Variable(iname),)
# transform(lp.tag_inames, [(iname, "vec")])
#
# # A trivial reduction is implemented as a product, otherwise we run into
# # a code generation corner case producing way too complicated code. This
# # could be fixed upstream, but the loopy code realizing reductions is not
# # trivial and the priority is kind of low.
# if matrix.cols != 1:
# k = sumfact_iname(matrix.cols, "red")
# k_expr = prim.Variable(k)
# else:
# k_expr = 0
#
# # Setup the input of the sum factorization kernel. In the
# # first matrix multiplication this can be taken from
# # * an input temporary (default)
# # * a global data structure (if FastDGGridOperator is in use)
# # * a value from a global data structure, broadcasted to a vector type (vectorized + FastDGGridOperator)
# input_inames = (k_expr,) + tuple(prim.Variable(j) for j in out_inames[1:])
# if l == 0 and sf.input.direct_input_is_possible:
# # See comment below
# input_inames = permute_backward(input_inames, perm)
# inp_shape = permute_backward(inp_shape, perm)
#
# input_summand = sf.input.realize_direct(inp_shape, input_inames)
# else:
# # If we did permute the order of the matrices above we also
# # permuted the order of out_inames. Unfortunately the
# # order of our input is from 0 to d-1. This means we need
# # to permute _back_ to get the right coefficients.
# if l == 0:
# inp_shape = permute_backward(inp_shape, perm)
# input_inames = permute_backward(input_inames, perm)
#
# # Get a temporary that interprets the base storage of the input
# # as a column-major matrix. In later iterations of the matrix loop
# # this reinterprets the output of the previous iteration.
# inp = get_buffer_temporary(sf.buffer,
# shape=inp_shape + vec_shape,
# dim_tags=ftags)
#
# # The input temporary will only be read from, so we need to silence the loopy warning
# silenced_warning('read_no_write({})'.format(inp))
#
# input_summand = prim.Subscript(prim.Variable(inp),
# input_inames + vec_iname)
#
# switch_base_storage(sf.buffer)
#
# # Get a temporary that interprets the base storage of the output.
# #
# # Note: In this step the reordering of the fastest directions
# # is happening. The new direction (out_inames[0]) and the
# # corresponding shape (out_shape[0]) goes to the end (slowest
# # direction) and everything stays column major (ftags->fortran
# # style).
# #
# # If we are in the last step we reverse the permutation.
# output_shape = tuple(out_shape[1:]) + (out_shape[0],)
# if l == len(matrix_sequence) - 1:
# output_shape = permute_backward(output_shape, perm)
# out = get_buffer_temporary(sf.buffer,
# shape=output_shape + vec_shape,
# dim_tags=ftags)
#
# # Write the matrix-matrix multiplication expression
# matprod = prim.Product((matrix.pymbolic((prim.Variable(out_inames[0]), k_expr) + vec_iname),
# input_summand))
#
# # ... which may be a reduction, if k>0
# if matrix.cols != 1:
# matprod = lp.Reduction("sum", k, matprod)
#
# # Here we also move the new direction (out_inames[0]) to the
# # end and reverse permutation
# output_inames = tuple(prim.Variable(i) for i in out_inames[1:]) + (prim.Variable(out_inames[0]),)
# if l == len(matrix_sequence) - 1:
# output_inames = permute_backward(output_inames, perm)
#
# tag = "sumfact_stage{}".format(sf.stage)
# if sf.stage == 3:
# tag = "{}_{}".format(tag, "_".join(sf.within_inames))
#
# # Collect the key word arguments for the loopy instruction
# insn_args = {"forced_iname_deps": frozenset([i for i in out_inames]).union(frozenset(sf.within_inames)),
# "forced_iname_deps_is_final": True,
# "depends_on": insn_dep,
# "tags": frozenset({tag}),
# "predicates": sf.predicates,
# "groups": frozenset({sf.group_name}),
# }
#
# # In case of direct output we directly accumulate the result
# # of the Sumfactorization into some global data structure.
# if l == len(matrix_sequence) - 1 and get_form_option('fastdg') and sf.stage == 3:
# if sf.vectorized:
# insn_args["forced_iname_deps"] = insn_args["forced_iname_deps"].union(frozenset({vec_iname[0].name}))
# insn_dep = sf.output.realize_direct(matprod, output_inames, out_shape, insn_args)
# else:
# # Issue the reduction instruction that implements the multiplication
# # at the same time store the instruction ID for the next instruction to depend on
# insn_dep = frozenset({instruction(assignee=prim.Subscript(prim.Variable(out), output_inames + vec_iname),
# expression=matprod,
# **insn_args
# )
# })
#
# # Measure times and count operations in c++ code
# if get_option("instrumentation_level") >= 4:
# stop_insn = frozenset({instruction(code="HP_TIMER_STOP({});".format(timer_name),
# depends_on=frozenset({lp.match.Tagged(tag)}),
# within_inames=frozenset(sf.within_inames))})
# if sf.stage == 1:
# qp_timer_name = assembler_routine_name() + '_kernel' + '_quadratureloop'
# post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
# dump_accumulate_timer(timer_name)
# frozenset({instruction(code="HP_TIMER_START({});".format(qp_timer_name),
# depends_on=stop_insn)})
#
# out = get_buffer_temporary(sf.buffer,
# shape=sf.output_shape,
# dim_tags=sf.output_dimtags,
# )
# silenced_warning('read_no_write({})'.format(out))
#
# return lp.TaggedVariable(out, sf.tag), insn_dep
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment