Skip to content
Snippets Groups Projects
Commit 91e6c5c0 authored by Dominic Kempf's avatar Dominic Kempf
Browse files

First draft of sumfact kernels in separate functions

parent 13c37b3c
No related branches found
No related tags found
No related merge requests found
from dune.perftool.error import PerftoolLoopyError
from dune.perftool.generation import (get_counted_variable,
kernel_cached,
temporary_variable,
)
class FlipFlopBuffer(object):
def __init__(self, identifier):
self.identifier = identifier
# Initialize the counter that switches between the base storages!
self._current = 0
# Generate the base storage names
self.base_storage = tuple("{}_base_{}".format(self.identifier, i) for i in (0, 1))
def switch_base_storage(self):
self._current = (self._current + 1) % 2
def get_temporary(self, **kwargs):
assert("base_storage" not in kwargs)
assert("storage_shape" not in kwargs)
# Select the base storage and increase counter
base = self.base_storage[self._current]
# Construct a temporary name
name = kwargs.pop("name", None)
if name is None:
name = get_counted_variable(self.identifier)
# Construct the temporary and return it
temporary_variable(name,
base_storage=base,
managed=True,
_base_storage_access_may_be_aliasing=True,
**kwargs
)
return name
@kernel_cached
def initialize_buffer(identifier):
assert isinstance(identifier, str)
return FlipFlopBuffer(identifier)
def get_buffer_temporary(identifier, **kwargs):
return initialize_buffer(identifier).get_temporary(**kwargs)
def switch_base_storage(identifier):
initialize_buffer(identifier).switch_base_storage()
......@@ -473,7 +473,11 @@ def generate_kernel(integrals):
delete_cache_items("kernel_default")
for integral in integrals:
visit_integral(integral)
knl = extract_kernel_from_cache("kernel_default")
from dune.perftool.pdelab.signatures import kernel_name, assembly_routine_signature
name = kernel_name()
signature = assembly_routine_signature()
knl = extract_kernel_from_cache("kernel_default", name, signature)
delete_cache_items("kernel_default")
# Reset the quadrature degree
......@@ -491,7 +495,7 @@ def generate_kernels_per_integral(integrals):
yield generate_kernel(integrals)
def extract_kernel_from_cache(tag, wrap_in_cgen=True):
def extract_kernel_from_cache(tag, name, signature, wrap_in_cgen=True):
# Now extract regular loopy kernel components
from dune.perftool.loopy.target import DuneTarget
domains = [i for i in retrieve_cache_items("{} and domain".format(tag))]
......@@ -512,13 +516,6 @@ def extract_kernel_from_cache(tag, wrap_in_cgen=True):
check_dep_resolution=False,
)
# Find a name for the kernel
if wrap_in_cgen:
from dune.perftool.pdelab.signatures import kernel_name
name = kernel_name()
else:
name = "constructor_kernel"
# Create the kernel
from loopy import make_kernel, preprocess_kernel
kernel = make_kernel(domains,
......@@ -570,8 +567,9 @@ def extract_kernel_from_cache(tag, wrap_in_cgen=True):
if wrap_in_cgen:
# Wrap the kernel in something which can generate code
from dune.perftool.pdelab.signatures import assembly_routine_signature
signature = assembly_routine_signature()
if signature is None:
from dune.perftool.pdelab.signatures import assembly_routine_signature
signature = assembly_routine_signature()
kernel = LoopyKernelMethod(signature, kernel)
return kernel
......@@ -673,7 +671,7 @@ def cgen_class_from_cache(tag, members=[]):
tparams = [i for i in retrieve_cache_items('{} and template_param'.format(tag))]
# Construct the constructor
constructor_knl = extract_kernel_from_cache(tag, wrap_in_cgen=False)
constructor_knl = extract_kernel_from_cache(tag, "constructor_kernel", None, wrap_in_cgen=False)
from dune.perftool.loopy.target import DuneTarget
constructor_knl = constructor_knl.copy(target=DuneTarget(declare_temporaries=False))
signature = "{}({})".format(basename, ", ".join(next(iter(p.generate(with_semicolon=False))) for p in constructor_params))
......@@ -1035,6 +1033,13 @@ def generate_localoperator_file(kernels, filename):
for k in kernels.values():
operator_methods.extend(k)
# Generate all the realizations of sum factorization kernel objects needed in this operator
from dune.perftool.sumfact.realization import realize_sumfact_kernel_function
for sf, qp in retrieve_cache_items("kernelimpl"):
from dune.perftool.sumfact.tabulation import set_quadrature_points
set_quadrature_points(qp)
operator_methods.append(realize_sumfact_kernel_function(sf))
if get_option('instrumentation_level') >= 3:
include_file('dune/perftool/common/timer.hh', filetag='operatorfile')
operator_methods.append(TimerMethod())
......
......@@ -23,7 +23,6 @@ from dune.perftool.options import (get_form_option,
get_option,
)
from dune.perftool.loopy.flatten import flatten_index
from dune.perftool.loopy.buffer import get_buffer_temporary
from dune.perftool.sumfact.quadrature import nest_quadrature_loops
from dune.perftool.pdelab.localoperator import determine_accumulation_space
from dune.perftool.pdelab.restriction import restricted_name
......@@ -427,11 +426,15 @@ def generate_accumulation_instruction(expr, visitor):
vectag = frozenset({"gradvec"}) if vsf.vectorized else frozenset()
temp = get_buffer_temporary(buffer,
shape=vsf.quadrature_shape,
dim_tags=vsf.quadrature_dimtags,
name="input_{}".format(buffer),
)
from dune.perftool.sumfact.realization import name_buffer_storage, buffer_decl, get_sumfact_dtype
storage = name_buffer_storage(buffer, 0)
temp = "input_{}".format(buffer)
temporary_variable(temp,
shape=vsf.quadrature_shape,
dim_tags=vsf.quadrature_dimtags,
decl_method=buffer_decl(storage, get_sumfact_dtype(sf)),
managed=True,
)
# Those input fields, that are padded need to be set to zero
# in order to do a horizontal_add later on
......
......@@ -32,7 +32,6 @@ from dune.perftool.pdelab.argument import name_coefficientcontainer
from dune.perftool.pdelab.geometry import (local_dimension,
world_dimension,
)
from dune.perftool.loopy.buffer import initialize_buffer, get_buffer_temporary
from dune.perftool.sumfact.symbolic import SumfactKernel, SumfactKernelInputBase
from dune.perftool.options import get_form_option
from dune.perftool.pdelab.driver import FEM_name_mangling
......@@ -83,10 +82,14 @@ class LFSSumfactKernelInput(SumfactKernelInputBase, ImmutableRecord):
coeff = pc(container, lfs, basisiname)
# Get the input temporary!
name = get_buffer_temporary(sf.buffer,
shape=(product(mat.basis_size for mat in sf.matrix_sequence), sf.vector_width),
name="input_{}".format(sf.buffer)
)
from dune.perftool.sumfact.realization import name_buffer_storage, buffer_decl, get_sumfact_dtype
storage = name_buffer_storage(sf.buffer, 0)
name = "input_{}".format(sf.buffer)
temporary_variable(name,
shape=(product(mat.basis_size for mat in sf.matrix_sequence), sf.vector_width),
decl_method=buffer_decl(storage, get_sumfact_dtype(sf)),
managed=True,
)
assignee = prim.Subscript(prim.Variable(name),
(prim.Variable(basisiname),) + (index,))
......
......@@ -12,7 +12,6 @@ from dune.perftool.generation import (backend,
temporary_variable,
globalarg,
)
from dune.perftool.loopy.buffer import get_buffer_temporary
from dune.perftool.pdelab.geometry import (local_dimension,
world_dimension,
name_geometry,
......@@ -41,10 +40,14 @@ class GeoCornersInput(SumfactKernelInputBase, ImmutableRecord):
ImmutableRecord.__init__(self, dir=dir)
def realize(self, sf, index, insn_dep):
name = get_buffer_temporary(sf.buffer,
shape=(2 ** local_dimension(), sf.vector_width),
name="input_{}".format(sf.buffer)
)
from dune.perftool.sumfact.realization import name_buffer_storage, buffer_decl, get_sumfact_dtype
storage = name_buffer_storage(sf.buffer, 0)
name = name="input_{}".format(sf.buffer)
temporary_variable(name,
shape=(2 ** local_dimension(), sf.vector_width),
decl_method=buffer_decl(storage, get_sumfact_dtype(sf)),
managed=True,
)
ciname = corner_iname()
geo = name_geometry()
......
......@@ -3,20 +3,19 @@ The code that triggers the creation of the necessary code constructs
to realize a sum factorization kernel
"""
from dune.perftool.generation import (barrier,
delete_cache_items,
dump_accumulate_timer,
generator_factory,
get_global_context_value,
globalarg,
instruction,
kernel_cached,
post_include,
preamble,
silenced_warning,
temporary_variable,
transform,
)
from dune.perftool.loopy.buffer import (get_buffer_temporary,
switch_base_storage,
)
from dune.perftool.pdelab.argument import pymbolic_coefficient
from dune.perftool.pdelab.basis import shape_as_pymbolic
from dune.perftool.pdelab.geometry import world_dimension
......@@ -40,6 +39,22 @@ import numpy as np
import pymbolic.primitives as prim
necessary_kernel_implementations = generator_factory(item_tags=("kernelimpl",), no_deco=True)
@generator_factory(cache_key_generator=lambda s, qp: (s.function_key, qp))
def _name_kernel_implementation_function(sf, qp):
name = "sfimpl_{}".format("_".join(str(m) for m in sf.matrix_sequence))
necessary_kernel_implementations((sf, qp))
return name
def name_kernel_implementation_function(sf):
from dune.perftool.sumfact.quadrature import quadrature_points_per_direction
qp = quadrature_points_per_direction()
return _name_kernel_implementation_function(sf, qp)
def realize_sum_factorization_kernel(sf, **kwargs):
if get_global_context_value("dry_run", False):
return sf, sf.insn_dep
......@@ -52,32 +67,96 @@ def alias_data_array(name, data):
return "auto {} = {}.data();".format(name, data)
@generator_factory(item_tags=("sumfactkernel",),
context_tags=("kernel",),
cache_key_generator=lambda s, **kw: s.cache_key)
@preamble
def declare_buffer_storage(name, size, alignment):
return "char {}[{}] __attribute__ ((aligned({})));".format(name, size * alignment, alignment)
def name_buffer_storage(buff, which):
name = "{}_{}".format(buff, which)
return name
@kernel_cached
def _realize_sum_factorization_kernel(sf):
insn_dep = sf.insn_dep
# Measure times and count operations in c++ code
if get_option("instrumentation_level") >= 4:
if sf.stage == 1:
setuptimer = '{}_kernel_setup'.format(assembler_routine_name())
insn_dep = insn_dep.union(frozenset({instruction(code='HP_TIMER_STOP({});'.format(setuptimer),
within_inames=frozenset(sf.within_inames),
depends_on=insn_dep)}))
timer_name = assembler_routine_name() + '_kernel' + '_stage{}'.format(sf.stage)
post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
dump_accumulate_timer(timer_name)
insn_dep = insn_dep.union(frozenset({instruction(code="HP_TIMER_START({});".format(timer_name),
within_inames=frozenset(sf.within_inames),
depends_on=insn_dep,
),
}))
# Get all the necessary pieces for a function call
funcname = name_kernel_implementation_function(sf)
#TODO calculate the size and alignment correctly
size = 10000
alignment = 8
buffers = tuple(name_buffer_storage(sf.buffer, i) for i in range(2))
# Make sure that the storage is allocated
for buf in buffers:
declare_buffer_storage(buf, size, alignment)
# Realize the input if it is not direct
if not sf.input.direct_input_is_possible:
insn_dep = insn_dep.union(sf.input.realize(sf, insn_dep))
# Call the function
code = "{}({}, {});".format(funcname, *buffers)
insn_dep = frozenset({instruction(code=code,
depends_on=insn_dep,
within_inames=frozenset(sf.within_inames))
})
# Interpret the output as a temporary of correct shape
out = "{}_output".format(sf.buffer)
temporary_variable(out,
shape=sf.output_shape,
dim_tags=sf.output_dimtags,
decl_method=buffer_decl(buffers[sf.length % 2], get_sumfact_dtype(sf)),
managed=True,
)
silenced_warning("read_no_write({})".format(out))
return lp.TaggedVariable(out, sf.tag), insn_dep
def buffer_decl(buffer, dtype):
def _buffer_decl(name, *a):
from dune.perftool.loopy.target import numpy_to_cpp_dtype
_type = numpy_to_cpp_dtype(dtype)
return "{0} *{1} = ({0} *){2};".format(_type, name, buffer)
return _buffer_decl
def get_sumfact_dtype(sf):
if sf.vectorized:
pass
else:
from dune.perftool.loopy.target import dtype_floatingpoint
from loopy.types import NumpyType
return NumpyType(dtype_floatingpoint()).dtype.name
class BufferSwitcher(object):
def __init__(self, buffers=("buffer0", "buffer1")):
self.buffers = buffers
self.current = 0
def get_temporary(self, name=None, **kwargs):
temporary_variable(name,
managed=True,
decl_method=buffer_decl(self.buffers[self.current], kwargs["dtype"]),
**kwargs
)
return name
def switch(self):
self.current = (self.current + 1) % 2
def realize_sumfact_kernel_function(sf):
# Get a buffer switcher instance
buffer = BufferSwitcher()
insn_dep = frozenset()
# Prepare some dim_tags/shapes for later use
ftags = ",".join(["f"] * sf.length)
novec_ftags = ftags
......@@ -147,9 +226,11 @@ def _realize_sum_factorization_kernel(sf):
# Get a temporary that interprets the base storage of the input
# as a column-major matrix. In later iteration of the matrix loop
# this reinterprets the output of the previous iteration.
inp = get_buffer_temporary(sf.buffer,
inp = buffer.get_temporary("buff_step{}_in".format(l),
shape=inp_shape + vec_shape,
dim_tags=ftags)
dim_tags=ftags,
dtype=get_sumfact_dtype(sf),
)
# The input temporary will only be read from, so we need to silence the loopy warning
silenced_warning('read_no_write({})'.format(inp))
......@@ -157,7 +238,7 @@ def _realize_sum_factorization_kernel(sf):
input_summand = prim.Subscript(prim.Variable(inp),
input_inames + vec_iname)
switch_base_storage(sf.buffer)
buffer.switch()
# Get a temporary that interprets the base storage of the output.
#
......@@ -171,9 +252,11 @@ def _realize_sum_factorization_kernel(sf):
output_shape = tuple(out_shape[1:]) + (out_shape[0],)
if l == len(matrix_sequence) - 1:
output_shape = permute_backward(output_shape, perm)
out = get_buffer_temporary(sf.buffer,
out = buffer.get_temporary("buff_step{}_out".format(l),
shape=output_shape + vec_shape,
dim_tags=ftags)
dim_tags=ftags,
dtype=get_sumfact_dtype(sf),
)
# Write the matrix-matrix multiplication expression
matprod = prim.Product((matrix.pymbolic((prim.Variable(out_inames[0]), k_expr) + vec_iname),
......@@ -217,22 +300,196 @@ def _realize_sum_factorization_kernel(sf):
)
})
# Measure times and count operations in c++ code
if get_option("instrumentation_level") >= 4:
stop_insn = frozenset({instruction(code="HP_TIMER_STOP({});".format(timer_name),
depends_on=frozenset({lp.match.Tagged(tag)}),
within_inames=frozenset(sf.within_inames))})
if sf.stage == 1:
qp_timer_name = assembler_routine_name() + '_kernel' + '_quadratureloop'
post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
dump_accumulate_timer(timer_name)
frozenset({instruction(code="HP_TIMER_START({});".format(qp_timer_name),
depends_on=stop_insn)})
out = get_buffer_temporary(sf.buffer,
shape=sf.output_shape,
dim_tags=sf.output_dimtags,
)
silenced_warning('read_no_write({})'.format(out))
return lp.TaggedVariable(out, sf.tag), insn_dep
# Construct a loopy kernel object
name = name_kernel_implementation_function(sf)
from dune.perftool.pdelab.localoperator import extract_kernel_from_cache
signature = "void {}(const char* buffer0, const char* buffer1) const".format(name)
kernel = extract_kernel_from_cache("kernel_default", name, [signature])
delete_cache_items("kernel_default")
return kernel
# @generator_factory(item_tags=("sumfactkernel",),
# context_tags=("kernel",),
# cache_key_generator=lambda s, **kw: s.cache_key)
# def old_realize_sum_factorization_kernel(sf):
# insn_dep = sf.insn_dep
#
# # Measure times and count operations in c++ code
# if get_option("instrumentation_level") >= 4:
# if sf.stage == 1:
# setuptimer = '{}_kernel_setup'.format(assembler_routine_name())
# insn_dep = insn_dep.union(frozenset({instruction(code='HP_TIMER_STOP({});'.format(setuptimer),
# within_inames=frozenset(sf.within_inames),
# depends_on=insn_dep)}))
#
# timer_name = assembler_routine_name() + '_kernel' + '_stage{}'.format(sf.stage)
# post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
# dump_accumulate_timer(timer_name)
# insn_dep = insn_dep.union(frozenset({instruction(code="HP_TIMER_START({});".format(timer_name),
# within_inames=frozenset(sf.within_inames),
# depends_on=insn_dep,
# ),
# }))
#
# if not sf.input.direct_input_is_possible:
# insn_dep = insn_dep.union(sf.input.realize(sf, insn_dep))
#
# # Prepare some dim_tags/shapes for later use
# ftags = ",".join(["f"] * sf.length)
# novec_ftags = ftags
# ctags = ",".join(["c"] * sf.length)
# vec_shape = ()
# if sf.vectorized:
# ftags = ftags + ",vec"
# ctags = ctags + ",vec"
# vec_shape = (sf.vector_width,)
#
# # Decide in which order we want to process directions in the
# # sumfactorization. A clever ordering can lead to a reduced
# # complexity. This will e.g. happen at faces where we only have
# # one quadratue point m_l=1 if l is the normal direction of the
# # face.
# #
# # Rule of thumb: small m's early and large n's late.
# perm = sumfact_permutation_strategy(sf)
#
# # Permute matrix sequence
# matrix_sequence = permute_forward(sf.matrix_sequence, perm)
#
# # Product of all matrices
# for l, matrix in enumerate(matrix_sequence):
# # Compute the correct shapes of in- and output matrices of this matrix-matrix multiplication
# # and get inames that realize the product.
# inp_shape = (matrix.cols,) + tuple(mat.cols for mat in matrix_sequence[l + 1:]) + tuple(mat.rows for mat in matrix_sequence[:l])
# out_shape = (matrix.rows,) + tuple(mat.cols for mat in matrix_sequence[l + 1:]) + tuple(mat.rows for mat in matrix_sequence[:l])
# out_inames = tuple(sumfact_iname(length, "out_inames_" + str(k)) for k, length in enumerate(out_shape))
# vec_iname = ()
# if matrix.vectorized:
# iname = sumfact_iname(sf.vector_width, "vec")
# vec_iname = (prim.Variable(iname),)
# transform(lp.tag_inames, [(iname, "vec")])
#
# # A trivial reduction is implemented as a product, otherwise we run into
# # a code generation corner case producing way too complicated code. This
# # could be fixed upstream, but the loopy code realizing reductions is not
# # trivial and the priority is kind of low.
# if matrix.cols != 1:
# k = sumfact_iname(matrix.cols, "red")
# k_expr = prim.Variable(k)
# else:
# k_expr = 0
#
# # Setup the input of the sum factorization kernel. In the
# # first matrix multiplication this can be taken from
# # * an input temporary (default)
# # * a global data structure (if FastDGGridOperator is in use)
# # * a value from a global data structure, broadcasted to a vector type (vectorized + FastDGGridOperator)
# input_inames = (k_expr,) + tuple(prim.Variable(j) for j in out_inames[1:])
# if l == 0 and sf.input.direct_input_is_possible:
# # See comment below
# input_inames = permute_backward(input_inames, perm)
# inp_shape = permute_backward(inp_shape, perm)
#
# input_summand = sf.input.realize_direct(inp_shape, input_inames)
# else:
# # If we did permute the order of a matrices above we also
# # permuted the order of out_inames. Unfortunately the
# # order of our input is from 0 to d-1. This means we need
# # to permute _back_ to get the right coefficients.
# if l == 0:
# inp_shape = permute_backward(inp_shape, perm)
# input_inames = permute_backward(input_inames, perm)
#
# # Get a temporary that interprets the base storage of the input
# # as a column-major matrix. In later iteration of the matrix loop
# # this reinterprets the output of the previous iteration.
# inp = get_buffer_temporary(sf.buffer,
# shape=inp_shape + vec_shape,
# dim_tags=ftags)
#
# # The input temporary will only be read from, so we need to silence the loopy warning
# silenced_warning('read_no_write({})'.format(inp))
#
# input_summand = prim.Subscript(prim.Variable(inp),
# input_inames + vec_iname)
#
# switch_base_storage(sf.buffer)
#
# # Get a temporary that interprets the base storage of the output.
# #
# # Note: In this step the reordering of the fastest directions
# # is happening. The new direction (out_inames[0]) and the
# # corresponding shape (out_shape[0]) goes to the end (slowest
# # direction) and everything stays column major (ftags->fortran
# # style).
# #
# # If we are in the last step we reverse the permutation.
# output_shape = tuple(out_shape[1:]) + (out_shape[0],)
# if l == len(matrix_sequence) - 1:
# output_shape = permute_backward(output_shape, perm)
# out = get_buffer_temporary(sf.buffer,
# shape=output_shape + vec_shape,
# dim_tags=ftags)
#
# # Write the matrix-matrix multiplication expression
# matprod = prim.Product((matrix.pymbolic((prim.Variable(out_inames[0]), k_expr) + vec_iname),
# input_summand))
#
# # ... which may be a reduction, if k>0
# if matrix.cols != 1:
# matprod = lp.Reduction("sum", k, matprod)
#
# # Here we also move the new direction (out_inames[0]) to the
# # end and reverse permutation
# output_inames = tuple(prim.Variable(i) for i in out_inames[1:]) + (prim.Variable(out_inames[0]),)
# if l == len(matrix_sequence) - 1:
# output_inames = permute_backward(output_inames, perm)
#
# tag = "sumfact_stage{}".format(sf.stage)
# if sf.stage == 3:
# tag = "{}_{}".format(tag, "_".join(sf.within_inames))
#
# # Collect the key word arguments for the loopy instruction
# insn_args = {"forced_iname_deps": frozenset([i for i in out_inames]).union(frozenset(sf.within_inames)),
# "forced_iname_deps_is_final": True,
# "depends_on": insn_dep,
# "tags": frozenset({tag}),
# "predicates": sf.predicates,
# "groups": frozenset({sf.group_name}),
# }
#
# # In case of direct output we directly accumulate the result
# # of the Sumfactorization into some global data structure.
# if l == len(matrix_sequence) - 1 and get_form_option('fastdg') and sf.stage == 3:
# if sf.vectorized:
# insn_args["forced_iname_deps"] = insn_args["forced_iname_deps"].union(frozenset({vec_iname[0].name}))
# insn_dep = sf.output.realize_direct(matprod, output_inames, out_shape, insn_args)
# else:
# # Issue the reduction instruction that implements the multiplication
# # at the same time store the instruction ID for the next instruction to depend on
# insn_dep = frozenset({instruction(assignee=prim.Subscript(prim.Variable(out), output_inames + vec_iname),
# expression=matprod,
# **insn_args
# )
# })
#
# # Measure times and count operations in c++ code
# if get_option("instrumentation_level") >= 4:
# stop_insn = frozenset({instruction(code="HP_TIMER_STOP({});".format(timer_name),
# depends_on=frozenset({lp.match.Tagged(tag)}),
# within_inames=frozenset(sf.within_inames))})
# if sf.stage == 1:
# qp_timer_name = assembler_routine_name() + '_kernel' + '_quadratureloop'
# post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
# dump_accumulate_timer(timer_name)
# frozenset({instruction(code="HP_TIMER_START({});".format(qp_timer_name),
# depends_on=stop_insn)})
#
# out = get_buffer_temporary(sf.buffer,
# shape=sf.output_shape,
# dim_tags=sf.output_dimtags,
# )
# silenced_warning('read_no_write({})'.format(out))
#
# return lp.TaggedVariable(out, sf.tag), insn_dep
......@@ -278,6 +278,10 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
# Some cache key definitions
# Watch out for the documentation to see which key is used unter what circumstances
#
@property
def function_key(self):
""" Kernels sharing this key may use the same kernel implementation function """
return self.matrix_sequence
@property
def parallel_key(self):
......
......@@ -18,7 +18,6 @@ from dune.perftool.generation import (class_member,
transform,
valuearg
)
from dune.perftool.loopy.buffer import get_buffer_temporary
from dune.perftool.loopy.target import dtype_floatingpoint
from dune.perftool.loopy.vcl import ExplicitVCLCast, get_vcl_type_size
from dune.perftool.pdelab.localoperator import (name_domain_field,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment