diff --git a/python/dune/perftool/cgen/clazz.py b/python/dune/perftool/cgen/clazz.py index c673b7c495007591e34dd105beb49c7027b72a55..f265638ec68ba122cd209909b7c2704cd6992aaf 100644 --- a/python/dune/perftool/cgen/clazz.py +++ b/python/dune/perftool/cgen/clazz.py @@ -61,57 +61,18 @@ class ClassMember(Generable): yield line + '\n' -class Constructor(Generable): - def __init__(self, block=Block([]), arg_decls=[], clsname=None, initializer_list=[], access=AccessModifier.PUBLIC): - self.clsname = clsname - self.arg_decls = arg_decls - self.access = access - self.block = block - self.il = initializer_list - - def generate(self): - assert self.clsname - - yield '\n' - yield "{}:\n".format(access_modifier_string(self.access)) - yield self.clsname + "(" - if self.arg_decls: - for content in self.arg_decls[0].generate(with_semicolon=False): - yield content - for ad in self.arg_decls[1:]: - yield ", " - for content in ad.generate(with_semicolon=False): - yield content - yield ")\n" - - # add the initializer list - if self.il: - yield " : {}".format(self.il[0]) - - for i in self.il[1:]: - yield ",\n" - yield " {}".format(i) - yield '\n' - - for line in self.block.generate(): - yield line - - class Class(Generable): """ Generator for a templated class """ - def __init__(self, name, base_classes=[], members=[], tparam_decls=[], constructors=[]): + def __init__(self, name, base_classes=[], members=[], tparam_decls=[]): self.name = name self.base_classes = base_classes self.members = members self.tparam_decls = tparam_decls - self.constructors = constructors for bc in base_classes: assert isinstance(bc, BaseClass) for mem in members: assert isinstance(mem, ClassMember) - for con in constructors: - assert isinstance(con, Constructor) def generate(self): # define the class header @@ -139,7 +100,7 @@ class Class(Generable): yield '\n' # Now yield the entire block - block = Block(contents=self.constructors + self.members) + block = Block(contents=self.members) # Yield the block for line in block.generate(): diff --git a/python/dune/perftool/generation/cpp.py b/python/dune/perftool/generation/cpp.py index 797c29a5db3f84df9db8e95a8dfc3964fe20fe39..507c8217d335936ec3c70ceb845984a734ee2738 100644 --- a/python/dune/perftool/generation/cpp.py +++ b/python/dune/perftool/generation/cpp.py @@ -12,10 +12,9 @@ import cgen preamble = generator_factory(item_tags=("preamble",), counted=True, context_tags="kernel") pre_include = generator_factory(item_tags=("file", "pre_include"), context_tags=("filetag",), no_deco=True) post_include = generator_factory(item_tags=("file", "post_include"), context_tags=("filetag",), no_deco=True) -class_member = generator_factory(item_tags=("clazz", "member"), context_tags=("classtag",), on_store=lambda m: ClassMember(m), counted=True) -template_parameter = generator_factory(item_tags=("clazz", "template_param"), context_tags=("classtag",), counted=True) -class_basename = generator_factory(item_tags=("clazz", "basename"), context_tags=("classtag",)) -constructor_block = generator_factory(item_tags=("clazz", "constructor_block"), context_tags=("classtag",), counted=True) +class_member = generator_factory(item_tags=("member",), context_tags=("classtag",), on_store=lambda m: ClassMember(m), counted=True) +template_parameter = generator_factory(item_tags=("template_param",), context_tags=("classtag",), counted=True) +class_basename = generator_factory(item_tags=("basename",), context_tags=("classtag",)) @generator_factory(item_tags=("file", "include"), context_tags=("filetag",)) diff --git a/python/dune/perftool/interactive.py b/python/dune/perftool/interactive.py index 8b7fb0cd3be913568b063b6683f8403e91ae515b..f64f99806bbda1db952ef1406891ace86d210d0c 100644 --- a/python/dune/perftool/interactive.py +++ b/python/dune/perftool/interactive.py @@ -3,7 +3,7 @@ from functools import partial from dune.perftool.generation import global_context from dune.perftool.loopy.transformations import get_loopy_transformations -from dune.perftool.pdelab.localoperator import assembly_routine_signature, AssemblyMethod +from dune.perftool.pdelab.localoperator import assembly_routine_signature, LoopyKernelMethod import os @@ -80,7 +80,7 @@ def show_code(which, kernel): with global_context(integral_type=which[0], form_type=which[1]): signature = assembly_routine_signature() - print("".join(AssemblyMethod(signature, kernel).generate())) + print("".join(LoopyKernelMethod(signature, kernel).generate())) print("Press Return to return to the previous menu") input() diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index d9838de3d46f8a16a4de361255b9671653d5e7d4..d6a2879e6f096341f1e9e44eecb57e4628f37881 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -476,7 +476,13 @@ def generate_kernel(integrals): get_backend(interface="accum_insn")(visitor, term, measure, subdomain_id) tag = get_global_context_value("kernel") - return extract_kernel_from_cache(tag) + knl = extract_kernel_from_cache(tag) + + # All items with the kernel tags can be destroyed once a kernel has been generated + from dune.perftool.generation import delete_cache_items + delete_cache_items(tag) + + return knl def extract_kernel_from_cache(tag): @@ -485,6 +491,10 @@ def extract_kernel_from_cache(tag): from dune.perftool.generation import retrieve_cache_functions, retrieve_cache_items from dune.perftool.loopy.target import DuneTarget domains = [i for i in retrieve_cache_items("{} and domain".format(tag))] + + if not domains: + domains = ["{[stupid] : 0<=stupid<1}"] + instructions = [i for i in retrieve_cache_items("{} and instruction".format(tag))] temporaries = {i.name: i for i in retrieve_cache_items("{} and temporary".format(tag))} arguments = [i for i in retrieve_cache_items("{} and argument".format(tag))] @@ -536,10 +546,6 @@ def extract_kernel_from_cache(tag): # Do the loopy preprocessing! kernel = preprocess_kernel(kernel) - # All items with the kernel tags can be destroyed once a kernel has been generated - from dune.perftool.generation import delete_cache_items - delete_cache_items(tag) - return kernel @@ -587,11 +593,19 @@ class TimerMethod(ClassMember): ClassMember.__init__(self, content) -class AssemblyMethod(ClassMember): - def __init__(self, signature, kernel, filename): +class LoopyKernelMethod(ClassMember): + def __init__(self, signature, kernel, add_timings=True, initializer_list=[]): from loopy import generate_body from cgen import LiteralLines, Block content = signature + + # Add initializer list if this is a constructor + if initializer_list: + content[-1] = content[-1] + " :" + for init in initializer_list[:-1]: + content.append(" "*4 + init + ",") + content.append(" "*4 + initializer_list[-1]) + content.append('{') if kernel is not None: # Add kernel preamble @@ -599,7 +613,7 @@ class AssemblyMethod(ClassMember): content.append(' ' + p) # Start timer - if get_option('timer'): + if add_timings and get_option('timer'): timer_name = assembler_routine_name() + '_kernel' post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile') content.append(' ' + 'HP_TIMER_START({});'.format(timer_name)) @@ -609,7 +623,7 @@ class AssemblyMethod(ClassMember): content.extend(l for l in generate_body(kernel).split('\n')[1:-1]) # Stop timer - if get_option('timer'): + if add_timings and get_option('timer'): content.append(' ' + 'HP_TIMER_STOP({});'.format(timer_name)) content.append('}') @@ -624,17 +638,17 @@ def cgen_class_from_cache(tag, members=[]): base_classes = [bc for bc in retrieve_cache_items('{} and baseclass'.format(tag))] constructor_params = [bc for bc in retrieve_cache_items('{} and constructor_param'.format(tag))] - from cgen import Block - constructor_block = Block(contents=[i for i in retrieve_cache_items("{} and constructor_block".format(tag), make_generable=True)]) il = [i for i in retrieve_cache_items('{} and initializer'.format(tag))] pm = [m for m in retrieve_cache_items('{} and member'.format(tag))] tparams = [i for i in retrieve_cache_items('{} and template_param'.format(tag))] - from dune.perftool.cgen.clazz import Constructor - constructor = Constructor(block=constructor_block, arg_decls=constructor_params, clsname=basename, initializer_list=il) + # Construct the constructor + constructor_knl = extract_kernel_from_cache(tag) + signature = "{}({})".format(basename, ", ".join(next(iter(p.generate(with_semicolon=False))) for p in constructor_params)) + constructor = LoopyKernelMethod([signature], constructor_knl, add_timings=False, initializer_list=il) from dune.perftool.cgen import Class - return Class(basename, base_classes=base_classes, members=members + pm, constructors=[constructor], tparam_decls=tparams) + return Class(basename, base_classes=base_classes, members=[constructor] + members + pm, tparam_decls=tparams) def generate_localoperator_kernels(formdata, data): @@ -794,7 +808,7 @@ def generate_localoperator_file(formdata, kernels, filename): it, ft = method with global_context(integral_type=it, form_type=ft): signature = assembly_routine_signature(formdata) - operator_methods.append(AssemblyMethod(signature, kernel, filename)) + operator_methods.append(LoopyKernelMethod(signature, kernel)) if get_option('timer'): include_file('dune/perftool/common/timer.hh', filetag='operatorfile') diff --git a/python/dune/perftool/sumfact/amatrix.py b/python/dune/perftool/sumfact/amatrix.py index ec0dace8da0ac18a99e541a0c4b76bf5a2b5fec2..0d41c26b032c78cd95507b59825fb5076bdd3405 100644 --- a/python/dune/perftool/sumfact/amatrix.py +++ b/python/dune/perftool/sumfact/amatrix.py @@ -13,6 +13,8 @@ from dune.perftool.generation import (class_member, iname, include_file, initializer_list, + instruction, + preamble, silenced_warning, temporary_variable, valuearg @@ -181,7 +183,7 @@ def name_polynomials(): return name -@constructor_block(classtag="operator") +@preamble(kernel="operator") def sort_quadrature_points_weights(): range_field = lop_template_range_field() domain_field = name_domain_field() @@ -192,7 +194,13 @@ def sort_quadrature_points_weights(): return "onedQuadraturePointsWeights<{}, {}, {}>({}, {});".format(range_field, domain_field, number_qp, qp, qw) -@constructor_block(classtag="operator") +@iname(kernel="operator") +def theta_iname(name, bound): + name = "{}_{}".format(name, bound) + domain(name, bound) + return name + + def construct_theta(name, transpose, derivative): # Make sure that the quadrature points are sorted sort_quadrature_points_weights() @@ -204,15 +212,18 @@ def construct_theta(name, transpose, derivative): polynomials = name_polynomials() qp = name_oned_quadrature_points() + i = theta_iname("i", shape[0]) + j = theta_iname("j", shape[1]) + # access = "j,i" if transpose else "i,j" basispol = "dp" if derivative else "p" - polynomial_access = "i,{}[j]".format(qp) if transpose else "j,{}[i]".format(qp) + polynomial_access = "{},{}[{}]".format(i, qp, j) if transpose else "{},{}[{}]".format(j, qp, i) - return ["for (std::size_t i=0; i<{}; i++){{".format(shape[0]), - " for (std::size_t j=0; j<{}; j++){{".format(shape[1]), - " {}.colmajoraccess(i,j) = {}.{}({});".format(name, polynomials, basispol, polynomial_access), - " }", - "}"] + return instruction(code="{}.colmajoraccess({},{}) = {}.{}({});".format(name, i, j, polynomials, basispol, polynomial_access), + kernel="operator", + within_inames=frozenset({i, j}), + within_inames_is_final=True, + ) @class_member(classtag="operator")