diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py index a113cb6a203fead3a704d4ce74f2b46bb08aa16a..1345af2486c6b1e7c77a4f5317ef7f6b1823ec83 100644 --- a/python/dune/perftool/generation/loopy.py +++ b/python/dune/perftool/generation/loopy.py @@ -115,6 +115,8 @@ def temporary_variable(name, **kwargs): ) def c_instruction_impl(**kw): kw.setdefault('assignees', []) + from pymbolic.primitives import Variable + kw['assignees'] = frozenset(Variable(i) for i in kw['assignees']) inames = kw.pop('inames', kw.get('forced_iname_deps', [])) return loopy.CInstruction(inames, **kw) @@ -124,6 +126,9 @@ def c_instruction_impl(**kw): cache_key_generator=lambda *a, **kw: kw['expression'], ) def expr_instruction_impl(**kw): + if 'assignees' in kw: + from pymbolic.primitives import Variable + kw['assignees'] = frozenset(Variable(i) for i in kw['assignees']) return loopy.ExpressionInstruction(**kw) diff --git a/python/dune/perftool/loopy/target.py b/python/dune/perftool/loopy/target.py index 71bb18d067122592ec867ddb56bd32ee3df4ef08..3c717f2de5d87a3070fbec434cfb815c9676ab93 100644 --- a/python/dune/perftool/loopy/target.py +++ b/python/dune/perftool/loopy/target.py @@ -1,28 +1,18 @@ -import numpy -import six - -from loopy.target import TargetBase -from loopy.target.c.codegen.expression import LoopyCCodeMapper - - -from loopy.library.reduction import (ReductionOperation, - register_reduction_parser, - ) - - -class AllToDouble(dict): - """ This imitates a dict that maps everything to double and logs the requested keys """ - def __getitem__(self, key): - self.__setitem__(key, numpy.float64) - return numpy.float64 +from loopy.target import (TargetBase, + ASTBuilderBase, + DummyHostASTBuilder, + ) +from loopy.target.c import CASTBuilder +from loopy.target.c.codegen.expression import ExpressionToCMapper _registry = {'float32': 'float', 'int32': 'int', - 'float64': 'double'} + 'float64': 'double', + 'string': 'std::string'} -class MyMapper(LoopyCCodeMapper): +class MyMapper(ExpressionToCMapper): def map_subscript(self, expr, enclosing_prec, type_context): ret = str(expr.aggregate) from pymbolic.primitives import Variable @@ -41,50 +31,17 @@ class MyMapper(LoopyCCodeMapper): return super(MyMapper, self).map_variable(expr, enclosing_prec, type_context) -class DuneTarget(TargetBase): - - def get_or_register_dtype(self, names, dtype=None): - return dtype - - def dtype_to_typename(self, dtype): - # For now, we do this the simplest possible way - return _registry[dtype.dtype.name] - - def is_vector_dtype(self, dtype): - return False - +class DuneASTBuilder(CASTBuilder): def get_expression_to_code_mapper(self, codegen_state): return MyMapper(codegen_state) - def generate_code(self, kernel, codegen_state, impl_arg_info): - from cgen import Block - body = Block() - - from loopy.codegen.loop import set_up_hw_parallel_loops - gen_code = set_up_hw_parallel_loops(kernel, 0, codegen_state) - - from cgen import Line - body.append(Line()) - - if isinstance(gen_code.ast, Block): - body.extend(gen_code.ast.contents) - else: - body.append(gen_code.ast) - - return str(body), gen_code.implemented_domains - - def get_value_arg_decl(self, name, shape, dtype, is_written): - assert shape == () - - return "blubb" - - def get_global_arg_decl(self, name, shape, dtype, is_written): - return "blubb" + def get_temporary_decls(self, codegen_state, schedule_index): + # Currently we do *not* want to build the temporary declarations + # through loopy, as this would involve lots of work on a proper + # type system! + return [] def emit_sequential_loop(self, codegen_state, iname, iname_dtype, static_lbound, static_ubound, inner): - - from loopy.codegen import wrap_in - # Some of our loops are special as they use PDELab specific concepts. # Fortunately those loops are tied to specific inames. from dune.perftool.pdelab.quadrature import quadrature_iname @@ -92,32 +49,28 @@ class DuneTarget(TargetBase): from cgen import CustomLoop from dune.perftool.pdelab.quadrature import quadrature_loop_statement loop_stmt = quadrature_loop_statement() - return wrap_in(CustomLoop, - "for({})".format(loop_stmt), - inner) - - # From here on it is the default implementation taken from loopys CTarget. - ecm = codegen_state.expression_to_code_mapper - - from loopy.symbolic import aff_to_expr - - from pymbolic.mapper.stringifier import PREC_NONE - from cgen import For - - return wrap_in(For, - "{} {} = {}".format(self.dtype_to_typename(iname_dtype), - iname, - ecm(aff_to_expr(static_lbound), - PREC_NONE, - "i", - ), - ), - "{} <= {}".format(iname, - ecm(aff_to_expr(static_ubound), - PREC_NONE, - "i", - ), - ), - "++{}".format(iname), - inner, - ) + return CustomLoop("for({})".format(loop_stmt), inner) + else: + return CASTBuilder.emit_sequential_loop(self, codegen_state, iname, iname_dtype, static_lbound, static_ubound, inner) + + +class DuneTarget(TargetBase): + def __init__(self): + # Set fortran_abi to allow reusing CASTBuilder for the moment + self.fortran_abi = False + + def split_kernel_at_global_barriers(self): + return False + + def get_host_ast_builder(self): + return DummyHostASTBuilder(self) + + def get_device_ast_builder(self): + return DuneASTBuilder(self) + + def dtype_to_typename(self, dtype): + # For now, we do this the simplest possible way + return _registry[dtype.dtype.name] + + def is_vector_dtype(self, dtype): + return False diff --git a/python/dune/perftool/pdelab/basis.py b/python/dune/perftool/pdelab/basis.py index db50a9053d24bf83417630589894524b2d998685..0031f468087c22de132ba186404188299e26a24f 100644 --- a/python/dune/perftool/pdelab/basis.py +++ b/python/dune/perftool/pdelab/basis.py @@ -290,7 +290,7 @@ def evaluate_basis_gradient(leaf_element, name, restriction): name, index, ), - assignees=name, + assignees=frozenset({name}), read_variables=frozenset({reference_gradients}), ) diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index 596b1414fb67582362920acf54b2ac3a61f98af2..958e0974ddf24d2b3b7c8889d6ccfabd5207dd94 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -16,7 +16,6 @@ from dune.perftool.cgen.clazz import (AccessModifier, ClassMember, ) from dune.perftool import Restriction -from pytools import memoize @template_parameter("operator") @@ -218,12 +217,14 @@ def generate_kernel(integrals): class AssemblyMethod(ClassMember): def __init__(self, signature, kernel): - from loopy import generate_code + from loopy import generate_body from cgen import LiteralLines, Block content = signature content.append('{') if kernel is not None: - content.extend(' ' + l for l in generate_code(kernel)[0].split('\n')) + for i, p in kernel.preambles: + content.append(p) + content.extend(l for l in generate_body(kernel).split('\n')[1:-1]) content.append('}') ClassMember.__init__(self, content) diff --git a/python/dune/perftool/pdelab/parameter.py b/python/dune/perftool/pdelab/parameter.py index 5ba55552f880f736ef6d8647451f3c04b07cd239..c4e3c5488015445dc954258525370cece2c82d65 100644 --- a/python/dune/perftool/pdelab/parameter.py +++ b/python/dune/perftool/pdelab/parameter.py @@ -107,7 +107,7 @@ def construct_nested_fieldvector(t, shape): @cached def cell_parameter_function(name, expr, restriction, t='double'): shape = expr.ufl_element().value_shape() - shape_impl = ('fv',)*len(shape) + shape_impl = ('fv',) * len(shape) t = construct_nested_fieldvector(t, shape) temporary_variable(name, shape=shape, shape_impl=shape_impl) define_parameter_function_class_member(name, expr, t, True) @@ -117,7 +117,7 @@ def cell_parameter_function(name, expr, restriction, t='double'): @cached def intersection_parameter_function(name, expr, t='double'): shape = expr.ufl_element().value_shape() - shape_impl = ('fv',)*len(shape) + shape_impl = ('fv',) * len(shape) t = construct_nested_fieldvector(t, shape) temporary_variable(name, shape=shape, shape_impl=shape_impl) define_parameter_function_class_member(name, expr, t, False) diff --git a/python/loopy b/python/loopy index 6b32fb790fb7c4947da03f6c8f1a3694ee90da92..db5dd7408b03e4cb453120cee3476f44b4179dfc 160000 --- a/python/loopy +++ b/python/loopy @@ -1 +1 @@ -Subproject commit 6b32fb790fb7c4947da03f6c8f1a3694ee90da92 +Subproject commit db5dd7408b03e4cb453120cee3476f44b4179dfc