diff --git a/python/dune/perftool/generation/__init__.py b/python/dune/perftool/generation/__init__.py index 5fecdc4c25c3b4dfc76aa194c3a9ca7a2d50bee9..3751e64a0f55b5206c72a4ff38e4bc0d9bde2a18 100644 --- a/python/dune/perftool/generation/__init__.py +++ b/python/dune/perftool/generation/__init__.py @@ -2,6 +2,7 @@ from __future__ import absolute_import from dune.perftool.generation.cache import (cached, generator_factory, + no_caching, retrieve_cache_items, delete_cache_items, ) @@ -17,10 +18,12 @@ from dune.perftool.generation.cpp import (base_class, template_parameter, ) -from dune.perftool.generation.loopy import (domain, +from dune.perftool.generation.loopy import (constantarg, + domain, globalarg, iname, instruction, + function_mangler, pymbolic_expr, temporary_variable, valuearg, diff --git a/python/dune/perftool/generation/cache.py b/python/dune/perftool/generation/cache.py index 27f93b19e816146d983e99a535282f815fd58c89..80849f8cf4e504851ffc142745a544ae36bd0efa 100644 --- a/python/dune/perftool/generation/cache.py +++ b/python/dune/perftool/generation/cache.py @@ -54,8 +54,8 @@ class _NoCachingCounter(object): return _NoCachingCounter.counter -def no_caching(*a): - return _NoCachingCounter.get() +def no_caching(*a, **k): + return _NoCachingCounter().get() class _CacheItemMeta(type): diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py index b99ea877dbfb679f24844a8f8dc7fc9f8d1cd778..61e04f062620a488c4b95cbbe1b5d19bdea07f1b 100644 --- a/python/dune/perftool/generation/loopy.py +++ b/python/dune/perftool/generation/loopy.py @@ -2,6 +2,7 @@ from __future__ import absolute_import from dune.perftool.generation import (generator_factory, + no_caching, preamble, ) @@ -11,7 +12,7 @@ import numpy iname = generator_factory(item_tags=("iname",)) valuearg = generator_factory(item_tags=("argument", "valuearg"), on_store=lambda n: loopy.ValueArg(n), no_deco=True) pymbolic_expr = generator_factory(item_tags=("kernel", "pymbolic")) -constantarg = generator_factory(item_tags=("kernel", "argument", "constantarg"), on_store=lambda n: loopy.ConstantArg(n)) +function_mangler = generator_factory(item_tags=("kernel", "mangler")) @generator_factory(item_tags=("argument", "globalarg"), @@ -19,7 +20,17 @@ constantarg = generator_factory(item_tags=("kernel", "argument", "constantarg"), def globalarg(name, shape=loopy.auto, **kw): if isinstance(shape, str): shape = (shape,) - return loopy.GlobalArg(name, dtype=numpy.float64, shape=shape, **kw) + dtype = kw.pop("dtype", numpy.float64) + return loopy.GlobalArg(name, dtype=dtype, shape=shape, **kw) + + +@generator_factory(item_tags=("argument", "constantarg"), + cache_key_generator=lambda n, **kw: n) +def constantarg(name, shape=loopy.auto, **kw): + if isinstance(shape, str): + shape = (shape,) + dtype = kw.pop("dtype", numpy.float64) + return loopy.GlobalArg(name, dtype=dtype, shape=shape, **kw) @generator_factory(item_tags=("domain",)) @@ -41,11 +52,11 @@ def _temporary_type(shape_impl, shape, first=True): if shape_impl[0] == 'fv': return "Dune::FieldVector<{}, {}>".format(_temporary_type(shape_impl[1:], shape[1:], first=False), shape[0]) if shape_impl[0] == 'fm': - pass + raise NotImplementedError @preamble -def default_declaration(name, shape, shape_impl): +def default_declaration(name, shape=(), shape_impl=()): # Determine the C++ type to use for this temporary. t = _temporary_type(shape_impl, shape) if len(shape_impl) == 0: @@ -59,8 +70,15 @@ def default_declaration(name, shape, shape_impl): return '{} {}(0.0);'.format(t, name) -@generator_factory(item_tags=("temporary",), cache_key_generator=lambda n, **kw: n) -def temporary_variable(name, **kwargs): +class _TemporaryCounter: + counter = 0 + + +@generator_factory(item_tags=("temporary",), cache_key_generator=no_caching) +def temporary_variable(name=None, **kwargs): + if name is None: + name = 'expr_{}'.format(str(_TemporaryCounter.counter).zfill(4)) + _TemporaryCounter.counter = _TemporaryCounter.counter + 1 if 'dtype' not in kwargs: kwargs['dtype'] = numpy.float64 @@ -101,17 +119,17 @@ class _IDCounter: def _insn_cache_key(code=None, expression=None, **kwargs): - if code: + if code is not None: return code - if expression: + if expression is not None: return expression raise ValueError("Please specify either code or expression for instruction!") @generator_factory(item_tags=("insn_id",), cache_key_generator=_insn_cache_key) def instruction(code=None, expression=None, **kwargs): - assert code or expression - assert not (code and expression) + assert (code is not None) or (expression is not None) + assert not ((code is not None) and (expression is not None)) assert 'id' not in kwargs # Get an ID for this instruction diff --git a/python/dune/perftool/loopy/target.py b/python/dune/perftool/loopy/target.py index dd4441ddc5e54389640723d6dd92ab060844a404..71bb18d067122592ec867ddb56bd32ee3df4ef08 100644 --- a/python/dune/perftool/loopy/target.py +++ b/python/dune/perftool/loopy/target.py @@ -5,6 +5,11 @@ from loopy.target import TargetBase from loopy.target.c.codegen.expression import LoopyCCodeMapper +from loopy.library.reduction import (ReductionOperation, + register_reduction_parser, + ) + + class AllToDouble(dict): """ This imitates a dict that maps everything to double and logs the requested keys """ def __getitem__(self, key): @@ -28,6 +33,13 @@ class MyMapper(LoopyCCodeMapper): ret = ret + '[{}]'.format(str(i)) return ret + def map_variable(self, expr, enclosing_prec, type_context): + from dune.perftool.pymbolic import VerbatimVariable + if isinstance(expr, VerbatimVariable): + return expr.name + else: + return super(MyMapper, self).map_variable(expr, enclosing_prec, type_context) + class DuneTarget(TargetBase): @@ -37,12 +49,9 @@ class DuneTarget(TargetBase): def dtype_to_typename(self, dtype): # For now, we do this the simplest possible way return _registry[dtype.dtype.name] -# -# def is_vector_dtype(self, dtype): -# raise NotImplementedError() -# -# def vector_dtype(self, base, count): -# raise NotImplementedError() + + def is_vector_dtype(self, dtype): + return False def get_expression_to_code_mapper(self, codegen_state): return MyMapper(codegen_state) diff --git a/python/dune/perftool/loopy/transformer.py b/python/dune/perftool/loopy/transformer.py index 0120b33f905fd7870d04dcd92cd7c2f790041152..7e414082efd7b23418e6f38dcc5e59888b325459 100644 --- a/python/dune/perftool/loopy/transformer.py +++ b/python/dune/perftool/loopy/transformer.py @@ -24,6 +24,8 @@ from dune.perftool.pdelab.basis import (lfs_iname, name_lfs_bound, traverse_lfs_tree, ) +from dune.perftool.pdelab.quadrature import quadrature_iname +from pymbolic.primitives import Subscript, Variable @iname @@ -33,141 +35,132 @@ def index_sum_iname(i): class UFL2LoopyVisitor(ModifiedTerminalTracker, UFL2PymbolicMapper, GeometryMapper): - def __init__(self): + def __init__(self, measure): + self.iname_stack = [quadrature_iname()] + self.measure = measure super(UFL2LoopyVisitor, self).__init__() - def coefficient(self, o): - # All trial functions should already be handled - assert o.count() != 0 + def _assign(self, o): + # Assign a name to the temporary variable we want our result in + temp = temporary_variable().name - # We expect all coefficients to be of type Expression! - from dune.perftool.ufl.execution import Expression - assert isinstance(o, Expression) + # Now we assign this expression to a new temporary variable + insn_id = instruction(assignee=Variable(temp), + expression=o, + forced_iname_deps=frozenset({i for i in self.iname_stack}), + forced_iname_deps_is_final=True, + ) - # Determine the name of the parameter function - from dune.perftool.generation import get_global_context_value - name = get_global_context_value("namedata")[id(o)] + # Actually, if we have a cache hit, we need to change our temporary + from dune.perftool.generation import retrieve_cache_items + temp = filter(lambda i: i.id == insn_id, retrieve_cache_items("instruction"))[0].assignee_name - # Trigger the generation of code for this thing in the parameter class - from dune.perftool.pdelab.parameter import (cell_parameter_function, - intersection_parameter_function, - ) - if o.on_intersection: - intersection_parameter_function(name, o) + return Variable(temp) + + def __call__(self, o): + # First we do the tree traversal to get a pymbolic expression representing this expression + ret = self.call(o) + + if isinstance(ret, Variable): + return ret else: - cell_parameter_function(name, o, self.restriction) + return self._assign(ret) + + def argument(self, o): + # Have the issued instruction depend on the iname for this localfunction space + from dune.perftool.pdelab.basis import lfs_iname + self.iname_stack.append(lfs_iname(o.element(), argnumber=o.number())) - # And return a symbol - from pymbolic.primitives import Variable - return Variable(name) + # Correct the restriction on boundary integrals + restriction = self.restriction + if self.measure == 'exterior_facet': + restriction = Restriction.NEGATIVE + + if self.grad: + from dune.perftool.pdelab.argument import name_testfunction_gradient + return Subscript(Variable(name_testfunction_gradient(o, restriction)), (Variable(lfs_iname(o.element(), argnumber=o.number())),)) + else: + from dune.perftool.pdelab.argument import name_testfunction + return Subscript(Variable(name_testfunction(o, restriction)), (Variable(lfs_iname(o.element(), argnumber=o.number())),)) + + def coefficient(self, o): + # If this is a trialfunction, we do something entirely different + if o.count() == 0: + # Correct the restriction on boundary integrals + restriction = self.restriction + if self.measure == 'exterior_facet': + restriction = Restriction.NEGATIVE + + if self.grad: + from dune.perftool.pdelab.argument import name_trialfunction_gradient + return Variable(name_trialfunction_gradient(o, restriction)) + else: + from dune.perftool.pdelab.argument import name_trialfunction + return Variable(name_trialfunction(o, restriction)) + + # so this is a parameter function + else: + # We expect all coefficients to be of type Expression! + from dune.perftool.ufl.execution import Expression + assert isinstance(o, Expression) + + # Determine the name of the parameter function + from dune.perftool.generation import get_global_context_value + name = get_global_context_value("namedata")[id(o)] + + # Trigger the generation of code for this thing in the parameter class + from dune.perftool.pdelab.parameter import (cell_parameter_function, + intersection_parameter_function, + ) + if o.on_intersection: + intersection_parameter_function(name, o) + else: + cell_parameter_function(name, o, self.restriction) + + # And return a symbol + return Variable(name) def index_sum(self, o): + from loopy import Reduction from dune.perftool.ufl.shape import determine_shape + inames = () # Define an iname for each of the indices in the multiindex for i in o.ufl_operands[1].indices(): + inames = inames + (index_sum_iname(i),) shape = determine_shape(o.ufl_operands[0], i) - index_sum_iname(i) from dune.perftool.pdelab import name_index domain(name_index(i), shape) - # Now continue processing the expression - return self.call(o.ufl_operands[0]) - + # Recurse to get the summation expression + term = self.call(o.ufl_operands[0]) + ret = self._assign(Reduction("sum", inames, term)) -class _Counter: - counter = 0 - - -def get_count(): - c = _Counter.counter - _Counter.counter = c + 1 - return c + return ret def transform_accumulation_term(term, measure, subdomain_id): - from dune.perftool.ufl.transformations.replace import ReplaceExpression - from pymbolic.primitives import Variable - - # We always have a quadrature loop - from dune.perftool.pdelab.quadrature import quadrature_iname - quadrature_iname() - - # Get the pymbolic expression needed for this accumulation term. - # This includes filling the cache with all sorts of necessary preambles! + # Initialize the local function spaces that we might need for this term + # We therefore gather a list of modified trial functions too. from dune.perftool.ufl.modified_terminals import extract_modified_arguments - test_ma = extract_modified_arguments(term, trialfunction=False, testfunction=True) + test_ma = extract_modified_arguments(term) trial_ma = extract_modified_arguments(term, trialfunction=True, testfunction=False) - rmap = {} - for ma in test_ma: - # If this is a boundary integral, all terms are implicitly restricted to the inside cell - if measure == 'exterior_facet': - ma.restriction = Restriction.NEGATIVE - - # Set up the local function space structure - traverse_lfs_tree(ma) - - # Get the expression for the modified argument representing the test function - from dune.perftool.pdelab.argument import pymbolic_testfunction - rmap[ma.expr] = pymbolic_testfunction(ma) - - for ma in trial_ma: - # If this is a boundary integral, all terms are implicitly restricted to the inside cell + # All test and trial functions on boundary integrals are technically restricted + import itertools + for ma in itertools.chain(test_ma, trial_ma): if measure == 'exterior_facet': ma.restriction = Restriction.NEGATIVE - # Set up the local function space structure traverse_lfs_tree(ma) - # Get the expression for the modified argument representing the trial function - from dune.perftool.pdelab.argument import pymbolic_trialfunction - rmap[ma.expr] = pymbolic_trialfunction(ma) - - # Get the transformer! - ufl2l_mf = UFL2LoopyVisitor() - re_mf = ReplaceExpression(replacemap=rmap, otherwise=ufl2l_mf) - ufl2l_mf.call = re_mf.__call__ - - pymbolic_expr = re_mf(term) - - # Now simplify the expression - # TODO: Add a switch to disable/configure this. - from dune.perftool.pymbolic.simplify import simplify_pymbolic_expression - pymbolic_expr = simplify_pymbolic_expression(pymbolic_expr) - - # There are some corner cases in DG, where this accumulation term vanishes completely - if pymbolic_expr == 0: - return - - # Define a temporary variable for this expression - expr_tv_name = "expr_" + str(get_count()).zfill(4) - from pymbolic.primitives import Variable - - # This is a bit hacky now. To correctly determine the iname dependencies of - # the accumulation term, we inspect it manually. This is necessary, as loopys - # automatic detection would also considers those inames dependencies which are - # duplicates of the lfs inames. - from dune.perftool.pymbolic.inameset import get_index_inames - acc_inames = get_index_inames(pymbolic_expr).union(frozenset({quadrature_iname()})) - - insn_id = instruction(assignee=Variable(expr_tv_name), - expression=pymbolic_expr, - forced_iname_deps=acc_inames, - forced_iname_deps_is_final=True, - ) - - # Actually, if we have a cache hit, we need to change our temporary - from dune.perftool.generation import retrieve_cache_items - expr_tv_name = filter(lambda i: i.id == insn_id, retrieve_cache_items("instruction"))[0].assignee_name - - # Now register the temporary variable with loopy - expr_tv = temporary_variable(expr_tv_name) + pymbolic_expr = UFL2LoopyVisitor(measure)(term) # The data that is used to collect the arguments for the accumulate function accumargs = [None] * (2 * len(test_ma)) residual_shape = [None] * len(test_ma) arg_restr = [None] * len(test_ma) + accum_inames = [None] * len(test_ma) for ma in test_ma: count = ma.argexpr.number() @@ -177,6 +170,7 @@ def transform_accumulation_term(term, measure, subdomain_id): accumargs[2 * count] = lfs accumargs[2 * count + 1] = lfsi + accum_inames[count] = lfsi arg_restr[count] = ma.restriction residual_shape[count] = name_lfs_bound(lfs) @@ -193,7 +187,7 @@ def transform_accumulation_term(term, measure, subdomain_id): # Generate the code snippet for this accumulation instruction code = "{}.accumulate({}, {}*{});".format(accumvar, ", ".join(accumargs), - expr_tv_name, + pymbolic_expr.name, factor, ) @@ -232,7 +226,7 @@ def transform_accumulation_term(term, measure, subdomain_id): # Finally, issue the instruction instruction(code=code, assignees=frozenset({accumvar}), - read_variables=frozenset({accumvar, factor, expr_tv_name}), - forced_iname_deps=acc_inames, + read_variables=frozenset({accumvar, factor, pymbolic_expr.name}), + forced_iname_deps=frozenset(accum_inames).union(frozenset({quadrature_iname()})), forced_iname_deps_is_final=True, ) diff --git a/python/dune/perftool/pdelab/argument.py b/python/dune/perftool/pdelab/argument.py index 1c2e69bfd3d31717fb363e744ef80c66a8f8df90..bcb9ec3717432f0264b8554c34d7350a9d4db500 100644 --- a/python/dune/perftool/pdelab/argument.py +++ b/python/dune/perftool/pdelab/argument.py @@ -1,6 +1,6 @@ """ Generator functions related to trial and test functions and the accumulation loop""" -from dune.perftool.generation import domain, iname, pymbolic_expr, symbol, globalarg +from dune.perftool.generation import domain, iname, pymbolic_expr, symbol, globalarg, function_mangler, constantarg, get_global_context_value from dune.perftool.ufl.modified_terminals import ModifiedArgumentDescriptor from dune.perftool.pdelab import (name_index, restricted_name, @@ -13,63 +13,36 @@ from dune.perftool.pdelab.basis import (evaluate_trialfunction, name_lfs_bound, ) from dune.perftool import Restriction -from pymbolic.primitives import Subscript, Variable +from pymbolic.primitives import Call, Subscript, Variable @symbol -def name_testfunction_gradient(ma): - assert ma.grad - - return name_basis_gradient(ma.argexpr.element(), ma.restriction) +def name_testfunction_gradient(expr, restriction): + return name_basis_gradient(expr.element(), restriction) @symbol -def name_testfunction(ma): - assert not ma.grad - - return name_basis(ma.argexpr.element(), ma.restriction) - - -@pymbolic_expr -def pymbolic_testfunction(ma): - # we only accept an index if we treat a gradient... - assert bool(ma.index) == ma.grad - - if ma.grad: - return Subscript(Variable(name_testfunction_gradient(ma)), (Variable(lfs_iname(ma.argexpr.element(), argnumber=ma.argexpr.number())), Variable(name_index(ma.index)))) - else: - return Subscript(Variable(name_testfunction(ma)), Variable(lfs_iname(ma.argexpr.element(), argnumber=ma.argexpr.number()))) +def name_testfunction(expr, restriction): + it = get_global_context_value("integral_type") + if it == 'exterior_facet': + restriction = Restriction.NEGATIVE + return name_basis(expr.element(), restriction) @symbol -def name_trialfunction_gradient(ma): - assert ma.grad - - name = restricted_name("gradu", ma.restriction) - evaluate_trialfunction_gradient(ma.argexpr.element(), name, ma.restriction) +def name_trialfunction_gradient(expr, restriction): + name = restricted_name("gradu", restriction) + evaluate_trialfunction_gradient(expr.element(), name, restriction) return name @symbol -def name_trialfunction(ma): - assert not ma.grad - - name = restricted_name("u", ma.restriction) - evaluate_trialfunction(ma.argexpr.element(), name, ma.restriction) +def name_trialfunction(expr, restriction): + name = restricted_name("u", restriction) + evaluate_trialfunction(expr.element(), name, restriction) return name -@pymbolic_expr -def pymbolic_trialfunction(ma): - # we only accept an index if we treat a gradient... - assert bool(ma.index) == ma.grad - - if ma.grad: - return Subscript(Variable(name_trialfunction_gradient(ma)), Variable(name_index(ma.index))) - else: - return Variable(name_trialfunction(ma)) - - @symbol def name_testfunctionspace(restriction): return restricted_name("lfsv", restriction) @@ -92,7 +65,29 @@ def type_trialfunctionspace(): @symbol def name_coefficientcontainer(restriction): - return restricted_name("x", restriction) + name = restricted_name("x", restriction) + from dune.perftool.pdelab.basis import name_lfs_bound, lfs_iname + return name + + +@function_mangler +def create_function_mangler(container): + def _mangler(kernel, name, dtypes): + if name == container: + import loopy + return loopy.types.NumpyType("int"), container + + return _mangler + + +@pymbolic_expr +def pymbolic_coefficient(lfs, index, restriction): + container = name_coefficientcontainer(restriction) + create_function_mangler(container) + import loopy + constantarg(lfs, dtype=loopy.types.NumpyType("str")) + from dune.perftool.pymbolic import VerbatimVariable + return Call(VerbatimVariable(container), (VerbatimVariable(lfs), Variable(index),)) @symbol @@ -123,14 +118,6 @@ def name_argument(ma): assert False -def pymbolic_argument(ma): - if ma.argexpr.number() == 0: - return pymbolic_testfunction(ma) - if ma.argexpr.number() == 1: - return pymbolic_trialfunction(ma) - assert False - - @symbol def name_jacobian(restriction1, restriction2): # Restrictions may only differ if NONE diff --git a/python/dune/perftool/pdelab/basis.py b/python/dune/perftool/pdelab/basis.py index 745ce9b2bda64e37957884ca1f8998f5fb3779e5..5bf7fe5ba9f9083a978f0e729b6c24a10f0fc406 100644 --- a/python/dune/perftool/pdelab/basis.py +++ b/python/dune/perftool/pdelab/basis.py @@ -14,7 +14,8 @@ from dune.perftool.generation import (cached, from dune.perftool.pdelab.quadrature import (name_quadrature_position_in_cell, quadrature_iname, ) -from dune.perftool.pdelab.geometry import (name_dimension, +from dune.perftool.pdelab.geometry import (dimension_iname, + name_dimension, name_jacobian_inverse_transposed, to_cell_coordinates, ) @@ -23,6 +24,8 @@ from dune.perftool.pdelab.localoperator import (lop_template_ansatz_gfs, ) from dune.perftool.pdelab.driver import FEM_name_mangling from dune.perftool.pdelab import restricted_name +from pymbolic.primitives import Product, Subscript, Variable +from loopy import Reduction @symbol @@ -135,7 +138,7 @@ def _lfs_iname(element, context): if context: context = '_' + context - name = name + context + '_index' + name = name + context + 'index' domain(name, bound) return name @@ -157,6 +160,8 @@ def lfs_iname(element, context='', argnumber=None): """ if argnumber is not None: context = 'arg{}_{}'.format(argnumber, context) + else: + context = context + '_' return _lfs_iname(element, context) @@ -249,70 +254,34 @@ def name_basis_gradient(element, restriction): return name -def reset_trialfunction(name): - return instruction(inames=(quadrature_iname(), - ), - code="{} = 0.0;".format(name), - assignees=frozenset({name}), - ) - - @cached def evaluate_trialfunction(element, name, restriction): temporary_variable(name, shape=()) - reset = reset_trialfunction(name) lfs = name_lfs(element) index = lfs_iname(element, context='trial') basis = name_basis(element, restriction) - from dune.perftool.pdelab.argument import name_coefficientcontainer - coeffs = name_coefficientcontainer(restriction) - instruction(inames=(quadrature_iname(), - index, - ), - code='{} += {}({}, {})*{}[{}];'.format(name, - coeffs, - lfs, - index, - basis, - index - ), - assignees=frozenset({name}), - read_variables=frozenset({basis}), - depends_on=frozenset({reset}), + from dune.perftool.pdelab.argument import pymbolic_coefficient + coeff = pymbolic_coefficient(lfs, index, restriction) + reduction_expr = Product((coeff, Subscript(Variable(basis), Variable(index)))) + instruction(expression=Reduction("sum", index, reduction_expr), + assignee=frozenset({name}), + forced_iname_deps=frozenset({quadrature_iname()}), + forced_iname_deps_is_final=True, ) -def reset_trialfunction_gradient(name): - return instruction(inames=(quadrature_iname(), - ), - code='{} = 0.0;'.format(name), - assignees=frozenset({name}), - ) - - @cached def evaluate_trialfunction_gradient(element, name, restriction): - # TODO this is of course not yet correct temporary_variable(name, shape=(name_dimension(),), shape_impl=('fv',)) - reset = reset_trialfunction_gradient(name) lfs = name_lfs(element) index = lfs_iname(element, context='trialgrad') basis = name_basis_gradient(element, restriction) - from dune.perftool.pdelab.argument import name_coefficientcontainer - coeffs = name_coefficientcontainer(restriction) - instruction(inames=(quadrature_iname(), - index, - ), - code='{}.axpy({}({}, {}), {}[{}]);'.format(name, - coeffs, - lfs, - index, - basis, - index, - ), - assignees=frozenset({name}), - read_variables=frozenset({basis}), - forced_iname_deps=frozenset({quadrature_iname(), index}), + idim = dimension_iname() + from dune.perftool.pdelab.argument import pymbolic_coefficient + coeff = pymbolic_coefficient(lfs, index, restriction) + reduction_expr = Product((coeff, Subscript(Variable(basis), (Variable(index), Variable(idim))))) + instruction(expression=Reduction("sum", index, reduction_expr), + assignee=Subscript(Variable(name), Variable(idim)), + forced_iname_deps=frozenset({quadrature_iname(), idim}), forced_iname_deps_is_final=True, - depends_on=frozenset({reset}), ) diff --git a/python/dune/perftool/pdelab/driver.py b/python/dune/perftool/pdelab/driver.py index bdf8bc57bfa199187e93dc2bfa89b9230f67d370..397a1614196d3d349f466783a90e0829eaea27f8 100644 --- a/python/dune/perftool/pdelab/driver.py +++ b/python/dune/perftool/pdelab/driver.py @@ -801,6 +801,6 @@ def vtkoutput(): vec = name_vector() vtkfile = name_vtkfile() dune_solve() - # print_matrix() + print_matrix() return ["Dune::PDELab::addSolutionToVTKWriter({}, {}, {});".format(vtkwriter, gfs, vec), "{}.write({}, Dune::VTK::ascii);".format(vtkwriter, vtkfile)] diff --git a/python/dune/perftool/pdelab/geometry.py b/python/dune/perftool/pdelab/geometry.py index 8aac1c1d7bc1613d69be39e47779d6d819b2a3ec..1817b37b5538bb08de90476ed2f4c1138213aeb6 100644 --- a/python/dune/perftool/pdelab/geometry.py +++ b/python/dune/perftool/pdelab/geometry.py @@ -1,6 +1,9 @@ from dune.perftool import Restriction from dune.perftool.pdelab import restricted_name -from dune.perftool.generation import (preamble, +from dune.perftool.generation import (domain, + get_global_context_value, + iname, + preamble, symbol, temporary_variable, ) @@ -41,6 +44,21 @@ class GeometryMapper(MultiFunction): return Variable(name_facetarea()) +@iname +def _dimension_iname(context, count): + if context: + context = '_' + context + name = 'idim{}{}'.format(context, str(count)) + formdata = get_global_context_value('formdata') + dim = formdata.geometric_dimension + domain(name, dim) + return name + + +def dimension_iname(context='', count=0): + return _dimension_iname(context, count) + + @symbol def name_element_geometry_wrapper(): return 'eg' diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index d44424bb7e13fc4b1d5acf0ec1bc243e6c361148..bb38056a8ff67a0599da008e0c7c6be4fdad27be 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -176,15 +176,30 @@ def generate_kernel(integral): domains = [i for i in retrieve_cache_items("domain")] instructions = [i for i in retrieve_cache_items("instruction")] temporaries = {i.name: i for i in retrieve_cache_items("temporary")} - preambles = [(i, p) for i, p in enumerate(retrieve_cache_items("preamble"))] arguments = [i for i in retrieve_cache_items("argument")] + manglers = [i for i in retrieve_cache_items("mangler")] # Create the kernel from loopy import make_kernel, preprocess_kernel - # kernel = make_kernel(domains, instructions, arguments, temporary_variables=temporaries, preambles=preambles, target=DuneTarget()) - kernel = make_kernel(domains, instructions, arguments, temporary_variables=temporaries, target=DuneTarget(), preambles=preambles) + kernel = make_kernel(domains, + instructions, + arguments, + temporary_variables=temporaries, + function_manglers=manglers, + target=DuneTarget() + ) kernel = preprocess_kernel(kernel) + # Loopy might have introduced some temporary variables during preprocessing. As I want to have my own + # temporary declaration code right now, I call the declaration preamble manually. + for added_tv in set(kernel.temporary_variables.keys()) - set(temporaries.keys()): + from dune.perftool.generation.loopy import default_declaration + default_declaration(added_tv) + + # Now add the preambles to the kernel + preambles = [(i, p) for i, p in enumerate(retrieve_cache_items("preamble"))] + kernel = kernel.copy(preambles=preambles) + # All items with the kernel tags can be destroyed once a kernel has been generated from dune.perftool.generation import delete_cache_items delete_cache_items("(not file) and (not clazz)") diff --git a/python/dune/perftool/pymbolic/__init__.py b/python/dune/perftool/pymbolic/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d4a85c13e666669948ac257142656a685dc8b24b 100644 --- a/python/dune/perftool/pymbolic/__init__.py +++ b/python/dune/perftool/pymbolic/__init__.py @@ -0,0 +1,5 @@ +from pymbolic.primitives import Variable + + +class VerbatimVariable(Variable): + pass diff --git a/python/dune/perftool/ufl/modified_terminals.py b/python/dune/perftool/ufl/modified_terminals.py index b992dc05f2fe76fb68c76b336bd46cf580c96277..6beed959b0c0cd372114be603a8eeaae2190f122 100644 --- a/python/dune/perftool/ufl/modified_terminals.py +++ b/python/dune/perftool/ufl/modified_terminals.py @@ -18,22 +18,14 @@ class ModifiedTerminalTracker(MultiFunction): def positive_restricted(self, o): assert self.restriction == Restriction.NONE self.restriction = Restriction.POSITIVE - - from dune.perftool.generation import global_context - with global_context(restriction=Restriction.POSITIVE): - ret = self.call(o.ufl_operands[0]) - + ret = self.call(o.ufl_operands[0]) self.restriction = Restriction.NONE return ret def negative_restricted(self, o): assert self.restriction == Restriction.NONE self.restriction = Restriction.NEGATIVE - - from dune.perftool.generation import global_context - with global_context(restriction=Restriction.NEGATIVE): - ret = self.call(o.ufl_operands[0]) - + ret = self.call(o.ufl_operands[0]) self.restriction = Restriction.NONE return ret