diff --git a/.gitmodules b/.gitmodules index d07bf324168684295261b9284a89551048323a6a..c0aa0e62e13178c6010b9f7120309688556ebd40 100644 --- a/.gitmodules +++ b/.gitmodules @@ -8,3 +8,6 @@ [submodule "python/pymbolic"] path = python/pymbolic url = https://github.com/inducer/pymbolic.git +[submodule "python/cgen"] + path = python/cgen + url = https://github.com/inducer/cgen.git diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 9dc164e7e1f6768ee09135706df5e6d378d390a5..e3c67559b3c7fc897e8923b34c25673a924c0d39 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -9,6 +9,7 @@ create_virtualenv_wrapper(COMMANDS python -m IPython notebook NAME notebook) # Install all the external packages that we have as submodules +dune_install_python_package(PATH cgen) dune_install_python_package(PATH pymbolic) dune_install_python_package(PATH loopy) dune_install_python_package(PATH ufl) diff --git a/python/cgen b/python/cgen new file mode 160000 index 0000000000000000000000000000000000000000..4aa99a0ce7a843da42e6c0f2fba942489cfb3c59 --- /dev/null +++ b/python/cgen @@ -0,0 +1 @@ +Subproject commit 4aa99a0ce7a843da42e6c0f2fba942489cfb3c59 diff --git a/python/dune/perftool/generation/__init__.py b/python/dune/perftool/generation/__init__.py index 1b084cbc18ac34b4b1193d3c45ed841d0766884a..32463b4b1ccd0af0ececa244c5578b5ccc4a6d67 100644 --- a/python/dune/perftool/generation/__init__.py +++ b/python/dune/perftool/generation/__init__.py @@ -1,5 +1,11 @@ from __future__ import absolute_import +from dune.perftool.generation.backend import (backend, + get_backend, + ) + +from dune.perftool.generation.counter import get_counter + from dune.perftool.generation.cache import (cached, generator_factory, no_caching, @@ -30,7 +36,9 @@ from dune.perftool.generation.loopy import (constantarg, globalarg, iname, instruction, + noop_instruction, pymbolic_expr, + silenced_warning, temporary_variable, valuearg, ) diff --git a/python/dune/perftool/generation/backend.py b/python/dune/perftool/generation/backend.py new file mode 100644 index 0000000000000000000000000000000000000000..27356c937f1c8617297736a34e74d540dd0996d2 --- /dev/null +++ b/python/dune/perftool/generation/backend.py @@ -0,0 +1,41 @@ +from dune.perftool.generation.cache import _RegisteredFunction +from pytools import Record + + +_backend_mapping = {} + + +class FuncProxy(Record): + def __init__(self, interface, name, func): + Record.__init__(self, interface=interface, name=name, func=func) + + +def register_backend(interface, name, func): + _backend_mapping.setdefault(interface, {}) + assert name not in _backend_mapping[interface] + _backend_mapping[interface][name] = func + + +def backend(interface=None, name='default'): + assert interface + + def _dec(func): + if not isinstance(func, _RegisteredFunction): + # Allow order independence of the generator decorators + # and the backend decorator by delaying the registration + func = FuncProxy(interface, name, func) + else: + register_backend(interface, name, func) + + return func + + return _dec + + +def get_backend(interface=None, selector=None, **kwargs): + assert interface and selector + + select = selector(**kwargs) + assert select in _backend_mapping[interface] + + return _backend_mapping[interface][select] diff --git a/python/dune/perftool/generation/cache.py b/python/dune/perftool/generation/cache.py index 5c2b6cc827d41a790cbb364d8554adff8c5e6af0..53c8eada2d1722bc554bd802527264facbad8ed9 100644 --- a/python/dune/perftool/generation/cache.py +++ b/python/dune/perftool/generation/cache.py @@ -1,6 +1,7 @@ """ This module provides the memoization infrastructure for code generating functions. """ +from dune.perftool.generation.counter import get_counter # Store a global list of generator functions _generators = [] @@ -40,16 +41,8 @@ def _freeze(data): raise TypeError('Cannot freeze non-hashable object {} of type {}'.format(data, type(data))) -class _GlobalCounter(object): - counter = 0 - - def get(self): - _GlobalCounter.counter = _GlobalCounter.counter + 1 - return _GlobalCounter.counter - - def no_caching(*a, **k): - return _GlobalCounter().get() + return get_counter('__no_caching') class _RegisteredFunction(object): @@ -72,6 +65,13 @@ class _RegisteredFunction(object): # Register this generator function _generators.append(self) + # Allow order independence of the backend and the generator decorators. + # If backend was applied first, we resolve the issued FuncProxy object + from dune.perftool.generation.backend import FuncProxy, register_backend + if isinstance(self.func, FuncProxy): + register_backend(self.func.interface, self.func.name, self) + self.func = self.func.func + def _get_content(self, key): if self.counted: return self._memoize_cache[key][1] @@ -90,7 +90,7 @@ class _RegisteredFunction(object): val = self.on_store(self.func(*args, **kwargs)) # Maybe wrap it with a counter! if self.counted: - val = (_GlobalCounter().get(), val) + val = (get_counter('__cache_counted'), val) # and store the result self._memoize_cache[cache_key] = val diff --git a/python/dune/perftool/generation/counter.py b/python/dune/perftool/generation/counter.py new file mode 100644 index 0000000000000000000000000000000000000000..607d7eaa79351d4b9f50e79a919d0c8e075aac83 --- /dev/null +++ b/python/dune/perftool/generation/counter.py @@ -0,0 +1,9 @@ +""" Some global counters used to generate unique names throughout the project """ + +_counts = {} + + +def get_counter(identifier): + count = _counts.setdefault(identifier, 0) + _counts[identifier] = _counts[identifier] + 1 + return count diff --git a/python/dune/perftool/generation/loopy.py b/python/dune/perftool/generation/loopy.py index 10620a1ef073c181774a21aff8073eb1fc8fad9a..f62f309c069b3cbec2ca3a9ebce6f13cd7653646 100644 --- a/python/dune/perftool/generation/loopy.py +++ b/python/dune/perftool/generation/loopy.py @@ -1,7 +1,8 @@ """ The loopy specific generators """ from __future__ import absolute_import -from dune.perftool.generation import (generator_factory, +from dune.perftool.generation import (get_counter, + generator_factory, no_caching, preamble, ) @@ -12,6 +13,7 @@ import numpy iname = generator_factory(item_tags=("iname",)) pymbolic_expr = generator_factory(item_tags=("kernel", "pymbolic")) function_mangler = generator_factory(item_tags=("mangler",)) +silenced_warning = generator_factory(item_tags=("silenced_warning",), no_deco=True) @generator_factory(item_tags=("argument", "globalarg"), @@ -80,29 +82,56 @@ def default_declaration(name, shape=(), shape_impl=()): return '{} {}(0.0);'.format(t, name) -class _TemporaryCounter: - counter = 0 +def declaration_with_base_storage(base_storage, storage_shape): + # We currently consider the base storage to be linear + assert(len(storage_shape) == 1) + + @preamble + def base_storage_decl(name, shape=(), shape_impl=()): + default_declaration(base_storage, shape=storage_shape, shape_impl=('arr',)) + return 'double* {} = {};'.format(name, base_storage) + + return base_storage_decl def get_temporary_name(): - name = 'expr_{}'.format(str(_TemporaryCounter.counter).zfill(4)) - _TemporaryCounter.counter = _TemporaryCounter.counter + 1 - return name + return 'expr_{}'.format(str(get_counter('__temporary').zfill(4))) + + +class UnmanagedTemporaryVariable(loopy.TemporaryVariable): + """ + A TemporaryVariable class that does not have knowledge of data layout. + Is used when the data layout is under control of the underlying C++ type. + Indexed access will result in multiple operator[]s. Only known to + the DuneTarget. + """ + pass @generator_factory(item_tags=("temporary",), cache_key_generator=no_caching) -def temporary_variable(name, **kwargs): +def temporary_variable(name, managed=False, **kwargs): if 'dtype' not in kwargs: kwargs['dtype'] = numpy.float64 + # Check if a specified base_storage has already been initialized + if kwargs.get('base_storage', None): + assert(kwargs.get('decl_method', None) is None) + assert('storage_shape' in kwargs) + decl_method = declaration_with_base_storage(kwargs['base_storage'], kwargs['storage_shape']) + else: + decl_method = kwargs.pop('decl_method', default_declaration) + shape = kwargs.get('shape', ()) shape_impl = kwargs.pop('shape_impl', ('arr',) * len(shape)) - decl_method = kwargs.pop('decl_method', default_declaration) if decl_method is not None: decl_method(name, shape, shape_impl) - return loopy.TemporaryVariable(name, **kwargs) + if managed: + return loopy.TemporaryVariable(name, **kwargs) + else: + return UnmanagedTemporaryVariable(name, **kwargs) + # Now define generators for instructions. To ease dependency handling of instructions # these generators are a bit more involved... We apply the following procedure: @@ -139,10 +168,6 @@ def call_instruction_impl(**kw): return loopy.CallInstruction(**kw) -class _IDCounter: - count = 0 - - def _insn_cache_key(code=None, expression=None, **kwargs): if code is not None: return code @@ -158,8 +183,7 @@ def instruction(code=None, expression=None, **kwargs): assert 'id' not in kwargs # Get an ID for this instruction - id = 'insn' + str(_IDCounter.count).zfill(4) - _IDCounter.count = _IDCounter.count + 1 + id = 'insn_{}'.format(str(get_counter('__insn_id')).zfill(4)) # Now create the actual instruction if code: @@ -172,3 +196,8 @@ def instruction(code=None, expression=None, **kwargs): # return the ID, as it is the only useful information to the user return id + + +@generator_factory(item_tags=("instruction",), cache_key_generator=lambda **kw: kw['id']) +def noop_instruction(**kwargs): + return loopy.NoOpInstruction(**kwargs) diff --git a/python/dune/perftool/loopy/buffer.py b/python/dune/perftool/loopy/buffer.py new file mode 100644 index 0000000000000000000000000000000000000000..4238c4929ca188edf6b53dd4583937b47d4df91b --- /dev/null +++ b/python/dune/perftool/loopy/buffer.py @@ -0,0 +1,61 @@ +from dune.perftool.generation import (generator_factory, + temporary_variable, + ) + + +class FlipFlopBuffer(object): + def __init__(self, identifier, base_storage_size=None, num=2): + self.identifier = identifier + self.base_storage_size = base_storage_size + self.num = num + + # Initialize the counter that switches between the base storages! + self._current = 0 + + # Initialize a total counter for the issued temporaries + self._counter = 0 + + # Generate the base storage names + self.base_storage = tuple("{}_base_{}".format(self.identifier, i) for i in range(self.num)) + + def switch_base_storage(self): + self._current = (self._current + 1) % self.num + + def get_temporary(self, **kwargs): + assert("base_storage" not in kwargs) + assert("storage_shape" not in kwargs) + + # Select the base storage and increase counter + base = self.base_storage[self._current] + + # Construct a temporary name + name = "{}_{}".format(self.identifier, self._counter) + self._counter = self._counter + 1 + + # Construct the temporary and return it + temporary_variable(name, + base_storage=base, + storage_shape=(self.base_storage_size,), + managed=True, + **kwargs + ) + + return name + + +@generator_factory(item_tags=("kernel", "buffer"), cache_key_generator=lambda i, **kw: i) +def initialize_buffer(identifier, base_storage_size=None, num=2): + if base_storage_size is None: + raise ValueError("The buffer for identifier {} has not been initialized.".format(identifier)) + return FlipFlopBuffer(identifier, + base_storage_size=base_storage_size, + num=num, + ) + + +def get_buffer_temporary(identifier, **kwargs): + return initialize_buffer(identifier).get_temporary(**kwargs) + + +def switch_base_storage(identifier): + initialize_buffer(identifier).switch_base_storage() diff --git a/python/dune/perftool/loopy/stages.py b/python/dune/perftool/loopy/stages.py new file mode 100644 index 0000000000000000000000000000000000000000..d8ff325a95aed044c043aecd722cef3b97aa4f4f --- /dev/null +++ b/python/dune/perftool/loopy/stages.py @@ -0,0 +1,34 @@ +""" loopy instructions to mark stages of computations """ + +from dune.perftool.generation import (generator_factory, + noop_instruction, + ) + +from loopy import add_dependency +from loopy.match import Id + + +@generator_factory(item_tags=("stage",), cache_key_generator=lambda n, **kw: n) +def stage_insn(n, **kwargs): + assert 'id' not in kwargs + + # Get an ID for this instruction + id = 'stage_insn_{}'.format(n) + + # Chain dependencies of stage instructions + if n > 0: + kwargs['depends_on'] = kwargs.get('depends_on', frozenset([])).union(frozenset([stage_insn(n - 1, **kwargs)])) + + # Actually issue the instruction + noop_instruction(id=id, **kwargs) + + return id + + +def finalize_stage_instructions(kernel): + for i in range(len(stage_insn._memoize_cache)): + deps = frozenset({insn.id for insn in kernel.instructions if stage_insn(i) in insn.depends_on and not insn.id.startswith('stage_insn_')}) + for dep_id in deps: + kernel = add_dependency(kernel, Id(stage_insn(i + 1)), dep_id) + + return kernel diff --git a/python/dune/perftool/loopy/target.py b/python/dune/perftool/loopy/target.py index 36110ff2cfd53ee94155cadb73955fcdc54c30a1..b9767cdb92cb788f130533ea36e83d68588f202b 100644 --- a/python/dune/perftool/loopy/target.py +++ b/python/dune/perftool/loopy/target.py @@ -3,7 +3,7 @@ from loopy.target import (TargetBase, DummyHostASTBuilder, ) from loopy.target.c import CASTBuilder -from loopy.target.c.codegen.expression import ExpressionToCMapper +from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper, CExpressionToCodeMapper _registry = {'float32': 'float', 'int32': 'int', @@ -12,20 +12,26 @@ _registry = {'float32': 'float', 'str': 'std::string'} -class MyMapper(ExpressionToCMapper): - def map_subscript(self, expr, enclosing_prec, type_context): - ret = str(expr.aggregate) - from pymbolic.primitives import Variable - if isinstance(expr.index, Variable): - ret = ret + '[{}]'.format(str(expr.index)) - else: +class MyMapper(ExpressionToCExpressionMapper): + def map_subscript(self, expr, type_context): + from dune.perftool.generation.loopy import UnmanagedTemporaryVariable + if isinstance(self.find_array(expr), UnmanagedTemporaryVariable): + # If there is but one index, we do not need to handle this + from pymbolic.primitives import Subscript, Variable + if isinstance(expr.index, (Variable, int)): + return expr + + # Else, we construct a nested Subscript chain + ret = expr.aggregate for i in expr.index: - ret = ret + '[{}]'.format(str(i)) - return ret + ret = Subscript(ret, i) + return ret + else: + return ExpressionToCExpressionMapper.map_subscript(self, expr, type_context) class DuneASTBuilder(CASTBuilder): - def get_expression_to_code_mapper(self, codegen_state): + def get_expression_to_c_expression_mapper(self, codegen_state): return MyMapper(codegen_state) def get_temporary_decls(self, codegen_state, schedule_index): diff --git a/python/dune/perftool/pdelab/argument.py b/python/dune/perftool/pdelab/argument.py index a55a485c28a8f358824efa6644588b314d091157..06d4a75dc02bfb21051eb16311ed2c7d03c0c55a 100644 --- a/python/dune/perftool/pdelab/argument.py +++ b/python/dune/perftool/pdelab/argument.py @@ -101,9 +101,9 @@ def name_trialfunction_gradient(element, restriction, component): # This is just a temporary test used to create an A-matrix as # local operator class member. Right now it doesn't evaluate # anything. - if get_option("sumfact"): - from dune.perftool.sumfact import evaluate_coefficient_gradient_sumfact - evaluate_coefficient_gradient_sumfact() + if get_option("sumfact") and restriction == Restriction.NONE: + from dune.perftool.sumfact import start_sumfactorization + start_sumfactorization(element, container, restriction, component) evaluate_coefficient_gradient(element, name, container, restriction, component) return name diff --git a/python/dune/perftool/pdelab/driver.py b/python/dune/perftool/pdelab/driver.py index 303425bcd794f5d2c12d3d07c9dd4b784af6b31f..abb8938c6f0bb786aa0cef1f08c5ee880f05fffe 100644 --- a/python/dune/perftool/pdelab/driver.py +++ b/python/dune/perftool/pdelab/driver.py @@ -798,8 +798,8 @@ def name_solution_function(tree_path=()): try: name = get_global_context_value("data").object_names[id(expr)] except KeyError: - from dune.perftool.generation.cache import _GlobalCounter - name = 'generic_{}'.format(_GlobalCounter().get()) + from dune.perftool.generation import get_counter + name = 'generic_{}'.format(get_counter('__generic')) define_boundary_function(expr, name) diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index 702e8ff9a81c8af52f15fe0c38bded1a5b9086f3..3c008a173b98bcfcd06fb77f2948d4b6a76e2462 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -470,6 +470,7 @@ def generate_kernel(integrals): temporaries = {i.name: i for i in retrieve_cache_items("temporary")} arguments = [i for i in retrieve_cache_items("argument")] manglers = retrieve_cache_functions("mangler") + silenced = [l for l in retrieve_cache_items("silenced_warning")] # Construct an options object from loopy import Options @@ -484,6 +485,7 @@ def generate_kernel(integrals): function_manglers=manglers, target=DuneTarget(), options=opt, + silenced_warnings=silenced, ) from loopy import make_reduction_inames_unique @@ -494,6 +496,10 @@ def generate_kernel(integrals): from dune.perftool.loopy.duplicate import heuristic_duplication kernel = heuristic_duplication(kernel) + # Finalize our stages mechanism + from dune.perftool.loopy.stages import finalize_stage_instructions + kernel = finalize_stage_instructions(kernel) + # This is also silly, but 1D DG Schemes never need the geometry, so the quadrature # statement actually introduces a preamble at a stage where preambles cannot be generated # anymore. TODO think about how to avoid this!!! diff --git a/python/dune/perftool/sumfact/__init__.py b/python/dune/perftool/sumfact/__init__.py index daaea90a264472f83ea13bd9a58fc3fc356a72c9..793cbde027e0e7c6a9b8278adbfa66cd60d38156 100644 --- a/python/dune/perftool/sumfact/__init__.py +++ b/python/dune/perftool/sumfact/__init__.py @@ -1 +1 @@ -from dune.perftool.sumfact.amatrix import (evaluate_coefficient_gradient_sumfact) +from dune.perftool.sumfact.sumfact import start_sumfactorization diff --git a/python/dune/perftool/sumfact/amatrix.py b/python/dune/perftool/sumfact/amatrix.py index 8755a56abe2b8a77efd7840c33b13337c5a102eb..9d2e249886c0075fe318449cfede12183ec24d2d 100644 --- a/python/dune/perftool/sumfact/amatrix.py +++ b/python/dune/perftool/sumfact/amatrix.py @@ -1,10 +1,20 @@ +from dune.perftool import Restriction + from dune.perftool.options import get_option +from dune.perftool.pdelab.argument import name_coefficientcontainer + from dune.perftool.generation import (class_member, constructor_block, + domain, + function_mangler, get_global_context_value, + globalarg, + iname, include_file, initializer_list, + temporary_variable, + valuearg ) from dune.perftool.pdelab.localoperator import (name_domain_field, @@ -12,6 +22,41 @@ from dune.perftool.pdelab.localoperator import (name_domain_field, ) from dune.perftool.pdelab.quadrature import estimate_quadrature_order +from loopy import CallMangleInfo +from loopy.symbolic import FunctionIdentifier +from loopy.types import NumpyType + +from pytools import Record + +import numpy + + +class AMatrix(Record): + def __init__(self, a_matrix, rows, cols): + Record.__init__(self, + a_matrix=a_matrix, + rows=rows, + cols=cols, + ) + + +class ColMajorAccess(FunctionIdentifier): + def __init__(self, amatrix): + self.amatrix = amatrix + + def __getinitargs__(self): + return (self.amatrix,) + + @property + def name(self): + return '{}.colmajoraccess'.format(self.amatrix) + + +@function_mangler +def colmajoraccess_mangler(target, func, dtypes): + if isinstance(func, ColMajorAccess): + return CallMangleInfo(func.name, (NumpyType(numpy.float64),), (NumpyType(numpy.int32), NumpyType(numpy.int32))) + @class_member("operator") def define_alignment(name): @@ -106,10 +151,14 @@ def typedef_polynomials(name): include_file("dune/pdelab/finiteelementmap/qkdg.hh", filetag="operatorfile") # TODO: make switching polynomials possible - return "using {} = Dune::QkStuff::GaussLobattoLagrangePolynomials<{}, {}, {}>;".format(name, - domain_field, - range_field, - degree) + # return "using {} = Dune::QkStuff::GaussLobattoLagrangePolynomials<{}, {}, {}>;".format(name, + # domain_field, + # range_field, + # degree) + return "using {} = Dune::QkStuff::EquidistantLagrangePolynomials<{}, {}, {}>;".format(name, + domain_field, + range_field, + degree) def type_polynomials(): @@ -142,17 +191,19 @@ def sort_quadrature_points_weights(): @constructor_block("operator") -def construct_theta(name): +def construct_theta(name, transpose): # Make sure that the quadrature points are sorted sort_quadrature_points_weights() - m = name_number_of_quadrature_points_per_direction() - n = name_number_of_basis_functions_per_direction() + if transpose: + shape = (name_number_of_basis_functions_per_direction(), name_number_of_quadrature_points_per_direction()) + else: + shape = (name_number_of_quadrature_points_per_direction(), name_number_of_basis_functions_per_direction()) polynomials = name_polynomials() qp = name_oned_quadrature_points() - return ["for (std::size_t i=0; i<{}; i++){{".format(m), - " for (std::size_t j=0; j<{}; j++){{".format(n), + return ["for (std::size_t i=0; i<{}; i++){{".format(shape[0]), + " for (std::size_t j=0; j<{}; j++){{".format(shape[1]), " {}.colmajoraccess(i,j) = {}.p(j,{}[i]);".format(name, polynomials, qp), " }", "}"] @@ -160,6 +211,7 @@ def construct_theta(name): @class_member("operator") def typedef_theta(name): + include_file("dune/perftool/sumfact/alignedmatvec.hh", filetag="operatorfile") alignment = name_alignment() range_field = lop_template_range_field() return "using {} = AlignedMatrix<{}, {}>;".format(name, range_field, alignment) @@ -172,21 +224,25 @@ def type_theta(): @class_member("operator") -def define_theta(name): +def define_theta(name, transpose): theta_type = type_theta() - number_qp = name_number_of_quadrature_points_per_direction() - number_basis = name_number_of_basis_functions_per_direction() - initializer_list(name, [str(number_qp), str(number_basis)], classtag="operator") - construct_theta(name) + if transpose: + shape = (basis_functions_per_direction(), quadrature_points_per_direction()) + else: + shape = (quadrature_points_per_direction(), basis_functions_per_direction()) + globalarg(name, shape=shape, dtype=numpy.float32, dim_tags="f,f") + initializer_list(name, [str(axis) for axis in shape], classtag="operator") + construct_theta(name, transpose) return "{} {};".format(theta_type, name) def name_theta(): name = "Theta" - define_theta(name) + define_theta(name, False) return name -def evaluate_coefficient_gradient_sumfact(): - include_file("dune/perftool/sumfact/alignedmatvec.hh", filetag="operatorfile") - name_theta() +def name_theta_transposed(): + name = "ThetaT" + define_theta(name, True) + return name diff --git a/python/dune/perftool/sumfact/sumfact.py b/python/dune/perftool/sumfact/sumfact.py new file mode 100644 index 0000000000000000000000000000000000000000..a7ee0a042c1fab3ac5b3662877c5a4b3a841f8a4 --- /dev/null +++ b/python/dune/perftool/sumfact/sumfact.py @@ -0,0 +1,150 @@ +from dune.perftool.pdelab.argument import pymbolic_coefficient +from dune.perftool.generation import (domain, + get_counter, + iname, + instruction, + silenced_warning, + ) +from dune.perftool.loopy.buffer import (get_buffer_temporary, + initialize_buffer, + switch_base_storage, + ) +from dune.perftool.pdelab.spaces import name_lfs +from dune.perftool.sumfact.amatrix import (AMatrix, + quadrature_points_per_direction, + basis_functions_per_direction, + name_theta, + name_theta_transposed, + ) +from dune.perftool.loopy.stages import stage_insn +from pymbolic.primitives import (Call, + Product, + Subscript, + Variable, + ) + +from loopy import Reduction + +from pytools import product + + +@iname +def _sumfact_iname(bound, _type, count): + name = "sf_{}_{}".format(_type, str(count)) + domain(name, bound) + return name + + +def sumfact_iname(bound, _type): + count = get_counter('_sumfac_iname_{}'.format(_type)) + name = _sumfact_iname(bound, _type, count) + return name + + +def setup_theta(element, container, restriction, component, a_matrices): + number_basis = product(mat.cols for mat in a_matrices) + shape = (number_basis,) + inp = get_buffer_temporary("buffer", + shape=shape) + silenced_warning('read_no_write({})'.format(inp)) + + # Write initial coefficients into buffer + basisiname = sumfact_iname(number_basis, "basis") + lfs = name_lfs(element, restriction, component) + coeff = pymbolic_coefficient(container, lfs, basisiname) + assignee = Subscript(Variable(inp), (Variable(basisiname),)) + return instruction(assignee=assignee, + expression=coeff, + depends_on=frozenset({stage_insn(0)}), + ) + + +# TODO this code is WIP and mainly used for experiments. +def start_sumfactorization(element, container, restriction, component): + theta = name_theta() + rows = quadrature_points_per_direction() + cols = basis_functions_per_direction() + a_matrix = AMatrix(theta, rows, cols) + a_matrices = (a_matrix, a_matrix) + + # Do stage 1 + initialize_buffer("buffer", + base_storage_size=product(max(mat.rows, mat.cols) for mat in a_matrices), + num=2 + ) + + insn_dep = setup_theta(element, container, restriction, component, a_matrices) + + sum_factorization_kernel(a_matrices, "buffer", 0, frozenset({insn_dep})) + + # Do stage 3 (for f=u => mass matrix) + theta_transposed = name_theta_transposed() + a_matrix_transposed = AMatrix(theta_transposed, cols, rows) + a_matrices_transposed = (a_matrix_transposed, a_matrix_transposed) + + return sum_factorization_kernel(a_matrices_transposed, "buffer", 2) + + +def sum_factorization_kernel(a_matrices, buffer, stage, insn_dep=frozenset({})): + """ + Calculate a sum factorization matrix product. + + Y = A_{d-1}*...*A_0*X + + where X is the input matrix and Y is the output variable. + + Arguments: + ---------- + a_matrices: An iterable of AMatrix instances + The list of tensors to be applied to the input. + Order of application is from 0 up. + buffer: A string identifying the flip flop buffer in use + for intermediate results. The memory is expected to be + pre-initialized with the input. + insn_dep: an instruction ID that the first issued instruction + should depend upon. All following ones will depend on each + other. + """ + for l, a_matrix in enumerate(a_matrices): + # Get a temporary that interprets the base storage of the input + # as a column-major matrix. In later iteration of the amatrix loop + # this reinterprets the output of the previous iteration. + inp_shape = (a_matrix.cols, product(mat.rows for mat in a_matrices[:l]) * product(mat.cols for mat in a_matrices[l + 1:])) + inp = get_buffer_temporary(buffer, + shape=inp_shape, + dim_tags="f,f") + + # The input temporary will only be read from, so we need to silence the loopy warning + silenced_warning('read_no_write({})'.format(inp)) + + switch_base_storage(buffer) + + # Get a temporary that interprets the base storage of the output + # as row-major matrix + out_shape = (a_matrix.rows, product(mat.rows for mat in a_matrices[:l]) * product(mat.cols for mat in a_matrices[l + 1:])) + out = get_buffer_temporary(buffer, + shape=out_shape, + dim_tags="c,c") + + # Get the inames needed for one matrix-matrix multiplication + i = sumfact_iname(out_shape[0], "row") + j = sumfact_iname(out_shape[1], "col") + k = sumfact_iname(a_matrix.cols, "red") + + # Construct the matrix-matrix-multiplication expression a_ik*in_kj + from dune.perftool.sumfact.amatrix import ColMajorAccess + prod = Product((Call(ColMajorAccess(a_matrix.a_matrix), (Variable(i), Variable(k))), + Subscript(Variable(inp), (Variable(k), Variable(j))) + )) + + # Issue the reduction instruction that implements the multiplication + # at the same time store the instruction ID for the next instruction to depend on + insn_dep = frozenset({instruction(assignee=Subscript(Variable(out), (Variable(i), Variable(j))), + expression=Reduction("sum", k, prod), + forced_iname_deps=frozenset({i, j}), + forced_iname_deps_is_final=True, + depends_on=insn_dep.union(frozenset({stage_insn(stage)})), + ) + }) + + return out diff --git a/python/loopy b/python/loopy index 2e562728b81caed706a1304fdffd64e86286bae4..a08c8677fbd3aaded4ad63210d24439aac18f2d0 160000 --- a/python/loopy +++ b/python/loopy @@ -1 +1 @@ -Subproject commit 2e562728b81caed706a1304fdffd64e86286bae4 +Subproject commit a08c8677fbd3aaded4ad63210d24439aac18f2d0 diff --git a/python/pymbolic b/python/pymbolic index 5fa83688cb95b429d8fa4175bb2d88ad6778a311..22b3e10d489ca9c8ba21e2e8bba77791f3f54ade 160000 --- a/python/pymbolic +++ b/python/pymbolic @@ -1 +1 @@ -Subproject commit 5fa83688cb95b429d8fa4175bb2d88ad6778a311 +Subproject commit 22b3e10d489ca9c8ba21e2e8bba77791f3f54ade diff --git a/python/test/dune/perftool/generation/test_backend.py b/python/test/dune/perftool/generation/test_backend.py new file mode 100644 index 0000000000000000000000000000000000000000..2a1e001cd9da6dad0d0d18c46d2da47d9506da7c --- /dev/null +++ b/python/test/dune/perftool/generation/test_backend.py @@ -0,0 +1,35 @@ +from dune.perftool.generation import (backend, + get_backend, + generator_factory, + ) + + +@backend(interface="foo", name="f1") +@generator_factory() +def f1(): + return 1 + + +@generator_factory() +@backend(interface="foo", name="f2") +def f2(): + return 2 + + +@backend(interface="bar", name="f3") +@generator_factory() +def f3(): + return 3 + + +@generator_factory() +@backend(interface="bar", name="f4") +def f4(): + return 4 + + +def test_backend(): + assert get_backend(interface="foo", selector=lambda: "f1")() == 1 + assert get_backend(interface="foo", selector=lambda: "f2")() == 2 + assert get_backend(interface="bar", selector=lambda: "f3")() == 3 + assert get_backend(interface="bar", selector=lambda: "f4")() == 4 diff --git a/test/sumfact/poisson/CMakeLists.txt b/test/sumfact/poisson/CMakeLists.txt index 4620b9a53c6a052155d08b7a42dbdf94c6e9aba5..6a7006ce4c84db13bd9376b56be9eb69770d466d 100644 --- a/test/sumfact/poisson/CMakeLists.txt +++ b/test/sumfact/poisson/CMakeLists.txt @@ -1,11 +1,11 @@ -# 1. Poisson Test Case: source f, bc g -dune_add_formcompiler_system_test(UFLFILE poisson.ufl - BASENAME sumfact_poisson - INIFILE poisson.mini - ) - -# # 2. Poisson Test Case: DG, f + pure dirichlet -# dune_add_formcompiler_system_test(UFLFILE poisson_dg.ufl -# BASENAME sumfact_poisson_dg -# INIFILE poisson_dg.mini +# # 1. Poisson Test Case: source f, bc g +# dune_add_formcompiler_system_test(UFLFILE poisson.ufl +# BASENAME sumfact_poisson +# INIFILE poisson.mini # ) + +# 2. Poisson Test Case: DG, f + pure dirichlet +dune_add_formcompiler_system_test(UFLFILE poisson_dg.ufl + BASENAME sumfact_poisson_dg + INIFILE poisson_dg.mini + ) diff --git a/test/sumfact/poisson/poisson.mini b/test/sumfact/poisson/poisson.mini index 5f07c985767d2ef9bfbe5e5bebcf82d801222833..491c685915a7585fc248ef11e6047bcb7de6cc84 100644 --- a/test/sumfact/poisson/poisson.mini +++ b/test/sumfact/poisson/poisson.mini @@ -1,10 +1,8 @@ __name = sumfact_poisson_{__exec_suffix} __exec_suffix = numdiff, symdiff | expand num -lowerleft = 0.0 0.0 -upperright = 1.0 1.0 -elements = 32 32 -elementType = simplical +cells = 1 1 +extension = 1. 1. [wrapper.vtkcompare] name = {__name} diff --git a/test/sumfact/poisson/poisson.ufl b/test/sumfact/poisson/poisson.ufl index 8c62dd3a24d2f64a3aec350c2096fd65967498d8..576c603e5c5f8b8d00bad8ea4cdcb6ec19ebd332 100644 --- a/test/sumfact/poisson/poisson.ufl +++ b/test/sumfact/poisson/poisson.ufl @@ -1,7 +1,9 @@ -f = Expression("Dune::FieldVector<double,2> c(0.5); c-= x; return 4.*(1.-c.two_norm2())*std::exp(-1.*c.two_norm2());") -g = Expression("Dune::FieldVector<double,2> c(0.5); c-= x; return std::exp(-1.*c.two_norm2());") +cell = "quadrilateral" -V = FiniteElement("CG", "triangle", 1, dirichlet_expression=g) +f = Expression("Dune::FieldVector<double,2> c(0.5); c-= x; return 4.*(1.-c.two_norm2())*std::exp(-1.*c.two_norm2());", cell=cell) +g = Expression("Dune::FieldVector<double,2> c(0.5); c-= x; return std::exp(-1.*c.two_norm2());", cell=cell) + +V = FiniteElement("CG", cell, 1, dirichlet_expression=g) u = TrialFunction(V) v = TestFunction(V) diff --git a/test/sumfact/poisson/poisson_dg.mini b/test/sumfact/poisson/poisson_dg.mini index 8a2000a43db23777bd2a16ef034a794460bd3b98..f388ba3eb61b3426b5edfe9955de1c69e32281e1 100644 --- a/test/sumfact/poisson/poisson_dg.mini +++ b/test/sumfact/poisson/poisson_dg.mini @@ -1,18 +1,13 @@ __name = sumfact_poisson_dg_{__exec_suffix} __exec_suffix = numdiff, symdiff | expand num -lowerleft = 0.0 0.0 -upperright = 1.0 1.0 -elements = 32 32 -elementType = simplical +cells = 1 1 +extension = 1. 1. [wrapper.vtkcompare] name = {__name} -reference = poisson_dg_ref extension = vtu [formcompiler] numerical_jacobian = 1, 0 | expand num -exact_solution_expression = g -compare_l2errorsquared = 2e-5 sumfact = 1 diff --git a/test/sumfact/poisson/poisson_dg.ufl b/test/sumfact/poisson/poisson_dg.ufl index 0ee5a857931d2ceea62b8cf5e52c5aed2b696851..a0b5049d9218da6984e040652f90c7fb24d809a8 100644 --- a/test/sumfact/poisson/poisson_dg.ufl +++ b/test/sumfact/poisson/poisson_dg.ufl @@ -1,7 +1,8 @@ -f = Expression("Dune::FieldVector<double,2> c(0.5); c-= x; return 4.*(1.-c.two_norm2())*std::exp(-1.*c.two_norm2());") -g = Expression("Dune::FieldVector<double,2> c(0.5); c-= x; return std::exp(-1.*c.two_norm2());", on_intersection=True) +cell = "quadrilateral" + +f = Expression("return -2.0*x.size();", cell=cell) +g = Expression("return x.two_norm2();", on_intersection=True, cell=cell) -cell = triangle V = FiniteElement("DG", cell, 1) u = TrialFunction(V) @@ -23,4 +24,4 @@ r = inner(grad(u), grad(v))*dx \ + theta*g*inner(grad(v), n)*ds \ - gamma*g*v*ds -forms = [r] +forms = [r] \ No newline at end of file