diff --git a/README.md b/README.md index 8896b12177ec1532e69d64f1748b81f140123643..5856f3514a91a64db9734af6cf7825952b3935c0 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,25 @@ ctest Note that this takes quite a while. +## Building and Running dune-codegen in an offline environment + +dune-codegen relies on installing Python packages into self-contained environments +during its configuration and build process. In order to do this in an offline +environment, we recommend using the tool `devpi`. One of its use cases is to provide +a local mirror for the Python package index. A quickstart tutorial for this use case +is available [5]. It boils down to the following: + +* Installing the `devpi-server` package through your favorite method +* Setting up a local server with `devpi-server --init` +* Making sure it is running in the background (explicitly with `devpi-server --start/stop` or by configuring a systemd service). +* Setting the environment variable `PIP_INDEX_URL` to its index, e.g. by adding this line to your `~/.bashrc` (where `http://localhost:3141` might differ depending on your devpi configuration): +``` +export PIP_INDEX_URL=http://localhost:3141/root/pypi/+simple/ +``` + +On first installation, the locally mirrored package index will access PyPI. +Later on, it will install packages from its local cache. + ## Links [0]: https://git-lfs.github.com/ @@ -112,3 +131,4 @@ Note that this takes quite a while. 
[2]: https://gitlab.dune-project.org/quality/dune-testtools [3]: http://isl.gforge.inria.fr/ [4]: https://www.dune-project.org/doc/installation/ +[5]: https://github.com/devpi/devpi/blob/master/doc/quickstart-pypimirror.rst diff --git a/cmake/modules/DuneCodegenMacros.cmake b/cmake/modules/DuneCodegenMacros.cmake index da3225866785c75a8cf73e6aa78b6e3e0eea42f9..91e48d73f4c79ccc6a7258071dbbfeaf8a27a1c6 100644 --- a/cmake/modules/DuneCodegenMacros.cmake +++ b/cmake/modules/DuneCodegenMacros.cmake @@ -116,6 +116,11 @@ function(dune_add_generated_executable) message(FATAL_ERROR "Unrecognized arguments in dune_add_generated_executable. This usually indicates a typo.") endif() + set(MPI_OPTION "0") + if(MPI_FOUND) + set(MPI_OPTION "1") + endif() + # Apply defaults and enforce requirements if(NOT GEN_TARGET) message(FATAL_ERROR "Need to specify the TARGET parameter for dune_add_generated_executable") @@ -139,6 +144,7 @@ function(dune_add_generated_executable) --target-name ${GEN_TARGET} --driver-file ${GEN_SOURCE} --project-basedir ${CMAKE_BINARY_DIR} + --with-mpi ${MPI_OPTION} ${GEN_FORM_COMPILER_ARGS} DEPENDS ${GEN_UFLFILE} ${UFL2PDELAB_SOURCES} ${GEN_DEPENDS} ${DUNE_CODEGEN_ADDITIONAL_PYTHON_SOURCES} COMMENT "Generating driver for the target ${GEN_TARGET}" @@ -199,6 +205,7 @@ function(dune_add_generated_executable) --ini-file ${GEN_INIFILE} --target-name ${GEN_TARGET} --operator-to-build ${op} + --with-mpi ${MPI_OPTION} ${ANALYZE_GRID_OPTION} DEPENDS ${GEN_UFLFILE} ${UFL2PDELAB_SOURCES} ${GEN_DEPENDS} ${DUNE_CODEGEN_ADDITIONAL_PYTHON_SOURCES} ${ANALYZE_GRID_FILE} COMMENT "Generating operator file ${depdata___${op}} for the target ${GEN_TARGET}" diff --git a/python/dune/codegen/generation/__init__.py b/python/dune/codegen/generation/__init__.py index bed0256407b7259bab61b6e932c4a17761097e75..97090e18852359b10d1a2d3f74a268a3abac60f1 100644 --- a/python/dune/codegen/generation/__init__.py +++ b/python/dune/codegen/generation/__init__.py @@ -24,6 +24,7 @@ from 
dune.codegen.generation.cpp import (base_class, preamble, post_include, template_parameter, + dump_ssc_marks ) from dune.codegen.generation.hooks import (hook, diff --git a/python/dune/codegen/generation/cpp.py b/python/dune/codegen/generation/cpp.py index b918291067f45c5f988bc8fdcea55651d538a9db..2ea4c346590ee80ef329fdc9394b9fbc3c59db9c 100644 --- a/python/dune/codegen/generation/cpp.py +++ b/python/dune/codegen/generation/cpp.py @@ -55,3 +55,10 @@ def dump_accumulate_timer(name): @generator_factory(item_tags=("register_likwid_timers",)) def register_liwkid_timer(name): return "LIKWID_MARKER_REGISTER(\"{}\");".format(name) + + +@generator_factory(item_tags=("register_ssc_marks",)) +def dump_ssc_marks(name): + from dune.codegen.pdelab.driver.timings import get_region_marks + return 'std::cout << "{}: " << {} << " <--> " << {} << std::endl;'.format(name, + *get_region_marks(name, driver=False)) diff --git a/python/dune/codegen/options.py b/python/dune/codegen/options.py index 97f3ce47af0d543141672b08f1e837cbc2ff0cc6..72697492907cbf4afdfe62d1a3789606a8e2c290 100644 --- a/python/dune/codegen/options.py +++ b/python/dune/codegen/options.py @@ -57,7 +57,9 @@ class CodegenGlobalOptionsArray(ImmutableRecord): operator_to_build = CodegenOption(default=None, helpstr="The operators from the list that is about to be build now. 
CMake sets this one!!!") debug_interpolate_input = CodegenOption(default=False, helpstr="Should the input for printresidual and printmatix be interpolated (instead of random input).") use_likwid = CodegenOption(default=False, helpstr="Use likwid instead of own performance measurements.") + use_sde = CodegenOption(default=False, helpstr="Use sde instead of own performance measurements.") autotune_google_benchmark = CodegenOption(default=False, helpstr="Use google-benchmark library for autotuning (when autotuning is activated).") + with_mpi = CodegenOption(default=True, helpstr="The module was configured with mpi") # Arguments that are mainly to be set by logic depending on other options max_vector_width = CodegenOption(default=256, helpstr=None) diff --git a/python/dune/codegen/pdelab/argument.py b/python/dune/codegen/pdelab/argument.py index 5124c77608fa78fe1ea5f72a75cc7c097ef178d9..dc1acd660c137c42be6fb65bb687bafe03fbc730 100644 --- a/python/dune/codegen/pdelab/argument.py +++ b/python/dune/codegen/pdelab/argument.py @@ -117,6 +117,10 @@ def type_coefficientcontainer(): return "X" +def type_linearizationpointcontainer(): + return "Z" + + def name_jacobian(restriction1, restriction2): # Restrictions may only differ if NONE if (restriction1 == Restriction.NONE) or (restriction2 == Restriction.NONE): diff --git a/python/dune/codegen/pdelab/driver/__init__.py b/python/dune/codegen/pdelab/driver/__init__.py index 50effdf1f6730c8a6241292b3dd8375e7d472f4c..124354b50dd354263800c84d99425cc6c9a57c01 100644 --- a/python/dune/codegen/pdelab/driver/__init__.py +++ b/python/dune/codegen/pdelab/driver/__init__.py @@ -215,7 +215,10 @@ def name_initree(): @preamble(section="init") def define_mpihelper(name): include_file("dune/common/parallel/mpihelper.hh", filetag="driver") - return "Dune::MPIHelper& {} = Dune::MPIHelper::instance(argc, argv);".format(name) + if get_option("with_mpi"): + return "Dune::MPIHelper& {} = Dune::MPIHelper::instance(argc, argv);".format(name) + else: + 
return "Dune::FakeMPIHelper& {} = Dune::FakeMPIHelper::instance(argc, argv);".format(name) def name_mpihelper(): @@ -285,6 +288,13 @@ def generate_driver(): contents = [] + # Assert that this program was called with ini file + contents += ['if (argc != 2){', + ' std::cerr << "This program needs to be called with an ini file" << std::endl;', + ' return 1;', + '}', + ''] + def add_section(tag, comment): tagcontents = [i for i in retrieve_cache_items("preamble and {}".format(tag), make_generable=True)] if tagcontents: diff --git a/python/dune/codegen/pdelab/driver/timings.py b/python/dune/codegen/pdelab/driver/timings.py index aeca64d46c73f2327b48c22f07dca7a85a044104..6bbbd07e4b7701fe516eff9509525165ac23a5eb 100644 --- a/python/dune/codegen/pdelab/driver/timings.py +++ b/python/dune/codegen/pdelab/driver/timings.py @@ -4,7 +4,7 @@ from dune.codegen.generation import (cached, include_file, pre_include, preamble, - ) + post_include) from dune.codegen.options import get_option from dune.codegen.pdelab.driver import (get_form_ident, is_linear, @@ -24,6 +24,9 @@ from dune.codegen.pdelab.driver.solve import (name_vector, ) +_sde_marks = {} + + @preamble(section="timings") def define_timing_identifier(name): ini = name_initree() @@ -125,6 +128,17 @@ def local_operator_likwid(): return "{}.register_likwid_timers();".format(lop_name) +@preamble(section="timings") +def local_operator_ssc_marks(): + lop_name = name_localoperator(get_form_ident()) + return "{}.dump_ssc_marks();".format(lop_name) + + +def ssc_macro(): + return '#define __SSC_MARK(x) do{ __asm__ __volatile__' \ + '("movl %0, %%ebx; .byte 100, 103, 144" : :"i"(x) : "%ebx"); } while(0)' + + @cached def setup_timer(): # TODO check that we are using YASP? 
@@ -138,6 +152,10 @@ def setup_timer(): logger.warning("timings: using instrumentation level >= 3 with likwid will slow down your code considerably") local_operator_likwid() finalize_likwid() + elif get_option("use_sde"): + post_include(ssc_macro(), filetag='driver') + if get_option('instrumentation_level') >= 3: + local_operator_ssc_marks() else: from dune.codegen.loopy.target import type_floatingpoint pre_include("#define HP_TIMER_OPCOUNTER {}".format(type_floatingpoint()), filetag="driver") @@ -156,14 +174,26 @@ def init_region_timer(region): setup_timer() if get_option("use_likwid"): init_likwid_timer(region) + elif get_option("use_sde"): + pass else: from dune.codegen.generation import post_include post_include("HP_DECLARE_TIMER({});".format(region), filetag="driver") +def get_region_marks(region, driver): + if driver: + return _sde_marks.setdefault(region, (2 * (len(_sde_marks) + 1) * 11, (2 * (len(_sde_marks) + 1) + 1) * 11)) + else: + return _sde_marks.setdefault(region, (2 * (len(_sde_marks) + 1) * 1, (2 * (len(_sde_marks) + 1) + 1) * 1)) + + def start_region_timer(region): if get_option("use_likwid"): return ["LIKWID_MARKER_START(\"{}\");".format(region)] + elif get_option("use_sde"): + marks = get_region_marks(region, driver=True) + return ["__SSC_MARK(0x{});".format(marks[0])] else: return ["HP_TIMER_START({});".format(region)] @@ -171,6 +201,10 @@ def start_region_timer(region): def stop_region_timer(region): if get_option("use_likwid"): return ["LIKWID_MARKER_STOP(\"{}\");".format(region)] + elif get_option("use_sde"): + marks = get_region_marks(region, driver=True) + return ["__SSC_MARK(0x{});".format(marks[1]), + "std::cout << \"Timed region {}: {} <--> {}\" << std::endl;".format(region, *marks)] else: timestream = name_timing_stream() return ["HP_TIMER_STOP({});".format(region), @@ -207,7 +241,7 @@ def timed_region(region, actions): init_region_timer(region) - if get_option('instrumentation_level') >= 3 and not get_option('use_likwid'): + if 
get_option('instrumentation_level') >= 3 and not (get_option('use_likwid') or get_option("use_sde")): timestream = name_timing_stream() lop_name = name_localoperator(get_form_ident()) print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) diff --git a/python/dune/codegen/pdelab/localoperator.py b/python/dune/codegen/pdelab/localoperator.py index 6db956e498c1de5d7e25cb47fb40771314119299..0ca6bfb45ed80271af92800980df7405200dca8b 100644 --- a/python/dune/codegen/pdelab/localoperator.py +++ b/python/dune/codegen/pdelab/localoperator.py @@ -32,6 +32,7 @@ from dune.codegen.generation import (accumulation_mixin, ReturnArg, run_hook, template_parameter, + dump_ssc_marks ) from dune.codegen.cgen.clazz import (AccessModifier, BaseClass, @@ -696,6 +697,19 @@ class RegisterLikwidMethod(ClassMember): ClassMember.__init__(self, content) +class RegisterSSCMarksMethod(ClassMember): + def __init__(self): + knl = name_example_kernel() + assert(knl is not None) + + content = ["void dump_ssc_marks()" + "{"] + register_liwkid_timers = [i for i in retrieve_cache_items(condition='register_ssc_marks')] + content.extend(map(lambda x: ' ' + x, register_liwkid_timers)) + content += ["}"] + ClassMember.__init__(self, content) + + class LoopyKernelMethod(ClassMember): def __init__(self, signature, kernel, add_timings=True, initializer_list=[]): from loopy import generate_body @@ -723,6 +737,12 @@ class LoopyKernelMethod(ClassMember): init_likwid_timer(timer_name) content.append(' ' + 'LIKWID_MARKER_START(\"{}\");'.format(timer_name)) register_liwkid_timer(timer_name) + elif get_option('use_sde'): + from dune.codegen.pdelab.driver.timings import get_region_marks, ssc_macro + post_include(ssc_macro(), filetag='operatorfile') + marks = get_region_marks(timer_name, driver=False) + content.append(' ' + '__SSC_MARK(0x{});'.format(marks[0])) + dump_ssc_marks(timer_name) else: post_include('HP_DECLARE_TIMER({});'.format(timer_name), 
filetag='operatorfile') content.append(' ' + 'HP_TIMER_START({});'.format(timer_name)) @@ -735,6 +755,11 @@ class LoopyKernelMethod(ClassMember): init_likwid_timer(setuptimer) content.append(' ' + 'LIKWID_MARKER_START(\"{}\");'.format(setuptimer)) register_liwkid_timer(setuptimer) + elif get_option('use_sde'): + from dune.codegen.pdelab.driver.timings import get_region_marks + setup_marks = get_region_marks(setuptimer, driver=False) + content.append(' ' + '__SSC_MARK(0x{});'.format(setup_marks[0])) + dump_ssc_marks(setuptimer) else: post_include('HP_DECLARE_TIMER({});'.format(setuptimer), filetag='operatorfile') content.append(' HP_TIMER_START({});'.format(setuptimer)) @@ -747,6 +772,8 @@ class LoopyKernelMethod(ClassMember): if add_timings and get_option('instrumentation_level') >= 4: if get_option('use_likwid'): content.append(' ' + 'LIKWID_MARKER_STOP(\"{}\");'.format(setuptimer)) + elif get_option('use_sde'): + content.append(' ' + '__SSC_MARK(0x{});'.format(setup_marks[1])) else: content.append(' ' + 'HP_TIMER_STOP({});'.format(setuptimer)) @@ -757,6 +784,8 @@ class LoopyKernelMethod(ClassMember): if add_timings and get_option('instrumentation_level') >= 3: if get_option('use_likwid'): content.append(' ' + 'LIKWID_MARKER_STOP(\"{}\");'.format(timer_name)) + elif get_option('use_sde'): + content.append(' ' + '__SSC_MARK(0x{});'.format(marks[1])) else: content.append(' ' + 'HP_TIMER_STOP({});'.format(timer_name)) @@ -1219,6 +1248,8 @@ def generate_localoperator_file(kernels, filename): include_file('dune/codegen/common/timer.hh', filetag='operatorfile') if get_option('use_likwid'): operator_methods.append(RegisterLikwidMethod()) + elif get_option('use_sde'): + operator_methods.append(RegisterSSCMarksMethod()) else: operator_methods.append(TimerMethod()) elif get_option('opcounter'): diff --git a/python/dune/codegen/pdelab/signatures.py b/python/dune/codegen/pdelab/signatures.py index 
09b832ac252138181d8be19c9fd4c098b5bb9b68..34acea22ea1fda1b8369bf499d7ce8ed0c37859d 100644 --- a/python/dune/codegen/pdelab/signatures.py +++ b/python/dune/codegen/pdelab/signatures.py @@ -9,6 +9,7 @@ from dune.codegen.pdelab.argument import (name_accumulation_variable, name_coefficientcontainer, type_coefficientcontainer, name_applycontainer, + type_linearizationpointcontainer, ) from dune.codegen.pdelab.spaces import (name_testfunctionspace, type_testfunctionspace, @@ -293,8 +294,9 @@ def nonlinear_jacobian_apply_volume_templates(): lfsut = type_trialfunctionspace() lfsvt = type_testfunctionspace() cct = type_coefficientcontainer() + lpt = type_linearizationpointcontainer() avt = type_accumulation_variable() - return (geot, lfsut, cct, cct, lfsvt, avt) + return (geot, lfsut, cct, lpt, lfsvt, avt) def nonlinear_jacobian_apply_volume_args(): @@ -312,8 +314,9 @@ def nonlinear_jacobian_apply_boundary_templates(): lfsut = type_trialfunctionspace() lfsvt = type_testfunctionspace() cct = type_coefficientcontainer() + lpt = type_linearizationpointcontainer() avt = type_accumulation_variable() - return (geot, lfsut, cct, cct, lfsvt, avt) + return (geot, lfsut, cct, lpt, lfsvt, avt) def nonlinear_jacobian_apply_boundary_args(): @@ -331,8 +334,9 @@ def nonlinear_jacobian_apply_skeleton_templates(): lfsut = type_trialfunctionspace() lfsvt = type_testfunctionspace() cct = type_coefficientcontainer() + lpt = type_linearizationpointcontainer() avt = type_accumulation_variable() - return (geot, lfsut, cct, cct, lfsvt, lfsut, cct, cct, lfsvt, avt, avt) + return (geot, lfsut, cct, lpt, lfsvt, lfsut, cct, lpt, lfsvt, avt, avt) def nonlinear_jacobian_apply_skeleton_args(): diff --git a/python/dune/codegen/pdelab/tensors.py b/python/dune/codegen/pdelab/tensors.py index 4fdf59e448e8bb49172ae7a798796fb2afce9f5e..7a86ba52eff5f255c3d040d31951043a65266890 100644 --- a/python/dune/codegen/pdelab/tensors.py +++ b/python/dune/codegen/pdelab/tensors.py @@ -9,6 +9,7 @@ from 
dune.codegen.loopy.symbolic import FusedMultiplyAdd as FMA from loopy.match import Writes import pymbolic.primitives as prim +import numpy as np import loopy as lp import itertools as it @@ -145,11 +146,35 @@ def name_assembled_tensor(o, visitor): @kernel_cached -def pymbolic_matrix_inverse(o, visitor): - expr = o.ufl_operands[0] +def code_generation_time_inversion(expr, visitor): + mat = np.ndarray(expr.ufl_shape) + for indices in it.product(*tuple(range(i) for i in expr.ufl_shape)): + visitor.indices = indices + val = visitor.call(expr.ufl_operands[0]) + if not isinstance(val, (float, int)): + visitor.indices = None + return None + + mat[indices] = val + + visitor.indices = None + return np.linalg.inv(mat) + +def pymbolic_matrix_inverse(o, visitor): + # Try to evaluate the matrix at code generation time. + # If this works (it does e.g. for Maxwell on structured grids) + # we can invert the matrix at code generation time!!! indices = visitor.indices visitor.indices = None + + mat = code_generation_time_inversion(o, visitor) + if mat is not None: + return mat[indices] + + # If code generation time inversion failed, we assemble it in C++ + # and invert it there. 
+ expr = o.ufl_operands[0] name = name_assembled_tensor(expr, visitor) if expr.shape[0] <= 3: @@ -160,9 +185,8 @@ def pymbolic_matrix_inverse(o, visitor): depends_on=frozenset({lp.match.Writes(name), lp.match.Tagged("sumfact_stage1"), }), - tags=frozenset({"quad"}), + tags=frozenset({name}), ) visitor.indices = indices - return prim.Variable(name) diff --git a/python/dune/codegen/ufl/visitor.py b/python/dune/codegen/ufl/visitor.py index ab7c334f323bbc51221d926d357466d6ebc8cf88..e774e03d90e8bc61a1108767867ab046bfa56c81 100644 --- a/python/dune/codegen/ufl/visitor.py +++ b/python/dune/codegen/ufl/visitor.py @@ -37,6 +37,7 @@ from ufl.classes import (Coefficient, JacobianDeterminant, ) +from pytools import product as ptproduct import pymbolic.primitives as prim import numpy as np @@ -278,7 +279,10 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): # def product(self, o): - return prim.flattened_product(tuple(self.call(op) for op in o.ufl_operands)) + ops = tuple(self.call(op) for op in o.ufl_operands) + if all(isinstance(op, (int, float)) for op in ops): + return ptproduct(ops) + return prim.flattened_product(ops) def float_value(self, o): return o.value() @@ -290,7 +294,10 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker): return prim.quotient(self.call(o.ufl_operands[0]), self.call(o.ufl_operands[1])) def sum(self, o): - return prim.flattened_sum(tuple(self.call(op) for op in o.ufl_operands)) + ops = tuple(self.call(op) for op in o.ufl_operands) + if all(isinstance(op, (int, float)) for op in ops): + return sum(ops) + return prim.flattened_sum(ops) def zero(self, o): # UFL has Zeroes with shape. We ignore those indices.