Skip to content
Snippets Groups Projects
Commit 0227d4ff authored by Marcel Koch's avatar Marcel Koch
Browse files

Merge branch 'master' into 'feature/loopy-invert-matrix'

# Conflicts:
#   python/dune/codegen/pdelab/tensors.py
parents fe604286 68893fc1
No related branches found
No related tags found
No related merge requests found
......@@ -105,6 +105,25 @@ ctest
Note that this takes quite a while.
## Building and Running dune-codegen in an offline environment
dune-codegen relies on installing Python packages into self-contained environments
during its configuration and build process. In order to do this in an offline
environment, we recommend using the tool `devpi`. One of its use cases is to provide
a local mirror for the Python package index. A quickstart tutorial for this use case
is available [5]. It boils down to the following:
* Installing the `devpi-server` package through your favorite method
* Setting up a local server with `devpi-server --init`
* Making sure it is running in the background (explicitly with `devpi-server --start/stop` or by configuring a systemd service).
* Setting the environment variable `PIP_INDEX_URL` to point to its index, e.g. by adding this line to your `~/.bashrc` (where `http://localhost:3141` might differ depending on your devpi configuration):
```
export PIP_INDEX_URL=http://localhost:3141/root/pypi/+simple/
```
At first installation, the locally mirrored package index will access PyPI.
Later on, it will install packages from its local cache.
## Links
[0]: https://git-lfs.github.com/
......@@ -112,3 +131,4 @@ Note that this takes quite a while.
[2]: https://gitlab.dune-project.org/quality/dune-testtools
[3]: http://isl.gforge.inria.fr/
[4]: https://www.dune-project.org/doc/installation/
[5]: https://github.com/devpi/devpi/blob/master/doc/quickstart-pypimirror.rst
......@@ -116,6 +116,11 @@ function(dune_add_generated_executable)
message(FATAL_ERROR "Unrecognized arguments in dune_add_generated_executable. This usually indicates a typo.")
endif()
set(MPI_OPTION "0")
if(MPI_FOUND)
set(MPI_OPTION "1")
endif()
# Apply defaults and enforce requirements
if(NOT GEN_TARGET)
message(FATAL_ERROR "Need to specify the TARGET parameter for dune_add_generated_executable")
......@@ -139,6 +144,7 @@ function(dune_add_generated_executable)
--target-name ${GEN_TARGET}
--driver-file ${GEN_SOURCE}
--project-basedir ${CMAKE_BINARY_DIR}
--with-mpi ${MPI_OPTION}
${GEN_FORM_COMPILER_ARGS}
DEPENDS ${GEN_UFLFILE} ${UFL2PDELAB_SOURCES} ${GEN_DEPENDS} ${DUNE_CODEGEN_ADDITIONAL_PYTHON_SOURCES}
COMMENT "Generating driver for the target ${GEN_TARGET}"
......@@ -199,6 +205,7 @@ function(dune_add_generated_executable)
--ini-file ${GEN_INIFILE}
--target-name ${GEN_TARGET}
--operator-to-build ${op}
--with-mpi ${MPI_OPTION}
${ANALYZE_GRID_OPTION}
DEPENDS ${GEN_UFLFILE} ${UFL2PDELAB_SOURCES} ${GEN_DEPENDS} ${DUNE_CODEGEN_ADDITIONAL_PYTHON_SOURCES} ${ANALYZE_GRID_FILE}
COMMENT "Generating operator file ${depdata___${op}} for the target ${GEN_TARGET}"
......
......@@ -24,6 +24,7 @@ from dune.codegen.generation.cpp import (base_class,
preamble,
post_include,
template_parameter,
dump_ssc_marks
)
from dune.codegen.generation.hooks import (hook,
......
......@@ -55,3 +55,10 @@ def dump_accumulate_timer(name):
@generator_factory(item_tags=("register_likwid_timers",))
def register_liwkid_timer(name):
    """Build the C++ statement that registers the likwid marker ``name``.

    The generated item carries the ``register_likwid_timers`` tag
    (see the decorator arguments).

    :param name: Marker name that is placed, quoted, into the macro call.
    :returns: A ``LIKWID_MARKER_REGISTER("<name>");`` statement as a string.
    """
    statement = 'LIKWID_MARKER_REGISTER("{}");'
    return statement.format(name)
@generator_factory(item_tags=("register_ssc_marks",))
def dump_ssc_marks(name):
    """Build the C++ statement that prints the SSC mark pair of region ``name``.

    The generated item carries the ``register_ssc_marks`` tag (see the
    decorator arguments). The marks are looked up with ``driver=False``.

    :param name: Name of the timed region.
    :returns: A ``std::cout`` statement printing the region name followed by
        its two marks, separated by ``<-->``.
    """
    # Imported locally, as in the original implementation.
    from dune.codegen.pdelab.driver.timings import get_region_marks

    begin_mark, end_mark = get_region_marks(name, driver=False)
    statement = 'std::cout << "{}: " << {} << " <--> " << {} << std::endl;'
    return statement.format(name, begin_mark, end_mark)
......@@ -57,7 +57,9 @@ class CodegenGlobalOptionsArray(ImmutableRecord):
operator_to_build = CodegenOption(default=None, helpstr="The operators from the list that is about to be build now. CMake sets this one!!!")
debug_interpolate_input = CodegenOption(default=False, helpstr="Should the input for printresidual and printmatix be interpolated (instead of random input).")
use_likwid = CodegenOption(default=False, helpstr="Use likwid instead of own performance measurements.")
use_sde = CodegenOption(default=False, helpstr="Use sde instead of own performance measurements.")
autotune_google_benchmark = CodegenOption(default=False, helpstr="Use google-benchmark library for autotuning (when autotuning is activated).")
with_mpi = CodegenOption(default=True, helpstr="The module was configured with mpi")
# Arguments that are mainly to be set by logic depending on other options
max_vector_width = CodegenOption(default=256, helpstr=None)
......
......@@ -117,6 +117,10 @@ def type_coefficientcontainer():
return "X"
def type_linearizationpointcontainer():
    """Return the type name used for the linearization point container.

    NOTE(review): presumably this is a C++ template parameter name in the
    generated local operator methods -- confirm against the callers.

    :returns: The string ``"Z"``.
    """
    container_type = "Z"
    return container_type
def name_jacobian(restriction1, restriction2):
# Restrictions may only differ if NONE
if (restriction1 == Restriction.NONE) or (restriction2 == Restriction.NONE):
......
......@@ -215,7 +215,10 @@ def name_initree():
@preamble(section="init")
def define_mpihelper(name):
    """Generate the driver statement that instantiates the MPI helper.

    Registers the include of ``dune/common/parallel/mpihelper.hh`` for the
    driver file and returns the definition of a helper reference called
    ``name``. Which helper class is used depends on the ``with_mpi`` option:
    ``Dune::MPIHelper`` if it is set, ``Dune::FakeMPIHelper`` otherwise.

    A stale unconditional ``return`` in front of the option check used to
    make the ``with_mpi`` distinction unreachable; it has been removed.

    :param name: Name of the C++ variable holding the helper reference.
    :returns: The C++ definition statement as a string.
    """
    include_file("dune/common/parallel/mpihelper.hh", filetag="driver")
    # NOTE(review): both helper classes are assumed to be declared in the
    # header included above -- confirm against dune-common.
    if get_option("with_mpi"):
        helper_type = "Dune::MPIHelper"
    else:
        helper_type = "Dune::FakeMPIHelper"
    return "{0}& {1} = {0}::instance(argc, argv);".format(helper_type, name)
def name_mpihelper():
......@@ -285,6 +288,13 @@ def generate_driver():
contents = []
# Assert that this program was called with ini file
contents += ['if (argc != 2){',
' std::cerr << "This program needs to be called with an ini file" << std::endl;',
' return 1;',
'}',
'']
def add_section(tag, comment):
tagcontents = [i for i in retrieve_cache_items("preamble and {}".format(tag), make_generable=True)]
if tagcontents:
......
......@@ -4,7 +4,7 @@ from dune.codegen.generation import (cached,
include_file,
pre_include,
preamble,
)
post_include)
from dune.codegen.options import get_option
from dune.codegen.pdelab.driver import (get_form_ident,
is_linear,
......@@ -24,6 +24,9 @@ from dune.codegen.pdelab.driver.solve import (name_vector,
)
_sde_marks = {}
@preamble(section="timings")
def define_timing_identifier(name):
ini = name_initree()
......@@ -125,6 +128,17 @@ def local_operator_likwid():
return "{}.register_likwid_timers();".format(lop_name)
@preamble(section="timings")
def local_operator_ssc_marks():
    """Generate the call that makes the local operator dump its SSC marks.

    The local operator name is obtained from ``name_localoperator`` for the
    current form identifier.

    :returns: A ``<lop>.dump_ssc_marks();`` statement as a string.
    """
    call_template = "{}.dump_ssc_marks();"
    operator_name = name_localoperator(get_form_ident())
    return call_template.format(operator_name)
def ssc_macro():
    """Return the preprocessor definition of the ``__SSC_MARK(x)`` macro.

    The macro expands to an inline assembly statement that moves ``x`` into
    ``ebx`` and then emits the raw bytes 100, 103, 144, wrapped in a
    ``do { ... } while(0)`` block.

    :returns: The complete ``#define`` line as a string.
    """
    asm_statement = '__asm__ __volatile__("movl %0, %%ebx; .byte 100, 103, 144" : :"i"(x) : "%ebx");'
    return '#define __SSC_MARK(x) do{ ' + asm_statement + ' } while(0)'
@cached
def setup_timer():
# TODO check that we are using YASP?
......@@ -138,6 +152,10 @@ def setup_timer():
logger.warning("timings: using instrumentation level >= 3 with likwid will slow down your code considerably")
local_operator_likwid()
finalize_likwid()
elif get_option("use_sde"):
post_include(ssc_macro(), filetag='driver')
if get_option('instrumentation_level') >= 3:
local_operator_ssc_marks()
else:
from dune.codegen.loopy.target import type_floatingpoint
pre_include("#define HP_TIMER_OPCOUNTER {}".format(type_floatingpoint()), filetag="driver")
......@@ -156,14 +174,26 @@ def init_region_timer(region):
setup_timer()
if get_option("use_likwid"):
init_likwid_timer(region)
elif get_option("use_sde"):
pass
else:
from dune.codegen.generation import post_include
post_include("HP_DECLARE_TIMER({});".format(region), filetag="driver")
def get_region_marks(region, driver):
    """Return the (start, stop) mark pair of ``region``, creating it if needed.

    Pairs are stored in the module-level ``_sde_marks`` dictionary. A region
    that is already known keeps its stored pair, regardless of ``driver``.
    For a new region the pair is derived from the number of regions stored
    so far: with ``k = 2 * (len(_sde_marks) + 1)`` the pair is
    ``(k, k + 1)``, each multiplied by 11 if ``driver`` is true and by 1
    otherwise.

    :param region: Name of the timed region, used as dictionary key.
    :param driver: Selects the factor 11 (true) or 1 (false) for new pairs.
    :returns: Tuple ``(start_mark, stop_mark)`` of integers.
    """
    factor = 11 if driver else 1
    start_base = 2 * (len(_sde_marks) + 1)
    fresh_pair = (start_base * factor, (start_base + 1) * factor)
    # setdefault only stores fresh_pair when the region is not known yet.
    return _sde_marks.setdefault(region, fresh_pair)
def start_region_timer(region):
    """Return the C++ statements that start the timing of ``region``.

    The ``use_likwid`` option is checked first, then ``use_sde``; if neither
    is set the ``HP_TIMER_START`` macro is used.

    :param region: Name of the timed region.
    :returns: A list containing one C++ statement.
    """
    if get_option("use_likwid"):
        return ['LIKWID_MARKER_START("{}");'.format(region)]

    if get_option("use_sde"):
        # Only the first entry of the mark pair opens the region.
        begin_mark = get_region_marks(region, driver=True)[0]
        return ["__SSC_MARK(0x{});".format(begin_mark)]

    return ["HP_TIMER_START({});".format(region)]
......@@ -171,6 +201,10 @@ def start_region_timer(region):
def stop_region_timer(region):
if get_option("use_likwid"):
return ["LIKWID_MARKER_STOP(\"{}\");".format(region)]
elif get_option("use_sde"):
marks = get_region_marks(region, driver=True)
return ["__SSC_MARK(0x{});".format(marks[1]),
"std::cout << \"Timed region {}: {} <--> {}\" << std::endl;".format(region, *marks)]
else:
timestream = name_timing_stream()
return ["HP_TIMER_STOP({});".format(region),
......@@ -207,7 +241,7 @@ def timed_region(region, actions):
init_region_timer(region)
if get_option('instrumentation_level') >= 3 and not get_option('use_likwid'):
if get_option('instrumentation_level') >= 3 and not (get_option('use_likwid') or get_option("use_sde")):
timestream = name_timing_stream()
lop_name = name_localoperator(get_form_ident())
print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier()))
......
......@@ -32,6 +32,7 @@ from dune.codegen.generation import (accumulation_mixin,
ReturnArg,
run_hook,
template_parameter,
dump_ssc_marks
)
from dune.codegen.cgen.clazz import (AccessModifier,
BaseClass,
......@@ -696,6 +697,19 @@ class RegisterLikwidMethod(ClassMember):
ClassMember.__init__(self, content)
class RegisterSSCMarksMethod(ClassMember):
    """Class member holding the generated ``dump_ssc_marks()`` method.

    The method body consists of all cached items that match the condition
    ``register_ssc_marks``.
    """

    def __init__(self):
        example_kernel = name_example_kernel()
        assert example_kernel is not None

        # Signature and opening brace form a single content line.
        lines = ["void dump_ssc_marks(){"]
        for statement in retrieve_cache_items(condition='register_ssc_marks'):
            lines.append(' ' + statement)
        lines.append("}")
        ClassMember.__init__(self, lines)
class LoopyKernelMethod(ClassMember):
def __init__(self, signature, kernel, add_timings=True, initializer_list=[]):
from loopy import generate_body
......@@ -723,6 +737,12 @@ class LoopyKernelMethod(ClassMember):
init_likwid_timer(timer_name)
content.append(' ' + 'LIKWID_MARKER_START(\"{}\");'.format(timer_name))
register_liwkid_timer(timer_name)
elif get_option('use_sde'):
from dune.codegen.pdelab.driver.timings import get_region_marks, ssc_macro
post_include(ssc_macro(), filetag='operatorfile')
marks = get_region_marks(timer_name, driver=False)
content.append(' ' + '__SSC_MARK(0x{});'.format(marks[0]))
dump_ssc_marks(timer_name)
else:
post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
content.append(' ' + 'HP_TIMER_START({});'.format(timer_name))
......@@ -735,6 +755,11 @@ class LoopyKernelMethod(ClassMember):
init_likwid_timer(setuptimer)
content.append(' ' + 'LIKWID_MARKER_START(\"{}\");'.format(setuptimer))
register_liwkid_timer(setuptimer)
elif get_option('use_sde'):
from dune.codegen.pdelab.driver.timings import get_region_marks
setup_marks = get_region_marks(setuptimer, driver=False)
content.append(' ' + '__SSC_MARK(0x{});'.format(setup_marks[0]))
dump_ssc_marks(setuptimer)
else:
post_include('HP_DECLARE_TIMER({});'.format(setuptimer), filetag='operatorfile')
content.append(' HP_TIMER_START({});'.format(setuptimer))
......@@ -747,6 +772,8 @@ class LoopyKernelMethod(ClassMember):
if add_timings and get_option('instrumentation_level') >= 4:
if get_option('use_likwid'):
content.append(' ' + 'LIKWID_MARKER_STOP(\"{}\");'.format(setuptimer))
elif get_option('use_sde'):
content.append(' ' + '__SSC_MARK(0x{});'.format(setup_marks[1]))
else:
content.append(' ' + 'HP_TIMER_STOP({});'.format(setuptimer))
......@@ -757,6 +784,8 @@ class LoopyKernelMethod(ClassMember):
if add_timings and get_option('instrumentation_level') >= 3:
if get_option('use_likwid'):
content.append(' ' + 'LIKWID_MARKER_STOP(\"{}\");'.format(timer_name))
elif get_option('use_sde'):
content.append(' ' + '__SSC_MARK(0x{});'.format(marks[1]))
else:
content.append(' ' + 'HP_TIMER_STOP({});'.format(timer_name))
......@@ -1219,6 +1248,8 @@ def generate_localoperator_file(kernels, filename):
include_file('dune/codegen/common/timer.hh', filetag='operatorfile')
if get_option('use_likwid'):
operator_methods.append(RegisterLikwidMethod())
elif get_option('use_sde'):
operator_methods.append(RegisterSSCMarksMethod())
else:
operator_methods.append(TimerMethod())
elif get_option('opcounter'):
......
......@@ -9,6 +9,7 @@ from dune.codegen.pdelab.argument import (name_accumulation_variable,
name_coefficientcontainer,
type_coefficientcontainer,
name_applycontainer,
type_linearizationpointcontainer,
)
from dune.codegen.pdelab.spaces import (name_testfunctionspace,
type_testfunctionspace,
......@@ -293,8 +294,9 @@ def nonlinear_jacobian_apply_volume_templates():
lfsut = type_trialfunctionspace()
lfsvt = type_testfunctionspace()
cct = type_coefficientcontainer()
lpt = type_linearizationpointcontainer()
avt = type_accumulation_variable()
return (geot, lfsut, cct, cct, lfsvt, avt)
return (geot, lfsut, cct, lpt, lfsvt, avt)
def nonlinear_jacobian_apply_volume_args():
......@@ -312,8 +314,9 @@ def nonlinear_jacobian_apply_boundary_templates():
lfsut = type_trialfunctionspace()
lfsvt = type_testfunctionspace()
cct = type_coefficientcontainer()
lpt = type_linearizationpointcontainer()
avt = type_accumulation_variable()
return (geot, lfsut, cct, cct, lfsvt, avt)
return (geot, lfsut, cct, lpt, lfsvt, avt)
def nonlinear_jacobian_apply_boundary_args():
......@@ -331,8 +334,9 @@ def nonlinear_jacobian_apply_skeleton_templates():
lfsut = type_trialfunctionspace()
lfsvt = type_testfunctionspace()
cct = type_coefficientcontainer()
lpt = type_linearizationpointcontainer()
avt = type_accumulation_variable()
return (geot, lfsut, cct, cct, lfsvt, lfsut, cct, cct, lfsvt, avt, avt)
return (geot, lfsut, cct, lpt, lfsvt, lfsut, cct, lpt, lfsvt, avt, avt)
def nonlinear_jacobian_apply_skeleton_args():
......
......@@ -9,6 +9,7 @@ from dune.codegen.loopy.symbolic import FusedMultiplyAdd as FMA
from loopy.match import Writes
import pymbolic.primitives as prim
import numpy as np
import loopy as lp
import itertools as it
......@@ -145,11 +146,35 @@ def name_assembled_tensor(o, visitor):
@kernel_cached
def pymbolic_matrix_inverse(o, visitor):
expr = o.ufl_operands[0]
def code_generation_time_inversion(expr, visitor):
    """Try to invert a matrix expression numerically at code generation time.

    Every entry is obtained by setting ``visitor.indices`` to the entry's
    multi-index and calling ``visitor.call`` on ``expr.ufl_operands[0]``.
    As soon as one entry is not a plain ``float`` or ``int`` the attempt is
    abandoned.

    :param expr: Expression providing ``ufl_shape`` and ``ufl_operands``.
    :param visitor: Object with a writable ``indices`` attribute and a
        ``call`` method. ``visitor.indices`` is ``None`` when this function
        returns.
    :returns: ``np.linalg.inv`` of the evaluated matrix, or ``None`` if an
        entry could not be evaluated to a number.
    """
    shape = expr.ufl_shape
    entries = np.empty(shape)
    operand = expr.ufl_operands[0]

    all_numeric = True
    for position in it.product(*map(range, shape)):
        visitor.indices = position
        entry = visitor.call(operand)
        if not isinstance(entry, (float, int)):
            all_numeric = False
            break
        entries[position] = entry

    # Both outcomes leave the visitor without active indices.
    visitor.indices = None
    return np.linalg.inv(entries) if all_numeric else None
def pymbolic_matrix_inverse(o, visitor):
# Try to evaluate the matrix at code generation time.
# If this works (it does e.g. for Maxwell on structured grids)
# we can invert the matrix at code generation time!!!
indices = visitor.indices
visitor.indices = None
mat = code_generation_time_inversion(o, visitor)
if mat is not None:
return mat[indices]
# If code generation time inversion failed, we assemble it in C++
# and invert it there.
expr = o.ufl_operands[0]
name = name_assembled_tensor(expr, visitor)
if expr.shape[0] <= 3:
......@@ -160,9 +185,8 @@ def pymbolic_matrix_inverse(o, visitor):
depends_on=frozenset({lp.match.Writes(name),
lp.match.Tagged("sumfact_stage1"),
}),
tags=frozenset({"quad"}),
tags=frozenset({name}),
)
visitor.indices = indices
return prim.Variable(name)
......@@ -37,6 +37,7 @@ from ufl.classes import (Coefficient,
JacobianDeterminant,
)
from pytools import product as ptproduct
import pymbolic.primitives as prim
import numpy as np
......@@ -278,7 +279,10 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
#
def product(self, o):
    """Translate a product node.

    All operands are visited with ``self.call``. If every result is a plain
    ``int`` or ``float`` the product is evaluated right away with
    ``ptproduct`` and the number is returned; otherwise a flattened
    pymbolic product of the results is returned.

    A stale unconditional ``return`` in front of this logic used to make
    the numeric shortcut unreachable; it has been removed.

    :param o: Node whose ``ufl_operands`` are the factors.
    :returns: A number, or the result of ``prim.flattened_product``.
    """
    ops = tuple(self.call(op) for op in o.ufl_operands)
    # Purely numeric factors are folded into a single constant.
    if all(isinstance(op, (int, float)) for op in ops):
        return ptproduct(ops)
    return prim.flattened_product(ops)
def float_value(self, o):
    """Translate a float constant node into its value.

    :param o: Node providing a ``value()`` method.
    :returns: Whatever ``o.value()`` returns.
    """
    constant = o.value()
    return constant
......@@ -290,7 +294,10 @@ class UFL2LoopyVisitor(ModifiedTerminalTracker):
return prim.quotient(self.call(o.ufl_operands[0]), self.call(o.ufl_operands[1]))
def sum(self, o):
    """Translate a sum node.

    All operands are visited with ``self.call``. If every result is a plain
    ``int`` or ``float`` the sum is evaluated right away with the builtin
    ``sum`` and the number is returned; otherwise a flattened pymbolic sum
    of the results is returned.

    A stale unconditional ``return`` in front of this logic used to make
    the numeric shortcut unreachable; it has been removed.

    :param o: Node whose ``ufl_operands`` are the summands.
    :returns: A number, or the result of ``prim.flattened_sum``.
    """
    ops = tuple(self.call(op) for op in o.ufl_operands)
    # Purely numeric summands are folded into a single constant.
    if all(isinstance(op, (int, float)) for op in ops):
        return sum(ops)
    return prim.flattened_sum(ops)
def zero(self, o):
# UFL has Zeroes with shape. We ignore those indices.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment