Skip to content
Snippets Groups Projects
Commit b65b33a4 authored by Dominic Kempf
Browse files

Merge branch 'feature/measure-nonlinear-jacobian-apply' into 'master'

Feature/measure nonlinear jacobian apply

Closes #111 and #119

See merge request dominic/dune-perftool!244
parents ef26fe46 2f3f807d
No related branches found
No related tags found
No related merge requests found
Showing
with 111 additions and 2761 deletions
...@@ -3,6 +3,7 @@ __exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}_level{for ...@@ -3,6 +3,7 @@ __exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}_level{for
opcount_suffix = opcount, nonopcount | expand opcount opcount_suffix = opcount, nonopcount | expand opcount
{opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude {opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude
dune-opcounter_FOUND, 1 | expand opcount | cmake_guard
# Calculate the size of the grid to equilibrate it to 100 MB/rank # Calculate the size of the grid to equilibrate it to 100 MB/rank
# Input parameters # Input parameters
......
...@@ -3,6 +3,7 @@ __exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}_level{for ...@@ -3,6 +3,7 @@ __exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}_level{for
opcount_suffix = opcount, nonopcount | expand opcount opcount_suffix = opcount, nonopcount | expand opcount
{opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude {opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude
dune-opcounter_FOUND, 1 | expand opcount | cmake_guard
# Calculate the size of the grid to equilibrate it to 100 MB/rank # Calculate the size of the grid to equilibrate it to 100 MB/rank
# Input parameters # Input parameters
......
...@@ -3,6 +3,7 @@ __exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}_level{for ...@@ -3,6 +3,7 @@ __exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}_level{for
opcount_suffix = opcount, nonopcount | expand opcount opcount_suffix = opcount, nonopcount | expand opcount
{opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude {opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude
dune-opcounter_FOUND, 1 | expand opcount | cmake_guard
# Calculate the size of the grid to equilibrate it to 100 MB/rank # Calculate the size of the grid to equilibrate it to 100 MB/rank
# Input parameters # Input parameters
......
...@@ -3,6 +3,7 @@ __exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}_level{for ...@@ -3,6 +3,7 @@ __exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}_level{for
opcount_suffix = opcount, nonopcount | expand opcount opcount_suffix = opcount, nonopcount | expand opcount
{opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude {opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude
dune-opcounter_FOUND, 1 | expand opcount | cmake_guard
# Calculate the size of the grid to equilibrate it to 100 MB/rank # Calculate the size of the grid to equilibrate it to 100 MB/rank
# Input parameters # Input parameters
......
...@@ -3,6 +3,7 @@ __exec_suffix = deg{formcompiler.ufl_variants.v_degree}_{opcount_suffix}_level{f ...@@ -3,6 +3,7 @@ __exec_suffix = deg{formcompiler.ufl_variants.v_degree}_{opcount_suffix}_level{f
opcount_suffix = opcount, nonopcount | expand opcount opcount_suffix = opcount, nonopcount | expand opcount
{opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude {opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude
dune-opcounter_FOUND, 1 | expand opcount | cmake_guard
# Calculate the size of the grid to equilibrate it to 100 MB/rank # Calculate the size of the grid to equilibrate it to 100 MB/rank
# Input parameters # Input parameters
......
...@@ -96,7 +96,7 @@ function(dune_add_formcompiler_system_test) ...@@ -96,7 +96,7 @@ function(dune_add_formcompiler_system_test)
) )
set_tests_properties(${tname} PROPERTIES SKIP_RETURN_CODE 77) set_tests_properties(${tname} PROPERTIES SKIP_RETURN_CODE 77)
set_tests_properties(${tname} PROPERTIES TIMEOUT 60) set_tests_properties(${tname} PROPERTIES TIMEOUT 120)
endif() endif()
endforeach() endforeach()
endfunction() endfunction()
...@@ -8,3 +8,4 @@ Version: 0.0 ...@@ -8,3 +8,4 @@ Version: 0.0
Maintainer: dominic.kempf@iwr.uni-heidelberg.de Maintainer: dominic.kempf@iwr.uni-heidelberg.de
#depending on #depending on
Depends: dune-testtools dune-pdelab dune-alugrid Depends: dune-testtools dune-pdelab dune-alugrid
Suggests: dune-opcounter
install(FILES muladd_workarounds.hh install(FILES muladd_workarounds.hh
opcounter.hh
timer.hh timer.hh
timer_tsc.hh
timer_chrono.hh
tsc.hh tsc.hh
vectorclass.hh vectorclass.hh
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/dune/perftool/common DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/dune/perftool/common
......
...@@ -3,36 +3,14 @@ ...@@ -3,36 +3,14 @@
/* We are currently having some issues with FMA nodes not being /* We are currently having some issues with FMA nodes not being
* eliminated correctly upon code generation. We "solve" the problem * eliminated correctly upon code generation. We "solve" the problem
* for now with overloads of the mul_add function for scalars. * for now with a generic implementation of the mul_add function.
*/ */
#include<dune/perftool/common/opcounter.hh>
template<typename T>
inline double mul_add(double op1, double& op2, double op3) inline T mul_add(T op1, T op2, T op3)
{
return op1 * op2 + op3;
}
inline float mul_add(float op1, float& op2, float op3)
{ {
return op1 * op2 + op3; return op1 * op2 + op3;
} }
#ifdef ENABLE_COUNTER
oc::OpCounter<double> mul_add(oc::OpCounter<double> op1, oc::OpCounter<double>& op2, oc::OpCounter<double> op3)
{
return op1 * op2 + op3;
}
oc::OpCounter<float> mul_add(oc::OpCounter<float> op1, oc::OpCounter<float>& op2, oc::OpCounter<float> op3)
{
return op1 * op2 + op3;
}
#endif
#endif #endif
This diff is collapsed.
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#include <chrono> #include <chrono>
#include <dune/perftool/common/opcounter.hh> #include <dune/opcounter/opcounter.hh>
#define HP_TIMER_OPCOUNTER oc::OpCounter<double> #define HP_TIMER_OPCOUNTER oc::OpCounter<double>
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#endif #endif
#include <dune/perftool/common/tsc.hh> #include <dune/perftool/common/tsc.hh>
#include <dune/perftool/common/opcounter.hh> #include <dune/opcounter/opcounter.hh>
#define HP_TIMER_DURATION(name) __hp_timer_##name##_duration #define HP_TIMER_DURATION(name) __hp_timer_##name##_duration
#define HP_TIMER_STARTTIME(name) __hp_timer_##name##_start #define HP_TIMER_STARTTIME(name) __hp_timer_##name##_start
...@@ -76,16 +76,22 @@ ...@@ -76,16 +76,22 @@
#ifdef ENABLE_COUNTER #ifdef ENABLE_COUNTER
#define DUMP_TIMER(level,name,os,reset)\ #define DUMP_TIMER(level,name,os,reset)\
if (HP_TIMER_DURATION(name) > 1e-12) \ { \
os << #level << " " << ident << " " << #name << " time " << Dune::PDELab::TSC::seconds(HP_TIMER_DURATION(name)) << std::endl; \ std::string prefix = std::string(#level) + " " + ident + " " + std::string(#name); \
HP_TIMER_OPCOUNTERS(name).reportOperations(os,#level,ident,#name,reset); if (HP_TIMER_DURATION(name) > 1e-12) \
os << prefix << " time " << Dune::PDELab::TSC::seconds(HP_TIMER_DURATION(name)) << std::endl; \
HP_TIMER_OPCOUNTERS(name).reportOperations(os,prefix,reset); \
}
#define DUMP_AND_ACCUMULATE_TIMER(level,name,os,reset,time,ops) \ #define DUMP_AND_ACCUMULATE_TIMER(level,name,os,reset,time,ops) \
if (HP_TIMER_DURATION(name) > 1e-12) \ { \
os << #level << " " << ident << " " << #name << " time " << Dune::PDELab::TSC::seconds(HP_TIMER_DURATION(name)) << std::endl; \ std::string prefix = std::string(#level) + " " + ident + " " + std::string(#name); \
time += HP_TIMER_DURATION(name); \ if (HP_TIMER_DURATION(name) > 1e-12) \
ops += HP_TIMER_OPCOUNTERS(name); \ os << prefix << " time " << Dune::PDELab::TSC::seconds(HP_TIMER_DURATION(name)) << std::endl; \
HP_TIMER_OPCOUNTERS(name).reportOperations(os,#level,ident,#name,reset); time += HP_TIMER_DURATION(name); \
ops += HP_TIMER_OPCOUNTERS(name); \
HP_TIMER_OPCOUNTERS(name).reportOperations(os,prefix,reset); \
}
#elif defined ENABLE_HP_TIMERS #elif defined ENABLE_HP_TIMERS
......
This diff is collapsed.
...@@ -30,7 +30,8 @@ import cgen ...@@ -30,7 +30,8 @@ import cgen
def _type_to_op_counter_type(name): def _type_to_op_counter_type(name):
return "oc::OpCounter<{}>".format(name) include_file("dune/opcounter/opcounter.hh")
return "OpCounter::OpCounter<{}>".format(name)
def dtype_floatingpoint(): def dtype_floatingpoint():
...@@ -63,6 +64,10 @@ def type_floatingpoint(): ...@@ -63,6 +64,10 @@ def type_floatingpoint():
return numpy_to_cpp_dtype(NumpyType(dtype).dtype.name) return numpy_to_cpp_dtype(NumpyType(dtype).dtype.name)
def type_context_floatingpoint():
return {np.float32: 'f', np.float64: 'd'}.get(dtype_floatingpoint())
class DuneExpressionToCExpressionMapper(ExpressionToCExpressionMapper): class DuneExpressionToCExpressionMapper(ExpressionToCExpressionMapper):
def map_subscript(self, expr, type_context): def map_subscript(self, expr, type_context):
arr = self.find_array(expr) arr = self.find_array(expr)
...@@ -88,6 +93,10 @@ class DuneExpressionToCExpressionMapper(ExpressionToCExpressionMapper): ...@@ -88,6 +93,10 @@ class DuneExpressionToCExpressionMapper(ExpressionToCExpressionMapper):
return expr return expr
def map_constant(self, expr, type_context): def map_constant(self, expr, type_context):
# We correct the type context to force all floating point literals to be of
# the type that we use throughout the computation.
if type_context in ("f", "d"):
type_context = type_context_floatingpoint()
ret = ExpressionToCExpressionMapper.map_constant(self, expr, type_context) ret = ExpressionToCExpressionMapper.map_constant(self, expr, type_context)
if get_option('opcounter'): if get_option('opcounter'):
if type_context in ("f", "d"): if type_context in ("f", "d"):
......
...@@ -22,7 +22,7 @@ def _union(a): ...@@ -22,7 +22,7 @@ def _union(a):
return frozenset.union(*a) return frozenset.union(*a)
def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', operator=False): def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', operator=False, depends_on=frozenset()):
""" Transform loopy kernel to contain instrumentation code """ Transform loopy kernel to contain instrumentation code
Arguments: Arguments:
...@@ -32,6 +32,9 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o ...@@ -32,6 +32,9 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o
identifier : The name of the counter to start and stop identifier : The name of the counter to start and stop
level : The instrumentation level this measurement is defined at level : The instrumentation level this measurement is defined at
filetag : The tag of the file that should contain the counter definitions filetag : The tag of the file that should contain the counter definitions
depends_on: Additional dependencies to add to the start instruction. This is used to correct
currently wrong behaviour of the transformation in cases where a lot of structure
of the instrumentation is known a priori.
""" """
# If the instrumentation level is not high enough, this is a no-op # If the instrumentation level is not high enough, this is a no-op
if level > get_option("instrumentation_level"): if level > get_option("instrumentation_level"):
...@@ -53,6 +56,7 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o ...@@ -53,6 +56,7 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o
insn_inames = _intersect(tuple(i.within_inames for i in insns)) insn_inames = _intersect(tuple(i.within_inames for i in insns))
other_inames = _union(tuple(i.within_inames for i in lp.find_instructions(knl, lp.match.Not(match)))) other_inames = _union(tuple(i.within_inames for i in lp.find_instructions(knl, lp.match.Not(match))))
within = _intersect((insn_inames, other_inames)) within = _intersect((insn_inames, other_inames))
uniontags = _intersect(tuple(i.tags for i in insns))
# Get a unique identifier - note that the same timer could be started and stopped several times # Get a unique identifier - note that the same timer could be started and stopped several times
# within one kernel... # within one kernel...
...@@ -67,8 +71,9 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o ...@@ -67,8 +71,9 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o
"HP_TIMER_START({});".format(identifier), "HP_TIMER_START({});".format(identifier),
id=start_id, id=start_id,
within_inames=within, within_inames=within,
depends_on=start_depends, depends_on=depends_on.union(start_depends),
boostable_into=frozenset(), boostable_into=frozenset(),
tags=uniontags,
) )
# Add dependencies on the timing instructions # Add dependencies on the timing instructions
...@@ -82,6 +87,7 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o ...@@ -82,6 +87,7 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o
within_inames=within, within_inames=within,
depends_on=frozenset(i.id for i in insns), depends_on=frozenset(i.id for i in insns),
boostable_into=frozenset(), boostable_into=frozenset(),
tags=uniontags,
) )
# Find all the instructions that should depend on stop # Find all the instructions that should depend on stop
...@@ -98,4 +104,7 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o ...@@ -98,4 +104,7 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o
other_insns = list(filter(lambda i: i.id not in [j.id for j in rewritten_insns], knl.instructions)) other_insns = list(filter(lambda i: i.id not in [j.id for j in rewritten_insns], knl.instructions))
# Add all the modified instructions into the kernel object # Add all the modified instructions into the kernel object
return knl.copy(instructions=rewritten_insns + other_insns + [start_insn, stop_insn]) knl = knl.copy(instructions=rewritten_insns + other_insns + [start_insn, stop_insn])
from loopy.kernel.creation import resolve_dependencies
return resolve_dependencies(knl)
...@@ -161,7 +161,7 @@ def accumulate_L2_squared(): ...@@ -161,7 +161,7 @@ def accumulate_L2_squared():
@preamble(section="error") @preamble(section="error")
def define_accumulated_L2_error(name): def define_accumulated_L2_error(name):
t = type_range() t = type_range()
return "{} {}(0.0);".format(t, name) return "Dune::FieldVector<{}, 1> {}(0.0);".format(t, name)
def name_accumulated_L2_error(): def name_accumulated_L2_error():
...@@ -182,7 +182,7 @@ def compare_L2_squared(): ...@@ -182,7 +182,7 @@ def compare_L2_squared():
"if ({}.comm().rank() == 0){{".format(gv), "if ({}.comm().rank() == 0){{".format(gv),
" std::cout << \"\\nl2errorsquared: \" << {} << std::endl << std::endl;".format(accum_error), " std::cout << \"\\nl2errorsquared: \" << {} << std::endl << std::endl;".format(accum_error),
"}", "}",
"if (isnan({0}) or abs({0})>{1})".format(accum_error, get_option("compare_l2errorsquared")), "if (isnan({0}[0]) or abs({0}[0])>{1})".format(accum_error, get_option("compare_l2errorsquared")),
" {} = true;".format(fail)] " {} = true;".format(fail)]
......
...@@ -8,6 +8,7 @@ from dune.perftool.generation import (cached, ...@@ -8,6 +8,7 @@ from dune.perftool.generation import (cached,
preamble, preamble,
) )
from dune.perftool.pdelab.driver import (get_form_ident, from dune.perftool.pdelab.driver import (get_form_ident,
is_linear,
name_initree, name_initree,
name_mpihelper, name_mpihelper,
) )
...@@ -131,18 +132,19 @@ def apply_jacobian_timer(): ...@@ -131,18 +132,19 @@ def apply_jacobian_timer():
if get_option('instrumentation_level') >= 3: if get_option('instrumentation_level') >= 3:
print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier()))
if get_option('instrumentation_level') >= 2: if is_linear():
evaluation = ["HP_TIMER_START(apply_jacobian);", declaration = ["{} j({});".format(t_v, v), "j=0.0;"]
"{}.jacobian_apply({}, j);".format(n_go, v),
"HP_TIMER_STOP(apply_jacobian);",
"DUMP_TIMER({}, apply_jacobian, {}, true);".format(get_option("instrumentation_level"), timestream)]
evaluation.extend(print_times)
else:
evaluation = ["{}.jacobian_apply({}, j);".format(n_go, v)] evaluation = ["{}.jacobian_apply({}, j);".format(n_go, v)]
else:
declaration = ["{} j0({});".format(t_v, v), "j0=0.0;",
"{} j1({});".format(t_v, v), "j1=0.0;"]
evaluation = ["{}.nonlinear_jacobian_apply({}, j0, j1);".format(n_go, v)]
evaluation = ["{} j({});".format(t_v, v), "j=0.0;"] + evaluation if get_option('instrumentation_level') >= 2:
evaluation = ["HP_TIMER_START(apply_jacobian);"] + evaluation + ["HP_TIMER_STOP(apply_jacobian);", "DUMP_TIMER({}, apply_jacobian, {}, true);".format(get_option("instrumentation_level"), timestream)]
evaluation.extend(print_times)
return evaluation return declaration + evaluation
@preamble(section="timings") @preamble(section="timings")
......
...@@ -4,11 +4,19 @@ from dune.perftool.ufl.visitor import UFL2LoopyVisitor ...@@ -4,11 +4,19 @@ from dune.perftool.ufl.visitor import UFL2LoopyVisitor
import pymbolic.primitives as prim import pymbolic.primitives as prim
@preamble @preamble(section="init")
def driver_using_statement(what): def driver_using_statement(what):
return "using {};".format(what) return "using {};".format(what)
@preamble(section="gridoperator")
def set_lop_to_starting_time():
from dune.perftool.pdelab.driver import get_form_ident
from dune.perftool.pdelab.driver.gridoperator import name_localoperator
lop = name_localoperator(get_form_ident())
return "{}.setTime(0.0);".format(lop)
class DriverUFL2PymbolicVisitor(UFL2LoopyVisitor): class DriverUFL2PymbolicVisitor(UFL2LoopyVisitor):
def __init__(self): def __init__(self):
from dune.perftool.pdelab import PDELabInterface from dune.perftool.pdelab import PDELabInterface
...@@ -33,6 +41,7 @@ class DriverUFL2PymbolicVisitor(UFL2LoopyVisitor): ...@@ -33,6 +41,7 @@ class DriverUFL2PymbolicVisitor(UFL2LoopyVisitor):
from dune.perftool.pdelab.driver import get_form_ident from dune.perftool.pdelab.driver import get_form_ident
from dune.perftool.pdelab.driver.gridoperator import name_localoperator from dune.perftool.pdelab.driver.gridoperator import name_localoperator
lop = name_localoperator(get_form_ident()) lop = name_localoperator(get_form_ident())
set_lop_to_starting_time()
return prim.Call(prim.Variable("{}.getTime".format(lop)), ()) return prim.Call(prim.Variable("{}.getTime".format(lop)), ())
else: else:
return UFL2LoopyVisitor.coefficient(self, o) return UFL2LoopyVisitor.coefficient(self, o)
......
...@@ -572,8 +572,8 @@ def extract_kernel_from_cache(tag, name, signature, wrap_in_cgen=True, add_timin ...@@ -572,8 +572,8 @@ def extract_kernel_from_cache(tag, name, signature, wrap_in_cgen=True, add_timin
if add_timings and get_form_option("sumfact"): if add_timings and get_form_option("sumfact"):
from dune.perftool.pdelab.signatures import assembler_routine_name from dune.perftool.pdelab.signatures import assembler_routine_name
kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage1"), "{}_kernel_stage1".format(assembler_routine_name()), 4) kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage1"), "{}_kernel_stage1".format(assembler_routine_name()), 4)
kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage2"), "{}_kernel_quadratureloop".format(assembler_routine_name()), 4) kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage2"), "{}_kernel_quadratureloop".format(assembler_routine_name()), 4, depends_on=frozenset({lp.match.Tagged("sumfact_stage1")}))
kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage3"), "{}_kernel_stage3".format(assembler_routine_name()), 4) kernel = add_instrumentation(kernel, lp.match.Tagged("sumfact_stage3"), "{}_kernel_stage3".format(assembler_routine_name()), 4, depends_on=frozenset({lp.match.Tagged("sumfact_stage2")}))
if wrap_in_cgen: if wrap_in_cgen:
# Wrap the kernel in something which can generate code # Wrap the kernel in something which can generate code
......
...@@ -44,10 +44,12 @@ dune_add_formcompiler_system_test(UFLFILE poisson_dg.ufl ...@@ -44,10 +44,12 @@ dune_add_formcompiler_system_test(UFLFILE poisson_dg.ufl
) )
# 8. Poisson with operator counting # 8. Poisson with operator counting
dune_add_formcompiler_system_test(UFLFILE opcount_poisson_dg.ufl if(dune-opcounter_FOUND)
BASENAME opcount_poisson_dg_symdiff dune_add_formcompiler_system_test(UFLFILE opcount_poisson_dg.ufl
INIFILE opcount_poisson_dg_symdiff.mini BASENAME opcount_poisson_dg_symdiff
) INIFILE opcount_poisson_dg_symdiff.mini
)
endif()
# 9. Poisson Test Case: DG quadrilaterals # 9. Poisson Test Case: DG quadrilaterals
dune_add_formcompiler_system_test(UFLFILE poisson_dg_quadrilateral.ufl dune_add_formcompiler_system_test(UFLFILE poisson_dg_quadrilateral.ufl
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment