diff --git a/python/dune/codegen/options.py b/python/dune/codegen/options.py
index 7455f1bb242fa042011f005e154e8088264ad47f..257da40169f578a4b64a367f6aaa4cd55729253d 100644
--- a/python/dune/codegen/options.py
+++ b/python/dune/codegen/options.py
@@ -55,6 +55,7 @@ class CodegenGlobalOptionsArray(ImmutableRecord):
     target_name = CodegenOption(default=None, helpstr="The target name from CMake")
     operator_to_build = CodegenOption(default=None, helpstr="The operators from the list that is about to be build now. CMake sets this one!!!")
     debug_interpolate_input = CodegenOption(default=False, helpstr="Should the input for printresidual and printmatix be interpolated (instead of random input).")
+    use_likwid = CodegenOption(default=False, helpstr="Use likwid instead of own performance measurements.")
     autotune_google_benchmark = CodegenOption(default=False, helpstr="Use google-benchmark library for autotuning (when autotuning is activated).")
 
     # Arguments that are mainly to be set by logic depending on other options
diff --git a/python/dune/codegen/pdelab/driver/__init__.py b/python/dune/codegen/pdelab/driver/__init__.py
index 9e94c6fb55cf9cf23a4a0bc89b069f48f296115f..c33d3acebfe27686f35055801f1282dc1d2b1450 100644
--- a/python/dune/codegen/pdelab/driver/__init__.py
+++ b/python/dune/codegen/pdelab/driver/__init__.py
@@ -269,10 +269,9 @@ def generate_driver():
         compare_L2_squared()
 
     # Make sure that timestream is declared before retrieving chache items
-    if get_option("instrumentation_level") >= 1:
-        from dune.codegen.pdelab.driver.timings import setup_timer, name_timing_stream
+    if get_option("instrumentation_level") >= 1 and not get_option("use_likwid"):
+        from dune.codegen.pdelab.driver.timings import setup_timer
         setup_timer()
-        timestream = name_timing_stream()
 
     from dune.codegen.pdelab.driver.error import return_statement
     return_statement()
@@ -304,10 +303,13 @@ def generate_driver():
     add_section("instat", "Set up instationary stuff...")
     add_section("printing", "Maybe print residuals and matrices to stdout...")
     add_section("error", "Maybe calculate errors for test results...")
+    add_section("end", "Stuff that should happen at the end...")
     add_section("return_stmt", "Return statement...")
 
-    if get_option("instrumentation_level") >= 1:
+    if get_option("instrumentation_level") >= 1 and not get_option("use_likwid"):
         from dune.codegen.generation import post_include
+        from dune.codegen.pdelab.driver.timings import name_timing_stream
+        timestream = name_timing_stream()
         post_include("HP_DECLARE_TIMER(driver);\n", filetag="driver")
         contents.insert(0, Line(text="HP_TIMER_START(driver);\n"))
         contents.insert(len(contents) - 2, Line(text="HP_TIMER_STOP(driver);\n"))
diff --git a/python/dune/codegen/pdelab/driver/timings.py b/python/dune/codegen/pdelab/driver/timings.py
index c6241c852b54379a9c39588542aebdd81b520cd1..a617b64ba9c767750050152e0eba3e167bd5fb30 100644
--- a/python/dune/codegen/pdelab/driver/timings.py
+++ b/python/dune/codegen/pdelab/driver/timings.py
@@ -109,29 +109,62 @@ def name_jacobian(form_ident):
     return name
 
 
+@preamble(section="init")
+def init_likwid():
+    return ["LIKWID_MARKER_INIT;", "LIKWID_MARKER_THREADINIT;"]
+
+
+@preamble(section="end")
+def finalize_likwid():
+    return ["LIKWID_MARKER_CLOSE;"]
+
+
 @cached
 def setup_timer():
     # TODO check that we are using YASP?
-    if get_option('opcounter'):
-        pre_include("#define ENABLE_COUNTER", filetag="driver")
-    pre_include("#define ENABLE_HP_TIMERS", filetag="driver")
-    include_file("dune/codegen/common/timer.hh", filetag="driver")
+    if get_option("use_likwid"):
+        pre_include("#define LIKWID_PERFMON", filetag="driver")
+        include_file("likwid.h", filetag="driver")
+        init_likwid()
+        finalize_likwid()
+    else:
+        from dune.codegen.loopy.target import type_floatingpoint
+        pre_include("#define HP_TIMER_OPCOUNTER {}".format(type_floatingpoint()), filetag="driver")
+        if get_option('opcounter'):
+            pre_include("#define ENABLE_COUNTER", filetag="driver")
+        pre_include("#define ENABLE_HP_TIMERS", filetag="driver")
+        include_file("dune/codegen/common/timer.hh", filetag="driver")
+
+
+
+@preamble(section="init")
+def init_likwid_timer(region):
+    return ["LIKWID_MARKER_REGISTER(\"{}\");".format(region)]
 
 
 def init_region_timer(region):
     setup_timer()
-    from dune.codegen.generation import post_include
-    post_include("HP_DECLARE_TIMER({});".format(region), filetag="driver")
+    if get_option("use_likwid"):
+        init_likwid_timer(region)
+    else:
+        from dune.codegen.generation import post_include
+        post_include("HP_DECLARE_TIMER({});".format(region), filetag="driver")
 
 
 def start_region_timer(region):
-    return ["HP_TIMER_START({});".format(region)]
+    if get_option("use_likwid"):
+        return ["LIKWID_MARKER_START(\"{}\");".format(region)]
+    else:
+        return ["HP_TIMER_START({});".format(region)]
 
 
 def stop_region_timer(region):
-    timestream = name_timing_stream()
-    return ["HP_TIMER_STOP({});".format(region),
-            "DUMP_TIMER({}, {}, {}, true);".format(get_option("instrumentation_level"), region, timestream)]
+    if get_option("use_likwid"):
+        return ["LIKWID_MARKER_STOP(\"{}\");".format(region)]
+    else:
+        timestream = name_timing_stream()
+        return ["HP_TIMER_STOP({});".format(region),
+                "DUMP_TIMER({}, {}, {}, true);".format(get_option("instrumentation_level"), region, timestream)]
 
 
 def timed_region(region, actions):
@@ -140,21 +173,24 @@ def timed_region(region, actions):
 
     assert(isinstance(actions, list))
 
-    assembly = []
-    print_times = []
+    if get_option('instrumentation_level') >= 2:
+        assembly = []
+        print_times = []
 
-    init_region_timer(region)
+        init_region_timer(region)
 
-    if get_option('instrumentation_level') >= 3:
-        timestream = name_timing_stream()
-        lop_name = name_localoperator(get_form_ident())
-        print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier()))
+        if get_option('instrumentation_level') >= 3:
+            timestream = name_timing_stream()
+            lop_name = name_localoperator(get_form_ident())
+            print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier()))
 
-    assembly += start_region_timer(region)
-    assembly += actions
-    assembly += stop_region_timer(region)
+        assembly += start_region_timer(region)
+        assembly += actions
+        assembly += stop_region_timer(region)
 
-    return assembly + print_times
+        return assembly + print_times
+    else:
+        return actions
 
 
 @preamble(section="timings")
@@ -165,10 +201,7 @@ def evaluate_residual_timer():
 
     action = "{}.residual({}, {});".format(n_go, v, r)
 
-    if get_option('instrumentation_level') >= 2:
-        return timed_region("residual_evaluation", action)
-    else:
-        return action
+    return timed_region("residual_evaluation", action)
 
 
 @preamble(section="timings")
@@ -185,10 +218,7 @@ def apply_jacobian_timer():
     j1 = name_temporary_vector("j1", form)
     action = "{}.nonlinear_jacobian_apply({}, {}, {});".format(n_go, v, j0, j1)
 
-    if get_option('instrumentation_level') >= 2:
-        return timed_region("apply_jacobian", action)
-    else:
-        return action
+    return timed_region("apply_jacobian", action)
 
 
 @preamble(section="timings")
@@ -199,7 +229,4 @@ def assemble_matrix_timer():
 
     action = "{}.jacobian({},{});".format(n_go, v, m)
 
-    if get_option('instrumentation_level') >= 2:
-        return timed_region("matrix_assembly", action)
-    else:
-        return [action]
+    return timed_region("matrix_assembly", action)