diff --git a/cmake/modules/DuneCodegenMacros.cmake b/cmake/modules/DuneCodegenMacros.cmake index 73ab6a5b787217c324f1ae83a4320e2a26b4770b..61713109b32cc1af4716b0f9f133879592fec49c 100644 --- a/cmake/modules/DuneCodegenMacros.cmake +++ b/cmake/modules/DuneCodegenMacros.cmake @@ -81,6 +81,10 @@ find_package(benchmark) +if (DUNE_CODEGEN_PROFILING) + find_package(likwid) +endif() + add_custom_target(generation) # Gather a list of form compiler sources to add as dependencies diff --git a/cmake/modules/Findlikwid.cmake b/cmake/modules/Findlikwid.cmake new file mode 100644 index 0000000000000000000000000000000000000000..778901280ee0b778bd7131cf1fcee4b3181557f3 --- /dev/null +++ b/cmake/modules/Findlikwid.cmake @@ -0,0 +1,104 @@ +# .. cmake_module:: +# +# Module that checks whether likwid is available and usable. +# +# Variables used by this module which you may want to set: +# +# :ref:`likwid_ROOT` +# Path list to search for likwid. +# +# Sets the following variables: +# +# :code:`likwid_FOUND` +# True if likwid available. +# +# :code:`likwid_INCLUDE_DIRS` +# Path to the likwid include directories. +# +# +# :code:`likwid_LIBRARIES` +# Link against these libraries to use likwid. +# +# .. cmake_variable:: likwid_ROOT +# +# You may set this variable to have :ref:`Findlikwid` look +# for the likwid package in the given paths before inspecting +# system paths.
+# +find_path(LIKWID_INCLUDE_DIR + NAMES "likwid.h" + PATHS ${likwid_ROOT} + PATH_SUFFIXES "include" "include/likwid" + NO_DEFAULT_PATH) +find_path(LIKWID_INCLUDE_DIR + NAMES "likwid.h" + PATH_SUFFIXES "include" "include/likwid") + +find_library(LIKWID_LIBRARY + NAMES "likwid" + PATHS ${likwid_ROOT} + PATH_SUFFIXES "lib" "lib32" "lib64" + NO_DEFAULT_PATH) +find_library(LIKWID_LIBRARY + NAMES "likwid" + PATH_SUFFIXES "lib" "lib32" "lib64") + +include(CMakePushCheckState) +cmake_push_check_state() + +if(LIKWID_INCLUDE_DIR) + set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${LIKWID_INCLUDE_DIR}) +endif() +if(LIKWID_LIBRARY) + set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} ${LIKWID_LIBRARY}) +endif() + +cmake_pop_check_state() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + "likwid" + DEFAULT_MSG + LIKWID_INCLUDE_DIR + LIKWID_LIBRARY +) + +mark_as_advanced(LIKWID_INCLUDE_DIR LIKWID_LIBRARY) + +# if headers are found, store results +if(likwid_FOUND) + set(likwid_INCLUDE_DIRS ${LIKWID_INCLUDE_DIR}) + set(likwid_LIBRARIES ${LIKWID_LIBRARY}) + # log result + file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log + "Determining location of likwid succeeded:\n" + "Include directory: ${likwid_INCLUDE_DIRS}\n" + "Libraries to link against: ${likwid_LIBRARIES}\n\n") + + set(likwid_DUNE_COMPILE_FLAGS "-I${likwid_INCLUDE_DIRS}" + CACHE STRING "Compile Flags used by DUNE when compiling with likwid programs") + set(likwid_DUNE_LIBRARIES ${likwid_LIBRARIES} + CACHE STRING "Libraries used by DUNE when linking likwid programs") +else() + # log erroneous result; likwid_INCLUDE_DIRS/likwid_LIBRARIES are unset here, so report the raw search results + file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeError.log + "Determining location of likwid failed:\n" + "Include directory: ${LIKWID_INCLUDE_DIR}\n" + "Libraries to link against: ${LIKWID_LIBRARY}\n\n") +endif() + +# set HAVE_LIKWID for config.h +set(HAVE_LIKWID ${likwid_FOUND}) + + +# register all likwid related flags +if(likwid_FOUND) + 
dune_register_package_flags(COMPILE_DEFINITIONS "ENABLE_LIKWID=1" + LIBRARIES "${likwid_LIBRARIES}" + INCLUDE_DIRS "${likwid_INCLUDE_DIRS}") +endif() + +# text for feature summary (name must match the find_package(likwid) package name) +set_package_properties("likwid" PROPERTIES + DESCRIPTION "likwid" + PURPOSE "Performance monitoring and benchmarking suite.") diff --git a/python/dune/codegen/generation/__init__.py b/python/dune/codegen/generation/__init__.py index d0cf1d4dc8b6880db13bfdec4458d815ea8208c2..bed0256407b7259bab61b6e932c4a17761097e75 100644 --- a/python/dune/codegen/generation/__init__.py +++ b/python/dune/codegen/generation/__init__.py @@ -16,6 +16,7 @@ from dune.codegen.generation.cpp import (base_class, class_member, constructor_parameter, dump_accumulate_timer, + register_liwkid_timer, end_of_file, include_file, initializer_list, diff --git a/python/dune/codegen/generation/cpp.py b/python/dune/codegen/generation/cpp.py index 29384f98554ab895d37670c017fe5ecc4f191655..b918291067f45c5f988bc8fdcea55651d538a9db 100644 --- a/python/dune/codegen/generation/cpp.py +++ b/python/dune/codegen/generation/cpp.py @@ -50,3 +50,8 @@ def dump_accumulate_timer(name): code = "DUMP_TIMER({},{},{},{});".format(get_option("instrumentation_level"), name, os, reset) return code + + +@generator_factory(item_tags=("register_likwid_timers",)) +def register_liwkid_timer(name): + return "LIKWID_MARKER_REGISTER(\"{}\");".format(name) diff --git a/python/dune/codegen/loopy/transformations/instrumentation.py b/python/dune/codegen/loopy/transformations/instrumentation.py index 7b13a09e597490dd1bef85344c9be6bdfb859dd3..2fab53a6215a15f0e06e7d42a18f4736b46da34a 100644 --- a/python/dune/codegen/loopy/transformations/instrumentation.py +++ b/python/dune/codegen/loopy/transformations/instrumentation.py @@ -1,9 +1,11 @@ """ Add instrumentation instructions to a kernel """ from dune.codegen.generation import (dump_accumulate_timer, + register_liwkid_timer, post_include, ) from dune.codegen.options import get_option 
+from dune.codegen.pdelab.driver.timings import start_region_timer_instruction, stop_region_timer_instruction import loopy as lp @@ -67,28 +69,25 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o # Define the start instruction and correct dependencies for it start_id = "{}_start".format(ident) start_depends = _union(tuple(i.depends_on for i in insns)).difference(frozenset(i.id for i in insns)) - start_insn = lp.CInstruction([], - "HP_TIMER_START({});".format(identifier), - id=start_id, - within_inames=within, - depends_on=depends_on.union(start_depends), - boostable_into=frozenset(), - tags=uniontags, - ) + start_insn = start_region_timer_instruction(identifier, + id=start_id, + within_inames=within, + depends_on=depends_on.union(start_depends), + boostable_into=frozenset(), + tags=uniontags,) # Add dependencies on the timing instructions rewritten_insns.extend([i.copy(depends_on=i.depends_on.union(frozenset({start_id}))) for i in insns]) # Define the stop instruction and correct dependencies for it stop_id = "{}_stop".format(ident) - stop_insn = lp.CInstruction([], - "HP_TIMER_STOP({});".format(identifier), - id=stop_id, - within_inames=within, - depends_on=frozenset(i.id for i in insns), - boostable_into=frozenset(), - tags=uniontags, - ) + stop_insn = stop_region_timer_instruction(identifier, + id=stop_id, + within_inames=within, + depends_on=frozenset(i.id for i in insns), + boostable_into=frozenset(), + tags=uniontags, + ) # Find all the instructions that should depend on stop dep_insns = filter(lambda i: _intersect((i.depends_on, frozenset(i.id for i in insns))), @@ -97,8 +96,11 @@ def add_instrumentation(knl, match, identifier, level, filetag='operatorfile', o rewritten_insns.extend([i.copy(depends_on=i.depends_on.union(frozenset({stop_id}))) for i in dep_insns]) # Trigger code generation on the file/operator level - post_include('HP_DECLARE_TIMER({});'.format(identifier), filetag=filetag) - dump_accumulate_timer(identifier) 
+ if get_option("use_likwid"): + register_liwkid_timer(identifier) + else: + post_include('HP_DECLARE_TIMER({});'.format(identifier), filetag=filetag) + dump_accumulate_timer(identifier) # Filter all the instructions which were untouched other_insns = list(filter(lambda i: i.id not in [j.id for j in rewritten_insns], knl.instructions)) diff --git a/python/dune/codegen/options.py b/python/dune/codegen/options.py index 7455f1bb242fa042011f005e154e8088264ad47f..257da40169f578a4b64a367f6aaa4cd55729253d 100644 --- a/python/dune/codegen/options.py +++ b/python/dune/codegen/options.py @@ -55,6 +55,7 @@ class CodegenGlobalOptionsArray(ImmutableRecord): target_name = CodegenOption(default=None, helpstr="The target name from CMake") operator_to_build = CodegenOption(default=None, helpstr="The operators from the list that is about to be build now. CMake sets this one!!!") debug_interpolate_input = CodegenOption(default=False, helpstr="Should the input for printresidual and printmatix be interpolated (instead of random input).") + use_likwid = CodegenOption(default=False, helpstr="Use likwid instead of own performance measurements.") autotune_google_benchmark = CodegenOption(default=False, helpstr="Use google-benchmark library for autotuning (when autotuning is activated).") # Arguments that are mainly to be set by logic depending on other options diff --git a/python/dune/codegen/pdelab/driver/__init__.py b/python/dune/codegen/pdelab/driver/__init__.py index 6526dcbf85cfb60d663a3f0193dde07351a361bd..b60544c1c78242f1490c76c46d4be6a2c4448501 100644 --- a/python/dune/codegen/pdelab/driver/__init__.py +++ b/python/dune/codegen/pdelab/driver/__init__.py @@ -270,15 +270,14 @@ def generate_driver(): # Make sure that timestream is declared before retrieving chache items if get_option("instrumentation_level") >= 1: - from dune.codegen.pdelab.driver.timings import setup_timer, name_timing_stream + from dune.codegen.pdelab.driver.timings import setup_timer setup_timer() - timestream = 
name_timing_stream() from dune.codegen.pdelab.driver.error import return_statement return_statement() from dune.codegen.generation import retrieve_cache_items - from cgen import FunctionDeclaration, FunctionBody, Block, Value, LineComment, Line + from cgen import FunctionDeclaration, FunctionBody, Block, Value, LineComment, Line, Generable driver_signature = FunctionDeclaration(Value('int', 'main'), [Value('int', 'argc'), Value('char**', 'argv')]) contents = [] @@ -292,6 +291,11 @@ def generate_driver(): contents.append(Line("\n")) add_section("init", "Initialize basic stuff...") + + if get_option("instrumentation_level") >= 1: + init_contents = contents + contents = [] + add_section("grid", "Setup grid (view)...") add_section("fem", "Set up finite element maps...") add_section("gfs", "Set up grid function spaces...") @@ -306,13 +310,14 @@ def generate_driver(): add_section("error", "Maybe calculate errors for test results...") if get_option("instrumentation_level") >= 1: - from dune.codegen.generation import post_include - post_include("HP_DECLARE_TIMER(driver);\n", filetag="driver") - contents.insert(0, Line(text="HP_TIMER_START(driver);\n")) - contents.insert(len(contents) - 1, Line(text="HP_TIMER_STOP(driver);\n")) - contents.insert(len(contents) - 1, Line(text="DUMP_TIMER({}, driver, {}, true);\n".format(get_option("instrumentation_level"), timestream))) - contents.insert(0, Line(text="\n")) - driver_body = Block(contents) + from dune.codegen.pdelab.driver.timings import timed_region + contents = init_contents + timed_region('driver', contents) + + add_section("end", "Stuff that should happen at the end...") + add_section("return_stmt", "Return statement...") + + contents.insert(0, "\n") + driver_body = Block([c if isinstance(c, Generable) else Line(c + '\n') for c in contents]) # Wrap a try/catch block around the driver body from dune.codegen.cgen import CatchBlock, TryCatchBlock, Value, Block, Line diff --git a/python/dune/codegen/pdelab/driver/error.py 
b/python/dune/codegen/pdelab/driver/error.py index cf9fe42933ca41f2696b39324bb4f443c35d9003..02207b4986dccd7a652c1943402243e755a8c681 100644 --- a/python/dune/codegen/pdelab/driver/error.py +++ b/python/dune/codegen/pdelab/driver/error.py @@ -186,8 +186,7 @@ def compare_L2_squared(): " {} = true;".format(fail)] -@preamble(section="error") +@preamble(section="return_stmt") def return_statement(): - from dune.codegen.pdelab.driver.error import name_test_fail_variable fail = name_test_fail_variable() return "return {};".format(fail) diff --git a/python/dune/codegen/pdelab/driver/solve.py b/python/dune/codegen/pdelab/driver/solve.py index 79dfac051c66c2faa50c4e262eb419ed30b5fde1..4a6a3c9e7e235ee5368927fc6f97ac817ffde5f5 100644 --- a/python/dune/codegen/pdelab/driver/solve.py +++ b/python/dune/codegen/pdelab/driver/solve.py @@ -57,23 +57,8 @@ def dune_solve(): if get_form_option("generate_jacobians"): print_matrix() - if get_option('instrumentation_level') >= 2: - from dune.codegen.pdelab.driver.timings import setup_timer, name_timing_stream, name_timing_identifier - timestream = name_timing_stream() - setup_timer() - from dune.codegen.generation import post_include - post_include("HP_DECLARE_TIMER(solve);", filetag="driver") - - solve = ["HP_TIMER_START(solve);", - "{}".format(solve), - "HP_TIMER_STOP(solve);", - "DUMP_TIMER({}, solve, {}, true);".format(get_option("instrumentation_level"), timestream), - ] - - if get_option('instrumentation_level') >= 3: - from dune.codegen.pdelab.driver.gridoperator import name_localoperator - lop_name = name_localoperator(form_ident) - solve.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) + from dune.codegen.pdelab.driver.timings import timed_region + solve = timed_region('solve', solve) return solve diff --git a/python/dune/codegen/pdelab/driver/timings.py b/python/dune/codegen/pdelab/driver/timings.py index 
714f263a353c5a8a3a6b3b83dbc651ffd3401961..4a3d265fefd460bbee0218630bc40d0bc57a22b9 100644 --- a/python/dune/codegen/pdelab/driver/timings.py +++ b/python/dune/codegen/pdelab/driver/timings.py @@ -1,12 +1,11 @@ """ Timing related generator functions """ -from dune.codegen.options import get_option from dune.codegen.generation import (cached, include_file, pre_include, - post_include, preamble, ) +from dune.codegen.options import get_option from dune.codegen.pdelab.driver import (get_form_ident, is_linear, name_initree, @@ -21,7 +20,7 @@ from dune.codegen.pdelab.driver.gridoperator import (name_gridoperator, type_gridoperator, ) from dune.codegen.pdelab.driver.solve import (name_vector, - type_vector, + define_vector, ) @@ -90,109 +89,169 @@ def name_timing_stream(): return name +def name_temporary_vector(name, form): + name = "{}_{}".format(name, form) + define_vector(name, form) + return name + + +@preamble(section="timings") +def define_jacobian(name, form_ident): + t_go = type_gridoperator(form_ident) + n_go = name_gridoperator(form_ident) + return ["using M_{} = typename {}::Traits::Jacobian;".format(form_ident, t_go), + "M_{} {}({});".format(form_ident, name, n_go)] + + +def name_jacobian(form_ident): + name = "J_{}".format(form_ident) + define_jacobian(name, form_ident) + return name + + +@preamble(section="init") +def init_likwid(): + return ["LIKWID_MARKER_INIT;", "LIKWID_MARKER_THREADINIT;"] + + +@preamble(section="end") +def finalize_likwid(): + return ["LIKWID_MARKER_CLOSE;"] + + +@preamble(section="timings") +def local_operator_likwid(): + lop_name = name_localoperator(get_form_ident()) + return "{}.register_likwid_timers();".format(lop_name) + + @cached def setup_timer(): # TODO check that we are using YASP? 
- if get_option('opcounter'): - pre_include("#define ENABLE_COUNTER", filetag="driver") - pre_include("#define ENABLE_HP_TIMERS", filetag="driver") - include_file("dune/codegen/common/timer.hh", filetag="driver") + if get_option("use_likwid"): + pre_include("#define LIKWID_PERFMON", filetag="driver") + include_file("likwid.h", filetag="driver") + init_likwid() + if get_option('instrumentation_level') >= 3: + local_operator_likwid() + finalize_likwid() + else: + from dune.codegen.loopy.target import type_floatingpoint + pre_include("#define HP_TIMER_OPCOUNTER {}".format(type_floatingpoint()), filetag="driver") + if get_option('opcounter'): + pre_include("#define ENABLE_COUNTER", filetag="driver") + pre_include("#define ENABLE_HP_TIMERS", filetag="driver") + include_file("dune/codegen/common/timer.hh", filetag="driver") -@preamble(section="timings") -def evaluate_residual_timer(): - n_go = name_gridoperator(get_form_ident()) - v = name_vector(get_form_ident()) - t_v = type_vector(get_form_ident()) - setup_timer() +@preamble(section="init") +def init_likwid_timer(region): + return ["LIKWID_MARKER_REGISTER(\"{}\");".format(region)] - if get_option('instrumentation_level') >= 2: - # Write back times + +def init_region_timer(region): + setup_timer() + if get_option("use_likwid"): + init_likwid_timer(region) + else: from dune.codegen.generation import post_include - post_include("HP_DECLARE_TIMER(residual_evaluation);", filetag="driver") + post_include("HP_DECLARE_TIMER({});".format(region), filetag="driver") + + +def start_region_timer(region): + if get_option("use_likwid"): + return ["LIKWID_MARKER_START(\"{}\");".format(region)] + else: + return ["HP_TIMER_START({});".format(region)] + + +def stop_region_timer(region): + if get_option("use_likwid"): + return ["LIKWID_MARKER_STOP(\"{}\");".format(region)] + else: timestream = name_timing_stream() - print_times = [] + return ["HP_TIMER_STOP({});".format(region), + "DUMP_TIMER({}, {}, {}, 
true);".format(get_option("instrumentation_level"), region, timestream)] - lop_name = name_localoperator(get_form_ident()) - if get_option('instrumentation_level') >= 3: - print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) - if get_option('instrumentation_level') >= 2: - evaluation = ["HP_TIMER_START(residual_evaluation);", - "{}.residual({}, r);".format(n_go, v), - "HP_TIMER_STOP(residual_evaluation);", - "DUMP_TIMER({}, residual_evaluation, {}, true);".format(get_option("instrumentation_level"), timestream)] - evaluation.extend(print_times) +def start_region_timer_instruction(region, **kwargs): + if get_option("use_likwid"): + code = "LIKWID_MARKER_START(\"{}\");".format(region) else: - evaluation = ["{}.residual({}, r);".format(n_go, v)] + code = "HP_TIMER_START({});".format(region) + from loopy import CInstruction + return CInstruction([], code, **kwargs) - evaluation = ["{} r({});".format(t_v, v), "r=0.0;"] + evaluation - return evaluation +def stop_region_timer_instruction(region, **kwargs): + if get_option("use_likwid"): + code = "LIKWID_MARKER_STOP(\"{}\");".format(region) + else: + code = "HP_TIMER_STOP({});".format(region) + from loopy import CInstruction + return CInstruction([], code, **kwargs) -@preamble(section="timings") -def apply_jacobian_timer(): - n_go = name_gridoperator(get_form_ident()) - v = name_vector(get_form_ident()) - t_v = type_vector(get_form_ident()) - setup_timer() +def timed_region(region, actions): + if isinstance(actions, str): + actions = [actions] + + assert(isinstance(actions, list)) if get_option('instrumentation_level') >= 2: - # Write back times - from dune.codegen.generation import post_include - post_include("HP_DECLARE_TIMER(apply_jacobian);", filetag="driver") - timestream = name_timing_stream() + assembly = [] print_times = [] - lop_name = name_localoperator(get_form_ident()) - if get_option('instrumentation_level') >= 3: - print_times.append("{}.dump_timers({}, 
{}, true);".format(lop_name, timestream, name_timing_identifier())) + init_region_timer(region) - if is_linear(): - declaration = ["{} j({});".format(t_v, v), "j=0.0;"] - evaluation = ["{}.jacobian_apply({}, j);".format(n_go, v)] - else: - declaration = ["{} j0({});".format(t_v, v), "j0=0.0;", - "{} j1({});".format(t_v, v), "j1=0.0;"] - evaluation = ["{}.nonlinear_jacobian_apply({}, j0, j1);".format(n_go, v)] + if get_option('instrumentation_level') >= 3 and not get_option('use_likwid'): + timestream = name_timing_stream() + lop_name = name_localoperator(get_form_ident()) + print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) - if get_option('instrumentation_level') >= 2: - evaluation = ["HP_TIMER_START(apply_jacobian);"] + evaluation + ["HP_TIMER_STOP(apply_jacobian);", "DUMP_TIMER({}, apply_jacobian, {}, true);".format(get_option("instrumentation_level"), timestream)] - evaluation.extend(print_times) + assembly += start_region_timer(region) + assembly += actions + assembly += stop_region_timer(region) - return declaration + evaluation + return assembly + print_times + else: + return actions @preamble(section="timings") -def assemble_matrix_timer(): - t_go = type_gridoperator(get_form_ident()) +def evaluate_residual_timer(): n_go = name_gridoperator(get_form_ident()) v = name_vector(get_form_ident()) - t_v = type_vector(get_form_ident()) - setup_timer() + r = name_temporary_vector("r", get_form_ident()) - if get_option('instrumentation_level') >= 2: - # Write back times - from dune.codegen.generation import post_include - post_include("HP_DECLARE_TIMER(matrix_assembly);", filetag="driver") - timestream = name_timing_stream() - print_times = [] + action = "{}.residual({}, {});".format(n_go, v, r) - lop_name = name_localoperator(get_form_ident()) - if get_option('instrumentation_level') >= 3: - print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) + return 
timed_region("residual_evaluation", action) - if get_option('instrumentation_level') >= 2: - assembly = ["HP_TIMER_START(matrix_assembly);", - "{}.jacobian({},m);".format(n_go, v), - "HP_TIMER_STOP(matrix_assembly);", - "DUMP_TIMER({}, matrix_assembly, {}, true);".format(get_option("instrumentation_level"), timestream)] - assembly.extend(print_times) + +@preamble(section="timings") +def apply_jacobian_timer(): + form = get_form_ident() + n_go = name_gridoperator(form) + v = name_vector(form) + + if is_linear(): + j = name_temporary_vector("j", form) + action = "{}.jacobian_apply({}, {});".format(n_go, v, j) else: - assembly = ["{}.jacobian({},m);".format(n_go, v)] + j0 = name_temporary_vector("j0", form) + j1 = name_temporary_vector("j1", form) + action = "{}.nonlinear_jacobian_apply({}, {}, {});".format(n_go, v, j0, j1) + + return timed_region("apply_jacobian", action) + + +@preamble(section="timings") +def assemble_matrix_timer(): + n_go = name_gridoperator(get_form_ident()) + v = name_vector(get_form_ident()) + m = name_jacobian(get_form_ident()) - assembly = ["using M = typename {}::Traits::Jacobian;".format(t_go), - "M m({});".format(n_go)] + assembly + action = "{}.jacobian({},{});".format(n_go, v, m) - return assembly + return timed_region("matrix_assembly", action) diff --git a/python/dune/codegen/pdelab/localoperator.py b/python/dune/codegen/pdelab/localoperator.py index 70ab9f4793a6163559d070e33e3600f9cd8c5578..eb7e1d3a50e7f8ba4b7b0f476c54565925c15d2a 100644 --- a/python/dune/codegen/pdelab/localoperator.py +++ b/python/dune/codegen/pdelab/localoperator.py @@ -17,6 +17,7 @@ from dune.codegen.generation import (accumulation_mixin, constructor_parameter, domain, dump_accumulate_timer, + register_liwkid_timer, end_of_file, function_mangler, generator_factory, @@ -673,6 +674,19 @@ class TimerMethod(ClassMember): ClassMember.__init__(self, content) +class RegisterLikwidMethod(ClassMember): + def __init__(self): + knl = name_example_kernel() + assert(knl is not 
None) + + content = ["void register_likwid_timers()", + "{"] + register_liwkid_timers = [i for i in retrieve_cache_items(condition='register_likwid_timers')] + content.extend(map(lambda x: ' ' + x, register_liwkid_timers)) + content += ["}"] + ClassMember.__init__(self, content) + + class LoopyKernelMethod(ClassMember): def __init__(self, signature, kernel, add_timings=True, initializer_list=[]): from loopy import generate_body @@ -693,26 +707,49 @@ class LoopyKernelMethod(ClassMember): from dune.codegen.pdelab.signatures import assembler_routine_name timer_name = assembler_routine_name() + '_kernel' name_example_kernel(name=timer_name) - post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile') - content.append(' ' + 'HP_TIMER_START({});'.format(timer_name)) - dump_accumulate_timer(timer_name) - if add_timings and get_option("instrumentation_level") >= 4: - setuptimer = '{}_kernel_setup'.format(assembler_routine_name()) - post_include('HP_DECLARE_TIMER({});'.format(setuptimer), filetag='operatorfile') - content.append(' HP_TIMER_START({});'.format(setuptimer)) - dump_accumulate_timer(setuptimer) + if get_option('use_likwid'): + from dune.codegen.pdelab.driver.timings import init_likwid_timer + include_file("likwid.h", filetag="operatorfile") + init_likwid_timer(timer_name) + content.append(' ' + 'LIKWID_MARKER_START(\"{}\");'.format(timer_name)) + register_liwkid_timer(timer_name) + else: + post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile') + content.append(' ' + 'HP_TIMER_START({});'.format(timer_name)) + dump_accumulate_timer(timer_name) + + if add_timings and get_option("instrumentation_level") >= 4: + setuptimer = '{}_kernel_setup'.format(assembler_routine_name()) + if get_option('use_likwid'): + from dune.codegen.pdelab.driver.timings import init_likwid_timer + init_likwid_timer(setuptimer) + content.append(' ' + 'LIKWID_MARKER_START(\"{}\");'.format(setuptimer)) + register_liwkid_timer(setuptimer) + else: + 
post_include('HP_DECLARE_TIMER({});'.format(setuptimer), filetag='operatorfile') + content.append(' HP_TIMER_START({});'.format(setuptimer)) + dump_accumulate_timer(setuptimer) # Add kernel preamble for i, p in kernel.preambles: content.append(' ' + p) + if add_timings and get_option('instrumentation_level') >= 4: + if get_option('use_likwid'): + content.append(' ' + 'LIKWID_MARKER_STOP(\"{}\");'.format(setuptimer)) + else: + content.append(' ' + 'HP_TIMER_STOP({});'.format(setuptimer)) + # Add kernel body content.extend(l for l in generate_body(kernel).split('\n')[1:-1]) # Stop timer if add_timings and get_option('instrumentation_level') >= 3: - content.append(' ' + 'HP_TIMER_STOP({});'.format(timer_name)) + if get_option('use_likwid'): + content.append(' ' + 'LIKWID_MARKER_STOP(\"{}\");'.format(timer_name)) + else: + content.append(' ' + 'HP_TIMER_STOP({});'.format(timer_name)) content.append('}') ClassMember.__init__(self, content, name=kernel.name if kernel is not None else "") @@ -1141,7 +1178,10 @@ def generate_localoperator_file(kernels, filename): if get_option('instrumentation_level') >= 3: include_file('dune/codegen/common/timer.hh', filetag='operatorfile') - operator_methods.append(TimerMethod()) + if get_option('use_likwid'): + operator_methods.append(RegisterLikwidMethod()) + else: + operator_methods.append(TimerMethod()) elif get_option('opcounter'): include_file('dune/codegen/common/timer.hh', filetag='operatorfile')