@generator_factory(item_tags=("register_ssc_marks",))
def dump_ssc_marks(name):
    """Build the C++ statement that prints the SSC mark pair of timer ``name``.

    The pair is looked up via ``get_region_marks(name, driver=False)`` and
    spliced into a ``std::cout`` statement of the form
    ``<name>: <start> <--> <stop>``.

    :arg name: The name of the timer/region whose marks should be printed.
    :returns: A string holding one C++ output statement.
    """
    # Function-scope import, as in the original change.
    # NOTE(review): presumably this avoids a circular import, since
    # timings.py itself imports from dune.codegen.generation - confirm.
    from dune.codegen.pdelab.driver.timings import get_region_marks

    start_mark, stop_mark = get_region_marks(name, driver=False)
    template = 'std::cout << "{}: " << {} << " <--> " << {} << std::endl;'
    return template.format(name, start_mark, stop_mark)
def ssc_macro():
    """Return the preprocessor definition of the ``__SSC_MARK(x)`` macro.

    The macro wraps a single inline-assembly statement in ``do{ ... } while(0)``.
    The assembly moves the mark value ``x`` into ``ebx`` and then emits the
    byte sequence ``100, 103, 144``.

    :returns: The complete ``#define`` line as one string (no newline).
    """
    asm_statement = ('__asm__ __volatile__'
                     '("movl %0, %%ebx; .byte 100, 103, 144" : :"i"(x) : "%ebx");')
    return '#define __SSC_MARK(x) do{ ' + asm_statement + ' } while(0)'
def get_region_marks(region, driver):
    """Return the (start, stop) SSC mark pair assigned to ``region``.

    Marks are assigned on first request and stored in the module-level
    ``_sde_marks`` dictionary, so repeated calls for the same region always
    return the same pair (regardless of the ``driver`` flag of later calls).

    The candidate pair for the ``k``-th registered region is
    ``(2 * k, 2 * k + 1)``, scaled by 11 for driver regions and by 1 for
    operator regions. Both kinds share one counter, so the unscaled and the
    scaled sequence can produce the same number (e.g. the operator pair
    ``(22, 23)`` versus the driver pair ``(22, 33)``). Callers paste the
    numbers verbatim into ``__SSC_MARK(0x...)``, so such a clash would make two
    different regions emit the same mark. Candidates that overlap marks
    already handed out are therefore skipped; whenever no clash occurs the
    numbering is identical to the plain counter scheme.

    :arg region: Name of the timed region (dictionary key).
    :arg driver: Truthy for regions timed in the driver (marks are multiples
        of 11), falsy for regions timed inside the local operator.
    :returns: A tuple ``(start_mark, stop_mark)`` of integers.
    """
    if region in _sde_marks:
        return _sde_marks[region]

    scale = 11 if driver else 1
    # Every mark that has been handed out so far, start and stop alike.
    taken = {mark for pair in _sde_marks.values() for mark in pair}

    index = len(_sde_marks) + 1
    while True:
        candidate = (2 * index * scale, (2 * index + 1) * scale)
        if taken.isdisjoint(candidate):
            break
        # Clash with a previously assigned mark: advance to the next pair.
        index += 1

    _sde_marks[region] = candidate
    return candidate
class RegisterSSCMarksMethod(ClassMember):
    """Class member providing the generated ``dump_ssc_marks()`` method.

    The method body is made up of every cached item carrying the
    ``register_ssc_marks`` tag, each prefixed with an indentation string.
    """

    def __init__(self):
        example_kernel = name_example_kernel()
        assert example_kernel is not None

        # Signature and opening brace form a single generated line.
        lines = ["void dump_ssc_marks(){"]
        statements = list(retrieve_cache_items(condition='register_ssc_marks'))
        lines.extend(' ' + statement for statement in statements)
        lines.append("}")
        ClassMember.__init__(self, lines)
filetag='operatorfile') content.append(' ' + 'HP_TIMER_START({});'.format(timer_name)) @@ -735,6 +755,11 @@ class LoopyKernelMethod(ClassMember): init_likwid_timer(setuptimer) content.append(' ' + 'LIKWID_MARKER_START(\"{}\");'.format(setuptimer)) register_liwkid_timer(setuptimer) + elif get_option('use_sde'): + from dune.codegen.pdelab.driver.timings import get_region_marks + setup_marks = get_region_marks(setuptimer, driver=False) + content.append(' ' + '__SSC_MARK(0x{});'.format(setup_marks[0])) + dump_ssc_marks(setuptimer) else: post_include('HP_DECLARE_TIMER({});'.format(setuptimer), filetag='operatorfile') content.append(' HP_TIMER_START({});'.format(setuptimer)) @@ -747,6 +772,8 @@ class LoopyKernelMethod(ClassMember): if add_timings and get_option('instrumentation_level') >= 4: if get_option('use_likwid'): content.append(' ' + 'LIKWID_MARKER_STOP(\"{}\");'.format(setuptimer)) + elif get_option('use_sde'): + content.append(' ' + '__SSC_MARK(0x{});'.format(setup_marks[1])) else: content.append(' ' + 'HP_TIMER_STOP({});'.format(setuptimer)) @@ -757,6 +784,8 @@ class LoopyKernelMethod(ClassMember): if add_timings and get_option('instrumentation_level') >= 3: if get_option('use_likwid'): content.append(' ' + 'LIKWID_MARKER_STOP(\"{}\");'.format(timer_name)) + elif get_option('use_sde'): + content.append(' ' + '__SSC_MARK(0x{});'.format(marks[1])) else: content.append(' ' + 'HP_TIMER_STOP({});'.format(timer_name)) @@ -1219,6 +1248,8 @@ def generate_localoperator_file(kernels, filename): include_file('dune/codegen/common/timer.hh', filetag='operatorfile') if get_option('use_likwid'): operator_methods.append(RegisterLikwidMethod()) + elif get_option('use_sde'): + operator_methods.append(RegisterSSCMarksMethod()) else: operator_methods.append(TimerMethod()) elif get_option('opcounter'):