diff --git a/python/dune/codegen/generation/__init__.py b/python/dune/codegen/generation/__init__.py
index bed0256407b7259bab61b6e932c4a17761097e75..97090e18852359b10d1a2d3f74a268a3abac60f1 100644
--- a/python/dune/codegen/generation/__init__.py
+++ b/python/dune/codegen/generation/__init__.py
@@ -24,6 +24,7 @@ from dune.codegen.generation.cpp import (base_class,
                                          preamble,
                                          post_include,
                                          template_parameter,
+                                         dump_ssc_marks
                                          )
 
 from dune.codegen.generation.hooks import (hook,
diff --git a/python/dune/codegen/generation/cpp.py b/python/dune/codegen/generation/cpp.py
index b918291067f45c5f988bc8fdcea55651d538a9db..57dbab554850aa9a745b03a1ad018c9f47973964 100644
--- a/python/dune/codegen/generation/cpp.py
+++ b/python/dune/codegen/generation/cpp.py
@@ -55,3 +55,12 @@ def dump_accumulate_timer(name):
 @generator_factory(item_tags=("register_likwid_timers",))
 def register_liwkid_timer(name):
     return "LIKWID_MARKER_REGISTER(\"{}\");".format(name)
+
+
+@generator_factory(item_tags=("register_ssc_marks",))
+def dump_ssc_marks(name):
+    """Return a C++ statement that prints ``name`` together with its two region marks."""
+    from dune.codegen.pdelab.driver.timings import get_region_marks
+    start_mark, stop_mark = get_region_marks(name, driver=False)
+    template = 'std::cout << "{}: " << {} << " <--> " << {} << std::endl;'
+    return template.format(name, start_mark, stop_mark)
diff --git a/python/dune/codegen/pdelab/driver/timings.py b/python/dune/codegen/pdelab/driver/timings.py
index fc8cf3b710700959575c4ba6fe937f544e047d0c..32ffba460f1e1478562688894ab5616b6070060b 100644
--- a/python/dune/codegen/pdelab/driver/timings.py
+++ b/python/dune/codegen/pdelab/driver/timings.py
@@ -128,6 +128,12 @@ def local_operator_likwid():
     return "{}.register_likwid_timers();".format(lop_name)
 
 
+@preamble(section="timings")
+def local_operator_ssc_marks():
+    """Return the statement that calls ``dump_ssc_marks()`` on the local operator."""
+    return "{}.dump_ssc_marks();".format(name_localoperator(get_form_ident()))
+
+
 @cached
 def setup_timer():
     # TODO check that we are using YASP?
@@ -145,6 +151,8 @@ def setup_timer():
             post_include('#define __SSC_MARK(x) do{ __asm__ volatile' +
                          '("movl $" #x ", %%ebx\\n\\t.byte 100\\n\\t.byte 103\\n\\t.byte 144" : : : "%ebx"); } while(0)',
                          filetag='driver')
+            if get_option('instrumentation_level') >= 3:
+                local_operator_ssc_marks()
         else:
             from dune.codegen.loopy.target import type_floatingpoint
             pre_include("#define HP_TIMER_OPCOUNTER {}".format(type_floatingpoint()), filetag="driver")
@@ -170,15 +178,31 @@ def init_region_timer(region):
         post_include("HP_DECLARE_TIMER({});".format(region), filetag="driver")
 
 
-def get_region_marks(region):
-    return _sde_marks.setdefault(region, (2 * (len(_sde_marks) + 1), 2 * (len(_sde_marks) + 1) + 1))
+def get_region_marks(region, driver):
+    """Return the (start, stop) SSC mark pair of ``region``, allocating it on first use.
+
+    Marks of driver regions are multiples of 11, all other marks are unscaled.
+    Both kinds share the ``_sde_marks`` table, so a pair derived from the table
+    size alone can coincide with an existing one (2 * 1 * 11 == 2 * 11 * 1 == 22).
+    Such candidates are skipped so that every region gets a unique pair.
+    """
+    if region not in _sde_marks:
+        scale = 11 if driver else 1
+        taken = {mark for pair in _sde_marks.values() for mark in pair}
+        index = len(_sde_marks) + 1
+        marks = (2 * index * scale, (2 * index + 1) * scale)
+        while not taken.isdisjoint(marks):
+            index += 1
+            marks = (2 * index * scale, (2 * index + 1) * scale)
+        _sde_marks[region] = marks
+    return _sde_marks[region]
 
 
 def start_region_timer(region):
     if get_option("use_likwid"):
         return ["LIKWID_MARKER_START(\"{}\");".format(region)]
     elif get_option("use_sde"):
-        marks = get_region_marks(region)
+        marks = get_region_marks(region, driver=True)
         return ["__SSC_MARK(0x{});".format(marks[0])]
     else:
         return ["HP_TIMER_START({});".format(region)]
@@ -188,7 +212,7 @@ def stop_region_timer(region):
     if get_option("use_likwid"):
         return ["LIKWID_MARKER_STOP(\"{}\");".format(region)]
     elif get_option("use_sde"):
-        marks = get_region_marks(region)
+        marks = get_region_marks(region, driver=True)
         return ["__SSC_MARK(0x{});".format(marks[1]),
                 "std::cout << \"Timed region {}: {} <--> {}\" << std::endl;".format(region, *marks)]
     else:
diff --git a/python/dune/codegen/pdelab/localoperator.py b/python/dune/codegen/pdelab/localoperator.py
index eb7e1d3a50e7f8ba4b7b0f476c54565925c15d2a..a75c6e784250a9dd0d457a74176b85415d807525 100644
--- a/python/dune/codegen/pdelab/localoperator.py
+++ b/python/dune/codegen/pdelab/localoperator.py
@@ -32,6 +32,7 @@ from dune.codegen.generation import (accumulation_mixin,
                                      ReturnArg,
                                      run_hook,
                                      template_parameter,
+                                     dump_ssc_marks
                                      )
 from dune.codegen.cgen.clazz import (AccessModifier,
                                      BaseClass,
@@ -687,6 +688,21 @@ class RegisterLikwidMethod(ClassMember):
         ClassMember.__init__(self, content)
 
 
+class RegisterSSCMarksMethod(ClassMember):
+    """Class member ``void dump_ssc_marks()`` made of all cached 'register_ssc_marks' items."""
+    def __init__(self):
+        knl = name_example_kernel()
+        assert(knl is not None)
+
+        # The signature and the opening brace are two separate content lines
+        content = ["void dump_ssc_marks()",
+                   "{"]
+        mark_dumps = retrieve_cache_items(condition='register_ssc_marks')
+        content.extend(' ' + statement for statement in mark_dumps)
+        content.append("}")
+        ClassMember.__init__(self, content)
+
+
 class LoopyKernelMethod(ClassMember):
     def __init__(self, signature, kernel, add_timings=True, initializer_list=[]):
         from loopy import generate_body
@@ -714,6 +730,14 @@ class LoopyKernelMethod(ClassMember):
                     init_likwid_timer(timer_name)
                     content.append(' ' + 'LIKWID_MARKER_START(\"{}\");'.format(timer_name))
                     register_liwkid_timer(timer_name)
+                elif get_option('use_sde'):
+                    post_include('#define __SSC_MARK(x) do{ __asm__ volatile' +
+                                 '("movl $" #x ", %%ebx\\n\\t.byte 100\\n\\t.byte 103\\n\\t.byte 144" : : : "%ebx"); } while(0)',
+                                 filetag='operatorfile')
+                    from dune.codegen.pdelab.driver.timings import get_region_marks
+                    marks = get_region_marks(timer_name, driver=False)
+                    content.append(' ' + '__SSC_MARK(0x{});'.format(marks[0]))
+                    dump_ssc_marks(timer_name)
                 else:
                     post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
                     content.append(' ' + 'HP_TIMER_START({});'.format(timer_name))
@@ -726,6 +750,11 @@ class LoopyKernelMethod(ClassMember):
                         init_likwid_timer(setuptimer)
                         content.append(' ' + 'LIKWID_MARKER_START(\"{}\");'.format(setuptimer))
                         register_liwkid_timer(setuptimer)
+                    elif get_option('use_sde'):
+                        from dune.codegen.pdelab.driver.timings import get_region_marks
+                        setup_marks = get_region_marks(setuptimer, driver=False)
+                        content.append(' ' + '__SSC_MARK(0x{});'.format(setup_marks[0]))
+                        dump_ssc_marks(setuptimer)
                     else:
                         post_include('HP_DECLARE_TIMER({});'.format(setuptimer), filetag='operatorfile')
                         content.append(' HP_TIMER_START({});'.format(setuptimer))
@@ -738,6 +767,8 @@ class LoopyKernelMethod(ClassMember):
             if add_timings and get_option('instrumentation_level') >= 4:
                 if get_option('use_likwid'):
                     content.append(' ' + 'LIKWID_MARKER_STOP(\"{}\");'.format(setuptimer))
+                elif get_option('use_sde'):
+                    content.append(' ' + '__SSC_MARK(0x{});'.format(setup_marks[1]))
                 else:
                     content.append(' ' + 'HP_TIMER_STOP({});'.format(setuptimer))
 
@@ -748,6 +779,8 @@ class LoopyKernelMethod(ClassMember):
             if add_timings and get_option('instrumentation_level') >= 3:
                 if get_option('use_likwid'):
                     content.append(' ' + 'LIKWID_MARKER_STOP(\"{}\");'.format(timer_name))
+                elif get_option('use_sde'):
+                    content.append(' ' + '__SSC_MARK(0x{});'.format(marks[1]))
                 else:
                     content.append(' ' + 'HP_TIMER_STOP({});'.format(timer_name))
 
@@ -1180,6 +1213,8 @@ def generate_localoperator_file(kernels, filename):
         include_file('dune/codegen/common/timer.hh', filetag='operatorfile')
         if get_option('use_likwid'):
             operator_methods.append(RegisterLikwidMethod())
+        elif get_option('use_sde'):
+            operator_methods.append(RegisterSSCMarksMethod())
         else:
             operator_methods.append(TimerMethod())
     elif get_option('opcounter'):