From 1f867f4fb702793ff8b13a6ef5ddfe492e2c3fb6 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Wed, 4 Oct 2017 13:50:02 +0200
Subject: [PATCH] Add a timer for the setup phase of each localoperator kernel

---
 python/dune/perftool/pdelab/localoperator.py | 13 +++++++++----
 python/dune/perftool/sumfact/realization.py  |  6 ++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index a8b69b73..5797c08c 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -614,10 +614,6 @@ class LoopyKernelMethod(ClassMember):
 
         content.append('{')
         if kernel is not None:
-            # Add kernel preamble
-            for i, p in kernel.preambles:
-                content.append('  ' + p)
-
             # Start timer
             if add_timings and get_option('instrumentation_level') >= 3:
                 from dune.perftool.pdelab.signatures import assembler_routine_name
@@ -627,6 +623,15 @@ class LoopyKernelMethod(ClassMember):
                 content.append('  ' + 'HP_TIMER_START({});'.format(timer_name))
                 dump_accumulate_timer(timer_name)
 
+            if add_timings and get_option("instrumentation_level") >= 4:
+                setuptimer = '{}_kernel_setup'.format(assembler_routine_name())
+                post_include('HP_DECLARE_TIMER({});'.format(setuptimer), filetag='operatorfile')
+                content.append('  HP_TIMER_START({});'.format(setuptimer))
+
+            # Add kernel preamble
+            for i, p in kernel.preambles:
+                content.append('  ' + p)
+
             # Add kernel body
             content.extend(l for l in generate_body(kernel).split('\n')[1:-1])
 
diff --git a/python/dune/perftool/sumfact/realization.py b/python/dune/perftool/sumfact/realization.py
index 5fe9ebb1..13acc6ac 100644
--- a/python/dune/perftool/sumfact/realization.py
+++ b/python/dune/perftool/sumfact/realization.py
@@ -59,6 +59,12 @@ def _realize_sum_factorization_kernel(sf):
 
     # Measure times and count operations in c++ code
     if get_option("instrumentation_level") >= 4:
+        if sf.stage == 1:
+            setuptimer = '{}_kernel_setup'.format(assembler_routine_name())
+            insn_dep = insn_dep.union(frozenset({instruction(code='HP_TIMER_STOP({});'.format(setuptimer),
+                                                             within_inames=frozenset(sf.within_inames),
+                                                             depends_on=insn_dep)}))
+
         timer_name = assembler_routine_name() + '_kernel' + '_stage{}'.format(sf.stage)
         post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
         dump_accumulate_timer(timer_name)
-- 
GitLab