From 366c6c7b45c702a5e1ca3036a25ada013b275795 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20He=C3=9F?= <rene.hess@iwr.uni-heidelberg.de>
Date: Tue, 30 Aug 2016 17:31:01 +0200
Subject: [PATCH] Add kernel time measurements

---
 dune/perftool/common/timer.hh                |  36 +++++++
 python/dune/perftool/file.py                 |   2 +
 python/dune/perftool/generation/__init__.py  |   1 +
 python/dune/perftool/generation/cpp.py       |  16 +++
 python/dune/perftool/pdelab/driver.py        |  19 +++-
 python/dune/perftool/pdelab/localoperator.py | 100 ++++++++++++++++++-
 6 files changed, 167 insertions(+), 7 deletions(-)

diff --git a/dune/perftool/common/timer.hh b/dune/perftool/common/timer.hh
index 890a8b45..189440c0 100644
--- a/dune/perftool/common/timer.hh
+++ b/dune/perftool/common/timer.hh
@@ -68,4 +68,40 @@
 
 #endif // ENABLE_HP_TIMERS
 
+
+#ifdef ENABLE_COUNTER
+
+#define DUMP_TIMER(name,os,reset)                       \
+  os << "===== "  #name  " =====" << std::endl; \
+  os << "elapsed: " << HP_TIMER_ELAPSED(name) << std::endl; \
+  HP_TIMER_OPCOUNTERS(name).reportOperations(os,reset);
+
+#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops)  \
+  os << "===== "  #name " =====" << std::endl; \
+  os << "elapsed: " << HP_TIMER_ELAPSED(name) << std::endl; \
+  time += HP_TIMER_ELAPSED(name); \
+  ops += HP_TIMER_OPCOUNTERS(name); \
+  HP_TIMER_OPCOUNTERS(name).reportOperations(os,reset);
+
+#elif defined ENABLE_HP_TIMERS
+
+#define DUMP_TIMER(name,os,reset)                       \
+  os << "===== "  #name  " =====" << std::endl; \
+  os << "elapsed: " << HP_TIMER_ELAPSED(name) << std::endl; \
+  if (reset) HP_TIMER_RESET(name);
+
+#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops)  \
+  os << "===== "  #name " =====" << std::endl; \
+  os << "elapsed: " << HP_TIMER_ELAPSED(name) << std::endl; \
+  time += HP_TIMER_ELAPSED(name); \
+  if (reset) HP_TIMER_RESET(name);
+
+#else
+
+#define DUMP_TIMER(name,os,reset)
+#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops)
+
+#endif
+
+
 #endif // HP_TIMER_HH
diff --git a/python/dune/perftool/file.py b/python/dune/perftool/file.py
index 3fcd0335..9403ff8e 100644
--- a/python/dune/perftool/file.py
+++ b/python/dune/perftool/file.py
@@ -32,6 +32,7 @@ def generate_file(filename, tag, content, headerguard=True):
             for line in define:
                 f.write(line)
             f.write('\n')
+        f.write('\n')
 
         # Add the includes from the cache
         for inc in retrieve_cache_items('{} and include'.format(tag)):
@@ -40,6 +41,7 @@ def generate_file(filename, tag, content, headerguard=True):
             for line in inc.generate():
                 f.write(line)
             f.write('\n')
+        f.write('\n')
 
         # Add post include lines from the cache
         for define in retrieve_cache_items('{} and post_include'.format(tag)):
diff --git a/python/dune/perftool/generation/__init__.py b/python/dune/perftool/generation/__init__.py
index cd2779b3..6496c2ca 100644
--- a/python/dune/perftool/generation/__init__.py
+++ b/python/dune/perftool/generation/__init__.py
@@ -13,6 +13,7 @@ from dune.perftool.generation.cpp import (base_class,
                                           class_basename,
                                           class_member,
                                           constructor_parameter,
+                                          dump_accumulate_timer,
                                           include_file,
                                           initializer_list,
                                           pre_include,
diff --git a/python/dune/perftool/generation/cpp.py b/python/dune/perftool/generation/cpp.py
index 70735c65..04b53241 100644
--- a/python/dune/perftool/generation/cpp.py
+++ b/python/dune/perftool/generation/cpp.py
@@ -74,3 +74,19 @@ def class_basename(classtag=None):
     assert classtag
 
     return generator_factory(item_tags=("clazz", classtag, "basename"))
+
+
+def dump_accumulate_timer(name):
+    gen = generator_factory(item_tags=("dump_timers"), no_deco=True)
+
+    from dune.perftool.pdelab.localoperator import (name_time_dumper_os,
+                                                    name_time_dumper_reset,
+                                                    name_time_dumper_t,
+                                                    name_time_dumper_counter)
+    os = name_time_dumper_os()
+    reset = name_time_dumper_reset()
+    t = name_time_dumper_t()
+    counter = name_time_dumper_counter()
+
+    code = "DUMP_AND_ACCUMULATE_TIMER({},{},{},{},{});".format(name, os, reset, t, counter)
+    return gen(code)
diff --git a/python/dune/perftool/pdelab/driver.py b/python/dune/perftool/pdelab/driver.py
index 3bca95f2..57ae37c0 100644
--- a/python/dune/perftool/pdelab/driver.py
+++ b/python/dune/perftool/pdelab/driver.py
@@ -1109,21 +1109,32 @@ def dune_solve():
         snp = name_stationarynonlinearproblemsolver(go_type, go)
         solve = "{}.apply();".format(snp)
 
-    timer = get_option('timer')
-    if timer:
+    if get_option('timer'):
+        # Necessary includes and defines
         from dune.perftool.generation import pre_include
         from dune.perftool.generation import post_include
         pre_include("#define ENABLE_HP_TIMERS", filetag="driver")
         include_file("dune/perftool/common/timer.hh", filetag="driver")
         post_include("HP_DECLARE_TIMER(total);", filetag="driver")
+
+        # Print times after solving
+        from dune.perftool.generation import get_global_context_value
+        formdatas = get_global_context_value("formdatas")
+        print_times = []
+        for formdata in formdatas:
+            lop_name = name_localoperator(formdata)
+            print_times.append("{}.dump_timers(std::cout, true);".format(lop_name))
         solve = ["HP_TIMER_START(total);",
                  "{}".format(solve),
                  "HP_TIMER_STOP(total);",
                  "",
                  "// Print timer results",
                  "std::cout << std::endl;",
-                 "std::cout << \"===== total =====\" << std::endl;",
-                 "std::cout << \"timer elapsed: \" << HP_TIMER_ELAPSED(total) << std::endl;"]
+                 "std::cout << \"=== Time for calling solve method ===\" << std::endl;",
+                 "DUMP_TIMER({},{},{});".format('total', 'std::cout', 'true'),
+                 "std::cout << std::endl;"
+                 "std::cout << \"=== {} ===\" << std::endl;".format(lop_name)]
+        solve.extend(print_times)
 
     return solve
 
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index e7abf192..49574c5c 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -6,9 +6,12 @@ from dune.perftool.generation import (base_class,
                                       class_basename,
                                       class_member,
                                       constructor_parameter,
+                                      dump_accumulate_timer,
                                       global_context,
                                       include_file,
                                       initializer_list,
+                                      post_include,
+                                      retrieve_cache_items,
                                       symbol,
                                       template_parameter,
                                       )
@@ -151,6 +154,38 @@ def class_type_from_cache(classtag):
     return basename, basename + tparam_str
 
 
+def assembler_routine_name():
+    from dune.perftool.generation import get_global_context_value
+    integral_type = get_global_context_value("integral_type")
+    form_type = get_global_context_value("form_type")
+
+    if form_type == 'residual':
+        if integral_type == 'cell':
+            return "alpha_volume"
+        if integral_type == 'exterior_facet':
+            return "alpha_boundary"
+        if integral_type == 'interior_facet':
+            return "alpha_skeleton"
+
+    if form_type == 'jacobian':
+        if integral_type == 'cell':
+            return "jacobian_volume"
+        if integral_type == 'exterior_facet':
+            return "jacobian_boundary"
+        if integral_type == 'interior_facet':
+            return "jacobian_skeleton"
+
+    if form_type == 'jacobian_apply':
+        if integral_type == 'cell':
+            return "jacobian_apply_volume"
+        if integral_type == 'exterior_facet':
+            return "jacobian_apply_boundary"
+        if integral_type == 'interior_facet':
+            return "jacobian_apply_skeleton"
+
+    assert False
+
+
 def assembly_routine_signature():
     from dune.perftool.generation import get_global_context_value
     integral_type = get_global_context_value("integral_type")
@@ -276,16 +311,72 @@ def generate_kernel(integrals):
     return kernel
 
 
+def name_time_dumper_os():
+    return "os"
+
+
+def name_time_dumper_reset():
+    return "reset"
+
+
+def name_time_dumper_t():
+    return "t"
+
+
+def name_time_dumper_counter():
+    return "counter"
+
+
+class TimerMethod(ClassMember):
+    def __init__(self):
+        os = name_time_dumper_os()
+        reset = name_time_dumper_reset()
+        t = name_time_dumper_t()
+        content = ["template <typename Stream>",
+                   "void dump_timers(Stream& {}, bool {})".format(os, reset),
+                   "{",
+                   "  double {} = 0.0;".format(t),
+                   "#ifdef ENABLE_COUNTERS",
+                   "  auto counter = HP_TIMER_OPCOUNTERS({})",
+                   "  counter.reset();",
+                   "#endif",
+                   ""]
+        dump_timers = [i for i in retrieve_cache_items(condition='dump_timers')]
+        content.extend(map(lambda x: '  ' + x, dump_timers))
+        content.extend(["    {} <<\"===== all_kernels =====\" << std::endl".format(os),
+                        "    <<\"elapsed: \" << {} << std::endl;".format(t),
+                        "#ifdef ENABLE_COUNTERS",
+                        "  counter.reportOperations({});".format(os),
+                        "#endif"])
+        content.append("}")
+        ClassMember.__init__(self, content)
+
+
 class AssemblyMethod(ClassMember):
-    def __init__(self, signature, kernel):
+    def __init__(self, signature, kernel, filename):
         from loopy import generate_body
         from cgen import LiteralLines, Block
         content = signature
         content.append('{')
         if kernel is not None:
+            # Add kernel preamble
             for i, p in kernel.preambles:
                 content.append('  ' + p)
+
+            # Start timer
+            if get_option('timer'):
+                timer_name = assembler_routine_name() + '_kernel'
+                post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
+                content.append('  ' + 'HP_TIMER_START({});'.format(timer_name))
+                dump_accumulate_timer(timer_name)
+
+            # Add kernel body
             content.extend(l for l in generate_body(kernel).split('\n')[1:-1])
+
+            # Stop timer
+            if get_option('timer'):
+                content.append('  ' + 'HP_TIMER_STOP({});'.format(timer_name))
+
         content.append('}')
         ClassMember.__init__(self, content)
 
@@ -447,11 +538,14 @@ def generate_localoperator_file(kernels, filename):
         it, ft = method
         with global_context(integral_type=it, form_type=ft):
             signature = assembly_routine_signature()
-            operator_methods.append(AssemblyMethod(signature, kernel))
+            operator_methods.append(AssemblyMethod(signature, kernel, filename))
+
+    if get_option('timer'):
+        include_file('dune/perftool/common/timer.hh', filetag='operatorfile')
+        operator_methods.append(TimerMethod())
 
     # Write the file!
     from dune.perftool.file import generate_file
-
     param = cgen_class_from_cache("parameterclass")
     # TODO take the name of this thing from the UFL file
     lop = cgen_class_from_cache("operator", members=operator_methods)
-- 
GitLab