From 366c6c7b45c702a5e1ca3036a25ada013b275795 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20He=C3=9F?= <rene.hess@iwr.uni-heidelberg.de> Date: Tue, 30 Aug 2016 17:31:01 +0200 Subject: [PATCH] Add kernel time measurements --- dune/perftool/common/timer.hh | 36 +++++++ python/dune/perftool/file.py | 2 + python/dune/perftool/generation/__init__.py | 1 + python/dune/perftool/generation/cpp.py | 16 +++ python/dune/perftool/pdelab/driver.py | 19 +++- python/dune/perftool/pdelab/localoperator.py | 100 ++++++++++++++++++- 6 files changed, 167 insertions(+), 7 deletions(-) diff --git a/dune/perftool/common/timer.hh b/dune/perftool/common/timer.hh index 890a8b45..189440c0 100644 --- a/dune/perftool/common/timer.hh +++ b/dune/perftool/common/timer.hh @@ -68,4 +68,40 @@ #endif // ENABLE_HP_TIMERS + +#ifdef ENABLE_COUNTER + +#define DUMP_TIMER(name,os,reset) \ + os << "===== " #name " =====" << std::endl; \ + os << "elapsed: " << HP_TIMER_ELAPSED(name) << std::endl; \ + HP_TIMER_OPCOUNTERS(name).reportOperations(os,reset); + +#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) \ + os << "===== " #name " =====" << std::endl; \ + os << "elapsed: " << HP_TIMER_ELAPSED(name) << std::endl; \ + time += HP_TIMER_ELAPSED(name); \ + ops += HP_TIMER_OPCOUNTERS(name); \ + HP_TIMER_OPCOUNTERS(name).reportOperations(os,reset); + +#elif defined ENABLE_HP_TIMERS + +#define DUMP_TIMER(name,os,reset) \ + os << "===== " #name " =====" << std::endl; \ + os << "elapsed: " << HP_TIMER_ELAPSED(name) << std::endl; \ + if (reset) HP_TIMER_RESET(name); + +#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) \ + os << "===== " #name " =====" << std::endl; \ + os << "elapsed: " << HP_TIMER_ELAPSED(name) << std::endl; \ + time += HP_TIMER_ELAPSED(name); \ + if (reset) HP_TIMER_RESET(name); + +#else + +#define DUMP_TIMER(name,os,reset) +#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) + +#endif + + #endif // HP_TIMER_HH diff --git a/python/dune/perftool/file.py b/python/dune/perftool/file.py index 3fcd0335..9403ff8e 100644 --- a/python/dune/perftool/file.py +++ b/python/dune/perftool/file.py @@ -32,6 +32,7 @@ def generate_file(filename, tag, content, headerguard=True): for line in define: f.write(line) f.write('\n') + f.write('\n') # Add the includes from the cache for inc in retrieve_cache_items('{} and include'.format(tag)): @@ -40,6 +41,7 @@ def generate_file(filename, tag, content, headerguard=True): for line in inc.generate(): f.write(line) f.write('\n') + f.write('\n') # Add post include lines from the cache for define in retrieve_cache_items('{} and post_include'.format(tag)): diff --git a/python/dune/perftool/generation/__init__.py b/python/dune/perftool/generation/__init__.py index cd2779b3..6496c2ca 100644 --- a/python/dune/perftool/generation/__init__.py +++ b/python/dune/perftool/generation/__init__.py @@ -13,6 +13,7 @@ from dune.perftool.generation.cpp import (base_class, class_basename, class_member, constructor_parameter, + dump_accumulate_timer, include_file, initializer_list, pre_include, diff --git a/python/dune/perftool/generation/cpp.py b/python/dune/perftool/generation/cpp.py index 70735c65..04b53241 100644 --- a/python/dune/perftool/generation/cpp.py +++ b/python/dune/perftool/generation/cpp.py @@ -74,3 +74,19 @@ def class_basename(classtag=None): assert classtag return generator_factory(item_tags=("clazz", classtag, "basename")) + + +def dump_accumulate_timer(name): + gen = generator_factory(item_tags=("dump_timers"), no_deco=True) + + from dune.perftool.pdelab.localoperator import (name_time_dumper_os, + name_time_dumper_reset, + name_time_dumper_t, + name_time_dumper_counter) + os = name_time_dumper_os() + reset = name_time_dumper_reset() + t = name_time_dumper_t() + counter = name_time_dumper_counter() + + code = "DUMP_AND_ACCUMULATE_TIMER({},{},{},{},{});".format(name, os, reset, t, counter) + return gen(code) diff --git a/python/dune/perftool/pdelab/driver.py b/python/dune/perftool/pdelab/driver.py index 3bca95f2..57ae37c0 100644 --- a/python/dune/perftool/pdelab/driver.py +++ b/python/dune/perftool/pdelab/driver.py @@ -1109,21 +1109,32 @@ def dune_solve(): snp = name_stationarynonlinearproblemsolver(go_type, go) solve = "{}.apply();".format(snp) - timer = get_option('timer') - if timer: + if get_option('timer'): + # Necessary includes and defines from dune.perftool.generation import pre_include from dune.perftool.generation import post_include pre_include("#define ENABLE_HP_TIMERS", filetag="driver") include_file("dune/perftool/common/timer.hh", filetag="driver") post_include("HP_DECLARE_TIMER(total);", filetag="driver") + + # Print times after solving + from dune.perftool.generation import get_global_context_value + formdatas = get_global_context_value("formdatas") + print_times = [] + for formdata in formdatas: + lop_name = name_localoperator(formdata) + print_times.append("{}.dump_timers(std::cout, true);".format(lop_name)) solve = ["HP_TIMER_START(total);", "{}".format(solve), "HP_TIMER_STOP(total);", "", "// Print timer results", "std::cout << std::endl;", - "std::cout << \"===== total =====\" << std::endl;", - "std::cout << \"timer elapsed: \" << HP_TIMER_ELAPSED(total) << std::endl;"] + "std::cout << \"=== Time for calling solve method ===\" << std::endl;", + "DUMP_TIMER({},{},{});".format('total', 'std::cout', 'true'), + "std::cout << std::endl;" + "std::cout << \"=== {} ===\" << std::endl;".format(lop_name)] + solve.extend(print_times) return solve diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index e7abf192..49574c5c 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -6,9 +6,12 @@ from dune.perftool.generation import (base_class, class_basename, class_member, constructor_parameter, + dump_accumulate_timer, global_context, include_file, initializer_list, + post_include, + retrieve_cache_items, symbol, template_parameter, ) @@ -151,6 +154,38 @@ def class_type_from_cache(classtag): return basename, basename + tparam_str +def assembler_routine_name(): + from dune.perftool.generation import get_global_context_value + integral_type = get_global_context_value("integral_type") + form_type = get_global_context_value("form_type") + + if form_type == 'residual': + if integral_type == 'cell': + return "alpha_volume" + if integral_type == 'exterior_facet': + return "alpha_boundary" + if integral_type == 'interior_facet': + return "alpha_skeleton" + + if form_type == 'jacobian': + if integral_type == 'cell': + return "jacobian_volume" + if integral_type == 'exterior_facet': + return "jacobian_boundary" + if integral_type == 'interior_facet': + return "jacobian_skeleton" + + if form_type == 'jacobian_apply': + if integral_type == 'cell': + return "jacobian_apply_volume" + if integral_type == 'exterior_facet': + return "jacobian_apply_boundary" + if integral_type == 'interior_facet': + return "jacobian_apply_skeleton" + + assert False + + def assembly_routine_signature(): from dune.perftool.generation import get_global_context_value integral_type = get_global_context_value("integral_type") @@ -276,16 +311,72 @@ def generate_kernel(integrals): return kernel +def name_time_dumper_os(): + return "os" + + +def name_time_dumper_reset(): + return "reset" + + +def name_time_dumper_t(): + return "t" + + +def name_time_dumper_counter(): + return "counter" + + +class TimerMethod(ClassMember): + def __init__(self): + os = name_time_dumper_os() + reset = name_time_dumper_reset() + t = name_time_dumper_t() + content = ["template <typename Stream>", + "void dump_timers(Stream& {}, bool {})".format(os, reset), + "{", + " double {} = 0.0;".format(t), + "#ifdef ENABLE_COUNTERS", + " auto counter = HP_TIMER_OPCOUNTERS({})", + " counter.reset();", + "#endif", + ""] + dump_timers = [i for i in retrieve_cache_items(condition='dump_timers')] + content.extend(map(lambda x: ' ' + x, dump_timers)) + content.extend([" {} <<\"===== all_kernels =====\" << std::endl".format(os), + " <<\"elapsed: \" << {} << std::endl;".format(t), + "#ifdef ENABLE_COUNTERS", + " counter.reportOperations({});".format(os), + "#endif"]) + content.append("}") + ClassMember.__init__(self, content) + + class AssemblyMethod(ClassMember): - def __init__(self, signature, kernel): + def __init__(self, signature, kernel, filename): from loopy import generate_body from cgen import LiteralLines, Block content = signature content.append('{') if kernel is not None: + # Add kernel preamble for i, p in kernel.preambles: content.append(' ' + p) + + # Start timer + if get_option('timer'): + timer_name = assembler_routine_name() + '_kernel' + post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile') + content.append(' ' + 'HP_TIMER_START({});'.format(timer_name)) + dump_accumulate_timer(timer_name) + + # Add kernel body content.extend(l for l in generate_body(kernel).split('\n')[1:-1]) + + # Stop timer + if get_option('timer'): + content.append(' ' + 'HP_TIMER_STOP({});'.format(timer_name)) + content.append('}') ClassMember.__init__(self, content) @@ -447,11 +538,14 @@ def generate_localoperator_file(kernels, filename): it, ft = method with global_context(integral_type=it, form_type=ft): signature = assembly_routine_signature() - operator_methods.append(AssemblyMethod(signature, kernel)) + operator_methods.append(AssemblyMethod(signature, kernel, filename)) + + if get_option('timer'): + include_file('dune/perftool/common/timer.hh', filetag='operatorfile') + operator_methods.append(TimerMethod()) # Write the file! from dune.perftool.file import generate_file - param = cgen_class_from_cache("parameterclass") # TODO take the name of this thing from the UFL file lop = cgen_class_from_cache("operator", members=operator_methods) -- GitLab