diff --git a/applications/poisson_dg/CMakeLists.txt b/applications/poisson_dg/CMakeLists.txt index 9bb13626e9b948e987c98188a1c9e61c1e0385e0..a31cf6202f47491d864bb3f4a0126858747331a0 100644 --- a/applications/poisson_dg/CMakeLists.txt +++ b/applications/poisson_dg/CMakeLists.txt @@ -3,3 +3,5 @@ dune_add_formcompiler_system_test(UFLFILE poisson_dg.ufl INIFILE poisson_dg.mini NO_TESTS ) + +dune_symlink_to_source_files(FILES donkey.sbatch) diff --git a/applications/poisson_dg/donkey.sbatch b/applications/poisson_dg/donkey.sbatch new file mode 100755 index 0000000000000000000000000000000000000000..50a3570163421c471b282ef5a36cfde07ccf0f65 --- /dev/null +++ b/applications/poisson_dg/donkey.sbatch @@ -0,0 +1,46 @@ +#!/bin/bash + +# Set a name for the job +#SBATCH -J poisson_dg + +# Number of processes +#SBATCH -n 16 + +# Choose the SLURM partition (sinfo for overview) +#SBATCH -p haswell16c + +# Each process needs two PUs: circumvent hyperthreading +#SBATCH -c 2 + +# Load modules (kept below the #SBATCH block: sbatch stops reading directives at the first command) +ml gcc/6.2 +ml intelmpi +ml openblas +ml metis +ml suitesparse + +# Pin processes to cores +# (Possible values: socket, core) +SRUNOPT="--cpu_bind=verbose,core" + +# Run the opcount executables +srun $SRUNOPT ./app_poisson_dg_deg2_opcount app_poisson_dg_3d_deg2_opcount.ini +srun $SRUNOPT ./app_poisson_dg_deg3_opcount app_poisson_dg_3d_deg3_opcount.ini +srun $SRUNOPT ./app_poisson_dg_deg4_opcount app_poisson_dg_3d_deg4_opcount.ini +srun $SRUNOPT ./app_poisson_dg_deg5_opcount app_poisson_dg_3d_deg5_opcount.ini +srun $SRUNOPT ./app_poisson_dg_deg6_opcount app_poisson_dg_3d_deg6_opcount.ini +srun $SRUNOPT ./app_poisson_dg_deg7_opcount app_poisson_dg_3d_deg7_opcount.ini +srun $SRUNOPT ./app_poisson_dg_deg8_opcount app_poisson_dg_3d_deg8_opcount.ini + +# Run the timing executables +COUNT=0 +while [ $COUNT -lt 2 ]; do + srun $SRUNOPT ./app_poisson_dg_deg2_nonopcount app_poisson_dg_3d_deg2_nonopcount.ini + srun $SRUNOPT ./app_poisson_dg_deg3_nonopcount app_poisson_dg_3d_deg3_nonopcount.ini + srun 
$SRUNOPT ./app_poisson_dg_deg4_nonopcount app_poisson_dg_3d_deg4_nonopcount.ini + srun $SRUNOPT ./app_poisson_dg_deg5_nonopcount app_poisson_dg_3d_deg5_nonopcount.ini + srun $SRUNOPT ./app_poisson_dg_deg6_nonopcount app_poisson_dg_3d_deg6_nonopcount.ini + srun $SRUNOPT ./app_poisson_dg_deg7_nonopcount app_poisson_dg_3d_deg7_nonopcount.ini + srun $SRUNOPT ./app_poisson_dg_deg8_nonopcount app_poisson_dg_3d_deg8_nonopcount.ini + COUNT=$((COUNT + 1)) +done diff --git a/applications/poisson_dg/poisson_dg.mini b/applications/poisson_dg/poisson_dg.mini index e7a0eedf624a78760143f777449033c3fcafefb2..26c9b94ea8ae55f39ca50ca18df2716fc79409a6 100644 --- a/applications/poisson_dg/poisson_dg.mini +++ b/applications/poisson_dg/poisson_dg.mini @@ -1,15 +1,44 @@ -__name = app_poisson_dg_{__exec_suffix} -__exec_suffix = deg{formcompiler.ufl_variants.degree} +__name = app_poisson_dg_{dim}d_{__exec_suffix} +__exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix} -extension = 1.0 1.0 1.0 -cells = 16 16 16 +opcount_suffix = opcount, nonopcount | expand opcount + +# Calculate the size of the grid to equilibrate it to mbperrank MB per rank +# Input parameters +dim = 3 +mbperrank = 10 +ranks = 16 +floatingbytes = 8 + +# Metaini Calculations +memperrank = {mbperrank} * 1048576 | eval +dofsperdir = {formcompiler.ufl_variants.degree} + 1 | eval +celldofs = {dofsperdir} ** {dim} | eval +cellsperrank = {memperrank} / ({floatingbytes} * {celldofs}) | eval +cellsperdir = {cellsperrank} ** (1/{dim}) | eval | toint +firstdircells = {ranks} * {cellsperdir} | eval +dimminusone = {dim} - 1 | eval +ones = 1 | repeat {dimminusone} +otherdircells = {cellsperdir} | repeat {dimminusone} + +# Set up the grid! 
+extension = 1.0 | repeat {dim} +cells = {firstdircells} {otherdircells} +partitioning = {ranks} {ones} [wrapper.vtkcompare] name = {__name} extension = vtu [formcompiler] +fastdg = 1 sumfact = 1 +vectorize_quad = 1 +vectorize_grads = 1 +instrumentation_level = 4 +opcounter = 1, 0 | expand opcount +time_opcounter = 0, 1 | expand opcount [formcompiler.ufl_variants] -degree = 1, 2 | expand +cell = hexahedron +degree = 2, 3, 4, 5, 6, 7, 8 | expand diff --git a/applications/poisson_dg/poisson_dg.ufl b/applications/poisson_dg/poisson_dg.ufl index 6a7e50f7e9f28f333304e713214f77516d00fe45..536360533793fe795d9b5bd303e023f9ecb31c19 100644 --- a/applications/poisson_dg/poisson_dg.ufl +++ b/applications/poisson_dg/poisson_dg.ufl @@ -1,5 +1,3 @@ -cell = hexahedron - x = SpatialCoordinate(cell) f = -6. g = x[0]*x[0] + x[1]*x[1] + x[2]*x[2] diff --git a/bin/CMakeLists.txt b/bin/CMakeLists.txt index c1aaf2f56753ff1efd504ba81ad5a5090fefeddf..e8ea4bd424beb1d30c28ca716113021b2713aa6b 100644 --- a/bin/CMakeLists.txt +++ b/bin/CMakeLists.txt @@ -1,3 +1,14 @@ +dune_install_python_script(SCRIPT process_measurements.py + REQUIRES pandas + ) + +dune_install_python_script(SCRIPT plot_measurements.py + REQUIRES pandas matplotlib + ) + +dune_install_python_script(SCRIPT performance_regression.py + REQUIRES pandas + ) + dune_symlink_to_source_files(FILES make_graph.sh - performance_regression.py ) diff --git a/bin/calculate_floprate.py b/bin/calculate_floprate.py deleted file mode 100644 index 49b3136128617d98eb42af7c2487f35384709d4f..0000000000000000000000000000000000000000 --- a/bin/calculate_floprate.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python - -import pandas - - -def parse_data(): - frame = pandas.read_csv('timings.csv', header=None, names=('exec', 'kernel', 'what', 'value'), delimiter=' ') - time = frame[frame.what == "time"] - ops = frame[frame.what != "time"] - - # Here we should take 'min', but right now there are some zeroes written - timedata = time.groupby(('exec', 
'kernel'))['value'].max() - opsdata = ops.groupby(('exec', 'kernel'))['value'].max() - - return timedata, opsdata - - -def calculate_floprate(): - time, ops = parse_data() - - for key in time.keys(): - exe, kernel = key - print("exe={}".format(exe)) - print("kernel={}".format(kernel)) - print("FLOPS={}\n".format(ops[exe][kernel] / time[exe][kernel])) - - -if __name__ == '__main__': - calculate_floprate() diff --git a/bin/plot_measurements.py b/bin/plot_measurements.py new file mode 100755 index 0000000000000000000000000000000000000000..4d7a2654a55b6e38b11679e0c93365e936026017 --- /dev/null +++ b/bin/plot_measurements.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python + +import pandas +import matplotlib.pyplot as plt +import sys + +# Usage: plot_measurements.py TITLE -- plots the residual_evaluation rows of floprates.csv and doftimes.csv over the degree; the PDF file name is derived from TITLE +title = sys.argv[1] +filename = title.lower().replace(" ", "_") + ".pdf" + +flopframe = pandas.read_csv("./poissondg-insn2/floprates.csv", header=None, delimiter=" ", names=("exec", "degree", "what", "GFlops")) +flopframe = flopframe[flopframe.what == "residual_evaluation"] + +timeframe = pandas.read_csv("./poissondg-insn2/doftimes.csv", header=None, delimiter=" ", names=("exec", "degree", "what", "DOFs")) +timeframe = timeframe[timeframe.what == "residual_evaluation"] + +fig, ax1 = plt.subplots() + +ax2 = ax1.twinx() +ax1.plot(flopframe['degree'], flopframe['GFlops']) +ax2.plot(timeframe['degree'], timeframe['DOFs']) + +ax1.set_xlabel("Polynomial degree") +ax1.set_ylabel("GFlops") +ax2.set_ylabel("MDOFs / s") +plt.title(title) + +plt.savefig(filename) diff --git a/bin/process_measurements.py b/bin/process_measurements.py new file mode 100755 index 0000000000000000000000000000000000000000..945687f99f58a673693b763544d5b69eaae2e059 --- /dev/null +++ b/bin/process_measurements.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python + +import os +import pandas +import re + + +def join_csv_files(): + with open('timings.csv', 'w') as out: + for f in os.listdir(os.getcwd()): + match = re.match(".*rank-([0-9]*).csv", f) + if match: + for line in open(f, 'r'): + out.write("{} 
{}".format(match.group(1), line)) + + +def calculate_floprate():  # writes floprates.csv rows: exec degree kernel (ops / time / 1e9) + frame = pandas.read_csv('timings.csv', header=None, names=('rank', 'exec', 'kernel', 'what', 'value'), delimiter=' ') + time = frame[frame.what == "time"] + ops = frame[frame.what != "time"] + + time = time.groupby(['rank', 'exec', 'kernel'])['value'].min().to_frame().reset_index().groupby(['exec', 'kernel'])['value'].max() + ops = ops.groupby(['rank', 'exec', 'kernel'])['value'].max().to_frame().reset_index().groupby(['exec', 'kernel'])['value'].max() + + with open('floprates.csv', 'w') as out: + for key in time.keys(): + exe, kernel = key + if "nonopcount" in exe and kernel != "total": + opexe = exe.replace("nonopcount", "opcount") + degree = re.match(".*deg([0-9]*).*", exe).group(1) + out.write(" ".join([exe, degree, kernel, str((ops[opexe][kernel] / time[exe][kernel]) / 1e9)]) + "\n") + + +def calculate_doftimes():  # writes doftimes.csv rows: exec degree kernel (dofs / time / 1e6) + frame = pandas.read_csv('timings.csv', header=None, names=('rank', 'exec', 'kernel', 'what', 'value'), delimiter=' ') + dofs = frame[frame.what == "dofs"] + time = frame[frame.what == "time"] + + dofs = dofs.groupby(['rank', 'exec', 'kernel'])['value'].max().to_frame().reset_index().groupby(['exec', 'kernel'])['value'].max() + time = time.groupby(['rank', 'exec', 'kernel'])['value'].min().to_frame().reset_index().groupby(['exec', 'kernel'])['value'].max() + + with open('doftimes.csv', 'w') as out: + for key in time.keys(): + exe, kernel = key + degree = re.match(".*deg([0-9]*).*", exe).group(1) + if "nonopcount" in exe: + out.write(" ".join([exe, degree, kernel, str(dofs[exe]["dofs"] / time[exe][kernel] / 1e6)]) + "\n") + + +if __name__ == '__main__': + join_csv_files() + calculate_floprate() + calculate_doftimes() diff --git a/cmake/modules/DunePerftoolMacros.cmake b/cmake/modules/DunePerftoolMacros.cmake index 9a08aad70ca40d950dad9aab4cecf141f933bdb7..0b9e63eb51ed57166004e16501fece7411c997ed 100644 --- a/cmake/modules/DunePerftoolMacros.cmake +++ 
b/cmake/modules/DunePerftoolMacros.cmake @@ -37,6 +37,13 @@ # # Additional arguments as recognized by the form compiler. # +# .. cmake_param:: DEPENDS +# :multi: +# :argname dep: +# +# Additional dependencies of the generated executable (changes in those +# will retrigger generation) +# # Add an executable to the project that gets automatically # generated at configure time with the form compiler uf2pdelab. # Regeneration is triggered correctly if the UFL file or the @@ -64,7 +71,7 @@ file(GLOB_RECURSE UFL2PDELAB_SOURCES ${UFL2PDELAB_GLOB_PATTERN}) function(add_generated_executable) set(OPTIONS) set(SINGLE TARGET OPERATOR DRIVER UFLFILE) - set(MULTI FORM_COMPILER_ARGS) + set(MULTI FORM_COMPILER_ARGS DEPENDS) include(CMakeParseArguments) cmake_parse_arguments(GEN "${OPTIONS}" "${SINGLE}" "${MULTI}" ${ARGN}) @@ -108,7 +115,7 @@ function(add_generated_executable) --driver-file ${GEN_DRIVER} ${GEN_FORM_COMPILER_ARGS} ${GEN_UFLFILE} - DEPENDS ${GEN_UFLFILE} ${UFL2PDELAB_SOURCES} + DEPENDS ${GEN_UFLFILE} ${UFL2PDELAB_SOURCES} ${GEN_DEPENDS} COMMENT "Running ufl2pdelab for the target ${GEN_TARGET}" ) diff --git a/cmake/modules/GeneratedSystemtests.cmake b/cmake/modules/GeneratedSystemtests.cmake index 1803b1e508e43188c22ee1b846bd3d42abf0211f..884f80da5e3be6301ebccad8196546e96ea3b71b 100644 --- a/cmake/modules/GeneratedSystemtests.cmake +++ b/cmake/modules/GeneratedSystemtests.cmake @@ -50,6 +50,7 @@ function(dune_add_formcompiler_system_test) add_generated_executable(TARGET ${tname} UFLFILE ${SYSTEMTEST_UFLFILE} FORM_COMPILER_ARGS --ini-file ${inifile} + DEPENDS ${SYSTEMTEST_INIFILE} ) # Exclude the target from all diff --git a/dune/perftool/common/timer.hh b/dune/perftool/common/timer.hh index 8f46c8a5746bf5043ac1dca67976a56da6ff547e..c6d3198f30ada8fa9f079c722791e5b86f0d4c46 100644 --- a/dune/perftool/common/timer.hh +++ b/dune/perftool/common/timer.hh @@ -13,6 +13,8 @@ #define HP_TIMER_OPCOUNTERS(name) __hp_timer_##name##_counters #define HP_TIMER_ELAPSED(name) 
std::chrono::duration_cast<std::chrono::duration<double> >( HP_TIMER_DURATION(name) ).count() + + #ifdef ENABLE_HP_TIMERS #ifdef ENABLE_COUNTER @@ -38,7 +40,7 @@ #define HP_TIMER_RESET(name) \ do { \ - HP_TIMER_DURATION(name) = std::chrono::high_resolution_clock::duration::zero(); \ + HP_TIMER_DURATION(name) = std::chrono::high_resolution_clock::duration::zero(); \ HP_TIMER_OPCOUNTERS(name).reset(); \ } while (false) @@ -69,14 +71,18 @@ #endif // ENABLE_HP_TIMERS + + #ifdef ENABLE_COUNTER -#define DUMP_TIMER(name,os,reset) \ - os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ +#define DUMP_TIMER(name,os,reset)\ + if (HP_TIMER_ELAPSED(name) > 1e-12) \ + os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ HP_TIMER_OPCOUNTERS(name).reportOperations(os,exec,#name,reset); #define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) \ - os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ + if (HP_TIMER_ELAPSED(name) > 1e-12) \ + os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ time += HP_TIMER_ELAPSED(name); \ ops += HP_TIMER_OPCOUNTERS(name); \ HP_TIMER_OPCOUNTERS(name).reportOperations(os,exec,#name,reset); @@ -84,11 +90,13 @@ #elif defined ENABLE_HP_TIMERS #define DUMP_TIMER(name,os,reset) \ - os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ + if (HP_TIMER_ELAPSED(name) > 1e-12) \ + os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ if (reset) HP_TIMER_RESET(name); #define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) \ - os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ + if (HP_TIMER_ELAPSED(name) > 1e-12) \ + os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ time += HP_TIMER_ELAPSED(name); \ if (reset) HP_TIMER_RESET(name); diff --git a/python/dune/perftool/compile.py b/python/dune/perftool/compile.py index 
1a3f725b4d21cd8d4f8e043082a6ae613d5dbf10..863038390e37322c615de38feb35fe563101174c 100644 --- a/python/dune/perftool/compile.py +++ b/python/dune/perftool/compile.py @@ -31,7 +31,7 @@ def type_guessing(val): for t in [int, float]: try: return t(val) - except TypeError: + except ValueError: pass return val diff --git a/python/dune/perftool/generation/cpp.py b/python/dune/perftool/generation/cpp.py index 5bb21bb092fb30ea8fc108cc00b4fb3e5f79e5ba..f3dba1d4aa907ff52e934372476889f5397d9ba1 100644 --- a/python/dune/perftool/generation/cpp.py +++ b/python/dune/perftool/generation/cpp.py @@ -44,13 +44,10 @@ def constructor_parameter(_type, name): def dump_accumulate_timer(name): from dune.perftool.pdelab.localoperator import (name_time_dumper_os, name_time_dumper_reset, - name_time_dumper_t, - name_time_dumper_counter, name_time_dumper_exec,) os = name_time_dumper_os() - reset = name_time_dumper_reset() - t = name_time_dumper_t() - counter = name_time_dumper_counter() + # reset = name_time_dumper_reset() + reset = 'false' - code = "DUMP_AND_ACCUMULATE_TIMER({},{},{},{},{});".format(name, os, reset, t, counter) + code = "DUMP_TIMER({},{},{});".format(name, os, reset) return code diff --git a/python/dune/perftool/pdelab/driver.py b/python/dune/perftool/pdelab/driver.py index f6a2052de91935f8f26635c13d115a9438d2b5e2..524ce57967e4c4c42f94d08b30c433ba2aab40ed 100644 --- a/python/dune/perftool/pdelab/driver.py +++ b/python/dune/perftool/pdelab/driver.py @@ -1140,21 +1140,42 @@ def name_explicitonestepmethod(): return "eosm" +@preamble +def define_mpihelper(name): + include_file("dune/common/parallel/mpihelper.hh", filetag="driver") + return "Dune::MPIHelper& {} = Dune::MPIHelper::instance(argc, argv);".format(name) + + +def name_mpihelper(): + name = "mpihelper" + define_mpihelper(name) + return name + + @preamble def define_timing_stream(name): include_file('fstream', filetag='driver', system=True) + include_file('sstream', filetag='driver', system=True) - import os - tfile = 
os.path.join(get_option('project_basedir'), 'timings.csv') - return ["std::ofstream {};".format(name), - "{}.open(\"{}\", std::ios_base::app);".format(name, tfile), + return ["std::stringstream ss;", + "ss << \"{}/timings-rank-\" << {}.rank() << \".csv\";".format(get_option('project_basedir'), name_mpihelper()), + "std::ofstream {};".format(name), + "{}.open(ss.str(), std::ios_base::app);".format(name), ] +@preamble +def dump_dof_numbers(stream): + exe = name_exec() + return "{} << {} << \" dofs dofs \" << {}.size() << std::endl;".format(stream, + exe, + name_gfs(_driver_data['form'].coefficients()[0].ufl_element())) + + def name_timing_stream(): - define_exec() name = "timestream" define_timing_stream(name) + dump_dof_numbers(name) return name @@ -1404,8 +1425,14 @@ def setup_timer(): @preamble -def define_exec(): - return "char* exec = argv[0];" +def define_exec(name): + return "char* {} = argv[0];".format(name) + + +def name_exec(): + name = "exec" + define_exec(name) + return name @preamble @@ -1427,7 +1454,7 @@ def evaluate_residual_timer(): formdatas = get_global_context_value("formdatas") for formdata in formdatas: lop_name = name_localoperator(formdata) - if get_option('intrumentation_level') >= 3: + if get_option('instrumentation_level') >= 3: print_times.append("{}.dump_timers({}, argv[0], true);".format(lop_name, timestream)) if get_option('instrumentation_level') >= 2: @@ -1714,7 +1741,7 @@ def generate_driver(formdatas, data): assert(any(_driver_data['form'].ufl_cell().cellname() in x for x in ["vertex", "interval", "quadrilateral", "hexahedron"])) # In case of operator conunting we only assemble the matrix and evaluate the residual - assemble_matrix_timer() + #assemble_matrix_timer() evaluate_residual_timer() elif is_stationary(): # We could also use solve if we are not interested in visualization diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index 
3e389c5a9913ab6d88ca5be00c1732195c88294b..50f4b2a00ca3c7818bca9619cb58808ed2855a92 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -530,14 +530,6 @@ def name_time_dumper_reset(): return "reset" -def name_time_dumper_t(): - return "t" - - -def name_time_dumper_counter(): - return "counter" - - def name_time_dumper_exec(): return "exec" @@ -551,25 +543,15 @@ class TimerMethod(ClassMember): def __init__(self): os = name_time_dumper_os() reset = name_time_dumper_reset() - t = name_time_dumper_t() ex = name_time_dumper_exec() knl = name_example_kernel() assert(knl is not None) content = ["template <typename Stream>", "void dump_timers(Stream& {}, char* {}, bool {})".format(os, ex, reset), - "{", - " double {} = 0.0;".format(t), - "#ifdef ENABLE_COUNTER", - " auto counter = HP_TIMER_OPCOUNTERS({});".format(knl), - " counter.reset();", - "#endif", - ""] + "{"] dump_timers = [i for i in retrieve_cache_items(condition='dump_timers')] content.extend(map(lambda x: ' ' + x, dump_timers)) - content.extend(["#ifdef ENABLE_COUNTERS", - " counter.reportOperations({});".format(os), - "#endif"]) content.append("}") ClassMember.__init__(self, content)