diff --git a/applications/convection_diffusion/CMakeLists.txt b/applications/convection_diffusion/CMakeLists.txt
index 8cbf0d4463491995042fc66aecae4e1f240dc537..3195449f15ab6f7cc7e87ad44b9e24bcbe06387b 100644
--- a/applications/convection_diffusion/CMakeLists.txt
+++ b/applications/convection_diffusion/CMakeLists.txt
@@ -3,5 +3,3 @@ dune_add_formcompiler_system_test(UFLFILE conv_diff_dg.ufl
                                   INIFILE conv_diff_dg.mini
                                   NO_TESTS
                                   )
-
-dune_symlink_to_source_files(FILES donkey.sbatch)
diff --git a/applications/convection_diffusion/conv_diff_dg.mini b/applications/convection_diffusion/conv_diff_dg.mini
index 7476dd5e0913c620236371bfeb378d9e92ef2a9b..1234e47ef5475d892cf620adf87ffff4e4ce8b57 100644
--- a/applications/convection_diffusion/conv_diff_dg.mini
+++ b/applications/convection_diffusion/conv_diff_dg.mini
@@ -1,7 +1,8 @@
 __name = app_conv_diff_{__exec_suffix}
-__exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}
+__exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}_level{formcompiler.instrumentation_level}

 opcount_suffix = opcount, nonopcount | expand opcount
+{opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude

 # Calculate the size of the grid to equlibritate it to 100 MB/rank
 # Input parameters
@@ -26,6 +27,9 @@ extension = 1.0 | repeat {dim}
 cells = {firstdircells} {otherdircells}
 partitioning = {ranks} {ones}

+# Set up the timing identifier
+identifier = convdiff_deg{formcompiler.ufl_variants.degree}
+
 [wrapper.vtkcompare]
 name = {__name}
 extension = vtu
@@ -35,7 +39,7 @@ fastdg = 1
 sumfact = 1
 vectorize_quad = 1
 vectorize_grads = 1
-instrumentation_level = 2
+instrumentation_level = 2, 3, 4 | expand
 opcounter = 1, 0 | expand opcount
 time_opcounter = 0, 1 | expand opcount
 exact_solution_expression = g
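Note on the mini-file changes above: opcount_suffix and instrumentation_level are both expanded, and the new exclude rule drops every opcount/level combination except level 4, so an op-counting binary is built only once per degree while timing binaries exist at levels 2, 3 and 4. A minimal Python sketch of the surviving executable names (degree 2 stands in for one entry of formcompiler.ufl_variants.degree, which is configured elsewhere):

```python
# Sketch of the variant filtering introduced by the "| exclude" rule; not part of the patch.
from itertools import product

degree = 2  # hypothetical example degree
for opcount_suffix, level in product(("opcount", "nonopcount"), (2, 3, 4)):
    if opcount_suffix == "opcount" and level != 4:
        continue  # excluded: op-counting binaries are only built at instrumentation level 4
    print("app_conv_diff_deg{}_{}_level{}".format(degree, opcount_suffix, level))
# app_conv_diff_deg2_opcount_level4
# app_conv_diff_deg2_nonopcount_level2
# app_conv_diff_deg2_nonopcount_level3
# app_conv_diff_deg2_nonopcount_level4
```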
diff --git a/applications/convection_diffusion/donkey.sbatch b/applications/convection_diffusion/donkey.sbatch
deleted file mode 100755
index 9b9a9749658a49191634a2362114d37bae384c0d..0000000000000000000000000000000000000000
--- a/applications/convection_diffusion/donkey.sbatch
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/bash
-
-# Load modules
-ml gcc/6.2
-ml intelmpi
-ml openblas
-ml metis
-ml suitesparse
-
-# Set a name for the job
-#SBATCH -J conv_diff
-
-# Number of processes
-#SBATCH -n 16
-
-# Choose the SLURM partition (sinfo for overview)
-#SBATCH -p haswell16c
-
-# Each process needs two PUs: circumvent hyperthreading
-#SBATCH -c 2
-
-# Pin processes to cores
-# (Possible values: socket, core)
-SRUNOPT="--cpu_bind=verbose,core"
-
-# Run the opcount executables
-srun $SRUNOPT ./app_conv_diff_deg2_opcount app_conv_diff_deg2_opcount.ini
-srun $SRUNOPT ./app_conv_diff_deg3_opcount app_conv_diff_deg3_opcount.ini
-srun $SRUNOPT ./app_conv_diff_deg4_opcount app_conv_diff_deg4_opcount.ini
-srun $SRUNOPT ./app_conv_diff_deg5_opcount app_conv_diff_deg5_opcount.ini
-srun $SRUNOPT ./app_conv_diff_deg6_opcount app_conv_diff_deg6_opcount.ini
-srun $SRUNOPT ./app_conv_diff_deg7_opcount app_conv_diff_deg7_opcount.ini
-srun $SRUNOPT ./app_conv_diff_deg8_opcount app_conv_diff_deg8_opcount.ini
-srun $SRUNOPT ./app_conv_diff_deg9_opcount app_conv_diff_deg9_opcount.ini
-srun $SRUNOPT ./app_conv_diff_deg10_opcount app_conv_diff_deg10_opcount.ini
-
-# Run the timing executables
-COUNT=0
-while [ $COUNT -lt 10 ]; do
-  srun $SRUNOPT ./app_conv_diff_deg2_nonopcount app_conv_diff_deg2_nonopcount.ini
-  srun $SRUNOPT ./app_conv_diff_deg3_nonopcount app_conv_diff_deg3_nonopcount.ini
-  srun $SRUNOPT ./app_conv_diff_deg4_nonopcount app_conv_diff_deg4_nonopcount.ini
-  srun $SRUNOPT ./app_conv_diff_deg5_nonopcount app_conv_diff_deg5_nonopcount.ini
-  srun $SRUNOPT ./app_conv_diff_deg6_nonopcount app_conv_diff_deg6_nonopcount.ini
-  srun $SRUNOPT ./app_conv_diff_deg7_nonopcount app_conv_diff_deg7_nonopcount.ini
-  srun $SRUNOPT ./app_conv_diff_deg8_nonopcount app_conv_diff_deg8_nonopcount.ini
-  srun $SRUNOPT ./app_conv_diff_deg9_nonopcount app_conv_diff_deg9_nonopcount.ini
-  srun $SRUNOPT ./app_conv_diff_deg10_nonopcount app_conv_diff_deg10_nonopcount.ini
-  COUNT=$((COUNT + 1))
-done
diff --git a/applications/poisson_dg/CMakeLists.txt b/applications/poisson_dg/CMakeLists.txt
index a31cf6202f47491d864bb3f4a0126858747331a0..9bb13626e9b948e987c98188a1c9e61c1e0385e0 100644
--- a/applications/poisson_dg/CMakeLists.txt
+++ b/applications/poisson_dg/CMakeLists.txt
@@ -3,5 +3,3 @@ dune_add_formcompiler_system_test(UFLFILE poisson_dg.ufl
                                   INIFILE poisson_dg.mini
                                   NO_TESTS
                                   )
-
-dune_symlink_to_source_files(FILES donkey.sbatch)
diff --git a/applications/poisson_dg/donkey.sbatch b/applications/poisson_dg/donkey.sbatch
deleted file mode 100755
index 075bb5c85f5cca6574c40fdd469201967bcf90e3..0000000000000000000000000000000000000000
--- a/applications/poisson_dg/donkey.sbatch
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/bash
-
-# Load modules
-ml gcc/6.2
-ml intelmpi
-ml openblas
-ml metis
-ml suitesparse
-
-# Set a name for the job
-#SBATCH -J poisson_dg
-
-# Number of processes
-#SBATCH -n 16
-
-# Choose the SLURM partition (sinfo for overview)
-#SBATCH -p haswell16c
-
-# Each process needs two PUs: circumvent hyperthreading
-#SBATCH -c 2
-
-# Pin processes to cores
-# (Possible values: socket, core)
-SRUNOPT="--cpu_bind=verbose,core"
-
-# Run the opcount executables
-srun $SRUNOPT ./app_poisson_dg_deg2_opcount app_poisson_dg_3d_deg2_opcount.ini
-srun $SRUNOPT ./app_poisson_dg_deg3_opcount app_poisson_dg_3d_deg3_opcount.ini
-srun $SRUNOPT ./app_poisson_dg_deg4_opcount app_poisson_dg_3d_deg4_opcount.ini
-srun $SRUNOPT ./app_poisson_dg_deg5_opcount app_poisson_dg_3d_deg5_opcount.ini
-srun $SRUNOPT ./app_poisson_dg_deg6_opcount app_poisson_dg_3d_deg6_opcount.ini
-srun $SRUNOPT ./app_poisson_dg_deg7_opcount app_poisson_dg_3d_deg7_opcount.ini
-srun $SRUNOPT ./app_poisson_dg_deg8_opcount app_poisson_dg_3d_deg8_opcount.ini
-srun $SRUNOPT ./app_poisson_dg_deg9_opcount app_poisson_dg_3d_deg9_opcount.ini
-srun $SRUNOPT ./app_poisson_dg_deg10_opcount app_poisson_dg_3d_deg10_opcount.ini
-
-# Run the timing executables
-COUNT=0
-while [ $COUNT -lt 10 ]; do
-  srun $SRUNOPT ./app_poisson_dg_deg2_nonopcount app_poisson_dg_3d_deg2_nonopcount.ini
-  srun $SRUNOPT ./app_poisson_dg_deg3_nonopcount app_poisson_dg_3d_deg3_nonopcount.ini
-  srun $SRUNOPT ./app_poisson_dg_deg4_nonopcount app_poisson_dg_3d_deg4_nonopcount.ini
-  srun $SRUNOPT ./app_poisson_dg_deg5_nonopcount app_poisson_dg_3d_deg5_nonopcount.ini
-  srun $SRUNOPT ./app_poisson_dg_deg6_nonopcount app_poisson_dg_3d_deg6_nonopcount.ini
-  srun $SRUNOPT ./app_poisson_dg_deg7_nonopcount app_poisson_dg_3d_deg7_nonopcount.ini
-  srun $SRUNOPT ./app_poisson_dg_deg8_nonopcount app_poisson_dg_3d_deg8_nonopcount.ini
-  srun $SRUNOPT ./app_poisson_dg_deg9_nonopcount app_poisson_dg_3d_deg9_nonopcount.ini
-  srun $SRUNOPT ./app_poisson_dg_deg10_nonopcount app_poisson_dg_3d_deg10_nonopcount.ini
-  COUNT=$((COUNT + 1))
-done
diff --git a/applications/poisson_dg/poisson_dg.mini b/applications/poisson_dg/poisson_dg.mini
index a8aac4c2a2cef789f5380010c3cfd1fa6111500a..51385f8d970c73435e76cd4f9f8f712b27cd7d4a 100644
--- a/applications/poisson_dg/poisson_dg.mini
+++ b/applications/poisson_dg/poisson_dg.mini
@@ -1,7 +1,8 @@
-__name = app_poisson_dg_{dim}d_{__exec_suffix}
-__exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}
+__name = app_poisson_dg_{__exec_suffix}
+__exec_suffix = deg{formcompiler.ufl_variants.degree}_{opcount_suffix}_level{formcompiler.instrumentation_level}

 opcount_suffix = opcount, nonopcount | expand opcount
+{opcount_suffix} == opcount and {formcompiler.instrumentation_level} != 4 | exclude

 # Calculate the size of the grid to equlibritate it to 100 MB/rank
 # Input parameters
@@ -21,6 +22,9 @@ dimminusone = {dim} - 1 | eval
 ones = 1 | repeat {dimminusone}
 otherdircells = {cellsperdir} | repeat {dimminusone}

+# Set up the timing identifier
+identifier = poisson_dg_deg{formcompiler.ufl_variants.degree}
+
 # Setup the grid!
 extension = 1.0 | repeat {dim}
 cells = {firstdircells} {otherdircells}
@@ -35,7 +39,7 @@ fastdg = 1
 sumfact = 1
 vectorize_quad = 1
 vectorize_grads = 1
-instrumentation_level = 2
+instrumentation_level = 2, 3, 4 | expand
 opcounter = 1, 0 | expand opcount
 time_opcounter = 0, 1 | expand opcount

diff --git a/bin/donkey.sbatch b/bin/donkey.sbatch
new file mode 100755
index 0000000000000000000000000000000000000000..b8091947f51c4d160b07f66c357d4132f8d4f99a
--- /dev/null
+++ b/bin/donkey.sbatch
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# IMPORTANT
+# Remember to set the working directory of this script through
+# sbatch -D <workdir>
+
+# Load modules
+ml gcc/6.2
+ml intelmpi
+ml openblas
+ml metis
+ml suitesparse
+
+# Set a name for the job
+#SBATCH -J poisson_dg
+
+# Number of processes
+#SBATCH -n 16
+
+# Choose the SLURM partition (sinfo for overview)
+#SBATCH -p haswell16c
+
+# Each process needs two PUs: circumvent hyperthreading
+#SBATCH -c 2
+
+# Pin processes to cores
+# (Possible values: socket, core)
+SRUNOPT="--cpu_bind=verbose,core"
+
+# Search for runnable executables
+FILES=$(ls *.ini)
+for inifile in $FILES
+do
+  line=$(grep ^"opcounter = " $inifile)
+  extract=${line##opcounter = }
+  UPPER=10
+  if [ $extract -eq 1 ]
+  then
+    UPPER=1
+  fi
+  COUNT=0
+  while [ $COUNT -lt $UPPER ]; do
+    exec=${inifile%.ini}
+    srun $SRUNOPT ./$exec $inifile
+    COUNT=$((COUNT + 1))
+  done
+done
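The new bin/donkey.sbatch above replaces the two deleted per-application job scripts: instead of hard-coding executable names it globs every ini file in its working directory, runs each op-counting build (opcounter = 1) once and each timing build ten times. As its header comment notes, it has to be submitted with the working directory pointing at the directory that contains the generated executables and ini files, for instance sbatch -D <builddir>/applications/poisson_dg donkey.sbatch (the exact path depends on the local build layout).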
exe.replace("nonopcount", "opcount") - degree = re.match(".*deg([0-9]*).*", exe).group(1) - out.write(" ".join([exe, degree, kernel, str((ops[opexe][kernel] / time[exe][kernel]) / 1e9)]) + "\n") + ident, kernel = key + degree = re.match(".*deg([0-9]*).*", ident).group(1) + out.write(" ".join([ident, degree, kernel, str((ops[ident][kernel] / time[ident][kernel]) / 1e9)]) + "\n") def calculate_doftimes(): - frame = pandas.read_csv('timings.csv', header=None, names=('rank', 'exec', 'kernel', 'what', 'value'), delimiter=' ') + frame = pandas.read_csv('timings.csv', header=None, names=('rank', 'ident', 'kernel', 'what', 'value'), delimiter=' ') dofs = frame[frame.what == "dofs"] time = frame[frame.what == "time"] - dofs = dofs.groupby(('rank', 'exec', 'kernel'))['value'].max().to_frame().reset_index().groupby(('exec', 'kernel'))['value'].max() - time = time.groupby(('rank', 'exec', 'kernel'))['value'].min().to_frame().reset_index().groupby(('exec', 'kernel'))['value'].max() + dofs = dofs.groupby(('rank', 'ident', 'kernel'))['value'].max().to_frame().reset_index().groupby(('ident', 'kernel'))['value'].max() + time = time.groupby(('rank', 'ident', 'kernel'))['value'].min().to_frame().reset_index().groupby(('ident', 'kernel'))['value'].max() with open('doftimes.csv', 'w') as out: for key in time.keys(): - exe, kernel = key - degree = re.match(".*deg([0-9]*).*", exe).group(1) - if "nonopcount" in exe: - out.write(" ".join([exe, degree, kernel, str(dofs[exe]["dofs"] / time[exe][kernel] / 1e6)]) + "\n") + ident, kernel = key + degree = re.match(".*deg([0-9]*).*", ident).group(1) + out.write(" ".join([ident, degree, kernel, str(dofs[ident]["dofs"] / time[ident][kernel] / 1e6)]) + "\n") if __name__ == '__main__': diff --git a/dune/perftool/common/timer.hh b/dune/perftool/common/timer.hh index c6d3198f30ada8fa9f079c722791e5b86f0d4c46..d7d208adf20b716511d184b6b499cc7230edc76a 100644 --- a/dune/perftool/common/timer.hh +++ b/dune/perftool/common/timer.hh @@ -77,26 +77,26 @@ #define DUMP_TIMER(name,os,reset)\ if (HP_TIMER_ELAPSED(name) > 1e-12) \ - os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ - HP_TIMER_OPCOUNTERS(name).reportOperations(os,exec,#name,reset); + os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ + HP_TIMER_OPCOUNTERS(name).reportOperations(os,ident,#name,reset); #define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) \ if (HP_TIMER_ELAPSED(name) > 1e-12) \ - os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ + os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ time += HP_TIMER_ELAPSED(name); \ ops += HP_TIMER_OPCOUNTERS(name); \ - HP_TIMER_OPCOUNTERS(name).reportOperations(os,exec,#name,reset); + HP_TIMER_OPCOUNTERS(name).reportOperations(os,ident,#name,reset); #elif defined ENABLE_HP_TIMERS #define DUMP_TIMER(name,os,reset) \ if (HP_TIMER_ELAPSED(name) > 1e-12) \ - os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ + os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ if (reset) HP_TIMER_RESET(name); #define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) \ if (HP_TIMER_ELAPSED(name) > 1e-12) \ - os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ + os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \ time += HP_TIMER_ELAPSED(name); \ if (reset) HP_TIMER_RESET(name); diff --git a/python/dune/perftool/generation/cpp.py 
diff --git a/dune/perftool/common/timer.hh b/dune/perftool/common/timer.hh
index c6d3198f30ada8fa9f079c722791e5b86f0d4c46..d7d208adf20b716511d184b6b499cc7230edc76a 100644
--- a/dune/perftool/common/timer.hh
+++ b/dune/perftool/common/timer.hh
@@ -77,26 +77,26 @@

 #define DUMP_TIMER(name,os,reset)\
   if (HP_TIMER_ELAPSED(name) > 1e-12) \
-    os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
-  HP_TIMER_OPCOUNTERS(name).reportOperations(os,exec,#name,reset);
+    os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
+  HP_TIMER_OPCOUNTERS(name).reportOperations(os,ident,#name,reset);

 #define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) \
   if (HP_TIMER_ELAPSED(name) > 1e-12) \
-    os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
+    os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
   time += HP_TIMER_ELAPSED(name); \
   ops += HP_TIMER_OPCOUNTERS(name); \
-  HP_TIMER_OPCOUNTERS(name).reportOperations(os,exec,#name,reset);
+  HP_TIMER_OPCOUNTERS(name).reportOperations(os,ident,#name,reset);

 #elif defined ENABLE_HP_TIMERS

 #define DUMP_TIMER(name,os,reset) \
   if (HP_TIMER_ELAPSED(name) > 1e-12) \
-    os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
+    os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
   if (reset) HP_TIMER_RESET(name);

 #define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) \
   if (HP_TIMER_ELAPSED(name) > 1e-12) \
-    os << exec << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
+    os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
   time += HP_TIMER_ELAPSED(name); \
   if (reset) HP_TIMER_RESET(name);

diff --git a/python/dune/perftool/generation/cpp.py b/python/dune/perftool/generation/cpp.py
index f3dba1d4aa907ff52e934372476889f5397d9ba1..57dd91c850779284f875df204cfc21414f9ae592 100644
--- a/python/dune/perftool/generation/cpp.py
+++ b/python/dune/perftool/generation/cpp.py
@@ -42,9 +42,7 @@ def constructor_parameter(_type, name):

 @generator_factory(item_tags=("dump_timers",))
 def dump_accumulate_timer(name):
-    from dune.perftool.pdelab.localoperator import (name_time_dumper_os,
-                                                    name_time_dumper_reset,
-                                                    name_time_dumper_exec,)
+    from dune.perftool.pdelab.localoperator import name_time_dumper_os
     os = name_time_dumper_os()
     # reset = name_time_dumper_reset()
     reset = 'false'
diff --git a/python/dune/perftool/pdelab/driver.py b/python/dune/perftool/pdelab/driver.py
index 26de3b67de909ee8156d33aefafcaff3727c718d..22e56115cc9c4fad34eecd3c951d7d15989cd452 100644
--- a/python/dune/perftool/pdelab/driver.py
+++ b/python/dune/perftool/pdelab/driver.py
@@ -1166,9 +1166,9 @@ def define_timing_stream(name):

 @preamble
 def dump_dof_numbers(stream):
-    exe = name_exec()
+    ident = name_timing_identifier()
     return "{} << {} << \" dofs dofs \" << {}.size() << std::endl;".format(stream,
-                                                                           exe,
+                                                                           ident,
                                                                            name_gfs(_driver_data['form'].coefficients()[0].ufl_element()))


@@ -1222,7 +1222,7 @@ def dune_solve():
     for formdata in formdatas:
         lop_name = name_localoperator(formdata)
         timestream = name_timing_stream()
-        print_times.append("{}.dump_timers({}, argv[0], true);".format(lop_name, timestream))
+        print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier()))

     solve = ["HP_TIMER_START(solve);",
              "{}".format(solve),
@@ -1425,13 +1425,14 @@ def setup_timer():


 @preamble
-def define_exec(name):
-    return "char* {} = argv[0];".format(name)
+def define_timing_identifier(name):
+    ini = name_initree()
+    return "auto {} = {}.get<std::string>(\"identifier\", std::string(argv[0])).c_str();".format(name, ini)


-def name_exec():
-    name = "exec"
-    define_exec(name)
+def name_timing_identifier():
+    name = "ident"
+    define_timing_identifier(name)
     return name


@@ -1455,7 +1456,7 @@ def evaluate_residual_timer():
     for formdata in formdatas:
         lop_name = name_localoperator(formdata)
         if get_option('instrumentation_level') >= 3:
-            print_times.append("{}.dump_timers({}, argv[0], true);".format(lop_name, timestream))
+            print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier()))

     if get_option('instrumentation_level') >= 2:
         evaluation = ["HP_TIMER_START(residual_evaluation);",
@@ -1495,7 +1496,7 @@ def apply_jacobian_timer():
     for formdata in formdatas:
         lop_name = name_localoperator(formdata)
         if get_option('instrumentation_level') >= 3:
-            print_times.append("{}.dump_timers({}, argv[0], true);".format(lop_name, timestream))
+            print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier()))

     if get_option('instrumentation_level') >= 2:
         evaluation = ["HP_TIMER_START(apply_jacobian);",
@@ -1532,7 +1533,7 @@ def assemble_matrix_timer():
     for formdata in formdatas:
         lop_name = name_localoperator(formdata)
         if get_option('instrumentation_level') >= 3:
-            print_times.append("{}.dump_timers({}, argv[0], true);".format(lop_name, timestream))
+            print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier()))

     if get_option('instrumentation_level') >= 2:
         assembly = ["HP_TIMER_START(matrix_assembly);",
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index f6ffbec814cd02ef5053ffc2659209535b3d61bc..09a12e772d1c73803c36aca32d1447a934e4e353 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -530,8 +530,8 @@ def name_time_dumper_reset():
     return "reset"


-def name_time_dumper_exec():
-    return "exec"
+def name_time_dumper_ident():
+    return "ident"


 @generator_factory(item_tags=("cached",), cache_key_generator=lambda **kw: None)
@@ -543,12 +543,12 @@ class TimerMethod(ClassMember):
     def __init__(self):
         os = name_time_dumper_os()
         reset = name_time_dumper_reset()
-        ex = name_time_dumper_exec()
+        ident = name_time_dumper_ident()
         knl = name_example_kernel()
         assert(knl is not None)

         content = ["template <typename Stream>",
-                   "void dump_timers(Stream& {}, char* {}, bool {})".format(os, ex, reset),
+                   "void dump_timers(Stream& {}, const char* {}, bool {})".format(os, ident, reset),
                    "{"]
         dump_timers = [i for i in retrieve_cache_items(condition='dump_timers')]
         content.extend(map(lambda x: ' ' + x, dump_timers))
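Since the identifier no longer encodes the opcount/nonopcount suffix, timing rows and operation-count rows of the same configuration share one ident, which is why the nonopcount-to-opcount name translation could be dropped from process_measurements.py. The resulting floprates.csv and doftimes.csv are space-separated rows of ident, degree, kernel and value (GFlop/s respectively millions of DOFs per second). A minimal loading sketch for further processing; the column labels gflops and mdofs_per_s are chosen here for illustration, and residual_evaluation again stands in for one of the dumped kernel names:

```python
# Load the two result files written by process_measurements.py for plotting/inspection.
import pandas

floprates = pandas.read_csv('floprates.csv', header=None, delimiter=' ',
                            names=('ident', 'degree', 'kernel', 'gflops'))
doftimes = pandas.read_csv('doftimes.csv', header=None, delimiter=' ',
                           names=('ident', 'degree', 'kernel', 'mdofs_per_s'))

# e.g. flop rates over the polynomial degree for one kernel
print(floprates[floprates.kernel == "residual_evaluation"].sort_values('degree'))
```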