diff --git a/python/dune/perftool/pdelab/driver/__init__.py b/python/dune/perftool/pdelab/driver/__init__.py index 48fa457468ae3c2413b6d8add759d7d2645d219f..47603d74453e771b40ac8e55c4a97fdfafad7dbd 100644 --- a/python/dune/perftool/pdelab/driver/__init__.py +++ b/python/dune/perftool/pdelab/driver/__init__.py @@ -43,6 +43,22 @@ def set_driver_data(formdatas, data): _driver_data['data'] = data +def get_dimension(): + return _driver_data['form'].ufl_cell().geometric_dimension() + + +def get_test_element(): + return _driver_data['form'].arguments()[0].ufl_element() + + +def get_trial_element(): + return _driver_data['form'].coefficients()[0].ufl_element() + + +def get_formdata(): + return _driver_data['formdata'] + + def is_stationary(): return 'mass_form' not in _driver_data @@ -169,7 +185,7 @@ def name_initree(): @preamble def define_dimension(name): - return "static const int {} = {};".format(name, _driver_data['form'].ufl_cell().geometric_dimension()) + return "static const int {} = {};".format(name, get_dimension()) def name_dimension(): @@ -292,6 +308,7 @@ def type_domainfield(): @preamble def typedef_range(name): if get_option('opcounter'): + from dune.perftool.pdelab.driver.timings import setup_timer setup_timer() return "using {} = oc::OpCounter<double>;".format(name) else: @@ -599,11 +616,11 @@ def name_gfs(expr): @preamble def define_dofestimate(name): # Provide a worstcase estimate for the number of entries per row based on the given gridfunction space and cell geometry - if isQuadrilateral(_driver_data['form'].coefficients()[0].ufl_element()): + if isQuadrilateral(get_trial_element()): geo_factor = "4" else: geo_factor = "6" - gfs = name_gfs(_driver_data['form'].coefficients()[0].ufl_element()) + gfs = name_gfs(get_trial_element()) ini = name_initree() # Assure that gfs in initialized @@ -668,8 +685,8 @@ def typedef_localoperator(name, formdata): # No Parameter class here, yet # params = type_parameters() # return "using {} = LocalOperator<{}>;".format(name, params) - ugfs = type_gfs(_driver_data['form'].coefficients()[0].ufl_element()) - vgfs = type_gfs(_driver_data['form'].arguments()[0].ufl_element()) + ugfs = type_gfs(get_trial_element()) + vgfs = type_gfs(get_test_element()) from dune.perftool.generation import get_global_context_value data = get_global_context_value("data") filename = get_option("operator_file") @@ -702,11 +719,11 @@ def name_localoperator(formdata): @preamble def typedef_gridoperator(name, formdata): - ugfs = type_gfs(_driver_data['form'].coefficients()[0].ufl_element()) - vgfs = type_gfs(_driver_data['form'].arguments()[0].ufl_element()) + ugfs = type_gfs(get_trial_element()) + vgfs = type_gfs(get_test_element()) lop = type_localoperator(formdata) - ucc = type_constraintscontainer(_driver_data['form'].coefficients()[0].ufl_element()) - vcc = type_constraintscontainer(_driver_data['form'].arguments()[0].ufl_element()) + ucc = type_constraintscontainer(get_trial_element()) + vcc = type_constraintscontainer(get_test_element()) mb = type_matrixbackend() df = type_domainfield() r = type_range() @@ -730,10 +747,10 @@ def type_gridoperator(formdata): def define_gridoperator(name, formdata): gotype = type_gridoperator(formdata) # TODO use formdata insteat of _driver_data object - ugfs = name_gfs(_driver_data['form'].coefficients()[0].ufl_element()) - ucc = name_assembled_constraints(_driver_data['form'].coefficients()[0].ufl_element()) - vgfs = name_gfs(_driver_data['form'].arguments()[0].ufl_element()) - vcc = name_assembled_constraints(_driver_data['form'].arguments()[0].ufl_element()) + ugfs = name_gfs(get_trial_element()) + ucc = name_assembled_constraints(get_trial_element()) + vgfs = name_gfs(get_test_element()) + vcc = name_assembled_constraints(get_test_element()) lop = name_localoperator(formdata) mb = name_matrixbackend() return ["{} {}({}, {}, {}, {}, {}, {});".format(gotype, name, ugfs, ucc, vgfs, vcc, lop, mb), @@ -762,7 +779,7 @@ def type_vector(formdata): @preamble def define_vector(name, formdata): vtype = type_vector(formdata) - gfs = name_gfs(_driver_data['form'].coefficients()[0].ufl_element()) + gfs = name_gfs(get_trial_element()) return ["{} {}({});".format(vtype, name, gfs), "{} = 0.0;".format(name)] @@ -897,7 +914,7 @@ def name_solution_function(tree_path=()): @preamble def interpolate_vector(name, formdata): define_vector(name, formdata) - element = _driver_data['form'].coefficients()[0].ufl_element() + element = get_trial_element() bf = name_boundary_function(element) gfs = name_gfs(element) return "Dune::PDELab::interpolate({}, {}, {});".format(bf, @@ -910,7 +927,7 @@ def interpolate_vector(name, formdata): def interpolate_solution_expression(name): formdata = _driver_data['formdata'] define_vector(name, formdata) - element = _driver_data['form'].coefficients()[0].ufl_element() + element = get_trial_element() sol = name_solution_function() gfs = name_gfs(element) return "Dune::PDELab::interpolate({}, {}, {});".format(sol, @@ -920,7 +937,7 @@ def interpolate_solution_expression(name): def maybe_interpolate_vector(name, formdata): - element = _driver_data['form'].coefficients()[0].ufl_element() + element = get_trial_element() if has_constraints(element): interpolate_vector(name, formdata) else: @@ -1156,35 +1173,6 @@ def name_mpihelper(): return name -@preamble -def define_timing_stream(name): - include_file('fstream', filetag='driver', system=True) - include_file('sstream', filetag='driver', system=True) - include_file('sys/types.h', filetag='driver', system=True) - include_file('unistd.h', filetag='driver', system=True) - - return ["std::stringstream ss;", - "ss << \"{}/timings-rank-\" << {}.rank() << \"-pid-\" << getpid() << \".csv\";".format(get_option('project_basedir'), name_mpihelper()), - "std::ofstream {};".format(name), - "{}.open(ss.str(), std::ios_base::app);".format(name), - ] - - -@preamble -def dump_dof_numbers(stream): - ident = name_timing_identifier() - return "{} << {} << \" dofs dofs \" << {}.size() << std::endl;".format(stream, - ident, - name_gfs(_driver_data['form'].coefficients()[0].ufl_element())) - - -def name_timing_stream(): - name = "timestream" - define_timing_stream(name) - dump_dof_numbers(name) - return name - - @preamble def dune_solve(): # Test if form is linear in ansatzfunction @@ -1217,6 +1205,7 @@ def dune_solve(): solve = "{}.apply();".format(snp) if get_option('instrumentation_level') >= 2: + from dune.perftool.pdelab.driver.timings import setup_timer, name_timing_stream setup_timer() from dune.perftool.generation import post_include post_include("HP_DECLARE_TIMER(solve);", filetag="driver") @@ -1314,7 +1303,7 @@ def name_subgfs(element, tree_path): @preamble def typedef_difference_squared_adapter(name, tree_path): - element = _driver_data['form'].coefficients()[0].ufl_element() + element = get_trial_element() formdata = _driver_data['formdata'] solution_function = name_solution_function(tree_path) @@ -1333,7 +1322,7 @@ def type_difference_squared_adapter(tree_path): @preamble def define_difference_squared_adapter(name, tree_path): - element = _driver_data['form'].coefficients()[0].ufl_element() + element = get_trial_element() formdata = _driver_data['formdata'] t = type_difference_squared_adapter(tree_path) @@ -1372,7 +1361,7 @@ def _accumulate_L2_squared(tree_path): def accumulate_L2_squared(tree_path=()): - element = _driver_data['form'].coefficients()[0].ufl_element() + element = get_trial_element() from ufl.functionview import select_subelement from ufl.classes import MultiIndex, FixedIndex element = select_subelement(element, MultiIndex(tuple(FixedIndex(int(i)) for i in tree_path))) @@ -1418,144 +1407,6 @@ def name_test_fail_variable(): return name -@cached -def setup_timer(): - # Necessary includes and defines - from dune.perftool.generation import pre_include - - # TODO check that we are using YASP? - if get_option('opcounter'): - pre_include("#define ENABLE_COUNTER", filetag="driver") - pre_include("#define ENABLE_HP_TIMERS", filetag="driver") - include_file("dune/perftool/common/timer.hh", filetag="driver") - - -@preamble -def define_timing_identifier(name): - ini = name_initree() - return "auto {} = {}.get<std::string>(\"identifier\", std::string(argv[0]));".format(name, ini) - - -def name_timing_identifier(): - name = "ident" - define_timing_identifier(name) - return name - - -@preamble -def evaluate_residual_timer(): - formdata = _driver_data['formdata'] - n_go = name_gridoperator(formdata) - v = name_vector(formdata) - t_v = type_vector(formdata) - setup_timer() - - if get_option('instrumentation_level') >= 2: - # Write back times - from dune.perftool.generation import post_include - post_include("HP_DECLARE_TIMER(residual_evaluation);", filetag="driver") - timestream = name_timing_stream() - print_times = [] - - from dune.perftool.generation import get_global_context_value - formdatas = get_global_context_value("formdatas") - for formdata in formdatas: - lop_name = name_localoperator(formdata) - if get_option('instrumentation_level') >= 3: - print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) - - if get_option('instrumentation_level') >= 2: - evaluation = ["HP_TIMER_START(residual_evaluation);", - "{}.residual({}, r);".format(n_go, v), - "HP_TIMER_STOP(residual_evaluation);", - "DUMP_TIMER(residual_evaluation, {}, true);".format(timestream)] - evaluation.extend(print_times) - else: - evaluation = ["{}.residual({}, r);".format(n_go, v)] - - evaluation = ["{} r({});".format(t_v, v), "r=0.0;"] + evaluation - - return evaluation - - -@preamble -def apply_jacobian_timer(): - # Set the matrix_free option to True! - from dune.perftool.options import set_option - set_option("matrix_free", True) - - formdata = _driver_data['formdata'] - n_go = name_gridoperator(formdata) - v = name_vector(formdata) - t_v = type_vector(formdata) - setup_timer() - - if get_option('instrumentation_level') >= 2: - # Write back times - from dune.perftool.generation import post_include - post_include("HP_DECLARE_TIMER(apply_jacobian);", filetag="driver") - timestream = name_timing_stream() - print_times = [] - - from dune.perftool.generation import get_global_context_value - formdatas = get_global_context_value("formdatas") - for formdata in formdatas: - lop_name = name_localoperator(formdata) - if get_option('instrumentation_level') >= 3: - print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) - - if get_option('instrumentation_level') >= 2: - evaluation = ["HP_TIMER_START(apply_jacobian);", - "{}.jacobian_apply({}, j);".format(n_go, v), - "HP_TIMER_STOP(apply_jacobian);", - "DUMP_TIMER(apply_jacobian, {}, true);".format(timestream)] - evaluation.extend(print_times) - else: - evaluation = ["{}.jacobian_apply({}, j);".format(n_go, v)] - - evaluation = ["{} j({});".format(t_v, v), "j=0.0;"] + evaluation - - return evaluation - - -@preamble -def assemble_matrix_timer(): - formdata = _driver_data['formdata'] - t_go = type_gridoperator(formdata) - n_go = name_gridoperator(formdata) - v = name_vector(formdata) - t_v = type_vector(formdata) - setup_timer() - - if get_option('instrumentation_level') >= 2: - # Write back times - from dune.perftool.generation import post_include - post_include("HP_DECLARE_TIMER(matrix_assembly);", filetag="driver") - timestream = name_timing_stream() - print_times = [] - - from dune.perftool.generation import get_global_context_value - formdatas = get_global_context_value("formdatas") - for formdata in formdatas: - lop_name = name_localoperator(formdata) - if get_option('instrumentation_level') >= 3: - print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) - - if get_option('instrumentation_level') >= 2: - assembly = ["HP_TIMER_START(matrix_assembly);", - "{}.jacobian({},m);".format(n_go, v), - "HP_TIMER_STOP(matrix_assembly);", - "DUMP_TIMER(matrix_assembly, {}, true);".format(timestream)] - assembly.extend(print_times) - else: - assembly = ["{}.jacobian({},m);".format(n_go, v)] - - assembly = ["using M = typename {}::Traits::Jacobian;".format(t_go), - "M m({});".format(n_go)] + assembly - - return assembly - - @preamble def print_residual(): ini = name_initree() @@ -1631,7 +1482,7 @@ def name_predicate(): @preamble def vtkoutput(): - element = _driver_data['form'].coefficients()[0].ufl_element() + element = get_trial_element() define_gfs_name(element) include_file("dune/pdelab/gridfunctionspace/vtk.hh", filetag="driver") vtkwriter = name_vtkwriter() @@ -1693,7 +1544,7 @@ def name_vtk_sequence_writer(): def visualize_initial_condition(): include_file("dune/pdelab/gridfunctionspace/vtk.hh", filetag="driver") vtkwriter = name_vtk_sequence_writer() - element = _driver_data['form'].coefficients()[0].ufl_element() + element = get_trial_element() define_gfs_name(element) gfs = name_gfs(element) vector = name_vector(_driver_data['formdata']) @@ -1709,7 +1560,7 @@ def time_loop(): formdata = _driver_data['formdata'] params = name_parameters(formdata) time = name_time() - expr = _driver_data['form'].coefficients()[0].ufl_element() + expr = get_trial_element() bctype = name_bctype_function(expr) gfs = name_gfs(expr) cc = name_constraintscontainer(expr) @@ -1796,6 +1647,7 @@ def generate_driver(formdatas, data): ["vertex", "interval", "quadrilateral", "hexahedron"])) # In case of operator conunting we only assemble the matrix and evaluate the residual # assemble_matrix_timer() + from dune.perftool.pdelab.driver.timings import apply_jacobian_timer, evaluate_residual_timer evaluate_residual_timer() apply_jacobian_timer() elif is_stationary(): @@ -1806,6 +1658,7 @@ def generate_driver(formdatas, data): # Make sure that timestream is declared before retrieving chache items if get_option("instrumentation_level") >= 1: + from dune.perftool.pdelab.driver.timings import setup_timer, name_timing_stream setup_timer() timestream = name_timing_stream() diff --git a/python/dune/perftool/pdelab/driver/timings.py b/python/dune/perftool/pdelab/driver/timings.py index 58345d92a95b71d394b07e08f7b44404b8b43f86..67576153fda54668d2331871c4827e25356d4079 100644 --- a/python/dune/perftool/pdelab/driver/timings.py +++ b/python/dune/perftool/pdelab/driver/timings.py @@ -3,27 +3,14 @@ from dune.perftool.options import get_option, set_option from dune.perftool.generation import (cached, include_file, - pre_include, post_include, preamble, ) -from dune.perftool.pdelab.driver import (get_formdata, - name_initree, - name_mpihelper, - ) -from dune.perftool.pdelab.driver.gridfunctionspace import (name_trial_gfs, - ) -from dune.perftool.pdelab.driver.gridoperator import (name_gridoperator, - name_localoperator, - type_gridoperator, - ) -from dune.perftool.pdelab.driver.solve import (name_vector, - type_vector, - ) @preamble def define_timing_identifier(name): + from dune.perftool.pdelab.driver import name_initree ini = name_initree() return "auto {} = {}.get<std::string>(\"identifier\", std::string(argv[0]));".format(name, ini) @@ -37,13 +24,15 @@ def name_timing_identifier(): @preamble def dump_dof_numbers(stream): ident = name_timing_identifier() + from dune.perftool.pdelab.driver import get_trial_element, name_gfs return "{} << {} << \" dofs dofs \" << {}.size() << std::endl;".format(stream, ident, - name_trial_gfs()) - + name_gfs(get_trial_element())) @preamble def define_timing_stream(name): + from dune.perftool.pdelab.driver import name_mpihelper + include_file('fstream', filetag='driver', system=True) include_file('sstream', filetag='driver', system=True) include_file('sys/types.h', filetag='driver', system=True) @@ -65,6 +54,9 @@ def name_timing_stream(): @cached def setup_timer(): + # Necessary includes and defines + from dune.perftool.generation import pre_include + # TODO check that we are using YASP? if get_option('opcounter'): pre_include("#define ENABLE_COUNTER", filetag="driver") @@ -73,13 +65,24 @@ def setup_timer(): @preamble -def run_lop_timer(name, whattodo, setup): +def evaluate_residual_timer(): + from dune.perftool.pdelab.driver import (get_formdata, + name_gridoperator, + name_localoperator, + name_vector, + type_vector, + ) + + formdata = get_formdata() + n_go = name_gridoperator(formdata) + v = name_vector(formdata) + t_v = type_vector(formdata) setup_timer() if get_option('instrumentation_level') >= 2: # Write back times from dune.perftool.generation import post_include - post_include("HP_DECLARE_TIMER({});".format(name), filetag="driver") + post_include("HP_DECLARE_TIMER(residual_evaluation);", filetag="driver") timestream = name_timing_stream() print_times = [] @@ -91,54 +94,105 @@ def run_lop_timer(name, whattodo, setup): print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) if get_option('instrumentation_level') >= 2: - evaluation = ["HP_TIMER_START({});".format(name), - whattodo, - "HP_TIMER_STOP({});".format(name), - "DUMP_TIMER({}, {}, true);".format(name, timestream)] + evaluation = ["HP_TIMER_START(residual_evaluation);", + "{}.residual({}, r);".format(n_go, v), + "HP_TIMER_STOP(residual_evaluation);", + "DUMP_TIMER(residual_evaluation, {}, true);".format(timestream)] evaluation.extend(print_times) else: - evaluation = [whattodo] + evaluation = ["{}.residual({}, r);".format(n_go, v)] - evaluation = list(setup) + evaluation + evaluation = ["{} r({});".format(t_v, v), "r=0.0;"] + evaluation return evaluation -def evaluate_residual_timer(): - formdata = get_formdata() - n_go = name_gridoperator(formdata) - v = name_vector(formdata) - t_v = type_vector(formdata) - - return run_lop_timer("residual_evaluation", - "{}.residual({}, r);".format(n_go, v), - ("{} r({});".format(t_v, v), "r=0.0;") - ) - - +@preamble def apply_jacobian_timer(): # Set the matrix_free option to True! set_option("matrix_free", True) + from dune.perftool.pdelab.driver import (get_formdata, + name_gridoperator, + name_localoperator, + name_vector, + type_vector, + ) + formdata = get_formdata() n_go = name_gridoperator(formdata) v = name_vector(formdata) t_v = type_vector(formdata) + setup_timer() + + if get_option('instrumentation_level') >= 2: + # Write back times + from dune.perftool.generation import post_include + post_include("HP_DECLARE_TIMER(apply_jacobian);", filetag="driver") + timestream = name_timing_stream() + print_times = [] + + from dune.perftool.generation import get_global_context_value + formdatas = get_global_context_value("formdatas") + for formdata in formdatas: + lop_name = name_localoperator(formdata) + if get_option('instrumentation_level') >= 3: + print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) + + if get_option('instrumentation_level') >= 2: + evaluation = ["HP_TIMER_START(apply_jacobian);", + "{}.jacobian_apply({}, j);".format(n_go, v), + "HP_TIMER_STOP(apply_jacobian);", + "DUMP_TIMER(apply_jacobian, {}, true);".format(timestream)] + evaluation.extend(print_times) + else: + evaluation = ["{}.jacobian_apply({}, j);".format(n_go, v)] - return run_lop_timer("apply_jacobian", - "{}.jacobian_apply({}, j);".format(n_go, v), - ("{} j({});".format(t_v, v), "j=0.0;") - ) + evaluation = ["{} j({});".format(t_v, v), "j=0.0;"] + evaluation + return evaluation + +@preamble def assemble_matrix_timer(): + from dune.perftool.pdelab.driver import (get_formdata, + name_gridoperator, + name_localoperator, + name_vector, + type_vector, + ) + formdata = get_formdata() t_go = type_gridoperator(formdata) n_go = name_gridoperator(formdata) v = name_vector(formdata) + t_v = type_vector(formdata) + setup_timer() + + if get_option('instrumentation_level') >= 2: + # Write back times + from dune.perftool.generation import post_include + post_include("HP_DECLARE_TIMER(matrix_assembly);", filetag="driver") + timestream = name_timing_stream() + print_times = [] + + from dune.perftool.generation import get_global_context_value + formdatas = get_global_context_value("formdatas") + for formdata in formdatas: + lop_name = name_localoperator(formdata) + if get_option('instrumentation_level') >= 3: + print_times.append("{}.dump_timers({}, {}, true);".format(lop_name, timestream, name_timing_identifier())) + + if get_option('instrumentation_level') >= 2: + assembly = ["HP_TIMER_START(matrix_assembly);", + "{}.jacobian({},m);".format(n_go, v), + "HP_TIMER_STOP(matrix_assembly);", + "DUMP_TIMER(matrix_assembly, {}, true);".format(timestream)] + assembly.extend(print_times) + else: + assembly = ["{}.jacobian({},m);".format(n_go, v)] + + assembly = ["using M = typename {}::Traits::Jacobian;".format(t_go), + "M m({});".format(n_go)] + assembly - return run_lop_timer("matrix_assembly", - "{}.jacobian({},m);".format(n_go, v), - ("using M = typename {}::Traits::Jacobian;".format(t_go), - "M m({});".format(n_go)) - ) + return assembly