diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py
index a28b2bc5beb7b7cb6ac5317797a0122be10bd214..9d4ccb3f5da38ac1d83bd5d7b63f7c7a1059c99d 100644
--- a/python/dune/perftool/options.py
+++ b/python/dune/perftool/options.py
@@ -82,7 +82,7 @@ class PerftoolFormOptionsArray(ImmutableRecord):
     sumfact = PerftoolOption(default=False, helpstr="Use sumfactorization")
     sumfact_regular_jacobians = PerftoolOption(default=False, helpstr="Generate non sum-factorized jacobians (only useful if sumfact is set)")
     vectorization_quadloop = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization")
-    vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model")
+    vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model|target")
     vectorization_not_fully_vectorized_error = PerftoolOption(default=False, helpstr="throw an error if nonquadloop vectorization did not fully vectorize")
     vectorization_horizontal = PerftoolOption(default=None, helpstr="an explicit value for horizontal vectorization read by the 'explicit' strategy")
     vectorization_vertical = PerftoolOption(default=None, helpstr="an explicit value for vertical vectorization read by the 'explicit' strategy")
@@ -90,6 +90,7 @@ class PerftoolFormOptionsArray(ImmutableRecord):
     vectorization_allow_quadrature_changes = PerftoolOption(default=False, helpstr="whether the vectorization strategy is allowed to alter quadrature point numbers")
     vectorization_list_index = PerftoolOption(default=None, helpstr="Which vectorization to pick from a list (only valid with vectorization_strategy=fromlist).")
     vectorization_jacobians = PerftoolOption(default=True, helpstr="Whether to attempt to vectorize jacobians (takes time, often not needed)")
+    vectorization_target = PerftoolOption(_type=float, helpstr="The cost function target for the 'target' cost model. Only needed to verify the cost model itself, do not use light-heartedly!!!")
     simplify = PerftoolOption(default=False, helpstr="Whether to simplify expressions using sympy")
     generate_jacobians = PerftoolOption(default=True, helpstr="Whether jacobian_* methods should be generated. This is set to false automatically, when numerical_jacobian is set to true.")
     generate_residuals = PerftoolOption(default=True, helpstr="Whether alpha_* methods should be generated.")
diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index 3e79419dcb97777c86a130deb3f0b82b84d4c950..62293c2999e15eef4e059bd9f9bd9021a31af889 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -19,7 +19,7 @@ from dune.perftool.sumfact.tabulation import (BasisTabulationMatrixArray,
                                               set_quadrature_points,
                                               )
 from dune.perftool.error import PerftoolVectorizationError
-from dune.perftool.options import get_form_option
+from dune.perftool.options import get_form_option, set_form_option
 from dune.perftool.tools import add_to_frozendict, round_to_multiple, list_diff
 
 from pytools import product
@@ -51,7 +51,6 @@ def attach_vectorization_info(sf):
         return _cache_vectorization_info(sf, None)
 
 
-@backend(interface="vectorization_strategy", name="model")
 def costmodel(sf):
     # Penalize vertical vectorization
     vertical_penalty = 1 + math.log(sf.vertical_width)
@@ -65,7 +64,6 @@ def costmodel(sf):
     return sf.operations * vertical_penalty * scalar_penalty
 
 
-@backend(interface="vectorization_strategy", name="explicit")
 def explicit_costfunction(sf):
     # Read the explicitly set values for horizontal and vertical vectorization
     width = get_vcl_type_size(dtype_floatingpoint())
@@ -85,12 +83,35 @@ def explicit_costfunction(sf):
         return 1000000000000
 
 
+_global_cost_for_target = 0.0
+_subset_cost_for_target = 0.0
+
+
+def target_costfunction(sf):
+    target = float(get_form_option("vectorization_target"))
+    realcost = costmodel(sf)
+    val = abs(realcost - (_subset_cost_for_target / _global_cost_for_target) * target)
+    print(val)
+    return val
+
+
 def strategy_cost(strat_tuple):
     qp, strategy = strat_tuple
-    func = get_backend(interface="vectorization_strategy",
-                       selector=lambda: get_form_option("vectorization_strategy"))
+
+    # Choose the correct cost function
+    s = get_form_option("vectorization_strategy")
+    if s == "model":
+        func = costmodel
+    elif s == "explicit":
+        func = explicit_costfunction
+    elif s == "target":
+        func = target_costfunction
+    else:
+        raise NotImplementedError("Vectorization strategy '{}' unknown!".format(s))
+
     keys = set(sf.cache_key for sf in strategy.values())
-    set_quadrature_points(qp)
+    if qp is not None:
+        set_quadrature_points(qp)
 
     # Sum over all the sum factorization kernels in the realization
     score = 0.0
@@ -192,6 +213,15 @@ def decide_vectorization_strategy():
 
 
 def level1_optimal_vectorization_strategy(sumfacts, width):
+    # If this uses the 'target' cost model, we need to do an expensive setup step:
+    # We switch to the 'model' implementation and find a minimum. This way we learn
+    # about the total cost needed to weigh costs of subsets of sum factorization kernels.
+    if get_form_option("vectorization_strategy") == "target":
+        set_form_option("vectorization_strategy", "model")
+        global _global_cost_for_target
+        _global_cost_for_target = strategy_cost(level1_optimal_vectorization_strategy(sumfacts, width))
+        set_form_option("vectorization_strategy", "target")
+
     # Gather a list of possible quadrature point tuples
     quad_points = [quadrature_points_per_direction()]
     if get_form_option("vectorization_allow_quadrature_changes"):
@@ -209,6 +239,14 @@ def level1_optimal_vectorization_strategy(sumfacts, width):
     optimal_strategies = {qp: level2_optimal_vectorization_strategy(sumfacts, width, qp) for qp in quad_points}
     qp = min(optimal_strategies, key=lambda qp: strategy_cost((qp, optimal_strategies[qp])))
 
+    # If we are using the 'target' strategy, we might want to log some information.
+    if get_form_option("vectorization_strategy") == "target":
+        set_form_option("vectorization_strategy", "model")
+        cost = strategy_cost((qp, optimal_strategies[qp]))
+        print("The target cost was: {}".format(get_form_option("vectorization_target")))
+        print("The achieved cost was: {}".format(cost))
+        set_form_option("vectorization_strategy", "target")
+
     return qp, optimal_strategies[qp]
 
 
@@ -221,6 +259,20 @@ def level2_optimal_vectorization_strategy(sumfacts, width, qp):
 
     for key in keys:
         key_sumfacts = frozenset(sf for sf in sumfacts if sf.parallel_key == key)
+
+        # If this uses the 'target' cost model, we need to find out how the score of
+        # the normal cost model for the given subset of sum factorization kernels would
+        # be. This way we get a percentage of the total target, which should be spent in
+        # this subset.
+        if get_form_option("vectorization_strategy") == "target":
+            set_form_option("vectorization_strategy", "model")
+            global _subset_cost_for_target
+            minimum = min(level2_optimal_vectorization_strategy_generator(key_sumfacts, width, qp),
+                          key=fixedqp_strategy_costfunction(qp))
+            _subset_cost_for_target = strategy_cost((qp, minimum))
+            set_form_option("vectorization_strategy", "target")
+
+        # Minimize over all the opportunities for the subset given by the current key
         key_strategy = min(level2_optimal_vectorization_strategy_generator(key_sumfacts, width, qp),
                            key=fixedqp_strategy_costfunction(qp))
         sfdict = add_to_frozendict(sfdict, key_strategy)