diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index 62293c2999e15eef4e059bd9f9bd9021a31af889..d6cc0740480e7eb82dd955ca97d0cc6923a8d7a2 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -1,5 +1,7 @@ """ Sum factorization vectorization """ +from __future__ import division + import logging from dune.perftool.loopy.target import dtype_floatingpoint @@ -19,7 +21,7 @@ from dune.perftool.sumfact.tabulation import (BasisTabulationMatrixArray, set_quadrature_points, ) from dune.perftool.error import PerftoolVectorizationError -from dune.perftool.options import get_form_option, set_form_option +from dune.perftool.options import get_form_option, get_option, set_form_option from dune.perftool.tools import add_to_frozendict, round_to_multiple, list_diff from pytools import product @@ -83,16 +85,14 @@ def explicit_costfunction(sf): return 1000000000000 -_global_cost_for_target = 0.0 -_subset_cost_for_target = 0.0 +_global_kernel_amount = 0 def target_costfunction(sf): target = float(get_form_option("vectorization_target")) realcost = costmodel(sf) - val = abs(realcost - (_subset_cost_for_target / _global_cost_for_target) * target) - print(val) - return val + ratio = sf.horizontal_width / _global_kernel_amount + return abs(realcost - ratio * target) def strategy_cost(strat_tuple): @@ -213,14 +213,12 @@ def decide_vectorization_strategy(): def level1_optimal_vectorization_strategy(sumfacts, width): - # If this uses the 'target' cost model, we need to do an expensive setup step: - # We switch to the 'model' implementation and find a minimum. This way we learn - # about the total cost needed to weigh costs of subsets of sum factorization kernels. + # If this uses the 'target' cost model, we need to store information on how many + # sum factorization kernels need to be implemented. This will be used to correctly + # weight the cost target in the cost function. if get_form_option("vectorization_strategy") == "target": - set_form_option("vectorization_strategy", "model") - global _global_cost_for_target - _global_cost_for_target = strategy_cost(level1_optimal_vectorization_strategy(sumfacts, width)) - set_form_option("vectorization_strategy", "target") + global _global_kernel_amount + _global_kernel_amount = len(sumfacts) # Gather a list of possible quadrature point tuples quad_points = [quadrature_points_per_direction()] @@ -245,7 +243,12 @@ def level1_optimal_vectorization_strategy(sumfacts, width): cost = strategy_cost((qp, optimal_strategies[qp])) print("The target cost was: {}".format(get_form_option("vectorization_target"))) print("The achieved cost was: {}".format(cost)) + from os.path import join + filename = join(get_option("project_basedir"), "targetstrat_{}".format(int(float(get_form_option("vectorization_target"))))) + with open(filename, 'w') as f: + f.write("\n".join(stringify_vectorization_strategy((qp, optimal_strategies[qp])))) set_form_option("vectorization_strategy", "target") + print("The score in 'target' logic was: {}".format(strategy_cost((qp, optimal_strategies[qp])))) return qp, optimal_strategies[qp] @@ -260,18 +263,6 @@ def level2_optimal_vectorization_strategy(sumfacts, width, qp): for key in keys: key_sumfacts = frozenset(sf for sf in sumfacts if sf.parallel_key == key) - # If this uses the 'target' cost model, we need to find out how the score of - # the normal cost model for the given subset of sum factorization kernels would - # be. This way we get a percentage of the total target, which should be spent in - # this subset. - if get_form_option("vectorization_strategy") == "target": - set_form_option("vectorization_strategy", "model") - global _subset_cost_for_target - minimum = min(level2_optimal_vectorization_strategy_generator(key_sumfacts, width, qp), - key=fixedqp_strategy_costfunction(qp)) - _subset_cost_for_target = strategy_cost((qp, minimum)) - set_form_option("vectorization_strategy", "target") - # Minimize over all the opportunities for the subset given by the current key key_strategy = min(level2_optimal_vectorization_strategy_generator(key_sumfacts, width, qp), key=fixedqp_strategy_costfunction(qp))