# Patch summary for python/dune/perftool/sumfact/vectorization.py (free-form notes placed
# before the "diff --git" header; the patch text below is kept byte-identical).
#
# NOTE(review): the line breaks of this patch have been collapsed, so hunk lines run
# together and the original indentation/blank context lines are not visible here.
# Restore the original line structure before trying to apply it.
#
# Hunk 1 (@@ -69,6 +69,12 @@): adds `fromlist_costmodel`, registered with
#   @backend(interface="vectorization_strategy", name="fromlist"). It simply returns
#   costmodel(sf), i.e. the "fromlist" strategy now reuses the regular cost model.
#
# Hunk 2 (@@ -89,16 +95,6 @@): removes the previous "fromlist" backend
#   `fromlist_costfunction`, which returned 0.0 when get_counter(kernel_name()) equalled
#   the "vectorization_list_index" option and 1.0 otherwise.
#
# Hunk 3 (@@ -186,10 +182,40 @@, decide_vectorization_strategy): wraps the former
#   minimum-cost selection in an `else:` and adds a branch taken when the
#   "vectorization_strategy" option equals "fromlist". That branch:
#     * raises PerftoolVectorizationError unless sys.version_info[0] == 2 (the message
#       states that the selection relies on iteration order);
#     * defines _choose_strategy_from_list, which walks quad_points and, per qp, the
#       results of fixed_quad_vectorization_opportunity_generator for the stage-1
#       sumfacts, counting candidates and returning (qp, strat) once the counter equals
#       int(get_option("vectorization_list_index")); it raises
#       PerftoolVectorizationError if the candidates run out first;
#     * appears to print each candidate that is passed over (the print call follows the
#       `return` in the flattened text) -- TODO confirm its exact nesting level;
#     * for sumfacts with stage != 1, groups by input_key and adds the strategy_cost
#       minimum of each group to sfdict via add_to_frozendict.
#   NOTE(review): `keys` is built from sumfacts with stage != 1, but `key_sumfacts`
#   filters all of active_sumfacts by input_key -- confirm that a stage-1 sumfact can
#   never share an input_key with a later-stage one.
#   NOTE(review): the removed backend compared the raw option value, the new code wraps
#   it in int(...); presumably the option arrives as a string -- verify.
#
# Hunk 4 (@@ -212,23 +238,19 @@, fixed_quadrature_optimal_vectorization): drops the
#   get_option("vectorization_divide_and_conquer") check. The per-input_key minimisation
#   becomes the only code path and the single global min(...) fallback is removed.
#
diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index 79bc6b4fb1059a9fce2897fd447e0933caeb9d52..3cecd9626e808188577fc41e4a266ffd388ebe76 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -69,6 +69,12 @@ def costmodel(sf): return sf.operations * position_penalty_factor(sf) * vertical_penalty * scalar_penalty +@backend(interface="vectorization_strategy", name="fromlist") +def fromlist_costmodel(sf): + # The fromlist strategy needs to reuse the cost model! + return costmodel(sf) + + @backend(interface="vectorization_strategy", name="explicit") def explicit_costfunction(sf): # Read the explicitly set values for horizontal and vertical vectorization @@ -89,16 +95,6 @@ def explicit_costfunction(sf): return 1000000000000 -@backend(interface="vectorization_strategy", name="fromlist") -def fromlist_costfunction(sf): - from dune.perftool.pdelab.signatures import kernel_name - count = get_counter(kernel_name()) - if count == get_option("vectorization_list_index"): - return 0.0 - else: - return 1.0 - - def strategy_cost(strategy): func = get_backend(interface="vectorization_strategy", selector=lambda: get_option("vectorization_strategy")) @@ -186,10 +182,40 @@ def decide_vectorization_strategy(): depth = depth * 2 quad_points = list(set(quad_points)) - # Find the minimum cost strategy between all the quadrature point tuples - optimal_strategies = {qp: fixed_quadrature_optimal_vectorization(active_sumfacts, width, qp) for qp in quad_points} - qp = min(optimal_strategies, key=lambda qp: strategy_cost(optimal_strategies[qp])) - sfdict = optimal_strategies[qp] + if get_option("vectorization_strategy") == "fromlist": + # This is a bit special and does not follow the minimization procedure at all + + # Check that we are using python2 + import sys + if not sys.version_info[0] == 2: + raise PerftoolVectorizationError("fromlist vectorization relies on iteration 
order and therefore needs python2. Yes, I know this is broken design. It is only used for one plot in the paper though.") + + def _choose_strategy_from_list(stage1_sumfacts): + strategy = 0 + for qp in quad_points: + for strat in fixed_quad_vectorization_opportunity_generator(frozenset(stage1_sumfacts), width, qp): + if strategy == int(get_option("vectorization_list_index")): + return qp, strat + print("\n".join(stringify_vectorization_strategy((qp, strat))) + "\n") + strategy = strategy + 1 + + raise PerftoolVectorizationError("Specified vectorization list index '{}' was too high!".format(get_option("vectorization_list_index"))) + + s1_sumfacts = frozenset(sf for sf in active_sumfacts if sf.stage == 1) + qp, sfdict = _choose_strategy_from_list(s1_sumfacts) + + keys = frozenset(sf.input_key for sf in active_sumfacts if sf.stage != 1) + for key in keys: + key_sumfacts = frozenset(sf for sf in active_sumfacts if sf.input_key == key) + minimum = min(fixed_quad_vectorization_opportunity_generator(key_sumfacts, width, qp), + key=strategy_cost) + sfdict = add_to_frozendict(sfdict, minimum) + else: + # Find the minimum cost strategy between all the quadrature point tuples + optimal_strategies = {qp: fixed_quadrature_optimal_vectorization(active_sumfacts, width, qp) for qp in quad_points} + qp = min(optimal_strategies, key=lambda qp: strategy_cost(optimal_strategies[qp])) + sfdict = optimal_strategies[qp] + set_quadrature_points(qp) logger.debug("decide_vectorization_strategy: Decided for the following strategy:" @@ -212,23 +238,19 @@ def fixed_quadrature_optimal_vectorization(sumfacts, width, qp): """ set_quadrature_points(qp) - if get_option("vectorization_divide_and_conquer"): - # Find the sets of simultaneously realizable kernels (thats an equivalence relation) - keys = frozenset(sf.input_key for sf in sumfacts) + # Find the sets of simultaneously realizable kernels (thats an equivalence relation) + keys = frozenset(sf.input_key for sf in sumfacts) - # Find minimums 
for each of these sets - sfdict = frozendict() + # Find minimums for each of these sets + sfdict = frozendict() - for key in keys: - key_sumfacts = frozenset(sf for sf in sumfacts if sf.input_key == key) - minimum = min(fixed_quad_vectorization_opportunity_generator(key_sumfacts, width, qp), - key=strategy_cost) - sfdict = add_to_frozendict(sfdict, minimum) + for key in keys: + key_sumfacts = frozenset(sf for sf in sumfacts if sf.input_key == key) + minimum = min(fixed_quad_vectorization_opportunity_generator(key_sumfacts, width, qp), + key=strategy_cost) + sfdict = add_to_frozendict(sfdict, minimum) - return sfdict - else: - return min(fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp), - key=strategy_cost) + return sfdict def fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp, already=frozendict()):