From b0c6c6dbac234ef8726f6f9c5f8a93d15944933a Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Mon, 12 Feb 2018 14:39:36 +0100
Subject: [PATCH] Rename optimization functions to levelx_...

We will need even more "levels" in the near future. The 'fromlist' strategy is commented out for now (see the TODO in the code).
---
 python/dune/perftool/sumfact/vectorization.py | 125 ++++++++++--------
 1 file changed, 68 insertions(+), 57 deletions(-)

diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index 690a1c0b..95286389 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -172,59 +172,55 @@ def decide_vectorization_strategy():
     logger.debug("decide_vectorization_strategy: Found {} active sum factorization nodes"
                  .format(len(active_sumfacts)))
 
-    # Find the best vectorization strategy by using a costmodel
-    width = get_vcl_type_size(dtype_floatingpoint())
-
     #
-    # Optimize over all the possible quadrature point tuples
+    # Find the best vectorization strategy by using a costmodel
     #
-    quad_points = [quadrature_points_per_direction()]
-    if get_form_option("vectorization_allow_quadrature_changes"):
-        sf = next(iter(active_sumfacts))
-        depth = 1
-        while depth <= width:
-            i = 0 if sf.matrix_sequence[0].face is None else 1
-            quad = list(quadrature_points_per_direction())
-            quad[i] = round_to_multiple(quad[i], depth)
-            quad_points.append(tuple(quad))
-            depth = depth * 2
-        quad_points = list(set(quad_points))
-
-    if get_form_option("vectorization_strategy") == "fromlist":
-        # This is a bit special and does not follow the minimization procedure at all
-
-        def _choose_strategy_from_list(stage1_sumfacts):
-            strategy = 0
-            for qp in quad_points:
-                for strat in fixed_quad_vectorization_opportunity_generator(frozenset(stage1_sumfacts), width, qp):
-                    if strategy == int(get_form_option("vectorization_list_index")):
-                        # Output the strategy and its cost into a separate file
-                        if get_global_context_value("form_type") == "jacobian_apply":
-                            with open("strategycosts.csv", "a") as f:
-                                f.write("{} {}\n".format(strategy, strategy_cost((qp, strat))))
-                        return qp, strat
-                    strategy = strategy + 1
-
-            raise PerftoolVectorizationError("Specified vectorization list index '{}' was too high!".format(get_form_option("vectorization_list_index")))
-
-        s1_sumfacts = frozenset(sf for sf in active_sumfacts if sf.stage == 1)
-
-        total = sum(len([s for s in fixed_quad_vectorization_opportunity_generator(frozenset(s1_sumfacts), width, qp)]) for qp in quad_points)
-        print("'fromlist' vectorization is attempting to pick #{} of {} strategies...".format(int(get_form_option("vectorization_list_index")),
-                                                                                              total))
-        qp, sfdict = _choose_strategy_from_list(s1_sumfacts)
-
-        keys = frozenset(sf.input_key for sf in active_sumfacts if sf.stage != 1)
-        for key in keys:
-            key_sumfacts = frozenset(sf for sf in active_sumfacts if sf.input_key == key)
-            minimum = min(fixed_quad_vectorization_opportunity_generator(key_sumfacts, width, qp),
-                          key=fixedqp_strategy_costfunction(qp))
-            sfdict = add_to_frozendict(sfdict, minimum)
-    else:
-        # Find the minimum cost strategy between all the quadrature point tuples
-        optimal_strategies = {qp: fixed_quadrature_optimal_vectorization(active_sumfacts, width, qp) for qp in quad_points}
-        qp = min(optimal_strategies, key=lambda qp: strategy_cost((qp, optimal_strategies[qp])))
-        sfdict = optimal_strategies[qp]
+    # Note that this optimization procedure uses a hierarchical approach to avoid
+    # the unfavorable complexity of enumerating all possible vectorization
+    # opportunities. Optimization is performed at several levels (the level number
+    # appears in the name of the implementing function). Each level either combines
+    # the optimal solutions of the next higher-numbered level into its own solution,
+    # or selects the optimum among those optimal solutions:
+    # * Level 1: Finding an optimal quadrature tuple (by finding optimum of level 2 optima)
+    # * Level 2: Split by parallelizability and combine optima into optimal solution
+    # * Level 3: Optimize number of different inputs to consider
+    # * Level 4: Optimize horizontal/vertical/hybrid strategy
+    width = get_vcl_type_size(dtype_floatingpoint())
+    qp, sfdict = level1_optimal_vectorization_strategy(active_sumfacts, width)
+
+
+#   TODO: Check how the 'fromlist' generator fits into the new overall picture
+#
+#     if get_form_option("vectorization_strategy") == "fromlist":
+#         # This is a bit special and does not follow the minimization procedure at all
+#
+#         def _choose_strategy_from_list(stage1_sumfacts):
+#             strategy = 0
+#             for qp in quad_points:
+#                 for strat in fixed_quad_vectorization_opportunity_generator(frozenset(stage1_sumfacts), width, qp):
+#                     if strategy == int(get_form_option("vectorization_list_index")):
+#                         # Output the strategy and its cost into a separate file
+#                         if get_global_context_value("form_type") == "jacobian_apply":
+#                             with open("strategycosts.csv", "a") as f:
+#                                 f.write("{} {}\n".format(strategy, strategy_cost((qp, strat))))
+#                         return qp, strat
+#                     strategy = strategy + 1
+#
+#             raise PerftoolVectorizationError("Specified vectorization list index '{}' was too high!".format(get_form_option("vectorization_list_index")))
+#
+#         s1_sumfacts = frozenset(sf for sf in active_sumfacts if sf.stage == 1)
+#
+#         total = sum(len([s for s in fixed_quad_vectorization_opportunity_generator(frozenset(s1_sumfacts), width, qp)]) for qp in quad_points)
+#         print("'fromlist' vectorization is attempting to pick #{} of {} strategies...".format(int(get_form_option("vectorization_list_index")),
+#                                                                                               total))
+#         qp, sfdict = _choose_strategy_from_list(s1_sumfacts)
+#
+#         keys = frozenset(sf.input_key for sf in active_sumfacts if sf.stage != 1)
+#         for key in keys:
+#             key_sumfacts = frozenset(sf for sf in active_sumfacts if sf.input_key == key)
+#             minimum = min(fixed_quad_vectorization_opportunity_generator(key_sumfacts, width, qp),
+#                           key=fixedqp_strategy_costfunction(qp))
+#             sfdict = add_to_frozendict(sfdict, minimum)
 
     set_quadrature_points(qp)
 
@@ -239,13 +235,28 @@ def decide_vectorization_strategy():
         _cache_vectorization_info(sf, sfdict[sf])
 
 
-def fixed_quadrature_optimal_vectorization(sumfacts, width, qp):
-    """ For a given quadrature point tuple, find the optimal strategy!
+def level1_optimal_vectorization_strategy(sumfacts, width):
+    # Gather a list of possible quadrature point tuples
+    quad_points = [quadrature_points_per_direction()]
+    if get_form_option("vectorization_allow_quadrature_changes"):
+        sf = next(iter(sumfacts))
+        depth = 1
+        while depth <= width:
+            i = 0 if sf.matrix_sequence[0].face is None else 1
+            quad = list(quadrature_points_per_direction())
+            quad[i] = round_to_multiple(quad[i], depth)
+            quad_points.append(tuple(quad))
+            depth = depth * 2
+        quad_points = list(set(quad_points))
 
-    In order to have this scale sufficiently, we cannot simply list all vectorization
-    opportunities and score them individually, but we need to do a divide and conquer
-    approach.
-    """
+    # Find the minimum cost strategy between all the quadrature point tuples
+    optimal_strategies = {qp: level2_optimal_vectorization_strategy(sumfacts, width, qp) for qp in quad_points}
+    qp = min(optimal_strategies, key=lambda qp: strategy_cost((qp, optimal_strategies[qp])))
+
+    return qp, optimal_strategies[qp]
+
+
+def level2_optimal_vectorization_strategy(sumfacts, width, qp):
     # Find the sets of simultaneously realizable kernels (thats an equivalence relation)
     keys = frozenset(sf.input_key for sf in sumfacts)
 
-- 
GitLab