From 171b1962cec9563fad310f868624d610599f123b Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Wed, 24 Jan 2018 13:46:51 +0100
Subject: [PATCH] First draft of a cost function that picks a specific strategy
 by index

---
 python/dune/perftool/options.py               |  4 +-
 python/dune/perftool/sumfact/vectorization.py | 37 +++++++++++++------
 python/setup.py                               |  1 +
 3 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py
index 23733a7a..85efbb66 100644
--- a/python/dune/perftool/options.py
+++ b/python/dune/perftool/options.py
@@ -55,11 +55,13 @@ class PerftoolOptionsArray(ImmutableRecord):
     fastdg = PerftoolOption(default=False, helpstr="Use FastDGGridOperator from PDELab.")
     sumfact = PerftoolOption(default=False, helpstr="Use sumfactorization")
     vectorization_quadloop = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization")
-    vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model")
+    vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model|fromlist")
     vectorization_horizontal = PerftoolOption(default=None, helpstr="an explicit value for horizontal vectorization read by the 'explicit' strategy")
     vectorization_vertical = PerftoolOption(default=None, helpstr="an explicit value for vertical vectorization read by the 'explicit' strategy")
     vectorization_padding = PerftoolOption(default=None, helpstr="an explicit value for the allowed padding in vectorization")
     vectorization_allow_quadrature_changes = PerftoolOption(default=False, helpstr="whether the vectorization strategy is allowed to alter quadrature point numbers")
+    vectorization_list_index = PerftoolOption(default=None, helpstr="Which vectorization to pick from a list (only valid with vectorization_strategy=fromlist).")
+    vectorization_divide_and_conquer = PerftoolOption(default=True, helpstr="Whether to find cost model optima in subsets, only turn off when you know exactly what you are doing")
     turn_off_diagonal_jacobian = PerftoolOption(default=False, helpstr="Do not use diagonal_jacobian transformation on the ufl tree and cast result of jacobianInverseTransposed into a FieldMatrix.")
     architecture = PerftoolOption(default="haswell", helpstr="The architecture to optimize for. Possible values: haswell|knl")
     grid_offset = PerftoolOption(default=False, helpstr="Set to true if you want a yasp grid where the lower left corner is not in the origin.")
diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index f3fb96b9..6f5049aa 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -89,6 +89,16 @@ def explicit_costfunction(sf):
         return 1000000000000
 
 
+@backend(interface="vectorization_strategy", name="fromlist")
+def fromlist_costfunction(sf):
+    from dune.perftool.pdelab.signatures import kernel_name
+    count = get_counter(kernel_name())
+    if count == get_option("vectorization_list_index"):
+        return 0.0
+    else:
+        return 1.0
+
+
 def strategy_cost(strategy):
     func = get_backend(interface="vectorization_strategy",
                        selector=lambda: get_option("vectorization_strategy"))
@@ -139,7 +149,7 @@ def decide_vectorization_strategy():
     from dune.perftool.generation import retrieve_cache_items
     all_sumfacts = [i for i in retrieve_cache_items("kernel_default and sumfactnodes")]
 
-    # Stage 1 sumfactorizations that were actually used
+    # Stage 1 sum factorizations that were actually used
     basis_sumfacts = [i for i in retrieve_cache_items('kernel_default and basis_sf_kernels')]
 
     # This means we can have sum factorizations that will not get used
@@ -202,18 +212,23 @@ def fixed_quadrature_optimal_vectorization(sumfacts, width, qp):
     """
     set_quadrature_points(qp)
 
-    # Find the sets of simultaneously realizable kernels (thats an equivalence relation)
-    keys = frozenset(sf.input_key for sf in sumfacts)
+    if get_option("vectorization_divide_and_conquer"):
+        # Find the sets of simultaneously realizable kernels (that's an equivalence relation)
+        keys = frozenset(sf.input_key for sf in sumfacts)
 
-    # Find minimums for each of these sets
-    sfdict = frozendict()
-    for key in keys:
-        key_sumfacts = frozenset(sf for sf in sumfacts if sf.input_key == key)
-        minimum = min(fixed_quad_vectorization_opportunity_generator(key_sumfacts, width, qp),
-                      key=strategy_cost)
-        sfdict = add_to_frozendict(sfdict, minimum)
+        # Find minimums for each of these sets
+        sfdict = frozendict()
 
-    return sfdict
+        for key in keys:
+            key_sumfacts = frozenset(sf for sf in sumfacts if sf.input_key == key)
+            minimum = min(fixed_quad_vectorization_opportunity_generator(key_sumfacts, width, qp),
+                          key=strategy_cost)
+            sfdict = add_to_frozendict(sfdict, minimum)
+
+        return sfdict
+    else:
+        return min(fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp),
+                   key=strategy_cost)
 
 
 def fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp, already=frozendict()):
diff --git a/python/setup.py b/python/setup.py
index ccbe9e56..c7e80ce1 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -45,5 +45,6 @@ setup(name='dune.perftool',
       entry_points = {
         "console_scripts": [
             "ufl2pdelab = dune.perftool.compile:compile_form",
+            "picklevecstrats = dune.perftool.sumfact.vectorization:pickle_vectorization_strategies",
         ]
     })
-- 
GitLab