From 49298e64271d40a9050149a6a4917a9ccc03aad8 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Thu, 7 Dec 2017 09:49:42 +0100
Subject: [PATCH] Implement a first draft of a heuristic model

---
 python/dune/perftool/options.py               |  2 +-
 python/dune/perftool/sumfact/symbolic.py      |  4 ++--
 python/dune/perftool/sumfact/vectorization.py | 20 +++++++++++++++++--
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py
index cdff497b..789f7b86 100644
--- a/python/dune/perftool/options.py
+++ b/python/dune/perftool/options.py
@@ -55,7 +55,7 @@ class PerftoolOptionsArray(ImmutableRecord):
     fastdg = PerftoolOption(default=False, helpstr="Use FastDGGridOperator from PDELab.")
     sumfact = PerftoolOption(default=False, helpstr="Use sumfactorization")
     vectorization_quadloop = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization")
-    vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit")
+    vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model")
     vectorization_horizontal = PerftoolOption(default=None, helpstr="an explicit value for horizontal vectorization read by the 'explicit' strategy")
     vectorization_vertical = PerftoolOption(default=None, helpstr="an explicit value for vertical vectorization read by the 'explicit' strategy")
     vectorization_padding = PerftoolOption(default=None, helpstr="an explicit value for the allowed padding in vectorization")
diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py
index 857a1d97..ecffe144 100644
--- a/python/dune/perftool/sumfact/symbolic.py
+++ b/python/dune/perftool/sumfact/symbolic.py
@@ -333,7 +333,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
     def operations(self):
         """ The total number of floating point operations for the kernel
         to be carried out """
-        from dune.perftool.sumfact.tabulation import flop_cost
+        from dune.perftool.sumfact.permutation import flop_cost
         return flop_cost(self.matrix_sequence)
 
 
@@ -642,5 +642,5 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable)
     def operations(self):
         """ The total number of floating point operations for the kernel
         to be carried out """
-        from dune.perftool.sumfact.tabulation import flop_cost
+        from dune.perftool.sumfact.permutation import flop_cost
         return flop_cost(self.matrix_sequence)
diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index 4a9fbd1e..0c626519 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -26,6 +26,7 @@ from frozendict import frozendict
 import itertools as it
 import loopy as lp
 import numpy as np
+import math
 
 
 @generator_factory(item_tags=("vecinfo", "dryrundata"), cache_key_generator=lambda o, n: o)
@@ -51,6 +52,22 @@ def attach_vectorization_info(sf):
         return _cache_vectorization_info(sf, None)
 
 
+def position_penalty_factor(sf):
+    """ Penalty factor (>= 1) for kernels that sit away from their position_priority """
+    if isinstance(sf, SumfactKernel):
+        return 1
+    return 1 + sum(abs(sf.kernels[i].position_priority - i) for i in range(sf.length) if sf.kernels[i].position_priority is not None)
+
+
+@backend(interface="vectorization_strategy", name="model")
+def costmodel(sf):
+    """ Heuristic cost of sf: its flop count scaled by two penalty factors """
+    # Vertical vectorization is penalized logarithmically in the vertical
+    # width: log(1) = 0, so a vertical width of 1 stays unpenalized
+    vertical_factor = 1 + math.log(sf.vertical_width)
+    return sf.operations * position_penalty_factor(sf) * vertical_factor
+
+
 @backend(interface="vectorization_strategy", name="explicit")
 def explicit_costfunction(sf):
     # Read the explicitly set values for horizontal and vertical vectorization
@@ -66,8 +83,7 @@ def explicit_costfunction(sf):
 
     if sf.horizontal_width == horizontal and sf.vertical_width == vertical:
         # Penalize position mapping
-        penalty = sum(abs(sf.kernels[i].position_priority - i) if sf.kernels[i].position_priority is not None else 0 for i in range(sf.length))
-        return 1 + penalty
+        return position_penalty_factor(sf)
     else:
         return 1000000000000
 
-- 
GitLab