From ce8f3cc42836db53ee957d8ce2212fdba13001f4 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Mon, 27 Aug 2018 14:16:42 +0200
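Subject: [PATCH] Small adjustments to the sumfact vectorization cost model

Add a memory penalty to costmodel(): for a VectorizedSumfactKernel the cost
is additionally multiplied by 1 + log2 of the number of distinct interfaces
among its kernels. Also change the kernel-count check in
get_vectorization_dict() to accept horizontal * vertical - 1 kernels instead
of horizontal - 1.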
---
 python/dune/perftool/sumfact/vectorization.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index 0b6c9494..bf463446 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -57,13 +57,17 @@ def costmodel(sf):
     # Penalize vertical vectorization
     vertical_penalty = 1 + math.log(sf.vertical_width)
 
+    memory_penalty = 1.0
+    if isinstance(sf, VectorizedSumfactKernel):
+        memory_penalty = 1.0 + math.log(len(set(k.interface for k in sf.kernels)), 2)
+
     # Penalize scalar sum factorization kernels
     scalar_penalty = 1
     if isinstance(sf, SumfactKernel):
         scalar_penalty = get_vcl_type_size(dtype_floatingpoint())
 
     # Return total operations
-    return sf.operations * vertical_penalty * scalar_penalty
+    return sf.operations * vertical_penalty * memory_penalty * scalar_penalty
 
 
 def explicit_costfunction(sf):
@@ -367,7 +371,7 @@ def _level2_optimal_vectorization_strategy_generator(sumfacts, width, qp, alread
 
 def get_vectorization_dict(sumfacts, vertical, horizontal, qp):
     # Discard opportunities that do not contain enough horizontal kernels
-    if len(sumfacts) not in (horizontal, horizontal - 1):
+    if len(sumfacts) not in (horizontal, horizontal * vertical - 1):
         return None
 
     # Enhance the list of sumfact nodes by adding vertical splittings
-- 
GitLab