From ce8f3cc42836db53ee957d8ce2212fdba13001f4 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Mon, 27 Aug 2018 14:16:42 +0200
Subject: [PATCH] Small adjustments to prevent stupid stuff

---
 python/dune/perftool/sumfact/vectorization.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index 0b6c9494..bf463446 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -57,13 +57,17 @@ def costmodel(sf):
     # Penalize vertical vectorization
     vertical_penalty = 1 + math.log(sf.vertical_width)
 
+    memory_penalty = 1.0
+    if isinstance(sf, VectorizedSumfactKernel):
+        memory_penalty = 1.0 + math.log(len(set(k.interface for k in sf.kernels)), 2)
+
     # Penalize scalar sum factorization kernels
     scalar_penalty = 1
     if isinstance(sf, SumfactKernel):
         scalar_penalty = get_vcl_type_size(dtype_floatingpoint())
 
     # Return total operations
-    return sf.operations * vertical_penalty * scalar_penalty
+    return sf.operations * vertical_penalty * memory_penalty * scalar_penalty
 
 
 def explicit_costfunction(sf):
@@ -367,7 +371,7 @@ def _level2_optimal_vectorization_strategy_generator(sumfacts, width, qp, alread
 
 def get_vectorization_dict(sumfacts, vertical, horizontal, qp):
     # Discard opportunities that do not contain enough horizontal kernels
-    if len(sumfacts) not in (horizontal, horizontal - 1):
+    if len(sumfacts) not in (horizontal, horizontal * vertical - 1):
         return None
 
     # Enhance the list of sumfact nodes by adding vertical splittings
-- 
GitLab