From 49298e64271d40a9050149a6a4917a9ccc03aad8 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Thu, 7 Dec 2017 09:49:42 +0100
Subject: [PATCH] Implement a first draft of a heuristic model

Add a "model" vectorization strategy. Its cost is the kernel's operation
count multiplied by the position penalty factor and by a vertical
vectorization penalty of 1 + log(vertical_width).

The position penalty previously computed inline by the "explicit"
strategy is factored out into position_penalty_factor(), which returns 1
for a plain SumfactKernel. flop_cost is now imported from
sumfact.permutation instead of sumfact.tabulation, and the help string of
the vectorization_strategy option lists the new "model" value.

---
 python/dune/perftool/options.py               |  2 +-
 python/dune/perftool/sumfact/symbolic.py      |  4 ++--
 python/dune/perftool/sumfact/vectorization.py | 20 +++++++++++++++++--
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py
index cdff497b..789f7b86 100644
--- a/python/dune/perftool/options.py
+++ b/python/dune/perftool/options.py
@@ -55,7 +55,7 @@ class PerftoolOptionsArray(ImmutableRecord):
     fastdg = PerftoolOption(default=False, helpstr="Use FastDGGridOperator from PDELab.")
     sumfact = PerftoolOption(default=False, helpstr="Use sumfactorization")
     vectorization_quadloop = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization")
-    vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit")
+    vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model")
     vectorization_horizontal = PerftoolOption(default=None, helpstr="an explicit value for horizontal vectorization read by the 'explicit' strategy")
     vectorization_vertical = PerftoolOption(default=None, helpstr="an explicit value for vertical vectorization read by the 'explicit' strategy")
     vectorization_padding = PerftoolOption(default=None, helpstr="an explicit value for the allowed padding in vectorization")
diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py
index 857a1d97..ecffe144 100644
--- a/python/dune/perftool/sumfact/symbolic.py
+++ b/python/dune/perftool/sumfact/symbolic.py
@@ -333,7 +333,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable):
     def operations(self):
         """ The total number of floating point operations for the kernel
         to be carried out """
-        from dune.perftool.sumfact.tabulation import flop_cost
+        from dune.perftool.sumfact.permutation import flop_cost
         return flop_cost(self.matrix_sequence)
 
 
@@ -642,5 +642,5 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable)
     def operations(self):
         """ The total number of floating point operations for the kernel
         to be carried out """
-        from dune.perftool.sumfact.tabulation import flop_cost
+        from dune.perftool.sumfact.permutation import flop_cost
         return flop_cost(self.matrix_sequence)
diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index 4a9fbd1e..0c626519 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -26,6 +26,7 @@ from frozendict import frozendict
 import itertools as it
 import loopy as lp
 import numpy as np
+import math
 
 
 @generator_factory(item_tags=("vecinfo", "dryrundata"), cache_key_generator=lambda o, n: o)
@@ -51,6 +52,22 @@ def attach_vectorization_info(sf):
     return _cache_vectorization_info(sf, None)
 
 
+def position_penalty_factor(sf):
+    if isinstance(sf, SumfactKernel):
+        return 1
+    else:
+        return 1 + sum(abs(sf.kernels[i].position_priority - i) if sf.kernels[i].position_priority is not None else 0 for i in range(sf.length))
+
+
+@backend(interface="vectorization_strategy", name="model")
+def costmodel(sf):
+    # Penalize vertical vectorization
+    vertical_penalty = 1 + math.log(sf.vertical_width)
+
+    # Return total operations
+    return sf.operations * position_penalty_factor(sf) * vertical_penalty
+
+
 @backend(interface="vectorization_strategy", name="explicit")
 def explicit_costfunction(sf):
     # Read the explicitly set values for horizontal and vertical vectorization
@@ -66,8 +83,7 @@ def explicit_costfunction(sf):
 
     if sf.horizontal_width == horizontal and sf.vertical_width == vertical:
         # Penalize position mapping
-        penalty = sum(abs(sf.kernels[i].position_priority - i) if sf.kernels[i].position_priority is not None else 0 for i in range(sf.length))
-        return 1 + penalty
+        return position_penalty_factor(sf)
     else:
         return 1000000000000
 
-- 
GitLab