diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py index cdff497b2442a817ea1f88ed1fb49cd052cd6063..789f7b86c0dc8fc6f91f81961796f34df120ff5a 100644 --- a/python/dune/perftool/options.py +++ b/python/dune/perftool/options.py @@ -55,7 +55,7 @@ class PerftoolOptionsArray(ImmutableRecord): fastdg = PerftoolOption(default=False, helpstr="Use FastDGGridOperator from PDELab.") sumfact = PerftoolOption(default=False, helpstr="Use sumfactorization") vectorization_quadloop = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization") - vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit") + vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model") vectorization_horizontal = PerftoolOption(default=None, helpstr="an explicit value for horizontal vectorization read by the 'explicit' strategy") vectorization_vertical = PerftoolOption(default=None, helpstr="an explicit value for vertical vectorization read by the 'explicit' strategy") vectorization_padding = PerftoolOption(default=None, helpstr="an explicit value for the allowed padding in vectorization") diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py index 857a1d9766f71c46956cd822d73b4dbc3ce090e9..ecffe144957d533cb27be7fcc5818a0cf880f6ef 100644 --- a/python/dune/perftool/sumfact/symbolic.py +++ b/python/dune/perftool/sumfact/symbolic.py @@ -333,7 +333,7 @@ class SumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable): def operations(self): """ The total number of floating point operations for the kernel to be carried out """ - from dune.perftool.sumfact.tabulation import flop_cost + from dune.perftool.sumfact.permutation import flop_cost return flop_cost(self.matrix_sequence) @@ -642,5 +642,5 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) def operations(self): """ The total number of floating point operations for the kernel to be carried out """ - from dune.perftool.sumfact.tabulation import flop_cost + from dune.perftool.sumfact.permutation import flop_cost return flop_cost(self.matrix_sequence) diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index 4a9fbd1e88bc507d7bc831845811193a180c1194..0c626519dd5ab7c4631d01749440bcb2efa3ca64 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -26,6 +26,7 @@ from frozendict import frozendict import itertools as it import loopy as lp import numpy as np +import math @generator_factory(item_tags=("vecinfo", "dryrundata"), cache_key_generator=lambda o, n: o) @@ -51,6 +52,22 @@ def attach_vectorization_info(sf): return _cache_vectorization_info(sf, None) +def position_penalty_factor(sf): + if isinstance(sf, SumfactKernel): + return 1 + else: + return 1 + sum(abs(sf.kernels[i].position_priority - i) if sf.kernels[i].position_priority is not None else 0 for i in range(sf.length)) + + +@backend(interface="vectorization_strategy", name="model") +def costmodel(sf): + # Penalize vertical vectorization + vertical_penalty = 1 + math.log(sf.vertical_width) + + # Return total operations + return sf.operations * position_penalty_factor(sf) * vertical_penalty + + @backend(interface="vectorization_strategy", name="explicit") def explicit_costfunction(sf): # Read the explicitly set values for horizontal and vertical vectorization @@ -66,8 +83,7 @@ def explicit_costfunction(sf): if sf.horizontal_width == horizontal and sf.vertical_width == vertical: # Penalize position mapping - penalty = sum(abs(sf.kernels[i].position_priority - i) if sf.kernels[i].position_priority is not None else 0 for i in range(sf.length)) - return 1 + penalty + return position_penalty_factor(sf) else: return 1000000000000