From 171b1962cec9563fad310f868624d610599f123b Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Wed, 24 Jan 2018 13:46:51 +0100 Subject: [PATCH] First draft of a cost function that picks a specific strategy by index --- python/dune/perftool/options.py | 4 +- python/dune/perftool/sumfact/vectorization.py | 37 +++++++++++++------ python/setup.py | 1 + 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py index 23733a7a..85efbb66 100644 --- a/python/dune/perftool/options.py +++ b/python/dune/perftool/options.py @@ -55,11 +55,13 @@ class PerftoolOptionsArray(ImmutableRecord): fastdg = PerftoolOption(default=False, helpstr="Use FastDGGridOperator from PDELab.") sumfact = PerftoolOption(default=False, helpstr="Use sumfactorization") vectorization_quadloop = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization") - vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model") + vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model|fromlist") vectorization_horizontal = PerftoolOption(default=None, helpstr="an explicit value for horizontal vectorization read by the 'explicit' strategy") vectorization_vertical = PerftoolOption(default=None, helpstr="an explicit value for vertical vectorization read by the 'explicit' strategy") vectorization_padding = PerftoolOption(default=None, helpstr="an explicit value for the allowed padding in vectorization") vectorization_allow_quadrature_changes = PerftoolOption(default=False, helpstr="whether the vectorization strategy is allowed to alter quadrature point numbers") + vectorization_list_index = PerftoolOption(default=None, helpstr="Which vectorization to pick from a list (only valid with vectorization_strategy=fromlist).") + vectorization_divide_and_conquer = PerftoolOption(default=True, helpstr="Whether to find cost model optima in subsets, only turn off when you know exactly what you are doing") turn_off_diagonal_jacobian = PerftoolOption(default=False, helpstr="Do not use diagonal_jacobian transformation on the ufl tree and cast result of jacobianInverseTransposed into a FieldMatrix.") architecture = PerftoolOption(default="haswell", helpstr="The architecture to optimize for. Possible values: haswell|knl") grid_offset = PerftoolOption(default=False, helpstr="Set to true if you want a yasp grid where the lower left corner is not in the origin.") diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index f3fb96b9..6f5049aa 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -89,6 +89,16 @@ def explicit_costfunction(sf): return 1000000000000 +@backend(interface="vectorization_strategy", name="fromlist") +def fromlist_costfunction(sf): + from dune.perftool.pdelab.signatures import kernel_name + count = get_counter(kernel_name()) + if count == get_option("vectorization_list_index"): + return 0.0 + else: + return 1.0 + + def strategy_cost(strategy): func = get_backend(interface="vectorization_strategy", selector=lambda: get_option("vectorization_strategy")) @@ -139,7 +149,7 @@ def decide_vectorization_strategy(): from dune.perftool.generation import retrieve_cache_items all_sumfacts = [i for i in retrieve_cache_items("kernel_default and sumfactnodes")] - # Stage 1 sumfactorizations that were actually used + # Stage 1 sum factorizations that were actually used basis_sumfacts = [i for i in retrieve_cache_items('kernel_default and basis_sf_kernels')] # This means we can have sum factorizations that will not get used @@ -202,18 +212,23 @@ def fixed_quadrature_optimal_vectorization(sumfacts, width, qp): """ set_quadrature_points(qp) - # Find the sets of simultaneously realizable kernels (thats an equivalence relation) - keys = frozenset(sf.input_key for sf in sumfacts) + if get_option("vectorization_divide_and_conquer"): + # Find the sets of simultaneously realizable kernels (thats an equivalence relation) + keys = frozenset(sf.input_key for sf in sumfacts) - # Find minimums for each of these sets - sfdict = frozendict() - for key in keys: - key_sumfacts = frozenset(sf for sf in sumfacts if sf.input_key == key) - minimum = min(fixed_quad_vectorization_opportunity_generator(key_sumfacts, width, qp), - key=strategy_cost) - sfdict = add_to_frozendict(sfdict, minimum) + # Find minimums for each of these sets + sfdict = frozendict() - return sfdict + for key in keys: + key_sumfacts = frozenset(sf for sf in sumfacts if sf.input_key == key) + minimum = min(fixed_quad_vectorization_opportunity_generator(key_sumfacts, width, qp), + key=strategy_cost) + sfdict = add_to_frozendict(sfdict, minimum) + + return sfdict + else: + return min(fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp), + key=strategy_cost) def fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp, already=frozendict()): diff --git a/python/setup.py b/python/setup.py index ccbe9e56..c7e80ce1 100644 --- a/python/setup.py +++ b/python/setup.py @@ -45,5 +45,6 @@ setup(name='dune.perftool', entry_points = { "console_scripts": [ "ufl2pdelab = dune.perftool.compile:compile_form", + "picklevecstrats = dune.perftool.sumfact.vectorization:pickle_vectorization_strategies", ] }) -- GitLab