# Patch overview (unified git diff touching three files). The text below this
# commentary is the patch itself and is left exactly as found.
#
# python/dune/perftool/options.py (class PerftoolOptionsArray):
#   - The help string of `vectorization_strategy` now also lists `fromlist`
#     as a possible value.
#   - Adds option `vectorization_list_index` (default None).
#   - Adds option `vectorization_divide_and_conquer` (default True).
#
# python/dune/perftool/sumfact/vectorization.py:
#   - Registers `fromlist_costfunction(sf)` as the backend named "fromlist"
#     for the "vectorization_strategy" interface. It returns 0.0 when
#     get_counter(kernel_name()) equals the `vectorization_list_index`
#     option and 1.0 otherwise. The `sf` argument is not used in its body.
#   - Rewords one comment in decide_vectorization_strategy
#     ("sumfactorizations" -> "sum factorizations").
#   - fixed_quadrature_optimal_vectorization: when the
#     `vectorization_divide_and_conquer` option is truthy, it keeps the
#     previous behaviour (one strategy_cost minimum per distinct
#     `input_key`, merged with add_to_frozendict). Otherwise it returns a
#     single strategy_cost minimum over the opportunities generated for all
#     sumfacts at once.
#
# python/setup.py:
#   - Adds the console script `picklevecstrats`, pointing at
#     dune.perftool.sumfact.vectorization:pickle_vectorization_strategies.
#
# NOTE(review): no hunk shown here defines `pickle_vectorization_strategies`;
#   confirm it already exists in vectorization.py, otherwise the new entry
#   point cannot be resolved.
# NOTE(review): `vectorization_list_index` defaults to None and is compared
#   with `==` against the result of get_counter(...). Presumably the counter
#   is an integer; if the option value arrives as a string the comparison
#   would never match. Verify how option values are parsed.
# NOTE(review): the line structure of this patch appears to have been
#   collapsed (many patch lines per physical line); it likely needs its
#   original newlines restored before it can be applied. TODO confirm.
diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py index 23733a7a4031fb389cc6359d7d209ffa5cb9ca18..85efbb6630d537e2d10c3849c10562763a072bd4 100644 --- a/python/dune/perftool/options.py +++ b/python/dune/perftool/options.py @@ -55,11 +55,13 @@ class PerftoolOptionsArray(ImmutableRecord): fastdg = PerftoolOption(default=False, helpstr="Use FastDGGridOperator from PDELab.") sumfact = PerftoolOption(default=False, helpstr="Use sumfactorization") vectorization_quadloop = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization") - vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model") + vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model|fromlist") vectorization_horizontal = PerftoolOption(default=None, helpstr="an explicit value for horizontal vectorization read by the 'explicit' strategy") vectorization_vertical = PerftoolOption(default=None, helpstr="an explicit value for vertical vectorization read by the 'explicit' strategy") vectorization_padding = PerftoolOption(default=None, helpstr="an explicit value for the allowed padding in vectorization") vectorization_allow_quadrature_changes = PerftoolOption(default=False, helpstr="whether the vectorization strategy is allowed to alter quadrature point numbers") + vectorization_list_index = PerftoolOption(default=None, helpstr="Which vectorization to pick from a list (only valid with vectorization_strategy=fromlist).") + vectorization_divide_and_conquer = PerftoolOption(default=True, helpstr="Whether to find cost model optima in subsets, only turn off when you know exactly what you are doing") turn_off_diagonal_jacobian = PerftoolOption(default=False, helpstr="Do not use diagonal_jacobian transformation on the ufl tree and cast result of jacobianInverseTransposed into 
a FieldMatrix.") architecture = PerftoolOption(default="haswell", helpstr="The architecture to optimize for. Possible values: haswell|knl") grid_offset = PerftoolOption(default=False, helpstr="Set to true if you want a yasp grid where the lower left corner is not in the origin.") diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index f3fb96b99b0d7cb0a633e6d499f6e362db5da207..6f5049aa5a3a8eb875d4a5cf1d666864c9b206c2 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -89,6 +89,16 @@ def explicit_costfunction(sf): return 1000000000000 +@backend(interface="vectorization_strategy", name="fromlist") +def fromlist_costfunction(sf): + from dune.perftool.pdelab.signatures import kernel_name + count = get_counter(kernel_name()) + if count == get_option("vectorization_list_index"): + return 0.0 + else: + return 1.0 + + def strategy_cost(strategy): func = get_backend(interface="vectorization_strategy", selector=lambda: get_option("vectorization_strategy")) @@ -139,7 +149,7 @@ def decide_vectorization_strategy(): from dune.perftool.generation import retrieve_cache_items all_sumfacts = [i for i in retrieve_cache_items("kernel_default and sumfactnodes")] - # Stage 1 sumfactorizations that were actually used + # Stage 1 sum factorizations that were actually used basis_sumfacts = [i for i in retrieve_cache_items('kernel_default and basis_sf_kernels')] # This means we can have sum factorizations that will not get used @@ -202,18 +212,23 @@ def fixed_quadrature_optimal_vectorization(sumfacts, width, qp): """ set_quadrature_points(qp) - # Find the sets of simultaneously realizable kernels (thats an equivalence relation) - keys = frozenset(sf.input_key for sf in sumfacts) + if get_option("vectorization_divide_and_conquer"): + # Find the sets of simultaneously realizable kernels (thats an equivalence relation) + keys = frozenset(sf.input_key for sf in sumfacts) - # 
Find minimums for each of these sets - sfdict = frozendict() - for key in keys: - key_sumfacts = frozenset(sf for sf in sumfacts if sf.input_key == key) - minimum = min(fixed_quad_vectorization_opportunity_generator(key_sumfacts, width, qp), - key=strategy_cost) - sfdict = add_to_frozendict(sfdict, minimum) + # Find minimums for each of these sets + sfdict = frozendict() - return sfdict + for key in keys: + key_sumfacts = frozenset(sf for sf in sumfacts if sf.input_key == key) + minimum = min(fixed_quad_vectorization_opportunity_generator(key_sumfacts, width, qp), + key=strategy_cost) + sfdict = add_to_frozendict(sfdict, minimum) + + return sfdict + else: + return min(fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp), + key=strategy_cost) def fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp, already=frozendict()): diff --git a/python/setup.py b/python/setup.py index ccbe9e56658fa87a0c574767f6c7f4282612a750..c7e80ce1b0de4a2a561ee1a1a4d2189ec0df0d0c 100644 --- a/python/setup.py +++ b/python/setup.py @@ -45,5 +45,6 @@ setup(name='dune.perftool', entry_points = { "console_scripts": [ "ufl2pdelab = dune.perftool.compile:compile_form", + "picklevecstrats = dune.perftool.sumfact.vectorization:pickle_vectorization_strategies", ] })