Skip to content
Snippets Groups Projects
Commit c761cd04 authored by René Heß's avatar René Heß
Browse files

[!262] Costmodel Validation

Merge branch 'feature/costmodel-validation' into 'master'

The MR adds a new vectorization strategy, 'target', which tries to find a
vectorization strategy whose cost (according to the 'model' strategy) is as
close as possible to a given target. This makes it possible to run several
executables and plot the cost model value against performance data. The actual
plotting work is done in dune-perftool-paperplots. The feature is very niche,
and I tried to minimize the occurrence of code paths only necessary for this,
but I think the outcome is quite okay.

See merge request [dominic/dune-perftool!262]

  [dominic/dune-perftool!262]: gitlab.dune-project.org/dominic/dune-perftool/merge_requests/262
parents 6b82c38c e3ef02ad
No related branches found
No related tags found
No related merge requests found
...@@ -82,7 +82,7 @@ class PerftoolFormOptionsArray(ImmutableRecord): ...@@ -82,7 +82,7 @@ class PerftoolFormOptionsArray(ImmutableRecord):
sumfact = PerftoolOption(default=False, helpstr="Use sumfactorization") sumfact = PerftoolOption(default=False, helpstr="Use sumfactorization")
sumfact_regular_jacobians = PerftoolOption(default=False, helpstr="Generate non sum-factorized jacobians (only useful if sumfact is set)") sumfact_regular_jacobians = PerftoolOption(default=False, helpstr="Generate non sum-factorized jacobians (only useful if sumfact is set)")
vectorization_quadloop = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization") vectorization_quadloop = PerftoolOption(default=False, helpstr="whether to generate code with explicit vectorization")
vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model") vectorization_strategy = PerftoolOption(default="none", helpstr="The identifier of the vectorization cost model. Possible values: none|explicit|model|target")
vectorization_not_fully_vectorized_error = PerftoolOption(default=False, helpstr="throw an error if nonquadloop vectorization did not fully vectorize") vectorization_not_fully_vectorized_error = PerftoolOption(default=False, helpstr="throw an error if nonquadloop vectorization did not fully vectorize")
vectorization_horizontal = PerftoolOption(default=None, helpstr="an explicit value for horizontal vectorization read by the 'explicit' strategy") vectorization_horizontal = PerftoolOption(default=None, helpstr="an explicit value for horizontal vectorization read by the 'explicit' strategy")
vectorization_vertical = PerftoolOption(default=None, helpstr="an explicit value for vertical vectorization read by the 'explicit' strategy") vectorization_vertical = PerftoolOption(default=None, helpstr="an explicit value for vertical vectorization read by the 'explicit' strategy")
...@@ -90,6 +90,7 @@ class PerftoolFormOptionsArray(ImmutableRecord): ...@@ -90,6 +90,7 @@ class PerftoolFormOptionsArray(ImmutableRecord):
vectorization_allow_quadrature_changes = PerftoolOption(default=False, helpstr="whether the vectorization strategy is allowed to alter quadrature point numbers") vectorization_allow_quadrature_changes = PerftoolOption(default=False, helpstr="whether the vectorization strategy is allowed to alter quadrature point numbers")
vectorization_list_index = PerftoolOption(default=None, helpstr="Which vectorization to pick from a list (only valid with vectorization_strategy=fromlist).") vectorization_list_index = PerftoolOption(default=None, helpstr="Which vectorization to pick from a list (only valid with vectorization_strategy=fromlist).")
vectorization_jacobians = PerftoolOption(default=True, helpstr="Whether to attempt to vectorize jacobians (takes time, often not needed)") vectorization_jacobians = PerftoolOption(default=True, helpstr="Whether to attempt to vectorize jacobians (takes time, often not needed)")
vectorization_target = PerftoolOption(_type=float, helpstr="The cost function target for the 'target' cost model. Only needed to verify the cost model itself, do not use light-heartedly!!!")
simplify = PerftoolOption(default=False, helpstr="Whether to simplify expressions using sympy") simplify = PerftoolOption(default=False, helpstr="Whether to simplify expressions using sympy")
generate_jacobians = PerftoolOption(default=True, helpstr="Whether jacobian_* methods should be generated. This is set to false automatically, when numerical_jacobian is set to true.") generate_jacobians = PerftoolOption(default=True, helpstr="Whether jacobian_* methods should be generated. This is set to false automatically, when numerical_jacobian is set to true.")
generate_residuals = PerftoolOption(default=True, helpstr="Whether alpha_* methods should be generated.") generate_residuals = PerftoolOption(default=True, helpstr="Whether alpha_* methods should be generated.")
......
""" Sum factorization vectorization """ """ Sum factorization vectorization """
from __future__ import division
import logging import logging
from dune.perftool.loopy.target import dtype_floatingpoint from dune.perftool.loopy.target import dtype_floatingpoint
...@@ -19,7 +21,7 @@ from dune.perftool.sumfact.tabulation import (BasisTabulationMatrixArray, ...@@ -19,7 +21,7 @@ from dune.perftool.sumfact.tabulation import (BasisTabulationMatrixArray,
set_quadrature_points, set_quadrature_points,
) )
from dune.perftool.error import PerftoolVectorizationError from dune.perftool.error import PerftoolVectorizationError
from dune.perftool.options import get_form_option from dune.perftool.options import get_form_option, get_option, set_form_option
from dune.perftool.tools import add_to_frozendict, round_to_multiple, list_diff from dune.perftool.tools import add_to_frozendict, round_to_multiple, list_diff
from pytools import product from pytools import product
...@@ -51,21 +53,21 @@ def attach_vectorization_info(sf): ...@@ -51,21 +53,21 @@ def attach_vectorization_info(sf):
return _cache_vectorization_info(sf, None) return _cache_vectorization_info(sf, None)
@backend(interface="vectorization_strategy", name="model")
def costmodel(sf): def costmodel(sf):
# Penalize vertical vectorization # Penalize vertical vectorization and scalar execution
vertical_penalty = 1 + math.log(sf.vertical_width) verticality = sf.vertical_width
# Penalize scalar sum factorization kernels
scalar_penalty = 1
if isinstance(sf, SumfactKernel): if isinstance(sf, SumfactKernel):
scalar_penalty = get_vcl_type_size(dtype_floatingpoint()) verticality = get_vcl_type_size(dtype_floatingpoint())
vertical_penalty = 1 + 0.5 * math.log(verticality, 2)
memory_penalty = 1.0
if isinstance(sf, VectorizedSumfactKernel):
memory_penalty = 1.0 + 0.25 * math.log(len(set(k.interface for k in sf.kernels)), 2)
# Return total operations # Return total operations
return sf.operations * vertical_penalty * scalar_penalty return sf.operations * vertical_penalty * memory_penalty
@backend(interface="vectorization_strategy", name="explicit")
def explicit_costfunction(sf): def explicit_costfunction(sf):
# Read the explicitly set values for horizontal and vertical vectorization # Read the explicitly set values for horizontal and vertical vectorization
width = get_vcl_type_size(dtype_floatingpoint()) width = get_vcl_type_size(dtype_floatingpoint())
...@@ -85,10 +87,32 @@ def explicit_costfunction(sf): ...@@ -85,10 +87,32 @@ def explicit_costfunction(sf):
return 1000000000000 return 1000000000000
def target_costfunction(sf):
    """Score a kernel by how far its model cost is from the target cost.

    The target given by the 'vectorization_target' form option is scaled
    down before comparison: this function is evaluated on subsets of a
    vectorization strategy, not on a complete one, so each kernel is only
    compared against its share of the target. The share is the kernel's
    horizontal width divided by the number of sum factorization kernels
    in use.

    :arg sf: the (possibly vectorized) sum factorization kernel to score
    :returns: absolute difference between the kernel's cost according to
        ``costmodel`` and its share of the target cost
    """
    # The second entry of the returned triple holds the kernels in use
    _, used_kernels, _ = filter_active_inactive_sumfacts()
    full_target = float(get_form_option("vectorization_target"))
    model_cost = costmodel(sf)

    # Fraction of the overall target that this kernel accounts for
    share = sf.horizontal_width / len(used_kernels)
    return abs(model_cost - share * full_target)
def strategy_cost(strat_tuple): def strategy_cost(strat_tuple):
qp, strategy = strat_tuple qp, strategy = strat_tuple
func = get_backend(interface="vectorization_strategy",
selector=lambda: get_form_option("vectorization_strategy")) # Choose the correct cost function
s = get_form_option("vectorization_strategy")
if s == "model":
func = costmodel
elif s == "explicit":
func = explicit_costfunction
elif s == "target":
func = target_costfunction
else:
raise NotImplementedError("Vectorization strategy '{}' unknown!".format(s))
keys = set(sf.cache_key for sf in strategy.values()) keys = set(sf.cache_key for sf in strategy.values())
set_quadrature_points(qp) set_quadrature_points(qp)
...@@ -133,13 +157,33 @@ def stringify_vectorization_strategy(strategy): ...@@ -133,13 +157,33 @@ def stringify_vectorization_strategy(strategy):
return result return result
def decide_vectorization_strategy(): def short_stringify_vectorization_strategy(strategy):
""" Decide how to vectorize! """ A short string describing the vectorization strategy. This is used
Note that the vectorization of the quadrature loop is independent of this, in costmodel validation plots to describe what a data point does
as it is implemented through a post-processing (== loopy transformation) step.
""" """
logger = logging.getLogger(__name__) qp, strategy = strategy
def _short(k):
if isinstance(k, VectorizedSumfactKernel):
return str(k.horizontal_width)
else:
return "scalar"
stage1 = []
stage3 = []
keys = set(sf.cache_key for sf in strategy.values())
for kernel in strategy.values():
if kernel.cache_key in keys:
keys.discard(kernel.cache_key)
if kernel.stage == 1:
stage1.append(_short(kernel))
if kernel.stage == 3:
stage3.append(_short(kernel))
return "m0={};S1:{};S3:{}".format(qp[0], "|".join(stage1), "|".join(stage3))
def filter_active_inactive_sumfacts():
# Retrieve all sum factorization kernels for stage 1 and 3 # Retrieve all sum factorization kernels for stage 1 and 3
from dune.perftool.generation import retrieve_cache_items from dune.perftool.generation import retrieve_cache_items
all_sumfacts = [i for i in retrieve_cache_items("kernel_default and sumfactnodes")] all_sumfacts = [i for i in retrieve_cache_items("kernel_default and sumfactnodes")]
...@@ -153,6 +197,18 @@ def decide_vectorization_strategy(): ...@@ -153,6 +197,18 @@ def decide_vectorization_strategy():
# All sum factorization kernels that get used # All sum factorization kernels that get used
active_sumfacts = [i for i in all_sumfacts if i.stage == 3 or i in basis_sumfacts] active_sumfacts = [i for i in all_sumfacts if i.stage == 3 or i in basis_sumfacts]
return all_sumfacts, active_sumfacts, inactive_sumfacts
def decide_vectorization_strategy():
""" Decide how to vectorize!
Note that the vectorization of the quadrature loop is independent of this,
as it is implemented through a post-processing (== loopy transformation) step.
"""
logger = logging.getLogger(__name__)
all_sumfacts, active_sumfacts, inactive_sumfacts = filter_active_inactive_sumfacts()
# If no vectorization is needed, abort now # If no vectorization is needed, abort now
if get_form_option("vectorization_strategy") == "none" or (get_global_context_value("form_type") == "jacobian" and not get_form_option("vectorization_jacobians")): if get_form_option("vectorization_strategy") == "none" or (get_global_context_value("form_type") == "jacobian" and not get_form_option("vectorization_jacobians")):
for sf in all_sumfacts: for sf in all_sumfacts:
...@@ -207,7 +263,41 @@ def level1_optimal_vectorization_strategy(sumfacts, width): ...@@ -207,7 +263,41 @@ def level1_optimal_vectorization_strategy(sumfacts, width):
# Find the minimum cost strategy between all the quadrature point tuples # Find the minimum cost strategy between all the quadrature point tuples
optimal_strategies = {qp: level2_optimal_vectorization_strategy(sumfacts, width, qp) for qp in quad_points} optimal_strategies = {qp: level2_optimal_vectorization_strategy(sumfacts, width, qp) for qp in quad_points}
qp = min(optimal_strategies, key=lambda qp: strategy_cost((qp, optimal_strategies[qp])))
# If we are using the 'target' strategy, we might want to log some information.
if get_form_option("vectorization_strategy") == "target":
# Print the achieved cost and the target cost on the screen
set_form_option("vectorization_strategy", "model")
target = float(get_form_option("vectorization_target"))
qp = min(optimal_strategies, key=lambda qp: abs(strategy_cost((qp, optimal_strategies[qp])) - target))
cost = strategy_cost((qp, optimal_strategies[qp]))
print("The target cost was: {}".format(target))
print("The achieved cost was: {}".format(cost))
optimum = level1_optimal_vectorization_strategy(sumfacts, width)
print("The optimal cost would be: {}".format(strategy_cost(optimum)))
set_form_option("vectorization_strategy", "target")
print("The score in 'target' logic was: {}".format(strategy_cost((qp, optimal_strategies[qp]))))
# Print the employed vectorization strategy into a file
suffix = ""
if get_global_context_value("integral_type") == "interior_facet":
suffix = "_dir{}_mod{}".format(get_global_context_value("facedir_s"),
get_global_context_value("facemod_s"))
filename = "targetstrat_{}{}.log".format(int(float(get_form_option("vectorization_target"))), suffix)
with open(filename, 'w') as f:
f.write("\n".join(stringify_vectorization_strategy((qp, optimal_strategies[qp]))))
# Write an entry into a csvfile which connects the given measuring identifier with a cost
from dune.testtools.parametertree.parser import parse_ini_file
inifile = parse_ini_file(get_option("ini_file"))
identifier = inifile["identifier"]
# TODO: Depending on the number of samples, we might need a file lock here.
with open("mapping.csv", 'a') as f:
f.write(" ".join((identifier, str(cost), short_stringify_vectorization_strategy((qp, optimal_strategies[qp])))) + "\n")
else:
qp = min(optimal_strategies, key=lambda qp: strategy_cost((qp, optimal_strategies[qp])))
return qp, optimal_strategies[qp] return qp, optimal_strategies[qp]
...@@ -221,6 +311,8 @@ def level2_optimal_vectorization_strategy(sumfacts, width, qp): ...@@ -221,6 +311,8 @@ def level2_optimal_vectorization_strategy(sumfacts, width, qp):
for key in keys: for key in keys:
key_sumfacts = frozenset(sf for sf in sumfacts if sf.parallel_key == key) key_sumfacts = frozenset(sf for sf in sumfacts if sf.parallel_key == key)
# Minimize over all the opportunities for the subset given by the current key
key_strategy = min(level2_optimal_vectorization_strategy_generator(key_sumfacts, width, qp), key_strategy = min(level2_optimal_vectorization_strategy_generator(key_sumfacts, width, qp),
key=fixedqp_strategy_costfunction(qp)) key=fixedqp_strategy_costfunction(qp))
sfdict = add_to_frozendict(sfdict, key_strategy) sfdict = add_to_frozendict(sfdict, key_strategy)
...@@ -286,7 +378,7 @@ def _level2_optimal_vectorization_strategy_generator(sumfacts, width, qp, alread ...@@ -286,7 +378,7 @@ def _level2_optimal_vectorization_strategy_generator(sumfacts, width, qp, alread
def get_vectorization_dict(sumfacts, vertical, horizontal, qp): def get_vectorization_dict(sumfacts, vertical, horizontal, qp):
# Discard opportunities that do not contain enough horizontal kernels # Discard opportunities that do not contain enough horizontal kernels
if len(sumfacts) not in (horizontal, horizontal - 1): if len(sumfacts) not in (horizontal, horizontal * vertical - 1):
return None return None
# Enhance the list of sumfact nodes by adding vertical splittings # Enhance the list of sumfact nodes by adding vertical splittings
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment