Skip to content
Snippets Groups Projects
Commit c522ea7a authored by Dominic Kempf's avatar Dominic Kempf
Browse files

Use a divide and conquer approach to reduce the complexity of opportunity generation

parent 49298e64
No related branches found
No related tags found
No related merge requests found
......@@ -53,7 +53,7 @@ def attach_vectorization_info(sf):
def position_penalty_factor(sf):
if isinstance(sf, SumfactKernel):
if isinstance(sf, SumfactKernel) or sf.vertical_width > 1:
return 1
else:
return 1 + sum(abs(sf.kernels[i].position_priority - i) if sf.kernels[i].position_priority is not None else 0 for i in range(sf.length))
......@@ -64,8 +64,13 @@ def costmodel(sf):
# Penalize vertical vectorization
vertical_penalty = 1 + math.log(sf.vertical_width)
# Penalize scalar sum factorization kernels
scalar_penalty = 1
if isinstance(sf, SumfactKernel):
scalar_penalty = get_vcl_type_size(np.float64)
# Return total operations
return sf.operations * position_penalty_factor(sf) * vertical_penalty
return sf.operations * position_penalty_factor(sf) * vertical_penalty * scalar_penalty
@backend(interface="vectorization_strategy", name="explicit")
......@@ -89,8 +94,6 @@ def explicit_costfunction(sf):
def strategy_cost(strategy):
qp, strategy = strategy
set_quadrature_points(qp)
func = get_backend(interface="vectorization_strategy",
selector=lambda: get_option("vectorization_strategy"))
keys = set(sf.cache_key for sf in strategy.values())
......@@ -160,35 +163,14 @@ def decide_vectorization_strategy():
# Find the best vectorization strategy by using a costmodel
width = get_vcl_type_size(np.float64)
strategy = min(vectorization_opportunity_generator(active_sumfacts, width),
key=strategy_cost)
# Treat the quadrature points
qp, sfdict = strategy
set_quadrature_points(qp)
logger.debug("decide_vectorization_strategy: Decided for the following strategy:"
"\n".join(stringify_vectorization_strategy(strategy)))
# We map inactive sum factorization kernels to 0
sfdict = add_to_frozendict(sfdict, {sf: 0 for sf in inactive_sumfacts})
# Register the results
for sf in all_sumfacts:
_cache_vectorization_info(sf, sfdict[sf])
def vectorization_opportunity_generator(sumfacts, width):
""" Generator that yields all vectorization opportunities for the given
sum factorization kernels as tuples of quadrature point tuple and vectorization
dictionary """
#
# Find all the possible quadrature point tuples
# Optimize over all the possible quadrature point tuples
#
quad_points = [quadrature_points_per_direction()]
if get_option("vectorization_allow_quadrature_changes"):
sf = next(iter(sumfacts))
sf = next(iter(active_sumfacts))
depth = 1
while depth <= width:
i = 0 if sf.matrix_sequence[0].face is None else 1
......@@ -198,12 +180,44 @@ def vectorization_opportunity_generator(sumfacts, width):
depth = depth * 2
quad_points = list(set(quad_points))
for qp in quad_points:
#
# Determine vectorization opportunities given a fixed quadrature point number
#
for opp in fixed_quad_vectorization_opportunity_generator(frozenset(sumfacts), width, qp):
yield qp, opp
# Find the minimum cost strategy between all the quadrature point tuples
optimal_strategies = {qp: fixed_quadrature_optimal_vectorization(active_sumfacts, width, qp) for qp in quad_points}
qp = min(optimal_strategies, key=lambda qp: strategy_cost(optimal_strategies[qp]))
sfdict = optimal_strategies[qp]
set_quadrature_points(qp)
logger.debug("decide_vectorization_strategy: Decided for the following strategy:"
"\n".join(stringify_vectorization_strategy((qp, sfdict))))
# We map inactive sum factorization kernels to 0
sfdict = add_to_frozendict(sfdict, {sf: 0 for sf in inactive_sumfacts})
# Register the results
for sf in all_sumfacts:
_cache_vectorization_info(sf, sfdict[sf])
def fixed_quadrature_optimal_vectorization(sumfacts, width, qp):
""" For a given quadrature point tuple, find the optimal strategy!
In order to have this scale sufficiently, we cannot simply list all vectorization
opportunities and score them individually, but we need to do a divide and conquer
approach.
"""
set_quadrature_points(qp)
# Find the sets of simultaneously realizable kernels (thats an equivalence relation)
keys = frozenset(sf.input_key for sf in sumfacts)
# Find minimums for each of these sets
sfdict = frozendict()
for key in keys:
key_sumfacts = frozenset(sf for sf in sumfacts if sf.input_key == key)
minimum = min(fixed_quad_vectorization_opportunity_generator(key_sumfacts, width, qp),
key=strategy_cost)
sfdict = add_to_frozendict(sfdict, minimum)
return sfdict
def fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp, already=frozendict()):
......@@ -227,20 +241,14 @@ def fixed_quad_vectorization_opportunity_generator(sumfacts, width, qp, already=
):
yield opp
# Find all the sum factorization kernels that the chosen kernel can be parallelized with
#
# TODO: Right now we check for same input, which is not actually needed in order
# to be a suitable candidate! We should relax this concept at some point!
candidates = filter(lambda sf: sf.input_key == sf_to_decide.input_key, sumfacts)
horizontal = 1
while horizontal <= width:
# Iterate over the possible combinations of sum factorization kernels
# taking into account all the permutations of kernels. This also includes
# combinations which use a padding of 1 - but only for pure horizontality.
generators = [it.permutations(candidates, horizontal)]
generators = [it.permutations(sumfacts, horizontal)]
if horizontal >= 4:
generators.append(it.permutations(candidates, horizontal - 1))
generators.append(it.permutations(sumfacts, horizontal - 1))
for combo in it.chain(*generators):
# The chosen kernels must be part of the kernels for recursion
# to work correctly
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment