Commit 2c9cdfc6 authored by Dominic Kempf

First improvement of vectorization strategy

parent f84650ac
@@ -130,12 +130,35 @@ class SumfactKernel(ImmutableRecord, prim.Variable):
mapper_method = "map_sumfact_kernel"
#
# Some cache key definitions
# Consult the documentation to see which key is used under which circumstances
#
@property
def cache_key(self):
""" The cache key that can be used in generation magic
Any two sum factorization kernels having the same cache_key
are realized simulatenously!
"""
return (self.matrix_sequence, self.restriction, self.stage, self.buffer)
@property
def input_key(self):
""" A cache key for the input coefficients
Any two sum factorization kernels having the same input_key
work on the same input coefficient (and are suitable for simultaneous
treatment because of that)
"""
return (self.restriction, self.stage, self.coeff_func, self.element, self.component, self.accumvar)
#
# Some convenience methods to extract information about the sum factorization kernel
#
@property
def length(self):
""" The number of matrices to apply """
return len(self.matrix_sequence)
@property
@@ -146,14 +169,6 @@ class SumfactKernel(ImmutableRecord, prim.Variable):
def transposed(self):
return next(iter(self.matrix_sequence)).transpose
@property
def cache_key(self):
""" The cache key that can be used in generation magic,
Any two sum factorization kernels having the same cache_key
are realized simulatneously!
"""
return hash((self.matrix_sequence, self.restriction, self.stage, self.buffer))
@property
def vec_index(self):
""" A tuple with the vector index
......
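The newly added input_key complements cache_key: kernels that agree in restriction, stage, coefficient function, element, component and accumulation variable read the same input coefficient and are therefore candidates for being fused into one vectorized kernel. A minimal sketch of how such a grouping could be computed, using a simplified stand-in for SumfactKernel (the stand-in class and its fields are illustrative, not the real implementation):

from collections import defaultdict, namedtuple

# Illustrative stand-in carrying only the fields that enter input_key.
Kernel = namedtuple("Kernel", "restriction stage coeff_func element component accumvar")

def input_key(kernel):
    # Mirrors SumfactKernel.input_key: equal keys mean equal input coefficients.
    return (kernel.restriction, kernel.stage, kernel.coeff_func,
            kernel.element, kernel.component, kernel.accumvar)

def group_by_input(kernels):
    # Partition kernels into groups that may be vectorized together.
    groups = defaultdict(list)
    for kernel in kernels:
        groups[input_key(kernel)].append(kernel)
    return groups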
@@ -39,13 +39,12 @@ def no_vectorization(sumfacts):
input=get_counted_variable("input")))
def decide_stage_vectorization_strategy(sumfacts, stage, restriction):
stage_sumfacts = frozenset([sf for sf in sumfacts if sf.stage == stage and sf.restriction == restriction])
if len(stage_sumfacts) in (3, 4):
def horizontal_vectorization_strategy(sumfacts):
if len(sumfacts) in (3, 4):
# Map each sum factorization kernel to its position in the joint kernel
position_mapping = {}
available = set(range(4))
for sf in stage_sumfacts:
for sf in sumfacts:
if sf.preferred_position is not None:
# This asserts that no two kernels want to take the same position
# Later on, more complicated stuff might be necessary here.
@@ -54,7 +53,7 @@ def decide_stage_vectorization_strategy(sumfacts, stage, restriction):
position_mapping[sf] = sf.preferred_position
# Choose a position for those that have no preferred one!
for sumf in stage_sumfacts:
for sumf in sumfacts:
if sumf.preferred_position is None:
position_mapping[sumf] = available.pop()
@@ -64,37 +63,37 @@ def decide_stage_vectorization_strategy(sumfacts, stage, restriction):
# Collect the large matrices!
large_matrix_sequence = []
for i in range(len(next(iter(stage_sumfacts)).matrix_sequence)):
for i in range(len(next(iter(sumfacts)).matrix_sequence)):
# Assert that the matrices of all sum factorizations have the same size
assert len(set(tuple(sf.matrix_sequence[i].rows for sf in stage_sumfacts))) == 1
assert len(set(tuple(sf.matrix_sequence[i].cols for sf in stage_sumfacts))) == 1
assert len(set(tuple(sf.matrix_sequence[i].rows for sf in sumfacts))) == 1
assert len(set(tuple(sf.matrix_sequence[i].cols for sf in sumfacts))) == 1
# Collect the derivative information
derivative = [False] * 4
for sf in stage_sumfacts:
for sf in sumfacts:
derivative[position_mapping[sf]] = sf.matrix_sequence[i].derivative
large = BasisTabulationMatrixArray(rows=next(iter(stage_sumfacts)).matrix_sequence[i].rows,
cols=next(iter(stage_sumfacts)).matrix_sequence[i].cols,
transpose=next(iter(stage_sumfacts)).matrix_sequence[i].transpose,
large = BasisTabulationMatrixArray(rows=next(iter(sumfacts)).matrix_sequence[i].rows,
cols=next(iter(sumfacts)).matrix_sequence[i].cols,
transpose=next(iter(sumfacts)).matrix_sequence[i].transpose,
derivative=tuple(derivative),
face=next(iter(stage_sumfacts)).matrix_sequence[i].face,
face=next(iter(sumfacts)).matrix_sequence[i].face,
)
large_matrix_sequence.append(large)
for sumf in stage_sumfacts:
for sumf in sumfacts:
_cache_vectorization_info(sumf,
sumf.copy(matrix_sequence=tuple(large_matrix_sequence),
buffer=buf,
input=inp,
index=position_mapping[sumf],
padding=frozenset(available),
insn_dep=frozenset().union(sf.insn_dep for sf in stage_sumfacts),
insn_dep=frozenset().union(sf.insn_dep for sf in sumfacts),
)
)
else:
# Disable vectorization strategy
no_vectorization(stage_sumfacts)
no_vectorization(sumfacts)
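The position handling above works in two passes: kernels with a preferred_position claim their SIMD lane first, the remaining kernels fill whatever lanes are left, and lanes that stay unclaimed become padding of the fused kernel. A condensed, hedged sketch of that assignment, assuming hashable kernel objects whose preferred_position attribute is either None or a lane index:

def assign_lanes(kernels, width=4):
    # Lanes not yet claimed by an explicit preference.
    available = set(range(width))
    position = {}
    # First pass: honor preferred positions and make sure they do not collide.
    for kernel in kernels:
        if kernel.preferred_position is not None:
            assert kernel.preferred_position in available
            available.discard(kernel.preferred_position)
            position[kernel] = kernel.preferred_position
    # Second pass: distribute the remaining lanes to the other kernels.
    for kernel in kernels:
        if kernel.preferred_position is None:
            position[kernel] = available.pop()
    # Lanes that are still free end up as padding in the vectorized kernel.
    return position, frozenset(available)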
def decide_vectorization_strategy():
@@ -108,12 +107,9 @@ def decide_vectorization_strategy():
if not get_option("vectorize_grads"):
no_vectorization(sumfacts)
else:
res = (Restriction.NONE, Restriction.POSITIVE, Restriction.NEGATIVE)
# Stage 1 kernels
for restriction in res:
decide_stage_vectorization_strategy(sumfacts, 1, restriction)
# Stage 3 kernels
import itertools as it
for restriction in it.product(res, res):
decide_stage_vectorization_strategy(sumfacts, 3, restriction)
# Currently the strategy relies on the fact that we only group sum
# factorization kernels that share the same input.
inputkeys = set(sf.input_key for sf in sumfacts)
for inputkey in inputkeys:
sumfact_filter = [sf for sf in sumfacts if sf.input_key == inputkey]
horizontal_vectorization_strategy(sumfact_filter)
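Conceptually, horizontal vectorization stacks up to four scalar sum factorization kernels whose matrices have identical sizes into one kernel whose innermost data dimension runs over the SIMD lanes, so every matrix application fills all lanes at once. The following NumPy snippet is only a rough illustration of that data layout, under the simplifying assumption that all lanes share one tabulation matrix (in the real code the lanes may differ, e.g. in their derivative flag, and explicit vector types are generated instead):

import numpy as np

width = 4                       # SIMD lanes = number of fused kernels
rows, cols = 3, 3               # all fused kernels apply matrices of this size
A = np.random.rand(rows, cols)  # shared tabulation matrix (simplification)
xs = [np.random.rand(cols) for _ in range(width)]  # four scalar inputs

# Scalar strategy: four independent matrix-vector products.
scalar = [A @ x for x in xs]

# Horizontal strategy: interleave the inputs so the last axis is the lane
# index, then a single batched product computes all four kernels at once.
X = np.stack(xs, axis=-1)       # shape (cols, width)
vectorized = A @ X              # shape (rows, width), one column per kernel

assert np.allclose(np.stack(scalar, axis=-1), vectorized)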