Skip to content
Snippets Groups Projects
Commit 6cb3a57e authored by Dominic Kempf's avatar Dominic Kempf
Browse files

First implementation of vertical vectorization

parent 31f14b8d
No related branches found
No related tags found
No related merge requests found
......@@ -56,7 +56,7 @@ class BasisTabulationMatrix(BasisTabulationMatrixBase, ImmutableRecord):
if quadrature_size is None:
quadrature_size = quadrature_points_per_direction()
if slice_size is not None:
quadrature_size = ceildiv(quadrature_size, slize_size)
quadrature_size = ceildiv(quadrature_size, slice_size)
if basis_size is None:
basis_size = basis_functions_per_direction()
ImmutableRecord.__init__(self,
......
......@@ -41,6 +41,35 @@ def no_vectorization(sumfacts):
input=get_counted_variable("input")))
def vertical_vectorization_strategy(sumfact, depth):
    """Slice one matrix of a stage 1 sum factorization kernel.

    For a kernel with ``sumfact.stage == 1``, one tabulation matrix whose
    ``quadrature_size`` is divisible by ``depth`` is chosen and ``depth``
    copies of the kernel are created. In the copy with number ``i`` the
    chosen matrix is replaced by ``copy(slice_size=depth, slice_index=i)``
    of itself. The copies are bundled into a ``VectorizedSumfactKernel``
    that is registered for ``sumfact`` via ``_cache_vectorization_info``.

    Kernels of any other stage are registered unchanged.

    :arg sumfact: The sum factorization kernel to treat. It must provide
        ``stage``, ``matrix_sequence`` and ``copy``.
    :arg depth: The number of slices to create.
    :returns: Whatever ``_cache_vectorization_info`` returns.
    :raises AssertionError: If a matrix of a stage 1 kernel is already
        sliced, or if no matrix has a quadrature size divisible by
        ``depth``.
    """
    # For the sake of simplicity this is restricted to stage 1 so far,
    # everything else is passed through untouched.
    if sumfact.stage != 1:
        return _cache_vectorization_info(sumfact, sumfact)

    matrices = sumfact.matrix_sequence

    # Slicing a kernel that is already sliced is not supported
    assert all(mat.slice_size is None for mat in matrices)

    # Collect the positions of all matrices that could be sliced
    candidates = [pos for pos, mat in enumerate(matrices)
                  if mat.quadrature_size % depth == 0]

    # Currently it is assumed that there is always a matrix to slice!
    assert candidates

    # If several matrices qualify, the last one in the sequence is used
    position = candidates[-1]
    template = matrices[position]

    def sliced_kernel(index):
        # Copy of the kernel that only differs in the slice of one matrix
        sequence = list(matrices)
        sequence[position] = template.copy(slice_size=depth,
                                           slice_index=index)
        return sumfact.copy(matrix_sequence=tuple(sequence))

    vsf = VectorizedSumfactKernel(
        kernels=tuple(sliced_kernel(index) for index in range(depth)))
    return _cache_vectorization_info(sumfact, vsf)
def horizontal_vectorization_strategy(sumfacts):
width = get_vcl_type_size(np.float64)
# We currently heuristically allow horizontal vectorization if the number
......@@ -95,12 +124,16 @@ def decide_vectorization_strategy():
from dune.perftool.generation import retrieve_cache_items
sumfacts = [i for i in retrieve_cache_items("kernel_default and sumfactnodes")]
if not get_option("vectorize_grads"):
no_vectorization(sumfacts)
else:
if get_option("vectorize_grads"):
# Currently we base our idea here on the fact that we only group sum
# factorization kernels with the same input.
inputkeys = set(sf.input_key for sf in sumfacts)
for inputkey in inputkeys:
sumfact_filter = [sf for sf in sumfacts if sf.input_key == inputkey]
horizontal_vectorization_strategy(sumfact_filter)
elif get_option("vectorize_slice"):
for sumfact in sumfacts:
width = get_vcl_type_size(np.float64)
vertical_vectorization_strategy(sumfact, width)
else:
no_vectorization(sumfacts)
......@@ -8,3 +8,8 @@ dune_add_formcompiler_system_test(UFLFILE mass_3d.ufl
BASENAME sumfact_mass_3d
INIFILE mass_3d.mini
)
dune_add_formcompiler_system_test(UFLFILE mass_3d.ufl
BASENAME sumfact_mass_sliced
INIFILE sliced.mini
)
......@@ -17,3 +17,6 @@ extension = vtu
numerical_jacobian = 1, 0 | expand num
vectorize_quad = 1, 0 | expand vec
sumfact = 1
[formcompiler.ufl_variants]
degree = 1
cell = "hexahedron"
V = FiniteElement("DG", cell, 1)
V = FiniteElement("DG", cell, degree)
u = TrialFunction(V)
v = TestFunction(V)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment