Skip to content
Snippets Groups Projects
Commit 465f90be authored by Dominic Kempf
Browse files

Stage 3 vertical vectorization

parent 4b54d1fb
No related branches found
No related tags found
No related merge requests found
...@@ -202,7 +202,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): ...@@ -202,7 +202,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
vecinames = () vecinames = ()
# TODO: evaluate whether the following line would be okay with vsf.vectorized # TODO: evaluate whether the following line would be okay with vsf.vectorized
if vsf.vec_index(sf) is not None: if vsf.vec_index(sf) is not None:
iname = accum_iname((accterm.argument.restriction, restriction), vsf.horizontal_width, "vec") iname = accum_iname((accterm.argument.restriction, restriction), vsf.vector_width, "vec")
vecinames = (iname,) vecinames = (iname,)
transform(lp.tag_inames, [(iname, "vec")]) transform(lp.tag_inames, [(iname, "vec")])
from dune.perftool.tools import maybe_wrap_subscript from dune.perftool.tools import maybe_wrap_subscript
......
...@@ -512,7 +512,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) ...@@ -512,7 +512,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable)
@property @property
def dof_shape(self): def dof_shape(self):
return tuple(mat.basis_size for mat in self.matrix_sequence) + (self.horizontal_width,) return tuple(mat.basis_size for mat in self.matrix_sequence) + (self.vector_width,)
@property @property
def dof_dimtags(self): def dof_dimtags(self):
......
...@@ -42,41 +42,36 @@ def no_vectorization(sumfacts): ...@@ -42,41 +42,36 @@ def no_vectorization(sumfacts):
def vertical_vectorization_strategy(sumfact, depth): def vertical_vectorization_strategy(sumfact, depth):
# For sake of simplicity we restrict us to stage 1 so far # Assert that this is not already sliced
if sumfact.stage == 1: assert all(mat.slice_size is None for mat in sumfact.matrix_sequence)
# Assert that this is not already sliced
assert all(mat.slice_size is None for mat in sumfact.matrix_sequence) # Determine which of the matrices in the kernel should be sliced
def determine_slice_direction():
# Determine which of the matrices in the kernel should be sliced for i, mat in enumerate(sumfact.matrix_sequence):
def determine_slice_direction(): if mat.quadrature_size % depth == 0:
for i, mat in enumerate(sumfact.matrix_sequence): return i
if mat.quadrature_size % depth == 0: elif mat.quadrature_size != 1:
return i raise PerftoolError("Vertical vectorization is not possible!")
elif mat.quadrature_size != 1:
raise PerftoolError("Vertical vectorization is not possible!") sliced = determine_slice_direction()
sliced = determine_slice_direction() kernels = []
oldtab = sumfact.matrix_sequence[sliced]
kernels = [] for i in range(depth):
oldtab = sumfact.matrix_sequence[sliced] seq = list(sumfact.matrix_sequence)
for i in range(depth): seq[sliced] = oldtab.copy(slice_size=depth,
seq = list(sumfact.matrix_sequence) slice_index=i)
seq[sliced] = oldtab.copy(slice_size=depth, kernels.append(sumfact.copy(matrix_sequence=tuple(seq)))
slice_index=i)
kernels.append(sumfact.copy(matrix_sequence=tuple(seq))) buffer = get_counted_variable("vertical_buffer")
input = get_counted_variable("vertical_input")
buffer = get_counted_variable("vertical_buffer")
input = get_counted_variable("vertical_input") vsf = VectorizedSumfactKernel(kernels=tuple(kernels),
buffer=buffer,
vsf = VectorizedSumfactKernel(kernels=tuple(kernels), input=input,
buffer=buffer, vertical_width=depth,
input=input, )
vertical_width=depth, return _cache_vectorization_info(sumfact, vsf)
)
return _cache_vectorization_info(sumfact, vsf)
else:
return _cache_vectorization_info(sumfact, sumfact.copy(buffer=get_counted_variable("buffer"),
input=get_counted_variable("input")))
def horizontal_vectorization_strategy(sumfacts): def horizontal_vectorization_strategy(sumfacts):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment