From 465f90be29c3a14f3b82fb4b8047a32a953fc87d Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Fri, 7 Apr 2017 18:15:00 +0200 Subject: [PATCH] Stage 3 vertical vectorization --- python/dune/perftool/sumfact/accumulation.py | 2 +- python/dune/perftool/sumfact/symbolic.py | 2 +- python/dune/perftool/sumfact/vectorization.py | 65 +++++++++---------- 3 files changed, 32 insertions(+), 37 deletions(-) diff --git a/python/dune/perftool/sumfact/accumulation.py b/python/dune/perftool/sumfact/accumulation.py index 4d729613..20ab84ac 100644 --- a/python/dune/perftool/sumfact/accumulation.py +++ b/python/dune/perftool/sumfact/accumulation.py @@ -202,7 +202,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id): vecinames = () # TODO: evaluate whether the following line would be okay with vsf.vectorized if vsf.vec_index(sf) is not None: - iname = accum_iname((accterm.argument.restriction, restriction), vsf.horizontal_width, "vec") + iname = accum_iname((accterm.argument.restriction, restriction), vsf.vector_width, "vec") vecinames = (iname,) transform(lp.tag_inames, [(iname, "vec")]) from dune.perftool.tools import maybe_wrap_subscript diff --git a/python/dune/perftool/sumfact/symbolic.py b/python/dune/perftool/sumfact/symbolic.py index 1debd5ac..6487ce49 100644 --- a/python/dune/perftool/sumfact/symbolic.py +++ b/python/dune/perftool/sumfact/symbolic.py @@ -512,7 +512,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable) @property def dof_shape(self): - return tuple(mat.basis_size for mat in self.matrix_sequence) + (self.horizontal_width,) + return tuple(mat.basis_size for mat in self.matrix_sequence) + (self.vector_width,) @property def dof_dimtags(self): diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index 229d070c..2c6f4225 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -42,41 +42,36 @@ def no_vectorization(sumfacts): def vertical_vectorization_strategy(sumfact, depth): - # For sake of simplicity we restrict us to stage 1 so far - if sumfact.stage == 1: - # Assert that this is not already sliced - assert all(mat.slice_size is None for mat in sumfact.matrix_sequence) - - # Determine which of the matrices in the kernel should be sliced - def determine_slice_direction(): - for i, mat in enumerate(sumfact.matrix_sequence): - if mat.quadrature_size % depth == 0: - return i - elif mat.quadrature_size != 1: - raise PerftoolError("Vertical vectorization is not possible!") - - sliced = determine_slice_direction() - - kernels = [] - oldtab = sumfact.matrix_sequence[sliced] - for i in range(depth): - seq = list(sumfact.matrix_sequence) - seq[sliced] = oldtab.copy(slice_size=depth, - slice_index=i) - kernels.append(sumfact.copy(matrix_sequence=tuple(seq))) - - buffer = get_counted_variable("vertical_buffer") - input = get_counted_variable("vertical_input") - - vsf = VectorizedSumfactKernel(kernels=tuple(kernels), - buffer=buffer, - input=input, - vertical_width=depth, - ) - return _cache_vectorization_info(sumfact, vsf) - else: - return _cache_vectorization_info(sumfact, sumfact.copy(buffer=get_counted_variable("buffer"), - input=get_counted_variable("input"))) + # Assert that this is not already sliced + assert all(mat.slice_size is None for mat in sumfact.matrix_sequence) + + # Determine which of the matrices in the kernel should be sliced + def determine_slice_direction(): + for i, mat in enumerate(sumfact.matrix_sequence): + if mat.quadrature_size % depth == 0: + return i + elif mat.quadrature_size != 1: + raise PerftoolError("Vertical vectorization is not possible!") + + sliced = determine_slice_direction() + + kernels = [] + oldtab = sumfact.matrix_sequence[sliced] + for i in range(depth): + seq = list(sumfact.matrix_sequence) + seq[sliced] = oldtab.copy(slice_size=depth, + slice_index=i) + kernels.append(sumfact.copy(matrix_sequence=tuple(seq))) + + buffer = get_counted_variable("vertical_buffer") + input = get_counted_variable("vertical_input") + + vsf = VectorizedSumfactKernel(kernels=tuple(kernels), + buffer=buffer, + input=input, + vertical_width=depth, + ) + return _cache_vectorization_info(sumfact, vsf) def horizontal_vectorization_strategy(sumfacts): -- GitLab