Stage 3 vertical vectorization

465f90be · Dominic Kempf · 4b54d1fb
Commit 465f90be authored 8 years ago by Dominic Kempf
--- a/python/dune/perftool/sumfact/accumulation.py
+++ b/python/dune/perftool/sumfact/accumulation.py
@@ -202,7 +202,7 @@ def generate_accumulation_instruction(visitor, accterm, measure, subdomain_id):
        vecinames = ()
        # TODO: evaluate whether the following line would be okay with vsf.vectorized
        if vsf.vec_index(sf) is not None:
-            iname = accum_iname((accterm.argument.restriction, restriction), vsf.horizontal_width, "vec")
+            iname = accum_iname((accterm.argument.restriction, restriction), vsf.vector_width, "vec")
            vecinames = (iname,)
            transform(lp.tag_inames, [(iname, "vec")])
            from dune.perftool.tools import maybe_wrap_subscript

--- a/python/dune/perftool/sumfact/symbolic.py
+++ b/python/dune/perftool/sumfact/symbolic.py
@@ -512,7 +512,7 @@ class VectorizedSumfactKernel(SumfactKernelBase, ImmutableRecord, prim.Variable)
    @property
    def dof_shape(self):
-        return tuple(mat.basis_size for mat in self.matrix_sequence) + (self.horizontal_width,)
+        return tuple(mat.basis_size for mat in self.matrix_sequence) + (self.vector_width,)
    @property
    def dof_dimtags(self):

--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -42,41 +42,36 @@ def no_vectorization(sumfacts):
 def vertical_vectorization_strategy(sumfact, depth):
-    # For sake of simplicity we restrict us to stage 1 so far
+    # Assert that this is not already sliced
-    if sumfact.stage == 1:
+    assert all(mat.slice_size is None for mat in sumfact.matrix_sequence)
-        # Assert that this is not already sliced
-        assert all(mat.slice_size is None for mat in sumfact.matrix_sequence)
+    # Determine which of the matrices in the kernel should be sliced
+    def determine_slice_direction():
-        # Determine which of the matrices in the kernel should be sliced
+        for i, mat in enumerate(sumfact.matrix_sequence):
-        def determine_slice_direction():
+            if mat.quadrature_size % depth == 0:
-            for i, mat in enumerate(sumfact.matrix_sequence):
+                return i
-                if mat.quadrature_size % depth == 0:
+            elif mat.quadrature_size != 1:
-                    return i
+                raise PerftoolError("Vertical vectorization is not possible!")
-                elif mat.quadrature_size != 1:
-                    raise PerftoolError("Vertical vectorization is not possible!")
+    sliced = determine_slice_direction()
-        sliced = determine_slice_direction()
+    kernels = []
+    oldtab = sumfact.matrix_sequence[sliced]
-        kernels = []
+    for i in range(depth):
-        oldtab = sumfact.matrix_sequence[sliced]
+        seq = list(sumfact.matrix_sequence)
-        for i in range(depth):
+        seq[sliced] = oldtab.copy(slice_size=depth,
-            seq = list(sumfact.matrix_sequence)
+                                  slice_index=i)
-            seq[sliced] = oldtab.copy(slice_size=depth,
+        kernels.append(sumfact.copy(matrix_sequence=tuple(seq)))
-                                      slice_index=i)
-            kernels.append(sumfact.copy(matrix_sequence=tuple(seq)))
+    buffer = get_counted_variable("vertical_buffer")
+    input = get_counted_variable("vertical_input")
-        buffer = get_counted_variable("vertical_buffer")
-        input = get_counted_variable("vertical_input")
+    vsf = VectorizedSumfactKernel(kernels=tuple(kernels),
+                                  buffer=buffer,
-        vsf = VectorizedSumfactKernel(kernels=tuple(kernels),
+                                  input=input,
-                                      buffer=buffer,
+                                  vertical_width=depth,
-                                      input=input,
+                                  )
-                                      vertical_width=depth,
+    return _cache_vectorization_info(sumfact, vsf)
-                                      )
-        return _cache_vectorization_info(sumfact, vsf)
-    else:
-        return _cache_vectorization_info(sumfact, sumfact.copy(buffer=get_counted_variable("buffer"),
-                                                               input=get_counted_variable("input")))
 def horizontal_vectorization_strategy(sumfacts):