diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py index b856ccc76afc649e8af310f9fdfebaf5df59f4d7..217d22deedd1d5e6f036fc395082cfc21862cc62 100644 --- a/python/dune/perftool/sumfact/vectorization.py +++ b/python/dune/perftool/sumfact/vectorization.py @@ -133,12 +133,14 @@ def horizontal_vectorization_strategy(sumfacts, width, allow_padding=1): def diagonal_vectorization_strategy(sumfacts, width): if width == 4: horizontal, vertical = 2, 2 + elif width == 8: + horizontal, vertical = 4, 2 else: raise NotImplementedError result = {} - horizontal_kernels = horizontal_vectorization_strategy(sumfacts, horizontal, allow_padding=0) + horizontal_kernels = horizontal_vectorization_strategy(sumfacts, horizontal, allow_padding=1) for sf in horizontal_kernels: vert = vertical_vectorization_strategy(horizontal_kernels[sf], width // horizontal_kernels[sf].horizontal_width)