diff --git a/python/dune/perftool/sumfact/vectorization.py b/python/dune/perftool/sumfact/vectorization.py
index b856ccc76afc649e8af310f9fdfebaf5df59f4d7..217d22deedd1d5e6f036fc395082cfc21862cc62 100644
--- a/python/dune/perftool/sumfact/vectorization.py
+++ b/python/dune/perftool/sumfact/vectorization.py
@@ -133,12 +133,14 @@ def horizontal_vectorization_strategy(sumfacts, width, allow_padding=1):
 def diagonal_vectorization_strategy(sumfacts, width):
     if width == 4:
         horizontal, vertical = 2, 2
+    elif width == 8:
+        horizontal, vertical = 4, 2
     else:
         raise NotImplementedError
 
     result = {}
 
-    horizontal_kernels = horizontal_vectorization_strategy(sumfacts, horizontal, allow_padding=0)
+    horizontal_kernels = horizontal_vectorization_strategy(sumfacts, horizontal, allow_padding=1)
 
     for sf in horizontal_kernels:
         vert = vertical_vectorization_strategy(horizontal_kernels[sf], width // horizontal_kernels[sf].horizontal_width)