diff --git a/dune/perftool/sumfact/transposereg.hh b/dune/perftool/sumfact/transposereg.hh
index af50b29ccdc6cf263eb0317a903b692a9f77da81..6cdd9668f97a82cf94f9994e4280e00482efccb8 100644
--- a/dune/perftool/sumfact/transposereg.hh
+++ b/dune/perftool/sumfact/transposereg.hh
@@ -20,4 +20,21 @@ void transpose_reg(Vec4d& a0, Vec4d& a1, Vec4d& a2, Vec4d& a3)
   a3 = blend4d<2,3,6,7>(b1,b3);
 }
 
+#if MAX_VECTOR_SIZE >= 512
+
+void transpose_reg(Vec8d& a0, Vec8d& a1, Vec8d& a2, Vec8d& a3)
+{
+  Vec8d b0, b1, b2, b3;
+  b0 = blend8d<0,4,8,12,1,5,9,13>(a0, a1);
+  b1 = blend8d<1,5,9,13,3,7,11,15>(a0, a1);
+  b2 = blend8d<0,4,8,12,1,5,9,13>(a2, a3);
+  b3 = blend8d<1,5,9,13,3,7,11,15>(a2, a3);
+  a0 = blend8d<0,1,2,3,8,9,10,11>(b0, b2);
+  a1 = blend8d<4,5,6,7,12,13,14,15>(b0, b2);
+  a2 = blend8d<0,1,2,3,8,9,10,11>(b1, b3);
+  a3 = blend8d<4,5,6,7,12,13,14,15>(b1, b3);
+}
+
+#endif
+
 #endif