diff --git a/dune/perftool/sumfact/transposereg.hh b/dune/perftool/sumfact/transposereg.hh
index af50b29ccdc6cf263eb0317a903b692a9f77da81..6cdd9668f97a82cf94f9994e4280e00482efccb8 100644
--- a/dune/perftool/sumfact/transposereg.hh
+++ b/dune/perftool/sumfact/transposereg.hh
@@ -20,4 +20,33 @@ void transpose_reg(Vec4d& a0, Vec4d& a1, Vec4d& a2, Vec4d& a3)
   a3 = blend4d<2,3,6,7>(b1,b3);
 }
 
+#if MAX_VECTOR_SIZE >= 512
+
+/** \brief In-place shuffle of four Vec8d registers (4x8 doubles).
+ *
+ *  Each register is read as two halves, lanes 0-3 and lanes 4-7. On return,
+ *  register k holds lanes k and k+4 of every input register:
+ *
+ *    a_k = [ a0[k], a0[k+4], a1[k], a1[k+4], a2[k], a2[k+4], a3[k], a3[k+4] ]
+ *
+ *  blend8d indices 0-7 pick from the first argument, 8-15 from the second.
+ */
+void transpose_reg(Vec8d& a0, Vec8d& a1, Vec8d& a2, Vec8d& a3)
+{
+  Vec8d b0, b1, b2, b3;
+  // Stage 1: interleave lanes k and k+4 of two registers; b0/b2 carry
+  // k = 0,1 and b1/b3 carry k = 2,3 (so every input lane appears once).
+  b0 = blend8d<0,4,8,12,1,5,9,13>(a0, a1);
+  b1 = blend8d<2,6,10,14,3,7,11,15>(a0, a1);
+  b2 = blend8d<0,4,8,12,1,5,9,13>(a2, a3);
+  b3 = blend8d<2,6,10,14,3,7,11,15>(a2, a3);
+  // Stage 2: join the matching halves of the (a0,a1) and (a2,a3) pairs.
+  a0 = blend8d<0,1,2,3,8,9,10,11>(b0, b2);
+  a1 = blend8d<4,5,6,7,12,13,14,15>(b0, b2);
+  a2 = blend8d<0,1,2,3,8,9,10,11>(b1, b3);
+  a3 = blend8d<4,5,6,7,12,13,14,15>(b1, b3);
+}
+
+#endif
+
 #endif