Skip to content
Snippets Groups Projects
Commit 4dfb8de5 authored by Dominic Kempf's avatar Dominic Kempf
Browse files

Implement an 8x8 transpose of 512-bit SIMD vectors

parent 3ca61e2b
No related branches found
No related tags found
No related merge requests found
......@@ -75,6 +75,48 @@ void transpose_reg (Vec8d& a0, Vec8d& a1)
a1 = b1;
}
namespace impl
{
  /* Exchange the "off-diagonal" halves of two registers:
   *
   *   before:  a = (a_low, a_high)   b = (b_low, b_high)
   *   after:   a = (a_low, b_low)    b = (a_high, b_high)
   */
  void swap_halves(Vec8d& a, Vec8d& b)
  {
    // Read the two halves that change owner before a is overwritten.
    const Vec4d a_upper = a.get_high();
    const Vec4d b_lower = b.get_low();

    a = Vec8d(a.get_low(), b_lower);
    b = Vec8d(a_upper, b.get_high());
  }

  /* Transpose four Vec8d registers as two independent 4x4 blocks:
   * the lower four lanes of a0..a3 are transposed among themselves,
   * and so are the upper four lanes. This mirrors the 4x4 transpose
   * used for Vec4d, applied to each half of the Vec8d.
   *
   * The registers are updated in place.
   */
  void _transpose4x8(Vec8d& a0, Vec8d& a1, Vec8d& a2, Vec8d& a3)
  {
    // Stage 1: interleave neighbouring rows lane by lane.
    // (Presumably blend8d indices 0-7 pick lanes of the first argument and
    // 8-15 lanes of the second -- confirm against the vector class manual.)
    const Vec8d even01 = blend8d<0,8,2,10,4,12,6,14>(a0, a1);
    const Vec8d odd01  = blend8d<1,9,3,11,5,13,7,15>(a0, a1);
    const Vec8d even23 = blend8d<0,8,2,10,4,12,6,14>(a2, a3);
    const Vec8d odd23  = blend8d<1,9,3,11,5,13,7,15>(a2, a3);

    // Stage 2: combine lane pairs of the interleaved rows; all inputs were
    // captured in stage 1, so writing a0..a3 here is safe.
    a0 = blend8d<0,1,8,9,4,5,12,13>(even01, even23);
    a1 = blend8d<0,1,8,9,4,5,12,13>(odd01, odd23);
    a2 = blend8d<2,3,10,11,6,7,14,15>(even01, even23);
    a3 = blend8d<2,3,10,11,6,7,14,15>(odd01, odd23);
  }
}
/* In-place 8x8 transpose of eight Vec8d registers (a0..a7 are the rows).
 *
 * Step 1 transposes each of the four 4x4 sub-blocks individually, using
 * the same shuffle pattern as the Vec4d transpose.
 * Step 2 swaps the two off-diagonal 4x4 blocks by exchanging register
 * halves between row i and row i+4.
 *
 * NOTE(review): these definitions are not declared inline; if this file is
 * a header included from several translation units, that needs checking.
 */
void transpose(Vec8d& a0, Vec8d& a1, Vec8d& a2, Vec8d& a3,
               Vec8d& a4, Vec8d& a5, Vec8d& a6, Vec8d& a7)
{
  // Step 1: per-block 4x4 transposes on the upper and lower row groups.
  impl::_transpose4x8(a0, a1, a2, a3);
  impl::_transpose4x8(a4, a5, a6, a7);

  // Step 2: move the off-diagonal blocks to their transposed positions.
  impl::swap_halves(a0, a4);
  impl::swap_halves(a1, a5);
  impl::swap_halves(a2, a6);
  impl::swap_halves(a3, a7);
}
#endif
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment