Skip to content
Snippets Groups Projects
Commit 4fe88b61 authored by Dominic Kempf
Browse files

Correct permuting_horizontal_add for Vec8d

parent 7b181239
No related branches found
No related tags found
No related merge requests found
...@@ -17,18 +17,14 @@ typename base_floatingpoint<V>::value horizontal_add_upper(const V& x) ...@@ -17,18 +17,14 @@ typename base_floatingpoint<V>::value horizontal_add_upper(const V& x)
return horizontal_add(x.get_high()); return horizontal_add(x.get_high());
} }
#if MAX_VECTOR_SIZE >= 512 /** Implement a variant of horizontal_add(Vec2d) that avoids the haddpd
/** Implement a variant of horizontal_add(Vec8d) that avoids the haddpd
* instruction and instead uses the shuffle port. * instruction and instead uses the shuffle port.
*/ */
static inline double permuting_horizontal_add(const Vec8d& a) static inline double permuting_horizontal_add (const Vec2d & a)
{ {
return permuting_horizontal_add(a.get_low()) + permuting_horizontal_add(a.get_high()); return _mm_cvtsd_f64(_mm_add_pd(_mm_permute_pd(a,1),a));
} }
#endif
/** Implement a variant of horizontal_add(Vec4d) that avoids the haddpd /** Implement a variant of horizontal_add(Vec4d) that avoids the haddpd
* instruction and instead uses the shuffle port. * instruction and instead uses the shuffle port.
*/ */
...@@ -41,14 +37,18 @@ static inline double permuting_horizontal_add (const Vec4d& a) ...@@ -41,14 +37,18 @@ static inline double permuting_horizontal_add (const Vec4d& a)
return _mm_cvtsd_f64(res); return _mm_cvtsd_f64(res);
} }
/** Implement a variant of horizontal_add(Vec2d) that avoids the haddpd #if MAX_VECTOR_SIZE >= 512
/** Implement a variant of horizontal_add(Vec8d) that avoids the haddpd
* instruction and instead uses the shuffle port. * instruction and instead uses the shuffle port.
*/ */
static inline double permuting_horizontal_add (const Vec2d & a) static inline double permuting_horizontal_add(const Vec8d& a)
{ {
return _mm_cvtsd_f64(_mm_add_pd(_mm_permute_pd(a,1),a)); return permuting_horizontal_add(a.get_low() + a.get_high());
} }
#endif
template<class V> template<class V>
typename base_floatingpoint<V>::value permuting_horizontal_add_lower(const V& x) typename base_floatingpoint<V>::value permuting_horizontal_add_lower(const V& x)
{ {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment