From 4fe88b61e1b909f5c2e2ffe5f7ec06814bd38bf9 Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Wed, 19 Jun 2019 16:11:04 +0200 Subject: [PATCH] Correct permuting_horizontal_add for Vec8d --- dune/codegen/sumfact/horizontaladd.hh | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/dune/codegen/sumfact/horizontaladd.hh b/dune/codegen/sumfact/horizontaladd.hh index 6b350a56..90dbf038 100644 --- a/dune/codegen/sumfact/horizontaladd.hh +++ b/dune/codegen/sumfact/horizontaladd.hh @@ -17,18 +17,14 @@ typename base_floatingpoint<V>::value horizontal_add_upper(const V& x) return horizontal_add(x.get_high()); } -#if MAX_VECTOR_SIZE >= 512 - -/** Implement a variant of horizontal_add(Vec8d) that avoids the haddpd +/** Implement a variant of horizontal_add(Vec2d) that avoids the haddpd * instruction and instead uses the shuffle port. */ -static inline double permuting_horizontal_add(const Vec8d& a) +static inline double permuting_horizontal_add (const Vec2d & a) { - return permuting_horizontal_add(a.get_low()) + permuting_horizontal_add(a.get_high()); + return _mm_cvtsd_f64(_mm_add_pd(_mm_permute_pd(a,1),a)); } -#endif - /** Implement a variant of horizontal_add(Vec4d) that avoids the haddpd * instruction and instead uses the shuffle port. */ @@ -41,14 +37,18 @@ static inline double permuting_horizontal_add (const Vec4d& a) return _mm_cvtsd_f64(res); } -/** Implement a variant of horizontal_add(Vec2d) that avoids the haddpd +#if MAX_VECTOR_SIZE >= 512 + +/** Implement a variant of horizontal_add(Vec8d) that avoids the haddpd * instruction and instead uses the shuffle port. */ -static inline double permuting_horizontal_add (const Vec2d & a) +static inline double permuting_horizontal_add(const Vec8d& a) { - return _mm_cvtsd_f64(_mm_add_pd(_mm_permute_pd(a,1),a)); + return permuting_horizontal_add(a.get_low() + a.get_high()); } +#endif + template<class V> typename base_floatingpoint<V>::value permuting_horizontal_add_lower(const V& x) { -- GitLab