Correct permuting_horizontal_add for Vec8d

4fe88b61 · Dominic Kempf · 7b181239 · 4fe88b61
Commit 4fe88b61 authored 5 years ago by Dominic Kempf
--- a/dune/codegen/sumfact/horizontaladd.hh
+++ b/dune/codegen/sumfact/horizontaladd.hh
@@ -17,18 +17,14 @@ typename base_floatingpoint<V>::value horizontal_add_upper(const V& x)
  return horizontal_add(x.get_high());
 }
-#if MAX_VECTOR_SIZE >= 512
+/** Implement a variant of horizontal_add(Vec2d) that avoids the haddpd
-/** Implement a variant of horizontal_add(Vec8d) that avoids the haddpd
 *  instruction and instead uses the shuffle port.
 */
-static inline double permuting_horizontal_add(const Vec8d& a)
+static inline double permuting_horizontal_add (const Vec2d & a)
 {
-  return permuting_horizontal_add(a.get_low()) + permuting_horizontal_add(a.get_high());
+    return _mm_cvtsd_f64(_mm_add_pd(_mm_permute_pd(a,1),a));
 }
-#endif
 /** Implement a variant of horizontal_add(Vec4d) that avoids the haddpd
 *  instruction and instead uses the shuffle port.
 */
@@ -41,14 +37,18 @@ static inline double permuting_horizontal_add (const Vec4d& a)
    return _mm_cvtsd_f64(res);
 }
-/** Implement a variant of horizontal_add(Vec2d) that avoids the haddpd
+#if MAX_VECTOR_SIZE >= 512
+/** Implement a variant of horizontal_add(Vec8d) that avoids the haddpd
 *  instruction and instead uses the shuffle port.
 */
-static inline double permuting_horizontal_add (const Vec2d & a)
+static inline double permuting_horizontal_add(const Vec8d& a)
 {
-    return _mm_cvtsd_f64(_mm_add_pd(_mm_permute_pd(a,1),a));
+  return permuting_horizontal_add(a.get_low() + a.get_high());
 }
+#endif
 template<class V>
 typename base_floatingpoint<V>::value permuting_horizontal_add_lower(const V& x)
 {