diff --git a/dune/codegen/sumfact/horizontaladd.hh b/dune/codegen/sumfact/horizontaladd.hh
index 6b350a56e25c51dcfacee7fb177efcaab4fc9395..90dbf03832056964eea8107f6ba2c80868af3dee 100644
--- a/dune/codegen/sumfact/horizontaladd.hh
+++ b/dune/codegen/sumfact/horizontaladd.hh
@@ -17,18 +17,14 @@ typename base_floatingpoint<V>::value horizontal_add_upper(const V& x)
   return horizontal_add(x.get_high());
 }
 
-#if MAX_VECTOR_SIZE >= 512
-
-/** Implement a variant of horizontal_add(Vec8d) that avoids the haddpd
+/** Implement a variant of horizontal_add(Vec2d) that avoids the haddpd
  *  instruction and instead uses the shuffle port.
  */
-static inline double permuting_horizontal_add(const Vec8d& a)
+static inline double permuting_horizontal_add (const Vec2d & a)
 {
-  return permuting_horizontal_add(a.get_low()) + permuting_horizontal_add(a.get_high());
+    return _mm_cvtsd_f64(_mm_add_pd(_mm_permute_pd(a,1),a)); // swap the two lanes, add to a, return lane 0 (= a[0] + a[1])
 }
 
-#endif
-
 /** Implement a variant of horizontal_add(Vec4d) that avoids the haddpd
  *  instruction and instead uses the shuffle port.
  */
@@ -41,14 +37,18 @@ static inline double permuting_horizontal_add (const Vec4d& a)
     return _mm_cvtsd_f64(res);
 }
 
-/** Implement a variant of horizontal_add(Vec2d) that avoids the haddpd
+#if MAX_VECTOR_SIZE >= 512
+
+/** Implement a variant of horizontal_add(Vec8d) that avoids the haddpd
  *  instruction and instead uses the shuffle port.
  */
-static inline double permuting_horizontal_add (const Vec2d & a)
+static inline double permuting_horizontal_add(const Vec8d& a)
 {
-    return _mm_cvtsd_f64(_mm_add_pd(_mm_permute_pd(a,1),a));
+  return permuting_horizontal_add(a.get_low() + a.get_high()); // add low and high halves, then reduce that sum (presumably a Vec4d) with one call
 }
 
+#endif
+
 template<class V>
 typename base_floatingpoint<V>::value permuting_horizontal_add_lower(const V& x)
 {