From c11a6fe10189009220bed168707123af34376de2 Mon Sep 17 00:00:00 2001 From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de> Date: Wed, 24 Jul 2019 14:17:14 +0200 Subject: [PATCH] Provide fallback if no arch flag is given --- dune/codegen/sumfact/horizontaladd.hh | 35 ++++++++++++++++++--------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/dune/codegen/sumfact/horizontaladd.hh b/dune/codegen/sumfact/horizontaladd.hh index 90dbf038..7dd122af 100644 --- a/dune/codegen/sumfact/horizontaladd.hh +++ b/dune/codegen/sumfact/horizontaladd.hh @@ -1,21 +1,11 @@ #ifndef DUNE_CODEGEN_SUMFACT_HORIZONTALADD_HH #define DUNE_CODEGEN_SUMFACT_HORIZONTALADD_HH -#include<immintrin.h> #include<dune/codegen/common/vectorclass.hh> -template<class V> -typename base_floatingpoint<V>::value horizontal_add_lower(const V& x) -{ - return horizontal_add(x.get_low()); -} - -template<class V> -typename base_floatingpoint<V>::value horizontal_add_upper(const V& x) -{ - return horizontal_add(x.get_high()); -} +// Only use our custom implementations if we have AVX2 or later! +#if INSTRSET >= 8 /** Implement a variant of horizontal_add(Vec2d) that avoids the haddpd * instruction and instead uses the shuffle port. @@ -49,6 +39,27 @@ static inline double permuting_horizontal_add(const Vec8d& a) #endif +#else +template<typename V> +static inline double permuting_horizontal_add (const V& a) +{ + return horizontal_add(a); +} + +#endif + +template<class V> +typename base_floatingpoint<V>::value horizontal_add_lower(const V& x) +{ + return horizontal_add(x.get_low()); +} + +template<class V> +typename base_floatingpoint<V>::value horizontal_add_upper(const V& x) +{ + return horizontal_add(x.get_high()); +} + template<class V> typename base_floatingpoint<V>::value permuting_horizontal_add_lower(const V& x) { -- GitLab