Skip to content
Snippets Groups Projects
Commit 808f7084 authored by Dominic Kempf
Browse files

Update vector class library + Patch

parent 488faabb
No related branches found
No related tags found
No related merge requests found
Subproject commit 4e11d28201c90f357771c98af790eccfaea2103d Subproject commit 8d52f13665adbfe4b93bbf076b79828d03563ce1
From 69f4ea4dcd018eb74c39a076a60fc27c0496e1dd Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Mon, 19 Jun 2017 13:07:22 +0200
Subject: [PATCH] Better implementation of horizontal_add
---
vectorf256.h | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/vectorf256.h b/vectorf256.h
index db509f8..2bbd9de 100644
--- a/vectorf256.h
+++ b/vectorf256.h
@@ -1692,10 +1692,11 @@ static inline Vec4d if_mul (Vec4db const & f, Vec4d const & a, Vec4d const & b)
// Horizontal add: Calculates the sum of all vector elements.
static inline double horizontal_add (Vec4d const & a) {
- __m256d t1 = _mm256_hadd_pd(a,a);
- __m128d t2 = _mm256_extractf128_pd(t1,1);
- __m128d t3 = _mm_add_sd(_mm256_castpd256_pd128(t1),t2);
- return _mm_cvtsd_f64(t3);
+ const __m128d valupper = _mm256_extractf128_pd(a, 1); // upper 128-bit half of a
+ const __m128d vallower = _mm256_castpd256_pd128(a); // lower 128-bit half of a
+ const __m128d valval = _mm_add_pd(valupper, vallower); // element-wise sum of the two halves
+ const __m128d res = _mm_add_pd(_mm_permute_pd(valval,1), valval); // add the swapped pair: low element = total
+ return _mm_cvtsd_f64(res); // return the low element as a scalar double
}
// function max: a > b ? a : b
--
2.1.4
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment