From 808f70848cb39ded755486a2482287cd1156c912 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Mon, 19 Jun 2017 13:08:04 +0200
Subject: [PATCH] Update vector class library + Patch

---
 dune/perftool/vectorclass                     |  2 +-
 ...ter-implementation-of-horizontal_add.patch | 32 +++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 patches/vectorclass/0001-Better-implementation-of-horizontal_add.patch

diff --git a/dune/perftool/vectorclass b/dune/perftool/vectorclass
index 4e11d282..8d52f136 160000
--- a/dune/perftool/vectorclass
+++ b/dune/perftool/vectorclass
@@ -1 +1 @@
-Subproject commit 4e11d28201c90f357771c98af790eccfaea2103d
+Subproject commit 8d52f13665adbfe4b93bbf076b79828d03563ce1
diff --git a/patches/vectorclass/0001-Better-implementation-of-horizontal_add.patch b/patches/vectorclass/0001-Better-implementation-of-horizontal_add.patch
new file mode 100644
index 00000000..cb167554
--- /dev/null
+++ b/patches/vectorclass/0001-Better-implementation-of-horizontal_add.patch
@@ -0,0 +1,32 @@
+From 69f4ea4dcd018eb74c39a076a60fc27c0496e1dd Mon Sep 17 00:00:00 2001
+From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
+Date: Mon, 19 Jun 2017 13:07:22 +0200
+Subject: [PATCH] Better implementation of horizontal_add
+
+---
+ vectorf256.h | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/vectorf256.h b/vectorf256.h
+index db509f8..2bbd9de 100644
+--- a/vectorf256.h
++++ b/vectorf256.h
+@@ -1692,10 +1692,11 @@ static inline Vec4d if_mul (Vec4db const & f, Vec4d const & a, Vec4d const & b)
+ 
+ // Horizontal add: Calculates the sum of all vector elements.
+ static inline double horizontal_add (Vec4d const & a) {
+-    __m256d t1 = _mm256_hadd_pd(a,a);
+-    __m128d t2 = _mm256_extractf128_pd(t1,1);
+-    __m128d t3 = _mm_add_sd(_mm256_castpd256_pd128(t1),t2);
+-    return _mm_cvtsd_f64(t3);        
++    const __m128d valupper = _mm256_extractf128_pd(a, 1);   // upper 128-bit half: elements 2 and 3
++    const __m128d vallower = _mm256_castpd256_pd128(a);      // lower 128-bit half: elements 0 and 1
++    const __m128d valval = _mm_add_pd(valupper, vallower);   // pairwise sums: (a0+a2, a1+a3)
++    const __m128d res = _mm_add_pd(_mm_permute_pd(valval,1), valval);  // add the lane-swapped copy so lane 0 holds the full sum
++    return _mm_cvtsd_f64(res);                               // return lane 0 as a scalar double
+ }
+ 
+ // function max: a > b ? a : b
+-- 
+2.1.4
+
-- 
GitLab