From 655e45215554c6292cdea25543818ece48b09344 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Tue, 18 Apr 2017 10:50:51 +0200
Subject: [PATCH] Fix operator counting for Vec8d

---
 dune/perftool/common/vectorclass.hh | 446 +++++++++++++++++++++++++++-
 1 file changed, 445 insertions(+), 1 deletion(-)

diff --git a/dune/perftool/common/vectorclass.hh b/dune/perftool/common/vectorclass.hh
index 3b857c35..a3c71167 100644
--- a/dune/perftool/common/vectorclass.hh
+++ b/dune/perftool/common/vectorclass.hh
@@ -165,7 +165,7 @@ static inline Vec4d operator - (Vec4d const & a, Vec4d const & b) {
 static inline Vec4d operator - (Vec4d const & a) {
   BARRIER;
   Vec4d r(a);
-  for (size_t i = 0 ; i < 3 ; ++i)
+  for (size_t i = 0 ; i < 4 ; ++i)
     r._d[i] = -a._d[i];
   BARRIER;
   return r;
@@ -590,6 +590,450 @@ static inline Vec4d blend4d(Vec4d const & a, Vec4d const & b) {
   return r;
 }
 
+
+/**
+ * Vec8d variant whose eight lanes are oc::OpCounter<double> values, so the
+ * lane-wise operators defined below go through the counter type.
+ */
+struct Vec8d
+{
+  // The eight lanes.
+  oc::OpCounter<double> _d[8];
+
+  using F = oc::OpCounter<double>;
+
+  // Leaves all lanes default-constructed.
+  Vec8d()
+  {}
+
+  // Broadcast: every lane becomes d.
+  Vec8d(F d)
+  {
+    BARRIER;
+    std::fill(_d,_d+8,d);
+    BARRIER;
+  }
+
+  // Broadcast of a plain double to every lane.
+  Vec8d(double d)
+  {
+    BARRIER;
+    std::fill(_d,_d+8,d);
+    BARRIER;
+  }
+
+  // Lane-wise construction: d0 goes to lane 0, ..., d7 to lane 7.
+  Vec8d(F d0, F d1, F d2, F d3, F d4, F d5, F d6, F d7)
+    : _d{d0,d1,d2,d3,d4,d5,d6,d7}
+  {
+    BARRIER;
+  }
+
+  // Copies p[0..7] into the lanes.
+  Vec8d& load(const F* p)
+  {
+    BARRIER;
+    std::copy(p,p+8,_d);
+    BARRIER;
+    return *this;
+  }
+
+  // Same copy as load(); no alignment check is performed here.
+  Vec8d& load_a(const F* p)
+  {
+    BARRIER;
+    std::copy(p,p+8,_d);
+    BARRIER;
+    return *this;
+  }
+
+  // Copies the lanes to p[0..7].
+  void store(F* p) const
+  {
+    BARRIER;
+    std::copy(_d,_d+8,p);
+    BARRIER;
+  }
+
+  // Same copy as store(); no alignment check is performed here.
+  void store_a(F* p) const
+  {
+    BARRIER;
+    std::copy(_d,_d+8,p);
+    BARRIER;
+  }
+
+  // Overwrites lane index; index is used unchecked and must be < 8.
+  Vec8d const& insert(uint32_t index, F value)
+  {
+    BARRIER;
+    _d[index] = value;
+    BARRIER;
+    return *this;
+  }
+
+  // Returns lane index; index is used unchecked and must be < 8.
+  F extract(uint32_t index) const
+  {
+    BARRIER;
+    return _d[index];
+  }
+
+  // Number of lanes.
+  constexpr static int size()
+  {
+    return 8;
+  }
+
+};
+
+
+/*****************************************************************************
+*
+*          Operators for Vec8d
+*
+*****************************************************************************/
+
+// vector operator + : add element by element
+static inline Vec8d operator + (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,b._d,r._d,[](auto x, auto y){ return x + y; });
+  BARRIER;
+  return r;
+}
+
+// vector operator += : add
+static inline Vec8d & operator += (Vec8d & a, Vec8d const & b) {
+  BARRIER;
+  std::transform(a._d,a._d+8,b._d,a._d,[](auto x, auto y){ return x + y; });
+  BARRIER;
+  return a;
+}
+
+// postfix operator ++
+static inline Vec8d operator ++ (Vec8d & a, int) {
+  BARRIER;
+  Vec8d a0 = a;
+  a = a + 1.0;
+  BARRIER;
+  return a0;
+}
+
+// prefix operator ++
+static inline Vec8d & operator ++ (Vec8d & a) {
+  BARRIER;
+  a = a + 1.0;
+  BARRIER;
+  return a;
+}
+
+// vector operator - : subtract element by element
+static inline Vec8d operator - (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,b._d,r._d,[](auto x, auto y){ return x - y; });
+  BARRIER;
+  return r;
+}
+
+// vector operator - : unary minus
+// Change sign bit, even for 0, INF and NAN
+static inline Vec8d operator - (Vec8d const & a) {
+  BARRIER;
+  Vec8d r(a);
+  for (size_t i = 0 ; i < 8 ; ++i)
+    r._d[i] = -a._d[i];
+  BARRIER;
+  return r;
+}
+
+// vector operator -= : subtract
+static inline Vec8d & operator -= (Vec8d & a, Vec8d const & b) {
+  BARRIER;
+  std::transform(a._d,a._d+8,b._d,a._d,[](auto x, auto y){ return x - y; });
+  BARRIER;
+  return a;
+}
+
+// postfix operator --
+static inline Vec8d operator -- (Vec8d & a, int) {
+  BARRIER;
+  Vec8d a0 = a;
+  a = a - 1.0;
+  BARRIER;
+  return a0;
+}
+
+// prefix operator --
+static inline Vec8d & operator -- (Vec8d & a) {
+  BARRIER;
+  a = a - 1.0;
+  BARRIER;
+  return a;
+}
+
+// vector operator * : multiply element by element
+static inline Vec8d operator * (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,b._d,r._d,[](auto x, auto y){ return x * y; });
+  BARRIER;
+  return r;
+}
+
+// vector operator *= : multiply
+static inline Vec8d & operator *= (Vec8d & a, Vec8d const & b) {
+  BARRIER;
+  std::transform(a._d,a._d+8,b._d,a._d,[](auto x, auto y){ return x * y; });
+  BARRIER;
+  return a;
+}
+
+// vector operator / : divide element by element
+static inline Vec8d operator / (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,b._d,r._d,[](auto x, auto y){ return x / y; });
+  BARRIER;
+  return r;
+}
+
+// vector operator /= : divide
+static inline Vec8d & operator /= (Vec8d & a, Vec8d const & b) {
+  BARRIER;
+  std::transform(a._d,a._d+8,b._d,a._d,[](auto x, auto y){ return x / y; });
+  BARRIER;
+  return a;
+}
+
+// The comparison operators below pass the address returned by _d[0].data()
+// to _vcl::Vec8d::load, compare the loaded vectors and record 8 comparisons.
+// NOTE(review): presumably this relies on the eight counters being laid out
+// like eight contiguous doubles - confirm against oc::OpCounter.
+
+// vector operator == : returns true for elements for which a == b
+static inline _vcl::Vec8db operator == (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  _vcl::Vec8d a_, b_;
+  BARRIER;
+  a_.load(a._d[0].data());
+  BARRIER;
+  b_.load(b._d[0].data());
+  BARRIER;
+  Vec8d::F::comparisons(8);
+  BARRIER;
+  return a_ == b_;
+}
+
+// vector operator != : returns true for elements for which a != b
+static inline _vcl::Vec8db operator != (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  _vcl::Vec8d a_, b_;
+  BARRIER;
+  a_.load(a._d[0].data());
+  BARRIER;
+  b_.load(b._d[0].data());
+  BARRIER;
+  Vec8d::F::comparisons(8);
+  BARRIER;
+  return a_ != b_;
+}
+
+// vector operator < : returns true for elements for which a < b
+static inline _vcl::Vec8db operator < (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  _vcl::Vec8d a_, b_;
+  BARRIER;
+  a_.load(a._d[0].data());
+  BARRIER;
+  b_.load(b._d[0].data());
+  BARRIER;
+  Vec8d::F::comparisons(8);
+  BARRIER;
+  return a_ < b_;
+}
+
+// vector operator <= : returns true for elements for which a <= b
+static inline _vcl::Vec8db operator <= (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  _vcl::Vec8d a_, b_;
+  BARRIER;
+  a_.load(a._d[0].data());
+  BARRIER;
+  b_.load(b._d[0].data());
+  BARRIER;
+  Vec8d::F::comparisons(8);
+  BARRIER;
+  return a_ <= b_;
+}
+
+// vector operator > : returns true for elements for which a > b
+static inline _vcl::Vec8db operator > (Vec8d const & a, Vec8d const & b) {
+  return b < a;
+}
+
+// vector operator >= : returns true for elements for which a >= b
+static inline _vcl::Vec8db operator >= (Vec8d const & a, Vec8d const & b) {
+  return b <= a;
+}
+
+// General arithmetic functions, etc.
+
+// Horizontal add: Calculates the sum of all vector elements.
+static inline Vec8d::F horizontal_add (Vec8d const & a) {
+  BARRIER;
+  // Keep the sum in a local so that the closing BARRIER is reached;
+  // placed after the return statement it would never be executed.
+  Vec8d::F r = std::accumulate(a._d,a._d+8,Vec8d::F(0.0));
+  BARRIER;
+  return r;
+}
+
+// function max: a > b ? a : b
+static inline Vec8d max(Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,b._d,r._d,[](auto x, auto y){ return max(x,y); });
+  BARRIER;
+  return r;
+}
+
+// function min: a < b ? a : b
+static inline Vec8d min(Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,b._d,r._d,[](auto x, auto y){ return min(x,y); });
+  BARRIER;
+  return r;
+}
+
+// function abs: absolute value
+// Removes sign bit, even for -0.0f, -INF and -NAN
+static inline Vec8d abs(Vec8d const & a) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,r._d,[](auto x){ return abs(x); });
+  BARRIER;
+  return r;
+}
+
+// function sqrt: square root
+static inline Vec8d sqrt(Vec8d const & a) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,r._d,[](auto x){ return sqrt(x); });
+  BARRIER;
+  return r;
+}
+
+// function square: a * a
+static inline Vec8d square(Vec8d const & a) {
+  return a * a;
+}
+
+
+// exponential function
+static inline Vec8d exp(Vec8d const & a){
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,r._d,[](auto x){ return exp(x); });
+  BARRIER;
+  return r;
+}
+
+// function round: round to nearest integer (even). (result as double vector)
+static inline Vec8d round(Vec8d const & a) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,r._d,[](auto x){ return round(x); });
+  BARRIER;
+  return r;
+}
+
+// function truncate: round towards zero. (result as double vector)
+static inline Vec8d truncate(Vec8d const & a) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,r._d,[](auto x){ return trunc(x); });
+  BARRIER;
+  return r;
+}
+
+// function floor: round towards minus infinity. (result as double vector)
+static inline Vec8d floor(Vec8d const & a) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,r._d,[](auto x){ return floor(x); });
+  BARRIER;
+  return r;
+}
+
+// function ceil: round towards plus infinity. (result as double vector)
+static inline Vec8d ceil(Vec8d const & a) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,r._d,[](auto x){ return ceil(x); });
+  BARRIER;
+  return r;
+}
+
+// Fused multiply and add functions
+
+// Multiply and add
+static inline Vec8d mul_add(Vec8d const & a, Vec8d const & b, Vec8d const & c) {
+  BARRIER;
+  Vec8d r;
+  for (size_t i = 0 ; i < 8 ; ++i)
+    r._d[i] = a._d[i] * b._d[i] + c._d[i];
+  BARRIER;
+  return r;
+}
+
+
+// Multiply and subtract
+static inline Vec8d mul_sub(Vec8d const & a, Vec8d const & b, Vec8d const & c) {
+  BARRIER;
+  Vec8d r;
+  for (size_t i = 0 ; i < 8 ; ++i)
+    r._d[i] = a._d[i] * b._d[i] - c._d[i];
+  BARRIER;
+  return r;
+}
+
+// Multiply and inverse subtract
+static inline Vec8d nmul_add(Vec8d const & a, Vec8d const & b, Vec8d const & c) {
+  BARRIER;
+  Vec8d r;
+  for (size_t i = 0 ; i < 8 ; ++i)
+    r._d[i] = - a._d[i] * b._d[i] + c._d[i];
+  BARRIER;
+  return r;
+}
+
+
+// Blend: forwards to _vcl::blend8d with the same index arguments and
+// records one blend on the counter.
+template <int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
+static inline Vec8d blend8d(Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  _vcl::Vec8d a_,b_;
+  BARRIER;
+  a_.load(a._d[0].data());
+  BARRIER;
+  b_.load(b._d[0].data());
+  BARRIER;
+  _vcl::Vec8d r_ = _vcl::blend8d<i0,i1,i2,i3,i4,i5,i6,i7>(a_,b_);
+  BARRIER;
+  Vec8d::F::blends(1);
+  BARRIER;
+  Vec8d r;
+  BARRIER;
+  r_.store(r._d[0].data());
+  BARRIER;
+  return r;
+}
+
 #endif // ENABLE_COUNTER
 
 #endif // DUNE_PDELAB_COMMON_VECTORCLASS_HH
-- 
GitLab