From 5453b6452b92e48c2d60ec0e5b4ff0df99bc08ec Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Thu, 15 Feb 2018 15:26:15 +0100
Subject: [PATCH] Implement some functions in counted Vec4d (pow, select)

---
 dune/perftool/common/opcounter.hh   |  10 ++
 dune/perftool/common/vectorclass.hh | 214 +++++++++++++++++++---------
 2 files changed, 155 insertions(+), 69 deletions(-)

diff --git a/dune/perftool/common/opcounter.hh b/dune/perftool/common/opcounter.hh
index 5a0115e6..966f286e 100644
--- a/dune/perftool/common/opcounter.hh
+++ b/dune/perftool/common/opcounter.hh
@@ -16,6 +16,16 @@ namespace oc {
   template<typename F>
   class OpCounter;
 
+  template<typename T>
+  struct isOpCounter : public std::false_type
+  {};
+
+  template<typename F>
+  struct isOpCounter<OpCounter<F>> : public std::true_type
+  {};
+
+  template<typename T>
+  constexpr bool isOpCounterV = isOpCounter<T>::value;
 }
 
 namespace Dune {
diff --git a/dune/perftool/common/vectorclass.hh b/dune/perftool/common/vectorclass.hh
index 0f5b9b88..9a87d058 100644
--- a/dune/perftool/common/vectorclass.hh
+++ b/dune/perftool/common/vectorclass.hh
@@ -244,6 +244,30 @@ static inline _vcl::Vec4db operator == (Vec4d const & a, Vec4d const & b) {
     return a_ == b_;
 }
 
+// vector operator == (scalar, vector): true for elements for which a == b[i]; records 4 comparisons
+static inline _vcl::Vec4db operator == (oc::OpCounter<double> a, Vec4d const & b) {
+    BARRIER;
+    _vcl::Vec4d a_(a._v), b_;
+    BARRIER;
+    b_.load(b._d[0].data());
+    BARRIER;
+    Vec4d::F::comparisons(4);
+    BARRIER;
+    return a_ == b_;
+}
+
+// vector operator == (vector, scalar): true for elements for which b[i] == a; records 4 comparisons
+static inline _vcl::Vec4db operator == (Vec4d const & b, oc::OpCounter<double> a) {
+    BARRIER;
+    _vcl::Vec4d a_(a._v), b_;
+    BARRIER;
+    b_.load(b._d[0].data());
+    BARRIER;
+    Vec4d::F::comparisons(4);
+    BARRIER;
+    return a_ == b_;
+}
+
 // vector operator != : returns true for elements for which a != b
 static inline _vcl::Vec4db operator != (Vec4d const & a, Vec4d const & b) {
     BARRIER;
@@ -258,6 +282,30 @@ static inline _vcl::Vec4db operator != (Vec4d const & a, Vec4d const & b) {
     return a_ != b_;
 }
 
+// vector operator != (scalar, vector): true for elements for which a != b[i]; records 4 comparisons
+static inline _vcl::Vec4db operator != (oc::OpCounter<double> a, Vec4d const & b) {
+    BARRIER;
+    _vcl::Vec4d a_(a._v), b_;
+    BARRIER;
+    b_.load(b._d[0].data());
+    BARRIER;
+    Vec4d::F::comparisons(4);
+    BARRIER;
+    return a_ != b_;
+}
+
+// vector operator != (vector, scalar): true for elements for which b[i] != a; records 4 comparisons
+static inline _vcl::Vec4db operator != (Vec4d const & b, oc::OpCounter<double> a) {
+    BARRIER;
+    _vcl::Vec4d a_(a._v), b_;
+    BARRIER;
+    b_.load(b._d[0].data());
+    BARRIER;
+    Vec4d::F::comparisons(4);
+    BARRIER;
+    return a_ != b_;
+}
+
 // vector operator < : returns true for elements for which a < b
 static inline _vcl::Vec4db operator < (Vec4d const & a, Vec4d const & b) {
     BARRIER;
@@ -272,6 +320,30 @@ static inline _vcl::Vec4db operator < (Vec4d const & a, Vec4d const & b) {
     return a_ < b_;
 }
 
+// vector operator < (scalar, vector): true for elements for which a < b[i]; records 4 comparisons
+static inline _vcl::Vec4db operator < (oc::OpCounter<double> a, Vec4d const & b) {
+    BARRIER;
+    _vcl::Vec4d a_(a._v), b_;
+    BARRIER;
+    b_.load(b._d[0].data());
+    BARRIER;
+    Vec4d::F::comparisons(4);
+    BARRIER;
+    return a_ < b_;
+}
+
+// vector operator < (vector, scalar): true for elements for which b[i] < a; records 4 comparisons
+static inline _vcl::Vec4db operator < (Vec4d const & b, oc::OpCounter<double> a) {
+    BARRIER;
+    _vcl::Vec4d a_(a._v), b_;
+    BARRIER;
+    b_.load(b._d[0].data());
+    BARRIER;
+    Vec4d::F::comparisons(4);
+    BARRIER;
+    return b_ < a_;
+}
+
 // vector operator <= : returns true for elements for which a <= b
 static inline _vcl::Vec4db operator <= (Vec4d const & a, Vec4d const & b) {
     BARRIER;
@@ -286,16 +358,61 @@ static inline _vcl::Vec4db operator <= (Vec4d const & a, Vec4d const & b) {
     return a_ <= b_;
 }
 
+// vector operator <= (scalar, vector): true for elements for which a <= b[i]; records 4 comparisons
+static inline _vcl::Vec4db operator <= (oc::OpCounter<double> a, Vec4d const & b) {
+    BARRIER;
+    _vcl::Vec4d a_(a._v), b_;
+    BARRIER;
+    b_.load(b._d[0].data());
+    BARRIER;
+    Vec4d::F::comparisons(4);
+    BARRIER;
+    return a_ <= b_;
+}
+
+// vector operator <= (vector, scalar): true for elements for which b[i] <= a; records 4 comparisons
+static inline _vcl::Vec4db operator <= (Vec4d const & b, oc::OpCounter<double> a) {
+    BARRIER;
+    _vcl::Vec4d a_(a._v), b_;
+    BARRIER;
+    b_.load(b._d[0].data());
+    BARRIER;
+    Vec4d::F::comparisons(4);
+    BARRIER;
+    return b_ <= a_;
+}
+
 // vector operator > : returns true for elements for which a > b
 static inline _vcl::Vec4db operator > (Vec4d const & a, Vec4d const & b) {
     return b < a;
 }
 
+// vector operator > (scalar, vector): true for elements for which a > b[i], evaluated as b < a
+static inline _vcl::Vec4db operator > (oc::OpCounter<double> a, Vec4d const & b) {
+    return b < a;
+}
+
+// vector operator > (vector, scalar): true for elements for which b[i] > a, evaluated as a < b
+static inline _vcl::Vec4db operator > (Vec4d const & b, oc::OpCounter<double> a) {
+    return a < b;
+}
+
 // vector operator >= : returns true for elements for which a >= b
 static inline _vcl::Vec4db operator >= (Vec4d const & a, Vec4d const & b) {
     return b <= a;
 }
 
+// vector operator >= (scalar, vector): true for elements for which a >= b[i], evaluated as b <= a
+static inline _vcl::Vec4db operator >= (oc::OpCounter<double> a, Vec4d const & b) {
+    return b <= a;
+}
+
+// vector operator >= (vector, scalar): true for elements for which b[i] >= a, evaluated as a <= b
+static inline _vcl::Vec4db operator >= (Vec4d const & b, oc::OpCounter<double> a) {
+    return a <= b;
+}
+
+
 // avoid logical operators for now, I don't think we need them
 #if 0
 
@@ -411,81 +528,40 @@ static inline Vec4d exp(Vec4d const & a){
     return r;
 }
 
-
-// ignore pow() for now
-#if 0
-
-// pow(Vec4d, int):
-template <typename TT> static Vec4d pow(Vec4d const & a, TT n);
-
-// Raise floating point numbers to integer power n
-template <>
-inline Vec4d pow<int>(Vec4d const & x0, int n) {
-    return pow_template_i<Vec4d>(x0, n);
+// pow with an OpCounter exponent: applies the scalar pow(x, n) to each of the 4 elements of a
+template <typename TT>
+static inline Vec4d pow(Vec4d const & a, oc::OpCounter<TT> n)
+{
+  BARRIER;
+  Vec4d r;
+  std::transform(a._d,a._d+4,r._d,[=](auto x){ return pow(x, n); });
+  BARRIER;
+  return r;
 }
 
-// allow conversion from unsigned int
-template <>
-inline Vec4d pow<uint32_t>(Vec4d const & x0, uint32_t n) {
-    return pow_template_i<Vec4d>(x0, (int)n);
+// pow with a non-OpCounter exponent: applies the scalar pow(x, n) to each of the 4 elements of a
+template <typename TT>
+static inline
+std::enable_if_t<not oc::isOpCounterV<TT>, Vec4d> pow(Vec4d const & a, TT n)
+{
+  BARRIER;
+  Vec4d r;
+  std::transform(a._d,a._d+4,r._d,[=](auto x){ return pow(x, n); });
+  BARRIER;
+  return r;
 }
 
 
-// Raise floating point numbers to integer power n, where n is a compile-time constant
-template <int n>
-static inline Vec4d pow_n(Vec4d const & a) {
-    if (n < 0)    return Vec4d(1.0) / pow_n<-n>(a);
-    if (n == 0)   return Vec4d(1.0);
-    if (n >= 256) return pow(a, n);
-    Vec4d x = a;                       // a^(2^i)
-    Vec4d y;                           // accumulator
-    const int lowest = n - (n & (n-1));// lowest set bit in n
-    if (n & 1) y = x;
-    if (n < 2) return y;
-    x = x*x;                           // x^2
-    if (n & 2) {
-        if (lowest == 2) y = x; else y *= x;
-    }
-    if (n < 4) return y;
-    x = x*x;                           // x^4
-    if (n & 4) {
-        if (lowest == 4) y = x; else y *= x;
-    }
-    if (n < 8) return y;
-    x = x*x;                           // x^8
-    if (n & 8) {
-        if (lowest == 8) y = x; else y *= x;
-    }
-    if (n < 16) return y;
-    x = x*x;                           // x^16
-    if (n & 16) {
-        if (lowest == 16) y = x; else y *= x;
-    }
-    if (n < 32) return y;
-    x = x*x;                           // x^32
-    if (n & 32) {
-        if (lowest == 32) y = x; else y *= x;
-    }
-    if (n < 64) return y;
-    x = x*x;                           // x^64
-    if (n & 64) {
-        if (lowest == 64) y = x; else y *= x;
-    }
-    if (n < 128) return y;
-    x = x*x;                           // x^128
-    if (n & 128) {
-        if (lowest == 128) y = x; else y *= x;
-    }
-    return y;
-}
-
-template <int n>
-static inline Vec4d pow(Vec4d const & a, Const_int_t<n>) {
-    return pow_n<n>(a);
+static inline Vec4d select(const _vcl::Vec4db& s, const Vec4d& a, const Vec4d& b)
+{
+  BARRIER;
+  Vec4d r;
+  for(int i=0; i<4; ++i)
+    r._d[i] = s.extract(i) ? a._d[i] : b._d[i];
+  BARRIER;
+  return r;
 }
 
-#endif
-
 // function round: round to nearest integer (even). (result as double vector)
 static inline Vec4d round(Vec4d const & a) {
     BARRIER;
@@ -1095,7 +1171,7 @@ struct Vec8f
 
 /*****************************************************************************
 *
-* Operators for Vec4d
+* Operators for Vec8f
 *
 *****************************************************************************/
 
-- 
GitLab