From 5453b6452b92e48c2d60ec0e5b4ff0df99bc08ec Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Thu, 15 Feb 2018 15:26:15 +0100
Subject: [PATCH] Implement some functions in counted Vec4d (pow, select)

---
 dune/perftool/common/opcounter.hh   |  10 ++
 dune/perftool/common/vectorclass.hh | 214 +++++++++++++++++++---------
 2 files changed, 155 insertions(+), 69 deletions(-)

diff --git a/dune/perftool/common/opcounter.hh b/dune/perftool/common/opcounter.hh
index 5a0115e6..966f286e 100644
--- a/dune/perftool/common/opcounter.hh
+++ b/dune/perftool/common/opcounter.hh
@@ -16,6 +16,16 @@ namespace oc {
   template<typename F>
   class OpCounter;
 
+  template<typename T>  // primary template: used for every T that is not matched by the specialization below
+  struct isOpCounter : public std::false_type
+  {};
+
+  template<typename F>  // partial specialization: selected when the argument is exactly OpCounter<F>
+  struct isOpCounter<OpCounter<F>> : public std::true_type
+  {};
+
+  template<typename T>  // variable-template shorthand for isOpCounter<T>::value
+  constexpr bool isOpCounterV = isOpCounter<T>::value;
 }
 
 namespace Dune {
diff --git a/dune/perftool/common/vectorclass.hh b/dune/perftool/common/vectorclass.hh
index 0f5b9b88..9a87d058 100644
--- a/dune/perftool/common/vectorclass.hh
+++ b/dune/perftool/common/vectorclass.hh
@@ -244,6 +244,30 @@ static inline _vcl::Vec4db operator == (Vec4d const & a, Vec4d const & b) {
   return a_ == b_;
 }
 
+// scalar == vector: compares the counted scalar a with the elements of b and records 4 comparisons
+static inline _vcl::Vec4db operator == (oc::OpCounter<double> a, Vec4d const & b) {
+  BARRIER;
+  _vcl::Vec4d a_(a._v), b_;  // a_ is constructed from the scalar's raw value a._v (presumably broadcast to all lanes)
+  BARRIER;
+  b_.load(b._d[0].data());  // fill b_ from the storage that starts at b's first element
+  BARRIER;
+  Vec4d::F::comparisons(4);  // account for the comparisons on the counter type Vec4d::F
+  BARRIER;
+  return a_ == b_;
+}
+
+// vector == scalar: same as above with swapped operands (note: b is the vector, a is the scalar)
+static inline _vcl::Vec4db operator == (Vec4d const & b, oc::OpCounter<double> a) {
+  BARRIER;
+  _vcl::Vec4d a_(a._v), b_;  // a_ is constructed from the scalar's raw value a._v (presumably broadcast to all lanes)
+  BARRIER;
+  b_.load(b._d[0].data());  // fill b_ from the storage that starts at b's first element
+  BARRIER;
+  Vec4d::F::comparisons(4);  // account for the comparisons on the counter type Vec4d::F
+  BARRIER;
+  return a_ == b_;
+}
+
 // vector operator != : returns true for elements for which a != b
 static inline _vcl::Vec4db operator != (Vec4d const & a, Vec4d const & b) {
   BARRIER;
@@ -258,6 +282,30 @@ static inline _vcl::Vec4db operator != (Vec4d const & a, Vec4d const & b) {
   return a_ != b_;
 }
 
+// scalar != vector: compares the counted scalar a with the elements of b and records 4 comparisons
+static inline _vcl::Vec4db operator != (oc::OpCounter<double> a, Vec4d const & b) {
+  BARRIER;
+  _vcl::Vec4d a_(a._v), b_;  // a_ is constructed from the scalar's raw value a._v (presumably broadcast to all lanes)
+  BARRIER;
+  b_.load(b._d[0].data());  // fill b_ from the storage that starts at b's first element
+  BARRIER;
+  Vec4d::F::comparisons(4);  // account for the comparisons on the counter type Vec4d::F
+  BARRIER;
+  return a_ != b_;
+}
+
+// vector != scalar: same as above with swapped operands (note: b is the vector, a is the scalar)
+static inline _vcl::Vec4db operator != (Vec4d const & b, oc::OpCounter<double> a) {
+  BARRIER;
+  _vcl::Vec4d a_(a._v), b_;  // a_ is constructed from the scalar's raw value a._v (presumably broadcast to all lanes)
+  BARRIER;
+  b_.load(b._d[0].data());  // fill b_ from the storage that starts at b's first element
+  BARRIER;
+  Vec4d::F::comparisons(4);  // account for the comparisons on the counter type Vec4d::F
+  BARRIER;
+  return a_ != b_;
+}
+
 // vector operator < : returns true for elements for which a < b
 static inline _vcl::Vec4db operator < (Vec4d const & a, Vec4d const & b) {
   BARRIER;
@@ -272,6 +320,30 @@ static inline _vcl::Vec4db operator < (Vec4d const & a, Vec4d const & b) {
   return a_ < b_;
 }
 
+// scalar < vector: true for elements of b for which the counted scalar a is less than that element
+static inline _vcl::Vec4db operator < (oc::OpCounter<double> a, Vec4d const & b) {
+  BARRIER;
+  _vcl::Vec4d a_(a._v), b_;  // a_ is constructed from the scalar's raw value a._v (presumably broadcast to all lanes)
+  BARRIER;
+  b_.load(b._d[0].data());  // fill b_ from the storage that starts at b's first element
+  BARRIER;
+  Vec4d::F::comparisons(4);  // account for the comparisons on the counter type Vec4d::F
+  BARRIER;
+  return a_ < b_;
+}
+
+// vector < scalar: true for elements of b that are less than the counted scalar a (b is the vector here)
+static inline _vcl::Vec4db operator < (Vec4d const & b, oc::OpCounter<double> a) {
+  BARRIER;
+  _vcl::Vec4d a_(a._v), b_;  // a_ is constructed from the scalar's raw value a._v (presumably broadcast to all lanes)
+  BARRIER;
+  b_.load(b._d[0].data());  // fill b_ from the storage that starts at b's first element
+  BARRIER;
+  Vec4d::F::comparisons(4);  // account for the comparisons on the counter type Vec4d::F
+  BARRIER;
+  return b_ < a_;  // operands follow the call-site order: vector on the left, scalar on the right
+}
+
 // vector operator <= : returns true for elements for which a <= b
 static inline _vcl::Vec4db operator <= (Vec4d const & a, Vec4d const & b) {
   BARRIER;
@@ -286,16 +358,61 @@ static inline _vcl::Vec4db operator <= (Vec4d const & a, Vec4d const & b) {
   return a_ <= b_;
 }
 
+// scalar <= vector: true for elements of b for which the counted scalar a is less than or equal to that element
+static inline _vcl::Vec4db operator <= (oc::OpCounter<double> a, Vec4d const & b) {
+  BARRIER;
+  _vcl::Vec4d a_(a._v), b_;  // a_ is constructed from the scalar's raw value a._v (presumably broadcast to all lanes)
+  BARRIER;
+  b_.load(b._d[0].data());  // fill b_ from the storage that starts at b's first element
+  BARRIER;
+  Vec4d::F::comparisons(4);  // account for the comparisons on the counter type Vec4d::F
+  BARRIER;
+  return a_ <= b_;
+}
+
+// vector <= scalar: true for elements of b that are less than or equal to the counted scalar a (b is the vector here)
+static inline _vcl::Vec4db operator <= (Vec4d const & b, oc::OpCounter<double> a) {
+  BARRIER;
+  _vcl::Vec4d a_(a._v), b_;  // a_ is constructed from the scalar's raw value a._v (presumably broadcast to all lanes)
+  BARRIER;
+  b_.load(b._d[0].data());  // fill b_ from the storage that starts at b's first element
+  BARRIER;
+  Vec4d::F::comparisons(4);  // account for the comparisons on the counter type Vec4d::F
+  BARRIER;
+  return b_ <= a_;  // operands follow the call-site order: vector on the left, scalar on the right
+}
+
 // vector operator > : returns true for elements for which a > b
 static inline _vcl::Vec4db operator > (Vec4d const & a, Vec4d const & b) {
     return b < a;
 }
 
+// scalar > vector: true for elements of b for which the counted scalar a is greater than that element
+static inline _vcl::Vec4db operator > (oc::OpCounter<double> a, Vec4d const & b) {
+    return b < a;  // a > b  <=>  b < a; forwards to the counted (vector < scalar) overload instead of computing a < b
+}
+
+// vector > scalar: true for elements of b that are greater than the counted scalar a (b is the vector here)
+static inline _vcl::Vec4db operator > (Vec4d const & b, oc::OpCounter<double> a) {
+    return a < b;  // b > a  <=>  a < b; forwards to the (scalar < vector) overload
+}
+
 // vector operator >= : returns true for elements for which a >= b
 static inline _vcl::Vec4db operator >= (Vec4d const & a, Vec4d const & b) {
     return b <= a;
 }
 
+// scalar >= vector: true for elements of b for which the counted scalar a is greater than or equal to that element
+static inline _vcl::Vec4db operator >= (oc::OpCounter<double> a, Vec4d const & b) {
+    return b <= a;  // a >= b  <=>  b <= a; forwards to the (vector <= scalar) overload
+}
+
+// vector >= scalar: true for elements of b that are greater than or equal to the counted scalar a (b is the vector here)
+static inline _vcl::Vec4db operator >= (Vec4d const & b, oc::OpCounter<double> a) {
+    return a <= b;  // b >= a  <=>  a <= b; forwards to the (scalar <= vector) overload
+}
+
+
 // avoid logical operators for now, I don't think we need them
 #if 0
 
@@ -411,81 +528,40 @@ static inline Vec4d exp(Vec4d const & a){
   return r;
 }
 
-
-// ignore pow() for now
-#if 0
-
-// pow(Vec4d, int):
-template <typename TT> static Vec4d pow(Vec4d const & a, TT n);
-
-// Raise floating point numbers to integer power n
-template <>
-inline Vec4d pow<int>(Vec4d const & x0, int n) {
-    return pow_template_i<Vec4d>(x0, n);
+// pow with an oc::OpCounter exponent: applies pow(x, n) to each of the 4 elements of a and returns the results as a Vec4d
+template <typename TT>
+static inline Vec4d pow(Vec4d const & a, oc::OpCounter<TT> n)
+{
+  BARRIER;
+  Vec4d r;
+  std::transform(a._d,a._d+4,r._d,[=](auto x){ return pow(x, n); });  // element-wise; n is captured by copy; no counter is touched here directly
+  BARRIER;
+  return r;
 }
 
-// allow conversion from unsigned int
-template <>
-inline Vec4d pow<uint32_t>(Vec4d const & x0, uint32_t n) {
-    return pow_template_i<Vec4d>(x0, (int)n);
+// pow with a plain exponent: overload enabled only when TT is not an oc::OpCounter; applies pow(x, n) to each of the 4 elements of a
+template <typename TT>
+static inline
+std::enable_if_t<not oc::isOpCounterV<TT>, Vec4d> pow(Vec4d const & a, TT n)
+{
+  BARRIER;
+  Vec4d r;
+  std::transform(a._d,a._d+4,r._d,[=](auto x){ return pow(x, n); });  // element-wise; n is captured by copy; no counter is touched here directly
+  BARRIER;
+  return r;
 }
 
 
-// Raise floating point numbers to integer power n, where n is a compile-time constant
-template <int n>
-static inline Vec4d pow_n(Vec4d const & a) {
-    if (n < 0)    return Vec4d(1.0) / pow_n<-n>(a);
-    if (n == 0)   return Vec4d(1.0);
-    if (n >= 256) return pow(a, n);
-    Vec4d x = a;                       // a^(2^i)
-    Vec4d y;                           // accumulator
-    const int lowest = n - (n & (n-1));// lowest set bit in n
-    if (n & 1) y = x;
-    if (n < 2) return y;
-    x = x*x;                           // x^2
-    if (n & 2) {
-        if (lowest == 2) y = x; else y *= x;
-    }
-    if (n < 4) return y;
-    x = x*x;                           // x^4
-    if (n & 4) {
-        if (lowest == 4) y = x; else y *= x;
-    }
-    if (n < 8) return y;
-    x = x*x;                           // x^8
-    if (n & 8) {
-        if (lowest == 8) y = x; else y *= x;
-    }
-    if (n < 16) return y;
-    x = x*x;                           // x^16
-    if (n & 16) {
-        if (lowest == 16) y = x; else y *= x;
-    }
-    if (n < 32) return y;
-    x = x*x;                           // x^32
-    if (n & 32) {
-        if (lowest == 32) y = x; else y *= x;
-    }
-    if (n < 64) return y;
-    x = x*x;                           // x^64
-    if (n & 64) {
-        if (lowest == 64) y = x; else y *= x;
-    }
-    if (n < 128) return y;
-    x = x*x;                           // x^128
-    if (n & 128) {
-        if (lowest == 128) y = x; else y *= x;
-    }
-    return y;
-}
-
-template <int n>
-static inline Vec4d pow(Vec4d const & a, Const_int_t<n>) {
-    return pow_n<n>(a);
+static inline Vec4d select(const _vcl::Vec4db& s, const Vec4d& a, const Vec4d& b)  // element-wise choice: s ? a : b
+{
+  BARRIER;
+  Vec4d r;
+  for(int i=0; i<4; ++i)  // one iteration per element of the 4-element vector
+    r._d[i] = s.extract(i) ? a._d[i] : b._d[i];  // take a's element where s.extract(i) is true, otherwise b's
+  BARRIER;
+  return r;
 }
 
-#endif
-
 // function round: round to nearest integer (even). (result as double vector)
 static inline Vec4d round(Vec4d const & a) {
   BARRIER;
@@ -1095,7 +1171,7 @@ struct Vec8f
 
 /*****************************************************************************
 *
-*          Operators for Vec4d
+*          Operators for Vec8f
 *
 *****************************************************************************/
 
-- 
GitLab