From 655e45215554c6292cdea25543818ece48b09344 Mon Sep 17 00:00:00 2001
From: Dominic Kempf <dominic.kempf@iwr.uni-heidelberg.de>
Date: Tue, 18 Apr 2017 10:50:51 +0200
Subject: [PATCH] Fix operator counting for Vec8d

---
 dune/perftool/common/vectorclass.hh | 420 +++++++++++++++++++++++++++-
 1 file changed, 419 insertions(+), 1 deletion(-)

diff --git a/dune/perftool/common/vectorclass.hh b/dune/perftool/common/vectorclass.hh
index 3b857c35..a3c71167 100644
--- a/dune/perftool/common/vectorclass.hh
+++ b/dune/perftool/common/vectorclass.hh
@@ -165,7 +165,7 @@ static inline Vec4d operator - (Vec4d const & a, Vec4d const & b) {
 static inline Vec4d operator - (Vec4d const & a) {
   BARRIER;
   Vec4d r(a);
-  for (size_t i = 0 ; i < 3 ; ++i)
+  for (size_t i = 0 ; i < 4 ; ++i)
     r._d[i] = -a._d[i];
   BARRIER;
   return r;
@@ -590,6 +590,424 @@ static inline Vec4d blend4d(Vec4d const & a, Vec4d const & b) {
   return r;
 }
 
+// Vec8d: eight oc::OpCounter<double> lanes in _d; members bracket lane access with BARRIER (macro defined outside this hunk).
+struct Vec8d
+{
+  oc::OpCounter<double> _d[8]; // lane storage, indices 0..7
+
+  using F = oc::OpCounter<double>; // element type used by every signature below
+  // Default constructor: lanes are default-initialized; their value is whatever F's default constructor yields (not shown here).
+  Vec8d()
+  {}
+  // Broadcast one counted value into all eight lanes. Not explicit, so an F converts implicitly to Vec8d.
+  Vec8d(F d)
+  {
+    BARRIER;
+    std::fill(_d,_d+8,d);
+    BARRIER;
+  }
+  // Broadcast a plain double into all eight lanes. Not explicit, so expressions such as `a + 1.0` convert through it.
+  Vec8d(double d)
+  {
+    BARRIER;
+    std::fill(_d,_d+8,d); // assigns d to every lane; the double -> F assignment is provided by F (not shown here)
+    BARRIER;
+  }
+  // Lane-wise construction: d0 initializes lane 0, d1 lane 1, ..., d7 lane 7.
+  Vec8d(F d0, F d1, F d2, F d3, F d4, F d5, F d6, F d7)
+    : _d{d0,d1,d2,d3,d4,d5,d6,d7}
+  {
+    BARRIER;
+  }
+  // Copy p[0..7] into the lanes and return *this. p must point to at least eight readable F objects.
+  Vec8d& load(const F* p)
+  {
+    BARRIER;
+    std::copy(p,p+8,_d);
+    BARRIER;
+    return *this;
+  }
+  // Same body as load(); no alignment check is performed here.
+  Vec8d& load_a(const F* p)
+  {
+    BARRIER;
+    std::copy(p,p+8,_d);
+    BARRIER;
+    return *this;
+  }
+  // Copy the eight lanes to p[0..7]. p must point to at least eight writable F objects.
+  void store(F* p) const
+  {
+    BARRIER;
+    std::copy(_d,_d+8,p);
+    BARRIER;
+  }
+  // Same body as store(); no alignment check is performed here.
+  void store_a(F* p) const
+  {
+    BARRIER;
+    std::copy(_d,_d+8,p);
+    BARRIER;
+  }
+  // Overwrite lane `index` with value and return *this as a const reference. index is not range-checked; pass 0..7.
+  Vec8d const& insert(uint32_t index, F value)
+  {
+    BARRIER;
+    _d[index] = value;
+    BARRIER;
+    return *this;
+  }
+  // Return a copy of lane `index`. index is not range-checked; pass 0..7. Only a leading BARRIER is issued.
+  F extract(uint32_t index) const
+  {
+    BARRIER;
+    return _d[index];
+  }
+  // Number of lanes, usable in constant expressions.
+  constexpr static int size()
+  {
+    return 8;
+  }
+
+};
+
+
+/*****************************************************************************
+*
+*          Operators for Vec8d
+*
+*****************************************************************************/
+
+// vector operator + : returns a new vector whose lane i is a[i] + b[i], for all 8 lanes
+static inline Vec8d operator + (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,b._d,r._d,[](auto x, auto y){ return x + y; }); // addition is F's operator+, applied once per lane
+  BARRIER;
+  return r;
+}
+
+// vector operator += : adds b[i] to a[i] in place for all 8 lanes and returns a reference to a
+static inline Vec8d & operator += (Vec8d & a, Vec8d const & b) {
+  BARRIER;
+  std::transform(a._d,a._d+8,b._d,a._d,[](auto x, auto y){ return x + y; }); // output range is a's own lanes
+  BARRIER;
+  return a;
+}
+
+// postfix operator ++ : adds 1.0 to every lane of a and returns a copy of the value a held before the increment
+static inline Vec8d operator ++ (Vec8d & a, int) {
+  BARRIER;
+  Vec8d a0 = a; // snapshot of the old value, returned to the caller
+  a = a + 1.0; // 1.0 is broadcast through the Vec8d(double) constructor, then added lane by lane
+  BARRIER;
+  return a0;
+}
+
+// prefix operator ++ : adds 1.0 to every lane of a and returns a reference to the updated a
+static inline Vec8d & operator ++ (Vec8d & a) {
+  BARRIER;
+  a = a + 1.0; // 1.0 is broadcast through the Vec8d(double) constructor, then added lane by lane
+  BARRIER;
+  return a;
+}
+
+// vector operator - : returns a new vector whose lane i is a[i] - b[i], for all 8 lanes
+static inline Vec8d operator - (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,b._d,r._d,[](auto x, auto y){ return x - y; }); // subtraction is F's operator-, applied once per lane
+  BARRIER;
+  return r;
+}
+
+// vector operator - : unary minus; returns a new vector whose lane i is -a[i], for all 8 lanes
+// NOTE(review): inherited wording says "change sign bit, even for 0, INF and NAN"; here that depends on F's unary minus - confirm
+static inline Vec8d operator - (Vec8d const & a) {
+  BARRIER;
+  Vec8d r(a); // starts as a copy of a; every lane is overwritten by the loop below
+  for (size_t i = 0 ; i < 8 ; ++i)
+    r._d[i] = -a._d[i];
+  BARRIER;
+  return r;
+}
+
+// vector operator -= : subtracts b[i] from a[i] in place for all 8 lanes and returns a reference to a
+static inline Vec8d & operator -= (Vec8d & a, Vec8d const & b) {
+  BARRIER;
+  std::transform(a._d,a._d+8,b._d,a._d,[](auto x, auto y){ return x - y; }); // output range is a's own lanes
+  BARRIER;
+  return a;
+}
+
+// postfix operator -- : subtracts 1.0 from every lane of a and returns a copy of the value a held before the decrement
+static inline Vec8d operator -- (Vec8d & a, int) {
+  BARRIER;
+  Vec8d a0 = a; // snapshot of the old value, returned to the caller
+  a = a - 1.0; // 1.0 is broadcast through the Vec8d(double) constructor, then subtracted lane by lane
+  BARRIER;
+  return a0;
+}
+
+// prefix operator -- : subtracts 1.0 from every lane of a and returns a reference to the updated a
+static inline Vec8d & operator -- (Vec8d & a) {
+  BARRIER;
+  a = a - 1.0; // 1.0 is broadcast through the Vec8d(double) constructor, then subtracted lane by lane
+  BARRIER;
+  return a;
+}
+
+// vector operator * : returns a new vector whose lane i is a[i] * b[i], for all 8 lanes
+static inline Vec8d operator * (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,b._d,r._d,[](auto x, auto y){ return x * y; }); // multiplication is F's operator*, applied once per lane
+  BARRIER;
+  return r;
+}
+
+// vector operator *= : multiplies a[i] by b[i] in place for all 8 lanes and returns a reference to a
+static inline Vec8d & operator *= (Vec8d & a, Vec8d const & b) {
+  BARRIER;
+  std::transform(a._d,a._d+8,b._d,a._d,[](auto x, auto y){ return x * y; }); // output range is a's own lanes
+  BARRIER;
+  return a;
+}
+
+// vector operator / : returns a new vector whose lane i is a[i] / b[i] (element by element; the divisor is a vector, not one integer)
+static inline Vec8d operator / (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,b._d,r._d,[](auto x, auto y){ return x / y; }); // division is F's operator/; no zero check is made here
+  BARRIER;
+  return r;
+}
+
+// vector operator /= : divides a[i] by b[i] in place for all 8 lanes and returns a reference to a
+static inline Vec8d & operator /= (Vec8d & a, Vec8d const & b) {
+  BARRIER;
+  std::transform(a._d,a._d+8,b._d,a._d,[](auto x, auto y){ return x / y; }); // output range is a's own lanes; no zero check is made here
+  BARRIER;
+  return a;
+}
+
+// vector operator == : records 8 comparisons on F, then returns the _vcl::Vec8db mask of a == b computed on _vcl::Vec8d copies
+static inline _vcl::Vec8db operator == (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  _vcl::Vec8d a_, b_; // uncounted vectors from the wrapped library; they perform the actual comparison
+  BARRIER;
+  a_.load(a._d[0].data()); // NOTE(review): loads 8 values starting at lane 0's data(); presumably relies on _d being laid out as contiguous doubles - verify
+  BARRIER;
+  b_.load(b._d[0].data());
+  BARRIER;
+  Vec8d::F::comparisons(8); // one call accounts for all eight lanes
+  BARRIER;
+  return a_ == b_;
+}
+
+// vector operator != : records 8 comparisons on F, then returns the _vcl::Vec8db mask of a != b computed on _vcl::Vec8d copies
+static inline _vcl::Vec8db operator != (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  _vcl::Vec8d a_, b_; // uncounted vectors from the wrapped library; they perform the actual comparison
+  BARRIER;
+  a_.load(a._d[0].data()); // NOTE(review): loads 8 values starting at lane 0's data(); presumably relies on _d being laid out as contiguous doubles - verify
+  BARRIER;
+  b_.load(b._d[0].data());
+  BARRIER;
+  Vec8d::F::comparisons(8); // one call accounts for all eight lanes
+  BARRIER;
+  return a_ != b_;
+}
+
+// vector operator < : records 8 comparisons on F, then returns the _vcl::Vec8db mask of a < b computed on _vcl::Vec8d copies
+static inline _vcl::Vec8db operator < (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  _vcl::Vec8d a_, b_; // uncounted vectors from the wrapped library; they perform the actual comparison
+  BARRIER;
+  a_.load(a._d[0].data()); // NOTE(review): loads 8 values starting at lane 0's data(); presumably relies on _d being laid out as contiguous doubles - verify
+  BARRIER;
+  b_.load(b._d[0].data());
+  BARRIER;
+  Vec8d::F::comparisons(8); // one call accounts for all eight lanes
+  BARRIER;
+  return a_ < b_;
+}
+
+// vector operator <= : records 8 comparisons on F, then returns the _vcl::Vec8db mask of a <= b computed on _vcl::Vec8d copies
+static inline _vcl::Vec8db operator <= (Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  _vcl::Vec8d a_, b_; // uncounted vectors from the wrapped library; they perform the actual comparison
+  BARRIER;
+  a_.load(a._d[0].data()); // NOTE(review): loads 8 values starting at lane 0's data(); presumably relies on _d being laid out as contiguous doubles - verify
+  BARRIER;
+  b_.load(b._d[0].data());
+  BARRIER;
+  Vec8d::F::comparisons(8); // one call accounts for all eight lanes
+  BARRIER;
+  return a_ <= b_;
+}
+
+// vector operator > : mask of a > b, obtained by swapping the operands of operator < (which does the counting and barriers)
+static inline _vcl::Vec8db operator > (Vec8d const & a, Vec8d const & b) {
+    return b < a;
+}
+
+// vector operator >= : mask of a >= b, obtained by swapping the operands of operator <= (which does the counting and barriers)
+static inline _vcl::Vec8db operator >= (Vec8d const & a, Vec8d const & b) {
+    return b <= a;
+}
+
+// General arithmetic functions, etc.
+
+// Horizontal add: returns the sum of all 8 lanes of a, accumulated from lane 0 to lane 7 starting at F(0.0).
+static inline Vec8d::F horizontal_add (Vec8d const & a) {
+  BARRIER;
+  Vec8d::F sum = std::accumulate(a._d,a._d+8,Vec8d::F(0.0)); // named so the closing barrier can follow the additions
+  BARRIER; return sum; // the barrier used to sit after the return statement, where it could never execute
+}
+
+// function max: returns a new vector whose lane i is max(a[i], b[i]), for all 8 lanes
+static inline Vec8d max(Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,b._d,r._d,[](auto x, auto y){ return max(x,y); }); // unqualified call on F arguments; the scalar max is declared outside this hunk
+  BARRIER;
+  return r;
+}
+
+// function min: returns a new vector whose lane i is min(a[i], b[i]), for all 8 lanes
+static inline Vec8d min(Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,b._d,r._d,[](auto x, auto y){ return min(x,y); }); // unqualified call on F arguments; the scalar min is declared outside this hunk
+  BARRIER;
+  return r;
+}
+
+// function abs: returns a new vector whose lane i is abs(a[i]), for all 8 lanes
+// NOTE(review): inherited wording says "removes sign bit, even for -0.0f, -INF and -NAN"; here that depends on F's abs - confirm
+static inline Vec8d abs(Vec8d const & a) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,r._d,[](auto x){ return abs(x); }); // unqualified call on an F argument; the scalar abs is declared outside this hunk
+  BARRIER;
+  return r;
+}
+
+// function sqrt: returns a new vector whose lane i is sqrt(a[i]), for all 8 lanes
+static inline Vec8d sqrt(Vec8d const & a) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,r._d,[](auto x){ return sqrt(x); }); // unqualified call on an F argument; the scalar sqrt is declared outside this hunk
+  BARRIER;
+  return r;
+}
+
+// function square: returns a * a via the element-wise operator *, i.e. one multiplication per lane
+static inline Vec8d square(Vec8d const & a) {
+  return a * a;
+}
+
+
+// exponential function: returns a new vector whose lane i is exp(a[i]), for all 8 lanes
+static inline Vec8d exp(Vec8d const & a){
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,r._d,[](auto x){ return exp(x); }); // unqualified call on an F argument; the scalar exp is declared outside this hunk
+  BARRIER;
+  return r;
+}
+
+// function round: applies round() to each of the 8 lanes (result as double vector). NOTE(review): "nearest even" tie-breaking is inherited wording - confirm against F's round
+static inline Vec8d round(Vec8d const & a) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,r._d,[](auto x){ return round(x); }); // unqualified call on an F argument; the scalar round is declared outside this hunk
+  BARRIER;
+  return r;
+}
+
+// function truncate: applies trunc() to each of the 8 lanes, i.e. rounds towards zero (result as double vector)
+static inline Vec8d truncate(Vec8d const & a) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,r._d,[](auto x){ return trunc(x); }); // the scalar function is named trunc, not truncate; it is declared outside this hunk
+  BARRIER;
+  return r;
+}
+
+// function floor: applies floor() to each of the 8 lanes, i.e. rounds towards minus infinity (result as double vector)
+static inline Vec8d floor(Vec8d const & a) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,r._d,[](auto x){ return floor(x); }); // unqualified call on an F argument; the scalar floor is declared outside this hunk
+  BARRIER;
+  return r;
+}
+
+// function ceil: applies ceil() to each of the 8 lanes, i.e. rounds towards plus infinity (result as double vector)
+static inline Vec8d ceil(Vec8d const & a) {
+  BARRIER;
+  Vec8d r;
+  std::transform(a._d,a._d+8,r._d,[](auto x){ return ceil(x); }); // unqualified call on an F argument; the scalar ceil is declared outside this hunk
+  BARRIER;
+  return r;
+}
+
+// Fused multiply and add functions
+
+// Multiply and add: returns a new vector whose lane i is a[i] * b[i] + c[i]
+static inline Vec8d mul_add(Vec8d const & a, Vec8d const & b, Vec8d const & c) {
+  BARRIER;
+  Vec8d r;
+  for (size_t i = 0 ; i < 8 ; ++i)
+    r._d[i] = a._d[i] * b._d[i] + c._d[i]; // written as a separate multiply and add on F, one pair per lane
+  BARRIER;
+  return r;
+}
+
+
+// Multiply and subtract: returns a new vector whose lane i is a[i] * b[i] - c[i]
+static inline Vec8d mul_sub(Vec8d const & a, Vec8d const & b, Vec8d const & c) {
+  BARRIER;
+  Vec8d r;
+  for (size_t i = 0 ; i < 8 ; ++i)
+    r._d[i] = a._d[i] * b._d[i] - c._d[i]; // written as a separate multiply and subtract on F, one pair per lane
+  BARRIER;
+  return r;
+}
+
+// Multiply and inverse subtract: returns a new vector whose lane i is (-a[i]) * b[i] + c[i]
+static inline Vec8d nmul_add(Vec8d const & a, Vec8d const & b, Vec8d const & c) {
+  BARRIER;
+  Vec8d r;
+  for (size_t i = 0 ; i < 8 ; ++i)
+    r._d[i] = - a._d[i] * b._d[i] + c._d[i]; // unary minus binds tighter than *, so a's lane is negated before the multiply
+  BARRIER;
+  return r;
+}
+
+// blend8d: forwards the indices i0..i7 to _vcl::blend8d on uncounted copies of a and b, records one blend on F, and returns the result
+template <int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
+static inline Vec8d blend8d(Vec8d const & a, Vec8d const & b) {
+  BARRIER;
+  _vcl::Vec8d a_,b_; // uncounted vectors from the wrapped library; they perform the actual blend
+  BARRIER;
+  a_.load(a._d[0].data()); // NOTE(review): loads 8 values starting at lane 0's data(); presumably relies on _d being laid out as contiguous doubles - verify
+  BARRIER;
+  b_.load(b._d[0].data());
+  BARRIER;
+  _vcl::Vec8d r_ = _vcl::blend8d<i0,i1,i2,i3,i4,i5,i6,i7>(a_,b_); // index semantics are those of _vcl::blend8d
+  BARRIER;
+  Vec8d::F::blends(1); // the whole 8-lane blend is recorded as a single blend
+  BARRIER;
+  Vec8d r;
+  BARRIER;
+  r_.store(r._d[0].data()); // writes 8 values back through lane 0's data(); same layout assumption as the loads above
+  BARRIER;
+  return r;
+}
+
 #endif // ENABLE_COUNTER
 
 #endif // DUNE_PDELAB_COMMON_VECTORCLASS_HH
-- 
GitLab