diff --git a/dune/perftool/common/opcounter.hh b/dune/perftool/common/opcounter.hh
new file mode 100644
index 0000000000000000000000000000000000000000..8d80f2bdf6801d285b122ee9fe77b17c5da25a53
--- /dev/null
+++ b/dune/perftool/common/opcounter.hh
@@ -0,0 +1,863 @@
+#ifndef __OPCOUNTER__
+#define __OPCOUNTER__
+
+#include <type_traits>
+#include <iostream>
+#include <cmath>
+#include <cstdlib>
+
+namespace oc {
+  // Forward declaration; the class template itself is defined further down in this header.
+  template<typename F>
+  class OpCounter;
+
+}
+
+namespace Dune {
+  // Forward declaration: lets the operator+= overload below name FieldVector<OpCounter<F>,1>.
+  template<typename T, int n>
+  class FieldVector;
+
+}
+
+namespace oc {
+
+  template<typename F>
+  class OpCounter
+  {
+    // Wraps a single value of type F. The free operators and math functions of namespace oc tally their calls in `counters`.
+  public:
+
+    typedef std::size_t size_type;
+
+    using value_type = F;
+    // Value-initializes the wrapped value.
+    OpCounter()
+      : _v()
+    {}
+    // Conversion from int; only enabled when T is exactly int and F itself is not int.
+    template<typename T>
+    OpCounter(const T& t, typename std::enable_if<std::is_same<T,int>::value and !std::is_same<F,int>::value>::type* = nullptr)
+      : _v(t)
+    {}
+    // Implicit conversion from a plain F.
+    OpCounter(const F& f)
+      : _v(f)
+    {}
+    // Rvalue overload; f is a named parameter here, so the value is still copied.
+    OpCounter(F&& f)
+      : _v(f)
+    {}
+    // Parses s with std::strtod; no end pointer is requested, so unparsed trailing text goes unnoticed.
+    explicit OpCounter(const char* s)
+      : _v(std::strtod(s,nullptr))
+    {}
+    // Assigns the std::strtod interpretation of s.
+    OpCounter& operator=(const char* s)
+    {
+      _v = std::strtod(s,nullptr);
+      return *this;
+    }
+    // Explicit read access to the wrapped value.
+    explicit operator F() const
+    {
+      return _v;
+    }
+
+    OpCounter& operator=(const F& f)
+    {
+      _v = f;
+      return *this;
+    }
+
+    OpCounter& operator=(F&& f)
+    {
+      _v = f;
+      return *this;
+    }
+    // Prints the wrapped value as "OC(<value>)".
+    friend std::ostream& operator<<(std::ostream& os, const OpCounter& f)
+    {
+      os << "OC(" << f._v << ")";
+      return os;
+    }
+    // Reads the wrapped value; this overload only accepts std::istringstream sources.
+    friend std::istringstream& operator>>(std::istringstream& iss, OpCounter& f)
+    {
+      iss >> f._v;
+      return iss;
+    }
+    // Address of the wrapped value (mutable and const flavour).
+    F* data()
+    {
+      return &_v;
+    }
+
+    const F* data() const
+    {
+      return &_v;
+    }
+    // The wrapped value; public, and accessed directly by the free operators of namespace oc.
+    F _v;
+    // One tally per operation category.
+    struct Counters {
+
+      size_type addition_count;
+      size_type multiplication_count;
+      size_type division_count;
+      size_type exp_count;
+      size_type pow_count;
+      size_type sin_count;
+      size_type sqrt_count;
+      size_type comparison_count;
+      // All tallies start at zero.
+      Counters()
+        : addition_count(0)
+        , multiplication_count(0)
+        , division_count(0)
+        , exp_count(0)
+        , pow_count(0)
+        , sin_count(0)
+        , sqrt_count(0)
+        , comparison_count(0)
+      {}
+      // Sets every tally back to zero.
+      void reset()
+      {
+        addition_count = 0;
+        multiplication_count = 0;
+        division_count = 0;
+        exp_count = 0;
+        pow_count = 0;
+        sin_count = 0;
+        sqrt_count = 0;
+        comparison_count = 0;
+      }
+      // Writes one "<label>: <count>" line per category, an empty line and the grand total; zeroes the tallies afterwards if doReset.
+      template<typename Stream>
+      void reportOperations(Stream& os, bool doReset = false)
+      {
+        os << "additions: " << addition_count << std::endl
+           << "multiplications: " << multiplication_count << std::endl
+           << "divisions: " << division_count << std::endl
+           << "exp: " << exp_count << std::endl
+           << "pow: " << pow_count << std::endl
+           << "sin: " << sin_count << std::endl
+           << "sqrt: " << sqrt_count << std::endl
+           << "comparisons: " << comparison_count << std::endl
+           << std::endl
+           << "total: " << addition_count + multiplication_count + division_count + exp_count + pow_count + sin_count + sqrt_count + comparison_count << std::endl;
+
+        if (doReset)
+          reset();
+      }
+      // Component-wise accumulation of another set of tallies.
+      Counters& operator+=(const Counters& rhs)
+      {
+        addition_count += rhs.addition_count;
+        multiplication_count += rhs.multiplication_count;
+        division_count += rhs.division_count;
+        exp_count += rhs.exp_count;
+        pow_count += rhs.pow_count;
+        sin_count += rhs.sin_count;
+        sqrt_count += rhs.sqrt_count;
+        comparison_count += rhs.comparison_count;
+        return *this;
+      }
+      // Component-wise difference (const, so snapshots can be subtracted); size_type is unsigned, so a larger rhs tally wraps around.
+      Counters operator-(const Counters& rhs) const
+      {
+        Counters r;
+        r.addition_count = addition_count - rhs.addition_count;
+        r.multiplication_count = multiplication_count - rhs.multiplication_count;
+        r.division_count = division_count - rhs.division_count;
+        r.exp_count = exp_count - rhs.exp_count;
+        r.pow_count = pow_count - rhs.pow_count;
+        r.sin_count = sin_count - rhs.sin_count;
+        r.sqrt_count = sqrt_count - rhs.sqrt_count;
+        r.comparison_count = comparison_count - rhs.comparison_count;
+        return r;
+      }
+
+    };
+    // Manually book n operations of one category (helpers exist for these three categories only).
+    static void additions(std::size_t n)
+    {
+      counters.addition_count += n;
+    }
+
+    static void multiplications(std::size_t n)
+    {
+      counters.multiplication_count += n;
+    }
+
+    static void divisions(std::size_t n)
+    {
+      counters.division_count += n;
+    }
+    // Zeroes the shared tallies of this OpCounter<F> instantiation.
+    static void reset()
+    {
+      counters.reset();
+    }
+    // Forwards to Counters::reportOperations on the shared tallies.
+    template<typename Stream>
+    static void reportOperations(Stream& os, bool doReset = false)
+    {
+      counters.reportOperations(os,doReset);
+    }
+    // Tallies shared by all OpCounter<F> objects with the same F.
+    static Counters counters;
+
+  };
+
+  template<typename F>  // out-of-class definition of the static tally object, one per F
+  typename OpCounter<F>::Counters OpCounter<F>::counters;
+
+  // ********************************************************************************
+  // negation
+  // ********************************************************************************
+
+  template<typename F>
+  OpCounter<F> operator-(const OpCounter<F>& a)
+  { // a sign flip is booked as one addition
+    OpCounter<F>::additions(1);
+    return OpCounter<F>{-a._v};
+  }
+
+
+  // ********************************************************************************
+  // addition
+  // ********************************************************************************
+
+  // Counted sums. Every overload below books exactly one addition on
+  // OpCounter<F>::counters and wraps the resulting value.
+  template<typename F>
+  OpCounter<F> operator+(const OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::additions(1);
+    return OpCounter<F>{a._v + b._v};
+  }
+
+  // Plain F on the right-hand side.
+  template<typename F>
+  OpCounter<F> operator+(const OpCounter<F>& a, const F& b)
+  {
+    OpCounter<F>::additions(1);
+    return OpCounter<F>{a._v + b};
+  }
+
+  // Plain F on the left-hand side.
+  template<typename F>
+  OpCounter<F> operator+(const F& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::additions(1);
+    return OpCounter<F>{a + b._v};
+  }
+
+  // Any arithmetic T on the right-hand side (overload disabled for non-arithmetic T).
+  template<typename F, typename T>
+  typename std::enable_if<std::is_arithmetic<T>::value, OpCounter<F>>::type
+  operator+(const OpCounter<F>& a, const T& b)
+  {
+    OpCounter<F>::additions(1);
+    return OpCounter<F>{a._v + b};
+  }
+
+  // Any arithmetic T on the left-hand side (overload disabled for non-arithmetic T).
+  template<typename F, typename T>
+  typename std::enable_if<std::is_arithmetic<T>::value, OpCounter<F>>::type
+  operator+(const T& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::additions(1);
+    return OpCounter<F>{a + b._v};
+  }
+
+  // Counted in-place sums: each overload books one addition and returns `a`.
+  template<typename F>
+  OpCounter<F>& operator+=(OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    a._v += b._v;
+    OpCounter<F>::additions(1);
+    return a;
+  }
+
+  template<typename F>
+  OpCounter<F>& operator+=(OpCounter<F>& a, const F& b)
+  {
+    a._v += b;
+    OpCounter<F>::additions(1);
+    return a;
+  }
+
+  // Right-hand side of any arithmetic type T.
+  template<typename F, typename T>
+  typename std::enable_if<std::is_arithmetic<T>::value, OpCounter<F>&>::type
+  operator+=(OpCounter<F>& a, const T& b)
+  {
+    a._v += b;
+    OpCounter<F>::additions(1);
+    return a;
+  }
+
+  // Right-hand side is a one-component Dune::FieldVector; its single entry is added.
+  template<typename F>
+  OpCounter<F>& operator+=(OpCounter<F>& a, const Dune::FieldVector<OpCounter<F>,1>& b)
+  {
+    a._v += b[0]._v;
+    OpCounter<F>::additions(1);
+    return a;
+  }
+
+  // ********************************************************************************
+  // subtraction
+  // ********************************************************************************
+
+  // Counted differences. A subtraction is booked as one addition on
+  // OpCounter<F>::counters; the result is wrapped again.
+  template<typename F>
+  OpCounter<F> operator-(const OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::additions(1);
+    return OpCounter<F>{a._v - b._v};
+  }
+
+  // Plain F on the right-hand side.
+  template<typename F>
+  OpCounter<F> operator-(const OpCounter<F>& a, const F& b)
+  {
+    OpCounter<F>::additions(1);
+    return OpCounter<F>{a._v - b};
+  }
+
+  // Plain F on the left-hand side.
+  template<typename F>
+  OpCounter<F> operator-(const F& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::additions(1);
+    return OpCounter<F>{a - b._v};
+  }
+
+  // Any arithmetic T on the right-hand side (overload disabled for non-arithmetic T).
+  template<typename F, typename T>
+  typename std::enable_if<std::is_arithmetic<T>::value, OpCounter<F>>::type
+  operator-(const OpCounter<F>& a, const T& b)
+  {
+    OpCounter<F>::additions(1);
+    return OpCounter<F>{a._v - b};
+  }
+
+  // Any arithmetic T on the left-hand side (overload disabled for non-arithmetic T).
+  template<typename F, typename T>
+  typename std::enable_if<std::is_arithmetic<T>::value, OpCounter<F>>::type
+  operator-(const T& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::additions(1);
+    return OpCounter<F>{a - b._v};
+  }
+
+  // Counted in-place differences: each overload books one addition
+  // (subtractions share the addition tally) and returns `a`.
+  template<typename F>
+  OpCounter<F>& operator-=(OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    a._v -= b._v;
+    OpCounter<F>::additions(1);
+    return a;
+  }
+
+  template<typename F>
+  OpCounter<F>& operator-=(OpCounter<F>& a, const F& b)
+  {
+    a._v -= b;
+    OpCounter<F>::additions(1);
+    return a;
+  }
+
+  // Right-hand side of any arithmetic type T.
+  template<typename F, typename T>
+  typename std::enable_if<std::is_arithmetic<T>::value, OpCounter<F>&>::type
+  operator-=(OpCounter<F>& a, const T& b)
+  {
+    a._v -= b;
+    OpCounter<F>::additions(1);
+    return a;
+  }
+
+
+  // ********************************************************************************
+  // multiplication
+  // ********************************************************************************
+
+  // Counted products. Every overload below books exactly one multiplication
+  // on OpCounter<F>::counters and wraps the resulting value.
+  template<typename F>
+  OpCounter<F> operator*(const OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::multiplications(1);
+    return OpCounter<F>{a._v * b._v};
+  }
+
+  // Plain F on the right-hand side.
+  template<typename F>
+  OpCounter<F> operator*(const OpCounter<F>& a, const F& b)
+  {
+    OpCounter<F>::multiplications(1);
+    return OpCounter<F>{a._v * b};
+  }
+
+  // Plain F on the left-hand side.
+  template<typename F>
+  OpCounter<F> operator*(const F& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::multiplications(1);
+    return OpCounter<F>{a * b._v};
+  }
+
+  // Any arithmetic T on the right-hand side (overload disabled for non-arithmetic T).
+  template<typename F, typename T>
+  typename std::enable_if<std::is_arithmetic<T>::value, OpCounter<F>>::type
+  operator*(const OpCounter<F>& a, const T& b)
+  {
+    OpCounter<F>::multiplications(1);
+    return OpCounter<F>{a._v * b};
+  }
+
+  // Any arithmetic T on the left-hand side (overload disabled for non-arithmetic T).
+  template<typename F, typename T>
+  typename std::enable_if<std::is_arithmetic<T>::value, OpCounter<F>>::type
+  operator*(const T& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::multiplications(1);
+    return OpCounter<F>{a * b._v};
+  }
+
+  // Counted in-place products: each overload books one multiplication
+  // and returns `a`.
+  template<typename F>
+  OpCounter<F>& operator*=(OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    a._v *= b._v;
+    OpCounter<F>::multiplications(1);
+    return a;
+  }
+
+  template<typename F>
+  OpCounter<F>& operator*=(OpCounter<F>& a, const F& b)
+  {
+    a._v *= b;
+    OpCounter<F>::multiplications(1);
+    return a;
+  }
+
+  // Right-hand side of any arithmetic type T.
+  template<typename F, typename T>
+  typename std::enable_if<std::is_arithmetic<T>::value, OpCounter<F>&>::type
+  operator*=(OpCounter<F>& a, const T& b)
+  {
+    a._v *= b;
+    OpCounter<F>::multiplications(1);
+    return a;
+  }
+
+
+  // ********************************************************************************
+  // division
+  // ********************************************************************************
+
+  // Counted quotients. Every overload below books exactly one division
+  // on OpCounter<F>::counters and wraps the resulting value.
+  template<typename F>
+  OpCounter<F> operator/(const OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::divisions(1);
+    return OpCounter<F>{a._v / b._v};
+  }
+
+  // Plain F on the right-hand side.
+  template<typename F>
+  OpCounter<F> operator/(const OpCounter<F>& a, const F& b)
+  {
+    OpCounter<F>::divisions(1);
+    return OpCounter<F>{a._v / b};
+  }
+
+  // Plain F on the left-hand side.
+  template<typename F>
+  OpCounter<F> operator/(const F& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::divisions(1);
+    return OpCounter<F>{a / b._v};
+  }
+
+  // Any arithmetic T on the right-hand side (overload disabled for non-arithmetic T).
+  template<typename F, typename T>
+  typename std::enable_if<std::is_arithmetic<T>::value, OpCounter<F>>::type
+  operator/(const OpCounter<F>& a, const T& b)
+  {
+    OpCounter<F>::divisions(1);
+    return OpCounter<F>{a._v / b};
+  }
+
+  // Any arithmetic T on the left-hand side (overload disabled for non-arithmetic T).
+  template<typename F, typename T>
+  typename std::enable_if<std::is_arithmetic<T>::value, OpCounter<F>>::type
+  operator/(const T& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::divisions(1);
+    return OpCounter<F>{a / b._v};
+  }
+
+  // Counted in-place quotients: each overload books one division
+  // and returns `a`.
+  template<typename F>
+  OpCounter<F>& operator/=(OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    a._v /= b._v;
+    OpCounter<F>::divisions(1);
+    return a;
+  }
+
+  template<typename F>
+  OpCounter<F>& operator/=(OpCounter<F>& a, const F& b)
+  {
+    a._v /= b;
+    OpCounter<F>::divisions(1);
+    return a;
+  }
+
+  // Right-hand side of any arithmetic type T.
+  template<typename F, typename T>
+  typename std::enable_if<std::is_arithmetic<T>::value, OpCounter<F>&>::type
+  operator/=(OpCounter<F>& a, const T& b)
+  {
+    a._v /= b;
+    OpCounter<F>::divisions(1);
+    return a;
+  }
+
+
+
+  // ********************************************************************************
+  // comparisons
+  // ********************************************************************************
+
+
+  // ********************************************************************************
+  // less
+  // ********************************************************************************
+
+  template<typename F>  // counted "less than": every overload books one comparison
+  bool operator<(const OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v < b._v;
+  }
+
+  template<typename F>
+  bool operator<(const OpCounter<F>& a, const F& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v < b;
+  }
+
+  template<typename F>
+  bool operator<(const F& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a < b._v;
+  }
+  // The two overloads below accept any T; unlike the arithmetic operators they are not constrained.
+  template<typename F, typename T>
+  bool operator<(const OpCounter<F>& a, const T& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v < b;
+  }
+
+  template<typename F, typename T>
+  bool operator<(const T& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a < b._v;
+  }
+
+
+  // ********************************************************************************
+  // less_or_equals
+  // ********************************************************************************
+
+  template<typename F>  // counted "less or equal": every overload books one comparison
+  bool operator<=(const OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v <= b._v;
+  }
+
+  template<typename F>
+  bool operator<=(const OpCounter<F>& a, const F& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v <= b;
+  }
+
+  template<typename F>
+  bool operator<=(const F& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a <= b._v;
+  }
+  // The two overloads below accept any T; unlike the arithmetic operators they are not constrained.
+  template<typename F, typename T>
+  bool operator<=(const OpCounter<F>& a, const T& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v <= b;
+  }
+
+  template<typename F, typename T>
+  bool operator<=(const T& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a <= b._v;
+  }
+
+
+  // ********************************************************************************
+  // greater
+  // ********************************************************************************
+
+  template<typename F>  // counted "greater than": every overload books one comparison
+  bool operator>(const OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v > b._v;
+  }
+
+  template<typename F>
+  bool operator>(const OpCounter<F>& a, const F& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v > b;
+  }
+
+  template<typename F>
+  bool operator>(const F& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a > b._v;
+  }
+  // The two overloads below accept any T; unlike the arithmetic operators they are not constrained.
+  template<typename F, typename T>
+  bool operator>(const OpCounter<F>& a, const T& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v > b;
+  }
+
+  template<typename F, typename T>
+  bool operator>(const T& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a > b._v;
+  }
+
+
+  // ********************************************************************************
+  // greater_or_equals
+  // ********************************************************************************
+
+  template<typename F>  // counted "greater or equal": every overload books one comparison
+  bool operator>=(const OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v >= b._v;
+  }
+
+  template<typename F>
+  bool operator>=(const OpCounter<F>& a, const F& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v >= b;
+  }
+
+  template<typename F>
+  bool operator>=(const F& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a >= b._v;
+  }
+  // The two overloads below accept any T; unlike the arithmetic operators they are not constrained.
+  template<typename F, typename T>
+  bool operator>=(const OpCounter<F>& a, const T& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v >= b;
+  }
+
+  template<typename F, typename T>
+  bool operator>=(const T& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a >= b._v;
+  }
+
+
+  // ********************************************************************************
+  // inequality (operator!=)
+  // ********************************************************************************
+
+  template<typename F>  // counted "not equal": every overload books one comparison
+  bool operator!=(const OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v != b._v;
+  }
+
+  template<typename F>
+  bool operator!=(const OpCounter<F>& a, const F& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v != b;
+  }
+
+  template<typename F>
+  bool operator!=(const F& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a != b._v;
+  }
+  // The two overloads below accept any T; unlike the arithmetic operators they are not constrained.
+  template<typename F, typename T>
+  bool operator!=(const OpCounter<F>& a, const T& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v != b;
+  }
+
+  template<typename F, typename T>
+  bool operator!=(const T& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a != b._v;
+  }
+
+
+  // ********************************************************************************
+  // equals
+  // ********************************************************************************
+
+  template<typename F>  // counted "equal": every overload books one comparison
+  bool operator==(const OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v == b._v;
+  }
+
+  template<typename F>
+  bool operator==(const OpCounter<F>& a, const F& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v == b;
+  }
+
+  template<typename F>
+  bool operator==(const F& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a == b._v;
+  }
+  // The two overloads below accept any T; unlike the arithmetic operators they are not constrained.
+  template<typename F, typename T>
+  bool operator==(const OpCounter<F>& a, const T& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a._v == b;
+  }
+
+  template<typename F, typename T>
+  bool operator==(const T& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.comparison_count += 1;
+    return a == b._v;
+  }
+
+
+
+  // ********************************************************************************
+  // functions
+  // ********************************************************************************
+
+  template<typename F>
+  OpCounter<F> exp(const OpCounter<F>& a)
+  { // books one exp evaluation, then defers to std::exp
+    OpCounter<F>::counters.exp_count += 1;
+    return OpCounter<F>{std::exp(a._v)};
+  }
+
+  template<typename F>  // counted std::pow: every overload books one pow evaluation
+  OpCounter<F> pow(const OpCounter<F>& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.pow_count += 1;
+    return OpCounter<F>{std::pow(a._v,b._v)};
+  }
+
+  template<typename F>
+  OpCounter<F> pow(const OpCounter<F>& a, const F& b)
+  {
+    OpCounter<F>::counters.pow_count += 1;
+    return OpCounter<F>{std::pow(a._v,b)};
+  }
+  // Exponent of any type T (this overload is not constrained).
+  template<typename F, typename T>
+  OpCounter<F> pow(const OpCounter<F>& a, const T& b)
+  {
+    OpCounter<F>::counters.pow_count += 1;
+    return OpCounter<F>{std::pow(a._v,b)};
+  }
+
+  template<typename F>
+  OpCounter<F> pow(const F& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.pow_count += 1;
+    return OpCounter<F>{std::pow(a,b._v)};
+  }
+  // Base of any type T (this overload is not constrained).
+  template<typename F, typename T>
+  OpCounter<F> pow(const T& a, const OpCounter<F>& b)
+  {
+    OpCounter<F>::counters.pow_count += 1;
+    return OpCounter<F>{std::pow(a,b._v)};
+  }
+
+  template<typename F>
+  OpCounter<F> sin(const OpCounter<F>& a)
+  { // books one sin evaluation, then defers to std::sin
+    OpCounter<F>::counters.sin_count += 1;
+    return OpCounter<F>{std::sin(a._v)};
+  }
+
+  template<typename F>
+  OpCounter<F> cos(const OpCounter<F>& a)
+  { // there is no separate cos tally: a cosine is booked on sin_count
+    OpCounter<F>::counters.sin_count += 1;
+    return OpCounter<F>{std::cos(a._v)};
+  }
+
+  template<typename F>
+  OpCounter<F> sqrt(const OpCounter<F>& a)
+  { // books one sqrt evaluation, then defers to std::sqrt
+    OpCounter<F>::counters.sqrt_count += 1;
+    return OpCounter<F>{std::sqrt(a._v)};
+  }
+
+  template<typename F>
+  OpCounter<F> abs(const OpCounter<F>& a)
+  { // an absolute value is booked as one comparison
+    OpCounter<F>::counters.comparison_count += 1;
+    return OpCounter<F>{std::abs(a._v)};
+  }
+
+}
+
+#endif // __OPCOUNTER__
diff --git a/dune/perftool/common/timer.hh b/dune/perftool/common/timer.hh
new file mode 100644
index 0000000000000000000000000000000000000000..189440c0b6fc48fdf3b9ddf695c6b1c1b5e7cc87
--- /dev/null
+++ b/dune/perftool/common/timer.hh
@@ -0,0 +1,107 @@
+#ifndef HP_TIMER_HH
+#define HP_TIMER_HH
+
+#include <chrono>
+
+# include <dune/perftool/common/opcounter.hh>
+
+#define HP_TIMER_OPCOUNTER oc::OpCounter<double>  // counter type whose static tallies the timers snapshot
+// Token-pasting helpers: build the per-timer identifiers from the timer name.
+#define HP_TIMER_DURATION(name) __hp_timer_##name##_duration
+#define HP_TIMER_STARTTIME(name) __hp_timer_##name##_start
+#define HP_TIMER_OPCOUNTERS_START(name) __hp_timer_##name##_counters_start
+#define HP_TIMER_OPCOUNTERS(name) __hp_timer_##name##_counters
+#define HP_TIMER_ELAPSED(name) std::chrono::duration_cast<std::chrono::duration<double> >( HP_TIMER_DURATION(name) ).count()  // accumulated duration in seconds, as double
+
+#ifdef ENABLE_HP_TIMERS
+
+#ifdef ENABLE_COUNTER
+
+#define HP_DECLARE_TIMER(name)  /* declares: accumulated duration, last start time, counter snapshot at start, accumulated counters */ \
+  std::chrono::high_resolution_clock::duration HP_TIMER_DURATION(name);	\
+  std::chrono::high_resolution_clock::time_point HP_TIMER_STARTTIME(name); \
+  HP_TIMER_OPCOUNTER::Counters HP_TIMER_OPCOUNTERS_START(name); \
+  HP_TIMER_OPCOUNTER::Counters HP_TIMER_OPCOUNTERS(name);
+// Start: snapshot the global operation counters, then take the start time stamp.
+#define HP_TIMER_START(name) \
+  do { \
+  HP_TIMER_OPCOUNTERS_START(name) = HP_TIMER_OPCOUNTER::counters; \
+  HP_TIMER_STARTTIME(name) = std::chrono::high_resolution_clock::now(); \
+  } while(false)
+// Stop: read the clock first, then add the counter delta and the time delta since the last start.
+#define HP_TIMER_STOP(name) \
+  do { \
+  std::chrono::high_resolution_clock::time_point __hp_end_time = std::chrono::high_resolution_clock::now(); \
+  HP_TIMER_OPCOUNTERS(name) += HP_TIMER_OPCOUNTER::counters - HP_TIMER_OPCOUNTERS_START(name); \
+  HP_TIMER_DURATION(name) += __hp_end_time - HP_TIMER_STARTTIME(name); \
+  } while(false)
+// Reset: zero the accumulated duration and counters. NOTE(review): the duration has no initializer in HP_DECLARE_TIMER - presumably timers are reset or zero-initialized before first use; confirm.
+#define HP_TIMER_RESET(name) \
+  do { \
+    HP_TIMER_DURATION(name) = std::chrono::high_resolution_clock::duration::zero(); \
+  HP_TIMER_OPCOUNTERS(name).reset(); \
+  } while (false)
+
+#else
+
+#define HP_DECLARE_TIMER(name)  /* declares the accumulated duration and the last start time */ \
+  std::chrono::high_resolution_clock::duration HP_TIMER_DURATION(name);	\
+  std::chrono::high_resolution_clock::time_point HP_TIMER_STARTTIME(name);
+// START and RESET expand to one statement that needs a trailing ';', exactly like the ENABLE_COUNTER variants.
+#define HP_TIMER_START(name) do { HP_TIMER_STARTTIME(name) = std::chrono::high_resolution_clock::now(); } while(false)
+#define HP_TIMER_STOP(name) \
+  do { \
+  std::chrono::high_resolution_clock::time_point __hp_end_time = std::chrono::high_resolution_clock::now(); \
+  HP_TIMER_DURATION(name) += __hp_end_time - HP_TIMER_STARTTIME(name); \
+  } while(false)
+// Zeroes the accumulated duration.
+#define HP_TIMER_RESET(name) do { HP_TIMER_DURATION(name) = std::chrono::high_resolution_clock::duration::zero(); } while(false)
+
+#endif // ENABLE_COUNTER
+
+#else // ENABLE_HP_TIMERS
+
+#define HP_DECLARE_TIMER(name)  // ENABLE_HP_TIMERS not defined: all four timer macros expand to nothing
+#define HP_TIMER_START(name)
+#define HP_TIMER_STOP(name)
+#define HP_TIMER_RESET(name)
+
+#endif // ENABLE_HP_TIMERS
+
+
+#ifdef ENABLE_COUNTER
+
+#define DUMP_TIMER(name,os,reset)  /* prints a header, the elapsed seconds and the operation report; expands to several statements */ \
+  os << "===== "  #name  " =====" << std::endl; \
+  os << "elapsed: " << HP_TIMER_ELAPSED(name) << std::endl; \
+  HP_TIMER_OPCOUNTERS(name).reportOperations(os,reset);
+// NOTE(review): `reset` only reaches reportOperations here, so the accumulated duration is not zeroed in this variant - confirm intended.
+#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops)  /* as DUMP_TIMER; also adds the elapsed seconds to `time` and the counters to `ops` */ \
+  os << "===== "  #name " =====" << std::endl; \
+  os << "elapsed: " << HP_TIMER_ELAPSED(name) << std::endl; \
+  time += HP_TIMER_ELAPSED(name); \
+  ops += HP_TIMER_OPCOUNTERS(name); \
+  HP_TIMER_OPCOUNTERS(name).reportOperations(os,reset);
+
+#elif defined ENABLE_HP_TIMERS
+
+#define DUMP_TIMER(name,os,reset)  /* timers without counters: prints a header and the elapsed seconds, then resets the timer if `reset` */ \
+  os << "===== "  #name  " =====" << std::endl; \
+  os << "elapsed: " << HP_TIMER_ELAPSED(name) << std::endl; \
+  if (reset) HP_TIMER_RESET(name);
+// Same output as DUMP_TIMER, and adds the elapsed seconds to `time`; `ops` is not used in this variant.
+#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops)  \
+  os << "===== "  #name " =====" << std::endl; \
+  os << "elapsed: " << HP_TIMER_ELAPSED(name) << std::endl; \
+  time += HP_TIMER_ELAPSED(name); \
+  if (reset) HP_TIMER_RESET(name);
+
+#else
+
+#define DUMP_TIMER(name,os,reset)  // neither ENABLE_COUNTER nor ENABLE_HP_TIMERS defined: dumping expands to nothing
+#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops)
+
+#endif
+
+
+#endif // HP_TIMER_HH
diff --git a/python/dune/perftool/file.py b/python/dune/perftool/file.py
index e5abbaed55e0e12fce92ceb74e59b693f078127f..9403ff8e5878c354b3bed69311457d6421cf8055 100644
--- a/python/dune/perftool/file.py
+++ b/python/dune/perftool/file.py
@@ -26,14 +26,28 @@ def generate_file(filename, tag, content, headerguard=True):
             macro = filename.upper().replace("/", "_").replace(".", "_").replace("-", "_")
             f.write("#ifndef {0}\n#define {0}\n\n".format(macro))
 
-        # Add the includes from the cache
         from dune.perftool.generation import retrieve_cache_items
+        # Add pre include lines from the cache
+        for define in retrieve_cache_items('{} and pre_include'.format(tag)):
+            for line in define:
+                f.write(line)
+            f.write('\n')
+        f.write('\n')
+
+        # Add the includes from the cache
         for inc in retrieve_cache_items('{} and include'.format(tag)):
             from cgen import Include
             assert isinstance(inc, Include)
             for line in inc.generate():
                 f.write(line)
             f.write('\n')
+        f.write('\n')
+
+        # Add post include lines from the cache
+        for define in retrieve_cache_items('{} and post_include'.format(tag)):
+            for line in define:
+                f.write(line)
+            f.write('\n')
 
         f.write('\n\n')
 
diff --git a/python/dune/perftool/generation/__init__.py b/python/dune/perftool/generation/__init__.py
index 5edd83582410e7e70730b0ff2db3ab75efb42328..6496c2caac5c8217298af4b7602e0a3b272367d3 100644
--- a/python/dune/perftool/generation/__init__.py
+++ b/python/dune/perftool/generation/__init__.py
@@ -13,9 +13,12 @@ from dune.perftool.generation.cpp import (base_class,
                                           class_basename,
                                           class_member,
                                           constructor_parameter,
+                                          dump_accumulate_timer,
                                           include_file,
                                           initializer_list,
+                                          pre_include,
                                           preamble,
+                                          post_include,
                                           symbol,
                                           template_parameter,
                                           )
diff --git a/python/dune/perftool/generation/cpp.py b/python/dune/perftool/generation/cpp.py
index c23550a13b2714bea08c7404f932d8ab96f667f7..04b5324130038156a194296eee28bb003c3af8c6 100644
--- a/python/dune/perftool/generation/cpp.py
+++ b/python/dune/perftool/generation/cpp.py
@@ -10,6 +10,18 @@ symbol = generator_factory(item_tags=("symbol",))
 preamble = generator_factory(item_tags=("preamble",), counted=True)
 
 
+def pre_include(pre, filetag=None, pre_include=True):
+    assert filetag
+    gen = generator_factory(item_tags=("file", filetag, "pre_include"), no_deco=True)
+    return gen(pre)
+
+
+def post_include(post, filetag=None, pre_include=True):
+    assert filetag
+    gen = generator_factory(item_tags=("file", filetag, "post_include"), no_deco=True)
+    return gen(post)
+
+
 def include_file(include, filetag=None):
     assert filetag
     from cgen import Include
@@ -62,3 +74,19 @@ def class_basename(classtag=None):
     assert classtag
 
     return generator_factory(item_tags=("clazz", classtag, "basename"))
+
+
+def dump_accumulate_timer(name):
+    gen = generator_factory(item_tags=("dump_timers"), no_deco=True)
+
+    from dune.perftool.pdelab.localoperator import (name_time_dumper_os,
+                                                    name_time_dumper_reset,
+                                                    name_time_dumper_t,
+                                                    name_time_dumper_counter)
+    os = name_time_dumper_os()
+    reset = name_time_dumper_reset()
+    t = name_time_dumper_t()
+    counter = name_time_dumper_counter()
+
+    code = "DUMP_AND_ACCUMULATE_TIMER({},{},{},{},{});".format(name, os, reset, t, counter)
+    return gen(code)
diff --git a/python/dune/perftool/options.py b/python/dune/perftool/options.py
index 5441bb4714710297acb00803ec5543ecb55b5d9f..85fd6304c051f94e51f1679404445c7776c1bee9 100644
--- a/python/dune/perftool/options.py
+++ b/python/dune/perftool/options.py
@@ -34,6 +34,7 @@ def get_form_compiler_arguments():
     parser.add_argument("--print-transformations-dir", type=str, help="place where to put dot files (can be omitted)")
     parser.add_argument("--diagonal-transformation-matrix", action="store_true", help="set option if the jacoby of the transformation is diagonal (axiparallel grids)")
     parser.add_argument("--ini-file", type=str, help="An inifile to use. A generated driver will be hard-coded to it, a [formcompiler] section will be used as default values to form compiler arguments (use snake case)")
+    parser.add_argument("--timer", action="store_true", help="measure times")
 
     # Modify the positional argument to not be a list
     args = vars(parser.parse_args())
diff --git a/python/dune/perftool/pdelab/driver.py b/python/dune/perftool/pdelab/driver.py
index cb786db5ed2ad1e87cf8f68ce002479d67a26751..57ae37c02406953ba799be1322cbc24f1ca42ac5 100644
--- a/python/dune/perftool/pdelab/driver.py
+++ b/python/dune/perftool/pdelab/driver.py
@@ -1091,24 +1091,52 @@ def dune_solve():
     # Test wether we want to do matrix free operator evaluation
     matrix_free = get_option('matrix_free')
 
+    # Get right solve command
     if linear and matrix_free:
         formdata = _driver_data['formdata']
         go = name_gridoperator(formdata)
         x = name_vector(formdata)
         include_file("dune/perftool/matrixfree.hh", filetag="driver")
-        return "solveMatrixFree({},{});".format(go, x)
+        solve = "solveMatrixFree({},{});".format(go, x)
     elif linear and not matrix_free:
         slp = name_stationarylinearproblemsolver()
-        return "{}.apply();".format(slp)
+        solve = "{}.apply();".format(slp)
     elif not linear and matrix_free:
         raise NotImplementedError("Nonlinear and matrix free is not yet implemented")
     elif not linear and not matrix_free:
         go_type = type_gridoperator(_driver_data['formdata'])
         go = name_gridoperator(_driver_data['formdata'])
         snp = name_stationarynonlinearproblemsolver(go_type, go)
-        return "{}.apply();".format(snp)
-    else:
-        assert False
+        solve = "{}.apply();".format(snp)
+
+    if get_option('timer'):
+        # Enable the timer macros for the driver and declare the 'total' timer
+        from dune.perftool.generation import pre_include
+        from dune.perftool.generation import post_include
+        pre_include("#define ENABLE_HP_TIMERS", filetag="driver")
+        include_file("dune/perftool/common/timer.hh", filetag="driver")
+        post_include("HP_DECLARE_TIMER(total);", filetag="driver")
+
+        # After solving, dump each local operator's timers under its own header line
+        from dune.perftool.generation import get_global_context_value
+        formdatas = get_global_context_value("formdatas")
+        print_times = []
+        for formdata in formdatas:
+            lop_name = name_localoperator(formdata)
+            print_times.append("std::cout << \"=== {} ===\" << std::endl;".format(lop_name))
+            print_times.append("{}.dump_timers(std::cout, true);".format(lop_name))
+        solve = ["HP_TIMER_START(total);",
+                 "{}".format(solve),
+                 "HP_TIMER_STOP(total);",
+                 "",
+                 "// Print timer results",
+                 "std::cout << std::endl;",
+                 "std::cout << \"=== Time for calling solve method ===\" << std::endl;",
+                 "DUMP_TIMER({},{},{});".format('total', 'std::cout', 'true'),
+                 "std::cout << std::endl;"]
+        solve.extend(print_times)
+
+    return solve
 
 
 @preamble
diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py
index e7abf19293280f7870b16fc12f064205d9427a51..ceab0ba7640555f7c8903a5e56bbba85bd8af4bb 100644
--- a/python/dune/perftool/pdelab/localoperator.py
+++ b/python/dune/perftool/pdelab/localoperator.py
@@ -6,9 +6,12 @@ from dune.perftool.generation import (base_class,
                                       class_basename,
                                       class_member,
                                       constructor_parameter,
+                                      dump_accumulate_timer,
                                       global_context,
                                       include_file,
                                       initializer_list,
+                                      post_include,
+                                      retrieve_cache_items,
                                       symbol,
                                       template_parameter,
                                       )
@@ -151,6 +154,17 @@ def class_type_from_cache(classtag):
     return basename, basename + tparam_str
 
 
+def assembler_routine_name():
+    """Return '<form>_<measure>' built from the form and integral type of the global context."""
+    from dune.perftool.generation import get_global_context_value
+    integral = get_global_context_value("integral_type")
+    form = get_global_context_value("form_type")
+
+    # 'residual' is spelled 'alpha' in the name; any other form type is used unchanged
+    prefix = {"residual": "alpha"}.get(form, form)
+    return "{}_{}".format(prefix, ufl_measure_to_pdelab_measure(integral).lower())
+
+
 def assembly_routine_signature():
     from dune.perftool.generation import get_global_context_value
     integral_type = get_global_context_value("integral_type")
@@ -276,16 +290,72 @@ def generate_kernel(integrals):
     return kernel
 
 
+def name_time_dumper_os():  # C++ name of the stream parameter of the generated dump_timers()
+    return "os"
+
+
+def name_time_dumper_reset():  # C++ name of the bool parameter of the generated dump_timers()
+    return "reset"
+
+
+def name_time_dumper_t():  # C++ name of the double local that dump_timers() initialises to 0.0
+    return "t"
+
+
+def name_time_dumper_counter():  # C++ name passed as last argument of DUMP_AND_ACCUMULATE_TIMER
+    return "counter"
+
+
+class TimerMethod(ClassMember):
+    """Class member whose content is the generated C++ ``dump_timers(os, reset)`` method template."""
+    def __init__(self):
+        os = name_time_dumper_os()
+        reset = name_time_dumper_reset()
+        t = name_time_dumper_t()
+        counter = name_time_dumper_counter()
+        content = ["template <typename Stream>",
+                   "void dump_timers(Stream& {}, bool {})".format(os, reset),
+                   "{",
+                   "  double {} = 0.0;".format(t),
+                   "#ifdef ENABLE_COUNTERS",
+                   # NOTE(review): the macro argument is still a literal '{}' - confirm the intended timer
+                   "  auto {} = HP_TIMER_OPCOUNTERS({{}});".format(counter),
+                   "  {}.reset();".format(counter),
+                   "#endif", ""]
+        content.extend('  ' + stmt for stmt in retrieve_cache_items(condition='dump_timers'))
+        content.extend(["    {} <<\"===== all_kernels =====\" << std::endl".format(os),
+                        "    <<\"elapsed: \" << {} << std::endl;".format(t),
+                        "#ifdef ENABLE_COUNTERS",
+                        "  {}.reportOperations({});".format(counter, os),
+                        "#endif", "}"])
+        ClassMember.__init__(self, content)
+
+
 class AssemblyMethod(ClassMember):
-    def __init__(self, signature, kernel):
+    def __init__(self, signature, kernel, filename):  # NOTE(review): filename is not referenced in the body shown here
         from loopy import generate_body
         from cgen import LiteralLines, Block
         content = signature
         content.append('{')
         if kernel is not None:
+            # Emit every kernel preamble first, indented by two spaces
             for i, p in kernel.preambles:
                 content.append('  ' + p)
+
+            # Timing enabled: declare a '<routine>_kernel' timer, start it and queue its dump statement
+            if get_option('timer'):
+                timer_name = assembler_routine_name() + '_kernel'
+                post_include('HP_DECLARE_TIMER({});'.format(timer_name), filetag='operatorfile')
+                content.append('  ' + 'HP_TIMER_START({});'.format(timer_name))
+                dump_accumulate_timer(timer_name)
+
+            # Add the generated kernel body, dropping its first and last line
             content.extend(l for l in generate_body(kernel).split('\n')[1:-1])
+
+            # Stop the timer started above (timer_name is only bound when the option is set)
+            if get_option('timer'):
+                content.append('  ' + 'HP_TIMER_STOP({});'.format(timer_name))
+
         content.append('}')
         ClassMember.__init__(self, content)
 
@@ -447,11 +517,14 @@ def generate_localoperator_file(kernels, filename):
         it, ft = method
         with global_context(integral_type=it, form_type=ft):
             signature = assembly_routine_signature()
-            operator_methods.append(AssemblyMethod(signature, kernel))
+            operator_methods.append(AssemblyMethod(signature, kernel, filename))
+
+    if get_option('timer'):
+        include_file('dune/perftool/common/timer.hh', filetag='operatorfile')
+        operator_methods.append(TimerMethod())
 
     # Write the file!
     from dune.perftool.file import generate_file
-
     param = cgen_class_from_cache("parameterclass")
     # TODO take the name of this thing from the UFL file
     lop = cgen_class_from_cache("operator", members=operator_methods)
diff --git a/test/poisson/poisson.mini b/test/poisson/poisson.mini
index eb8c175b6a9b09da89b11ee244978c4099ede35e..71c3cda88f974cf11ef4edcf35320e3f1fbcf6bd 100644
--- a/test/poisson/poisson.mini
+++ b/test/poisson/poisson.mini
@@ -13,3 +13,4 @@ extension = vtu
 
 [formcompiler]
 numerical_jacobian = 1, 0 | expand num
+timer = 1
\ No newline at end of file