diff --git a/bin/timings.sh b/bin/timings.sh new file mode 100755 index 0000000000000000000000000000000000000000..b85e6589dc9086b1488f788cccc86118e8a51e1e --- /dev/null +++ b/bin/timings.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# If an argument was given use it as the working directory +if [ $# -eq 1 ] +then + cd $1 +fi + +# Search for runnable executables +FILES=$(ls *.ini | grep -v '^verify') +for inifile in $FILES +do + line=$(grep ^"opcounter = " $inifile) + extract=${line##opcounter = } + UPPER=10 + if [ $extract -eq 1 ] + then + UPPER=1 + fi + COUNT=0 + while [ $COUNT -lt $UPPER ]; do + exec=${inifile%.ini} + MAXCORES=40 + mpirun --bind-to core -np $MAXCORES ./$exec $inifile + COUNT=$((COUNT + 1)) + done +done diff --git a/dune/perftool/common/opcounter.hh b/dune/perftool/common/opcounter.hh index 5a0115e6b5649d45b86db1432d83a66330e00e0e..966f286e135409f79c4c3fd38d5c3d4117ef65a6 100644 --- a/dune/perftool/common/opcounter.hh +++ b/dune/perftool/common/opcounter.hh @@ -16,6 +16,16 @@ namespace oc { template<typename F> class OpCounter; + template<typename T> + struct isOpCounter : public std::false_type + {}; + + template<typename F> + struct isOpCounter<OpCounter<F>> : public std::true_type + {}; + + template<typename T> + constexpr bool isOpCounterV = isOpCounter<T>::value; } namespace Dune { diff --git a/dune/perftool/common/vectorclass.hh b/dune/perftool/common/vectorclass.hh index 0f5b9b88afb7869a32e321f5276d2b245c28439b..9a87d0586a2bf4309826d48f4f850e3d644b2d8f 100644 --- a/dune/perftool/common/vectorclass.hh +++ b/dune/perftool/common/vectorclass.hh @@ -244,6 +244,30 @@ static inline _vcl::Vec4db operator == (Vec4d const & a, Vec4d const & b) { return a_ == b_; } +// vector operator == : returns true for elements for which a == b +static inline _vcl::Vec4db operator == (oc::OpCounter<double> a, Vec4d const & b) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return a_ == b_; +} + +// vector operator == : returns true for elements for which a == b +static inline _vcl::Vec4db operator == (Vec4d const & b, oc::OpCounter<double> a) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return a_ == b_; +} + // vector operator != : returns true for elements for which a != b static inline _vcl::Vec4db operator != (Vec4d const & a, Vec4d const & b) { BARRIER; @@ -258,6 +282,30 @@ static inline _vcl::Vec4db operator != (Vec4d const & a, Vec4d const & b) { return a_ != b_; } +// vector operator != : returns true for elements for which a != b +static inline _vcl::Vec4db operator != (oc::OpCounter<double> a, Vec4d const & b) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return a_ != b_; +} + +// vector operator != : returns true for elements for which a != b +static inline _vcl::Vec4db operator != (Vec4d const & b, oc::OpCounter<double> a) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return a_ != b_; +} + // vector operator < : returns true for elements for which a < b static inline _vcl::Vec4db operator < (Vec4d const & a, Vec4d const & b) { BARRIER; @@ -272,6 +320,30 @@ static inline _vcl::Vec4db operator < (Vec4d const & a, Vec4d const & b) { return a_ < b_; } +// vector operator < : returns true for elements for which a < b +static inline _vcl::Vec4db operator < (oc::OpCounter<double> a, Vec4d const & b) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return a_ < b_; +} + +// vector operator < : returns true for elements for which a < b +static inline _vcl::Vec4db operator < (Vec4d const & b, oc::OpCounter<double> a) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return b_ < a_; +} + // vector operator <= : returns true for elements for which a <= b static inline _vcl::Vec4db operator <= (Vec4d const & a, Vec4d const & b) { BARRIER; @@ -286,16 +358,61 @@ static inline _vcl::Vec4db operator <= (Vec4d const & a, Vec4d const & b) { return a_ <= b_; } +// vector operator <= : returns true for elements for which a <= b +static inline _vcl::Vec4db operator <= (oc::OpCounter<double> a, Vec4d const & b) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return a_ <= b_; +} + +// vector operator <= : returns true for elements for which a <= b +static inline _vcl::Vec4db operator <= (Vec4d const & b, oc::OpCounter<double> a) { + BARRIER; + _vcl::Vec4d a_(a._v), b_; + BARRIER; + b_.load(b._d[0].data()); + BARRIER; + Vec4d::F::comparisons(4); + BARRIER; + return b_ <= a_; +} + // vector operator > : returns true for elements for which a > b static inline _vcl::Vec4db operator > (Vec4d const & a, Vec4d const & b) { return b < a; } +// vector operator > : returns true for elements for which a > b +static inline _vcl::Vec4db operator > (oc::OpCounter<double> a, Vec4d const & b) { + return a < b; +} + +// vector operator > : returns true for elements for which a > b +static inline _vcl::Vec4db operator > (Vec4d const & b, oc::OpCounter<double> a) { + return a < b; +} + // vector operator >= : returns true for elements for which a >= b static inline _vcl::Vec4db operator >= (Vec4d const & a, Vec4d const & b) { return b <= a; } +// vector operator >= : returns true for elements for which a >= b +static inline _vcl::Vec4db operator >= (oc::OpCounter<double> a, Vec4d const & b) { + return b <= a; +} + +// vector operator >= : returns true for elements for which a >= b +static inline _vcl::Vec4db operator >= (Vec4d const & b, oc::OpCounter<double> a) { + return a <= b; +} + + // avoid logical operators for now, I don't think we need them #if 0 @@ -411,81 +528,40 @@ static inline Vec4d exp(Vec4d const & a){ return r; } - -// ignore pow() for now -#if 0 - -// pow(Vec4d, int): -template <typename TT> static Vec4d pow(Vec4d const & a, TT n); - -// Raise floating point numbers to integer power n -template <> -inline Vec4d pow<int>(Vec4d const & x0, int n) { - return pow_template_i<Vec4d>(x0, n); +// pow +template <typename TT> +static inline Vec4d pow(Vec4d const & a, oc::OpCounter<TT> n) +{ + BARRIER; + Vec4d r; + std::transform(a._d,a._d+4,r._d,[=](auto x){ return pow(x, n); }); + BARRIER; + return r; } -// allow conversion from unsigned int -template <> -inline Vec4d pow<uint32_t>(Vec4d const & x0, uint32_t n) { - return pow_template_i<Vec4d>(x0, (int)n); +// pow +template <typename TT> +static inline +std::enable_if_t<not oc::isOpCounterV<TT>, Vec4d> pow(Vec4d const & a, TT n) +{ + BARRIER; + Vec4d r; + std::transform(a._d,a._d+4,r._d,[=](auto x){ return pow(x, n); }); + BARRIER; + return r; } -// Raise floating point numbers to integer power n, where n is a compile-time constant -template <int n> -static inline Vec4d pow_n(Vec4d const & a) { - if (n < 0) return Vec4d(1.0) / pow_n<-n>(a); - if (n == 0) return Vec4d(1.0); - if (n >= 256) return pow(a, n); - Vec4d x = a; // a^(2^i) - Vec4d y; // accumulator - const int lowest = n - (n & (n-1));// lowest set bit in n - if (n & 1) y = x; - if (n < 2) return y; - x = x*x; // x^2 - if (n & 2) { - if (lowest == 2) y = x; else y *= x; - } - if (n < 4) return y; - x = x*x; // x^4 - if (n & 4) { - if (lowest == 4) y = x; else y *= x; - } - if (n < 8) return y; - x = x*x; // x^8 - if (n & 8) { - if (lowest == 8) y = x; else y *= x; - } - if (n < 16) return y; - x = x*x; // x^16 - if (n & 16) { - if (lowest == 16) y = x; else y *= x; - } - if (n < 32) return y; - x = x*x; // x^32 - if (n & 32) { - if (lowest == 32) y = x; else y *= x; - } - if (n < 64) return y; - x = x*x; // x^64 - if (n & 64) { - if (lowest == 64) y = x; else y *= x; - } - if (n < 128) return y; - x = x*x; // x^128 - if (n & 128) { - if (lowest == 128) y = x; else y *= x; - } - return y; -} - -template <int n> -static inline Vec4d pow(Vec4d const & a, Const_int_t<n>) { - return pow_n<n>(a); +static inline Vec4d select(const _vcl::Vec4db& s, const Vec4d& a, const Vec4d& b) +{ + BARRIER; + Vec4d r; + for(int i=0; i<4; ++i) + r._d[i] = s.extract(i) ? a._d[i] : b._d[i]; + BARRIER; + return r; } -#endif - // function round: round to nearest integer (even). (result as double vector) static inline Vec4d round(Vec4d const & a) { BARRIER; @@ -1095,7 +1171,7 @@ struct Vec8f /***************************************************************************** * -* Operators for Vec4d +* Operators for Vec8f * *****************************************************************************/ diff --git a/python/dune/perftool/pdelab/driver/__init__.py b/python/dune/perftool/pdelab/driver/__init__.py index 91666020ee0487fdb6a8733f5a1893ffa014587d..4a832ba7709c9f8ca7f7c6f6380230f43fa55579 100644 --- a/python/dune/perftool/pdelab/driver/__init__.py +++ b/python/dune/perftool/pdelab/driver/__init__.py @@ -296,7 +296,7 @@ def generate_driver(): add_section("vtk", "Do visualization...") add_section("instat", "Set up instationary stuff...") add_section("timings", "Maybe take performance measurements...") - add_section("print", "Maybe print residuals and matrices to stdout...") + add_section("printing", "Maybe print residuals and matrices to stdout...") add_section("error", "Maybe calculate errors for test results...") if get_option("instrumentation_level") >= 1: diff --git a/python/dune/perftool/pdelab/driver/solve.py b/python/dune/perftool/pdelab/driver/solve.py index 4ecce62ff12bf1afd4f36b4d16ca1216c0ba3910..8f3ef96ff9fd2babe838a2b3ceb7baeeb63d67f7 100644 --- a/python/dune/perftool/pdelab/driver/solve.py +++ b/python/dune/perftool/pdelab/driver/solve.py @@ -204,7 +204,7 @@ def name_stationarynonlinearproblemsolver(go_type, go): return name -@preamble(section="print") +@preamble(section="printing") def print_residual(): ini = name_initree() n_go = name_gridoperator(get_form_ident()) @@ -227,7 +227,7 @@ def print_residual(): "}"] -@preamble(section="print") +@preamble(section="printing") def print_matrix(): ini = name_initree() t_go = type_gridoperator(get_form_ident()) diff --git a/python/dune/perftool/pdelab/localoperator.py b/python/dune/perftool/pdelab/localoperator.py index c56f24e25f1e62fa116b8f753c0eb7de04a2d8db..b205b34c4f48fcd0df5bdfdae3213d8c2c929969 100644 --- a/python/dune/perftool/pdelab/localoperator.py +++ b/python/dune/perftool/pdelab/localoperator.py @@ -613,6 +613,8 @@ class TimerMethod(ClassMember): class LoopyKernelMethod(ClassMember): def __init__(self, signature, kernel, add_timings=True, initializer_list=[]): + self.name = kernel.name + from loopy import generate_body from cgen import LiteralLines, Block content = signature @@ -659,6 +661,10 @@ class LoopyKernelMethod(ClassMember): def cgen_class_from_cache(tag, members=[]): from dune.perftool.generation import retrieve_cache_items + # Sort the given member functions by their name to help debugging by fixing + # the order + members = sorted(members, key=lambda m: m.name) + # Generate the name by concatenating basename and template parameters basename, fullname = class_type_from_cache(tag)