Skip to content
Snippets Groups Projects
Commit 0a9b527c authored by Dominic Kempf's avatar Dominic Kempf
Browse files

Add TSC timers

parent aa3d5beb
No related branches found
No related tags found
No related merge requests found
...@@ -17,6 +17,13 @@ include(DuneMacros) ...@@ -17,6 +17,13 @@ include(DuneMacros)
# start a dune project with information from dune.module # start a dune project with information from dune.module
dune_project() dune_project()
dune_add_library(duneperftool dune/perftool/common/tsc.cc)
dune_target_enable_all_packages(duneperftool)
dune_register_package_flags(LIBRARIES duneperftool)
dune_enable_all_packages() dune_enable_all_packages()
add_subdirectory(dune/perftool) add_subdirectory(dune/perftool)
......
add_subdirectory(common)
install(FILES vectorclass/dispatch_example.cpp install(FILES vectorclass/dispatch_example.cpp
vectorclass/instrset_detect.cpp vectorclass/instrset_detect.cpp
vectorclass/instrset.h vectorclass/instrset.h
......
install(FILES muladd_workarounds.hh
opcounter.hh
timer.hh
tsc.hh
vectorclass.hh
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/dune/perftool/common
)
#ifndef HP_TIMER_HH #ifndef DUNE_PERFTOOL_COMMON_TIMER_HH
#define HP_TIMER_HH #define DUNE_PERFTOOL_COMMON_TIMER_HH
#include <chrono> #define _GONE_THROUGH_TIMER_HH
#include <dune/perftool/common/opcounter.hh>
#define HP_TIMER_OPCOUNTER oc::OpCounter<double>
#define HP_TIMER_DURATION(name) __hp_timer_##name##_duration
#define HP_TIMER_STARTTIME(name) __hp_timer_##name##_start
#define HP_TIMER_OPCOUNTERS_START(name) __hp_timer_##name##_counters_start
#define HP_TIMER_OPCOUNTERS(name) __hp_timer_##name##_counters
#define HP_TIMER_ELAPSED(name) std::chrono::duration_cast<std::chrono::duration<double> >( HP_TIMER_DURATION(name) ).count()
#ifdef ENABLE_HP_TIMERS
#ifdef ENABLE_COUNTER
#define HP_DECLARE_TIMER(name) \
std::chrono::high_resolution_clock::duration HP_TIMER_DURATION(name); \
std::chrono::high_resolution_clock::time_point HP_TIMER_STARTTIME(name); \
HP_TIMER_OPCOUNTER::Counters HP_TIMER_OPCOUNTERS_START(name); \
HP_TIMER_OPCOUNTER::Counters HP_TIMER_OPCOUNTERS(name);
#define HP_TIMER_START(name) \
do { \
HP_TIMER_OPCOUNTERS_START(name) = HP_TIMER_OPCOUNTER::counters; \
HP_TIMER_STARTTIME(name) = std::chrono::high_resolution_clock::now(); \
} while(false)
#define HP_TIMER_STOP(name) \
do { \
std::chrono::high_resolution_clock::time_point __hp_end_time = std::chrono::high_resolution_clock::now(); \
HP_TIMER_OPCOUNTERS(name) += HP_TIMER_OPCOUNTER::counters - HP_TIMER_OPCOUNTERS_START(name); \
HP_TIMER_DURATION(name) += __hp_end_time - HP_TIMER_STARTTIME(name); \
} while(false)
#define HP_TIMER_RESET(name) \
do { \
HP_TIMER_DURATION(name) = std::chrono::high_resolution_clock::duration::zero(); \
HP_TIMER_OPCOUNTERS(name).reset(); \
} while (false)
#else
#define HP_DECLARE_TIMER(name) \
std::chrono::high_resolution_clock::duration HP_TIMER_DURATION(name); \
std::chrono::high_resolution_clock::time_point HP_TIMER_STARTTIME(name);
#define HP_TIMER_START(name) HP_TIMER_STARTTIME(name) = std::chrono::high_resolution_clock::now();
#define HP_TIMER_STOP(name) \
do { \
std::chrono::high_resolution_clock::time_point __hp_end_time = std::chrono::high_resolution_clock::now(); \
HP_TIMER_DURATION(name) += __hp_end_time - HP_TIMER_STARTTIME(name); \
} while(false)
#define HP_TIMER_RESET(name) HP_TIMER_DURATION(name) = std::chrono::high_resolution_clock::duration::zero();
#endif // ENABLE_COUNTER
#else // ENABLE_HP_TIMERS
#define HP_DECLARE_TIMER(name)
#define HP_TIMER_START(name)
#define HP_TIMER_STOP(name)
#define HP_TIMER_RESET(name)
#endif // ENABLE_HP_TIMERS
#ifdef ENABLE_COUNTER
#define DUMP_TIMER(name,os,reset)\
if (HP_TIMER_ELAPSED(name) > 1e-12) \
os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
HP_TIMER_OPCOUNTERS(name).reportOperations(os,ident,#name,reset);
#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) \
if (HP_TIMER_ELAPSED(name) > 1e-12) \
os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
time += HP_TIMER_ELAPSED(name); \
ops += HP_TIMER_OPCOUNTERS(name); \
HP_TIMER_OPCOUNTERS(name).reportOperations(os,ident,#name,reset);
#elif defined ENABLE_HP_TIMERS
#define DUMP_TIMER(name,os,reset) \
if (HP_TIMER_ELAPSED(name) > 1e-12) \
os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
if (reset) HP_TIMER_RESET(name);
#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) \
if (HP_TIMER_ELAPSED(name) > 1e-12) \
os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
time += HP_TIMER_ELAPSED(name); \
if (reset) HP_TIMER_RESET(name);
#if ENABLE_CHRONO_TIMER
#include<dune/perftool/common/timer_chrono.hh>
#else #else
#include<dune/perftool/common/timer_tsc.hh>
#define DUMP_TIMER(name,os,reset)
#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops)
#endif #endif
#undef _GONE_THROUGH_TIMER_HH
#endif // HP_TIMER_HH #endif
#ifndef DUNE_PERFTOOL_COMMON_TIMER_CHRONO_HH
#define DUNE_PERFTOOL_COMMON_TIMER_CHRONO_HH
#ifndef _GONE_THROUGH_TIMER_HH
#error "Do not include timer_chrono.hh directly, instead use timer.hh"
#endif
#include <chrono>
#include <dune/perftool/common/opcounter.hh>
// Naming helpers for the chrono based timer backend.
//
// Every timer called `name` is backed by up to four variables whose
// identifiers are built by token pasting; the macros below map the timer name
// to those identifiers so the remaining macros never spell them out by hand.

// Operation counter type whose `Counters` snapshots are stored next to a timer.
#define HP_TIMER_OPCOUNTER oc::OpCounter<double>
// Variable holding the accumulated duration of timer `name`.
#define HP_TIMER_DURATION(name) __hp_timer_##name##_duration
// Variable holding the time point recorded by the most recent start.
#define HP_TIMER_STARTTIME(name) __hp_timer_##name##_start
// Variable holding the operation counter snapshot taken at the most recent start.
#define HP_TIMER_OPCOUNTERS_START(name) __hp_timer_##name##_counters_start
// Variable holding the operation counts accumulated over all start/stop pairs.
#define HP_TIMER_OPCOUNTERS(name) __hp_timer_##name##_counters
// Accumulated duration of timer `name` converted to a double;
// std::chrono::duration<double> uses the default period, i.e. seconds.
#define HP_TIMER_ELAPSED(name) std::chrono::duration_cast<std::chrono::duration<double> >( HP_TIMER_DURATION(name) ).count()
// Timer control macros of the chrono backend.
//
// Three configurations are provided:
//  * ENABLE_HP_TIMERS and ENABLE_COUNTER: time and operation counts are tracked,
//  * ENABLE_HP_TIMERS only: only time is tracked,
//  * ENABLE_HP_TIMERS undefined: all four macros expand to nothing.
//
// Note that the single-statement variants of HP_TIMER_START / HP_TIMER_RESET
// (time-only configuration) already end in a semicolon, whereas the
// do { } while(false) variants do not.
#ifdef ENABLE_HP_TIMERS
#ifdef ENABLE_COUNTER
// Declares the duration, start time and both operation counter variables of
// timer `name`. The variables are declared without an initializer.
#define HP_DECLARE_TIMER(name) \
std::chrono::high_resolution_clock::duration HP_TIMER_DURATION(name); \
std::chrono::high_resolution_clock::time_point HP_TIMER_STARTTIME(name); \
HP_TIMER_OPCOUNTER::Counters HP_TIMER_OPCOUNTERS_START(name); \
HP_TIMER_OPCOUNTER::Counters HP_TIMER_OPCOUNTERS(name);
// Snapshots the global operation counters first, then records the start time.
#define HP_TIMER_START(name) \
do { \
HP_TIMER_OPCOUNTERS_START(name) = HP_TIMER_OPCOUNTER::counters; \
HP_TIMER_STARTTIME(name) = std::chrono::high_resolution_clock::now(); \
} while(false)
// Reads the clock first, then adds the operation count difference and the
// elapsed time since the matching start to the accumulators of timer `name`.
#define HP_TIMER_STOP(name) \
do { \
std::chrono::high_resolution_clock::time_point __hp_end_time = std::chrono::high_resolution_clock::now(); \
HP_TIMER_OPCOUNTERS(name) += HP_TIMER_OPCOUNTER::counters - HP_TIMER_OPCOUNTERS_START(name); \
HP_TIMER_DURATION(name) += __hp_end_time - HP_TIMER_STARTTIME(name); \
} while(false)
// Sets the accumulated duration to zero and resets the accumulated operation counts.
#define HP_TIMER_RESET(name) \
do { \
HP_TIMER_DURATION(name) = std::chrono::high_resolution_clock::duration::zero(); \
HP_TIMER_OPCOUNTERS(name).reset(); \
} while (false)
#else
// Time-only configuration: declares just the duration and start time variables.
#define HP_DECLARE_TIMER(name) \
std::chrono::high_resolution_clock::duration HP_TIMER_DURATION(name); \
std::chrono::high_resolution_clock::time_point HP_TIMER_STARTTIME(name);
// Records the start time (expansion already contains the trailing semicolon).
#define HP_TIMER_START(name) HP_TIMER_STARTTIME(name) = std::chrono::high_resolution_clock::now();
// Adds the time elapsed since the matching start to the accumulated duration.
#define HP_TIMER_STOP(name) \
do { \
std::chrono::high_resolution_clock::time_point __hp_end_time = std::chrono::high_resolution_clock::now(); \
HP_TIMER_DURATION(name) += __hp_end_time - HP_TIMER_STARTTIME(name); \
} while(false)
// Sets the accumulated duration to zero (expansion already contains the trailing semicolon).
#define HP_TIMER_RESET(name) HP_TIMER_DURATION(name) = std::chrono::high_resolution_clock::duration::zero();
#endif // ENABLE_COUNTER
#else // ENABLE_HP_TIMERS
// Timers disabled: every timer macro expands to nothing.
#define HP_DECLARE_TIMER(name)
#define HP_TIMER_START(name)
#define HP_TIMER_STOP(name)
#define HP_TIMER_RESET(name)
#endif // ENABLE_HP_TIMERS
// Reporting macros of the chrono backend.
//
// DUMP_TIMER(name,os,reset) writes the elapsed time of timer `name` to the
// stream `os`; DUMP_AND_ACCUMULATE_TIMER additionally adds the elapsed time to
// `time` (and, with ENABLE_COUNTER, the operation counts to `ops`).
//
// Both macros use an identifier `ident` that is not a macro parameter; it has
// to be visible at the place where the macro is expanded.
//
// The macros expand to several statements and only the stream output is
// guarded by the `if`; the statements after it always run. Because of that the
// macros must not be used as the unbraced body of an if/else or a loop.
//
// NOTE(review): the ENABLE_COUNTER variants expand to HP_TIMER_ELAPSED(name)
// even when ENABLE_HP_TIMERS is undefined, in which case HP_DECLARE_TIMER
// declares nothing - confirm that ENABLE_COUNTER is never set on its own.
#ifdef ENABLE_COUNTER
// Prints the time if it exceeds 1e-12 and always reports the operation counts;
// `reset` is only forwarded to reportOperations(), HP_TIMER_RESET is not invoked here.
#define DUMP_TIMER(name,os,reset)\
if (HP_TIMER_ELAPSED(name) > 1e-12) \
os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
HP_TIMER_OPCOUNTERS(name).reportOperations(os,ident,#name,reset);
// Same as DUMP_TIMER, but adds time and operation counts to `time` and `ops`
// before the operation counts are reported.
#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) \
if (HP_TIMER_ELAPSED(name) > 1e-12) \
os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
time += HP_TIMER_ELAPSED(name); \
ops += HP_TIMER_OPCOUNTERS(name); \
HP_TIMER_OPCOUNTERS(name).reportOperations(os,ident,#name,reset);
#elif defined ENABLE_HP_TIMERS
// Time-only configuration: prints the time if it exceeds 1e-12 and resets the
// timer when `reset` evaluates to true.
#define DUMP_TIMER(name,os,reset) \
if (HP_TIMER_ELAPSED(name) > 1e-12) \
os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
if (reset) HP_TIMER_RESET(name);
// Time-only configuration: as DUMP_TIMER, additionally adds the elapsed time
// to `time`; the `ops` argument is not used.
#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) \
if (HP_TIMER_ELAPSED(name) > 1e-12) \
os << ident << " " << #name << " time " << HP_TIMER_ELAPSED(name) << std::endl; \
time += HP_TIMER_ELAPSED(name); \
if (reset) HP_TIMER_RESET(name);
#else
// Neither counters nor timers enabled: both macros expand to nothing.
#define DUMP_TIMER(name,os,reset)
#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops)
#endif
#endif // DUNE_PERFTOOL_COMMON_TIMER_CHRONO_HH
#ifndef DUNE_PERFTOOL_COMMON_TIMER_TSC_HH
#define DUNE_PERFTOOL_COMMON_TIMER_TSC_HH
#ifndef _GONE_THROUGH_TIMER_HH
#error "Do not include timer_tsc.hh directly, instead use timer.hh"
#endif
#include <dune/perftool/common/tsc.hh>
#include <dune/perftool/common/opcounter.hh>
// Naming helpers for the TSC based timer backend.
//
// Every timer called `name` is backed by up to four variables whose
// identifiers are built by token pasting; the macros below map the timer name
// to those identifiers so the remaining macros never spell them out by hand.

// Operation counter type whose `Counters` snapshots are stored next to a timer.
#define HP_TIMER_OPCOUNTER oc::OpCounter<double>
// Variable holding the accumulated duration of timer `name`.
#define HP_TIMER_DURATION(name) __hp_timer_##name##_duration
// Variable holding the counter value recorded by the most recent start.
#define HP_TIMER_STARTTIME(name) __hp_timer_##name##_start
// Variable holding the operation counter snapshot taken at the most recent start.
#define HP_TIMER_OPCOUNTERS_START(name) __hp_timer_##name##_counters_start
// Variable holding the operation counts accumulated over all start/stop pairs.
#define HP_TIMER_OPCOUNTERS(name) __hp_timer_##name##_counters
// Timer control macros of the TSC backend.
//
// Three configurations are provided:
//  * ENABLE_HP_TIMERS and ENABLE_COUNTER: ticks and operation counts are tracked,
//  * ENABLE_HP_TIMERS only: only ticks are tracked,
//  * ENABLE_HP_TIMERS undefined: all four macros expand to nothing.
//
// Durations are accumulated as Dune::PDELab::TSC::Counter values, i.e. the
// results of Dune::PDELab::TSC::elapsed() are summed up as they are returned.
//
// NOTE(review): start and end values are stored in `long long` variables and
// then handed to TSC::elapsed(); check against the Counter type declared in
// tsc.hh whether the signed intermediate type is intended.
//
// Note that the single-statement variants of HP_TIMER_START / HP_TIMER_RESET
// (tick-only configuration) already end in a semicolon, whereas the
// do { } while(false) variants do not.
#ifdef ENABLE_HP_TIMERS
#ifdef ENABLE_COUNTER
// Declares the duration, start value and both operation counter variables of
// timer `name`. The variables are declared without an initializer.
#define HP_DECLARE_TIMER(name) \
Dune::PDELab::TSC::Counter HP_TIMER_DURATION(name); \
long long HP_TIMER_STARTTIME(name); \
HP_TIMER_OPCOUNTER::Counters HP_TIMER_OPCOUNTERS_START(name); \
HP_TIMER_OPCOUNTER::Counters HP_TIMER_OPCOUNTERS(name);
// Snapshots the global operation counters first, then records the start value.
#define HP_TIMER_START(name) \
do { \
HP_TIMER_OPCOUNTERS_START(name) = HP_TIMER_OPCOUNTER::counters; \
HP_TIMER_STARTTIME(name) = Dune::PDELab::TSC::start(); \
} while(false)
// Reads the counter first, then adds the operation count difference and the
// result of TSC::elapsed(start,end) to the accumulators of timer `name`.
#define HP_TIMER_STOP(name) \
do { \
long long __hp_end_time = Dune::PDELab::TSC::stop(); \
HP_TIMER_OPCOUNTERS(name) += HP_TIMER_OPCOUNTER::counters - HP_TIMER_OPCOUNTERS_START(name); \
HP_TIMER_DURATION(name) += Dune::PDELab::TSC::elapsed(HP_TIMER_STARTTIME(name),__hp_end_time); \
} while(false)
// Sets the accumulated duration to TSC::zero() and resets the accumulated operation counts.
#define HP_TIMER_RESET(name) \
do { \
HP_TIMER_DURATION(name) = Dune::PDELab::TSC::zero(); \
HP_TIMER_OPCOUNTERS(name).reset(); \
} while (false)
#else
// Tick-only configuration: declares just the duration and start value variables.
#define HP_DECLARE_TIMER(name) \
Dune::PDELab::TSC::Counter HP_TIMER_DURATION(name); \
long long HP_TIMER_STARTTIME(name);
// Records the start value (expansion already contains the trailing semicolon).
#define HP_TIMER_START(name) HP_TIMER_STARTTIME(name) = Dune::PDELab::TSC::start();
// Adds the result of TSC::elapsed(start,end) to the accumulated duration.
#define HP_TIMER_STOP(name) \
do { \
long long __hp_end_time = Dune::PDELab::TSC::stop(); \
HP_TIMER_DURATION(name) += Dune::PDELab::TSC::elapsed(HP_TIMER_STARTTIME(name), __hp_end_time); \
} while(false)
// Sets the accumulated duration to TSC::zero() (expansion already contains the trailing semicolon).
#define HP_TIMER_RESET(name) HP_TIMER_DURATION(name) = Dune::PDELab::TSC::zero();
#endif // ENABLE_COUNTER
#else // ENABLE_HP_TIMERS
// Timers disabled: every timer macro expands to nothing.
#define HP_DECLARE_TIMER(name)
#define HP_TIMER_START(name)
#define HP_TIMER_STOP(name)
#define HP_TIMER_RESET(name)
#endif // ENABLE_HP_TIMERS
// Reporting macros of the TSC backend.
//
// DUMP_TIMER(name,os,reset) writes the elapsed time of timer `name` to the
// stream `os`; DUMP_AND_ACCUMULATE_TIMER additionally adds the elapsed time to
// `time` (and, with ENABLE_COUNTER, the operation counts to `ops`).
//
// HP_TIMER_DURATION(name) holds raw TSC ticks. The reported and accumulated
// values are converted with Dune::PDELab::TSC::seconds(), so that the output
// labelled "time" has the same unit (seconds) as the chrono backend and the
// two backends stay interchangeable. A timer is printed as soon as it has
// accumulated at least one tick.
//
// Both macros use an identifier `ident` that is not a macro parameter; it has
// to be visible at the place where the macro is expanded.
//
// The macros expand to several statements and only the stream output is
// guarded by the `if`; the statements after it always run. Because of that the
// macros must not be used as the unbraced body of an if/else or a loop.
#ifdef ENABLE_COUNTER
// Prints the time of a timer that has run and always reports the operation
// counts; `reset` is only forwarded to reportOperations().
#define DUMP_TIMER(name,os,reset)\
  if (HP_TIMER_DURATION(name) > 0) \
    os << ident << " " << #name << " time " << Dune::PDELab::TSC::seconds(HP_TIMER_DURATION(name)) << std::endl; \
  HP_TIMER_OPCOUNTERS(name).reportOperations(os,ident,#name,reset);
// Same as DUMP_TIMER, but adds time (in seconds) and operation counts to
// `time` and `ops` before the operation counts are reported.
#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) \
  if (HP_TIMER_DURATION(name) > 0) \
    os << ident << " " << #name << " time " << Dune::PDELab::TSC::seconds(HP_TIMER_DURATION(name)) << std::endl; \
  time += Dune::PDELab::TSC::seconds(HP_TIMER_DURATION(name)); \
  ops += HP_TIMER_OPCOUNTERS(name); \
  HP_TIMER_OPCOUNTERS(name).reportOperations(os,ident,#name,reset);
#elif defined ENABLE_HP_TIMERS
// Tick-only configuration: prints the time of a timer that has run and resets
// the timer when `reset` evaluates to true.
#define DUMP_TIMER(name,os,reset) \
  if (HP_TIMER_DURATION(name) > 0) \
    os << ident << " " << #name << " time " << Dune::PDELab::TSC::seconds(HP_TIMER_DURATION(name)) << std::endl; \
  if (reset) HP_TIMER_RESET(name);
// Tick-only configuration: as DUMP_TIMER, additionally adds the elapsed time
// (in seconds) to `time`; the `ops` argument is not used.
#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops) \
  if (HP_TIMER_DURATION(name) > 0) \
    os << ident << " " << #name << " time " << Dune::PDELab::TSC::seconds(HP_TIMER_DURATION(name)) << std::endl; \
  time += Dune::PDELab::TSC::seconds(HP_TIMER_DURATION(name)); \
  if (reset) HP_TIMER_RESET(name);
#else
// Neither counters nor timers enabled: both macros expand to nothing.
#define DUMP_TIMER(name,os,reset)
#define DUMP_AND_ACCUMULATE_TIMER(name,os,reset,time,ops)
#endif
#endif // DUNE_PERFTOOL_COMMON_TIMER_TSC_HH
#include "config.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <regex>
#include <string>
#include <vector>
#ifdef __linux__
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#elif __APPLE__
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
#include <dune/perftool/common/tsc.hh>
namespace Dune {
namespace PDELab {
namespace impl {
#if __linux__
// Determines the TSC frequency in Hz from the kernel log (Linux).
//
// Runs `/bin/dmesg -t` in a child process with its standard output connected
// to a pipe and scans the output line by line for the kernel's TSC frequency
// message, which states the frequency in MHz.
//
// Returns the frequency converted to Hz.
//
// Throws TSCError if the pipe or the child process cannot be set up, if the
// child exits with a nonzero status, or if no matching log line is found.
TSC::Counter get_tsc_frequency()
{
  int pipe_fds[2];
  if (pipe(pipe_fds) < 0)
    DUNE_THROW(TSCError,"Failed to create pipe for communicating with dmesg");

  pid_t pid = fork();
  if (pid < 0)
    {
      // no child was started, so nobody else will ever close the pipe
      close(pipe_fds[0]);
      close(pipe_fds[1]);
      DUNE_THROW(TSCError,"Failed to fork process for running dmesg");
    }

  if (pid == 0)
    {
      // Child: drop stdin and stderr, route stdout into the pipe and replace
      // the process image by dmesg. Every failure is reported to the parent
      // through a distinct exit status.
      if (close(0) < 0)
        _exit(1);
      if (close(2) < 0)
        _exit(2);
      if (dup2(pipe_fds[1],1) < 0)
        _exit(3);
      if (close(pipe_fds[0]) < 0)
        _exit(4);
      if (close(pipe_fds[1]) < 0)
        _exit(5);
      const char * args[] = {
        "/bin/dmesg",
        "-t",
        nullptr
      };
      execvp(args[0],const_cast<char*const*>(args));
      // only reached if exec failed
      _exit(6);
    }

  // Parent: the write end has to be closed, otherwise reading from the pipe
  // never sees end-of-file.
  if (close(pipe_fds[1]) < 0)
    {
      close(pipe_fds[0]);
      waitpid(pid,nullptr,0);
      DUNE_THROW(TSCError,"Failed to close write end of pipe");
    }

  std::regex regex("tsc:.*TSC.*?(\\d+\\.\\d+)\\s+MHz\n");

  FILE* input = fdopen(pipe_fds[0],"r");
  if (not input)
    {
      // fdopen() did not take ownership of the descriptor
      close(pipe_fds[0]);
      waitpid(pid,nullptr,0);
      DUNE_THROW(TSCError,"Failed to create file object for reading dmesg output");
    }

  double result = -1.0;
  char buf[1024];
  std::cmatch match;
  while (fgets(buf,sizeof(buf),input))
    {
      if (std::regex_match(buf,match,regex))
        {
          // the capture points into buf; atof stops at the first character
          // that is not part of the number
          result = atof(match[1].first);
          break;
        }
    }

  if (fclose(input) != 0)
    DUNE_THROW(TSCError,"Failed to close file object for reading dmesg output");

  int status;
  if (waitpid(pid,&status,0) < 0)
    DUNE_THROW(TSCError,"Failed to clean up dmesg child process");

  // The exit status is only meaningful for a child that exited normally. The
  // child may instead have been terminated by a signal, e.g. because the pipe
  // was closed above before it finished writing; that is not treated as an
  // error as long as the frequency was found.
  if (WIFEXITED(status) && WEXITSTATUS(status) != 0)
    DUNE_THROW(TSCError,"Child process failed with return status " << WEXITSTATUS(status));

  if (result < 0)
    DUNE_THROW(TSCError,"Could not find TSC frequency information in kernel log");

  // The kernel logs the frequency in MHz
  return static_cast<TSC::Counter>(result*1e6);
}
#elif __APPLE__
// Determines the TSC frequency on macOS by reading the sysctl entry
// "machdep.tsc.frequency" into a 64-bit integer, which is returned as is.
//
// Throws TSCError if the sysctl call reports a failure.
TSC::Counter get_tsc_frequency()
{
  std::int64_t hz = 0;
  std::size_t hz_size = sizeof(hz);

  const int rc = sysctlbyname("machdep.tsc.frequency",&hz,&hz_size,nullptr,0);
  if (rc < 0)
    {
      DUNE_THROW(TSCError,"Failed to read TSC frequency from sysctl machdep.tsc.frequency");
    }

  return hz;
}
#endif
// Estimates the cost of one TSC::start() / TSC::stop() pair as the smallest
// tick difference observed over `iterations` back-to-back measurements.
//
// If `iterations` is zero no measurement is taken and the largest
// representable Counter value is returned.
TSC::Counter calibrate_tsc_overhead_min(std::size_t iterations)
{
  TSC::Counter smallest = std::numeric_limits<TSC::Counter>::max();

  for (std::size_t remaining = iterations ; remaining > 0 ; --remaining)
    {
      const TSC::Counter first = TSC::start();
      // empty volatile asm statement: stands in for the measured region
      asm volatile("");
      const TSC::Counter last = TSC::stop();

      const TSC::Counter ticks = last - first;
      if (ticks < smallest)
        smallest = ticks;
    }

  return smallest;
}
// Estimates the cost of one TSC::start() / TSC::stop() pair as the median
// (element at index size/2 of the ordered samples) of `iterations`
// back-to-back measurements.
//
// If `iterations` is zero there is no sample to pick; in that case the largest
// representable Counter value is returned, which is the same result
// calibrate_tsc_overhead_min() yields when it takes no measurement.
TSC::Counter calibrate_tsc_overhead_median(std::size_t iterations)
{
  if (iterations == 0)
    return std::numeric_limits<TSC::Counter>::max();

  std::vector<TSC::Counter> measurements(iterations);
  for (auto& m : measurements)
    {
      const TSC::Counter start = TSC::start();
      // empty volatile asm statement: stands in for the measured region
      asm volatile("");
      const TSC::Counter end = TSC::stop();
      m = end - start;
    }

  // Only the element at the median position is needed, so a partial ordering
  // is sufficient; it places the same value there as a full sort would.
  const auto median = measurements.begin() + measurements.size()/2;
  std::nth_element(measurements.begin(),median,measurements.end());
  return *median;
}
} // namespace impl
// Sets up the TSC frequency, the tick-to-seconds scale factor and the
// measurement overhead.
//
// params may be a null pointer; in that case the frequency is detected with
// impl::get_tsc_frequency() and the overhead is calibrated with the "min"
// method and TSC::calibrationIterations samples.
//
// Recognized keys if params is given:
//   frequency              TSC frequency; detected automatically if absent
//   correct_overhead       (default true) if false, the overhead is set to 0
//   calibration_method     "min" (default) or "median"
//   calibration_iterations number of calibration samples
//                          (default TSC::calibrationIterations)
//
// Throws TSCError if the frequency is zero or the calibration method is
// unknown; errors of the frequency detection are passed on.
TSC::TSC(const Dune::ParameterTree* params)
{
  if (params and params->hasKey("frequency"))
    _frequency = params->get<TSC::Counter>("frequency");
  else
    _frequency = impl::get_tsc_frequency();

  // a zero frequency would silently produce an infinite scale factor
  if (_frequency == 0)
    DUNE_THROW(TSCError,"TSC frequency must be nonzero");
  _scale_factor = 1.0 / _frequency;

  if (not params)
    {
      _overhead = impl::calibrate_tsc_overhead_min(TSC::calibrationIterations);
      return;
    }

  if (not params->get<bool>("correct_overhead",true))
    {
      _overhead = 0;
      return;
    }

  const std::string calibration_method = params->get<std::string>("calibration_method","min");
  if (calibration_method != "min" and calibration_method != "median")
    DUNE_THROW(TSCError,"Unknown TSC calibration method " << calibration_method);

  const std::size_t iterations = params->get<std::size_t>("calibration_iterations",TSC::calibrationIterations);
  if (calibration_method == "min")
    _overhead = impl::calibrate_tsc_overhead_min(iterations);
  else
    _overhead = impl::calibrate_tsc_overhead_median(iterations);
}
// Definition of the static overhead member. It starts out as the largest
// representable Counter value, i.e. a deliberately implausible placeholder,
// and is overwritten by the TSC constructor with the calibrated overhead
// (or with 0 if overhead correction is switched off).
TSC::Counter TSC::_overhead = std::numeric_limits<TSC::Counter>::max();
} // namespace PDELab
} // namespace Dune
#ifndef DUNE_PERFTOOL_COMMON_TSC_TIMER_HH
#define DUNE_PERFTOOL_COMMON_TSC_TIMER_HH
#include <dune/pdelab/common/exceptions.hh>
#include <dune/common/parametertree.hh>
namespace Dune {
namespace PDELab {
// Exception type for failures of the TSC timing support; it adds no members
// of its own to the Exception base class.
class TSCError
: public Exception
{};
// Access to the processor's time stamp counter (TSC).
//
// All functionality is offered through static member functions. Frequency and
// scale factor live in a single instance that is created the first time one
// of the functions needing it is called; the measurement overhead is kept in
// a static member so that elapsed() does not have to go through the instance.
class TSC
{

public:

  // Type of a raw tick count.
  using Counter = std::uint64_t;

  // Number of calibration samples used unless configured otherwise.
  static constexpr std::size_t calibrationIterations = 100000;

  // Reads the counter at the beginning of a measured region: executes
  // `lfence` followed by `rdtsc` and joins the two 32-bit halves delivered
  // in EDX:EAX into one 64-bit value.
  static Counter start()
  {
    unsigned low, high;
    asm volatile(
      "lfence\n"
      "rdtsc"
      : "=a" (low), "=d" (high)
    );
    const std::uint64_t upper = static_cast<std::uint64_t>(high) << 32;
    return upper | low;
  }

  // Reads the counter at the end of a measured region; uses the same
  // instruction sequence as start().
  static Counter stop()
  {
    unsigned low, high;
    asm volatile(
      "lfence\n"
      "rdtsc"
      : "=a" (low), "=d" (high)
    );
    const std::uint64_t upper = static_cast<std::uint64_t>(high) << 32;
    return upper | low;
  }

  // Tick count representing an empty duration.
  static Counter zero()
  {
    return 0;
  }

  // Creates the instance with default settings if it does not exist yet.
  static void init()
  {
    static_cast<void>(instance(nullptr));
  }

  // Creates the instance from the given parameters if it does not exist yet.
  static void init(const Dune::ParameterTree& params)
  {
    static_cast<void>(instance(&params));
  }

  // Overhead in ticks that elapsed() subtracts; creates the instance if needed.
  static Counter overhead()
  {
    const TSC& self = instance();
    return self._overhead;
  }

  // TSC frequency stored in the instance; creates the instance if needed.
  static Counter frequency()
  {
    const TSC& self = instance();
    return self._frequency;
  }

  // Ticks between begin and end minus the stored overhead.
  //
  // The static overhead is read directly, so this function never creates the
  // instance. The computation is unsigned: the result wraps around if the
  // interval is shorter than the stored overhead.
  static Counter elapsed(Counter begin, Counter end)
  {
    const Counter raw = end - begin;
    return raw - _overhead;
  }

  // Converts a tick count to seconds by multiplying with the scale factor of
  // the instance; creates the instance if needed.
  static double seconds(Counter elapsed)
  {
    const TSC& self = instance();
    return elapsed * self._scale_factor;
  }

  // The instance can neither be copied nor moved.
  TSC(const TSC&) = delete;
  TSC(TSC&&) = delete;
  TSC& operator=(const TSC&) = delete;
  TSC& operator=(TSC&&) = delete;

private:

  // Returns the instance, a function-local static object that is constructed
  // during the first call; `params` only has an effect in that first call.
  static const TSC& instance(const Dune::ParameterTree* params = nullptr)
  {
    static TSC the_instance(params);
    return the_instance;
  }

  TSC(const Dune::ParameterTree* params);

  Counter _frequency;
  double _scale_factor;
  // make this static to avoid the overhead of calling instance everytime we evaluate a timer
  static Counter _overhead;

};
} // namespace PDELab
} // namespace Dune
#endif // DUNE_PERFTOOL_COMMON_TSC_TIMER_HH
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment