// File-scope time origin. It is initialised to the sentinel -1.0; code later in
// this file tests `start_time == -1.0` and subtracts start_time from tp.tv_sec.
17static double start_time = -1.0;
// timer::init takes a C-string `tag`.
// NOTE(review): the embedded listing numbers jump (27 -> 45 -> 53), so the
// statements below may belong to other timer members whose headers are not
// visible here -- confirm against the complete file.
27void timer::init(
const char *tag) {
// Set both t_start and t_total to 0.0.
45 t_start = t_total = 0.0;
// Clear both the is_on and the is_error flags.
47 is_on = is_error =
false;
// Tail of a stream-insertion statement whose beginning is not shown; it emits
// the "unbalanced start/stop" diagnostic text.
53 <<
"' error, unbalanced start/stop. Removing from statistics\n";
// timer::start takes no arguments and returns a double.
// NOTE(review): the embedded listing numbers jump (58 -> 69 -> 82), so the
// second synchronisation block and the accumulation below may belong to a
// different member whose header is not visible here -- confirm.
58double timer::start() {
// Only when GPU_SYNCHRONIZE_TIMERS is defined: call gpuStreamSynchronize(0).
// NOTE(review): presumably this waits for queued GPU work before the time is
// taken -- confirm against the GPU backend.
69#ifdef GPU_SYNCHRONIZE_TIMERS
70 gpuStreamSynchronize(0);
// Same conditional synchronisation call, at a later point in the listing.
82#ifdef GPU_SYNCHRONIZE_TIMERS
83 gpuStreamSynchronize(0);
// Add the difference between `e` and t_start to the running total t_total.
87 t_total += (e - t_start);
// timer::value takes no arguments and returns a timer_value.
// NOTE(review): the body is not visible in this listing.
92timer_value timer::value() {
// timer::report formats one line describing this timer into `line` via
// std::snprintf (size limit 200). In the timed case the fields are label,
// t_total, count, the per-call average and t_total / ttime.
// NOTE(review): the embedded listing numbers skip lines, so the declarations of
// `line` and `ttime`, some braces, and whatever emits `line` are not visible here.
99void timer::report(
bool print_not_timed) {
// Timed case: at least one call counted and no error flagged.
105 if (count > 0 && !is_error) {
// The unit is chosen from the average t_total / count: above 0.1 it is printed
// unscaled with the label "s".
106 if (t_total / count > 0.1) {
107 std::snprintf(line, 200,
"%-20s: %14.3f %14ld %10.3f s %8.4f\n", label.c_str(),
108 t_total, (
long)count, t_total / count, t_total / ttime);
109 }
else if (t_total / count > 1e-4) {
// Average above 1e-4: multiplied by 1e3 and labelled "ms".
110 std::snprintf(line, 200,
"%-20s: %14.3f %14ld %10.3f ms %8.4f\n", label.c_str(),
111 t_total, (
long)count, 1e3 * t_total / count, t_total / ttime);
// Average multiplied by 1e6 and labelled "μs".
// NOTE(review): listing line 112 is missing; presumably it is the final
// `else` of the unit chain -- confirm.
113 std::snprintf(line, 200,
"%-20s: %14.3f %14ld %10.3f μs %8.4f\n", label.c_str(),
114 t_total, (
long)count, 1e6 * t_total / count, t_total / ttime);
117 }
else if (!is_error && print_not_timed) {
// No error flagged and the caller asked for untimed timers to be listed.
118 std::snprintf(line, 200,
"%-20s: no timed calls made\n", label.c_str());
120 }
else if (is_error) {
// Error flag set: print the unbalanced start/stop notice instead of timings.
121 std::snprintf(line, 200,
"%-20s: error:unbalanced start/stop\n", label.c_str());
// report_timers writes the timer-report banner to hila::out.
// NOTE(review): the embedded listing numbers skip lines; string continuations,
// the preprocessor lines closing the conditionals, and any per-timer output
// are not visible here.
127void report_timers() {
// Only when CUDA or HIP is defined: state whether GPU_SYNCHRONIZE_TIMERS is defined.
131#if defined(CUDA) || defined(HIP)
132#if defined(GPU_SYNCHRONIZE_TIMERS)
133 hila::out <<
"TIMERS: synchronized to GPU kernel execution (GPU_SYNCHRONIZE_TIMERS "
// Message stating that GPU_SYNCHRONIZE_TIMERS is not defined and that
// fine-grained timing is likely to be incorrect.
136 hila::out <<
"TIMERS: GPU_SYNCHRONIZE_TIMERS not defined, fine-grained timing "
137 "likely to be incorrect\n";
// Column header: total(sec), calls, time/call, fraction.
141 hila::out <<
"TIMER REPORT: total(sec) calls "
142 "time/call fraction\n";
// Dashed separator rule.
143 hila::out <<
"------------------------------------------------------------"
// Second dashed separator rule, at a later point in the listing.
150 hila::out <<
"------------------------------------------------------------"
// Fragment of clock-reading code; the enclosing function header(s) are not
// visible in this listing.
// Test whether start_time still equals -1.0 (the value it is initialised with).
// NOTE(review): the guarded statement is not shown.
166 if (start_time == -1.0)
// Read CLOCK_MONOTONIC into tp.
169 clock_gettime(CLOCK_MONOTONIC, &tp);
// Seconds relative to start_time, plus the nanosecond field converted to seconds.
170 return (((
double)tp.tv_sec - start_time) + 1.0e-9 * (
double)tp.tv_nsec);
// A second test of the same -1.0 value; listing numbers jump from 170 to 174.
// NOTE(review): the body of this branch is not shown.
174 if (start_time == -1.0) {
// File-scope time limit, initialised to 0. setup_timelimit() overwrites it with
// its `seconds` argument, and a later fragment in this file tests
// `timelimit == 0.0`.
187static double timelimit = 0;
// setup_timelimit stores `seconds`, converted to double, in the file-scope
// variable timelimit.
// NOTE(review): the closing brace is not visible in this listing.
189void setup_timelimit(
long seconds) {
190 timelimit = (double)seconds;
// Fragment of a time-limit check; the enclosing function header and the
// definition of this_time are not visible in this listing.
// Function-local statics keep their values between calls: the largest gap seen
// between consecutive checks, and the time recorded at the previous check.
194 static double max_interval = 0.0;
195 static double previous_time = 0.0;
// Test for timelimit == 0.0.
// NOTE(review): the guarded statement is not shown; presumably 0 means that no
// limit has been set -- confirm.
199 if (timelimit == 0.0)
// Track the longest interval between successive checks, then remember this one.
204 if (this_time - previous_time > max_interval)
205 max_interval = this_time - previous_time;
206 previous_time = this_time;
// True when the remaining time (timelimit - this_time) is smaller than the
// longest observed interval plus a margin of 5 * 60.0.
209 if (timelimit - this_time < max_interval + 5 * 60.0)
// Print the time used so far and the time remaining.
214 hila::out <<
"TIMECHECK: " << this_time <<
"s used, " << timelimit - this_time
// Announce that the program is exiting because of the CPU time limit.
217 hila::out <<
"CPU TIME LIMIT, EXITING THE PROGRAM\n";
// timestamp(const char *msg): the visible part builds a textual date string.
// NOTE(review): the use of `msg` and the output statement are not visible in
// this listing.
227void timestamp(
const char *msg) {
// Current calendar time.
230 std::time_t ct = std::time(NULL);
// ctime() text ends with a newline character; the resize drops that last character.
233 std::string d = ctime(&ct);
234 d.resize(d.size() - 1);
// timestamp(const std::string &msg): overload that forwards to
// hila::timestamp with msg.c_str().
// NOTE(review): the closing brace is not visible in this listing.
242void timestamp(
const std::string &msg) {
243 hila::timestamp(msg.c_str());
This file defines all includes for HILA.
Implement hila::swap for gauge fields.
int myrank()
rank of this node
std::ostream out
this is our default output file stream
std::ostream out0
This writes output only from main process (node 0)
T broadcast(T &var, int rank=0)
Broadcast the value of var to all MPI ranks from rank (default=0).
std::vector< timer * > timer_list
Timer routines - for high-resolution event timing.