HILA
Loading...
Searching...
No Matches
initialize.cpp
1
2#include <cstring>
3#include "hila.h"
4
5// define these global var here - somehow NULL needed for ostream
6std::ostream hila::out(NULL);
7std::ostream hila::out0(NULL);
8std::ofstream hila::output_file;
9bool hila::about_to_finish = false;
10bool hila::is_initialized = false;
11bool hila::check_input = false;
12int hila::check_with_nodes;
14
15void setup_partitions();
16void setup_output();
17void vector_type_info();
18
19#include <limits.h>
20#include <errno.h>
21
// Translate the value of an on/off style command line flag into an integer.
//
// flag: name of the command line flag to look up in hila::cmdline.
// Returns 1 when the flag value is "on", -1 when it is "off", and 0 when the
// flag is not set or (as the code stands here) when the value is anything else.
//
// NOTE(review): the embedded listing numbering jumps from 31 to 33 inside the
// final else-branch, so a statement between the message and `return 0` appears
// to have been lost in extraction - confirm against the upstream file.
22int get_onoff(std::string flag) {
23 // Check if flag has been set
24 if (hila::cmdline.flag_set(flag.c_str())) {
25 std::string opt = hila::cmdline.get_string(flag.c_str());
26 if (opt.compare("on") == 0)
27 return 1;
28 else if (opt.compare("off") == 0)
29 return -1;
30 else {
31 hila::out0 << "Command line argument " << flag << " requires value on/off\n";
33 return 0;
34 }
35 } else
36 return 0;
37}
38
39/////////////////////////////////////////////////////////////////////////////////
40/// Initial setup routines
41/////////////////////////////////////////////////////////////////////////////////
42
43#if (defined(__GNUC__) && !defined(DARWIN)) // || defined(__bg__)
44#include <malloc.h>
45#endif
46
47// #define DEBUG_NAN
48
49#ifdef DEBUG_NAN
50#include <fenv.h>
51#endif
52
53/**
54 * @brief Read in command line arguments. Initialise default stream and MPI communication
55 *
56 * @param argc Number of command line arguments
57 * @param argv List of command line arguments
58 */
59
60void hila::initialize(int argc, char **argv) {
61
62#if (defined(__GNUC__) && !defined(DARWIN) && !defined(_MAC_OSX_)) // || defined(__bg__)
63 /* First, adjust malloc so that glibc free() does not
64 * release space to the system, increasing the performance
65 * of the glib malloc substantially. The memory use is cyclic,
66 * so we can just sit on the max memory.
67 */
68 mallopt(M_MMAP_MAX, 0); /* don't use mmap */
69 /* HACK: don't release memory by calling sbrk */
70 mallopt(M_TRIM_THRESHOLD, -1);
71
72#ifdef DEBUG_NAN
73 feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
74#endif
75#endif
76
77 // initialize MPI so that hila::myrank() etc. works
78 initialize_communications(argc, &argv);
79
80 // catch signals
81 setup_signal_handler();
82
83 // set the initialized flag
84 hila::is_initialized = true;
85
86 // Default output file - we're happy with this unless partitions
87 // or otherwise indicated
88 // This channels outf to std::cout
89 hila::out.rdbuf(std::cout.rdbuf());
90
91 // set the timing so that gettime() returns time from this point
92 hila::inittime();
93
94 // open hila::out0 only for node 0
95 if (hila::myrank() == 0)
96 hila::out0.rdbuf(std::cout.rdbuf());
97
98 // Set the inbuilt command-line flags and their corresponding help texts
99 hila::cmdline.add_flag("-t",
100 "cpu time limit, in one of the formats:\n"
101 "s, m:s, h:m:s, d-h:m:s, or 'slurm' "
102 "where s=seconds, m=minutes, h=hours, d=days.\n"
103 "Values need not be restricted into natural ranges.\n"
104 "Format is compatible with the output of\n"
105 "' squeue -h --job ${SLURM_JOB_ID} --format=\"%L\" '\n"
106 "Option '-t slurm' makes program to use slurm to get time limit",
107 "<time>", 1);
108 hila::cmdline.add_flag("-o", "output file (default: stdout)", "<filename>", 1);
109 hila::cmdline.add_flag("-i",
110 "input file (overrides the 1st hila::input() name)\n"
111 "use '-i -' for standard input",
112 "<filename>", 1);
113 hila::cmdline.add_flag("-device",
114 "in GPU runs using only 1 GPU, choose this GPU number (default 0)",
115 "<GPU number>", 1);
116 hila::cmdline.add_flag("-check",
117 "check input & layout with <num> nodes & exit.\nDoes not initialize MPI "
118 "or GPUs (do not use mpirun)\n",
119 "<num>", 1);
120 hila::cmdline.add_flag("-partitions",
121 "number of partitioned lattice streams.\nBy default, creates "
122 "directories 'partitionN' for each stream if these don't exist.\nEnter "
123 "as '-partitions <num> <dirname>' to use '<dirname>N'.\n",
124 "<num>");
125
126 hila::cmdline.add_flag("-p",
127 "parameter overriding the input file field <key>.\n"
128 "If fields contain spaces enclose in quotes.\n"
129 "Can be repeated many times, each overrides only one input entry.",
130 "<key> <value>", 2);
131
132 // Init command line - after MPI has been started, so
133 // that all nodes do this. First feed argc and argv to the
134 // global cmdline class instance and parse for the preset flags.
135 hila::cmdline.initialise_args(argc, argv);
136 // The values can now be requested from hila::cmdline.
137
138 // check the "-check" -input early
139 // do it only with 1 node
140 if (lattice.nodes.number == 1) {
141 // Check whether '-check' was found and only then search for '-n'
142 if (hila::cmdline.flag_present("-check")) {
143
144 long nodes = 1;
145 if (hila::cmdline.flag_set("-check") > 0)
146 nodes = hila::cmdline.get_int("-check");
147
148 hila::check_input = true;
149 if (nodes <= 0)
150 nodes = 1;
151 hila::check_with_nodes = nodes;
152 hila::out << "****** INPUT AND LAYOUT CHECK ******" << std::endl;
153
154 // reset node variables
155 lattice.mynode.rank = 0;
156 lattice.nodes.number = hila::check_with_nodes;
157 }
158 }
159
160#if defined(CUDA) || defined(HIP)
161 if (!hila::check_input) {
162 long device;
163 if (hila::cmdline.flag_set("-device"))
164 device = hila::cmdline.get_int("-device");
165 else
166 device = 0;
167 hila::out0 << "Chose device " << device << "\n";
168
169 initialize_gpu(lattice.mynode.rank, device);
170 }
171#endif
172
173 setup_partitions();
174
175 setup_output();
176
177 if (hila::partitions.number() > 1) {
178 hila::out0 << " ---- SPLIT " << hila::number_of_nodes() << " nodes into "
179 << hila::partitions.number() << " partitions, this "
180 << hila::partitions.mylattice() << " ----\n";
181 }
182
183
184 if (hila::myrank() == 0) {
185 hila::print_dashed_line("HILA lattice framework");
186 hila::out0 << "Running program " << argv[0] << "\n";
187 hila::out0 << "with command line arguments '";
188 for (int i = 1; i < argc; i++)
189 hila::out0 << argv[i] << ' ';
190 hila::out0 << "'\n";
191 hila::out0 << "Code version: ";
192#if defined(GIT_SHA_VALUE)
193#define xstr(s) makestr(s)
194#define makestr(s) #s
195 hila::out0 << "git SHA " << xstr(GIT_SHA_VALUE) << '\n';
196#else
197 hila::out0 << "no git information available\n";
198#endif
199 hila::out0 << "Compiled " << __DATE__ << " at " << __TIME__ << '\n';
200
201 hila::out0 << "with options: EVEN_SITES_FIRST";
202#ifndef EVEN_SITES_FIRST
203 hila::out0 << "=0";
204#endif
205#ifdef SPECIAL_BOUNDARY_CONDITIONS
206 hila::out0 << " SPECIAL_BOUNDARY_CONDITIONS";
207#endif
208 hila::out0 << '\n';
209
210 hila::timestamp("Starting");
211 }
212
213 // Check if flag set and parse
214 if (hila::cmdline.flag_present("-t")) {
215 // Following quits if '-t' is given without a valid time argument
216 hila::setup_timelimit(hila::cmdline.get_string("-t"));
217 } else {
218 hila::out0 << "No runtime limit given\n";
219 }
220
221 if (hila::cmdline.flag_present("-i")) {
222 // Quits if '-i' given without a string argument
223 hila::out0 << "Input file from command line: " << hila::cmdline.get_string("-i") << "\n";
224 }
225
226#if defined(OPENMP)
227 hila::out0 << "Using option OPENMP - with " << omp_get_max_threads() << " threads\n";
228#endif
229
230
231#if defined(CUDA) || defined(HIP)
232 hila::out0 << "Using thread blocks of size " << N_threads << " threads\n";
233
234#if defined(GPU_AWARE_MPI)
235 hila::out0 << "Using GPU_AWARE_MPI\n";
236#else
237 hila::out0 << "Not using GPU_AWARE_MPI\n";
238#endif
239
240#if !defined(GPU_VECTOR_REDUCTION_THREAD_BLOCKS) || GPU_VECTOR_REDUCTION_THREAD_BLOCKS <= 0
241 hila::out0 << "ReductionVector with atomic operations (GPU_VECTOR_REDUCTION_THREAD_BLOCKS=0)\n";
242#else
243 hila::out0 << "ReductionVector with " << GPU_VECTOR_REDUCTION_THREAD_BLOCKS
244 << " thread blocks\n";
245#endif
246
247 if (!hila::check_input)
248 gpu_device_info();
249#endif
250
251
252#ifdef AVX
253 vector_type_info();
254#endif
255
256 /* basic static node variables */
257#if defined(CUDA) && !defined(PIZDAINT)
258 // localhost_info(&g_local_nodeid, &g_num_local_nodes);
259#endif
260
261#if (defined(__GNUC__) && !defined(DARWIN)) // || defined(__bg__)
262 hila::out0 << "GNU c-library performance: not returning allocated memory\n";
263#endif
264}
265
266
267///////////////////////////////////////////////////////////////
268/// Force quit for multinode processes -- kill all nodes
269/// No synchronisation done
270///////////////////////////////////////////////////////////////
271void hila::terminate(int status) {
272 hila::timestamp("Terminate");
273 hila::print_dashed_line();
274 hila::about_to_finish = true; // avoid destructors
275 if (hila::is_comm_initialized()) {
276 hila::finish_communications();
277 }
278 exit(1);
279}
280
281////////////////////////////////////////////////////////////////
282/// Print message and force quit
283////////////////////////////////////////////////////////////////
284
// Print `msg` to hila::out0, prefixed with "Error: ".
//
// NOTE(review): the section heading says "Print message and force quit", but no
// terminating call is visible in this body. The embedded listing numbering
// jumps from 286 to 288, so a statement after the print appears to have been
// lost in extraction - confirm against the upstream file.
285void hila::error(const char *msg) {
286 hila::out0 << "Error: " << msg << '\n';
288}
289
290void hila::error(const std::string &msg) {
291 hila::error(msg.c_str());
292}
293
294/**
295 * @brief Normal, controlled exit - all nodes must call this.
296 * Prints timing information and information about communications
297 */
// NOTE(review): the function signature line (listing line 298) is absent from
// this listing, so the body below starts without its header. Presumably it is
// `void hila::finishrun() {` - confirm against the upstream file.

// Timing summary first.
299 report_timers();
300
// Per-lattice communication statistics, written to hila::out0.
301 for (const lattice_struct *latp : lattices) {
302
303
304 int64_t gathers = latp->n_gather_done;
305 int64_t avoided = latp->n_gather_avoided;
306
// Guard against dividing by zero when no gathers were counted at all.
307 if (gathers + avoided > 0) {
308 hila::out0 << " COMMS from node 0: " << gathers << " done, " << avoided << "("
309 << 100.0 * avoided / (avoided + gathers) << "%) optimized away\n";
310 } else {
311 hila::out0 << " No communications done from node 0\n";
312 }
313 }
314
315
316#if defined(CUDA) || defined(HIP)
317 gpuMemPoolReport();
318#endif
319
// With several partitions, wait for the others before shutting down.
320 if (hila::partitions.number() > 1) {
321 hila::timestamp("Waiting to sync partitions");
322 hila::synchronize_partitions();
323 }
324
325 // hip seems to want this?
326 FFT_delete_plans();
327
// NOTE(review): listing line 328 is missing here; a statement may have been
// lost in extraction - confirm against the upstream file.
329 hila::timestamp("Finishing");
330
// Raised before communications are shut down and the process exits.
331 hila::about_to_finish = true;
332
333 hila::finish_communications();
334
335 hila::print_dashed_line();
336 exit(0);
337}
338
339
340////////////////////////////////////////////////////////////////
341/// Setup standard output file (hila::out0)
342////////////////////////////////////////////////////////////////
343
// Redirect the output streams to the file named with '-o', if that flag is present.
//
// Only rank 0 inspects the flag and opens the file (in append mode, and not at
// all during a '-check' run). On success both hila::out and hila::out0 on rank 0
// are pointed at hila::output_file. The failure flag `do_exit` is then broadcast.
//
// NOTE(review): when '-o' has no value, `name` stays empty but the open() below
// is still attempted (outside '-check' runs) - verify this is intended.
344void setup_output() {
345
346 bool do_exit = false;
347
348 if (hila::myrank() == 0) {
349 if (hila::cmdline.flag_present("-o")) {
350 // Quits if '-o' was left without an argument
351 std::string name;
352 if (hila::cmdline.flag_set("-o"))
353 name = hila::cmdline.get_string("-o");
354 else {
355 hila::out0 << "The name of the output file must be provided after flag '-o'!\n";
356 do_exit = true;
357 }
358 // If found, open the file for the output
359 if (!hila::check_input) {
360 hila::output_file.open(name, std::ios::out | std::ios::app);
361 if (hila::output_file.fail()) {
362 hila::out << "Cannot open output file " << name << '\n';
363 do_exit = true;
364 } else {
365 hila::out0 << "Output is now directed to the file '" << name << "'.\n";
// Flush what is still pending on the old buffer before swapping it.
366 hila::out.flush();
367 hila::out.rdbuf(
368 hila::output_file.rdbuf()); // output now points to output_redirect
369
370 if (hila::myrank() == 0)
371 hila::out0.rdbuf(hila::out.rdbuf());
372 }
373 }
374 }
375 }
376
// Share rank 0's verdict with the other ranks.
377 hila::broadcast(do_exit);
378 if (do_exit)
// NOTE(review): listing line 379 is missing, so this `if` has no visible body.
// Presumably it held the exit call - confirm against the upstream file.
380}
381
382
383/******************************************************
384 * Sublattice division
385 * Handle command line arguments
386 * partitions=nn
387 * sync=yes / sync=no
388 * here
389 */
390
391
// Handle the '-partitions <num> [<dirname>]' command line flag.
//
// Sets hila::partitions' count (1 when the flag is absent) and returns early for
// a single partition. Otherwise it assigns this rank to a partition, creates and
// enters the directory '<dirname>N' (default prefix "partition"), and - unless
// '-o' is given - redirects hila::out / hila::out0 to DEFAULT_OUTPUT_NAME there.
//
// NOTE(review): the embedded listing numbering skips lines 403, 421, 430, 440
// and 464 inside this function, so several statements appear to have been lost
// in extraction. The gaps are marked below - confirm against the upstream file.
392void setup_partitions() {
393
394 std::string partition_dir("partition");
395
396 // get partitions cmdlinearg first
397 if (hila::cmdline.flag_present("-partitions")) {
398 // Following quits if '-partitions' is given without an integer argument
399 long lnum = hila::cmdline.get_int("-partitions");
400 if (lnum <= 0) {
401 hila::out0 << "partitions <number> command line argument value must be positive "
402 "integer\n";
// NOTE(review): listing line 403 missing here (presumably the error exit).
404 } else
405 hila::partitions.set_number(lnum);
406
// An optional second value replaces the default directory prefix.
407 if (hila::cmdline.flag_set("-partitions") > 1) {
408 partition_dir = hila::cmdline.get_string("-partitions", 1);
409 }
410 } else
411 hila::partitions.set_number(1);
412
// Nothing more to do for a single partition.
413 if (hila::partitions.number() == 1)
414 return;
415
416 hila::out0 << " Dividing nodes into " << hila::partitions.number() << " partitions\n";
417
418 if (hila::number_of_nodes() % hila::partitions.number()) {
419 hila::out0 << "** " << hila::number_of_nodes() << " nodes not evenly divisible into "
420 << hila::partitions.number() << " partitions\n";
// NOTE(review): listing line 421 missing here (presumably the error exit).
422 }
423
424 hila::out0 << "REST OF OUTPUT TO DIRECTORIES " << partition_dir << "N\n";
425
426#if defined(BLUEGENE_LAYOUT)
427 hila::partitions.set_mylattice(bg_layout_partitions(hila::partitions.number()));
428#else // generic
// NOTE(review): listing line 430 (the divisor and closing parenthesis of the
// expression below) is missing - the statement is incomplete as shown.
429 hila::partitions.set_mylattice((hila::myrank() * hila::partitions.number()) /
431 /* and divide system into partitions */
432 if (!hila::check_input)
433 hila::split_into_partitions(hila::partitions.mylattice());
434#endif
435
436 std::string dirname = partition_dir + std::to_string(hila::partitions.mylattice());
437 if (hila::myrank() == 0) {
438 filesys_ns::create_directory(dirname);
439 }
// NOTE(review): listing line 440 missing here; a statement between directory
// creation and the change of directory may have been lost.
441 // change to new dir
442 filesys_ns::current_path(dirname);
443
444 // now need to open output file
445
446 hila::out.flush(); // this should be cout at this stage
447
448 // is output file named? (-o on cmdline) then do nothing here
449
450 if (!hila::cmdline.flag_present("-o")) {
451 int do_exit = 0;
452
// The default output file is opened in append mode, but not in '-check' runs.
453 if (!hila::check_input) {
454 hila::output_file.open(DEFAULT_OUTPUT_NAME, std::ios::out | std::ios::app);
455 if (hila::output_file.fail()) {
456 std::cout << "Cannot open output file " << DEFAULT_OUTPUT_NAME << '\n';
457 do_exit = 1;
458 }
459 }
460
461 hila::broadcast(do_exit);
462
463 if (do_exit)
// NOTE(review): listing line 464 missing - the body of this `if` is not visible
// (presumably the exit call). As shown, the flush below would become its body.
465
466 hila::out.flush();
467 if (!hila::check_input) {
468 hila::out.rdbuf(hila::output_file.rdbuf());
469 // output now points to output_redirect
470 if (hila::myrank() == 0) {
471 hila::out0.rdbuf(hila::out.rdbuf());
472 }
473 }
474 }
475}
476
477/////////////////////////////////////////////////////////////////////////////
478
479#ifdef AVX
480void vector_type_info() {
481
482 hila::out0 << "Using VCL vector class with instruction set level INSTRSET=" << INSTRSET
483 << " <=> ";
484
485 switch (INSTRSET) {
486 case 2:
487 hila::out0 << "SSE2";
488 break;
489 case 3:
490 hila::out0 << "SSE3";
491 break;
492 case 4:
493 hila::out0 << "SSSE3";
494 break;
495 case 5:
496 hila::out0 << "SSE4.1";
497 break;
498 case 6:
499 hila::out0 << "SSE4.2";
500 break;
501 case 7:
502 hila::out0 << "AVX";
503 break;
504 case 8:
505 hila::out0 << "AVX2";
506 break;
507 case 9:
508 hila::out0 << "AVX512F";
509 break;
510 case 10:
511 hila::out0 << "AVX512BW/DQ/VL";
512 break;
513 default:
514 hila::out0 << "Unknown";
515 break;
516 }
517 hila::out0 << '\n';
518 if (INSTRSET < 8)
519 hila::out0 << " (You probably should use options '-mavx2 -fmad' in compilation)\n";
520}
521
522
523#endif
524
525void hila::print_dashed_line(const std::string &text) {
526 static constexpr int linelength = 60;
527
528 if (hila::myrank() == 0) {
529
530 if (text.size() == 0) {
531 for (int i = 0; i < linelength; i++)
532 hila::out << '-';
533
534 } else {
535
536 hila::out << "----- " << text << ' ';
537 for (int i = 7 + text.size(); i < linelength; i++)
538 hila::out << '-';
539 }
540 hila::out << '\n';
541 }
542}
Define the logger class here.
Definition logger.h:8
logger_class log
Now declare the logger.
void setup_timelimit(const double secs)
Setup time limit with seconds.
Definition timing.cpp:197
int myrank()
rank of this node
Definition com_mpi.cpp:234
int number_of_nodes()
how many nodes there are
Definition com_mpi.cpp:245
void synchronize()
synchronize mpi
Definition com_mpi.cpp:254
std::ostream out
this is our default output file stream
std::ostream out0
This writes output only from main process (node 0)
std::ofstream output_file
this is just a hook to store output file, if it is in use
Definition initialize.cpp:8
void initialize(int argc, char **argv)
Initial setup routines.
void split_into_partitions(int rank)
Definition com_mpi.cpp:281
T broadcast(T &var, int rank=0)
Broadcast the value of var to all MPI ranks from rank (default=0).
Definition com_mpi.h:168
void finishrun()
Normal, controlled exit - all nodes must call this. Prints timing information and information about c...
void terminate(int status)
#define GPU_VECTOR_REDUCTION_THREAD_BLOCKS
Definition params.h:143
#define DEFAULT_OUTPUT_NAME
Default output file name.
Definition params.h:40
#define N_threads
General number of threads in a thread block.
Definition params.h:190