HILA
Loading...
Searching...
No Matches
initialize.cpp
1
2#include <cstring>
3#include "hila.h"
4
5// define these global var here - somehow NULL needed for ostream
6std::ostream hila::out(NULL);
7std::ostream hila::out0(NULL);
8std::ofstream hila::output_file;
9bool hila::about_to_finish = false;
10bool hila::is_initialized = false;
11bool hila::check_input = false;
12int hila::check_with_nodes;
14
// Forward declarations of helper routines defined further down in this file;
// they are called from hila::initialize().
// (vector_type_info() is defined, and called, only when AVX is defined.)
15void setup_partitions();
16void setup_output();
17void vector_type_info();
18
19#include <limits.h>
20#include <errno.h>
21
// Read an on/off valued command line flag.
//
// Returns  1 if the flag has been set with the value "on",
//         -1 if it has been set with the value "off",
//          0 if the flag has not been set, or (after printing a message on
//            hila::out0) if it carries any other value.
//
// NOTE(review): the embedded listing numbers jump from 31 to 33 below, so one
// source line following the error message is missing from this view - confirm
// against the repository what happens there before relying on 'return 0'.
22int get_onoff(std::string flag) {
23 // Check if flag has been set
24 if (hila::cmdline.flag_set(flag.c_str())) {
25 std::string opt = hila::cmdline.get_string(flag.c_str());
26 if (opt.compare("on") == 0)
27 return 1;
28 else if (opt.compare("off") == 0)
29 return -1;
30 else {
   // neither "on" nor "off": report the offending flag
31 hila::out0 << "Command line argument " << flag << " requires value on/off\n";
33 return 0;
34 }
35 } else
   // flag absent from the command line
36 return 0;
37}
38
39/////////////////////////////////////////////////////////////////////////////////
40/// Initial setup routines
41/////////////////////////////////////////////////////////////////////////////////
42
43#if (defined(__GNUC__) && !defined(DARWIN)) // || defined(__bg__)
44#include <malloc.h>
45#endif
46
47// #define DEBUG_NAN
48
49#ifdef DEBUG_NAN
50#include <fenv.h>
51#endif
52
53/**
54 * @brief Read in command line arguments. Initialise default stream and MPI communication
55 *
56 * @param argc Number of command line arguments
57 * @param argv List of command line arguments
58 */
59
60void hila::initialize(int argc, char **argv) {
61
62#if (defined(__GNUC__) && !defined(DARWIN) && !defined(_MAC_OSX_)) // || defined(__bg__)
63 /* First, adjust malloc so that glibc free() does not
64 * release space to the system, increasing the performance
65 * of the glib malloc substantially. The memory use is cyclic,
66 * so we can just sit on the max memory.
67 */
68 mallopt(M_MMAP_MAX, 0); /* don't use mmap */
69 /* HACK: don't release memory by calling sbrk */
70 mallopt(M_TRIM_THRESHOLD, -1);
71
72#ifdef DEBUG_NAN
73 feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
74#endif
75#endif
76
77 // First, get the base lattice_struct - used for storing basic data
78 lattice_struct * base_lat = new lattice_struct;
79 lattice.set_lattice_pointer(base_lat);
80
81 // initialize MPI so that hila::myrank() etc. works
82 initialize_communications(argc, &argv);
83
84 // catch signals
85 setup_signal_handler();
86
87 // set the initialized flag
88 hila::is_initialized = true;
89
90 // Default output file - we're happy with this unless partitions
91 // or otherwise indicated
92 // This channels outf to std::cout
93 hila::out.rdbuf(std::cout.rdbuf());
94
95 // set the timing so that gettime() returns time from this point
96 hila::inittime();
97
98 // open hila::out0 only for node 0
99 if (hila::myrank() == 0)
100 hila::out0.rdbuf(std::cout.rdbuf());
101
102 // Set the inbuilt command-line flags and their corresponding help texts
103 hila::cmdline.add_flag("-t",
104 "cpu time limit, in one of the formats:\n"
105 "s, m:s, h:m:s, d-h:m:s, or 'slurm' "
106 "where s=seconds, m=minutes, h=hours, d=days.\n"
107 "Values need not be restricted into natural ranges.\n"
108 "Format is compatible with the output of\n"
109 "' squeue -h --job ${SLURM_JOB_ID} --format=\"%L\" '\n"
110 "Option '-t slurm' makes program to use slurm to get time limit",
111 "<time>", 1);
112 hila::cmdline.add_flag("-o", "output file (default: stdout)", "<filename>", 1);
113 hila::cmdline.add_flag("-i",
114 "input file (overrides the 1st hila::input() name)\n"
115 "use '-i -' for standard input",
116 "<filename>", 1);
117 hila::cmdline.add_flag("-device",
118 "in GPU runs using only 1 GPU, choose this GPU number (default 0)",
119 "<GPU number>", 1);
120 hila::cmdline.add_flag("-check",
121 "check input & layout with <num> nodes & exit.\nDoes not initialize MPI "
122 "or GPUs (do not use mpirun)\n",
123 "<num>", 1);
124 hila::cmdline.add_flag("-partitions",
125 "number of partitioned lattice streams.\nBy default, creates "
126 "directories 'partitionN' for each stream if these don't exist.\nEnter "
127 "as '-partitions <num> <dirname>' to use '<dirname>N'.\n",
128 "<num>");
129
130 hila::cmdline.add_flag("-p",
131 "parameter overriding the input file field <key>.\n"
132 "If fields contain spaces enclose in quotes.\n"
133 "Can be repeated many times, each overrides only one input entry.",
134 "<key> <value>", 2);
135
136 // Init command line - after MPI has been started, so
137 // that all nodes do this. First feed argc and argv to the
138 // global cmdline class instance and parse for the preset flags.
139 hila::cmdline.initialise_args(argc, argv);
140 // The values can now be requested from hila::cmdline.
141
142 // check the "-check" -input early
143 // do it only with 1 node
144 if (lattice->nodes.number == 1) {
145 // Check whether '-check' was found and only then search for '-n'
146 if (hila::cmdline.flag_present("-check")) {
147
148 long nodes = 1;
149 if (hila::cmdline.flag_set("-check") > 0)
150 nodes = hila::cmdline.get_int("-check");
151
152 hila::check_input = true;
153 if (nodes <= 0)
154 nodes = 1;
155 hila::check_with_nodes = nodes;
156 hila::out << "****** INPUT AND LAYOUT CHECK ******" << std::endl;
157
158 // reset node variables
159 lattice.ptr()->mynode.rank = 0;
160 lattice.ptr()->nodes.number = hila::check_with_nodes;
161 }
162 }
163
164#if defined(CUDA) || defined(HIP)
165 if (!hila::check_input) {
166 long device;
167 if (hila::cmdline.flag_set("-device"))
168 device = hila::cmdline.get_int("-device");
169 else
170 device = 0;
171 hila::out0 << "Chose device " << device << "\n";
172
173 initialize_gpu(lattice->mynode.rank, device);
174 }
175#endif
176
177 setup_partitions();
178
179 setup_output();
180
181 if (hila::partitions.number() > 1) {
182 hila::out0 << " ---- SPLIT " << hila::number_of_nodes() << " nodes into "
183 << hila::partitions.number() << " partitions, this "
184 << hila::partitions.mylattice() << " ----\n";
185 }
186
187
188 if (hila::myrank() == 0) {
189 hila::print_dashed_line("HILA lattice framework");
190 hila::out0 << "Running program " << argv[0] << "\n";
191 hila::out0 << "with command line arguments '";
192 for (int i = 1; i < argc; i++)
193 hila::out0 << argv[i] << ' ';
194 hila::out0 << "'\n";
195 hila::out0 << "Code version: ";
196#if defined(GIT_SHA_VALUE)
197#define xstr(s) makestr(s)
198#define makestr(s) #s
199 hila::out0 << "git SHA " << xstr(GIT_SHA_VALUE) << '\n';
200#else
201 hila::out0 << "no git information available\n";
202#endif
203 hila::out0 << "Compiled " << __DATE__ << " at " << __TIME__ << '\n';
204
205 hila::out0 << "with options: EVEN_SITES_FIRST";
206#ifndef EVEN_SITES_FIRST
207 hila::out0 << "=0";
208#endif
209#ifdef SPECIAL_BOUNDARY_CONDITIONS
210 hila::out0 << " SPECIAL_BOUNDARY_CONDITIONS";
211#endif
212 hila::out0 << '\n';
213
214 hila::timestamp("Starting");
215 }
216
217 // Check if flag set and parse
218 if (hila::cmdline.flag_present("-t")) {
219 // Following quits if '-t' is given without a valid time argument
220 hila::setup_timelimit(hila::cmdline.get_string("-t"));
221 } else {
222 hila::out0 << "No runtime limit given\n";
223 }
224
225 if (hila::cmdline.flag_present("-i")) {
226 // Quits if '-i' given without a string argument
227 hila::out0 << "Input file from command line: " << hila::cmdline.get_string("-i") << "\n";
228 }
229
230#if defined(OPENMP)
231 hila::out0 << "Using option OPENMP - with " << omp_get_max_threads() << " threads\n";
232#endif
233
234
235#if defined(CUDA) || defined(HIP)
236 hila::out0 << "Using thread blocks of size " << N_threads << " threads\n";
237
238#if defined(GPU_AWARE_MPI)
239 hila::out0 << "Using GPU_AWARE_MPI\n";
240#else
241 hila::out0 << "Not using GPU_AWARE_MPI\n";
242#endif
243
244#if !defined(GPU_VECTOR_REDUCTION_THREAD_BLOCKS) || GPU_VECTOR_REDUCTION_THREAD_BLOCKS <= 0
245 hila::out0 << "ReductionVector with atomic operations (GPU_VECTOR_REDUCTION_THREAD_BLOCKS=0)\n";
246#else
247 hila::out0 << "ReductionVector with " << GPU_VECTOR_REDUCTION_THREAD_BLOCKS
248 << " thread blocks\n";
249#endif
250
251 if (!hila::check_input)
252 gpu_device_info();
253#endif
254
255
256#ifdef AVX
257 vector_type_info();
258#endif
259
260 /* basic static node variables */
261#if defined(CUDA) && !defined(PIZDAINT)
262 // localhost_info(&g_local_nodeid, &g_num_local_nodes);
263#endif
264
265#if (defined(__GNUC__) && !defined(DARWIN)) // || defined(__bg__)
266 hila::out0 << "GNU c-library performance: not returning allocated memory\n";
267#endif
268}
269
270
271///////////////////////////////////////////////////////////////
272/// Force quit for multinode processes -- kill all nodes
273/// No synchronisation done
274///////////////////////////////////////////////////////////////
275void hila::terminate(int status) {
276 hila::timestamp("Terminate");
277 hila::print_dashed_line();
278 hila::about_to_finish = true; // avoid destructors
279 if (hila::is_comm_initialized()) {
280 hila::finish_communications();
281 }
282 exit(1);
283}
284
285////////////////////////////////////////////////////////////////
286/// Print message and force quit
287////////////////////////////////////////////////////////////////
288
/// Print "Error: <msg>" followed by a newline on hila::out0.
///
/// NOTE(review): the heading above says "force quit", but no terminating call
/// is visible in this body. The embedded listing numbers skip from 290 to 292,
/// so one source line (presumably the quit call) is missing from this view -
/// confirm against the repository.
289void hila::error(const char *msg) {
290 hila::out0 << "Error: " << msg << '\n';
292}
293
/// std::string convenience overload: forwards to the const char * version.
294void hila::error(const std::string &msg) {
295 hila::error(msg.c_str());
296}
297
298/**
299 * @brief Normal, controlled exit - all nodes must call this.
300 * Prints timing information and information about communications
301 */
// Body of the normal, controlled exit routine described in the comment above:
// reports timers and gather statistics, synchronizes partitions if there are
// several, deletes FFT plans, shuts down communications and exits with code 0.
//
// NOTE(review): the function header (listing line 302) and listing line 329
// are missing from this view - the embedded numbers skip them. Confirm both
// against the repository.
303 report_timers();
304
305
   // gather (communication) counters, as accumulated on this node
306 int64_t gathers = hila::n_gather_done;
307 int64_t avoided = hila::n_gather_avoided;
308
   // the percentage is computed only when the denominator is non-zero
309 if (gathers + avoided > 0) {
310 hila::out0 << " COMMS from node 0: " << gathers << " done, " << avoided << "("
311 << 100.0 * avoided / (avoided + gathers) << "%) optimized away\n";
312 } else {
313 hila::out0 << " No communications done from node 0\n";
314 }
315
316
317#if defined(CUDA) || defined(HIP)
318 gpuMemPoolReport();
319#endif
320
   // with several partitions, wait for the others before shutting down
321 if (hila::partitions.number() > 1) {
322 hila::timestamp("Waiting to sync partitions");
323 hila::synchronize_partitions();
324 }
325
326 // hip seems to want this?
327 FFT_delete_plans();
328
330 hila::timestamp("Finishing");
331
   // set before communications are shut down (hila::terminate marks the same flag "avoid destructors")
332 hila::about_to_finish = true;
333
334 hila::finish_communications();
335
336 hila::print_dashed_line();
337 exit(0);
338}
339
340
341////////////////////////////////////////////////////////////////
342/// Setup standard output file (hila::out0)
343////////////////////////////////////////////////////////////////
344
// Redirect the program output to the file given with the command line flag '-o'.
//
// Only rank 0 inspects the flag and opens the file (in append mode). On success
// hila::out and hila::out0 of rank 0 are pointed to the file's stream buffer.
// No file is opened when hila::check_input is set. A missing file name or a
// failed open sets do_exit, which is then broadcast to all ranks.
//
// NOTE(review): the embedded listing numbers skip from 379 to 381, so the
// statement controlled by the final 'if (do_exit)' is missing from this view -
// confirm against the repository.
345void setup_output() {
346
347 bool do_exit = false;
348
349 if (hila::myrank() == 0) {
350 if (hila::cmdline.flag_present("-o")) {
351 // '-o' without an argument is an error: flagged through do_exit below
352 std::string name;
353 if (hila::cmdline.flag_set("-o"))
354 name = hila::cmdline.get_string("-o");
355 else {
356 hila::out0 << "The name of the output file must be provided after flag '-o'!\n";
357 do_exit = true;
358 }
359 // Open the file for the output.
   // Note: this is attempted also when no name was given above (name is then empty)
360 if (!hila::check_input) {
361 hila::output_file.open(name, std::ios::out | std::ios::app);
362 if (hila::output_file.fail()) {
363 hila::out << "Cannot open output file " << name << '\n';
364 do_exit = true;
365 } else {
366 hila::out0 << "Output is now directed to the file '" << name << "'.\n";
367 hila::out.flush();
368 hila::out.rdbuf(
369 hila::output_file.rdbuf()); // hila::out now writes to hila::output_file
370
   // always true here: we are already inside the rank 0 branch
371 if (hila::myrank() == 0)
372 hila::out0.rdbuf(hila::out.rdbuf());
373 }
374 }
375 }
376 }
377
   // share the decision made on rank 0 with all ranks
378 hila::broadcast(do_exit);
379 if (do_exit)
381}
382
383
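384/******************************************************
385 * Lattice partitions
386 * Handle the command line argument
387 * -partitions <num> [<dirname>]
388 * which divides the nodes into <num> lattice streams, each
389 * working in its own directory '<dirname>N' (default 'partitionN')
390 */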
391
392
// Divide the nodes into partitions as requested with the command line flag
// '-partitions <num> [<dirname>]'.
//
// Without the flag the number of partitions is set to 1 and the routine
// returns. Otherwise the partition index of this node is set, the system is
// split (unless hila::check_input is set), rank 0 creates the directory
// '<dirname>N' (default base name "partition") and the working directory is
// changed to it. If '-o' is not given, output is redirected to the file
// DEFAULT_OUTPUT_NAME in that directory.
//
// NOTE(review): this listing has dropped several source lines inside this
// function - the embedded numbers skip 404, 422, 431, 441 and 465. The
// affected places are marked below; confirm them against the repository.
393void setup_partitions() {
394
395 std::string partition_dir("partition");
396
397 // get partitions cmdlinearg first
398 if (hila::cmdline.flag_present("-partitions")) {
399 // Following quits if '-partitions' is given without an integer argument
400 long lnum = hila::cmdline.get_int("-partitions");
401 if (lnum <= 0) {
402 hila::out0 << "partitions <number> command line argument value must be positive "
403 "integer\n";
   // NOTE(review): listing line 404 missing here
405 } else
406 hila::partitions.set_number(lnum);
407
   // optional second argument: base name of the partition directories
408 if (hila::cmdline.flag_set("-partitions") > 1) {
409 partition_dir = hila::cmdline.get_string("-partitions", 1);
410 }
411 } else
412 hila::partitions.set_number(1);
413
   // a single partition needs no further set-up
414 if (hila::partitions.number() == 1)
415 return;
416
417 hila::out0 << " Dividing nodes into " << hila::partitions.number() << " partitions\n";
418
   // the number of nodes must be a multiple of the number of partitions
419 if (hila::number_of_nodes() % hila::partitions.number()) {
420 hila::out0 << "** " << hila::number_of_nodes() << " nodes not evenly divisible into "
421 << hila::partitions.number() << " partitions\n";
   // NOTE(review): listing line 422 missing here
423 }
424
425 hila::out0 << "REST OF OUTPUT TO DIRECTORIES " << partition_dir << "N\n";
426
427#if defined(BLUEGENE_LAYOUT)
428 hila::partitions.set_mylattice(bg_layout_partitions(hila::partitions.number()));
429#else // generic
430 hila::partitions.set_mylattice((hila::myrank() * hila::partitions.number()) /
   // NOTE(review): listing line 431 (the rest of the expression above) missing here
432 /* and divide system into partitions */
433 if (!hila::check_input)
434 hila::split_into_partitions(hila::partitions.mylattice());
435#endif
436
   // directory of this partition: base name followed by the partition index
437 std::string dirname = partition_dir + std::to_string(hila::partitions.mylattice());
438 if (hila::myrank() == 0) {
439 filesys_ns::create_directory(dirname);
440 }
   // NOTE(review): listing line 441 missing here
442 // change to new dir
443 filesys_ns::current_path(dirname);
444
445 // now need to open output file
446
447 hila::out.flush(); // this should be cout at this stage
448
449 // is output file named? (-o on cmdline) then do nothing here
450
451 if (!hila::cmdline.flag_present("-o")) {
452 int do_exit = 0;
453
   // the default output file is opened in append mode
454 if (!hila::check_input) {
455 hila::output_file.open(DEFAULT_OUTPUT_NAME, std::ios::out | std::ios::app);
456 if (hila::output_file.fail()) {
457 std::cout << "Cannot open output file " << DEFAULT_OUTPUT_NAME << '\n';
458 do_exit = 1;
459 }
460 }
461
462 hila::broadcast(do_exit);
463
464 if (do_exit)
   // NOTE(review): listing line 465 (the statement controlled by the 'if' above) missing here
466
467 hila::out.flush();
468 if (!hila::check_input) {
469 hila::out.rdbuf(hila::output_file.rdbuf());
470 // hila::out now writes to hila::output_file
471 if (hila::myrank() == 0) {
472 hila::out0.rdbuf(hila::out.rdbuf());
473 }
474 }
475 }
476}
477
478/////////////////////////////////////////////////////////////////////////////
479
480#ifdef AVX
481void vector_type_info() {
482
483 hila::out0 << "Using VCL vector class with instruction set level INSTRSET=" << INSTRSET
484 << " <=> ";
485
486 switch (INSTRSET) {
487 case 2:
488 hila::out0 << "SSE2";
489 break;
490 case 3:
491 hila::out0 << "SSE3";
492 break;
493 case 4:
494 hila::out0 << "SSSE3";
495 break;
496 case 5:
497 hila::out0 << "SSE4.1";
498 break;
499 case 6:
500 hila::out0 << "SSE4.2";
501 break;
502 case 7:
503 hila::out0 << "AVX";
504 break;
505 case 8:
506 hila::out0 << "AVX2";
507 break;
508 case 9:
509 hila::out0 << "AVX512F";
510 break;
511 case 10:
512 hila::out0 << "AVX512BW/DQ/VL";
513 break;
514 default:
515 hila::out0 << "Unknown";
516 break;
517 }
518 hila::out0 << '\n';
519 if (INSTRSET < 8)
520 hila::out0 << " (You probably should use options '-mavx2 -fmad' in compilation)\n";
521}
522
523
524#endif
525
526void hila::print_dashed_line(const std::string &text) {
527 static constexpr int linelength = 60;
528
529 if (hila::myrank() == 0) {
530
531 if (text.size() == 0) {
532 for (int i = 0; i < linelength; i++)
533 hila::out << '-';
534
535 } else {
536
537 hila::out << "----- " << text << ' ';
538 for (int i = 7 + text.size(); i < linelength; i++)
539 hila::out << '-';
540 }
541 hila::out << '\n';
542 }
543}
lattice_struct * ptr() const
get non-const pointer to lattice_struct (cf. operator ->)
Definition lattice.h:474
Define the logger class here.
Definition logger.h:8
logger_class log
Now declare the logger.
void setup_timelimit(const double secs)
Setup time limit with seconds.
Definition timing.cpp:197
int myrank()
rank of this node
Definition com_mpi.cpp:237
int number_of_nodes()
how many nodes there are
Definition com_mpi.cpp:248
void synchronize()
synchronize mpi + gpu
Definition com_mpi.cpp:257
std::ostream out
this is our default output file stream
std::ostream out0
This writes output only from main process (node 0)
std::ofstream output_file
this is just a hook to store output file, if it is in use
Definition initialize.cpp:8
void initialize(int argc, char **argv)
Initial setup routines.
void split_into_partitions(int rank)
Definition com_mpi.cpp:290
T broadcast(T &var, int rank=0)
Broadcast the value of var to all MPI ranks from rank (default=0).
Definition com_mpi.h:170
void finishrun()
Normal, controlled exit - all nodes must call this. Prints timing information and information about c...
void terminate(int status)
#define GPU_VECTOR_REDUCTION_THREAD_BLOCKS
Definition params.h:143
#define DEFAULT_OUTPUT_NAME
Default output file name.
Definition params.h:40
#define N_threads
General number of threads in a thread block.
Definition params.h:212