HILA
Loading...
Searching...
No Matches
initialize.cpp
1
2#include <cstring>
3#include "hila.h"
4
5// define these global var here - somehow NULL needed for ostream
6std::ostream hila::out(NULL);
7std::ostream hila::out0(NULL);
8std::ofstream hila::output_file;
9bool hila::about_to_finish = false;
10bool hila::is_initialized = false;
11bool hila::check_input = false;
12int hila::check_with_nodes;
13const char *hila::input_file;
15
16
17void vector_type_info();
18
19#include <limits.h>
20#include <errno.h>
21
22int get_onoff(std::string flag)
23{
24 // Check if flag has been set
25 if (hila::cmdline.flag_set(flag.c_str()))
26 {
27 std::string opt = hila::cmdline.get_string(flag.c_str());
28 if (opt.compare("on") == 0)
29 return 1;
30 else if (opt.compare("off") == 0)
31 return -1;
32 else
33 {
34 hila::out0 << "Command line argument " << flag << " requires value on/off\n";
36 return 0;
37 }
38 }
39 else
40 return 0;
41}
42
43/////////////////////////////////////////////////////////////////////////////////
44/// Initial setup routines
45/////////////////////////////////////////////////////////////////////////////////
46
47#if (defined(__GNUC__) && !defined(DARWIN)) // || defined(__bg__)
48#include <malloc.h>
49#endif
50
51// #define DEBUG_NAN
52
53#ifdef DEBUG_NAN
54#include <fenv.h>
55#endif
56
57void setup_partitions();
58
59/**
60 * @brief Read in command line arguments. Initialise default stream and MPI communication
61 *
62 * @param argc Number of command line arguments
63 * @param argv List of command line arguments
64 */
65
66void hila::initialize(int argc, char **argv) {
67
68#if (defined(__GNUC__) && !defined(DARWIN) && !defined(_MAC_OSX_)) // || defined(__bg__)
69 /* First, adjust malloc so that glibc free() does not
70 * release space to the system, increasing the performance
71 * of the glib malloc substantially. The memory use is cyclic,
72 * so we can just sit on the max memory.
73 */
74 mallopt( M_MMAP_MAX, 0 ); /* don't use mmap */
75 /* HACK: don't release memory by calling sbrk */
76 mallopt(M_TRIM_THRESHOLD, -1);
77
78#ifdef DEBUG_NAN
79 feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
80#endif
81#endif
82
83 // initialize MPI so that hila::myrank() etc. works
84 initialize_communications(argc, &argv);
85
86 // set the initialized flag
87 hila::is_initialized = true;
88
89 // Default output file - we're happy with this unless partitions
90 // or otherwise indicated
91 // This channels outf to std::cout
92 hila::out.rdbuf(std::cout.rdbuf());
93
94 // set the timing so that gettime() returns time from this point
95 hila::inittime();
96
97 // open hila::out0 only for node 0
98 if (hila::myrank() == 0)
99 hila::out0.rdbuf(std::cout.rdbuf());
100
101 // Set the inbuilt command-line flags and their corresponding help texts
102 hila::cmdline.add_flag("-t","cpu time limit");
103 hila::cmdline.add_flag("-o","output filename (default: stdout)");
104 hila::cmdline.add_flag("-i","input filename (overrides the 1st hila::input() name)\nuse '-i -' for standard input");
105 hila::cmdline.add_flag("-device","in GPU runs using only 1 GPU, choose this GPU number (default 0)");
106 hila::cmdline.add_flag("-check","check input & layout with <nodes>-nodes & exit\nonly with 1 real MPI node (don't use mpirun)");
107 hila::cmdline.add_flag("-n","number of nodes used in layout check, only relevant with -check");
108 hila::cmdline.add_flag("-partitions","number of partitioned lattice streams");
109 hila::cmdline.add_flag("-sync","synchronize partition runs (on/off) (default = off)");
110
111 // Init command line - after MPI has been started, so
112 // that all nodes do this. First feed argc and argv to the
113 // global cmdline class instance and parse for the preset flags.
114 hila::cmdline.initialise_args(argc, argv);
115 // The values can now be requested from hila::cmdline.
116
117 // check the "-check" -input early
118 // do it only with 1 node
119 if (lattice.nodes.number == 1) {
120 // Check whether '-check' was found and only then search for '-n'
121 if (hila::cmdline.flag_present("-check")) {
122 long nodes;
123 if (hila::cmdline.flag_present("-n")) {
124 nodes = hila::cmdline.get_int("-n");
125 }
126 else
127 nodes = 1;
128
129 hila::check_input = true;
130 if (nodes <= 0)
131 nodes = 1;
132 hila::check_with_nodes = nodes;
133 hila::out << "****** INPUT AND LAYOUT CHECK ******" << std::endl;
134
135 // reset node variables
136 lattice.mynode.rank = 0;
137 lattice.nodes.number = hila::check_with_nodes;
138 }
139 }
140
141#if defined(CUDA) || defined(HIP)
142 if (!hila::check_input) {
143 long device;
144 if (hila::cmdline.flag_set("-device"))
145 device = hila::cmdline.get_int("-device");
146 else
147 device = 0;
148 hila::out0 << "Chose device " << device << "\n";
149
150 initialize_gpu(lattice.mynode.rank, device);
151 }
152#endif
153
154 setup_partitions();
155
156 // check the output file if partitions not used
157 if (hila::partitions.number() == 1) {
158 int do_exit = 0;
159 if (hila::myrank() == 0) {
160 if (hila::cmdline.flag_present("-o")) {
161 // Quits if '-o' was left without an argument
162 std::string name;
163 if (hila::cmdline.flag_set("-o"))
164 name = hila::cmdline.get_string("-o");
165 else
166 {
167 hila::out0 << "The name of the output file must be provided after flag '-o'!\n";
168 do_exit = 1;
169 }
170 // If found, open the file for the output
171 if (!hila::check_input) {
172 hila::output_file.open(name, std::ios::out | std::ios::app);
173 if (hila::output_file.fail()) {
174 hila::out << "Cannot open output file " << name << '\n';
175 do_exit = 1;
176 } else {
177 hila::out0 << "Output is now directed to the file '"
178 << name << "'.\n";
179 hila::out.flush();
180 hila::out.rdbuf(
181 hila::output_file.rdbuf()); // output now points to output_redirect
182
183 if (hila::myrank() == 0)
184 hila::out0.rdbuf(hila::out.rdbuf());
185 }
186 }
187 }
188 }
189 hila::broadcast(do_exit);
190 if (do_exit)
192 }
193
194 if (hila::myrank() == 0) {
195 print_dashed_line("HILA lattice framework");
196 hila::out0 << "Running program " << argv[0] << "\n";
197 hila::out0 << "with command line arguments '";
198 for (int i = 1; i < argc; i++)
199 hila::out0 << argv[i] << ' ';
200 hila::out0 << "'\n";
201 hila::out0 << "Code version: ";
202#if defined(GIT_SHA_VALUE)
203#define xstr(s) makestr(s)
204#define makestr(s) #s
205 hila::out0 << "git SHA " << xstr(GIT_SHA_VALUE) << '\n';
206#else
207 hila::out0 << "no git information available\n";
208#endif
209 hila::out0 << "Compiled " << __DATE__ << " at " << __TIME__ << '\n';
210
211 hila::out0 << "with options: EVEN_SITES_FIRST";
212#ifndef EVEN_SITES_FIRST
213 hila::out0 << "=0";
214#endif
215#ifdef SPECIAL_BOUNDARY_CONDITIONS
216 hila::out0 << " SPECIAL_BOUNDARY_CONDITIONS";
217#endif
218 hila::out0 << '\n';
219
220 hila::timestamp("Starting");
221 }
222
223 // Check if flag set and parse
224 if (hila::cmdline.flag_present("-t")) {
225 // Following quits if '-t' is given without an integer argument
226 long cputime = hila::cmdline.get_int("-t");
227 if (cputime > 0) {
228 hila::out0 << "CPU time limit " << cputime << " seconds\n";
229 hila::setup_timelimit(cputime);
230 }
231 }
232 else {
233 hila::out0 << "No runtime limit given\n";
234 }
235
236
237 hila::input_file = nullptr;
238 if (hila::cmdline.flag_present("-i"))
239 {
240 // Quits if '-i' given without a string argument
241 // Copy to a static variable to preserve the memory address
242 static const std::string input_string = hila::cmdline.get_string("-i");
243 hila::input_file = input_string.c_str();
244 hila::out0 << "Input file from command line: " << hila::input_file << "\n";
245 }
246
247
248#if defined(OPENMP)
249 hila::out0 << "Using option OPENMP - with " << omp_get_max_threads() << " threads\n";
250#endif
251
252
253#if defined(CUDA) || defined(HIP)
254 hila::out0 << "Using thread blocks of size " << N_threads << " threads\n";
255
256#if defined(GPU_AWARE_MPI)
257 hila::out0 << "Using GPU_AWARE_MPI\n";
258#else
259 hila::out0 << "Not using GPU_AWARE_MPI\n";
260#endif
261
262#if !defined(GPU_VECTOR_REDUCTION_THREAD_BLOCKS) || GPU_VECTOR_REDUCTION_THREAD_BLOCKS <= 0
263 hila::out0 << "ReductionVector with atomic operations (GPU_VECTOR_REDUCTION_THREAD_BLOCKS=0)\n";
264#else
265 hila::out0 << "ReductionVector with " << GPU_VECTOR_REDUCTION_THREAD_BLOCKS
266 << " thread blocks\n";
267#endif
268
269 if (!hila::check_input)
270 gpu_device_info();
271#endif
272
273
274#ifdef AVX
275 vector_type_info();
276#endif
277
278 /* basic static node variables */
279#if defined(CUDA) && !defined(PIZDAINT)
280 // localhost_info(&g_local_nodeid, &g_num_local_nodes);
281#endif
282
283#if (defined(__GNUC__) && !defined(DARWIN)) // || defined(__bg__)
284 hila::out0 << "GNU c-library performance: not returning allocated memory\n";
285#endif
286}
287
288
289///////////////////////////////////////////////////////////////
290/// Force quit for multinode processes -- kill all nodes
291/// No synchronisation done
292///////////////////////////////////////////////////////////////
293void hila::terminate(int status) {
294 hila::timestamp("Terminate");
295 print_dashed_line();
296 hila::about_to_finish = true; // avoid destructors
297 if (is_comm_initialized()) {
298 finish_communications();
299 }
300 exit(1);
301}
302
303////////////////////////////////////////////////////////////////
304/// Print message and force quit
305////////////////////////////////////////////////////////////////
306
307void hila::error(const char *msg) {
308 hila::out0 << "Error: " << msg << '\n';
310}
311
312void hila::error(const std::string &msg) {
313 hila::error(msg.c_str());
314}
315
316/**
317 * @brief Normal, controlled exit - all nodes must call this.
318 * Prints timing information and information about communications
319 */
321 report_timers();
322
323 for (const lattice_struct *latp : lattices) {
324
325
326 int64_t gathers = latp->n_gather_done;
327 int64_t avoided = latp->n_gather_avoided;
328
329 if (gathers + avoided > 0) {
330 hila::out0 << " COMMS from node 0: " << gathers << " done, " << avoided << "("
331 << 100.0 * avoided / (avoided + gathers) << "%) optimized away\n";
332 } else {
333 hila::out0 << " No communications done from node 0\n";
334 }
335 }
336
337
338#if defined(CUDA) || defined(HIP)
339 gpuMemPoolReport();
340#endif
341
342 if (hila::partitions.number() > 1) {
343 hila::timestamp("Waiting to sync partitions");
344 }
345
346 // hip seems to want this?
347 FFT_delete_plans();
348
350 hila::timestamp("Finishing");
351
352 hila::about_to_finish = true;
353
354 finish_communications();
355
356 print_dashed_line();
357 exit(0);
358}
359
360/******************************************************
361 * Open parameter file - moved here in order to
362 * enable partition division if requested
363 */
364#if 0
365
366FILE *open_parameter_file()
367{
368 static char parameter[] = "parameter";
369 FILE *fil = NULL;
370
371 if (mynode == 0) {
372#ifdef SUBLATTICES
373 if (n_partitions > 1) {
374 char parameter_name[50];
375 /* First, try opening parameter99 etc. */
376 sprintf(parameter_name,"%s%d",parameter,this_partition);
377 fil = fopen(parameter_name,"r");
378
379 if (fil != NULL) {
380 fprintf(outf," READING PARAMETERS FROM %s\n",parameter_name);
381 }
382 }
383#endif
384 if (fil == NULL) {
385 fil = fopen(parameter,"r");
386 if (fil == NULL) {
387 halt(" ** No parameter file?");
388 }
389 }
390 } // mynode == 0
391 return( fil );
392}
393
394#endif
395
396/******************************************************
397 * Sublattice division
398 * Handle command line arguments
399 * partitions=nn
400 * sync=yes / sync=no
401 * out=name
402 * here
403 */
404
405void setup_partitions() {
406
407 // get partitions cmdlinearg first
408 if (hila::cmdline.flag_present("-partitions")) {
409 // Following quits if '-partitions' is given without an integer argument
410 long lnum = hila::cmdline.get_int("-partitions");
411 if (lnum <= 0) {
412 hila::out0 << "partitions=<number> command line argument value must be positive "
413 "integer (or argument omitted)\n";
415 }
416 else
417 hila::partitions._number = lnum;
418 }
419 else
420 hila::partitions._number = 1;
421
422 if (hila::partitions.number() == 1)
423 return;
424
425 hila::out0 << " Dividing nodes into " << hila::partitions.number() << " partitions\n";
426
427 if (hila::number_of_nodes() % hila::partitions.number()) {
428 hila::out0 << "** " << hila::number_of_nodes() << " nodes not evenly divisible into "
429 << hila::partitions.number() << " partitions\n";
431 }
432
433#if defined(BLUEGENE_LAYOUT)
434 hila::partitions._mylattice = bg_layout_partitions(hila::partitions.number());
435#else // generic
436 hila::partitions._mylattice =
437 (hila::myrank() * hila::partitions.number()) / hila::number_of_nodes();
438 /* and divide system into partitions */
439 if (!hila::check_input)
440 split_into_partitions(hila::partitions.mylattice());
441#endif
442 std::string fname;
443 if (hila::cmdline.flag_present("-o"))
444 {
445 std::string opt = hila::cmdline.get_string("-o");
446 fname = opt + std::to_string(hila::partitions.mylattice());
447 }
448 else
449 fname = DEFAULT_OUTPUT_NAME + std::to_string(hila::partitions.mylattice());
450
451 // now need to open output file
452
453 hila::out.flush(); // this should be cout at this stage
454
455 // all nodes open the file -- perhaps not? Leave only node 0
456 int do_exit = 0;
457 if (hila::myrank() == 0 && !hila::check_input) {
458 hila::output_file.open(fname, std::ios::out | std::ios::app);
459 if (hila::output_file.fail()) {
460 std::cout << "Cannot open output file " << fname << '\n';
461 do_exit = 1;
462 }
463 }
464
465 hila::broadcast(do_exit);
466
467 if (do_exit)
469
470 hila::out.flush();
471 if (!hila::check_input) {
472 hila::out.rdbuf(hila::output_file.rdbuf());
473 // output now points to output_redirect
474 if (hila::myrank() == 0) {
475 hila::out0.rdbuf(hila::out.rdbuf());
476 }
477 }
478 hila::out0 << " ---- SPLIT " << hila::number_of_nodes() << " nodes into "
479 << hila::partitions.number() << " partitions, this " << hila::partitions.mylattice()
480 << " ----\n";
481
482
483 /* Default sync is no */
484 if (hila::cmdline.flag_present("-sync"))
485 {
486 std::string onoffopt = hila::cmdline.get_string("-sync");
487 if (get_onoff(onoffopt) == 1)
488 {
489 hila::partitions._sync = true;
490 hila::out0 << "Synchronising partition trajectories\n";
491 }
492 }
493 else
494 {
495 hila::partitions._sync = false;
496 hila::out0 << "Not synchronising the partition trajectories\n"
497 << "Use '-sync on' command line argument to override\n";
498 }
499}
500
501/////////////////////////////////////////////////////////////////////////////
502
503#ifdef AVX
504void vector_type_info() {
505
506 hila::out0 << "Using VCL vector class with instruction set level INSTRSET=" << INSTRSET
507 << " <=> ";
508
509 switch (INSTRSET) {
510 case 2:
511 hila::out0 << "SSE2";
512 break;
513 case 3:
514 hila::out0 << "SSE3";
515 break;
516 case 4:
517 hila::out0 << "SSSE3";
518 break;
519 case 5:
520 hila::out0 << "SSE4.1";
521 break;
522 case 6:
523 hila::out0 << "SSE4.2";
524 break;
525 case 7:
526 hila::out0 << "AVX";
527 break;
528 case 8:
529 hila::out0 << "AVX2";
530 break;
531 case 9:
532 hila::out0 << "AVX512F";
533 break;
534 case 10:
535 hila::out0 << "AVX512BW/DQ/VL";
536 break;
537 default:
538 hila::out0 << "Unknown";
539 break;
540 }
541 hila::out0 << '\n';
542 if (INSTRSET < 8)
543 hila::out0 << " (You probably should use options '-mavx2 -fmad' in compilation)\n";
544}
545
546
547#endif
548
549void print_dashed_line(const std::string &text) {
550 static constexpr int linelength = 60;
551
552 if (hila::myrank() == 0) {
553
554 if (text.size() == 0) {
555 for (int i = 0; i < linelength; i++)
556 hila::out << '-';
557
558 } else {
559
560 hila::out << "----- " << text << ' ';
561 for (int i = 7 + text.size(); i < linelength; i++)
562 hila::out << '-';
563 }
564 hila::out << '\n';
565 }
566}
Define the logger class here.
Definition logger.h:8
logger_class log
Now declare the logger.
int myrank()
rank of this node
Definition com_mpi.cpp:234
int number_of_nodes()
how many nodes there are
Definition com_mpi.cpp:245
void synchronize()
synchronize mpi
Definition com_mpi.cpp:254
std::ostream out
this is our default output file stream
std::ostream out0
This writes output only from main process (node 0)
std::ofstream output_file
this is just a hook to store output file, if it is in use
Definition initialize.cpp:8
void initialize(int argc, char **argv)
Read in command line arguments. Initialise default stream and MPI communication.
T broadcast(T &var, int rank=0)
Broadcast the value of var to all MPI ranks from rank (default=0).
Definition com_mpi.h:152
void finishrun()
Normal, controlled exit - all nodes must call this. Prints timing information and information about communications.
void terminate(int status)
#define GPU_VECTOR_REDUCTION_THREAD_BLOCKS
Definition params.h:142
#define DEFAULT_OUTPUT_NAME
Default output file name.
Definition params.h:40
#define N_threads
General number of threads in a thread block.
Definition params.h:189