// Global run-state flags of the hila namespace, defined here.

// Set to true by the shutdown paths in this file (after the "Terminate" /
// "Finishing" timestamps) just before communications are finished.
9bool hila::about_to_finish =
false;
// Set to true during start-up, once communications and the signal handler
// have been set up.
10bool hila::is_initialized =
false;
// Set to true when the command line flag "-check" is present. The code in
// this file tests it to guard GPU initialization and output-file handling.
11bool hila::check_input =
false;
// Node count used for the "-check" run: assigned from the value read for
// "-check" and then copied into the lattice's nodes.number.
12int hila::check_with_nodes;
// Forward declarations of file-local helpers that are defined further down
// in this file.
15void setup_partitions();
17void vector_type_info();
22int get_onoff(std::string flag) {
24 if (hila::cmdline.flag_set(flag.c_str())) {
25 std::string opt = hila::cmdline.get_string(flag.c_str());
26 if (opt.compare(
"on") == 0)
28 else if (opt.compare(
"off") == 0)
31 hila::out0 <<
"Command line argument " << flag <<
" requires value on/off\n";
43#if (defined(__GNUC__) && !defined(DARWIN))
62#if (defined(__GNUC__) && !defined(DARWIN) && !defined(_MAC_OSX_))
68 mallopt(M_MMAP_MAX, 0);
70 mallopt(M_TRIM_THRESHOLD, -1);
73 feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
79 lattice.set_lattice_pointer(base_lat);
82 initialize_communications(argc, &argv);
85 setup_signal_handler();
88 hila::is_initialized =
true;
103 hila::cmdline.add_flag(
"-t",
104 "cpu time limit, in one of the formats:\n"
105 "s, m:s, h:m:s, d-h:m:s, or 'slurm' "
106 "where s=seconds, m=minutes, h=hours, d=days.\n"
107 "Values need not be restricted into natural ranges.\n"
108 "Format is compatible with the output of\n"
109 "' squeue -h --job ${SLURM_JOB_ID} --format=\"%L\" '\n"
110 "Option '-t slurm' makes program to use slurm to get time limit",
112 hila::cmdline.add_flag(
"-o",
"output file (default: stdout)",
"<filename>", 1);
113 hila::cmdline.add_flag(
"-i",
114 "input file (overrides the 1st hila::input() name)\n"
115 "use '-i -' for standard input",
117 hila::cmdline.add_flag(
"-device",
118 "in GPU runs using only 1 GPU, choose this GPU number (default 0)",
120 hila::cmdline.add_flag(
"-check",
121 "check input & layout with <num> nodes & exit.\nDoes not initialize MPI "
122 "or GPUs (do not use mpirun)\n",
124 hila::cmdline.add_flag(
"-partitions",
125 "number of partitioned lattice streams.\nBy default, creates "
126 "directories 'partitionN' for each stream if these don't exist.\nEnter "
127 "as '-partitions <num> <dirname>' to use '<dirname>N'.\n",
130 hila::cmdline.add_flag(
"-p",
131 "parameter overriding the input file field <key>.\n"
132 "If fields contain spaces enclose in quotes.\n"
133 "Can be repeated many times, each overrides only one input entry.",
139 hila::cmdline.initialise_args(argc, argv);
144 if (lattice->nodes.number == 1) {
146 if (hila::cmdline.flag_present(
"-check")) {
149 if (hila::cmdline.flag_set(
"-check") > 0)
150 nodes = hila::cmdline.get_int(
"-check");
152 hila::check_input =
true;
155 hila::check_with_nodes = nodes;
156 hila::out <<
"****** INPUT AND LAYOUT CHECK ******" << std::endl;
159 lattice.
ptr()->mynode.rank = 0;
160 lattice.
ptr()->nodes.number = hila::check_with_nodes;
164#if defined(CUDA) || defined(HIP)
165 if (!hila::check_input) {
167 if (hila::cmdline.flag_set(
"-device"))
168 device = hila::cmdline.get_int(
"-device");
171 hila::out0 <<
"Chose device " << device <<
"\n";
173 initialize_gpu(lattice->mynode.rank, device);
181 if (hila::partitions.number() > 1) {
183 << hila::partitions.number() <<
" partitions, this "
184 << hila::partitions.mylattice() <<
" ----\n";
189 hila::print_dashed_line(
"HILA lattice framework");
190 hila::out0 <<
"Running program " << argv[0] <<
"\n";
191 hila::out0 <<
"with command line arguments '";
192 for (
int i = 1; i < argc; i++)
196#if defined(GIT_SHA_VALUE)
197#define xstr(s) makestr(s)
199 hila::out0 <<
"git SHA " << xstr(GIT_SHA_VALUE) <<
'\n';
201 hila::out0 <<
"no git information available\n";
203 hila::out0 <<
"Compiled " << __DATE__ <<
" at " << __TIME__ <<
'\n';
205 hila::out0 <<
"with options: EVEN_SITES_FIRST";
206#ifndef EVEN_SITES_FIRST
209#ifdef SPECIAL_BOUNDARY_CONDITIONS
214 hila::timestamp(
"Starting");
218 if (hila::cmdline.flag_present(
"-t")) {
225 if (hila::cmdline.flag_present(
"-i")) {
227 hila::out0 <<
"Input file from command line: " << hila::cmdline.get_string(
"-i") <<
"\n";
231 hila::out0 <<
"Using option OPENMP - with " << omp_get_max_threads() <<
" threads\n";
235#if defined(CUDA) || defined(HIP)
238#if defined(GPU_AWARE_MPI)
244#if !defined(GPU_VECTOR_REDUCTION_THREAD_BLOCKS) || GPU_VECTOR_REDUCTION_THREAD_BLOCKS <= 0
245 hila::out0 <<
"ReductionVector with atomic operations (GPU_VECTOR_REDUCTION_THREAD_BLOCKS=0)\n";
248 <<
" thread blocks\n";
251 if (!hila::check_input)
261#if defined(CUDA) && !defined(PIZDAINT)
265#if (defined(__GNUC__) && !defined(DARWIN))
266 hila::out0 <<
"GNU c-library performance: not returning allocated memory\n";
276 hila::timestamp(
"Terminate");
277 hila::print_dashed_line();
278 hila::about_to_finish =
true;
279 if (hila::is_comm_initialized()) {
280 hila::finish_communications();
289void hila::error(
const char *msg) {
294void hila::error(
const std::string &msg) {
295 hila::error(msg.c_str());
306 int64_t gathers = hila::n_gather_done;
307 int64_t avoided = hila::n_gather_avoided;
309 if (gathers + avoided > 0) {
310 hila::out0 <<
" COMMS from node 0: " << gathers <<
" done, " << avoided <<
"("
311 << 100.0 * avoided / (avoided + gathers) <<
"%) optimized away\n";
313 hila::out0 <<
" No communications done from node 0\n";
317#if defined(CUDA) || defined(HIP)
321 if (hila::partitions.number() > 1) {
322 hila::timestamp(
"Waiting to sync partitions");
323 hila::synchronize_partitions();
330 hila::timestamp(
"Finishing");
332 hila::about_to_finish =
true;
334 hila::finish_communications();
336 hila::print_dashed_line();
347 bool do_exit =
false;
350 if (hila::cmdline.flag_present(
"-o")) {
353 if (hila::cmdline.flag_set(
"-o"))
354 name = hila::cmdline.get_string(
"-o");
356 hila::out0 <<
"The name of the output file must be provided after flag '-o'!\n";
360 if (!hila::check_input) {
363 hila::out <<
"Cannot open output file " << name <<
'\n';
366 hila::out0 <<
"Output is now directed to the file '" << name <<
"'.\n";
393void setup_partitions() {
395 std::string partition_dir(
"partition");
398 if (hila::cmdline.flag_present(
"-partitions")) {
400 long lnum = hila::cmdline.get_int(
"-partitions");
402 hila::out0 <<
"partitions <number> command line argument value must be positive "
406 hila::partitions.set_number(lnum);
408 if (hila::cmdline.flag_set(
"-partitions") > 1) {
409 partition_dir = hila::cmdline.get_string(
"-partitions", 1);
412 hila::partitions.set_number(1);
414 if (hila::partitions.number() == 1)
417 hila::out0 <<
" Dividing nodes into " << hila::partitions.number() <<
" partitions\n";
421 << hila::partitions.number() <<
" partitions\n";
425 hila::out0 <<
"REST OF OUTPUT TO DIRECTORIES " << partition_dir <<
"N\n";
427#if defined(BLUEGENE_LAYOUT)
428 hila::partitions.set_mylattice(bg_layout_partitions(hila::partitions.number()));
430 hila::partitions.set_mylattice((
hila::myrank() * hila::partitions.number()) /
433 if (!hila::check_input)
437 std::string dirname = partition_dir + std::to_string(hila::partitions.mylattice());
439 filesys_ns::create_directory(dirname);
443 filesys_ns::current_path(dirname);
451 if (!hila::cmdline.flag_present(
"-o")) {
454 if (!hila::check_input) {
468 if (!hila::check_input) {
481void vector_type_info() {
483 hila::out0 <<
"Using VCL vector class with instruction set level INSTRSET=" << INSTRSET
520 hila::out0 <<
" (You probably should use options '-mavx2 -fmad' in compilation)\n";
526void hila::print_dashed_line(
const std::string &text) {
527 static constexpr int linelength = 60;
531 if (text.size() == 0) {
532 for (
int i = 0; i < linelength; i++)
538 for (
int i = 7 + text.size(); i < linelength; i++)
lattice_struct * ptr() const
get non-const pointer to lattice_struct (cf. operator ->)
Define the logger class here.
logger_class log
Now declare the logger.
void setup_timelimit(const double secs)
Setup time limit with seconds.
int myrank()
rank of this node
int number_of_nodes()
how many nodes there are
void synchronize()
synchronize mpi + gpu
std::ostream out
this is our default output file stream
std::ostream out0
This writes output only from main process (node 0)
std::ofstream output_file
this is just a hook to store output file, if it is in use
void initialize(int argc, char **argv)
Initial setup routines.
void split_into_partitions(int rank)
T broadcast(T &var, int rank=0)
Broadcast the value of var to all MPI ranks from rank (default=0).
void finishrun()
Normal, controlled exit - all nodes must call this. Prints timing information and information about c...
void terminate(int status)
#define GPU_VECTOR_REDUCTION_THREAD_BLOCKS
#define DEFAULT_OUTPUT_NAME
Default output file name.
#define N_threads
General number of threads in a thread block.