// Definitions of the global state variables of the hila namespace.
//  - about_to_finish:  set to true right before communications are shut down.
//  - is_initialized:   set to true during initialization, after the communications
//                      and the signal handler have been set up.
//  - check_input:      set to true when the '-check' command line flag is present.
//  - check_with_nodes: node count used in '-check' mode; it is copied into
//                      lattice.nodes.number. No explicit initializer is given, so as a
//                      namespace-scope int it starts out zero-initialized.
9bool hila::about_to_finish =
false;
10bool hila::is_initialized =
false;
11bool hila::check_input =
false;
12int hila::check_with_nodes;
// Forward declarations of helper routines whose definitions appear further down in this file.
15void setup_partitions();
17void vector_type_info();
// Interpret a command line flag that takes the value "on" or "off".
//
// If `flag` has been set on the command line, its string value is fetched and
// compared against "on" and then "off". The listing then shows a message on
// hila::out0 stating that the argument requires the value on/off (presumably
// the branch taken for any other value).
//
// NOTE(review): the return statements are not visible in this listing, so the
// integer codes returned for "on", "off" and an unset flag cannot be stated here.
22int get_onoff(std::string flag) {
24 if (hila::cmdline.flag_set(flag.c_str())) {
25 std::string opt = hila::cmdline.get_string(flag.c_str());
26 if (opt.compare(
"on") == 0)
28 else if (opt.compare(
"off") == 0)
31 hila::out0 <<
"Command line argument " << flag <<
" requires value on/off\n";
43#if (defined(__GNUC__) && !defined(DARWIN))
62#if (defined(__GNUC__) && !defined(DARWIN) && !defined(_MAC_OSX_))
68 mallopt(M_MMAP_MAX, 0);
70 mallopt(M_TRIM_THRESHOLD, -1);
73 feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
78 initialize_communications(argc, &argv);
81 setup_signal_handler();
84 hila::is_initialized =
true;
99 hila::cmdline.add_flag(
"-t",
100 "cpu time limit, in one of the formats:\n"
101 "s, m:s, h:m:s, d-h:m:s, or 'slurm' "
102 "where s=seconds, m=minutes, h=hours, d=days.\n"
103 "Values need not be restricted into natural ranges.\n"
104 "Format is compatible with the output of\n"
105 "' squeue -h --job ${SLURM_JOB_ID} --format=\"%L\" '\n"
106 "Option '-t slurm' makes program to use slurm to get time limit",
108 hila::cmdline.add_flag(
"-o",
"output file (default: stdout)",
"<filename>", 1);
109 hila::cmdline.add_flag(
"-i",
110 "input file (overrides the 1st hila::input() name)\n"
111 "use '-i -' for standard input",
113 hila::cmdline.add_flag(
"-device",
114 "in GPU runs using only 1 GPU, choose this GPU number (default 0)",
116 hila::cmdline.add_flag(
"-check",
117 "check input & layout with <num> nodes & exit.\nDoes not initialize MPI "
118 "or GPUs (do not use mpirun)\n",
120 hila::cmdline.add_flag(
"-partitions",
121 "number of partitioned lattice streams.\nBy default, creates "
122 "directories 'partitionN' for each stream if these don't exist.\nEnter "
123 "as '-partitions <num> <dirname>' to use '<dirname>N'.\n",
126 hila::cmdline.add_flag(
"-p",
127 "parameter overriding the input file field <key>.\n"
128 "If fields contain spaces enclose in quotes.\n"
129 "Can be repeated many times, each overrides only one input entry.",
135 hila::cmdline.initialise_args(argc, argv);
140 if (lattice.nodes.number == 1) {
142 if (hila::cmdline.flag_present(
"-check")) {
145 if (hila::cmdline.flag_set(
"-check") > 0)
146 nodes = hila::cmdline.get_int(
"-check");
148 hila::check_input =
true;
151 hila::check_with_nodes = nodes;
152 hila::out <<
"****** INPUT AND LAYOUT CHECK ******" << std::endl;
155 lattice.mynode.rank = 0;
156 lattice.nodes.number = hila::check_with_nodes;
160#if defined(CUDA) || defined(HIP)
161 if (!hila::check_input) {
163 if (hila::cmdline.flag_set(
"-device"))
164 device = hila::cmdline.get_int(
"-device");
167 hila::out0 <<
"Chose device " << device <<
"\n";
169 initialize_gpu(lattice.mynode.rank, device);
177 if (hila::partitions.number() > 1) {
179 << hila::partitions.number() <<
" partitions, this "
180 << hila::partitions.mylattice() <<
" ----\n";
185 hila::print_dashed_line(
"HILA lattice framework");
186 hila::out0 <<
"Running program " << argv[0] <<
"\n";
187 hila::out0 <<
"with command line arguments '";
188 for (
int i = 1; i < argc; i++)
192#if defined(GIT_SHA_VALUE)
193#define xstr(s) makestr(s)
195 hila::out0 <<
"git SHA " << xstr(GIT_SHA_VALUE) <<
'\n';
197 hila::out0 <<
"no git information available\n";
199 hila::out0 <<
"Compiled " << __DATE__ <<
" at " << __TIME__ <<
'\n';
201 hila::out0 <<
"with options: EVEN_SITES_FIRST";
202#ifndef EVEN_SITES_FIRST
205#ifdef SPECIAL_BOUNDARY_CONDITIONS
210 hila::timestamp(
"Starting");
214 if (hila::cmdline.flag_present(
"-t")) {
221 if (hila::cmdline.flag_present(
"-i")) {
223 hila::out0 <<
"Input file from command line: " << hila::cmdline.get_string(
"-i") <<
"\n";
227 hila::out0 <<
"Using option OPENMP - with " << omp_get_max_threads() <<
" threads\n";
231#if defined(CUDA) || defined(HIP)
234#if defined(GPU_AWARE_MPI)
240#if !defined(GPU_VECTOR_REDUCTION_THREAD_BLOCKS) || GPU_VECTOR_REDUCTION_THREAD_BLOCKS <= 0
241 hila::out0 <<
"ReductionVector with atomic operations (GPU_VECTOR_REDUCTION_THREAD_BLOCKS=0)\n";
244 <<
" thread blocks\n";
247 if (!hila::check_input)
257#if defined(CUDA) && !defined(PIZDAINT)
261#if (defined(__GNUC__) && !defined(DARWIN))
262 hila::out0 <<
"GNU c-library performance: not returning allocated memory\n";
272 hila::timestamp(
"Terminate");
273 hila::print_dashed_line();
274 hila::about_to_finish =
true;
275 if (hila::is_comm_initialized()) {
276 hila::finish_communications();
285void hila::error(
const char *msg) {
// Overload of hila::error() for std::string messages: forwards the message
// to the const char * overload via msg.c_str().
290void hila::error(
const std::string &msg) {
291 hila::error(msg.c_str());
304 int64_t gathers = latp->n_gather_done;
305 int64_t avoided = latp->n_gather_avoided;
307 if (gathers + avoided > 0) {
308 hila::out0 <<
" COMMS from node 0: " << gathers <<
" done, " << avoided <<
"("
309 << 100.0 * avoided / (avoided + gathers) <<
"%) optimized away\n";
311 hila::out0 <<
" No communications done from node 0\n";
316#if defined(CUDA) || defined(HIP)
320 if (hila::partitions.number() > 1) {
321 hila::timestamp(
"Waiting to sync partitions");
322 hila::synchronize_partitions();
329 hila::timestamp(
"Finishing");
331 hila::about_to_finish =
true;
333 hila::finish_communications();
335 hila::print_dashed_line();
346 bool do_exit =
false;
349 if (hila::cmdline.flag_present(
"-o")) {
352 if (hila::cmdline.flag_set(
"-o"))
353 name = hila::cmdline.get_string(
"-o");
355 hila::out0 <<
"The name of the output file must be provided after flag '-o'!\n";
359 if (!hila::check_input) {
362 hila::out <<
"Cannot open output file " << name <<
'\n';
365 hila::out0 <<
"Output is now directed to the file '" << name <<
"'.\n";
// Set up partitioned lattice streams from the '-partitions' command line flag.
//
// Visible behaviour in this (incomplete) listing:
//  - The directory name prefix defaults to "partition".
//  - If '-partitions' is present, its integer value is read and passed to
//    hila::partitions.set_number(); a message about the value having to be
//    positive is shown for (presumably) invalid values. If the flag carries more
//    than one argument, the second one replaces the directory name prefix.
//  - Without the flag the number of partitions is set to 1.
//  - The partition index of this rank is stored with set_mylattice(): via
//    bg_layout_partitions() when BLUEGENE_LAYOUT is defined, otherwise from an
//    expression based on hila::myrank() * hila::partitions.number().
//  - A directory named <prefix><partition index> is created and made the
//    current working directory.
//  - The trailing lines consult the '-o' flag and hila::check_input; what they
//    do is not visible here.
// NOTE(review): several source lines are missing from this listing, so the exact
// nesting of the conditions below cannot be confirmed from it.
392void setup_partitions() {
394 std::string partition_dir(
"partition");
397 if (hila::cmdline.flag_present(
"-partitions")) {
399 long lnum = hila::cmdline.get_int(
"-partitions");
401 hila::out0 <<
"partitions <number> command line argument value must be positive "
405 hila::partitions.set_number(lnum);
407 if (hila::cmdline.flag_set(
"-partitions") > 1) {
408 partition_dir = hila::cmdline.get_string(
"-partitions", 1);
411 hila::partitions.set_number(1);
413 if (hila::partitions.number() == 1)
416 hila::out0 <<
" Dividing nodes into " << hila::partitions.number() <<
" partitions\n";
420 << hila::partitions.number() <<
" partitions\n";
424 hila::out0 <<
"REST OF OUTPUT TO DIRECTORIES " << partition_dir <<
"N\n";
426#if defined(BLUEGENE_LAYOUT)
427 hila::partitions.set_mylattice(bg_layout_partitions(hila::partitions.number()));
429 hila::partitions.set_mylattice((
hila::myrank() * hila::partitions.number()) /
432 if (!hila::check_input)
436 std::string dirname = partition_dir + std::to_string(hila::partitions.mylattice());
438 filesys_ns::create_directory(dirname);
442 filesys_ns::current_path(dirname);
450 if (!hila::cmdline.flag_present(
"-o")) {
453 if (!hila::check_input) {
467 if (!hila::check_input) {
// Report vectorization settings on hila::out0.
// Prints the instruction set level (INSTRSET) of the VCL vector class in use.
// The listing also shows a hint recommending the compiler options
// '-mavx2 -fmad'; the condition under which it is printed is not visible here.
480void vector_type_info() {
482 hila::out0 <<
"Using VCL vector class with instruction set level INSTRSET=" << INSTRSET
519 hila::out0 <<
" (You probably should use options '-mavx2 -fmad' in compilation)\n";
// Print a separator line, optionally containing `text`.
// The nominal line width is linelength = 60 characters.
//  - Empty text: the loop runs linelength times.
//  - Non-empty text: the loop starts at 7 + text.size(), so it only covers the
//    part of the line not accounted for by the text and 7 further characters.
// What is written on each iteration is not visible in this listing
// (presumably a dash, going by the function name).
// NOTE(review): 7 + text.size() is a std::size_t expression narrowed to int; for
// text of 53 characters or more the second loop does not execute at all.
525void hila::print_dashed_line(
const std::string &text) {
526 static constexpr int linelength = 60;
530 if (text.size() == 0) {
531 for (
int i = 0; i < linelength; i++)
537 for (
int i = 7 + text.size(); i < linelength; i++)
Define the logger class here.
logger_class log
Now declare the logger.
void setup_timelimit(const double secs)
Setup time limit with seconds.
int myrank()
rank of this node
int number_of_nodes()
how many nodes there are
void synchronize()
synchronize MPI
std::ostream out
this is our default output file stream
std::ostream out0
This writes output only from main process (node 0)
std::ofstream output_file
this is just a hook to store output file, if it is in use
void initialize(int argc, char **argv)
Initial setup routines.
void split_into_partitions(int rank)
T broadcast(T &var, int rank=0)
Broadcast the value of var to all MPI ranks from rank (default=0).
void finishrun()
Normal, controlled exit - all nodes must call this. Prints timing information and information about c...
void terminate(int status)
#define GPU_VECTOR_REDUCTION_THREAD_BLOCKS
#define DEFAULT_OUTPUT_NAME
Default output file name.
#define N_threads
General number of threads in a thread block.