HILA
|
Implement hila::swap for gauge fields. More...
Namespaces | |
namespace | linalg |
Inversed diagnal + const. matrix using Sherman-Morrison formula. | |
Classes | |
struct | base_type_struct |
struct | contains_type |
class | global |
Global variable class within hila namespace. More... | |
class | has_assign_zero |
hila::has_assign_zero<T>::value returns true if '= 0 is defined for T More... | |
class | has_unary_minus |
Conditionally reture bool type false if type T does't have unary - operator. More... | |
struct | inner_type_struct |
class | input |
hila::input - Class for parsing runtime parameter files. More... | |
struct | is_arithmetic |
struct | is_avx_vector |
struct | is_complex_or_arithmetic |
hila::is_complex_or_arithmetic<T>::value More... | |
struct | is_field_class_type |
struct | is_field_type |
struct | is_std_array |
struct | is_vectorizable_type |
is_vectorizable_type<T>::value is always false if the target is not vectorizable More... | |
class | k_binning |
class | timer |
struct | timer_value |
This file defines timer class and other timing related utilities. More... | |
struct | vector_info |
Typedefs | |
template<typename A , typename B > | |
using | type_plus = decltype(std::declval< A >()+std::declval< B >()) |
Enumerations | |
enum class | bc |
list of field boundary conditions - used only if SPECIAL_BOUNDARY_CONDITIONS defined | |
Functions | |
void | barrier () |
sync MPI | |
bool | bc_need_communication (hila::bc bc) |
False if we have b.c. which does not require communication. | |
template<typename T > | |
T | broadcast (const T &var, int rank=0) |
Version of broadcast with non-modifiable var. | |
template<typename T , int n> | |
void | broadcast (std::array< T, n > &arr, int rank=0) |
And broadcast for std::array. | |
template<typename T > | |
void | broadcast (std::vector< T > &list, int rank=0) |
Broadcast for std::vector. | |
template<typename T > | |
T | broadcast (T &var, int rank=0) |
Broadcast the value of var to all MPI ranks from rank (default=0). | |
template<typename T > | |
void | broadcast (T *var, int rank=0) |
Bare pointers cannot be broadcast. | |
template<typename T , typename U > | |
void | broadcast2 (T &t, U &u, int rank=0) |
and broadcast with two values | |
template<typename T > | |
void | broadcast_array (T *var, int n, int rank=0) |
Broadcast for arrays where size must be known and same for all nodes. | |
template<typename Ntype , typename T , int n, int m, std::enable_if_t< hila::is_arithmetic_or_extended< T >::value, int > = 0> | |
Array< n, m, Ntype > | cast_to (const Array< n, m, T > &mat) |
Array casting operation. | |
template<typename Ntype , typename T > | |
Complex< Ntype > | cast_to (const Complex< T > &m) |
Cast to different basic number type. | |
template<typename Ntype , typename T , int n, std::enable_if_t< hila::is_arithmetic_or_extended< T >::value, int > = 0> | |
DiagonalMatrix< n, Ntype > | cast_to (const DiagonalMatrix< n, T > &mat) |
Cast to different basic number: | |
template<typename Ntype , typename T , int n, int m, std::enable_if_t< hila::is_arithmetic_or_extended< T >::value, int > = 0> | |
Matrix< n, m, Ntype > | cast_to (const Matrix< n, m, T > &mat) |
Change basic number type of Matrix/Vector. | |
void | check_that_rng_is_initialized () |
Check if RNG is initialized, do what the name says. | |
void | error (const char *msg) |
Print message and force quit. | |
void | finishrun () |
Normal, controlled exit - all nodes must call this. Prints timing information and information about communications. | |
void | free_device_rng () |
Free GPU RNG state, does nothing on non-GPU archs. | |
template<typename T > | |
T | gaussian_random () |
Template function T hila::gaussian_random<T>() ,generates gaussian random value of type T , with variance \(1\). | |
template<typename T , std::enable_if_t< std::is_arithmetic< T >::value, int > = 0> | |
T | gaussian_random (out_only T &val, double w=1.0) |
Template function const T & hila::gaussian_random(T & variable,double width=1) | |
double | gaussrand () |
Gaussian random generation routine. | |
double | gaussrand2 (double &out2) |
hila::gaussrand2 returns 2 Gaussian distributed random numbers with variance \(1.0\). | |
template<typename T > | |
hila::arithmetic_type< T > | get_number_in_var (const T &var, int i) |
double | gettime () |
void | initialize (int argc, char **argv) |
Initial setup routines. | |
void | initialize_device_rng (uint64_t seed) |
Initialize device random number generator on GPUs, if application run on GPU platform. No effect on other archs. | |
void | initialize_host_rng (uint64_t seed) |
Initialize host (CPU) random number generator separately, done implicitly by seed_random() | |
bool | is_device_rng_on () |
Check if the RNG on GPU is allocated and ready to use. | |
bool | is_rng_seeded () |
Check if RNG is seeded already. | |
int | myrank () |
rank of this node | |
int | number_of_nodes () |
how many nodes there are | |
template<typename T > | |
std::string | prettyprint (const Complex< T > &A, int prec=8) |
Return well formatted Complex number as std::string. | |
template<int n, int m, typename T , typename MT > | |
std::string | prettyprint (const Matrix_t< n, m, T, MT > &A, int prec=8) |
Formats Matrix_t object in a human readable way. | |
double | random () |
Real valued uniform random number generator. | |
template<typename T > | |
T | random () |
Template function T hila::random<T>() without argument. | |
template<typename T , std::enable_if_t< std::is_arithmetic< T >::value, int > = 0> | |
T | random (out_only T &val) |
Template function const T & hila::random(T & var) sets the argument to a random value, and return a constant reference to it. | |
template<typename T > | |
T | reduce_node_sum (T &var, bool allreduce=true) |
template<typename T > | |
void | reduce_node_sum (T *value, int send_count, bool allreduce=true) |
Reduce an array across nodes. | |
void | seed_random (uint64_t seed, bool device_rng=true) |
Seed random generators with 64-bit unsigned value. On MPI shuffles the seed so that different MPI ranks are seeded with different values. | |
void | set_allreduce (bool on=true) |
set allreduce on (default) or off on the next reduction | |
void | setup_timelimit (const double secs) |
Setup time limit with seconds. | |
void | setup_timelimit (const std::string ×tr) |
template<typename T , std::enable_if_t< hila::is_std_array< T >::value||hila::is_std_vector< T >::value, int > = 0> | |
void | shuffle (T &arr) |
auto | shuffle_directions () |
auto | shuffle_directions_and_parities () |
uint64_t | shuffle_rng_seed (uint64_t seed) |
Random shuffling of rng seed for MPI nodes. | |
void | split_into_partitions (int rank) |
template<typename T > | |
void | swap (Field< T > &A, Field< T > &B) |
Implement hila::swap() for Fields too, equivalent to std::swap() | |
void | synchronize () |
synchronize mpi + gpu | |
void | terminate (int status) |
bool | time_to_finish () |
template<int n, int m, typename T > | |
std::string | to_string (const Array< n, m, T > &A, int prec=8, char separator=' ') |
Converts Array object to string. | |
template<typename T > | |
std::string | to_string (const Complex< T > &A, int prec=8, char separator=' ') |
Return Complex number as std::string. | |
template<int n, int m, typename T , typename MT > | |
std::string | to_string (const Matrix_t< n, m, T, MT > &A, int prec=8, char separator=' ') |
Converts Matrix_t object to string. | |
template<typename T , std::enable_if_t< hila::is_arithmetic< T >::value, int > = 0> | |
std::string | to_string (const T v, int prec=8, char separator=' ') |
convert to string: separator does nothing, but for compatibility w. other to_strings | |
Variables | |
logger_class | log |
Now declare the logger. | |
std::ostream | out |
this is our default output file stream | |
std::ostream | out0 |
This writes output only from main process (node 0) | |
std::ofstream | output_file |
this is just a hook to store output file, if it is in use | |
std::vector< timer * > | timer_list = {} |
Timer routines - for high-resolution event timing. | |
Implement hila::swap for gauge fields.
template utilities: hila::is_extended<T>::value and hila::is_arithmetic_or_extended<T>::value
FFT_complex_to_real; Field must be a complex-valued field, result is a real field of the same number type Not optimized, should not be used on a hot path
Because the complex field must have the property f(-x) = f(L-x) = f(x)^*, only half of the values in input field are significant, the routine does the appropriate symmetrization.
Routine hila::FFT_complex_to_real_site(CoordinateVector cv) gives the significant values at location cv: = +1 significant complex value, = 0 significant real part, imag ignored = -1 value ignored here Example: in 2d 8x8 lattice the sites are: (* = (0,0), value 0)
Namespace hila
contains most of class templates
and function templates
, which are necessary to run lattice filed simulations on distributed memory system as well as on GPU nodes concurrently.
using hila::type_plus = typedef decltype(std::declval<A>() + std::declval<B>()) |
Helper operations to make generic templates for arithmetic operators e.g. hila::type_plus<A,B> gives the type of the operator a + b, where a is of type A and b type B.
Definition at line 103 of file type_tools.h.
T hila::broadcast | ( | T & | var, |
int | rank = 0 |
||
) |
Broadcast the value of var to all MPI ranks from rank (default=0).
NOTE: the function must be called by all MPI ranks, otherwise the program will deadlock.
The type of the variable var can be any standard plain datatype (trivial type), std::string, std::vector or std::array
For trivial types, the input var can be non-modifiable value. In this case the broadcast value is obtained from the broadcast return value.
Example:
var | variable to be synchronized across the full |
rank | MPI rank from which the |
Array< n, m, Ntype > hila::cast_to | ( | const Array< n, m, T > & | mat | ) |
Array casting operation.
Cast array to different number type or Complex type
Allowed casting: number->number, number->Complex, Complex->Complex
Not allowed casting: Complex->number
Ntype | Type to cast to |
T | Input Array type |
n | Number of rows |
m | Number of columns |
mat | Array to cast |
DiagonalMatrix< n, Ntype > hila::cast_to | ( | const DiagonalMatrix< n, T > & | mat | ) |
Cast to different basic number:
hila::cast_to<Ntype>(a);
where a is DiagonalMatrix<n,T> does DiagonalMatrix<n,T> -> DiagonalMatrix<n,Ntype>
Definition at line 982 of file diagonal_matrix.h.
void hila::check_that_rng_is_initialized | ( | ) |
Check if RNG is initialized, do what the name says.
program quits with error message if RNG is not initialized. It also quit with error messages if the device RNG is not initialized.
Definition at line 263 of file random.cpp.
T hila::gaussian_random | ( | ) |
Template function T hila::gaussian_random<T>()
,generates gaussian random value of type T
, with variance \(1\).
For example,
calculates the norm of a gaussian random complex value.
hila::gaussian_random(value)
T hila::gaussian_random | ( | out_only T & | val, |
double | w = 1.0 |
||
) |
Template function const T & hila::gaussian_random(T & variable,double width=1)
Sets the argument to a gaussian random value, and return a constant reference to it. Optional second argument width sets the \(variance=width^{2}\) ( \(default==1\))
For example:
sets the variable c
to complex gaussian random value and stores its square in n
.
This function is for hila classes relies on the existence of method T::gaussian_random()
. The advantage for this function over class function T::random()
is that the argument can be of elementary arithmetic type.
double hila::gaussrand2 | ( | double & | out2 | ) |
hila::gaussrand2
returns 2 Gaussian distributed random numbers with variance \(1.0\).
Useful because Box-Muller algorithm computes 2 values at the same time.
Definition at line 188 of file random.cpp.
|
inline |
Access variables as if arrays of scalar_type numbers
Definition at line 118 of file type_tools.h.
double hila::gettime | ( | ) |
Use clock_gettime() to get the accurate time (alternative: use gettimeofday() or MPI_Wtime()) gettime returns the time in secs since program start
Definition at line 167 of file timing.cpp.
void hila::initialize | ( | int | argc, |
char ** | argv | ||
) |
Initial setup routines.
Read in command line arguments. Initialise default stream and MPI communication
argc | Number of command line arguments |
argv | List of command line arguments |
Definition at line 60 of file initialize.cpp.
void hila::initialize_host_rng | ( | uint64_t | seed | ) |
Initialize host (CPU) random number generator separately, done implicitly by seed_random()
seed | unsigned int_64 |
Definition at line 61 of file random.cpp.
bool hila::is_rng_seeded | ( | ) |
int hila::myrank | ( | ) |
rank of this node
Return my node number - take care to return the previous node number if mpi is being torn down (used in destructors)
Definition at line 237 of file com_mpi.cpp.
int hila::number_of_nodes | ( | ) |
how many nodes there are
Return number of nodes or "pseudo-nodes".
Definition at line 248 of file com_mpi.cpp.
std::string hila::prettyprint | ( | const Complex< T > & | A, |
int | prec = 8 |
||
) |
std::string hila::prettyprint | ( | const Matrix_t< n, m, T, MT > & | A, |
int | prec = 8 |
||
) |
Formats Matrix_t object in a human readable way.
Example 2 x 3 matrix is the following
__device__ __host__ double hila::random | ( | ) |
Real valued uniform random number generator.
Returns an uniform double precision random number in interval \([0,1)\).
This function can be called from outside or inside site loops (on GPU if the device rng is initialized).
Uses std::uniform_real_distribution
Definition at line 121 of file hila_gpu.cpp.
T hila::random | ( | ) |
Template function T hila::random<T>()
without argument.
This is used to generate random value for type T
without defined variable. Example:
calculates the norm of a random complex value. hila::random<double>()
is functionally equivalent to hila::random()
T hila::random | ( | out_only T & | val | ) |
Template function const T & hila::random(T & var)
sets the argument to a random value, and return a constant reference to it.
For example
sets the variable c
to complex random value and calculates its absolute value. c.real()
and c.imag()
will be \(\in [0,1)\).
For hila classes relies on the existence of method T::random()
(i.e. var.random()
), this function typically sets the argument real numbers to interval \([0,1)\) if type T
is arithmatic. if T is more commplicated classes such as SU<N,T>
-matrix, this function sets the argument to valid random SU<N,T>
.
Advantage of this function over class function T::random()
is that the argument can be of elementary arithmetic type.
T hila::reduce_node_sum | ( | T & | var, |
bool | allreduce = true |
||
) |
void hila::seed_random | ( | uint64_t | seed, |
bool | device_init = true |
||
) |
Seed random generators with 64-bit unsigned value. On MPI shuffles the seed so that different MPI ranks are seeded with different values.
The optional 2nd argument indicates whether to initialize the RNG on GPU device: hila::device_rng_on
(default) or hila::device_rng_off
. This argument does nothing if no GPU platform. If hila::device_rng_off
is used, onsites()
-loops cannot contain random number calls (Runtime error will be flagged and program exits).
Seed is shuffled so that different nodes get different rng seeds. If seed == 0
, seed is generated through using the time()
-function.
Definition at line 86 of file random.cpp.
void hila::setup_timelimit | ( | const std::string & | timestr | ) |
setup time limit from the time given in timestr Format is d-h:m:s, not required to be "normalized" to std ranges Fields can be unused from the largest fields onwards, i.e. simplest case only seconds string "slurm" indicates that we call slurm 'squeue' to obtain the time limit
Definition at line 209 of file timing.cpp.
void hila::shuffle | ( | T & | arr | ) |
|
inline |
Shuffling directions and parities separately with functions
std::array<Direction,NDIM> hila::shuffle_directions() std::array<Parity,2> hila::shuffle_parities()
Thus, (almost) the same operation as above with shuffle_directions_and_parities() can be done with
for (const Direction d : hila::shuffle_directions()) { for (const Parity p : hila::shuffle_parities()) { update_parity_dir(U, p, d); } }
This does more MPI synchronization than the shuffle_directions_and_parities()
|
inline |
Function returning all directions and parities as shuffled to random order. Return type is std::array<dir_and_parity,2*NDIM>,
Use case is in Gauge heatbath/overrelax updates, to randomize order. Synchronizes with all MPI nodes, i.e. all ranks will have the same content. Function marked inline in order to avoid ODR
Typical use: no need to declare std::array
for (const auto & s : hila::shuffle_directions_and_parities()) { update_parity_dir(U, s.parity, s.direction); }
Can also be assigned, i.e. auto shuffled = hila::shuffle_directions_and_parities();
uint64_t hila::shuffle_rng_seed | ( | uint64_t | seed | ) |
Random shuffling of rng seed for MPI nodes.
Do it in a manner makes it difficult to give the same seed by mistake and also avoids giving the same seed for 2 nodes For single MPI node seed remains unchanged
Definition at line 47 of file random.cpp.
void hila::split_into_partitions | ( | int | this_lattice | ) |
Split the communicator to subvolumes, using MPI_Comm_split New MPI_Comm is the global mpi_comm_lat NOTE: no attempt made here to reorder the nodes
Definition at line 290 of file com_mpi.cpp.
void hila::terminate | ( | int | status | ) |
Force quit for multinode processes – kill all nodes No synchronisation done
Definition at line 275 of file initialize.cpp.
bool hila::time_to_finish | ( | ) |
Check the cpu time limit or if signal SIGUSR1 is up - this function is meant to be called periodically, and returns true if it is time to exit or signal has been raised.
Use case: on main loop check 'hila::time_to_finish()' periodically, and if it returns true checkpoint/exit.
This makes sense only if the program can checkpoint or otherwise clean up.
Time limit or signaling are alternative ways to ensure clean exit. Both can be used at the same time. Time limit has the advantage that if the program does periodic checkpointing, it automatically adjusts the grace time to allow for checkpointing at the end. With signal the grace time must be estimated in advance.
Time limit can be given with program command line argument -t <time> or -t slurm or calling function hila::setup_timelimit(time), see above
Signal can be set in slurm submit script with #SBATCH –signal=SIGUSR1@180 where the last number is the time in seconds when the signal SIGUSR1 is sent before the run time expires. This time must allow for the periodic check interval and the time the cleanup takes.
Signal can also be sent to slurm jobs from terminal session with $ scancel –signal=SIGUSR1 <jobid> and, of course, for normal "terminal" runs with $ kill -s SIGUSR1 <pid0> Note: signal must be sent to MPI rank 0. It can be sent to all ranks too.
Definition at line 327 of file timing.cpp.
std::string hila::to_string | ( | const Array< n, m, T > & | A, |
int | prec = 8 , |
||
char | separator = ' ' |
||
) |
std::string hila::to_string | ( | const Complex< T > & | A, |
int | prec = 8 , |
||
char | separator = ' ' |
||
) |
std::string hila::to_string | ( | const Matrix_t< n, m, T, MT > & | A, |
int | prec = 8 , |
||
char | separator = ' ' |
||
) |