#include "plumbing/lattice.h"

void report_too_large_node() {
    hila::out << "Node size too large: size = " << lattice->mynode.size[0];
    for (int d = 1; d < NDIM; d++)
        hila::out << " x " << lattice->mynode.size[d];
    hila::out << " + communication buffers = " << lattice->mynode.field_alloc_size;
    hila::out << "\nConsider using more nodes (smaller node size).\n";
    hila::out << "[TODO: allow 64bit index?]\n";
    // ...
}
std::vector<lattice_struct *> defined_lattices;

int64_t n_gather_avoided = 0, n_gather_done = 0;
    assert(defined_lattices.size() == 0 && "lattice.setup() can be called only once");

    defined_lattices.push_back(this);

    // ...
    initialize_wait_arrays();

#ifdef SPECIAL_BOUNDARY_CONDITIONS
    init_special_boundaries();
#endif

    // ...
    if (mynode.field_alloc_size % 256 > 0)
        mynode.field_alloc_size += 256 - mynode.field_alloc_size % 256;

    // ...
    backend_lattice->setup(*this);

    if (hila::check_input) {
        hila::out << "***** Input check done *****\n";
        // ...
    }
    int i = (loc[NDIM - 1] * nodes.n_divisions[NDIM - 1]) / l_size[NDIM - 1];
    for (int dir = NDIM - 2; dir >= 0; dir--) {
        i = i * nodes.n_divisions[dir] + ((loc[dir] * nodes.n_divisions[dir]) / l_size[dir]);
    }
    // ...
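
// Illustrative sketch, not part of the original source: the rank computation
// above maps a site coordinate to its owner node as a mixed-radix number.
// Plain arrays stand in for the lattice_struct members; all names here are
// hypothetical.
inline int node_rank_sketch(const int loc[], const int n_div[], const int l_size[], int ndim) {
    // map each coordinate to its node column, then combine the columns into
    // a single rank, last dimension running slowest
    int i = (loc[ndim - 1] * n_div[ndim - 1]) / l_size[ndim - 1];
    for (int dir = ndim - 2; dir >= 0; dir--)
        i = i * n_div[dir] + (loc[dir] * n_div[dir]) / l_size[dir];
    return i;
}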
    nodes.divisors[dir].resize(nodes.n_divisions[dir] + 1);
    // ...
    for (int i = 0; i <= l_size[dir]; i++) {
        while (n < (i * nodes.n_divisions[dir]) / l_size[dir]) {
            n++;
            nodes.divisors[dir][n] = i;
        }
    }
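
// Illustrative sketch, not part of the original source: a standalone version
// of the divisor-table construction above for one dimension. The n = -1
// initialization is an assumption needed to make the loop well defined; in
// the result, node column k owns coordinates [divisors[k], divisors[k+1]).
#include <vector>
inline std::vector<int> make_divisors(int L, int n_div) {
    std::vector<int> divisors(n_div + 1);
    int n = -1;
    for (int i = 0; i <= L; i++) {
        while (n < (i * n_div) / L) {
            n++;
            divisors[n] = i;
        }
    }
    return divisors; // e.g. L = 10, n_div = 4 -> {0, 3, 5, 8, 10}
}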
    for (int dir = 0; dir < NDIM; dir++) {
        d = loc[dir] - mynode.min[dir];
        if (d < 0 || d >= mynode.size[dir])
            return false;
    }
    return true;
#ifndef SUBNODE_LAYOUT

    unsigned i = loc[NDIM - 1] - mynode.min[NDIM - 1];
    int s = loc[NDIM - 1];
    for (int dir = NDIM - 2; dir >= 0; dir--) {
        int l = loc[dir] - mynode.min[dir];
        i = i * mynode.size[dir] + l;
        s += loc[dir]; // accumulate coordinate sum for the parity below
    }

#if defined(EVEN_SITES_FIRST)
    if (s % 2 == 0)
        return (i / 2);
    else
        return (i / 2 + mynode.evensites);
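
// Illustrative sketch, not part of the original source: with EVEN_SITES_FIRST
// the lexicographic in-node index i is folded so that even sites occupy
// [0, evensites) and odd sites [evensites, volume). Along the lexicographic
// walk the even/odd sequence stays balanced, so i / 2 counts the earlier
// sites of the same parity; that is exactly the branch above.
inline unsigned fold_even_first(unsigned i, bool site_is_even, unsigned evensites) {
    return site_is_even ? i / 2 : i / 2 + evensites;
}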
    unsigned i = loc[NDIM - 1] - ni.min[NDIM - 1];
    int s = loc[NDIM - 1];
    for (int dir = NDIM - 2; dir >= 0; dir--) {
        int l = loc[dir] - ni.min[dir];
        i = i * ni.size[dir] + l;
        s += loc[dir];
    }

#if defined(EVEN_SITES_FIRST)
    if (s % 2 == 0)
        return (i / 2);
    else
        return (i / 2 + ni.evensites);
    assert(nodeid < nodes.number);
    // ...
    subsize.asArray() = ni.size.asArray() / mynode.subnodes.divisions.asArray();

    dir = mynode.subnodes.merged_subnodes_dir;
    l = loc[dir] - ni.min[dir];
    subl = l / subsize[dir];
    // ...
    subl = subl / 2 + (subl % 2) * (mynode.subnodes.divisions[dir] / 2);

    // ...
    for (dir = NDIM - 1; dir >= 0; dir--) {
        l = loc[dir] - ni.min[dir];
        if (dir != mynode.subnodes.merged_subnodes_dir) {
            subl = subl * mynode.subnodes.divisions[dir] + l / subsize[dir];
        }
        i = i * subsize[dir] + l % subsize[dir];
        // ...
    }

#if defined(EVEN_SITES_FIRST)
    // ... (parity branch, as in site_index above)
    i = i / 2 + ni.evensites / number_of_subnodes;
#endif
    // ...
    return (subl + number_of_subnodes * i);
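
// Illustrative sketch, not part of the original source: the return value
// above interleaves the subnode number into the lowest "digit" of the site
// index, so consecutive field elements hold the same in-subnode site for all
// number_of_subnodes subnodes — the layout a vectorized (SIMD) backend needs.
inline unsigned interleave_subnode(unsigned subl, unsigned i, unsigned n_subnodes) {
    return subl + n_subnodes * i;
}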
    assert(nodeid < number);
    nodeid = inverse_remap(nodeid);
    // ... (for each dimension d:)
    int nodecoord = nodeid % n_divisions[d];
    nodeid = nodeid / n_divisions[d];
    // ...
    ninfo.min[d] = divisors[d].at(nodecoord);
    ninfo.size[d] = divisors[d].at(nodecoord + 1) - ninfo.min[d];
    nodevol *= ninfo.size[d];
    // ...
    if (nodevol >= (1ULL << 32)) {
        // ...
        report_too_large_node();
    }

    if (nodevol % 2 == 0) {
        ninfo.evensites = ninfo.oddsites = nodevol / 2;
    } else {
        if (ninfo.min.parity() == EVEN) {
            ninfo.evensites = nodevol / 2 + 1;
            ninfo.oddsites = nodevol / 2;
        } else {
            ninfo.evensites = nodevol / 2;
            ninfo.oddsites = nodevol / 2 + 1;
        }
    }
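
// Illustrative sketch, not part of the original source: the even/odd split
// above in one helper. A node of even volume has equal counts; a node of odd
// volume gets one extra site of the parity of its minimum corner.
inline void split_even_odd(unsigned long long nodevol, bool min_is_even,
                           unsigned long long &evensites, unsigned long long &oddsites) {
    evensites = oddsites = nodevol / 2;
    if (nodevol % 2 != 0) {
        if (min_is_even)
            evensites++;
        else
            oddsites++;
    }
}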
    for (int i = 0; i < nodes.number; i++) {
        // ...
        if (ni.size[d] > nodes.max_size[d])
            nodes.max_size[d] = ni.size[d];
        // ...
    }

    // ...
    mynode.setup(ni, *this);
    evensites = ni.evensites;
    oddsites = ni.oddsites;
    volume = ni.evensites + ni.oddsites;
    // ...
    first_site_even = (min.parity() == EVEN);

    // ...
#ifdef EVEN_SITES_FIRST
    coordinates.resize(volume);
    // ...
    for (unsigned i = 0; i < volume; i++) {
        coordinates[lattice.site_index(l)] = l;
        // advance l to the next site of the node, odometer-style
        foralldir(d) {
            if (++l[d] < (min[d] + size[d]))
                break;
            l[d] = min[d];
        }
    }
#endif

    // ...
    subnodes.setup(*this);
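
// Illustrative sketch, not part of the original source: the coordinate walk
// above advances l odometer-style through the node box [min, min + size).
// A standalone version of the same stepping rule, with plain int arrays
// standing in for CoordinateVector:
inline bool next_site(int l[], const int min[], const int size[], int ndim) {
    for (int d = 0; d < ndim; d++) {
        if (++l[d] < min[d] + size[d])
            return true; // stepped without carry
        l[d] = min[d];   // wrap this coordinate, carry into the next dimension
    }
    return false;        // all coordinates wrapped: the walk is complete
}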
void lattice_struct::node_struct::subnode_struct::setup(const node_struct &tn) {
    size.asArray() = tn.size.asArray() / divisions.asArray();
    evensites = tn.evensites / number_of_subnodes;
    oddsites = tn.oddsites / number_of_subnodes;
    sites = evensites + oddsites;
}
bool lattice_struct::is_this_odd_boundary(Direction d) const {
    // ...
    return (lattice.size(d) % 2 > 0 && lattice->mynode.is_on_edge(d));
}
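
// Illustrative sketch, not part of the original source: why an odd lattice
// size needs special handling at the edge. A unit step flips site parity in
// the bulk, but across the periodic boundary coordinate L-1 wraps to 0, and
// the flip survives only when L is even; with odd L the wrapped neighbour
// has the SAME parity as its source.
inline bool parity_flips_across_boundary(int L) {
    int edge = (L - 1) % 2; // parity contribution of the edge coordinate
    int wrap = 0;           // the coordinate wraps to 0
    return edge != wrap;    // true only for even L
}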
    for (int d = 0; d < NDIRS; d++) {
        neighb[d] = (unsigned *)memalloc(((size_t)mynode.volume) * sizeof(unsigned));
    }

    size_t c_offset = mynode.volume;

    int too_large_node = 0;

    // ... (inside a loop over directions d, filling from_node / to_node:)
    for (size_t i = 0; i < mynode.volume; i++) {
        // ...
        ln = (l + d).mod(l_size);
        // ...
        neighb[d][i] = mynode.volume; // neighbour is off node: mark it
        // ...
        if (from_node.rank == mynode.rank) {
            from_node.rank = to_node.rank = node_rank(ln);
        }
        // ...
        if (l.parity() == EVEN)
            from_node.evensites++;
        else
            from_node.oddsites++;
        // ...
        if (ln.parity() == EVEN)
            to_node.evensites++;
        else
            to_node.oddsites++;
    }

    from_node.buffer = c_offset;
    // ...
    from_node.sites = from_node.evensites + from_node.oddsites;
    to_node.sites = to_node.evensites + to_node.oddsites;
    // ...
    assert(from_node.sites == to_node.sites);

    if (to_node.sites > 0) {
        // ...
        to_node.sitelist = (unsigned *)memalloc(to_node.sites * sizeof(unsigned));
    }

    // ...
    size_t to_even = 0, to_odd = 0, from_even = 0, from_odd = 0;

    for (size_t i = 0; i < mynode.volume; i++) {
        if (neighb[d][i] == mynode.volume) {
            // ...
            if (l.parity() == EVEN) {
                neighb[d][i] = c_offset + from_even;
                from_even++;
            } else {
                neighb[d][i] = c_offset + from_node.evensites + from_odd;
                from_odd++;
            }
            // ...
            ln = (l + d).mod(l_size);
            if (ln.parity() == EVEN) {
                to_node.sitelist[to_even++] = i;
            } else {
                to_node.sitelist[(to_odd++) + to_node.evensites] = i;
            }
        }
    }

#if defined(CUDA) || defined(HIP)
    auto p = copy_array_to_gpu(to_node.sitelist, to_node.sites);
    free(to_node.sitelist);
    to_node.sitelist = p;
#endif

    // ...
    c_offset += from_node.sites;

    if (c_offset >= (1ULL << 32))
        too_large_node = 1;
    // ...
    mynode.field_alloc_size = c_offset;

    // ...
    if (too_large_node)
        report_too_large_node();
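
// Illustrative sketch, not part of the original source: how the structures
// built above are used in a gather (a hypothetical helper, heavily
// simplified from the real MPI machinery). The sender packs the sites listed
// in to_node.sitelist into a contiguous buffer; on the receiver the halo
// block starting at from_node.buffer is filled by the receive, so
// neighb[d][i] indexes straight into the received data.
template <typename T>
void pack_send_buffer(const T *field, const unsigned *sitelist, unsigned nsites, T *sendbuf) {
    for (unsigned k = 0; k < nsites; k++)
        sendbuf[k] = field[sitelist[k]]; // gather the boundary sites in list order
}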
static_assert(NDIM <= 4 && "Dimensions at most 4 in dir_mask_t = unsigned char! Use "
                           "larger type to circumvent");

void lattice_struct::initialize_wait_arrays() {
    // ...
#if !(defined(CUDA) || defined(HIP))
    // ...
    for (size_t i = 0; i < mynode.volume; i++) {
        // ... (for each direction dir, with opposite direction odir:)
        if (neighb[dir][i] >= mynode.volume) {
            // ... (mark i as waiting for a gather from dir)
        }
        if (neighb[odir][i] >= mynode.volume) {
            // ...
        }
    }
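
// Illustrative sketch, not part of the original source: why the
// static_assert above caps NDIM at 4. dir_mask_t is unsigned char, and the
// wait arrays keep one bit per direction; with opposites there are
// 2 * NDIM = NDIRS bits, which must fit in the 8 bits of a byte.
// set_wait_bit is a hypothetical helper showing the bit layout.
inline void set_wait_bit(unsigned char &mask, int dir) {
    mask |= (unsigned char)(1u << dir); // dir in 0 .. NDIRS-1, NDIRS <= 8
}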
#ifdef SPECIAL_BOUNDARY_CONDITIONS

void lattice_struct::init_special_boundaries() {
    // ... (for each direction d, with opposite direction od:)
    special_boundaries[d].n_even = special_boundaries[d].n_odd =
        special_boundaries[d].n_total = 0;
    special_boundaries[d].is_needed = false;
    // ...
    if (is_up_dir(d) && mynode.min[d] + mynode.size[d] == l_size[d])
        coord = l_size[d] - 1;
    if (is_up_dir(od) && mynode.min[od] == 0)
        coord = 0;
    // ...
    if (nodes.n_divisions[abs(d)] == 1) {
        special_boundaries[d].is_needed = true;
        special_boundaries[d].offset = mynode.field_alloc_size;

        for (unsigned i = 0; i < mynode.volume; i++)
            if (coordinate(i, abs(d)) == coord) {
                // ...
                special_boundaries[d].n_total++;
                if (site_parity(i) == EVEN)
                    special_boundaries[d].n_even++;
                else
                    special_boundaries[d].n_odd++;
            }
        mynode.field_alloc_size += special_boundaries[d].n_total;
    }
    // ...
    special_boundaries[d].neighbours = nullptr;

    // ...
    if (mynode.field_alloc_size >= (1ULL << 32)) {
        // ...
        report_too_large_node();
    }
#ifndef SPECIAL_BOUNDARY_CONDITIONS
    assert(bc == hila::bc::PERIODIC &&
           "non-periodic BC only if SPECIAL_BOUNDARY_CONDITIONS defined");
#endif
    // ...
    if (special_boundaries[d].is_needed == false || bc == hila::bc::PERIODIC)
        return neighb[d];
    // ...
    if (special_boundaries[d].neighbours == nullptr) {
        setup_special_boundary_array(d);
    }
    return special_boundaries[d].neighbours;
void lattice_struct::setup_special_boundary_array(Direction d) {
    // ...
    if (special_boundaries[d].is_needed == false || special_boundaries[d].neighbours != nullptr)
        return;

    // ...
    special_boundaries[d].neighbours = (unsigned *)memalloc(sizeof(unsigned) * mynode.volume);
    special_boundaries[d].move_index =
        (unsigned *)memalloc(sizeof(unsigned) * special_boundaries[d].n_total);

    // ...
    int offs = special_boundaries[d].offset;
    // ...
    coord = l_size[d] - 1;

    // ...
    for (int i = 0; i < mynode.volume; i++) {
        if (coordinate(i, abs(d)) != coord) {
            special_boundaries[d].neighbours[i] = neighb[d][i];
        } else {
            special_boundaries[d].neighbours[i] = offs++;
            special_boundaries[d].move_index[k++] = neighb[d][i];
        }
    }

    assert(k == special_boundaries[d].n_total);
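
// Illustrative sketch, not part of the original source: the neighbour-array
// rewrite above reduced to plain arrays. Sites off the boundary plane keep
// their standard (periodic) neighbour; sites on the plane are redirected to
// the fresh slots [offset, offset + n_total), while move_index records where
// the periodic neighbour content must be fetched from before modification.
inline void build_boundary_neighbours(const unsigned *neighb_d, const bool *on_plane,
                                      unsigned volume, unsigned offset,
                                      unsigned *sb_neighbours, unsigned *move_index) {
    unsigned k = 0, offs = offset;
    for (unsigned i = 0; i < volume; i++) {
        if (!on_plane[i])
            sb_neighbours[i] = neighb_d[i]; // standard neighbour
        else {
            sb_neighbours[i] = offs++;      // redirect to the private copy slot
            move_index[k++] = neighb_d[i];  // remember the source to copy from
        }
    }
}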
void lattice_struct::set_lattice_globals() const {
#if defined(CUDA) || defined(HIP)
    this->backend_lattice->set_device_globals(*this);
#endif
}
bool lattice_struct::can_block_by_factor(const CoordinateVector &blocking_factor) const {
    bool ok = true;
    // ...
    foralldir(d) {
        if (blocking_factor[d] <= 0) {
            hila::out0 << "ERROR: invalid blocking factor " << blocking_factor[d] << '\n';
            // ...
        }
        ok = ok && (l_size[d] % blocking_factor[d] == 0);
    }
    return ok;
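
// Illustrative sketch, not part of the original source: the divisibility
// test above with plain arrays. Blocking by a factor is possible only if
// every factor is positive and divides the lattice size exactly.
inline bool can_block_sketch(const int l_size[], const int factor[], int ndim) {
    bool ok = true;
    for (int d = 0; d < ndim; d++) {
        if (factor[d] <= 0)
            return false;                        // invalid factor
        ok = ok && (l_size[d] % factor[d] == 0); // must divide evenly
    }
    return ok;
}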
    if (!can_block_by_factor(blocking_factor)) {
        hila::out0 << "Cannot block lattice with factor " << blocking_factor << '\n';
        // ...
    }

    // ...
    foralldir(d) blockvol[d] = l_size[d] / blocking_factor[d];

    // ...
    for (auto *l : defined_lattices) {
        if (l->l_size == blockvol) {
            // ...
        }
    }

    // ...
    int i = defined_lattices.size();
    // ...
    defined_lattices.push_back(lp);
    lp->setup_blocked_lattice(blockvol, i, *this);
void lattice_struct::setup_blocked_lattice(const CoordinateVector &siz, int label,
                                           const lattice_struct &orig) {
    // (last parameter reconstructed from the call site above; name assumed)
    // ...
    lattice.set_lattice_pointer(this);

    // ...
    mpi_comm_lat = orig.mpi_comm_lat;
    // ...
    nodes.n_divisions = orig.nodes.n_divisions;
    nodes.number = orig.nodes.number;
    // ...
    nodes.map_array = orig.nodes.map_array;
    nodes.map_inverse = orig.nodes.map_inverse;

    // ...
    initialize_wait_arrays();

#ifdef SPECIAL_BOUNDARY_CONDITIONS
    init_special_boundaries();
#endif

    if (mynode.field_alloc_size % 256 > 0)
        mynode.field_alloc_size += 256 - mynode.field_alloc_size % 256;

    // ...
    backend_lattice->setup(*this);
std::vector<lattice_struct::comm_node_struct>
lattice_struct::create_comm_node_vector(CoordinateVector offset, unsigned *index,
                                        bool receive) {
    // (last parameter reconstructed from the call sites below; name assumed)
    // ...
    std::vector<unsigned> np_even(nodes.number);
    std::vector<unsigned> np_odd(nodes.number);

    // ...
    for (unsigned i = 0; i < mynode.volume; i++) {
        // ...
        ln = (l + offset).mod(size());
        // ... (if ln is off node, on rank r:)
        index[i] = mynode.volume + r;
        // ...
        if (l.parity() == EVEN) {
            // ... (count into np_even[r] / np_odd[r] by the site's parity)
        }
        // ...
        if (ln.parity() == EVEN) {
            // ... (or count by the off-node site's parity instead)
        }
    }

    // ...
    int nnodes = 0;
    for (int r = 0; r < nodes.number; r++) {
        if (np_even[r] > 0 || np_odd[r] > 0)
            nnodes++;
    }

    // ...
    std::vector<comm_node_struct> node_v(nnodes);

    // ...
    for (int r = 0; r < nodes.number; r++) {
        if (np_even[r] > 0 || np_odd[r] > 0) {
            // ...
            node_v[n].evensites = np_even[r];
            node_v[n].oddsites = np_odd[r];
            node_v[n].sites = np_even[r] + np_odd[r];
            // ...
            node_v[n].sitelist =
                (unsigned *)memalloc(node_v[n].sites * sizeof(unsigned));
            // ...
            c_buffer += node_v[n].sites;
        }
    }

    // reset the counters, reused as write cursors in the fill pass
    for (int i = 0; i < nnodes; i++)
        np_even[i] = np_odd[i] = 0;

    // ...
    for (unsigned i = 0; i < mynode.volume; i++) {
        // ...
        ln = (l + offset).mod(size());
        // ...
        while (node_v[n].rank != r)
            n++;
        // ...
        if (ln.parity() == EVEN)
            k = np_even[n]++;
        else
            k = node_v[n].evensites + np_odd[n]++;
        // ...
        node_v[n].sitelist[k] = i;
    }

    // ...
    for (unsigned i = 0; i < mynode.volume; i++) {
        if (index[i] >= mynode.volume) {
            int r = index[i] - mynode.volume;
            // ...
            while (node_v[n].rank != r)
                n++;
            // ...
            if (l.parity() == EVEN)
                index[i] = node_v[n].buffer + (np_even[n]++);
            else
                index[i] = node_v[n].buffer + node_v[n].evensites + (np_odd[n]++);
        }
    }

    // ...
    return node_v;
}

// ...

    gen_comminfo_struct ci;

    // ...
    ci.index = (unsigned *)memalloc(mynode.volume * sizeof(unsigned));

    // ...
    ci.from_node = create_comm_node_vector(offset, ci.index, true);
    ci.to_node = create_comm_node_vector(offset, nullptr, false);

    // ...
    const comm_node_struct &r = ci.from_node[ci.from_node.size() - 1];
    ci.receive_buf_size = r.buffer + r.sites;
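
// Illustrative sketch, not part of the original source: the two-pass
// pattern used above, reduced to its core (and ignoring parity). Pass 1
// counts sites per destination rank so the lists can be sized exactly;
// pass 2 refills with the counters reset to zero, now used as write cursors.
#include <vector>
inline std::vector<std::vector<unsigned>>
bucket_by_rank(const std::vector<int> &rank_of_site, int nranks) {
    std::vector<unsigned> count(nranks, 0);
    for (int r : rank_of_site)
        count[r]++; // pass 1: count only
    std::vector<std::vector<unsigned>> lists(nranks);
    for (int r = 0; r < nranks; r++)
        lists[r].reserve(count[r]);
    for (unsigned i = 0; i < rank_of_site.size(); i++)
        lists[rank_of_site[i]].push_back(i); // pass 2: fill in site order
    return lists;
}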