3#include "plumbing/lattice.h"
9void report_too_large_node() {
11 hila::out <<
"Node size too large: size = " << lattice.mynode.size[0];
12 for (
int d = 1; d < NDIM; d++)
13 hila::out <<
" x " << lattice.mynode.size[d];
14 hila::out <<
" + communication buffers = " << lattice.mynode.field_alloc_size;
15 hila::out <<
"\nConsider using more nodes (smaller node size).\n";
16 hila::out <<
"[TODO: allow 64bit index?]\n";
// File-scope list of pointers to lattice_struct objects; elsewhere in this
// file the current object is appended with lattices.push_back(this).
28std::vector<lattice_struct *> lattices;
// File-local counter, post-incremented when a lattice's l_label is assigned
// (l_label = lattice_count++), so labels start from 0.
30static int lattice_count = 0;
35 l_label = lattice_count++;
38 lattices.push_back(
this);
54 initialize_wait_arrays();
56#ifdef SPECIAL_BOUNDARY_CONDITIONS
58 init_special_boundaries();
62 if (mynode.field_alloc_size % 256 > 0)
63 mynode.field_alloc_size += 256 - mynode.field_alloc_size % 256;
68 backend_lattice->
setup(*
this);
71 if (hila::check_input) {
72 hila::out <<
"***** Input check done *****\n";
104 i = (loc[NDIM - 1] * nodes.n_divisions[NDIM - 1]) / l_size[NDIM - 1];
105 for (dir = NDIM - 2; dir >= 0; dir--) {
106 i = i * nodes.n_divisions[dir] + ((loc[dir] * nodes.n_divisions[dir]) / l_size[dir]);
121 for (
int dir = 0; dir < NDIM; dir++) {
122 d = loc[dir] - mynode.min[dir];
123 if (d < 0 || d >= mynode.size[dir])
134#ifndef SUBNODE_LAYOUT
140 i = l = loc[NDIM - 1] - mynode.min[NDIM - 1];
142 for (dir = NDIM - 2; dir >= 0; dir--) {
143 l = loc[dir] - mynode.min[dir];
144 i = i * mynode.size[dir] + l;
149#if defined(EVEN_SITES_FIRST)
153 return (i / 2 + mynode.evensites);
167 const node_info &ni = nodes.nodelist[nodeid];
169 i = l = loc[NDIM - 1] - ni.min[NDIM - 1];
171 for (dir = NDIM - 2; dir >= 0; dir--) {
172 l = loc[dir] - ni.min[dir];
173 i = i * ni.size[dir] + l;
178#if defined(EVEN_SITES_FIRST)
182 return (i / 2 + ni.evensites);
237 assert(nodeid < nodes.number);
238 const node_info &ni = nodes.nodelist[nodeid];
246 subsize.
asArray() = ni.size.
asArray() / mynode.subnodes.divisions.asArray();
248 dir = mynode.subnodes.merged_subnodes_dir;
249 l = loc[dir] - ni.min[dir];
250 subl = l / subsize[dir];
253 subl = subl / 2 + (subl % 2) * (mynode.subnodes.divisions[dir] / 2);
258 for (dir = NDIM - 1; dir >= 0; dir--) {
259 l = loc[dir] - ni.min[dir];
260 if (dir != mynode.subnodes.merged_subnodes_dir) {
261 subl = subl * mynode.subnodes.divisions[dir] + l / subsize[dir];
263 i = i * subsize[dir] + l % subsize[dir];
267#if defined(EVEN_SITES_FIRST)
271 i = i / 2 + ni.evensites / number_of_subnodes;
274 return (subl + number_of_subnodes * i);
295 nodes.nodelist.resize(nodes.number);
303 for (
int i = 0; i < nodes.number; i++) {
305 foralldir(d) l[d] = nodes.divisors[d][n[d]];
307 int nn = node_rank(l);
311 ni.min[d] = nodes.divisors[d][n[d]];
312 ni.size[d] = nodes.divisors[d][n[d] + 1] - nodes.divisors[d][n[d]];
315 if (ni.size[d] > nodes.max_size[d])
316 nodes.max_size[d] = ni.size[d];
319 if (v >= (1ULL << 32)) {
321 report_too_large_node();
325 ni.evensites = ni.oddsites = v / 2;
328 if (l.parity() ==
EVEN) {
329 ni.evensites = v / 2 + 1;
332 ni.evensites = v / 2;
333 ni.oddsites = v / 2 + 1;
340 if (n[d] < nodes.n_divisions[d])
347 mynode.
setup(ni, lattice);
363 evensites = ni.evensites;
364 oddsites = ni.oddsites;
365 sites = ni.evensites + ni.oddsites;
367 first_site_even = (min.parity() ==
EVEN);
372 l[d] = (min[d] + size[d]) % lattice.l_size[d];
373 nn[d] = lattice.node_rank(l);
374 l[d] = (lattice.l_size[d] + min[d] - 1) % lattice.l_size[d];
375 nn[opp_dir(d)] = lattice.node_rank(l);
381#ifdef EVEN_SITES_FIRST
382 coordinates.resize(sites);
384 for (
unsigned i = 0; i < sites; i++) {
388 if (++l[d] < (min[d] + size[d]))
406 subnodes.setup(*
this);
415void lattice_struct::node_struct::subnode_struct::setup(
const node_struct &tn) {
416 size.asArray() = tn.size.
asArray() / divisions.asArray();
417 evensites = tn.evensites / number_of_subnodes;
418 oddsites = tn.oddsites / number_of_subnodes;
419 sites = evensites + oddsites;
435 for (
int d = 0; d <
NDIRS; d++) {
436 neighb[d] = (
unsigned *)memalloc(((
size_t)mynode.sites) *
sizeof(unsigned));
439 size_t c_offset = mynode.sites;
442 int too_large_node = 0;
454 from_node.rank = to_node.rank = mynode.rank;
458 from_node.sites = from_node.evensites = from_node.oddsites = 0;
462 for (
int i = 0; i < mynode.sites; i++) {
467 ln = (l + d).mod(size());
476 neighb[d][i] = mynode.sites;
480 unsigned rank = node_rank(ln);
481 if (from_node.rank == mynode.rank) {
482 from_node.rank = rank;
483 }
else if (from_node.rank != rank) {
484 hila::out <<
"Internal error in nn-communication setup\n";
489 if (l.parity() ==
EVEN)
490 from_node.evensites++;
492 from_node.oddsites++;
499 from_node.buffer = c_offset;
501 to_node.rank = from_node.rank;
502 to_node.sites = from_node.sites;
504 to_node.evensites = from_node.oddsites;
505 to_node.oddsites = from_node.evensites;
509 to_node.sitelist = (
unsigned *)memalloc(to_node.sites *
sizeof(
unsigned));
513 from_node.sitelist = (
unsigned *)memalloc(from_node.sites *
sizeof(
unsigned));
516 to_node.sitelist =
nullptr;
524 size_t c_even, c_odd;
527 for (
size_t i = 0; i < mynode.sites; i++) {
528 if (
neighb[d][i] == mynode.sites) {
532 if (l.parity() ==
EVEN) {
534 neighb[d][i] = c_offset + c_even;
535 if (c_offset + c_even >= (1ULL << 32))
539 from_node.sitelist[c_even] = i;
543 to_node.sitelist[c_even + to_node.evensites] = i;
548 neighb[d][i] = c_offset + from_node.evensites + c_odd;
549 if (c_offset + from_node.evensites + c_odd >= (1ULL << 32))
553 from_node.sitelist[c_odd + from_node.evensites] = i;
557 to_node.sitelist[c_odd] = i;
565 c_offset += from_node.sites;
567 if (c_offset >= (1ULL << 32))
573 mynode.field_alloc_size = c_offset;
576 report_too_large_node();
589static_assert(NDIM <= 4 &&
"Dimensions at most 4 in dir_mask_t = unsigned char! Use "
590 "larger type to circumvent");
592void lattice_struct::initialize_wait_arrays() {
602 for (
size_t i = 0; i < mynode.sites; i++) {
606 if (
neighb[dir][i] >= mynode.sites)
608 if (
neighb[odir][i] >= mynode.sites)
615#ifdef SPECIAL_BOUNDARY_CONDITIONS
624void lattice_struct::init_special_boundaries() {
628 special_boundaries[d].n_even = special_boundaries[d].n_odd = special_boundaries[d].n_total =
630 special_boundaries[d].is_needed =
false;
635 if (is_up_dir(d) && mynode.min[d] + mynode.size[d] == size(d))
637 if (is_up_dir(od) && mynode.min[od] == 0)
643 if (nodes.n_divisions[
abs(d)] == 1) {
644 special_boundaries[d].is_needed =
true;
645 special_boundaries[d].offset = mynode.field_alloc_size;
647 for (
unsigned i = 0; i < mynode.sites; i++)
648 if (coordinate(i,
abs(d)) == coord) {
650 special_boundaries[d].n_total++;
651 if (site_parity(i) ==
EVEN)
652 special_boundaries[d].n_even++;
654 special_boundaries[d].n_odd++;
656 mynode.field_alloc_size += special_boundaries[d].n_total;
665 special_boundaries[d].neighbours =
nullptr;
669 if (mynode.field_alloc_size >= (1ULL << 32))
672 report_too_large_node();
681#ifndef SPECIAL_BOUNDARY_CONDITIONS
682 assert(bc == hila::bc::PERIODIC &&
683 "non-periodic BC only if SPECIAL_BOUNDARY_CONDITIONS defined");
688 if (special_boundaries[d].is_needed ==
false || bc == hila::bc::PERIODIC)
691 if (special_boundaries[d].neighbours ==
nullptr) {
692 setup_special_boundary_array(d);
694 return special_boundaries[d].neighbours;
703void lattice_struct::setup_special_boundary_array(
Direction d) {
705 if (special_boundaries[d].is_needed ==
false || special_boundaries[d].neighbours !=
nullptr)
709 special_boundaries[d].neighbours = (
unsigned *)memalloc(
sizeof(
unsigned) * mynode.sites);
710 special_boundaries[d].move_index =
711 (
unsigned *)memalloc(
sizeof(
unsigned) * special_boundaries[d].n_total);
714 int offs = special_boundaries[d].offset;
721 for (
int i = 0; i < mynode.sites; i++) {
722 if (coordinate(i,
abs(d)) != coord) {
723 special_boundaries[d].neighbours[i] =
neighb[d][i];
725 special_boundaries[d].neighbours[i] = offs++;
726 special_boundaries[d].move_index[k++] =
neighb[d][i];
730 assert(k == special_boundaries[d].n_total);
748std::vector<lattice_struct::comm_node_struct>
749lattice_struct::create_comm_node_vector(
CoordinateVector offset,
unsigned *index,
757 std::vector<unsigned> np_even(nodes.number);
758 std::vector<unsigned> np_odd(nodes.number);
765 for (
unsigned i = 0; i < mynode.sites; i++) {
768 ln = (l + offset).mod(size());
776 unsigned r = node_rank(ln);
779 index[i] = mynode.sites + r;
782 if (l.parity() ==
EVEN)
789 if (ln.parity() ==
EVEN)
801 for (
int r = 0; r < nodes.number; r++) {
802 if (np_even[r] > 0 || np_odd[r] > 0)
807 std::vector<comm_node_struct> node_v(nnodes);
811 for (
int r = 0; r < nodes.number; r++) {
812 if (np_even[r] > 0 || np_odd[r] > 0) {
815 node_v[n].evensites = np_even[r];
816 node_v[n].oddsites = np_odd[r];
817 node_v[n].sites = np_even[r] + np_odd[r];
822 (
unsigned *)memalloc(node_v[n].sites *
sizeof(
unsigned));
826 c_buffer += node_v[n].sites;
834 for (
int i = 0; i < nnodes; i++)
835 np_even[i] = np_odd[i] = 0;
840 for (
unsigned i = 0; i < mynode.sites; i++) {
843 ln = (l + offset).mod(size());
846 unsigned r = node_rank(ln);
849 while (node_v[n].rank != r)
854 if (ln.parity() ==
EVEN)
857 k = node_v[n].evensites + np_odd[n]++;
860 node_v[n].sitelist[k] = i;
868 for (
unsigned i = 0; i < mynode.sites; i++) {
869 if (index[i] >= mynode.sites) {
870 int r = index[i] - mynode.sites;
873 while (node_v[n].rank != r)
877 if (l.parity() ==
EVEN)
878 index[i] = node_v[n].buffer + (np_even[n]++);
880 index[i] = node_v[n].buffer + node_v[n].evensites + (np_odd[n]++);
893 gen_comminfo_struct ci;
896 ci.index = (
unsigned *)memalloc(mynode.sites *
sizeof(
unsigned));
899 create_comm_node_vector(offset, ci.index,
true);
900 ci.to_node = create_comm_node_vector(offset,
nullptr,
false);
903 const comm_node_struct &r = ci.from_node[ci.from_node.size() - 1];
904 ci.receive_buf_size = r.buffer + r.sites;
const Array< n, m, T > & asArray() const
Cast Matrix to Array.
dir_mask_t *__restrict__ wait_arr_
implement waiting using mask_t - unsigned char is good for up to 4 dim.
void setup_nodes()
invert the mynode index -> location (only on this node)
void setup(const CoordinateVector &siz)
General lattice setup.
bool is_on_mynode(const CoordinateVector &c) const
Is the coordinate on THIS node.
unsigned *__restrict__ neighb[NDIRS]
Main neighbour index array.
void create_std_gathers()
std::array< nn_comminfo_struct, NDIRS > nn_comminfo
nearest neighbour comminfo struct
unsigned site_index(const CoordinateVector &c) const
T abs(const Complex< T > &a)
Return absolute value of Complex number.
constexpr Parity EVEN
bit pattern: 001
#define foralldir(d)
Macro to loop over (all) Direction(s)
constexpr unsigned NDIRS
Number of directions.
Direction
Enumerator for direction that assigns integer to direction to be interpreted as unit vector.
This file defines all includes for HILA.
This file contains definitions for the Field class and the classes required to define it such as fi...
int myrank()
rank of this node
int number_of_nodes()
how many nodes there are
void reduce_node_sum(T *value, int send_count, bool allreduce=true)
Reduce an array across nodes.
std::ostream out
this is our default output file stream
bc
list of field boundary conditions - used only if SPECIAL_BOUNDARY_CONDITIONS defined
void finishrun()
Normal, controlled exit - all nodes must call this. Prints timing information and information about c...
Helper class for loading the vectorized lattice.
void setup(lattice_struct *lattice)
unsigned remap(unsigned i) const
And the call interface for remapping.
Information necessary to communicate with a node.
Information about the node stored on this process.
void setup(node_info &ni, lattice_struct &lattice)
Fill in mynode fields – node_rank() must be set up OK.
useful information about a node