HILA
Loading...
Searching...
No Matches
lattice.cpp
1
2#include "plumbing/defs.h"
3#include "plumbing/lattice.h"
4#include "plumbing/field.h"
5
6// Reporting on possibly too large node: stop if
7// node size (with buffers) is larger than 2^31 - too close for comfort!
8
9void report_too_large_node() {
10 if (hila::myrank() == 0) {
11 hila::out << "Node size too large: size = " << lattice->mynode.size[0];
12 for (int d = 1; d < NDIM; d++)
13 hila::out << " x " << lattice->mynode.size[d];
14 hila::out << " + communication buffers = " << lattice->mynode.field_alloc_size;
15 hila::out << "\nConsider using more nodes (smaller node size).\n";
16 hila::out << "[TODO: allow 64bit index?]\n";
17 }
19}
20
21
22// Define the global lattice handle
23Lattice lattice;
24/// A list of all defined lattices (for the future expansion)
25
26// Keep track of defined lattices
27std::vector<lattice_struct *> defined_lattices;
28
29// global bookkeeping
30namespace hila {
31int64_t n_gather_avoided = 0, n_gather_done = 0;
32}
33
34/// General lattice setup
36
37 assert(defined_lattices.size() == 0 && "lattice.setup() can be called only once");
38
39 l_label = 0;
40 defined_lattices.push_back(this);
41 parent = nullptr; // this has no parent lattice
42
43 l_volume = 1;
44 foralldir(i) {
45 l_size[i] = siz[i];
46 l_volume *= siz[i];
47 }
48
49 setup_layout();
50
52
53 // set up the comm arrays
55
56 // Initialize wait_array structures - has to be after std gathers()
57 initialize_wait_arrays();
58
59#ifdef SPECIAL_BOUNDARY_CONDITIONS
60 // do this after std. boundary is done
61 init_special_boundaries();
62#endif
63
64 // Alignment: set field_alloc_size to be divisible by 256
65 if (mynode.field_alloc_size % 256 > 0)
66 mynode.field_alloc_size += 256 - mynode.field_alloc_size % 256;
67
68#ifndef VANILLA
69 /* Setup backend-specific lattice info if necessary */
70 backend_lattice = new backend_lattice_struct;
71 backend_lattice->setup(*this);
72#endif
73
74 if (hila::check_input) {
75 hila::out << "***** Input check done *****\n";
77 }
78
79 test_std_gathers();
80}
81
82///////////////////////////////////////////////////////////////////////
83/// The routines is_on_mynode(), node_rank(), site_index()
84/// implement the "layout" of the nodes and sites of the lattice.
85/// To be changed in different implementations!
86///////////////////////////////////////////////////////////////////////
87
88///////////////////////////////////////////////////////////////////////
89/// Get the node rank for coordinates
90/// This is the fundamental routine which defines how the nodes
91/// are mapped. map_node_layout MUST BE compatible with this
92/// algorithm! So if you change this, change that too
93/// Divisor moved now below
94///
95/// Here the node number along one Direction is calculated with
96/// loc * n_divisions/l_size (with integer division)
97/// example: with l_size=14 and n_divisions=3,
98/// the dividers are at 0, 5, 10, 14
99///
100///////////////////////////////////////////////////////////////////////
101
103
104 int i = (loc[NDIM - 1] * nodes.n_divisions[NDIM - 1]) / l_size[NDIM - 1];
105
106 for (int dir = NDIM - 2; dir >= 0; dir--) {
107 i = i * nodes.n_divisions[dir] + ((loc[dir] * nodes.n_divisions[dir]) / l_size[dir]);
108 }
109
110 /* do we want to remap this? YES PLEASE */
111 if (i < 0 || i >= hila::number_of_nodes())
112 hila::out << "NODE NUMBER " << i << " CV " << loc << " MYNODE " << hila::myrank() << '\n';
113 i = nodes.remap(i);
114
115 return (i);
116}
117
118////////////////////////////////////////////////////////////////////////
119/// In this routine we set up the node divisors
120/// this must be compatible with the node_rank above
121////////////////////////////////////////////////////////////////////////
122
124
125 foralldir(dir) {
126 nodes.divisors[dir].resize(nodes.n_divisions[dir] + 1);
127 // to be sure, we use naively the same method than in node_rank
128 // last element will be size(dir), for convenience
129 int n = -1;
130 for (int i = 0; i <= l_size[dir]; i++) {
131 while (n < (i * nodes.n_divisions[dir]) / l_size[dir]) {
132 ++n;
133 nodes.divisors[dir][n] = i;
134 }
135 }
136 // hila::out0 << "Divisors ";
137 // for (int i=0;i<nodes.n_divisions[dir]; i++) hila::out0 << nodes.divisors[dir][i]
138 // << " "; hila::out0 << '\n';
139 }
140}
141
142
143///////////////////////////////////////////////////////////////////////
144/// Is the coordinate on THIS node ?
145///////////////////////////////////////////////////////////////////////
146
148 int d;
149
150 for (int dir = 0; dir < NDIM; dir++) {
151 d = loc[dir] - mynode.min[dir];
152 if (d < 0 || d >= mynode.size[dir])
153 return false;
154 }
155 return true;
156}
157
158///////////////////////////////////////////////////////////////////////
159/// give site index for ON NODE sites
160/// Note: loc really has to be on this node
161///////////////////////////////////////////////////////////////////////
162
163#ifndef SUBNODE_LAYOUT
164
166
167 unsigned i = loc[NDIM - 1] - mynode.min[NDIM - 1];
168 int s = loc[NDIM - 1];
169 for (int dir = NDIM - 2; dir >= 0; dir--) {
170 int l = loc[dir] - mynode.min[dir];
171 i = i * mynode.size[dir] + l;
172 s += loc[dir];
173 }
174
175 // now i contains the `running index' for site
176#if defined(EVEN_SITES_FIRST)
177 if (s % 2 == 0)
178 return (i / 2); /* even site index */
179 else
180 return (i / 2 + mynode.evensites); /* odd site */
181#else
182 return (i);
183#endif
184}
185
186///////////////////////////////////////////////////////////////////////
187/// give site index for nodeid sites
188/// compare to above
189///////////////////////////////////////////////////////////////////////
190
191unsigned lattice_struct::site_index(const CoordinateVector &loc, const unsigned nodeid) const {
192 const node_info ni = nodes.nodeinfo(nodeid);
193
194 unsigned i = loc[NDIM - 1] - ni.min[NDIM - 1];
195 int s = loc[NDIM - 1];
196 for (int dir = NDIM - 2; dir >= 0; dir--) {
197 int l = loc[dir] - ni.min[dir];
198 i = i * ni.size[dir] + l;
199 s += loc[dir];
200 }
201
202 // now i contains the `running index' for site
203#if defined(EVEN_SITES_FIRST)
204 if (s % 2 == 0)
205 return (i / 2); /* even site index */
206 else
207 return (i / 2 + ni.evensites); /* odd site */
208#else
209 return (i);
210#endif
211}
212
213#else // now SUBNODE_LAYOUT
214
215///////////////////////////////////////////////////////////////////////
216/// The (AVX) vectorized version of the site_index function.
217/// Now there are two stages: an "inner" vect index, which goes over
218/// over "virtual nodes", and the "outer" index inside the virtual node.
219/// This is to enable the (float/32bit) and the (double/64bit) vectorized
220/// structures, where the latter is achieve by merging two of the 32bit subnodes,
221/// along the Direction merged_subnodes_dir
222/// E.g.
223/// a 2-dim. 4x8 node is divided into 8 / 4 subnodes as follows:
224/// here 0-3 / 0-7 is the index within subnode, and a-h / a-d the subnode label.
225///
226/// 32bit(float) 64bit(double)
227/// 32bit storage: 64bit storage:
228/// 0a 1a | 0b 1b 0a 2a | 0b 2b 0(abcdefgh) 0(abcd) 1(abcd)
229/// 2a 3a | 2b 3b 4a 6a | 4b 6b 1(abcdefgh) 2(abcd) 3(abcd)
230/// ------------- 2(abcdefgh) 4(abcd) 5(abcd)
231/// 0e 1e | 0f 1f 1a 3a | 1b 3b 3(abcdefgh) 6(abcd) 7(abcd)
232/// 2e 3e | 2f 3f 5a 7a | 5b 7b
233/// ------------- ------------- 32bit vec 1st half <-> even 64bit
234/// 0c 1c | 0d 1d 0c 2c | 0d 2d 32bit 2nd half <-> odd 64bit
235/// 2c 3c | 2d 3d 4c 6c | 4d 6d
236/// -------------
237/// 0g 1g | 0h 1h 1c 3c | 1d 3d
238/// 2g 3g | 2h 3h 5c 7c | 5d 7d
239///
240/// The "storage" order above is the site-by-site order, enabling site
241/// traversal with maximum locality. It also enables mixed 32bit/64bit vector
242/// operations with half vectors.
243///
244/// Direction where the "doubling" is done is the last Direction where subnodes
245/// are divided. In layout, this will become the "slowest" Direction
246///
247///////////////////////////////////////////////////////////////////////
248
249unsigned lattice_struct::site_index(const CoordinateVector &loc) const {
250 return site_index(loc, hila::myrank());
251}
252
253///////////////////////////////////////////////////////////////////////
254/// give site index for nodeid sites
255/// compare to above
256///////////////////////////////////////////////////////////////////////
257
258unsigned lattice_struct::site_index(const CoordinateVector &loc, const unsigned nodeid) const {
259 int dir, l, s, subl;
260 unsigned i;
261
262 assert(nodeid < nodes.number);
263 const node_info ni = nodes.nodeinfo(nodeid);
264
265 // let's mod the coordinate to partition
266 // get subnode size - divisons are the same in all nodes
267
268 // foralldir(d) assert( ni.size[d] % mynode.subnodes.divisions[d] == 0);
269
270 CoordinateVector subsize;
271 subsize.asArray() = ni.size.asArray() / mynode.subnodes.divisions.asArray();
272
273 dir = mynode.subnodes.merged_subnodes_dir;
274 l = loc[dir] - ni.min[dir];
275 subl = l / subsize[dir];
276 // flip to interleaved ordering, see above -- this should work
277 // automatically w. mapping between 32 and 64 bit vector elems
278 subl = subl / 2 + (subl % 2) * (mynode.subnodes.divisions[dir] / 2);
279
280 i = 0;
281 s = 0;
282
283 for (dir = NDIM - 1; dir >= 0; dir--) {
284 l = loc[dir] - ni.min[dir];
285 if (dir != mynode.subnodes.merged_subnodes_dir) {
286 subl = subl * mynode.subnodes.divisions[dir] + l / subsize[dir];
287 }
288 i = i * subsize[dir] + l % subsize[dir];
289 s += loc[dir];
290 }
291
292#if defined(EVEN_SITES_FIRST)
293 if (s % 2 == 0)
294 i = i / 2; /* even site index */
295 else
296 i = i / 2 + ni.evensites / number_of_subnodes; /* odd site */
297#endif
298
299 return (subl + number_of_subnodes * i);
300}
301
302#endif // SUBNODE_LAYOUT
303
304
305/////////////////////////////////////////////////////////////////////
306/// Define nodeinfo(nodeid)
307/// Gives the coordinate range managed by the node number nodeid
308/////////////////////////////////////////////////////////////////////
309
311 // get the logical node id
312 assert(nodeid < number);
313 nodeid = inverse_remap(nodeid);
314
315 int64_t nodevol = 1;
316 node_info ninfo;
317
318 foralldir(d) {
319 int nodecoord = nodeid % n_divisions[d];
320 nodeid = nodeid / n_divisions[d];
321
322 ninfo.min[d] = divisors[d].at(nodecoord);
323 ninfo.size[d] = divisors[d].at(nodecoord + 1) - ninfo.min[d];
324 nodevol *= ninfo.size[d];
325 }
326
327 if (nodevol >= (1ULL << 32)) {
328 // node size is larger than 2^32-1 - does not fit to largest unsigned int!
329 report_too_large_node();
330 }
331
332 if (nodevol % 2 == 0) {
333 ninfo.evensites = ninfo.oddsites = nodevol / 2;
334 } else {
335 if (ninfo.min.parity() == EVEN) {
336 ninfo.evensites = nodevol / 2 + 1;
337 ninfo.oddsites = nodevol / 2;
338 } else {
339 ninfo.evensites = nodevol / 2;
340 ninfo.oddsites = nodevol / 2 + 1;
341 }
342 }
343
344 return ninfo;
345}
346
347
348/////////////////////////////////////////////////////////////////////
349/// only thing setup_nodes now does is to call mynode.setup and
350/// set the max size field of nodes-struct
351/////////////////////////////////////////////////////////////////////
352
354
355 nodes.number = hila::number_of_nodes();
356 // Loop over all node "origins"
357
358 // n keeps track of the node "root coordinates"
359 CoordinateVector n(0);
360
361 nodes.max_size = 0;
362
363 // use nodes.divisors - vectors to fill in stuff
364 for (int i = 0; i < nodes.number; i++) {
365 auto ni = nodes.nodeinfo(i);
366 foralldir(d) {
367 if (ni.size[d] > nodes.max_size[d])
368 nodes.max_size[d] = ni.size[d];
369 }
370
371 // use the opportunity to set up mynode when it is met
372 if (i == hila::myrank())
373 mynode.setup(ni, *this);
374 }
375}
376
377////////////////////////////////////////////////////////////////////////
378/// Fill in mynode fields -- node_rank() must be set up OK
379////////////////////////////////////////////////////////////////////////
381
382 rank = hila::myrank();
383
384 min = ni.min;
385 size = ni.size;
386
387 evensites = ni.evensites;
388 oddsites = ni.oddsites;
389 volume = ni.evensites + ni.oddsites;
390
391 first_site_even = (min.parity() == EVEN);
392 parent = &lattice;
393
394 // map site indexes to locations -- coordinates array
395 // after the above site_index should work
396
397#ifdef EVEN_SITES_FIRST
398 coordinates.resize(volume);
399 CoordinateVector l = min;
400 for (unsigned i = 0; i < volume; i++) {
401 coordinates[lattice.site_index(l)] = l;
402 // walk through the coordinates
403 foralldir(d) {
404 if (++l[d] < (min[d] + size[d]))
405 break;
406 l[d] = min[d];
407 }
408 }
409
410#endif
411
412 // set up the auxiliary site_factor array
413 unsigned v = 1;
414 foralldir(d) {
415 size_factor[d] = v; // = size[d-1] * size[d-2] * ..
416 v *= size[d];
417 }
418
419#ifdef SUBNODE_LAYOUT
420
421 // set up the subnodes
422 subnodes.setup(*this);
423#endif
424}
425
426#ifdef SUBNODE_LAYOUT
427
428////////////////////////////////////////////////////////////////////////
429/// Fill in subnodes -struct
430////////////////////////////////////////////////////////////////////////
431void lattice_struct::node_struct::subnode_struct::setup(const node_struct &tn) {
432 size.asArray() = tn.size.asArray() / divisions.asArray();
433 evensites = tn.evensites / number_of_subnodes;
434 oddsites = tn.oddsites / number_of_subnodes;
435 sites = evensites + oddsites;
436}
437
438#endif
439
440bool lattice_struct::is_this_odd_boundary(Direction d) const {
441 // return false unless lattice size is odd to direction d
442 // and mynode is on the boundary of the lattice
443
444 return (lattice.size(d) % 2 > 0 && lattice->mynode.is_on_edge(d));
445}
446
447/////////////////////////////////////////////////////////////////////
448/// Create the neighbour index arrays
449/// This is for the index array neighbours
450/// TODO: implement some other neighbour schemas!
451/////////////////////////////////////////////////////////////////////
452
453// unsigned lattice_struct::count_off_node_sites(const CoordinateVector &offset) {
454
455// CoordinateVector ln, l;
456// unsigned count;
457
458// for (int i = 0; i < mynode.volume; i++) {
459// l = coordinates(i);
460// // set ln to be the neighbour of the site
461// // TODO: FIXED BOUNDARY CONDITIONS DO NOT WRAP
462// ln = (l + offset).mod(size());
463
464// if (!is_on_mynode(ln))
465// count++;
466// }
467// return count;
468// }
469
470
472
473 // allocate neighbour arrays - TODO: these should
474 // be allocated on "device" memory too!
475
476 for (int d = 0; d < NDIRS; d++) {
477 neighb[d] = (unsigned *)memalloc(((size_t)mynode.volume) * sizeof(unsigned));
478 }
479
480 size_t c_offset = mynode.volume; // current offset in field-arrays
481
482 // We set the communication and the neigbour-array here
483 int too_large_node = 0;
484
485 for (Direction d = e_x; d < NDIRS; ++d) {
486
487 nn_comminfo[d].index = neighb[d]; // this is not really used for nn gathers
488
489 comm_node_struct &from_node = nn_comminfo[d].from_node;
490 // we can do the opposite send during another pass of the sites.
491 // This is just the gather inverted
492 // NOTE: this is not the send to Direction d, but to -d!
493 comm_node_struct &to_node = nn_comminfo[-d].to_node;
494
495 from_node.init();
496 to_node.init();
497
498 // if there are no communications the rank is left as is
499
500 // first pass through the sites
501 // - set the neighb array, anything to communicate?
502
503 for (size_t i = 0; i < mynode.volume; i++) {
504 CoordinateVector ln, l;
505 l = coordinates(i);
506 // set ln to be the neighbour of the site
507 ln = (l + d).mod(l_size);
508
509 if (is_on_mynode(ln)) {
510 neighb[d][i] = site_index(ln);
511 } else {
512 // short-circuit neighbour array, for later handling
513 neighb[d][i] = mynode.volume;
514
515 // Now site is off-node, this leads to gathering
516 if (from_node.rank == mynode.rank) {
517 from_node.rank = to_node.rank = node_rank(ln);
518 }
519
520 if (l.parity() == EVEN)
521 from_node.evensites++;
522 else
523 from_node.oddsites++;
524
525 // evensites / oddsites classified by the parity of receiving site
526 if (ln.parity() == EVEN)
527 to_node.evensites++;
528 else
529 to_node.oddsites++;
530 }
531 }
532
533 // store here the buffer index
534 from_node.buffer = c_offset;
535
536 from_node.sites = from_node.evensites + from_node.oddsites;
537 to_node.sites = to_node.evensites + to_node.oddsites;
538
539 assert(from_node.sites == to_node.sites);
540
541 if (to_node.sites > 0) {
542
543 // sitelist tells us which sites to send
544 to_node.sitelist = (unsigned *)memalloc(to_node.sites * sizeof(unsigned));
545
546 // set the remaining neighbour array indices and sitelists in another go
547 // over sites. temp counters NOTE: ordering is automatically right: with a
548 // given parity, neighbour node indices come in ascending order of host node
549 // index - no sorting needed
550 size_t to_even = 0, to_odd = 0, from_even = 0, from_odd = 0;
551
552 for (size_t i = 0; i < mynode.volume; i++) {
553 if (neighb[d][i] == mynode.volume) {
554 CoordinateVector l, ln;
555 l = coordinates(i);
556
557 if (l.parity() == EVEN) {
558 neighb[d][i] = c_offset + from_even;
559 ++from_even;
560
561 } else {
562 neighb[d][i] = c_offset + from_node.evensites + from_odd;
563 ++from_odd;
564 }
565
566 ln = (l + d).mod(l_size);
567 if (ln.parity() == EVEN) {
568 to_node.sitelist[to_even++] = i;
569 } else {
570 to_node.sitelist[(to_odd++) + to_node.evensites] = i;
571 }
572 }
573 }
574
575#if defined(CUDA) || defined(HIP)
576 auto p = copy_array_to_gpu(to_node.sitelist, to_node.sites);
577 free(to_node.sitelist);
578 to_node.sitelist = p;
579#endif
580 }
581
582 // and advance offset for next direction
583 c_offset += from_node.sites;
584
585 if (c_offset >= (1ULL << 32))
586 too_large_node = 1;
587
588 } /* directions */
589
590 /* Finally, set the site to the final offset (better be right!) */
591 mynode.field_alloc_size = c_offset;
592
593 if (hila::reduce_node_sum(too_large_node) > 0) {
594 report_too_large_node();
595 }
596}
597
598
599/************************************************************************/
600
601/* this formats the wait_array, used by forallsites_waitA()site_neighbour
602 * wait_array[i] contains a bit at position 1<<dir if nb(dir,i) is out
603 * of lattice.
604 * Site OK if ((wait_arr ^ xor_mask ) & and_mask) == 0
605 */
606
607static_assert(NDIM <= 4 && "Dimensions at most 4 in dir_mask_t = unsigned char! Use "
608 "larger type to circumvent");
609
610void lattice_struct::initialize_wait_arrays() {
611
612#if !(defined(CUDA) || defined(HIP))
613
614 /* Allocate here the mask array needed for forallsites_wait
615 * This will contain a bit at location dir if the neighbour
616 * at that dir is out of the local volume
617 */
618
619 wait_arr_ = (dir_mask_t *)memalloc(mynode.volume * sizeof(unsigned char));
620
621 for (size_t i = 0; i < mynode.volume; i++) {
622 wait_arr_[i] = 0; /* basic, no wait */
623 foralldir(dir) {
624 Direction odir = -dir;
625 if (neighb[dir][i] >= mynode.volume)
626 wait_arr_[i] = wait_arr_[i] | (1 << dir);
627 if (neighb[odir][i] >= mynode.volume)
628 wait_arr_[i] = wait_arr_[i] | (1 << odir);
629 }
630 }
631#endif
632}
633
634
635#ifdef SPECIAL_BOUNDARY_CONDITIONS
636
637/////////////////////////////////////////////////////////////////////
638/// set up special boundary
639/// sets up the bool array which tells if special neighbour indices
640/// are needed. Note that this is not uniform across the nodes,
641/// not all nodes have them.
642/////////////////////////////////////////////////////////////////////
643
644void lattice_struct::init_special_boundaries() {
645 for (Direction d = (Direction)0; d < NDIRS; ++d) {
646
647 // default values, nothing interesting happens
648 special_boundaries[d].n_even = special_boundaries[d].n_odd = special_boundaries[d].n_total =
649 0;
650 special_boundaries[d].is_needed = false;
651
652 Direction od = -d;
653 int coord = -1;
654 // do we get up/down boundary?
655 if (is_up_dir(d) && mynode.min[d] + mynode.size[d] == l_size[d])
656 coord = l_size[d] - 1;
657 if (is_up_dir(od) && mynode.min[od] == 0)
658 coord = 0;
659
660 if (coord >= 0) {
661 // now we got it
662
663 if (nodes.n_divisions[abs(d)] == 1) {
664 special_boundaries[d].is_needed = true;
665 special_boundaries[d].offset = mynode.field_alloc_size;
666
667 for (unsigned i = 0; i < mynode.volume; i++)
668 if (coordinate(i, abs(d)) == coord) {
669 // set buffer indices
670 special_boundaries[d].n_total++;
671 if (site_parity(i) == EVEN)
672 special_boundaries[d].n_even++;
673 else
674 special_boundaries[d].n_odd++;
675 }
676 mynode.field_alloc_size += special_boundaries[d].n_total;
677 }
678 }
679
680 // hila::out << "Node " << hila::myrank() << " dir " << d << " min " <<
681 // mynode.min << " is_on_edge "
682 // << lattice->mynode.is_on_edge(d) << '\n';
683
684 // allocate neighbours only on 1st use, otherwise unneeded
685 special_boundaries[d].neighbours = nullptr;
686 }
687
688 int toolarge = 0;
689 if (mynode.field_alloc_size >= (1ULL << 32))
690 toolarge = 1;
691 if (hila::reduce_node_sum(toolarge) > 0) {
692 report_too_large_node();
693 }
694}
695
696/////////////////////////////////////////////////////////////////////
697/// give the neighbour array pointer. Allocate if needed
698
699const unsigned *lattice_struct::get_neighbour_array(Direction d, hila::bc bc) {
700
701#ifndef SPECIAL_BOUNDARY_CONDITIONS
702 assert(bc == hila::bc::PERIODIC &&
703 "non-periodic BC only if SPECIAL_BOUNDARY_CONDITIONS defined");
704 return neighb[d];
705#else
706
707 // regular bc exit, should happen almost always
708 if (special_boundaries[d].is_needed == false || bc == hila::bc::PERIODIC)
709 return neighb[d];
710
711 if (special_boundaries[d].neighbours == nullptr) {
712 setup_special_boundary_array(d);
713 }
714 return special_boundaries[d].neighbours;
715
716#endif
717}
718
719//////////////////////////////////////////////////////////////////////
720/// and set up the boundary to one Direction
721//////////////////////////////////////////////////////////////////////
722
723void lattice_struct::setup_special_boundary_array(Direction d) {
724 // if it is not needed or already done...
725 if (special_boundaries[d].is_needed == false || special_boundaries[d].neighbours != nullptr)
726 return;
727
728 // now allocate neighbour array and the gathering array
729 special_boundaries[d].neighbours = (unsigned *)memalloc(sizeof(unsigned) * mynode.volume);
730 special_boundaries[d].move_index =
731 (unsigned *)memalloc(sizeof(unsigned) * special_boundaries[d].n_total);
732
733 int coord;
734 int offs = special_boundaries[d].offset;
735 if (is_up_dir(d))
736 coord = l_size[d] - 1;
737 else
738 coord = 0;
739
740 int k = 0;
741 for (int i = 0; i < mynode.volume; i++) {
742 if (coordinate(i, abs(d)) != coord) {
743 special_boundaries[d].neighbours[i] = neighb[d][i];
744 } else {
745 special_boundaries[d].neighbours[i] = offs++;
746 special_boundaries[d].move_index[k++] = neighb[d][i];
747 }
748 }
749
750 assert(k == special_boundaries[d].n_total);
751}
752
753#endif // SPECIAL_BOUNDARY_CONDITIONS
754
755/**
756 * @internal set lattice global variables, useful for GPUs
757 */
758
759void lattice_struct::set_lattice_globals() const {
760#if defined(CUDA) || defined(HIP)
761 this->backend_lattice->set_device_globals(*this);
762#endif
763}
764
765
766/**
767 * @internal implementation of lattice.can_block()
768 */
769
770bool lattice_struct::can_block_by_factor(const CoordinateVector &blocking_factor) const {
771
772#ifdef SUBNODE_LAYOUT
773 return false; // blocking not implemented for vector layout!
774#else
775
776 bool ok = true;
777 foralldir(d) {
778 if (blocking_factor[d] <= 0) {
779 hila::out0 << "ERROR: invalid blocking factor " << blocking_factor[d] << '\n';
781 }
782 ok = ok && (l_size[d] % blocking_factor[d] == 0);
783 }
784 return ok;
785
786#endif
787}
788
789/**
790 * @internal implementation of lattice.block()
791 */
792
793lattice_struct *lattice_struct::block_by_factor(const CoordinateVector &blocking_factor) {
794 if (!can_block_by_factor(blocking_factor)) {
795 hila::out0 << "Cannot block lattice with factor " << blocking_factor << '\n';
797 }
798
799 CoordinateVector blockvol;
800 foralldir(d) blockvol[d] = l_size[d] / blocking_factor[d];
801
803
804 for (auto *l : defined_lattices) {
805 if (l->l_size == blockvol) {
806 lattice.switch_to(l);
807 return l;
808 }
809 }
810
811 // Now did not find a lattice, make one
812
813 int i = defined_lattices.size(); // label for new lattice
814
815 auto *lp = new lattice_struct;
816
817 defined_lattices.push_back(lp);
818 lp->setup_blocked_lattice(blockvol, i, *this);
819
820 return lp;
821}
822
823/**
824 * @internal create blocked lattice
825 */
826
827void lattice_struct::setup_blocked_lattice(const CoordinateVector &siz, int label,
828 lattice_struct &orig) {
829
830 lattice.set_lattice_pointer(this);
831
832 l_label = label;
833
834 l_volume = 1;
835 foralldir(i) {
836 l_size[i] = siz[i];
837 l_volume *= siz[i];
838 }
839
840 parent = &orig;
841
842 mpi_comm_lat = orig.mpi_comm_lat;
843
844 // set the layout by hand from orig lattice
845 nodes.n_divisions = orig.nodes.n_divisions;
846 nodes.number = orig.nodes.number;
848
849 nodes.map_array = orig.nodes.map_array;
850 nodes.map_inverse = orig.nodes.map_inverse;
851
852 setup_nodes();
854
855 initialize_wait_arrays();
856
857#ifdef SPECIAL_BOUNDARY_CONDITIONS
858 // do this after std. boundary is done
859 init_special_boundaries();
860#endif
861
862 // Alignment: set field_alloc_size to be divisible by 256
863 if (mynode.field_alloc_size % 256 > 0)
864 mynode.field_alloc_size += 256 - mynode.field_alloc_size % 256;
865
866#ifndef VANILLA
867 /* Setup backend-specific lattice info if necessary */
868 backend_lattice = new backend_lattice_struct;
869 backend_lattice->setup(*this);
870#endif
871
872 test_std_gathers();
873}
874
875
876/////////////////////////////////////////////////////////////////////
877/// Create the neighbour index arrays
878/// This is for the index array neighbours
879/// TODO: implement some other neighbour schemas!
880/////////////////////////////////////////////////////////////////////
881
882#if 0
883
884/// This is a helper routine, returning a vector of comm_node_structs for all nodes
885/// involved with communication.
886/// If receive == true, this is "receive" end and index will be filled.
887/// For receive == false the is "send" half is done.
888
889std::vector<lattice_struct::comm_node_struct>
890lattice_struct::create_comm_node_vector(CoordinateVector offset, unsigned *index,
891 bool receive) {
892
893 // for send flip the offset
894 if (!receive)
895 offset = -offset;
896
897 // temp work array: np = node points
898 std::vector<unsigned> np_even(nodes.number); // std::vector initializes to zero
899 std::vector<unsigned> np_odd(nodes.number);
900
901 // we'll go through the sites twice, in order to first resolve the size of the
902 // needed buffers, then to fill them. Trying to avoid memory fragmentation a bit
903
904 // pass over sites
905 int num = 0; // number of sites off node
906 for (unsigned i = 0; i < mynode.volume; i++) {
907 CoordinateVector ln, l;
908 l = coordinates(i);
909 ln = (l + offset).mod(size());
910
911 if (is_on_mynode(ln)) {
912 if (receive)
913 index[i] = site_index(ln);
914 } else {
915 // Now site is off-node, this will leads to gathering
916 // use ci.index to store the node rank
917 unsigned r = node_rank(ln);
918
919 if (receive) {
920 index[i] = mynode.volume + r;
921
922 // using parity of THIS
923 if (l.parity() == EVEN)
924 np_even[r]++;
925 else
926 np_odd[r]++;
927
928 } else {
929 // on the sending side - we use parity of target
930 if (ln.parity() == EVEN)
931 np_even[r]++;
932 else
933 np_odd[r]++;
934 }
935
936 num++;
937 }
938 }
939
940 // count the number of nodes taking part
941 unsigned nnodes = 0;
942 for (int r = 0; r < nodes.number; r++) {
943 if (np_even[r] > 0 || np_odd[r] > 0)
944 nnodes++;
945 }
946
947 // allocate the vector
948 std::vector<comm_node_struct> node_v(nnodes);
949
950 int n = 0;
951 int c_buffer = 0;
952 for (int r = 0; r < nodes.number; r++) {
953 if (np_even[r] > 0 || np_odd[r] > 0) {
954 // add the rank
955 node_v[n].rank = r;
956 node_v[n].evensites = np_even[r];
957 node_v[n].oddsites = np_odd[r];
958 node_v[n].sites = np_even[r] + np_odd[r];
959
960 // pre-allocate the sitelist for sufficient size
961 if (!receive)
962 node_v[n].sitelist =
963 (unsigned *)memalloc(node_v[n].sites * sizeof(unsigned));
964
965 node_v[n].buffer =
966 c_buffer; // running idx to comm buffer - used from receive
967 c_buffer += node_v[n].sites;
968 n++;
969 }
970 }
971
972 // ci.receive_buf_size = c_buffer; // total buf size
973
974 // we'll reuse np_even and np_odd as counting arrays below
975 for (int i = 0; i < nnodes; i++)
976 np_even[i] = np_odd[i] = 0;
977
978 if (!receive) {
979 // sending end -- create sitelists
980
981 for (unsigned i = 0; i < mynode.volume; i++) {
982 CoordinateVector ln, l;
983 l = coordinates(i);
984 ln = (l + offset).mod(size());
985
986 if (!is_on_mynode(ln)) {
987 unsigned r = node_rank(ln);
988 int n = 0;
989 // find the node from the list
990 while (node_v[n].rank != r)
991 n++;
992 // we'll fill the buffers according to the parity of receieving node
993 // first even, then odd sites in the buffer
994 unsigned k;
995 if (ln.parity() == EVEN)
996 k = np_even[n]++;
997 else
998 k = node_v[n].evensites + np_odd[n]++;
999
1000 // and set the ptr to the site to be communicated
1001 node_v[n].sitelist[k] = i;
1002 }
1003 }
1004
1005 } else {
1006 // receive end
1007 // fill in the index pointers
1008
1009 for (unsigned i = 0; i < mynode.volume; i++) {
1010 if (index[i] >= mynode.volume) {
1011 int r = index[i] - mynode.volume;
1012 int n = 0;
1013 // find the node which sends this
1014 while (node_v[n].rank != r)
1015 n++;
1016
1017 CoordinateVector l = coordinates(i);
1018 if (l.parity() == EVEN)
1019 index[i] = node_v[n].buffer + (np_even[n]++);
1020 else
1021 index[i] = node_v[n].buffer + node_v[n].evensites + (np_odd[n]++);
1022 }
1023 }
1024 }
1025
1026 return node_v;
1027}
1028
1030lattice_struct::create_general_gather(const CoordinateVector &offset) {
1031 // allocate neighbour arrays - TODO: these should
1032 // be allocated on "device" memory too!
1033
1034 gen_comminfo_struct ci;
1035
1036 // communication buffer
1037 ci.index = (unsigned *)memalloc(mynode.volume * sizeof(unsigned));
1038
1039 ci.from_node =
1040 create_comm_node_vector(offset, ci.index, true); // create receive end
1041 ci.to_node = create_comm_node_vector(offset, nullptr, false); // create sending end
1042
1043 // set the total receive buffer size from the last vector
1044 const comm_node_struct &r = ci.from_node[ci.from_node.size() - 1];
1045 ci.receive_buf_size = r.buffer + r.sites;
1046
1047 return ci;
1048}
1049
1050#endif
main interface class to lattices.
Definition lattice.h:396
int size(Direction d) const
lattice.size() -> CoordinateVector or lattice.size(d) -> int returns the dimensions of the lattice,...
Definition lattice.h:433
Lattice switch_to(lattice_struct *lat)
With a valid lattice_struct pointer, switch this lattice to be active.
Definition lattice.h:503
const Array< n, m, T > & asArray() const
Cast Matrix to Array.
Definition matrix.h:476
void setup_node_divisors()
Definition lattice.cpp:123
void setup_base_lattice(const CoordinateVector &siz)
General lattice setup.
Definition lattice.cpp:35
dir_mask_t *__restrict__ wait_arr_
implement waiting using mask_t - unsigned char is good for up to 4 dim.
Definition lattice.h:216
void setup_nodes()
Definition lattice.cpp:353
bool is_on_mynode(const CoordinateVector &c) const
Is the coordinate on THIS node ?
Definition lattice.cpp:147
unsigned *__restrict__ neighb[NDIRS]
Main neighbour index array.
Definition lattice.h:213
void create_std_gathers()
Definition lattice.cpp:471
std::array< nn_comminfo_struct, NDIRS > nn_comminfo
nearest neighbour comminfo struct
Definition lattice.h:210
unsigned site_index(const CoordinateVector &c) const
Definition lattice.cpp:165
int node_rank(const CoordinateVector &c) const
Definition lattice.cpp:102
T abs(const Complex< T > &a)
Return absolute value of Complex number.
Definition cmplx.h:1266
constexpr Parity EVEN
bit pattern: 001
#define foralldir(d)
Macro to loop over (all) Direction(s)
Definition coordinates.h:80
constexpr unsigned NDIRS
Number of directions.
Definition coordinates.h:57
unsigned char dir_mask_t
Direction
Enumerator for direction that assigns integer to direction to be interpreted as unit vector.
Definition coordinates.h:34
This file defines all includes for HILA.
This files containts definitions for the Field class and the classes required to define it such as fi...
Implement hila::swap for gauge fields.
Definition array.h:982
int myrank()
rank of this node
Definition com_mpi.cpp:237
int number_of_nodes()
how many nodes there are
Definition com_mpi.cpp:248
void synchronize()
synchronize mpi + gpu
Definition com_mpi.cpp:257
void reduce_node_sum(T *value, int send_count, bool allreduce=true)
Reduce an array across nodes.
Definition com_mpi.h:337
std::ostream out
this is our default output file stream
std::ostream out0
This writes output only from main process (node 0)
bc
list of field boundary conditions - used only if SPECIAL_BOUNDARY_CONDITIONS defined
Definition lattice.h:31
void finishrun()
Normal, controlled exit - all nodes must call this. Prints timing information and information about c...
void terminate(int status)
Helper class for loading the vectorized lattice.
void setup(lattice_struct *lattice)
node_info nodeinfo(int i) const
Definition lattice.cpp:310
int remap(int i) const
And the call interface for remapping.
Information necessary to communicate with a node.
Definition lattice.h:137
general communication
Definition lattice.h:202
Information about the node stored on this process.
Definition lattice.h:80
void setup(node_info &ni, lattice_struct &lattice)
Fill in mynode fields – node_rank() must be set up OK.
Definition lattice.cpp:380
bool is_on_edge(Direction d) const
true if this node is on the edge of the lattice to dir d
Definition lattice.h:110
useful information about a node
Definition lattice.h:48