HILA
Loading...
Searching...
No Matches
lattice.cpp
1
2#include "plumbing/defs.h"
3#include "plumbing/lattice.h"
4#include "plumbing/field.h"
5
6// Reporting on possibly too large node: stop if
7// node size (with buffers) is larger than 2^31 - too close for comfort!
8
9void report_too_large_node() {
10 if (hila::myrank() == 0) {
11 hila::out << "Node size too large: size = " << lattice.mynode.size[0];
12 for (int d = 1; d < NDIM; d++)
13 hila::out << " x " << lattice.mynode.size[d];
14 hila::out << " + communication buffers = " << lattice.mynode.field_alloc_size;
15 hila::out << "\nConsider using more nodes (smaller node size).\n";
16 hila::out << "[TODO: allow 64bit index?]\n";
17 }
19}
20
21// HACK: force disable vectorization in a loop using
22// if(disable_avx[X]==0){};
23// Field<double> disable_avx;
24
25// Define the global lattice var
26lattice_struct lattice;
27/// A list of all defined lattices (for the future expansion)
28std::vector<lattice_struct *> lattices;
29
30static int lattice_count = 0;
31
32/// General lattice setup
34
35 l_label = lattice_count++;
36
37 // Add this lattice to the list
38 lattices.push_back(this);
39
40 l_volume = 1;
41 foralldir(i) {
42 l_size[i] = siz[i];
43 l_volume *= siz[i];
44 }
45
46 setup_layout();
47
49
50 // set up the comm arrays
52
53 // Initialize wait_array structures - has to be after std gathers()
54 initialize_wait_arrays();
55
56#ifdef SPECIAL_BOUNDARY_CONDITIONS
57 // do this after std. boundary is done
58 init_special_boundaries();
59#endif
60
61 // Alignment: set field_alloc_size to be divisible by 256
62 if (mynode.field_alloc_size % 256 > 0)
63 mynode.field_alloc_size += 256 - mynode.field_alloc_size % 256;
64
65#ifndef VANILLA
66 /* Setup backend-specific lattice info if necessary */
67 backend_lattice = new backend_lattice_struct;
68 backend_lattice->setup(*this);
69#endif
70
71 if (hila::check_input) {
72 hila::out << "***** Input check done *****\n";
74 }
75
76 test_std_gathers();
77
78 // disable_avx = 0;
79}
80
81///////////////////////////////////////////////////////////////////////
82/// The routines is_on_mynode(), node_rank(), site_index()
83/// implement the "layout" of the nodes and sites of the lattice.
84/// To be changed in different implementations!
85///////////////////////////////////////////////////////////////////////
86
87///////////////////////////////////////////////////////////////////////
88/// Get the node rank for coordinates
89/// This is the fundamental routine which defines how the nodes
90/// are mapped. map_node_layout MUST BE compatible with this
91/// algorithm! So if you change this, change that too
92///
93/// Here the node number along one Direction is calculated with
94/// loc * n_divisions/l_size (with integer division)
95/// example: with l_size=14 and n_divisions=3,
96/// the dividers are at 0, 5, 10, 14
97///
98///////////////////////////////////////////////////////////////////////
99
100int lattice_struct::node_rank(const CoordinateVector &loc) const {
101 int i;
102 int dir;
103
104 i = (loc[NDIM - 1] * nodes.n_divisions[NDIM - 1]) / l_size[NDIM - 1];
105 for (dir = NDIM - 2; dir >= 0; dir--) {
106 i = i * nodes.n_divisions[dir] + ((loc[dir] * nodes.n_divisions[dir]) / l_size[dir]);
107 }
108 /* do we want to remap this? YES PLEASE */
109 i = nodes.remap(i);
110
111 return (i);
112}
113
114///////////////////////////////////////////////////////////////////////
115/// Is the coordinate on THIS node
116///////////////////////////////////////////////////////////////////////
117
119 int d;
120
121 for (int dir = 0; dir < NDIM; dir++) {
122 d = loc[dir] - mynode.min[dir];
123 if (d < 0 || d >= mynode.size[dir])
124 return false;
125 }
126 return true;
127}
128
129///////////////////////////////////////////////////////////////////////
130/// give site index for ON NODE sites
131/// Note: loc really has to be on this node
132///////////////////////////////////////////////////////////////////////
133
134#ifndef SUBNODE_LAYOUT
135
137
138 unsigned i = loc[NDIM - 1] - mynode.min[NDIM - 1];
139 int s = loc[NDIM - 1];
140 for (int dir = NDIM - 2; dir >= 0; dir--) {
141 int l = loc[dir] - mynode.min[dir];
142 i = i * mynode.size[dir] + l;
143 s += loc[dir];
144 }
145
146 // now i contains the `running index' for site
147#if defined(EVEN_SITES_FIRST)
148 if (s % 2 == 0)
149 return (i / 2); /* even site index */
150 else
151 return (i / 2 + mynode.evensites); /* odd site */
152#else
153 return (i);
154#endif
155}
156
157///////////////////////////////////////////////////////////////////////
158/// give site index for nodeid sites
159/// compare to above
160///////////////////////////////////////////////////////////////////////
161
162unsigned lattice_struct::site_index(const CoordinateVector &loc, const unsigned nodeid) const {
163 const node_info &ni = nodes.nodelist[nodeid];
164
165 unsigned i = loc[NDIM - 1] - ni.min[NDIM - 1];
166 int s = loc[NDIM - 1];
167 for (int dir = NDIM - 2; dir >= 0; dir--) {
168 int l = loc[dir] - ni.min[dir];
169 i = i * ni.size[dir] + l;
170 s += loc[dir];
171 }
172
173 // now i contains the `running index' for site
174#if defined(EVEN_SITES_FIRST)
175 if (s % 2 == 0)
176 return (i / 2); /* even site index */
177 else
178 return (i / 2 + ni.evensites); /* odd site */
179#else
180 return (i);
181#endif
182}
183
184#else // now SUBNODE_LAYOUT
185
186///////////////////////////////////////////////////////////////////////
187/// The (AVX) vectorized version of the site_index function.
188/// Now there are two stages: an "inner" vect index, which goes over
189/// over "virtual nodes", and the "outer" index inside the virtual node.
190/// This is to enable the (float/32bit) and the (double/64bit) vectorized
191/// structures, where the latter is achieve by merging two of the 32bit subnodes,
192/// along the Direction merged_subnodes_dir
193/// E.g.
194/// a 2-dim. 4x8 node is divided into 8 / 4 subnodes as follows:
195/// here 0-3 / 0-7 is the index within subnode, and a-h / a-d the subnode label.
196///
197/// 32bit(float) 64bit(double)
198/// 32bit storage: 64bit storage:
199/// 0a 1a | 0b 1b 0a 2a | 0b 2b 0(abcdefgh) 0(abcd) 1(abcd)
200/// 2a 3a | 2b 3b 4a 6a | 4b 6b 1(abcdefgh) 2(abcd) 3(abcd)
201/// ------------- 2(abcdefgh) 4(abcd) 5(abcd)
202/// 0e 1e | 0f 1f 1a 3a | 1b 3b 3(abcdefgh) 6(abcd) 7(abcd)
203/// 2e 3e | 2f 3f 5a 7a | 5b 7b
204/// ------------- ------------- 32bit vec 1st half <-> even 64bit
205/// 0c 1c | 0d 1d 0c 2c | 0d 2d 32bit 2nd half <-> odd 64bit
206/// 2c 3c | 2d 3d 4c 6c | 4d 6d
207/// -------------
208/// 0g 1g | 0h 1h 1c 3c | 1d 3d
209/// 2g 3g | 2h 3h 5c 7c | 5d 7d
210///
211/// The "storage" order above is the site-by-site order, enabling site
212/// traversal with maximum locality. It also enables mixed 32bit/64bit vector
213/// operations with half vectors.
214///
215/// Direction where the "doubling" is done is the last Direction where subnodes
216/// are divided. In layout, this will become the "slowest" Direction
217///
218///////////////////////////////////////////////////////////////////////
219
220unsigned lattice_struct::site_index(const CoordinateVector &loc) const {
221 return site_index(loc, hila::myrank());
222}
223
224///////////////////////////////////////////////////////////////////////
225/// give site index for nodeid sites
226/// compare to above
227///////////////////////////////////////////////////////////////////////
228
229unsigned lattice_struct::site_index(const CoordinateVector &loc, const unsigned nodeid) const {
230 int dir, l, s, subl;
231 unsigned i;
232
233 assert(nodeid < nodes.number);
234 const node_info &ni = nodes.nodelist[nodeid];
235
236 // let's mod the coordinate to partition
237 // get subnode size - divisons are the same in all nodes
238
239 // foralldir(d) assert( ni.size[d] % mynode.subnodes.divisions[d] == 0);
240
241 CoordinateVector subsize;
242 subsize.asArray() = ni.size.asArray() / mynode.subnodes.divisions.asArray();
243
244 dir = mynode.subnodes.merged_subnodes_dir;
245 l = loc[dir] - ni.min[dir];
246 subl = l / subsize[dir];
247 // flip to interleaved ordering, see above -- this should work
248 // automatically w. mapping between 32 and 64 bit vector elems
249 subl = subl / 2 + (subl % 2) * (mynode.subnodes.divisions[dir] / 2);
250
251 i = 0;
252 s = 0;
253
254 for (dir = NDIM - 1; dir >= 0; dir--) {
255 l = loc[dir] - ni.min[dir];
256 if (dir != mynode.subnodes.merged_subnodes_dir) {
257 subl = subl * mynode.subnodes.divisions[dir] + l / subsize[dir];
258 }
259 i = i * subsize[dir] + l % subsize[dir];
260 s += loc[dir];
261 }
262
263#if defined(EVEN_SITES_FIRST)
264 if (s % 2 == 0)
265 i = i / 2; /* even site index */
266 else
267 i = i / 2 + ni.evensites / number_of_subnodes; /* odd site */
268#endif
269
270 return (subl + number_of_subnodes * i);
271}
272
273#endif // SUBNODE_LAYOUT
274
275///////////////////////////////////////////////////////////////////////
276/// invert the mynode index -> location (only on this node)
277///////////////////////////////////////////////////////////////////////
278
279// this is defined in lattice.h
280
281/////////////////////////////////////////////////////////////////////
282/// Set up the basic list about all nodes
283/// NOTE: in case of very large number of nodes this may be
284/// inconveniently large. Alternatives?
285/////////////////////////////////////////////////////////////////////
286
288
289 nodes.number = hila::number_of_nodes();
290 // Loop over all node "origins"
291 nodes.nodelist.resize(nodes.number);
292
293 // n keeps track of the node "root coordinates"
294 CoordinateVector n(0);
295
296 nodes.max_size = 0;
297
298 // use nodes.divisors - vectors to fill in stuff
299 for (int i = 0; i < nodes.number; i++) {
301 foralldir(d) l[d] = nodes.divisors[d][n[d]];
302
303 int nn = node_rank(l);
304 node_info &ni = nodes.nodelist[nn];
305 int64_t v = 1;
306 foralldir(d) {
307 ni.min[d] = nodes.divisors[d][n[d]];
308 ni.size[d] = nodes.divisors[d][n[d] + 1] - nodes.divisors[d][n[d]];
309 v *= ni.size[d];
310
311 if (ni.size[d] > nodes.max_size[d])
312 nodes.max_size[d] = ni.size[d];
313 }
314
315 if (v >= (1ULL << 32)) {
316 // node size is larger than 2^32-1 - does not fit to largest unsigned int!
317 report_too_large_node();
318 }
319
320 if (v % 2 == 0)
321 ni.evensites = ni.oddsites = v / 2;
322 else {
323 // now node ni has odd number of sites
324 if (l.parity() == EVEN) {
325 ni.evensites = v / 2 + 1;
326 ni.oddsites = v / 2;
327 } else {
328 ni.evensites = v / 2;
329 ni.oddsites = v / 2 + 1;
330 }
331 }
332
333 // now to the next divisor - add to the lowest dir until all done
334 foralldir(d) {
335 n[d]++;
336 if (n[d] < nodes.n_divisions[d])
337 break;
338 n[d] = 0;
339 }
340
341 // use the opportunity to set up mynode when it is met
342 if (nn == hila::myrank())
343 mynode.setup(ni, lattice);
344 }
345}
346
347////////////////////////////////////////////////////////////////////////
348/// Fill in mynode fields -- node_rank() must be set up OK
349////////////////////////////////////////////////////////////////////////
351
352 parent = &lattice;
353
354 rank = hila::myrank();
355
356 min = ni.min;
357 size = ni.size;
358
359 evensites = ni.evensites;
360 oddsites = ni.oddsites;
361 sites = ni.evensites + ni.oddsites;
362
363 first_site_even = (min.parity() == EVEN);
364
365 // neighbour node indices
366 foralldir(d) {
367 CoordinateVector l = min; // this is here on purpose
368 l[d] = (min[d] + size[d]) % lattice.l_size[d];
369 nn[d] = lattice.node_rank(l);
370 l[d] = (lattice.l_size[d] + min[d] - 1) % lattice.l_size[d];
371 nn[opp_dir(d)] = lattice.node_rank(l);
372 }
373
374 // map site indexes to locations -- coordinates array
375 // after the above site_index should work
376
377#ifdef EVEN_SITES_FIRST
378 coordinates.resize(sites);
379 CoordinateVector l = min;
380 for (unsigned i = 0; i < sites; i++) {
381 coordinates[lattice.site_index(l)] = l;
382 // walk through the coordinates
383 foralldir(d) {
384 if (++l[d] < (min[d] + size[d]))
385 break;
386 l[d] = min[d];
387 }
388 }
389
390#endif
391
392 // set up the auxiliary site_factor array
393 unsigned v = 1;
394 foralldir(d) {
395 size_factor[d] = v; // = size[d-1] * size[d-2] * ..
396 v *= size[d];
397 }
398
399#ifdef SUBNODE_LAYOUT
400
401 // set up the subnodes
402 subnodes.setup(*this);
403#endif
404}
405
406#ifdef SUBNODE_LAYOUT
407
408////////////////////////////////////////////////////////////////////////
409/// Fill in subnodes -struct
410////////////////////////////////////////////////////////////////////////
411void lattice_struct::node_struct::subnode_struct::setup(const node_struct &tn) {
412 size.asArray() = tn.size.asArray() / divisions.asArray();
413 evensites = tn.evensites / number_of_subnodes;
414 oddsites = tn.oddsites / number_of_subnodes;
415 sites = evensites + oddsites;
416}
417
418#endif
419
420/////////////////////////////////////////////////////////////////////
421/// Create the neighbour index arrays
422/// This is for the index array neighbours
423/// TODO: implement some other neighbour schemas!
424/////////////////////////////////////////////////////////////////////
425
427
428 // allocate neighbour arrays - TODO: these should
429 // be allocated on "device" memory too!
430
431 for (int d = 0; d < NDIRS; d++) {
432 neighb[d] = (unsigned *)memalloc(((size_t)mynode.sites) * sizeof(unsigned));
433 }
434
435 size_t c_offset = mynode.sites; // current offset in field-arrays
436
437 // We set the communication and the neigbour-array here
438 int too_large_node = 0;
439
440 for (Direction d = e_x; d < NDIRS; ++d) {
441
442 nn_comminfo[d].index = neighb[d]; // this is not really used for nn gathers
443
444 comm_node_struct &from_node = nn_comminfo[d].from_node;
445 // we can do the opposite send during another pass of the sites.
446 // This is just the gather inverted
447 // NOTE: this is not the send to Direction d, but to -d!
448 comm_node_struct &to_node = nn_comminfo[-d].to_node;
449
450 from_node.rank = to_node.rank = mynode.rank; // invalidate from_node, for time being
451 // if there are no communications the rank is left as is
452
453 // counters to zero
454 from_node.sites = from_node.evensites = from_node.oddsites = 0;
455
456 // pass over sites
457 size_t num = 0; // number of sites off node
458 for (int i = 0; i < mynode.sites; i++) {
459 CoordinateVector ln, l;
460 l = coordinates(i);
461 // set ln to be the neighbour of the site
462 // TODO: FIXED BOUNDARY CONDITIONS DO NOT WRAP
463 ln = (l + d).mod(size());
464 // ln = l;
465 // if (is_up_dir(d)) ln[d] = (l[d] + 1) % size(d);
466 // else ln[d] = (l[d] + size(-d) - 1) % size(-d);
467
468 if (is_on_mynode(ln)) {
469 neighb[d][i] = site_index(ln);
470 } else {
471 // reset neighb array temporarily, as a flag
472 neighb[d][i] = mynode.sites;
473
474 // Now site is off-node, this leads to gathering
475 // check that there's really only 1 node to talk with
476 unsigned rank = node_rank(ln);
477 if (from_node.rank == mynode.rank) {
478 from_node.rank = rank;
479 } else if (from_node.rank != rank) {
480 hila::out << "Internal error in nn-communication setup\n";
481 exit(1);
482 }
483
484 from_node.sites++;
485 if (l.parity() == EVEN)
486 from_node.evensites++;
487 else
488 from_node.oddsites++;
489
490 num++;
491 }
492 }
493
494 // and set buffer indices
495 from_node.buffer = c_offset;
496
497 to_node.rank = from_node.rank;
498 to_node.sites = from_node.sites;
499 // note! Parity is flipped, because parity is normalized to receieving node
500 to_node.evensites = from_node.oddsites;
501 to_node.oddsites = from_node.evensites;
502
503 if (num > 0) {
504 // sitelist tells us which sites to send
505 to_node.sitelist = (unsigned *)memalloc(to_node.sites * sizeof(unsigned));
506#ifndef VANILLA
507 // non-vanilla code MAY want to have receive buffers, so we need mapping to
508 // field
509 from_node.sitelist = (unsigned *)memalloc(from_node.sites * sizeof(unsigned));
510#endif
511 } else {
512 to_node.sitelist = nullptr;
513 }
514
515 if (num > 0) {
516 // set the remaining neighbour array indices and sitelists in another go
517 // over sites. temp counters NOTE: ordering is automatically right: with a
518 // given parity, neighbour node indices come in ascending order of host node
519 // index - no sorting needed
520 size_t c_even, c_odd;
521 c_even = c_odd = 0;
522
523 for (size_t i = 0; i < mynode.sites; i++) {
524 if (neighb[d][i] == mynode.sites) {
526 l = coordinates(i);
527
528 if (l.parity() == EVEN) {
529 // THIS site is even
530 neighb[d][i] = c_offset + c_even;
531 if (c_offset + c_even >= (1ULL << 32))
532 too_large_node = 1;
533
534#ifndef VANILLA
535 from_node.sitelist[c_even] = i;
536#endif
537
538 // flipped parity: this is for odd sends
539 to_node.sitelist[c_even + to_node.evensites] = i;
540
541 c_even++;
542
543 } else {
544 neighb[d][i] = c_offset + from_node.evensites + c_odd;
545 if (c_offset + from_node.evensites + c_odd >= (1ULL << 32))
546 too_large_node = 1;
547
548#ifndef VANILLA
549 from_node.sitelist[c_odd + from_node.evensites] = i;
550#endif
551
552 // again flipped parity for setup
553 to_node.sitelist[c_odd] = i;
554
555 c_odd++;
556 }
557 }
558 }
559 }
560
561 c_offset += from_node.sites;
562
563 if (c_offset >= (1ULL << 32))
564 too_large_node = 1;
565
566 } /* directions */
567
568 /* Finally, set the site to the final offset (better be right!) */
569 mynode.field_alloc_size = c_offset;
570
571 if (hila::reduce_node_sum(too_large_node) > 0) {
572 report_too_large_node();
573 }
574}
575
576
577/************************************************************************/
578
579/* this formats the wait_array, used by forallsites_waitA()site_neighbour
580 * wait_array[i] contains a bit at position 1<<dir if nb(dir,i) is out
581 * of lattice.
582 * Site OK if ((wait_arr ^ xor_mask ) & and_mask) == 0
583 */
584
585static_assert(NDIM <= 4 && "Dimensions at most 4 in dir_mask_t = unsigned char! Use "
586 "larger type to circumvent");
587
588void lattice_struct::initialize_wait_arrays() {
589 int i, dir;
590
591 /* Allocate here the mask array needed for forallsites_wait
592 * This will contain a bit at location dir if the neighbour
593 * at that dir is out of the local volume
594 */
595
596 wait_arr_ = (dir_mask_t *)memalloc(mynode.sites * sizeof(unsigned char));
597
598 for (size_t i = 0; i < mynode.sites; i++) {
599 wait_arr_[i] = 0; /* basic, no wait */
600 foralldir(dir) {
601 Direction odir = -dir;
602 if (neighb[dir][i] >= mynode.sites)
603 wait_arr_[i] = wait_arr_[i] | (1 << dir);
604 if (neighb[odir][i] >= mynode.sites)
605 wait_arr_[i] = wait_arr_[i] | (1 << odir);
606 }
607 }
608}
609
610
611#ifdef SPECIAL_BOUNDARY_CONDITIONS
612
613/////////////////////////////////////////////////////////////////////
614/// set up special boundary
615/// sets up the bool array which tells if special neighbour indices
616/// are needed. Note that this is not uniform across the nodes,
617/// not all nodes have them.
618/////////////////////////////////////////////////////////////////////
619
620void lattice_struct::init_special_boundaries() {
621 for (Direction d = (Direction)0; d < NDIRS; ++d) {
622
623 // default values, nothing interesting happens
624 special_boundaries[d].n_even = special_boundaries[d].n_odd = special_boundaries[d].n_total =
625 0;
626 special_boundaries[d].is_needed = false;
627
628 Direction od = -d;
629 int coord = -1;
630 // do we get up/down boundary?
631 if (is_up_dir(d) && mynode.min[d] + mynode.size[d] == size(d))
632 coord = size(d) - 1;
633 if (is_up_dir(od) && mynode.min[od] == 0)
634 coord = 0;
635
636 if (coord >= 0) {
637 // now we got it
638
639 if (nodes.n_divisions[abs(d)] == 1) {
640 special_boundaries[d].is_needed = true;
641 special_boundaries[d].offset = mynode.field_alloc_size;
642
643 for (unsigned i = 0; i < mynode.sites; i++)
644 if (coordinate(i, abs(d)) == coord) {
645 // set buffer indices
646 special_boundaries[d].n_total++;
647 if (site_parity(i) == EVEN)
648 special_boundaries[d].n_even++;
649 else
650 special_boundaries[d].n_odd++;
651 }
652 mynode.field_alloc_size += special_boundaries[d].n_total;
653 }
654 }
655
656 // hila::out << "Node " << hila::myrank() << " dir " << d << " min " <<
657 // mynode.min << " is_on_edge "
658 // << lattice.mynode.is_on_edge(d) << '\n';
659
660 // allocate neighbours only on 1st use, otherwise unneeded
661 special_boundaries[d].neighbours = nullptr;
662 }
663
664 int toolarge = 0;
665 if (mynode.field_alloc_size >= (1ULL << 32))
666 toolarge = 1;
667 if (hila::reduce_node_sum(toolarge) > 0) {
668 report_too_large_node();
669 }
670}
671
672/////////////////////////////////////////////////////////////////////
673/// give the neighbour array pointer. Allocate if needed
674
675const unsigned *lattice_struct::get_neighbour_array(Direction d, hila::bc bc) {
676
677#ifndef SPECIAL_BOUNDARY_CONDITIONS
678 assert(bc == hila::bc::PERIODIC &&
679 "non-periodic BC only if SPECIAL_BOUNDARY_CONDITIONS defined");
680 return neighb[d];
681#else
682
683 // regular bc exit, should happen almost always
684 if (special_boundaries[d].is_needed == false || bc == hila::bc::PERIODIC)
685 return neighb[d];
686
687 if (special_boundaries[d].neighbours == nullptr) {
688 setup_special_boundary_array(d);
689 }
690 return special_boundaries[d].neighbours;
691
692#endif
693}
694
695//////////////////////////////////////////////////////////////////////
696/// and set up the boundary to one Direction
697//////////////////////////////////////////////////////////////////////
698
699void lattice_struct::setup_special_boundary_array(Direction d) {
700 // if it is not needed or already done...
701 if (special_boundaries[d].is_needed == false || special_boundaries[d].neighbours != nullptr)
702 return;
703
704 // now allocate neighbour array and the gathering array
705 special_boundaries[d].neighbours = (unsigned *)memalloc(sizeof(unsigned) * mynode.sites);
706 special_boundaries[d].move_index =
707 (unsigned *)memalloc(sizeof(unsigned) * special_boundaries[d].n_total);
708
709 int coord;
710 int offs = special_boundaries[d].offset;
711 if (is_up_dir(d))
712 coord = size(d) - 1;
713 else
714 coord = 0;
715
716 int k = 0;
717 for (int i = 0; i < mynode.sites; i++) {
718 if (coordinate(i, abs(d)) != coord) {
719 special_boundaries[d].neighbours[i] = neighb[d][i];
720 } else {
721 special_boundaries[d].neighbours[i] = offs++;
722 special_boundaries[d].move_index[k++] = neighb[d][i];
723 }
724 }
725
726 assert(k == special_boundaries[d].n_total);
727}
728
729#endif
730
731/////////////////////////////////////////////////////////////////////
732/// Create the neighbour index arrays
733/// This is for the index array neighbours
734/// TODO: implement some other neighbour schemas!
735/////////////////////////////////////////////////////////////////////
736
737#if 0
738
739/// This is a helper routine, returning a vector of comm_node_structs for all nodes
740/// involved with communication.
741/// If receive == true, this is "receive" end and index will be filled.
742/// For receive == false the is "send" half is done.
743
744std::vector<lattice_struct::comm_node_struct>
745lattice_struct::create_comm_node_vector(CoordinateVector offset, unsigned *index,
746 bool receive) {
747
748 // for send flip the offset
749 if (!receive)
750 offset = -offset;
751
752 // temp work array: np = node points
753 std::vector<unsigned> np_even(nodes.number); // std::vector initializes to zero
754 std::vector<unsigned> np_odd(nodes.number);
755
756 // we'll go through the sites twice, in order to first resolve the size of the
757 // needed buffers, then to fill them. Trying to avoid memory fragmentation a bit
758
759 // pass over sites
760 int num = 0; // number of sites off node
761 for (unsigned i = 0; i < mynode.sites; i++) {
762 CoordinateVector ln, l;
763 l = coordinates(i);
764 ln = (l + offset).mod(size());
765
766 if (is_on_mynode(ln)) {
767 if (receive)
768 index[i] = site_index(ln);
769 } else {
770 // Now site is off-node, this will leads to gathering
771 // use ci.index to store the node rank
772 unsigned r = node_rank(ln);
773
774 if (receive) {
775 index[i] = mynode.sites + r;
776
777 // using parity of THIS
778 if (l.parity() == EVEN)
779 np_even[r]++;
780 else
781 np_odd[r]++;
782
783 } else {
784 // on the sending side - we use parity of target
785 if (ln.parity() == EVEN)
786 np_even[r]++;
787 else
788 np_odd[r]++;
789 }
790
791 num++;
792 }
793 }
794
795 // count the number of nodes taking part
796 unsigned nnodes = 0;
797 for (int r = 0; r < nodes.number; r++) {
798 if (np_even[r] > 0 || np_odd[r] > 0)
799 nnodes++;
800 }
801
802 // allocate the vector
803 std::vector<comm_node_struct> node_v(nnodes);
804
805 int n = 0;
806 int c_buffer = 0;
807 for (int r = 0; r < nodes.number; r++) {
808 if (np_even[r] > 0 || np_odd[r] > 0) {
809 // add the rank
810 node_v[n].rank = r;
811 node_v[n].evensites = np_even[r];
812 node_v[n].oddsites = np_odd[r];
813 node_v[n].sites = np_even[r] + np_odd[r];
814
815 // pre-allocate the sitelist for sufficient size
816 if (!receive)
817 node_v[n].sitelist =
818 (unsigned *)memalloc(node_v[n].sites * sizeof(unsigned));
819
820 node_v[n].buffer =
821 c_buffer; // running idx to comm buffer - used from receive
822 c_buffer += node_v[n].sites;
823 n++;
824 }
825 }
826
827 // ci.receive_buf_size = c_buffer; // total buf size
828
829 // we'll reuse np_even and np_odd as counting arrays below
830 for (int i = 0; i < nnodes; i++)
831 np_even[i] = np_odd[i] = 0;
832
833 if (!receive) {
834 // sending end -- create sitelists
835
836 for (unsigned i = 0; i < mynode.sites; i++) {
837 CoordinateVector ln, l;
838 l = coordinates(i);
839 ln = (l + offset).mod(size());
840
841 if (!is_on_mynode(ln)) {
842 unsigned r = node_rank(ln);
843 int n = 0;
844 // find the node from the list
845 while (node_v[n].rank != r)
846 n++;
847 // we'll fill the buffers according to the parity of receieving node
848 // first even, then odd sites in the buffer
849 unsigned k;
850 if (ln.parity() == EVEN)
851 k = np_even[n]++;
852 else
853 k = node_v[n].evensites + np_odd[n]++;
854
855 // and set the ptr to the site to be communicated
856 node_v[n].sitelist[k] = i;
857 }
858 }
859
860 } else {
861 // receive end
862 // fill in the index pointers
863
864 for (unsigned i = 0; i < mynode.sites; i++) {
865 if (index[i] >= mynode.sites) {
866 int r = index[i] - mynode.sites;
867 int n = 0;
868 // find the node which sends this
869 while (node_v[n].rank != r)
870 n++;
871
872 CoordinateVector l = coordinates(i);
873 if (l.parity() == EVEN)
874 index[i] = node_v[n].buffer + (np_even[n]++);
875 else
876 index[i] = node_v[n].buffer + node_v[n].evensites + (np_odd[n]++);
877 }
878 }
879 }
880
881 return node_v;
882}
883
885lattice_struct::create_general_gather(const CoordinateVector &offset) {
886 // allocate neighbour arrays - TODO: these should
887 // be allocated on "device" memory too!
888
889 gen_comminfo_struct ci;
890
891 // communication buffer
892 ci.index = (unsigned *)memalloc(mynode.sites * sizeof(unsigned));
893
894 ci.from_node =
895 create_comm_node_vector(offset, ci.index, true); // create receive end
896 ci.to_node = create_comm_node_vector(offset, nullptr, false); // create sending end
897
898 // set the total receive buffer size from the last vector
899 const comm_node_struct &r = ci.from_node[ci.from_node.size() - 1];
900 ci.receive_buf_size = r.buffer + r.sites;
901
902 return ci;
903}
904
905#endif
const Array< n, m, T > & asArray() const
Cast Matrix to Array.
Definition matrix.h:443
dir_mask_t *__restrict__ wait_arr_
implement waiting using mask_t - unsigned char is good for up to 4 dim.
Definition lattice.h:206
void setup_nodes()
invert the mynode index -> location (only on this node)
Definition lattice.cpp:287
void setup(const CoordinateVector &siz)
General lattice setup.
Definition lattice.cpp:33
bool is_on_mynode(const CoordinateVector &c) const
Is the coordinate on THIS node.
Definition lattice.cpp:118
unsigned *__restrict__ neighb[NDIRS]
Main neighbour index array.
Definition lattice.h:203
void create_std_gathers()
Definition lattice.cpp:426
std::array< nn_comminfo_struct, NDIRS > nn_comminfo
nearest neighbour comminfo struct
Definition lattice.h:200
unsigned site_index(const CoordinateVector &c) const
Definition lattice.cpp:136
T abs(const Complex< T > &a)
Return absolute value of Complex number.
Definition cmplx.h:1187
constexpr Parity EVEN
bit pattern: 001
#define foralldir(d)
Macro to loop over (all) Direction(s)
Definition coordinates.h:78
constexpr unsigned NDIRS
Number of directions.
Definition coordinates.h:57
unsigned char dir_mask_t
Direction
Enumerator for direction that assigns integer to direction to be interpreted as unit vector.
Definition coordinates.h:34
This file defines all includes for HILA.
This files containts definitions for the Field class and the classes required to define it such as fi...
int myrank()
rank of this node
Definition com_mpi.cpp:234
int number_of_nodes()
how many nodes there are
Definition com_mpi.cpp:245
void reduce_node_sum(T *value, int send_count, bool allreduce=true)
Reduce an array across nodes.
Definition com_mpi.h:332
std::ostream out
this is our default output file stream
bc
list of field boundary conditions - used only if SPECIAL_BOUNDARY_CONDITIONS defined
Definition lattice.h:31
void finishrun()
Normal, controlled exit - all nodes must call this. Prints timing information and information about c...
Helper class for loading the vectorized lattice.
void setup(lattice_struct *lattice)
unsigned remap(unsigned i) const
And the call interface for remapping.
Information necessary to communicate with a node.
Definition lattice.h:134
general communication
Definition lattice.h:192
Information about the node stored on this process.
Definition lattice.h:71
void setup(node_info &ni, lattice_struct &lattice)
Fill in mynode fields – node_rank() must be set up OK.
Definition lattice.cpp:350
useful information about a node
Definition lattice.h:48