HILA
Loading...
Searching...
No Matches
lattice.cpp
1
2#include "plumbing/defs.h"
3#include "plumbing/lattice.h"
4#include "plumbing/field.h"
5
6// Reporting on possibly too large node: stop if
7// node size (with buffers) is larger than 2^31 - too close for comfort!
8
9void report_too_large_node() {
10 if (hila::myrank() == 0) {
11 hila::out << "Node size too large: size = " << lattice.mynode.size[0];
12 for (int d = 1; d < NDIM; d++)
13 hila::out << " x " << lattice.mynode.size[d];
14 hila::out << " + communication buffers = " << lattice.mynode.field_alloc_size;
15 hila::out << "\nConsider using more nodes (smaller node size).\n";
16 hila::out << "[TODO: allow 64bit index?]\n";
17 }
19}
20
21// HACK: force disable vectorization in a loop using
22// if(disable_avx[X]==0){};
23// Field<double> disable_avx;
24
25// Define the global lattice var
26lattice_struct lattice;
27/// A list of all defined lattices (for the future expansion)
28std::vector<lattice_struct *> lattices;
29
30static int lattice_count = 0;
31
32/// General lattice setup
34
35 l_label = lattice_count++;
36
37 // Add this lattice to the list
38 lattices.push_back(this);
39
40 l_volume = 1;
41 foralldir(i) {
42 l_size[i] = siz[i];
43 l_volume *= siz[i];
44 }
45
46 setup_layout();
47
49
50 // set up the comm arrays
52
53 // Initialize wait_array structures - has to be after std gathers()
54 initialize_wait_arrays();
55
56#ifdef SPECIAL_BOUNDARY_CONDITIONS
57 // do this after std. boundary is done
58 init_special_boundaries();
59#endif
60
61 // Alignment: set field_alloc_size to be divisible by 256
62 if (mynode.field_alloc_size % 256 > 0)
63 mynode.field_alloc_size += 256 - mynode.field_alloc_size % 256;
64
65#ifndef VANILLA
66 /* Setup backend-specific lattice info if necessary */
67 backend_lattice = new backend_lattice_struct;
68 backend_lattice->setup(*this);
69#endif
70
71 if (hila::check_input) {
72 hila::out << "***** Input check done *****\n";
74 }
75
76 test_std_gathers();
77
78 // disable_avx = 0;
79}
80
81///////////////////////////////////////////////////////////////////////
82/// The routines is_on_mynode(), node_rank(), site_index()
83/// implement the "layout" of the nodes and sites of the lattice.
84/// To be changed in different implementations!
85///////////////////////////////////////////////////////////////////////
86
87///////////////////////////////////////////////////////////////////////
88/// Get the node rank for coordinates
89/// This is the fundamental routine which defines how the nodes
90/// are mapped. map_node_layout MUST BE compatible with this
91/// algorithm! So if you change this, change that too
92///
93/// Here the node number along one Direction is calculated with
94/// loc * n_divisions/l_size (with integer division)
95/// example: with l_size=14 and n_divisions=3,
96/// the dividers are at 0, 5, 10, 14
97///
98///////////////////////////////////////////////////////////////////////
99
100int lattice_struct::node_rank(const CoordinateVector &loc) const {
101 int i;
102 int dir;
103
104 i = (loc[NDIM - 1] * nodes.n_divisions[NDIM - 1]) / l_size[NDIM - 1];
105 for (dir = NDIM - 2; dir >= 0; dir--) {
106 i = i * nodes.n_divisions[dir] + ((loc[dir] * nodes.n_divisions[dir]) / l_size[dir]);
107 }
108 /* do we want to remap this? YES PLEASE */
109 i = nodes.remap(i);
110
111 return (i);
112}
113
114///////////////////////////////////////////////////////////////////////
115/// Is the coordinate on THIS node
116///////////////////////////////////////////////////////////////////////
117
119 int d;
120
121 for (int dir = 0; dir < NDIM; dir++) {
122 d = loc[dir] - mynode.min[dir];
123 if (d < 0 || d >= mynode.size[dir])
124 return false;
125 }
126 return true;
127}
128
129///////////////////////////////////////////////////////////////////////
130/// give site index for ON NODE sites
131/// Note: loc really has to be on this node
132///////////////////////////////////////////////////////////////////////
133
134#ifndef SUBNODE_LAYOUT
135
137 int dir, l, s;
138 unsigned i;
139
140 i = l = loc[NDIM - 1] - mynode.min[NDIM - 1];
141 s = loc[NDIM - 1];
142 for (dir = NDIM - 2; dir >= 0; dir--) {
143 l = loc[dir] - mynode.min[dir];
144 i = i * mynode.size[dir] + l;
145 s += loc[dir];
146 }
147
148 // now i contains the `running index' for site
149#if defined(EVEN_SITES_FIRST)
150 if (s % 2 == 0)
151 return (i / 2); /* even site index */
152 else
153 return (i / 2 + mynode.evensites); /* odd site */
154#else
155 return (i);
156#endif
157}
158
159///////////////////////////////////////////////////////////////////////
160/// give site index for nodeid sites
161/// compare to above
162///////////////////////////////////////////////////////////////////////
163
164unsigned lattice_struct::site_index(const CoordinateVector &loc, const unsigned nodeid) const {
165 int dir, l, s;
166 unsigned i;
167 const node_info &ni = nodes.nodelist[nodeid];
168
169 i = l = loc[NDIM - 1] - ni.min[NDIM - 1];
170 s = loc[NDIM - 1];
171 for (dir = NDIM - 2; dir >= 0; dir--) {
172 l = loc[dir] - ni.min[dir];
173 i = i * ni.size[dir] + l;
174 s += loc[dir];
175 }
176
177 // now i contains the `running index' for site
178#if defined(EVEN_SITES_FIRST)
179 if (s % 2 == 0)
180 return (i / 2); /* even site index */
181 else
182 return (i / 2 + ni.evensites); /* odd site */
183#else
184 return (i);
185#endif
186}
187
188#else // now SUBNODE_LAYOUT
189
190///////////////////////////////////////////////////////////////////////
191/// The (AVX) vectorized version of the site_index function.
192/// Now there are two stages: an "inner" vect index, which goes over
193/// over "virtual nodes", and the "outer" index inside the virtual node.
194/// This is to enable the (float/32bit) and the (double/64bit) vectorized
195/// structures, where the latter is achieve by merging two of the 32bit subnodes,
196/// along the Direction merged_subnodes_dir
197/// E.g.
198/// a 2-dim. 4x8 node is divided into 8 / 4 subnodes as follows:
199/// here 0-3 / 0-7 is the index within subnode, and a-h / a-d the subnode label.
200///
201/// 32bit(float) 64bit(double)
202/// 32bit storage: 64bit storage:
203/// 0a 1a | 0b 1b 0a 2a | 0b 2b 0(abcdefgh) 0(abcd) 1(abcd)
204/// 2a 3a | 2b 3b 4a 6a | 4b 6b 1(abcdefgh) 2(abcd) 3(abcd)
205/// ------------- 2(abcdefgh) 4(abcd) 5(abcd)
206/// 0e 1e | 0f 1f 1a 3a | 1b 3b 3(abcdefgh) 6(abcd) 7(abcd)
207/// 2e 3e | 2f 3f 5a 7a | 5b 7b
208/// ------------- ------------- 32bit vec 1st half <-> even 64bit
209/// 0c 1c | 0d 1d 0c 2c | 0d 2d 32bit 2nd half <-> odd 64bit
210/// 2c 3c | 2d 3d 4c 6c | 4d 6d
211/// -------------
212/// 0g 1g | 0h 1h 1c 3c | 1d 3d
213/// 2g 3g | 2h 3h 5c 7c | 5d 7d
214///
215/// The "storage" order above is the site-by-site order, enabling site
216/// traversal with maximum locality. It also enables mixed 32bit/64bit vector
217/// operations with half vectors.
218///
219/// Direction where the "doubling" is done is the last Direction where subnodes
220/// are divided. In layout, this will become the "slowest" Direction
221///
222///////////////////////////////////////////////////////////////////////
223
224unsigned lattice_struct::site_index(const CoordinateVector &loc) const {
225 return site_index(loc, hila::myrank());
226}
227
228///////////////////////////////////////////////////////////////////////
229/// give site index for nodeid sites
230/// compare to above
231///////////////////////////////////////////////////////////////////////
232
233unsigned lattice_struct::site_index(const CoordinateVector &loc, const unsigned nodeid) const {
234 int dir, l, s, subl;
235 unsigned i;
236
237 assert(nodeid < nodes.number);
238 const node_info &ni = nodes.nodelist[nodeid];
239
240 // let's mod the coordinate to partition
241 // get subnode size - divisons are the same in all nodes
242
243 // foralldir(d) assert( ni.size[d] % mynode.subnodes.divisions[d] == 0);
244
245 CoordinateVector subsize;
246 subsize.asArray() = ni.size.asArray() / mynode.subnodes.divisions.asArray();
247
248 dir = mynode.subnodes.merged_subnodes_dir;
249 l = loc[dir] - ni.min[dir];
250 subl = l / subsize[dir];
251 // flip to interleaved ordering, see above -- this should work
252 // automatically w. mapping between 32 and 64 bit vector elems
253 subl = subl / 2 + (subl % 2) * (mynode.subnodes.divisions[dir] / 2);
254
255 i = 0;
256 s = 0;
257
258 for (dir = NDIM - 1; dir >= 0; dir--) {
259 l = loc[dir] - ni.min[dir];
260 if (dir != mynode.subnodes.merged_subnodes_dir) {
261 subl = subl * mynode.subnodes.divisions[dir] + l / subsize[dir];
262 }
263 i = i * subsize[dir] + l % subsize[dir];
264 s += loc[dir];
265 }
266
267#if defined(EVEN_SITES_FIRST)
268 if (s % 2 == 0)
269 i = i / 2; /* even site index */
270 else
271 i = i / 2 + ni.evensites / number_of_subnodes; /* odd site */
272#endif
273
274 return (subl + number_of_subnodes * i);
275}
276
277#endif // SUBNODE_LAYOUT
278
279///////////////////////////////////////////////////////////////////////
280/// invert the mynode index -> location (only on this node)
281///////////////////////////////////////////////////////////////////////
282
283// this is defined in lattice.h
284
285/////////////////////////////////////////////////////////////////////
286/// Set up the basic list about all nodes
287/// NOTE: in case of very large number of nodes this may be
288/// inconveniently large. Alternatives?
289/////////////////////////////////////////////////////////////////////
290
292
293 nodes.number = hila::number_of_nodes();
294 // Loop over all node "origins"
295 nodes.nodelist.resize(nodes.number);
296
297 // n keeps track of the node "root coordinates"
298 CoordinateVector n(0);
299
300 nodes.max_size = 0;
301
302 // use nodes.divisors - vectors to fill in stuff
303 for (int i = 0; i < nodes.number; i++) {
305 foralldir(d) l[d] = nodes.divisors[d][n[d]];
306
307 int nn = node_rank(l);
308 node_info &ni = nodes.nodelist[nn];
309 int64_t v = 1;
310 foralldir(d) {
311 ni.min[d] = nodes.divisors[d][n[d]];
312 ni.size[d] = nodes.divisors[d][n[d] + 1] - nodes.divisors[d][n[d]];
313 v *= ni.size[d];
314
315 if (ni.size[d] > nodes.max_size[d])
316 nodes.max_size[d] = ni.size[d];
317 }
318
319 if (v >= (1ULL << 32)) {
320 // node size is larger than 2^32-1 - does not fit to largest unsigned int!
321 report_too_large_node();
322 }
323
324 if (v % 2 == 0)
325 ni.evensites = ni.oddsites = v / 2;
326 else {
327 // now node ni has odd number of sites
328 if (l.parity() == EVEN) {
329 ni.evensites = v / 2 + 1;
330 ni.oddsites = v / 2;
331 } else {
332 ni.evensites = v / 2;
333 ni.oddsites = v / 2 + 1;
334 }
335 }
336
337 // now to the next divisor - add to the lowest dir until all done
338 foralldir(d) {
339 n[d]++;
340 if (n[d] < nodes.n_divisions[d])
341 break;
342 n[d] = 0;
343 }
344
345 // use the opportunity to set up mynode when it is met
346 if (nn == hila::myrank())
347 mynode.setup(ni, lattice);
348 }
349}
350
351////////////////////////////////////////////////////////////////////////
352/// Fill in mynode fields -- node_rank() must be set up OK
353////////////////////////////////////////////////////////////////////////
355
356 parent = &lattice;
357
358 rank = hila::myrank();
359
360 min = ni.min;
361 size = ni.size;
362
363 evensites = ni.evensites;
364 oddsites = ni.oddsites;
365 sites = ni.evensites + ni.oddsites;
366
367 first_site_even = (min.parity() == EVEN);
368
369 // neighbour node indices
370 foralldir(d) {
371 CoordinateVector l = min; // this is here on purpose
372 l[d] = (min[d] + size[d]) % lattice.l_size[d];
373 nn[d] = lattice.node_rank(l);
374 l[d] = (lattice.l_size[d] + min[d] - 1) % lattice.l_size[d];
375 nn[opp_dir(d)] = lattice.node_rank(l);
376 }
377
378 // map site indexes to locations -- coordinates array
379 // after the above site_index should work
380
381#ifdef EVEN_SITES_FIRST
382 coordinates.resize(sites);
383 CoordinateVector l = min;
384 for (unsigned i = 0; i < sites; i++) {
385 coordinates[lattice.site_index(l)] = l;
386 // walk through the coordinates
387 foralldir(d) {
388 if (++l[d] < (min[d] + size[d]))
389 break;
390 l[d] = min[d];
391 }
392 }
393
394#endif
395
396 // set up the auxiliary site_factor array
397 unsigned v = 1;
398 foralldir(d) {
399 size_factor[d] = v; // = size[d-1] * size[d-2] * ..
400 v *= size[d];
401 }
402
403#ifdef SUBNODE_LAYOUT
404
405 // set up the subnodes
406 subnodes.setup(*this);
407#endif
408}
409
410#ifdef SUBNODE_LAYOUT
411
412////////////////////////////////////////////////////////////////////////
413/// Fill in subnodes -struct
414////////////////////////////////////////////////////////////////////////
415void lattice_struct::node_struct::subnode_struct::setup(const node_struct &tn) {
416 size.asArray() = tn.size.asArray() / divisions.asArray();
417 evensites = tn.evensites / number_of_subnodes;
418 oddsites = tn.oddsites / number_of_subnodes;
419 sites = evensites + oddsites;
420}
421
422#endif
423
424/////////////////////////////////////////////////////////////////////
425/// Create the neighbour index arrays
426/// This is for the index array neighbours
427/// TODO: implement some other neighbour schemas!
428/////////////////////////////////////////////////////////////////////
429
431
432 // allocate neighbour arrays - TODO: these should
433 // be allocated on "device" memory too!
434
435 for (int d = 0; d < NDIRS; d++) {
436 neighb[d] = (unsigned *)memalloc(((size_t)mynode.sites) * sizeof(unsigned));
437 }
438
439 size_t c_offset = mynode.sites; // current offset in field-arrays
440
441 // We set the communication and the neigbour-array here
442 int too_large_node = 0;
443
444 for (Direction d = e_x; d < NDIRS; ++d) {
445
446 nn_comminfo[d].index = neighb[d]; // this is not really used for nn gathers
447
448 comm_node_struct &from_node = nn_comminfo[d].from_node;
449 // we can do the opposite send during another pass of the sites.
450 // This is just the gather inverted
451 // NOTE: this is not the send to Direction d, but to -d!
452 comm_node_struct &to_node = nn_comminfo[-d].to_node;
453
454 from_node.rank = to_node.rank = mynode.rank; // invalidate from_node, for time being
455 // if there are no communications the rank is left as is
456
457 // counters to zero
458 from_node.sites = from_node.evensites = from_node.oddsites = 0;
459
460 // pass over sites
461 size_t num = 0; // number of sites off node
462 for (int i = 0; i < mynode.sites; i++) {
463 CoordinateVector ln, l;
464 l = coordinates(i);
465 // set ln to be the neighbour of the site
466 // TODO: FIXED BOUNDARY CONDITIONS DO NOT WRAP
467 ln = (l + d).mod(size());
468 // ln = l;
469 // if (is_up_dir(d)) ln[d] = (l[d] + 1) % size(d);
470 // else ln[d] = (l[d] + size(-d) - 1) % size(-d);
471
472 if (is_on_mynode(ln)) {
473 neighb[d][i] = site_index(ln);
474 } else {
475 // reset neighb array temporarily, as a flag
476 neighb[d][i] = mynode.sites;
477
478 // Now site is off-node, this leads to gathering
479 // check that there's really only 1 node to talk with
480 unsigned rank = node_rank(ln);
481 if (from_node.rank == mynode.rank) {
482 from_node.rank = rank;
483 } else if (from_node.rank != rank) {
484 hila::out << "Internal error in nn-communication setup\n";
485 exit(1);
486 }
487
488 from_node.sites++;
489 if (l.parity() == EVEN)
490 from_node.evensites++;
491 else
492 from_node.oddsites++;
493
494 num++;
495 }
496 }
497
498 // and set buffer indices
499 from_node.buffer = c_offset;
500
501 to_node.rank = from_node.rank;
502 to_node.sites = from_node.sites;
503 // note! Parity is flipped, because parity is normalized to receieving node
504 to_node.evensites = from_node.oddsites;
505 to_node.oddsites = from_node.evensites;
506
507 if (num > 0) {
508 // sitelist tells us which sites to send
509 to_node.sitelist = (unsigned *)memalloc(to_node.sites * sizeof(unsigned));
510#ifndef VANILLA
511 // non-vanilla code MAY want to have receive buffers, so we need mapping to
512 // field
513 from_node.sitelist = (unsigned *)memalloc(from_node.sites * sizeof(unsigned));
514#endif
515 } else {
516 to_node.sitelist = nullptr;
517 }
518
519 if (num > 0) {
520 // set the remaining neighbour array indices and sitelists in another go
521 // over sites. temp counters NOTE: ordering is automatically right: with a
522 // given parity, neighbour node indices come in ascending order of host node
523 // index - no sorting needed
524 size_t c_even, c_odd;
525 c_even = c_odd = 0;
526
527 for (size_t i = 0; i < mynode.sites; i++) {
528 if (neighb[d][i] == mynode.sites) {
530 l = coordinates(i);
531
532 if (l.parity() == EVEN) {
533 // THIS site is even
534 neighb[d][i] = c_offset + c_even;
535 if (c_offset + c_even >= (1ULL << 32))
536 too_large_node = 1;
537
538#ifndef VANILLA
539 from_node.sitelist[c_even] = i;
540#endif
541
542 // flipped parity: this is for odd sends
543 to_node.sitelist[c_even + to_node.evensites] = i;
544
545 c_even++;
546
547 } else {
548 neighb[d][i] = c_offset + from_node.evensites + c_odd;
549 if (c_offset + from_node.evensites + c_odd >= (1ULL << 32))
550 too_large_node = 1;
551
552#ifndef VANILLA
553 from_node.sitelist[c_odd + from_node.evensites] = i;
554#endif
555
556 // again flipped parity for setup
557 to_node.sitelist[c_odd] = i;
558
559 c_odd++;
560 }
561 }
562 }
563 }
564
565 c_offset += from_node.sites;
566
567 if (c_offset >= (1ULL << 32))
568 too_large_node = 1;
569
570 } /* directions */
571
572 /* Finally, set the site to the final offset (better be right!) */
573 mynode.field_alloc_size = c_offset;
574
575 if (hila::reduce_node_sum(too_large_node) > 0) {
576 report_too_large_node();
577 }
578}
579
580
581/************************************************************************/
582
583/* this formats the wait_array, used by forallsites_waitA()site_neighbour
584 * wait_array[i] contains a bit at position 1<<dir if nb(dir,i) is out
585 * of lattice.
586 * Site OK if ((wait_arr ^ xor_mask ) & and_mask) == 0
587 */
588
589static_assert(NDIM <= 4 && "Dimensions at most 4 in dir_mask_t = unsigned char! Use "
590 "larger type to circumvent");
591
592void lattice_struct::initialize_wait_arrays() {
593 int i, dir;
594
595 /* Allocate here the mask array needed for forallsites_wait
596 * This will contain a bit at location dir if the neighbour
597 * at that dir is out of the local volume
598 */
599
600 wait_arr_ = (dir_mask_t *)memalloc(mynode.sites * sizeof(unsigned char));
601
602 for (size_t i = 0; i < mynode.sites; i++) {
603 wait_arr_[i] = 0; /* basic, no wait */
604 foralldir(dir) {
605 Direction odir = -dir;
606 if (neighb[dir][i] >= mynode.sites)
607 wait_arr_[i] = wait_arr_[i] | (1 << dir);
608 if (neighb[odir][i] >= mynode.sites)
609 wait_arr_[i] = wait_arr_[i] | (1 << odir);
610 }
611 }
612}
613
614
615#ifdef SPECIAL_BOUNDARY_CONDITIONS
616
617/////////////////////////////////////////////////////////////////////
618/// set up special boundary
619/// sets up the bool array which tells if special neighbour indices
620/// are needed. Note that this is not uniform across the nodes,
621/// not all nodes have them.
622/////////////////////////////////////////////////////////////////////
623
624void lattice_struct::init_special_boundaries() {
625 for (Direction d = (Direction)0; d < NDIRS; ++d) {
626
627 // default values, nothing interesting happens
628 special_boundaries[d].n_even = special_boundaries[d].n_odd = special_boundaries[d].n_total =
629 0;
630 special_boundaries[d].is_needed = false;
631
632 Direction od = -d;
633 int coord = -1;
634 // do we get up/down boundary?
635 if (is_up_dir(d) && mynode.min[d] + mynode.size[d] == size(d))
636 coord = size(d) - 1;
637 if (is_up_dir(od) && mynode.min[od] == 0)
638 coord = 0;
639
640 if (coord >= 0) {
641 // now we got it
642
643 if (nodes.n_divisions[abs(d)] == 1) {
644 special_boundaries[d].is_needed = true;
645 special_boundaries[d].offset = mynode.field_alloc_size;
646
647 for (unsigned i = 0; i < mynode.sites; i++)
648 if (coordinate(i, abs(d)) == coord) {
649 // set buffer indices
650 special_boundaries[d].n_total++;
651 if (site_parity(i) == EVEN)
652 special_boundaries[d].n_even++;
653 else
654 special_boundaries[d].n_odd++;
655 }
656 mynode.field_alloc_size += special_boundaries[d].n_total;
657 }
658 }
659
660 // hila::out << "Node " << hila::myrank() << " dir " << d << " min " <<
661 // mynode.min << " is_on_edge "
662 // << lattice.mynode.is_on_edge(d) << '\n';
663
664 // allocate neighbours only on 1st use, otherwise unneeded
665 special_boundaries[d].neighbours = nullptr;
666 }
667
668 int toolarge = 0;
669 if (mynode.field_alloc_size >= (1ULL << 32))
670 toolarge = 1;
671 if (hila::reduce_node_sum(toolarge) > 0) {
672 report_too_large_node();
673 }
674}
675
676/////////////////////////////////////////////////////////////////////
677/// give the neighbour array pointer. Allocate if needed
678
679const unsigned *lattice_struct::get_neighbour_array(Direction d, hila::bc bc) {
680
681#ifndef SPECIAL_BOUNDARY_CONDITIONS
682 assert(bc == hila::bc::PERIODIC &&
683 "non-periodic BC only if SPECIAL_BOUNDARY_CONDITIONS defined");
684 return neighb[d];
685#else
686
687 // regular bc exit, should happen almost always
688 if (special_boundaries[d].is_needed == false || bc == hila::bc::PERIODIC)
689 return neighb[d];
690
691 if (special_boundaries[d].neighbours == nullptr) {
692 setup_special_boundary_array(d);
693 }
694 return special_boundaries[d].neighbours;
695
696#endif
697}
698
699//////////////////////////////////////////////////////////////////////
700/// and set up the boundary to one Direction
701//////////////////////////////////////////////////////////////////////
702
703void lattice_struct::setup_special_boundary_array(Direction d) {
704 // if it is not needed or already done...
705 if (special_boundaries[d].is_needed == false || special_boundaries[d].neighbours != nullptr)
706 return;
707
708 // now allocate neighbour array and the gathering array
709 special_boundaries[d].neighbours = (unsigned *)memalloc(sizeof(unsigned) * mynode.sites);
710 special_boundaries[d].move_index =
711 (unsigned *)memalloc(sizeof(unsigned) * special_boundaries[d].n_total);
712
713 int coord;
714 int offs = special_boundaries[d].offset;
715 if (is_up_dir(d))
716 coord = size(d) - 1;
717 else
718 coord = 0;
719
720 int k = 0;
721 for (int i = 0; i < mynode.sites; i++) {
722 if (coordinate(i, abs(d)) != coord) {
723 special_boundaries[d].neighbours[i] = neighb[d][i];
724 } else {
725 special_boundaries[d].neighbours[i] = offs++;
726 special_boundaries[d].move_index[k++] = neighb[d][i];
727 }
728 }
729
730 assert(k == special_boundaries[d].n_total);
731}
732
733#endif
734
735/////////////////////////////////////////////////////////////////////
736/// Create the neighbour index arrays
737/// This is for the index array neighbours
738/// TODO: implement some other neighbour schemas!
739/////////////////////////////////////////////////////////////////////
740
741#if 0
742
743/// This is a helper routine, returning a vector of comm_node_structs for all nodes
744/// involved with communication.
745/// If receive == true, this is "receive" end and index will be filled.
746/// For receive == false the is "send" half is done.
747
748std::vector<lattice_struct::comm_node_struct>
749lattice_struct::create_comm_node_vector(CoordinateVector offset, unsigned *index,
750 bool receive) {
751
752 // for send flip the offset
753 if (!receive)
754 offset = -offset;
755
756 // temp work array: np = node points
757 std::vector<unsigned> np_even(nodes.number); // std::vector initializes to zero
758 std::vector<unsigned> np_odd(nodes.number);
759
760 // we'll go through the sites twice, in order to first resolve the size of the
761 // needed buffers, then to fill them. Trying to avoid memory fragmentation a bit
762
763 // pass over sites
764 int num = 0; // number of sites off node
765 for (unsigned i = 0; i < mynode.sites; i++) {
766 CoordinateVector ln, l;
767 l = coordinates(i);
768 ln = (l + offset).mod(size());
769
770 if (is_on_mynode(ln)) {
771 if (receive)
772 index[i] = site_index(ln);
773 } else {
774 // Now site is off-node, this will leads to gathering
775 // use ci.index to store the node rank
776 unsigned r = node_rank(ln);
777
778 if (receive) {
779 index[i] = mynode.sites + r;
780
781 // using parity of THIS
782 if (l.parity() == EVEN)
783 np_even[r]++;
784 else
785 np_odd[r]++;
786
787 } else {
788 // on the sending side - we use parity of target
789 if (ln.parity() == EVEN)
790 np_even[r]++;
791 else
792 np_odd[r]++;
793 }
794
795 num++;
796 }
797 }
798
799 // count the number of nodes taking part
800 unsigned nnodes = 0;
801 for (int r = 0; r < nodes.number; r++) {
802 if (np_even[r] > 0 || np_odd[r] > 0)
803 nnodes++;
804 }
805
806 // allocate the vector
807 std::vector<comm_node_struct> node_v(nnodes);
808
809 int n = 0;
810 int c_buffer = 0;
811 for (int r = 0; r < nodes.number; r++) {
812 if (np_even[r] > 0 || np_odd[r] > 0) {
813 // add the rank
814 node_v[n].rank = r;
815 node_v[n].evensites = np_even[r];
816 node_v[n].oddsites = np_odd[r];
817 node_v[n].sites = np_even[r] + np_odd[r];
818
819 // pre-allocate the sitelist for sufficient size
820 if (!receive)
821 node_v[n].sitelist =
822 (unsigned *)memalloc(node_v[n].sites * sizeof(unsigned));
823
824 node_v[n].buffer =
825 c_buffer; // running idx to comm buffer - used from receive
826 c_buffer += node_v[n].sites;
827 n++;
828 }
829 }
830
831 // ci.receive_buf_size = c_buffer; // total buf size
832
833 // we'll reuse np_even and np_odd as counting arrays below
834 for (int i = 0; i < nnodes; i++)
835 np_even[i] = np_odd[i] = 0;
836
837 if (!receive) {
838 // sending end -- create sitelists
839
840 for (unsigned i = 0; i < mynode.sites; i++) {
841 CoordinateVector ln, l;
842 l = coordinates(i);
843 ln = (l + offset).mod(size());
844
845 if (!is_on_mynode(ln)) {
846 unsigned r = node_rank(ln);
847 int n = 0;
848 // find the node from the list
849 while (node_v[n].rank != r)
850 n++;
851 // we'll fill the buffers according to the parity of receieving node
852 // first even, then odd sites in the buffer
853 unsigned k;
854 if (ln.parity() == EVEN)
855 k = np_even[n]++;
856 else
857 k = node_v[n].evensites + np_odd[n]++;
858
859 // and set the ptr to the site to be communicated
860 node_v[n].sitelist[k] = i;
861 }
862 }
863
864 } else {
865 // receive end
866 // fill in the index pointers
867
868 for (unsigned i = 0; i < mynode.sites; i++) {
869 if (index[i] >= mynode.sites) {
870 int r = index[i] - mynode.sites;
871 int n = 0;
872 // find the node which sends this
873 while (node_v[n].rank != r)
874 n++;
875
876 CoordinateVector l = coordinates(i);
877 if (l.parity() == EVEN)
878 index[i] = node_v[n].buffer + (np_even[n]++);
879 else
880 index[i] = node_v[n].buffer + node_v[n].evensites + (np_odd[n]++);
881 }
882 }
883 }
884
885 return node_v;
886}
887
889lattice_struct::create_general_gather(const CoordinateVector &offset) {
890 // allocate neighbour arrays - TODO: these should
891 // be allocated on "device" memory too!
892
893 gen_comminfo_struct ci;
894
895 // communication buffer
896 ci.index = (unsigned *)memalloc(mynode.sites * sizeof(unsigned));
897
898 ci.from_node =
899 create_comm_node_vector(offset, ci.index, true); // create receive end
900 ci.to_node = create_comm_node_vector(offset, nullptr, false); // create sending end
901
902 // set the total receive buffer size from the last vector
903 const comm_node_struct &r = ci.from_node[ci.from_node.size() - 1];
904 ci.receive_buf_size = r.buffer + r.sites;
905
906 return ci;
907}
908
909#endif
const Array< n, m, T > & asArray() const
Cast Matrix to Array.
Definition matrix.h:467
dir_mask_t *__restrict__ wait_arr_
implement waiting using mask_t - unsigned char is good for up to 4 dim.
Definition lattice.h:206
void setup_nodes()
invert the mynode index -> location (only on this node)
Definition lattice.cpp:291
void setup(const CoordinateVector &siz)
General lattice setup.
Definition lattice.cpp:33
bool is_on_mynode(const CoordinateVector &c) const
Is the coordinate on THIS node.
Definition lattice.cpp:118
unsigned *__restrict__ neighb[NDIRS]
Main neighbour index array.
Definition lattice.h:203
void create_std_gathers()
Definition lattice.cpp:430
std::array< nn_comminfo_struct, NDIRS > nn_comminfo
nearest neighbour comminfo struct
Definition lattice.h:200
unsigned site_index(const CoordinateVector &c) const
Definition lattice.cpp:136
T abs(const Complex< T > &a)
Return absolute value of Complex number.
Definition cmplx.h:1322
constexpr Parity EVEN
bit pattern: 001
#define foralldir(d)
Macro to loop over (all) Direction(s)
Definition coordinates.h:78
constexpr unsigned NDIRS
Number of directions.
Definition coordinates.h:57
unsigned char dir_mask_t
Direction
Enumerator for direction that assigns integer to direction to be interpreted as unit vector.
Definition coordinates.h:34
This file defines all includes for HILA.
This files containts definitions for the Field class and the classes required to define it such as fi...
int myrank()
rank of this node
Definition com_mpi.cpp:235
int number_of_nodes()
how many nodes there are
Definition com_mpi.cpp:246
void reduce_node_sum(T *value, int send_count, bool allreduce=true)
Reduce an array across nodes.
Definition com_mpi.h:297
std::ostream out
this is our default output file stream
bc
list of field boundary conditions - used only if SPECIAL_BOUNDARY_CONDITIONS defined
Definition lattice.h:31
void finishrun()
Normal, controlled exit - all nodes must call this. Prints timing information and information about c...
Helper class for loading the vectorized lattice.
void setup(lattice_struct *lattice)
unsigned remap(unsigned i) const
And the call interface for remapping.
Information necessary to communicate with a node.
Definition lattice.h:134
general communication
Definition lattice.h:192
Information about the node stored on this process.
Definition lattice.h:71
void setup(node_info &ni, lattice_struct &lattice)
Fill in mynode fields – node_rank() must be set up OK.
Definition lattice.cpp:354
useful information about a node
Definition lattice.h:48