/** @file field_comm.h */

#ifndef FIELD_COMM_H
#define FIELD_COMM_H

#include "field.h"

/////////////////////////////////////////////////////////////////////////////////
/// This file collects the implementations of the Field communication routines


/////////////////////////////////////////////////////////////////////////////////////////
/// Gather boundary elements for communication

template <typename T>
void Field<T>::field_struct::gather_comm_elements(
    Direction d, Parity par, T *RESTRICT buffer,
    const lattice_struct::comm_node_struct &to_node) const {
#ifndef VECTORIZED
#ifdef SPECIAL_BOUNDARY_CONDITIONS
    // note: -d in is_on_edge, because we're about to send stuff to that
    // Direction (gathering from Direction +d)
    if (boundary_condition[d] == hila::bc::ANTIPERIODIC && lattice.mynode.is_on_edge(-d)) {
        payload.gather_comm_elements(buffer, to_node, par, lattice, true);
    } else {
        payload.gather_comm_elements(buffer, to_node, par, lattice, false);
    }
#else
    payload.gather_comm_elements(buffer, to_node, par, lattice, false);
#endif

#else
    // this is the vectorized branch
    bool antiperiodic = false;
#ifdef SPECIAL_BOUNDARY_CONDITIONS
    if (boundary_condition[d] == hila::bc::ANTIPERIODIC && lattice.mynode.is_on_edge(-d)) {
        antiperiodic = true;
    }
#endif

    if constexpr (hila::is_vectorizable_type<T>::value) {
        // now vectorized layout
        if (vector_lattice->is_boundary_permutation[abs(d)]) {
            // with a boundary permutation the elements must be gathered one by one
            int n;
            const unsigned *index_list = to_node.get_sitelist(par, n);
            if (!antiperiodic) {
                payload.gather_elements(buffer, index_list, n, lattice);
            } else {
                payload.gather_elements_negated(buffer, index_list, n, lattice);
            }
        } else {
            // without it, the full block can be gathered at once
            payload.gather_comm_vectors(buffer, to_node, par, vector_lattice, antiperiodic);
        }
    } else {
        // not vectorizable, use the standard methods
        int n;
        const unsigned *index_list = to_node.get_sitelist(par, n);
        if (!antiperiodic)
            payload.gather_elements(buffer, index_list, n, lattice);
        else {
            payload.gather_elements_negated(buffer, index_list, n, lattice);
        }
    }
#endif
}

/////////////////////////////////////////////////////////////////////////////////////////
/// Place boundary elements received from a neighbour

template <typename T>
void Field<T>::field_struct::place_comm_elements(
    Direction d, Parity par, T *RESTRICT buffer,
    const lattice_struct::comm_node_struct &from_node) {

#ifdef VECTORIZED
    if constexpr (hila::is_vectorizable_type<T>::value) {
        // vectorized layout, act accordingly
        if (vector_lattice->is_boundary_permutation[abs(d)]) {
            payload.place_recv_elements(buffer, d, par, vector_lattice);
        } else {
            // nothing to do here, comms land directly in place
        }
    } else {
        // non-vectorized, using the vanilla method: again nothing to do
    }
#else
    // this one is only for CUDA/HIP
    payload.place_comm_elements(d, par, buffer, from_node, lattice);
#endif
}

/////////////////////////////////////////////////////////////////////////////////////////
/// Place boundary elements from the local lattice (used in the vectorized version)

template <typename T>
void Field<T>::field_struct::set_local_boundary_elements(Direction dir, Parity par) {

#ifdef SPECIAL_BOUNDARY_CONDITIONS
    bool antiperiodic =
        (boundary_condition[dir] == hila::bc::ANTIPERIODIC && lattice.mynode.is_on_edge(dir));
#else
    bool antiperiodic = false;
#endif
    payload.set_local_boundary_elements(dir, par, lattice, antiperiodic);
}


/////////////////////////////////////////////////////////////////////////////////////////
/// Get the receive buffer pointer for the communication.

template <typename T>
T *Field<T>::field_struct::get_receive_buffer(Direction d, Parity par,
                                              const lattice_struct::comm_node_struct &from_node) {
#if defined(VANILLA)

    return (T *)payload.get_buffer() + from_node.offset(par);

#elif defined(CUDA) || defined(HIP)

    unsigned offs = 0;
    if (par == ODD)
        offs = from_node.sites / 2;
    if (receive_buffer[d] == nullptr) {
        receive_buffer[d] = payload.allocate_mpi_buffer(from_node.sites);
    }
    return receive_buffer[d] + offs;

#elif defined(VECTORIZED)

    if constexpr (!hila::is_vectorizable_type<T>::value) {
        // use vanilla type, field laid out in the standard fashion
        return (T *)payload.get_buffer() + from_node.offset(par);
    } else {
        unsigned offs = 0;
        if (par == ODD)
            offs = from_node.sites / 2;

        if (vector_lattice->is_boundary_permutation[abs(d)]) {
            // an extra copy operation is needed
            if (receive_buffer[d] == nullptr) {
                receive_buffer[d] = payload.allocate_mpi_buffer(from_node.sites);
            }
            return receive_buffer[d] + offs;
        } else {
            // directly to the halo buffer
            constexpr unsigned vector_size = hila::vector_info<T>::vector_size;
            return ((T *)payload.get_buffer() +
                    (vector_lattice->halo_offset[d] * vector_size + offs));
        }
    }
#endif
} // end of get_receive_buffer


#define NAIVE_SHIFT
#if defined(NAIVE_SHIFT)

template <typename T>
Field<T> &Field<T>::shift(const CoordinateVector &v, Field<T> &res, const Parity par) const {

    // use this to store the remaining moves
    CoordinateVector rem = v;

    // check the parity of the move
    Parity par_s;

    int len = 0;
    foralldir(d) len += abs(rem[d]);

    // no move, just copy the field
    if (len == 0) {
        res = *this;
        return res;
    }

    // opp_parity(ALL) == ALL
    if (len % 2 == 0)
        par_s = opp_parity(par);
    else
        par_s = par;

    // is this already gathered from one of the dirs in v?
    bool found_dir = false;
    Direction mdir;
    foralldir(d) {
        if (rem[d] > 0 && gather_status(par_s, d) != gather_status_t::NOT_DONE) {
            mdir = d;
            found_dir = true;
            break;
        } else if (rem[d] < 0 && gather_status(par_s, -d) != gather_status_t::NOT_DONE) {
            mdir = -d;
            found_dir = true;
            break;
        }
    }

    if (!found_dir) {
        // did not find a 'ready' direction, take the 1st available
        foralldir(d) {
            if (rem[d] > 0) {
                mdir = d;
                break;
            } else if (rem[d] < 0) {
                mdir = -d;
                break;
            }
        }
    }

    // len 1, copy directly
    if (len == 1) {
        res[par_s] = (*this)[X + mdir];
        return res;
    }

    // longer moves need a buffer
    Field<T> r1;
    Field<T> *from, *to;

    // this ensures that the final move lands on res
    if (len % 2 == 0) {
        from = &r1;
        to = &res;
    } else {
        from = &res;
        to = &r1;
    }
    // and copy initially to "from"
    (*from)[par_s] = (*this)[X + mdir];

    // and subtract the move just done from rem
    rem = rem - mdir;
    par_s = opp_parity(par_s);

    foralldir(d) {
        if (rem[d] != 0) {
            mdir = (rem[d] > 0) ? d : -d;

            while (rem[d] != 0) {

                (*to)[par_s] = (*from)[X + mdir];

                par_s = opp_parity(par_s);
                rem = rem - mdir;
                std::swap(to, from);
            }
        }
    }

    return res;
}

#endif // NAIVE_SHIFT
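
// A minimal usage sketch of shift() (illustrative only; the field names f and g,
// the element type double, and NDIM == 4 are assumptions, not part of this file):
//
//   Field<double> f = 1.0, g;
//   CoordinateVector v = {2, 1, 0, 0};  // move 2 sites in e_x, 1 site in e_y
//   f.shift(v, g, ALL);                 // g[X] gets f[X + v], composed from the
//                                       // nearest-neighbour gathers above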

/// start_gather(): Communicate the field at Parity par from Direction d.
/// Uses accessors to prevent dependency on the layout.
/// Returns the Direction mask bits where something is happening.
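///
/// A typical call pattern (an illustrative sketch; f is a hypothetical field):
/// @code
///   f.start_gather(e_x, EVEN);  // posts the MPI receive/send for the x-halo
///   // ... overlap other computation here ...
///   f.wait_gather(e_x, EVEN);   // completes the gather; halo now usable
/// @endcode
/// Note that onsites() loops normally trigger these automatically for
/// accessed neighbour sites.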
template <typename T>
dir_mask_t Field<T>::start_gather(Direction d, Parity p) const {

    // get the mpi message tag right away, to ensure that we are always synchronized
    // with the mpi calls -- some nodes might not need comms, but the tags must be in
    // sync

    int tag = get_next_msg_tag();

    lattice_struct::nn_comminfo_struct &ci = lattice.nn_comminfo[d];
    lattice_struct::comm_node_struct &from_node = ci.from_node;
    lattice_struct::comm_node_struct &to_node = ci.to_node;

    // check if this is done - either gathered or no comm to be done in the 1st place

    if (is_gathered(d, p)) {
        lattice.n_gather_avoided++;
        return 0; // nothing to wait for
    }

    // No comms to do, nothing to wait for -- we'll use the is_gathered
    // status to keep track of the vector boundary shuffle anyway

    if (from_node.rank == hila::myrank() && to_node.rank == hila::myrank()) {
        fs->set_local_boundary_elements(d, p);
        mark_gathered(d, p);
        return 0;
    }

    // if a gather of this parity or of ALL-type is already going on, nothing to be done
    if (!gather_not_done(d, p) || !gather_not_done(d, ALL)) {
        lattice.n_gather_avoided++;
        return get_dir_mask(d); // nothing to do, but still need to wait
    }

    Parity par = p;
    // if p is ALL but an ODD or EVEN gather is going on/done, turn off the parity
    // which is not needed; the corresponding wait must do the same thing
    if (p == ALL) {
        if (!gather_not_done(d, EVEN) && !gather_not_done(d, ODD)) {
            // even and odd are going on or ready, nothing to be done
            lattice.n_gather_avoided++;
            return get_dir_mask(d);
        }
        if (!gather_not_done(d, EVEN))
            par = ODD;
        else if (!gather_not_done(d, ODD))
            par = EVEN;
        // if neither is the case, par = ALL
    }

    mark_gather_started(d, par);

    // Communication hasn't been started yet, do it now

    int par_i = static_cast<int>(par) - 1; // index to dim-3 arrays

    constexpr size_t size = sizeof(T);

    T *receive_buffer;
    T *send_buffer;

    size_t size_type;
    MPI_Datatype mpi_type = get_MPI_number_type<T>(size_type);

    if (from_node.rank != hila::myrank() && boundary_need_to_communicate(d)) {

        // HANDLE RECEIVES: get the node which will send here

        // buffer can be separate or in the Field buffer
        receive_buffer = fs->get_receive_buffer(d, par, from_node);

        // size_t n = from_node.n_sites(par) * size / size_type;
        size_t n = from_node.n_sites(par) * size;

        if (n >= (1ULL << 31)) {
            hila::out << "Too large MPI message! Size " << n << '\n';
            hila::terminate(1);
        }

        post_receive_timer.start();

        // the c++ version does not return errors
        // was mpi_type
        MPI_Irecv(receive_buffer, (int)n, MPI_BYTE, from_node.rank, tag, lattice.mpi_comm_lat,
                  &fs->receive_request[par_i][d]);

        post_receive_timer.stop();
    }

    if (to_node.rank != hila::myrank() && boundary_need_to_communicate(-d)) {
        // HANDLE SENDS: Copy the Field elements on the boundary to a send buffer and send

        unsigned sites = to_node.n_sites(par);

        if (fs->send_buffer[d] == nullptr)
            fs->send_buffer[d] = fs->payload.allocate_mpi_buffer(to_node.sites);

        send_buffer = fs->send_buffer[d] + to_node.offset(par);

#ifndef MPI_BENCHMARK_TEST
        fs->gather_comm_elements(d, par, send_buffer, to_node);
#endif

        // size_t n = sites * size / size_type;
        size_t n = sites * size;

#ifdef GPU_AWARE_MPI
        gpuStreamSynchronize(0);
        // gpuDeviceSynchronize();
#endif

        start_send_timer.start();

        // was mpi_type
        MPI_Isend(send_buffer, (int)n, MPI_BYTE, to_node.rank, tag, lattice.mpi_comm_lat,
                  &fs->send_request[par_i][d]);

        start_send_timer.stop();
    }

    // and do the boundary shuffle here, after MPI has started
    // NOTE: there should be no danger of MPI and the shuffle overwriting each other:
    // MPI writes to the halo buffers only if no permutation is needed; with a
    // permutation MPI uses a special receive buffer
#ifndef MPI_BENCHMARK_TEST
    fs->set_local_boundary_elements(d, par);
#endif

    return get_dir_mask(d);
}

/// @internal
/// wait_gather(): Wait until the communication at parity par from
/// Direction d completes; the communication is finished inside this function.
///
/// NOTE: This will be called even if the field is marked const.
/// Therefore this function is const, even though it does change
/// the internal content of the field, the halo. From the point
/// of view of the user, the value of the field does not change.
template <typename T>
void Field<T>::wait_gather(Direction d, Parity p) const {

    lattice_struct::nn_comminfo_struct &ci = lattice.nn_comminfo[d];
    lattice_struct::comm_node_struct &from_node = ci.from_node;
    lattice_struct::comm_node_struct &to_node = ci.to_node;

    // check if this is done - either gathered or no comm to be done in the 1st place
    if (is_gathered(d, p))
        return;

    // this is the branch if no comms -- the shuffle was done in start_gather
    if (from_node.rank == hila::myrank() && to_node.rank == hila::myrank())
        return;

    // if (!is_gather_started(d,p)) {
    //     hila::out0 << "Wait gather error - wait_gather without corresponding
    //     start_gather\n"; exit(1);
    // }

    // Note: the move can be Parity p OR ALL -- need to wait for it in any case;
    // set par to be the "sum" over both parities.
    // There should never be an ongoing ALL gather together with another parity
    // gather -- start_gather takes care of that.

    // consistency check: this should never happen
    assert(!(p != ALL && is_gather_started(d, p) && is_gather_started(d, ALL)));

    Parity par;
    int n_wait = 1;
    // which par to wait for?
    if (is_gather_started(d, p))
        par = p; // standard match
    else if (p != ALL) {
        if (is_gather_started(d, ALL))
            par = ALL; // if ALL is running, wait for it
        else {
            exit(1);
        }
    } else {
        // now p == ALL and ALL is not running
        if (is_gathered(d, EVEN) && is_gather_started(d, ODD))
            par = ODD;
        else if (is_gathered(d, ODD) && is_gather_started(d, EVEN))
            par = EVEN;
        else if (is_gather_started(d, EVEN) && is_gather_started(d, ODD)) {
            n_wait = 2; // need to wait for both!
            par = EVEN; // will be flipped
        } else {
            exit(1);
        }
    }

    for (int wait_i = 0; wait_i < n_wait; ++wait_i) {

        int par_i = (int)par - 1;

        if (from_node.rank != hila::myrank() && boundary_need_to_communicate(d)) {
            wait_receive_timer.start();

            MPI_Status status;
            MPI_Wait(&fs->receive_request[par_i][d], &status);

            wait_receive_timer.stop();

#if !defined(VANILLA) && !defined(MPI_BENCHMARK_TEST)
            fs->place_comm_elements(d, par, fs->get_receive_buffer(d, par, from_node), from_node);
#endif
        }

        // then wait for the sends
        if (to_node.rank != hila::myrank() && boundary_need_to_communicate(-d)) {
            wait_send_timer.start();
            MPI_Status status;
            MPI_Wait(&fs->send_request[par_i][d], &status);
            wait_send_timer.stop();
        }

        // Mark the parity gathered from Direction d
        mark_gathered(d, par);

        // Keep count of communications
        lattice.n_gather_done += 1;

        par = opp_parity(par); // flip if 2 loops
    }
}


/// Gather a list of elements to a single node
/// coord_list must be the same on all nodes; the buffer is needed only on "root"
template <typename T>
void Field<T>::field_struct::gather_elements(T *buffer,
                                             const std::vector<CoordinateVector> &coord_list,
                                             int root) const {

    std::vector<unsigned> index_list;
    std::vector<int> sites_on_rank(lattice.n_nodes());
    std::vector<int> reshuffle_list(coord_list.size());

    std::fill(sites_on_rank.begin(), sites_on_rank.end(), 0);

    int nranks = 0;

    int i = 0;
    for (const CoordinateVector &c : coord_list) {
        int rank = lattice.node_rank(c);
        if (hila::myrank() == rank) {
            index_list.push_back(lattice.site_index(c));
        }

        if (sites_on_rank[rank] == 0 && rank != root)
            nranks++;
        sites_on_rank[rank]++;
        reshuffle_list[i++] = rank;
    }

    std::vector<T> send_buffer(index_list.size());
    payload.gather_elements((T *)send_buffer.data(), index_list.data(), send_buffer.size(),
                            lattice);
    if (hila::myrank() != root && sites_on_rank[hila::myrank()] > 0) {
        MPI_Send((char *)send_buffer.data(), sites_on_rank[hila::myrank()] * sizeof(T), MPI_BYTE,
                 root, hila::myrank(), lattice.mpi_comm_lat);
    }
    if (hila::myrank() == root) {

        // allocate a buffer for receiving the data
        T *b;
        std::vector<T> pb(coord_list.size() - sites_on_rank[root]);
        b = pb.data();
        // vector of node ptrs -- point to the data from each node
        std::vector<T *> nptr(lattice.n_nodes());

        std::vector<MPI_Request> mpi_req(nranks);
        int nreqs = 0;
        for (int n = 0; n < sites_on_rank.size(); n++) {
            if (sites_on_rank[n] > 0) {
                if (n != root) {
                    MPI_Irecv(b, (int)(sites_on_rank[n] * sizeof(T)), MPI_BYTE, n, n,
                              lattice.mpi_comm_lat, &mpi_req[nreqs++]);

                    nptr[n] = b;
                    b += sites_on_rank[n];

                } else {

                    nptr[n] = send_buffer.data();
                }
            }
        }

        if (nreqs > 0) {
            std::vector<MPI_Status> stat_arr(nreqs);
            MPI_Waitall(nreqs, mpi_req.data(), stat_arr.data());
        }

        // copy the data from the receive buffers to buffer, reordering
        for (int i = 0; i < coord_list.size(); i++) {
            buffer[i] = *nptr[reshuffle_list[i]];
            nptr[reshuffle_list[i]]++;
        }
    }
}

/// Send elements from a single node to a list of coordinates
/// coord_list must be the same on all nodes, but the buffer is needed only on "root"!

template <typename T>
void Field<T>::field_struct::scatter_elements(T *buffer,
                                              const std::vector<CoordinateVector> &coord_list,
                                              int root) {

    std::vector<unsigned> index_list;
    std::vector<int> sites_on_rank(lattice.n_nodes());
    std::vector<int> reshuffle_list(coord_list.size());
    std::fill(sites_on_rank.begin(), sites_on_rank.end(), 0);

    int nranks = 0;
    int i = 0;
    for (CoordinateVector c : coord_list) {
        int rank = lattice.node_rank(c);
        if (hila::myrank() == rank) {
            index_list.push_back(lattice.site_index(c));
        }

        if (sites_on_rank[rank] == 0 && rank != root)
            nranks++;
        sites_on_rank[rank]++;
        reshuffle_list[i++] = rank;
    }

    // payload.gather_elements((T *)recv_buffer.data(), index_list.data(),
    //                         recv_buffer.size(), lattice);

    if (hila::myrank() != root && sites_on_rank[hila::myrank()] > 0) {
        std::vector<T> recv_buffer(index_list.size());
        MPI_Status status;

        MPI_Recv((char *)recv_buffer.data(), sites_on_rank[hila::myrank()] * sizeof(T), MPI_BYTE,
                 root, hila::myrank(), lattice.mpi_comm_lat, &status);

        payload.place_elements((T *)recv_buffer.data(), index_list.data(), recv_buffer.size(),
                               lattice);
    }
    if (hila::myrank() == root) {
        // reordering buffers
        std::vector<T> pb(coord_list.size());
        // vectors of node counters -- point to the data for each node
        std::vector<unsigned> nloc(lattice.n_nodes());
        std::vector<unsigned> ncount(lattice.n_nodes());
        nloc[0] = ncount[0] = 0;

        for (int n = 1; n < lattice.n_nodes(); n++) {
            nloc[n] = nloc[n - 1] + sites_on_rank[n - 1];
            ncount[n] = 0;
        }
        for (int i = 0; i < coord_list.size(); i++) {
            int node = reshuffle_list[i];
            pb[nloc[node] + ncount[node]] = buffer[i];
            ncount[node]++;
        }

        std::vector<MPI_Request> mpi_req(nranks);
        int nreqs = 0;
        for (int n = 0; n < sites_on_rank.size(); n++) {
            if (sites_on_rank[n] > 0) {
                if (n != root) {
                    MPI_Isend(pb.data() + nloc[n], (int)(sites_on_rank[n] * sizeof(T)), MPI_BYTE, n,
                              n, lattice.mpi_comm_lat, &mpi_req[nreqs++]);
                }
            }
        }

        payload.place_elements(pb.data() + nloc[root], index_list.data(), index_list.size(),
                               lattice);

        if (nreqs > 0) {
            std::vector<MPI_Status> stat_arr(nreqs);
            MPI_Waitall(nreqs, mpi_req.data(), stat_arr.data());
        }
    }
}

template <typename T>
void Field<T>::set_elements(const std::vector<T> &elements,
                            const std::vector<CoordinateVector> &coord_list) {
    assert(elements.size() == coord_list.size() && "vector size mismatch in set_elements");
    std::vector<unsigned> my_indexes;
    std::vector<T> my_elements;
    for (int i = 0; i < coord_list.size(); i++) {
        CoordinateVector c = coord_list[i];
        if (lattice.is_on_mynode(c)) {
            my_indexes.push_back(lattice.site_index(c));
            my_elements.push_back(elements[i]);
        }
    }
    fs->payload.place_elements(my_elements.data(), my_indexes.data(), my_indexes.size(), lattice);
    mark_changed(ALL);
}
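
// Illustrative sketch of set_elements() (hypothetical names; assumes NDIM == 4):
// every rank passes the same full lists, and each rank stores only its local sites.
//
//   std::vector<CoordinateVector> sites = {{0, 0, 0, 0}, {1, 0, 0, 0}};
//   std::vector<double> vals = {0.5, -0.5};
//   f.set_elements(vals, sites);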


/// Get a list of elements and store them into a vector
template <typename T>
std::vector<T> Field<T>::get_elements(const std::vector<CoordinateVector> &coord_list,
                                      bool bcast) const {

    std::vector<T> res;
    if (hila::myrank() == 0)
        res.resize(coord_list.size());

    fs->gather_elements(res.data(), coord_list);
    if (bcast)
        hila::broadcast(res);

    return res;
}
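
// Illustrative sketch of get_elements(), continuing the hypothetical example above:
//
//   auto vals2 = f.get_elements(sites, true);  // bcast=true: result on all ranks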


/// Get a subvolume of the field elements to all nodes
template <typename T>
std::vector<T> Field<T>::get_subvolume(const CoordinateVector &cmin, const CoordinateVector &cmax,
                                       bool bcast) const {

    size_t vol = 1;
    foralldir(d) {
        vol *= cmax[d] - cmin[d] + 1;
        assert(cmax[d] >= cmin[d] && cmin[d] >= 0 && cmax[d] < lattice.size(d));
    }
    std::vector<CoordinateVector> clist(vol);
    CoordinateVector c;

    size_t i = 0;
    forcoordinaterange(c, cmin, cmax) {
        clist[i++] = c;
    }
    return get_elements(clist, bcast);
}


/// Get a slice (a subvolume of thickness 1 in the fixed directions)
template <typename T>
std::vector<T> Field<T>::get_slice(const CoordinateVector &c, bool bcast) const {
    CoordinateVector cmin, cmax;
    foralldir(d) if (c[d] < 0) {
        cmin[d] = 0;
        cmax[d] = lattice.size(d) - 1;
    }
    else {
        cmin[d] = cmax[d] = c[d];
    }
    return get_subvolume(cmin, cmax, bcast);
}
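
// Illustrative sketch of get_slice() (hypothetical names; assumes NDIM == 3):
// a negative coordinate selects the full extent of that direction.
//
//   CoordinateVector c = {-1, 1, 2};  // the whole x-row through y=1, z=2
//   auto row = f.get_slice(c);        // valid on rank 0; pass bcast=true for all ranks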


//////////////////////////////////////////////////////////////////////////////////
/// @internal
/// Copy the local (mpi process) data to a "logical array";
/// in gpu code this copies to the host

template <typename T>
void Field<T>::copy_local_data(std::vector<T> &buffer) const {

    // copy to local variables to avoid the lattice ptr
    CoordinateVector nmin = lattice.mynode.min;
    Vector<NDIM, unsigned> nmul = lattice.mynode.size_factor;

    buffer.resize(lattice.mynode.volume());
#if defined(CUDA) || defined(HIP)
    // d_malloc mallocs from the device if needed
    T *data = (T *)d_malloc(sizeof(T) * lattice.mynode.volume());
#else
    T *data = buffer.data();
#endif

#pragma hila novector direct_access(data)
    onsites(ALL) {
        Vector<NDIM, unsigned> nodec;
        nodec = X.coordinates() - nmin;

        unsigned i = nodec.dot(nmul);
        data[i] = (*this)[X];
    }

#if defined(CUDA) || defined(HIP)
    gpuMemcpy(buffer.data(), data, sizeof(T) * lattice.mynode.volume(), gpuMemcpyDeviceToHost);
    d_free(data);
#endif
}

////////////////////////////////////////////////////////////////////////////////////
/// @internal
/// Set the local data from an array

template <typename T>
void Field<T>::set_local_data(const std::vector<T> &buffer) {

    // copy to local variables to avoid the lattice ptr
    CoordinateVector nmin = lattice.mynode.min;
    Vector<NDIM, unsigned> nmul = lattice.mynode.size_factor;

    assert(buffer.size() >= lattice.mynode.volume());

#if defined(CUDA) || defined(HIP)
    // d_malloc mallocs from the device if needed
    T *data = (T *)d_malloc(sizeof(T) * lattice.mynode.volume());
    gpuMemcpy(data, buffer.data(), sizeof(T) * lattice.mynode.volume(), gpuMemcpyHostToDevice);
#else
    T *data = (T *)buffer.data(); // cast away const; data is only read below
#endif

#pragma hila novector direct_access(data)
    onsites(ALL) {
        Vector<NDIM, unsigned> nodec;
        nodec = X.coordinates() - nmin;

        unsigned i = nodec.dot(nmul);
        (*this)[X] = data[i];
    }

#if defined(CUDA) || defined(HIP)
    d_free(data);
#endif

    this->mark_changed(ALL);
}


////////////////////////////////////////////////////////////////////////////////////
/// Copy local data with halo - useful for visualization

template <typename T>
inline void collect_field_halo_data_(T *data, const Field<T> &src, Field<T> &dest,
                                     const Vector<NDIM, int> &dirs, int ndir) {

    // get the coords of the min point of the halo array
    CoordinateVector nmin = lattice.mynode.min;
    nmin.asArray() -= 1;

    // construct the mult vector to access the data
    Vector<NDIM, unsigned> nmul;
    nmul.e(0) = 1;
    for (int i = 1; i < NDIM; i++)
        nmul.e(i) = nmul.e(i - 1) * (lattice.mynode.size[i - 1] + 2);


    Vector<NDIM, int> node_min;
    Vector<NDIM, int> node_max;
    foralldir(d) {
        node_min[d] = lattice.mynode.min[d];
        node_max[d] = lattice.mynode.min[d] + lattice.mynode.size[d] - 1;
    }

#pragma hila novector direct_access(data)
    onsites(ALL) {
        Vector<NDIM, unsigned> nodec;
        CoordinateVector c = X.coordinates();
        bool gotit = true;

        for (int i = 0; i < ndir; i++) {
            Direction d = (Direction)dirs[i];
            if (c.e(d) == node_min[d]) {
                c.e(d) -= 1;
            } else if (c.e(d) == node_max[d]) {
                c.e(d) += 1;
            } else
                gotit = false;
        }

        Direction d = (Direction)dirs[ndir];
        if (gotit && c.e(d) == node_min[d]) {
            c.e(d) -= 1;
            nodec = c - nmin;
            data[nodec.dot(nmul)] = src[X - d];
            dest[X] = src[X - d];

        } else if (gotit && c.e(d) == node_max[d]) {
            c.e(d) += 1;
            nodec = c - nmin;
            data[nodec.dot(nmul)] = src[X + d];
            dest[X] = src[X + d];
        }
    }
}


/////////////////////////////////////////////////////////////////////////////////////////

template <typename T>
void Field<T>::copy_local_data_with_halo(std::vector<T> &buffer) const {

    // get the coords of the min point of the halo array
    CoordinateVector nmin = lattice.mynode.min;
    nmin.asArray() -= 1;

    // construct the mult vector to access the data
    Vector<NDIM, unsigned> nmul;
    nmul.e(0) = 1;
    for (int i = 1; i < NDIM; i++)
        nmul.e(i) = nmul.e(i - 1) * (lattice.mynode.size[i - 1] + 2);

    // full size of the buffer
    size_t siz = 1;
    foralldir(d) siz *= (lattice.mynode.size[d] + 2);

    buffer.resize(siz);
#if defined(CUDA) || defined(HIP)
    // d_malloc mallocs from the device if needed
    T *data = (T *)d_malloc(sizeof(T) * siz);
#else
    T *data = buffer.data();
#endif

    // now collect the bulk
#pragma hila novector direct_access(data)
    onsites(ALL) {
        Vector<NDIM, unsigned> nodec;
        nodec = X.coordinates() - nmin;

        unsigned i = nodec.dot(nmul);
        data[i] = (*this)[X];
    }

    // collect the nn-halos

    Field<T> corners = 0;
    Field<T> corner2 = 0;
#if NDIM > 2
    Field<T> corner3 = 0;
#if NDIM > 3
    Field<T> corner4 = 0;
#endif
#endif

    Vector<NDIM, int> dirs;
    foralldir(d1) {
        dirs[0] = d1;
        // gather the d1 halo
        collect_field_halo_data_(data, (*this), corners, dirs, 0);

        for (int d2 = d1 + 1; d2 < NDIM; ++d2) {
            dirs[1] = d2;
            collect_field_halo_data_(data, corners, corner2, dirs, 1);
#if NDIM > 2
            for (int d3 = d2 + 1; d3 < NDIM; ++d3) {
                dirs[2] = d3;
                collect_field_halo_data_(data, corner2, corner3, dirs, 2);
#if NDIM > 3
                for (int d4 = d3 + 1; d4 < NDIM; ++d4) {
                    dirs[3] = d4;
                    collect_field_halo_data_(data, corner3, corner4, dirs, 3);
                }
#endif
            }
#endif
        }
    }


#if defined(CUDA) || defined(HIP)
    gpuMemcpy(buffer.data(), data, sizeof(T) * siz, gpuMemcpyDeviceToHost);
    d_free(data);
#endif
}
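
// Illustrative sketch of copy_local_data_with_halo() (hypothetical names):
//
//   std::vector<double> buf;
//   f.copy_local_data_with_halo(buf);  // buf holds prod_d(node size[d] + 2) elements:
//                                      // the local bulk plus a 1-site halo shell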


#endif // FIELD_COMM_H