/** @file field_comm.h */

#ifndef FIELD_COMM_H
#define FIELD_COMM_H

#include "field.h"

/////////////////////////////////////////////////////////////////////////////////
/// This file collects implementations of field communication routines


/////////////////////////////////////////////////////////////////////////////////////////
/// Gather boundary elements for communication

template <typename T>
void Field<T>::field_struct::gather_comm_elements(
    Direction d, Parity par, T *RESTRICT buffer,
    const lattice_struct::comm_node_struct &to_node) const {
#ifndef VECTORIZED
#ifdef SPECIAL_BOUNDARY_CONDITIONS
    // note: -d in is_on_edge, because we're about to send stuff to that
    // Direction (gathering from Direction +d)
    if (boundary_condition[d] == hila::bc::ANTIPERIODIC && lattice.mynode.is_on_edge(-d)) {
        payload.gather_comm_elements(buffer, to_node, par, lattice, true);
    } else {
        payload.gather_comm_elements(buffer, to_node, par, lattice, false);
    }
#else
    payload.gather_comm_elements(buffer, to_node, par, lattice, false);
#endif

#else
    // this is the vectorized branch
    bool antiperiodic = false;
#ifdef SPECIAL_BOUNDARY_CONDITIONS
    if (boundary_condition[d] == hila::bc::ANTIPERIODIC && lattice.mynode.is_on_edge(-d)) {
        antiperiodic = true;
    }
#endif

    if constexpr (hila::is_vectorizable_type<T>::value) {
        // now vectorized layout
        if (vector_lattice->is_boundary_permutation[abs(d)]) {
            // with a boundary permutation, need to gather the elements 1-by-1
            int n;
            const unsigned *index_list = to_node.get_sitelist(par, n);
            if (!antiperiodic) {
                payload.gather_elements(buffer, index_list, n, lattice);
            } else {
                payload.gather_elements_negated(buffer, index_list, n, lattice);
            }
        } else {
            // without it, can do the full block
            payload.gather_comm_vectors(buffer, to_node, par, vector_lattice, antiperiodic);
        }
    } else {
        // not vectorizable, use the standard methods
        int n;
        const unsigned *index_list = to_node.get_sitelist(par, n);
        if (!antiperiodic)
            payload.gather_elements(buffer, index_list, n, lattice);
        else {
            payload.gather_elements_negated(buffer, index_list, n, lattice);
        }
    }
#endif
}
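
// Commentary on the antiperiodic case above (added note): with hila::bc::ANTIPERIODIC
// the elements gathered across the lattice edge are sent with the opposite sign, i.e.
// the neighbour's halo receives -f(x) instead of f(x). gather_elements_negated() and
// the boolean 'true' passed to gather_comm_elements() implement this sign flip.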

/////////////////////////////////////////////////////////////////////////////////////////
/// Place boundary elements from neighbour

template <typename T>
void Field<T>::field_struct::place_comm_elements(
    Direction d, Parity par, T *RESTRICT buffer,
    const lattice_struct::comm_node_struct &from_node) {

#ifdef VECTORIZED
    if constexpr (hila::is_vectorizable_type<T>::value) {
        // now vectorized layout, act accordingly
        if (vector_lattice->is_boundary_permutation[abs(d)]) {
            payload.place_recv_elements(buffer, d, par, vector_lattice);
        } else {
            // nothing to do here, comms go directly in place
        }
    } else {
        // non-vectorized, using the vanilla method; again nothing to do
    }
#else
    // this branch is used by the GPU (CUDA/HIP) code
    payload.place_comm_elements(d, par, buffer, from_node, lattice);
#endif
}

/////////////////////////////////////////////////////////////////////////////////////////
/// Place boundary elements from local lattice (used in vectorized version)

template <typename T>
void Field<T>::field_struct::set_local_boundary_elements(Direction dir, Parity par) {

#ifdef SPECIAL_BOUNDARY_CONDITIONS
    bool antiperiodic =
        (boundary_condition[dir] == hila::bc::ANTIPERIODIC && lattice.mynode.is_on_edge(dir));
#else
    bool antiperiodic = false;
#endif
    payload.set_local_boundary_elements(dir, par, lattice, antiperiodic);
}


/////////////////////////////////////////////////////////////////////////////////////////
/// Get the receive buffer pointer for the communication.

template <typename T>
T *Field<T>::field_struct::get_receive_buffer(Direction d, Parity par,
                                              const lattice_struct::comm_node_struct &from_node) {
#if defined(VANILLA)

    return (T *)payload.get_buffer() + from_node.offset(par);

#elif defined(CUDA) || defined(HIP)

    unsigned offs = 0;
    if (par == ODD)
        offs = from_node.sites / 2;
    if (receive_buffer[d] == nullptr) {
        receive_buffer[d] = payload.allocate_mpi_buffer(from_node.sites);
    }
    return receive_buffer[d] + offs;

#elif defined(VECTORIZED)

    if constexpr (!hila::is_vectorizable_type<T>::value) {
        // use vanilla type, field laid out in std fashion
        return (T *)payload.get_buffer() + from_node.offset(par);
    } else {
        unsigned offs = 0;
        if (par == ODD)
            offs = from_node.sites / 2;

        if (vector_lattice->is_boundary_permutation[abs(d)]) {
            // an extra copy operation is needed
            if (receive_buffer[d] == nullptr) {
                receive_buffer[d] = payload.allocate_mpi_buffer(from_node.sites);
            }
            return receive_buffer[d] + offs;
        } else {
            // receive directly to the halo buffer
            constexpr unsigned vector_size = hila::vector_info<T>::vector_size;
            return ((T *)payload.get_buffer() +
                    (vector_lattice->halo_offset[d] * vector_size + offs));
        }
    }
#endif
} // end of get_receive_buffer


#define NAIVE_SHIFT
#if defined(NAIVE_SHIFT)

template <typename T>
Field<T> &Field<T>::shift(const CoordinateVector &v, Field<T> &res, const Parity par) const {

    // use this to store the remaining moves
    CoordinateVector rem = v;

    // parity of the sites written by the current move
    Parity par_s;

    int len = 0;
    foralldir(d) len += abs(rem[d]);

    // no move, just copy the field
    if (len == 0) {
        res = *this;
        return res;
    }

    // opp_parity(ALL) == ALL
    if (len % 2 == 0)
        par_s = opp_parity(par);
    else
        par_s = par;

    // is this already gathered from one of the dirs in v?
    bool found_dir = false;
    Direction mdir;
    foralldir(d) {
        if (rem[d] > 0 && gather_status(par_s, d) != gather_status_t::NOT_DONE) {
            mdir = d;
            found_dir = true;
            break;
        } else if (rem[d] < 0 && gather_status(par_s, -d) != gather_status_t::NOT_DONE) {
            mdir = -d;
            found_dir = true;
            break;
        }
    }

    if (!found_dir) {
        // did not find a 'ready' dir, take the 1st available
        foralldir(d) {
            if (rem[d] > 0) {
                mdir = d;
                break;
            } else if (rem[d] < 0) {
                mdir = -d;
                break;
            }
        }
    }

    // len 1: copy directly
    if (len == 1) {
        res[par_s] = (*this)[X + mdir];
        return res;
    }

    // longer moves need an intermediate buffer
    Field<T> r1;
    Field<T> *from, *to;

    // this ensures that the final move lands on res
    if (len % 2 == 0) {
        from = &r1;
        to = &res;
    } else {
        from = &res;
        to = &r1;
    }
    // and copy initially to "from"
    (*from)[par_s] = (*this)[X + mdir];

    // and subtract the first move from rem
    rem = rem - mdir;
    par_s = opp_parity(par_s);

    foralldir(d) {
        if (rem[d] != 0) {
            mdir = (rem[d] > 0) ? d : -d;

            while (rem[d] != 0) {

                (*to)[par_s] = (*from)[X + mdir];

                par_s = opp_parity(par_s);
                rem = rem - mdir;
                std::swap(to, from);
            }
        }
    }

    return res;
}

#endif // NAIVE_SHIFT
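
// A minimal usage sketch of shift() (illustrative only; assumes an initialized
// lattice and NDIM == 4 so that the brace initializer has the right length):
//
//   Field<double> f = 1.0, g;
//   CoordinateVector v = {2, -1, 0, 0};  // 2 steps in +x, 1 step in -y
//   f.shift(v, g, ALL);                  // g[X] = f[X + v], with periodic wrap
//
// The shift is decomposed into |v_x| + |v_y| + ... nearest-neighbour moves, so a
// large shift vector costs correspondingly many gathers.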

/// start_gather(): Communicate the field at Parity par from Direction
/// d. Uses accessors to prevent dependency on the layout.
/// Returns the Direction mask bits where something is happening.
template <typename T>
dir_mask_t Field<T>::start_gather(Direction d, Parity p) const {

    // get the mpi message tag right away, to ensure that we are always synchronized
    // with the mpi calls -- some nodes might not need comms, but the tags must be in
    // sync

    int tag = get_next_msg_tag();

    lattice_struct::nn_comminfo_struct &ci = lattice.nn_comminfo[d];
    lattice_struct::comm_node_struct &from_node = ci.from_node;
    lattice_struct::comm_node_struct &to_node = ci.to_node;

    // check if this is done - either gathered or no comm to be done in the 1st place

    if (is_gathered(d, p)) {
        lattice.n_gather_avoided++;
        return 0; // nothing to wait for
    }

    // No comms to do, nothing to wait for -- we'll use the is_gathered
    // status to keep track of vector boundary shuffle anyway

    if (from_node.rank == hila::myrank() && to_node.rank == hila::myrank()) {
        fs->set_local_boundary_elements(d, p);
        mark_gathered(d, p);
        return 0;
    }

    // if this parity or ALL-type gather is going on nothing to be done
    if (!gather_not_done(d, p) || !gather_not_done(d, ALL)) {
        lattice.n_gather_avoided++;
        return get_dir_mask(d); // nothing to do, but still need to wait
    }

    Parity par = p;
    // if p is ALL but ODD or EVEN is going on/done, turn off parity which is not needed
    // corresponding wait must do the same thing
    if (p == ALL) {
        if (!gather_not_done(d, EVEN) && !gather_not_done(d, ODD)) {
            // even and odd are going on or ready, nothing to be done
            lattice.n_gather_avoided++;
            return get_dir_mask(d);
        }
        if (!gather_not_done(d, EVEN))
            par = ODD;
        else if (!gather_not_done(d, ODD))
            par = EVEN;
        // if neither is the case par = ALL
    }

    mark_gather_started(d, par);

    // Communication hasn't been started yet, do it now

    int par_i = static_cast<int>(par) - 1; // index to dim-3 arrays

    constexpr size_t size = sizeof(T);

    T *receive_buffer;
    T *send_buffer;

    size_t size_type;
    MPI_Datatype mpi_type = get_MPI_number_type<T>(size_type);

    if (from_node.rank != hila::myrank() && boundary_need_to_communicate(d)) {

        // HANDLE RECEIVES: get node which will send here

        // buffer can be separate or in Field buffer
        receive_buffer = fs->get_receive_buffer(d, par, from_node);

        size_t n = from_node.n_sites(par) * size / size_type;

        if (n >= (1ULL << 31)) {
            hila::out << "Too large MPI message! Size " << n << '\n';
            hila::terminate(1);
        }

        post_receive_timer.start();

        // c++ version does not return errors
        MPI_Irecv(receive_buffer, (int)n, mpi_type, from_node.rank, tag, lattice.mpi_comm_lat,
                  &fs->receive_request[par_i][d]);

        post_receive_timer.stop();
    }

    if (to_node.rank != hila::myrank() && boundary_need_to_communicate(-d)) {
        // HANDLE SENDS: copy the Field elements on the boundary to a send buffer and send

        unsigned sites = to_node.n_sites(par);

        if (fs->send_buffer[d] == nullptr)
            fs->send_buffer[d] = fs->payload.allocate_mpi_buffer(to_node.sites);

        send_buffer = fs->send_buffer[d] + to_node.offset(par);

        fs->gather_comm_elements(d, par, send_buffer, to_node);

        size_t n = sites * size / size_type;
#ifdef GPU_AWARE_MPI
        gpuStreamSynchronize(0);
        // gpuDeviceSynchronize();
#endif

        start_send_timer.start();

        MPI_Isend(send_buffer, (int)n, mpi_type, to_node.rank, tag, lattice.mpi_comm_lat,
                  &fs->send_request[par_i][d]);

        start_send_timer.stop();
    }

    // and do the boundary shuffle here, after MPI has been started
    // NOTE: there should be no danger of MPI and the shuffle overwriting each other:
    // MPI writes to the halo buffers only if no permutation is needed; with a
    // permutation MPI uses a separate receive buffer
    fs->set_local_boundary_elements(d, par);

    return get_dir_mask(d);
}
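
// Sketch of manual gather overlap (illustrative; normally hilapp inserts these calls
// automatically when a site loop accesses a neighbour such as f[X + e_x]):
//
//   f.start_gather(e_x, ALL);       // post the MPI receives/sends for the +x halo
//   // ... independent computation here, overlapping with the communication ...
//   onsites(ALL) g[X] = f[X + e_x]; // the loop waits for the gather before use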

/// @internal
/// wait_gather(): wait until the communication at parity par from Direction d
/// completes; the gather is finished when this function returns.
/// If the communication has not been started yet, also calls start_gather().
///
/// NOTE: This will be called even if the field is marked const.
/// Therefore this function is const, even though it changes
/// the internal content of the field, the halo. From the point
/// of view of the user, the value of the field does not change.
template <typename T>
void Field<T>::wait_gather(Direction d, Parity p) const {

    lattice_struct::nn_comminfo_struct &ci = lattice.nn_comminfo[d];
    lattice_struct::comm_node_struct &from_node = ci.from_node;
    lattice_struct::comm_node_struct &to_node = ci.to_node;

    // check if this is done - either gathered or no comm to be done in the 1st place
    if (is_gathered(d, p))
        return;

    // this is the branch if no comms -- shuffle was done in start_gather
    if (from_node.rank == hila::myrank() && to_node.rank == hila::myrank())
        return;

    // if (!is_gather_started(d,p)) {
    //     hila::out0 << "Wait gather error - wait_gather without corresponding start_gather\n";
    //     exit(1);
    // }

    // Note: the move can be Parity p OR ALL -- need to wait for it in any case
    // set par to be the "sum" over both parities
    // There never should be an ongoing ALL and other parity gather -- start_gather
    // takes care of that

    // check consistency here; this should never happen
    if (p != ALL && is_gather_started(d, p) && is_gather_started(d, ALL)) {
        exit(1);
    }

    Parity par;
    int n_wait = 1;
    // which parity to wait for?
    if (is_gather_started(d, p))
        par = p; // standard match
    else if (p != ALL) {
        if (is_gather_started(d, ALL))
            par = ALL; // if ALL is running wait for it
        else {
            exit(1);
        }
    } else {
        // now p == ALL and ALL is not running
        if (is_gathered(d, EVEN) && is_gather_started(d, ODD))
            par = ODD;
        else if (is_gathered(d, ODD) && is_gather_started(d, EVEN))
            par = EVEN;
        else if (is_gather_started(d, EVEN) && is_gather_started(d, ODD)) {
            n_wait = 2; // need to wait for both!
            par = ALL;
        } else {
            exit(1);
        }
    }

    if (n_wait == 2)
        par = EVEN; // we'll flip both

    for (int wait_i = 0; wait_i < n_wait; ++wait_i) {

        int par_i = (int)par - 1;

        if (from_node.rank != hila::myrank() && boundary_need_to_communicate(d)) {
            wait_receive_timer.start();

            MPI_Status status;
            MPI_Wait(&fs->receive_request[par_i][d], &status);

            wait_receive_timer.stop();

#ifndef VANILLA
            fs->place_comm_elements(d, par, fs->get_receive_buffer(d, par, from_node), from_node);
#endif
        }

        // then wait for the sends
        if (to_node.rank != hila::myrank() && boundary_need_to_communicate(-d)) {
            wait_send_timer.start();
            MPI_Status status;
            MPI_Wait(&fs->send_request[par_i][d], &status);
            wait_send_timer.stop();
        }

        // Mark the parity gathered from Direction d
        mark_gathered(d, par);

        // Keep count of communications
        lattice.n_gather_done += 1;

        par = opp_parity(par); // flip if 2 loops
    }
}


/// Gather a list of elements to a single node
/// coord_list must be the same on all nodes; the buffer is needed only on the "root" node
template <typename T>
void Field<T>::field_struct::gather_elements(T *RESTRICT buffer,
                                             const std::vector<CoordinateVector> &coord_list,
                                             int root) const {

    std::vector<unsigned> index_list;
    std::vector<int> sites_on_rank(lattice.n_nodes());
    std::vector<int> reshuffle_list(coord_list.size());

    std::fill(sites_on_rank.begin(), sites_on_rank.end(), 0);

    int nranks = 0;

    int i = 0;
    for (const CoordinateVector &c : coord_list) {
        int rank = lattice.node_rank(c);
        if (hila::myrank() == rank) {
            index_list.push_back(lattice.site_index(c));
        }

        if (sites_on_rank[rank] == 0 && rank != root)
            nranks++;
        sites_on_rank[rank]++;
        reshuffle_list[i++] = rank;
    }

    std::vector<T> send_buffer(index_list.size());
    payload.gather_elements((T *)send_buffer.data(), index_list.data(), send_buffer.size(),
                            lattice);
    if (hila::myrank() != root && sites_on_rank[hila::myrank()] > 0) {
        MPI_Send((char *)send_buffer.data(), sites_on_rank[hila::myrank()] * sizeof(T), MPI_BYTE,
                 root, hila::myrank(), lattice.mpi_comm_lat);
    }
    if (hila::myrank() == root) {

        // allocate a buffer for receiving the data
        T *b;
        std::vector<T> pb(coord_list.size() - sites_on_rank[root]);
        b = pb.data();
        // vector of node ptrs -- point to the data from each node
        std::vector<T *> nptr(lattice.n_nodes());

        std::vector<MPI_Request> mpi_req(nranks);
        int nreqs = 0;
        for (int n = 0; n < sites_on_rank.size(); n++) {
            if (sites_on_rank[n] > 0) {
                if (n != root) {
                    MPI_Irecv(b, (int)(sites_on_rank[n] * sizeof(T)), MPI_BYTE, n, n,
                              lattice.mpi_comm_lat, &mpi_req[nreqs++]);

                    nptr[n] = b;
                    b += sites_on_rank[n];

                } else {

                    nptr[n] = send_buffer.data();
                }
            }
        }

        if (nreqs > 0) {
            std::vector<MPI_Status> stat_arr(nreqs);
            MPI_Waitall(nreqs, mpi_req.data(), stat_arr.data());
        }

        // copy the data from pb to buffer, reordering
        for (int i = 0; i < coord_list.size(); i++) {
            buffer[i] = *nptr[reshuffle_list[i]];
            nptr[reshuffle_list[i]]++;
        }
    }
}

/// Send elements from a single node to a list of coordinates
/// coord_list must be the same on all nodes, but the buffer is needed only on the "root" node!

template <typename T>
void Field<T>::field_struct::scatter_elements(T *RESTRICT buffer,
                                              const std::vector<CoordinateVector> &coord_list,
                                              int root) {

    std::vector<unsigned> index_list;
    std::vector<int> sites_on_rank(lattice.n_nodes());
    std::vector<int> reshuffle_list(coord_list.size());
    std::fill(sites_on_rank.begin(), sites_on_rank.end(), 0);

    int nranks = 0;
    int i = 0;
    for (const CoordinateVector &c : coord_list) {
        int rank = lattice.node_rank(c);
        if (hila::myrank() == rank) {
            index_list.push_back(lattice.site_index(c));
        }

        if (sites_on_rank[rank] == 0 && rank != root)
            nranks++;
        sites_on_rank[rank]++;
        reshuffle_list[i++] = rank;
    }

    if (hila::myrank() != root && sites_on_rank[hila::myrank()] > 0) {
        std::vector<T> recv_buffer(index_list.size());
        MPI_Status status;

        MPI_Recv((char *)recv_buffer.data(), sites_on_rank[hila::myrank()] * sizeof(T), MPI_BYTE,
                 root, hila::myrank(), lattice.mpi_comm_lat, &status);

        payload.place_elements((T *)recv_buffer.data(), index_list.data(), recv_buffer.size(),
                               lattice);
    }
    if (hila::myrank() == root) {
        // reorder the buffer by the destination rank
        std::vector<T> pb(coord_list.size());
        // vectors for the per-node offsets and counters
        std::vector<unsigned> nloc(lattice.n_nodes());
        std::vector<unsigned> ncount(lattice.n_nodes());
        nloc[0] = ncount[0] = 0;

        for (int n = 1; n < lattice.n_nodes(); n++) {
            nloc[n] = nloc[n - 1] + sites_on_rank[n - 1];
            ncount[n] = 0;
        }
        for (int i = 0; i < coord_list.size(); i++) {
            int node = reshuffle_list[i];
            pb[nloc[node] + ncount[node]] = buffer[i];
            ncount[node]++;
        }

        std::vector<MPI_Request> mpi_req(nranks);
        int nreqs = 0;
        for (int n = 0; n < sites_on_rank.size(); n++) {
            if (sites_on_rank[n] > 0) {
                if (n != root) {
                    MPI_Isend(pb.data() + nloc[n], (int)(sites_on_rank[n] * sizeof(T)), MPI_BYTE, n,
                              n, lattice.mpi_comm_lat, &mpi_req[nreqs++]);
                }
            }
        }

        payload.place_elements(pb.data() + nloc[root], index_list.data(), index_list.size(),
                               lattice);

        if (nreqs > 0) {
            std::vector<MPI_Status> stat_arr(nreqs);
            MPI_Waitall(nreqs, mpi_req.data(), stat_arr.data());
        }
    }
}

template <typename T>
void Field<T>::set_elements(const std::vector<T> &elements,
                            const std::vector<CoordinateVector> &coord_list) {
    assert(elements.size() == coord_list.size() && "vector size mismatch in set_elements");
    std::vector<unsigned> my_indexes;
    std::vector<T> my_elements;
    for (int i = 0; i < coord_list.size(); i++) {
        CoordinateVector c = coord_list[i];
        if (lattice.is_on_mynode(c)) {
            my_indexes.push_back(lattice.site_index(c));
            my_elements.push_back(elements[i]);
        }
    }
    fs->payload.place_elements(my_elements.data(), my_indexes.data(), my_indexes.size(), lattice);
    mark_changed(ALL);
}
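
// Sketch of set_elements() usage (illustrative values; coord_list must be identical
// on every rank, and each rank stores only the sites it owns; NDIM == 4 assumed):
//
//   std::vector<CoordinateVector> sites = {{0, 0, 0, 0}, {1, 0, 0, 0}};
//   std::vector<double> vals = {0.5, -0.5};
//   f.set_elements(vals, sites);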


/// Get a list of elements and store them into a vector
template <typename T>
std::vector<T> Field<T>::get_elements(const std::vector<CoordinateVector> &coord_list,
                                      bool bcast) const {

    std::vector<T> res;
    if (hila::myrank() == 0)
        res.resize(coord_list.size());

    fs->gather_elements(res.data(), coord_list);
    if (bcast)
        hila::broadcast(res);

    return res;
}
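
// Sketch of get_elements() usage (illustrative): the values are collected on rank 0;
// pass bcast = true when every rank needs the result:
//
//   std::vector<double> vals = f.get_elements(sites, true);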


/// Get a subvolume of the field elements to all nodes
template <typename T>
std::vector<T> Field<T>::get_subvolume(const CoordinateVector &cmin, const CoordinateVector &cmax,
                                       bool bcast) const {

    size_t vol = 1;
    foralldir(d) {
        vol *= cmax[d] - cmin[d] + 1;
        assert(cmax[d] >= cmin[d] && cmin[d] >= 0 && cmax[d] < lattice.size(d));
    }
    std::vector<CoordinateVector> clist(vol);
    CoordinateVector c;

    size_t i = 0;
    forcoordinaterange(c, cmin, cmax) {
        clist[i++] = c;
    }
    return get_elements(clist, bcast);
}
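
// Sketch of get_subvolume() usage (illustrative, assuming NDIM == 4): extract a
// 4x4x1x1 block starting at the origin, broadcast to all ranks; the elements are
// ordered as forcoordinaterange() traverses the coordinates:
//
//   CoordinateVector cmin = {0, 0, 0, 0}, cmax = {3, 3, 0, 0};
//   std::vector<double> block = f.get_subvolume(cmin, cmax, true);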


/// Get a slice (subvolume)
template <typename T>
std::vector<T> Field<T>::get_slice(const CoordinateVector &c, bool bcast) const {
    CoordinateVector cmin, cmax;
    foralldir(d) {
        if (c[d] < 0) {
            cmin[d] = 0;
            cmax[d] = lattice.size(d) - 1;
        } else {
            cmin[d] = cmax[d] = c[d];
        }
    }
    return get_subvolume(cmin, cmax, bcast);
}
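
// Sketch of get_slice() usage (illustrative, assuming NDIM == 4): a negative entry
// keeps the full coordinate range in that direction, so this extracts the whole
// x-axis line at y = z = t = 0:
//
//   CoordinateVector c = {-1, 0, 0, 0};
//   std::vector<double> line = f.get_slice(c, true);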


//////////////////////////////////////////////////////////////////////////////////
/// @internal
/// Copy the local (mpi process) data to a "logical array";
/// on gpu code, copies to host

template <typename T>
void Field<T>::copy_local_data(std::vector<T> &buffer) const {

    // copy to local variables to avoid referencing the lattice ptr inside the loop
    CoordinateVector nmin = lattice.mynode.min;
    Vector<NDIM, unsigned> nmul = lattice.mynode.size_factor;

    buffer.resize(lattice.mynode.volume());
#if defined(CUDA) || defined(HIP)
    // d_malloc mallocs from the device if needed
    T *data = (T *)d_malloc(sizeof(T) * lattice.mynode.volume());
#else
    T *data = buffer.data();
#endif

#pragma hila novector direct_access(data)
    onsites(ALL) {
        Vector<NDIM, unsigned> nodec;
        nodec = X.coordinates() - nmin;

        unsigned i = nodec.dot(nmul);
        data[i] = (*this)[X];
    }

#if defined(CUDA) || defined(HIP)
    gpuMemcpy(buffer.data(), data, sizeof(T) * lattice.mynode.volume(), gpuMemcpyDeviceToHost);
    d_free(data);
#endif
}
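
// Sketch of copy_local_data() usage (illustrative): buffer receives the sites owned
// by this MPI rank, element i corresponding to the node-local coordinate nodec with
// i == nodec.dot(lattice.mynode.size_factor):
//
//   std::vector<double> buf;
//   f.copy_local_data(buf);  // buf.size() == lattice.mynode.volume()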

////////////////////////////////////////////////////////////////////////////////////
/// @internal
/// Set the local data from an array

template <typename T>
void Field<T>::set_local_data(const std::vector<T> &buffer) {

    // copy to local variables to avoid referencing the lattice ptr inside the loop
    CoordinateVector nmin = lattice.mynode.min;
    Vector<NDIM, unsigned> nmul = lattice.mynode.size_factor;

    assert(buffer.size() >= lattice.mynode.volume());

#if defined(CUDA) || defined(HIP)
    // d_malloc mallocs from the device if needed
    T *data = (T *)d_malloc(sizeof(T) * lattice.mynode.volume());
    gpuMemcpy(data, buffer.data(), sizeof(T) * lattice.mynode.volume(), gpuMemcpyHostToDevice);
#else
    // cast away const: the buffer is only read here
    T *data = (T *)buffer.data();
#endif

#pragma hila novector direct_access(data)
    onsites(ALL) {
        Vector<NDIM, unsigned> nodec;
        nodec = X.coordinates() - nmin;

        unsigned i = nodec.dot(nmul);
        (*this)[X] = data[i];
    }

#if defined(CUDA) || defined(HIP)
    d_free(data);
#endif

    this->mark_changed(ALL);
}


////////////////////////////////////////////////////////////////////////////////////
/// Copy local data with halo - useful for visualization

template <typename T>
inline void collect_field_halo_data_(T *data, const Field<T> &src, Field<T> &dest,
                                     const Vector<NDIM, int> &dirs, int ndir) {

    // get the coords of the min point of the halo array
    CoordinateVector nmin = lattice.mynode.min;
    nmin.asArray() -= 1;

    // construct the mult vector to access the data
    Vector<NDIM, unsigned> nmul;
    nmul.e(0) = 1;
    for (int i = 1; i < NDIM; i++)
        nmul.e(i) = nmul.e(i - 1) * (lattice.mynode.size[i - 1] + 2);


    Vector<NDIM, int> node_min;
    Vector<NDIM, int> node_max;
    foralldir(d) {
        node_min[d] = lattice.mynode.min[d];
        node_max[d] = lattice.mynode.min[d] + lattice.mynode.size[d] - 1;
    }

#pragma hila novector direct_access(data)
    onsites(ALL) {
        Vector<NDIM, unsigned> nodec;
        CoordinateVector c = X.coordinates();
        bool gotit = true;

        for (int i = 0; i < ndir; i++) {
            Direction d = (Direction)dirs[i];
            if (c.e(d) == node_min[d]) {
                c.e(d) -= 1;
            } else if (c.e(d) == node_max[d]) {
                c.e(d) += 1;
            } else
                gotit = false;
        }

        Direction d = (Direction)dirs[ndir];
        if (gotit && c.e(d) == node_min[d]) {
            c.e(d) -= 1;
            nodec = c - nmin;
            data[nodec.dot(nmul)] = src[X - d];
            dest[X] = src[X - d];

        } else if (gotit && c.e(d) == node_max[d]) {
            c.e(d) += 1;
            nodec = c - nmin;
            data[nodec.dot(nmul)] = src[X + d];
            dest[X] = src[X + d];
        }
    }
}
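
// The halo indexing used above, spelled out: the halo array covers the node volume
// extended by one site in each direction, so dimension d has (mynode.size[d] + 2)
// slots, and a site with coordinates c maps to
//
//   index = sum_d (c[d] - nmin[d]) * nmul[d],  nmul[d] = prod_{i<d} (mynode.size[i] + 2)
//
// where nmin = mynode.min - 1 is the corner of the halo array.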


/////////////////////////////////////////////////////////////////////////////////////////

template <typename T>
void Field<T>::copy_local_data_with_halo(std::vector<T> &buffer) const {

    // get the coords of the min point of the halo array
    CoordinateVector nmin = lattice.mynode.min;
    nmin.asArray() -= 1;

    // construct the mult vector to access the data
    Vector<NDIM, unsigned> nmul;
    nmul.e(0) = 1;
    for (int i = 1; i < NDIM; i++)
        nmul.e(i) = nmul.e(i - 1) * (lattice.mynode.size[i - 1] + 2);

    // full size of the buffer
    size_t siz = 1;
    foralldir(d) siz *= (lattice.mynode.size[d] + 2);

    buffer.resize(siz);
#if defined(CUDA) || defined(HIP)
    // d_malloc mallocs from the device if needed
    T *data = (T *)d_malloc(sizeof(T) * siz);
#else
    T *data = buffer.data();
#endif

    // now collect the bulk
#pragma hila novector direct_access(data)
    onsites(ALL) {
        Vector<NDIM, unsigned> nodec;
        nodec = X.coordinates() - nmin;

        unsigned i = nodec.dot(nmul);
        data[i] = (*this)[X];
    }

    // collect the nn-halos

    Field<T> corners = 0;
    Field<T> corner2 = 0;
#if NDIM > 2
    Field<T> corner3 = 0;
#if NDIM > 3
    Field<T> corner4 = 0;
#endif
#endif

    Vector<NDIM, int> dirs;
    foralldir(d1) {
        dirs[0] = d1;
        // gather the d1 halo
        collect_field_halo_data_(data, (*this), corners, dirs, 0);

        for (int d2 = d1 + 1; d2 < NDIM; ++d2) {
            dirs[1] = d2;
            collect_field_halo_data_(data, corners, corner2, dirs, 1);
#if NDIM > 2
            for (int d3 = d2 + 1; d3 < NDIM; ++d3) {
                dirs[2] = d3;
                collect_field_halo_data_(data, corner2, corner3, dirs, 2);
#if NDIM > 3
                for (int d4 = d3 + 1; d4 < NDIM; ++d4) {
                    dirs[3] = d4;
                    collect_field_halo_data_(data, corner3, corner4, dirs, 3);
                }
#endif
            }
#endif
        }
    }


#if defined(CUDA) || defined(HIP)
    gpuMemcpy(buffer.data(), data, sizeof(T) * siz, gpuMemcpyDeviceToHost);
    d_free(data);
#endif
}
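
// Sketch of copy_local_data_with_halo() usage (illustrative): the buffer holds the
// local volume padded by one halo layer in every direction:
//
//   std::vector<double> halo_buf;
//   f.copy_local_data_with_halo(halo_buf);
//   // halo_buf.size() == prod_d (lattice.mynode.size[d] + 2)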


#endif // FIELD_COMM_H