1/** @file field_comm.h */
2
3#ifndef FIELD_COMM_H
4#define FIELD_COMM_H
5
6#include "field.h"
7
8/////////////////////////////////////////////////////////////////////////////////
9/// This file collects implementations of field communication routines
10
11
12/////////////////////////////////////////////////////////////////////////////////////////
13/// Gather boundary elements for communication
14
15 template <typename T>
16 void Field<T>::field_struct::gather_comm_elements(
17     Direction d, Parity par, T *RESTRICT buffer,
18     const lattice_struct::comm_node_struct &to_node) const {
19#ifndef VECTORIZED
20#ifdef SPECIAL_BOUNDARY_CONDITIONS
21 // note: -d in is_on_edge, because we're about to send stuff to that
22 // Direction (gathering from Direction +d)
23 if (boundary_condition[d] == hila::bc::ANTIPERIODIC && lattice->mynode.is_on_edge(-d)) {
24 payload.gather_comm_elements(buffer, to_node, par, lattice, true);
25 } else {
26 payload.gather_comm_elements(buffer, to_node, par, lattice, false);
27 }
28#else
29 payload.gather_comm_elements(buffer, to_node, par, lattice, false);
30#endif
31
32#else
33     // this is the vectorized branch
34 bool antiperiodic = false;
35#ifdef SPECIAL_BOUNDARY_CONDITIONS
36 if (boundary_condition[d] == hila::bc::ANTIPERIODIC && lattice->mynode.is_on_edge(-d)) {
37 antiperiodic = true;
38 }
39#endif
40
41     if constexpr (hila::is_vectorizable_type<T>::value) {
42         // now vectorized layout
43         if (vector_lattice->is_boundary_permutation[abs(d)]) {
44 // with boundary permutation need to gather elems 1-by-1
45 int n;
46 const unsigned *index_list = to_node.get_sitelist(par, n);
47 if (!antiperiodic) {
48 payload.gather_elements(buffer, index_list, n, lattice);
49 } else {
50 payload.gather_elements_negated(buffer, index_list, n, lattice);
51 }
52 } else {
53 // without it, can do the full block
54 payload.gather_comm_vectors(buffer, to_node, par, vector_lattice, antiperiodic);
55 }
56 } else {
57         // not vectorizable, standard methods
58 int n;
59 const unsigned *index_list = to_node.get_sitelist(par, n);
60 if (!antiperiodic)
61 payload.gather_elements(buffer, index_list, n, lattice);
62 else {
63 payload.gather_elements_negated(buffer, index_list, n, lattice);
64 }
65 }
66#endif
67}
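
// Illustrative sketch: the antiperiodic branch above is driven by the
// boundary-condition setting of the field. This assumes the setter
// Field::set_boundary_condition() declared in field.h, and the standard
// direction name e_t (NDIM == 4):
//
//   Field<double> f = 1;
//   f.set_boundary_condition(e_t, hila::bc::ANTIPERIODIC);
//   onsites(ALL) {
//       double up = f[X + e_t];  // negated where the hop wraps the t-boundary
//   }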
68
69/////////////////////////////////////////////////////////////////////////////////////////
70/// Place boundary elements from neighbour
71
72 template <typename T>
73 void Field<T>::field_struct::place_comm_elements(
74     Direction d, Parity par, T *RESTRICT buffer,
75     const lattice_struct::comm_node_struct &from_node) {
76
77 #ifdef VECTORIZED
78     if constexpr (hila::is_vectorizable_type<T>::value) {
79         // now vectorized layout, act accordingly
80         if (vector_lattice->is_boundary_permutation[abs(d)]) {
81 payload.place_recv_elements(buffer, d, par, vector_lattice);
82 } else {
83 // nothing to do here, comms directly in place
84 }
85 } else {
86 // non-vectorized, using vanilla method, again nothing to do
87 }
88#else
89 // this one is only for CUDA
90 payload.place_comm_elements(d, par, buffer, from_node, lattice);
91#endif
92 // #endif
93}
94
95/////////////////////////////////////////////////////////////////////////////////////////
96/// Place boundary elements from local lattice (used in vectorized version)
97
98 template <typename T>
99 void Field<T>::field_struct::set_local_boundary_elements(Direction dir, Parity par) {
100
101#ifdef SPECIAL_BOUNDARY_CONDITIONS
102 bool antiperiodic =
103 (boundary_condition[dir] == hila::bc::ANTIPERIODIC && lattice->mynode.is_on_edge(dir));
104#else
105 bool antiperiodic = false;
106#endif
107 payload.set_local_boundary_elements(dir, par, lattice, antiperiodic);
108}
109
110
111/////////////////////////////////////////////////////////////////////////////////////////
112 /// Get the receive buffer pointer for the communication.
113
114 template <typename T>
115 T *Field<T>::field_struct::get_receive_buffer(Direction d, Parity par,
116                                               const lattice_struct::comm_node_struct &from_node) {
117#if defined(VANILLA)
118
119 return (T *)payload.get_buffer() + from_node.offset(par);
120
121#elif defined(CUDA) || defined(HIP)
122
123 unsigned offs = 0;
124 if (par == ODD)
125 offs = from_node.evensites;
126 if (receive_buffer[d] == nullptr) {
127 receive_buffer[d] = payload.allocate_mpi_buffer(from_node.sites);
128 }
129 return receive_buffer[d] + offs;
130
131#elif defined(VECTORIZED)
132
133     if constexpr (!hila::is_vectorizable_type<T>::value) {
134         // use vanilla type, field laid out in std fashion
135         return (T *)payload.get_buffer() + from_node.offset(par);
136     } else {
137 unsigned offs = 0;
138 if (par == ODD)
139 offs = from_node.evensites;
140
141 if (vector_lattice->is_boundary_permutation[abs(d)]) {
142 // extra copy operation needed
143 if (receive_buffer[d] == nullptr) {
144 receive_buffer[d] = payload.allocate_mpi_buffer(from_node.sites);
145 }
146 return receive_buffer[d] + offs;
147 } else {
148 // directly to halo buffer
149 constexpr unsigned vector_size = hila::vector_info<T>::vector_size;
150 return ((T *)payload.get_buffer() +
151 (vector_lattice->halo_offset[d] * vector_size + offs));
152 }
153 }
154#endif
155} // end of get_receive_buffer
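
// Layout note: per direction the receive buffer holds the even-parity sites
// first, then the odd-parity sites (offset(EVEN) == 0, offset(ODD) ==
// from_node.evensites), so a single-parity gather lands in one contiguous block.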
156
157
158#define NAIVE_SHIFT
159#if defined(NAIVE_SHIFT)
160
161template <typename T>
162Field<T> &Field<T>::shift(const CoordinateVector &v, Field<T> &res, const Parity par) const {
163
164 // use this to store remaining moves
165 CoordinateVector rem = v;
166
167 // check the parity of the move
168 Parity par_s;
169
170 int len = 0;
171 foralldir(d) len += abs(rem[d]);
172
173 // no move, just copy field
174 if (len == 0) {
175 res = *this;
176 return res;
177 }
178
179 // opp_parity(ALL) == ALL
180 if (len % 2 == 0)
181 par_s = opp_parity(par);
182 else
183 par_s = par;
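    // (each hop to a neighbour flips the parity of the written sites; with an
    //  even total length the first hop must therefore write opp_parity(par)
    //  so that the last hop lands on par, with odd length it starts on par)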
184
185 // is this already gathered from one of the dirs in v?
186 bool found_dir = false;
187 Direction mdir;
188 foralldir(d) {
189 if (rem[d] > 0 && gather_status(par_s, d) != gather_status_t::NOT_DONE) {
190 mdir = d;
191 found_dir = true;
192 break;
193 } else if (rem[d] < 0 && gather_status(par_s, -d) != gather_status_t::NOT_DONE) {
194 mdir = -d;
195 found_dir = true;
196 break;
197 }
198 }
199
200 if (!found_dir) {
201 // now did not find a 'ready' dir. Take the 1st available
202 foralldir(d) {
203 if (rem[d] > 0) {
204 mdir = d;
205 break;
206 } else if (rem[d] < 0) {
207 mdir = -d;
208 break;
209 }
210 }
211 }
212
213 // Len 1, copy directly
214 if (len == 1) {
215 res[par_s] = (*this)[X + mdir];
216 return res;
217 }
218
219 // now longer - need buffer
220 Field<T> r1;
221 Field<T> *from, *to;
222
223 // this ensures that the final move lands on res
224 if (len % 2 == 0) {
225 from = &r1;
226 to = &res;
227 } else {
228 from = &res;
229 to = &r1;
230 }
231 // and copy initially to "from"
232 (*from)[par_s] = (*this)[X + mdir];
233
234 // and subtract remaining moves from rem
235 rem = rem - mdir;
236 par_s = opp_parity(par_s);
237
238 foralldir(d) {
239 if (rem[d] != 0) {
240 mdir = (rem[d] > 0) ? d : -d;
241
242 while (rem[d] != 0) {
243
244 (*to)[par_s] = (*from)[X + mdir];
245
246 par_s = opp_parity(par_s);
247 rem = rem - mdir;
248 std::swap(to, from);
249 }
250 }
251 }
252
253 return res;
254}
255
256#endif // NAIVE_SHIFT
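
// Illustrative sketch of shift(); the initializer assumes NDIM == 4, and the
// helper random() is an assumption (any assignment to f works):
//
//   Field<double> f, g;
//   f.random();
//   CoordinateVector v = {2, -1, 0, 0};
//   f.shift(v, g, ALL);    // g(x) = f(x + v), shifted periodically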
257
258/// start_gather(): Communicate the field at Parity par from Direction
259/// d. Uses accessors to prevent dependency on the layout.
260/// return the Direction mask bits where something is happening
261 template <typename T>
262 dir_mask_t Field<T>::start_gather(Direction d, Parity p) const {
263
264 // get the mpi message tag right away, to ensure that we are always synchronized
265 // with the mpi calls -- some nodes might not need comms, but the tags must be in
266 // sync
267
268 int tag = get_next_msg_tag();
269
270 const lattice_struct::nn_comminfo_struct &ci = lattice->nn_comminfo[d];
271 const lattice_struct::comm_node_struct &from_node = ci.from_node;
272 const lattice_struct::comm_node_struct &to_node = ci.to_node;
273
274 // check if this is done - either gathered or no comm to be done in the 1st place
275
276 if (is_gathered(d, p)) {
277 hila::n_gather_avoided++;
278 return 0; // nothing to wait for
279 }
280
281 // No comms to do, nothing to wait for -- we'll use the is_gathered
282 // status to keep track of vector boundary shuffle anyway
283
284 if (from_node.rank == hila::myrank() && to_node.rank == hila::myrank()) {
285 fs->set_local_boundary_elements(d, p);
286 mark_gathered(d, p);
287 return 0;
288 }
289
290 // if this parity or ALL-type gather is going on nothing to be done
291 if (!gather_not_done(d, p) || !gather_not_done(d, ALL)) {
292 hila::n_gather_avoided++;
293 return get_dir_mask(d); // nothing to do, but still need to wait
294 }
295
296 Parity par = p;
297 // if p is ALL but ODD or EVEN is going on/done, turn off parity which is not needed
298 // corresponding wait must do the same thing
299 if (p == ALL) {
300 if (!gather_not_done(d, EVEN) && !gather_not_done(d, ODD)) {
301 // even and odd are going on or ready, nothing to be done
302 hila::n_gather_avoided++;
303 return get_dir_mask(d);
304 }
305 if (!gather_not_done(d, EVEN))
306 par = ODD;
307 else if (!gather_not_done(d, ODD))
308 par = EVEN;
309 // if neither is the case par = ALL
310 }
311
312 mark_gather_started(d, par);
313
314 // Communication hasn't been started yet, do it now
315
316 int par_i = static_cast<int>(par) - 1; // index to dim-3 arrays
317
318 constexpr size_t size = sizeof(T);
319
320 T *receive_buffer;
321 T *send_buffer;
322
323 if (from_node.rank != hila::myrank() && boundary_need_to_communicate(d)) {
324
325 // HANDLE RECEIVES: get node which will send here
326
327 // buffer can be separate or in Field buffer
328 receive_buffer = fs->get_receive_buffer(d, par, from_node);
329
330 size_t n = from_node.n_sites(par) * size;
331
332         if (n >= (1ULL << 31)) {
333             hila::out << "Too large MPI message! Size " << n << '\n';
334             hila::terminate(1);
335         }
336
337 post_receive_timer.start();
338
339 // c++ version does not return errors
340 MPI_Irecv(receive_buffer, (int)n, MPI_BYTE, from_node.rank, tag, lattice->mpi_comm_lat,
341 &fs->receive_request[par_i][d]);
342
343 post_receive_timer.stop();
344 }
345
346 if (to_node.rank != hila::myrank() && boundary_need_to_communicate(-d)) {
347 // HANDLE SENDS: Copy Field elements on the boundary to a send buffer and send
348
349 unsigned sites = to_node.n_sites(par);
350
351 if (fs->send_buffer[d] == nullptr)
352 fs->send_buffer[d] = fs->payload.allocate_mpi_buffer(to_node.sites);
353
354 send_buffer = fs->send_buffer[d] + to_node.offset(par);
355
356#ifndef MPI_BENCHMARK_TEST
357 fs->gather_comm_elements(d, par, send_buffer, to_node);
358#endif
359
360 size_t n = sites * size;
361
362#ifdef GPU_AWARE_MPI
363 gpuStreamSynchronize(0);
364 // gpuDeviceSynchronize();
365#endif
366
367 start_send_timer.start();
368
369 MPI_Isend(send_buffer, (int)n, MPI_BYTE, to_node.rank, tag, lattice->mpi_comm_lat,
370 &fs->send_request[par_i][d]);
371
372 start_send_timer.stop();
373 }
374
375 // and do the boundary shuffle here, after MPI has started
376 // NOTE: there should be no danger of MPI and shuffle overwriting, MPI writes
377 // to halo buffers only if no permutation is needed. With a permutation MPI
378 // uses special receive buffer
379#ifndef MPI_BENCHMARK_TEST
380 fs->set_local_boundary_elements(d, par);
381#endif
382
383 return get_dir_mask(d);
384}
385
386/// @internal
387 /// wait_gather(): Wait until the communication at parity par from
388 /// Direction d has completed.
389///
390/// NOTE: This will be called even if the field is marked const.
391/// Therefore this function is const, even though it does change
392/// the internal content of the field, the halo. From the point
393/// of view of the user, the value of the field does not change.
394template <typename T>
395void Field<T>::wait_gather(Direction d, Parity p) const {
396
397 const lattice_struct::nn_comminfo_struct &ci = lattice->nn_comminfo[d];
398 const lattice_struct::comm_node_struct &from_node = ci.from_node;
399 const lattice_struct::comm_node_struct &to_node = ci.to_node;
400
401 // check if this is done - either gathered or no comm to be done in the 1st place
402 if (is_gathered(d, p))
403 return;
404
405 // this is the branch if no comms -- shuffle was done in start_gather
406 if (from_node.rank == hila::myrank() && to_node.rank == hila::myrank())
407 return;
408
409 // if (!is_gather_started(d,p)) {
410 // hila::out0 << "Wait gather error - wait_gather without corresponding
411 // start_gather\n"; exit(1);
412 // }
413
414 // Note: the move can be Parity p OR ALL -- need to wait for it in any case
415 // set par to be the "sum" over both parities
416 // There never should be ongoing ALL and other parity gather -- start_gather takes
417 // care
418
419 // check here consistency, this should never happen
420 assert(!(p != ALL && is_gather_started(d, p) && is_gather_started(d, ALL)));
421
422 Parity par;
423 int n_wait = 1;
424 // what par to wait for?
425 if (is_gather_started(d, p))
426 par = p; // standard match
427 else if (p != ALL) {
428 if (is_gather_started(d, ALL))
429 par = ALL; // if all is running wait for it
430 else {
431 exit(1);
432 }
433 } else {
434 // now p == ALL and ALL is not running
435 if (is_gathered(d, EVEN) && is_gather_started(d, ODD))
436 par = ODD;
437 else if (is_gathered(d, ODD) && is_gather_started(d, EVEN))
438 par = EVEN;
439 else if (is_gather_started(d, EVEN) && is_gather_started(d, ODD)) {
440 n_wait = 2; // need to wait for both!
441 par = EVEN; // will be flipped
442 } else {
443 exit(1);
444 }
445 }
446
447 for (int wait_i = 0; wait_i < n_wait; ++wait_i) {
448
449 int par_i = (int)par - 1;
450
451 if (from_node.rank != hila::myrank() && boundary_need_to_communicate(d)) {
452 wait_receive_timer.start();
453
454 MPI_Status status;
455 MPI_Wait(&fs->receive_request[par_i][d], &status);
456
457 wait_receive_timer.stop();
458
459#if !defined(VANILLA) && !defined(MPI_BENCHMARK_TEST)
460 fs->place_comm_elements(d, par, fs->get_receive_buffer(d, par, from_node), from_node);
461#endif
462 }
463
464 // then wait for the sends
465 if (to_node.rank != hila::myrank() && boundary_need_to_communicate(-d)) {
466 wait_send_timer.start();
467 MPI_Status status;
468 MPI_Wait(&fs->send_request[par_i][d], &status);
469 wait_send_timer.stop();
470 }
471
472 // Mark the parity gathered from Direction dir
473 mark_gathered(d, par);
474
475 // Keep count of communications
476 hila::n_gather_done += 1;
477
478 par = opp_parity(par); // flip if 2 loops
479 }
480}
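
// Illustrative sketch: explicit prefetching with start_gather()/wait_gather()
// to overlap communication with unrelated work (loops accessing f[X + d] do
// this implicitly). Assumes the standard direction name e_x:
//
//   f.start_gather(e_x, ALL);    // posts MPI_Irecv/MPI_Isend early
//   // ... unrelated local computation here ...
//   f.wait_gather(e_x, ALL);     // halo of f from direction e_x now valid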
481
482
483/// Gather a list of elements to a single node
484 /// coord_list must be the same on all nodes; buffer is needed only on "root"
485 template <typename T>
486 void Field<T>::field_struct::gather_elements(T *buffer,
487                                              const std::vector<CoordinateVector> &coord_list,
488                                              int root) const {
489
490 std::vector<unsigned> index_list;
491 std::vector<int> sites_on_rank(lattice->nodes.number);
492 std::vector<int> reshuffle_list(coord_list.size());
493
494 std::fill(sites_on_rank.begin(), sites_on_rank.end(), 0);
495
496 int nranks = 0;
497
498 int i = 0;
499 for (const CoordinateVector &c : coord_list) {
500 int rank = lattice->node_rank(c);
501 if (hila::myrank() == rank) {
502 index_list.push_back(lattice->site_index(c));
503 }
504
505 if (sites_on_rank[rank] == 0 && rank != root)
506 nranks++;
507 sites_on_rank[rank]++;
508 reshuffle_list[i++] = rank;
509 }
510
511 std::vector<T> send_buffer(index_list.size());
512 payload.gather_elements((T *)send_buffer.data(), index_list.data(), send_buffer.size(),
513 lattice);
514 if (hila::myrank() != root && sites_on_rank[hila::myrank()] > 0) {
515 MPI_Send((char *)send_buffer.data(), sites_on_rank[hila::myrank()] * sizeof(T), MPI_BYTE,
516 root, hila::myrank(), lattice->mpi_comm_lat);
517 }
518 if (hila::myrank() == root) {
519
520 // allocate buffer for receiving data
521 T *b;
522 std::vector<T> pb(coord_list.size() - sites_on_rank[root]);
523 b = pb.data();
524 // vector for node ptrs -- point to stuff from nodes
525 std::vector<T *> nptr(lattice->nodes.number);
526
527 std::vector<MPI_Request> mpi_req(nranks);
528 int nreqs = 0;
529 for (int n = 0; n < sites_on_rank.size(); n++) {
530 if (sites_on_rank[n] > 0) {
531 if (n != root) {
532 MPI_Status status;
533 MPI_Irecv(b, (int)(sites_on_rank[n] * sizeof(T)), MPI_BYTE, n, n,
534 lattice->mpi_comm_lat, &mpi_req[nreqs++]);
535
536 nptr[n] = b;
537 b += sites_on_rank[n];
538
539 } else {
540
541 nptr[n] = send_buffer.data();
542 }
543 }
544 }
545
546 if (nreqs > 0) {
547 std::vector<MPI_Status> stat_arr(nreqs);
548 MPI_Waitall(nreqs, mpi_req.data(), stat_arr.data());
549 }
550
551         // copy the data from pb to buffer, reordering
552 for (int i = 0; i < coord_list.size(); i++) {
553 buffer[i] = *nptr[reshuffle_list[i]];
554 nptr[reshuffle_list[i]]++;
555 }
556 }
557}
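
// (protocol summary: every rank scans the same coord_list, so each rank's send
//  buffer holds its own sites in list order; the root receives one block per
//  rank and restores coord_list order through reshuffle_list)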
558
559/// Send elements from a single node to a list of coordinates
560/// coord_list must be the same on all nodes, but buffer is needed only on "root"!
561
562 template <typename T>
563 void Field<T>::field_struct::scatter_elements(T *buffer,
564                                               const std::vector<CoordinateVector> &coord_list,
565                                               int root) {
566
567 std::vector<unsigned> index_list;
568 std::vector<int> sites_on_rank(lattice->nodes.number);
569 std::vector<int> reshuffle_list(coord_list.size());
570 std::fill(sites_on_rank.begin(), sites_on_rank.end(), 0);
571
572 int nranks = 0;
573 size_t i = 0;
574 for (CoordinateVector c : coord_list) {
575 int rank = lattice->node_rank(c);
576 if (hila::myrank() == rank) {
577 index_list.push_back(lattice->site_index(c));
578 }
579
580 if (sites_on_rank[rank] == 0 && rank != root)
581 nranks++;
582 sites_on_rank[rank]++;
583 reshuffle_list[i++] = rank;
584 }
585
586 // payload.gather_elements((T *)recv_buffer.data(), index_list.data(),
587 // recv_buffer.size(), lattice);
588
589 if (hila::myrank() != root && sites_on_rank[hila::myrank()] > 0) {
590 std::vector<T> recv_buffer(sites_on_rank[hila::myrank()]);
591 MPI_Status status;
592
593 MPI_Recv((char *)recv_buffer.data(), sites_on_rank[hila::myrank()] * sizeof(T), MPI_BYTE,
594 root, hila::myrank(), lattice->mpi_comm_lat, &status);
595
596 payload.place_elements((T *)recv_buffer.data(), index_list.data(), recv_buffer.size(),
597 lattice);
598 }
599 if (hila::myrank() == root) {
600 // reordering buffers
601 std::vector<T> pb(coord_list.size());
602 // vector for node counters -- point to stuff from nodes
603 std::vector<unsigned> nloc(lattice->nodes.number);
604 std::vector<unsigned> ncount(lattice->nodes.number);
605 nloc[0] = ncount[0] = 0;
606
607 for (int n = 1; n < lattice->nodes.number; n++) {
608 nloc[n] = nloc[n - 1] + sites_on_rank[n - 1];
609 ncount[n] = 0;
610 }
611 for (size_t i = 0; i < coord_list.size(); i++) {
612 int node = reshuffle_list[i];
613 pb[nloc[node] + ncount[node]] = buffer[i];
614 ncount[node]++;
615 }
616 std::vector<MPI_Request> mpi_req(nranks);
617 int nreqs = 0;
618 for (int n = 0; n < sites_on_rank.size(); n++) {
619 if (sites_on_rank[n] > 0) {
620 if (n != root) {
621 MPI_Isend(pb.data() + nloc[n], (int)(sites_on_rank[n] * sizeof(T)), MPI_BYTE, n,
622 n, lattice->mpi_comm_lat, &mpi_req[nreqs++]);
623 }
624 }
625 }
626         if (sites_on_rank[root] > 0) {
627 payload.place_elements(pb.data() + nloc[root], index_list.data(), index_list.size(),
628 lattice);
629 }
630 if (nreqs > 0) {
631 std::vector<MPI_Status> stat_arr(nreqs);
632 MPI_Waitall(nreqs, mpi_req.data(), stat_arr.data());
633 }
634 }
635 //MPI_Barrier(lattice.mpi_comm_lat);
636}
637
638template <typename T>
639void Field<T>::set_elements(const std::vector<T> &elements,
640 const std::vector<CoordinateVector> &coord_list) {
641     assert(elements.size() == coord_list.size() && "vector size mismatch in set_elements");
642 std::vector<unsigned> my_indexes;
643 std::vector<T> my_elements;
644 for (int i = 0; i < coord_list.size(); i++) {
645 CoordinateVector c = coord_list[i];
646 if (lattice->is_on_mynode(c)) {
647 my_indexes.push_back(lattice->site_index(c));
648 my_elements.push_back(elements[i]);
649 }
650 }
651 fs->payload.place_elements(my_elements.data(), my_indexes.data(), my_indexes.size(), lattice);
652 mark_changed(ALL);
653}
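
// Illustrative sketch of set_elements(); coordinates assume NDIM == 4. The
// same lists are passed on every rank, and each rank stores only the sites it
// owns:
//
//   std::vector<CoordinateVector> sites = {{0, 0, 0, 0}, {1, 2, 3, 0}};
//   std::vector<double> vals = {1.5, -0.5};
//   f.set_elements(vals, sites);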
654
655
656/// Get a list of elements and store them into a vector
657template <typename T>
658std::vector<T> Field<T>::get_elements(const std::vector<CoordinateVector> &coord_list,
659 bool bcast) const {
660
661 std::vector<T> res;
662 if (hila::myrank() == 0)
663 res.resize(coord_list.size());
664
665 fs->gather_elements(res.data(), coord_list);
666 if (bcast)
667 hila::broadcast(res);
668
669 return res;
670}
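
// Illustrative sketch, continuing the set_elements() example above:
//
//   auto back = f.get_elements(sites, true);  // bcast=true: result on all ranks
//   // back[i] == vals[i] on every rank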
671
672
673/// Get a subvolume of the field elements to all nodes
674template <typename T>
675std::vector<T> Field<T>::get_subvolume(const CoordinateVector &cmin, const CoordinateVector &cmax,
676 bool bcast) const {
677
678 size_t vol = 1;
679 foralldir(d) {
680 vol *= cmax[d] - cmin[d] + 1;
681 assert(cmax[d] >= cmin[d] && cmin[d] >= 0 && cmax[d] < lattice.size(d));
682 }
683     std::vector<CoordinateVector> clist(vol);
684     CoordinateVector c;
685
686 size_t i = 0;
687 forcoordinaterange(c, cmin, cmax) {
688 clist[i++] = c;
689 }
690 return get_elements(clist, bcast);
691}
692
693
694/// Get a slice (subvolume)
695template <typename T>
696std::vector<T> Field<T>::get_slice(const CoordinateVector &c, bool bcast) const {
697 CoordinateVector cmin, cmax;
698 foralldir(d) if (c[d] < 0) {
699 cmin[d] = 0;
700 cmax[d] = lattice.size(d) - 1;
701 }
702 else {
703 cmin[d] = cmax[d] = c[d];
704 }
705 return get_subvolume(cmin, cmax, bcast);
706}
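
// Illustrative sketch: a negative entry in c selects the full extent of that
// direction. Assuming NDIM == 4, gather the (x,y)-plane at z == 0, t == 3:
//
//   CoordinateVector c = {-1, -1, 0, 3};
//   auto plane = f.get_slice(c);   // result on rank 0 only (bcast defaults to false)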
707
708
709//////////////////////////////////////////////////////////////////////////////////
710/// @internal
711/// Copy the local (mpi process) data to a "logical array"
712/// on gpu code, copies to host
713
714template <typename T>
715void Field<T>::copy_local_data(std::vector<T> &buffer) const {
716
717 // copy to local variables to avoid lattice ptr
718 CoordinateVector nmin = lattice->mynode.min;
719 Vector<NDIM, unsigned> nmul = lattice->mynode.size_factor;
720
721 buffer.resize(lattice->mynode.volume);
722#if defined(CUDA) || defined(HIP)
723 // d_malloc mallocs from device if needed
724 T *data = (T *)d_malloc(sizeof(T) * lattice->mynode.volume);
725#else
726 T *data = buffer.data();
727#endif
728
729#pragma hila novector direct_access(data)
730     onsites(ALL) {
731         Vector<NDIM, unsigned> nodec;
732         nodec = X.coordinates() - nmin;
733
734 unsigned i = nodec.dot(nmul);
735 data[i] = (*this)[X];
736 }
737
738#if defined(CUDA) || defined(HIP)
739 gpuMemcpy(buffer.data(), data, sizeof(T) * lattice->mynode.volume, gpuMemcpyDeviceToHost);
740 d_free(data);
741#endif
742}
743
744////////////////////////////////////////////////////////////////////////////////////
745/// @internal
746/// set the local data from an array
747
748template <typename T>
749void Field<T>::set_local_data(const std::vector<T> &buffer) {
750
751 // copy to local variables to avoid lattice ptr
752 CoordinateVector nmin = lattice->mynode.min;
753 Vector<NDIM, unsigned> nmul = lattice->mynode.size_factor;
754
755 assert(buffer.size() >= lattice->mynode.volume);
756
757#if defined(CUDA) || defined(HIP)
758 // d_malloc mallocs from device if needed
759 T *data = (T *)d_malloc(sizeof(T) * lattice->mynode.volume);
760 gpuMemcpy(data, buffer.data(), sizeof(T) * lattice->mynode.volume, gpuMemcpyHostToDevice);
761#else
762     T *data = (T *)buffer.data(); // buffer is a const ref; cast for the raw pointer
763#endif
764
765#pragma hila novector direct_access(data)
766     onsites(ALL) {
767         Vector<NDIM, unsigned> nodec;
768         nodec = X.coordinates() - nmin;
769
770 unsigned i = nodec.dot(nmul);
771 (*this)[X] = data[i];
772 }
773
774#if defined(CUDA) || defined(HIP)
775 d_free(data);
776#endif
777
778 this->mark_changed(ALL);
779}
780
781
782////////////////////////////////////////////////////////////////////////////////////
783/// Copy local data with halo - useful for visualization
784
785template <typename T>
786inline void collect_field_halo_data_(T *data, const Field<T> &src, Field<T> &dest,
787 const Vector<NDIM, int> &dirs, int ndir) {
788
789 // get the coords of the min point of the halo array
790 CoordinateVector nmin = lattice->mynode.min;
791 nmin.asArray() -= 1;
792
793     // construct the mult vector to access the data
794     Vector<NDIM, unsigned> nmul;
795     nmul.e(0) = 1;
796 for (int i = 1; i < NDIM; i++)
797 nmul.e(i) = nmul.e(i - 1) * (lattice->mynode.size[i - 1] + 2);
798
799
800 Vector<NDIM, int> node_min;
801 Vector<NDIM, int> node_max;
802 foralldir(d) {
803 node_min[d] = lattice->mynode.min[d];
804 node_max[d] = lattice->mynode.min[d] + lattice->mynode.size[d] - 1;
805 }
806
807#pragma hila novector direct_access(data)
808     onsites(ALL) {
809         Vector<NDIM, unsigned> nodec;
810         CoordinateVector c = X.coordinates();
811 bool gotit = true;
812
813 for (int i = 0; i < ndir; i++) {
814 Direction d = (Direction)dirs[i];
815 if (c.e(d) == node_min[d]) {
816 c.e(d) -= 1;
817 } else if (c.e(d) == node_max[d]) {
818 c.e(d) += 1;
819 } else
820 gotit = false;
821 }
822
823 Direction d = (Direction)dirs[ndir];
824 if (gotit && c.e(d) == node_min[d]) {
825 c.e(d) -= 1;
826 nodec = c - nmin;
827 data[nodec.dot(nmul)] = src[X - d];
828 dest[X] = src[X - d];
829
830 } else if (gotit && c.e(d) == node_max[d]) {
831 c.e(d) += 1;
832 nodec = c - nmin;
833 data[nodec.dot(nmul)] = src[X + d];
834 dest[X] = src[X + d];
835 }
836 }
837}
838
839
840/////////////////////////////////////////////////////////////////////////////////////////
841
842template <typename T>
843void Field<T>::copy_local_data_with_halo(std::vector<T> &buffer) const {
844
845 // get the coords of the min point of the halo array
846 CoordinateVector nmin = lattice->mynode.min;
847 nmin.asArray() -= 1;
848
849     // construct the mult vector to access the data
850     Vector<NDIM, unsigned> nmul;
851     nmul.e(0) = 1;
852 for (int i = 1; i < NDIM; i++)
853 nmul.e(i) = nmul.e(i - 1) * (lattice->mynode.size[i - 1] + 2);
854
855 // full size of the buffer
856 size_t siz = 1;
857 foralldir(d) siz *= (lattice->mynode.size[d] + 2);
858
859 buffer.resize(siz);
860#if defined(CUDA) || defined(HIP)
861 // d_malloc mallocs from device if needed
862 T *data = (T *)d_malloc(sizeof(T) * siz);
863#else
864 T *data = buffer.data();
865#endif
866
867 // now collect bulk
868#pragma hila novector direct_access(data)
869     onsites(ALL) {
870         Vector<NDIM, unsigned> nodec;
871         nodec = X.coordinates() - nmin;
872
873 unsigned i = nodec.dot(nmul);
874 data[i] = (*this)[X];
875 }
876
877 // collect nn-halos
878
879 Field<T> corners = 0;
880 Field<T> corner2 = 0;
881#if NDIM > 2
882 Field<T> corner3 = 0;
883#if NDIM > 3
884 Field<T> corner4 = 0;
885#endif
886#endif
887
888     Vector<NDIM, int> dirs;
889     foralldir(d1) {
890 dirs[0] = d1;
891 // gather d1 halo
892 collect_field_halo_data_(data, (*this), corners, dirs, 0);
893
894 for (int d2 = d1 + 1; d2 < NDIM; ++d2) {
895 dirs[1] = d2;
896 collect_field_halo_data_(data, corners, corner2, dirs, 1);
897#if NDIM > 2
898 for (int d3 = d2 + 1; d3 < NDIM; ++d3) {
899 dirs[2] = d3;
900 collect_field_halo_data_(data, corner2, corner3, dirs, 2);
901#if NDIM > 3
902 for (int d4 = d3 + 1; d4 < NDIM; ++d4) {
903 dirs[3] = d4;
904 collect_field_halo_data_(data, corner3, corner4, dirs, 3);
905 }
906#endif
907 }
908#endif
909 }
910 }
911
912
913#if defined(CUDA) || defined(HIP)
914 gpuMemcpy(buffer.data(), data, sizeof(T) * siz, gpuMemcpyDeviceToHost);
915 d_free(data);
916#endif
917}
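
// Layout note: the halo'd buffer spans (mynode.size[d] + 2) sites in each
// direction, and a site with coordinates c maps to the linear index
// (c - (mynode.min - 1)).dot(nmul); the bulk is thus offset by one from the
// buffer origin in every direction, with the edges filled by the nn-halos above.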
918
919
920template <typename T>
921void Field<T>::block_from(Field<T> &orig) {
922 assert(orig.is_initialized(ALL) && "block_from()-method field is not initialized");
923
924 this->check_alloc();
925 lattice_struct *blocklat = this->fs->mylattice.ptr();
926 lattice_struct *parentlat = orig.fs->mylattice.ptr();
927 lattice_struct *currentlat = lattice.ptr();
928
929 assert(blocklat->parent == parentlat && "blocking must happen from parent lattice Field");
930
931 // If no sites on this node there's nothing to do
932 if (blocklat->mynode.volume == 0)
933 return;
934
935
936 // alloc temp array, size of the blocked lattice
937 size_t bufsize = blocklat->mynode.volume;
938 T *buf = (T *)d_malloc(bufsize * sizeof(T));
939
940 // switch to parent if needed
941 lattice.switch_to(parentlat);
942
943 CoordinateVector blockfactor = parentlat->l_size.element_div(blocklat->l_size);
944 CoordinateVector cvmin = blocklat->mynode.min;
945 auto size_factor = blocklat->mynode.size_factor;
946
947#pragma hila direct_access(buf)
948 onsites(ALL) {
949 if (X.coordinates().is_divisible(blockfactor)) {
950 // get blocked coords logically on this
951 Vector<NDIM, unsigned> cv = X.coordinates().element_div(blockfactor) - cvmin;
952 buf[cv.dot(size_factor)] = orig[X];
953 }
954 }
955
956 lattice.switch_to(blocklat);
957
958#pragma hila direct_access(buf)
959 onsites(ALL) {
960 // get blocked coords logically on this node
961 Vector<NDIM, unsigned> cv = X.coordinates() - cvmin;
962 (*this)[X] = buf[cv.dot(size_factor)];
963 }
964
965 lattice.switch_to(currentlat);
966
967 d_free(buf);
968}
969
970template <typename T>
971void Field<T>::unblock_to(Field<T> &target) const {
972 assert(this->is_initialized(ALL) && "unblock_to()-method field is not initialized");
973 target.check_alloc();
974
975 lattice_struct *blocklat = this->fs->mylattice.ptr();
976 lattice_struct *parentlat = target.fs->mylattice.ptr();
977 lattice_struct *currentlat = lattice.ptr();
978
979 assert(blocklat->parent == parentlat && "unblocking must happen to parent lattice Field");
980
981 // If no sites on this node there's nothing to do
982 if (blocklat->mynode.volume == 0)
983 return;
984
985 // alloc temp array, size of the blocked lattice
986 size_t bufsize = blocklat->mynode.volume;
987 T *buf = (T *)d_malloc(bufsize * sizeof(T));
988
989 CoordinateVector blockfactor = parentlat->l_size.element_div(blocklat->l_size);
990 CoordinateVector cvmin = blocklat->mynode.min;
991 auto size_factor = blocklat->mynode.size_factor;
992
993 lattice.switch_to(blocklat);
994
995#pragma hila direct_access(buf)
996 onsites(ALL) {
997 // get blocked coords logically on this node
998 Vector<NDIM, unsigned> cv = X.coordinates() - cvmin;
999 buf[cv.dot(size_factor)] = (*this)[X];
1000 }
1001
1002 lattice.switch_to(parentlat);
1003
1004#pragma hila direct_access(buf)
1005 onsites(ALL) {
1006 if (X.coordinates().is_divisible(blockfactor)) {
1007 // get blocked coords logically on this
1008 Vector<NDIM, unsigned> cv = X.coordinates().element_div(blockfactor) - cvmin;
1009 target[X] = buf[cv.dot(size_factor)];
1010 }
1011 }
1012
1013 lattice.switch_to(currentlat);
1014
1015 d_free(buf);
1016}
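
// Illustrative sketch of block_from()/unblock_to(). With the element-wise
// blocking factor bf = parent->l_size / blocked->l_size, the maps above are
// coarse(x) = fine(bf * x) and back; fine sites not divisible by bf are left
// untouched by unblock_to():
//
//   Field<double> coarse;      // a Field living on the blocked lattice
//   coarse.block_from(fine);   // requires: blocked lattice's parent == fine's lattice
//   coarse.unblock_to(fine);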
1017
1018#endif