HILA
Loading...
Searching...
No Matches
lattice_vector.h
1#ifndef _BACKEND_LATTICE_H_
2#define _BACKEND_LATTICE_H_
3
4#include "../lattice.h"
5#include "vector_types.h"
6
7#ifndef SUBNODE_LAYOUT
8static_assert(0, "SUBNODE_LAYOUT needs to be defined to use vectorized lattice");
9#endif
10
11/////////////////////////////////////////////////////////////////////////////////////////
12/// Vectorized lattice struct stores information needed for vectorized access to lattice fields.
13/// - loop_begin, loop_end
14/// - neighbour arrays
15/// - twist permutations when needed
16/// - allocation size, including new halo sites (replaces field_alloc_size)
17/// these are used by fields which are laid out with vector structure
18template <int vector_size>
20 static_assert(vector_size > 0, "Vector size in vectorized_lattice_struct");
21
22 public:
23 /// pointer to the original lattice
24 // lattice_struct *lattice;
25
26 /// vector sites on this node
27 size_t v_sites;
28 /// subnode divisions to different directions
30 /// origin of the 1st subnode = origin of mynode
32
33 /// True if boundary needs a permutation
35 /// True if the boundary elements are local
37 /// permutation vectors
38 int boundary_permutation[NDIRS][vector_size];
39 /// offsets to boundary halos
40 unsigned halo_offset[NDIRS], halo_offset_odd[NDIRS], n_halo_vectors[NDIRS];
41 /// storage for indexes to halo sites
43
44 /// move data from receive buffer -- sending is fine as it is
45 /// takes the role of nn_comms
46 unsigned *recv_list[NDIRS];
47 /// The size of the receive list in each direction
49
50 // coordinate offsets to nodes
51 using int_vector_t = typename hila::vector_base_type<int, vector_size>::type;
52 int_vector_t coordinate_offset[NDIM];
53 // using coordinate_compound_vec_type = typename Vector<NDIM,
54 // typename vector_base_type<int,vector_size>::type>; coordinate_compound_vec_type
55 // coordinate_offset;
56 CoordinateVector *RESTRICT coordinate_base;
57
58 /// Storage for neighbour indexes on each site
60
61 /// The storage size of a field
62 size_t alloc_size;
63 /// A wait array for the vectorized field
64 unsigned char *RESTRICT vec_wait_arr_;
65
66 /// Check if this is the first subnode
68 v = v.mod(lattice.size());
69 foralldir (d) {
70 if (v[d] < subnode_origin[d] || v[d] >= subnode_origin[d] + subnode_size[d])
71 return false;
72 }
73 return true;
74 }
75
76 /////////////////////////////////////////////////////////////////////////////////////////////
77 /// Set up the vectorized lattice:
78 /// * Split the lattice and record size and splits
79 /// * Map indeces into local coordinate_list
80 /// * Set up neighbour vector references
81 /// mapping between the sites: here i is the original lattice site index
82 /// vector_index = i / vector_size;
83 /// index_in_vector = i % vector_size;
84 /// Note that this maps site indices of 64 bit elements (double)and 32 bit (float) so
85 /// that
86 /// vector_index(32) = vector_index(64)/2 and
87 /// index_in_vector(32) = index_in_vector(64) + (vector_index(64)%2)*vector_size(64)
88 /// This removes the last direction where number of subnodes is divisible
89 /////////////////////////////////////////////////////////////////////////////////////////////
91 /// Initialize
92
93 /// sites on vector
94 v_sites = lattice.mynode.volume() / vector_size;
95 subdivisions = lattice.mynode.subnodes.divisions;
96 subnode_size = lattice.mynode.subnodes.size;
97 subnode_origin = lattice.mynode.min;
98
99 // hila::out0 << "Subdivisions " << subdivisions << '\n';
100
101 // the basic division is done using "float" vectors -
102 // for "double" vectors the vector_size and number of subnodes
103 // is halved to direction lattice.mynode.subnodes.lastype_divided.dir
104
105 if (vector_size == VECTOR_SIZE / sizeof(double)) {
106 subdivisions[lattice.mynode.subnodes.merged_subnodes_dir] /= 2;
107 subnode_size[lattice.mynode.subnodes.merged_subnodes_dir] *= 2;
108 }
109
110 hila::out0 << "Setting up lattice struct with vector of size " << vector_size
111 << " elements\n";
112
114
115 for (Direction d = (Direction)0; d < NDIRS; d++) {
116 neighbours[d] = (unsigned *)memalloc(v_sites * sizeof(unsigned));
117 }
118
120
123
125
126 } // end of initialization
127
128 /////////////////////////////////////////////////////////////////////
129 /// Find the boundary permutations to different directions
130 /////////////////////////////////////////////////////////////////////
132
133 // boundary permutation is done in a "layout-agnostic" way:
134 // go to the edge of the subnode, check the neighbour, and
135 // identify the permutation as index % vector_length
136 // Uses
137
138 int step = 1, sdmul = 1;
139 foralldir (d) {
141
142 // upper corner of 1st subnode
143 CoordinateVector here;
144 here = subnode_origin + subnode_size;
145 here.asArray() -= 1;
146
147 unsigned idx = lattice.site_index(here);
148 assert(idx % vector_size == 0); // is it really on 1st subnode
149
150 // loop over subnodes
151 for (int i = 0; i < vector_size; i++) {
152
153 // get the site index of the neighbouring site
154 CoordinateVector h = lattice.coordinates(idx + i) + d;
155
156 // remember to mod the coordinate on lattice
157 h = h.mod(lattice.size());
158 int rank = lattice.node_rank(h);
159 unsigned nn = lattice.site_index(h, rank);
160 boundary_permutation[d][i] = nn % vector_size;
161 }
162
163 // permutation to opposite direction is the inverse, thus:
164 for (int i = 0; i < vector_size; i++)
166 }
167 }
168
169 /////////////////////////////////////////////////////////////////////////
170 /// Set the neighbour array, and also local halo mapping
171 /// reserve extra storage for permutation halo sites
172 /// there are 2*(area) v-sites to each direction with permutation,
173 /// + mpi buffers too if needed. (separately allocated)
174 /// to directions without permutation there is also
175 /// - 2*(area) v-sites, filled in directly by MPI or copying from local node
176 /// if no mpi comm. The copying is done to ease implementing different boundary
177 /// conditions
178 /// These come automatically when we tally up the neigbours below
179 /////////////////////////////////////////////////////////////////////////
181
182 // check special case: 1st subnode is across the whole lattice to Direction d and
183 // no boundary permutation
184 // we do the copy also in this case, in order to implement other boundary
185 // conditions Slows down a bit the periodic case, but with MPI comms this should
186 // make no difference
187
188 foralldir (d) {
189 if (lattice.nodes.n_divisions[d] == 1 && !is_boundary_permutation[d]) {
191 } else {
193 }
194 }
195
196 // accumulate here points off-subnode (to halo)
197 size_t c_offset = v_sites;
198 for (Direction d = (Direction)0; d < NDIRS; ++d) {
199
200 halo_offset[d] = c_offset;
201 for (int i = 0; i < v_sites; i++) {
202 int j = vector_size * i; // the "original lattice" index for the 1st site of vector
203 CoordinateVector here = lattice.coordinates(j);
204 // std::cout << here << '\n';
205
206 if (is_on_first_subnode(here + d)) {
207
208 assert(lattice.neighb[d][j] % vector_size == 0); // consistency check
209 Direction ad = abs(d);
210
212 ((is_up_dir(d) && here[ad] == lattice.size(ad) - 1) ||
213 (is_up_dir(-d) && here[ad] == 0))) {
214 neighbours[d][i] = c_offset++;
215 } else {
216 // standard branch, within the subnode
217 neighbours[d][i] = lattice.neighb[d][j] / vector_size;
218 }
219 } else {
220 neighbours[d][i] = c_offset++; // now points beyond the lattice
221 }
222 }
223 n_halo_vectors[d] = c_offset - halo_offset[d];
224 halo_offset_odd[d] = halo_offset[d] + n_halo_vectors[d] / 2;
225 assert(n_halo_vectors[d] % 2 == 0);
226
227 /// set also the index array, if needed
228 /// halo_index[d] points to the subnode modded neighbour site to dir d, if
229 /// there is boundary twist (meaning there are on-node subnodes to this dir)
230 /// we'll use the standard neighb array to do this.
231 if (n_halo_vectors[d] > 0 && is_boundary_permutation[abs(d)]) {
232 halo_index[d] = (unsigned *)memalloc(n_halo_vectors[d] * sizeof(unsigned));
233 int j = 0;
234 for (int i = 0; i < v_sites; i++) {
235 if (neighbours[d][i] >= v_sites) {
236 // scan neighbour sites within the vector i -- some must be inside
237 // the node, which we want in this case
238 int k, n = -1;
239 bool found = false;
240 for (k = 0; k < vector_size; k++) {
241 if (lattice.neighb[d][i * vector_size + k] < lattice.mynode.sites) {
242 if (!found) {
243 n = lattice.neighb[d][i * vector_size + k] / vector_size;
244 found = true;
245 } else
246 assert(n ==
247 lattice.neighb[d][i * vector_size + k] / vector_size);
248 }
249 }
250 assert(n >= 0);
251 halo_index[d][j++] = n;
252 }
253 }
254 assert(j == n_halo_vectors[d]);
255
256 } else if (only_local_boundary_copy[d]) {
257 // set the local untwisted array copy here
258
259 halo_index[d] = (unsigned *)memalloc(n_halo_vectors[d] * sizeof(unsigned));
260 int j = 0;
261 for (int i = 0; i < v_sites; i++) {
262 if (neighbours[d][i] >= v_sites) {
263 halo_index[d][j++] = lattice.neighb[d][i * vector_size] / vector_size;
264 assert(lattice.neighb[d][i * vector_size] % vector_size == 0);
265 }
266 }
267
268 } else
269 halo_index[d] = nullptr; // no special copy here - mpi fills
270 }
271
272 /// Finally, how big the field allocation should be - IN SITES, not vectors
273 alloc_size = c_offset * vector_size;
274
275 if (alloc_size >= (1ULL << 32)) {
276 report_too_large_node();
277 }
278 }
279
280 //////////////////////////////////////////////////////////////////////////////
281 /// Get neighbour receive indices for MPI
282 //////////////////////////////////////////////////////////////////////////////
284
285 for (Direction d = (Direction)0; d < NDIRS; d++) {
286 if (is_boundary_permutation[abs(d)] && lattice.nodes.n_divisions[abs(d)] > 1) {
287
288 // now need to receive and copy - note: now this is in terms of
289 // non-vector sites. Set the recv_list to point to where to move the
290 // stuff Note: now the stuff has to be moved to boundary_halo, not to
291 // lattice n!
292
293 recv_list_size[d] = lattice.mynode.sites / lattice.mynode.size[abs(d)];
294 recv_list[d] = (unsigned *)memalloc(recv_list_size[d] * sizeof(unsigned));
295
296 int j = 0;
297 for (int i = 0; i < lattice.mynode.sites; i++) {
298 if (lattice.neighb[d][i] >= lattice.mynode.sites) {
299
300 // i/vector_size is the "vector index" of site, and
301 // i % vector_size the index within the vector.
302 // vector neighbour is neighbours[d][i/vector_size]
303 // remember to do the permutation too
304
305 recv_list[d][j++] =
306 neighbours[d][i / vector_size] * vector_size + (i % vector_size);
307 }
308 }
309 assert(j == recv_list_size[d]);
310
311 } else {
312 // now use halo_offset directly for buffer
313 recv_list[d] = nullptr;
314 recv_list_size[d] = 0;
315 }
316 }
317 }
318
319 /////////////////////////////////////////////////////////////////////////
320 /// Build the structs for coordinates
321 /////////////////////////////////////////////////////////////////////////
323
324 /// first vector_size elements should give the coordinates of vector offsets
325 CoordinateVector base = lattice.coordinates(0);
326 for (int i = 0; i < vector_size; i++) {
327 CoordinateVector diff = lattice.coordinates(i) - base;
328 foralldir (d)
329 coordinate_offset[d].insert(i, diff[d]);
330 }
331
332 // and then set the coordinate_base with the original coords
333 coordinate_base = (CoordinateVector *)memalloc(v_sites * sizeof(CoordinateVector));
334 for (int i = 0; i < v_sites; i++) {
335 coordinate_base[i] = lattice.coordinates(vector_size * i);
336 }
337 }
338
339////////////////////////////////////////////////////////////////////////////
340/// Finally, initialize wait arrays
341/// it is a bit mask array containing a bit at location dir if the neighbour
342/// at that dir is out of the local volume
344 vec_wait_arr_ = (dir_mask_t *)memalloc(v_sites * sizeof(dir_mask_t));
345
346 for (int i = 0; i < v_sites; i++) {
347 vec_wait_arr_[i] = 0; /* basic, no wait */
348 foralldir (dir) {
349 Direction odir = -dir;
350 if (lattice.nodes.n_divisions[dir] > 1) {
351 if (neighbours[dir][i] >= v_sites)
352 vec_wait_arr_[i] = vec_wait_arr_[i] | (1 << dir);
353 if (neighbours[odir][i] >= v_sites)
354 vec_wait_arr_[i] = vec_wait_arr_[i] | (1 << odir);
355 }
356 }
357 }
358 }
359
360 /////////////////////////////////////////////////////////////////////////
361 /// Return the communication info
363 return lattice.get_comminfo(d);
364 }
365
366 /////////////////////////////////////////////////////////////////////////
367 /// get neighbours for this, with 2 different methods:
368 /// First vector neighbour. Now idx is the vector index
369 unsigned vector_neighbour(Direction d, int idx) const {
370 return neighbours[d][idx];
371 }
372
373 /// this gives the neighbour when the lattice is traversed
374 /// site-by-site. Now idx is the "true" site index, not vector index
375 unsigned site_neighbour(Direction d, int idx) const {
376 return vector_size * neighbours[d][idx / vector_size] + idx % vector_size;
377 }
378
379 //////////////////////////////////////////////////////////////////////////
380 /// Return the number of sites that need to be allocated
381 /// returns sites, not vectors!
382 unsigned field_alloc_size() const {
383 return alloc_size;
384 }
385
386 //////////////////////////////////////////////////////////////////////////
387 /// Return the coordinates of each vector nested as
388 /// coordinate[Direction][vector_index]
389 auto coordinates(int idx) const {
390 // std::array<typename vector_base_type<int,vector_size>::type ,NDIM> r;
392 // Vector<NDIM,int_vector_t> r;
393 foralldir (d)
394 r.e(d) = coordinate_offset[d] + coordinate_base[idx][d];
395 return r;
396 }
397
398 auto coordinate(unsigned idx, Direction d) const {
399 return coordinate_offset[d] + coordinate_base[idx][d];
400 }
401
402 // parity is the same for all elements in vector, return scalar
403 ::Parity site_parity(int idx) {
404 return coordinate_base[idx].parity();
405 }
406
407 /// First index in a lattice loop
408 unsigned loop_begin(::Parity P) const {
409 if (P == ODD) {
410 return v_sites / 2;
411 } else {
412 return 0;
413 }
414 }
415
416 /// Last index in a lattice loop
417 unsigned loop_end(::Parity P) const {
418 if (P == EVEN) {
419 return v_sites / 2;
420 } else {
421 return v_sites;
422 }
423 }
424};
425
426///////////////////////////////////////////////////////////////////////////////
427/// Helper class for loading the vectorized lattice
428///////////////////////////////////////////////////////////////////////////////
429
431
432 void setup(const lattice_struct &lat) {}
433
434 /// Returns a vectorized lattice with given vector size
435 template <int vector_size>
437 // Create one if not already created
438
439 assert(lattice.id() == 0 &&
440 "Vectorized lattice layout only possible with main (original) lattice");
441
442
443 static vectorized_lattice_struct<vector_size> *vlat = nullptr;
444 if (vlat == nullptr) {
446 }
447
448 return vlat;
449 }
450};
451
452#endif
CoordinateVector_t mod(const CoordinateVector_t &m) const
T e(const int i, const int j) const
Standard array indexing operation for matrices.
Definition matrix.h:272
const Array< n, m, T > & asArray() const
Cast Matrix to Array.
Definition matrix.h:467
unsigned *__restrict__ neighb[NDIRS]
Main neighbour index array.
Definition lattice.h:203
unsigned site_index(const CoordinateVector &c) const
Definition lattice.cpp:136
T abs(const Complex< T > &a)
Return absolute value of Complex number.
Definition cmplx.h:1322
Parity
Parity enum with values EVEN, ODD, ALL; refers to parity of the site. Parity of site (x,...
constexpr Parity EVEN
bit pattern: 001
#define foralldir(d)
Macro to loop over (all) Direction(s)
Definition coordinates.h:78
constexpr unsigned NDIRS
Number of directions.
Definition coordinates.h:57
constexpr Parity ODD
bit pattern: 010
unsigned char dir_mask_t
Direction
Enumerator for direction that assigns integer to direction to be interpreted as unit vector.
Definition coordinates.h:34
#define RESTRICT
Definition defs.h:50
std::ostream out0
This writes output only from main process (node 0)
Helper class for loading the vectorized lattice.
void setup(lattice_struct *lattice)
vectorized_lattice_struct< vector_size > * get_vectorized_lattice()
Returns a vectorized lattice with given vector size.
nn-communication has only 1 node to talk to
Definition lattice.h:185
unsigned loop_end(::Parity P) const
Last index in a lattice loop.
void get_receive_lists()
Get neighbour receive indices for MPI.
unsigned char *__restrict__ vec_wait_arr_
A wait array for the vectorized field.
void set_coordinates()
Build the structs for coordinates.
bool only_local_boundary_copy[NDIRS]
True if the boundary elements are local.
unsigned * recv_list[NDIRS]
CoordinateVector subnode_origin
origin of the 1st subnode = origin of mynode
unsigned field_alloc_size() const
bool is_boundary_permutation[4]
True if boundary needs a permutation.
lattice_struct::nn_comminfo_struct get_comminfo(int d)
Return the communication info.
CoordinateVector subdivisions
subnode divisions to different directions
bool is_on_first_subnode(CoordinateVector v)
Check if this is the first subnode.
size_t v_sites
pointer to the original lattice
unsigned *__restrict__ halo_index[NDIRS]
storage for indexes to halo sites
size_t alloc_size
The storage size of a field.
auto coordinates(int idx) const
unsigned vector_neighbour(Direction d, int idx) const
void get_boundary_permutations()
Find the boundary permutations to different directions.
unsigned recv_list_size[NDIRS]
The size of the receive list in each direction.
unsigned halo_offset[NDIRS]
offsets to boundary halos
int boundary_permutation[NDIRS][vector_size]
permutation vectors
unsigned site_neighbour(Direction d, int idx) const
unsigned *__restrict__ neighbours[NDIRS]
Storage for neighbour indexes on each site.
unsigned loop_begin(::Parity P) const
First index in a lattice loop.