HILA
Loading...
Searching...
No Matches
setup_layout_vector.cpp
1/// Setup layout does the node division. This version
2/// first tries an even distribution, with equally sized
3/// nodes, and if that fails allows slightly different
4/// node sizes.
5
6#include "plumbing/defs.h"
7#include "plumbing/lattice.h"
8
9/***************************************************************/
10
/* Number of primes used when factorizing the (virtual) node count. */
constexpr int NPRIMES = 12;

/* The NPRIMES smallest primes, in ascending order. A node count with a
 * prime factor larger than the last entry cannot be factorized by this table. */
static constexpr int prime[NPRIMES] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37};
static_assert(sizeof(prime) / sizeof(prime[0]) == NPRIMES,
              "prime table length must equal NPRIMES");
14
15// Set up now squaresize and nsquares - arrays
16// Print info to outf as we proceed
17
18void lattice_struct::setup_layout() {
19 int nfactors[NPRIMES];
20 CoordinateVector nodesiz;
21
22 hila::print_dashed_line();
23 hila::out0 << "LAYOUT: subnode lattice, with " << VECTOR_SIZE / sizeof(float) << " subnodes\n"
24 << "Enabling vector length " << VECTOR_SIZE * 8
25 << " bits = " << VECTOR_SIZE / sizeof(double) << " doubles or "
26 << VECTOR_SIZE / sizeof(float) << " floats/ints\n";
27 hila::out0 << "Lattice size ";
28 foralldir(d) {
29 if (d != 0)
30 hila::out0 << " x ";
31 hila::out0 << l_size[d];
32 }
33 hila::out0 << " = " << l_volume << " sites\n";
34 hila::out0 << "Dividing to " << hila::number_of_nodes() << " nodes\n";
35 hila::out0 << "Layout using vector of " << number_of_subnodes << " elements\n";
36
37 foralldir(d) if (l_size[d] % 2 != 0) {
38 hila::out0 << "Lattice must be even to all directions for vector/AVX layout.\n"
39 << "Use std layout or GPU for odd sizes\n";
41 }
42
43 // we want to divide up to numnode * vector_size virtual nodes
44 // use the float vector size to divide to hila::number_of_nodes() * vector_size
45 // nodes, this is the most demanding. The directions where the extra divisions have
46 // been done the node size must be even, so that these can be handled by vectors
47
48 // Factorize the node number in primes
49 // These factors must be used in slicing the lattice!
50
51 // number of virtual nodes
52 int nn = hila::number_of_nodes() * number_of_subnodes;
53
54 int i = nn;
55 for (int n = 0; n < NPRIMES; n++) {
56 nfactors[n] = 0;
57 while (i % prime[n] == 0) {
58 i /= prime[n];
59 nfactors[n]++;
60 }
61 }
62 if (i != 1) {
63 hila::out0 << "Cannot factorize " << hila::number_of_nodes() << " nodes with primes up to "
64 << prime[NPRIMES - 1] << '\n';
66 }
67
68 // strategy: try to increase the box size to one of the directions until rem = 0
69 // find the optimal direction to do it
70 // Use simple heuristic: take the dim with the least amount of added "ghost sites"
71
72 CoordinateVector nsize;
73 int64_t ghosts[NDIM];
74 foralldir(d) {
75 int64_t cosize = l_volume / l_size[d];
76 int64_t n = l_size[d];
77 while ((n * cosize) % nn != 0)
78 n++; // virtual size can be odd
79 // now nsize is the new would-be size
80 ghosts[d] = (n - l_size[d]) * cosize;
81 nsize[d] = n;
82 }
83
84 int64_t ghost_volume = 1;
85 foralldir(d) ghost_volume *= nsize[d];
86
87 // if the division goes even nsize = size() and ghost_volume = volume
88
89 // now try to divide the nsize-volume to subnodes. We don't try to do
90 // the division to direction where there are ghost sites
91
92 CoordinateVector divisions, subdiv;
93
94 int gdir;
95 bool secondtime = false;
96 do {
97 // try the division a couple of times, if the 1st fails
98
99 if (ghost_volume > l_volume) {
100 gdir = NDIM - 1;
101 foralldir(j) if (ghosts[gdir] > ghosts[j]) gdir = j;
102 // gdir is the direction where we do uneven division (if done)
103 // hila::out0 << "gdir " << gdir << " ghosts gdir " << ghosts[gdir] << " nsize
104 // "
105 // << nsize[gdir] << '\n';
106 } else
107 gdir = -1;
108
109 foralldir(i) {
110 nodesiz[i] = (i == gdir) ? nsize[i] : l_size[i]; // start with ghosted lattice size
111 divisions[i] = 1;
112 }
113
114 for (int n = NPRIMES - 1; n >= 0; n--)
115 for (i = 0; i < nfactors[n]; i++) {
116 // figure out which direction to divide -- start from the largest prime,
117 // because we don't want this to be last divisor! (would probably wind
118 // up with size 1)
119
120 // Try to keep even node sizes, these are needed for vectors
121 // We don't worry about evenness to gdir
122
123 // find largest divisible dimension of h-cubes
124 int msize = 1;
125 int dir, mdir;
126 for (dir = 0; dir < NDIM; dir++) {
127 if (nodesiz[dir] > msize &&
128 ((dir == gdir && nodesiz[dir] % prime[n] == 0) ||
129 (dir != gdir && nodesiz[dir] % (2 * prime[n]) == 0)) &&
130 nodesiz[dir] / prime[n] > 3) {
131 msize = nodesiz[dir];
132 mdir = dir;
133 }
134 }
135
136 // even divide failed, divide to odd
137 if (msize == 1) {
138 for (dir = 0; dir < NDIM; dir++) {
139 if (nodesiz[dir] > msize && dir != gdir && nodesiz[dir] % (prime[n]) == 0) {
140 msize = nodesiz[dir];
141 mdir = dir;
142 }
143 }
144 }
145
146 if (msize == 1) {
147 // This cannot happen
148 hila::out0 << "CANNOT HAPPEN! in setup_layout_vector.c\n";
150 }
151
152 // Now slice it
153 nodesiz[mdir] /= prime[n];
154 divisions[mdir] *= prime[n];
155
156 // hila::out0 << nodesiz << ' ' << divisions << '\n';
157 }
158
159 // Division done, now check that the div makes sense
160
161 bool fail = false;
162
163 foralldir(dir) if (nodesiz[dir] < 3) fail = true; // don't allow nodes of size 1 or 2
164
165 if (!fail) {
166
167 // check here that this can be used for vectorized division
168
169 subdiv.fill(1);
170 bool div_done;
171 int n_subn = 1;
172 do {
173 div_done = false;
174 foralldir(dir) {
175 // the direction where the vector subnodes are must not be
176 // an uneven direction, node size to the direction should be
177 // divisible by 2 and the number of nodes to this dir should also be
178 // a multiple of subdivs
179
180 int sd = subdiv[dir] * 2;
181 if (dir != gdir && nodesiz[dir] % 2 == 0 && divisions[dir] % sd == 0 &&
182 n_subn < number_of_subnodes) {
183 subdiv[dir] = sd;
184 n_subn *= 2;
185 div_done = true;
186 mynode.subnodes.merged_subnodes_dir = dir;
187 }
188 }
189 } while (div_done && n_subn < number_of_subnodes);
190
191 if (n_subn != number_of_subnodes)
192 fail = true;
193 }
194
195 if (fail && !secondtime && gdir >= 0) {
196 secondtime = true;
197 ghosts[gdir] =
198 (1ULL << 62); // this short-circuits direction gdir, some other taken next
199 } else if (fail) {
200 hila::out0 << "Could not successfully lay out the lattice with "
201 << hila::number_of_nodes() << " nodes!\n";
202 hila::out0 << " The division of ";
203 foralldir(d) {
204 hila::out0 << lattice.size(d);
205 if (d < NDIM - 1)
206 hila::out0 << '*';
207 }
208 hila::out0 << " lattice using " << hila::number_of_nodes()
209 << " nodes with vector layout can be done\n";
210 hila::out0 << " if the lattice can be divided into ";
211 hila::out0 << hila::number_of_nodes() << '*' << number_of_subnodes
212 << " virtual nodes so that the virtual node size is\n";
213 hila::out0 << " even to directions where the extra divisions are done, "
214 "and the node size is > 2.\n";
215
217 }
218
219 } while (secondtime);
220
221 // set up struct nodes variables
222 nodes.number = hila::number_of_nodes();
223 foralldir(dir) {
224 nodesiz[dir] *= subdiv[dir];
225 nodes.n_divisions[dir] = divisions[dir] / subdiv[dir];
226 nodes.divisors[dir].resize(nodes.n_divisions[dir] + 1);
227 // Node divisors: note, this MUST BE compatible with
228 // node_rank in lattice.cpp
229 // to be sure, we use naively the same method than in node_rank
230 // last element will be l_size[dir], for convenience
231 int n = -1;
232 for (int i = 0; i <= l_size[dir]; i++)
233 if ((i * nodes.n_divisions[dir]) / l_size[dir] != n) {
234 ++n;
235 nodes.divisors[dir][n] = i;
236 }
237 }
238
239 // set up the subnode divisions here -- rest is set in setup_node
240 foralldir(d) mynode.subnodes.divisions[d] = subdiv[d];
241
242 // mynode is set up in setup_node
243
244 // Now division done - check how good it is
245 int ghost_slices = 0;
246 if (gdir >= 0) {
247 ghost_slices = nsize[gdir] - l_size[gdir];
248
249 hila::out0 << "\nUsing uneven node division to direction " << gdir << ":\n";
250 hila::out0 << "Lengths: " << nodes.n_divisions[gdir] - ghost_slices << " * ("
251 << nodesiz[gdir] << " sites) + " << ghost_slices << " * (" << nodesiz[gdir] - 1
252 << " sites)\n";
253 hila::out0 << "Divisions: ";
254 for (int i = 0; i < nodes.n_divisions[gdir]; i++) {
255 if (i > 0)
256 hila::out0 << " - ";
257 hila::out0 << nodes.divisors[gdir][i + 1] - nodes.divisors[gdir][i];
258 }
259 hila::out0 << "\nFilling efficiency: " << (100.0 * l_size[gdir]) / nsize[gdir] << "%\n";
260
261 if (ghost_slices > nodes.n_divisions[gdir] / 2)
262 hila::out0 << "NOTE: number of smaller nodes > large nodes \n";
263 }
264
265 // this was hila::number_of_nodes() > 1
266 if (1) {
267 hila::out0 << "\nSites on node: ";
268 foralldir(dir) {
269 if (dir > 0)
270 hila::out0 << " x ";
271 if (dir == gdir)
272 hila::out0 << '(' << nodesiz[dir] - 1 << '-' << nodesiz[dir] << ')';
273 else
274 hila::out0 << nodesiz[dir];
275 }
276 int64_t ns = 1;
277 foralldir(dir) ns *= nodesiz[dir];
278 if (ghost_slices > 0) {
279 int64_t ns2 = ns * (nodesiz[gdir] - 1) / nodesiz[gdir];
280 hila::out0 << " = " << ns2 << " - " << ns << '\n';
281 } else {
282 hila::out0 << " = " << ns << '\n';
283 }
284
285 hila::out0 << "Node layout: ";
286 foralldir(dir) {
287 if (dir > 0)
288 hila::out0 << " x ";
289 hila::out0 << nodes.n_divisions[dir];
290 }
291 hila::out0 << " = " << hila::number_of_nodes() << " nodes\n";
292
293#ifdef VECTORIZED
294
295 hila::out0 << "Node subdivision to 32bit elems: ";
296 foralldir(dir) {
297 if (dir > 0)
298 hila::out0 << " x ";
299 hila::out0 << subdiv[dir];
300 }
301 hila::out0 << " = " << number_of_subnodes << " subnodes\n";
302
303 hila::out0 << "Sites on subnodes: ";
304 foralldir(dir) {
305 if (dir > 0)
306 hila::out0 << " x ";
307 if (dir == gdir)
308 hila::out0 << '(' << nodesiz[dir] - 1 << '-' << nodesiz[dir] << ')';
309 else
310 hila::out0 << nodesiz[dir] / subdiv[dir];
311 }
312 hila::out0 << '\n';
313
314 Direction dmerge = mynode.subnodes.merged_subnodes_dir;
315
316 hila::out0 << "Node subdivision to 64bit elems: ";
317 foralldir(dir) {
318 if (dir > 0)
319 hila::out0 << " x ";
320 hila::out0 << ((dir == dmerge) ? subdiv[dir] / 2 : subdiv[dir]);
321 }
322 hila::out0 << " = " << number_of_subnodes / 2 << " subnodes\n";
323
324 hila::out0 << "Sites on subnodes: ";
325 foralldir(dir) {
326 if (dir > 0)
327 hila::out0 << " x ";
328 if (dir == gdir)
329 hila::out0 << '(' << nodesiz[dir] - 1 << '-' << nodesiz[dir] << ')';
330 else
331 hila::out0 << ((dir == dmerge) ? 2 * nodesiz[dir] / subdiv[dir]
332 : nodesiz[dir] / subdiv[dir]);
333 }
334 hila::out0 << '\n';
335
336#endif
337 }
338
339 // For MPI, remap the nodes for periodic torus
340 // in the desired manner
341 // we have at least 2 options:
342 // map_node_layout_trivial.c
343 // map_node_layout_block2.c - for 2^n n.n. blocks
344
345 nodes.create_remap();
346
347
348 hila::print_dashed_line();
349}
int size(Direction d) const
lattice.size() -> CoordinateVector or lattice.size(d) -> int returns the dimensions of the lattice,...
Definition lattice.h:433
const auto & fill(const S rhs)
Matrix fill.
Definition matrix.h:1022
#define foralldir(d)
Macro to loop over (all) Direction(s)
Definition coordinates.h:80
Direction
Enumerator for direction that assigns integer to direction to be interpreted as unit vector.
Definition coordinates.h:34
This file defines all includes for HILA.
int number_of_nodes()
how many nodes there are
Definition com_mpi.cpp:248
std::ostream out0
This writes output only from main process (node 0)
void finishrun()
Normal, controlled exit - all nodes must call this. Prints timing information and information about c...