HILA
Loading...
Searching...
No Matches
setup_layout_vector.cpp
/// Setup layout does the node division. This version
/// first tries an even distribution, with equally sized
/// nodes, and if that fails allows slightly different
/// node sizes.
5
6#include "plumbing/defs.h"
7#include "plumbing/lattice.h"
8
9/***************************************************************/
10
/* number of primes to be used in factorization */
#define NPRIMES 12

/// Trial divisors used when factorizing the number of virtual nodes:
/// the first NPRIMES primes, in increasing order.
static constexpr int prime[NPRIMES] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37};
14
15// Set up now squaresize and nsquares - arrays
16// Print info to outf as we proceed
17
18void lattice_struct::setup_layout() {
19 int nfactors[NPRIMES];
20 CoordinateVector nodesiz;
21
22 print_dashed_line();
23 hila::out0 << "LAYOUT: subnode lattice, with " << VECTOR_SIZE / sizeof(float)
24 << " subnodes\n"
25 << "Enabling vector length " << VECTOR_SIZE * 8
26 << " bits = " << VECTOR_SIZE / sizeof(double) << " doubles or "
27 << VECTOR_SIZE / sizeof(float) << " floats/ints\n";
28 hila::out0 << "Lattice size ";
29 foralldir (d) {
30 if (d != 0)
31 hila::out0 << " x ";
32 hila::out0 << size(d);
33 }
34 hila::out0 << " = " << l_volume << " sites\n";
35 hila::out0 << "Dividing to " << hila::number_of_nodes() << " nodes\n";
36 hila::out0 << "Layout using vector of " << number_of_subnodes << " elements\n";
37
38 foralldir (d)
39 if (size(d) % 2 != 0) {
40 hila::out0 << "Lattice must be even to all directions (odd size:TODO)\n";
42 }
43
44 // we want to divide up to numnode * vector_size virtual nodes
45 // use the float vector size to divide to hila::number_of_nodes() * vector_size
46 // nodes, this is the most demanding. The directions where the extra divisions have
47 // been done the node size must be even, so that these can be handled by vectors
48
49 // Factorize the node number in primes
50 // These factors must be used in slicing the lattice!
51
52 // number of virtual nodes
53 int nn = hila::number_of_nodes() * number_of_subnodes;
54
55 int i = nn;
56 for (int n = 0; n < NPRIMES; n++) {
57 nfactors[n] = 0;
58 while (i % prime[n] == 0) {
59 i /= prime[n];
60 nfactors[n]++;
61 }
62 }
63 if (i != 1) {
64 hila::out0 << "Cannot factorize " << hila::number_of_nodes()
65 << " nodes with primes up to " << prime[NPRIMES - 1] << '\n';
67 }
68
69 // strategy: try to increase the box size to one of the directions until rem = 0
70 // find the optimal direction to do it
71 // Use simple heuristic: take the dim with the least amount of added "ghost sites"
72
73 CoordinateVector nsize;
74 int64_t ghosts[NDIM];
75 foralldir (d) {
76 int64_t cosize = l_volume / size(d);
77 int64_t n = size(d);
78 while ((n * cosize) % nn != 0)
79 n++; // virtual size can be odd
80 // now nsize is the new would-be size
81 ghosts[d] = (n - size(d)) * cosize;
82 nsize[d] = n;
83 }
84
85 int64_t ghost_volume = 1;
86 foralldir (d)
87 ghost_volume *= nsize[d];
88
89 // if the division goes even nsize = size() and ghost_volume = volume
90
91 // now try to divide the nsize-volume to subnodes. We don't try to do
92 // the division to direction where there are ghost sites
93
94 CoordinateVector divisions, subdiv;
95
96 int gdir;
97 bool secondtime = false;
98 do {
99 // try the division a couple of times, if the 1st fails
100
101 if (ghost_volume > l_volume) {
102 gdir = NDIM - 1;
103 foralldir (j)
104 if (ghosts[gdir] > ghosts[j])
105 gdir = j;
106 // gdir is the direction where we do uneven division (if done)
107 // hila::out0 << "gdir " << gdir << " ghosts gdir " << ghosts[gdir] << " nsize
108 // "
109 // << nsize[gdir] << '\n';
110 } else
111 gdir = -1;
112
113 foralldir (i) {
114 nodesiz[i] =
115 (i == gdir) ? nsize[i] : size(i); // start with ghosted lattice size
116 divisions[i] = 1;
117 }
118
119 for (int n = NPRIMES - 1; n >= 0; n--)
120 for (i = 0; i < nfactors[n]; i++) {
121 // figure out which direction to divide -- start from the largest prime,
122 // because we don't want this to be last divisor! (would probably wind
123 // up with size 1)
124
125 // Try to keep even node sizes, these are needed for vectors
126 // We don't worry about evenness to gdir
127
128 // find largest divisible dimension of h-cubes
129 int msize = 1;
130 int dir, mdir;
131 for (dir = 0; dir < NDIM; dir++) {
132 if (nodesiz[dir] > msize &&
133 ((dir == gdir && nodesiz[dir] % prime[n] == 0) ||
134 (dir != gdir && nodesiz[dir] % (2 * prime[n]) == 0)) &&
135 nodesiz[dir] / prime[n] > 3) {
136 msize = nodesiz[dir];
137 mdir = dir;
138 }
139 }
140
141 // even divide failed, divide to odd
142 if (msize == 1) {
143 for (dir = 0; dir < NDIM; dir++) {
144 if (nodesiz[dir] > msize && dir != gdir &&
145 nodesiz[dir] % (prime[n]) == 0) {
146 msize = nodesiz[dir];
147 mdir = dir;
148 }
149 }
150 }
151
152 if (msize == 1) {
153 // This cannot happen
154 hila::out0 << "CANNOT HAPPEN! in setup_layout_vector.c\n";
156 }
157
158 // Now slice it
159 nodesiz[mdir] /= prime[n];
160 divisions[mdir] *= prime[n];
161
162 // hila::out0 << nodesiz << ' ' << divisions << '\n';
163 }
164
165 // Division done, now check that the div makes sense
166
167 bool fail = false;
168
169 foralldir (dir)
170 if (nodesiz[dir] < 3)
171 fail = true; // don't allow nodes of size 1 or 2
172
173 if (!fail) {
174
175 // check here that this can be used for vectorized division
176
177 subdiv.fill(1);
178 bool div_done;
179 int n_subn = 1;
180 do {
181 div_done = false;
182 foralldir (dir) {
183 // the direction where the vector subnodes are must not be
184 // an uneven direction, node size to the direction should be
185 // divisible by 2 and the number of nodes to this dir should also be
186 // a multiple of subdivs
187
188 int sd = subdiv[dir] * 2;
189 if (dir != gdir && nodesiz[dir] % 2 == 0 &&
190 divisions[dir] % sd == 0 && n_subn < number_of_subnodes) {
191 subdiv[dir] = sd;
192 n_subn *= 2;
193 div_done = true;
194 mynode.subnodes.merged_subnodes_dir = dir;
195 }
196 }
197 } while (div_done && n_subn < number_of_subnodes);
198
199 if (n_subn != number_of_subnodes)
200 fail = true;
201 }
202
203 if (fail && !secondtime && gdir >= 0) {
204 secondtime = true;
205 ghosts[gdir] =
206 (1ULL
207 << 62); // this short-circuits direction gdir, some other taken next
208 } else if (fail) {
209 hila::out0 << "Could not successfully lay out the lattice with "
210 << hila::number_of_nodes() << " nodes!\n";
211 hila::out0 << " The division of ";
212 foralldir (d) {
213 hila::out0 << lattice.size(d);
214 if (d < NDIM - 1)
215 hila::out0 << '*';
216 }
217 hila::out0 << " lattice using " << hila::number_of_nodes()
218 << " nodes with vector layout can be done\n";
219 hila::out0 << " if the lattice can be divided into ";
220 hila::out0 << hila::number_of_nodes() << '*' << number_of_subnodes
221 << " virtual nodes so that the virtual node size is\n";
222 hila::out0 << " even to directions where the extra divisions are done, "
223 "and the node size is > 2.\n";
224
226 }
227
228 } while (secondtime);
229
230 // set up struct nodes variables
231 nodes.number = hila::number_of_nodes();
232 foralldir (dir) {
233 nodesiz[dir] *= subdiv[dir];
234 nodes.n_divisions[dir] = divisions[dir] / subdiv[dir];
235 nodes.divisors[dir].resize(nodes.n_divisions[dir] + 1);
236 // Node divisors: note, this MUST BE compatible with
237 // node_rank in lattice.cpp
238 // to be sure, we use naively the same method than in node_rank
239 // last element will be size(dir), for convenience
240 int n = -1;
241 for (int i = 0; i <= size(dir); i++)
242 if ((i * nodes.n_divisions[dir]) / size(dir) != n) {
243 ++n;
244 nodes.divisors[dir][n] = i;
245 }
246 }
247
248 // set up the subnode divisions here -- rest is set in setup_node
249 foralldir (d)
250 mynode.subnodes.divisions[d] = subdiv[d];
251
252 // mynode is set up in setup_node
253
254 // Now division done - check how good it is
255 int ghost_slices = 0;
256 if (gdir >= 0) {
257 ghost_slices = nsize[gdir] - size(gdir);
258
259 hila::out0 << "\nUsing uneven node division to direction " << gdir << ":\n";
260 hila::out0 << "Lengths: " << nodes.n_divisions[gdir] - ghost_slices << " * ("
261 << nodesiz[gdir] << " sites) + " << ghost_slices << " * ("
262 << nodesiz[gdir] - 1 << " sites)\n";
263 hila::out0 << "Divisions: ";
264 for (int i = 0; i < nodes.n_divisions[gdir]; i++) {
265 if (i > 0)
266 hila::out0 << " - ";
267 hila::out0 << nodes.divisors[gdir][i + 1] - nodes.divisors[gdir][i];
268 }
269 hila::out0 << "\nFilling efficiency: " << (100.0 * size(gdir)) / nsize[gdir]
270 << "%\n";
271
272 if (ghost_slices > nodes.n_divisions[gdir] / 2)
273 hila::out0 << "NOTE: number of smaller nodes > large nodes \n";
274 }
275
276 // this was hila::number_of_nodes() > 1
277 if (1) {
278 hila::out0 << "\nSites on node: ";
279 foralldir (dir) {
280 if (dir > 0)
281 hila::out0 << " x ";
282 if (dir == gdir)
283 hila::out0 << '(' << nodesiz[dir] - 1 << '-' << nodesiz[dir] << ')';
284 else
285 hila::out0 << nodesiz[dir];
286 }
287 int64_t ns = 1;
288 foralldir (dir)
289 ns *= nodesiz[dir];
290 if (ghost_slices > 0) {
291 int64_t ns2 = ns * (nodesiz[gdir] - 1) / nodesiz[gdir];
292 hila::out0 << " = " << ns2 << " - " << ns << '\n';
293 } else {
294 hila::out0 << " = " << ns << '\n';
295 }
296
297 hila::out0 << "Node layout: ";
298 foralldir (dir) {
299 if (dir > 0)
300 hila::out0 << " x ";
301 hila::out0 << nodes.n_divisions[dir];
302 }
303 hila::out0 << " = " << hila::number_of_nodes() << " nodes\n";
304
305#ifdef VECTORIZED
306
307 hila::out0 << "Node subdivision to 32bit elems: ";
308 foralldir (dir) {
309 if (dir > 0)
310 hila::out0 << " x ";
311 hila::out0 << subdiv[dir];
312 }
313 hila::out0 << " = " << number_of_subnodes << " subnodes\n";
314
315 hila::out0 << "Sites on subnodes: ";
316 foralldir (dir) {
317 if (dir > 0)
318 hila::out0 << " x ";
319 if (dir == gdir)
320 hila::out0 << '(' << nodesiz[dir] - 1 << '-' << nodesiz[dir] << ')';
321 else
322 hila::out0 << nodesiz[dir] / subdiv[dir];
323 }
324 hila::out0 << '\n';
325
326 Direction dmerge = mynode.subnodes.merged_subnodes_dir;
327
328 hila::out0 << "Node subdivision to 64bit elems: ";
329 foralldir (dir) {
330 if (dir > 0)
331 hila::out0 << " x ";
332 hila::out0 << ((dir == dmerge) ? subdiv[dir] / 2 : subdiv[dir]);
333 }
334 hila::out0 << " = " << number_of_subnodes / 2 << " subnodes\n";
335
336 hila::out0 << "Sites on subnodes: ";
337 foralldir (dir) {
338 if (dir > 0)
339 hila::out0 << " x ";
340 if (dir == gdir)
341 hila::out0 << '(' << nodesiz[dir] - 1 << '-' << nodesiz[dir] << ')';
342 else
343 hila::out0 << ((dir == dmerge) ? 2 * nodesiz[dir] / subdiv[dir]
344 : nodesiz[dir] / subdiv[dir]);
345 }
346 hila::out0 << '\n';
347
348#endif
349 }
350
351 // For MPI, remap the nodes for periodic torus
352 // in the desired manner
353 // we have at least 2 options:
354 // map_node_layout_trivial.c
355 // map_node_layout_block2.c - for 2^n n.n. blocks
356
357 nodes.create_remap();
358
359
360 print_dashed_line();
361}
const auto & fill(const S rhs)
Matrix fill.
Definition matrix.h:937
#define foralldir(d)
Macro to loop over (all) Direction(s)
Definition coordinates.h:78
Direction
Enumerator for direction that assigns integer to direction to be interpreted as unit vector.
Definition coordinates.h:34
This file defines all includes for HILA.
int number_of_nodes()
how many nodes there are
Definition com_mpi.cpp:245
std::ostream out0
This writes output only from main process (node 0)
void finishrun()
Normal, controlled exit - all nodes must call this. Prints timing information and information about c...