HILA
Loading...
Searching...
No Matches
clusters.h
Go to the documentation of this file.
1#ifndef HILA_CLUSTERS_H_
2#define HILA_CLUSTERS_H_
3
4#include "hila.h"
5
6#include <algorithm>
7
8#include "gpucub.h"
9
10/**
11 * @file clusters.h
12 * @brief provides cluster finding tools
13 * @details clusters are grown with the help of auxiliary Field variable of type
14 * Field<uint8_t>. Elements of this accept values 0 .. 254, allowing for 255 cluster classes.
15 * Special value hila::clusters::background indicates neutral, background sites.
16 *
17 * Example:
18 * ----------------------
19 * #include "clusters.h"
20 *
21 * Field<uint8_t> cltype;
22 * // initialize field cltype to values 0, 1 or background
23 * onsites(ALL) {
24 * if (<some condition 1>) cltype[X] = 0;
25 * else if (condition 2>) cltype[X] = 1;
26 * else cltype[X] = hila::clusters::background;
27 * }
28 *
29 * // create connected clusters of sites with cltype values 0 and 1
30 * hila::clusters cl(cltype);
31 *
32 * // above is equivalent to "hila::clusters cl; cl.find(cltype);"
33 *
34 * if (hila::myrank() == 0) {
35 * hila::out << "Got " << cl.number() << " clusters\n";
36 * for (int i = 0; i < cl.number(); i++) {
37 * hila::out << "Cluster " << i << " type " << cl.type(i) << " size " << cl.size(i) << '\n';
38 * }
39 * }
40 * ----------------------
41 *
42 * Functions:
43 *
44 * Constructor: initialize with constructor hila::clusters(const Field<uint8_t> & clustertype)
45 * Example:
46 * hila::clusters cluster(cltype); // initialize var cluster
47 *
48 * where cltype is of type Field<uint8_t>. This constructor builds connected clusters of the
49 * sites with the same cltype value. Special value of hila::clusters::background are ignored.
50 *
51 * Above is equivalent with
52 * hila::clusters cluster;
53 * cluster.find(cltype);
54 *
55 * Note: initalization is a relatively expensive operation
56 *
57 * size_t hila::clusters::number() - return the total number of clusters.
58 * Example:
59 * hila::out0 << "Found " << cluster.number() << " clusters\n";
60 *
61 * int64_t hila::clusters::size(size_t cl_number) - return the size of cluster number cl_number
62 * Example:
63 * hila::out0 << "Size of cluster 0 is " << cluster.size(0) << '\n';
64 *
65 * uint8_t hila::clusters::type(size_t cl_number) - return the cluster type
66 * hila::out0 << "Type of cluster 0 is " << cluster.size(0) << '\n';
67 *
68 * std::vector<SiteIndex> hila::clusters::sites(size_t cl_number) - return the sites of cluster
69 * cl_number Example: print the coordinates of cluster number 0: auto clsites = cluster.sites(0);
70 * for (auto & r : clsites) {
71 * hila::out0 << r.coordinates() << '\n;
72 * }
73 * Note: .sites() must be called by all MPI ranks, otherwise deadlock occurs
74 *
75 * int64_t hila::clusters::area(size_t cl_number) - return the area of the cluster
76 * Area is defined by the number of links where one end belongs to the cluster, another does not.
77 * Note: .area() must be called by all MPI ranks
78 */
79
80
81namespace hila {
82
83#define CLUSTER_BACKGROUND_ 0xFF
84
85template <typename inttype, std::enable_if_t<std::is_integral<inttype>::value, int> = 0>
86inline uint64_t set_cl_label(const CoordinateVector &cv, const inttype type) {
87 uint64_t r;
88 // we'll give the same label for all background sites
89 if (type == CLUSTER_BACKGROUND_)
90 r = static_cast<uint64_t>(CLUSTER_BACKGROUND_) << 54;
91 else
92 r = SiteIndex(cv).value | (static_cast<uint64_t>(type) << 54);
93 return r;
94}
95
96// return the top 8 bits of the encoded site+type
97inline uint8_t get_cl_label_type(const uint64_t v) {
98 return 0xff & (v >> 54);
99}
100
101
102class clusters {
103
104 private:
105 struct cl_struct {
106 uint64_t label;
107 int64_t size, area;
108 };
109
110 // clist vector contains information for all clusters. NOTE: the
111 // content is valid only on rank == 0.
112 std::vector<cl_struct> clist;
113
114 // We'll use uint64_t to encode the site and the type of the site.
115 // Set the top 8 bits to type, and the bottom 54 bits to SiteIndex.
116 Field<uint64_t> labels;
117
118 inline void make_local_clist();
119
120 void assert_cl_index(size_t i) const {
121 if (i >= clist.size()) {
122 hila::out0 << "Too large cluster index " << i << ", there are only " << clist.size()
123 << " clusters\n";
124 exit(0);
125 }
126 }
127
128
129 public:
130 /// hila::clusters::background is special value to mark "non-interesting" sites.
131 /// Using this can make finding clusters faster
132 static constexpr uint8_t background = CLUSTER_BACKGROUND_; // 8 ones here = 255
133
134 clusters() = default;
135 ~clusters() = default;
136
137 template <typename inttype, std::enable_if_t<std::is_integral<inttype>::value, int> = 0>
138 clusters(const Field<inttype> &type) {
139 find(type);
140 }
141
142 /// @brief find nearest-neighbour -connected clusters which have the same type
143 /// @param type field which contains the type of the site, possible values 0-254.
144 /// special value hila::clusters::background indicates site does not belong to any cluster
145 template <typename inttype, std::enable_if_t<std::is_integral<inttype>::value, int> = 0>
146 void find(const Field<inttype> &type) {
147 make_labels(type);
148 classify();
149 }
150
151 /// @brief number of clusters found
152
153 size_t number() const {
154 return clist.size();
155 }
156
157 /// @brief return size of cluster i
158 /// @param i cluster number
159
160 int64_t size(size_t i) const {
161 assert_cl_index(i);
162 return clist[i].size;
163 }
164
165 /// @brief return type of the cluster
166 /// @param i cluster number
167
168 uint8_t type(size_t i) const {
169 assert_cl_index(i);
170 return get_cl_label_type(clist[i].label);
171 }
172
173 /// @brief return the label of cluster i
174 /// @param i cluster number
175 /// Each cluster has unique 64-bit label
176
177 uint64_t label(size_t i) const {
178 assert_cl_index(i);
179 return clist[i].label;
180 }
181
182
183 /// @brief returns the area of the cluster number i
184 /// @param i cluster number
185 /// First time call involves reduction, after that value buffered
186 /// Must be called from all MPI ranks
187
188 int64_t area(size_t i) {
189 assert_cl_index(i);
190 // check if this is already computed
191 if (clist[i].area > 0)
192 return clist[i].area;
193
194 uint64_t lbl = label(i);
195 uint64_t cl_area = 0;
196
197 onsites(ALL) {
198 if (labels[X] == lbl) {
199 for (Direction d = e_x; d < NDIRS; ++d) {
200 if (labels[X + d] != lbl)
201 cl_area += 1;
202 }
203 }
204 }
205
206 clist[i].area = cl_area;
207 return cl_area;
208 }
209
210 /// @brief returns std::vector of SiteIndex for cluster number i
211 /// @param i cluster number
212 /// Expensive operation, can possibly overflow the memory of the node 0
213 /// Must be called from all MPI ranks
214
215 std::vector<SiteIndex> sites(size_t i) const {
216 SiteSelect sites;
217 uint64_t la = label(i);
218
219 onsites(ALL) {
220 if (labels[X] == la)
221 sites.select(X);
222 }
223 hila::out << "Node " << hila::myrank() << " got " << sites.size() << " sites\n";
224 return sites.move_sites();
225 }
226
227
228 /// @brief make only the Field representation of cluster labels, without counting the clusters
229 /// @param type - input field classifying the sites
230 /// @return a const reference to label Field
231 /// @details Usually this call is not needed, use hila::clusters::find() or constructor
232 template <typename inttype, std::enable_if_t<std::is_integral<inttype>::value, int> = 0>
233 void make_labels(const Field<inttype> &type) {
234
235 // mark every site with site index on 54 low, leaving 8 bits at the top for the type
236 labels[ALL] = set_cl_label(X.coordinates(), type[X]);
237
238 Reduction<int64_t> changed;
239 changed.delayed();
240 do {
241 changed = 0;
242 for (Parity par : {EVEN, ODD}) {
243 // take away annoying warning
244#pragma hila safe_access(labels)
245 onsites(par) {
246 auto type_0 = get_cl_label_type(labels[X]);
247 if (type_0 != background) {
248 for (Direction d = e_x; d < NDIRS; ++d) {
249 auto label_1 = labels[X + d];
250 auto type_1 = get_cl_label_type(label_1);
251 if (type_0 == type_1 && labels[X] > label_1) {
252 labels[X] = label_1;
253 changed += 1;
254 }
255 }
256 }
257 }
258 }
259
260 // hila::out0 << "g " << changed.value() << '\n';
261
262 } while (changed.value() > 0);
263 }
264
265 /// @brief obtain const refence to cluster label Field var
266 /// clusters::find() must have been called before this
267
268 const auto &get_labels() const {
269 return labels;
270 }
271
272 /// @brief classify clusters from existing label field (make_labels() called before)
273 /// @details Constructs vector with cluster info, which can be queried
274 /// Normally not needed, hila::clusters::find() already does the search
275 void classify() {
276
277 if (!labels.is_initialized(ALL)) {
278 hila::error("hila::clusters::classify() called without preceding "
279 "hila::clusters::make_labels()");
280 }
281
282 clist.clear();
283
284 make_local_clist();
285
286 // Now merge the clist across mpi ranks
287 // communicate and merge in node pairs
288
289 int nn = hila::number_of_nodes();
290 int myrank = hila::myrank();
291
292 for (int step = 1; step < nn; step *= 2) {
293 if (myrank % (2 * step) == step) {
294
295 // we're "odd" pair, send to "even"
296 hila::send_to(myrank - step, clist);
297 clist.clear(); // in order to save space
298
299 } else if (myrank % (2 * step) == 0 && myrank + step < nn) {
300
301 // Now "even", receive from "odd" and merge data
302 std::vector<cl_struct> clist_n, merged;
303 hila::receive_from(myrank + step, clist_n);
304
305 int i = 0, n = 0;
306 while (i < clist.size() && n < clist_n.size()) {
307 if (clist[i].label < clist_n[n].label) {
308 merged.push_back(clist[i++]);
309
310 } else if (clist[i].label == clist_n[n].label) {
311 merged.push_back(clist[i++]);
312 merged.back().size += clist_n[n++].size;
313 } else {
314 merged.push_back(clist_n[n++]);
315 }
316 }
317 // only 1 of the following is done (if either)
318 while (i < clist.size()) {
319 merged.push_back(clist[i++]);
320 }
321 while (n < clist_n.size()) {
322 merged.push_back(clist_n[n++]);
323 }
324 clist_n.clear();
325 clist = std::move(merged);
326 }
327 }
328
330
331 // deliver clist to all ranks after all
332 hila::broadcast(clist);
333
334 } // void classify()
335
336
337}; // class clusters
338
339
340#if (defined(CUDA) || defined(HIP)) && !defined(HILAPP)
341
342inline void clusters::make_local_clist() {
343
344 void *d_temp_storage = nullptr;
345 size_t temp_storage_bytes = 0;
346 uint64_t *buf;
347
348 gpuMalloc(&buf, lattice->mynode.volume * sizeof(uint64_t));
349 GPU_CHECK(gpucub::DeviceRadixSort::SortKeys(
350 d_temp_storage, temp_storage_bytes, labels.field_buffer(), buf, lattice->mynode.volume));
351
352 // Allocate temporary storage
353 gpuMalloc(&d_temp_storage, temp_storage_bytes);
354
355 GPU_CHECK(gpucub::DeviceRadixSort::SortKeys(
356 d_temp_storage, temp_storage_bytes, labels.field_buffer(), buf, lattice->mynode.volume));
357
358 gpuFree(d_temp_storage);
359 // now buf contains sorted labels
360
361 d_temp_storage = nullptr;
362 temp_storage_bytes = 0;
363 uint64_t *d_labels;
364 int64_t *d_sizes, *d_num_clust;
365 gpuMalloc(&d_labels, lattice->mynode.volume * sizeof(uint64_t));
366 gpuMalloc(&d_sizes, (lattice->mynode.volume + 1) * sizeof(int64_t));
367 d_num_clust = d_sizes + lattice->mynode.volume; // the last element
368
369 GPU_CHECK(gpucub::DeviceRunLengthEncode::Encode(d_temp_storage, temp_storage_bytes, buf,
370 d_labels, d_sizes, d_num_clust,
371 lattice->mynode.volume));
372
373 // Allocate temporary storage
374 gpuMalloc(&d_temp_storage, temp_storage_bytes);
375
376 // Run encoding
377 GPU_CHECK(gpucub::DeviceRunLengthEncode::Encode(d_temp_storage, temp_storage_bytes, buf,
378 d_labels, d_sizes, d_num_clust,
379 lattice->mynode.volume));
380
381 gpuFree(d_temp_storage);
382
383 int64_t nclusters;
384 gpuMemcpy(&nclusters, d_num_clust, sizeof(int64_t), gpuMemcpyDeviceToHost);
385
386 std::vector<uint64_t> labels(nclusters);
387 std::vector<int64_t> sizes(nclusters);
388 if (nclusters > 0) {
389 gpuMemcpy(labels.data(), d_labels, sizeof(uint64_t) * nclusters, gpuMemcpyDeviceToHost);
390 gpuMemcpy(sizes.data(), d_sizes, sizeof(int64_t) * nclusters, gpuMemcpyDeviceToHost);
391 }
392
393 gpuFree(d_sizes);
394 gpuFree(d_labels);
395
396 clist.resize(nclusters);
397 for (int i = 0; i < nclusters; i++) {
398 clist[i] = {labels[i], sizes[i], 0};
399 }
400 if (clist.size() > 0 && get_cl_label_type(clist.back().label) == hila::clusters::background)
401 clist.pop_back();
402}
403
404#else
405
406inline void clusters::make_local_clist() {
407
408 // this changes the ordering of labels - copy it
409 auto lb = labels;
410 auto *buf = lb.field_buffer();
411 std::sort(buf, buf + lattice->mynode.volume);
412 // background labels are last after sort, stop handling when these are met
413 int64_t i = 0;
414 while (i < lattice->mynode.volume && get_cl_label_type(buf[i]) != background) {
415 int64_t istart = i;
416 auto label = buf[i];
417 for (++i; i < lattice->mynode.volume && buf[i] == label; ++i)
418 ;
419 clist.push_back({label, i - istart, 0});
420 }
421}
422
423#endif
424
425
426} // namespace hila
427
428
429#endif
The field class implements the standard methods for accessing Fields. Hilapp replaces the parity acce...
Definition field.h:62
bool is_initialized(Parity p) const
Returns true if the Field has been assigned to.
Definition field.h:430
Special reduction class: enables delayed and non-blocking reductions, which are not possible with the...
Definition reduction.h:14
const T value()
Return value of the reduction variable. Wait for the comms if needed.
Definition reduction.h:151
Reduction & delayed(bool b=true)
deferred(bool) turns deferred on or off. By default turns on.
Definition reduction.h:142
Running index for locating sites on the lattice.
Definition site_index.h:17
Parity
Parity enum with values EVEN, ODD, ALL; refers to parity of the site. Parity of site (x,...
constexpr Parity EVEN
bit pattern: 001
constexpr unsigned NDIRS
Number of directions.
Definition coordinates.h:57
constexpr Parity ODD
bit pattern: 010
Direction
Enumerator for direction that assigns integer to direction to be interpreted as unit vector.
Definition coordinates.h:34
constexpr Parity ALL
bit pattern: 011
Implement hila::swap for gauge fields.
Definition array.h:982
void barrier()
sync MPI
Definition com_mpi.cpp:264
int myrank()
rank of this node
Definition com_mpi.cpp:237
int number_of_nodes()
how many nodes there are
Definition com_mpi.cpp:248
std::ostream out
this is our default output file stream
std::ostream out0
This writes output only from main process (node 0)
T broadcast(T &var, int rank=0)
Broadcast the value of var to all MPI ranks from rank (default=0).
Definition com_mpi.h:170