1#ifndef HILA_SITE_SELECT_H_
2#define HILA_SITE_SELECT_H_
/// Site entries held by this object. Elsewhere in this file the vector is
/// resized to lattice->mynode.volume and written by index.
35 std::vector<SiteIndex> sites;
/// Defaults to true.
/// NOTE(review): presumably enables automatic joining of the per-node results
/// (a no_join() method exists in this class) -- confirm against its users.
39 bool auto_join =
true;
/// Cap on the number of entries; defaults to lattice.volume(). Counts above
/// this cap are recorded in n_overflow by the endloop_action code.
43 size_t nmax = lattice.
volume();
/// Index into `sites` used for the next write; starts at 0.
45 size_t current_index = 0;
/// `value` of the most recently stored site, compared against the next
/// incoming site. SIZE_MAX is the initial/reset value.
46 size_t previous_site = SIZE_MAX;
/// Number of entries counted beyond nmax; starts at 0.
47 size_t n_overflow = 0;
52 explicit SiteSelect() {
57 previous_site = SIZE_MAX;
/// Copy constructor: compiler-generated member-wise copy.
61 SiteSelect(
const SiteSelect &a) =
default;
/// Destructor: compiler-generated; no custom cleanup is performed here.
64 ~SiteSelect() =
default;
75 if (s.value == previous_site) {
76 sites[current_index - 1] = s;
78 sites[current_index] = s;
79 previous_site = s.value;
84 SiteSelect &no_join() {
89 SiteSelect &max_size(
size_t _max) {
95 sites.resize(lattice->mynode.volume);
97 previous_site = SIZE_MAX;
105 previous_site = SIZE_MAX;
108 size_t size()
const {
113 return sites.at(i).coordinates();
116 const SiteIndex site_index(
size_t i)
const {
/// Hand the stored site list to the caller by move instead of copying it.
/// @return the contents of `sites`; after the call `sites` has been moved
///         from (std::vector's move constructor leaves the source empty).
/// std::move is required here because `sites` is a data member, not a local.
123 std::vector<SiteIndex> move_sites() {
124 return std::move(sites);
129 std::vector<std::nullptr_t> v;
130 join_data_vectors(v);
136 template <
typename T>
137 void join_data_vectors(std::vector<T> &dp) {
140 size_t nsend = nmax - sites.size();
141 hila::send_to(n, nsend);
144 std::vector<SiteIndex> s;
145 hila::receive_from(n, s);
148 n_overflow += s.back().value;
151 sites.reserve(sites.size() + s.size());
152 sites.insert(sites.end(), s.begin(), s.end());
154 if constexpr (!std::is_same<T, std::nullptr_t>::value) {
155 std::vector<T> recvdata;
156 hila::receive_from(n, recvdata);
157 dp.reserve(sites.size());
158 dp.insert(dp.end(), recvdata.begin(), recvdata.end());
163 hila::receive_from(n, over);
172 hila::receive_from(0, nsend);
174 if (nsend < sites.size()) {
175 n_overflow += sites.size() - nsend;
180 sites.push_back(n_overflow);
181 hila::send_to(0, sites);
183 if constexpr (!std::is_same<T, std::nullptr_t>::value) {
184 dp.resize(sites.size() - 1);
185 hila::send_to(0, dp);
190 hila::send_to(0, sites.size() + n_overflow);
201#if !(defined(CUDA) || defined(HIP)) || defined(HILAPP)
203 void endloop_action() {
204 if (current_index > nmax) {
206 n_overflow = current_index - nmax;
207 current_index = nmax;
209 sites.resize(current_index);
218 template <
typename T>
219 void copy_data_to_host_vector(std::vector<T> &dvec,
const char *flag,
const T *d_data) {
220 void *d_temp_storage =
nullptr;
221 size_t temp_storage_bytes = 0;
224 gpuMalloc(&out, lattice->mynode.volume *
sizeof(T));
227 gpuMalloc(&num_selected_d,
sizeof(
int));
230 GPU_CHECK(gpucub::DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes, d_data, flag,
231 out, num_selected_d, lattice->mynode.volume));
233 gpuMalloc(&d_temp_storage, temp_storage_bytes);
235 GPU_CHECK(gpucub::DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes, d_data, flag,
236 out, num_selected_d, lattice->mynode.volume));
238 gpuFree(d_temp_storage);
241 gpuMemcpy(&num_selected, num_selected_d,
sizeof(
int), gpuMemcpyDeviceToHost);
242 gpuFree(num_selected_d);
244 if (num_selected > nmax) {
245 n_overflow = num_selected - nmax;
248 dvec.resize(num_selected);
250 gpuMemcpy(dvec.data(), out,
sizeof(T) * num_selected, gpuMemcpyDeviceToHost);
255 void endloop_action(
const char *flag,
const SiteIndex *d_sites) {
257 copy_data_to_host_vector(sites, flag, d_sites);
/// Empty tag type; it is the return type of SiteValueSelect::select() and
/// carries no data.
266class site_value_select_type_ {};
269class SiteValueSelect :
public SiteSelect {
/// Values stored alongside the sites. Elsewhere in this class the vector is
/// resized to lattice->mynode.volume and written at current_index.
271 std::vector<T> values;
274 explicit SiteValueSelect() : SiteSelect() {
/// Destructor: compiler-generated; no custom cleanup is performed here.
277 ~SiteValueSelect() =
default;
/// Copy constructor: compiler-generated member-wise copy.
278 SiteValueSelect(
const SiteValueSelect &v) =
default;
282 values.resize(lattice->mynode.volume);
/// Selection entry point taking a site index placeholder and a value.
/// The visible body uses neither `x` nor `val`; it only returns a
/// default-constructed tag object.
/// NOTE(review): presumably a stub that a preprocessing/code-generation step
/// replaces with the real selection call -- confirm.
/// @param x    site index placeholder (unused in the visible body)
/// @param val  value to associate with the site (unused in the visible body)
/// @return a default-constructed site_value_select_type_
290 site_value_select_type_ select(
const X_index_type x,
const T &val) {
291 return site_value_select_type_();
/// Record a (site, value) pair: store `val` in `values`, then let the base
/// class record the site.
/// @param s    site to record
/// @param val  value stored for that site
/// NOTE(review): the value is written at current_index *before*
/// SiteSelect::select_site(s) runs. Elsewhere in this file a repeated site
/// (s.value == previous_site) is stored at current_index - 1, so confirm that
/// the value slot stays in step with the site slot in that case, and that
/// current_index is always a valid index into `values` here.
294 void select_site_value(
const SiteIndex s,
const T &val) {
295 values[current_index] = val;
296 SiteSelect::select_site(s);
306 join_data_vectors(values);
310#if !(defined(CUDA) || defined(HIP)) || defined(HILAPP)
312 void endloop_action() {
313 bool save = auto_join;
315 SiteSelect::endloop_action();
316 values.resize(current_index);
324 void endloop_action(
const char *flag,
const SiteIndex *d_sites,
const T *d_values) {
325 copy_data_to_host_vector(sites, flag, d_sites);
326 copy_data_to_host_vector(values, flag, d_values);
340inline void dummy_func_2() {
int64_t volume() const
lattice.volume() returns the lattice volume. Can be used inside onsites()-loops.
Running index for locating sites on the lattice.
X-coordinate type - "dummy" class.
constexpr Parity ALL
bit pattern: 011
int myrank()
rank of this node
int number_of_nodes()
how many nodes there are
std::ostream out
this is our default output file stream