HILA
Loading...
Searching...
No Matches
gpu_minmax.h
1#ifndef GPU_MINMAX_H_
2#define GPU_MINMAX_H_
3
4#if !defined(HILAPP)
5// hilapp does not have to read through this, nothing to convert
6// and hilapp does not understand .cuh -files
7
8#include "hila.h"
9
10#if defined(CUDA)
11#include <cub/cub.cuh>
12namespace gpucub = cub;
13using keyvalueindexT = int;
14#endif
15
16#if defined(HIP)
17#include <hipcub/hipcub.hpp>
18namespace gpucub = hipcub;
19#if defined(HIPCUB_VERSION) && HIPCUB_VERSION >= 300300
20// For some strange reason hipcub 3.3 used long in KeyValuePair ?? bug in library?
21using keyvalueindexT = long;
22#else
23using keyvalueindexT = int;
24#endif
25#endif // HIP
26
27
28template <typename T>
29T Field<T>::gpu_minmax(bool is_min, Parity par, CoordinateVector &loc) const {
30
31 // skip the cub/hipcub bits in hilapp, not needed
32
33 const lattice_struct &mylat = fs->mylattice.ref();
34 int64_t num_items = mylat.loop_end(par) - mylat.loop_begin(par);
35
36 // Declare, allocate, and initialize device-accessible pointers
37 // for input and output
38 T *data_in = this->field_buffer() + mylat.loop_begin(par); // ptr to data
39 gpucub::KeyValuePair<keyvalueindexT, T> *result_p, result;
40
41 gpuMalloc(&result_p, sizeof(gpucub::KeyValuePair<keyvalueindexT, T>));
42
43 // Determine temporary device storage requirements
44 void *d_temp_storage = nullptr;
45 size_t temp_storage_bytes = 0;
46
47 if (is_min) {
48 GPU_CHECK(gpucub::DeviceReduce::ArgMin(d_temp_storage, temp_storage_bytes, data_in,
49 result_p, num_items));
50 } else {
51 GPU_CHECK(gpucub::DeviceReduce::ArgMax(d_temp_storage, temp_storage_bytes, data_in,
52 result_p, num_items));
53 }
54
55 // Allocate temporary storage
56 // hila::out0 << "gpu_minmax: alloc " << temp_storage_bytes << " bytes\n";
57 gpuMalloc(&d_temp_storage, temp_storage_bytes);
58
59 // Run argmin-reduction
60 if (is_min) {
61 GPU_CHECK(gpucub::DeviceReduce::ArgMin(d_temp_storage, temp_storage_bytes, data_in,
62 result_p, num_items));
63 } else {
64 GPU_CHECK(gpucub::DeviceReduce::ArgMax(d_temp_storage, temp_storage_bytes, data_in,
65 result_p, num_items));
66 }
67
68 gpuFree(d_temp_storage);
69
70 gpuMemcpy(&result, result_p, sizeof(gpucub::KeyValuePair<keyvalueindexT, T>),
71 gpuMemcpyDeviceToHost);
72
73 gpuFree(result_p);
74
75 loc = mylat.coordinates(result.key + mylat.loop_begin(par));
76 return result.value;
77}
78
79#endif // ! HILAPP
80
81
82#endif
T gpu_minmax(bool min_or_max, Parity par, CoordinateVector &loc) const
Declare gpu_minmax here; it is defined only for GPU targets.
Definition gpu_minmax.h:29
Parity
Parity enum with values EVEN, ODD, ALL; refers to parity of the site. Parity of site (x,...