HILA
Loading...
Searching...
No Matches
backend_gpu/defs.h
1#ifndef HILA_GPU_DEFS_H
2#define HILA_GPU_DEFS_H
3
4// On Puhti, use UCX_MEMTYPE_CACHE=n with
5// GPU_AWARE_MPI
6
#include <cstddef>
#include <iostream>
#include <sstream>
#include <type_traits>
9
// Prototypes for memory pool ops (implemented in the backend sources).
// When GPU_MEMORY_POOL is defined, the gpuMalloc/gpuFree/gpuMemPool* macros
// below expand to these instead of the raw runtime allocator calls.
void gpu_memory_pool_alloc(void **p, size_t req_size);
void gpu_memory_pool_free(void *ptr);
void gpu_memory_pool_purge();
void gpu_memory_pool_report();
15
16////////////////////////////////////////////////////////////////////////////////////
17// some device rng headers
18////////////////////////////////////////////////////////////////////////////////////
namespace hila {
// double random(); // defined in random.h

/// Seed the device (GPU) random number generator.
void seed_device_rng(unsigned long long seed);

/// Free GPU RNG state; does nothing on non-GPU archs (see hila_gpu.cpp).
void free_device_rng();
} // namespace hila
27
28
#ifndef HILAPP

// GPU specific definitions: map the generic gpu* interface onto CUDA or HIP

////////////////////////////////////////////////////////////////////////////////////
// Some cuda-specific definitions
////////////////////////////////////////////////////////////////////////////////////
#if defined(CUDA)

#include <cuda.h>
#include <cuda_runtime.h>
#include <cub/cub.cuh>

// Generic error type / success value, aliased to the CUDA runtime's
using gpuError = cudaError;
#define gpuSuccess cudaSuccess

/////////////////////////////////////////////
// If gpu memory pool in use, the interface to memory goes through the
// gpu_memory_pool_* functions declared at the top of this file
#ifdef GPU_MEMORY_POOL
#define gpuMalloc(a, b) gpu_memory_pool_alloc((void **)a, b)
#define gpuFree(a) gpu_memory_pool_free(a)
#define gpuMemPoolPurge() gpu_memory_pool_purge()
#define gpuMemPoolReport() gpu_memory_pool_report()

#else
// here std interfaces; purge/report are no-ops when there is no pool

// clang-format off
#define gpuMemPoolPurge() do { } while (0)
#define gpuMemPoolReport() do { } while (0)
// clang-format on

#ifdef CUDA_MALLOC_ASYNC
// Stream-ordered allocation on the default stream (requires CUDA 11.2+)
#define gpuMalloc(a, b) GPU_CHECK(cudaMallocAsync(a, b, 0))
#define gpuFree(a) GPU_CHECK(cudaFreeAsync(a, 0))

#else
#define gpuMalloc(a, b) GPU_CHECK(cudaMalloc((void **)a, b))
#define gpuFree(a) GPU_CHECK(cudaFree(a))

#endif

#endif // gpu memory pool
/////////////////////////////////////////////


// Thin wrappers over the CUDA runtime API; GPU_CHECK (defined below)
// terminates the program on any returned error
#define gpuGetLastError cudaGetLastError
#define gpuMemcpy(a, b, c, d) GPU_CHECK(cudaMemcpy(a, b, c, d))
#define gpuMemcpyHostToDevice cudaMemcpyHostToDevice
#define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost
#define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice
#define gpuDeviceSynchronize() GPU_CHECK(cudaDeviceSynchronize())
#define gpuStreamSynchronize(a) GPU_CHECK(cudaStreamSynchronize(a))
#define gpuStreamCreate(a) GPU_CHECK(cudaStreamCreate(a))
#define gpuStreamDestroy(a) GPU_CHECK(cudaStreamDestroy(a))
#define gpuMemset(a, b, c) GPU_CHECK(cudaMemset(a, b, c))
#define gpuMemcpyToSymbol(a, b, size, c, dir) GPU_CHECK(cudaMemcpyToSymbol(a, b, size, c, dir))
#define gpuFuncAttributes cudaFuncAttributes
#define gpuFuncGetAttributes cudaFuncGetAttributes

// Backend identifier string for reporting
#define GPUTYPESTR "CUDA"

// _GPU_DEVICE_COMPILE_ is defined only during device-side compilation passes
#ifdef __CUDA_ARCH__
#define _GPU_DEVICE_COMPILE_ __CUDA_ARCH__
#endif
94
95////////////////////////////////////////////////////////////////////////////////////
96// Same for HIP
97////////////////////////////////////////////////////////////////////////////////////
98#elif defined(HIP)
99
100#include <hip/hip_runtime.h>
101#include <hiprand/hiprand.h>
102
103// #include <hipcub/hipcub.hpp>*
104
105using gpuError = hipError_t;
106#define gpuSuccess hipSuccess
107
108/////////////////////////////////////////////
109// If gpu memory pool in use, the interface to memory
110#ifdef GPU_MEMORY_POOL
111#define gpuMalloc(a, b) gpu_memory_pool_alloc((void **)a, b)
112#define gpuFree(a) gpu_memory_pool_free(a)
113#define gpuMemPoolPurge() gpu_memory_pool_purge()
114#define gpuMemPoolReport() gpu_memory_pool_report()
115
116
117#else
118// here std interfaces
119
120// clang-format off
121#define gpuMemPoolPurge() do {} while (0)
122#define gpuMemPoolReport() do {} while (0)
123// clang-format on
124
125#define gpuMalloc(a, b) GPU_CHECK(hipMalloc((void **)a, b))
126#define gpuFree(a) GPU_CHECK(hipFree(a))
127
128#endif // ifdef memory pool
129
130#define gpuGetLastError hipGetLastError
131#define gpuMemcpy(a, b, siz, d) GPU_CHECK(hipMemcpy(a, b, siz, d))
132#define gpuMemcpyHostToDevice hipMemcpyHostToDevice
133#define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost
134#define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice
135#define gpuDeviceSynchronize() GPU_CHECK(hipDeviceSynchronize())
136#define gpuStreamSynchronize(a) GPU_CHECK(hipStreamSynchronize(a))
137#define gpuStreamCreate(a) GPU_CHECK(hipStreamCreate(a))
138#define gpuStreamDestroy(a) GPU_CHECK(hipStreamDestroy(a))
139#define gpuMemset(a, b, c) GPU_CHECK(hipMemset(a, b, c))
140#define gpuMemcpyToSymbol(a, b, size, c, dir) \
141 GPU_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(a), b, size, c, dir))
142#define gpuFuncAttributes hipFuncAttributes
143#define gpuFuncGetAttributes hipFuncGetAttributes
144
145
146#define GPUTYPESTR "HIP"
147
148#ifdef __HIP_DEVICE_COMPILE__
149#define _GPU_DEVICE_COMPILE_ __HIP_DEVICE_COMPILE__
150#endif
151
152#endif
153////////////////////////////////////////////////////////////////////////////////////
154// General GPU (cuda/hip) definitions
155////////////////////////////////////////////////////////////////////////////////////
156
157
// Evaluate a runtime API call and terminate with file/line context if it
// returned an error.  NOTE(review): the expansion declares a local variable
// named 'code', which would shadow a 'code' used inside 'cmd'.
#define GPU_CHECK(cmd)                                                                             \
    do {                                                                                           \
        auto code = cmd;                                                                           \
        gpu_exit_on_error(code, #cmd, __FILE__, __LINE__);                                         \
    } while (0)

// Check device status with a context message, or check an explicit error code;
// both funnel into gpu_exit_on_error with the call site's file/line.
#define check_device_error(msg) gpu_exit_on_error(msg, __FILE__, __LINE__)
#define check_device_error_code(code, msg) gpu_exit_on_error(code, msg, __FILE__, __LINE__)
// Implemented in the backend source; the msg-only overload presumably queries
// the runtime's last error itself — confirm against hila_gpu.cpp
void gpu_exit_on_error(const char *msg, const char *file, int line);
void gpu_exit_on_error(gpuError code, const char *msg, const char *file, int line);

namespace hila {
// Block the host until all previously issued device work has completed
// (wraps cudaDeviceSynchronize / hipDeviceSynchronize via gpuDeviceSynchronize)
inline void synchronize_threads() {
    gpuDeviceSynchronize();
}
} // namespace hila
174
175#else // NOW HILAPP
176
177////////////////////////////////////////////////////////////////////////////////////
178// Now not cuda or hip - hilapp stage scans this section
179///////////////////////////////////////////////////////////////////////////////////
180
181
182using gpuError = int;
183
184// Define empty stubs - return 1 (true)
185// clang-format off
186#define gpuMalloc(a, b) do {} while(0)
187#define gpuFree(a) do {} while(0)
188#define gpuMemcpy(a, b, siz, d) do {} while(0)
189#define gpuMemcpyHostToDevice 1
190#define gpuMemcpyDeviceToHost 2
191#define gpuMemset(a,b,c) do {} while(0)
192#define gpuMemcpyToSymbol(a, b, size, c, dir) do {} while(0)
193
194#define gpuMemPoolPurge() do {} while(0)
195#define gpuMemPoolReport() do {} while(0)
196
197#define check_device_error(msg) do {} while(0)
198#define check_device_error_code(code, msg) do {} while(0)
199
200#define gpuStreamSynchronize(a) do {} while(0)
201#define gpuDeviceSynchronize() do {} while(0)
202
203#define gpuGetLastError cudaGetLastError
204
205
206// clang-format on
207
208
209#define GPUTYPESTR "NONE"
210
211namespace hila {
212inline void synchronize_threads() {}
213} // namespace hila
214
215#endif
216////////////////////////////////////////////////////////////////////////////////////
217
// GPU setup entry points, implemented in the backend source.
// 'rank' is presumably the MPI rank used to pick the device -- confirm at caller.
void initialize_gpu(int rank, int device);
// Print information about the selected GPU device.
void gpu_device_info();
223
namespace hila {

/// Trait templates for basic types: these mirror the std:: equivalents
/// (std::is_arithmetic etc.) but are hila's own templates, so a backend can
/// specialize them for its own basic types (such as AVX vectors).

template <class T>
struct is_arithmetic : std::is_arithmetic<T> {};

template <class T, class U>
struct is_assignable : std::is_assignable<T, U> {};

template <class T>
struct is_floating_point : std::is_floating_point<T> {};

} // namespace hila
239
240#endif
Implement hila::swap for gauge fields.
Definition array.h:982
void free_device_rng()
Free GPU RNG state, does nothing on non-GPU archs.
Definition hila_gpu.cpp:107