HILA
backend_gpu/defs.h
#ifndef GPU_DEFS_H
#define GPU_DEFS_H

// On Puhti, use UCX_MEMTYPE_CACHE=n with
// GPU_AWARE_MPI

#include <sstream>
#include <iostream>
#include <type_traits> // for the std:: type traits wrapped at the end of this file

// Prototypes for memory pool ops
void gpu_memory_pool_alloc(void **p, size_t req_size);
void gpu_memory_pool_free(void *ptr);
void gpu_memory_pool_purge();
void gpu_memory_pool_report();
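
// Usage sketch (illustrative, not part of the API contract): the pool is
// normally reached through the gpuMalloc()/gpuFree() macros defined below,
// but the interface can also be called directly:
//   double *buf;
//   gpu_memory_pool_alloc((void **)&buf, 64 * sizeof(double));
//   ...
//   gpu_memory_pool_free(buf);
//   gpu_memory_pool_report(); // report pool status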

////////////////////////////////////////////////////////////////////////////////////
// Device RNG prototypes
////////////////////////////////////////////////////////////////////////////////////
namespace hila {
// double random(); // defined in random.h
void seed_device_rng(unsigned long long seed);
void free_device_rng();
} // namespace hila
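
// Example (illustrative): seed the device RNG once at startup and release its
// state before exit; free_device_rng() does nothing on non-GPU archs.
//   hila::seed_device_rng(42ULL);
//   ...
//   hila::free_device_rng();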


#ifndef HILAPP

// GPU specific definitions

////////////////////////////////////////////////////////////////////////////////////
// Some CUDA-specific definitions
////////////////////////////////////////////////////////////////////////////////////
#if defined(CUDA)

#include <cuda.h>
#include <cuda_runtime.h>
#include <cub/cub.cuh>

// This is now set in params.h
// #define N_threads 256 // Threads per block for CUDA TODO: make configurable?

using gpuError = cudaError;
#define gpuSuccess cudaSuccess

/////////////////////////////////////////////
// If the GPU memory pool is in use, route allocations through it
#ifdef GPU_MEMORY_POOL
#define gpuMalloc(a, b) gpu_memory_pool_alloc((void **)a, b)
#define gpuFree(a) gpu_memory_pool_free(a)
#define gpuMemPoolPurge() gpu_memory_pool_purge()
#define gpuMemPoolReport() gpu_memory_pool_report()

#else
// otherwise the standard CUDA interfaces

// clang-format off
#define gpuMemPoolPurge() do { } while (0)
#define gpuMemPoolReport() do { } while (0)
// clang-format on

#ifdef CUDA_MALLOC_ASYNC
// (void **) cast keeps the macro usable with typed pointers, as in the branches below
#define gpuMalloc(a, b) GPU_CHECK(cudaMallocAsync((void **)a, b, 0))
#define gpuFree(a) GPU_CHECK(cudaFreeAsync(a, 0))

#else
#define gpuMalloc(a, b) GPU_CHECK(cudaMalloc((void **)a, b))
#define gpuFree(a) GPU_CHECK(cudaFree(a))

#endif

#endif // GPU_MEMORY_POOL
/////////////////////////////////////////////


#define gpuGetLastError cudaGetLastError
#define gpuMemcpy(a, b, c, d) GPU_CHECK(cudaMemcpy(a, b, c, d))
#define gpuMemcpyHostToDevice cudaMemcpyHostToDevice
#define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost
#define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice
#define gpuDeviceSynchronize() GPU_CHECK(cudaDeviceSynchronize())
#define gpuStreamSynchronize(a) GPU_CHECK(cudaStreamSynchronize(a))
#define gpuMemset(a, b, c) GPU_CHECK(cudaMemset(a, b, c))
#define gpuMemcpyToSymbol(a, b, size, c, dir) GPU_CHECK(cudaMemcpyToSymbol(a, b, size, c, dir))

#define GPUTYPESTR "CUDA"

#ifdef __CUDA_ARCH__
#define __GPU_DEVICE_COMPILE__ __CUDA_ARCH__
#endif

////////////////////////////////////////////////////////////////////////////////////
// Same for HIP
////////////////////////////////////////////////////////////////////////////////////
#elif defined(HIP)

#include <hip/hip_runtime.h>
#include <hiprand/hiprand.h>

// #include <hipcub/hipcub.hpp>

// This is now set in params.h
// #define N_threads 256 // Threads per block

using gpuError = hipError_t;
#define gpuSuccess hipSuccess

/////////////////////////////////////////////
// If the GPU memory pool is in use, route allocations through it
#ifdef GPU_MEMORY_POOL
#define gpuMalloc(a, b) gpu_memory_pool_alloc((void **)a, b)
#define gpuFree(a) gpu_memory_pool_free(a)
#define gpuMemPoolPurge() gpu_memory_pool_purge()
#define gpuMemPoolReport() gpu_memory_pool_report()

#else
// otherwise the standard HIP interfaces

// clang-format off
#define gpuMemPoolPurge() do {} while (0)
#define gpuMemPoolReport() do {} while (0)
// clang-format on

#define gpuMalloc(a, b) GPU_CHECK(hipMalloc((void **)a, b))
#define gpuFree(a) GPU_CHECK(hipFree(a))

#endif // GPU_MEMORY_POOL

#define gpuGetLastError hipGetLastError
#define gpuMemcpy(a, b, siz, d) GPU_CHECK(hipMemcpy(a, b, siz, d))
#define gpuMemcpyHostToDevice hipMemcpyHostToDevice
#define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost
#define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice
#define gpuDeviceSynchronize() GPU_CHECK(hipDeviceSynchronize())
#define gpuStreamSynchronize(a) GPU_CHECK(hipStreamSynchronize(a))
#define gpuMemset(a, b, c) GPU_CHECK(hipMemset(a, b, c))
#define gpuMemcpyToSymbol(a, b, size, c, dir) \
    GPU_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(a), b, size, c, dir))
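
// Example (illustrative): copy a host value into a __constant__ device symbol
// through the common macro; the HIP_SYMBOL() wrapping is handled here, so the
// call looks the same on both backends. The symbol name is hypothetical.
//   __constant__ double d_param;
//   double h_param = 1.0;
//   gpuMemcpyToSymbol(d_param, &h_param, sizeof(double), 0, gpuMemcpyHostToDevice);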


#define GPUTYPESTR "HIP"

#ifdef __HIP_DEVICE_COMPILE__
#define __GPU_DEVICE_COMPILE__ __HIP_DEVICE_COMPILE__
#endif

#endif
////////////////////////////////////////////////////////////////////////////////////
// General GPU (CUDA/HIP) definitions
////////////////////////////////////////////////////////////////////////////////////


#define GPU_CHECK(cmd)                                                               \
    do {                                                                             \
        auto code = cmd;                                                             \
        gpu_exit_on_error(code, #cmd, __FILE__, __LINE__);                           \
    } while (0)

#define check_device_error(msg) gpu_exit_on_error(msg, __FILE__, __LINE__)
#define check_device_error_code(code, msg)                                           \
    gpu_exit_on_error(code, msg, __FILE__, __LINE__)
void gpu_exit_on_error(const char *msg, const char *file, int line);
void gpu_exit_on_error(gpuError code, const char *msg, const char *file, int line);

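// Usage sketch (illustrative): the gpuMalloc()/gpuMemcpy() macros follow the
// cudaMalloc()/cudaMemcpy() conventions and already wrap GPU_CHECK();
// check_device_error() checks the last device error, e.g. after a kernel
// launch. The kernel and buffer names below are hypothetical.
//   float *d_buf;
//   gpuMalloc(&d_buf, n * sizeof(float));
//   gpuMemcpy(d_buf, h_buf, n * sizeof(float), gpuMemcpyHostToDevice);
//   my_kernel<<<n_blocks, N_threads>>>(d_buf, n);
//   check_device_error("my_kernel launch");
//   gpuFree(d_buf);
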
namespace hila {
inline void synchronize_threads() {
    gpuDeviceSynchronize();
}
} // namespace hila

#else // NOW HILAPP

////////////////////////////////////////////////////////////////////////////////////
// Not CUDA or HIP: the hilapp source-to-source stage scans this section
////////////////////////////////////////////////////////////////////////////////////


using gpuError = int;

// Define empty stubs; the memcpy direction constants get dummy values
// clang-format off
#define gpuMalloc(a, b) do {} while(0)
#define gpuFree(a) do {} while(0)
#define gpuMemcpy(a, b, siz, d) do {} while(0)
#define gpuMemcpyHostToDevice 1
#define gpuMemcpyDeviceToHost 2
#define gpuMemset(a, b, c) do {} while(0)
#define gpuMemcpyToSymbol(a, b, size, c, dir) do {} while(0)

#define gpuMemPoolPurge() do {} while(0)
#define gpuMemPoolReport() do {} while(0)

#define check_device_error(msg) do {} while(0)
#define check_device_error_code(code, msg) do {} while(0)

#define gpuStreamSynchronize(a) do {} while(0)
#define gpuDeviceSynchronize() do {} while(0)

#define gpuGetLastError cudaGetLastError

// clang-format on

#define GPUTYPESTR "NONE"

namespace hila {
inline void synchronize_threads() {}
} // namespace hila

#endif
////////////////////////////////////////////////////////////////////////////////////

void initialize_gpu(int rank, int device);
void gpu_device_info();

// This is not the CUDA compiler
// Maybe hilapp?

namespace hila {

/// Implements a test for basic types, similar to
/// std::is_arithmetic, but allows the backend to add
/// its own basic types (such as AVX vectors)
template <class T>
struct is_arithmetic : std::integral_constant<bool, std::is_arithmetic<T>::value> {};

template <class T, class U>
struct is_assignable : std::integral_constant<bool, std::is_assignable<T, U>::value> {};

template <class T>
struct is_floating_point
    : std::integral_constant<bool, std::is_floating_point<T>::value> {};

} // namespace hila
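
// Example (illustrative): using hila::is_arithmetic for compile-time checks;
// a vectorized backend may specialize it for its own vector types. The
// function below is hypothetical.
//   template <typename T>
//   void axpy(T a, const T *x, T *y, size_t n) {
//       static_assert(hila::is_arithmetic<T>::value, "arithmetic type required");
//       for (size_t i = 0; i < n; ++i) y[i] += a * x[i];
//   }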

#endif