HILA
Loading...
Searching...
No Matches
backend_gpu/defs.h
1#ifndef GPU_DEFS_H
2#define GPU_DEFS_H
3
4// On Puhti, use UCX_MEMTYPE_CACHE=n with
5// GPU_AWARE_MPI
6
#include <cstddef>     // size_t, used by the memory pool prototypes below
#include <iostream>
#include <sstream>
#include <type_traits> // std::integral_constant etc., used by hila type traits below
9
// Prototypes for memory pool ops; these back the gpuMalloc()/gpuFree()/
// gpuMemPoolPurge()/gpuMemPoolReport() macros below when GPU_MEMORY_POOL
// is defined. Implementations live in the gpu backend sources.
void gpu_memory_pool_alloc(void **p, size_t req_size); // allocate req_size bytes, result in *p
void gpu_memory_pool_free(void *ptr);                  // release an allocation obtained above
void gpu_memory_pool_purge();  // NOTE(review): presumably returns cached blocks to the device - confirm in impl
void gpu_memory_pool_report(); // print pool usage statistics
15
////////////////////////////////////////////////////////////////////////////////////
// Device RNG interface
////////////////////////////////////////////////////////////////////////////////////
namespace hila {

// double random(); // defined in random.h

/// Seed the device random number generator
void seed_device_rng(unsigned long long seed);

/// Free device RNG state; does nothing on non-GPU archs
void free_device_rng();

} // namespace hila
27
28
#ifndef HILAPP

// GPU specific definitions

////////////////////////////////////////////////////////////////////////////////////
// Some cuda-specific definitions
////////////////////////////////////////////////////////////////////////////////////
#if defined(CUDA)

#include <cuda.h>
#include <cuda_runtime.h>
#include <cub/cub.cuh>

// This is set in params.h now
// #define N_threads 256 // Threads per block for CUDA TODO: make configurable?

// Common error type so code can be written uniformly for CUDA and HIP
using gpuError = cudaError;
#define gpuSuccess cudaSuccess

/////////////////////////////////////////////
// If gpu memory pool in use, the interface to memory
#ifdef GPU_MEMORY_POOL
#define gpuMalloc(a, b) gpu_memory_pool_alloc((void **)a, b)
#define gpuFree(a) gpu_memory_pool_free(a)
#define gpuMemPoolPurge() gpu_memory_pool_purge()
#define gpuMemPoolReport() gpu_memory_pool_report()

#else
// here std interfaces

// Without the pool, the maintenance calls expand to no-ops
// clang-format off
#define gpuMemPoolPurge() do { } while (0)
#define gpuMemPoolReport() do { } while (0)
// clang-format on

#ifdef CUDA_MALLOC_ASYNC
// Stream-ordered allocator (requires CUDA 11.2+); alloc/free are queued
// on the default stream (0)
#define gpuMalloc(a, b) GPU_CHECK(cudaMallocAsync(a, b, 0))
#define gpuFree(a) GPU_CHECK(cudaFreeAsync(a, 0))

#else
// Plain synchronous device allocation
#define gpuMalloc(a, b) GPU_CHECK(cudaMalloc((void **)a, b))
#define gpuFree(a) GPU_CHECK(cudaFree(a))

#endif

#endif // gpu memory pool
/////////////////////////////////////////////

// Error-checked wrappers for common runtime calls (GPU_CHECK defined below)
#define gpuGetLastError cudaGetLastError
#define gpuMemcpy(a, b, c, d) GPU_CHECK(cudaMemcpy(a, b, c, d))
#define gpuMemcpyHostToDevice cudaMemcpyHostToDevice
#define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost
#define gpuDeviceSynchronize() GPU_CHECK(cudaDeviceSynchronize())
#define gpuStreamSynchronize(a) GPU_CHECK(cudaStreamSynchronize(a))
#define gpuMemset(a,b,c) GPU_CHECK(cudaMemset(a,b,c))

#define GPUTYPESTR "CUDA"

// __GPU_DEVICE_COMPILE__ is defined only during device-side compilation
#ifdef __CUDA_ARCH__
#define __GPU_DEVICE_COMPILE__ __CUDA_ARCH__
#endif
91
////////////////////////////////////////////////////////////////////////////////////
// Same for HIP
////////////////////////////////////////////////////////////////////////////////////
#elif defined(HIP)

#include <hip/hip_runtime.h>
#include <hiprand/hiprand.h>

//#include <hipcub/hipcub.hpp>*

// Set in params.h now
// #define N_threads 256 // Threads per block for CUDAs

// Common error type so code can be written uniformly for CUDA and HIP
using gpuError = hipError_t;
#define gpuSuccess hipSuccess

/////////////////////////////////////////////
// If gpu memory pool in use, the interface to memory
#ifdef GPU_MEMORY_POOL
#define gpuMalloc(a, b) gpu_memory_pool_alloc((void **)a, b)
#define gpuFree(a) gpu_memory_pool_free(a)
#define gpuMemPoolPurge() gpu_memory_pool_purge()
#define gpuMemPoolReport() gpu_memory_pool_report()


#else
// here std interfaces

// Without the pool, the maintenance calls expand to no-ops
// clang-format off
#define gpuMemPoolPurge() do {} while (0)
#define gpuMemPoolReport() do {} while (0)
// clang-format on

#define gpuMalloc(a, b) GPU_CHECK(hipMalloc((void **)a, b))
#define gpuFree(a) GPU_CHECK(hipFree(a))

#endif // ifdef memory pool

// Error-checked wrappers for common runtime calls (GPU_CHECK defined below)
#define gpuGetLastError hipGetLastError
#define gpuMemcpy(a, b, siz, d) GPU_CHECK(hipMemcpy(a, b, siz, d))
#define gpuMemcpyHostToDevice hipMemcpyHostToDevice
#define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost
#define gpuDeviceSynchronize() GPU_CHECK(hipDeviceSynchronize())
#define gpuStreamSynchronize(a) GPU_CHECK(hipStreamSynchronize(a))
#define gpuMemset(a,b,c) GPU_CHECK(hipMemset(a,b,c))

#define GPUTYPESTR "HIP"

// __GPU_DEVICE_COMPILE__ is defined only during device-side compilation
#ifdef __HIP_DEVICE_COMPILE__
#define __GPU_DEVICE_COMPILE__ __HIP_DEVICE_COMPILE__
#endif

#endif
////////////////////////////////////////////////////////////////////////////////////
// General GPU (cuda/hip) definitions
////////////////////////////////////////////////////////////////////////////////////


// Wrap a runtime API call; aborts with file/line info via gpu_exit_on_error
// if the returned code is not gpuSuccess
#define GPU_CHECK(cmd) \
    do { \
        auto code = cmd; \
        gpu_exit_on_error(code, #cmd, __FILE__, __LINE__); \
    } while (0)

// Check for a pending device error (msg identifies the call site)
#define check_device_error(msg) gpu_exit_on_error(msg, __FILE__, __LINE__)
// Check an explicit error code
#define check_device_error_code(code, msg) \
    gpu_exit_on_error(code, msg, __FILE__, __LINE__)
// Overloads implemented in the backend: the first
// NOTE(review): presumably queries the last runtime error itself - confirm in impl
void gpu_exit_on_error(const char *msg, const char *file, int line);
void gpu_exit_on_error(gpuError code, const char *msg, const char *file, int line);

namespace hila {
// Block the host until all queued device work has completed
inline void synchronize_threads() {
    gpuDeviceSynchronize();
}
} // namespace hila
167
#else // NOW HILAPP

////////////////////////////////////////////////////////////////////////////////////
// Now not cuda or hip - hilapp stage scans this section
///////////////////////////////////////////////////////////////////////////////////


using gpuError = int;

// Define empty stubs - these expand to no-ops, so that source scanned by
// hilapp parses without the CUDA/HIP runtime present
// clang-format off
#define gpuMalloc(a, b) do {} while(0)
#define gpuFree(a) do {} while(0)
#define gpuMemcpy(a, b, siz, d) do {} while(0)
#define gpuMemPoolPurge() do {} while(0)
#define gpuMemPoolReport() do {} while(0)

#define check_device_error(msg) do {} while(0)
#define check_device_error_code(code, msg) do {} while(0)

#define gpuStreamSynchronize(a) do {} while(0)
#define gpuDeviceSynchronize() do {} while(0)
// clang-format on


#define GPUTYPESTR "NONE"

namespace hila {
// No device work to wait for in the hilapp pass
inline void synchronize_threads() {}
} // namespace hila

#endif
200////////////////////////////////////////////////////////////////////////////////////
201
// Per-backend device setup: select/initialize the device for this rank,
// and print device information; implemented in the backend sources
void initialize_gpu(int rank,int device);
void gpu_device_info();

// The definitions below are compiled by every pass, including when this
// is not the CUDA/HIP compiler (e.g. the hilapp stage)
207
namespace hila {

/// Mirror of std::is_arithmetic. Identical behavior to the std trait, but
/// owning the template lets a backend specialize it for its own basic
/// types (such as AVX vectors).
template <class A>
struct is_arithmetic : std::integral_constant<bool, std::is_arithmetic<A>::value> {};

/// Mirror of std::is_assignable, specializable by backends.
template <class A, class B>
struct is_assignable : std::integral_constant<bool, std::is_assignable<A, B>::value> {};

/// Mirror of std::is_floating_point, specializable by backends.
template <class A>
struct is_floating_point : std::integral_constant<bool, std::is_floating_point<A>::value> {};

} // namespace hila
224
225#endif
Invert diagonal + const. matrix using Sherman-Morrison formula.
Definition array.h:920
void free_device_rng()
Free GPU RNG state, does nothing on non-GPU archs.
Definition hila_gpu.cpp:106