// Allocation entry point of the GPU memory pool. The pool flavour of the
// gpuMalloc() macro expands to a call of this function.
// Prototype only -- presumably the new allocation is returned through *p and
// req_size is a byte count (TODO confirm against the definition).
void gpu_memory_pool_alloc(void **p, size_t req_size);
// Release entry point of the GPU memory pool. The pool flavour of the
// gpuFree() macro expands to a call of this function.
// Prototype only -- presumably ptr must come from gpu_memory_pool_alloc()
// (TODO confirm against the definition).
void gpu_memory_pool_free(void *ptr);
// Target of the gpuMemPoolPurge() macro in the memory-pool configuration.
// Prototype only; takes no arguments and returns nothing.
void gpu_memory_pool_purge();
// Target of the gpuMemPoolReport() macro in the memory-pool configuration.
// Prototype only; takes no arguments and returns nothing.
void gpu_memory_pool_report();
// Prototype only. Judging by the name this seeds the device-side random
// number generator with the given value -- confirm against the definition.
void seed_device_rng(unsigned long long seed);
39#include <cuda_runtime.h>
42using gpuError = cudaError;
43#define gpuSuccess cudaSuccess
// Memory-pool flavour of the allocation macros (CUDA section): every request
// is forwarded to the gpu_memory_pool_* functions.
//
// gpuMalloc(a, b): `a` is the address of the pointer to fill, `b` the
// requested size. The arguments are parenthesised before the cast so that an
// expression argument (e.g. `base + i`) is converted as a whole instead of
// having the cast bind only to its first operand.
#define gpuMalloc(a, b) gpu_memory_pool_alloc((void **)(a), (b))
#define gpuFree(a) gpu_memory_pool_free(a)
#define gpuMemPoolPurge() gpu_memory_pool_purge()
#define gpuMemPoolReport() gpu_memory_pool_report()
// No-op versions of the pool maintenance macros (presumably the branch used
// when the memory pool is not compiled in). The empty do/while still requires
// a trailing semicolon, so call sites read the same in every configuration.
#define gpuMemPoolPurge() do {} while (0)
#define gpuMemPoolReport() do {} while (0)
61#ifdef CUDA_MALLOC_ASYNC
// Stream-ordered allocation flavour (CUDA_MALLOC_ASYNC): allocate and free on
// stream 0 through cudaMallocAsync / cudaFreeAsync, routed through GPU_CHECK.
//
// The pointer argument is cast to void** exactly like the other gpuMalloc
// variants in this file, so a call site that passes the address of a typed
// pointer behaves the same regardless of which allocator is selected.
#define gpuMalloc(a, b) GPU_CHECK(cudaMallocAsync((void **)(a), (b), 0))
#define gpuFree(a) GPU_CHECK(cudaFreeAsync((a), 0))
// Plain allocation flavour (CUDA section): direct cudaMalloc / cudaFree,
// routed through GPU_CHECK.
//
// The macro arguments are parenthesised before the void** cast so that an
// expression argument is converted as a whole rather than operand-by-operand.
#define gpuMalloc(a, b) GPU_CHECK(cudaMalloc((void **)(a), (b)))
#define gpuFree(a) GPU_CHECK(cudaFree(a))
// ---- CUDA spellings of the backend-neutral gpu* API ----
// Function-like wrappers are routed through GPU_CHECK(); the object-like
// aliases are plain renames and leave any status handling to the caller.

// Error query.
#define gpuGetLastError cudaGetLastError

// Memory copy and the copy-direction constants.
#define gpuMemcpy(dst, src, count, kind) GPU_CHECK(cudaMemcpy(dst, src, count, kind))
#define gpuMemcpyHostToDevice cudaMemcpyHostToDevice
#define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost
#define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice

// Synchronisation and stream lifetime.
#define gpuDeviceSynchronize() GPU_CHECK(cudaDeviceSynchronize())
#define gpuStreamSynchronize(stream) GPU_CHECK(cudaStreamSynchronize(stream))
#define gpuStreamCreate(stream_ptr) GPU_CHECK(cudaStreamCreate(stream_ptr))
#define gpuStreamDestroy(stream) GPU_CHECK(cudaStreamDestroy(stream))

// Fill and symbol upload.
#define gpuMemset(ptr, value, count) GPU_CHECK(cudaMemset(ptr, value, count))
#define gpuMemcpyToSymbol(symbol, src, size, offset, kind) GPU_CHECK(cudaMemcpyToSymbol(symbol, src, size, offset, kind))

// Kernel attribute query.
#define gpuFuncAttributes cudaFuncAttributes
#define gpuFuncGetAttributes cudaFuncGetAttributes
// Human-readable name of the active backend.
#define GPUTYPESTR "CUDA"

// Backend-neutral spelling of CUDA's device-compilation marker: expands to
// __CUDA_ARCH__.
#define _GPU_DEVICE_COMPILE_ __CUDA_ARCH__
100#include <hip/hip_runtime.h>
101#include <hiprand/hiprand.h>
105using gpuError = hipError_t;
106#define gpuSuccess hipSuccess
110#ifdef GPU_MEMORY_POOL
// Memory-pool flavour of the allocation macros (HIP section): every request
// is forwarded to the gpu_memory_pool_* functions.
//
// gpuMalloc(a, b): `a` is the address of the pointer to fill, `b` the
// requested size. The arguments are parenthesised before the cast so that an
// expression argument (e.g. `base + i`) is converted as a whole instead of
// having the cast bind only to its first operand.
#define gpuMalloc(a, b) gpu_memory_pool_alloc((void **)(a), (b))
#define gpuFree(a) gpu_memory_pool_free(a)
#define gpuMemPoolPurge() gpu_memory_pool_purge()
#define gpuMemPoolReport() gpu_memory_pool_report()
// No-op versions of the pool maintenance macros (presumably the branch used
// when the memory pool is not compiled in). The empty do/while still requires
// a trailing semicolon, so call sites read the same in every configuration.
#define gpuMemPoolPurge() do { } while (0)
#define gpuMemPoolReport() do { } while (0)
// Plain allocation flavour (HIP section): direct hipMalloc / hipFree, routed
// through GPU_CHECK.
//
// The macro arguments are parenthesised before the void** cast so that an
// expression argument is converted as a whole rather than operand-by-operand.
#define gpuMalloc(a, b) GPU_CHECK(hipMalloc((void **)(a), (b)))
#define gpuFree(a) GPU_CHECK(hipFree(a))
// ---- HIP spellings of the backend-neutral gpu* API ----
// Function-like wrappers are routed through GPU_CHECK(); the object-like
// aliases are plain renames and leave any status handling to the caller.

// Error query.
#define gpuGetLastError hipGetLastError

// Memory copy and the copy-direction constants.
#define gpuMemcpy(dst, src, count, kind) GPU_CHECK(hipMemcpy(dst, src, count, kind))
#define gpuMemcpyHostToDevice hipMemcpyHostToDevice
#define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost
#define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice

// Synchronisation and stream lifetime.
#define gpuDeviceSynchronize() GPU_CHECK(hipDeviceSynchronize())
#define gpuStreamSynchronize(stream) GPU_CHECK(hipStreamSynchronize(stream))
#define gpuStreamCreate(stream_ptr) GPU_CHECK(hipStreamCreate(stream_ptr))
#define gpuStreamDestroy(stream) GPU_CHECK(hipStreamDestroy(stream))

// Fill and symbol upload. The symbol argument is wrapped in HIP_SYMBOL()
// before it is handed to hipMemcpyToSymbol.
#define gpuMemset(ptr, value, count) GPU_CHECK(hipMemset(ptr, value, count))
#define gpuMemcpyToSymbol(symbol, src, size, offset, kind)                                         \
    GPU_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(symbol), src, size, offset, kind))

// Kernel attribute query.
#define gpuFuncAttributes hipFuncAttributes
#define gpuFuncGetAttributes hipFuncGetAttributes
// Human-readable name of the active backend.
#define GPUTYPESTR "HIP"
148#ifdef __HIP_DEVICE_COMPILE__
// Backend-neutral spelling of HIP's device-compilation marker: expands to
// __HIP_DEVICE_COMPILE__ (only defined when that macro itself is defined).
#define _GPU_DEVICE_COMPILE_ __HIP_DEVICE_COMPILE__
158#define GPU_CHECK(cmd) \
161 gpu_exit_on_error(code, #cmd, __FILE__, __LINE__); \
// Call-site helpers: add the caller's __FILE__ and __LINE__ and delegate to
// the matching gpu_exit_on_error() overload.
//   check_device_error(msg)            -> message-only overload
//   check_device_error_code(code, msg) -> overload taking an explicit status
#define check_device_error(message) gpu_exit_on_error(message, __FILE__, __LINE__)
#define check_device_error_code(status, message) gpu_exit_on_error(status, message, __FILE__, __LINE__)
// Message-only overload; check_device_error() expands to a call of this with
// the call site's file name and line number. Prototype only -- going by the
// name it terminates the program if a device error is pending (confirm against
// the definition).
void gpu_exit_on_error(const char *msg, const char *file, int line);
167void gpu_exit_on_error(gpuError code,
const char *msg,
const char *file,
int line);
170inline void synchronize_threads() {
171 gpuDeviceSynchronize();
// ---- Stub definitions of the gpu* API ----
// Every function-like macro below swallows its arguments and expands to an
// empty statement, so code written against the gpu* API still parses when
// these definitions are active (presumably the build without a GPU backend).

// Memory management and transfers.
#define gpuMalloc(ptr, size) do { } while (0)
#define gpuFree(ptr) do { } while (0)
#define gpuMemcpy(dst, src, size, kind) do { } while (0)
#define gpuMemcpyHostToDevice 1
#define gpuMemcpyDeviceToHost 2
#define gpuMemset(ptr, value, size) do { } while (0)
#define gpuMemcpyToSymbol(symbol, src, size, offset, kind) do { } while (0)

// Memory-pool maintenance.
#define gpuMemPoolPurge() do { } while (0)
#define gpuMemPoolReport() do { } while (0)

// Error checks.
#define check_device_error(message) do { } while (0)
#define check_device_error_code(status, message) do { } while (0)

// Synchronisation.
#define gpuStreamSynchronize(stream) do { } while (0)
#define gpuDeviceSynchronize() do { } while (0)

// NOTE(review): this still names the CUDA function although every other entry
// in this group is a stub; any use of gpuGetLastError here needs
// cudaGetLastError to be declared. Confirm whether that is intended.
#define gpuGetLastError cudaGetLastError
// Human-readable name of the active backend: none.
#define GPUTYPESTR "NONE"
// Empty stand-in for synchronize_threads(): this variant performs no work and
// returns immediately.
inline void synchronize_threads() {
    // intentionally empty
}
// Prototype only. Takes two integers named `rank` and `device`; presumably
// the calling process's rank and the index of the GPU it should bind to
// (TODO confirm against the definition).
void initialize_gpu(int rank, int device);
// Prototype only; takes no arguments and returns nothing. Going by the name
// it reports properties of the active device (confirm against the definition).
void gpu_device_info();
230struct is_arithmetic : std::integral_constant<bool, std::is_arithmetic<T>::value> {};
// Local counterpart of std::is_assignable<T, U>. It derives from the standard
// trait's integral_constant, so ::value and ::type are exactly those of
// std::is_assignable<T, U>.
template <class T, class U>
struct is_assignable : std::is_assignable<T, U>::type {};
236struct is_floating_point : std::integral_constant<bool, std::is_floating_point<T>::value> {};
Implement hila::swap for gauge fields.
void free_device_rng()
Frees the GPU RNG state; does nothing on non-GPU architectures.