// Declarations of the GPU memory pool interface. The gpuMalloc / gpuFree /
// gpuMemPoolPurge / gpuMemPoolReport macros forward to these functions in
// their memory-pool variants.

// Receives the address of the caller's pointer (p) and the requested size.
// NOTE(review): presumably stores a block of at least req_size bytes in *p --
// confirm against the implementation.
void gpu_memory_pool_alloc(void **p, size_t req_size);

// Takes a pointer to hand back to the pool.
// NOTE(review): presumably must originate from gpu_memory_pool_alloc -- confirm.
void gpu_memory_pool_free(void *ptr);

// NOTE(review): judging by the name, releases the memory held by the pool --
// confirm.
void gpu_memory_pool_purge();

// NOTE(review): judging by the name, reports pool usage -- confirm.
void gpu_memory_pool_report();
// Declaration only; the definition is not part of this header section.
// NOTE(review): judging by the name, seeds the device random number generator
// with `seed` -- confirm against the implementation.
void seed_device_rng(unsigned long long seed);
39#include <cuda_runtime.h>
45using gpuError = cudaError;
46#define gpuSuccess cudaSuccess
// Memory-pool variants of the allocation macros: each one forwards to the
// corresponding gpu_memory_pool_* function.
//
// gpuMalloc(a, b): a is the address of the pointer to fill, b the size.
// The argument is parenthesized before the cast so that the cast applies to
// the whole argument expression (e.g. `flag ? &p : &q`) and not only to its
// first operand.
#define gpuMalloc(a, b) gpu_memory_pool_alloc((void **)(a), b)
// gpuFree(a): a is the pointer to release.
#define gpuFree(a) gpu_memory_pool_free(a)
#define gpuMemPoolPurge() gpu_memory_pool_purge()
#define gpuMemPoolReport() gpu_memory_pool_report()
// No-op versions of the pool maintenance macros: each expands to an empty
// do/while statement, so a call still needs its trailing semicolon.
#define gpuMemPoolPurge()                                                      \
    do {                                                                       \
    } while (0)
#define gpuMemPoolReport()                                                     \
    do {                                                                       \
    } while (0)
64#ifdef CUDA_MALLOC_ASYNC
// Allocation macros built on cudaMallocAsync / cudaFreeAsync. Both pass 0 as
// the stream argument and wrap the call in GPU_CHECK.
#define gpuMalloc(ptr, bytes) GPU_CHECK(cudaMallocAsync(ptr, bytes, 0))
#define gpuFree(ptr) GPU_CHECK(cudaFreeAsync(ptr, 0))
// Allocation macros built on cudaMalloc / cudaFree, each wrapped in GPU_CHECK.
//
// gpuMalloc(a, b): a is the address of the pointer to fill, b the size.
// The argument is parenthesized before the cast so that the cast applies to
// the whole argument expression (e.g. `flag ? &p : &q`) and not only to its
// first operand.
#define gpuMalloc(a, b) GPU_CHECK(cudaMalloc((void **)(a), b))
// gpuFree(a): a is the pointer to release.
#define gpuFree(a) GPU_CHECK(cudaFree(a))
// Backend-neutral spellings of the CUDA runtime names used by the rest of the
// code. The function-like macros wrap the runtime call in GPU_CHECK.

// Plain aliases (no GPU_CHECK wrapping).
#define gpuGetLastError cudaGetLastError
#define gpuMemcpyHostToDevice cudaMemcpyHostToDevice
#define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost
#define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice

// Memory operations.
#define gpuMemcpy(dst, src, bytes, kind) GPU_CHECK(cudaMemcpy(dst, src, bytes, kind))
#define gpuMemset(dst, value, bytes) GPU_CHECK(cudaMemset(dst, value, bytes))
#define gpuMemcpyToSymbol(symbol, src, bytes, offset, kind)                    \
    GPU_CHECK(cudaMemcpyToSymbol(symbol, src, bytes, offset, kind))

// Synchronization.
#define gpuDeviceSynchronize() GPU_CHECK(cudaDeviceSynchronize())
#define gpuStreamSynchronize(stream) GPU_CHECK(cudaStreamSynchronize(stream))

// Name of the active GPU backend as a string literal.
#define GPUTYPESTR "CUDA"
91#define __GPU_DEVICE_COMPILE__ __CUDA_ARCH__
99#include <hip/hip_runtime.h>
100#include <hiprand/hiprand.h>
107using gpuError = hipError_t;
108#define gpuSuccess hipSuccess
112#ifdef GPU_MEMORY_POOL
// Memory-pool variants of the allocation macros: each one forwards to the
// corresponding gpu_memory_pool_* function.
//
// gpuMalloc(a, b): a is the address of the pointer to fill, b the size.
// The argument is parenthesized before the cast so that the cast applies to
// the whole argument expression (e.g. `flag ? &p : &q`) and not only to its
// first operand.
#define gpuMalloc(a, b) gpu_memory_pool_alloc((void **)(a), b)
// gpuFree(a): a is the pointer to release.
#define gpuFree(a) gpu_memory_pool_free(a)
#define gpuMemPoolPurge() gpu_memory_pool_purge()
#define gpuMemPoolReport() gpu_memory_pool_report()
// No-op versions of the pool maintenance macros: each expands to an empty
// do/while statement, so a call still needs its trailing semicolon.
#define gpuMemPoolPurge()                                                      \
    do {                                                                       \
    } while (0)
#define gpuMemPoolReport()                                                     \
    do {                                                                       \
    } while (0)
// Allocation macros built on hipMalloc / hipFree, each wrapped in GPU_CHECK.
//
// gpuMalloc(a, b): a is the address of the pointer to fill, b the size.
// The argument is parenthesized before the cast so that the cast applies to
// the whole argument expression (e.g. `flag ? &p : &q`) and not only to its
// first operand.
#define gpuMalloc(a, b) GPU_CHECK(hipMalloc((void **)(a), b))
// gpuFree(a): a is the pointer to release.
#define gpuFree(a) GPU_CHECK(hipFree(a))
// Backend-neutral spellings of the HIP runtime names used by the rest of the
// code. The function-like macros wrap the runtime call in GPU_CHECK.

// Plain aliases (no GPU_CHECK wrapping).
#define gpuGetLastError hipGetLastError
#define gpuMemcpyHostToDevice hipMemcpyHostToDevice
#define gpuMemcpyDeviceToHost hipMemcpyDeviceToHost
#define gpuMemcpyDeviceToDevice hipMemcpyDeviceToDevice

// Memory operations. The symbol argument of gpuMemcpyToSymbol is passed
// through HIP_SYMBOL() before it reaches hipMemcpyToSymbol.
#define gpuMemcpy(dst, src, bytes, kind) GPU_CHECK(hipMemcpy(dst, src, bytes, kind))
#define gpuMemset(dst, value, bytes) GPU_CHECK(hipMemset(dst, value, bytes))
#define gpuMemcpyToSymbol(symbol, src, bytes, offset, kind)                    \
    GPU_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(symbol), src, bytes, offset, kind))

// Synchronization.
#define gpuDeviceSynchronize() GPU_CHECK(hipDeviceSynchronize())
#define gpuStreamSynchronize(stream) GPU_CHECK(hipStreamSynchronize(stream))

// Name of the active GPU backend as a string literal.
#define GPUTYPESTR "HIP"
146#ifdef __HIP_DEVICE_COMPILE__
147#define __GPU_DEVICE_COMPILE__ __HIP_DEVICE_COMPILE__
156#define GPU_CHECK(cmd) \
159 gpu_exit_on_error(code, #cmd, __FILE__, __LINE__); \
// Error-check helpers that record the call site: both forward to
// gpu_exit_on_error together with the __FILE__ and __LINE__ of the macro use.
//   check_device_error(message)               -> message-only overload
//   check_device_error_code(status, message)  -> overload taking a status code
#define check_device_error(message) gpu_exit_on_error(message, __FILE__, __LINE__)
#define check_device_error_code(status, message)                               \
    gpu_exit_on_error(status, message, __FILE__, __LINE__)
165void gpu_exit_on_error(
const char *msg,
const char *file,
int line);
166void gpu_exit_on_error(gpuError code,
const char *msg,
const char *file,
int line);
169inline void synchronize_threads() {
170 gpuDeviceSynchronize();
// Stub definitions: every GPU operation macro expands to an empty do/while
// statement, so calls compile to nothing while still requiring a semicolon.
// The arguments of the function-like macros are discarded.
#define gpuMalloc(a, b) do {} while(0)
#define gpuFree(a) do {} while(0)
#define gpuMemcpy(a, b, siz, d) do {} while(0)
// Copy-direction constants. gpuMemcpyDeviceToDevice is provided as well so
// that this section offers the same set of names as the CUDA and HIP
// sections; 1, 2, 3 are the values of the matching cudaMemcpyKind enumerators.
#define gpuMemcpyHostToDevice 1
#define gpuMemcpyDeviceToHost 2
#define gpuMemcpyDeviceToDevice 3
#define gpuMemset(a,b,c) do {} while(0)
#define gpuMemcpyToSymbol(a, b, size, c, dir) do {} while(0)

#define gpuMemPoolPurge() do {} while(0)
#define gpuMemPoolReport() do {} while(0)

#define check_device_error(msg) do {} while(0)
#define check_device_error_code(code, msg) do {} while(0)

#define gpuStreamSynchronize(a) do {} while(0)
#define gpuDeviceSynchronize() do {} while(0)

// NOTE(review): this maps to a CUDA name even though GPUTYPESTR below is
// "NONE"; it only matters if gpuGetLastError is actually used in such a
// build -- confirm this is intentional.
#define gpuGetLastError cudaGetLastError

// Name of the active GPU backend as a string literal.
#define GPUTYPESTR "NONE"
// Variant with an empty body: calling it does nothing.
inline void synchronize_threads() {
    return;
}
// Declaration only; takes two integers named `rank` and `device`.
// NOTE(review): presumably `rank` is the process rank and `device` the GPU
// index to use -- confirm against the implementation.
void initialize_gpu(int rank, int device);

// Declaration only.
// NOTE(review): judging by the name, prints or collects information about the
// GPU device -- confirm.
void gpu_device_info();
229struct is_arithmetic : std::integral_constant<bool, std::is_arithmetic<T>::value> {};
// Trait wrapper around std::is_assignable: derives from
// std::integral_constant<bool, B>, where B is std::is_assignable<T, U>::value,
// so `::value` answers the same question as the standard trait.
template <typename T, typename U>
struct is_assignable
    : std::integral_constant<bool, std::is_assignable<T, U>::value> {
};
235struct is_floating_point
236 : std::integral_constant<bool, std::is_floating_point<T>::value> {};
Implement hila::swap for gauge fields.
void free_device_rng()
Frees the GPU RNG state; does nothing on non-GPU architectures.