// GPU memory pool allocator: caches device memory so repeated allocations
// avoid cudaMalloc/hipMalloc round-trips.  Compiled only when
// GPU_MEMORY_POOL is defined; requires a CUDA or HIP backend.
// (NOTE(review): numeric prefixes such as "5#include" are fused original
// line numbers from a broken extraction — left untouched here.)
5#include "plumbing/lattice.h"
7#include "plumbing/backend_gpu/defs.h"
29#if defined(GPU_MEMORY_POOL)
// Fraction of total device memory grabbed per pool extension (default 20%).
31#ifndef GPU_MEMORY_POOL_FRACTION
32#define GPU_MEMORY_POOL_FRACTION 0.2
// The pool is meaningless without a GPU backend — fail the build loudly.
35#if !defined(CUDA) && !defined(HIP)
36static_assert(0 &&
"HIP or CUDA must be defined");
// Every pool allocation is rounded up to this byte alignment.
40#define ALLOC_ALIGNMENT 256
// ---- bookkeeping counters (bytes unless stated otherwise) ----
51static size_t max_used_size = 0;      // high-water mark of in-use bytes
52static size_t current_used_size = 0;  // bytes currently handed out to callers
53static size_t total_pool_size = 0;    // total bytes obtained from the device
54static size_t n_allocs = 0;           // number of pool allocation calls
55static size_t free_list_size;         // current length of the free list
56static size_t gpu_total_memory = 0;   // device totalGlobalMem (set in init)
// running sums used for the averages printed by gpu_memory_pool_report()
58static double free_list_avg_size = 0;
59static double free_list_avg_search = 0;
// ---- block-descriptor lists ----
// NOTE(review): up/down appear to order blocks by device address, next/prev
// link the free / in-use lists — confirm against the full source.
61static block *memory_blocks;
62static block *free_blocks;
63static block *in_use_blocks;
65static block *unused_blocks;   // stack of recycled descriptor structs
// Bulk-allocate a fresh batch of host-side block descriptors with memalloc
// and chain them through the 'up' pointer; the last descriptor terminates
// the chain with nullptr.  Returns the head of the new chain.
// (Partial extract: n_blocks and the loop body linking p[i] are not visible.)
70block *alloc_more_block_descriptors() {
74    p = (block *)memalloc(n_blocks *
sizeof(block));
76    for (
int i = 0; i < n_blocks - 1; i++) {
// last descriptor ends the singly linked chain
80    p[n_blocks - 1].up =
nullptr;
// Pop one descriptor off the recycle stack (unused_blocks), refilling the
// stack via alloc_more_block_descriptors() when it is empty.  All four link
// pointers of the returned descriptor are cleared before it is handed out.
85block *get_block_descriptor() {
86    if (unused_blocks ==
nullptr) {
87        unused_blocks = alloc_more_block_descriptors();
89    block *ret = unused_blocks;
90    unused_blocks = unused_blocks->up;
// fully unlink so stale pointers can never leak into the lists
92    ret->up = ret->down = ret->next = ret->prev =
nullptr;
// Return a descriptor to the recycle stack; descriptors are pushed through
// the 'up' pointer (the stack is unused_blocks).
96void release_block_descriptor(block *p) {
97    p->up = unused_blocks;
// Allocate m_alloc bytes straight from the device (cudaMalloc / hipMalloc),
// after rounding the request up to ALLOC_ALIGNMENT.  Logs the size on node 0
// and adds the allocation to total_pool_size.  Returns the device pointer.
104void *gpu_memory_allocate(
size_t m_alloc) {
106    const int mb = kb * kb;   // bytes per MB (kb defined outside this extract)
// round the request up to the next ALLOC_ALIGNMENT boundary
109    if (m_alloc % ALLOC_ALIGNMENT != 0)
110        m_alloc = m_alloc - m_alloc % ALLOC_ALIGNMENT + ALLOC_ALIGNMENT;
112    double fraction = (double)m_alloc / gpu_total_memory;
114    hila::out0 <<
"GPU memory: allocating " << m_alloc / mb <<
" MB out of total "
115 << gpu_total_memory / mb <<
"(" << (int)(fraction * 100) <<
"%)\n";
116    total_pool_size += m_alloc;
// backend-specific device allocation (CUDA vs HIP, selected by #if outside view)
122    GPU_CHECK(cudaMalloc(&b, m_alloc));
124    GPU_CHECK(hipMalloc(&b, m_alloc));
// One-time pool setup: query the device's total memory, carve out an initial
// slab of GPU_MEMORY_POOL_FRACTION of it as a single free block, and reset
// the usage counters.
134void gpu_memory_pool_init() {
// query total device memory via the active backend (CUDA or HIP branch)
138    cudaDeviceProp props;
140    GPU_CHECK(cudaGetDevice(&my_device));
141    GPU_CHECK(cudaGetDeviceProperties(&props, my_device));
143    hipDeviceProp_t props;
145    GPU_CHECK(hipGetDevice(&my_device));
146    GPU_CHECK(hipGetDeviceProperties(&props, my_device));
149    gpu_total_memory = props.totalGlobalMem;
// initial pool size: a fraction of device memory, rounded up to alignment
152    size_t m_alloc = gpu_total_memory * GPU_MEMORY_POOL_FRACTION;
154    m_alloc = m_alloc - m_alloc % ALLOC_ALIGNMENT + ALLOC_ALIGNMENT;
156    block *b = get_block_descriptor();
159    b->ptr = gpu_memory_allocate(m_alloc);
// the single starting block is linked to nothing yet
161    b->up = b->down = b->next = b->prev =
nullptr;
167    in_use_blocks =
nullptr;
170    current_used_size = 0;
// Detach p from the doubly linked list with head *head by re-stitching the
// neighbours' next/prev links.  (The *head == p update is not visible in
// this extract — presumably handled in the missing lines.)
178void remove_from_list(block *p, block **head) {
179    if (p->next !=
nullptr)
180        p->next->prev = p->prev;
181    if (p->prev !=
nullptr)
182        p->prev->next = p->next;
// Push p onto the front of the doubly linked list with head *head.
187void insert_to_list_head(block *p, block **head) {
188    if (*head !=
nullptr)
// Coalesce free block p into the free block directly below it (p->down):
// the lower block absorbs p's size, the vertical (address-order) chain is
// re-stitched around p, and p is removed from the free list and its
// descriptor recycled.
200void merge_block_down_free(block *p) {
201    block *pdown = p->down;
202    pdown->size += p->size;
// fix the up/down chain around the removed block
204    if (p->up !=
nullptr)
209    remove_from_list(p, &free_blocks);
213    release_block_descriptor(p);
// Coalesce free block p into the free block directly above it — the mirror
// image of merge_block_down_free: the upper block absorbs p's size, the
// chain is re-stitched, and p's descriptor is recycled.
217void merge_block_up_free(block *p) {
219    pup->size += p->size;
222    if (p->down !=
nullptr)
228    remove_from_list(p, &free_blocks);
232    release_block_descriptor(p);
// Split req_size bytes off the top (highest-address) end of free block p:
// a fresh descriptor b takes the tail region p->ptr + p->size - req_size and
// is returned to the caller, while p keeps the remainder on the free list.
237block *split_free_block(block *p,
size_t req_size) {
238    block *b = get_block_descriptor();
242    if (b->up !=
nullptr)
// the new block occupies the last req_size bytes of p's region
246    b->ptr =
static_cast<char *
>(p->ptr) + p->size - req_size;
// Mark p free and put it at the head of the free list.
257void mark_block_free(block *p) {
259    insert_to_list_head(p, &free_blocks);
// Append p above the topmost block of the address-ordered chain: walk 'up'
// from memory_blocks to the current top; if the chain is empty, p becomes
// the first block.
264void add_block_to_top(block *p) {
266    if (memory_blocks ==
nullptr) {
271    for (b = memory_blocks; b->up !=
nullptr; b = b->up)
// Pool allocation entry point: place a device pointer of at least req_size
// bytes (rounded up to ALLOC_ALIGNMENT) in *ret.
// Strategy: best-fit scan of the free list — an exact size match wins
// immediately, otherwise the smallest sufficiently large block is split.
// On a miss the pool is grown by another GPU_MEMORY_POOL_FRACTION slab
// (if that keeps total usage under ~90% of device memory) and the call
// retries itself recursively; otherwise an out-of-memory error is reported.
282void gpu_memory_pool_alloc(
void **ret,
size_t req_size) {
// lazy one-time initialization of the pool
284    static bool initialized =
false;
288        gpu_memory_pool_init();
// drain pending work before recycling memory between kernels
291    gpuStreamSynchronize(0);
// round the request up to the allocation alignment
294    size_t align_mod = req_size % ALLOC_ALIGNMENT;
296        req_size = req_size - align_mod + ALLOC_ALIGNMENT;
301    free_list_avg_size += free_list_size;   // statistics
// best-fit search over the free list
304    bool found_match =
false;
306    block *ptr = free_blocks;
307    for (block *p = free_blocks; p !=
nullptr; p = p->next) {
309        if (p->size == req_size) {
315        if (p->size > req_size) {
// remember the smallest block that still fits
317            if (!found_match || ptr->size > p->size) {
324    free_list_avg_search += steps;   // statistics
// carve the request out of a larger block when no exact match exists
328        if (ptr->size > req_size) {
329            ptr = split_free_block(ptr, req_size);
333        remove_from_list(ptr, &free_blocks);
335        ptr->is_free =
false;
339        insert_to_list_head(ptr, &in_use_blocks);
// track usage and its high-water mark
341        current_used_size += req_size;
342        if (current_used_size > max_used_size)
343            max_used_size = current_used_size;
// no block fits: try to extend the pool with a fresh device allocation,
// as long as the pool stays safely below total device memory
350    if (total_pool_size < (1.0 - 1.5 * GPU_MEMORY_POOL_FRACTION) * gpu_total_memory) {
351        size_t m_alloc = GPU_MEMORY_POOL_FRACTION * gpu_total_memory;
352        m_alloc = (m_alloc > req_size) ? m_alloc : req_size;
354        if (m_alloc + total_pool_size < 0.9 * gpu_total_memory) {
356            block *p = get_block_descriptor();
362            p = get_block_descriptor();
363            p->ptr = gpu_memory_allocate(m_alloc);
// NOTE(review): gpu_memory_allocate() already adds m_alloc to
// total_pool_size (line 116) — this looks like a double count; verify
// against the full source.
364            total_pool_size += m_alloc;
// retry now that the pool has grown
369            gpu_memory_pool_alloc(ret, req_size);
// fall-through: allocation failed — report request vs. pool size
376            << req_size <<
", current pool size " << total_pool_size << std::endl;
// Return a pool pointer: locate its block on the in-use list, move it back
// to the free state, and coalesce it with free neighbours below and above.
// A pointer not found on the in-use list is reported as an error.
384void gpu_memory_pool_free(
void *ptr) {
// linear scan of the in-use list for the block owning ptr
387    for (block *f = in_use_blocks; f !=
nullptr; f = f->next) {
390            remove_from_list(f, &in_use_blocks);
392            current_used_size -= f->size;
// merge with adjacent free blocks: down first, then up
395            block *down = f->down;
397            if (down !=
nullptr && down->is_free) {
398                merge_block_down_free(f);
// after merging down, the upper neighbour may now also be mergeable
399                if (up !=
nullptr && up->is_free) {
400                    merge_block_down_free(up);
402            }
else if (up !=
nullptr && up->is_free) {
403                merge_block_up_free(f);
// ptr was not produced by this pool (or was already freed)
414    hila::out <<
"Memory free error - unknown pointer " << ptr <<
'\n';
/// Purge unused memory from the pool.
/// Intentionally a no-op for this allocator: the pool keeps its device
/// allocations cached for reuse rather than returning them to the driver.
void gpu_memory_pool_purge() {
    // nothing to release
}
// Print pool statistics: total pool size, allocation count, average
// free-list search length and size, and peak memory use.
// NOTE(review): writes to hila::out on every rank but the header says
// "from node 0" — presumably guarded by a rank check outside this extract;
// confirm.
421void gpu_memory_pool_report() {
423    hila::out <<
"\nGPU Memory pool statistics from node 0:\n";
424    hila::out <<
" Total pool size " << ((double)total_pool_size) / (1024 * 1024) <<
" MB\n";
425    hila::out <<
" # of allocations " << n_allocs <<
'\n';
426    hila::out <<
" Average free list search " << free_list_avg_search / n_allocs
428    hila::out <<
" Average free list size " << free_list_avg_size / n_allocs <<
" items\n";
429    hila::out <<
" Maximum memory use " << max_used_size / (1024 * 1024) <<
" MB\n\n";
This file defines all includes for HILA.
This file contains definitions for the Field class and the classes required to define it such as fi...
int myrank()
rank of this node
std::ostream out
this is our default output file stream
std::ostream out0
This writes output only from main process (node 0)
void terminate(int status)