HILA
Loading...
Searching...
No Matches
memory_pool.cpp
1///////////////////////////////////////////
2/// memory_pool.cpp - simple list-based GPU memory pool allocator for CUDA/HIP
3
4#include "plumbing/defs.h"
5#include "plumbing/lattice.h"
6#include "plumbing/field.h"
7#include "plumbing/backend_gpu/defs.h"
8#include <list>
9#include <iomanip>
10
11// no real need for HILAPP to go through here
12#if defined(GPU_MEMORY_POOL) && !defined(HILAPP)
13
14#if defined(HIP)
15#define gpuMallocDirect(a, b) GPU_CHECK(hipMalloc(a, b))
16#define gpuFreeDirect(a) GPU_CHECK(hipFree(a))
17#elif defined(CUDA)
18#define gpuMallocDirect(a, b) GPU_CHECK(cudaMalloc(a, b))
19#define gpuFreeDirect(a) GPU_CHECK(cudaFree(a))
20#else
21static_assert(0 && "HIP or CUDA must be defined");
22#endif
23
24// Compile with make .. OPTS="-DPOOL_DEBUG"
25// #define POOL_DEBUG
26
// keep relatively large min allocation: requests smaller than this many bytes are
// rounded up to it in gpu_memory_pool_alloc()
static constexpr size_t MIN_ALLOC_SIZE = 128;
29
/// Bookkeeping record for one block of GPU memory managed by the pool.
struct allocation {
    void *ptr = nullptr; ///< address returned by gpuMallocDirect()
    size_t size = 0;     ///< size of the block in bytes, as requested from gpuMallocDirect()
};
34
35static size_t total_size = 0;
36static size_t n_allocs = 0;
37static size_t n_true_allocs = 0;
38static double free_list_avg_size = 0;
39static double free_list_avg_search = 0;
40
41static std::list<allocation> free_list = {};
42static std::list<allocation> in_use_list = {};
43
44void gpu_memory_pool_alloc(void **p, size_t req_size) {
45
46 if (req_size < MIN_ALLOC_SIZE) {
47 req_size = MIN_ALLOC_SIZE;
48 }
49
50 n_allocs++;
51 free_list_avg_size += free_list.size();
52
53 // do we have free stuff? Simple linear search - list should not be too large
54 bool found_match = false;
55 auto ptr = free_list.begin();
56 int steps = 0;
57 for (auto it = free_list.begin(); it != free_list.end(); it++) {
58 steps++;
59 if (it->size == req_size) {
60 found_match = true;
61 ptr = it;
62 break; // perfect match, that's it
63 }
64
65 // allow allocated blocks at most twice larger
66 if (it->size > req_size && it->size < 2 * req_size) {
67 if (!found_match || ptr->size > it->size) {
68 ptr = it;
69 }
70 found_match = true;
71 }
72 }
73
74 free_list_avg_search += steps;
75
76 // got it, move to in_use_list to the beginning (faster to find)
77 if (found_match) {
78 *p = ptr->ptr;
79 in_use_list.splice(in_use_list.begin(), free_list, ptr);
80
81#ifdef POOL_DEBUG
82 hila::out << "GPU MEMORY: request " << req_size << " gave block " << ptr->size
83 << " current total " << total_size << '\n';
84#endif
85
86 } else {
87
88 // did not find free memory - allocate
89 // alloc failure caught by gpuMalloc
90 allocation a;
91 gpuMallocDirect(&(a.ptr), req_size);
92 a.size = req_size;
93 in_use_list.push_front(a);
94 *p = a.ptr;
95
96 n_true_allocs++;
97 total_size += req_size;
98
99#ifdef POOL_DEBUG
100 hila::out << "GPU MEMORY: request " << req_size << " NEW allocation, current total "
101 << total_size << '\n';
102#endif
103 }
104}
105
106void gpu_memory_pool_free(void *ptr) {
107
108 // search the list for the memory block
109 for (auto it = in_use_list.begin(); it != in_use_list.end(); it++) {
110 if (it->ptr == ptr) {
111 // found the allocation, move to free list to the beginning
112
113 free_list.splice(free_list.begin(), in_use_list, it);
114
115#ifdef POOL_DEBUG
116 hila::out << "GPU MEMORY: FREE block of size " << it->size << ", current total "
117 << total_size << '\n';
118#endif
119
120 return;
121 }
122 }
123
124 // did not find! serious error, quit
125 hila::out << "Memory free error - unknown pointer " << ptr << '\n';
127}
128
129/// Release free memory to the system - avoids extra allocations
130void gpu_memory_pool_purge() {
131
132 for (auto it = free_list.begin(); it != free_list.end(); it++) {
133 gpuFreeDirect(it->ptr);
134
135 total_size -= it->size;
136
137#ifdef POOL_DEBUG
138 hila::out << "GPU MEMORY: Purging " << it->size << ", bytes, total size " << total_size
139 << '\n';
140#endif
141 }
142
143 free_list.clear();
144}
145
146void gpu_memory_pool_report() {
147 if (hila::myrank() == 0) {
148 hila::out << "\nGPU Memory pool statistics from node 0:\n";
149 hila::out << " Total pool size " << ((double)total_size) / (1024 * 1024) << " MB\n";
150 hila::out << " # of allocations " << n_allocs << " real allocs " << std::setprecision(2)
151 << ((double)n_true_allocs) / n_allocs * 100 << "%\n";
152 hila::out << " Average free list search " << free_list_avg_search / n_allocs
153 << " steps\n";
154 hila::out << " Average free list size " << free_list_avg_size / n_allocs << " items\n\n";
155 }
156}
157
158#endif // GPU_MEMORY_POOL
This file defines all includes for HILA.
This file contains definitions for the Field class and the classes required to define it such as fi...
int myrank()
rank of this node
Definition com_mpi.cpp:235
std::ostream out
this is our default output file stream
void terminate(int status)