HILA
Loading...
Searching...
No Matches
bench.cpp
Go to the documentation of this file.
/**
 * @file bench.cpp
 * @author Kari Rummukainen
 * @brief Simple benchmark application
 * @details Measures the performance of the following operations:
 * - Random number generator
 * - 3x3 matrix multiplication
 * - Nearest neighbour communication
 * - FFT
 * - Simple smear update
 * - Nearest-neighbour SU(5) matrix update
 */
12#include "hila.h"
13
14// unistd.h needed for isatty()
15#include <unistd.h>
16
17
18// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
19// benchmark rnd generation
20
21void bench_random() {
22
23 hila::out0 << "\n-------------------------------------\n";
24 hila::out0 << "Random number generation: ";
25
27
28 // warm it up
29 f[ALL] = hila::random();
30
31 // estimate loop number
32 auto time = hila::gettime();
33 for (int i = 0; i < 10; ++i) {
34 f[ALL] = hila::random();
35 }
37 time = hila::gettime() - time;
38
39 int n_loops = 50.0 / time; // gives 5s
40 hila::out0 << n_loops << " iterations\n";
41
42 time = hila::gettime();
43 for (int i = 0; i < n_loops; ++i) {
44 f[ALL] = hila::random();
45 }
47 time = hila::gettime() - time;
48
49 hila::out0 << " In separate onsites loops: " << time << " seconds, " << time / n_loops
50 << " per loop, " << time / n_loops / lattice.volume() << " per site\n";
51
52 time = hila::gettime();
53
54 onsites(ALL) {
55 for (int i = 0; i < n_loops; ++i) {
56 f[X] = hila::random();
57 }
58 }
59
61 time = hila::gettime() - time;
62
63 hila::out0 << " In a single onsites loop: " << time << " seconds, " << time / n_loops
64 << " per loop, " << time / n_loops / lattice.volume() << " per site\n";
65}
66
67
68// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
69// Benchmark 3x3 matrix multiply
70
71void bench_matrix() {
72
73 hila::out0 << "\n-------------------------------------\n";
74 hila::out0 << "3x3 Complex double matrix multiply: ";
75
77
78 // warm it up
79 f = 1;
80 g = 1;
81
82 // estimate loop number
83 auto time = hila::gettime();
84 for (int i = 0; i < 10; ++i) {
85 g[ALL] = f[X] * f[X];
86 }
88 time = hila::gettime() - time;
89
90 int n_loops = 50.0 / time; // gives 5s
91 hila::out0 << n_loops << " iterations\n";
92
93 time = hila::gettime();
94 for (int i = 0; i < n_loops; ++i) {
95 f[ALL] = f[X] * f[X];
96 }
98 time = hila::gettime() - time;
99
100 hila::out0 << " In separate onsites loops: " << time << " seconds, " << time / n_loops
101 << " per loop, " << time / n_loops / lattice.volume() << " per site\n";
102
103 time = hila::gettime();
104 onsites(ALL) {
105 for (int i = 0; i < n_loops; ++i) {
106 f[X] = f[X] * f[X];
107 }
108 }
110 time = hila::gettime() - time;
111
112 hila::out0 << " In a single onsites loop: " << time << " seconds, " << time / n_loops
113 << " per loop, " << time / n_loops / lattice.volume() << " per site\n";
114}
115
116
117// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
118// Benchmark mpi comms
119
120void bench_comm() {
121
122 constexpr int n_gathers = 300;
123
124 hila::out0 << "\n-------------------------------------\n";
125 hila::out0 << "Nearest neighbour communication: complex field\n";
126
127 Field<Complex<double>> df = 0;
128
129 // warm up a bit
130 foralldir(d) {
131 for (int i = 0; i < 3; i++) {
132 df.gather(d);
133 df.gather(-d);
134 df.mark_changed(ALL);
135 }
136 }
137
138 for (Direction d = e_x; d < NDIRECTIONS; ++d) {
139 df.gather(d);
140 df.mark_changed(ALL);
141
142 auto time = hila::gettime();
143
144 for (int i = 0; i < n_gathers; i++) {
145 df.gather(d);
146 df.mark_changed(ALL);
147 }
149 time = hila::gettime() - time;
150 hila::out0 << " Gather from direction " << hila::prettyprint(d) << ": " << time / n_gathers
151 << " s/gather\n";
152 }
153}
154
155//--------------------------------------------------------------------------------
156
157// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
158// Benchmark FFT
159
160void bench_fft() {
161
162 constexpr int n_fft = 20;
163
164 hila::out0 << "\n-------------------------------------\n";
165 hila::out0 << "FFT of a double complex field\n";
166
167 Field<Complex<double>> df = 0, rf;
168
169 df.gaussian_random();
170
171 rf = df.FFT();
172
173 auto time = hila::gettime();
174
175 for (int i = 0; i < n_fft; ++i) {
176 rf = df.FFT();
177 }
179 time = hila::gettime() - time;
180
181 hila::out0 << " " << n_fft << " FFTs " << time << " sec, " << time / n_fft
182 << " for single FFT\n";
183}
184
185// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
186// Benchmark simple smear-update
187
188void bench_update() {
189
190 constexpr int n_update = 500;
191
192 hila::out0 << "\n-------------------------------------\n";
193 hila::out0 << "NN-smear complex field " << n_update << " times\n";
194
195 Field<Complex<double>> df, rf;
196
197 df.gaussian_random();
198
199 onsites(ALL) {
200 rf[X] = df[X];
201 foralldir(d) rf[X] += df[X + d] + df[X - d];
202 }
203 df.mark_changed(ALL);
204
205 auto time = hila::gettime();
206 for (int i = 0; i < n_update; i++) {
207 onsites(ALL) {
208 rf[X] = df[X];
209 foralldir(d) rf[X] += df[X + d] + df[X - d];
210 }
211 df = rf;
212 }
214 time = hila::gettime() - time;
215
216 hila::out0 << " Total time " << time << " s, one update " << time / n_update << ", per site "
217 << time / n_update / lattice.volume() << '\n';
218}
219
220// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// Benchmark nearest-neighbour SU(5) matrix multiply update
222
223void bench_matrix_update() {
224
225 constexpr int n_update = 50;
226
227 hila::out0 << "\n-------------------------------------\n";
228 hila::out0 << "NN-mult SU(5) matrix field " << n_update << " times\n";
229
230 Field<SU<5, double>> df, rf;
231
232 df = 1;
233 rf = 1;
234
235 foralldir(d) onsites(ALL) {
236 rf[X] = rf[X] * df[X + d] * df[X - d];
237 }
238 df.mark_changed(ALL);
239
240 auto time = hila::gettime();
241 for (int i = 0; i < n_update; i++) {
242 onsites(ALL) {
243 foralldir(d) rf[X] += df[X + d] * df[X - d];
244 }
245 df.mark_changed(ALL);
246 }
248 time = hila::gettime() - time;
249
250 hila::out0 << " Total time " << time << " s, one update " << time / n_update << ", per site "
251 << time / n_update / lattice.volume() << '\n';
252}
253
254
255//////////////////////////////////////////////////////////////////////////////////////////
256
257int main(int argc, char **argv) {
258
259 hila::initialize(argc, argv);
260
261 hila::out0 << "HILA benchmark program\n";
262
263 hila::input par("parameters");
264
265 CoordinateVector lsize = par.get("lattice size"); // reads NDIM numbers
266 long seed = par.get("random seed");
267
268 par.close();
269
270 // setting up the lattice is convenient to do after reading
271 // the parameters
272 lattice.setup(lsize);
273
274 // We need random number here
275 hila::seed_random(seed);
276
277
278 ///////////////////////////////////////////////////////////////
279 // start tests
280
281 hila::out0 << "###################################\n\n";
282
283 bench_random();
284
285 bench_matrix();
286
287 bench_comm();
288
289 bench_fft();
290
291 bench_update();
292
293 bench_matrix_update();
294
295 hila::out0 << "\n##################################\n";
296
298}
The field class implements the standard methods for accessing Fields. Hilapp replaces the parity acce...
Definition field.h:61
hila::input - Class for parsing runtime parameter files.
Definition input.h:52
void setup(const CoordinateVector &siz)
General lattice setup.
Definition lattice.cpp:33
#define foralldir(d)
Macro to loop over (all) Direction(s)
Definition coordinates.h:78
Direction
Enumerator for direction that assigns integer to direction to be interpreted as unit vector.
Definition coordinates.h:34
constexpr Parity ALL
bit pattern: 011
void synchronize()
synchronize mpi
Definition com_mpi.cpp:255
double random()
Real valued uniform random number generator.
Definition hila_gpu.cpp:118
std::ostream out0
This writes output only from main process (node 0)
void initialize(int argc, char **argv)
Read in command line arguments. Initialise default stream and MPI communication.
void seed_random(uint64_t seed, bool device_rng=true)
Seed random generators with 64-bit unsigned value. On MPI shuffles the seed so that different MPI ran...
Definition random.cpp:86
double gettime()
Definition timing.cpp:163
void finishrun()
Normal, controlled exit - all nodes must call this. Prints timing information and information about c...