HILA
Loading...
Searching...
No Matches
bench_field.cpp
2#include "dirac/conjugate_gradient.h"
3
// NOTE(review): N is not referenced in the visible part of this listing; presumably it is
// the matrix/vector dimension used by field declarations that are not shown - confirm.
4#define N 3
5
// Fallback value for SEED, applied only when SEED has not been defined already.
// NOTE(review): no use of SEED is visible in this listing - presumably the random number
// generator seed; confirm against the full file.
6#ifndef SEED
7#define SEED 100
8#endif
9
// Lattice size passed to lattice.setup() in main(): four entries of 32.
10CoordinateVector latsize = {32, 32, 32, 32};
11
// Benchmark driver for basic Field operations. It times scalar multiply/add (double and
// float), matrix * matrix, matrix * vector, squarenorm reductions and nearest-neighbour
// gathers of a matrix field, and prints one averaged time per benchmark through hila::out0.
//
// Every benchmark uses the same pattern: n_runs is doubled and the batch re-timed until the
// measured batch time reaches mintime; the last batch time is then divided by n_runs.
//
// NOTE(review): mintime, timediff() and the declarations of the matrix/vector fields are not
// visible in this listing (its embedded line numbers skip 24, 101-110 and 269) - confirm them
// against the full file. The " ms" labels assume timediff() returns milliseconds - TODO confirm.
12int main(int argc, char **argv) {
13 int n_runs = 1;
// NOTE(review): msecs is never used in the visible part of main().
14 double msecs;
15 struct timeval start, end;
16 double timing;
// Accumulators for the squarenorm reductions further down (double and float).
17 double sum;
18 float fsum;
19
20 hila::initialize(argc, argv);
21
22 lattice.setup(latsize);
23
25
// Scalar test fields, filled from hila::random() in loops over parity ALL.
26 Field<double> dfield1, dfield2, dfield3;
27 Field<float> ffield1, ffield2, ffield3;
28 onsites(ALL) {
29 dfield1[X] = hila::random();
30 dfield2[X] = hila::random();
31 dfield3[X] = hila::random();
32 }
33
34 onsites(ALL) {
35 ffield1[X] = hila::random();
36 ffield2[X] = hila::random();
37 ffield3[X] = hila::random();
38 }
39
40 // Benchmark simple scalar Field operation (Memory bandwidth)
41 timing = 0;
// Timing pattern shared by every benchmark below: double n_runs and re-time the batch
// until one batch takes at least mintime.
42 for (n_runs = 1; timing < mintime;) {
43 n_runs *= 2;
44 gettimeofday(&start, NULL);
45 for (int i = 0; i < n_runs; i++) {
46 dfield1[ALL] = dfield2[X] * dfield3[X];
47 }
// NOTE(review): synchronize() is commented out here and in every benchmark below. If field
// operations can complete asynchronously (e.g. on a GPU backend), the end timestamp may be
// taken before the work has finished - confirm.
48 // // synchronize();
49 gettimeofday(&end, NULL);
50 timing = timediff(start, end);
// Distribute rank 0's measurement so every rank tests the same value in the loop condition.
51 hila::broadcast(timing);
52 }
// Average time of a single operation in the final batch.
53 timing = timing / (double)n_runs;
54 hila::out0 << "Double multiply : " << timing << " ms \n";
55
// Same measurement for double-precision addition.
56 timing = 0;
57 for (n_runs = 1; timing < mintime;) {
58 n_runs *= 2;
59 gettimeofday(&start, NULL);
60 for (int i = 0; i < n_runs; i++) {
61 dfield1[ALL] = dfield2[X] + dfield3[X];
62 }
63 // // synchronize();
64 gettimeofday(&end, NULL);
65 timing = timediff(start, end);
66 hila::broadcast(timing);
67 }
68 timing = timing / (double)n_runs;
69 hila::out0 << "Double add : " << timing << " ms \n";
70
// Same measurement for multiplication of Field<float>.
71 timing = 0;
72 for (n_runs = 1; timing < mintime;) {
73 n_runs *= 2;
74 gettimeofday(&start, NULL);
75 for (int i = 0; i < n_runs; i++) {
76 ffield1[ALL] = ffield2[X] * ffield3[X];
77 }
78 // synchronize();
79 gettimeofday(&end, NULL);
80 timing = timediff(start, end);
81 hila::broadcast(timing);
82 }
83 timing = timing / (double)n_runs;
84 hila::out0 << "Float multiply : " << timing << " ms \n";
85
// Same measurement for addition of Field<float>.
86 timing = 0;
87 for (n_runs = 1; timing < mintime;) {
88 n_runs *= 2;
89 gettimeofday(&start, NULL);
90 for (int i = 0; i < n_runs; i++) {
91 ffield1[ALL] = ffield2[X] + ffield3[X];
92 }
93 // synchronize();
94 gettimeofday(&end, NULL);
95 timing = timediff(start, end);
96 hila::broadcast(timing);
97 }
98 timing = timing / (double)n_runs;
99 hila::out0 << "Float add : " << timing << " ms \n";
100
111
112 // Generate random values
113 onsites(ALL) {
114 matrix1[X].random();
115 matrix2[X].random();
116 matrix3[X].random();
117 vector1[X].random();
118 vector2[X].random();
119 }
120
121 onsites(ALL) {
122 fmatrix1[X].random();
123 fmatrix2[X].random();
124 fmatrix3[X].random();
125 fvector1[X].random();
126 fvector2[X].random();
127 }
128
129 // Interesting case of using the same memory three times
130 timing = 0;
131 for (n_runs = 1; timing < mintime;) {
132 n_runs *= 2;
133 gettimeofday(&start, NULL);
134 for (int i = 0; i < n_runs; i++) {
// matrix1 is overwritten on every pass, so each repetition multiplies the previous result.
135 matrix1[ALL] = matrix1[X] * matrix1[X];
136 }
137 // synchronize();
138 gettimeofday(&end, NULL);
139 timing = timediff(start, end);
140 hila::broadcast(timing);
141 }
142 timing = timing / (double)n_runs;
143 hila::out0 << "Matrix1 = Matrix1 * Matrix1 : " << timing << " ms \n";
144
145 // Time MATRIX * MATRIX
146 timing = 0;
147 for (n_runs = 1; timing < mintime;) {
148 n_runs *= 2;
149 gettimeofday(&start, NULL);
150 for (int i = 0; i < n_runs; i++) {
151 matrix3[ALL] = matrix1[X] * matrix2[X];
152 }
153 // synchronize();
154 gettimeofday(&end, NULL);
155 timing = timediff(start, end);
156 hila::broadcast(timing);
157 }
158 timing = timing / (double)n_runs;
// NOTE(review): this label and the single-precision one below print "ms" without the
// leading space that the other benchmark labels use.
159 hila::out0 << "Matrix * Matrix: " << timing << "ms \n";
160
161 // Time MATRIX * MATRIX (single precision)
162 timing = 0;
163 for (n_runs = 1; timing < mintime;) {
164 n_runs *= 2;
165 gettimeofday(&start, NULL);
166 for (int i = 0; i < n_runs; i++) {
167 fmatrix3[ALL] = fmatrix1[X] * fmatrix2[X];
168 }
169 // synchronize();
170 gettimeofday(&end, NULL);
171 timing = timediff(start, end);
172 hila::broadcast(timing);
173 }
174 timing = timing / (double)n_runs;
175 hila::out0 << "Single Precision Matrix * Matrix: " << timing << "ms \n";
176
177 // Time VECTOR * MATRIX (the expression timed is matrix1 * vector1)
178 timing = 0;
179 for (n_runs = 1; timing < mintime;) {
180 n_runs *= 2;
181 gettimeofday(&start, NULL);
182 for (int i = 0; i < n_runs; i++) {
183 vector2[ALL] = matrix1[X] * vector1[X];
184 }
185 // synchronize();
186 gettimeofday(&end, NULL);
187 timing = timediff(start, end);
188 hila::broadcast(timing);
189 }
190 timing = timing / (double)n_runs;
191 hila::out0 << "Vector * Matrix: " << timing << " ms \n";
192
193 // Time VECTOR * MATRIX (single precision; the expression timed is fmatrix1 * fvector1)
194 timing = 0;
195 for (n_runs = 1; timing < mintime;) {
196 n_runs *= 2;
197 gettimeofday(&start, NULL);
198 for (int i = 0; i < n_runs; i++) {
199 fvector2[ALL] = fmatrix1[X] * fvector1[X];
200 }
201 // synchronize();
202 gettimeofday(&end, NULL);
203 timing = timediff(start, end);
204 hila::broadcast(timing);
205 // hila::out0 << "timing " << timing << '\n';
206 }
207 timing = timing / (double)n_runs;
208 hila::out0 << "Single Precision Vector * Matrix: " << timing << " ms \n";
209
210 // Time VECTOR NORM
211 timing = sum = 0;
212 onsites(ALL) { // Warm up. Why does this affect the time?
213 sum += vector1[X].squarenorm();
214 }
215 for (n_runs = 1; timing < mintime;) {
216 n_runs *= 2;
217 gettimeofday(&start, NULL);
218
// sum is reset once per batch, so it accumulates over all n_runs passes of that batch.
219 sum = 0;
220 for (int i = 0; i < n_runs; i++) {
221 onsites(ALL) { sum += vector1[X].squarenorm(); }
222 }
223 // synchronize();
224 gettimeofday(&end, NULL);
225 timing = timediff(start, end);
226 hila::broadcast(timing);
227 }
228 timing = timing / (double)n_runs;
229 hila::out0 << "Vector square sum: " << timing << " ms \n";
230
231 // Time FLOAT VECTOR NORM
// Unlike the double-precision case above, this benchmark has no untimed warm-up pass.
232 timing = 0;
233 for (n_runs = 1; timing < mintime;) {
234 n_runs *= 2;
235 gettimeofday(&start, NULL);
236 fsum = 0;
237 for (int i = 0; i < n_runs; i++) {
238 onsites(ALL) { fsum += fvector1[X].squarenorm(); }
239 }
240 // synchronize();
241 gettimeofday(&end, NULL);
242 timing = timediff(start, end);
243 hila::broadcast(timing);
244 }
245 timing = timing / (double)n_runs;
246 hila::out0 << "Single Precision vector square sum: " << timing << " ms \n";
247
248 // Time COMMUNICATION of a MATRIX
249 timing = 0;
250 for (n_runs = 1; timing < mintime;) {
251 n_runs *= 2;
252 gettimeofday(&start, NULL);
253
254 for (int i = 0; i < n_runs; i++) {
// mark_changed(ALL) is called before each round of gathers.
// NOTE(review): presumably this invalidates previously gathered data so that every
// gather below communicates again - confirm against the Field implementation.
255 matrix1.mark_changed(ALL);
256 for (int dir = 0; dir < NDIRS; dir++) {
257 matrix1.gather((Direction)dir, ALL);
258 }
259 }
260
261 // synchronize();
262 gettimeofday(&end, NULL);
263 timing = timediff(start, end);
264 hila::broadcast(timing);
265 }
// NOTE(review): each run issues NDIRS gathers, but the total is divided by 2 * NDIRS per
// run - confirm that the extra factor of 2 is the intended normalisation.
266 timing = timing / 2 / NDIRS / (double)n_runs;
267 hila::out0 << "Matrix nearest neighbour communication: " << timing << " ms \n";
268
270}
The field class implements the standard methods for accessing Fields. Hilapp replaces the parity acce...
Definition field.h:61
double squarenorm() const
Squarenorm.
Definition field.h:1064
void setup(const CoordinateVector &siz)
General lattice setup.
Definition lattice.cpp:33
This header file defines:
constexpr unsigned NDIRS
Number of directions.
Definition coordinates.h:57
Direction
Enumerator for direction that assigns integer to direction to be interpreted as unit vector.
Definition coordinates.h:34
constexpr Parity ALL
bit pattern: 011
double random()
Real valued uniform random number generator.
Definition hila_gpu.cpp:118
std::ostream out0
This writes output only from main process (node 0)
void initialize(int argc, char **argv)
Read in command line arguments. Initialise default stream and MPI communication.
void seed_random(uint64_t seed, bool device_rng=true)
Seed random generators with 64-bit unsigned value. On MPI shuffles the seed so that different MPI ran...
Definition random.cpp:86
T broadcast(T &var, int rank=0)
Broadcast the value of var to all MPI ranks from rank (default=0).
Definition com_mpi.h:153
void finishrun()
Normal, controlled exit - all nodes must call this. Prints timing information and information about c...