HILA
Loading...
Searching...
No Matches
backend_vector/defs.h
1#ifndef VECTOR_DEFS_H
2#define VECTOR_DEFS_H
3
4#include "plumbing/defs.h"
5
6#ifndef EVEN_SITES_FIRST
7static_assert(0 && "EVEN_SITES_FIRST must be defined for vectorized code");
8// TODO: Make this work also without EVEN_SITES_FIRST!
9#else
10
11#ifdef HILAPP
12// If hilapp, include stubs for vector types
13#include "hilapp_vector.h"
14
15// If instrset is not defined, give a reasonable number (does not matter at this stage)
16#ifndef INSTRSET
17#define INSTRSET 8
18#endif
19
20// end of HILAPP section
21#else
22
23// #include <immintrin.h> // this file should be included in vectorclass.h, if needed
24#include "vectorclass/vectorclass.h"
25#include "vectorclass/vectormath_exp.h"
26#include "vectorclass/vectormath_trig.h"
27#include "vectorclass/vectormath_hyp.h"
28
29#endif
30
31#define VECTORIZED
32
33#ifndef VECTOR_SIZE
34#define VECTOR_SIZE 32
35#endif
36
37namespace hila {
38
39// Trivial synchronization
40inline void synchronize_threads() {}
41
42
43/// Implements test for basic in types, similar to
44/// std::is_arithmetic, but allows the backend to add
45/// it's own basic types (such as AVX vectors)
46template <class T>
48 : std::integral_constant<
49 bool, std::is_same<T, Vec4d>::value || std::is_same<T, Vec4q>::value ||
50 std::is_same<T, Vec8f>::value || std::is_same<T, Vec8i>::value ||
51 std::is_same<T, Vec8d>::value || std::is_same<T, Vec8q>::value ||
52 std::is_same<T, Vec16f>::value || std::is_same<T, Vec16i>::value> {
53};
54
55template <class T>
56struct is_arithmetic : std::integral_constant<bool, std::is_arithmetic<T>::value ||
57 is_avx_vector<T>::value> {};
58
59template <class T>
60struct avx_vector_type_info {
61 using type = T;
62 static constexpr int size = sizeof(T);
63 static constexpr int elements = 1;
64};
65
66template <>
67struct avx_vector_type_info<Vec4d> {
68 using type = double;
69 static constexpr int size = 4 * sizeof(double);
70 static constexpr int elements = 4;
71};
72template <>
73struct avx_vector_type_info<Vec4q> {
74 using type = int64_t;
75 static constexpr int size = 4 * sizeof(int64_t);
76 static constexpr int elements = 4;
77};
78template <>
79struct avx_vector_type_info<Vec8i> {
80 using type = int;
81 static constexpr int size = 8 * sizeof(int);
82 static constexpr int elements = 8;
83};
84template <>
85struct avx_vector_type_info<Vec8f> {
86 using type = float;
87 static constexpr int size = 8 * sizeof(float);
88 static constexpr int elements = 8;
89};
90template <>
91struct avx_vector_type_info<Vec8d> {
92 using type = double;
93 static constexpr int size = 8 * sizeof(double);
94 static constexpr int elements = 8;
95};
96template <>
97struct avx_vector_type_info<Vec8q> {
98 using type = int64_t;
99 static constexpr int size = 8 * sizeof(int64_t);
100 static constexpr int elements = 8;
101};
102template <>
103struct avx_vector_type_info<Vec16f> {
104 using type = float;
105 static constexpr int size = 16 * sizeof(float);
106 static constexpr int elements = 16;
107};
108template <>
109struct avx_vector_type_info<Vec16i> {
110 using type = int;
111 static constexpr int size = 16 * sizeof(int);
112 static constexpr int elements = 16;
113};
114
115template <class T, class U>
116struct is_assignable
117 : std::integral_constant<
118 bool,
119 std::is_assignable<T, U>::value ||
120 (is_avx_vector<U>::value &&
121 ((!is_avx_vector<T>::value &&
122 std::is_assignable<T,
123 typename avx_vector_type_info<U>::type>::value) ||
124 (is_avx_vector<T>::value &&
125 (avx_vector_type_info<T>::size == avx_vector_type_info<U>::size) &&
126 (avx_vector_type_info<T>::elements ==
127 avx_vector_type_info<U>::elements))))> {};
128
129
130template <class T>
131struct is_floating_point
132 : std::integral_constant<
133 bool,
134 std::is_floating_point<T>::value ||
135 std::is_floating_point<typename hila::avx_vector_type_info<T>::type>::value> {};
136
137} // namespace hila
138
139
140/*** The next section contains basic operations for vectors ***/
141
142// Norm squared
143inline Vec4d squarenorm(Vec4d val) {
144 return val * val;
145}
146
147inline Vec4q squarenorm(Vec4q val) {
148 return val * val;
149}
150
151inline Vec8f squarenorm(Vec8f val) {
152 return val * val;
153}
154
155inline Vec8i squarenorm(Vec8i val) {
156 return val * val;
157}
158
159inline Vec8d squarenorm(Vec8d val) {
160 return val * val;
161}
162
163inline Vec8q squarenorm(Vec8q val) {
164 return val * val;
165}
166
167inline Vec16f squarenorm(Vec16f val) {
168 return val * val;
169}
170
171inline Vec16i squarenorm(Vec16i val) {
172 return val * val;
173}
174
175// Reductions
176inline double reduce_sum(Vec4d v) {
177 double sum = 0;
178 double store[4];
179 v.store(&(store[0]));
180 for (int i = 0; i < 4; i++)
181 sum += store[i];
182 return sum;
183}
184
185inline double reduce_sum(Vec8f v) {
186 double sum = 0;
187 float store[8];
188 v.store(&(store[0]));
189 for (int i = 0; i < 8; i++)
190 sum += store[i];
191 return sum;
192}
193
194inline int64_t reduce_sum(Vec8i v) {
195 int64_t sum = 0;
196 int store[8];
197 v.store(&(store[0]));
198 for (int i = 0; i < 8; i++)
199 sum += store[i];
200 return sum;
201}
202
203inline double reduce_sum(Vec8d v) {
204 double sum = 0;
205 double store[8];
206 v.store(&(store[0]));
207 for (int i = 0; i < 8; i++)
208 sum += store[i];
209 return sum;
210}
211
212inline double reduce_sum(Vec16f v) {
213 double sum = 0;
214 float store[16];
215 v.store(&(store[0]));
216 for (int i = 0; i < 16; i++)
217 sum += store[i];
218 return sum;
219}
220
221inline int64_t reduce_sum(Vec16i v) {
222 int64_t sum = 0;
223 int store[16];
224 v.store(&(store[0]));
225 for (int i = 0; i < 16; i++)
226 sum += store[i];
227 return sum;
228}
229
230inline int64_t reduce_sum(Vec4q v) {
231 int64_t sum = 0;
232 int64_t store[4];
233 v.store(&(store[0]));
234 for (int i = 0; i < 4; i++)
235 sum += store[i];
236 return sum;
237}
238
239inline int64_t reduce_sum(Vec8q v) {
240 int64_t sum = 0;
241 int64_t store[4];
242 v.store(&(store[0]));
243 for (int i = 0; i < 8; i++)
244 sum += store[i];
245 return sum;
246}
247
248inline double reduce_prod(Vec4d v) {
249 double sum = 1;
250 double store[4];
251 v.store(&(store[0]));
252 for (int i = 0; i < 4; i++)
253 sum *= store[i];
254 return sum;
255}
256
257inline double reduce_prod(Vec8f v) {
258 double sum = 0;
259 float store[8];
260 v.store(&(store[0]));
261 for (int i = 0; i < 8; i++)
262 sum *= store[i];
263 return sum;
264}
265
266inline double reduce_prod(Vec8i v) {
267 double sum = 0;
268 int store[8];
269 v.store(&(store[0]));
270 for (int i = 0; i < 8; i++)
271 sum *= store[i];
272 return sum;
273}
274
275inline double reduce_prod(Vec8d v) {
276 double sum = 1;
277 double store[8];
278 v.store(&(store[0]));
279 for (int i = 0; i < 8; i++)
280 sum *= store[i];
281 return sum;
282}
283
284inline double reduce_prod(Vec16f v) {
285 double sum = 0;
286 float store[16];
287 v.store(&(store[0]));
288 for (int i = 0; i < 16; i++)
289 sum *= store[i];
290 return sum;
291}
292
293inline double reduce_prod(Vec16i v) {
294 double sum = 0;
295 int store[16];
296 v.store(&(store[0]));
297 for (int i = 0; i < 16; i++)
298 sum *= store[i];
299 return sum;
300}
301
302inline double reduce_prod(Vec4q v) {
303 double sum = 0;
304 int64_t store[4];
305 v.store(&(store[0]));
306 for (int i = 0; i < 4; i++)
307 sum *= store[i];
308 return sum;
309}
310
311inline double reduce_prod(Vec8q v) {
312 double sum = 0;
313 int64_t store[8];
314 v.store(&(store[0]));
315 for (int i = 0; i < 8; i++)
316 sum *= store[i];
317 return sum;
318}
319
320// Return the
321template <typename base_t, typename vector_t, typename T, typename vecT>
322T reduce_sum_in_vector(const vecT &vt) {
323 constexpr int nvec = sizeof(vecT) / sizeof(vector_t);
324 static_assert(nvec == sizeof(T) / sizeof(base_t),
325 "Mismatch in vectorized type sizes");
326 T res;
327 auto *vptr = (const vector_t *)(&vt);
328 base_t *bptr = (base_t *)(&res);
329 for (int i = 0; i < nvec; i++) {
330 bptr[i] = reduce_sum(vptr[i]);
331 }
332 return res;
333}
334
335/// If vector elements are implemented in the c++ code,
336/// reductions tead to problematic beo base variables need to be supported.
337/// Will this lhavior?
338// template <typename Vec> inline double &operator+=(double &lhs, const Vec rhs) {
339// lhs += reduce_prod(rhs);
340// return lhs;
341//}
342
343// template <typename Vec> inline float &operator+=(float &lhs, const Vec rhs) {
344// lhs += reduce_prod(rhs);
345// return lhs;
346//}
347
348// Define modulo operator for integer vector
349inline Vec16i operator%(const Vec16i &lhs, const int &rhs) {
350 Vec16i r;
351 int tvec1[16], tvec2[16];
352 lhs.store(&(tvec1[0]));
353 for (int i = 0; i < 16; i++)
354 tvec2[i] = tvec1[i] % rhs;
355 r.load(&(tvec2[0]));
356 return r;
357}
358
359inline Vec8i operator%(const Vec8i &lhs, const int &rhs) {
360 Vec8i r;
361 int tvec1[8], tvec2[8];
362 lhs.store(&(tvec1[0]));
363 for (int i = 0; i < 8; i++)
364 tvec2[i] = tvec1[i] % rhs;
365 r.load(&(tvec2[0]));
366 return r;
367}
368
369inline Vec4i operator%(const Vec4i &lhs, const int &rhs) {
370 Vec4i r;
371 int tvec1[4], tvec2[4];
372 lhs.store(&(tvec1[0]));
373 for (int i = 0; i < 4; i++)
374 tvec2[i] = tvec1[i] % rhs;
375 r.load(&(tvec2[0]));
376 return r;
377}
378
379// Random numbers
380// Since you cannot specialize by return type,
381// it needs to be a struct...
382
383#ifdef THESE_ARE_NOT_NEEDED_BUT_LEAVE_HERE
384
385#if VECTOR_SIZE == 32
386template <typename T>
387inline auto hila_random_vector() {
388 Vec8f r;
389 float tvec[8];
390 for (int i = 0; i < 8; i++) {
391 tvec[i] = mersenne();
392 }
393 r.load(&(tvec[0]));
394 return r;
395};
396
397template <>
398inline auto hila_random_vector<double>() {
399 Vec4d r;
400 double tvec[4];
401 for (int i = 0; i < 4; i++) {
402 tvec[i] = mersenne();
403 }
404 r.load(&(tvec[0]));
405 return r;
406};
407
408#elif VECTOR_SIZE == 64
409
410template <typename T>
411inline auto hila_random_vector() {
412 Vec16f r;
413 float tvec[16];
414 for (int i = 0; i < 16; i++) {
415 tvec[i] = mersenne();
416 }
417 r.load(&(tvec[0]));
418 return r;
419};
420
421template <>
422inline auto hila_random_vector<double>() {
423 Vec8d r;
424 double tvec[8];
425 for (int i = 0; i < 8; i++) {
426 tvec[i] = mersenne();
427 }
428 r.load(&(tvec[0]));
429 return r;
430};
431
432#endif // NOT NEEDED
433
434
435#endif // not HILAPP
436
437#endif // EVEN_SITES_FIRST
438
439#endif
hila::arithmetic_type< T > squarenorm(const Array< n, m, T > &rhs)
Return square norm of Array.
Definition array.h:957
This file defines all includes for HILA.
Invert diagonal + const. matrix using Sherman-Morrison formula.
Definition array.h:920