// Compile-time guard: when EVEN_SITES_FIRST is not defined, the static_assert
// below is compiled. Its condition (0 && "...") is always false, so the build
// stops and the string literal appears in the diagnostic as the explanation.
6#ifndef EVEN_SITES_FIRST
7static_assert(0 &&
"EVEN_SITES_FIRST must be defined for vectorized code");
13#include "hilapp_vector.h"
24#include "vectorclass/vectorclass.h"
25#include "vectorclass/vectormath_exp.h"
26#include "vectorclass/vectormath_trig.h"
27#include "vectorclass/vectormath_hyp.h"
/// Thread synchronization hook.
///
/// The body is empty here, so calling this function has no effect; it takes
/// no arguments and returns nothing.
inline void synchronize_threads() {}
// Base clause of a boolean type trait: the value is true only when T is
// exactly one of the vectorclass types listed below (Vec4d, Vec4q, Vec8f,
// Vec8i, Vec8d, Vec8q, Vec16f, Vec16i); any other T yields false.
// NOTE(review): Vec4i is not in this list although an operator% overload for
// it exists further down -- confirm that the omission is intended.
48 : std::integral_constant<
49 bool, std::is_same<T, Vec4d>::value || std::is_same<T, Vec4q>::value ||
50 std::is_same<T, Vec8f>::value || std::is_same<T, Vec8i>::value ||
51 std::is_same<T, Vec8d>::value || std::is_same<T, Vec8q>::value ||
52 std::is_same<T, Vec16f>::value || std::is_same<T, Vec16i>::value> {
/// Widened version of std::is_arithmetic: true for the built-in arithmetic
/// types and, in addition, for every T for which is_avx_vector<T>::value is
/// true.
56struct is_arithmetic : std::integral_constant<bool, std::is_arithmetic<T>::value ||
57 is_avx_vector<T>::value> {};
/// Generic case of avx_vector_type_info: T is described as a single element
/// whose size is sizeof(T) bytes.
// NOTE(review): other traits in this file read avx_vector_type_info<...>::type;
// that member's declaration is not visible in this excerpt.
60struct avx_vector_type_info {
// Size of T in bytes.
62 static constexpr int size =
sizeof(T);
// A non-vector type counts as one element.
63 static constexpr int elements = 1;
/// Vec4d description: 4 elements, 4 * sizeof(double) bytes in total.
67struct avx_vector_type_info<Vec4d> {
// Total size in bytes.
69 static constexpr int size = 4 *
sizeof(double);
// Number of elements held by the vector.
70 static constexpr int elements = 4;
/// Vec4q description: 4 elements, 4 * sizeof(int64_t) bytes in total.
73struct avx_vector_type_info<Vec4q> {
// Total size in bytes.
75 static constexpr int size = 4 *
sizeof(int64_t);
// Number of elements held by the vector.
76 static constexpr int elements = 4;
/// Vec8i description: 8 elements, 8 * sizeof(int) bytes in total.
79struct avx_vector_type_info<Vec8i> {
// Total size in bytes.
81 static constexpr int size = 8 *
sizeof(int);
// Number of elements held by the vector.
82 static constexpr int elements = 8;
/// Vec8f description: 8 elements, 8 * sizeof(float) bytes in total.
85struct avx_vector_type_info<Vec8f> {
// Total size in bytes.
87 static constexpr int size = 8 *
sizeof(float);
// Number of elements held by the vector.
88 static constexpr int elements = 8;
/// Vec8d description: 8 elements, 8 * sizeof(double) bytes in total.
91struct avx_vector_type_info<Vec8d> {
// Total size in bytes.
93 static constexpr int size = 8 *
sizeof(double);
// Number of elements held by the vector.
94 static constexpr int elements = 8;
/// Vec8q description: 8 elements, 8 * sizeof(int64_t) bytes in total.
97struct avx_vector_type_info<Vec8q> {
// Total size in bytes.
99 static constexpr int size = 8 *
sizeof(int64_t);
// Number of elements held by the vector.
100 static constexpr int elements = 8;
/// Vec16f description: 16 elements, 16 * sizeof(float) bytes in total.
103struct avx_vector_type_info<Vec16f> {
// Total size in bytes.
105 static constexpr int size = 16 *
sizeof(float);
// Number of elements held by the vector.
106 static constexpr int elements = 16;
/// Vec16i description: 16 elements, 16 * sizeof(int) bytes in total.
109struct avx_vector_type_info<Vec16i> {
// Total size in bytes.
111 static constexpr int size = 16 *
sizeof(int);
// Number of elements held by the vector.
112 static constexpr int elements = 16;
// Boolean trait over the pair (T, U). The value is true when at least one of
// the following holds:
//   1. std::is_assignable<T, U>::value is true;
//   2. U is an AVX vector type, T is not, and T is assignable from
//      avx_vector_type_info<U>::type;
//   3. T and U are both AVX vector types with equal byte size and equal
//      element count.
115template <
class T,
class U>
117 : std::integral_constant<
119 std::is_assignable<T, U>::value ||
120 (is_avx_vector<U>::value &&
121 ((!is_avx_vector<T>::value &&
122 std::is_assignable<T,
123 typename avx_vector_type_info<U>::type>::value) ||
124 (is_avx_vector<T>::value &&
125 (avx_vector_type_info<T>::size == avx_vector_type_info<U>::size) &&
126 (avx_vector_type_info<T>::elements ==
127 avx_vector_type_info<U>::elements))))> {};
/// Widened version of std::is_floating_point: true when T itself is a
/// standard floating-point type, or when
/// hila::avx_vector_type_info<T>::type is one.
131struct is_floating_point
132 : std::integral_constant<
134 std::is_floating_point<T>::value ||
135 std::is_floating_point<typename hila::avx_vector_type_info<T>::type>::value> {};
/// reduce_sum overload for Vec4d; the result type is double.
/// The vector is written out with v.store() into the local array `store`,
/// whose first 4 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running sum; confirm.
176inline double reduce_sum(Vec4d v) {
179 v.store(&(store[0]));
180 for (
int i = 0; i < 4; i++)
/// reduce_sum overload for Vec8f; note the result type is double, not float.
/// The vector is written out with v.store() into the local array `store`,
/// whose first 8 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running sum; confirm.
185inline double reduce_sum(Vec8f v) {
188 v.store(&(store[0]));
189 for (
int i = 0; i < 8; i++)
/// reduce_sum overload for Vec8i; the result type is int64_t.
/// The vector is written out with v.store() into the local array `store`,
/// whose first 8 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running sum; confirm.
194inline int64_t reduce_sum(Vec8i v) {
197 v.store(&(store[0]));
198 for (
int i = 0; i < 8; i++)
/// reduce_sum overload for Vec8d; the result type is double.
/// The vector is written out with v.store() into the local array `store`,
/// whose first 8 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running sum; confirm.
203inline double reduce_sum(Vec8d v) {
206 v.store(&(store[0]));
207 for (
int i = 0; i < 8; i++)
/// reduce_sum overload for Vec16f; note the result type is double, not float.
/// The vector is written out with v.store() into the local array `store`,
/// whose first 16 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running sum; confirm.
212inline double reduce_sum(Vec16f v) {
215 v.store(&(store[0]));
216 for (
int i = 0; i < 16; i++)
/// reduce_sum overload for Vec16i; the result type is int64_t.
/// The vector is written out with v.store() into the local array `store`,
/// whose first 16 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running sum; confirm.
221inline int64_t reduce_sum(Vec16i v) {
224 v.store(&(store[0]));
225 for (
int i = 0; i < 16; i++)
/// reduce_sum overload for Vec4q; the result type is int64_t.
/// The vector is written out with v.store() into the local array `store`,
/// whose first 4 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running sum; confirm.
230inline int64_t reduce_sum(Vec4q v) {
233 v.store(&(store[0]));
234 for (
int i = 0; i < 4; i++)
/// reduce_sum overload for Vec8q; the result type is int64_t.
/// The vector is written out with v.store() into the local array `store`,
/// whose first 8 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running sum; confirm.
239inline int64_t reduce_sum(Vec8q v) {
242 v.store(&(store[0]));
243 for (
int i = 0; i < 8; i++)
/// reduce_prod overload for Vec4d; the result type is double.
/// The vector is written out with v.store() into the local array `store`,
/// whose first 4 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running product; confirm.
248inline double reduce_prod(Vec4d v) {
251 v.store(&(store[0]));
252 for (
int i = 0; i < 4; i++)
/// reduce_prod overload for Vec8f; note the result type is double, not float.
/// The vector is written out with v.store() into the local array `store`,
/// whose first 8 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running product; confirm.
257inline double reduce_prod(Vec8f v) {
260 v.store(&(store[0]));
261 for (
int i = 0; i < 8; i++)
/// reduce_prod overload for Vec8i. The result type is double even though the
/// input is an integer vector (the matching reduce_sum returns int64_t).
/// The vector is written out with v.store() into the local array `store`,
/// whose first 8 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running product; confirm.
266inline double reduce_prod(Vec8i v) {
269 v.store(&(store[0]));
270 for (
int i = 0; i < 8; i++)
/// reduce_prod overload for Vec8d; the result type is double.
/// The vector is written out with v.store() into the local array `store`,
/// whose first 8 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running product; confirm.
275inline double reduce_prod(Vec8d v) {
278 v.store(&(store[0]));
279 for (
int i = 0; i < 8; i++)
/// reduce_prod overload for Vec16f; note the result type is double, not float.
/// The vector is written out with v.store() into the local array `store`,
/// whose first 16 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running product; confirm.
284inline double reduce_prod(Vec16f v) {
287 v.store(&(store[0]));
288 for (
int i = 0; i < 16; i++)
/// reduce_prod overload for Vec16i. The result type is double even though the
/// input is an integer vector (the matching reduce_sum returns int64_t).
/// The vector is written out with v.store() into the local array `store`,
/// whose first 16 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running product; confirm.
293inline double reduce_prod(Vec16i v) {
296 v.store(&(store[0]));
297 for (
int i = 0; i < 16; i++)
/// reduce_prod overload for Vec4q. The result type is double even though the
/// input is an integer vector (the matching reduce_sum returns int64_t).
/// The vector is written out with v.store() into the local array `store`,
/// whose first 4 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running product; confirm.
302inline double reduce_prod(Vec4q v) {
305 v.store(&(store[0]));
306 for (
int i = 0; i < 4; i++)
/// reduce_prod overload for Vec8q. The result type is double even though the
/// input is an integer vector (the matching reduce_sum returns int64_t).
/// The vector is written out with v.store() into the local array `store`,
/// whose first 8 entries are then visited in index order.
// NOTE(review): the declaration of `store`, the loop body and the return are
// not visible in this excerpt -- presumably a running product; confirm.
311inline double reduce_prod(Vec8q v) {
314 v.store(&(store[0]));
315 for (
int i = 0; i < 8; i++)
/// Applies reduce_sum to every vector_t stored inside a vecT object.
///
/// Template parameters:
///   base_t   - type of each value written into the result
///   vector_t - vector type that `vt` is reinterpreted as an array of
///   T        - return type, reinterpreted as an array of base_t
///   vecT     - type of the input object
///
/// @param vt  object viewed as sizeof(vecT) / sizeof(vector_t) consecutive
///            vector_t values
// NOTE(review): the declaration of `res`, the return statement and the
// closing braces are not visible in this excerpt; `res` is presumably a T
// that is returned after the loop -- confirm.
321template <
typename base_t,
typename vector_t,
typename T,
typename vecT>
322T reduce_sum_in_vector(
const vecT &vt) {
// Number of vector_t values that fit in one vecT.
323 constexpr int nvec =
sizeof(vecT) /
sizeof(vector_t);
// The result must hold exactly one base_t per input vector.
324 static_assert(nvec ==
sizeof(T) /
sizeof(base_t),
325 "Mismatch in vectorized type sizes");
// View the input as an array of vectors and the result as an array of
// scalars through C-style pointer casts.
327 auto *vptr = (
const vector_t *)(&vt);
328 base_t *bptr = (base_t *)(&res);
329 for (
int i = 0; i < nvec; i++) {
// The i-th output scalar is the reduce_sum of the i-th input vector.
330 bptr[i] = reduce_sum(vptr[i]);
/// Remainder of each of the 16 elements of a Vec16i by one scalar int.
///
/// @param lhs  vector whose elements are the dividends
/// @param rhs  divisor applied to every element; the visible code does not
///             check it against zero
// NOTE(review): the step that turns tvec2 into the returned Vec16i is not
// visible in this excerpt.
349inline Vec16i operator%(
const Vec16i &lhs,
const int &rhs) {
// tvec1 receives the input elements, tvec2 the remainders.
351 int tvec1[16], tvec2[16];
352 lhs.store(&(tvec1[0]));
353 for (
int i = 0; i < 16; i++)
354 tvec2[i] = tvec1[i] % rhs;
/// Remainder of each of the 8 elements of a Vec8i by one scalar int.
///
/// @param lhs  vector whose elements are the dividends
/// @param rhs  divisor applied to every element; the visible code does not
///             check it against zero
// NOTE(review): the step that turns tvec2 into the returned Vec8i is not
// visible in this excerpt.
359inline Vec8i operator%(
const Vec8i &lhs,
const int &rhs) {
// tvec1 receives the input elements, tvec2 the remainders.
361 int tvec1[8], tvec2[8];
362 lhs.store(&(tvec1[0]));
363 for (
int i = 0; i < 8; i++)
364 tvec2[i] = tvec1[i] % rhs;
/// Remainder of each of the 4 elements of a Vec4i by one scalar int.
///
/// @param lhs  vector whose elements are the dividends
/// @param rhs  divisor applied to every element; the visible code does not
///             check it against zero
// NOTE(review): the step that turns tvec2 into the returned Vec4i is not
// visible in this excerpt.
369inline Vec4i operator%(
const Vec4i &lhs,
const int &rhs) {
// tvec1 receives the input elements, tvec2 the remainders.
371 int tvec1[4], tvec2[4];
372 lhs.store(&(tvec1[0]));
373 for (
int i = 0; i < 4; i++)
374 tvec2[i] = tvec1[i] % rhs;
// Everything in this region is compiled only when
// THESE_ARE_NOT_NEEDED_BUT_LEAVE_HERE is defined. Each hila_random_vector
// variant fills a local array `tvec` with successive mersenne() results.
// NOTE(review): the declarations of `tvec`, the template headers, the
// conversion of `tvec` into the returned value and the preprocessor branch
// that the #elif below continues are not visible in this excerpt.
383#ifdef THESE_ARE_NOT_NEEDED_BUT_LEAVE_HERE
// Generic variant in the first branch: 8 random values.
387inline auto hila_random_vector() {
390 for (
int i = 0; i < 8; i++) {
391 tvec[i] = mersenne();
// double variant in the first branch: 4 random values.
398inline auto hila_random_vector<double>() {
401 for (
int i = 0; i < 4; i++) {
402 tvec[i] = mersenne();
// Branch taken when VECTOR_SIZE equals 64: the element counts double.
408#elif VECTOR_SIZE == 64
// Generic variant: 16 random values.
411inline auto hila_random_vector() {
414 for (
int i = 0; i < 16; i++) {
415 tvec[i] = mersenne();
// double variant: 8 random values.
422inline auto hila_random_vector<double>() {
425 for (
int i = 0; i < 8; i++) {
426 tvec[i] = mersenne();
hila::arithmetic_type< T > squarenorm(const Array< n, m, T > &rhs)
Return square norm of Array.
This file defines all includes for HILA.
Implement hila::swap for gauge fields.