5#if !defined(CUDA) && !defined(HIP)
13#include "plumbing/timing.h"
20#define WRK_GATHER_TAG 42
21#define WRK_SCATTER_TAG 43
32 int n =
pmod((*this).e(d), lattice.size(d));
33 if (n > lattice.size(d) / 2)
36 k[d] = n * 2.0 * M_PI / lattice.size(d);
49 unsigned column_offset;
50 unsigned column_number;
56extern std::vector<pencil_struct> hila_pencil_comms[NDIM];
71template <
typename T,
typename cmplx_t>
74 cmplx_t c[
sizeof(T) /
sizeof(cmplx_t)];
80template <
typename cmplx_t>
95 std::vector<cmplx_t *> rec_p;
96 std::vector<int> rec_size;
100 extern size_t pencil_recv_buf_size[NDIM];
102 elements = _elements;
104 only_reflect = _reflect;
106 local_volume = lattice.mynode.volume();
113 if (pencil_recv_buf_size[d] > buf_size)
114 buf_size = pencil_recv_buf_size[d];
116 if (buf_size < local_volume)
117 buf_size = local_volume;
120 send_buf = (cmplx_t *)d_malloc(buf_size *
sizeof(cmplx_t) * elements);
121 receive_buf = (cmplx_t *)d_malloc(buf_size *
sizeof(cmplx_t) * elements);
152 rec_p.resize(hila_pencil_comms[dir].size());
153 rec_size.resize(hila_pencil_comms[dir].size());
155 cmplx_t *p = receive_buf;
157 for (pencil_struct &fn : hila_pencil_comms[dir]) {
163 rec_size[i] = fn.size_to_dir;
164 p += fn.recv_buf_size * elements;
170 rec_p[i] = send_buf + fn.column_offset * elements;
171 rec_size[i] = fn.size_to_dir;
181 template <
typename T>
185 pencil_collect_timer.start();
187 constexpr int elements =
sizeof(T) /
sizeof(cmplx_t);
193 const size_t elem_offset =
196 cmplx_t *sb = send_buf;
199#pragma hila novector direct_access(sb)
202 T_union<T, cmplx_t> v;
204 int off = offset.dot(X.coordinates() - nmin);
205 for (
int i = 0; i < elements; i++) {
206 sb[off + i * elem_offset] = v.c[i];
210 pencil_collect_timer.stop();
215 template <
typename T>
218 constexpr int elements =
sizeof(T) /
sizeof(cmplx_t);
221 pencil_save_timer.start();
228 cmplx_t *rb = receive_buf;
231#pragma hila novector direct_access(rb)
234 T_union<T, cmplx_t> v;
236 size_t off = offset.dot(X.coordinates() - nmin);
237 for (
int i = 0; i < elements; i++) {
238 v.c[i] = rb[off + i * elem_offset];
243 pencil_save_timer.stop();
254 pencil_reshuffle_timer.start();
263 cmplx_t *sb = send_buf;
264 cmplx_t *rb = receive_buf;
266#pragma hila novector direct_access(sb, rb)
269 size_t off_in = offset_in.dot(v);
270 size_t off_out = offset_out.dot(v);
271 for (
int e = 0; e < elem; e++) {
272 sb[off_out + e * e_offset_out] = rb[off_in + e * e_offset_in];
276 pencil_reshuffle_timer.stop();
283 void swap_buffers() {
284 std::swap(send_buf, receive_buf);
294 template <
typename T>
298 assert(lattice.id() == input.
fs->lattice_id &&
"Default lattice mismatch in fft");
303 bool first_dir =
true;
307 if (directions[dir]) {
341 result.mark_changed(
ALL);
346void FFT_delete_plans();
353#include "plumbing/fft_fftw_transform.h"
355#elif defined(CUDA) || defined(HIP)
357#include "plumbing/backend_gpu/fft_gpu_transform.h"
379 static_assert(hila::contains_complex<T>::value,
380 "FFT_field argument fields must contain complex type");
384 constexpr size_t elements =
sizeof(T) /
sizeof(cmplx_t);
454 return FFT(cv, fftdir);
470 "FFT_real_to_complex can be applied only to Field<real-type> variable");
474 return cf.FFT(fftdir);
508 if (cv[d] > 0 && cv[d] < lattice.size(d) / 2)
510 if (cv[d] > lattice.size(d) / 2)
521 static_assert(hila::is_complex<T>::value,
522 "FFT_complex_to_real can be applied only to Field<Complex<>> type variable");
525 auto rf = this->reflect();
531 }
else if (type == -1) {
532 rf[X] = rf[X].conj();
534 rf[X].real() = (*this)[X].real();
549 onsites(
ALL) res[X] = rf[X].
real();
562 constexpr int elements = 1;
566 hila_fft<T> refl(elements, fft_direction::forward,
true);
Array< n, m, hila::arithmetic_type< T > > imag(const Array< n, m, T > &arg)
Return imaginary part of Array.
hila::arithmetic_type< T > squarenorm(const Array< n, m, T > &rhs)
Return square norm of Array.
Array< n, m, hila::arithmetic_type< T > > real(const Array< n, m, T > &arg)
Return real part of Array.
Vector< 4, double > convert_to_k() const
Convert momentum space CoordinateVector to wave number k, where -pi/2 < k_i <= pi_2 Utility function ...
The field class implements the standard methods for accessing Fields. Hilapp replaces the parity acce...
Field< A > real() const
Returns real part of Field.
field_struct *__restrict__ fs
Field::fs holds all of the field content in Field::field_struct.
Field< Complex< hila::arithmetic_type< T > > > FFT_real_to_complex(fft_direction fdir=fft_direction::forward) const
void check_alloc()
Allocate Field if it is not already allocated.
const auto & fill(const S rhs)
Matrix fill.
Matrix class which defines matrix operations.
void gather_data()
send column data to nodes
void setup_direction(Direction _dir)
Initialize fft to Direction dir.
void collect_data(const Field< T > &f)
void scatter_data()
inverse of gather_data
void reshuffle_data(Direction prev_dir)
void transform()
transform does the actual fft.
void save_result(Field< T > &f)
Inverse of the fft_collect_data: write fft'd data from receive_buf to field.
void full_transform(const Field< T > &input, Field< T > &result, const CoordinateVector &directions)
Do the transform itself (fft or reflect only)
Definition of Complex types.
This header file defines:
int pmod(const int a, const int b)
#define foralldir(d)
Macro to loop over (all) Direction(s)
Direction
Enumerator for direction that assigns integer to direction to be interpreted as unit vector.
constexpr Parity ALL
bit pattern: 011
This file defines all includes for HILA.
fft_direction
define a class for FFT direction
void init_pencil_direction(Direction d)
Initialize fft direction - defined in fft.cpp.
size_t pencil_get_buffer_offsets(const Direction dir, const size_t elements, CoordinateVector &offset, CoordinateVector &nmin)
void FFT_field(const Field< T > &input, Field< T > &result, const CoordinateVector &directions, fft_direction fftdir=fft_direction::forward)
int FFT_complex_to_real_loc(const CoordinateVector &cv)
This files containts definitions for the Field class and the classes required to define it such as fi...
int myrank()
rank of this node