#ifndef ReductionVector_H_
#define ReductionVector_H_
bool comm_is_on = false;      // true while a non-blocking reduction is in flight

bool is_allreduce_ = true;    // reduce to all ranks (allreduce) or to rank 0 only
bool is_nonblocking_ = false; // use the non-blocking MPI calls
bool is_delayed_ = false;     // delay the reduction until reduce() is called

bool delay_is_on = false;     // a delayed reduction is pending
bool is_delayed_sum = true;   // the pending delayed reduction is a sum (else a product)
void reduce_operation(MPI_Op operation) {

    // dtype is the elementary MPI datatype corresponding to hila::arithmetic_type<T>
    dtype = get_MPI_number_type<T>();

    if (dtype == MPI_BYTE) {
        assert(sizeof(T) < 0 && "Unknown number_type in vector reduction");
    }

    reduction_timer.start();

    // the count argument is the number of elementary arithmetic numbers in val:
    // a single T may consist of several hila::arithmetic_type<T> elements
    if (is_allreduce_) {
        if (is_nonblocking_) {
            MPI_Iallreduce(MPI_IN_PLACE, (void *)val.data(),
                           sizeof(T) * val.size() / sizeof(hila::arithmetic_type<T>),
                           dtype, operation, lattice.mpi_comm_lat, &request);
        } else {
            MPI_Allreduce(MPI_IN_PLACE, (void *)val.data(),
                          sizeof(T) * val.size() / sizeof(hila::arithmetic_type<T>),
                          dtype, operation, lattice.mpi_comm_lat);
        }
    } else if (hila::myrank() == 0) {
        // root rank: reduce in place
        if (is_nonblocking_) {
            MPI_Ireduce(MPI_IN_PLACE, (void *)val.data(),
                        sizeof(T) * val.size() / sizeof(hila::arithmetic_type<T>),
                        dtype, operation, 0, lattice.mpi_comm_lat, &request);
        } else {
            MPI_Reduce(MPI_IN_PLACE, (void *)val.data(),
                       sizeof(T) * val.size() / sizeof(hila::arithmetic_type<T>),
                       dtype, operation, 0, lattice.mpi_comm_lat);
        }
    } else {
        // other ranks: send the local data to rank 0
        if (is_nonblocking_) {
            MPI_Ireduce((void *)val.data(), (void *)val.data(),
                        sizeof(T) * val.size() / sizeof(hila::arithmetic_type<T>),
                        dtype, operation, 0, lattice.mpi_comm_lat, &request);
        } else {
            MPI_Reduce((void *)val.data(), (void *)val.data(),
                       sizeof(T) * val.size() / sizeof(hila::arithmetic_type<T>),
                       dtype, operation, 0, lattice.mpi_comm_lat);
        }
    }

    reduction_timer.stop();
}
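The count passed to MPI above converts the vector of (possibly compound) elements T into a count of elementary numbers, because MPI only understands the elementary datatype. The sketch below shows the same idea with plain MPI and std::complex<double> as the compound type; it is a stand-alone illustration, not part of this header, and all names in it are made up.

#include <mpi.h>
#include <complex>
#include <cstdio>
#include <vector>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // each rank holds 4 complex numbers; a complex<double> consists of 2 doubles
    std::vector<std::complex<double>> val(4, std::complex<double>(rank + 1.0, 0.5));

    // number of elementary doubles, the analogue of
    // sizeof(T) * val.size() / sizeof(hila::arithmetic_type<T>)
    int count = (int)(sizeof(std::complex<double>) * val.size() / sizeof(double));

    // in-place sum over all ranks: val is both send and receive buffer
    MPI_Allreduce(MPI_IN_PLACE, (void *)val.data(), count, MPI_DOUBLE, MPI_SUM,
                  MPI_COMM_WORLD);

    if (rank == 0)
        std::printf("val[0] = (%g, %g)\n", val[0].real(), val[0].imag());

    MPI_Finalize();
    return 0;
}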
// std::vector-style iterators
using iterator = typename std::vector<T>::iterator;
using const_iterator = typename std::vector<T>::const_iterator;

const_iterator begin() const {
    return val.begin();
}
const_iterator end() const {
    return val.end();
}
~ReductionVector() {
    // clean up a communication that may still be in progress
    if (comm_is_on)
        MPI_Cancel(&request);
}
bool is_allreduce() {
    return is_allreduce_;
}
bool is_nonblocking() {
    return is_nonblocking_;
}
template <typename S, std::enable_if_t<std::is_assignable<T &, S>::value, int> = 0>
    // sum reduction: a sum must not be mixed with a pending delayed product
    if (delay_is_on && is_delayed_sum == false) {
        assert(0 && "Cannot mix sum and product reductions!");
    }
    // ...
    reduce_operation(MPI_SUM);
    // product reduction: implemented only for elementary number types
    static_assert(std::is_same<T, int>::value || std::is_same<T, long>::value ||
                      std::is_same<T, float>::value || std::is_same<T, double>::value ||
                      std::is_same<T, long double>::value,
                  "Type not implemented for product reduction");

    if (delay_is_on && is_delayed_sum == true) {
        assert(0 && "Cannot mix sum and product reductions!");
    }
    // ...
    reduce_operation(MPI_PROD);
    // wait(): block until a pending non-blocking reduction has completed
    reduction_wait_timer.start();
    MPI_Status status;
    MPI_Wait(&request, &status);
    reduction_wait_timer.stop();
    // reduce(): a delayed reduction completes with the operation recorded earlier
    if (is_delayed_sum)
        reduce_operation(MPI_SUM);
    else
        reduce_operation(MPI_PROD);
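In non-blocking mode the class follows the standard MPI pattern: MPI_Iallreduce (or MPI_Ireduce) starts the reduction, other work can proceed, and MPI_Wait completes it, which is what wait() does. A minimal stand-alone sketch of that pattern with plain MPI; the buffer name and sizes are illustrative only.

#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    std::vector<double> buf(8, 1.0);
    MPI_Request request;

    // start the reduction without blocking
    MPI_Iallreduce(MPI_IN_PLACE, buf.data(), (int)buf.size(), MPI_DOUBLE, MPI_SUM,
                   MPI_COMM_WORLD, &request);

    // ... independent computation can overlap with the communication here ...

    // complete the reduction before the result is used
    MPI_Wait(&request, MPI_STATUS_IGNORE);

    if (rank == 0)
        std::printf("buf[0] = %g\n", buf[0]);

    MPI_Finalize();
    return 0;
}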
// return the contents as a plain std::vector
std::vector<T> vector() {
    return val;
}
void resize(size_t count) {
    val.resize(count);
}
void resize(size_t count, const T &v) {
    val.resize(count, v);
}
void push_back(const T &v) {
    val.push_back(v);
}
Member function summary:

ReductionVector() = default

~ReductionVector()
    Destructor cleans up communications if they are still in progress.

ReductionVector &allreduce(bool b = true)
    allreduce(bool) turns allreduce on or off. By default turns it on.

ReductionVector &nonblocking(bool b = true)
    nonblocking(bool) turns non-blocking reduction on or off. By default turns it on.

ReductionVector &delayed(bool b = true)
    delayed(bool) turns delayed (deferred) reduction on or off. By default turns it on.

void init_sum()
    Initialize for a sum reduction; to be called before every site loop.

void init_product()
    Initialize for a product reduction; to be called before every site loop.

void operator=(const S &rhs)
    Assignment from any type S that is assignable to T.

void operator=(std::nullptr_t np)
    Assignment from 0.

T &operator[](const int i)
    Access operators - in practice these do everything already.

size_t size() const
    Forwarded from the underlying std::vector.

T *data()
    data() returns a pointer to the raw storage.

std::vector<T> vector()
    Returns the contents as a std::vector<T>.

void start_reduce()
    For delayed reduction, starts or completes the reduction operation.

void reduce()
    Completes a non-blocking or delayed reduction.

void wait()
    Wait for the MPI reduction to complete, if one is currently in progress.

int myrank()
    Rank of this node.
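Putting the interface together: a typical use is to configure the reduction, accumulate into the elements inside a site loop, and complete the reduction afterwards. The following is a minimal usage sketch only; it assumes HILA's hila.h header, onsites() loops, X.coordinate() and hila::out0, and the lattice size, field and variable names are made up for the example.

#include "hila.h"

int main(int argc, char **argv) {
    hila::initialize(argc, argv);
    lattice.setup({32, 32, 32, 32});

    Field<double> f = 1.0;

    // one accumulator per z-slice of the lattice
    ReductionVector<double> slice_sum;
    slice_sum.resize(lattice.size(e_z));
    slice_sum.allreduce(false).delayed(true);
    slice_sum.init_sum(); // call before every site loop

    onsites(ALL) {
        slice_sum[X.coordinate(e_z)] += f[X];
    }

    slice_sum.reduce(); // complete the delayed reduction

    // with allreduce(false) the result is complete on rank 0 only;
    // hila::out0 writes from rank 0
    for (int i = 0; i < (int)slice_sum.size(); ++i)
        hila::out0 << "slice " << i << ": " << slice_sum[i] << '\n';

    hila::finishrun();
}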