2019-05-12 21:12:07 +02:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <array>
|
|
|
|
|
|
|
|
#include <general.hh>
|
|
|
|
#include <grid_fft.hh>
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
template <typename data_t, typename derived_t>
|
2019-05-20 17:23:52 +02:00
|
|
|
class BaseConvolver
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
|
|
|
protected:
|
2019-10-03 00:22:02 +02:00
|
|
|
std::array<size_t, 3> np_;
|
|
|
|
std::array<real_t, 3> length_;
|
|
|
|
|
2019-05-12 21:12:07 +02:00
|
|
|
public:
|
2019-10-03 00:22:02 +02:00
|
|
|
BaseConvolver(const std::array<size_t, 3> &N, const std::array<real_t, 3> &L)
|
|
|
|
: np_(N), length_(L) {}
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-10-10 21:47:02 +02:00
|
|
|
BaseConvolver( const BaseConvolver& ) = delete;
|
|
|
|
|
2019-05-20 17:23:52 +02:00
|
|
|
virtual ~BaseConvolver() {}
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-05-20 17:23:52 +02:00
|
|
|
// implements convolution of two Fourier-space fields
|
2019-10-03 00:22:02 +02:00
|
|
|
template <typename kfunc1, typename kfunc2, typename opp>
|
|
|
|
void convolve2(kfunc1 kf1, kfunc2 kf2, opp op) {}
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-05-20 17:23:52 +02:00
|
|
|
// implements convolution of three Fourier-space fields
|
2019-10-03 00:22:02 +02:00
|
|
|
template <typename kfunc1, typename kfunc2, typename kfunc3, typename opp>
|
|
|
|
void convolve3(kfunc1 kf1, kfunc2 kf2, kfunc3 kf3, opp op) {}
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-05-20 17:23:52 +02:00
|
|
|
public:
|
2019-10-10 21:47:02 +02:00
|
|
|
|
|
|
|
template <typename opp>
|
|
|
|
void convolve_Gradients(Grid_FFT<data_t> &inl, const std::array<int, 1> &d1l,
|
|
|
|
Grid_FFT<data_t> &inr, const std::array<int, 1> &d1r,
|
|
|
|
opp output_op)
|
|
|
|
{
|
|
|
|
// transform to FS in case fields are not
|
|
|
|
inl.FourierTransformForward();
|
|
|
|
inr.FourierTransformForward();
|
|
|
|
// perform convolution of Hessians
|
|
|
|
static_cast<derived_t &>(*this).convolve2(
|
2019-10-15 19:48:38 +02:00
|
|
|
[&inl,&d1l](size_t i, size_t j, size_t k) -> ccomplex_t {
|
|
|
|
auto grad1 = inl.gradient(d1l[0],{i,j,k});
|
|
|
|
return grad1*inl.kelem(i, j, k);
|
2019-10-10 21:47:02 +02:00
|
|
|
},
|
2019-10-15 19:48:38 +02:00
|
|
|
[&inr,&d1r](size_t i, size_t j, size_t k) -> ccomplex_t {
|
|
|
|
auto grad1 = inr.gradient(d1r[0],{i,j,k});
|
|
|
|
return grad1*inr.kelem(i, j, k);
|
2019-10-10 21:47:02 +02:00
|
|
|
},
|
|
|
|
output_op);
|
|
|
|
}
|
|
|
|
|
2019-10-15 19:48:38 +02:00
|
|
|
// template <typename opp>
|
|
|
|
// void convolve_Gradient_and_Hessian(Grid_FFT<data_t> &inl, const std::array<int, 1> &d1l,
|
|
|
|
// Grid_FFT<data_t> &inr, const std::array<int, 2> &d2r,
|
|
|
|
// opp output_op)
|
|
|
|
// {
|
|
|
|
// // transform to FS in case fields are not
|
|
|
|
// inl.FourierTransformForward();
|
|
|
|
// inr.FourierTransformForward();
|
|
|
|
// // perform convolution of Hessians
|
|
|
|
// static_cast<derived_t &>(*this).convolve2(
|
|
|
|
// [&](size_t i, size_t j, size_t k) -> ccomplex_t {
|
|
|
|
// auto kk = inl.template get_k<real_t>(i, j, k);
|
|
|
|
// return ccomplex_t(0.0, -kk[d1l[0]]) * inl.kelem(i, j, k);
|
|
|
|
// },
|
|
|
|
// [&](size_t i, size_t j, size_t k) -> ccomplex_t {
|
|
|
|
// auto kk = inr.template get_k<real_t>(i, j, k);
|
|
|
|
// return -kk[d2r[0]] * kk[d2r[1]] * inr.kelem(i, j, k);
|
|
|
|
// },
|
|
|
|
// output_op);
|
|
|
|
// }
|
2019-05-27 22:41:39 +02:00
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
template <typename opp>
|
|
|
|
void convolve_Hessians(Grid_FFT<data_t> &inl, const std::array<int, 2> &d2l,
|
|
|
|
Grid_FFT<data_t> &inr, const std::array<int, 2> &d2r,
|
|
|
|
opp output_op)
|
|
|
|
{
|
2019-05-12 21:12:07 +02:00
|
|
|
// transform to FS in case fields are not
|
|
|
|
inl.FourierTransformForward();
|
|
|
|
inr.FourierTransformForward();
|
|
|
|
// perform convolution of Hessians
|
2019-10-03 00:22:02 +02:00
|
|
|
static_cast<derived_t &>(*this).convolve2(
|
2019-10-15 19:48:38 +02:00
|
|
|
[&inl,&d2l](size_t i, size_t j, size_t k) -> ccomplex_t {
|
|
|
|
auto grad1 = inl.gradient(d2l[0],{i,j,k});
|
|
|
|
auto grad2 = inl.gradient(d2l[1],{i,j,k});
|
|
|
|
return grad1*grad2*inl.kelem(i, j, k);
|
2019-10-03 00:22:02 +02:00
|
|
|
},
|
2019-10-15 19:48:38 +02:00
|
|
|
[&inr,&d2r](size_t i, size_t j, size_t k) -> ccomplex_t {
|
|
|
|
auto grad1 = inr.gradient(d2r[0],{i,j,k});
|
|
|
|
auto grad2 = inr.gradient(d2r[1],{i,j,k});
|
|
|
|
return grad1*grad2*inr.kelem(i, j, k);
|
2019-05-12 21:12:07 +02:00
|
|
|
},
|
2019-10-03 00:22:02 +02:00
|
|
|
output_op);
|
2019-05-12 21:12:07 +02:00
|
|
|
}
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
template <typename opp>
|
|
|
|
void convolve_Hessians(Grid_FFT<data_t> &inl, const std::array<int, 2> &d2l,
|
|
|
|
Grid_FFT<data_t> &inm, const std::array<int, 2> &d2m,
|
|
|
|
Grid_FFT<data_t> &inr, const std::array<int, 2> &d2r,
|
|
|
|
opp output_op)
|
2019-05-14 12:29:27 +02:00
|
|
|
{
|
|
|
|
// transform to FS in case fields are not
|
|
|
|
inl.FourierTransformForward();
|
|
|
|
inm.FourierTransformForward();
|
|
|
|
inr.FourierTransformForward();
|
|
|
|
// perform convolution of Hessians
|
2019-10-03 00:22:02 +02:00
|
|
|
static_cast<derived_t &>(*this).convolve3(
|
|
|
|
[&inl, &d2l](size_t i, size_t j, size_t k) -> ccomplex_t {
|
2019-10-15 19:48:38 +02:00
|
|
|
auto grad1 = inl.gradient(d2l[0],{i,j,k});
|
|
|
|
auto grad2 = inl.gradient(d2l[1],{i,j,k});
|
|
|
|
return grad1*grad2*inl.kelem(i, j, k);
|
2019-05-14 12:29:27 +02:00
|
|
|
},
|
2019-10-03 00:22:02 +02:00
|
|
|
[&inm, &d2m](size_t i, size_t j, size_t k) -> ccomplex_t {
|
2019-10-15 19:48:38 +02:00
|
|
|
auto grad1 = inm.gradient(d2m[0],{i,j,k});
|
|
|
|
auto grad2 = inm.gradient(d2m[1],{i,j,k});
|
|
|
|
return grad1*grad2*inm.kelem(i, j, k);
|
2019-05-14 12:29:27 +02:00
|
|
|
},
|
2019-10-03 00:22:02 +02:00
|
|
|
[&inr, &d2r](size_t i, size_t j, size_t k) -> ccomplex_t {
|
2019-10-15 19:48:38 +02:00
|
|
|
auto grad1 = inr.gradient(d2r[0],{i,j,k});
|
|
|
|
auto grad2 = inr.gradient(d2r[1],{i,j,k});
|
|
|
|
return grad1*grad2*inr.kelem(i, j, k);
|
2019-10-03 00:22:02 +02:00
|
|
|
},
|
|
|
|
output_op);
|
2019-05-14 12:29:27 +02:00
|
|
|
}
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
template <typename opp>
|
|
|
|
void convolve_SumOfHessians(Grid_FFT<data_t> &inl, const std::array<int, 2> &d2l,
|
|
|
|
Grid_FFT<data_t> &inr, const std::array<int, 2> &d2r1, const std::array<int, 2> &d2r2,
|
|
|
|
opp output_op)
|
2019-05-23 14:53:11 +02:00
|
|
|
{
|
2019-05-12 21:12:07 +02:00
|
|
|
// transform to FS in case fields are not
|
|
|
|
inl.FourierTransformForward();
|
|
|
|
inr.FourierTransformForward();
|
|
|
|
// perform convolution of Hessians
|
2019-10-03 00:22:02 +02:00
|
|
|
static_cast<derived_t &>(*this).convolve2(
|
|
|
|
[&inl, &d2l](size_t i, size_t j, size_t k) -> ccomplex_t {
|
2019-10-15 19:48:38 +02:00
|
|
|
auto grad1 = inl.gradient(d2l[0],{i,j,k});
|
|
|
|
auto grad2 = inl.gradient(d2l[1],{i,j,k});
|
|
|
|
return grad1*grad2*inl.kelem(i, j, k);
|
2019-10-03 00:22:02 +02:00
|
|
|
},
|
|
|
|
[&inr, &d2r1, &d2r2](size_t i, size_t j, size_t k) -> ccomplex_t {
|
2019-10-15 19:48:38 +02:00
|
|
|
auto grad11 = inr.gradient(d2r1[0],{i,j,k});
|
|
|
|
auto grad12 = inr.gradient(d2r1[1],{i,j,k});
|
|
|
|
auto grad21 = inr.gradient(d2r2[0],{i,j,k});
|
|
|
|
auto grad22 = inr.gradient(d2r2[1],{i,j,k});
|
|
|
|
return (grad11*grad12+grad21*grad22)*inr.kelem(i, j, k);
|
2019-05-12 21:12:07 +02:00
|
|
|
},
|
2019-10-03 00:22:02 +02:00
|
|
|
output_op);
|
2019-05-12 21:12:07 +02:00
|
|
|
}
|
2019-05-21 22:26:15 +02:00
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
template <typename opp>
|
|
|
|
void convolve_DifferenceOfHessians(Grid_FFT<data_t> &inl, const std::array<int, 2> &d2l,
|
|
|
|
Grid_FFT<data_t> &inr, const std::array<int, 2> &d2r1, const std::array<int, 2> &d2r2,
|
|
|
|
opp output_op)
|
2019-05-23 14:53:11 +02:00
|
|
|
{
|
2019-05-21 22:26:15 +02:00
|
|
|
// transform to FS in case fields are not
|
|
|
|
inl.FourierTransformForward();
|
|
|
|
inr.FourierTransformForward();
|
|
|
|
// perform convolution of Hessians
|
2019-10-03 00:22:02 +02:00
|
|
|
static_cast<derived_t &>(*this).convolve2(
|
|
|
|
[&inl, &d2l](size_t i, size_t j, size_t k) -> ccomplex_t {
|
2019-10-15 19:48:38 +02:00
|
|
|
auto grad1 = inl.gradient(d2l[0],{i,j,k});
|
|
|
|
auto grad2 = inl.gradient(d2l[1],{i,j,k});
|
|
|
|
return grad1*grad2*inl.kelem(i, j, k);
|
2019-10-03 00:22:02 +02:00
|
|
|
},
|
|
|
|
[&inr, &d2r1, &d2r2](size_t i, size_t j, size_t k) -> ccomplex_t {
|
2019-10-15 19:48:38 +02:00
|
|
|
auto grad11 = inr.gradient(d2r1[0],{i,j,k});
|
|
|
|
auto grad12 = inr.gradient(d2r1[1],{i,j,k});
|
|
|
|
auto grad21 = inr.gradient(d2r2[0],{i,j,k});
|
|
|
|
auto grad22 = inr.gradient(d2r2[1],{i,j,k});
|
|
|
|
return (grad11*grad12-grad21*grad22)*inr.kelem(i, j, k);
|
2019-05-21 22:26:15 +02:00
|
|
|
},
|
2019-10-03 00:22:02 +02:00
|
|
|
output_op);
|
2019-05-21 22:26:15 +02:00
|
|
|
}
|
2019-05-20 17:23:52 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
//! naive convolution class, disrespecting aliasing
|
2019-10-03 00:22:02 +02:00
|
|
|
template <typename data_t>
|
2019-05-20 17:23:52 +02:00
|
|
|
class NaiveConvolver : public BaseConvolver<data_t, NaiveConvolver<data_t>>
|
|
|
|
{
|
|
|
|
protected:
|
|
|
|
Grid_FFT<data_t> *fbuf1_, *fbuf2_;
|
|
|
|
|
|
|
|
using BaseConvolver<data_t, NaiveConvolver<data_t>>::np_;
|
|
|
|
using BaseConvolver<data_t, NaiveConvolver<data_t>>::length_;
|
|
|
|
|
|
|
|
public:
|
2019-10-03 00:22:02 +02:00
|
|
|
NaiveConvolver(const std::array<size_t, 3> &N, const std::array<real_t, 3> &L)
|
|
|
|
: BaseConvolver<data_t, NaiveConvolver<data_t>>(N, L)
|
2019-05-20 17:23:52 +02:00
|
|
|
{
|
|
|
|
fbuf1_ = new Grid_FFT<data_t>(N, length_, kspace_id);
|
|
|
|
fbuf2_ = new Grid_FFT<data_t>(N, length_, kspace_id);
|
|
|
|
}
|
|
|
|
|
|
|
|
~NaiveConvolver()
|
|
|
|
{
|
|
|
|
delete fbuf1_;
|
|
|
|
delete fbuf2_;
|
|
|
|
}
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
template <typename kfunc1, typename kfunc2, typename opp>
|
|
|
|
void convolve2(kfunc1 kf1, kfunc2 kf2, opp output_op)
|
2019-05-20 17:23:52 +02:00
|
|
|
{
|
|
|
|
//... prepare data 1
|
|
|
|
fbuf1_->FourierTransformForward(false);
|
2019-10-03 00:22:02 +02:00
|
|
|
this->copy_in(kf1, *fbuf1_);
|
|
|
|
|
2019-05-20 17:23:52 +02:00
|
|
|
//... prepare data 2
|
|
|
|
fbuf2_->FourierTransformForward(false);
|
2019-10-03 00:22:02 +02:00
|
|
|
this->copy_in(kf2, *fbuf2_);
|
2019-05-20 17:23:52 +02:00
|
|
|
|
|
|
|
//... convolve
|
|
|
|
fbuf1_->FourierTransformBackward();
|
|
|
|
fbuf2_->FourierTransformBackward();
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
#pragma omp parallel for
|
|
|
|
for (size_t i = 0; i < fbuf1_->ntot_; ++i)
|
|
|
|
{
|
2019-05-20 17:23:52 +02:00
|
|
|
(*fbuf2_).relem(i) *= (*fbuf1_).relem(i);
|
|
|
|
}
|
|
|
|
fbuf2_->FourierTransformForward();
|
2019-10-09 17:25:15 +02:00
|
|
|
// fbuf2_->dealias();
|
2019-10-03 00:22:02 +02:00
|
|
|
//... copy data back
|
|
|
|
#pragma omp parallel for
|
|
|
|
for (size_t i = 0; i < fbuf2_->ntot_; ++i)
|
|
|
|
{
|
|
|
|
output_op(i, (*fbuf2_)[i]);
|
2019-05-23 14:53:11 +02:00
|
|
|
}
|
2019-10-09 17:25:15 +02:00
|
|
|
|
|
|
|
|
2019-05-20 17:23:52 +02:00
|
|
|
}
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
template <typename kfunc1, typename kfunc2, typename kfunc3, typename opp>
|
|
|
|
void convolve3(kfunc1 kf1, kfunc2 kf2, kfunc3 kf3, opp output_op)
|
2019-05-20 17:23:52 +02:00
|
|
|
{
|
|
|
|
//... prepare data 1
|
|
|
|
fbuf1_->FourierTransformForward(false);
|
2019-10-03 00:22:02 +02:00
|
|
|
this->copy_in(kf1, *fbuf1_);
|
|
|
|
|
2019-05-20 17:23:52 +02:00
|
|
|
//... prepare data 2
|
|
|
|
fbuf2_->FourierTransformForward(false);
|
2019-10-03 00:22:02 +02:00
|
|
|
this->copy_in(kf2, *fbuf2_);
|
2019-05-20 17:23:52 +02:00
|
|
|
|
|
|
|
//... convolve
|
|
|
|
fbuf1_->FourierTransformBackward();
|
|
|
|
fbuf2_->FourierTransformBackward();
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
#pragma omp parallel for
|
|
|
|
for (size_t i = 0; i < fbuf1_->ntot_; ++i)
|
|
|
|
{
|
2019-05-20 17:23:52 +02:00
|
|
|
(*fbuf2_).relem(i) *= (*fbuf1_).relem(i);
|
|
|
|
}
|
|
|
|
|
|
|
|
//... prepare data 2
|
|
|
|
fbuf1_->FourierTransformForward(false);
|
2019-10-03 00:22:02 +02:00
|
|
|
this->copy_in(kf3, *fbuf1_);
|
2019-05-20 17:23:52 +02:00
|
|
|
|
|
|
|
//... convolve
|
|
|
|
fbuf1_->FourierTransformBackward();
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
#pragma omp parallel for
|
|
|
|
for (size_t i = 0; i < fbuf1_->ntot_; ++i)
|
|
|
|
{
|
2019-05-20 17:23:52 +02:00
|
|
|
(*fbuf2_).relem(i) *= (*fbuf1_).relem(i);
|
|
|
|
}
|
|
|
|
|
|
|
|
fbuf2_->FourierTransformForward();
|
2019-10-03 00:22:02 +02:00
|
|
|
//... copy data back
|
|
|
|
#pragma omp parallel for
|
|
|
|
for (size_t i = 0; i < fbuf2_->ntot_; ++i)
|
|
|
|
{
|
|
|
|
output_op(i, (*fbuf2_)[i]);
|
2019-05-23 14:53:11 +02:00
|
|
|
}
|
2019-05-20 17:23:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2019-10-03 00:22:02 +02:00
|
|
|
template <typename kfunc>
|
|
|
|
void copy_in(kfunc kf, Grid_FFT<data_t> &g)
|
2019-05-20 17:23:52 +02:00
|
|
|
{
|
2019-10-03 00:22:02 +02:00
|
|
|
#pragma omp parallel for
|
|
|
|
for (size_t i = 0; i < g.size(0); ++i)
|
|
|
|
{
|
|
|
|
for (size_t j = 0; j < g.size(1); ++j)
|
|
|
|
{
|
|
|
|
for (size_t k = 0; k < g.size(2); ++k)
|
|
|
|
{
|
2019-05-20 17:23:52 +02:00
|
|
|
g.kelem(i, j, k) = kf(i, j, k);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
//! convolution class, respecting Orszag's 3/2 rule
|
2019-10-03 00:22:02 +02:00
|
|
|
template <typename data_t>
|
2019-05-20 17:23:52 +02:00
|
|
|
class OrszagConvolver : public BaseConvolver<data_t, OrszagConvolver<data_t>>
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
Grid_FFT<data_t> *f1p_, *f2p_;
|
2019-10-02 20:57:40 +02:00
|
|
|
Grid_FFT<data_t> *fbuf_;
|
2019-05-20 17:23:52 +02:00
|
|
|
|
|
|
|
using BaseConvolver<data_t, OrszagConvolver<data_t>>::np_;
|
|
|
|
using BaseConvolver<data_t, OrszagConvolver<data_t>>::length_;
|
2019-10-03 00:22:02 +02:00
|
|
|
|
2019-05-20 17:23:52 +02:00
|
|
|
ccomplex_t *crecvbuf_;
|
|
|
|
real_t *recvbuf_;
|
|
|
|
size_t maxslicesz_;
|
|
|
|
std::vector<ptrdiff_t> offsets_, offsetsp_;
|
|
|
|
std::vector<size_t> sizes_, sizesp_;
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
int get_task(ptrdiff_t index, const std::vector<ptrdiff_t> &offsets, const std::vector<size_t> &sizes, const int ntasks)
|
2019-05-20 17:23:52 +02:00
|
|
|
{
|
|
|
|
int itask = 0;
|
2019-10-03 00:22:02 +02:00
|
|
|
while (itask < ntasks - 1 && offsets[itask + 1] <= index)
|
|
|
|
++itask;
|
2019-05-20 17:23:52 +02:00
|
|
|
return itask;
|
|
|
|
}
|
|
|
|
|
|
|
|
public:
|
2019-10-03 00:22:02 +02:00
|
|
|
OrszagConvolver(const std::array<size_t, 3> &N, const std::array<real_t, 3> &L)
|
|
|
|
: BaseConvolver<data_t, OrszagConvolver<data_t>>({3 * N[0] / 2, 3 * N[1] / 2, 3 * N[2] / 2}, L)
|
2019-05-20 17:23:52 +02:00
|
|
|
{
|
|
|
|
//... create temporaries
|
2019-10-03 00:22:02 +02:00
|
|
|
f1p_ = new Grid_FFT<data_t>(np_, length_, kspace_id);
|
|
|
|
f2p_ = new Grid_FFT<data_t>(np_, length_, kspace_id);
|
2019-05-20 17:23:52 +02:00
|
|
|
fbuf_ = new Grid_FFT<data_t>(N, length_, kspace_id); // needed for MPI, or for triple conv.
|
|
|
|
|
|
|
|
#if defined(USE_MPI)
|
|
|
|
maxslicesz_ = f1p_->sizes_[1] * f1p_->sizes_[3] * 2;
|
|
|
|
|
|
|
|
crecvbuf_ = new ccomplex_t[maxslicesz_ / 2];
|
|
|
|
recvbuf_ = reinterpret_cast<real_t *>(&crecvbuf_[0]);
|
|
|
|
|
2020-04-02 11:18:21 +02:00
|
|
|
int ntasks(MPI::get_size());
|
2019-05-20 17:23:52 +02:00
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
offsets_.assign(ntasks, 0);
|
|
|
|
offsetsp_.assign(ntasks, 0);
|
|
|
|
sizes_.assign(ntasks, 0);
|
|
|
|
sizesp_.assign(ntasks, 0);
|
2019-05-20 17:23:52 +02:00
|
|
|
|
|
|
|
size_t tsize = N[0], tsizep = f1p_->size(0);
|
|
|
|
|
|
|
|
MPI_Allgather(&fbuf_->local_1_start_, 1, MPI_LONG_LONG, &offsets_[0], 1,
|
2019-10-03 00:22:02 +02:00
|
|
|
MPI_LONG_LONG, MPI_COMM_WORLD);
|
2019-05-20 17:23:52 +02:00
|
|
|
MPI_Allgather(&f1p_->local_1_start_, 1, MPI_LONG_LONG, &offsetsp_[0], 1,
|
2019-10-03 00:22:02 +02:00
|
|
|
MPI_LONG_LONG, MPI_COMM_WORLD);
|
2019-05-20 17:23:52 +02:00
|
|
|
MPI_Allgather(&tsize, 1, MPI_LONG_LONG, &sizes_[0], 1, MPI_LONG_LONG,
|
2019-10-03 00:22:02 +02:00
|
|
|
MPI_COMM_WORLD);
|
2019-05-20 17:23:52 +02:00
|
|
|
MPI_Allgather(&tsizep, 1, MPI_LONG_LONG, &sizesp_[0], 1, MPI_LONG_LONG,
|
2019-10-03 00:22:02 +02:00
|
|
|
MPI_COMM_WORLD);
|
2019-05-20 17:23:52 +02:00
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
~OrszagConvolver()
|
|
|
|
{
|
|
|
|
delete f1p_;
|
|
|
|
delete f2p_;
|
|
|
|
delete fbuf_;
|
|
|
|
#if defined(USE_MPI)
|
|
|
|
delete[] crecvbuf_;
|
|
|
|
#endif
|
|
|
|
}
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
template <typename kfunc1, typename kfunc2, typename opp>
|
|
|
|
void convolve2(kfunc1 kf1, kfunc2 kf2, opp output_op)
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
|
|
|
//... prepare data 1
|
|
|
|
f1p_->FourierTransformForward(false);
|
2019-10-03 00:22:02 +02:00
|
|
|
this->pad_insert(kf1, *f1p_);
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-05-14 12:29:27 +02:00
|
|
|
//... prepare data 2
|
2019-05-12 21:12:07 +02:00
|
|
|
f2p_->FourierTransformForward(false);
|
2019-10-03 00:22:02 +02:00
|
|
|
this->pad_insert(kf2, *f2p_);
|
2019-05-12 21:12:07 +02:00
|
|
|
|
|
|
|
//... convolve
|
|
|
|
f1p_->FourierTransformBackward();
|
|
|
|
f2p_->FourierTransformBackward();
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
#pragma omp parallel for
|
|
|
|
for (size_t i = 0; i < f1p_->ntot_; ++i)
|
|
|
|
{
|
2019-05-12 21:12:07 +02:00
|
|
|
(*f2p_).relem(i) *= (*f1p_).relem(i);
|
|
|
|
}
|
|
|
|
f2p_->FourierTransformForward();
|
|
|
|
//... copy data back
|
2019-05-23 14:53:11 +02:00
|
|
|
unpad(*f2p_, output_op);
|
2019-05-12 21:12:07 +02:00
|
|
|
}
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
template <typename kfunc1, typename kfunc2, typename kfunc3, typename opp>
|
|
|
|
void convolve3(kfunc1 kf1, kfunc2 kf2, kfunc3 kf3, opp output_op)
|
2019-05-14 12:29:27 +02:00
|
|
|
{
|
2019-10-03 00:22:02 +02:00
|
|
|
auto assign_to = [](auto &g) { return [&](auto i, auto v) { g[i] = v; }; };
|
2019-10-09 17:25:15 +02:00
|
|
|
fbuf_->FourierTransformForward(false);
|
2019-10-03 00:22:02 +02:00
|
|
|
convolve2(kf1, kf2, assign_to(*fbuf_));
|
|
|
|
convolve2([&](size_t i, size_t j, size_t k) -> ccomplex_t { return fbuf_->kelem(i, j, k); }, kf3, output_op);
|
2019-05-14 12:29:27 +02:00
|
|
|
}
|
|
|
|
|
2019-05-23 14:53:11 +02:00
|
|
|
// template< typename opp >
|
|
|
|
// void test_pad_unpad( Grid_FFT<data_t> & in, Grid_FFT<data_t> & res, opp op )
|
|
|
|
// {
|
|
|
|
// //... prepare data 1
|
|
|
|
// f1p_->FourierTransformForward(false);
|
|
|
|
// this->pad_insert( [&in]( size_t i, size_t j, size_t k ){return in.kelem(i,j,k);}, *f1p_ );
|
|
|
|
// f1p_->FourierTransformBackward();
|
|
|
|
// f1p_->FourierTransformForward();
|
|
|
|
// res.FourierTransformForward();
|
|
|
|
// unpad(*f1p_, res, op);
|
|
|
|
// }
|
2019-05-14 12:29:27 +02:00
|
|
|
|
2019-05-12 21:12:07 +02:00
|
|
|
private:
|
|
|
|
template <typename kdep_functor>
|
2019-10-03 00:22:02 +02:00
|
|
|
void pad_insert(kdep_functor kfunc, Grid_FFT<data_t> &fp)
|
|
|
|
{
|
|
|
|
assert(fp.space_ == kspace_id);
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
const double rfac = std::pow(1.5, 1.5);
|
2019-05-12 21:12:07 +02:00
|
|
|
|
|
|
|
fp.zero();
|
2019-10-03 00:22:02 +02:00
|
|
|
|
|
|
|
#if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////
|
2019-05-12 21:12:07 +02:00
|
|
|
size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3};
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
#pragma omp parallel for
|
|
|
|
for (size_t i = 0; i < 2 * fp.size(0) / 3; ++i)
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-05-13 09:56:58 +02:00
|
|
|
size_t ip = (i > nhalf[0]) ? i + nhalf[0] : i;
|
2019-10-03 00:22:02 +02:00
|
|
|
for (size_t j = 0; j < 2 * fp.size(1) / 3; ++j)
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-05-13 09:56:58 +02:00
|
|
|
size_t jp = (j > nhalf[1]) ? j + nhalf[1] : j;
|
2019-10-03 00:22:02 +02:00
|
|
|
for (size_t k = 0; k < 2 * fp.size(2) / 3; ++k)
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-05-13 09:56:58 +02:00
|
|
|
size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k;
|
2019-05-12 21:12:07 +02:00
|
|
|
// if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue;
|
|
|
|
fp.kelem(ip, jp, kp) = kfunc(i, j, k) * rfac;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
#else /// then USE_MPI is defined ////////////////////////////////////////////////////////////
|
2019-05-12 21:12:07 +02:00
|
|
|
|
|
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
2019-05-14 12:29:27 +02:00
|
|
|
fbuf_->FourierTransformForward(false);
|
2019-05-12 21:12:07 +02:00
|
|
|
/////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
double tstart = get_wtime();
|
|
|
|
csoca::dlog << "[MPI] Started scatter for convolution" << std::endl;
|
|
|
|
|
|
|
|
//... collect offsets
|
|
|
|
|
2019-05-14 12:29:27 +02:00
|
|
|
assert(fbuf_->space_ == kspace_id);
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-05-14 12:29:27 +02:00
|
|
|
size_t nf[3] = {fbuf_->size(0), fbuf_->size(1), fbuf_->size(2)};
|
2019-05-12 21:12:07 +02:00
|
|
|
size_t nfp[3] = {fp.size(0), fp.size(1), fp.size(2)};
|
2019-05-21 00:23:03 +02:00
|
|
|
size_t fny[3] = {fbuf_->n_[1] / 2, fbuf_->n_[0] / 2, fbuf_->n_[2] / 2};
|
2019-05-12 21:12:07 +02:00
|
|
|
|
|
|
|
//... local size must be divisible by 2, otherwise this gets too complicated
|
2019-05-14 12:29:27 +02:00
|
|
|
assert(fbuf_->n_[1] % 2 == 0);
|
2019-10-03 00:22:02 +02:00
|
|
|
size_t slicesz = fbuf_->size(1) * fbuf_->size(3);
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
MPI_Datatype datatype =
|
|
|
|
(typeid(data_t) == typeid(float)) ? MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE;
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-05-14 12:29:27 +02:00
|
|
|
// fill MPI send buffer with results of kfunc
|
2019-10-03 00:22:02 +02:00
|
|
|
|
|
|
|
#pragma omp parallel for
|
2019-05-14 12:29:27 +02:00
|
|
|
for (size_t i = 0; i < fbuf_->size(0); ++i)
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-05-14 12:29:27 +02:00
|
|
|
for (size_t j = 0; j < fbuf_->size(1); ++j)
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-05-14 12:29:27 +02:00
|
|
|
for (size_t k = 0; k < fbuf_->size(2); ++k)
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-05-14 12:29:27 +02:00
|
|
|
fbuf_->kelem(i, j, k) = kfunc(i, j, k) * rfac;
|
2019-05-12 21:12:07 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-13 09:56:58 +02:00
|
|
|
MPI_Status status;
|
|
|
|
|
|
|
|
std::vector<MPI_Request> req;
|
|
|
|
MPI_Request temp_req;
|
2019-05-12 21:12:07 +02:00
|
|
|
|
|
|
|
// send data from buffer
|
|
|
|
for (size_t i = 0; i < nf[0]; ++i)
|
|
|
|
{
|
|
|
|
size_t iglobal = i + offsets_[CONFIG::MPI_task_rank];
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
if (iglobal <= fny[0])
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
|
|
|
int sendto = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
2019-05-14 12:29:27 +02:00
|
|
|
MPI_Isend(&fbuf_->kelem(i * slicesz), (int)slicesz, datatype, sendto,
|
2019-10-03 00:22:02 +02:00
|
|
|
(int)iglobal, MPI_COMM_WORLD, &temp_req);
|
2019-05-12 21:12:07 +02:00
|
|
|
req.push_back(temp_req);
|
2019-05-13 12:34:16 +02:00
|
|
|
// std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal << " to task " << sendto << ", size = " << slicesz << std::endl;
|
2019-05-12 21:12:07 +02:00
|
|
|
}
|
2019-10-03 00:22:02 +02:00
|
|
|
if (iglobal >= fny[0])
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-05-21 00:23:03 +02:00
|
|
|
int sendto = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
2019-05-14 12:29:27 +02:00
|
|
|
MPI_Isend(&fbuf_->kelem(i * slicesz), (int)slicesz, datatype, sendto,
|
2019-10-03 00:22:02 +02:00
|
|
|
(int)(iglobal + fny[0]), MPI_COMM_WORLD, &temp_req);
|
2019-05-12 21:12:07 +02:00
|
|
|
req.push_back(temp_req);
|
2019-05-13 12:34:16 +02:00
|
|
|
// std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal+fny[0] << " to task " << sendto << ", size = " << slicesz<< std::endl;
|
2019-05-12 21:12:07 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t i = 0; i < nfp[0]; ++i)
|
|
|
|
{
|
|
|
|
size_t iglobal = i + offsetsp_[CONFIG::MPI_task_rank];
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
if (iglobal <= fny[0] || iglobal >= 2 * fny[0])
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
|
|
|
int recvfrom = 0;
|
2019-05-21 00:23:03 +02:00
|
|
|
if (iglobal <= fny[0])
|
2019-05-12 21:12:07 +02:00
|
|
|
recvfrom = get_task(iglobal, offsets_, sizes_, CONFIG::MPI_task_size);
|
|
|
|
else
|
2019-05-21 00:23:03 +02:00
|
|
|
recvfrom = get_task(iglobal - fny[0], offsets_, sizes_, CONFIG::MPI_task_size);
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-05-13 12:34:16 +02:00
|
|
|
// std::cout << "task " << CONFIG::MPI_task_rank << " : receive #" << iglobal << " from task "
|
|
|
|
// << recvfrom << ", size = " << slicesz << ", " << crecvbuf_ << ", " << datatype << std::endl;
|
2019-05-24 10:46:07 +02:00
|
|
|
status.MPI_ERROR = MPI_SUCCESS;
|
2019-05-12 21:12:07 +02:00
|
|
|
|
|
|
|
MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, (int)iglobal,
|
2019-10-03 00:22:02 +02:00
|
|
|
MPI_COMM_WORLD, &status);
|
2019-05-13 12:34:16 +02:00
|
|
|
// std::cout << "---> ok! " << (bool)(status.MPI_ERROR==MPI_SUCCESS) << std::endl;
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-05-14 12:29:27 +02:00
|
|
|
assert(status.MPI_ERROR == MPI_SUCCESS);
|
2019-05-12 21:12:07 +02:00
|
|
|
|
|
|
|
for (size_t j = 0; j < nf[1]; ++j)
|
|
|
|
{
|
2019-05-21 00:23:03 +02:00
|
|
|
if (j <= fny[1])
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
|
|
|
size_t jp = j;
|
|
|
|
for (size_t k = 0; k < nf[2]; ++k)
|
|
|
|
{
|
2019-10-03 00:22:02 +02:00
|
|
|
if (typeid(data_t) == typeid(real_t))
|
|
|
|
{
|
2019-05-14 12:29:27 +02:00
|
|
|
fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
2019-10-03 00:22:02 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2019-05-21 00:23:03 +02:00
|
|
|
if (k <= fny[2])
|
2019-05-15 05:30:47 +02:00
|
|
|
fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
2019-05-21 00:23:03 +02:00
|
|
|
if (k >= fny[2])
|
2019-10-03 00:22:02 +02:00
|
|
|
fp.kelem(i, jp, k + nf[2] / 2) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
2019-05-15 05:30:47 +02:00
|
|
|
}
|
2019-05-12 21:12:07 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
if (j >= fny[1])
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-05-21 00:23:03 +02:00
|
|
|
size_t jp = j + fny[1];
|
2019-05-12 21:12:07 +02:00
|
|
|
for (size_t k = 0; k < nf[2]; ++k)
|
|
|
|
{
|
2019-10-03 00:22:02 +02:00
|
|
|
if (typeid(data_t) == typeid(real_t))
|
|
|
|
{
|
2019-05-14 12:29:27 +02:00
|
|
|
fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
2019-10-03 00:22:02 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2019-05-21 00:23:03 +02:00
|
|
|
if (k <= fny[2])
|
2019-05-15 05:30:47 +02:00
|
|
|
fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
2019-05-21 00:23:03 +02:00
|
|
|
if (k >= fny[2])
|
|
|
|
fp.kelem(i, jp, k + fny[2]) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
2019-05-15 05:30:47 +02:00
|
|
|
}
|
2019-05-12 21:12:07 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t i = 0; i < req.size(); ++i)
|
|
|
|
{
|
|
|
|
// need to set status as wait does not necessarily modify it
|
|
|
|
// c.f. http://www.open-mpi.org/community/lists/devel/2007/04/1402.php
|
|
|
|
status.MPI_ERROR = MPI_SUCCESS;
|
2019-05-21 00:23:03 +02:00
|
|
|
// std::cout << "task " << CONFIG::MPI_task_rank << " : checking request No" << i << std::endl;
|
2019-05-12 21:12:07 +02:00
|
|
|
MPI_Wait(&req[i], &status);
|
2019-05-21 00:23:03 +02:00
|
|
|
// std::cout << "---> ok!" << std::endl;
|
2019-05-12 21:12:07 +02:00
|
|
|
assert(status.MPI_ERROR == MPI_SUCCESS);
|
|
|
|
}
|
|
|
|
|
|
|
|
// usleep(1000);
|
|
|
|
|
|
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
|
|
|
|
// std::cerr << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed! <<<<<"
|
|
|
|
// << std::endl; ofs << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed!
|
|
|
|
// <<<<<" << std::endl;
|
|
|
|
csoca::dlog.Print("[MPI] Completed scatter for convolution, took %fs\n",
|
2019-10-03 00:22:02 +02:00
|
|
|
get_wtime() - tstart);
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
#endif /// end of ifdef/ifndef USE_MPI ///////////////////////////////////////////////////////////////
|
2019-05-12 21:12:07 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename operator_t>
|
2019-10-03 00:22:02 +02:00
|
|
|
void unpad(const Grid_FFT<data_t> &fp, operator_t output_op)
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-05-23 14:53:11 +02:00
|
|
|
const double rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(fbuf_->n_[0] * fbuf_->n_[1] * fbuf_->n_[2]);
|
2019-05-12 21:12:07 +02:00
|
|
|
|
|
|
|
// make sure we're in Fourier space...
|
2019-10-03 00:22:02 +02:00
|
|
|
assert(fp.space_ == kspace_id);
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
#if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////
|
2019-05-23 14:53:11 +02:00
|
|
|
fbuf_->FourierTransformForward(false);
|
|
|
|
size_t nhalf[3] = {fbuf_->n_[0] / 2, fbuf_->n_[1] / 2, fbuf_->n_[2] / 2};
|
2019-10-03 00:22:02 +02:00
|
|
|
|
|
|
|
#pragma omp parallel for
|
2019-05-23 14:53:11 +02:00
|
|
|
for (size_t i = 0; i < fbuf_->size(0); ++i)
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-05-14 12:29:27 +02:00
|
|
|
size_t ip = (i > nhalf[0]) ? i + nhalf[0] : i;
|
2019-05-23 14:53:11 +02:00
|
|
|
for (size_t j = 0; j < fbuf_->size(1); ++j)
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-05-14 12:29:27 +02:00
|
|
|
size_t jp = (j > nhalf[1]) ? j + nhalf[1] : j;
|
2019-05-23 14:53:11 +02:00
|
|
|
for (size_t k = 0; k < fbuf_->size(2); ++k)
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-05-14 12:29:27 +02:00
|
|
|
size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k;
|
2019-05-12 21:12:07 +02:00
|
|
|
// if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue;
|
2019-05-23 14:53:11 +02:00
|
|
|
fbuf_->kelem(i, j, k) = fp.kelem(ip, jp, kp) / rfac;
|
2019-05-12 21:12:07 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
//... copy data back
|
|
|
|
#pragma omp parallel for
|
|
|
|
for (size_t i = 0; i < fbuf_->ntot_; ++i)
|
|
|
|
{
|
|
|
|
output_op(i, (*fbuf_)[i]);
|
2019-05-23 14:53:11 +02:00
|
|
|
}
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
#else /// then USE_MPI is defined //////////////////////////////////////////////////////////////
|
2019-05-12 21:12:07 +02:00
|
|
|
|
|
|
|
/////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
double tstart = get_wtime();
|
|
|
|
|
2019-05-14 12:29:27 +02:00
|
|
|
csoca::dlog << "[MPI] Started gather for convolution";
|
2019-05-12 21:12:07 +02:00
|
|
|
|
|
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
|
2019-05-23 14:53:11 +02:00
|
|
|
size_t nf[3] = {fbuf_->size(0), fbuf_->size(1), fbuf_->size(2)};
|
2019-05-12 21:12:07 +02:00
|
|
|
size_t nfp[4] = {fp.size(0), fp.size(1), fp.size(2), fp.size(3)};
|
2019-05-23 14:53:11 +02:00
|
|
|
size_t fny[3] = {fbuf_->n_[1] / 2, fbuf_->n_[0] / 2, fbuf_->n_[2] / 2};
|
2019-05-12 21:12:07 +02:00
|
|
|
|
|
|
|
size_t slicesz = fp.size(1) * fp.size(3);
|
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
MPI_Datatype datatype =
|
|
|
|
(typeid(data_t) == typeid(float)) ? MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE;
|
2019-05-12 21:12:07 +02:00
|
|
|
|
|
|
|
MPI_Status status;
|
|
|
|
|
|
|
|
//... local size must be divisible by 2, otherwise this gets too complicated
|
|
|
|
// assert( tsize%2 == 0 );
|
|
|
|
|
|
|
|
std::vector<MPI_Request> req;
|
|
|
|
MPI_Request temp_req;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < nfp[0]; ++i)
|
|
|
|
{
|
|
|
|
size_t iglobal = i + offsetsp_[CONFIG::MPI_task_rank];
|
|
|
|
|
|
|
|
//... sending
|
2019-05-15 05:30:47 +02:00
|
|
|
if (iglobal <= fny[0])
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
|
|
|
int sendto = get_task(iglobal, offsets_, sizes_, CONFIG::MPI_task_size);
|
|
|
|
|
|
|
|
MPI_Isend(&fp.kelem(i * slicesz), (int)slicesz, datatype, sendto, (int)iglobal,
|
2019-10-03 00:22:02 +02:00
|
|
|
MPI_COMM_WORLD, &temp_req);
|
2019-05-12 21:12:07 +02:00
|
|
|
req.push_back(temp_req);
|
|
|
|
}
|
2019-05-15 05:30:47 +02:00
|
|
|
else if (iglobal >= 2 * fny[0])
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
|
|
|
int sendto = get_task(iglobal - fny[0], offsets_, sizes_, CONFIG::MPI_task_size);
|
|
|
|
MPI_Isend(&fp.kelem(i * slicesz), (int)slicesz, datatype, sendto, (int)iglobal,
|
2019-10-03 00:22:02 +02:00
|
|
|
MPI_COMM_WORLD, &temp_req);
|
2019-05-12 21:12:07 +02:00
|
|
|
req.push_back(temp_req);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-15 05:30:47 +02:00
|
|
|
fbuf_->zero();
|
|
|
|
|
2019-05-12 21:12:07 +02:00
|
|
|
for (size_t i = 0; i < nf[0]; ++i)
|
|
|
|
{
|
|
|
|
size_t iglobal = i + offsets_[CONFIG::MPI_task_rank];
|
|
|
|
int recvfrom = 0;
|
2019-05-15 05:30:47 +02:00
|
|
|
if (iglobal <= fny[0])
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-10-03 00:22:02 +02:00
|
|
|
real_t wi = (iglobal == fny[0]) ? 0.5 : 1.0;
|
2019-05-15 05:30:47 +02:00
|
|
|
|
2019-05-12 21:12:07 +02:00
|
|
|
recvfrom = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
|
|
|
MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, (int)iglobal,
|
2019-10-03 00:22:02 +02:00
|
|
|
MPI_COMM_WORLD, &status);
|
2019-05-15 05:30:47 +02:00
|
|
|
|
|
|
|
for (size_t j = 0; j < nf[1]; ++j)
|
|
|
|
{
|
2019-10-03 00:22:02 +02:00
|
|
|
real_t wj = (j == fny[1]) ? 0.5 : 1.0;
|
2019-05-15 05:30:47 +02:00
|
|
|
if (j <= fny[1])
|
|
|
|
{
|
|
|
|
size_t jp = j;
|
|
|
|
for (size_t k = 0; k < nf[2]; ++k)
|
|
|
|
{
|
2019-10-03 00:22:02 +02:00
|
|
|
if (typeid(data_t) == typeid(real_t))
|
|
|
|
{
|
|
|
|
real_t w = wi * wj;
|
|
|
|
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
real_t wk = (k == fny[2]) ? 0.5 : 1.0;
|
|
|
|
real_t w = wi * wj * wk;
|
2019-05-15 05:30:47 +02:00
|
|
|
if (k <= fny[2])
|
2019-10-03 00:22:02 +02:00
|
|
|
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
2019-05-15 05:30:47 +02:00
|
|
|
if (k >= fny[2])
|
2019-10-03 00:22:02 +02:00
|
|
|
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
|
|
|
|
if (w < 1.0)
|
|
|
|
{
|
2019-05-15 05:30:47 +02:00
|
|
|
fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (j >= fny[1])
|
|
|
|
{
|
|
|
|
size_t jp = j + fny[1];
|
|
|
|
for (size_t k = 0; k < nf[2]; ++k)
|
|
|
|
{
|
2019-10-03 00:22:02 +02:00
|
|
|
if (typeid(data_t) == typeid(real_t))
|
|
|
|
{
|
|
|
|
real_t w = wi * wj;
|
|
|
|
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
real_t wk = (k == fny[2]) ? 0.5 : 1.0;
|
|
|
|
real_t w = wi * wj * wk;
|
2019-05-15 05:30:47 +02:00
|
|
|
if (k <= fny[2])
|
2019-10-03 00:22:02 +02:00
|
|
|
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
2019-05-15 05:30:47 +02:00
|
|
|
if (k >= fny[2])
|
2019-10-03 00:22:02 +02:00
|
|
|
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
|
|
|
|
if (w < 1.0)
|
|
|
|
{
|
2019-05-15 05:30:47 +02:00
|
|
|
fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-05-12 21:12:07 +02:00
|
|
|
}
|
2019-05-15 05:30:47 +02:00
|
|
|
if (iglobal >= fny[0])
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-10-03 00:22:02 +02:00
|
|
|
real_t wi = (iglobal == fny[0]) ? 0.5 : 1.0;
|
2019-05-15 05:30:47 +02:00
|
|
|
|
2019-05-12 21:12:07 +02:00
|
|
|
recvfrom = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
|
|
|
MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom,
|
2019-10-03 00:22:02 +02:00
|
|
|
(int)(iglobal + fny[0]), MPI_COMM_WORLD, &status);
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-05-15 05:30:47 +02:00
|
|
|
for (size_t j = 0; j < nf[1]; ++j)
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-10-03 00:22:02 +02:00
|
|
|
real_t wj = (j == fny[1]) ? 0.5 : 1.0;
|
2019-05-15 05:30:47 +02:00
|
|
|
if (j <= fny[1])
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-05-15 05:30:47 +02:00
|
|
|
size_t jp = j;
|
|
|
|
for (size_t k = 0; k < nf[2]; ++k)
|
|
|
|
{
|
2019-10-03 00:22:02 +02:00
|
|
|
if (typeid(data_t) == typeid(real_t))
|
|
|
|
{
|
|
|
|
real_t w = wi * wj;
|
|
|
|
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
real_t wk = (k == fny[2]) ? 0.5 : 1.0;
|
|
|
|
real_t w = wi * wj * wk;
|
2019-05-15 05:30:47 +02:00
|
|
|
if (k <= fny[2])
|
2019-10-03 00:22:02 +02:00
|
|
|
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
2019-05-15 05:30:47 +02:00
|
|
|
if (k >= fny[2])
|
2019-10-03 00:22:02 +02:00
|
|
|
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
|
|
|
|
if (w < 1.0)
|
|
|
|
{
|
2019-05-15 05:30:47 +02:00
|
|
|
fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-05-12 21:12:07 +02:00
|
|
|
}
|
2019-05-15 05:30:47 +02:00
|
|
|
if (j >= fny[1])
|
2019-05-12 21:12:07 +02:00
|
|
|
{
|
2019-05-15 05:30:47 +02:00
|
|
|
size_t jp = j + fny[1];
|
|
|
|
for (size_t k = 0; k < nf[2]; ++k)
|
|
|
|
{
|
2019-10-03 00:22:02 +02:00
|
|
|
if (typeid(data_t) == typeid(real_t))
|
|
|
|
{
|
|
|
|
real_t w = wi * wj;
|
|
|
|
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
real_t wk = (k == fny[2]) ? 0.5 : 1.0;
|
|
|
|
real_t w = wi * wj * wk;
|
2019-05-15 05:30:47 +02:00
|
|
|
if (k <= fny[2])
|
2019-10-03 00:22:02 +02:00
|
|
|
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
2019-05-15 05:30:47 +02:00
|
|
|
if (k >= fny[2])
|
2019-10-03 00:22:02 +02:00
|
|
|
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
|
|
|
|
if (w < 1.0)
|
|
|
|
{
|
2019-05-15 05:30:47 +02:00
|
|
|
fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-05-12 21:12:07 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-10-03 00:22:02 +02:00
|
|
|
|
|
|
|
//... copy data back
|
|
|
|
#pragma omp parallel for
|
|
|
|
for (size_t i = 0; i < fbuf_->ntot_; ++i)
|
|
|
|
{
|
|
|
|
output_op(i, (*fbuf_)[i]);
|
2019-05-15 05:30:47 +02:00
|
|
|
}
|
|
|
|
|
2019-05-12 21:12:07 +02:00
|
|
|
for (size_t i = 0; i < req.size(); ++i)
|
|
|
|
{
|
|
|
|
// need to preset status as wait does not necessarily modify it to reflect
|
|
|
|
// success c.f.
|
|
|
|
// http://www.open-mpi.org/community/lists/devel/2007/04/1402.php
|
|
|
|
status.MPI_ERROR = MPI_SUCCESS;
|
|
|
|
|
|
|
|
MPI_Wait(&req[i], &status);
|
|
|
|
assert(status.MPI_ERROR == MPI_SUCCESS);
|
|
|
|
}
|
|
|
|
|
|
|
|
MPI_Barrier(MPI_COMM_WORLD);
|
|
|
|
|
2019-05-14 12:29:27 +02:00
|
|
|
csoca::dlog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart);
|
2019-05-12 21:12:07 +02:00
|
|
|
|
2019-10-03 00:22:02 +02:00
|
|
|
#endif /// end of ifdef/ifndef USE_MPI //////////////////////////////////////////////////////////////
|
2019-05-12 21:12:07 +02:00
|
|
|
}
|
|
|
|
};
|