mirror of
https://github.com/cosmo-sims/monofonIC.git
synced 2024-09-19 17:03:45 +02:00
changed old convolution padding routines to use new fourier interpolation routine
This commit is contained in:
parent
d8cd286f87
commit
955b5987f8
1 changed files with 25 additions and 355 deletions
|
@ -427,19 +427,18 @@ public:
|
|||
// }
|
||||
|
||||
private:
|
||||
template <typename kdep_functor>
|
||||
void pad_insert(kdep_functor kfunc, Grid_FFT<data_t> &fp)
|
||||
{
|
||||
assert(fp.space_ == kspace_id);
|
||||
|
||||
template <typename kdep_functor>
|
||||
void pad_insert( kdep_functor kfunc, Grid_FFT<data_t> &fp)
|
||||
{
|
||||
const real_t rfac = std::pow(1.5, 1.5);
|
||||
|
||||
#if !defined(USE_MPI)
|
||||
const size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3};
|
||||
|
||||
fp.zero();
|
||||
|
||||
#if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////
|
||||
const size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3};
|
||||
|
||||
#pragma omp parallel for
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < 2 * fp.size(0) / 3; ++i)
|
||||
{
|
||||
size_t ip = (i > nhalf[0]) ? i + nhalf[0] : i;
|
||||
|
@ -453,37 +452,10 @@ private:
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else /// then USE_MPI is defined ////////////////////////////////////////////////////////////
|
||||
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
#else
|
||||
fbuf_->FourierTransformForward(false);
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
double tstart = get_wtime();
|
||||
music::dlog << "[MPI] Started scatter for convolution" << std::endl;
|
||||
|
||||
//... collect offsets
|
||||
|
||||
assert(fbuf_->space_ == kspace_id);
|
||||
|
||||
size_t nf[3] = {fbuf_->size(0), fbuf_->size(1), fbuf_->size(2)};
|
||||
size_t nfp[3] = {fp.size(0), fp.size(1), fp.size(2)};
|
||||
size_t fny[3] = {fbuf_->n_[1] / 2, fbuf_->n_[0] / 2, fbuf_->n_[2] / 2};
|
||||
|
||||
//... local size must be divisible by 2, otherwise this gets too complicated
|
||||
assert(fbuf_->n_[1] % 2 == 0);
|
||||
size_t slicesz = fbuf_->size(1) * fbuf_->size(3);
|
||||
|
||||
MPI_Datatype datatype =
|
||||
(typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX
|
||||
: (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX
|
||||
: (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX
|
||||
: MPI_BYTE;
|
||||
|
||||
// fill MPI send buffer with results of kfunc
|
||||
|
||||
#pragma omp parallel for
|
||||
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < fbuf_->size(0); ++i)
|
||||
{
|
||||
for (size_t j = 0; j < fbuf_->size(1); ++j)
|
||||
|
@ -495,125 +467,13 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
MPI_Status status;
|
||||
|
||||
std::vector<MPI_Request> req;
|
||||
MPI_Request temp_req;
|
||||
|
||||
// send data from buffer
|
||||
for (size_t i = 0; i < nf[0]; ++i)
|
||||
{
|
||||
size_t iglobal = i + offsets_[CONFIG::MPI_task_rank];
|
||||
|
||||
if (iglobal <= fny[0])
|
||||
{
|
||||
int sendto = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
||||
MPI_Isend(&fbuf_->kelem(i * slicesz), (int)slicesz, datatype, sendto,
|
||||
(int)iglobal, MPI_COMM_WORLD, &temp_req);
|
||||
req.push_back(temp_req);
|
||||
// std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal << " to task " << sendto << ", size = " << slicesz << std::endl;
|
||||
}
|
||||
if (iglobal >= fny[0])
|
||||
{
|
||||
int sendto = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
||||
MPI_Isend(&fbuf_->kelem(i * slicesz), (int)slicesz, datatype, sendto,
|
||||
(int)(iglobal + fny[0]), MPI_COMM_WORLD, &temp_req);
|
||||
req.push_back(temp_req);
|
||||
// std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal+fny[0] << " to task " << sendto << ", size = " << slicesz<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < nfp[0]; ++i)
|
||||
{
|
||||
size_t iglobal = i + offsetsp_[CONFIG::MPI_task_rank];
|
||||
|
||||
if (iglobal <= fny[0] || iglobal >= 2 * fny[0])
|
||||
{
|
||||
int recvfrom = 0;
|
||||
if (iglobal <= fny[0])
|
||||
recvfrom = get_task(iglobal, offsets_, sizes_, CONFIG::MPI_task_size);
|
||||
else
|
||||
recvfrom = get_task(iglobal - fny[0], offsets_, sizes_, CONFIG::MPI_task_size);
|
||||
|
||||
// std::cout << "task " << CONFIG::MPI_task_rank << " : receive #" << iglobal << " from task "
|
||||
// << recvfrom << ", size = " << slicesz << ", " << crecvbuf_ << ", " << datatype << std::endl;
|
||||
status.MPI_ERROR = MPI_SUCCESS;
|
||||
|
||||
MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, (int)iglobal,
|
||||
MPI_COMM_WORLD, &status);
|
||||
// std::cout << "---> ok! " << (bool)(status.MPI_ERROR==MPI_SUCCESS) << std::endl;
|
||||
|
||||
assert(status.MPI_ERROR == MPI_SUCCESS);
|
||||
|
||||
for (size_t j = 0; j < nf[1]; ++j)
|
||||
{
|
||||
if (j <= fny[1])
|
||||
{
|
||||
size_t jp = j;
|
||||
for (size_t k = 0; k < nf[2]; ++k)
|
||||
{
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
{
|
||||
fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
||||
}
|
||||
else
|
||||
{
|
||||
if (k <= fny[2])
|
||||
fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
||||
if (k >= fny[2])
|
||||
fp.kelem(i, jp, k + nf[2] / 2) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (j >= fny[1])
|
||||
{
|
||||
size_t jp = j + fny[1];
|
||||
for (size_t k = 0; k < nf[2]; ++k)
|
||||
{
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
{
|
||||
fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
||||
}
|
||||
else
|
||||
{
|
||||
if (k <= fny[2])
|
||||
fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
||||
if (k >= fny[2])
|
||||
fp.kelem(i, jp, k + fny[2]) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < req.size(); ++i)
|
||||
{
|
||||
// need to set status as wait does not necessarily modify it
|
||||
// c.f. http://www.open-mpi.org/community/lists/devel/2007/04/1402.php
|
||||
status.MPI_ERROR = MPI_SUCCESS;
|
||||
// std::cout << "task " << CONFIG::MPI_task_rank << " : checking request No" << i << std::endl;
|
||||
MPI_Wait(&req[i], &status);
|
||||
// std::cout << "---> ok!" << std::endl;
|
||||
assert(status.MPI_ERROR == MPI_SUCCESS);
|
||||
}
|
||||
|
||||
// usleep(1000);
|
||||
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
// std::cerr << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed! <<<<<"
|
||||
// << std::endl; ofs << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed!
|
||||
// <<<<<" << std::endl;
|
||||
music::dlog.Print("[MPI] Completed scatter for convolution, took %fs\n",
|
||||
get_wtime() - tstart);
|
||||
|
||||
#endif /// end of ifdef/ifndef USE_MPI ///////////////////////////////////////////////////////////////
|
||||
fbuf_->FourierInterpolateCopyTo( fp );
|
||||
|
||||
#endif //defined(USE_MPI)
|
||||
}
|
||||
|
||||
template <typename operator_t>
|
||||
void unpad(const Grid_FFT<data_t> &fp, operator_t output_op)
|
||||
void unpad( Grid_FFT<data_t> &fp, operator_t output_op)
|
||||
{
|
||||
const real_t rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(fbuf_->n_[0] * fbuf_->n_[1] * fbuf_->n_[2]);
|
||||
|
||||
|
@ -624,7 +484,7 @@ private:
|
|||
fbuf_->FourierTransformForward(false);
|
||||
size_t nhalf[3] = {fbuf_->n_[0] / 2, fbuf_->n_[1] / 2, fbuf_->n_[2] / 2};
|
||||
|
||||
#pragma omp parallel for
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < fbuf_->size(0); ++i)
|
||||
{
|
||||
size_t ip = (i > nhalf[0]) ? i + nhalf[0] : i;
|
||||
|
@ -643,193 +503,6 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
//... copy data back
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < fbuf_->ntot_; ++i)
|
||||
{
|
||||
output_op(i, (*fbuf_)[i]);
|
||||
}
|
||||
|
||||
#else /// then USE_MPI is defined //////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
double tstart = get_wtime();
|
||||
|
||||
music::dlog << "[MPI] Started gather for convolution";
|
||||
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
size_t nf[3] = {fbuf_->size(0), fbuf_->size(1), fbuf_->size(2)};
|
||||
size_t nfp[4] = {fp.size(0), fp.size(1), fp.size(2), fp.size(3)};
|
||||
size_t fny[3] = {fbuf_->n_[1] / 2, fbuf_->n_[0] / 2, fbuf_->n_[2] / 2};
|
||||
|
||||
size_t slicesz = fp.size(1) * fp.size(3);
|
||||
|
||||
MPI_Datatype datatype =
|
||||
(typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX
|
||||
: (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX
|
||||
: (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX
|
||||
: MPI_BYTE;
|
||||
|
||||
MPI_Status status;
|
||||
|
||||
//... local size must be divisible by 2, otherwise this gets too complicated
|
||||
// assert( tsize%2 == 0 );
|
||||
|
||||
std::vector<MPI_Request> req;
|
||||
MPI_Request temp_req;
|
||||
|
||||
for (size_t i = 0; i < nfp[0]; ++i)
|
||||
{
|
||||
size_t iglobal = i + offsetsp_[CONFIG::MPI_task_rank];
|
||||
|
||||
//... sending
|
||||
if (iglobal <= fny[0])
|
||||
{
|
||||
int sendto = get_task(iglobal, offsets_, sizes_, CONFIG::MPI_task_size);
|
||||
|
||||
MPI_Isend(&fp.kelem(i * slicesz), (int)slicesz, datatype, sendto, (int)iglobal,
|
||||
MPI_COMM_WORLD, &temp_req);
|
||||
req.push_back(temp_req);
|
||||
}
|
||||
else if (iglobal >= 2 * fny[0])
|
||||
{
|
||||
int sendto = get_task(iglobal - fny[0], offsets_, sizes_, CONFIG::MPI_task_size);
|
||||
MPI_Isend(&fp.kelem(i * slicesz), (int)slicesz, datatype, sendto, (int)iglobal,
|
||||
MPI_COMM_WORLD, &temp_req);
|
||||
req.push_back(temp_req);
|
||||
}
|
||||
}
|
||||
|
||||
fbuf_->zero();
|
||||
|
||||
for (size_t i = 0; i < nf[0]; ++i)
|
||||
{
|
||||
size_t iglobal = i + offsets_[CONFIG::MPI_task_rank];
|
||||
int recvfrom = 0;
|
||||
if (iglobal <= fny[0])
|
||||
{
|
||||
real_t wi = (iglobal == fny[0]) ? 0.0 : 1.0;
|
||||
|
||||
recvfrom = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
||||
MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, (int)iglobal,
|
||||
MPI_COMM_WORLD, &status);
|
||||
|
||||
for (size_t j = 0; j < nf[1]; ++j)
|
||||
{
|
||||
real_t wj = (j == fny[1]) ? 0.0 : 1.0;
|
||||
if (j <= fny[1])
|
||||
{
|
||||
size_t jp = j;
|
||||
for (size_t k = 0; k < nf[2]; ++k)
|
||||
{
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
{
|
||||
real_t wk = (k == fny[2]) ? 0.0 : 1.0;
|
||||
real_t w = wi * wj * wk;
|
||||
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
||||
}
|
||||
else
|
||||
{
|
||||
real_t wk = (k == fny[2]) ? 0.0 : 1.0;
|
||||
real_t w = wi * wj * wk;
|
||||
if (k < fny[2])
|
||||
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
||||
if (k > fny[2])
|
||||
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
|
||||
// if (w < 1.0)
|
||||
// {
|
||||
// fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
if (j >= fny[1])
|
||||
{
|
||||
size_t jp = j + fny[1];
|
||||
for (size_t k = 0; k < nf[2]; ++k)
|
||||
{
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
{
|
||||
real_t wk = (k == fny[2]) ? 0.0 : 1.0;
|
||||
real_t w = wi * wj * wk;
|
||||
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
||||
}
|
||||
else
|
||||
{
|
||||
real_t wk = (k == fny[2]) ? 0.0 : 1.0;
|
||||
real_t w = wi * wj * wk;
|
||||
if (k < fny[2])
|
||||
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
||||
if (k > fny[2])
|
||||
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
|
||||
// if (w < 1.0)
|
||||
// {
|
||||
// fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (iglobal >= fny[0])
|
||||
{
|
||||
real_t wi = (iglobal == fny[0]) ? 0.0 : 1.0;
|
||||
|
||||
recvfrom = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
||||
MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom,
|
||||
(int)(iglobal + fny[0]), MPI_COMM_WORLD, &status);
|
||||
|
||||
for (size_t j = 0; j < nf[1]; ++j)
|
||||
{
|
||||
real_t wj = (j == fny[1]) ? 0.0 : 1.0;
|
||||
if (j <= fny[1])
|
||||
{
|
||||
size_t jp = j;
|
||||
for (size_t k = 0; k < nf[2]; ++k)
|
||||
{
|
||||
const real_t wk = (k == fny[2]) ? 0.0 : 1.0;
|
||||
const real_t w = wi * wj * wk;
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
{
|
||||
real_t wk = (k == fny[2]) ? 0.0 : 1.0;
|
||||
real_t w = wi * wj * wk;
|
||||
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (k < fny[2])
|
||||
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
||||
if (k > fny[2])
|
||||
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (j >= fny[1])
|
||||
{
|
||||
size_t jp = j + fny[1];
|
||||
for (size_t k = 0; k < nf[2]; ++k)
|
||||
{
|
||||
const real_t wk = (k == fny[2]) ? 0.0 : 1.0;
|
||||
const real_t w = wi * wj * wk;
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
{
|
||||
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (k < fny[2])
|
||||
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
|
||||
if (k > fny[2])
|
||||
fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//... copy data back
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < fbuf_->ntot_; ++i)
|
||||
|
@ -837,21 +510,18 @@ private:
|
|||
output_op(i, (*fbuf_)[i]);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < req.size(); ++i)
|
||||
{
|
||||
// need to preset status as wait does not necessarily modify it to reflect
|
||||
// success c.f.
|
||||
// http://www.open-mpi.org/community/lists/devel/2007/04/1402.php
|
||||
status.MPI_ERROR = MPI_SUCCESS;
|
||||
#else /// then USE_MPI is defined //////////////////////////////////////////////////////////////
|
||||
|
||||
fp.FourierInterpolateCopyTo( *fbuf_ );
|
||||
|
||||
MPI_Wait(&req[i], &status);
|
||||
assert(status.MPI_ERROR == MPI_SUCCESS);
|
||||
//... copy data back
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < fbuf_->ntot_; ++i)
|
||||
{
|
||||
|
||||
output_op(i, (*fbuf_)[i] / rfac);
|
||||
}
|
||||
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
music::dlog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart);
|
||||
|
||||
#endif /// end of ifdef/ifndef USE_MPI //////////////////////////////////////////////////////////////
|
||||
#endif //defined(USE_MPI)
|
||||
}
|
||||
};
|
||||
|
|
Loading…
Reference in a new issue