mirror of
https://github.com/cosmo-sims/monofonIC.git
synced 2024-09-19 17:03:45 +02:00
added new fourier copy-interpolation routine
This commit is contained in:
parent
048ea5172c
commit
edcfbbab58
2 changed files with 214 additions and 5 deletions
|
@ -134,7 +134,7 @@ public:
|
|||
//! set all field elements to zero
|
||||
void zero() noexcept
|
||||
{
|
||||
#pragma omp parallel for
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < ntot_; ++i)
|
||||
data_[i] = 0.0;
|
||||
}
|
||||
|
@ -155,8 +155,8 @@ public:
|
|||
assert(this->n_[1] == g.n_[1]);
|
||||
assert(this->n_[2] == g.n_[2]);
|
||||
|
||||
// now we can copy all the data over
|
||||
#pragma omp parallel for
|
||||
// now we can copy all the data over
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < ntot_; ++i)
|
||||
data_[i] = g.data_[i];
|
||||
}
|
||||
|
@ -357,6 +357,7 @@ public:
|
|||
return val;
|
||||
}
|
||||
|
||||
|
||||
inline ccomplex_t gradient( const int idim, std::array<size_t,3> ijk ) const
|
||||
{
|
||||
if( bdistributed ){
|
||||
|
@ -791,13 +792,17 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
//! perform a backwards Fourier transform
|
||||
void FourierTransformBackward(bool do_transform = true);
|
||||
|
||||
//! perform a forwards Fourier transform
|
||||
void FourierTransformForward(bool do_transform = true);
|
||||
|
||||
void ApplyNorm(void);
|
||||
//! perform a copy operation between to FFT grids that might not be of the same size
|
||||
void FourierInterpolateCopyTo( grid_fft_t &grid_to );
|
||||
|
||||
void FillRandomReal(unsigned long int seed = 123456ul);
|
||||
//! normalise field
|
||||
void ApplyNorm(void);
|
||||
|
||||
void Write_to_HDF5(std::string fname, std::string datasetname) const;
|
||||
|
||||
|
|
204
src/grid_fft.cc
204
src/grid_fft.cc
|
@ -185,6 +185,7 @@ void Grid_FFT<data_t, bdistributed>::ApplyNorm(void)
|
|||
data_[i] *= fft_norm_fac_;
|
||||
}
|
||||
|
||||
//! Perform a backwards Fourier transformation
|
||||
template <typename data_t, bool bdistributed>
|
||||
void Grid_FFT<data_t, bdistributed>::FourierTransformForward(bool do_transform)
|
||||
{
|
||||
|
@ -217,6 +218,7 @@ void Grid_FFT<data_t, bdistributed>::FourierTransformForward(bool do_transform)
|
|||
}
|
||||
}
|
||||
|
||||
//! Perform a forward Fourier transformation
|
||||
template <typename data_t, bool bdistributed>
|
||||
void Grid_FFT<data_t, bdistributed>::FourierTransformBackward(bool do_transform)
|
||||
{
|
||||
|
@ -248,6 +250,208 @@ void Grid_FFT<data_t, bdistributed>::FourierTransformBackward(bool do_transform)
|
|||
}
|
||||
}
|
||||
|
||||
//! Perform a copy to another field, not necessarily the same size, using Fourier interpolation
|
||||
template <typename data_t, bool bdistributed>
|
||||
void Grid_FFT<data_t, bdistributed>::FourierInterpolateCopyTo( grid_fft_t &grid_to )
|
||||
{
|
||||
grid_fft_t &grid_from = *this;
|
||||
|
||||
//... transform both grids to Fourier space
|
||||
grid_from.FourierTransformForward(true);
|
||||
grid_to.FourierTransformForward(false);
|
||||
|
||||
// if grids are same size, we directly copy without the need for interpolation
|
||||
if( grid_from.n_[0] == grid_to.n_[0]
|
||||
&& grid_from.n_[1] == grid_to.n_[1]
|
||||
&& grid_from.n_[2] == grid_to.n_[2] )
|
||||
{
|
||||
grid_to.copy_from( grid_from );
|
||||
return;
|
||||
}
|
||||
|
||||
// set to zero so that regions without data are zeroed
|
||||
grid_to.zero();
|
||||
|
||||
// if not running MPI, then can do without sending and receiving
|
||||
#if !defined(USE_MPI)
|
||||
|
||||
// determine effective Nyquist modes representable by both fields and their locations in array
|
||||
size_t fny0_left = std::min(grid_from.n_[0] / 2, grid_to.n_[0] / 2);
|
||||
size_t fny0_right = std::max(grid_from.n_[0] - grid_to.n_[0] / 2, grid_from.n_[0] / 2);
|
||||
size_t fny1_left = std::min(grid_from.n_[1] / 2, grid_to.n_[1] / 2);
|
||||
size_t fny1_right = std::max(grid_from.n_[1] - grid_to.n_[1] / 2, grid_from.n_[1] / 2);
|
||||
size_t fny2_left = std::min(grid_from.n_[2] / 2, grid_to.n_[2] / 2);
|
||||
// size_t fny2_right = std::max(grid_from.n_[2] - grid_to.n_[2] / 2, grid_from.n_[2] / 2);
|
||||
|
||||
const size_t fny0_left_recv = fny0_left;
|
||||
const size_t fny0_right_recv = (fny0_right + grid_to.n_[0]) - grid_from.n_[0];
|
||||
const size_t fny1_left_recv = fny1_left;
|
||||
const size_t fny1_right_recv = (fny1_right + grid_to.n_[1]) - grid_from.n_[1];
|
||||
const size_t fny2_left_recv = fny2_left;
|
||||
// const size_t fny2_right_recv = (fny2_right + grid_to.n_[2]) - grid_from.n_[2];
|
||||
|
||||
#pragma omp parallel for
|
||||
for( size_t i=0; i<grid_to.size(0); ++i )
|
||||
{
|
||||
if (i < fny0_left_recv || i > fny0_right_recv)
|
||||
{
|
||||
size_t isend = (i < fny0_left_recv)? i : (i + grid_from.n_[0]) - grid_to.n_[0];
|
||||
|
||||
// copy data slice into new grid, avoiding modes that do not exist in both
|
||||
for( size_t j=0; j<grid_to.size(1); ++j )
|
||||
{
|
||||
if( j < fny1_left_recv || j > fny1_right_recv )
|
||||
{
|
||||
const size_t jsend = (j < fny1_left_recv)? j : (j + grid_from.n_[1]) - grid_to.n_[1];
|
||||
|
||||
for( size_t k=0; k<fny2_left_recv; ++k )
|
||||
{
|
||||
grid_to.kelem(i,j,k) = grid_from.kelem(isend,jsend,k);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
// if they are not the same size, we use Fourier interpolation to upscale/downscale
|
||||
double tstart = get_wtime();
|
||||
music::dlog << "[MPI] Started scatter for Fourier interpolation/copy" << std::endl;
|
||||
|
||||
//... determine communication offsets
|
||||
std::vector<ptrdiff_t> offsets_send, offsets_recv;
|
||||
|
||||
// this should use bisection, not linear search
|
||||
auto get_task = [](ptrdiff_t index, const std::vector<ptrdiff_t> &offsets, const int ntasks) -> int
|
||||
{
|
||||
int itask = 0;
|
||||
while (itask < ntasks - 1 && offsets[itask + 1] <= index)
|
||||
++itask;
|
||||
return itask;
|
||||
};
|
||||
|
||||
int ntasks(MPI::get_size());
|
||||
|
||||
offsets_send.assign(ntasks, 0);
|
||||
offsets_recv.assign(ntasks, 0);
|
||||
|
||||
MPI_Allgather(&grid_from.local_1_start_, 1, MPI_LONG_LONG, &offsets_send[0], 1,
|
||||
MPI_LONG_LONG, MPI_COMM_WORLD);
|
||||
MPI_Allgather(&grid_to.local_1_start_, 1, MPI_LONG_LONG, &offsets_recv[0], 1,
|
||||
MPI_LONG_LONG, MPI_COMM_WORLD);
|
||||
|
||||
MPI_Datatype datatype =
|
||||
(typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX
|
||||
: (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX
|
||||
: (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX
|
||||
: MPI_BYTE;
|
||||
|
||||
const size_t slicesz = grid_from.size(1) * grid_from.size(3);
|
||||
|
||||
// determine effective Nyquist modes representable by both fields and their locations in array
|
||||
size_t fny0_left = std::min(grid_from.n_[1] / 2, grid_to.n_[1] / 2);
|
||||
size_t fny0_right = std::max(grid_from.n_[1] - grid_to.n_[1] / 2, grid_from.n_[1] / 2);
|
||||
size_t fny1_left = std::min(grid_from.n_[0] / 2, grid_to.n_[0] / 2);
|
||||
size_t fny1_right = std::max(grid_from.n_[0] - grid_to.n_[0] / 2, grid_from.n_[0] / 2);
|
||||
size_t fny2_left = std::min(grid_from.n_[2] / 2, grid_to.n_[2] / 2);
|
||||
// size_t fny2_right = std::max(grid_from.n_[2] - grid_to.n_[2] / 2, grid_from.n_[2] / 2);
|
||||
|
||||
const size_t fny0_left_recv = fny0_left;
|
||||
const size_t fny0_right_recv = (fny0_right + grid_to.n_[1]) - grid_from.n_[1];
|
||||
const size_t fny1_left_recv = fny1_left;
|
||||
const size_t fny1_right_recv = (fny1_right + grid_to.n_[0]) - grid_from.n_[0];
|
||||
const size_t fny2_left_recv = fny2_left;
|
||||
// const size_t fny2_right_recv = (fny2_right + grid_to.n_[2]) - grid_from.n_[2];
|
||||
|
||||
//--- send data from buffer ---------------------------------------------------
|
||||
|
||||
std::vector<MPI_Request> req;
|
||||
MPI_Request temp_req;
|
||||
|
||||
for (size_t i = 0; i < grid_from.size(0); ++i)
|
||||
{
|
||||
size_t iglobal_send = i + offsets_send[CONFIG::MPI_task_rank];
|
||||
|
||||
if (iglobal_send < fny0_left)
|
||||
{
|
||||
size_t iglobal_recv = iglobal_send;
|
||||
int sendto = get_task(iglobal_recv, offsets_recv, CONFIG::MPI_task_size);
|
||||
MPI_Isend(&grid_from.kelem(i * slicesz), (int)slicesz, datatype, sendto,
|
||||
(int)iglobal_recv, MPI_COMM_WORLD, &temp_req);
|
||||
req.push_back(temp_req);
|
||||
// std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal_send << " to task " << sendto << ", size = " << slicesz << std::endl;
|
||||
}
|
||||
if (iglobal_send > fny0_right)
|
||||
{
|
||||
size_t iglobal_recv = (iglobal_send + grid_to.n_[1]) - grid_from.n_[1];
|
||||
int sendto = get_task(iglobal_recv, offsets_recv, CONFIG::MPI_task_size);
|
||||
MPI_Isend(&grid_from.kelem(i * slicesz), (int)slicesz, datatype, sendto,
|
||||
(int)iglobal_recv, MPI_COMM_WORLD, &temp_req);
|
||||
req.push_back(temp_req);
|
||||
// std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal_send << " to task " << sendto << ", size = " << slicesz<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
//--- receive data ------------------------------------------------------------
|
||||
#pragma omp parallel
|
||||
{
|
||||
MPI_Status status;
|
||||
ccomplex_t * recvbuf = new ccomplex_t[ slicesz ];
|
||||
|
||||
#pragma omp for
|
||||
for( size_t i=0; i<grid_to.size(0); ++i )
|
||||
{
|
||||
size_t iglobal_recv = i + offsets_recv[CONFIG::MPI_task_rank];
|
||||
|
||||
if (iglobal_recv < fny0_left_recv || iglobal_recv > fny0_right_recv)
|
||||
{
|
||||
size_t iglobal_send = (iglobal_recv < fny0_left_recv)? iglobal_recv : (iglobal_recv + grid_from.n_[1]) - grid_to.n_[1];
|
||||
|
||||
int recvfrom = get_task(iglobal_send, offsets_send, CONFIG::MPI_task_size);
|
||||
|
||||
// receive data slice and check for MPI errors when in debug mode
|
||||
status.MPI_ERROR = MPI_SUCCESS;
|
||||
MPI_Recv(&recvbuf[0], (int)slicesz, datatype, recvfrom, (int)iglobal_recv, MPI_COMM_WORLD, &status);
|
||||
assert(status.MPI_ERROR == MPI_SUCCESS);
|
||||
|
||||
// copy data slice into new grid, avoiding modes that do not exist in both
|
||||
for( size_t j=0; j<grid_to.size(1); ++j )
|
||||
{
|
||||
if( j < fny1_left_recv || j > fny1_right_recv )
|
||||
{
|
||||
const size_t jsend = (j < fny1_left_recv)? j : (j + grid_from.n_[0]) - grid_to.n_[0];
|
||||
|
||||
for( size_t k=0; k<fny2_left_recv; ++k )
|
||||
{
|
||||
grid_to.kelem(i,j,k) = recvbuf[jsend * grid_from.sizes_[3] + k];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
delete[] recvbuf;
|
||||
}
|
||||
|
||||
MPI_Status status;
|
||||
for (size_t i = 0; i < req.size(); ++i)
|
||||
{
|
||||
// need to set status as wait does not necessarily modify it
|
||||
// c.f. http://www.open-mpi.org/community/lists/devel/2007/04/1402.php
|
||||
status.MPI_ERROR = MPI_SUCCESS;
|
||||
// std::cout << "task " << CONFIG::MPI_task_rank << " : checking request No" << i << std::endl;
|
||||
MPI_Wait(&req[i], &status);
|
||||
// std::cout << "---> ok!" << std::endl;
|
||||
assert(status.MPI_ERROR == MPI_SUCCESS);
|
||||
}
|
||||
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
music::dlog.Print("[MPI] Completed scatter for Fourier interpolation/copy, took %fs\n",
|
||||
get_wtime() - tstart);
|
||||
#endif //defined(USE_MPI)
|
||||
}
|
||||
|
||||
|
||||
#define H5_USE_16_API
|
||||
#include <hdf5.h>
|
||||
|
||||
|
|
Loading…
Reference in a new issue