mirror of https://github.com/cosmo-sims/monofonIC.git synced 2024-09-19 17:03:45 +02:00

added new fourier copy-interpolation routine

This commit is contained in:
Oliver Hahn 2020-11-11 16:26:49 +01:00
parent 048ea5172c
commit edcfbbab58
2 changed files with 214 additions and 5 deletions

View file

@@ -134,7 +134,7 @@ public:
//! set all field elements to zero
void zero() noexcept
{
#pragma omp parallel for
for (size_t i = 0; i < ntot_; ++i)
data_[i] = 0.0;
}
@@ -155,8 +155,8 @@ public:
assert(this->n_[1] == g.n_[1]);
assert(this->n_[2] == g.n_[2]);
// now we can copy all the data over
#pragma omp parallel for
for (size_t i = 0; i < ntot_; ++i)
data_[i] = g.data_[i];
}
@@ -357,6 +357,7 @@ public:
return val;
}
inline ccomplex_t gradient( const int idim, std::array<size_t,3> ijk ) const
{
if( bdistributed ){
@@ -791,13 +792,17 @@ public:
}
}
//! perform a backwards Fourier transform
void FourierTransformBackward(bool do_transform = true);
//! perform a forwards Fourier transform
void FourierTransformForward(bool do_transform = true);
//! perform a copy operation between two FFT grids that might not be of the same size
void FourierInterpolateCopyTo( grid_fft_t &grid_to );
void FillRandomReal(unsigned long int seed = 123456ul);
//! normalise field
void ApplyNorm(void);
void Write_to_HDF5(std::string fname, std::string datasetname) const;
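The new FourierInterpolateCopyTo declaration can be exercised roughly as follows. This is a minimal usage sketch, not part of the commit: the Grid_FFT constructor signature and real_t follow the conventions used elsewhere in this code base, and the grid sizes are only illustrative.

// Fourier-interpolate a 64^3 field onto a 128^3 grid (sizes are illustrative)
const real_t boxlen = 100.0;
Grid_FFT<real_t> coarse({64, 64, 64}, {boxlen, boxlen, boxlen});
Grid_FFT<real_t> fine({128, 128, 128}, {boxlen, boxlen, boxlen});
// ... fill `coarse` in real space ...
coarse.FourierInterpolateCopyTo(fine); // `fine` now holds the field in Fourier space
fine.FourierTransformBackward();       // transform back to real space on the finer grid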

View file

@@ -185,6 +185,7 @@ void Grid_FFT<data_t, bdistributed>::ApplyNorm(void)
data_[i] *= fft_norm_fac_;
}
//! Perform a forward Fourier transformation
template <typename data_t, bool bdistributed>
void Grid_FFT<data_t, bdistributed>::FourierTransformForward(bool do_transform)
{
@@ -217,6 +218,7 @@ void Grid_FFT<data_t, bdistributed>::FourierTransformForward(bool do_transform)
}
}
//! Perform a backward Fourier transformation
template <typename data_t, bool bdistributed>
void Grid_FFT<data_t, bdistributed>::FourierTransformBackward(bool do_transform)
{
@@ -248,6 +250,208 @@ void Grid_FFT<data_t, bdistributed>::FourierTransformBackward(bool do_transform)
}
}
//! Perform a copy to another field, not necessarily the same size, using Fourier interpolation
template <typename data_t, bool bdistributed>
void Grid_FFT<data_t, bdistributed>::FourierInterpolateCopyTo( grid_fft_t &grid_to )
{
grid_fft_t &grid_from = *this;
//... transform both grids to Fourier space
grid_from.FourierTransformForward(true);
grid_to.FourierTransformForward(false);
// if the grids are the same size, we can copy directly without interpolation
if( grid_from.n_[0] == grid_to.n_[0]
&& grid_from.n_[1] == grid_to.n_[1]
&& grid_from.n_[2] == grid_to.n_[2] )
{
grid_to.copy_from( grid_from );
return;
}
// zero the target so that modes not present in the source remain zero
grid_to.zero();
// if not running MPI, the copy can be done locally without sending and receiving
#if !defined(USE_MPI)
// determine effective Nyquist modes representable by both fields and their locations in array
size_t fny0_left = std::min(grid_from.n_[0] / 2, grid_to.n_[0] / 2);
size_t fny0_right = std::max(grid_from.n_[0] - grid_to.n_[0] / 2, grid_from.n_[0] / 2);
size_t fny1_left = std::min(grid_from.n_[1] / 2, grid_to.n_[1] / 2);
size_t fny1_right = std::max(grid_from.n_[1] - grid_to.n_[1] / 2, grid_from.n_[1] / 2);
size_t fny2_left = std::min(grid_from.n_[2] / 2, grid_to.n_[2] / 2);
// size_t fny2_right = std::max(grid_from.n_[2] - grid_to.n_[2] / 2, grid_from.n_[2] / 2);
const size_t fny0_left_recv = fny0_left;
const size_t fny0_right_recv = (fny0_right + grid_to.n_[0]) - grid_from.n_[0];
const size_t fny1_left_recv = fny1_left;
const size_t fny1_right_recv = (fny1_right + grid_to.n_[1]) - grid_from.n_[1];
const size_t fny2_left_recv = fny2_left;
// const size_t fny2_right_recv = (fny2_right + grid_to.n_[2]) - grid_from.n_[2];
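// (the *_recv thresholds mark where these modes sit in the destination grid; the
//  naming mirrors the MPI branch below, even though nothing is received here)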
#pragma omp parallel for
for( size_t i=0; i<grid_to.size(0); ++i )
{
if (i < fny0_left_recv || i > fny0_right_recv)
{
size_t isend = (i < fny0_left_recv)? i : (i + grid_from.n_[0]) - grid_to.n_[0];
// copy data slice into new grid, avoiding modes that do not exist in both
for( size_t j=0; j<grid_to.size(1); ++j )
{
if( j < fny1_left_recv || j > fny1_right_recv )
{
const size_t jsend = (j < fny1_left_recv)? j : (j + grid_from.n_[1]) - grid_to.n_[1];
for( size_t k=0; k<fny2_left_recv; ++k )
{
grid_to.kelem(i,j,k) = grid_from.kelem(isend,jsend,k);
}
}
}
}
}
#else
// if they are not the same size, we use Fourier interpolation to upscale/downscale
double tstart = get_wtime();
music::dlog << "[MPI] Started scatter for Fourier interpolation/copy" << std::endl;
//... determine communication offsets
std::vector<ptrdiff_t> offsets_send, offsets_recv;
// this should use bisection, not linear search
auto get_task = [](ptrdiff_t index, const std::vector<ptrdiff_t> &offsets, const int ntasks) -> int
{
int itask = 0;
while (itask < ntasks - 1 && offsets[itask + 1] <= index)
++itask;
return itask;
};
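// Editor's sketch (not part of this commit): since the offsets are sorted, the
// linear search above could be replaced by a bisection via std::upper_bound
// (requires <algorithm>), e.g.:
// auto get_task = [](ptrdiff_t index, const std::vector<ptrdiff_t> &offsets, const int ntasks) -> int
// {
//     // first offset strictly greater than index; the owning task is the one before it
//     auto it = std::upper_bound(offsets.begin(), offsets.begin() + ntasks, index);
//     return int(std::distance(offsets.begin(), it)) - 1;
// };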
int ntasks(MPI::get_size());
offsets_send.assign(ntasks, 0);
offsets_recv.assign(ntasks, 0);
MPI_Allgather(&grid_from.local_1_start_, 1, MPI_LONG_LONG, &offsets_send[0], 1,
MPI_LONG_LONG, MPI_COMM_WORLD);
MPI_Allgather(&grid_to.local_1_start_, 1, MPI_LONG_LONG, &offsets_recv[0], 1,
MPI_LONG_LONG, MPI_COMM_WORLD);
MPI_Datatype datatype =
(typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX
: (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX
: (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX
: MPI_BYTE;
const size_t slicesz = grid_from.size(1) * grid_from.size(3);
// determine effective Nyquist modes representable by both fields and their locations in array
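// note: the roles of n_[0] and n_[1] are swapped here relative to the non-MPI
// branch above, since in the distributed case the Fourier-space data are stored
// in a transposed slab layout (cf. the local_1_start_ offsets gathered above)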
size_t fny0_left = std::min(grid_from.n_[1] / 2, grid_to.n_[1] / 2);
size_t fny0_right = std::max(grid_from.n_[1] - grid_to.n_[1] / 2, grid_from.n_[1] / 2);
size_t fny1_left = std::min(grid_from.n_[0] / 2, grid_to.n_[0] / 2);
size_t fny1_right = std::max(grid_from.n_[0] - grid_to.n_[0] / 2, grid_from.n_[0] / 2);
size_t fny2_left = std::min(grid_from.n_[2] / 2, grid_to.n_[2] / 2);
// size_t fny2_right = std::max(grid_from.n_[2] - grid_to.n_[2] / 2, grid_from.n_[2] / 2);
const size_t fny0_left_recv = fny0_left;
const size_t fny0_right_recv = (fny0_right + grid_to.n_[1]) - grid_from.n_[1];
const size_t fny1_left_recv = fny1_left;
const size_t fny1_right_recv = (fny1_right + grid_to.n_[0]) - grid_from.n_[0];
const size_t fny2_left_recv = fny2_left;
// const size_t fny2_right_recv = (fny2_right + grid_to.n_[2]) - grid_from.n_[2];
//--- send data from buffer ---------------------------------------------------
std::vector<MPI_Request> req;
MPI_Request temp_req;
for (size_t i = 0; i < grid_from.size(0); ++i)
{
size_t iglobal_send = i + offsets_send[CONFIG::MPI_task_rank];
if (iglobal_send < fny0_left)
{
size_t iglobal_recv = iglobal_send;
int sendto = get_task(iglobal_recv, offsets_recv, CONFIG::MPI_task_size);
MPI_Isend(&grid_from.kelem(i * slicesz), (int)slicesz, datatype, sendto,
(int)iglobal_recv, MPI_COMM_WORLD, &temp_req);
req.push_back(temp_req);
// std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal_send << " to task " << sendto << ", size = " << slicesz << std::endl;
}
if (iglobal_send > fny0_right)
{
size_t iglobal_recv = (iglobal_send + grid_to.n_[1]) - grid_from.n_[1];
int sendto = get_task(iglobal_recv, offsets_recv, CONFIG::MPI_task_size);
MPI_Isend(&grid_from.kelem(i * slicesz), (int)slicesz, datatype, sendto,
(int)iglobal_recv, MPI_COMM_WORLD, &temp_req);
req.push_back(temp_req);
// std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal_send << " to task " << sendto << ", size = " << slicesz<< std::endl;
}
}
//--- receive data ------------------------------------------------------------
#pragma omp parallel
{
MPI_Status status;
ccomplex_t * recvbuf = new ccomplex_t[ slicesz ];
#pragma omp for
for( size_t i=0; i<grid_to.size(0); ++i )
{
size_t iglobal_recv = i + offsets_recv[CONFIG::MPI_task_rank];
if (iglobal_recv < fny0_left_recv || iglobal_recv > fny0_right_recv)
{
size_t iglobal_send = (iglobal_recv < fny0_left_recv)? iglobal_recv : (iglobal_recv + grid_from.n_[1]) - grid_to.n_[1];
int recvfrom = get_task(iglobal_send, offsets_send, CONFIG::MPI_task_size);
// receive data slice and check for MPI errors when in debug mode
status.MPI_ERROR = MPI_SUCCESS;
MPI_Recv(&recvbuf[0], (int)slicesz, datatype, recvfrom, (int)iglobal_recv, MPI_COMM_WORLD, &status);
assert(status.MPI_ERROR == MPI_SUCCESS);
// copy data slice into new grid, avoiding modes that do not exist in both
for( size_t j=0; j<grid_to.size(1); ++j )
{
if( j < fny1_left_recv || j > fny1_right_recv )
{
const size_t jsend = (j < fny1_left_recv)? j : (j + grid_from.n_[0]) - grid_to.n_[0];
for( size_t k=0; k<fny2_left_recv; ++k )
{
grid_to.kelem(i,j,k) = recvbuf[jsend * grid_from.sizes_[3] + k];
}
}
}
}
}
delete[] recvbuf;
}
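// Editor's note (not part of this commit): the MPI_Recv calls above are issued from
// inside an OpenMP parallel region, which requires the MPI library to provide full
// thread support, e.g. initialisation via
//   int provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
// followed by a check that provided == MPI_THREAD_MULTIPLE.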
MPI_Status status;
for (size_t i = 0; i < req.size(); ++i)
{
// need to set status as wait does not necessarily modify it
// c.f. http://www.open-mpi.org/community/lists/devel/2007/04/1402.php
status.MPI_ERROR = MPI_SUCCESS;
// std::cout << "task " << CONFIG::MPI_task_rank << " : checking request No" << i << std::endl;
MPI_Wait(&req[i], &status);
// std::cout << "---> ok!" << std::endl;
assert(status.MPI_ERROR == MPI_SUCCESS);
}
MPI_Barrier(MPI_COMM_WORLD);
music::dlog.Print("[MPI] Completed scatter for Fourier interpolation/copy, took %fs\n",
get_wtime() - tstart);
#endif //defined(USE_MPI)
}
#define H5_USE_16_API
#include <hdf5.h>
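For reference, the mode selection used in FourierInterpolateCopyTo can be illustrated in isolation. The following standalone sketch is not part of the commit (the grid sizes are arbitrary examples); it reproduces the one-dimensional index mapping of the non-MPI branch and prints, for each target mode, the source mode it is copied from, or whether it is left at zero.

#include <algorithm>
#include <cstddef>
#include <cstdio>

int main()
{
    const std::size_t n_from = 4, n_to = 8; // upscaling example; swap the sizes for downscaling

    // effective Nyquist modes representable in both grids (same formulas as in the routine)
    const std::size_t fny_left       = std::min(n_from / 2, n_to / 2);
    const std::size_t fny_right      = std::max(n_from - n_to / 2, n_from / 2);
    const std::size_t fny_left_recv  = fny_left;
    const std::size_t fny_right_recv = (fny_right + n_to) - n_from;

    for (std::size_t i = 0; i < n_to; ++i)
    {
        if (i < fny_left_recv || i > fny_right_recv)
        {
            // mode exists in both grids: map the target index back to the source index
            const std::size_t isend = (i < fny_left_recv) ? i : (i + n_from) - n_to;
            std::printf("target mode %zu <- source mode %zu\n", i, isend);
        }
        else
        {
            std::printf("target mode %zu left at zero\n", i);
        }
    }
    return 0;
}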