From edcfbbab588b0bc38877e35e09d2be353c1619e5 Mon Sep 17 00:00:00 2001
From: Oliver Hahn
Date: Wed, 11 Nov 2020 16:26:49 +0100
Subject: [PATCH] added new Fourier copy-interpolation routine

---
 include/grid_fft.hh |  15 ++--
 src/grid_fft.cc     | 204 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 214 insertions(+), 5 deletions(-)

diff --git a/include/grid_fft.hh b/include/grid_fft.hh
index a49e1a6..608705a 100644
--- a/include/grid_fft.hh
+++ b/include/grid_fft.hh
@@ -134,7 +134,7 @@ public:
     //! set all field elements to zero
     void zero() noexcept
     {
-#pragma omp parallel for
+        #pragma omp parallel for
         for (size_t i = 0; i < ntot_; ++i)
             data_[i] = 0.0;
     }
@@ -155,8 +155,8 @@ public:
         assert(this->n_[1] == g.n_[1]);
         assert(this->n_[2] == g.n_[2]);
 
-// now we can copy all the data over
-#pragma omp parallel for
+        // now we can copy all the data over
+        #pragma omp parallel for
         for (size_t i = 0; i < ntot_; ++i)
             data_[i] = g.data_[i];
     }
@@ -357,6 +357,7 @@ public:
         return val;
     }
 
+
     inline ccomplex_t gradient( const int idim, std::array<size_t, 3> ijk ) const
     {
         if( bdistributed ){
@@ -791,13 +792,17 @@ public:
         }
     }
 
+    //! perform a backwards Fourier transform
     void FourierTransformBackward(bool do_transform = true);
 
+    //! perform a forwards Fourier transform
     void FourierTransformForward(bool do_transform = true);
 
-    void ApplyNorm(void);
+    //! perform a copy operation between two FFT grids that might not be of the same size
+    void FourierInterpolateCopyTo( grid_fft_t &grid_to );
 
-    void FillRandomReal(unsigned long int seed = 123456ul);
+    //! normalise field
+    void ApplyNorm(void);
 
     void Write_to_HDF5(std::string fname, std::string datasetname) const;
 
diff --git a/src/grid_fft.cc b/src/grid_fft.cc
index 8de61fc..a60a428 100644
--- a/src/grid_fft.cc
+++ b/src/grid_fft.cc
@@ -185,6 +185,7 @@ void Grid_FFT<data_t, bdistributed>::ApplyNorm(void)
         data_[i] *= fft_norm_fac_;
 }
 
+//! Perform a forward Fourier transformation
 template <typename data_t, bool bdistributed>
 void Grid_FFT<data_t, bdistributed>::FourierTransformForward(bool do_transform)
 {
@@ -217,6 +218,7 @@ void Grid_FFT<data_t, bdistributed>::FourierTransformForward(bool do_transform)
     }
 }
 
+//! Perform a backward Fourier transformation
 template <typename data_t, bool bdistributed>
 void Grid_FFT<data_t, bdistributed>::FourierTransformBackward(bool do_transform)
 {
@@ -248,6 +250,208 @@ void Grid_FFT<data_t, bdistributed>::FourierTransformBackward(bool do_transform)
     }
 }
 
+//! Perform a copy to another field, not necessarily the same size, using Fourier interpolation
+template <typename data_t, bool bdistributed>
+void Grid_FFT<data_t, bdistributed>::FourierInterpolateCopyTo( grid_fft_t &grid_to )
+{
+    grid_fft_t &grid_from = *this;
+
+    //... transform both grids to Fourier space
+    grid_from.FourierTransformForward(true);
+    grid_to.FourierTransformForward(false);
+
+    // if grids are the same size, we can copy directly without the need for interpolation
+    if( grid_from.n_[0] == grid_to.n_[0]
+     && grid_from.n_[1] == grid_to.n_[1]
+     && grid_from.n_[2] == grid_to.n_[2] )
+    {
+        grid_to.copy_from( grid_from );
+        return;
+    }
+
+    // zero the target grid so that modes not filled from the source remain zero
+    grid_to.zero();
+
+    // if not running MPI, we can do without sending and receiving
+#if !defined(USE_MPI)
+
+    // determine effective Nyquist modes representable by both fields and their locations in array
+    size_t fny0_left  = std::min(grid_from.n_[0] / 2, grid_to.n_[0] / 2);
+    size_t fny0_right = std::max(grid_from.n_[0] - grid_to.n_[0] / 2, grid_from.n_[0] / 2);
+    size_t fny1_left  = std::min(grid_from.n_[1] / 2, grid_to.n_[1] / 2);
+    size_t fny1_right = std::max(grid_from.n_[1] - grid_to.n_[1] / 2, grid_from.n_[1] / 2);
+    size_t fny2_left  = std::min(grid_from.n_[2] / 2, grid_to.n_[2] / 2);
+    // size_t fny2_right = std::max(grid_from.n_[2] - grid_to.n_[2] / 2, grid_from.n_[2] / 2);
+
+    const size_t fny0_left_recv  = fny0_left;
+    const size_t fny0_right_recv = (fny0_right + grid_to.n_[0]) - grid_from.n_[0];
+    const size_t fny1_left_recv  = fny1_left;
+    const size_t fny1_right_recv = (fny1_right + grid_to.n_[1]) - grid_from.n_[1];
+    const size_t fny2_left_recv  = fny2_left;
+    // const size_t fny2_right_recv = (fny2_right + grid_to.n_[2]) - grid_from.n_[2];
+
+    #pragma omp parallel for
+    for( size_t i=0; i<grid_to.size(0); ++i )
+    {
+        if( i < fny0_left_recv || i > fny0_right_recv)
+        {
+            size_t isend = (i < fny0_left_recv)? i : (i + grid_from.n_[0]) - grid_to.n_[0];
+
+            // copy data slice into new grid, avoiding modes that do not exist in both
+            for( size_t j=0; j<grid_to.size(1); ++j )
+            {
+                if( j < fny1_left_recv || j > fny1_right_recv )
+                {
+                    const size_t jsend = (j < fny1_left_recv)? j : (j + grid_from.n_[1]) - grid_to.n_[1];
+
+                    for( size_t k=0; k<fny2_left_recv; ++k )
+                    {
+                        grid_to.kelem(i,j,k) = grid_from.kelem(isend,jsend,k);
+                    }
+                }
+            }
+        }
+    }
+
+#else
+    //... grids are MPI-distributed, so slices need to be communicated between tasks
+    double tstart = get_wtime();
+
+    std::vector<ptrdiff_t> offsets_send, offsets_recv;
+
+    // this should use bisection, not linear search
+    auto get_task = [](ptrdiff_t index, const std::vector<ptrdiff_t> &offsets, const int ntasks) -> int
+    {
+        int itask = 0;
+        while (itask < ntasks - 1 && offsets[itask + 1] <= index)
+            ++itask;
+        return itask;
+    };
+
+    int ntasks(MPI::get_size());
+
+    offsets_send.assign(ntasks, 0);
+    offsets_recv.assign(ntasks, 0);
+
+    MPI_Allgather(&grid_from.local_1_start_, 1, MPI_LONG_LONG, &offsets_send[0], 1,
+                  MPI_LONG_LONG, MPI_COMM_WORLD);
+    MPI_Allgather(&grid_to.local_1_start_, 1, MPI_LONG_LONG, &offsets_recv[0], 1,
+                  MPI_LONG_LONG, MPI_COMM_WORLD);
+
+    MPI_Datatype datatype =
+        (typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX
+        : (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX
+        : (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX
+        : MPI_BYTE;
+
+    const size_t slicesz = grid_from.size(1) * grid_from.size(3);
+
+    // determine effective Nyquist modes representable by both fields and their locations in array
+    // (dimensions 0 and 1 are swapped here since the distributed transforms use a transposed layout)
+    size_t fny0_left  = std::min(grid_from.n_[1] / 2, grid_to.n_[1] / 2);
+    size_t fny0_right = std::max(grid_from.n_[1] - grid_to.n_[1] / 2, grid_from.n_[1] / 2);
+    size_t fny1_left  = std::min(grid_from.n_[0] / 2, grid_to.n_[0] / 2);
+    size_t fny1_right = std::max(grid_from.n_[0] - grid_to.n_[0] / 2, grid_from.n_[0] / 2);
+    size_t fny2_left  = std::min(grid_from.n_[2] / 2, grid_to.n_[2] / 2);
+    // size_t fny2_right = std::max(grid_from.n_[2] - grid_to.n_[2] / 2, grid_from.n_[2] / 2);
+
+    const size_t fny0_left_recv  = fny0_left;
+    const size_t fny0_right_recv = (fny0_right + grid_to.n_[1]) - grid_from.n_[1];
+    const size_t fny1_left_recv  = fny1_left;
+    const size_t fny1_right_recv = (fny1_right + grid_to.n_[0]) - grid_from.n_[0];
+    const size_t fny2_left_recv  = fny2_left;
+    // const size_t fny2_right_recv = (fny2_right + grid_to.n_[2]) - grid_from.n_[2];
+
+    //--- send data from buffer ---------------------------------------------------
+
+    std::vector<MPI_Request> req;
+    MPI_Request temp_req;
+
+    for (size_t i = 0; i < grid_from.size(0); ++i)
+    {
+        size_t iglobal_send = i + offsets_send[CONFIG::MPI_task_rank];
+
+        if (iglobal_send < fny0_left)
+        {
+            size_t iglobal_recv = iglobal_send;
+            int sendto = get_task(iglobal_recv, offsets_recv, CONFIG::MPI_task_size);
+            MPI_Isend(&grid_from.kelem(i * slicesz), (int)slicesz, datatype, sendto,
+                      (int)iglobal_recv, MPI_COMM_WORLD, &temp_req);
+            req.push_back(temp_req);
+            // std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal_send << " to task " << sendto << ", size = " << slicesz << std::endl;
+        }
+        if (iglobal_send > fny0_right)
+        {
+            size_t iglobal_recv = (iglobal_send + grid_to.n_[1]) - grid_from.n_[1];
+            int sendto = get_task(iglobal_recv, offsets_recv, CONFIG::MPI_task_size);
+            MPI_Isend(&grid_from.kelem(i * slicesz), (int)slicesz, datatype, sendto,
+                      (int)iglobal_recv, MPI_COMM_WORLD, &temp_req);
+            req.push_back(temp_req);
+            // std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal_send << " to task " << sendto << ", size = " << slicesz << std::endl;
+        }
+    }
+
+    //--- receive data ------------------------------------------------------------
+    #pragma omp parallel
+    {
+        MPI_Status status;
+        ccomplex_t * recvbuf = new ccomplex_t[ slicesz ];
+
+        #pragma omp for
+        for( size_t i=0; i<grid_to.size(0); ++i )
+        {
+            size_t iglobal_recv = i + offsets_recv[CONFIG::MPI_task_rank];
+
+            if (iglobal_recv < fny0_left_recv || iglobal_recv > fny0_right_recv)
+            {
+                size_t iglobal_send = (iglobal_recv < fny0_left_recv)? iglobal_recv : (iglobal_recv + grid_from.n_[1]) - grid_to.n_[1];
+
+                int recvfrom = get_task(iglobal_send, offsets_send, CONFIG::MPI_task_size);
+
+                // receive data slice and check for MPI errors when in debug mode
+                status.MPI_ERROR = MPI_SUCCESS;
+                MPI_Recv(&recvbuf[0], (int)slicesz, datatype, recvfrom, (int)iglobal_recv, MPI_COMM_WORLD, &status);
+                assert(status.MPI_ERROR == MPI_SUCCESS);
+
+                // copy data slice into new grid, avoiding modes that do not exist in both
+                for( size_t j=0; j<grid_to.size(1); ++j )
+                {
+                    if( j < fny1_left_recv || j > fny1_right_recv )
+                    {
+                        const size_t jsend = (j < fny1_left_recv)? j : (j + grid_from.n_[0]) - grid_to.n_[0];
+
+                        for( size_t k=0; k<fny2_left_recv; ++k )
+                        {
+                            grid_to.kelem(i,j,k) = recvbuf[jsend * grid_from.size(3) + k];
+                        }
+                    }
+                }
+            }
+        }
+
+        delete[] recvbuf;
+    }
+
+    //--- wait for all sends to complete -------------------------------------------
+    MPI_Status status;
+    for (size_t i = 0; i < req.size(); ++i)
+    {
+        // need to preset status, since MPI_Wait does not necessarily modify it on success
+        status.MPI_ERROR = MPI_SUCCESS;
+        MPI_Wait(&req[i], &status);
+        // std::cout << "task " << CONFIG::MPI_task_rank << " : request No" << i << " -> ok!" << std::endl;
+        assert(status.MPI_ERROR == MPI_SUCCESS);
+    }
+
+    MPI_Barrier(MPI_COMM_WORLD);
+
+    music::dlog.Print("[MPI] Completed scatter for Fourier interpolation/copy, took %fs\n",
+                      get_wtime() - tstart);
+#endif //defined(USE_MPI)
+}
+
 
 #define H5_USE_16_API
 #include <hdf5.h>
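
Usage note: FourierInterpolateCopyTo() transforms the source grid forward itself and leaves the target grid in Fourier space, so the caller has to transform the target backward afterwards. A minimal sketch of upsampling a field, assuming the Grid_FFT<real_t> constructor taking the grid dimensions and box lengths used elsewhere in the codebase (the grid sizes and box length below are placeholder values, not from this patch):

#include "grid_fft.hh"

void example_fourier_upsample()
{
    const double boxlen = 100.0; // placeholder box size in code units

    // source grid at 128^3 and target grid at 256^3 resolution (placeholder sizes)
    Grid_FFT<real_t> coarse({128, 128, 128}, {boxlen, boxlen, boxlen});
    Grid_FFT<real_t> fine({256, 256, 256}, {boxlen, boxlen, boxlen});

    // ... fill `coarse` with data in real space ...

    // transforms `coarse` to Fourier space, copies all modes representable
    // on both grids into `fine`, and leaves the remaining modes of `fine` zero
    coarse.FourierInterpolateCopyTo(fine);

    // `fine` is left in Fourier space by the copy, so transform it back
    fine.FourierTransformBackward();
}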
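
The Nyquist-mode bookkeeping is easiest to check in one dimension. The sketch below (illustration only, not part of the patch) applies the same fny*_left/right logic to a single dimension of size n_from = 8 interpolated onto n_to = 6: modes below the common Nyquist cut are copied with unchanged index, negative frequencies are copied with a shifted index, and everything else, including the Nyquist plane itself, stays zero:

#include <algorithm>
#include <cstddef>
#include <cstdio>

int main()
{
    const std::size_t n_from = 8, n_to = 6; // example sizes (downsampling)

    // in source indexing, modes below fny_left and above fny_right exist on both grids
    const std::size_t fny_left  = std::min(n_from / 2, n_to / 2);
    const std::size_t fny_right = std::max(n_from - n_to / 2, n_from / 2);

    // the same cut expressed in target-array indices
    const std::size_t fny_left_recv  = fny_left;
    const std::size_t fny_right_recv = (fny_right + n_to) - n_from;

    for (std::size_t i = 0; i < n_to; ++i)
    {
        if (i < fny_left_recv || i > fny_right_recv)
        {
            // positive frequencies keep their index, negative ones are shifted
            const std::size_t isend = (i < fny_left_recv) ? i : (i + n_from) - n_to;
            std::printf("to[%zu] <- from[%zu]\n", i, isend);
        }
        else
            std::printf("to[%zu] <- 0 (mode not representable on both grids)\n", i);
    }
    return 0;
}

For these sizes the sketch prints to[0..2] <- from[0..2] for the positive frequencies and to[4] <- from[6], to[5] <- from[7] for the negative ones, while the target Nyquist slot to[3] stays zero, which is exactly the behaviour of the triple loops in the routine above.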