diff --git a/include/convolution.hh b/include/convolution.hh index feb2513..6dfa80d 100644 --- a/include/convolution.hh +++ b/include/convolution.hh @@ -11,9 +11,8 @@ class OrszagConvolver { protected: Grid_FFT *f1p_, *f2p_; -#ifdef USE_MPI - Grid_FFT *fMPIbuf_; -#endif + Grid_FFT *fbuf_; + std::array np_; std::array length_; @@ -54,11 +53,11 @@ public: : np_({3*N[0]/2,3*N[1]/2,3*N[2]/2}), length_(L) { //... create temporaries - f1p_ = new Grid_FFT(np_, length_, kspace_id); - f2p_ = new Grid_FFT(np_, length_, kspace_id); + f1p_ = new Grid_FFT(np_, length_, kspace_id); + f2p_ = new Grid_FFT(np_, length_, kspace_id); + fbuf_ = new Grid_FFT(N, length_, kspace_id); // needed for MPI, or for triple conv. #if defined(USE_MPI) - fMPIbuf_ = new Grid_FFT(N, length_, kspace_id); maxslicesz_ = f1p_->sizes_[1] * f1p_->sizes_[3] * 2; crecvbuf_ = new ccomplex_t[maxslicesz_ / 2]; @@ -70,14 +69,10 @@ public: offsetsp_.assign(ntasks,0); sizes_.assign(ntasks,0); sizesp_.assign(ntasks,0); - // offsets_ = new ptrdiff_t[ntasks]; - // offsetsp_ = new ptrdiff_t[ntasks]; - // sizes_ = new ptrdiff_t[ntasks]; - // sizesp_ = new ptrdiff_t[ntasks]; size_t tsize = N[0], tsizep = f1p_->size(0); - MPI_Allgather(&fMPIbuf_->local_1_start_, 1, MPI_LONG_LONG, &offsets_[0], 1, + MPI_Allgather(&fbuf_->local_1_start_, 1, MPI_LONG_LONG, &offsets_[0], 1, MPI_LONG_LONG, MPI_COMM_WORLD); MPI_Allgather(&f1p_->local_1_start_, 1, MPI_LONG_LONG, &offsetsp_[0], 1, MPI_LONG_LONG, MPI_COMM_WORLD); @@ -92,13 +87,9 @@ public: { delete f1p_; delete f2p_; + delete fbuf_; #if defined(USE_MPI) - delete fMPIbuf_; delete[] crecvbuf_; - // delete[] offsets_; - // delete[] offsetsp_; - // delete[] sizes_; - // delete[] sizesp_; #endif } @@ -119,6 +110,32 @@ public: }, res, op ); } + template< typename opp > + void convolve_Hessians( Grid_FFT & inl, const std::array& d2l, + Grid_FFT & inm, const std::array& d2m, + Grid_FFT & inr, const std::array& d2r, + Grid_FFT & res, opp op ) + { + // transform to FS in case fields are not + inl.FourierTransformForward(); + inm.FourierTransformForward(); + inr.FourierTransformForward(); + // perform convolution of Hessians + this->convolve3( + [&]( size_t i, size_t j, size_t k ) -> ccomplex_t{ + auto kk = inl.template get_k(i,j,k); + return -kk[d2l[0]] * kk[d2l[1]] * inl.kelem(i,j,k); + }, + [&]( size_t i, size_t j, size_t k ) -> ccomplex_t{ + auto kk = inl.template get_k(i,j,k); + return -kk[d2m[0]] * kk[d2m[1]] * inm.kelem(i,j,k); + }, + [&]( size_t i, size_t j, size_t k ){ + auto kk = inr.template get_k(i,j,k); + return -kk[d2r[0]] * kk[d2r[1]] * inr.kelem(i,j,k); + }, res, op ); + } + template< typename opp > void convolve_SumHessians( Grid_FFT & inl, const std::array& d2l, Grid_FFT & inr, const std::array& d2r1, const std::array& d2r2, Grid_FFT & res, opp op ){ @@ -144,7 +161,7 @@ public: f1p_->FourierTransformForward(false); this->pad_insert( kf1, *f1p_ ); - //... prepare data 1 + //... prepare data 2 f2p_->FourierTransformForward(false); this->pad_insert( kf2, *f2p_ ); @@ -162,6 +179,44 @@ public: unpad(*f2p_, res, op); } + template< typename kfunc1, typename kfunc2, typename kfunc3, typename opp > + void convolve3( kfunc1 kf1, kfunc2 kf2, kfunc3 kf3, Grid_FFT & res, opp op ) + { + convolve2( kf1, kf2, *fbuf_, []( ccomplex_t res, ccomplex_t ){ return res; } ); + //... prepare data 1 + f1p_->FourierTransformForward(false); + this->pad_insert( [&]( size_t i, size_t j, size_t k ){return fbuf_->kelem(i,j,k);}, *f1p_ ); + + //... prepare data 2 + f2p_->FourierTransformForward(false); + this->pad_insert( kf3, *f2p_ ); + + //... convolve + f1p_->FourierTransformBackward(); + f2p_->FourierTransformBackward(); + + #pragma omp parallel for + for (size_t i = 0; i < f1p_->ntot_; ++i){ + (*f2p_).relem(i) *= (*f1p_).relem(i); + } + f2p_->FourierTransformForward(); + //... copy data back + res.FourierTransformForward(); + unpad(*f2p_, res, op); + } + + template< typename opp > + void test_pad_unpad( Grid_FFT & in, Grid_FFT & res, opp op ) + { + //... prepare data 1 + f1p_->FourierTransformForward(false); + this->pad_insert( [&]( size_t i, size_t j, size_t k ){return in.kelem(i,j,k);}, *f1p_ ); + f1p_->FourierTransformBackward(); + f1p_->FourierTransformForward(); + res.FourierTransformForward(); + unpad(*f1p_, res, op); + } + //... inplace interface /*void convolve3( const Grid_FFT & f1, const Grid_FFT & f2, const Grid_FFT & f3, Grid_FFT & res ) { @@ -174,20 +229,12 @@ private: template void pad_insert( kdep_functor kfunc, Grid_FFT &fp ){ assert( fp.space_ == kspace_id ); - - // size_t dn[3] = { - // fp.n_[0]/3,// fp.n_[0] - f.n_[0], - // fp.n_[1]/3,// fp.n_[1] - f.n_[1], - // fp.n_[2]/3// fp.n_[2] - f.n_[2], - // }; - const double rfac = std::pow(1.5,1.5);//std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(f.n_[0] * f.n_[1] * f.n_[2]); + const double rfac = std::pow(1.5,1.5); fp.zero(); - #if !defined(USE_MPI) //////////////////////////////////////////////////////////////////////////////////// - //size_t nhalf[3] = {f.n_[0] / 2, f.n_[1] / 2, f.n_[2] / 2}; size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3}; #pragma omp parallel for @@ -201,7 +248,6 @@ private: { size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k; // if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue; - //fp.kelem(ip, jp, kp) = f.kelem(i, j, k) * rfac; fp.kelem(ip, jp, kp) = kfunc(i, j, k) * rfac; } } @@ -210,7 +256,7 @@ private: #else /// then USE_MPI is defined //////////////////////////////////////////////////////////// MPI_Barrier(MPI_COMM_WORLD); - + fbuf_->FourierTransformForward(false); ///////////////////////////////////////////////////////////////////// double tstart = get_wtime(); @@ -218,48 +264,30 @@ private: //... collect offsets - assert(fMPIbuf_->space_ == kspace_id); + assert(fbuf_->space_ == kspace_id); - size_t nf[3] = {fMPIbuf_->size(0), fMPIbuf_->size(1), fMPIbuf_->size(2)}; + size_t nf[3] = {fbuf_->size(0), fbuf_->size(1), fbuf_->size(2)}; size_t nfp[3] = {fp.size(0), fp.size(1), fp.size(2)}; - size_t fny[3] = {fMPIbuf_->n_[1] / 2, fMPIbuf_->n_[0] / 2, fMPIbuf_->n_[2] / 2}; //... local size must be divisible by 2, otherwise this gets too complicated - assert(fMPIbuf_->n_[1] % 2 == 0); - - size_t slicesz = fMPIbuf_->size(1) * fMPIbuf_->size(3); //*2; - - // comunicate - // check if this is a real field (then we get the wrong size) - // if (typeid(data_t) == typeid(real_t)) - // slicesz *= 2; // then sizeof(real_t) gives only half of a complex - - // MPI_Datatype datatype = - // (typeid(data_t) == typeid(float)) - // ? MPI_FLOAT - // : (typeid(data_t) == typeid(double)) - // ? MPI_DOUBLE - // : (typeid(data_t) == typeid(std::complex)) - // ? MPI_COMPLEX - // : (typeid(data_t) == typeid(std::complex)) - // ? MPI_DOUBLE_COMPLEX - // : MPI_INT; + assert(fbuf_->n_[1] % 2 == 0); + size_t slicesz = fbuf_->size(1) * fbuf_->size(3); MPI_Datatype datatype = (typeid(data_t) == typeid(float)) ? MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE; - // fill MPI send buffer - fMPIbuf_->FourierTransformForward(false); + // fill MPI send buffer with results of kfunc + #pragma omp parallel for - for (size_t i = 0; i < fMPIbuf_->size(0); ++i) + for (size_t i = 0; i < fbuf_->size(0); ++i) { - for (size_t j = 0; j < fMPIbuf_->size(1); ++j) + for (size_t j = 0; j < fbuf_->size(1); ++j) { - for (size_t k = 0; k < fMPIbuf_->size(2); ++k) + for (size_t k = 0; k < fbuf_->size(2); ++k) { - fMPIbuf_->kelem(i, j, k) = kfunc(i, j, k) * rfac; + fbuf_->kelem(i, j, k) = kfunc(i, j, k) * rfac; } } } @@ -274,19 +302,19 @@ private: { size_t iglobal = i + offsets_[CONFIG::MPI_task_rank]; - if (iglobal < fny[0]) + if (iglobal < nf[0]/2 )//fny[0]) { int sendto = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size); - MPI_Isend(&fMPIbuf_->kelem(i * slicesz), (int)slicesz, datatype, sendto, + MPI_Isend(&fbuf_->kelem(i * slicesz), (int)slicesz, datatype, sendto, (int)iglobal, MPI_COMM_WORLD, &temp_req); req.push_back(temp_req); // std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal << " to task " << sendto << ", size = " << slicesz << std::endl; } - if (iglobal > fny[0]) + if (iglobal > nf[0]/2) //fny[0]) { - int sendto = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size); - MPI_Isend(&fMPIbuf_->kelem(i * slicesz), (int)slicesz, datatype, sendto, - (int)(iglobal + fny[0]), MPI_COMM_WORLD, &temp_req); + int sendto = get_task(iglobal + nf[0]/2, offsetsp_, sizesp_, CONFIG::MPI_task_size); + MPI_Isend(&fbuf_->kelem(i * slicesz), (int)slicesz, datatype, sendto, + (int)(iglobal + nf[0]/2), MPI_COMM_WORLD, &temp_req); req.push_back(temp_req); // std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal+fny[0] << " to task " << sendto << ", size = " << slicesz<< std::endl; } @@ -296,13 +324,13 @@ private: { size_t iglobal = i + offsetsp_[CONFIG::MPI_task_rank]; - if (iglobal < fny[0] || iglobal > 2 * fny[0]) + if (iglobal < nf[0]/2 || iglobal > nf[0]) { int recvfrom = 0; - if (iglobal <= fny[0]) + if (iglobal < nf[0]/2) recvfrom = get_task(iglobal, offsets_, sizes_, CONFIG::MPI_task_size); else - recvfrom = get_task(iglobal - fny[0], offsets_, sizes_, CONFIG::MPI_task_size); + recvfrom = get_task(iglobal - nf[0]/2, offsets_, sizes_, CONFIG::MPI_task_size); // std::cout << "task " << CONFIG::MPI_task_rank << " : receive #" << iglobal << " from task " // << recvfrom << ", size = " << slicesz << ", " << crecvbuf_ << ", " << datatype << std::endl; @@ -311,34 +339,31 @@ private: MPI_COMM_WORLD, &status); // std::cout << "---> ok! " << (bool)(status.MPI_ERROR==MPI_SUCCESS) << std::endl; - // assert(status.MPI_ERROR == MPI_SUCCESS); + assert(status.MPI_ERROR == MPI_SUCCESS); for (size_t j = 0; j < nf[1]; ++j) { - if (j < fny[1]) + if (j < nf[1]/2) { size_t jp = j; for (size_t k = 0; k < nf[2]; ++k) { - // size_t kp = (k>fny[2])? k+fny[2] : k; - if (k < fny[2]) - fp.kelem(i, jp, k) = crecvbuf_[j * fMPIbuf_->sizes_[3] + k]; - else if (k > fny[2]) - fp.kelem(i, jp, k + fny[2]) = crecvbuf_[j * fMPIbuf_->sizes_[3] + k]; + if (k < nf[2]/2) + fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k]; + else if (k > nf[2]/2) + fp.kelem(i, jp, k + nf[2]/2) = crecvbuf_[j * fbuf_->sizes_[3] + k]; } } - else if (j > fny[1]) + else if (j > nf[1]/2) { - size_t jp = j + fny[1]; + size_t jp = j + nf[1]/2; for (size_t k = 0; k < nf[2]; ++k) { - // size_t kp = (k>fny[2])? k+fny[2] : k; - // fp.kelem(i,jp,kp) = crecvbuf_[j*f.sizes_[3]+k]; - if (k < fny[2]) - fp.kelem(i, jp, k) = crecvbuf_[j * fMPIbuf_->sizes_[3] + k]; - else if (k > fny[2]) - fp.kelem(i, jp, k + fny[2]) = crecvbuf_[j * fMPIbuf_->sizes_[3] + k]; + if (k < nf[2]/2) + fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k]; + else if (k > nf[2]/2) + fp.kelem(i, jp, k + nf[2]/2) = crecvbuf_[j * fbuf_->sizes_[3] + k]; } } } @@ -373,32 +398,24 @@ private: template void unpad(const Grid_FFT &fp, Grid_FFT &f, operator_t op ) { - // assert(fp.n_[0] == 3 * f.n_[0] / 2); - // assert(fp.n_[1] == 3 * f.n_[1] / 2); - // assert(fp.n_[2] == 3 * f.n_[2] / 2); + const double rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(f.n_[0] * f.n_[1] * f.n_[2]); // make sure we're in Fourier space... assert( fp.space_ == kspace_id ); f.FourierTransformForward(); #if !defined(USE_MPI) //////////////////////////////////////////////////////////////////////////////////// - size_t dn[3] = { - fp.n_[0] - f.n_[0], - fp.n_[1] - f.n_[1], - fp.n_[2] - f.n_[2], - }; size_t nhalf[3] = {f.n_[0] / 2, f.n_[1] / 2, f.n_[2] / 2}; - const double rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(f.n_[0] * f.n_[1] * f.n_[2]); - + for (size_t i = 0; i < f.size(0); ++i) { - size_t ip = (i > nhalf[0]) ? i + dn[0] : i; + size_t ip = (i > nhalf[0]) ? i + nhalf[0] : i; for (size_t j = 0; j < f.size(1); ++j) { - size_t jp = (j > nhalf[1]) ? j + dn[1] : j; + size_t jp = (j > nhalf[1]) ? j + nhalf[1] : j; for (size_t k = 0; k < f.size(2); ++k) { - size_t kp = (k > nhalf[2]) ? k + dn[2] : k; + size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k; // if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue; f.kelem(i, j, k) = op(fp.kelem(ip, jp, kp) / rfac, f.kelem(i, j, k)); } @@ -411,7 +428,7 @@ private: double tstart = get_wtime(); - csoca::ilog << "[MPI] Started gather for convolution"; + csoca::dlog << "[MPI] Started gather for convolution"; MPI_Barrier(MPI_COMM_WORLD); @@ -421,20 +438,6 @@ private: size_t slicesz = fp.size(1) * fp.size(3); - // if (typeid(data_t) == typeid(real_t)) - // slicesz *= 2; // then sizeof(real_t) gives only half of a complex - - // MPI_Datatype datatype = - // (typeid(data_t) == typeid(float)) - // ? MPI_FLOAT - // : (typeid(data_t) == typeid(double)) - // ? MPI_DOUBLE - // : (typeid(data_t) == typeid(std::complex)) - // ? MPI_COMPLEX - // : (typeid(data_t) == typeid(std::complex)) - // ? MPI_DOUBLE_COMPLEX - // : MPI_INT; - MPI_Datatype datatype = (typeid(data_t) == typeid(float)) ? MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE; @@ -444,8 +447,6 @@ private: //... local size must be divisible by 2, otherwise this gets too complicated // assert( tsize%2 == 0 ); - f.zero(); - std::vector req; MPI_Request temp_req; @@ -503,12 +504,10 @@ private: size_t jp = j; for (size_t k = 0; k < nf[2]; ++k) { - // size_t kp = (k>fny[2])? k+fny[2] : k; - // f.kelem(i,j,k) = crecvbuf_[jp*nfp[3]+kp]; if (k < fny[2]) - f.kelem(i, j, k) = op(crecvbuf_[jp * nfp[3] + k],f.kelem(i, j, k)); + f.kelem(i, j, k) = op(crecvbuf_[jp * nfp[3] + k]/rfac,f.kelem(i, j, k)); else if (k > fny[2]) - f.kelem(i, j, k) = op(crecvbuf_[jp * nfp[3] + k + fny[2]], f.kelem(i, j, k)); + f.kelem(i, j, k) = op(crecvbuf_[jp * nfp[3] + k + fny[2]]/rfac, f.kelem(i, j, k)); } } if (j > fny[1]) @@ -516,12 +515,10 @@ private: size_t jp = j + fny[1]; for (size_t k = 0; k < nf[2]; ++k) { - // size_t kp = (k>fny[2])? k+fny[2] : k; - // f.kelem(i,j,k) = crecvbuf_[jp*nfp[3]+kp]; if (k < fny[2]) - f.kelem(i, j, k) = op(crecvbuf_[jp * nfp[3] + k], f.kelem(i, j, k)); + f.kelem(i, j, k) = op(crecvbuf_[jp * nfp[3] + k]/rfac, f.kelem(i, j, k)); else if (k > fny[2]) - f.kelem(i, j, k) = op(crecvbuf_[jp * nfp[3] + k + fny[2]], f.kelem(i, j, k)); + f.kelem(i, j, k) = op(crecvbuf_[jp * nfp[3] + k + fny[2]]/rfac, f.kelem(i, j, k)); } } } @@ -540,7 +537,7 @@ private: MPI_Barrier(MPI_COMM_WORLD); - csoca::ilog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart); + csoca::dlog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart); #endif /// end of ifdef/ifndef USE_MPI ////////////////////////////////////////////////////////////// } diff --git a/src/grid_fft.cc b/src/grid_fft.cc index ab3a51d..0b4d621 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -42,7 +42,7 @@ void Grid_FFT::Setup(void) ntot_ = (n_[2] + 2) * n_[1] * n_[0]; - csoca::ilog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); + csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); if (typeid(data_t) == typeid(real_t)) { data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(real_t))); @@ -120,7 +120,8 @@ void Grid_FFT::Setup(void) cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD, &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); ntot_ = 2 * cmplxsz; - data_ = (data_t*)fftw_malloc(ntot_ * sizeof(real_t)); + data_ = + (data_t*)fftw_malloc(ntot_ * sizeof(real_t)); cdata_ = reinterpret_cast(data_); plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); @@ -146,7 +147,7 @@ void Grid_FFT::Setup(void) } - csoca::ilog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); + csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]); @@ -222,7 +223,7 @@ void Grid_FFT::FourierTransformForward(bool do_transform) this->ApplyNorm(); wtime = get_wtime() - wtime; - csoca::ilog.Print("[FFT] Completed Grid_FFT::to_kspace (%lux%lux%lu), took %f s", sizes_[0], sizes_[1], sizes_[2], wtime); + csoca::dlog.Print("[FFT] Completed Grid_FFT::to_kspace (%lux%lux%lu), took %f s", sizes_[0], sizes_[1], sizes_[2], wtime); } sizes_[0] = local_1_size_; @@ -253,7 +254,7 @@ void Grid_FFT::FourierTransformBackward(bool do_transform) this->ApplyNorm(); wtime = get_wtime() - wtime; - csoca::ilog.Print("[FFT] Completed Grid_FFT::to_rspace (%dx%dx%d), took %f s\n", sizes_[0], sizes_[1], sizes_[2], wtime); + csoca::dlog.Print("[FFT] Completed Grid_FFT::to_rspace (%dx%dx%d), took %f s\n", sizes_[0], sizes_[1], sizes_[2], wtime); } sizes_[0] = local_0_size_; sizes_[1] = n_[1]; @@ -386,28 +387,34 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) plist_id = H5P_DEFAULT; #endif + memspace = H5Screate_simple(3, count, NULL); + filespace = H5Dget_space(dset_id); + for (size_t i = 0; i < size(0); ++i) { #if defined(USE_MPI) offset[0] = mpi_rank * size(0) + i; #else - offset[0] = i; + offset[0] = i; #endif - for (size_t j = 0; j < size(1); ++j) + for (size_t j = 0; j < size(1); ++j){ for (size_t k = 0; k < size(2); ++k) { buf[j * size(2) + k] = std::real(relem(i, j, k)); } + } - memspace = H5Screate_simple(3, count, NULL); - filespace = H5Dget_space(dset_id); + H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL); H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf); - H5Sclose(memspace); - H5Sclose(filespace); + } + + H5Sclose(filespace); + H5Sclose(memspace); + #if defined(USE_MPI) && defined(USE_MPI_IO) H5Pclose(plist_id); #endif @@ -455,7 +462,7 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) #if defined(USE_MPI) offset[0] = mpi_rank * size(0) + i; #else - offset[0] = i; + offset[0] = i; #endif for (size_t j = 0; j < size(1); ++j) diff --git a/src/main.cc b/src/main.cc index f0f4d91..0a72241 100644 --- a/src/main.cc +++ b/src/main.cc @@ -164,6 +164,7 @@ int main( int argc, char** argv ) //====================================================================== //... compute 1LPT displacement potential .... // phi = - delta / k^2 + csoca::ilog << "Computing phi(1) term..." << std::endl; phi.FourierTransformForward(); phi.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { @@ -181,8 +182,12 @@ int main( int argc, char** argv ) auto sub_op = []( ccomplex_t res, ccomplex_t val ) -> ccomplex_t{ return val-res; }; #if 1 - // phi_xx * phi_yy + csoca::ilog << "Computing phi(2) term..." << std::endl; + // Compute the source term for phi(2) Conv.convolve_SumHessians( phi, {0,0}, phi, {1,1}, {2,2}, phi2, assign_op ); + // Conv.convolve_Hessians( phi, {0,0}, phi, {1,1}, phi2, assign_op ); + // Conv.convolve_Hessians( phi, {0,0}, phi, {2,2}, phi2, add_op ); + Conv.convolve_Hessians( phi, {1,1}, phi, {2,2}, phi2, add_op ); Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, phi2, sub_op ); Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, phi2, sub_op ); @@ -226,7 +231,9 @@ int main( int argc, char** argv ) #if 1 auto sub2_op = []( ccomplex_t res, ccomplex_t val ) -> ccomplex_t{ return val-2.0*res; }; + auto add2_op = []( ccomplex_t res, ccomplex_t val ) -> ccomplex_t{ return val+2.0*res; }; + csoca::ilog << "Computing phi(3a) term..." << std::endl; Conv.convolve_SumHessians( phi, {0,0}, phi2, {1,1}, {2,2}, phi3a, assign_op ); Conv.convolve_SumHessians( phi, {1,1}, phi2, {2,2}, {0,0}, phi3a, add_op ); Conv.convolve_SumHessians( phi, {2,2}, phi2, {0,0}, {1,1}, phi3a, add_op ); @@ -237,6 +244,14 @@ int main( int argc, char** argv ) phi3a.apply_function_k_dep([&](auto x, auto k) { return 0.5 * x; }); + + csoca::ilog << "Computing phi(3b) term..." << std::endl; + Conv.convolve_Hessians( phi, {0,0}, phi, {1,1}, phi, {2,2}, phi3b, assign_op ); + Conv.convolve_Hessians( phi, {0,1}, phi, {0,2}, phi, {1,2}, phi3b, add2_op ); + Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, phi, {0,0}, phi3b, sub_op ); + Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, phi, {1,1}, phi3b, sub_op ); + Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, phi, {2,2}, phi3b, sub_op ); + #else @@ -286,7 +301,7 @@ int main( int argc, char** argv ) phi3b.FourierTransformForward(); phi3b.apply_function_k_dep([&](auto x, auto k) { real_t kmod2 = k.norm_squared(); - return x * (-1.0 / kmod2) * phifac; + return x * (-1.0 / kmod2) * phifac / phifac / phifac; }); phi3b.zero_DC_mode(); @@ -347,6 +362,8 @@ int main( int argc, char** argv ) if( CONFIG::MPI_task_rank == 0 ) unlink(fname_hdf5.c_str()); MPI_Barrier( MPI_COMM_WORLD ); +#else + unlink(fname_hdf5.c_str()); #endif phi.Write_to_HDF5(fname_hdf5, "phi");