mirror of
https://github.com/cosmo-sims/monofonIC.git
synced 2024-09-19 17:03:45 +02:00
MPI fixes (mostly 1 bug for real->complex conv.)
This commit is contained in:
parent
105383684e
commit
55b109da47
5 changed files with 171 additions and 156 deletions
|
@ -11,7 +11,7 @@ class OrszagConvolver
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
Grid_FFT<data_t> *f1p_, *f2p_;
|
Grid_FFT<data_t> *f1p_, *f2p_;
|
||||||
Grid_FFT<data_t> *fbuf_;
|
Grid_FFT<data_t> *fbuf_, *fbuf2_;
|
||||||
|
|
||||||
std::array<size_t,3> np_;
|
std::array<size_t,3> np_;
|
||||||
std::array<real_t,3> length_;
|
std::array<real_t,3> length_;
|
||||||
|
@ -56,6 +56,7 @@ public:
|
||||||
f1p_ = new Grid_FFT<data_t>(np_, length_, kspace_id);
|
f1p_ = new Grid_FFT<data_t>(np_, length_, kspace_id);
|
||||||
f2p_ = new Grid_FFT<data_t>(np_, length_, kspace_id);
|
f2p_ = new Grid_FFT<data_t>(np_, length_, kspace_id);
|
||||||
fbuf_ = new Grid_FFT<data_t>(N, length_, kspace_id); // needed for MPI, or for triple conv.
|
fbuf_ = new Grid_FFT<data_t>(N, length_, kspace_id); // needed for MPI, or for triple conv.
|
||||||
|
fbuf2_ = new Grid_FFT<data_t>(N, length_, kspace_id); // needed for MPI, or for triple conv.
|
||||||
|
|
||||||
#if defined(USE_MPI)
|
#if defined(USE_MPI)
|
||||||
maxslicesz_ = f1p_->sizes_[1] * f1p_->sizes_[3] * 2;
|
maxslicesz_ = f1p_->sizes_[1] * f1p_->sizes_[3] * 2;
|
||||||
|
@ -88,6 +89,7 @@ public:
|
||||||
delete f1p_;
|
delete f1p_;
|
||||||
delete f2p_;
|
delete f2p_;
|
||||||
delete fbuf_;
|
delete fbuf_;
|
||||||
|
delete fbuf2_;
|
||||||
#if defined(USE_MPI)
|
#if defined(USE_MPI)
|
||||||
delete[] crecvbuf_;
|
delete[] crecvbuf_;
|
||||||
#endif
|
#endif
|
||||||
|
@ -122,15 +124,15 @@ public:
|
||||||
inr.FourierTransformForward();
|
inr.FourierTransformForward();
|
||||||
// perform convolution of Hessians
|
// perform convolution of Hessians
|
||||||
this->convolve3(
|
this->convolve3(
|
||||||
[&]( size_t i, size_t j, size_t k ) -> ccomplex_t{
|
[&inl,&d2l]( size_t i, size_t j, size_t k ) -> ccomplex_t{
|
||||||
auto kk = inl.template get_k<real_t>(i,j,k);
|
auto kk = inl.template get_k<real_t>(i,j,k);
|
||||||
return -kk[d2l[0]] * kk[d2l[1]] * inl.kelem(i,j,k);
|
return -kk[d2l[0]] * kk[d2l[1]] * inl.kelem(i,j,k);
|
||||||
},
|
},
|
||||||
[&]( size_t i, size_t j, size_t k ) -> ccomplex_t{
|
[&inm,&d2m]( size_t i, size_t j, size_t k ) -> ccomplex_t{
|
||||||
auto kk = inl.template get_k<real_t>(i,j,k);
|
auto kk = inm.template get_k<real_t>(i,j,k);
|
||||||
return -kk[d2m[0]] * kk[d2m[1]] * inm.kelem(i,j,k);
|
return -kk[d2m[0]] * kk[d2m[1]] * inm.kelem(i,j,k);
|
||||||
},
|
},
|
||||||
[&]( size_t i, size_t j, size_t k ){
|
[&inr,&d2r]( size_t i, size_t j, size_t k ) -> ccomplex_t{
|
||||||
auto kk = inr.template get_k<real_t>(i,j,k);
|
auto kk = inr.template get_k<real_t>(i,j,k);
|
||||||
return -kk[d2r[0]] * kk[d2r[1]] * inr.kelem(i,j,k);
|
return -kk[d2r[0]] * kk[d2r[1]] * inr.kelem(i,j,k);
|
||||||
}, res, op );
|
}, res, op );
|
||||||
|
@ -144,11 +146,11 @@ public:
|
||||||
inr.FourierTransformForward();
|
inr.FourierTransformForward();
|
||||||
// perform convolution of Hessians
|
// perform convolution of Hessians
|
||||||
this->convolve2(
|
this->convolve2(
|
||||||
[&]( size_t i, size_t j, size_t k ) -> ccomplex_t{
|
[&inl,&d2l]( size_t i, size_t j, size_t k ) -> ccomplex_t{
|
||||||
auto kk = inl.template get_k<real_t>(i,j,k);
|
auto kk = inl.template get_k<real_t>(i,j,k);
|
||||||
return -kk[d2l[0]] * kk[d2l[1]] * inl.kelem(i,j,k);
|
return -kk[d2l[0]] * kk[d2l[1]] * inl.kelem(i,j,k);
|
||||||
},
|
},
|
||||||
[&]( size_t i, size_t j, size_t k ){
|
[&inr,&d2r1,&d2r2]( size_t i, size_t j, size_t k ) -> ccomplex_t{
|
||||||
auto kk = inr.template get_k<real_t>(i,j,k);
|
auto kk = inr.template get_k<real_t>(i,j,k);
|
||||||
return (-kk[d2r1[0]] * kk[d2r1[1]] -kk[d2r2[0]] * kk[d2r2[1]]) * inr.kelem(i,j,k);
|
return (-kk[d2r1[0]] * kk[d2r1[1]] -kk[d2r2[0]] * kk[d2r2[1]]) * inr.kelem(i,j,k);
|
||||||
}, res, op );
|
}, res, op );
|
||||||
|
@ -182,10 +184,14 @@ public:
|
||||||
template< typename kfunc1, typename kfunc2, typename kfunc3, typename opp >
|
template< typename kfunc1, typename kfunc2, typename kfunc3, typename opp >
|
||||||
void convolve3( kfunc1 kf1, kfunc2 kf2, kfunc3 kf3, Grid_FFT<data_t> & res, opp op )
|
void convolve3( kfunc1 kf1, kfunc2 kf2, kfunc3 kf3, Grid_FFT<data_t> & res, opp op )
|
||||||
{
|
{
|
||||||
convolve2( kf1, kf2, *fbuf_, []( ccomplex_t res, ccomplex_t ){ return res; } );
|
#warning double check if fbuf_ can be used here, or fbuf2, in case remove fbuf2
|
||||||
|
fbuf_->FourierTransformForward(false);
|
||||||
|
// convolve kf1 and kf2, store result in fbuf_
|
||||||
|
convolve2( kf1, kf2, *fbuf_, []( ccomplex_t r, ccomplex_t )->ccomplex_t{ return r; } );
|
||||||
//... prepare data 1
|
//... prepare data 1
|
||||||
f1p_->FourierTransformForward(false);
|
f1p_->FourierTransformForward(false);
|
||||||
this->pad_insert( [&]( size_t i, size_t j, size_t k ){return fbuf_->kelem(i,j,k);}, *f1p_ );
|
// pad result from fbuf_ to f1p_, fbuf_ is now unused
|
||||||
|
this->pad_insert( [&]( size_t i, size_t j, size_t k )->ccomplex_t{return fbuf_->kelem(i,j,k);}, *f1p_ );
|
||||||
|
|
||||||
//... prepare data 2
|
//... prepare data 2
|
||||||
f2p_->FourierTransformForward(false);
|
f2p_->FourierTransformForward(false);
|
||||||
|
@ -210,21 +216,13 @@ public:
|
||||||
{
|
{
|
||||||
//... prepare data 1
|
//... prepare data 1
|
||||||
f1p_->FourierTransformForward(false);
|
f1p_->FourierTransformForward(false);
|
||||||
this->pad_insert( [&]( size_t i, size_t j, size_t k ){return in.kelem(i,j,k);}, *f1p_ );
|
this->pad_insert( [&in]( size_t i, size_t j, size_t k ){return in.kelem(i,j,k);}, *f1p_ );
|
||||||
f1p_->FourierTransformBackward();
|
f1p_->FourierTransformBackward();
|
||||||
f1p_->FourierTransformForward();
|
f1p_->FourierTransformForward();
|
||||||
res.FourierTransformForward();
|
res.FourierTransformForward();
|
||||||
unpad(*f1p_, res, op);
|
unpad(*f1p_, res, op);
|
||||||
}
|
}
|
||||||
|
|
||||||
//... inplace interface
|
|
||||||
/*void convolve3( const Grid_FFT<data_t> & f1, const Grid_FFT<data_t> & f2, const Grid_FFT<data_t> & f3, Grid_FFT<data_t> & res )
|
|
||||||
{
|
|
||||||
convolve2( f1, f2, res );
|
|
||||||
convolve2( res, f3, res );
|
|
||||||
}*/
|
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <typename kdep_functor>
|
template <typename kdep_functor>
|
||||||
void pad_insert( kdep_functor kfunc, Grid_FFT<data_t> &fp ){
|
void pad_insert( kdep_functor kfunc, Grid_FFT<data_t> &fp ){
|
||||||
|
@ -302,7 +300,7 @@ private:
|
||||||
{
|
{
|
||||||
size_t iglobal = i + offsets_[CONFIG::MPI_task_rank];
|
size_t iglobal = i + offsets_[CONFIG::MPI_task_rank];
|
||||||
|
|
||||||
if (iglobal < nf[0]/2 )//fny[0])
|
if (iglobal <= nf[0]/2 )//fny[0])
|
||||||
{
|
{
|
||||||
int sendto = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
int sendto = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
||||||
MPI_Isend(&fbuf_->kelem(i * slicesz), (int)slicesz, datatype, sendto,
|
MPI_Isend(&fbuf_->kelem(i * slicesz), (int)slicesz, datatype, sendto,
|
||||||
|
@ -310,7 +308,7 @@ private:
|
||||||
req.push_back(temp_req);
|
req.push_back(temp_req);
|
||||||
// std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal << " to task " << sendto << ", size = " << slicesz << std::endl;
|
// std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal << " to task " << sendto << ", size = " << slicesz << std::endl;
|
||||||
}
|
}
|
||||||
if (iglobal > nf[0]/2) //fny[0])
|
if (iglobal >= nf[0]/2) //fny[0])
|
||||||
{
|
{
|
||||||
int sendto = get_task(iglobal + nf[0]/2, offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
int sendto = get_task(iglobal + nf[0]/2, offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
||||||
MPI_Isend(&fbuf_->kelem(i * slicesz), (int)slicesz, datatype, sendto,
|
MPI_Isend(&fbuf_->kelem(i * slicesz), (int)slicesz, datatype, sendto,
|
||||||
|
@ -324,10 +322,10 @@ private:
|
||||||
{
|
{
|
||||||
size_t iglobal = i + offsetsp_[CONFIG::MPI_task_rank];
|
size_t iglobal = i + offsetsp_[CONFIG::MPI_task_rank];
|
||||||
|
|
||||||
if (iglobal < nf[0]/2 || iglobal > nf[0])
|
if (iglobal <= nf[0]/2 || iglobal >= nf[0])
|
||||||
{
|
{
|
||||||
int recvfrom = 0;
|
int recvfrom = 0;
|
||||||
if (iglobal < nf[0]/2)
|
if (iglobal <= nf[0]/2)
|
||||||
recvfrom = get_task(iglobal, offsets_, sizes_, CONFIG::MPI_task_size);
|
recvfrom = get_task(iglobal, offsets_, sizes_, CONFIG::MPI_task_size);
|
||||||
else
|
else
|
||||||
recvfrom = get_task(iglobal - nf[0]/2, offsets_, sizes_, CONFIG::MPI_task_size);
|
recvfrom = get_task(iglobal - nf[0]/2, offsets_, sizes_, CONFIG::MPI_task_size);
|
||||||
|
@ -343,32 +341,41 @@ private:
|
||||||
|
|
||||||
for (size_t j = 0; j < nf[1]; ++j)
|
for (size_t j = 0; j < nf[1]; ++j)
|
||||||
{
|
{
|
||||||
if (j < nf[1]/2)
|
if (j <= nf[1]/2)
|
||||||
{
|
{
|
||||||
size_t jp = j;
|
size_t jp = j;
|
||||||
for (size_t k = 0; k < nf[2]; ++k)
|
for (size_t k = 0; k < nf[2]; ++k)
|
||||||
{
|
{
|
||||||
if (k < nf[2]/2)
|
if( typeid(data_t)==typeid(real_t) ){
|
||||||
fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
||||||
else if (k > nf[2]/2)
|
}else{
|
||||||
|
if (k <= nf[2]/2)
|
||||||
|
fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
||||||
|
if (k >= nf[2]/2)
|
||||||
fp.kelem(i, jp, k + nf[2]/2) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
fp.kelem(i, jp, k + nf[2]/2) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (j > nf[1]/2)
|
if (j >= nf[1]/2)
|
||||||
{
|
{
|
||||||
size_t jp = j + nf[1]/2;
|
size_t jp = j + nf[1]/2;
|
||||||
for (size_t k = 0; k < nf[2]; ++k)
|
for (size_t k = 0; k < nf[2]; ++k)
|
||||||
{
|
{
|
||||||
if (k < nf[2]/2)
|
if( typeid(data_t)==typeid(real_t) ){
|
||||||
fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
||||||
else if (k > nf[2]/2)
|
}else{
|
||||||
|
if (k <= nf[2]/2)
|
||||||
|
fp.kelem(i, jp, k) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
||||||
|
if (k >= nf[2]/2)
|
||||||
fp.kelem(i, jp, k + nf[2]/2) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
fp.kelem(i, jp, k + nf[2]/2) = crecvbuf_[j * fbuf_->sizes_[3] + k];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < req.size(); ++i)
|
for (size_t i = 0; i < req.size(); ++i)
|
||||||
{
|
{
|
||||||
|
@ -455,7 +462,7 @@ private:
|
||||||
size_t iglobal = i + offsetsp_[CONFIG::MPI_task_rank];
|
size_t iglobal = i + offsetsp_[CONFIG::MPI_task_rank];
|
||||||
|
|
||||||
//... sending
|
//... sending
|
||||||
if (iglobal < fny[0])
|
if (iglobal <= fny[0])
|
||||||
{
|
{
|
||||||
int sendto = get_task(iglobal, offsets_, sizes_, CONFIG::MPI_task_size);
|
int sendto = get_task(iglobal, offsets_, sizes_, CONFIG::MPI_task_size);
|
||||||
|
|
||||||
|
@ -463,7 +470,7 @@ private:
|
||||||
MPI_COMM_WORLD, &temp_req);
|
MPI_COMM_WORLD, &temp_req);
|
||||||
req.push_back(temp_req);
|
req.push_back(temp_req);
|
||||||
}
|
}
|
||||||
else if (iglobal > 2 * fny[0])
|
else if (iglobal >= 2 * fny[0])
|
||||||
{
|
{
|
||||||
int sendto = get_task(iglobal - fny[0], offsets_, sizes_, CONFIG::MPI_task_size);
|
int sendto = get_task(iglobal - fny[0], offsets_, sizes_, CONFIG::MPI_task_size);
|
||||||
MPI_Isend(&fp.kelem(i * slicesz), (int)slicesz, datatype, sendto, (int)iglobal,
|
MPI_Isend(&fp.kelem(i * slicesz), (int)slicesz, datatype, sendto, (int)iglobal,
|
||||||
|
@ -472,57 +479,135 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fbuf_->zero();
|
||||||
|
|
||||||
for (size_t i = 0; i < nf[0]; ++i)
|
for (size_t i = 0; i < nf[0]; ++i)
|
||||||
{
|
{
|
||||||
size_t iglobal = i + offsets_[CONFIG::MPI_task_rank];
|
size_t iglobal = i + offsets_[CONFIG::MPI_task_rank];
|
||||||
|
|
||||||
status.MPI_ERROR = MPI_SUCCESS;
|
|
||||||
|
|
||||||
int recvfrom = 0;
|
int recvfrom = 0;
|
||||||
if (iglobal < fny[0])
|
if (iglobal <= fny[0])
|
||||||
{
|
{
|
||||||
|
real_t wi = (iglobal == fny[0])? 0.5 : 1.0;
|
||||||
|
|
||||||
recvfrom = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
recvfrom = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
||||||
MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, (int)iglobal,
|
MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, (int)iglobal,
|
||||||
MPI_COMM_WORLD, &status);
|
MPI_COMM_WORLD, &status);
|
||||||
}
|
|
||||||
else if (iglobal > fny[0])
|
|
||||||
{
|
|
||||||
recvfrom = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
|
||||||
MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom,
|
|
||||||
(int)(iglobal + fny[0]), MPI_COMM_WORLD, &status);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
continue;
|
|
||||||
|
|
||||||
assert(status.MPI_ERROR == MPI_SUCCESS);
|
|
||||||
|
|
||||||
for (size_t j = 0; j < nf[1]; ++j)
|
for (size_t j = 0; j < nf[1]; ++j)
|
||||||
{
|
{
|
||||||
|
real_t wj = (j==fny[1])? 0.5 : 1.0;
|
||||||
if (j < fny[1])
|
if (j <= fny[1])
|
||||||
{
|
{
|
||||||
size_t jp = j;
|
size_t jp = j;
|
||||||
for (size_t k = 0; k < nf[2]; ++k)
|
for (size_t k = 0; k < nf[2]; ++k)
|
||||||
{
|
{
|
||||||
if (k < fny[2])
|
if( typeid(data_t)==typeid(real_t) ){
|
||||||
f.kelem(i, j, k) = op(crecvbuf_[jp * nfp[3] + k]/rfac,f.kelem(i, j, k));
|
real_t w = wi*wj;
|
||||||
else if (k > fny[2])
|
fbuf_->kelem(i, j, k) += w*crecvbuf_[jp * nfp[3] + k]/rfac;
|
||||||
f.kelem(i, j, k) = op(crecvbuf_[jp * nfp[3] + k + fny[2]]/rfac, f.kelem(i, j, k));
|
}else{
|
||||||
|
real_t wk = (k==fny[2])? 0.5 : 1.0;
|
||||||
|
real_t w = wi*wj*wk;
|
||||||
|
if (k <= fny[2])
|
||||||
|
fbuf_->kelem(i, j, k) += w*crecvbuf_[jp * nfp[3] + k]/rfac;
|
||||||
|
if (k >= fny[2])
|
||||||
|
fbuf_->kelem(i, j, k) += w*crecvbuf_[jp * nfp[3] + k + fny[2]]/rfac;
|
||||||
|
if( w<1.0 ){
|
||||||
|
fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (j > fny[1])
|
}
|
||||||
|
}
|
||||||
|
if (j >= fny[1])
|
||||||
{
|
{
|
||||||
size_t jp = j + fny[1];
|
size_t jp = j + fny[1];
|
||||||
for (size_t k = 0; k < nf[2]; ++k)
|
for (size_t k = 0; k < nf[2]; ++k)
|
||||||
{
|
{
|
||||||
if (k < fny[2])
|
if( typeid(data_t)==typeid(real_t) ){
|
||||||
f.kelem(i, j, k) = op(crecvbuf_[jp * nfp[3] + k]/rfac, f.kelem(i, j, k));
|
real_t w = wi*wj;
|
||||||
else if (k > fny[2])
|
fbuf_->kelem(i, j, k) += w*crecvbuf_[jp * nfp[3] + k]/rfac;
|
||||||
f.kelem(i, j, k) = op(crecvbuf_[jp * nfp[3] + k + fny[2]]/rfac, f.kelem(i, j, k));
|
}else{
|
||||||
|
real_t wk = (k==fny[2])? 0.5 : 1.0;
|
||||||
|
real_t w = wi*wj*wk;
|
||||||
|
if (k <= fny[2])
|
||||||
|
fbuf_->kelem(i, j, k) += w*crecvbuf_[jp * nfp[3] + k]/rfac;
|
||||||
|
if (k >= fny[2])
|
||||||
|
fbuf_->kelem(i, j, k) += w*crecvbuf_[jp * nfp[3] + k + fny[2]]/rfac;
|
||||||
|
if( w<1.0 ){
|
||||||
|
fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (iglobal >= fny[0])
|
||||||
|
{
|
||||||
|
real_t wi = (iglobal == fny[0])? 0.5 : 1.0;
|
||||||
|
|
||||||
|
recvfrom = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
||||||
|
MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom,
|
||||||
|
(int)(iglobal + fny[0]), MPI_COMM_WORLD, &status);
|
||||||
|
|
||||||
|
for (size_t j = 0; j < nf[1]; ++j)
|
||||||
|
{
|
||||||
|
real_t wj = (j==fny[1])? 0.5 : 1.0;
|
||||||
|
if (j <= fny[1])
|
||||||
|
{
|
||||||
|
size_t jp = j;
|
||||||
|
for (size_t k = 0; k < nf[2]; ++k)
|
||||||
|
{
|
||||||
|
if( typeid(data_t)==typeid(real_t) ){
|
||||||
|
real_t w = wi*wj;
|
||||||
|
fbuf_->kelem(i, j, k) += w*crecvbuf_[jp * nfp[3] + k]/rfac;
|
||||||
|
}else{
|
||||||
|
real_t wk = (k==fny[2])? 0.5 : 1.0;
|
||||||
|
real_t w = wi*wj*wk;
|
||||||
|
if (k <= fny[2])
|
||||||
|
fbuf_->kelem(i, j, k) += w*crecvbuf_[jp * nfp[3] + k]/rfac;
|
||||||
|
if (k >= fny[2])
|
||||||
|
fbuf_->kelem(i, j, k) += w*crecvbuf_[jp * nfp[3] + k + fny[2]]/rfac;
|
||||||
|
if( w<1.0 ){
|
||||||
|
fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (j >= fny[1])
|
||||||
|
{
|
||||||
|
size_t jp = j + fny[1];
|
||||||
|
for (size_t k = 0; k < nf[2]; ++k)
|
||||||
|
{
|
||||||
|
if( typeid(data_t)==typeid(real_t) ){
|
||||||
|
real_t w = wi*wj;
|
||||||
|
fbuf_->kelem(i, j, k) += w*crecvbuf_[jp * nfp[3] + k]/rfac;
|
||||||
|
}else{
|
||||||
|
real_t wk = (k==fny[2])? 0.5 : 1.0;
|
||||||
|
real_t w = wi*wj*wk;
|
||||||
|
if (k <= fny[2])
|
||||||
|
fbuf_->kelem(i, j, k) += w*crecvbuf_[jp * nfp[3] + k]/rfac;
|
||||||
|
if (k >= fny[2])
|
||||||
|
fbuf_->kelem(i, j, k) += w*crecvbuf_[jp * nfp[3] + k + fny[2]]/rfac;
|
||||||
|
if( w<1.0 ){
|
||||||
|
fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (size_t i = 0; i < fbuf_->size(0); ++i)
|
||||||
|
{
|
||||||
|
for (size_t j = 0; j < fbuf_->size(1); ++j)
|
||||||
|
{
|
||||||
|
for (size_t k = 0; k < fbuf_->size(2); ++k)
|
||||||
|
{
|
||||||
|
f.kelem(i, j, k) = op(fbuf_->kelem(i, j, k), f.kelem(i, j, k));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < req.size(); ++i)
|
for (size_t i = 0; i < req.size(); ++i)
|
||||||
{
|
{
|
||||||
|
|
|
@ -133,7 +133,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename ft>
|
template <typename ft>
|
||||||
vec3<ft> get_r(const size_t &i, const size_t &j, const size_t &k) const
|
vec3<ft> get_r(const size_t i, const size_t j, const size_t k) const
|
||||||
{
|
{
|
||||||
vec3<ft> rr;
|
vec3<ft> rr;
|
||||||
|
|
||||||
|
@ -149,7 +149,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
void cell_pos( int ilevel, size_t i, size_t j, size_t k, double* x ) const {
|
void cell_pos( int ilevel, size_t i, size_t j, size_t k, double* x ) const {
|
||||||
|
#warning needs to be fixed for MPI
|
||||||
x[0] = double(i)/size(0);
|
x[0] = double(i)/size(0);
|
||||||
x[1] = double(j)/size(1);
|
x[1] = double(j)/size(1);
|
||||||
x[2] = double(k)/size(2);
|
x[2] = double(k)/size(2);
|
||||||
|
|
98
src/main.cc
98
src/main.cc
|
@ -22,7 +22,7 @@ int MPI_task_size = 1;
|
||||||
bool MPI_ok = false;
|
bool MPI_ok = false;
|
||||||
bool MPI_threads_ok = false;
|
bool MPI_threads_ok = false;
|
||||||
bool FFTW_threads_ok = false;
|
bool FFTW_threads_ok = false;
|
||||||
};
|
}
|
||||||
|
|
||||||
RNG_plugin *the_random_number_generator;
|
RNG_plugin *the_random_number_generator;
|
||||||
TransferFunction_plugin *the_transfer_function;
|
TransferFunction_plugin *the_transfer_function;
|
||||||
|
@ -89,7 +89,7 @@ int main( int argc, char** argv )
|
||||||
const real_t astart = 1.0/(1.0+zstart);
|
const real_t astart = 1.0/(1.0+zstart);
|
||||||
const real_t volfac(std::pow(boxlen / ngrid / 2.0 / M_PI, 1.5));
|
const real_t volfac(std::pow(boxlen / ngrid / 2.0 / M_PI, 1.5));
|
||||||
const real_t phifac = 1.0 / boxlen / boxlen; // to have potential in box units
|
const real_t phifac = 1.0 / boxlen / boxlen; // to have potential in box units
|
||||||
const real_t deriv_fac = 1.0 ;//boxlen;
|
// const real_t deriv_fac = 1.0 ;//boxlen;
|
||||||
|
|
||||||
// real_t Dplus0 = the_config.GetValue<real_t>("setup", "Dplus0");
|
// real_t Dplus0 = the_config.GetValue<real_t>("setup", "Dplus0");
|
||||||
// real_t Ddot0 = 1.0;
|
// real_t Ddot0 = 1.0;
|
||||||
|
@ -179,46 +179,19 @@ int main( int argc, char** argv )
|
||||||
|
|
||||||
auto assign_op = []( ccomplex_t res, ccomplex_t val ) -> ccomplex_t{ return res; };
|
auto assign_op = []( ccomplex_t res, ccomplex_t val ) -> ccomplex_t{ return res; };
|
||||||
auto add_op = []( ccomplex_t res, ccomplex_t val ) -> ccomplex_t{ return val+res; };
|
auto add_op = []( ccomplex_t res, ccomplex_t val ) -> ccomplex_t{ return val+res; };
|
||||||
|
auto add2_op = []( ccomplex_t res, ccomplex_t val ) -> ccomplex_t{ return val+2.0*res; };
|
||||||
auto sub_op = []( ccomplex_t res, ccomplex_t val ) -> ccomplex_t{ return val-res; };
|
auto sub_op = []( ccomplex_t res, ccomplex_t val ) -> ccomplex_t{ return val-res; };
|
||||||
|
auto sub2_op = []( ccomplex_t res, ccomplex_t val ) -> ccomplex_t{ return val-2.0*res; };
|
||||||
|
|
||||||
|
|
||||||
#if 1
|
|
||||||
csoca::ilog << "Computing phi(2) term..." << std::endl;
|
csoca::ilog << "Computing phi(2) term..." << std::endl;
|
||||||
// Compute the source term for phi(2)
|
// Compute the source term for phi(2)
|
||||||
Conv.convolve_SumHessians( phi, {0,0}, phi, {1,1}, {2,2}, phi2, assign_op );
|
Conv.convolve_SumHessians( phi, {0,0}, phi, {1,1}, {2,2}, phi2, assign_op );
|
||||||
// Conv.convolve_Hessians( phi, {0,0}, phi, {1,1}, phi2, assign_op );
|
|
||||||
// Conv.convolve_Hessians( phi, {0,0}, phi, {2,2}, phi2, add_op );
|
|
||||||
|
|
||||||
Conv.convolve_Hessians( phi, {1,1}, phi, {2,2}, phi2, add_op );
|
Conv.convolve_Hessians( phi, {1,1}, phi, {2,2}, phi2, add_op );
|
||||||
Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, phi2, sub_op );
|
Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, phi2, sub_op );
|
||||||
Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, phi2, sub_op );
|
Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, phi2, sub_op );
|
||||||
Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, phi2, sub_op );
|
Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, phi2, sub_op );
|
||||||
|
|
||||||
#else
|
|
||||||
phi2.FourierTransformBackward();
|
|
||||||
phi_xx.FourierTransformBackward();
|
|
||||||
phi_xy.FourierTransformBackward();
|
|
||||||
phi_xz.FourierTransformBackward();
|
|
||||||
phi_yy.FourierTransformBackward();
|
|
||||||
phi_yz.FourierTransformBackward();
|
|
||||||
phi_zz.FourierTransformBackward();
|
|
||||||
for (size_t i = 0; i < phi2.size(0); ++i)
|
|
||||||
{
|
|
||||||
for (size_t j = 0; j < phi2.size(1); ++j)
|
|
||||||
{
|
|
||||||
for (size_t k = 0; k < phi2.size(2); ++k)
|
|
||||||
{
|
|
||||||
size_t idx = phi2.get_idx(i, j, k);
|
|
||||||
|
|
||||||
phi2.relem(idx) = phi_xx.relem(idx)*(phi_yy.relem(idx)+phi_zz.relem(idx))
|
|
||||||
+phi_yy.relem(idx)*phi_zz.relem(idx)
|
|
||||||
-phi_xy.relem(idx)*phi_xy.relem(idx)
|
|
||||||
-phi_xz.relem(idx)*phi_xz.relem(idx)
|
|
||||||
-phi_yz.relem(idx)*phi_yz.relem(idx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
phi2.FourierTransformForward();
|
phi2.FourierTransformForward();
|
||||||
phi2.apply_function_k_dep([&](auto x, auto k) {
|
phi2.apply_function_k_dep([&](auto x, auto k) {
|
||||||
real_t kmod2 = k.norm_squared();
|
real_t kmod2 = k.norm_squared();
|
||||||
|
@ -229,10 +202,6 @@ int main( int argc, char** argv )
|
||||||
//======================================================================
|
//======================================================================
|
||||||
//... compute 3LPT displacement potential
|
//... compute 3LPT displacement potential
|
||||||
|
|
||||||
#if 1
|
|
||||||
auto sub2_op = []( ccomplex_t res, ccomplex_t val ) -> ccomplex_t{ return val-2.0*res; };
|
|
||||||
auto add2_op = []( ccomplex_t res, ccomplex_t val ) -> ccomplex_t{ return val+2.0*res; };
|
|
||||||
|
|
||||||
csoca::ilog << "Computing phi(3a) term..." << std::endl;
|
csoca::ilog << "Computing phi(3a) term..." << std::endl;
|
||||||
Conv.convolve_SumHessians( phi, {0,0}, phi2, {1,1}, {2,2}, phi3a, assign_op );
|
Conv.convolve_SumHessians( phi, {0,0}, phi2, {1,1}, {2,2}, phi3a, assign_op );
|
||||||
Conv.convolve_SumHessians( phi, {1,1}, phi2, {2,2}, {0,0}, phi3a, add_op );
|
Conv.convolve_SumHessians( phi, {1,1}, phi2, {2,2}, {0,0}, phi3a, add_op );
|
||||||
|
@ -245,52 +214,6 @@ int main( int argc, char** argv )
|
||||||
return 0.5 * x;
|
return 0.5 * x;
|
||||||
});
|
});
|
||||||
|
|
||||||
csoca::ilog << "Computing phi(3b) term..." << std::endl;
|
|
||||||
Conv.convolve_Hessians( phi, {0,0}, phi, {1,1}, phi, {2,2}, phi3b, assign_op );
|
|
||||||
Conv.convolve_Hessians( phi, {0,1}, phi, {0,2}, phi, {1,2}, phi3b, add2_op );
|
|
||||||
Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, phi, {0,0}, phi3b, sub_op );
|
|
||||||
Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, phi, {1,1}, phi3b, sub_op );
|
|
||||||
Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, phi, {2,2}, phi3b, sub_op );
|
|
||||||
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
|
|
||||||
phi2_xx.FourierTransformBackward();
|
|
||||||
phi2_xy.FourierTransformBackward();
|
|
||||||
phi2_xz.FourierTransformBackward();
|
|
||||||
phi2_yy.FourierTransformBackward();
|
|
||||||
phi2_yz.FourierTransformBackward();
|
|
||||||
phi2_zz.FourierTransformBackward();
|
|
||||||
|
|
||||||
for (size_t i = 0; i < phi3a.size(0); ++i)
|
|
||||||
{
|
|
||||||
for (size_t j = 0; j < phi3a.size(1); ++j)
|
|
||||||
{
|
|
||||||
for (size_t k = 0; k < phi3a.size(2); ++k)
|
|
||||||
{
|
|
||||||
size_t idx = phi3a.get_idx(i, j, k);
|
|
||||||
|
|
||||||
phi3a.relem(idx) = 0.5 * (
|
|
||||||
+ phi_xx.relem(idx) * ( phi2_yy.relem(idx) + phi2_zz.relem(idx) )
|
|
||||||
+ phi_yy.relem(idx) * ( phi2_zz.relem(idx) + phi2_xx.relem(idx) )
|
|
||||||
+ phi_zz.relem(idx) * ( phi2_xx.relem(idx) + phi2_yy.relem(idx) )
|
|
||||||
- phi_xy.relem(idx) * phi2_xy.relem(idx) * 2.0
|
|
||||||
- phi_xz.relem(idx) * phi2_xz.relem(idx) * 2.0
|
|
||||||
- phi_yz.relem(idx) * phi2_yz.relem(idx) * 2.0
|
|
||||||
);
|
|
||||||
|
|
||||||
phi3b.relem(idx) =
|
|
||||||
+ phi_xx.relem(idx)*phi_yy.relem(idx)*phi_zz.relem(idx)
|
|
||||||
+ phi_xy.relem(idx)*phi_xz.relem(idx)*phi_yz.relem(idx) * 2.0
|
|
||||||
- phi_yz.relem(idx)*phi_yz.relem(idx)*phi_xx.relem(idx)
|
|
||||||
- phi_xz.relem(idx)*phi_xz.relem(idx)*phi_yy.relem(idx)
|
|
||||||
- phi_xy.relem(idx)*phi_xy.relem(idx)*phi_zz.relem(idx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
phi3a.FourierTransformForward();
|
phi3a.FourierTransformForward();
|
||||||
phi3a.apply_function_k_dep([&](auto x, auto k) {
|
phi3a.apply_function_k_dep([&](auto x, auto k) {
|
||||||
real_t kmod2 = k.norm_squared();
|
real_t kmod2 = k.norm_squared();
|
||||||
|
@ -298,10 +221,17 @@ int main( int argc, char** argv )
|
||||||
});
|
});
|
||||||
phi3a.zero_DC_mode();
|
phi3a.zero_DC_mode();
|
||||||
|
|
||||||
|
csoca::ilog << "Computing phi(3b) term..." << std::endl;
|
||||||
|
Conv.convolve_Hessians( phi, {0,0}, phi, {1,1}, phi, {2,2}, phi3b, assign_op );
|
||||||
|
Conv.convolve_Hessians( phi, {0,1}, phi, {0,2}, phi, {1,2}, phi3b, add2_op );
|
||||||
|
Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, phi, {0,0}, phi3b, sub_op );
|
||||||
|
Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, phi, {1,1}, phi3b, sub_op );
|
||||||
|
Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, phi, {2,2}, phi3b, sub_op );
|
||||||
|
|
||||||
phi3b.FourierTransformForward();
|
phi3b.FourierTransformForward();
|
||||||
phi3b.apply_function_k_dep([&](auto x, auto k) {
|
phi3b.apply_function_k_dep([&](auto x, auto k) {
|
||||||
real_t kmod2 = k.norm_squared();
|
real_t kmod2 = k.norm_squared();
|
||||||
return x * (-1.0 / kmod2) * phifac / phifac / phifac;
|
return x * (-1.0 / kmod2) * phifac / phifac / phifac/phifac;
|
||||||
});
|
});
|
||||||
phi3b.zero_DC_mode();
|
phi3b.zero_DC_mode();
|
||||||
|
|
||||||
|
@ -400,7 +330,7 @@ int main( int argc, char** argv )
|
||||||
{
|
{
|
||||||
auto kk = phi.get_k<real_t>(i,j,k);
|
auto kk = phi.get_k<real_t>(i,j,k);
|
||||||
size_t idx = phi.get_idx(i,j,k);
|
size_t idx = phi.get_idx(i,j,k);
|
||||||
auto laplace = -kk.norm_squared();
|
// auto laplace = -kk.norm_squared();
|
||||||
|
|
||||||
// scale potentials with respective order growth factors
|
// scale potentials with respective order growth factors
|
||||||
phi.kelem(idx) *= g1;
|
phi.kelem(idx) *= g1;
|
||||||
|
|
|
@ -24,11 +24,11 @@ void print_output_plugins()
|
||||||
|
|
||||||
std::map< std::string, output_plugin_creator *>::iterator it;
|
std::map< std::string, output_plugin_creator *>::iterator it;
|
||||||
it = m.begin();
|
it = m.begin();
|
||||||
std::cout << " - Available output plug-ins:\n";
|
csoca::ilog << " - Available output plug-ins:\n";
|
||||||
while( it!=m.end() )
|
while( it!=m.end() )
|
||||||
{
|
{
|
||||||
if( (*it).second )
|
if( (*it).second )
|
||||||
std::cout << "\t\'" << (*it).first << "\'\n";
|
csoca::ilog << "\t\'" << (*it).first << "\'\n";
|
||||||
++it;
|
++it;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -43,12 +43,12 @@ output_plugin *select_output_plugin( ConfigFile& cf )
|
||||||
|
|
||||||
if( !the_output_plugin_creator )
|
if( !the_output_plugin_creator )
|
||||||
{
|
{
|
||||||
std::cerr << " - Error: output plug-in \'" << formatname << "\' not found." << std::endl;
|
csoca::elog << " - Error: output plug-in \'" << formatname << "\' not found." << std::endl;
|
||||||
print_output_plugins();
|
print_output_plugins();
|
||||||
throw std::runtime_error("Unknown output plug-in");
|
throw std::runtime_error("Unknown output plug-in");
|
||||||
|
|
||||||
}else
|
}else
|
||||||
std::cout << " - Selecting output plug-in \'" << formatname << "\'..." << std::endl;
|
csoca::ilog << " - Selecting output plug-in \'" << formatname << "\'..." << std::endl;
|
||||||
|
|
||||||
output_plugin *the_output_plugin
|
output_plugin *the_output_plugin
|
||||||
= the_output_plugin_creator->create( cf );
|
= the_output_plugin_creator->create( cf );
|
||||||
|
|
|
@ -1018,9 +1018,9 @@ public:
|
||||||
|
|
||||||
ofs_temp.write((char *)&blksize, sizeof(size_t));
|
ofs_temp.write((char *)&blksize, sizeof(size_t));
|
||||||
|
|
||||||
int levelmaxcoarse = gh.levelmax() - 4;
|
// int levelmaxcoarse = gh.levelmax() - 4;
|
||||||
if (!spread_coarse_acrosstypes_)
|
// if (!spread_coarse_acrosstypes_)
|
||||||
levelmaxcoarse = gh.levelmax() - 1;
|
// levelmaxcoarse = gh.levelmax() - 1;
|
||||||
|
|
||||||
//for( int ilevel=levelmaxcoarse; ilevel>=(int)gh.levelmin(); --ilevel )
|
//for( int ilevel=levelmaxcoarse; ilevel>=(int)gh.levelmin(); --ilevel )
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue