mirror of
https://github.com/cosmo-sims/monofonIC.git
synced 2024-09-19 17:03:45 +02:00
clean up, working commit
This commit is contained in:
parent
3e488acef6
commit
c8a30ffb18
4 changed files with 106 additions and 126 deletions
|
@ -1,6 +1,6 @@
|
|||
[setup]
|
||||
LPTorder = 2
|
||||
GridRes = 256
|
||||
GridRes = 128
|
||||
BoxLength = 300
|
||||
zstart = 1.0
|
||||
H0 = 70.0
|
||||
|
|
|
@ -19,19 +19,31 @@ protected:
|
|||
|
||||
ccomplex_t *crecvbuf_;
|
||||
real_t *recvbuf_;
|
||||
ptrdiff_t *offsets_;
|
||||
ptrdiff_t *offsetsp_;
|
||||
ptrdiff_t *sizes_;
|
||||
ptrdiff_t *sizesp_;
|
||||
size_t maxslicesz_;
|
||||
std::vector<ptrdiff_t> offsets_, offsetsp_;
|
||||
std::vector<size_t> sizes_, sizesp_;
|
||||
|
||||
// ptrdiff_t *offsets_;
|
||||
// ptrdiff_t *offsetsp_;
|
||||
// ptrdiff_t *sizes_;
|
||||
// ptrdiff_t *sizesp_;
|
||||
|
||||
private:
|
||||
int get_task( ptrdiff_t index, const ptrdiff_t *offsets, const ptrdiff_t *sizes, const int ntasks ) const
|
||||
// int get_task( ptrdiff_t index, const ptrdiff_t *offsets, const ptrdiff_t *sizes, const int ntasks ) const
|
||||
// {
|
||||
// int itask = 0;
|
||||
// while( itask < ntasks-1 && offsets[itask+1] <= index ) ++itask;
|
||||
// return itask;
|
||||
// }
|
||||
|
||||
// get task based on offsets
|
||||
|
||||
int get_task(ptrdiff_t index, const std::vector<ptrdiff_t>& offsets, const std::vector<size_t>& sizes, const int ntasks )
|
||||
{
|
||||
int itask = 0;
|
||||
while( itask < ntasks-1 && offsets[itask+1] <= index ) ++itask;
|
||||
while (itask < ntasks - 1 && offsets[itask + 1] <= index) ++itask;
|
||||
return itask;
|
||||
}
|
||||
|
||||
// void pad_insert( const Grid_FFT<data_t> & f, Grid_FFT<data_t> & fp );
|
||||
// void unpad( const Grid_FFT<data_t> & fp, Grid_FFT< data_t > & f );
|
||||
|
||||
|
@ -46,22 +58,26 @@ public:
|
|||
f2p_ = new Grid_FFT<data_t>(np_, length_, kspace_id);
|
||||
|
||||
#if defined(USE_MPI)
|
||||
fMPIbuf_ = Grid_FFT<data_t>(N, length_, kspace_id);
|
||||
size_t maxslicesz = f1p_->sizes_[1] * f1p_->sizes_[3] * 2;
|
||||
fMPIbuf_ = new Grid_FFT<data_t>(N, length_, kspace_id);
|
||||
maxslicesz_ = f1p_->sizes_[1] * f1p_->sizes_[3] * 2;
|
||||
|
||||
crecvbuf_ = new ccomplex_t[maxslicesz / 2];
|
||||
crecvbuf_ = new ccomplex_t[maxslicesz_ / 2];
|
||||
recvbuf_ = reinterpret_cast<real_t *>(&crecvbuf_[0]);
|
||||
|
||||
int ntasks(MPI_Get_size());
|
||||
|
||||
offsets_ = new ptrdiff_t[ntasks];
|
||||
offsetsp_ = new ptrdiff_t[ntasks];
|
||||
sizes_ = new ptrdiff_t[ntasks];
|
||||
sizesp_ = new ptrdiff_t[ntasks];
|
||||
offsets_.assign(ntasks,0);
|
||||
offsetsp_.assign(ntasks,0);
|
||||
sizes_.assign(ntasks,0);
|
||||
sizesp_.assign(ntasks,0);
|
||||
// offsets_ = new ptrdiff_t[ntasks];
|
||||
// offsetsp_ = new ptrdiff_t[ntasks];
|
||||
// sizes_ = new ptrdiff_t[ntasks];
|
||||
// sizesp_ = new ptrdiff_t[ntasks];
|
||||
|
||||
size_t tsize = N[0], tsizep = f1p_->size(0);
|
||||
|
||||
MPI_Allgather(&f.local_1_start_, 1, MPI_LONG_LONG, &offsets_[0], 1,
|
||||
MPI_Allgather(&fMPIbuf_->local_1_start_, 1, MPI_LONG_LONG, &offsets_[0], 1,
|
||||
MPI_LONG_LONG, MPI_COMM_WORLD);
|
||||
MPI_Allgather(&f1p_->local_1_start_, 1, MPI_LONG_LONG, &offsetsp_[0], 1,
|
||||
MPI_LONG_LONG, MPI_COMM_WORLD);
|
||||
|
@ -79,10 +95,10 @@ public:
|
|||
#if defined(USE_MPI)
|
||||
delete fMPIbuf_;
|
||||
delete[] crecvbuf_;
|
||||
delete[] offsets_;
|
||||
delete[] offsetsp_;
|
||||
delete[] sizes_;
|
||||
delete[] sizesp_;
|
||||
// delete[] offsets_;
|
||||
// delete[] offsetsp_;
|
||||
// delete[] sizes_;
|
||||
// delete[] sizesp_;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -160,30 +176,30 @@ private:
|
|||
assert( fp.space_ == kspace_id );
|
||||
|
||||
|
||||
size_t dn[3] = {
|
||||
fp.n_[0]/3,// fp.n_[0] - f.n_[0],
|
||||
fp.n_[1]/3,// fp.n_[1] - f.n_[1],
|
||||
fp.n_[2]/3// fp.n_[2] - f.n_[2],
|
||||
};
|
||||
// size_t dn[3] = {
|
||||
// fp.n_[0]/3,// fp.n_[0] - f.n_[0],
|
||||
// fp.n_[1]/3,// fp.n_[1] - f.n_[1],
|
||||
// fp.n_[2]/3// fp.n_[2] - f.n_[2],
|
||||
// };
|
||||
const double rfac = std::pow(1.5,1.5);//std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(f.n_[0] * f.n_[1] * f.n_[2]);
|
||||
|
||||
fp.zero();
|
||||
|
||||
|
||||
#if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//size_t nhalf[3] = {f.n_[0] / 2, f.n_[1] / 2, f.n_[2] / 2};
|
||||
size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3};
|
||||
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < 2*fp.size(0)/3; ++i)
|
||||
{
|
||||
size_t ip = (i > nhalf[0]) ? i + dn[0] : i;
|
||||
size_t ip = (i > nhalf[0]) ? i + nhalf[0] : i;
|
||||
for (size_t j = 0; j < 2*fp.size(1)/3; ++j)
|
||||
{
|
||||
size_t jp = (j > nhalf[1]) ? j + dn[1] : j;
|
||||
size_t jp = (j > nhalf[1]) ? j + nhalf[1] : j;
|
||||
for (size_t k = 0; k < 2*fp.size(2)/3; ++k)
|
||||
{
|
||||
size_t kp = (k > nhalf[2]) ? k + dn[2] : k;
|
||||
size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k;
|
||||
// if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue;
|
||||
//fp.kelem(ip, jp, kp) = f.kelem(i, j, k) * rfac;
|
||||
fp.kelem(ip, jp, kp) = kfunc(i, j, k) * rfac;
|
||||
|
@ -193,49 +209,25 @@ private:
|
|||
|
||||
#else /// then USE_MPI is defined ////////////////////////////////////////////////////////////
|
||||
|
||||
throw std::runtime_error("need to implement buffering before sending for MPI");
|
||||
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
size_t maxslicesz = fp.sizes_[1] * fp.sizes_[3] * 2;
|
||||
|
||||
std::vector<ccomplex_t> crecvbuf_(maxslicesz / 2, 0);
|
||||
real_t *recvbuf_ = reinterpret_cast<real_t *>(&crecvbuf_[0]);
|
||||
|
||||
std::vector<ptrdiff_t>
|
||||
offsets_(CONFIG::MPI_task_size, 0),
|
||||
offsetsp_(CONFIG::MPI_task_size, 0),
|
||||
sizes_(CONFIG::MPI_task_size, 0),
|
||||
sizesp_(CONFIG::MPI_task_size, 0);
|
||||
|
||||
size_t tsize = f.size(0), tsizep = fp.size(0);
|
||||
|
||||
MPI_Allgather(&f.local_1_start_, 1, MPI_LONG_LONG, &offsets_[0], 1,
|
||||
MPI_LONG_LONG, MPI_COMM_WORLD);
|
||||
MPI_Allgather(&fp.local_1_start_, 1, MPI_LONG_LONG, &offsetsp_[0], 1,
|
||||
MPI_LONG_LONG, MPI_COMM_WORLD);
|
||||
MPI_Allgather(&tsize, 1, MPI_LONG_LONG, &sizes_[0], 1, MPI_LONG_LONG,
|
||||
MPI_COMM_WORLD);
|
||||
MPI_Allgather(&tsizep, 1, MPI_LONG_LONG, &sizesp_[0], 1, MPI_LONG_LONG,
|
||||
MPI_COMM_WORLD);
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
double tstart = get_wtime();
|
||||
csoca::dlog << "[MPI] Started scatter for convolution" << std::endl;
|
||||
|
||||
//... collect offsets
|
||||
|
||||
assert(f.space_ == kspace_id);
|
||||
assert(fMPIbuf_->space_ == kspace_id);
|
||||
|
||||
size_t nf[3] = {f.size(0), f.size(1), f.size(2)};
|
||||
size_t nf[3] = {fMPIbuf_->size(0), fMPIbuf_->size(1), fMPIbuf_->size(2)};
|
||||
size_t nfp[3] = {fp.size(0), fp.size(1), fp.size(2)};
|
||||
size_t fny[3] = {f.n_[1] / 2, f.n_[0] / 2, f.n_[2] / 2};
|
||||
size_t fny[3] = {fMPIbuf_->n_[1] / 2, fMPIbuf_->n_[0] / 2, fMPIbuf_->n_[2] / 2};
|
||||
|
||||
//... local size must be divisible by 2, otherwise this gets too complicated
|
||||
assert(f.n_[1] % 2 == 0);
|
||||
assert(fMPIbuf_->n_[1] % 2 == 0);
|
||||
|
||||
size_t slicesz = f.size(1) * f.size(3); //*2;
|
||||
size_t slicesz = fMPIbuf_->size(1) * fMPIbuf_->size(3); //*2;
|
||||
|
||||
// comunicate
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
|
@ -252,30 +244,25 @@ private:
|
|||
? MPI_DOUBLE_COMPLEX
|
||||
: MPI_INT;
|
||||
|
||||
MPI_Status status;
|
||||
|
||||
std::vector<MPI_Request> req;
|
||||
MPI_Request temp_req;
|
||||
|
||||
// fill MPI send buffer
|
||||
fMPIbuf_.FourierTransformForward(false);
|
||||
fMPIbuf_->FourierTransformForward(false);
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < 2*fp.size(0)/3; ++i)
|
||||
for (size_t i = 0; i < fMPIbuf_->size(0); ++i)
|
||||
{
|
||||
size_t ip = (i > nhalf[0]) ? i + dn[0] : i;
|
||||
for (size_t j = 0; j < 2*fp.size(1)/3; ++j)
|
||||
for (size_t j = 0; j < fMPIbuf_->size(1); ++j)
|
||||
{
|
||||
size_t jp = (j > nhalf[1]) ? j + dn[1] : j;
|
||||
for (size_t k = 0; k < 2*fp.size(2)/3; ++k)
|
||||
for (size_t k = 0; k < fMPIbuf_->size(2); ++k)
|
||||
{
|
||||
size_t kp = (k > nhalf[2]) ? k + dn[2] : k;
|
||||
// if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue;
|
||||
//fp.kelem(ip, jp, kp) = f.kelem(i, j, k) * rfac;
|
||||
fMPIbuf_.kelem(ip, jp, kp) = kfunc(i, j, k) * rfac;
|
||||
fMPIbuf_->kelem(i, j, k) = kfunc(i, j, k) * rfac;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Status status;
|
||||
|
||||
std::vector<MPI_Request> req;
|
||||
MPI_Request temp_req;
|
||||
|
||||
// send data from buffer
|
||||
for (size_t i = 0; i < nf[0]; ++i)
|
||||
|
@ -285,20 +272,18 @@ private:
|
|||
if (iglobal < fny[0])
|
||||
{
|
||||
int sendto = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
||||
MPI_Isend(&fMPIbuf_.kelem(i * slicesz), (int)slicesz, datatype, sendto,
|
||||
MPI_Isend(&fMPIbuf_->kelem(i * slicesz), (int)slicesz, datatype, sendto,
|
||||
(int)iglobal, MPI_COMM_WORLD, &temp_req);
|
||||
req.push_back(temp_req);
|
||||
// ofs << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ":
|
||||
// Isend #" << iglobal << " to task " << sendto << std::endl;
|
||||
std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal << " to task " << sendto << ", size = " << slicesz << std::endl;
|
||||
}
|
||||
if (iglobal > fny[0])
|
||||
{
|
||||
int sendto = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size);
|
||||
MPI_Isend(&fMPIbuf_.kelem(i * slicesz), (int)slicesz, datatype, sendto,
|
||||
MPI_Isend(&fMPIbuf_->kelem(i * slicesz), (int)slicesz, datatype, sendto,
|
||||
(int)(iglobal + fny[0]), MPI_COMM_WORLD, &temp_req);
|
||||
req.push_back(temp_req);
|
||||
// ofs << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ":
|
||||
// Isend #" << iglobal+fny[0] << " to task " << sendto << std::endl;
|
||||
std::cout << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ": Isend #" << iglobal+fny[0] << " to task " << sendto << ", size = " << slicesz<< std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -314,15 +299,14 @@ private:
|
|||
else
|
||||
recvfrom = get_task(iglobal - fny[0], offsets_, sizes_, CONFIG::MPI_task_size);
|
||||
|
||||
// ofs << "task " << CONFIG::MPI_task_rank << " : receive #" << iglobal << " from task "
|
||||
// << recvfrom << std::endl;
|
||||
std::cout << "task " << CONFIG::MPI_task_rank << " : receive #" << iglobal << " from task "
|
||||
<< recvfrom << ", size = " << slicesz << ", " << crecvbuf_ << ", " << datatype << std::endl;
|
||||
|
||||
MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, (int)iglobal,
|
||||
MPI_COMM_WORLD, &status);
|
||||
// ofs << "---> ok! " << (bool)(status.Get_error()==MPI::SUCCESS) <<
|
||||
// std::endl;
|
||||
std::cout << "---> ok! " << (bool)(status.MPI_ERROR==MPI_SUCCESS) << std::endl;
|
||||
|
||||
assert(status.MPI_ERROR == MPI_SUCCESS);
|
||||
// assert(status.MPI_ERROR == MPI_SUCCESS);
|
||||
|
||||
for (size_t j = 0; j < nf[1]; ++j)
|
||||
{
|
||||
|
@ -333,9 +317,9 @@ private:
|
|||
{
|
||||
// size_t kp = (k>fny[2])? k+fny[2] : k;
|
||||
if (k < fny[2])
|
||||
fp.kelem(i, jp, k) = crecvbuf_[j * f.sizes_[3] + k];
|
||||
fp.kelem(i, jp, k) = crecvbuf_[j * fMPIbuf_->sizes_[3] + k];
|
||||
else if (k > fny[2])
|
||||
fp.kelem(i, jp, k + fny[2]) = crecvbuf_[j * f.sizes_[3] + k];
|
||||
fp.kelem(i, jp, k + fny[2]) = crecvbuf_[j * fMPIbuf_->sizes_[3] + k];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -347,9 +331,9 @@ private:
|
|||
// size_t kp = (k>fny[2])? k+fny[2] : k;
|
||||
// fp.kelem(i,jp,kp) = crecvbuf_[j*f.sizes_[3]+k];
|
||||
if (k < fny[2])
|
||||
fp.kelem(i, jp, k) = crecvbuf_[j * f.sizes_[3] + k];
|
||||
fp.kelem(i, jp, k) = crecvbuf_[j * fMPIbuf_->sizes_[3] + k];
|
||||
else if (k > fny[2])
|
||||
fp.kelem(i, jp, k + fny[2]) = crecvbuf_[j * f.sizes_[3] + k];
|
||||
fp.kelem(i, jp, k + fny[2]) = crecvbuf_[j * fMPIbuf_->sizes_[3] + k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -392,17 +376,14 @@ private:
|
|||
assert( fp.space_ == kspace_id );
|
||||
f.FourierTransformForward();
|
||||
|
||||
#if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////
|
||||
size_t dn[3] = {
|
||||
fp.n_[0] - f.n_[0],
|
||||
fp.n_[1] - f.n_[1],
|
||||
fp.n_[2] - f.n_[2],
|
||||
};
|
||||
|
||||
const double rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(f.n_[0] * f.n_[1] * f.n_[2]);
|
||||
|
||||
#if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
size_t nhalf[3] = {f.n_[0] / 2, f.n_[1] / 2, f.n_[2] / 2};
|
||||
const double rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(f.n_[0] * f.n_[1] * f.n_[2]);
|
||||
|
||||
for (size_t i = 0; i < f.size(0); ++i)
|
||||
{
|
||||
|
@ -422,28 +403,6 @@ private:
|
|||
#else /// then USE_MPI is defined //////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
size_t maxslicesz = fp.sizes_[1] * fp.sizes_[3] * 2;
|
||||
|
||||
std::vector<ccomplex_t> crecvbuf_(maxslicesz / 2,0);
|
||||
real_t* recvbuf_ = reinterpret_cast<real_t *>(&crecvbuf_[0]);
|
||||
|
||||
std::vector<ptrdiff_t>
|
||||
offsets_(CONFIG::MPI_task_size, 0),
|
||||
offsetsp_(CONFIG::MPI_task_size, 0),
|
||||
sizes_(CONFIG::MPI_task_size, 0),
|
||||
sizesp_(CONFIG::MPI_task_size, 0);
|
||||
|
||||
size_t tsize = f.size(0), tsizep = fp.size(0);
|
||||
|
||||
MPI_Allgather(&f.local_1_start_, 1, MPI_LONG_LONG, &offsets_[0], 1,
|
||||
MPI_LONG_LONG, MPI_COMM_WORLD);
|
||||
MPI_Allgather(&fp.local_1_start_, 1, MPI_LONG_LONG, &offsetsp_[0], 1,
|
||||
MPI_LONG_LONG, MPI_COMM_WORLD);
|
||||
MPI_Allgather(&tsize, 1, MPI_LONG_LONG, &sizes_[0], 1, MPI_LONG_LONG,
|
||||
MPI_COMM_WORLD);
|
||||
MPI_Allgather(&tsizep, 1, MPI_LONG_LONG, &sizesp_[0], 1, MPI_LONG_LONG,
|
||||
MPI_COMM_WORLD);
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
double tstart = get_wtime();
|
||||
|
||||
|
|
|
@ -56,6 +56,36 @@ inline int MPI_Get_size( void ){
|
|||
assert( ret==MPI_SUCCESS );
|
||||
return size;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
MPI_Datatype GetMPIDatatype( void )
|
||||
{
|
||||
if( typeid(T) == typeid(std::complex<float>) )
|
||||
return MPI_COMPLEX;
|
||||
|
||||
if( typeid(T) == typeid(std::complex<double>) )
|
||||
return MPI_DOUBLE_COMPLEX;
|
||||
|
||||
if( typeid(T) == typeid(int) )
|
||||
return MPI_INT;
|
||||
|
||||
if( typeid(T) == typeid(unsigned) )
|
||||
return MPI_UNSIGNED;
|
||||
|
||||
if( typeid(T) == typeid(float) )
|
||||
return MPI_FLOAT;
|
||||
|
||||
if( typeid(T) == typeid(double) )
|
||||
return MPI_DOUBLE;
|
||||
|
||||
if( typeid(T) == typeid(char) )
|
||||
return MPI_CHAR;
|
||||
|
||||
abort();
|
||||
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
|
@ -72,16 +102,7 @@ inline int MPI_Get_size( void ){
|
|||
#endif
|
||||
#endif
|
||||
|
||||
// get task based on offsets
|
||||
template <typename array_type>
|
||||
int get_task(ptrdiff_t index, const array_type &offsets, const array_type& sizes,
|
||||
const int ntasks )
|
||||
{
|
||||
int itask = 0;
|
||||
while (itask < ntasks - 1 && offsets[itask + 1] <= index)
|
||||
++itask;
|
||||
return itask;
|
||||
}
|
||||
|
||||
|
||||
namespace CONFIG
|
||||
{
|
||||
|
|
|
@ -419,8 +419,8 @@ public:
|
|||
}
|
||||
#if defined(USE_MPI)
|
||||
data_t glob_sum = 0.0;
|
||||
MPI_Allreduce(reinterpret_cast<void *>(&sum), reinterpret_cast<void *>(&globsum),
|
||||
1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
MPI_Allreduce(reinterpret_cast<void *>(&sum), reinterpret_cast<void *>(&glob_sum),
|
||||
1, GetMPIDatatype<data_t>(), MPI_SUM, MPI_COMM_WORLD);
|
||||
sum = glob_sum;
|
||||
#endif
|
||||
sum /= sizes_[0]*sizes_[1]*sizes_[2];
|
||||
|
|
Loading…
Reference in a new issue