mirror of
https://github.com/cosmo-sims/MUSIC.git
synced 2024-09-19 17:03:46 +02:00
removed realspace kernel
This commit is contained in:
parent
437e77f74f
commit
bb9c07cf4d
1 changed files with 1 additions and 890 deletions
|
@ -13,7 +13,6 @@
|
|||
#include "convolution_kernel.hh"
|
||||
|
||||
#if defined(FFTW3) && defined(SINGLE_PRECISION)
|
||||
//#define fftw_complex fftwf_complex
|
||||
typedef fftw_complex fftwf_complex;
|
||||
#endif
|
||||
|
||||
|
@ -338,892 +337,7 @@ public:
|
|||
void deallocate() {}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename real_t>
|
||||
class kernel_real_cached : public kernel
|
||||
{
|
||||
protected:
|
||||
std::vector<real_t> kdata_;
|
||||
void precompute_kernel(transfer_function *ptf, tf_type type, const refinement_hierarchy &refh);
|
||||
|
||||
public:
|
||||
kernel_real_cached(config_file &cf, transfer_function *ptf, refinement_hierarchy &refh, tf_type type)
|
||||
: kernel(cf, ptf, refh, type)
|
||||
{
|
||||
precompute_kernel(ptf, type, refh);
|
||||
}
|
||||
|
||||
kernel *fetch_kernel(int ilevel, bool isolated = false);
|
||||
|
||||
void *get_ptr()
|
||||
{
|
||||
return reinterpret_cast<void *>(&kdata_[0]);
|
||||
}
|
||||
|
||||
bool is_ksampled()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
void at_k(size_t, const double *, double *) {}
|
||||
|
||||
~kernel_real_cached()
|
||||
{
|
||||
deallocate();
|
||||
}
|
||||
|
||||
void deallocate()
|
||||
{
|
||||
std::vector<real_t>().swap(kdata_);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename real_t>
|
||||
kernel *kernel_real_cached<real_t>::fetch_kernel(int ilevel, bool isolated)
|
||||
{
|
||||
char cachefname[128];
|
||||
sprintf(cachefname, "temp_kernel_level%03d.tmp", ilevel);
|
||||
FILE *fp = fopen(cachefname, "r");
|
||||
|
||||
std::cout << " - Fetching kernel for level " << ilevel << std::endl;
|
||||
|
||||
LOGUSER("Loading kernel for level %3d from file \'%s\'...", ilevel, cachefname);
|
||||
|
||||
if (fp == NULL)
|
||||
{
|
||||
LOGERR("Could not open kernel file \'%s\'.", cachefname);
|
||||
throw std::runtime_error("Internal error: cached convolution kernel does not exist on disk!");
|
||||
}
|
||||
|
||||
unsigned nx, ny, nz;
|
||||
size_t nread = 0;
|
||||
|
||||
nread = fread(reinterpret_cast<void *>(&nx), sizeof(unsigned), 1, fp);
|
||||
nread = fread(reinterpret_cast<void *>(&ny), sizeof(unsigned), 1, fp);
|
||||
nread = fread(reinterpret_cast<void *>(&nz), sizeof(unsigned), 1, fp);
|
||||
|
||||
kdata_.assign((size_t)nx * (size_t)ny * (size_t)nz, 0.0);
|
||||
|
||||
for (size_t ix = 0; ix < nx; ++ix)
|
||||
{
|
||||
const size_t sz = ny * nz;
|
||||
nread = fread(reinterpret_cast<void *>(&kdata_[(size_t)ix * sz]), sizeof(fftw_real), sz, fp);
|
||||
assert(nread == sz);
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
|
||||
//... set parameters
|
||||
|
||||
double boxlength = pcf_->getValue<double>("setup", "boxlength");
|
||||
double dx = boxlength / (1 << ilevel);
|
||||
|
||||
cparam_.nx = nx;
|
||||
cparam_.ny = ny;
|
||||
cparam_.nz = nz - 2;
|
||||
cparam_.lx = dx * cparam_.nx;
|
||||
cparam_.ly = dx * cparam_.ny;
|
||||
cparam_.lz = dx * cparam_.nz;
|
||||
cparam_.pcf = pcf_;
|
||||
|
||||
fftw_real *rkernel = reinterpret_cast<fftw_real *>(&kdata_[0]);
|
||||
|
||||
#ifdef FFTW3
|
||||
#ifdef SINGLE_PRECISION
|
||||
fftwf_complex *kkernel = reinterpret_cast<fftwf_complex *>(&rkernel[0]);
|
||||
fftwf_plan plan = fftwf_plan_dft_r2c_3d(cparam_.nx, cparam_.ny, cparam_.nz, rkernel, kkernel, FFTW_ESTIMATE);
|
||||
fftwf_execute(plan);
|
||||
fftwf_destroy_plan(plan);
|
||||
#else
|
||||
fftw_complex *kkernel = reinterpret_cast<fftw_complex *>(&rkernel[0]);
|
||||
fftw_plan plan = fftw_plan_dft_r2c_3d(cparam_.nx, cparam_.ny, cparam_.nz, rkernel, kkernel, FFTW_ESTIMATE);
|
||||
fftw_execute(plan);
|
||||
fftw_destroy_plan(plan);
|
||||
#endif
|
||||
#else
|
||||
rfftwnd_plan plan = rfftw3d_create_plan(cparam_.nx, cparam_.ny, cparam_.nz,
|
||||
FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
|
||||
|
||||
#ifndef SINGLETHREAD_FFTW
|
||||
rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), plan, rkernel, NULL);
|
||||
#else
|
||||
rfftwnd_one_real_to_complex(plan, rkernel, NULL);
|
||||
#endif
|
||||
|
||||
rfftwnd_destroy_plan(plan);
|
||||
#endif
|
||||
return this;
|
||||
}
|
||||
|
||||
template <typename real_t>
|
||||
inline real_t sqr(real_t x)
|
||||
{
|
||||
return x * x;
|
||||
}
|
||||
|
||||
template <typename real_t>
|
||||
inline real_t eval_split_recurse(const TransferFunction_real *tfr, real_t *xmid, real_t dx, real_t prevval, int nsplit)
|
||||
{
|
||||
const real_t abs_err = 1e-8, rel_err = 1e-6;
|
||||
const int nmaxsplits = 12;
|
||||
|
||||
real_t dxnew = dx / 2, dxnew2 = dx / 4;
|
||||
real_t dV = dxnew * dxnew * dxnew;
|
||||
|
||||
real_t xl[3] = {xmid[0] - dxnew2, xmid[1] - dxnew2, xmid[2] - dxnew2};
|
||||
real_t xr[3] = {xmid[0] + dxnew2, xmid[1] + dxnew2, xmid[2] + dxnew2};
|
||||
|
||||
real_t xc[8][3] =
|
||||
{
|
||||
{xl[0], xl[1], xl[2]},
|
||||
{xl[0], xl[1], xr[2]},
|
||||
{xl[0], xr[1], xl[2]},
|
||||
{xl[0], xr[1], xr[2]},
|
||||
{xr[0], xl[1], xl[2]},
|
||||
{xr[0], xl[1], xr[2]},
|
||||
{xr[0], xr[1], xl[2]},
|
||||
{xr[0], xr[1], xr[2]},
|
||||
};
|
||||
|
||||
real_t rr2, res[8], ressum = 0.;
|
||||
|
||||
for (int i = 0; i < 8; ++i)
|
||||
{
|
||||
rr2 = sqr(xc[i][0]) + sqr(xc[i][1]) + sqr(xc[i][2]);
|
||||
res[i] = tfr->compute_real(rr2) * dV;
|
||||
if (res[i] != res[i])
|
||||
{
|
||||
LOGERR("NaN encountered at r=%f, dx=%f, dV=%f : TF = %f", sqrt(rr2), dx, dV, res[i]);
|
||||
abort();
|
||||
}
|
||||
ressum += res[i];
|
||||
}
|
||||
|
||||
real_t ae = fabs((prevval - ressum));
|
||||
real_t re = fabs(ae / ressum);
|
||||
|
||||
if (ae < abs_err || re < rel_err)
|
||||
return ressum;
|
||||
|
||||
if (nsplit > nmaxsplits)
|
||||
{
|
||||
//LOGWARN("reached maximum number of supdivisions in eval_split_recurse. Ending recursion... : abs. err.=%f, rel. err.=%f",ae, re);
|
||||
return ressum;
|
||||
}
|
||||
|
||||
//otherwise keep splitting
|
||||
ressum = 0;
|
||||
for (int i = 0; i < 8; ++i)
|
||||
ressum += eval_split_recurse(tfr, xc[i], dxnew, res[i], nsplit + 1);
|
||||
|
||||
return ressum;
|
||||
}
|
||||
|
||||
template <typename real_t>
|
||||
inline real_t eval_split_recurse(const TransferFunction_real *tfr, real_t *xmid, real_t dx, int nsplit = 0)
|
||||
{
|
||||
//sqr(xmid[0])+sqr(xmid[1])+sqr(xmid[2])
|
||||
|
||||
real_t rr2 = sqr(xmid[0]) + sqr(xmid[1]) + sqr(xmid[2]);
|
||||
real_t prevval = tfr->compute_real(rr2) * dx * dx * dx;
|
||||
return eval_split_recurse(tfr, xmid, dx, prevval, nsplit);
|
||||
}
|
||||
|
||||
#define OLD_KERNEL_SAMPLING
|
||||
|
||||
template <typename real_t>
|
||||
void kernel_real_cached<real_t>::precompute_kernel(transfer_function *ptf, tf_type type, const refinement_hierarchy &refh)
|
||||
{
|
||||
//... compute kernel for finest level
|
||||
int nx, ny, nz;
|
||||
real_t dx, lx, ly, lz;
|
||||
|
||||
real_t
|
||||
boxlength = pcf_->getValue<double>("setup", "boxlength"),
|
||||
boxlength2 = 0.5 * boxlength;
|
||||
|
||||
int
|
||||
levelmax = refh.levelmax(),
|
||||
levelmin = refh.levelmin();
|
||||
|
||||
LOGUSER("Precomputing transfer function kernels...");
|
||||
|
||||
nx = refh.size(refh.levelmax(), 0);
|
||||
ny = refh.size(refh.levelmax(), 1);
|
||||
nz = refh.size(refh.levelmax(), 2);
|
||||
|
||||
if (levelmax != levelmin)
|
||||
{
|
||||
nx *= 2;
|
||||
ny *= 2;
|
||||
nz *= 2;
|
||||
}
|
||||
|
||||
dx = boxlength / (1 << refh.levelmax());
|
||||
lx = dx * nx;
|
||||
ly = dx * ny;
|
||||
lz = dx * nz;
|
||||
|
||||
real_t
|
||||
kny = M_PI / dx,
|
||||
fac = lx * ly * lz / pow(2.0 * M_PI, 3) / ((double)nx * (double)ny * (double)nz),
|
||||
nspec = pcf_->getValue<double>("cosmology", "nspec"),
|
||||
pnorm = pcf_->getValue<double>("cosmology", "pnorm");
|
||||
|
||||
bool
|
||||
bperiodic = pcf_->getValueSafe<bool>("setup", "periodic_TF", true),
|
||||
deconv = pcf_->getValueSafe<bool>("setup", "deconvolve", true);
|
||||
// bool deconv_baryons = true;//pcf_->getValueSafe<bool>("setup","deconvolve_baryons",false) || do_SPH;
|
||||
bool bsmooth_baryons = false; //type==baryon && !deconv_baryons;
|
||||
//bool bbaryons = type==baryon | type==vbaryon;
|
||||
bool kspacepoisson = ((pcf_->getValueSafe<bool>("poisson", "fft_fine", true) |
|
||||
pcf_->getValueSafe<bool>("poisson", "kspace", false))); // & !(type==baryon&!do_SPH);//&!baryons ;
|
||||
|
||||
std::cout << " - Computing transfer function kernel...\n";
|
||||
|
||||
TransferFunction_real *tfr = new TransferFunction_real(boxlength, 1 << levelmax, type, ptf, nspec, pnorm,
|
||||
0.25 * dx, 2.0 * boxlength, kny, (int)pow(2, levelmax + 2));
|
||||
|
||||
fftw_real *rkernel = new fftw_real[(size_t)nx * (size_t)ny * ((size_t)nz + 2)], *rkernel_coarse;
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < nx; ++i)
|
||||
for (int j = 0; j < ny; ++j)
|
||||
for (int k = 0; k < nz; ++k)
|
||||
{
|
||||
size_t q = ((size_t)(i)*ny + (size_t)(j)) * (size_t)(nz + 2) + (size_t)(k);
|
||||
rkernel[q] = 0.0;
|
||||
}
|
||||
|
||||
LOGUSER("Computing fine kernel (level %d)...", levelmax);
|
||||
|
||||
#ifdef OLD_KERNEL_SAMPLING
|
||||
int ref_fac = (deconv && kspacepoisson) ? 2 : 0;
|
||||
const int ql = -ref_fac / 2 + 1, qr = ql + ref_fac;
|
||||
const double rf8 = pow(ref_fac, 3);
|
||||
const double dx05 = 0.5 * dx, dx025 = 0.25 * dx;
|
||||
#endif
|
||||
|
||||
if (bperiodic)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i <= nx / 2; ++i)
|
||||
for (int j = 0; j <= ny / 2; ++j)
|
||||
for (int k = 0; k <= nz / 2; ++k)
|
||||
{
|
||||
int iix(i), iiy(j), iiz(k);
|
||||
real_t rr[3];
|
||||
|
||||
if (iix > (int)nx / 2)
|
||||
iix -= nx;
|
||||
if (iiy > (int)ny / 2)
|
||||
iiy -= ny;
|
||||
if (iiz > (int)nz / 2)
|
||||
iiz -= nz;
|
||||
|
||||
//... speed up 8x by copying data to other octants
|
||||
size_t idx[8];
|
||||
|
||||
idx[0] = ((size_t)(i)*ny + (size_t)(j)) * 2 * (nz / 2 + 1) + (size_t)(k);
|
||||
idx[1] = ((size_t)(nx - i) * ny + (size_t)(j)) * 2 * (nz / 2 + 1) + (size_t)(k);
|
||||
idx[2] = ((size_t)(i)*ny + (size_t)(ny - j)) * 2 * (nz / 2 + 1) + (size_t)(k);
|
||||
idx[3] = ((size_t)(nx - i) * ny + (size_t)(ny - j)) * 2 * (nz / 2 + 1) + (size_t)(k);
|
||||
idx[4] = ((size_t)(i)*ny + (size_t)(j)) * 2 * (nz / 2 + 1) + (size_t)(nz - k);
|
||||
idx[5] = ((size_t)(nx - i) * ny + (size_t)(j)) * 2 * (nz / 2 + 1) + (size_t)(nz - k);
|
||||
idx[6] = ((size_t)(i)*ny + (size_t)(ny - j)) * 2 * (nz / 2 + 1) + (size_t)(nz - k);
|
||||
idx[7] = ((size_t)(nx - i) * ny + (size_t)(ny - j)) * 2 * (nz / 2 + 1) + (size_t)(nz - k);
|
||||
|
||||
if (i == 0 || i == nx / 2)
|
||||
{
|
||||
idx[1] = idx[3] = idx[5] = idx[7] = (size_t)-1;
|
||||
}
|
||||
if (j == 0 || j == ny / 2)
|
||||
{
|
||||
idx[2] = idx[3] = idx[6] = idx[7] = (size_t)-1;
|
||||
}
|
||||
if (k == 0 || k == nz / 2)
|
||||
{
|
||||
idx[4] = idx[5] = idx[6] = idx[7] = (size_t)-1;
|
||||
}
|
||||
|
||||
double val = 0.0;
|
||||
|
||||
for (int ii = -1; ii <= 1; ++ii)
|
||||
for (int jj = -1; jj <= 1; ++jj)
|
||||
for (int kk = -1; kk <= 1; ++kk)
|
||||
{
|
||||
rr[0] = ((double)iix) * dx + ii * boxlength;
|
||||
rr[1] = ((double)iiy) * dx + jj * boxlength;
|
||||
rr[2] = ((double)iiz) * dx + kk * boxlength;
|
||||
|
||||
if (rr[0] > -boxlength && rr[0] <= boxlength && rr[1] > -boxlength && rr[1] <= boxlength && rr[2] > -boxlength && rr[2] <= boxlength)
|
||||
{
|
||||
#ifdef OLD_KERNEL_SAMPLING
|
||||
if (ref_fac > 0)
|
||||
{
|
||||
double rrr[3];
|
||||
register double rrr2[3];
|
||||
for (int iii = ql; iii < qr; ++iii)
|
||||
{
|
||||
rrr[0] = rr[0] + (double)iii * dx05 - dx025;
|
||||
rrr2[0] = rrr[0] * rrr[0];
|
||||
for (int jjj = ql; jjj < qr; ++jjj)
|
||||
{
|
||||
rrr[1] = rr[1] + (double)jjj * dx05 - dx025;
|
||||
rrr2[1] = rrr[1] * rrr[1];
|
||||
for (int kkk = ql; kkk < qr; ++kkk)
|
||||
{
|
||||
rrr[2] = rr[2] + (double)kkk * dx05 - dx025;
|
||||
rrr2[2] = rrr[2] * rrr[2];
|
||||
val += tfr->compute_real(rrr2[0] + rrr2[1] + rrr2[2]) / rf8;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
val += tfr->compute_real(rr[0] * rr[0] + rr[1] * rr[1] + rr[2] * rr[2]);
|
||||
}
|
||||
|
||||
#else // !OLD_KERNEL_SAMPLING
|
||||
val += eval_split_recurse(tfr, rr, dx) / (dx * dx * dx);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
val *= fac;
|
||||
|
||||
for (int q = 0; q < 8; ++q)
|
||||
if (idx[q] != (size_t)-1)
|
||||
rkernel[idx[q]] = val;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < nx; ++i)
|
||||
for (int j = 0; j < ny; ++j)
|
||||
for (int k = 0; k < nz; ++k)
|
||||
{
|
||||
int iix(i), iiy(j), iiz(k);
|
||||
real_t rr[3];
|
||||
|
||||
if (iix > (int)nx / 2)
|
||||
iix -= nx;
|
||||
if (iiy > (int)ny / 2)
|
||||
iiy -= ny;
|
||||
if (iiz > (int)nz / 2)
|
||||
iiz -= nz;
|
||||
|
||||
//size_t idx = ((size_t)i*ny + (size_t)j) * 2*(nz/2+1) + (size_t)k;
|
||||
|
||||
rr[0] = ((double)iix) * dx;
|
||||
rr[1] = ((double)iiy) * dx;
|
||||
rr[2] = ((double)iiz) * dx;
|
||||
|
||||
//rkernel[idx] = 0.0;
|
||||
|
||||
//rr2 = rr[0]*rr[0]+rr[1]*rr[1]+rr[2]*rr[2];
|
||||
|
||||
//... speed up 8x by copying data to other octants
|
||||
size_t idx[8];
|
||||
|
||||
idx[0] = ((size_t)(i)*ny + (size_t)(j)) * 2 * (nz / 2 + 1) + (size_t)(k);
|
||||
idx[1] = ((size_t)(nx - i) * ny + (size_t)(j)) * 2 * (nz / 2 + 1) + (size_t)(k);
|
||||
idx[2] = ((size_t)(i)*ny + (size_t)(ny - j)) * 2 * (nz / 2 + 1) + (size_t)(k);
|
||||
idx[3] = ((size_t)(nx - i) * ny + (size_t)(ny - j)) * 2 * (nz / 2 + 1) + (size_t)(k);
|
||||
idx[4] = ((size_t)(i)*ny + (size_t)(j)) * 2 * (nz / 2 + 1) + (size_t)(nz - k);
|
||||
idx[5] = ((size_t)(nx - i) * ny + (size_t)(j)) * 2 * (nz / 2 + 1) + (size_t)(nz - k);
|
||||
idx[6] = ((size_t)(i)*ny + (size_t)(ny - j)) * 2 * (nz / 2 + 1) + (size_t)(nz - k);
|
||||
idx[7] = ((size_t)(nx - i) * ny + (size_t)(ny - j)) * 2 * (nz / 2 + 1) + (size_t)(nz - k);
|
||||
|
||||
if (i == 0 || i == nx / 2)
|
||||
{
|
||||
idx[1] = idx[3] = idx[5] = idx[7] = (size_t)-1;
|
||||
}
|
||||
if (j == 0 || j == ny / 2)
|
||||
{
|
||||
idx[2] = idx[3] = idx[6] = idx[7] = (size_t)-1;
|
||||
}
|
||||
if (k == 0 || k == nz / 2)
|
||||
{
|
||||
idx[4] = idx[5] = idx[6] = idx[7] = (size_t)-1;
|
||||
}
|
||||
|
||||
double val = 0.0; //(fftw_real)tfr->compute_real(rr2)*fac;
|
||||
|
||||
#ifdef OLD_KERNEL_SAMPLING
|
||||
|
||||
if (ref_fac > 0)
|
||||
{
|
||||
double rrr[3];
|
||||
register double rrr2[3];
|
||||
for (int iii = ql; iii < qr; ++iii)
|
||||
{
|
||||
rrr[0] = rr[0] + (double)iii * dx05 - dx025;
|
||||
rrr2[0] = rrr[0] * rrr[0];
|
||||
for (int jjj = ql; jjj < qr; ++jjj)
|
||||
{
|
||||
rrr[1] = rr[1] + (double)jjj * dx05 - dx025;
|
||||
rrr2[1] = rrr[1] * rrr[1];
|
||||
for (int kkk = ql; kkk < qr; ++kkk)
|
||||
{
|
||||
rrr[2] = rr[2] + (double)kkk * dx05 - dx025;
|
||||
rrr2[2] = rrr[2] * rrr[2];
|
||||
val += tfr->compute_real(rrr2[0] + rrr2[1] + rrr2[2]) / rf8;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
val = tfr->compute_real(rr[0] * rr[0] + rr[1] * rr[1] + rr[2] * rr[2]);
|
||||
}
|
||||
|
||||
#else
|
||||
if (i == 0 && j == 0 && k == 0)
|
||||
continue;
|
||||
|
||||
// use new exact volume integration scheme
|
||||
val = eval_split_recurse(tfr, rr, dx) / (dx * dx * dx);
|
||||
|
||||
#endif
|
||||
|
||||
//if( rr2 <= boxlength2*boxlength2 )
|
||||
//rkernel[idx] += (fftw_real)tfr->compute_real(rr2)*fac;
|
||||
val *= fac;
|
||||
|
||||
for (int q = 0; q < 8; ++q)
|
||||
if (idx[q] != (size_t)-1)
|
||||
rkernel[idx[q]] = val;
|
||||
}
|
||||
}
|
||||
{
|
||||
#ifdef OLD_KERNEL_SAMPLING
|
||||
rkernel[0] = tfr->compute_real(0.0) * fac;
|
||||
#else
|
||||
real_t xmid[3] = {0.0, 0.0, 0.0};
|
||||
rkernel[0] = fac * eval_split_recurse(tfr, xmid, dx) / (dx * dx * dx);
|
||||
#endif
|
||||
}
|
||||
/*************************************************************************************/
|
||||
/*************************************************************************************/
|
||||
/******* perform deconvolution *******************************************************/
|
||||
|
||||
//bool baryons = type==baryon||type==vbaryon;
|
||||
if (deconv)
|
||||
{
|
||||
|
||||
LOGUSER("Deconvolving fine kernel...");
|
||||
std::cout << " - Deconvolving density kernel...\n";
|
||||
|
||||
double fftnorm = 1.0 / ((size_t)nx * (size_t)ny * (size_t)nz);
|
||||
double k0 = rkernel[0];
|
||||
|
||||
fftw_complex *kkernel = reinterpret_cast<fftw_complex *>(&rkernel[0]);
|
||||
|
||||
//... subtract white noise component before deconvolution
|
||||
if (!bsmooth_baryons)
|
||||
rkernel[0] = 0.0;
|
||||
|
||||
#ifdef FFTW3
|
||||
#ifdef SINGLE_PRECISION
|
||||
fftwf_plan
|
||||
plan = fftwf_plan_dft_r2c_3d(nx, ny, nz, rkernel, kkernel, FFTW_ESTIMATE),
|
||||
iplan = fftwf_plan_dft_c2r_3d(nx, ny, nz, kkernel, rkernel, FFTW_ESTIMATE);
|
||||
|
||||
fftwf_execute(plan);
|
||||
#else
|
||||
fftw_plan
|
||||
plan = fftw_plan_dft_r2c_3d(nx, ny, nz, rkernel, kkernel, FFTW_ESTIMATE),
|
||||
iplan = fftw_plan_dft_c2r_3d(nx, ny, nz, kkernel, rkernel, FFTW_ESTIMATE);
|
||||
|
||||
fftw_execute(plan);
|
||||
#endif
|
||||
#else
|
||||
rfftwnd_plan plan = rfftw3d_create_plan(nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE),
|
||||
iplan = rfftw3d_create_plan(nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
|
||||
|
||||
#ifndef SINGLETHREAD_FFTW
|
||||
rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), plan, rkernel, NULL);
|
||||
#else
|
||||
rfftwnd_one_real_to_complex(plan, rkernel, NULL);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (deconv)
|
||||
{
|
||||
|
||||
double ksum = 0.0;
|
||||
size_t kcount = 0;
|
||||
double kmax = 0.5 * M_PI / std::max(nx, std::max(ny, nz));
|
||||
|
||||
#pragma omp parallel for reduction(+ \
|
||||
: ksum, kcount)
|
||||
for (int i = 0; i < nx; ++i)
|
||||
for (int j = 0; j < ny; ++j)
|
||||
for (int k = 0; k < nz / 2 + 1; ++k)
|
||||
{
|
||||
double kx, ky, kz;
|
||||
|
||||
kx = (double)i;
|
||||
ky = (double)j;
|
||||
kz = (double)k;
|
||||
|
||||
if (kx > nx / 2)
|
||||
kx -= nx;
|
||||
if (ky > ny / 2)
|
||||
ky -= ny;
|
||||
|
||||
double kkmax = kmax;
|
||||
size_t q = ((size_t)i * ny + (size_t)j) * (size_t)(nz / 2 + 1) + (size_t)k;
|
||||
|
||||
if (!bsmooth_baryons)
|
||||
{
|
||||
if (kspacepoisson)
|
||||
{
|
||||
//... Use child average response function to emulate sub-sampling
|
||||
double ipix = cos(kx * kkmax) * cos(ky * kkmax) * cos(kz * kkmax);
|
||||
|
||||
RE(kkernel[q]) /= ipix;
|
||||
IM(kkernel[q]) /= ipix;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
//... Use piecewise constant response function (NGP-kernel)
|
||||
//... for finite difference methods
|
||||
kkmax = kmax;
|
||||
double ipix = 1.0;
|
||||
if (i > 0)
|
||||
ipix /= sin(kx * 2.0 * kkmax) / (kx * 2.0 * kkmax);
|
||||
if (j > 0)
|
||||
ipix /= sin(ky * 2.0 * kkmax) / (ky * 2.0 * kkmax);
|
||||
if (k > 0)
|
||||
ipix /= sin(kz * 2.0 * kkmax) / (kz * 2.0 * kkmax);
|
||||
|
||||
RE(kkernel[q]) *= ipix;
|
||||
IM(kkernel[q]) *= ipix;
|
||||
}
|
||||
}
|
||||
#if 1
|
||||
else
|
||||
{
|
||||
//... if smooth==true, convolve with
|
||||
//... NGP kernel to get CIC smoothness
|
||||
|
||||
//kkmax *= 2.0;
|
||||
|
||||
double ipix = 1.0;
|
||||
if (i > 0)
|
||||
ipix /= sin(kx * 2.0 * kkmax) / (kx * 2.0 * kkmax);
|
||||
if (j > 0)
|
||||
ipix /= sin(ky * 2.0 * kkmax) / (ky * 2.0 * kkmax);
|
||||
if (k > 0)
|
||||
ipix /= sin(kz * 2.0 * kkmax) / (kz * 2.0 * kkmax);
|
||||
|
||||
RE(kkernel[q]) /= ipix;
|
||||
IM(kkernel[q]) /= ipix;
|
||||
}
|
||||
#endif
|
||||
|
||||
//... store k-space average
|
||||
if (k == 0 || k == nz / 2)
|
||||
{
|
||||
ksum += RE(kkernel[q]);
|
||||
kcount++;
|
||||
}
|
||||
else
|
||||
{
|
||||
ksum += 2.0 * (RE(kkernel[q]));
|
||||
kcount += 2;
|
||||
}
|
||||
}
|
||||
|
||||
double dk;
|
||||
|
||||
//... re-add white noise component for finest grid
|
||||
dk = k0 - ksum / kcount;
|
||||
|
||||
//... set white noise component to zero if smoothing is enabled
|
||||
//if( false )//cparam_.smooth )
|
||||
if (bsmooth_baryons)
|
||||
dk = 0.0;
|
||||
|
||||
//... enforce the r=0 component by adjusting the k-space mean
|
||||
#pragma omp parallel for reduction(+ \
|
||||
: ksum, kcount)
|
||||
for (int i = 0; i < nx; ++i)
|
||||
for (int j = 0; j < ny; ++j)
|
||||
for (int k = 0; k < (nz / 2 + 1); ++k)
|
||||
{
|
||||
size_t q = ((size_t)i * ny + (size_t)j) * (nz / 2 + 1) + (size_t)k;
|
||||
|
||||
RE(kkernel[q]) += dk;
|
||||
|
||||
RE(kkernel[q]) *= fftnorm;
|
||||
IM(kkernel[q]) *= fftnorm;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef FFTW3
|
||||
#ifdef SINGLE_PRECISION
|
||||
fftwf_execute(iplan);
|
||||
fftwf_destroy_plan(plan);
|
||||
fftwf_destroy_plan(iplan);
|
||||
#else
|
||||
fftw_execute(iplan);
|
||||
fftw_destroy_plan(plan);
|
||||
fftw_destroy_plan(iplan);
|
||||
#endif
|
||||
#else
|
||||
#ifndef SINGLETHREAD_FFTW
|
||||
rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), iplan, kkernel, NULL);
|
||||
#else
|
||||
rfftwnd_one_complex_to_real(iplan, kkernel, NULL);
|
||||
#endif
|
||||
rfftwnd_destroy_plan(plan);
|
||||
rfftwnd_destroy_plan(iplan);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*************************************************************************************/
|
||||
/*************************************************************************************/
|
||||
/*************************************************************************************/
|
||||
|
||||
char cachefname[128];
|
||||
sprintf(cachefname, "temp_kernel_level%03d.tmp", levelmax);
|
||||
LOGUSER("Storing kernel in temp file \'%s\'.", cachefname);
|
||||
|
||||
FILE *fp = fopen(cachefname, "w+");
|
||||
unsigned q = nx;
|
||||
fwrite(reinterpret_cast<void *>(&q), sizeof(unsigned), 1, fp);
|
||||
q = ny;
|
||||
fwrite(reinterpret_cast<void *>(&q), sizeof(unsigned), 1, fp);
|
||||
q = 2 * (nz / 2 + 1);
|
||||
fwrite(reinterpret_cast<void *>(&q), sizeof(unsigned), 1, fp);
|
||||
|
||||
for (int ix = 0; ix < nx; ++ix)
|
||||
{
|
||||
size_t sz = ny * 2 * (nz / 2 + 1);
|
||||
//fwrite( reinterpret_cast<void*>(&rkernel[0]), sizeof(fftw_real), nx*ny*2*(nz/2+1), fp );
|
||||
fwrite(reinterpret_cast<void *>(&rkernel[(size_t)ix * sz]), sizeof(fftw_real), sz, fp);
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
|
||||
//... average and fill for other levels
|
||||
for (int ilevel = levelmax - 1; ilevel >= levelmin; ilevel--)
|
||||
{
|
||||
LOGUSER("Computing coarse kernel (level %d)...", ilevel);
|
||||
|
||||
int nxc, nyc, nzc;
|
||||
real_t dxc, lxc, lyc, lzc;
|
||||
|
||||
nxc = refh.size(ilevel, 0);
|
||||
nyc = refh.size(ilevel, 1);
|
||||
nzc = refh.size(ilevel, 2);
|
||||
|
||||
if (ilevel != levelmin)
|
||||
{
|
||||
nxc *= 2;
|
||||
nyc *= 2;
|
||||
nzc *= 2;
|
||||
}
|
||||
|
||||
dxc = boxlength / (1 << ilevel);
|
||||
lxc = dxc * nxc;
|
||||
lyc = dxc * nyc;
|
||||
lzc = dxc * nzc;
|
||||
|
||||
rkernel_coarse = new fftw_real[(size_t)nxc * (size_t)nyc * 2 * ((size_t)nzc / 2 + 1)];
|
||||
fac = lxc * lyc * lzc / pow(2.0 * M_PI, 3) / ((double)nxc * (double)nyc * (double)nzc);
|
||||
|
||||
if (bperiodic)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i <= nxc / 2; ++i)
|
||||
for (int j = 0; j <= nyc / 2; ++j)
|
||||
for (int k = 0; k <= nzc / 2; ++k)
|
||||
{
|
||||
int iix(i), iiy(j), iiz(k);
|
||||
real_t rr[3], rr2;
|
||||
|
||||
if (iix > (int)nxc / 2)
|
||||
iix -= nxc;
|
||||
if (iiy > (int)nyc / 2)
|
||||
iiy -= nyc;
|
||||
if (iiz > (int)nzc / 2)
|
||||
iiz -= nzc;
|
||||
|
||||
//... speed up 8x by copying data to other octants
|
||||
size_t idx[8];
|
||||
|
||||
idx[0] = ((size_t)(i)*nyc + (size_t)(j)) * 2 * (nzc / 2 + 1) + (size_t)(k);
|
||||
idx[1] = ((size_t)(nxc - i) * nyc + (size_t)(j)) * 2 * (nzc / 2 + 1) + (size_t)(k);
|
||||
idx[2] = ((size_t)(i)*nyc + (size_t)(nyc - j)) * 2 * (nzc / 2 + 1) + (size_t)(k);
|
||||
idx[3] = ((size_t)(nxc - i) * nyc + (size_t)(nyc - j)) * 2 * (nzc / 2 + 1) + (size_t)(k);
|
||||
idx[4] = ((size_t)(i)*nyc + (size_t)(j)) * 2 * (nzc / 2 + 1) + (size_t)(nzc - k);
|
||||
idx[5] = ((size_t)(nxc - i) * nyc + (size_t)(j)) * 2 * (nzc / 2 + 1) + (size_t)(nzc - k);
|
||||
idx[6] = ((size_t)(i)*nyc + (size_t)(nyc - j)) * 2 * (nzc / 2 + 1) + (size_t)(nzc - k);
|
||||
idx[7] = ((size_t)(nxc - i) * nyc + (size_t)(nyc - j)) * 2 * (nzc / 2 + 1) + (size_t)(nzc - k);
|
||||
|
||||
if (i == 0 || i == nxc / 2)
|
||||
{
|
||||
idx[1] = idx[3] = idx[5] = idx[7] = (size_t)-1;
|
||||
}
|
||||
if (j == 0 || j == nyc / 2)
|
||||
{
|
||||
idx[2] = idx[3] = idx[6] = idx[7] = (size_t)-1;
|
||||
}
|
||||
if (k == 0 || k == nzc / 2)
|
||||
{
|
||||
idx[4] = idx[5] = idx[6] = idx[7] = (size_t)-1;
|
||||
}
|
||||
|
||||
double val = 0.0;
|
||||
|
||||
for (int ii = -1; ii <= 1; ++ii)
|
||||
for (int jj = -1; jj <= 1; ++jj)
|
||||
for (int kk = -1; kk <= 1; ++kk)
|
||||
{
|
||||
rr[0] = ((double)iix) * dxc + ii * boxlength;
|
||||
rr[1] = ((double)iiy) * dxc + jj * boxlength;
|
||||
rr[2] = ((double)iiz) * dxc + kk * boxlength;
|
||||
|
||||
if (rr[0] > -boxlength && rr[0] < boxlength && rr[1] > -boxlength && rr[1] < boxlength && rr[2] > -boxlength && rr[2] < boxlength)
|
||||
{
|
||||
#ifdef OLD_KERNEL_SAMPLING
|
||||
rr2 = rr[0] * rr[0] + rr[1] * rr[1] + rr[2] * rr[2];
|
||||
val += tfr->compute_real(rr2);
|
||||
#else // ! OLD_KERNEL_SAMPLING
|
||||
val += eval_split_recurse(tfr, rr, dxc) / (dxc * dxc * dxc);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
val *= fac;
|
||||
|
||||
for (int qq = 0; qq < 8; ++qq)
|
||||
if (idx[qq] != (size_t)-1)
|
||||
rkernel_coarse[idx[qq]] = val;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < nxc; ++i)
|
||||
for (int j = 0; j < nyc; ++j)
|
||||
for (int k = 0; k < nzc; ++k)
|
||||
{
|
||||
int iix(i), iiy(j), iiz(k);
|
||||
real_t rr[3];
|
||||
|
||||
if (iix > (int)nxc / 2)
|
||||
iix -= nxc;
|
||||
if (iiy > (int)nyc / 2)
|
||||
iiy -= nyc;
|
||||
if (iiz > (int)nzc / 2)
|
||||
iiz -= nzc;
|
||||
|
||||
size_t idx = ((size_t)i * nyc + (size_t)j) * 2 * (nzc / 2 + 1) + (size_t)k;
|
||||
|
||||
rr[0] = ((double)iix) * dxc;
|
||||
rr[1] = ((double)iiy) * dxc;
|
||||
rr[2] = ((double)iiz) * dxc;
|
||||
|
||||
#ifdef OLD_KERNEL_SAMPLING
|
||||
rkernel_coarse[idx] = 0.0;
|
||||
|
||||
real_t rr2 = rr[0] * rr[0] + rr[1] * rr[1] + rr[2] * rr[2];
|
||||
if (fabs(rr[0]) <= boxlength2 || fabs(rr[1]) <= boxlength2 || fabs(rr[2]) <= boxlength2)
|
||||
rkernel_coarse[idx] += (fftw_real)tfr->compute_real(rr2) * fac;
|
||||
#else
|
||||
|
||||
rkernel_coarse[idx] = 0.0;
|
||||
|
||||
//if( i==0 && j==0 && k==0 ) continue;
|
||||
real_t val = eval_split_recurse(tfr, rr, dxc) / (dxc * dxc * dxc);
|
||||
|
||||
if (fabs(rr[0]) <= boxlength2 || fabs(rr[1]) <= boxlength2 || fabs(rr[2]) <= boxlength2)
|
||||
rkernel_coarse[idx] += val * fac;
|
||||
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef OLD_KERNEL_SAMPLING
|
||||
LOGUSER("Averaging fine kernel to coarse kernel...");
|
||||
|
||||
//... copy averaged and convolved fine kernel to coarse kernel
|
||||
#pragma omp parallel for
|
||||
for (int ix = 0; ix < nx; ix += 2)
|
||||
for (int iy = 0; iy < ny; iy += 2)
|
||||
for (int iz = 0; iz < nz; iz += 2)
|
||||
{
|
||||
int iix(ix / 2), iiy(iy / 2), iiz(iz / 2);
|
||||
if (ix > nx / 2)
|
||||
iix += nxc - nx / 2;
|
||||
if (iy > ny / 2)
|
||||
iiy += nyc - ny / 2;
|
||||
if (iz > nz / 2)
|
||||
iiz += nzc - nz / 2;
|
||||
|
||||
if (ix == nx / 2 || iy == ny / 2 || iz == nz / 2)
|
||||
continue;
|
||||
|
||||
for (int i = 0; i <= 1; ++i)
|
||||
for (int j = 0; j <= 1; ++j)
|
||||
for (int k = 0; k <= 1; ++k)
|
||||
if (i == 0 && k == 0 && j == 0)
|
||||
rkernel_coarse[ACC_RC(iix, iiy, iiz)] =
|
||||
0.125 * (rkernel[ACC_RF(ix - i, iy - j, iz - k)] + rkernel[ACC_RF(ix - i + 1, iy - j, iz - k)] + rkernel[ACC_RF(ix - i, iy - j + 1, iz - k)] + rkernel[ACC_RF(ix - i, iy - j, iz - k + 1)] + rkernel[ACC_RF(ix - i + 1, iy - j + 1, iz - k)] + rkernel[ACC_RF(ix - i + 1, iy - j, iz - k + 1)] + rkernel[ACC_RF(ix - i, iy - j + 1, iz - k + 1)] + rkernel[ACC_RF(ix - i + 1, iy - j + 1, iz - k + 1)]);
|
||||
|
||||
else
|
||||
{
|
||||
|
||||
rkernel_coarse[ACC_RC(iix, iiy, iiz)] +=
|
||||
0.125 * (rkernel[ACC_RF(ix - i, iy - j, iz - k)] + rkernel[ACC_RF(ix - i + 1, iy - j, iz - k)] + rkernel[ACC_RF(ix - i, iy - j + 1, iz - k)] + rkernel[ACC_RF(ix - i, iy - j, iz - k + 1)] + rkernel[ACC_RF(ix - i + 1, iy - j + 1, iz - k)] + rkernel[ACC_RF(ix - i + 1, iy - j, iz - k + 1)] + rkernel[ACC_RF(ix - i, iy - j + 1, iz - k + 1)] + rkernel[ACC_RF(ix - i + 1, iy - j + 1, iz - k + 1)]);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // #OLD_KERNEL_SAMPLING
|
||||
sprintf(cachefname, "temp_kernel_level%03d.tmp", ilevel);
|
||||
LOGUSER("Storing kernel in temp file \'%s\'.", cachefname);
|
||||
fp = fopen(cachefname, "w+");
|
||||
q = nxc;
|
||||
fwrite(reinterpret_cast<void *>(&q), sizeof(unsigned), 1, fp);
|
||||
q = nyc;
|
||||
fwrite(reinterpret_cast<void *>(&q), sizeof(unsigned), 1, fp);
|
||||
q = 2 * (nzc / 2 + 1);
|
||||
fwrite(reinterpret_cast<void *>(&q), sizeof(unsigned), 1, fp);
|
||||
|
||||
for (int ix = 0; ix < nxc; ++ix)
|
||||
{
|
||||
size_t sz = nyc * 2 * (nzc / 2 + 1);
|
||||
//fwrite( reinterpret_cast<void*>(&rkernel_coarse[0]), sizeof(fftw_real), nxc*nyc*2*(nzc/2+1), fp );
|
||||
fwrite(reinterpret_cast<void *>(&rkernel_coarse[ix * sz]), sizeof(fftw_real), sz, fp);
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
|
||||
delete[] rkernel;
|
||||
|
||||
//... prepare for next iteration
|
||||
nx = nxc;
|
||||
ny = nyc;
|
||||
nz = nzc;
|
||||
lx = lxc;
|
||||
ly = lyc;
|
||||
lz = lzc;
|
||||
dx = dxc;
|
||||
rkernel = rkernel_coarse;
|
||||
}
|
||||
|
||||
//... clean up
|
||||
delete[] rkernel;
|
||||
}
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
} // namespace convolution
|
||||
|
||||
|
@ -1232,9 +346,6 @@ void kernel_real_cached<real_t>::precompute_kernel(transfer_function *ptf, tf_ty
|
|||
|
||||
namespace
|
||||
{
|
||||
convolution::kernel_creator_concrete<convolution::kernel_real_cached<double>> creator_d("tf_kernel_real_double");
|
||||
convolution::kernel_creator_concrete<convolution::kernel_real_cached<float>> creator_f("tf_kernel_real_float");
|
||||
|
||||
convolution::kernel_creator_concrete<convolution::kernel_k<double>> creator_kd("tf_kernel_k_double");
|
||||
convolution::kernel_creator_concrete<convolution::kernel_k<float>> creator_kf("tf_kernel_k_float");
|
||||
} // namespace
|
||||
|
|
Loading…
Reference in a new issue