1
0
Fork 0
mirror of https://github.com/cosmo-sims/MUSIC.git synced 2024-09-19 17:03:46 +02:00

added Angulo&Pontzen fixing and pairing

This commit is contained in:
Oliver Hahn 2019-07-16 12:04:23 +02:00
parent 2678e48a70
commit 6ad3934f23
3 changed files with 2018 additions and 1997 deletions

View file

@ -4,7 +4,7 @@
a code to generate multi-scale initial conditions
for cosmological simulations
Copyright (C) 2010 Oliver Hahn
Copyright (C) 2010-19 Oliver Hahn
*/
@ -19,7 +19,8 @@ typedef fftw_complex fftwf_complex;
double T0 = 1.0;
namespace convolution{
namespace convolution
{
std::map<std::string, kernel_creator *> &
get_kernel_map()
@ -29,12 +30,13 @@ namespace convolution{
}
template <typename real_t>
void perform( kernel * pk, void *pd, bool shift )
void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip)
{
//return;
parameters cparam_ = pk->cparam_;
double fftnorm = pow(2.0*M_PI,1.5)/sqrt(cparam_.lx*cparam_.ly*cparam_.lz)/sqrt((double)cparam_.nx*(double)cparam_.ny*(double)cparam_.nz);
double fftnormp = 1.0/sqrt((double)cparam_.nx * (double)cparam_.ny * (double)cparam_.nz);
double fftnorm = pow(2.0 * M_PI, 1.5) / sqrt(cparam_.lx * cparam_.ly * cparam_.lz) * fftnormp;
fftw_complex *cdata, *ckernel;
fftw_real *data;
@ -43,12 +45,9 @@ namespace convolution{
cdata = reinterpret_cast<fftw_complex *>(data);
ckernel = reinterpret_cast<fftw_complex *>(pk->get_ptr());
std::cout << " - Performing density convolution... ("
<< cparam_.nx << ", " << cparam_.ny << ", " << cparam_.nz << ")\n";
LOGUSER("Performing kernel convolution on (%5d,%5d,%5d) grid", cparam_.nx, cparam_.ny, cparam_.nz);
LOGUSER("Performing forward FFT...");
#ifdef FFTW3
@ -100,7 +99,8 @@ namespace convolution{
std::complex<double> dcmode(RE(cdata[0]), IM(cdata[0]));
if( !pk->is_ksampled() ) {
if (!pk->is_ksampled())
{
#pragma omp parallel for
for (int i = 0; i < cparam_.nx; ++i)
@ -115,8 +115,10 @@ namespace convolution{
ky = (double)j;
kz = (double)k;
if( kx > cparam_.nx/2 ) kx -= cparam_.nx;
if( ky > cparam_.ny/2 ) ky -= cparam_.ny;
if (kx > cparam_.nx / 2)
kx -= cparam_.nx;
if (ky > cparam_.ny / 2)
ky -= cparam_.ny;
double arg = (kx + ky + kz) * dstag;
std::complex<double> carg(cos(arg), sin(arg));
@ -125,14 +127,21 @@ namespace convolution{
ccdata(RE(cdata[ii]), IM(cdata[ii])),
cckernel(RE(ckernel[ii]), IM(ckernel[ii]));
if( fix ){
ccdata = ccdata / std::abs(ccdata);
}
if( flip ){
ccdata = -ccdata;
}
ccdata = ccdata * cckernel * fftnorm * carg;
RE(cdata[ii]) = ccdata.real();
IM(cdata[ii]) = ccdata.imag();
}
}else{
}
else
{
#pragma omp parallel
{
@ -145,7 +154,8 @@ namespace convolution{
#pragma omp for
for (int i = 0; i < cparam_.nx; ++i)
for( int j=0; j<cparam_.ny; ++j ) {
for (int j = 0; j < cparam_.ny; ++j)
{
for (int k = 0; k < cparam_.nz / 2 + 1; ++k)
{
@ -155,8 +165,10 @@ namespace convolution{
ky = (double)j;
kz = (double)k;
if( kx > cparam_.nx/2 ) kx -= cparam_.nx;
if( ky > cparam_.ny/2 ) ky -= cparam_.ny;
if (kx > cparam_.nx / 2)
kx -= cparam_.nx;
if (ky > cparam_.ny / 2)
ky -= cparam_.ny;
kvec[k] = sqrt(kx * kx + ky * ky + kz * kz);
argvec[k] = (kx + ky + kz) * dstag;
@ -171,6 +183,13 @@ namespace convolution{
std::complex<double> ccdata(RE(cdata[ii]), IM(cdata[ii]));
if( fix ){
ccdata = ccdata / std::abs(ccdata) / fftnormp;
}
if( flip ){
ccdata = -ccdata;
}
ccdata = ccdata * Tkvec[k] * fftnorm * carg;
RE(cdata[ii]) = ccdata.real();
@ -188,7 +207,6 @@ namespace convolution{
IM(cdata[0]) = 0.0;
}
LOGUSER("Performing backward FFT...");
#ifdef FFTW3
@ -213,9 +231,9 @@ namespace convolution{
rfftwnd_destroy_plan(iplan);
#endif
// set the DC mode here to avoid a possible truncation error in single precision
if( pk->is_ksampled() ) {
if (pk->is_ksampled())
{
size_t nelem = (size_t)cparam_.nx * (size_t)cparam_.ny * (size_t)cparam_.nz;
real_t mean = dcmode.real() * fftnorm / (real_t)nelem;
@ -225,9 +243,8 @@ namespace convolution{
}
}
template void perform<double>( kernel* pk, void *pd, bool shift );
template void perform<float>( kernel* pk, void *pd, bool shift );
template void perform<double>(kernel *pk, void *pd, bool shift, bool fix, bool flip);
template void perform<float>(kernel *pk, void *pd, bool shift, bool fix, bool flip);
/*****************************************************************************************/
/*** SPECIFIC KERNEL IMPLEMENTATIONS *********************************************/
@ -313,12 +330,8 @@ namespace convolution{
~kernel_k() { delete tfk_; }
void deallocate() {}
};
////////////////////////////////////////////////////////////////////////////
template <typename real_t>
@ -338,10 +351,14 @@ namespace convolution{
kernel *fetch_kernel(int ilevel, bool isolated = false);
void *get_ptr()
{ return reinterpret_cast<void*> (&kdata_[0]); }
{
return reinterpret_cast<void *>(&kdata_[0]);
}
bool is_ksampled()
{ return false; }
{
return false;
}
void at_k(size_t, const double *, double *) {}
@ -354,7 +371,6 @@ namespace convolution{
{
std::vector<real_t>().swap(kdata_);
}
};
template <typename real_t>
@ -364,7 +380,6 @@ namespace convolution{
sprintf(cachefname, "temp_kernel_level%03d.tmp", ilevel);
FILE *fp = fopen(cachefname, "r");
std::cout << " - Fetching kernel for level " << ilevel << std::endl;
LOGUSER("Loading kernel for level %3d from file \'%s\'...", ilevel, cachefname);
@ -424,7 +439,6 @@ namespace convolution{
rfftwnd_plan plan = rfftw3d_create_plan(cparam_.nx, cparam_.ny, cparam_.nz,
FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
#ifndef SINGLETHREAD_FFTW
rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), plan, rkernel, NULL);
#else
@ -438,7 +452,9 @@ namespace convolution{
template <typename real_t>
inline real_t sqr(real_t x)
{ return x*x; }
{
return x * x;
}
template <typename real_t>
inline real_t eval_split_recurse(const TransferFunction_real *tfr, real_t *xmid, real_t dx, real_t prevval, int nsplit)
@ -470,14 +486,19 @@ namespace convolution{
{
rr2 = sqr(xc[i][0]) + sqr(xc[i][1]) + sqr(xc[i][2]);
res[i] = tfr->compute_real(rr2) * dV;
if( res[i] != res[i] ){ LOGERR("NaN encountered at r=%f, dx=%f, dV=%f : TF = %f",sqrt(rr2),dx,dV,res[i]); abort(); }
if (res[i] != res[i])
{
LOGERR("NaN encountered at r=%f, dx=%f, dV=%f : TF = %f", sqrt(rr2), dx, dV, res[i]);
abort();
}
ressum += res[i];
}
real_t ae = fabs((prevval - ressum));
real_t re = fabs(ae / ressum);
if( ae < abs_err || re < rel_err ) return ressum;
if (ae < abs_err || re < rel_err)
return ressum;
if (nsplit > nmaxsplits)
{
@ -588,10 +609,12 @@ namespace convolution{
int iix(i), iiy(j), iiz(k);
real_t rr[3];
if( iix > (int)nx/2 ) iix -= nx;
if( iiy > (int)ny/2 ) iiy -= ny;
if( iiz > (int)nz/2 ) iiz -= nz;
if (iix > (int)nx / 2)
iix -= nx;
if (iiy > (int)ny / 2)
iiy -= ny;
if (iiz > (int)nz / 2)
iiz -= nz;
//... speed up 8x by copying data to other octants
size_t idx[8];
@ -605,9 +628,18 @@ namespace convolution{
idx[6] = ((size_t)(i)*ny + (size_t)(ny - j)) * 2 * (nz / 2 + 1) + (size_t)(nz - k);
idx[7] = ((size_t)(nx - i) * ny + (size_t)(ny - j)) * 2 * (nz / 2 + 1) + (size_t)(nz - k);
if(i==0||i==nx/2){ idx[1]=idx[3]=idx[5]=idx[7]=(size_t)-1;}
if(j==0||j==ny/2){ idx[2]=idx[3]=idx[6]=idx[7]=(size_t)-1;}
if(k==0||k==nz/2){ idx[4]=idx[5]=idx[6]=idx[7]=(size_t)-1;}
if (i == 0 || i == nx / 2)
{
idx[1] = idx[3] = idx[5] = idx[7] = (size_t)-1;
}
if (j == 0 || j == ny / 2)
{
idx[2] = idx[3] = idx[6] = idx[7] = (size_t)-1;
}
if (k == 0 || k == nz / 2)
{
idx[4] = idx[5] = idx[6] = idx[7] = (size_t)-1;
}
double val = 0.0;
@ -619,9 +651,7 @@ namespace convolution{
rr[1] = ((double)iiy) * dx + jj * boxlength;
rr[2] = ((double)iiz) * dx + kk * boxlength;
if( rr[0] > -boxlength && rr[0] <= boxlength
&& rr[1] > -boxlength && rr[1] <= boxlength
&& rr[2] > -boxlength && rr[2] <= boxlength )
if (rr[0] > -boxlength && rr[0] <= boxlength && rr[1] > -boxlength && rr[1] <= boxlength && rr[2] > -boxlength && rr[2] <= boxlength)
{
#ifdef OLD_KERNEL_SAMPLING
if (ref_fac > 0)
@ -650,7 +680,6 @@ namespace convolution{
val += tfr->compute_real(rr[0] * rr[0] + rr[1] * rr[1] + rr[2] * rr[2]);
}
#else // !OLD_KERNEL_SAMPLING
val += eval_split_recurse(tfr, rr, dx) / (dx * dx * dx);
#endif
@ -663,8 +692,9 @@ namespace convolution{
if (idx[q] != (size_t)-1)
rkernel[idx[q]] = val;
}
}else{
}
else
{
#pragma omp parallel for
for (int i = 0; i < nx; ++i)
for (int j = 0; j < ny; ++j)
@ -673,9 +703,12 @@ namespace convolution{
int iix(i), iiy(j), iiz(k);
real_t rr[3];
if( iix > (int)nx/2 ) iix -= nx;
if( iiy > (int)ny/2 ) iiy -= ny;
if( iiz > (int)nz/2 ) iiz -= nz;
if (iix > (int)nx / 2)
iix -= nx;
if (iiy > (int)ny / 2)
iiy -= ny;
if (iiz > (int)nz / 2)
iiz -= nz;
//size_t idx = ((size_t)i*ny + (size_t)j) * 2*(nz/2+1) + (size_t)k;
@ -699,9 +732,18 @@ namespace convolution{
idx[6] = ((size_t)(i)*ny + (size_t)(ny - j)) * 2 * (nz / 2 + 1) + (size_t)(nz - k);
idx[7] = ((size_t)(nx - i) * ny + (size_t)(ny - j)) * 2 * (nz / 2 + 1) + (size_t)(nz - k);
if(i==0||i==nx/2){ idx[1]=idx[3]=idx[5]=idx[7]=(size_t)-1;}
if(j==0||j==ny/2){ idx[2]=idx[3]=idx[6]=idx[7]=(size_t)-1;}
if(k==0||k==nz/2){ idx[4]=idx[5]=idx[6]=idx[7]=(size_t)-1;}
if (i == 0 || i == nx / 2)
{
idx[1] = idx[3] = idx[5] = idx[7] = (size_t)-1;
}
if (j == 0 || j == ny / 2)
{
idx[2] = idx[3] = idx[6] = idx[7] = (size_t)-1;
}
if (k == 0 || k == nz / 2)
{
idx[4] = idx[5] = idx[6] = idx[7] = (size_t)-1;
}
double val = 0.0; //(fftw_real)tfr->compute_real(rr2)*fac;
@ -734,7 +776,8 @@ namespace convolution{
}
#else
if( i == 0 && j == 0 && k == 0 ) continue;
if (i == 0 && j == 0 && k == 0)
continue;
// use new exact volume integration scheme
val = eval_split_recurse(tfr, rr, dx) / (dx * dx * dx);
@ -748,7 +791,6 @@ namespace convolution{
for (int q = 0; q < 8; ++q)
if (idx[q] != (size_t)-1)
rkernel[idx[q]] = val;
}
}
{
@ -763,8 +805,6 @@ namespace convolution{
/*************************************************************************************/
/******* perform deconvolution *******************************************************/
//bool baryons = type==baryon||type==vbaryon;
if (deconv)
{
@ -772,8 +812,6 @@ namespace convolution{
LOGUSER("Deconvolving fine kernel...");
std::cout << " - Deconvolving density kernel...\n";
double fftnorm = 1.0 / ((size_t)nx * (size_t)ny * (size_t)nz);
double k0 = rkernel[0];
@ -808,9 +846,6 @@ namespace convolution{
#endif
#endif
if (deconv)
{
@ -818,7 +853,8 @@ namespace convolution{
size_t kcount = 0;
double kmax = 0.5 * M_PI / std::max(nx, std::max(ny, nz));
#pragma omp parallel for reduction(+:ksum,kcount)
#pragma omp parallel for reduction(+ \
: ksum, kcount)
for (int i = 0; i < nx; ++i)
for (int j = 0; j < ny; ++j)
for (int k = 0; k < nz / 2 + 1; ++k)
@ -829,9 +865,10 @@ namespace convolution{
ky = (double)j;
kz = (double)k;
if( kx > nx/2 ) kx -= nx;
if( ky > ny/2 ) ky -= ny;
if (kx > nx / 2)
kx -= nx;
if (ky > ny / 2)
ky -= ny;
double kkmax = kmax;
size_t q = ((size_t)i * ny + (size_t)j) * (size_t)(nz / 2 + 1) + (size_t)k;
@ -845,8 +882,9 @@ namespace convolution{
RE(kkernel[q]) /= ipix;
IM(kkernel[q]) /= ipix;
}else{
}
else
{
//... Use piecewise constant response function (NGP-kernel)
//... for finite difference methods
@ -861,12 +899,11 @@ namespace convolution{
RE(kkernel[q]) *= ipix;
IM(kkernel[q]) *= ipix;
}
}
#if 1
else{
else
{
//... if smooth==true, convolve with
//... NGP kernel to get CIC smoothness
@ -882,7 +919,6 @@ namespace convolution{
RE(kkernel[q]) /= ipix;
IM(kkernel[q]) /= ipix;
}
#endif
@ -891,11 +927,12 @@ namespace convolution{
{
ksum += RE(kkernel[q]);
kcount++;
}else{
}
else
{
ksum += 2.0 * (RE(kkernel[q]));
kcount += 2;
}
}
double dk;
@ -908,10 +945,9 @@ namespace convolution{
if (bsmooth_baryons)
dk = 0.0;
//... enforce the r=0 component by adjusting the k-space mean
#pragma omp parallel for reduction(+:ksum,kcount)
#pragma omp parallel for reduction(+ \
: ksum, kcount)
for (int i = 0; i < nx; ++i)
for (int j = 0; j < ny; ++j)
for (int k = 0; k < (nz / 2 + 1); ++k)
@ -925,8 +961,6 @@ namespace convolution{
}
}
#ifdef FFTW3
#ifdef SINGLE_PRECISION
fftwf_execute(iplan);
@ -952,7 +986,6 @@ namespace convolution{
/*************************************************************************************/
/*************************************************************************************/
char cachefname[128];
sprintf(cachefname, "temp_kernel_level%03d.tmp", levelmax);
LOGUSER("Storing kernel in temp file \'%s\'.", cachefname);
@ -965,7 +998,6 @@ namespace convolution{
q = 2 * (nz / 2 + 1);
fwrite(reinterpret_cast<void *>(&q), sizeof(unsigned), 1, fp);
for (int ix = 0; ix < nx; ++ix)
{
size_t sz = ny * 2 * (nz / 2 + 1);
@ -973,9 +1005,6 @@ namespace convolution{
fwrite(reinterpret_cast<void *>(&rkernel[(size_t)ix * sz]), sizeof(fftw_real), sz, fp);
}
fclose(fp);
//... average and fill for other levels
@ -997,7 +1026,6 @@ namespace convolution{
nzc *= 2;
}
dxc = boxlength / (1 << ilevel);
lxc = dxc * nxc;
lyc = dxc * nyc;
@ -1016,9 +1044,12 @@ namespace convolution{
int iix(i), iiy(j), iiz(k);
real_t rr[3], rr2;
if( iix > (int)nxc/2 ) iix -= nxc;
if( iiy > (int)nyc/2 ) iiy -= nyc;
if( iiz > (int)nzc/2 ) iiz -= nzc;
if (iix > (int)nxc / 2)
iix -= nxc;
if (iiy > (int)nyc / 2)
iiy -= nyc;
if (iiz > (int)nzc / 2)
iiz -= nzc;
//... speed up 8x by copying data to other octants
size_t idx[8];
@ -1032,9 +1063,18 @@ namespace convolution{
idx[6] = ((size_t)(i)*nyc + (size_t)(nyc - j)) * 2 * (nzc / 2 + 1) + (size_t)(nzc - k);
idx[7] = ((size_t)(nxc - i) * nyc + (size_t)(nyc - j)) * 2 * (nzc / 2 + 1) + (size_t)(nzc - k);
if(i==0||i==nxc/2){ idx[1]=idx[3]=idx[5]=idx[7]=(size_t)-1;}
if(j==0||j==nyc/2){ idx[2]=idx[3]=idx[6]=idx[7]=(size_t)-1;}
if(k==0||k==nzc/2){ idx[4]=idx[5]=idx[6]=idx[7]=(size_t)-1;}
if (i == 0 || i == nxc / 2)
{
idx[1] = idx[3] = idx[5] = idx[7] = (size_t)-1;
}
if (j == 0 || j == nyc / 2)
{
idx[2] = idx[3] = idx[6] = idx[7] = (size_t)-1;
}
if (k == 0 || k == nzc / 2)
{
idx[4] = idx[5] = idx[6] = idx[7] = (size_t)-1;
}
double val = 0.0;
@ -1046,9 +1086,7 @@ namespace convolution{
rr[1] = ((double)iiy) * dxc + jj * boxlength;
rr[2] = ((double)iiz) * dxc + kk * boxlength;
if( rr[0] > -boxlength && rr[0] < boxlength
&& rr[1] > -boxlength && rr[1] < boxlength
&& rr[2] > -boxlength && rr[2] < boxlength )
if (rr[0] > -boxlength && rr[0] < boxlength && rr[1] > -boxlength && rr[1] < boxlength && rr[2] > -boxlength && rr[2] < boxlength)
{
#ifdef OLD_KERNEL_SAMPLING
rr2 = rr[0] * rr[0] + rr[1] * rr[1] + rr[2] * rr[2];
@ -1065,8 +1103,9 @@ namespace convolution{
if (idx[qq] != (size_t)-1)
rkernel_coarse[idx[qq]] = val;
}
}else{
}
else
{
#pragma omp parallel for
for (int i = 0; i < nxc; ++i)
for (int j = 0; j < nyc; ++j)
@ -1075,9 +1114,12 @@ namespace convolution{
int iix(i), iiy(j), iiz(k);
real_t rr[3];
if( iix > (int)nxc/2 ) iix -= nxc;
if( iiy > (int)nyc/2 ) iiy -= nyc;
if( iiz > (int)nzc/2 ) iiz -= nzc;
if (iix > (int)nxc / 2)
iix -= nxc;
if (iiy > (int)nyc / 2)
iiy -= nyc;
if (iiz > (int)nzc / 2)
iiz -= nzc;
size_t idx = ((size_t)i * nyc + (size_t)j) * 2 * (nzc / 2 + 1) + (size_t)k;
@ -1102,11 +1144,9 @@ namespace convolution{
rkernel_coarse[idx] += val * fac;
#endif
}
}
#ifdef OLD_KERNEL_SAMPLING
LOGUSER("Averaging fine kernel to coarse kernel...");
@ -1117,32 +1157,29 @@ namespace convolution{
for (int iz = 0; iz < nz; iz += 2)
{
int iix(ix / 2), iiy(iy / 2), iiz(iz / 2);
if( ix>nx/2 ) iix+=nxc-nx/2;
if( iy>ny/2 ) iiy+=nyc-ny/2;
if( iz>nz/2 ) iiz+=nzc-nz/2;
if (ix > nx / 2)
iix += nxc - nx / 2;
if (iy > ny / 2)
iiy += nyc - ny / 2;
if (iz > nz / 2)
iiz += nzc - nz / 2;
if( ix==nx/2||iy==ny/2||iz==nz/2 ) continue;
if (ix == nx / 2 || iy == ny / 2 || iz == nz / 2)
continue;
for (int i = 0; i <= 1; ++i)
for (int j = 0; j <= 1; ++j)
for (int k = 0; k <= 1; ++k)
if (i == 0 && k == 0 && j == 0)
rkernel_coarse[ACC_RC(iix, iiy, iiz)] =
0.125 * ( rkernel[ACC_RF(ix-i,iy-j,iz-k)] + rkernel[ACC_RF(ix-i+1,iy-j,iz-k)]
+rkernel[ACC_RF(ix-i,iy-j+1,iz-k)] + rkernel[ACC_RF(ix-i,iy-j,iz-k+1)]
+rkernel[ACC_RF(ix-i+1,iy-j+1,iz-k)] + rkernel[ACC_RF(ix-i+1,iy-j,iz-k+1)]
+rkernel[ACC_RF(ix-i,iy-j+1,iz-k+1)] + rkernel[ACC_RF(ix-i+1,iy-j+1,iz-k+1)]);
0.125 * (rkernel[ACC_RF(ix - i, iy - j, iz - k)] + rkernel[ACC_RF(ix - i + 1, iy - j, iz - k)] + rkernel[ACC_RF(ix - i, iy - j + 1, iz - k)] + rkernel[ACC_RF(ix - i, iy - j, iz - k + 1)] + rkernel[ACC_RF(ix - i + 1, iy - j + 1, iz - k)] + rkernel[ACC_RF(ix - i + 1, iy - j, iz - k + 1)] + rkernel[ACC_RF(ix - i, iy - j + 1, iz - k + 1)] + rkernel[ACC_RF(ix - i + 1, iy - j + 1, iz - k + 1)]);
else
{
rkernel_coarse[ACC_RC(iix, iiy, iiz)] +=
0.125 * ( rkernel[ACC_RF(ix-i,iy-j,iz-k)] + rkernel[ACC_RF(ix-i+1,iy-j,iz-k)]
+rkernel[ACC_RF(ix-i,iy-j+1,iz-k)] + rkernel[ACC_RF(ix-i,iy-j,iz-k+1)]
+rkernel[ACC_RF(ix-i+1,iy-j+1,iz-k)] + rkernel[ACC_RF(ix-i+1,iy-j,iz-k+1)]
+rkernel[ACC_RF(ix-i,iy-j+1,iz-k+1)] + rkernel[ACC_RF(ix-i+1,iy-j+1,iz-k+1)]);
0.125 * (rkernel[ACC_RF(ix - i, iy - j, iz - k)] + rkernel[ACC_RF(ix - i + 1, iy - j, iz - k)] + rkernel[ACC_RF(ix - i, iy - j + 1, iz - k)] + rkernel[ACC_RF(ix - i, iy - j, iz - k + 1)] + rkernel[ACC_RF(ix - i + 1, iy - j + 1, iz - k)] + rkernel[ACC_RF(ix - i + 1, iy - j, iz - k + 1)] + rkernel[ACC_RF(ix - i, iy - j + 1, iz - k + 1)] + rkernel[ACC_RF(ix - i + 1, iy - j + 1, iz - k + 1)]);
}
}
#endif // #OLD_KERNEL_SAMPLING
@ -1182,13 +1219,11 @@ namespace convolution{
delete[] rkernel;
}
}
} // namespace convolution
/**************************************************************************************/
/**************************************************************************************/
namespace
{
convolution::kernel_creator_concrete<convolution::kernel_real_cached<double>> creator_d("tf_kernel_real_double");
@ -1196,7 +1231,4 @@ namespace
convolution::kernel_creator_concrete<convolution::kernel_k<double>> creator_kd("tf_kernel_k_double");
convolution::kernel_creator_concrete<convolution::kernel_k<float>> creator_kf("tf_kernel_k_float");
}
} // namespace

View file

@ -4,7 +4,7 @@
a code to generate multi-scale initial conditions
for cosmological simulations
Copyright (C) 2010 Oliver Hahn
Copyright (C) 2010-19 Oliver Hahn
*/
@ -18,11 +18,11 @@
#include "densities.hh"
#include "transfer_function.hh"
#define ACC_RF(i, j, k) (((((size_t)(i) + nx) % nx) * ny + (((size_t)(j) + ny) % ny)) * 2 * (nz / 2 + 1) + (((size_t)(k) + nz) % nz))
#define ACC_RC(i, j, k) (((((size_t)(i) + nxc) % nxc) * nyc + (((size_t)(j) + nyc) % nyc)) * 2 * (nzc / 2 + 1) + (((size_t)(k) + nzc) % nzc))
namespace convolution{
namespace convolution
{
//! encapsulates all parameters required for transfer function convolution
struct parameters
@ -37,14 +37,12 @@ namespace convolution{
bool smooth;
};
/////////////////////////////////////////////////////////////////
//! abstract base class for a transfer function convolution kernel
class kernel{
class kernel
{
public:
//! all parameters (physical/numerical)
parameters cparam_;
@ -56,7 +54,8 @@ namespace convolution{
//! constructor
kernel(config_file &cf, transfer_function *ptf, refinement_hierarchy &refh, tf_type type)
: pcf_(&cf), ptf_(ptf), prefh_(&refh), type_(type) //cparam_( cp )
{ }
{
}
//! dummy constructor
/*kernel( void )
@ -81,7 +80,6 @@ namespace convolution{
virtual void deallocate() = 0;
};
//! abstract factory class to create convolution kernels
struct kernel_creator
{
@ -92,33 +90,30 @@ namespace convolution{
virtual ~kernel_creator() {}
};
//! access map to the various kernel classes through the factory
std::map<std::string, kernel_creator *> &get_kernel_map();
//! actual implementation of the factory class for kernel objects
template <class Derived>
struct kernel_creator_concrete : public kernel_creator
{
//! constructor inserts the kernel class in the map
kernel_creator_concrete(const std::string &kernel_name)
{ get_kernel_map()[ kernel_name ] = this; }
{
get_kernel_map()[kernel_name] = this;
}
//! creates an instance of the kernel object
kernel *create(config_file &cf, transfer_function *ptf, refinement_hierarchy &refh, tf_type type) const
{ return new Derived( cf, ptf, refh, type ); }
{
return new Derived(cf, ptf, refh, type);
}
};
//! actual implementation of the FFT convolution (independent of the actual kernel)
template <typename real_t>
void perform( kernel* pk, void *pd, bool shift );
void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip);
} //namespace convolution
#endif //__CONVOLUTION_KERNELS_HH

View file

@ -13,7 +13,6 @@
#include "densities.hh"
#include "convolution_kernel.hh"
//TODO: this should be a larger number by default, just to maintain consistency with old default
#define DEF_RAN_CUBE_SIZE 32
@ -30,13 +29,14 @@ double Blend_Function( double k, double kmax )
double const eps = blend_sharpness;
float kp = (1.0f - 2.0f * eps) * kmax;
if( kabs >= kmax ) return 0.;
if( kabs > kp ) return 1.0f/(expf( (kp-kmax)/(k-kp) + (kp-kmax)/(k-kmax) ) + 1.0f);
if (kabs >= kmax)
return 0.;
if (kabs > kp)
return 1.0f / (expf((kp - kmax) / (k - kp) + (kp - kmax) / (k - kmax)) + 1.0f);
return 1.0f;
#endif
}
template <typename m1, typename m2>
void fft_coarsen(m1 &v, m2 &V)
{
@ -98,8 +98,10 @@ void fft_coarsen( m1& v, m2& V )
{
int ii(i), jj(j), kk(k);
if( i > (int)nxF/2 ) ii += (int)nxf/2;
if( j > (int)nyF/2 ) jj += (int)nyf/2;
if (i > (int)nxF / 2)
ii += (int)nxf / 2;
if (j > (int)nyF / 2)
jj += (int)nyf / 2;
size_t qc, qf;
@ -107,7 +109,6 @@ void fft_coarsen( m1& v, m2& V )
double ky = (j <= (int)nyF / 2) ? (double)j : (double)(j - (int)nyF);
double kz = (k <= (int)nzF / 2) ? (double)k : (double)(k - (int)nzF);
qc = ((size_t)i * nyF + (size_t)j) * (nzF / 2 + 1) + (size_t)k;
qf = ((size_t)ii * nyf + (size_t)jj) * (nzf / 2 + 1) + (size_t)kk;
@ -163,7 +164,6 @@ void fft_coarsen( m1& v, m2& V )
#endif
}
//#define NO_COARSE_OVERLAP
template <typename m1, typename m2>
@ -183,9 +183,9 @@ void fft_interpolate( m1& V, m2& v, bool from_basegrid=false )
oxf += nxF / 4 - nxf / 8;
oyf += nyF / 4 - nyf / 8;
ozf += nzF / 4 - nzf / 8;
}else{
}
else
{
oxf -= nxf / 8;
oyf -= nyf / 8;
ozf -= nzf / 8;
@ -234,7 +234,6 @@ void fft_interpolate( m1& V, m2& v, bool from_basegrid=false )
}
#endif
#pragma omp parallel for
for (int i = 0; i < (int)nxf; ++i)
for (int j = 0; j < (int)nyf; ++j)
@ -281,7 +280,6 @@ void fft_interpolate( m1& V, m2& v, bool from_basegrid=false )
double sqrt8 = 8.0; //sqrt(8.0);
double phasefac = -0.5;
// this enables filtered splicing of coarse and fine modes
#if 1
for (int i = 0; i < (int)nxc; i++)
@ -290,10 +288,12 @@ void fft_interpolate( m1& V, m2& v, bool from_basegrid=false )
{
int ii(i), jj(j), kk(k);
if( i> (int)nxc/2 ) ii += (int)nxf/2;
if( j> (int)nyc/2 ) jj += (int)nyf/2;
if( k> (int)nzc/2 ) kk += (int)nzf/2;
if (i > (int)nxc / 2)
ii += (int)nxf / 2;
if (j > (int)nyc / 2)
jj += (int)nyf / 2;
if (k > (int)nzc / 2)
kk += (int)nzf / 2;
size_t qc, qf;
qc = ((size_t)i * (size_t)nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k;
@ -460,8 +460,6 @@ void fft_interpolate( m1& V, m2& v, bool from_basegrid=false )
delete[] rfine;
}
/*******************************************************************************************/
/*******************************************************************************************/
/*******************************************************************************************/
@ -477,6 +475,9 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
bool kspace = cf.getValue<bool>("setup", "kspace_TF");
bool fix = cf.getValueSafe<bool>("setup","fix_mode_amplitude",false);
bool flip = cf.getValueSafe<bool>("setup","flip_mode_amplitude",false);
unsigned nbase = 1 << levelmin;
std::cerr << " - Running unigrid version\n";
@ -508,7 +509,6 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
#endif
}
//... initialize convolution kernel
convolution::kernel *the_tf_kernel = the_kernel_creator->create(cf, ptf, refh, type);
@ -526,7 +526,7 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
the_tf_kernel->fetch_kernel(levelmin, false);
//... perform convolution
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ), shift );
convolution::perform<real_t>(the_tf_kernel, reinterpret_cast<void *>(top->get_data_ptr()), shift, fix, flip);
//... clean up kernel
delete the_tf_kernel;
@ -541,7 +541,6 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
delete top;
}
/*******************************************************************************************/
/*******************************************************************************************/
/*******************************************************************************************/
@ -568,6 +567,13 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
levelmax = cf.getValue<unsigned>("setup", "levelmax");
kspaceTF = cf.getValue<bool>("setup", "kspace_TF");
bool fix = cf.getValueSafe<bool>("setup","fix_mode_amplitude",false);
bool flip = cf.getValueSafe<bool>("setup","flip_mode_amplitude",false);
if( fix && levelmin != levelmax ){
LOGWARN("You have chosen mode fixing for a zoom. This is not well tested,\n please proceed at your own risk...");
}
blend_sharpness = cf.getValueSafe<double>("setup", "kspace_filter", blend_sharpness);
unsigned nbase = 1 << levelmin;
@ -599,7 +605,8 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
convolution::kernel *the_tf_kernel = the_kernel_creator->create(cf, ptf, refh, type);
/***** PERFORM CONVOLUTIONS *****/
if( kspaceTF ){
if (kspaceTF)
{
//... create and initialize density grids with white noise
DensityGrid<real_t> *top(NULL);
@ -610,7 +617,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
top = new DensityGrid<real_t>(nbase, nbase, nbase);
LOGINFO("Performing noise convolution on level %3d", levelmin);
rand.load(*top, levelmin);
convolution::perform<real_t>( the_tf_kernel->fetch_kernel( levelmin, false ), reinterpret_cast<void*>( top->get_data_ptr() ), shift );
convolution::perform<real_t>(the_tf_kernel->fetch_kernel(levelmin, false), reinterpret_cast<void *>(top->get_data_ptr()), shift, fix, flip);
delta.create_base_hierarchy(levelmin);
top->copy(*delta.get_grid(levelmin));
@ -638,7 +645,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
rand.load(*fine, levelmin + i);
convolution::perform<real_t>(the_tf_kernel->fetch_kernel(levelmin + i, true),
reinterpret_cast<void*>( fine->get_data_ptr() ), shift );
reinterpret_cast<void *>(fine->get_data_ptr()), shift, fix, flip);
if (i == 1)
fft_interpolate(*top, *fine, true);
@ -654,15 +661,18 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
fine->copy_unpad(*delta.get_grid(levelmin + i));
if( i==1 ) delete top;
else delete coarse;
if (i == 1)
delete top;
else
delete coarse;
coarse = fine;
}
delete coarse;
}else{
}
else
{
//... create and initialize density grids with white noise
PaddedDensitySubGrid<real_t> *coarse(NULL), *fine(NULL);
@ -678,10 +688,9 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
//rand_gen.load( *top, levelmin );
rand.load(*top, levelmin);
convolution::perform<real_t>(the_tf_kernel->fetch_kernel(levelmax),
reinterpret_cast<void *>(top->get_data_ptr()),
shift );
shift, fix, flip);
the_tf_kernel->deallocate();
delta.create_base_hierarchy(levelmin);
@ -689,14 +698,12 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
delete top;
}
for (int i = 0; i < (int)levelmax - (int)levelmin; ++i)
{
//.......................................................................................................//
//... GENERATE/FILL WITH RANDOM NUMBERS .................................................................//
//.......................................................................................................//
if (i == 0)
{
top = new DensityGrid<real_t>(nbase, nbase, nbase);
@ -733,10 +740,9 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
//... 1) compute standard convolution for levelmin
LOGUSER("Computing density self-contribution");
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ), shift );
convolution::perform<real_t>(the_tf_kernel, reinterpret_cast<void *>(top->get_data_ptr()), shift, fix, flip);
top->copy(*delta.get_grid(levelmin));
//... 2) compute contribution to finer grids from non-refined region
LOGUSER("Computing long-range component for finer grid.");
*top = top_save;
@ -744,7 +750,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
top->zero_subgrid(refh.offset(levelmin + i + 1, 0), refh.offset(levelmin + i + 1, 1), refh.offset(levelmin + i + 1, 2),
refh.size(levelmin + i + 1, 0) / 2, refh.size(levelmin + i + 1, 1) / 2, refh.size(levelmin + i + 1, 2) / 2);
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ), shift );
convolution::perform<real_t>(the_tf_kernel, reinterpret_cast<void *>(top->get_data_ptr()), shift, fix, flip);
the_tf_kernel->deallocate();
meshvar_bnd delta_longrange(*delta.get_grid(levelmin));
@ -782,7 +788,6 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
delta.add_patch(refh.offset(levelmin + i + 1, 0), refh.offset(levelmin + i + 1, 1), refh.offset(levelmin + i + 1, 2),
refh.size(levelmin + i + 1, 0), refh.size(levelmin + i + 1, 1), refh.size(levelmin + i + 1, 2));
//... copy coarse grid long-range component to fine grid
LOGUSER("Injecting long range component");
//mg_straight().prolong( *delta.get_grid(levelmin+i), *delta.get_grid(levelmin+i+1) );
@ -794,10 +799,9 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
//... 1) the inner region
LOGUSER("Computing density self-contribution");
coarse->subtract_boundary_oct_mean();
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()), shift );
convolution::perform<real_t>(the_tf_kernel, reinterpret_cast<void *>(coarse->get_data_ptr()), shift, fix, flip);
coarse->copy_add_unpad(*delta.get_grid(levelmin + i));
//... 2) the 'BC' for the next finer grid
LOGUSER("Computing long-range component for finer grid.");
*coarse = coarse_save;
@ -805,7 +809,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
coarse->zero_subgrid(refh.offset(levelmin + i + 1, 0), refh.offset(levelmin + i + 1, 1), refh.offset(levelmin + i + 1, 2),
refh.size(levelmin + i + 1, 0) / 2, refh.size(levelmin + i + 1, 1) / 2, refh.size(levelmin + i + 1, 2) / 2);
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( coarse->get_data_ptr() ), shift );
convolution::perform<real_t>(the_tf_kernel, reinterpret_cast<void *>(coarse->get_data_ptr()), shift, fix, flip);
//... interpolate to finer grid(s)
meshvar_bnd delta_longrange(*delta.get_grid(levelmin + i));
@ -820,7 +824,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
LOGUSER("Computing coarse grid correction");
*coarse = coarse_save;
coarse->subtract_oct_mean();
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()), shift );
convolution::perform<real_t>(the_tf_kernel, reinterpret_cast<void *>(coarse->get_data_ptr()), shift, fix, flip);
coarse->subtract_mean();
coarse->upload_bnd_add(*delta.get_grid(levelmin + i - 1));
@ -829,7 +833,6 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
delete coarse;
}
coarse = fine;
}
@ -853,12 +856,11 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
coarse->subtract_boundary_oct_mean();
//... perform convolution
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()), shift );
convolution::perform<real_t>(the_tf_kernel, reinterpret_cast<void *>(coarse->get_data_ptr()), shift, fix, flip);
//... copy to grid hierarchy
coarse->copy_add_unpad(*delta.get_grid(levelmax));
//... 2) boundary correction to top grid
LOGUSER("Computing coarse grid correction");
*coarse = coarse_save;
@ -867,7 +869,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
coarse->subtract_oct_mean();
//... perform convolution
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()), shift );
convolution::perform<real_t>(the_tf_kernel, reinterpret_cast<void *>(coarse->get_data_ptr()), shift, fix, flip);
the_tf_kernel->deallocate();
@ -878,7 +880,6 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
delete coarse;
}
}
delete the_tf_kernel;
@ -896,7 +897,6 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
LOGUSER("Finished computing the density field in %fs", tend - tstart);
}
/*******************************************************************************************/
/*******************************************************************************************/
/*******************************************************************************************/
@ -915,7 +915,8 @@ void normalize_density( grid_hierarchy& delta )
ny = delta.get_grid(levelmin)->size(1);
nz = delta.get_grid(levelmin)->size(2);
#pragma omp parallel for reduction(+:sum)
#pragma omp parallel for reduction(+ \
: sum)
for (int ix = 0; ix < (int)nx; ++ix)
for (size_t iy = 0; iy < ny; ++iy)
for (size_t iz = 0; iz < nz; ++iz)
@ -962,12 +963,7 @@ void coarsen_density( const refinement_hierarchy& rh, GridHierarchy<real_t>& u,
for (unsigned i = 1; i <= rh.levelmax(); ++i)
{
if( rh.offset(i,0) != u.get_grid(i)->offset(0)
|| rh.offset(i,1) != u.get_grid(i)->offset(1)
|| rh.offset(i,2) != u.get_grid(i)->offset(2)
|| rh.size(i,0) != u.get_grid(i)->size(0)
|| rh.size(i,1) != u.get_grid(i)->size(1)
|| rh.size(i,2) != u.get_grid(i)->size(2) )
if (rh.offset(i, 0) != u.get_grid(i)->offset(0) || rh.offset(i, 1) != u.get_grid(i)->offset(1) || rh.offset(i, 2) != u.get_grid(i)->offset(2) || rh.size(i, 0) != u.get_grid(i)->size(0) || rh.size(i, 1) != u.get_grid(i)->size(1) || rh.size(i, 2) != u.get_grid(i)->size(2))
{
u.cut_patch(i, rh.offset_abs(i, 0), rh.offset_abs(i, 1), rh.offset_abs(i, 2),
rh.size(i, 0), rh.size(i, 1), rh.size(i, 2));
@ -976,6 +972,4 @@ void coarsen_density( const refinement_hierarchy& rh, GridHierarchy<real_t>& u,
for (int i = rh.levelmax(); i > 0; --i)
mg_straight().restrict(*(u.get_grid(i)), *(u.get_grid(i - 1)));
}