1
0
Fork 0
mirror of https://github.com/cosmo-sims/MUSIC.git synced 2024-09-19 17:03:46 +02:00

major mod: added subgrid deconvolution for DM

* The DM transfer function is now deconvolved with the cell assignment
  function in order to restore power near the Nyquist wavenumber.
* Added an option to change the random number sample cube size
  [random]/cubesize. This should be set >128 to generate un-
  correlated numbers. True fix pending.
* Removed some unused code
* Increased Buffer size for Gadget-2 output plug-in
This commit is contained in:
Oliver Hahn 2010-07-21 01:10:29 -07:00
parent 2b8e08172c
commit 002763346f
12 changed files with 567 additions and 1377 deletions

View file

@ -65,6 +65,9 @@ namespace convolution{
rfftwnd_plan iplan, plan;
std::cout << " - Performing FFT convolution...\n";
plan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
@ -103,226 +106,9 @@ namespace convolution{
}
template< typename real_t >
void perform_filtered( kernel * pk, void *pd )
{
parameters cparam_ = pk->cparam_;
double
kny, kmax,
fftnorm = pow(2.0*M_PI,1.5)/sqrt(cparam_.lx*cparam_.ly*cparam_.lz)/sqrt((double)(cparam_.nx*cparam_.ny*cparam_.nz));
fftw_complex *cdata,*ckernel;
fftw_real *data;
kny = cparam_.nx*M_PI/cparam_.lx;
kmax = M_PI/2.0;
data = reinterpret_cast<fftw_real*>(pd);
cdata = reinterpret_cast<fftw_complex*>(data);
ckernel = reinterpret_cast<fftw_complex*>( pk->get_ptr() );
rfftwnd_plan iplan, plan;
plan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
iplan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
#ifndef SINGLETHREAD_FFTW
rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, data, NULL );
#else
rfftwnd_one_real_to_complex( plan, data, NULL );
#endif
//double kmax2 = kmax*kmax;
#pragma omp parallel for
for( int i=0; i<cparam_.nx; ++i )
for( int j=0; j<cparam_.ny; ++j )
for( int k=0; k<cparam_.nz/2+1; ++k )
{
unsigned ii = (i*cparam_.ny + j) * (cparam_.nz/2+1) + k;
complex ccdata(cdata[ii].re,cdata[ii].im), cckernel(ckernel[ii].re,ckernel[ii].im);
int ik(i), jk(j);
if( ik > cparam_.nx/2 )
ik -= cparam_.nx;
if( jk > cparam_.ny/2 )
jk -= cparam_.ny;
double
kx( M_PI*(double)ik/cparam_.nx ),
ky( M_PI*(double)jk/cparam_.ny ),
kz( M_PI*(double)k/cparam_.nz );//,
// kk( kx*kx+ky*ky+kz*kz );
//... cos(k) is the Hanning filter function (cf. Bertschinger 2001)
double filter = 0.0;
if( true ){//kk <= kmax2 ){
//filter = cos( kx )*cos( ky )*cos( kz );
filter = cos( 0.5*kx )*cos( 0.5*ky )*cos( 0.5*kz );
//filter = 1.0;
//filter *=filter;
}
//filter = 1.0;
ccdata = ccdata * cckernel *fftnorm * filter;
cdata[ii].re = ccdata.real();
cdata[ii].im = ccdata.imag();
}
#ifndef SINGLETHREAD_FFTW
rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL);
#else
rfftwnd_one_complex_to_real(iplan, cdata, NULL);
#endif
rfftwnd_destroy_plan(plan);
rfftwnd_destroy_plan(iplan);
}
template void perform<double>( kernel* pk, void *pd );
template void perform<float>( kernel* pk, void *pd );
template void perform_filtered<double>( kernel* pk, void *pd );
template void perform_filtered<float>( kernel* pk, void *pd );
void truncate( kernel* pk, double R, double alpha )
{
parameters cparam_ = pk->cparam_;
double
dx = cparam_.lx/cparam_.nx,
dy = cparam_.ly/cparam_.ny,
dz = cparam_.lz/cparam_.nz;
double fftnorm = 1.0/(cparam_.nx * cparam_.ny * cparam_.nz);
rfftwnd_plan iplan, plan;
plan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
iplan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
fftw_real *rkernel = reinterpret_cast<fftw_real*>( pk->get_ptr() );
fftw_complex *ckernel = reinterpret_cast<fftw_complex*>( pk->get_ptr() );
#ifndef SINGLETHREAD_FFTW
rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, ckernel, NULL);
#else
rfftwnd_one_complex_to_real(iplan, ckernel, NULL);
#endif
#pragma omp parallel for
for( int i=0; i<cparam_.nx; ++i )
for( int j=0; j<cparam_.ny; ++j )
for( int k=0; k<cparam_.nz; ++k )
{
int iix(i), iiy(j), iiz(k);
double rr[3], rr2;
if( iix > (int)cparam_.nx/2 ) iix -= cparam_.nx;
if( iiy > (int)cparam_.ny/2 ) iiy -= cparam_.ny;
if( iiz > (int)cparam_.nz/2 ) iiz -= cparam_.nz;
rr[0] = (double)iix * dx;
rr[1] = (double)iiy * dy;
rr[2] = (double)iiz * dz;
rr2 = rr[0]*rr[0] + rr[1]*rr[1] + rr[2]*rr[2];
unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
rkernel[idx] *= 0.5*(erfc((sqrt(rr2)-R)*alpha))*fftnorm;
}
#ifndef SINGLETHREAD_FFTW
rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, rkernel, NULL );
#else
rfftwnd_one_real_to_complex( plan, rkernel, NULL );
#endif
}
void truncate_sharp( kernel* pk, double R )
{
parameters cparam_ = pk->cparam_;
double
dx = cparam_.lx/cparam_.nx,
dy = cparam_.ly/cparam_.ny,
dz = cparam_.lz/cparam_.nz,
R2 = R*R;
double fftnorm = 1.0/(cparam_.nx * cparam_.ny * cparam_.nz);
rfftwnd_plan iplan, plan;
plan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
iplan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
fftw_real *rkernel = reinterpret_cast<fftw_real*>( pk->get_ptr() );
fftw_complex *ckernel = reinterpret_cast<fftw_complex*>( pk->get_ptr() );
#ifndef SINGLETHREAD_FFTW
rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, ckernel, NULL);
#else
rfftwnd_one_complex_to_real(iplan, ckernel, NULL);
#endif
#pragma omp parallel for
for( int i=0; i<cparam_.nx; ++i )
for( int j=0; j<cparam_.ny; ++j )
for( int k=0; k<cparam_.nz; ++k )
{
int iix(i), iiy(j), iiz(k);
double rr[3], rr2;
if( iix > (int)cparam_.nx/2 ) iix -= cparam_.nx;
if( iiy > (int)cparam_.ny/2 ) iiy -= cparam_.ny;
if( iiz > (int)cparam_.nz/2 ) iiz -= cparam_.nz;
rr[0] = (double)iix * dx;
rr[1] = (double)iiy * dy;
rr[2] = (double)iiz * dz;
rr2 = rr[0]*rr[0] + rr[1]*rr[1] + rr[2]*rr[2];
unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
if( rr2 > R2 )
{
rkernel[idx] = 0.0;
}else {
rkernel[idx] *= fftnorm;
}
}
#ifndef SINGLETHREAD_FFTW
rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, rkernel, NULL );
#else
rfftwnd_one_real_to_complex( plan, rkernel, NULL );
#endif
}
/*****************************************************************************************\
*** SPECIFIC KERNEL IMPLEMENTATIONS *********************************************
\*****************************************************************************************/
@ -350,9 +136,6 @@ namespace convolution{
{ std::vector<real_t>().swap( kdata_ ); }
};
template< typename real_t >
@ -365,25 +148,41 @@ namespace convolution{
dy = cparam_.ly/cparam_.ny,
dz = cparam_.lz/cparam_.nz,
boxlength = cparam_.pcf->getValue<double>("setup","boxlength"),
nspec = cparam_.pcf->getValue<double>("cosmology","nspec"),//cosmo.nspect,
pnorm = cparam_.pcf->getValue<double>("cosmology","pnorm"),//cosmo.pnorm,
dplus = cparam_.pcf->getValue<double>("cosmology","dplus");//,//cosmo.dplus;
// t00f = cparam_.t0scale;
nspec = cparam_.pcf->getValue<double>("cosmology","nspec"),
pnorm = cparam_.pcf->getValue<double>("cosmology","pnorm"),
dplus = cparam_.pcf->getValue<double>("cosmology","dplus");
unsigned
levelmax = cparam_.pcf->getValue<unsigned>("setup","levelmax");
levelmax = cparam_.pcf->getValue<unsigned>("setup","levelmax");
bool
bperiodic = cparam_.pcf->getValueSafe<bool>("setup","periodic_TF",true);
bperiodic = cparam_.pcf->getValueSafe<bool>("setup","periodic_TF",true);
//if( cparam_.normalize )
// kny *= 2.0;
TransferFunction_real *tfr = new TransferFunction_real(cparam_.ptf,nspec,pnorm,dplus,0.25*cparam_.lx/(double)cparam_.nx,2.0*boxlength,kny, (int)pow(2,levelmax+2));
std::cout << " - Computing transfer function kernel...\n";
if( bperiodic )
if(! cparam_.is_finest )
kny *= pow(2,cparam_.coarse_fact);
TransferFunction_real *tfr =
new TransferFunction_real(cparam_.ptf,nspec,pnorm,dplus,
0.25*cparam_.lx/(double)cparam_.nx,2.0*boxlength,kny, (int)pow(2,levelmax+2));
fftw_real *rkernel = reinterpret_cast<fftw_real*>( &kdata_[0] );
fftw_complex *kkernel = reinterpret_cast<fftw_complex*>( &kdata_[0] );
rfftwnd_plan plan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
int nzp = cparam_.nz/2+1;
double kmax = 0.5*M_PI/std::max(cparam_.nx,std::max(cparam_.ny,cparam_.nz));
if( bperiodic )
{
#pragma omp parallel for
#pragma omp parallel for
for( int i=0; i<cparam_.nx; ++i )
for( int j=0; j<cparam_.ny; ++j )
for( int k=0; k<cparam_.nz; ++k )
@ -416,6 +215,22 @@ namespace convolution{
kdata_[idx] *= fac;
if( cparam_.deconvolve )
{
double rx((double)iix*dx/boxlength),ry((double)iiy*dy/boxlength),rz((double)iiz*dz/boxlength), ico;
ico = 1.0;
if( i>0 )
ico *= sin(M_PI*rx)/(M_PI*rx);
if( j>0 )
ico *= sin(M_PI*ry)/(M_PI*ry);
if( k>0 )
ico *= sin(M_PI*rz)/(M_PI*rz);
kdata_[idx] /= ico;
}
}
}else{
#pragma omp parallel for
@ -439,63 +254,103 @@ namespace convolution{
rr2 = rr[0]*rr[0]+rr[1]*rr[1]+rr[2]*rr[2];
kdata_[idx] = tfr->compute_real(rr2)*fac;
if( cparam_.deconvolve )
{
double rx((double)iix*dx/boxlength),ry((double)iiy*dy/boxlength),rz((double)iiz*dz/boxlength), ico;
ico = 1.0;
if( i>0 )
ico *= sin(M_PI*rx)/(M_PI*rx);
if( j>0 )
ico *= sin(M_PI*ry)/(M_PI*ry);
if( k>0 )
ico *= sin(M_PI*rz)/(M_PI*rz);
kdata_[idx] /= ico;
}
}
}
//kdata_[0] = tfr->compute_real(dx*dx*0.25)*fac;
//std::cerr << "T(r=0) = " << kdata_[0]/fac << std::endl;
//if( cparam_.normalize )
// kdata_[0] *= 0.125;//= 0.0;//*= 0.125;
//... determine normalization
double sum = 0.0;
#pragma omp parallel for reduction(+:sum)
for( int i=0; i<cparam_.nx; ++i )
for( int j=0; j<cparam_.ny; ++j )
for( int k=0; k<cparam_.nz; ++k )
{
unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
//if( idx > 0 )
sum += kdata_[idx];
}
//std::cerr << " - The convolution kernel has avg of " << (sum+kdata_[0])/(cparam_.nx*cparam_.ny*cparam_.nz) << std::endl;
sum /= cparam_.nx*cparam_.ny*cparam_.nz;//-1;
if( false )//cparam_.normalize )
{
#pragma omp parallel for
for( int i=0; i<cparam_.nx; ++i )
for( int j=0; j<cparam_.ny; ++j )
for( int k=0; k<cparam_.nz; ++k )
{
unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
//if( idx > 0 )
kdata_[idx] -= sum;
}
}
if(!cparam_.is_finest)
kdata_[0] /= pow(2.0,1.5*cparam_.coarse_fact);
delete tfr;
//if( t00f < 0.0 )
// kdata_[0] += (tfr->compute_real(0.125*dx*dx) - tfr->compute_real(0.0))*fac;
fftw_real *rkernel = reinterpret_cast<fftw_real*>( &kdata_[0] );
rfftwnd_plan plan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
double k0 = kdata_[0];
if( cparam_.deconvolve )
kdata_[0] = 0.0;
//kdata_[0] = tfr->compute_real(dx*dx*0.25)*fac;
//std::cerr << "T(r=0) = " << kdata_[0]/fac << std::endl;
#ifndef SINGLETHREAD_FFTW
rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, rkernel, NULL );
#else
rfftwnd_one_real_to_complex( plan, rkernel, NULL );
#endif
rfftwnd_destroy_plan(plan);
delete tfr;
if( cparam_.deconvolve )
{
double ksum = 0.0;
unsigned kcount = 0;
#pragma omp parallel for reduction(+:ksum,kcount)
for( int i=0; i<cparam_.nx; ++i )
for( int j=0; j<cparam_.ny; ++j )
for( int k=0; k<nzp; ++k )
{
double kx,ky,kz;
kx = (double)i;
ky = (double)j;
kz = (double)k;
if( kx > cparam_.nx/2 ) kx -= cparam_.nx;
if( ky > cparam_.ny/2 ) ky -= cparam_.ny;
double ipix = 1.0;
if( !cparam_.is_finest )
ipix *= (cos(kx*kmax)*cos(ky*kmax)*cos(kz*kmax));
if( i > 0 )
ipix /= sin(kx*2.0*kmax)/(kx*2.0*kmax);
if( j > 0 )
ipix /= sin(ky*2.0*kmax)/(ky*2.0*kmax);
if( k > 0 )
ipix /= sin(kz*2.0*kmax)/(kz*2.0*kmax);
unsigned q = (i*cparam_.ny+j)*nzp+k;
kkernel[q].re *= ipix;
kkernel[q].im *= ipix;
if( k==0 || k==cparam_.nz/2 )
{
ksum += kkernel[q].re;
kcount++;
}else{
ksum += 2.0*(kkernel[q].re);
kcount+=2;
}
}
double dk = k0-ksum/kcount;
//... enforce the r=0 component by adjusting the k-space mean
#pragma omp parallel for reduction(+:ksum,kcount)
for( int i=0; i<cparam_.nx; ++i )
for( int j=0; j<cparam_.ny; ++j )
for( int k=0; k<nzp; ++k )
{
unsigned q = (i*cparam_.ny+j)*nzp+k;
kkernel[q].re += dk;
}
}
rfftwnd_destroy_plan(plan);
}
@ -529,11 +384,7 @@ namespace convolution{
void kernel_k<real_t>::compute_kernel( void )
{
double
//kny = cparam_.nx*M_PI/cparam_.lx,
fac = cparam_.lx*cparam_.ly*cparam_.lz/pow(2.0*M_PI,3),// /(cparam_.nx*cparam_.ny*cparam_.nz),
// dx = cparam_.lx/cparam_.nx,
// dy = cparam_.ly/cparam_.ny,
// dz = cparam_.lz/cparam_.nz,
fac = cparam_.lx*cparam_.ly*cparam_.lz/pow(2.0*M_PI,3),
boxlength = cparam_.pcf->getValue<double>("setup","boxlength"),
nspec = cparam_.pcf->getValue<double>("cosmology","nspec"),
pnorm = cparam_.pcf->getValue<double>("cosmology","pnorm"),

View file

@ -40,7 +40,9 @@ namespace convolution{
double lx,ly,lz;//,boxlength;
config_file *pcf;
transfer_function* ptf;
bool normalize;
unsigned coarse_fact;
bool deconvolve;
bool is_finest;
};
@ -97,18 +99,6 @@ namespace convolution{
template< typename real_t >
void perform( kernel* pk, void *pd );
//! actual implementation of the FFT convolution (independent of the actual kernel)
//! with Hann (cf. Bertschinger 2001) window filter applied
template< typename real_t >
void perform_filtered( kernel* pk, void *pd );
//! truncate the kernel softly at R, used for boundary corrections
void truncate( kernel*pk, double R, double alpha );
//! truncate the kernel sharply at R, used for boundary corrections
void truncate_sharp( kernel*pk, double R );
}; //namespace convolution

View file

@ -24,6 +24,12 @@
#include "densities.hh"
#include "convolution_kernel.hh"
//... uncomment this to have a single peak in the centre and otherwise zeros
//#define SINGLE_PEAK
//TODO: this should be a larger number by default, just to maintain consistency with old default
#define DEF_RAN_CUBE_SIZE 32
bool is_number(const std::string& s)
{
for (unsigned i = 0; i < s.length(); i++)
@ -34,10 +40,13 @@ bool is_number(const std::string& s)
}
// TODO: use optimized convolution routine when in unigrid mode
void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type type, refinement_hierarchy& refh, grid_hierarchy& delta, bool kspace )
void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type type,
refinement_hierarchy& refh, grid_hierarchy& delta, bool kspace, bool bdeconvolve )
{
unsigned levelmin,levelmax,levelminPoisson;
real_t boxlength;
unsigned ran_cube_size;
std::vector<long> rngseeds;
std::vector<std::string> rngfnames;
@ -47,7 +56,9 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
levelmax = cf.getValue<unsigned>("setup","levelmax");
boxlength = cf.getValue<real_t>( "setup", "boxlength" );
std::cerr << " RUNNING UNIGRID VERSION\n";
ran_cube_size = cf.getValueSafe<unsigned>("random","cubesize",DEF_RAN_CUBE_SIZE);
std::cerr << " - Running unigrid version\n";
//... parse random number options
for( int i=0; i<=100; ++i )
@ -89,6 +100,8 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
if( kspace )
{
std::cout << " - Using k-space transfer function kernel.\n";
#ifdef SINGLE_PRECISION
the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_float" ];
#else
@ -97,6 +110,8 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
}
else
{
std::cout << " - Using real-space transfer function kernel.\n";
#ifdef SINGLE_PRECISION
the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_float" ];
#else
@ -136,7 +151,7 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
x0[0] /= pow(2,levelmin); x0[1] /= pow(2,levelmin); x0[2] /= pow(2,levelmin);
lx[0] /= pow(2,levelmin); lx[1] /= pow(2,levelmin); lx[2] /= pow(2,levelmin);
random_numbers<real_t> *rc = new random_numbers<real_t>( nbase, 32, rngseeds[levelmin], true );//, x0, lx );
random_numbers<real_t> *rc = new random_numbers<real_t>( nbase, ran_cube_size, rngseeds[levelmin], true );//, x0, lx );
if( shift[0]!=0||shift[1]!=0||shift[2]!=0 )
@ -146,13 +161,20 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
rc->fill_all(*top);
delete rc;
#ifdef SINGLE_PEAK
top->zero();
(*top)(top->size(0)/2, top->size(1)/2, top->size(2)/2) = 1.0;
#endif
conv_param.lx = boxlength;
conv_param.ly = boxlength;
conv_param.lz = boxlength;
conv_param.nx = top->nx_;
conv_param.ny = top->ny_;
conv_param.nz = top->nz_;
conv_param.normalize = false;
conv_param.coarse_fact = 0;
conv_param.deconvolve = bdeconvolve;
conv_param.is_finest = true;
convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
@ -198,27 +220,32 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
}
}
void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type type, refinement_hierarchy& refh, grid_hierarchy& delta )
void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type type,
refinement_hierarchy& refh, grid_hierarchy& delta, bool bdeconvolve=true )
{
unsigned levelmin,levelmax,levelminPoisson;
real_t boxlength;
std::vector<long> rngseeds;
std::vector<std::string> rngfnames;
bool force_shift(false);
bool force_shift(false), kspaceTF;
unsigned ran_cube_size;
levelminPoisson = cf.getValue<unsigned>("setup","levelmin");
levelmin = cf.getValueSafe<unsigned>("setup","levelmin_TF",levelminPoisson);
levelmax = cf.getValue<unsigned>("setup","levelmax");
boxlength = cf.getValue<real_t>( "setup", "boxlength" );
force_shift = cf.getValueSafe<bool>("setup", "force_shift", force_shift );
levelmin = cf.getValueSafe<unsigned>("setup","levelmin_TF",levelminPoisson);
levelmax = cf.getValue<unsigned>("setup","levelmax");
boxlength = cf.getValue<real_t>( "setup", "boxlength" );
force_shift = cf.getValueSafe<bool>("setup", "force_shift", force_shift );
kspaceTF = cf.getValueSafe<bool>("setup", "kspace_TF", false);
ran_cube_size = cf.getValueSafe<unsigned>("random","cubesize",DEF_RAN_CUBE_SIZE);
// TODO: need to make sure unigrid gets called whenever possible
/*if( levelmin == levelmax && levelmin==levelminPoisson )
// FIXME: temporarily disabled
if( false )//levelmin == levelmax && levelmin==levelminPoisson )
{
GenerateDensityUnigrid(cf,ptf,type,refh,delta);
GenerateDensityUnigrid(cf,ptf,type,refh,delta,kspaceTF,bdeconvolve);
return;
}*/
}
//... parse random number options
@ -253,10 +280,6 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
float lxref[3];
std::string temp = cf.getValue<std::string>( "setup", "ref_extent" );
sscanf( temp.c_str(), "%g,%g,%g", &lxref[0],&lxref[1],&lxref[2] );
//double lextmin = std::min(lxref[0],std::min(lxref[1],lxref[2]));
// alpha = 0.1*..., cutoff = 0.25 /// maybe 0.08, 0.3
real_t alpha = 0.05*(real_t)nbase/boxlength;//0.45*lextmin*(double)nbase;///boxlength;
real_t cutoff = 0.25;//lextmin*0.9;
int shift[3];
@ -317,7 +340,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
//... if random numbers are not given for lower levels, obtain them by averaging
if( lmingiven >= (int)levelmin )
{
randc[lmingiven] = new random_numbers<real_t>( (unsigned)pow(2,lmingiven), 32, rngseeds[lmingiven], true );//, x0, lx );
randc[lmingiven] = new random_numbers<real_t>( (unsigned)pow(2,lmingiven), ran_cube_size, rngseeds[lmingiven], true );//, x0, lx );
for( int ilevel = lmingiven-1; ilevel >= (int)levelmin; --ilevel ){
if( rngseeds[ilevel-levelmin] > 0 )
@ -333,7 +356,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
{
throw std::runtime_error("You provided a seed for a level below levelmin, this is not supported yet.");
randc[lmingiven] = new random_numbers<real_t>( (unsigned)pow(2,lmingiven), 32, rngseeds[lmingiven], true );//, x0, lx );
randc[lmingiven] = new random_numbers<real_t>( (unsigned)pow(2,lmingiven), ran_cube_size, rngseeds[lmingiven], true );//, x0, lx );
for( int ilevel = lmingiven+1; ilevel <= (int)levelmin; ++ilevel )
{
@ -376,7 +399,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
if( levelminPoisson == levelmin && !force_shift)
{
if( randc[levelmin] == NULL )
rc = new random_numbers<real_t>( nbase, 32, rngseeds[levelmin], true );
rc = new random_numbers<real_t>( nbase, ran_cube_size, rngseeds[levelmin], true );
else
rc = randc[levelmin];
@ -392,10 +415,10 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
int lx[3] = { refh.size(levelmin,0), refh.size(levelmin,1), refh.size(levelmin,2) };
if( randc[levelmin] == NULL )
rc = randc[levelmin] = new random_numbers<real_t>( nbase, 32, rngseeds[levelmin], x0, lx );
rc = randc[levelmin] = new random_numbers<real_t>( nbase, ran_cube_size, rngseeds[levelmin], x0, lx );
//
//if( randc[levelmin] == NULL )
// rc = new random_numbers<real_t>( nbase, 32, rngseeds[levelmin], true );
// rc = new random_numbers<real_t>( nbase, ran_cube_size, rngseeds[levelmin], true );
else
rc = randc[levelmin];
@ -404,13 +427,20 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
delete rc;
#ifdef SINGLE_PEAK
top->zero();
(*top)(top->size(0)/2, top->size(1)/2, top->size(2)/2) = 1.0;
#endif
conv_param.lx = boxlength;
conv_param.ly = boxlength;
conv_param.lz = boxlength;
conv_param.nx = top->nx_;
conv_param.ny = top->ny_;
conv_param.nz = top->nz_;
conv_param.normalize = false;
conv_param.coarse_fact = 0;
conv_param.deconvolve = bdeconvolve;
conv_param.is_finest = true;
convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
@ -420,7 +450,6 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
top->copy( *delta.get_grid(levelmin) );
delete top;
}
//}
for( int i=0; i< (int)levelmax-(int)levelmin; ++i )
@ -444,7 +473,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
refh.size(levelmin,2) };
if( randc[levelmin] == NULL )
rc = randc[levelmin] = new random_numbers<real_t>( nbase, 32, rngseeds[levelmin], x0, lx );
rc = randc[levelmin] = new random_numbers<real_t>( nbase, ran_cube_size, rngseeds[levelmin], x0, lx );
else
rc = randc[levelmin];
@ -465,39 +494,28 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
random_numbers<real_t> *rc;
int x0[3],lx[3];
int lfac = (int)pow(2.0,i+1);
x0[0] = refh.offset_abs(levelmin+i+1,0)-lfac*shift[0]; //((real_t)(offtotx[levelmin+i+1]+lfac*shift[0]))/pow(2.0,levelmin+i+1);
x0[1] = refh.offset_abs(levelmin+i+1,1)-lfac*shift[1]; //((real_t)(offtoty[levelmin+i+1]+lfac*shift[1]))/pow(2.0,levelmin+i+1);
x0[2] = refh.offset_abs(levelmin+i+1,2)-lfac*shift[2]; //((real_t)(offtotz[levelmin+i+1]+lfac*shift[2]))/pow(2.0,levelmin+i+1);
lx[0] = refh.size(levelmin+i+1,0); // /pow(2.0,levelmin+i+1);
lx[1] = refh.size(levelmin+i+1,1); // /pow(2.0,levelmin+i+1);
lx[2] = refh.size(levelmin+i+1,2); // /pow(2.0,levelmin+i+1);
//x0[0] -= 0.5*lx[0]; lx[0] *= 2.0;
//x0[1] -= 0.5*lx[1]; lx[1] *= 2.0;
//x0[2] -= 0.5*lx[2]; lx[2] *= 2.0;
x0[0] = refh.offset_abs(levelmin+i+1,0)-lfac*shift[0];
x0[1] = refh.offset_abs(levelmin+i+1,1)-lfac*shift[1];
x0[2] = refh.offset_abs(levelmin+i+1,2)-lfac*shift[2];
lx[0] = refh.size(levelmin+i+1,0);
lx[1] = refh.size(levelmin+i+1,1);
lx[2] = refh.size(levelmin+i+1,2);
if( randc[levelmin+i+1] == NULL )
rc = randc[levelmin+i+1] = new random_numbers<real_t>((unsigned)pow(2,levelmin+i+1), 32, rngseeds[levelmin+i+1], x0, lx);
rc = randc[levelmin+i+1] = new random_numbers<real_t>((unsigned)pow(2,levelmin+i+1), ran_cube_size, rngseeds[levelmin+i+1], x0, lx);
else
rc = randc[levelmin+i+1];
fine->fill_rand( rc, 1.0, x0[0]-fine->nx_/4, x0[1]-fine->ny_/4, x0[2]-fine->nz_/4 );
if( i+levelmin+1 > (unsigned)lmingiven )
{
if(i==0)
fine->constrain( *top );
else
fine->constrain( *coarse );
}
{
/*int llfac = (int)pow(2.0,i+1);
fine->fill_rand( rc, 1.0, offtotx[levelmin+i+1]-llfac*shift[0],
offtoty[levelmin+i+1]-llfac*shift[1],
offtotz[levelmin+i+1]-llfac*shift[2] );*/
fine->fill_rand( rc, 1.0, x0[0]-fine->nx_/4, x0[1]-fine->ny_/4, x0[2]-fine->nz_/4 );
if( i+levelmin+1 > (unsigned)lmingiven )
{
if(i==0)
fine->constrain( *top );
else
fine->constrain( *coarse );
}
}
delete rc;
rc = NULL;
}
@ -515,25 +533,33 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
delta.create_base_hierarchy(levelmin);
#ifdef SINGLE_PEAK
{
top->zero();
(*top)(top->size(0)/2, top->size(1)/2, top->size(2)/2) = 1.0/pow(2,1.5*(levelmax-levelmin));
}
#endif
DensityGrid<real_t> top_save( *top );
conv_param.lx = boxlength;
conv_param.ly = boxlength;
conv_param.lz = boxlength;
conv_param.nx = top->nx_;
conv_param.ny = top->ny_;
conv_param.nz = top->nz_;
conv_param.normalize = true;
conv_param.coarse_fact = levelmax-levelmin;
conv_param.deconvolve = bdeconvolve;
conv_param.is_finest = false;
convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
//... 1) compute standard convolution for levelmin
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
top->copy( *delta.get_grid(levelmin) );
//... 2) compute contribution to finer grids from non-refined region
*top = top_save;
top_save.clear();
@ -541,24 +567,20 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
refh.size(levelmin+i+1,0)/2, refh.size(levelmin+i+1,1)/2, refh.size(levelmin+i+1,2)/2 );
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
delete the_tf_kernel;
meshvar_bnd delta_longrange( *delta.get_grid(levelmin) );
top->copy( delta_longrange );
delete the_tf_kernel;
delete top;
delete top;
//... restrict these contributions to the next level
delta.add_patch( refh.offset(levelmin+1,0), refh.offset(levelmin+1,1), refh.offset(levelmin+1,2),
refh.size(levelmin+1,0), refh.size(levelmin+1,1), refh.size(levelmin+1,2) );
//mg_linear().prolong( delta_longrange, *delta.get_grid(levelmin+1) );
mg_cubic().prolong( delta_longrange, *delta.get_grid(levelmin+1) );
//mg_lin().prolong( delta_longrange, *delta.get_grid(levelmin+1) );
}
else
{
@ -571,7 +593,6 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
delta.add_patch( refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2),
refh.size(levelmin+i+1,0), refh.size(levelmin+i+1,1), refh.size(levelmin+i+1,2) );
//mg_linear().prolong( *delta.get_grid(levelmin+i), *delta.get_grid(levelmin+i+1) );
mg_cubic().prolong( *delta.get_grid(levelmin+i), *delta.get_grid(levelmin+i+1) );
real_t dx,lx,ly,lz;
@ -581,81 +602,46 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
ly = dx * coarse->ny_;
lz = dx * coarse->nz_;
real_t lmin = std::min( lx, std::min(ly,lz) );
//.. set convolution parameters
//TODO: this needs to be changed, forgetting to set a parameter will not be warned!
conv_param.lx = lx;
conv_param.ly = ly;
conv_param.lz = lz;
conv_param.nx = coarse->nx_;
conv_param.ny = coarse->ny_;
conv_param.nz = coarse->nz_;
conv_param.normalize = false;
conv_param.coarse_fact = levelmax-levelmin-i;
conv_param.deconvolve = bdeconvolve;
conv_param.is_finest = false;
convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
PaddedDensitySubGrid<real_t> coarse_save( *coarse );
//... 2) the inner region
coarse->zero_boundary();
//... 1) the inner region
coarse->subtract_boundary_oct_mean();
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
coarse->copy_add_unpad( *delta.get_grid(levelmin+i) );
//... 3) the 'BC' for the next finer grid
//... 2) the 'BC' for the next finer grid
*coarse = coarse_save;
coarse->subtract_boundary_oct_mean();
coarse->zero_subgrid(refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2),
refh.size(levelmin+i+1,0)/2, refh.size(levelmin+i+1,1)/2, refh.size(levelmin+i+1,2)/2 );
coarse->zero_boundary();
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
meshvar_bnd delta_longrange( *delta.get_grid(levelmin+i) );
coarse->copy_unpad( delta_longrange );
//mg_linear().prolong_add( delta_longrange, *delta.get_grid(levelmin+i+1) );
mg_cubic().prolong_add( delta_longrange, *delta.get_grid(levelmin+i+1) );
#if 1
//... FFT (isolated) boundary noise contribution
//convolution::truncate( the_tf_kernel, cutoff*lmin, 1e-8*alpha );
convolution::truncate_sharp( the_tf_kernel, cutoff*lmin );
//... 3) the coarse-grid correction
*coarse = coarse_save;
//coarse_save.clear();
coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2);
coarse->subtract_oct_mean();
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
coarse->copy_add_unpad( *delta.get_grid(levelmin+i) );
#endif
#if 0
*coarse = coarse_save;
coarse->zero_boundary();
coarse->subtract_oct_mean();
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
//coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2);
coarse->upload_bnd_add( *delta.get_grid(levelmin+i-1) );
#endif
/**coarse = coarse_save;
coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2);
coarse->set_to_oct_mean();
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
coarse->copy_subtract_unpad( *delta.get_grid(levelmin+i) );*/
delete the_tf_kernel;
delete coarse;
@ -679,63 +665,61 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
lx = dx * coarse->nx_;
ly = dx * coarse->ny_;
lz = dx * coarse->nz_;
real_t lmin = std::min( lx, std::min(ly,lz) );
//... set convolution parameters
conv_param.lx = lx;
conv_param.ly = ly;
conv_param.lz = lz;
conv_param.nx = coarse->nx_;
conv_param.ny = coarse->ny_;
conv_param.nz = coarse->nz_;
conv_param.normalize = false;
conv_param.coarse_fact = 0;
conv_param.deconvolve = bdeconvolve;
conv_param.is_finest = true;
#ifdef SINGLE_PEAK
{
coarse->zero();
int
i0 = pow(2,levelmax)/2 - refh.offset_abs(levelmax,0) + coarse->nx_/4,
i1 = pow(2,levelmax)/2 - refh.offset_abs(levelmax,1) + coarse->nx_/4,
i2 = pow(2,levelmax)/2 - refh.offset_abs(levelmax,2) + coarse->nx_/4;
(*coarse)(i0,i1,i2) = 1.0;
}
#endif
//... 1) grid self-contributio
//... with LR/SR splitting and full subgrid
PaddedDensitySubGrid<real_t> coarse_save( *coarse );
//... create convolution kernel
convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
//... 2) the inner region
coarse->zero_boundary();
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
coarse->copy_add_unpad( *delta.get_grid(levelmax) );
//... compute convolution for isolated grid
//... 1) the padded boundary region
#if 1
//... boundary correction
convolution::truncate( the_tf_kernel, cutoff*lmin, alpha );
//convolution::truncate_sharp( the_tf_kernel, cutoff*lmin );
*coarse = coarse_save;
coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2);
coarse->subtract_oct_mean();
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
coarse->copy_add_unpad( *delta.get_grid(levelmax) );
//... subtract oct mean on boundary but not in interior
coarse->subtract_boundary_oct_mean();
#endif
#if 0
// coarse correction
*coarse = coarse_save;
coarse->zero_boundary();
coarse->subtract_oct_mean();
//... perform convolution
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
//... copy to grid hierarchy
coarse->copy_add_unpad( *delta.get_grid(levelmax) );
//... 2) boundary correction to top grid
*coarse = coarse_save;
//... subtract oct mean
coarse->subtract_oct_mean();
//... perform convolution
convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
//... upload data to coarser grid
coarse->upload_bnd_add( *delta.get_grid(levelmax-1) );
// meshvar_bnd delta_longrange( *delta.get_grid(levelmax) );
//coarse->copy_unpad( delta_longrange );
//mg_straight().restrict_add( delta_longrange, *delta.get_grid(levelmax-1) );
#endif
delete the_tf_kernel;
delete coarse;
}
@ -751,11 +735,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
}
}
//... enforce mean condition
//for( int i=levelmin; i<(int)levelmax; ++i )
// enforce_mean( (*delta.get_grid(i+1)), (*delta.get_grid(i)) );
#if 1
//... subtract the box mean.... this will otherwise add
//... a constant curvature term to the potential
double sum = 0.0;
@ -788,9 +768,9 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
for( int iy=0; iy<ny; ++iy )
for( int iz=0; iz<nz; ++iz )
(*delta.get_grid(i))(ix,iy,iz) -= sum;
}
#endif
//... fill coarser levels with data from finer ones...
for( int i=levelmax; i>0; --i )
mg_straight().restrict( (*delta.get_grid(i)), (*delta.get_grid(i-1)) );
@ -800,6 +780,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
void normalize_density( grid_hierarchy& delta )
{
double sum = 0.0;
unsigned levelmin = delta.levelmin(), levelmax = delta.levelmax();

View file

@ -39,6 +39,8 @@
#endif
#endif
#include <assert.h>
#include "config_file.hh"
#include "random.hh"
#include "cosmology.hh"
@ -46,10 +48,10 @@
void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type type,
refinement_hierarchy& refh, grid_hierarchy& delta );
refinement_hierarchy& refh, grid_hierarchy& delta, bool bdeconvolve );
void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type type,
refinement_hierarchy& refh, grid_hierarchy& delta, bool kspace=false );
refinement_hierarchy& refh, grid_hierarchy& delta, bool kspace, bool deconvolve );
void normalize_density( grid_hierarchy& delta );
@ -109,6 +111,26 @@ public:
std::vector<real_t>().swap(data_);
}
//! query the 3D array sizes of the density object
/*! returns the size of the 3D density object along a specified dimension
* @params i the dimension for which size is to be returned
* @returns array size along dimension i
*/
int size( int i )
{
if(i==0) return nx_;
if(i==1) return ny_;
return ny_;
}
//! zeroes the density object
/*! sets all values to 0.0
*/
void zero( void )
{
data_.assign(data_.size(),0.0);
}
//! assigns the contents of another DensityGrid to this
DensityGrid& operator=( const DensityGrid<real_t>& g )
{
@ -141,9 +163,9 @@ public:
* @params i0 x-offset (shift) in cells of the density field with respect to the random number field
* @params j0 y-offset (shift) in cells of the density field with respect to the random number field
* @params k0 z-offset (shift) in cells of the density field with respect to the random number field
* @params zero boolean, if true, the global mean will be subtracted
* @params setzero boolean, if true, the global mean will be subtracted
*/
void fill_rand( /*const*/ random_numbers<real_t>* prc, real_t variance, int i0, int j0, int k0, bool zero=false )
void fill_rand( /*const*/ random_numbers<real_t>* prc, real_t variance, int i0, int j0, int k0, bool setzero=false )
{
double sum = 0.0;
@ -158,7 +180,7 @@ public:
sum /= nx_*ny_*nz_;
if( zero )
if( setzero )
{
#pragma omp parallel for
for( int i=0; i<nx_; ++i )
@ -191,6 +213,32 @@ public:
v(ix,iy,iz) += (*this)(ix,iy,iz);
}
//! subtract the mean of the field
/*! subtracting the total mean implies that a restriction does not change the mass
*/
void subtract_mean( void )
{
double sum = 0.0;
unsigned count;
for( int i=0; i<nx_; i++ )
for( int j=0; j<ny_; j++ )
for( int k=0; k<nz_; k++ )
{
sum += (*this)(i,j,k);
count++;
}
sum /= count;
for( int i=0; i<nx_; i++ )
for( int j=0; j<ny_; j++ )
for( int k=0; k<nz_; k++ )
(*this)(i,j,k) -= sum;
}
//! subtract the mean of each oct of the field
/*! subtracting the mean of each oct implies that a restriction of the field to the coarser level
* is zero everywhere (needed for Hoffman-Ribak constraints)
@ -691,6 +739,72 @@ public:
v(ixu,iyu,izu) -= (*this)(ix,iy,iz);
}
double oct_mean( int i, int j, int k )
{
return 0.125*((*this)(i,j,k)+(*this)(i+1,j,k)+(*this)(i,j+1,k)+(*this)(i,j,k+1)
+(*this)(i+1,j+1,k)+(*this)(i+1,j,k+1)+(*this)(i,j+1,k+1)+(*this)(i+1,j+1,k+1));
}
void set_oct_mean( int i, int j, int k, double val )
{
(*this)(i,j,k) = val;
(*this)(i+1,j,k) = val;
(*this)(i,j+1,k) = val;
(*this)(i,j,k+1) = val;
(*this)(i+1,j+1,k) = val;
(*this)(i+1,j,k+1) = val;
(*this)(i,j+1,k+1) = val;
(*this)(i+1,j+1,k+1) = val;
}
void add_oct_val( int i, int j, int k, double val )
{
(*this)(i,j,k) += val;
(*this)(i+1,j,k) += val;
(*this)(i,j+1,k) += val;
(*this)(i,j,k+1) += val;
(*this)(i+1,j+1,k) += val;
(*this)(i+1,j,k+1) += val;
(*this)(i,j+1,k+1) += val;
(*this)(i+1,j+1,k+1) += val;
}
void subtract_boundary_oct_mean( void )
{
#pragma omp parallel for
for( int ix=0; ix<nx_/4-1; ix+=2 )
{
for( int iy=0; iy<ny_; iy+=2 )
for( int iz=0; iz<nz_; iz+=2 )
{
add_oct_val( ix,iy,iz, -oct_mean(ix,iy,iz) );
add_oct_val( ix+3*nx_/4,iy,iz, -oct_mean(ix+3*nx_/4,iy,iz) );
}
}
#pragma omp parallel for
for( int ix=0; ix<nx_; ix+=2 )
for( int iy=0; iy<ny_/4-1; iy+=2 )
{
for( int iz=0; iz<nz_; iz+=2 )
{
add_oct_val( ix,iy,iz, -oct_mean(ix,iy,iz) );
add_oct_val( ix,iy+3*ny_/4,iz, -oct_mean(ix,iy+3*ny_/4,iz) );
}
}
#pragma omp parallel for
for( int ix=0; ix<nx_; ix+=2 )
for( int iy=0; iy<ny_; iy+=2 )
for( int iz=0; iz<nz_/4-1; iz+=2 )
{
add_oct_val( ix,iy,iz, -oct_mean(ix,iy,iz) );
add_oct_val( ix,iy,iz+3*nz_/4, -oct_mean(ix,iy,iz+3*nz_/4) );
}
}
void zero_boundary( void )
{
for( int ix=0; ix<nx_/4; ++ix )
@ -806,6 +920,9 @@ inline void enforce_mean( M& v, M& V )
finemean /= count;
double dmean = coarsemean-finemean;
dmean = dmean/sqrt(2.0);
std::cerr << " - enforce_mean correction : fine = " << finemean << ", coarse = " << coarsemean << ", diff = " << dmean << std::endl;
#pragma omp parallel for reduction(+:coarsemean,finemean)
for( int i=0; i<nx; ++i )

189
main.cc
View file

@ -99,26 +99,26 @@ void modify_grid_for_TF( const refinement_hierarchy& rh_full, refinement_hierarc
for( unsigned i=lbase+1; i<=lmax; ++i )
{
int x0[3], lx[3], lmax = 0;
int x0[3], lx[3], lxmax = 0;
for( int j=0; j<3; ++j )
{
lx[j] = rh_TF.size(i,j)+2*pad;
x0[j] = rh_TF.offset_abs(i,j)-pad;
if( lx[j] > lmax )
lmax = lx[j];
if( lx[j] > lxmax )
lxmax = lx[j];
}
if( lmax%2 == 1 )
lmax+=1;
if( lxmax%2 == 1 )
lxmax+=1;
for( int j=0; j<3; ++j )
{
double dl = 0.5*((double)(lmax-lx[j]));
int add_left = ceil(dl);
double dl = 0.5*((double)(lxmax-lx[j]));
int add_left = (int)ceil(dl);
lx[j] = lmax;
lx[j] = lxmax;
x0[j] -= add_left;
}
@ -155,6 +155,10 @@ void coarsen_density( const refinement_hierarchy& rh, grid_hierarchy& u )
rh.size(i,0), rh.size(i,1), rh.size(i,2) );
}
}
for( int i=rh.levelmax(); i>0; --i )
mg_straight().restrict( *(u.get_grid(i)), *(u.get_grid(i-1)) );
}
void store_grid_structure( config_file& cf, const refinement_hierarchy& rh )
@ -405,106 +409,48 @@ int main (int argc, const char * argv[])
try{
if( ! do_2LPT )
{
//.. don't use 2LPT ...
if( !the_transfer_function_plugin->tf_is_distinct() || !do_baryons )
{
std::cerr << " - Computing ICs from the total matter power spectrum..." << std::endl;
grid_hierarchy f( nbnd ), u(nbnd);
//... cdm density and displacements
std::cout << "=============================================================\n";
std::cout << " COMPUTING DARK MATTER PARTICLE ICs\n";
std::cout << "-------------------------------------------------------------\n";
grid_hierarchy f( nbnd ), u(nbnd);
GenerateDensityHierarchy( cf, the_transfer_function_plugin, cdm , rh_TF, f, true );
coarsen_density(rh_Poisson, f);
normalize_density(f);
u = f; u.zero();
err = the_poisson_solver->solve(f, u);
GenerateDensityHierarchy( cf, the_transfer_function_plugin, total , rh_TF, f );
coarsen_density(rh_Poisson, f);
normalize_density(f);
u = f; u.zero();
err = the_poisson_solver->solve(f, u);
#if 0
grid_hierarchy u2( nbnd );
compute_Lu( u, u2 );
u=u2;
#endif
the_output_plugin->write_dm_density(f);
the_output_plugin->write_dm_mass(f);
if( do_baryons )
{
if( do_LLA )
compute_LLA_density( u, f, grad_order );
the_output_plugin->write_gas_density(f);
}
f.deallocate();
the_output_plugin->write_dm_potential(u);
the_output_plugin->write_dm_density(f);
the_output_plugin->write_dm_mass(f);
f.deallocate();
the_output_plugin->write_dm_potential(u);
//... DM displacements
{
grid_hierarchy data_forIO(u);
for( int icoord = 0; icoord < 3; ++icoord )
{
//... displacement
the_poisson_solver->gradient(icoord, u, data_forIO );
if( do_CVM)
subtract_finest_mean( data_forIO );
the_output_plugin->write_dm_position(icoord, data_forIO );
//... velocity
if( do_UVA )
{
//FIXME: this still needs to be added
throw std::runtime_error("Updated Velocity Approximation not yet supported!");
}
else
{
data_forIO *= cosmo.vfact;
the_output_plugin->write_dm_velocity(icoord, data_forIO);
}
if( do_baryons )
the_output_plugin->write_gas_velocity(icoord, data_forIO);
}
}else{
//... cdm density and displacements
grid_hierarchy f( nbnd ), u(nbnd);
GenerateDensityHierarchy( cf, the_transfer_function_plugin, cdm , rh_TF, f );
coarsen_density(rh_Poisson, f);
normalize_density(f);
u = f; u.zero();
err = the_poisson_solver->solve(f, u);
the_output_plugin->write_dm_density(f);
the_output_plugin->write_dm_mass(f);
f.deallocate();
the_output_plugin->write_dm_potential(u);
//... DM displacements
{
grid_hierarchy data_forIO(u);
for( int icoord = 0; icoord < 3; ++icoord )
{
//... displacement
the_poisson_solver->gradient(icoord, u, data_forIO );
the_output_plugin->write_dm_position(icoord, data_forIO );
}
}
}
//... gas density
if( do_baryons )
{
std::cout << "=============================================================\n";
std::cout << " COMPUTING BARYON ICs\n";
std::cout << "-------------------------------------------------------------\n";
//... gas density
GenerateDensityHierarchy( cf, the_transfer_function_plugin, baryon , rh_TF, f );
GenerateDensityHierarchy( cf, the_transfer_function_plugin, baryon , rh_TF, f, false );
coarsen_density(rh_Poisson, f);
normalize_density(f);
@ -516,28 +462,31 @@ int main (int argc, const char * argv[])
}
the_output_plugin->write_gas_density(f);
//... velocities
if( do_UVA )
}
//... velocities
if( do_UVA )
{
//... need to co-add potentials...
throw std::runtime_error("Updated Velocity Approximation not yet supported!");
}
else
{
if( do_baryons )
{
//... need to co-add potentials...
throw std::runtime_error("Updated Velocity Approximation not yet supported!");
}
else
{
GenerateDensityHierarchy( cf, the_transfer_function_plugin, total , rh_TF, f );
GenerateDensityHierarchy( cf, the_transfer_function_plugin, total , rh_TF, f, true );
u = f; u.zero();
err = the_poisson_solver->solve(f, u);
grid_hierarchy data_forIO(u);
for( int icoord = 0; icoord < 3; ++icoord )
{
//... displacement
the_poisson_solver->gradient(icoord, u, data_forIO );
data_forIO *= cosmo.vfact;
the_output_plugin->write_dm_velocity(icoord, data_forIO);
}
grid_hierarchy data_forIO(u);
for( int icoord = 0; icoord < 3; ++icoord )
{
//... displacement
the_poisson_solver->gradient(icoord, u, data_forIO );
data_forIO *= cosmo.vfact;
the_output_plugin->write_dm_velocity(icoord, data_forIO);
if( do_baryons )
the_output_plugin->write_gas_velocity(icoord, data_forIO);
}
}
}
@ -547,7 +496,7 @@ int main (int argc, const char * argv[])
{
grid_hierarchy f( nbnd ), u1(nbnd), u2(nbnd);
GenerateDensityHierarchy( cf, the_transfer_function_plugin, total , rh_TF, f );
GenerateDensityHierarchy( cf, the_transfer_function_plugin, total , rh_TF, f, true );
coarsen_density(rh_Poisson, f);
normalize_density(f);
@ -630,8 +579,8 @@ int main (int argc, const char * argv[])
//... clean up
delete the_output_plugin;
std::cout << "-------------------------------------------------------------\n";
std::cout << "=============================================================\n";
if( !bfatal )
std::cout << " - Wrote output file \'" << outfname << "\'\n using plugin \'" << outformat << "\'...\n";

View file

@ -32,10 +32,6 @@ inline double interp2( double x1, double x2, double x3, double f1, double f2, do
}
inline double interp4left( double f0, double f1, double f2, double f3, double f4 )
{
//return -4.0/231.0*f0+4.0/7.0*f1+5.0/7.0*f2-1.0/3.0*f3+5./77.0*f4;

View file

@ -49,31 +49,7 @@ public:
dx = 0.5*((double)sz - 0.5)*s;
dy = 0.5*((double)sy - 0.5)*s;
dz = 0.5*((double)sx - 0.5)*s;
//int xDim = V.m_nx, yDim = V.m_ny;
//int xyDim = xDim * yDim;
//double *pv = &V(x-1,y-1,z-1);
/* factors for Catmull-Rom interpolation */
/*
u[0] = -0.5 * CUBE (dx) + SQR (dx) - 0.5 * dx;
u[1] = 1.5 * CUBE (dx) - 2.5 * SQR (dx) + 1;
u[2] = -1.5 * CUBE (dx) + 2 * SQR (dx) + 0.5 * dx;
u[3] = 0.5 * CUBE (dx) - 0.5 * SQR (dx);
v[0] = -0.5 * CUBE (dy) + SQR (dy) - 0.5 * dy;
v[1] = 1.5 * CUBE (dy) - 2.5 * SQR (dy) + 1;
v[2] = -1.5 * CUBE (dy) + 2 * SQR (dy) + 0.5 * dy;
v[3] = 0.5 * CUBE (dy) - 0.5 * SQR (dy);
w[0] = -0.5 * CUBE (dz) + SQR (dz) - 0.5 * dz;
w[1] = 1.5 * CUBE (dz) - 2.5 * SQR (dz) + 1;
w[2] = -1.5 * CUBE (dz) + 2 * SQR (dz) + 0.5 * dz;
w[3] = 0.5 * CUBE (dz) - 0.5 * SQR (dz);
*/
if( sz == 1 )
{
u[0] = -4.5/64.0;
@ -114,26 +90,6 @@ public:
}
/*for (k = 0; k < 4; k++)
{
q[k] = 0;
for (j = 0; j < 4; j++)
{
r[j] = 0;
for (i = 0; i < 4; i++)
{
r[j] += u[i] * *pv;
pv++;
}
q[k] += v[j] * r[j];
pv += xDim - 4;
}
vox += w[k] * q[k];
pv += xyDim - 4 * xDim;
}*/
for (k = 0; k < 4; k++)
{
q[k] = 0;
@ -142,15 +98,11 @@ public:
r[j] = 0;
for (i = 0; i < 4; i++)
{
//r[j] += u[i] * V(x+k-1,y+j-1,z+i-1);
r[j] += u[i] * V(x+k-2+sx,y+j-2+sy,z+i-2+sz);
//pv++;
}
q[k] += v[j] * r[j];
//pv += xDim - 4;
}
vox += w[k] * q[k];
//pv += xyDim - 4 * xDim;
}
@ -173,28 +125,8 @@ public:
oy = v.offset(1),
oz = v.offset(2);
/*for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
V(ox+i,oy+j,oz+k) = 0.125 * ( v(i2+1,j2,k2) + v(i2,j2+1,k2) + v(i2,j2,k2+1) + v(i2+1,j2+1,k2) +
v(i2+1,j2,k2+1) + v(i2+1,j2+1,k2+1) + v(i2,j2+1,k2+1) + v(i2,j2,k2) );*/
/*#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
V(ox+i,oy+j,oz+k) = 0.125 * ( interp_cubic<1,0,0>(i2+1,j2,k2,v)
+interp_cubic<0,1,0>(i2,j2+1,k2,v)
+interp_cubic<0,0,1>(i2,j2,k2+1,v)
+interp_cubic<1,1,0>(i2+1,j2+1,k2,v)
+interp_cubic<0,1,1>(i2,j2+1,k2+1,v)
+interp_cubic<1,0,1>(i2+1,j2,k2+1,v)
+interp_cubic<1,1,1>(i2+1,j2+1,k2+1,v)
+interp_cubic<0,0,0>(i2,j2,k2,v));
} */
#pragma omp parallel for
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2 = 2*i;
@ -225,12 +157,7 @@ public:
oy = v.offset(1),
oz = v.offset(2);
/*for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
V(ox+i,oy+j,oz+k) = 0.125 * ( v(i2+1,j2,k2) + v(i2,j2+1,k2) + v(i2,j2,k2+1) + v(i2+1,j2+1,k2) +
v(i2+1,j2,k2+1) + v(i2+1,j2+1,k2+1) + v(i2,j2+1,k2+1) + v(i2,j2,k2) );*/
#pragma omp parallel for
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2 = 2*i;
@ -249,20 +176,8 @@ public:
}
/*template< typename m1, typename m2 >
inline void restrict( m1& v, m2& V )
{
int
nx = V.size(0),
ny = V.size(1),
nz = V.size(2);
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
V(i,j,k) = interp_cubic<1,1,1>(i2, j2, k2, v, 2.0);
}*/
//.. straight restriction on boundary
template< typename m1, typename m2 >
inline void restrict_bnd( const m1& v, m2& V ) const
{
@ -411,18 +326,14 @@ public:
template< typename m1, typename m2 >
inline void prolong_bnd( m1& V, m2& v )
{
// int
// nx = V.size(0),
// ny = V.size(1),
// nz = V.size(2);
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
int nbnd = V.m_nbnd;
int nbndtop = nbnd/2;
@ -500,11 +411,7 @@ public:
for( i=0; i<4; ++i )
for( j=0; j<4; ++j )
for( k=0; k<4; ++k )
{
vox += w[i]*w[j]*w[k] * V(x+i-1,y+j-1,z+k-1);
//std::cerr << "[" << i << "," << j << "," << k << "] -- " << u[i]*v[j]*w[k] << std::endl;
}
//vox = (V(x+1,y,z)+V(x,y+1,z)+V(x,y,z+1)+V(x,y,z)+V(x+1,y+1,z)+V(x+1,y,z+1)+V(x,y+1,z+1)+V(x+1,y+1,z+1));
return vox;
}
@ -512,7 +419,6 @@ public:
inline double interp_lin( int x, int y, int z, M& V, double s=1.0 ) const
{
double u[2], v[2], w[2];
//double r[2], q[2];
double vox = 0;
int i,j,k;
@ -539,51 +445,11 @@ public:
w[1] = 1.0/4.0;
}
/*if( sx==0 ){
u[0] = 1.0/64.0;
u[1] = 3.0/64.0;
}else{
u[0] = 3.0/64.0;
u[1] = 1.0/64.0;
}
if( sy==0 ){
v[0] = 1.0/64.0;
v[1] = 3.0/64.0;
}else{
v[0] = 3.0/64.0;
v[1] = 1.0/64.0;
}
if( sz==0 ){
w[0] = 1.0/64.0;
w[1] = 3.0/64.0;
}else{
w[0] = 3.0/64.0;
w[1] = 1.0/64.0;
}*/
//std::cerr << "(" << sx << "," << sy << "," << sz << ") : " << std::endl;
/*for (k = 0; k < 2; k++)
{
q[k] = 0;
for (j = 0; j < 2; j++)
{
r[j] = 0;
for (i = 0; i < 2; i++)
r[j] += u[i] * V(x+k+sx-1,y+j+sy-1,z+i+sz-1);
q[k] += v[j] * r[j];
}
//std::cerr << "[" << i << "," << j << "," << k << "] -- " << w[k]*q[k] << std::endl;
vox += w[k] * q[k];
}*/
for( i=0; i<2; ++i )
for( j=0; j<2; ++j )
for( k=0; k<2; ++k )
{
vox += u[i]*v[j]*w[k] * V(x+i+sx-1,y+j+sy-1,z+k+sz-1);
//std::cerr << "[" << i << "," << j << "," << k << "] -- " << u[i]*v[j]*w[k] << std::endl;
}
//std::cerr << "(" << sx << "," << sy << "," << sz << ") : " << vox <<std::endl;
//throw std::runtime_error("break");
return vox;
}
@ -667,68 +533,9 @@ public:
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
V(i+ox,j+oy,k+oz) = 0.125 * restr_lin(i2, j2, k2, v);
/*V(i+ox,j+oy,k+oz) = 0.125*(interp_lin<1,0,0>(i2,j2,k2,v)+interp_lin<0,1,0>(i2,j2,k2,v)+
interp_lin<0,0,1>(i2,j2,k2,v)+interp_lin<1,1,0>(i2,j2,k2,v)+
interp_lin<1,0,1>(i2,j2,k2,v)+interp_lin<0,1,1>(i2,j2,k2,v)+
interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2,j2,k2,v));*/
}
}
/*
for( int i=0; i<nx; ++i )
{
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
V(i+ox,j+oy,k+oz) = 0.125 * (interp_lin<1,0,0>(i2+2,j2,k2,v)+interp_lin<0,1,0>(i2,j2+2,k2,v)+
interp_lin<0,0,1>(i2,j2,k2+2,v)+interp_lin<1,1,0>(i2+1,j2+2,k2,v)+
interp_lin<1,0,1>(i2+2,j2,k2+2,v)+interp_lin<0,1,1>(i2,j2+2,k2+2,v)+
interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2+2,j2+2,k2+2,v));
}
}
*/
}
#if 0
template< typename m1, typename m2 >
inline void restrict_add( const m1& v, m2& V ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
/*#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
V(i+ox,j+oy,k+oz) += 0.125 * (interp_lin<1,0,0>(i2,j2,k2,v)+interp_lin<0,1,0>(i2,j2,k2,v)+
interp_lin<0,0,1>(i2,j2,k2,v)+interp_lin<1,1,0>(i2,j2,k2,v)+
interp_lin<1,0,1>(i2,j2,k2,v)+interp_lin<0,1,1>(i2,j2,k2,v)+
interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2,j2,k2,v));
}*/
for( int i=0; i<nx; ++i )
{
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
V(i+ox,j+oy,k+oz) += 0.125 * (interp_lin<1,0,0>(i2+2,j2,k2,v)+interp_lin<0,1,0>(i2,j2+2,k2,v)+
interp_lin<0,0,1>(i2,j2,k2+2,v)+interp_lin<1,1,0>(i2+1,j2+2,k2,v)+
interp_lin<1,0,1>(i2+2,j2,k2+2,v)+interp_lin<0,1,1>(i2,j2+2,k2+2,v)+
interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2+2,j2+2,k2+2,v));
}
}
}
#endif
};
@ -1039,28 +846,6 @@ public:
//... restricts v to V
/* inline void restrict( const MeshvarBnd<real_t>& v, MeshvarBnd<real_t>& V ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
V(i+ox,j+oy,k+oz) = 0.125 * ( v(i2+1,j2,k2) + v(i2,j2+1,k2) + v(i2,j2,k2+1) + v(i2+1,j2+1,k2) +
v(i2+1,j2,k2+1) + v(i2+1,j2+1,k2+1) + v(i2,j2+1,k2+1) + v(i2,j2,k2) );
}
}
*/
template< typename m1, typename m2 >
inline void restrict( const m1& v, m2& V ) const
{
@ -1327,34 +1112,6 @@ public:
}
}
//... prolongs V to v
/*template< typename m1, typename m2 >
inline void prolong_add( m1& V, m2& v )
{
unsigned
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
v(i2+1,j2,k2) += V(i+ox,j+oy,k+oz);
v(i2,j2+1,k2) += V(i+ox,j+oy,k+oz);
v(i2,j2,k2+1) += V(i+ox,j+oy,k+oz);
v(i2+1,j2+1,k2) += V(i+ox,j+oy,k+oz);
v(i2+1,j2,k2+1) += V(i+ox,j+oy,k+oz);
v(i2+1,j2+1,k2+1) += V(i+ox,j+oy,k+oz);
v(i2,j2+1,k2+1) += V(i+ox,j+oy,k+oz);
v(i2,j2,k2) += V(i+ox,j+oy,k+oz);
}
}*/
};

View file

@ -64,7 +64,7 @@ protected:
bool m_is_ini;
GridHierarchy<T> *m_pu, *m_pf, *m_pfsave;
//GridHierarchy<bool> *m_pmask;
const MeshvarBnd<T> *m_pubnd;
double compute_error( const MeshvarBnd<T>& u, const MeshvarBnd<T>& unew );
@ -83,20 +83,17 @@ protected:
void twoGrid( unsigned ilevel );
//void interp_coarse_fine( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine, bool bcf=true );
void setBC( unsigned ilevel );
void make_periodic( MeshvarBnd<T> *u );
void interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine );
//void interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine );
public:
solver( GridHierarchy<T>& f, //const MeshvarBnd<T>& uBC_top,
opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth );
solver( GridHierarchy<T>& f, opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth );
~solver()
{ /*delete m_pmask;*/ }
{ }
double solve( GridHierarchy<T>& u, double accuracy, double h=-1.0, bool verbose=false );
@ -111,14 +108,13 @@ public:
template< class S, class I, class O, typename T >
solver<S,I,O,T>::solver( GridHierarchy<T>& f, //const MeshvarBnd<T>& ubnd,
opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth )
solver<S,I,O,T>::solver( GridHierarchy<T>& f, opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth )
: m_scheme(), m_gridop(), m_npresmooth( npresmooth ), m_npostsmooth( npostsmooth ),
m_smoother( smoother ), m_ilevelmin( f.levelmin() ), m_is_ini( true ), m_pf( &f )
{
m_is_ini = true;
// TODO: maybe later : add more than one refinement region
// TODO: maybe later : add more than one refinement region, then we need the mask
//... initialize the refinement mask
//m_pmask = new GridHierarchy<bool>( f.m_nbnd );
//m_pmask->create_base_hierarchy(f.levelmin());
@ -193,7 +189,7 @@ void solver<S,I,O,T>::SOR( T h, MeshvarBnd<T> *u, const MeshvarBnd<T>* f )
double
alpha = 1.2,
//alpha = 2 / (1 + 4 * atan(1.0) / double(u->size(0)))-1.0,
//alpha = 2 / (1 + 4 * atan(1.0) / double(u->size(0)))-1.0, //.. ideal alpha
ialpha = 1.0-alpha;
#pragma omp parallel for
@ -309,12 +305,10 @@ void solver<S,I,O,T>::twoGrid( unsigned ilevel )
//... restrict Lu
m_gridop.restrict( Lu, tLu );
//mg_straight().restrict( Lu, tLu );
Lu.deallocate();
//... restrict source term
m_gridop.restrict( *ff, *fc );
//mg_straight().restrict( *ff, *fc );
int oi, oj, ok;
oi = ff->offset(0);
@ -483,32 +477,29 @@ double solver<S,I,O,T>::solve( GridHierarchy<T>& uh, double acc, double h, bool
{
double err;
unsigned niter = 0;
bool fullverbose = false;
//GridHierarchy<T> uhnew(uh);//, fsave(*m_pf);
m_pu = &uh;
unsigned niter = 0;
//... iterate ...//
while (true)
{
twoGrid( uh.levelmax() );
//err = compute_error( *m_pu, uhnew, verbose );
err = compute_RMS_resid( *m_pu, *m_pf, verbose );
err = compute_RMS_resid( *m_pu, *m_pf, fullverbose );
++niter;
if( verbose ){
std::cout << " --> Step No. " << std::setw(3) << niter << ", Max Err = " << err << std::endl;
std::cout << " ---------------------------------------------------\n";
std::cout << " - Step No. " << std::setw(3) << niter << ", Max Err = " << err << std::endl;
if(fullverbose)
std::cout << " ---------------------------------------------------\n";
}
if( (niter > 1) && ((err < acc) || (niter > 8)) )
if( (niter > 1) && ((err < acc) || (niter > 20)) )
break;
//uhnew = *m_pu;
//*m_pf = fsave;
}
if( err > acc )
@ -517,109 +508,29 @@ double solver<S,I,O,T>::solve( GridHierarchy<T>& uh, double acc, double h, bool
std::cout << " - Converged in " << niter << " steps to req. acc. of " << acc << std::endl;
//uh = uhnew;
//*m_pf = fsave;
//.. make sure that the RHS does not contain the FAS corrections any more
for( int i=m_pf->levelmax(); i>0; --i )//(int)m_pf->levelmin(); --i )
for( int i=m_pf->levelmax(); i>0; --i )
m_gridop.restrict( *m_pf->get_grid(i), *m_pf->get_grid(i-1) );
return err;
}
template< class S, class I, class O, typename T >
void solver<S,I,O,T>::interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine )
{
MeshvarBnd<T> *u = &fine;
MeshvarBnd<T> *utop = &coarse;
int
xoff = u->offset(0),
yoff = u->offset(1),
zoff = u->offset(2);
//... don't do anything if we are not an additional refinement region
if( xoff == 0 && yoff == 0 && zoff == 0 )
return;
int
nx = u->size(0),
ny = u->size(1),
nz = u->size(2);
//.. perform cubic interpolation
mg_cubic().prolong_bnd( *utop, *u );
//.. match fluxes
#pragma omp parallel for schedule(dynamic)
for( int ix=-1; ix<=nx; ++ix )
for( int iy=-1; iy<=ny; ++iy )
for( int iz=-1; iz<=nz; ++iz )
{
bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz);
if( xbnd || ybnd || zbnd )
{
//... only deal with proper ghostzones
if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd))
continue;
int ixtop = (int)(0.5*(double)(ix))+xoff;
int iytop = (int)(0.5*(double)(iy))+yoff;
int iztop = (int)(0.5*(double)(iz))+zoff;
if( ix==-1 ) ixtop=xoff-1;
if( iy==-1 ) iytop=yoff-1;
if( iz==-1 ) iztop=zoff-1;
double flux;;
if( ix == -1 && iy%2==0 && iz%2==0 )
{
flux = 0.0;
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
{
flux += ((*u)(ix+1,iy+j,iz+k)-(*u)(ix,iy+j,iz+k));
}
flux /= 4.0;
double dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
}
}
}
}
//TODO: this only works for 2nd order! (but actually not needed)
template< class S, class I, class O, typename T >
void solver<S,I,O,T>::setBC( unsigned ilevel )
{
//... set only on level before additional refinement starts
//if( ilevel == m_ilevelmin )
if( ilevel == m_ilevelmin )
{
MeshvarBnd<T> *u = m_pu->get_grid(ilevel);
//int nbnd = u->m_nbnd,
int
nx = u->size(0),
ny = u->size(1),
nz = u->size(2);
/*for( int ix=-nbnd; ix<nx+nbnd; ++ix )
for( int iy=-nbnd; iy<ny+nbnd; ++iy )
for( int iz=-nbnd; iz<nz+nbnd; ++iz )
if( ix<0||ix>=nx||iy<0||iy>=ny||iz<0||iz>=nz )
(*u)(ix,iy,iz) = (*m_pubnd)(ix,iy,iz);*/
nx = u->size(0),
ny = u->size(1),
nz = u->size(2);
for( int iy=0; iy<ny; ++iy )
for( int iz=0; iz<nz; ++iz )
{
@ -643,21 +554,7 @@ void solver<S,I,O,T>::setBC( unsigned ilevel )
}/*else if( ilevel < m_ilevelmin ) {
MeshvarBnd<T> *u = m_pu->get_grid(ilevel);
int nbnd = u->m_nbnd,
nx = u->size(0),
ny = u->size(1),
nz = u->size(2);
for( int ix=-nbnd; ix<nx+nbnd; ++ix )
for( int iy=-nbnd; iy<ny+nbnd; ++iy )
for( int iz=-nbnd; iz<nz+nbnd; ++iz )
if( ix<0||ix>=nx||iy<0||iy>=ny||iz<0||iz>=nz )
(*u)(ix,iy,iz) = 0.0;
}*/
}
}
@ -715,336 +612,8 @@ void solver<S,I,O,T>::make_periodic( MeshvarBnd<T> *u )
}
}
#if 0
if( u->m_nbnd == 1 )
{
#pragma omp parallel
{
if( u->offset(0) == 0 )
for( int iy=-1; iy<=ny; ++iy )
for( int iz=-1; iz<=nz; ++iz )
{
int iiy( (iy+ny)%ny ), iiz( (iz+nz)%nz );
(*u)(-1,iy,iz) = (*u)(nx-1,iiy,iiz);
(*u)(nx,iy,iz) = (*u)(0,iiy,iiz);
}
if( u->offset(1) == 0 )
for( int ix=-1; ix<=nx; ++ix )
for( int iz=-1; iz<=nz; ++iz )
{
int iix( (ix+nx)%nx ), iiz( (iz+nz)%nz );
(*u)(ix,-1,iz) = (*u)(iix,ny-1,iiz);
(*u)(ix,ny,iz) = (*u)(iix,0,iiz);
}
if( u->offset(2) == 0 )
for( int ix=-1; ix<=nx; ++ix )
for( int iy=-1; iy<=ny; ++iy )
{
int iix( (ix+nx)%nx ), iiy( (iy+ny)%ny );
(*u)(ix,iy,-1) = (*u)(iix,iiy,nz-1);
(*u)(ix,iy,nz) = (*u)(iix,iiy,0);
}
}
}else if( u->m_nbnd == 2 ){
if( u->offset(0) == 0 )
for( int iy=-2; iy<=ny+1; ++iy )
for( int iz=-2; iz<=nz+1; ++iz )
{
int iiy( (iy+ny)%ny ), iiz( (iz+nz)%nz );
(*u)(-1,iy,iz) = (*u)(nx-1,iiy,iiz);
(*u)(-2,iy,iz) = (*u)(nx-2,iiy,iiz);
(*u)(nx,iy,iz) = (*u)(0,iiy,iiz);
(*u)(nx+1,iy,iz) = (*u)(1,iiy,iiz);
}
if( u->offset(1) == 0 )
for( int ix=-2; ix<=nx+1; ++ix )
for( int iz=-2; iz<=nz+1; ++iz )
{
int iix( (ix+nx)%nx ), iiz( (iz+nz)%nz );
(*u)(ix,-1,iz) = (*u)(iix,ny-1,iiz);
(*u)(ix,-2,iz) = (*u)(iix,ny-2,iiz);
(*u)(ix,ny,iz) = (*u)(iix,0,iiz);
(*u)(ix,ny+1,iz) = (*u)(iix,1,iiz);
}
if( u->offset(2) == 0 )
for( int ix=-2; ix<=nx+1; ++ix )
for( int iy=-2; iy<=ny+1; ++iy )
{
int iix( (ix+nx)%nx ), iiy( (iy+ny)%ny );
(*u)(ix,iy,-1) = (*u)(iix,iiy,nz-1);
(*u)(ix,iy,-2) = (*u)(iix,iiy,nz-2);
(*u)(ix,iy,nz) = (*u)(iix,iiy,0);
(*u)(ix,iy,nz+1) = (*u)(iix,iiy,1);
}
}else
throw std::runtime_error("solver<S,I,O,T>::make_periodic: don't know how to deal with boundary!");
#endif
}
#if 0
template< class S, class I, class O, typename T >
void solver<S,I,O,T>::interp_coarse_fine( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine, bool bcf )
{
MeshvarBnd<T> *u = &fine;
MeshvarBnd<T> *utop = &coarse;
bcf = true;;
//bcf = false;
int
xoff = u->offset(0),
yoff = u->offset(1),
zoff = u->offset(2);
//... don't do anything if we are not an additional refinement region
if( xoff == 0 && yoff == 0 && zoff == 0 )
return;
int
nx = u->size(0),
ny = u->size(1),
nz = u->size(2);
//... set boundary condition for fine grid
#pragma omp parallel for schedule(dynamic)
for( int ix=-1; ix<=nx; ++ix )
for( int iy=-1; iy<=ny; ++iy )
for( int iz=-1; iz<=nz; ++iz )
{
bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz);
//if(ix==-1||ix==nx||iy==-1||iy==ny||iz==-1||iz==nz)
if( xbnd || ybnd || zbnd )
//if( xbnd ^ ybnd ^ zbnd )
{
//... only deal with proper ghostzones
if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd))
continue;
/*int ixtop = (int)(0.5*(double)(ix+2*xoff)+1e-3);
int iytop = (int)(0.5*(double)(iy+2*yoff)+1e-3);
int iztop = (int)(0.5*(double)(iz+2*zoff)+1e-3);*/
int ixtop = (int)(0.5*(double)(ix))+xoff;
int iytop = (int)(0.5*(double)(iy))+yoff;
int iztop = (int)(0.5*(double)(iz))+zoff;
if( ix==-1 ) ixtop=xoff-1;
if( iy==-1 ) iytop=yoff-1;
if( iz==-1 ) iztop=zoff-1;
double ustar1, ustar2, ustar3, uhat;
double fac = 0.5;//0.25;
double flux;;
if( ix == -1 && iy%2==0 && iz%2==0 )
{
flux = 0.0;
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
{
ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) );
ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) );
ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) );
uhat = interp2( /*-1.0, 0.0, 1.0, */ustar1, ustar2, ustar3, fac*((double)k-0.5) );
//(*u)(ix,iy+j,iz+k) = 0.0;//(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k), -1.0 );
(*u)(ix,iy+j,iz+k) = interp2left( uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k) );
flux += ((*u)(ix+1,iy+j,iz+k)-(*u)(ix,iy+j,iz+k));
}
flux /= 4.0;
double dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
//dflux *= 2.0;
if( bcf )
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
(*u)(ix,iy+j,iz+k) -= dflux;
else
(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop+1,iytop,iztop) - 2.0*flux;
}
// right boundary
if( ix == nx && iy%2==0 && iz%2==0 )
{
flux = 0.0;
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
{
ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) );
ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) );
ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
//(*u)(ix,iy+j,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix-1,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), 1.0 );
(*u)(ix,iy+j,iz+k) = interp2right( (*u)(ix-2,iy+j,iz+k), (*u)(ix-1,iy+j,iz+k), uhat );
flux += ((*u)(ix,iy+j,iz+k)-(*u)(ix-1,iy+j,iz+k));
}
flux /= 4.0;
double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop-1,iytop,iztop))/2.0 - flux;
//dflux *= 2.0;
if( bcf )
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
(*u)(ix,iy+j,iz+k) += dflux;
else
(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop-1,iytop,iztop) + 2.0*flux;
}
// bottom boundary
if( iy == -1 && ix%2==0 && iz%2==0 )
{
flux = 0.0;
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
{
ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) );
ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
//(*u)(ix+j,iy,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k), -1.0 );
(*u)(ix+j,iy,iz+k) = interp2left( uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k) );
flux += ((*u)(ix+j,iy+1,iz+k)-(*u)(ix+j,iy,iz+k));
}
flux /= 4.0;
//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop+1,iztop) - flux;
double dflux = ((*utop)(ixtop,iytop+1,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
//dflux *= 2.0;
if( bcf )
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
(*u)(ix+j,iy,iz+k) -= dflux;
else
(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop+1,iztop) - 2.0*flux;
}
// top boundary
if( iy == ny && ix%2==0 && iz%2==0 )
{
flux = 0.0;
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
{
ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) );
ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
//(*u)(ix+j,iy,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix+j,iy-1,iz+k), (*u)(ix+j,iy-2,iz+k), 1.0 );
(*u)(ix+j,iy,iz+k) = interp2right( (*u)(ix+j,iy-2,iz+k), (*u)(ix+j,iy-1,iz+k), uhat );
flux += ((*u)(ix+j,iy,iz+k)-(*u)(ix+j,iy-1,iz+k));
}
flux /= 4.0;
//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop-1,iztop) + flux;
double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop-1,iztop))/2.0 - flux;
//dflux *= 2.0;
if( bcf )
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
(*u)(ix+j,iy,iz+k) += dflux;
else
(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop-1,iztop) + 2.0*flux;
}
// front boundary
if( iz == -1 && ix%2==0 && iy%2==0 )
{
flux = 0.0;
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
{
ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) );
ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
//(*u)(ix+j,iy+k,iz) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2), -1.0 );
(*u)(ix+j,iy+k,iz) = interp2left( uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2) );
flux += ((*u)(ix+j,iy+k,iz+1)-(*u)(ix+j,iy+k,iz));
}
flux /= 4.0;
//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop+1) - flux;
double dflux = ((*utop)(ixtop,iytop,iztop+1)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
//dflux *= 2.0;
if( bcf )
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
(*u)(ix+j,iy+k,iz) -= dflux;
else
(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop+1) - 2.0*flux;
}
// back boundary
if( iz == nz && ix%2==0 && iy%2==0 )
{
flux = 0.0;
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
{
ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) );
ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
//(*u)(ix+j,iy+k,iz) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix+j,iy+k,iz-1), (*u)(ix+j,iy+k,iz-2), 1.0 );
(*u)(ix+j,iy+k,iz) = interp2right( (*u)(ix+j,iy+k,iz-2), (*u)(ix+j,iy+k,iz-1), uhat );
flux += ((*u)(ix+j,iy+k,iz)-(*u)(ix+j,iy+k,iz-1));
}
flux /= 4.0;
//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop-1) + flux;
double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop,iztop-1))/2.0 - flux;
//dflux *= 2.0;
if( bcf )
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
(*u)(ix+j,iy+k,iz) += dflux;
else
(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop-1) + 2.0*flux;
}
}
}
}
#endif
END_MULTIGRID_NAMESPACE

View file

@ -231,6 +231,10 @@ protected:
npleft = nptot,
n2read = std::min((int)block_buf_size_,npleft);
std::cout << " - Writing " << nptot << " particles to Gadget file...\n"
<< " type 1 : " << header_.npart[1] << "\n"
<< " type 5 : " << header_.npart[5] << "\n";
//... particle coordinates ..................................................
unsigned blk;
@ -413,7 +417,8 @@ public:
gadget2_output_plugin( config_file& cf )//std::string afname, Cosmology cosm, Parameters param, unsigned block_buf_size = 100000 )
: output_plugin( cf ), ofs_( fname_.c_str(), std::ios::binary|std::ios::trunc )
{
block_buf_size_ = cf_.getValueSafe<unsigned>("output","gadget_blksize",100000);
block_buf_size_ = cf_.getValueSafe<unsigned>("output","gadget_blksize",1048576);
//block_buf_size_ = cf_.getValueSafe<unsigned>("output","gadget_blksize",100000);
//bbndparticles_ = !cf_.getValueSafe<bool>("output","gadget_nobndpart",false);
bmorethan2bnd_ = false;

View file

@ -75,7 +75,10 @@ double multigrid_poisson_plugin::solve( grid_hierarchy& f, grid_hierarchy& u )
unsigned verbosity = cf_.getValueSafe<unsigned>("setup","verbosity",2);
if( verbosity > 0 )
{
std::cout << "-------------------------------------------------------------\n";
std::cout << " - Invoking multi-grid Poisson solver..." << std::endl;
}
double acc = 1e-5, err;
std::string ps_smoother_name;
@ -306,7 +309,10 @@ double fft_poisson_plugin::solve( grid_hierarchy& f, grid_hierarchy& u )
throw std::runtime_error("fft_poisson_plugin::solve : k-space method can only be used in unigrid mode (levelmin=levelmax)");
if( verbosity > 0 )
{
std::cout << "-------------------------------------------------------------\n";
std::cout << " - Invoking unigrid FFT Poisson solver..." << std::endl;
}
int nx,ny,nz,nzp;
nx = f.get_grid(f.levelmax())->size(0);

View file

@ -83,6 +83,22 @@ protected:
return mean/(cubesize_*cubesize_*cubesize_);
}
//! subtract a constant from an entire cube
void subtract_from_cube( int i, int j, int k, double val )
{
i = (i+ncubes_)%ncubes_;
j = (j+ncubes_)%ncubes_;
k = (k+ncubes_)%ncubes_;
long icube = (i*ncubes_+j)*ncubes_+k;
for( int ii=0; ii<(int)cubesize_; ++ii )
for( int jj=0; jj<(int)cubesize_; ++jj )
for( int kk=0; kk<(int)cubesize_; ++kk )
(*rnums_[icube])(ii,jj,kk) -= val;
}
//! copy random numbers from a cube to a full grid array
template< class C >
void copy_cube( int i, int j, int k, C& dat )
@ -124,6 +140,8 @@ protected:
//! initialize member variables and allocate memory
void initialize( void )
{
std::cerr << " - Generating random numbers w/ sample cube size of " << cubesize_ << std::endl;
ncubes_ = std::max((int)((double)res_/cubesize_),1);
if( res_ < cubesize_ )
{
@ -184,6 +202,20 @@ protected:
sum+=fill_cube(ii, jj, kk);
}
//... subtract mean
#pragma omp parallel for reduction(+:sum)
for( int i=0; i<(int)ncubes_; ++i )
for( int j=0; j<(int)ncubes_; ++j )
for( int k=0; k<(int)ncubes_; ++k )
{
int ii(i),jj(j),kk(k);
ii = (ii+ncubes_)%ncubes_;
jj = (jj+ncubes_)%ncubes_;
kk = (kk+ncubes_)%ncubes_;
subtract_from_cube(ii,jj,kk,sum/(ncubes_*ncubes_*ncubes_));
}
return sum/(ncubes_*ncubes_*ncubes_);
}
@ -412,7 +444,8 @@ public:
rmean = sum/count;
rvar = sum2/count-rmean*rmean;
std::cout << " - Restricted random numbers have mean = " << rmean << " and var = " << rvar << std::endl;
std::cout << " - Restricted random numbers have\n"
<< " mean = " << rmean << ", var = " << rvar << std::endl;
}

View file

@ -152,7 +152,7 @@ public:
double Tr0_;
real_t Tmin_, Tmax_, Tscale_;
real_t rneg_, rneg2_;
real_t rneg_, rneg2_, kny_;
static transfer_function *ptf_;
static real_t nspec_;
@ -199,7 +199,8 @@ protected:
#ifdef NZERO_Q
//q = 0.4;
q = 0.5;
//q = 0.5;
q = 0.9;
#endif
double kmin = qmin, kmax=qmax;
@ -227,15 +228,13 @@ protected:
std::ofstream ofsk("transfer_k.txt");
double sum_in = 0.0;
for( unsigned i=0; i<N; ++i )
{
double k = k0*exp(((int)i - (int)N/2+1) * dlnk);
//double k = k0*exp(((int)i - (int)N/2) * dlnk);
//double k = k0*exp(ii * dlnk);
//... some constants missing ...//
in[i].re = dplus*sqrtpnorm*ptf_->compute( k )*pow(k,0.5*nspec_)*pow(k,1.5-q);
in[i].im = 0.0;
@ -270,7 +269,6 @@ protected:
out[i].im = cu.imag()*fftnorm;
#else
//complex x(dir*q, (double)ii*2.0*M_PI/L);
complex x(dir*q, (double)ii*2.0*M_PI/L);
gsl_sf_result g_a, g_p;
@ -333,26 +331,14 @@ protected:
{
int ii = i;
ii -= N/2-1;
//ii -= N/2;
//if( ii>N/2)
// ii-=N;
double r = r0*exp(-ii*dlnr);
rr[N-i-1] = r;
TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) * in[i].re*pow(r,-(1.5+q));
//TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) * in[i].re*exp( -dir*(q+1.5)*ii*dlnr +q*log(k0r0))/r0;
//rr[i] = r;
//TT[i] = 4.0*M_PI* sqrt(M_PI/2.0) * in[i].re*pow(r,-(1.5+q));
}
{
std::ofstream ofs("transfer_real_new.txt");
std::ofstream ofs("transfer_real.txt");
for( unsigned i=0; i<N; ++i )
{
int ii = i;
@ -380,6 +366,8 @@ public:
real_t q = 0.8;
kny_ = knymax;
std::vector<double> r,T;
transform( pnorm, dplus, nr, q, r, T );
@ -396,7 +384,7 @@ public:
//.. need a factor sqrt( 2*kny^2_x + 2*kny^2_y + 2*kny^2_z )/2 = sqrt(3/2)kny (in equilateral case)
//#warning Need to integrate from Boxsize (see below)?
gsl_integration_qag (&F, 0.0, sqrt(1.5)*knymax, 0, 1e-8, 20000, GSL_INTEG_GAUSS21, wp, &Tr0_, &error);
//gsl_integration_qag (&F, 2.0*M_PI/100.0, sqrt(1.5)*knymax, 0, 1e-8, 20000, GSL_INTEG_GAUSS21, wp, &Tr0_, &error);
//gsl_integration_qag (&F, 0.0, knymax, 0, 1e-8, 20000, GSL_INTEG_GAUSS21, wp, &Tr0_, &error);
gsl_integration_workspace_free(wp);
@ -407,19 +395,6 @@ public:
for( unsigned i=0; i<r.size(); ++i )
{
// spline positive and negative part separately
/*if( T[i] > 0.0 )
{
xp.push_back( 2.0*log10(r[i]) );
yp.push_back( log10(T[i]) );
rneg_ = r[i];
rneg2_ = rneg_*rneg_;
}else {
xn.push_back( 2.0*log10(r[i]) );
yn.push_back( log10(-T[i]) );
}*/
if( r[i] > rmin && r[i] < rmax )
{
xsp.push_back( log10(r[i]) );
@ -433,10 +408,8 @@ public:
accp = gsl_interp_accel_alloc ();
//accn = gsl_interp_accel_alloc ();
//... spline interpolation is only marginally slower here
//splinep = gsl_spline_alloc (gsl_interp_cspline, xsp.size() );
splinep = gsl_spline_alloc (gsl_interp_akima, xsp.size() );
//... set up everything for spline interpolation
@ -445,11 +418,9 @@ public:
//.. build lookup table using spline interpolation
m_xmin = log10(rmin);//m_xtable[0];
m_xmax = log10(rmax);//m_xtable.back();
m_dx = (m_xmax-m_xmin)/nr;//(m_xtable.size()-1);
m_xmin = log10(rmin);
m_xmax = log10(rmax);
m_dx = (m_xmax-m_xmin)/nr;
for(unsigned i=0; i<nr; ++i )
{
@ -457,9 +428,11 @@ public:
m_ytable.push_back( gsl_spline_eval(splinep, (m_xtable.back()), accp) );
}
//... DEBUG: output of spline interpolated real space transfer function
if(false)
{
real_t dlogr = (log10(rmax)-log10(rmin))/1000;
std::ofstream ofs("transfer_splinep.txt");
std::ofstream ofs("transfer_real.txt");
for( int i=0; i< 1000; ++i )
{
@ -483,7 +456,6 @@ public:
~TransferFunction_real()
{ }
#if 1
inline real_t compute_real( real_t r2 ) const
{
const double EPS = 1e-8;
@ -492,22 +464,9 @@ public:
if( r2 <Reps2 )
return Tr0_;
/*real_t logr = log10(r2);
int i = (int)((logr-m_xmin)/m_dx);
if( i < 0 ) i = 0;
else if( i >= m_xtable.size()-1 ) i = m_xtable.size()-2;
real_t y1,y2;
y1 = m_ytable[i];
y2 = m_ytable[i+1];
//std::cerr << y1 << " " << y2 << std::endl;
return y1 + (y2-y1)*(logr/m_dx-(real_t)i);*/
double r=0.5*log10(r2);//sqrt(r2);
double r=0.5*log10(r2);
double ii = (r-m_xmin)/m_dx;
int i = (int)ii;//((r-m_xmin)/m_dx);
int i = (int)ii;
if( i < 0 ) i = 0;
else if( i >= (int)m_xtable.size()-1 ) i = m_xtable.size()-2;
@ -516,32 +475,9 @@ public:
y1 = m_ytable[i];
y2 = m_ytable[i+1];
// return y1 + (y2-y1)*((r-m_xmin)/m_dx-(real_t)i);
return (real_t)((y1 + (y2-y1)*(ii-(double)i))/r2);
}
#else
inline real_t compute_real( real_t r2 ) const
{
const real_t EPS = 1e-8;
const real_t Reps2 = EPS*EPS;
if( r2 <Reps2 )
return Tr0_;
/*if( r2 < rneg2_ )
q = pow(10.0,gsl_spline_eval (splinep, log10(r2), accp));
else
q = -pow(10.0,gsl_spline_eval(splinen, log10(r2), accn));*/
real_t q;
real_t logr2 = log10(r2);
q = pow(10.0,gsl_spline_eval(splinep, logr2, accp));
real_t sign = 1.0;
if( gsl_spline_eval(splinen, logr2, accn) < 0.0 )
sign = -1.0;
return q*sign;
}
#endif
};