mirror of
https://github.com/cosmo-sims/MUSIC.git
synced 2024-09-19 17:03:46 +02:00
Speeded up transfer function calculation significantly.
Added an approximate log10 algorithm. Use spherical symmetry of problem to avoid computing duplicate values.
This commit is contained in:
parent
ae0d833155
commit
785601d4ae
6 changed files with 156 additions and 34 deletions
30
Numerics.hh
30
Numerics.hh
|
@ -41,6 +41,36 @@
|
|||
|
||||
real_t integrate( double (* func) (double x, void * params), double a, double b, void *params=NULL);
|
||||
|
||||
typedef __attribute__((__may_alias__)) int aint;
|
||||
|
||||
inline float fast_log2 (float val)
|
||||
{
|
||||
//if( sizeof(int) != sizeof(float) )
|
||||
// throw std::runtime_error("fast_log2 will fail on this system!!");
|
||||
aint * const exp_ptr = reinterpret_cast <aint *> (&val);
|
||||
aint x = *exp_ptr;
|
||||
const int log_2 = ((x >> 23) & 255) - 128;
|
||||
x &= ~(255 << 23);
|
||||
x += 127 << 23;
|
||||
*exp_ptr = x;
|
||||
|
||||
val = ((-1.0f/3) * val + 2) * val - 2.0f/3; // (1)
|
||||
|
||||
return (val + log_2);
|
||||
}
|
||||
|
||||
inline float fast_log (const float &val)
|
||||
{
|
||||
return (fast_log2 (val) * 0.69314718f);
|
||||
}
|
||||
|
||||
inline float fast_log10 (const float &val)
|
||||
{
|
||||
return (fast_log2 (val) * 0.3010299956639812f);
|
||||
}
|
||||
|
||||
|
||||
|
||||
struct Base_interp
|
||||
{
|
||||
int n, mm, jsav, cor, dj;
|
||||
|
|
|
@ -159,7 +159,7 @@ namespace convolution{
|
|||
bool
|
||||
bperiodic = cparam_.pcf->getValueSafe<bool>("setup","periodic_TF",true);
|
||||
|
||||
const int ref_fac = (int)(pow(2,cparam_.coarse_fact)+0.5), ql = -ref_fac/2+1, qr=ql+ref_fac, rf8=pow(ref_fac,3);
|
||||
const int ref_fac = (int)(pow(2,cparam_.coarse_fact)+0.5), ql = -ref_fac/2+1, qr=ql+ref_fac, rf8=(int)pow(ref_fac,3);
|
||||
|
||||
std::cout << " - Computing transfer function kernel...\n";
|
||||
|
||||
|
@ -179,13 +179,15 @@ namespace convolution{
|
|||
int nzp = cparam_.nz/2+1;
|
||||
double kmax = 0.5*M_PI/std::max(cparam_.nx,std::max(cparam_.ny,cparam_.nz));
|
||||
|
||||
T0 = tfr->compute_real(0.0);
|
||||
|
||||
//... obtain a grid-version of the real space transfer function
|
||||
if( bperiodic )
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for( int i=0; i<cparam_.nx; ++i )
|
||||
for( int j=0; j<cparam_.ny; ++j )
|
||||
for( int k=0; k<cparam_.nz; ++k )
|
||||
#pragma omp parallel for //schedule(static,4)
|
||||
for( int i=0; i<=cparam_.nx/2; ++i )
|
||||
for( int j=0; j<=cparam_.ny/2; ++j )
|
||||
for( int k=0; k<=cparam_.nz/2; ++k )
|
||||
{
|
||||
int iix(i), iiy(j), iiz(k);
|
||||
double rr[3], rr2;
|
||||
|
@ -194,7 +196,25 @@ namespace convolution{
|
|||
if( iiy > (int)cparam_.ny/2 ) iiy -= cparam_.ny;
|
||||
if( iiz > (int)cparam_.nz/2 ) iiz -= cparam_.nz;
|
||||
|
||||
unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
|
||||
|
||||
//... speed up 8x by copying data to other octants
|
||||
int idx[8];
|
||||
int nx=cparam_.nx, ny = cparam_.ny, nz = cparam_.nz;
|
||||
|
||||
idx[0] = ((i)*cparam_.ny + (j)) * 2*(cparam_.nz/2+1) + (k);
|
||||
idx[1] = ((nx-i)*cparam_.ny + (j)) * 2*(cparam_.nz/2+1) + (k);
|
||||
idx[2] = ((i)*cparam_.ny + (ny-j)) * 2*(cparam_.nz/2+1) + (k);
|
||||
idx[3] = ((nx-i)*cparam_.ny + (ny-j)) * 2*(cparam_.nz/2+1) + (k);
|
||||
idx[4] = ((i)*cparam_.ny + (j)) * 2*(cparam_.nz/2+1) + (nz-k);
|
||||
idx[5] = ((nx-i)*cparam_.ny + (j)) * 2*(cparam_.nz/2+1) + (nz-k);
|
||||
idx[6] = ((i)*cparam_.ny + (ny-j)) * 2*(cparam_.nz/2+1) + (nz-k);
|
||||
idx[7] = ((nx-i)*cparam_.ny + (ny-j)) * 2*(cparam_.nz/2+1) + (nz-k);
|
||||
|
||||
if(i==0||i==nx/2){ idx[1]=idx[3]=idx[5]=idx[7]=-1;}
|
||||
if(j==0||j==ny/2){ idx[2]=idx[3]=idx[6]=idx[7]=-1;}
|
||||
if(k==0||k==nz/2){ idx[4]=idx[5]=idx[6]=idx[7]=-1;}
|
||||
|
||||
double val = 0.0;
|
||||
|
||||
for( int ii=-1; ii<=1; ++ii )
|
||||
for( int jj=-1; jj<=1; ++jj )
|
||||
|
@ -204,11 +224,11 @@ namespace convolution{
|
|||
rr[1] = ((double)iiy ) * dy + jj*boxlength;
|
||||
rr[2] = ((double)iiz ) * dz + kk*boxlength;
|
||||
|
||||
if( rr[0] > -boxlength && rr[0] <= boxlength
|
||||
&& rr[1] > -boxlength && rr[1] <= boxlength
|
||||
&& rr[2] > -boxlength && rr[2] <= boxlength )
|
||||
if( rr[0] > -boxlength && rr[0] < boxlength
|
||||
&& rr[1] > -boxlength && rr[1] < boxlength
|
||||
&& rr[2] > -boxlength && rr[2] < boxlength )
|
||||
{
|
||||
if( ref_fac > 1 )
|
||||
if( ref_fac > 1 )//&& fabs(tfr->get_grad(rr2)*dx/T0) > 1e-4 )
|
||||
{
|
||||
double rrr[3];
|
||||
register double rrr2[3];
|
||||
|
@ -225,19 +245,25 @@ namespace convolution{
|
|||
rrr[2] = rr[2]+(double)kkk*0.5*dx - 0.25*dx;
|
||||
rrr2[2]= rrr[2]*rrr[2];
|
||||
rr2 = rrr2[0]+rrr2[1]+rrr2[2];
|
||||
kdata_[idx] += (fftw_real)(tfr->compute_real(rr2)/rf8);
|
||||
val += tfr->compute_real(rr2)/rf8;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}else{
|
||||
rr2 = rr[0]*rr[0]+rr[1]*rr[1]+rr[2]*rr[2];
|
||||
val += tfr->compute_real(rr2);
|
||||
kdata_[idx] += (fftw_real)tfr->compute_real(rr2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kdata_[idx] *= fac;
|
||||
val *= fac;
|
||||
|
||||
for(int q=0;q<8;++q)
|
||||
if(idx[q]!=-1)
|
||||
kdata_[idx[q]] = val;
|
||||
|
||||
}
|
||||
}else{
|
||||
#pragma omp parallel for
|
||||
|
|
7
mesh.hh
7
mesh.hh
|
@ -865,7 +865,7 @@ class refinement_hierarchy
|
|||
std::vector<double> x0_,y0_,z0_,xl_,yl_,zl_;
|
||||
std::vector<unsigned> ox_,oy_,oz_,oax_,oay_,oaz_;
|
||||
std::vector<unsigned> nx_,ny_,nz_;
|
||||
unsigned levelmin_, levelmax_, padding_;
|
||||
unsigned levelmin_, levelmax_, levelmin_tf_, padding_;
|
||||
config_file& cf_;
|
||||
bool align_top_;
|
||||
double x0ref_[3], lxref_[3];
|
||||
|
@ -885,6 +885,7 @@ public:
|
|||
//... query the parameter data we need
|
||||
levelmin_ = cf_.getValue<unsigned>("setup","levelmin");
|
||||
levelmax_ = cf_.getValue<unsigned>("setup","levelmax");
|
||||
levelmin_tf_= cf_.getValueSafe<unsigned>("setup","levelmin_TF",levelmin_);
|
||||
padding_ = cf_.getValue<unsigned>("setup","padding");
|
||||
align_top_ = cf_.getValue<bool>("setup","align_top");
|
||||
|
||||
|
@ -899,7 +900,9 @@ public:
|
|||
temp = cf_.getValue<std::string>( "setup", "ref_extent" );
|
||||
sscanf( temp.c_str(), "%lf,%lf,%lf", &lxref_[0],&lxref_[1],&lxref_[2] );
|
||||
|
||||
unsigned ncoarse = (unsigned)pow(2,levelmin_);
|
||||
unsigned
|
||||
ncoarse = (unsigned)pow(2,levelmin_);
|
||||
//ncoarse_tf = (unsigned)pow(2,levelmin_tf_);
|
||||
|
||||
//... determine shift
|
||||
|
||||
|
|
|
@ -215,8 +215,8 @@ public:
|
|||
|
||||
//double dmean = coarsemean-finemean;
|
||||
|
||||
/*//... subtract the mean difference caused by interpolation
|
||||
#pragma omp parallel for
|
||||
//... subtract the mean difference caused by interpolation
|
||||
/*#pragma omp parallel for
|
||||
for( int i=0; i<nx; ++i )
|
||||
for( int j=0; j<ny; ++j )
|
||||
for( int k=0; k<nz; ++k )
|
||||
|
|
51
random.hh
51
random.hh
|
@ -143,7 +143,6 @@ protected:
|
|||
//! initialize member variables and allocate memory
|
||||
void initialize( void )
|
||||
{
|
||||
std::cerr << " - Generating random numbers w/ sample cube size of " << cubesize_ << std::endl;
|
||||
|
||||
ncubes_ = std::max((int)((double)res_/cubesize_),1);
|
||||
if( res_ < cubesize_ )
|
||||
|
@ -152,6 +151,8 @@ protected:
|
|||
cubesize_ = res_;
|
||||
}
|
||||
|
||||
std::cout << " - Generating random numbers w/ sample cube size of " << cubesize_ << std::endl;
|
||||
|
||||
rnums_.assign( ncubes_*ncubes_*ncubes_, NULL );
|
||||
}
|
||||
|
||||
|
@ -382,6 +383,7 @@ public:
|
|||
random_numbers( unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx )
|
||||
: res_( res ), cubesize_( cubesize ), ncubes_( 1 ), baseseed_( baseseed )
|
||||
{
|
||||
std::cout << " - Generating random numbers (1) with seed " << baseseed << std::endl;
|
||||
initialize();
|
||||
fill_subvolume( x0, lx );
|
||||
}
|
||||
|
@ -390,6 +392,8 @@ public:
|
|||
random_numbers( unsigned res, unsigned cubesize, long baseseed, bool zeromean=true )
|
||||
: res_( res ), cubesize_( cubesize ), ncubes_( 1 ), baseseed_( baseseed )
|
||||
{
|
||||
std::cout << " - Generating random numbers (2) with seed " << baseseed << std::endl;
|
||||
|
||||
double mean = 0.0;
|
||||
initialize();
|
||||
mean = fill_all();
|
||||
|
@ -482,7 +486,7 @@ public:
|
|||
#ifdef RAND_DEBUG
|
||||
(*rnums_[0])(kk,jj,ii) = 0.0;
|
||||
#else
|
||||
(*rnums_[0])(kk,jj,ii) = in_float[q++];
|
||||
(*rnums_[0])(kk,jj,ii) = -in_float[q++];
|
||||
#endif
|
||||
|
||||
}
|
||||
|
@ -512,7 +516,7 @@ public:
|
|||
#ifdef RAND_DEBUG
|
||||
(*rnums_[0])(kk,jj,ii) = 0.0;
|
||||
#else
|
||||
(*rnums_[0])(kk,jj,ii) = in_double[q++];
|
||||
(*rnums_[0])(kk,jj,ii) = -in_double[q++];
|
||||
#endif
|
||||
|
||||
}
|
||||
|
@ -535,6 +539,47 @@ public:
|
|||
|
||||
}
|
||||
|
||||
//... create constrained new field
|
||||
random_numbers( random_numbers<T>& rc, unsigned cubesize, long baseseed, bool zeromean=true )
|
||||
: res_( 2*rc.res_ ), cubesize_( cubesize ), ncubes_( 1 ), baseseed_( baseseed )
|
||||
{
|
||||
double mean = 0.0;
|
||||
initialize();
|
||||
mean = fill_all();
|
||||
|
||||
if( zeromean )
|
||||
{
|
||||
for(unsigned i=0; i<res_; ++i )
|
||||
for( unsigned j=0; j<res_; ++j )
|
||||
for( unsigned k=0; k<res_; ++k )
|
||||
(*this)(i,j,k) -= mean;
|
||||
}
|
||||
|
||||
std::cout << " - Generating a constrained random number set with seed " << baseseed << "...\n";
|
||||
|
||||
double fac = 1./sqrt(8.0);
|
||||
|
||||
for( unsigned i=0,ii=0; i<res_; i+=2,++ii )
|
||||
for( unsigned j=0,jj=0; j<res_; j+=2,++jj )
|
||||
for( unsigned k=0,kk=0; k<res_; k+=2,++kk )
|
||||
{
|
||||
double topval = rc(ii,jj,kk);
|
||||
double locmean = 0.125*((*this)(i,j,k)+(*this)(i+1,j,k)+(*this)(i,j+1,k)+(*this)(i,j,k+1)+
|
||||
(*this)(i+1,j+1,k)+(*this)(i+1,j,k+1)+(*this)(i,j+1,k+1)+(*this)(i+1,j+1,k+1));
|
||||
double dif = fac*topval-locmean;
|
||||
|
||||
(*this)(i,j,k) += dif;
|
||||
(*this)(i+1,j,k) += dif;
|
||||
(*this)(i,j+1,k) += dif;
|
||||
(*this)(i,j,k+1) += dif;
|
||||
(*this)(i+1,j+1,k) += dif;
|
||||
(*this)(i+1,j,k+1) += dif;
|
||||
(*this)(i,j+1,k+1) += dif;
|
||||
(*this)(i+1,j+1,k+1) += dif;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//... copy construct by averaging down
|
||||
explicit random_numbers( /*const*/ random_numbers <T>& rc )
|
||||
{
|
||||
|
|
|
@ -355,8 +355,8 @@ protected:
|
|||
fftw_destroy_plan(p);
|
||||
fftw_destroy_plan(ip);
|
||||
}
|
||||
std::vector<real_t> m_xtable,m_ytable;
|
||||
double m_xmin, m_xmax, m_dx;
|
||||
std::vector<real_t> m_xtable,m_ytable,m_dytable;
|
||||
double m_xmin, m_xmax, m_dx, m_rdx;
|
||||
public:
|
||||
TransferFunction_real( transfer_function *tf, real_t nspec, real_t pnorm, real_t dplus, real_t rmin, real_t rmax, real_t knymax, unsigned nr )
|
||||
{
|
||||
|
@ -399,16 +399,12 @@ public:
|
|||
{
|
||||
if( r[i] > rmin && r[i] < rmax )
|
||||
{
|
||||
xsp.push_back( log10(r[i]) );
|
||||
xsp.push_back( r[i] );//log10(r[i]) );
|
||||
ysp.push_back( T[i]*r[i]*r[i] );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
accp = gsl_interp_accel_alloc ();
|
||||
|
||||
//... spline interpolation is only marginally slower here
|
||||
|
@ -417,12 +413,11 @@ public:
|
|||
//... set up everything for spline interpolation
|
||||
gsl_spline_init (splinep, &xsp[0], &ysp[0], xsp.size() );
|
||||
|
||||
|
||||
|
||||
//.. build lookup table using spline interpolation
|
||||
m_xmin = log10(rmin);
|
||||
m_xmax = log10(rmax);
|
||||
m_dx = (m_xmax-m_xmin)/nr;
|
||||
m_rdx = 1.0/m_dx;
|
||||
|
||||
for(unsigned i=0; i<nr; ++i )
|
||||
{
|
||||
|
@ -430,6 +425,14 @@ public:
|
|||
m_ytable.push_back( gsl_spline_eval(splinep, (m_xtable.back()), accp) );
|
||||
}
|
||||
|
||||
for(unsigned i=0; i<nr-1; ++i )
|
||||
{
|
||||
real_t dy,dr;
|
||||
dy = m_ytable[i+1]/pow(m_xtable[i+1],2)-m_ytable[i]/pow(m_xtable[i],2);
|
||||
dr = pow(10.0,m_xtable[i+1])-pow(10.0,m_xtable[i]);
|
||||
m_dytable.push_back(dy/dr);
|
||||
}
|
||||
|
||||
//... DEBUG: output of spline interpolated real space transfer function
|
||||
if(false)
|
||||
{
|
||||
|
@ -458,6 +461,19 @@ public:
|
|||
~TransferFunction_real()
|
||||
{ }
|
||||
|
||||
inline real_t get_grad( real_t r2 ) const
|
||||
{
|
||||
double r = 0.5*fast_log10(r2);
|
||||
double ii = (r-m_xmin)*m_rdx;
|
||||
int i = (int)ii;
|
||||
|
||||
i=std::max(0,i);
|
||||
i=std::min(i, (int)m_xtable.size()-2);
|
||||
|
||||
return (real_t)m_dytable[i];
|
||||
}
|
||||
|
||||
//... fast version
|
||||
inline real_t compute_real( real_t r2 ) const
|
||||
{
|
||||
const double EPS = 1e-8;
|
||||
|
@ -466,20 +482,22 @@ public:
|
|||
if( r2 <Reps2 )
|
||||
return Tr0_;
|
||||
|
||||
double r=0.5*log10(r2);
|
||||
double ii = (r-m_xmin)/m_dx;
|
||||
double r = 0.5*fast_log10(r2);
|
||||
//float r = 0.5f*log10f(r2);
|
||||
|
||||
double ii = (r-m_xmin)*m_rdx;
|
||||
int i = (int)ii;
|
||||
|
||||
if( i < 0 ) i = 0;
|
||||
else if( i >= (int)m_xtable.size()-1 ) i = m_xtable.size()-2;
|
||||
i=std::max(0,i);
|
||||
i=std::min(i, (int)m_xtable.size()-2);
|
||||
|
||||
double y1,y2;
|
||||
y1 = m_ytable[i];
|
||||
y2 = m_ytable[i+1];
|
||||
|
||||
//divide by r**2 because r^2 T is tabulated
|
||||
return (real_t)((y1 + (y2-y1)*(ii-(double)i))/r2);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue