1
0
Fork 0
mirror of https://github.com/cosmo-sims/MUSIC.git synced 2024-09-19 17:03:46 +02:00

More 64 bit compliance changes.

New convergence criterium for multi-grid (No longer reduction of initial residual by X, but rel. error of X).
This commit is contained in:
Oliver Hahn 2011-06-01 18:05:12 -07:00
parent aac5720a41
commit d6da7b16fc
10 changed files with 281 additions and 417 deletions

View file

@ -31,6 +31,7 @@ namespace convolution{
void perform( kernel * pk, void *pd, bool shift )
{
//return;
parameters cparam_ = pk->cparam_;
double fftnorm = pow(2.0*M_PI,1.5)/sqrt(cparam_.lx*cparam_.ly*cparam_.lz)/sqrt((double)cparam_.nx*(double)cparam_.ny*(double)cparam_.nz);

View file

@ -11,14 +11,6 @@
#include "densities.hh"
#include "convolution_kernel.hh"
//... THIS IS FOR TESTING, TODO: TAKE OUT FOR RELEASE VERSION
//... uncomment this to have a single peak in the centre and otherwise zeros
//#define SINGLE_PEAK
//#define SINGLE_OCT_PEAK
//#define OFF_OCT_PEAK
//#define DEGRADE_RAND
//TODO: this should be a larger number by default, just to maintain consistency with old default
#define DEF_RAN_CUBE_SIZE 32
@ -87,42 +79,6 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
//... fill with random numbers
rand.load( *top, levelmin );
#if defined(SINGLE_PEAK)
top->zero();
(*top)(top->size(0)/2, top->size(1)/2, top->size(2)/2) = 1.0;
#elif defined(SINGLE_OCT_PEAK)
{
top->zero();
unsigned i0=top->size(0)/2, i1=top->size(1)/2, i2=top->size(2)/2;
double weight = 1.0;
(*top)(i0,i1,i2) = weight/8.0;
(*top)(i0+1,i1,i2) = weight/8.0;
(*top)(i0,i1+1,i2) = weight/8.0;
(*top)(i0,i1,i2+1) = weight/8.0;
(*top)(i0+1,i1+1,i2) = weight/8.0;
(*top)(i0+1,i1,i2+1) = weight/8.0;
(*top)(i0,i1+1,i2+1) = weight/8.0;
(*top)(i0+1,i1+1,i2+1) = weight/8.0;
}
#endif
#if defined(OFF_OCT_PEAK)
{
top->zero();
unsigned i0=top->size(0)/8, i1=top->size(1)/2, i2=top->size(2)/2;
(*top)(i0,i1,i2) = 1.0/8.0;
(*top)(i0+1,i1,i2) = 1.0/8.0;
(*top)(i0,i1+1,i2) = 1.0/8.0;
(*top)(i0,i1,i2+1) = 1.0/8.0;
(*top)(i0+1,i1+1,i2) = 1.0/8.0;
(*top)(i0+1,i1,i2+1) = 1.0/8.0;
(*top)(i0,i1+1,i2+1) = 1.0/8.0;
(*top)(i0+1,i1+1,i2+1) = 1.0/8.0;
}
#endif
//... load convolution kernel
the_tf_kernel->fetch_kernel( levelmin, false );
@ -220,43 +176,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
//rand_gen.load( *top, levelmin );
rand.load( *top, levelmin );
#if defined(SINGLE_PEAK)
top->zero();
(*top)(top->size(0)/2, top->size(1)/2, top->size(2)/2) = 1.0;
#elif defined(SINGLE_OCT_PEAK)
{
std::cerr << ">>> setting single oct peak <<<\n";
top->zero();
unsigned i0=top->size(0)/2, i1=top->size(1)/2, i2=top->size(2)/2;
(*top)(i0,i1,i2) = 1.0/8.0;
(*top)(i0+1,i1,i2) = 1.0/8.0;
(*top)(i0,i1+1,i2) = 1.0/8.0;
(*top)(i0,i1,i2+1) = 1.0/8.0;
(*top)(i0+1,i1+1,i2) = 1.0/8.0;
(*top)(i0+1,i1,i2+1) = 1.0/8.0;
(*top)(i0,i1+1,i2+1) = 1.0/8.0;
(*top)(i0+1,i1+1,i2+1) = 1.0/8.0;
}
#endif
#if defined(OFF_OCT_PEAK)
{
top->zero();
unsigned i0=top->size(0)/8, i1=top->size(1)/2, i2=top->size(2)/2;
(*top)(i0,i1,i2) = 1.0/8.0;
(*top)(i0+1,i1,i2) = 1.0/8.0;
(*top)(i0,i1+1,i2) = 1.0/8.0;
(*top)(i0,i1,i2+1) = 1.0/8.0;
(*top)(i0+1,i1+1,i2) = 1.0/8.0;
(*top)(i0+1,i1,i2+1) = 1.0/8.0;
(*top)(i0,i1+1,i2+1) = 1.0/8.0;
(*top)(i0+1,i1+1,i2+1) = 1.0/8.0;
}
#endif
convolution::perform<real_t>( the_tf_kernel->fetch_kernel( levelmax ), reinterpret_cast<void*>( top->get_data_ptr() ), shift );
the_tf_kernel->deallocate();
@ -297,23 +217,6 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
LOGUSER("Creating base hierarchy...");
delta.create_base_hierarchy(levelmin);
#if defined(SINGLE_PEAK) || defined(SINGLE_OCT_PEAK)
{
top->zero();
(*top)(top->size(0)/2, top->size(1)/2, top->size(2)/2) = 1.0/pow(2,1.5*(levelmax-levelmin));
}
#endif
#if defined(OFF_OCT_PEAK)
{
top->zero();
unsigned i0=top->size(0)/8, i1=top->size(1)/2, i2=top->size(2)/2;
(*top)(i0,i1,i2) = 1.0/pow(2,1.5*(levelmax-levelmin));
}
#endif
DensityGrid<real_t> top_save( *top );
the_tf_kernel->fetch_kernel( levelmin );
@ -400,6 +303,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
LOGUSER("Injecting long range component");
//mg_straight().prolong_add( delta_longrange, *delta.get_grid(levelmin+i+1) );
mg_cubic().prolong_add( delta_longrange, *delta.get_grid(levelmin+i+1) );
//... 3) the coarse-grid correction
@ -427,35 +331,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
\**********************************************************************************************************/
std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmax << " ..." << std::endl;
LOGUSER("Performing noise convolution on level %3d",levelmax);
#if defined(SINGLE_PEAK) || defined(SINGLE_OCT_PEAK)
{
coarse->zero();
int
i0 = (1<<(levelmax-1)) - refh.offset_abs(levelmax,0) + coarse->nx_/4,
i1 = (1<<(levelmax-1)) - refh.offset_abs(levelmax,1) + coarse->nx_/4,
i2 = (1<<(levelmax-1)) - refh.offset_abs(levelmax,2) + coarse->nx_/4;
#if defined(SINGLE_PEAK)
(*coarse)(i0,i1,i2) = 1.0;
#elif defined(SINGLE_OCT_PEAK)
(*coarse)(i0,i1,i2) = 1.0/8.0;
(*coarse)(i0+1,i1,i2) = 1.0/8.0;
(*coarse)(i0,i1+1,i2) = 1.0/8.0;
(*coarse)(i0,i1,i2+1) = 1.0/8.0;
(*coarse)(i0+1,i1+1,i2) = 1.0/8.0;
(*coarse)(i0+1,i1,i2+1) = 1.0/8.0;
(*coarse)(i0,i1+1,i2+1) = 1.0/8.0;
(*coarse)(i0+1,i1+1,i2+1) = 1.0/8.0;
#endif
}
#endif
#if defined(OFF_OCT_PEAK)
coarse->zero();
#endif
//... 1) grid self-contribution
LOGUSER("Computing density self-contribution");
PaddedDensitySubGrid<real_t> coarse_save( *coarse );
@ -494,55 +370,7 @@ void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type
}
delete the_tf_kernel;
#if 0
// NEVER, NEVER ENABLE THE FOLLOWING
//... enforce mean condition
//for( int i=levelmin; i<(int)levelmax; ++i )
// enforce_mean( (*delta.get_grid(i+1)), (*delta.get_grid(i)) );
/*for( unsigned i=levelmax; i>levelmin; --i )
enforce_coarse_mean( (*delta.get_grid(i)), (*delta.get_grid(i-1)) );*/
#endif
#if 0
//... subtract the box mean.... this will otherwise add
//... a constant curvature term to the potential
double sum = 0.0;
{
int nx,ny,nz;
nx = delta.get_grid(levelmin)->size(0);
ny = delta.get_grid(levelmin)->size(1);
nz = delta.get_grid(levelmin)->size(2);
for( int ix=0; ix<nx; ++ix )
for( int iy=0; iy<ny; ++iy )
for( int iz=0; iz<nz; ++iz )
sum += (*delta.get_grid(levelmin))(ix,iy,iz);
sum /= (nx*ny*nz);
}
std::cout << " - Top grid mean density is off by " << sum << ", correcting..." << std::endl;
for( unsigned i=levelmin; i<=levelmax; ++i )
{
int nx,ny,nz;
nx = delta.get_grid(i)->size(0);
ny = delta.get_grid(i)->size(1);
nz = delta.get_grid(i)->size(2);
for( int ix=0; ix<nx; ++ix )
for( int iy=0; iy<ny; ++iy )
for( int iz=0; iz<nz; ++iz )
(*delta.get_grid(i))(ix,iy,iz) -= sum;
}
#endif
tend = omp_get_wtime();
if( true )//verbosity > 1 )
std::cout << " - Density calculation took " << tend-tstart << "s with " << omp_get_max_threads() << " threads." << std::endl;

View file

@ -140,7 +140,7 @@ public:
*/
void fill_rand( /*const*/ random_numbers<real_t>* prc, real_t variance, int i0, int j0, int k0, bool setzero=false )
{
double sum = 0.0;
long double sum = 0.0;
#pragma omp parallel for reduction(+:sum)
for( int i=0; i<nx_; ++i )
@ -193,7 +193,7 @@ public:
*/
double subtract_mean( void )
{
double sum = 0.0;
long double sum = 0.0;
size_t count = 0;
#pragma omp parallel for reduction(+:sum,count)
@ -517,7 +517,7 @@ public:
for( int iz=0; iz<nz_; ++iz )
(*this)(ix,iy,iz) = 0.0;
std::cerr << oxsub+lxsub << " -> " << nx_ << std::endl;
//std::cerr << oxsub+lxsub << " -> " << nx_ << std::endl;
for( int ix=oxsub+lxsub; ix<nx_; ++ix )
for( int iy=0; iy<ny_; ++iy )
for( int iz=0; iz<nz_; ++iz )
@ -859,7 +859,7 @@ inline void enforce_coarse_mean( M& v, M& V )
innersum /= innercount;
outersum /= outercount;
std::cerr << "***\n";
//std::cerr << "***\n";
double dcorr = innersum * innercount / outercount;

View file

@ -213,7 +213,8 @@ public:
template< class C >
inline real_t apply( const C& c, const int i, const int j, const int k ) const
{
return c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)-6.0*c(i,j,k);
//return c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)-6.0*c(i,j,k);
return (double)c(i-1,j,k)+(double)c(i+1,j,k)+(double)c(i,j-1,k)+(double)c(i,j+1,k)+(double)c(i,j,k-1)+(double)c(i,j,k+1)-6.0*(double)c(i,j,k);
}
template< class C >

View file

@ -1335,6 +1335,7 @@ public:
typedef GridHierarchy<real_t> grid_hierarchy;
typedef MeshvarBnd<real_t> meshvar_bnd;
typedef MeshvarBnd<double> meshvar_bnd_double;
typedef Meshvar<real_t> meshvar;

View file

@ -22,9 +22,9 @@ struct coarse_fine_interpolation
//! general 2nd order polynomial interpolation
inline double interp2( double x1, double x2, double x3, double f1, double f2, double f3, double x )
inline real_t interp2( real_t x1, real_t x2, real_t x3, real_t f1, real_t f2, real_t f3, real_t x )
{
double a,b,c;
real_t a,b,c;
a = (x1 * f3 - x3 * f1 - x2 * f3 - x1 * f2 + x2 * f1 + x3 * f2) / (x1 * x3 * x3 - x2 * x3 * x3 + x2 * x1 * x1 - x3 * x1 * x1 + x3 * x2 * x2 - x1 * x2 * x2);
b = -(x1 * x1 * f3 - x1 * x1 * f2 - f1 * x3 * x3 + f2 * x3 * x3 - x2 * x2 * f3 + f1 * x2 * x2) / (x1 - x2) / (x1 * x2 - x1 * x3 + x3 * x3 - x2 * x3);
c = (x1 * x1 * x2 * f3 - x1 * x1 * x3 * f2 - x2 * x2 * x1 * f3 + f2 * x1 * x3 * x3 + x2 * x2 * x3 * f1 - f1 * x2 * x3 * x3) / (x1 - x2) / (x1 * x2 - x1 * x3 + x3 * x3 - x2 * x3);
@ -34,36 +34,36 @@ inline double interp2( double x1, double x2, double x3, double f1, double f2, do
//! optimized 4th order polynomial interpolation across a left boundary for i-1 values
inline double interp4left( double f0, double f1, double f2, double f3, double f4 )
inline real_t interp4left( real_t f0, real_t f1, real_t f2, real_t f3, real_t f4 )
{
//return -4.0/231.0*f0+4.0/7.0*f1+5.0/7.0*f2-1.0/3.0*f3+5./77.0*f4;
return 1.0/13.0*f0-21./55.*f1+7.0/9.0*f2+8./15.*f3-8./1287*f4;
}
//! optimized 4th order polynomial interpolation across a right boundary for i+1 values
inline double interp4right( double f0, double f1, double f2, double f3, double f4 )
inline real_t interp4right( real_t f0, real_t f1, real_t f2, real_t f3, real_t f4 )
{
return interp4left(f4,f3,f2,f1,f0);
}
//! optimized 4th order polynomial interpolation across a left boundary for i-2 values
inline double interp4lleft( double f0, double f1, double f2, double f3, double f4 )
inline real_t interp4lleft( real_t f0, real_t f1, real_t f2, real_t f3, real_t f4 )
{
//return 16./231.*f0+48.0/35.0*f1-6.0/7.0*f2-8.0/15.0*f3-9./77.*f4;
return -15./91.*f0+8./11.*f1+-10./9.*f2+32./21.*f3+32./1287.*f4;
}
//! optimized 4th order polynomial interpolation across a right boundary for i+2 values
inline double interp4rright( double f0, double f1, double f2, double f3, double f4 )
inline real_t interp4rright( real_t f0, real_t f1, real_t f2, real_t f3, real_t f4 )
{
return interp4lleft(f4,f3,f2,f1,f0);
}
//! general 4th order polynomial interpolation
inline double interp4( double fm2, double fm1, double f0, double fp1, double fp2, double x )
inline real_t interp4( real_t fm2, real_t fm1, real_t f0, real_t fp1, real_t fp2, real_t x )
{
double x2 = x*x, x3=x2*x, x4=x3*x;
double a,b,c,d,e;
real_t x2 = x*x, x3=x2*x, x4=x3*x;
real_t a,b,c,d,e;
a= 1.0/24.0*fm2-1.0/6.0*fm1+0.25*f0-1.0/6.0*fp1+1.0/24.0*fp2;
b=-1.0/12.0*fm2+1.0/6.0*fm1-1.0/6.0*fp1+1.0/12.0*fp2;
@ -75,10 +75,10 @@ inline double interp4( double fm2, double fm1, double f0, double fp1, double fp2
}
//! general 4th order polynomial interpolation
inline double interp4( double* f, double x )
inline real_t interp4( real_t* f, real_t x )
{
double x2 = x*x, x3=x2*x, x4=x3*x;
double a,b,c,d,e;
real_t x2 = x*x, x3=x2*x, x4=x3*x;
real_t a,b,c,d,e;
a= 1.0/24.0*f[0]-1.0/6.0*f[1]+0.25*f[2]-1.0/6.0*f[3]+1.0/24.0*f[4];
b=-1.0/12.0*f[0]+1.0/6.0*f[1]-1.0/6.0*f[3]+1.0/12.0*f[4];
@ -90,10 +90,10 @@ inline double interp4( double* f, double x )
}
//! general 6th order polynomial interpolation
inline double interp6( double *f, double x )
inline real_t interp6( real_t *f, real_t x )
{
double x2 = x*x, x3=x2*x, x4=x3*x, x5=x4*x, x6=x5*x;
double a,b,c,d,e,g,h;
real_t x2 = x*x, x3=x2*x, x4=x3*x, x5=x4*x, x6=x5*x;
real_t a,b,c,d,e,g,h;
a=(f[0]-6.*f[1]+15.*f[2]-20.*f[3]+15.*f[4]-6.*f[5]+f[6])/720.;
b=(-3.*f[0]+12.*f[1]-15.*f[2]+15*f[4]-12.*f[5]+3.*f[6])/720.;
@ -108,11 +108,11 @@ inline double interp6( double *f, double x )
}
//! general 6th order polynomial interpolation
inline double interp6( double f0, double f1, double f2, double f3, double f4, double f5, double f6, double x )
inline real_t interp6( real_t f0, real_t f1, real_t f2, real_t f3, real_t f4, real_t f5, real_t f6, real_t x )
{
double x2 = x*x, x3=x2*x, x4=x3*x, x5=x4*x, x6=x5*x;
double a,b,c,d,e,g,h;
double f[7]={
real_t x2 = x*x, x3=x2*x, x4=x3*x, x5=x4*x, x6=x5*x;
real_t a,b,c,d,e,g,h;
real_t f[7]={
f0,f1,f2,f3,f4,f5,f6
};
@ -129,9 +129,9 @@ inline double interp6( double f0, double f1, double f2, double f3, double f4, do
}
//! general 2nd order polynomial interpolation
inline double interp2( double fleft, double fcenter, double fright, double x )
inline real_t interp2( real_t fleft, real_t fcenter, real_t fright, real_t x )
{
double a,b,c;
real_t a,b,c;
a = 0.5*(fleft+fright)-fcenter;
b = 0.5*(fright-fleft);
c = fcenter;
@ -140,9 +140,9 @@ inline double interp2( double fleft, double fcenter, double fright, double x )
}
//! optimized 2nd order polynomial interpolation for i-1 values
inline double interp2left( double fleft, double fcenter, double fright )
inline real_t interp2left( real_t fleft, real_t fcenter, real_t fright )
{
double a,b,c;
real_t a,b,c;
a = (6.0*fright-10.0*fcenter+4.0*fleft)/15.0;
b = (-4.0*fleft+9.0*fright-5.0*fcenter)/15.0;
c = fcenter;
@ -151,9 +151,9 @@ inline double interp2left( double fleft, double fcenter, double fright )
}
//! optimized 2nd order polynomial interpolation for i+1 values
inline double interp2right( double fleft, double fcenter, double fright )
inline real_t interp2right( real_t fleft, real_t fcenter, real_t fright )
{
double a,b,c;
real_t a,b,c;
a = (6.0*fleft-10.0*fcenter+4.0*fright)/15.0;
b = (4.0*fright-9.0*fleft+5.0*fcenter)/15.0;
c = fcenter;
@ -162,9 +162,9 @@ inline double interp2right( double fleft, double fcenter, double fright )
}
//! optimized 2nd order polynomial interpolation for i-2 values
inline double interp2lleft( double fleft, double fcenter, double fright )
inline real_t interp2lleft( real_t fleft, real_t fcenter, real_t fright )
{
double a,b,c;
real_t a,b,c;
a = (-10.0*fcenter+4.0*fleft+6.0*fright)/15.0;
b = (-12.0*fleft+15.0*fcenter-3.0*fright)/15.0;
c = (-3.0*fright+10.0*fcenter+8.0*fleft)/15.0;
@ -173,9 +173,9 @@ inline double interp2lleft( double fleft, double fcenter, double fright )
}
//! optimized 2nd order polynomial interpolation for i+2 values
inline double interp2rright( double fleft, double fcenter, double fright )
inline real_t interp2rright( real_t fleft, real_t fcenter, real_t fright )
{
double a,b,c;
real_t a,b,c;
a = (-10.0*fcenter+4.0*fleft+6.0*fright)/15.0;
b = (-12.0*fleft+15.0*fcenter-3.0*fright)/15.0;
c = (-3.0*fright+10.0*fcenter+8.0*fleft)/15.0;
@ -184,37 +184,37 @@ inline double interp2rright( double fleft, double fcenter, double fright )
}
//! optimized 6th order polynomial interpolation for i-1 values
inline double interp6left( double f0, double f1, double f2, double f3, double f4, double f5, double f6 )
inline real_t interp6left( real_t f0, real_t f1, real_t f2, real_t f3, real_t f4, real_t f5, real_t f6 )
{
return 4./2431.*f0-24./1001.*f1+4./7.*f2+60./77.*f3-6./13.*f4+12./77.*f5-5./221.*f6;
}
//! optimized 6th order polynomial interpolation for i-2 values
inline double interp6lleft( double f0, double f1, double f2, double f3, double f4, double f5, double f6 )
inline real_t interp6lleft( real_t f0, real_t f1, real_t f2, real_t f3, real_t f4, real_t f5, real_t f6 )
{
return -12./2431.*f0+40./429.*f1+4./3.*f2-10./11.*f3+28./39.*f4-3./11.*f5+28./663.*f6;
}
//! optimized 6th order polynomial interpolation for i-3 values
inline double interp6llleft( double f0, double f1, double f2, double f3, double f4, double f5, double f6 )
inline real_t interp6llleft( real_t f0, real_t f1, real_t f2, real_t f3, real_t f4, real_t f5, real_t f6 )
{
return -36./2431.*f0+600./1001.*f1+20./21.*f2-100./77.*f3+15./13.*f4-36./77.*f5+50./663.*f6;
}
//! optimized 6th order polynomial interpolation for i+1 values
inline double interp6right( double f0, double f1, double f2, double f3, double f4, double f5, double f6 )
inline real_t interp6right( real_t f0, real_t f1, real_t f2, real_t f3, real_t f4, real_t f5, real_t f6 )
{
return interp6left(f6,f5,f4,f3,f2,f1,f0);
}
//! optimized 6th order polynomial interpolation for i+2 values
inline double interp6rright( double f0, double f1, double f2, double f3, double f4, double f5, double f6 )
inline real_t interp6rright( real_t f0, real_t f1, real_t f2, real_t f3, real_t f4, real_t f5, real_t f6 )
{
return interp6lleft(f6,f5,f4,f3,f2,f1,f0);
}
//! optimized 6th order polynomial interpolation for i+3 values
inline double interp6rrright( double f0, double f1, double f2, double f3, double f4, double f5, double f6 )
inline real_t interp6rrright( real_t f0, real_t f1, real_t f2, real_t f3, real_t f4, real_t f5, real_t f6 )
{
return interp6llleft(f6,f5,f4,f3,f2,f1,f0);
}
@ -270,19 +270,19 @@ struct cubic_interp
if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd))
continue;
int ixtop = (int)(0.5*(double)(ix))+xoff;
int iytop = (int)(0.5*(double)(iy))+yoff;
int iztop = (int)(0.5*(double)(iz))+zoff;
int ixtop = (int)(0.5*(real_t)(ix))+xoff;
int iytop = (int)(0.5*(real_t)(iy))+yoff;
int iztop = (int)(0.5*(real_t)(iz))+zoff;
if( ix==-1 ) ixtop=xoff-1;
if( iy==-1 ) iytop=yoff-1;
if( iz==-1 ) iztop=zoff-1;
double coarse_flux, fine_flux, dflux;
real_t coarse_flux, fine_flux, dflux;
//double fac = 0.125*27.0/24.0, fac2 = -0.125*1.0/24.0;//0.125;//24.0/26.0*0.125*0.5;
double fac = 0.5*0.125*27.0/24.0, fac2 = -0.5*0.125*1.0/24.0;
//double fac = 0.125*15.0/12.0, fac2 = -0.125*1.0/12.0;
//real_t fac = 0.125*27.0/24.0, fac2 = -0.125*1.0/24.0;//0.125;//24.0/26.0*0.125*0.5;
real_t fac = 0.5*0.125*27.0/24.0, fac2 = -0.5*0.125*1.0/24.0;
//real_t fac = 0.125*15.0/12.0, fac2 = -0.125*1.0/12.0;
if(xbnd)
{
@ -290,12 +290,12 @@ struct cubic_interp
{
fine_flux = 0.0;
fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy+1,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_x(-1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_x(-1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_x(-1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_x(-1,*u,ix+1,iy+1,iz+1);
coarse_flux = Laplace_flux_O4<double>().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
coarse_flux = Laplace_flux_O4<real_t>().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -312,12 +312,12 @@ struct cubic_interp
{
fine_flux = 0.0;
fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_x(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_x(+1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_x(+1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_x(+1,*u,ix,iy+1,iz+1);
coarse_flux = Laplace_flux_O4<double>().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
coarse_flux = Laplace_flux_O4<real_t>().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -338,12 +338,12 @@ struct cubic_interp
{
fine_flux = 0.0;
fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix+1,iy+1,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_y(-1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_y(-1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_y(-1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_y(-1,*u,ix+1,iy+1,iz+1);
coarse_flux = Laplace_flux_O4<double>().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
coarse_flux = Laplace_flux_O4<real_t>().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -359,12 +359,12 @@ struct cubic_interp
{
fine_flux = 0.0;
fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_y(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_y(+1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_y(+1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_y(+1,*u,ix+1,iy,iz+1);
coarse_flux = Laplace_flux_O4<double>().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
coarse_flux = Laplace_flux_O4<real_t>().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -384,12 +384,12 @@ struct cubic_interp
{
fine_flux = 0.0;
fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix+1,iy+1,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_z(-1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_z(-1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_z(-1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_z(-1,*u,ix+1,iy+1,iz+1);
coarse_flux = Laplace_flux_O4<double>().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
coarse_flux = Laplace_flux_O4<real_t>().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -405,12 +405,12 @@ struct cubic_interp
{
fine_flux = 0.0;
fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_z(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_z(+1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_z(+1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_z(+1,*u,ix+1,iy+1,iz);
coarse_flux = Laplace_flux_O4<double>().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
coarse_flux = Laplace_flux_O4<real_t>().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -475,17 +475,17 @@ struct interp_O3_fluxcorr
if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd))
continue;
int ixtop = (int)(0.5*(double)(ix))+xoff;
int iytop = (int)(0.5*(double)(iy))+yoff;
int iztop = (int)(0.5*(double)(iz))+zoff;
int ixtop = (int)(0.5*(real_t)(ix))+xoff;
int iytop = (int)(0.5*(real_t)(iy))+yoff;
int iztop = (int)(0.5*(real_t)(iz))+zoff;
if( ix==-1 ) ixtop=xoff-1;
if( iy==-1 ) iytop=yoff-1;
if( iz==-1 ) iztop=zoff-1;
double ustar1, ustar2, ustar3, uhat;
double fac = 0.5;//0.25;
double flux;;
real_t ustar1, ustar2, ustar3, uhat;
real_t fac = 0.5;//0.25;
real_t flux;;
// left boundary
if( ix == -1 && iy%2==0 && iz%2==0 )
{
@ -493,11 +493,11 @@ struct interp_O3_fluxcorr
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
{
ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) );
ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) );
ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) );
ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((real_t)j-0.5) );
ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((real_t)j-0.5) );
ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((real_t)j-0.5) );
uhat = interp2( ustar1, ustar2, ustar3, fac*((double)k-0.5) );
uhat = interp2( ustar1, ustar2, ustar3, fac*((real_t)k-0.5) );
(*u)(ix,iy+j,iz+k) = interp2left( uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k) );
@ -506,7 +506,7 @@ struct interp_O3_fluxcorr
flux /= 4.0;
double dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
real_t dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
(*u)(ix,iy+j,iz+k) -= dflux;
@ -518,17 +518,17 @@ struct interp_O3_fluxcorr
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
{
ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) );
ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) );
ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) );
ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((real_t)j-0.5) );
ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((real_t)j-0.5) );
ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((real_t)j-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((real_t)k-0.5) );
(*u)(ix,iy+j,iz+k) = interp2right( (*u)(ix-2,iy+j,iz+k), (*u)(ix-1,iy+j,iz+k), uhat );
flux += ((*u)(ix,iy+j,iz+k)-(*u)(ix-1,iy+j,iz+k));
}
flux /= 4.0;
double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop-1,iytop,iztop))/2.0 - flux;
real_t dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop-1,iytop,iztop))/2.0 - flux;
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
(*u)(ix,iy+j,iz+k) += dflux;
@ -545,14 +545,14 @@ struct interp_O3_fluxcorr
ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((real_t)k-0.5) );
(*u)(ix+j,iy,iz+k) = interp2left( uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k) );
flux += ((*u)(ix+j,iy+1,iz+k)-(*u)(ix+j,iy,iz+k));
}
flux /= 4.0;
double dflux = ((*utop)(ixtop,iytop+1,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
real_t dflux = ((*utop)(ixtop,iytop+1,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
(*u)(ix+j,iy,iz+k) -= dflux;
@ -568,14 +568,14 @@ struct interp_O3_fluxcorr
ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((real_t)k-0.5) );
(*u)(ix+j,iy,iz+k) = interp2right( (*u)(ix+j,iy-2,iz+k), (*u)(ix+j,iy-1,iz+k), uhat );
flux += ((*u)(ix+j,iy,iz+k)-(*u)(ix+j,iy-1,iz+k));
}
flux /= 4.0;
double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop-1,iztop))/2.0 - flux;
real_t dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop-1,iztop))/2.0 - flux;
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
(*u)(ix+j,iy,iz+k) += dflux;
@ -592,14 +592,14 @@ struct interp_O3_fluxcorr
ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((real_t)k-0.5) );
(*u)(ix+j,iy+k,iz) = interp2left( uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2) );
flux += ((*u)(ix+j,iy+k,iz+1)-(*u)(ix+j,iy+k,iz));
}
flux /= 4.0;
double dflux = ((*utop)(ixtop,iytop,iztop+1)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
real_t dflux = ((*utop)(ixtop,iytop,iztop+1)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
(*u)(ix+j,iy+k,iz) -= dflux;
@ -616,14 +616,14 @@ struct interp_O3_fluxcorr
ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((real_t)k-0.5) );
(*u)(ix+j,iy+k,iz) = interp2right( (*u)(ix+j,iy+k,iz-2), (*u)(ix+j,iy+k,iz-1), uhat );
flux += ((*u)(ix+j,iy+k,iz)-(*u)(ix+j,iy+k,iz-1));
}
flux /= 4.0;
double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop,iztop-1))/2.0 - flux;
real_t dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop,iztop-1))/2.0 - flux;
for( int j=0;j<=1;j++)
for( int k=0;k<=1;k++)
(*u)(ix+j,iy+k,iz) += dflux;
@ -680,20 +680,20 @@ struct interp_O5_fluxcorr
if( bnd )
{
int ixtop = (int)(0.5*(double)(ix))+xoff;
int iytop = (int)(0.5*(double)(iy))+yoff;
int iztop = (int)(0.5*(double)(iz))+zoff;
int ixtop = (int)(0.5*(real_t)(ix))+xoff;
int iytop = (int)(0.5*(real_t)(iy))+yoff;
int iztop = (int)(0.5*(real_t)(iz))+zoff;
if( ix==-1 ) ixtop=xoff-1;
if( iy==-1 ) iytop=yoff-1;
if( iz==-1 ) iztop=zoff-1;
double ustar[5], uhat[2];
double fac = 0.5;
real_t ustar[5], uhat[2];
real_t fac = 0.5;
double coarse_flux, fine_flux, dflux;
real_t coarse_flux, fine_flux, dflux;
double ffac = 12./14.;
real_t ffac = 12./14.;
// left boundary
if( ix == -1 && iy%2==0 && iz%2==0 )
@ -706,8 +706,8 @@ struct interp_O5_fluxcorr
for( int q=-2;q<=2;++q )
ustar[q+2] = interp4( (*utop)(ixtop+p-1,iytop-2,iztop+q), (*utop)(ixtop+p-1,iytop-1,iztop+q),
(*utop)(ixtop+p-1,iytop,iztop+q), (*utop)(ixtop+p-1,iytop+1,iztop+q),
(*utop)(ixtop+p-1,iytop+2,iztop+q), fac*((double)j-0.5) );
uhat[p] = interp4( ustar, fac*((double)k-0.5) );//-1.5 );
(*utop)(ixtop+p-1,iytop+2,iztop+q), fac*((real_t)j-0.5) );
uhat[p] = interp4( ustar, fac*((real_t)k-0.5) );//-1.5 );
}
(*u)(ix,iy+j,iz+k) = interp4left( uhat[0], uhat[1], (*u)(ix+1,iy+j,iz+k),
@ -717,13 +717,13 @@ struct interp_O5_fluxcorr
}
fine_flux = 0.0;
fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy+1,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_x(-1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_x(-1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_x(-1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_x(-1,*u,ix+1,iy+1,iz+1);
fine_flux /= 4.0;
coarse_flux = Laplace_flux_O4<double>().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
coarse_flux = Laplace_flux_O4<real_t>().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
dflux = coarse_flux - fine_flux;
@ -747,8 +747,8 @@ struct interp_O5_fluxcorr
for( int q=-2;q<=2;++q )
ustar[q+2] = interp4( (*utop)(ixtop+p,iytop-2,iztop+q), (*utop)(ixtop+p,iytop-1,iztop+q),
(*utop)(ixtop+p,iytop,iztop+q), (*utop)(ixtop+p,iytop+1,iztop+q),
(*utop)(ixtop+p,iytop+2,iztop+q), fac*((double)j-0.5) );
uhat[p] = interp4( ustar, fac*((double)k-0.5));//-1.5 );
(*utop)(ixtop+p,iytop+2,iztop+q), fac*((real_t)j-0.5) );
uhat[p] = interp4( ustar, fac*((real_t)k-0.5));//-1.5 );
}
(*u)(ix,iy+j,iz+k) = interp4right( (*u)(ix-3,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k),
@ -758,12 +758,12 @@ struct interp_O5_fluxcorr
}
fine_flux = 0.0;
fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_x(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_x(+1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_x(+1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_x(+1,*u,ix,iy+1,iz+1);
coarse_flux = Laplace_flux_O4<double>().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
coarse_flux = Laplace_flux_O4<real_t>().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -787,8 +787,8 @@ struct interp_O5_fluxcorr
for( int q=-2;q<=2;++q )
ustar[q+2] = interp4( (*utop)(ixtop-2,iytop+p-1,iztop+q), (*utop)(ixtop-1,iytop+p-1,iztop+q),
(*utop)(ixtop,iytop+p-1,iztop+q), (*utop)(ixtop+1,iytop+p-1,iztop+q),
(*utop)(ixtop+2,iytop+p-1,iztop+q), fac*((double)j-0.5) );
uhat[p] = interp4( ustar, fac*((double)k-0.5));//-1.5 );
(*utop)(ixtop+2,iytop+p-1,iztop+q), fac*((real_t)j-0.5) );
uhat[p] = interp4( ustar, fac*((real_t)k-0.5));//-1.5 );
}
(*u)(ix+j,iy,iz+k) = interp4left( uhat[0], uhat[1], (*u)(ix+j,iy+1,iz+k),
@ -798,12 +798,12 @@ struct interp_O5_fluxcorr
}
fine_flux = 0.0;
fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix+1,iy+1,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_y(-1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_y(-1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_y(-1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_y(-1,*u,ix+1,iy+1,iz+1);
coarse_flux = Laplace_flux_O4<double>().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
coarse_flux = Laplace_flux_O4<real_t>().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -827,8 +827,8 @@ struct interp_O5_fluxcorr
for( int q=-2;q<=2;++q )
ustar[q+2] = interp4( (*utop)(ixtop-2,iytop+p,iztop+q), (*utop)(ixtop-1,iytop+p,iztop+q),
(*utop)(ixtop,iytop+p,iztop+q), (*utop)(ixtop+1,iytop+p,iztop+q),
(*utop)(ixtop+2,iytop+p,iztop+q), fac*((double)j-0.5) );
uhat[p] = interp4( ustar, fac*((double)k-0.5));//+1.5 );
(*utop)(ixtop+2,iytop+p,iztop+q), fac*((real_t)j-0.5) );
uhat[p] = interp4( ustar, fac*((real_t)k-0.5));//+1.5 );
}
(*u)(ix+j,iy,iz+k) = interp4right( (*u)(ix+j,iy-3,iz+k), (*u)(ix+j,iy-2,iz+k),
@ -838,12 +838,12 @@ struct interp_O5_fluxcorr
}
fine_flux = 0.0;
fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_y(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_y(+1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_y(+1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_y(+1,*u,ix+1,iy,iz+1);
coarse_flux = Laplace_flux_O4<double>().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
coarse_flux = Laplace_flux_O4<real_t>().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -868,8 +868,8 @@ struct interp_O5_fluxcorr
for( int q=-2;q<=2;++q )
ustar[q+2] = interp4( (*utop)(ixtop-2,iytop+q,iztop+p-1), (*utop)(ixtop-1,iytop+q,iztop+p-1),
(*utop)(ixtop,iytop+q,iztop+p-1), (*utop)(ixtop+1,iytop+q,iztop+p-1),
(*utop)(ixtop+2,iytop+q,iztop+p-1), fac*((double)j-0.5) );
uhat[p] = interp4( ustar, fac*((double)k-0.5));//-1.5 );
(*utop)(ixtop+2,iytop+q,iztop+p-1), fac*((real_t)j-0.5) );
uhat[p] = interp4( ustar, fac*((real_t)k-0.5));//-1.5 );
}
(*u)(ix+j,iy+k,iz) = interp4left( uhat[0], uhat[1], (*u)(ix+j,iy+k,iz+1),
@ -880,12 +880,12 @@ struct interp_O5_fluxcorr
fine_flux = 0.0;
fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix+1,iy+1,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_z(-1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_z(-1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_z(-1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O4<real_t>().apply_z(-1,*u,ix+1,iy+1,iz+1);
coarse_flux = Laplace_flux_O4<double>().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
coarse_flux = Laplace_flux_O4<real_t>().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -909,8 +909,8 @@ struct interp_O5_fluxcorr
for( int q=-2;q<=2;++q )
ustar[q+2] = interp4( (*utop)(ixtop-2,iytop+q,iztop+p), (*utop)(ixtop-1,iytop+q,iztop+p),
(*utop)(ixtop,iytop+q,iztop+p), (*utop)(ixtop+1,iytop+q,iztop+p),
(*utop)(ixtop+2,iytop+q,iztop+p), fac*((double)j-0.5) );
uhat[p] = interp4( ustar, fac*((double)k-0.5));//+1.5 );
(*utop)(ixtop+2,iytop+q,iztop+p), fac*((real_t)j-0.5) );
uhat[p] = interp4( ustar, fac*((real_t)k-0.5));//+1.5 );
}
(*u)(ix+j,iy+k,iz) = interp4right( (*u)(ix+j,iy+k,iz-3), (*u)(ix+j,iy+k,iz-2),
@ -920,12 +920,12 @@ struct interp_O5_fluxcorr
}
fine_flux = 0.0;
fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_z(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_z(+1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_z(+1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O4<real_t>().apply_z(+1,*u,ix+1,iy+1,iz);
coarse_flux = Laplace_flux_O4<double>().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
coarse_flux = Laplace_flux_O4<real_t>().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -987,20 +987,20 @@ struct interp_O7_fluxcorr
if( bnd )
{
int ixtop = (int)(0.5*(double)(ix))+xoff;
int iytop = (int)(0.5*(double)(iy))+yoff;
int iztop = (int)(0.5*(double)(iz))+zoff;
int ixtop = (int)(0.5*(real_t)(ix))+xoff;
int iytop = (int)(0.5*(real_t)(iy))+yoff;
int iztop = (int)(0.5*(real_t)(iz))+zoff;
if( ix==-1 ) ixtop=xoff-1;
if( iy==-1 ) iytop=yoff-1;
if( iz==-1 ) iztop=zoff-1;
double ustar[7], uhat[3];
double fac = 0.5;
real_t ustar[7], uhat[3];
real_t fac = 0.5;
double coarse_flux, fine_flux, dflux;
real_t coarse_flux, fine_flux, dflux;
double ffac = 180./222.;
real_t ffac = 180./222.;
// left boundary
if( ix == -1 && iy%2==0 && iz%2==0 )
@ -1014,8 +1014,8 @@ struct interp_O7_fluxcorr
ustar[q+3] = interp6( (*utop)(ixtop+p-2,iytop-3,iztop+q), (*utop)(ixtop+p-2,iytop-2,iztop+q),
(*utop)(ixtop+p-2,iytop-1,iztop+q), (*utop)(ixtop+p-2,iytop,iztop+q),
(*utop)(ixtop+p-2,iytop+1,iztop+q), (*utop)(ixtop+p-2,iytop+2,iztop+q),
(*utop)(ixtop+p-2,iytop+3,iztop+q), fac*((double)j-0.5) );
uhat[p] = interp6( ustar, fac*((double)k-0.5));//-1.5 );
(*utop)(ixtop+p-2,iytop+3,iztop+q), fac*((real_t)j-0.5) );
uhat[p] = interp6( ustar, fac*((real_t)k-0.5));//-1.5 );
}
(*u)(ix,iy+j,iz+k) = interp6left( uhat[0], uhat[1], uhat[2], (*u)(ix+1,iy+j,iz+k),
@ -1027,13 +1027,13 @@ struct interp_O7_fluxcorr
}
fine_flux = 0.0;
fine_flux += Laplace_flux_O6<double>().apply_x(-1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O6<double>().apply_x(-1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O6<double>().apply_x(-1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O6<double>().apply_x(-1,*u,ix+1,iy+1,iz+1);
fine_flux += Laplace_flux_O6<real_t>().apply_x(-1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O6<real_t>().apply_x(-1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O6<real_t>().apply_x(-1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O6<real_t>().apply_x(-1,*u,ix+1,iy+1,iz+1);
fine_flux /= 4.0;
coarse_flux = Laplace_flux_O6<double>().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
coarse_flux = Laplace_flux_O6<real_t>().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
dflux = coarse_flux - fine_flux;
@ -1059,8 +1059,8 @@ struct interp_O7_fluxcorr
ustar[q+3] = interp6( (*utop)(ixtop+p,iytop-3,iztop+q), (*utop)(ixtop+p,iytop-2,iztop+q),
(*utop)(ixtop+p,iytop-1,iztop+q), (*utop)(ixtop+p,iytop,iztop+q),
(*utop)(ixtop+p,iytop+1,iztop+q), (*utop)(ixtop+p,iytop+2,iztop+q),
(*utop)(ixtop+p,iytop+3,iztop+q), fac*((double)j-0.5) );
uhat[p] = interp6( ustar, fac*((double)k-0.5) );//-1.5 );
(*utop)(ixtop+p,iytop+3,iztop+q), fac*((real_t)j-0.5) );
uhat[p] = interp6( ustar, fac*((real_t)k-0.5) );//-1.5 );
}
(*u)(ix,iy+j,iz+k) = interp6right( (*u)(ix-4,iy+j,iz+k), (*u)(ix-3,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k),
@ -1074,12 +1074,12 @@ struct interp_O7_fluxcorr
}
fine_flux = 0.0;
fine_flux += Laplace_flux_O6<double>().apply_x(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O6<double>().apply_x(+1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O6<double>().apply_x(+1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O6<double>().apply_x(+1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O6<real_t>().apply_x(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O6<real_t>().apply_x(+1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O6<real_t>().apply_x(+1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O6<real_t>().apply_x(+1,*u,ix,iy+1,iz+1);
coarse_flux = Laplace_flux_O6<double>().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
coarse_flux = Laplace_flux_O6<real_t>().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -1105,8 +1105,8 @@ struct interp_O7_fluxcorr
ustar[q+3] = interp6( (*utop)(ixtop-3,iytop+p-2,iztop+q), (*utop)(ixtop-2,iytop+p-2,iztop+q),
(*utop)(ixtop-1,iytop+p-2,iztop+q), (*utop)(ixtop,iytop+p-2,iztop+q),
(*utop)(ixtop+1,iytop+p-2,iztop+q), (*utop)(ixtop+2,iytop+p-2,iztop+q),
(*utop)(ixtop+3,iytop+p-2,iztop+q), fac*((double)j-0.5) );
uhat[p] = interp6( ustar, fac*((double)k-0.5));//-1.5 );
(*utop)(ixtop+3,iytop+p-2,iztop+q), fac*((real_t)j-0.5) );
uhat[p] = interp6( ustar, fac*((real_t)k-0.5));//-1.5 );
}
(*u)(ix+j,iy,iz+k) = interp6left( uhat[0], uhat[1], uhat[2], (*u)(ix+j,iy+1,iz+k),
@ -1119,12 +1119,12 @@ struct interp_O7_fluxcorr
}
fine_flux = 0.0;
fine_flux += Laplace_flux_O6<double>().apply_y(-1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O6<double>().apply_y(-1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O6<double>().apply_y(-1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O6<double>().apply_y(-1,*u,ix+1,iy+1,iz+1);
fine_flux += Laplace_flux_O6<real_t>().apply_y(-1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O6<real_t>().apply_y(-1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O6<real_t>().apply_y(-1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O6<real_t>().apply_y(-1,*u,ix+1,iy+1,iz+1);
coarse_flux = Laplace_flux_O6<double>().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
coarse_flux = Laplace_flux_O6<real_t>().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -1150,8 +1150,8 @@ struct interp_O7_fluxcorr
ustar[q+3] = interp6( (*utop)(ixtop-3,iytop+p,iztop+q), (*utop)(ixtop-2,iytop+p,iztop+q),
(*utop)(ixtop-1,iytop+p,iztop+q), (*utop)(ixtop,iytop+p,iztop+q),
(*utop)(ixtop+1,iytop+p,iztop+q), (*utop)(ixtop+2,iytop+p,iztop+q),
(*utop)(ixtop+3,iytop+p,iztop+q), fac*((double)j-0.5) );
uhat[p] = interp6( ustar, fac*((double)k-0.5));//+1.5 );
(*utop)(ixtop+3,iytop+p,iztop+q), fac*((real_t)j-0.5) );
uhat[p] = interp6( ustar, fac*((real_t)k-0.5));//+1.5 );
}
(*u)(ix+j,iy,iz+k) = interp6right( (*u)(ix+j,iy-4,iz+k), (*u)(ix+j,iy-3,iz+k), (*u)(ix+j,iy-2,iz+k),
@ -1164,12 +1164,12 @@ struct interp_O7_fluxcorr
}
fine_flux = 0.0;
fine_flux += Laplace_flux_O6<double>().apply_y(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O6<double>().apply_y(+1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O6<double>().apply_y(+1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O6<double>().apply_y(+1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O6<real_t>().apply_y(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O6<real_t>().apply_y(+1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O6<real_t>().apply_y(+1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O6<real_t>().apply_y(+1,*u,ix+1,iy,iz+1);
coarse_flux = Laplace_flux_O6<double>().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
coarse_flux = Laplace_flux_O6<real_t>().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -1196,8 +1196,8 @@ struct interp_O7_fluxcorr
ustar[q+3] = interp6( (*utop)(ixtop-3,iytop+q,iztop+p-2), (*utop)(ixtop-2,iytop+q,iztop+p-2),
(*utop)(ixtop-1,iytop+q,iztop+p-2), (*utop)(ixtop,iytop+q,iztop+p-2),
(*utop)(ixtop+1,iytop+q,iztop+p-2), (*utop)(ixtop+2,iytop+q,iztop+p-2),
(*utop)(ixtop+3,iytop+q,iztop+p-2), fac*((double)j-0.5) );
uhat[p] = interp6( ustar, fac*((double)k-0.5));//-1.5 );
(*utop)(ixtop+3,iytop+q,iztop+p-2), fac*((real_t)j-0.5) );
uhat[p] = interp6( ustar, fac*((real_t)k-0.5));//-1.5 );
}
(*u)(ix+j,iy+k,iz) = interp6left( uhat[0], uhat[1], uhat[2], (*u)(ix+j,iy+k,iz+1),
@ -1210,12 +1210,12 @@ struct interp_O7_fluxcorr
fine_flux = 0.0;
fine_flux += Laplace_flux_O6<double>().apply_z(-1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O6<double>().apply_z(-1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O6<double>().apply_z(-1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O6<double>().apply_z(-1,*u,ix+1,iy+1,iz+1);
fine_flux += Laplace_flux_O6<real_t>().apply_z(-1,*u,ix,iy,iz+1);
fine_flux += Laplace_flux_O6<real_t>().apply_z(-1,*u,ix+1,iy,iz+1);
fine_flux += Laplace_flux_O6<real_t>().apply_z(-1,*u,ix,iy+1,iz+1);
fine_flux += Laplace_flux_O6<real_t>().apply_z(-1,*u,ix+1,iy+1,iz+1);
coarse_flux = Laplace_flux_O6<double>().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
coarse_flux = Laplace_flux_O6<real_t>().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@ -1241,8 +1241,8 @@ struct interp_O7_fluxcorr
ustar[q+3] = interp6( (*utop)(ixtop-3,iytop+q,iztop+p), (*utop)(ixtop-2,iytop+q,iztop+p),
(*utop)(ixtop-1,iytop+q,iztop+p), (*utop)(ixtop,iytop+q,iztop+p),
(*utop)(ixtop+1,iytop+q,iztop+p), (*utop)(ixtop+2,iytop+q,iztop+p),
(*utop)(ixtop+3,iytop+q,iztop+p), fac*((double)j-0.5) );
uhat[p] = interp6( ustar, fac*((double)k-0.5));//+1.5 );
(*utop)(ixtop+3,iytop+q,iztop+p), fac*((real_t)j-0.5) );
uhat[p] = interp6( ustar, fac*((real_t)k-0.5));//+1.5 );
}
(*u)(ix+j,iy+k,iz) = interp6right( (*u)(ix+j,iy+k,iz-4), (*u)(ix+j,iy+k,iz-3), (*u)(ix+j,iy+k,iz-2),
@ -1255,12 +1255,12 @@ struct interp_O7_fluxcorr
}
fine_flux = 0.0;
fine_flux += Laplace_flux_O6<double>().apply_z(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O6<double>().apply_z(+1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O6<double>().apply_z(+1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O6<double>().apply_z(+1,*u,ix+1,iy+1,iz);
fine_flux += Laplace_flux_O6<real_t>().apply_z(+1,*u,ix,iy,iz);
fine_flux += Laplace_flux_O6<real_t>().apply_z(+1,*u,ix+1,iy,iz);
fine_flux += Laplace_flux_O6<real_t>().apply_z(+1,*u,ix,iy+1,iz);
fine_flux += Laplace_flux_O6<real_t>().apply_z(+1,*u,ix+1,iy+1,iz);
coarse_flux = Laplace_flux_O6<double>().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
coarse_flux = Laplace_flux_O6<real_t>().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;

View file

@ -271,7 +271,7 @@ public:
for( int j=0; j<ny; ++j )
for( int k=0; k<nz; ++k )
v(i,j,k) += dmean;
*/
*/
}
};

View file

@ -211,7 +211,8 @@ void solver<S,I,O,T>::twoGrid( unsigned ilevel )
{
MeshvarBnd<T> *uf, *uc, *ff, *fc;
T
double
h = 1.0/(1<<ilevel),
c0 = -1.0/m_scheme.ccoeff(),
h2 = h*h;
@ -282,9 +283,9 @@ void solver<S,I,O,T>::twoGrid( unsigned ilevel )
//....................................................................
int
oxp = uf->offset(0),
oyp = uf->offset(1),
ozp = uf->offset(2);
oxp = uf->offset(0),
oyp = uf->offset(1),
ozp = uf->offset(2);
meshvar_bnd tLu(*uc,false);
#pragma omp parallel for
@ -315,12 +316,12 @@ void solver<S,I,O,T>::twoGrid( unsigned ilevel )
oj = ff->offset(1);
ok = ff->offset(2);
#pragma omp parallel for
#pragma omp parallel for
for( int ix=oi; ix<oi+(int)ff->size(0)/2; ++ix )
for( int iy=oj; iy<oj+(int)ff->size(1)/2; ++iy )
for( int iz=ok; iz<ok+(int)ff->size(2)/2; ++iz )
(*fc)(ix,iy,iz) += ((tLu( ix, iy, iz ) - (m_scheme.apply( *uc, ix, iy, iz )/(4.0*h2))));
tLu.deallocate();
meshvar_bnd ucsave(*uc,true);
@ -336,12 +337,13 @@ void solver<S,I,O,T>::twoGrid( unsigned ilevel )
meshvar_bnd cc(*uc,false);
//... compute correction on coarse grid
#pragma omp parallel for
for( int ix=0; ix<(int)cc.size(0); ++ix )
for( int iy=0; iy<(int)cc.size(1); ++iy )
for( int iz=0; iz<(int)cc.size(2); ++iz )
cc(ix,iy,iz) = (*uc)(ix,iy,iz) - ucsave(ix,iy,iz);
cc(ix,iy,iz) = (*uc)(ix,iy,iz) - ucsave(ix,iy,iz);
ucsave.deallocate();
@ -386,7 +388,7 @@ double solver<S,I,O,T>::compute_error( const MeshvarBnd<T>& u, const MeshvarBnd<
nz = u.size(2);
double err = 0.0;
unsigned count = 0;
size_t count = 0;
#pragma omp parallel for reduction(+:err,count)
for( int ix=0; ix<nx; ++ix )
@ -394,7 +396,7 @@ double solver<S,I,O,T>::compute_error( const MeshvarBnd<T>& u, const MeshvarBnd<
for( int iz=0; iz<nz; ++iz )
if( fabs(unew(ix,iy,iz)) > 0.0 )//&& u(ix,iy,iz) != unew(ix,iy,iz) )
{
err += fabs(1.0 - u(ix,iy,iz)/unew(ix,iy,iz));
err += fabs(1.0 - (double)u(ix,iy,iz)/(double)unew(ix,iy,iz));
++count;
}
@ -438,32 +440,37 @@ double solver<S,I,O,T>::compute_RMS_resid( const GridHierarchy<T>& uh, const Gri
ny = uh.get_grid(ilevel)->size(1),
nz = uh.get_grid(ilevel)->size(2);
double h = 1.0/(1<<ilevel), h2=h*h, err;
double sum = 0.0;
double h = 1.0/(1<<ilevel), h2=h*h;
double sum = 0.0, sumd2 = 0.0;
size_t count = 0;
#pragma omp parallel for reduction(+:sum,count)
#pragma omp parallel for reduction(+:sum,sumd2,count)
for( int ix=0; ix<nx; ++ix )
for( int iy=0; iy<ny; ++iy )
for( int iz=0; iz<nz; ++iz )
{
double r = (m_scheme.apply( *uh.get_grid(ilevel), ix, iy, iz )/h2 + (*fh.get_grid(ilevel))(ix,iy,iz));
double d = (double)(*fh.get_grid(ilevel))(ix,iy,iz);
sumd2 += d*d;
double r = ((double)m_scheme.apply( *uh.get_grid(ilevel), ix, iy, iz )/h2 + (double)(*fh.get_grid(ilevel))(ix,iy,iz));
sum += r*r;
++count;
}
if( m_is_ini )
m_residu_ini[ilevel] = sqrt(sum)/count;
err = sqrt(sum)/count/m_residu_ini[ilevel];
double err_abs = sqrt(sum/count);
double err_rel = err_abs / sqrt(sumd2/count);
if( verbose && !m_is_ini )
std::cout << " Level " << std::setw(6) << ilevel << ", Error = " << err << std::endl;
std::cout << " Level " << std::setw(6) << ilevel << ", Error = " << err_rel << std::endl;
LOGDEBUG("[mg] level %3d, rms residual %g",ilevel,err);
LOGDEBUG("[mg] level %3d, rms residual %g, rel. error %g",ilevel, err_abs, err_rel);
if( err > maxerr )
maxerr = err;
if( err_rel > maxerr )
maxerr = err_rel;
}
@ -481,10 +488,12 @@ double solver<S,I,O,T>::solve( GridHierarchy<T>& uh, double acc, double h, bool
double err, maxerr = 1e30;
unsigned niter = 0;
bool fullverbose = false;
bool fullverbose = true;//false;
m_pu = &uh;
//err = compute_RMS_resid( *m_pu, *m_pf, fullverbose );
//... iterate ...//
while (true)
{

View file

@ -426,30 +426,23 @@ random_numbers<T>::random_numbers( random_numbers<T>& rc, unsigned cubesize, lon
nxc=lx[0]/2, nyc=lx[1]/2, nzc=lx[2]/2;
fftw_real
*rcoarse = new fftw_real[nxc*nyc*(nzc+2)],
*rfine = new fftw_real[nx*ny*(nz+2)];
fftw_real *rfine = new fftw_real[nx*ny*(nz+2)];
fftw_complex *cfine = reinterpret_cast<fftw_complex*> (rfine);
fftw_complex
*ccoarse = reinterpret_cast<fftw_complex*> (rcoarse),
*cfine = reinterpret_cast<fftw_complex*> (rfine);
#ifdef FFTW3
#ifdef SINGLE_PRECISION
fftwf_plan
pc = fftwf_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE),
pf = fftwf_plan_dft_r2c_3d( nx, ny, nz, rfine, cfine, FFTW_ESTIMATE),
ipf = fftwf_plan_dft_c2r_3d( nx, ny, nz, cfine, rfine, FFTW_ESTIMATE);
pf = fftwf_plan_dft_r2c_3d( nx, ny, nz, rfine, cfine, FFTW_ESTIMATE),
ipf = fftwf_plan_dft_c2r_3d( nx, ny, nz, cfine, rfine, FFTW_ESTIMATE);
#else
fftw_plan
pc = fftw_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE),
pf = fftw_plan_dft_r2c_3d( nx, ny, nz, rfine, cfine, FFTW_ESTIMATE),
ipf = fftw_plan_dft_c2r_3d( nx, ny, nz, cfine, rfine, FFTW_ESTIMATE);
pf = fftw_plan_dft_r2c_3d( nx, ny, nz, rfine, cfine, FFTW_ESTIMATE),
ipf = fftw_plan_dft_c2r_3d( nx, ny, nz, cfine, rfine, FFTW_ESTIMATE);
#endif
#else
rfftwnd_plan
pc = rfftw3d_create_plan( nxc, nyc, nzc, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
pf = rfftw3d_create_plan( nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
ipf = rfftw3d_create_plan( nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
pf = rfftw3d_create_plan( nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
ipf = rfftw3d_create_plan( nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
#endif
#pragma omp parallel for
@ -460,6 +453,22 @@ random_numbers<T>::random_numbers( random_numbers<T>& rc, unsigned cubesize, lon
size_t q = ((size_t)i*(size_t)ny+(size_t)j)*(size_t)(nz+2)+(size_t)k;
rfine[q] = (*this)(x0[0]+i,x0[1]+j,x0[2]+k);
}
this->free_all_mem(); // temporarily free memory, allocate again later
fftw_real *rcoarse = new fftw_real[nxc*nyc*(nzc+2)];
fftw_complex *ccoarse = reinterpret_cast<fftw_complex*> (rcoarse);
#ifdef FFTW3
#ifdef SINGLE_PRECISION
fftwf_plan pc = fftwf_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE);
#else
fftw_plan pc = fftw_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE);
#endif
#else
rfftwnd_plan pc = rfftw3d_create_plan( nxc, nyc, nzc, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
#endif
#pragma omp parallel for
for( int i=0; i<(int)nxc; i++ )
@ -489,7 +498,7 @@ random_numbers<T>::random_numbers( random_numbers<T>& rc, unsigned cubesize, lon
double fftnorm = 1.0/((double)nx*(double)ny*(double)nz);
#pragma omp parallel for
#pragma omp parallel for
for( int i=0; i<(int)nxc; i++ )
for( int j=0; j<(int)nyc; j++ )
for( int k=0; k<(int)nzc/2+1; k++ )
@ -513,7 +522,7 @@ random_numbers<T>::random_numbers( random_numbers<T>& rc, unsigned cubesize, lon
delete[] rcoarse;
#pragma omp parallel for
#pragma omp parallel for
for( int i=0; i<(int)nx; i++ )
for( int j=0; j<(int)ny; j++ )
for( int k=0; k<(int)nz/2+1; k++ )
@ -542,13 +551,13 @@ random_numbers<T>::random_numbers( random_numbers<T>& rc, unsigned cubesize, lon
#endif
#endif
#pragma omp parallel for
#pragma omp parallel for
for( int i=0; i<(int)nx; i++ )
for( int j=0; j<(int)ny; j++ )
for( int k=0; k<(int)nz; k++ )
{
size_t q = ((size_t)i*ny+(size_t)j)*(nz+2)+(size_t)k;
(*this)(x0[0]+i,x0[1]+j,x0[2]+k) = rfine[q];
(*this)(x0[0]+i,x0[1]+j,x0[2]+k,false) = rfine[q];
}
delete[] rfine;
@ -962,9 +971,11 @@ void random_number_generator<rng,T>::correct_avg( int icoarse, int ifine )
qf[6] = (1*(size_t)nyf+(size_t)j+1)*(size_t)nzf+(size_t)k+0;
qf[7] = (1*(size_t)nyf+(size_t)j+1)*(size_t)nzf+(size_t)k+1;
double d = 0.0;
for( int q=0; q<8; ++q )
deg_rand[qc] += fac*fine_rand[qf[q]];
d += fac*fine_rand[qf[q]];
deg_rand[qc] += d;
}
}

View file

@ -146,8 +146,8 @@ public:
rnums_.clear();
}
//! access a random number
inline T& operator()( int i, int j, int k )
//! access a random number, this allocates a cube and fills it with consistent random numbers
inline T& operator()( int i, int j, int k, bool fillrand=true )
{
int ic, jc, kc, is, js, ks;
@ -165,7 +165,9 @@ public:
{
//... cube has not been precomputed. fill now with random numbers
rnums_[ icube ] = new Meshvar<T>( cubesize_, 0, 0, 0 );
fill_cube(ic, jc, kc);
if( fillrand )
fill_cube(ic, jc, kc);
}
//... determine cell in cube
@ -176,6 +178,17 @@ public:
return (*rnums_[ icube ])(is,js,ks);
}
//! free all cubes
void free_all_mem( void )
{
for( unsigned i=0; i<rnums_.size(); ++i )
if( rnums_[i] != NULL )
{
delete rnums_[i];
rnums_[i] = NULL;
}
}
};