From c48c73488abc7f64345c4397d9c175381dc4721d Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 2 Jul 2010 11:49:30 -0700 Subject: [PATCH] Initial commit, beta version release candidate --- Makefile | 61 ++ Numerics.cc | 154 ++++ Numerics.hh | 204 +++++ TransferFunction.hh | 1318 +++++++++++++++++++++++++++++ config_file.hh | 327 ++++++++ convolution_kernel.cc | 583 +++++++++++++ convolution_kernel.hh | 116 +++ cosmology.cc | 222 +++++ cosmology.hh | 224 +++++ densities.cc | 837 +++++++++++++++++++ densities.hh | 827 +++++++++++++++++++ fd_schemes.hh | 408 +++++++++ general.hh | 121 +++ main.cc | 605 ++++++++++++++ mesh.hh | 1181 ++++++++++++++++++++++++++ mg_interp.hh | 1267 ++++++++++++++++++++++++++++ mg_operators.hh | 1362 ++++++++++++++++++++++++++++++ mg_solver.hh | 1051 +++++++++++++++++++++++ output.cc | 73 ++ output.hh | 176 ++++ plugins/HDF_IO.hh | 1024 +++++++++++++++++++++++ plugins/output_enzo.cc | 396 +++++++++ plugins/output_gadget2.cc | 670 +++++++++++++++ plugins/output_generic.cc | 192 +++++ plugins/output_grafic2.cc | 325 ++++++++ plugins/transfer_bbks.cc | 95 +++ plugins/transfer_camb.cc | 167 ++++ plugins/transfer_eisenstein.cc | 218 +++++ plugins/transfer_inflation.cc | 64 ++ poisson.cc | 499 +++++++++++ poisson.hh | 140 ++++ random.hh | 459 +++++++++++ schemes.hh | 275 +++++++ solver.hh | 1110 +++++++++++++++++++++++++ tests.hh | 309 +++++++ transfer_function.cc | 72 ++ transfer_function.hh | 1420 ++++++++++++++++++++++++++++++++ 37 files changed, 18552 insertions(+) create mode 100644 Makefile create mode 100644 Numerics.cc create mode 100644 Numerics.hh create mode 100644 TransferFunction.hh create mode 100644 config_file.hh create mode 100644 convolution_kernel.cc create mode 100644 convolution_kernel.hh create mode 100644 cosmology.cc create mode 100644 cosmology.hh create mode 100644 densities.cc create mode 100644 densities.hh create mode 100644 fd_schemes.hh create mode 100644 general.hh create mode 100644 main.cc create mode 100644 mesh.hh create mode 100644 mg_interp.hh create mode 100644 mg_operators.hh create mode 100644 mg_solver.hh create mode 100644 output.cc create mode 100644 output.hh create mode 100755 plugins/HDF_IO.hh create mode 100644 plugins/output_enzo.cc create mode 100644 plugins/output_gadget2.cc create mode 100644 plugins/output_generic.cc create mode 100644 plugins/output_grafic2.cc create mode 100644 plugins/transfer_bbks.cc create mode 100644 plugins/transfer_camb.cc create mode 100644 plugins/transfer_eisenstein.cc create mode 100644 plugins/transfer_inflation.cc create mode 100644 poisson.cc create mode 100644 poisson.hh create mode 100644 random.hh create mode 100644 schemes.hh create mode 100644 solver.hh create mode 100644 tests.hh create mode 100644 transfer_function.cc create mode 100644 transfer_function.hh diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..93a2de6 --- /dev/null +++ b/Makefile @@ -0,0 +1,61 @@ +############################################################################## +### compile time configuration options +MULTITHREADFFTW = yes +SINGLEPRECISION = no +HAVEHDF5 = yes + +############################################################################## +### compiler and path settings +CC = g++ +OPT = -O3 +CFLAGS = -Wall -fopenmp +LFLAGS = -fopenmp -lgsl -lgslcblas +CPATHS = -I. -I$(HOME)/local/include -I/opt/local/include -I/usr/local/include +LPATHS = -L$(HOME)/local/lib -L/opt/local/lib -L/usr/local/lib + +############################################################################## +# if you have FFTW 2.1.5 with multi-thread support, you can enable the option +ifeq ($(MULTITHREADFFTW), yes) + ifeq ($(SINGLEPRECISION), yes) + LFLAGS += -lsfftw_threads -lsrfftw_threads + else + LFLAGS += -ldfftw_threads -ldrfftw_threads + endif +else + CFLAGS += -DSINGLETHREAD_FFTW +endif + +############################################################################## +# this section makes sure that the correct FFTW libraries are linked +ifeq ($(SINGLEPRECISION), yes) + CFLAGS += -DSINGLE_PRECISION + LFLAGS += -lsrfftw -lsfftw +else + LFLAGS += -ldrfftw -ldfftw +endif + +############################################################################## +#if you have HDF5 installed, you can also enable the following options +ifeq ($(HAVEHDF5), yes) + OPT += -DH5_USE_16_API -DHAVE_HDF5 + LFLAGS += -lhdf5 +endif + +############################################################################## +CFLAGS += $(OPT) +TARGET = MUSIC +OBJS = output.o transfer_function.o Numerics.o \ + convolution_kernel.o densities.o cosmology.o main.o \ + $(patsubst plugins/%.cc,plugins/%.o,$(wildcard plugins/*.cc)) + +############################################################################## +all: $(OBJS) $(TARGET) + +$(TARGET): $(OBJS) + $(CC) $(LPATHS) -o $@ $^ $(LFLAGS) +%.o: %.cc *.hh Makefile + $(CC) $(CFLAGS) $(CPATHS) -c $< -o $@ + +clean: + rm -rf $(OBJS) + diff --git a/Numerics.cc b/Numerics.cc new file mode 100644 index 0000000..8692ce0 --- /dev/null +++ b/Numerics.cc @@ -0,0 +1,154 @@ +/* + + numerics.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +*/ + +#ifdef WITH_MPI + #ifdef MANNO + #include + #else + #include + #endif +#endif +#include +#include "Numerics.hh" + +#ifndef REL_PRECISION +#define REL_PRECISION 1.e-4 +#endif + +int Base_interp::locate(const double x) +{ + int ju,jm,jl; + if (n < 2 || mm < 2 || mm > n) throw("locate size error"); + bool ascnd=(xx[n-1] >= xx[0]); + jl=0; + ju=n-1; + while (ju-jl > 1) { + jm = (ju+jl) >> 1; + if ((x >= xx[jm]) == ascnd) + jl=jm; + else + ju=jm; + } + cor = abs(jl-jsav) > dj ? 0 : 1; + jsav = jl; + return std::max(0,std::min(n-mm,jl-((mm-2)>>1))); +} + +int Base_interp::hunt(const double x) +{ + int jl=jsav, jm, ju, inc=1; + if (n < 2 || mm < 2 || mm > n) throw("hunt size error"); + + bool ascnd=(xx[n-1] >= xx[0]); + + if (jl < 0 || jl > n-1) { + jl=0; + ju=n-1; + } else { + if ((x >= xx[jl]) == ascnd) { + for (;;) { + ju = jl + inc; + if (ju >= n-1) { + ju = n-1; + break; + } else if ((x < xx[ju]) == ascnd) break; + else { + jl = ju; + inc += inc; + } + } + } else { + ju = jl; + for (;;) { + jl = jl - inc; + if (jl <= 0) { jl = 0; break;} + else if ((x >= xx[jl]) == ascnd) break; + else { + ju = jl; + inc += inc; + } + } + } + } + + while (ju-jl > 1) { + jm = (ju+jl) >> 1; + if ((x >= xx[jm]) == ascnd) + jl=jm; + else + ju=jm; + + } + + cor = abs(jl-jsav) > dj ? 0 : 1; + jsav = jl; + + return std::max(0,std::min(n-mm,jl-((mm-2)>>1))); +} + +#if 1 +real_t integrate( double (* func) (double x, void * params), double a, double b, void *params ) +{ + gsl_function F; + F.function = func; + F.params = params;//NULL; + + double result; + double error; + //size_t neval; + gsl_integration_workspace *w = gsl_integration_workspace_alloc(100000); + gsl_integration_qag( &F, a, b, 0, REL_PRECISION, 100000, 6, w, &result, &error ); + //gsl_integration_qng( &F, a, b, 0, REL_PRECISION, &result, &error, &neval ); + + //gsl_integration_qags( &F, a, b, 0, REL_PRECISION, 10000, w, &result, &error ); + + gsl_integration_workspace_free(w); + + + if( error > 10*REL_PRECISION ) + std::cerr << " - Warning: no convergence in function 'integrate', rel. error=" << error/result << std::endl; + + return (real_t)result; +} +#else + +real_t integrate( double (* func) (double x, void * params), double a, double b, void *params ) +{ + unsigned nn = 1000; + double la = log10(a), lb = log10(b), dlk = (lb-la)/(nn-1); + + double sum = 0.0; + for( unsigned i=1; i. + + */ + +#ifndef __NUMERICS_HH +#define __NUMERICS_HH + +#ifdef WITH_MPI + #ifdef MANNO + #include + #else + #include + #endif +#endif + +#include +#include +#include +#include +#include "general.hh" + + +real_t integrate( double (* func) (double x, void * params), double a, double b, void *params=NULL); + +struct Base_interp +{ + int n, mm, jsav, cor, dj; + const double *xx, *yy; + + Base_interp(std::vector &x, const double *y, int m) + : n(x.size()), mm(m), jsav(0), cor(0), dj(0), xx(&x[0]), yy(y) + { + dj = std::min(1,(int)pow((double)n,0.25)); + } + + double interp(double x) { + int jlo = cor ? hunt(x) : locate(x); + return rawinterp(jlo,x); + } + + virtual ~Base_interp() + { } + + int locate(const double x); + int hunt(const double x); + + double virtual rawinterp(int jlo, double x) = 0; + +}; + + + + +class Spline_interp : public Base_interp +{ +protected: + std::vector y2; + + void sety2( const double *xv, const double *yv, double yp1, double ypn) + { + int i,k; + double p,qn,sig,un; + //int n=y2.size(); + std::vector u(n-1,0.0); + + if (yp1 > 0.99e99) + y2[0]=u[0]=0.0; + else { + y2[0] = -0.5; + u[0]=(3.0/(xv[1]-xv[0]))*((yv[1]-yv[0])/(xv[1]-xv[0])-yp1); + } + + for (i=1;i 0.99e99) + qn=un=0.0; + else { + qn=0.5; + un=(3.0/(xv[n-1]-xv[n-2]))*(ypn-(yv[n-1]-yv[n-2])/(xv[n-1]-xv[n-2])); + } + + y2[n-1]=(un-qn*u[n-2])/(qn*y2[n-2]+1.0); + + for (k=n-2;k>=0;k--) + y2[k]=y2[k]*y2[k+1]+u[k]; + } + + +public: + + Spline_interp( std::vector &xv, std::vector &yv, double yp1=1e99, double ypn=1.e99 ) + : Base_interp( xv, &yv[0], xv.size() ), y2( xv.size(), 0.0 ) + { + sety2( &xv[0], &yv[0], yp1, ypn ); + } + + virtual ~Spline_interp() + { } + + double rawinterp( int jl, double x ) + { + int klo=jl,khi=jl+1; + double y,h,b,a; + + h=xx[khi]-xx[klo]; + + if (h == 0.0) throw("Bad input to routine splint"); + + a=(xx[khi]-x)/h; + b=(x-xx[klo])/h; + y=a*yy[klo]+b*yy[khi]+((a*a*a-a)*y2[klo] +(b*b*b-b)*y2[khi])*(h*h)/6.0; + + return y; + } + +}; + + + +#if 1 +inline unsigned locate( const double x, const std::vector vx ) +{ + long unsigned ju,jm,jl; + bool ascnd=(vx[vx.size()-1]>=vx[0]); + jl = 0; + ju = vx.size()-1; + while( ju-jl > 1 ) { + jm = (ju+jl)>>1; + if( (x >= vx[jm]) == ascnd ) + jl = jm; + else + ju = jm; + } + return std::max((long unsigned)0,std::min((long unsigned)(vx.size()-2),(long unsigned)jl)); +} +#else +inline unsigned locate( const real_t x, const std::vector vx ) +{ + unsigned i=0; + while( vx[i]0)i=i-1; + return i; +} +#endif + + +inline real_t linint( const double x, const std::vector& xx, const std::vector& yy ) +{ + unsigned i = locate(x,xx); +/* if(i==xx.size()-1) //--i; + return 0.0; + if( x=xx[xx.size()-1] ) + return yy[yy.size()-1]; + double a = 1.0/(xx[i+1]-xx[i]); + double dy = (yy[i+1]-yy[i])*a; + double y0 = (yy[i]*xx[i+1]-xx[i]*yy[i+1])*a; + return dy*x+y0; +} +/* +inline float linint( float x, const std::vector& xx, const std::vector& yy ) +{ + + int i=0; + while( xx[i]0)i=i-1; + float dy = (yy[i+1]-yy[i])/(xx[i+1]-xx[i]); + float y0 = (yy[i]*xx[i+1]-xx[i]*yy[i+1])/(xx[i+1]-xx[i]); + return dy*x+y0; +} +*/ + + +#endif + + diff --git a/TransferFunction.hh b/TransferFunction.hh new file mode 100644 index 0000000..dc7ea52 --- /dev/null +++ b/TransferFunction.hh @@ -0,0 +1,1318 @@ +/* + This file is part of FROLIC - + a tool to generate initial conditions for cosmological simulations + + Copyright (C) 2008-09 Oliver Hahn, ojha@gmx.de + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef __TRANSFERFUNCTION_HH +#define __TRANSFERFUNCTION_HH + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "Numerics.hh" +#include "general.hh" + +#include + +#define NZERO_Q + +typedef std::complex complex; + +//! Abstract base class for transfer functions +/*! + This class implements a purely virtual interface that can be + used to derive instances implementing various transfer functions. +*/ +class TransferFunction{ +public: + Cosmology m_Cosmology; + +public: + + TransferFunction( Cosmology acosm ) : m_Cosmology( acosm ) { }; + virtual double compute( double k ) = 0; + virtual ~TransferFunction(){ }; + virtual double get_kmax( void ) = 0; + virtual double get_kmin( void ) = 0; +}; + +class TransferFunction_real +{ + +public: + gsl_interp_accel *accp, *accn; + gsl_spline *splinep, *splinen; + double Tr0_, Tmin_, Tmax_, Tscale_; + double rneg_, rneg2_; + static TransferFunction *ptf_; + static double nspec_; + +protected: + + double krgood( double mu, double q, double dlnr, double kr ) + { + double krnew = kr; + complex cdgamma, zm, zp; + double arg, iarg, xm, xp, y; + gsl_sf_result g_a, g_p; + + xp = 0.5*(mu+1.0+q); + xm = 0.5*(mu+1.0-q); + y = M_PI/(2.0*dlnr); + zp=complex(xp,y); + zm=complex(xm,y); + + gsl_sf_lngamma_complex_e (zp.real(), zp.imag(), &g_a, &g_p); + zp=std::polar(exp(g_a.val),g_p.val); + double zpa = g_p.val; + + gsl_sf_lngamma_complex_e (zm.real(), zm.imag(), &g_a, &g_p); + zm=std::polar(exp(g_a.val),g_p.val); + double zma = g_p.val; + + arg=log(2.0/kr)/dlnr+(zpa+zma)/M_PI; + iarg=(double)((int)(arg + 0.5)); + + if( arg!=iarg ) + krnew=kr*exp((arg-iarg)*dlnr); + + return krnew; + } + + void transform( double pnorm, double dplus, unsigned N, double q, std::vector& rr, std::vector& TT ) + { + const double mu = 0.5; + double qmin = 1.0e-6, qmax = 1.0e+6; + + q = 0.0; + + N = 16384; + +#ifdef NZERO_Q + //q = 0.4; + q = 0.2; +#endif + + double kmin = qmin, kmax=qmax; + double rmin = qmin, rmax = qmax; + double k0 = exp(0.5*(log(kmax)+log(kmin))); + double r0 = exp(0.5*(log(rmax)+log(rmin))); + double L = log(rmax)-log(rmin); + double k0r0 = k0*r0; + double dlnk = L/N, dlnr = L/N; + + double sqrtpnorm = sqrt(pnorm); + + double dir = 1.0; + + double fftnorm = 1.0/N; + + fftw_complex in[N], out[N]; + fftw_plan p,ip; + + //... perform anti-ringing correction from Hamilton (2000) + k0r0 = krgood( mu, q, dlnr, k0r0 ); + + std::ofstream ofsk("transfer_k.txt"); + double sum_in = 0.0; + for( unsigned i=0; icompute( k )*pow(k,0.5*nspec_)*pow(k,1.5-q); + in[i].im = 0.0; + + sum_in += in[i].re; + ofsk << std::setw(16) << k < (int)N/2 ) + ii -= N; + +#ifndef NZERO_Q + double y=ii*M_PI/L; + complex zp((mu+1.0)*0.5,y); + gsl_sf_result g_a, g_p; + gsl_sf_lngamma_complex_e(zp.real(), zp.imag(), &g_a, &g_p); + + double arg = 2.0*(log(2.0/k0r0)*y+g_p.val); + complex cu = complex(out[i].re,out[i].im)*std::polar(1.0,arg); + out[i].re = cu.real()*fftnorm; + out[i].im = cu.imag()*fftnorm; + +#else + //complex x(dir*q, (double)ii*2.0*M_PI/L); + complex x(dir*q, (double)ii*2.0*M_PI/L); + gsl_sf_result g_a, g_p; + + complex g1, g2, garg, U, phase; + complex twotox = pow(complex(2.0,0.0),x); + + ///////////////////////////////////////////////////////// + //.. evaluate complex Gamma functions + + garg = 0.5*(mu+1.0+x); + gsl_sf_lngamma_complex_e (garg.real(), garg.imag(), &g_a, &g_p); + g1 = std::polar(exp(g_a.val),g_p.val); + + + garg = 0.5*(mu+1.0-x); + gsl_sf_lngamma_complex_e (garg.real(), garg.imag(), &g_a, &g_p); + g2 = std::polar(exp(g_a.val),g_p.val); + + ///////////////////////////////////////////////////////// + //.. compute U + + if( (fabs(g2.real()) < 1e-19 && fabs(g2.imag()) < 1e-19) ) + { + //std::cerr << "Warning : encountered possible singularity in TransferFunction_real::transform!\n"; + g1 = 1.0; g2 = 1.0; + } + + + U = twotox * g1 / g2; + phase = pow(complex(k0r0,0.0),complex(0.0,2.0*M_PI*(double)ii/L)); + + complex cu = complex(out[i].re,out[i].im)*U*phase*fftnorm; + + out[i].re = cu.real(); + out[i].im = cu.imag(); + + if( (out[i].re != out[i].re)||(out[i].im != out[i].im) ) + { std::cerr << "NaN @ i=" << i << ", U= " << U << ", phase = " << phase << ", g1 = " << g1 << ", g2 = " << g2 << std::endl; + std::cerr << "mu+1+q = " << mu+1.0+q << std::endl; + //break; + } + +#endif + + } + + /*out[N/2].im = 0.0; + out[N/2+1].im = 0.0; + out[N/2+1].re = out[N/2].re; + out[N/2].im = 0.0;*/ + + fftw_one(ip, out, in); + + rr.assign(N,0.0); + TT.assign(N,0.0); + + r0 = k0r0/k0; + + for( unsigned i=0; iN/2) + // ii-=N; + + + + double r = r0*exp(-ii*dlnr); + rr[N-i-1] = r; + TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) * in[i].re*pow(r,-(1.5+q)); + + //TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) * in[i].re*exp( -dir*(q+1.5)*ii*dlnr +q*log(k0r0))/r0; + + //rr[i] = r; + //TT[i] = 4.0*M_PI* sqrt(M_PI/2.0) * in[i].re*pow(r,-(1.5+q)); + + } + + + { + std::ofstream ofs("transfer_real_new.txt"); + for( unsigned i=0; i r,T,xp,yp,xn,yn; + + transform( pnorm, dplus, nr, q, r, T ); + + //... determine r=0 zero component by integrating up to the Nyquist frequency + gsl_integration_workspace * wp; + gsl_function F; + wp = gsl_integration_workspace_alloc(20000); + F.function = &call_wrapper; + double par[2]; par[0] = dplus*sqrt(pnorm); //par[1] = M_PI/kny; + F.params = (void*)par; + double error; + + //#warning factor of sqrt(1.5) needs to be adjusted for non-equilateral boxes + //.. need a factor sqrt( 2*kny^2_x + 2*kny^2_y + 2*kny^2_z )/2 = sqrt(3/2)kny (in equilateral case) + gsl_integration_qag (&F, 0.0, sqrt(1.5)*knymax, 0, 1e-8, 20000, GSL_INTEG_GAUSS21, wp, &Tr0_, &error); + //Tr0_ = 0.0; + gsl_integration_workspace_free(wp); + + + for( unsigned i=0; i 0.0 ) + { + xp.push_back( 2.0*log10(r[i]) ); + yp.push_back( log10(T[i]) ); + rneg_ = r[i]; + rneg2_ = rneg_*rneg_; + }else { + xn.push_back( 2.0*log10(r[i]) ); + yn.push_back( log10(-T[i]) ); + }*/ + + + if( r[i] > rmin && r[i] < rmax ) + { + xp.push_back( 2.0*log10(r[i]) ); + yp.push_back( log10(fabs(T[i])) ); + xn.push_back( 2.0*log10(r[i]) ); + if( T[i] >= 0.0 ) + yn.push_back( 1.0 ); + else + yn.push_back( -1.0 ); + + + //ofs << std::setw(16) << xp.back() << std::setw(16) << yp.back() << std::endl; + } + + } + + + + + + accp = gsl_interp_accel_alloc (); + accn = gsl_interp_accel_alloc (); + + //... spline interpolation is only marginally slower here + splinep = gsl_spline_alloc (gsl_interp_cspline, xp.size() ); + splinen = gsl_spline_alloc (gsl_interp_cspline, xn.size() ); + + //... set up everything for spline interpolation + gsl_spline_init (splinep, &xp[0], &yp[0], xp.size() ); + gsl_spline_init (splinen, &xn[0], &yn[0], xn.size() ); + + + + + { + double dlogr = (log10(rmax)-log10(rmin))/100; + std::ofstream ofs("transfer_splinep.txt"); + + for( int i=0; i< 100; ++i ) + { + double r = rmin*pow(10.0,i*dlogr); + ofs << std::setw(16) << r << std::setw(16) << compute_real(r*r) << std::endl; + } + } + + } + + static double call_wrapper( double k, void *arg ) + { + double *a = (double*)arg; + return 4.0*M_PI*a[0]*ptf_->compute( k )*pow(k,0.5*nspec_)*k*k; + } + + ~TransferFunction_real() + { + gsl_spline_free (splinen); + gsl_interp_accel_free (accn); + gsl_spline_free (splinep); + gsl_interp_accel_free (accp); + + } + + inline double compute_real( double r2 ) const + { + const double EPS = 1e-8; + const double Reps2 = EPS*EPS; + + if( r2 x(nr+1,0.0),y(nr+1,0.0); + + + /*std::ofstream ofs("transfer_fourier.txt"); + for( int i=0; i<1000; ++i ) + { + double k=1.e-3*pow(10.0,i*6.0/1000.0); + ofs << k << "\t\t" << call_wrapper(k, NULL) << std::endl; + }*/ + + + double fac = sqrt(pnorm)*dplus; + + std::ofstream ofs("transfer_real_old.txt"); + + for( unsigned i=0; i> k; + ss >> Tk; + + m_tab_k.push_back( k ); + m_tab_Tk.push_back( Tk ); + + } +#ifdef WITH_MPI + } + + unsigned n=m_tab_k.size(); + MPI::COMM_WORLD.Bcast( &n, 1, MPI_UNSIGNED, 0 ); + + if( MPI::COMM_WORLD.Get_rank() > 0 ){ + m_tab_k.assign(n,0); + m_tab_Tk.assign(n,0); + } + + MPI::COMM_WORLD.Bcast( &m_tab_k[0], n, MPI_DOUBLE, 0 ); + MPI::COMM_WORLD.Bcast( &m_tab_Tk[0], n, MPI_DOUBLE, 0 ); +#endif + + } + +public: + TransferFunction_tab( Cosmology aCosm, std::string filename ) + : TransferFunction( aCosm ), m_filename( filename ) + { + read_table(); + } + + virtual inline double compute( double k ){ + + return linint( k, m_tab_k, m_tab_Tk ); + } + + inline double get_kmin( void ){ + return m_tab_k[1]; + } + + inline double get_kmax( void ){ + return m_tab_k[m_tab_k.size()-2]; + } + +}; + +class TransferFunction_CAMB : public TransferFunction{ +public: + enum TFtype{ + TF_baryon, TF_cdm, TF_total + }; + + +private: + //Cosmology m_Cosmology; + + std::string m_filename_Pk, m_filename_Tk; + std::vector m_tab_k, m_tab_Tk; + + Spline_interp *m_psinterp; + gsl_interp_accel *acc; + gsl_spline *spline; + + + + void read_table( TFtype iwhich ){ +#ifdef WITH_MPI + if( MPI::COMM_WORLD.Get_rank() == 0 ){ +#endif + std::cerr + << " - reading tabulated transfer function data from file \n" + << " \'" << m_filename_Tk << "\'\n"; + + std::string line; + std::ifstream ifs( m_filename_Tk.c_str() ); + + m_tab_k.clear(); + m_tab_Tk.clear(); + + while( !ifs.eof() ){ + getline(ifs,line); + + if(ifs.eof()) break; + + std::stringstream ss(line); + + double k, Tkc, Tkb, Tkg, Tkr, Tknu, Tktot; + ss >> k; + ss >> Tkc; + ss >> Tkb; + ss >> Tkg; + ss >> Tkr; + ss >> Tknu; + ss >> Tktot; + + m_tab_k.push_back( log10(k) ); + + switch( iwhich ){ + case TF_total: + m_tab_Tk.push_back( log10(Tktot) ); + break; + case TF_baryon: + m_tab_Tk.push_back( log10(Tkb) ); + break; + case TF_cdm: + m_tab_Tk.push_back( log10(Tkc) ); + break; + } + //m_tab_Tk_cdm.push_back( Tkc ); + //m_tab_Tk_baryon.push_back( Tkb ); + //m_tab_Tk_tot.push_back( Tktot ); + } + + ifs.close(); + + + + +#ifdef WITH_MPI + } + + unsigned n=m_tab_k.size(); + MPI::COMM_WORLD.Bcast( &n, 1, MPI_UNSIGNED, 0 ); + + if( MPI::COMM_WORLD.Get_rank() > 0 ){ + m_tab_k.assign(n,0); + m_tab_Tk.assign(n,0); + } + + MPI::COMM_WORLD.Bcast( &m_tab_k[0], n, MPI_DOUBLE, 0 ); + MPI::COMM_WORLD.Bcast( &m_tab_Tk[0], n, MPI_DOUBLE, 0 ); +#endif + + } + +public: + TransferFunction_CAMB( Cosmology aCosm, std::string filename_Tk, TFtype iwhich ) + : TransferFunction( aCosm ), m_filename_Tk( filename_Tk ), m_psinterp( NULL ) + { + read_table( iwhich ); + + //m_psinterp = new Spline_interp( m_tab_k, m_tab_Tk ); + + acc = gsl_interp_accel_alloc(); + //spline = gsl_spline_alloc( gsl_interp_cspline, m_tab_k.size() ); + spline = gsl_spline_alloc( gsl_interp_akima, m_tab_k.size() ); + + gsl_spline_init (spline, &m_tab_k[0], &m_tab_Tk[0], m_tab_k.size() ); + + } + + virtual ~TransferFunction_CAMB() + { + //delete m_psinterp; + + gsl_spline_free (spline); + gsl_interp_accel_free (acc); + } + + virtual inline double compute( double k ){ + //double Tkc = linint( k, m_tab_k, m_tab_Tk_cdm ); + //double Tkb = linint( k, m_tab_k, m_tab_Tk_baryon ); + + //return pow(10.0, linint( log10(k), m_tab_k, m_tab_Tk )); + + //return pow(10.0, m_psinterp->interp(log10(k)) ); + + //if( k0 */ + growth_to_z0, /* D_1(z)/D_1(0) -- the growth relative to z=0 */ + hhubble, /* Need to pass Hubble constant to TFmdm_onek_hmpc() */ + k_equality, /* The comoving wave number of the horizon at equality*/ + obhh, /* Omega_baryon * hubble^2 */ + omega_curv, /* = 1 - omega_matter - omega_lambda */ + omega_lambda_z, /* Omega_lambda at the given redshift */ + omega_matter_z, /* Omega_matter at the given redshift */ + omhh, /* Omega_matter * hubble^2 */ + onhh, /* Omega_hdm * hubble^2 */ + p_c, /* The correction to the exponent before drag epoch */ + p_cb, /* The correction to the exponent after drag epoch */ + sound_horizon_fit, /* The sound horizon at the drag epoch */ + theta_cmb, /* The temperature of the CMB, in units of 2.7 K */ + y_drag, /* Ratio of z_equality to z_drag */ + z_drag, /* Redshift of the drag epoch */ + z_equality; /* Redshift of matter-radiation equality */ + + /* The following are set in TFmdm_onek_mpc() */ + float gamma_eff, /* Effective \Gamma */ + growth_cb, /* Growth factor for CDM+Baryon perturbations */ + growth_cbnu, /* Growth factor for CDM+Baryon+Neutrino pert. */ + max_fs_correction, /* Correction near maximal free streaming */ + qq, /* Wavenumber rescaled by \Gamma */ + qq_eff, /* Wavenumber rescaled by effective Gamma */ + qq_nu, /* Wavenumber compared to maximal free streaming */ + tf_master, /* Master TF */ + tf_sup, /* Suppressed TF */ + y_freestream; /* The epoch of free-streaming for a given scale */ + + /* Finally, TFmdm_onek_mpc() and TFmdm_onek_hmpc() give their answers as */ + float tf_cb, /* The transfer function for density-weighted + CDM + Baryon perturbations. */ + tf_cbnu; /* The transfer function for density-weighted + CDM + Baryon + Massive Neutrino perturbations. */ + + /* By default, these functions return tf_cb */ + + /* ------------------------- TFmdm_set_cosm() ------------------------ */ + int TFmdm_set_cosm(float omega_matter, float omega_baryon, float omega_hdm, + int degen_hdm, float omega_lambda, float hubble, float redshift) + /* This routine takes cosmological parameters and a redshift and sets up + all the internal scalar quantities needed to compute the transfer function. */ + /* INPUT: omega_matter -- Density of CDM, baryons, and massive neutrinos, + in units of the critical density. */ + /* omega_baryon -- Density of baryons, in units of critical. */ + /* omega_hdm -- Density of massive neutrinos, in units of critical */ + /* degen_hdm -- (Int) Number of degenerate massive neutrino species */ + /* omega_lambda -- Cosmological constant */ + /* hubble -- Hubble constant, in units of 100 km/s/Mpc */ + /* redshift -- The redshift at which to evaluate */ + /* OUTPUT: Returns 0 if all is well, 1 if a warning was issued. Otherwise, + sets many global variables for use in TFmdm_onek_mpc() */ + { + float z_drag_b1, z_drag_b2, omega_denom; + int qwarn; + qwarn = 0; + + theta_cmb = 2.728/2.7; /* Assuming T_cmb = 2.728 K */ + + /* Look for strange input */ + if (omega_baryon<0.0) { + fprintf(stderr, + "TFmdm_set_cosm(): Negative omega_baryon set to trace amount.\n"); + qwarn = 1; + } + if (omega_hdm<0.0) { + fprintf(stderr, + "TFmdm_set_cosm(): Negative omega_hdm set to trace amount.\n"); + qwarn = 1; + } + if (hubble<=0.0) { + fprintf(stderr,"TFmdm_set_cosm(): Negative Hubble constant illegal.\n"); + exit(1); /* Can't recover */ + } else if (hubble>2.0) { + fprintf(stderr,"TFmdm_set_cosm(): Hubble constant should be in units of 100 km/s/Mpc.\n"); + qwarn = 1; + } + if (redshift<=-1.0) { + fprintf(stderr,"TFmdm_set_cosm(): Redshift < -1 is illegal.\n"); + exit(1); + } else if (redshift>99.0) { + fprintf(stderr, + "TFmdm_set_cosm(): Large redshift entered. TF may be inaccurate.\n"); + qwarn = 1; + } + if (degen_hdm<1) degen_hdm=1; + num_degen_hdm = (float) degen_hdm; + /* Have to save this for TFmdm_onek_mpc() */ + /* This routine would crash if baryons or neutrinos were zero, + so don't allow that */ + if (omega_baryon<=0) omega_baryon=1e-5; + if (omega_hdm<=0) omega_hdm=1e-5; + + omega_curv = 1.0-omega_matter-omega_lambda; + omhh = omega_matter*SQR(hubble); + obhh = omega_baryon*SQR(hubble); + onhh = omega_hdm*SQR(hubble); + f_baryon = omega_baryon/omega_matter; + f_hdm = omega_hdm/omega_matter; + f_cdm = 1.0-f_baryon-f_hdm; + f_cb = f_cdm+f_baryon; + f_bnu = f_baryon+f_hdm; + + /* Compute the equality scale. */ + z_equality = 25000.0*omhh/SQR(SQR(theta_cmb)); /* Actually 1+z_eq */ + k_equality = 0.0746*omhh/SQR(theta_cmb); + + /* Compute the drag epoch and sound horizon */ + z_drag_b1 = 0.313*pow(omhh,-0.419)*(1+0.607*pow(omhh,0.674)); + z_drag_b2 = 0.238*pow(omhh,0.223); + z_drag = 1291*pow(omhh,0.251)/(1.0+0.659*pow(omhh,0.828))* + (1.0+z_drag_b1*pow(obhh,z_drag_b2)); + y_drag = z_equality/(1.0+z_drag); + + sound_horizon_fit = 44.5*log(9.83/omhh)/sqrt(1.0+10.0*pow(obhh,0.75)); + + /* Set up for the free-streaming & infall growth function */ + p_c = 0.25*(5.0-sqrt(1+24.0*f_cdm)); + p_cb = 0.25*(5.0-sqrt(1+24.0*f_cb)); + + omega_denom = omega_lambda+SQR(1.0+redshift)*(omega_curv+ + omega_matter*(1.0+redshift)); + omega_lambda_z = omega_lambda/omega_denom; + omega_matter_z = omega_matter*SQR(1.0+redshift)*(1.0+redshift)/omega_denom; + growth_k0 = z_equality/(1.0+redshift)*2.5*omega_matter_z/ + (pow(omega_matter_z,4.0/7.0)-omega_lambda_z+ + (1.0+omega_matter_z/2.0)*(1.0+omega_lambda_z/70.0)); + growth_to_z0 = z_equality*2.5*omega_matter/(pow(omega_matter,4.0/7.0) + -omega_lambda + (1.0+omega_matter/2.0)*(1.0+omega_lambda/70.0)); + growth_to_z0 = growth_k0/growth_to_z0; + + /* Compute small-scale suppression */ + alpha_nu = f_cdm/f_cb*(5.0-2.*(p_c+p_cb))/(5.-4.*p_cb)* + pow(1+y_drag,p_cb-p_c)* + (1+f_bnu*(-0.553+0.126*f_bnu*f_bnu))/ + (1-0.193*sqrt(f_hdm*num_degen_hdm)+0.169*f_hdm*pow(num_degen_hdm,0.2))* + (1+(p_c-p_cb)/2*(1+1/(3.-4.*p_c)/(7.-4.*p_cb))/(1+y_drag)); + alpha_gamma = sqrt(alpha_nu); + beta_c = 1/(1-0.949*f_bnu); + /* Done setting scalar variables */ + hhubble = hubble; /* Need to pass Hubble constant to TFmdm_onek_hmpc() */ + return qwarn; + } + + /* ---------------------------- TFmdm_onek_mpc() ---------------------- */ + + float TFmdm_onek_mpc(float kk) + /* Given a wavenumber in Mpc^-1, return the transfer function for the + cosmology held in the global variables. */ + /* Input: kk -- Wavenumber in Mpc^-1 */ + /* Output: The following are set as global variables: + growth_cb -- the transfer function for density-weighted + CDM + Baryon perturbations. + growth_cbnu -- the transfer function for density-weighted + CDM + Baryon + Massive Neutrino perturbations. */ + /* The function returns growth_cb */ + { + float tf_sup_L, tf_sup_C; + float temp1, temp2; + + qq = kk/omhh*SQR(theta_cmb); + + /* Compute the scale-dependent growth functions */ + y_freestream = 17.2*f_hdm*(1+0.488*pow(f_hdm,-7.0/6.0))* + SQR(num_degen_hdm*qq/f_hdm); + temp1 = pow(growth_k0, 1.0-p_cb); + temp2 = pow(growth_k0/(1+y_freestream),0.7); + growth_cb = pow(1.0+temp2, p_cb/0.7)*temp1; + growth_cbnu = pow(pow(f_cb,0.7/p_cb)+temp2, p_cb/0.7)*temp1; + + /* Compute the master function */ + gamma_eff =omhh*(alpha_gamma+(1-alpha_gamma)/ + (1+SQR(SQR(kk*sound_horizon_fit*0.43)))); + qq_eff = qq*omhh/gamma_eff; + + tf_sup_L = log(2.71828+1.84*beta_c*alpha_gamma*qq_eff); + tf_sup_C = 14.4+325/(1+60.5*pow(qq_eff,1.11)); + tf_sup = tf_sup_L/(tf_sup_L+tf_sup_C*SQR(qq_eff)); + + qq_nu = 3.92*qq*sqrt(num_degen_hdm/f_hdm); + max_fs_correction = 1+1.2*pow(f_hdm,0.64)*pow(num_degen_hdm,0.3+0.6*f_hdm)/ + (pow(qq_nu,-1.6)+pow(qq_nu,0.8)); + tf_master = tf_sup*max_fs_correction; + + /* Now compute the CDM+HDM+baryon transfer functions */ + tf_cb = tf_master*growth_cb/growth_k0; + tf_cbnu = tf_master*growth_cbnu/growth_k0; + return tf_cb; + } + + /* ---------------------------- TFmdm_onek_hmpc() ---------------------- */ + + float TFmdm_onek_hmpc(float kk) + /* Given a wavenumber in h Mpc^-1, return the transfer function for the + cosmology held in the global variables. */ + /* Input: kk -- Wavenumber in h Mpc^-1 */ + /* Output: The following are set as global variables: + growth_cb -- the transfer function for density-weighted + CDM + Baryon perturbations. + growth_cbnu -- the transfer function for density-weighted + CDM + Baryon + Massive Neutrino perturbations. */ + /* The function returns growth_cb */ + { + return TFmdm_onek_mpc(kk*hhubble); + } + + + + TransferFunction_EisensteinNeutrino( Cosmology aCosm, double Omega_HDM, int degen_HDM, double Tcmb = 2.726 ) + : TransferFunction(aCosm)//, m_h0( aCosm.H0*0.01 ) + { + TFmdm_set_cosm( aCosm.Omega_m, aCosm.Omega_b, Omega_HDM, degen_HDM, aCosm.Omega_L, aCosm.H0, aCosm.astart ); + + } + + //! Computes the transfer function for k in Mpc/h by calling TFfit_onek + virtual inline double compute( double k ){ + return TFmdm_onek_hmpc( k ); + + + /*double tfb, tfcdm, fb, fc; //, tfull + TFfit_onek( k*m_h0, &tfb, &tfcdm ); + + fb = m_Cosmology.Omega_b/(m_Cosmology.Omega_m); + fc = (m_Cosmology.Omega_m-m_Cosmology.Omega_b)/(m_Cosmology.Omega_m) ; + + return fb*tfb+fc*tfcdm;*/ + //return 1.0; + } + + +}; + +#endif diff --git a/config_file.hh b/config_file.hh new file mode 100644 index 0000000..69dad12 --- /dev/null +++ b/config_file.hh @@ -0,0 +1,327 @@ +/* + + config_file.hh - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + */ + +#ifndef __CONFIG_FILE_HH +#define __CONFIG_FILE_HH + +#include +#include +#include +#include +#include +#include +#include +#include + +/*! + * @class config_file + * @brief provides read/write access to configuration options + * + * This class provides access to the configuration file. The + * configuration is stored in hash-pairs and can be queried and + * validated by the responsible class/routine + */ +class config_file { + + //! current line number + unsigned m_iLine; + + //! hash table for key/value pairs, stored as strings + std::map m_Items; + +public: + + //! removes all white space from string source + /*! + * @param source the string to be trimmed + * @param delims a string of delimiting characters + * @return trimmed string + */ + std::string trim(std::string const& source, char const* delims = " \t\r\n") const{ + std::string result(source); + //... skip initial whitespace ... + std::string::size_type index = result.find_last_not_of(delims); + if(index != std::string::npos) + result.erase(++index); + //... find beginning of trailing whitespace ... + index = result.find_first_not_of(delims); + //... remove trailing whitespace ... + if(index != std::string::npos) + result.erase(0, index); + else + result.erase(); + return result; + } + + //! converts between different variable types + /*! + * The main purpose of this function is to parse and convert + * a string argument into numbers, booleans, etc... + * @param ival the input value (typically a std::string) + * @param oval the interpreted/converted value + */ + template + void convert( const in_value & ival, out_value & oval) const + { + std::stringstream ss; + ss << ival; //.. insert value into stream + ss >> oval; //.. retrieve value from stream + + if (! ss.eof()) { + //.. conversion error + std::cerr << "Error: conversion of \'" << ival << "\' failed." << std::endl; + throw ErrInvalidConversion(std::string("invalid conversion to ")+typeid(out_value).name()+'.'); + } + } + + //! constructor of class config_file + /*! @param FileName the path/name of the configuration file to be parsed + */ + config_file( std::string const& FileName ) + : m_iLine(0), m_Items() + { + std::ifstream file(FileName.c_str()); + + if( !file.is_open() ) + throw std::runtime_error(std::string("Error: Could not open config file \'")+FileName+std::string("\'")); + + std::string line; + std::string name; + std::string value; + std::string inSection; + int posEqual; + m_iLine=0; + //.. walk through all lines .. + while (std::getline(file,line)) { + ++m_iLine; + //.. encounterd EOL ? + if (! line.length()) continue; + + //.. encountered comment ? + unsigned long idx; + if( (idx=line.find_first_of("#;%")) != std::string::npos ) + line.erase(idx); + + //.. encountered section tag ? + if (line[0] == '[') { + inSection=trim(line.substr(1,line.find(']')-1)); + continue; + } + + //.. seek end of entry name .. + posEqual=line.find('='); + name = trim(line.substr(0,posEqual)); + value = trim(line.substr(posEqual+1)); + + //.. add key/value pair to hash table .. + m_Items[inSection+'/'+name] = value; + } + } + + //! inserts a key/value pair in the hash map + /*! @param key the key value, usually "section/key" + * @param value the value of the key, also a string + */ + void insertValue( std::string const& key, std::string const& value ) + { + m_Items[key] = value; + } + + //! inserts a key/value pair in the hash map + /*! @param section section name. values are stored under "section/key" + * @param key the key value usually "section/key" + * @param value the value of the key, also a string + */ + void insertValue( std::string const& section, std::string const& key, std::string const& value ) + { + m_Items[section+'/'+key] = value; + } + + //! checks if a key is part of the hash map + /*! @param section the section name of the key + * @param key the key name to be checked + * @return true if the key is present, false otherwise + */ + bool containsKey( std::string const& section, std::string const& key ) + { + std::map::const_iterator i = m_Items.find(section+'/'+key); + if ( i == m_Items.end() ) + return false; + return true; + } + + //! checks if a key is part of the hash map + /*! @param key the key name to be checked + * @return true if the key is present, false otherwise + */ + bool containsKey( std::string const& key ) + { + std::map::const_iterator i = m_Items.find(key); + if ( i == m_Items.end() ) + return false; + return true; + } + + + //! return value of a key + /*! returns the value of a given key, throws a ErrItemNotFound + * exception if the key is not available in the hash map. + * @param key the key name + * @return the value of the key + * @sa ErrItemNotFound + */ + template T getValue( std::string const& key ) const{ + return getValue( "", key ); + } + + //! return value of a key + /*! returns the value of a given key, throws a ErrItemNotFound + * exception if the key is not available in the hash map. + * @param section the section name for the key + * @param key the key name + * @return the value of the key + * @sa ErrItemNotFound + */ + template T getValue( std::string const& section, std::string const& key ) const + { + T r; + std::map::const_iterator i = m_Items.find(section + '/' + key); + if ( i == m_Items.end() ) + throw ErrItemNotFound('\'' + section + '/' + key + std::string("\' not found.")); + + convert(i->second,r); + return r; + } + + //! exception safe version of getValue + /*! returns the value of a given key, returns a default value rather + * than a ErrItemNotFound exception if the key is not found. + * @param section the section name for the key + * @param key the key name + * @param default_value the value that is returned if the key is not found + * @return the key value (if key found) otherwise default_value + */ + template T getValueSafe( std::string const& section, std::string const& key, T default_value ) const + { + T r; + try{ + r = getValue( section, key ); + } catch( ErrItemNotFound ) { + r = default_value; + } + return r; + } + + + //! exception safe version of getValue + /*! returns the value of a given key, returns a default value rather + * than a ErrItemNotFound exception if the key is not found. + * @param key the key name + * @param default_value the value that is returned if the key is not found + * @return the key value (if key found) otherwise default_value + */ + template T getValueSafe( std::string const& key, T default_value ) const + { + return getValueSafe( "", key, default_value ); + } + + + //! dumps all key-value pairs to a std::ostream + void dump( std::ostream& out ) + { + std::map::const_iterator i = m_Items.begin(); + while( i!=m_Items.end() ) + { + if( i->second.length() > 0 ) + out << std::setw(24) << std::left << i->first << " = " << i->second << std::endl; + ++i; + } + } + + //--- EXCEPTIONS --- + + //! runtime error that is thrown if key is not found in getValue + class ErrItemNotFound : public std::runtime_error{ + public: + ErrItemNotFound( std::string itemname ) + : std::runtime_error( itemname.c_str() ) + {} + }; + + //! runtime error that is thrown if type conversion fails + class ErrInvalidConversion : public std::runtime_error{ + public: + ErrInvalidConversion( std::string errmsg ) + : std::runtime_error( errmsg ) + {} + }; + + + class ErrIllegalIdentifier : public std::runtime_error{ + public: + ErrIllegalIdentifier( std::string errmsg ) + : std::runtime_error( errmsg ) + {} + }; + +}; + +//==== below are template specialisations =======================================// + +//... Function: getValue( strSection, strEntry ) ... +//... Descript: specialization of getValue for type boolean to interpret strings ... +//... like "true" and "false" etc. +//... converts the string to type bool, returns type bool ... +template<> +inline bool config_file::getValue( std::string const& strSection, std::string const& strEntry ) const{ + std::string r1 = getValue( strSection, strEntry ); + if( r1=="true" || r1=="yes" || r1=="on" || r1=="1" ) + return true; + if( r1=="false" || r1=="no" || r1=="off" || r1=="0" ) + return false; + throw ErrIllegalIdentifier(std::string("Illegal identifier \'")+r1+std::string("\' in \'")+strEntry+std::string("\'.")); + //return false; + } + +template<> +inline bool config_file::getValueSafe( std::string const& strSection, std::string const& strEntry, bool defaultValue ) const{ + std::string r1; + try{ + r1 = getValue( strSection, strEntry ); + if( r1=="true" || r1=="yes" || r1=="on" || r1=="1" ) + return true; + if( r1=="false" || r1=="no" || r1=="off" || r1=="0" ) + return false; + } catch( ErrItemNotFound ) { + return defaultValue; + } + return defaultValue; +} + +template<> +inline void config_file::convert( const std::string & ival, std::string & oval) const +{ + oval = ival; +} + +#endif //__CONFIG_FILE_HH diff --git a/convolution_kernel.cc b/convolution_kernel.cc new file mode 100644 index 0000000..b96ddd1 --- /dev/null +++ b/convolution_kernel.cc @@ -0,0 +1,583 @@ +/* + + convolution_kernel.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +*/ + +#ifdef SINGLE_PRECISION + #ifdef SINGLETHREAD_FFTW + #include + #else + #include + #endif +#else + #ifdef SINGLETHREAD_FFTW + #include + #else + #include + #endif +#endif + + +#include "densities.hh" +#include "convolution_kernel.hh" + + + +namespace convolution{ + + std::map< std::string, kernel_creator *>& + get_kernel_map() + { + static std::map< std::string, kernel_creator* > kernel_map; + return kernel_map; + } + + template< typename real_t > + void perform( kernel * pk, void *pd ) + { + parameters cparam_ = pk->cparam_; + double fftnorm = pow(2.0*M_PI,1.5)/sqrt(cparam_.lx*cparam_.ly*cparam_.lz)/sqrt((double)(cparam_.nx*cparam_.ny*cparam_.nz)); + + fftw_complex *cdata,*ckernel; + fftw_real *data; + + data = reinterpret_cast(pd); + cdata = reinterpret_cast(data); + ckernel = reinterpret_cast( pk->get_ptr() ); + + rfftwnd_plan iplan, plan; + + plan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz, + FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE); + + iplan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz, + FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE); + + + #ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, data, NULL ); + #else + rfftwnd_one_real_to_complex( plan, data, NULL ); + #endif + + #pragma omp parallel for + for( int i=0; i + void perform_filtered( kernel * pk, void *pd ) + { + parameters cparam_ = pk->cparam_; + double + kny, kmax, + fftnorm = pow(2.0*M_PI,1.5)/sqrt(cparam_.lx*cparam_.ly*cparam_.lz)/sqrt((double)(cparam_.nx*cparam_.ny*cparam_.nz)); + + fftw_complex *cdata,*ckernel; + fftw_real *data; + + kny = cparam_.nx*M_PI/cparam_.lx; + kmax = M_PI/2.0; + + data = reinterpret_cast(pd); + cdata = reinterpret_cast(data); + ckernel = reinterpret_cast( pk->get_ptr() ); + + rfftwnd_plan iplan, plan; + + plan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz, + FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE); + + iplan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz, + FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE); + + + #ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, data, NULL ); + #else + rfftwnd_one_real_to_complex( plan, data, NULL ); + #endif + + //double kmax2 = kmax*kmax; + + #pragma omp parallel for + for( int i=0; i cparam_.nx/2 ) + ik -= cparam_.nx; + if( jk > cparam_.ny/2 ) + jk -= cparam_.ny; + + double + kx( M_PI*(double)ik/cparam_.nx ), + ky( M_PI*(double)jk/cparam_.ny ), + kz( M_PI*(double)k/cparam_.nz );//, + // kk( kx*kx+ky*ky+kz*kz ); + + //... cos(k) is the Hanning filter function (cf. Bertschinger 2001) + double filter = 0.0; + if( true ){//kk <= kmax2 ){ + //filter = cos( kx )*cos( ky )*cos( kz ); + filter = cos( 0.5*kx )*cos( 0.5*ky )*cos( 0.5*kz ); + //filter = 1.0; + //filter *=filter; + } + //filter = 1.0; + + ccdata = ccdata * cckernel *fftnorm * filter; + + cdata[ii].re = ccdata.real(); + cdata[ii].im = ccdata.imag(); + } + + #ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL); + #else + rfftwnd_one_complex_to_real(iplan, cdata, NULL); + #endif + + rfftwnd_destroy_plan(plan); + rfftwnd_destroy_plan(iplan); + } + + + template void perform( kernel* pk, void *pd ); + template void perform( kernel* pk, void *pd ); + template void perform_filtered( kernel* pk, void *pd ); + template void perform_filtered( kernel* pk, void *pd ); + + + void truncate( kernel* pk, double R, double alpha ) + { + parameters cparam_ = pk->cparam_; + + double + dx = cparam_.lx/cparam_.nx, + dy = cparam_.ly/cparam_.ny, + dz = cparam_.lz/cparam_.nz; + + double fftnorm = 1.0/(cparam_.nx * cparam_.ny * cparam_.nz); + + rfftwnd_plan iplan, plan; + + plan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz, + FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE); + + iplan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz, + FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE); + + fftw_real *rkernel = reinterpret_cast( pk->get_ptr() ); + fftw_complex *ckernel = reinterpret_cast( pk->get_ptr() ); + + #ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, ckernel, NULL); + #else + rfftwnd_one_complex_to_real(iplan, ckernel, NULL); + #endif + + + #pragma omp parallel for + for( int i=0; i (int)cparam_.nx/2 ) iix -= cparam_.nx; + if( iiy > (int)cparam_.ny/2 ) iiy -= cparam_.ny; + if( iiz > (int)cparam_.nz/2 ) iiz -= cparam_.nz; + + rr[0] = (double)iix * dx; + rr[1] = (double)iiy * dy; + rr[2] = (double)iiz * dz; + + rr2 = rr[0]*rr[0] + rr[1]*rr[1] + rr[2]*rr[2]; + + unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k; + rkernel[idx] *= 0.5*(erfc((sqrt(rr2)-R)*alpha))*fftnorm; + } + + + + #ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, rkernel, NULL ); + #else + rfftwnd_one_real_to_complex( plan, rkernel, NULL ); + #endif + + } + + void truncate_sharp( kernel* pk, double R ) + { + parameters cparam_ = pk->cparam_; + + double + dx = cparam_.lx/cparam_.nx, + dy = cparam_.ly/cparam_.ny, + dz = cparam_.lz/cparam_.nz, + R2 = R*R; + + double fftnorm = 1.0/(cparam_.nx * cparam_.ny * cparam_.nz); + + rfftwnd_plan iplan, plan; + + plan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz, + FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE); + + iplan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz, + FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE); + + fftw_real *rkernel = reinterpret_cast( pk->get_ptr() ); + fftw_complex *ckernel = reinterpret_cast( pk->get_ptr() ); + +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, ckernel, NULL); +#else + rfftwnd_one_complex_to_real(iplan, ckernel, NULL); +#endif + + +#pragma omp parallel for + for( int i=0; i (int)cparam_.nx/2 ) iix -= cparam_.nx; + if( iiy > (int)cparam_.ny/2 ) iiy -= cparam_.ny; + if( iiz > (int)cparam_.nz/2 ) iiz -= cparam_.nz; + + rr[0] = (double)iix * dx; + rr[1] = (double)iiy * dy; + rr[2] = (double)iiz * dz; + + rr2 = rr[0]*rr[0] + rr[1]*rr[1] + rr[2]*rr[2]; + unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k; + if( rr2 > R2 ) + { + + rkernel[idx] = 0.0; + }else { + rkernel[idx] *= fftnorm; + } + + } + + + +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, rkernel, NULL ); +#else + rfftwnd_one_real_to_complex( plan, rkernel, NULL ); +#endif + + } + + + /*****************************************************************************************\ + *** SPECIFIC KERNEL IMPLEMENTATIONS ********************************************* + \*****************************************************************************************/ + + template< typename real_t > + class kernel_real : public kernel + { + protected: + std::vector kdata_; + + void compute_kernel( void ); + + public: + kernel_real( const parameters& cp ) + : kernel( cp ) + { + kdata_.assign( cparam_.nx*cparam_.ny*2*(cparam_.nz/2+1), 0.0 ); + compute_kernel(); + } + + void *get_ptr() + { return reinterpret_cast (&kdata_[0]); } + + ~kernel_real() + { std::vector().swap( kdata_ ); } + + }; + + + + + + template< typename real_t > + void kernel_real::compute_kernel( void ) + { + double + kny = cparam_.nx*M_PI/cparam_.lx, + fac = cparam_.lx*cparam_.ly*cparam_.lz/pow(2.0*M_PI,3)/(cparam_.nx*cparam_.ny*cparam_.nz), + dx = cparam_.lx/cparam_.nx, + dy = cparam_.ly/cparam_.ny, + dz = cparam_.lz/cparam_.nz, + boxlength = cparam_.pcf->getValue("setup","boxlength"), + nspec = cparam_.pcf->getValue("cosmology","nspec"),//cosmo.nspect, + pnorm = cparam_.pcf->getValue("cosmology","pnorm"),//cosmo.pnorm, + dplus = cparam_.pcf->getValue("cosmology","dplus");//,//cosmo.dplus; + // t00f = cparam_.t0scale; + unsigned + levelmax = cparam_.pcf->getValue("setup","levelmax"); + + bool + bperiodic = cparam_.pcf->getValueSafe("setup","periodic_TF",true); + + //if( cparam_.normalize ) + // kny *= 2.0; + + TransferFunction_real *tfr = new TransferFunction_real(cparam_.ptf,nspec,pnorm,dplus,0.25*cparam_.lx/(double)cparam_.nx,2.0*boxlength,kny, (int)pow(2,levelmax+2)); + + if( bperiodic ) + { + + #pragma omp parallel for + for( int i=0; i (int)cparam_.nx/2 ) iix -= cparam_.nx; + if( iiy > (int)cparam_.ny/2 ) iiy -= cparam_.ny; + if( iiz > (int)cparam_.nz/2 ) iiz -= cparam_.nz; + + unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k; + + for( int ii=-1; ii<=1; ++ii ) + for( int jj=-1; jj<=1; ++jj ) + for( int kk=-1; kk<=1; ++kk ) + { + rr[0] = ((double)iix ) * dx + ii*boxlength; + rr[1] = ((double)iiy ) * dy + jj*boxlength; + rr[2] = ((double)iiz ) * dz + kk*boxlength; + + if( fabs(rr[0]) < boxlength + && fabs(rr[1]) < boxlength + && fabs(rr[2]) < boxlength ) + { + rr2 = rr[0]*rr[0]+rr[1]*rr[1]+rr[2]*rr[2]; + kdata_[idx] += tfr->compute_real(rr2); + } + } + + kdata_[idx] *= fac; + + } + }else{ + #pragma omp parallel for + for( int i=0; i (int)cparam_.nx/2 ) iix -= cparam_.nx; + if( iiy > (int)cparam_.ny/2 ) iiy -= cparam_.ny; + if( iiz > (int)cparam_.nz/2 ) iiz -= cparam_.nz; + + unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k; + + rr[0] = ((double)iix ) * dx; + rr[1] = ((double)iiy ) * dy; + rr[2] = ((double)iiz ) * dz; + + rr2 = rr[0]*rr[0]+rr[1]*rr[1]+rr[2]*rr[2]; + kdata_[idx] = tfr->compute_real(rr2)*fac; + + + } + } + //kdata_[0] = tfr->compute_real(dx*dx*0.25)*fac; + + //std::cerr << "T(r=0) = " << kdata_[0]/fac << std::endl; + //if( cparam_.normalize ) + // kdata_[0] *= 0.125;//= 0.0;//*= 0.125; + + //... determine normalization + double sum = 0.0; + #pragma omp parallel for + for( int i=0; i 0 ) + sum += kdata_[idx]; + } + + + //std::cerr << " - The convolution kernel has avg of " << (sum+kdata_[0])/(cparam_.nx*cparam_.ny*cparam_.nz) << std::endl; + + sum /= cparam_.nx*cparam_.ny*cparam_.nz;//-1; + + if( cparam_.normalize ) + { + + /*for( int i=0; i 0 ) + kdata_[idx] -= sum; + }*/ + } + + + //if( t00f < 0.0 ) + // kdata_[0] += (tfr->compute_real(0.125*dx*dx) - tfr->compute_real(0.0))*fac; + + fftw_real *rkernel = reinterpret_cast( &kdata_[0] ); + rfftwnd_plan plan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz, + FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE); + + #ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, rkernel, NULL ); + #else + rfftwnd_one_real_to_complex( plan, rkernel, NULL ); + #endif + + rfftwnd_destroy_plan(plan); + + delete tfr; + } + + + + + template< typename real_t > + class kernel_k : public kernel + { + protected: + std::vector kdata_; + + void compute_kernel( void ); + + public: + kernel_k( const parameters& cp ) + : kernel( cp ) + { + kdata_.assign( cparam_.nx*cparam_.ny*2*(cparam_.nz/2+1), 0.0 ); + compute_kernel(); + } + + void *get_ptr() + { return reinterpret_cast (&kdata_[0]); } + + ~kernel_k() + { std::vector().swap( kdata_ ); } + + }; + + template< typename real_t > + void kernel_k::compute_kernel( void ) + { + double + //kny = cparam_.nx*M_PI/cparam_.lx, + fac = cparam_.lx*cparam_.ly*cparam_.lz/pow(2.0*M_PI,3),// /(cparam_.nx*cparam_.ny*cparam_.nz), + // dx = cparam_.lx/cparam_.nx, + // dy = cparam_.ly/cparam_.ny, + // dz = cparam_.lz/cparam_.nz, + boxlength = cparam_.pcf->getValue("setup","boxlength"), + nspec = cparam_.pcf->getValue("cosmology","nspec"), + pnorm = cparam_.pcf->getValue("cosmology","pnorm"), + dplus = cparam_.pcf->getValue("cosmology","dplus"); + + TransferFunction_k *tfk = new TransferFunction_k(cparam_.ptf,nspec,pnorm,dplus); + + fftw_complex *kdata = reinterpret_cast ( this->get_ptr() ); + + unsigned nx = cparam_.nx, ny = cparam_.ny, nz = cparam_.nz, nzp = (nz/2+1); + fac =1.0;//*= 1.0/sqrt(nx*ny*nz); + + double kfac = 2.0*M_PI/boxlength; + unsigned q=0; + for( int i=0; i nx/2 ) kx -= nx; + if( ky > ny/2 ) ky -= ny; + + q = (i*ny+j)*nzp+k; + kdata[q].re = fac*tfk->compute(kfac*sqrt(kx*kx+ky*ky+kz*kz)); + kdata[q].im = 0.0; + + } + + + delete tfk; + } +}; + +namespace{ + convolution::kernel_creator_concrete< convolution::kernel_real > creator_d("tf_kernel_real_double"); + convolution::kernel_creator_concrete< convolution::kernel_real > creator_f("tf_kernel_real_float"); + convolution::kernel_creator_concrete< convolution::kernel_k > creator_kd("tf_kernel_k_double"); + convolution::kernel_creator_concrete< convolution::kernel_k > creator_kf("tf_kernel_k_float"); + +} + + + diff --git a/convolution_kernel.hh b/convolution_kernel.hh new file mode 100644 index 0000000..201f415 --- /dev/null +++ b/convolution_kernel.hh @@ -0,0 +1,116 @@ +/* + + convolution_kernel.hh - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +*/ + +#ifndef __CONVOLUTION_KERNELS_HH +#define __CONVOLUTION_KERNELS_HH + +#include +#include + +#include "config_file.hh" +#include "densities.hh" +#include "transfer_function.hh" + +namespace convolution{ + + //! encapsulates all parameters required for transfer function convolution + struct parameters + { + int nx,ny,nz; + double lx,ly,lz;//,boxlength; + config_file *pcf; + transfer_function* ptf; + bool normalize; + }; + + + //! abstract base class for a transfer function convolution kernel + class kernel{ + public: + + //! all parameters (physical/numerical) + parameters cparam_; + + //! constructor + kernel( const parameters& cp ) + : cparam_( cp ) + { } + + //! virtual destructor + virtual ~kernel(){ }; + + //! purely virtual method to obtain a pointer to the underlying data + virtual void* get_ptr() = 0; + }; + + + //! abstract factory class to create convolution kernels + struct kernel_creator + { + //! creates a convolution kernel object + virtual kernel * create( const parameters& cp ) const = 0; + + //! destructor + virtual ~kernel_creator() { } + }; + + + //! access map to the various kernel classes through the factory + std::map< std::string, kernel_creator *>& get_kernel_map(); + + + //! actual implementation of the factory class for kernel objects + template< class Derived > + struct kernel_creator_concrete : public kernel_creator + { + //! constructor inserts the kernel class in the map + kernel_creator_concrete( const std::string& kernel_name ) + { get_kernel_map()[ kernel_name ] = this; } + + //! creates an instance of the kernel object + kernel * create( const parameters& cp ) const + { return new Derived( cp ); } + }; + + + //! actual implementation of the FFT convolution (independent of the actual kernel) + template< typename real_t > + void perform( kernel* pk, void *pd ); + + //! actual implementation of the FFT convolution (independent of the actual kernel) + //! with Hann (cf. Bertschinger 2001) window filter applied + template< typename real_t > + void perform_filtered( kernel* pk, void *pd ); + + + //! truncate the kernel softly at R, used for boundary corrections + void truncate( kernel*pk, double R, double alpha ); + + //! truncate the kernel sharply at R, used for boundary corrections + void truncate_sharp( kernel*pk, double R ); + +}; //namespace convolution + + + +#endif //__CONVOLUTION_KERNELS_HH diff --git a/cosmology.cc b/cosmology.cc new file mode 100644 index 0000000..b40c9e1 --- /dev/null +++ b/cosmology.cc @@ -0,0 +1,222 @@ +/* + + cosmology.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + */ + +#include "cosmology.hh" +#include "mesh.hh" +#include "mg_operators.hh" + +#define ACC(i,j,k) ((*u.get_grid((ilevel)))((i),(j),(k))) +#define SQR(x) ((x)*(x)) + + +void compute_LLA_density( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order ) +{ + fnew = u; + + for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) + { + double h = pow(2.0,ilevel), h2 = h*h, h2_4 = 0.25*h2; + meshvar_bnd *pvar = fnew.get_grid(ilevel); + + + if( order == 2 ) + { + #pragma omp parallel for //reduction(+:sum_corr,sum,sum2) + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + { + double D[3][3]; + + D[0][0] = (ACC(ix-1,iy,iz)-2.0*ACC(ix,iy,iz)+ACC(ix+1,iy,iz)) * h2; + D[1][1] = (ACC(ix,iy-1,iz)-2.0*ACC(ix,iy,iz)+ACC(ix,iy+1,iz)) * h2; + D[2][2] = (ACC(ix,iy,iz-1)-2.0*ACC(ix,iy,iz)+ACC(ix,iy,iz+1)) * h2; + + D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4; + D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4; + D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4; + + (*pvar)(ix,iy,iz) = D[0][0]+D[1][1]+D[2][2] + + ( D[0][0]*D[1][1] + D[0][0]*D[2][2] + D[1][1]*D[2][2] + + D[0][1]*D[1][0] + D[0][2]*D[2][0] + D[1][2]*D[2][1] + + D[0][0]*D[0][0] + D[1][1]*D[1][1] + D[2][2]*D[2][2] ); + + } + } + else if ( order == 4 ) + { + #pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + { + double D[3][3]; + + D[0][0] = (-ACC(ix-2,iy,iz)+16.*ACC(ix-1,iy,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix+1,iy,iz)-ACC(ix+2,iy,iz)) * h2/12.0; + D[1][1] = (-ACC(ix,iy-2,iz)+16.*ACC(ix,iy-1,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy+1,iz)-ACC(ix,iy+2,iz)) * h2/12.0; + D[2][2] = (-ACC(ix,iy,iz-2)+16.*ACC(ix,iy,iz-1)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy,iz+1)-ACC(ix,iy,iz+2)) * h2/12.0; + + D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4; + D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4; + D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4; + + (*pvar)(ix,iy,iz) = D[0][0]+D[1][1]+D[2][2] + + ( D[0][0]*D[1][1] + D[0][0]*D[2][2] + D[1][1]*D[2][2] + + D[0][1]*D[1][0] + D[0][2]*D[2][0] + D[1][2]*D[2][1] + + D[0][0]*D[0][0] + D[1][1]*D[1][1] + D[2][2]*D[2][2] ); + + } + }else + throw std::runtime_error("compute_LLA_density : invalid operator order specified"); + + } + +} + + +void compute_Lu_density( const grid_hierarchy& u, grid_hierarchy& fnew ) +{ + fnew = u; + + for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) + { + double h = pow(2.0,ilevel), h2 = h*h; + meshvar_bnd *pvar = fnew.get_grid(ilevel); + + #pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + { + double D[3][3]; + + D[0][0] = 1.0 + (ACC(ix-1,iy,iz)-2.0*ACC(ix,iy,iz)+ACC(ix+1,iy,iz)) * h2; + D[1][1] = 1.0 + (ACC(ix,iy-1,iz)-2.0*ACC(ix,iy,iz)+ACC(ix,iy+1,iz)) * h2; + D[2][2] = 1.0 + (ACC(ix,iy,iz-1)-2.0*ACC(ix,iy,iz)+ACC(ix,iy,iz+1)) * h2; + + (*pvar)(ix,iy,iz) = D[0][0]+D[1][1]+D[2][2] - 3.0; + + } + } + +} + + +void compute_2LPT_source( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order ) +{ + fnew = u; + + for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) + { + double h = pow(2.0,ilevel), h2 = h*h, h2_4 = 0.25*h2; + meshvar_bnd *pvar = fnew.get_grid(ilevel); + + if ( order == 2 ) + { + + #pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + { + double D[3][3]; + + D[0][0] = (ACC(ix-1,iy,iz)-2.0*ACC(ix,iy,iz)+ACC(ix+1,iy,iz)) * h2; + D[1][1] = (ACC(ix,iy-1,iz)-2.0*ACC(ix,iy,iz)+ACC(ix,iy+1,iz)) * h2; + D[2][2] = (ACC(ix,iy,iz-1)-2.0*ACC(ix,iy,iz)+ACC(ix,iy,iz+1)) * h2; + D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4; + D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4; + D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4; + + + (*pvar)(ix,iy,iz) = D[0][0]*D[1][1] - SQR( D[0][1] ) + + D[0][0]*D[2][2] - SQR( D[0][2] ) + + D[1][1]*D[2][2] - SQR( D[1][2] ); + + } + } + else if ( order == 4 ) + { + #pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + { + double D[3][3]; + + D[0][0] = (-ACC(ix-2,iy,iz)+16.*ACC(ix-1,iy,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix+1,iy,iz)-ACC(ix+2,iy,iz)) * h2/12.0; + D[1][1] = (-ACC(ix,iy-2,iz)+16.*ACC(ix,iy-1,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy+1,iz)-ACC(ix,iy+2,iz)) * h2/12.0; + D[2][2] = (-ACC(ix,iy,iz-2)+16.*ACC(ix,iy,iz-1)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy,iz+1)-ACC(ix,iy,iz+2)) * h2/12.0; + D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4; + D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4; + D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4; + + + (*pvar)(ix,iy,iz) = D[0][0]*D[1][1] - SQR( D[0][1] ) + + D[0][0]*D[2][2] - SQR( D[0][2] ) + + D[1][1]*D[2][2] - SQR( D[1][2] ); + + } + } + else + throw std::runtime_error("compute_2LPT_source : invalid operator order specified"); + + + } + + + //.. subtract global mean so the multi-grid poisson solver behaves well + + for( int i=fnew.levelmax(); i>(int)fnew.levelmin(); --i ) + mg_straight().restrict( (*fnew.get_grid(i)), (*fnew.get_grid(i-1)) ); + + double sum = 0.0; + int nx,ny,nz; + + nx = fnew.get_grid(fnew.levelmin())->size(0); + ny = fnew.get_grid(fnew.levelmin())->size(1); + nz = fnew.get_grid(fnew.levelmin())->size(2); + + for( int ix=0; ixsize(0); + ny = fnew.get_grid(i)->size(1); + nz = fnew.get_grid(i)->size(2); + + for( int ix=0; ix. + + */ + +#ifndef _COSMOLOGY_HH +#define _COSMOLOGY_HH + +#include "mesh.hh" +#include "general.hh" +#include "transfer_function.hh" + +/*! + * @class CosmoCalc + * @brief provides functions to compute cosmological quantities + * + * This class provides member functions to compute cosmological quantities + * related to the Friedmann equations and linear perturbation theory + */ +class CosmoCalc +{ +public: + //! data structure to store cosmological parameters + Cosmology m_Cosmology; + + //! pointer to an instance of a transfer function plugin + transfer_function_plugin *m_pTransferFunction; + + + //! constructor for a cosmology calculator object + /*! + * @params acosmo a cosmological parameters structure + * @params pTransferFunction pointer to an instance of a transfer function object + */ + + CosmoCalc( const Cosmology acosmo, transfer_function_plugin *pTransferFunction ) + { + m_Cosmology = acosmo; + m_pTransferFunction = pTransferFunction; + } + + //! returns the amplitude of amplitude of the power spectrum + /*! + * @params k the wave number in h/Mpc + * @params a the expansion factor of the universe + * @return power spectrum amplitude for wave number k at time a + */ + inline real_t Power( real_t k, real_t a ){ + real_t m_Dplus = CalcGrowthFactor( a ); + real_t m_DplusOne = CalcGrowthFactor( 1.0 ); + real_t m_pNorm = ComputePNorm( 1e4 ); + m_Dplus /= m_DplusOne; + m_DplusOne = 1.0; + real_t scale = m_Dplus/m_DplusOne; + return m_pNorm*scale*scale*TransferSq(k)*pow((double)k,(double)m_Cosmology.nspect); + } + + //! integrand function for Calc_fPeebles + /*! + * @sa Calc_fPeebles + */ + inline static double fy( double a, void *Params ) + { + Cosmology *cosm = (Cosmology*)Params; + double y = cosm->Omega_m*(1.0/a-1.0) + cosm->Omega_L*(a*a-1.0) + 1.0; + return 1.0/pow(y,1.5); + } + + //! calculates d log D+/d log a + /*! this version follows the Peebles (TBD: add citation) + * formula to compute Bertschinger's vfact + */ + inline real_t Calc_fPeebles( real_t a ) + { + real_t y = m_Cosmology.Omega_m*(1.0/a-1.0) + m_Cosmology.Omega_L*(a*a-1.0) + 1.0; + real_t fact = integrate( &fy, 1e-6, a, (void*)&m_Cosmology ); + return (m_Cosmology.Omega_L*a*a-0.5*m_Cosmology.Omega_m/a)/y - 1.0 + a*fy(a,(void*)&m_Cosmology)/fact; + } + + //! Computes the linear theory growth factor D+ + /*! Function integrates over member function GrowthIntegrand */ + inline real_t CalcGrowthFactor( real_t a ) + { + real_t eta = sqrt((double)(m_Cosmology.Omega_m/a+m_Cosmology.Omega_L*a*a + +1.0-m_Cosmology.Omega_m-m_Cosmology.Omega_L)); + + real_t integral = integrate( &GrowthIntegrand, 0.0, a, (void*)&m_Cosmology ); + return eta/a*integral; + } + + //! Integrand used by function CalcGrowthFactor to determine the linear growth factor D+ + inline static double GrowthIntegrand( double a, void *Params ) + { + Cosmology *cosm = (Cosmology*)Params; + double eta = sqrt((double)(cosm->Omega_m/a+cosm->Omega_L*a*a + +1.0-cosm->Omega_m-cosm->Omega_L)); + return 2.5/(eta*eta*eta); + } + + //! Compute the factor relating particle displacement and velocity + real_t ComputeVFact_old( real_t a ){ + real_t fomega, dlogadt, eta; + real_t Omega_k = 1.0 - m_Cosmology.Omega_m - m_Cosmology.Omega_L; + + real_t Dplus = CalcGrowthFactor( a ); + + eta = sqrt( (double)(m_Cosmology.Omega_m/a+ m_Cosmology.Omega_L*a*a + Omega_k )); + fomega = (2.5/Dplus-1.5*m_Cosmology.Omega_m/a-Omega_k)/eta/eta; + dlogadt = a*eta; + + //... /100.0 since we would have to multiply by H0 to convert + //... the displacement to velocity units. But displacement is + //... in Mpc/h, and H0 in units of h is 100. + return fomega * dlogadt/a *100.0; + } + + real_t ComputeVFact( real_t a ){ + return Calc_fPeebles(a)*sqrt(m_Cosmology.Omega_m/a+m_Cosmology.Omega_L*a*a+1.0-m_Cosmology.Omega_m-m_Cosmology.Omega_L)*100.0; + } + + real_t ComputedDdt( real_t a ) + { + return Calc_fPeebles(a); + } + + + //! Integrand for the sigma_8 normalization of the power spectrum + /*! Returns the value of the primordial power spectrum multiplied with + the transfer function and the window function of 8 Mpc/h at wave number k */ + static double dSigma8( double k, void *Params ) + { + if( k<=0.0 ) + return 0.0f; + + transfer_function *ptf = (transfer_function *)Params; + + double x = k*8.0; + double w = 3.0*(sin(x)-x*cos(x))/(x*x*x); + static double nspect = (double)ptf->cosmo_.nspect; + + double tf = ptf->compute(k); + + //... no growth factor since we compute at z=0 and normalize so that D+(z=0)=1 + return k*k * w*w * pow((double)k,(double)nspect) * tf*tf; + + } + + + //! Computes the square of the transfer function + /*! Function evaluates the supplied transfer function m_pTransferFunction + and returns the square of its value at wave number k + \param k wave number at which to evaluate the transfer function + */ + inline real_t TransferSq( real_t k ){ + //.. parameter supplied transfer function + real_t tf1 = m_pTransferFunction->compute(k); + return tf1*tf1; + } + + + //! Computes the normalization for the power spectrum + /*! + * integrates the power spectrum to fix the normalization to that given + * by the sigma_8 parameter + */ + real_t ComputePNorm( real_t kmax ) + { + real_t sigma0, kmin; + kmax = m_pTransferFunction->get_kmax();//m_Cosmology.H0/8.0; + kmin = m_pTransferFunction->get_kmin();//0.0; + sigma0 = 4.0 * M_PI * integrate( &dSigma8, (double)kmin, (double)kmax, (void*)m_pTransferFunction ); + + return m_Cosmology.sigma8*m_Cosmology.sigma8/sigma0; + + } + + +}; + + +//! compute the jeans sound speed +/*! given a density in g/cm^-3 and a mass in g it gives back the sound + * speed in cm/s for which the input mass is equal to the jeans mass + * @params rho density + * @params mass mass scale + * @return jeans sound speed + */ +inline double jeans_sound_speed( double rho, double mass ) +{ + const double G = 6.67e-8; + return pow( 6.0*mass/M_PI*sqrt(rho)*pow(G,1.5), 1.0/3.0 ); +} + +//! computes the density from the potential using the Laplacian +void compute_Lu_density( const grid_hierarchy& u, grid_hierarchy& fnew ); + +//! computes the 2nd order density perturbations using also off-diagonal terms in the potential Hessian +void compute_LLA_density( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order=4 ); + +//! computes the source term for the 2nd order perturbations in the displacements +void compute_2LPT_source( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order=4 ); + + + +#endif // _COSMOLOGY_HH + diff --git a/densities.cc b/densities.cc new file mode 100644 index 0000000..88bc784 --- /dev/null +++ b/densities.cc @@ -0,0 +1,837 @@ +/* + + densities.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + */ + +#include "densities.hh" +#include "convolution_kernel.hh" + +bool is_number(const std::string& s) +{ + for (unsigned i = 0; i < s.length(); i++) + if (!std::isdigit(s[i])&&s[i]!='-' ) + return false; + + return true; +} + +// TODO: use optimized convolution routine when in unigrid mode +void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type type, refinement_hierarchy& refh, grid_hierarchy& delta, bool kspace ) +{ + unsigned levelmin,levelmax,levelminPoisson; + real_t boxlength; + std::vector rngseeds; + std::vector rngfnames; + + + levelminPoisson = cf.getValue("setup","levelmin"); + levelmin = cf.getValueSafe("setup","levelminTF",levelminPoisson); + levelmax = cf.getValue("setup","levelmax"); + boxlength = cf.getValue( "setup", "boxlength" ); + + //... parse random number options + for( int i=0; i<=100; ++i ) + { + char seedstr[128]; + std::string tempstr; + sprintf(seedstr,"seed[%d]",i); + if( cf.containsKey( "random", seedstr ) ) + tempstr = cf.getValue( "random", seedstr ); + else + tempstr = std::string("-2"); + + if( is_number( tempstr ) ) + { + long ltemp; + cf.convert( tempstr, ltemp ); + rngfnames.push_back( "" ); + rngseeds.push_back( ltemp ); + }else{ + rngfnames.push_back( tempstr ); + rngseeds.push_back(-1); + std::cout << " - Random numbers for level " << std::setw(3) << i << " will be read from file.\n"; + } + + } + + //... parse grid setup parameters + unsigned nbase = (unsigned)pow(2,levelmin); + float lxref[3]; + std::string temp = cf.getValue( "setup", "ref_extent" ); + sscanf( temp.c_str(), "%g,%g,%g", &lxref[0],&lxref[1],&lxref[2] ); + + int shift[3]; + shift[0] = cf.getValue("setup","shift_x"); + shift[1] = cf.getValue("setup","shift_y"); + shift[2] = cf.getValue("setup","shift_z"); + + convolution::kernel_creator *the_kernel_creator; + + if( kspace ) + { + #ifdef SINGLE_PRECISION + the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_float" ]; + #else + the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_double" ]; + #endif + } + else + { + #ifdef SINGLE_PRECISION + the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_float" ]; + #else + the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_double" ]; + #endif + } + + convolution::parameters conv_param; + conv_param.ptf = ptf; + conv_param.pcf = &cf; + + + //.. determine for which levels random seeds/random number files are given + int lmaxread = -1, lmingiven = -1; + for( unsigned ilevel = 0; ilevel < rngseeds.size(); ++ilevel ) + { + if( rngfnames[ilevel].size() > 0 ) + lmaxread = ilevel; + if( rngseeds[ilevel] > 0 && lmingiven == -1 ) + lmingiven = ilevel; + } + + if( (unsigned)lmingiven!=levelmin || levelmin!=levelminPoisson ) + { + std::cerr <<" - Internal error: GenerateDensityUnigrid was called for a non-trivial\n" + <<" problem set-up. This should not happen, GenerateDensityHierarchy\n" + <<" should be called instead\n"; + throw std::runtime_error("Internal error"); + } + + std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmax << " ..." << std::endl; + + DensityGrid *top = new DensityGrid( nbase, nbase, nbase ); + + double x0[3] = { refh.offset(levelmin,0), refh.offset(levelmin,1), refh.offset(levelmin,2) }; + double lx[3] = { refh.size(levelmin,0), refh.size(levelmin,1), refh.size(levelmin,2) }; + x0[0] /= pow(2,levelmin); x0[1] /= pow(2,levelmin); x0[2] /= pow(2,levelmin); + lx[0] /= pow(2,levelmin); lx[1] /= pow(2,levelmin); lx[2] /= pow(2,levelmin); + + random_numbers *rc = new random_numbers( nbase, 32, rngseeds[levelmin], true );//, x0, lx ); + + + if( shift[0]!=0||shift[1]!=0||shift[2]!=0 ) + std::cout << " - WARNING: will ignore non-zero shift in unigrid mode!\n"; + + //top->fill_rand( rc, 1.0, 0, 0, 0, true ); + rc->fill_all(*top); + delete rc; + + conv_param.lx = boxlength; + conv_param.ly = boxlength; + conv_param.lz = boxlength; + conv_param.nx = top->nx_; + conv_param.ny = top->ny_; + conv_param.nz = top->nz_; + conv_param.normalize = false; + + convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param ); + convolution::perform( the_tf_kernel, reinterpret_cast( top->get_data_ptr() ) ); + delete the_tf_kernel; + + delta.create_base_hierarchy(levelmin); + top->copy( *delta.get_grid(levelmin) ); + delete top; + + + for( int i=levelmax; i>0; --i ) + mg_straight().restrict( (*delta.get_grid(i)), (*delta.get_grid(i-1)) ); + + double sum = 0.0; + { + int nx,ny,nz; + + nx = delta.get_grid(levelmin)->size(0); + ny = delta.get_grid(levelmin)->size(1); + nz = delta.get_grid(levelmin)->size(2); + + for( int ix=0; ixsize(0); + ny = delta.get_grid(i)->size(1); + nz = delta.get_grid(i)->size(2); + + for( int ix=0; ix rngseeds; + std::vector rngfnames; + + + levelminPoisson = cf.getValue("setup","levelmin"); + levelmin = cf.getValueSafe("setup","levelminTF",levelminPoisson); + levelmax = cf.getValue("setup","levelmax"); + boxlength = cf.getValue( "setup", "boxlength" ); + + // TODO: need to make sure unigrid gets called whenever possible + if( levelmin == levelmax && levelmin==levelminPoisson ) + { + GenerateDensityUnigrid(cf,ptf,type,refh,delta); + return; + } + + + //... parse random number options + for( int i=0; i<=100; ++i ) + { + char seedstr[128]; + std::string tempstr; + sprintf(seedstr,"seed[%d]",i); + if( cf.containsKey( "random", seedstr ) ) + tempstr = cf.getValue( "random", seedstr ); + else + tempstr = std::string("-2"); + + if( is_number( tempstr ) ) + { + long ltemp; + cf.convert( tempstr, ltemp ); + rngfnames.push_back( "" ); + rngseeds.push_back( ltemp ); + }else{ + rngfnames.push_back( tempstr ); + rngseeds.push_back(-1); + std::cout << " - Random numbers for level " << std::setw(3) << i << " will be read from file.\n"; + } + + } + + //... parse grid setup parameters + unsigned nbase = (unsigned)pow(2,levelmin); + float lxref[3]; + std::string temp = cf.getValue( "setup", "ref_extent" ); + sscanf( temp.c_str(), "%g,%g,%g", &lxref[0],&lxref[1],&lxref[2] ); + //double lextmin = std::min(lxref[0],std::min(lxref[1],lxref[2])); + real_t alpha = 0.1*(real_t)nbase/boxlength;//0.45*lextmin*(double)nbase;///boxlength; + real_t cutoff = 0.25;//lextmin*0.9; + + + int shift[3]; + shift[0] = cf.getValue("setup","shift_x"); + shift[1] = cf.getValue("setup","shift_y"); + shift[2] = cf.getValue("setup","shift_z"); + +#ifdef SINGLE_PRECISION + convolution::kernel_creator *the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_float" ]; +#else + convolution::kernel_creator *the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_double" ]; +#endif + + convolution::parameters conv_param; + //conv_param.boxlength = boxlength; + conv_param.ptf = ptf; + conv_param.pcf = &cf; + //conv_param.cosmo = cosmo; + + + + + //... compute absolute grid offsets + std::vector offtotx(levelmax+1,0),offtoty(levelmax+1,0),offtotz(levelmax+1,0); + for( unsigned ilevel = levelmin+1; ilevel<=levelmax; ++ilevel ) + { + //... build a partial sum to get absolute offsets + offtotx[ilevel] = 2*(offtotx[ilevel-1]+refh.offset(ilevel,0)); + offtoty[ilevel] = 2*(offtoty[ilevel-1]+refh.offset(ilevel,1)); + offtotz[ilevel] = 2*(offtotz[ilevel-1]+refh.offset(ilevel,2)); + } + for( unsigned ilevel = levelmin+1; ilevel<=levelmax; ++ilevel ) + { + //... the arrays are doubled in size for the isolated BCs + offtotx[ilevel] -= refh.size(ilevel,0)/2; + offtoty[ilevel] -= refh.size(ilevel,1)/2; + offtotz[ilevel] -= refh.size(ilevel,2)/2; + } + + //.. determine for which levels random seeds/random number files are given + int lmaxread = -1, lmingiven = -1; + for( unsigned ilevel = 0; ilevel < rngseeds.size(); ++ilevel ) + { + if( rngfnames[ilevel].size() > 0 ) + lmaxread = ilevel; + if( rngseeds[ilevel] > 0 && lmingiven == -1 ) + lmingiven = ilevel; + } + + //... if random numbers are to be read from file, do this now + std::vector< random_numbers* > randc; + randc.assign(std::max(lmaxread,std::max(lmingiven,(int)levelmax))+1,(random_numbers*)NULL); + + if( lmaxread >= (int)levelmin ) + { + randc[lmaxread] = new random_numbers( (unsigned)pow(2,lmaxread), rngfnames[lmaxread] ); + for( int ilevel = lmaxread-1; ilevel >= (int)levelmin; --ilevel ) + randc[ilevel] = new random_numbers( *randc[ilevel+1] ); + } + + + //... if random numbers are not given for lower levels, obtain them by averaging + if( lmingiven >= (int)levelmin ) + { + //double x0[3] = { 0.0, 0.0, 0.0 }; + //double lx[3] = { 1.0, 1.0, 1.0 }; + + randc[lmingiven] = new random_numbers( (unsigned)pow(2,lmingiven), 32, rngseeds[lmingiven], true );//, x0, lx ); + + for( int ilevel = lmingiven-1; ilevel >= (int)levelmin; --ilevel ){ + if( rngseeds[ilevel-levelmin] > 0 ) + std::cerr << " - Warning: random seed for level " << ilevel << " will be ignored.\n" + << " consistency requires that it is obtained by restriction from level " << lmingiven << std::endl; + + randc[ilevel] = new random_numbers( *randc[ilevel+1] ); + } + } + + //... if random seeds are given for levels coarser than levelmin, use them as constraints + if( lmingiven < (int)levelmin ) + { + throw std::runtime_error("You provided a seed for a level below levelmin, this is not supported yet."); + //double x0[3] = { 0.0, 0.0, 0.0 }; + //double lx[3] = { 1.0, 1.0, 1.0 }; + + randc[lmingiven] = new random_numbers( (unsigned)pow(2,lmingiven), 32, rngseeds[lmingiven], true );//, x0, lx ); + + for( int ilevel = lmingiven+1; ilevel <= (int)levelmin; ++ilevel ) + { + long seed = rngseeds[ilevel]; + if( seed <= 0 ) + seed = rngseeds[lmingiven+ilevel]; + + randc[ilevel] = new random_numbers( *randc[ilevel-1] ); + } + + delete randc[lmingiven]; + randc[lmingiven] = NULL; + } + + //... create and initialize density grids with white noise + + PaddedDensitySubGrid* coarse(NULL), *fine(NULL); + DensityGrid* top(NULL); + + //... clean up ...// + for( unsigned i=0; ilevelmax ) + if( randc[i] != NULL ) + { + delete randc[i]; + randc[i]=NULL; + } + } + + + //... perform convolutions ...// + if( levelmax == levelmin ) + { + std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmax << " ..." << std::endl; + + top = new DensityGrid( nbase, nbase, nbase ); + + real_t x0[3] = { refh.offset(levelmin,0), refh.offset(levelmin,1), refh.offset(levelmin,2) }; + real_t lx[3] = { refh.size(levelmin,0), refh.size(levelmin,1), refh.size(levelmin,2) }; + x0[0] /= pow(2,levelmin); x0[1] /= pow(2,levelmin); x0[2] /= pow(2,levelmin); + lx[0] /= pow(2,levelmin); lx[1] /= pow(2,levelmin); lx[2] /= pow(2,levelmin); + + random_numbers *rc; + + if( randc[levelmin] == NULL ) + rc = new random_numbers( nbase, 32, rngseeds[levelmin], true );//, x0, lx ); + else + rc = randc[levelmin]; + +#if 1 + if( shift[0]!=0||shift[1]!=0||shift[2]!=0 ) + std::cout << " - WARNING: will ignore non-zero shift in unigrid mode!\n"; + + top->fill_rand( rc, 1.0, 0, 0, 0, true ); +#else + + top->fill_rand( rc, 1.0, -shift[0], -shift[1], -shift[2], true ); +#endif + delete rc; + + conv_param.lx = boxlength; + conv_param.ly = boxlength; + conv_param.lz = boxlength; + conv_param.nx = top->nx_; + conv_param.ny = top->ny_; + conv_param.nz = top->nz_; + conv_param.normalize = false; + + convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param ); + convolution::perform( the_tf_kernel, reinterpret_cast( top->get_data_ptr() ) ); + delete the_tf_kernel; + + delta.create_base_hierarchy(levelmin); + top->copy( *delta.get_grid(levelmin) ); + delete top; + } + //} + + + for( int i=0; i< (int)levelmax-(int)levelmin; ++i ) + { + //.......................................................................................................// + //... GENERATE/FILL WITH RANDOM NUMBERS .................................................................// + //.......................................................................................................// + + + if( i==0 ) + { + top = new DensityGrid( nbase, nbase, nbase ); + + random_numbers *rc; + int x0[3] = { refh.offset_abs(levelmin,0)-shift[0], + refh.offset_abs(levelmin,1)-shift[1], + refh.offset_abs(levelmin,2)-shift[2] }; + + int lx[3] = { refh.size(levelmin,0), + refh.size(levelmin,1), + refh.size(levelmin,2) }; + + if( randc[levelmin] == NULL ) + rc = randc[levelmin] = new random_numbers( nbase, 32, rngseeds[levelmin], x0, lx ); + else + rc = randc[levelmin]; + + top->fill_rand( rc, 1.0, -shift[0], -shift[1], -shift[2], true ); + delete rc; + randc[levelmin] = NULL; + + } + + fine = new PaddedDensitySubGrid( refh.offset(levelmin+i+1,0), + refh.offset(levelmin+i+1,1), + refh.offset(levelmin+i+1,2), + refh.size(levelmin+i+1,0), + refh.size(levelmin+i+1,1), + refh.size(levelmin+i+1,2) ); + + { + random_numbers *rc; + int x0[3],lx[3]; + int lfac = (int)pow(2.0,i+1); + x0[0] = refh.offset_abs(levelmin+i+1,0)-lfac*shift[0]; //((real_t)(offtotx[levelmin+i+1]+lfac*shift[0]))/pow(2.0,levelmin+i+1); + x0[1] = refh.offset_abs(levelmin+i+1,1)-lfac*shift[1]; //((real_t)(offtoty[levelmin+i+1]+lfac*shift[1]))/pow(2.0,levelmin+i+1); + x0[2] = refh.offset_abs(levelmin+i+1,2)-lfac*shift[2]; //((real_t)(offtotz[levelmin+i+1]+lfac*shift[2]))/pow(2.0,levelmin+i+1); + lx[0] = refh.size(levelmin+i+1,0); // /pow(2.0,levelmin+i+1); + lx[1] = refh.size(levelmin+i+1,1); // /pow(2.0,levelmin+i+1); + lx[2] = refh.size(levelmin+i+1,2); // /pow(2.0,levelmin+i+1); + + //x0[0] -= 0.5*lx[0]; lx[0] *= 2.0; + //x0[1] -= 0.5*lx[1]; lx[1] *= 2.0; + //x0[2] -= 0.5*lx[2]; lx[2] *= 2.0; + + if( randc[levelmin+i+1] == NULL ) + rc = randc[levelmin+i+1] = new random_numbers((unsigned)pow(2,levelmin+i+1), 32, rngseeds[levelmin+i+1], x0, lx); + else + rc = randc[levelmin+i+1]; + + + { + /*int llfac = (int)pow(2.0,i+1); + fine->fill_rand( rc, 1.0, offtotx[levelmin+i+1]-llfac*shift[0], + offtoty[levelmin+i+1]-llfac*shift[1], + offtotz[levelmin+i+1]-llfac*shift[2] );*/ + + fine->fill_rand( rc, 1.0, x0[0]-fine->nx_/4, x0[1]-fine->ny_/4, x0[2]-fine->nz_/4 ); + + if( i+levelmin+1 > (unsigned)lmingiven ) + { + if(i==0) + fine->constrain( *top ); + else + fine->constrain( *coarse ); + } + } + delete rc; + rc = NULL; + } + + + //.......................................................................................................// + //... PERFORM CONVOLUTIONS ..............................................................................// + //.......................................................................................................// + if( i==0 ) + { + /**********************************************************************************************************\ + * multi-grid: top-level grid grids ..... + \**********************************************************************************************************/ + std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmin+i << " ..." << std::endl; + + delta.create_base_hierarchy(levelmin); + + DensityGrid top_save( *top ); + + + conv_param.lx = boxlength; + conv_param.ly = boxlength; + conv_param.lz = boxlength; + conv_param.nx = top->nx_; + conv_param.ny = top->ny_; + conv_param.nz = top->nz_; + conv_param.normalize = true; + + convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param ); + + + //... 1) compute standard convolution for levelmin + convolution::perform( the_tf_kernel, reinterpret_cast( top->get_data_ptr() ) ); + //convolution::perform_filtered( the_tf_kernel, reinterpret_cast( top->get_data_ptr() ) ); + top->copy( *delta.get_grid(levelmin) ); + + //... 2) compute contribution to finer grids from non-refined region + *top = top_save; + top_save.clear(); + top->zero_subgrid(refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2), + refh.size(levelmin+i+1,0)/2, refh.size(levelmin+i+1,1)/2, refh.size(levelmin+i+1,2)/2 ); + + convolution::perform( the_tf_kernel, reinterpret_cast( top->get_data_ptr() ) ); + //convolution::perform_filtered( the_tf_kernel, reinterpret_cast( top->get_data_ptr() ) ); + + + + meshvar_bnd delta_longrange( *delta.get_grid(levelmin) ); + top->copy( delta_longrange ); + delete the_tf_kernel; + delete top; + + //... restrict these contributions to the next level + delta.add_patch( refh.offset(levelmin+1,0), refh.offset(levelmin+1,1), refh.offset(levelmin+1,2), + refh.size(levelmin+1,0), refh.size(levelmin+1,1), refh.size(levelmin+1,2) ); + + //mg_linear().prolong( delta_longrange, *delta.get_grid(levelmin+1) ); + mg_cubic().prolong( delta_longrange, *delta.get_grid(levelmin+1) ); + + + } + else + { + /**********************************************************************************************************\ + * multi-grid: intermediate sub-grids ..... + \**********************************************************************************************************/ + + std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmin+i << " ..." << std::endl; + + delta.add_patch( refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2), + refh.size(levelmin+i+1,0), refh.size(levelmin+i+1,1), refh.size(levelmin+i+1,2) ); + + //mg_linear().prolong( *delta.get_grid(levelmin+i), *delta.get_grid(levelmin+i+1) ); + mg_cubic().prolong( *delta.get_grid(levelmin+i), *delta.get_grid(levelmin+i+1) ); + + real_t dx,lx,ly,lz; + dx = boxlength/pow(2.0,levelmin+i); + + lx = dx * coarse->nx_; + ly = dx * coarse->ny_; + lz = dx * coarse->nz_; + + real_t lmin = std::min( lx, std::min(ly,lz) ); + + + conv_param.lx = lx; + conv_param.ly = ly; + conv_param.lz = lz; + conv_param.nx = coarse->nx_; + conv_param.ny = coarse->ny_; + conv_param.nz = coarse->nz_; + conv_param.normalize = false; + + convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param ); + + PaddedDensitySubGrid coarse_save( *coarse ); + + //... 2) the inner region + + coarse->zero_boundary(); + convolution::perform( the_tf_kernel, reinterpret_cast (coarse->get_data_ptr()) ); + //convolution::perform_filtered( the_tf_kernel, reinterpret_cast (coarse->get_data_ptr()) ); + + + coarse->copy_add_unpad( *delta.get_grid(levelmin+i) ); + + + //... 3) the 'BC' for the next finer grid + *coarse = coarse_save; + + coarse->zero_subgrid(refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2), + refh.size(levelmin+i+1,0)/2, refh.size(levelmin+i+1,1)/2, refh.size(levelmin+i+1,2)/2 ); + coarse->zero_boundary(); + + convolution::perform( the_tf_kernel, reinterpret_cast (coarse->get_data_ptr()) ); + //convolution::perform_filtered( the_tf_kernel, reinterpret_cast (coarse->get_data_ptr()) ); + + + + meshvar_bnd delta_longrange( *delta.get_grid(levelmin+i) ); + coarse->copy_unpad( delta_longrange ); + //mg_linear().prolong_add( delta_longrange, *delta.get_grid(levelmin+i+1) ); + mg_cubic().prolong_add( delta_longrange, *delta.get_grid(levelmin+i+1) ); + + +#if 1 + //... FFT (isolated) boundary noise contribution + //convolution::truncate( the_tf_kernel, cutoff*lmin, 1e-8*alpha ); + convolution::truncate_sharp( the_tf_kernel, cutoff*lmin ); + *coarse = coarse_save; + //coarse_save.clear(); + coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2); + coarse->subtract_oct_mean(); + + convolution::perform( the_tf_kernel, reinterpret_cast (coarse->get_data_ptr()) ); + //convolution::perform_filtered( the_tf_kernel, reinterpret_cast (coarse->get_data_ptr()) ); + + coarse->copy_add_unpad( *delta.get_grid(levelmin+i) ); +#endif + +#if 0 + *coarse = coarse_save; + coarse->zero_boundary(); + coarse->subtract_oct_mean(); + convolution::perform( the_tf_kernel, reinterpret_cast (coarse->get_data_ptr()) ); + //coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2); + + coarse->upload_bnd_add( *delta.get_grid(levelmin+i-1) ); +#endif + + /**coarse = coarse_save; + coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2); + coarse->set_to_oct_mean(); + + convolution::perform( the_tf_kernel, reinterpret_cast (coarse->get_data_ptr()) ); + coarse->copy_subtract_unpad( *delta.get_grid(levelmin+i) );*/ + + + delete the_tf_kernel; + delete coarse; + } + + + coarse = fine; + } + + //... and convolution for finest grid (outside loop) + if( levelmax > levelmin ) + { + /**********************************************************************************************************\ + * multi-grid: finest sub-grid ..... + \**********************************************************************************************************/ + std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmax << " ..." << std::endl; + + real_t dx,lx,ly,lz; + dx = boxlength/pow(2.0,levelmax); + + lx = dx * coarse->nx_; + ly = dx * coarse->ny_; + lz = dx * coarse->nz_; + real_t lmin = std::min( lx, std::min(ly,lz) ); + + conv_param.lx = lx; + conv_param.ly = ly; + conv_param.lz = lz; + conv_param.nx = coarse->nx_; + conv_param.ny = coarse->ny_; + conv_param.nz = coarse->nz_; + conv_param.normalize = false; + + //... with LR/SR splitting and full subgrid + PaddedDensitySubGrid coarse_save( *coarse ); + + convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param ); + //... 2) the inner region + + coarse->zero_boundary(); + convolution::perform( the_tf_kernel, reinterpret_cast (coarse->get_data_ptr()) ); + coarse->copy_add_unpad( *delta.get_grid(levelmax) ); + + + //... compute convolution for isolated grid + //... 1) the padded boundary region + + + + +#if 1 + convolution::truncate( the_tf_kernel, cutoff*lmin, alpha ); + + *coarse = coarse_save; + coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2); + coarse->subtract_oct_mean(); + + convolution::perform( the_tf_kernel, reinterpret_cast (coarse->get_data_ptr()) ); + coarse->copy_add_unpad( *delta.get_grid(levelmax) ); + +#endif +#if 0 + // coarse correction + *coarse = coarse_save; + coarse->zero_boundary(); + coarse->subtract_oct_mean(); + convolution::perform( the_tf_kernel, reinterpret_cast (coarse->get_data_ptr()) ); + //coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2); + + coarse->upload_bnd_add( *delta.get_grid(levelmax-1) ); + + // meshvar_bnd delta_longrange( *delta.get_grid(levelmax) ); + //coarse->copy_unpad( delta_longrange ); + //mg_straight().restrict_add( delta_longrange, *delta.get_grid(levelmax-1) ); +#endif + + + + /**coarse = coarse_save; + coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2); + coarse->set_to_oct_mean(); + + convolution::perform( the_tf_kernel, reinterpret_cast (coarse->get_data_ptr()) ); + coarse->copy_subtract_unpad( *delta.get_grid(levelmax) );*/ + + + delete the_tf_kernel; + delete coarse; + } + + //... clean up ...// + for( unsigned i=0; ilevelmax ) + if( randc[i] != NULL ) + { + delete randc[i]; + randc[i]=NULL; + } + } + + //... enforce mean condition + for( int i=levelmin; i<(int)levelmax; ++i ) + enforce_mean( (*delta.get_grid(i+1)), (*delta.get_grid(i)) ); + + + + + + + double sum; + +#if 0 + //... subtract the box mean.... this will otherwise add + //... a constant curvature term to the potential + sum = 0.0; + { + int nx,ny,nz; + + nx = delta.get_grid(levelmin)->size(0); + ny = delta.get_grid(levelmin)->size(1); + nz = delta.get_grid(levelmin)->size(2); + + for( int ix=0; ixsize(0); + ny = delta.get_grid(i)->size(1); + nz = delta.get_grid(i)->size(2); + + for( int ix=0; ix(int)levelmin; --i ) + for( int i=levelmax; i>0; --i ) + mg_straight().restrict( (*delta.get_grid(i)), (*delta.get_grid(i-1)) ); + +#if 1 + sum = 0.0; + { + int nx,ny,nz; + + nx = delta.get_grid(levelmin)->size(0); + ny = delta.get_grid(levelmin)->size(1); + nz = delta.get_grid(levelmin)->size(2); + + for( int ix=0; ixsize(0); + ny = delta.get_grid(i)->size(1); + nz = delta.get_grid(i)->size(2); + + for( int ix=0; ix. + + */ + +#ifndef __DENSITIES_HH +#define __DENSITIES_HH + + +#ifdef SINGLE_PRECISION + #ifdef SINGLETHREAD_FFTW + #include + #else + #include + #endif +#else + #ifdef SINGLETHREAD_FFTW + #include + #else + #include + #endif +#endif + +#include "config_file.hh" +#include "random.hh" +#include "cosmology.hh" +#include "transfer_function.hh" + + +void GenerateDensityHierarchy( config_file& cf, transfer_function *ptf, tf_type type, + refinement_hierarchy& refh, grid_hierarchy& delta ); + +void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type type, + refinement_hierarchy& refh, grid_hierarchy& delta, bool kspace=false ); + +/*! + * @class DensityGrid + * @brief provides infrastructure for computing the initial density field + * + * This class provides access and data management member functions that + * are used when computing the initial density field by convolution with + * transfer functions. + */ +template< typename real_t > +class DensityGrid +{ +public: + + int nx_; //!< number of grid cells in x-direction + int ny_; //!< number of grid cells in y-direction + int nz_; //!< number of grid cells in z-direction + int nzp_; //!< number of cells in memory (z-dir), used for Nyquist padding + + //! the actual data container in the form of a 1D array + std::vector< real_t > data_; + + //! constructor + /*! constructs an instance given the dimensions of the density field + * @params nx the number of cells in x + * @params ny the number of cells in y + * @params nz the number of cells in z + */ + DensityGrid( unsigned nx, unsigned ny, unsigned nz ) + : nx_(nx), ny_(ny), nz_(nz) + { + data_.assign(nx_*ny_*2*(nz_/2+1),0.0); + nzp_ = 2*(nz_/2+1); + } + + //! copy constructor + explicit DensityGrid( const DensityGrid & g ) + : nx_(g.nx_), ny_(g.ny_), nz_(g.nz_), nzp_(g.nzp_) + { + data_ = g.data_; + } + + //!destructor + ~DensityGrid() + { } + + //! clears the density object + /*! sets all dimensions to zero and frees the memory + */ + void clear( void ) + { + nx_ = ny_ = nz_ = nzp_ = 0; + data_.clear(); + std::vector().swap(data_); + } + + //! assigns the contents of another DensityGrid to this + DensityGrid& operator=( const DensityGrid& g ) + { + nx_ = g.nx_; + ny_ = g.ny_; + nz_ = g.nz_; + nzp_= g.nzp_; + data_ = g.data_; + + return *this; + } + + //! 3D index based data access operator + inline real_t& operator()( int i, int j, int k ) + { return data_[(i*ny_+j)*nzp_+k]; } + + //! 3D index based const data access operator + inline const real_t& operator()( int i, int j, int k ) const + { return data_[(i*ny_+j)*nzp_+k]; } + + //! recover the pointer to the 1D data array + inline real_t * get_data_ptr( void ) + { return &data_[0]; } + + + //! fills the density field with random number values + /*! given a pointer to a random_numbers object, fills the field with random values + * @params prc pointer to a random_numbers object + * @params variance the variance of the random numbers (the values returned by prc are multiplied by this) + * @params i0 x-offset (shift) in cells of the density field with respect to the random number field + * @params j0 y-offset (shift) in cells of the density field with respect to the random number field + * @params k0 z-offset (shift) in cells of the density field with respect to the random number field + * @params zero boolean, if true, the global mean will be subtracted + */ + void fill_rand( /*const*/ random_numbers* prc, real_t variance, int i0, int j0, int k0, bool zero=false ) + { + double sum = 0.0; + + #pragma omp parallel for reduction(+:sum) + for( int i=0; i + void copy( array3& v ) + { + for( int ix=0; ix<(int)nx_; ++ix ) + for( int iy=0; iy<(int)ny_; ++iy ) + for( int iz=0; iz<(int)nz_; ++iz ) + v(ix,iy,iz) = (*this)(ix,iy,iz); + + } + + //! adds the data from another field with 3D index-based access operator + template< class array3 > + void copy_add( array3& v ) + { + for( int ix=0; ix<(int)nx_; ++ix ) + for( int iy=0; iy<(int)ny_; ++iy ) + for( int iz=0; iz<(int)nz_; ++iz ) + v(ix,iy,iz) += (*this)(ix,iy,iz); + } + + //! subtract the mean of each oct of the field + /*! subtracting the mean of each oct implies that a restriction of the field to the coarser level + * is zero everywhere (needed for Hoffman-Ribak constraints) + */ + void subtract_oct_mean( void ) + { + for( int i=0; i=oxsub+lxsub) + || (iy=oysub+lysub) + || (iz=ozsub+lzsub) ) + (*this)(ix,iy,iz) = 0.0; + + } + } + + //! sets the field to zero outside of a rectangular box + void zero_but_subgrid( int oxsub, int oysub, int ozsub, int lxsub, int lysub, int lzsub ) + { + for( int ix=0; ix=oxsub+lxsub) + || (iy=oysub+lysub) + || (iz=ozsub+lzsub) ) + (*this)(ix,iy,iz) = 0.0; + + } + } + + //! sets the field to zero except on the boundary of a rectangular box + void zero_but_subgrid_bnd( unsigned oxsub, unsigned oysub, unsigned ozsub, unsigned lxsub, unsigned lysub, unsigned lzsub ) + { + //... correct offsets for padding (not needed for top grid) + + // zero the subgrid + for( int ix=oxsub; ix<(int)(oxsub+lxsub); ++ix ) + for( int iy=oysub; iy<(int)(oysub+lysub); ++iy ) + for( int iz=ozsub; iz<(int)(ozsub+lzsub); ++iz ) + (*this)(ix,iy,iz) = 0.0; + + oxsub -= lxsub/2; + oysub -= lysub/2; + ozsub -= lzsub/2; + lxsub *= 2; + lysub *= 2; + lzsub *= 2; + + // zero the outside, except the boundary + //#pragma parallel nowait + { + + for( int ix=0; ix +class PaddedDensitySubGrid : public DensityGrid +{ +public: + using DensityGrid::nx_; + using DensityGrid::ny_; + using DensityGrid::nz_; + using DensityGrid::data_; + + using DensityGrid::fill_rand; + using DensityGrid::get_data_ptr; + + int ox_, oy_, oz_; +public: + + PaddedDensitySubGrid( int ox, int oy, int oz, unsigned nx, unsigned ny, unsigned nz) + : DensityGrid(2*nx,2*ny,2*nz), ox_(ox), oy_(oy), oz_(oz) + { + if( ox-nx/4 < 0 || oy-ny/4 < 0 || oz-nz/4 < 0 ) + throw std::runtime_error("subgrid extends across top grid"); + + //.. the size in top grid cells is nx/2,ny/2,nz/2, so padding starts at ox-nx/4... + //.. loop over relevant part of top grid and copy down grid values + /*for( int ix=ox-nx/4,ixs=0; ix& o ) + : DensityGrid( o ), ox_(o.ox_), oy_(o.oy_), oz_(o.oz_) + { } + + void zero_padded_subgrid( unsigned oxsub, unsigned oysub, unsigned ozsub, unsigned lxsub, unsigned lysub, unsigned lzsub ) + { + //... correct offsets for padding (not needed for top grid) + + oxsub += nx_/4; + oysub += ny_/4; + ozsub += nz_/4; + + //... correct offsets for padding (not needed for top grid) + for( int ix=oxsub; ix<(int)(oxsub+lxsub); ++ix ) + for( int iy=oysub; iy<(int)(oysub+lysub); ++iy ) + for( int iz=ozsub; iz<(int)(ozsub+lzsub); ++iz ) + (*this)(ix,iy,iz) = 0.0; + + } + + void zero_subgrid( unsigned oxsub, unsigned oysub, unsigned ozsub, unsigned lxsub, unsigned lysub, unsigned lzsub ) + { + //... correct offsets for padding (not needed for top grid) + + oxsub += nx_/4; + oysub += ny_/4; + ozsub += nz_/4; + + for( int ix=oxsub; ix<(int)(oxsub+lxsub); ++ix ) + for( int iy=oysub; iy<(int)(oysub+lysub); ++iy ) + for( int iz=ozsub; iz<(int)(ozsub+lzsub); ++iz ) + (*this)(ix,iy,iz) = 0.0; + + } + + void zero_but_subgrid_bnd( unsigned oxsub, unsigned oysub, unsigned ozsub, unsigned lxsub, unsigned lysub, unsigned lzsub ) + { + //... correct offsets for padding (not needed for top grid) + + oxsub += nx_/4; + oysub += ny_/4; + ozsub += nz_/4; + + //lxsub += nx_/4; + //lysub += nx_/4; + //lzsub += nx_/4; + + // zero the subgrid + for( int ix=oxsub; ix<(int)(oxsub+lxsub); ++ix ) + for( int iy=oysub; iy<(int)(oysub+lysub); ++iy ) + for( int iz=ozsub; iz<(int)(ozsub+lzsub); ++iz ) + (*this)(ix,iy,iz) = 0.0; + + oxsub -= lxsub/2; + oysub -= lysub/2; + ozsub -= lzsub/2; + lxsub *= 2; + lysub *= 2; + lzsub *= 2; + + // zero the outside, except the boundary + //#pragma parallel block nowait + { + + for( int ix=0; ix " << nx_ << std::endl; + for( int ix=oxsub+lxsub; ix + void upload_bnd_add( array3& top ) + { + int ox=ox_-nx_/8, oy=oy_-ny_/8, oz=oz_-nz_/8; + int lx=ox+nx_/2, ly=oy+ny_/2, lz=oz+nz_/2; + ox = std::max(ox,0); + oy = std::max(oy,0); + oz = std::max(oz,0); + lx = std::min(lx,(int)top.size(0)); + ly = std::min(ly,(int)top.size(1)); + lz = std::min(lz,(int)top.size(2)); + + + if( ox < 0 || oy < 0 || oz < 0 ){ + + std::cerr << "offset = " << ox << ", " << oy << ", " << oz << std::endl; + std::cerr << "nx = " << nx_ << ", " << ny_ << ", " << nz_ << std::endl; + + throw std::runtime_error("Subgrid extends beyond top grid. Increase levelmin or reduce size of refinement region!"); + } + for( int ix=0,ixu=ox;ix=3*nx_/4) + || (iy=3*ny_/4) + || (iz=3*nz_/4) ) + { + top(ixu,iyu,izu) += 0.125* ((*this)(ix,iy,iz)+(*this)(ix+1,iy,iz)+(*this)(ix,iy+1,iz)+(*this)(ix,iy,iz+1) + +(*this)(ix+1,iy+1,iz)+(*this)(ix+1,iy,iz+1)+(*this)(ix,iy+1,iz+1)+(*this)(ix+1,iy+1,iz+1) ); + // std::cerr << "*"; + } + } + } + + void constrain( const DensityGrid& top ) + { + int ox=ox_-nx_/8, oy=oy_-ny_/8, oz=oz_-nz_/8; + + if( ox < 0 || oy < 0 || oz < 0 ){ + + std::cerr << "offset = " << ox << ", " << oy << ", " << oz << std::endl; + std::cerr << "nx = " << nx_ << ", " << ny_ << ", " << nz_ << std::endl; + + throw std::runtime_error("Subgrid extends beyond top grid. Increase levelmin or reduce size of refinement region!"); + } + + //...top grid is not padded + + //.. fine grid will be multiplied by sqrt(8) later to account for increase in variance + //.. so, we need to divide the coarse grid value by it + + real_t fac = 1.0/sqrt(8); + for( int ixs=0,ix=ox;ixs& top ) + { + ///... top grid is padded too... + //.. ox is shifted by nx_/4, padded overlap starts at -nx_/8 + int ox=ox_+top.nx_/4-nx_/8, oy=oy_+top.ny_/4-ny_/8, oz=oz_+top.nz_/4-nz_/8; + if( ox < 0 || oy < 0 || oz < 0 ) + throw std::runtime_error("subgrid extends beyond top grid"); + double fac = 1.0/sqrt(8.0); + for( int ixs=0,ix=ox;ixs + void upload_bnd( unsigned nbnd, array3& v ) + { + for( int ix=nx_/4-2*nbnd,ixu=-nbnd; ix<3*nx_/4+2*nbnd; ix+=2,++ixu) + for( int iy=ny_/4-2*nbnd,iyu=-nbnd; iy<3*ny_/4+2*nbnd; iy+=2,++iyu ) + for( int iz=nz_/4-2*nbnd,izu=-nbnd; iz<3*nz_/4+2*nbnd; iz+=2,++izu ) + { + if( (ix=3*nx_/4) + || (iy=3*ny_/4) + || (iz=3*nz_/4) ) + { + v(ixu+ox_,iyu+oy_,izu+oz_) + = 1.0; + } + } + } + + template< class array3 > + void copy_unpad( array3& v ) + { + for( int ix=nx_/4,ixu=0; ix<3*nx_/4; ++ix,++ixu ) + for( int iy=ny_/4,iyu=0; iy<3*ny_/4; ++iy,++iyu ) + for( int iz=nz_/4,izu=0; iz<3*nz_/4; ++iz,++izu ) + v(ixu,iyu,izu) = (*this)(ix,iy,iz); + } + + template< class array3 > + void copy_add_unpad( array3& v ) + { + for( int ix=nx_/4,ixu=0; ix<3*nx_/4; ++ix,++ixu ) + for( int iy=ny_/4,iyu=0; iy<3*ny_/4; ++iy,++iyu ) + for( int iz=nz_/4,izu=0; iz<3*nz_/4; ++iz,++izu ) + v(ixu,iyu,izu) += (*this)(ix,iy,iz); + } + + template< class array3 > + void copy_subtract_unpad( array3& v ) + { + for( int ix=nx_/4,ixu=0; ix<3*nx_/4; ++ix,++ixu ) + for( int iy=ny_/4,iyu=0; iy<3*ny_/4; ++iy,++iyu ) + for( int iz=nz_/4,izu=0; iz<3*nz_/4; ++iz,++izu ) + v(ixu,iyu,izu) -= (*this)(ix,iy,iz); + } + + void zero_boundary( void ) + { + for( int ix=0; ix +inline void enforce_coarse_mean( M& v, M& V ) +{ + double outersum =0.0, innersum = 0.0; + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + unsigned innercount = 0, outercount = 0; + for( int i=0; i ox && i < ox+nx) && + (j > oy && j < oy+ny) && + (k > oz && k < oz+nz )) + { + innersum += V(i,j,k); + ++innercount; + } + else + { + outersum += V(i,j,k); + ++outercount; + } + + } + innersum /= innercount; + outersum /= outercount; + + double dcorr = innersum * innercount / outercount; + + for( int i=0; i ox && i < ox+nx) && + (j > oy && j < oy+ny) && + (k > oz && k < oz+nz ))) + V(i,j,k) -= outersum + dcorr;//innersum; + } + +} + +template< typename M > +inline void enforce_mean( M& v, M& V ) +{ + + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + + double coarsemean = 0.0, finemean = 0.0; + unsigned count = 0; + #pragma omp parallel for reduction(+:coarsemean,finemean,count) + for( int i=0; i. + +*/ + +#ifndef __FD_SCHEMES_HH +#define __FD_SCHEMES_HH + +#include +#include + + +//... abstract implementation of the Poisson/Force scheme +template< class L, class G, typename real_t=double > +class scheme +{ +public: + typedef L laplacian; + typedef G gradient; + + laplacian m_laplacian; + gradient m_gradient; + + template< class C > + inline real_t grad_x( const C&c, const int i, const int j, const int k ) + { return m_gradient.apply_x( c,i,j,k ); } + + template< class C > + inline real_t grad_y( const C&c, const int i, const int j, const int k ) + { return m_gradient.apply_y( c,i,j,k ); } + + template< class C > + inline real_t grad_z( const C&c, const int i, const int j, const int k ) + { return m_gradient.apply_z( c,i,j,k ); } + + template< class C > + inline real_t L_apply( const C&c, const int i, const int j, const int k ) + { return m_laplacian.apply( c,i,j,k ); } + + template< class C > + inline real_t L_rhs( const C&c, const int i, const int j, const int k ) + { return m_laplacian.rhs( c,i,j,k ); } + + inline real_t ccoeff( void ) + { return m_laplacian.ccoeff(); } + +}; + + +template< int nextent, typename T > +class gradient +{ + typedef T real_t; + std::vector m_stencil; + const unsigned nl; +public: + + gradient() + : nl( 2*nextent+1 ) + { + m_stencil.assign(nl*nl*nl,(real_t)0.0); + } + + real_t& operator()(int i) + { return m_stencil[i+nextent]; } + + const real_t& operator()(int i) const + { return m_stencil[i+nextent]; } + + template< class C > + inline void apply( const C& c, C& f, int dir ) + { + f = c; + + int nx=c.size(0), ny=c.size(1), nz=c.size(2); + double hx = 1.0/(nx+1.0), hy = 1.0/(ny+1.0), hz = 1.0/(nz+1.0); + + f.zero(); + + if( dir == 0 ) + for( int i=0; i +class base_stencil +{ +protected: + std::vector m_stencil; + const unsigned nl; +public: + bool m_modsource; + +public: + base_stencil( bool amodsource = false ) + : nl( 2*nextent+1 ), m_modsource( amodsource ) + { + m_stencil.assign(nl*nl*nl,(real_t)0.0); + } + + real_t& operator()(int i, int j, int k) + { return m_stencil[((i+nextent)*nl+(j+nextent))*nl+(k+nextent)]; } + + const real_t& operator()(unsigned i, unsigned j, unsigned k) const + { return m_stencil[((i+nextent)*nl+(j+nextent))*nl+(k+nextent)]; } + + template< class C > + inline real_t rhs( const C& c, const int i, const int j, const int k ) + { + real_t sum = this->apply( c, i, j, k ); + sum -= (*this)(0,0,0) * c(i,j,k); + return sum; + } + + inline real_t ccoeff( void ) + { + return (*this)(0,0,0); + } + + + template< class C > + inline real_t apply( const C& c, const int i, const int j, const int k ) + { + real_t sum = 0.0; + + for( int ii=-nextent; ii<=nextent; ++ii ) + for( int jj=-nextent; jj<=nextent; ++jj ) + for( int kk=-nextent; kk<=nextent; ++kk ) + sum += (*this)(ii,jj,kk) * c(i+ii,j+jj,k+kk); + + return sum; + } + + template< class C > + inline real_t modsource( const C& c, const int i, const int j, const int k ) + { + return 0.0; + } + +}; + + +/***************************************************************************************/ +/***************************************************************************************/ +/***************************************************************************************/ + + +//... Implementation of the Gradient schemes............................................ + + +template< typename real_t > +class deriv_2P : public gradient<1,real_t> +{ + +public: + deriv_2P( void ) + { + (*this)( 0 ) = 0.0; + (*this)(-1 ) = -0.5; + (*this)(+1 ) = +0.5; + } + + +}; + +//... Implementation of the Laplacian schemes.......................................... + + +template< typename real_t > +class stencil_7P : public base_stencil<1,real_t> +{ + +public: + stencil_7P( void ) + { + (*this)( 0, 0, 0) = -6.0; + (*this)(-1, 0, 0) = +1.0; + (*this)(+1, 0, 0) = +1.0; + (*this)( 0,-1, 0) = +1.0; + (*this)( 0,+1, 0) = +1.0; + (*this)( 0, 0,-1) = +1.0; + (*this)( 0, 0,+1) = +1.0; + } + + template< class C > + inline real_t apply( const C& c, const int i, const int j, const int k ) const + { + return c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)-6.0*c(i,j,k); + } + + template< class C > + inline real_t rhs( const C& c, const int i, const int j, const int k ) const + { + return c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1); + } + + inline real_t ccoeff( void ) + { + return -6.0; + } +}; + + +template< typename real_t > +class stencil_13P : public base_stencil<2,real_t> +{ + +public: + stencil_13P( void ) + { + (*this)( 0, 0, 0) = -90.0/12.; + + (*this)(-1, 0, 0) = + (*this)(+1, 0, 0) = + (*this)( 0,-1, 0) = + (*this)( 0,+1, 0) = + (*this)( 0, 0,-1) = + (*this)( 0, 0,+1) = 16./12.; + + (*this)(-2, 0, 0) = + (*this)(+2, 0, 0) = + (*this)( 0,-2, 0) = + (*this)( 0,+2, 0) = + (*this)( 0, 0,-2) = + (*this)( 0, 0,+2) = -1./12.; + } + + template< class C > + inline real_t apply( const C& c, const int i, const int j, const int k ) + { + return + (-1.0*(c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2)) + +16.0*(c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)) + -90.0*c(i,j,k))/12.0; + } + + template< class C > + inline real_t rhs( const C& c, const int i, const int j, const int k ) + { + return + (-1.0*(c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2)) + +16.0*(c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)))/12.0; + } + + inline real_t ccoeff( void ) + { + return -90.0/12.0; + } +}; + +template< typename real_t > +class stencil_19P : public base_stencil<3,real_t> +{ + +public: + stencil_19P( void ) + { + (*this)( 0, 0, 0) = -1470./180.; + + (*this)(-1, 0, 0) = + (*this)(+1, 0, 0) = + (*this)( 0,-1, 0) = + (*this)( 0,+1, 0) = + (*this)( 0, 0,-1) = + (*this)( 0, 0,+1) = 270./180.; + + (*this)(-2, 0, 0) = + (*this)(+2, 0, 0) = + (*this)( 0,-2, 0) = + (*this)( 0,+2, 0) = + (*this)( 0, 0,-2) = + (*this)( 0, 0,+2) = -27./180.; + + (*this)(-3, 0, 0) = + (*this)(+3, 0, 0) = + (*this)( 0,-3, 0) = + (*this)( 0,+3, 0) = + (*this)( 0, 0,-3) = + (*this)( 0, 0,+3) = 2./180.; + + } + + template< class C > + inline real_t apply( const C& c, const int i, const int j, const int k ) + { + return + (2.0*(c(i-3,j,k)+c(i+3,j,k)+c(i,j-3,k)+c(i,j+3,k)+c(i,j,k-3)+c(i,j,k+3)) + -27.0*(c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2)) + +270.0*(c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)) + -1470.0*c(i,j,k))/180.0; + } + + template< class C > + inline real_t rhs( const C& c, const int i, const int j, const int k ) + { + return + (2.0*(c(i-3,j,k)+c(i+3,j,k)+c(i,j-3,k)+c(i,j+3,k)+c(i,j,k-3)+c(i,j,k+3)) + -27.0*(c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2)) + +270.0*(c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)))/180.0; + } + + inline real_t ccoeff( void ) + { + return -1470.0/180.0; + } + +}; + + +template< typename real_t > +class Laplace_flux_O4 +{ +public: + + //.. idir is -1 for left boundary, +1 for right boundary + template< class C > + inline double apply_x( int idir, const C& c, const int i, const int j, const int k ) + { + double fac = -((double)idir)/12.0; + return fac*(-c(i-2,j,k)+15.0*c(i-1,j,k)-15.0*c(i,j,k)+c(i+1,j,k)); + } + + template< class C > + inline double apply_y( int idir, const C& c, const int i, const int j, const int k ) + { + double fac = -((double)idir)/12.0; + return fac*(-c(i,j-2,k)+15.0*c(i,j-1,k)-15.0*c(i,j,k)+c(i,j+1,k)); + } + + template< class C > + inline double apply_z( int idir, const C& c, const int i, const int j, const int k ) + { + double fac = -((double)idir)/12.0; + return fac*(-c(i,j,k-2)+15.0*c(i,j,k-1)-15.0*c(i,j,k)+c(i,j,k+1)); + } + +}; + + +template< typename real_t > +class Laplace_flux_O6 +{ +public: + + //.. idir is -1 for left boundary, +1 for right boundary + template< class C > + inline double apply_x( int idir, const C& c, const int i, const int j, const int k ) + { + double fac = -((double)idir)/180.0; + return fac*(2.*c(i-3,j,k)-25.*c(i-2,j,k)+245.*c(i-1,j,k)-245.0*c(i,j,k)+25.*c(i+1,j,k)-2.*c(i+2,j,k)); + } + + template< class C > + inline double apply_y( int idir, const C& c, const int i, const int j, const int k ) + { + double fac = -((double)idir)/180.0; + return fac*(2.*c(i,j-3,k)-25.*c(i,j-2,k)+245.*c(i,j-1,k)-245.0*c(i,j,k)+25.*c(i,j+1,k)-2.*c(i,j+2,k)); + } + + template< class C > + inline double apply_z( int idir, const C& c, const int i, const int j, const int k ) + { + double fac = -((double)idir)/180.0; + return fac*(2.*c(i,j,k-3)-25.*c(i,j,k-2)+245.*c(i,j,k-1)-245.0*c(i,j,k)+25.*c(i,j,k+1)-2.*c(i,j,k+2)); + } + +}; + + + +#endif + + diff --git a/general.hh b/general.hh new file mode 100644 index 0000000..a7e498e --- /dev/null +++ b/general.hh @@ -0,0 +1,121 @@ +/* + + general.hh - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +*/ + +#ifndef __GENERAL_HH +#define __GENERAL_HH + + +#include + +#ifdef WITH_MPI + #ifdef MANNO + #include + #else + #include + #endif +#else +#include +#endif + +#ifdef SINGLE_PRECISION + #ifdef WITH_MPI + #include "srfftw_mpi.h" + #define MPI_TREAL MPI_FLOAT + #else + #include "srfftw.h" + #endif + typedef float real_t; +#else + #ifdef WITH_MPI + #include "drfftw_mpi.h" + #define MPI_TREAL MPI_DOUBLE + #else + #include "drfftw.h" + #endif + typedef double real_t; +#endif + +#include + + +#include "mesh.hh" +typedef GridHierarchy grid_hierarchy; +typedef MeshvarBnd meshvar_bnd; +typedef Meshvar meshvar; + + +/* Convenience from Numerical Recipes in C, 2nd edition */ +template< typename T > +inline T SQR( T a ){ + return a*a; +} + +template< typename T > +inline T CUBE( T a ){ + return a*a*a; +} + +template< typename T > +inline T POW4( T a ){ + return a*a*a*a; +} + + +// --- structure for cosmological parameters --- // +typedef struct cosmology{ + double + Omega_m, // baryon+dark matter density + Omega_b, // baryon matter density + Omega_L, // dark energy density + H0, // Hubble constant + nspect, // long-wave spectral index (scale free is nspect=1) + sigma8, // power spectrum normalization + //Gamma, // shape parameter (of historical interest, as a free parameter) + //fnl, // non-gaussian contribution parameter + //w0, // dark energy equation of state parameter (not implemented, i.e. =1 at the moment) + //wa, // dark energy equation of state parameter (not implemented, i.e. =1 at the moment) + dplus, + pnorm, + vfact, + WDMmass, + WDMg_x, + astart; // expansion factor a for which to generate initial conditions +}Cosmology; + +typedef struct { + unsigned levelmin, levelmax; + double boxlength; + std::vector offx,offy,offz,llx,lly,llz; +}Parameters; + + +inline double time_seconds( void ) +{ + #ifdef WITH_MPI + return MPI_Wtime(); + #else + return ((double) clock()) / CLOCKS_PER_SEC; + #endif +} + +#endif diff --git a/main.cc b/main.cc new file mode 100644 index 0000000..244ecb7 --- /dev/null +++ b/main.cc @@ -0,0 +1,605 @@ +/* + + main.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +*/ + + +#include +#include +#include +#include + +#include +#include +#include + + + +#include "general.hh" +#include "output.hh" + +#include "config_file.hh" + +#include "poisson.hh" +#include "mg_solver.hh" +#include "fd_schemes.hh" +#include "random.hh" +#include "densities.hh" + +#include "cosmology.hh" +#include "transfer_function.hh" + +#define THE_CODE_NAME "music!" +#define THE_CODE_VERSION "0.2a" + + +namespace music +{ + + struct framework + { + transfer_function *the_transfer_function; + //poisson_solver *the_poisson_solver; + config_file *the_config_file; + refinement_hierarchy *the_refinement_hierarchy; + }; + +} + +transfer_function *TransferFunction_real::ptf_ = NULL; +transfer_function *TransferFunction_k::ptf_ = NULL; + +real_t TransferFunction_real::nspec_ = -1.0; +real_t TransferFunction_k::nspec_ = -1.0; + +void splash(void) +{ + + std::cout + << "\n __ __ __ __ ______ __ ______ \n" + << " /\\ \"-./ \\ /\\ \\/\\ \\ /\\ ___\\ /\\ \\ /\\ ___\\ \n" + << " \\ \\ \\-./\\ \\ \\ \\ \\_\\ \\ \\ \\___ \\ \\ \\ \\ \\ \\ \\____ \n" + << " \\ \\_\\ \\ \\_\\ \\ \\_____\\ \\/\\_____\\ \\ \\_\\ \\ \\_____\\ \n" + << " \\/_/ \\/_/ \\/_____/ \\/_____/ \\/_/ \\/_____/ \n\n" + << " this is " << THE_CODE_NAME << " version " << THE_CODE_VERSION << "\n\n\n"; + + +} + +void modify_grid_for_TF( const refinement_hierarchy& rh_full, refinement_hierarchy& rh_TF, config_file& cf ) +{ + unsigned lbase, lbaseTF, lmax; + + lbase = cf.getValue( "setup", "levelmin" ); + lbaseTF = cf.getValueSafe( "setup", "levelminTF", lbase ); + lmax = cf.getValue( "setup", "levelmax" ); + + rh_TF = rh_full; + + unsigned pad = 4; + + for( unsigned i=lbase+1; i<=lmax; ++i ) + { + rh_TF.adjust_level(i, rh_TF.size(i,0)+2*pad, rh_TF.size(i,1)+2*pad, rh_TF.size(i,2)+2*pad, + rh_TF.offset_abs(i,0)-pad, rh_TF.offset_abs(i,1)-pad, rh_TF.offset_abs(i,2)-pad); + } + + if( lbaseTF > lbase ) + { + std::cout << " - Will use levelmin = " << lbaseTF << " to compute density field...\n"; + + for( unsigned i=lbase; i<=lbaseTF; ++i ) + { + unsigned nfull = (unsigned)pow(2,i); + rh_TF.adjust_level(i, nfull, nfull, nfull, 0, 0, 0); + } + } + + + +} + +void coarsen_density( const refinement_hierarchy& rh, grid_hierarchy& u ) +{ + for( unsigned i=1; i<=rh.levelmax(); ++i ) + { + if( rh.offset(i,0) != u.get_grid(i)->offset(0) + || rh.offset(i,1) != u.get_grid(i)->offset(1) + || rh.offset(i,2) != u.get_grid(i)->offset(2) + || rh.size(i,0) != u.get_grid(i)->size(0) + || rh.size(i,1) != u.get_grid(i)->size(1) + || rh.size(i,2) != u.get_grid(i)->size(2) ) + { + u.cut_patch(i, rh.offset_abs(i,0), rh.offset_abs(i,1), rh.offset_abs(i,2), + rh.size(i,0), rh.size(i,1), rh.size(i,2) ); + } + } +} + +void store_grid_structure( config_file& cf, const refinement_hierarchy& rh ) +{ + char str1[128], str2[128]; + for( unsigned i=rh.levelmin(); i<=rh.levelmax(); ++i ) + { + for( int j=0; j<3; ++j ) + { + sprintf(str1,"offset(%d,%d)",i,j); + sprintf(str2,"%d",rh.offset(i,j)); + cf.insertValue("setup",str1,str2); + + sprintf(str1,"size(%d,%d)",i,j); + sprintf(str2,"%d",rh.size(i,j)); + cf.insertValue("setup",str1,str2); + + } + } +} + +//... this is just for testing purposes ... +void compute_Lu( const grid_hierarchy& u, grid_hierarchy& Du ) +{ + Du = u; + + for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) + { + double h = pow(2.0,ilevel); + h = h*h; + meshvar_bnd *pvar = Du.get_grid(ilevel); + + /*#pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + (*pvar)(ix,iy,iz) = -((*u.get_grid(ilevel))(ix+1,iy,iz)+(*u.get_grid(ilevel))(ix-1,iy,iz)+ + (*u.get_grid(ilevel))(ix,iy+1,iz)+(*u.get_grid(ilevel))(ix,iy-1,iz)+ + (*u.get_grid(ilevel))(ix,iy,iz+1)+(*u.get_grid(ilevel))(ix,iy,iz-1)- + 6.0*(*u.get_grid(ilevel))(ix,iy,iz))*h; + */ + + /*#pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + (*pvar)(ix,iy,iz) = -stencil_13P().apply(*u.get_grid(ilevel),ix,iy,iz)*h; + */ + + #pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + (*pvar)(ix,iy,iz) = -stencil_19P().apply(*u.get_grid(ilevel),ix,iy,iz)*h; + } +} + +void subtract_finest_mean( grid_hierarchy& u ) +{ + std::cout << " - Subtracting component mean...\n"; + double sum = 0.0; + for( int ix = 0; ix < (int)(*u.get_grid(u.levelmax())).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(u.levelmax())).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(u.levelmax())).size(2); ++iz ) + sum += 0.5*(*u.get_grid(u.levelmax()))(ix,iy,iz); + + sum /= (*u.get_grid(u.levelmax())).size(0) + * (*u.get_grid(u.levelmax())).size(1) + * (*u.get_grid(u.levelmax())).size(2); + + for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) + #pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + (*u.get_grid(ilevel))(ix,iy,iz) -= sum; + +} + + +/*****************************************************************************************************/ +/*****************************************************************************************************/ +/*****************************************************************************************************/ + + + +int main (int argc, const char * argv[]) +{ + const unsigned nbnd = 4; + + unsigned lbase, lmax, lbaseTF; + double err; + + cosmology cosmo; + double boxlength, zstart; + std::vector rngseeds; + std::vector rngfnames; + + double x0[3], lx[3]; + unsigned npad = 8; + + splash(); + if( argc != 2 ){ + std::cout << " This version is compiled with the following plug-ins:\n"; + + print_transfer_function_plugins(); + print_output_plugins(); + + std::cerr << "\n In order to run, you need to specify a parameter file!\n\n"; + exit(0); + } + + + /******************************************************************************************************/ + /* read and interpret config file *********************************************************************/ + /******************************************************************************************************/ + config_file cf(argv[1]); + std::string tfname,randfname,temp, outformat, outfname, poisson_solver_name;; + bool shift_back(false), align_top(false), kspace(false); + float tf0,tf1,tf2; + + + boxlength = cf.getValue( "setup", "boxlength" ); + lbase = cf.getValue( "setup", "levelmin" ); + lmax = cf.getValue( "setup", "levelmax" ); + lbaseTF = cf.getValueSafe( "setup", "levelminTF", lbase ); + + temp = cf.getValue( "setup", "ref_offset" ); + sscanf( temp.c_str(), "%g,%g,%g", &tf0, &tf1, &tf2 ); x0[0] = tf0; x0[1] = tf1; x0[2] = tf2; + + + temp = cf.getValue( "setup", "ref_extent" ); + sscanf( temp.c_str(), "%g,%g,%g", &tf0, &tf1, &tf2 ); lx[0] = tf0; lx[1] = tf1; lx[2] = tf2; + + npad = cf.getValue( "setup", "padding" ); + align_top = cf.getValueSafe( "setup", "align_top", false ); + + + kspace = cf.getValueSafe( "setup", "kspace", false ); + if( kspace ) + poisson_solver_name = std::string("fft_poisson"); + else + poisson_solver_name = std::string("mg_poisson"); + + // TODO: move cosmology parameters reading to cosmo_calc + zstart = cf.getValue( "setup", "zstart" ); + cosmo.astart = 1.0/(1.0+zstart); + cosmo.Omega_b = cf.getValue( "cosmology", "Omega_b" ); + cosmo.Omega_m = cf.getValue( "cosmology", "Omega_m" ); + cosmo.Omega_L = cf.getValue( "cosmology", "Omega_L" ); + cosmo.H0 = cf.getValue( "cosmology", "H0" ); + cosmo.sigma8 = cf.getValue( "cosmology", "sigma_8" ); + cosmo.nspect = cf.getValue( "cosmology", "nspec" ); + cosmo.WDMg_x = cf.getValueSafe( "cosmology", "WDMg_x", 1.5 ); + cosmo.WDMmass = cf.getValueSafe( "cosmology", "WDMmass", 0.0 ); + + //cosmo.Gamma = cf.getValueSafe( "cosmology", "Gamma", -1.0 ); + + /******************************************************************************************************/ + /******************************************************************************************************/ + + shift_back = cf.getValueSafe( "output", "shift_back", shift_back ); + outformat = cf.getValue( "output", "format" ); + outfname = cf.getValue( "output", "filename" ); + + + + /******************************************************************************************************/ + /******************************************************************************************************/ + /******************************************************************************************************/ + +#ifndef SINGLETHREAD_FFTW + fftw_threads_init(); +#endif + + transfer_function_plugin *the_transfer_function_plugin + = select_transfer_function_plugin( cf ); + + CosmoCalc ccalc(cosmo,the_transfer_function_plugin); + cosmo.pnorm = ccalc.ComputePNorm( 2.0*M_PI/boxlength ); + cosmo.dplus = ccalc.CalcGrowthFactor( cosmo.astart )/ccalc.CalcGrowthFactor( 1.0 ); + cosmo.vfact = ccalc.ComputeVFact( cosmo.astart ); + { + char tmpstr[128]; + sprintf(tmpstr,"%.12g",cosmo.pnorm); + cf.insertValue("cosmology","pnorm",tmpstr); + sprintf(tmpstr,"%.12g",cosmo.dplus); + cf.insertValue("cosmology","dplus",tmpstr); + sprintf(tmpstr,"%.12g",cosmo.vfact); + cf.insertValue("cosmology","vfact",tmpstr); + + } + + /******************************************************************************************************/ + /******************************************************************************************************/ + + bool + do_baryons = cf.getValue("setup","baryons"), + do_2LPT = cf.getValue("setup","use_2LPT"), + do_LLA = cf.getValue("setup","use_LLA"), + do_CVM = cf.getValueSafe("setup","center_velocities",false), + do_UVA = false;//FIXME: cf.getValue("setup","use_UVA"); + + + refinement_hierarchy rh_Poisson( cf ); + store_grid_structure(cf, rh_Poisson); + rh_Poisson.output(); + + + refinement_hierarchy rh_TF( rh_Poisson ); + modify_grid_for_TF( rh_Poisson, rh_TF, cf ); + //rh_TF.output(); + + + if( !the_transfer_function_plugin->tf_is_distinct() && do_baryons ) + std::cout << " - WARNING: The selected transfer function does not support\n" + << " distinct amplitudes for baryon and DM fields!\n" + << " Perturbation amplitudes will be identical!" << std::endl; + + + + output_plugin *the_output_plugin = select_output_plugin( cf ); + + + + poisson_plugin_creator *the_poisson_plugin_creator = get_poisson_plugin_map()[ poisson_solver_name ]; + poisson_plugin *the_poisson_solver = the_poisson_plugin_creator->create( cf ); + + //... THIS IS THE MAIN DRIVER BRANCHING TREE RUNNING THE VARIOUS PARTS OF THE CODE + bool bfatal = false; + try{ + if( ! do_2LPT ) + { + //.. don't use 2LPT ... + + if( !the_transfer_function_plugin->tf_is_distinct() || !do_baryons ) + { + std::cerr << " - Computing ICs from the total matter power spectrum..." << std::endl; + + grid_hierarchy f( nbnd ), u(nbnd); + + GenerateDensityHierarchy( cf, the_transfer_function_plugin, total , rh_TF, f ); + coarsen_density(rh_Poisson, f); + + u = f; u.zero(); + err = the_poisson_solver->solve(f, u); + + #if 0 + grid_hierarchy u2( nbnd ); + compute_Lu( u, u2 ); + u=u2; + #endif + + the_output_plugin->write_dm_density(f); + the_output_plugin->write_dm_mass(f); + + if( do_baryons ) + { + if( do_LLA ) + compute_LLA_density( u, f ); + + the_output_plugin->write_gas_density(f); + } + f.deallocate(); + + the_output_plugin->write_dm_potential(u); + + grid_hierarchy data_forIO(u); + for( int icoord = 0; icoord < 3; ++icoord ) + { + + //... displacement + the_poisson_solver->gradient(icoord, u, data_forIO ); + + if( do_CVM) + subtract_finest_mean( data_forIO ); + the_output_plugin->write_dm_position(icoord, data_forIO ); + + //... velocity + if( do_UVA ) + { + //FIXME: this still needs to be added + throw std::runtime_error("Updated Velocity Approximation not yet supported!"); + } + else + { + data_forIO *= cosmo.vfact; + the_output_plugin->write_dm_velocity(icoord, data_forIO); + } + + if( do_baryons ) + the_output_plugin->write_gas_velocity(icoord, data_forIO); + + } + + }else{ + + //... cdm density and displacements + grid_hierarchy f( nbnd ), u(nbnd); + + GenerateDensityHierarchy( cf, the_transfer_function_plugin, cdm , rh_TF, f ); + u = f; u.zero(); + + err = the_poisson_solver->solve(f, u); + + the_output_plugin->write_dm_density(f); + the_output_plugin->write_dm_mass(f); + f.deallocate(); + + the_output_plugin->write_dm_potential(u); + + + //... DM displacements + { + grid_hierarchy data_forIO(u); + for( int icoord = 0; icoord < 3; ++icoord ) + { + //... displacement + the_poisson_solver->gradient(icoord, u, data_forIO ); + the_output_plugin->write_dm_position(icoord, data_forIO ); + } + } + + //... gas density + GenerateDensityHierarchy( cf, the_transfer_function_plugin, baryon , rh_TF, f ); + + if( do_LLA ) + { + u = f; u.zero(); + err = the_poisson_solver->solve(f, u); + compute_LLA_density( u, f ); + } + + the_output_plugin->write_gas_density(f); + + //... velocities + if( do_UVA ) + { + //... need to co-add potentials... + throw std::runtime_error("Updated Velocity Approximation not yet supported!"); + } + else + { + GenerateDensityHierarchy( cf, the_transfer_function_plugin, total , rh_TF, f ); + u = f; u.zero(); + err = the_poisson_solver->solve(f, u); + + grid_hierarchy data_forIO(u); + for( int icoord = 0; icoord < 3; ++icoord ) + { + //... displacement + the_poisson_solver->gradient(icoord, u, data_forIO ); + data_forIO *= cosmo.vfact; + the_output_plugin->write_dm_velocity(icoord, data_forIO); + the_output_plugin->write_gas_velocity(icoord, data_forIO); + } + } + } + + }else { + //.. use 2LPT ... + if( !the_transfer_function_plugin->tf_is_distinct() || !do_baryons ) + { + grid_hierarchy f( nbnd ), u1(nbnd), u2(nbnd); + + GenerateDensityHierarchy( cf, the_transfer_function_plugin, total , rh_TF, f ); + u1 = f; u1.zero(); + + the_output_plugin->write_dm_density(f); + the_output_plugin->write_dm_mass(f); + + //... compute 1LPT term + err = the_poisson_solver->solve(f, u1); + + the_output_plugin->write_dm_potential(u1); + + //... compute 2LPT term + u2 = f; u2.zero(); + + compute_2LPT_source(u1, f); + err = the_poisson_solver->solve(f, u2); + u2 *= 3.0/7.0; + + + grid_hierarchy data_forIO(u1); + for( int icoord = 0; icoord < 3; ++icoord ) + { + + //... displacement + the_poisson_solver->gradient(icoord, u1, data_forIO ); + the_poisson_solver->gradient(icoord, u2, f ); + + //... co-add for the displacement + data_forIO += f; + the_output_plugin->write_dm_position(icoord, data_forIO ); + + //... velocity + if( do_UVA ) + { + throw std::runtime_error("Updated Velocity Approximation not yet supported!"); + } + else + { + //... for velocities, the 2LPT term has a factor of 2, so add a second time + data_forIO += f; + data_forIO *= cosmo.vfact; + + the_output_plugin->write_dm_velocity(icoord, data_forIO); + } + + if( do_baryons ) + the_output_plugin->write_gas_velocity(icoord, data_forIO); + } + + //... + u1 += u2; + + if( do_baryons ) + { + if( do_LLA ) + compute_LLA_density( u1, f ); + else + compute_Lu_density( u1, f ); + + the_output_plugin->write_gas_density(f); + } + + } + else + { + throw std::runtime_error("2LPT for distinct baryon/CDM not implemented yet."); + } + + } + the_output_plugin->finalize(); + }catch(...) + { + std::cerr << " - A fatal error occured. We need to exit...\n"; + bfatal; + } + + //... clean up + delete the_output_plugin; + + std::cout << "-------------------------------------------------------------\n"; + + if( !bfatal ) + std::cout << " - Wrote output file \'" << outfname << "\'\n using plugin \'" << outformat << "\'...\n"; + + delete the_transfer_function_plugin; + delete the_poisson_solver; + + /** we are done ! **/ + std::cout << " - Done!" << std::endl << std::endl; + + + ///*****************************************/// + + std::string save_fname(std::string(argv[1])+std::string("_stats")); + std::ofstream ofs(save_fname.c_str()); + time_t ltime=time(NULL); + + ofs << "Parameter dump for the run on " << asctime( localtime(<ime) ); + ofs << "You ran " << THE_CODE_NAME << " version " << THE_CODE_VERSION << std::endl << std::endl; + + cf.dump( ofs ); + + + + return 0; +} diff --git a/mesh.hh b/mesh.hh new file mode 100644 index 0000000..56de243 --- /dev/null +++ b/mesh.hh @@ -0,0 +1,1181 @@ +/* + + mesh.hh - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +*/ + +#ifndef __MESH_HH +#define __MESH_HH + +#include +#include +#include +#include + +#include + +#include "config_file.hh" + + +//! base class for all things that have rectangular mesh structure +template +class Meshvar{ +public: + typedef T real_t; + + unsigned + m_nx, //!< x-extent of the rectangular mesh + m_ny, //!< y-extent of the rectangular mesh + m_nz; //!< z-extent of the rectangular mesh + + unsigned + m_offx, //!< x-offset of the grid (just as a helper, not used inside the class) + m_offy, //!< y-offset of the grid (just as a helper, not used inside the class) + m_offz; //!< z-offset of the grid (just as a helper, not used inside the class) + + real_t * m_pdata; //!< pointer to the dynamic data array + + //! constructor for cubic mesh + explicit Meshvar( unsigned n, unsigned offx, unsigned offy, unsigned offz ) + : m_nx( n ), m_ny( n ), m_nz( n ), m_offx( offx ), m_offy( offy ), m_offz( offz ) + { + m_pdata = new real_t[m_nx*m_ny*m_nz]; + } + + //! constructor for rectangular mesh + Meshvar( unsigned nx, unsigned ny, unsigned nz, unsigned offx, unsigned offy, unsigned offz ) + : m_nx( nx ), m_ny( ny ), m_nz( nz ), m_offx( offx ), m_offy( offy ), m_offz( offz ) + { + m_pdata = new real_t[m_nx*m_ny*m_nz]; + } + + //! variant copy constructor with optional copying of the actual data + Meshvar( const Meshvar& m, bool copy_over=true ) + { + m_nx = m.m_nx; + m_ny = m.m_ny; + m_nz = m.m_nz; + + m_offx = m.m_offx; + m_offy = m.m_offy; + m_offz = m.m_offz; + + m_pdata = new real_t[m_nx*m_ny*m_nz]; + + if( copy_over ) + for( unsigned i=0; i& m ) + { + m_nx = m.m_nx; + m_ny = m.m_ny; + m_nz = m.m_nz; + + m_offx = m.m_offx; + m_offy = m.m_offy; + m_offz = m.m_offz; + + m_pdata = new real_t[m_nx*m_ny*m_nz]; + + for( unsigned i=0; i& operator*=( real_t x ) + { + for( unsigned i=0; i& operator+=( real_t x ) + { + for( unsigned i=0; i& operator/=( real_t x ) + { + for( unsigned i=0; i& operator-=( real_t x ) + { + for( unsigned i=0; i& operator*=( const Meshvar& v ) + { + if( v.m_nx*v.m_ny*v.m_nz != m_nx*m_ny*m_nz ) + throw std::runtime_error("Meshvar::operator*= : attempt to operate on incompatible data"); + + for( unsigned i=0; i& operator/=( const Meshvar& v ) + { + if( v.m_nx*v.m_ny*v.m_nz != m_nx*m_ny*m_nz ) + throw std::runtime_error("Meshvar::operator/= : attempt to operate on incompatible data"); + + for( unsigned i=0; i& operator+=( const Meshvar& v ) + { + if( v.m_nx*v.m_ny*v.m_nz != m_nx*m_ny*m_nz ) + throw std::runtime_error("Meshvar::operator+= : attempt to operate on incompatible data"); + + for( unsigned i=0; i& operator-=( const Meshvar& v ) + { + if( v.m_nx*v.m_ny*v.m_nz != m_nx*m_ny*m_nz ) + throw std::runtime_error("Meshvar::operator-= : attempt to operate on incompatible data"); + + for( unsigned i=0; i& operator=( const Meshvar& m ) + { + m_nx = m.m_nx; + m_ny = m.m_ny; + m_nz = m.m_nz; + + m_offx = m.m_offx; + m_offy = m.m_offy; + m_offz = m.m_offz; + + if( m_pdata != NULL ) + delete m_pdata; + + m_pdata = new real_t[m_nx*m_ny*m_nz]; + + for( unsigned i=0; i +class MeshvarBnd : public Meshvar< T >{ + using Meshvar::m_nx; + using Meshvar::m_ny; + using Meshvar::m_nz; + using Meshvar::m_pdata; + + + +public: + typedef T real_t; + unsigned m_nbnd; + + //! most general constructor + MeshvarBnd( unsigned nbnd, unsigned nx, unsigned ny, unsigned nz, unsigned xoff, unsigned yoff, unsigned zoff ) + : Meshvar( nx+2*nbnd, ny+2*nbnd, nz+2*nbnd, xoff, yoff, zoff ), m_nbnd( nbnd ) + { } + + //! zero-offset constructor + MeshvarBnd( unsigned nbnd, unsigned nx, unsigned ny, unsigned nz ) + : Meshvar( nx+2*nbnd, ny+2*nbnd, nz+2*nbnd, 0, 0, 0 ), m_nbnd( nbnd ) + { } + + //! constructor for cubic meshes + MeshvarBnd( unsigned nbnd, unsigned n, unsigned xoff, unsigned yoff, unsigned zoff ) + : Meshvar( n+2*nbnd, xoff, yoff, zoff ), m_nbnd( nbnd ) + { } + + //! constructor for cubic meshes with zero offset + MeshvarBnd( unsigned nbnd, unsigned n ) + : Meshvar( n+2*nbnd, 0, 0, 0 ), m_nbnd( nbnd ) + { } + + //! modified copy constructor, allows to avoid copying actual data + MeshvarBnd( const MeshvarBnd& v, bool copyover ) + : Meshvar( v, copyover ), m_nbnd( v.m_nbnd ) + { } + + //! copy constructor + explicit MeshvarBnd( const MeshvarBnd& v ) + : Meshvar( v, true ), m_nbnd( v.m_nbnd ) + { } + + //! get extent of the mesh along a specified dimension + inline unsigned size( unsigned dim=0 ) const + { + if( dim == 0 ) return m_nx-2*m_nbnd; + if( dim == 1 ) return m_ny-2*m_nbnd; + return m_nz-2*m_nbnd; + } + + //! 3D random access to the data block via index 3-tuples + inline real_t& operator()(const int ix, const int iy, const int iz ) + { + int iix(ix+m_nbnd), iiy(iy+m_nbnd), iiz(iz+m_nbnd); + return m_pdata[ (iix*m_ny+iiy)*m_nz + iiz ]; + } + + //! 3D random access to the data block via index 3-tuples (const) + inline const real_t& operator()(const int ix, const int iy, const int iz ) const + { + int iix(ix+m_nbnd), iiy(iy+m_nbnd), iiz(iz+m_nbnd); + return m_pdata[ (iix*m_ny+iiy)*m_nz + iiz ]; + } + + //! assignment operator for rectangular meshes with ghost zones + MeshvarBnd& operator=( const MeshvarBnd& m ) + { + this->m_nx = m.m_nx; + this->m_ny = m.m_ny; + this->m_nz = m.m_nz; + + if( m_pdata != NULL ) + delete[] m_pdata; + + m_pdata = new real_t[m_nx*m_ny*m_nz]; + + for( unsigned i=0; im_pdata[i] = m.m_pdata[i]; + + return *this; + } + + //! sets the value of all ghost zones to zero + void zero_bnd( void ) + { + + int nx,ny,nz; + nx = this->size(0); + ny = this->size(1); + nz = this->size(2); + + + for( int j=-m_nbnd; jsize(0) << ", " << this->size(1) << ", " << this->size(2) << "]\n"; + std::cout << "ghost region has length of " << nbnd << std::endl; + + std::cout.precision(3); + for(int i=-nbnd; i<(int)this->size(0)+nbnd; ++i ) + { + std::cout << "ix = " << i << ": \n"; + + for (int j=-nbnd; j<(int)this->size(1)+nbnd; ++j) { + for (int k=-nbnd; k<(int)this->size(2)+nbnd; ++k) { + if( i<0||i>=this->size(0)||j<0||j>=this->size(1)||k<0||k>=this->size(2)) + std::cout << "[" << std::setw(6) << (*this)(i,j,k) << "] "; + else + std::cout << std::setw(8) << (*this)(i,j,k) << " "; + } + std::cout << std::endl; + } + + std::cout << std::endl; + + } + } +}; + + + +//! class that subsumes a nested grid collection +template< typename T > +class GridHierarchy +{ +public: + + //! number of ghost cells on boundary + unsigned m_nbnd; + + //! highest level without adaptive refinement + unsigned m_levelmin; + + //! vector of pointers to the underlying rectangular mesh data for each level + std::vector< MeshvarBnd* > m_pgrids; + + std::vector + m_xoffabs, //!< vector of x-offsets of a level mesh relative to the coarser level + m_yoffabs, //!< vector of x-offsets of a level mesh relative to the coarser level + m_zoffabs; //!< vector of x-offsets of a level mesh relative to the coarser level + +protected: + + //! check whether a given grid has identical hierarchy, dimensions to this + bool is_consistent( const GridHierarchy& gh ) + { + if( gh.levelmax()!=levelmax() ) + return false; + + if( gh.levelmin()!=levelmin() ) + return false; + + for( unsigned i=levelmin(); i<=levelmax(); ++i ) + for( int j=0; j<3; ++j ) + { + if( size(i,j) != gh.size(i,j) ) + return false; + if( offset(i,j) != gh.offset(i,j) ) + return false; + } + + return true; + } + +public: + + + //! return a pointer to the MeshvarBnd object representing data for one level + MeshvarBnd *get_grid( unsigned ilevel ) + { + + if( ilevel >= m_pgrids.size() ) + { + std::cerr << "Attempt to access level " << ilevel << " but maxlevel = " << m_pgrids.size()-1 << std::endl; + throw std::runtime_error("Fatal: attempt to access non-existent grid"); + } + return m_pgrids[ilevel]; + } + + //! return a pointer to the MeshvarBnd object representing data for one level (const) + const MeshvarBnd *get_grid( unsigned ilevel ) const + { + if( ilevel >= m_pgrids.size() ) + { + std::cerr << "Attempt to access level " << ilevel << " but maxlevel = " << m_pgrids.size()-1 << std::endl; + throw std::runtime_error("Fatal: attempt to access non-existent grid"); + } + + return m_pgrids[ilevel]; + } + + + //! constructor for a collection of rectangular grids representing a multi-level hierarchy + /*! creates an empty hierarchy, levelmin is initially zero, no grids are stored + * @param nbnd number of ghost zones added at the boundary + */ + explicit GridHierarchy( unsigned nbnd ) + : m_nbnd( nbnd ), m_levelmin( 0 ) + { + m_pgrids.clear(); + } + + //! copy constructor + explicit GridHierarchy( const GridHierarchy & gh ) + { + for( unsigned i=0; i<=gh.levelmax(); ++i ) + m_pgrids.push_back( new MeshvarBnd( *gh.get_grid(i) ) ); + + m_nbnd = gh.m_nbnd; + m_levelmin = gh.m_levelmin; + + m_xoffabs = gh.m_xoffabs; + m_yoffabs = gh.m_yoffabs; + m_zoffabs = gh.m_zoffabs; + } + + //! destructor + ~GridHierarchy() + { + this->deallocate(); + + } + + //! free all memory occupied by the grid hierarchy + void deallocate() + { + for( unsigned i=0; ioffset(idim); + } + + //! get size of a grid at specified refinement level + /*! the size describes the number of cells along one dimension of a grid + * @param ilevel the level for which the size is to be determined + * @param idim the dimension along which the size is to be determined + * @return integer value denoting the size of refinement grid at level ilevel along dimension idim + */ + int size( int ilevel, int idim ) const + { + return m_pgrids[ilevel]->size(idim); + } + + + //! get the absolute offset of a grid at specified refinement level + /*! the absolute offset describes the shift of a refinement grid with respect to the simulation volume + * @param ilevel the level for which the offset is to be determined + * @param idim the dimension along which the offset is to be determined + * @return integer value denoting the absolute offset in units of fine grid cells + * @sa offset + */ + int offset_abs( int ilevel, int idim ) const + { + if( idim == 0 ) return m_xoffabs[ilevel]; + if( idim == 1 ) return m_yoffabs[ilevel]; + return m_zoffabs[ilevel]; + } + + + //! get the coordinate posisition of a grid cell + /*! returns the position of a grid cell at specified level relative to the simulation volume + * @param ilevel the refinement level of the grid cell + * @param i the x-index of the cell in the level grid + * @param j the y-index of the cell in the level grid + * @param k the z-index of the cell in the level grid + * @param ppos pointer to a double[3] array to which the coordinates are written + * @return none + */ + void cell_pos( unsigned ilevel, int i, int j, int k, double* ppos ) const + { + double h = 1.0/pow(2,ilevel);//, htop = h*2.0; + ppos[0] = h*((double)offset_abs(ilevel,0)+(double)i+0.5); + ppos[1] = h*((double)offset_abs(ilevel,1)+(double)j+0.5); + ppos[2] = h*((double)offset_abs(ilevel,2)+(double)k+0.5); + + if( ppos[0] >= 1.0 || ppos[1] >= 1.0 || ppos[2] >= 1.0 ) + std::cerr << " - Cell seems outside domain! : (" << ppos[0] << ", " << ppos[1] << ", " << ppos[2] << "\n"; + } + + //! checks whether a given grid cell is refined + /*! a grid cell counts as refined if it is divided into 8 cells at the next higher level + * @param ilevel the refinement level of the grid cell + * @param i the x-index of the cell in the level grid + * @param j the y-index of the cell in the level grid + * @param k the z-index of the cell in the level grid + * @return true if cell is refined, false otherwise + */ + bool is_refined( unsigned ilevel, int i, int j, int k ) const + { + if( ilevel == levelmax() ) return false; + + if( i < offset(ilevel+1,0) || i >= offset(ilevel+1, 0)+size(ilevel+1,0)/2 || + j < offset(ilevel+1,1) || j >= offset(ilevel+1, 1)+size(ilevel+1,1)/2 || + k < offset(ilevel+1,2) || k >= offset(ilevel+1, 2)+size(ilevel+1,2)/2 ) + return false; + + return true; + } + + //! sets the values of all grids on all levels to zero + void zero( void ) + { + for( unsigned i=0; izero(); + } + + + //! count the number of cells that are not further refined (=leafs) + /*! for allocation purposes it is useful to query the number of cells to be expected + * @param lmin the minimum refinement level to consider + * @param lmax the maximum refinement level to consider + * @return the integer number of cells between lmin and lmax that are not further refined + */ + unsigned count_leaf_cells( unsigned lmin, unsigned lmax ) const + { + unsigned npcount = 0; + + for( int ilevel=lmax; ilevel>=(int)lmin; --ilevel ) + for( unsigned i=0; isize(0); ++i ) + for( unsigned j=0; jsize(1); ++j ) + for( unsigned k=0; ksize(2); ++k ) + if( ! is_refined(ilevel,i,j,k) ) + ++npcount; + + return npcount; + } + + //! count the number of cells that are not further refined (=leafs) + /*! for allocation purposes it is useful to query the number of cells to be expected + * @return the integer number of cells in the hierarchy that are not further refined + */ + unsigned count_leaf_cells( void ) const + { + return count_leaf_cells( levelmin(), levelmax() ); + } + + //! creates a hierarchy of coextensive grids, refined by factors of 2 + /*! creates a hierarchy of lmax grids, each extending over the whole simulation volume with + * grid length 2^n for level 0<=n<=lmax + * @param lmax the maximum refinement level to be added (sets the resolution to 2^lmax for each dim) + * @return none + */ + void create_base_hierarchy( unsigned lmax ) + { + unsigned n=1; + m_pgrids.clear(); + for( unsigned i=0; i<= lmax; ++i ) + { + //std::cout << "....adding level " << i << " (" << n << ", " << n << ", " << n << ")" << std::endl; + m_pgrids.push_back( new MeshvarBnd( m_nbnd, n, n, n, 0, 0, 0 ) ); + m_pgrids[i]->zero(); + n *= 2; + + m_xoffabs.push_back( 0 ); + m_yoffabs.push_back( 0 ); + m_zoffabs.push_back( 0 ); + } + + m_levelmin = lmax; + } + + GridHierarchy& operator*=( T x ) + { + for( unsigned i=0; i& operator/=( T x ) + { + for( unsigned i=0; i& operator+=( T x ) + { + for( unsigned i=0; i& operator-=( T x ) + { + for( unsigned i=0; i& operator*=( const GridHierarchy& gh ) + { + if( !is_consistent(gh) ) + throw std::runtime_error("GridHierarchy::operator*= : attempt to operate on incompatible data"); + + for( unsigned i=0; i& operator/=( const GridHierarchy& gh ) + { + if( !is_consistent(gh) ) + throw std::runtime_error("GridHierarchy::operator/= : attempt to operate on incompatible data"); + + for( unsigned i=0; i& operator+=( const GridHierarchy& gh ) + { + if( !is_consistent(gh) ) + throw std::runtime_error("GridHierarchy::operator+= : attempt to operate on incompatible data"); + + for( unsigned i=0; i& operator-=( const GridHierarchy& gh ) + { + if( !is_consistent(gh) ) + throw std::runtime_error("GridHierarchy::operator-= : attempt to operate on incompatible data"); + + for( unsigned i=0; i( m_nbnd, nx, ny, nz, xoff, yoff, zoff ) ); + m_pgrids.back()->zero(); + + //.. add absolute offsets (in units of current level grid cells) + m_xoffabs.push_back( 2*(m_xoffabs.back() + xoff) ); + m_yoffabs.push_back( 2*(m_yoffabs.back() + yoff) ); + m_zoffabs.push_back( 2*(m_zoffabs.back() + zoff) ); + } + + void cut_patch( unsigned ilevel, unsigned xoff, unsigned yoff, unsigned zoff, unsigned nx, unsigned ny, unsigned nz) + { + + unsigned dx,dy,dz,dxtop,dytop,dztop; + dx = xoff-m_xoffabs[ilevel]; + dy = yoff-m_yoffabs[ilevel]; + dz = zoff-m_zoffabs[ilevel]; + + dxtop = m_pgrids[ilevel]->offset(0)+dx/2; + dytop = m_pgrids[ilevel]->offset(1)+dy/2; + dztop = m_pgrids[ilevel]->offset(2)+dz/2; + + + MeshvarBnd *mnew = new MeshvarBnd( m_nbnd, nx, ny, nz, dxtop, dytop, dztop ); + + + //... copy data + for( unsigned i=0; ioffset(0) -= dx;//xoff; + m_pgrids[ilevel+1]->offset(1) -= dy;//yoff; + m_pgrids[ilevel+1]->offset(2) -= dz;//zoff; + } + + find_new_levelmin(); + } + + void find_new_levelmin( void ) + { + for( unsigned i=0; i<=levelmax(); ++i ) + { + unsigned n = (unsigned)pow(2,i); + if( m_pgrids[i]->size(0) == n && + m_pgrids[i]->size(1) == n && + m_pgrids[i]->size(2) == n ) + { + m_levelmin=i; + } + } + } + + unsigned levelmax( void ) const + { + return m_pgrids.size()-1; + } + + unsigned levelmin( void ) const + { + return m_levelmin; + } + + GridHierarchy& operator=( const GridHierarchy& gh ) + { + for( unsigned i=0; i( *gh.get_grid(i) ) ); + m_levelmin = gh.levelmin(); + m_nbnd = gh.m_nbnd; + + m_xoffabs = gh.m_xoffabs; + m_yoffabs = gh.m_yoffabs; + m_zoffabs = gh.m_zoffabs; + + + return *this; + } +}; + +//! class that computes the refinement structure given parameters +class refinement_hierarchy +{ + std::vector x0_,y0_,z0_,xl_,yl_,zl_; + std::vector ox_,oy_,oz_,oax_,oay_,oaz_; + std::vector nx_,ny_,nz_; + unsigned levelmin_, levelmax_, padding_; + config_file& cf_; + bool align_top_; + float x0ref_[3], lxref_[3]; + int xshift_[3]; + +public: + + refinement_hierarchy( const refinement_hierarchy& rh ) + : cf_( rh.cf_ ) + { + *this = rh; + } + + explicit refinement_hierarchy( config_file& cf ) + : cf_( cf ) + { + //... query the parameter data we need + levelmin_ = cf_.getValue("setup","levelmin"); + levelmax_ = cf_.getValue("setup","levelmax"); + padding_ = cf_.getValue("setup","padding"); + align_top_ = cf_.getValue("setup","align_top"); + + std::string temp; + + temp = cf_.getValue( "setup", "ref_offset" ); + sscanf( temp.c_str(), "%g,%g,%g", &x0ref_[0], &x0ref_[1], &x0ref_[2] ); + + temp = cf_.getValue( "setup", "ref_extent" ); + sscanf( temp.c_str(), "%g,%g,%g", &lxref_[0],&lxref_[1],&lxref_[2] ); + + unsigned ncoarse = (unsigned)pow(2,levelmin_); + + //... determine shift + + double xc[3]; + xc[0] = fmod(x0ref_[0]+0.5*lxref_[0],1.0); + xc[1] = fmod(x0ref_[1]+0.5*lxref_[1],1.0); + xc[2] = fmod(x0ref_[2]+0.5*lxref_[2],1.0); + xshift_[0] = (int)((0.5-xc[0])*ncoarse); + xshift_[1] = (int)((0.5-xc[1])*ncoarse); + xshift_[2] = (int)((0.5-xc[2])*ncoarse); + + char strtmp[32]; + sprintf( strtmp, "%d", xshift_[0] ); cf_.insertValue( "setup", "shift_x", strtmp ); + sprintf( strtmp, "%d", xshift_[1] ); cf_.insertValue( "setup", "shift_y", strtmp ); + sprintf( strtmp, "%d", xshift_[2] ); cf_.insertValue( "setup", "shift_z", strtmp ); + + + x0ref_[0] += (double)xshift_[0]/ncoarse; + x0ref_[1] += (double)xshift_[1]/ncoarse; + x0ref_[2] += (double)xshift_[2]/ncoarse; + + + //... initialize arrays + x0_.assign(levelmax_+1,0.0); xl_.assign(levelmax_+1,1.0); + y0_.assign(levelmax_+1,0.0); yl_.assign(levelmax_+1,1.0); + z0_.assign(levelmax_+1,0.0); zl_.assign(levelmax_+1,1.0); + ox_.assign(levelmax_+1,0); nx_.assign(levelmax_+1,0); + oy_.assign(levelmax_+1,0); ny_.assign(levelmax_+1,0); + oz_.assign(levelmax_+1,0); nz_.assign(levelmax_+1,0); + + oax_.assign(levelmax_+1,0); + oay_.assign(levelmax_+1,0); + oaz_.assign(levelmax_+1,0); + + + nx_[levelmin_] = ncoarse; + ny_[levelmin_] = ncoarse; + nz_[levelmin_] = ncoarse; + + + //... determine the position of the refinement region on the finest grid + int il,jl,kl,ir,jr,kr; + int nresmax = (int)pow(2,levelmax_); + il = (int)(x0ref_[0] * nresmax); + jl = (int)(x0ref_[1] * nresmax); + kl = (int)(x0ref_[2] * nresmax); + ir = (int)((x0ref_[0]+lxref_[0]) * nresmax + 1.0); + jr = (int)((x0ref_[0]+lxref_[0]) * nresmax + 1.0); + kr = (int)((x0ref_[0]+lxref_[0]) * nresmax + 1.0); + + //... align with coarser grids ... + if( align_top_ ) + { + //... require alignment with top grid + unsigned nref = (unsigned)pow(2,levelmax_-levelmin_)*2; + + il = (int)((double)il/nref)*nref; + jl = (int)((double)jl/nref)*nref; + kl = (int)((double)kl/nref)*nref; + + ir = (int)((double)ir/nref+1.0)*nref; + jr = (int)((double)jr/nref+1.0)*nref; + kr = (int)((double)kr/nref+1.0)*nref; + + }else{ + //... require alignment with coarser grid + il -= il%2; jl -= jl%2; kl -= kl%2; + ir += ir%2; jr += jr%2; kr += kr%2; + } + + if( levelmin_ != levelmax_ ) + { + oax_[levelmax_] = (il+nresmax)%nresmax; + oay_[levelmax_] = (jl+nresmax)%nresmax; + oaz_[levelmax_] = (kl+nresmax)%nresmax; + nx_[levelmax_] = ir-il; + ny_[levelmax_] = jr-jl; + nz_[levelmax_] = kr-kl; + } + + //... determine position of coarser grids + for( unsigned ilevel=levelmax_-1; ilevel> levelmin_; --ilevel ) + { + il = (int)((double)il * 0.5 - padding_); + jl = (int)((double)jl * 0.5 - padding_); + kl = (int)((double)kl * 0.5 - padding_); + + ir = (int)((double)ir * 0.5 + padding_); + jr = (int)((double)jr * 0.5 + padding_); + kr = (int)((double)kr * 0.5 + padding_); + + //... align with coarser grids ... + if( align_top_ ) + { + //... require alignment with top grid + unsigned nref = (unsigned)pow(2,ilevel-levelmin_); + + il = (int)((double)il/nref)*nref; + jl = (int)((double)jl/nref)*nref; + kl = (int)((double)kl/nref)*nref; + + ir = (int)((double)ir/nref+1.0)*nref; + jr = (int)((double)jr/nref+1.0)*nref; + kr = (int)((double)kr/nref+1.0)*nref; + + }else{ + //... require alignment with coarser grid + il -= il%2; jl -= jl%2; kl -= kl%2; + ir += ir%2; jr += jr%2; kr += kr%2; + } + + oax_[ilevel] = il; oay_[ilevel] = jl; oaz_[ilevel] = kl; + nx_[ilevel] = ir-il; ny_[ilevel] = jr-jl; nz_[ilevel] = kr-kl; + + } + + //... determine relative offsets between grids + for( unsigned ilevel=levelmax_; ilevel>levelmin_; --ilevel ) + { + ox_[ilevel] = (oax_[ilevel]/2 - oax_[ilevel-1]); + oy_[ilevel] = (oay_[ilevel]/2 - oay_[ilevel-1]); + oz_[ilevel] = (oaz_[ilevel]/2 - oaz_[ilevel-1]); + } + + for( unsigned ilevel=levelmin_+1; ilevel<=levelmax_; ++ilevel ) + { + double h = 1.0/pow(2,ilevel); + + x0_[ilevel] = h*(double)oax_[ilevel]; + y0_[ilevel] = h*(double)oay_[ilevel]; + z0_[ilevel] = h*(double)oaz_[ilevel]; + + xl_[ilevel] = h*(double)nx_[ilevel]; + yl_[ilevel] = h*(double)ny_[ilevel]; + zl_[ilevel] = h*(double)nz_[ilevel]; + } + + for( unsigned ilevel=0; ilevel <=levelmin_; ++ilevel ) + { + unsigned n = (unsigned)pow(2,ilevel); + + xl_[ilevel] = yl_[ilevel] = zl_[ilevel] = 1.0; + nx_[ilevel] = ny_[ilevel] = nz_[ilevel] = n; + } + } + + refinement_hierarchy& operator=( const refinement_hierarchy& o ) + { + levelmin_ = o.levelmin_; + levelmax_ = o.levelmax_; + padding_ = o.padding_; + cf_ = o.cf_; + align_top_ = o.align_top_; + for( int i=0; i<3; ++i ) + { + x0ref_[i] = o.x0ref_[i]; + lxref_[i] = o.lxref_[i]; + xshift_[i] = o.xshift_[i]; + } + + x0_ = o.x0_; y0_ = o.y0_; z0_ = o.z0_; + xl_ = o.xl_; yl_ = o.yl_; zl_ = o.zl_; + ox_ = o.ox_; oy_ = o.oy_; oz_ = o.oz_; + oax_= o.oax_; oay_ = o.oay_; oaz_ = o.oaz_; + nx_ = o.nx_; ny_=o.ny_; nz_=o.nz_; + + return *this; + } + + void adjust_level( unsigned ilevel, int nx, int ny, int nz, int oax, int oay, int oaz ) + { + double h = 1.0/pow(2,ilevel); + + int dx,dy,dz; + + dx = oax_[ilevel] - oax; + dy = oay_[ilevel] - oay; + dz = oaz_[ilevel] - oaz; + + ox_[ilevel] -= dx/2; + oy_[ilevel] -= dy/2; + oz_[ilevel] -= dz/2; + + oax_[ilevel] = oax; + oay_[ilevel] = oay; + oaz_[ilevel] = oaz; + + nx_[ilevel] = nx; + ny_[ilevel] = ny; + nz_[ilevel] = nz; + + x0_[ilevel] = h*oax; + y0_[ilevel] = h*oay; + z0_[ilevel] = h*oaz; + + xl_[ilevel] = h*nx; + yl_[ilevel] = h*ny; + zl_[ilevel] = h*nz; + + if( ilevel < levelmax_ ) + { + ox_[ilevel+1] += dx; + oy_[ilevel+1] += dy; + oz_[ilevel+1] += dz; + } + + find_new_levelmin(); + + } + + void find_new_levelmin( void ) + { + for( unsigned i=0; i<=levelmax(); ++i ) + { + unsigned n = (unsigned)pow(2,i); + if( oax_[i]==0 && oay_[i]==0 && oaz_[i]==0 + && nx_[i]==n && ny_[i]==n && nz_[i]==n ) + { + levelmin_=i; + } + } + + std::cerr << " - refinement_hierarchy: set new levelmin to " << levelmin_ << std::endl; + } + + unsigned offset_abs( unsigned ilevel, int dim ) const + { + if( dim==0 ) + return oax_.at(ilevel); + if( dim==1 ) + return oay_.at(ilevel); + return oaz_.at(ilevel); + } + + unsigned offset( unsigned ilevel, int dim ) const + { + if( dim==0 ) + return ox_.at(ilevel); + if( dim==1 ) + return oy_.at(ilevel); + return oz_.at(ilevel); + } + + unsigned size( unsigned ilevel, int dim ) const + { + if( dim==0 ) + return nx_.at(ilevel); + if( dim==1 ) + return ny_.at(ilevel); + return nz_.at(ilevel); + } + + unsigned levelmin( void ) const + { return levelmin_; } + + unsigned levelmax( void ) const + { return levelmax_; } + + + + void output( void ) + { + std::cout << "-------------------------------------------------------------\n"; + + if( xshift_[0]!=0||xshift_[1]!=0||xshift_[2]!=0 ) + std::cout + << " - Domain will be shifted by (" << xshift_[0] << ", " << xshift_[1] << ", " << xshift_[2] << ")\n" << std::endl + << " - Grid structure:\n"; + + for( unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel ) + { + std::cout + << " Level " << std::setw(3) << ilevel << " : offset = (" << std::setw(5) << ox_[ilevel] << ", " << std::setw(5) << oy_[ilevel] << ", " << std::setw(5) << oz_[ilevel] << ")\n" + << " size = (" << std::setw(5) << nx_[ilevel] << ", " << std::setw(5) << ny_[ilevel] << ", " << std::setw(5) << nz_[ilevel] << ")\n"; + } + std::cout << "-------------------------------------------------------------\n"; + + } +}; + +#endif + diff --git a/mg_interp.hh b/mg_interp.hh new file mode 100644 index 0000000..9afcad5 --- /dev/null +++ b/mg_interp.hh @@ -0,0 +1,1267 @@ +/* + * mg_interp.hh + * FROLIC_mg + * + * Created by Oliver Hahn on 5/27/10. + * Copyright 2010 KIPAC/Stanford University. All rights reserved. + * + */ + +#ifndef __MG_INTERP_HH +#define __MG_INTERP_HH + +#include "mg_operators.hh" + + +struct coarse_fine_interpolation +{ + template< class G > + void interp_coarse_fine( unsigned ilevel, G& coarse, G& fine ) + { } +}; + + +inline double interp2( double x1, double x2, double x3, double f1, double f2, double f3, double x ) +{ + double a,b,c; + a = (x1 * f3 - x3 * f1 - x2 * f3 - x1 * f2 + x2 * f1 + x3 * f2) / (x1 * x3 * x3 - x2 * x3 * x3 + x2 * x1 * x1 - x3 * x1 * x1 + x3 * x2 * x2 - x1 * x2 * x2); + b = -(x1 * x1 * f3 - x1 * x1 * f2 - f1 * x3 * x3 + f2 * x3 * x3 - x2 * x2 * f3 + f1 * x2 * x2) / (x1 - x2) / (x1 * x2 - x1 * x3 + x3 * x3 - x2 * x3); + c = (x1 * x1 * x2 * f3 - x1 * x1 * x3 * f2 - x2 * x2 * x1 * f3 + f2 * x1 * x3 * x3 + x2 * x2 * x3 * f1 - f1 * x2 * x3 * x3) / (x1 - x2) / (x1 * x2 - x1 * x3 + x3 * x3 - x2 * x3); + + return a*x*x+b*x+c; +} + + + + + + +inline double interp4left( double f0, double f1, double f2, double f3, double f4 ) +{ + //return -4.0/231.0*f0+4.0/7.0*f1+5.0/7.0*f2-1.0/3.0*f3+5./77.0*f4; + return 1.0/13.0*f0-21./55.*f1+7.0/9.0*f2+8./15.*f3-8./1287*f4; +} + +inline double interp4right( double f0, double f1, double f2, double f3, double f4 ) +{ + return interp4left(f4,f3,f2,f1,f0); +} + +inline double interp4lleft( double f0, double f1, double f2, double f3, double f4 ) +{ + //return 16./231.*f0+48.0/35.0*f1-6.0/7.0*f2-8.0/15.0*f3-9./77.*f4; + return -15./91.*f0+8./11.*f1+-10./9.*f2+32./21.*f3+32./1287.*f4; +} + +inline double interp4rright( double f0, double f1, double f2, double f3, double f4 ) +{ + return interp4lleft(f4,f3,f2,f1,f0); +} + +inline double interp4( double fm2, double fm1, double f0, double fp1, double fp2, double x ) +{ + double x2 = x*x, x3=x2*x, x4=x3*x; + double a,b,c,d,e; + + a= 1.0/24.0*fm2-1.0/6.0*fm1+0.25*f0-1.0/6.0*fp1+1.0/24.0*fp2; + b=-1.0/12.0*fm2+1.0/6.0*fm1-1.0/6.0*fp1+1.0/12.0*fp2; + c=-1.0/24.0*fm2+2.0/3.0*fm1-5.0/4.0*f0+2.0/3.0*fp1-1.0/24.0*fp2; + d= 1.0/12.0*fm2-2.0/3.0*fm1+2.0/3.0*fp1-1.0/12.0*fp2; + e=f0; + + return a*x4+b*x3+c*x2+d*x+e; +} + +inline double interp4( double* f, double x ) +{ + double x2 = x*x, x3=x2*x, x4=x3*x; + double a,b,c,d,e; + + a= 1.0/24.0*f[0]-1.0/6.0*f[1]+0.25*f[2]-1.0/6.0*f[3]+1.0/24.0*f[4]; + b=-1.0/12.0*f[0]+1.0/6.0*f[1]-1.0/6.0*f[3]+1.0/12.0*f[4]; + c=-1.0/24.0*f[0]+2.0/3.0*f[1]-5.0/4.0*f[2]+2.0/3.0*f[3]-1.0/24.0*f[4]; + d= 1.0/12.0*f[0]-2.0/3.0*f[1]+2.0/3.0*f[3]-1.0/12.0*f[4]; + e=f[2]; + + return a*x4+b*x3+c*x2+d*x+e; +} + + +inline double interp6( double *f, double x ) +{ + double x2 = x*x, x3=x2*x, x4=x3*x, x5=x4*x, x6=x5*x; + double a,b,c,d,e,g,h; + + a=(f[0]-6.*f[1]+15.*f[2]-20.*f[3]+15.*f[4]-6.*f[5]+f[6])/720.; + b=(-3.*f[0]+12.*f[1]-15.*f[2]+15*f[4]-12.*f[5]+3.*f[6])/720.; + c=(-5.*f[0]+60.*f[1]-195.*f[2]+280.*f[3]-195.*f[4]+60.*f[5]-5.*f[6])/720.; + d=(15.*f[0]-120.*f[1]+195.*f[2]-195.*f[4]+120.*f[5]-15.*f[6])/720.; + e=(4.*f[0]-54.*f[1]+540.*f[2]-980.*f[3]+540.*f[4]-54.*f[5]+4.*f[6])/720.; + g=(-12.*f[0]+108.*f[1]-540.*f[2]+540.*f[4]-108.*f[5]+12.*f[6])/720.; + h=f[3]; + + return a*x6+b*x5+c*x4+d*x3+e*x2+g*x+h; + +} + +inline double interp6( double f0, double f1, double f2, double f3, double f4, double f5, double f6, double x ) +{ + double x2 = x*x, x3=x2*x, x4=x3*x, x5=x4*x, x6=x5*x; + double a,b,c,d,e,g,h; + double f[7]={ + f0,f1,f2,f3,f4,f5,f6 + }; + + a=(f[0]-6.*f[1]+15.*f[2]-20.*f[3]+15.*f[4]-6.*f[5]+f[6])/720.; + b=(-3.*f[0]+12.*f[1]-15.*f[2]+15*f[4]-12.*f[5]+3.*f[6])/720.; + c=(-5.*f[0]+60.*f[1]-195.*f[2]+280.*f[3]-195.*f[4]+60.*f[5]-5.*f[6])/720.; + d=(15.*f[0]-120.*f[1]+195.*f[2]-195.*f[4]+120.*f[5]-15.*f[6])/720.; + e=(4.*f[0]-54.*f[1]+540.*f[2]-980.*f[3]+540.*f[4]-54.*f[5]+4.*f[6])/720.; + g=(-12.*f[0]+108.*f[1]-540.*f[2]+540.*f[4]-108.*f[5]+12.*f[6])/720.; + h=f[3]; + + return a*x6+b*x5+c*x4+d*x3+e*x2+g*x+h; + +} + +inline double interp2( double fleft, double fcenter, double fright, double x ) +{ + double a,b,c; + a = 0.5*(fleft+fright)-fcenter; + b = 0.5*(fright-fleft); + c = fcenter; + + return a*x*x+b*x+c; +} + + +inline double interp2left( double fleft, double fcenter, double fright ) +{ + double a,b,c; + a = (6.0*fright-10.0*fcenter+4.0*fleft)/15.0; + b = (-4.0*fleft+9.0*fright-5.0*fcenter)/15.0; + c = fcenter; + + return a-b+c; +} + +inline double interp2right( double fleft, double fcenter, double fright ) +{ + double a,b,c; + a = (6.0*fleft-10.0*fcenter+4.0*fright)/15.0; + b = (4.0*fright-9.0*fleft+5.0*fcenter)/15.0; + c = fcenter; + + return a+b+c; +} + +inline double interp2lleft( double fleft, double fcenter, double fright ) +{ + double a,b,c; + a = (-10.0*fcenter+4.0*fleft+6.0*fright)/15.0; + b = (-12.0*fleft+15.0*fcenter-3.0*fright)/15.0; + c = (-3.0*fright+10.0*fcenter+8.0*fleft)/15.0; + + return a-b+c; +} + +inline double interp2rright( double fleft, double fcenter, double fright ) +{ + double a,b,c; + a = (-10.0*fcenter+4.0*fleft+6.0*fright)/15.0; + b = (-12.0*fleft+15.0*fcenter-3.0*fright)/15.0; + c = (-3.0*fright+10.0*fcenter+8.0*fleft)/15.0; + + return a-b+c; +} + + +inline double interp6left( double f0, double f1, double f2, double f3, double f4, double f5, double f6 ) +{ + //return -77./2565.*f6+44./221.*f5-14./25.*f4+308./351.*f3+352./675.*f2-16./1755.*f1+112./104975.*f0; + return 4./2431.*f0-24./1001.*f1+4./7.*f2+60./77.*f3-6./13.*f4+12./77.*f5-5./221.*f6; +} + +inline double interp6lleft( double f0, double f1, double f2, double f3, double f4, double f5, double f6 ) +{ + //return -16./4199.*f0+16./429.*f1+32./21.*f2-675./1547.*f5-50./39.*f3+12./11.*f4+4./57.*f6; + return -12./2431.*f0+40./429.*f1+4./3.*f2-10./11.*f3+28./39.*f4-3./11.*f5+28./663.*f6; +} + +inline double interp6llleft( double f0, double f1, double f2, double f3, double f4, double f5, double f6 ) +{ + //return -432./20995.*f0+112./429.*f1+32./15.*f2-324./221.*f5-140./39.*f3+189./55.*f4+14./57.*f6; + return -36./2431.*f0+600./1001.*f1+20./21.*f2-100./77.*f3+15./13.*f4-36./77.*f5+50./663.*f6; +} + +inline double interp6right( double f0, double f1, double f2, double f3, double f4, double f5, double f6 ) +{ + return interp6left(f6,f5,f4,f3,f2,f1,f0); +} +inline double interp6rright( double f0, double f1, double f2, double f3, double f4, double f5, double f6 ) +{ + return interp6lleft(f6,f5,f4,f3,f2,f1,f0); +} +inline double interp6rrright( double f0, double f1, double f2, double f3, double f4, double f5, double f6 ) +{ + return interp6llleft(f6,f5,f4,f3,f2,f1,f0); +} + + + + +#include "fd_schemes.hh" + +struct cubic_interp +: public coarse_fine_interpolation +{ + template< class G > + void interp_coarse_fine( unsigned ilevel, G& coarse, G& fine ) + { + mg_cubic().prolong_bnd( coarse, fine ); + + + //return; + //...................................................... + + G *u = &fine; + G *utop = &coarse; + + int + xoff = u->offset(0), + yoff = u->offset(1), + zoff = u->offset(2); + + //... don't do anything if we are not an additional refinement region + if( xoff == 0 && yoff == 0 && zoff == 0 ) + return; + + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + +#pragma omp parallel for schedule(dynamic) + for( int ix=-1; ix<=nx; ++ix ) + for( int iy=-1; iy<=ny; ++iy ) + for( int iz=-1; iz<=nz; ++iz ) + { + bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz); + + //if(ix==-1||ix==nx||iy==-1||iy==ny||iz==-1||iz==nz) + if( xbnd || ybnd || zbnd ) + //if( xbnd ^ ybnd ^ zbnd ) + { + + //... only deal with proper ghostzones + if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd)) + continue; + + int ixtop = (int)(0.5*(double)(ix))+xoff; + int iytop = (int)(0.5*(double)(iy))+yoff; + int iztop = (int)(0.5*(double)(iz))+zoff; + + if( ix==-1 ) ixtop=xoff-1; + if( iy==-1 ) iytop=yoff-1; + if( iz==-1 ) iztop=zoff-1; + + double coarse_flux, fine_flux, dflux; + + //double fac = 0.125*27.0/24.0, fac2 = -0.125*1.0/24.0;//0.125;//24.0/26.0*0.125*0.5; + double fac = 0.5*0.125*27.0/24.0, fac2 = -0.5*0.125*1.0/24.0; + //double fac = 0.125*15.0/12.0, fac2 = -0.125*1.0/12.0; + + if(xbnd) + { + if( ix==-1 ) + { + + fine_flux = 0.0; + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz+1); + + coarse_flux = Laplace_flux_O4().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int j=0;j<2;++j) + for( int k=0;k<2;++k) + { + (*u)(ix,iy+j,iz+k) += fac*dflux; + (*u)(ix-1,iy+j,iz+k) += fac2*dflux; + } + //(*u)(ix,iy+j,iz+k) += 24.0/27.0*dflux; + } + else if( ix==nx ) + { + + fine_flux = 0.0; + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz+1); + + coarse_flux = Laplace_flux_O4().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int j=0;j<2;++j) + for( int k=0;k<2;++k) + { + (*u)(ix,iy+j,iz+k) += fac*dflux; + (*u)(ix+1,iy+j,iz+k) += fac2*dflux; + } + //(*u)(ix,iy+j,iz+k) += 24.0/27.0*dflux; + } + } + + if(ybnd) + { + if( iy==-1 ) + { + + fine_flux = 0.0; + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz+1); + + coarse_flux = Laplace_flux_O4().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int i=0;i<2;++i) + for( int k=0;k<2;++k) + { + (*u)(ix+i,iy,iz+k) += fac*dflux; + (*u)(ix+i,iy-1,iz+k) += fac2*dflux; + } + } + else if( iy==ny ) + { + + fine_flux = 0.0; + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz+1); + + coarse_flux = Laplace_flux_O4().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int i=0;i<2;++i) + for( int k=0;k<2;++k) + { + (*u)(ix+i,iy,iz+k) += fac*dflux; + (*u)(ix+i,iy+1,iz+k) += fac2*dflux; + } + } + } + + if(zbnd) + { + if( iz==-1 ) + { + + fine_flux = 0.0; + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy+1,iz+1); + + coarse_flux = Laplace_flux_O4().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int i=0;i<2;++i) + for( int j=0;j<2;++j) + { + (*u)(ix+i,iy+j,iz) += fac*dflux; + (*u)(ix+i,iy+j,iz-1) += fac2*dflux; + } + } + else if( iz==nz ) + { + + fine_flux = 0.0; + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy+1,iz); + + coarse_flux = Laplace_flux_O4().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int i=0;i<2;++i) + for( int j=0;j<2;++j) + { + (*u)(ix+i,iy+j,iz) += fac*dflux; + (*u)(ix+i,iy+j,iz+1) += fac2*dflux; + } + } + } + } + } + + } +}; + + + +struct interp_O3_fluxcorr +: public coarse_fine_interpolation +{ + template< class G > + void interp_coarse_fine( unsigned ilevel, G& coarse, G& fine ) + { + + //... use cubic interpolation to get all values on boundary + mg_cubic().prolong_bnd( coarse, fine ); + + //... use flux corrected quadratic interpolation for the + //... the boundary overlapped by the Laplace stencil + G *u = &fine; + G *utop = &coarse; + + int + xoff = u->offset(0), + yoff = u->offset(1), + zoff = u->offset(2); + + //... don't do anything if we are not an additional refinement region + if( xoff == 0 && yoff == 0 && zoff == 0 ) + return; + + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + //... set boundary condition for fine grid + #pragma omp parallel for schedule(dynamic) + for( int ix=-1; ix<=nx; ++ix ) + for( int iy=-1; iy<=ny; ++iy ) + for( int iz=-1; iz<=nz; ++iz ) + { + bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz); + + if( xbnd || ybnd || zbnd ) + { + + //... only deal with proper ghostzones + if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd)) + continue; + + int ixtop = (int)(0.5*(double)(ix))+xoff; + int iytop = (int)(0.5*(double)(iy))+yoff; + int iztop = (int)(0.5*(double)(iz))+zoff; + + if( ix==-1 ) ixtop=xoff-1; + if( iy==-1 ) iytop=yoff-1; + if( iz==-1 ) iztop=zoff-1; + + double ustar1, ustar2, ustar3, uhat; + double fac = 0.5;//0.25; + double flux;; + if( ix == -1 && iy%2==0 && iz%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) ); + ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) ); + ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) ); + + uhat = interp2( ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + (*u)(ix,iy+j,iz+k) = interp2left( uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k) ); + + flux += ((*u)(ix+1,iy+j,iz+k)-(*u)(ix,iy+j,iz+k)); + } + + flux /= 4.0; + + double dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix,iy+j,iz+k) -= dflux; + } + // right boundary + if( ix == nx && iy%2==0 && iz%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) ); + ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) ); + ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + (*u)(ix,iy+j,iz+k) = interp2right( (*u)(ix-2,iy+j,iz+k), (*u)(ix-1,iy+j,iz+k), uhat ); + flux += ((*u)(ix,iy+j,iz+k)-(*u)(ix-1,iy+j,iz+k)); + } + flux /= 4.0; + double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop-1,iytop,iztop))/2.0 - flux; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix,iy+j,iz+k) += dflux; + } + + // bottom boundary + if( iy == -1 && ix%2==0 && iz%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) ); + ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); + ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + (*u)(ix+j,iy,iz+k) = interp2left( uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k) ); + + flux += ((*u)(ix+j,iy+1,iz+k)-(*u)(ix+j,iy,iz+k)); + } + flux /= 4.0; + double dflux = ((*utop)(ixtop,iytop+1,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix+j,iy,iz+k) -= dflux; + } + // top boundary + if( iy == ny && ix%2==0 && iz%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) ); + ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); + ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + (*u)(ix+j,iy,iz+k) = interp2right( (*u)(ix+j,iy-2,iz+k), (*u)(ix+j,iy-1,iz+k), uhat ); + + flux += ((*u)(ix+j,iy,iz+k)-(*u)(ix+j,iy-1,iz+k)); + } + flux /= 4.0; + double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop-1,iztop))/2.0 - flux; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix+j,iy,iz+k) += dflux; + } + + // front boundary + if( iz == -1 && ix%2==0 && iy%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) ); + ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); + ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + (*u)(ix+j,iy+k,iz) = interp2left( uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2) ); + + flux += ((*u)(ix+j,iy+k,iz+1)-(*u)(ix+j,iy+k,iz)); + } + flux /= 4.0; + double dflux = ((*utop)(ixtop,iytop,iztop+1)-(*utop)(ixtop,iytop,iztop))/2.0 - flux; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix+j,iy+k,iz) -= dflux; + } + + // back boundary + if( iz == nz && ix%2==0 && iy%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) ); + ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); + ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + (*u)(ix+j,iy+k,iz) = interp2right( (*u)(ix+j,iy+k,iz-2), (*u)(ix+j,iy+k,iz-1), uhat ); + + flux += ((*u)(ix+j,iy+k,iz)-(*u)(ix+j,iy+k,iz-1)); + } + flux /= 4.0; + double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop,iztop-1))/2.0 - flux; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix+j,iy+k,iz) += dflux; + } + + } + } + + } +}; + +struct interp_O5_fluxcorr +: public coarse_fine_interpolation +{ + + + template< class G > + void interp_coarse_fine( unsigned ilevel, G& coarse, G& fine ) + { + + //... use cubic interpolation to get all values on boundary + mg_cubic().prolong_bnd( coarse, fine ); + + //... use flux corrected quadratic interpolation for the + //... the boundary overlapped by the Laplace stencil + G *u = &fine; + G *utop = &coarse; + + + int + xoff = u->offset(0), + yoff = u->offset(1), + zoff = u->offset(2); + + //... don't do anything if we are not an additional refinement region + if( xoff == 0 && yoff == 0 && zoff == 0 ) + return; + + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + //... set boundary condition for fine grid + #pragma omp parallel for schedule(dynamic) + for( int ix=-1; ix<=nx; ++ix ) + for( int iy=-1; iy<=ny; ++iy ) + for( int iz=-1; iz<=nz; ++iz ) + { + bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz); + bool bnd=xbnd|ybnd|zbnd; + + if( bnd ) + { + + int ixtop = (int)(0.5*(double)(ix))+xoff; + int iytop = (int)(0.5*(double)(iy))+yoff; + int iztop = (int)(0.5*(double)(iz))+zoff; + + if( ix==-1 ) ixtop=xoff-1; + if( iy==-1 ) iytop=yoff-1; + if( iz==-1 ) iztop=zoff-1; + + double ustar[5], uhat[2]; + double fac = 0.5; + + double coarse_flux, fine_flux, dflux; + + double ffac = 12./14.; + + // left boundary + if( ix == -1 && iy%2==0 && iz%2==0 ) + { + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + for( int p=0; p<2; ++p ) + { + for( int q=-2;q<=2;++q ) + ustar[q+2] = interp4( (*utop)(ixtop+p-1,iytop-2,iztop+q), (*utop)(ixtop+p-1,iytop-1,iztop+q), + (*utop)(ixtop+p-1,iytop,iztop+q), (*utop)(ixtop+p-1,iytop+1,iztop+q), + (*utop)(ixtop+p-1,iytop+2,iztop+q), fac*((double)j-0.5) ); + uhat[p] = interp4( ustar, fac*((double)k-0.5)-1.5 ); + } + + (*u)(ix,iy+j,iz+k) = interp4left( uhat[0], uhat[1], (*u)(ix+1,iy+j,iz+k), + (*u)(ix+2,iy+j,iz+k), (*u)(ix+3,iy+j,iz+k) ); + (*u)(ix-1,iy+j,iz+k) = interp4lleft( uhat[0], uhat[1], (*u)(ix+1,iy+j,iz+k), + (*u)(ix+2,iy+j,iz+k), (*u)(ix+3,iy+j,iz+k) ); + } + + fine_flux = 0.0; + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz+1); + fine_flux /= 4.0; + + coarse_flux = Laplace_flux_O4().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0; + + dflux = coarse_flux - fine_flux; + + for(int j=0;j<2;++j) + for( int k=0;k<2;++k) + { + (*u)(ix,iy+j,iz+k) += ffac*dflux; + (*u)(ix-1,iy+j,iz+k) += ffac*dflux; + } + + + } + // right boundary + if( ix == nx && iy%2==0 && iz%2==0 ) + { + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + for( int p=0; p<2; ++p ) + { + for( int q=-2;q<=2;++q ) + ustar[q+2] = interp4( (*utop)(ixtop+p,iytop-2,iztop+q), (*utop)(ixtop+p,iytop-1,iztop+q), + (*utop)(ixtop+p,iytop,iztop+q), (*utop)(ixtop+p,iytop+1,iztop+q), + (*utop)(ixtop+p,iytop+2,iztop+q), fac*((double)j-0.5) ); + uhat[p] = interp4( ustar, fac*((double)k-0.5)-1.5 ); + } + + (*u)(ix,iy+j,iz+k) = interp4right( (*u)(ix-3,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), + (*u)(ix-1,iy+j,iz+k), uhat[0], uhat[1] ); + (*u)(ix+1,iy+j,iz+k) = interp4rright( (*u)(ix-3,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), + (*u)(ix-1,iy+j,iz+k), uhat[0], uhat[1] ); + } + + fine_flux = 0.0; + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz+1); + + coarse_flux = Laplace_flux_O4().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int j=0;j<2;++j) + for( int k=0;k<2;++k) + { + (*u)(ix,iy+j,iz+k) += ffac*dflux; + (*u)(ix+1,iy+j,iz+k) += ffac*dflux; + } + + } + // bottom boundary + if( iy == -1 && ix%2==0 && iz%2==0 ) + { + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + for( int p=0; p<2; ++p ) + { + for( int q=-2;q<=2;++q ) + ustar[q+2] = interp4( (*utop)(ixtop-2,iytop+p-1,iztop+q), (*utop)(ixtop-1,iytop+p-1,iztop+q), + (*utop)(ixtop,iytop+p-1,iztop+q), (*utop)(ixtop+1,iytop+p-1,iztop+q), + (*utop)(ixtop+2,iytop+p-1,iztop+q), fac*((double)j-0.5) ); + uhat[p] = interp4( ustar, fac*((double)k-0.5)-1.5 ); + } + + (*u)(ix+j,iy,iz+k) = interp4left( uhat[0], uhat[1], (*u)(ix+j,iy+1,iz+k), + (*u)(ix+j,iy+2,iz+k), (*u)(ix+j,iy+3,iz+k) ); + (*u)(ix+j,iy-1,iz+k) = interp4lleft( uhat[0], uhat[1], (*u)(ix+j,iy+1,iz+k), + (*u)(ix+j,iy+2,iz+k), (*u)(ix+j,iy+3,iz+k) ); + } + + fine_flux = 0.0; + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz+1); + + coarse_flux = Laplace_flux_O4().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int i=0;i<2;++i) + for( int k=0;k<2;++k) + { + (*u)(ix+i,iy,iz+k) += ffac*dflux; + (*u)(ix+i,iy-1,iz+k) += ffac*dflux; + } + + } + // top boundary + if( iy == ny && ix%2==0 && iz%2==0 ) + { + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + for( int p=0; p<2; ++p ) + { + for( int q=-2;q<=2;++q ) + ustar[q+2] = interp4( (*utop)(ixtop-2,iytop+p,iztop+q), (*utop)(ixtop-1,iytop+p,iztop+q), + (*utop)(ixtop,iytop+p,iztop+q), (*utop)(ixtop+1,iytop+p,iztop+q), + (*utop)(ixtop+2,iytop+p,iztop+q), fac*((double)j-0.5) ); + uhat[p] = interp4( ustar, fac*((double)k-0.5)+1.5 ); + } + + (*u)(ix+j,iy,iz+k) = interp4right( (*u)(ix+j,iy-3,iz+k), (*u)(ix+j,iy-2,iz+k), + (*u)(ix+j,iy-1,iz+k), uhat[0], uhat[1] ); + (*u)(ix+j,iy+1,iz+k) = interp4rright( (*u)(ix+j,iy-3,iz+k), (*u)(ix+j,iy-2,iz+k), + (*u)(ix+j,iy-1,iz+k), uhat[0], uhat[1] ); + } + + fine_flux = 0.0; + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz+1); + + coarse_flux = Laplace_flux_O4().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int i=0;i<2;++i) + for( int k=0;k<2;++k) + { + (*u)(ix+i,iy,iz+k) += ffac*dflux; + (*u)(ix+i,iy+1,iz+k) += ffac*dflux; + } + + + } + // front boundary + if( iz == -1 && ix%2==0 && iy%2==0 ) + { + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + for( int p=0; p<2; ++p ) + { + for( int q=-2;q<=2;++q ) + ustar[q+2] = interp4( (*utop)(ixtop-2,iytop+q,iztop+p-1), (*utop)(ixtop-1,iytop+q,iztop+p-1), + (*utop)(ixtop,iytop+q,iztop+p-1), (*utop)(ixtop+1,iytop+q,iztop+p-1), + (*utop)(ixtop+2,iytop+q,iztop+p-1), fac*((double)j-0.5) ); + uhat[p] = interp4( ustar, fac*((double)k-0.5)-1.5 ); + } + + (*u)(ix+j,iy+k,iz) = interp4left( uhat[0], uhat[1], (*u)(ix+j,iy+k,iz+1), + (*u)(ix+j,iy+k,iz+2), (*u)(ix+j,iy+k,iz+3) ); + (*u)(ix+j,iy+k,iz-1) = interp4lleft( uhat[0], uhat[1], (*u)(ix+j,iy+k,iz+1), + (*u)(ix+j,iy+k,iz+2), (*u)(ix+j,iy+k,iz+3) ); + } + + + fine_flux = 0.0; + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy+1,iz+1); + + coarse_flux = Laplace_flux_O4().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int i=0;i<2;++i) + for( int j=0;j<2;++j) + { + (*u)(ix+i,iy+j,iz) += ffac*dflux; + (*u)(ix+i,iy+j,iz-1) += ffac*dflux; + } + + } + // back boundary + if( iz == nz && ix%2==0 && iy%2==0 ) + { + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + for( int p=0; p<2; ++p ) + { + for( int q=-2;q<=2;++q ) + ustar[q+2] = interp4( (*utop)(ixtop-2,iytop+q,iztop+p), (*utop)(ixtop-1,iytop+q,iztop+p), + (*utop)(ixtop,iytop+q,iztop+p), (*utop)(ixtop+1,iytop+q,iztop+p), + (*utop)(ixtop+2,iytop+q,iztop+p), fac*((double)j-0.5) ); + uhat[p] = interp4( ustar, fac*((double)k-0.5)+1.5 ); + } + + (*u)(ix+j,iy+k,iz) = interp4right( (*u)(ix+j,iy+k,iz-3), (*u)(ix+j,iy+k,iz-2), + (*u)(ix+j,iy+k,iz-1), uhat[0], uhat[1] ); + (*u)(ix+j,iy+k,iz+1) = interp4rright((*u)(ix+j,iy+k,iz-3), (*u)(ix+j,iy+k,iz-2), + (*u)(ix+j,iy+k,iz-1), uhat[0], uhat[1] ); + } + + fine_flux = 0.0; + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy+1,iz); + + coarse_flux = Laplace_flux_O4().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int i=0;i<2;++i) + for( int j=0;j<2;++j) + { + (*u)(ix+i,iy+j,iz) += ffac*dflux; + (*u)(ix+i,iy+j,iz+1) += ffac*dflux; + } + } + } + } + } +}; + + +struct interp_O7_fluxcorr +: public coarse_fine_interpolation +{ + + + template< class G > + void interp_coarse_fine( unsigned ilevel, G& coarse, G& fine ) + { + + //... use cubic interpolation to get all values on boundary + mg_cubic().prolong_bnd( coarse, fine ); + + //... use flux corrected quadratic interpolation for the + //... the boundary overlapped by the Laplace stencil + G *u = &fine; + G *utop = &coarse; + + + int + xoff = u->offset(0), + yoff = u->offset(1), + zoff = u->offset(2); + + //... don't do anything if we are not an additional refinement region + if( xoff == 0 && yoff == 0 && zoff == 0 ) + return; + + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + //... set boundary condition for fine grid +#pragma omp parallel for schedule(dynamic) + for( int ix=-1; ix<=nx; ++ix ) + for( int iy=-1; iy<=ny; ++iy ) + for( int iz=-1; iz<=nz; ++iz ) + { + bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz); + bool bnd=xbnd|ybnd|zbnd; + + if( bnd ) + { + + int ixtop = (int)(0.5*(double)(ix))+xoff; + int iytop = (int)(0.5*(double)(iy))+yoff; + int iztop = (int)(0.5*(double)(iz))+zoff; + + if( ix==-1 ) ixtop=xoff-1; + if( iy==-1 ) iytop=yoff-1; + if( iz==-1 ) iztop=zoff-1; + + double ustar[7], uhat[3]; + double fac = 0.5; + + double coarse_flux, fine_flux, dflux; + + double ffac = 180./222.; + + // left boundary + if( ix == -1 && iy%2==0 && iz%2==0 ) + { + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + for( int p=0; p<3; ++p ) + { + for( int q=-3;q<=3;++q ) + ustar[q+3] = interp6( (*utop)(ixtop+p-2,iytop-3,iztop+q), (*utop)(ixtop+p-2,iytop-2,iztop+q), + (*utop)(ixtop+p-2,iytop-1,iztop+q), (*utop)(ixtop+p-2,iytop,iztop+q), + (*utop)(ixtop+p-2,iytop+1,iztop+q), (*utop)(ixtop+p-2,iytop+2,iztop+q), + (*utop)(ixtop+p-2,iytop+3,iztop+q), fac*((double)j-0.5) ); + uhat[p] = interp6( ustar, fac*((double)k-0.5)-1.5 ); + } + + (*u)(ix,iy+j,iz+k) = interp6left( uhat[0], uhat[1], uhat[2], (*u)(ix+1,iy+j,iz+k), + (*u)(ix+2,iy+j,iz+k), (*u)(ix+3,iy+j,iz+k), (*u)(ix+4,iy+j,iz+k) ); + (*u)(ix-1,iy+j,iz+k) = interp6lleft( uhat[0], uhat[1], uhat[2], (*u)(ix+1,iy+j,iz+k), + (*u)(ix+2,iy+j,iz+k), (*u)(ix+3,iy+j,iz+k),(*u)(ix+4,iy+j,iz+k) ); + (*u)(ix-2,iy+j,iz+k) = interp6llleft( uhat[0], uhat[1], uhat[2], (*u)(ix+1,iy+j,iz+k), + (*u)(ix+2,iy+j,iz+k), (*u)(ix+3,iy+j,iz+k),(*u)(ix+4,iy+j,iz+k) ); + } + + fine_flux = 0.0; + fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy+1,iz+1); + fine_flux /= 4.0; + + coarse_flux = Laplace_flux_O6().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0; + + dflux = coarse_flux - fine_flux; + + for(int j=0;j<2;++j) + for( int k=0;k<2;++k) + { + (*u)(ix,iy+j,iz+k) += ffac*dflux; + (*u)(ix-1,iy+j,iz+k) += ffac*dflux; + (*u)(ix-2,iy+j,iz+k) += ffac*dflux; + } + + + } + // right boundary + if( ix == nx && iy%2==0 && iz%2==0 ) + { + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + for( int p=0; p<3; ++p ) + { + for( int q=-3;q<=3;++q ) + ustar[q+3] = interp6( (*utop)(ixtop+p,iytop-3,iztop+q), (*utop)(ixtop+p,iytop-2,iztop+q), + (*utop)(ixtop+p,iytop-1,iztop+q), (*utop)(ixtop+p,iytop,iztop+q), + (*utop)(ixtop+p,iytop+1,iztop+q), (*utop)(ixtop+p,iytop+2,iztop+q), + (*utop)(ixtop+p,iytop+3,iztop+q), fac*((double)j-0.5) ); + uhat[p] = interp6( ustar, fac*((double)k-0.5)-1.5 ); + } + + (*u)(ix,iy+j,iz+k) = interp6right( (*u)(ix-4,iy+j,iz+k), (*u)(ix-3,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), + (*u)(ix-1,iy+j,iz+k), uhat[0], uhat[1], uhat[2] ); + (*u)(ix+1,iy+j,iz+k) = interp6rright( (*u)(ix-4,iy+j,iz+k), (*u)(ix-3,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), + (*u)(ix-1,iy+j,iz+k), uhat[0], uhat[1], uhat[2] ); + (*u)(ix+2,iy+j,iz+k) = interp6rrright( (*u)(ix-4,iy+j,iz+k), (*u)(ix-3,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), + (*u)(ix-1,iy+j,iz+k), uhat[0], uhat[1], uhat[2] ); + + + } + + fine_flux = 0.0; + fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy+1,iz+1); + + coarse_flux = Laplace_flux_O6().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int j=0;j<2;++j) + for( int k=0;k<2;++k) + { + (*u)(ix,iy+j,iz+k) += ffac*dflux; + (*u)(ix+1,iy+j,iz+k) += ffac*dflux; + (*u)(ix+2,iy+j,iz+k) += ffac*dflux; + } + + } + // bottom boundary + if( iy == -1 && ix%2==0 && iz%2==0 ) + { + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + for( int p=0; p<3; ++p ) + { + for( int q=-3;q<=3;++q ) + ustar[q+3] = interp6( (*utop)(ixtop-3,iytop+p-2,iztop+q), (*utop)(ixtop-2,iytop+p-2,iztop+q), + (*utop)(ixtop-1,iytop+p-2,iztop+q), (*utop)(ixtop,iytop+p-2,iztop+q), + (*utop)(ixtop+1,iytop+p-2,iztop+q), (*utop)(ixtop+2,iytop+p-2,iztop+q), + (*utop)(ixtop+3,iytop+p-2,iztop+q), fac*((double)j-0.5) ); + uhat[p] = interp6( ustar, fac*((double)k-0.5)-1.5 ); + } + + (*u)(ix+j,iy,iz+k) = interp6left( uhat[0], uhat[1], uhat[2], (*u)(ix+j,iy+1,iz+k), + (*u)(ix+j,iy+2,iz+k), (*u)(ix+j,iy+3,iz+k),(*u)(ix+j,iy+4,iz+k) ); + (*u)(ix+j,iy-1,iz+k) = interp6lleft( uhat[0], uhat[1], uhat[2], (*u)(ix+j,iy+1,iz+k), + (*u)(ix+j,iy+2,iz+k), (*u)(ix+j,iy+3,iz+k),(*u)(ix+j,iy+4,iz+k) ); + (*u)(ix+j,iy-2,iz+k) = interp6llleft( uhat[0], uhat[1], uhat[2], (*u)(ix+j,iy+1,iz+k), + (*u)(ix+j,iy+2,iz+k), (*u)(ix+j,iy+3,iz+k),(*u)(ix+j,iy+4,iz+k) ); + + } + + fine_flux = 0.0; + fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix+1,iy+1,iz+1); + + coarse_flux = Laplace_flux_O6().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int i=0;i<2;++i) + for( int k=0;k<2;++k) + { + (*u)(ix+i,iy,iz+k) += ffac*dflux; + (*u)(ix+i,iy-1,iz+k) += ffac*dflux; + (*u)(ix+i,iy-2,iz+k) += ffac*dflux; + } + + } + // top boundary + if( iy == ny && ix%2==0 && iz%2==0 ) + { + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + for( int p=0; p<3; ++p ) + { + for( int q=-3;q<=3;++q ) + ustar[q+3] = interp6( (*utop)(ixtop-3,iytop+p,iztop+q), (*utop)(ixtop-2,iytop+p,iztop+q), + (*utop)(ixtop-1,iytop+p,iztop+q), (*utop)(ixtop,iytop+p,iztop+q), + (*utop)(ixtop+1,iytop+p,iztop+q), (*utop)(ixtop+2,iytop+p,iztop+q), + (*utop)(ixtop+3,iytop+p,iztop+q), fac*((double)j-0.5) ); + uhat[p] = interp6( ustar, fac*((double)k-0.5)+1.5 ); + } + + (*u)(ix+j,iy,iz+k) = interp6right( (*u)(ix+j,iy-4,iz+k), (*u)(ix+j,iy-3,iz+k), (*u)(ix+j,iy-2,iz+k), + (*u)(ix+j,iy-1,iz+k), uhat[0], uhat[1], uhat[2] ); + (*u)(ix+j,iy+1,iz+k) = interp6rright( (*u)(ix+j,iy-4,iz+k), (*u)(ix+j,iy-3,iz+k), (*u)(ix+j,iy-2,iz+k), + (*u)(ix+j,iy-1,iz+k), uhat[0], uhat[1], uhat[2] ); + (*u)(ix+j,iy+2,iz+k) = interp6rrright( (*u)(ix+j,iy-4,iz+k), (*u)(ix+j,iy-3,iz+k), (*u)(ix+j,iy-2,iz+k), + (*u)(ix+j,iy-1,iz+k), uhat[0], uhat[1], uhat[2] ); + + } + + fine_flux = 0.0; + fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix+1,iy,iz+1); + + coarse_flux = Laplace_flux_O6().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int i=0;i<2;++i) + for( int k=0;k<2;++k) + { + (*u)(ix+i,iy,iz+k) += ffac*dflux; + (*u)(ix+i,iy+1,iz+k) += ffac*dflux; + (*u)(ix+i,iy+2,iz+k) += ffac*dflux; + } + + + } + // front boundary + if( iz == -1 && ix%2==0 && iy%2==0 ) + { + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + for( int p=0; p<3; ++p ) + { + for( int q=-3;q<=3;++q ) + ustar[q+3] = interp6( (*utop)(ixtop-3,iytop+q,iztop+p-2), (*utop)(ixtop-2,iytop+q,iztop+p-2), + (*utop)(ixtop-1,iytop+q,iztop+p-2), (*utop)(ixtop,iytop+q,iztop+p-2), + (*utop)(ixtop+1,iytop+q,iztop+p-2), (*utop)(ixtop+2,iytop+q,iztop+p-2), + (*utop)(ixtop+3,iytop+q,iztop+p-2), fac*((double)j-0.5) ); + uhat[p] = interp6( ustar, fac*((double)k-0.5)-1.5 ); + } + + (*u)(ix+j,iy+k,iz) = interp6left( uhat[0], uhat[1], uhat[2], (*u)(ix+j,iy+k,iz+1), + (*u)(ix+j,iy+k,iz+2), (*u)(ix+j,iy+k,iz+3),(*u)(ix+j,iy+k,iz+4) ); + (*u)(ix+j,iy+k,iz-1) = interp6lleft( uhat[0], uhat[1], uhat[2], (*u)(ix+j,iy+k,iz+1), + (*u)(ix+j,iy+k,iz+2), (*u)(ix+j,iy+k,iz+3), (*u)(ix+j,iy+k,iz+4) ); + (*u)(ix+j,iy+k,iz-2) = interp6llleft( uhat[0], uhat[1], uhat[2], (*u)(ix+j,iy+k,iz+1), + (*u)(ix+j,iy+k,iz+2), (*u)(ix+j,iy+k,iz+3), (*u)(ix+j,iy+k,iz+4) ); + } + + + fine_flux = 0.0; + fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix+1,iy+1,iz+1); + + coarse_flux = Laplace_flux_O6().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int i=0;i<2;++i) + for( int j=0;j<2;++j) + { + (*u)(ix+i,iy+j,iz) += ffac*dflux; + (*u)(ix+i,iy+j,iz-1) += ffac*dflux; + (*u)(ix+i,iy+j,iz-2) += ffac*dflux; + } + + } + // back boundary + if( iz == nz && ix%2==0 && iy%2==0 ) + { + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + for( int p=0; p<3; ++p ) + { + for( int q=-3;q<=3;++q ) + ustar[q+3] = interp6( (*utop)(ixtop-3,iytop+q,iztop+p), (*utop)(ixtop-2,iytop+q,iztop+p), + (*utop)(ixtop-1,iytop+q,iztop+p), (*utop)(ixtop,iytop+q,iztop+p), + (*utop)(ixtop+1,iytop+q,iztop+p), (*utop)(ixtop+2,iytop+q,iztop+p), + (*utop)(ixtop+3,iytop+q,iztop+p), fac*((double)j-0.5) ); + uhat[p] = interp6( ustar, fac*((double)k-0.5)+1.5 ); + } + + (*u)(ix+j,iy+k,iz) = interp6right( (*u)(ix+j,iy+k,iz-4), (*u)(ix+j,iy+k,iz-3), (*u)(ix+j,iy+k,iz-2), + (*u)(ix+j,iy+k,iz-1), uhat[0], uhat[1], uhat[2] ); + (*u)(ix+j,iy+k,iz+1) = interp6rright( (*u)(ix+j,iy+k,iz-4), (*u)(ix+j,iy+k,iz-3), (*u)(ix+j,iy+k,iz-2), + (*u)(ix+j,iy+k,iz-1), uhat[0], uhat[1], uhat[2] ); + (*u)(ix+j,iy+k,iz+2) = interp6rrright( (*u)(ix+j,iy+k,iz-4), (*u)(ix+j,iy+k,iz-3), (*u)(ix+j,iy+k,iz-2), + (*u)(ix+j,iy+k,iz-1), uhat[0], uhat[1], uhat[2] ); + + } + + fine_flux = 0.0; + fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix+1,iy+1,iz); + + coarse_flux = Laplace_flux_O6().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0; + fine_flux /= 4.0; + + dflux = coarse_flux - fine_flux; + + for(int i=0;i<2;++i) + for( int j=0;j<2;++j) + { + (*u)(ix+i,iy+j,iz) += ffac*dflux; + (*u)(ix+i,iy+j,iz+1) += ffac*dflux; + (*u)(ix+i,iy+j,iz+2) += ffac*dflux; + } + } + } + } + } +}; + + +#endif // __MG_INTERP_HH diff --git a/mg_operators.hh b/mg_operators.hh new file mode 100644 index 0000000..f3d235f --- /dev/null +++ b/mg_operators.hh @@ -0,0 +1,1362 @@ +/* + + mg_operators.hh - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +*/ + +#ifndef __MG_OPERATORS_HH +#define __MG_OPERATORS_HH + + +class mg_cubic +{ +protected: + template< typename real_t > + inline double CUBE( real_t x ) const + { return x*x*x; } + + template< typename T > + inline T SQR( T x ) const + { return x*x; } + +public: + template< int sx, int sy, int sz, typename M > + inline double interp_cubic( int x, int y, int z, M& V, double s=1.0 ) const + { + int i, j, k; + double dx, dy, dz; + double u[4], v[4], w[4]; + double r[4], q[4]; + double vox = 0; + + dx = 0.5*((double)sz - 0.5)*s; + dy = 0.5*((double)sy - 0.5)*s; + dz = 0.5*((double)sx - 0.5)*s; + + //int xDim = V.m_nx, yDim = V.m_ny; + //int xyDim = xDim * yDim; + + //double *pv = &V(x-1,y-1,z-1); + + /* factors for Catmull-Rom interpolation */ + + /* + u[0] = -0.5 * CUBE (dx) + SQR (dx) - 0.5 * dx; + u[1] = 1.5 * CUBE (dx) - 2.5 * SQR (dx) + 1; + u[2] = -1.5 * CUBE (dx) + 2 * SQR (dx) + 0.5 * dx; + u[3] = 0.5 * CUBE (dx) - 0.5 * SQR (dx); + + v[0] = -0.5 * CUBE (dy) + SQR (dy) - 0.5 * dy; + v[1] = 1.5 * CUBE (dy) - 2.5 * SQR (dy) + 1; + v[2] = -1.5 * CUBE (dy) + 2 * SQR (dy) + 0.5 * dy; + v[3] = 0.5 * CUBE (dy) - 0.5 * SQR (dy); + + w[0] = -0.5 * CUBE (dz) + SQR (dz) - 0.5 * dz; + w[1] = 1.5 * CUBE (dz) - 2.5 * SQR (dz) + 1; + w[2] = -1.5 * CUBE (dz) + 2 * SQR (dz) + 0.5 * dz; + w[3] = 0.5 * CUBE (dz) - 0.5 * SQR (dz); + */ + + if( sz == 1 ) + { + u[0] = -4.5/64.0; + u[1] = 55.5/64.0; + u[2] = 14.5/64.0; + u[3] = -1.5/64.0; + } else { + u[0] = -1.5/64.0; + u[1] = 14.5/64.0; + u[2] = 55.5/64.0; + u[3] = -4.5/64.0; + } + + if( sy == 1 ) + { + v[0] = -4.5/64.0; + v[1] = 55.5/64.0; + v[2] = 14.5/64.0; + v[3] = -1.5/64.0; + } else{ + v[0] = -1.5/64.0; + v[1] = 14.5/64.0; + v[2] = 55.5/64.0; + v[3] = -4.5/64.0; + } + + if( sx == 1 ) + { + w[0] = -4.5/64.0; + w[1] = 55.5/64.0; + w[2] = 14.5/64.0; + w[3] = -1.5/64.0; + } else { + w[0] = -1.5/64.0; + w[1] = 14.5/64.0; + w[2] = 55.5/64.0; + w[3] = -4.5/64.0; + } + + + + + /*for (k = 0; k < 4; k++) + { + q[k] = 0; + for (j = 0; j < 4; j++) + { + r[j] = 0; + for (i = 0; i < 4; i++) + { + r[j] += u[i] * *pv; + pv++; + } + q[k] += v[j] * r[j]; + pv += xDim - 4; + } + vox += w[k] * q[k]; + pv += xyDim - 4 * xDim; + }*/ + + for (k = 0; k < 4; k++) + { + q[k] = 0; + for (j = 0; j < 4; j++) + { + r[j] = 0; + for (i = 0; i < 4; i++) + { + //r[j] += u[i] * V(x+k-1,y+j-1,z+i-1); + r[j] += u[i] * V(x+k-2+sx,y+j-2+sy,z+i-2+sz); + //pv++; + } + q[k] += v[j] * r[j]; + //pv += xDim - 4; + } + vox += w[k] * q[k]; + //pv += xyDim - 4 * xDim; + } + + + return vox; + + } + +public: + + + //... restricts v to V + template< typename m1, typename m2 > + inline void restrict( m1& v, m2& V ) + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + /*for( int i=0,i2=0; i(i2+1,j2,k2,v) + +interp_cubic<0,1,0>(i2,j2+1,k2,v) + +interp_cubic<0,0,1>(i2,j2,k2+1,v) + +interp_cubic<1,1,0>(i2+1,j2+1,k2,v) + +interp_cubic<0,1,1>(i2,j2+1,k2+1,v) + +interp_cubic<1,0,1>(i2+1,j2,k2+1,v) + +interp_cubic<1,1,1>(i2+1,j2+1,k2+1,v) + +interp_cubic<0,0,0>(i2,j2,k2,v)); + } */ + +#pragma omp parallel for + for( int i=0; i(i2,j2,k2,v) + +interp_cubic<0,1,0>(i2,j2,k2,v) + +interp_cubic<0,0,1>(i2,j2,k2,v) + +interp_cubic<1,1,0>(i2,j2,k2,v) + +interp_cubic<0,1,1>(i2,j2,k2,v) + +interp_cubic<1,0,1>(i2,j2,k2,v) + +interp_cubic<1,1,1>(i2,j2,k2,v) + +interp_cubic<0,0,0>(i2,j2,k2,v)); + } + + + } + + //... restricts v to V + template< typename m1, typename m2 > + inline void restrict_add( m1& v, m2& V ) + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + /*for( int i=0,i2=0; i(i2+1,j2,k2,v) + +interp_cubic<0,1,0>(i2,j2+1,k2,v) + +interp_cubic<0,0,1>(i2,j2,k2+1,v) + +interp_cubic<1,1,0>(i2+1,j2+1,k2,v) + +interp_cubic<0,1,1>(i2,j2+1,k2+1,v) + +interp_cubic<1,0,1>(i2+1,j2,k2+1,v) + +interp_cubic<1,1,1>(i2+1,j2+1,k2+1,v) + +interp_cubic<0,0,0>(i2,j2,k2,v)); + } + + + } + + /*template< typename m1, typename m2 > + inline void restrict( m1& v, m2& V ) + { + int + nx = V.size(0), + ny = V.size(1), + nz = V.size(2); + + for( int i=0,i2=0; i(i2, j2, k2, v, 2.0); + }*/ + + template< typename m1, typename m2 > + inline void restrict_bnd( const m1& v, m2& V ) const + { + unsigned + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + + //... boundary points + for( int j=0,j2=0; j + inline void restrict_bnd_add( const m1& v, m2& V ) const + { + + unsigned + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + + //... boundary points + for( int j=0,j2=0; j + inline void prolong( m1& V, m2& v ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + #pragma omp parallel for + for( int i=0; i(i+ox,j+oy,k+oz,V); + v(i2,j2+1,k2) = interp_cubic<0,1,0>(i+ox,j+oy,k+oz,V); + v(i2,j2,k2+1) = interp_cubic<0,0,1>(i+ox,j+oy,k+oz,V); + v(i2+1,j2+1,k2) = interp_cubic<1,1,0>(i+ox,j+oy,k+oz,V); + v(i2+1,j2,k2+1) = interp_cubic<1,0,1>(i+ox,j+oy,k+oz,V); + v(i2+1,j2+1,k2+1) = interp_cubic<1,1,1>(i+ox,j+oy,k+oz,V); + v(i2,j2+1,k2+1) = interp_cubic<0,1,1>(i+ox,j+oy,k+oz,V); + v(i2,j2,k2) = interp_cubic<0,0,0>(i+ox,j+oy,k+oz,V); + } + } + } + + template< typename m1, typename m2 > + inline void prolong_add( m1& V, m2& v ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + +#pragma omp parallel for + for( int i=0; i(i+ox,j+oy,k+oz,V); + v(i2,j2+1,k2) += interp_cubic<0,1,0>(i+ox,j+oy,k+oz,V); + v(i2,j2,k2+1) += interp_cubic<0,0,1>(i+ox,j+oy,k+oz,V); + v(i2+1,j2+1,k2) += interp_cubic<1,1,0>(i+ox,j+oy,k+oz,V); + v(i2+1,j2,k2+1) += interp_cubic<1,0,1>(i+ox,j+oy,k+oz,V); + v(i2+1,j2+1,k2+1) += interp_cubic<1,1,1>(i+ox,j+oy,k+oz,V); + v(i2,j2+1,k2+1) += interp_cubic<0,1,1>(i+ox,j+oy,k+oz,V); + v(i2,j2,k2) += interp_cubic<0,0,0>(i+ox,j+oy,k+oz,V); + } + } + } + + template< typename m1, typename m2 > + inline void prolong_bnd( m1& V, m2& v ) + { + // int + // nx = V.size(0), + // ny = V.size(1), + // nz = V.size(2); + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + int nbnd = V.m_nbnd; + int nbndtop = nbnd/2; + + + #pragma omp parallel for + for( int i=-nbndtop; i=0&&i=0&&j=0&&k(i+ox,j+oy,k+oz,V); + v(i2,j2+1,k2) = interp_cubic<0,1,0>(i+ox,j+oy,k+oz,V); + v(i2,j2,k2+1) = interp_cubic<0,0,1>(i+ox,j+oy,k+oz,V); + v(i2+1,j2+1,k2) = interp_cubic<1,1,0>(i+ox,j+oy,k+oz,V); + v(i2+1,j2,k2+1) = interp_cubic<1,0,1>(i+ox,j+oy,k+oz,V); + v(i2+1,j2+1,k2+1) = interp_cubic<1,1,1>(i+ox,j+oy,k+oz,V); + v(i2,j2+1,k2+1) = interp_cubic<0,1,1>(i+ox,j+oy,k+oz,V); + v(i2,j2,k2) = interp_cubic<0,0,0>(i+ox,j+oy,k+oz,V); + } + } + } + + template< typename m1, typename m2 > + inline void prolong_add_bnd( m1& V, m2& v ) + { + unsigned + nx = V.size(0), + ny = V.size(1), + nz = V.size(2); + + #pragma omp parallel for + for( int i=-1; i<=nx; ++i ) + { + int i2(2*i); + for( int j=-1,j2=-2; j<=ny; ++j,j2+=2 ) + for( int k=-1,k2=-2; k<=nz; ++k,k2+=2 ) + { + + if( i>=0&&i=0&&j=0&&k(i,j,k,V); + v(i2,j2+1,k2) += interp_cubic<0,1,0>(i,j,k,V); + v(i2,j2,k2+1) += interp_cubic<0,0,1>(i,j,k,V); + v(i2+1,j2+1,k2) += interp_cubic<1,1,0>(i,j,k,V); + v(i2+1,j2,k2+1) += interp_cubic<1,0,1>(i,j,k,V); + v(i2+1,j2+1,k2+1) += interp_cubic<1,1,1>(i,j,k,V); + v(i2,j2+1,k2+1) += interp_cubic<0,1,1>(i,j,k,V); + v(i2,j2,k2) += interp_cubic<0,0,0>(i,j,k,V); + } + } + } + + +}; + + + +class mg_lin +{ +public: + + template< typename M > + inline double restr_lin( int x, int y, int z, M& V ) const + { + double w[4] = { 1.0/4.0, 3.0/4.0, 3.0/4.0, 1.0/4.0 }; + double vox = 0.0; + int i,j,k; + + for( i=0; i<4; ++i ) + for( j=0; j<4; ++j ) + for( k=0; k<4; ++k ) + { + vox += w[i]*w[j]*w[k] * V(x+i-1,y+j-1,z+k-1); + //std::cerr << "[" << i << "," << j << "," << k << "] -- " << u[i]*v[j]*w[k] << std::endl; + } + //vox = (V(x+1,y,z)+V(x,y+1,z)+V(x,y,z+1)+V(x,y,z)+V(x+1,y+1,z)+V(x+1,y,z+1)+V(x,y+1,z+1)+V(x+1,y+1,z+1)); + return vox; + } + + template< int sx, int sy, int sz, typename M > + inline double interp_lin( int x, int y, int z, M& V, double s=1.0 ) const + { + double u[2], v[2], w[2]; + double r[2], q[2]; + double vox = 0; + int i,j,k; + + + if( sx==0 ){ + u[0] = 1.0/4.0; + u[1] = 3.0/4.0; + }else{ + u[0] = 3.0/4.0; + u[1] = 1.0/4.0; + } + if( sy==0 ){ + v[0] = 1.0/4.0; + v[1] = 3.0/4.0; + }else{ + v[0] = 3.0/4.0; + v[1] = 1.0/4.0; + } + if( sz==0 ){ + w[0] = 1.0/4.0; + w[1] = 3.0/4.0; + }else{ + w[0] = 3.0/4.0; + w[1] = 1.0/4.0; + } + + /*if( sx==0 ){ + u[0] = 1.0/64.0; + u[1] = 3.0/64.0; + }else{ + u[0] = 3.0/64.0; + u[1] = 1.0/64.0; + } + if( sy==0 ){ + v[0] = 1.0/64.0; + v[1] = 3.0/64.0; + }else{ + v[0] = 3.0/64.0; + v[1] = 1.0/64.0; + } + if( sz==0 ){ + w[0] = 1.0/64.0; + w[1] = 3.0/64.0; + }else{ + w[0] = 3.0/64.0; + w[1] = 1.0/64.0; + }*/ + + //std::cerr << "(" << sx << "," << sy << "," << sz << ") : " << std::endl; + /*for (k = 0; k < 2; k++) + { + q[k] = 0; + for (j = 0; j < 2; j++) + { + r[j] = 0; + for (i = 0; i < 2; i++) + r[j] += u[i] * V(x+k+sx-1,y+j+sy-1,z+i+sz-1); + q[k] += v[j] * r[j]; + } + //std::cerr << "[" << i << "," << j << "," << k << "] -- " << w[k]*q[k] << std::endl; + vox += w[k] * q[k]; + }*/ + for( i=0; i<2; ++i ) + for( j=0; j<2; ++j ) + for( k=0; k<2; ++k ) + { + vox += u[i]*v[j]*w[k] * V(x+i+sx-1,y+j+sy-1,z+k+sz-1); + //std::cerr << "[" << i << "," << j << "," << k << "] -- " << u[i]*v[j]*w[k] << std::endl; + } + //std::cerr << "(" << sx << "," << sy << "," << sz << ") : " << vox < + inline void prolong( const m1& V, m2& v ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + #pragma omp parallel for + for( int i=0; i(i+ox,j+oy,k+oz,V); + v(i2,j2+1,k2) = interp_lin<0,1,0>(i+ox,j+oy,k+oz,V); + v(i2,j2,k2+1) = interp_lin<0,0,1>(i+ox,j+oy,k+oz,V); + v(i2+1,j2+1,k2) = interp_lin<1,1,0>(i+ox,j+oy,k+oz,V); + v(i2+1,j2,k2+1) = interp_lin<1,0,1>(i+ox,j+oy,k+oz,V); + v(i2+1,j2+1,k2+1) = interp_lin<1,1,1>(i+ox,j+oy,k+oz,V); + v(i2,j2+1,k2+1) = interp_lin<0,1,1>(i+ox,j+oy,k+oz,V); + v(i2,j2,k2) = interp_lin<0,0,0>(i+ox,j+oy,k+oz,V); + } + } + } + + template< typename m1, typename m2 > + inline void prolong_add( const m1& V, m2& v ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + +#pragma omp parallel for + for( int i=0; i(i+ox,j+oy,k+oz,V); + v(i2,j2+1,k2) += interp_lin<0,1,0>(i+ox,j+oy,k+oz,V); + v(i2,j2,k2+1) += interp_lin<0,0,1>(i+ox,j+oy,k+oz,V); + v(i2+1,j2+1,k2) += interp_lin<1,1,0>(i+ox,j+oy,k+oz,V); + v(i2+1,j2,k2+1) += interp_lin<1,0,1>(i+ox,j+oy,k+oz,V); + v(i2+1,j2+1,k2+1) += interp_lin<1,1,1>(i+ox,j+oy,k+oz,V); + v(i2,j2+1,k2+1) += interp_lin<0,1,1>(i+ox,j+oy,k+oz,V); + v(i2,j2,k2) += interp_lin<0,0,0>(i+ox,j+oy,k+oz,V); + } + } + } + + + template< typename m1, typename m2 > + inline void restrict( const m1& v, m2& V ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + #pragma omp parallel for + for( int i=0; i(i2,j2,k2,v)+interp_lin<0,1,0>(i2,j2,k2,v)+ + interp_lin<0,0,1>(i2,j2,k2,v)+interp_lin<1,1,0>(i2,j2,k2,v)+ + interp_lin<1,0,1>(i2,j2,k2,v)+interp_lin<0,1,1>(i2,j2,k2,v)+ + interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2,j2,k2,v));*/ + } + + /* + for( int i=0; i(i2+2,j2,k2,v)+interp_lin<0,1,0>(i2,j2+2,k2,v)+ + interp_lin<0,0,1>(i2,j2,k2+2,v)+interp_lin<1,1,0>(i2+1,j2+2,k2,v)+ + interp_lin<1,0,1>(i2+2,j2,k2+2,v)+interp_lin<0,1,1>(i2,j2+2,k2+2,v)+ + interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2+2,j2+2,k2+2,v)); + + } + } + */ + } +#if 0 + template< typename m1, typename m2 > + inline void restrict_add( const m1& v, m2& V ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + /*#pragma omp parallel for + for( int i=0; i(i2,j2,k2,v)+interp_lin<0,1,0>(i2,j2,k2,v)+ + interp_lin<0,0,1>(i2,j2,k2,v)+interp_lin<1,1,0>(i2,j2,k2,v)+ + interp_lin<1,0,1>(i2,j2,k2,v)+interp_lin<0,1,1>(i2,j2,k2,v)+ + interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2,j2,k2,v)); + }*/ + + for( int i=0; i(i2+2,j2,k2,v)+interp_lin<0,1,0>(i2,j2+2,k2,v)+ + interp_lin<0,0,1>(i2,j2,k2+2,v)+interp_lin<1,1,0>(i2+1,j2+2,k2,v)+ + interp_lin<1,0,1>(i2+2,j2,k2+2,v)+interp_lin<0,1,1>(i2,j2+2,k2+2,v)+ + interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2+2,j2+2,k2+2,v)); + + } + } + } +#endif + +}; + + + +//template< typename T > +class mg_linear +{ +public: + //typedef T real_t; + + + //inline void prolong_bnd( const MeshvarBnd& V, MeshvarBnd& v ) const + template< typename m1, typename m2 > + inline void prolong_bnd( const m1& V, m2& v ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + +#pragma omp parallel + { + + { + for(int q=0;q<2; ++q) + { + int i=nx; + if(q==0) i=-1; + + int i2 = 2*i; + for( int j=0,j2=0; j + inline void prolong_add_bnd( const m1& V, m2& v ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + +#pragma omp parallel + { + + { + for(int q=0;q<2; ++q) + { + int i=nx; + if(q==0) i=-1; + + int i2 = 2*i; + for( int j=0,j2=0; j + inline void prolong( const m1& V, m2& v ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + +#pragma omp parallel for + for( int i=0; i + inline void prolong_add( const m1& V, m2& v ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + +#pragma omp parallel for + for( int i=0; i +class mg_straight +{ +public: + + //typedef T real_t; + + template< typename m1, typename m2 > + inline void restrict_bnd( const m1& v, m2& V ) const + { + unsigned + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + + //... boundary points + for( int j=0,j2=0; j& v, MeshvarBnd& V ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + #pragma omp parallel for + for( int i=0; i + inline void restrict( const m1& v, m2& V ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + +#pragma omp parallel for + for( int i=0; i + inline void restrict_add( const m1& v, m2& V ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + +#pragma omp parallel for + for( int i=0; i + inline void restrict_bnd_add( const m1& v, m2& V ) const + { + unsigned + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + + //... boundary points + for( int j=0,j2=0; j + inline void prolong( const m1& V, m2& v ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + +#pragma omp parallel + { + +#pragma omp for nowait + for( int i=0; i + inline void prolong_bnd( const m1& V, m2& v ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + +#pragma omp parallel + { + + + + for( int j=0,j2=0; j + inline void prolong_add_bnd( const m1& V, m2& v ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + +#pragma omp parallel + { + + + + for( int j=0,j2=0; j + inline void prolong_add( const m1& V, m2& v ) const + { + int + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + #pragma omp parallel for + for( int i=0; i + inline void prolong_add( m1& V, m2& v ) + { + unsigned + nx = v.size(0)/2, + ny = v.size(1)/2, + nz = v.size(2)/2, + ox = v.offset(0), + oy = v.offset(1), + oz = v.offset(2); + + + for( int i=0,i2=0; i. + +*/ + +#ifndef __MG_SOLVER_HH +#define __MG_SOLVER_HH + +#include +#include + +#include "mg_operators.hh" +#include "mg_interp.hh" + +#include "mesh.hh" + +#define BEGIN_MULTIGRID_NAMESPACE namespace multigrid { +#define END_MULTIGRID_NAMESPACE } + +BEGIN_MULTIGRID_NAMESPACE + +namespace opt { + enum smtype { sm_jacobi, sm_gauss_seidel, sm_sor }; +} + + + +template< class S, class I, class O, typename T=double > +class solver +{ +public: + typedef S scheme; + typedef O mgop; + typedef I interp; + +protected: + scheme m_scheme; + mgop m_gridop; + unsigned m_npresmooth, m_npostsmooth; + opt::smtype m_smoother; + unsigned m_ilevelmin; + + const static bool m_bperiodic = true; + + std::vector m_residu_ini; + bool m_is_ini; + + GridHierarchy *m_pu, *m_pf, *m_pfsave; + //GridHierarchy *m_pmask; + const MeshvarBnd *m_pubnd; + + double compute_error( const MeshvarBnd& u, const MeshvarBnd& unew ); + + double compute_error( const GridHierarchy& uh, const GridHierarchy& uhnew, bool verbose ); + + double compute_RMS_resid( const GridHierarchy& uh, const GridHierarchy& fh, bool verbose ); + +protected: + + void Jacobi( T h, MeshvarBnd* u, const MeshvarBnd* f ); + + void GaussSeidel( T h, MeshvarBnd* u, const MeshvarBnd* f ); + + void SOR( T h, MeshvarBnd* u, const MeshvarBnd* f ); + + void twoGrid( unsigned ilevel ); + + //void interp_coarse_fine( unsigned ilevel, MeshvarBnd& coarse, MeshvarBnd& fine, bool bcf=true ); + + void setBC( unsigned ilevel ); + + void make_periodic( MeshvarBnd *u ); + + void interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd& coarse, MeshvarBnd& fine ); + +public: + solver( GridHierarchy& f, //const MeshvarBnd& uBC_top, + opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth ); + + ~solver() + { /*delete m_pmask;*/ } + + double solve( GridHierarchy& u, double accuracy, double h=-1.0, bool verbose=false ); + + double solve( GridHierarchy& u, double accuracy, bool verbose=false ) + { + return this->solve ( u, accuracy, -1.0, verbose ); + } + + + +}; + + +template< class S, class I, class O, typename T > +solver::solver( GridHierarchy& f, //const MeshvarBnd& ubnd, + opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth ) +: m_scheme(), m_gridop(), m_npresmooth( npresmooth ), m_npostsmooth( npostsmooth ), +m_smoother( smoother ), m_ilevelmin( f.levelmin() ), m_pf( &f ), m_is_ini( true ) +{ + m_is_ini = true; + + // TODO: maybe later : add more than one refinement region + //... initialize the refinement mask + //m_pmask = new GridHierarchy( f.m_nbnd ); + //m_pmask->create_base_hierarchy(f.levelmin()); + + /*for( unsigned ilevel=f.levelmin()+1; ilevel<=f.levelmax(); ++ilevel ) + { + meshvar_bnd* pf = f.get_grid(ilevel); + m_pmask->add_patch( pf->offset(0), pf->offset(1), pf->offset(2), pf->size(0), pf->size(1), pf->size(2) ); + } + + m_pmask->zero(); + + for( unsigned ilevel=0; ilevel *pf = f.get_grid(ilevel); + for( int ix=0; ix < (int)pf->size(0); ++ix ) + for( int iy=0; iy < (int)pf->size(1); ++iy ) + for( int iz=0; iz < (int)pf->size(2); ++iz ) + (*m_pmask->get_grid(ilevel))(ix,iy,iz) = true; + } + + for( unsigned ilevel=m_ilevelmin; ilevel* pf = f.get_grid(ilevel+1);//, *pfc = f.get_grid(ilevel); + + for( int ix=pf->offset(0); ix < (int)(pf->offset(0)+pf->size(0)/2); ++ix ) + for( int iy=pf->offset(1); iy < (int)(pf->offset(1)+pf->size(1)/2); ++iy ) + for( int iz=pf->offset(2); iz < (int)(pf->offset(2)+pf->size(2)/2); ++iz ) + (*m_pmask->get_grid(ilevel))(ix,iy,iz) = true; + } + */ +} + + +template< class S, class I, class O, typename T > +void solver::Jacobi( T h, MeshvarBnd *u, const MeshvarBnd* f ) +{ + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + double + c0 = -1.0/m_scheme.ccoeff(), + h2 = h*h; + + MeshvarBnd uold(*u); + + double alpha = 0.95, ialpha = 1.0-alpha; + + #pragma omp parallel for + for( int ix=0; ix +void solver::SOR( T h, MeshvarBnd *u, const MeshvarBnd* f ) +{ + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + double + c0 = -1.0/m_scheme.ccoeff(), + h2 = h*h; + + MeshvarBnd uold(*u); + + double + alpha = 1.2, + //alpha = 2 / (1 + 4 * atan(1.0) / double(u->size(0)))-1.0, + ialpha = 1.0-alpha; + + #pragma omp parallel for + for( int ix=0; ix +void solver::GaussSeidel( T h, MeshvarBnd* u, const MeshvarBnd* f ) +{ + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + T + c0 = -1.0/m_scheme.ccoeff(), + h2 = h*h; + + for( int color=0; color < 2; ++color ) + #pragma omp parallel for + for( int ix=0; ix +void solver::twoGrid( unsigned ilevel ) +{ + MeshvarBnd *uf, *uc, *ff, *fc; + + T + h = 1.0/(pow(2.0,ilevel)), + c0 = -1.0/m_scheme.ccoeff(), + h2 = h*h; + + uf = m_pu->get_grid(ilevel); + ff = m_pf->get_grid(ilevel); + + uc = m_pu->get_grid(ilevel-1); + fc = m_pf->get_grid(ilevel-1); + + + int + nx = uf->size(0), + ny = uf->size(1), + nz = uf->size(2); + + if( m_bperiodic && ilevel <= m_ilevelmin) + make_periodic( uf ); + else if(!m_bperiodic) + setBC( ilevel ); + + //... do smoothing sweeps with specified solver + for( unsigned i=0; i m_ilevelmin ) + interp().interp_coarse_fine(ilevel,*uc,*uf); + + if( m_smoother == opt::sm_gauss_seidel ) + GaussSeidel( h, uf, ff ); + + else if( m_smoother == opt::sm_jacobi ) + Jacobi( h, uf, ff); + + else if( m_smoother == opt::sm_sor ) + SOR( h, uf, ff ); + + if( m_bperiodic && ilevel <= m_ilevelmin ) + make_periodic( uf ); + } + + + m_gridop.restrict( *uf, *uc ); + + //... essential!! + if( m_bperiodic && ilevel <= m_ilevelmin ) + make_periodic( uc ); + else if( ilevel > m_ilevelmin ) + interp().interp_coarse_fine(ilevel,*uc,*uf); + + + + meshvar_bnd Lu(*uf,false); + Lu.zero(); + + #pragma omp parallel for + for( int ix=0; ixoffset(0); + oj = ff->offset(1); + ok = ff->offset(2); + + #pragma omp parallel for + for( int ix=oi; ixsize(0)/2; ++ix ) + for( int iy=oj; iysize(1)/2; ++iy ) + for( int iz=ok; izsize(2)/2; ++iz ) + (*fc)(ix,iy,iz) += ((tLu( ix, iy, iz ) - (m_scheme.apply( *uc, ix, iy, iz )/(4.0*h2)))); + + tLu.deallocate(); + + meshvar_bnd ucsave(*uc,true); + + //... have we reached the end of the recursion or do we need to go up one level? + if( ilevel == 1 ) + if( m_bperiodic ) + (*uc)(0,0,0) = 0.0; + else + (*uc)(0,0,0) = (m_scheme.rhs( (*uc), 0, 0, 0 ) + 4.0 * h2 * (*fc)(0,0,0))*c0; + else + twoGrid( ilevel-1 ); + + meshvar_bnd cc(*uc,false); + + //... compute correction on coarse grid + #pragma omp parallel for + for( int ix=0; ix<(int)cc.size(0); ++ix ) + for( int iy=0; iy<(int)cc.size(1); ++iy ) + for( int iz=0; iz<(int)cc.size(2); ++iz ) + cc(ix,iy,iz) = (*uc)(ix,iy,iz) - ucsave(ix,iy,iz); + + ucsave.deallocate(); + + if( m_bperiodic && ilevel <= m_ilevelmin ) + make_periodic( &cc ); + + m_gridop.prolong_add( cc, *uf ); + + //... interpolate and apply coarse-fine boundary conditions on fine level + if( m_bperiodic && ilevel <= m_ilevelmin ) + make_periodic( uf ); + else if(!m_bperiodic) + setBC( ilevel ); + + //... do smoothing sweeps with specified solver + for( unsigned i=0; i m_ilevelmin ) + interp().interp_coarse_fine(ilevel,*uc,*uf); + + if( m_smoother == opt::sm_gauss_seidel ) + GaussSeidel( h, uf, ff ); + + else if( m_smoother == opt::sm_jacobi ) + Jacobi( h, uf, ff); + + else if( m_smoother == opt::sm_sor ) + SOR( h, uf, ff ); + + if( m_bperiodic && ilevel <= m_ilevelmin ) + make_periodic( uf ); + + } +} + +template< class S, class I, class O, typename T > +double solver::compute_error( const MeshvarBnd& u, const MeshvarBnd& unew ) +{ + int + nx = u.size(0), + ny = u.size(1), + nz = u.size(2); + + double err = 0.0; + unsigned count = 0; + + #pragma omp parallel for reduction(+:err,count) + for( int ix=0; ix 0.0 )//&& u(ix,iy,iz) != unew(ix,iy,iz) ) + { + err += fabs(1.0 - u(ix,iy,iz)/unew(ix,iy,iz)); + + ++count; + } + + if( count != 0 ) + err /= count; + + return err; +} + +template< class S, class I, class O, typename T > +double solver::compute_error( const GridHierarchy& uh, const GridHierarchy& uhnew, bool verbose ) +{ + double maxerr = 0.0; + + for( unsigned ilevel=uh.levelmin(); ilevel <= uh.levelmax(); ++ilevel ) + { + double err = 0.0; + err = compute_error( *uh.get_grid(ilevel), *uhnew.get_grid(ilevel) ); + + if( verbose ) + std::cout << " Level " << std::setw(6) << ilevel << ", Error = " << err << std::endl; + maxerr = std::max(maxerr,err); + + } + return maxerr; +} + +template< class S, class I, class O, typename T > +double solver::compute_RMS_resid( const GridHierarchy& uh, const GridHierarchy& fh, bool verbose ) +{ + if( m_is_ini ) + m_residu_ini.assign( uh.levelmax()+1, 0.0 ); + + double maxerr=0.0; + + for( unsigned ilevel=uh.levelmin(); ilevel <= uh.levelmax(); ++ilevel ) + { + int + nx = uh.get_grid(ilevel)->size(0), + ny = uh.get_grid(ilevel)->size(1), + nz = uh.get_grid(ilevel)->size(2); + + double h = 1.0/pow(2,ilevel), h2=h*h, err; + double sum = 0.0; + unsigned count = 0; + + #pragma omp parallel for reduction(+:sum,count) + for( int ix=0; ix Step No. " << std::setw(3) << niter << ", Max Err = " << err << std::endl; + std::cout << " ---------------------------------------------------\n"; + } + + if( (niter > 1) && ((err < acc) || (niter > 8)) ) + break; + + //uhnew = *m_pu; + //*m_pf = fsave; + } + + if( err > acc ) + std::cout << "Error : no convergence in Poisson solver" << std::endl; + else if( verbose ) + std::cout << " - Converged in " << niter << " steps to req. acc. of " << acc << std::endl; + + + //uh = uhnew; + //*m_pf = fsave; + + //.. make sure that the RHS does not contain the FAS corrections any more + for( int i=m_pf->levelmax(); i>0; --i )//(int)m_pf->levelmin(); --i ) + m_gridop.restrict( *m_pf->get_grid(i), *m_pf->get_grid(i-1) ); + + + return err; +} + +template< class S, class I, class O, typename T > +void solver::interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd& coarse, MeshvarBnd& fine ) +{ + MeshvarBnd *u = &fine; + MeshvarBnd *utop = &coarse; + + int + xoff = u->offset(0), + yoff = u->offset(1), + zoff = u->offset(2); + + //... don't do anything if we are not an additional refinement region + if( xoff == 0 && yoff == 0 && zoff == 0 ) + return; + + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + //.. perform cubic interpolation + mg_cubic().prolong_bnd( *utop, *u ); + + //.. match fluxes + #pragma omp parallel for schedule(dynamic) + for( int ix=-1; ix<=nx; ++ix ) + for( int iy=-1; iy<=ny; ++iy ) + for( int iz=-1; iz<=nz; ++iz ) + { + bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz); + + if( xbnd || ybnd || zbnd ) + { + + //... only deal with proper ghostzones + if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd)) + continue; + + int ixtop = (int)(0.5*(double)(ix))+xoff; + int iytop = (int)(0.5*(double)(iy))+yoff; + int iztop = (int)(0.5*(double)(iz))+zoff; + + if( ix==-1 ) ixtop=xoff-1; + if( iy==-1 ) iytop=yoff-1; + if( iz==-1 ) iztop=zoff-1; + + double flux;; + if( ix == -1 && iy%2==0 && iz%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + flux += ((*u)(ix+1,iy+j,iz+k)-(*u)(ix,iy+j,iz+k)); + } + + + + flux /= 4.0; + + double dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux; + } + } + } + +} + + + + + +template< class S, class I, class O, typename T > +void solver::setBC( unsigned ilevel ) +{ + //... set only on level before additional refinement starts + //if( ilevel == m_ilevelmin ) + if( ilevel == m_ilevelmin ) + { + MeshvarBnd *u = m_pu->get_grid(ilevel); + //int nbnd = u->m_nbnd, + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + /*for( int ix=-nbnd; ix=nx||iy<0||iy>=ny||iz<0||iz>=nz ) + (*u)(ix,iy,iz) = (*m_pubnd)(ix,iy,iz);*/ + + + for( int iy=0; iy *u = m_pu->get_grid(ilevel); + int nbnd = u->m_nbnd, + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + for( int ix=-nbnd; ix=nx||iy<0||iy>=ny||iz<0||iz>=nz ) + (*u)(ix,iy,iz) = 0.0; + }*/ + + +} + + + +//... enforce periodic boundary conditions +template< class S, class I, class O, typename T > +void solver::make_periodic( MeshvarBnd *u ) +{ + + + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + int nb = u->m_nbnd; + + + //if( u->offset(0) == 0 ) + for( int iy=-nb; iyoffset(1) == 0 ) + for( int ix=-nb; ixoffset(2) == 0 ) + for( int ix=-nb; ixm_nbnd == 1 ) + { + + #pragma omp parallel + { + + if( u->offset(0) == 0 ) + for( int iy=-1; iy<=ny; ++iy ) + for( int iz=-1; iz<=nz; ++iz ) + { + int iiy( (iy+ny)%ny ), iiz( (iz+nz)%nz ); + + (*u)(-1,iy,iz) = (*u)(nx-1,iiy,iiz); + (*u)(nx,iy,iz) = (*u)(0,iiy,iiz); + } + + if( u->offset(1) == 0 ) + for( int ix=-1; ix<=nx; ++ix ) + for( int iz=-1; iz<=nz; ++iz ) + { + int iix( (ix+nx)%nx ), iiz( (iz+nz)%nz ); + + (*u)(ix,-1,iz) = (*u)(iix,ny-1,iiz); + (*u)(ix,ny,iz) = (*u)(iix,0,iiz); + } + + if( u->offset(2) == 0 ) + for( int ix=-1; ix<=nx; ++ix ) + for( int iy=-1; iy<=ny; ++iy ) + { + int iix( (ix+nx)%nx ), iiy( (iy+ny)%ny ); + + (*u)(ix,iy,-1) = (*u)(iix,iiy,nz-1); + (*u)(ix,iy,nz) = (*u)(iix,iiy,0); + } + + + } + }else if( u->m_nbnd == 2 ){ + if( u->offset(0) == 0 ) + for( int iy=-2; iy<=ny+1; ++iy ) + for( int iz=-2; iz<=nz+1; ++iz ) + { + int iiy( (iy+ny)%ny ), iiz( (iz+nz)%nz ); + + (*u)(-1,iy,iz) = (*u)(nx-1,iiy,iiz); + (*u)(-2,iy,iz) = (*u)(nx-2,iiy,iiz); + (*u)(nx,iy,iz) = (*u)(0,iiy,iiz); + (*u)(nx+1,iy,iz) = (*u)(1,iiy,iiz); + } + + if( u->offset(1) == 0 ) + for( int ix=-2; ix<=nx+1; ++ix ) + for( int iz=-2; iz<=nz+1; ++iz ) + { + int iix( (ix+nx)%nx ), iiz( (iz+nz)%nz ); + + (*u)(ix,-1,iz) = (*u)(iix,ny-1,iiz); + (*u)(ix,-2,iz) = (*u)(iix,ny-2,iiz); + (*u)(ix,ny,iz) = (*u)(iix,0,iiz); + (*u)(ix,ny+1,iz) = (*u)(iix,1,iiz); + } + + if( u->offset(2) == 0 ) + for( int ix=-2; ix<=nx+1; ++ix ) + for( int iy=-2; iy<=ny+1; ++iy ) + { + int iix( (ix+nx)%nx ), iiy( (iy+ny)%ny ); + + (*u)(ix,iy,-1) = (*u)(iix,iiy,nz-1); + (*u)(ix,iy,-2) = (*u)(iix,iiy,nz-2); + (*u)(ix,iy,nz) = (*u)(iix,iiy,0); + (*u)(ix,iy,nz+1) = (*u)(iix,iiy,1); + } + }else + throw std::runtime_error("solver::make_periodic: don't know how to deal with boundary!"); +#endif +} + +#if 0 +template< class S, class I, class O, typename T > +void solver::interp_coarse_fine( unsigned ilevel, MeshvarBnd& coarse, MeshvarBnd& fine, bool bcf ) +{ + MeshvarBnd *u = &fine; + MeshvarBnd *utop = &coarse; + + + bcf = true;; + //bcf = false; + + int + xoff = u->offset(0), + yoff = u->offset(1), + zoff = u->offset(2); + + //... don't do anything if we are not an additional refinement region + if( xoff == 0 && yoff == 0 && zoff == 0 ) + return; + + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + //... set boundary condition for fine grid + +#pragma omp parallel for schedule(dynamic) + for( int ix=-1; ix<=nx; ++ix ) + for( int iy=-1; iy<=ny; ++iy ) + for( int iz=-1; iz<=nz; ++iz ) + { + bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz); + + //if(ix==-1||ix==nx||iy==-1||iy==ny||iz==-1||iz==nz) + if( xbnd || ybnd || zbnd ) + //if( xbnd ^ ybnd ^ zbnd ) + { + + //... only deal with proper ghostzones + if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd)) + continue; + + /*int ixtop = (int)(0.5*(double)(ix+2*xoff)+1e-3); + int iytop = (int)(0.5*(double)(iy+2*yoff)+1e-3); + int iztop = (int)(0.5*(double)(iz+2*zoff)+1e-3);*/ + + int ixtop = (int)(0.5*(double)(ix))+xoff; + int iytop = (int)(0.5*(double)(iy))+yoff; + int iztop = (int)(0.5*(double)(iz))+zoff; + + if( ix==-1 ) ixtop=xoff-1; + if( iy==-1 ) iytop=yoff-1; + if( iz==-1 ) iztop=zoff-1; + + double ustar1, ustar2, ustar3, uhat; + double fac = 0.5;//0.25; + double flux;; + if( ix == -1 && iy%2==0 && iz%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) ); + ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) ); + ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) ); + + uhat = interp2( /*-1.0, 0.0, 1.0, */ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + //(*u)(ix,iy+j,iz+k) = 0.0;//(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k), -1.0 ); + + (*u)(ix,iy+j,iz+k) = interp2left( uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k) ); + + flux += ((*u)(ix+1,iy+j,iz+k)-(*u)(ix,iy+j,iz+k)); + } + + + + flux /= 4.0; + + double dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux; + + //dflux *= 2.0; + + if( bcf ) + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix,iy+j,iz+k) -= dflux; + else + (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop+1,iytop,iztop) - 2.0*flux; + + + } + // right boundary + if( ix == nx && iy%2==0 && iz%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) ); + ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) ); + ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + //(*u)(ix,iy+j,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix-1,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), 1.0 ); + (*u)(ix,iy+j,iz+k) = interp2right( (*u)(ix-2,iy+j,iz+k), (*u)(ix-1,iy+j,iz+k), uhat ); + flux += ((*u)(ix,iy+j,iz+k)-(*u)(ix-1,iy+j,iz+k)); + } + flux /= 4.0; + + + double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop-1,iytop,iztop))/2.0 - flux; + //dflux *= 2.0; + + if( bcf ) + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix,iy+j,iz+k) += dflux; + else + (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop-1,iytop,iztop) + 2.0*flux; + + } + // bottom boundary + if( iy == -1 && ix%2==0 && iz%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) ); + ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); + ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + //(*u)(ix+j,iy,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k), -1.0 ); + (*u)(ix+j,iy,iz+k) = interp2left( uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k) ); + + flux += ((*u)(ix+j,iy+1,iz+k)-(*u)(ix+j,iy,iz+k)); + } + flux /= 4.0; + //(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop+1,iztop) - flux; + double dflux = ((*utop)(ixtop,iytop+1,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux; + //dflux *= 2.0; + if( bcf ) + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix+j,iy,iz+k) -= dflux; + else + (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop+1,iztop) - 2.0*flux; + + } + // top boundary + if( iy == ny && ix%2==0 && iz%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) ); + ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); + ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + //(*u)(ix+j,iy,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix+j,iy-1,iz+k), (*u)(ix+j,iy-2,iz+k), 1.0 ); + (*u)(ix+j,iy,iz+k) = interp2right( (*u)(ix+j,iy-2,iz+k), (*u)(ix+j,iy-1,iz+k), uhat ); + + flux += ((*u)(ix+j,iy,iz+k)-(*u)(ix+j,iy-1,iz+k)); + } + flux /= 4.0; + //(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop-1,iztop) + flux; + double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop-1,iztop))/2.0 - flux; + //dflux *= 2.0; + if( bcf ) + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix+j,iy,iz+k) += dflux; + else + (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop-1,iztop) + 2.0*flux; + + } + // front boundary + if( iz == -1 && ix%2==0 && iy%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) ); + ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); + ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + //(*u)(ix+j,iy+k,iz) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2), -1.0 ); + (*u)(ix+j,iy+k,iz) = interp2left( uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2) ); + + flux += ((*u)(ix+j,iy+k,iz+1)-(*u)(ix+j,iy+k,iz)); + } + flux /= 4.0; + //(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop+1) - flux; + double dflux = ((*utop)(ixtop,iytop,iztop+1)-(*utop)(ixtop,iytop,iztop))/2.0 - flux; + //dflux *= 2.0; + if( bcf ) + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix+j,iy+k,iz) -= dflux; + else + (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop+1) - 2.0*flux; + + } + // back boundary + if( iz == nz && ix%2==0 && iy%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) ); + ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); + ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + //(*u)(ix+j,iy+k,iz) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix+j,iy+k,iz-1), (*u)(ix+j,iy+k,iz-2), 1.0 ); + (*u)(ix+j,iy+k,iz) = interp2right( (*u)(ix+j,iy+k,iz-2), (*u)(ix+j,iy+k,iz-1), uhat ); + + flux += ((*u)(ix+j,iy+k,iz)-(*u)(ix+j,iy+k,iz-1)); + } + flux /= 4.0; + //(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop-1) + flux; + double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop,iztop-1))/2.0 - flux; + //dflux *= 2.0; + if( bcf ) + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix+j,iy+k,iz) += dflux; + else + (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop-1) + 2.0*flux; + } + + } + } + +} +#endif + +END_MULTIGRID_NAMESPACE + +#endif diff --git a/output.cc b/output.cc new file mode 100644 index 0000000..9975204 --- /dev/null +++ b/output.cc @@ -0,0 +1,73 @@ +/* + + output.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +*/ + +#include "output.hh" + + +std::map< std::string, output_plugin_creator *>& +get_output_plugin_map() +{ + static std::map< std::string, output_plugin_creator* > output_plugin_map; + return output_plugin_map; +} + +void print_output_plugins() +{ + std::map< std::string, output_plugin_creator *>& m = get_output_plugin_map(); + + std::map< std::string, output_plugin_creator *>::iterator it; + it = m.begin(); + std::cout << " - Available output plug-ins:\n"; + while( it!=m.end() ) + { + if( (*it).second ) + std::cout << "\t\'" << (*it).first << "\'\n"; + ++it; + } + +} + +output_plugin *select_output_plugin( config_file& cf ) +{ + std::string formatname = cf.getValue( "output", "format" ); + + output_plugin_creator *the_output_plugin_creator + = get_output_plugin_map()[ formatname ]; + + if( !the_output_plugin_creator ) + { + std::cerr << " - Error: output plug-in \'" << formatname << "\' not found." << std::endl; + print_output_plugins(); + throw std::runtime_error("Unknown output plug-in"); + + }else + std::cout << " - Selecting output plug-in \'" << formatname << "\'..." << std::endl; + + output_plugin *the_output_plugin + = the_output_plugin_creator->create( cf ); + + return the_output_plugin; +} + + + diff --git a/output.hh b/output.hh new file mode 100644 index 0000000..0c6c7e9 --- /dev/null +++ b/output.hh @@ -0,0 +1,176 @@ +/* + + output.hh - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +*/ + +#ifndef __OUTPUT_HH +#define __OUTPUT_HH + +#include +#include + +#include "general.hh" + + +/*! + * @class output_plugin + * @brief abstract base class for output plug-ins + * + * This class provides the abstract base class for all output plug-ins. + * All output plug-ins need to derive from it and implement the purely + * virtual member functions. + */ +class output_plugin +{ +protected: + + //! reference to the config_file object that holds all configuration options + config_file& cf_; + + //! output file or directory name + std::string fname_; + + //! minimum refinement level + unsigned levelmin_; + + //! maximum refinement level + unsigned levelmax_; + + std::vector + offx_, //!< vector describing the x-offset of each level + offy_, //!< vector describing the y-offset of each level + offz_, //!< vector describing the z-offset of each level + sizex_, //!< vector describing the extent in x of each level + sizey_, //!< vector describing the extent in y of each level + sizez_; //!< vector describing the extent in z of each level + + //! quick access function to query properties of the refinement grid from the configuration options + /*! @param name name of the config property + * @param icomp component index (0=x, 1=y, 2=z) + * @param oit output iterator (e.g. std::back_inserter for vectors) + */ + template< typename output_iterator > + void query_grid_prop( std::string name, int icomp, output_iterator oit ) + { + char str[128]; + for( unsigned i=levelmin_; i<=levelmax_; ++i ) + { + sprintf( str, "%s(%d,%d)", name.c_str(), i, icomp ); + *oit = cf_.getValue( "setup", str ); + ++oit; + } + } + +public: + + //! constructor + explicit output_plugin( config_file& cf ) + : cf_(cf) + { + fname_ = cf.getValue("output","filename"); + levelmin_ = cf.getValue( "setup", "levelmin" ); + levelmax_ = cf.getValue( "setup", "levelmax" ); + + query_grid_prop( "offset", 0, std::back_inserter(offx_) ); + query_grid_prop( "offset", 1, std::back_inserter(offy_) ); + query_grid_prop( "offset", 2, std::back_inserter(offz_) ); + + query_grid_prop( "size", 0, std::back_inserter(sizex_) ); + query_grid_prop( "size", 1, std::back_inserter(sizey_) ); + query_grid_prop( "size", 2, std::back_inserter(sizez_) ); + } + + //! destructor + virtual ~output_plugin() + { } + + //! purely virtual prototype to write the masses for each dark matter particle + virtual void write_dm_mass( const grid_hierarchy& gh ) = 0; + + //! purely virtual prototype to write the dark matter density field + virtual void write_dm_density( const grid_hierarchy& gh ) = 0; + + //! purely virtual prototype to write the dark matter gravitational potential (from which displacements are computed in 1LPT) + virtual void write_dm_potential( const grid_hierarchy& gh ) = 0; + + //! purely virtual prototype to write dark matter particle velocities + virtual void write_dm_velocity( int coord, const grid_hierarchy& gh ) = 0; + + //! purely virtual prototype to write dark matter particle positions + virtual void write_dm_position( int coord, const grid_hierarchy& gh ) = 0; + + //! purely virtual prototype to write the baryon velocities + virtual void write_gas_velocity( int coord, const grid_hierarchy& gh ) = 0; + + //! purely virtual prototype to write the baryon coordinates + virtual void write_gas_position( int coord, const grid_hierarchy& gh ) = 0; + + //! purely virtual prototype to write the baryon density field + virtual void write_gas_density( const grid_hierarchy& gh ) = 0; + + //! purely virtual prototype to write the baryon gravitational potential (from which displacements are computed in 1LPT) + virtual void write_gas_potential( const grid_hierarchy& gh ) = 0; + + //! purely virtual prototype for all things to be done at the very end + virtual void finalize( void ) = 0; +}; + +/*! + * @brief implements abstract factory design pattern for output plug-ins + */ +struct output_plugin_creator +{ + //! create an instance of a plug-in + virtual output_plugin * create( config_file& cf ) const = 0; + + //! destroy an instance of a plug-in + virtual ~output_plugin_creator() { } +}; + +//! maps the name of a plug-in to a pointer of the factory pattern +std::map< std::string, output_plugin_creator *>& get_output_plugin_map(); + +//! print a list of all registered output plug-ins +void print_output_plugins(); + +/*! + * @brief concrete factory pattern for output plug-ins + */ +template< class Derived > +struct output_plugin_creator_concrete : public output_plugin_creator +{ + //! register the plug-in by its name + output_plugin_creator_concrete( const std::string& plugin_name ) + { + get_output_plugin_map()[ plugin_name ] = this; + } + + //! create an instance of the plug-in + output_plugin * create( config_file& cf ) const + { + return new Derived( cf ); + } +}; + +//! failsafe version to select the output plug-in +output_plugin *select_output_plugin( config_file& cf ); + +#endif // __OUTPUT_HH diff --git a/plugins/HDF_IO.hh b/plugins/HDF_IO.hh new file mode 100755 index 0000000..f0cee50 --- /dev/null +++ b/plugins/HDF_IO.hh @@ -0,0 +1,1024 @@ +#ifndef __HDF_IO_HH +#define __HDF_IO_HH + +/* + HDF_IO.hh -- templated C++ HDF5 front-end functions, v1.2b + + Copyright (C) 2006-7 Oliver Hahn -- ojha@gmx.de + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include +#include +#include +#include +#include "hdf5.h" + +template +hid_t GetDataType( void ) +{ + if( typeid(T) == typeid(int) ) + return H5T_NATIVE_INT; + + if( typeid(T) == typeid(unsigned) ) + return H5T_NATIVE_UINT; + + if( typeid(T) == typeid(float) ) + return H5T_NATIVE_FLOAT; + + if( typeid(T) == typeid(double) ) + return H5T_NATIVE_DOUBLE; + + //if( typeid(T) == typeid(long long) ) + // return H5T_NATIVE_LLONG; + + std::cerr << " - Error: [HDF_IO] trying to evaluate unsupported type in GetDataType\n\n"; + return -1; +} + +#include + +class HDFException : public std::runtime_error { + public: + HDFException( const std::string &errtxt ) : std::runtime_error(errtxt) { } +}; + + +inline bool DoesFileExist( std::string Filename ){ + bool flag = false; + std::fstream fin(Filename.c_str(),std::ios::in|std::ios::binary); + if( fin.is_open() ) + flag=true; + fin.close(); + return flag; +} + +inline void AssertFileOpen( std::string Filename ) +{ + if( !DoesFileExist( Filename ) ){ + std::fstream fout( Filename.c_str(), std::ios::out|std::ios::binary); + fout.close(); + } +} + +inline void HDFCreateFile( std::string Filename ) +{ + hid_t HDF_FileID; + HDF_FileID = H5Fcreate( Filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT ); + H5Fclose( HDF_FileID ); +} + +template< typename T> +inline void HDFReadVector( const std::string Filename, const std::string ObjName, std::vector &Data ) +{ + HDFReadDataset( Filename, ObjName, Data ); +} + + +inline void HDFGetDatasetExtent( const std::string Filename, const std::string ObjName, std::vector &Extent ) +{ + hid_t HDF_FileID, HDF_DatasetID, HDF_DataspaceID; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( HDF_DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + //... get space associated with dataset and its extensions + HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); + + int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); + + hsize_t *dimsize = new hsize_t[ndims]; + + H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + + Extent.clear(); + for(int i=0; i +inline void HDFReadDataset( const std::string Filename, const std::string ObjName, std::vector &Data ) +{ + + hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID; + hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( HDF_DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + //... get space associated with dataset and its extensions + HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); + + int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); + + hsize_t dimsize[ndims]; + + H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + + HDF_StorageSize = 1; + for(int i=0; i +inline void HDFReadSelect( const std::string Filename, const std::string ObjName, const std::vector& ii, std::vector &Data ){ + + hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID, HDF_MemspaceID; + hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( HDF_DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + } + + //... get space associated with dataset and its extensions + HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); + + hsize_t block[2]; + block[0] = ii.size(); + block[1] = 1; + + + Data.clear(); + Data.reserve( block[0]*block[1] ); + Data.assign( block[0]*block[1], (T)1 ); + + HDF_MemspaceID = H5Screate_simple( 2, block, NULL ); + // H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); + H5Sselect_elements( HDF_DataspaceID, H5S_SELECT_SET, ii.size(), (const hsize_t *)&ii[0] ); + + H5Dread( HDF_DatasetID, HDF_Type, HDF_MemspaceID, HDF_DataspaceID, H5P_DEFAULT, &Data[0] ); + + H5Sclose( HDF_DataspaceID ); + H5Sclose( HDF_MemspaceID ); + H5Dclose( HDF_DatasetID ); + H5Fclose( HDF_FileID ); + +} + +template +inline void HDFReadVectorSelect( const std::string Filename, const std::string ObjName, const std::vector& ii, std::vector &Data ){ + + hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID, HDF_MemspaceID; +// hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( HDF_DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + //... get space associated with dataset and its extensions + HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); + int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); + hsize_t dimsize[ndims]; + H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + + hsize_t block[2]; + block[0] = ii.size(); + block[1] = 3; + + std::vector coord; + for( unsigned i=0; i().swap(ii); + + + + + if( ii.size() == 0 ){ + std::cerr << "attempted to read empty block. skipping....\n"; + return; + } + //std::cerr << "starting 2 read...\n"; + H5Sselect_none( HDF_DataspaceID ); + if( H5Sselect_elements( HDF_DataspaceID, H5S_SELECT_SET, coord.size()/2, (const hsize_t *)&coord[0] ) < 0 )//(const hsize_t**)&coord[0] ) < 0 ) + std::cerr << " - could not select elements properly\n"; + + if(H5Sselect_valid( HDF_DataspaceID )<=0 ){ + std::cerr << "\n - sorry, invalid element selection in file \'"<< Filename.c_str() << "\'. \n - dumping 10 first indices...\n"; + + /*for( unsigned i=0; i<10; ++i ){ + for( unsigned k=0; k<3; ++k ){ + std::cerr << coord[3*i+k] << " "; + } + std::cerr << "\n"; + }*/ + + return; + } + + std::vector().swap(coord); + Data.assign( block[0]*block[1], (T)0 ); + HDF_MemspaceID = H5Screate_simple( 2, &block[0], NULL ); + + H5Dread( HDF_DatasetID, HDF_Type, HDF_MemspaceID, HDF_DataspaceID, H5P_DEFAULT, &Data[0] ); + + + H5Sclose( HDF_DataspaceID ); + H5Sclose( HDF_MemspaceID ); + H5Dclose( HDF_DatasetID ); + H5Fclose( HDF_FileID ); + +} + +template< typename T > +inline void HDFReadVectorSlab( const std::string Filename, const std::string ObjName, unsigned nStart, unsigned nCount, std::vector &Data ) +{ + hsize_t + offset[2], + stride[2], + count[2], + block[2]; + + hid_t MemspaceID, FilespaceID, DatasetID, FileID; + hid_t Type = GetDataType(); + + FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + DatasetID = H5Dopen( FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( FileID ); + throw HDFException(ss.str()); + return; + } + + FilespaceID = H5Dget_space( DatasetID ); + + offset[0] = nStart; + offset[1] = 0; + + count[0] = 1; + count[1] = 1; + + stride[0] = 1; + stride[1] = 1; + + block[0] = nCount; + block[1] = 3; + + + Data.clear(); + Data.reserve( block[0]*block[1] ); + Data.assign( block[0]*block[1], (T)1 ); + + MemspaceID = H5Screate_simple( 2, block, NULL ); + H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); + + H5Dread( DatasetID, Type, MemspaceID, FilespaceID, H5P_DEFAULT, &Data[0] ); + + H5Sclose( FilespaceID ); + H5Sclose( MemspaceID ); + H5Dclose( DatasetID ); + H5Fclose( FileID ); +} + +template< typename T > +inline void HDFReadDatasetSlab( const std::string Filename, const std::string ObjName, unsigned nStart, unsigned nCount, std::vector &Data ) +{ + hsize_t + offset[2], + stride[2], + count[2], + block[2]; + + hid_t MemspaceID, FilespaceID, DatasetID, FileID; + hid_t Type = GetDataType(); + + FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + DatasetID = H5Dopen( FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( FileID ); + throw HDFException(ss.str()); + return; + } + + FilespaceID = H5Dget_space( DatasetID ); + + offset[0] = nStart; + offset[1] = 0; + + count[0] = 1; + count[1] = 1; + + stride[0] = 1; + stride[1] = 1; + + block[0] = nCount; + block[1] = 1; + + + Data.clear(); + Data.reserve( block[0]*block[1] ); + Data.assign( block[0]*block[1], (T)1 ); + + MemspaceID = H5Screate_simple( 2, block, NULL ); + H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); + + H5Dread( DatasetID, Type, MemspaceID, FilespaceID, H5P_DEFAULT, &Data[0] ); + + H5Sclose( FilespaceID ); + H5Sclose( MemspaceID ); + H5Dclose( DatasetID ); + H5Fclose( FileID ); +} + +template< typename T> +inline void HDFReadGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, T &Data ) +{ + + hid_t HDF_Type, HDF_FileID, HDF_GroupID, HDF_AttributeID; + // hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... attempt to open attribute + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_AttributeID = H5Aopen_name( HDF_GroupID, ObjName.c_str() ); + + if( HDF_FileID < 0 || HDF_GroupID < 0 || HDF_AttributeID < 0 ){ + std::stringstream ss; + ss << " - Warning: attribute \'" << GroupName.c_str() << "/" << ObjName.c_str() << "\' does not exist or is empty.\n"; + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + + H5Aread( HDF_AttributeID, HDF_Type, &Data ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + + H5Aclose( HDF_AttributeID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); + +} + +template< typename T> +inline void HDFReadGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, std::vector &Data ) +{ + + hid_t HDF_Type, HDF_FileID, HDF_GroupID, HDF_AttributeID, HDF_DataspaceID; + hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... attempt to open attribute + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_AttributeID = H5Aopen_name( HDF_GroupID, ObjName.c_str() ); + + if( HDF_FileID < 0 || HDF_GroupID < 0 || HDF_AttributeID < 0 ){ + std::stringstream ss; + ss << " - Warning: attribute \'" << GroupName.c_str() << "/" << ObjName.c_str() << "\' does not exist or is empty.\n"; + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + //... get space associated with dataset and its extensions + HDF_DataspaceID = H5Aget_space( HDF_AttributeID ); + + int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); + + hsize_t dimsize[ndims]; + + H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + + HDF_StorageSize = 1; + for(int i=0; i +inline void HDFWriteDataset( const std::string Filename, const std::string ObjName, const std::vector &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims = Data.size(); + HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); + HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), HDF_Type, + HDF_DataspaceID, H5P_DEFAULT ); + H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, + H5P_DEFAULT, &Data[0] ); + H5Dclose( HDF_DatasetID ); + H5Sclose( HDF_DataspaceID ); + + H5Fclose( HDF_FileID ); +} + +template< typename T > +inline void HDFWriteGroupDataset( const std::string Filename, const std::string GrpName, const std::string ObjName, const std::vector &Data ) +{ + + hid_t + HDF_FileID, + HDF_GroupID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_GroupID = H5Gopen( HDF_FileID, GrpName.c_str() ); + + HDF_Type = GetDataType(); + + HDF_Dims = Data.size(); + HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); + HDF_DatasetID = H5Dcreate( HDF_GroupID, ObjName.c_str(), HDF_Type, + HDF_DataspaceID, H5P_DEFAULT ); + H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, + H5P_DEFAULT, &Data[0] ); + H5Dclose( HDF_DatasetID ); + H5Sclose( HDF_DataspaceID ); + + H5Gclose( HDF_GroupID ); + + H5Fclose( HDF_FileID ); +} + + +template< typename T > +inline void HDFWriteDataset2D( const std::string Filename, const std::string ObjName, const std::vector< std::vector > &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims[2]; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims[0] = Data.size(); + HDF_Dims[1] = Data[0].size(); + HDF_DataspaceID = H5Screate_simple(2, HDF_Dims, NULL); + HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), HDF_Type, + HDF_DataspaceID, H5P_DEFAULT ); + + T *tmp = new T[HDF_Dims[0]*HDF_Dims[1]]; + + unsigned k=0; + for(unsigned i=0; i +inline void HDFWriteDataset3D( const std::string Filename, const std::string ObjName, unsigned nd[3], const std::vector< T > &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims[3]; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims[0] = nd[0]; + HDF_Dims[1] = nd[1]; + HDF_Dims[2] = nd[2]; + + //std::cerr << nd[0]< +inline void HDFWriteDataset3Ds( const std::string Filename, const std::string ObjName, unsigned nd[3], const std::vector< T > &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims[4]; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims[0] = 1; + HDF_Dims[1] = nd[0]; + HDF_Dims[2] = nd[1]; + HDF_Dims[3] = nd[2]; + + //std::cerr << nd[0]< +inline void HDFWriteDatasetVector( const std::string Filename, const std::string ObjName, const std::vector &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims[2]; + + // hsize_t HDF_Dims; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims[0] = (hsize_t)(Data.size()/3); + HDF_Dims[1] = 3; + + if( Data.size() % 3 != 0 ){ + std::cerr << " - Warning: Trying to write vector data in HDFWriteDatasetVector\n" + << " but array length not divisible by 3!\n\n"; + + } + + HDF_DataspaceID = H5Screate_simple(2, HDF_Dims, NULL); + HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), H5T_NATIVE_FLOAT, + HDF_DataspaceID, H5P_DEFAULT ); + H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, + H5P_DEFAULT, &Data[0] ); + H5Dclose( HDF_DatasetID ); + H5Sclose( HDF_DataspaceID ); + + H5Fclose( HDF_FileID ); +} + +inline void HDFCreateGroup( const std::string Filename, const std::string GroupName ) +{ + hid_t HDF_FileID, HDF_GroupID; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_GroupID = H5Gcreate( HDF_FileID, GroupName.c_str(), 0 ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); + +} + +inline void HDFCreateSubGroup( const std::string Filename, const std::string SuperGroupName, const std::string GroupName ) +{ + hid_t HDF_FileID, HDF_GroupID, HDF_SuperGroupID; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_SuperGroupID = H5Gopen( HDF_FileID, SuperGroupName.c_str() ); + HDF_GroupID = H5Gcreate( HDF_SuperGroupID, GroupName.c_str(), 0 ); + H5Gclose( HDF_GroupID ); + H5Gclose( HDF_SuperGroupID ); + H5Fclose( HDF_FileID ); + +} + +template< typename T > +inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, const std::vector< T > &Data ) +{ + hid_t HDF_FileID, + HDF_GroupID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + hsize_t HDF_Dims; + + HDF_DatatypeID = GetDataType(); + + HDF_Dims = (hsize_t)(Data.size()); + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); + + HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), HDF_DatatypeID, HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data[0] ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); +} + +template< typename T > +inline void HDFWriteDatasetAttribute( const std::string Filename, const std::string DatasetName, const std::string ObjName, const std::vector< T > &Data ) +{ + hid_t HDF_FileID, + HDF_DatasetID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + hsize_t HDF_Dims; + + HDF_DatatypeID = GetDataType(); + + HDF_Dims = (hsize_t)(Data.size()); + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_DatasetID = H5Dopen( HDF_FileID, DatasetName.c_str() ); + HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); + + HDF_AttributeID = H5Acreate(HDF_DatasetID, ObjName.c_str(), HDF_DatatypeID, HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data[0] ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Dclose( HDF_DatasetID ); + H5Fclose( HDF_FileID ); +} + + +template< typename T > +inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, T +Data ) +{ + + hid_t + HDF_FileID, + HDF_GroupID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + HDF_DatatypeID = GetDataType(); + + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_DataspaceID = H5Screate(H5S_SCALAR); + HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), HDF_DatatypeID, + HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); +} + +template< typename T > +inline void HDFWriteDatasetAttribute( const std::string Filename, const std::string DatasetName, const std::string ObjName, T Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + HDF_DatatypeID = GetDataType(); + + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_DatasetID = H5Dopen( HDF_FileID, DatasetName.c_str() ); + HDF_DataspaceID = H5Screate(H5S_SCALAR); + HDF_AttributeID = H5Acreate(HDF_DatasetID, ObjName.c_str(), HDF_DatatypeID, + HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Dclose( HDF_DatasetID ); + H5Fclose( HDF_FileID ); +} + +template< typename T > +inline void HDFWriteSubGroupAttribute( const std::string Filename, const std::string GroupName, const std::string SubGroupName, const std::string ObjName, T +Data ) +{ + + hid_t + HDF_FileID, + HDF_GroupID, + HDF_SubGroupID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + HDF_DatatypeID = GetDataType(); + + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + std::cerr << "opening " << GroupName.c_str() << std::endl; + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + std::cerr << "opening " << SubGroupName.c_str() << std::endl; + HDF_SubGroupID = H5Gopen( HDF_GroupID, SubGroupName.c_str() ); + HDF_DataspaceID = H5Screate(H5S_SCALAR); + HDF_AttributeID = H5Acreate(HDF_SubGroupID, ObjName.c_str(), HDF_DatatypeID, + HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Gclose( HDF_SubGroupID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); +} + +template<> +inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, std::string Data ) +{ + + hid_t + HDF_FileID, + HDF_GroupID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + HDF_DatatypeID = H5Tcopy( H5T_C_S1 ); + + H5Tset_size( HDF_DatatypeID, Data.size() ); + H5Tset_strpad(HDF_DatatypeID, H5T_STR_NULLPAD); + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_DataspaceID = H5Screate(H5S_SCALAR); + HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), HDF_DatatypeID, + HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, Data.c_str() ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); +} +#endif diff --git a/plugins/output_enzo.cc b/plugins/output_enzo.cc new file mode 100644 index 0000000..a66928b --- /dev/null +++ b/plugins/output_enzo.cc @@ -0,0 +1,396 @@ +/* + + output_enzo.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + */ + +#ifdef HAVE_HDF5 + +#include +#include + +#include "output.hh" + +#include "HDF_IO.hh" + +class enzo_output_plugin : public output_plugin +{ +protected: + + struct patch_header{ + int component_rank; + int component_size; + std::vector dimensions; + int rank; + std::vector top_grid_dims; + std::vector top_grid_end; + std::vector top_grid_start; + }; + + struct sim_header{ + std::vector dimensions; + std::vector offset; + float a_start; + float dx; + float h0; + float omega_b; + float omega_m; + float omega_v; + float vfact; + }; + + + sim_header the_sim_header; + + void write_sim_header( std::string fname, const sim_header& h ) + { + HDFWriteGroupAttribute( fname, "/", "Dimensions", h.dimensions ); + HDFWriteGroupAttribute( fname, "/", "Offset", h.offset ); + HDFWriteGroupAttribute( fname, "/", "a_start", h.a_start ); + HDFWriteGroupAttribute( fname, "/", "dx", h.dx ); + HDFWriteGroupAttribute( fname, "/", "h0", h.h0 ); + HDFWriteGroupAttribute( fname, "/", "omega_b", h.omega_b ); + HDFWriteGroupAttribute( fname, "/", "omega_m", h.omega_m ); + HDFWriteGroupAttribute( fname, "/", "omega_v", h.omega_v ); + HDFWriteGroupAttribute( fname, "/", "vfact", h.vfact ); + } + + void write_patch_header( std::string fname, std::string dsetname, const patch_header& h ) + { + HDFWriteDatasetAttribute( fname, dsetname, "Component_Rank", h.component_rank ); + HDFWriteDatasetAttribute( fname, dsetname, "Component_Size", h.component_size ); + HDFWriteDatasetAttribute( fname, dsetname, "Dimensions", h.dimensions ); + HDFWriteDatasetAttribute( fname, dsetname, "Rank", h.rank ); + HDFWriteDatasetAttribute( fname, dsetname, "TopGridDims", h.top_grid_dims ); + HDFWriteDatasetAttribute( fname, dsetname, "TopGridEnd", h.top_grid_end ); + HDFWriteDatasetAttribute( fname, dsetname, "TopGridStart", h.top_grid_start ); + } + + + void dump_grid_data( std::string fieldname, const grid_hierarchy& gh, double factor = 1.0, double add = 0.0 ) + { + char enzoname[256], filename[256]; + + for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel ) + { + std::vector ng; + ng.push_back( gh.get_grid(ilevel)->size(0) ); + ng.push_back( gh.get_grid(ilevel)->size(1) ); + ng.push_back( gh.get_grid(ilevel)->size(2) ); + + //... need to copy data because we need to get rid of the ghost zones + std::vector data; + data.reserve( ng[0]*ng[1]*ng[2] ); + + for( int k=0; k(&ng[0]), data ); + + //... header data for the patch + patch_header ph; + + ph.component_rank = 1; + ph.component_size = ng[0]*ng[1]*ng[2]; + ph.dimensions = ng; + ph.rank = 3; + + ph.top_grid_dims.assign(3,(int)pow(2,levelmin_)); + + //... offset_abs is in units of the current level cell size + + double rfac = 1.0/pow(2,ilevel-levelmin_); + + ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 0)*rfac) ); + ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 1)*rfac) ); + ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 2)*rfac) ); + + ph.top_grid_end.push_back( ph.top_grid_start[0] + (int)(ng[0]*rfac) ); + ph.top_grid_end.push_back( ph.top_grid_start[1] + (int)(ng[1]*rfac) ); + ph.top_grid_end.push_back( ph.top_grid_start[2] + (int)(ng[2]*rfac) ); + + write_patch_header( filename, enzoname, ph ); + } + } + +public: + + enzo_output_plugin( config_file& cf ) + : output_plugin( cf ) + { + if( mkdir( fname_.c_str(), 0777 ) ) + { + perror( fname_.c_str() ); + throw std::runtime_error("Error in enzo_output_plugin!"); + } + + bool bhave_hydro = cf_.getValue("setup","baryons"); + + the_sim_header.dimensions.push_back( (int)pow(2,levelmin_) ); + the_sim_header.dimensions.push_back( (int)pow(2,levelmin_) ); + the_sim_header.dimensions.push_back( (int)pow(2,levelmin_) ); + + the_sim_header.offset.push_back( 0 ); + the_sim_header.offset.push_back( 0 ); + the_sim_header.offset.push_back( 0 ); + + the_sim_header.a_start = 1.0/(1.0+cf.getValue("setup","zstart")); + the_sim_header.dx = cf.getValue("setup","boxlength")/the_sim_header.dimensions[0]/(cf.getValue("cosmology","H0")*0.01); // not sure?!? + the_sim_header.h0 = cf.getValue("cosmology","H0")*0.01; + + if( bhave_hydro ) + the_sim_header.omega_b = cf.getValue("cosmology","Omega_b"); + else + the_sim_header.omega_b = 0.0; + + the_sim_header.omega_m = cf.getValue("cosmology","Omega_m"); + the_sim_header.omega_v = cf.getValue("cosmology","Omega_L"); + the_sim_header.vfact = cf.getValue("cosmology","vfact")*the_sim_header.h0; //.. need to multiply by h, ENZO wants this factor for non h-1 units + + } + + ~enzo_output_plugin() + { } + + void write_dm_mass( const grid_hierarchy& gh ) + { /* do nothing, not needed */ } + + + void write_dm_density( const grid_hierarchy& gh ) + { /* do nothing, not needed */ + + bool bhave_hydro = cf_.getValue("setup","baryons"); + char filename[256]; + unsigned nbase = (unsigned)pow(2,levelmin_); + + sprintf( filename, "%s/parameter_file.txt", fname_.c_str() ); + + std::ofstream ofs( filename, std::ios::trunc ); + + ofs + << "#\n" + << "ProblemType = 30 // cosmology simulation\n" + << "TopGridRank = 3\n" + << "TopGridDimensions = " << nbase << " " << nbase << " " << nbase << "\n" + << "SelfGravity = 1 // gravity on\n" + << "TopGridGravityBoundary = 0 // Periodic BC for gravity\n" + << "LeftFaceBoundaryCondition = 3 3 3 // same for fluid\n" + << "RightFaceBoundaryCondition = 3 3 3\n" + << "\n" + << "#\n"; + + if( bhave_hydro ) + ofs + << "CosmologySimulationOmegaBaryonNow = " << the_sim_header.omega_b << "\n" + << "CosmologySimulationOmegaCDMNow = " << the_sim_header.omega_m-the_sim_header.omega_b << "\n"; + else + ofs + << "CosmologySimulationOmegaBaryonNow = " << 0.0 << "\n" + << "CosmologySimulationOmegaCDMNow = " << the_sim_header.omega_m << "\n"; + + if( bhave_hydro ) + ofs + << "CosmologySimulationDensityName = GridDensity\n" + << "CosmologySimulationVelocity1Name = GridVelocities_x\n" + << "CosmologySimulationVelocity2Name = GridVelocities_y\n" + << "CosmologySimulationVelocity3Name = GridVelocities_z\n"; + + ofs + << "CosmologySimulationCalculatePositions = 1\n" + << "CosmologySimulationParticleVelocity1Name = ParticleVelocities_x\n" + << "CosmologySimulationParticleVelocity2Name = ParticleVelocities_y\n" + << "CosmologySimulationParticleVelocity3Name = ParticleVelocities_z\n" + << "\n" + << "#\n" + << "# define cosmology parameters\n" + << "#\n" + << "ComovingCoordinates = 1 // Expansion ON\n" + << "CosmologyOmegaMatterNow = " << the_sim_header.omega_m << "\n" + << "CosmologyOmegaLambdaNow = " << the_sim_header.omega_v << "\n" + << "CosmologyHubbleConstantNow = " << the_sim_header.h0 << " // in 100 km/s/Mpc\n" + << "CosmologyComovingBoxSize = " << cf_.getValue("setup","boxlength") << " // in Mpc/h\n" + << "CosmologyMaxExpansionRate = 0.015 // maximum allowed delta(a)/a\n" + << "CosmologyInitialRedshift = " << cf_.getValue("setup","zstart") << " //\n" + << "CosmologyFinalRedshift = 0 //\n" + << "GravitationalConstant = 1 // this must be true for cosmology\n" + << "#\n" + << "#\n" + << "CosmologySimulationNumberOfInitialGrids = " << 1+levelmax_-levelmin_ << "\n"; + + + //... only for additionally refined grids + for( unsigned ilevel = 0; ilevel< levelmax_-levelmin_; ++ilevel ) + { + double h = 1.0/pow(2,levelmin_+1+ilevel); + + ofs + + << "CosmologySimulationGridDimension[" << 1+ilevel << "] = " + << gh.size( levelmin_+ilevel+1, 0 ) << " " + << gh.size( levelmin_+ilevel+1, 1 ) << " " + << gh.size( levelmin_+ilevel+1, 2 ) << "\n" + + << "CosmologySimulationGridLeftEdge[" << 1+ilevel << "] = " + << h*gh.offset_abs(levelmin_+ilevel+1, 0) << " " + << h*gh.offset_abs(levelmin_+ilevel+1, 1) << " " + << h*gh.offset_abs(levelmin_+ilevel+1, 2) << "\n" + + << "CosmologySimulationGridRightEdge[" << 1+ilevel << "] = " + << h*(gh.offset_abs(levelmin_+ilevel+1, 0)+gh.size( levelmin_+ilevel+1, 0 )) << " " + << h*(gh.offset_abs(levelmin_+ilevel+1, 1)+gh.size( levelmin_+ilevel+1, 1 )) << " " + << h*(gh.offset_abs(levelmin_+ilevel+1, 2)+gh.size( levelmin_+ilevel+1, 2 )) << "\n" + + << "CosmologySimulationGridLevel[" << 1+ilevel << "] = " << 1+ilevel << "\n"; + } + + + + + + + } + + + void write_dm_velocity( int coord, const grid_hierarchy& gh ) + { + char enzoname[256]; + sprintf( enzoname, "ParticleVelocities_%c", (char)('x'+coord) ); + + double vunit = 1.0/(1.225e2*sqrt(the_sim_header.omega_m/the_sim_header.a_start));// /the_sim_header.h0; + + dump_grid_data( enzoname, gh, vunit ); + } + + + void write_dm_position( int coord, const grid_hierarchy& gh ) + { + char fieldname[256]; + sprintf( fieldname, "ParticlePositions_%c", (char)('x'+coord) ); + + char enzoname[256], filename[256]; + + for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel ) + { + std::vector ng; + ng.push_back( gh.get_grid(ilevel)->size(0) ); + ng.push_back( gh.get_grid(ilevel)->size(1) ); + ng.push_back( gh.get_grid(ilevel)->size(2) ); + + //... need to copy data because we need to get rid of the ghost zones + std::vector data; + data.reserve( ng[0]*ng[1]*ng[2] ); + + for( int k=0; k(&ng[0]), data ); + + //-------------------------------------------------------------------------- + //... header data for the patch + patch_header ph; + + ph.component_rank = 1; + ph.component_size = ng[0]*ng[1]*ng[2]; + ph.dimensions = ng; + ph.rank = 3; + + ph.top_grid_dims.assign(3,(int)pow(2,levelmin_)); + + //... offset_abs is in units of the current level cell size + + double rfac = 1.0/pow(2,ilevel-levelmin_); + + ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 0)*rfac) ); + ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 1)*rfac) ); + ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 2)*rfac) ); + + ph.top_grid_end.push_back( ph.top_grid_start[0] + (int)(ng[0]*rfac) ); + ph.top_grid_end.push_back( ph.top_grid_start[1] + (int)(ng[1]*rfac) ); + ph.top_grid_end.push_back( ph.top_grid_start[2] + (int)(ng[2]*rfac) ); + + write_patch_header( filename, enzoname, ph ); + //-------------------------------------------------------------------------- + } + } + + void write_dm_potential( const grid_hierarchy& gh ) + { } + + void write_gas_potential( const grid_hierarchy& gh ) + { } + + + void write_gas_velocity( int coord, const grid_hierarchy& gh ) + { + double vunit = 1.0/(1.225e2*sqrt(the_sim_header.omega_m/the_sim_header.a_start)); + + char enzoname[256]; + sprintf( enzoname, "GridVelocities_%c", (char)('x'+coord) ); + dump_grid_data( enzoname, gh, vunit ); + } + + + void write_gas_position( int coord, const grid_hierarchy& gh ) + { + /* do nothing, not needed */ + } + + + void write_gas_density( const grid_hierarchy& gh ) + { + + char enzoname[256]; + sprintf( enzoname, "GridDensity" ); + dump_grid_data( enzoname, gh, the_sim_header.omega_b/the_sim_header.omega_m, 1.0 ); + } + + + void finalize( void ) + { } + + +}; + +namespace{ + output_plugin_creator_concrete creator("enzo"); +} + +#endif + diff --git a/plugins/output_gadget2.cc b/plugins/output_gadget2.cc new file mode 100644 index 0000000..1e538ca --- /dev/null +++ b/plugins/output_gadget2.cc @@ -0,0 +1,670 @@ +/* + + output_gadget2.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + */ + +#include +#include "output.hh" + +template< typename T_store=float > +class gadget2_output_plugin : public output_plugin +{ +protected: + + std::ofstream ofs_; + bool bmultimass_; + + + typedef struct io_header + { + int npart[6]; + double mass[6]; + double time; + double redshift; + int flag_sfr; + int flag_feedback; + unsigned int npartTotal[6]; + int flag_cooling; + int num_files; + double BoxSize; + double Omega0; + double OmegaLambda; + double HubbleParam; + int flag_stellarage; + int flag_metals; + unsigned int npartTotalHighWord[6]; + int flag_entropy_instead_u; + char fill[60]; + }header; + + + header header_; + + std::string fname; + + enum iofields { + id_dm_mass, id_dm_vel, id_dm_pos, id_gas_vel, id_gas_rho, id_gas_temp + }; + + unsigned block_buf_size_; + //bool bbndparticles_; + bool bmorethan2bnd_; + + + void assemble_scalar( unsigned nptot, std::string ifname ) + { + T_store *tmp; + + std::ifstream + ifs( ifname.c_str(), std::ios::binary ); + + int + npleft = nptot, + n2read = std::min((int)block_buf_size_,npleft); + + tmp = new T_store[block_buf_size_]; + + std::vector adata; + adata.reserve( block_buf_size_ ); + + unsigned blk; + ifs.read( (char *)&blk, sizeof(int) ); + if( blk != nptot*sizeof(T_store) ){ + delete[] tmp; + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + npleft = nptot; + n2read = std::min((int)block_buf_size_,npleft); + int blksize = nptot*sizeof(T_store); + + ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); + while( n2read > 0 ) + { + ifs.read( reinterpret_cast(&tmp[0]), n2read*sizeof(T_store) ); + ofs_.write( reinterpret_cast(&tmp[0]), n2read*sizeof(T_store) ); + npleft -= n2read; + n2read = std::min( (int)block_buf_size_,npleft ); + } + ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); + + remove( fname.c_str() ); + + + } + + void assemble_vector( unsigned nptot, std::string fname1, std::string fname2,std::string fname3 ) + { + T_store *tmp1, *tmp2, *tmp3; + + std::ifstream + ifs1( fname1.c_str(), std::ios::binary ), + ifs2( fname2.c_str(), std::ios::binary ), + ifs3( fname3.c_str(), std::ios::binary ); + + int + npleft = nptot, + n2read = std::min((int)block_buf_size_,npleft); + + + tmp1 = new T_store[block_buf_size_]; + tmp2 = new T_store[block_buf_size_]; + tmp3 = new T_store[block_buf_size_]; + + std::vector adata3; + adata3.reserve( 3*block_buf_size_ ); + + unsigned blk; + ifs1.read( (char *)&blk, sizeof(int) ); + if( blk != nptot*sizeof(T_store) ){ + delete[] tmp1; + delete[] tmp2; + delete[] tmp3; + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + + ifs2.read( (char *)&blk, sizeof(int) ); + if( blk != nptot*sizeof(T_store) ) + { + delete[] tmp1; + delete[] tmp2; + delete[] tmp3; + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + ifs3.read( (char *)&blk, sizeof(int) ); + if( blk != nptot*sizeof(T_store) ) + { + delete[] tmp1; + delete[] tmp2; + delete[] tmp3; + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + + //int blksize = 3*nptot*sizeof(T_store); + //ofs_.write( (char *)&blksize, sizeof(int) ); + + while( n2read > 0 ) + { + //ifs1.read( (char*)&tmp1[0], n2read*sizeof(T_store) ); + ifs1.read( reinterpret_cast(&tmp1[0]), n2read*sizeof(T_store) ); + ifs2.read( reinterpret_cast(&tmp2[0]), n2read*sizeof(T_store) ); + ifs3.read( reinterpret_cast(&tmp3[0]), n2read*sizeof(T_store) ); + + for( int i=0; i(&adata3[0]), 3*n2read*sizeof(T_store) ); + + adata3.clear(); + npleft -= n2read; + n2read = std::min( (int)block_buf_size_,npleft ); + } + //ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); + + remove( fname1.c_str() ); + remove( fname2.c_str() ); + remove( fname3.c_str() ); + + + delete[] tmp1; + delete[] tmp2; + delete[] tmp3; + + } + + + void assemble_gadget_file( void ) + { + int blksize = sizeof(header); + + //... write the header ....................................................... + ofs_.write( (char *)&blksize, sizeof(int) ); + ofs_.write( (char *)&header_, sizeof(header) ); + ofs_.write( (char *)&blksize, sizeof(int) ); + + //............................................................................ + //... copy from the temporary files, interleave the data and save ............ + + char fnx[256],fny[256],fnz[256],fnvx[256],fnvy[256],fnvz[256],fnm[256]; + + sprintf( fnx, "___ic_temp_%05d.bin", 100*id_dm_pos+0 ); + sprintf( fny, "___ic_temp_%05d.bin", 100*id_dm_pos+1 ); + sprintf( fnz, "___ic_temp_%05d.bin", 100*id_dm_pos+2 ); + sprintf( fnvx, "___ic_temp_%05d.bin", 100*id_dm_vel+0 ); + sprintf( fnvy, "___ic_temp_%05d.bin", 100*id_dm_vel+1 ); + sprintf( fnvz, "___ic_temp_%05d.bin", 100*id_dm_vel+2 ); + sprintf( fnm, "___ic_temp_%05d.bin", 100*id_dm_mass ); + + std::ifstream + ifs1( fnx, std::ios::binary ), + ifs2( fny, std::ios::binary ), + ifs3( fnz, std::ios::binary ); + + + const int + nptot = header_.npart[1]+header_.npart[5]; + int + npleft = nptot, + n2read = std::min((int)block_buf_size_,npleft); + + + //... particle coordinates .................................................. + unsigned blk; + ifs1.read( (char *)&blk, sizeof(int) ); + if( blk != nptot*sizeof(T_store) ) + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + + ifs2.read( (char *)&blk, sizeof(int) ); + if( blk != nptot*sizeof(T_store) ) + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + + ifs3.read( (char *)&blk, sizeof(int) ); + if( blk != nptot*sizeof(T_store) ) + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + + std::vector adata3; + adata3.reserve( 3*block_buf_size_ ); + T_store *tmp1, *tmp2, *tmp3; + + tmp1 = new T_store[block_buf_size_]; + tmp2 = new T_store[block_buf_size_]; + tmp3 = new T_store[block_buf_size_]; + + blksize = 3*nptot*sizeof(T_store); + ofs_.write( (char *)&blksize, sizeof(int) ); + + while( n2read > 0 ) + { + //ifs1.read( (char*)&tmp1[0], n2read*sizeof(T_store) ); + ifs1.read( reinterpret_cast(&tmp1[0]), n2read*sizeof(T_store) ); + ifs2.read( reinterpret_cast(&tmp2[0]), n2read*sizeof(T_store) ); + ifs3.read( reinterpret_cast(&tmp3[0]), n2read*sizeof(T_store) ); + + for( int i=0; i(&adata3[0]), 3*n2read*sizeof(T_store) ); + + adata3.clear(); + npleft -= n2read; + n2read = std::min( (int)block_buf_size_,npleft ); + } + ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); + + remove( fnx ); + remove( fny ); + remove( fnz ); + + //... particle velocities .................................................. + ifs1.close(); ifs1.open( fnvx, std::ios::binary ); + ifs2.close(); ifs2.open( fnvy, std::ios::binary ); + ifs3.close(); ifs3.open( fnvz, std::ios::binary ); + + ifs1.read( (char *)&blk, sizeof(int) ); + if( blk != nptot*sizeof(T_store)){ + delete[] tmp1; + delete[] tmp2; + delete[] tmp3; + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + ifs2.read( (char *)&blk, sizeof(int) ); + if( blk != nptot*sizeof(T_store)){ + delete[] tmp1; + delete[] tmp2; + delete[] tmp3; + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + ifs3.read( (char *)&blk, sizeof(int) ); + if( blk != nptot*sizeof(T_store)){ + delete[] tmp1; + delete[] tmp2; + delete[] tmp3; + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + npleft = nptot; + n2read = std::min((int)block_buf_size_,npleft); + + + ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); + + while( n2read > 0 ) + { + ifs1.read( reinterpret_cast(&tmp1[0]), n2read*sizeof(T_store) ); + ifs2.read( reinterpret_cast(&tmp2[0]), n2read*sizeof(T_store) ); + ifs3.read( reinterpret_cast(&tmp3[0]), n2read*sizeof(T_store) ); + + for( int i=0; i(&adata3[0]), 3*n2read*sizeof(T_store) ); + + adata3.clear(); + npleft -= n2read; + n2read = std::min( (int)block_buf_size_,npleft ); + } + ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); + remove( fnvx ); + remove( fnvy ); + remove( fnvz ); + + + delete[] tmp2; + delete[] tmp3; + + + //... particle IDs .......................................................... + std::vector ids(block_buf_size_,0); + + unsigned idcount = 0; + npleft = nptot; + n2read = std::min((int)block_buf_size_,npleft); + blksize = sizeof(unsigned)*nptot; + + //... generate contiguous IDs and store in file .......................// + ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); + while( n2read > 0 ) + { + for( int i=0; i(&ids[0]), n2read*sizeof(unsigned) ); + ids.clear(); + npleft -= n2read; + n2read = std::min( (int)block_buf_size_,npleft ); + } + ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); + + + //... particle masses ....................................................... + if( bmultimass_ && bmorethan2bnd_ ) + { + unsigned npcoarse = header_.npart[5]; + + ifs1.close(); + ifs1.open( fnm, std::ios::binary ); + + if( !ifs1.good() ) + throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); + + ifs1.read( (char *)&blk, sizeof(int) ); + if( blk != npcoarse*sizeof(T_store)){ + delete[] tmp1; + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + + npleft = npcoarse; + n2read = std::min((int)block_buf_size_,npleft); + blksize = npcoarse*sizeof(T_store); + + ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); + while( n2read > 0 ) + { + ifs1.read( reinterpret_cast(&tmp1[0]), n2read*sizeof(T_store) ); + ofs_.write( reinterpret_cast(&tmp1[0]), n2read*sizeof(T_store) ); + npleft -= n2read; + n2read = std::min( (int)block_buf_size_,npleft ); + } + ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); + + remove( fnm ); + } + + delete[] tmp1; + ofs_.flush(); + } + + +public: + + gadget2_output_plugin( config_file& cf )//std::string afname, Cosmology cosm, Parameters param, unsigned block_buf_size = 100000 ) + : output_plugin( cf ), ofs_( fname_.c_str(), std::ios::binary|std::ios::trunc ) + { + block_buf_size_ = cf_.getValueSafe("output","gadget_blksize",100000); + //bbndparticles_ = !cf_.getValueSafe("output","gadget_nobndpart",false); + + bmorethan2bnd_ = false; + if( levelmax_ > levelmin_ +1) + bmorethan2bnd_ = true; + + bmultimass_ = true; + if( levelmax_ == levelmin_ ) + bmultimass_ = false; + + + for( int i=0; i<6; ++i ) + { + header_.npart[i] = 0; + header_.npartTotal[i] = 0; + header_.npartTotalHighWord[i] = 0; + header_.mass[i] = 0.0; + } + + //... set time ...................................................... + header_.redshift = cf.getValue("setup","zstart"); + header_.time = 1.0/(1.0+header_.redshift); + + //... SF flags + header_.flag_sfr = 0; + header_.flag_feedback = 0; + header_.flag_cooling = 0; + + //... + header_.num_files = 1; + header_.BoxSize = cf.getValue("setup","boxlength"); + header_.Omega0 = cf.getValue("cosmology","Omega_m"); + header_.OmegaLambda = cf.getValue("cosmology","Omega_L"); + header_.HubbleParam = cf.getValue("cosmology","H0"); + + header_.flag_stellarage = 0; + header_.flag_metals = 0; + + + header_.flag_entropy_instead_u = 0; + } + + + void write_dm_mass( const grid_hierarchy& gh ) + { + double rhoc = 27.7519737; // h^2 1e10 M_sol / Mpc^3 + + header_.mass[1] = header_.Omega0 * rhoc * pow(header_.BoxSize,3.)/pow(2,3*levelmax_); + + if( bmorethan2bnd_ ) + { + + std::vector temp_dat; + temp_dat.clear(); + + for( int ilevel=gh.levelmax()-1; ilevel>=(int)gh.levelmin(); --ilevel ) + { + double pmass = header_.Omega0 * rhoc * pow(header_.BoxSize,3.)/pow(2,3*ilevel); + + for( unsigned i=0; isize(0); ++i ) + for( unsigned j=0; jsize(1); ++j ) + for( unsigned k=0; ksize(2); ++k ) + if( ! gh.is_refined(ilevel,i,j,k) ) + temp_dat.push_back( pmass ); + } + + char temp_fname[256]; + sprintf( temp_fname, "___ic_temp_%05d.bin", 100*id_dm_mass ); + std::ofstream ofs_temp( temp_fname, std::ios::binary|std::ios::trunc ); + + int blksize = sizeof(T_store)*temp_dat.size(); + ofs_temp.write( (char *)&blksize, sizeof(int) ); + ofs_temp.write( (char*)&temp_dat[0], blksize ); + ofs_temp.write( (char *)&blksize, sizeof(int) ); + + + } + else + { + header_.mass[5] = header_.Omega0 * rhoc * pow(header_.BoxSize,3.)/pow(2,3*levelmin_); + } + } + + + void write_dm_position( int coord, const grid_hierarchy& gh ) + { + //... count number of leaf cells ...// + unsigned npcoarse = 0, npfine = 0; + + npfine = gh.count_leaf_cells(gh.levelmax(), gh.levelmax()); + if( bmultimass_ ) + npcoarse = gh.count_leaf_cells(gh.levelmin(), gh.levelmax()-1); + + + //... determine if we need to shift the coordinates back + double *shift = NULL; + + if( cf_.getValueSafe("output","shift_back",false ) ) + { + if( coord == 0 ) + std::cout << " - gadget2 output plug-in will shift particle positions back...\n"; + + double h = 1.0/pow(2,levelmin_); + shift = new double[3]; + shift[0] = -(double)cf_.getValue( "setup", "shift_x" )*h; + shift[1] = -(double)cf_.getValue( "setup", "shift_y" )*h; + shift[2] = -(double)cf_.getValue( "setup", "shift_z" )*h; + } + + + //... + header_.npart[1] = npfine; + header_.npart[5] = npcoarse; + header_.npartTotal[1] = npfine; + header_.npartTotal[5] = npcoarse; + header_.npartTotalHighWord[1] = 0; + header_.npartTotalHighWord[5] = 0; + + + //... collect displacements and convert to absolute coordinates with correct + //... units + std::vector temp_data; + temp_data.reserve( npfine ); + + double xfac = header_.BoxSize; + + for( int ilevel=gh.levelmax(); ilevel>=(int)gh.levelmin(); --ilevel ) + for( unsigned i=0; isize(0); ++i ) + for( unsigned j=0; jsize(1); ++j ) + for( unsigned k=0; ksize(2); ++k ) + if( ! gh.is_refined(ilevel,i,j,k) ) + { + double xx[3]; + gh.cell_pos(ilevel, i, j, k, xx); + if( shift != NULL ) + xx[coord] += shift[coord]; + xx[coord] = fmod( (xx[coord]+(*gh.get_grid(ilevel))(i,j,k))*xfac + header_.BoxSize, header_.BoxSize ); + temp_data.push_back( xx[coord] ); + } + + + //... dump to temporary file + char temp_fname[256]; + + sprintf( temp_fname, "___ic_temp_%05d.bin", 100*id_dm_pos+coord ); + std::ofstream ofs_temp( temp_fname, std::ios::binary|std::ios::trunc ); + + int blksize = sizeof(T_store)*temp_data.size(); + ofs_temp.write( (char *)&blksize, sizeof(int) ); + ofs_temp.write( (char*)&temp_data[0], blksize ); + ofs_temp.write( (char *)&blksize, sizeof(int) ); + ofs_temp.close(); + + + + if( shift != NULL ) + delete[] shift; + + } + + void write_dm_velocity( int coord, const grid_hierarchy& gh ) + { + //... count number of leaf cells ...// + unsigned npcoarse = 0, npfine = 0; + + npfine = gh.count_leaf_cells(gh.levelmax(), gh.levelmax()); + if( bmultimass_ ) + npcoarse = gh.count_leaf_cells(gh.levelmin(), gh.levelmax()-1); + + header_.npart[1] = npfine; + header_.npart[5] = npcoarse; + header_.npartTotal[1] = npfine; + header_.npartTotal[5] = npcoarse; + header_.npartTotalHighWord[1] = 0; + header_.npartTotalHighWord[5] = 0; + + //... collect displacements and convert to absolute coordinates with correct + //... units + std::vector temp_data; + temp_data.reserve( npfine+npcoarse ); + + float isqrta = 1.0f/sqrt(header_.time); + //float vfac = cosm_.astart/100.0*isqrta*param_.boxlength; + float vfac = isqrta*header_.BoxSize; + + for( int ilevel=levelmax_; ilevel>=(int)levelmin_; --ilevel ) + for( unsigned i=0; isize(0); ++i ) + for( unsigned j=0; jsize(1); ++j ) + for( unsigned k=0; ksize(2); ++k ) + if( ! gh.is_refined(ilevel,i,j,k) ) + temp_data.push_back( (*gh.get_grid(ilevel))(i,j,k) * vfac ); + + //... dump to temporary file + char temp_fname[256]; + + sprintf( temp_fname, "___ic_temp_%05d.bin", 100*id_dm_vel+coord ); + std::ofstream ofs_temp( temp_fname, std::ios::binary|std::ios::trunc ); + + int blksize = sizeof(T_store)*temp_data.size(); + ofs_temp.write( (char *)&blksize, sizeof(int) ); + ofs_temp.write( (char*)&temp_data[0], blksize ); + ofs_temp.write( (char *)&blksize, sizeof(int) ); + ofs_temp.close(); + + + } + + void write_dm_density( const grid_hierarchy& gh ) + { + //... we don't care about DM density for Gadget + } + + void write_dm_potential( const grid_hierarchy& gh ) + { } + + void write_gas_potential( const grid_hierarchy& gh ) + { } + + + + //... write data for gas -- don't do this + void write_gas_velocity( int coord, const grid_hierarchy& gh ) + { + std::cout << " - WARNING: Gadget-2 output plug-in does not support baryons yet!\n" + << " Baryon data is not written to file!" << std::endl; + } + + void write_gas_position( int coord, const grid_hierarchy& gh ) + { + std::cout << " - WARNING: Gadget-2 output plug-in does not support baryons yet!\n" + << " Baryon data is not written to file!" << std::endl; + } + + void write_gas_density( const grid_hierarchy& gh ) + { + std::cout << " - WARNING: Gadget-2 output plug-in does not support baryons yet!\n" + << " Baryon data is not written to file!" << std::endl; + } + + void finalize( void ) + { + this->assemble_gadget_file(); + } +}; + + + +namespace{ + output_plugin_creator_concrete< gadget2_output_plugin > creator1("gadget2"); +#ifndef SINGLE_PRECISION + output_plugin_creator_concrete< gadget2_output_plugin > creator2("gadget2_double"); +#endif +} + diff --git a/plugins/output_generic.cc b/plugins/output_generic.cc new file mode 100644 index 0000000..8308ccd --- /dev/null +++ b/plugins/output_generic.cc @@ -0,0 +1,192 @@ +/* + + output_generic.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + */ + + +#ifdef HAVE_HDF5 + +#include "output.hh" +#include "HDF_IO.hh" + + +class generic_output_plugin : public output_plugin +{ +protected: + + using output_plugin::cf_; + + template< typename Tt > + void write2HDF5( std::string fname, std::string dname, const MeshvarBnd& data ) + { + int nres = data.size(0), nb = data.m_nbnd; + std::vector vdata; + vdata.reserve((unsigned)pow(nres+2*nb,3)); + for(int i=-nb; i creator("generic"); +} + + +#endif + diff --git a/plugins/output_grafic2.cc b/plugins/output_grafic2.cc new file mode 100644 index 0000000..58f4f6c --- /dev/null +++ b/plugins/output_grafic2.cc @@ -0,0 +1,325 @@ +/* + + output_grafic2.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + */ + +#include +#include +#include +#include "output.hh" + + +//! Implementation of class grafic2_output_plugin +/*! + This class implements a grafic-2 (cf. Bertschinger 2001) compatible + output format. With some RAMSES extras. +*/ +class grafic2_output_plugin : public output_plugin +{ +protected: + + + typedef struct{ + int n1, n2, n3; + float dxini0; + float xoff10,xoff20,xoff30; + float astart0,omega_m0,omega_l0,h00; + + }header; + + bool bhavehydro_; + + + void write_file_header( std::ofstream& ofs, unsigned ilevel, const grid_hierarchy& gh ) + { + header loc_head; + + double + boxlength = cf_.getValue("setup","boxlength"), + H0 = cf_.getValue("cosmology","H0"), + zstart = cf_.getValue("setup","zstart"), + astart = 1.0/(1.0+zstart), + omegam = cf_.getValue("cosmology","Omega_m"), + omegaL = cf_.getValue("cosmology","Omega_L"); + + loc_head.n1 = gh.get_grid(ilevel)->size(0); + loc_head.n2 = gh.get_grid(ilevel)->size(1); + loc_head.n3 = gh.get_grid(ilevel)->size(2); + + loc_head.dxini0 = boxlength / (H0*0.01) / pow(2.0,ilevel); + + loc_head.xoff10 = gh.offset_abs(ilevel,0) * loc_head.dxini0; + loc_head.xoff20 = gh.offset_abs(ilevel,1) * loc_head.dxini0; + loc_head.xoff30 = gh.offset_abs(ilevel,2) * loc_head.dxini0; + + loc_head.astart0 = astart; + loc_head.omega_m0 = omegam; + loc_head.omega_l0 = omegaL; + loc_head.h00 = H0; + + + int blksz = sizeof(header); + ofs.write( reinterpret_cast (&blksz), sizeof(int) ); + ofs.write( reinterpret_cast (&loc_head), blksz ); + ofs.write( reinterpret_cast (&blksz), sizeof(int) ); + + } + + void write_sliced_array( std::ofstream& ofs, unsigned ilevel, const grid_hierarchy& gh, float fac = 1.0f ) + { + unsigned n1,n2,n3; + n1 = gh.get_grid(ilevel)->size(0); + n2 = gh.get_grid(ilevel)->size(1); + n3 = gh.get_grid(ilevel)->size(2); + + std::vector data(n1*n2,0.0f); + + for( unsigned i=0; i (&blksize), sizeof(unsigned) ); + ofs.write( reinterpret_cast (&data[0]), blksize ); + ofs.write( reinterpret_cast (&blksize), sizeof(unsigned) ); + + } + } + + void write_ramses_namelist( const grid_hierarchy& gh ) + { + //... also write the refinement options to a dummy namelist file + char ff[256]; + sprintf(ff,"%s/ramses.nml",fname_.c_str() ); + + std::ofstream ofst(ff,std::ios::trunc); + + ofst + << "&INIT_PARAMS\n" + << "filetype=\'grafic\'\n"; + for( unsigned i=gh.levelmin();i<=gh.levelmax(); ++i) + { + sprintf(ff,"initfile(%d)=\'%s/level_%03d\'\n",i-gh.levelmin()+1,fname_.c_str(), i ); + ofst << std::string(ff); + } + ofst << "/\n\n"; + + + double xc,yc,zc,l; + + + + ofst + << "&AMR_PARAMS\n" + << "levelmin=" << gh.levelmin() << "\n" + << "levelmax=" << gh.levelmax() << "\n" + << "ngridtot=2000000\n" + << "nparttot=3000000\n" + << "nexpand=1\n/\n\n"; + + + if( gh.levelmax() > gh.levelmin() ) + { + l = pow(2.0,gh.levelmin()+1); + xc = ((double)gh.offset_abs(gh.levelmin()+1,0)+0.5*(double)gh.size(gh.levelmin()+1,0))/l; + yc = ((double)gh.offset_abs(gh.levelmin()+1,1)+0.5*(double)gh.size(gh.levelmin()+1,1))/l; + zc = ((double)gh.offset_abs(gh.levelmin()+1,2)+0.5*(double)gh.size(gh.levelmin()+1,2))/l; + + ofst << "&REFINE_PARAMS\n" + << "m_refine=0."; + + + for( unsigned i=gh.levelmin()+1;i("setup","baryons"); + } + + /*~grafic2_output_plugin() + { }*/ + + + void write_dm_position( int coord, const grid_hierarchy& gh ) + { + double + boxlength = cf_.getValue("setup","boxlength"); + // H0 = cf_.getValue("cosmology","H0"); + + for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel ) + { + + char ff[256]; + sprintf(ff,"%s/level_%03d/ic_posc%c",fname_.c_str(), ilevel, (char)('x'+coord) ); + + std::ofstream ofs(ff,std::ios::binary|std::ios::trunc); + + write_file_header( ofs, ilevel, gh ); + write_sliced_array( ofs, ilevel, gh, boxlength ); + } + } + + void write_dm_velocity( int coord, const grid_hierarchy& gh ) + { + double + boxlength = cf_.getValue("setup","boxlength"); + + for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel ) + { + + char ff[256]; + sprintf(ff,"%s/level_%03d/ic_velc%c",fname_.c_str(), ilevel, (char)('x'+coord) ); + + std::ofstream ofs(ff,std::ios::binary|std::ios::trunc); + + write_file_header( ofs, ilevel, gh ); + write_sliced_array( ofs, ilevel, gh, boxlength ); + } + } + + void write_gas_velocity( int coord, const grid_hierarchy& gh ) + { + double + boxlength = cf_.getValue("setup","boxlength"); + + for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel ) + { + + char ff[256]; + sprintf(ff,"%s/level_%03d/ic_velb%c",fname_.c_str(), ilevel, (char)('x'+coord) ); + + std::ofstream ofs(ff,std::ios::binary|std::ios::trunc); + + write_file_header( ofs, ilevel, gh ); + write_sliced_array( ofs, ilevel, gh, boxlength ); + } + } + + void write_gas_density( const grid_hierarchy& gh ) + { + for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel ) + { + + char ff[256]; + sprintf(ff,"%s/level_%03d/ic_deltab",fname_.c_str(), ilevel ); + + std::ofstream ofs(ff,std::ios::binary|std::ios::trunc); + + write_file_header( ofs, ilevel, gh ); + write_sliced_array( ofs, ilevel, gh ); + } + + } + + + void write_dm_density( const grid_hierarchy& gh ) + { + if(! bhavehydro_ ) + write_gas_density(gh); + + if( cf_.getValueSafe("output","ramses_nml",true) ) + write_ramses_namelist(gh); + + } + + void write_dm_mass( const grid_hierarchy& gh ) + { /* do nothing, not used... */ } + + void write_dm_potential( const grid_hierarchy& gh ) + { /* do nothing, not used... */ } + + void write_gas_potential( const grid_hierarchy& gh ) + { /* do nothing, not used... */ } + + void write_gas_position( int coord, const grid_hierarchy& gh ) + { /* do nothing, not used... */ } + + void finalize( void ) + { } + +}; + +namespace{ + output_plugin_creator_concrete creator("grafic2"); +} + diff --git a/plugins/transfer_bbks.cc b/plugins/transfer_bbks.cc new file mode 100644 index 0000000..3266d9b --- /dev/null +++ b/plugins/transfer_bbks.cc @@ -0,0 +1,95 @@ +/* + + transfer_bbks.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + */ + +#include "transfer_function.hh" + +//! Implementation of class TransferFunction_BBKS for the BBKS transfer function +/*! + This class implements the analytical fit to the matter transfer + function by Bardeen, Bond, Kaiser & Szalay (BBKS). + ( see Bardeen et al. (1986) ) + */ +class transfer_bbks_plugin : public transfer_function_plugin{ +private: + double m_Gamma; + +public: + //! Constructor + /*! + \param aCosm Structure of type Cosmology carrying the cosmological parameters + \param bSugiyama flag whether the Sugiyama (1995) correction shall be applied (default=true) + */ + transfer_bbks_plugin( config_file& cf ) + : transfer_function_plugin( cf ) + { + double Omega0 = cosmo_.Omega_m; + double FreeGamma = -1.0; + + bool bSugiyama(true); + + try{ + bSugiyama= pcf_->getValue( "cosmology", "sugiyama_corr" ); + }catch(...){ + throw std::runtime_error("Error in \'tranfer_bbks_plugin\': need to specify \'[cosmology]/sugiyama_corr = [true/false]"); + } + + FreeGamma = pcf_->getValueSafe( "cosmology", "gamma", FreeGamma ); + + if( FreeGamma <= 0.0 ){ + m_Gamma = Omega0*0.01*cosmo_.H0; + if( bSugiyama ) + m_Gamma *= exp(-cosmo_.Omega_b*(1.0+sqrt(2.0*0.01*cosmo_.H0)/Omega0)); + }else + m_Gamma = FreeGamma; + + tf_distinct_ = false; + + + } + + //! computes the value of the BBKS transfer function for mode k (in h/Mpc) + inline double compute( double k, tf_type type ){ + double q, f1, f2; + + q = k/(m_Gamma); + f1 = log(1.0 + 2.34*q)/(2.34*q); + f2 = 1.0 + q*(3.89 + q*(259.21 + q*(162.771336 + q*2027.16958081))); + + return f1/sqrt(sqrt(f2)); + + } + + inline double get_kmin( void ){ + return 1e-4; + } + + inline double get_kmax( void ){ + return 1.e4; + } +}; + + +namespace{ + transfer_function_plugin_creator_concrete< transfer_bbks_plugin > creator("bbks"); +} + diff --git a/plugins/transfer_camb.cc b/plugins/transfer_camb.cc new file mode 100644 index 0000000..5085250 --- /dev/null +++ b/plugins/transfer_camb.cc @@ -0,0 +1,167 @@ +/* + + transfer_camb.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + */ + +#include "transfer_function.hh" + +class transfer_CAMB_plugin : public transfer_function_plugin +{ + +private: + //Cosmology m_Cosmology; + + std::string m_filename_Pk, m_filename_Tk; + std::vector m_tab_k, m_tab_Tk_tot, m_tab_Tk_cdm, m_tab_Tk_baryon; + + Spline_interp *m_psinterp; + gsl_interp_accel *acc_tot, *acc_cdm, *acc_baryon; + gsl_spline *spline_tot, *spline_cdm, *spline_baryon; + + + + void read_table( void ){ +#ifdef WITH_MPI + if( MPI::COMM_WORLD.Get_rank() == 0 ){ +#endif + std::cerr + << " - reading tabulated transfer function data from file \n" + << " \'" << m_filename_Tk << "\'\n"; + + std::string line; + std::ifstream ifs( m_filename_Tk.c_str() ); + + m_tab_k.clear(); + m_tab_Tk_tot.clear(); + m_tab_Tk_cdm.clear(); + m_tab_Tk_baryon.clear(); + + while( !ifs.eof() ){ + getline(ifs,line); + + if(ifs.eof()) break; + + std::stringstream ss(line); + + double k, Tkc, Tkb, Tkg, Tkr, Tknu, Tktot; + ss >> k; + ss >> Tkc; + ss >> Tkb; + ss >> Tkg; + ss >> Tkr; + ss >> Tknu; + ss >> Tktot; + + m_tab_k.push_back( log10(k) ); + + m_tab_Tk_tot.push_back( log10(Tktot) ); + m_tab_Tk_baryon.push_back( log10(Tkb) ); + m_tab_Tk_cdm.push_back( log10(Tkc) ); + + } + + ifs.close(); + + + + +#ifdef WITH_MPI + } + + unsigned n=m_tab_k.size(); + MPI::COMM_WORLD.Bcast( &n, 1, MPI_UNSIGNED, 0 ); + + if( MPI::COMM_WORLD.Get_rank() > 0 ){ + m_tab_k.assign(n,0); + m_tab_Tk_tot.assign(n,0); + m_tab_Tk_cdm.assign(n,0); + m_tab_Tk_baryon.assign(n,0); + + } + + MPI::COMM_WORLD.Bcast( &m_tab_k[0], n, MPI_DOUBLE, 0 ); + MPI::COMM_WORLD.Bcast( &m_tab_Tk_tot[0], n, MPI_DOUBLE, 0 ); + MPI::COMM_WORLD.Bcast( &m_tab_Tk_cdm[0], n, MPI_DOUBLE, 0 ); + MPI::COMM_WORLD.Bcast( &m_tab_Tk_baryon[0], n, MPI_DOUBLE, 0 ); + +#endif + + } + +public: + transfer_CAMB_plugin( config_file& cf )//Cosmology aCosm, std::string filename_Tk, TFtype iwhich ) + : transfer_function_plugin( cf )//, m_filename_Tk( filename_Tk ), m_psinterp( NULL ) + { + m_filename_Tk = pcf_->getValue("cosmology","transfer_file"); + + read_table( ); + + acc_tot = gsl_interp_accel_alloc(); + acc_cdm = gsl_interp_accel_alloc(); + acc_baryon = gsl_interp_accel_alloc(); + + + spline_tot = gsl_spline_alloc( gsl_interp_akima, m_tab_k.size() ); + spline_cdm = gsl_spline_alloc( gsl_interp_akima, m_tab_k.size() ); + spline_baryon = gsl_spline_alloc( gsl_interp_akima, m_tab_k.size() ); + + gsl_spline_init (spline_tot, &m_tab_k[0], &m_tab_Tk_tot[0], m_tab_k.size() ); + gsl_spline_init (spline_cdm, &m_tab_k[0], &m_tab_Tk_cdm[0], m_tab_k.size() ); + gsl_spline_init (spline_baryon, &m_tab_k[0], &m_tab_Tk_baryon[0], m_tab_k.size() ); + + tf_distinct_ = true; + } + + ~transfer_CAMB_plugin() + { + gsl_spline_free (spline_tot); + gsl_spline_free (spline_cdm); + gsl_spline_free (spline_baryon); + + gsl_interp_accel_free (acc_tot); + gsl_interp_accel_free (acc_cdm); + gsl_interp_accel_free (acc_baryon); + } + + inline double compute( double k, tf_type type ){ + if( type == total ) + return pow(10.0, gsl_spline_eval (spline_tot, log10(k), acc_tot) ); + if( type == cdm ) + return pow(10.0, gsl_spline_eval (spline_cdm, log10(k), acc_cdm) ); + + return pow(10.0, gsl_spline_eval (spline_baryon, log10(k), acc_baryon) ); + } + + inline double get_kmin( void ){ + return pow(10.0,m_tab_k[1]); + } + + inline double get_kmax( void ){ + return pow(10.0,m_tab_k[m_tab_k.size()-2]); + } + +}; + +namespace{ + transfer_function_plugin_creator_concrete< transfer_CAMB_plugin > creator("camb_file"); +} + + diff --git a/plugins/transfer_eisenstein.cc b/plugins/transfer_eisenstein.cc new file mode 100644 index 0000000..70d7e8a --- /dev/null +++ b/plugins/transfer_eisenstein.cc @@ -0,0 +1,218 @@ +/* + + transfer_eisenstein.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + */ + +#include "transfer_function.hh" + +//! Implementation of abstract base class TransferFunction for the Eisenstein & Hu transfer function +/*! + This class implements the analytical fit to the matter transfer + function by Eisenstein & Hu (1999). In fact it is their code. + */ +class transfer_eisenstein_plugin : public transfer_function_plugin +{ +protected: + using transfer_function_plugin::cosmo_; + + //Cosmology m_Cosmology; + double m_h0; + double omhh, /* Omega_matter*h^2 */ + obhh, /* Omega_baryon*h^2 */ + theta_cmb, /* Tcmb in units of 2.7 K */ + z_equality, /* Redshift of matter-radiation equality, really 1+z */ + k_equality, /* Scale of equality, in Mpc^-1 */ + z_drag, /* Redshift of drag epoch */ + R_drag, /* Photon-baryon ratio at drag epoch */ + R_equality, /* Photon-baryon ratio at equality epoch */ + sound_horizon, /* Sound horizon at drag epoch, in Mpc */ + k_silk, /* Silk damping scale, in Mpc^-1 */ + alpha_c, /* CDM suppression */ + beta_c, /* CDM log shift */ + alpha_b, /* Baryon suppression */ + beta_b, /* Baryon envelope shift */ + beta_node, /* Sound horizon shift */ + k_peak, /* Fit to wavenumber of first peak, in Mpc^-1 */ + sound_horizon_fit, /* Fit to sound horizon, in Mpc */ + alpha_gamma; /* Gamma suppression in approximate TF */ + + //! private member function: sets internal quantities for Eisenstein & Hu fitting + void TFset_parameters(double omega0hh, double f_baryon, double Tcmb) + /* Set all the scalars quantities for Eisenstein & Hu 1997 fitting formula */ + /* Input: omega0hh -- The density of CDM and baryons, in units of critical dens, + multiplied by the square of the Hubble constant, in units + of 100 km/s/Mpc */ + /* f_baryon -- The fraction of baryons to CDM */ + /* Tcmb -- The temperature of the CMB in Kelvin. Tcmb<=0 forces use + of the COBE value of 2.728 K. */ + /* Output: Nothing, but set many global variables used in TFfit_onek(). + You can access them yourself, if you want. */ + /* Note: Units are always Mpc, never h^-1 Mpc. */ + { + double z_drag_b1, z_drag_b2; + double alpha_c_a1, alpha_c_a2, beta_c_b1, beta_c_b2, alpha_b_G, y; + + if (f_baryon<=0.0 || omega0hh<=0.0) { + fprintf(stderr, "TFset_parameters(): Illegal input.\n"); + exit(1); + } + omhh = omega0hh; + obhh = omhh*f_baryon; + if (Tcmb<=0.0) Tcmb=2.728; /* COBE FIRAS */ + theta_cmb = Tcmb/2.7; + + z_equality = 2.50e4*omhh/POW4(theta_cmb); /* Really 1+z */ + k_equality = 0.0746*omhh/SQR(theta_cmb); + + z_drag_b1 = 0.313*pow((double)omhh,-0.419)*(1+0.607*pow((double)omhh,0.674)); + z_drag_b2 = 0.238*pow((double)omhh,0.223); + z_drag = 1291*pow(omhh,0.251)/(1+0.659*pow((double)omhh,0.828))* + (1+z_drag_b1*pow((double)obhh,(double)z_drag_b2)); + + R_drag = 31.5*obhh/POW4(theta_cmb)*(1000/(1+z_drag)); + R_equality = 31.5*obhh/POW4(theta_cmb)*(1000/z_equality); + + sound_horizon = 2./3./k_equality*sqrt(6./R_equality)* + log((sqrt(1+R_drag)+sqrt(R_drag+R_equality))/(1+sqrt(R_equality))); + + k_silk = 1.6*pow((double)obhh,0.52)*pow((double)omhh,0.73)*(1+pow((double)10.4*omhh,-0.95)); + + alpha_c_a1 = pow((double)46.9*omhh,0.670)*(1+pow(32.1*omhh,-0.532)); + alpha_c_a2 = pow((double)12.0*omhh,0.424)*(1+pow(45.0*omhh,-0.582)); + alpha_c = pow(alpha_c_a1,-f_baryon)* + pow(alpha_c_a2,-CUBE(f_baryon)); + + beta_c_b1 = 0.944/(1+pow(458*omhh,-0.708)); + beta_c_b2 = pow(0.395*omhh, -0.0266); + beta_c = 1.0/(1+beta_c_b1*(pow(1-f_baryon, beta_c_b2)-1)); + + y = z_equality/(1+z_drag); + alpha_b_G = y*(-6.*sqrt(1+y)+(2.+3.*y)*log((sqrt(1+y)+1)/(sqrt(1+y)-1))); + alpha_b = 2.07*k_equality*sound_horizon*pow(1+R_drag,-0.75)*alpha_b_G; + + beta_node = 8.41*pow(omhh, 0.435); + beta_b = 0.5+f_baryon+(3.-2.*f_baryon)*sqrt(pow(17.2*omhh,2.0)+1); + + k_peak = 2.5*3.14159*(1+0.217*omhh)/sound_horizon; + sound_horizon_fit = 44.5*log(9.83/omhh)/sqrt(1+10.0*pow(obhh,0.75)); + + alpha_gamma = 1-0.328*log(431.0*omhh)*f_baryon + 0.38*log(22.3*omhh)* + SQR(f_baryon); + + return; + } + + //! private member function: computes transfer function for mode k (k in Mpc) + inline double TFfit_onek(double k, double *tf_baryon, double *tf_cdm) + /* Input: k -- Wavenumber at which to calculate transfer function, in Mpc^-1. + *tf_baryon, *tf_cdm -- Input value not used; replaced on output if + the input was not NULL. */ + /* Output: Returns the value of the full transfer function fitting formula. + This is the form given in Section 3 of Eisenstein & Hu (1997). + *tf_baryon -- The baryonic contribution to the full fit. + *tf_cdm -- The CDM contribution to the full fit. */ + /* Notes: Units are Mpc, not h^-1 Mpc. */ + { + double T_c_ln_beta, T_c_ln_nobeta, T_c_C_alpha, T_c_C_noalpha; + double q, xx, xx_tilde;//, q_eff; + double T_c_f, T_c, s_tilde, T_b_T0, T_b, f_baryon, T_full; + //double T_0_L0, T_0_C0, T_0, gamma_eff; + //double T_nowiggles_L0, T_nowiggles_C0, T_nowiggles; + + k = fabs(k); /* Just define negative k as positive */ + if (k==0.0) { + if (tf_baryon!=NULL) *tf_baryon = 1.0; + if (tf_cdm!=NULL) *tf_cdm = 1.0; + return 1.0; + } + + q = k/13.41/k_equality; + xx = k*sound_horizon; + + T_c_ln_beta = log(2.718282+1.8*beta_c*q); + T_c_ln_nobeta = log(2.718282+1.8*q); + T_c_C_alpha = 14.2/alpha_c + 386.0/(1+69.9*pow(q,1.08)); + T_c_C_noalpha = 14.2 + 386.0/(1+69.9*pow(q,1.08)); + + T_c_f = 1.0/(1.0+POW4(xx/5.4)); + T_c = T_c_f*T_c_ln_beta/(T_c_ln_beta+T_c_C_noalpha*SQR(q)) + + (1-T_c_f)*T_c_ln_beta/(T_c_ln_beta+T_c_C_alpha*SQR(q)); + + s_tilde = sound_horizon*pow(1+CUBE(beta_node/xx),-1./3.); + xx_tilde = k*s_tilde; + + T_b_T0 = T_c_ln_nobeta/(T_c_ln_nobeta+T_c_C_noalpha*SQR(q)); + T_b = sin(xx_tilde)/(xx_tilde)*(T_b_T0/(1+SQR(xx/5.2))+ + alpha_b/(1+CUBE(beta_b/xx))*exp(-pow(k/k_silk,1.4))); + + f_baryon = obhh/omhh; + T_full = f_baryon*T_b + (1-f_baryon)*T_c; + + /* Now to store these transfer functions */ + if (tf_baryon!=NULL) *tf_baryon = T_b; + if (tf_cdm!=NULL) *tf_cdm = T_c; + return T_full; + } + +public: + //! Constructor for Eisenstein & Hu fitting for transfer function + /*! + \param aCosm structure of type Cosmology carrying the cosmological parameters + \param Tcmb mean temperature of the CMB fluctuations (defaults to + Tcmb = 2.726 if not specified) + */ + transfer_eisenstein_plugin( config_file &cf )//Cosmology aCosm, double Tcmb = 2.726 ) + : transfer_function_plugin(cf), m_h0( cosmo_.H0*0.01 ) + { + double Tcmb = pcf_->getValueSafe("cosmology","Tcmb",2.726); + TFset_parameters( (cosmo_.Omega_m)*cosmo_.H0*cosmo_.H0*(0.01*0.01), + cosmo_.Omega_b/(cosmo_.Omega_m-cosmo_.Omega_b),//-aCosm.Omega_b), + Tcmb); + tf_distinct_ = false; + } + + //! Computes the transfer function for k in Mpc/h by calling TFfit_onek + inline double compute( double k, tf_type type ){ + double tfb, tfcdm, fb, fc; //, tfull + TFfit_onek( k*m_h0, &tfb, &tfcdm ); + + fb = cosmo_.Omega_b/(cosmo_.Omega_m); + fc = (cosmo_.Omega_m-cosmo_.Omega_b)/(cosmo_.Omega_m) ; + + return fb*tfb+fc*tfcdm; + //return 1.0; + } + + + inline double get_kmin( void ){ + return 1e-4; + } + + inline double get_kmax( void ){ + return 1.e4; + } + +}; + +namespace{ + transfer_function_plugin_creator_concrete< transfer_eisenstein_plugin > creator("eisenstein"); +} + diff --git a/plugins/transfer_inflation.cc b/plugins/transfer_inflation.cc new file mode 100644 index 0000000..79a8b6c --- /dev/null +++ b/plugins/transfer_inflation.cc @@ -0,0 +1,64 @@ +/* + + tranfer_inflation.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + */ + +#include "transfer_function.hh" + +class transfer_inflation_plugin : public transfer_function_plugin +{ +protected: + + double ns2_; + +public: + + transfer_inflation_plugin( config_file& cf ) + : transfer_function_plugin( cf ) + { + ns2_ = 0.5*cf.getValue("cosmology","nspec"); + tf_distinct_ = true; + } + + ~transfer_inflation_plugin(){ }; + + double compute( double k, tf_type type=baryon) + { + return pow(k,ns2_); + } + + double get_kmax( void ) + { + return 1e10; + } + + double get_kmin( void ) + { + return 1e-30; + } + +}; + + +namespace{ + transfer_function_plugin_creator_concrete< transfer_inflation_plugin > creator("inflation"); +} + diff --git a/poisson.cc b/poisson.cc new file mode 100644 index 0000000..a5c6243 --- /dev/null +++ b/poisson.cc @@ -0,0 +1,499 @@ +/* + + poisson.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + */ + +/****** ABSTRACT FACTORY PATTERN IMPLEMENTATION *******/ + +#include "poisson.hh" +std::map< std::string, poisson_plugin_creator *>& +get_poisson_plugin_map() +{ + static std::map< std::string, poisson_plugin_creator* > poisson_plugin_map; + return poisson_plugin_map; +} + +void print_poisson_plugins() +{ + std::map< std::string, poisson_plugin_creator *>& m = get_poisson_plugin_map(); + + std::map< std::string, poisson_plugin_creator *>::iterator it; + it = m.begin(); + std::cout << " - Available poisson solver plug-ins:\n"; + while( it!=m.end() ) + { + if( (*it).second ) + std::cout << "\t\'" << (*it).first << "\'\n"; + ++it; + } + + +} + + +/****** CALL IMPLEMENTATIONS OF POISSON SOLVER CLASSES ******/ + +#include "mg_solver.hh" +#include "fd_schemes.hh" + +#ifdef SINGLE_PRECISION +typedef multigrid::solver< stencil_7P, interp_O3_fluxcorr, mg_straight, float > poisson_solver_O2; +typedef multigrid::solver< stencil_13P, interp_O5_fluxcorr, mg_straight, float > poisson_solver_O4; +typedef multigrid::solver< stencil_19P, interp_O7_fluxcorr, mg_straight, float > poisson_solver_O6; +#else +typedef multigrid::solver< stencil_7P, interp_O3_fluxcorr, mg_straight, double > poisson_solver_O2; +typedef multigrid::solver< stencil_13P, interp_O5_fluxcorr, mg_straight, double > poisson_solver_O4; +typedef multigrid::solver< stencil_19P, interp_O7_fluxcorr, mg_straight, double > poisson_solver_O6; +#endif + + +/**************************************************************************************/ +/**************************************************************************************/ +#pragma mark - + + +double multigrid_poisson_plugin::solve( grid_hierarchy& f, grid_hierarchy& u ) +{ + unsigned verbosity = cf_.getValueSafe("setup","verbosity",2); + + if( verbosity > 0 ) + std::cout << " - Invoking multi-grid Poisson solver..." << std::endl; + + double acc = 1e-5, err; + std::string ps_smoother_name; + unsigned ps_presmooth, ps_postsmooth, order; + + acc = cf_.getValueSafe("poisson","accuracy",acc); + ps_presmooth = cf_.getValueSafe("poisson","pre_smooth",3); + ps_postsmooth = cf_.getValueSafe("poisson","post_smooth",3); + ps_smoother_name = cf_.getValueSafe("poisson","smoother","gs"); + order = cf_.getValueSafe( "setup", "laplace_order", 4 ); + + multigrid::opt::smtype ps_smtype = multigrid::opt::sm_gauss_seidel; + + if ( ps_smoother_name == std::string("gs") ) + ps_smtype = multigrid::opt::sm_gauss_seidel; + else if ( ps_smoother_name == std::string("jacobi") ) + ps_smtype = multigrid::opt::sm_jacobi; + else if ( ps_smoother_name == std::string("sor") ) + ps_smtype = multigrid::opt::sm_sor; + else + std::cerr << " - Warning: unknown smoother \'" << ps_smoother_name << "\' for multigrid solver!\n" + << " reverting to \'gs\' (Gauss-Seidel)" << std::endl; + + + double tstart, tend; + tstart = omp_get_wtime(); + + + //----- run Poisson solver -----// + if( order == 2 ) + { + poisson_solver_O2 ps( f, ps_smtype, ps_presmooth, ps_postsmooth ); + err = ps.solve( u, acc, true ); + } + else if( order == 4 ) + { + poisson_solver_O4 ps( f, ps_smtype, ps_presmooth, ps_postsmooth ); + err = ps.solve( u, acc, true ); + } + else if( order == 6 ) + { + poisson_solver_O6 ps( f, ps_smtype, ps_presmooth, ps_postsmooth ); + err = ps.solve( u, acc, true ); + } + else + { + throw std::runtime_error("Invalid order specified for Laplace operator"); + } + + //------------------------------// + + tend = omp_get_wtime(); + if( verbosity > 1 ) + std::cout << " - Poisson solver took " << tend-tstart << "s with " << omp_get_max_threads() << " threads." << std::endl; + + return err; +} + +double multigrid_poisson_plugin::gradient( int dir, grid_hierarchy& u, grid_hierarchy& Du ) +{ + Du = u; + + unsigned order = cf_.getValueSafe( "setup", "grad_order", 4 ); + + switch( order ) + { + case 2: + implementation().gradient_O2( dir, u, Du ); + break; + case 4: + implementation().gradient_O4( dir, u, Du ); + break; + case 6: + implementation().gradient_O6( dir, u, Du ); + break; + default: + throw std::runtime_error("Invalid order specified for gradient operator!"); + } + + return 0.0; +} + +void multigrid_poisson_plugin::implementation::gradient_O2( int dir, grid_hierarchy& u, grid_hierarchy& Du ) +{ + for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) + { + double h = pow(2.0,ilevel); + meshvar_bnd *pvar = Du.get_grid(ilevel); + + if( dir == 0 ) +#pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + (*pvar)(ix,iy,iz) = 0.5*((*u.get_grid(ilevel))(ix+1,iy,iz)-(*u.get_grid(ilevel))(ix-1,iy,iz))*h; + + else if( dir == 1 ) +#pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + (*pvar)(ix,iy,iz) = 0.5*((*u.get_grid(ilevel))(ix,iy+1,iz)-(*u.get_grid(ilevel))(ix,iy-1,iz))*h; + + else if( dir == 2 ) +#pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + (*pvar)(ix,iy,iz) = 0.5*((*u.get_grid(ilevel))(ix,iy,iz+1)-(*u.get_grid(ilevel))(ix,iy,iz-1))*h; + } +} + +void multigrid_poisson_plugin::implementation::gradient_O4( int dir, grid_hierarchy& u, grid_hierarchy& Du ) +{ + for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) + { + double h = pow(2.0,ilevel); + meshvar_bnd *pvar = Du.get_grid(ilevel); + + h /= 12.0; + + if( dir == 0 ) +#pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + (*pvar)(ix,iy,iz) = ((*u.get_grid(ilevel))(ix-2,iy,iz) + -8.0*(*u.get_grid(ilevel))(ix-1,iy,iz) + +8.0*(*u.get_grid(ilevel))(ix+1,iy,iz) + -(*u.get_grid(ilevel))(ix+2,iy,iz))*h; + + else if( dir == 1 ) +#pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + (*pvar)(ix,iy,iz) = ((*u.get_grid(ilevel))(ix,iy-2,iz) + -8.0*(*u.get_grid(ilevel))(ix,iy-1,iz) + +8.0*(*u.get_grid(ilevel))(ix,iy+1,iz) + -(*u.get_grid(ilevel))(ix,iy+2,iz))*h; + + else if( dir == 2 ) +#pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + (*pvar)(ix,iy,iz) = ((*u.get_grid(ilevel))(ix,iy,iz-2) + -8.0*(*u.get_grid(ilevel))(ix,iy,iz-1) + +8.0*(*u.get_grid(ilevel))(ix,iy,iz+1) + -(*u.get_grid(ilevel))(ix,iy,iz+2))*h; + } + +} + +void multigrid_poisson_plugin::implementation::gradient_O6( int dir, grid_hierarchy& u, grid_hierarchy& Du ) +{ + for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) + { + double h = pow(2.0,ilevel); + meshvar_bnd *pvar = Du.get_grid(ilevel); + + h /= 60.; + if( dir == 0 ) +#pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + (*pvar)(ix,iy,iz) = + (-(*u.get_grid(ilevel))(ix-3,iy,iz) + +9.0*(*u.get_grid(ilevel))(ix-2,iy,iz) + -45.0*(*u.get_grid(ilevel))(ix-1,iy,iz) + +45.0*(*u.get_grid(ilevel))(ix+1,iy,iz) + -9.0*(*u.get_grid(ilevel))(ix+2,iy,iz) + +(*u.get_grid(ilevel))(ix+3,iy,iz))*h; + + else if( dir == 1 ) +#pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + (*pvar)(ix,iy,iz) = + (-(*u.get_grid(ilevel))(ix,iy-3,iz) + +9.0*(*u.get_grid(ilevel))(ix,iy-2,iz) + -45.0*(*u.get_grid(ilevel))(ix,iy-1,iz) + +45.0*(*u.get_grid(ilevel))(ix,iy+1,iz) + -9.0*(*u.get_grid(ilevel))(ix,iy+2,iz) + +(*u.get_grid(ilevel))(ix,iy+3,iz))*h; + + else if( dir == 2 ) +#pragma omp parallel for + for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + (*pvar)(ix,iy,iz) = + (-(*u.get_grid(ilevel))(ix,iy,iz-3) + +9.0*(*u.get_grid(ilevel))(ix,iy,iz-2) + -45.0*(*u.get_grid(ilevel))(ix,iy,iz-1) + +45.0*(*u.get_grid(ilevel))(ix,iy,iz+1) + -9.0*(*u.get_grid(ilevel))(ix,iy,iz+2) + +(*u.get_grid(ilevel))(ix,iy,iz+3))*h; + } + +} + +/**************************************************************************************/ +/**************************************************************************************/ +#pragma mark - + +#ifdef SINGLE_PRECISION +#ifdef SINGLETHREAD_FFTW +#include +#else +#include +#endif +#else +#ifdef SINGLETHREAD_FFTW +#include +#else +#include +#endif +#endif + +double fft_poisson_plugin::solve( grid_hierarchy& f, grid_hierarchy& u ) +{ + unsigned verbosity = cf_.getValueSafe("setup","verbosity",2); + + if( f.levelmin() != f.levelmax() ) + throw std::runtime_error("fft_poisson_plugin::solve : k-space method can only be used in unigrid mode (levelmin=levelmax)"); + + if( verbosity > 0 ) + std::cout << " - Invoking unigrid FFT Poisson solver..." << std::endl; + + int nx,ny,nz,nzp; + nx = f.get_grid(f.levelmax())->size(0); + ny = f.get_grid(f.levelmax())->size(1); + nz = f.get_grid(f.levelmax())->size(2); + nzp = 2*(nz/2+1); + + + //... copy data .................................................. + fftw_real *data = new fftw_real[nx*ny*nzp]; + fftw_complex *cdata = reinterpret_cast (data); + + for( int i=0; i("setup","boxlength"); + double kfac = 2.0*M_PI/boxlength; + double fac = -1.0/(nx*ny*nz)/boxlength; + + for( int i=0; inx/2) ii-=nx; + int jj = j; if(jj>ny/2) jj-=ny; + double ki = (double)ii; + double kj = (double)jj; + double kk = (double)k; + + double kk2 = kfac*kfac*(ki*ki+kj*kj+kk*kk); + + unsigned idx = (i*ny+j)*nzp/2+k; + double re = cdata[idx].re; + double im = cdata[idx].im; + + cdata[idx].re = -re/kk2*fac; + cdata[idx].im = -im/kk2*fac; + } + + cdata[0].re = 0.0; + cdata[0].im = 0.0; + +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL ); +#else + rfftwnd_one_complex_to_real( iplan, cdata, NULL ); +#endif + + rfftwnd_destroy_plan(plan); + rfftwnd_destroy_plan(iplan); + + //... copy data .......................................... + for( int i=0; isize(0); + ny = u.get_grid(u.levelmax())->size(1); + nz = u.get_grid(u.levelmax())->size(2); + nzp = 2*(nz/2+1); + + //... copy data .................................................. + fftw_real *data = new fftw_real[nx*ny*nzp]; + fftw_complex *cdata = reinterpret_cast (data); + + for( int i=0; i("setup","boxlength"); + double kfac = 2.0*M_PI/boxlength; + + for( int i=0; inx/2) ii-=nx; + int jj = j; if(jj>ny/2) jj-=ny; + double ki = (double)ii; + double kj = (double)jj; + double kk = (double)k; + + double kdir; + if( dir == 0 ) + kdir = kfac*ki; + else if( dir == 1 ) + kdir = kfac*kj; + else if( dir == 2 ) + kdir = kfac*kk; + + unsigned idx = (i*ny+j)*nzp/2+k; + double re = cdata[idx].re; + double im = cdata[idx].im; + + cdata[idx].re = fac*im*kdir; + cdata[idx].im = -fac*re*kdir; + } + + cdata[0].re = 0.0; + cdata[0].im = 0.0; + +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL ); +#else + rfftwnd_one_complex_to_real( iplan, cdata, NULL ); +#endif + + rfftwnd_destroy_plan(plan); + rfftwnd_destroy_plan(iplan); + + //... copy data .......................................... + for( int i=0; i multigrid_poisson_creator("mg_poisson"); + poisson_plugin_creator_concrete fft_poisson_creator("fft_poisson"); +} diff --git a/poisson.hh b/poisson.hh new file mode 100644 index 0000000..9c4d5cf --- /dev/null +++ b/poisson.hh @@ -0,0 +1,140 @@ +/* + + poisson.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + */ + +#ifndef __POISSON_HH +#define __POISSON_HH + +#include +#include + +#include "general.hh" + +class poisson_plugin +{ +protected: + + //! reference to the config_file object that holds all configuration options + config_file& cf_; + +public: + + //! constructor + explicit poisson_plugin( config_file& cf ) + : cf_(cf) + { } + + //! destructor + virtual ~poisson_plugin() + { } + + virtual double solve( grid_hierarchy& f, grid_hierarchy& u ) = 0; + virtual double gradient( int dir, grid_hierarchy& u, grid_hierarchy& Du ) = 0; + +}; + +#pragma mark - + +/*! + * @brief implements abstract factory design pattern for poisson solver plug-ins + */ +struct poisson_plugin_creator +{ + //! create an instance of a plug-in + virtual poisson_plugin * create( config_file& cf ) const = 0; + + //! destroy an instance of a plug-in + virtual ~poisson_plugin_creator() { } +}; + +//! maps the name of a plug-in to a pointer of the factory pattern +std::map< std::string, poisson_plugin_creator *>& get_poisson_plugin_map(); + +//! print a list of all registered output plug-ins +void print_poisson_plugins(); + + +/*! + * @brief concrete factory pattern for output plug-ins + */ +template< class Derived > +struct poisson_plugin_creator_concrete : public poisson_plugin_creator +{ + //! register the plug-in by its name + poisson_plugin_creator_concrete( const std::string& plugin_name ) + { + get_poisson_plugin_map()[ plugin_name ] = this; + } + + //! create an instance of the plug-in + poisson_plugin * create( config_file& cf ) const + { + return new Derived( cf ); + } +}; + +/**************************************************************************************/ +/**************************************************************************************/ +#pragma mark - + +class multigrid_poisson_plugin : public poisson_plugin +{ +public: + + explicit multigrid_poisson_plugin( config_file& cf ) + : poisson_plugin( cf ) + { } + + double solve( grid_hierarchy& f, grid_hierarchy& u ); + double gradient( int dir, grid_hierarchy& u, grid_hierarchy& Du ); + +protected: + + struct implementation + { + double solve_O2( grid_hierarchy& f, grid_hierarchy& u ); + double solve_O4( grid_hierarchy& f, grid_hierarchy& u ); + void gradient_O2( int dir, grid_hierarchy& u, grid_hierarchy& Du ); + void gradient_O4( int dir, grid_hierarchy& u, grid_hierarchy& Du ); + void gradient_O6( int dir, grid_hierarchy& u, grid_hierarchy& Du ); + }; +}; + +/**************************************************************************************/ +/**************************************************************************************/ +#pragma mark - + +class fft_poisson_plugin : public poisson_plugin +{ +public: + + explicit fft_poisson_plugin( config_file& cf ) + : poisson_plugin( cf ) + { } + + double solve( grid_hierarchy& f, grid_hierarchy& u ); + double gradient( int dir, grid_hierarchy& u, grid_hierarchy& Du ); + +}; + +#endif // __POISSON_HH + diff --git a/random.hh b/random.hh new file mode 100644 index 0000000..63597b3 --- /dev/null +++ b/random.hh @@ -0,0 +1,459 @@ +/* + + random.hh - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +*/ + +#ifndef __RANDOM_HH +#define __RANDOM_HH + +#include +#include +#include +#include + +#include "mesh.hh" +#include "mg_operators.hh" + +/*! + * @brief encapsulates all things random number generator related + */ +template< typename T > +class random_numbers +{ +public: + unsigned + res_, //!< resolution of the full mesh + cubesize_, //!< size of one independent random number cube + ncubes_; //!< number of random number cubes to cover the full mesh + long baseseed_; //!< base seed from which cube seeds are computed + + //! vector of 3D meshes (the random number cbues) with random numbers + std::vector< Meshvar* > rnums_; + +protected: + + //! fills a subcube with random numbers + double fill_cube( int i, int j, int k) + { + + gsl_rng *RNG = gsl_rng_alloc( gsl_rng_mt19937 ); + + i = (i+ncubes_)%ncubes_; + j = (j+ncubes_)%ncubes_; + k = (k+ncubes_)%ncubes_; + + long icube = (i*ncubes_+j)*ncubes_+k; + long cubeseed = baseseed_+icube; //... each cube gets its unique seed + + gsl_rng_set( RNG, cubeseed ); + + if( rnums_[icube] == NULL ) + rnums_[icube] = new Meshvar( cubesize_, 0, 0, 0 ); + + double mean = 0.0; + + for( int ii=0; ii<(int)cubesize_; ++ii ) + for( int jj=0; jj<(int)cubesize_; ++jj ) + for( int kk=0; kk<(int)cubesize_; ++kk ) + { + (*rnums_[icube])(ii,jj,kk) = gsl_ran_ugaussian_ratio_method( RNG ); + mean += (*rnums_[icube])(ii,jj,kk); + } + + gsl_rng_free( RNG ); + + return mean/(cubesize_*cubesize_*cubesize_); + } + + //! copy random numbers from a cube to a full grid array + template< class C > + void copy_cube( int i, int j, int k, C& dat ) + { + int offi, offj, offk; + + offi = i*cubesize_; + offj = j*cubesize_; + offk = k*cubesize_; + + i = (i+ncubes_)%ncubes_; + j = (j+ncubes_)%ncubes_; + k = (k+ncubes_)%ncubes_; + + long icube = (i*ncubes_+j)*ncubes_+k; + + for( int ii=0; ii<(int)cubesize_; ++ii ) + for( int jj=0; jj<(int)cubesize_; ++jj ) + for( int kk=0; kk<(int)cubesize_; ++kk ) + dat(offi+ii,offj+jj,offk+kk) = (*rnums_[icube])(ii,jj,kk); + } + + //! free the memory associated with a subcube + void free_cube( int i, int j, int k ) + { + + i = (i+ncubes_)%ncubes_; + j = (j+ncubes_)%ncubes_; + k = (k+ncubes_)%ncubes_; + long icube = (i*ncubes_+j)*ncubes_+k; + + if( rnums_[icube] != NULL ) + { + delete rnums_[icube]; + rnums_[icube] = NULL; + } + } + + //! initialize member variables and allocate memory + void initialize( void ) + { + ncubes_ = std::max((int)((double)res_/cubesize_),1); + if( res_ < cubesize_ ) + { + ncubes_ = 1; + cubesize_ = res_; + } + + rnums_.assign( ncubes_*ncubes_*ncubes_, NULL ); + } + + //! fill a cubic subvolume of the full grid with random numbers + double fill_subvolume( int *i0, int *n ) + { + unsigned i0cube[3], ncube[3]; + + i0cube[0] = (int)((double)i0[0]/cubesize_); + i0cube[1] = (int)((double)i0[1]/cubesize_); + i0cube[2] = (int)((double)i0[2]/cubesize_); + + ncube[0] = (int)((double)(i0[0]+n[0])/cubesize_+1.0)-i0cube[0]; + ncube[1] = (int)((double)(i0[1]+n[1])/cubesize_+1.0)-i0cube[1]; + ncube[2] = (int)((double)(i0[2]+n[2])/cubesize_+1.0)-i0cube[2]; + + double mean = 0.0; + + #pragma omp parallel for reduction(+:mean) + for( int i=i0cube[0]; i<(int)ncube[0]; ++i ) + for( int j=i0cube[1]; j<(int)ncube[1]; ++j ) + for( int k=i0cube[2]; k<(int)ncube[2]; ++k ) + { + int ii(i),jj(j),kk(k); + + ii = (ii+ncubes_)%ncubes_; + jj = (jj+ncubes_)%ncubes_; + kk = (kk+ncubes_)%ncubes_; + + mean += fill_cube(ii, jj, kk); + } + return mean/(ncube[0]*ncube[1]*ncube[2]); + } + + //! fill an entire grid with random numbers + double fill_all( void ) + { + double sum = 0.0; + + #pragma omp parallel for reduction(+:sum) + for( int i=0; i<(int)ncubes_; ++i ) + for( int j=0; j<(int)ncubes_; ++j ) + for( int k=0; k<(int)ncubes_; ++k ) + { + int ii(i),jj(j),kk(k); + + ii = (ii+ncubes_)%ncubes_; + jj = (jj+ncubes_)%ncubes_; + kk = (kk+ncubes_)%ncubes_; + + sum+=fill_cube(ii, jj, kk); + } + + return sum/(ncubes_*ncubes_*ncubes_); + } + +public: + template< class C > + double fill_all( C& dat ) + { + double sum = 0.0; + + #pragma omp parallel for reduction(+:sum) + for( int i=0; i<(int)ncubes_; ++i ) + for( int j=0; j<(int)ncubes_; ++j ) + for( int k=0; k<(int)ncubes_; ++k ) + { + int ii(i),jj(j),kk(k); + + ii = (ii+ncubes_)%ncubes_; + jj = (jj+ncubes_)%ncubes_; + kk = (kk+ncubes_)%ncubes_; + + sum+=fill_cube(ii, jj, kk); + copy_cube(ii,jj,kk,dat); + free_cube(ii, jj, kk); + } + + return sum/(ncubes_*ncubes_*ncubes_); + } + + +public: + + //! constructor + random_numbers( unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx ) + : res_( res ), cubesize_( cubesize ), ncubes_( 1 ), baseseed_( baseseed ) + { + initialize(); + fill_subvolume( x0, lx ); + } + + //! constructor + random_numbers( unsigned res, unsigned cubesize, long baseseed, bool zeromean=true ) + : res_( res ), cubesize_( cubesize ), ncubes_( 1 ), baseseed_( baseseed ) + { + double mean = 0.0; + initialize(); + mean = fill_all(); + + if( zeromean ) + { + for(unsigned i=0; i( res, 0, 0, 0 ) ); + + std::ifstream ifs(randfname.c_str(), std::ios::binary); + if( !ifs ) + throw std::runtime_error(std::string("Could not open random number file \'")+randfname+std::string("\'!")); + + unsigned vartype; + unsigned nx,ny,nz,blksz; + int iseed; + long seed; + //ifs.read( (char*)&vartype, sizeof(unsigned) ); + + //... read header .../ + ifs.read( reinterpret_cast (&blksz), sizeof(int) ); + + if( blksz != 4*sizeof(int) ) + throw std::runtime_error("corrupt random number file"); + + ifs.read( reinterpret_cast (&nx), sizeof(unsigned) ); + ifs.read( reinterpret_cast (&ny), sizeof(unsigned) ); + ifs.read( reinterpret_cast (&nz), sizeof(unsigned) ); + ifs.read( reinterpret_cast (&iseed), sizeof(int) ); + seed = (long)iseed; + + ifs.read( reinterpret_cast (&blksz), sizeof(int) ); + + //... read data ...// + + ifs.read( reinterpret_cast (&blksz), sizeof(int) ); + if( blksz == nx*ny*sizeof(float) ) + vartype = 4; + else if( blksz == nx*ny*sizeof(double) ) + vartype = 8; + else + throw std::runtime_error("corrupt random number file"); + + + std::vector in_float; + std::vector in_double; + + std::cout << " - Random number file \'" << randfname << "\'\n" + << " contains " << nx*ny*nz << " numbers. Reading..." << std::endl; + + double sum = 0.0, sum2 = 0.0; + unsigned count = 0; + + if( vartype == 4 ) + { + for( int ii=0; ii<(int)nz; ++ii ) + { + in_float.assign(nx*ny,0.0f); + ifs.read( (char*)&in_float[0], nx*ny*sizeof(float) ); + + for( int jj=0,q=0; jj<(int)ny; ++jj ) + for( int kk=0; kk<(int)nx; ++kk ){ + sum += in_float[q]; + sum2 += in_float[q]*in_float[q]; + ++count; + +#ifdef RAND_DEBUG + (*rnums_[0])(ii,jj,kk) = 0.0; +#else + (*rnums_[0])(ii,jj,kk) = in_float[q++]; +#endif + + } + + } + } + else if( vartype == 8 ) + { + for( int ii=0; ii<(int)nz; ++ii ) + { + in_double.assign(nx*ny,0.0f); + ifs.read( (char*)&in_double[0], nx*ny*sizeof(double) ); + + for( int jj=0,q=0; jj<(int)ny; ++jj ) + for( int kk=0; kk<(int)nx; ++kk ) + { + sum += in_double[q]; + sum2 += in_double[q]*in_double[q]; + ++count; +#ifdef RAND_DEBUG + (*rnums_[0])(ii,jj,kk) = 0.0; +#else + (*rnums_[0])(ii,jj,kk) = in_double[q++]; +#endif + + } + } + } + + double mean, var; + mean = sum/count; + var = sum2/count-mean*mean; +#ifdef RAND_DEBUG + (*rnums_[0])(cubesize_/2-1,cubesize_/2-1,cubesize_/2-1) = 1.0; +#endif + + std::cout << " - Random numbers in file have \n" + << " mean = " << mean << " and var = " << var << std::endl; + + } + + //... copy construct by averaging down + explicit random_numbers( /*const*/ random_numbers & rc ) + { + //if( res > rc.m_res || (res/rc.m_res)%2 != 0 ) + // throw std::runtime_error("Invalid restriction in random number container copy constructor."); + + double sum = 0.0, sum2 = 0.0; + unsigned count = 0; + + //... initialize properties of container + if( rc.rnums_.size() == 1 ) + { + res_ = rc.res_/2; + cubesize_ = res_; + ncubes_ = 1; + baseseed_ = -2; + + //... use restriction to get consistent random numbers on coarser grid + mg_straight gop; + rnums_.push_back( new Meshvar( res_, 0, 0, 0 ) ); + gop.restrict( *rc.rnums_[0], *rnums_[0] ); + + + for( unsigned i=0; i< rnums_[0]->size(0); ++i ) + for( unsigned j=0; j< rnums_[0]->size(1); ++j ) + for( unsigned k=0; k< rnums_[0]->size(2); ++k ) + { + (*rnums_[0])(i,j,k) *= sqrt(8); //.. maintain that var(delta)=1 + sum += (*rnums_[0])(i,j,k); + sum2+= (*rnums_[0])(i,j,k) * (*rnums_[0])(i,j,k); + ++count; + } + } + else + { + res_ = rc.res_/2; + cubesize_ = res_; + ncubes_ = 1; + baseseed_ = -2; + + rnums_.push_back( new Meshvar( res_, 0, 0, 0 ) ); + double fac = 1.0/sqrt(8); + + for( unsigned i=0,ii=0; i( cubesize_, 0, 0, 0 ); + fill_cube(ic, jc, kc); + } + + //... determine cell in cube + is = (i - ic * cubesize_ + cubesize_) % cubesize_; + js = (j - jc * cubesize_ + cubesize_) % cubesize_; + ks = (k - kc * cubesize_ + cubesize_) % cubesize_; + + return (*rnums_[ icube ])(is,js,ks); + } + + +}; + + +#endif //__RANDOM_HH + diff --git a/schemes.hh b/schemes.hh new file mode 100644 index 0000000..900118a --- /dev/null +++ b/schemes.hh @@ -0,0 +1,275 @@ +/* + * schemes.hh + * GravitySolver + * + * Created by Oliver Hahn on 2/1/10. + * Copyright 2010 KIPAC/Stanford University. All rights reserved. + * + */ + +#ifndef __SCHEME_HH +#define __SCHEME_HH + +#include +#include + +#include "solver.hh" + +//... abstract implementation of the Poisson/Force scheme +template< class L, class G, typename real_t=double > +class scheme +{ +public: + typedef L laplacian; + typedef G gradient; + + laplacian m_laplacian; + gradient m_gradient; + + template< class C > + inline real_t grad_x( const C&c, const int i, const int j, const int k ) + { return m_gradient.apply_x( c,i,j,k ); } + + template< class C > + inline real_t grad_y( const C&c, const int i, const int j, const int k ) + { return m_gradient.apply_y( c,i,j,k ); } + + template< class C > + inline real_t grad_z( const C&c, const int i, const int j, const int k ) + { return m_gradient.apply_z( c,i,j,k ); } + + template< class C > + inline real_t L_apply( const C&c, const int i, const int j, const int k ) + { return m_laplacian.apply( c,i,j,k ); } + + template< class C > + inline real_t L_rhs( const C&c, const int i, const int j, const int k ) + { return m_laplacian.rhs( c,i,j,k ); } + + inline real_t ccoeff( void ) + { return m_laplacian.ccoeff(); } + +}; + + +template< int nextent, typename T > +class gradient +{ + typedef T real_t; + std::vector m_stencil; + const unsigned nl; +public: + + gradient() + : nl( 2*nextent+1 ) + { + m_stencil.assign(nl*nl*nl,(real_t)0.0); + } + + real_t& operator()(int i) + { return m_stencil[i+nextent]; } + + const real_t& operator()(int i) const + { return m_stencil[i+nextent]; } + + template< class C > + inline void apply( const C& c, C& f, int dir ) + { + f = c; + + int nx=c.size(0), ny=c.size(1), nz=c.size(2); + double hx = 1.0/(nx+1.0), hy = 1.0/(ny+1.0), hz = 1.0/(nz+1.0); + + f.zero(); + + if( dir == 0 ) + for( int i=0; i +class base_stencil +{ +protected: + std::vector m_stencil; + const unsigned nl; +public: + bool m_modsource; + +public: + base_stencil( bool amodsource = false ) + : nl( 2*nextent+1 ), m_modsource( amodsource ) + { + m_stencil.assign(nl*nl*nl,(real_t)0.0); + } + + real_t& operator()(int i, int j, int k) + { return m_stencil[((i+nextent)*nl+(j+nextent))*nl+(k+nextent)]; } + + const real_t& operator()(unsigned i, unsigned j, unsigned k) const + { return m_stencil[((i+nextent)*nl+(j+nextent))*nl+(k+nextent)]; } + + template< class C > + inline real_t rhs( const C& c, const int i, const int j, const int k ) + { + real_t sum = this->apply( c, i, j, k ); + sum -= (*this)(0,0,0) * c(i,j,k); + return sum; + } + + inline real_t ccoeff( void ) + { + return (*this)(0,0,0); + } + + + template< class C > + inline real_t apply( const C& c, const int i, const int j, const int k ) + { + real_t sum = 0.0; + + for( int ii=-nextent; ii<=nextent; ++ii ) + for( int jj=-nextent; jj<=nextent; ++jj ) + for( int kk=-nextent; kk<=nextent; ++kk ) + sum += (*this)(ii,jj,kk) * c(i+ii,j+jj,k+kk); + + return sum; + } + + template< class C > + inline real_t modsource( const C& c, const int i, const int j, const int k ) + { + return 0.0; + } + +}; + + +/***************************************************************************************/ +/***************************************************************************************/ +/***************************************************************************************/ + + +//... Implementation of the Gradient schemes............................................ + + +template< typename real_t > +class deriv_2P : public gradient<1,real_t> +{ + +public: + deriv_2P( void ) + { + (*this)( 0 ) = 0.0; + (*this)(-1 ) = -0.5; + (*this)(+1 ) = +0.5; + } + + +}; + +//... Implementation of the Laplacian schemes.......................................... + + +template< typename real_t > +class stencil_7P : public base_stencil<1,real_t> +{ + +public: + stencil_7P( void ) + { + (*this)( 0, 0, 0) = -6.0; + (*this)(-1, 0, 0) = +1.0; + (*this)(+1, 0, 0) = +1.0; + (*this)( 0,-1, 0) = +1.0; + (*this)( 0,+1, 0) = +1.0; + (*this)( 0, 0,-1) = +1.0; + (*this)( 0, 0,+1) = +1.0; + } + + template< class C > + inline real_t apply( const C& c, const int i, const int j, const int k ) const + { + return c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)-6.0*c(i,j,k); + } + + template< class C > + inline real_t rhs( const C& c, const int i, const int j, const int k ) const + { + return c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1); + } + + inline real_t ccoeff( void ) + { + return -6.0; + } +}; + + +template< typename real_t > +class stencil_13P : public base_stencil<2,real_t> +{ + +public: + stencil_13P( void ) + { + (*this)( 0, 0, 0) = -90.0/12.; + + (*this)(-1, 0, 0) = + (*this)(+1, 0, 0) = + (*this)( 0,-1, 0) = + (*this)( 0,+1, 0) = + (*this)( 0, 0,-1) = + (*this)( 0, 0,+1) = 16./12.; + + (*this)(-2, 0, 0) = + (*this)(+2, 0, 0) = + (*this)( 0,-2, 0) = + (*this)( 0,+2, 0) = + (*this)( 0, 0,-2) = + (*this)( 0, 0,+2) = -1./12.; + } + + template< class C > + inline real_t apply( const C& c, const int i, const int j, const int k ) + { + return + (-1.0*(c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2)) + +16.0*(c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)) + -90.0*c(i,j,k))/12.0; + } + + template< class C > + inline real_t rhs( const C& c, const int i, const int j, const int k ) + { + return + (-1.0*(c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2)) + +16.0*(c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)))/12.0; + } + + inline real_t ccoeff( void ) + { + return -90.0/12.0; + } +}; + +#endif + + diff --git a/solver.hh b/solver.hh new file mode 100644 index 0000000..148eb55 --- /dev/null +++ b/solver.hh @@ -0,0 +1,1110 @@ +/* + * solver.h + * GravitySolver + * + * Created by Oliver Hahn on 1/20/10. + * Copyright 2010 KIPAC/Stanford University. All rights reserved. + * + */ + +#ifndef __SOLVER_HH +#define __SOLVER_HH + +#include +#include +#include "mesh.hh" + +#define BEGIN_MULTIGRID_NAMESPACE namespace multigrid { +#define END_MULTIGRID_NAMESPACE } + +BEGIN_MULTIGRID_NAMESPACE + +namespace opt { + enum smtype { sm_jacobi, sm_gauss_seidel, sm_sor }; +} + +template< class S, class O, typename T=double > +class solver +{ +public: + typedef S scheme; + typedef O mgop; + +protected: + scheme m_scheme; + mgop m_gridop; + unsigned m_npresmooth, m_npostsmooth; + opt::smtype m_smoother; + unsigned m_ilevelmin; + + const static bool m_bperiodic = true; + + GridHierarchy *m_pu, *m_pf, *m_pfsave; + GridHierarchy *m_pmask; + const MeshvarBnd *m_pubnd; + + double compute_error( const MeshvarBnd& u, const MeshvarBnd& unew ); + + double compute_error( const GridHierarchy& uh, const GridHierarchy& uhnew, bool verbose ); + +protected: + + void Jacobi( T h, MeshvarBnd* u, const MeshvarBnd* f ); + + void GaussSeidel( T h, MeshvarBnd* u, const MeshvarBnd* f ); + + void SOR( T h, MeshvarBnd* u, const MeshvarBnd* f ); + + void twoGrid( unsigned ilevel ); + + void interp_coarse_fine( unsigned ilevel, MeshvarBnd& coarse, MeshvarBnd& fine, bool bcf=true ); + + void setBC( unsigned ilevel ); + + void make_periodic( MeshvarBnd *u ); + + void interp_cubic( MeshvarBnd& coarse, MeshvarBnd& fine, int itop, int jtop, int ktop, int i, int j, int k ); + void interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd& coarse, MeshvarBnd& fine, bool bcf ); + +public: + solver( GridHierarchy& f, //const MeshvarBnd& uBC_top, + opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth ); + + ~solver() + { delete m_pmask; } + + double solve( GridHierarchy& u, double accuracy, double h=-1.0, bool verbose=false ); + + double solve( GridHierarchy& u, double accuracy, bool verbose=false ) + { + return this->solve ( u, accuracy, -1.0, verbose ); + } + + + +}; + + +template< class S, class O, typename T > +solver::solver( GridHierarchy& f, //const MeshvarBnd& ubnd, + opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth ) +: m_scheme(), m_gridop(), m_npresmooth( npresmooth ), m_npostsmooth( npostsmooth ), +m_smoother( smoother ), m_ilevelmin( f.levelmin() ), m_pf( &f )//, m_pubnd( &ubnd ) +{ + //... initialize the refinement mask + m_pmask = new GridHierarchy( f.m_nbnd ); + m_pmask->create_base_hierarchy(f.levelmin()); + + for( unsigned ilevel=f.levelmin()+1; ilevel<=f.levelmax(); ++ilevel ) + { + meshvar_bnd* pf = f.get_grid(ilevel); + m_pmask->add_patch( pf->offset(0), pf->offset(1), pf->offset(2), pf->size(0), pf->size(1), pf->size(2) ); + } + + m_pmask->zero(); + + for( unsigned ilevel=0; ilevel *pf = f.get_grid(ilevel); + for( int ix=0; ix < (int)pf->size(0); ++ix ) + for( int iy=0; iy < (int)pf->size(1); ++iy ) + for( int iz=0; iz < (int)pf->size(2); ++iz ) + (*m_pmask->get_grid(ilevel))(ix,iy,iz) = true; + } + + for( unsigned ilevel=m_ilevelmin; ilevel* pf = f.get_grid(ilevel+1);//, *pfc = f.get_grid(ilevel); + + for( int ix=pf->offset(0); ix < (int)(pf->offset(0)+pf->size(0)/2); ++ix ) + for( int iy=pf->offset(1); iy < (int)(pf->offset(1)+pf->size(1)/2); ++iy ) + for( int iz=pf->offset(2); iz < (int)(pf->offset(2)+pf->size(2)/2); ++iz ) + (*m_pmask->get_grid(ilevel))(ix,iy,iz) = true; + } + +} + + +template< class S, class O, typename T > +void solver::Jacobi( T h, MeshvarBnd *u, const MeshvarBnd* f ) +{ + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + double + c0 = -1.0/m_scheme.ccoeff(), + h2 = h*h; + + MeshvarBnd uold(*u); + + double alpha = 0.95, ialpha = 1.0-alpha; + + #pragma omp parallel for + for( int ix=0; ix +void solver::SOR( T h, MeshvarBnd *u, const MeshvarBnd* f ) +{ + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + double + c0 = -1.0/m_scheme.ccoeff(), + h2 = h*h; + + MeshvarBnd uold(*u); + + double + alpha = 1.2, + //alpha = 2 / (1 + 4 * atan(1.0) / double(u->size(0)))-1.0, + ialpha = 1.0-alpha; + + //std::cerr << "omega_opt = " << alpha << std::endl; + + #pragma omp parallel for + for( int ix=0; ix +void solver::GaussSeidel( T h, MeshvarBnd* u, const MeshvarBnd* f ) +{ + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + T + c0 = -1.0/m_scheme.ccoeff(), + h2 = h*h; + + for( int color=0; color < 2; ++color ) + #pragma omp parallel for + for( int ix=0; ix +void solver::twoGrid( unsigned ilevel ) +{ + MeshvarBnd *uf, *uc, *ff, *fc; + + T + h = 1.0/(pow(2.0,ilevel)), + c0 = -1.0/m_scheme.ccoeff(), + h2 = h*h; + + uf = m_pu->get_grid(ilevel); + ff = m_pf->get_grid(ilevel); + + uc = m_pu->get_grid(ilevel-1); + fc = m_pf->get_grid(ilevel-1); + + int + nx = uf->size(0), + ny = uf->size(1), + nz = uf->size(2); + + if( m_bperiodic && ilevel <= m_ilevelmin) + make_periodic( uf ); + else if(!m_bperiodic) + setBC( ilevel ); + + //... do smoothing sweeps with specified solver + for( unsigned i=0; i m_ilevelmin ) + interp_coarse_fine(ilevel, *uc, *uf ); + + if( m_smoother == opt::sm_gauss_seidel ) + GaussSeidel( h, uf, ff ); + + else if( m_smoother == opt::sm_jacobi ) + Jacobi( h, uf, ff); + + else if( m_smoother == opt::sm_sor ) + SOR( h, uf, ff ); + + if( m_bperiodic && ilevel <= m_ilevelmin ) + make_periodic( uf ); + } + + + m_gridop.restrict( *uf, *uc ); + + //... essential!! + if( m_bperiodic && ilevel <= m_ilevelmin ) + make_periodic( uc ); + else if( m_bperiodic ) + interp_coarse_fine(ilevel,*uc,*uf); + + meshvar_bnd Lu(*uf,false); + Lu.zero(); + #pragma omp parallel for + for( int ix=0; ixsize(0); ++ix ) + for( int iy=0; iy<(int)uc->size(1); ++iy ) + for( int iz=0; iz<(int)uc->size(2); ++iz ) + if( (*m_pmask->get_grid(ilevel-1))(ix,iy,iz) == true ) + (*fc)(ix,iy,iz) += ((tLu( ix, iy, iz ) - (m_scheme.apply( *uc, ix, iy, iz )/(4.0*h2)))); + + + tLu.deallocate(); + + meshvar_bnd ucsave(*uc,true); + + //... have we reached the end of the recursion or do we need to go up one level? + if( ilevel == 1 ) + if( m_bperiodic ) + (*uc)(0,0,0) = 0.0; + else + (*uc)(0,0,0) = (m_scheme.rhs( (*uc), 0, 0, 0 ) + 4.0 * h2 * (*fc)(0,0,0))*c0; + else + twoGrid( ilevel-1 ); + + meshvar_bnd cc(*uc,false); + + //... compute correction on coarse grid + #pragma omp parallel for + for( int ix=0; ix<(int)cc.size(0); ++ix ) + for( int iy=0; iy<(int)cc.size(1); ++iy ) + for( int iz=0; iz<(int)cc.size(2); ++iz ) + cc(ix,iy,iz) = (*uc)(ix,iy,iz) - ucsave(ix,iy,iz); + + ucsave.deallocate(); + + + //... prolongate correction to fine grid + meshvar_bnd cf(*uf,false); + m_gridop.prolong( cc, cf ); + + cc.deallocate(); + + + #pragma omp parallel for + for( int ix=0; ix m_ilevelmin ) + // interp_coarse_fine(ilevel, *uc, *uf ); + + //... do smoothing sweeps with specified solver + for( unsigned i=0; i m_ilevelmin ) + interp_coarse_fine(ilevel, *uc, *uf ); + + if( m_smoother == opt::sm_gauss_seidel ) + GaussSeidel( h, uf, ff ); + + else if( m_smoother == opt::sm_jacobi ) + Jacobi( h, uf, ff); + + else if( m_smoother == opt::sm_sor ) + SOR( h, uf, ff ); + + if( m_bperiodic && ilevel <= m_ilevelmin ) + make_periodic( uf ); + + } +} + +template< class S, class O, typename T > +double solver::compute_error( const MeshvarBnd& u, const MeshvarBnd& unew ) +{ + int + nx = u.size(0), + ny = u.size(1), + nz = u.size(2); + + double err = 0.0; + unsigned count = 0; + +#pragma omp parallel for reduction(+:err,count) + for( int ix=0; ix 0.0 )//&& u(ix,iy,iz) != unew(ix,iy,iz) ) + { + err += fabs(1.0 - u(ix,iy,iz)/unew(ix,iy,iz)); + ++count; + } + + if( count != 0 ) + err /= count; + + return err; +} + +template< class S, class O, typename T > +double solver::compute_error( const GridHierarchy& uh, const GridHierarchy& uhnew, bool verbose ) +{ + double maxerr = 0.0; + + for( unsigned ilevel=uh.levelmin(); ilevel <= uh.levelmax(); ++ilevel ) + { + double err = 0.0; + err = compute_error( *uh.get_grid(ilevel), *uhnew.get_grid(ilevel) ); + + if( verbose ) + std::cout << " Level " << std::setw(6) << ilevel << ", Error = " << err << std::endl; + maxerr = std::max(maxerr,err); + + } + return maxerr; +} + +template< class S, class O, typename T > +double solver::solve( GridHierarchy& uh, double acc, double h, bool verbose ) +{ + + double err; + + GridHierarchy uhnew(uh);//, fsave(*m_pf); + m_pu = &uh; + + unsigned niter = 0; + + //... iterate ...// + while (true) + { + + + twoGrid( uh.levelmax() ); + err = compute_error( *m_pu, uhnew, verbose ); + ++niter; + + if( verbose ){ + std::cout << "--> Step No. " << std::setw(3) << niter << ", Max Err = " << err << std::endl; + std::cout << "-------------------------------------------------------------\n"; + } + + if( (niter > 1) && ((err < acc) || (niter > 20)) ) + break; + + uhnew = *m_pu; + //*m_pf = fsave; + } + + if( err > acc ) + std::cout << "Error : no convergence in Poisson solver" << std::endl; + else if( verbose ) + std::cout << " - Converged in " << niter << " steps to req. acc. of " << acc << std::endl; + + + //uh = uhnew; + //*m_pf = fsave; + return err; +} + +inline double interp2( double x1, double x2, double x3, double f1, double f2, double f3, double x ) +{ + double a,b,c; + a = (x1 * f3 - x3 * f1 - x2 * f3 - x1 * f2 + x2 * f1 + x3 * f2) / (x1 * x3 * x3 - x2 * x3 * x3 + x2 * x1 * x1 - x3 * x1 * x1 + x3 * x2 * x2 - x1 * x2 * x2); + b = -(x1 * x1 * f3 - x1 * x1 * f2 - f1 * x3 * x3 + f2 * x3 * x3 - x2 * x2 * f3 + f1 * x2 * x2) / (x1 - x2) / (x1 * x2 - x1 * x3 + x3 * x3 - x2 * x3); + c = (x1 * x1 * x2 * f3 - x1 * x1 * x3 * f2 - x2 * x2 * x1 * f3 + f2 * x1 * x3 * x3 + x2 * x2 * x3 * f1 - f1 * x2 * x3 * x3) / (x1 - x2) / (x1 * x2 - x1 * x3 + x3 * x3 - x2 * x3); + + return a*x*x+b*x+c; +} + +inline double interp2( double fleft, double fcenter, double fright, double x ) +{ + double a,b,c; + a = 0.5*(fleft+fright)-fcenter; + b = 0.5*(fright-fleft); + c = fcenter; + + return a*x*x+b*x+c; +} + + +inline double interp2left( double fleft, double fcenter, double fright ) +{ + double a,b,c; + a = (6.0*fright-10.0*fcenter+4.0*fleft)/15.0; + b = (-4.0*fleft+9.0*fright-5.0*fcenter)/15.0; + c = fcenter; + + return a-b+c; +} + +inline double interp2right( double fleft, double fcenter, double fright ) +{ + double a,b,c; + a = (6.0*fleft-10.0*fcenter+4.0*fright)/15.0; + b = (4.0*fright-9.0*fleft+5.0*fcenter)/15.0; + c = fcenter; + + return a+b+c; +} + +template< class S, class O, typename T > +void solver::interp_cubic( MeshvarBnd& coarse, MeshvarBnd& fine, int i, int j, int k, int itop, int jtop, int ktop ) +{ + MeshvarBnd &u = fine; + MeshvarBnd &utop = coarse; + + /* + u(i+0,j+0,k+0) = ( -125.*utop(itop-2,jtop-2,ktop-2) +875.*utop(itop-2,jtop-2,ktop-1) +2625.*utop(itop-2,jtop-2,ktop) + -175.*utop(itop-2,jtop-2,ktop+1) +875.*utop(itop-2,jtop-1,ktop-2) -6125.*utop(itop-2,jtop-1,ktop-1) + -18375.*utop(itop-2,jtop-1,ktop) +1225.*utop(itop-2,jtop-1,ktop+1) +2625.*utop(itop-2,jtop,ktop-2) + -18375.*utop(itop-2,jtop,ktop-1) -55125.*utop(itop-2,jtop,ktop) +3675.*utop(itop-2,jtop,ktop+1) + -175.*utop(itop-2,jtop+1,ktop-2) +1225.*utop(itop-2,jtop+1,ktop-1) +3675.*utop(itop-2,jtop+1,ktop) + -245.*utop(itop-2,jtop+1,ktop+1) +875.*utop(itop-1,jtop-2,ktop-2) -6125.*utop(itop-1,jtop-2,ktop-1) + -18375.*utop(itop-1,jtop-2,ktop) +1225.*utop(itop-1,jtop-2,ktop+1) -6125.*utop(itop-1,jtop-1,ktop-2) + +42875.*utop(itop-1,jtop-1,ktop-1) +128625.*utop(itop-1,jtop-1,ktop) -8575.*utop(itop-1,jtop-1,ktop+1) + -18375.*utop(itop-1,jtop,ktop-2) +128625.*utop(itop-1,jtop,ktop-1) +385875.*utop(itop-1,jtop,ktop) + -25725.*utop(itop-1,jtop,ktop+1) +1225.*utop(itop-1,jtop+1,ktop-2) -8575.*utop(itop-1,jtop+1,ktop-1) + -25725.*utop(itop-1,jtop+1,ktop) +1715.*utop(itop-1,jtop+1,ktop+1) +2625.*utop(itop,jtop-2,ktop-2) + -18375.*utop(itop,jtop-2,ktop-1) -55125.*utop(itop,jtop-2,ktop) +3675.*utop(itop,jtop-2,ktop+1) + -18375.*utop(itop,jtop-1,ktop-2) +128625.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) + -25725.*utop(itop,jtop-1,ktop+1) -55125.*utop(itop,jtop,ktop-2) +385875.*utop(itop,jtop,ktop-1) + +1157625.*utop(itop,jtop,ktop) -77175.*utop(itop,jtop,ktop+1) +3675.*utop(itop,jtop+1,ktop-2) + -25725.*utop(itop,jtop+1,ktop-1) -77175.*utop(itop,jtop+1,ktop) +5145.*utop(itop,jtop+1,ktop+1) + -175.*utop(itop+1,jtop-2,ktop-2) +1225.*utop(itop+1,jtop-2,ktop-1) +3675.*utop(itop+1,jtop-2,ktop) + -245.*utop(itop+1,jtop-2,ktop+1) +1225.*utop(itop+1,jtop-1,ktop-2) -8575.*utop(itop+1,jtop-1,ktop-1) + -25725.*utop(itop+1,jtop-1,ktop) +1715.*utop(itop+1,jtop-1,ktop+1) +3675.*utop(itop+1,jtop,ktop-2) + -25725.*utop(itop+1,jtop,ktop-1) -77175.*utop(itop+1,jtop,ktop) +5145.*utop(itop+1,jtop,ktop+1) + -245.*utop(itop+1,jtop+1,ktop-2) +1715.*utop(itop+1,jtop+1,ktop-1) +5145.*utop(itop+1,jtop+1,ktop) + -343.*utop(itop+1,jtop+1,ktop+1) )/2097152.; + u(i+0,j+0,k+1) = ( -175.*utop(itop-2,jtop-2,ktop-1) +2625.*utop(itop-2,jtop-2,ktop) +875.*utop(itop-2,jtop-2,ktop+1) + -125.*utop(itop-2,jtop-2,ktop+2) +1225.*utop(itop-2,jtop-1,ktop-1) -18375.*utop(itop-2,jtop-1,ktop) + -6125.*utop(itop-2,jtop-1,ktop+1) +875.*utop(itop-2,jtop-1,ktop+2) +3675.*utop(itop-2,jtop,ktop-1) + -55125.*utop(itop-2,jtop,ktop) -18375.*utop(itop-2,jtop,ktop+1) +2625.*utop(itop-2,jtop,ktop+2) + -245.*utop(itop-2,jtop+1,ktop-1) +3675.*utop(itop-2,jtop+1,ktop) +1225.*utop(itop-2,jtop+1,ktop+1) + -175.*utop(itop-2,jtop+1,ktop+2) +1225.*utop(itop-1,jtop-2,ktop-1) -18375.*utop(itop-1,jtop-2,ktop) + -6125.*utop(itop-1,jtop-2,ktop+1) +875.*utop(itop-1,jtop-2,ktop+2) -8575.*utop(itop-1,jtop-1,ktop-1) + +128625.*utop(itop-1,jtop-1,ktop) +42875.*utop(itop-1,jtop-1,ktop+1) -6125.*utop(itop-1,jtop-1,ktop+2) + -25725.*utop(itop-1,jtop,ktop-1) +385875.*utop(itop-1,jtop,ktop) +128625.*utop(itop-1,jtop,ktop+1) + -18375.*utop(itop-1,jtop,ktop+2) +1715.*utop(itop-1,jtop+1,ktop-1) -25725.*utop(itop-1,jtop+1,ktop) + -8575.*utop(itop-1,jtop+1,ktop+1) +1225.*utop(itop-1,jtop+1,ktop+2) +3675.*utop(itop,jtop-2,ktop-1) + -55125.*utop(itop,jtop-2,ktop) -18375.*utop(itop,jtop-2,ktop+1) +2625.*utop(itop,jtop-2,ktop+2) + -25725.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) +128625.*utop(itop,jtop-1,ktop+1) + -18375.*utop(itop,jtop-1,ktop+2) -77175.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) + +385875.*utop(itop,jtop,ktop+1) -55125.*utop(itop,jtop,ktop+2) +5145.*utop(itop,jtop+1,ktop-1) + -77175.*utop(itop,jtop+1,ktop) -25725.*utop(itop,jtop+1,ktop+1) +3675.*utop(itop,jtop+1,ktop+2) + -245.*utop(itop+1,jtop-2,ktop-1) +3675.*utop(itop+1,jtop-2,ktop) +1225.*utop(itop+1,jtop-2,ktop+1) + -175.*utop(itop+1,jtop-2,ktop+2) +1715.*utop(itop+1,jtop-1,ktop-1) -25725.*utop(itop+1,jtop-1,ktop) + -8575.*utop(itop+1,jtop-1,ktop+1) +1225.*utop(itop+1,jtop-1,ktop+2) +5145.*utop(itop+1,jtop,ktop-1) + -77175.*utop(itop+1,jtop,ktop) -25725.*utop(itop+1,jtop,ktop+1) +3675.*utop(itop+1,jtop,ktop+2) + -343.*utop(itop+1,jtop+1,ktop-1) +5145.*utop(itop+1,jtop+1,ktop) +1715.*utop(itop+1,jtop+1,ktop+1) + -245.*utop(itop+1,jtop+1,ktop+2) )/2097152.; + u(i+0,j+1,k+0) = ( -175.*utop(itop-2,jtop-1,ktop-2) +1225.*utop(itop-2,jtop-1,ktop-1) +3675.*utop(itop-2,jtop-1,ktop) + -245.*utop(itop-2,jtop-1,ktop+1) +2625.*utop(itop-2,jtop,ktop-2) -18375.*utop(itop-2,jtop,ktop-1) + -55125.*utop(itop-2,jtop,ktop) +3675.*utop(itop-2,jtop,ktop+1) +875.*utop(itop-2,jtop+1,ktop-2) + -6125.*utop(itop-2,jtop+1,ktop-1) -18375.*utop(itop-2,jtop+1,ktop) +1225.*utop(itop-2,jtop+1,ktop+1) + -125.*utop(itop-2,jtop+2,ktop-2) +875.*utop(itop-2,jtop+2,ktop-1) +2625.*utop(itop-2,jtop+2,ktop) + -175.*utop(itop-2,jtop+2,ktop+1) +1225.*utop(itop-1,jtop-1,ktop-2) -8575.*utop(itop-1,jtop-1,ktop-1) + -25725.*utop(itop-1,jtop-1,ktop) +1715.*utop(itop-1,jtop-1,ktop+1) -18375.*utop(itop-1,jtop,ktop-2) + +128625.*utop(itop-1,jtop,ktop-1) +385875.*utop(itop-1,jtop,ktop) -25725.*utop(itop-1,jtop,ktop+1) + -6125.*utop(itop-1,jtop+1,ktop-2) +42875.*utop(itop-1,jtop+1,ktop-1) +128625.*utop(itop-1,jtop+1,ktop) + -8575.*utop(itop-1,jtop+1,ktop+1) +875.*utop(itop-1,jtop+2,ktop-2) -6125.*utop(itop-1,jtop+2,ktop-1) + -18375.*utop(itop-1,jtop+2,ktop) +1225.*utop(itop-1,jtop+2,ktop+1) +3675.*utop(itop,jtop-1,ktop-2) + -25725.*utop(itop,jtop-1,ktop-1) -77175.*utop(itop,jtop-1,ktop) +5145.*utop(itop,jtop-1,ktop+1) + -55125.*utop(itop,jtop,ktop-2) +385875.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) + -77175.*utop(itop,jtop,ktop+1) -18375.*utop(itop,jtop+1,ktop-2) +128625.*utop(itop,jtop+1,ktop-1) + +385875.*utop(itop,jtop+1,ktop) -25725.*utop(itop,jtop+1,ktop+1) +2625.*utop(itop,jtop+2,ktop-2) + -18375.*utop(itop,jtop+2,ktop-1) -55125.*utop(itop,jtop+2,ktop) +3675.*utop(itop,jtop+2,ktop+1) + -245.*utop(itop+1,jtop-1,ktop-2) +1715.*utop(itop+1,jtop-1,ktop-1) +5145.*utop(itop+1,jtop-1,ktop) + -343.*utop(itop+1,jtop-1,ktop+1) +3675.*utop(itop+1,jtop,ktop-2) -25725.*utop(itop+1,jtop,ktop-1) + -77175.*utop(itop+1,jtop,ktop) +5145.*utop(itop+1,jtop,ktop+1) +1225.*utop(itop+1,jtop+1,ktop-2) + -8575.*utop(itop+1,jtop+1,ktop-1) -25725.*utop(itop+1,jtop+1,ktop) +1715.*utop(itop+1,jtop+1,ktop+1) + -175.*utop(itop+1,jtop+2,ktop-2) +1225.*utop(itop+1,jtop+2,ktop-1) +3675.*utop(itop+1,jtop+2,ktop) + -245.*utop(itop+1,jtop+2,ktop+1) )/2097152.; + u(i+0,j+1,k+1) = ( -245.*utop(itop-2,jtop-1,ktop-1) +3675.*utop(itop-2,jtop-1,ktop) +1225.*utop(itop-2,jtop-1,ktop+1) + -175.*utop(itop-2,jtop-1,ktop+2) +3675.*utop(itop-2,jtop,ktop-1) -55125.*utop(itop-2,jtop,ktop) + -18375.*utop(itop-2,jtop,ktop+1) +2625.*utop(itop-2,jtop,ktop+2) +1225.*utop(itop-2,jtop+1,ktop-1) + -18375.*utop(itop-2,jtop+1,ktop) -6125.*utop(itop-2,jtop+1,ktop+1) +875.*utop(itop-2,jtop+1,ktop+2) + -175.*utop(itop-2,jtop+2,ktop-1) +2625.*utop(itop-2,jtop+2,ktop) +875.*utop(itop-2,jtop+2,ktop+1) + -125.*utop(itop-2,jtop+2,ktop+2) +1715.*utop(itop-1,jtop-1,ktop-1) -25725.*utop(itop-1,jtop-1,ktop) + -8575.*utop(itop-1,jtop-1,ktop+1) +1225.*utop(itop-1,jtop-1,ktop+2) -25725.*utop(itop-1,jtop,ktop-1) + +385875.*utop(itop-1,jtop,ktop) +128625.*utop(itop-1,jtop,ktop+1) -18375.*utop(itop-1,jtop,ktop+2) + -8575.*utop(itop-1,jtop+1,ktop-1) +128625.*utop(itop-1,jtop+1,ktop) +42875.*utop(itop-1,jtop+1,ktop+1) + -6125.*utop(itop-1,jtop+1,ktop+2) +1225.*utop(itop-1,jtop+2,ktop-1) -18375.*utop(itop-1,jtop+2,ktop) + -6125.*utop(itop-1,jtop+2,ktop+1) +875.*utop(itop-1,jtop+2,ktop+2) +5145.*utop(itop,jtop-1,ktop-1) + -77175.*utop(itop,jtop-1,ktop) -25725.*utop(itop,jtop-1,ktop+1) +3675.*utop(itop,jtop-1,ktop+2) + -77175.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) +385875.*utop(itop,jtop,ktop+1) + -55125.*utop(itop,jtop,ktop+2) -25725.*utop(itop,jtop+1,ktop-1) +385875.*utop(itop,jtop+1,ktop) + +128625.*utop(itop,jtop+1,ktop+1) -18375.*utop(itop,jtop+1,ktop+2) +3675.*utop(itop,jtop+2,ktop-1) + -55125.*utop(itop,jtop+2,ktop) -18375.*utop(itop,jtop+2,ktop+1) +2625.*utop(itop,jtop+2,ktop+2) + -343.*utop(itop+1,jtop-1,ktop-1) +5145.*utop(itop+1,jtop-1,ktop) +1715.*utop(itop+1,jtop-1,ktop+1) + -245.*utop(itop+1,jtop-1,ktop+2) +5145.*utop(itop+1,jtop,ktop-1) -77175.*utop(itop+1,jtop,ktop) + -25725.*utop(itop+1,jtop,ktop+1) +3675.*utop(itop+1,jtop,ktop+2) +1715.*utop(itop+1,jtop+1,ktop-1) + -25725.*utop(itop+1,jtop+1,ktop) -8575.*utop(itop+1,jtop+1,ktop+1) +1225.*utop(itop+1,jtop+1,ktop+2) + -245.*utop(itop+1,jtop+2,ktop-1) +3675.*utop(itop+1,jtop+2,ktop) +1225.*utop(itop+1,jtop+2,ktop+1) + -175.*utop(itop+1,jtop+2,ktop+2) )/2097152.; + u(i+1,j+0,k+0) = ( -175.*utop(itop-1,jtop-2,ktop-2) +1225.*utop(itop-1,jtop-2,ktop-1) +3675.*utop(itop-1,jtop-2,ktop) + -245.*utop(itop-1,jtop-2,ktop+1) +1225.*utop(itop-1,jtop-1,ktop-2) -8575.*utop(itop-1,jtop-1,ktop-1) + -25725.*utop(itop-1,jtop-1,ktop) +1715.*utop(itop-1,jtop-1,ktop+1) +3675.*utop(itop-1,jtop,ktop-2) + -25725.*utop(itop-1,jtop,ktop-1) -77175.*utop(itop-1,jtop,ktop) +5145.*utop(itop-1,jtop,ktop+1) + -245.*utop(itop-1,jtop+1,ktop-2) +1715.*utop(itop-1,jtop+1,ktop-1) +5145.*utop(itop-1,jtop+1,ktop) + -343.*utop(itop-1,jtop+1,ktop+1) +2625.*utop(itop,jtop-2,ktop-2) -18375.*utop(itop,jtop-2,ktop-1) + -55125.*utop(itop,jtop-2,ktop) +3675.*utop(itop,jtop-2,ktop+1) -18375.*utop(itop,jtop-1,ktop-2) + +128625.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) -25725.*utop(itop,jtop-1,ktop+1) + -55125.*utop(itop,jtop,ktop-2) +385875.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) + -77175.*utop(itop,jtop,ktop+1) +3675.*utop(itop,jtop+1,ktop-2) -25725.*utop(itop,jtop+1,ktop-1) + -77175.*utop(itop,jtop+1,ktop) +5145.*utop(itop,jtop+1,ktop+1) +875.*utop(itop+1,jtop-2,ktop-2) + -6125.*utop(itop+1,jtop-2,ktop-1) -18375.*utop(itop+1,jtop-2,ktop) +1225.*utop(itop+1,jtop-2,ktop+1) + -6125.*utop(itop+1,jtop-1,ktop-2) +42875.*utop(itop+1,jtop-1,ktop-1) +128625.*utop(itop+1,jtop-1,ktop) + -8575.*utop(itop+1,jtop-1,ktop+1) -18375.*utop(itop+1,jtop,ktop-2) +128625.*utop(itop+1,jtop,ktop-1) + +385875.*utop(itop+1,jtop,ktop) -25725.*utop(itop+1,jtop,ktop+1) +1225.*utop(itop+1,jtop+1,ktop-2) + -8575.*utop(itop+1,jtop+1,ktop-1) -25725.*utop(itop+1,jtop+1,ktop) +1715.*utop(itop+1,jtop+1,ktop+1) + -125.*utop(itop+2,jtop-2,ktop-2) +875.*utop(itop+2,jtop-2,ktop-1) +2625.*utop(itop+2,jtop-2,ktop) + -175.*utop(itop+2,jtop-2,ktop+1) +875.*utop(itop+2,jtop-1,ktop-2) -6125.*utop(itop+2,jtop-1,ktop-1) + -18375.*utop(itop+2,jtop-1,ktop) +1225.*utop(itop+2,jtop-1,ktop+1) +2625.*utop(itop+2,jtop,ktop-2) + -18375.*utop(itop+2,jtop,ktop-1) -55125.*utop(itop+2,jtop,ktop) +3675.*utop(itop+2,jtop,ktop+1) + -175.*utop(itop+2,jtop+1,ktop-2) +1225.*utop(itop+2,jtop+1,ktop-1) +3675.*utop(itop+2,jtop+1,ktop) + -245.*utop(itop+2,jtop+1,ktop+1) )/2097152.; + u(i+1,j+0,k+1) = ( -245.*utop(itop-1,jtop-2,ktop-1) +3675.*utop(itop-1,jtop-2,ktop) +1225.*utop(itop-1,jtop-2,ktop+1) -175.*utop(itop-1,jtop-2,ktop+2) +1715.*utop(itop-1,jtop-1,ktop-1) -25725.*utop(itop-1,jtop-1,ktop) -8575.*utop(itop-1,jtop-1,ktop+1) +1225.*utop(itop-1,jtop-1,ktop+2) +5145.*utop(itop-1,jtop,ktop-1) -77175.*utop(itop-1,jtop,ktop) -25725.*utop(itop-1,jtop,ktop+1) +3675.*utop(itop-1,jtop,ktop+2) -343.*utop(itop-1,jtop+1,ktop-1) +5145.*utop(itop-1,jtop+1,ktop) +1715.*utop(itop-1,jtop+1,ktop+1) -245.*utop(itop-1,jtop+1,ktop+2) +3675.*utop(itop,jtop-2,ktop-1) -55125.*utop(itop,jtop-2,ktop) -18375.*utop(itop,jtop-2,ktop+1) +2625.*utop(itop,jtop-2,ktop+2) -25725.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) +128625.*utop(itop,jtop-1,ktop+1) -18375.*utop(itop,jtop-1,ktop+2) -77175.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) +385875.*utop(itop,jtop,ktop+1) -55125.*utop(itop,jtop,ktop+2) +5145.*utop(itop,jtop+1,ktop-1) -77175.*utop(itop,jtop+1,ktop) -25725.*utop(itop,jtop+1,ktop+1) +3675.*utop(itop,jtop+1,ktop+2) +1225.*utop(itop+1,jtop-2,ktop-1) -18375.*utop(itop+1,jtop-2,ktop) -6125.*utop(itop+1,jtop-2,ktop+1) +875.*utop(itop+1,jtop-2,ktop+2) -8575.*utop(itop+1,jtop-1,ktop-1) +128625.*utop(itop+1,jtop-1,ktop) +42875.*utop(itop+1,jtop-1,ktop+1) -6125.*utop(itop+1,jtop-1,ktop+2) -25725.*utop(itop+1,jtop,ktop-1) +385875.*utop(itop+1,jtop,ktop) +128625.*utop(itop+1,jtop,ktop+1) -18375.*utop(itop+1,jtop,ktop+2) +1715.*utop(itop+1,jtop+1,ktop-1) -25725.*utop(itop+1,jtop+1,ktop) -8575.*utop(itop+1,jtop+1,ktop+1) +1225.*utop(itop+1,jtop+1,ktop+2) -175.*utop(itop+2,jtop-2,ktop-1) +2625.*utop(itop+2,jtop-2,ktop) +875.*utop(itop+2,jtop-2,ktop+1) -125.*utop(itop+2,jtop-2,ktop+2) +1225.*utop(itop+2,jtop-1,ktop-1) -18375.*utop(itop+2,jtop-1,ktop) -6125.*utop(itop+2,jtop-1,ktop+1) +875.*utop(itop+2,jtop-1,ktop+2) +3675.*utop(itop+2,jtop,ktop-1) -55125.*utop(itop+2,jtop,ktop) -18375.*utop(itop+2,jtop,ktop+1) +2625.*utop(itop+2,jtop,ktop+2) -245.*utop(itop+2,jtop+1,ktop-1) +3675.*utop(itop+2,jtop+1,ktop) +1225.*utop(itop+2,jtop+1,ktop+1) -175.*utop(itop+2,jtop+1,ktop+2) )/2097152.; + u(i+1,j+1,k+0) = ( -245.*utop(itop-1,jtop-1,ktop-2) +1715.*utop(itop-1,jtop-1,ktop-1) +5145.*utop(itop-1,jtop-1,ktop) -343.*utop(itop-1,jtop-1,ktop+1) +3675.*utop(itop-1,jtop,ktop-2) -25725.*utop(itop-1,jtop,ktop-1) -77175.*utop(itop-1,jtop,ktop) +5145.*utop(itop-1,jtop,ktop+1) +1225.*utop(itop-1,jtop+1,ktop-2) -8575.*utop(itop-1,jtop+1,ktop-1) -25725.*utop(itop-1,jtop+1,ktop) +1715.*utop(itop-1,jtop+1,ktop+1) -175.*utop(itop-1,jtop+2,ktop-2) +1225.*utop(itop-1,jtop+2,ktop-1) +3675.*utop(itop-1,jtop+2,ktop) -245.*utop(itop-1,jtop+2,ktop+1) +3675.*utop(itop,jtop-1,ktop-2) -25725.*utop(itop,jtop-1,ktop-1) -77175.*utop(itop,jtop-1,ktop) +5145.*utop(itop,jtop-1,ktop+1) -55125.*utop(itop,jtop,ktop-2) +385875.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) -77175.*utop(itop,jtop,ktop+1) -18375.*utop(itop,jtop+1,ktop-2) +128625.*utop(itop,jtop+1,ktop-1) +385875.*utop(itop,jtop+1,ktop) -25725.*utop(itop,jtop+1,ktop+1) +2625.*utop(itop,jtop+2,ktop-2) -18375.*utop(itop,jtop+2,ktop-1) -55125.*utop(itop,jtop+2,ktop) +3675.*utop(itop,jtop+2,ktop+1) +1225.*utop(itop+1,jtop-1,ktop-2) -8575.*utop(itop+1,jtop-1,ktop-1) -25725.*utop(itop+1,jtop-1,ktop) +1715.*utop(itop+1,jtop-1,ktop+1) -18375.*utop(itop+1,jtop,ktop-2) +128625.*utop(itop+1,jtop,ktop-1) +385875.*utop(itop+1,jtop,ktop) -25725.*utop(itop+1,jtop,ktop+1) -6125.*utop(itop+1,jtop+1,ktop-2) +42875.*utop(itop+1,jtop+1,ktop-1) +128625.*utop(itop+1,jtop+1,ktop) -8575.*utop(itop+1,jtop+1,ktop+1) +875.*utop(itop+1,jtop+2,ktop-2) -6125.*utop(itop+1,jtop+2,ktop-1) -18375.*utop(itop+1,jtop+2,ktop) +1225.*utop(itop+1,jtop+2,ktop+1) -175.*utop(itop+2,jtop-1,ktop-2) +1225.*utop(itop+2,jtop-1,ktop-1) +3675.*utop(itop+2,jtop-1,ktop) -245.*utop(itop+2,jtop-1,ktop+1) +2625.*utop(itop+2,jtop,ktop-2) -18375.*utop(itop+2,jtop,ktop-1) -55125.*utop(itop+2,jtop,ktop) +3675.*utop(itop+2,jtop,ktop+1) +875.*utop(itop+2,jtop+1,ktop-2) -6125.*utop(itop+2,jtop+1,ktop-1) -18375.*utop(itop+2,jtop+1,ktop) +1225.*utop(itop+2,jtop+1,ktop+1) -125.*utop(itop+2,jtop+2,ktop-2) +875.*utop(itop+2,jtop+2,ktop-1) +2625.*utop(itop+2,jtop+2,ktop) -175.*utop(itop+2,jtop+2,ktop+1) )/2097152.; + u(i+1,j+1,k+1) = ( -343.*utop(itop-1,jtop-1,ktop-1) +5145.*utop(itop-1,jtop-1,ktop) +1715.*utop(itop-1,jtop-1,ktop+1) -245.*utop(itop-1,jtop-1,ktop+2) +5145.*utop(itop-1,jtop,ktop-1) -77175.*utop(itop-1,jtop,ktop) -25725.*utop(itop-1,jtop,ktop+1) +3675.*utop(itop-1,jtop,ktop+2) +1715.*utop(itop-1,jtop+1,ktop-1) -25725.*utop(itop-1,jtop+1,ktop) -8575.*utop(itop-1,jtop+1,ktop+1) +1225.*utop(itop-1,jtop+1,ktop+2) -245.*utop(itop-1,jtop+2,ktop-1) +3675.*utop(itop-1,jtop+2,ktop) +1225.*utop(itop-1,jtop+2,ktop+1) -175.*utop(itop-1,jtop+2,ktop+2) +5145.*utop(itop,jtop-1,ktop-1) -77175.*utop(itop,jtop-1,ktop) -25725.*utop(itop,jtop-1,ktop+1) +3675.*utop(itop,jtop-1,ktop+2) -77175.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) +385875.*utop(itop,jtop,ktop+1) -55125.*utop(itop,jtop,ktop+2) -25725.*utop(itop,jtop+1,ktop-1) +385875.*utop(itop,jtop+1,ktop) +128625.*utop(itop,jtop+1,ktop+1) -18375.*utop(itop,jtop+1,ktop+2) +3675.*utop(itop,jtop+2,ktop-1) -55125.*utop(itop,jtop+2,ktop) -18375.*utop(itop,jtop+2,ktop+1) +2625.*utop(itop,jtop+2,ktop+2) +1715.*utop(itop+1,jtop-1,ktop-1) -25725.*utop(itop+1,jtop-1,ktop) -8575.*utop(itop+1,jtop-1,ktop+1) +1225.*utop(itop+1,jtop-1,ktop+2) -25725.*utop(itop+1,jtop,ktop-1) +385875.*utop(itop+1,jtop,ktop) +128625.*utop(itop+1,jtop,ktop+1) -18375.*utop(itop+1,jtop,ktop+2) -8575.*utop(itop+1,jtop+1,ktop-1) +128625.*utop(itop+1,jtop+1,ktop) +42875.*utop(itop+1,jtop+1,ktop+1) -6125.*utop(itop+1,jtop+1,ktop+2) +1225.*utop(itop+1,jtop+2,ktop-1) -18375.*utop(itop+1,jtop+2,ktop) -6125.*utop(itop+1,jtop+2,ktop+1) +875.*utop(itop+1,jtop+2,ktop+2) -245.*utop(itop+2,jtop-1,ktop-1) +3675.*utop(itop+2,jtop-1,ktop) +1225.*utop(itop+2,jtop-1,ktop+1) -175.*utop(itop+2,jtop-1,ktop+2) +3675.*utop(itop+2,jtop,ktop-1) -55125.*utop(itop+2,jtop,ktop) -18375.*utop(itop+2,jtop,ktop+1) +2625.*utop(itop+2,jtop,ktop+2) +1225.*utop(itop+2,jtop+1,ktop-1) -18375.*utop(itop+2,jtop+1,ktop) -6125.*utop(itop+2,jtop+1,ktop+1) +875.*utop(itop+2,jtop+1,ktop+2) -175.*utop(itop+2,jtop+2,ktop-1) +2625.*utop(itop+2,jtop+2,ktop) +875.*utop(itop+2,jtop+2,ktop+1) -125.*utop(itop+2,jtop+2,ktop+2) )/2097152.; + */ + + u(i+0,j+0,k+0) = ( -1.060835e-05*utop(itop-2,jtop-2,ktop-2) +9.901123e-05*utop(itop-2,jtop-2,ktop-1) +4.455505e-04*utop(itop-2,jtop-2,ktop) -5.940674e-05*utop(itop-2,jtop-2,ktop+1) +8.250936e-06*utop(itop-2,jtop-2,ktop+2) +9.901123e-05*utop(itop-2,jtop-1,ktop-2) -9.241048e-04*utop(itop-2,jtop-1,ktop-1) -4.158472e-03*utop(itop-2,jtop-1,ktop) +5.544629e-04*utop(itop-2,jtop-1,ktop+1) -7.700874e-05*utop(itop-2,jtop-1,ktop+2) +4.455505e-04*utop(itop-2,jtop,ktop-2) -4.158472e-03*utop(itop-2,jtop,ktop-1) -1.871312e-02*utop(itop-2,jtop,ktop) +2.495083e-03*utop(itop-2,jtop,ktop+1) -3.465393e-04*utop(itop-2,jtop,ktop+2) -5.940674e-05*utop(itop-2,jtop+1,ktop-2) +5.544629e-04*utop(itop-2,jtop+1,ktop-1) +2.495083e-03*utop(itop-2,jtop+1,ktop) -3.326777e-04*utop(itop-2,jtop+1,ktop+1) +4.620524e-05*utop(itop-2,jtop+1,ktop+2) +8.250936e-06*utop(itop-2,jtop+2,ktop-2) -7.700874e-05*utop(itop-2,jtop+2,ktop-1) -3.465393e-04*utop(itop-2,jtop+2,ktop) +4.620524e-05*utop(itop-2,jtop+2,ktop+1) -6.417395e-06*utop(itop-2,jtop+2,ktop+2) +9.901123e-05*utop(itop-1,jtop-2,ktop-2) -9.241048e-04*utop(itop-1,jtop-2,ktop-1) -4.158472e-03*utop(itop-1,jtop-2,ktop) +5.544629e-04*utop(itop-1,jtop-2,ktop+1) -7.700874e-05*utop(itop-1,jtop-2,ktop+2) -9.241048e-04*utop(itop-1,jtop-1,ktop-2) +8.624978e-03*utop(itop-1,jtop-1,ktop-1) +3.881240e-02*utop(itop-1,jtop-1,ktop) -5.174987e-03*utop(itop-1,jtop-1,ktop+1) +7.187482e-04*utop(itop-1,jtop-1,ktop+2) -4.158472e-03*utop(itop-1,jtop,ktop-2) +3.881240e-02*utop(itop-1,jtop,ktop-1) +1.746558e-01*utop(itop-1,jtop,ktop) -2.328744e-02*utop(itop-1,jtop,ktop+1) +3.234367e-03*utop(itop-1,jtop,ktop+2) +5.544629e-04*utop(itop-1,jtop+1,ktop-2) -5.174987e-03*utop(itop-1,jtop+1,ktop-1) -2.328744e-02*utop(itop-1,jtop+1,ktop) +3.104992e-03*utop(itop-1,jtop+1,ktop+1) -4.312489e-04*utop(itop-1,jtop+1,ktop+2) -7.700874e-05*utop(itop-1,jtop+2,ktop-2) +7.187482e-04*utop(itop-1,jtop+2,ktop-1) +3.234367e-03*utop(itop-1,jtop+2,ktop) -4.312489e-04*utop(itop-1,jtop+2,ktop+1) +5.989568e-05*utop(itop-1,jtop+2,ktop+2) +4.455505e-04*utop(itop,jtop-2,ktop-2) -4.158472e-03*utop(itop,jtop-2,ktop-1) -1.871312e-02*utop(itop,jtop-2,ktop) +2.495083e-03*utop(itop,jtop-2,ktop+1) -3.465393e-04*utop(itop,jtop-2,ktop+2) -4.158472e-03*utop(itop,jtop-1,ktop-2) +3.881240e-02*utop(itop,jtop-1,ktop-1) +1.746558e-01*utop(itop,jtop-1,ktop) -2.328744e-02*utop(itop,jtop-1,ktop+1) +3.234367e-03*utop(itop,jtop-1,ktop+2) -1.871312e-02*utop(itop,jtop,ktop-2) +1.746558e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) -1.047935e-01*utop(itop,jtop,ktop+1) +1.455465e-02*utop(itop,jtop,ktop+2) +2.495083e-03*utop(itop,jtop+1,ktop-2) -2.328744e-02*utop(itop,jtop+1,ktop-1) -1.047935e-01*utop(itop,jtop+1,ktop) +1.397246e-02*utop(itop,jtop+1,ktop+1) -1.940620e-03*utop(itop,jtop+1,ktop+2) -3.465393e-04*utop(itop,jtop+2,ktop-2) +3.234367e-03*utop(itop,jtop+2,ktop-1) +1.455465e-02*utop(itop,jtop+2,ktop) -1.940620e-03*utop(itop,jtop+2,ktop+1) +2.695306e-04*utop(itop,jtop+2,ktop+2) -5.940674e-05*utop(itop+1,jtop-2,ktop-2) +5.544629e-04*utop(itop+1,jtop-2,ktop-1) +2.495083e-03*utop(itop+1,jtop-2,ktop) -3.326777e-04*utop(itop+1,jtop-2,ktop+1) +4.620524e-05*utop(itop+1,jtop-2,ktop+2) +5.544629e-04*utop(itop+1,jtop-1,ktop-2) -5.174987e-03*utop(itop+1,jtop-1,ktop-1) -2.328744e-02*utop(itop+1,jtop-1,ktop) +3.104992e-03*utop(itop+1,jtop-1,ktop+1) -4.312489e-04*utop(itop+1,jtop-1,ktop+2) +2.495083e-03*utop(itop+1,jtop,ktop-2) -2.328744e-02*utop(itop+1,jtop,ktop-1) -1.047935e-01*utop(itop+1,jtop,ktop) +1.397246e-02*utop(itop+1,jtop,ktop+1) -1.940620e-03*utop(itop+1,jtop,ktop+2) -3.326777e-04*utop(itop+1,jtop+1,ktop-2) +3.104992e-03*utop(itop+1,jtop+1,ktop-1) +1.397246e-02*utop(itop+1,jtop+1,ktop) -1.862995e-03*utop(itop+1,jtop+1,ktop+1) +2.587494e-04*utop(itop+1,jtop+1,ktop+2) +4.620524e-05*utop(itop+1,jtop+2,ktop-2) -4.312489e-04*utop(itop+1,jtop+2,ktop-1) -1.940620e-03*utop(itop+1,jtop+2,ktop) +2.587494e-04*utop(itop+1,jtop+2,ktop+1) -3.593741e-05*utop(itop+1,jtop+2,ktop+2) +8.250936e-06*utop(itop+2,jtop-2,ktop-2) -7.700874e-05*utop(itop+2,jtop-2,ktop-1) -3.465393e-04*utop(itop+2,jtop-2,ktop) +4.620524e-05*utop(itop+2,jtop-2,ktop+1) -6.417395e-06*utop(itop+2,jtop-2,ktop+2) -7.700874e-05*utop(itop+2,jtop-1,ktop-2) +7.187482e-04*utop(itop+2,jtop-1,ktop-1) +3.234367e-03*utop(itop+2,jtop-1,ktop) -4.312489e-04*utop(itop+2,jtop-1,ktop+1) +5.989568e-05*utop(itop+2,jtop-1,ktop+2) -3.465393e-04*utop(itop+2,jtop,ktop-2) +3.234367e-03*utop(itop+2,jtop,ktop-1) +1.455465e-02*utop(itop+2,jtop,ktop) -1.940620e-03*utop(itop+2,jtop,ktop+1) +2.695306e-04*utop(itop+2,jtop,ktop+2) +4.620524e-05*utop(itop+2,jtop+1,ktop-2) -4.312489e-04*utop(itop+2,jtop+1,ktop-1) -1.940620e-03*utop(itop+2,jtop+1,ktop) +2.587494e-04*utop(itop+2,jtop+1,ktop+1) -3.593741e-05*utop(itop+2,jtop+1,ktop+2) -6.417395e-06*utop(itop+2,jtop+2,ktop-2) +5.989568e-05*utop(itop+2,jtop+2,ktop-1) +2.695306e-04*utop(itop+2,jtop+2,ktop) -3.593741e-05*utop(itop+2,jtop+2,ktop+1) +4.991307e-06*utop(itop+2,jtop+2,ktop+2)); + u(i+0,j+0,k+1) = ( +8.250936e-06*utop(itop-2,jtop-2,ktop-2) -5.940674e-05*utop(itop-2,jtop-2,ktop-1) +4.455505e-04*utop(itop-2,jtop-2,ktop) +9.901123e-05*utop(itop-2,jtop-2,ktop+1) -1.060835e-05*utop(itop-2,jtop-2,ktop+2) -7.700874e-05*utop(itop-2,jtop-1,ktop-2) +5.544629e-04*utop(itop-2,jtop-1,ktop-1) -4.158472e-03*utop(itop-2,jtop-1,ktop) -9.241048e-04*utop(itop-2,jtop-1,ktop+1) +9.901123e-05*utop(itop-2,jtop-1,ktop+2) -3.465393e-04*utop(itop-2,jtop,ktop-2) +2.495083e-03*utop(itop-2,jtop,ktop-1) -1.871312e-02*utop(itop-2,jtop,ktop) -4.158472e-03*utop(itop-2,jtop,ktop+1) +4.455505e-04*utop(itop-2,jtop,ktop+2) +4.620524e-05*utop(itop-2,jtop+1,ktop-2) -3.326777e-04*utop(itop-2,jtop+1,ktop-1) +2.495083e-03*utop(itop-2,jtop+1,ktop) +5.544629e-04*utop(itop-2,jtop+1,ktop+1) -5.940674e-05*utop(itop-2,jtop+1,ktop+2) -6.417395e-06*utop(itop-2,jtop+2,ktop-2) +4.620524e-05*utop(itop-2,jtop+2,ktop-1) -3.465393e-04*utop(itop-2,jtop+2,ktop) -7.700874e-05*utop(itop-2,jtop+2,ktop+1) +8.250936e-06*utop(itop-2,jtop+2,ktop+2) -7.700874e-05*utop(itop-1,jtop-2,ktop-2) +5.544629e-04*utop(itop-1,jtop-2,ktop-1) -4.158472e-03*utop(itop-1,jtop-2,ktop) -9.241048e-04*utop(itop-1,jtop-2,ktop+1) +9.901123e-05*utop(itop-1,jtop-2,ktop+2) +7.187482e-04*utop(itop-1,jtop-1,ktop-2) -5.174987e-03*utop(itop-1,jtop-1,ktop-1) +3.881240e-02*utop(itop-1,jtop-1,ktop) +8.624978e-03*utop(itop-1,jtop-1,ktop+1) -9.241048e-04*utop(itop-1,jtop-1,ktop+2) +3.234367e-03*utop(itop-1,jtop,ktop-2) -2.328744e-02*utop(itop-1,jtop,ktop-1) +1.746558e-01*utop(itop-1,jtop,ktop) +3.881240e-02*utop(itop-1,jtop,ktop+1) -4.158472e-03*utop(itop-1,jtop,ktop+2) -4.312489e-04*utop(itop-1,jtop+1,ktop-2) +3.104992e-03*utop(itop-1,jtop+1,ktop-1) -2.328744e-02*utop(itop-1,jtop+1,ktop) -5.174987e-03*utop(itop-1,jtop+1,ktop+1) +5.544629e-04*utop(itop-1,jtop+1,ktop+2) +5.989568e-05*utop(itop-1,jtop+2,ktop-2) -4.312489e-04*utop(itop-1,jtop+2,ktop-1) +3.234367e-03*utop(itop-1,jtop+2,ktop) +7.187482e-04*utop(itop-1,jtop+2,ktop+1) -7.700874e-05*utop(itop-1,jtop+2,ktop+2) -3.465393e-04*utop(itop,jtop-2,ktop-2) +2.495083e-03*utop(itop,jtop-2,ktop-1) -1.871312e-02*utop(itop,jtop-2,ktop) -4.158472e-03*utop(itop,jtop-2,ktop+1) +4.455505e-04*utop(itop,jtop-2,ktop+2) +3.234367e-03*utop(itop,jtop-1,ktop-2) -2.328744e-02*utop(itop,jtop-1,ktop-1) +1.746558e-01*utop(itop,jtop-1,ktop) +3.881240e-02*utop(itop,jtop-1,ktop+1) -4.158472e-03*utop(itop,jtop-1,ktop+2) +1.455465e-02*utop(itop,jtop,ktop-2) -1.047935e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) +1.746558e-01*utop(itop,jtop,ktop+1) -1.871312e-02*utop(itop,jtop,ktop+2) -1.940620e-03*utop(itop,jtop+1,ktop-2) +1.397246e-02*utop(itop,jtop+1,ktop-1) -1.047935e-01*utop(itop,jtop+1,ktop) -2.328744e-02*utop(itop,jtop+1,ktop+1) +2.495083e-03*utop(itop,jtop+1,ktop+2) +2.695306e-04*utop(itop,jtop+2,ktop-2) -1.940620e-03*utop(itop,jtop+2,ktop-1) +1.455465e-02*utop(itop,jtop+2,ktop) +3.234367e-03*utop(itop,jtop+2,ktop+1) -3.465393e-04*utop(itop,jtop+2,ktop+2) +4.620524e-05*utop(itop+1,jtop-2,ktop-2) -3.326777e-04*utop(itop+1,jtop-2,ktop-1) +2.495083e-03*utop(itop+1,jtop-2,ktop) +5.544629e-04*utop(itop+1,jtop-2,ktop+1) -5.940674e-05*utop(itop+1,jtop-2,ktop+2) -4.312489e-04*utop(itop+1,jtop-1,ktop-2) +3.104992e-03*utop(itop+1,jtop-1,ktop-1) -2.328744e-02*utop(itop+1,jtop-1,ktop) -5.174987e-03*utop(itop+1,jtop-1,ktop+1) +5.544629e-04*utop(itop+1,jtop-1,ktop+2) -1.940620e-03*utop(itop+1,jtop,ktop-2) +1.397246e-02*utop(itop+1,jtop,ktop-1) -1.047935e-01*utop(itop+1,jtop,ktop) -2.328744e-02*utop(itop+1,jtop,ktop+1) +2.495083e-03*utop(itop+1,jtop,ktop+2) +2.587494e-04*utop(itop+1,jtop+1,ktop-2) -1.862995e-03*utop(itop+1,jtop+1,ktop-1) +1.397246e-02*utop(itop+1,jtop+1,ktop) +3.104992e-03*utop(itop+1,jtop+1,ktop+1) -3.326777e-04*utop(itop+1,jtop+1,ktop+2) -3.593741e-05*utop(itop+1,jtop+2,ktop-2) +2.587494e-04*utop(itop+1,jtop+2,ktop-1) -1.940620e-03*utop(itop+1,jtop+2,ktop) -4.312489e-04*utop(itop+1,jtop+2,ktop+1) +4.620524e-05*utop(itop+1,jtop+2,ktop+2) -6.417395e-06*utop(itop+2,jtop-2,ktop-2) +4.620524e-05*utop(itop+2,jtop-2,ktop-1) -3.465393e-04*utop(itop+2,jtop-2,ktop) -7.700874e-05*utop(itop+2,jtop-2,ktop+1) +8.250936e-06*utop(itop+2,jtop-2,ktop+2) +5.989568e-05*utop(itop+2,jtop-1,ktop-2) -4.312489e-04*utop(itop+2,jtop-1,ktop-1) +3.234367e-03*utop(itop+2,jtop-1,ktop) +7.187482e-04*utop(itop+2,jtop-1,ktop+1) -7.700874e-05*utop(itop+2,jtop-1,ktop+2) +2.695306e-04*utop(itop+2,jtop,ktop-2) -1.940620e-03*utop(itop+2,jtop,ktop-1) +1.455465e-02*utop(itop+2,jtop,ktop) +3.234367e-03*utop(itop+2,jtop,ktop+1) -3.465393e-04*utop(itop+2,jtop,ktop+2) -3.593741e-05*utop(itop+2,jtop+1,ktop-2) +2.587494e-04*utop(itop+2,jtop+1,ktop-1) -1.940620e-03*utop(itop+2,jtop+1,ktop) -4.312489e-04*utop(itop+2,jtop+1,ktop+1) +4.620524e-05*utop(itop+2,jtop+1,ktop+2) +4.991307e-06*utop(itop+2,jtop+2,ktop-2) -3.593741e-05*utop(itop+2,jtop+2,ktop-1) +2.695306e-04*utop(itop+2,jtop+2,ktop) +5.989568e-05*utop(itop+2,jtop+2,ktop+1) -6.417395e-06*utop(itop+2,jtop+2,ktop+2)); + u(i+0,j+1,k+0) = ( +8.250936e-06*utop(itop-2,jtop-2,ktop-2) -7.700874e-05*utop(itop-2,jtop-2,ktop-1) -3.465393e-04*utop(itop-2,jtop-2,ktop) +4.620524e-05*utop(itop-2,jtop-2,ktop+1) -6.417395e-06*utop(itop-2,jtop-2,ktop+2) -5.940674e-05*utop(itop-2,jtop-1,ktop-2) +5.544629e-04*utop(itop-2,jtop-1,ktop-1) +2.495083e-03*utop(itop-2,jtop-1,ktop) -3.326777e-04*utop(itop-2,jtop-1,ktop+1) +4.620524e-05*utop(itop-2,jtop-1,ktop+2) +4.455505e-04*utop(itop-2,jtop,ktop-2) -4.158472e-03*utop(itop-2,jtop,ktop-1) -1.871312e-02*utop(itop-2,jtop,ktop) +2.495083e-03*utop(itop-2,jtop,ktop+1) -3.465393e-04*utop(itop-2,jtop,ktop+2) +9.901123e-05*utop(itop-2,jtop+1,ktop-2) -9.241048e-04*utop(itop-2,jtop+1,ktop-1) -4.158472e-03*utop(itop-2,jtop+1,ktop) +5.544629e-04*utop(itop-2,jtop+1,ktop+1) -7.700874e-05*utop(itop-2,jtop+1,ktop+2) -1.060835e-05*utop(itop-2,jtop+2,ktop-2) +9.901123e-05*utop(itop-2,jtop+2,ktop-1) +4.455505e-04*utop(itop-2,jtop+2,ktop) -5.940674e-05*utop(itop-2,jtop+2,ktop+1) +8.250936e-06*utop(itop-2,jtop+2,ktop+2) -7.700874e-05*utop(itop-1,jtop-2,ktop-2) +7.187482e-04*utop(itop-1,jtop-2,ktop-1) +3.234367e-03*utop(itop-1,jtop-2,ktop) -4.312489e-04*utop(itop-1,jtop-2,ktop+1) +5.989568e-05*utop(itop-1,jtop-2,ktop+2) +5.544629e-04*utop(itop-1,jtop-1,ktop-2) -5.174987e-03*utop(itop-1,jtop-1,ktop-1) -2.328744e-02*utop(itop-1,jtop-1,ktop) +3.104992e-03*utop(itop-1,jtop-1,ktop+1) -4.312489e-04*utop(itop-1,jtop-1,ktop+2) -4.158472e-03*utop(itop-1,jtop,ktop-2) +3.881240e-02*utop(itop-1,jtop,ktop-1) +1.746558e-01*utop(itop-1,jtop,ktop) -2.328744e-02*utop(itop-1,jtop,ktop+1) +3.234367e-03*utop(itop-1,jtop,ktop+2) -9.241048e-04*utop(itop-1,jtop+1,ktop-2) +8.624978e-03*utop(itop-1,jtop+1,ktop-1) +3.881240e-02*utop(itop-1,jtop+1,ktop) -5.174987e-03*utop(itop-1,jtop+1,ktop+1) +7.187482e-04*utop(itop-1,jtop+1,ktop+2) +9.901123e-05*utop(itop-1,jtop+2,ktop-2) -9.241048e-04*utop(itop-1,jtop+2,ktop-1) -4.158472e-03*utop(itop-1,jtop+2,ktop) +5.544629e-04*utop(itop-1,jtop+2,ktop+1) -7.700874e-05*utop(itop-1,jtop+2,ktop+2) -3.465393e-04*utop(itop,jtop-2,ktop-2) +3.234367e-03*utop(itop,jtop-2,ktop-1) +1.455465e-02*utop(itop,jtop-2,ktop) -1.940620e-03*utop(itop,jtop-2,ktop+1) +2.695306e-04*utop(itop,jtop-2,ktop+2) +2.495083e-03*utop(itop,jtop-1,ktop-2) -2.328744e-02*utop(itop,jtop-1,ktop-1) -1.047935e-01*utop(itop,jtop-1,ktop) +1.397246e-02*utop(itop,jtop-1,ktop+1) -1.940620e-03*utop(itop,jtop-1,ktop+2) -1.871312e-02*utop(itop,jtop,ktop-2) +1.746558e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) -1.047935e-01*utop(itop,jtop,ktop+1) +1.455465e-02*utop(itop,jtop,ktop+2) -4.158472e-03*utop(itop,jtop+1,ktop-2) +3.881240e-02*utop(itop,jtop+1,ktop-1) +1.746558e-01*utop(itop,jtop+1,ktop) -2.328744e-02*utop(itop,jtop+1,ktop+1) +3.234367e-03*utop(itop,jtop+1,ktop+2) +4.455505e-04*utop(itop,jtop+2,ktop-2) -4.158472e-03*utop(itop,jtop+2,ktop-1) -1.871312e-02*utop(itop,jtop+2,ktop) +2.495083e-03*utop(itop,jtop+2,ktop+1) -3.465393e-04*utop(itop,jtop+2,ktop+2) +4.620524e-05*utop(itop+1,jtop-2,ktop-2) -4.312489e-04*utop(itop+1,jtop-2,ktop-1) -1.940620e-03*utop(itop+1,jtop-2,ktop) +2.587494e-04*utop(itop+1,jtop-2,ktop+1) -3.593741e-05*utop(itop+1,jtop-2,ktop+2) -3.326777e-04*utop(itop+1,jtop-1,ktop-2) +3.104992e-03*utop(itop+1,jtop-1,ktop-1) +1.397246e-02*utop(itop+1,jtop-1,ktop) -1.862995e-03*utop(itop+1,jtop-1,ktop+1) +2.587494e-04*utop(itop+1,jtop-1,ktop+2) +2.495083e-03*utop(itop+1,jtop,ktop-2) -2.328744e-02*utop(itop+1,jtop,ktop-1) -1.047935e-01*utop(itop+1,jtop,ktop) +1.397246e-02*utop(itop+1,jtop,ktop+1) -1.940620e-03*utop(itop+1,jtop,ktop+2) +5.544629e-04*utop(itop+1,jtop+1,ktop-2) -5.174987e-03*utop(itop+1,jtop+1,ktop-1) -2.328744e-02*utop(itop+1,jtop+1,ktop) +3.104992e-03*utop(itop+1,jtop+1,ktop+1) -4.312489e-04*utop(itop+1,jtop+1,ktop+2) -5.940674e-05*utop(itop+1,jtop+2,ktop-2) +5.544629e-04*utop(itop+1,jtop+2,ktop-1) +2.495083e-03*utop(itop+1,jtop+2,ktop) -3.326777e-04*utop(itop+1,jtop+2,ktop+1) +4.620524e-05*utop(itop+1,jtop+2,ktop+2) -6.417395e-06*utop(itop+2,jtop-2,ktop-2) +5.989568e-05*utop(itop+2,jtop-2,ktop-1) +2.695306e-04*utop(itop+2,jtop-2,ktop) -3.593741e-05*utop(itop+2,jtop-2,ktop+1) +4.991307e-06*utop(itop+2,jtop-2,ktop+2) +4.620524e-05*utop(itop+2,jtop-1,ktop-2) -4.312489e-04*utop(itop+2,jtop-1,ktop-1) -1.940620e-03*utop(itop+2,jtop-1,ktop) +2.587494e-04*utop(itop+2,jtop-1,ktop+1) -3.593741e-05*utop(itop+2,jtop-1,ktop+2) -3.465393e-04*utop(itop+2,jtop,ktop-2) +3.234367e-03*utop(itop+2,jtop,ktop-1) +1.455465e-02*utop(itop+2,jtop,ktop) -1.940620e-03*utop(itop+2,jtop,ktop+1) +2.695306e-04*utop(itop+2,jtop,ktop+2) -7.700874e-05*utop(itop+2,jtop+1,ktop-2) +7.187482e-04*utop(itop+2,jtop+1,ktop-1) +3.234367e-03*utop(itop+2,jtop+1,ktop) -4.312489e-04*utop(itop+2,jtop+1,ktop+1) +5.989568e-05*utop(itop+2,jtop+1,ktop+2) +8.250936e-06*utop(itop+2,jtop+2,ktop-2) -7.700874e-05*utop(itop+2,jtop+2,ktop-1) -3.465393e-04*utop(itop+2,jtop+2,ktop) +4.620524e-05*utop(itop+2,jtop+2,ktop+1) -6.417395e-06*utop(itop+2,jtop+2,ktop+2)); + u(i+0,j+1,k+1) = ( -6.417395e-06*utop(itop-2,jtop-2,ktop-2) +4.620524e-05*utop(itop-2,jtop-2,ktop-1) -3.465393e-04*utop(itop-2,jtop-2,ktop) -7.700874e-05*utop(itop-2,jtop-2,ktop+1) +8.250936e-06*utop(itop-2,jtop-2,ktop+2) +4.620524e-05*utop(itop-2,jtop-1,ktop-2) -3.326777e-04*utop(itop-2,jtop-1,ktop-1) +2.495083e-03*utop(itop-2,jtop-1,ktop) +5.544629e-04*utop(itop-2,jtop-1,ktop+1) -5.940674e-05*utop(itop-2,jtop-1,ktop+2) -3.465393e-04*utop(itop-2,jtop,ktop-2) +2.495083e-03*utop(itop-2,jtop,ktop-1) -1.871312e-02*utop(itop-2,jtop,ktop) -4.158472e-03*utop(itop-2,jtop,ktop+1) +4.455505e-04*utop(itop-2,jtop,ktop+2) -7.700874e-05*utop(itop-2,jtop+1,ktop-2) +5.544629e-04*utop(itop-2,jtop+1,ktop-1) -4.158472e-03*utop(itop-2,jtop+1,ktop) -9.241048e-04*utop(itop-2,jtop+1,ktop+1) +9.901123e-05*utop(itop-2,jtop+1,ktop+2) +8.250936e-06*utop(itop-2,jtop+2,ktop-2) -5.940674e-05*utop(itop-2,jtop+2,ktop-1) +4.455505e-04*utop(itop-2,jtop+2,ktop) +9.901123e-05*utop(itop-2,jtop+2,ktop+1) -1.060835e-05*utop(itop-2,jtop+2,ktop+2) +5.989568e-05*utop(itop-1,jtop-2,ktop-2) -4.312489e-04*utop(itop-1,jtop-2,ktop-1) +3.234367e-03*utop(itop-1,jtop-2,ktop) +7.187482e-04*utop(itop-1,jtop-2,ktop+1) -7.700874e-05*utop(itop-1,jtop-2,ktop+2) -4.312489e-04*utop(itop-1,jtop-1,ktop-2) +3.104992e-03*utop(itop-1,jtop-1,ktop-1) -2.328744e-02*utop(itop-1,jtop-1,ktop) -5.174987e-03*utop(itop-1,jtop-1,ktop+1) +5.544629e-04*utop(itop-1,jtop-1,ktop+2) +3.234367e-03*utop(itop-1,jtop,ktop-2) -2.328744e-02*utop(itop-1,jtop,ktop-1) +1.746558e-01*utop(itop-1,jtop,ktop) +3.881240e-02*utop(itop-1,jtop,ktop+1) -4.158472e-03*utop(itop-1,jtop,ktop+2) +7.187482e-04*utop(itop-1,jtop+1,ktop-2) -5.174987e-03*utop(itop-1,jtop+1,ktop-1) +3.881240e-02*utop(itop-1,jtop+1,ktop) +8.624978e-03*utop(itop-1,jtop+1,ktop+1) -9.241048e-04*utop(itop-1,jtop+1,ktop+2) -7.700874e-05*utop(itop-1,jtop+2,ktop-2) +5.544629e-04*utop(itop-1,jtop+2,ktop-1) -4.158472e-03*utop(itop-1,jtop+2,ktop) -9.241048e-04*utop(itop-1,jtop+2,ktop+1) +9.901123e-05*utop(itop-1,jtop+2,ktop+2) +2.695306e-04*utop(itop,jtop-2,ktop-2) -1.940620e-03*utop(itop,jtop-2,ktop-1) +1.455465e-02*utop(itop,jtop-2,ktop) +3.234367e-03*utop(itop,jtop-2,ktop+1) -3.465393e-04*utop(itop,jtop-2,ktop+2) -1.940620e-03*utop(itop,jtop-1,ktop-2) +1.397246e-02*utop(itop,jtop-1,ktop-1) -1.047935e-01*utop(itop,jtop-1,ktop) -2.328744e-02*utop(itop,jtop-1,ktop+1) +2.495083e-03*utop(itop,jtop-1,ktop+2) +1.455465e-02*utop(itop,jtop,ktop-2) -1.047935e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) +1.746558e-01*utop(itop,jtop,ktop+1) -1.871312e-02*utop(itop,jtop,ktop+2) +3.234367e-03*utop(itop,jtop+1,ktop-2) -2.328744e-02*utop(itop,jtop+1,ktop-1) +1.746558e-01*utop(itop,jtop+1,ktop) +3.881240e-02*utop(itop,jtop+1,ktop+1) -4.158472e-03*utop(itop,jtop+1,ktop+2) -3.465393e-04*utop(itop,jtop+2,ktop-2) +2.495083e-03*utop(itop,jtop+2,ktop-1) -1.871312e-02*utop(itop,jtop+2,ktop) -4.158472e-03*utop(itop,jtop+2,ktop+1) +4.455505e-04*utop(itop,jtop+2,ktop+2) -3.593741e-05*utop(itop+1,jtop-2,ktop-2) +2.587494e-04*utop(itop+1,jtop-2,ktop-1) -1.940620e-03*utop(itop+1,jtop-2,ktop) -4.312489e-04*utop(itop+1,jtop-2,ktop+1) +4.620524e-05*utop(itop+1,jtop-2,ktop+2) +2.587494e-04*utop(itop+1,jtop-1,ktop-2) -1.862995e-03*utop(itop+1,jtop-1,ktop-1) +1.397246e-02*utop(itop+1,jtop-1,ktop) +3.104992e-03*utop(itop+1,jtop-1,ktop+1) -3.326777e-04*utop(itop+1,jtop-1,ktop+2) -1.940620e-03*utop(itop+1,jtop,ktop-2) +1.397246e-02*utop(itop+1,jtop,ktop-1) -1.047935e-01*utop(itop+1,jtop,ktop) -2.328744e-02*utop(itop+1,jtop,ktop+1) +2.495083e-03*utop(itop+1,jtop,ktop+2) -4.312489e-04*utop(itop+1,jtop+1,ktop-2) +3.104992e-03*utop(itop+1,jtop+1,ktop-1) -2.328744e-02*utop(itop+1,jtop+1,ktop) -5.174987e-03*utop(itop+1,jtop+1,ktop+1) +5.544629e-04*utop(itop+1,jtop+1,ktop+2) +4.620524e-05*utop(itop+1,jtop+2,ktop-2) -3.326777e-04*utop(itop+1,jtop+2,ktop-1) +2.495083e-03*utop(itop+1,jtop+2,ktop) +5.544629e-04*utop(itop+1,jtop+2,ktop+1) -5.940674e-05*utop(itop+1,jtop+2,ktop+2) +4.991307e-06*utop(itop+2,jtop-2,ktop-2) -3.593741e-05*utop(itop+2,jtop-2,ktop-1) +2.695306e-04*utop(itop+2,jtop-2,ktop) +5.989568e-05*utop(itop+2,jtop-2,ktop+1) -6.417395e-06*utop(itop+2,jtop-2,ktop+2) -3.593741e-05*utop(itop+2,jtop-1,ktop-2) +2.587494e-04*utop(itop+2,jtop-1,ktop-1) -1.940620e-03*utop(itop+2,jtop-1,ktop) -4.312489e-04*utop(itop+2,jtop-1,ktop+1) +4.620524e-05*utop(itop+2,jtop-1,ktop+2) +2.695306e-04*utop(itop+2,jtop,ktop-2) -1.940620e-03*utop(itop+2,jtop,ktop-1) +1.455465e-02*utop(itop+2,jtop,ktop) +3.234367e-03*utop(itop+2,jtop,ktop+1) -3.465393e-04*utop(itop+2,jtop,ktop+2) +5.989568e-05*utop(itop+2,jtop+1,ktop-2) -4.312489e-04*utop(itop+2,jtop+1,ktop-1) +3.234367e-03*utop(itop+2,jtop+1,ktop) +7.187482e-04*utop(itop+2,jtop+1,ktop+1) -7.700874e-05*utop(itop+2,jtop+1,ktop+2) -6.417395e-06*utop(itop+2,jtop+2,ktop-2) +4.620524e-05*utop(itop+2,jtop+2,ktop-1) -3.465393e-04*utop(itop+2,jtop+2,ktop) -7.700874e-05*utop(itop+2,jtop+2,ktop+1) +8.250936e-06*utop(itop+2,jtop+2,ktop+2)); + u(i+1,j+0,k+0) = ( +8.250936e-06*utop(itop-2,jtop-2,ktop-2) -7.700874e-05*utop(itop-2,jtop-2,ktop-1) -3.465393e-04*utop(itop-2,jtop-2,ktop) +4.620524e-05*utop(itop-2,jtop-2,ktop+1) -6.417395e-06*utop(itop-2,jtop-2,ktop+2) -7.700874e-05*utop(itop-2,jtop-1,ktop-2) +7.187482e-04*utop(itop-2,jtop-1,ktop-1) +3.234367e-03*utop(itop-2,jtop-1,ktop) -4.312489e-04*utop(itop-2,jtop-1,ktop+1) +5.989568e-05*utop(itop-2,jtop-1,ktop+2) -3.465393e-04*utop(itop-2,jtop,ktop-2) +3.234367e-03*utop(itop-2,jtop,ktop-1) +1.455465e-02*utop(itop-2,jtop,ktop) -1.940620e-03*utop(itop-2,jtop,ktop+1) +2.695306e-04*utop(itop-2,jtop,ktop+2) +4.620524e-05*utop(itop-2,jtop+1,ktop-2) -4.312489e-04*utop(itop-2,jtop+1,ktop-1) -1.940620e-03*utop(itop-2,jtop+1,ktop) +2.587494e-04*utop(itop-2,jtop+1,ktop+1) -3.593741e-05*utop(itop-2,jtop+1,ktop+2) -6.417395e-06*utop(itop-2,jtop+2,ktop-2) +5.989568e-05*utop(itop-2,jtop+2,ktop-1) +2.695306e-04*utop(itop-2,jtop+2,ktop) -3.593741e-05*utop(itop-2,jtop+2,ktop+1) +4.991307e-06*utop(itop-2,jtop+2,ktop+2) -5.940674e-05*utop(itop-1,jtop-2,ktop-2) +5.544629e-04*utop(itop-1,jtop-2,ktop-1) +2.495083e-03*utop(itop-1,jtop-2,ktop) -3.326777e-04*utop(itop-1,jtop-2,ktop+1) +4.620524e-05*utop(itop-1,jtop-2,ktop+2) +5.544629e-04*utop(itop-1,jtop-1,ktop-2) -5.174987e-03*utop(itop-1,jtop-1,ktop-1) -2.328744e-02*utop(itop-1,jtop-1,ktop) +3.104992e-03*utop(itop-1,jtop-1,ktop+1) -4.312489e-04*utop(itop-1,jtop-1,ktop+2) +2.495083e-03*utop(itop-1,jtop,ktop-2) -2.328744e-02*utop(itop-1,jtop,ktop-1) -1.047935e-01*utop(itop-1,jtop,ktop) +1.397246e-02*utop(itop-1,jtop,ktop+1) -1.940620e-03*utop(itop-1,jtop,ktop+2) -3.326777e-04*utop(itop-1,jtop+1,ktop-2) +3.104992e-03*utop(itop-1,jtop+1,ktop-1) +1.397246e-02*utop(itop-1,jtop+1,ktop) -1.862995e-03*utop(itop-1,jtop+1,ktop+1) +2.587494e-04*utop(itop-1,jtop+1,ktop+2) +4.620524e-05*utop(itop-1,jtop+2,ktop-2) -4.312489e-04*utop(itop-1,jtop+2,ktop-1) -1.940620e-03*utop(itop-1,jtop+2,ktop) +2.587494e-04*utop(itop-1,jtop+2,ktop+1) -3.593741e-05*utop(itop-1,jtop+2,ktop+2) +4.455505e-04*utop(itop,jtop-2,ktop-2) -4.158472e-03*utop(itop,jtop-2,ktop-1) -1.871312e-02*utop(itop,jtop-2,ktop) +2.495083e-03*utop(itop,jtop-2,ktop+1) -3.465393e-04*utop(itop,jtop-2,ktop+2) -4.158472e-03*utop(itop,jtop-1,ktop-2) +3.881240e-02*utop(itop,jtop-1,ktop-1) +1.746558e-01*utop(itop,jtop-1,ktop) -2.328744e-02*utop(itop,jtop-1,ktop+1) +3.234367e-03*utop(itop,jtop-1,ktop+2) -1.871312e-02*utop(itop,jtop,ktop-2) +1.746558e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) -1.047935e-01*utop(itop,jtop,ktop+1) +1.455465e-02*utop(itop,jtop,ktop+2) +2.495083e-03*utop(itop,jtop+1,ktop-2) -2.328744e-02*utop(itop,jtop+1,ktop-1) -1.047935e-01*utop(itop,jtop+1,ktop) +1.397246e-02*utop(itop,jtop+1,ktop+1) -1.940620e-03*utop(itop,jtop+1,ktop+2) -3.465393e-04*utop(itop,jtop+2,ktop-2) +3.234367e-03*utop(itop,jtop+2,ktop-1) +1.455465e-02*utop(itop,jtop+2,ktop) -1.940620e-03*utop(itop,jtop+2,ktop+1) +2.695306e-04*utop(itop,jtop+2,ktop+2) +9.901123e-05*utop(itop+1,jtop-2,ktop-2) -9.241048e-04*utop(itop+1,jtop-2,ktop-1) -4.158472e-03*utop(itop+1,jtop-2,ktop) +5.544629e-04*utop(itop+1,jtop-2,ktop+1) -7.700874e-05*utop(itop+1,jtop-2,ktop+2) -9.241048e-04*utop(itop+1,jtop-1,ktop-2) +8.624978e-03*utop(itop+1,jtop-1,ktop-1) +3.881240e-02*utop(itop+1,jtop-1,ktop) -5.174987e-03*utop(itop+1,jtop-1,ktop+1) +7.187482e-04*utop(itop+1,jtop-1,ktop+2) -4.158472e-03*utop(itop+1,jtop,ktop-2) +3.881240e-02*utop(itop+1,jtop,ktop-1) +1.746558e-01*utop(itop+1,jtop,ktop) -2.328744e-02*utop(itop+1,jtop,ktop+1) +3.234367e-03*utop(itop+1,jtop,ktop+2) +5.544629e-04*utop(itop+1,jtop+1,ktop-2) -5.174987e-03*utop(itop+1,jtop+1,ktop-1) -2.328744e-02*utop(itop+1,jtop+1,ktop) +3.104992e-03*utop(itop+1,jtop+1,ktop+1) -4.312489e-04*utop(itop+1,jtop+1,ktop+2) -7.700874e-05*utop(itop+1,jtop+2,ktop-2) +7.187482e-04*utop(itop+1,jtop+2,ktop-1) +3.234367e-03*utop(itop+1,jtop+2,ktop) -4.312489e-04*utop(itop+1,jtop+2,ktop+1) +5.989568e-05*utop(itop+1,jtop+2,ktop+2) -1.060835e-05*utop(itop+2,jtop-2,ktop-2) +9.901123e-05*utop(itop+2,jtop-2,ktop-1) +4.455505e-04*utop(itop+2,jtop-2,ktop) -5.940674e-05*utop(itop+2,jtop-2,ktop+1) +8.250936e-06*utop(itop+2,jtop-2,ktop+2) +9.901123e-05*utop(itop+2,jtop-1,ktop-2) -9.241048e-04*utop(itop+2,jtop-1,ktop-1) -4.158472e-03*utop(itop+2,jtop-1,ktop) +5.544629e-04*utop(itop+2,jtop-1,ktop+1) -7.700874e-05*utop(itop+2,jtop-1,ktop+2) +4.455505e-04*utop(itop+2,jtop,ktop-2) -4.158472e-03*utop(itop+2,jtop,ktop-1) -1.871312e-02*utop(itop+2,jtop,ktop) +2.495083e-03*utop(itop+2,jtop,ktop+1) -3.465393e-04*utop(itop+2,jtop,ktop+2) -5.940674e-05*utop(itop+2,jtop+1,ktop-2) +5.544629e-04*utop(itop+2,jtop+1,ktop-1) +2.495083e-03*utop(itop+2,jtop+1,ktop) -3.326777e-04*utop(itop+2,jtop+1,ktop+1) +4.620524e-05*utop(itop+2,jtop+1,ktop+2) +8.250936e-06*utop(itop+2,jtop+2,ktop-2) -7.700874e-05*utop(itop+2,jtop+2,ktop-1) -3.465393e-04*utop(itop+2,jtop+2,ktop) +4.620524e-05*utop(itop+2,jtop+2,ktop+1) -6.417395e-06*utop(itop+2,jtop+2,ktop+2)); + u(i+1,j+0,k+1) = ( -6.417395e-06*utop(itop-2,jtop-2,ktop-2) +4.620524e-05*utop(itop-2,jtop-2,ktop-1) -3.465393e-04*utop(itop-2,jtop-2,ktop) -7.700874e-05*utop(itop-2,jtop-2,ktop+1) +8.250936e-06*utop(itop-2,jtop-2,ktop+2) +5.989568e-05*utop(itop-2,jtop-1,ktop-2) -4.312489e-04*utop(itop-2,jtop-1,ktop-1) +3.234367e-03*utop(itop-2,jtop-1,ktop) +7.187482e-04*utop(itop-2,jtop-1,ktop+1) -7.700874e-05*utop(itop-2,jtop-1,ktop+2) +2.695306e-04*utop(itop-2,jtop,ktop-2) -1.940620e-03*utop(itop-2,jtop,ktop-1) +1.455465e-02*utop(itop-2,jtop,ktop) +3.234367e-03*utop(itop-2,jtop,ktop+1) -3.465393e-04*utop(itop-2,jtop,ktop+2) -3.593741e-05*utop(itop-2,jtop+1,ktop-2) +2.587494e-04*utop(itop-2,jtop+1,ktop-1) -1.940620e-03*utop(itop-2,jtop+1,ktop) -4.312489e-04*utop(itop-2,jtop+1,ktop+1) +4.620524e-05*utop(itop-2,jtop+1,ktop+2) +4.991307e-06*utop(itop-2,jtop+2,ktop-2) -3.593741e-05*utop(itop-2,jtop+2,ktop-1) +2.695306e-04*utop(itop-2,jtop+2,ktop) +5.989568e-05*utop(itop-2,jtop+2,ktop+1) -6.417395e-06*utop(itop-2,jtop+2,ktop+2) +4.620524e-05*utop(itop-1,jtop-2,ktop-2) -3.326777e-04*utop(itop-1,jtop-2,ktop-1) +2.495083e-03*utop(itop-1,jtop-2,ktop) +5.544629e-04*utop(itop-1,jtop-2,ktop+1) -5.940674e-05*utop(itop-1,jtop-2,ktop+2) -4.312489e-04*utop(itop-1,jtop-1,ktop-2) +3.104992e-03*utop(itop-1,jtop-1,ktop-1) -2.328744e-02*utop(itop-1,jtop-1,ktop) -5.174987e-03*utop(itop-1,jtop-1,ktop+1) +5.544629e-04*utop(itop-1,jtop-1,ktop+2) -1.940620e-03*utop(itop-1,jtop,ktop-2) +1.397246e-02*utop(itop-1,jtop,ktop-1) -1.047935e-01*utop(itop-1,jtop,ktop) -2.328744e-02*utop(itop-1,jtop,ktop+1) +2.495083e-03*utop(itop-1,jtop,ktop+2) +2.587494e-04*utop(itop-1,jtop+1,ktop-2) -1.862995e-03*utop(itop-1,jtop+1,ktop-1) +1.397246e-02*utop(itop-1,jtop+1,ktop) +3.104992e-03*utop(itop-1,jtop+1,ktop+1) -3.326777e-04*utop(itop-1,jtop+1,ktop+2) -3.593741e-05*utop(itop-1,jtop+2,ktop-2) +2.587494e-04*utop(itop-1,jtop+2,ktop-1) -1.940620e-03*utop(itop-1,jtop+2,ktop) -4.312489e-04*utop(itop-1,jtop+2,ktop+1) +4.620524e-05*utop(itop-1,jtop+2,ktop+2) -3.465393e-04*utop(itop,jtop-2,ktop-2) +2.495083e-03*utop(itop,jtop-2,ktop-1) -1.871312e-02*utop(itop,jtop-2,ktop) -4.158472e-03*utop(itop,jtop-2,ktop+1) +4.455505e-04*utop(itop,jtop-2,ktop+2) +3.234367e-03*utop(itop,jtop-1,ktop-2) -2.328744e-02*utop(itop,jtop-1,ktop-1) +1.746558e-01*utop(itop,jtop-1,ktop) +3.881240e-02*utop(itop,jtop-1,ktop+1) -4.158472e-03*utop(itop,jtop-1,ktop+2) +1.455465e-02*utop(itop,jtop,ktop-2) -1.047935e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) +1.746558e-01*utop(itop,jtop,ktop+1) -1.871312e-02*utop(itop,jtop,ktop+2) -1.940620e-03*utop(itop,jtop+1,ktop-2) +1.397246e-02*utop(itop,jtop+1,ktop-1) -1.047935e-01*utop(itop,jtop+1,ktop) -2.328744e-02*utop(itop,jtop+1,ktop+1) +2.495083e-03*utop(itop,jtop+1,ktop+2) +2.695306e-04*utop(itop,jtop+2,ktop-2) -1.940620e-03*utop(itop,jtop+2,ktop-1) +1.455465e-02*utop(itop,jtop+2,ktop) +3.234367e-03*utop(itop,jtop+2,ktop+1) -3.465393e-04*utop(itop,jtop+2,ktop+2) -7.700874e-05*utop(itop+1,jtop-2,ktop-2) +5.544629e-04*utop(itop+1,jtop-2,ktop-1) -4.158472e-03*utop(itop+1,jtop-2,ktop) -9.241048e-04*utop(itop+1,jtop-2,ktop+1) +9.901123e-05*utop(itop+1,jtop-2,ktop+2) +7.187482e-04*utop(itop+1,jtop-1,ktop-2) -5.174987e-03*utop(itop+1,jtop-1,ktop-1) +3.881240e-02*utop(itop+1,jtop-1,ktop) +8.624978e-03*utop(itop+1,jtop-1,ktop+1) -9.241048e-04*utop(itop+1,jtop-1,ktop+2) +3.234367e-03*utop(itop+1,jtop,ktop-2) -2.328744e-02*utop(itop+1,jtop,ktop-1) +1.746558e-01*utop(itop+1,jtop,ktop) +3.881240e-02*utop(itop+1,jtop,ktop+1) -4.158472e-03*utop(itop+1,jtop,ktop+2) -4.312489e-04*utop(itop+1,jtop+1,ktop-2) +3.104992e-03*utop(itop+1,jtop+1,ktop-1) -2.328744e-02*utop(itop+1,jtop+1,ktop) -5.174987e-03*utop(itop+1,jtop+1,ktop+1) +5.544629e-04*utop(itop+1,jtop+1,ktop+2) +5.989568e-05*utop(itop+1,jtop+2,ktop-2) -4.312489e-04*utop(itop+1,jtop+2,ktop-1) +3.234367e-03*utop(itop+1,jtop+2,ktop) +7.187482e-04*utop(itop+1,jtop+2,ktop+1) -7.700874e-05*utop(itop+1,jtop+2,ktop+2) +8.250936e-06*utop(itop+2,jtop-2,ktop-2) -5.940674e-05*utop(itop+2,jtop-2,ktop-1) +4.455505e-04*utop(itop+2,jtop-2,ktop) +9.901123e-05*utop(itop+2,jtop-2,ktop+1) -1.060835e-05*utop(itop+2,jtop-2,ktop+2) -7.700874e-05*utop(itop+2,jtop-1,ktop-2) +5.544629e-04*utop(itop+2,jtop-1,ktop-1) -4.158472e-03*utop(itop+2,jtop-1,ktop) -9.241048e-04*utop(itop+2,jtop-1,ktop+1) +9.901123e-05*utop(itop+2,jtop-1,ktop+2) -3.465393e-04*utop(itop+2,jtop,ktop-2) +2.495083e-03*utop(itop+2,jtop,ktop-1) -1.871312e-02*utop(itop+2,jtop,ktop) -4.158472e-03*utop(itop+2,jtop,ktop+1) +4.455505e-04*utop(itop+2,jtop,ktop+2) +4.620524e-05*utop(itop+2,jtop+1,ktop-2) -3.326777e-04*utop(itop+2,jtop+1,ktop-1) +2.495083e-03*utop(itop+2,jtop+1,ktop) +5.544629e-04*utop(itop+2,jtop+1,ktop+1) -5.940674e-05*utop(itop+2,jtop+1,ktop+2) -6.417395e-06*utop(itop+2,jtop+2,ktop-2) +4.620524e-05*utop(itop+2,jtop+2,ktop-1) -3.465393e-04*utop(itop+2,jtop+2,ktop) -7.700874e-05*utop(itop+2,jtop+2,ktop+1) +8.250936e-06*utop(itop+2,jtop+2,ktop+2)); + u(i+1,j+1,k+0) = ( -6.417395e-06*utop(itop-2,jtop-2,ktop-2) +5.989568e-05*utop(itop-2,jtop-2,ktop-1) +2.695306e-04*utop(itop-2,jtop-2,ktop) -3.593741e-05*utop(itop-2,jtop-2,ktop+1) +4.991307e-06*utop(itop-2,jtop-2,ktop+2) +4.620524e-05*utop(itop-2,jtop-1,ktop-2) -4.312489e-04*utop(itop-2,jtop-1,ktop-1) -1.940620e-03*utop(itop-2,jtop-1,ktop) +2.587494e-04*utop(itop-2,jtop-1,ktop+1) -3.593741e-05*utop(itop-2,jtop-1,ktop+2) -3.465393e-04*utop(itop-2,jtop,ktop-2) +3.234367e-03*utop(itop-2,jtop,ktop-1) +1.455465e-02*utop(itop-2,jtop,ktop) -1.940620e-03*utop(itop-2,jtop,ktop+1) +2.695306e-04*utop(itop-2,jtop,ktop+2) -7.700874e-05*utop(itop-2,jtop+1,ktop-2) +7.187482e-04*utop(itop-2,jtop+1,ktop-1) +3.234367e-03*utop(itop-2,jtop+1,ktop) -4.312489e-04*utop(itop-2,jtop+1,ktop+1) +5.989568e-05*utop(itop-2,jtop+1,ktop+2) +8.250936e-06*utop(itop-2,jtop+2,ktop-2) -7.700874e-05*utop(itop-2,jtop+2,ktop-1) -3.465393e-04*utop(itop-2,jtop+2,ktop) +4.620524e-05*utop(itop-2,jtop+2,ktop+1) -6.417395e-06*utop(itop-2,jtop+2,ktop+2) +4.620524e-05*utop(itop-1,jtop-2,ktop-2) -4.312489e-04*utop(itop-1,jtop-2,ktop-1) -1.940620e-03*utop(itop-1,jtop-2,ktop) +2.587494e-04*utop(itop-1,jtop-2,ktop+1) -3.593741e-05*utop(itop-1,jtop-2,ktop+2) -3.326777e-04*utop(itop-1,jtop-1,ktop-2) +3.104992e-03*utop(itop-1,jtop-1,ktop-1) +1.397246e-02*utop(itop-1,jtop-1,ktop) -1.862995e-03*utop(itop-1,jtop-1,ktop+1) +2.587494e-04*utop(itop-1,jtop-1,ktop+2) +2.495083e-03*utop(itop-1,jtop,ktop-2) -2.328744e-02*utop(itop-1,jtop,ktop-1) -1.047935e-01*utop(itop-1,jtop,ktop) +1.397246e-02*utop(itop-1,jtop,ktop+1) -1.940620e-03*utop(itop-1,jtop,ktop+2) +5.544629e-04*utop(itop-1,jtop+1,ktop-2) -5.174987e-03*utop(itop-1,jtop+1,ktop-1) -2.328744e-02*utop(itop-1,jtop+1,ktop) +3.104992e-03*utop(itop-1,jtop+1,ktop+1) -4.312489e-04*utop(itop-1,jtop+1,ktop+2) -5.940674e-05*utop(itop-1,jtop+2,ktop-2) +5.544629e-04*utop(itop-1,jtop+2,ktop-1) +2.495083e-03*utop(itop-1,jtop+2,ktop) -3.326777e-04*utop(itop-1,jtop+2,ktop+1) +4.620524e-05*utop(itop-1,jtop+2,ktop+2) -3.465393e-04*utop(itop,jtop-2,ktop-2) +3.234367e-03*utop(itop,jtop-2,ktop-1) +1.455465e-02*utop(itop,jtop-2,ktop) -1.940620e-03*utop(itop,jtop-2,ktop+1) +2.695306e-04*utop(itop,jtop-2,ktop+2) +2.495083e-03*utop(itop,jtop-1,ktop-2) -2.328744e-02*utop(itop,jtop-1,ktop-1) -1.047935e-01*utop(itop,jtop-1,ktop) +1.397246e-02*utop(itop,jtop-1,ktop+1) -1.940620e-03*utop(itop,jtop-1,ktop+2) -1.871312e-02*utop(itop,jtop,ktop-2) +1.746558e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) -1.047935e-01*utop(itop,jtop,ktop+1) +1.455465e-02*utop(itop,jtop,ktop+2) -4.158472e-03*utop(itop,jtop+1,ktop-2) +3.881240e-02*utop(itop,jtop+1,ktop-1) +1.746558e-01*utop(itop,jtop+1,ktop) -2.328744e-02*utop(itop,jtop+1,ktop+1) +3.234367e-03*utop(itop,jtop+1,ktop+2) +4.455505e-04*utop(itop,jtop+2,ktop-2) -4.158472e-03*utop(itop,jtop+2,ktop-1) -1.871312e-02*utop(itop,jtop+2,ktop) +2.495083e-03*utop(itop,jtop+2,ktop+1) -3.465393e-04*utop(itop,jtop+2,ktop+2) -7.700874e-05*utop(itop+1,jtop-2,ktop-2) +7.187482e-04*utop(itop+1,jtop-2,ktop-1) +3.234367e-03*utop(itop+1,jtop-2,ktop) -4.312489e-04*utop(itop+1,jtop-2,ktop+1) +5.989568e-05*utop(itop+1,jtop-2,ktop+2) +5.544629e-04*utop(itop+1,jtop-1,ktop-2) -5.174987e-03*utop(itop+1,jtop-1,ktop-1) -2.328744e-02*utop(itop+1,jtop-1,ktop) +3.104992e-03*utop(itop+1,jtop-1,ktop+1) -4.312489e-04*utop(itop+1,jtop-1,ktop+2) -4.158472e-03*utop(itop+1,jtop,ktop-2) +3.881240e-02*utop(itop+1,jtop,ktop-1) +1.746558e-01*utop(itop+1,jtop,ktop) -2.328744e-02*utop(itop+1,jtop,ktop+1) +3.234367e-03*utop(itop+1,jtop,ktop+2) -9.241048e-04*utop(itop+1,jtop+1,ktop-2) +8.624978e-03*utop(itop+1,jtop+1,ktop-1) +3.881240e-02*utop(itop+1,jtop+1,ktop) -5.174987e-03*utop(itop+1,jtop+1,ktop+1) +7.187482e-04*utop(itop+1,jtop+1,ktop+2) +9.901123e-05*utop(itop+1,jtop+2,ktop-2) -9.241048e-04*utop(itop+1,jtop+2,ktop-1) -4.158472e-03*utop(itop+1,jtop+2,ktop) +5.544629e-04*utop(itop+1,jtop+2,ktop+1) -7.700874e-05*utop(itop+1,jtop+2,ktop+2) +8.250936e-06*utop(itop+2,jtop-2,ktop-2) -7.700874e-05*utop(itop+2,jtop-2,ktop-1) -3.465393e-04*utop(itop+2,jtop-2,ktop) +4.620524e-05*utop(itop+2,jtop-2,ktop+1) -6.417395e-06*utop(itop+2,jtop-2,ktop+2) -5.940674e-05*utop(itop+2,jtop-1,ktop-2) +5.544629e-04*utop(itop+2,jtop-1,ktop-1) +2.495083e-03*utop(itop+2,jtop-1,ktop) -3.326777e-04*utop(itop+2,jtop-1,ktop+1) +4.620524e-05*utop(itop+2,jtop-1,ktop+2) +4.455505e-04*utop(itop+2,jtop,ktop-2) -4.158472e-03*utop(itop+2,jtop,ktop-1) -1.871312e-02*utop(itop+2,jtop,ktop) +2.495083e-03*utop(itop+2,jtop,ktop+1) -3.465393e-04*utop(itop+2,jtop,ktop+2) +9.901123e-05*utop(itop+2,jtop+1,ktop-2) -9.241048e-04*utop(itop+2,jtop+1,ktop-1) -4.158472e-03*utop(itop+2,jtop+1,ktop) +5.544629e-04*utop(itop+2,jtop+1,ktop+1) -7.700874e-05*utop(itop+2,jtop+1,ktop+2) -1.060835e-05*utop(itop+2,jtop+2,ktop-2) +9.901123e-05*utop(itop+2,jtop+2,ktop-1) +4.455505e-04*utop(itop+2,jtop+2,ktop) -5.940674e-05*utop(itop+2,jtop+2,ktop+1) +8.250936e-06*utop(itop+2,jtop+2,ktop+2)); + u(i+1,j+1,k+1) = ( +4.991307e-06*utop(itop-2,jtop-2,ktop-2) -3.593741e-05*utop(itop-2,jtop-2,ktop-1) +2.695306e-04*utop(itop-2,jtop-2,ktop) +5.989568e-05*utop(itop-2,jtop-2,ktop+1) -6.417395e-06*utop(itop-2,jtop-2,ktop+2) -3.593741e-05*utop(itop-2,jtop-1,ktop-2) +2.587494e-04*utop(itop-2,jtop-1,ktop-1) -1.940620e-03*utop(itop-2,jtop-1,ktop) -4.312489e-04*utop(itop-2,jtop-1,ktop+1) +4.620524e-05*utop(itop-2,jtop-1,ktop+2) +2.695306e-04*utop(itop-2,jtop,ktop-2) -1.940620e-03*utop(itop-2,jtop,ktop-1) +1.455465e-02*utop(itop-2,jtop,ktop) +3.234367e-03*utop(itop-2,jtop,ktop+1) -3.465393e-04*utop(itop-2,jtop,ktop+2) +5.989568e-05*utop(itop-2,jtop+1,ktop-2) -4.312489e-04*utop(itop-2,jtop+1,ktop-1) +3.234367e-03*utop(itop-2,jtop+1,ktop) +7.187482e-04*utop(itop-2,jtop+1,ktop+1) -7.700874e-05*utop(itop-2,jtop+1,ktop+2) -6.417395e-06*utop(itop-2,jtop+2,ktop-2) +4.620524e-05*utop(itop-2,jtop+2,ktop-1) -3.465393e-04*utop(itop-2,jtop+2,ktop) -7.700874e-05*utop(itop-2,jtop+2,ktop+1) +8.250936e-06*utop(itop-2,jtop+2,ktop+2) -3.593741e-05*utop(itop-1,jtop-2,ktop-2) +2.587494e-04*utop(itop-1,jtop-2,ktop-1) -1.940620e-03*utop(itop-1,jtop-2,ktop) -4.312489e-04*utop(itop-1,jtop-2,ktop+1) +4.620524e-05*utop(itop-1,jtop-2,ktop+2) +2.587494e-04*utop(itop-1,jtop-1,ktop-2) -1.862995e-03*utop(itop-1,jtop-1,ktop-1) +1.397246e-02*utop(itop-1,jtop-1,ktop) +3.104992e-03*utop(itop-1,jtop-1,ktop+1) -3.326777e-04*utop(itop-1,jtop-1,ktop+2) -1.940620e-03*utop(itop-1,jtop,ktop-2) +1.397246e-02*utop(itop-1,jtop,ktop-1) -1.047935e-01*utop(itop-1,jtop,ktop) -2.328744e-02*utop(itop-1,jtop,ktop+1) +2.495083e-03*utop(itop-1,jtop,ktop+2) -4.312489e-04*utop(itop-1,jtop+1,ktop-2) +3.104992e-03*utop(itop-1,jtop+1,ktop-1) -2.328744e-02*utop(itop-1,jtop+1,ktop) -5.174987e-03*utop(itop-1,jtop+1,ktop+1) +5.544629e-04*utop(itop-1,jtop+1,ktop+2) +4.620524e-05*utop(itop-1,jtop+2,ktop-2) -3.326777e-04*utop(itop-1,jtop+2,ktop-1) +2.495083e-03*utop(itop-1,jtop+2,ktop) +5.544629e-04*utop(itop-1,jtop+2,ktop+1) -5.940674e-05*utop(itop-1,jtop+2,ktop+2) +2.695306e-04*utop(itop,jtop-2,ktop-2) -1.940620e-03*utop(itop,jtop-2,ktop-1) +1.455465e-02*utop(itop,jtop-2,ktop) +3.234367e-03*utop(itop,jtop-2,ktop+1) -3.465393e-04*utop(itop,jtop-2,ktop+2) -1.940620e-03*utop(itop,jtop-1,ktop-2) +1.397246e-02*utop(itop,jtop-1,ktop-1) -1.047935e-01*utop(itop,jtop-1,ktop) -2.328744e-02*utop(itop,jtop-1,ktop+1) +2.495083e-03*utop(itop,jtop-1,ktop+2) +1.455465e-02*utop(itop,jtop,ktop-2) -1.047935e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) +1.746558e-01*utop(itop,jtop,ktop+1) -1.871312e-02*utop(itop,jtop,ktop+2) +3.234367e-03*utop(itop,jtop+1,ktop-2) -2.328744e-02*utop(itop,jtop+1,ktop-1) +1.746558e-01*utop(itop,jtop+1,ktop) +3.881240e-02*utop(itop,jtop+1,ktop+1) -4.158472e-03*utop(itop,jtop+1,ktop+2) -3.465393e-04*utop(itop,jtop+2,ktop-2) +2.495083e-03*utop(itop,jtop+2,ktop-1) -1.871312e-02*utop(itop,jtop+2,ktop) -4.158472e-03*utop(itop,jtop+2,ktop+1) +4.455505e-04*utop(itop,jtop+2,ktop+2) +5.989568e-05*utop(itop+1,jtop-2,ktop-2) -4.312489e-04*utop(itop+1,jtop-2,ktop-1) +3.234367e-03*utop(itop+1,jtop-2,ktop) +7.187482e-04*utop(itop+1,jtop-2,ktop+1) -7.700874e-05*utop(itop+1,jtop-2,ktop+2) -4.312489e-04*utop(itop+1,jtop-1,ktop-2) +3.104992e-03*utop(itop+1,jtop-1,ktop-1) -2.328744e-02*utop(itop+1,jtop-1,ktop) -5.174987e-03*utop(itop+1,jtop-1,ktop+1) +5.544629e-04*utop(itop+1,jtop-1,ktop+2) +3.234367e-03*utop(itop+1,jtop,ktop-2) -2.328744e-02*utop(itop+1,jtop,ktop-1) +1.746558e-01*utop(itop+1,jtop,ktop) +3.881240e-02*utop(itop+1,jtop,ktop+1) -4.158472e-03*utop(itop+1,jtop,ktop+2) +7.187482e-04*utop(itop+1,jtop+1,ktop-2) -5.174987e-03*utop(itop+1,jtop+1,ktop-1) +3.881240e-02*utop(itop+1,jtop+1,ktop) +8.624978e-03*utop(itop+1,jtop+1,ktop+1) -9.241048e-04*utop(itop+1,jtop+1,ktop+2) -7.700874e-05*utop(itop+1,jtop+2,ktop-2) +5.544629e-04*utop(itop+1,jtop+2,ktop-1) -4.158472e-03*utop(itop+1,jtop+2,ktop) -9.241048e-04*utop(itop+1,jtop+2,ktop+1) +9.901123e-05*utop(itop+1,jtop+2,ktop+2) -6.417395e-06*utop(itop+2,jtop-2,ktop-2) +4.620524e-05*utop(itop+2,jtop-2,ktop-1) -3.465393e-04*utop(itop+2,jtop-2,ktop) -7.700874e-05*utop(itop+2,jtop-2,ktop+1) +8.250936e-06*utop(itop+2,jtop-2,ktop+2) +4.620524e-05*utop(itop+2,jtop-1,ktop-2) -3.326777e-04*utop(itop+2,jtop-1,ktop-1) +2.495083e-03*utop(itop+2,jtop-1,ktop) +5.544629e-04*utop(itop+2,jtop-1,ktop+1) -5.940674e-05*utop(itop+2,jtop-1,ktop+2) -3.465393e-04*utop(itop+2,jtop,ktop-2) +2.495083e-03*utop(itop+2,jtop,ktop-1) -1.871312e-02*utop(itop+2,jtop,ktop) -4.158472e-03*utop(itop+2,jtop,ktop+1) +4.455505e-04*utop(itop+2,jtop,ktop+2) -7.700874e-05*utop(itop+2,jtop+1,ktop-2) +5.544629e-04*utop(itop+2,jtop+1,ktop-1) -4.158472e-03*utop(itop+2,jtop+1,ktop) -9.241048e-04*utop(itop+2,jtop+1,ktop+1) +9.901123e-05*utop(itop+2,jtop+1,ktop+2) +8.250936e-06*utop(itop+2,jtop+2,ktop-2) -5.940674e-05*utop(itop+2,jtop+2,ktop-1) +4.455505e-04*utop(itop+2,jtop+2,ktop) +9.901123e-05*utop(itop+2,jtop+2,ktop+1) -1.060835e-05*utop(itop+2,jtop+2,ktop+2)); + + +} + + +template< class S, class O, typename T > +void solver::interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd& coarse, MeshvarBnd& fine, bool bcf=false ) +{ + + MeshvarBnd *u = &fine; + MeshvarBnd *utop = &coarse; + + + bcf = true; + + int + xoff = u->offset(0), + yoff = u->offset(1), + zoff = u->offset(2); + + //... don't do anything if we are not an additional refinement region + if( ilevel <= m_ilevelmin ) + return; + + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + for( int j=0; j +void solver::interp_coarse_fine( unsigned ilevel, MeshvarBnd& coarse, MeshvarBnd& fine, bool bcf ) +{ + MeshvarBnd *u = &fine; + MeshvarBnd *utop = &coarse; + + + bcf = true;; + //bcf = false; + + int + xoff = u->offset(0), + yoff = u->offset(1), + zoff = u->offset(2); + + //... don't do anything if we are not an additional refinement region + if( xoff == 0 && yoff == 0 && zoff == 0 ) + return; + + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + //... set boundary condition for fine grid + + #pragma omp parallel for schedule(dynamic) + for( int ix=-1; ix<=nx; ++ix ) + for( int iy=-1; iy<=ny; ++iy ) + for( int iz=-1; iz<=nz; ++iz ) + { + bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz); + + //if(ix==-1||ix==nx||iy==-1||iy==ny||iz==-1||iz==nz) + if( xbnd || ybnd || zbnd ) + //if( xbnd ^ ybnd ^ zbnd ) + { + + //... only deal with proper ghostzones + if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd)) + continue; + + /*int ixtop = (int)(0.5*(double)(ix+2*xoff)+1e-3); + int iytop = (int)(0.5*(double)(iy+2*yoff)+1e-3); + int iztop = (int)(0.5*(double)(iz+2*zoff)+1e-3);*/ + + int ixtop = (int)(0.5*(double)(ix))+xoff; + int iytop = (int)(0.5*(double)(iy))+yoff; + int iztop = (int)(0.5*(double)(iz))+zoff; + + if( ix==-1 ) ixtop=xoff-1; + if( iy==-1 ) iytop=yoff-1; + if( iz==-1 ) iztop=zoff-1; + + double ustar1, ustar2, ustar3, uhat; + double fac = 0.5;//0.25; + double flux;; + if( ix == -1 && iy%2==0 && iz%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) ); + ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) ); + ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) ); + + uhat = interp2( /*-1.0, 0.0, 1.0, */ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + //(*u)(ix,iy+j,iz+k) = 0.0;//(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k), -1.0 ); + + (*u)(ix,iy+j,iz+k) = interp2left( uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k) ); + + flux += ((*u)(ix+1,iy+j,iz+k)-(*u)(ix,iy+j,iz+k)); + } + + + + flux /= 4.0; + + double dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux; + + //dflux *= 2.0; + + if( bcf ) + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix,iy+j,iz+k) -= dflux; + else + (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop+1,iytop,iztop) - 2.0*flux; + + + } + // right boundary + if( ix == nx && iy%2==0 && iz%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) ); + ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) ); + ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + //(*u)(ix,iy+j,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix-1,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), 1.0 ); + (*u)(ix,iy+j,iz+k) = interp2right( (*u)(ix-2,iy+j,iz+k), (*u)(ix-1,iy+j,iz+k), uhat ); + flux += ((*u)(ix,iy+j,iz+k)-(*u)(ix-1,iy+j,iz+k)); + } + flux /= 4.0; + + + double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop-1,iytop,iztop))/2.0 - flux; + //dflux *= 2.0; + + if( bcf ) + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix,iy+j,iz+k) += dflux; + else + (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop-1,iytop,iztop) + 2.0*flux; + + } + // bottom boundary + if( iy == -1 && ix%2==0 && iz%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) ); + ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); + ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + //(*u)(ix+j,iy,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k), -1.0 ); + (*u)(ix+j,iy,iz+k) = interp2left( uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k) ); + + flux += ((*u)(ix+j,iy+1,iz+k)-(*u)(ix+j,iy,iz+k)); + } + flux /= 4.0; + //(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop+1,iztop) - flux; + double dflux = ((*utop)(ixtop,iytop+1,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux; + //dflux *= 2.0; + if( bcf ) + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix+j,iy,iz+k) -= dflux; + else + (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop+1,iztop) - 2.0*flux; + + } + // top boundary + if( iy == ny && ix%2==0 && iz%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) ); + ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); + ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + //(*u)(ix+j,iy,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix+j,iy-1,iz+k), (*u)(ix+j,iy-2,iz+k), 1.0 ); + (*u)(ix+j,iy,iz+k) = interp2right( (*u)(ix+j,iy-2,iz+k), (*u)(ix+j,iy-1,iz+k), uhat ); + + flux += ((*u)(ix+j,iy,iz+k)-(*u)(ix+j,iy-1,iz+k)); + } + flux /= 4.0; + //(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop-1,iztop) + flux; + double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop-1,iztop))/2.0 - flux; + //dflux *= 2.0; + if( bcf ) + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix+j,iy,iz+k) += dflux; + else + (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop-1,iztop) + 2.0*flux; + + } + // front boundary + if( iz == -1 && ix%2==0 && iy%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) ); + ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); + ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + //(*u)(ix+j,iy+k,iz) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2), -1.0 ); + (*u)(ix+j,iy+k,iz) = interp2left( uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2) ); + + flux += ((*u)(ix+j,iy+k,iz+1)-(*u)(ix+j,iy+k,iz)); + } + flux /= 4.0; + //(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop+1) - flux; + double dflux = ((*utop)(ixtop,iytop,iztop+1)-(*utop)(ixtop,iytop,iztop))/2.0 - flux; + //dflux *= 2.0; + if( bcf ) + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix+j,iy+k,iz) -= dflux; + else + (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop+1) - 2.0*flux; + + } + // back boundary + if( iz == nz && ix%2==0 && iy%2==0 ) + { + flux = 0.0; + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + { + ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) ); + ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); + ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) ); + + uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); + + //(*u)(ix+j,iy+k,iz) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix+j,iy+k,iz-1), (*u)(ix+j,iy+k,iz-2), 1.0 ); + (*u)(ix+j,iy+k,iz) = interp2right( (*u)(ix+j,iy+k,iz-2), (*u)(ix+j,iy+k,iz-1), uhat ); + + flux += ((*u)(ix+j,iy+k,iz)-(*u)(ix+j,iy+k,iz-1)); + } + flux /= 4.0; + //(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop-1) + flux; + double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop,iztop-1))/2.0 - flux; + //dflux *= 2.0; + if( bcf ) + for( int j=0;j<=1;j++) + for( int k=0;k<=1;k++) + (*u)(ix+j,iy+k,iz) += dflux; + else + (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop-1) + 2.0*flux; + } + + } + } + +} + + +#if 1 +template< class S, class O, typename T > +void solver::setBC( unsigned ilevel ) +{ + //... set only on level before additional refinement starts + //if( ilevel == m_ilevelmin ) + if( ilevel == m_ilevelmin ) + { + MeshvarBnd *u = m_pu->get_grid(ilevel); + //int nbnd = u->m_nbnd, + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + /*for( int ix=-nbnd; ix=nx||iy<0||iy>=ny||iz<0||iz>=nz ) + (*u)(ix,iy,iz) = (*m_pubnd)(ix,iy,iz);*/ + + + for( int iy=0; iy *u = m_pu->get_grid(ilevel); + int nbnd = u->m_nbnd, + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + for( int ix=-nbnd; ix=nx||iy<0||iy>=ny||iz<0||iz>=nz ) + (*u)(ix,iy,iz) = 0.0; + }*/ + + +} + +#else + +//... enforce periodic boundary conditions +template< class S, class O, typename T > +void solver::setBC( unsigned ilevel ) +{ + MeshvarBnd *u = m_pu->get_grid(ilevel); + + //... set only on level before additional refinement starts + if( ilevel <= m_ilevelmin ) + { + + int nbnd = u->m_nbnd, + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + //(*u)(0,0,0) = 0.0; + + + double sum = 0.0; + for( int ix=0; ix +void solver::make_periodic( MeshvarBnd *u ) +{ + + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + + #pragma omp parallel + { + + if( u->offset(0) == 0 ) + for( int iy=0; iyoffset(1) == 0 ) + for( int ix=0; ixoffset(2) == 0 ) + for( int ix=0; ix. + +*/ + + +#ifndef __TESTS_HH +#define __TESTS_HH + +#include + + +inline double CIC_interp_back( const MeshvarBnd& A, double x, double y, double z ) +{ + int + ix = (int)x, + iy = (int)y, + iz = (int)z, + ix1 = (ix+1), + iy1 = (iy+1), + iz1 = (iz+1); + + + double + dx = (double)(x - (double)ix), + dy = (double)(y - (double)iy), + dz = (double)(z - (double)iz), + tx = 1.0-dx, + ty = 1.0-dy, + tz = 1.0-dz; + + double + f_xyz = A(ix,iy,iz)*tx*ty*tz, + f_Xyz = A(ix1,iy,iz)*dx*ty*tz, + f_xYz = A(ix,iy1,iz)*tx*dy*tz, + f_xyZ = A(ix,iy,iz1)*tx*ty*dz, + f_XYz = A(ix1,iy1,iz)*dx*dy*tz, + f_XyZ = A(ix1,iy,iz1)*dx*ty*dz, + f_xYZ = A(ix,iy1,iz1)*tx*dy*dz, + f_XYZ = A(ix1,iy1,iz1)*dx*dy*dz; + + return f_xyz + f_Xyz + f_xYz + f_xyZ + f_XYz + f_XyZ + f_xYZ + f_XYZ; +} + +inline double TSC_interp_back( const MeshvarBnd& A, double x, double y, double z ) +{ + double val = 0.0; + int xngp = (int)x, yngp = (int)y, zngp = (int)z; + + for( int xx = xngp-1; xx <= xngp+1; ++xx ) + { + double weightx = 1.0; + double dx = fabs(x-(double)xx); + int axx(xx); + + if( xx==xngp ) + weightx *= 0.75-dx*dx; + else{ + weightx *= 1.125 - 1.5*dx + 0.5*dx*dx; + } + + for( int yy = yngp-1; yy <= yngp+1; ++yy ) + { + double weighty = weightx; + double dy = fabs(y-(double)yy); + int ayy(yy); + + if( yy==yngp ) + weighty *= 0.75-dy*dy; + else{ + weighty *= 1.125 - 1.5*dy + 0.5*dy*dy; + } + + for( int zz = zngp-1; zz <= zngp+1; ++zz ) + { + double weightz = weighty; + double dz = fabs(z-(double)zz); + int azz(zz); + + if( zz==zngp ) + weightz *= 0.75-dz*dz; + else{ + weightz *= 1.125 - 1.5*dz + 0.5*dz*dz; + } + + val += A(axx,ayy,azz) * weightz; + } + } + } + + return val; +} + +class TestProblem{ +public: + MeshvarBnd m_rho, m_uana, m_ubnd, m_xgrad, m_ygrad, m_zgrad; + int m_nb, m_nres; + double m_h; + + TestProblem( int nb, int nres ) + : m_rho( nb, nres ), m_uana( nb, nres ), m_ubnd( nb, nres ), + m_xgrad( nb, nres ), m_ygrad( nb, nres ), m_zgrad( nb, nres ), + m_nb( nb ), m_nres( nres ), m_h( 1.0/((double)nres ) )//m_h( 1.0/((double)nres+1.0 ) ) + { } + +}; + +class TSC_Test : public TestProblem{ +public: + double m_q; + + class TSCcube{ + public: + std::vector m_data; + + + TSCcube() + { + m_data.assign(27,0.0); + + //.. center + (*this)( 0, 0, 0) = 27./64.; + + //.. faces + (*this)(-1, 0, 0) = + (*this)(+1, 0, 0) = + (*this)( 0,-1, 0) = + (*this)( 0,+1, 0) = + (*this)( 0, 0,-1) = + (*this)( 0, 0,+1) = 9./128.; + + //.. edges + (*this)(-1,-1, 0) = + (*this)(-1,+1, 0) = + (*this)(+1,-1, 0) = + (*this)(+1,+1, 0) = + (*this)(-1, 0,-1) = + (*this)(-1, 0,+1) = + (*this)(+1, 0,-1) = + (*this)(+1, 0,+1) = + (*this)( 0,-1,-1) = + (*this)( 0,-1,+1) = + (*this)( 0,+1,-1) = + (*this)( 0,+1,+1) = 3./256.; + + //.. corners + (*this)(-1,-1,-1) = + (*this)(-1,+1,-1) = + (*this)(-1,-1,+1) = + (*this)(-1,+1,+1) = + (*this)(+1,-1,-1) = + (*this)(+1,+1,-1) = + (*this)(+1,-1,+1) = + (*this)(+1,+1,+1) = 1./512.; + + } + + double& operator()(int i, int j, int k) + { return m_data[ ((i+1)*3+(j+1))*3 +(k+1)]; } + + const double& operator()(int i, int j, int k) const + { return m_data[ ((i+1)*3+(j+1))*3 +(k+1)]; } + }; + + TSC_Test( int nb, int nres, double q=-1.0 ) + : TestProblem(nb, nres), m_q(q) + { + TSCcube c; + int xm(nres/2-1), ym(nres/2-1), zm(nres/2-1); + double xxm((double)xm*m_h), yym((double)ym*m_h), zzm((double)zm*m_h); + + double fourpi = 4.0*M_PI; + + m_uana.zero(); + m_ubnd.zero(); + m_xgrad.zero(); + m_ygrad.zero(); + m_zgrad.zero(); + + for( int i=-nb; i. + +*/ + + +#include "transfer_function.hh" + + +std::map< std::string, transfer_function_plugin_creator *>& +get_transfer_function_plugin_map() +{ + static std::map< std::string, transfer_function_plugin_creator* > transfer_function_plugin_map; + return transfer_function_plugin_map; +} + +void print_transfer_function_plugins() +{ + std::map< std::string, transfer_function_plugin_creator *>& m = get_transfer_function_plugin_map(); + std::map< std::string, transfer_function_plugin_creator *>::iterator it; + it = m.begin(); + std::cout << " - Available transfer function plug-ins:\n"; + while( it!=m.end() ) + { + if( (*it).second ) + std::cout << "\t\'" << (*it).first << "\'\n"; + ++it; + } + + +} + +transfer_function_plugin *select_transfer_function_plugin( config_file& cf ) +{ + std::string tfname = cf.getValue( "cosmology", "transfer" ); + + transfer_function_plugin_creator *the_transfer_function_plugin_creator + = get_transfer_function_plugin_map()[ tfname ]; + + if( !the_transfer_function_plugin_creator ) + { + std::cerr << " - Error: transfer function plug-in \'" << tfname << "\' not found." << std::endl; + print_transfer_function_plugins(); + throw std::runtime_error("Unknown transfer function plug-in"); + + }else + std::cout << " - Selecting transfer function plug-in \'" << tfname << "\'..." << std::endl; + + transfer_function_plugin *the_transfer_function_plugin + = the_transfer_function_plugin_creator->create( cf ); + + return the_transfer_function_plugin; +} + diff --git a/transfer_function.hh b/transfer_function.hh new file mode 100644 index 0000000..b8a9aa6 --- /dev/null +++ b/transfer_function.hh @@ -0,0 +1,1420 @@ +/* + + transfer_function.hh - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +*/ + +#ifndef __TRANSFERFUNCTION_HH +#define __TRANSFERFUNCTION_HH + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "Numerics.hh" +#include "general.hh" + +#include + + + +#include "config_file.hh" + +enum tf_type{ + total, cdm, baryon +}; + +//! Abstract base class for transfer functions +/*! + This class implements a purely virtual interface that can be + used to derive instances implementing various transfer functions. + */ +class transfer_function_plugin{ +public: + Cosmology cosmo_; + config_file *pcf_; + bool tf_distinct_; + +public: + + transfer_function_plugin( config_file& cf ) + : pcf_( &cf ) + { + real_t zstart; + zstart = pcf_->getValue( "setup", "zstart" ); + cosmo_.astart = 1.0/(1.0+zstart); + cosmo_.Omega_b = pcf_->getValue( "cosmology", "Omega_b" ); + cosmo_.Omega_m = pcf_->getValue( "cosmology", "Omega_m" ); + cosmo_.Omega_L = pcf_->getValue( "cosmology", "Omega_L" ); + cosmo_.H0 = pcf_->getValue( "cosmology", "H0" ); + cosmo_.sigma8 = pcf_->getValue( "cosmology", "sigma_8" ); + cosmo_.nspect = pcf_->getValue( "cosmology", "nspec" ); + } + virtual ~transfer_function_plugin(){ }; + + virtual double compute( double k, tf_type type=baryon) = 0; + + virtual double get_kmax( void ) = 0; + virtual double get_kmin( void ) = 0; + bool tf_is_distinct( void ) + { return tf_distinct_; } +}; + + +struct transfer_function_plugin_creator +{ + virtual transfer_function_plugin * create( config_file& cf ) const = 0; + virtual ~transfer_function_plugin_creator() { } +}; + +std::map< std::string, transfer_function_plugin_creator *>& get_transfer_function_plugin_map(); +void print_transfer_function_plugins( void ); + +template< class Derived > +struct transfer_function_plugin_creator_concrete : public transfer_function_plugin_creator +{ + transfer_function_plugin_creator_concrete( const std::string& plugin_name ) + { + get_transfer_function_plugin_map()[ plugin_name ] = this; + } + + transfer_function_plugin * create( config_file& cf ) const + { + return new Derived( cf ); + } +}; + +typedef transfer_function_plugin transfer_function; + +transfer_function_plugin *select_transfer_function_plugin( config_file& cf ); + + +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ + +class TransferFunction_k +{ +public: + static transfer_function *ptf_; + static real_t nspec_; + double dplus_, pnorm_, sqrtpnorm_; + + TransferFunction_k( transfer_function *tf, real_t nspec, real_t pnorm, real_t dplus ) + : dplus_(dplus), pnorm_(pnorm) + { + ptf_ = tf; + nspec_ = nspec; + sqrtpnorm_ = sqrt( pnorm_ ); + } + + inline real_t compute( real_t k ) const + { + return dplus_*sqrtpnorm_*pow(k,0.5*nspec_)*ptf_->compute(k); + } +}; + + +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ + +#define NZERO_Q +typedef std::complex complex; +class TransferFunction_real +{ + +public: + + double Tr0_; + real_t Tmin_, Tmax_, Tscale_; + real_t rneg_, rneg2_; + static transfer_function *ptf_; + static real_t nspec_; + +protected: + + real_t krgood( real_t mu, real_t q, real_t dlnr, real_t kr ) + { + double krnew = kr; + complex cdgamma, zm, zp; + double arg, iarg, xneg, xpos, y; + gsl_sf_result g_a, g_p; + + xpos = 0.5*(mu+1.0+q); + xneg = 0.5*(mu+1.0-q); + y = M_PI/(2.0*dlnr); + zp=complex(xpos,y); + zm=complex(xneg,y); + + gsl_sf_lngamma_complex_e (zp.real(), zp.imag(), &g_a, &g_p); + zp=std::polar(exp(g_a.val),g_p.val); + real_t zpa = g_p.val; + + gsl_sf_lngamma_complex_e (zm.real(), zm.imag(), &g_a, &g_p); + zm=std::polar(exp(g_a.val),g_p.val); + real_t zma = g_p.val; + + arg=log(2.0/kr)/dlnr+(zpa+zma)/M_PI; + iarg=(real_t)((int)(arg + 0.5)); + + if( arg!=iarg ) + krnew=kr*exp((arg-iarg)*dlnr); + + return krnew; + } + + void transform( real_t pnorm, real_t dplus, unsigned N, real_t q, std::vector& rr, std::vector& TT ) + { + const double mu = 0.5; + double qmin = 1.0e-6, qmax = 1.0e+6; + + q = 0.0; + + N = 16384; + +#ifdef NZERO_Q + //q = 0.4; + q = 0.5; +#endif + + double kmin = qmin, kmax=qmax; + double rmin = qmin, rmax = qmax; + double k0 = exp(0.5*(log(kmax)+log(kmin))); + double r0 = exp(0.5*(log(rmax)+log(rmin))); + double L = log(rmax)-log(rmin); + double k0r0 = k0*r0; + double dlnk = L/N, dlnr = L/N; + + double sqrtpnorm = sqrt(pnorm); + + double dir = 1.0; + + double fftnorm = 1.0/N; + + fftw_complex *in, *out; + fftw_plan p,ip; + + in = new fftw_complex[N]; + out = new fftw_complex[N]; + + //... perform anti-ringing correction from Hamilton (2000) + k0r0 = krgood( mu, q, dlnr, k0r0 ); + + std::ofstream ofsk("transfer_k.txt"); + double sum_in = 0.0; + for( unsigned i=0; icompute( k )*pow(k,0.5*nspec_)*pow(k,1.5-q); + in[i].im = 0.0; + + sum_in += in[i].re; + ofsk << std::setw(16) << k < (int)N/2 ) + ii -= N; + +#ifndef NZERO_Q + double y=ii*M_PI/L; + complex zp((mu+1.0)*0.5,y); + gsl_sf_result g_a, g_p; + gsl_sf_lngamma_complex_e(zp.real(), zp.imag(), &g_a, &g_p); + + double arg = 2.0*(log(2.0/k0r0)*y+g_p.val); + complex cu = complex(out[i].re,out[i].im)*std::polar(1.0,arg); + out[i].re = cu.real()*fftnorm; + out[i].im = cu.imag()*fftnorm; + +#else + //complex x(dir*q, (double)ii*2.0*M_PI/L); + complex x(dir*q, (double)ii*2.0*M_PI/L); + gsl_sf_result g_a, g_p; + + complex g1, g2, garg, U, phase; + complex twotox = pow(complex(2.0,0.0),x); + + ///////////////////////////////////////////////////////// + //.. evaluate complex Gamma functions + + garg = 0.5*(mu+1.0+x); + gsl_sf_lngamma_complex_e (garg.real(), garg.imag(), &g_a, &g_p); + g1 = std::polar(exp(g_a.val),g_p.val); + + + garg = 0.5*(mu+1.0-x); + gsl_sf_lngamma_complex_e (garg.real(), garg.imag(), &g_a, &g_p); + g2 = std::polar(exp(g_a.val),g_p.val); + + ///////////////////////////////////////////////////////// + //.. compute U + + if( (fabs(g2.real()) < 1e-19 && fabs(g2.imag()) < 1e-19) ) + { + //std::cerr << "Warning : encountered possible singularity in TransferFunction_real::transform!\n"; + g1 = 1.0; g2 = 1.0; + } + + + U = twotox * g1 / g2; + phase = pow(complex(k0r0,0.0),complex(0.0,2.0*M_PI*(double)ii/L)); + + complex cu = complex(out[i].re,out[i].im)*U*phase*fftnorm; + + out[i].re = cu.real(); + out[i].im = cu.imag(); + + if( (out[i].re != out[i].re)||(out[i].im != out[i].im) ) + { std::cerr << "NaN @ i=" << i << ", U= " << U << ", phase = " << phase << ", g1 = " << g1 << ", g2 = " << g2 << std::endl; + std::cerr << "mu+1+q = " << mu+1.0+q << std::endl; + //break; + } + +#endif + + } + + /*out[N/2].im = 0.0; + out[N/2+1].im = 0.0; + out[N/2+1].re = out[N/2].re; + out[N/2].im = 0.0;*/ + + fftw_one(ip, out, in); + + rr.assign(N,0.0); + TT.assign(N,0.0); + + r0 = k0r0/k0; + + for( unsigned i=0; iN/2) + // ii-=N; + + + + double r = r0*exp(-ii*dlnr); + rr[N-i-1] = r; + TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) * in[i].re*pow(r,-(1.5+q)); + + //TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) * in[i].re*exp( -dir*(q+1.5)*ii*dlnr +q*log(k0r0))/r0; + + //rr[i] = r; + //TT[i] = 4.0*M_PI* sqrt(M_PI/2.0) * in[i].re*pow(r,-(1.5+q)); + + } + + + { + std::ofstream ofs("transfer_real_new.txt"); + for( unsigned i=0; i m_xtable,m_ytable; + double m_xmin, m_xmax, m_dx; +public: + TransferFunction_real( transfer_function *tf, real_t nspec, real_t pnorm, real_t dplus, real_t rmin, real_t rmax, real_t knymax, unsigned nr ) + { + + ptf_ = tf; + nspec_ = nspec; + + real_t q = 0.8; + + std::vector r,T; + + transform( pnorm, dplus, nr, q, r, T ); + + //... determine r=0 zero component by integrating up to the Nyquist frequency + gsl_integration_workspace * wp; + gsl_function F; + wp = gsl_integration_workspace_alloc(20000); + F.function = &call_wrapper; + double par[2]; par[0] = dplus*sqrt(pnorm); //par[1] = M_PI/kny; + F.params = (void*)par; + double error; + + //.. need a factor sqrt( 2*kny^2_x + 2*kny^2_y + 2*kny^2_z )/2 = sqrt(3/2)kny (in equilateral case) + //#warning Need to integrate from Boxsize (see below)? + gsl_integration_qag (&F, 0.0, sqrt(1.5)*knymax, 0, 1e-8, 20000, GSL_INTEG_GAUSS21, wp, &Tr0_, &error); + //gsl_integration_qag (&F, 2.0*M_PI/100.0, sqrt(1.5)*knymax, 0, 1e-8, 20000, GSL_INTEG_GAUSS21, wp, &Tr0_, &error); + + gsl_integration_workspace_free(wp); + + gsl_interp_accel *accp; + gsl_spline *splinep; + + std::vector xsp, ysp; + + for( unsigned i=0; i 0.0 ) + { + xp.push_back( 2.0*log10(r[i]) ); + yp.push_back( log10(T[i]) ); + rneg_ = r[i]; + rneg2_ = rneg_*rneg_; + }else { + xn.push_back( 2.0*log10(r[i]) ); + yn.push_back( log10(-T[i]) ); + }*/ + + + if( r[i] > rmin && r[i] < rmax ) + { + xsp.push_back( log10(r[i]) ); + ysp.push_back( T[i]*r[i]*r[i] ); + } + + } + + + + + + accp = gsl_interp_accel_alloc (); + //accn = gsl_interp_accel_alloc (); + + //... spline interpolation is only marginally slower here + //splinep = gsl_spline_alloc (gsl_interp_cspline, xsp.size() ); + splinep = gsl_spline_alloc (gsl_interp_akima, xsp.size() ); + + //... set up everything for spline interpolation + gsl_spline_init (splinep, &xsp[0], &ysp[0], xsp.size() ); + + + + //.. build lookup table using spline interpolation + + + m_xmin = log10(rmin);//m_xtable[0]; + m_xmax = log10(rmax);//m_xtable.back(); + m_dx = (m_xmax-m_xmin)/nr;//(m_xtable.size()-1); + + for(unsigned i=0; icompute( k )*pow(k,0.5*nspec_)*k*k; + } + + ~TransferFunction_real() + { } + +#if 1 + inline real_t compute_real( real_t r2 ) const + { + const double EPS = 1e-8; + const double Reps2 = EPS*EPS; + + if( r2 = m_xtable.size()-1 ) i = m_xtable.size()-2; + + real_t y1,y2; + y1 = m_ytable[i]; + y2 = m_ytable[i+1]; + + //std::cerr << y1 << " " << y2 << std::endl; + return y1 + (y2-y1)*(logr/m_dx-(real_t)i);*/ + + double r=0.5*log10(r2);//sqrt(r2); + double ii = (r-m_xmin)/m_dx; + int i = (int)ii;//((r-m_xmin)/m_dx); + + if( i < 0 ) i = 0; + else if( i >= (int)m_xtable.size()-1 ) i = m_xtable.size()-2; + + double y1,y2; + y1 = m_ytable[i]; + y2 = m_ytable[i+1]; + + // return y1 + (y2-y1)*((r-m_xmin)/m_dx-(real_t)i); + return (real_t)((y1 + (y2-y1)*(ii-(double)i))/r2); + } +#else + inline real_t compute_real( real_t r2 ) const + { + const real_t EPS = 1e-8; + const real_t Reps2 = EPS*EPS; + + if( r2 x(nr+1,0.0),y(nr+1,0.0); + + + /*std::ofstream ofs("transfer_fourier.txt"); + for( int i=0; i<1000; ++i ) + { + real_t k=1.e-3*pow(10.0,i*6.0/1000.0); + ofs << k << "\t\t" << call_wrapper(k, NULL) << std::endl; + }*/ + + + real_t fac = sqrt(pnorm)*dplus; + + std::ofstream ofs("transfer_real_old.txt"); + + for( unsigned i=0; i> k; + ss >> Tk; + + m_tab_k.push_back( k ); + m_tab_Tk.push_back( Tk ); + + } +#ifdef WITH_MPI + } + + unsigned n=m_tab_k.size(); + MPI::COMM_WORLD.Bcast( &n, 1, MPI_UNSIGNED, 0 ); + + if( MPI::COMM_WORLD.Get_rank() > 0 ){ + m_tab_k.assign(n,0); + m_tab_Tk.assign(n,0); + } +#ifdef SINGLE_PRECISION + MPI::COMM_WORLD.Bcast( &m_tab_k[0], n, MPI_FLOAT, 0 ); + MPI::COMM_WORLD.Bcast( &m_tab_Tk[0], n, MPI_FLOAT, 0 ); +#else + MPI::COMM_WORLD.Bcast( &m_tab_k[0], n, MPI_DOUBLE, 0 ); + MPI::COMM_WORLD.Bcast( &m_tab_Tk[0], n, MPI_DOUBLE, 0 ); +#endif +#endif + + } + +public: + TransferFunction_tab( Cosmology aCosm, std::string filename ) + : TransferFunction( aCosm ), m_filename( filename ) + { + read_table(); + } + + virtual inline real_t compute( real_t k ){ + + return linint( k, m_tab_k, m_tab_Tk ); + } + + inline real_t get_kmin( void ){ + return m_tab_k[1]; + } + + inline real_t get_kmax( void ){ + return m_tab_k[m_tab_k.size()-2]; + } + +}; + +class TransferFunction_CAMB : public TransferFunction{ +public: + enum TFtype{ + TF_baryon, TF_cdm, TF_total + }; + + +private: + //Cosmology m_Cosmology; + + std::string m_filename_Pk, m_filename_Tk; + std::vector m_tab_k, m_tab_Tk; + + Spline_interp *m_psinterp; + gsl_interp_accel *acc; + gsl_spline *spline; + + + + void read_table( TFtype iwhich ){ +#ifdef WITH_MPI + if( MPI::COMM_WORLD.Get_rank() == 0 ){ +#endif + std::cerr + << " - reading tabulated transfer function data from file \n" + << " \'" << m_filename_Tk << "\'\n"; + + std::string line; + std::ifstream ifs( m_filename_Tk.c_str() ); + + m_tab_k.clear(); + m_tab_Tk.clear(); + + while( !ifs.eof() ){ + getline(ifs,line); + + if(ifs.eof()) break; + + std::stringstream ss(line); + + real_t k, Tkc, Tkb, Tkg, Tkr, Tknu, Tktot; + ss >> k; + ss >> Tkc; + ss >> Tkb; + ss >> Tkg; + ss >> Tkr; + ss >> Tknu; + ss >> Tktot; + + m_tab_k.push_back( log10(k) ); + + switch( iwhich ){ + case TF_total: + m_tab_Tk.push_back( log10(Tktot) ); + break; + case TF_baryon: + m_tab_Tk.push_back( log10(Tkb) ); + break; + case TF_cdm: + m_tab_Tk.push_back( log10(Tkc) ); + break; + } + //m_tab_Tk_cdm.push_back( Tkc ); + //m_tab_Tk_baryon.push_back( Tkb ); + //m_tab_Tk_tot.push_back( Tktot ); + } + + ifs.close(); + + + + +#ifdef WITH_MPI + } + + unsigned n=m_tab_k.size(); + MPI::COMM_WORLD.Bcast( &n, 1, MPI_UNSIGNED, 0 ); + + if( MPI::COMM_WORLD.Get_rank() > 0 ){ + m_tab_k.assign(n,0); + m_tab_Tk.assign(n,0); + } +#ifdef SINGLE_PRECISION + MPI::COMM_WORLD.Bcast( &m_tab_k[0], n, MPI_FLOAT, 0 ); + MPI::COMM_WORLD.Bcast( &m_tab_Tk[0], n, MPI_FLOAT, 0 ); +#else + MPI::COMM_WORLD.Bcast( &m_tab_k[0], n, MPI_DOUBLE, 0 ); + MPI::COMM_WORLD.Bcast( &m_tab_Tk[0], n, MPI_DOUBLE, 0 ); +#endif +#endif + + } + +public: + TransferFunction_CAMB( Cosmology aCosm, std::string filename_Tk, TFtype iwhich ) + : TransferFunction( aCosm ), m_filename_Tk( filename_Tk ), m_psinterp( NULL ) + { + read_table( iwhich ); + + //m_psinterp = new Spline_interp( m_tab_k, m_tab_Tk ); + + acc = gsl_interp_accel_alloc(); + //spline = gsl_spline_alloc( gsl_interp_cspline, m_tab_k.size() ); + spline = gsl_spline_alloc( gsl_interp_akima, m_tab_k.size() ); + + gsl_spline_init (spline, &m_tab_k[0], &m_tab_Tk[0], m_tab_k.size() ); + + } + + virtual ~TransferFunction_CAMB() + { + //delete m_psinterp; + + gsl_spline_free (spline); + gsl_interp_accel_free (acc); + } + + virtual inline real_t compute( real_t k ){ + //real_t Tkc = linint( k, m_tab_k, m_tab_Tk_cdm ); + //real_t Tkb = linint( k, m_tab_k, m_tab_Tk_baryon ); + + //return pow(10.0, linint( log10(k), m_tab_k, m_tab_Tk )); + + //return pow(10.0, m_psinterp->interp(log10(k)) ); + + //if( k0 */ + growth_to_z0, /* D_1(z)/D_1(0) -- the growth relative to z=0 */ + hhubble, /* Need to pass Hubble constant to TFmdm_onek_hmpc() */ + k_equality, /* The comoving wave number of the horizon at equality*/ + obhh, /* Omega_baryon * hubble^2 */ + omega_curv, /* = 1 - omega_matter - omega_lambda */ + omega_lambda_z, /* Omega_lambda at the given redshift */ + omega_matter_z, /* Omega_matter at the given redshift */ + omhh, /* Omega_matter * hubble^2 */ + onhh, /* Omega_hdm * hubble^2 */ + p_c, /* The correction to the exponent before drag epoch */ + p_cb, /* The correction to the exponent after drag epoch */ + sound_horizon_fit, /* The sound horizon at the drag epoch */ + theta_cmb, /* The temperature of the CMB, in units of 2.7 K */ + y_drag, /* Ratio of z_equality to z_drag */ + z_drag, /* Redshift of the drag epoch */ + z_equality; /* Redshift of matter-radiation equality */ + + /* The following are set in TFmdm_onek_mpc() */ + float gamma_eff, /* Effective \Gamma */ + growth_cb, /* Growth factor for CDM+Baryon perturbations */ + growth_cbnu, /* Growth factor for CDM+Baryon+Neutrino pert. */ + max_fs_correction, /* Correction near maximal free streaming */ + qq, /* Wavenumber rescaled by \Gamma */ + qq_eff, /* Wavenumber rescaled by effective Gamma */ + qq_nu, /* Wavenumber compared to maximal free streaming */ + tf_master, /* Master TF */ + tf_sup, /* Suppressed TF */ + y_freestream; /* The epoch of free-streaming for a given scale */ + + /* Finally, TFmdm_onek_mpc() and TFmdm_onek_hmpc() give their answers as */ + float tf_cb, /* The transfer function for density-weighted + CDM + Baryon perturbations. */ + tf_cbnu; /* The transfer function for density-weighted + CDM + Baryon + Massive Neutrino perturbations. */ + + /* By default, these functions return tf_cb */ + + /* ------------------------- TFmdm_set_cosm() ------------------------ */ + int TFmdm_set_cosm(float omega_matter, float omega_baryon, float omega_hdm, + int degen_hdm, float omega_lambda, float hubble, float redshift) + /* This routine takes cosmological parameters and a redshift and sets up + all the internal scalar quantities needed to compute the transfer function. */ + /* INPUT: omega_matter -- Density of CDM, baryons, and massive neutrinos, + in units of the critical density. */ + /* omega_baryon -- Density of baryons, in units of critical. */ + /* omega_hdm -- Density of massive neutrinos, in units of critical */ + /* degen_hdm -- (Int) Number of degenerate massive neutrino species */ + /* omega_lambda -- Cosmological constant */ + /* hubble -- Hubble constant, in units of 100 km/s/Mpc */ + /* redshift -- The redshift at which to evaluate */ + /* OUTPUT: Returns 0 if all is well, 1 if a warning was issued. Otherwise, + sets many global variables for use in TFmdm_onek_mpc() */ + { + float z_drag_b1, z_drag_b2, omega_denom; + int qwarn; + qwarn = 0; + + theta_cmb = 2.728/2.7; /* Assuming T_cmb = 2.728 K */ + + /* Look for strange input */ + if (omega_baryon<0.0) { + fprintf(stderr, + "TFmdm_set_cosm(): Negative omega_baryon set to trace amount.\n"); + qwarn = 1; + } + if (omega_hdm<0.0) { + fprintf(stderr, + "TFmdm_set_cosm(): Negative omega_hdm set to trace amount.\n"); + qwarn = 1; + } + if (hubble<=0.0) { + fprintf(stderr,"TFmdm_set_cosm(): Negative Hubble constant illegal.\n"); + exit(1); /* Can't recover */ + } else if (hubble>2.0) { + fprintf(stderr,"TFmdm_set_cosm(): Hubble constant should be in units of 100 km/s/Mpc.\n"); + qwarn = 1; + } + if (redshift<=-1.0) { + fprintf(stderr,"TFmdm_set_cosm(): Redshift < -1 is illegal.\n"); + exit(1); + } else if (redshift>99.0) { + fprintf(stderr, + "TFmdm_set_cosm(): Large redshift entered. TF may be inaccurate.\n"); + qwarn = 1; + } + if (degen_hdm<1) degen_hdm=1; + num_degen_hdm = (float) degen_hdm; + /* Have to save this for TFmdm_onek_mpc() */ + /* This routine would crash if baryons or neutrinos were zero, + so don't allow that */ + if (omega_baryon<=0) omega_baryon=1e-5; + if (omega_hdm<=0) omega_hdm=1e-5; + + omega_curv = 1.0-omega_matter-omega_lambda; + omhh = omega_matter*SQR(hubble); + obhh = omega_baryon*SQR(hubble); + onhh = omega_hdm*SQR(hubble); + f_baryon = omega_baryon/omega_matter; + f_hdm = omega_hdm/omega_matter; + f_cdm = 1.0-f_baryon-f_hdm; + f_cb = f_cdm+f_baryon; + f_bnu = f_baryon+f_hdm; + + /* Compute the equality scale. */ + z_equality = 25000.0*omhh/SQR(SQR(theta_cmb)); /* Actually 1+z_eq */ + k_equality = 0.0746*omhh/SQR(theta_cmb); + + /* Compute the drag epoch and sound horizon */ + z_drag_b1 = 0.313*pow(omhh,-0.419)*(1+0.607*pow(omhh,0.674)); + z_drag_b2 = 0.238*pow(omhh,0.223); + z_drag = 1291*pow(omhh,0.251)/(1.0+0.659*pow(omhh,0.828))* + (1.0+z_drag_b1*pow(obhh,z_drag_b2)); + y_drag = z_equality/(1.0+z_drag); + + sound_horizon_fit = 44.5*log(9.83/omhh)/sqrt(1.0+10.0*pow(obhh,0.75)); + + /* Set up for the free-streaming & infall growth function */ + p_c = 0.25*(5.0-sqrt(1+24.0*f_cdm)); + p_cb = 0.25*(5.0-sqrt(1+24.0*f_cb)); + + omega_denom = omega_lambda+SQR(1.0+redshift)*(omega_curv+ + omega_matter*(1.0+redshift)); + omega_lambda_z = omega_lambda/omega_denom; + omega_matter_z = omega_matter*SQR(1.0+redshift)*(1.0+redshift)/omega_denom; + growth_k0 = z_equality/(1.0+redshift)*2.5*omega_matter_z/ + (pow(omega_matter_z,4.0/7.0)-omega_lambda_z+ + (1.0+omega_matter_z/2.0)*(1.0+omega_lambda_z/70.0)); + growth_to_z0 = z_equality*2.5*omega_matter/(pow(omega_matter,4.0/7.0) + -omega_lambda + (1.0+omega_matter/2.0)*(1.0+omega_lambda/70.0)); + growth_to_z0 = growth_k0/growth_to_z0; + + /* Compute small-scale suppression */ + alpha_nu = f_cdm/f_cb*(5.0-2.*(p_c+p_cb))/(5.-4.*p_cb)* + pow(1+y_drag,p_cb-p_c)* + (1+f_bnu*(-0.553+0.126*f_bnu*f_bnu))/ + (1-0.193*sqrt(f_hdm*num_degen_hdm)+0.169*f_hdm*pow(num_degen_hdm,0.2))* + (1+(p_c-p_cb)/2*(1+1/(3.-4.*p_c)/(7.-4.*p_cb))/(1+y_drag)); + alpha_gamma = sqrt(alpha_nu); + beta_c = 1/(1-0.949*f_bnu); + /* Done setting scalar variables */ + hhubble = hubble; /* Need to pass Hubble constant to TFmdm_onek_hmpc() */ + return qwarn; + } + + /* ---------------------------- TFmdm_onek_mpc() ---------------------- */ + + float TFmdm_onek_mpc(float kk) + /* Given a wavenumber in Mpc^-1, return the transfer function for the + cosmology held in the global variables. */ + /* Input: kk -- Wavenumber in Mpc^-1 */ + /* Output: The following are set as global variables: + growth_cb -- the transfer function for density-weighted + CDM + Baryon perturbations. + growth_cbnu -- the transfer function for density-weighted + CDM + Baryon + Massive Neutrino perturbations. */ + /* The function returns growth_cb */ + { + float tf_sup_L, tf_sup_C; + float temp1, temp2; + + qq = kk/omhh*SQR(theta_cmb); + + /* Compute the scale-dependent growth functions */ + y_freestream = 17.2*f_hdm*(1+0.488*pow(f_hdm,-7.0/6.0))* + SQR(num_degen_hdm*qq/f_hdm); + temp1 = pow(growth_k0, 1.0-p_cb); + temp2 = pow(growth_k0/(1+y_freestream),0.7); + growth_cb = pow(1.0+temp2, p_cb/0.7)*temp1; + growth_cbnu = pow(pow(f_cb,0.7/p_cb)+temp2, p_cb/0.7)*temp1; + + /* Compute the master function */ + gamma_eff =omhh*(alpha_gamma+(1-alpha_gamma)/ + (1+SQR(SQR(kk*sound_horizon_fit*0.43)))); + qq_eff = qq*omhh/gamma_eff; + + tf_sup_L = log(2.71828+1.84*beta_c*alpha_gamma*qq_eff); + tf_sup_C = 14.4+325/(1+60.5*pow(qq_eff,1.11)); + tf_sup = tf_sup_L/(tf_sup_L+tf_sup_C*SQR(qq_eff)); + + qq_nu = 3.92*qq*sqrt(num_degen_hdm/f_hdm); + max_fs_correction = 1+1.2*pow(f_hdm,0.64)*pow(num_degen_hdm,0.3+0.6*f_hdm)/ + (pow(qq_nu,-1.6)+pow(qq_nu,0.8)); + tf_master = tf_sup*max_fs_correction; + + /* Now compute the CDM+HDM+baryon transfer functions */ + tf_cb = tf_master*growth_cb/growth_k0; + tf_cbnu = tf_master*growth_cbnu/growth_k0; + return tf_cb; + } + + /* ---------------------------- TFmdm_onek_hmpc() ---------------------- */ + + float TFmdm_onek_hmpc(float kk) + /* Given a wavenumber in h Mpc^-1, return the transfer function for the + cosmology held in the global variables. */ + /* Input: kk -- Wavenumber in h Mpc^-1 */ + /* Output: The following are set as global variables: + growth_cb -- the transfer function for density-weighted + CDM + Baryon perturbations. + growth_cbnu -- the transfer function for density-weighted + CDM + Baryon + Massive Neutrino perturbations. */ + /* The function returns growth_cb */ + { + return TFmdm_onek_mpc(kk*hhubble); + } + + + + TransferFunction_EisensteinNeutrino( Cosmology aCosm, real_t Omega_HDM, int degen_HDM, real_t Tcmb = 2.726 ) + : TransferFunction(aCosm)//, m_h0( aCosm.H0*0.01 ) + { + TFmdm_set_cosm( aCosm.Omega_m, aCosm.Omega_b, Omega_HDM, degen_HDM, aCosm.Omega_L, aCosm.H0, aCosm.astart ); + + } + + //! Computes the transfer function for k in Mpc/h by calling TFfit_onek + virtual inline real_t compute( real_t k ){ + return TFmdm_onek_hmpc( k ); + + + /*real_t tfb, tfcdm, fb, fc; //, tfull + TFfit_onek( k*m_h0, &tfb, &tfcdm ); + + fb = m_Cosmology.Omega_b/(m_Cosmology.Omega_m); + fc = (m_Cosmology.Omega_m-m_Cosmology.Omega_b)/(m_Cosmology.Omega_m) ; + + return fb*tfb+fc*tfcdm;*/ + //return 1.0; + } + + +}; + +#endif + +#endif