major mod: added subgrid deconvolution for DM

* The DM transfer function is now deconvolved with the cell assignment function in order to restore power near the Nyquist wavenumber. * Added an option to change the random number sample cube size [random]/cubesize. This should be set >128 to generate un- correlated numbers. True fix pending. * Removed some unused code * Increased Buffer size for Gadget-2 output plug-in
2024-09-19 17:03:46 +02:00 · 2010-07-21 01:10:29 -07:00 · 2010-07-21 01:10:29 -07:00 · 002763346f
commit 002763346f
parent 2b8e08172c
12 changed files with 567 additions and 1377 deletions
--- a/convolution_kernel.cc
+++ b/convolution_kernel.cc
@ -65,6 +65,9 @@ namespace convolution{
 		
 		rfftwnd_plan	iplan, plan;
 		
+		std::cout << "   - Performing FFT convolution...\n";
+		
+		
 		plan  = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
 									FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
 		
@ -103,226 +106,9 @@ namespace convolution{
 	}
 	
 	
-	template< typename real_t >
-	void perform_filtered( kernel * pk, void *pd )
-	{
-		parameters cparam_ = pk->cparam_;
-		double 
-			kny, kmax,
-			fftnorm = pow(2.0*M_PI,1.5)/sqrt(cparam_.lx*cparam_.ly*cparam_.lz)/sqrt((double)(cparam_.nx*cparam_.ny*cparam_.nz));
-		
-		fftw_complex	*cdata,*ckernel;
-		fftw_real		*data;
-		
-		kny			= cparam_.nx*M_PI/cparam_.lx;
-		kmax        = M_PI/2.0;
-		
-		data		= reinterpret_cast<fftw_real*>(pd);
-		cdata		= reinterpret_cast<fftw_complex*>(data);
-		ckernel		= reinterpret_cast<fftw_complex*>( pk->get_ptr() );
-		
-		rfftwnd_plan	iplan, plan;
-		
-		plan  = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
-									FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
-		
-		iplan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
-									FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
-		
-		
-		#ifndef SINGLETHREAD_FFTW		
-		rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, data, NULL );
-		#else
-		rfftwnd_one_real_to_complex( plan, data, NULL );
-		#endif
-		
-		//double kmax2 = kmax*kmax;
-		
-		#pragma omp parallel for
-		for( int i=0; i<cparam_.nx; ++i )
-			for( int j=0; j<cparam_.ny; ++j )
-				for( int k=0; k<cparam_.nz/2+1; ++k )
-				{
-					unsigned ii = (i*cparam_.ny + j) * (cparam_.nz/2+1) + k;
-					
-					complex ccdata(cdata[ii].re,cdata[ii].im), cckernel(ckernel[ii].re,ckernel[ii].im);
-					
-					int ik(i), jk(j);
-					
-					if( ik > cparam_.nx/2 )
-						ik -= cparam_.nx;
-					if( jk > cparam_.ny/2 )
-						jk -= cparam_.ny;
-					
-					double 
-						kx( M_PI*(double)ik/cparam_.nx ),
-						ky( M_PI*(double)jk/cparam_.ny ),
-					kz( M_PI*(double)k/cparam_.nz );//,
-													//						kk( kx*kx+ky*ky+kz*kz );
-					
-					//... cos(k) is the Hanning filter function (cf. Bertschinger 2001)
-					double filter = 0.0;
-					if( true ){//kk <= kmax2 ){
-						//filter = cos( kx )*cos( ky )*cos( kz );
-						filter = cos( 0.5*kx )*cos( 0.5*ky )*cos( 0.5*kz );
-							   //filter = 1.0;
-						//filter *=filter;
-					}
-					//filter = 1.0;
-					
-					ccdata = ccdata * cckernel *fftnorm * filter;
-					
-					cdata[ii].re = ccdata.real();
-					cdata[ii].im = ccdata.imag();
-				}
-		
-		#ifndef SINGLETHREAD_FFTW		
-		rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL);
-		#else		
-		rfftwnd_one_complex_to_real(iplan, cdata, NULL);
-		#endif
-		
-		rfftwnd_destroy_plan(plan);
-		rfftwnd_destroy_plan(iplan);
-	}
-	
-	
 	template void perform<double>( kernel* pk, void *pd );
 	template void perform<float>( kernel* pk, void *pd );
-	template void perform_filtered<double>( kernel* pk, void *pd );
-	template void perform_filtered<float>( kernel* pk, void *pd );
 	
-	
-	void truncate( kernel* pk, double R, double alpha )
-	{
-		parameters cparam_ = pk->cparam_;
-
-		double 
-			dx		= cparam_.lx/cparam_.nx, 
-			dy		= cparam_.ly/cparam_.ny, 
-			dz		= cparam_.lz/cparam_.nz;
-		
-		double fftnorm = 1.0/(cparam_.nx * cparam_.ny * cparam_.nz);
-		
-		rfftwnd_plan	iplan, plan;
-		
-		plan  = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
-									FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
-		
-		iplan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
-									FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
-		
-		fftw_real		*rkernel	= reinterpret_cast<fftw_real*>( pk->get_ptr() );
-		fftw_complex	*ckernel	= reinterpret_cast<fftw_complex*>( pk->get_ptr() );
-		
-		#ifndef SINGLETHREAD_FFTW		
-		rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, ckernel, NULL);
-		#else		
-		rfftwnd_one_complex_to_real(iplan, ckernel, NULL);
-		#endif
-		
-		
-		#pragma omp parallel for
-		for( int i=0; i<cparam_.nx; ++i )
-			for( int j=0; j<cparam_.ny; ++j )
-				for( int k=0; k<cparam_.nz; ++k )
-				{
-					int iix(i), iiy(j), iiz(k);
-					double rr[3], rr2;
-					
-					if( iix > (int)cparam_.nx/2 ) iix -= cparam_.nx;
-					if( iiy > (int)cparam_.ny/2 ) iiy -= cparam_.ny;
-					if( iiz > (int)cparam_.nz/2 ) iiz -= cparam_.nz;
-					
-					rr[0] = (double)iix * dx;
-					rr[1] = (double)iiy * dy;
-					rr[2] = (double)iiz * dz;
-					
-					rr2 = rr[0]*rr[0] + rr[1]*rr[1] + rr[2]*rr[2];
-					
-					unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
-					rkernel[idx] *= 0.5*(erfc((sqrt(rr2)-R)*alpha))*fftnorm;
-				}
-		
-		
-		
-		#ifndef SINGLETHREAD_FFTW		
-		rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, rkernel, NULL );
-		#else
-		rfftwnd_one_real_to_complex( plan, rkernel, NULL );
-		#endif
-		
-	}
-	
-	void truncate_sharp( kernel* pk, double R )
-	{
-		parameters cparam_ = pk->cparam_;
-		
-		double 
-		dx		= cparam_.lx/cparam_.nx, 
-		dy		= cparam_.ly/cparam_.ny, 
-		dz		= cparam_.lz/cparam_.nz,
-		R2		= R*R;
-		
-		double fftnorm = 1.0/(cparam_.nx * cparam_.ny * cparam_.nz);
-		
-		rfftwnd_plan	iplan, plan;
-		
-		plan  = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
-									FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
-		
-		iplan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
-									FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
-		
-		fftw_real		*rkernel	= reinterpret_cast<fftw_real*>( pk->get_ptr() );
-		fftw_complex	*ckernel	= reinterpret_cast<fftw_complex*>( pk->get_ptr() );
-		
-#ifndef SINGLETHREAD_FFTW		
-		rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, ckernel, NULL);
-#else		
-		rfftwnd_one_complex_to_real(iplan, ckernel, NULL);
-#endif
-		
-		
-#pragma omp parallel for
-		for( int i=0; i<cparam_.nx; ++i )
-			for( int j=0; j<cparam_.ny; ++j )
-				for( int k=0; k<cparam_.nz; ++k )
-				{
-					int iix(i), iiy(j), iiz(k);
-					double rr[3], rr2;
-					
-					if( iix > (int)cparam_.nx/2 ) iix -= cparam_.nx;
-					if( iiy > (int)cparam_.ny/2 ) iiy -= cparam_.ny;
-					if( iiz > (int)cparam_.nz/2 ) iiz -= cparam_.nz;
-					
-					rr[0] = (double)iix * dx;
-					rr[1] = (double)iiy * dy;
-					rr[2] = (double)iiz * dz;
-					
-					rr2 = rr[0]*rr[0] + rr[1]*rr[1] + rr[2]*rr[2];
-					unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
-					if( rr2 > R2 )
-					{
-						
-						rkernel[idx] = 0.0;
-					}else {
-						rkernel[idx] *= fftnorm;
-					}
-
-				}
-		
-		
-		
-#ifndef SINGLETHREAD_FFTW		
-		rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, rkernel, NULL );
-#else
-		rfftwnd_one_real_to_complex( plan, rkernel, NULL );
-#endif
-		
-	}
-
-
 	/*****************************************************************************************\
 	 ***    SPECIFIC KERNEL IMPLEMENTATIONS      *********************************************
 	\*****************************************************************************************/	
@ -350,9 +136,6 @@ namespace convolution{
 		{	std::vector<real_t>().swap( kdata_ );	}
 		
 	};
-
-
-	
 	
 	
 	template< typename real_t >
@ -365,25 +148,41 @@ namespace convolution{
 			dy			= cparam_.ly/cparam_.ny, 
 			dz			= cparam_.lz/cparam_.nz,
 			boxlength	= cparam_.pcf->getValue<double>("setup","boxlength"),
-			nspec		= cparam_.pcf->getValue<double>("cosmology","nspec"),//cosmo.nspect,
-			pnorm		= cparam_.pcf->getValue<double>("cosmology","pnorm"),//cosmo.pnorm,
-		dplus		= cparam_.pcf->getValue<double>("cosmology","dplus");//,//cosmo.dplus;
-																		 //			t00f        = cparam_.t0scale;
+			nspec		= cparam_.pcf->getValue<double>("cosmology","nspec"),
+			pnorm		= cparam_.pcf->getValue<double>("cosmology","pnorm"),
+			dplus		= cparam_.pcf->getValue<double>("cosmology","dplus");
+
 		unsigned 
-		levelmax	= cparam_.pcf->getValue<unsigned>("setup","levelmax");
+			levelmax	= cparam_.pcf->getValue<unsigned>("setup","levelmax");
 		
 		bool
-		bperiodic	= cparam_.pcf->getValueSafe<bool>("setup","periodic_TF",true);
+			bperiodic	= cparam_.pcf->getValueSafe<bool>("setup","periodic_TF",true);
 		
-		//if( cparam_.normalize )
-		//	kny *= 2.0;
 		
-		TransferFunction_real *tfr = new TransferFunction_real(cparam_.ptf,nspec,pnorm,dplus,0.25*cparam_.lx/(double)cparam_.nx,2.0*boxlength,kny, (int)pow(2,levelmax+2));
+		std::cout << "   - Computing transfer function kernel...\n";
 		
-		if( bperiodic )
+		
+		
+		if(! cparam_.is_finest )
+			kny *= pow(2,cparam_.coarse_fact);
+		
+		TransferFunction_real *tfr = 
+				new TransferFunction_real(cparam_.ptf,nspec,pnorm,dplus,
+							0.25*cparam_.lx/(double)cparam_.nx,2.0*boxlength,kny, (int)pow(2,levelmax+2));
+		
+		fftw_real		*rkernel	= reinterpret_cast<fftw_real*>( &kdata_[0] );
+		fftw_complex	*kkernel	= reinterpret_cast<fftw_complex*>( &kdata_[0] );
+		
+		rfftwnd_plan	plan		= rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
+												FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
+						
+		int nzp = cparam_.nz/2+1;
+		double kmax = 0.5*M_PI/std::max(cparam_.nx,std::max(cparam_.ny,cparam_.nz));
+		
+		if( bperiodic  )
 		{
 			
-			#pragma omp parallel for
+			#pragma omp parallel for 
 			for( int i=0; i<cparam_.nx; ++i )
 				for( int j=0; j<cparam_.ny; ++j )
 					for( int k=0; k<cparam_.nz; ++k )
@ -416,6 +215,22 @@ namespace convolution{
 						
 						kdata_[idx] *= fac;
 						
+						if( cparam_.deconvolve )
+						{
+							double rx((double)iix*dx/boxlength),ry((double)iiy*dy/boxlength),rz((double)iiz*dz/boxlength), ico;
+							ico = 1.0;
+							if( i>0 )
+								ico *= sin(M_PI*rx)/(M_PI*rx);
+							if( j>0 )
+								ico *= sin(M_PI*ry)/(M_PI*ry);
+							if( k>0 )
+								ico *= sin(M_PI*rz)/(M_PI*rz);
+							kdata_[idx] /= ico;
+							
+						}
+						
+						
+						
 					}
 		}else{
 			#pragma omp parallel for
@ -439,63 +254,103 @@ namespace convolution{
 						rr2 = rr[0]*rr[0]+rr[1]*rr[1]+rr[2]*rr[2];
 						kdata_[idx] = tfr->compute_real(rr2)*fac;
 						
+						if( cparam_.deconvolve )
+						{
+							double rx((double)iix*dx/boxlength),ry((double)iiy*dy/boxlength),rz((double)iiz*dz/boxlength), ico;
+							ico = 1.0;
+							if( i>0 )
+								ico *= sin(M_PI*rx)/(M_PI*rx);
+							if( j>0 )
+								ico *= sin(M_PI*ry)/(M_PI*ry);
+							if( k>0 )
+								ico *= sin(M_PI*rz)/(M_PI*rz);
+							kdata_[idx] /= ico;
+						}
 						
-					}
-		}		
-		//kdata_[0] = tfr->compute_real(dx*dx*0.25)*fac;
-		
-		//std::cerr << "T(r=0) = " << kdata_[0]/fac << std::endl;
-		//if( cparam_.normalize )
-		//	kdata_[0] *= 0.125;//= 0.0;//*= 0.125;
-		
-		//... determine normalization
-		double sum = 0.0;
-		#pragma omp parallel for reduction(+:sum)
-		for( int i=0; i<cparam_.nx; ++i )
-			for( int j=0; j<cparam_.ny; ++j )
-				for( int k=0; k<cparam_.nz; ++k )
-				{	
-					unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
-					//if( idx > 0 )
-						sum += kdata_[idx];
-				}
-		
-		
-		//std::cerr << " - The convolution kernel has avg of " << (sum+kdata_[0])/(cparam_.nx*cparam_.ny*cparam_.nz) << std::endl;
-		
-		sum /= cparam_.nx*cparam_.ny*cparam_.nz;//-1;
-		
-		if( false )//cparam_.normalize )
-		{
-			
-			#pragma omp parallel for
-			for( int i=0; i<cparam_.nx; ++i )
-				for( int j=0; j<cparam_.ny; ++j )
-					for( int k=0; k<cparam_.nz; ++k )
-					{
-						unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
-						//if( idx > 0 )
-							kdata_[idx] -= sum;
 					}
 		}
 		
+		if(!cparam_.is_finest)
+			kdata_[0] /= pow(2.0,1.5*cparam_.coarse_fact);
+				
+		delete tfr;
 		
-		//if( t00f < 0.0 )
-		//	kdata_[0] += (tfr->compute_real(0.125*dx*dx) - tfr->compute_real(0.0))*fac;
-		
-		fftw_real		*rkernel	= reinterpret_cast<fftw_real*>( &kdata_[0] );
-		rfftwnd_plan	plan		= rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
-												FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
+		double k0 = kdata_[0];
+			
+		if( cparam_.deconvolve )
+			kdata_[0] = 0.0;
+				
+		//kdata_[0] = tfr->compute_real(dx*dx*0.25)*fac;
+		//std::cerr << "T(r=0) = " << kdata_[0]/fac << std::endl;
 		
 		#ifndef SINGLETHREAD_FFTW		
 		rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, rkernel, NULL );
 		#else
 		rfftwnd_one_real_to_complex( plan, rkernel, NULL );
 		#endif
-	
-		rfftwnd_destroy_plan(plan);
 		
-		delete tfr;
+		if( cparam_.deconvolve )
+		{
+			
+			double ksum = 0.0;
+			unsigned kcount = 0;
+			
+			#pragma omp parallel for reduction(+:ksum,kcount)
+			for( int i=0; i<cparam_.nx; ++i )
+				for( int j=0; j<cparam_.ny; ++j )
+					for( int k=0; k<nzp; ++k )
+					{
+						double kx,ky,kz;
+						
+						kx = (double)i;
+						ky = (double)j;
+						kz = (double)k;
+						
+						if( kx > cparam_.nx/2 ) kx -= cparam_.nx;
+						if( ky > cparam_.ny/2 ) ky -= cparam_.ny;
+						
+
+						double ipix = 1.0;
+						
+						if( !cparam_.is_finest )
+							ipix *= (cos(kx*kmax)*cos(ky*kmax)*cos(kz*kmax));
+
+						if( i > 0 )
+							ipix /= sin(kx*2.0*kmax)/(kx*2.0*kmax);
+						if( j > 0 )
+							ipix /= sin(ky*2.0*kmax)/(ky*2.0*kmax);
+						if( k > 0 )
+							ipix /= sin(kz*2.0*kmax)/(kz*2.0*kmax);
+						
+						unsigned q  = (i*cparam_.ny+j)*nzp+k;
+						kkernel[q].re *= ipix;
+						kkernel[q].im *= ipix;
+						
+						if( k==0 || k==cparam_.nz/2 )
+						{
+							ksum  += kkernel[q].re;
+							kcount++;
+						}else{
+							ksum  += 2.0*(kkernel[q].re);
+							kcount+=2;
+						}
+					}
+			
+			double dk = k0-ksum/kcount;
+			
+			//... enforce the r=0 component by adjusting the k-space mean
+			#pragma omp parallel for reduction(+:ksum,kcount)
+			for( int i=0; i<cparam_.nx; ++i )
+				for( int j=0; j<cparam_.ny; ++j )
+					for( int k=0; k<nzp; ++k )
+					{
+						unsigned q  = (i*cparam_.ny+j)*nzp+k;
+						kkernel[q].re += dk;
+						
+					}
+		}		
+		
+		rfftwnd_destroy_plan(plan);
 	}
 	

@ -529,11 +384,7 @@ namespace convolution{
 	void kernel_k<real_t>::compute_kernel( void )
 	{
 		double 
-		//kny			= cparam_.nx*M_PI/cparam_.lx,
-		fac			= cparam_.lx*cparam_.ly*cparam_.lz/pow(2.0*M_PI,3),// /(cparam_.nx*cparam_.ny*cparam_.nz),
-																	   //	dx			= cparam_.lx/cparam_.nx, 
-																	   //			dy			= cparam_.ly/cparam_.ny, 
-																	   //			dz			= cparam_.lz/cparam_.nz,
+			fac			= cparam_.lx*cparam_.ly*cparam_.lz/pow(2.0*M_PI,3),
 			boxlength	= cparam_.pcf->getValue<double>("setup","boxlength"),
 			nspec		= cparam_.pcf->getValue<double>("cosmology","nspec"),
 			pnorm		= cparam_.pcf->getValue<double>("cosmology","pnorm"),
--- a/convolution_kernel.hh
+++ b/convolution_kernel.hh
@ -40,7 +40,9 @@ namespace convolution{
 		double lx,ly,lz;//,boxlength;
 		config_file *pcf;
 		transfer_function* ptf;
-		bool normalize;
+		unsigned coarse_fact;
+		bool deconvolve;
+		bool is_finest;
 	};

 	
@ -97,18 +99,6 @@ namespace convolution{
 	template< typename real_t >
 	void perform( kernel* pk, void *pd );

-	//! actual implementation of the FFT convolution (independent of the actual kernel)
-	//! with Hann (cf. Bertschinger 2001) window filter applied
-	template< typename real_t >
-	void perform_filtered( kernel* pk, void *pd );
-	
-	
-	//! truncate the kernel softly at R, used for boundary corrections
-	void truncate( kernel*pk, double R, double alpha );
-	
-	//! truncate the kernel sharply at R, used for boundary corrections
-	void truncate_sharp( kernel*pk, double R );
-	
 }; //namespace convolution


--- a/densities.cc
+++ b/densities.cc
@ -24,6 +24,12 @@
 #include "densities.hh"
 #include "convolution_kernel.hh"

+//... uncomment this to have a single peak in the centre and otherwise zeros
+//#define SINGLE_PEAK
+
+//TODO: this should be a larger number by default, just to maintain consistency with old default
+#define DEF_RAN_CUBE_SIZE	32
+
 bool is_number(const std::string& s)
 {
 	for (unsigned i = 0; i < s.length(); i++)
@ -34,10 +40,13 @@ bool is_number(const std::string& s)
 }

 // TODO: use optimized convolution routine when in unigrid mode 
-void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type type, refinement_hierarchy& refh, grid_hierarchy& delta, bool kspace )
+void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type type, 
+							refinement_hierarchy& refh, grid_hierarchy& delta, bool kspace, bool bdeconvolve )
 {
 	unsigned    levelmin,levelmax,levelminPoisson;
 	real_t		boxlength;
+	unsigned	ran_cube_size;
+	
 	std::vector<long> rngseeds;
 	std::vector<std::string> rngfnames;
 	
@ -47,7 +56,9 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
 	levelmax	= cf.getValue<unsigned>("setup","levelmax");
 	boxlength   = cf.getValue<real_t>( "setup", "boxlength" );
 	
-	std::cerr << " RUNNING UNIGRID VERSION\n";
+	ran_cube_size = cf.getValueSafe<unsigned>("random","cubesize",DEF_RAN_CUBE_SIZE);
+	
+	std::cerr << " - Running unigrid version\n";
 	
 	//... parse random number options
 	for( int i=0; i<=100; ++i )
@ -89,6 +100,8 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
 	
 	if( kspace )
 	{
+		std::cout << " - Using k-space transfer function kernel.\n";
+		
 		#ifdef SINGLE_PRECISION	
 		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_float" ];
 		#else
@ -97,6 +110,8 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
 	}
 	else
 	{
+		std::cout << " - Using real-space transfer function kernel.\n";
+		
 		#ifdef SINGLE_PRECISION	
 		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_float" ];
 		#else
@ -136,7 +151,7 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
 	x0[0] /= pow(2,levelmin); x0[1] /= pow(2,levelmin); x0[2] /= pow(2,levelmin);
 	lx[0] /= pow(2,levelmin); lx[1] /= pow(2,levelmin); lx[2] /= pow(2,levelmin);
 	
-	random_numbers<real_t> *rc = new random_numbers<real_t>( nbase, 32, rngseeds[levelmin], true );//, x0, lx );
+	random_numbers<real_t> *rc = new random_numbers<real_t>( nbase, ran_cube_size, rngseeds[levelmin], true );//, x0, lx );


 	if( shift[0]!=0||shift[1]!=0||shift[2]!=0 )
@ -146,13 +161,20 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
 	rc->fill_all(*top);
 	delete rc;
 	
+#ifdef SINGLE_PEAK
+	top->zero();
+	(*top)(top->size(0)/2, top->size(1)/2, top->size(2)/2) = 1.0;
+#endif
+	
 	conv_param.lx = boxlength;
 	conv_param.ly = boxlength;
 	conv_param.lz = boxlength;
 	conv_param.nx = top->nx_;
 	conv_param.ny = top->ny_;
 	conv_param.nz = top->nz_;
-	conv_param.normalize = false;
+	conv_param.coarse_fact = 0;
+	conv_param.deconvolve = bdeconvolve;
+	conv_param.is_finest = true;	
 	
 	convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
 	convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
@ -198,27 +220,32 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
 	}
 }

-void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type type, refinement_hierarchy& refh, grid_hierarchy& delta )
+void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type type, 
+							  refinement_hierarchy& refh, grid_hierarchy& delta, bool bdeconvolve=true )
 {
 	unsigned    levelmin,levelmax,levelminPoisson;
 	real_t		boxlength;
 	std::vector<long> rngseeds;
 	std::vector<std::string> rngfnames;
-	bool force_shift(false);
-	
+	bool force_shift(false), kspaceTF;
+	unsigned	ran_cube_size;
 	
 	levelminPoisson	= cf.getValue<unsigned>("setup","levelmin");
-	levelmin	= cf.getValueSafe<unsigned>("setup","levelmin_TF",levelminPoisson);
-	levelmax	= cf.getValue<unsigned>("setup","levelmax");
-	boxlength   = cf.getValue<real_t>( "setup", "boxlength" );
-	force_shift	= cf.getValueSafe<bool>("setup", "force_shift", force_shift );
+	levelmin		= cf.getValueSafe<unsigned>("setup","levelmin_TF",levelminPoisson);
+	levelmax		= cf.getValue<unsigned>("setup","levelmax");
+	boxlength		= cf.getValue<real_t>( "setup", "boxlength" );
+	force_shift		= cf.getValueSafe<bool>("setup", "force_shift", force_shift );
+	kspaceTF		= cf.getValueSafe<bool>("setup", "kspace_TF", false);
+	ran_cube_size	= cf.getValueSafe<unsigned>("random","cubesize",DEF_RAN_CUBE_SIZE);
+	
 	
 	// TODO: need to make sure unigrid gets called whenever possible
-	/*if( levelmin == levelmax && levelmin==levelminPoisson )
+	// FIXME: temporarily disabled
+	if( false )//levelmin == levelmax && levelmin==levelminPoisson )
 	{	
-		GenerateDensityUnigrid(cf,ptf,type,refh,delta);
+		GenerateDensityUnigrid(cf,ptf,type,refh,delta,kspaceTF,bdeconvolve);
 		return;
-	}*/
+	}
 	
 	
 	//... parse random number options
@ -253,10 +280,6 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 	float lxref[3];
 	std::string temp	= cf.getValue<std::string>( "setup", "ref_extent" );
 	sscanf( temp.c_str(), "%g,%g,%g", &lxref[0],&lxref[1],&lxref[2] );
-	//double      lextmin = std::min(lxref[0],std::min(lxref[1],lxref[2]));
-	// alpha = 0.1*..., cutoff = 0.25 /// maybe 0.08, 0.3
-	real_t		alpha	= 0.05*(real_t)nbase/boxlength;//0.45*lextmin*(double)nbase;///boxlength;
-	real_t		cutoff	= 0.25;//lextmin*0.9;
 	
 	
 	int shift[3];
@ -317,7 +340,7 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 	//... if random numbers are not given for lower levels, obtain them by averaging
 	if( lmingiven >= (int)levelmin )
 	{
-		randc[lmingiven] = new random_numbers<real_t>( (unsigned)pow(2,lmingiven), 32, rngseeds[lmingiven], true );//, x0, lx );
+		randc[lmingiven] = new random_numbers<real_t>( (unsigned)pow(2,lmingiven), ran_cube_size, rngseeds[lmingiven], true );//, x0, lx );
 		
 		for( int ilevel = lmingiven-1; ilevel >= (int)levelmin; --ilevel ){
 			if( rngseeds[ilevel-levelmin] > 0 )
@ -333,7 +356,7 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 	{
 		throw std::runtime_error("You provided a seed for a level below levelmin, this is not supported yet.");
 		
-		randc[lmingiven] = new random_numbers<real_t>( (unsigned)pow(2,lmingiven), 32, rngseeds[lmingiven], true );//, x0, lx );
+		randc[lmingiven] = new random_numbers<real_t>( (unsigned)pow(2,lmingiven), ran_cube_size, rngseeds[lmingiven], true );//, x0, lx );
 		
 		for( int ilevel = lmingiven+1; ilevel <= (int)levelmin; ++ilevel )
 		{
@ -376,7 +399,7 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 		if( levelminPoisson == levelmin && !force_shift)
 		{
 			if( randc[levelmin] == NULL )
-				rc = new random_numbers<real_t>( nbase, 32, rngseeds[levelmin], true );
+				rc = new random_numbers<real_t>( nbase, ran_cube_size, rngseeds[levelmin], true );
 			else
 				rc = randc[levelmin];
 			
@ -392,10 +415,10 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 			int lx[3] = { refh.size(levelmin,0), refh.size(levelmin,1), refh.size(levelmin,2) };
 			
 			if( randc[levelmin] == NULL )
-				rc = randc[levelmin] = new random_numbers<real_t>( nbase, 32, rngseeds[levelmin], x0, lx );
+				rc = randc[levelmin] = new random_numbers<real_t>( nbase, ran_cube_size, rngseeds[levelmin], x0, lx );
 			//
 			//if( randc[levelmin] == NULL )
-			//	rc = new random_numbers<real_t>( nbase, 32, rngseeds[levelmin], true );
+			//	rc = new random_numbers<real_t>( nbase, ran_cube_size, rngseeds[levelmin], true );
 			else
 				rc = randc[levelmin];
 			
@ -404,13 +427,20 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type

 		delete rc;
 		
+#ifdef SINGLE_PEAK
+		top->zero();
+		(*top)(top->size(0)/2, top->size(1)/2, top->size(2)/2) = 1.0;
+#endif
+		
 		conv_param.lx = boxlength;
 		conv_param.ly = boxlength;
 		conv_param.lz = boxlength;
 		conv_param.nx = top->nx_;
 		conv_param.ny = top->ny_;
 		conv_param.nz = top->nz_;
-		conv_param.normalize = false;
+		conv_param.coarse_fact = 0;
+		conv_param.deconvolve = bdeconvolve;
+		conv_param.is_finest = true;	
 		
 		convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
 		convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
@ -420,7 +450,6 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 		top->copy( *delta.get_grid(levelmin) );
 		delete top;
 	}
-	//}		
 		
 	
 	for( int i=0; i< (int)levelmax-(int)levelmin; ++i )
@ -444,7 +473,7 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 				refh.size(levelmin,2) };
 			
 			if( randc[levelmin] == NULL )
-				rc = randc[levelmin] = new random_numbers<real_t>( nbase, 32, rngseeds[levelmin], x0, lx );
+				rc = randc[levelmin] = new random_numbers<real_t>( nbase, ran_cube_size, rngseeds[levelmin], x0, lx );
 			else
 				rc = randc[levelmin];
 			
@ -465,39 +494,28 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 			random_numbers<real_t> *rc;
 			int x0[3],lx[3];
 			int lfac = (int)pow(2.0,i+1);
-			x0[0] = refh.offset_abs(levelmin+i+1,0)-lfac*shift[0]; //((real_t)(offtotx[levelmin+i+1]+lfac*shift[0]))/pow(2.0,levelmin+i+1);
-			x0[1] = refh.offset_abs(levelmin+i+1,1)-lfac*shift[1]; //((real_t)(offtoty[levelmin+i+1]+lfac*shift[1]))/pow(2.0,levelmin+i+1);
-			x0[2] = refh.offset_abs(levelmin+i+1,2)-lfac*shift[2]; //((real_t)(offtotz[levelmin+i+1]+lfac*shift[2]))/pow(2.0,levelmin+i+1);
-			lx[0] = refh.size(levelmin+i+1,0); // /pow(2.0,levelmin+i+1);
-			lx[1] = refh.size(levelmin+i+1,1); // /pow(2.0,levelmin+i+1);
-			lx[2] = refh.size(levelmin+i+1,2); // /pow(2.0,levelmin+i+1);
-			
-			//x0[0] -= 0.5*lx[0];  lx[0] *= 2.0;
-			//x0[1] -= 0.5*lx[1];  lx[1] *= 2.0;
-			//x0[2] -= 0.5*lx[2];  lx[2] *= 2.0;
+			x0[0] = refh.offset_abs(levelmin+i+1,0)-lfac*shift[0]; 
+			x0[1] = refh.offset_abs(levelmin+i+1,1)-lfac*shift[1]; 
+			x0[2] = refh.offset_abs(levelmin+i+1,2)-lfac*shift[2]; 
+			lx[0] = refh.size(levelmin+i+1,0); 
+			lx[1] = refh.size(levelmin+i+1,1); 
+			lx[2] = refh.size(levelmin+i+1,2); 
 			
 			if( randc[levelmin+i+1] == NULL )
-				rc = randc[levelmin+i+1] = new random_numbers<real_t>((unsigned)pow(2,levelmin+i+1), 32, rngseeds[levelmin+i+1], x0, lx);
+				rc = randc[levelmin+i+1] = new random_numbers<real_t>((unsigned)pow(2,levelmin+i+1), ran_cube_size, rngseeds[levelmin+i+1], x0, lx);
 			else
 				rc = randc[levelmin+i+1];
 			
+			fine->fill_rand( rc, 1.0, x0[0]-fine->nx_/4, x0[1]-fine->ny_/4, x0[2]-fine->nz_/4 );
+				
+			if( i+levelmin+1 > (unsigned)lmingiven )
+			{	
+				if(i==0)
+					fine->constrain( *top );
+				else
+					fine->constrain( *coarse );
+			}				
 			
-			{
-				/*int llfac = (int)pow(2.0,i+1);
-				fine->fill_rand( rc, 1.0,	offtotx[levelmin+i+1]-llfac*shift[0], 
-								offtoty[levelmin+i+1]-llfac*shift[1], 
-								offtotz[levelmin+i+1]-llfac*shift[2] );*/
-				
-				fine->fill_rand( rc, 1.0, x0[0]-fine->nx_/4, x0[1]-fine->ny_/4, x0[2]-fine->nz_/4 );
-				
-				if( i+levelmin+1 > (unsigned)lmingiven )
-				{	
-					if(i==0)
-						fine->constrain( *top );
-					else
-						fine->constrain( *coarse );
-				}				
-			}
 			delete rc;
 			rc = NULL;
 		}
@ -515,25 +533,33 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 			
 			delta.create_base_hierarchy(levelmin);
 			
+#ifdef SINGLE_PEAK
+			{
+				top->zero();
+				(*top)(top->size(0)/2, top->size(1)/2, top->size(2)/2) = 1.0/pow(2,1.5*(levelmax-levelmin));
+			}
+
+#endif	
+			
 			DensityGrid<real_t> top_save( *top );
-			
-			
+
 			conv_param.lx = boxlength;
 			conv_param.ly = boxlength;
 			conv_param.lz = boxlength;
 			conv_param.nx = top->nx_;
 			conv_param.ny = top->ny_;
 			conv_param.nz = top->nz_;
-			conv_param.normalize = true;
-			
+			conv_param.coarse_fact = levelmax-levelmin;
+			conv_param.deconvolve = bdeconvolve;
+			conv_param.is_finest = false;
 			convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
 			
 			
 			//... 1) compute standard convolution for levelmin
 			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
-			//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
 			top->copy( *delta.get_grid(levelmin) );
 			
+			
 			//... 2) compute contribution to finer grids from non-refined region
 			*top = top_save;
 			top_save.clear();
@ -541,24 +567,20 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 							  refh.size(levelmin+i+1,0)/2, refh.size(levelmin+i+1,1)/2, refh.size(levelmin+i+1,2)/2 );

 			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
-			//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
-			
 			
+			delete the_tf_kernel;
+
 			
 			meshvar_bnd delta_longrange( *delta.get_grid(levelmin) );
 			top->copy( delta_longrange );
-			delete the_tf_kernel;
-			delete top;
+			delete top;			
 			
 			//... restrict these contributions to the next level
 			delta.add_patch( refh.offset(levelmin+1,0), refh.offset(levelmin+1,1), refh.offset(levelmin+1,2), 
 							refh.size(levelmin+1,0), refh.size(levelmin+1,1), refh.size(levelmin+1,2) );
 			
-			//mg_linear().prolong( delta_longrange, *delta.get_grid(levelmin+1) );
 			mg_cubic().prolong( delta_longrange, *delta.get_grid(levelmin+1) );
-			//mg_lin().prolong( delta_longrange, *delta.get_grid(levelmin+1) );
-			
-			
+					
 		}
 		else
 		{
@ -571,7 +593,6 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 			delta.add_patch( refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2), 
 							refh.size(levelmin+i+1,0), refh.size(levelmin+i+1,1), refh.size(levelmin+i+1,2) );
 			
-			//mg_linear().prolong( *delta.get_grid(levelmin+i), *delta.get_grid(levelmin+i+1) );
 			mg_cubic().prolong( *delta.get_grid(levelmin+i), *delta.get_grid(levelmin+i+1) );
 			
 			real_t dx,lx,ly,lz;
@ -581,81 +602,46 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 			ly = dx * coarse->ny_; 
 			lz = dx * coarse->nz_; 
 			
-			real_t lmin = std::min( lx, std::min(ly,lz) );
-			
-			
+			//.. set convolution parameters
+			//TODO: this needs to be changed, forgetting to set a parameter will not be warned!
 			conv_param.lx = lx;
 			conv_param.ly = ly;
 			conv_param.lz = lz;
 			conv_param.nx = coarse->nx_;
 			conv_param.ny = coarse->ny_;
 			conv_param.nz = coarse->nz_;
-			conv_param.normalize = false;
+			conv_param.coarse_fact = levelmax-levelmin-i;
+			conv_param.deconvolve = bdeconvolve;
+			conv_param.is_finest = false;	
 			
 			convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
 			
 			PaddedDensitySubGrid<real_t> coarse_save( *coarse );
 					
-			//... 2) the inner region
-			
-			coarse->zero_boundary();
+			//... 1) the inner region
+			coarse->subtract_boundary_oct_mean();
 			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
-			//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
-			
-			
 			coarse->copy_add_unpad( *delta.get_grid(levelmin+i) );
 			
 			
-			//... 3) the 'BC' for the next finer grid
+			//... 2) the 'BC' for the next finer grid
 			*coarse = coarse_save;
-
+			coarse->subtract_boundary_oct_mean();
 			coarse->zero_subgrid(refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2), 
 								 refh.size(levelmin+i+1,0)/2, refh.size(levelmin+i+1,1)/2, refh.size(levelmin+i+1,2)/2 );
-			coarse->zero_boundary();
 			
 			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
-			//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
-			
-			
 			
 			meshvar_bnd delta_longrange( *delta.get_grid(levelmin+i) );
 			coarse->copy_unpad( delta_longrange );
-			//mg_linear().prolong_add( delta_longrange, *delta.get_grid(levelmin+i+1) );
 			mg_cubic().prolong_add( delta_longrange, *delta.get_grid(levelmin+i+1) );
 			
-			
-#if 1
-			//... FFT (isolated) boundary noise contribution
-			//convolution::truncate( the_tf_kernel, cutoff*lmin, 1e-8*alpha );
-			convolution::truncate_sharp( the_tf_kernel, cutoff*lmin );
+
+			//... 3) the coarse-grid correction
 			*coarse = coarse_save;
-			//coarse_save.clear();
-			coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2);
-			coarse->subtract_oct_mean();
-			
-			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
-			//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
-			
-			coarse->copy_add_unpad( *delta.get_grid(levelmin+i) );
-#endif
-			
-#if 0
-			*coarse = coarse_save;
-			coarse->zero_boundary();
 			coarse->subtract_oct_mean();
 			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
-			//coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2);
-			
 			coarse->upload_bnd_add( *delta.get_grid(levelmin+i-1) );
-#endif
-			
-			/**coarse = coarse_save;
-			coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2);
-			coarse->set_to_oct_mean();
-			
-			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
-			coarse->copy_subtract_unpad( *delta.get_grid(levelmin+i) );*/
-			
 			
 			delete the_tf_kernel;
 			delete coarse;
@ -679,63 +665,61 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 		lx = dx * coarse->nx_; 
 		ly = dx * coarse->ny_; 
 		lz = dx * coarse->nz_; 
-		real_t lmin = std::min( lx, std::min(ly,lz) );
-		
 		
+		//... set convolution parameters
 		conv_param.lx = lx;
 		conv_param.ly = ly;
 		conv_param.lz = lz;
 		conv_param.nx = coarse->nx_;
 		conv_param.ny = coarse->ny_;
 		conv_param.nz = coarse->nz_;
-		conv_param.normalize = false;
+		conv_param.coarse_fact = 0;
+		conv_param.deconvolve = bdeconvolve;
+		conv_param.is_finest = true;	
+		
+#ifdef SINGLE_PEAK
+		{
+			coarse->zero();
+			
+			int 
+				i0 = pow(2,levelmax)/2 - refh.offset_abs(levelmax,0) + coarse->nx_/4,
+				i1 = pow(2,levelmax)/2 - refh.offset_abs(levelmax,1) + coarse->nx_/4,
+				i2 = pow(2,levelmax)/2 - refh.offset_abs(levelmax,2) + coarse->nx_/4;
+		
+				(*coarse)(i0,i1,i2) = 1.0;
+		}
+#endif
+		
+		//... 1) grid self-contributio
 		
-		//... with LR/SR splitting and full subgrid		
 		PaddedDensitySubGrid<real_t> coarse_save( *coarse );
 		
+		//... create convolution kernel
 		convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
-		//... 2) the inner region
 		
-		coarse->zero_boundary();
-		convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
-		coarse->copy_add_unpad( *delta.get_grid(levelmax) );
-		
-		
-		//... compute convolution for isolated grid
-		//... 1) the padded boundary region
-	
-	
-		
-		
-#if 1
-		//... boundary correction
-		convolution::truncate( the_tf_kernel, cutoff*lmin, alpha );
-		//convolution::truncate_sharp( the_tf_kernel, cutoff*lmin );
-		
-		
-		*coarse = coarse_save;
-		coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2);
-		coarse->subtract_oct_mean();
-		
-		convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
-		coarse->copy_add_unpad( *delta.get_grid(levelmax) );
+		//... subtract oct mean on boundary but not in interior
+		coarse->subtract_boundary_oct_mean();

-#endif
-#if 0
-		// coarse correction
-		*coarse = coarse_save;
-		coarse->zero_boundary();
-		coarse->subtract_oct_mean();
+		//... perform convolution
 		convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
 		
+		//... copy to grid hierarchy
+		coarse->copy_add_unpad( *delta.get_grid(levelmax) );
+		
+				
+		//... 2) boundary correction to top grid
+		*coarse = coarse_save;
+		
+		//... subtract oct mean
+		coarse->subtract_oct_mean();
+		
+		
+		//... perform convolution
+		convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
+		
+		//... upload data to coarser grid
 		coarse->upload_bnd_add( *delta.get_grid(levelmax-1) );
 		
-		//	meshvar_bnd delta_longrange( *delta.get_grid(levelmax) );
-		//coarse->copy_unpad( delta_longrange );
-		//mg_straight().restrict_add( delta_longrange, *delta.get_grid(levelmax-1) );
-#endif	
-		
-		
 		delete the_tf_kernel;		
 		delete coarse;
 	}
@ -751,11 +735,7 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 			}
 	}
 	
-	//... enforce mean condition
-	//for( int i=levelmin; i<(int)levelmax; ++i )
-		//	enforce_mean( (*delta.get_grid(i+1)), (*delta.get_grid(i)) );
-	 
-#if 1
+	
 	//... subtract the box mean.... this will otherwise add
 	//... a constant curvature term to the potential
 	double sum = 0.0;
@ -788,9 +768,9 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 			for( int iy=0; iy<ny; ++iy )
 				for( int iz=0; iz<nz; ++iz )
 					(*delta.get_grid(i))(ix,iy,iz) -= sum;
+		
 	}
-#endif	
-	
+
 	//... fill coarser levels with data from finer ones...
 	for( int i=levelmax; i>0; --i )
 		mg_straight().restrict( (*delta.get_grid(i)), (*delta.get_grid(i-1)) );
@ -800,6 +780,7 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type

 void normalize_density( grid_hierarchy& delta )
 {
+	
 	double sum = 0.0;
 	unsigned levelmin = delta.levelmin(), levelmax = delta.levelmax();
 	
--- a/densities.hh
+++ b/densities.hh
@ -39,6 +39,8 @@
 	#endif
 #endif

+#include <assert.h>
+
 #include "config_file.hh"
 #include "random.hh"
 #include "cosmology.hh"
@ -46,10 +48,10 @@


 void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type type, 
-							  refinement_hierarchy& refh, grid_hierarchy& delta );
+							  refinement_hierarchy& refh, grid_hierarchy& delta, bool bdeconvolve );

 void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type type, 
-							refinement_hierarchy& refh, grid_hierarchy& delta, bool kspace=false );
+							refinement_hierarchy& refh, grid_hierarchy& delta, bool kspace, bool deconvolve );

 void normalize_density( grid_hierarchy& delta );

@ -109,6 +111,26 @@ public:
 		std::vector<real_t>().swap(data_);
 	}
 	
+	//! query the 3D array sizes of the density object
+	/*! returns the size of the 3D density object along a specified dimension
+	 * @params i the dimension for which size is to be returned
+	 * @returns array size along dimension i
+	 */
+	int size( int i )
+	{
+		if(i==0) return nx_;
+		if(i==1) return ny_;
+		return ny_;
+	}
+	
+	//! zeroes the density object
+	/*! sets all values to 0.0
+	 */
+	void zero( void )
+	{
+		data_.assign(data_.size(),0.0);
+	}
+	
 	//! assigns the contents of another DensityGrid to this
 	DensityGrid& operator=( const DensityGrid<real_t>& g )
 	{
@ -141,9 +163,9 @@ public:
 	 *  @params i0 x-offset (shift) in cells of the density field with respect to the random number field
 	 *  @params j0 y-offset (shift) in cells of the density field with respect to the random number field
 	 *  @params k0 z-offset (shift) in cells of the density field with respect to the random number field
-	 *  @params zero boolean, if true, the global mean will be subtracted
+	 *  @params setzero boolean, if true, the global mean will be subtracted
 	 */
-	void fill_rand( /*const*/ random_numbers<real_t>* prc, real_t variance, int i0, int j0, int k0, bool zero=false )
+	void fill_rand( /*const*/ random_numbers<real_t>* prc, real_t variance, int i0, int j0, int k0, bool setzero=false )
 	{
 		double sum = 0.0;
 		
@ -158,7 +180,7 @@ public:
 		
 		sum /= nx_*ny_*nz_;
 		
-		if( zero )
+		if( setzero )
 		{
 			#pragma omp parallel for
 			for( int i=0; i<nx_; ++i )
@ -191,6 +213,32 @@ public:
 					v(ix,iy,iz) += (*this)(ix,iy,iz);
 	}
 	
+	//! subtract the mean of the field 
+	/*! subtracting the total mean implies that a restriction does not change the mass
+	 */
+	void subtract_mean( void )
+	{
+		double sum = 0.0;
+		unsigned count;
+		
+		for( int i=0; i<nx_; i++ )
+			for( int j=0; j<ny_; j++ )
+				for( int k=0; k<nz_; k++ )
+				{
+					sum += (*this)(i,j,k);
+					count++;
+				}
+		sum /= count;
+		
+					
+					
+		for( int i=0; i<nx_; i++ )
+			for( int j=0; j<ny_; j++ )
+				for( int k=0; k<nz_; k++ )
+					(*this)(i,j,k)			-= sum;
+		
+	}
+	
 	//! subtract the mean of each oct of the field 
 	/*! subtracting the mean of each oct implies that a restriction of the field to the coarser level
 	 *  is zero everywhere (needed for Hoffman-Ribak constraints)
@ -691,6 +739,72 @@ public:
 					v(ixu,iyu,izu) -= (*this)(ix,iy,iz);
 	}
 	
+	
+	double oct_mean( int i, int j, int k )
+	{
+		
+		return 0.125*((*this)(i,j,k)+(*this)(i+1,j,k)+(*this)(i,j+1,k)+(*this)(i,j,k+1)
+					  +(*this)(i+1,j+1,k)+(*this)(i+1,j,k+1)+(*this)(i,j+1,k+1)+(*this)(i+1,j+1,k+1));
+	}
+	
+	void set_oct_mean( int i, int j, int k, double val )
+	{
+		(*this)(i,j,k)			= val;
+		(*this)(i+1,j,k)		= val;
+		(*this)(i,j+1,k)		= val;
+		(*this)(i,j,k+1)		= val;
+		(*this)(i+1,j+1,k)		= val;
+		(*this)(i+1,j,k+1)		= val;
+		(*this)(i,j+1,k+1)		= val;
+		(*this)(i+1,j+1,k+1)	= val;
+	}
+	
+	void add_oct_val( int i, int j, int k, double val )
+	{
+		(*this)(i,j,k)		+= val;
+		(*this)(i+1,j,k)		+= val;
+		(*this)(i,j+1,k)		+= val;
+		(*this)(i,j,k+1)		+= val;
+		(*this)(i+1,j+1,k)	+= val;
+		(*this)(i+1,j,k+1)	+= val;
+		(*this)(i,j+1,k+1)	+= val;
+		(*this)(i+1,j+1,k+1)	+= val;
+	}
+	
+	void subtract_boundary_oct_mean( void )
+	{
+		#pragma omp parallel for
+		for( int ix=0; ix<nx_/4-1; ix+=2 )
+		{	
+			for( int iy=0; iy<ny_; iy+=2 )
+				for( int iz=0; iz<nz_; iz+=2 )
+				{
+					add_oct_val( ix,iy,iz, -oct_mean(ix,iy,iz) );
+					add_oct_val( ix+3*nx_/4,iy,iz, -oct_mean(ix+3*nx_/4,iy,iz) );
+				}
+		}
+		
+		#pragma omp parallel for
+		for( int ix=0; ix<nx_; ix+=2 )
+			for( int iy=0; iy<ny_/4-1; iy+=2 )
+			{
+				for( int iz=0; iz<nz_; iz+=2 )
+				{
+					add_oct_val( ix,iy,iz, -oct_mean(ix,iy,iz) );
+					add_oct_val( ix,iy+3*ny_/4,iz, -oct_mean(ix,iy+3*ny_/4,iz) );
+				}
+			}
+		
+		#pragma omp parallel for
+		for( int ix=0; ix<nx_; ix+=2 )
+			for( int iy=0; iy<ny_; iy+=2 )
+				for( int iz=0; iz<nz_/4-1; iz+=2 )
+				{
+					add_oct_val( ix,iy,iz, -oct_mean(ix,iy,iz) );
+					add_oct_val( ix,iy,iz+3*nz_/4, -oct_mean(ix,iy,iz+3*nz_/4) );
+				}
+	}
+	
 	void zero_boundary( void )
 	{
 		for( int ix=0; ix<nx_/4; ++ix )
@ -806,6 +920,9 @@ inline void enforce_mean( M& v, M& V )
 	finemean /= count;
 	
 	double dmean = coarsemean-finemean;
+	dmean = dmean/sqrt(2.0);
+	
+	std::cerr << " - enforce_mean correction : fine = " << finemean << ", coarse = " << coarsemean << ", diff = " << dmean << std::endl;
 	
 #pragma omp parallel for reduction(+:coarsemean,finemean)
 	for( int i=0; i<nx; ++i )
--- a/main.cc
+++ b/main.cc
@ -99,26 +99,26 @@ void modify_grid_for_TF( const refinement_hierarchy& rh_full, refinement_hierarc
 	
 	for( unsigned i=lbase+1; i<=lmax; ++i )
 	{
-		int x0[3], lx[3], lmax = 0;
+		int x0[3], lx[3], lxmax = 0;
 		
 		for( int j=0; j<3; ++j )
 		{
 			lx[j] = rh_TF.size(i,j)+2*pad;
 			x0[j] = rh_TF.offset_abs(i,j)-pad;
 			
-			if( lx[j] > lmax )
-				lmax = lx[j];
+			if( lx[j] > lxmax )
+				lxmax = lx[j];
 		}
 		
-		if( lmax%2 == 1 )
-			lmax+=1;
+		if( lxmax%2 == 1 )
+			lxmax+=1;
 		
 		for( int j=0; j<3; ++j )
 		{
-			double dl = 0.5*((double)(lmax-lx[j]));
-			int add_left = ceil(dl);
+			double dl = 0.5*((double)(lxmax-lx[j]));
+			int add_left = (int)ceil(dl);
 			
-			lx[j] = lmax;
+			lx[j] = lxmax;
 			x0[j] -= add_left;
 		}
 		
@ -155,6 +155,10 @@ void coarsen_density( const refinement_hierarchy& rh, grid_hierarchy& u )
 						rh.size(i,0), rh.size(i,1), rh.size(i,2) );
 		}
 	}
+	
+	for( int i=rh.levelmax(); i>0; --i )
+		mg_straight().restrict( *(u.get_grid(i)), *(u.get_grid(i-1)) );
+
 }

 void store_grid_structure( config_file& cf, const refinement_hierarchy& rh )
@ -405,106 +409,48 @@ int main (int argc, const char * argv[])
 	try{
 		if( ! do_2LPT )
 		{
-			//.. don't use 2LPT	...
-		
-			if( !the_transfer_function_plugin->tf_is_distinct() || !do_baryons )
-			{	
-				std::cerr << " - Computing ICs from the total matter power spectrum..." << std::endl;
-				
-				grid_hierarchy f( nbnd ), u(nbnd);
+			//... cdm density and displacements
+			std::cout << "=============================================================\n";
+			std::cout << "   COMPUTING DARK MATTER PARTICLE ICs\n";
+			std::cout << "-------------------------------------------------------------\n";
+			
+			
+			grid_hierarchy f( nbnd ), u(nbnd);
+			
+			GenerateDensityHierarchy(	cf, the_transfer_function_plugin, cdm , rh_TF, f, true );
+			coarsen_density(rh_Poisson, f);
+			normalize_density(f);
+			
+			u = f;	u.zero();
+			
+			err = the_poisson_solver->solve(f, u);

-				
-				GenerateDensityHierarchy(	cf, the_transfer_function_plugin, total , rh_TF, f );
-				
-				coarsen_density(rh_Poisson, f);
-				normalize_density(f);
-				
-				
-				u = f;	u.zero();
-				err = the_poisson_solver->solve(f, u);
-				
-				
-	#if 0
-				grid_hierarchy u2( nbnd );
-				compute_Lu( u, u2 );
-				u=u2;
-	#endif
-				
-				the_output_plugin->write_dm_density(f);
-				the_output_plugin->write_dm_mass(f);
-				
-				if( do_baryons )
-				{
-					if( do_LLA )
-						compute_LLA_density( u, f, grad_order );
-					
-					the_output_plugin->write_gas_density(f);
-				}
-				f.deallocate();
-				
-				the_output_plugin->write_dm_potential(u);
-				
+			the_output_plugin->write_dm_density(f);
+			the_output_plugin->write_dm_mass(f);
+			f.deallocate();
+			
+			the_output_plugin->write_dm_potential(u);
+			
+			
+			//... DM displacements
+			{
 				grid_hierarchy data_forIO(u);
 				for( int icoord = 0; icoord < 3; ++icoord )
 				{
-					
 					//... displacement
 					the_poisson_solver->gradient(icoord, u, data_forIO );
-
-					if( do_CVM)
-						subtract_finest_mean( data_forIO );
 					the_output_plugin->write_dm_position(icoord, data_forIO );
-					
-					//... velocity
-					if( do_UVA )
-					{
-						//FIXME: this still needs to be added
-						throw std::runtime_error("Updated Velocity Approximation not yet supported!");
-					}
-					else
-					{
-						data_forIO *= cosmo.vfact;
-						the_output_plugin->write_dm_velocity(icoord, data_forIO);					
-					}
-
-					if( do_baryons )
-						the_output_plugin->write_gas_velocity(icoord, data_forIO);				
-
-				}	
-				
-			}else{
-				
-				//... cdm density and displacements
-				grid_hierarchy f( nbnd ), u(nbnd);
-				
-				GenerateDensityHierarchy(	cf, the_transfer_function_plugin, cdm , rh_TF, f );
-				coarsen_density(rh_Poisson, f);
-				normalize_density(f);
-				
-				u = f;	u.zero();
-				
-				err = the_poisson_solver->solve(f, u);
-
-				the_output_plugin->write_dm_density(f);
-				the_output_plugin->write_dm_mass(f);
-				f.deallocate();
-				
-				the_output_plugin->write_dm_potential(u);
-				
-				
-				//... DM displacements
-				{
-					grid_hierarchy data_forIO(u);
-					for( int icoord = 0; icoord < 3; ++icoord )
-					{
-						//... displacement
-						the_poisson_solver->gradient(icoord, u, data_forIO );
-						the_output_plugin->write_dm_position(icoord, data_forIO );
-					}
 				}
+			}
+			
+			//... gas density
+			if( do_baryons )
+			{
+				std::cout << "=============================================================\n";
+				std::cout << "   COMPUTING BARYON ICs\n";
+				std::cout << "-------------------------------------------------------------\n";
 				
-				//... gas density
-				GenerateDensityHierarchy(	cf, the_transfer_function_plugin, baryon , rh_TF, f );
+				GenerateDensityHierarchy(	cf, the_transfer_function_plugin, baryon , rh_TF, f, false );
 				coarsen_density(rh_Poisson, f);
 				normalize_density(f);
 				
@ -516,28 +462,31 @@ int main (int argc, const char * argv[])
 				}
 				
 				the_output_plugin->write_gas_density(f);
-				
-				//... velocities
-				if( do_UVA )
+			}
+			
+			//... velocities
+			if( do_UVA )
+			{
+				//... need to co-add potentials...
+				throw std::runtime_error("Updated Velocity Approximation not yet supported!");
+			}
+			else
+			{
+				if( do_baryons )
 				{
-					//... need to co-add potentials...
-					throw std::runtime_error("Updated Velocity Approximation not yet supported!");
-				}
-				else
-				{
-					GenerateDensityHierarchy(	cf, the_transfer_function_plugin, total , rh_TF, f );
+					GenerateDensityHierarchy(	cf, the_transfer_function_plugin, total , rh_TF, f, true );
 					u = f;	u.zero();
 					err = the_poisson_solver->solve(f, u);
-					
-					grid_hierarchy data_forIO(u);
-					for( int icoord = 0; icoord < 3; ++icoord )
-					{
-						//... displacement
-						the_poisson_solver->gradient(icoord, u, data_forIO );
-						data_forIO *= cosmo.vfact;
-						the_output_plugin->write_dm_velocity(icoord, data_forIO);
+				}
+				grid_hierarchy data_forIO(u);
+				for( int icoord = 0; icoord < 3; ++icoord )
+				{
+					//... displacement
+					the_poisson_solver->gradient(icoord, u, data_forIO );
+					data_forIO *= cosmo.vfact;
+					the_output_plugin->write_dm_velocity(icoord, data_forIO);
+					if( do_baryons )
 						the_output_plugin->write_gas_velocity(icoord, data_forIO);
-					}
 				}
 			}
 			
@ -547,7 +496,7 @@ int main (int argc, const char * argv[])
 			{	
 				grid_hierarchy f( nbnd ), u1(nbnd), u2(nbnd);
 				
-				GenerateDensityHierarchy(	cf, the_transfer_function_plugin, total , rh_TF, f );
+				GenerateDensityHierarchy(	cf, the_transfer_function_plugin, total , rh_TF, f, true );
 				coarsen_density(rh_Poisson, f);
 				normalize_density(f);
 				
@ -630,8 +579,8 @@ int main (int argc, const char * argv[])
 	//... clean up
 	delete the_output_plugin;
 	
-	std::cout << "-------------------------------------------------------------\n";
-
+	std::cout << "=============================================================\n";
+	
 	if( !bfatal )
 		std::cout << " - Wrote output file \'" << outfname << "\'\n     using plugin \'" << outformat << "\'...\n";
 	
--- a/mg_interp.hh
+++ b/mg_interp.hh
@ -32,10 +32,6 @@ inline double interp2( double x1, double x2, double x3, double f1, double f2, do
 }


-
-
-
-
 inline double interp4left( double f0, double f1, double f2, double f3, double f4 )
 {
 	//return -4.0/231.0*f0+4.0/7.0*f1+5.0/7.0*f2-1.0/3.0*f3+5./77.0*f4; 
--- a/mg_operators.hh
+++ b/mg_operators.hh
@ -49,31 +49,7 @@ public:
 		dx = 0.5*((double)sz - 0.5)*s;
 		dy = 0.5*((double)sy - 0.5)*s;
 		dz = 0.5*((double)sx - 0.5)*s;
-		
-		//int xDim = V.m_nx, yDim = V.m_ny;
-		//int xyDim = xDim * yDim;
-		
-		//double *pv = &V(x-1,y-1,z-1);
-		
-		/* factors for Catmull-Rom interpolation */
-		
-		/*
-		u[0] = -0.5 * CUBE (dx) + SQR (dx) - 0.5 * dx;
-		u[1] = 1.5 * CUBE (dx) - 2.5 * SQR (dx) + 1;
-		u[2] = -1.5 * CUBE (dx) + 2 * SQR (dx) + 0.5 * dx;
-		u[3] = 0.5 * CUBE (dx) - 0.5 * SQR (dx);
-		
-		v[0] = -0.5 * CUBE (dy) + SQR (dy) - 0.5 * dy;
-		v[1] = 1.5 * CUBE (dy) - 2.5 * SQR (dy) + 1;
-		v[2] = -1.5 * CUBE (dy) + 2 * SQR (dy) + 0.5 * dy;
-		v[3] = 0.5 * CUBE (dy) - 0.5 * SQR (dy);
-		
-		w[0] = -0.5 * CUBE (dz) + SQR (dz) - 0.5 * dz;
-		w[1] = 1.5 * CUBE (dz) - 2.5 * SQR (dz) + 1;
-		w[2] = -1.5 * CUBE (dz) + 2 * SQR (dz) + 0.5 * dz;
-		w[3] = 0.5 * CUBE (dz) - 0.5 * SQR (dz);
-		*/
-		
+				
 		if( sz == 1 )
 		{	
 			u[0] = -4.5/64.0;
@ -114,26 +90,6 @@ public:
 		}


-		 
-		 
-		/*for (k = 0; k < 4; k++)
-		{
-			q[k] = 0;
-			for (j = 0; j < 4; j++)
-			{
-				r[j] = 0;
-				for (i = 0; i < 4; i++)
-				{
-					r[j] += u[i] * *pv;
-					pv++;
-				}
-				q[k] += v[j] * r[j];
-				pv += xDim - 4;
-			}
-			vox += w[k] * q[k];
-			pv += xyDim - 4 * xDim;
-		}*/
-		
 		for (k = 0; k < 4; k++)
 		{
 			q[k] = 0;
@ -142,15 +98,11 @@ public:
 				r[j] = 0;
 				for (i = 0; i < 4; i++)
 				{
-					//r[j] += u[i] * V(x+k-1,y+j-1,z+i-1);
 					r[j] += u[i] * V(x+k-2+sx,y+j-2+sy,z+i-2+sz);
-					//pv++;
 				}
 				q[k] += v[j] * r[j];
-				//pv += xDim - 4;
 			}
 			vox += w[k] * q[k];
-			//pv += xyDim - 4 * xDim;
 		}
 		
 		
@ -173,28 +125,8 @@ public:
 		oy = v.offset(1),
 		oz = v.offset(2);
 		
-		/*for( int i=0,i2=0; i<nx; ++i,i2+=2 )
-			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
-				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
-					V(ox+i,oy+j,oz+k) = 0.125 * ( v(i2+1,j2,k2) + v(i2,j2+1,k2) + v(i2,j2,k2+1) + v(i2+1,j2+1,k2) +
-									    v(i2+1,j2,k2+1) + v(i2+1,j2+1,k2+1) + v(i2,j2+1,k2+1) + v(i2,j2,k2) );*/
-		/*#pragma omp parallel for
-		for( int i=0; i<nx; ++i )
-		{
-			int i2 = 2*i;
-			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
-				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
-					V(ox+i,oy+j,oz+k) = 0.125 * ( interp_cubic<1,0,0>(i2+1,j2,k2,v)
-												 +interp_cubic<0,1,0>(i2,j2+1,k2,v)
-												 +interp_cubic<0,0,1>(i2,j2,k2+1,v)
-												 +interp_cubic<1,1,0>(i2+1,j2+1,k2,v)
-												 +interp_cubic<0,1,1>(i2,j2+1,k2+1,v)
-												 +interp_cubic<1,0,1>(i2+1,j2,k2+1,v)
-												 +interp_cubic<1,1,1>(i2+1,j2+1,k2+1,v)
-												 +interp_cubic<0,0,0>(i2,j2,k2,v));
-		} */
 		
-#pragma omp parallel for
+		#pragma omp parallel for
 		for( int i=0; i<nx; ++i )
 		{
 			int i2 = 2*i;
@ -225,12 +157,7 @@ public:
 		oy = v.offset(1),
 		oz = v.offset(2);
 		
-		/*for( int i=0,i2=0; i<nx; ++i,i2+=2 )
-		 for( int j=0,j2=0; j<ny; ++j,j2+=2 )
-		 for( int k=0,k2=0; k<nz; ++k,k2+=2 )
-		 V(ox+i,oy+j,oz+k) = 0.125 * ( v(i2+1,j2,k2) + v(i2,j2+1,k2) + v(i2,j2,k2+1) + v(i2+1,j2+1,k2) +
-		 v(i2+1,j2,k2+1) + v(i2+1,j2+1,k2+1) + v(i2,j2+1,k2+1) + v(i2,j2,k2) );*/
-#pragma omp parallel for
+		#pragma omp parallel for
 		for( int i=0; i<nx; ++i )
 		{
 			int i2 = 2*i;
@ -249,20 +176,8 @@ public:
 		
 	}
 	
-	/*template< typename m1, typename m2 >
-	inline void restrict( m1& v, m2& V )
-	{
-		int 
-		nx = V.size(0), 
-		ny = V.size(1), 
-		nz = V.size(2);
-		
-		for( int i=0,i2=0; i<nx; ++i,i2+=2 )
-			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
-				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
-					V(i,j,k)  = interp_cubic<1,1,1>(i2, j2, k2, v, 2.0);
-	}*/
 	
+	//.. straight restriction on boundary
 	template< typename m1, typename m2 >
 	inline void restrict_bnd( const m1& v, m2& V ) const
 	{	
@ -411,18 +326,14 @@ public:
 	template< typename m1, typename m2 >
 	inline void prolong_bnd( m1& V, m2& v )
 	{
-		//		int 
-		//		nx = V.size(0), 
-		//		ny = V.size(1), 
-		//		nz = V.size(2);
 		int 
-		nx = v.size(0)/2, 
-		ny = v.size(1)/2, 
-		nz = v.size(2)/2,
-		ox = v.offset(0),
-		oy = v.offset(1),
-		oz = v.offset(2);
-		
+			nx = v.size(0)/2, 
+			ny = v.size(1)/2, 
+			nz = v.size(2)/2,
+			ox = v.offset(0),
+			oy = v.offset(1),
+			oz = v.offset(2);
+			
 		int nbnd = V.m_nbnd;
 		int nbndtop = nbnd/2;
 		
@ -500,11 +411,7 @@ public:
 		for( i=0; i<4; ++i )
 			for( j=0; j<4; ++j )
 				for( k=0; k<4; ++k )
-				{
 					vox += w[i]*w[j]*w[k] * V(x+i-1,y+j-1,z+k-1);
-					//std::cerr << "[" << i << "," << j << "," << k << "] -- " << u[i]*v[j]*w[k] << std::endl;
-				}
-		//vox = (V(x+1,y,z)+V(x,y+1,z)+V(x,y,z+1)+V(x,y,z)+V(x+1,y+1,z)+V(x+1,y,z+1)+V(x,y+1,z+1)+V(x+1,y+1,z+1));
 		return vox;
 	}
 	
@ -512,7 +419,6 @@ public:
 	inline double interp_lin( int x, int y, int z, M& V, double s=1.0 ) const
 	{
 		double           u[2], v[2], w[2];
-		//double			 r[2], q[2];
 		double           vox = 0;
 		int				 i,j,k;
 		
@ -539,51 +445,11 @@ public:
 			w[1] = 1.0/4.0;
 		}
 		
-		/*if( sx==0 ){
-		 u[0] = 1.0/64.0;
-		 u[1] = 3.0/64.0;
-		 }else{
-		 u[0] = 3.0/64.0;
-		 u[1] = 1.0/64.0;
-		 }
-		 if( sy==0 ){
-		 v[0] = 1.0/64.0;
-		 v[1] = 3.0/64.0;
-		 }else{
-		 v[0] = 3.0/64.0;
-		 v[1] = 1.0/64.0;
-		 }
-		 if( sz==0 ){
-		 w[0] = 1.0/64.0;
-		 w[1] = 3.0/64.0;
-		 }else{
-		 w[0] = 3.0/64.0;
-		 w[1] = 1.0/64.0;
-		 }*/
-		
-		//std::cerr << "(" << sx << "," << sy << "," << sz << ") : " << std::endl;
-		/*for (k = 0; k < 2; k++)
-		{
-			q[k] = 0;
-			for (j = 0; j < 2; j++)
-			{
-				r[j] = 0;
-				for (i = 0; i < 2; i++)
-					r[j] += u[i] * V(x+k+sx-1,y+j+sy-1,z+i+sz-1);
-				q[k] += v[j] * r[j];
-			}
-			//std::cerr << "[" << i << "," << j << "," << k << "] -- " << w[k]*q[k] << std::endl;
-			vox += w[k] * q[k];
-		}*/
 		for( i=0; i<2; ++i )
 			for( j=0; j<2; ++j )
 				for( k=0; k<2; ++k )
-				{
 					vox += u[i]*v[j]*w[k] * V(x+i+sx-1,y+j+sy-1,z+k+sz-1);
-					//std::cerr << "[" << i << "," << j << "," << k << "] -- " << u[i]*v[j]*w[k] << std::endl;
-				}
-		//std::cerr << "(" << sx << "," << sy << "," << sz << ") : " << vox <<std::endl;
-		//throw std::runtime_error("break");
+		
 		return vox;
 	}
 	
@ -667,68 +533,9 @@ public:
 			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
 				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
 					V(i+ox,j+oy,k+oz) = 0.125 * restr_lin(i2, j2, k2, v);
-					/*V(i+ox,j+oy,k+oz) = 0.125*(interp_lin<1,0,0>(i2,j2,k2,v)+interp_lin<0,1,0>(i2,j2,k2,v)+
-												 interp_lin<0,0,1>(i2,j2,k2,v)+interp_lin<1,1,0>(i2,j2,k2,v)+
-												 interp_lin<1,0,1>(i2,j2,k2,v)+interp_lin<0,1,1>(i2,j2,k2,v)+
-												 interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2,j2,k2,v));*/
-		}
+		}		
 		
-		/*
-		for( int i=0; i<nx; ++i )
-		{
-			int i2 = 2*i;
-			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
-				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
-				{
-					V(i+ox,j+oy,k+oz) = 0.125 * (interp_lin<1,0,0>(i2+2,j2,k2,v)+interp_lin<0,1,0>(i2,j2+2,k2,v)+
-												  interp_lin<0,0,1>(i2,j2,k2+2,v)+interp_lin<1,1,0>(i2+1,j2+2,k2,v)+
-												  interp_lin<1,0,1>(i2+2,j2,k2+2,v)+interp_lin<0,1,1>(i2,j2+2,k2+2,v)+
-												  interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2+2,j2+2,k2+2,v));
-										
-				}
-		}
-		 */
 	}	
-#if 0	
-	template< typename m1, typename m2 >
-	inline void restrict_add( const m1& v, m2& V ) const
-	{
-		int 
-		nx = v.size(0)/2, 
-		ny = v.size(1)/2, 
-		nz = v.size(2)/2,
-		ox = v.offset(0),
-		oy = v.offset(1),
-		oz = v.offset(2);
-		
-		/*#pragma omp parallel for
-		for( int i=0; i<nx; ++i )
-		{
-			int i2 = 2*i;
-			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
-				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
-					V(i+ox,j+oy,k+oz) += 0.125 * (interp_lin<1,0,0>(i2,j2,k2,v)+interp_lin<0,1,0>(i2,j2,k2,v)+
-												 interp_lin<0,0,1>(i2,j2,k2,v)+interp_lin<1,1,0>(i2,j2,k2,v)+
-												 interp_lin<1,0,1>(i2,j2,k2,v)+interp_lin<0,1,1>(i2,j2,k2,v)+
-												 interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2,j2,k2,v));
-		}*/
-		
-		for( int i=0; i<nx; ++i )
-		{
-			int i2 = 2*i;
-			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
-				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
-				{
-					V(i+ox,j+oy,k+oz) += 0.125 * (interp_lin<1,0,0>(i2+2,j2,k2,v)+interp_lin<0,1,0>(i2,j2+2,k2,v)+
-												 interp_lin<0,0,1>(i2,j2,k2+2,v)+interp_lin<1,1,0>(i2+1,j2+2,k2,v)+
-												 interp_lin<1,0,1>(i2+2,j2,k2+2,v)+interp_lin<0,1,1>(i2,j2+2,k2+2,v)+
-												 interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2+2,j2+2,k2+2,v));
-					
-				}
-		}
-	}	
-#endif
-	
 };


@ -1039,28 +846,6 @@ public:
 	
 	
 	//... restricts v to V
-	/*	inline void restrict( const MeshvarBnd<real_t>& v, MeshvarBnd<real_t>& V ) const
-	 {
-	 int 
-	 nx = v.size(0)/2, 
-	 ny = v.size(1)/2, 
-	 nz = v.size(2)/2,
-	 ox = v.offset(0),
-	 oy = v.offset(1),
-	 oz = v.offset(2);
-	 
-	 #pragma omp parallel for
-	 for( int i=0; i<nx; ++i )
-	 {
-	 int i2 = 2*i;
-	 for( int j=0,j2=0; j<ny; ++j,j2+=2 )
-	 for( int k=0,k2=0; k<nz; ++k,k2+=2 )
-	 V(i+ox,j+oy,k+oz) = 0.125 * ( v(i2+1,j2,k2) + v(i2,j2+1,k2) + v(i2,j2,k2+1) + v(i2+1,j2+1,k2) +
-	 v(i2+1,j2,k2+1) + v(i2+1,j2+1,k2+1) + v(i2,j2+1,k2+1) + v(i2,j2,k2) );
-	 }
-	 }	
-	 */
-	
 	template< typename m1, typename m2 >
 	inline void restrict( const m1& v, m2& V ) const
 	{
@ -1327,34 +1112,6 @@ public:
 		}
 	}
 	
-	//... prolongs V to v
-	/*template< typename m1, typename m2 >
-	inline void prolong_add( m1& V, m2& v )
-	{
-		unsigned 
-		nx = v.size(0)/2, 
-		ny = v.size(1)/2, 
-		nz = v.size(2)/2,
-		ox = v.offset(0),
-		oy = v.offset(1),
-		oz = v.offset(2);
-		
-		
-		for( int i=0,i2=0; i<nx; ++i,i2+=2 )
-			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
-				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
-				{
-					v(i2+1,j2,k2)		+= V(i+ox,j+oy,k+oz);
-					v(i2,j2+1,k2)		+= V(i+ox,j+oy,k+oz);
-					v(i2,j2,k2+1)		+= V(i+ox,j+oy,k+oz);
-					v(i2+1,j2+1,k2)		+= V(i+ox,j+oy,k+oz);
-					v(i2+1,j2,k2+1)		+= V(i+ox,j+oy,k+oz);
-					v(i2+1,j2+1,k2+1)	+= V(i+ox,j+oy,k+oz);
-					v(i2,j2+1,k2+1)		+= V(i+ox,j+oy,k+oz);
-					v(i2,j2,k2)			+= V(i+ox,j+oy,k+oz);
-				}
-		
-	}*/
 };


--- a/mg_solver.hh
+++ b/mg_solver.hh
@ -64,7 +64,7 @@ protected:
 	bool m_is_ini;

 	GridHierarchy<T>	*m_pu, *m_pf, *m_pfsave;	
-	//GridHierarchy<bool> *m_pmask;
+	
 	const MeshvarBnd<T> *m_pubnd;
 	
 	double compute_error( const MeshvarBnd<T>& u, const MeshvarBnd<T>& unew );
@ -83,20 +83,17 @@ protected:
 	
 	void twoGrid( unsigned ilevel );
 	
-	//void interp_coarse_fine( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine, bool bcf=true );
-	
 	void setBC( unsigned ilevel );
 	
 	void make_periodic( MeshvarBnd<T> *u );
 	
-	void interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine );
+	//void interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine );
 		
 public:
-	solver( GridHierarchy<T>& f, //const MeshvarBnd<T>& uBC_top, 
-				   opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth );
+	solver( GridHierarchy<T>& f, opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth );
 	
 	~solver()
-	{ /*delete m_pmask;*/ }
+	{  }
 	
 	double solve( GridHierarchy<T>& u, double accuracy, double h=-1.0, bool verbose=false );
 	
@ -111,14 +108,13 @@ public:


 template< class S, class I, class O, typename T >
-solver<S,I,O,T>::solver( GridHierarchy<T>& f, //const MeshvarBnd<T>& ubnd, 
-					opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth )
+solver<S,I,O,T>::solver( GridHierarchy<T>& f, opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth )
 :	m_scheme(), m_gridop(), m_npresmooth( npresmooth ), m_npostsmooth( npostsmooth ), 
 m_smoother( smoother ), m_ilevelmin( f.levelmin() ), m_is_ini( true ), m_pf( &f )
 { 
 	m_is_ini = true;
 	
-	// TODO: maybe later : add more than one refinement region
+	// TODO: maybe later : add more than one refinement region, then we need the mask
 	//... initialize the refinement mask
 	//m_pmask = new GridHierarchy<bool>( f.m_nbnd );
 	//m_pmask->create_base_hierarchy(f.levelmin());
@ -193,7 +189,7 @@ void solver<S,I,O,T>::SOR( T h, MeshvarBnd<T> *u, const MeshvarBnd<T>* f )
 	
 	double 
 		alpha = 1.2, 
-	//alpha = 2 / (1 + 4 * atan(1.0) / double(u->size(0)))-1.0,
+	//alpha = 2 / (1 + 4 * atan(1.0) / double(u->size(0)))-1.0, //.. ideal alpha
 		ialpha = 1.0-alpha;
 	
 	#pragma omp parallel for
@ -309,12 +305,10 @@ void solver<S,I,O,T>::twoGrid( unsigned ilevel )
 	
 	//... restrict Lu
 	m_gridop.restrict( Lu, tLu );
-	//mg_straight().restrict( Lu, tLu );
 	Lu.deallocate();
 	
 	//... restrict source term
 	m_gridop.restrict( *ff, *fc );
-	//mg_straight().restrict( *ff, *fc );
 	
 	int oi, oj, ok;
 	oi = ff->offset(0);
@ -483,32 +477,29 @@ double solver<S,I,O,T>::solve( GridHierarchy<T>& uh, double acc, double h, bool
 {

 	double err;
+	unsigned niter = 0;
+	
+	bool fullverbose = false;
 	
-	//GridHierarchy<T> uhnew(uh);//, fsave(*m_pf);
 	m_pu = &uh;
 	
-    unsigned niter = 0;
-	
 	//... iterate ...//
 	while (true)
 	{
 		
 		
 		twoGrid( uh.levelmax() );
-		//err = compute_error( *m_pu, uhnew, verbose );
-		err = compute_RMS_resid( *m_pu, *m_pf, verbose );
+		err = compute_RMS_resid( *m_pu, *m_pf, fullverbose );
 		++niter;
 		
 		if( verbose ){
-			std::cout << "  --> Step No. " << std::setw(3) << niter << ", Max Err = " << err << std::endl;
-			std::cout << "      ---------------------------------------------------\n";
+			std::cout << "   - Step No. " << std::setw(3) << niter << ", Max Err = " << err << std::endl;
+			if(fullverbose)
+				std::cout << "     ---------------------------------------------------\n";
 		}
 			
-		if( (niter > 1) && ((err < acc) || (niter > 8)) )
+		if( (niter > 1) && ((err < acc) || (niter > 20)) )
 			break;
-		
-		//uhnew = *m_pu;
-		//*m_pf = fsave;
 	}		
 	
 	if( err > acc )
@ -517,109 +508,29 @@ double solver<S,I,O,T>::solve( GridHierarchy<T>& uh, double acc, double h, bool
 		std::cout << " - Converged in " << niter << " steps to req. acc. of " << acc << std::endl;

 	
-	//uh = uhnew;
-	//*m_pf = fsave;
-	
 	//.. make sure that the RHS does not contain the FAS corrections any more
-	for( int i=m_pf->levelmax(); i>0; --i )//(int)m_pf->levelmin(); --i )
+	for( int i=m_pf->levelmax(); i>0; --i )
 		m_gridop.restrict( *m_pf->get_grid(i), *m_pf->get_grid(i-1) );
 	
 	
 	return err;
 }

-template< class S, class I, class O, typename T >
-void solver<S,I,O,T>::interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine )
-{
-	MeshvarBnd<T> *u    = &fine;
-	MeshvarBnd<T> *utop = &coarse;
-	
-	int
-	xoff = u->offset(0),
-	yoff = u->offset(1),
-	zoff = u->offset(2);
-	
-	//... don't do anything if we are not an additional refinement region
-	if( xoff == 0 && yoff == 0 && zoff == 0 )
-		return;
-	
-	int
-	nx = u->size(0), 
-	ny = u->size(1), 
-	nz = u->size(2);
-	
-	//.. perform cubic interpolation
-	mg_cubic().prolong_bnd( *utop, *u );
-	
-	//.. match fluxes
-	#pragma omp parallel for schedule(dynamic)
-	for( int ix=-1; ix<=nx; ++ix )
-		for( int iy=-1; iy<=ny; ++iy )
-			for( int iz=-1; iz<=nz; ++iz )
-			{
-				bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz);
-				
-				if( xbnd || ybnd || zbnd )
-				{
-					
-					//... only deal with proper ghostzones
-					if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd))
-						continue;
-					
-					int ixtop = (int)(0.5*(double)(ix))+xoff;
-					int iytop = (int)(0.5*(double)(iy))+yoff;
-					int iztop = (int)(0.5*(double)(iz))+zoff;
-					
-					if( ix==-1 ) ixtop=xoff-1;
-					if( iy==-1 ) iytop=yoff-1;
-					if( iz==-1 ) iztop=zoff-1;
-					
-					double flux;;
-				    if( ix == -1 && iy%2==0 && iz%2==0 )
-					{					
-						flux = 0.0;
-						for( int j=0;j<=1;j++)
-							for( int k=0;k<=1;k++)
-							{		
-								flux += ((*u)(ix+1,iy+j,iz+k)-(*u)(ix,iy+j,iz+k));
-							}
-						
-						
-						
-						flux /= 4.0;
-						
-						double dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
-					}
-				}
-			}
-	
-}
-
-
-


+//TODO: this only works for 2nd order! (but actually not needed)
 template< class S, class I, class O, typename T >
 void solver<S,I,O,T>::setBC( unsigned ilevel )
 {
 	//... set only on level before additional refinement starts
-	//if( ilevel == m_ilevelmin )
 	if( ilevel == m_ilevelmin )
 	{
 		MeshvarBnd<T> *u = m_pu->get_grid(ilevel);
-		//int nbnd = u->m_nbnd,
 		int
-		nx = u->size(0), 
-		ny = u->size(1), 
-		nz = u->size(2);
-		
-		/*for( int ix=-nbnd; ix<nx+nbnd; ++ix )
-			for( int iy=-nbnd; iy<ny+nbnd; ++iy )
-				for( int iz=-nbnd; iz<nz+nbnd; ++iz )
-					if( ix<0||ix>=nx||iy<0||iy>=ny||iz<0||iz>=nz )
-						(*u)(ix,iy,iz) = (*m_pubnd)(ix,iy,iz);*/
-		
-		
+			nx = u->size(0), 
+			ny = u->size(1), 
+			nz = u->size(2);
+			
 		for( int iy=0; iy<ny; ++iy )
 			for( int iz=0; iz<nz; ++iz )
 			{
@ -643,21 +554,7 @@ void solver<S,I,O,T>::setBC( unsigned ilevel )
 		
 		
 		
-	}/*else if( ilevel < m_ilevelmin ) {
-		MeshvarBnd<T> *u = m_pu->get_grid(ilevel);
-		int nbnd = u->m_nbnd,
-		nx = u->size(0), 
-		ny = u->size(1), 
-		nz = u->size(2);
-		
-		for( int ix=-nbnd; ix<nx+nbnd; ++ix )
-			for( int iy=-nbnd; iy<ny+nbnd; ++iy )
-				for( int iz=-nbnd; iz<nz+nbnd; ++iz )
-					if( ix<0||ix>=nx||iy<0||iy>=ny||iz<0||iz>=nz )
-						(*u)(ix,iy,iz) = 0.0;
-	}*/
-
-
+	}
 }


@ -715,336 +612,8 @@ void solver<S,I,O,T>::make_periodic( MeshvarBnd<T> *u )
 				}
 			}
 	
-#if 0
-	if( u->m_nbnd == 1 )
-	{
-			
-		#pragma omp parallel
-		{
-			
-			if( u->offset(0) == 0 )
-				for( int iy=-1; iy<=ny; ++iy )
-					for( int iz=-1; iz<=nz; ++iz )
-					{
-						int iiy( (iy+ny)%ny ), iiz( (iz+nz)%nz );
-
-						(*u)(-1,iy,iz) = (*u)(nx-1,iiy,iiz);
-						(*u)(nx,iy,iz) = (*u)(0,iiy,iiz);
-					}
-			
-			if( u->offset(1) == 0 )
-				for( int ix=-1; ix<=nx; ++ix )
-					for( int iz=-1; iz<=nz; ++iz )
-					{
-						int iix( (ix+nx)%nx ), iiz( (iz+nz)%nz );
-
-						(*u)(ix,-1,iz) = (*u)(iix,ny-1,iiz);
-						(*u)(ix,ny,iz) = (*u)(iix,0,iiz);
-					}
-			
-			if( u->offset(2) == 0 )
-				for( int ix=-1; ix<=nx; ++ix )
-					for( int iy=-1; iy<=ny; ++iy )
-					{
-						int iix( (ix+nx)%nx ), iiy( (iy+ny)%ny );
-
-						(*u)(ix,iy,-1) = (*u)(iix,iiy,nz-1);
-						(*u)(ix,iy,nz) = (*u)(iix,iiy,0);
-					}									  
-			
-			
-		}
-	}else if( u->m_nbnd == 2 ){
-		if( u->offset(0) == 0 )
-			for( int iy=-2; iy<=ny+1; ++iy )
-				for( int iz=-2; iz<=nz+1; ++iz )
-				{
-					int iiy( (iy+ny)%ny ), iiz( (iz+nz)%nz );
-					
-					(*u)(-1,iy,iz) = (*u)(nx-1,iiy,iiz);
-					(*u)(-2,iy,iz) = (*u)(nx-2,iiy,iiz);
-					(*u)(nx,iy,iz) = (*u)(0,iiy,iiz);
-					(*u)(nx+1,iy,iz) = (*u)(1,iiy,iiz);
-				}
-		
-		if( u->offset(1) == 0 )
-			for( int ix=-2; ix<=nx+1; ++ix )
-				for( int iz=-2; iz<=nz+1; ++iz )
-				{
-					int iix( (ix+nx)%nx ), iiz( (iz+nz)%nz );
-					
-					(*u)(ix,-1,iz) = (*u)(iix,ny-1,iiz);
-					(*u)(ix,-2,iz) = (*u)(iix,ny-2,iiz);
-					(*u)(ix,ny,iz) = (*u)(iix,0,iiz);
-					(*u)(ix,ny+1,iz) = (*u)(iix,1,iiz);
-				}
-		
-		if( u->offset(2) == 0 )
-			for( int ix=-2; ix<=nx+1; ++ix )
-				for( int iy=-2; iy<=ny+1; ++iy )
-				{
-					int iix( (ix+nx)%nx ), iiy( (iy+ny)%ny );
-					
-					(*u)(ix,iy,-1) = (*u)(iix,iiy,nz-1);
-					(*u)(ix,iy,-2) = (*u)(iix,iiy,nz-2);
-					(*u)(ix,iy,nz) = (*u)(iix,iiy,0);
-					(*u)(ix,iy,nz+1) = (*u)(iix,iiy,1);
-				}			
-	}else
-		throw std::runtime_error("solver<S,I,O,T>::make_periodic: don't know how to deal with boundary!");
-#endif
 }

-#if 0
-template< class S, class I, class O, typename T >
-void solver<S,I,O,T>::interp_coarse_fine( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine, bool bcf )
-{
-	MeshvarBnd<T> *u    = &fine;
-	MeshvarBnd<T> *utop = &coarse;
-	
-	
-	bcf = true;;
-	//bcf = false;
-	
-	int
-	xoff = u->offset(0),
-	yoff = u->offset(1),
-	zoff = u->offset(2);
-	
-	//... don't do anything if we are not an additional refinement region
-	if( xoff == 0 && yoff == 0 && zoff == 0 )
-		return;
-	
-	int
-	nx = u->size(0), 
-	ny = u->size(1), 
-	nz = u->size(2);
-	
-	//... set boundary condition for fine grid
-	
-#pragma omp parallel for schedule(dynamic)
-	for( int ix=-1; ix<=nx; ++ix )
-		for( int iy=-1; iy<=ny; ++iy )
-			for( int iz=-1; iz<=nz; ++iz )
-			{
-				bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz);
-				
-				//if(ix==-1||ix==nx||iy==-1||iy==ny||iz==-1||iz==nz)
-				if( xbnd || ybnd || zbnd )
-					//if( xbnd ^ ybnd ^ zbnd )
-				{
-					
-					//... only deal with proper ghostzones
-					if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd))
-						continue;
-					
-					/*int ixtop = (int)(0.5*(double)(ix+2*xoff)+1e-3);
-					 int iytop = (int)(0.5*(double)(iy+2*yoff)+1e-3);
-					 int iztop = (int)(0.5*(double)(iz+2*zoff)+1e-3);*/
-					
-					int ixtop = (int)(0.5*(double)(ix))+xoff;
-					int iytop = (int)(0.5*(double)(iy))+yoff;
-					int iztop = (int)(0.5*(double)(iz))+zoff;
-					
-					if( ix==-1 ) ixtop=xoff-1;
-					if( iy==-1 ) iytop=yoff-1;
-					if( iz==-1 ) iztop=zoff-1;
-					
-					double ustar1, ustar2, ustar3, uhat;			
-					double fac = 0.5;//0.25;
-					double flux;;
-				    if( ix == -1 && iy%2==0 && iz%2==0 )
-					{
-						flux = 0.0;
-						for( int j=0;j<=1;j++)
-							for( int k=0;k<=1;k++)
-							{		
-								ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) );
-								ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) );
-								ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) );
-								
-								uhat   = interp2( /*-1.0, 0.0, 1.0, */ustar1, ustar2, ustar3, fac*((double)k-0.5) );
-								
-								//(*u)(ix,iy+j,iz+k) = 0.0;//(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k), -1.0 );
-								
-								(*u)(ix,iy+j,iz+k) = interp2left( uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k) );
-								
-								flux += ((*u)(ix+1,iy+j,iz+k)-(*u)(ix,iy+j,iz+k));
-							}
-						
-						
-						
-						flux /= 4.0;
-						
-						double dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
-						
-						//dflux *= 2.0;
-						
-						if( bcf )
-							for( int j=0;j<=1;j++)
-								for( int k=0;k<=1;k++)
-									(*u)(ix,iy+j,iz+k) -= dflux;
-						else
-							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop+1,iytop,iztop) - 2.0*flux;
-						
-						
-					}
-					// right boundary
-					if( ix == nx && iy%2==0 && iz%2==0 )
-					{
-						flux = 0.0;
-						for( int j=0;j<=1;j++)
-							for( int k=0;k<=1;k++)
-							{		
-								ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) );
-								ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) );
-								ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) );
-								
-								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
-								
-								//(*u)(ix,iy+j,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix-1,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), 1.0 );
-								(*u)(ix,iy+j,iz+k) = interp2right( (*u)(ix-2,iy+j,iz+k), (*u)(ix-1,iy+j,iz+k), uhat );
-								flux += ((*u)(ix,iy+j,iz+k)-(*u)(ix-1,iy+j,iz+k));
-							}
-						flux /= 4.0;
-						
-						
-						double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop-1,iytop,iztop))/2.0 - flux;
-						//dflux *= 2.0;
-						
-						if( bcf )
-							for( int j=0;j<=1;j++)
-								for( int k=0;k<=1;k++)
-									(*u)(ix,iy+j,iz+k) += dflux;
-						else
-							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop-1,iytop,iztop) + 2.0*flux;
-						
-					}
-					// bottom boundary
-					if( iy == -1 && ix%2==0 && iz%2==0 )
-					{
-						flux = 0.0;
-						for( int j=0;j<=1;j++)
-							for( int k=0;k<=1;k++)
-							{
-								ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) );
-								ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
-								ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) );
-								
-								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
-								
-								//(*u)(ix+j,iy,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k), -1.0 );
-								(*u)(ix+j,iy,iz+k) = interp2left( uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k) );
-								
-								flux += ((*u)(ix+j,iy+1,iz+k)-(*u)(ix+j,iy,iz+k));
-							}
-						flux /= 4.0;
-						//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop+1,iztop) - flux;
-						double dflux = ((*utop)(ixtop,iytop+1,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
-						//dflux *= 2.0;
-						if( bcf )
-							for( int j=0;j<=1;j++)
-								for( int k=0;k<=1;k++)
-									(*u)(ix+j,iy,iz+k) -= dflux;
-						else
-							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop+1,iztop) - 2.0*flux;
-						
-					}
-					// top boundary
-					if( iy == ny && ix%2==0 && iz%2==0 )
-					{
-						flux = 0.0;
-						for( int j=0;j<=1;j++)
-							for( int k=0;k<=1;k++)
-							{		
-								ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) );
-								ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
-								ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) );
-								
-								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
-								
-								//(*u)(ix+j,iy,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix+j,iy-1,iz+k), (*u)(ix+j,iy-2,iz+k), 1.0 );
-								(*u)(ix+j,iy,iz+k) = interp2right( (*u)(ix+j,iy-2,iz+k), (*u)(ix+j,iy-1,iz+k), uhat  );
-								
-								flux += ((*u)(ix+j,iy,iz+k)-(*u)(ix+j,iy-1,iz+k));
-							}
-						flux /= 4.0;
-						//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop-1,iztop) + flux;
-						double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop-1,iztop))/2.0 - flux;
-						//dflux *= 2.0;
-						if( bcf )
-							for( int j=0;j<=1;j++)
-								for( int k=0;k<=1;k++)
-									(*u)(ix+j,iy,iz+k) += dflux;
-						else
-							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop-1,iztop) + 2.0*flux;
-						
-					}
-					// front boundary
-					if( iz == -1 && ix%2==0 && iy%2==0 )
-					{
-						flux = 0.0;
-						for( int j=0;j<=1;j++)
-							for( int k=0;k<=1;k++)
-							{		
-								ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) );
-								ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
-								ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) );
-								
-								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
-								
-								//(*u)(ix+j,iy+k,iz) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2), -1.0 );
-								(*u)(ix+j,iy+k,iz) = interp2left( uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2) );
-								
-								flux += ((*u)(ix+j,iy+k,iz+1)-(*u)(ix+j,iy+k,iz));
-							}
-						flux /= 4.0;
-						//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop+1) - flux;
-						double dflux = ((*utop)(ixtop,iytop,iztop+1)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
-						//dflux *= 2.0;
-						if( bcf )
-							for( int j=0;j<=1;j++)
-								for( int k=0;k<=1;k++)
-									(*u)(ix+j,iy+k,iz) -= dflux;
-						else
-							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop+1) - 2.0*flux;
-						
-					}
-					// back boundary
-					if( iz == nz && ix%2==0 && iy%2==0 )
-					{
-						flux = 0.0;
-						for( int j=0;j<=1;j++)
-							for( int k=0;k<=1;k++)
-							{		
-								ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) );
-								ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
-								ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) );
-								
-								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
-								
-								//(*u)(ix+j,iy+k,iz) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix+j,iy+k,iz-1), (*u)(ix+j,iy+k,iz-2), 1.0 );
-								(*u)(ix+j,iy+k,iz) = interp2right( (*u)(ix+j,iy+k,iz-2), (*u)(ix+j,iy+k,iz-1), uhat );
-								
-								flux += ((*u)(ix+j,iy+k,iz)-(*u)(ix+j,iy+k,iz-1));
-							}
-						flux /= 4.0;
-						//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop-1) + flux;
-						double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop,iztop-1))/2.0 - flux;
-						//dflux *= 2.0;
-						if( bcf )
-							for( int j=0;j<=1;j++)
-								for( int k=0;k<=1;k++)
-									(*u)(ix+j,iy+k,iz) += dflux;
-						else
-							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop-1) + 2.0*flux;
-					}
-					
-				}
-			}
-	
-}
-#endif

 END_MULTIGRID_NAMESPACE
 
--- a/plugins/output_gadget2.cc
+++ b/plugins/output_gadget2.cc
@ -231,6 +231,10 @@ protected:
 		npleft = nptot, 
 		n2read = std::min((int)block_buf_size_,npleft);
 		
+		std::cout << " - Writing " << nptot << " particles to Gadget file...\n"
+				  << "      type 1 : " << header_.npart[1] << "\n"
+				  << "      type 5 : " << header_.npart[5] << "\n";
+		
 		
 		//... particle coordinates ..................................................
 		unsigned blk;
@ -413,7 +417,8 @@ public:
 	gadget2_output_plugin( config_file& cf )//std::string afname, Cosmology cosm, Parameters param, unsigned block_buf_size = 100000 )
 	: output_plugin( cf ), ofs_( fname_.c_str(), std::ios::binary|std::ios::trunc )	
 	{
-		block_buf_size_ = cf_.getValueSafe<unsigned>("output","gadget_blksize",100000);
+		block_buf_size_ = cf_.getValueSafe<unsigned>("output","gadget_blksize",1048576);
+		//block_buf_size_ = cf_.getValueSafe<unsigned>("output","gadget_blksize",100000);
 		//bbndparticles_  = !cf_.getValueSafe<bool>("output","gadget_nobndpart",false);
 		
 		bmorethan2bnd_ = false;
--- a/poisson.cc
+++ b/poisson.cc
@ -75,7 +75,10 @@ double multigrid_poisson_plugin::solve( grid_hierarchy& f, grid_hierarchy& u )
 	unsigned verbosity = cf_.getValueSafe<unsigned>("setup","verbosity",2);
 	
 	if( verbosity > 0 )
+	{	
+		std::cout << "-------------------------------------------------------------\n";
 		std::cout << " - Invoking multi-grid Poisson solver..." << std::endl;
+	}
 	
 	double acc = 1e-5, err;
 	std::string ps_smoother_name;
@ -306,7 +309,10 @@ double fft_poisson_plugin::solve( grid_hierarchy& f, grid_hierarchy& u )
 		throw std::runtime_error("fft_poisson_plugin::solve : k-space method can only be used in unigrid mode (levelmin=levelmax)");
 	
 	if( verbosity > 0 )
+	{	
+		std::cout << "-------------------------------------------------------------\n";
 		std::cout << " - Invoking unigrid FFT Poisson solver..." << std::endl;
+	}
 	
 	int nx,ny,nz,nzp;
 	nx = f.get_grid(f.levelmax())->size(0);
--- a/random.hh
+++ b/random.hh
@ -83,6 +83,22 @@ protected:
 		return mean/(cubesize_*cubesize_*cubesize_);
 	}
 	
+	//! subtract a constant from an entire cube
+	void subtract_from_cube( int i, int j, int k, double val )
+	{
+		i = (i+ncubes_)%ncubes_;
+		j = (j+ncubes_)%ncubes_;
+		k = (k+ncubes_)%ncubes_;
+		
+		long icube = (i*ncubes_+j)*ncubes_+k;
+		
+		for( int ii=0; ii<(int)cubesize_; ++ii )
+			for( int jj=0; jj<(int)cubesize_; ++jj )
+				for( int kk=0; kk<(int)cubesize_; ++kk )
+					(*rnums_[icube])(ii,jj,kk) -= val;
+		
+	}
+	
 	//! copy random numbers from a cube to a full grid array
 	template< class C >
 	void copy_cube( int i, int j, int k, C& dat )
@ -124,6 +140,8 @@ protected:
 	//! initialize member variables and allocate memory
 	void initialize( void )
 	{
+		std::cerr << " - Generating random numbers w/ sample cube size of " << cubesize_ << std::endl;
+		
 		ncubes_ = std::max((int)((double)res_/cubesize_),1);
 		if( res_ < cubesize_ )
 		{	
@ -184,6 +202,20 @@ protected:
 					sum+=fill_cube(ii, jj, kk);
 				}
 		
+		//... subtract mean
+		#pragma omp parallel for reduction(+:sum)
+		for( int i=0; i<(int)ncubes_; ++i )
+			for( int j=0; j<(int)ncubes_; ++j )
+				for( int k=0; k<(int)ncubes_; ++k )
+				{
+					int ii(i),jj(j),kk(k);
+					
+					ii = (ii+ncubes_)%ncubes_;
+					jj = (jj+ncubes_)%ncubes_;
+					kk = (kk+ncubes_)%ncubes_;
+					subtract_from_cube(ii,jj,kk,sum/(ncubes_*ncubes_*ncubes_));
+				}
+		
 		return sum/(ncubes_*ncubes_*ncubes_);
 	}

@ -412,7 +444,8 @@ public:
 		rmean = sum/count;
 		rvar = sum2/count-rmean*rmean;
 		
-		std::cout << " - Restricted random numbers have mean = " << rmean << " and var = " << rvar << std::endl;
+		std::cout << " - Restricted random numbers have\n"
+				  << "       mean = " << rmean << ", var = " << rvar << std::endl;
 	}
 	
 	
--- a/transfer_function.hh
+++ b/transfer_function.hh
@ -152,7 +152,7 @@ public:
 	
 	double Tr0_;
 	real_t Tmin_, Tmax_, Tscale_;
-	real_t rneg_, rneg2_;
+	real_t rneg_, rneg2_, kny_;
 	static transfer_function *ptf_;
 	static real_t nspec_;
 	
@ -199,7 +199,8 @@ protected:
 		
 #ifdef NZERO_Q
 		//q = 0.4;
-		q = 0.5;
+		//q = 0.5;
+		q = 0.9;
 #endif
 		
 		double kmin = qmin, kmax=qmax;
@ -227,15 +228,13 @@ protected:

 		std::ofstream ofsk("transfer_k.txt");
 		double sum_in = 0.0;
+		
+
+		
 		for( unsigned i=0; i<N; ++i )
 		{
 			
 			double k = k0*exp(((int)i - (int)N/2+1) * dlnk);
-			//double k = k0*exp(((int)i - (int)N/2) * dlnk);
-			//double k = k0*exp(ii * dlnk);
-			
-			//... some constants missing ...//
-			
 			in[i].re = dplus*sqrtpnorm*ptf_->compute( k )*pow(k,0.5*nspec_)*pow(k,1.5-q);
 			in[i].im = 0.0;
 			
@ -270,7 +269,6 @@ protected:
 			out[i].im = cu.imag()*fftnorm;
 			
 #else		
-			//complex x(dir*q, (double)ii*2.0*M_PI/L);
 			complex x(dir*q, (double)ii*2.0*M_PI/L);
 			gsl_sf_result g_a, g_p;
 			
@ -333,26 +331,14 @@ protected:
 		{
 			int ii = i;
 			ii -= N/2-1;
-			//ii -= N/2;
-			//if( ii>N/2)
-			//	ii-=N;
-			
-			
-			
 			double r = r0*exp(-ii*dlnr);
 			rr[N-i-1] = r;
 			TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*pow(r,-(1.5+q));
-			
-			//TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*exp( -dir*(q+1.5)*ii*dlnr +q*log(k0r0))/r0;
-			
-			//rr[i] = r;
-			//TT[i] = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*pow(r,-(1.5+q));
-			
 		}
 		
 		
 		{
-			std::ofstream ofs("transfer_real_new.txt");
+			std::ofstream ofs("transfer_real.txt");
 			for( unsigned i=0; i<N; ++i )
 			{
 				int ii = i;
@ -380,6 +366,8 @@ public:
 	
 		real_t q = 0.8;
 		
+		kny_ = knymax;
+		
 		std::vector<double> r,T;
 		
 		transform( pnorm, dplus, nr, q, r, T );
@ -396,7 +384,7 @@ public:
 		//.. need a factor sqrt( 2*kny^2_x + 2*kny^2_y + 2*kny^2_z )/2 = sqrt(3/2)kny (in equilateral case)
 		//#warning Need to integrate from Boxsize (see below)?
 		gsl_integration_qag (&F, 0.0, sqrt(1.5)*knymax, 0, 1e-8, 20000, GSL_INTEG_GAUSS21, wp, &Tr0_, &error); 
-		//gsl_integration_qag (&F, 2.0*M_PI/100.0, sqrt(1.5)*knymax, 0, 1e-8, 20000, GSL_INTEG_GAUSS21, wp, &Tr0_, &error); 
+		//gsl_integration_qag (&F, 0.0, knymax, 0, 1e-8, 20000, GSL_INTEG_GAUSS21, wp, &Tr0_, &error); 
 		
 		gsl_integration_workspace_free(wp);
 				
@ -407,19 +395,6 @@ public:
 		
 		for( unsigned i=0; i<r.size(); ++i )
 		{
-			// spline positive and negative part separately
-			/*if( T[i] > 0.0 )
-			{
-				xp.push_back( 2.0*log10(r[i]) );
-				yp.push_back( log10(T[i]) );
-				rneg_ = r[i];
-				rneg2_ = rneg_*rneg_;
-			}else {
-				xn.push_back( 2.0*log10(r[i]) );
-				yn.push_back( log10(-T[i]) );
-			}*/
-			
-			
 			if( r[i] > rmin && r[i] < rmax )
 			{
 				xsp.push_back( log10(r[i]) );
@ -433,10 +408,8 @@ public:
 		
 		
 		accp = gsl_interp_accel_alloc ();
-		//accn = gsl_interp_accel_alloc ();
-		
+
 		//... spline interpolation is only marginally slower here
-		//splinep = gsl_spline_alloc (gsl_interp_cspline, xsp.size() );
 		splinep = gsl_spline_alloc (gsl_interp_akima, xsp.size() );

 		//... set up everything for spline interpolation
@ -445,11 +418,9 @@ public:

 		
 		//.. build lookup table using spline interpolation
-		
-		
-		m_xmin = log10(rmin);//m_xtable[0];
-		m_xmax = log10(rmax);//m_xtable.back();
-		m_dx   = (m_xmax-m_xmin)/nr;//(m_xtable.size()-1);
+		m_xmin = log10(rmin);
+		m_xmax = log10(rmax);
+		m_dx   = (m_xmax-m_xmin)/nr;
 		
 		for(unsigned i=0; i<nr; ++i )
 		{
@ -457,9 +428,11 @@ public:
 			m_ytable.push_back( gsl_spline_eval(splinep, (m_xtable.back()), accp) );
 		}
 		
+		//... DEBUG: output of spline interpolated real space transfer function
+		if(false)
 		{
 			real_t dlogr = (log10(rmax)-log10(rmin))/1000;
-			std::ofstream ofs("transfer_splinep.txt");			
+			std::ofstream ofs("transfer_real.txt");			
 			
 			for( int i=0; i< 1000; ++i ) 
 			{
@ -483,7 +456,6 @@ public:
 	~TransferFunction_real()
 	{ }
 	
-#if 1
 	inline real_t compute_real( real_t r2 ) const
 	{
 		const double EPS = 1e-8;
@ -492,22 +464,9 @@ public:
 		if( r2 <Reps2 )
 			return Tr0_;
 		
-		/*real_t logr = log10(r2);
-		int i = (int)((logr-m_xmin)/m_dx);
-		
-		if( i < 0 ) i = 0;
-		else if( i >= m_xtable.size()-1 ) i = m_xtable.size()-2;
-		
-		real_t y1,y2;
-		y1 = m_ytable[i];
-		y2 = m_ytable[i+1];
-		
-		//std::cerr << y1 << " " << y2 << std::endl;
-		return y1 + (y2-y1)*(logr/m_dx-(real_t)i);*/
-		
-		double r=0.5*log10(r2);//sqrt(r2);
+		double r=0.5*log10(r2);
 		double ii = (r-m_xmin)/m_dx;
-		int i = (int)ii;//((r-m_xmin)/m_dx);
+		int i = (int)ii;
 		
 		if( i < 0 ) i = 0;
 		else if( i >= (int)m_xtable.size()-1 ) i = m_xtable.size()-2;
@ -516,32 +475,9 @@ public:
 		y1 = m_ytable[i];
 		y2 = m_ytable[i+1];
 		
-		//		return y1 + (y2-y1)*((r-m_xmin)/m_dx-(real_t)i);
 		return (real_t)((y1 + (y2-y1)*(ii-(double)i))/r2);
 	}
-#else	
-	inline real_t compute_real( real_t r2 ) const
-	{
-		const real_t EPS = 1e-8;
-		const real_t Reps2 = EPS*EPS;
-		
-		if( r2 <Reps2 )
-			return Tr0_;

-		/*if( r2 < rneg2_ )
-			q = pow(10.0,gsl_spline_eval (splinep, log10(r2), accp));
-		else
-			q = -pow(10.0,gsl_spline_eval(splinen, log10(r2), accn));*/
-		
-		real_t q;		
-		real_t logr2 = log10(r2);
-		q = pow(10.0,gsl_spline_eval(splinep, logr2, accp));
-		real_t sign = 1.0;
-		if( gsl_spline_eval(splinen, logr2, accn) < 0.0 )
-			sign = -1.0;
-		return q*sign;
-	}
-#endif
 };