merging upstream changes

2024-09-19 17:03:46 +02:00 · 2013-11-05 15:34:27 +01:00 · 2013-11-05 15:34:27 +01:00 · df94ef9fac
commit df94ef9fac
parent a3b409352d 458607f2f6
21 changed files with 7246 additions and 1251 deletions
--- a/4
+++ b/4
@ -3,14 +3,14 @@
 FFTW3		= yes
 MULTITHREADFFTW	= yes
 SINGLEPRECISION	= no
-HAVEHDF5        = no
+HAVEHDF5        = yes
 HAVEBOXLIB	= no
 BOXLIB_HOME     = ${HOME}/nyx_tot_sterben/BoxLib

 ##############################################################################
 ### compiler and path settings
 CC      = g++
-OPT     = -Wall -Wno-unknown-pragmas -O3 -g -msse2
+OPT     = -Wall -Wno-unknown-pragmas -O3 -g -mtune=native
 CFLAGS  =  
 LFLAGS  = -lgsl -lgslcblas 
 CPATHS  = -I. -I$(HOME)/local/include -I/opt/local/include -I/usr/local/include
--- a/convolution_kernel.cc
+++ b/convolution_kernel.cc
--- a/convolution_kernel.hh
+++ b/convolution_kernel.hh
@ -41,39 +41,45 @@ namespace convolution{
 	/////////////////////////////////////////////////////////////////
 	
 	
-	//! abstract base class for a transfer function convolution kernel
-	class kernel{
-	public:
-		
-		//! all parameters (physical/numerical)
-		parameters cparam_;
-		
-		config_file *pcf_;
-		transfer_function* ptf_;
-		refinement_hierarchy* prefh_;
-		tf_type type_;
-		
-		//! constructor
-		kernel( config_file& cf, transfer_function* ptf, refinement_hierarchy& refh, tf_type type )
-		: pcf_(&cf), ptf_(ptf), prefh_(&refh), type_(type)//cparam_( cp )
-		{	}
-		
-		//! dummy constructor
-		/*kernel( void )
-		{	}*/
-		
-		//! compute/load the kernel
-		virtual kernel* fetch_kernel( int ilevel, bool isolated=false ) = 0;
-		
-		//! virtual destructor
-		virtual ~kernel(){ };
-		
-		//! purely virtual method to obtain a pointer to the underlying data
-		virtual void* get_ptr() = 0;	
-		
-		//! free memory
-		virtual void deallocate() = 0;
-	};
+  //! abstract base class for a transfer function convolution kernel
+  class kernel{
+  public:
+    
+    //! all parameters (physical/numerical)
+    parameters cparam_;
+    
+    config_file *pcf_;
+    transfer_function* ptf_;
+    refinement_hierarchy* prefh_;
+    tf_type type_;
+    
+    //! constructor
+    kernel( config_file& cf, transfer_function* ptf, refinement_hierarchy& refh, tf_type type )
+      : pcf_(&cf), ptf_(ptf), prefh_(&refh), type_(type)//cparam_( cp )
+    {	}
+    
+    //! dummy constructor
+    /*kernel( void )
+      {	}*/
+    
+    //! compute/load the kernel
+    virtual kernel* fetch_kernel( int ilevel, bool isolated=false ) = 0;
+    
+    //! virtual destructor
+    virtual ~kernel(){ };
+    
+    //! purely virtual method to obtain a pointer to the underlying data
+    virtual void* get_ptr() = 0;
+
+    //! purely virtual method to determine whether the kernel is k-sampled or not
+    virtual bool is_ksampled() = 0;
+
+    //! purely virtual vectorized method to compute the kernel value if is_ksampled
+    virtual void at_k( size_t len, const double* in_k, double* out_Tk ) = 0;
+    
+    //! free memory
+    virtual void deallocate() = 0;
+  };

 	
 	//! abstract factory class to create convolution kernels
--- a/densities.cc
+++ b/densities.cc
@ -8,6 +8,8 @@
 
 */

+#include <cstring>
+
 #include "densities.hh"
 #include "convolution_kernel.hh"

@ -16,6 +18,234 @@
 #define DEF_RAN_CUBE_SIZE	32


+template< typename m1, typename m2 >
+void fft_interpolate( m1& V, m2& v, bool from_basegrid=false ) 
+{
+  int oxf = v.offset(0), oyf = v.offset(1), ozf = v.offset(2);
+  size_t nxf = v.size(0), nyf = v.size(1), nzf = v.size(2), nzfp = nzf+2;
+  size_t nxF = V.size(0), nyF = V.size(1), nzF = V.size(2);
+    
+  if( !from_basegrid )
+    {
+      oxf += nxF/4;
+      oyf += nyF/4;
+      ozf += nzF/4;
+    }
+  
+  LOGUSER("FFT interpolate: offset=%d,%d,%d size=%d,%d,%d",oxf,oyf,ozf,nxf,nyf,nzf);
+
+  // cut out piece of coarse grid that overlaps the fine:
+  assert( nxf%2==0 && nyf%2==0 && nzf%2==0 );
+  
+  size_t nxc = nxf/2, nyc = nyf/2, nzc = nzf/2, nzcp = nzf/2+2;
+  
+  fftw_real *rcoarse = new fftw_real[ nxc * nyc * nzcp ];
+  fftw_complex *ccoarse = reinterpret_cast<fftw_complex*> (rcoarse);
+  
+  fftw_real *rfine = new fftw_real[ nxf * nyf * nzfp];
+  fftw_complex *cfine = reinterpret_cast<fftw_complex*> (rfine);
+  
+  // copy coarse data to rcoarse[.]
+  memset( rcoarse, 0, sizeof(fftw_real) * nxc*nyc*nzcp );
+  #pragma omp parallel for
+  for( int i=0; i<(int)nxc/2; ++i )
+    for( int j=0; j<(int)nyc/2; ++j )
+      for( int k=0; k<(int)nzc/2; ++k )
+	{
+	  int ii(i+nxc/4);
+	  int jj(j+nyc/4);
+	  int kk(k+nzc/4);
+	  size_t q = ((size_t)ii*nyc+(size_t)jj)*nzcp+(size_t)kk;
+	  rcoarse[q] = V( oxf+i, oyf+j, ozf+k );
+	}
+    
+  #pragma omp parallel for
+  for( int i=0; i<(int)nxf; ++i )
+    for( int j=0; j<(int)nyf; ++j )
+      for( int k=0; k<(int)nzf; ++k ) 
+	{
+	  size_t q = ((size_t)i*nyf+(size_t)j)*nzfp+(size_t)k;
+	  rfine[q] = v(i,j,k);
+	}
+
+#ifdef FFTW3
+#ifdef SINGLE_PRECISION
+    fftwf_plan
+      pc  = fftwf_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE),
+      pf  = fftwf_plan_dft_r2c_3d( nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE),
+      ipf = fftwf_plan_dft_c2r_3d( nxf, nyf, nzf, cfine, rfine, FFTW_ESTIMATE);
+    fftwf_execute( pc );
+    fftwf_execute( pf );
+#else
+    fftw_plan
+      pc  = fftw_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE),
+      pf  = fftw_plan_dft_r2c_3d( nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE),
+      ipf = fftw_plan_dft_c2r_3d( nxf, nyf, nzf, cfine, rfine, FFTW_ESTIMATE);
+    fftw_execute( pc );
+    fftw_execute( pf );
+#endif
+#else
+    rfftwnd_plan 
+      pc  = rfftw3d_create_plan( nxc, nyc, nzc, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
+      pf  = rfftw3d_create_plan( nxf, nyf, nzf, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
+      ipf = rfftw3d_create_plan( nxf, nyf, nzf, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
+    
+#ifndef SINGLETHREAD_FFTW		
+    rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), pc, rcoarse, NULL );
+    rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), pf, rfine, NULL );
+#else
+    rfftwnd_one_real_to_complex( pc, rcoarse, NULL );
+    rfftwnd_one_real_to_complex( pf, rfine, NULL );
+#endif
+#endif
+
+    /*************************************************/
+    //.. perform actual interpolation
+    double fftnorm = 1.0/((double)nxf*(double)nyf*(double)nzf);
+    double sqrt8 = 8.0;//sqrt(8.0);
+    double phasefac = -0.5;
+    
+     // 0 0
+    #pragma omp parallel for
+    for( int i=0; i<(int)nxc/2+1; i++ )
+      for( int j=0; j<(int)nyc/2+1; j++ )
+	for( int k=0; k<(int)nzc/2+1; k++ )
+	  {
+	    int ii(i),jj(j),kk(k);
+	    size_t qc,qf;
+	    qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
+	    qf = ((size_t)ii*(size_t)nyf+(size_t)jj)*(nzf/2+1)+(size_t)kk;
+            
+	    double kx = (i <= (int)nxc/2)? (double)i : (double)(i-(int)nxc);
+	    double ky = (j <= (int)nyc/2)? (double)j : (double)(j-(int)nyc);
+	    double kz = (k <= (int)nzc/2)? (double)k : (double)(k-(int)nzc);
+	    
+	    double phase =  phasefac * (kx/nxc + ky/nyc + kz/nzc) * M_PI;
+	    std::complex<double> val_phas( cos(phase), sin(phase) );
+	    
+	    std::complex<double> val(RE(ccoarse[qc]),IM(ccoarse[qc]));
+	    val *= sqrt8 * val_phas;
+            
+	    RE(cfine[qf]) = val.real();
+	    IM(cfine[qf]) = val.imag();
+	  }
+
+    // 1 0
+    #pragma omp parallel for
+    for( int i=nxc/2; i<(int)nxc; i++ )
+      for( int j=0; j<(int)nyc/2+1; j++ )
+	for( int k=0; k<(int)nzc/2+1; k++ )
+	  {
+	    int ii(i+nxf/2),jj(j),kk(k);
+	    size_t qc,qf;
+	    qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
+	    qf = ((size_t)ii*(size_t)nyf+(size_t)jj)*(nzf/2+1)+(size_t)kk;
+            
+	    double kx = (i <= (int)nxc/2)? (double)i : (double)(i-(int)nxc);
+	    double ky = (j <= (int)nyc/2)? (double)j : (double)(j-(int)nyc);
+	    double kz = (k <= (int)nzc/2)? (double)k : (double)(k-(int)nzc);
+	    
+	    double phase =  phasefac * (kx/nxc + ky/nyc + kz/nzc) * M_PI;
+	    std::complex<double> val_phas( cos(phase), sin(phase) );
+	    
+	    std::complex<double> val(RE(ccoarse[qc]),IM(ccoarse[qc]));
+	    val *= sqrt8 * val_phas;
+            
+	    RE(cfine[qf]) = val.real();
+	    IM(cfine[qf]) = val.imag();
+	  }
+
+    // 0 1
+    #pragma omp parallel for
+    for( int i=0; i<(int)nxc/2+1; i++ )
+      for( int j=nyc/2; j<(int)nyc; j++ )
+	for( int k=0; k<(int)nzc/2+1; k++ )
+	  {
+	    int ii(i),jj(j+nyf/2),kk(k);
+	    size_t qc,qf;
+	    qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
+	    qf = ((size_t)ii*(size_t)nyf+(size_t)jj)*(nzf/2+1)+(size_t)kk;
+            
+	    double kx = (i <= (int)nxc/2)? (double)i : (double)(i-(int)nxc);
+	    double ky = (j <= (int)nyc/2)? (double)j : (double)(j-(int)nyc);
+	    double kz = (k <= (int)nzc/2)? (double)k : (double)(k-(int)nzc);
+	    
+	    double phase =  phasefac * (kx/nxc + ky/nyc + kz/nzc) * M_PI;
+	    std::complex<double> val_phas( cos(phase), sin(phase) );
+	    
+	    std::complex<double> val(RE(ccoarse[qc]),IM(ccoarse[qc]));
+	    val *= sqrt8 * val_phas;
+            
+	    RE(cfine[qf]) = val.real();
+	    IM(cfine[qf]) = val.imag();
+	  }
+    
+    // 1 1
+    #pragma omp parallel for
+    for( int i=nxc/2; i<(int)nxc; i++ )
+      for( int j=nyc/2; j<(int)nyc; j++ )
+	for( int k=0; k<(int)nzc/2+1; k++ )
+	  {
+	    int ii(i+nxf/2),jj(j+nyf/2),kk(k);
+	    size_t qc,qf;
+	    qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
+	    qf = ((size_t)ii*(size_t)nyf+(size_t)jj)*(nzf/2+1)+(size_t)kk;
+            
+	    double kx = (i <= (int)nxc/2)? (double)i : (double)(i-(int)nxc);
+	    double ky = (j <= (int)nyc/2)? (double)j : (double)(j-(int)nyc);
+	    double kz = (k <= (int)nzc/2)? (double)k : (double)(k-(int)nzc);
+	    
+	    double phase =  phasefac * (kx/nxc + ky/nyc + kz/nzc) * M_PI;
+	    std::complex<double> val_phas( cos(phase), sin(phase) );
+	    
+	    std::complex<double> val(RE(ccoarse[qc]),IM(ccoarse[qc]));
+	    val *= sqrt8 * val_phas;
+            
+	    RE(cfine[qf]) = val.real();
+	    IM(cfine[qf]) = val.imag();
+	  }
+        
+    delete[] rcoarse;
+
+     /*************************************************/    
+
+#ifdef FFTW3
+  #ifdef SINGLE_PRECISION
+    fftwf_execute( ipf );
+    fftwf_destroy_plan(pf);
+    fftwf_destroy_plan(pc);
+    fftwf_destroy_plan(ipf);
+  #else
+    fftw_execute( ipf );
+    fftw_destroy_plan(pf);
+    fftw_destroy_plan(pc);
+    fftw_destroy_plan(ipf);
+  #endif
+#else
+  #ifndef SINGLETHREAD_FFTW		
+    rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), ipf, cfine, NULL );
+  #else
+    rfftwnd_one_complex_to_real( ipf, cfine, NULL );
+  #endif
+    fftwnd_destroy_plan(pf);
+    fftwnd_destroy_plan(pc);
+    fftwnd_destroy_plan(ipf);
+#endif
+
+    // copy back and normalize
+    #pragma omp parallel for
+    for( int i=0; i<(int)nxf; ++i )
+      for( int j=0; j<(int)nyf; ++j )
+	for( int k=0; k<(int)nzf; ++k ) 
+	  {
+	    size_t q = ((size_t)i*nyf+(size_t)j)*nzfp+(size_t)k;
+	    v(i,j,k) = rfine[q] * fftnorm;
+	  }
+
+    delete[] rfine;
+}
+
+

 /*******************************************************************************************/
 /*******************************************************************************************/
@ -46,9 +276,9 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
 		LOGUSER("Using k-space transfer function kernel.");
 		
 		#ifdef SINGLE_PRECISION	
-		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_float" ];
+		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_new_float" ];
 		#else
-		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_double" ];
+		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_new_double" ];
 		#endif
 	}
 	else
@ -102,73 +332,129 @@ void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type ty
 /*******************************************************************************************/

 void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type type, 
-							  refinement_hierarchy& refh, rand_gen& rand, grid_hierarchy& delta, bool smooth, bool shift )
+				refinement_hierarchy& refh, rand_gen& rand, 
+				grid_hierarchy& delta, bool smooth, bool shift )
 {
-	unsigned					levelmin,levelmax,levelminPoisson;
-	std::vector<long>			rngseeds;
-	std::vector<std::string>	rngfnames;
-	bool						kspaceTF;
-	
-	double tstart, tend;
-
+  unsigned levelmin, levelmax, levelminPoisson;
+  std::vector<long> rngseeds;
+  std::vector<std::string> rngfnames;
+  bool kspaceTF;
+  
+  double tstart, tend;
+  
 #ifndef SINGLETHREAD_FFTW
-	tstart = omp_get_wtime();
+  tstart = omp_get_wtime();
 #else
-	tstart = (double)clock() / CLOCKS_PER_SEC;
+  tstart = (double)clock() / CLOCKS_PER_SEC;
 #endif
+  
+  levelminPoisson = cf.getValue<unsigned>("setup","levelmin");
+  levelmin = cf.getValueSafe<unsigned>("setup","levelmin_TF",levelminPoisson);
+  levelmax = cf.getValue<unsigned>("setup","levelmax");
+  kspaceTF = cf.getValueSafe<bool>("setup", "kspace_TF", false);
+  
+  unsigned nbase = 1<<levelmin;
 	
-	levelminPoisson	= cf.getValue<unsigned>("setup","levelmin");
-	levelmin		= cf.getValueSafe<unsigned>("setup","levelmin_TF",levelminPoisson);
-	levelmax		= cf.getValue<unsigned>("setup","levelmax");
-	kspaceTF		= cf.getValueSafe<bool>("setup", "kspace_TF", false);
+  convolution::kernel_creator *the_kernel_creator;
 	
-	
-	unsigned	nbase	= (unsigned)pow(2,levelmin);
-	
-	convolution::kernel_creator *the_kernel_creator;
-	
-	if( kspaceTF )
-	{
-		if( levelmin!=levelmax )
-		{	
-			LOGERR("K-space transfer function can only be used in unigrid density mode!");
-			throw std::runtime_error("k-space transfer function can only be used in unigrid density mode");
-			
-		}
-		
-		std::cout << " - Using k-space transfer function kernel.\n";
-		LOGUSER("Using k-space transfer function kernel.");
-		
+  if( kspaceTF )
+    {
+      std::cout << " - Using k-space transfer function kernel.\n";
+      LOGUSER("Using k-space transfer function kernel.");
+      
 #ifdef SINGLE_PRECISION	
-		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_float" ];
+      the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_new_float" ];
 #else
-		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_double" ];
+      the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_new_double" ];
 #endif
-	}
+    }
+  else
+    {
+      std::cout << " - Using real-space transfer function kernel.\n";
+      LOGUSER("Using real-space transfer function kernel.");
+#ifdef SINGLE_PRECISION	
+      the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_float" ];
+#else
+      the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_double" ];
+#endif
+    }	
+	
+  convolution::kernel *the_tf_kernel = the_kernel_creator->create( cf, ptf, refh, type );
+	
+  /***** PERFORM CONVOLUTIONS *****/
+  if( kspaceTF ){
+
+    //... create and initialize density grids with white noise	
+    DensityGrid<real_t> *top(NULL);
+    PaddedDensitySubGrid<real_t> *coarse(NULL), *fine(NULL);
+    int nlevels = (int)levelmax-(int)levelmin+1;
+    
+    // do coarse level
+    top = new DensityGrid<real_t>( nbase, nbase, nbase );
+    LOGINFO("Performing noise convolution on level %3d",levelmin);
+    rand.load(*top,levelmin);
+    convolution::perform<real_t>( the_tf_kernel->fetch_kernel( levelmin, false ), reinterpret_cast<void*>( top->get_data_ptr() ), shift );
+    
+    delta.create_base_hierarchy(levelmin);
+    top->copy( *delta.get_grid(levelmin) );
+    
+    for( int i=1; i<nlevels; ++i )
+      {
+	LOGINFO("Performing noise convolution on level %3d...",levelmin+i);
+	/////////////////////////////////////////////////////////////////////////
+	//... add new refinement patch
+	LOGUSER("Allocating refinement patch");
+	LOGUSER("   offset=(%5d,%5d,%5d)",refh.offset(levelmin+i,0), 
+		refh.offset(levelmin+i,1), refh.offset(levelmin+i,2));
+	LOGUSER("   size  =(%5d,%5d,%5d)",refh.size(levelmin+i,0), 
+		refh.size(levelmin+i,1), refh.size(levelmin+i,2));
+	
+	fine = new PaddedDensitySubGrid<real_t>(refh.offset(levelmin+i,0), 
+						refh.offset(levelmin+i,1), 
+						refh.offset(levelmin+i,2),
+						refh.size(levelmin+i,0), 
+						refh.size(levelmin+i,1), 
+						refh.size(levelmin+i,2) );
+	/////////////////////////////////////////////////////////////////////////
+
+	// load white noise for patch
+	rand.load(*fine,levelmin+i);	
+	
+	convolution::perform<real_t>( the_tf_kernel->fetch_kernel( levelmin+i, true ), 
+				      reinterpret_cast<void*>( fine->get_data_ptr() ), shift );
+	
+	if( i==1 )
+	  fft_interpolate( *top, *fine, true );
 	else
-	{
-		std::cout << " - Using real-space transfer function kernel.\n";
-		LOGUSER("Using real-space transfer function kernel.");
-#ifdef SINGLE_PRECISION	
-		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_float" ];
-#else
-		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_double" ];
-#endif
-	}	
+	  fft_interpolate( *coarse, *fine, false ); 
 	
-	
-	//... create and initialize density grids with white noise	
+	delta.add_patch( refh.offset(levelmin+i,0), 
+			 refh.offset(levelmin+i,1),
+			 refh.offset(levelmin+i,2), 
+			 refh.size(levelmin+i,0),
+			 refh.size(levelmin+i,1),
+			 refh.size(levelmin+i,2) );
+
+	fine->copy_unpad( *delta.get_grid(levelmin+i) );
+
+	if( i==1 ) delete top;
+	else delete coarse;
+
+	coarse = fine;
+      }
+
+    delete coarse;
+
+  }else{
+	  
+        //... create and initialize density grids with white noise	
 	PaddedDensitySubGrid<real_t>* coarse(NULL), *fine(NULL);
-	
 	DensityGrid<real_t>* top(NULL);
-		
-	convolution::kernel *the_tf_kernel = the_kernel_creator->create( cf, ptf, refh, type );
-		
-	/***** PERFORM CONVOLUTIONS *****/
-	
+
 	if( levelmax == levelmin )
 	{
-		std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmax << " ..." << std::endl;
+		std::cout << " - Performing noise convolution on level " 
+			  << std::setw(2) << levelmax << " ..." << std::endl;
 		LOGUSER("Performing noise convolution on level %3d...",levelmax);
 		
 		top = new DensityGrid<real_t>( nbase, nbase, nbase );
@ -176,7 +462,9 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 		rand.load( *top, levelmin );


-		convolution::perform<real_t>( the_tf_kernel->fetch_kernel( levelmax ), reinterpret_cast<void*>( top->get_data_ptr() ), shift );
+		convolution::perform<real_t>( the_tf_kernel->fetch_kernel( levelmax ), 
+					      reinterpret_cast<void*>( top->get_data_ptr() ), 
+					      shift );
 		the_tf_kernel->deallocate();
 		
 		delta.create_base_hierarchy(levelmin);
@ -198,8 +486,13 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 			rand.load(*top,levelmin);
 		}
 		
-		fine = new PaddedDensitySubGrid<real_t>( refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2), 
-												refh.size(levelmin+i+1,0), 	refh.size(levelmin+i+1,1), 	refh.size(levelmin+i+1,2) );
+		fine = new PaddedDensitySubGrid<real_t>( refh.offset(levelmin+i+1,0), 
+							 refh.offset(levelmin+i+1,1),
+							 refh.offset(levelmin+i+1,2), 
+							 refh.size(levelmin+i+1,0), 
+							 refh.size(levelmin+i+1,1),
+							 refh.size(levelmin+i+1,2) );
+
 		rand.load(*fine,levelmin+i+1);
 		
 		//.......................................................................................................//
@ -210,7 +503,8 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 			/**********************************************************************************************************\
 			 *	multi-grid: top-level grid grids .....
 			 \**********************************************************************************************************/ 
-			std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmin+i << " ..." << std::endl;
+			std::cout << " - Performing noise convolution on level " 
+				  << std::setw(2) << levelmin+i << " ..." << std::endl;
 			LOGUSER("Performing noise convolution on level %3d",levelmin+i);
 			
 			LOGUSER("Creating base hierarchy...");
@ -311,7 +605,7 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 			coarse->subtract_oct_mean();
 			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()), shift );
 			coarse->subtract_mean();
-			coarse->upload_bnd_add( *delta.get_grid(levelmin+i-1) );
+			//coarse->upload_bnd_add( *delta.get_grid(levelmin+i-1) );
 			
 			//... clean up
 			the_tf_kernel->deallocate();
@ -363,10 +657,12 @@ void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type
 		coarse->subtract_mean();
 		
 		//... upload data to coarser grid
-		coarse->upload_bnd_add( *delta.get_grid(levelmax-1) );
+		//coarse->upload_bnd_add( *delta.get_grid(levelmax-1) );
 			
 		delete coarse;
 	}
+
+  }
 	
 	delete the_tf_kernel;
 			
@ -429,24 +725,34 @@ void normalize_density( grid_hierarchy& delta )
 	}
 }

-
 void coarsen_density( const refinement_hierarchy& rh, GridHierarchy<real_t>& u )
 {
-	for( int i=rh.levelmax(); i>0; --i )
-		mg_straight().restrict( *(u.get_grid(i)), *(u.get_grid(i-1)) );
-	
-	for( unsigned i=1; i<=rh.levelmax(); ++i )
+  unsigned levelmin_TF = u.levelmin();//rh.levelmin();
+    
+/*    for( int i=rh.levelmax(); i>0; --i )
+        mg_straight().restrict( *(u.get_grid(i)), *(u.get_grid(i-1)) );*/
+    
+  for( int i=levelmin_TF; i>0; --i )
+    mg_straight().restrict( *(u.get_grid(i)), *(u.get_grid(i-1)) );
+    
+  
+  //for( unsigned i=levelmin_TF+1; i<=rh.levelmax(); ++i )
+  for( unsigned i=1; i<=rh.levelmax(); ++i )
+    {
+      if( rh.offset(i,0) != u.get_grid(i)->offset(0)
+	  || rh.offset(i,1) != u.get_grid(i)->offset(1)
+	  || rh.offset(i,2) != u.get_grid(i)->offset(2)
+	  || rh.size(i,0) != u.get_grid(i)->size(0)
+	  || rh.size(i,1) != u.get_grid(i)->size(1)
+	  || rh.size(i,2) != u.get_grid(i)->size(2) )
 	{
-		if(	  rh.offset(i,0) != u.get_grid(i)->offset(0)
-   		   || rh.offset(i,1) != u.get_grid(i)->offset(1)
-		   || rh.offset(i,2) != u.get_grid(i)->offset(2)
-		   || rh.size(i,0) != u.get_grid(i)->size(0)
-		   || rh.size(i,1) != u.get_grid(i)->size(1)
-		   || rh.size(i,2) != u.get_grid(i)->size(2) )
-		{
-			u.cut_patch(i, rh.offset_abs(i,0), rh.offset_abs(i,1), rh.offset_abs(i,2), 
-						rh.size(i,0), rh.size(i,1), rh.size(i,2) );
-		}
+	  u.cut_patch(i, rh.offset_abs(i,0), rh.offset_abs(i,1), rh.offset_abs(i,2), 
+	  	      rh.size(i,0), rh.size(i,1), rh.size(i,2) );
 	}
+    }
+  
+    for( int i=rh.levelmax(); i>0; --i )
+        mg_straight().restrict( *(u.get_grid(i)), *(u.get_grid(i-1)) );
+
 }

--- a/densities.hh
+++ b/densities.hh
@ -46,7 +46,15 @@ public:
 	size_t ny_;	//!< number of grid cells in y-direction
 	size_t nz_;	//!< number of grid cells in z-direction
 	size_t nzp_;	//!< number of cells in memory (z-dir), used for Nyquist padding
+
+        size_t nv_[3];
 	
+        int ox_;        //!< offset of grid in x-direction
+        int oy_;        //!< offset of grid in y-direction
+        int oz_;        //!< offset of grid in z-direction
+
+        size_t ov_[3];
+
 	//! the actual data container in the form of a 1D array
 	std::vector< real_t > data_;
 	
@ -57,17 +65,29 @@ public:
 	 * @param nz the number of cells in z
 	 */
 	DensityGrid( unsigned nx, unsigned ny, unsigned nz )
-	: nx_(nx), ny_(ny), nz_(nz)
+	  : nx_(nx), ny_(ny), nz_(nz), nzp_( 2*(nz_/2+1) ), ox_(0), oy_(0), oz_(0)
 	{
-		data_.assign((size_t)nx_*(size_t)ny_*2*((size_t)nz_/2+1),0.0);
-		nzp_ = 2*(nz_/2+1);
+		data_.assign((size_t)nx_*(size_t)ny_*(size_t)nzp_,0.0);
+		nv_[0] = nx_; nv_[1] = ny_; nv_[2] = nz_;
+		ov_[0] = ox_; ov_[1] = oy_; ov_[2] = oz_;
 	}
 	
+        DensityGrid( unsigned nx, unsigned ny, unsigned nz, int ox, int oy, int oz )
+	  : nx_(nx), ny_(ny), nz_(nz), nzp_( 2*(nz_/2+1) ), ox_(ox), oy_(oy), oz_(oz)
+	{
+		data_.assign((size_t)nx_*(size_t)ny_*(size_t)nzp_,0.0);
+		nv_[0] = nx_; nv_[1] = ny_; nv_[2] = nz_;
+		ov_[0] = ox_; ov_[1] = oy_; ov_[2] = oz_;
+	}
+
 	//! copy constructor
 	explicit DensityGrid( const DensityGrid<real_t> & g )
-	: nx_(g.nx_), ny_(g.ny_), nz_(g.nz_), nzp_(g.nzp_)
+	  : nx_(g.nx_), ny_(g.ny_), nz_(g.nz_), nzp_(g.nzp_), 
+	    ox_(g.ox_), oy_(g.oy_), oz_(g.oz_)
 	{
 		data_ = g.data_;
+		nv_[0] = nx_; nv_[1] = ny_; nv_[2] = nz_;
+		ov_[0] = ox_; ov_[1] = oy_; ov_[2] = oz_;
 	}
 	
 	//!destructor
@ -80,6 +100,10 @@ public:
 	void clear( void )
 	{
 		nx_ = ny_ = nz_ = nzp_ = 0;
+		ox_ = oy_ = oz_ = 0;
+		nv_[0] = nv_[1] = nv_[2] = 0;
+		ov_[0] = ov_[1] = ov_[2] = 0;
+
 		data_.clear();
 		std::vector<real_t>().swap(data_);
 	}
@ -90,11 +114,13 @@ public:
 	 * @returns array size along dimension i
 	 */
 	size_t size( int i )
-	{
-		if(i==0) return nx_;
-		if(i==1) return ny_;
-		return ny_;
-	}
+        {  return nv_[i];  }
+
+        int offset( int i )
+        {  return ov_[i];  }
+
+
+        
 	
 	//! zeroes the density object
 	/*! sets all values to 0.0
@ -111,6 +137,9 @@ public:
 		ny_ = g.ny_;
 		nz_ = g.nz_;
 		nzp_= g.nzp_;
+		ox_ = g.ox_;
+		oy_ = g.oy_;
+		oz_ = g.oz_;
 		data_ = g.data_;
 		
 		return *this;
@ -413,16 +442,19 @@ public:
 	using DensityGrid<real_t>::nx_;
 	using DensityGrid<real_t>::ny_;
 	using DensityGrid<real_t>::nz_;
+        using DensityGrid<real_t>::ox_;
+	using DensityGrid<real_t>::oy_;
+	using DensityGrid<real_t>::oz_;
 	using DensityGrid<real_t>::data_;	
 	
 	using DensityGrid<real_t>::fill_rand;
 	using DensityGrid<real_t>::get_data_ptr;
 	
-	int ox_, oy_, oz_;
+	
 public:
 	
 	PaddedDensitySubGrid( int ox, int oy, int oz, unsigned nx, unsigned ny, unsigned nz)
-	: DensityGrid<real_t>(2*nx,2*ny,2*nz), ox_(ox), oy_(oy), oz_(oz)
+	  : DensityGrid<real_t>(2*nx,2*ny,2*nz,ox,oy,oz)
 	{

 		//if( 2*ox-(int)nx/4 < 0 || 2*oy-(int)ny/4 < 0 || 2*oz-(int)nz/4 < 0 )
@ -446,7 +478,7 @@ public:
 	}
 	
 	PaddedDensitySubGrid( const PaddedDensitySubGrid<real_t>& o )
-	: DensityGrid<real_t>( o ), ox_(o.ox_), oy_(o.oy_), oz_(o.oz_)
+	: DensityGrid<real_t>( o )
 	{ }
 	
 	void zero_padded_subgrid( unsigned oxsub, unsigned oysub, unsigned ozsub, unsigned lxsub, unsigned lysub, unsigned lzsub )
@ -564,8 +596,34 @@ public:
 						
 						if( ic >=0 && ic < (int)top.size(0) && jc >= 0 && jc < (int)top.size(1) && kc >= 0 && kc < (int)top.size(2) )
 						{
-							top(ic,jc,kc) += 0.125* ((*this)(ix,iy,iz)+(*this)(ix+1,iy,iz)+(*this)(ix,iy+1,iz)+(*this)(ix,iy,iz+1)
-											+(*this)(ix+1,iy+1,iz)+(*this)(ix+1,iy,iz+1)+(*this)(ix,iy+1,iz+1)+(*this)(ix+1,iy+1,iz+1) );
+						  top(ic,jc,kc) += 0.125* ((*this)(ix,iy,iz)+(*this)(ix+1,iy,iz)+(*this)(ix,iy+1,iz)+(*this)(ix,iy,iz+1)
+									   +(*this)(ix+1,iy+1,iz)+(*this)(ix+1,iy,iz+1)+(*this)(ix,iy+1,iz+1)+(*this)(ix+1,iy+1,iz+1) );
+						}
+					}
+				}
+	}
+    
+    template< class array3 >
+	void upload_bnd( array3& top )
+	{
+        #pragma omp parallel for
+		for( int ix=0; ix<(int)nx_; ix+=2 )
+			for( int iy=0; iy<(int)ny_; iy+=2 )
+				for( int iz=0; iz<(int)nz_; iz+=2 )
+				{
+					
+					if( (ix<(int)nx_/4||ix>=3*(int)nx_/4) || (iy<(int)ny_/4||iy>=3*(int)ny_/4) || (iz<(int)nz_/4||iz>=3*(int)nz_/4) )
+					{
+						int ic,jc,kc;
+						
+						ic = ox_+(ix-nx_/4)/2;
+						jc = oy_+(iy-ny_/4)/2;
+						kc = oz_+(iz-nz_/4)/2;
+						
+						if( ic >=0 && ic < (int)top.size(0) && jc >= 0 && jc < (int)top.size(1) && kc >= 0 && kc < (int)top.size(2) )
+						{
+							top(ic,jc,kc) == 0.125* ((*this)(ix,iy,iz)+(*this)(ix+1,iy,iz)+(*this)(ix,iy+1,iz)+(*this)(ix,iy,iz+1)
+                                                     +(*this)(ix+1,iy+1,iz)+(*this)(ix+1,iy,iz+1)+(*this)(ix,iy+1,iz+1)+(*this)(ix+1,iy+1,iz+1) );
 						}
 					}
 				}
@ -825,6 +883,87 @@ public:

 };

+template< typename M1, typename M2 >
+inline void enforce_coarse_mean_for_overlap( M1& v, M2& V )
+{
+	double coarsesum =0.0, finesum = 0.0, coarsemean, finemean;
+	size_t
+    nx = v.size(0)/4,
+    ny = v.size(1)/4,
+    nz = v.size(2)/4,
+    ox = v.offset(0)+nx/2,
+    oy = v.offset(1)+ny/2,
+    oz = v.offset(2)+nz/2;
+    
+    //    ic = ox_+(ix-nx_/4)/2;
+    
+	size_t coarsecount = 0, finecount = 0;
+	for( unsigned i=0; i<V.size(0); ++i )
+		for( unsigned j=0; j<V.size(1); ++j )
+			for( unsigned k=0; k<V.size(2); ++k )
+			{
+				if( (i >= ox && i < ox+nx) &&
+                   (j >= oy && j < oy+ny) &&
+                   (k >= oz && k < oz+nz ))
+				{
+					coarsesum += V(i,j,k);
+					++coarsecount;
+				}
+			}
+    
+    nx = v.size(0)/2;
+    ny = v.size(1)/2;
+    nz = v.size(2)/2;
+    ox = nx/2;
+    oy = ny/2;
+    oz = nz/2;
+    
+    for( unsigned i=0; i<v.size(0); ++i )
+		for( unsigned j=0; j<v.size(1); ++j )
+			for( unsigned k=0; k<v.size(2); ++k )
+			{
+                if( (i >= ox && i < ox+nx) &&
+                   (j >= oy && j < oy+ny) &&
+                   (k >= oz && k < oz+nz ))
+				{
+                    finesum += v(i,j,k);
+                    ++finecount;
+                }
+            }
+    
+    finemean = finesum/finecount;
+	coarsemean = coarsesum/coarsecount;
+	
+	
+	//std::cerr << "***\n";
+	
+	//double dcorr = (coarsesum-finesum)/finecount;//mean-fine_mean);//  innersum * innercount / outercount;
+	double dcorr = coarsemean - finemean;//(coarsemsum-finesum)/finecount;
+    for( unsigned i=0; i<v.size(0); ++i )
+		for( unsigned j=0; j<v.size(1); ++j )
+			for( unsigned k=0; k<v.size(2); ++k )
+                v(i,j,k) = v(i,j,k) + dcorr;
+    
+    
+    finesum = 0.0; finecount = 0;
+    for( unsigned i=0; i<v.size(0); ++i )
+		for( unsigned j=0; j<v.size(1); ++j )
+			for( unsigned k=0; k<v.size(2); ++k )
+			{
+                if( (i >= ox && i < ox+nx) &&
+                   (j >= oy && j < oy+ny) &&
+                   (k >= oz && k < oz+nz ))
+				{
+                    finesum += v(i,j,k);
+                    ++finecount;
+                }
+            }
+    finemean = finesum/finecount;
+    
+    std::cerr << "coarse mean = " << coarsemean << ", fine mean = " << finemean;
+}
+
+
 template< typename M >
 inline void enforce_coarse_mean( M& v, M& V )
 {
--- a/fft_operators.hh
+++ b/fft_operators.hh
@ -0,0 +1,204 @@
+#ifndef __FFT_OPERATORS_HH
+#define __FFT_OPERATORS_HH
+struct fft_interp{
+
+  template< typename m1, typename m2 >
+  void interpolate( m1& V, m2& v, bool fourier_splice = false ) const
+  {
+    int oxc = V.offset(0), oyc = V.offset(1), ozc = V.offset(2);
+    int oxf = v.offset(0), oyf = v.offset(1), ozf = v.offset(2);
+    
+    size_t nxf = v.size(0), nyf = v.size(1), nzf = v.size(2), nzfp = nzf+2;
+
+    // cut out piece of coarse grid that overlaps the fine:
+    assert( nxf%2==0 && nyf%2==0 && nzf%2==0 );
+
+    size_t nxc = nxf/2, nyc = nyf/2, nzc = nzf/2, nzcp = nzf/2+2;
+
+    fftw_real *rcoarse = new fftw_real[ nxc * nyc * nzcp ];
+    fftw_complex *ccoarse = reinterpret_cast<fftw_complex*> (rcoarse);
+
+    fftw_real *rfine = new fftw_real[ nxf * nyf * nzfp];
+    fftw_complex *cfine = reinterpret_cast<fftw_complex*> (rfine);
+
+    #pragma omp parallel for
+    for( int i=0; i<(int)nxc; ++i )
+      for( int j=0; j<(int)nyc; ++j )
+	for( int k=0; k<(int)nzc; ++k ) 
+	  {
+	    size_t q = ((size_t)i*nyc+(size_t)j)*nzcp+(size_t)k;
+	    rcoarse[q] = V( oxf+i, oyf+j, ozf+k );
+	  }
+
+    if( fourier_splice )
+      {
+	#pragma omp parallel for
+	for( int i=0; i<(int)nxf; ++i )
+	  for( int j=0; j<(int)nyf; ++j )
+	    for( int k=0; k<(int)nzf; ++k ) 
+	      {
+		size_t q = ((size_t)i*nyf+(size_t)j)*nzfp+(size_t)k;
+		rfine[q] = v(i,j,k);
+	      }
+      }
+    else
+      {
+	#pragma omp parallel for
+	for( size_t i=0; i<nxf*nyf*nzfp; ++i )
+	  rfine[i] = 0.0;
+      }
+
+#ifdef FFTW3
+#ifdef SINGLE_PRECISION
+    fftwf_plan
+      pc  = fftwf_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE),
+      pf  = fftwf_plan_dft_r2c_3d( nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE),
+      ipf = fftwf_plan_dft_c2r_3d( nxf, nyf, nzf, cfine, rfine, FFTW_ESTIMATE);
+    fftwf_execute( pc );
+    if( fourier_splice )
+      fftwf_execute( pf );
+#else
+    fftw_plan
+      pc  = fftw_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE),
+      pf  = fftw_plan_dft_r2c_3d( nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE),
+      ipf = fftw_plan_dft_c2r_3d( nxf, nyf, nzf, cfine, rfine, FFTW_ESTIMATE);
+    fftw_execute( pc );
+    if( fourier_splice )
+      fftwf_execute( pf );
+#endif
+#else
+    rfftwnd_plan 
+      pc  = rfftw3d_create_plan( nxc, nyc, nzc, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
+      pf  = rfftw3d_create_plan( nxf, nyf, nzf, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
+      ipf = rfftw3d_create_plan( nxf, nyf, nzf, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
+    
+#ifndef SINGLETHREAD_FFTW		
+    rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), pc, rcoarse, NULL );
+    if( fourier_splice )
+      rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), pf, rfine, NULL );
+#else
+    rfftwnd_one_real_to_complex( pc, rcoarse, NULL );
+    if( fourier_splice )
+      rfftwnd_one_real_to_complex( pf, rfine, NULL );
+#endif
+#endif
+
+    /*************************************************/
+    //.. perform actual interpolation
+    double fftnorm = 1.0/((double)nxf*(double)nyf*(double)nzf);
+    double sqrt8 = sqrt(8.0);
+
+    // 0 0
+    #pragma omp parallel for
+    for( int i=0; i<(int)nxc/2+1; i++ )
+      for( int j=0; j<(int)nyc/2+1; j++ )
+	for( int k=0; k<(int)nzc/2+1; k++ )
+	  {
+	    int ii(i),jj(j),kk(k);
+	    size_t qc,qf;
+	    qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
+	    qf = ((size_t)ii*(size_t)nyf+(size_t)jj)*(nzf/2+1)+(size_t)kk;
+            
+	    RE(cfine[qf]) = sqrt8*RE(ccoarse[qc]);
+	    IM(cfine[qf]) = sqrt8*IM(ccoarse[qc]);
+	  }
+
+    // 1 0
+    #pragma omp parallel for
+    for( int i=nxc/2; i<(int)nxc; i++ )
+      for( int j=0; j<(int)nyc/2+1; j++ )
+	for( int k=0; k<(int)nzc/2+1; k++ )
+	  {
+	    int ii(i+nx/2),jj(j),kk(k);
+	    size_t qc,qf;
+	    qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
+	    qf = ((size_t)ii*(size_t)ny+(size_t)jj)*(nz/2+1)+(size_t)kk;
+            
+	    RE(cfine[qf]) = sqrt8*RE(ccoarse[qc]);
+	    IM(cfine[qf]) = sqrt8*IM(ccoarse[qc]);
+            
+	    //if( k==0 & (i==(int)nxc/2 || j==(int)nyc/2) )
+	    //  IM(cfine[qf]) *= -1.0;
+	  }
+
+    // 0 1
+    #pragma omp parallel for
+    for( int i=0; i<(int)nxc/2+1; i++ )
+      for( int j=nyc/2; j<(int)nyc; j++ )
+	for( int k=0; k<(int)nzc/2+1; k++ )
+	  {
+	    int ii(i),jj(j+ny/2),kk(k);
+	    size_t qc,qf;
+	    qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
+	    qf = ((size_t)ii*(size_t)ny+(size_t)jj)*(nz/2+1)+(size_t)kk;
+            
+	    RE(cfine[qf]) = sqrt8*RE(ccoarse[qc]);
+	    IM(cfine[qf]) = sqrt8*IM(ccoarse[qc]);
+            
+	    //if( k==0 && (i==(int)nxc/2 || j==(int)nyc/2) )
+	    //  IM(cfine[qf]) *= -1.0;
+	  }
+    
+    // 1 1
+    #pragma omp parallel for
+    for( int i=nxc/2; i<(int)nxc; i++ )
+      for( int j=nyc/2; j<(int)nyc; j++ )
+	for( int k=0; k<(int)nzc/2+1; k++ )
+	  {
+	    int ii(i+nx/2),jj(j+ny/2),kk(k);
+	    size_t qc,qf;
+	    qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
+	    qf = ((size_t)ii*(size_t)nyf+(size_t)jj)*(nzf/2+1)+(size_t)kk;
+            
+	    RE(cfine[qf]) = sqrt8*RE(ccoarse[qc]);
+	    IM(cfine[qf]) = sqrt8*IM(ccoarse[qc]);
+	  }
+        
+    delete[] rcoarse;
+
+    /*************************************************/    
+
+#ifdef FFTW3
+  #ifdef SINGLE_PRECISION
+    fftwf_execute( ipf );
+    fftwf_destroy_plan(pf);
+    fftwf_destroy_plan(pc);
+    fftwf_destroy_plan(ipf);
+  #else
+    fftw_execute( ipf );
+    fftw_destroy_plan(pf);
+    fftw_destroy_plan(pc);
+    fftw_destroy_plan(ipf);
+  #endif
+#else
+  #ifndef SINGLETHREAD_FFTW		
+    rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), ipf, cfine, NULL );
+  #else
+    rfftwnd_one_complex_to_real( ipf, cfine, NULL );
+  #endif
+    fftwnd_destroy_plan(pf);
+    fftwnd_destroy_plan(pc);
+    fftwnd_destroy_plan(ipf);
+#endif
+
+    // copy back and normalize
+    #pragma omp parallel for
+    for( int i=0; i<(int)nxf; ++i )
+      for( int j=0; j<(int)nyf; ++j )
+	for( int k=0; k<(int)nzf; ++k ) 
+	  {
+	    size_t q = ((size_t)i*nyf+(size_t)j)*nzfp+(size_t)k;
+	    v(i,j,k) = rfine[q] * fftnorm;
+	  }
+
+    delete[] rcoarse;
+    delete[] rfine;
+
+  }
+
+
+
+};
+
+
+#endif //__FFT_OPERATORS_HH
--- a/main.cc
+++ b/main.cc
@ -270,6 +270,7 @@ double compute_finest_sigma( grid_hierarchy& u )
 /*****************************************************************************************************/

 region_generator_plugin *the_region_generator;
+RNG_plugin *the_random_number_generator;

 int main (int argc, const char * argv[]) 
 {
@ -286,8 +287,9 @@ int main (int argc, const char * argv[])
 	if( argc != 2 ){
 		std::cout << " This version is compiled with the following plug-ins:\n";
 		
-        print_region_generator_plugins();
+		print_region_generator_plugins();
 		print_transfer_function_plugins();
+		print_RNG_plugins();
 		print_output_plugins();
 		
 		std::cerr << "\n In order to run, you need to specify a parameter file!\n\n";
@ -390,7 +392,6 @@ int main (int argc, const char * argv[])
 	transfer_function_plugin *the_transfer_function_plugin
 		= select_transfer_function_plugin( cf );
    
-    
 	cosmology cosmo( cf );
 	
 	std::cout << " - starting at a=" << cosmo.astart << std::endl;
@ -430,6 +431,7 @@ int main (int argc, const char * argv[])
 	
    the_region_generator = select_region_generator_plugin( cf );
  
+    the_random_number_generator = select_RNG_plugin( cf );
 	//------------------------------------------------------------------------------
 	//... determine run parameters
 	//------------------------------------------------------------------------------
--- a/mesh.hh
+++ b/mesh.hh
--- a/plugins/convex_hull.hh
+++ b/plugins/convex_hull.hh
@ -216,18 +216,20 @@ struct convex_hull{
    }
    
    template< typename T >
-    bool check_point( const T* x ) const
+    bool check_point( const T* x, double dist = 0.0 ) const
    {
+        dist *= -1.0;
+        
        for( size_t i=0; i<normals_L_.size()/3; ++i )
        {
            double xp[3] = {x[0]-x0_L_[3*i+0],x[1]-x0_L_[3*i+1],x[2]-x0_L_[3*i+2]};
-            if( dot( xp, &normals_L_[3*i])<0.0 ) return false;
+            if( dot( xp, &normals_L_[3*i]) < dist ) return false;
        }
        
        for( size_t i=0; i<normals_U_.size()/3; ++i )
        {
            double xp[3] = {x[0]-x0_U_[3*i+0],x[1]-x0_U_[3*i+1],x[2]-x0_U_[3*i+2]};
-            if( dot( xp, &normals_U_[3*i])<0.0 ) return false;
+            if( dot( xp, &normals_U_[3*i]) < dist ) return false;
        }
        
        return true;
--- a/plugins/output_gadget2_2comp.cc
+++ b/plugins/output_gadget2_2comp.cc
--- a/plugins/output_gadget_tetmesh.cc
+++ b/plugins/output_gadget_tetmesh.cc
--- a/plugins/output_generic.cc
+++ b/plugins/output_generic.cc
@ -4,7 +4,7 @@
 a code to generate multi-scale initial conditions 
 for cosmological simulations 
 
- Copyright (C) 2010  Oliver Hahn
+ Copyright (C) 2010-13  Oliver Hahn
 
 */

@ -32,7 +32,7 @@ protected:
 				for(int k=-nb; k<n2+nb; ++k )
 					vdata.push_back( data(i,j,k) );
 		
-		unsigned nd[3] = { n0+2*nb,n1+2*nb,n2+2*nb	};
+		unsigned nd[3] = { (unsigned)(n0+2*nb),(unsigned)(n1+2*nb),(unsigned)(n2+2*nb)	};
 		HDFWriteDataset3D( fname, dname, nd, vdata);
 	}
 	
--- a/plugins/output_tipsy_resample.cc
+++ b/plugins/output_tipsy_resample.cc
--- a/plugins/random_music.cc
+++ b/plugins/random_music.cc
@ -0,0 +1,20 @@
+#include "random.hh"
+
+class RNG_music : public RNG_plugin{
+public:
+  explicit RNG_music( config_file& cf )
+  : RNG_plugin( cf )
+  { }
+    
+  ~RNG_music() { }
+    
+  bool is_multiscale() const
+  {   return true;   } 
+};
+
+
+namespace{
+  RNG_plugin_creator_concrete< RNG_music > creator("MUSIC");
+
+
+}
--- a/plugins/region_convex_hull.cc
+++ b/plugins/region_convex_hull.cc
@ -1,3 +1,13 @@
+/*
+ 
+ region_convex_hull.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010-13  Oliver Hahn
+ 
+ */
+
 #include <vector>
 #include <iostream>
 #include <cmath>
@ -21,6 +31,8 @@ private:
    double vfac_;
    bool do_extra_padding_;
    
+    std::vector<float> level_dist_;
+    
    void apply_shift( size_t Np, double *p, int *shift, int levelmin )
    {
        double dx = 1.0/(1<<levelmin);
@ -66,7 +78,7 @@ public:
        phull_->expand( sqrt(3.)*dx );
        
        // output the center
-        float c[3] = { phull_->centroid_[0], phull_->centroid_[1], phull_->centroid_[2] };
+        double c[3] = { phull_->centroid_[0], phull_->centroid_[1], phull_->centroid_[2] };
        LOGINFO("Region center from convex hull centroid determined at\n\t (%f,%f,%f)",c[0],c[1],c[2]);
        
        //-----------------------------------------------------------------
@ -79,6 +91,14 @@ public:
            if( output_plugin == std::string("grafic2") )
                do_extra_padding_ = true;
        }
+        
+        level_dist_.assign( levelmax+1, 0.0 );
+        // generate the higher level ellipsoids
+        for( int ilevel = levelmax_-1; ilevel > 0; --ilevel )
+        {
+            dx = 1.0/(1ul<<(ilevel));
+            level_dist_[ilevel-1] = level_dist_[ilevel] + padding_ * dx;
+        }
    }
    
    ~region_convex_hull_plugin()
@ -114,8 +134,8 @@ public:
        // it might have enlarged it, but who cares...
    }

-    bool query_point( double *x )
-    {   return phull_->check_point( x );   }
+    bool query_point( double *x, int ilevel )
+    {   return phull_->check_point( x, level_dist_[ilevel] );   }
    
    bool is_grid_dim_forced( size_t* ndims )
    {   return false;   }
@ -130,7 +150,7 @@ public:
    void get_center_unshifted( double *xc )
    {
        double dx = 1.0/(1<<shift_level);
-        float c[3] = { phull_->centroid_[0], phull_->centroid_[1], phull_->centroid_[2] };
+        double c[3] = { phull_->centroid_[0], phull_->centroid_[1], phull_->centroid_[2] };
        xc[0] = c[0]+shift[0]*dx;
        xc[1] = c[1]+shift[1]*dx;
        xc[2] = c[2]+shift[2]*dx;
--- a/plugins/region_ellipsoid.cc
+++ b/plugins/region_ellipsoid.cc
@ -1,4 +1,14 @@
 #include <vector>
+/*
+ 
+ region_ellipsoid.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010-13  Oliver Hahn
+ 
+ */
+
 #include <iostream>
 #include <cmath>
 #include <cassert>
@ -53,7 +63,7 @@ void Inverse_4x4( float *mat )
    double det; /* determinant */
    double dst[16];
                
-/* transpose matrix */
+    /* transpose matrix */
    for (int i = 0; i < 4; i++)
    {
        src[i] = mat[i*4];
@ -164,11 +174,13 @@ protected:
    float *Q;
    float *u;
    
+    float detA, detA13;
+    
    float v1[3],v2[3],v3[3],r1,r2,r3;
    
    float V[9], mu[3];
    
-    bool axes_computed;
+    bool axes_computed, hold_point_data;
    
    void compute_axes( void )
    {
@ -286,7 +298,7 @@ protected:
    
 public:
    min_ellipsoid( size_t N_, double* P )
-    : N( N_ ), axes_computed( false )
+    : N( N_ ), axes_computed( false ), hold_point_data( true )
    {
        // --- initialize ---
        LOGINFO("computing minimum bounding ellipsoid from %lld points",N);
@ -339,10 +351,13 @@ public:
        
        Inverse_3x3( Ainv, A );
        for( size_t i=0; i<9; ++i ){ A[i] /= 3.0; Ainv[i] *= 3.0; }
+        
+        detA = Determinant_3x3( A );
+        detA13 = pow( detA, 1.0/3.0 );
    }
    
    min_ellipsoid( const double* A_, const double *c_ )
-    : N( 0 ), axes_computed( false )
+    : N( 0 ), axes_computed( false ), hold_point_data( false )
    {
        for( int i=0; i<9; ++i )
        {   A[i] = A_[i]; Ainv[i] = 0.0;   }
@ -350,18 +365,51 @@ public:
            c[i] = c_[i];
    }
    
+    min_ellipsoid( const min_ellipsoid& e )
+    : N( 0 ), hold_point_data( false )
+    {
+        for( int i=0; i<16; ++i )
+            X[i] = e.X[i];
+        
+        for( int i=0; i<3; ++i )
+        {
+            c[i] = e.c[i];
+            v1[i] = e.v1[i];
+            v2[i] = e.v2[i];
+            v3[i] = e.v3[i];
+            mu[i] = e.mu[i];
+        }
+        
+        for( int i=0; i<9; ++i )
+        {
+            A[i] = e.A[i];
+            Ainv[i] = e.Ainv[i];
+            V[i] = e.V[i];
+        }
+        
+        N = e.N;
+        detA = e.detA;
+        detA13 = e.detA13;
+        axes_computed = e.axes_computed;
+    }
+    
    ~min_ellipsoid()
    {
-        delete[] u;
-        delete[] Q;
+        if( hold_point_data )
+        {
+            delete[] u;
+            delete[] Q;
+        }
    }
    
    template<typename T>
-    bool check_point( const T *x )
+    bool check_point( const T *x, double dist = 0.0 )
    {
-        float q[3] = {x[0]-c[0],x[1]-c[1],x[2]-c[2]};
+        dist = (dist + 1.0) * detA13;
        
-        double r = 0.0;
+        T q[3] = {x[0]-c[0],x[1]-c[1],x[2]-c[2]};
+        
+        T r = 0.0;
        for( int i=0; i<3; ++i )
            for( int j=0; j<3; ++j )
                r += q[i]*A[3*j+i]*q[j];
@ -416,17 +464,12 @@ public:
  
    void expand_ellipsoid( float dr )
    {
-      //print();
-        
-        
-        
-        
        if( !axes_computed )
        {
-            std::cerr << "computing axes.....\n";
+            LOGUSER("computing ellipsoid axes.....");
            compute_axes();
        }
-        
+        float muold[3] = {mu[0],mu[1],mu[2]};
        float munew[3];
        for( int i=0; i<3; ++i )
          munew[i] = sgn(mu[i])/sqr(1.0/sqrt(fabs(mu[i]))+dr);
@ -446,7 +489,11 @@ public:
        
        Inverse_3x3( A, Ainv );
      
-      //print();
+        LOGUSER("computing ellipsoid axes.....");
+        compute_axes();
+        
+        LOGINFO("mu = %f %f %f -> %f %f %f", muold[0], muold[1], muold[2], mu[0], mu[1], mu[2]);
+        //print();
    }
 };

@ -458,12 +505,10 @@ public:
 class region_ellipsoid_plugin : public region_generator_plugin{
 private:
    
-    min_ellipsoid *pellip_;
+    std::vector< min_ellipsoid * > pellip_;
    int shift[3], shift_level, padding_;
    double vfac_;
    bool do_extra_padding_;
-  
-    
    
    void apply_shift( size_t Np, double *p, int *shift, int levelmin )
    {
@ -482,6 +527,10 @@ public:
    {
        std::vector<double> pp;
        
+        for( unsigned i=0; i<=levelmax_; ++i )
+            pellip_.push_back( NULL );
+        
+        
        // sanity check
        if( !cf.containsKey("setup", "region_point_file") &&
           !( cf.containsKey("setup","region_ellipsoid_matrix[0]") &&
@ -498,12 +547,10 @@ public:
        padding_ = cf.getValue<int>("setup","padding");
        
        std::string point_file;
-        bool bfrom_file = true;
-        
+              
        if( cf.containsKey("setup", "region_point_file") )
        {
            point_file = cf.getValue<std::string>("setup","region_point_file");
-            bfrom_file = true;
            
            point_reader pfr;
            pfr.read_points_from_file( point_file, vfac_, pp );
@ -533,7 +580,9 @@ public:
                shift_level = point_levelmin;
            }
            
-            pellip_ = new min_ellipsoid( pp.size()/3, &pp[0] );
+            
+            
+            pellip_[levelmax_-1] = new min_ellipsoid( pp.size()/3, &pp[0] );
            
            
        } else {
@ -550,7 +599,7 @@ public:
            strtmp = cf.getValue<std::string>("setup","region_ellipsoid_center");
            sscanf( strtmp.c_str(), "%lf,%lf,%lf", &c[0],&c[1],&c[2] );
            
-            pellip_ = new min_ellipsoid( A, c );
+            pellip_[levelmax_-1] = new min_ellipsoid( A, c );
            
        }
        
@ -586,8 +635,8 @@ public:
        
        // output the center
        float c[3], A[9];
-        pellip_->get_center( c );
-        pellip_->get_matrix( A );
+        pellip_[levelmax_-1]->get_center( c );
+        pellip_[levelmax_-1]->get_matrix( A );
        
        LOGINFO("Region center for ellipsoid determined at\n\t xc = ( %f %f %f )",c[0],c[1],c[2]);
        LOGINFO("Ellipsoid matrix determined as\n\t      ( %f %f %f )\n\t  A = ( %f %f %f )\n\t      ( %f %f %f )",
@ -598,8 +647,18 @@ public:
        //expand the ellipsoid by one grid cell
      
        unsigned levelmax = cf.getValue<unsigned>("setup","levelmax");
+        unsigned npad = cf.getValue<unsigned>("setup","padding");
        double dx = 1.0/(1ul<<levelmax);
-        pellip_->expand_ellipsoid( dx );
+        pellip_[levelmax_-1]->expand_ellipsoid( dx );
+        
+        
+        // generate the higher level ellipsoids
+        for( int ilevel = levelmax_-1; ilevel > 0; --ilevel )
+        {
+            dx = 1.0/(1ul<<(ilevel));
+            pellip_[ ilevel-1 ] = new min_ellipsoid( *pellip_[ ilevel ] );
+            pellip_[ ilevel-1 ]->expand_ellipsoid( npad*dx );
+        }
        
        
        
@ -617,12 +676,21 @@ public:
    
    ~region_ellipsoid_plugin()
    {
-        delete pellip_;
+        for( unsigned i=0; i<=levelmax_; ++i )
+            if( pellip_[i] != NULL )
+                delete pellip_[i];
    }
    
    void get_AABB( double *left, double *right, unsigned level )
    {
-        pellip_->get_AABB( left, right );
+      if( level <= levelmin_ )
+        {
+            left[0] = left[1] = left[2] = 0.0;
+            right[0] = right[1] = right[2] = 1.0;
+            return;
+        }
+        
+        pellip_[level-1]->get_AABB( left, right );
      
        double dx = 1.0/(1ul<<level);
        double pad = (double)(padding_+1) * dx;
@ -645,8 +713,10 @@ public:
      // it might have enlarged it, but who cares...
    }
  
-    bool query_point( double *x )
-    {   return pellip_->check_point( x );   }
+    bool query_point( double *x, int level )
+    {
+        return pellip_[level]->check_point( x );
+    }
    
    bool is_grid_dim_forced( size_t* ndims )
    {   return false;   }
@ -654,7 +724,7 @@ public:
    void get_center( double *xc )
    {
        float c[3];
-        pellip_->get_center( c );
+        pellip_[levelmax_]->get_center( c );
        
        xc[0] = c[0];
        xc[1] = c[1];
@ -666,7 +736,7 @@ public:
    double dx = 1.0/(1<<shift_level);
    float c[3];

-    pellip_->get_center( c );        
+    pellip_[levelmax_]->get_center( c );

    xc[0] = c[0]+shift[0]*dx;
    xc[1] = c[1]+shift[1]*dx;
--- a/random.cc
+++ b/random.cc
@ -12,6 +12,57 @@

 // TODO: move all this into a plugin!!!

+std::map< std::string, RNG_plugin_creator *>& 
+get_RNG_plugin_map()
+{
+  static std::map< std::string, RNG_plugin_creator* > RNG_plugin_map;
+  return RNG_plugin_map;
+}
+
+void print_RNG_plugins()
+{
+  std::map< std::string, RNG_plugin_creator *>& m = get_RNG_plugin_map();
+  std::map< std::string, RNG_plugin_creator *>::iterator it;
+  it = m.begin();
+  std::cout << " - Available random number generator plug-ins:\n";
+  while( it!=m.end() )
+    {
+      if( (*it).second )
+	std::cout << "\t\'" << (*it).first << "\'\n";
+      ++it;
+    }	
+}
+
+RNG_plugin *select_RNG_plugin( config_file& cf )
+{
+	std::string rngname = cf.getValueSafe<std::string>( "random", "generator", "MUSIC" );
+	
+	RNG_plugin_creator *the_RNG_plugin_creator 
+	= get_RNG_plugin_map()[ rngname ];
+	
+	if( !the_RNG_plugin_creator )
+	{	
+		std::cerr << " - Error: random number generator plug-in \'" << rngname << "\' not found." << std::endl;
+		LOGERR("Invalid/Unregistered random number generator plug-in encountered : %s",rngname.c_str() );
+		print_RNG_plugins();
+		throw std::runtime_error("Unknown random number generator plug-in");
+		
+	}else
+	{	
+		std::cout << " - Selecting random number generator plug-in \'" << rngname << "\'..." << std::endl;
+		LOGUSER("Selecting random number generator plug-in  : %s",rngname.c_str() );
+	}
+	
+	RNG_plugin *the_RNG_plugin 
+	= the_RNG_plugin_creator->create( cf );
+	
+	return the_RNG_plugin;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////
+

 #if defined(FFTW3) && defined( SINGLE_PRECISION)
 //#define fftw_complex fftwf_complex
@ -524,12 +575,23 @@ random_numbers<T>::random_numbers( /*const*/ random_numbers <T>& rc, bool kdegra
 					if( j > nyc/2 ) jj += ny/2;
 					
 					size_t qc,qf;
+
+					double kx = (i <= (int)nxc/2)? (double)i : (double)(i-(int)nxc);
+					double ky = (j <= (int)nyc/2)? (double)j : (double)(j-(int)nyc);
+					double kz = (k <= (int)nzc/2)? (double)k : (double)(k-(int)nzc);
+					
 					
 					qc = ((size_t)i*nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
 					qf = ((size_t)ii*ny+(size_t)jj)*(nz/2+1)+(size_t)kk;
+
+					std::complex<double> val_fine(RE(cfine[qf]),IM(cfine[qf]));
+					double phase =  (kx/nxc + ky/nyc + kz/nzc) * 0.5 * M_PI;
+					std::complex<double> val_phas( cos(phase), sin(phase) );
+
+					val_fine *= val_phas * fftnorm/sqrt(8.0);
 					
-					RE(ccoarse[qc]) = 1.0/sqrt(8.0)*RE(cfine[qf])*fftnorm;
-					IM(ccoarse[qc]) = 1.0/sqrt(8.0)*IM(cfine[qf])*fftnorm;
+					RE(ccoarse[qc]) = val_fine.real();
+					IM(ccoarse[qc]) = val_fine.imag();
 				}
 		
 		delete[] rfine;
@ -647,7 +709,7 @@ random_numbers<T>::random_numbers( /*const*/ random_numbers <T>& rc, bool kdegra

 template< typename T >
 random_numbers<T>::random_numbers( random_numbers<T>& rc, unsigned cubesize, long baseseed, 
-								   bool kspace, int *x0_, int *lx_, bool zeromean )
+				   bool kspace,bool isolated, int *x0_, int *lx_, bool zeromean )
 : res_( 2*rc.res_ ), cubesize_( cubesize ), ncubes_( 1 ), baseseed_( baseseed )
 {
 	initialize();
@ -675,163 +737,210 @@ random_numbers<T>::random_numbers( random_numbers<T>& rc, unsigned cubesize, lon
 	if( kspace )
 	{
 		
-		LOGINFO("Generating a constrained random number set with seed %ld\n    using coarse mode replacement...",baseseed);
-		assert(lx[0]%2==0 && lx[1]%2==0 && lx[2]%2==0);
-		size_t nx=lx[0], ny=lx[1], nz=lx[2],
-		nxc=lx[0]/2, nyc=lx[1]/2, nzc=lx[2]/2;
+	  LOGINFO("lx = %d,%d,%d  x0 = %d,%d,%d",lx[0],lx[1],lx[2],x0[0],x0[1],x0[2]);
+	  LOGINFO("Generating a constrained random number set with seed %ld\n    using coarse mode replacement...",baseseed);
+	  assert(lx[0]%2==0 && lx[1]%2==0 && lx[2]%2==0);
+	  size_t nx=lx[0], ny=lx[1], nz=lx[2],
+	    nxc=lx[0]/2, nyc=lx[1]/2, nzc=lx[2]/2;
+	  
 		
-		
-		fftw_real *rfine = new fftw_real[nx*ny*(nz+2l)];
-		fftw_complex *cfine = reinterpret_cast<fftw_complex*> (rfine);
-		
-#ifdef FFTW3
-	#ifdef SINGLE_PRECISION
-		fftwf_plan
-			pf  = fftwf_plan_dft_r2c_3d( nx, ny, nz, rfine, cfine, FFTW_ESTIMATE),
-			ipf	= fftwf_plan_dft_c2r_3d( nx, ny, nz, cfine, rfine, FFTW_ESTIMATE);
-	#else
-		fftw_plan
-			pf  = fftw_plan_dft_r2c_3d( nx, ny, nz, rfine, cfine, FFTW_ESTIMATE),
-			ipf	= fftw_plan_dft_c2r_3d( nx, ny, nz, cfine, rfine, FFTW_ESTIMATE);
-	#endif
-#else
-		rfftwnd_plan 
-			pf	= rfftw3d_create_plan( nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
-			ipf	= rfftw3d_create_plan( nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
-#endif
-		
-#pragma omp parallel for
-		for( int i=0; i<(int)nx; i++ )
-			for( int j=0; j<(int)ny; j++ )
-				for( int k=0; k<(int)nz; k++ )
-				{
-					size_t q = ((size_t)i*(size_t)ny+(size_t)j)*(size_t)(nz+2)+(size_t)k;
-					rfine[q] = (*this)(x0[0]+i,x0[1]+j,x0[2]+k);
-				}
-		//this->free_all_mem();	// temporarily free memory, allocate again later
-		
-		
-		
-		fftw_real *rcoarse = new fftw_real[nxc*nyc*(nzc+2)];
-		fftw_complex *ccoarse = reinterpret_cast<fftw_complex*> (rcoarse);
+	  fftw_real *rfine = new fftw_real[nx*ny*(nz+2l)];
+	  fftw_complex *cfine = reinterpret_cast<fftw_complex*> (rfine);
 		
 #ifdef FFTW3
 #ifdef SINGLE_PRECISION
-		fftwf_plan pc  = fftwf_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE);
+	  fftwf_plan
+	    pf  = fftwf_plan_dft_r2c_3d( nx, ny, nz, rfine, cfine, FFTW_ESTIMATE),
+	    ipf	= fftwf_plan_dft_c2r_3d( nx, ny, nz, cfine, rfine, FFTW_ESTIMATE);
 #else
-		fftw_plan pc  = fftw_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE);
+	  fftw_plan
+	    pf  = fftw_plan_dft_r2c_3d( nx, ny, nz, rfine, cfine, FFTW_ESTIMATE),
+	    ipf	= fftw_plan_dft_c2r_3d( nx, ny, nz, cfine, rfine, FFTW_ESTIMATE);
 #endif
 #else
-		rfftwnd_plan pc	= rfftw3d_create_plan( nxc, nyc, nzc, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
-#endif		
-		
+	  rfftwnd_plan 
+	    pf	= rfftw3d_create_plan( nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
+	    ipf	= rfftw3d_create_plan( nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
+#endif
+	  
 #pragma omp parallel for
-		for( int i=0; i<(int)nxc; i++ )
-			for( int j=0; j<(int)nyc; j++ )
-				for( int k=0; k<(int)nzc; k++ )
-				{
-					size_t q = ((size_t)i*(size_t)nyc+(size_t)j)*(size_t)(nzc+2)+(size_t)k;
-					rcoarse[q] = rc(x0[0]/2+i,x0[1]/2+j,x0[2]/2+k);
-				}
+	  for( int i=0; i<(int)nx; i++ )
+	    for( int j=0; j<(int)ny; j++ )
+	      for( int k=0; k<(int)nz; k++ )
+		{
+		  size_t q = ((size_t)i*(size_t)ny+(size_t)j)*(size_t)(nz+2)+(size_t)k;
+		  rfine[q] = (*this)(x0[0]+i,x0[1]+j,x0[2]+k);
+		}
+	  //this->free_all_mem();	// temporarily free memory, allocate again later
+		
+		
+		
+	  fftw_real *rcoarse = new fftw_real[nxc*nyc*(nzc+2)];
+	  fftw_complex *ccoarse = reinterpret_cast<fftw_complex*> (rcoarse);
+		
 #ifdef FFTW3
-	#ifdef SINGLE_PRECISION
-		fftwf_execute( pc );
-		fftwf_execute( pf );
-	#else
-		fftw_execute( pc );
-		fftw_execute( pf );	
-	#endif
+#ifdef SINGLE_PRECISION
+	  fftwf_plan pc  = fftwf_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE);
+#else
+	  fftw_plan pc  = fftw_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE);
+#endif
+#else
+	  rfftwnd_plan pc	= rfftw3d_create_plan( nxc, nyc, nzc, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
+#endif		
+	  
+#pragma omp parallel for
+	  for( int i=0; i<(int)nxc; i++ )
+	    for( int j=0; j<(int)nyc; j++ )
+	      for( int k=0; k<(int)nzc; k++ )
+		{
+		  size_t q = ((size_t)i*(size_t)nyc+(size_t)j)*(size_t)(nzc+2)+(size_t)k;
+		  rcoarse[q] = rc(x0[0]/2+i,x0[1]/2+j,x0[2]/2+k);
+		}
+#ifdef FFTW3
+#ifdef SINGLE_PRECISION
+	  fftwf_execute( pc );
+	  fftwf_execute( pf );
+#else
+	  fftw_execute( pc );
+	  fftw_execute( pf );	
+#endif
 #else
 #ifndef SINGLETHREAD_FFTW		
-		rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), pc, rcoarse, NULL );
-		rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), pf, rfine, NULL );
+	  rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), pc, rcoarse, NULL );
+	  rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), pf, rfine, NULL );
 #else
-		rfftwnd_one_real_to_complex( pc, rcoarse, NULL );
-		rfftwnd_one_real_to_complex( pf, rfine, NULL );
+	  rfftwnd_one_real_to_complex( pc, rcoarse, NULL );
+	  rfftwnd_one_real_to_complex( pf, rfine, NULL );
 #endif
 #endif
-		
-		double fftnorm = 1.0/((double)nx*(double)ny*(double)nz);
-        double sqrt8 = sqrt(8.0);
+	  
+	  double fftnorm = 1.0/((double)nx*(double)ny*(double)nz);
+	  double sqrt8 = sqrt(8.0);
+        double phasefac = -0.5;//-1.0;//-0.125;
+
+	  //if( isolated ) phasefac *= 1.5;

        // embedding of coarse white noise by fourier interpolation
        
        // 0 0
-        #pragma omp parallel for
-		for( int i=0; i<(int)nxc/2+1; i++ )
-			for( int j=0; j<(int)nyc/2+1; j++ )
-				for( int k=0; k<(int)nzc/2+1; k++ )
-                {
-                    int ii(i),jj(j),kk(k);
-                    size_t qc,qf;
-					qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
-					qf = ((size_t)ii*(size_t)ny+(size_t)jj)*(nz/2+1)+(size_t)kk;
-                    
-					RE(cfine[qf]) = sqrt8*RE(ccoarse[qc]);
-					IM(cfine[qf]) = sqrt8*IM(ccoarse[qc]);
-                }
+#pragma omp parallel for
+	for( int i=0; i<(int)nxc/2+1; i++ )
+	  for( int j=0; j<(int)nyc/2+1; j++ )
+	    for( int k=0; k<(int)nzc/2+1; k++ )
+	      {
+		int ii(i),jj(j),kk(k);
+		size_t qc,qf;
+		qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
+		qf = ((size_t)ii*(size_t)ny+(size_t)jj)*(nz/2+1)+(size_t)kk;
+
+		double kx = (i <= (int)nxc/2)? (double)i : (double)(i-(int)nxc);
+		double ky = (j <= (int)nyc/2)? (double)j : (double)(j-(int)nyc);
+		double kz = (k <= (int)nzc/2)? (double)k : (double)(k-(int)nzc);
+					
+		double phase =  phasefac * (kx/nxc + ky/nyc + kz/nzc) * M_PI;
+		std::complex<double> val_phas( cos(phase), sin(phase) );
+
+		std::complex<double> val(RE(ccoarse[qc]),IM(ccoarse[qc]));
+		val *= sqrt8 * val_phas;
+                
+		RE(cfine[qf]) = val.real();
+		IM(cfine[qf]) = val.imag();
+
+		//if( k==0 & (i==(int)nxc/2 || j==(int)nyc/2) )
+		//  IM(cfine[qf]) *= -1.0;
+	      }
        // 1 0
-        #pragma omp parallel for
-		for( int i=nxc/2; i<(int)nxc; i++ )
-			for( int j=0; j<(int)nyc/2+1; j++ )
-				for( int k=0; k<(int)nzc/2+1; k++ )
-                {
-                    int ii(i+nx/2),jj(j),kk(k);
-                    size_t qc,qf;
-					qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
-					qf = ((size_t)ii*(size_t)ny+(size_t)jj)*(nz/2+1)+(size_t)kk;
-                    
-					RE(cfine[qf]) = sqrt8*RE(ccoarse[qc]);
-					IM(cfine[qf]) = sqrt8*IM(ccoarse[qc]);
-                    
-                    if( i==(int)nxc/2 || j==(int)nyc/2 )
-                        IM(cfine[qf]) *= -1.0;
-                }
+#pragma omp parallel for
+	for( int i=nxc/2; i<(int)nxc; i++ )
+	  for( int j=0; j<(int)nyc/2+1; j++ )
+	    for( int k=0; k<(int)nzc/2+1; k++ )
+	      {
+		int ii(i+nx/2),jj(j),kk(k);
+		size_t qc,qf;
+		qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
+		qf = ((size_t)ii*(size_t)ny+(size_t)jj)*(nz/2+1)+(size_t)kk;
+                
+		double kx = (i <= (int)nxc/2)? (double)i : (double)(i-(int)nxc);
+		double ky = (j <= (int)nyc/2)? (double)j : (double)(j-(int)nyc);
+		double kz = (k <= (int)nzc/2)? (double)k : (double)(k-(int)nzc);
+					
+		double phase =  phasefac * (kx/nxc + ky/nyc + kz/nzc) * M_PI;
+		std::complex<double> val_phas( cos(phase), sin(phase) );
+
+		std::complex<double> val(RE(ccoarse[qc]),IM(ccoarse[qc]));
+		val *= sqrt8 * val_phas;
+                
+		RE(cfine[qf]) = val.real();
+		IM(cfine[qf]) = val.imag();
+                
+		//if( k==0 & (i==(int)nxc/2 || j==(int)nyc/2) )
+		//IM(cfine[qf]) *= -1.0;
+	      }
        // 0 1
-        #pragma omp parallel for
-		for( int i=0; i<(int)nxc/2+1; i++ )
-			for( int j=nyc/2; j<(int)nyc; j++ )
-				for( int k=0; k<(int)nzc/2+1; k++ )
-                {
-                    int ii(i),jj(j+ny/2),kk(k);
-                    size_t qc,qf;
-					qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
-					qf = ((size_t)ii*(size_t)ny+(size_t)jj)*(nz/2+1)+(size_t)kk;
-                    
-					RE(cfine[qf]) = sqrt8*RE(ccoarse[qc]);
-					IM(cfine[qf]) = sqrt8*IM(ccoarse[qc]);
-                    
-                    if( i==(int)nxc/2 || j==(int)nyc/2 )
-                        IM(cfine[qf]) *= -1.0;
-                }
+#pragma omp parallel for
+	for( int i=0; i<(int)nxc/2+1; i++ )
+	  for( int j=nyc/2; j<(int)nyc; j++ )
+	    for( int k=0; k<(int)nzc/2+1; k++ )
+	      {
+		int ii(i),jj(j+ny/2),kk(k);
+		size_t qc,qf;
+		qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
+		qf = ((size_t)ii*(size_t)ny+(size_t)jj)*(nz/2+1)+(size_t)kk;
+                
+		double kx = (i <= (int)nxc/2)? (double)i : (double)(i-(int)nxc);
+		double ky = (j <= (int)nyc/2)? (double)j : (double)(j-(int)nyc);
+		double kz = (k <= (int)nzc/2)? (double)k : (double)(k-(int)nzc);
+					
+		double phase =  phasefac * (kx/nxc + ky/nyc + kz/nzc)  * M_PI;
+		std::complex<double> val_phas( cos(phase), sin(phase) );
+
+		std::complex<double> val(RE(ccoarse[qc]),IM(ccoarse[qc]));
+		val *= sqrt8 * val_phas;
+                
+		RE(cfine[qf]) = val.real();
+		IM(cfine[qf]) = val.imag();
+                
+		//if( k==0 && (i==(int)nxc/2 || j==(int)nyc/2) )
+		//  IM(cfine[qf]) *= -1.0;
+	      }
        
        // 1 1
-        #pragma omp parallel for
-		for( int i=nxc/2; i<(int)nxc; i++ )
-			for( int j=nyc/2; j<(int)nyc; j++ )
-				for( int k=0; k<(int)nzc/2+1; k++ )
-                {
-                    int ii(i+nx/2),jj(j+ny/2),kk(k);
-                    size_t qc,qf;
-					qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
-					qf = ((size_t)ii*(size_t)ny+(size_t)jj)*(nz/2+1)+(size_t)kk;
-                    
-					RE(cfine[qf]) = sqrt8*RE(ccoarse[qc]);
-					IM(cfine[qf]) = sqrt8*IM(ccoarse[qc]);
-                }
-        		
-		delete[] rcoarse;
-		
-		#pragma omp parallel for
-		for( int i=0; i<(int)nx; i++ )
-			for( int j=0; j<(int)ny; j++ )
-				for( int k=0; k<(int)nz/2+1; k++ )
-				{
-					size_t q = ((size_t)i*ny+(size_t)j)*(nz/2+1)+(size_t)k;
+#pragma omp parallel for
+	for( int i=nxc/2; i<(int)nxc; i++ )
+	  for( int j=nyc/2; j<(int)nyc; j++ )
+	    for( int k=0; k<(int)nzc/2+1; k++ )
+	      {
+		int ii(i+nx/2),jj(j+ny/2),kk(k);
+		size_t qc,qf;
+		qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
+		qf = ((size_t)ii*(size_t)ny+(size_t)jj)*(nz/2+1)+(size_t)kk;
+                
+		double kx = (i <= (int)nxc/2)? (double)i : (double)(i-(int)nxc);
+		double ky = (j <= (int)nyc/2)? (double)j : (double)(j-(int)nyc);
+		double kz = (k <= (int)nzc/2)? (double)k : (double)(k-(int)nzc);
+					
+		double phase =  phasefac * (kx/nxc + ky/nyc + kz/nzc) * M_PI;
+		std::complex<double> val_phas( cos(phase), sin(phase) );

-					RE(cfine[q]) *= fftnorm;
-					IM(cfine[q]) *= fftnorm;
-				}
+		std::complex<double> val(RE(ccoarse[qc]),IM(ccoarse[qc]));
+		val *= sqrt8 * val_phas;
+                
+		RE(cfine[qf]) = val.real();
+		IM(cfine[qf]) = val.imag();
+	      }
+        
+	delete[] rcoarse;
+	
+#pragma omp parallel for
+	for( int i=0; i<(int)nx; i++ )
+	  for( int j=0; j<(int)ny; j++ )
+	    for( int k=0; k<(int)nz/2+1; k++ )
+	      {
+		size_t q = ((size_t)i*ny+(size_t)j)*(nz/2+1)+(size_t)k;
+		
+		RE(cfine[q]) *= fftnorm;
+		IM(cfine[q]) *= fftnorm;
+	      }

 		
 		
@ -1269,190 +1378,188 @@ void random_number_generator<rng,T>::parse_rand_parameters( void )
 template< typename rng, typename T >
 void random_number_generator<rng,T>::correct_avg( int icoarse, int ifine )
 {
-	int shift[3], levelmin_poisson;
-	shift[0] = pcf_->getValue<int>("setup","shift_x");
-	shift[1] = pcf_->getValue<int>("setup","shift_y");
-	shift[2] = pcf_->getValue<int>("setup","shift_z");
-	
-	levelmin_poisson = pcf_->getValue<unsigned>("setup","levelmin");
-	
-	int lfacc = 1<<(icoarse-levelmin_poisson);
-	
-	
-	
-	
-	int nc[3], i0c[3], nf[3], i0f[3];
-	if( icoarse != levelmin_ )
-	{
-		nc[0]  = 2*prefh_->size(icoarse, 0);
-		nc[1]  = 2*prefh_->size(icoarse, 1);
-		nc[2]  = 2*prefh_->size(icoarse, 2);
-		i0c[0] = prefh_->offset_abs(icoarse, 0) - lfacc*shift[0] - nc[0]/4;
-		i0c[1] = prefh_->offset_abs(icoarse, 1) - lfacc*shift[1] - nc[1]/4;
-		i0c[2] = prefh_->offset_abs(icoarse, 2) - lfacc*shift[2] - nc[2]/4;
-		
-	}
-	else
-	{
-		nc[0]  = prefh_->size(icoarse, 0);
-		nc[1]  = prefh_->size(icoarse, 1);
-		nc[2]  = prefh_->size(icoarse, 2);
-		i0c[0] = - lfacc*shift[0];
-		i0c[1] = - lfacc*shift[1];
-		i0c[2] = - lfacc*shift[2];
-	}
-	nf[0]  = 2*prefh_->size(ifine, 0);
-	nf[1]  = 2*prefh_->size(ifine, 1);
-	nf[2]  = 2*prefh_->size(ifine, 2);
-	i0f[0] = prefh_->offset_abs(ifine, 0) - 2*lfacc*shift[0] - nf[0]/4;
-	i0f[1] = prefh_->offset_abs(ifine, 1) - 2*lfacc*shift[1] - nf[1]/4;
-	i0f[2] = prefh_->offset_abs(ifine, 2) - 2*lfacc*shift[2] - nf[2]/4;
-	
-	//.................................
-	if( disk_cached_ )
-	{
-		char fncoarse[128], fnfine[128];
-		sprintf(fncoarse,"wnoise_%04d.bin",icoarse);
-		sprintf(fnfine,"wnoise_%04d.bin",ifine);
-		
-		std::ifstream 
-		iffine( fnfine, std::ios::binary ), 
-		ifcoarse( fncoarse, std::ios::binary );
-		
-		int nxc,nyc,nzc,nxf,nyf,nzf;
-		iffine.read( reinterpret_cast<char*> (&nxf), sizeof(unsigned) );
-		iffine.read( reinterpret_cast<char*> (&nyf), sizeof(unsigned) );
-		iffine.read( reinterpret_cast<char*> (&nzf), sizeof(unsigned) );
-		
-		ifcoarse.read( reinterpret_cast<char*> (&nxc), sizeof(unsigned) );
-		ifcoarse.read( reinterpret_cast<char*> (&nyc), sizeof(unsigned) );
-		ifcoarse.read( reinterpret_cast<char*> (&nzc), sizeof(unsigned) );
-		
-		if( nxf!=nf[0] || nyf!=nf[1] || nzf!=nf[2] || nxc!=nc[0] || nyc!=nc[1] || nzc!=nc[2] )
-		{
+    int shift[3], levelmin_poisson;
+    shift[0] = pcf_->getValue<int>("setup","shift_x");
+    shift[1] = pcf_->getValue<int>("setup","shift_y");
+    shift[2] = pcf_->getValue<int>("setup","shift_z");
+    
+    levelmin_poisson = pcf_->getValue<unsigned>("setup","levelmin");
+    
+    int lfacc = 1<<(icoarse-levelmin_poisson);
+    
+    
+    
+    
+    int nc[3], i0c[3], nf[3], i0f[3];
+    if( icoarse != levelmin_ )
+    {
+        nc[0]  = 2*prefh_->size(icoarse, 0);
+        nc[1]  = 2*prefh_->size(icoarse, 1);
+        nc[2]  = 2*prefh_->size(icoarse, 2);
+        i0c[0] = prefh_->offset_abs(icoarse, 0) - lfacc*shift[0] - nc[0]/4;
+        i0c[1] = prefh_->offset_abs(icoarse, 1) - lfacc*shift[1] - nc[1]/4;
+        i0c[2] = prefh_->offset_abs(icoarse, 2) - lfacc*shift[2] - nc[2]/4;
+        
+    }
+    else
+    {
+        nc[0]  = prefh_->size(icoarse, 0);
+        nc[1]  = prefh_->size(icoarse, 1);
+        nc[2]  = prefh_->size(icoarse, 2);
+        i0c[0] = - lfacc*shift[0];
+        i0c[1] = - lfacc*shift[1];
+        i0c[2] = - lfacc*shift[2];
+    }
+    nf[0]  = 2*prefh_->size(ifine, 0);
+    nf[1]  = 2*prefh_->size(ifine, 1);
+    nf[2]  = 2*prefh_->size(ifine, 2);
+    i0f[0] = prefh_->offset_abs(ifine, 0) - 2*lfacc*shift[0] - nf[0]/4;
+    i0f[1] = prefh_->offset_abs(ifine, 1) - 2*lfacc*shift[1] - nf[1]/4;
+    i0f[2] = prefh_->offset_abs(ifine, 2) - 2*lfacc*shift[2] - nf[2]/4;
+    
+    //.................................
+    if( disk_cached_ )
+    {
+        char fncoarse[128], fnfine[128];
+        sprintf(fncoarse,"wnoise_%04d.bin",icoarse);
+        sprintf(fnfine,"wnoise_%04d.bin",ifine);
+        
+        std::ifstream
+        iffine( fnfine, std::ios::binary ),
+        ifcoarse( fncoarse, std::ios::binary );
+        
+        int nxc,nyc,nzc,nxf,nyf,nzf;
+        iffine.read( reinterpret_cast<char*> (&nxf), sizeof(unsigned) );
+        iffine.read( reinterpret_cast<char*> (&nyf), sizeof(unsigned) );
+        iffine.read( reinterpret_cast<char*> (&nzf), sizeof(unsigned) );
+        
+        ifcoarse.read( reinterpret_cast<char*> (&nxc), sizeof(unsigned) );
+        ifcoarse.read( reinterpret_cast<char*> (&nyc), sizeof(unsigned) );
+        ifcoarse.read( reinterpret_cast<char*> (&nzc), sizeof(unsigned) );
+        
+        if( nxf!=nf[0] || nyf!=nf[1] || nzf!=nf[2] || nxc!=nc[0] || nyc!=nc[1] || nzc!=nc[2] )
+        {
            LOGERR("White noise file mismatch. This should not happen. Notify a developer!");
            throw std::runtime_error("White noise file mismatch. This should not happen. Notify a developer!");
        }
-		int nxd(nxf/2),nyd(nyf/2),nzd(nzf/2);
-		std::vector<T> deg_rand( (size_t)nxd*(size_t)nyd*(size_t)nzd, 0.0 );
-		double fac = 1.0/sqrt(8.0);
-		
-		for( int i=0, ic=0; i<nxf; i+=2, ic++ )
-		{	
-			std::vector<T> fine_rand( 2*nyf*nzf, 0.0 );
-			iffine.read( reinterpret_cast<char*> (&fine_rand[0]), 2*nyf*nzf*sizeof(T) );
-			
+        int nxd(nxf/2),nyd(nyf/2),nzd(nzf/2);
+        std::vector<T> deg_rand( (size_t)nxd*(size_t)nyd*(size_t)nzd, 0.0 );
+        double fac = 1.0/sqrt(8.0);
+        
+        for( int i=0, ic=0; i<nxf; i+=2, ic++ )
+        {
+            std::vector<T> fine_rand( 2*nyf*nzf, 0.0 );
+            iffine.read( reinterpret_cast<char*> (&fine_rand[0]), 2*nyf*nzf*sizeof(T) );
+            
 #pragma omp parallel for
-			for( int j=0; j<nyf; j+=2 )
-				for( int k=0; k<nzf; k+=2 )
-				{
-					int jc = j/2, kc = k/2;
-					//size_t qc = (((size_t)i/2)*(size_t)nyd+((size_t)j/2))*(size_t)nzd+((size_t)k/2);
-					size_t qc = ((size_t)(ic*nyd+jc))*(size_t)nzd+(size_t)kc;
-					
-					size_t qf[8];
-					qf[0] = (0*(size_t)nyf+(size_t)j+0)*(size_t)nzf+(size_t)k+0;
-					qf[1] = (0*(size_t)nyf+(size_t)j+0)*(size_t)nzf+(size_t)k+1;
-					qf[2] = (0*(size_t)nyf+(size_t)j+1)*(size_t)nzf+(size_t)k+0;
-					qf[3] = (0*(size_t)nyf+(size_t)j+1)*(size_t)nzf+(size_t)k+1;
-					qf[4] = (1*(size_t)nyf+(size_t)j+0)*(size_t)nzf+(size_t)k+0;
-					qf[5] = (1*(size_t)nyf+(size_t)j+0)*(size_t)nzf+(size_t)k+1;
-					qf[6] = (1*(size_t)nyf+(size_t)j+1)*(size_t)nzf+(size_t)k+0;
-					qf[7] = (1*(size_t)nyf+(size_t)j+1)*(size_t)nzf+(size_t)k+1;
-					
-					double d = 0.0;
-					for( int q=0; q<8; ++q )
-						d += fac*fine_rand[qf[q]];
-					
-					//deg_rand[qc] += d;
-					deg_rand[qc] = d;
-				}
-		}
-		
-		//... now deg_rand holds the oct-averaged fine field, store this in the coarse field
-		std::vector<T> coarse_rand(nxc*nyc*nzc,0.0);
-		ifcoarse.read( reinterpret_cast<char*> (&coarse_rand[0]), nxc*nyc*nzc*sizeof(T) );
-		
-		int di,dj,dk;
-		
-		di = i0f[0]/2-i0c[0];
-		dj = i0f[1]/2-i0c[1];
-		dk = i0f[2]/2-i0c[2];
-		
+            for( int j=0; j<nyf; j+=2 )
+                for( int k=0; k<nzf; k+=2 )
+                {
+                    int jc = j/2, kc = k/2;
+                    //size_t qc = (((size_t)i/2)*(size_t)nyd+((size_t)j/2))*(size_t)nzd+((size_t)k/2);
+                    size_t qc = ((size_t)(ic*nyd+jc))*(size_t)nzd+(size_t)kc;
+                    
+                    size_t qf[8];
+                    qf[0] = (0*(size_t)nyf+(size_t)j+0)*(size_t)nzf+(size_t)k+0;
+                    qf[1] = (0*(size_t)nyf+(size_t)j+0)*(size_t)nzf+(size_t)k+1;
+                    qf[2] = (0*(size_t)nyf+(size_t)j+1)*(size_t)nzf+(size_t)k+0;
+                    qf[3] = (0*(size_t)nyf+(size_t)j+1)*(size_t)nzf+(size_t)k+1;
+                    qf[4] = (1*(size_t)nyf+(size_t)j+0)*(size_t)nzf+(size_t)k+0;
+                    qf[5] = (1*(size_t)nyf+(size_t)j+0)*(size_t)nzf+(size_t)k+1;
+                    qf[6] = (1*(size_t)nyf+(size_t)j+1)*(size_t)nzf+(size_t)k+0;
+                    qf[7] = (1*(size_t)nyf+(size_t)j+1)*(size_t)nzf+(size_t)k+1;
+                    
+                    double d = 0.0;
+                    for( int q=0; q<8; ++q )
+                        d += fac*fine_rand[qf[q]];
+                    
+                    //deg_rand[qc] += d;
+                    deg_rand[qc] = d;
+                }
+        }
+        
+        //... now deg_rand holds the oct-averaged fine field, store this in the coarse field
+        std::vector<T> coarse_rand(nxc*nyc*nzc,0.0);
+        ifcoarse.read( reinterpret_cast<char*> (&coarse_rand[0]), nxc*nyc*nzc*sizeof(T) );
+        
+        int di,dj,dk;
+        
+        di = i0f[0]/2-i0c[0];
+        dj = i0f[1]/2-i0c[1];
+        dk = i0f[2]/2-i0c[2];
+        
 #pragma omp parallel for
-		for( int i=0; i<nxd; i++ )
-			for( int j=0; j<nyd; j++ )
-				for( int k=0; k<nzd; k++ )
-				{
-					//unsigned qc = (((i+di+nxc)%nxc)*nyc+(((j+dj+nyc)%nyc)))*nzc+((k+dk+nzc)%nzc);
-					
-					if( i+di < 0 || i+di >= nxc || j+dj < 0 || j+dj >= nyc || k+dk < 0 || k+dk >= nzc )
-						continue;
-					
-					size_t qc = (((size_t)i+(size_t)di)*(size_t)nyc+((size_t)j+(size_t)dj))*(size_t)nzc+(size_t)(k+dk);
-					size_t qcd = (size_t)(i*nyd+j)*(size_t)nzd+(size_t)k;
-					
-					coarse_rand[qc] = deg_rand[qcd];
-				}
-		
-		deg_rand.clear();
-		
-		ifcoarse.close();
-		std::ofstream ofcoarse( fncoarse, std::ios::binary|std::ios::trunc );
-		ofcoarse.write( reinterpret_cast<char*> (&nxc), sizeof(unsigned) );
-		ofcoarse.write( reinterpret_cast<char*> (&nyc), sizeof(unsigned) );
-		ofcoarse.write( reinterpret_cast<char*> (&nzc), sizeof(unsigned) );
-		ofcoarse.write( reinterpret_cast<char*> (&coarse_rand[0]), nxc*nyc*nzc*sizeof(T) );
-		ofcoarse.close();	
-	}
-	else
-	{
-		int nxc,nyc,nzc,nxf,nyf,nzf;
-		nxc = nc[0]; nyc = nc[1]; nzc = nc[2];
-		nxf = nf[0]; nyf = nf[1]; nzf = nf[2];
-		int nxd(nxf/2),nyd(nyf/2),nzd(nzf/2);
-		
-		int di,dj,dk;
-		
-		di = i0f[0]/2-i0c[0];
-		dj = i0f[1]/2-i0c[1];
-		dk = i0f[2]/2-i0c[2];
-		
-		double fac = 1.0/sqrt(8.0);
-		
+        for( int i=0; i<nxd; i++ )
+            for( int j=0; j<nyd; j++ )
+                for( int k=0; k<nzd; k++ )
+                {
+                    //unsigned qc = (((i+di+nxc)%nxc)*nyc+(((j+dj+nyc)%nyc)))*nzc+((k+dk+nzc)%nzc);
+                    
+                    if( i+di < 0 || i+di >= nxc || j+dj < 0 || j+dj >= nyc || k+dk < 0 || k+dk >= nzc )
+                        continue;
+                    
+                    size_t qc = (((size_t)i+(size_t)di)*(size_t)nyc+((size_t)j+(size_t)dj))*(size_t)nzc+(size_t)(k+dk);
+                    size_t qcd = (size_t)(i*nyd+j)*(size_t)nzd+(size_t)k;
+                    
+                    coarse_rand[qc] = deg_rand[qcd];
+                }
+        
+        deg_rand.clear();
+        
+        ifcoarse.close();
+        std::ofstream ofcoarse( fncoarse, std::ios::binary|std::ios::trunc );
+        ofcoarse.write( reinterpret_cast<char*> (&nxc), sizeof(unsigned) );
+        ofcoarse.write( reinterpret_cast<char*> (&nyc), sizeof(unsigned) );
+        ofcoarse.write( reinterpret_cast<char*> (&nzc), sizeof(unsigned) );
+        ofcoarse.write( reinterpret_cast<char*> (&coarse_rand[0]), nxc*nyc*nzc*sizeof(T) );
+        ofcoarse.close();
+    }
+    else
+    {
+        int nxc,nyc,nzc,nxf,nyf,nzf;
+        nxc = nc[0]; nyc = nc[1]; nzc = nc[2];
+        nxf = nf[0]; nyf = nf[1]; nzf = nf[2];
+        int nxd(nxf/2),nyd(nyf/2),nzd(nzf/2);
+        
+        int di,dj,dk;
+        
+        di = i0f[0]/2-i0c[0];
+        dj = i0f[1]/2-i0c[1];
+        dk = i0f[2]/2-i0c[2];
+        
+        double fac = 1.0/sqrt(8.0);
+        
 #pragma omp parallel for
-		for( int i=0; i<nxd; i++ )
-			for( int j=0; j<nyd; j++ )
-				for( int k=0; k<nzd; k++ )
-				{
-					if( i+di < 0 || i+di >= nxc || j+dj < 0 || j+dj >= nyc || k+dk < 0 || k+dk >= nzc )
-						continue;
-					
-					size_t qf[8];
-					qf[0] = (size_t)((2*i+0)*nyf+2*j+0)*(size_t)nzf+(size_t)(2*k+0);
-					qf[1] = (size_t)((2*i+0)*nyf+2*j+0)*(size_t)nzf+(size_t)(2*k+1);
-					qf[2] = (size_t)((2*i+0)*nyf+2*j+1)*(size_t)nzf+(size_t)(2*k+0);
-					qf[3] = (size_t)((2*i+0)*nyf+2*j+1)*(size_t)nzf+(size_t)(2*k+1);
-					qf[4] = (size_t)((2*i+1)*nyf+2*j+0)*(size_t)nzf+(size_t)(2*k+0);
-					qf[5] = (size_t)((2*i+1)*nyf+2*j+0)*(size_t)nzf+(size_t)(2*k+1);
-					qf[6] = (size_t)((2*i+1)*nyf+2*j+1)*(size_t)nzf+(size_t)(2*k+0);
-					qf[7] = (size_t)((2*i+1)*nyf+2*j+1)*(size_t)nzf+(size_t)(2*k+1);
-					
-					double finesum = 0.0;
-					for( int q=0; q<8; ++q )
-						finesum += fac*(*mem_cache_[ifine-levelmin_])[qf[q]];
-					
-					size_t qc = ((size_t)(i+di)*nyc+(size_t)(j+dj))*(size_t)nzc+(size_t)(k+dk);
-					
-					(*mem_cache_[icoarse-levelmin_])[qc] = finesum;
-				}						
-	}
-	
-	
+        for( int i=0; i<nxd; i++ )
+            for( int j=0; j<nyd; j++ )
+                for( int k=0; k<nzd; k++ )
+                {
+                    if( i+di < 0 || i+di >= nxc || j+dj < 0 || j+dj >= nyc || k+dk < 0 || k+dk >= nzc )
+                        continue;
+                    
+                    size_t qf[8];
+                    qf[0] = (size_t)((2*i+0)*nyf+2*j+0)*(size_t)nzf+(size_t)(2*k+0);
+                    qf[1] = (size_t)((2*i+0)*nyf+2*j+0)*(size_t)nzf+(size_t)(2*k+1);
+                    qf[2] = (size_t)((2*i+0)*nyf+2*j+1)*(size_t)nzf+(size_t)(2*k+0);
+                    qf[3] = (size_t)((2*i+0)*nyf+2*j+1)*(size_t)nzf+(size_t)(2*k+1);
+                    qf[4] = (size_t)((2*i+1)*nyf+2*j+0)*(size_t)nzf+(size_t)(2*k+0);
+                    qf[5] = (size_t)((2*i+1)*nyf+2*j+0)*(size_t)nzf+(size_t)(2*k+1);
+                    qf[6] = (size_t)((2*i+1)*nyf+2*j+1)*(size_t)nzf+(size_t)(2*k+0);
+                    qf[7] = (size_t)((2*i+1)*nyf+2*j+1)*(size_t)nzf+(size_t)(2*k+1);
+                    
+                    double finesum = 0.0;
+                    for( int q=0; q<8; ++q )
+                        finesum += fac*(*mem_cache_[ifine-levelmin_])[qf[q]];
+                    
+                    size_t qc = ((size_t)(i+di)*nyc+(size_t)(j+dj))*(size_t)nzc+(size_t)(k+dk);
+                    
+                    (*mem_cache_[icoarse-levelmin_])[qc] = finesum;
+                }
+    }
+    
+    
 }

-
-
 template< typename rng, typename T >
 void random_number_generator<rng,T>::compute_random_numbers( void )
 {
@ -1467,7 +1574,7 @@ void random_number_generator<rng,T>::compute_random_numbers( void )
 	if( levelmin_seed_ < levelmin_ )
 	{
 		if( rngfnames_[levelmin_seed_].size() > 0 )
-			randc[levelmin_seed_] 
+			randc[levelmin_seed_]
 			= new rng( 1<<levelmin_seed_, rngfnames_[levelmin_seed_], rndsign );
 		else
 			randc[levelmin_seed_]
@ -1561,13 +1668,13 @@ void random_number_generator<rng,T>::compute_random_numbers( void )
 		x0[2] = prefh_->offset_abs(ilevel, 2) - lfac*shift[2] - lx[2]/4;
 		
 		if( randc[ilevel] == NULL )
-			randc[ilevel] = new rng( *randc[ilevel-1], ran_cube_size_, rngseeds_[ilevel], kavg, x0, lx );
+		  randc[ilevel] = new rng( *randc[ilevel-1], ran_cube_size_, rngseeds_[ilevel], kavg, ilevel==levelmin_+1, x0, lx );
 		delete randc[ilevel-1];
 		randc[ilevel-1] = NULL;
 		
 		//... apply constraints to this level, if any
 		//if( ilevel == levelmax_ )
-		constraints.apply( ilevel, x0, lx, randc[ilevel] );
+		//constraints.apply( ilevel, x0, lx, randc[ilevel] );
 		
 		//... store numbers
 		store_rnd( ilevel, randc[ilevel] );
@ -1575,15 +1682,11 @@ void random_number_generator<rng,T>::compute_random_numbers( void )
 	
 	delete randc[levelmax_];
 	randc[levelmax_] = NULL;
-	
-	//... make sure that the coarse grid contains oct averages where it overlaps with a fine grid
-	//... this also ensures that constraints enforced on fine grids are carried to the coarser grids
-	for( int ilevel=levelmax_; ilevel>levelmin_; --ilevel )
-		correct_avg( ilevel-1, ilevel );
-	
-	
-	
-	
+    
+    //... make sure that the coarse grid contains oct averages where it overlaps with a fine grid
+    //... this also ensures that constraints enforced on fine grids are carried to the coarser grids
+    //for( int ilevel=levelmax_; ilevel>levelmin_; --ilevel )
+    //    correct_avg( ilevel-1, ilevel );
 	
 	//.. we do not have random numbers for a coarse level, generate them
 	/*if( levelmax_rand_ >= (int)levelmin_ )
--- a/random.hh
+++ b/random.hh
@ -32,6 +32,49 @@
 #include "constraints.hh"


+class RNG_plugin{
+protected:
+  config_file *pcf_;		//!< pointer to config_file from which to read parameters
+public:
+  explicit RNG_plugin( config_file& cf )
+  : pcf_( &cf )
+  { }
+  virtual ~RNG_plugin() { }
+  virtual bool is_multiscale() const  = 0; 
+};
+
+
+struct RNG_plugin_creator
+{
+  virtual RNG_plugin * create( config_file& cf ) const = 0;
+  virtual ~RNG_plugin_creator() { }
+};
+
+std::map< std::string, RNG_plugin_creator *>&
+get_RNG_plugin_map();
+
+void print_RNG_plugins( void );
+
+template< class Derived >
+struct RNG_plugin_creator_concrete : public RNG_plugin_creator
+{
+  //! register the plugin by its name
+  RNG_plugin_creator_concrete( const std::string& plugin_name )
+  {
+    get_RNG_plugin_map()[ plugin_name ] = this;
+  }
+
+  //! create an instance of the plugin
+  RNG_plugin* create( config_file& cf ) const
+  {
+    return new Derived( cf );
+  }
+};
+
+typedef RNG_plugin RNG_instance;
+RNG_plugin *select_RNG_plugin( config_file& cf );
+
+
 /*!
 * @brief encapsulates all things random number generator related
 */
@ -152,7 +195,7 @@ public:
 	
 	//! constructor for constrained fine field
 	random_numbers( random_numbers<T>& rc, unsigned cubesize, long baseseed, 
-				    bool kspace=false, int *x0_=NULL, int *lx_=NULL, bool zeromean=true );
+			bool kspace=false, bool isolated=false, int *x0_=NULL, int *lx_=NULL, bool zeromean=true );
 	
 	//! constructor
 	random_numbers( unsigned res, unsigned cubesize, long baseseed, bool zeromean=true );
@ -308,22 +351,52 @@ public:
 			
 			if( nx!=(int)A.size(0) || ny!=(int)A.size(1) || nz!=(int)A.size(2) )
 			{	
-				LOGERR("White noise file is not aligned with array. File: [%d,%d,%d]. Mem: [%d,%d,%d].",nx,ny,nz,A.size(0),A.size(1),A.size(2));
-				throw std::runtime_error("White noise file is not aligned with array. This is an internal inconsistency and bad.");
-			}
+
+			  if( nx==(int)A.size(0)*2 && ny==(int)A.size(1)*2 && nz==(int)A.size(2)*2 )
+			    {
+			      std::cerr << "CHECKPOINT" << std::endl;
+
+
+			      int ox = nx/4, oy = ny/4, oz = nz/4;
+			      std::vector<T> slice( ny*nz, 0.0 );
+
+			      for( int i=0; i<nx; ++i )
+				{
+				  ifs.read( reinterpret_cast<char*> ( &slice[0] ), ny*nz*sizeof(T) );
+			      
+				  if( i<ox ) continue;
+				  if( i>=3*ox ) break;
+
+                                  #pragma omp parallel for
+				  for( int j=oy; j<3*oy; ++j )
+				    for( int k=oz; k<3*oz; ++k )
+				      A(i-ox,j-oy,k-oz) = slice[j*nz+k];
+				}		
+			  
+			      ifs.close();	
+			    }
+			  else
+			    {
+			      LOGERR("White noise file is not aligned with array. File: [%d,%d,%d]. Mem: [%d,%d,%d].",
+				     nx,ny,nz,A.size(0),A.size(1),A.size(2));
+			      throw std::runtime_error("White noise file is not aligned with array. This is an internal inconsistency and bad.");
+			    }
+			}else{
 			
-			for( int i=0; i<nx; ++i )
-			{
-				std::vector<T> slice( ny*nz, 0.0 );
-				ifs.read( reinterpret_cast<char*> ( &slice[0] ), ny*nz*sizeof(T) );
-				
-				#pragma omp parallel for
-				for( int j=0; j<ny; ++j )
-					for( int k=0; k<nz; ++k )
-						A(i,j,k) = slice[j*nz+k];
-				
-			}		
-			ifs.close();	
+			  for( int i=0; i<nx; ++i )
+			    {
+			      std::vector<T> slice( ny*nz, 0.0 );
+			      ifs.read( reinterpret_cast<char*> ( &slice[0] ), ny*nz*sizeof(T) );
+			      
+                              #pragma omp parallel for
+			      for( int j=0; j<ny; ++j )
+				for( int k=0; k<nz; ++k )
+				  A(i,j,k) = slice[j*nz+k];
+			      
+			    }		
+			  
+			  ifs.close();	
+			}
 		}
 		else
 		{
--- a/region_generator.cc
+++ b/region_generator.cc
@ -173,8 +173,10 @@ public:
      //fprintf(stderr,"left = %f,%f,%f - right = %f,%f,%f\n",left[0],left[1],left[2],right[0],right[1],right[2]);
    }
    
-    bool query_point( double *x )
+    bool query_point( double *x, int ilevel )
    {
+        return true;
+/*
        bool check = true;
        double dx;
        for( int i=0; i<3; ++i )
@ -186,6 +188,8 @@ public:
            check &= ((dx >= padding_fine_) & (dx <= lxref_[i]-padding_fine_));
        }
        return check;
+  */      
+        
    }
    
    bool is_grid_dim_forced( size_t* ndims )
--- a/region_generator.hh
+++ b/region_generator.hh
@ -12,10 +12,14 @@
 class region_generator_plugin{
 public:
    config_file *pcf_;
+    unsigned levelmin_, levelmax_;
+    
 public:
    region_generator_plugin( config_file& cf )
    : pcf_( &cf )
    {
+        levelmin_ = cf.getValue<int>("setup","levelmin");
+        levelmax_ = cf.getValue<int>("setup","levelmax");
    }
    
    //! destructor
@ -25,7 +29,7 @@ public:
    virtual void get_AABB( double *left, double *right, unsigned level) = 0;
    
    //! query whether a point intersects the region
-    virtual bool query_point( double *x ) = 0;
+    virtual bool query_point( double *x, int level ) = 0;
    
    //! query whether the region generator explicitly forces the grid dimensions
    virtual bool is_grid_dim_forced( size_t *ndims ) = 0;
--- a/transfer_function.hh
+++ b/transfer_function.hh
@ -532,7 +532,7 @@ public:
 		
 		for( unsigned i=0; i<r.size(); ++i )
 		{
-			if( r[i] > rmin && r[i] < rmax )
+			if( r[i] > rmin/512. && r[i] < rmax )
 			{
 				xsp.push_back( log10(r[i]) );
 				ysp.push_back( T[i]*r[i]*r[i] );
@ -676,7 +676,17 @@ public:
 		y2 = m_ytable[i+1];
 		
 		//divide by r**2 because r^2 T is tabulated
-		return (real_t)((y1 + (y2-y1)*(ii-(double)i))/r2);
+		//return (real_t)((y1 + (y2-y1)*(ii-(double)i))/r2);
+        
+        real_t retval = (real_t)((y1 + (y2-y1)*(ii-(double)i))/r2);
+        
+        if( retval != retval ){
+            std::cerr << "FAILURE FAILURE FAILURE" << std::endl;
+            fprintf(stderr,"r2 = %f, r = %f, i = %d, y1 = %f, y2 = %f, xtable[i]=%f",r2,r,i,y1,y2, m_xtable[i]);
+            abort();
+        }
+        
+        return retval;
 	}
 };