initial local commit of MUSIC 2.0

2024-09-19 17:03:46 +02:00 · 2013-10-10 23:24:21 +02:00 · 2013-10-10 23:24:21 +02:00 · 7177cfe2bf
commit 7177cfe2bf
61 changed files with 33893 additions and 0 deletions
--- a/125
+++ b/125
@ -0,0 +1,125 @@
+##############################################################################
+### compile time configuration options
+FFTW3		= yes
+MULTITHREADFFTW	= yes
+SINGLEPRECISION	= no
+HAVEHDF5        = yes
+HAVEBOXLIB	= no
+BOXLIB_HOME     = ${HOME}/nyx_tot_sterben/BoxLib
+
+##############################################################################
+### compiler and path settings
+CC      = g++
+OPT     = -Wall -Wno-unknown-pragmas -O3 -g -mtune=native
+CFLAGS  =  
+LFLAGS  = -lgsl -lgslcblas 
+CPATHS  = -I. -I/opt/local/include -I/usr/local/include
+LPATHS  = -L/opt/local/lib -L/usr/local/lib
+
+##############################################################################
+# if you have FFTW 2.1.5 or 3.x with multi-thread support, you can enable the 
+# option MULTITHREADFFTW
+# Threading setup. When MULTITHREADFFTW is "yes":
+#   - OpenMP is switched on for both compiling and linking (the Intel MPI
+#     wrapper mpiicpc gets -openmp, every other compiler gets -fopenmp),
+#   - the threaded FFTW library matching FFTW3/SINGLEPRECISION is appended
+#     to LFLAGS.
+# Otherwise the code is compiled with -DSINGLETHREAD_FFTW.
+ifeq ($(strip $(MULTITHREADFFTW)), yes)
+  ifeq ($(CC), mpiicpc)
+    CFLAGS += -openmp
+    LFLAGS += -openmp
+  else
+    CFLAGS += -fopenmp
+    LFLAGS += -fopenmp
+  endif
+  # FFTW 3.x: a single threads library, "f" suffix for single precision
+  ifeq ($(strip $(FFTW3)),yes)
+	ifeq ($(strip $(SINGLEPRECISION)), yes)
+		LFLAGS  +=  -lfftw3f_threads
+	else
+		LFLAGS  +=  -lfftw3_threads
+	endif
+  else
+    # FFTW 2.x: separate real ("r") and complex libraries, prefixed "s"
+    # (single) or "d" (double)
+    ifeq ($(strip $(SINGLEPRECISION)), yes)
+      LFLAGS  += -lsrfftw_threads -lsfftw_threads
+    else
+      LFLAGS  += -ldrfftw_threads -ldfftw_threads
+    endif
+  endif
+else
+  CFLAGS  += -DSINGLETHREAD_FFTW
+endif
+
+# tell the sources which FFTW API generation to compile against
+ifeq ($(strip $(FFTW3)),yes)
+  CFLAGS += -DFFTW3
+endif
+
+##############################################################################
+# this section makes sure that the correct FFTW libraries are linked
+# Select the serial FFTW libraries matching the precision and API generation.
+# FFTW3 is always tested through $(strip ...) so that stray whitespace in the
+# setting cannot pick FFTW2 libraries here while -DFFTW3 is defined above.
+ifeq ($(strip $(SINGLEPRECISION)), yes)
+  CFLAGS  += -DSINGLE_PRECISION
+  ifeq ($(strip $(FFTW3)),yes)
+    LFLAGS += -lfftw3f
+  else
+    LFLAGS  += -lsrfftw -lsfftw
+  endif
+else
+  ifeq ($(strip $(FFTW3)),yes)
+    LFLAGS += -lfftw3
+  else
+    LFLAGS  += -ldrfftw -ldfftw
+  endif
+endif
+
+##############################################################################
+#if you have HDF5 installed, you can also enable the following options
+ifeq ($(strip $(HAVEHDF5)), yes)
+  OPT += -DH5_USE_16_API -DHAVE_HDF5
+  LFLAGS += -lhdf5
+endif
+
+##############################################################################
+CFLAGS += $(OPT)
+TARGET  = MUSIC
+# object files of the core program; every plugins/*.cc source is added
+# automatically (each object is listed exactly once)
+OBJS    = output.o transfer_function.o Numerics.o defaults.o constraints.o random.o\
+		convolution_kernel.o region_generator.o densities.o cosmology.o poisson.o\
+		log.o main.o \
+		$(patsubst plugins/%.cc,plugins/%.o,$(wildcard plugins/*.cc))
+
+##############################################################################
+# stuff for BoxLib
+# Default when BoxLib is disabled.
+# NOTE(review): make does not strip quotes, so this is the literal text "" and
+# not an empty value; it is only echoed by the 'bla' target in that case.
+BLOBJS = ""
+ifeq ($(strip $(HAVEBOXLIB)), yes)
+  IN_MUSIC = YES
+  TOP = ${PWD}/plugins/nyx_plugin
+  # the included makefile presumably redefines CC, so save and restore it
+  CCbla := $(CC)
+  include plugins/nyx_plugin/Make.ic
+  CC  := $(CCbla)
+  # INCLUDE_LOCATIONS, objEXETempDir and objForExecs are not set in this file;
+  # presumably they come from Make.ic -- TODO confirm
+  CPATHS += $(INCLUDE_LOCATIONS)
+  LPATHS += -L$(objEXETempDir)
+  BLOBJS = $(foreach obj,$(objForExecs),plugins/boxlib_stuff/$(obj))
+#
+endif
+
+##############################################################################
+# these targets do not produce files of the same name; declaring them phony
+# keeps a stray file called 'all', 'bla' or 'clean' from disabling them
+.PHONY: all bla clean
+
+# default target: build all objects and link the executable
+all: $(OBJS) $(TARGET) Makefile
+#	cd plugins/boxlib_stuff; make
+
+# debugging aid: print the list of BoxLib objects that would be linked
+bla:
+	echo $(BLOBJS)
+
+# Link step. With BoxLib enabled the Nyx plugin is built first by a sub-make.
+# '$(MAKE) -C dir' is used instead of 'cd dir; make': the latter keeps going
+# and runs make in the current directory when the cd fails, and it bypasses
+# the make program/flags the user invoked.
+ifeq ($(strip $(HAVEBOXLIB)), yes)
+$(TARGET): $(OBJS) plugins/nyx_plugin/*.cpp
+	$(MAKE) -C plugins/nyx_plugin BOXLIB_HOME=$(BOXLIB_HOME) FFTW3=$(FFTW3) SINGLE=$(SINGLEPRECISION)
+	$(CC) $(LPATHS) -o $@ $^ $(LFLAGS) $(BLOBJS) -lifcore
+else
+$(TARGET): $(OBJS)
+	$(CC) $(LPATHS) -o $@ $^ $(LFLAGS)
+endif
+
+#%.o: %.cc *.hh Makefile 
+%.o: %.cc *.hh
+	$(CC) $(CFLAGS) $(CPATHS) -c $< -o $@
+
+# Remove the object files (and, with BoxLib enabled, clean the Nyx plugin).
+# Each recipe line runs in its own shell, so no directory needs to be saved
+# and restored around the sub-make; '$(MAKE) -C' changes directory for the
+# sub-make only.
+clean:
+	rm -rf $(OBJS)
+ifeq ($(strip $(HAVEBOXLIB)), yes)
+	$(MAKE) -C plugins/nyx_plugin realclean BOXLIB_HOME=$(BOXLIB_HOME)
+endif
+	
--- a/Numerics.cc
+++ b/Numerics.cc
@ -0,0 +1,49 @@
+/*
+ 
+ numerics.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+*/
+
+#ifdef WITH_MPI
+  #ifdef MANNO
+    #include <mpi.h>
+  #else
+    #include <mpi++.h>
+  #endif
+#endif
+#include <iostream>
+#include "Numerics.hh"
+
+
+#ifndef REL_PRECISION
+#define REL_PRECISION 1.e-5
+#endif
+
+//! Numerically integrates func over [a,b] with GSL's adaptive QAG rule.
+/*!
+ * @param func   integrand with the GSL callback signature
+ * @param a      lower integration limit
+ * @param b      upper integration limit
+ * @param params opaque pointer handed through to func
+ * @return the integral, converted to real_t
+ *
+ * The relative tolerance is REL_PRECISION (absolute tolerance 0). If the
+ * error estimate reported by GSL exceeds that tolerance, a warning is written
+ * to std::cerr and the best estimate is returned anyway.
+ */
+real_t integrate( double (* func) (double x, void * params), double a, double b, void *params )
+{
+	const size_t max_intervals = 100000;
+
+	gsl_function F;
+	F.function = func;
+	F.params = params;
+
+	double result = 0.0;
+	double error = 0.0;
+
+	//... GSL's default handler aborts the program; disable it for the duration
+	//... of the call and remember whatever handler the caller had installed
+	gsl_error_handler_t *previous_handler = gsl_set_error_handler_off ();
+
+	gsl_integration_workspace *w = gsl_integration_workspace_alloc(max_intervals);
+	gsl_integration_qag( &F, a, b, 0, REL_PRECISION, max_intervals, GSL_INTEG_GAUSS61, w, &result, &error );
+	gsl_integration_workspace_free(w);
+
+	//... restore the caller's handler rather than forcing the default one
+	gsl_set_error_handler(previous_handler);
+
+	//... compare magnitudes so that negative integrals are checked as well
+	if( std::fabs(error) > REL_PRECISION*std::fabs(result) )
+		std::cerr << " - Warning: no convergence in function 'integrate', rel. error=" << error/result << std::endl;
+
+	return (real_t)result;
+}
--- a/Numerics.hh
+++ b/Numerics.hh
@ -0,0 +1,96 @@
+/*
+ 
+ numerics.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#ifndef __NUMERICS_HH
+#define __NUMERICS_HH
+
+#ifdef WITH_MPI
+  #ifdef MANNO
+    #include <mpi.h>
+  #else
+    #include <mpi++.h>
+  #endif
+#endif
+
+#include <cmath>
+#include <gsl/gsl_integration.h>
+#include <gsl/gsl_errno.h>
+
+#include <vector>
+#include <algorithm>
+#include "general.hh"
+
+
+
+real_t integrate( double (* func) (double x, void * params), double a, double b, void *params=NULL);
+
+//! int type that is allowed to alias other types (used to inspect float bits)
+typedef __attribute__((__may_alias__)) int aint;
+
+//! Fast approximate base-2 logarithm of a float.
+/*!
+ * Splits the IEEE-754 bit pattern of val into exponent and mantissa: the
+ * exponent supplies the integer part, a quadratic polynomial in the mantissa
+ * (rescaled to [1,2)) supplies the remainder.
+ * NOTE(review): relies on int and float having the same size; the argument is
+ * presumably expected to be positive and finite -- confirm with callers.
+ */
+inline float fast_log2 (float val)
+{
+	aint * const bits = reinterpret_cast <aint *> (&val);
+	aint         word = *bits;
+
+	//... biased exponent, offset so that the polynomial below completes it
+	const int    expo = ((word >> 23) & 255) - 128;
+
+	//... overwrite the exponent field with the bias => val becomes mantissa in [1,2)
+	word &= ~(255 << 23);
+	word += 127 << 23;
+	*bits = word;
+
+	const float mant = val;
+	const float poly = ((-1.0f/3) * mant + 2) * mant - 2.0f/3;
+
+	return (poly + expo);
+}
+
+//! Fast approximate natural logarithm: fast_log2(val) scaled by ln(2).
+inline float fast_log (const float &val)
+{
+	const float log2_val = fast_log2 (val);
+	return (log2_val * 0.69314718f);
+}
+
+//! Fast approximate base-10 logarithm: fast_log2(val) scaled by log10(2).
+inline float fast_log10 (const float &val)
+{
+	const float log2_val = fast_log2 (val);
+	return (log2_val * 0.3010299956639812f);
+}
+
+//! Bisection search for the table interval that brackets x.
+/*!
+ * @param x  value to look up
+ * @param vx monotonic table (ascending or descending); taken by reference so
+ *           that the table is not copied on every lookup
+ * @return index i of the lower interval end, clamped to [0, vx.size()-2] so
+ *         that vx[i] and vx[i+1] are always valid; 0 if the table has fewer
+ *         than two entries
+ */
+inline unsigned locate( const double x, const std::vector<double>& vx )
+{
+	const long unsigned n = vx.size();
+
+	//... no interval exists; also avoids unsigned underflow of n-1 and n-2
+	if( n < 2 )
+		return 0;
+
+	const bool ascnd = (vx[n-1]>=vx[0]);
+	long unsigned jl = 0;
+	long unsigned ju = n-1;
+
+	while( ju-jl > 1 ) {
+		const long unsigned jm = (ju+jl)>>1;
+		if( (x >= vx[jm]) == ascnd )
+			jl = jm;
+		else
+			ju = jm;
+	}
+
+	return (unsigned)std::min(n-2,jl);
+}
+
+
+//! Piecewise-linear interpolation in a table (xx,yy).
+/*!
+ * @param x  abscissa at which to interpolate
+ * @param xx table abscissae, searched with locate()
+ * @param yy table ordinates
+ * @return yy[0] for x below xx[0], the last yy for x at or beyond the last xx,
+ *         otherwise the value of the straight line through the bracketing
+ *         pair of table points
+ */
+inline real_t linint( const double x, const std::vector<double>& xx, const std::vector<double>& yy )
+{
+	const unsigned idx = locate(x,xx);
+
+	//... outside the table: hold the boundary value
+	if( x<xx[0] )
+		return yy[0];
+	if( x>=xx[xx.size()-1] )
+		return yy[yy.size()-1];
+
+	//... straight line y = slope*x + offset through points idx and idx+1
+	const double inv_dx = 1.0/(xx[idx+1]-xx[idx]);
+	const double slope  = (yy[idx+1]-yy[idx])*inv_dx;
+	const double offset = (yy[idx]*xx[idx+1]-xx[idx]*yy[idx+1])*inv_dx;
+
+	return slope*x+offset;
+}
+
+
+#endif
+
+
--- a/README.md
+++ b/README.md
@ -0,0 +1,38 @@
+MUSIC - multi-scale cosmological initial conditions
+===================================================
+
+MUSIC is a computer program to generate nested grid initial conditions for
+high-resolution "zoom" cosmological simulations. A detailed description
+of the algorithms can be found in [Hahn & Abel (2011)][1]. You can
+download the user's guide [here][3]. Please consider joining the
+[user mailing list][2].
+
+Current MUSIC key features are:
+
+- Supports output for RAMSES, ENZO, Arepo, Gadget-2/3, ART, Pkdgrav/Gasoline 
+and NyX via plugins. New codes can be added.
+
+- Support for first (1LPT) and second order (2LPT) Lagrangian perturbation 
+theory, local Lagrangian approximation (LLA) for baryons with grid codes.
+
+- Pluggable transfer functions, currently CAMB, Eisenstein&Hu, BBKS, Warm 
+Dark Matter variants. Distinct baryon+CDM fields.
+
+- Minimum bounding ellipsoid and convex hull shaped high-res regions supported 
+with most codes, supports refinement mask generation for RAMSES.
+
+- Parallelized with OpenMP
+    
+- Requires FFTW (v2 or v3), GSL (and HDF5 for output for some codes)
+
+
+This program is distributed in the hope that it will be useful, but 
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
+or FITNESS FOR A PARTICULAR PURPOSE. By downloading and using MUSIC, you 
+agree to the LICENSE, distributed with the source code in a text 
+file of the same name.
+
+
+[1]: http://arxiv.org/abs/1103.6031
+[2]: https://groups.google.com/forum/#!forum/cosmo_music
+[3]: https://bitbucket.org/ohahn/music/downloads/MUSIC_Users_Guide.pdf
--- a/TransferFunction.hh
+++ b/TransferFunction.hh
@ -0,0 +1,410 @@
+/*
+ This file is part of MUSIC -
+ a tool to generate initial conditions for cosmological simulations
+ 
+ Copyright (C) 2008-12  Oliver Hahn, ojha@gmx.de
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __TRANSFERFUNCTION_HH
+#define __TRANSFERFUNCTION_HH
+
+#include <vector>
+#include <sstream>
+#include <fstream>
+#include <iostream>
+#include <cmath>
+#include <stdexcept>
+
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_spline.h>
+#include <gsl/gsl_sf_gamma.h>
+
+#include "Numerics.hh"
+#include "general.hh"
+
+#include <complex>
+
+#define NZERO_Q
+
+typedef std::complex<double> complex;
+
+//! Abstract base class for transfer functions
+/*!
+    This class implements a purely virtual interface that can be
+    used to derive instances implementing various transfer functions.
+*/ 
+class TransferFunction
+{
+public:
+	//! cosmological parameters, copied in at construction
+	Cosmology m_Cosmology;
+
+	//! stores a copy of the supplied cosmology
+	TransferFunction( Cosmology acosm )
+	: m_Cosmology( acosm )
+	{ }
+
+	//! evaluates the transfer function at wave number k
+	virtual double compute( double k ) = 0;
+
+	//! virtual destructor so that derived objects can be deleted through the base
+	virtual ~TransferFunction()
+	{ }
+
+	//! largest wave number supported by the implementation
+	virtual double get_kmax( void ) = 0;
+
+	//! smallest wave number supported by the implementation
+	virtual double get_kmin( void ) = 0;
+};
+
+//! Real-space representation of a transfer function
+/*!
+    The constructor transforms a k-space TransferFunction to real space with
+    transform(), determines the r=0 value Tr0_ by direct integration, and
+    stores GSL cubic splines of log10|T| and of the sign of T as functions of
+    2*log10(r). compute_real() evaluates the result at a given r^2.
+
+    NOTE(review): the object owns GSL splines/accelerators but declares no copy
+    constructor or assignment operator; copying an instance would free them
+    twice. std::setw is used below although <iomanip> is not among the includes
+    visible in this header.
+*/
+class TransferFunction_real
+{
+	
+public:
+	//! interpolation accelerators for splinep / splinen
+	gsl_interp_accel *accp, *accn;
+	//! splinep: log10|T| vs. 2*log10(r); splinen: sign of T (+1/-1) vs. 2*log10(r)
+	gsl_spline *splinep, *splinen;
+	//! Tr0_ is the value returned for r -> 0; Tmin_, Tmax_, Tscale_ are not used in the code visible here
+	double Tr0_, Tmin_, Tmax_, Tscale_;
+	//! only referenced by commented-out code below
+	double rneg_, rneg2_;
+	//! k-space transfer function used by transform() and call_wrapper(); set by the constructor
+	static TransferFunction *ptf_;
+	//! spectral index used by transform() and call_wrapper(); set by the constructor
+	static double nspec_;
+	
+protected:
+	
+	//! Adjusts the product k0*r0 for the log-space transform.
+	/*!
+	    Returns kr multiplied by exp((arg-iarg)*dlnr), where arg is built from
+	    log(2/kr)/dlnr and the phases of two complex Gamma functions and iarg
+	    is arg rounded to an integer. The caller describes this as the
+	    anti-ringing correction from Hamilton (2000).
+	    @param mu   order parameter entering the Gamma function arguments
+	    @param q    bias exponent entering the Gamma function arguments
+	    @param dlnr logarithmic grid spacing
+	    @param kr   initial value of k0*r0
+	    @return the adjusted k0*r0 (kr itself if arg is already an integer)
+	*/
+	double krgood( double mu, double q, double dlnr, double kr )
+	{
+		double krnew = kr;
+		complex cdgamma, zm, zp;
+		double arg, iarg, xm, xp, y;
+		gsl_sf_result g_a, g_p;
+		
+		xp = 0.5*(mu+1.0+q);
+		xm = 0.5*(mu+1.0-q);
+		y = M_PI/(2.0*dlnr);
+		zp=complex(xp,y);
+		zm=complex(xm,y);
+		
+		//... g_a receives log|Gamma(z)|, g_p the phase of Gamma(z); only the phases are used below
+		gsl_sf_lngamma_complex_e (zp.real(), zp.imag(), &g_a, &g_p);
+		zp=std::polar(exp(g_a.val),g_p.val);
+		double zpa = g_p.val;
+
+		gsl_sf_lngamma_complex_e (zm.real(), zm.imag(), &g_a, &g_p);
+		zm=std::polar(exp(g_a.val),g_p.val);
+		double zma = g_p.val;
+		
+		arg=log(2.0/kr)/dlnr+(zpa+zma)/M_PI;
+		//... round arg to an integer (truncation of arg+0.5)
+		iarg=(double)((int)(arg + 0.5));
+		
+		if( arg!=iarg )
+			krnew=kr*exp((arg-iarg)*dlnr);
+		
+		return krnew;
+	}
+	
+	//! Transforms the k-space transfer function to real space on a logarithmic grid.
+	/*!
+	    Samples dplus*sqrt(pnorm)*T(k)*k^(nspec/2)*k^(1.5-q) on a logarithmic k
+	    grid, applies a forward FFT, multiplies each mode by a Gamma-function
+	    ratio and a phase, applies a backward FFT, and fills rr/TT with the radii
+	    and the real-space values. As side effects the files "transfer_k.txt" and
+	    "transfer_real_new.txt" are written to the working directory.
+	    @param pnorm normalisation; its square root scales the input
+	    @param dplus additional multiplicative factor of the input
+	    @param N     overwritten with 16384 inside the function (argument ignored)
+	    @param q     overwritten inside the function (argument ignored): 0.2 if
+	                 NZERO_Q is defined, 0.0 otherwise
+	    @param rr    output: radii
+	    @param TT    output: real-space transfer function at rr
+
+	    NOTE(review): fftw_create_plan/fftw_one and the .re/.im members look like
+	    the FFTW 2.x interface -- confirm against the build configuration.
+	    in[N]/out[N] are variable-length arrays on the stack.
+	*/
+	void transform( double pnorm, double dplus, unsigned N, double q, std::vector<double>& rr, std::vector<double>& TT )
+	{
+		const double mu = 0.5;
+		double qmin = 1.0e-6, qmax = 1.0e+6;
+		
+		q = 0.0;
+		
+		N = 16384;
+		
+#ifdef NZERO_Q
+		//q = 0.4;
+		q = 0.2;
+#endif
+		
+		//... k and r grids span the same logarithmic range [qmin,qmax]
+		double kmin = qmin, kmax=qmax;
+		double rmin = qmin, rmax = qmax;
+		double k0 = exp(0.5*(log(kmax)+log(kmin)));
+		double r0 = exp(0.5*(log(rmax)+log(rmin)));
+		double L = log(rmax)-log(rmin);
+		double k0r0 = k0*r0;
+		double dlnk = L/N, dlnr = L/N;
+		
+		double sqrtpnorm = sqrt(pnorm);
+		
+		double dir = 1.0;
+		
+		double fftnorm = 1.0/N;
+		
+		fftw_complex in[N], out[N];
+		fftw_plan p,ip;
+		
+		//... perform anti-ringing correction from Hamilton (2000)
+		k0r0 = krgood( mu, q, dlnr, k0r0 );
+
+		std::ofstream ofsk("transfer_k.txt");
+		double sum_in = 0.0;
+		for( unsigned i=0; i<N; ++i )
+		{
+			
+			double k = k0*exp(((int)i - (int)N/2+1) * dlnk);
+			//double k = k0*exp(((int)i - (int)N/2) * dlnk);
+			//double k = k0*exp(ii * dlnk);
+			
+			//... some constants missing ...//
+			in[i].re = dplus*sqrtpnorm*ptf_->compute( k )*pow(k,0.5*nspec_)*pow(k,1.5-q);
+			in[i].im = 0.0;
+			
+			sum_in += in[i].re;
+			ofsk << std::setw(16) << k <<std::setw(16) << in[i].re << std::endl;
+		}
+		ofsk.close();
+		
+		
+		p = fftw_create_plan(N, FFTW_FORWARD, FFTW_ESTIMATE);
+		ip = fftw_create_plan(N, FFTW_BACKWARD, FFTW_ESTIMATE);
+		
+		//fftw_one(p, in, out);
+		fftw_one(p, in, out);
+		
+		//... compute the Hankel transform by convolution with the Bessel function
+		for( unsigned i=0; i<N; ++i )
+		{
+			//... map the FFT index to a signed mode number
+			int ii=i;
+			if( ii > (int)N/2 )
+				ii -= N;
+			
+#ifndef NZERO_Q
+			double y=ii*M_PI/L;
+			complex zp((mu+1.0)*0.5,y);
+			gsl_sf_result g_a, g_p;
+			gsl_sf_lngamma_complex_e(zp.real(), zp.imag(), &g_a, &g_p);
+			
+			double arg = 2.0*(log(2.0/k0r0)*y+g_p.val);
+			complex cu = complex(out[i].re,out[i].im)*std::polar(1.0,arg);
+			out[i].re = cu.real()*fftnorm;
+			out[i].im = cu.imag()*fftnorm;
+			
+#else		
+			//complex x(dir*q, (double)ii*2.0*M_PI/L);
+			complex x(dir*q, (double)ii*2.0*M_PI/L);
+			gsl_sf_result g_a, g_p;
+			
+			complex g1, g2, garg, U, phase;						
+			complex twotox = pow(complex(2.0,0.0),x);
+			
+			/////////////////////////////////////////////////////////
+			//.. evaluate complex Gamma functions
+			
+			garg = 0.5*(mu+1.0+x);
+			gsl_sf_lngamma_complex_e (garg.real(), garg.imag(), &g_a, &g_p);
+			g1 = std::polar(exp(g_a.val),g_p.val);
+
+			
+			garg = 0.5*(mu+1.0-x);
+			gsl_sf_lngamma_complex_e (garg.real(), garg.imag(), &g_a, &g_p);
+			g2 = std::polar(exp(g_a.val),g_p.val);
+
+			/////////////////////////////////////////////////////////
+			//.. compute U
+			
+			//... guard against dividing by a vanishing g2: the ratio is replaced by 1
+			if( (fabs(g2.real()) < 1e-19 && fabs(g2.imag()) < 1e-19) )
+			{
+				//std::cerr << "Warning : encountered possible singularity in TransferFunction_real::transform!\n";
+				g1 = 1.0; g2 = 1.0;
+			}
+			
+			
+			U = twotox * g1 / g2;
+			phase = pow(complex(k0r0,0.0),complex(0.0,2.0*M_PI*(double)ii/L));
+			
+			complex cu = complex(out[i].re,out[i].im)*U*phase*fftnorm;
+			
+			out[i].re = cu.real();
+			out[i].im = cu.imag();
+			
+			//... x != x is true only for NaN
+			if( (out[i].re != out[i].re)||(out[i].im != out[i].im) )
+			{	std::cerr << "NaN @ i=" << i << ", U= " << U << ", phase = " << phase << ", g1 = " << g1 << ", g2 = " << g2 << std::endl;
+				std::cerr << "mu+1+q = " << mu+1.0+q << std::endl;
+				//break;
+			}
+			
+#endif
+
+		}
+			
+		/*out[N/2].im = 0.0;
+		out[N/2+1].im = 0.0;
+		out[N/2+1].re = out[N/2].re;
+		out[N/2].im = 0.0;*/
+		
+		fftw_one(ip, out, in);
+		
+		rr.assign(N,0.0);
+		TT.assign(N,0.0);
+		
+		//... r0 follows from the (possibly adjusted) product k0*r0
+		r0 = k0r0/k0;
+		
+		for( unsigned i=0; i<N; ++i )
+		{
+			int ii = i;
+			ii -= N/2-1;
+			//ii -= N/2;
+			//if( ii>N/2)
+			//	ii-=N;
+			
+			
+			
+			//... r decreases with i; results are stored reversed so that rr ascends
+			double r = r0*exp(-ii*dlnr);
+			rr[N-i-1] = r;
+			TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*pow(r,-(1.5+q));
+			
+			//TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*exp( -dir*(q+1.5)*ii*dlnr +q*log(k0r0))/r0;
+			
+			//rr[i] = r;
+			//TT[i] = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*pow(r,-(1.5+q));
+			
+		}
+		
+		
+		//... dump r, T(r) and the imaginary part of the back-transform to a file
+		{
+			std::ofstream ofs("transfer_real_new.txt");
+			for( unsigned i=0; i<N; ++i )
+			{
+				int ii = i;
+				ii -= N/2-1;
+				
+				double r = r0*exp(-ii*dlnr);//r0*exp(ii*dlnr);
+				double T = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*pow(r,-(1.5+q));
+				ofs << r << "\t\t" << T << "\t\t" << in[i].im << std::endl;
+			}
+		}
+		
+
+		fftw_destroy_plan(p);
+		fftw_destroy_plan(ip);
+	}
+	
+public:
+	//! Builds the real-space representation of a transfer function.
+	/*!
+	    @param tf     k-space transfer function; stored in the static ptf_
+	    @param nspec  spectral index; stored in the static nspec_
+	    @param pnorm  normalisation, passed to transform() and used for Tr0_
+	    @param dplus  multiplicative factor, passed to transform() and used for Tr0_
+	    @param rmin   only radii with rmin < r < rmax enter the splines
+	    @param rmax   see rmin
+	    @param knymax Tr0_ is the integral of call_wrapper from 0 to sqrt(1.5)*knymax
+	    @param nr     passed to transform() as N, where it is overwritten
+
+	    Also writes 100 samples of compute_real() to "transfer_splinep.txt".
+	    NOTE(review): because ptf_ and nspec_ are static, constructing a second
+	    instance changes them for all instances.
+	*/
+	TransferFunction_real( TransferFunction *tf, double nspec, double pnorm, double dplus, double rmin, double rmax, double knymax, unsigned nr )
+	{
+				
+		ptf_ = tf;
+		nspec_ = nspec;
+	
+		//... this value is overwritten inside transform()
+		double q = 0.8;
+		
+		std::vector<double> r,T,xp,yp,xn,yn;
+		
+		transform( pnorm, dplus, nr, q, r, T );
+		
+		//... determine r=0 zero component by integrating up to the Nyquist frequency
+		gsl_integration_workspace * wp; 
+		gsl_function F;
+		wp = gsl_integration_workspace_alloc(20000);
+		F.function = &call_wrapper;
+		//... only par[0] is initialised and read (by call_wrapper)
+		double par[2]; par[0] = dplus*sqrt(pnorm); //par[1] = M_PI/kny;
+		F.params = (void*)par;
+		double error;
+		
+		//#warning factor of sqrt(1.5) needs to be adjusted for non-equilateral boxes
+		//.. need a factor sqrt( 2*kny^2_x + 2*kny^2_y + 2*kny^2_z )/2 = sqrt(3/2)kny (in equilateral case)
+		gsl_integration_qag (&F, 0.0, sqrt(1.5)*knymax, 0, 1e-8, 20000, GSL_INTEG_GAUSS21, wp, &Tr0_, &error); 
+		//Tr0_ = 0.0;
+		gsl_integration_workspace_free(wp);
+				
+		
+		for( unsigned i=0; i<r.size(); ++i )
+		{
+			// spline positive and negative part separately
+			/*if( T[i] > 0.0 )
+			{
+				xp.push_back( 2.0*log10(r[i]) );
+				yp.push_back( log10(T[i]) );
+				rneg_ = r[i];
+				rneg2_ = rneg_*rneg_;
+			}else {
+				xn.push_back( 2.0*log10(r[i]) );
+				yn.push_back( log10(-T[i]) );
+			}*/
+			
+			
+			//... magnitude and sign are tabulated separately against log10(r^2)
+			if( r[i] > rmin && r[i] < rmax )
+			{
+				xp.push_back( 2.0*log10(r[i]) );
+				yp.push_back( log10(fabs(T[i])) );
+				xn.push_back( 2.0*log10(r[i]) );
+				if( T[i] >= 0.0 ) 
+					yn.push_back( 1.0 );
+				else
+					yn.push_back( -1.0 );
+				
+				
+				//ofs << std::setw(16) << xp.back() << std::setw(16) << yp.back() << std::endl;
+			}
+			
+		}
+		
+
+		
+		
+		
+		accp = gsl_interp_accel_alloc ();
+		accn = gsl_interp_accel_alloc ();
+		
+		//... spline interpolation is only marginally slower here
+		splinep = gsl_spline_alloc (gsl_interp_cspline, xp.size() );
+		splinen = gsl_spline_alloc (gsl_interp_cspline, xn.size() );
+
+		//... set up everything for spline interpolation
+		gsl_spline_init (splinep, &xp[0], &yp[0], xp.size() );
+		gsl_spline_init (splinen, &xn[0], &yn[0], xn.size() );		
+		
+
+		
+		
+		//... diagnostic output: spline evaluated at 100 log-spaced radii
+		{
+			double dlogr = (log10(rmax)-log10(rmin))/100;
+			std::ofstream ofs("transfer_splinep.txt");			
+			
+			for( int i=0; i< 100; ++i ) 
+			{
+				double r = rmin*pow(10.0,i*dlogr);
+				ofs << std::setw(16) << r << std::setw(16) << compute_real(r*r) << std::endl;
+			}
+		}
+		
+	}
+	
+	//! Integrand for Tr0_ in the GSL callback form.
+	/*!
+	    @param k   wave number
+	    @param arg pointer to a double array; element 0 is the amplitude factor
+	    @return 4*pi*a[0]*T(k)*k^(nspec/2)*k^2, using the static ptf_ and nspec_
+	*/
+	static double call_wrapper( double k, void *arg )
+	{
+		double *a = (double*)arg;
+		return 4.0*M_PI*a[0]*ptf_->compute( k )*pow(k,0.5*nspec_)*k*k;
+	}
+	
+	//! releases the GSL splines and accelerators allocated by the constructor
+	~TransferFunction_real()
+	{
+		gsl_spline_free (splinen);
+		gsl_interp_accel_free (accn);
+		gsl_spline_free (splinep);
+		gsl_interp_accel_free (accp);
+
+	}
+	
+	//! Evaluates the real-space transfer function.
+	/*!
+	    @param r2 squared radius
+	    @return Tr0_ if r2 is below 1e-16, otherwise 10^splinep(log10 r2) with
+	            the sign taken from splinen
+	    NOTE(review): no range check is made here; r2 is presumably expected to
+	    lie within the range covered by the splines -- confirm with callers.
+	*/
+	inline double compute_real( double r2 ) const
+	{
+		const double EPS = 1e-8;
+		const double Reps2 = EPS*EPS;
+		
+		if( r2 <Reps2 )
+			return Tr0_;
+		double q;
+		/*if( r2 < rneg2_ )
+			q = pow(10.0,gsl_spline_eval (splinep, log10(r2), accp));
+		else
+			q = -pow(10.0,gsl_spline_eval(splinen, log10(r2), accn));*/
+		
+		double logr2 = log10(r2);
+		q = pow(10.0,gsl_spline_eval(splinep, logr2, accp));
+		//... the sign spline interpolates between +1 and -1; only its sign is used
+		double sign = 1.0;
+		if( gsl_spline_eval(splinen, logr2, accn) < 0.0 )
+			sign = -1.0;
+		return q*sign;
+	}
+};
+
+#endif
--- a/config_file.hh
+++ b/config_file.hh
@ -0,0 +1,355 @@
+/*
+ 
+ config_file.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#ifndef __CONFIG_FILE_HH
+#define __CONFIG_FILE_HH
+
+#include <string>
+#include <sstream>
+#include <map>
+#include <fstream>
+#include <iostream>
+#include <iomanip>
+#include <stdexcept>
+#include <typeinfo>
+
+#include "log.hh"
+
+/*!
+ * @class config_file
+ * @brief provides read/write access to configuration options
+ *
+ * This class provides access to the configuration file. The 
+ * configuration is stored in hash-pairs and can be queried and
+ * validated by the responsible class/routine
+ */
+class config_file {
+  
+  //! number of the line most recently read from the configuration file
+  unsigned m_iLine;
+
+  //! key/value pairs, stored as strings under the key "section/name"
+  std::map<std::string, std::string> m_Items;
+
+public:
+	
+  //! strips leading and trailing delimiter characters from a string
+  /*!
+   * @param source the string to be trimmed
+   * @param delims a string of delimiting characters
+   * @return trimmed string (empty if source consists of delimiters only)
+   */
+  std::string trim(std::string const& source, char const* delims = " \t\r\n") const{
+    const std::string::size_type first = source.find_first_not_of(delims);
+    if( first == std::string::npos )
+      return std::string();
+    const std::string::size_type last = source.find_last_not_of(delims);
+    return source.substr(first, last-first+1);
+  }
+  
+  //! converts between different variable types
+  /*!
+   *  The main purpose of this function is to parse and convert
+   *  a string argument into numbers, booleans, etc...
+   *  The conversion succeeds only if a value could be extracted and nothing
+   *  but white space is left over afterwards.
+   * @param ival the input value (typically a std::string)
+   * @param oval the interpreted/converted value
+   * @throws ErrInvalidConversion if the input cannot be converted completely
+   */
+  template <class in_value, class out_value>
+  void convert( const in_value & ival, out_value & oval) const
+  {
+		std::stringstream ss;
+		ss << ival; //.. insert value into stream
+
+		//.. a failed extraction (e.g. empty input) must not pass just because
+		//.. the end of the stream was reached
+		const bool extracted = !(ss >> oval).fail();
+
+		//.. skip trailing white space so that eof() reliably tells whether
+		//.. unconverted characters remain
+		if( extracted )
+			ss >> std::ws;
+
+		if( !extracted || !ss.eof() ) {
+		  //.. conversion error
+			std::cerr << "Error: conversion of \'" << ival << "\' failed." << std::endl;
+		  throw ErrInvalidConversion(std::string("invalid conversion to ")+typeid(out_value).name()+'.');
+		}
+  }
+
+  //! constructor of class config_file
+  /*! Parses the file line by line: '#', ';' and '%' start a comment,
+   *  "[name]" at the start of a line opens a section, and "key = value"
+   *  lines are stored under "section/key". Malformed lines are skipped with
+   *  a warning.
+   *  @param FileName the path/name of the configuration file to be parsed
+   *  @throws std::runtime_error if the file cannot be opened
+   */
+  config_file( std::string const& FileName )
+	: m_iLine(0), m_Items()	
+  {
+    std::ifstream file(FileName.c_str());
+	  
+	if( !file.is_open() )
+		throw std::runtime_error(std::string("Error: Could not open config file \'")+FileName+std::string("\'"));
+
+    std::string line;
+    std::string inSection;
+
+    //.. walk through all lines ..
+    while (std::getline(file,line)) {
+      ++m_iLine;
+      //.. encountered empty line ?
+      if (line.empty()) continue;
+      
+      //.. encountered comment ? (size_type keeps the npos comparison exact)
+      const std::string::size_type idx = line.find_first_of("#;%");
+      if( idx != std::string::npos )
+        line.erase(idx);
+
+      //.. encountered section tag ? (the line may be empty after comment removal)
+      if (!line.empty() && line[0] == '[') {
+        inSection=trim(line.substr(1,line.find(']')-1));
+        continue;
+      }
+
+      //.. seek end of entry name ..
+      const std::string::size_type posEqual = line.find('=');
+
+      if( posEqual == std::string::npos )
+      {
+        //.. no '=': warn unless the line holds nothing but white space
+        if( !trim(line).empty() )
+          LOGWARN("Ignoring non-assignment in %s:%d",FileName.c_str(),m_iLine);
+        continue;
+      }
+
+      const std::string name  = trim(line.substr(0,posEqual));
+      const std::string value = trim(line.substr(posEqual+1));
+
+      if( name.empty() && value.empty() )
+        continue;
+
+      if( name.empty() )
+      {
+        LOGWARN("Ignoring assignment missing entry name in %s:%d",FileName.c_str(),m_iLine);
+        continue;
+      }
+
+      if( value.empty() )
+      {
+        LOGWARN("Empty entry will be ignored in %s:%d",FileName.c_str(),m_iLine);
+        continue;
+      }
+      
+      //.. add key/value pair to the map ..
+      const std::string fullkey = inSection+'/'+name;
+      if( m_Items.find(fullkey) != m_Items.end() )
+        LOGWARN("Redeclaration overwrites previous value in %s:%d",FileName.c_str(),m_iLine);
+	
+      m_Items[fullkey] = value;
+    }
+  }
+	
+	//! inserts a key/value pair in the map
+	/*! @param key the key value, usually "section/key"
+	 *  @param value the value of the key, also a string
+	 */
+	void insertValue( std::string const& key, std::string const& value )
+	{
+		m_Items[key] = value;
+	}
+	
+	//! inserts a key/value pair in the map
+	/*! @param section section name. values are stored under "section/key"
+	 *  @param key the key name within the section
+	 *  @param value the value of the key, also a string
+	 */
+	void insertValue( std::string const& section, std::string const& key, std::string const& value )
+	{
+		m_Items[section+'/'+key] = value;
+	}
+	
+	//! checks if a key is part of the map
+	/*! @param section the section name of the key
+	 *  @param key the key name to be checked
+	 *  @return true if the key is present, false otherwise
+	 */
+	bool containsKey( std::string const& section, std::string const& key ) const
+	{
+		return m_Items.find(section+'/'+key) != m_Items.end();
+	}
+	
+	//! checks if a key is part of the map
+	/*! @param key the full key name to be checked, as stored in the map
+	 *  @return true if the key is present, false otherwise
+	 */
+	bool containsKey( std::string const& key ) const
+	{
+		return m_Items.find(key) != m_Items.end();
+	}
+	
+	
+	//! return value of a key
+	/*! returns the value of a given key in the unnamed section (""), throws a
+	*  ErrItemNotFound exception if the key is not available in the map.
+	*  @param key the key name
+	*  @return the value of the key
+	*  @sa ErrItemNotFound
+	*/
+	template<class T> T getValue( std::string const& key ) const{
+		return getValue<T>( "", key );
+	}
+
+	//! return value of a key
+	/*! returns the value of a given key, throws a ErrItemNotFound
+	 *  exception if the key is not available in the map.
+	 *  @param section the section name for the key
+	 *  @param key the key name
+	 *  @return the value of the key
+	 *  @throws ErrItemNotFound if "section/key" is not stored
+	 *  @throws ErrInvalidConversion if the stored string cannot be converted to T
+	 *  @sa ErrItemNotFound
+	 */
+	template<class T> T getValue( std::string const& section, std::string const& key ) const
+	{
+		T r;
+		std::map<std::string,std::string>::const_iterator i = m_Items.find(section + '/' + key);
+		if ( i == m_Items.end() ) 
+		  throw ErrItemNotFound('\'' + section + '/' + key + std::string("\' not found."));
+		  
+		convert(i->second,r);
+		return r;
+	}
+
+	//! version of getValue that falls back to a default for missing keys
+	/*! returns the value of a given key, returns a default value rather
+	 *  than a ErrItemNotFound exception if the key is not found.
+	 *  Conversion errors are not caught here.
+	 *  @param section the section name for the key
+	 *  @param key the key name
+	 *  @param default_value the value that is returned if the key is not found
+	 *  @return the key value (if key found) otherwise default_value
+	 */
+	template<class T> T getValueSafe( std::string const& section, std::string const& key, T default_value ) const
+	{
+		T r;
+		try{
+		  r = getValue<T>( section, key );
+		} catch( const ErrItemNotFound& ) {
+		  r = default_value;
+		}
+		return r;
+	}
+
+	
+	//! version of getValue that falls back to a default for missing keys
+	/*! returns the value of a given key in the unnamed section (""), returns a
+	 *  default value rather than a ErrItemNotFound exception if the key is
+	 *  not found.
+	 *  @param key the key name
+	 *  @param default_value the value that is returned if the key is not found
+	 *  @return the key value (if key found) otherwise default_value
+	 */
+	template<class T> T getValueSafe( std::string const& key, T default_value ) const
+	{
+		return getValueSafe( "", key, default_value );
+	}
+
+	
+	//! dumps all key-value pairs with non-empty value to a std::ostream
+	void dump( std::ostream& out ) const
+	{
+		for( std::map<std::string,std::string>::const_iterator i = m_Items.begin(); i!=m_Items.end(); ++i )
+		{
+			if( i->second.length() > 0 )
+				out << std::setw(24) << std::left << i->first << "  =  " << i->second  << std::endl;
+		}
+	}
+	
+	//! writes all key-value pairs with non-empty value to the log
+	void log_dump( void ) const
+	{
+		LOGUSER("List of all configuration options:");
+		for( std::map<std::string,std::string>::const_iterator i = m_Items.begin(); i!=m_Items.end(); ++i )
+		{
+			if( i->second.length() > 0 )
+				LOGUSER("  %24s = %s",(i->first).c_str(),(i->second).c_str());
+		}
+	}
+
+  //--- EXCEPTIONS ---
+
+	//! runtime error that is thrown if key is not found in getValue
+	class ErrItemNotFound : public std::runtime_error{
+	public:
+		ErrItemNotFound( const std::string& itemname )
+		  : std::runtime_error( itemname )
+		{}
+	};
+
+	//! runtime error that is thrown if type conversion fails
+	class ErrInvalidConversion : public std::runtime_error{
+	public:
+		ErrInvalidConversion( const std::string& errmsg )
+		  : std::runtime_error( errmsg )
+		{}
+	};
+
+
+	//! runtime error that is thrown if identifier is not found in keys
+	class ErrIllegalIdentifier : public std::runtime_error{
+	public:
+		ErrIllegalIdentifier( const std::string& errmsg )
+		  : std::runtime_error( errmsg )
+		{}
+	};
+  
+};
+
+//==== below are template specialisations =======================================//
+
+//... Function: getValue( strSection, strEntry ) ...
+//... Descript: specialization of getValue for type boolean to interpret strings ...
+//...           like "true" and "false" etc.
+//...           converts the string to type bool, returns type bool ...
+//! getValue for bool: accepts true/yes/on/1 and false/no/off/0,
+//! throws ErrIllegalIdentifier for any other stored string
+template<> 
+inline bool config_file::getValue<bool>( std::string const& strSection, std::string const& strEntry ) const{
+  const std::string text = getValue<std::string>( strSection, strEntry );
+
+  const bool is_true  = ( text=="true"  || text=="yes" || text=="on"  || text=="1" );
+  const bool is_false = ( text=="false" || text=="no"  || text=="off" || text=="0" );
+
+  if( !is_true && !is_false )
+    throw ErrIllegalIdentifier(std::string("Illegal identifier \'")+text+std::string("\' in \'")+strEntry+std::string("\'."));
+
+  return is_true;
+}
+
+//! getValueSafe for bool: accepts true/yes/on/1 and false/no/off/0.
+//! Returns defaultValue if the key is missing or if the stored string is
+//! none of the recognised words.
+template<>
+inline bool config_file::getValueSafe<bool>( std::string const& strSection, std::string const& strEntry, bool defaultValue ) const{
+  std::string r1;
+  try{
+    r1 = getValue<std::string>( strSection, strEntry );
+    if( r1=="true" || r1=="yes" || r1=="on" || r1=="1" )
+      return true;
+    if( r1=="false" || r1=="no" || r1=="off" || r1=="0" )
+      return false;
+  } catch( const ErrItemNotFound& ) {
+    //.. caught by const reference: no copy, no slicing of the exception object
+    return defaultValue;
+  }
+  return defaultValue;
+}
+
+//! string-to-string conversion copies the text verbatim (embedded white
+//! space is kept, which stream extraction would not do)
+template<>
+inline void config_file::convert<std::string,std::string>( const std::string & ival, std::string & oval) const
+{
+	oval.assign( ival );
+}
+
+#endif //__CONFIG_FILE_HH
--- a/constraints.cc
+++ b/constraints.cc
@ -0,0 +1,495 @@
+/*
+ 
+ constraints.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#include "constraints.hh"
+
+double dsigma2_tophat( double k, void *params );
+double dsigma2_gauss( double k, void *params );
+double find_coll_z( const std::vector<double>& z, const std::vector<double>& sigma, double nu );
+void compute_sigma_tophat( config_file& cf, transfer_function *ptf, double R, std::vector<double>& z, std::vector<double>& sigma );
+void compute_sigma_gauss( config_file& cf, transfer_function *ptf, double R, std::vector<double>& z, std::vector<double>& sigma );
+
+
+
+//! integrand k^2 * W(kR)^2 * k^n * T(k)^2 with W(x) = 3(sin x - x cos x)/x^3
+/*!
+ * @param k        wave number; the integrand is defined as 0 for k<=0
+ * @param pparams  array of three pointers: [0] transfer_function*,
+ *                 [1] double* scale that multiplies k, [2] double* exponent n
+ * @return value of the integrand at k
+ */
+double dsigma2_tophat( double k, void *pparams )
+{
+	if( !(k>0.0) && k<=0.0 )
+		return 0.0;
+
+	char **pack = reinterpret_cast<char**>( pparams );
+
+	transfer_function *tf = reinterpret_cast<transfer_function*>( pack[0] );
+	const double scale = *reinterpret_cast<double*>( pack[1] );
+	const double nspect = *reinterpret_cast<double*>( pack[2] );
+
+	const double x = k * scale;
+	const double w = 3.0*(sin(x)-x*cos(x))/(x*x*x);
+
+	const double tfk = tf->compute(k,total);
+
+	return k*k * w*w * pow(k,nspect) * tfk*tfk;
+}
+
+//! integrand k^2 * W(kR)^2 * k^n * T(k)^2 with W(x) = exp(-x^2/2)
+/*!
+ * @param k        wave number; the integrand is defined as 0 for k<=0
+ * @param pparams  array of three pointers: [0] transfer_function*,
+ *                 [1] double* scale that multiplies k, [2] double* exponent n
+ * @return value of the integrand at k
+ */
+double dsigma2_gauss( double k, void *pparams )
+{
+	if( !(k>0.0) && k<=0.0 )
+		return 0.0;
+
+	char **pack = reinterpret_cast<char**>( pparams );
+
+	transfer_function *tf = reinterpret_cast<transfer_function*>( pack[0] );
+	const double scale = *reinterpret_cast<double*>( pack[1] );
+	const double nspect = *reinterpret_cast<double*>( pack[2] );
+
+	const double x = k * scale;
+	const double w = exp(-x*x*0.5);
+
+	const double tfk = tf->compute(k,total);
+
+	return k*k * w*w * pow(k,nspect) * tfk*tfk;
+}
+
+//! interpolate the tabulated relation sigma -> z at sigma = 1.686/nu
+/*!
+ * A cubic spline z(sigma) is built from the two tables and evaluated at
+ * dcoll = 1.686/nu.
+ *
+ * @param z      table of redshifts
+ * @param sigma  table of sigma values, same length as z; used as the spline
+ *               abscissa (GSL requires it to be strictly increasing)
+ * @param nu     peak height; the spline is evaluated at 1.686/nu
+ * @return interpolated redshift
+ * @throws std::runtime_error if the tables are empty or differ in length
+ *
+ * NOTE(review): 1.686/nu outside the tabulated sigma range is passed to
+ * gsl_spline_eval unchecked -- confirm the desired behaviour for that case.
+ */
+double find_coll_z( const std::vector<double>& z, const std::vector<double>& sigma, double nu )
+{
+	// taking &z[0] of an empty vector or reading past a shorter table is undefined
+	if( z.empty() || z.size() != sigma.size() )
+		throw std::runtime_error("find_coll_z: z and sigma tables must be non-empty and of equal length");
+	
+	double dcoll = 1.686/nu;
+	double zcoll = 0.0;
+	
+	gsl_interp_accel *acc = gsl_interp_accel_alloc ();
+	gsl_spline *spline = gsl_spline_alloc (gsl_interp_cspline, z.size());
+	
+	//... sigma is the independent variable, z the dependent one
+	gsl_spline_init (spline, &sigma[0], &z[0], z.size() );
+	
+	zcoll = gsl_spline_eval(spline, dcoll, acc );
+	
+	gsl_spline_free (spline);
+	gsl_interp_accel_free (acc);
+	
+	return zcoll;
+}
+
+
+//! tabulate sigma(z) for the top-hat integrand at scale R
+/*!
+ * Fills z with 100 values running linearly from 200 down to 0 and sigma with
+ * the matching values
+ *   sig(R) * sigma_8 / sig(8) * D(z)/D(0),
+ * where sig() is sqrt(4 pi * integral of dsigma2_tophat over [1e-4,1e4]) and
+ * D is CosmoCalc::CalcGrowthFactor evaluated at a = 1/(1+z).
+ *
+ * @param cf     configuration; reads cosmology/sigma_8 and cosmology/nspec
+ * @param ptf    transfer function handed to the integrand
+ * @param R      scale passed to the integrand
+ * @param z      output, overwritten
+ * @param sigma  output, overwritten
+ */
+void compute_sigma_tophat( config_file& cf, transfer_function *ptf, double R, std::vector<double>& z, std::vector<double>& sigma )
+{
+	z.clear();
+	sigma.clear();
+	
+	cosmology cosm( cf );
+	CosmoCalc ccalc( cosm, ptf );
+	
+	double zmin = 0.0, zmax = 200.0;
+	int nz = 100;
+	z.reserve( nz );
+	sigma.reserve( nz );
+	for( int i=0; i <nz; ++i )
+		z.push_back( zmax - i*(zmax-zmin)/(nz-1.0) );
+	
+	double D0 = ccalc.CalcGrowthFactor(1.0);
+
+	double sigma8 = cf.getValue<double>("cosmology","sigma_8"); 
+	double nspec = cf.getValue<double>("cosmology","nspec");
+	
+	//... parameter pack expected by the integrand: tf pointer, scale, exponent
+	double eight=8.0;
+	char *params[3];
+	params[0] = reinterpret_cast<char*> (ptf);
+	params[1] = reinterpret_cast<char*> (&eight);
+	params[2] = reinterpret_cast<char*> (&nspec);
+	
+	//... normalisation integral at scale 8
+	double sigma0 = sqrt(4.0*M_PI*integrate( &dsigma2_tophat, 1e-4, 1e4, reinterpret_cast<void*>(params) ));
+	
+	//... the integral at scale R does not depend on z, so it is evaluated once
+	//... instead of once per redshift
+	params[1] = reinterpret_cast<char*> (&R);
+	double sig = sqrt(4.0*M_PI*integrate( &dsigma2_tophat, 1e-4, 1e4, reinterpret_cast<void*>(params) ));
+	
+	for( int i=0; i <nz; ++i )
+	{
+		double Dz  = ccalc.CalcGrowthFactor(1./(1.+z[i]));
+		sigma.push_back( sig*sigma8/sigma0*Dz/D0 );
+	}
+}
+
+//! tabulate sigma(z) for the Gaussian integrand at scale R
+/*!
+ * Fills z with 100 values running linearly from 200 down to 0 and sigma with
+ * the matching values
+ *   sig(R) * sigma_8 / sig0 * D(z)/D(0),
+ * where sig(R) is sqrt(4 pi * integral of dsigma2_gauss over [1e-4,1e4]),
+ * sig0 is the same expression for dsigma2_tophat at scale 8 (the normalisation
+ * uses the top-hat integrand, exactly as before), and D is
+ * CosmoCalc::CalcGrowthFactor evaluated at a = 1/(1+z).
+ *
+ * @param cf     configuration; reads cosmology/sigma_8 and cosmology/nspec
+ * @param ptf    transfer function handed to the integrands
+ * @param R      scale passed to the Gaussian integrand
+ * @param z      output, overwritten
+ * @param sigma  output, overwritten
+ */
+void compute_sigma_gauss( config_file& cf, transfer_function *ptf, double R, std::vector<double>& z, std::vector<double>& sigma )
+{
+	z.clear();
+	sigma.clear();
+	
+	cosmology cosm( cf );
+	CosmoCalc ccalc( cosm, ptf );
+	
+	double zmin = 0.0, zmax = 200.0;
+	int nz = 100;
+	z.reserve( nz );
+	sigma.reserve( nz );
+	for( int i=0; i <nz; ++i )
+		z.push_back( zmax - i*(zmax-zmin)/(nz-1.0) );
+	
+	double D0 = ccalc.CalcGrowthFactor(1.0);
+	
+	double sigma8 = cf.getValue<double>("cosmology","sigma_8"); 
+	double nspec = cf.getValue<double>("cosmology","nspec");
+	
+	//... parameter pack expected by the integrands: tf pointer, scale, exponent
+	double eight=8.0;
+	char *params[3];
+	params[0] = reinterpret_cast<char*> (ptf);
+	params[1] = reinterpret_cast<char*> (&eight);
+	params[2] = reinterpret_cast<char*> (&nspec);
+	
+	//... normalisation integral: top-hat integrand at scale 8
+	double sigma0 = sqrt(4.0*M_PI*integrate( &dsigma2_tophat, 1e-4, 1e4, reinterpret_cast<void*>(params) ));
+	
+	//... the Gaussian integral at scale R does not depend on z, so it is
+	//... evaluated once instead of once per redshift
+	params[1] = reinterpret_cast<char*> (&R);
+	double sig = sqrt(4.0*M_PI*integrate( &dsigma2_gauss, 1e-4, 1e4, reinterpret_cast<void*>(params) ));
+	
+	for( int i=0; i <nz; ++i )
+	{
+		double Dz  = ccalc.CalcGrowthFactor(1./(1.+z[i]));
+		sigma.push_back( sig*sigma8/sigma0*Dz/D0 );
+	}
+}
+
+
+//! read all constraints from the "constraints" section of the config file
+/*!
+ * Entries are looked up as constraint[0], constraint[1], ... and reading stops
+ * at the first index for which "constraint[i].type" is missing. Each entry
+ * needs .type ("halo" or "peak"), .pos ("x,y,z") and .mass; halo entries
+ * additionally need .zform, peak entries .nu.
+ *
+ * The level on which constraints are applied is constraints/level, defaulting
+ * to setup/levelmin_TF (itself defaulting to setup/levelmin) and never below
+ * levelmin_TF.
+ *
+ * @param cf   configuration to read from; its address is stored
+ * @param ptf  transfer function; the pointer is stored
+ * @throws std::runtime_error for an unknown constraint type or a position
+ *         entry that does not contain three comma separated numbers
+ */
+constraint_set::constraint_set( config_file& cf, transfer_function *ptf )
+: pcf_( &cf ), ptf_( ptf )
+{
+	pcosmo_ = new Cosmology( cf );
+	pccalc_ = new CosmoCalc( *pcosmo_, ptf_ );
+	dplus0_ = 1.0;//pccalc_->CalcGrowthFactor( 1.0 );
+	
+	unsigned levelmin = pcf_->getValue<unsigned>("setup","levelmin");
+	unsigned levelmin_TF = pcf_->getValueSafe<unsigned>("setup","levelmin_TF",levelmin);
+	constr_level_ = pcf_->getValueSafe<unsigned>("constraints","level",levelmin_TF);
+	
+	constr_level_ = std::max(constr_level_,levelmin_TF);
+	
+	double omegam = pcf_->getValue<double>("cosmology","Omega_m");
+	double rhom = omegam*2.77519737e11; //... mean matter density
+	
+	//... use EdS density for estimation
+	//double rhom = 2.77519737e11;
+	
+	std::map< std::string, constr_type> constr_type_map;
+	constr_type_map.insert( std::pair<std::string,constr_type>("halo",halo) );
+	constr_type_map.insert( std::pair<std::string,constr_type>("peak",peak) );
+	
+	for( unsigned i=0; ; ++i )
+	{
+		char temp1[128];
+		std::string temp2;
+		sprintf(temp1,"constraint[%u].type",i);
+		
+		//... the first missing index terminates the list
+		if( !cf.containsKey( "constraints", temp1 ) )
+			break;
+		
+		std::string str_type = cf.getValue<std::string>( "constraints", temp1 );
+		std::map< std::string, constr_type>::const_iterator it_type = constr_type_map.find(str_type);
+		if( it_type == constr_type_map.end() )
+			throw std::runtime_error("Unknown constraint type!\n");
+		
+		//... parse a new constraint
+		constraint new_c;
+		
+		new_c.type = it_type->second;
+		
+		//... read position of constraint; all three components must parse,
+		//... otherwise the coordinates would be left uninitialised
+		sprintf(temp1,"constraint[%u].pos",i);
+		temp2 = cf.getValue<std::string>( "constraints", temp1 );
+		if( sscanf(temp2.c_str(), "%lf,%lf,%lf", &new_c.x, &new_c.y, &new_c.z) != 3 )
+			throw std::runtime_error(std::string("Could not parse \'")+temp1+std::string("\', expected three comma separated numbers."));
+		
+		if( new_c.type == halo)
+		{
+			//.. halo type constraints take mass and collapse redshift
+			sprintf(temp1,"constraint[%u].mass",i);
+			double mass = cf.getValue<double>( "constraints", temp1 );
+			
+			sprintf(temp1,"constraint[%u].zform",i);
+			double zcoll = cf.getValue<double>( "constraints", temp1 );
+			new_c.Rg = pow((mass/pow(2.*M_PI,1.5)/rhom),1./3.);
+			
+			new_c.sigma = 1.686/(pccalc_->CalcGrowthFactor(1./(1.+zcoll))/pccalc_->CalcGrowthFactor(1.0));
+			
+			LOGINFO("Constraint %d : halo with %g h-1 M_o",i,pow(2.*M_PI,1.5)*rhom*pow(new_c.Rg,3));
+		}
+		else if( new_c.type == peak )
+		{
+			//... peak type constraints take a mass and a peak height
+			//sprintf(temp1,"constraint[%u].Rg",i);
+			//new_c.Rg = cf.getValue<double>( "constraints", temp1 );
+			//double mass = pow(new_c.Rg,3.0)*rhom*pow(2.*M_PI,1.5);
+			
+			sprintf(temp1,"constraint[%u].mass",i);
+			double mass = cf.getValue<double>( "constraints", temp1 );
+			new_c.Rg = pow((mass/pow(2.*M_PI,1.5)/rhom),1./3.);
+			double Rtophat = pow(mass/4.0*3.0/M_PI/rhom,1./3.);
+			sprintf(temp1,"constraint[%u].nu",i);
+			double nu = cf.getValue<double>( "constraints", temp1 );
+			
+			//... the collapse redshift is only used for the log output below
+			std::vector<double> z,sigma;
+			compute_sigma_tophat( cf, ptf, Rtophat, z, sigma );
+			double zcoll = find_coll_z( z, sigma, nu );
+			
+			//LOGINFO("Probable collapse redshift for constraint %d : z = %f @ M = %g", i, zcoll,mass );
+			
+			//... sigma.back() is the table entry for the last redshift (z=0)
+			compute_sigma_gauss( cf, ptf, new_c.Rg, z, sigma );
+			new_c.sigma = nu*sigma.back();
+			
+			//LOGINFO("Constraint %d : peak with Rg=%g h-1 Mpc and nu = %g",i,new_c.Rg,new_c.sigma);
+			LOGINFO("Constraint %3d : peak",i);
+			LOGINFO("   M = %g h-1 M_o, nu = %.2f sigma", mass, nu );
+			LOGINFO("   estimated z_coll = %f, sigma = %f", zcoll, new_c.sigma );
+			
+		}
+		
+		new_c.Rg2 = new_c.Rg*new_c.Rg;
+		
+		cset_.push_back( new_c );
+	}
+	
+	//... %d expects an int, so the size_t count is converted explicitly
+	LOGINFO("Found %d density constraint(s) to be obeyed.",static_cast<int>(cset_.size()));
+}
+
+
+//! add the constraint correction to the Fourier-space white noise
+/*!
+ * For every stored mode q the term
+ *   sum_{i,j} (sigma_j - g0_j) * cinv(i,j) * eval_constr(i,k) * sqrt(P(k))
+ * is added to cw[q], with P(k) = pnorm * T(k)^2 * k^nspec * d3k. The double sum
+ * is evaluated over the lower triangle (j<=i), adding the (j,i) counterpart
+ * explicitly for off-diagonal pairs.
+ *
+ * Before that, the chi-squared of the desired values (cset_[].sigma) and of
+ * the sampled values (g0) are logged; at the end a per-constraint sigma
+ * estimate is logged next to the desired value.
+ *
+ * @param dx    grid spacing; nx*dx is used as the box length of this grid
+ * @param nx,ny,nz  real-space grid dimensions; the last Fourier dimension
+ *                  holds nz/2+1 modes
+ * @param g0    values of the constraints measured in the unconstrained noise
+ * @param cinv  matrix applied to (sigma - g0); callers in this file pass the
+ *              result of icov_constr
+ * @param cw    Fourier-space noise, modified in place
+ *
+ * NOTE(review): the y and z wave numbers are scaled with nx, not ny/nz --
+ * presumably cubic grids are assumed; confirm against callers.
+ */
+void constraint_set::wnoise_constr_corr( double dx, size_t nx, size_t ny, size_t nz, std::vector<double>& g0, matrix& cinv, fftw_complex* cw )
+{
+	double lsub = nx*dx;
+	double dk = 2.0*M_PI/lsub, d3k=dk*dk*dk;
+	
+	double pnorm = pcf_->getValue<double>("cosmology","pnorm");
+	double nspec = pcf_->getValue<double>("cosmology","nspec");
+	pnorm *= dplus0_*dplus0_;
+	
+	size_t nconstr = cset_.size();
+	size_t nzp=nz/2+1;
+	
+	
+	
+	/*for( size_t i=0; i<nconstr; ++i )
+	 for( size_t j=0; j<nconstr; ++j )
+	 {
+	 std::cerr << "fact    = " << (cset_[j].sigma-g0[j])*cinv(i,j) << "\n";
+	 std::cerr << "g(j)    = " << cset_[j].sigma << "\n";
+	 std::cerr << "g0(j)   = " << g0[j] << "\n";
+	 std::cerr << "qinv    = " << cinv(i,j) << "\n";
+	 }
+	 */
+	
+	
+	//... quadratic forms g^T cinv g for the desired and the sampled values (log only)
+	double chisq = 0.0, chisq0 = 0.0;
+	for( size_t i=0; i<nconstr; ++i )
+		for( size_t j=0; j<nconstr; ++j )
+		{
+			chisq += cset_[i].sigma*cinv(i,j)*cset_[j].sigma;
+			chisq0 += g0[i]*cinv(i,j)*g0[j];
+		}
+	LOGINFO("Chi squared for the constraints:\n       sampled = %f, desired = %f", chisq0, chisq );
+	
+	//... per-constraint sums, reduced over threads below; only used for logging
+	std::vector<double> sigma(nconstr,0.0);
+	
+	#pragma omp parallel 
+	{
+		//... thread-private partial sums
+		std::vector<double> sigma_loc(nconstr,0.0);
+		
+		#pragma omp for 
+		for( int ix=0; ix<(int)nx; ++ix )
+		{	
+			//... map index to signed frequency, then to phase per grid cell
+			double iix(ix); if( iix > nx/2 ) iix-=nx;
+			iix *= 2.0*M_PI/nx;
+			
+			for( size_t iy=0; iy<ny; ++iy )
+			{	
+				double iiy(iy); if( iiy > ny/2 ) iiy-=ny;
+				iiy *= 2.0*M_PI/nx;
+				for( size_t iz=0; iz<nzp; ++iz )
+				{
+					double iiz(iz);
+					iiz *= 2.0*M_PI/nx;
+					
+					//... wave number: phase per cell times nx/lsub = 1/dx
+					double k = sqrt(iix*iix+iiy*iiy+iiz*iiz)*(double)nx/lsub;
+					
+					double T = ptf_->compute(k,total);
+					double Pk = pnorm*T*T*pow(k,nspec)*d3k;
+					
+					//... linear index into the (nx, ny, nz/2+1) complex array
+					size_t q = ((size_t)ix*ny+(size_t)iy)*nzp+(size_t)iz;
+					
+					double fac = sqrt(Pk);
+					
+					for( unsigned i=0; i<nconstr; ++i )
+						for( unsigned j=0; j<=i; ++j )
+						{
+							std::complex<double> 
+							ci = eval_constr(i,iix,iiy,iiz),
+							cj = eval_constr(j,iix,iiy,iiz);
+							
+							//... (i,j) term of the correction
+							RE(cw[q]) += (cset_[j].sigma-g0[j])*cinv(i,j) * std::real(ci)*fac;
+							IM(cw[q]) += (cset_[j].sigma-g0[j])*cinv(i,j) * std::imag(ci)*fac;
+							
+							if( i!=j )
+							{
+								//... (j,i) term, not visited by the triangular loop
+								RE(cw[q]) += (cset_[i].sigma-g0[i])*cinv(j,i) * std::real(cj)*fac;
+								IM(cw[q]) += (cset_[i].sigma-g0[i])*cinv(j,i) * std::imag(cj)*fac;								
+							}
+							else
+							{
+								//... diagnostic sum for constraint i; modes with 0<iz<nz/2
+								//... get weight 2. NOTE(review): cw[q] is read here while
+								//... later (i,j) pairs have not been added yet.
+								if( iz>0&&iz<nz/2 )
+									sigma_loc[i] += 2.0*std::real(std::conj(ci)*std::complex<double>(RE(cw[q]),IM(cw[q])))*fac;
+								else
+									sigma_loc[i] += std::real(std::conj(ci)*std::complex<double>(RE(cw[q]),IM(cw[q])))*fac;
+							}
+						}
+				}
+				
+			}
+			
+		}
+		
+		//.. 'critical' section for the global reduction
+		#pragma omp critical
+		{
+			for(int i=0; i<(int)nconstr; ++i )
+				sigma[i] += sigma_loc[i];
+		}
+	}
+	
+	//... report the accumulated value next to the desired one
+	for(int i=0; i<(int)nconstr; ++i )
+		LOGINFO("Constraint %3d : sigma = %+6f (%+6f)",i,sigma[i],cset_[i].sigma);
+}
+
+
+
+//! measure the value of every constraint in the given Fourier-space noise
+/*!
+ * For each constraint i, g0[i] receives the sum over all stored modes of
+ *   Re( conj(eval_constr(i,k)) * sqrt(pnorm * k^nspec * T(k)^2 * d3k) * w * cw[q] ),
+ * where w is 2 for modes with 0 < iz < nz/2 and 1 otherwise.
+ *
+ * @param dx    grid spacing; nx*dx is used as the box length of this grid
+ * @param cw    Fourier-space noise with nz/2+1 modes along the last dimension
+ * @param nx,ny,nz  real-space grid dimensions
+ * @param g0    output, resized to the number of constraints
+ */
+void constraint_set::wnoise_constr_corr( double dx, fftw_complex* cw, size_t nx, size_t ny, size_t nz, std::vector<double>& g0 )
+{
+	const size_t ncon = cset_.size();
+	const size_t nzhalf = nz/2+1;
+
+	g0.assign(ncon,0.0);
+
+	double pnorm = pcf_->getValue<double>("cosmology","pnorm");
+	const double nspec = pcf_->getValue<double>("cosmology","nspec");
+	pnorm *= dplus0_*dplus0_;
+
+	const double lsub = nx*dx;
+	const double dk = 2.0*M_PI/lsub;
+	const double d3k = dk*dk*dk;
+
+	//... phase advance per grid cell for one unit of frequency
+	const double phase_unit = 2.0*M_PI/nx;
+
+	for( size_t icon=0; icon<ncon; ++icon )
+	{
+		double acc = 0.0;
+
+		#pragma omp parallel for reduction(+:acc)
+		for( int ix=0; ix<(int)nx; ++ix )
+		{
+			double px(ix);
+			if( px > nx/2 )
+				px -= nx;
+			px *= phase_unit;
+
+			for( size_t iy=0; iy<ny; ++iy )
+			{
+				double py(iy);
+				if( py > ny/2 )
+					py -= ny;
+				py *= phase_unit;
+
+				for( size_t iz=0; iz<nzhalf; ++iz )
+				{
+					double pz(iz);
+					pz *= phase_unit;
+
+					const double k = sqrt(px*px+py*py+pz*pz)*(double)nx/lsub;
+					const double T = ptf_->compute(k,total);
+
+					std::complex<double> term(std::conj(eval_constr(icon,px,py,pz)));
+					term *= sqrt(pnorm*pow(k,nspec)*T*T*d3k);
+
+					//... modes with 0 < iz < nz/2 are weighted twice
+					const bool twice = ( iz>0 && iz<nz/2 );
+					if( twice )
+						term *= 2;
+
+					const size_t q = ((size_t)ix*ny+(size_t)iy)*nzhalf+(size_t)iz;
+					const std::complex<double> mode(RE(cw[q]),IM(cw[q]));
+
+					acc += std::real(term*mode);
+				}
+			}
+		}
+
+		g0[icon] = acc;
+	}
+}
+
+
+
+
+//! compute the inverse of the covariance matrix between the constraints
+/*!
+ * Entry (i,j) of the covariance is the sum over all stored modes of
+ *   Re( conj(eval_constr(i,k)) * eval_constr(j,k) ) * pnorm * k^nspec * T(k)^2 * d3k,
+ * with modes 0 < iz < nz/2 counted twice. Only the lower triangle is summed;
+ * the same value is stored in the mirrored entry. The matrix is inverted in
+ * place before returning.
+ *
+ * @param dx    grid spacing; nx*dx is used as the box length of this grid
+ * @param nx,ny,nz  real-space grid dimensions
+ * @param cij   output; replaced by an (nconstr x nconstr) matrix holding the
+ *              inverse covariance
+ *
+ * NOTE(review): the y and z wave numbers are scaled with nx, not ny/nz --
+ * presumably cubic grids are assumed; confirm against callers.
+ */
+void constraint_set::icov_constr( double dx, size_t nx, size_t ny, size_t nz, matrix& cij )
+{
+	size_t nconstr = cset_.size();
+	size_t nzp=nz/2+1;
+	
+	double pnorm = pcf_->getValue<double>("cosmology","pnorm");
+	double nspec = pcf_->getValue<double>("cosmology","nspec");
+	pnorm *= dplus0_*dplus0_;
+	
+	cij		= matrix(nconstr,nconstr);
+	
+	double lsub = nx*dx;
+	double dk = 2.0*M_PI/lsub, d3k=dk*dk*dk;
+	
+	//... compute lower triangle of covariance matrix
+	//... and fill in upper triangle
+	for( unsigned i=0; i<nconstr; ++i )
+		for( unsigned j=0; j<=i; ++j )
+		{
+			//... accumulate in double: every term is a double and the sum
+			//... runs over the whole grid, so a float accumulator loses digits
+			double cov(0.0);
+			
+#pragma omp parallel for reduction(+:cov)
+			for( int ix=0; ix<(int)nx; ++ix )
+			{	
+				double iix(ix); if( iix > nx/2 ) iix-=nx;
+				iix *= 2.0*M_PI/nx;
+				
+				for( size_t iy=0; iy<ny; ++iy )
+				{	
+					double iiy(iy); if( iiy > ny/2 ) iiy-=ny;
+					iiy *= 2.0*M_PI/nx;
+					for( size_t iz=0; iz<nzp; ++iz )
+					{
+						double iiz(iz);
+						iiz *= 2.0*M_PI/nx;
+						
+						double k = sqrt(iix*iix+iiy*iiy+iiz*iiz)*(double)nx/lsub;
+						double T = ptf_->compute(k,total);
+						std::complex<double> v(std::conj(eval_constr(i,iix,iiy,iiz)));
+						v *= eval_constr(j,iix,iiy,iiz);
+						v *= pnorm * pow(k,nspec) * T * T * d3k;
+						
+						if( iz>0&&iz<nz/2)
+							v*=2;
+						
+						cov += std::real(v);
+					}
+				}
+			}
+			
+			//... Re(conj(v)) == Re(v), so both triangles hold the same sum
+			cij(i,j) = cov;
+			cij(j,i) = cov;
+		}
+	
+	//... invert covariance matrix
+	cij.invert();
+	
+}
+
--- a/constraints.hh
+++ b/constraints.hh
@ -0,0 +1,422 @@
+/*
+ 
+ constraints.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#ifndef __CONSTRAINTS_HH
+#define __CONSTRAINTS_HH
+
+#include <vector>
+#include <complex>
+
+#include <gsl/gsl_linalg.h>
+
+#include "general.hh"
+#include "config_file.hh"
+#include "transfer_function.hh"
+#include "cosmology.hh"
+
+
+//! matrix class serving as a gsl wrapper
+class matrix
+{
+protected:
+	gsl_matrix * m_;	//!< owned GSL matrix, released in the destructor
+	//double *data_;
+	size_t M_, N_;		//!< number of rows and columns
+	
+public:
+	//! allocate an M x N matrix; the elements are not initialised
+	matrix( size_t M, size_t N )
+	: M_(M), N_(N)
+	{
+		m_ = gsl_matrix_alloc(M_,N_);
+	}
+	
+	//! allocate a square N x N matrix; the elements are not initialised
+	matrix( size_t N )
+	: M_(N), N_(N)
+	{
+		m_ = gsl_matrix_alloc(M_,N_);
+	}
+	
+	//! deep copy
+	matrix( const matrix& o )
+	: M_(o.M_), N_(o.N_)
+	{
+		m_ = gsl_matrix_alloc(M_,N_);
+		gsl_matrix_memcpy(m_, o.m_ );
+	}
+	
+	~matrix()
+	{
+		gsl_matrix_free( m_ );
+	}
+	
+	//! element access, row i and column j
+	double& operator()( size_t i, size_t j )
+	{	return *gsl_matrix_ptr( m_, i, j );	}
+	
+	//! read-only element access, row i and column j
+	const double& operator()( size_t i, size_t j ) const
+	{	return *gsl_matrix_const_ptr( m_, i, j );	}
+	
+	//! deep-copy assignment; the dimensions of o are adopted
+	matrix& operator=( const matrix& o )
+	{
+		//... self-assignment must not free the storage that is about to be copied
+		if( this == &o )
+			return *this;
+		
+		//... build the copy first, release the old storage afterwards
+		gsl_matrix *fresh = gsl_matrix_alloc(o.M_,o.N_);
+		gsl_matrix_memcpy(fresh, o.m_ );
+		
+		gsl_matrix_free( m_ );
+		m_ = fresh;
+		M_ = o.M_;
+		N_ = o.N_;
+		return *this;
+	}
+	
+	
+	//! replace the matrix by its inverse, computed via LU decomposition
+	/*!
+	 * @return reference to this matrix
+	 * @throws std::runtime_error if the matrix is not square
+	 */
+	matrix& invert()
+	{
+		if( M_!=N_ )
+			throw std::runtime_error("Attempt to invert a non-square matrix!");
+		
+		int s;
+		gsl_matrix* im = gsl_matrix_alloc(M_,N_);
+		
+		//... m_ is overwritten by its LU factors, the inverse goes to im
+		gsl_permutation * p = gsl_permutation_alloc (M_);
+		gsl_linalg_LU_decomp( m_, p, &s );
+		gsl_linalg_LU_invert( m_, p, im );
+		
+		gsl_matrix_memcpy(m_, im);
+		
+		gsl_permutation_free(p);
+		gsl_matrix_free(im);
+		return *this;
+	}
+};
+
+
+//! class to impose constraints on the white noise field (van de Weygaert & Bertschinger 1996)
+class constraint_set
+{
+	
+public:
+	enum constr_type{ halo, peak };
+	
+protected:
+	
+	//! one constraint as read from the "constraints" section of the config file
+	struct constraint{
+		//! kind of constraint (halo or peak)
+		constr_type type;
+		//! position as parsed from the "constraint[i].pos" entry
+		double x,y,z;
+		//! position in grid cells of the current level; set by apply()
+		double gx,gy,gz;
+		//! scale derived from the constraint mass, and its square
+		double Rg, Rg2;
+		//! Rg/boxlength in grid cells of the current level, and its square; set by apply()
+		double gRg, gRg2;
+		//! desired value of the constraint
+		double sigma;
+	};
+	
+	config_file *pcf_;
+	std::vector<constraint> cset_;
+	transfer_function *ptf_;
+	CosmoCalc *pccalc_;
+	Cosmology *pcosmo_;
+	double dplus0_;
+	unsigned constr_level_;
+	
+	
+	//! evaluate constraint icon for the phase vector (kx,ky,kz)
+	/*!
+	 * Returns exp(-k^2 gRg2/2) * ( cos(k.g), sin(k.g) ), where g = (gx,gy,gz)
+	 * and gRg2 are the grid-unit position and squared scale of the constraint.
+	 */
+	inline std::complex<double> eval_constr( size_t icon, double kx, double ky, double kz )
+	{
+		const constraint& c = cset_[icon];
+		
+		const double phase = c.gx*kx+c.gy*ky+c.gz*kz;
+		const double ksq   = kx*kx+ky*ky+kz*kz;
+		const double amp   = exp(-ksq*c.gRg2/2.0);
+		
+		return std::complex<double>( amp*cos( phase ), amp*sin( phase ) );
+	}
+	
+	
+#if defined(FFTW3) && defined(SINGLE_PRECISION)
+	
+	//! apply constraints to the white noise
+	void wnoise_constr_corr( double dx, size_t nx, size_t ny, size_t nz, std::vector<double>& g0, matrix& cinv, fftwf_complex* cw );
+	
+	//! measure sigma for each constraint in the unconstrained noise
+	void wnoise_constr_corr( double dx, fftwf_complex* cw, size_t nx, size_t ny, size_t nz, std::vector<double>& g0 );
+	
+#else
+	//! apply constraints to the white noise
+	void wnoise_constr_corr( double dx, size_t nx, size_t ny, size_t nz, std::vector<double>& g0, matrix& cinv, fftw_complex* cw );
+	
+	//! measure sigma for each constraint in the unconstrained noise
+	void wnoise_constr_corr( double dx, fftw_complex* cw, size_t nx, size_t ny, size_t nz, std::vector<double>& g0 );
+	
+#endif
+	
+	//! compute the covariance between the constraints
+	void icov_constr( double dx, size_t nx, size_t ny, size_t nz, matrix& cij );
+	
+	
+public:
+	
+	
+	//! constructor 
+	constraint_set( config_file& cf, transfer_function *ptf );
+	
+	//! destructor, releases the CosmoCalc and Cosmology objects created by the constructor
+	/*! NOTE(review): the class declares no copy constructor or assignment
+	 *  operator, so a copied constraint_set would delete the same two
+	 *  objects twice -- confirm that instances are never copied. */
+	~constraint_set()
+	{
+		delete pccalc_;
+		delete pcosmo_;
+	}
+	
+	
+	//! impose the constraints on the white noise of one grid level
+	/*!
+	 * Does nothing unless constraints exist and ilevel equals the constraint
+	 * level. Otherwise the noise is copied to a work array, transformed to
+	 * Fourier space, corrected (measure g0, build the inverse covariance,
+	 * add the correction), transformed back and written to wnoise.
+	 *
+	 * On the level equal to levelmin_TF the whole grid is used and the read
+	 * indices are wrapped periodically. On any other level only the central
+	 * region [n/4, 3n/4) of each dimension is read and written back, the
+	 * rest of the work array is zero, and the constraint positions are
+	 * shifted by (2^(ilevel-1) - x0).
+	 *
+	 * @param ilevel  grid level of the noise field
+	 * @param x0      offset added to the loop indices when accessing wnoise
+	 * @param lx      extent of the grid in the three dimensions
+	 * @param wnoise  noise field, accessed through operator()(i,j,k)
+	 *
+	 * NOTE(review): on the periodic level the write-back indices are not
+	 * wrapped while the read indices are -- confirm x0 is zero there.
+	 */
+	template< typename rng >
+	void apply( unsigned ilevel, int x0[], int lx[], rng* wnoise )
+	{
+		if( cset_.size() == 0 || constr_level_ != ilevel )
+			return;
+		
+		unsigned nlvl = 1<<ilevel;
+		double boxlength = pcf_->getValue<double>("setup","boxlength");
+		
+		//... compute constraint coordinates for grid
+		for( size_t i=0; i<cset_.size(); ++i )
+		{
+			cset_[i].gx = cset_[i].x * (double)nlvl;
+			cset_[i].gy = cset_[i].y * (double)nlvl;
+			cset_[i].gz = cset_[i].z * (double)nlvl;
+			cset_[i].gRg = cset_[i].Rg/boxlength * (double)nlvl;
+			cset_[i].gRg2 = cset_[i].gRg*cset_[i].gRg;
+			
+			//... %d expects an int, so the size_t index is converted explicitly
+			if(cset_[i].gRg > 0.5*lx[0])
+				LOGWARN("Constraint %d appears to be too large scale",(int)i);
+		}
+		
+		
+		std::vector<double> g0;
+		
+		//... levelmin_TF is optional and defaults to levelmin, as in the constructor
+		unsigned levelmin = pcf_->getValue<unsigned>("setup","levelmin");
+		unsigned levelmin_TF = pcf_->getValueSafe<unsigned>("setup","levelmin_TF",levelmin);
+		
+		bool bperiodic = ilevel==levelmin_TF;
+		double dx = boxlength/(1<<ilevel);
+		
+		
+		LOGINFO("Computing constrained realization...");
+		
+		//... work array with two padding elements per row for the in-place transform
+		size_t nx = lx[0], ny = lx[1], nz = lx[2], nzp = nz+2;
+		fftw_real * w = new fftw_real[nx*ny*nzp];
+		
+		
+#ifdef FFTW3
+	#ifdef SINGLE_PRECISION
+		fftwf_complex * cw = reinterpret_cast<fftwf_complex*> (w);
+		fftwf_plan	p  = fftwf_plan_dft_r2c_3d( nx, ny, nz, w, cw, FFTW_ESTIMATE),
+					ip = fftwf_plan_dft_c2r_3d( nx, ny, nz, cw, w, FFTW_ESTIMATE);
+	#else
+		fftw_complex * cw = reinterpret_cast<fftw_complex*> (w);
+		fftw_plan	p  = fftw_plan_dft_r2c_3d( nx, ny, nz, w, cw, FFTW_ESTIMATE),
+					ip = fftw_plan_dft_c2r_3d( nx, ny, nz, cw, w, FFTW_ESTIMATE);
+	#endif
+#else
+		fftw_complex * cw = reinterpret_cast<fftw_complex*> (w);
+		rfftwnd_plan p	= rfftw3d_create_plan( nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
+					 ip = rfftw3d_create_plan( nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
+#endif
+		
+		double fftnorm = 1.0/sqrt(nx*ny*nz);
+		
+		//... index window that is read from / written to wnoise:
+		//... the full grid on the periodic level, the central half otherwise
+		int il = 0, ir = (int)nx, jl = 0, jr = (int)ny, kl = 0, kr = (int)nz;
+		if( !bperiodic )
+		{
+			il = nx/4; ir = 3*nx/4;
+			jl = ny/4; jr = 3*ny/4;
+			kl = nz/4; kr = 3*nz/4;
+		}
+		
+		#pragma omp parallel for
+		for( int i=0; i<(int)nx; i++ )
+			for( int j=0; j<(int)ny; j++ )
+				for( int k=0; k<(int)nz; k++ )
+				{
+					size_t q = ((size_t)i*ny+(size_t)j)*nzp+(size_t)k;
+					
+					if( bperiodic )
+						w[q] = (*wnoise)((x0[0]+i)%nx,(x0[1]+j)%ny,(x0[2]+k)%nz)*fftnorm;
+					else if( i>=il && i<ir && j>=jl && j<jr && k>=kl && k<kr )
+						w[q] = (*wnoise)((x0[0]+i),(x0[1]+j),(x0[2]+k))*fftnorm;
+					else
+						w[q] = 0.0;
+				}
+		
+		if( !bperiodic )
+		{
+			//... we are operating on a refinement grid, not necessarily the finest:
+			//... shift the constraint positions into the frame of this grid
+			int nlvl05 = 1<<(ilevel-1);
+			int xs = nlvl05-x0[0], ys = nlvl05-x0[1], zs = nlvl05-x0[2];
+			
+			for( size_t i=0; i<cset_.size(); ++i )
+			{
+				cset_[i].gx -= xs;
+				cset_[i].gy -= ys;
+				cset_[i].gz -= zs;
+			}
+		}
+		
+#ifdef FFTW3
+	#ifdef SINGLE_PRECISION
+		fftwf_execute( p );
+	#else
+		fftw_execute( p );
+	#endif
+#else
+#ifndef SINGLETHREAD_FFTW		
+		rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), p, w, NULL );
+#else
+		rfftwnd_one_real_to_complex( p, w, NULL );
+#endif
+#endif
+		//... measure the constraints in the unconstrained noise
+		wnoise_constr_corr( dx, cw, nx, ny, nz, g0 );
+		
+		//... icov_constr replaces c by a matrix of the proper size
+		matrix c(2,2);
+		icov_constr( dx, nx, ny, nz, c );
+		
+		//... add the correction in Fourier space
+		wnoise_constr_corr( dx, nx, ny, nz, g0, c, cw );
+		
+#ifdef FFTW3
+	#ifdef SINGLE_PRECISION
+		fftwf_execute( ip );
+	#else
+		fftw_execute( ip );
+	#endif
+#else
+#ifndef SINGLETHREAD_FFTW		
+		rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), ip, cw, NULL );
+#else
+		rfftwnd_one_complex_to_real( ip, cw, NULL );
+#endif
+#endif
+		
+		#pragma omp parallel for
+		for( int i=0; i<(int)nx; i++ )
+			for( int j=0; j<(int)ny; j++ )
+				for( int k=0; k<(int)nz; k++ )
+				{
+					size_t q = ((size_t)i*ny+(size_t)j)*nzp+(size_t)k;
+					if( i>=il && i<ir && j>=jl && j<jr && k>=kl && k<kr )
+						(*wnoise)((x0[0]+i),(x0[1]+j),(x0[2]+k)) = w[q]*fftnorm;
+				}
+		
+		LOGINFO("Applied constraints to level %d.",ilevel);
+		
+		delete[] w;
+		
+		//... both the forward and the inverse plan have to be released
+#ifdef FFTW3
+	#ifdef SINGLE_PRECISION
+		fftwf_destroy_plan(p);
+		fftwf_destroy_plan(ip);
+	#else
+		fftw_destroy_plan(p);
+		fftw_destroy_plan(ip);
+	#endif
+#else
+		rfftwnd_destroy_plan(p);
+		rfftwnd_destroy_plan(ip);
+#endif
+		
+	}
+	
+};
+
+
+#endif // __CONSTRAINTS_HH
--- a/convolution_kernel.cc
+++ b/convolution_kernel.cc
--- a/convolution_kernel.hh
+++ b/convolution_kernel.hh
@ -0,0 +1,124 @@
+/*
+ 
+ convolution_kernel.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+*/
+
+#ifndef __CONVOLUTION_KERNELS_HH
+#define __CONVOLUTION_KERNELS_HH
+
+#include <string>
+#include <map>
+
+#include "config_file.hh"
+#include "densities.hh"
+#include "transfer_function.hh"
+
+
+#define ACC_RF(i,j,k) (((((size_t)(i)+nx)%nx)*ny+(((size_t)(j)+ny)%ny))*2*(nz/2+1)+(((size_t)(k)+nz)%nz))
+#define ACC_RC(i,j,k) (((((size_t)(i)+nxc)%nxc)*nyc+(((size_t)(j)+nyc)%nyc))*2*(nzc/2+1)+(((size_t)(k)+nzc)%nzc))
+
+namespace convolution{
+
+	//! encapsulates all parameters required for transfer function convolution
+	//! plain parameter bundle for a transfer function convolution;
+	//! the struct defines no constructor, so the fields start uninitialised
+	struct parameters
+	{
+		//! three integer extents (presumably the grid size in cells -- TODO confirm)
+		int nx,ny,nz;
+		//! three floating point extents (presumably the physical size of the grid -- TODO confirm)
+		double lx,ly,lz;//,boxlength;
+		//! non-owning pointer to the configuration
+		config_file *pcf;
+		//! non-owning pointer to the transfer function
+		transfer_function* ptf;
+		//! NOTE(review): meaning not visible here, presumably a coarsening factor -- confirm in the kernel implementations
+		unsigned coarse_fact;
+		//! option flags; their effect is defined by the kernel implementations
+		bool deconvolve;
+		bool is_finest;
+		bool smooth;
+	};
+	
+	
+	/////////////////////////////////////////////////////////////////
+	
+	
+  //! abstract base class for a transfer function convolution kernel
+  //! interface implemented by every convolution kernel; instances are
+  //! created through kernel_creator::create
+  class kernel{
+  public:
+    
+    //! all parameters (physical/numerical);
+    //! not set by the constructor below
+    parameters cparam_;
+    
+    //! non-owning pointers to the objects handed to the constructor
+    config_file *pcf_;
+    transfer_function* ptf_;
+    refinement_hierarchy* prefh_;
+    //! transfer function type handed to the constructor
+    tf_type type_;
+    
+    //! constructor, stores the addresses of cf and refh, the pointer ptf and the type
+    kernel( config_file& cf, transfer_function* ptf, refinement_hierarchy& refh, tf_type type )
+      : pcf_(&cf), ptf_(ptf), prefh_(&refh), type_(type)//cparam_( cp )
+    {	}
+    
+    //! dummy constructor
+    /*kernel( void )
+      {	}*/
+    
+    //! compute/load the kernel for level ilevel; returns a kernel pointer
+    virtual kernel* fetch_kernel( int ilevel, bool isolated=false ) = 0;
+    
+    //! virtual destructor
+    virtual ~kernel(){ };
+    
+    //! purely virtual method to obtain a pointer to the underlying data
+    virtual void* get_ptr() = 0;
+
+    //! purely virtual method to determine whether the kernel is k-sampled or not
+    virtual bool is_ksampled() = 0;
+
+    //! purely virtual vectorized method to compute the kernel value if is_ksampled;
+    //! takes len input values in_k and an output array out_Tk
+    virtual void at_k( size_t len, const double* in_k, double* out_Tk ) = 0;
+    
+    //! free memory
+    virtual void deallocate() = 0;
+  };
+
+	
+	//! abstract factory class to create convolution kernels
+	//! abstract factory interface; the objects stored in the map returned by
+	//! get_kernel_map() are of this type
+	struct kernel_creator
+	{
+		//! creates a convolution kernel object from the given configuration,
+		//! transfer function, refinement hierarchy and transfer function type
+		virtual kernel * create( config_file& cf, transfer_function* ptf, refinement_hierarchy& refh, tf_type type ) const = 0;
+		
+		//! virtual destructor, so creators can be destroyed through a base pointer
+		virtual ~kernel_creator() { }
+	};
+	
+	
+	//! access map to the various kernel classes through the factory
+	std::map< std::string, kernel_creator *>& get_kernel_map();
+	
+	
+	//! actual implementation of the factory class for kernel objects
+	template< class Derived >
+	struct kernel_creator_concrete : public kernel_creator
+	{
+		//! registers this creator in the kernel map under kernel_name,
+		//! replacing any creator already stored under that name
+		kernel_creator_concrete( const std::string& kernel_name )
+		{
+			std::map< std::string, kernel_creator *>& registry = get_kernel_map();
+			registry[ kernel_name ] = this;
+		}
+		
+		//! allocates a new Derived with the given arguments; the caller receives
+		//! a pointer to a heap object
+		kernel * create( config_file& cf, transfer_function* ptf, refinement_hierarchy& refh, tf_type type ) const
+		{
+			kernel *instance = new Derived( cf, ptf, refh, type );
+			return instance;
+		}
+	};
+
+	
+	//! actual implementation of the FFT convolution (independent of the actual kernel)
+	template< typename real_t >
+	void perform( kernel* pk, void *pd, bool shift );
+	
+	
+	
+	
+} //namespace convolution
+
+	
+#endif //__CONVOLUTION_KERNELS_HH
--- a/cosmology.cc
+++ b/cosmology.cc
@ -0,0 +1,632 @@
+/*
+ 
+ cosmology.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#include "cosmology.hh"
+#include "mesh.hh"
+#include "mg_operators.hh"
+#include "general.hh"
+
+#define ACC(i,j,k) ((*u.get_grid((ilevel)))((i),(j),(k)))
+#define SQR(x)	((x)*(x))
+
+#if defined(FFTW3) && defined(SINGLE_PRECISION)
+#define fftw_complex fftwf_complex
+#endif
+
+
+//! converts the Hessian D of the potential into the density 1/det(1+D)-1
+/*!
+ * @param D 3x3 Hessian matrix, modified in place (the unit matrix is added)
+ * @returns 1/det(1+D) - 1
+ */
+static inline double LLA_density_from_hessian( double D[3][3] )
+{
+	D[0][0] += 1.0;
+	D[1][1] += 1.0;
+	D[2][2] += 1.0;
+	
+	//... cofactor expansion along the first column; the fourth term uses D[2][1]
+	//... so that the formula is the determinant also for non-symmetric input
+	double det = D[0][0]*D[1][1]*D[2][2]
+	-	D[0][0]*D[1][2]*D[2][1]
+	-	D[1][0]*D[0][1]*D[2][2]
+	+	D[1][0]*D[0][2]*D[2][1]
+	+	D[2][0]*D[0][1]*D[1][2]
+	-	D[2][0]*D[0][2]*D[1][1];
+	
+	return 1.0/det-1.0;
+}
+
+//! computes the density 1/det(1+D)-1 from the full finite difference Hessian D of u
+/*!
+ * @param u     grid hierarchy that is differentiated
+ * @param fnew  output hierarchy; it is first assigned from u, then all cells with
+ *              indices in [0,size) are overwritten on every level
+ * @param order order of the finite difference stencils, must be 2, 4 or 6
+ * @throws std::runtime_error if order is not 2, 4 or 6
+ *
+ * The stencils read up to 3 cells outside of [0,size) -- assumes the grids
+ * provide boundary cells of sufficient width, TODO confirm in mesh.hh
+ */
+void compute_LLA_density( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order )
+{
+	fnew = u;
+	
+	for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel )
+	{
+		//... h is 2^ilevel, h2 and h2_4 are the prefactors of the second differences
+		double h = pow(2.0,ilevel), h2 = h*h, h2_4 = 0.25*h2;
+		meshvar_bnd *pvar = fnew.get_grid(ilevel);
+		
+		
+		if( order == 2 )
+		{
+			#pragma omp parallel for //reduction(+:sum_corr,sum,sum2)
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+					{
+						double D[3][3];
+						
+						D[0][0] = (ACC(ix-1,iy,iz)-2.0*ACC(ix,iy,iz)+ACC(ix+1,iy,iz)) * h2;
+						D[1][1] = (ACC(ix,iy-1,iz)-2.0*ACC(ix,iy,iz)+ACC(ix,iy+1,iz)) * h2;
+						D[2][2] = (ACC(ix,iy,iz-1)-2.0*ACC(ix,iy,iz)+ACC(ix,iy,iz+1)) * h2;
+											
+						D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4;
+						D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4;
+						D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4;
+						
+						(*pvar)(ix,iy,iz) = LLA_density_from_hessian( D );
+					}
+		}
+		else if ( order == 4 )
+		{
+			#pragma omp parallel for 
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+					{
+						double D[3][3];
+						
+						//... 5-point stencils on the diagonal, the mixed derivatives
+						//... use the same stencils as for order 2
+						D[0][0] = (-ACC(ix-2,iy,iz)+16.*ACC(ix-1,iy,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix+1,iy,iz)-ACC(ix+2,iy,iz)) * h2/12.0;
+						D[1][1] = (-ACC(ix,iy-2,iz)+16.*ACC(ix,iy-1,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy+1,iz)-ACC(ix,iy+2,iz)) * h2/12.0;
+						D[2][2] = (-ACC(ix,iy,iz-2)+16.*ACC(ix,iy,iz-1)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy,iz+1)-ACC(ix,iy,iz+2)) * h2/12.0;
+						
+						D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4;
+						D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4;
+						D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4;
+						
+						(*pvar)(ix,iy,iz) = LLA_density_from_hessian( D );
+					}
+		}
+		else if ( order == 6 )
+		{
+			//... absorb the stencil normalisations into the prefactors
+			h2_4/=36.;
+			h2/=180.;
+			#pragma omp parallel for 
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+					{
+						double D[3][3];
+						
+						D[0][0] = (2.*ACC(ix-3,iy,iz)-27.*ACC(ix-2,iy,iz)+270.*ACC(ix-1,iy,iz)-490.0*ACC(ix,iy,iz)+270.*ACC(ix+1,iy,iz)-27.*ACC(ix+2,iy,iz)+2.*ACC(ix+3,iy,iz)) * h2;
+						D[1][1] = (2.*ACC(ix,iy-3,iz)-27.*ACC(ix,iy-2,iz)+270.*ACC(ix,iy-1,iz)-490.0*ACC(ix,iy,iz)+270.*ACC(ix,iy+1,iz)-27.*ACC(ix,iy+2,iz)+2.*ACC(ix,iy+3,iz)) * h2;
+						D[2][2] = (2.*ACC(ix,iy,iz-3)-27.*ACC(ix,iy,iz-2)+270.*ACC(ix,iy,iz-1)-490.0*ACC(ix,iy,iz)+270.*ACC(ix,iy,iz+1)-27.*ACC(ix,iy,iz+2)+2.*ACC(ix,iy,iz+3)) * h2;
+						
+						//.. this is actually 8th order accurate
+						D[0][1] = D[1][0] = (64.*(ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))
+											 -8.*(ACC(ix-2,iy-1,iz)-ACC(ix+2,iy-1,iz)-ACC(ix-2,iy+1,iz)+ACC(ix+2,iy+1,iz)
+												+ ACC(ix-1,iy-2,iz)-ACC(ix-1,iy+2,iz)-ACC(ix+1,iy-2,iz)+ACC(ix+1,iy+2,iz))
+											 +1.*(ACC(ix-2,iy-2,iz)-ACC(ix-2,iy+2,iz)-ACC(ix+2,iy-2,iz)+ACC(ix+2,iy+2,iz)))*h2_4;
+						D[0][2] = D[2][0] = (64.*(ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))
+											 -8.*(ACC(ix-2,iy,iz-1)-ACC(ix+2,iy,iz-1)-ACC(ix-2,iy,iz+1)+ACC(ix+2,iy,iz+1)
+												+ ACC(ix-1,iy,iz-2)-ACC(ix-1,iy,iz+2)-ACC(ix+1,iy,iz-2)+ACC(ix+1,iy,iz+2))
+											 +1.*(ACC(ix-2,iy,iz-2)-ACC(ix-2,iy,iz+2)-ACC(ix+2,iy,iz-2)+ACC(ix+2,iy,iz+2)))*h2_4;
+						D[1][2] = D[2][1] = (64.*(ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))
+											 -8.*(ACC(ix,iy-2,iz-1)-ACC(ix,iy+2,iz-1)-ACC(ix,iy-2,iz+1)+ACC(ix,iy+2,iz+1)
+												+ ACC(ix,iy-1,iz-2)-ACC(ix,iy-1,iz+2)-ACC(ix,iy+1,iz-2)+ACC(ix,iy+1,iz+2))
+											 +1.*(ACC(ix,iy-2,iz-2)-ACC(ix,iy-2,iz+2)-ACC(ix,iy+2,iz-2)+ACC(ix,iy+2,iz+2)))*h2_4;
+						
+						(*pvar)(ix,iy,iz) = LLA_density_from_hessian( D );
+					}
+			
+		}else
+			throw std::runtime_error("compute_LLA_density : invalid operator order specified");
+
+	}
+	
+}
+
+
+//! computes the negative Laplacian of u with 3-point second differences
+/*!
+ * @param u     grid hierarchy that is differentiated
+ * @param fnew  output hierarchy; it is first assigned from u, then all cells with
+ *              indices in [0,size) are overwritten on every level
+ * @param order not consulted, the 3-point stencil is used for every value
+ *
+ * The stencil reads one cell outside of [0,size) -- assumes the grids provide
+ * boundary cells, TODO confirm in mesh.hh
+ */
+void compute_Lu_density( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order )
+{
+	fnew = u;
+	
+	for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel )
+	{
+		//... prefactor of the second differences on this level
+		const double inv_h  = pow(2.0,ilevel);
+		const double inv_h2 = inv_h*inv_h;
+		
+		meshvar_bnd *target = fnew.get_grid(ilevel);
+		
+		//... grid extents of this level
+		const int n0 = (int)(*u.get_grid(ilevel)).size(0);
+		const int n1 = (int)(*u.get_grid(ilevel)).size(1);
+		const int n2 = (int)(*u.get_grid(ilevel)).size(2);
+		
+		#pragma omp parallel for
+		for( int i = 0; i < n0; ++i )
+			for( int j = 0; j < n1; ++j )
+				for( int k = 0; k < n2; ++k )
+				{
+					const double twice_centre = 2.0*ACC(i,j,k);
+					
+					//... diagonal of 1+D, D being the Hessian of u
+					const double dxx = 1.0 + (ACC(i-1,j,k)-twice_centre+ACC(i+1,j,k)) * inv_h2;
+					const double dyy = 1.0 + (ACC(i,j-1,k)-twice_centre+ACC(i,j+1,k)) * inv_h2;
+					const double dzz = 1.0 + (ACC(i,j,k-1)-twice_centre+ACC(i,j,k+1)) * inv_h2;
+					
+					//... minus the trace of D
+					(*target)(i,j,k) = -(dxx+dyy+dzz - 3.0);
+				}
+	}
+	
+}
+
+#pragma mark -
+
+
+//! computes the 2LPT source term from u using FFT based second derivatives
+/*!
+ * The field on the single grid level is transformed to Fourier space, the six
+ * independent second derivatives are obtained by multiplication with -k_i*k_j,
+ * transformed back, and combined into
+ *   (D11*D22-D12^2) + (D11*D33-D13^2) + (D22*D33-D23^2).
+ *
+ * @param cf_   configuration file object, not used in this function
+ * @param u     grid hierarchy that is differentiated, must have levelmin == levelmax
+ * @param fnew  output hierarchy; it is first assigned from u, then all cells with
+ *              indices in [0,size) are overwritten
+ * @throws std::runtime_error if u has more than one level
+ */
+void compute_2LPT_source_FFT( config_file& cf_, const grid_hierarchy& u, grid_hierarchy& fnew )
+{
+	if( u.levelmin() != u.levelmax() )
+		throw std::runtime_error("FFT 2LPT can only be run in Unigrid mode!");
+	
+	fnew = u;
+	size_t nx,ny,nz,nzp;
+	nx = u.get_grid(u.levelmax())->size(0);
+	ny = u.get_grid(u.levelmax())->size(1);
+	nz = u.get_grid(u.levelmax())->size(2);
+	//... padded length of the last dimension: nz/2+1 complex values = nzp reals
+	nzp = 2*(nz/2+1);
+	
+	//... copy data ..................................................
+	//... each real array is also accessed as complex array through a cast pointer
+	fftw_real *data = new fftw_real[nx*ny*nzp];
+	fftw_complex *cdata = reinterpret_cast<fftw_complex*> (data);
+	
+	fftw_complex	*cdata_11, *cdata_12, *cdata_13, *cdata_22, *cdata_23, *cdata_33;
+	fftw_real		*data_11, *data_12, *data_13, *data_22, *data_23, *data_33;
+	
+	//... one buffer for each independent component of the symmetric Hessian
+	data_11 = new fftw_real[nx*ny*nzp]; cdata_11 = reinterpret_cast<fftw_complex*> (data_11);
+	data_12 = new fftw_real[nx*ny*nzp]; cdata_12 = reinterpret_cast<fftw_complex*> (data_12);
+	data_13 = new fftw_real[nx*ny*nzp]; cdata_13 = reinterpret_cast<fftw_complex*> (data_13);
+	data_22 = new fftw_real[nx*ny*nzp]; cdata_22 = reinterpret_cast<fftw_complex*> (data_22);
+	data_23 = new fftw_real[nx*ny*nzp]; cdata_23 = reinterpret_cast<fftw_complex*> (data_23);
+	data_33 = new fftw_real[nx*ny*nzp]; cdata_33 = reinterpret_cast<fftw_complex*> (data_33);
+	
+	//... only the first nz entries of each padded row of length nzp are filled
+	#pragma omp parallel for
+	for( int i=0; i<(int)nx; ++i )
+		for( size_t j=0; j<ny; ++j )	
+			for( size_t k=0; k<nz; ++k )
+			{
+				size_t idx = ((size_t)i*ny+j)*nzp+k;
+				data[idx] = (*u.get_grid(u.levelmax()))(i,j,k);
+			}
+	
+	//... perform FFT and Poisson solve................................
+#ifdef FFTW3
+	
+	//... FFTW3: one forward plan for data and one in-place inverse plan per
+	//... derivative buffer; iplan is created and destroyed but never executed here
+	#ifdef SINGLE_PRECISION
+	fftwf_plan
+		plan  = fftwf_plan_dft_r2c_3d(nx,ny,nz, data, cdata, FFTW_ESTIMATE),
+		iplan = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata, data, FFTW_ESTIMATE),
+		ip11  = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_11, data_11, FFTW_ESTIMATE),
+		ip12  = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_12, data_12, FFTW_ESTIMATE),
+		ip13  = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_13, data_13, FFTW_ESTIMATE),
+		ip22  = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_22, data_22, FFTW_ESTIMATE),
+		ip23  = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_23, data_23, FFTW_ESTIMATE),
+		ip33  = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_33, data_33, FFTW_ESTIMATE);
+	
+	fftwf_execute(plan);
+	
+	#else
+	
+	fftw_plan
+		plan  = fftw_plan_dft_r2c_3d(nx,ny,nz, data, cdata, FFTW_ESTIMATE),
+		iplan = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata, data, FFTW_ESTIMATE),
+		ip11  = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_11, data_11, FFTW_ESTIMATE),
+		ip12  = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_12, data_12, FFTW_ESTIMATE),
+		ip13  = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_13, data_13, FFTW_ESTIMATE),
+		ip22  = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_22, data_22, FFTW_ESTIMATE),
+		ip23  = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_23, data_23, FFTW_ESTIMATE),
+		ip33  = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_33, data_33, FFTW_ESTIMATE);
+	
+	fftw_execute(plan);
+	
+	#endif
+	
+	//... kfac converts the integer mode number to a wave number, norm is the
+	//... 1/N factor that is applied once here, before the inverse transforms
+	double kfac = 2.0*M_PI;
+	double norm = 1.0/((double)(nx*ny*nz));
+	
+	#pragma omp parallel for
+	for( int i=0; i<(int)nx; ++i )
+		for( size_t j=0; j<ny; ++j )	
+			for( size_t l=0; l<nz/2+1; ++l )
+			{
+				//... indices above n/2 are mapped to negative mode numbers
+				int ii = i; if(ii>(int)nx/2) ii-=nx;
+				int jj = (int)j; if(jj>(int)ny/2) jj-=ny;
+				double ki = (double)ii;
+				double kj = (double)jj;
+				double kk = (double)l;
+				
+				double k[3];
+				k[0] = (double)ki * kfac;
+				k[1] = (double)kj * kfac;
+				k[2] = (double)kk * kfac;
+				
+				//... index into the complex arrays, nzp/2 complex values per row
+				size_t idx = ((size_t)i*ny+j)*nzp/2+l;
+				//double re = cdata[idx][0];
+				//double im = cdata[idx][1];
+				
+				//... second derivatives: multiply the transform by -k_a*k_b
+				cdata_11[idx][0] = -k[0]*k[0] * cdata[idx][0] * norm;
+				cdata_11[idx][1] = -k[0]*k[0] * cdata[idx][1] * norm;
+				
+				cdata_12[idx][0] = -k[0]*k[1] * cdata[idx][0] * norm;
+				cdata_12[idx][1] = -k[0]*k[1] * cdata[idx][1] * norm;
+				
+				cdata_13[idx][0] = -k[0]*k[2] * cdata[idx][0] * norm;
+				cdata_13[idx][1] = -k[0]*k[2] * cdata[idx][1] * norm;
+				
+				cdata_22[idx][0] = -k[1]*k[1] * cdata[idx][0] * norm;
+				cdata_22[idx][1] = -k[1]*k[1] * cdata[idx][1] * norm;
+				
+				cdata_23[idx][0] = -k[1]*k[2] * cdata[idx][0] * norm;
+				cdata_23[idx][1] = -k[1]*k[2] * cdata[idx][1] * norm;
+				
+				cdata_33[idx][0] = -k[2]*k[2] * cdata[idx][0] * norm;
+				cdata_33[idx][1] = -k[2]*k[2] * cdata[idx][1] * norm;
+				
+				
+				//... all modes on a Nyquist plane are set to zero
+				if( i==(int)nx/2||j==ny/2||l==nz/2)
+				{
+					cdata_11[idx][0] = 0.0;
+					cdata_11[idx][1] = 0.0;
+					
+					cdata_12[idx][0] = 0.0;
+					cdata_12[idx][1] = 0.0;
+					
+					cdata_13[idx][0] = 0.0;
+					cdata_13[idx][1] = 0.0;
+					
+					cdata_22[idx][0] = 0.0;
+					cdata_22[idx][1] = 0.0;
+					
+					cdata_23[idx][0] = 0.0;
+					cdata_23[idx][1] = 0.0;
+					
+					cdata_33[idx][0] = 0.0;
+					cdata_33[idx][1] = 0.0;
+				}
+				
+			}
+	
+	//... the transform of the input is not needed any more
+	delete[] data;
+	/*cdata_11[0][0]	= 0.0; cdata_11[0][1]	= 0.0;
+	 cdata_12[0][0]	= 0.0; cdata_12[0][1]	= 0.0;
+	 cdata_13[0][0]	= 0.0; cdata_13[0][1]	= 0.0;
+	 cdata_22[0][0]	= 0.0; cdata_22[0][1]	= 0.0;
+	 cdata_23[0][0]	= 0.0; cdata_23[0][1]	= 0.0;
+	 cdata_33[0][0]	= 0.0; cdata_33[0][1]	= 0.0;*/
+	
+	
+	//... transform the six derivative fields back to real space, in place
+#ifdef SINGLE_PRECISION
+	fftwf_execute(ip11);
+	fftwf_execute(ip12);
+	fftwf_execute(ip13);
+	fftwf_execute(ip22);
+	fftwf_execute(ip23);
+	fftwf_execute(ip33);
+	
+	fftwf_destroy_plan(plan);
+	fftwf_destroy_plan(iplan);
+	fftwf_destroy_plan(ip11);
+	fftwf_destroy_plan(ip12);
+	fftwf_destroy_plan(ip13);
+	fftwf_destroy_plan(ip22);
+	fftwf_destroy_plan(ip23);
+	fftwf_destroy_plan(ip33);
+#else
+	fftw_execute(ip11);
+	fftw_execute(ip12);
+	fftw_execute(ip13);
+	fftw_execute(ip22);
+	fftw_execute(ip23);
+	fftw_execute(ip33);
+	
+	fftw_destroy_plan(plan);
+	fftw_destroy_plan(iplan);
+	fftw_destroy_plan(ip11);
+	fftw_destroy_plan(ip12);
+	fftw_destroy_plan(ip13);
+	fftw_destroy_plan(ip22);
+	fftw_destroy_plan(ip23);
+	fftw_destroy_plan(ip33);
+
+#endif
+//#endif
+	
+	
+#else
+	//... FFTW2: a single pair of in-place plans is shared by all buffers
+	rfftwnd_plan 
+		plan = rfftw3d_create_plan( nx,ny,nz,
+								   FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
+		iplan = rfftw3d_create_plan( nx,ny,nz,
+									FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
+	
+	
+	#ifndef SINGLETHREAD_FFTW		
+	rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, data, NULL );
+	#else
+	rfftwnd_one_real_to_complex( plan, data, NULL );
+	#endif
+//#endif
+	//double fac = -1.0/(nx*ny*nz);
+	//... kfac converts the integer mode number to a wave number, norm is the
+	//... 1/N factor that is applied once here, before the inverse transforms
+	double kfac = 2.0*M_PI;
+	double norm = 1.0/((double)(nx*ny*nz));
+	
+	#pragma omp parallel for
+	for( int i=0; i<(int)nx; ++i )
+		for( size_t j=0; j<ny; ++j )	
+			for( size_t l=0; l<nz/2+1; ++l )
+			{
+				//... indices above n/2 are mapped to negative mode numbers
+				int ii = (int)i; if(ii>(int)(nx/2)) ii-=(int)nx;
+				int jj = (int)j; if(jj>(int)(ny/2)) jj-=(int)ny;
+				double ki = (double)ii;
+				double kj = (double)jj;
+				double kk = (double)l;
+				
+				double k[3];
+				k[0] = (double)ki * kfac;
+				k[1] = (double)kj * kfac;
+				k[2] = (double)kk * kfac;
+				
+				//... index into the complex arrays, nzp/2 complex values per row
+				size_t idx = ((size_t)i*ny+j)*nzp/2+l;
+				//double re = cdata[idx].re;
+				//double im = cdata[idx].im;
+				
+				//... second derivatives: multiply the transform by -k_a*k_b
+				cdata_11[idx].re = -k[0]*k[0] * cdata[idx].re * norm;
+				cdata_11[idx].im = -k[0]*k[0] * cdata[idx].im * norm;
+				
+				cdata_12[idx].re = -k[0]*k[1] * cdata[idx].re * norm;
+				cdata_12[idx].im = -k[0]*k[1] * cdata[idx].im * norm;
+				
+				cdata_13[idx].re = -k[0]*k[2] * cdata[idx].re * norm;
+				cdata_13[idx].im = -k[0]*k[2] * cdata[idx].im * norm;
+				
+				cdata_22[idx].re = -k[1]*k[1] * cdata[idx].re * norm;
+				cdata_22[idx].im = -k[1]*k[1] * cdata[idx].im * norm;
+				
+				cdata_23[idx].re = -k[1]*k[2] * cdata[idx].re * norm;
+				cdata_23[idx].im = -k[1]*k[2] * cdata[idx].im * norm;
+				
+				cdata_33[idx].re = -k[2]*k[2] * cdata[idx].re * norm;
+				cdata_33[idx].im = -k[2]*k[2] * cdata[idx].im * norm;
+				
+				
+				//... all modes on a Nyquist plane are set to zero
+				if( i==(int)(nx/2)||j==ny/2||l==nz/2)
+				{
+					cdata_11[idx].re = 0.0;
+					cdata_11[idx].im = 0.0;
+					
+					cdata_12[idx].re = 0.0;
+					cdata_12[idx].im = 0.0;
+					
+					cdata_13[idx].re = 0.0;
+					cdata_13[idx].im = 0.0;
+					
+					cdata_22[idx].re = 0.0;
+					cdata_22[idx].im = 0.0;
+					
+					cdata_23[idx].re = 0.0;
+					cdata_23[idx].im = 0.0;
+					
+					cdata_33[idx].re = 0.0;
+					cdata_33[idx].im = 0.0;
+				}
+				
+			}
+	
+	//... the transform of the input is not needed any more
+	delete[] data;
+	/*cdata_11[0].re	= 0.0; cdata_11[0].im	= 0.0;
+	cdata_12[0].re	= 0.0; cdata_12[0].im	= 0.0;
+	cdata_13[0].re	= 0.0; cdata_13[0].im	= 0.0;
+	cdata_22[0].re	= 0.0; cdata_22[0].im	= 0.0;
+	cdata_23[0].re	= 0.0; cdata_23[0].im	= 0.0;
+	cdata_33[0].re	= 0.0; cdata_33[0].im	= 0.0;*/
+	
+	
+	//... transform the six derivative fields back to real space, in place
+#ifndef SINGLETHREAD_FFTW		
+	//rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL );
+	rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_11, NULL );
+	rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_12, NULL );
+	rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_13, NULL );
+	rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_22, NULL );
+	rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_23, NULL );
+	rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_33, NULL );
+#else
+	//rfftwnd_one_complex_to_real( iplan, cdata, NULL );
+	rfftwnd_one_complex_to_real(iplan, cdata_11, NULL );
+	rfftwnd_one_complex_to_real(iplan, cdata_12, NULL );
+	rfftwnd_one_complex_to_real(iplan, cdata_13, NULL );
+	rfftwnd_one_complex_to_real(iplan, cdata_22, NULL );
+	rfftwnd_one_complex_to_real(iplan, cdata_23, NULL );
+	rfftwnd_one_complex_to_real(iplan, cdata_33, NULL );
+#endif
+	
+	
+	
+	rfftwnd_destroy_plan(plan);
+	rfftwnd_destroy_plan(iplan);
+#endif
+
+
+	//... copy data ..........................................
+	//... the result is the sum of the three principal 2x2 minors of the Hessian
+	#pragma omp parallel for
+	for( int i=0; i<(int)nx; ++i )
+		for( size_t j=0; j<ny; ++j )	
+			for( size_t k=0; k<nz; ++k )
+			{
+				size_t ii = ((size_t)i*ny+j)*nzp+k;
+				(*fnew.get_grid(u.levelmax()))(i,j,k) = (( data_11[ii]*data_22[ii]-data_12[ii]*data_12[ii] ) +
+														 ( data_11[ii]*data_33[ii]-data_13[ii]*data_13[ii] ) +
+														 ( data_22[ii]*data_33[ii]-data_23[ii]*data_23[ii] ) );
+				
+				//(*fnew.get_grid(u.levelmax()))(i,j,k) = 
+				
+			}
+	
+	//... data has already been freed above
+	//delete[] data;
+	delete[] data_11;
+	delete[] data_12;
+	delete[] data_13;
+	delete[] data_23;
+	delete[] data_22;
+	delete[] data_33;
+}
+
+
+//! computes the 2LPT source term from u using finite difference second derivatives
+/*!
+ * For every cell the Hessian D of u is computed with finite differences and
+ *   (D00*D11-D01^2) + (D00*D22-D02^2) + (D11*D22-D12^2)
+ * is stored. Afterwards the mean over the coarsest level is subtracted on
+ * all levels.
+ *
+ * @param u     grid hierarchy that is differentiated
+ * @param fnew  output hierarchy; it is first assigned from u, then all cells with
+ *              indices in [0,size) are overwritten on every level
+ * @param order order of the finite difference stencils, must be 2, 4 or 6
+ * @throws std::runtime_error if order is not 2, 4 or 6
+ *
+ * The stencils read up to 3 cells outside of [0,size) -- assumes the grids
+ * provide boundary cells of sufficient width, TODO confirm in mesh.hh
+ */
+void compute_2LPT_source( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order )
+{
+	fnew = u;
+	
+	for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel )
+	{
+		//... h is 2^ilevel, h2 and h2_4 are the prefactors of the second differences
+		double h = pow(2.0,ilevel), h2 = h*h, h2_4 = 0.25*h2;
+		meshvar_bnd *pvar = fnew.get_grid(ilevel);
+		
+		if ( order == 2 )
+		{
+			
+			#pragma omp parallel for
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+					{
+						double D[3][3];
+						
+						D[0][0] = (ACC(ix-1,iy,iz)-2.0*ACC(ix,iy,iz)+ACC(ix+1,iy,iz)) * h2;
+						D[1][1] = (ACC(ix,iy-1,iz)-2.0*ACC(ix,iy,iz)+ACC(ix,iy+1,iz)) * h2;
+						D[2][2] = (ACC(ix,iy,iz-1)-2.0*ACC(ix,iy,iz)+ACC(ix,iy,iz+1)) * h2;
+						D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4;
+						D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4;
+						D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4;
+						
+						
+						//... sum of the three principal 2x2 minors of D
+						(*pvar)(ix,iy,iz) =  ( D[0][0]*D[1][1] - SQR( D[0][1] )
+											+ D[0][0]*D[2][2] - SQR( D[0][2] )
+											+ D[1][1]*D[2][2] - SQR( D[1][2] ));
+						
+					}
+		}
+		else if ( order == 4 )
+		{
+			#pragma omp parallel for
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+					{
+						double D[3][3];
+						
+						//... 5-point stencils on the diagonal, the mixed derivatives
+						//... use the same stencils as for order 2
+						D[0][0] = (-ACC(ix-2,iy,iz)+16.*ACC(ix-1,iy,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix+1,iy,iz)-ACC(ix+2,iy,iz)) * h2/12.0;
+						D[1][1] = (-ACC(ix,iy-2,iz)+16.*ACC(ix,iy-1,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy+1,iz)-ACC(ix,iy+2,iz)) * h2/12.0;
+						D[2][2] = (-ACC(ix,iy,iz-2)+16.*ACC(ix,iy,iz-1)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy,iz+1)-ACC(ix,iy,iz+2)) * h2/12.0;
+						D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4;
+						D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4;
+						D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4;
+						
+						
+						//... sum of the three principal 2x2 minors of D
+						(*pvar)(ix,iy,iz) =  ( D[0][0]*D[1][1] - SQR( D[0][1] )
+						+ D[0][0]*D[2][2] - SQR( D[0][2] )
+						+ D[1][1]*D[2][2] - SQR( D[1][2] ));
+						
+					}
+		}
+		else if ( order == 6 )
+		{
+			//... absorb the stencil normalisations into the prefactors
+			h2_4/=36.;
+			h2/=180.;
+#pragma omp parallel for 
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+					{
+						double D[3][3];
+						
+						D[0][0] = (2.*ACC(ix-3,iy,iz)-27.*ACC(ix-2,iy,iz)+270.*ACC(ix-1,iy,iz)-490.0*ACC(ix,iy,iz)+270.*ACC(ix+1,iy,iz)-27.*ACC(ix+2,iy,iz)+2.*ACC(ix+3,iy,iz)) * h2;
+						D[1][1] = (2.*ACC(ix,iy-3,iz)-27.*ACC(ix,iy-2,iz)+270.*ACC(ix,iy-1,iz)-490.0*ACC(ix,iy,iz)+270.*ACC(ix,iy+1,iz)-27.*ACC(ix,iy+2,iz)+2.*ACC(ix,iy+3,iz)) * h2;
+						D[2][2] = (2.*ACC(ix,iy,iz-3)-27.*ACC(ix,iy,iz-2)+270.*ACC(ix,iy,iz-1)-490.0*ACC(ix,iy,iz)+270.*ACC(ix,iy,iz+1)-27.*ACC(ix,iy,iz+2)+2.*ACC(ix,iy,iz+3)) * h2;
+						
+						//.. this is actually 8th order accurate
+						D[0][1] = D[1][0] = (64.*(ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))
+											 -8.*(ACC(ix-2,iy-1,iz)-ACC(ix+2,iy-1,iz)-ACC(ix-2,iy+1,iz)+ACC(ix+2,iy+1,iz)
+												  + ACC(ix-1,iy-2,iz)-ACC(ix-1,iy+2,iz)-ACC(ix+1,iy-2,iz)+ACC(ix+1,iy+2,iz))
+											 +1.*(ACC(ix-2,iy-2,iz)-ACC(ix-2,iy+2,iz)-ACC(ix+2,iy-2,iz)+ACC(ix+2,iy+2,iz)))*h2_4;
+						D[0][2] = D[2][0] = (64.*(ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))
+											 -8.*(ACC(ix-2,iy,iz-1)-ACC(ix+2,iy,iz-1)-ACC(ix-2,iy,iz+1)+ACC(ix+2,iy,iz+1)
+												  + ACC(ix-1,iy,iz-2)-ACC(ix-1,iy,iz+2)-ACC(ix+1,iy,iz-2)+ACC(ix+1,iy,iz+2))
+											 +1.*(ACC(ix-2,iy,iz-2)-ACC(ix-2,iy,iz+2)-ACC(ix+2,iy,iz-2)+ACC(ix+2,iy,iz+2)))*h2_4;
+						D[1][2] = D[2][1] = (64.*(ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))
+											 -8.*(ACC(ix,iy-2,iz-1)-ACC(ix,iy+2,iz-1)-ACC(ix,iy-2,iz+1)+ACC(ix,iy+2,iz+1)
+												  + ACC(ix,iy-1,iz-2)-ACC(ix,iy-1,iz+2)-ACC(ix,iy+1,iz-2)+ACC(ix,iy+1,iz+2))
+											 +1.*(ACC(ix,iy-2,iz-2)-ACC(ix,iy-2,iz+2)-ACC(ix,iy+2,iz-2)+ACC(ix,iy+2,iz+2)))*h2_4;
+						
+						//... sum of the three principal 2x2 minors of D
+						(*pvar)(ix,iy,iz) =  ( D[0][0]*D[1][1] - SQR( D[0][1] )
+											+ D[0][0]*D[2][2] - SQR( D[0][2] )
+											+ D[1][1]*D[2][2] - SQR( D[1][2] ) );
+						
+					}
+			
+			
+		}
+		else
+			throw std::runtime_error("compute_2LPT_source : invalid operator order specified");
+
+
+	}
+	
+	
+	//.. subtract global mean so the multi-grid poisson solver behaves well
+	
+	//... restrict each level onto the next coarser one, from the finest level
+	//... down, before the mean is taken over the coarsest level
+	for( int i=fnew.levelmax(); i>(int)fnew.levelmin(); --i )
+		mg_straight().restrict( (*fnew.get_grid(i)), (*fnew.get_grid(i-1)) );
+	
+	long double sum = 0.0;
+	int nx,ny,nz;
+	
+	nx = fnew.get_grid(fnew.levelmin())->size(0);
+	ny = fnew.get_grid(fnew.levelmin())->size(1);
+	nz = fnew.get_grid(fnew.levelmin())->size(2);
+	
+	//... serial accumulation of the coarsest level in extended precision
+	for( int ix=0; ix<nx; ++ix )
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+				sum += (*fnew.get_grid(fnew.levelmin()))(ix,iy,iz);
+	
+	sum /= (double)((size_t)nx*(size_t)ny*(size_t)nz);
+	
+	//... the same mean value is subtracted on every level
+	for( unsigned i=fnew.levelmin(); i<=fnew.levelmax(); ++i )
+	{		
+		nx = fnew.get_grid(i)->size(0);
+		ny = fnew.get_grid(i)->size(1);
+		nz = fnew.get_grid(i)->size(2);
+		
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+				for( int iz=0; iz<nz; ++iz )
+					(*fnew.get_grid(i))(ix,iy,iz) -= sum;
+	}
+	
+}
+#undef SQR
+#undef ACC
+
--- a/cosmology.hh
+++ b/cosmology.hh
@ -0,0 +1,309 @@
+/*
+ 
+ cosmology.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#ifndef _COSMOLOGY_HH
+#define _COSMOLOGY_HH
+
+
+#include "transfer_function.hh"
+#include "mesh.hh"
+#include "general.hh"
+
+/*!
+ * @class CosmoCalc
+ * @brief provides functions to compute cosmological quantities
+ *
+ * This class provides member functions to compute cosmological quantities
+ * related to the Friedmann equations and linear perturbation theory
+ */
+class CosmoCalc
+{
+public:
+	//! data structure to store cosmological parameters
+	Cosmology m_Cosmology;
+	
+	//! pointer to an instance of a transfer function plugin
+	transfer_function_plugin *m_pTransferFunction;
+	
+	
+	//! constructor for a cosmology calculator object
+	/*!
+	 * @param acosmo a cosmological parameters structure, a copy is stored
+	 * @param pTransferFunction pointer to an instance of a transfer function object,
+	 *        only the pointer is stored
+	 */
+	 
+	CosmoCalc( const Cosmology acosmo, transfer_function_plugin *pTransferFunction )
+	: m_Cosmology( acosmo ), m_pTransferFunction( pTransferFunction )
+	{	}
+	
+	//! returns the amplitude of the power spectrum
+	/*!
+	 * The growth factors and the normalization are recomputed on every call.
+	 * @param k the wave number in h/Mpc
+	 * @param a the expansion factor of the universe
+	 * @returns power spectrum amplitude for wave number k at time a
+	 */
+	inline real_t Power( real_t k, real_t a ){
+		real_t Dplus    = CalcGrowthFactor( a );
+		real_t DplusOne = CalcGrowthFactor( 1.0 );
+		real_t pNorm    = ComputePNorm( 1e4 );
+		
+		//... growth factor relative to a=1
+		real_t scale = Dplus/DplusOne;
+		return pNorm*scale*scale*TransferSq(k)*pow((double)k,(double)m_Cosmology.nspect);
+	}
+	
+	//! integrand function for Calc_fPeebles
+	/*!
+	 * @param a expansion factor
+	 * @param Params pointer to a Cosmology structure
+	 * @sa Calc_fPeebles
+	 */
+	inline static double fy( double a, void *Params )
+	{
+		Cosmology *cosm = (Cosmology*)Params;
+		double y = cosm->Omega_m*(1.0/a-1.0) + cosm->Omega_L*(a*a-1.0) + 1.0;
+		return 1.0/pow(y,1.5);
+	}
+	
+	//! calculates d log D+/d log a
+	/*! this version follows the Peebles (TBD: add citation)
+	 *  formula to compute Bertschinger's vfact
+	 *  @param a expansion factor
+	 */
+	inline real_t Calc_fPeebles( real_t a )
+	{
+		real_t y = m_Cosmology.Omega_m*(1.0/a-1.0) + m_Cosmology.Omega_L*(a*a-1.0) + 1.0;
+		//... the integration starts at a=1e-6
+		real_t fact = integrate( &fy, 1e-6, a, (void*)&m_Cosmology );
+		return (m_Cosmology.Omega_L*a*a-0.5*m_Cosmology.Omega_m/a)/y - 1.0 + a*fy(a,(void*)&m_Cosmology)/fact;
+	}
+	
+	//! Computes the linear theory growth factor D+
+	/*! Function integrates over member function GrowthIntegrand
+	 *  @param a expansion factor
+	 */
+	inline real_t CalcGrowthFactor( real_t a )
+	{ 
+		real_t eta =  sqrt((double)(m_Cosmology.Omega_r/a/a+m_Cosmology.Omega_m/a+m_Cosmology.Omega_L*a*a
+								  +1.0-m_Cosmology.Omega_m-m_Cosmology.Omega_L));
+		
+		real_t integral = integrate( &GrowthIntegrand, 0.0, a, (void*)&m_Cosmology );
+		return eta/a*integral;
+	}
+    
+	//! integrand used by function CalcTFac
+	/*!
+	 * @param a expansion factor
+	 * @param Params pointer to a Cosmology structure
+	 * @returns 1/(100*sqrt(Omega_m/a^3 + Omega_L + (1-Omega_m-Omega_L)/a^2))
+	 */
+    inline static double InvHubble( double a, void *Params )
+    {
+        Cosmology *cosm = (Cosmology*)Params;
+		double eta = 1.0/(100.0*sqrt((double)(cosm->Omega_m/(a*a*a)+cosm->Omega_L
+								   +(1.0-cosm->Omega_m-cosm->Omega_L)/(a*a))));
+        return eta;
+    }
+    
+	//! returns the inverse of the integral of InvHubble between a0 and a1
+    inline real_t CalcTFac( real_t a0, real_t a1 )
+    {
+        real_t fact = integrate( &InvHubble, a0, a1, (void*)&m_Cosmology );
+        return 1.0/fact;
+    }
+    
+	//! Integrand used by function CalcGrowthFactor to determine the linear growth factor D+
+	inline static double GrowthIntegrand( double a, void *Params )
+	{
+		Cosmology *cosm = (Cosmology*)Params;
+		double eta = sqrt((double)(cosm->Omega_r/a/a+cosm->Omega_m/a+cosm->Omega_L*a*a
+								   +1.0-cosm->Omega_m-cosm->Omega_L));
+		return 2.5/(eta*eta*eta);
+	}
+    
+    //! Computes the linear theory growth factor D+ without the Omega_r term
+	/*! Function integrates over member function GrowthIntegrand_Matter */
+	inline real_t CalcGrowthFactor_Matter( real_t a )
+	{ 
+		real_t eta =  sqrt((double)(m_Cosmology.Omega_m/a+m_Cosmology.Omega_L*a*a
+                                    +1.0-m_Cosmology.Omega_m-m_Cosmology.Omega_L));
+		
+		real_t integral = integrate( &GrowthIntegrand_Matter, 0.0, a, (void*)&m_Cosmology );
+		return eta/a*integral;
+	}
+    
+	//! Integrand used by function CalcGrowthFactor_Matter, has no Omega_r term
+	inline static double GrowthIntegrand_Matter( double a, void *Params )
+	{
+		Cosmology *cosm = (Cosmology*)Params;
+		double eta = sqrt((double)(cosm->Omega_m/a+cosm->Omega_L*a*a
+								   +1.0-cosm->Omega_m-cosm->Omega_L));
+		return 2.5/(eta*eta*eta);
+	}
+	
+	//! alternative to ComputeVFact that is based on Calc_fPeebles
+    real_t ComputeVFactnew( real_t a ){
+     return Calc_fPeebles(a)*sqrt(m_Cosmology.Omega_m/a+m_Cosmology.Omega_L*a*a+1.0-m_Cosmology.Omega_m-m_Cosmology.Omega_L)*100.0;
+    }
+     
+	//! returns Calc_fPeebles(a)
+     real_t ComputedDdt( real_t a )
+     {
+     return Calc_fPeebles(a);
+     }
+    
+	//! Compute the factor relating particle displacement and velocity
+	real_t ComputeVFact( real_t a ){
+		real_t fomega, dlogadt, eta;
+		real_t Omega_k = 1.0 - m_Cosmology.Omega_m - m_Cosmology.Omega_L;
+		
+		real_t Dplus = CalcGrowthFactor( a );
+		
+		eta     = sqrt( (double)(m_Cosmology.Omega_r/a/a+m_Cosmology.Omega_m/a+ m_Cosmology.Omega_L*a*a + Omega_k ));
+		fomega  = (2.5/Dplus-1.5*m_Cosmology.Omega_m/a-Omega_k)/eta/eta;
+		dlogadt = a*eta;
+		
+		//... /100.0 since we would have to multiply by H0 to convert
+		//... the displacement to velocity units. But displacement is
+		//... in Mpc/h, and H0 in units of h is 100.
+        
+        //std::cerr << "vfact1 = " << fomega * dlogadt/a *100.0 << "\n";
+        //std::cerr << "vfact2 = " << ComputeVFactnew(a) << "\n";
+        
+		return fomega * dlogadt/a *100.0;
+	}
+	
+	
+	
+	
+	//! Integrand for the sigma_8 normalization of the power spectrum
+	/*! Returns the value of the primordial power spectrum multiplied with 
+	 the transfer function and the window function of 8 Mpc/h at wave number k
+	 @param k wave number
+	 @param Params pointer to the transfer function object */
+	static double dSigma8( double k, void *Params )
+	{
+		if( k<=0.0 )
+			return 0.0;
+		
+		transfer_function *ptf = (transfer_function *)Params;
+		
+		double x = k*8.0;
+		double w = 3.0*(sin(x)-x*cos(x))/(x*x*x);
+		//... read on every call: a function-local static would keep the value
+		//... of the first transfer function that was ever passed in
+		double nspect = (double)ptf->cosmo_.nspect;
+		
+		double tf = ptf->compute(k, total);
+		
+		//... no growth factor since we compute at z=0 and normalize so that D+(z=0)=1
+		return k*k * w*w * pow((double)k,(double)nspect) * tf*tf;
+		
+	}
+    
+    //! Integrand for the sigma_8 normalization of the power spectrum
+	/*! Same as dSigma8, but the transfer function is evaluated for type total0
+	 @param k wave number
+	 @param Params pointer to the transfer function object */
+	static double dSigma8_0( double k, void *Params )
+	{
+		if( k<=0.0 )
+			return 0.0;
+		
+		transfer_function *ptf = (transfer_function *)Params;
+		
+		double x = k*8.0;
+		double w = 3.0*(sin(x)-x*cos(x))/(x*x*x);
+		//... read on every call: a function-local static would keep the value
+		//... of the first transfer function that was ever passed in
+		double nspect = (double)ptf->cosmo_.nspect;
+		
+		double tf = ptf->compute(k, total0);
+		
+		//... no growth factor since we compute at z=0 and normalize so that D+(z=0)=1
+		return k*k * w*w * pow((double)k,(double)nspect) * tf*tf;
+		
+	}
+	
+	
+	//! Computes the square of the transfer function
+	/*! Function evaluates the supplied transfer function m_pTransferFunction
+	 * and returns the square of its value at wave number k
+	 * @param k wave number at which to evaluate the transfer function
+	 */
+	inline real_t TransferSq( real_t k ){
+		//.. parameter supplied transfer function
+		real_t tf1 = m_pTransferFunction->compute(k, total);
+		return tf1*tf1;
+	}
+	
+	
+	//! Computes the normalization for the power spectrum
+	/*!
+	 * integrates the power spectrum to fix the normalization to that given
+	 * by the sigma_8 parameter
+	 * @param kmax ignored, the integration limits are taken from the transfer function
+	 * @returns sigma8^2 divided by the integral
+	 */
+	real_t ComputePNorm( real_t kmax )
+	{
+		real_t sigma0, kmin;
+		kmax = m_pTransferFunction->get_kmax();//m_Cosmology.H0/8.0;
+		kmin = m_pTransferFunction->get_kmin();//0.0;
+        
+		//... use the total0 transfer function if the plugin provides it
+        if( !m_pTransferFunction->tf_has_total0() )
+            sigma0 = 4.0 * M_PI * integrate( &dSigma8, (double)kmin, (double)kmax, (void*)m_pTransferFunction );
+		else
+            sigma0 = 4.0 * M_PI * integrate( &dSigma8_0, (double)kmin, (double)kmax, (void*)m_pTransferFunction );
+		
+        return m_Cosmology.sigma8*m_Cosmology.sigma8/sigma0;
+	}
+    
+    
+	
+    
+	//! integrand function for comoving line element
+	inline static double dline( double a, void *Params )
+	{
+		Cosmology *cosm = (Cosmology*)Params;
+		double y = (cosm->Omega_m + cosm->Omega_L*a*a*a)*a;
+		
+		return 1./sqrt(y);
+	}
+	
+	//! compute necessary initial velocity kick to prevent motion of zoom region 
+	/*!
+	 * @param astart expansion factor at which the integration of dline starts
+	 * @param afinal expansion factor at which the integration of dline ends
+	 * @returns H0 * D+(afinal)/D+(astart) divided by the integral of dline
+	 */
+	real_t ComputeVelocityCompensation( double astart, double afinal )
+	{
+		double s = integrate( &dline, astart, afinal, (void*)&m_Cosmology );
+		//std::cerr << "s      = " << s << std::endl;
+		//std::cerr << "D(z_f) = " << CalcGrowthFactor(afinal) << std::endl;
+		//std::cerr << "D(z_i) = " << CalcGrowthFactor(astart) << std::endl;
+		return m_Cosmology.H0 * CalcGrowthFactor(afinal)/CalcGrowthFactor(astart)/s;
+	}
+	
+	//! same as ComputeVelocityCompensation, but with the squared growth factor ratio
+	real_t ComputeVelocityCompensation_2LPT( double astart, double afinal )
+	{
+		double s = integrate( &dline, astart, afinal, (void*)&m_Cosmology );
+		//std::cerr << "s      = " << s << std::endl;
+		//std::cerr << "D(z_f) = " << CalcGrowthFactor(afinal) << std::endl;
+		//std::cerr << "D(z_i) = " << CalcGrowthFactor(astart) << std::endl;
+		return m_Cosmology.H0 * pow(CalcGrowthFactor(afinal)/CalcGrowthFactor(astart),2.0)/s;
+	}
+	
+
+	
+};
+
+
+//! sound speed for which a given mass equals the Jeans mass
+/*! takes a density in g/cm^3 and a mass in g and returns the sound speed
+ *  in cm/s at which the input mass is the Jeans mass, i.e.
+ *  ( 6*mass/pi * sqrt(rho) * G^(3/2) )^(1/3) with G = 6.67e-8
+ *  @param rho density 
+ *  @param mass mass scale
+ *  @returns jeans sound speed
+ */
+inline double jeans_sound_speed( double rho, double mass )
+{
+	const double G = 6.67e-8;
+	
+	//... third power of the sound speed
+	const double cs_cubed = 6.0*mass/M_PI*sqrt(rho)*pow(G,1.5);
+	
+	return pow( cs_cubed, 1.0/3.0 );
+}
+
+//! computes the density from the potential using the Laplacian
+/*! NOTE: the implementation in cosmology.cc always uses 3-point second
+ *  differences, the parameter order is not consulted there
+ *  @param u     input grid hierarchy
+ *  @param fnew  output grid hierarchy
+ *  @param order order of the finite difference operator
+ */
+void compute_Lu_density( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order=4 );
+
+//! computes the 2nd order density perturbations using also off-diagonal terms in the potential Hessian 
+/*! @param u     input grid hierarchy
+ *  @param fnew  output grid hierarchy
+ *  @param order order of the finite difference operator (2, 4 or 6), any other
+ *               value makes the implementation throw std::runtime_error
+ */
+void compute_LLA_density( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order=4 );
+
+//! computes the source term for the 2nd order perturbations in the displacements
+/*! @param u     input grid hierarchy
+ *  @param fnew  output grid hierarchy
+ *  @param order order of the finite difference operator (2, 4 or 6), any other
+ *               value makes the implementation throw std::runtime_error
+ */
+void compute_2LPT_source( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order=4 );
+
+//! computes the source term for the 2nd order perturbations using FFTs
+/*! the implementation throws std::runtime_error unless u has a single level
+ *  @param cf_   configuration file object
+ *  @param u     input grid hierarchy
+ *  @param fnew  output grid hierarchy
+ */
+void compute_2LPT_source_FFT( config_file& cf_, const grid_hierarchy& u, grid_hierarchy& fnew );
+
+
+#endif // _COSMOLOGY_HH
+
--- a/defaults.cc
+++ b/defaults.cc
@ -0,0 +1,53 @@
+/*
+ 
+ defaults.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#include "defaults.hh"
+
+//! registers one default entry: lookup key x -> default_conf( section a, tag b, value c )
+#define ADD_DEF(x,a,b,c) def.insert(std::pair<std::string,default_conf>(x,default_conf(a,b,c)));
+
+//! global instance of the defaults table
+default_options defaults;
+
+//! fills the table 'def' with the built-in default values
+default_options::default_options()
+{
+	//... [setup] ...
+	ADD_DEF("align_top",		"setup",	"align_top",		"yes");
+	ADD_DEF("baryons",			"setup",	"baryons",			"no");
+	ADD_DEF("center_v",			"setup",	"center_velocities","no");
+	ADD_DEF("deconvolve",		"setup",	"deconvolve",		"yes");
+	ADD_DEF("exact_shotnoise",	"setup",	"exact_shotnoise",	"yes");
+	ADD_DEF("overlap",			"setup",	"overlap",			"8");
+	ADD_DEF("padding",			"setup",	"padding",			"16");
+	ADD_DEF("periodic_TF",		"setup",	"periodic_TF",		"yes");
+	ADD_DEF("use_2LPT",			"setup",	"use_2LPT",			"yes");
+	ADD_DEF("use_LLA",			"setup",	"use_LLA",			"no");
+
+	//... [poisson] ...
+	ADD_DEF("mgacc",			"poisson",	"accuracy",			"1e-4");
+	ADD_DEF("mggrad",			"poisson",	"grad_order",		"6");
+	// tag was misspelled "laplce_order", which could never match the option name
+	ADD_DEF("mglapl",			"poisson",	"laplace_order",	"6");
+	ADD_DEF("fft_fine",			"poisson",	"fft_fine",			"yes");
+	ADD_DEF("kspace_poisson",	"poisson",	"kspace",			"no");
+	
+	
+	//... deprecated
+	ADD_DEF("avg_fine",			"setup",	"avg_fine",			"no");
+}
+
+
+
+
+#undef ADD_DEF
+
+
+
+
+
+
+
--- a/defaults.hh
+++ b/defaults.hh
@ -0,0 +1,44 @@
+/*
+ 
+ defaults.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+
+#ifndef __DEFAULTS_HH
+#define __DEFAULTS_HH
+
+#include <string>
+#include <map>
+
+//! one default configuration entry, stored entirely as strings
+struct default_conf
+{
+	std::string sec;	//!< first constructor argument (named 'sec' for section)
+	std::string tag;	//!< second constructor argument (the option tag)
+	std::string val;	//!< third constructor argument (the value, as text)
+
+	//! stores copies of the three strings
+	default_conf( std::string sec_, std::string tag_, std::string val_ )
+		: sec( sec_ )
+		, tag( tag_ )
+		, val( val_ )
+	{
+	}
+};
+
+
+//! table of default option values, keyed by a string
+class default_options
+{
+public:
+	//! constructor; only declared here, the definition lives out of line
+	default_options();
+
+	//! placeholder: the body is empty and nothing is returned
+	template< typename T >
+	void query( std::string tag )
+	{
+	}
+
+protected:
+	//! lookup key -> default entry
+	std::map<std::string,default_conf> def;
+};
+
+extern default_options defaults;
+
+
+#endif //__DEFAULTS_HH
+
--- a/densities.cc
+++ b/densities.cc
@ -0,0 +1,818 @@
+/*
+ 
+ densities.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#include "densities.hh"
+#include "convolution_kernel.hh"
+
+
+//TODO: this should be a larger number by default, just to maintain consistency with old default
+#define DEF_RAN_CUBE_SIZE	32
+
+
+//! FFT-based interpolation of a coarse grid onto an overlapping fine grid
+/*! The part of the coarse grid V underneath the fine grid v (half as many cells
+ *  per dimension) is Fourier transformed. Its modes are copied, multiplied by a
+ *  phase factor and by 8, into the low-frequency corners of a fine-grid
+ *  spectrum; the inverse transform, normalised by the fine cell count, is then
+ *  written to v.
+ *
+ *  @param V              coarse grid, read through V(i,j,k)
+ *  @param v              fine grid; supplies offset()/size() and is overwritten
+ *  @param fourier_splice currently ignored: it is forced to false below, so the
+ *                        fine grid's own Fourier modes are never retained
+ *  @param ispadded       shift the offsets by an eighth of the fine grid size and
+ *                        scale the phase factor by 1.5
+ *  @param bothpadded     double the offsets (only looked at if ispadded is false)
+ */
+template< typename m1, typename m2 >
+void fft_interpolate( m1& V, m2& v, bool fourier_splice = false, bool ispadded=false, bool bothpadded=false ) 
+{
+  int oxf = v.offset(0), oyf = v.offset(1), ozf = v.offset(2);
+  size_t nxf = v.size(0), nyf = v.size(1), nzf = v.size(2), nzfp = nzf+2;
+  
+  // splicing is switched off regardless of what the caller requested
+  fourier_splice = false;
+
+  if( ispadded )
+    {
+      oxf -= (int)(nxf/8);
+      oyf -= (int)(nyf/8);
+      ozf -= (int)(nzf/8);
+    }
+  else if( bothpadded )
+    {
+      oxf *= 2;
+      oyf *= 2;
+      ozf *= 2;
+    }
+  
+  // sizes are size_t: print them with a matching conversion instead of %d
+  LOGINFO("FFT interpolate: offset=%d,%d,%d size=%lu,%lu,%lu",oxf,oyf,ozf,
+	  (unsigned long)nxf,(unsigned long)nyf,(unsigned long)nzf);
+
+  // cut out piece of coarse grid that overlaps the fine:
+  assert( nxf%2==0 && nyf%2==0 && nzf%2==0 );
+  
+  size_t nxc = nxf/2, nyc = nyf/2, nzc = nzf/2, nzcp = nzf/2+2;
+  
+  // buffers are padded in the last dimension for the in-place real<->complex transforms
+  fftw_real *rcoarse = new fftw_real[ nxc * nyc * nzcp ];
+  fftw_complex *ccoarse = reinterpret_cast<fftw_complex*> (rcoarse);
+  
+  fftw_real *rfine = new fftw_real[ nxf * nyf * nzfp];
+  fftw_complex *cfine = reinterpret_cast<fftw_complex*> (rfine);
+  
+  #pragma omp parallel for
+  for( int i=0; i<(int)nxc; ++i )
+    for( int j=0; j<(int)nyc; ++j )
+      for( int k=0; k<(int)nzc; ++k ) 
+	{
+	  size_t q = ((size_t)i*nyc+(size_t)j)*nzcp+(size_t)k;
+	  rcoarse[q] = V( oxf+i, oyf+j, ozf+k );
+	}
+
+  if( fourier_splice )
+    {
+      #pragma omp parallel for
+      for( int i=0; i<(int)nxf; ++i )
+	for( int j=0; j<(int)nyf; ++j )
+	  for( int k=0; k<(int)nzf; ++k ) 
+	    {
+	      size_t q = ((size_t)i*nyf+(size_t)j)*nzfp+(size_t)k;
+	      rfine[q] = v(i,j,k);
+	    }
+    }
+  else
+    {
+      // No forward transform fills the fine buffer in this case, so clear the
+      // padding as well: it holds the highest-k complex plane, which the mode
+      // copy below never writes but the inverse transform reads.
+      #pragma omp parallel for
+      for( int i=0; i<(int)nxf; ++i )
+	for( int j=0; j<(int)nyf; ++j )
+	  for( int k=0; k<(int)nzfp; ++k ) 
+	    {
+	      size_t q = ((size_t)i*nyf+(size_t)j)*nzfp+(size_t)k;
+	      rfine[q] = 0.0;
+	    }
+    }
+
+#ifdef FFTW3
+#ifdef SINGLE_PRECISION
+  fftwf_plan
+    pc  = fftwf_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE),
+    pf  = fftwf_plan_dft_r2c_3d( nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE),
+    ipf = fftwf_plan_dft_c2r_3d( nxf, nyf, nzf, cfine, rfine, FFTW_ESTIMATE);
+  fftwf_execute( pc );
+  if( fourier_splice )
+    fftwf_execute( pf );
+#else
+  fftw_plan
+    pc  = fftw_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE),
+    pf  = fftw_plan_dft_r2c_3d( nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE),
+    ipf = fftw_plan_dft_c2r_3d( nxf, nyf, nzf, cfine, rfine, FFTW_ESTIMATE);
+  fftw_execute( pc );
+  if( fourier_splice )
+    fftw_execute( pf );
+#endif
+#else
+  rfftwnd_plan 
+    pc  = rfftw3d_create_plan( nxc, nyc, nzc, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
+    pf  = rfftw3d_create_plan( nxf, nyf, nzf, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
+    ipf = rfftw3d_create_plan( nxf, nyf, nzf, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
+  
+#ifndef SINGLETHREAD_FFTW		
+  rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), pc, rcoarse, NULL );
+  if( fourier_splice )
+    rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), pf, rfine, NULL );
+#else
+  rfftwnd_one_real_to_complex( pc, rcoarse, NULL );
+  if( fourier_splice )
+    rfftwnd_one_real_to_complex( pf, rfine, NULL );
+#endif
+#endif
+
+  /*************************************************/
+  //.. perform actual interpolation
+  double fftnorm = 1.0/((double)nxf*(double)nyf*(double)nzf);
+  // the fine grid has 8 times as many cells as the coarse cut-out, while
+  // fftnorm divides by the fine cell count
+  double ampfac = 8.0;
+  double phasefac = -0.125;
+
+  if( ispadded ) phasefac *= 1.5;
+  
+  // signed copies so that the wavenumber arithmetic below cannot wrap around
+  const int inxc = (int)nxc, inyc = (int)nyc, inzc = (int)nzc;
+
+  // The coarse spectrum is copied in four (x,y) quadrants: the lower index
+  // range keeps its position, the upper range is moved up by half the fine
+  // grid size. Along z only the stored non-negative half is present.
+  for( int qy=0; qy<2; ++qy )
+    for( int qx=0; qx<2; ++qx )
+      {
+	const int ibeg = qx ? inxc/2 : 0, iend = qx ? inxc : inxc/2+1;
+	const int jbeg = qy ? inyc/2 : 0, jend = qy ? inyc : inyc/2+1;
+	const int ishift = qx ? (int)(nxf/2) : 0;
+	const int jshift = qy ? (int)(nyf/2) : 0;
+
+	#pragma omp parallel for
+	for( int i=ibeg; i<iend; i++ )
+	  for( int j=jbeg; j<jend; j++ )
+	    for( int k=0; k<inzc/2+1; k++ )
+	      {
+		size_t qc = ((size_t)i*(size_t)nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
+		size_t qf = ((size_t)(i+ishift)*(size_t)nyf+(size_t)(j+jshift))*(nzf/2+1)+(size_t)k;
+
+		// indices above the midpoint denote negative wavenumbers
+		double kx = (i <= inxc/2)? (double)i : (double)(i-inxc);
+		double ky = (j <= inyc/2)? (double)j : (double)(j-inyc);
+		double kz = (double)k; // k never exceeds nzc/2 in this loop
+
+		double phase =  phasefac * (kx/nxc + ky/nyc + kz/nzc) * M_PI;
+		std::complex<double> val_phas( cos(phase), sin(phase) );
+
+		std::complex<double> val(RE(ccoarse[qc]),IM(ccoarse[qc]));
+		val *= ampfac * val_phas;
+
+		RE(cfine[qf]) = val.real();
+		IM(cfine[qf]) = val.imag();
+	      }
+      }
+        
+  delete[] rcoarse;
+
+  /*************************************************/    
+
+#ifdef FFTW3
+  #ifdef SINGLE_PRECISION
+  fftwf_execute( ipf );
+  fftwf_destroy_plan(pf);
+  fftwf_destroy_plan(pc);
+  fftwf_destroy_plan(ipf);
+  #else
+  fftw_execute( ipf );
+  fftw_destroy_plan(pf);
+  fftw_destroy_plan(pc);
+  fftw_destroy_plan(ipf);
+  #endif
+#else
+  #ifndef SINGLETHREAD_FFTW		
+  rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), ipf, cfine, NULL );
+  #else
+  rfftwnd_one_complex_to_real( ipf, cfine, NULL );
+  #endif
+  // real-data plans have their own destructor in FFTW 2
+  rfftwnd_destroy_plan(pf);
+  rfftwnd_destroy_plan(pc);
+  rfftwnd_destroy_plan(ipf);
+#endif
+
+  // copy back and normalize
+  #pragma omp parallel for
+  for( int i=0; i<(int)nxf; ++i )
+    for( int j=0; j<(int)nyf; ++j )
+      for( int k=0; k<(int)nzf; ++k ) 
+	{
+	  size_t q = ((size_t)i*nyf+(size_t)j)*nzfp+(size_t)k;
+	  v(i,j,k) = rfine[q] * fftnorm;
+	}
+
+  delete[] rfine;
+
+}
+
+
+
+/*******************************************************************************************/
+/*******************************************************************************************/
+/*******************************************************************************************/
+
+//! generate the density field on a single (unigrid) level
+/*! Loads white noise for level levelmin into a cubic grid of 2^levelmin cells
+ *  per dimension, convolves it with the transfer function kernel and copies
+ *  the result into the base level of the grid hierarchy.
+ *
+ *  @param cf     configuration; reads setup/levelmin, setup/levelmin_TF,
+ *                setup/levelmax and setup/kspace_TF
+ *  @param ptf    transfer function handed to the kernel creator
+ *  @param type   transfer function type handed to the kernel creator
+ *  @param refh   refinement hierarchy handed to the kernel creator
+ *  @param rand   random number source used to fill the grid
+ *  @param delta  output hierarchy; its base hierarchy is (re)created here
+ *  @param smooth not used by this function
+ *  @param shift  forwarded to convolution::perform
+ */
+void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type type, 
+							refinement_hierarchy& refh, rand_gen& rand, grid_hierarchy& delta, bool smooth, bool shift )
+{
+	unsigned    levelmin,levelmax,levelminPoisson;
+	
+	levelminPoisson	= cf.getValue<unsigned>("setup","levelmin");
+	levelmin	= cf.getValueSafe<unsigned>("setup","levelmin_TF",levelminPoisson);
+	levelmax	= cf.getValue<unsigned>("setup","levelmax");
+	
+	// kspace_TF is a boolean option; read it as such, as GenerateDensityHierarchy does
+	bool kspace = cf.getValueSafe<bool>("setup","kspace_TF",false);
+	
+	unsigned	nbase	= 1<<levelmin;
+	
+	std::cerr << " - Running unigrid version\n";
+	LOGUSER("Running unigrid density convolution...");
+	
+	//... select the transfer function to be used
+	convolution::kernel_creator *the_kernel_creator;
+
+	if( kspace )
+	{
+		std::cout << " - Using new k-space transfer function kernel.\n";
+		LOGUSER("Using k-space transfer function kernel.");
+		
+		#ifdef SINGLE_PRECISION	
+		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_new_float" ];
+		#else
+		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_new_double" ];
+		#endif
+	}
+	else
+	{
+		std::cout << " - Using real-space transfer function kernel.\n";
+		LOGUSER("Using real-space transfer function kernel.");
+		
+		#ifdef SINGLE_PRECISION	
+		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_float" ];
+		#else
+		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_double" ];
+		#endif
+	}	
+		
+	
+	//... initialize convolution kernel
+	convolution::kernel *the_tf_kernel = the_kernel_creator->create( cf, ptf, refh, type );
+
+	//...
+	std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmax << " ..." << std::endl;
+	LOGUSER("Performing noise convolution on level %3d",levelmax);
+	
+	//... create convolution mesh
+	DensityGrid<real_t> *top = new DensityGrid<real_t>( nbase, nbase, nbase );
+	
+	//... fill with random numbers
+	rand.load( *top, levelmin );
+		
+	//... load convolution kernel
+	the_tf_kernel->fetch_kernel( levelmin, false );
+	
+	//... perform convolution
+	convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ), shift );
+	
+	//... clean up kernel
+	delete the_tf_kernel;
+	
+	//... create multi-grid hierarchy
+	delta.create_base_hierarchy(levelmin);
+	
+	//... copy convolved field to multi-grid hierarchy
+	top->copy( *delta.get_grid(levelmin) );
+	
+	//... delete convolution grid
+	delete top;
+}
+
+
+/*******************************************************************************************/
+/*******************************************************************************************/
+/*******************************************************************************************/
+
+//! generate the density field on all levels of the refinement hierarchy
+/*! Selects a transfer function kernel (k-space or real-space, depending on
+ *  setup/kspace_TF), fills one grid per level with white noise, convolves it
+ *  with the kernel and stores the results in the grid hierarchy 'delta'.
+ *
+ *  In the k-space branch every level is convolved on its own and the fine
+ *  grid is then adjusted with enforce_coarse_mean_for_overlap. In the
+ *  real-space branch each level is convolved several times (self-contribution,
+ *  long-range part for the next finer level, coarse-grid correction) and the
+ *  pieces are combined by prolongation and boundary upload.
+ *
+ *  @param cf     configuration; reads setup/levelmin, setup/levelmin_TF,
+ *                setup/levelmax and setup/kspace_TF
+ *  @param ptf    transfer function handed to the kernel creator
+ *  @param type   transfer function type handed to the kernel creator
+ *  @param refh   refinement hierarchy providing offsets and sizes per level
+ *  @param rand   random number source used to fill the grids
+ *  @param delta  output hierarchy; base level and patches are created here
+ *  @param smooth not used by this function
+ *  @param shift  forwarded to convolution::perform
+ */
+void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type type, 
+							  refinement_hierarchy& refh, rand_gen& rand, grid_hierarchy& delta, bool smooth, bool shift )
+{
+	unsigned					levelmin,levelmax,levelminPoisson;
+	bool						kspaceTF;
+	
+	double tstart, tend;
+
+#ifndef SINGLETHREAD_FFTW
+	tstart = omp_get_wtime();
+#else
+	tstart = (double)clock() / CLOCKS_PER_SEC;
+#endif
+	
+	levelminPoisson	= cf.getValue<unsigned>("setup","levelmin");
+	levelmin		= cf.getValueSafe<unsigned>("setup","levelmin_TF",levelminPoisson);
+	levelmax		= cf.getValue<unsigned>("setup","levelmax");
+	kspaceTF		= cf.getValueSafe<bool>("setup", "kspace_TF", false);
+	
+	
+	unsigned	nbase	= 1<<levelmin;
+	
+	convolution::kernel_creator *the_kernel_creator;
+	
+	if( kspaceTF )
+	{
+		// the k-space kernel is selected for unigrid and multi-level runs alike
+		// (an earlier restriction to unigrid mode is no longer enforced)
+		std::cout << " - Using new k-space transfer function kernel.\n";
+		LOGUSER("Using new k-space transfer function kernel.");
+		
+#ifdef SINGLE_PRECISION	
+		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_new_float" ];
+#else
+		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_new_double" ];
+#endif
+	}
+	else
+	{
+		std::cout << " - Using real-space transfer function kernel.\n";
+		LOGUSER("Using real-space transfer function kernel.");
+#ifdef SINGLE_PRECISION	
+		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_float" ];
+#else
+		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_double" ];
+#endif
+	}	
+	
+	convolution::kernel *the_tf_kernel = the_kernel_creator->create( cf, ptf, refh, type );
+	
+		
+	/***** PERFORM CONVOLUTIONS *****/
+	
+
+  if( kspaceTF ){
+
+    //... create and initialize density grids with white noise	
+    DensityGrid<real_t> *top(NULL);
+    PaddedDensitySubGrid<real_t> *coarse(NULL), *fine(NULL);
+    
+    int nlevels = (int)levelmax-(int)levelmin+1;
+
+    LOGINFO(">>>>>>>>> NEW KERNEL LOOP <<<<<<<<<<<<");
+
+    // do coarse level
+    top = new DensityGrid<real_t>( nbase, nbase, nbase );
+    LOGINFO("Performing noise convolution on level %3d",levelmin);
+    rand.load(*top,levelmin);
+    convolution::perform<real_t>( the_tf_kernel->fetch_kernel( levelmin, false ), reinterpret_cast<void*>( top->get_data_ptr() ), shift );
+    
+    delta.create_base_hierarchy(levelmin);
+    top->copy( *delta.get_grid(levelmin) );
+    
+    for( int i=1; i<nlevels; ++i )
+      {
+	std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmin+i << " ..." << std::endl;
+	LOGINFO("Performing noise convolution on level %3d",levelmin+i);
+	
+	//... add new refinement patch
+	LOGINFO("Allocating refinement patch");
+	LOGINFO("   offset=(%5d,%5d,%5d)",refh.offset(levelmin+i,0), refh.offset(levelmin+i,1), refh.offset(levelmin+i,2));
+	LOGINFO("   size  =(%5d,%5d,%5d)",refh.size(levelmin+i,0), refh.size(levelmin+i,1), refh.size(levelmin+i,2));
+	
+	fine = new PaddedDensitySubGrid<real_t>(refh.offset(levelmin+i,0), refh.offset(levelmin+i,1), refh.offset(levelmin+i,2),
+						refh.size(levelmin+i,0), refh.size(levelmin+i,1), refh.size(levelmin+i,2) );
+	
+	rand.load(*fine,levelmin+i);	
+
+	convolution::perform<real_t>( the_tf_kernel->fetch_kernel( levelmin+i, true ), reinterpret_cast<void*>( fine->get_data_ptr() ), shift );
+	
+	// (the alternative of calling fft_interpolate here is disabled)
+
+	// the coarser grid is 'top' for the first refinement level, the
+	// previous fine grid afterwards
+	if( i==1 )
+	  enforce_coarse_mean_for_overlap( *fine, *top );
+	else
+	  enforce_coarse_mean_for_overlap( *fine, *coarse );
+	
+	delta.add_patch( refh.offset(levelmin+i,0), refh.offset(levelmin+i,1), refh.offset(levelmin+i,2), 
+			 refh.size(levelmin+i,0), refh.size(levelmin+i,1), refh.size(levelmin+i,2) );
+
+	fine->copy_unpad( *delta.get_grid(levelmin+i) );
+
+	// the coarser grid is no longer needed
+	if( i==1 )
+	  delete top;
+	else
+	  delete coarse;
+
+	coarse = fine;
+      }
+
+    // If the loop never ran (no refinement levels) 'top' has not been released
+    // yet and 'coarse' is still NULL; otherwise 'coarse' holds the last grid.
+    if( coarse == NULL )
+      delete top;
+    else
+      delete coarse;
+
+  }else{
+	  
+        //... create and initialize density grids with white noise	
+	PaddedDensitySubGrid<real_t>* coarse(NULL), *fine(NULL);
+	DensityGrid<real_t>* top(NULL);
+
+	if( levelmax == levelmin )
+	{
+		std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmax << " ..." << std::endl;
+		LOGUSER("Performing noise convolution on level %3d...",levelmax);
+		
+		top = new DensityGrid<real_t>( nbase, nbase, nbase );
+		rand.load( *top, levelmin );
+
+
+		convolution::perform<real_t>( the_tf_kernel->fetch_kernel( levelmax ), reinterpret_cast<void*>( top->get_data_ptr() ), shift );
+		the_tf_kernel->deallocate();
+		
+		delta.create_base_hierarchy(levelmin);
+		top->copy( *delta.get_grid(levelmin) );
+		delete top;
+	}
+		
+	
+	for( int i=0; i< (int)levelmax-(int)levelmin; ++i )
+	{
+		//.......................................................................................................//
+		//... GENERATE/FILL WITH RANDOM NUMBERS .................................................................//
+		//.......................................................................................................//
+		
+		
+		if( i==0 )
+		{
+			top = new DensityGrid<real_t>( nbase, nbase, nbase );
+			rand.load(*top,levelmin);
+		}
+		
+		fine = new PaddedDensitySubGrid<real_t>( refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2), 
+							 refh.size(levelmin+i+1,0), refh.size(levelmin+i+1,1), refh.size(levelmin+i+1,2) );
+
+		rand.load(*fine,levelmin+i+1);
+		
+		//.......................................................................................................//
+		//... PERFORM CONVOLUTIONS ..............................................................................//
+		//.......................................................................................................//		
+		if( i==0 )
+		{
+			/**********************************************************************************************************\
+			 *	multi-grid: top-level grid grids .....
+			 \**********************************************************************************************************/ 
+			std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmin+i << " ..." << std::endl;
+			LOGUSER("Performing noise convolution on level %3d",levelmin+i);
+			
+			LOGUSER("Creating base hierarchy...");
+			delta.create_base_hierarchy(levelmin);
+			
+			// keep a copy of the unconvolved noise for the second convolution
+			DensityGrid<real_t> top_save( *top );
+
+			the_tf_kernel->fetch_kernel( levelmin );
+			
+			//... 1) compute standard convolution for levelmin
+			LOGUSER("Computing density self-contribution");
+			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ), shift );
+			top->copy( *delta.get_grid(levelmin) );
+			
+			
+			//... 2) compute contribution to finer grids from non-refined region
+			LOGUSER("Computing long-range component for finer grid.");
+			*top = top_save;
+			top_save.clear();
+			top->zero_subgrid(refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2), 
+							  refh.size(levelmin+i+1,0)/2, refh.size(levelmin+i+1,1)/2, refh.size(levelmin+i+1,2)/2 );
+
+			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ), shift );
+			the_tf_kernel->deallocate();
+			
+			meshvar_bnd delta_longrange( *delta.get_grid(levelmin) );
+			top->copy( delta_longrange );
+			delete top;			
+			
+			//... inject these contributions to the next level
+			LOGUSER("Allocating refinement patch");
+			LOGUSER("   offset=(%5d,%5d,%5d)",refh.offset(levelmin+1,0), refh.offset(levelmin+1,1), refh.offset(levelmin+1,2));
+			LOGUSER("   size  =(%5d,%5d,%5d)",refh.size(levelmin+1,0), refh.size(levelmin+1,1), refh.size(levelmin+1,2));
+			
+			delta.add_patch( refh.offset(levelmin+1,0), refh.offset(levelmin+1,1), refh.offset(levelmin+1,2), 
+							refh.size(levelmin+1,0), refh.size(levelmin+1,1), refh.size(levelmin+1,2) );
+			
+			LOGUSER("Injecting long range component");
+			mg_cubic().prolong( delta_longrange, *delta.get_grid(levelmin+1) );
+		}
+		else
+		{
+			/**********************************************************************************************************\
+			 *	multi-grid: intermediate sub-grids .....
+			 \**********************************************************************************************************/ 
+			
+			std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmin+i << " ..." << std::endl;
+			LOGUSER("Performing noise convolution on level %3d",levelmin+i);
+			
+			//... add new refinement patch
+			LOGUSER("Allocating refinement patch");
+			LOGUSER("   offset=(%5d,%5d,%5d)",refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2));
+			LOGUSER("   size  =(%5d,%5d,%5d)",refh.size(levelmin+i+1,0), refh.size(levelmin+i+1,1), refh.size(levelmin+i+1,2));
+			
+			delta.add_patch( refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2), 
+							refh.size(levelmin+i+1,0), refh.size(levelmin+i+1,1), refh.size(levelmin+i+1,2) );
+			
+			
+			//... copy coarse grid long-range component to fine grid
+			LOGUSER("Injecting long range component");
+			mg_cubic().prolong( *delta.get_grid(levelmin+i), *delta.get_grid(levelmin+i+1) );
+			
+			// keep a copy of the unconvolved noise; it is restored before each further convolution
+			PaddedDensitySubGrid<real_t> coarse_save( *coarse );
+			the_tf_kernel->fetch_kernel( levelmin+i );
+					
+			//... 1) the inner region
+			LOGUSER("Computing density self-contribution");
+			coarse->subtract_boundary_oct_mean();
+			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()), shift );
+			coarse->copy_add_unpad( *delta.get_grid(levelmin+i) );
+			
+			
+			//... 2) the 'BC' for the next finer grid
+			LOGUSER("Computing long-range component for finer grid.");
+			*coarse = coarse_save;
+			coarse->subtract_boundary_oct_mean();
+			coarse->zero_subgrid(refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2), 
+								 refh.size(levelmin+i+1,0)/2, refh.size(levelmin+i+1,1)/2, refh.size(levelmin+i+1,2)/2 );
+			
+			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( coarse->get_data_ptr() ), shift );
+			
+			//... interpolate to finer grid(s)
+			meshvar_bnd delta_longrange( *delta.get_grid(levelmin+i) );
+			coarse->copy_unpad( delta_longrange );
+			
+			LOGUSER("Injecting long range component");
+			mg_cubic().prolong_add( delta_longrange, *delta.get_grid(levelmin+i+1) );
+
+			//... 3) the coarse-grid correction
+			LOGUSER("Computing coarse grid correction");
+			*coarse = coarse_save;
+			coarse->subtract_oct_mean();
+			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()), shift );
+			coarse->subtract_mean();
+			coarse->upload_bnd_add( *delta.get_grid(levelmin+i-1) );
+			
+			//... clean up
+			the_tf_kernel->deallocate();
+			delete coarse;
+		}
+		
+		
+		coarse = fine;
+	}
+	
+	//... and convolution for finest grid (outside loop)
+	if( levelmax > levelmin )
+	{
+		/**********************************************************************************************************\
+		 *	multi-grid: finest sub-grid .....
+		 \**********************************************************************************************************/ 
+		std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmax << " ..." << std::endl;
+		LOGUSER("Performing noise convolution on level %3d",levelmax);
+				
+		//... 1) grid self-contribution
+		LOGUSER("Computing density self-contribution");
+		PaddedDensitySubGrid<real_t> coarse_save( *coarse );
+		
+		//... create convolution kernel
+		the_tf_kernel->fetch_kernel( levelmax );
+		
+		//... subtract oct mean on boundary but not in interior
+		coarse->subtract_boundary_oct_mean();
+		
+		//... perform convolution
+		convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()), shift );
+		
+		//... copy to grid hierarchy
+		coarse->copy_add_unpad( *delta.get_grid(levelmax) );
+		
+
+		//... 2) boundary correction to top grid
+		LOGUSER("Computing coarse grid correction");
+		*coarse = coarse_save;
+		
+		//... subtract oct mean
+		coarse->subtract_oct_mean();
+		
+		//... perform convolution
+		convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()), shift );
+
+		the_tf_kernel->deallocate();
+		
+		coarse->subtract_mean();
+		
+		//... upload data to coarser grid
+		coarse->upload_bnd_add( *delta.get_grid(levelmax-1) );
+			
+		delete coarse;
+	}
+
+  }
+	
+	delete the_tf_kernel;
+			
+#ifndef SINGLETHREAD_FFTW
+	tend = omp_get_wtime();
+	if( true ) //verbosity > 1 )
+		std::cout << " - Density calculation took " << tend-tstart << "s with " << omp_get_max_threads() << " threads." << std::endl;
+#else
+	tend = (double)clock() / CLOCKS_PER_SEC;
+	if( true )//verbosity > 1 )
+		std::cout << " - Density calculation took " << tend-tstart << "s." << std::endl;
+#endif
+	
+	LOGUSER("Finished computing the density field in %fs",tend-tstart);
+}
+
+
+/*******************************************************************************************/
+/*******************************************************************************************/
+/*******************************************************************************************/
+
+//! subtract the mean of the coarsest level from every level of the hierarchy
+/*! The arithmetic mean of all cells on level levelmin is computed and then
+ *  subtracted from every cell on each level from levelmin to levelmax.
+ *  @param delta grid hierarchy, modified in place
+ */
+void normalize_density( grid_hierarchy& delta )
+{	
+	long double sum = 0.0;
+	unsigned levelmin = delta.levelmin(), levelmax = delta.levelmax();
+	
+	{
+		size_t nx,ny,nz;
+		
+		nx = delta.get_grid(levelmin)->size(0);
+		ny = delta.get_grid(levelmin)->size(1);
+		nz = delta.get_grid(levelmin)->size(2);
+		
+		#pragma omp parallel for reduction(+:sum)
+		for( int ix=0; ix<(int)nx; ++ix )
+			for( size_t iy=0; iy<ny; ++iy )
+				for( size_t iz=0; iz<nz; ++iz )
+					sum += (*delta.get_grid(levelmin))(ix,iy,iz);
+		
+		sum /= (double)(nx*ny*nz);
+	}
+	
+	std::cout << " - Top grid mean density is off by " << sum << ", correcting..." << std::endl;
+	// %g expects a double; passing the long double directly would be undefined
+	LOGUSER("Grid mean density is %g. Correcting...",(double)sum);
+	
+	for( unsigned i=levelmin; i<=levelmax; ++i )
+	{		
+		size_t nx,ny,nz;
+		nx = delta.get_grid(i)->size(0);
+		ny = delta.get_grid(i)->size(1);
+		nz = delta.get_grid(i)->size(2);
+		
+		#pragma omp parallel for
+		for( int ix=0; ix<(int)nx; ++ix )
+			for( size_t iy=0; iy<ny; ++iy )
+				for( size_t iz=0; iz<nz; ++iz )
+					(*delta.get_grid(i))(ix,iy,iz) -= sum;
+	}
+}
+
+
+//! make the grid patches match the refinement hierarchy and restrict fine data to coarse levels
+/*! For every level from 1 to rh.levelmax() whose patch offset or size in u
+ *  differs from the one stored in rh, the patch is cut to the geometry in rh
+ *  via cut_patch_enforce_top_density. Afterwards each level is restricted
+ *  onto the next coarser one, starting from the finest.
+ *  @param rh refinement hierarchy describing the desired patch geometry
+ *  @param u  grid hierarchy, modified in place
+ */
+void coarsen_density( const refinement_hierarchy& rh, GridHierarchy<real_t>& u )
+{
+  for( unsigned lvl=1; lvl<=rh.levelmax(); ++lvl )
+    {
+      // offsets are compared first, then sizes; stop at the first difference
+      bool differs = false;
+
+      for( int d=0; d<3 && !differs; ++d )
+	differs = ( rh.offset(lvl,d) != u.get_grid(lvl)->offset(d) );
+
+      for( int d=0; d<3 && !differs; ++d )
+	differs = ( rh.size(lvl,d) != u.get_grid(lvl)->size(d) );
+
+      if( !differs )
+	continue;
+
+      u.cut_patch_enforce_top_density( lvl, rh.offset_abs(lvl,0), rh.offset_abs(lvl,1), rh.offset_abs(lvl,2), 
+				       rh.size(lvl,0), rh.size(lvl,1), rh.size(lvl,2) );
+    }
+  
+  // restrict from the finest level downwards
+  for( int lvl=rh.levelmax(); lvl>0; --lvl )
+    mg_straight().restrict( *(u.get_grid(lvl)), *(u.get_grid(lvl-1)) );
+
+}
+
--- a/densities.hh
+++ b/densities.hh
--- a/fd_schemes.hh
+++ b/fd_schemes.hh
@ -0,0 +1,451 @@
+/*
+ 
+ fd_schemes.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+*/
+
+#ifndef __FD_SCHEMES_HH
+#define __FD_SCHEMES_HH
+
+#include <vector>
+#include <stdexcept>
+
+
+//! bundles a Laplacian stencil and a gradient operator behind one interface
+/*!
+ * Every member simply forwards to the contained operator objects.
+ *
+ * @tparam L      Laplacian type, must provide apply(), rhs() and ccoeff()
+ * @tparam G      gradient type, must provide apply_x(), apply_y() and apply_z()
+ * @tparam real_t type returned by all evaluations
+ */
+template< class L, class G, typename real_t=double >
+class scheme
+{
+public:
+	typedef L laplacian;
+	typedef G gradient;
+	
+	laplacian m_laplacian;	//!< Laplacian operator instance
+	gradient m_gradient;	//!< gradient operator instance
+	
+	//! x-component of the gradient of c at cell (i,j,k)
+	template< class C >
+	real_t grad_x( const C&c, const int i, const int j, const int k )
+	{
+		return m_gradient.apply_x( c,i,j,k );
+	}
+	
+	//! y-component of the gradient of c at cell (i,j,k)
+	template< class C >
+	real_t grad_y( const C&c, const int i, const int j, const int k )
+	{
+		return m_gradient.apply_y( c,i,j,k );
+	}
+	
+	//! z-component of the gradient of c at cell (i,j,k)
+	template< class C >
+	real_t grad_z( const C&c, const int i, const int j, const int k )
+	{
+		return m_gradient.apply_z( c,i,j,k );
+	}
+	
+	//! Laplacian of c at cell (i,j,k)
+	template< class C >
+	real_t L_apply( const C&c, const int i, const int j, const int k )
+	{
+		return m_laplacian.apply( c,i,j,k );
+	}
+	
+	//! forwards to the rhs() member of the Laplacian for cell (i,j,k)
+	template< class C >
+	real_t L_rhs( const C&c, const int i, const int j, const int k )
+	{
+		return m_laplacian.rhs( c,i,j,k );
+	}
+	
+	//! forwards to the ccoeff() member of the Laplacian
+	real_t ccoeff()
+	{
+		return m_laplacian.ccoeff();
+	}
+	
+};
+
+//! base class for finite difference gradients
+/*!
+ * Stores the 2*nextent+1 coefficients of a one-dimensional stencil; the
+ * coefficient for offset i in [-nextent,+nextent] is accessed as (*this)(i).
+ *
+ * @tparam nextent number of stencil points on either side of the centre
+ * @tparam T       floating point type of the coefficients
+ */
+template< int nextent, typename T >
+class gradient
+{
+	typedef T real_t;
+	std::vector<real_t> m_stencil;	//!< coefficients for offsets -nextent..+nextent
+	const unsigned nl;				//!< number of stencil points, 2*nextent+1
+public:
+	
+	//! create a stencil with all coefficients set to zero
+	gradient()
+	: nl( 2*nextent+1 )
+	{ 
+		// the stencil is one-dimensional, so nl coefficients are sufficient
+		m_stencil.assign(nl,(real_t)0.0);
+	}
+	
+	//! writable access to the coefficient for offset i
+	real_t& operator()(int i)
+	{ return m_stencil[i+nextent]; }
+	
+	//! read access to the coefficient for offset i
+	const real_t& operator()(int i) const
+	{ return m_stencil[i+nextent]; }
+	
+	//! apply the stencil along one axis to all cells of a grid
+	/*!
+	 * f is first assigned from c and then zeroed. The grid spacing along an
+	 * axis with n cells is taken to be 1/(n+1). c is read up to nextent cells
+	 * outside the index range [0,n) along the chosen axis.
+	 *
+	 * @param c   input grid
+	 * @param f   output grid, overwritten
+	 * @param dir axis of the derivative (0=x, 1=y, 2=z); for any other value
+	 *            f is left zeroed
+	 */
+	template< class C >
+	inline void apply( const C& c, C& f, int dir ) const
+	{
+		f = c;
+		
+		const int nx=c.size(0), ny=c.size(1), nz=c.size(2);
+		
+		f.zero();
+		
+		if( dir < 0 || dir > 2 )
+			return;
+		
+		// spacing and unit step of the selected axis
+		const int ncells[3] = { nx, ny, nz };
+		const double h = 1.0/(ncells[dir]+1.0);
+		const int di = (dir==0)? 1 : 0;
+		const int dj = (dir==1)? 1 : 0;
+		const int dk = (dir==2)? 1 : 0;
+		
+		for( int i=0; i<nx; ++i )
+			for( int j=0; j<ny; ++j )
+				for( int k=0; k<nz; ++k )
+					for( int s = -nextent; s<=nextent; ++s )
+						f(i,j,k) += (*this)(s) * c(i+s*di,j+s*dj,k+s*dk)/h;
+	}
+};
+
+//! base class for finite difference stencils
+/*!
+ * Stores a dense cube of (2*nextent+1)^3 coefficients; the coefficient for the
+ * offset (i,j,k), each in [-nextent,+nextent], is accessed as (*this)(i,j,k).
+ *
+ * @tparam nextent number of stencil points on either side of the centre
+ * @tparam real_t  floating point type of the coefficients
+ */
+template< int nextent, typename real_t >
+class base_stencil
+{
+protected:
+	std::vector<real_t> m_stencil;	//!< coefficient cube, k runs fastest
+	const unsigned nl;				//!< stencil points per dimension, 2*nextent+1
+	
+	//! linear position of offset (i,j,k) inside m_stencil
+	inline size_t index( int i, int j, int k ) const
+	{ return ((size_t)(i+nextent)*nl+(size_t)(j+nextent))*nl+(size_t)(k+nextent); }
+	
+public:
+	bool m_modsource;				//!< flag copied from the constructor argument
+	
+public:
+	//! create a stencil with all coefficients set to zero
+	base_stencil( bool amodsource = false )
+	: nl( 2*nextent+1 ), m_modsource( amodsource )
+	{
+		m_stencil.assign(nl*nl*nl,(real_t)0.0);
+	}
+	
+	//! writable access to the coefficient for offset (i,j,k)
+	real_t& operator()(int i, int j, int k)
+	{ return m_stencil[index(i,j,k)]; }
+	
+	//! read access to the coefficient for offset (i,j,k)
+	/*! takes signed offsets, like the writable overload, so that negative
+	 *  offsets do not depend on unsigned wrap-around */
+	const real_t& operator()(int i, int j, int k) const
+	{ return m_stencil[index(i,j,k)]; }
+	
+	//! stencil applied at (i,j,k) without the contribution of the central coefficient
+	template< class C >
+	inline real_t rhs( const C& c, const int i, const int j, const int k ) const
+	{
+		real_t sum = this->apply( c, i, j, k );
+		sum -= (*this)(0,0,0) * c(i,j,k);
+		return sum;
+	}
+	
+	//! central coefficient of the stencil
+	inline real_t ccoeff( void ) const
+	{
+		return (*this)(0,0,0);
+	}
+	
+	//! weighted sum of c over all stencil offsets around (i,j,k)
+	template< class C >
+	inline real_t apply( const C& c, const int i, const int j, const int k ) const
+	{
+		real_t sum = 0.0;
+		
+		for( int ii=-nextent; ii<=nextent; ++ii )
+			for( int jj=-nextent; jj<=nextent; ++jj )
+				for( int kk=-nextent; kk<=nextent; ++kk )
+					sum += (*this)(ii,jj,kk) * c(i+ii,j+jj,k+kk);
+		
+		return sum;
+	}
+	
+	//! source modification term; always zero in this base class
+	template< class C >
+	inline real_t modsource( const C& c, const int i, const int j, const int k ) const
+	{
+		return 0.0;
+	}
+	
+};
+
+
+/***************************************************************************************/
+/***************************************************************************************/
+/***************************************************************************************/
+
+
+//... Implementation of the Gradient schemes............................................
+
+
+//! gradient stencil with the weights -1/2, 0, +1/2 at the offsets -1, 0, +1
+template< typename real_t >
+class deriv_2P : public gradient<1,real_t>
+{
+public:
+	//! fill in the three stencil weights
+	deriv_2P( void )
+	{
+		gradient<1,real_t>& weight = *this;
+		
+		weight(-1) = -0.5;
+		weight( 0) =  0.0;
+		weight(+1) = +0.5;
+	}
+};
+
+//... Implementation of the Laplacian schemes..........................................
+
+//! 7-point, 2nd order finite difference Laplacian
+/*!
+ * Weight -6 at the centre and +1 at each of the six face neighbours. The
+ * members apply(), rhs() and ccoeff() hard-code these weights instead of
+ * looping over the stored coefficient cube.
+ */
+template< typename real_t >
+class stencil_7P : public base_stencil<1,real_t>
+{
+public:
+	//! store the stencil weights in the coefficient cube of the base class
+	stencil_7P( void )
+	{
+		base_stencil<1,real_t>& w = *this;
+		
+		w(0,0,0) = -6.0;
+		for( int s=-1; s<=1; s+=2 )
+		{
+			w(s,0,0) = +1.0;
+			w(0,s,0) = +1.0;
+			w(0,0,s) = +1.0;
+		}
+	}
+	
+	//! Laplacian of c at (i,j,k), accumulated in double precision
+	template< class C >
+	inline real_t apply( const C& c, const int i, const int j, const int k ) const
+	{
+		double acc = (double)c(i-1,j,k);
+		acc += (double)c(i+1,j,k);
+		acc += (double)c(i,j-1,k);
+		acc += (double)c(i,j+1,k);
+		acc += (double)c(i,j,k-1);
+		acc += (double)c(i,j,k+1);
+		acc -= 6.0*(double)c(i,j,k);
+		return acc;
+	}
+	
+	//! sum of the six face neighbours of (i,j,k), i.e. apply() without the central term
+	template< class C >
+	inline real_t rhs( const C& c, const int i, const int j, const int k ) const
+	{
+		return c(i-1,j,k)+c(i+1,j,k)
+			+c(i,j-1,k)+c(i,j+1,k)
+			+c(i,j,k-1)+c(i,j,k+1);
+	}
+	
+	//! central coefficient of the stencil
+	inline real_t ccoeff( void )
+	{
+		const real_t centre = -6.0;
+		return centre;
+	}
+};
+
+//! 13-point, 4th order finite difference Laplacian
+/*!
+ * Weights along each axis: -1/12 at distance 2, 16/12 at distance 1, and
+ * -90/12 at the centre. apply(), rhs() and ccoeff() hard-code these weights.
+ */
+template< typename real_t >
+class stencil_13P : public base_stencil<2,real_t>
+{
+public:
+	//! store the stencil weights in the coefficient cube of the base class
+	stencil_13P( void )
+	{
+		base_stencil<2,real_t>& w = *this;
+		
+		const real_t w1 = 16./12.;	// neighbours at distance 1
+		const real_t w2 = -1./12.;	// neighbours at distance 2
+		
+		w(0,0,0) = -90.0/12.;
+		for( int s=-1; s<=1; s+=2 )
+		{
+			w(s,0,0) = w(0,s,0) = w(0,0,s) = w1;
+			w(2*s,0,0) = w(0,2*s,0) = w(0,0,2*s) = w2;
+		}
+	}
+	
+	//! Laplacian of c at (i,j,k)
+	template< class C >
+	inline real_t apply( const C& c, const int i, const int j, const int k )
+	{
+		return 
+			( -1.0*( c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2) )
+			 +16.0*( c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1) )
+			 -90.0*c(i,j,k) )/12.0;
+	}
+	
+	//! same sum as apply() but without the central term
+	template< class C >
+	inline real_t rhs( const C& c, const int i, const int j, const int k )
+	{
+		return 
+			( -1.0*( c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2) )
+			 +16.0*( c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1) ) )/12.0;
+	}
+	
+	//! central coefficient of the stencil
+	inline real_t ccoeff( void )
+	{
+		const real_t centre = -90.0/12.0;
+		return centre;
+	}
+};
+
+
+//! 19-point, 6th order finite difference Laplacian
+/*!
+ * Weights along each axis: 2/180 at distance 3, -27/180 at distance 2,
+ * 270/180 at distance 1, and -1470/180 at the centre. apply(), rhs() and
+ * ccoeff() hard-code these weights.
+ */
+template< typename real_t >
+class stencil_19P : public base_stencil<3,real_t>
+{
+public:
+	//! store the stencil weights in the coefficient cube of the base class
+	stencil_19P( void )
+	{
+		base_stencil<3,real_t>& w = *this;
+		
+		const real_t w1 = 270./180.;	// neighbours at distance 1
+		const real_t w2 = -27./180.;	// neighbours at distance 2
+		const real_t w3 = 2./180.;		// neighbours at distance 3
+		
+		w(0,0,0) = -1470./180.;
+		for( int s=-1; s<=1; s+=2 )
+		{
+			w(s,0,0) = w(0,s,0) = w(0,0,s) = w1;
+			w(2*s,0,0) = w(0,2*s,0) = w(0,0,2*s) = w2;
+			w(3*s,0,0) = w(0,3*s,0) = w(0,0,3*s) = w3;
+		}
+	}
+	
+	//! Laplacian of c at (i,j,k)
+	template< class C >
+	inline real_t apply( const C& c, const int i, const int j, const int k )
+	{
+		return 
+		( 2.0*( c(i-3,j,k)+c(i+3,j,k)+c(i,j-3,k)+c(i,j+3,k)+c(i,j,k-3)+c(i,j,k+3) )
+		 -27.0*( c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2) )
+		 +270.0*( c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1) )
+		 -1470.0*c(i,j,k) )/180.0;
+	}
+	
+	//! same sum as apply() but without the central term
+	template< class C >
+	inline real_t rhs( const C& c, const int i, const int j, const int k )
+	{
+		return 
+		( 2.0*( c(i-3,j,k)+c(i+3,j,k)+c(i,j-3,k)+c(i,j+3,k)+c(i,j,k-3)+c(i,j,k+3) )
+		 -27.0*( c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2) )
+		 +270.0*( c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1) ) )/180.0;
+	}
+	
+	//! central coefficient of the stencil
+	inline real_t ccoeff( void )
+	{
+		const real_t centre = -1470.0/180.0;
+		return centre;
+	}
+	
+};
+
+
+//! flux operator for the 4th order FD Laplacian
+/*!
+ * Each member evaluates the four-point combination
+ * (-c[-2] + 15 c[-1] - 15 c[0] + c[+1]) along one axis and multiplies it
+ * by -idir/12.
+ */
+template< typename real_t >
+class Laplace_flux_O4
+{
+public:
+	/*! computes flux across a surface normal to x-direction
+	 * @param idir idir is -1 for left boundary, +1 for right boundary
+	 * @param c array on which to apply the operator
+	 * @param i grid x-index
+	 * @param j grid y-index
+	 * @param k grid z-index
+	 * @returns flux value
+	 */
+	template< class C >
+	inline double apply_x( int idir, const C& c, const int i, const int j, const int k )
+	{
+		const double fac = -((double)idir)/12.0;
+		double comb = -c(i-2,j,k);
+		comb += 15.0*c(i-1,j,k);
+		comb -= 15.0*c(i,j,k);
+		comb += c(i+1,j,k);
+		return fac*comb;
+	}
+	
+	/*! computes flux across a surface normal to y-direction
+	 * @param idir idir is -1 for left boundary, +1 for right boundary
+	 * @param c array on which to apply the operator
+	 * @param i grid x-index
+	 * @param j grid y-index
+	 * @param k grid z-index
+	 * @returns flux value
+	 */
+	template< class C >
+	inline double apply_y( int idir, const C& c, const int i, const int j, const int k )
+	{
+		const double fac = -((double)idir)/12.0;
+		double comb = -c(i,j-2,k);
+		comb += 15.0*c(i,j-1,k);
+		comb -= 15.0*c(i,j,k);
+		comb += c(i,j+1,k);
+		return fac*comb;
+	}
+	
+	/*! computes flux across a surface normal to z-direction
+	 * @param idir idir is -1 for left boundary, +1 for right boundary
+	 * @param c array on which to apply the operator
+	 * @param i grid x-index
+	 * @param j grid y-index
+	 * @param k grid z-index
+	 * @returns flux value
+	 */
+	template< class C >
+	inline double apply_z( int idir, const C& c, const int i, const int j, const int k )
+	{
+		const double fac = -((double)idir)/12.0;
+		double comb = -c(i,j,k-2);
+		comb += 15.0*c(i,j,k-1);
+		comb -= 15.0*c(i,j,k);
+		comb += c(i,j,k+1);
+		return fac*comb;
+	}
+	
+};
+
+
+//! flux operator for the 6th order FD Laplacian
+/*!
+ * Each member evaluates the six-point combination
+ * (2 c[-3] - 25 c[-2] + 245 c[-1] - 245 c[0] + 25 c[+1] - 2 c[+2]) along one
+ * axis and multiplies it by -idir/180.
+ */
+template< typename real_t >
+class Laplace_flux_O6
+{
+public:
+	
+	/*! computes flux across a surface normal to x-direction
+	 * @param idir idir is -1 for left boundary, +1 for right boundary
+	 * @param c array on which to apply the operator
+	 * @param i grid x-index
+	 * @param j grid y-index
+	 * @param k grid z-index
+	 * @returns flux value
+	 */
+	template< class C >
+	inline double apply_x( int idir, const C& c, const int i, const int j, const int k )
+	{
+		const double fac = -((double)idir)/180.0;
+		double comb = 2.*c(i-3,j,k);
+		comb -= 25.*c(i-2,j,k);
+		comb += 245.*c(i-1,j,k);
+		comb -= 245.0*c(i,j,k);
+		comb += 25.*c(i+1,j,k);
+		comb -= 2.*c(i+2,j,k);
+		return fac*comb;
+	}
+	
+	/*! computes flux across a surface normal to y-direction
+	 * @param idir idir is -1 for left boundary, +1 for right boundary
+	 * @param c array on which to apply the operator
+	 * @param i grid x-index
+	 * @param j grid y-index
+	 * @param k grid z-index
+	 * @returns flux value
+	 */
+	template< class C >
+	inline double apply_y( int idir, const C& c, const int i, const int j, const int k )
+	{
+		const double fac = -((double)idir)/180.0;
+		double comb = 2.*c(i,j-3,k);
+		comb -= 25.*c(i,j-2,k);
+		comb += 245.*c(i,j-1,k);
+		comb -= 245.0*c(i,j,k);
+		comb += 25.*c(i,j+1,k);
+		comb -= 2.*c(i,j+2,k);
+		return fac*comb;
+	}
+	
+	/*! computes flux across a surface normal to z-direction
+	 * @param idir idir is -1 for left boundary, +1 for right boundary
+	 * @param c array on which to apply the operator
+	 * @param i grid x-index
+	 * @param j grid y-index
+	 * @param k grid z-index
+	 * @returns flux value
+	 */
+	template< class C >
+	inline double apply_z( int idir, const C& c, const int i, const int j, const int k )
+	{
+		const double fac = -((double)idir)/180.0;
+		double comb = 2.*c(i,j,k-3);
+		comb -= 25.*c(i,j,k-2);
+		comb += 245.*c(i,j,k-1);
+		comb -= 245.0*c(i,j,k);
+		comb += 25.*c(i,j,k+1);
+		comb -= 2.*c(i,j,k+2);
+		return fac*comb;
+	}
+	
+};
+
+
+
+#endif
+
+
--- a/fft_operators.hh
+++ b/fft_operators.hh
@ -0,0 +1,204 @@
+#ifndef __FFT_OPERATORS_HH
+#define __FFT_OPERATORS_HH
+//! Fourier-space interpolation from a coarse grid onto a fine grid
+struct fft_interp{
+
+private:
+
+  //! copy a rectangular (x,y) range of coarse Fourier modes into the fine spectrum
+  /*!
+   * @param ccoarse coarse spectrum, laid out as nxc x nyc x (nzc/2+1)
+   * @param cfine   fine spectrum, laid out as nxf x nyf x (nzf/2+1)
+   * @param ibeg,iend   half-open range of coarse x-indices
+   * @param jbeg,jend   half-open range of coarse y-indices
+   * @param ishift,jshift offsets added to the coarse x/y index to obtain the fine index
+   * @param nyc,nzc coarse grid sizes in y and z
+   * @param nyf,nzf fine grid sizes in y and z
+   * @param fac     factor applied to every copied mode
+   */
+  static void copy_modes( fftw_complex *ccoarse, fftw_complex *cfine,
+			  int ibeg, int iend, int jbeg, int jend, int ishift, int jshift,
+			  size_t nyc, size_t nzc, size_t nyf, size_t nzf, double fac )
+  {
+    const int nkc = (int)nzc/2+1;
+
+    #pragma omp parallel for
+    for( int i=ibeg; i<iend; i++ )
+      for( int j=jbeg; j<jend; j++ )
+	for( int k=0; k<nkc; k++ )
+	  {
+	    size_t qc = ((size_t)i*nyc+(size_t)j)*(nzc/2+1)+(size_t)k;
+	    size_t qf = ((size_t)(i+ishift)*nyf+(size_t)(j+jshift))*(nzf/2+1)+(size_t)k;
+
+	    RE(cfine[qf]) = fac*RE(ccoarse[qc]);
+	    IM(cfine[qf]) = fac*IM(ccoarse[qc]);
+	  }
+  }
+
+public:
+
+  //! interpolate the coarse grid V onto the fine grid v via FFT
+  /*!
+   * The region of V that starts at the offset of v and spans half the size
+   * of v in each dimension is Fourier transformed. Its modes, multiplied by
+   * sqrt(8), are copied into the matching low-frequency modes of a fine-grid
+   * spectrum. The inverse transform, divided by the number of fine cells, is
+   * written to v.
+   *
+   * @param V coarse grid; read at (v.offset(d)+i) for 0 <= i < v.size(d)/2
+   * @param v fine grid; all three sizes must be even; overwritten
+   * @param fourier_splice if true, the transform of the current content of v
+   *        is the starting spectrum and only the modes covered by the coarse
+   *        grid are replaced; if false, all other modes are zero
+   */
+  template< typename m1, typename m2 >
+  void interpolate( m1& V, m2& v, bool fourier_splice = false ) const
+  {
+    int oxf = v.offset(0), oyf = v.offset(1), ozf = v.offset(2);
+    
+    // nzfp/nzcp: padded length of the last dimension for in-place real FFTs
+    size_t nxf = v.size(0), nyf = v.size(1), nzf = v.size(2), nzfp = nzf+2;
+
+    // cut out piece of coarse grid that overlaps the fine:
+    assert( nxf%2==0 && nyf%2==0 && nzf%2==0 );
+
+    size_t nxc = nxf/2, nyc = nyf/2, nzc = nzf/2, nzcp = nzc+2;
+
+    fftw_real *rcoarse = new fftw_real[ nxc * nyc * nzcp ];
+    fftw_complex *ccoarse = reinterpret_cast<fftw_complex*> (rcoarse);
+
+    fftw_real *rfine = new fftw_real[ nxf * nyf * nzfp];
+    fftw_complex *cfine = reinterpret_cast<fftw_complex*> (rfine);
+
+    #pragma omp parallel for
+    for( int i=0; i<(int)nxc; ++i )
+      for( int j=0; j<(int)nyc; ++j )
+	for( int k=0; k<(int)nzc; ++k ) 
+	  {
+	    size_t q = ((size_t)i*nyc+(size_t)j)*nzcp+(size_t)k;
+	    rcoarse[q] = V( oxf+i, oyf+j, ozf+k );
+	  }
+
+    if( fourier_splice )
+      {
+	#pragma omp parallel for
+	for( int i=0; i<(int)nxf; ++i )
+	  for( int j=0; j<(int)nyf; ++j )
+	    for( int k=0; k<(int)nzf; ++k ) 
+	      {
+		size_t q = ((size_t)i*nyf+(size_t)j)*nzfp+(size_t)k;
+		rfine[q] = v(i,j,k);
+	      }
+      }
+    else
+      {
+	#pragma omp parallel for
+	for( size_t i=0; i<nxf*nyf*nzfp; ++i )
+	  rfine[i] = 0.0;
+      }
+
+    // forward transforms: always for the coarse data, for the fine data only
+    // when its existing modes are to be kept
+#ifdef FFTW3
+#ifdef SINGLE_PRECISION
+    fftwf_plan
+      pc  = fftwf_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE),
+      pf  = fftwf_plan_dft_r2c_3d( nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE),
+      ipf = fftwf_plan_dft_c2r_3d( nxf, nyf, nzf, cfine, rfine, FFTW_ESTIMATE);
+    fftwf_execute( pc );
+    if( fourier_splice )
+      fftwf_execute( pf );
+#else
+    fftw_plan
+      pc  = fftw_plan_dft_r2c_3d( nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE),
+      pf  = fftw_plan_dft_r2c_3d( nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE),
+      ipf = fftw_plan_dft_c2r_3d( nxf, nyf, nzf, cfine, rfine, FFTW_ESTIMATE);
+    fftw_execute( pc );
+    if( fourier_splice )
+      fftw_execute( pf );	// double precision plan needs the fftw_ (not fftwf_) call
+#endif
+#else
+    rfftwnd_plan 
+      pc  = rfftw3d_create_plan( nxc, nyc, nzc, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
+      pf  = rfftw3d_create_plan( nxf, nyf, nzf, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
+      ipf = rfftw3d_create_plan( nxf, nyf, nzf, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
+    
+#ifndef SINGLETHREAD_FFTW		
+    rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), pc, rcoarse, NULL );
+    if( fourier_splice )
+      rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), pf, rfine, NULL );
+#else
+    rfftwnd_one_real_to_complex( pc, rcoarse, NULL );
+    if( fourier_splice )
+      rfftwnd_one_real_to_complex( pf, rfine, NULL );
+#endif
+#endif
+
+    /*************************************************/
+    //.. perform actual interpolation
+    const double fftnorm = 1.0/((double)nxf*(double)nyf*(double)nzf);
+    const double sqrt8 = sqrt(8.0);
+
+    // coarse indices in the upper half of x or y are shifted by the
+    // difference between fine and coarse grid size to reach the upper end
+    // of the fine spectrum
+    const int hx = (int)nxc/2, hy = (int)nyc/2;
+    const int sx = (int)(nxf-nxc), sy = (int)(nyf-nyc);
+
+    // 0 0
+    copy_modes( ccoarse, cfine, 0,  hx+1,     0,  hy+1,     0,  0,  nyc, nzc, nyf, nzf, sqrt8 );
+    // 1 0
+    copy_modes( ccoarse, cfine, hx, (int)nxc, 0,  hy+1,     sx, 0,  nyc, nzc, nyf, nzf, sqrt8 );
+    // 0 1
+    copy_modes( ccoarse, cfine, 0,  hx+1,     hy, (int)nyc, 0,  sy, nyc, nzc, nyf, nzf, sqrt8 );
+    // 1 1
+    copy_modes( ccoarse, cfine, hx, (int)nxc, hy, (int)nyc, sx, sy, nyc, nzc, nyf, nzf, sqrt8 );
+        
+    // the coarse buffer is not needed any more; it is released exactly once
+    delete[] rcoarse;
+
+    /*************************************************/    
+
+#ifdef FFTW3
+  #ifdef SINGLE_PRECISION
+    fftwf_execute( ipf );
+    fftwf_destroy_plan(pf);
+    fftwf_destroy_plan(pc);
+    fftwf_destroy_plan(ipf);
+  #else
+    fftw_execute( ipf );
+    fftw_destroy_plan(pf);
+    fftw_destroy_plan(pc);
+    fftw_destroy_plan(ipf);
+  #endif
+#else
+  #ifndef SINGLETHREAD_FFTW		
+    rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), ipf, cfine, NULL );
+  #else
+    rfftwnd_one_complex_to_real( ipf, cfine, NULL );
+  #endif
+    // plans created by rfftw3d_create_plan are released with the rfftwnd call
+    rfftwnd_destroy_plan(pf);
+    rfftwnd_destroy_plan(pc);
+    rfftwnd_destroy_plan(ipf);
+#endif
+
+    // copy back and normalize
+    #pragma omp parallel for
+    for( int i=0; i<(int)nxf; ++i )
+      for( int j=0; j<(int)nyf; ++j )
+	for( int k=0; k<(int)nzf; ++k ) 
+	  {
+	    size_t q = ((size_t)i*nyf+(size_t)j)*nzfp+(size_t)k;
+	    v(i,j,k) = rfine[q] * fftnorm;
+	  }
+
+    delete[] rfine;
+
+  }
+
+
+
+};
+
+
+#endif //__FFT_OPERATORS_HH
--- a/general.hh
+++ b/general.hh
@ -0,0 +1,174 @@
+/*
+ 
+ general.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+*/
+
+#ifndef __GENERAL_HH
+#define __GENERAL_HH
+
+#include "log.hh"
+
+#include <cassert>
+#include "omp.h"
+
+#ifdef WITH_MPI
+  #ifdef MANNO
+    #include <mpi.h>
+  #else
+    #include <mpi++.h>
+  #endif
+#else
+#include <time.h>
+#endif
+
+#ifdef FFTW3
+	#include <fftw3.h>
+	#if defined(SINGLE_PRECISION)
+	typedef float fftw_real;
+	#else
+	typedef double fftw_real;
+	#endif
+
+#else
+	#if defined(SINGLE_PRECISION) and not defined(SINGLETHREAD_FFTW)
+	#include <srfftw.h>
+	#include <srfftw_threads.h>
+	#elif defined(SINGLE_PRECISION) and defined(SINGLETHREAD_FFTW)
+	#include <srfftw.h>
+	#elif not defined(SINGLE_PRECISION) and not defined(SINGLETHREAD_FFTW)
+	#include <drfftw.h>
+	#include <drfftw_threads.h>
+	#elif not defined(SINGLE_PRECISION) and defined(SINGLETHREAD_FFTW)
+	#include <drfftw.h>
+	#endif
+#endif
+
+#ifdef SINGLE_PRECISION
+	typedef float real_t;
+#else
+	typedef double real_t;
+#endif
+
+
+#ifdef FFTW3
+	#define RE(x) ((x)[0])
+	#define IM(x) ((x)[1])
+#else
+	#define RE(x) ((x).re)
+	#define IM(x) ((x).im)
+#endif
+
+#if defined(FFTW3) && defined(SINGLE_PRECISION)
+#define fftw_complex fftwf_complex
+#endif
+
+
+
+#include <vector>
+
+#include "config_file.hh"
+//#include "mesh.hh"
+
+
+
+//! return the second power a*a of the argument
+template< typename T >
+inline T SQR( T a )
+{
+  const T squared = a*a;
+  return squared;
+}
+
+//! return the third power a*a*a of the argument
+template< typename T >
+inline T CUBE( T a )
+{
+  const T cubed = a*a*a;
+  return cubed;
+}
+
+//! return the fourth power of the argument, evaluated as (a*a)*(a*a)
+template< typename T >
+inline T POW4( T a )
+{
+	const T squared = a*a;
+	return squared*squared;
+}
+
+
+//! structure for cosmological parameters
+typedef struct cosmology{
+  double 
+    Omega_m,		//!< baryon+dark matter density
+    Omega_b,		//!< baryon matter density
+    Omega_L,		//!< dark energy density
+    Omega_r,        //!< photon + relativistic particle density
+    H0,				//!< Hubble constant
+    nspect,			//!< long-wave spectral index (scale free is nspect=1) 
+    sigma8,			//!< power spectrum normalization
+	//Gamma,		//!< shape parameter (of historical interest, as a free parameter)
+    //fnl,			//!< non-gaussian contribution parameter
+	//w0,			//!< dark energy equation of state parameter (not implemented, i.e. =1 at the moment)
+	//wa,			//!< dark energy equation of state parameter (not implemented, i.e. =1 at the moment)
+	dplus,			//!< linear perturbation growth factor
+	pnorm,			//!< actual power spectrum normalisation factor
+	vfact,			//!< velocity<->displacement conversion factor in Zel'dovich approx.
+	WDMmass,		//!< Warm DM particle mass
+	WDMg_x,			//!< Warm DM particle degrees of freedom
+	astart;			//!< expansion factor a for which to generate initial conditions
+	
+	//! read the cosmological parameters from a configuration file
+	/*!
+	 * astart is computed as 1/(1+zstart) from the "setup" section; all other
+	 * values come from the "cosmology" section. Omega_r, WDMg_x and WDMmass
+	 * are requested through getValueSafe with the fallback values 0.0, 1.5
+	 * and 0.0. dplus, pnorm and vfact are set to zero here.
+	 *
+	 * @param cf configuration file object to query
+	 */
+	cosmology( config_file cf )
+	{
+		double zstart = cf.getValue<double>( "setup", "zstart" );
+		
+		astart		= 1.0/(1.0+zstart);
+		Omega_b		= cf.getValue<double>( "cosmology", "Omega_b" );
+		Omega_m		= cf.getValue<double>( "cosmology", "Omega_m" );
+		Omega_L		= cf.getValue<double>( "cosmology", "Omega_L" );
+        Omega_r     = cf.getValueSafe<double>( "cosmology", "Omega_r", 0.0 ); // no longer default to nonzero (8.3e-5)
+		H0			= cf.getValue<double>( "cosmology", "H0" );
+		sigma8		= cf.getValue<double>( "cosmology", "sigma_8" );
+		nspect		= cf.getValue<double>( "cosmology", "nspec" );
+		WDMg_x		= cf.getValueSafe<double>( "cosmology", "WDMg_x", 1.5 );
+		WDMmass		= cf.getValueSafe<double>( "cosmology", "WDMmass", 0.0 );
+		
+		dplus			= 0.0;
+		pnorm			= 0.0;
+		vfact			= 0.0;
+	}
+	
+	//! create an unconfigured parameter set
+	/*! every member is set to zero so that a default-constructed object never
+	 *  carries indeterminate values */
+	cosmology( void )
+	: Omega_m( 0.0 ), Omega_b( 0.0 ), Omega_L( 0.0 ), Omega_r( 0.0 ),
+	  H0( 0.0 ), nspect( 0.0 ), sigma8( 0.0 ),
+	  dplus( 0.0 ), pnorm( 0.0 ), vfact( 0.0 ),
+	  WDMmass( 0.0 ), WDMg_x( 0.0 ), astart( 0.0 )
+	{
+		
+	}
+}Cosmology;
+
+//! basic box/grid/refinement structure parameters
+typedef struct {
+	unsigned levelmin;				//!< presumably the coarsest grid level -- TODO confirm
+	unsigned levelmax;				//!< presumably the finest grid level -- TODO confirm
+	double boxlength;				//!< box length; units are not visible here
+	std::vector<unsigned> offx;		//!< NOTE(review): looks like per-level x-offsets -- confirm
+	std::vector<unsigned> offy;		//!< NOTE(review): looks like per-level y-offsets -- confirm
+	std::vector<unsigned> offz;		//!< NOTE(review): looks like per-level z-offsets -- confirm
+	std::vector<unsigned> llx;		//!< NOTE(review): meaning not visible here -- confirm
+	std::vector<unsigned> lly;		//!< NOTE(review): meaning not visible here -- confirm
+	std::vector<unsigned> llz;		//!< NOTE(review): meaning not visible here -- confirm
+}Parameters;
+
+//! return a time stamp in seconds for measuring elapsed time
+/*!
+ * With WITH_MPI defined the value comes from MPI_Wtime(), otherwise from
+ * clock()/CLOCKS_PER_SEC.
+ * NOTE(review): clock() reports processor time rather than wall-clock time,
+ * so the two variants measure different things -- confirm this is intended.
+ */
+inline double time_seconds( void )
+{
+#ifdef WITH_MPI
+  const double stamp = MPI_Wtime();
+#else
+  const double stamp = ((double) clock()) / CLOCKS_PER_SEC;
+#endif
+  return stamp;
+}
+
+
+//! test whether a string is a decimal integer with an optional leading minus sign
+/*!
+ * @param s string to test
+ * @returns true if s consists of one or more decimal digits, optionally
+ *          preceded by a single '-'; false otherwise, including for the
+ *          empty string and for a lone "-"
+ */
+inline bool is_number(const std::string& s)
+{
+	// a minus sign is only accepted as the very first character
+	const std::string::size_type first = ( !s.empty() && s[0]=='-' )? 1 : 0;
+	
+	// at least one digit is required
+	if( first >= s.length() )
+		return false;
+	
+	for( std::string::size_type i = first; i < s.length(); ++i )
+		// isdigit is only defined for unsigned char values (and EOF)
+		if( !std::isdigit( static_cast<unsigned char>(s[i]) ) )
+			return false;
+	
+	return true;
+}
+
+
+#endif
--- a/log.cc
+++ b/log.cc
@ -0,0 +1,124 @@
+/*
+ 
+ log.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#include "log.hh"
+#include <iostream>
+#include <algorithm>
+
+std::string RemoveMultipleWhiteSpaces( std::string s );
+
+
+// definitions of the static data members of MUSIC::log
+
+// name of the log file as last passed to setOutput()
+std::string MUSIC::log::outputFile_;
+// stream that send() writes to while it is open
+std::ofstream MUSIC::log::outputStream_;
+// every message recorded by send()
+std::list<MUSIC::log::message> MUSIC::log::messages_;
+// optional callback invoked by send(); unset (NULL) initially
+void (*MUSIC::log::receiver)(const message&) = NULL;
+// threshold that send() compares message types against
+MUSIC::log::messageType MUSIC::log::logLevel_;
+
+
+//! collapse every run of consecutive space characters into a single space
+/*!
+ * @param s input string
+ * @returns copy of s in which no two space characters are adjacent
+ */
+std::string RemoveMultipleWhiteSpaces( std::string s )
+{ 
+	std::string collapsed;
+	collapsed.reserve( s.size() );
+	
+	for( size_t pos = 0; pos < s.size(); ++pos )
+	{
+		// drop a space that directly follows a space already kept
+		const bool repeated_space = s[pos] == ' '
+			&& !collapsed.empty()
+			&& collapsed[collapsed.size()-1] == ' ';
+		
+		if( !repeated_space )
+			collapsed += s[pos];
+	}
+	
+	return collapsed;
+}
+
+//! record a message and forward it to the console, the log file and the user receiver
+/*!
+ * Messages whose type compares less than a non-zero log level are dropped.
+ * All others are appended to the message list. Info, Warning, Error and
+ * FatalError messages are also printed to std::cout. For the log file, line
+ * breaks are replaced by spaces and runs of spaces are collapsed.
+ *
+ * @param type  type of the message
+ * @param text_ message text
+ */
+void MUSIC::log::send(messageType type, const std::string& text_)
+{
+	std::string text(text_);
+	
+	// Skip logging if minimum level is higher
+	if( logLevel_ && type < logLevel_ )
+		return;
+	
+	// log message
+	MUSIC::log::message m;
+	m.type = type;
+	m.text = text;
+	time_t t = time(NULL);
+	m.when = localtime(&t);
+	messages_.push_back(m);
+	
+	if( type==Info||type==Warning||type==Error||type==FatalError )
+	{	
+		std::cout << " - "; 
+		if(type==Warning)
+			std::cout << "WARNING: ";
+		if(type==Error)
+			std::cout << "ERROR: ";
+		if(type==FatalError)
+			std::cout << "FATAL: ";
+		std::cout << text << std::endl;
+	}
+	
+	// single-line form for the log file; the helper returns the cleaned
+	// string, so its result has to be assigned back
+	std::replace(text.begin(),text.end(),'\n',' ');
+	text = RemoveMultipleWhiteSpaces(text);
+	
+	// if enabled logging to file
+	if(outputStream_.is_open())
+	{
+		// print time; fall back to an empty stamp if the time is
+		// unavailable or does not fit into the buffer
+		char buffer[9];
+		if( m.when == NULL || strftime(buffer, sizeof(buffer), "%X", m.when) == 0 )
+			buffer[0] = '\0';
+		outputStream_ << buffer;
+		
+		// print type
+		switch(type)
+		{
+			case Info:		outputStream_ << " | info    | "; break;
+			case DebugInfo: outputStream_ << " | debug   | "; break;
+			case Warning:	outputStream_ << " | warning | "; break;
+			case Error:		outputStream_ << " | ERROR   | "; break;
+			case FatalError:outputStream_ << " | FATAL   | "; break;
+			case User:		outputStream_ << " | info    | "; break;
+			default:		outputStream_ << " | ";
+		}
+		
+		// print description
+		outputStream_ << text << std::endl;
+	}
+	
+	// if user wants to catch messages, send it to him
+	if(receiver)
+		receiver(m);
+}
+
+
+//! direct file logging to the given file, closing any file that was open before
+/*!
+ * @param filename path of the log file to create; an error is logged via
+ *        LOGERR if the file cannot be opened
+ */
+void MUSIC::log::setOutput(const std::string& filename)
+{
+	outputFile_ = filename;
+	
+	// release the previous log file before switching
+	if( outputStream_.is_open() )
+		outputStream_.close();
+	
+	outputStream_.open( filename.c_str() );
+	if( outputStream_.is_open() )
+		return;
+	
+	LOGERR("Cannot create/open logfile \'%s\'.",filename.c_str());
+}
+
+//! store the threshold that send() compares message types against
+void MUSIC::log::setLevel(const MUSIC::log::messageType level) 
+{
+	MUSIC::log::logLevel_ = level;
+}
+
+
+//! close the log file if one is open
+MUSIC::log::~log()
+{
+	if( !outputStream_.is_open() )
+		return;
+	
+	outputStream_.close();
+}
+
--- a/log.hh
+++ b/log.hh
@ -0,0 +1,173 @@
+/*
+ 
+ log.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#ifndef __LOG_HH
+#define __LOG_HH
+
+#include <cstdarg>
+#include <cstdio>
+#include <ctime>
+#include <fstream>
+#include <list>
+#include <sstream>
+#include <string>
+
+/*!
+ *	\brief	System for logging runtime library errors, warnings, etc.
+ *
+ *	This is the class that catches every (debug) info, warning, error, or user message and
+ *	processes it. Messages can be written to a file and/or forwarded to a user function
+ *	for further processing. All state is held in static members, so the interface is
+ *	used through the static member functions.
+ */
+namespace MUSIC
+{
+	
+class log
+{
+public:
+	log(){}
+	~log();
+	
+	/*!
+	 *	\brief	Types of logged messages.
+	 *
+	 *	The enumerator order matters: send() drops a message whose type compares
+	 *	less than the level given to setLevel(). A level of Info (the first
+	 *	enumerator, value 0) switches this filter off.
+	 */
+	enum messageType
+	{
+		Info,
+		DebugInfo,
+		Warning,
+		Error,
+		FatalError,
+		User
+	};
+	
+	/*!
+	 *	\brief	Logged message of type messageType with some info.
+	 */
+	struct message
+	{
+		messageType type;	//!< type of the message
+		std::string text;	//!< message text as passed to send()
+		// NOTE(review): send() stores the pointer returned by localtime() here;
+		// localtime() may hand out the same static buffer on every call --
+		// confirm before relying on per-message time stamps
+		tm* when;
+	};
+	
+	/*!
+	 *	\brief	Open the file to which messages are logged.
+	 */
+	static void setOutput(const std::string& filename);
+	
+	/*!
+	 *	\brief	Get the filename of the log.
+	 */
+	static const std::string& output() { return outputFile_; }
+	
+	/*!
+	 *	\brief	Add a new message to the log.
+	 *	\param	type	Type of the new message.
+	 *	\param	text	Message.
+	 *	\remarks The message is passed directly to the user receiver if one is set.
+	 */
+	static void send(messageType type, const std::string& text);
+	//static void send(messageType type, std::string& text);
+	
+	/*!
+	 *	\brief	Get the list of all logged messages.
+	 */
+	static const std::list<message>& messages() { return messages_; }
+	
+	/*!
+	 *	\brief	Get the last logged message.
+	 *	\remarks Calls back() on the message list, so at least one message must have been logged.
+	 */
+	static const message& lastMessage() { return messages_.back(); }
+	
+	/*!
+	 *	\brief	Set the user function that receives messages newly sent to the logger.
+	 */
+	static void setUserReceiver(void (*userFunc)(const message&)) { receiver = userFunc; }
+	
+	/*!
+	 *	\brief	Set minimum level of message to be logged.
+	 */
+	static void setLevel(const log::messageType level);
+	
+private:
+	
+	static std::string outputFile_;					//!< name of the log file
+	static std::ofstream outputStream_;				//!< stream of the log file
+	static std::list<message> messages_;			//!< all logged messages
+	static messageType logLevel_;					//!< minimum level set by setLevel()
+	static void (*receiver)(const message&);		//!< user receiver, may be unset
+};
+
+}
+
+
+//! format a printf-style message and log it with type Error
+/*!
+ * @param str printf-style format string, followed by its arguments;
+ *        the formatted text is truncated to fit a 1024-character buffer
+ */
+inline void LOGERR( const char* str, ... )
+{
+	char out[1024];
+	va_list argptr;
+	va_start(argptr,str);
+	// format while the argument list is still valid; vsnprintf truncates
+	// instead of writing past the end of the buffer
+	vsnprintf(out,sizeof(out),str,argptr);
+	va_end(argptr);
+	MUSIC::log::send(MUSIC::log::Error, std::string(out));
+}
+
+//! format a printf-style message and log it with type Warning
+/*!
+ * @param str printf-style format string, followed by its arguments;
+ *        the formatted text is truncated to fit a 1024-character buffer
+ */
+inline void LOGWARN( const char* str, ... )
+{
+	char out[1024];
+	va_list argptr;
+	va_start(argptr,str);
+	// format while the argument list is still valid; vsnprintf truncates
+	// instead of writing past the end of the buffer
+	vsnprintf(out,sizeof(out),str,argptr);
+	va_end(argptr);
+	MUSIC::log::send(MUSIC::log::Warning, std::string(out));
+}
+
+//! format a printf-style message and log it with type FatalError
+/*!
+ * @param str printf-style format string, followed by its arguments;
+ *        the formatted text is truncated to fit a 1024-character buffer
+ */
+inline void LOGFATAL( const char* str, ... )
+{
+	char out[1024];
+	va_list argptr;
+	va_start(argptr,str);
+	// format while the argument list is still valid; vsnprintf truncates
+	// instead of writing past the end of the buffer
+	vsnprintf(out,sizeof(out),str,argptr);
+	va_end(argptr);
+	MUSIC::log::send(MUSIC::log::FatalError, std::string(out));
+}
+
+//! format a printf-style message and log it with type DebugInfo
+/*!
+ * @param str printf-style format string, followed by its arguments;
+ *        the formatted text is truncated to fit a 1024-character buffer
+ */
+inline void LOGDEBUG( const char* str, ... )
+{
+	char out[1024];
+	va_list argptr;
+	va_start(argptr,str);
+	// format while the argument list is still valid; vsnprintf truncates
+	// instead of writing past the end of the buffer
+	vsnprintf(out,sizeof(out),str,argptr);
+	va_end(argptr);
+	MUSIC::log::send(MUSIC::log::DebugInfo, std::string(out));
+}
+
+//! format a printf-style message and log it with type User
+/*!
+ * @param str printf-style format string, followed by its arguments;
+ *        the formatted text is truncated to fit a 1024-character buffer
+ */
+inline void LOGUSER( const char* str, ... )
+{
+	char out[1024];
+	va_list argptr;
+	va_start(argptr,str);
+	// format while the argument list is still valid; vsnprintf truncates
+	// instead of writing past the end of the buffer
+	vsnprintf(out,sizeof(out),str,argptr);
+	va_end(argptr);
+	MUSIC::log::send(MUSIC::log::User, std::string(out));
+}
+
+//! format a printf-style message and log it with type Info
+/*!
+ * @param str printf-style format string, followed by its arguments;
+ *        the formatted text is truncated to fit a 1024-character buffer
+ */
+inline void LOGINFO( const char* str, ... )
+{
+	char out[1024];
+	va_list argptr;
+	va_start(argptr,str);
+	// format while the argument list is still valid; vsnprintf truncates
+	// instead of writing past the end of the buffer
+	vsnprintf(out,sizeof(out),str,argptr);
+	va_end(argptr);
+	MUSIC::log::send(MUSIC::log::Info, std::string(out));
+}
+
+#endif //__LOG_HH
+
+
--- a/main.cc
+++ b/main.cc
--- a/mesh.hh
+++ b/mesh.hh
--- a/mg_interp.hh
+++ b/mg_interp.hh
--- a/mg_operators.hh
+++ b/mg_operators.hh
--- a/mg_solver.hh
+++ b/mg_solver.hh
@ -0,0 +1,679 @@
+/*
+ 
+ mg_solver.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+*/
+
+#ifndef __MG_SOLVER_HH
+#define __MG_SOLVER_HH
+
+#include <cmath>
+#include <iostream>
+
+#include "mg_operators.hh"
+#include "mg_interp.hh"
+
+#include "mesh.hh"
+
+#define BEGIN_MULTIGRID_NAMESPACE namespace multigrid {
+#define END_MULTIGRID_NAMESPACE }
+
+BEGIN_MULTIGRID_NAMESPACE
+	
+//! options for multigrid smoothing operation
+namespace opt {
+	//! selects which relaxation routine the solver applies in its smoothing sweeps
+	enum smtype
+	{
+		sm_jacobi,			//!< Jacobi smoothing
+		sm_gauss_seidel,	//!< Gauss-Seidel smoothing
+		sm_sor				//!< Successive-Overrelaxation smoothing
+	};
+}
+
+
+//! actual implementation of FAS adaptive multigrid solver
+/*!
+ *  @tparam S  finite difference scheme (used through ccoeff(), rhs() and apply())
+ *  @tparam I  coarse-fine interpolation (used through interp_coarse_fine())
+ *  @tparam O  grid transfer operator (used through restrict() and prolong_add())
+ *  @tparam T  floating point type of the grid data
+ */
+template< class S, class I, class O, typename T=double >
+class solver
+{
+public:
+	typedef S scheme;
+	typedef O mgop;
+	typedef I interp;
+
+protected:
+	scheme				m_scheme;				//!< finite difference scheme
+	mgop				m_gridop;				//!< grid prolongation and restriction operator
+	unsigned			m_npresmooth,			//!< number of pre sweeps
+						m_npostsmooth;			//!< number of post sweeps
+	opt::smtype			m_smoother;				//!< smoothing method to be applied
+	unsigned			m_ilevelmin;			//!< index of the top grid level
+	
+	const static bool	m_bperiodic = true;		//!< flag whether top grid is periodic
+	
+	std::vector<double> m_residu_ini;			//!< vector of initial residuals for each level
+	bool m_is_ini;								//!< bool that is true for first iteration
+
+	GridHierarchy<T>	*m_pu,					//!< pointer to GridHierarchy for solution u
+						*m_pf,					//!< pointer to GridHierarchy for right-hand-side
+						*m_pfsave;				//!< pointer to saved state of right-hand-side (unused)
+	
+	const MeshvarBnd<T> *m_pubnd;				//!< boundary values read by setBC()
+	
+	//! compute the mean relative residual of solution u against right-hand side f on one level
+	double compute_error( const MeshvarBnd<T>& u, const MeshvarBnd<T>& f, int ilevel );
+	
+	//! compute the relative error on every level of the hierarchy and return the maximum
+	double compute_error( const GridHierarchy<T>& uh, const GridHierarchy<T>& fh, bool verbose );
+	
+	//! compute RMS residuals for entire grid hierarchy and return the maximum relative value
+	double compute_RMS_resid( const GridHierarchy<T>& uh, const GridHierarchy<T>& fh, bool verbose );
+
+protected:
+	
+	//! Jacobi smoothing 
+	void Jacobi( T h, MeshvarBnd<T>* u, const MeshvarBnd<T>* f );
+	
+	//! Gauss-Seidel smoothing
+	void GaussSeidel( T h, MeshvarBnd<T>* u, const MeshvarBnd<T>* f );
+	
+	//! Successive-Overrelaxation smoothing
+	void SOR( T h, MeshvarBnd<T>* u, const MeshvarBnd<T>* f );
+	
+	//! main two-grid (V-cycle) for multi-grid iterations
+	void twoGrid( unsigned ilevel );
+	
+	//! apply boundary conditions
+	void setBC( unsigned ilevel );
+	
+	//! make top grid periodic boundary conditions
+	void make_periodic( MeshvarBnd<T> *u );
+	
+	//void interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine );
+		
+public:
+	
+	//! constructor
+	solver( GridHierarchy<T>& f, opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth );
+	
+	//! destructor
+	~solver()
+	{  }
+	
+	//! solve Poisson's equation 
+	/*!
+	 *  @param u        grid hierarchy that holds the solution and is updated in place
+	 *  @param accuracy error threshold below which the iteration stops
+	 *  @param h        grid spacing argument
+	 *  @param verbose  print a summary to std::cout on convergence
+	 *  @return error measure after the last V-cycle
+	 *
+	 *  h deliberately has no default value: with both h and verbose defaulted a
+	 *  two-argument call solve(u,accuracy) was ambiguous with the overload below.
+	 *  Every call with three or four arguments resolves exactly as before.
+	 */
+	double solve( GridHierarchy<T>& u, double accuracy, double h, bool verbose=false );
+	
+	//! solve Poisson's equation (convenience overload that passes h=-1.0)
+	double solve( GridHierarchy<T>& u, double accuracy, bool verbose=false )
+	{
+		return this->solve ( u, accuracy, -1.0, verbose );
+	}
+	
+	
+	
+};
+
+
+//! constructor: stores the smoothing configuration and the address of the right-hand side
+/*!
+ *  @param f            grid hierarchy of the right-hand side; only its address is kept,
+ *                      so it has to stay alive as long as the solver is used
+ *  @param smoother     smoothing method applied in the pre- and post-smoothing sweeps
+ *  @param npresmooth   number of pre-smoothing sweeps
+ *  @param npostsmooth  number of post-smoothing sweeps
+ *
+ *  The pointers that are not set from the arguments are initialised to null so that
+ *  they never hold indeterminate values (m_pu is assigned in solve()).
+ */
+template< class S, class I, class O, typename T >
+solver<S,I,O,T>::solver( GridHierarchy<T>& f, opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth )
+:	m_scheme(), m_gridop(), m_npresmooth( npresmooth ), m_npostsmooth( npostsmooth ), 
+	m_smoother( smoother ), m_ilevelmin( f.levelmin() ), m_is_ini( true ),
+	m_pu( 0 ), m_pf( &f ), m_pfsave( 0 ), m_pubnd( 0 )
+{ 
+}
+
+
+//! one damped Jacobi sweep over all cells (0..size-1 in each dimension) of grid u
+/*!
+ *  @param h  grid spacing of this level
+ *  @param u  solution grid, updated in place
+ *  @param f  right-hand side on the same grid
+ */
+template< class S, class I, class O, typename T >
+void solver<S,I,O,T>::Jacobi( T h, MeshvarBnd<T> *u, const MeshvarBnd<T>* f )
+{
+	const int ncx = u->size(0);
+	const int ncy = u->size(1);
+	const int ncz = u->size(2);
+	
+	const double inv_diag = -1.0/m_scheme.ccoeff();
+	const double hsq = h*h; 
+	
+	//... damping: the new value mixes the previous value with the plain Jacobi update
+	const double omega = 0.95;
+	const double keep  = 1.0-omega;
+	
+	//... every stencil read goes to this snapshot of the previous iterate
+	MeshvarBnd<T> uprev(*u);
+	
+	#pragma omp parallel for
+	for( int i=0; i<ncx; ++i )
+	{
+		for( int j=0; j<ncy; ++j )
+		{
+			for( int k=0; k<ncz; ++k )
+			{
+				(*u)(i,j,k) = keep * uprev(i,j,k) + omega * (m_scheme.rhs( uprev, i, j, k ) + hsq * (*f)(i,j,k))*inv_diag;
+			}
+		}
+	}
+}
+
+//! one over-relaxed sweep, done in two passes over cells of even and odd index sum
+/*!
+ *  @param h  grid spacing of this level
+ *  @param u  solution grid, updated in place
+ *  @param f  right-hand side on the same grid
+ */
+template< class S, class I, class O, typename T >
+void solver<S,I,O,T>::SOR( T h, MeshvarBnd<T> *u, const MeshvarBnd<T>* f )
+{
+	const int ncx = u->size(0);
+	const int ncy = u->size(1);
+	const int ncz = u->size(2);
+
+	const double inv_diag = -1.0/m_scheme.ccoeff();
+	const double hsq = h*h; 
+	
+	//... fixed relaxation factor
+	//... (ideal value would be 2 / (1 + 4 * atan(1.0) / double(u->size(0)))-1.0)
+	const double omega = 1.2;
+	const double keep  = 1.0-omega;
+		
+	//... snapshot of the iterate before this sweep
+	MeshvarBnd<T> uprev(*u);
+	
+	//... first pass: cells with even index sum, stencil evaluated on the snapshot
+	#pragma omp parallel for
+	for( int i=0; i<ncx; ++i )
+	{
+		for( int j=0; j<ncy; ++j )
+		{
+			for( int k=0; k<ncz; ++k )
+			{
+				if( (i+j+k)%2 != 0 )
+					continue;
+				
+				(*u)(i,j,k) = keep * uprev(i,j,k) + omega * (m_scheme.rhs( uprev, i, j, k ) + hsq * (*f)(i,j,k))*inv_diag;
+			}
+		}
+	}
+	
+	//... second pass: cells with odd index sum, stencil evaluated on the partly updated grid
+	#pragma omp parallel for
+	for( int i=0; i<ncx; ++i )
+	{
+		for( int j=0; j<ncy; ++j )
+		{
+			for( int k=0; k<ncz; ++k )
+			{
+				if( (i+j+k)%2 == 0 )
+					continue;
+				
+				(*u)(i,j,k) = keep * uprev(i,j,k) + omega * (m_scheme.rhs( *u, i, j, k ) + hsq * (*f)(i,j,k))*inv_diag;
+			}
+		}
+	}
+}
+
+//! one Gauss-Seidel sweep, done in two passes over cells of even and odd index sum
+/*!
+ *  @param h  grid spacing of this level
+ *  @param u  solution grid, updated in place
+ *  @param f  right-hand side on the same grid
+ */
+template< class S, class I, class O, typename T >
+void solver<S,I,O,T>::GaussSeidel( T h, MeshvarBnd<T>* u, const MeshvarBnd<T>* f )
+{
+	const int ncx = u->size(0);
+	const int ncy = u->size(1);
+	const int ncz = u->size(2);
+	
+	//... note that both factors are kept in the grid data type T here
+	const T inv_diag = -1.0/m_scheme.ccoeff();
+	const T hsq = h*h; 
+	
+	//... pass 0 updates cells with even index sum, pass 1 those with odd index sum
+	for( int parity=0; parity < 2; ++parity )
+	{
+		#pragma omp parallel for
+		for( int i=0; i<ncx; ++i )
+		{
+			for( int j=0; j<ncy; ++j )
+			{
+				for( int k=0; k<ncz; ++k )
+				{
+					if( (i+j+k)%2 != parity )
+						continue;
+					
+					(*u)(i,j,k) = (m_scheme.rhs( *u, i, j, k ) + hsq * (*f)(i,j,k))*inv_diag;
+				}
+			}
+		}
+	}
+}
+
+
+//! one recursive two-grid cycle between level ilevel (fine) and ilevel-1 (coarse)
+/*!
+ *  Steps, in order: pre-smoothing on the fine grid, restriction of the solution,
+ *  construction of the coarse right-hand side including the correction term
+ *  (restricted fine operator minus coarse operator), recursion to the next
+ *  coarser level, prolongation of the coarse correction and post-smoothing.
+ *
+ *  @param ilevel  index of the fine level; the grids of level ilevel-1 are accessed
+ *                 as well, so ilevel has to be at least 1
+ */
+template< class S, class I, class O, typename T >
+void solver<S,I,O,T>::twoGrid( unsigned ilevel )
+{
+	MeshvarBnd<T> *uf, *uc, *ff, *fc;
+	
+	
+	//... h is the fine grid spacing 1/2^ilevel; c0 is only needed in the non-periodic branch below
+	double 
+		h = 1.0/(1<<ilevel),
+		c0 = -1.0/m_scheme.ccoeff(),
+		h2 = h*h; 
+	
+	uf = m_pu->get_grid(ilevel);
+	ff = m_pf->get_grid(ilevel);	
+	
+	uc = m_pu->get_grid(ilevel-1);
+	fc = m_pf->get_grid(ilevel-1);	
+	
+	
+	int 
+		nx = uf->size(0), 
+		ny = uf->size(1), 
+		nz = uf->size(2);
+	
+	//... levels up to m_ilevelmin are treated as periodic, setBC is only used for a non-periodic top grid
+	if( m_bperiodic && ilevel <= m_ilevelmin)
+		make_periodic( uf );
+	else if(!m_bperiodic)
+		setBC( ilevel );
+	
+	//... do smoothing sweeps with specified solver
+	for( unsigned i=0; i<m_npresmooth; ++i ){
+		
+		//... on refinement levels the fine boundary is re-interpolated from the coarse grid before each sweep
+		if( ilevel > m_ilevelmin )
+			interp().interp_coarse_fine(ilevel,*uc,*uf);
+		
+		if( m_smoother == opt::sm_gauss_seidel )
+			GaussSeidel( h, uf, ff );
+			
+		else if( m_smoother == opt::sm_jacobi )
+			Jacobi( h, uf, ff);		
+			
+		else if( m_smoother == opt::sm_sor )
+			SOR( h, uf, ff );
+		
+		if( m_bperiodic && ilevel <= m_ilevelmin )
+			make_periodic( uf );
+	}
+			
+	
+	//... transfer the smoothed fine solution to the coarse grid
+	m_gridop.restrict( *uf, *uc );
+	
+	//... essential!!
+	if( m_bperiodic && ilevel <= m_ilevelmin )
+		make_periodic( uc );
+	else if( ilevel > m_ilevelmin )
+		interp().interp_coarse_fine(ilevel,*uc,*uf);
+		
+	
+	//....................................................................
+	//... we now use hard-coded restriction + operator application, see below
+	/*meshvar_bnd Lu(*uf,false);
+	Lu.zero();
+
+	#pragma omp parallel for
+	for( int ix=0; ix<nx; ++ix )
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+				Lu(ix,iy,iz) = m_scheme.apply( (*uf), ix, iy, iz )/h2;
+	
+	meshvar_bnd tLu(*uc,false);
+	
+	
+	//... restrict Lu
+	m_gridop.restrict( Lu, tLu );
+	Lu.deallocate();*/
+	//.................................................................... 
+	
+	//... offsets of the fine grid, used as coarse-grid indices of its first cell
+	int 
+		oxp = uf->offset(0),
+		oyp = uf->offset(1),
+		ozp = uf->offset(2);
+	
+	//... tLu holds the fine operator applied to uf, averaged over each 2x2x2 block of fine cells
+	//... NOTE(review): the second constructor argument presumably selects whether data is copied - confirm in mesh.hh
+	meshvar_bnd tLu(*uc,false);
+	#pragma omp parallel for
+	for( int ix=0; ix<nx/2; ++ix )
+	{	
+		int iix=2*ix;
+		for( int iy=0,iiy=0; iy<ny/2; ++iy,iiy+=2 )
+		
+		
+			for( int iz=0,iiz=0; iz<nz/2; ++iz,iiz+=2 )
+				tLu(ix+oxp,iy+oyp,iz+ozp) = 0.125 * (
+							 m_scheme.apply( (*uf), iix, iiy, iiz )
+							+m_scheme.apply( (*uf), iix, iiy, iiz+1 )
+							+m_scheme.apply( (*uf), iix, iiy+1, iiz )
+							+m_scheme.apply( (*uf), iix, iiy+1, iiz+1 )
+							+m_scheme.apply( (*uf), iix+1, iiy, iiz )
+							+m_scheme.apply( (*uf), iix+1, iiy, iiz+1 )
+							+m_scheme.apply( (*uf), iix+1, iiy+1, iiz )
+							+m_scheme.apply( (*uf), iix+1, iiy+1, iiz+1 )
+						)/h2;
+	}
+	
+	//... restrict source term
+	m_gridop.restrict( *ff, *fc );
+	
+	int oi, oj, ok;
+	oi = ff->offset(0);
+	oj = ff->offset(1);
+	ok = ff->offset(2);
+	
+	//... add the correction term to the coarse right-hand side on the region covered by the fine grid:
+	//... restricted fine operator minus coarse operator, the latter divided by (2h)^2 = 4*h2
+	#pragma omp parallel for 
+	for( int ix=oi; ix<oi+(int)ff->size(0)/2; ++ix )
+		for( int iy=oj; iy<oj+(int)ff->size(1)/2; ++iy )
+			for( int iz=ok; iz<ok+(int)ff->size(2)/2; ++iz )
+				(*fc)(ix,iy,iz) += ((tLu( ix, iy, iz ) - (m_scheme.apply( *uc, ix, iy, iz )/(4.0*h2))));
+									
+	tLu.deallocate();
+	
+	//... keep the restricted coarse solution so that the coarse correction can be formed afterwards
+	meshvar_bnd ucsave(*uc,true);
+						
+	//... have we reached the end of the recursion or do we need to go up one level?
+	//... (the inner if/else pair belongs together, the last else belongs to the test on ilevel)
+	if( ilevel == 1 )
+		if( m_bperiodic )
+			(*uc)(0,0,0) = 0.0;
+		else 
+			(*uc)(0,0,0) = (m_scheme.rhs( (*uc), 0, 0, 0 ) + 4.0 * h2 * (*fc)(0,0,0))*c0;
+	else
+		twoGrid( ilevel-1 );
+	
+	meshvar_bnd cc(*uc,false);
+	
+		
+	//... compute correction on coarse grid
+	#pragma omp parallel for
+	for( int ix=0; ix<(int)cc.size(0); ++ix )
+		for( int iy=0; iy<(int)cc.size(1); ++iy )
+			for( int iz=0; iz<(int)cc.size(2); ++iz )
+				cc(ix,iy,iz) = (*uc)(ix,iy,iz) - ucsave(ix,iy,iz);	
+		
+	ucsave.deallocate();
+
+	if( m_bperiodic && ilevel <= m_ilevelmin )
+		make_periodic( &cc );
+
+	//... add the prolonged coarse correction to the fine solution
+	m_gridop.prolong_add( cc, *uf );
+	
+	//... interpolate and apply coarse-fine boundary conditions on fine level
+	if( m_bperiodic && ilevel <= m_ilevelmin )
+		make_periodic( uf );
+	else if(!m_bperiodic)
+		setBC( ilevel );
+	
+	//... do smoothing sweeps with specified solver
+	for( unsigned i=0; i<m_npostsmooth; ++i ){
+		
+		if( ilevel > m_ilevelmin )
+			interp().interp_coarse_fine(ilevel,*uc,*uf);
+
+		if( m_smoother == opt::sm_gauss_seidel )
+			GaussSeidel( h, uf, ff );
+		
+		else if( m_smoother == opt::sm_jacobi )
+			Jacobi( h, uf, ff);		
+		
+		else if( m_smoother == opt::sm_sor )
+			SOR( h, uf, ff );
+		
+		if( m_bperiodic && ilevel <= m_ilevelmin )
+			make_periodic( uf );
+
+	}
+
+}
+
+//! mean of |L(u)/h^2 / f + 1| over all cells of one level
+/*!
+ *  @param u       solution grid
+ *  @param f       right-hand side grid
+ *  @param ilevel  refinement level, the grid spacing is taken as 1/2^ilevel
+ *  @return average over all cells, or 0 if the grid has no cells
+ */
+template< class S, class I, class O, typename T >
+double solver<S,I,O,T>::compute_error( const MeshvarBnd<T>& u, const MeshvarBnd<T>& f, int ilevel )
+{
+	const int ncx = u.size(0);
+	const int ncy = u.size(1);
+	const int ncz = u.size(2);
+	
+	const double h = 1.0/(1ul<<ilevel), h2=h*h;
+	
+	double err = 0.0;
+	size_t count = 0;
+	
+	#pragma omp parallel for reduction(+:err,count)
+	for( int ix=0; ix<ncx; ++ix )
+	{
+		for( int iy=0; iy<ncy; ++iy )
+		{
+			for( int iz=0; iz<ncz; ++iz )
+			{
+				//... every cell contributes, there is no test for f being zero
+				err += fabs( (double)m_scheme.apply( u, ix, iy, iz )/h2/(double)(f(ix,iy,iz)) + 1.0 );
+				++count;
+			}
+		}
+	}
+	
+	if( count != 0 )
+		err /= count; 
+	
+	return err;
+}
+
+template< class S, class I, class O, typename T >
+double solver<S,I,O,T>::compute_error( const GridHierarchy<T>& uh, const GridHierarchy<T>& fh, bool verbose )
+{
+	double maxerr = 0.0;
+
+	for( unsigned ilevel=uh.levelmin(); ilevel <= uh.levelmax(); ++ilevel )
+	{
+		int 
+		  nx = uh.get_grid(ilevel)->size(0), 
+		  ny = uh.get_grid(ilevel)->size(1), 
+		  nz = uh.get_grid(ilevel)->size(2);
+	
+		double err = 0.0, mean_res = 0.0;
+		size_t count = 0;
+
+		double h = 1.0/(1ul<<ilevel), h2=h*h;
+	
+                #pragma omp parallel for reduction(+:err,count)
+		for( int ix=0; ix<nx; ++ix )
+		  for( int iy=0; iy<ny; ++iy )
+		    for( int iz=0; iz<nz; ++iz )
+			{
+			  double res =  (double)m_scheme.apply( *uh.get_grid(ilevel), ix, iy, iz ) + h2 * (double)((*fh.get_grid(ilevel))(ix,iy,iz));
+			  double val = (*uh.get_grid(ilevel))( ix, iy, iz );
+
+			  if( fabs(val) > 0.0 )
+			    {
+			      err += fabs( res/val );
+			      mean_res += fabs(res);
+			      ++count;
+			    }
+			}
+	
+		if( count != 0 )
+		  {
+		    err /= count; 
+		    mean_res /= count;
+		  }
+		if( verbose )
+			std::cout << "      Level " << std::setw(6) << ilevel << ",   Error = " << err << std::endl;
+
+		LOGDEBUG("[mg]      level %3d,  residual %g,  rel. error %g",ilevel, mean_res, err);
+		
+		maxerr = std::max(maxerr,err);
+		
+	}
+	return maxerr;
+}
+
+//! RMS residual of every level, relative to the RMS of the right-hand side
+/*!
+ *  @param uh       grid hierarchy of the solution
+ *  @param fh       grid hierarchy of the right-hand side
+ *  @param verbose  if true, the relative error of each level is printed to std::cout
+ *                  (never on the first call, while m_is_ini is still set)
+ *  @return maximum relative RMS residual over all levels
+ *
+ *  The first call additionally fills m_residu_ini and clears m_is_ini.
+ */
+template< class S, class I, class O, typename T >
+double solver<S,I,O,T>::compute_RMS_resid( const GridHierarchy<T>& uh, const GridHierarchy<T>& fh, bool verbose )
+{
+	const bool first_call = m_is_ini;
+	
+	if( first_call )
+		m_residu_ini.assign( uh.levelmax()+1, 0.0 );
+	
+	double worst = 0.0;
+	
+	for( unsigned ilevel=uh.levelmin(); ilevel <= uh.levelmax(); ++ilevel )
+	{
+		const int ncx = uh.get_grid(ilevel)->size(0);
+		const int ncy = uh.get_grid(ilevel)->size(1);
+		const int ncz = uh.get_grid(ilevel)->size(2);
+		
+		const double h = 1.0/(1<<ilevel), h2=h*h;
+		
+		double sum = 0.0;		// sum of squared residuals
+		double sumd2 = 0.0;		// sum of squared right-hand side values
+		size_t count = 0;
+		
+		#pragma omp parallel for reduction(+:sum,sumd2,count)
+		for( int ix=0; ix<ncx; ++ix )
+		{
+			for( int iy=0; iy<ncy; ++iy )
+			{
+				for( int iz=0; iz<ncz; ++iz )
+				{
+					const double d = (double)(*fh.get_grid(ilevel))(ix,iy,iz);
+					const double r = ((double)m_scheme.apply( *uh.get_grid(ilevel), ix, iy, iz )/h2 + d);
+					
+					sumd2 += d*d;
+					sum += r*r;
+					++count;
+				}
+			}
+		}
+		
+		if( first_call )
+			m_residu_ini[ilevel] =  sqrt(sum)/count;
+		
+		const double err_abs = sqrt(sum/count);
+		const double err_rel = err_abs / sqrt(sumd2/count);
+		
+		if( verbose && !first_call )
+			std::cout << "      Level " << std::setw(6) << ilevel << ",   Error = " << err_rel << std::endl;		
+		
+		LOGDEBUG("[mg]      level %3d,  rms residual %g,  rel. error %g",ilevel, err_abs, err_rel);
+		
+		if( err_rel > worst )
+			worst = err_rel;
+	}
+	
+	m_is_ini = false;
+	
+	return worst;
+}
+
+
+template< class S, class I, class O, typename T >
+double solver<S,I,O,T>::solve( GridHierarchy<T>& uh, double acc, double h, bool verbose )
+{
+
+	double err, maxerr = 1e30;
+	unsigned niter = 0;
+	
+	bool fullverbose = false;
+	
+	m_pu = &uh;
+	
+	//err = compute_RMS_resid( *m_pu, *m_pf, fullverbose );
+	
+	//... iterate ...//
+	while (true)
+	{
+		
+		LOGUSER("Performing multi-grid V-cycle...");
+		twoGrid( uh.levelmax() );
+		
+		//err = compute_RMS_resid( *m_pu, *m_pf, fullverbose );
+		err = compute_error( *m_pu, *m_pf, fullverbose );
+		++niter;
+		
+		if( fullverbose ){
+			LOGUSER("  multigrid iteration %3d, maximum RMS residual = %g", niter, err );
+			std::cout << "   - Step No. " << std::setw(3) << niter << ", Max Err = " << err << std::endl;
+			std::cout << "     ---------------------------------------------------\n";
+		}
+		
+		if( err < maxerr )
+			maxerr = err;
+			
+		if( (niter > 1) && ((err < acc) || (niter > 20)) )
+			break;
+	}		
+	
+	if( err > acc )
+	{	
+		std::cout << "Error : no convergence in Poisson solver" << std::endl;
+		LOGERR("No convergence in Poisson solver, final error: %g.",err);
+	}
+	else if( verbose )
+	{	
+		std::cout << " - Converged in " << niter << " steps to " << maxerr << std::endl;
+		LOGUSER("Poisson solver converged to max. error of %g in %d steps.",err,niter);
+	}
+
+	
+	//.. make sure that the RHS does not contain the FAS corrections any more
+	for( int i=m_pf->levelmax(); i>0; --i )
+		m_gridop.restrict( *m_pf->get_grid(i), *m_pf->get_grid(i-1) );
+	
+	
+	return err;
+}
+
+
+
+//TODO: this only works for 2nd order! (but actually not needed)
+//! fill the first layer of boundary cells of level m_ilevelmin from m_pubnd
+/*!
+ *  Each boundary cell is set to 2*b - u_in, where b is the value of m_pubnd at the
+ *  boundary cell and u_in the adjacent cell inside the grid. Nothing is done for
+ *  any other level.
+ */
+template< class S, class I, class O, typename T >
+void solver<S,I,O,T>::setBC( unsigned ilevel )
+{
+	//... set only on level before additional refinement starts
+	if( ilevel != m_ilevelmin )
+		return;
+	
+	MeshvarBnd<T> *grid = m_pu->get_grid(ilevel);
+	
+	const int ncx = grid->size(0);
+	const int ncy = grid->size(1);
+	const int ncz = grid->size(2);
+	
+	//... lower and upper x boundary
+	for( int j=0; j<ncy; ++j )
+	{
+		for( int k=0; k<ncz; ++k )
+		{
+			(*grid)(-1,j,k)  = 2.0*(*m_pubnd)(-1,j,k)  - (*grid)(0,j,k);
+			(*grid)(ncx,j,k) = 2.0*(*m_pubnd)(ncx,j,k) - (*grid)(ncx-1,j,k);
+		}
+	}
+	
+	//... lower and upper y boundary
+	for( int i=0; i<ncx; ++i )
+	{
+		for( int k=0; k<ncz; ++k )
+		{
+			(*grid)(i,-1,k)  = 2.0*(*m_pubnd)(i,-1,k)  - (*grid)(i,0,k);
+			(*grid)(i,ncy,k) = 2.0*(*m_pubnd)(i,ncy,k) - (*grid)(i,ncy-1,k);
+		}
+	}
+	
+	//... lower and upper z boundary
+	for( int i=0; i<ncx; ++i )
+	{
+		for( int j=0; j<ncy; ++j )
+		{
+			(*grid)(i,j,-1)  = 2.0*(*m_pubnd)(i,j,-1)  - (*grid)(i,j,0);
+			(*grid)(i,j,ncz) = 2.0*(*m_pubnd)(i,j,ncz) - (*grid)(i,j,ncz-1);
+		}
+	}
+}
+
+
+
+//... enforce periodic boundary conditions
+//! copy periodically wrapped values into the m_nbnd boundary layers on all six faces of u
+/*!
+ *  The faces are processed in the order x, y, z and the layers of each face from the
+ *  outermost one inwards; this order is kept on purpose.
+ */
+template< class S, class I, class O, typename T >
+void solver<S,I,O,T>::make_periodic( MeshvarBnd<T> *u )
+{
+	const int ncx = u->size(0);
+	const int ncy = u->size(1);
+	const int ncz = u->size(2);
+	const int nghost = u->m_nbnd;
+	
+	//... x faces; the transverse indices are wrapped into [0,n)
+	for( int j=-nghost; j<ncy+nghost; ++j )
+	{
+		for( int k=-nghost; k<ncz+nghost; ++k )
+		{
+			const int jw = (j+ncy)%ncy;
+			const int kw = (k+ncz)%ncz;
+			
+			for( int g=-nghost; g<0; ++g )
+			{
+				(*u)(g,j,k)       = (*u)(ncx+g,jw,kw);
+				(*u)(ncx-1-g,j,k) = (*u)(-1-g,jw,kw);	
+			}
+		}
+	}
+	
+	//... y faces
+	for( int i=-nghost; i<ncx+nghost; ++i )
+	{
+		for( int k=-nghost; k<ncz+nghost; ++k )
+		{
+			const int iw = (i+ncx)%ncx;
+			const int kw = (k+ncz)%ncz;
+			
+			for( int g=-nghost; g<0; ++g )
+			{
+				(*u)(i,g,k)       = (*u)(iw,ncy+g,kw);
+				(*u)(i,ncy-1-g,k) = (*u)(iw,-1-g,kw);
+			}
+		}
+	}
+	
+	//... z faces
+	for( int i=-nghost; i<ncx+nghost; ++i )
+	{
+		for( int j=-nghost; j<ncy+nghost; ++j )
+		{
+			const int iw = (i+ncx)%ncx;
+			const int jw = (j+ncy)%ncy;
+			
+			for( int g=-nghost; g<0; ++g )
+			{
+				(*u)(i,j,g)       = (*u)(iw,jw,ncz+g);
+				(*u)(i,j,ncz-1-g) = (*u)(iw,jw,-1-g);
+			}
+		}
+	}
+}
+
+
+END_MULTIGRID_NAMESPACE
+ 
+#endif
--- a/output.cc
+++ b/output.cc
@ -0,0 +1,60 @@
+/*
+ 
+ output.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+*/
+
+#include "output.hh"
+
+
+//! access to the single map from plug-in name to plug-in creator
+/*!
+ *  The map is a function-local static, so it is constructed the first time this
+ *  function is called and the same object is returned by every call.
+ */
+std::map< std::string, output_plugin_creator *>& 
+get_output_plugin_map()
+{
+	typedef std::map< std::string, output_plugin_creator* > registry_type;
+	
+	static registry_type registry;
+	return registry;
+}
+
+//! write the names of all registered output plug-ins to std::cout
+/*!
+ *  Map entries whose creator pointer is null are not listed.
+ */
+void print_output_plugins()
+{
+	typedef std::map< std::string, output_plugin_creator *> registry_type;
+	
+	registry_type& registry = get_output_plugin_map();
+	
+	std::cout << " - Available output plug-ins:\n";
+	
+	for( registry_type::iterator entry = registry.begin(); entry != registry.end(); ++entry )
+	{
+		if( !entry->second )
+			continue;
+		
+		std::cout << "\t\'" << entry->first << "\'\n";
+	}
+}
+
+//! create the output plug-in that is named by the config option [output] format
+/*!
+ *  @param cf  configuration file; also handed to the created plug-in
+ *  @return pointer to a newly created plug-in instance
+ *  @throws std::runtime_error if no plug-in is registered under the requested name
+ *
+ *  The registry is searched with find(): operator[] would insert a null entry
+ *  for every unknown format name as a side effect of the lookup.
+ */
+output_plugin *select_output_plugin( config_file& cf )
+{
+	typedef std::map< std::string, output_plugin_creator *> registry_type;
+	
+	std::string formatname = cf.getValue<std::string>( "output", "format" );
+	
+	const registry_type& registry = get_output_plugin_map();
+	registry_type::const_iterator entry = registry.find( formatname );
+	
+	if( entry == registry.end() || !entry->second )
+	{	
+		std::cerr << " - Error: output plug-in \'" << formatname << "\' not found." << std::endl;
+		print_output_plugins();
+		throw std::runtime_error("Unknown output plug-in");
+	}
+	
+	std::cout << " - Selecting output plug-in \'" << formatname << "\'..." << std::endl;
+	
+	return entry->second->create( cf );
+}
+
+
+
--- a/output.hh
+++ b/output.hh
@ -0,0 +1,164 @@
+/*
+ 
+ output.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+*/
+
+#ifndef __OUTPUT_HH
+#define __OUTPUT_HH
+
+#include <string>
+#include <map>
+
+#include "general.hh"
+#include "mesh.hh"
+
+
+/*!
+ * @class output_plugin
+ * @brief abstract base class for output plug-ins
+ *
+ * This class provides the abstract base class for all output plug-ins.
+ * All output plug-ins need to derive from it and implement the purely
+ * virtual member functions.
+ *
+ * The constructor reads the output file name, the minimum and maximum
+ * refinement level and the offset and size of every level from the
+ * configuration, so that derived classes find them in the protected members.
+ */
+class output_plugin
+{
+protected:
+	
+	//! reference to the config_file object that holds all configuration options
+	config_file& cf_;
+	
+	//! output file or directory name
+	std::string fname_;
+	
+	//! minimum refinement level
+	unsigned levelmin_;
+	
+	//! maximum refinement level
+	unsigned levelmax_;
+	
+	std::vector<unsigned> 
+		offx_,		//!< vector describing the x-offset of each level
+		offy_,		//!< vector describing the y-offset of each level
+		offz_,		//!< vector describing the z-offset of each level
+		sizex_,		//!< vector describing the extent in x of each level
+		sizey_,		//!< vector describing the extent in y of each level
+		sizez_;		//!< vector describing the extent in z of each level
+	
+	//! quick access function to query properties of the refinement grid from the configuration options
+	/*! For every level from levelmin_ to levelmax_ the option "name(level,icomp)" of
+	 *  section "setup" is read and written to the output iterator.
+	 *
+	 *  @param name	name of the config property
+	 *  @param icomp component index (0=x, 1=y, 2=z)
+	 *  @param oit output iterator (e.g. std::back_inserter for vectors)
+	 */
+	template< typename output_iterator >
+	void query_grid_prop( const std::string& name, int icomp, output_iterator oit )
+	{
+		char str[128];
+		for( unsigned i=levelmin_; i<=levelmax_; ++i )
+		{
+			// bounded formatting: a long property name must not overrun the key buffer
+			snprintf( str, sizeof(str), "%s(%u,%d)", name.c_str(), i, icomp );
+			*oit = cf_.getValue<unsigned>( "setup", str );
+			++oit;
+		}
+	}
+	
+public:
+	
+	//! constructor
+	/*! @param cf configuration file; a reference to it is stored, so it has to outlive the plug-in */
+	explicit output_plugin( config_file& cf )
+	: cf_(cf)
+	{ 
+		fname_		= cf.getValue<std::string>("output","filename");
+		levelmin_	= cf.getValue<unsigned>( "setup", "levelmin" );
+		levelmax_	= cf.getValue<unsigned>( "setup", "levelmax" );
+
+		query_grid_prop( "offset", 0, std::back_inserter(offx_) );
+		query_grid_prop( "offset", 1, std::back_inserter(offy_) );
+		query_grid_prop( "offset", 2, std::back_inserter(offz_) );
+		
+		query_grid_prop( "size", 0, std::back_inserter(sizex_) );
+		query_grid_prop( "size", 1, std::back_inserter(sizey_) );
+		query_grid_prop( "size", 2, std::back_inserter(sizez_) );
+	}
+	
+	//! destructor
+	virtual ~output_plugin()
+	{ }
+	
+	//! purely virtual prototype to write the masses for each dark matter particle
+	virtual void write_dm_mass( const grid_hierarchy& gh ) = 0;
+	
+	//! purely virtual prototype to write the dark matter density field 
+	virtual void write_dm_density( const grid_hierarchy& gh ) = 0;
+	
+	//! purely virtual prototype to write the dark matter gravitational potential (from which displacements are computed in 1LPT)
+	virtual void write_dm_potential( const grid_hierarchy& gh ) = 0;
+	
+	//! purely virtual prototype to write dark matter particle velocities
+	virtual void write_dm_velocity( int coord, const grid_hierarchy& gh ) = 0;
+	
+	//! purely virtual prototype to write dark matter particle positions
+	virtual void write_dm_position( int coord, const grid_hierarchy& gh ) = 0;
+
+	//! purely virtual prototype to write the baryon velocities
+	virtual void write_gas_velocity( int coord, const grid_hierarchy& gh ) = 0;
+
+	//! purely virtual prototype to write the baryon coordinates
+	virtual void write_gas_position( int coord, const grid_hierarchy& gh ) = 0;
+	
+	//! purely virtual prototype to write the baryon density field
+	virtual void write_gas_density( const grid_hierarchy& gh )  = 0;
+	
+	//! purely virtual prototype to write the baryon gravitational potential (from which displacements are computed in 1LPT)
+	virtual void write_gas_potential( const grid_hierarchy& gh )  = 0;
+	
+	//! purely virtual prototype for all things to be done at the very end
+	virtual void finalize( void ) = 0;
+};
+
+/*!
+ * @brief implements abstract factory design pattern for output plug-ins
+ *
+ * Interface of the objects that are stored in the plug-in map; each one
+ * creates instances of one plug-in type.
+ */
+struct output_plugin_creator
+{
+	//! create an instance of a plug-in
+	/*! @param cf configuration file that is passed on to the plug-in
+	 *  @return pointer to the new plug-in instance
+	 */
+	virtual output_plugin * create( config_file& cf ) const = 0;
+	
+	//! virtual destructor of the creator object
+	virtual ~output_plugin_creator()
+	{
+	}
+};
+
+//! returns a reference to the map from plug-in name to the creator object registered under that name
+std::map< std::string, output_plugin_creator *>& get_output_plugin_map();
+
+//! print a list of all registered output plug-ins to std::cout
+void print_output_plugins();
+
+/*!
+ * @brief concrete factory pattern for output plug-ins
+ *
+ * Constructing an object of this type enters it into the plug-in map under
+ * the given name; create() then builds instances of the plug-in class Derived.
+ */
+template< class Derived >
+struct output_plugin_creator_concrete : public output_plugin_creator
+{
+	//! register the plug-in by its name
+	/*! The map stores the address of this object; an entry that already exists
+	 *  under the same name is overwritten.
+	 */
+	output_plugin_creator_concrete( const std::string& plugin_name )
+	{
+		std::map< std::string, output_plugin_creator *>& registry = get_output_plugin_map();
+		registry[ plugin_name ] = this;
+	}
+	
+	//! create an instance of the plug-in
+	/*! @return pointer to a Derived object allocated with new */
+	output_plugin * create( config_file& cf ) const
+	{
+		output_plugin *instance = new Derived( cf );
+		return instance;
+	}
+};
+
+//! failsafe version to select the output plug-in: creates the plug-in named by option "format" in section "output"
+output_plugin *select_output_plugin( config_file& cf );
+
+#endif // __OUTPUT_HH
--- a/plugins/HDF_IO.hh
+++ b/plugins/HDF_IO.hh
--- a/plugins/convex_hull.hh
+++ b/plugins/convex_hull.hh
@ -0,0 +1,318 @@
+#ifndef CONVEX_HULL_HH
+#define CONVEX_HULL_HH
+
+#include <vector>
+#include <set>
+#include <cmath>
+
+#include <omp.h>
+
+#include "log.hh"
+
+/***** Slow but short convex hull Implementation ******/
+/* 
+ * Finds the convex hull of a set of data points.
+ * Very simple implementation using the O(nh) gift-wrapping algorithm
+ * Does not properly take care of degeneracies or round-off problems,
+ * in that sense, only 'approximate' convex hull.
+ *
+ * adapted and expanded from the 'ch3quad' code by Timothy Chan 
+ * (https://cs.uwaterloo.ca/~tmchan)
+ */
+
+/*!
+ * @brief approximate 3D convex hull of a point set, built by gift wrapping
+ *
+ * Points are passed as one packed array x0,y0,z0,x1,y1,z1,... The hull is stored
+ * as two triangle lists (lower and upper part), each with three point indices per
+ * triangle, together with one unit normal and one reference point per triangle.
+ * The input is not validated.
+ */
+template< typename real_t >
+struct convex_hull{
+    typedef const real_t *cpr_;
+    
+    size_t npoints_;                              //!< number of input points
+    std::vector<int> faceidx_L_, faceidx_U_;      //!< point indices of the lower/upper faces, 3 per triangle
+    std::vector<real_t> normals_L_, normals_U_;   //!< unit normals of the lower/upper faces, 3 components per triangle
+    std::vector<real_t> x0_L_, x0_U_;             //!< first vertex of each lower/upper face, 3 components per triangle
+    real_t centroid_[3], volume_;                 //!< volume-weighted centre and total volume, see compute_center
+    real_t left_[3], right_[3];                   //!< minimum and maximum corner of the axis-aligned bounding box
+    
+    //! z-component of (q-p) x (r-p), i.e. the orientation of p,q,r projected onto the x-y plane
+    inline double turn( cpr_ p, cpr_ q, cpr_ r ) const
+    {   return (q[0]-p[0])*(r[1]-p[1]) - (r[0]-p[0])*(q[1]-p[1]);   }
+    
+    //! orientation test of four points; the z-differences change sign between lower and upper hull
+    template< bool islower >
+    inline double orient( cpr_ p, cpr_ q, cpr_ r, cpr_ s ) const
+    {
+        if( islower )
+            return (q[2]-p[2])*turn(p,r,s) - (r[2]-p[2])*turn(p,q,s) + (s[2]-p[2])*turn(p,q,r);
+        
+        return (p[2]-q[2])*turn(p,r,s) - (p[2]-r[2])*turn(p,q,s) + (p[2]-s[2])*turn(p,q,r);
+    }
+    
+    //! scalar product of two 3-vectors
+    inline real_t dot( cpr_ x, cpr_ y ) const 
+    {
+        return x[0]*y[0]+x[1]*y[1]+x[2]*y[2];
+    }
+    
+    //! determinant of a 3x3 matrix stored row by row in a[0..8]
+    inline real_t det3x3( cpr_ a ) const 
+    {
+        return (a[0]*(a[4]*a[8]-a[7]*a[5])
+                + a[1]*(a[5]*a[6]-a[8]*a[3])
+                + a[2]*(a[3]*a[7]-a[6]*a[4]));
+    }
+    
+    //! compute unit normals and reference points for one triangle list
+    /*! @param points   packed point coordinates
+     *  @param faceidx  point indices, 3 per triangle
+     *  @param normals  receives the normalised cross product of the two edges starting at the first vertex
+     *  @param x0       receives the first vertex of each triangle
+     *  @param sign     factor applied to the norm: -1 flips the normal (lower hull), +1 keeps it (upper hull)
+     */
+    void compute_normals_for( cpr_ points, const std::vector<int>& faceidx,
+                              std::vector<real_t>& normals, std::vector<real_t>& x0, double sign )
+    {
+        normals.assign( faceidx.size(), 0.0 );
+        x0.assign( faceidx.size(), 0.0 );
+        
+        #pragma omp parallel for
+        for( int i=0; i<(int)faceidx.size()/3; ++i )
+        {
+            real_t d1[3], d2[3];
+            for( int j=0; j<3; ++j )
+            {
+                x0[3*i+j] = points[ 3*faceidx[3*i+0] + j ];
+                d1[j] = points[ 3*faceidx[3*i+1] + j ] - points[ 3*faceidx[3*i+0] + j ];
+                d2[j] = points[ 3*faceidx[3*i+2] + j ] - points[ 3*faceidx[3*i+0] + j ];
+            }
+            
+            normals[3*i+0] = d1[1]*d2[2] - d1[2]*d2[1];
+            normals[3*i+1] = d1[2]*d2[0] - d1[0]*d2[2];
+            normals[3*i+2] = d1[0]*d2[1] - d1[1]*d2[0];
+            
+            double norm_n = sign * sqrt(normals[3*i+0]*normals[3*i+0]+
+                                        normals[3*i+1]*normals[3*i+1]+
+                                        normals[3*i+2]*normals[3*i+2]);
+            
+            normals[3*i+0] /= norm_n;
+            normals[3*i+1] /= norm_n;
+            normals[3*i+2] /= norm_n;
+        }
+    }
+    
+    //! compute normals and reference points of all faces of both hull parts
+    void compute_face_normals( cpr_ points )
+    {
+        // negative sign for lower hull
+        compute_normals_for( points, faceidx_L_, normals_L_, x0_L_, -1.0 );
+        compute_normals_for( points, faceidx_U_, normals_U_, x0_U_, 1.0 );
+    }
+    
+    //! add volume and volume-weighted centre of the tetrahedra spanned by xc and each triangle
+    /*! @param points   packed point coordinates
+     *  @param faceidx  point indices, 3 per triangle
+     *  @param xc       common apex of all tetrahedra
+     *  @param totvol   running sum of the tetrahedron volumes
+     *  @param xcp      running sum of volume times tetrahedron centre (3 components)
+     */
+    void accumulate_tetrahedra( cpr_ points, const std::vector<int>& faceidx, cpr_ xc,
+                                real_t& totvol, real_t *xcp ) const
+    {
+        for( size_t i=0; i<faceidx.size()/3; ++i )
+        {
+            real_t a[9];
+            real_t xct[3] = {xc[0],xc[1],xc[2]};
+            
+            for( size_t j=0; j<3; ++j )
+            {
+                for( size_t k=0; k<3; ++k )
+                {
+                    xct[k] += points[3*faceidx[3*i+j]+k];
+                    a[3*j+k] = points[3*faceidx[3*i+j]+k]-xc[k];
+                }
+            }
+            
+            // centre of the tetrahedron = mean of its four vertices
+            xct[0] *= 0.25;
+            xct[1] *= 0.25;
+            xct[2] *= 0.25;
+            
+            real_t vol = fabs(det3x3(a))/6.0;
+            
+            totvol += vol;
+            xcp[0] += vol * xct[0];
+            xcp[1] += vol * xct[1];
+            xcp[2] += vol * xct[2];
+        }
+    }
+    
+    //! compute volume_ and centroid_ from tetrahedra between the mean of all points and the hull faces
+    void compute_center( cpr_ points )
+    {
+        real_t xc[3] = {0.0,0.0,0.0};
+        real_t xcp[3] = {0.0,0.0,0.0};
+        
+        real_t totvol = 0.0;
+        
+        // arithmetic mean of all input points, used as apex of the tetrahedra
+        for( size_t i=0; i<3*npoints_; ++i )
+            xc[i%3] += points[i];
+        
+        xc[0] /= npoints_;
+        xc[1] /= npoints_;
+        xc[2] /= npoints_;
+        
+        accumulate_tetrahedra( points, faceidx_L_, xc, totvol, xcp );
+        accumulate_tetrahedra( points, faceidx_U_, xc, totvol, xcp );
+        
+        volume_ = totvol;
+        centroid_[0] = xcp[0] / totvol;
+        centroid_[1] = xcp[1] / totvol;
+        centroid_[2] = xcp[2] / totvol;
+    }
+    
+    //! recursive gift-wrapping step: find the triangle on edge (i,j) and continue on its two new edges
+    /*! @param points  packed point coordinates
+     *  @param i,j     point indices of the edge
+     *  @param idx     triangle list that is extended; nothing is done if the edge already belongs to a triangle
+     */
+    template< bool islower >
+    void wrap( cpr_ points, int i, int j, std::vector<int>& idx )
+    {
+        int k,l,m;
+        int h = (int)idx.size()/3;
+        
+        for( m=0; m<h; ++m )
+            if( ( idx[3*m+0]==i && idx[3*m+1]==j ) ||
+               ( idx[3*m+1]==i && idx[3*m+2]==j ) ||
+               ( idx[3*m+2]==i && idx[3*m+0]==j ) )
+                return;
+        
+        for( k=i, l=0; l < (int)npoints_; ++l )
+            if( turn(&points[3*i],&points[3*j],&points[3*l]) < 0 &&
+               orient<islower>(&points[3*i],&points[3*j],&points[3*k],&points[3*l]) >=0 )
+                k = l;
+        
+        if( turn(&points[3*i],&points[3*j],&points[3*k]) >= 0 ) return;
+        
+        idx.push_back( i );
+        idx.push_back( j );
+        idx.push_back( k );
+        
+        wrap<islower>( points, k, j, idx );
+        wrap<islower>( points, i, k, idx );
+    }
+    
+    //! test whether point x lies on the non-negative side of every face plane
+    /*! The projection is evaluated in double precision directly, so that the
+     *  function can be used for any real_t (dot() only accepts real_t arrays).
+     *
+     *  @param x  pointer to the 3 coordinates of the point
+     *  @return false as soon as one face has a negative projection, true otherwise
+     */
+    template< typename T >
+    bool check_point( const T* x ) const
+    {
+        for( size_t i=0; i<normals_L_.size()/3; ++i )
+        {
+            double xp[3] = {x[0]-x0_L_[3*i+0],x[1]-x0_L_[3*i+1],x[2]-x0_L_[3*i+2]};
+            double proj = xp[0]*normals_L_[3*i+0]+xp[1]*normals_L_[3*i+1]+xp[2]*normals_L_[3*i+2];
+            if( proj<0.0 ) return false;
+        }
+        
+        for( size_t i=0; i<normals_U_.size()/3; ++i )
+        {
+            double xp[3] = {x[0]-x0_U_[3*i+0],x[1]-x0_U_[3*i+1],x[2]-x0_U_[3*i+2]};
+            double proj = xp[0]*normals_U_[3*i+0]+xp[1]*normals_U_[3*i+1]+xp[2]*normals_U_[3*i+2];
+            if( proj<0.0 ) return false;
+        }
+        
+        return true;
+    }
+    
+    //! move the 3-vector v away from the centroid by dr times its offset from the centroid
+    /*! NOTE(review): the displacement is proportional to the distance from the centroid
+     *  (it is not normalised), so dr acts as a relative factor - confirm that this is
+     *  the intended meaning of dr.
+     */
+    void expand_vector_from_centroid( real_t *v, double dr  )
+    {
+        double dx[3];
+        for( int i=0; i<3; ++i )
+            dx[i] = v[i]-centroid_[i];
+        
+        for( int i=0; i<3; ++i )
+            v[i] += dr * dx[i];
+    }
+    
+    //! expand the reference points of all faces and both bounding box corners away from the centroid
+    void expand( real_t dr )
+    {
+        // i already advances by one 3-vector per step, so it is the offset of the vector itself
+        for( size_t i=0; i<normals_L_.size(); i+=3 )
+            expand_vector_from_centroid( &x0_L_[i], dr );
+        for( size_t i=0; i<normals_U_.size(); i+=3 )
+            expand_vector_from_centroid( &x0_U_[i], dr );
+        
+        expand_vector_from_centroid( left_, dr );
+        expand_vector_from_centroid( right_, dr );
+        
+    }
+    
+    //! collect the indices of all points that are a vertex of at least one hull face
+    void get_defining_indices( std::set<int>& unique ) const
+    {
+        unique.clear();
+        
+        for( size_t i=0; i<faceidx_L_.size(); ++i )
+            unique.insert( faceidx_L_[i] );
+        for( size_t i=0; i<faceidx_U_.size(); ++i )
+            unique.insert( faceidx_U_[i] );
+    }
+    
+    //! build the hull of npoints points given as packed coordinates
+    /*! @param points   packed point coordinates, 3*npoints values; only read during construction
+     *  @param npoints  number of points
+     */
+    convex_hull( cpr_ points, size_t npoints )
+    : npoints_( npoints )
+    {
+        faceidx_L_.reserve(npoints_*3);
+        faceidx_U_.reserve(npoints_*3);
+        
+        // starting edge: i is the point with the smallest x, j is selected with the turn test
+        size_t i,j,l;
+        for( i=0, l=1; l<npoints_; ++l )
+            if( points[3*i] > points[3*l] ) i=l;
+        for( j=i, l=0; l<npoints_; ++l )
+            if( i!=l && turn(&points[3*i],&points[3*j],&points[3*l]) >= 0 ) j=l;
+        
+        const int istart = (int)i;
+        const int jstart = (int)j;
+        
+        // remember the thread limit that is in effect. omp_get_num_threads() returns 1
+        // outside of a parallel region, so restoring that value would leave the rest
+        // of the program with a single thread.
+        const int nt = omp_get_max_threads();
+        omp_set_num_threads( nt < 2 ? nt : 2 );
+        
+        // lower and upper hull are independent and are wrapped concurrently
+        #pragma omp parallel for
+        for( int thread=0; thread<2; ++thread )
+        {
+            if( thread==0 )
+                wrap<true>( points, istart, jstart, faceidx_L_ );
+            if( thread==1 )
+                wrap<false>( points, istart, jstart, faceidx_U_ );
+        }
+        
+        omp_set_num_threads(nt);
+        
+        compute_face_normals( points );
+        compute_center( points );
+        
+        // finally compute AABB
+        left_[0] = left_[1] = left_[2] = 1e30;
+        right_[0] = right_[1] = right_[2] = -1e30;
+        
+        for( size_t q=0; q<npoints_; ++q )
+            for( size_t p=0; p<3; ++p )
+            {
+                if( points[3*q+p] > right_[p] )
+                    right_[p] = points[3*q+p];
+                if( points[3*q+p] < left_[p] )
+                    left_[p] = points[3*q+p];
+            }
+        
+    }
+    
+};
+
+
+#endif // CONVEX_HULL_HH
--- a/plugins/output_arepo.cc
+++ b/plugins/output_arepo.cc
@ -0,0 +1,655 @@
+/*
+ * output_arepo.cc - This file is part of MUSIC -
+ * a code to generate multi-scale initial conditions 
+ * for cosmological simulations
+ * 
+ * Copyright (C) 2010  Oliver Hahn
+ * 
+ * Plugin: Dylan Nelson (dnelson@cfa.harvard.edu)
+ */
+ 
+#ifdef HAVE_HDF5
+
+#define GAS_PARTTYPE 0 // particle type number of the gas group ("PartType0")
+#define HIGHRES_DM_PARTTYPE 1 // particle type number of the finest-level (or unigrid) DM
+#define COARSE_DM_DEFAULT_PARTTYPE 2 // default coarse DM type when output/arepo_coarsetype is not set
+#define STAR_PARTTYPE 4 // never written here; only used for the clash warning and the softening hint
+#define NTYPES 6 // length of the per-type header arrays (nPart, nPartTotal, massTable)
+
+#include <sstream>
+#include <string>
+#include <algorithm>
+#include "output.hh"
+#include "HDF_IO.hh"
+
+class arepo_output_plugin : public output_plugin // output plugin writing an HDF5 file with "PartType<N>" groups and a "Header" group
+{ 
+protected:
+	// per-type vectors below are created with NTYPES entries in the constructor
+	// header/config: these values end up as attributes of the "Header" group (see finalize)
+	std::vector<int> nPart; // written as NumPart_ThisFile
+	std::vector<int> nPartTotal; // written as NumPart_Total
+	std::vector<double> massTable; // written as MassTable; non-zero only for types with one common mass
+	double time, redshift, boxSize; // time = 1/(1+redshift); redshift = setup/zstart; boxSize = setup/boxlength
+	int numFiles, doublePrec; // only numFiles==1 and doublePrec==0 are accepted by the constructor
+	// cosmology: Omega_m, Omega_L and H0/100 from the [cosmology] section
+	double omega0, omega_L, hubbleParam;
+	// unit system and derived conversion factors
+	// configuration
+	double UnitLength_in_cm, UnitMass_in_g, UnitVelocity_in_cm_per_s;
+	double omega_b, rhoCrit; // rhoCrit is rescaled to the chosen length/mass units in the constructor
+	double posFac, velFac; // multipliers applied to positions and velocities before writing
+	int coarsePartType, nPartTotAllTypes; // type number for coarse DM; sum of nPartTotal (set in generateAndWriteIDs)
+	bool doBaryons, useLongIDs; // setup/baryons; output/arepo_longids (long long instead of int IDs)
+	// leaf-cell counts, refreshed by countLeafCells(): finest level / all levels / all but the finest level
+	size_t npfine, npart, npcoarse;
+	std::vector<size_t> levelcounts; // leaf cells per level, finest level first (filled in write_dm_mass)
+	// the values below are only reported (header attributes and stdout), not used for the particle data
+	// parameter file hints
+	int pmgrid, gridboost;
+	float softening, Tini;
+	// make the base-class member cf_ accessible in this class
+	using output_plugin::cf_;
+	
+	// Nx1 vector (e.g. masses,particleids)
+	// Creates the dataset fieldName inside group "PartType<partTypeNum>" of the
+	// output file and fills it with the data.size() elements of data.
+	template< typename T >
+	void writeHDF5_a( std::string fieldName, int partTypeNum, const std::vector<T> &data )
+	{
+		// group name is "PartType" followed by the type number
+		std::stringstream groupPath;
+		groupPath << "PartType" << partTypeNum;
+		
+		// re-open the existing file read/write and descend into the group
+		hid_t fileId  = H5Fopen( fname_.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );
+		hid_t groupId = H5Gopen( fileId, groupPath.str().c_str() );
+		
+		// one-dimensional dataset holding data.size() elements of type T
+		hid_t   elemType = GetDataType<T>();
+		hsize_t nElem    = data.size();
+		hid_t   spaceId  = H5Screate_simple(1, &nElem, NULL);
+		hid_t   dsetId   = H5Dcreate( groupId, fieldName.c_str(), elemType, spaceId, H5P_DEFAULT );
+		
+		// transfer the whole vector in one go
+		H5Dwrite( dsetId, elemType, H5S_ALL, H5S_ALL, H5P_DEFAULT, &data[0] );
+		
+		// release all handles again
+		H5Dclose( dsetId );
+		H5Sclose( spaceId );
+		
+		H5Gclose( groupId );
+		H5Fclose( fileId );
+	}
+	
+	// Nx3 vector (e.g. positions,velocities), where coord = index of the second dimension (written one at a time)
+	//
+	//   fieldName   : dataset name inside group "PartType<partTypeNum>"
+	//   coord       : column of the Nx3 dataset that is accessed
+	//   partTypeNum : particle type number selecting the group
+	//   data        : N values; source when writing, destination when reading
+	//   readFlag    : false = write data into the column, true = read the column into data
+	//
+	// The Nx3 dataset (N = data.size()) is created by the first call for a given field;
+	// every call then transfers a single column through a hyperslab selection.
+	void writeHDF5_b( std::string fieldName, int coord, int partTypeNum, std::vector<float> &data, bool readFlag = false )
+	{
+		hid_t HDF_FileID, HDF_GroupID, HDF_DatasetID, HDF_DataspaceID, HDF_Type;
+		hsize_t HDF_Dims[2], HDF_DimsMem[2];
+		
+		std::stringstream GrpName;
+		GrpName << "PartType" << partTypeNum;
+		
+		HDF_FileID = H5Fopen( fname_.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );
+		HDF_GroupID = H5Gopen( HDF_FileID, GrpName.str().c_str() );
+		
+		HDF_Type         = GetDataType<float>();
+		HDF_Dims[0]      = data.size();
+		HDF_Dims[1]      = 3;
+		
+		// if dataset does not yet exist, create it (on first coord call)
+		if( !(H5Lexists(HDF_GroupID, fieldName.c_str(), H5P_DEFAULT)) )
+		{
+			HDF_DataspaceID = H5Screate_simple(2, HDF_Dims, NULL);
+			HDF_DatasetID = H5Dcreate( HDF_GroupID, fieldName.c_str(), HDF_Type, HDF_DataspaceID, H5P_DEFAULT );
+			
+			H5Sclose( HDF_DataspaceID );
+			H5Dclose( HDF_DatasetID );
+		}
+		
+		// make memory space (just indicates the size/shape of data)
+		HDF_DimsMem[0] = HDF_Dims[0];
+		HDF_DimsMem[1] = 1;
+		hid_t HDF_MemoryspaceID = H5Screate_simple(2, HDF_DimsMem, NULL);
+		
+		// open hyperslab
+		hsize_t count[2]={1,1}, stride[2]={1,1}, offset[2]={0,0};
+		
+		offset[1] = coord;       // set where in the second dimension to write
+		count[0]  = HDF_Dims[0]; // set size in the first dimension (num particles of this type)
+		
+		HDF_DatasetID   = H5Dopen(HDF_GroupID, fieldName.c_str());
+		HDF_DataspaceID = H5Dget_space(HDF_DatasetID);
+		
+		H5Sselect_hyperslab(HDF_DataspaceID, H5S_SELECT_SET, offset, stride, count, NULL);
+		
+		// write (or read) and close
+		if( readFlag )
+			H5Dread( HDF_DatasetID, HDF_Type, HDF_MemoryspaceID, HDF_DataspaceID, H5P_DEFAULT, &data[0] );
+		else
+			H5Dwrite( HDF_DatasetID, HDF_Type, HDF_MemoryspaceID, HDF_DataspaceID, H5P_DEFAULT, &data[0] );
+		
+		// both dataspace handles are owned by this function and must be released,
+		// otherwise two identifiers leak on every call
+		H5Sclose( HDF_MemoryspaceID );
+		H5Sclose( HDF_DataspaceID );
+		
+		H5Dclose( HDF_DatasetID );
+		H5Gclose( HDF_GroupID );
+		H5Fclose( HDF_FileID );
+	}
+	
+	// called from finalize()
+	// Writes a "ParticleIDs" dataset for every particle type whose total count is
+	// non-zero and accumulates nPartTotAllTypes. IDs continue across types, so they
+	// are unique over the whole file.
+	void generateAndWriteIDs( void )
+	{
+		long long firstID = 0; // ID given to the first particle of the current type
+		nPartTotAllTypes = 0;
+		
+		for( size_t type=0; type < nPartTotal.size(); type++ )
+		{
+			const int nThisType = nPartTotal[type];
+			
+			// nothing to write for types that are not present
+			if( nThisType == 0 )
+				continue;
+			
+			nPartTotAllTypes += nThisType;
+			
+			if( useLongIDs )
+			{
+				// long long IDs
+				std::vector<long long> ids( nThisType );
+				for( long long j=0; j < nThisType; j++ )
+					ids[j] = firstID + j;
+				
+				writeHDF5_a( "ParticleIDs", type, ids );
+			}
+			else
+			{
+				// int IDs
+				std::vector<int> ids( nThisType );
+				for( int j=0; j < nThisType; j++ )
+					ids[j] = firstID + j;
+				
+				writeHDF5_a( "ParticleIDs", type, ids );
+			}
+			
+			// make IDs of all particle types sequential (unique) = unnecessary, but consistent with gadget output format
+			firstID += nThisType;
+		}
+	}
+	
+	// Refresh npfine, npart and npcoarse from the leaf-cell counts of gh.
+	void countLeafCells( const grid_hierarchy& gh )
+	{
+		// leaf cells on the finest level only, and on all levels together
+		npfine = gh.count_leaf_cells( gh.levelmax(), gh.levelmax() );
+		npart  = gh.count_leaf_cells( gh.levelmin(), gh.levelmax() );
+		
+		// leaf cells on every level below the finest one; zero without refinement
+		npcoarse = 0;
+		if( levelmax_ != levelmin_ ) // multimass
+			npcoarse = gh.count_leaf_cells( gh.levelmin(), gh.levelmax()-1 );
+	}
+
+public:
+	/**
+	 * Read the cosmology/setup/output options from cf, derive the unit conversion
+	 * factors and the parameter-file hints (PMGRID, GRIDBOOST, softening, initial
+	 * gas temperature), and create the HDF5 output file together with one
+	 * "PartType<N>" group for each species that will be written.
+	 *
+	 * Throws std::runtime_error if arepo_num_files != 1, if arepo_doubleprec is set,
+	 * or if arepo_coarsetype is not a usable particle type.
+	 */
+	arepo_output_plugin( config_file& cf ) : output_plugin( cf )
+	{
+		// ensure that everyone knows we want to do SPH, implies: bsph=1, bbshift=1, decic_baryons=1
+		// -> instead of just writing gas densities (which are here ignored), the gas displacements are also written
+		cf.insertValue("setup","do_SPH","yes");
+		
+		// init header and config parameters
+		nPart      = std::vector<int>(NTYPES,0);
+		nPartTotal = std::vector<int>(NTYPES,0);
+		massTable  = std::vector<double>(NTYPES,0.0);
+		
+		coarsePartType   = cf.getValueSafe<unsigned>("output","arepo_coarsetype",COARSE_DM_DEFAULT_PARTTYPE);
+		UnitLength_in_cm = cf.getValueSafe<double>("output","arepo_unitlength",3.085678e21); // 1.0 kpc
+		UnitMass_in_g    = cf.getValueSafe<double>("output","arepo_unitmass",1.989e43); // 1.0e10 solar masses
+		UnitVelocity_in_cm_per_s = cf.getValueSafe<double>("output","arepo_unitvel",1e5); // 1 km/sec
+		
+		omega0     = cf.getValue<double>("cosmology","Omega_m");
+		omega_b    = cf.getValue<double>("cosmology","Omega_b");
+		omega_L    = cf.getValue<double>("cosmology","Omega_L");
+		redshift   = cf.getValue<double>("setup","zstart");
+		boxSize    = cf.getValue<double>("setup","boxlength");
+		doBaryons  = cf.getValueSafe<bool>("setup","baryons",false);
+		useLongIDs = cf.getValueSafe<bool>("output","arepo_longids",false);
+		numFiles   = cf.getValueSafe<unsigned>("output","arepo_num_files",1);
+		doublePrec = cf.getValueSafe<int>("output","arepo_doubleprec",0);
+		
+		if( numFiles != 1 )
+			throw std::runtime_error("Error: arepo_num_files>1 not yet supported.");
+		if( doublePrec )
+			throw std::runtime_error("Error: arepo_doubleprec not yet supported.");
+		
+		// factors which multiply positions and velocities
+		time   = 1.0/(1.0+redshift);
+		posFac = 3.085678e24 / UnitLength_in_cm; // MUSIC uses Mpc internally, i.e. posFac=1e3 for kpc output
+		velFac = ( 1.0f / sqrt(time) ) * boxSize; // TODO: should be normalized by posFac?
+		
+		// critical density
+		rhoCrit = 27.7519737e-9; // in h^2 1e10 M_sol / kpc^3
+		rhoCrit *= pow(UnitLength_in_cm/3.085678e21, 3.0);
+		rhoCrit *= (1.989e43/UnitMass_in_g);
+		
+		// calculate PMGRID suggestion
+		pmgrid = pow(2,levelmin_) * 2; // unigrid
+		gridboost = 1;
+		
+		if( levelmin_ != levelmax_ )
+		{
+			double lxref[3];
+			
+			std::string temp = cf.getValue<std::string>( "setup", "ref_extent" );
+			
+			// erase-remove: std::remove_if alone only moves the kept characters to the
+			// front and leaves stale characters at the end of the string
+			temp.erase( std::remove_if(temp.begin(),temp.end(),isspace), temp.end() );
+			
+			// only derive a boost factor if all three extents could be parsed;
+			// otherwise lxref would be read uninitialised
+			if( sscanf( temp.c_str(), "%lf,%lf,%lf", &lxref[0],&lxref[1],&lxref[2] ) == 3 )
+			{
+				// fraction box length of the zoom region
+				lxref[0] = pow( (lxref[0]*lxref[1]*lxref[2]),0.333 );
+				
+				double pmgrid_new = pow(2,levelmax_) * 2; // to cover entire box at highest resolution
+				pmgrid_new *= lxref[0]; // only need to cover a fraction
+				
+				if( (gridboost=round(pmgrid_new/pmgrid)) > 1 )
+					gridboost = pow(2, ceil(log(gridboost)/log(2.0))); // round to nearest, higher power of 2
+			}
+			else
+				LOGWARN("WARNING: Could not parse setup/ref_extent, suggested GRIDBOOST left at 1.");
+		}
+		
+		// calculate Tini for gas
+		hubbleParam = cf.getValue<double>("cosmology","H0")/100.0;
+		
+		double astart = 1.0/(1.0+redshift);
+		double h2     = hubbleParam*hubbleParam;
+		double adec   = 1.0/( 160.0*pow(omega_b*h2/0.022,2.0/5.0) );
+		double Tcmb0  = 2.726;
+		
+		Tini = astart<adec? Tcmb0/astart : Tcmb0/astart/astart*adec;
+		
+		// calculate softening suggestion
+		softening = (boxSize * posFac) / pow(2,levelmax_) / 40.0;
+		
+		// header and sanity checks
+		if ( !doBaryons )
+			massTable[HIGHRES_DM_PARTTYPE] = omega0 * rhoCrit * pow(boxSize*posFac,3.0)/pow(2,3*levelmax_);
+		else
+			massTable[HIGHRES_DM_PARTTYPE] = (omega0-omega_b) * rhoCrit * pow(boxSize*posFac,3.0)/pow(2,3*levelmax_);
+		
+		// coarsePartType indexes nPart/nPartTotal/massTable later on, so it also has
+		// to stay inside [0,NTYPES)
+		if ( coarsePartType == GAS_PARTTYPE || coarsePartType == HIGHRES_DM_PARTTYPE
+		     || coarsePartType < 0 || coarsePartType >= NTYPES )
+			throw std::runtime_error("Error: Specified illegal Arepo particle type for coarse particles.");
+		if ( coarsePartType == STAR_PARTTYPE )
+			LOGWARN("WARNING: Specified coarse particle type will collide with stars if USE_SFR enabled.");
+		
+		// create file
+		HDFCreateFile(fname_);
+		
+		// create particle type groups
+		std::stringstream GrpName;
+		GrpName << "PartType" << HIGHRES_DM_PARTTYPE;
+		
+		HDFCreateGroup(fname_, GrpName.str().c_str()); // highres or unigrid DM
+		
+		if( doBaryons )
+		{
+			GrpName.str("");
+			GrpName << "PartType" << GAS_PARTTYPE;
+			HDFCreateGroup(fname_, GrpName.str().c_str()); // gas
+		}
+		
+		if( levelmax_ != levelmin_ ) // multimass
+		{
+			// use the configured type number; a fixed "PartType2" would leave the
+			// group missing whenever arepo_coarsetype differs from the default
+			GrpName.str("");
+			GrpName << "PartType" << coarsePartType;
+			HDFCreateGroup(fname_, GrpName.str().c_str()); // coarse DM
+		}
+	}
+	
+	// nothing to do here: this class declares only scalar and std::vector members
+	~arepo_output_plugin()
+	{
+	}
+	
+	/* ------------------------------------------------------------------------------- */
+	
+	// Record the per-level leaf counts for the header and store the coarse DM masses:
+	// as a "Masses" dataset if levelmax_ > levelmin_+1, as a single massTable entry
+	// if there is exactly one coarse level, and not at all for a unigrid setup.
+	// Throws std::runtime_error if the number of masses generated differs from npcoarse.
+	void write_dm_mass( const grid_hierarchy& gh )
+	{
+		countLeafCells(gh);
+		
+		// fill levelcount for header: entry 0 is the finest level
+		levelcounts = std::vector<size_t>(levelmax_-levelmin_+1);
+		for( int lvl=gh.levelmax(); lvl>=(int)gh.levelmin(); --lvl )
+			levelcounts[gh.levelmax()-lvl] = gh.count_leaf_cells(lvl, lvl);
+		
+		if( !(levelmax_ > levelmin_ +1) )
+		{
+			// DM particles will all have the same mass, just write to massTable
+			if( levelmax_ != levelmin_ ) // multimass
+				massTable[coarsePartType] = omega0 * rhoCrit * pow(boxSize*posFac,3.0)/pow(2,3*levelmin_);
+			return;
+		}
+		
+		// morethan2bnd: DM particles will have variable masses
+		std::vector<float> masses(npcoarse);
+		size_t nfilled = 0;
+		
+		for( int lvl=gh.levelmax()-1; lvl>=(int)gh.levelmin(); --lvl )
+		{
+			// baryon particles live only on finest grid, these particles here are total matter particles
+			const float levelMass = omega0 * rhoCrit * pow(boxSize*posFac,3.0)/pow(2,3*lvl);
+			
+			for( unsigned i=0; i<gh.get_grid(lvl)->size(0); ++i )
+				for( unsigned j=0; j<gh.get_grid(lvl)->size(1); ++j )
+					for( unsigned k=0; k<gh.get_grid(lvl)->size(2); ++k )
+					{
+						// refined cells are represented by the finer level
+						if( gh.is_refined(lvl,i,j,k) )
+							continue;
+						
+						masses[nfilled++] = levelMass;
+					}
+		}
+		
+		if( nfilled != npcoarse )
+			throw std::runtime_error("Internal consistency error while writing masses");
+		
+		writeHDF5_a( "Masses", coarsePartType, masses ); // write DM
+	}
+	
+	// Write coordinate component coord of all DM particles and update the particle
+	// counts for the header. Positions are cell position plus the grid value
+	// (displacement), scaled by boxSize. Fine particles are always wrapped into
+	// [0,boxSize) and multiplied by posFac; coarse particles only when no baryons are
+	// present, because write_gas_position finishes them otherwise.
+	// Throws std::runtime_error if the number of collected cells does not match the
+	// leaf-cell counts.
+	void write_dm_position( int coord, const grid_hierarchy& gh )
+	{
+		countLeafCells(gh);
+		
+		// update header
+		nPart[HIGHRES_DM_PARTTYPE] = npfine;
+		nPart[coarsePartType]      = npcoarse;
+		nPartTotal[HIGHRES_DM_PARTTYPE] = npfine;
+		nPartTotal[coarsePartType]      = npcoarse;
+		
+		// FINE: collect displacements and convert to absolute coordinates with correct units
+		const int finest = gh.levelmax();
+		
+		std::vector<float> data(npfine);
+		size_t nfilled = 0;
+		
+		for( unsigned i=0; i<gh.get_grid(finest)->size(0); ++i )
+			for( unsigned j=0; j<gh.get_grid(finest)->size(1); ++j )
+				for( unsigned k=0; k<gh.get_grid(finest)->size(2); ++k )
+				{
+					if( gh.is_refined(finest,i,j,k) )
+						continue;
+					
+					double pos[3];
+					gh.cell_pos(finest, i, j, k, pos);
+					
+					// displaced position in box units, wrapped periodically
+					pos[coord] = (pos[coord] + (*gh.get_grid(finest))(i,j,k)) * boxSize;
+					pos[coord] = fmod( pos[coord] + boxSize,boxSize );
+					
+					data[nfilled++] = (float) (pos[coord] * posFac);
+				}
+		
+		writeHDF5_b( "Coordinates", coord, HIGHRES_DM_PARTTYPE, data );	// write fine DM
+		
+		if( nfilled != npfine )
+			throw std::runtime_error("Internal consistency error while writing fine DM pos");
+		
+		// unigrid: no coarse particles to handle
+		if( levelmax_ == levelmin_ )
+			return;
+		
+		// COARSE: collect displacements and convert to absolute coordinates with correct units
+		data.assign( npcoarse, 0.0f );
+		nfilled = 0;
+		
+		for( int lvl=gh.levelmax()-1; lvl>=(int)gh.levelmin(); --lvl )
+			for( unsigned i=0; i<gh.get_grid(lvl)->size(0); ++i )
+				for( unsigned j=0; j<gh.get_grid(lvl)->size(1); ++j )
+					for( unsigned k=0; k<gh.get_grid(lvl)->size(2); ++k )
+					{
+						if( gh.is_refined(lvl,i,j,k) )
+							continue;
+						
+						double pos[3];
+						gh.cell_pos(lvl, i, j, k, pos);
+						
+						pos[coord] = (pos[coord] + (*gh.get_grid(lvl))(i,j,k)) * boxSize;
+						
+						if ( !doBaryons ) // if so, we will handle the mod in write_gas_position
+							pos[coord] = fmod( pos[coord] + boxSize,boxSize ) * posFac;
+						
+						data[nfilled++] = (float) pos[coord];
+					}
+		
+		if( nfilled != npcoarse )
+			throw std::runtime_error("Internal consistency error while writing coarse DM pos");
+		
+		writeHDF5_b( "Coordinates", coord, coarsePartType, data ); // write coarse DM
+	}
+	
+	// Write velocity component coord of all DM particles: the finest level goes to
+	// the high-res DM group, all coarser levels (multimass only) to the coarse DM
+	// group. Grid values are multiplied by velFac before writing.
+	// Throws std::runtime_error if the number of collected cells does not match the
+	// leaf-cell counts from countLeafCells().
+	void write_dm_velocity( int coord, const grid_hierarchy& gh )
+	{
+		countLeafCells(gh);
+		
+		// FINE: collect velocities and convert to correct units
+		int ilevel = gh.levelmax();
+		
+		std::vector<float> data(npfine);
+		size_t count = 0;
+		
+		for( unsigned i=0; i<gh.get_grid(ilevel)->size(0); ++i )
+			for( unsigned j=0; j<gh.get_grid(ilevel)->size(1); ++j )
+				for( unsigned k=0; k<gh.get_grid(ilevel)->size(2); ++k )
+					if( ! gh.is_refined(ilevel,i,j,k) )
+					{
+						data[count++] = (*gh.get_grid(ilevel))(i,j,k) * velFac;
+					}
+		
+		writeHDF5_b( "Velocities", coord, HIGHRES_DM_PARTTYPE, data ); // write fine DM
+		
+		// message names velocities so it can be told apart from the position routine
+		if( count != npfine )
+			throw std::runtime_error("Internal consistency error while writing fine DM vel");
+		
+		// COARSE: collect velocities and convert to correct units
+		if( levelmax_ != levelmin_ ) // multimass
+		{
+			data = std::vector<float> (npcoarse,0.0);
+			count = 0;
+			
+			for( int ilevel=gh.levelmax()-1; ilevel>=(int)gh.levelmin(); --ilevel )
+				for( unsigned i=0; i<gh.get_grid(ilevel)->size(0); ++i )
+					for( unsigned j=0; j<gh.get_grid(ilevel)->size(1); ++j )
+						for( unsigned k=0; k<gh.get_grid(ilevel)->size(2); ++k )
+							if( ! gh.is_refined(ilevel,i,j,k) )
+							{
+								data[count++] = (*gh.get_grid(ilevel))(i,j,k) * velFac;
+							}
+			
+			if( count != npcoarse )
+				throw std::runtime_error("Internal consistency error while writing coarse DM vel");
+			
+			writeHDF5_b( "Velocities", coord, coarsePartType, data ); // write coarse DM
+		}
+	}
+	
+	// DM densities are not part of this output format: intentionally a no-op
+	void write_dm_density( const grid_hierarchy& gh )
+	{
+		/* skip */
+	}
+	
+	// DM potential is not part of this output format: intentionally a no-op
+	void write_dm_potential( const grid_hierarchy& gh )
+	{
+		/* skip */
+	}
+	
+	/* ------------------------------------------------------------------------------- */
+	
+	// Write velocity component coord of the gas. Gas velocities are collected on all
+	// levels (finest level first), but only the first npfine entries (finest level)
+	// are written as gas particles. If baryons and coarse particles are both present,
+	// the coarse DM velocities already stored in the file are read back and replaced
+	// by the mix facc*DM + facb*gas with facb = omega_b/omega0, facc = 1 - facb.
+	// Throws std::runtime_error if the number of collected cells differs from npart.
+	void write_gas_velocity( int coord, const grid_hierarchy& gh )
+	{
+		countLeafCells(gh);
+		
+		std::vector<float> gas_data(npart); // read/write gas at all levels from the gh
+		size_t count = 0;
+		
+		for( int ilevel=levelmax_; ilevel>=(int)levelmin_; --ilevel )
+			for( unsigned i=0; i<gh.get_grid(ilevel)->size(0); ++i )
+				for( unsigned j=0; j<gh.get_grid(ilevel)->size(1); ++j )
+					for( unsigned k=0; k<gh.get_grid(ilevel)->size(2); ++k )
+						if( ! gh.is_refined(ilevel,i,j,k) )
+						{
+							gas_data[count++] = (*gh.get_grid(ilevel))(i,j,k) * velFac;
+						}
+		
+		// message names velocities so it can be told apart from the position routine
+		if( count != npart )
+			throw std::runtime_error("Internal consistency error while writing GAS vel");
+		
+		// calculate modified DM velocities if: multimass and baryons present
+		if( doBaryons && npcoarse )
+		{
+			double facb = omega_b / omega0;
+			double facc = (omega0 - omega_b) / omega0;
+			
+			std::vector<float> dm_data(npcoarse);
+			
+			writeHDF5_b( "Velocities", coord, coarsePartType, dm_data, true ); // read coarse DM vels
+			
+			// overwrite; coarse entries follow the npfine fine entries in gas_data
+			for( size_t i=0; i < npcoarse; i++ )
+				dm_data[i] = facc*dm_data[i] + facb*gas_data[npfine + i];
+			
+			writeHDF5_b( "Velocities", coord, coarsePartType, dm_data ); // overwrite coarse DM vels
+		} // dm_data deallocated
+		
+		// restrict gas_data to fine only and request write
+		std::vector<float> data( gas_data.begin() + 0, gas_data.begin() + npfine );
+		
+		std::vector<float>().swap( gas_data ); // deallocate
+		
+		writeHDF5_b( "Velocities", coord, GAS_PARTTYPE, data );	 // write highres gas
+	}
+	
+	// Write coordinate component coord of the gas and update the gas particle counts.
+	// Gas positions are collected on all levels (finest level first) in box units,
+	// shifted by half a finest-level cell. Only the first npfine entries are written
+	// as gas particles, wrapped into [0,boxSize) and multiplied by posFac. If baryons
+	// and coarse particles are both present, the coarse DM coordinates already stored
+	// in the file are read back, replaced by facc*DM + facb*gas, wrapped and scaled.
+	// Throws std::runtime_error if the number of collected cells differs from npart.
+	void write_gas_position( int coord, const grid_hierarchy& gh )
+	{
+		countLeafCells(gh);
+		
+		// update header (will actually write only gas at levelmax)
+		nPart[GAS_PARTTYPE] = npfine;
+		nPartTotal[GAS_PARTTYPE] = npfine;
+		
+		std::vector<double> gas_data(npart); // read/write gas at all levels from the gh
+		size_t count = 0;
+		
+		double h = 1.0/(1ul<<gh.levelmax()); // cell size of the finest level in box units
+		
+		for( int ilevel=gh.levelmax(); ilevel>=(int)gh.levelmin(); --ilevel )
+			for( unsigned i=0; i<gh.get_grid(ilevel)->size(0); ++i )
+				for( unsigned j=0; j<gh.get_grid(ilevel)->size(1); ++j )
+					for( unsigned k=0; k<gh.get_grid(ilevel)->size(2); ++k )
+						if( ! gh.is_refined(ilevel,i,j,k) )
+						{
+							double xx[3];
+							gh.cell_pos(ilevel, i, j, k, xx);
+							
+							// shift particle positions (this has to be done as the same shift
+							// is used when computing the convolution kernel for SPH baryons)
+							xx[coord] += 0.5*h;
+							
+							xx[coord] = (xx[coord] + (*gh.get_grid(ilevel))(i,j,k)) * boxSize;
+							
+							gas_data[count++] = xx[coord];
+						}
+		
+		// message names the gas positions actually handled here
+		if( count != npart )
+			throw std::runtime_error("Internal consistency error while writing GAS pos");
+		
+		// calculate modified DM coordinates if: multimass and baryons present
+		if( doBaryons && npcoarse )
+		{
+			double facb = omega_b / omega0;
+			double facc = (omega0 - omega_b) / omega0;
+			
+			std::vector<float> dm_data(npcoarse);
+			
+			writeHDF5_b( "Coordinates", coord, coarsePartType, dm_data, true ); // read coarse DM positions
+			
+			// overwrite; coarse entries follow the npfine fine entries in gas_data
+			for( size_t i=0; i < npcoarse; i++ ) {
+				dm_data[i] = facc*dm_data[i] + facb*gas_data[npfine + i];
+				dm_data[i] = fmod( dm_data[i] + boxSize, boxSize ) * posFac;
+			}
+			
+			writeHDF5_b( "Coordinates", coord, coarsePartType, dm_data ); // overwrite coarse DM positions
+		}
+		
+		// restrict gas_data to fine only, wrap periodically, convert units and request write
+		std::vector<float> data(npfine);
+		
+		for( size_t i = 0; i < npfine; i++ )
+			data[i] = (float) ( fmod( gas_data[i] + boxSize, boxSize ) * posFac );
+		
+		std::vector<double>().swap( gas_data ); // deallocate
+		
+		writeHDF5_b( "Coordinates", coord, GAS_PARTTYPE, data ); // write highres gas
+		
+	}
+	
+	// No density field is written (displacements are written instead); with baryons
+	// enabled this only records the common gas cell mass in massTable.
+	void write_gas_density( const grid_hierarchy& gh )
+	{
+		if( !doBaryons )
+			return;
+		
+		// if only saving highres gas, then all gas cells have the same initial mass
+		const double boxVolume = pow(boxSize*posFac,3.0);
+		massTable[GAS_PARTTYPE] = omega_b * rhoCrit * boxVolume/pow(2,3*levelmax_);
+	}
+	
+	// gas potential is not part of this output format: intentionally a no-op
+	void write_gas_potential( const grid_hierarchy& gh )
+	{
+		/* skip */
+	}
+	
+	// Finish the file: write the particle IDs, create the "Header" group with all of
+	// its attributes, then print the particle counts and the Arepo configuration hints
+	// to stdout.
+	void finalize( void )
+	{
+		// generate and add contiguous IDs for each particle type we have written
+		generateAndWriteIDs();
+		
+		// write final header (some of these fields are required, others are extra info)
+		HDFCreateGroup(fname_, "Header");
+		
+		// upper 32 bits of every per-type total
+		std::vector<unsigned int> highWords(nPartTotal.size());
+		for( size_t t=0; t < highWords.size(); ++t )
+			highWords[t] = (unsigned)( (size_t)nPartTotal[t] >> 32 );
+		
+		// particle numbers and masses
+		HDFWriteGroupAttribute(fname_, "Header", "NumPart_ThisFile",       nPart );
+		HDFWriteGroupAttribute(fname_, "Header", "NumPart_Total",          nPartTotal );
+		HDFWriteGroupAttribute(fname_, "Header", "NumPart_Total_HighWord", highWords );
+		HDFWriteGroupAttribute(fname_, "Header", "MassTable",              massTable );
+		
+		// box, epoch and cosmology
+		HDFWriteGroupAttribute(fname_, "Header", "BoxSize",                boxSize );
+		HDFWriteGroupAttribute(fname_, "Header", "NumFilesPerSnapshot",    numFiles );
+		HDFWriteGroupAttribute(fname_, "Header", "Time",                   time );
+		HDFWriteGroupAttribute(fname_, "Header", "Redshift",               redshift );
+		HDFWriteGroupAttribute(fname_, "Header", "Omega0",                 omega0 );
+		HDFWriteGroupAttribute(fname_, "Header", "OmegaLambda",            omega_L );
+		HDFWriteGroupAttribute(fname_, "Header", "OmegaBaryon",            omega_b );
+		HDFWriteGroupAttribute(fname_, "Header", "HubbleParam",            hubbleParam );
+		
+		// flags
+		HDFWriteGroupAttribute(fname_, "Header", "Flag_Sfr",               0 );
+		HDFWriteGroupAttribute(fname_, "Header", "Flag_Cooling",           0 );
+		HDFWriteGroupAttribute(fname_, "Header", "Flag_StellarAge",        0 );
+		HDFWriteGroupAttribute(fname_, "Header", "Flag_Metals",            0 );
+		HDFWriteGroupAttribute(fname_, "Header", "Flag_Feedback",          0 );
+		HDFWriteGroupAttribute(fname_, "Header", "Flag_DoublePrecision",   doublePrec );
+		
+		// extra information about the MUSIC setup and the suggested parameters
+		HDFWriteGroupAttribute(fname_, "Header", "Music_levelmin",         levelmin_ );
+		HDFWriteGroupAttribute(fname_, "Header", "Music_levelmax",         levelmax_ );
+		HDFWriteGroupAttribute(fname_, "Header", "Music_levelcounts",      levelcounts );
+		HDFWriteGroupAttribute(fname_, "Header", "haveBaryons",            (int)doBaryons );
+		HDFWriteGroupAttribute(fname_, "Header", "longIDs",                (int)useLongIDs );
+		HDFWriteGroupAttribute(fname_, "Header", "suggested_pmgrid",       pmgrid );
+		HDFWriteGroupAttribute(fname_, "Header", "suggested_gridboost",    gridboost );
+		HDFWriteGroupAttribute(fname_, "Header", "suggested_highressoft",  softening );
+		HDFWriteGroupAttribute(fname_, "Header", "suggested_gas_Tinit",    Tini );
+		
+		// output particle counts
+		std::cout << " - Arepo : wrote " << nPartTotAllTypes << " particles to file..." << std::endl;
+		for( size_t t=0; t < nPartTotal.size(); ++t )
+			std::cout << "    type [" << t << "] : " << std::setw(12) << nPartTotal[t] << std::endl;
+		
+		// give config/parameter file hints; the order of the messages is kept because
+		// std::fixed/std::setprecision below change the stream state for later output
+		const bool isZoom = ( levelmax_ != levelmin_ );
+		
+		if( useLongIDs )
+			std::cout << " - Arepo: Wrote 64bit IDs, enable LONGIDS." << std::endl;
+		
+		if( NTYPES > 6 )
+			std::cout << " - Arepo: Using [" << NTYPES << "] particle types, set NTYPES to match." << std::endl;
+		
+		if( doBaryons )
+		{
+			std::cout << " - Arepo: Wrote gas, set REFINEMENT_HIGH_RES_GAS and GENERATE_GAS_IN_ICS with ";
+			std::cout << "SPLIT_PARTICLE_TYPE=" << pow(2,coarsePartType) << "." << std::endl;
+		}
+		
+		if( isZoom )
+		{
+			std::cout << " - Arepo: Have zoom type ICs, set PLACEHIGHRESREGION=" << pow(2,HIGHRES_DM_PARTTYPE);
+			std::cout << " (suggest PMGRID=" << pmgrid << " with GRIDBOOST=" << gridboost << ")." << std::endl;
+		}
+		
+		if( levelmax_ > levelmin_ + 1 )
+		{
+			std::cout << " - Arepo: More than one coarse DM mass using same type, set INDIVIDUAL_GRAVITY_SOFTENING=";
+			std::cout << pow(2,coarsePartType) << " (+" << pow(2,STAR_PARTTYPE) << " if including stars)." << std::endl;
+		}
+		
+		if( doBaryons )
+			std::cout << " - Arepo: Set initial gas temperature to " << std::fixed << std::setprecision(3) << Tini << " K." << std::endl;
+		
+		std::cout << " - Arepo: Suggest grav softening = " << std::setprecision(3) << softening << " for high res DM." << std::endl;
+		
+	}
+	
+};
+
+namespace{ // anonymous namespace: creator is local to this translation unit
+	output_plugin_creator_concrete< arepo_output_plugin > creator("arepo"); // NOTE(review): presumably registers the plugin under the name "arepo" — confirm in output.hh
+}
+
+#endif // HAVE_HDF5
--- a/plugins/output_art.cc
+++ b/plugins/output_art.cc
@ -0,0 +1,912 @@
+/*
+ 
+ output_art.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2012  Jose Onorbe & Oliver Hahn
+ 
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <vector>
+
+#include "output.hh"
+
+/*!
+ * Return a copy of \a v with the order of its bytes reversed (endianness swap).
+ *
+ * Works for any trivially copyable type: exactly sizeof(T) bytes are reversed,
+ * so 2-, 4- and 8-byte values are all handled (a 1-byte value is returned
+ * unchanged).
+ *
+ * @param v  value whose byte representation is to be reversed
+ * @return   value with the reversed byte representation
+ */
+template<typename T>
+inline T bytereorder( T v )
+{
+	T rval;
+	const unsigned char *src = reinterpret_cast<const unsigned char*>(&v);
+	unsigned char *dst = reinterpret_cast<unsigned char*>(&rval);
+
+	for( size_t i=0; i<sizeof(T); ++i )
+		dst[sizeof(T)-1-i] = src[i];
+
+	return rval;
+}
+
+
+template< typename T_store=float >
+class art_output_plugin : public output_plugin
+{
+public:
+	bool do_baryons_;
+    bool swap_endianness_;
+	double omegab_, omegam_;
+	double gamma_;
+    double astart_;
+    double zstart_;
+	size_t npcdm_;
+    int hsize_;
+    
+protected:
+    
+    enum iofields {
+		id_dm_pos, id_dm_vel, id_gas_pos, id_gas_vel
+    };
+
+	// In-memory image of the ART header record (PMcrd.DAT / PMcrdIC.DAT).
+	// The struct is NOT written to disk in one piece (padding after head[45]
+	// would break the layout); write_header_file() writes the members one by one.
+	// That function does not write Omb0, which gives the 529 byte record size
+	// stored in hsize_.
+	struct header
+	{
+		char head[45];  // free text, blank padded by the constructor (no terminating NUL)
+		float aexpN; // current expansion factor
+       	float aexp0; // initial expansion factor
+		float amplt; // Amplitude of density fluctuations
+		float astep; // Delta a -> time step. 
+				// This value is also stored in pt.dat (binary 1 float)
+				// It is recalculated by art for the next steps so just a small value should work
+		int istep; // step (=0 in IC)
+		float partw; // mass of highest res particle.
+        float TINTG; //=0 in IC
+        float EKIN; //SUM 0.5 * m_i*(v_i**2) in code units
+        float EKIN1; //=0 in IC
+        float EKIN2; //=0 in IC
+        float AU0; //=0 in IC
+        float AEU0; //=0 in IC
+        int NROWC; // Number of particles in 1 dim (number of particles per page = NROW**2) 
+	    int NGRIDC; // Number of cells in 1 dim
+        int nspecies; // number of dm species
+	    int Nseed; // random number used ( 0 for MUSIC? or set the random number used in the lowest level?)
+        float Om0; //Omega_m
+	    float Oml0; //Omega_L
+        float hubble; //hubble constant h=H/100
+	    float Wp5; // 
+        float Ocurv; //Omega_k
+	    float Omb0; // this parameter only appears in header in hydro runs
+		float wpart[10]; // particle masses from high res to low res (normalized to low res particle);
+		    //  corresponds to entries 0-9 of ART's 100-element "extras" array
+		    //  Mass of smallest particle=wpart[0]*Om0*2.746e+11*(Box/NGRID)**3 -> Msun/h
+		    //  Mass of largest  particle=wpart[nspecies-1]*Om0*2.746e+11*(Box/NGRID)**3 -> Msun/h
+		int lpart[10]; // number of particles from high res to low res, cumulative!!!
+		    //  corresponds to entries 10-19 of ART's "extras" array
+		    //(i.e., lpart[0]=Nhigh res particles; lpart[1]=lpart[0]+N_this_level; etc) so lpart[nspecies-1]=N total
+	    float extras[80]; // remaining entries 20-99 of ART's "extras" array
+		     // The indices below are indices into THIS 80-element array, as used by the constructor:
+		     //extras[9]=iLblock ->0 in IC 
+             //extras[10]=LevMin  ->0 in IC
+             //extras[11]=LevSmall ->0 in IC
+             //extras[12]=LevLarge ->0 in IC
+             //extras[13]=Omegab  (set from cosmology/Omega_b by the constructor)
+             //extras[14]=sig8    (set from cosmology/sigma_8 by the constructor)
+             //extras[15]=Spslope (set from cosmology/nspec by the constructor) Slope of the Power spectrum
+             //extras[16]=iDEswtch ->0 in IC; DE Flag=0:LCDM 1:w 2:RP 3:SUGRA
+             //extras[17]=DEw0    ->0 in IC; w0 for DE z=0
+             //extras[18]=DEwprime ->0 in IC; DE parameter
+		     //extras[59]= 0 or 1; is used as switch for random numbers generators [do not apply in music use 0?]
+		     //extras[60]= lux - level of luxury  [do not apply in music use 0?]
+		     //extras[79]=Lbox (Mpc/h) (set from setup/boxlength by the constructor)
+
+	};
+
+	// Payload of the pt.dat file: a single float holding the time step.
+	// The constructor copies header_.astep into it; write_pt_file() writes it out.
+	struct ptf
+	{
+		float astep;
+	};
+	
+	header header_;
+	ptf ptf_;
+	std::string fname;
+	size_t np_fine_gas_, np_fine_dm_, np_coarse_dm_;
+	size_t block_buf_size_;
+	size_t npartmax_;
+    
+    double YHe_;
+	
+	
+	// helper class to read temp files
+	/*!
+	 * Input stream for the temporary per-component buffer files written by the
+	 * write_* methods. Each such file starts with a size_t holding the number of
+	 * payload bytes; on opening, that value is read and compared against
+	 * npart*sizeof(T_store). After a successful open the stream is positioned
+	 * at the first payload element.
+	 */
+	class pistream : public std::ifstream
+	{
+		// Shared open-time validation for the constructor and open().
+		// Throws std::runtime_error if the file could not be opened, if the
+		// leading byte count cannot be read, or if it does not match the
+		// expected npart*sizeof(T_store).
+		void check_block_size( const std::string& fname, size_t npart )
+		{
+			if( !this->good() )
+			{	
+				LOGERR("Could not open buffer file \'%s\' in ART output plug-in",fname.c_str());
+				throw std::runtime_error("Could not open buffer file in ART output plug-in");
+			}
+			
+			size_t blk = 0;
+			this->read( reinterpret_cast<char*>(&blk), sizeof(size_t) );
+			
+			const size_t expected = npart*sizeof(T_store);
+			if( !this->good() || blk != expected )
+			{	
+				LOGERR("Internal consistency error in ART output plug-in");
+				// size_t values are cast explicitly so that they match the format specifier
+				LOGERR("Expected %lu bytes in temp file but found %lu",(unsigned long)expected,(unsigned long)blk);
+				throw std::runtime_error("Internal consistency error in ART output plug-in");
+			}
+		}
+		
+	public:
+		//! open \a fname in binary mode and validate its leading byte count
+		pistream (std::string fname, size_t npart )
+		: std::ifstream( fname.c_str(), std::ios::binary )
+		{
+			check_block_size( fname, npart );
+		}
+		
+		//! create an unopened stream; use open() afterwards
+		pistream ()
+		{
+			
+		}
+		
+		//! open \a fname in binary mode and validate its leading byte count
+		void open(std::string fname, size_t npart )
+		{
+			std::ifstream::open( fname.c_str(), std::ios::binary );
+			check_block_size( fname, npart );
+		}
+	};
+	
+	
+	// non-public member functions
+	/*!
+	 * Write the ART header file (PMcrdIC.DAT for runs with baryons, PMcrd.DAT
+	 * otherwise) into the output directory fname_.
+	 *
+	 * The record is framed by a leading and a trailing 4-byte size marker
+	 * (hsize_). The header members are written one by one because writing the
+	 * struct as a whole would include alignment padding. Omb0 is not part of
+	 * the written record. If swap_endianness_ is set, every numeric field and
+	 * the size marker are byte-swapped before writing.
+	 *
+	 * Throws std::runtime_error if the file cannot be opened or written.
+	 */
+	void write_header_file( void ) //PMcrd.DAT
+	{
+		const std::string partfname = fname_ + ( do_baryons_ ? "/PMcrdIC.DAT" : "/PMcrd.DAT" );
+
+		// binary mode: the record must reach the disk byte for byte
+		std::ofstream ofs( partfname.c_str(), std::ios::binary|std::ios::trunc );
+		if( !ofs.good() )
+		{
+			LOGERR("ART : could not open header file \'%s\' for writing",partfname.c_str());
+			throw std::runtime_error("Could not open header file in ART output plug-in");
+		}
+
+		// work on a copy so that header_ keeps its native byte order
+		header this_header(header_);
+		//Should be 529 in a dm only run; 533 in a baryon run
+		//but not working for alignment so it must be written one by one:
+		int blksize = hsize_;
+		if( swap_endianness_ ) 
+		{
+			LOGINFO("ART : swap_endianness option enabled");
+			blksize = bytereorder( blksize );
+			this_header.aexpN = bytereorder( this_header.aexpN );
+			this_header.aexp0 = bytereorder( this_header.aexp0 );
+			this_header.amplt = bytereorder( this_header.amplt );
+			this_header.astep = bytereorder( this_header.astep );
+			this_header.istep = bytereorder( this_header.istep );
+			this_header.partw = bytereorder( this_header.partw );
+			this_header.TINTG = bytereorder( this_header.TINTG );
+			this_header.EKIN = bytereorder( this_header.EKIN );
+			this_header.EKIN1 = bytereorder( this_header.EKIN1 );
+			this_header.EKIN2 = bytereorder( this_header.EKIN2 );
+			this_header.AU0 = bytereorder( this_header.AU0 );
+			this_header.AEU0 = bytereorder( this_header.AEU0 );
+			this_header.NROWC = bytereorder( this_header.NROWC );
+			this_header.NGRIDC = bytereorder( this_header.NGRIDC );
+			this_header.nspecies = bytereorder( this_header.nspecies );
+			this_header.Nseed = bytereorder( this_header.Nseed );
+			this_header.Om0 = bytereorder( this_header.Om0);
+			this_header.Oml0 = bytereorder( this_header.Oml0 );
+			this_header.hubble = bytereorder( this_header.hubble );
+			this_header.Wp5 = bytereorder( this_header.Wp5 );
+			this_header.Ocurv = bytereorder( this_header.Ocurv );
+			this_header.Omb0 = bytereorder( this_header.Omb0 );
+			for( int i=0; i<10; ++i )
+			{
+				this_header.wpart[i] = bytereorder( this_header.wpart[i] );
+				this_header.lpart[i] = bytereorder( this_header.lpart[i] );
+			}
+			for( int i=0; i<80; ++i )
+			{
+				this_header.extras[i] = bytereorder( this_header.extras[i] );
+			}
+		}
+
+		ofs.write( (char *)&blksize, sizeof(int) );
+		//ofs.write( (char *)&this_header,sizeof(header));  //Not working because struct aligment, so:
+		ofs.write( (char *)&this_header.head,sizeof(this_header.head));   
+		ofs.write( (char *)&this_header.aexpN,sizeof(this_header.aexpN));   
+		ofs.write( (char *)&this_header.aexp0,sizeof(this_header.aexp0));   
+		ofs.write( (char *)&this_header.amplt,sizeof(this_header.amplt));   
+		ofs.write( (char *)&this_header.astep,sizeof(this_header.astep));   
+		ofs.write( (char *)&this_header.istep,sizeof(this_header.istep));   
+		ofs.write( (char *)&this_header.partw,sizeof(this_header.partw));   
+		ofs.write( (char *)&this_header.TINTG,sizeof(this_header.TINTG));   
+		ofs.write( (char *)&this_header.EKIN,sizeof(this_header.EKIN));   
+		ofs.write( (char *)&this_header.EKIN1,sizeof(this_header.EKIN1));   
+		ofs.write( (char *)&this_header.EKIN2,sizeof(this_header.EKIN2));   
+		// AU0 comes before AEU0, in struct order (previously AEU0 was written twice)
+		ofs.write( (char *)&this_header.AU0,sizeof(this_header.AU0));   
+		ofs.write( (char *)&this_header.AEU0,sizeof(this_header.AEU0));   
+		ofs.write( (char *)&this_header.NROWC,sizeof(this_header.NROWC));   
+		ofs.write( (char *)&this_header.NGRIDC,sizeof(this_header.NGRIDC));   
+		ofs.write( (char *)&this_header.nspecies,sizeof(this_header.nspecies));   
+		ofs.write( (char *)&this_header.Nseed,sizeof(this_header.Nseed));   
+		ofs.write( (char *)&this_header.Om0,sizeof(this_header.Om0));   
+		ofs.write( (char *)&this_header.Oml0,sizeof(this_header.Oml0));   
+		ofs.write( (char *)&this_header.hubble,sizeof(this_header.hubble));   
+		ofs.write( (char *)&this_header.Wp5,sizeof(this_header.Wp5));   
+		ofs.write( (char *)&this_header.Ocurv,sizeof(this_header.Ocurv));   
+		ofs.write( (char *)&this_header.wpart,sizeof(this_header.wpart));   
+		ofs.write( (char *)&this_header.lpart,sizeof(this_header.lpart));   
+		ofs.write( (char *)&this_header.extras,sizeof(this_header.extras));   
+		ofs.write( (char *)&blksize, sizeof(int) );
+
+		if( ofs.fail() )
+		{
+			LOGERR("ART : I/O error while writing header file \'%s\'",partfname.c_str());
+			throw std::runtime_error("I/O error while writing header file in ART output plug-in");
+		}
+		ofs.close();
+		LOGINFO("ART : done writing header file.");
+	}
+
+	/*!
+	 * Write pt.dat into the output directory fname_: one float (the time
+	 * step ptf_.astep) framed by a leading and trailing 4-byte size marker.
+	 * With swap_endianness_ set, both the value and the markers are
+	 * byte-swapped.
+	 *
+	 * Throws std::runtime_error if the file cannot be opened or written.
+	 */
+	void write_pt_file( void ) //pt.dat
+	{
+		const std::string partfname = fname_ + "/pt.dat";
+
+		// binary mode: the record must reach the disk byte for byte
+		std::ofstream ofs( partfname.c_str(), std::ios::binary|std::ios::trunc );
+		if( !ofs.good() )
+		{
+			LOGERR("ART : could not open pt file \'%s\' for writing",partfname.c_str());
+			throw std::runtime_error("Could not open pt file in ART output plug-in");
+		}
+
+		ptf this_ptf(ptf_);
+		int blksize = sizeof(ptf); //4
+		if( swap_endianness_ ) 
+		{
+			blksize = bytereorder( blksize );
+			// swap the field, not the struct as a whole
+			this_ptf.astep = bytereorder( this_ptf.astep );
+		}
+		ofs.write( (char *)&blksize, sizeof(int) );
+		ofs.write( (char *)&this_ptf,sizeof(ptf));
+		ofs.write( (char *)&blksize, sizeof(int) );
+
+		if( ofs.fail() )
+		{
+			LOGERR("ART : I/O error while writing pt file \'%s\'",partfname.c_str());
+			throw std::runtime_error("I/O error while writing pt file in ART output plug-in");
+		}
+		ofs.close();
+		LOGINFO("ART : done writing pt file.");
+	}
+    
+    
+    //! Byte-swap, in place, the first block_buf_size_ elements of \a buf.
+    //! Does nothing unless swap_endianness_ is set.
+    void adjust_buf_endianness( T_store* buf )
+    {
+        if( !swap_endianness_ )
+            return;
+
+        T_store* const last = buf + block_buf_size_;
+        for( T_store* p=buf; p!=last; ++p )
+            *p = bytereorder<T_store>( *p );
+    }
+
+	/*
+     The direct format writes the particle data in pages. Each page of particles is read into a common block,
+	 which has the structure: X(Npage),Y(Npage),Z(Npage),Vx(Npage),Vy(Npage),Vz(Npage). 
+	 There are NO Fortran record-size markers before or after these blocks!!
+
+	 The number of particles in each page (Npage) is Npage = Nrow**2; Npages = (N_particles -1)/Npage +1,
+	 so the last page can be tricky (zooms): N_in_last = N_particles - Npage*(Npages-1).
+     But keep in mind that ART expects all pages to be written in full, regardless of the actual number of
+     particles you care about.
+
+    */
+	/*!
+	 * Shared worker for assemble_DM_file() and assemble_gas_file().
+	 *
+	 * Reads the six temporary component files (3 positions with ids
+	 * 100*pos_id+{0,1,2}, 3 velocities with ids 100*vel_id+{0,1,2}) page by
+	 * page and writes them to \a partfname as consecutive pages
+	 * X,Y,Z,Vx,Vy,Vz of block_buf_size_ elements each. A partially filled
+	 * last page is padded with zeros and still written in full. The temporary
+	 * files are removed afterwards.
+	 *
+	 * @param partfname  name of the output file
+	 * @param pos_id     iofields id of the position components
+	 * @param vel_id     iofields id of the velocity components
+	 * @param npart      number of particles expected in each temporary file
+	 *
+	 * Throws std::runtime_error on any open/read/write failure. Buffers are
+	 * std::vector objects, so they are released on every exit path.
+	 */
+	void assemble_particle_file( const std::string& partfname, int pos_id, int vel_id, size_t npart )
+	{
+		// binary mode: the pages must reach the disk byte for byte
+		std::ofstream ofs( partfname.c_str(), std::ios::binary|std::ios::trunc );
+		if( !ofs.good() )
+		{
+			LOGERR("ART : could not open file \'%s\' for writing",partfname.c_str());
+			throw std::runtime_error("Could not open particle file in ART output plug-in");
+		}
+		
+		// generate all temp file names: components 0-2 are positions, 3-5 velocities
+		char fn[6][256];
+		for( int c=0; c<3; ++c )
+		{
+			sprintf( fn[c],   "___ic_temp_%05d.bin", 100*pos_id+c );
+			sprintf( fn[3+c], "___ic_temp_%05d.bin", 100*vel_id+c );
+		}
+		
+		// one page-sized, zero-initialised buffer per component
+		std::vector<T_store> buf[6];
+		for( int c=0; c<6; ++c )
+			buf[c].assign( block_buf_size_, T_store(0) );
+		
+		// opening validates the leading byte count of each temp file
+		pistream ifs[6];
+		for( int c=0; c<6; ++c )
+			ifs[c].open( fn[c], npart );
+		
+		// read in the data from the temporary files in slabs and write it to the output file
+		size_t npleft = npart;
+		size_t n2read = std::min(block_buf_size_,npleft);
+		while( n2read > 0 )
+		{
+			for( int c=0; c<6; ++c )
+			{
+				// To make sure the last page in zooms has 0s in non-relevant values:
+				// stale data from the previous page must not look like real particles.
+				if( n2read < block_buf_size_ )
+					buf[c].assign( block_buf_size_, T_store(0) );
+				
+				ifs[c].read( reinterpret_cast<char*>(&buf[c][0]), n2read*sizeof(T_store) );
+				if( ifs[c].fail() )
+				{
+					LOGERR("ART : I/O error while reading buffer file \'%s\'",fn[c]);
+					throw std::runtime_error("I/O error while reading buffer file in ART output plug-in");
+				}
+				
+				adjust_buf_endianness( &buf[c][0] );
+				
+				// pages are always written in full
+				ofs.write( reinterpret_cast<char*>(&buf[c][0]), block_buf_size_*sizeof(T_store) );
+			}
+			
+			npleft -= n2read;
+			n2read = std::min( block_buf_size_,npleft );
+		}
+		
+		for( int c=0; c<6; ++c )
+			ifs[c].close();
+		
+		if( ofs.fail() )
+		{
+			LOGERR("ART : I/O error while writing file \'%s\'",partfname.c_str());
+			throw std::runtime_error("I/O error while writing particle file in ART output plug-in");
+		}
+		ofs.close();
+		
+		// clean up temp files
+		for( int c=0; c<6; ++c )
+			unlink( fn[c] );
+	}
+
+	/*!
+	 * Assemble the dark matter particle file (PMcrs0IC.DAT for runs with
+	 * baryons, PMcrs0.DAT otherwise) from the temporary position and
+	 * velocity files; npcdm_ particles are expected.
+	 */
+	void assemble_DM_file( void ) //PMcrs0.DAT
+	{
+		const std::string partfname = fname_ + ( do_baryons_ ? "/PMcrs0IC.DAT" : "/PMcrs0.DAT" );
+		
+		LOGINFO("writing DM data to ART format file");
+		assemble_particle_file( partfname, id_dm_pos, id_dm_vel, npcdm_ );
+		LOGINFO("ART : done writing DM file.");
+	}
+	
+	
+	/*
+     ART users currently create the baryon grid structure from the dark matter data file. 
+     Therefore they have decided that the best way to implement baryons for ART in MUSIC was
+     by creating a file with the same dm format but using the baryon displacements and velocities. 
+     From this file they will create the actual grid using their tools.
+     
+     So here we just have to re-create the dark matter file format but using the baryon data.
+    */
+	void assemble_gas_file( void ) //PMcrs0_GAS.DAT
+	{
+		const std::string partfname = fname_ + "/PMcrs0_GAS.DAT";
+		
+		LOGINFO("writing gas data to ART format file");
+		// # of gas elements should be equal to # of dm elements
+		assemble_particle_file( partfname, id_gas_pos, id_gas_vel, npcdm_ );
+		LOGINFO("ART : done writing gas file.");
+		
+		// Temperature (only reported in the log, not written to any file here)
+		const double Tcmb0 = 2.726;
+		const double h2    = header_.hubble*header_.hubble;
+		const double adec  = 1.0/(160.*pow(omegab_*h2/0.022,2.0/5.0));
+		const double Tini  = astart_<adec? Tcmb0/astart_ : Tcmb0/astart_/astart_*adec;
+		const double mu    = (Tini>1.e4) ? 4.0/(8.-5.*YHe_) : 4.0/(1.+3.*(1.-YHe_));
+		LOGINFO("ART : set initial gas temperature to %.3f K (%.3f K/mu)",Tini, Tini/mu);
+	}
+
+public:
+
+
+	/*!
+	 * Set up the ART output plug-in from the configuration file.
+	 *
+	 * Creates the output directory fname_, reads the cosmology/setup/output
+	 * parameters and fills header_ and ptf_. The per-species particle counts
+	 * (header_.lpart) are filled in later by write_dm_position().
+	 *
+	 * Required keys (cf.getValue): cosmology/Omega_m, Omega_L, H0, sigma_8,
+	 * nspec; setup/zstart, levelmin, levelmax, boxlength; output/astep.
+	 * Optional keys (cf.getValueSafe): setup/baryons, cosmology/Omega_b, YHe,
+	 * gamma; output/art_swap_endian, output/header.
+	 *
+	 * Throws std::runtime_error if the level range implies more species than
+	 * the 10 slots available in the header.
+	 */
+	explicit art_output_plugin ( config_file& cf )
+	: output_plugin( cf )
+	{
+		// Create the output directory. The return value is deliberately ignored:
+		// the call also fails when the directory already exists, and a real
+		// failure shows up as soon as the first output file is opened.
+		if( mkdir( fname_.c_str(), 0777 ) != 0 )
+		{ }
+
+		// particle counters are only known once the grid hierarchy is seen
+		npcdm_ = 0;
+		np_fine_gas_ = np_fine_dm_ = np_coarse_dm_ = 0;
+		npartmax_ = 0;
+
+		do_baryons_ = cf.getValueSafe<bool>("setup","baryons",false);
+		// We need to say that we want to do SPH for baryons 
+		// because if not MUSIC does not calculate/write gas positions
+		cf.insertValue("setup","do_SPH","yes");
+		// header size (alignment problem)
+		hsize_ = 529; // dm & hydro run
+
+		omegab_  = cf.getValueSafe<double>("cosmology","Omega_b",0.0);
+		omegam_  = cf.getValue<double>("cosmology","Omega_m");
+		zstart_  = cf.getValue<double>("setup","zstart");
+		astart_ = 1.0/(1.0+zstart_);
+		
+		swap_endianness_ = cf.getValueSafe<bool>("output","art_swap_endian",true);
+		
+		const int levelmin = cf.getValue<unsigned>("setup","levelmin");
+		const int levelmax = cf.getValue<unsigned>("setup","levelmax");
+		
+		// one species per level between levelmin and levelmax
+		const int nspecies = (levelmax >= levelmin) ? (levelmax-levelmin+1) : 0;
+		if( nspecies > 10 )
+		{
+			// wpart[] and lpart[] only have room for 10 species
+			LOGERR("ART : %d particle species requested but the ART header supports at most 10",nspecies);
+			throw std::runtime_error("Too many refinement levels for ART output plug-in");
+		}
+		
+		//Npage=nrow^2 with nrow=2^levelmax; Number of particles in each page
+		block_buf_size_ = ((size_t)1) << (2*levelmax);
+		
+		YHe_ = cf.getValueSafe<double>("cosmology","YHe",0.248);
+		gamma_ = cf.getValueSafe<double>("cosmology","gamma",5.0/3.0);
+		
+		// Set header
+		// Header text: blank padded to the full field width, truncated if the
+		// configured string is longer than the field (no terminating NUL is stored).
+		const std::string thead = cf.getValueSafe<std::string>("output","header","ICs generated using MUSIC");
+		const size_t nhead = sizeof(header_.head);
+		memset( header_.head, ' ', nhead );
+		memcpy( header_.head, thead.c_str(), thead.size() < nhead ? thead.size() : nhead );
+		
+		header_.aexpN = astart_;
+		header_.aexp0 = header_.aexpN;
+		header_.amplt = 0.0; // Amplitude of density fluctuations
+		header_.astep = cf.getValue<double>("output","astep"); // Seems that this must also be in the config file 
+		ptf_.astep=header_.astep; // to write pt file
+		header_.istep = 0; // step (=0 in IC)
+		header_.partw = 0.0; // mass of highest res particle. SEE BELOW
+		header_.TINTG = 0; //=0 in IC
+		header_.EKIN = 0.0; //SUM 0.5 * m_i*(v_i**2) in code units. Seems that 0 is ok for ICs
+		header_.EKIN1 = 0; //=0 in IC
+		header_.EKIN2 = 0; //=0 in IC
+		header_.AU0 = 0; //=0 in IC
+		header_.AEU0 = 0; //=0 in IC
+		header_.NROWC = 1 << levelmax; // Number of particles in 1 dim (number of particles per page = NROW**2) 
+		header_.NGRIDC = 1 << levelmin; // Number of cells in 1 dim
+		header_.nspecies = nspecies; // number of dm species
+		
+		header_.Nseed = 0; // random number used ( 0 for MUSIC? or set the random number used in the lowest level?)
+		header_.Om0 = omegam_; //Omega_m
+		header_.Oml0 = cf.getValue<double>("cosmology","Omega_L"); //Omega_L
+		header_.hubble = cf.getValue<double>("cosmology","H0")/100; //hubble constant h=H/100
+		header_.Wp5 = 0.0; // 0.0
+		header_.Ocurv = 1.0 - header_.Oml0 - header_.Om0; //
+		// Omega_b is optional (0 if absent), consistent with omegab_ above
+		header_.Omb0 = omegab_; // this parameter only appears in header in hydro runs
+		for (int i=0;i<10;i++)
+		{
+			header_.wpart[i] = 0.0; // part. masses from high res to low res (normalized to low res particle)
+			header_.lpart[i] = 0; // # particles from high res to low res cumulative!!! 
+		}
+		for (int i=0;i<header_.nspecies;i++)
+		{
+			header_.wpart[i] = 1.0/pow(8.0,(header_.nspecies-i-1)); //from high res to lo res // 8 should be changed for internal variable?
+		}
+		header_.partw = header_.wpart[0]; // mass of highest res particle. 
+		for (int i=0;i<80;i++)
+		{
+			header_.extras[i] = 0.0;
+		}
+		header_.extras[13] = omegab_; 
+		header_.extras[14] = cf.getValue<double>("cosmology","sigma_8"); 
+		header_.extras[15] = cf.getValue<double>("cosmology","nspec"); //Slope of the Power spectrum
+		header_.extras[79] = cf.getValue<double>("setup","boxlength"); 
+
+		LOGINFO("ART : done header info.");
+	}
+
+
+    
+    //! Intentionally empty: this plug-in writes no separate dark matter mass data.
+    void write_dm_mass( const grid_hierarchy& gh )
+	{
+		
+        //... write data for dark matter mass......
+        // This is not needed for ART
+    }
+    
+    /*!
+     * Shared worker for the four write_*_position / write_*_velocity methods.
+     *
+     * Walks all leaf (non-refined) cells from levelmax down to levelmin and
+     * writes one T_store value per cell to the temporary file
+     * "___ic_temp_<file_id>.bin". The payload is framed by a leading and a
+     * trailing size_t holding the payload size in bytes; pistream checks the
+     * leading one when the file is read back.
+     *
+     * The value written per cell is
+     *   - position: fmod( cell_pos[coord] + grid value + 1, 1 ) * fac + 1
+     *   - velocity: grid value * fac
+     *
+     * @param file_id      numeric id used in the temporary file name
+     * @param gh           grid hierarchy holding the field to be written
+     * @param is_position  true: treat the field as a displacement added to the cell position
+     * @param coord        coordinate index (0,1,2), only used for positions
+     * @param fac          scale factor applied to the value (see above)
+     * @param what         quantity name used in error messages
+     *
+     * Throws std::runtime_error if the file cannot be opened or written, or
+     * if the number of values written differs from the leaf cell count.
+     */
+    void write_temp_field( int file_id, const grid_hierarchy& gh, bool is_position, int coord, double fac, const char *what )
+	{
+        const size_t nptot = gh.count_leaf_cells(gh.levelmin(), gh.levelmax());
+		
+		std::vector<T_store> temp_data;
+		temp_data.reserve( block_buf_size_ );
+		
+		char temp_fname[256];
+		sprintf( temp_fname, "___ic_temp_%05d.bin", file_id );
+		std::ofstream ofs_temp( temp_fname, std::ios::binary|std::ios::trunc );
+		
+		// A failed open only sets failbit, and later writes on such a stream are
+		// no-ops that never set badbit -- so check right away.
+		if( !ofs_temp.good() )
+			throw std::runtime_error(std::string("Could not open temporary file for ")+what);
+		
+		const size_t blksize = sizeof(T_store)*nptot;
+		ofs_temp.write( reinterpret_cast<const char*>(&blksize), sizeof(size_t) );
+		
+		size_t nwritten = 0;
+		for( int ilevel=gh.levelmax(); ilevel>=(int)gh.levelmin(); --ilevel )
+			for( unsigned i=0; i<gh.get_grid(ilevel)->size(0); ++i )
+				for( unsigned j=0; j<gh.get_grid(ilevel)->size(1); ++j )
+					for( unsigned k=0; k<gh.get_grid(ilevel)->size(2); ++k )
+						if( ! gh.is_refined(ilevel,i,j,k) )
+						{
+							double val = (*gh.get_grid(ilevel))(i,j,k);
+							
+							if( is_position )
+							{
+								double xx[3];
+								gh.cell_pos(ilevel, i, j, k, xx);
+								
+								// wrap into [0,1), then scale into the code's coordinate range
+								val = fmod( (xx[coord]+val) + 1.0, 1.0 ) ;
+								val = (val*fac)+1.0; 
+							}
+							else
+								val = val * fac;
+							
+							temp_data.push_back( val );
+							
+							// flush whenever a full block has been collected
+							if( temp_data.size() == block_buf_size_ )
+							{
+								ofs_temp.write( reinterpret_cast<const char*>(&temp_data[0]), sizeof(T_store)*block_buf_size_ );
+								nwritten += block_buf_size_;
+								temp_data.clear();
+							}
+						}
+		
+		// remainder of the last, partially filled block
+		if( temp_data.size() > 0 )
+		{	
+			ofs_temp.write( reinterpret_cast<const char*>(&temp_data[0]), sizeof(T_store)*temp_data.size() );
+			nwritten += temp_data.size();
+		}
+		
+		if( nwritten != nptot )
+			throw std::runtime_error(std::string("Internal consistency error while writing temporary file for ")+what);
+		
+		//... trailing size marker
+		ofs_temp.write( reinterpret_cast<const char*>(&blksize), sizeof(size_t) );
+		
+		// fail() covers both failbit and badbit
+		if( ofs_temp.fail() )
+			throw std::runtime_error(std::string("I/O error while writing temporary file for ")+what);
+		
+		ofs_temp.close();
+    }
+
+    /*!
+     * Write coordinate \a coord of the dark matter positions to a temporary
+     * file and record the particle counts (npcdm_, header_.lpart) taken from
+     * the grid hierarchy.
+     */
+    void write_dm_position( int coord, const grid_hierarchy& gh )
+	{
+		//... store all the meta data about the grid hierarchy in header variables
+        npcdm_ = gh.count_leaf_cells(gh.levelmin(), gh.levelmax());
+		for (int i=0;i<header_.nspecies;i++)
+		{
+			header_.lpart[i] = gh.count_leaf_cells(gh.levelmax()-i, gh.levelmax()); //cumulative!!
+		}
+		
+	    //coordinates are in the range 1 - (NGRID+1)
+    	// so scale factor is  scaleX = Box/NGRID -> to Mpc/h (Box in Mpc/h) 
+		const double xfac = (double) header_.NGRIDC; 
+		
+		write_temp_field( 100*id_dm_pos+coord, gh, true, coord, xfac, "positions" );
+    }
+    
+    /*!
+     * Write component \a coord of the dark matter velocities to a temporary file.
+     */
+    void write_dm_velocity( int coord, const grid_hierarchy& gh )
+	{
+		//In ART velocities are P = a_expansion*V_pec/(x_0H_0) 
+		// where x_0 = comoving cell_size=Box/Ngrid;H_0 = Hubble at z=0
+		// so scale factor to physical km/s is convV= BoxV/AEXPN/NGRID 
+		// (BoxV is Box*100; aexpn=current expansion factor)
+		//internal units of MUSIC: To km/s just multiply by Lbox
+		const double vfac =  (header_.aexpN*header_.NGRIDC)/(100.0);  
+		
+		write_temp_field( 100*id_dm_vel+coord, gh, false, coord, vfac, "DM velocities" );
+    }
+    
+    void write_dm_density( const grid_hierarchy& gh )
+	{
+		//... we don't care about DM density for art
+	}
+    
+    void write_dm_potential( const grid_hierarchy& gh )
+	{ }
+	
+	
+	
+    /*!
+     * Write coordinate \a coord of the gas positions to a temporary file
+     * (same format and scaling as the dark matter positions).
+     */
+    void write_gas_position( int coord, const grid_hierarchy& gh )
+	{
+	    //ART coordinates are in the range 1 - (NGRID+1)
+		const double xfac = (double) header_.NGRIDC; 
+		
+		write_temp_field( 100*id_gas_pos+coord, gh, true, coord, xfac, "gas positions" );
+    }
+
+	/*!
+	 * Write component \a coord of the gas velocities to a temporary file
+	 * (same format and scaling as the dark matter velocities).
+	 */
+	void write_gas_velocity( int coord, const grid_hierarchy& gh )
+	{
+		//In ART velocities are P = a_expansion*V_pec/(x_0H_0) 
+		// where x_0 = comoving cell_size=Box/Ngrid;H_0 = Hubble at z=0
+		// so scale factor to physical km/s is convV= BoxV/AEXPN/NGRID 
+		// (BoxV is Box*100; aexpn=current expansion factor)
+		//internal units of MUSIC: To km/s just multiply by Lbox
+		const double vfac =  (header_.aexpN*header_.NGRIDC)/(100.0);  
+		
+		write_temp_field( 100*id_gas_vel+coord, gh, false, coord, vfac, "gas velocities" );
+    }
+    
+    void write_gas_density( const grid_hierarchy& gh )
+	{ }
+	void write_gas_potential( const grid_hierarchy& gh )
+	{ }
+
+	//! Produce the final ART files: header file, pt file, the dark matter
+	//! particle file and, for runs with baryons, the gas file.
+	void finalize( void )
+	{ 
+		write_header_file();
+		write_pt_file();
+		assemble_DM_file();
+
+		// the gas file only exists in runs with baryons
+		if( !do_baryons_ )
+			return;
+
+		assemble_gas_file();
+	}
+};
+
+// File-local creator object for the ART plug-in (instantiated with float storage),
+// constructed with the name "art".
+// NOTE(review): presumably this is the key under which the plug-in is selected at
+// run time -- confirm against output_plugin_creator_concrete in output.hh.
+namespace{
+	output_plugin_creator_concrete<art_output_plugin<float> > creator("art");
+}
--- a/plugins/output_cart.cc
+++ b/plugins/output_cart.cc
--- a/plugins/output_enzo.cc
+++ b/plugins/output_enzo.cc
@ -0,0 +1,513 @@
+/*
+
+output_enzo.cc - This file is part of MUSIC -
+a code to generate multi-scale initial conditions
+for cosmological simulations
+
+Copyright (C) 2010  Oliver Hahn
+
+*/
+
+#ifdef HAVE_HDF5
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "output.hh"
+
+#include "HDF_IO.hh"
+
+#define MAX_SLAB_SIZE   268435456  // = 256 MBytes
+
+
+class enzo_output_plugin : public output_plugin
+{
+protected:
+
+      //! per-dataset attributes, written by write_patch_header
+      struct patch_header
+      {
+              int              component_rank;
+              size_t           component_size;
+              std::vector<int> dimensions;
+              int              rank;
+              std::vector<int> top_grid_dims;
+              std::vector<int> top_grid_end;
+              std::vector<int> top_grid_start;
+      };
+
+      //! attributes attached to the root group "/" of an output file, written by write_sim_header
+      struct sim_header
+      {
+              std::vector<int> dimensions;
+              std::vector<int> offset;
+              float            a_start;
+              float            dx;
+              float            h0;
+              float            omega_b;
+              float            omega_m;
+              float            omega_v;
+              float            vfact;
+      };
+
+
+      //... the one header instance of this plugin, filled in the constructor
+      sim_header the_sim_header;
+
+      //! Attach every field of a sim_header as an attribute to the group "/" of file fname.
+      void write_sim_header( std::string fname, const sim_header& h )
+      {
+              //... integer vectors
+              HDFWriteGroupAttribute( fname, "/", "Dimensions", h.dimensions );
+              HDFWriteGroupAttribute( fname, "/", "Offset",     h.offset );
+
+              //... scalar floats
+              HDFWriteGroupAttribute( fname, "/", "a_start",    h.a_start );
+              HDFWriteGroupAttribute( fname, "/", "dx",         h.dx );
+              HDFWriteGroupAttribute( fname, "/", "h0",         h.h0 );
+              HDFWriteGroupAttribute( fname, "/", "omega_b",    h.omega_b );
+              HDFWriteGroupAttribute( fname, "/", "omega_m",    h.omega_m );
+              HDFWriteGroupAttribute( fname, "/", "omega_v",    h.omega_v );
+              HDFWriteGroupAttribute( fname, "/", "vfact",      h.vfact );
+      }
+
+      //! Attach every field of a patch_header as an attribute to dataset dsetname in file fname.
+      void write_patch_header( std::string fname, std::string dsetname, const patch_header& h )
+      {
+              //... component description
+              HDFWriteDatasetAttribute( fname, dsetname, "Component_Rank", h.component_rank );
+              HDFWriteDatasetAttribute( fname, dsetname, "Component_Size", h.component_size );
+
+              //... extent of this patch
+              HDFWriteDatasetAttribute( fname, dsetname, "Dimensions",     h.dimensions );
+              HDFWriteDatasetAttribute( fname, dsetname, "Rank",           h.rank );
+
+              //... top grid information
+              HDFWriteDatasetAttribute( fname, dsetname, "TopGridDims",    h.top_grid_dims );
+              HDFWriteDatasetAttribute( fname, dsetname, "TopGridEnd",     h.top_grid_end );
+              HDFWriteDatasetAttribute( fname, dsetname, "TopGridStart",   h.top_grid_start );
+      }
+
+
+      //! Write one grid field, level by level, into separate HDF5 files.
+      /*!
+       * For every level in [levelmin_,levelmax_] a file "<fname_>/<fieldname>" is
+       * created (with ".<ilevel-levelmin_>" appended when more than one level
+       * exists). The simulation header is attached to the file, the values
+       * (add+value)*factor are written slab by slab, and finally the patch
+       * header is attached to the dataset.
+       *
+       * @param fieldname  name of the dataset, also used as base name of the file
+       * @param gh         grid hierarchy holding the field
+       * @param factor     factor every value is multiplied with (after adding 'add')
+       * @param add        constant added to every value before scaling
+       * @throws std::runtime_error if the dataset or file name does not fit its buffer
+       */
+      void dump_grid_data( std::string fieldname, const grid_hierarchy& gh, double factor = 1.0, double add = 0.0 )
+      {
+              //... element type of the data written to file
+#ifdef SINGLE_PRECISION
+              typedef float  slab_real_t;
+#else
+              typedef double slab_real_t;
+#endif
+
+              char enzoname[256], filename[256];
+
+              for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel )
+              {
+                      std::vector<int> ng;
+                      ng.push_back( gh.get_grid(ilevel)->size(0) );
+                      ng.push_back( gh.get_grid(ilevel)->size(1) );
+                      ng.push_back( gh.get_grid(ilevel)->size(2) );
+
+                      //... need to copy data because we need to get rid of the ghost zones
+                      //... write in slabs if data is more than MAX_SLAB_SIZE (256 MB)
+
+                      //... full 3D block size
+                      size_t all_data_size = (size_t)ng[0] * (size_t)ng[1] * (size_t)ng[2];
+
+                      //... write in slabs of MAX_SLAB_SIZE unless all_data_size is anyway smaller
+                      size_t max_slab_size = std::min((size_t)MAX_SLAB_SIZE/sizeof(double), all_data_size );
+
+                      //... but one slab has to be at least the size of one slice
+                      max_slab_size = std::max(((size_t)ng[0] * (size_t)ng[1]), max_slab_size );
+
+                      //... number of slices in one slab
+                      size_t slices_in_slab = (size_t)((double)max_slab_size / ((size_t)ng[0] * (size_t)ng[1]));
+
+                      //... build dataset and file name, refusing silently truncated names
+                      int nchars;
+                      if( levelmin_ != levelmax_ )
+                              nchars = snprintf( enzoname, sizeof(enzoname), "%s.%u", fieldname.c_str(), ilevel-levelmin_ );
+                      else
+                              nchars = snprintf( enzoname, sizeof(enzoname), "%s", fieldname.c_str() );
+
+                      if( nchars < 0 || (size_t)nchars >= sizeof(enzoname) )
+                              throw std::runtime_error("Field name too long in enzo_output_plugin!");
+
+                      nchars = snprintf( filename, sizeof(filename), "%s/%s", fname_.c_str(), enzoname );
+
+                      if( nchars < 0 || (size_t)nchars >= sizeof(filename) )
+                              throw std::runtime_error("Output file name too long in enzo_output_plugin!");
+
+                      HDFCreateFile( filename );
+                      write_sim_header( filename, the_sim_header );
+
+                      {
+                              //... dataset extent, slowest varying index first
+                              size_t nsz[3] = { (size_t)ng[2], (size_t)ng[1], (size_t)ng[0] };
+
+                              //... create full array in file; writer and buffer are released
+                              //... at the end of this scope, also if an exception is thrown
+                              HDFHyperslabWriter3Ds<slab_real_t> slab_writer( filename, enzoname, nsz );
+
+                              //... create buffer
+                              std::vector<slab_real_t> slab_data( slices_in_slab * (size_t)ng[0] * (size_t)ng[1] );
+                              slab_real_t *data_buf = &slab_data[0];
+
+                              //... write slice by slice
+                              size_t slices_written = 0;
+                              while( slices_written < (size_t)ng[2] )
+                              {
+                                      slices_in_slab = std::min( (size_t)ng[2]-slices_written, slices_in_slab );
+
+                                      #pragma omp parallel for
+                                      for( int k=0; k<(int)slices_in_slab; ++k )
+                                              for( int j=0; j<ng[1]; ++j )
+                                                      for( int i=0; i<ng[0]; ++i )
+                                                              data_buf[ ((size_t)k*(size_t)ng[1]+(size_t)j)*(size_t)ng[0]+(size_t)i ] =
+                                                                              (add+(*gh.get_grid(ilevel))(i,j,k+slices_written))*factor;
+
+                                      size_t count[3], offset[3];
+
+                                      count[0] = slices_in_slab;
+                                      count[1] = ng[1];
+                                      count[2] = ng[0];
+
+                                      offset[0] = slices_written;
+                                      offset[1] = 0;
+                                      offset[2] = 0;
+
+                                      slab_writer.write_slab( data_buf, count, offset );
+                                      slices_written += slices_in_slab;
+                              }
+                      }
+
+                      //... header data for the patch
+                      patch_header ph;
+
+                      ph.component_rank       = 1;
+                      ph.component_size       = (size_t)ng[0]*(size_t)ng[1]*(size_t)ng[2];
+                      ph.dimensions           = ng;
+                      ph.rank                 = 3;
+
+                      ph.top_grid_dims.assign(3, 1<<levelmin_);
+
+                      //... offset_abs is in units of the current level cell size
+
+                      double rfac = 1.0/(1<<(ilevel-levelmin_));
+
+                      ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 0)*rfac) );
+                      ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 1)*rfac) );
+                      ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 2)*rfac) );
+
+                      ph.top_grid_end.push_back( ph.top_grid_start[0] + (int)(ng[0]*rfac) );
+                      ph.top_grid_end.push_back( ph.top_grid_start[1] + (int)(ng[1]*rfac) );
+                      ph.top_grid_end.push_back( ph.top_grid_start[2] + (int)(ng[2]*rfac) );
+
+                      write_patch_header( filename, enzoname, ph );
+              }
+      }
+
+public:
+
+      //! Create the output directory and fill the simulation header from the configuration.
+      /*!
+       * @param cf  configuration object, also handed to the output_plugin base
+       * @throws std::runtime_error if mkdir() on fname_ fails
+       */
+      enzo_output_plugin( config_file& cf )
+      : output_plugin( cf )
+      {
+              if( mkdir( fname_.c_str(), 0777 ) != 0 )
+              {
+                      perror( fname_.c_str() );
+                      throw std::runtime_error("Error in enzo_output_plugin!");
+              }
+
+              const bool bhave_hydro = cf_.getValue<bool>("setup","baryons");
+              const bool align_top   = cf.getValueSafe<bool>( "setup", "align_top", false );
+
+              if( !align_top )
+                  LOGWARN("Old ENZO versions may require \'align_top=true\'!");
+
+              //... 2^levelmin_ cells per dimension, zero offset
+              the_sim_header.dimensions.assign( 3, 1<<levelmin_ );
+              the_sim_header.offset.assign( 3, 0 );
+
+              the_sim_header.a_start = 1.0/(1.0+cf.getValue<double>("setup","zstart"));
+
+              // not sure?!?
+              const double boxlength = cf.getValue<double>("setup","boxlength");
+              const double hubble    = cf.getValue<double>("cosmology","H0")*0.01;
+              the_sim_header.dx      = boxlength/the_sim_header.dimensions[0]/hubble;
+
+              the_sim_header.h0      = cf.getValue<double>("cosmology","H0")*0.01;
+
+              //... the baryon fraction is only read when baryons are enabled
+              the_sim_header.omega_b = bhave_hydro ? cf.getValue<double>("cosmology","Omega_b") : 0.0;
+
+              the_sim_header.omega_m = cf.getValue<double>("cosmology","Omega_m");
+              the_sim_header.omega_v = cf.getValue<double>("cosmology","Omega_L");
+
+              //.. need to multiply by h, ENZO wants this factor for non h-1 units
+              the_sim_header.vfact   = cf.getValue<double>("cosmology","vfact")*the_sim_header.h0;
+      }
+
+      //! Destructor; nothing to release here.
+      ~enzo_output_plugin()
+      {
+      }
+
+      //! DM mass output hook.
+      void write_dm_mass( const grid_hierarchy& gh )
+      {
+              /* do nothing, not needed */
+      }
+
+
+      //! Write the parameter file snippet and log the density extrema.
+      /*!
+       * Creates "<fname_>/parameter_file.txt" containing the cosmology, grid and
+       * refinement region parameters derived from the configuration and from gh.
+       * Afterwards all unrefined cells of gh are scanned for their minimum and
+       * maximum value (over all levels and on the finest level only) and the
+       * result is logged. No density field is written by this method.
+       *
+       * @param gh  grid hierarchy holding the DM density
+       * @throws std::runtime_error if the file name does not fit its buffer or
+       *         the parameter file cannot be opened for writing
+       */
+      void write_dm_density( const grid_hierarchy& gh )
+      {       /* write the parameter file data */
+
+              bool bhave_hydro = cf_.getValue<bool>("setup","baryons");
+              double refine_region_fraction  = cf_.getValueSafe<double>( "output", "enzo_refine_region_fraction", 0.8 );
+              char filename[256];
+
+              //... exact integer power of two, no detour through floating point pow()
+              unsigned nbase = 1u<<levelmin_;
+
+              //... refuse a silently truncated path instead of overflowing the buffer
+              int nchars = snprintf( filename, sizeof(filename), "%s/parameter_file.txt", fname_.c_str() );
+              if( nchars < 0 || (size_t)nchars >= sizeof(filename) )
+                      throw std::runtime_error("Output path too long in enzo_output_plugin!");
+
+              std::ofstream ofs( filename, std::ios::trunc );
+
+              if( !ofs.good() )
+                      throw std::runtime_error("Could not open ENZO parameter file for writing!");
+
+              ofs
+                      << "# Relevant Section of Enzo Paramter File (NOT COMPLETE!) \n"
+                      << "ProblemType                              = 30      // cosmology simulation\n"
+                      << "TopGridRank                              = 3\n"
+                      << "TopGridDimensions                        = " << nbase << " " << nbase << " " << nbase << "\n"
+                      << "SelfGravity                              = 1       // gravity on\n"
+                      << "TopGridGravityBoundary                   = 0       // Periodic BC for gravity\n"
+                      << "LeftFaceBoundaryCondition                = 3 3 3   // same for fluid\n"
+                      << "RightFaceBoundaryCondition               = 3 3 3\n"
+                      << "RefineBy                                 = 2\n"
+                      << "\n"
+                      << "#\n";
+
+              if( bhave_hydro )
+                      ofs
+                      << "CosmologySimulationOmegaBaryonNow        = " << the_sim_header.omega_b << "\n"
+                      << "CosmologySimulationOmegaCDMNow           = " << the_sim_header.omega_m-the_sim_header.omega_b << "\n";
+              else
+                      ofs
+                      << "CosmologySimulationOmegaBaryonNow        = " << 0.0 << "\n"
+                      << "CosmologySimulationOmegaCDMNow           = " << the_sim_header.omega_m << "\n";
+
+              if( bhave_hydro )
+                      ofs
+                      << "CosmologySimulationDensityName           = GridDensity\n"
+                      << "CosmologySimulationVelocity1Name         = GridVelocities_x\n"
+                      << "CosmologySimulationVelocity2Name         = GridVelocities_y\n"
+                      << "CosmologySimulationVelocity3Name         = GridVelocities_z\n";
+
+              ofs
+                      << "CosmologySimulationCalculatePositions    = 1\n"
+                      << "CosmologySimulationParticleVelocity1Name = ParticleVelocities_x\n"
+                      << "CosmologySimulationParticleVelocity2Name = ParticleVelocities_y\n"
+                      << "CosmologySimulationParticleVelocity3Name = ParticleVelocities_z\n"
+                      << "CosmologySimulationParticleDisplacement1Name = ParticleDisplacements_x\n"
+                      << "CosmologySimulationParticleDisplacement2Name = ParticleDisplacements_y\n"
+                      << "CosmologySimulationParticleDisplacement3Name = ParticleDisplacements_z\n"
+                      << "\n"
+                      << "#\n"
+                      << "#  define cosmology parameters\n"
+                      << "#\n"
+                      << "ComovingCoordinates                      = 1       // Expansion ON\n"
+                      << "CosmologyOmegaMatterNow                  = " << the_sim_header.omega_m << "\n"
+                      << "CosmologyOmegaLambdaNow                  = " << the_sim_header.omega_v << "\n"
+                      << "CosmologyHubbleConstantNow               = " << the_sim_header.h0 << "     // in 100 km/s/Mpc\n"
+                      << "CosmologyComovingBoxSize                 = " << cf_.getValue<double>("setup","boxlength") << "    // in Mpc/h\n"
+                      << "CosmologyMaxExpansionRate                = 0.015   // maximum allowed delta(a)/a\n"
+                      << "CosmologyInitialRedshift                 = " << cf_.getValue<double>("setup","zstart") << "      //\n"
+                      << "CosmologyFinalRedshift                   = 0       //\n"
+                      << "GravitationalConstant                    = 1       // this must be true for cosmology\n"
+                      << "#\n"
+                      << "#\n"
+                      << "ParallelRootGridIO                       = 1\n"
+                      << "ParallelParticleIO                       = 1\n"
+                      << "PartitionNestedGrids                     = 1\n"
+                      << "CosmologySimulationNumberOfInitialGrids  = " << 1+levelmax_-levelmin_ << "\n";
+
+
+              //... number of digits used for grid edges; more for very deep hierarchies
+              int num_prec = 10;
+
+              if( levelmax_ > 15 )
+                      num_prec = 17;
+
+              //... only for additionally refined grids
+              for( unsigned ilevel = 0; ilevel< levelmax_-levelmin_; ++ilevel )
+              {
+                      //... level described by this entry and its cell size in box units
+                      const unsigned lvl = levelmin_+ilevel+1;
+                      double h = 1.0/(1<<lvl);
+
+                      ofs
+
+                      << "CosmologySimulationGridDimension[" << 1+ilevel << "]      = "
+                              << std::setw(16) << gh.size( lvl, 0 ) << " "
+                              << std::setw(16) << gh.size( lvl, 1 ) << " "
+                              << std::setw(16) << gh.size( lvl, 2 ) << "\n"
+
+                      << "CosmologySimulationGridLeftEdge[" << 1+ilevel << "]       = "
+                              << std::setw(num_prec+6) << std::setprecision(num_prec) << h*gh.offset_abs(lvl, 0) << " "
+                              << std::setw(num_prec+6) << std::setprecision(num_prec) << h*gh.offset_abs(lvl, 1) << " "
+                              << std::setw(num_prec+6) << std::setprecision(num_prec) << h*gh.offset_abs(lvl, 2) << "\n"
+
+                      << "CosmologySimulationGridRightEdge[" << 1+ilevel << "]      = "
+                              << std::setw(num_prec+6) << std::setprecision(num_prec) << h*(gh.offset_abs(lvl, 0)+gh.size( lvl, 0 )) << " "
+                              << std::setw(num_prec+6) << std::setprecision(num_prec) << h*(gh.offset_abs(lvl, 1)+gh.size( lvl, 1 )) << " "
+                              << std::setw(num_prec+6) << std::setprecision(num_prec) << h*(gh.offset_abs(lvl, 2)+gh.size( lvl, 2 )) << "\n"
+
+                      << "CosmologySimulationGridLevel[" << 1+ilevel << "]          = " << 1+ilevel << "\n";
+              }
+
+              if( levelmin_ != levelmax_ )
+              {
+                      double h = 1.0/(1<<levelmax_);
+
+                      //... region centred on the finest grid, covering the fraction
+                      //... refine_region_fraction of its extent in every dimension
+                      double cen[3],le[3],re[3];
+                      for (int i=0;i<3;i++)
+                      {
+                              cen[i] = gh.offset_abs(levelmax_, i)+gh.size( levelmax_, i )/2;
+                              le[i]  = cen[i]-refine_region_fraction*gh.size( levelmax_,i)/2;
+                              re[i]  = le[i] +refine_region_fraction*gh.size( levelmax_, i);
+                      }
+
+                      ofs
+                              << "#\n"
+                              << "# region allowed for further refinement\n"
+                              << "#\n"
+                              //                << "RefineRegionAutoAdjust                   = 1\n"
+                              << "RefineRegionLeftEdge                     = "
+                                      << std::setw(num_prec+6) << std::setprecision(num_prec) << h*le[0] << " "
+                                      << std::setw(num_prec+6) << std::setprecision(num_prec) << h*le[1] << " "
+                                      << std::setw(num_prec+6) << std::setprecision(num_prec) << h*le[2] << "\n"
+                              << "RefineRegionRightEdge                     = "
+                                      << std::setw(num_prec+6) << std::setprecision(num_prec) << h*re[0] << " "
+                                      << std::setw(num_prec+6) << std::setprecision(num_prec) << h*re[1] << " "
+                                      << std::setw(num_prec+6) << std::setprecision(num_prec) << h*re[2]<< "\n";
+              }
+
+
+              // determine density maximum and minimum location
+              real_t rhomax = -1e30, rhomin = 1e30;
+              double loc_rhomax[3] = {0.0,0.0,0.0}, loc_rhomin[3] = {0.0,0.0,0.0};
+              int lvl_rhomax = 0, lvl_rhomin = 0;
+              real_t rhomax_lm = -1e30, rhomin_lm = 1e30;
+              double loc_rhomax_lm[3] = {0.0,0.0,0.0}, loc_rhomin_lm[3] = {0.0,0.0,0.0};
+
+
+              for( int ilevel=gh.levelmax(); ilevel>=(int)gh.levelmin(); --ilevel )
+                      for( unsigned i=0; i<gh.get_grid(ilevel)->size(0); ++i )
+                              for( unsigned j=0; j<gh.get_grid(ilevel)->size(1); ++j )
+                                      for( unsigned k=0; k<gh.get_grid(ilevel)->size(2); ++k )
+                                              if( ! gh.is_refined(ilevel,i,j,k) )
+                                              {
+                                                      real_t rho = (*gh.get_grid(ilevel))(i,j,k);
+
+                                                      if( rho > rhomax )
+                                                      {
+                                                              rhomax = rho;
+                                                              lvl_rhomax = ilevel;
+                                                              gh.cell_pos(ilevel, i, j, k, loc_rhomax);
+                                                      }
+
+                                                      if( rho < rhomin )
+                                                      {
+                                                              rhomin = rho;
+                                                              lvl_rhomin = ilevel;
+                                                              gh.cell_pos(ilevel, i, j, k, loc_rhomin);
+                                                      }
+
+                                                      //... separate bookkeeping for the finest level
+                                                      if( ilevel == (int)gh.levelmax() )
+                                                      {
+                                                              if( rho > rhomax_lm )
+                                                              {
+                                                                      rhomax_lm = rho;
+                                                                      gh.cell_pos(ilevel, i, j, k, loc_rhomax_lm);
+                                                              }
+
+                                                              if( rho < rhomin_lm )
+                                                              {
+                                                                      rhomin_lm = rho;
+                                                                      gh.cell_pos(ilevel, i, j, k, loc_rhomin_lm);
+                                                              }
+                                                      }
+                                              }
+
+              //... configured shift in units of levelmin_ cells, with opposite sign
+              double h = 1.0/(1<<levelmin_);
+              double shift[3];
+              shift[0] = -(double)cf_.getValue<int>( "setup", "shift_x" )*h;
+              shift[1] = -(double)cf_.getValue<int>( "setup", "shift_y" )*h;
+              shift[2] = -(double)cf_.getValue<int>( "setup", "shift_z" )*h;
+
+              if( gh.levelmin() != gh.levelmax() )
+              {
+                      LOGINFO("Global density extrema: ");
+                      LOGINFO("  minimum: delta=%f at (%f,%f,%f) (level=%d)",rhomin,loc_rhomin[0],loc_rhomin[1],loc_rhomin[2],lvl_rhomin);
+                      LOGINFO("       shifted back at (%f,%f,%f)",loc_rhomin[0]+shift[0],loc_rhomin[1]+shift[1],loc_rhomin[2]+shift[2]);
+                      LOGINFO("  maximum: delta=%f at (%f,%f,%f) (level=%d)",rhomax,loc_rhomax[0],loc_rhomax[1],loc_rhomax[2],lvl_rhomax);
+                      LOGINFO("       shifted back at (%f,%f,%f)",loc_rhomax[0]+shift[0],loc_rhomax[1]+shift[1],loc_rhomax[2]+shift[2]);
+
+                      LOGINFO("Density extrema on finest level: ");
+                      LOGINFO("  minimum: delta=%f at (%f,%f,%f)",rhomin_lm,loc_rhomin_lm[0],loc_rhomin_lm[1],loc_rhomin_lm[2]);
+                      LOGINFO("       shifted back at (%f,%f,%f)",loc_rhomin_lm[0]+shift[0],loc_rhomin_lm[1]+shift[1],loc_rhomin_lm[2]+shift[2]);
+                      LOGINFO("  maximum: delta=%f at (%f,%f,%f)",rhomax_lm,loc_rhomax_lm[0],loc_rhomax_lm[1],loc_rhomax_lm[2]);
+                      LOGINFO("       shifted back at (%f,%f,%f)",loc_rhomax_lm[0]+shift[0],loc_rhomax_lm[1]+shift[1],loc_rhomax_lm[2]+shift[2]);
+
+              }else{
+                      LOGINFO("Global density extrema: ");
+                      LOGINFO("  minimum: delta=%f at (%f,%f,%f)",rhomin,loc_rhomin[0],loc_rhomin[1],loc_rhomin[2]);
+                      LOGINFO("       shifted back at (%f,%f,%f)",loc_rhomin[0]+shift[0],loc_rhomin[1]+shift[1],loc_rhomin[2]+shift[2]);
+                      LOGINFO("  maximum: delta=%f at (%f,%f,%f)",rhomax,loc_rhomax[0],loc_rhomax[1],loc_rhomax[2]);
+                      LOGINFO("       shifted back at (%f,%f,%f)",loc_rhomax[0]+shift[0],loc_rhomax[1]+shift[1],loc_rhomax[2]+shift[2]);
+
+              }
+
+      }
+
+
+      //! Dump DM velocity component coord as field "ParticleVelocities_" + ('x'+coord).
+      void write_dm_velocity( int coord, const grid_hierarchy& gh )
+      {
+              //... factor the values are multiplied with before they are written
+              const double vunit = 1.0/(1.225e2*sqrt(the_sim_header.omega_m/the_sim_header.a_start));
+
+              std::string enzoname( "ParticleVelocities_" );
+              enzoname += (char)('x'+coord);
+
+              dump_grid_data( enzoname, gh, vunit );
+      }
+
+
+      //! Dump DM displacement component coord as field "ParticleDisplacements_" + ('x'+coord).
+      void write_dm_position( int coord, const grid_hierarchy& gh )
+      {
+              std::string enzoname( "ParticleDisplacements_" );
+              enzoname += (char)('x'+coord);
+
+              //... written unscaled (default factor and offset)
+              dump_grid_data( enzoname, gh );
+      }
+
+      //! DM potential output hook; this plugin writes nothing here.
+      void write_dm_potential( const grid_hierarchy& gh )
+      {
+              /* intentionally empty */
+      }
+
+      //! Gas potential output hook; this plugin writes nothing here.
+      void write_gas_potential( const grid_hierarchy& gh )
+      {
+              /* intentionally empty */
+      }
+
+
+      //! Dump gas velocity component coord as field "GridVelocities_" + ('x'+coord).
+      void write_gas_velocity( int coord, const grid_hierarchy& gh )
+      {
+              //... factor the values are multiplied with before they are written
+              const double vunit = 1.0/(1.225e2*sqrt(the_sim_header.omega_m/the_sim_header.a_start));
+
+              std::string enzoname( "GridVelocities_" );
+              enzoname += (char)('x'+coord);
+
+              dump_grid_data( enzoname, gh, vunit );
+      }
+
+
+      //! Gas position output hook.
+      void write_gas_position( int coord, const grid_hierarchy& gh )
+      {       /* do nothing, not needed */    }
+
+
+      //! Dump the gas density as field "GridDensity".
+      /*!
+       * The written value is (1 + value) * omega_b/omega_m, with both density
+       * parameters taken from the_sim_header.
+       */
+      void write_gas_density( const grid_hierarchy& gh )
+      {
+              dump_grid_data( "GridDensity", gh, the_sim_header.omega_b/the_sim_header.omega_m, 1.0 );
+      }
+
+
+      //! Finalization hook; this plugin has nothing left to do here.
+      void finalize( void )
+      {
+              /* intentionally empty */
+      }
+
+
+};
+
+//... file-local creator object for enzo_output_plugin, constructed with the name "enzo"
+namespace
+{
+      output_plugin_creator_concrete< enzo_output_plugin > creator( "enzo" );
+}
+
+#endif
+
+
--- a/plugins/output_gadget2.cc
+++ b/plugins/output_gadget2.cc
--- a/plugins/output_gadget2_2comp.cc
+++ b/plugins/output_gadget2_2comp.cc
--- a/plugins/output_generic.cc
+++ b/plugins/output_generic.cc
@ -0,0 +1,255 @@
+/*
+ 
+ output_generic.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+
+#ifdef HAVE_HDF5
+
+#include "output.hh"
+#include "HDF_IO.hh"
+
+
+class generic_output_plugin : public output_plugin
+{
+protected:
+	
+	using output_plugin::cf_;
+		
+	//! Write a mesh, including its boundary zones, as one 3D dataset.
+	/*!
+	 * The values data(i,j,k) for i,j,k in [-nb, n+nb) are copied into a flat
+	 * buffer (k running fastest) and handed to HDFWriteDataset3D.
+	 *
+	 * @param fname  name of the file to write to
+	 * @param dname  name of the dataset
+	 * @param data   mesh to be written; its m_nbnd boundary cells per side are included
+	 */
+	template< typename Tt >
+	void write2HDF5( std::string fname, std::string dname, const MeshvarBnd<Tt>& data )
+	{
+		const int n0 = data.size(0), n1 = data.size(1), n2 = data.size(2), nb = data.m_nbnd;
+
+		//... extents including the boundary on both sides
+		const int nx = n0+2*nb, ny = n1+2*nb, nz = n2+2*nb;
+
+		//... the element count is formed in size_t; a product of 32 bit values
+		//... would wrap around for large grids
+		std::vector<Tt> vdata;
+		vdata.reserve( (size_t)nx*(size_t)ny*(size_t)nz );
+
+		for(int i=-nb; i<n0+nb; ++i )
+			for(int j=-nb; j<n1+nb; ++j )
+				for(int k=-nb; k<n2+nb; ++k )
+					vdata.push_back( data(i,j,k) );
+		
+		unsigned nd[3] = { (unsigned)nx, (unsigned)ny, (unsigned)nz };
+		HDFWriteDataset3D( fname, dname, nd, vdata);
+	}
+	
+public:
+	//! Create the output file and write the header group.
+	/*!
+	 * Creates the file fname_ with a group "header". The group receives the
+	 * datasets grid_off_{x,y,z} (from offx_, offy_, offz_) and
+	 * grid_len_{x,y,z} (from sizex_, sizey_, sizez_) as well as the
+	 * attributes "levelmin" and "levelmax".
+	 *
+	 * @param cf  configuration object, handed to the output_plugin base
+	 */
+	generic_output_plugin( config_file& cf )
+	: output_plugin( cf )
+	{
+		//... file and header group
+		HDFCreateFile( fname_ );
+		HDFCreateGroup( fname_, "header" );
+
+		//... grid offsets
+		HDFWriteDataset( fname_, "/header/grid_off_x", offx_ );
+		HDFWriteDataset( fname_, "/header/grid_off_y", offy_ );
+		HDFWriteDataset( fname_, "/header/grid_off_z", offz_ );
+
+		//... grid extents
+		HDFWriteDataset( fname_, "/header/grid_len_x", sizex_ );
+		HDFWriteDataset( fname_, "/header/grid_len_y", sizey_ );
+		HDFWriteDataset( fname_, "/header/grid_len_z", sizez_ );
+
+		//... level range
+		HDFWriteGroupAttribute( fname_, "header", "levelmin", levelmin_ );
+		HDFWriteGroupAttribute( fname_, "header", "levelmax", levelmax_ );
+	}
+	
+	//! Destructor; nothing to release here.
+	~generic_output_plugin()
+	{
+	}
+	
+	//! DM mass output hook; this plugin writes nothing here.
+	void write_dm_mass( const grid_hierarchy& gh )
+	{
+		/* intentionally empty */
+	}
+	
+	//! Write one DM velocity component of every level 0..levelmax_.
+	/*!
+	 * The datasets are named "level_NNN_DM_v" followed by the axis letter
+	 * 'x'+coord, i.e. vx, vy, vz for coord = 0, 1, 2.
+	 *
+	 * @param coord  component index; 0, 1, 2 give x, y, z
+	 * @param gh     grid hierarchy holding the velocity component
+	 */
+	void write_dm_velocity( int coord, const grid_hierarchy& gh )
+	{
+		char sstr[128];
+		
+		for( unsigned ilevel=0; ilevel<=levelmax_; ++ilevel )
+		{
+			//... the name is always fully written, so no uninitialized buffer
+			//... is ever used as dataset name, whatever value coord has
+			snprintf( sstr, sizeof(sstr), "level_%03u_DM_v%c", ilevel, (char)('x'+coord) );
+			
+			write2HDF5( fname_, sstr, *gh.get_grid(ilevel) );
+		}
+	}
+	
+	//! Write one DM displacement component of every level 0..levelmax_.
+	/*!
+	 * The datasets are named "level_NNN_DM_d" followed by the axis letter
+	 * 'x'+coord, i.e. dx, dy, dz for coord = 0, 1, 2.
+	 *
+	 * @param coord  component index; 0, 1, 2 give x, y, z
+	 * @param gh     grid hierarchy holding the displacement component
+	 */
+	void write_dm_position( int coord, const grid_hierarchy& gh )
+	{
+		char sstr[128];
+		
+		for( unsigned ilevel=0; ilevel<=levelmax_; ++ilevel )
+		{
+			//... the name is always fully written, so no uninitialized buffer
+			//... is ever used as dataset name, whatever value coord has
+			snprintf( sstr, sizeof(sstr), "level_%03u_DM_d%c", ilevel, (char)('x'+coord) );
+			
+			write2HDF5( fname_, sstr, *gh.get_grid(ilevel) );
+		}
+	}
+	
+	//! Write the DM density of every level and log the density extrema.
+	/*!
+	 * Each level 0..levelmax_ is stored as dataset "level_NNN_DM_rho". All
+	 * unrefined cells are then scanned for the smallest and the largest
+	 * value, over all levels and on the finest level only, and the results
+	 * are logged together with the positions offset by the configured
+	 * setup/shift_{x,y,z}.
+	 *
+	 * @param gh  grid hierarchy holding the DM density
+	 */
+	void write_dm_density( const grid_hierarchy& gh )
+	{
+		for( unsigned ilevel=0; ilevel<=levelmax_; ++ilevel )
+		{
+			char sstr[128];
+			sprintf(sstr,"level_%03d_DM_rho",ilevel);
+			write2HDF5( fname_, sstr, *gh.get_grid(ilevel) );
+		}
+
+		//... extrema over all levels, with position and level of the cell
+		real_t rhomax = -1e30, rhomin = 1e30;
+		double loc_rhomax[3] = {0.0,0.0,0.0}, loc_rhomin[3] = {0.0,0.0,0.0};
+		int lvl_rhomax = 0, lvl_rhomin = 0;
+
+		//... extrema restricted to the finest level
+		real_t rhomax_lm = -1e30, rhomin_lm = 1e30;
+		double loc_rhomax_lm[3] = {0.0,0.0,0.0}, loc_rhomin_lm[3] = {0.0,0.0,0.0};
+
+		for( int ilevel=gh.levelmax(); ilevel>=(int)gh.levelmin(); --ilevel )
+		{
+			for( unsigned i=0; i<gh.get_grid(ilevel)->size(0); ++i )
+			for( unsigned j=0; j<gh.get_grid(ilevel)->size(1); ++j )
+			for( unsigned k=0; k<gh.get_grid(ilevel)->size(2); ++k )
+			{
+				//... refined cells are skipped
+				if( gh.is_refined(ilevel,i,j,k) )
+					continue;
+
+				const real_t rho = (*gh.get_grid(ilevel))(i,j,k);
+				const bool on_finest = ( ilevel == (int)gh.levelmax() );
+
+				if( rho > rhomax )
+				{
+					rhomax = rho;
+					lvl_rhomax = ilevel;
+					gh.cell_pos(ilevel, i, j, k, loc_rhomax);
+				}
+
+				if( rho < rhomin )
+				{
+					rhomin = rho;
+					lvl_rhomin = ilevel;
+					gh.cell_pos(ilevel, i, j, k, loc_rhomin);
+				}
+
+				if( on_finest && rho > rhomax_lm )
+				{
+					rhomax_lm = rho;
+					gh.cell_pos(ilevel, i, j, k, loc_rhomax_lm);
+				}
+
+				if( on_finest && rho < rhomin_lm )
+				{
+					rhomin_lm = rho;
+					gh.cell_pos(ilevel, i, j, k, loc_rhomin_lm);
+				}
+			}
+		}
+
+		//... configured shift in units of levelmin_ cells, with opposite sign
+		const double h = 1.0/(1<<levelmin_);
+		double shift[3];
+		shift[0] = -(double)cf_.getValue<int>( "setup", "shift_x" )*h;
+		shift[1] = -(double)cf_.getValue<int>( "setup", "shift_y" )*h;
+		shift[2] = -(double)cf_.getValue<int>( "setup", "shift_z" )*h;
+
+		//... the level is only reported when there is more than one
+		const bool multi_level = ( gh.levelmin() != gh.levelmax() );
+
+		LOGINFO("Global density extrema: ");
+		if( multi_level )
+		{
+			LOGINFO("  minimum: delta=%f at (%f,%f,%f) (level=%d)",rhomin,loc_rhomin[0],loc_rhomin[1],loc_rhomin[2],lvl_rhomin);
+		}
+		else
+		{
+			LOGINFO("  minimum: delta=%f at (%f,%f,%f)",rhomin,loc_rhomin[0],loc_rhomin[1],loc_rhomin[2]);
+		}
+		LOGINFO("       shifted back at (%f,%f,%f)",loc_rhomin[0]+shift[0],loc_rhomin[1]+shift[1],loc_rhomin[2]+shift[2]);
+
+		if( multi_level )
+		{
+			LOGINFO("  maximum: delta=%f at (%f,%f,%f) (level=%d)",rhomax,loc_rhomax[0],loc_rhomax[1],loc_rhomax[2],lvl_rhomax);
+		}
+		else
+		{
+			LOGINFO("  maximum: delta=%f at (%f,%f,%f)",rhomax,loc_rhomax[0],loc_rhomax[1],loc_rhomax[2]);
+		}
+		LOGINFO("       shifted back at (%f,%f,%f)",loc_rhomax[0]+shift[0],loc_rhomax[1]+shift[1],loc_rhomax[2]+shift[2]);
+
+		if( multi_level )
+		{
+			LOGINFO("Density extrema on finest level: ");
+			LOGINFO("  minimum: delta=%f at (%f,%f,%f)",rhomin_lm,loc_rhomin_lm[0],loc_rhomin_lm[1],loc_rhomin_lm[2]);
+			LOGINFO("       shifted back at (%f,%f,%f)",loc_rhomin_lm[0]+shift[0],loc_rhomin_lm[1]+shift[1],loc_rhomin_lm[2]+shift[2]);
+			LOGINFO("  maximum: delta=%f at (%f,%f,%f)",rhomax_lm,loc_rhomax_lm[0],loc_rhomax_lm[1],loc_rhomax_lm[2]);
+			LOGINFO("       shifted back at (%f,%f,%f)",loc_rhomax_lm[0]+shift[0],loc_rhomax_lm[1]+shift[1],loc_rhomax_lm[2]+shift[2]);
+		}
+	}
+	
+	//! Write the DM potential of every level 0..levelmax_ as "level_NNN_DM_potential".
+	void write_dm_potential( const grid_hierarchy& gh )
+	{
+		for( unsigned ilevel=0; ilevel<=levelmax_; ++ilevel )
+		{
+			//... dataset name of this level
+			char dset_name[128];
+			sprintf( dset_name, "level_%03d_DM_potential", ilevel );
+
+			write2HDF5( fname_, dset_name, *gh.get_grid(ilevel) );
+		}
+	}
+	
+	//! Write the gas potential of every level 0..levelmax_ as "level_NNN_BA_potential".
+	void write_gas_potential( const grid_hierarchy& gh )
+	{
+		for( unsigned ilevel=0; ilevel<=levelmax_; ++ilevel )
+		{
+			//... dataset name of this level
+			char dset_name[128];
+			sprintf( dset_name, "level_%03d_BA_potential", ilevel );
+
+			write2HDF5( fname_, dset_name, *gh.get_grid(ilevel) );
+		}
+	}
+	
+	
+	
+	//! Write one gas velocity component of every level 0..levelmax_.
+	/*!
+	 * The datasets are named "level_NNN_BA_v" followed by the axis letter
+	 * 'x'+coord, i.e. vx, vy, vz for coord = 0, 1, 2.
+	 *
+	 * @param coord  component index; 0, 1, 2 give x, y, z
+	 * @param gh     grid hierarchy holding the velocity component
+	 */
+	void write_gas_velocity( int coord, const grid_hierarchy& gh )
+	{	
+		char sstr[128];
+		
+		for( unsigned ilevel=0; ilevel<=levelmax_; ++ilevel )
+		{
+			//... the name is always fully written, so no uninitialized buffer
+			//... is ever used as dataset name, whatever value coord has
+			snprintf( sstr, sizeof(sstr), "level_%03u_BA_v%c", ilevel, (char)('x'+coord) );
+			
+			write2HDF5( fname_, sstr, *gh.get_grid(ilevel) );
+		}
+	}
+	
+	//! Gas position output hook; this plugin writes nothing here.
+	void write_gas_position( int coord, const grid_hierarchy& gh )
+	{
+		/* intentionally empty */
+	}
+	
+	//! Write the gas density of every level 0..levelmax_ as "level_NNN_BA_rho".
+	void write_gas_density( const grid_hierarchy& gh )
+	{
+		for( unsigned ilevel=0; ilevel<=levelmax_; ++ilevel )
+		{
+			//... dataset name of this level
+			char dset_name[128];
+			sprintf( dset_name, "level_%03d_BA_rho", ilevel );
+
+			write2HDF5( fname_, dset_name, *gh.get_grid(ilevel) );
+		}
+	}
+	
+	//! Finalization hook; this plugin has nothing left to do here.
+	void finalize( void )
+	{
+		/* intentionally empty */
+	}
+};
+
+
+
+//... file-local creator object for generic_output_plugin, constructed with the name "generic"
+namespace
+{
+	output_plugin_creator_concrete< generic_output_plugin > creator( "generic" );
+}
+
+
+#endif
+
--- a/plugins/output_grafic2.cc
+++ b/plugins/output_grafic2.cc
@ -0,0 +1,592 @@
+/*
+ 
+ output_grafic2.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fstream>
+#include "output.hh"
+
+
+//! Implementation of class grafic2_output_plugin 
+/*!
+ This class implements a grafic-2 (cf. Bertschinger 2001) compatible
+ output format, with some RAMSES-specific extras (refinement maps,
+ passive-variable files and a partial namelist).
+*/
+class grafic2_output_plugin : public output_plugin
+{
+protected:
+	
+	
+	//! Header record written at the start of every file (see write_file_header)
+	struct header
+	{
+		int   n1;        //!< grid size along dimension 0
+		int   n2;        //!< grid size along dimension 1
+		int   n3;        //!< grid size along dimension 2
+		float dxini0;    //!< cell size of the level
+		float xoff10;    //!< absolute grid offset along dimension 0, times dxini0
+		float xoff20;    //!< absolute grid offset along dimension 1, times dxini0
+		float xoff30;    //!< absolute grid offset along dimension 2, times dxini0
+		float astart0;   //!< 1/(1+zstart)
+		float omega_m0;  //!< Omega_m from the configuration
+		float omega_l0;  //!< Omega_L from the configuration
+		float h00;       //!< H0 from the configuration
+	};
+	
+	bool bhavehydro_;
+  //float metal_floor_;
+    int passive_variable_index_;
+    float passive_variable_value_;
+	
+	//! Write the per-file header record for one refinement level
+	/*!
+	 * Fills a header struct from the grid hierarchy and the configuration and
+	 * writes it framed by its own byte count (an int before and after the
+	 * payload).
+	 * @param ofs    output stream the record is appended to
+	 * @param ilevel refinement level whose grid is described
+	 * @param gh     grid hierarchy providing grid sizes and absolute offsets
+	 */
+	void write_file_header( std::ofstream& ofs, unsigned ilevel, const grid_hierarchy& gh )
+	{
+		const double boxlength = cf_.getValue<double>("setup","boxlength");
+		const double H0        = cf_.getValue<double>("cosmology","H0");
+		const double zstart    = cf_.getValue<double>("setup","zstart");
+		const double omegam    = cf_.getValue<double>("cosmology","Omega_m");
+		const double omegaL    = cf_.getValue<double>("cosmology","Omega_L");
+		
+		header hdr;
+		
+		// extent of this level's grid
+		hdr.n1 = gh.get_grid(ilevel)->size(0);
+		hdr.n2 = gh.get_grid(ilevel)->size(1);
+		hdr.n3 = gh.get_grid(ilevel)->size(2);
+		
+		// cell size: boxlength/(H0/100), halved for every level
+		hdr.dxini0 = boxlength / (H0*0.01) / pow(2.0,ilevel);
+		
+		// offsets use the single-precision cell size stored just above
+		hdr.xoff10 = gh.offset_abs(ilevel,0) * hdr.dxini0;
+		hdr.xoff20 = gh.offset_abs(ilevel,1) * hdr.dxini0;
+		hdr.xoff30 = gh.offset_abs(ilevel,2) * hdr.dxini0;
+		
+		hdr.astart0  = 1.0/(1.0+zstart);
+		hdr.omega_m0 = omegam;
+		hdr.omega_l0 = omegaL;
+		hdr.h00      = H0;
+		
+		// record = length marker, payload, length marker
+		int reclen = sizeof(header);
+		char *marker = reinterpret_cast<char*> (&reclen);
+		ofs.write( marker, sizeof(int) );
+		ofs.write( reinterpret_cast<char*> (&hdr), reclen );
+		ofs.write( marker, sizeof(int) );
+	}
+	
+	//! Write one level's grid as a sequence of framed 2D slices
+	/*!
+	 * For each index i along the third grid dimension, the n1*n2 values at
+	 * (k,j,i) are multiplied by fac, stored as float with the first grid
+	 * index varying fastest, and written as one record framed by its byte
+	 * count (an unsigned before and after the data).
+	 * @param ofs    output stream the records are appended to
+	 * @param ilevel refinement level whose grid is written
+	 * @param gh     grid hierarchy holding the data
+	 * @param fac    factor applied to every value before it is written
+	 */
+	void write_sliced_array( std::ofstream& ofs, unsigned ilevel, const grid_hierarchy& gh, float fac = 1.0f )
+	{
+		unsigned n1,n2,n3;
+		n1 = gh.get_grid(ilevel)->size(0);
+		n2 = gh.get_grid(ilevel)->size(1);
+		n3 = gh.get_grid(ilevel)->size(2);
+		
+		// One slice worth of storage, reused for every i. It must keep its
+		// size: the former data.clear() set the size to zero, which made the
+		// indexed writes below undefined behaviour. Every element is
+		// overwritten per slice, so no reset is needed.
+		std::vector<float> data(n1*n2,0.0f);
+		
+		// byte count framing each record; identical for all slices
+		unsigned blksize = n1*n2*sizeof(float);
+		
+		for( unsigned i=0; i<n3; ++i )
+		{
+			for( unsigned j=0; j<n2; ++j )
+				for( unsigned k=0; k<n1; ++k )
+					data[j*n1+k] = (*gh.get_grid(ilevel))(k,j,i) * fac;
+			
+			ofs.write( reinterpret_cast<char*> (&blksize), sizeof(unsigned) );
+			ofs.write( reinterpret_cast<char*> (&data[0]), blksize );
+			ofs.write( reinterpret_cast<char*> (&blksize), sizeof(unsigned) );
+		}
+	}
+    
+    //! Restrict a fine-level mask onto the next coarser level
+    /*!
+     * The coarse mask is zeroed first. Every non-zero fine cell (i,j,k) then
+     * marks the coarse cell (i/2+o1, j/2+o2, k/2+o3). Finally all marked
+     * coarse cells are set to exactly 1.0f. Both masks are stored with the
+     * third index varying fastest.
+     * @param n1,n2,n3    extent of the fine mask
+     * @param o1,o2,o3    offset of the fine grid, in coarse cells
+     * @param n1c,n2c,n3c extent of the coarse mask
+     * @param finemask    input, n1*n2*n3 values
+     * @param coarsemask  output, n1c*n2c*n3c values, fully overwritten
+     * @return number of coarse cells that ended up marked
+     */
+    size_t restrict_mask( size_t n1, size_t n2, size_t n3, size_t o1, size_t o2, size_t o3,
+                        size_t n1c, size_t n2c, size_t n3c, const float* finemask, float* coarsemask )
+    {
+        const size_t ncoarse = n1c*n2c*n3c;
+        
+        for( size_t idx=0; idx<ncoarse; ++idx )
+            coarsemask[idx] = 0.0f;
+        
+        // the fine mask is visited in storage order, so a running index suffices
+        size_t fine_idx = 0;
+        for( size_t i=0; i<n1; ++i )
+            for( size_t j=0; j<n2; ++j )
+            {
+                // start of the coarse k-row that this fine (i,j) column maps to
+                const size_t row_base = ((i/2+o1)*n2c + (j/2+o2))*n3c + o3;
+                
+                for( size_t k=0; k<n3; ++k, ++fine_idx )
+                {
+                    if( finemask[fine_idx] )
+                        coarsemask[row_base + k/2] += 1.0f;
+                }
+            }
+        
+        // collapse the hit counts to a 0/1 mask and count the marked cells
+        size_t count_ref = 0;
+        for( size_t idx=0; idx<ncoarse; ++idx )
+        {
+            if( !(coarsemask[idx] > 0.1f) )
+                continue;
+            coarsemask[idx] = 1.0f;
+            ++count_ref;
+        }
+        
+        return count_ref;
+    }
+    
+    //! Write one mask file: header record followed by framed 2D slices
+    /*!
+     * The mask is stored with the third index varying fastest; each slice at
+     * fixed third index k is written with the first index varying fastest,
+     * multiplied by fac, and framed by its byte count.
+     * @param path     file to create (truncated if it exists)
+     * @param ilevel   level passed on to write_file_header
+     * @param gh       grid hierarchy passed on to write_file_header
+     * @param mask     n1*n2*n3 mask values
+     * @param n1,n2,n3 extent of the mask
+     * @param fac      factor applied to every value before it is written
+     */
+    void write_mask_file( const std::string& path, unsigned ilevel, const grid_hierarchy& gh,
+                          const std::vector<float>& mask, size_t n1, size_t n2, size_t n3, float fac )
+    {
+        std::ofstream ofs( path.c_str(), std::ios::binary|std::ios::trunc );
+        write_file_header( ofs, ilevel, gh );
+        
+        std::vector<float> block(n1*n2,0.0f);
+        unsigned blksize = n1*n2*sizeof(float);
+        
+        for( size_t k=0; k<n3; ++k )
+        {
+            for( size_t j=0; j<n2; ++j )
+                for( size_t i=0; i<n1; ++i )
+                    block[j*n1+i] = mask[(i*n2+j)*n3+k] * fac;
+            
+            ofs.write( reinterpret_cast<char*> (&blksize), sizeof(unsigned) );
+            ofs.write( reinterpret_cast<char*> (&block[0]), blksize );
+            ofs.write( reinterpret_cast<char*> (&blksize), sizeof(unsigned) );
+        }
+    }
+    
+    //! Write the refinement map (and optional passive-variable file) of every level
+    /*!
+     * The mask of the finest level is 1 wherever gh.is_refined() is false and
+     * 0 elsewhere. Each coarser level's mask is obtained with restrict_mask()
+     * from the level above it. For every level the mask goes to
+     * "<fname_>/level_NNN/ic_refmap"; if passive_variable_value_ is positive,
+     * the mask scaled by that value also goes to
+     * "<fname_>/level_NNN/ic_pvar_MMMMM".
+     * @param gh grid hierarchy describing the nested grids
+     */
+    void write_refinement_mask( const grid_hierarchy& gh )
+    {
+        const unsigned lmax = gh.levelmax();
+        const bool write_pvar = passive_variable_value_ > 0.0f;
+        
+        // Only the short, bounded suffix is formatted into a fixed buffer;
+        // the user-configured fname_ is prepended as a std::string, so a long
+        // output path can no longer overflow a char[256].
+        char tag[64];
+        
+        size_t n1,n2,n3;
+        n1 = gh.get_grid(lmax)->size(0);
+        n2 = gh.get_grid(lmax)->size(1);
+        n3 = gh.get_grid(lmax)->size(2);
+        
+        std::vector<float> data(n1*n2*n3,0.0f);
+        
+        // finest level: mask straight from the hierarchy
+        for( size_t i=0; i<n1; ++i )
+            for( size_t j=0; j<n2; ++j )
+                for( size_t k=0; k<n3; ++k )
+                    data[(i*n2+j)*n3+k] = gh.is_refined(lmax,i,j,k) ? 0.0f : 1.0f;
+        
+        snprintf( tag, sizeof(tag), "/level_%03u/ic_refmap", lmax );
+        write_mask_file( fname_+tag, lmax, gh, data, n1, n2, n3, 1.0f );
+        
+        if( write_pvar )
+        {
+            snprintf( tag, sizeof(tag), "/level_%03u/ic_pvar_%05d", lmax, passive_variable_index_ );
+            write_mask_file( fname_+tag, lmax, gh, data, n1, n2, n3, passive_variable_value_ );
+        }
+        
+        // All coarser levels, from levelmax_-1 down to levelmin_. The
+        // decrement sits in the condition: with the former
+        // "ilevel>=levelmin_; --ilevel" an unsigned counter could never drop
+        // below levelmin_==0, so the loop would not terminate.
+        for( unsigned ilevel=levelmax_; ilevel-- > levelmin_; )
+        {
+            size_t n1c,n2c,n3c,o1,o2,o3;
+            n1c = gh.get_grid(ilevel)->size(0);
+            n2c = gh.get_grid(ilevel)->size(1);
+            n3c = gh.get_grid(ilevel)->size(2);
+            
+            n1 = gh.get_grid(ilevel+1)->size(0);
+            n2 = gh.get_grid(ilevel+1)->size(1);
+            n3 = gh.get_grid(ilevel+1)->size(2);
+            
+            o1 = gh.get_grid(ilevel+1)->offset(0);
+            o2 = gh.get_grid(ilevel+1)->offset(1);
+            o3 = gh.get_grid(ilevel+1)->offset(2);
+            
+            std::vector<float> data_coarse( n1c*n2c*n3c, 0.0f );
+            
+            size_t nref;
+            nref = restrict_mask( n1, n2, n3, o1, o2, o3, n1c, n2c, n3c, &data[0], &data_coarse[0] );
+            
+            LOGINFO("%f of cells on level %d are refined",(double)nref/(n1c*n2c*n3c),ilevel);
+            
+            snprintf( tag, sizeof(tag), "/level_%03u/ic_refmap", ilevel );
+            write_mask_file( fname_+tag, ilevel, gh, data_coarse, n1c, n2c, n3c, 1.0f );
+            
+            if( write_pvar )
+            {
+                snprintf( tag, sizeof(tag), "/level_%03u/ic_pvar_%05d", ilevel, passive_variable_index_ );
+                write_mask_file( fname_+tag, ilevel, gh, data_coarse, n1c, n2c, n3c, passive_variable_value_ );
+            }
+            
+            // this level's mask is the fine mask of the next iteration
+            data.swap( data_coarse );
+        }
+    }
+    
+    //! Write a partial RAMSES namelist to "<fname_>/ramses.nml"
+    /*!
+     * Emits the RUN_PARAMS, INIT_PARAMS, AMR_PARAMS and REFINE_PARAMS groups
+     * with values derived from the grid hierarchy, the "setup/padding"
+     * configuration value and the passive-variable settings of this plugin.
+     * @param gh grid hierarchy providing levelmin and levelmax
+     */
+    void write_ramses_namelist( const grid_hierarchy& gh )
+    {
+        // fname_ comes from the configuration and may be arbitrarily long, so
+        // paths are assembled as std::string instead of sprintf'ing them into
+        // a fixed char[256].
+        const std::string nml_path = fname_ + "/ramses.nml";
+        
+        std::ofstream ofst( nml_path.c_str(), std::ios::trunc );
+        
+        // -- RUN_PARAMS -- //
+        ofst
+          << "&RUN_PARAMS\n"
+          << "cosmo=.true.\n"
+          << "pic=.true.\n"
+          << "poisson=.true.\n";
+        
+        if( bhavehydro_ )
+          ofst << "hydro=.true.\n";
+        else
+          ofst << "hydro=.false.\n";
+        
+        ofst
+          << "nrestart=0\n"
+          << "nremap=1\n"
+          << "nsubcycle=";
+        
+        // one entry per output level, followed by two fixed trailing entries
+        for( unsigned ilevel=gh.levelmin(); ilevel<=gh.levelmax(); ++ilevel )
+          ofst << "1,";
+        ofst << "1,2\n";
+        
+        ofst
+          << "ncontrol=1\n"
+          << "verbose=.false.\n/\n\n";
+        
+        // -- INIT_PARAMS -- //
+        ofst
+          << "&INIT_PARAMS\n"
+          << "filetype=\'grafic\'\n";
+        for( unsigned i=gh.levelmin();i<=gh.levelmax(); ++i)
+        {
+            // only the zero-padded level number needs printf-style formatting
+            char lvl[16];
+            snprintf( lvl, sizeof(lvl), "%03u", i );
+            ofst << "initfile(" << i-gh.levelmin()+1 << ")=\'" << fname_ << "/level_" << lvl << "\'\n";
+        }
+        ofst << "/\n\n";
+        
+        const unsigned naddref = 8; // refinement levels allowed beyond levelmax
+        
+        // nexpand is derived from the padding; padding-1 would wrap around
+        // for an (unsigned) padding of zero
+        const unsigned padding = cf_.getValue<unsigned>("setup","padding");
+        const unsigned nexpand = (padding > 0) ? (padding-1)/2 : 0;
+        
+        // -- AMR_PARAMS -- //
+        ofst << "&AMR_PARAMS\n"
+            << "levelmin=" << gh.levelmin() << "\n"
+            << "levelmax=" << gh.levelmax()+naddref << "\n"
+            << "nexpand=";
+        
+        if( gh.levelmax() == gh.levelmin() )
+          ofst << "1";
+        else 
+        {
+          for( unsigned ilevel=gh.levelmin(); ilevel<gh.levelmax()-1; ++ilevel )
+            ofst << nexpand << ",";
+          ofst << "1,1";
+        }
+        
+        ofst << "\n"
+             << "ngridtot=2000000\n"
+             << "nparttot=3000000\n"
+             << "/\n\n";
+        
+        // -- REFINE_PARAMS -- //
+        ofst << "&REFINE_PARAMS\n"
+            << "m_refine=" << gh.levelmax()-gh.levelmin()+1+naddref << "*8.,\n";
+        
+        if( bhavehydro_ )
+          ofst << "ivar_refine=" << 5+passive_variable_index_ << "\n"
+               << "var_cut_refine=" << passive_variable_value_*0.01 << "\n";
+        else
+          ofst << "ivar_refine=0\n";
+        
+        ofst << "mass_cut_refine=" << 2.0/pow(2,3*gh.levelmax()) << "\n"
+             << "interpol_var=1\n"
+             << "interpol_type=0\n"
+             << "/\n\n";
+        
+        // report the namelist file itself, not just the output directory
+        LOGINFO("The grafic2 output plug-in wrote the grid data to a partial");
+        LOGINFO("   RAMSES namelist file \'%s\'",nml_path.c_str() );
+    }
+	
+	//! Write the older-style partial RAMSES namelist to "<fname_>/ramses.nml"
+	/*!
+	 * Emits INIT_PARAMS and AMR_PARAMS and, if there is more than one level,
+	 * a REFINE_PARAMS group with one entry per refined level: the centre of
+	 * each nested grid (x/y/z_refine), a radius derived from its smallest
+	 * extent (r_refine) and fixed m_refine / exp_refine values.
+	 * @param gh grid hierarchy providing levels, offsets and grid sizes
+	 */
+	void write_ramses_namelist_old( const grid_hierarchy& gh )
+	{
+		// paths are assembled as std::string: the user-configured fname_ can
+		// exceed the fixed char[256] that sprintf used to write into
+		const std::string nml_path = fname_ + "/ramses.nml";
+		
+		std::ofstream ofst(nml_path.c_str(),std::ios::trunc);
+		
+		ofst 
+		<< "&INIT_PARAMS\n"
+		<< "filetype=\'grafic\'\n";
+		for( unsigned i=gh.levelmin();i<=gh.levelmax(); ++i)
+		{
+			char lvl[16];
+			snprintf( lvl, sizeof(lvl), "%03u", i );
+			ofst << "initfile(" << i-gh.levelmin()+1 << ")=\'" << fname_ << "/level_" << lvl << "\'\n";
+		}
+		ofst << "/\n\n";
+		
+		ofst
+		<< "&AMR_PARAMS\n"
+		<< "levelmin=" << gh.levelmin() << "\n"
+		<< "levelmax=" << gh.levelmax() << "\n"
+		<< "ngridtot=2000000\n"
+		<< "nparttot=3000000\n"
+		<< "nexpand=1\n/\n\n";
+		
+		const size_t fprec = 12, fwid = 16;
+		
+		if( gh.levelmax() > gh.levelmin() )
+		{
+			ofst << "&REFINE_PARAMS\n"
+			<< "m_refine=  "<< std::setw(fwid) << std::setprecision(fprec) << 0.0;
+			for( unsigned i=gh.levelmin()+1;i<gh.levelmax(); ++i)
+				ofst << "," << std::setw(fwid) << std::setprecision(fprec) << 0.0;
+			
+			// x_refine, y_refine, z_refine: centre of the grid on level i+1,
+			// in units of the box, one entry per level i=levelmin..levelmax-1
+			static const char axis[3] = { 'x', 'y', 'z' };
+			for( int d=0; d<3; ++d )
+			{
+				ofst << "\n" << axis[d] << "_refine=  ";
+				for( unsigned i=gh.levelmin();i<gh.levelmax(); ++i)
+				{
+					const double l = (double)(1l<<(i+1));
+					const double c = ((double)gh.offset_abs(i+1,d)+0.5*(double)gh.size(i+1,d))/l;
+					if( i != gh.levelmin() )
+						ofst << ",";
+					// setw applies to the next item only, so it is set right
+					// before the number
+					ofst << std::setw(fwid) << std::setprecision(fprec) << c;
+				}
+			}
+			
+			ofst << "\nr_refine=  ";
+			for(unsigned i=gh.levelmin();i<gh.levelmax(); ++i )
+			{
+				size_t nmax = std::min(gh.size(i+1,0),std::min(gh.size(i+1,1),gh.size(i+1,2)));
+				
+				double r = (nmax-4.0)/(double)(1l<<(i+1));
+				if( i==gh.levelmin() )
+					ofst << std::setw(fwid) << std::setprecision(fprec) << r;
+				else
+					ofst << "," << std::setw(fwid) << std::setprecision(fprec) << r;
+			}
+			ofst << "\nexp_refine=" << std::setw(fwid) << std::setprecision(fprec) << 10.0;
+			for( unsigned i=gh.levelmin()+1;i<gh.levelmax(); ++i)
+				ofst << "," << std::setw(fwid) << std::setprecision(fprec) << 10.0;
+			ofst << "\n/\n";
+		}
+		
+		std::cout	<< " - The grafic2 output plug-in wrote the grid data to a partial\n"
+		<< "   RAMSES namelist file \'" << nml_path << "\'\n"; 
+	}
+	
+public:
+	
+	//! Create the output directory tree and read the plugin options
+	/*!
+	 * Removes any existing file called fname_, creates the directory fname_
+	 * and one sub-directory "level_NNN" per level from levelmin_ to levelmax_,
+	 * then reads "setup/baryons" and the optional "output/ramses_pvar_idx"
+	 * (default 1) and "output/ramses_pvar_val" (default 1.0).
+	 * @param cf configuration file object
+	 */
+	grafic2_output_plugin( config_file& cf )
+	: output_plugin( cf )
+	{
+		// create directory structure
+		remove( fname_.c_str() );
+		mkdir( fname_.c_str(), 0777 );
+		for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel )
+		{
+			// Only the bounded "/level_NNN" suffix goes through a fixed
+			// buffer; prepending fname_ as a std::string avoids overflowing
+			// a char[256] when the configured output path is long.
+			char lvl[32];
+			snprintf( lvl, sizeof(lvl), "/level_%03u", ilevel );
+			const std::string fp = fname_ + lvl;
+			mkdir( fp.c_str(), 0777 );
+		}
+		
+		
+		bhavehydro_ = cf.getValue<bool>("setup","baryons");
+		passive_variable_index_ = cf.getValueSafe<int>("output","ramses_pvar_idx",1);
+		passive_variable_value_ = cf.getValueSafe<float>("output","ramses_pvar_val",1.0f);
+	}
+	
+	/*~grafic2_output_plugin()
+	 { }*/
+	
+	
+	//! Write one dark matter position component for every level
+	/*!
+	 * For each level from levelmin_ to levelmax_ the file
+	 * "<fname_>/level_NNN/ic_poscX" (X = 'x'+coord) is created with a header
+	 * record followed by the grid data multiplied by "setup/boxlength".
+	 * @param coord component index; 0, 1, 2 give the suffixes x, y, z
+	 * @param gh    grid hierarchy holding that component
+	 */
+	void write_dm_position( int coord, const grid_hierarchy& gh  )
+	{
+		const double boxlength = cf_.getValue<double>("setup","boxlength");
+		
+		for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel )
+		{
+			// bounded suffix in a fixed buffer, user path as std::string:
+			// a long fname_ can no longer overflow a char[256]
+			char tag[64];
+			snprintf( tag, sizeof(tag), "/level_%03u/ic_posc%c", ilevel, (char)('x'+coord) );
+			const std::string path = fname_ + tag;
+			
+			std::ofstream ofs(path.c_str(),std::ios::binary|std::ios::trunc);
+			
+			write_file_header( ofs, ilevel, gh );
+			write_sliced_array( ofs, ilevel, gh, boxlength );
+		}
+	}
+	
+	//! Write one dark matter velocity component for every level
+	/*!
+	 * For each level from levelmin_ to levelmax_ the file
+	 * "<fname_>/level_NNN/ic_velcX" (X = 'x'+coord) is created with a header
+	 * record followed by the grid data multiplied by "setup/boxlength".
+	 * @param coord component index; 0, 1, 2 give the suffixes x, y, z
+	 * @param gh    grid hierarchy holding that component
+	 */
+	void write_dm_velocity( int coord, const grid_hierarchy& gh )
+	{
+		const double boxlength = cf_.getValue<double>("setup","boxlength");
+		
+		for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel )
+		{
+			// bounded suffix in a fixed buffer, user path as std::string:
+			// a long fname_ can no longer overflow a char[256]
+			char tag[64];
+			snprintf( tag, sizeof(tag), "/level_%03u/ic_velc%c", ilevel, (char)('x'+coord) );
+			const std::string path = fname_ + tag;
+			
+			std::ofstream ofs(path.c_str(),std::ios::binary|std::ios::trunc);
+			
+			write_file_header( ofs, ilevel, gh );
+			write_sliced_array( ofs, ilevel, gh, boxlength );
+		}
+	}
+	
+	//! Write one baryon velocity component for every level
+	/*!
+	 * For each level from levelmin_ to levelmax_ the file
+	 * "<fname_>/level_NNN/ic_velbX" (X = 'x'+coord) is created with a header
+	 * record followed by the grid data multiplied by "setup/boxlength".
+	 * @param coord component index; 0, 1, 2 give the suffixes x, y, z
+	 * @param gh    grid hierarchy holding that component
+	 */
+	void write_gas_velocity( int coord, const grid_hierarchy& gh )
+	{	
+		const double boxlength = cf_.getValue<double>("setup","boxlength");
+		
+		for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel )
+		{
+			// bounded suffix in a fixed buffer, user path as std::string:
+			// a long fname_ can no longer overflow a char[256]
+			char tag[64];
+			snprintf( tag, sizeof(tag), "/level_%03u/ic_velb%c", ilevel, (char)('x'+coord) );
+			const std::string path = fname_ + tag;
+			
+			std::ofstream ofs(path.c_str(),std::ios::binary|std::ios::trunc);
+			
+			write_file_header( ofs, ilevel, gh );
+			write_sliced_array( ofs, ilevel, gh, boxlength );
+		}
+	}
+	
+	//! Write the baryon density contrast file for every level
+	/*!
+	 * For each level from levelmin_ to levelmax_ the file
+	 * "<fname_>/level_NNN/ic_deltab" is created with a header record followed
+	 * by the unscaled grid data.
+	 * @param gh grid hierarchy holding the density field
+	 */
+	void write_gas_density( const grid_hierarchy& gh )
+	{	
+		for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel )
+		{
+			// bounded suffix in a fixed buffer, user path as std::string:
+			// a long fname_ can no longer overflow a char[256]
+			char tag[64];
+			snprintf( tag, sizeof(tag), "/level_%03u/ic_deltab", ilevel );
+			const std::string path = fname_ + tag;
+			
+			std::ofstream ofs(path.c_str(),std::ios::binary|std::ios::trunc);
+			
+			write_file_header( ofs, ilevel, gh );
+			write_sliced_array( ofs, ilevel, gh );
+		}
+	}
+	
+	
+	//! Handle the dark matter density and write the auxiliary RAMSES files
+	/*!
+	 * Without baryons the field is written through write_gas_density().
+	 * After that, the namelist selected by "output/ramses_nml" (default true)
+	 * or else "output/ramses_old_nml" (default false) is written, and for a
+	 * multi-level hierarchy the refinement masks as well.
+	 * @param gh grid hierarchy holding the density field
+	 */
+	void write_dm_density( const grid_hierarchy& gh )
+	{
+		if( !bhavehydro_ )
+		{
+			write_gas_density( gh );
+		}
+		
+		// the old-style option is consulted only when the new one is off
+		const bool new_style_nml = cf_.getValueSafe<bool>("output","ramses_nml",true);
+		if( new_style_nml )
+		{
+			write_ramses_namelist( gh );
+		}
+		else
+		{
+			const bool old_style_nml = cf_.getValueSafe<bool>("output","ramses_old_nml",false);
+			if( old_style_nml )
+				write_ramses_namelist_old( gh );
+		}
+		
+		const bool single_level = ( gh.levelmin() == gh.levelmax() );
+		if( !single_level )
+			write_refinement_mask( gh );
+	}
+	
+	//! Intentionally empty: this plugin writes nothing for the particle masses
+	void write_dm_mass( const grid_hierarchy& gh )
+	{
+	}
+	
+	//! Intentionally empty: this plugin writes nothing for the dark matter potential
+	void write_dm_potential( const grid_hierarchy& gh )
+	{
+	}
+	
+	//! Intentionally empty: this plugin writes nothing for the baryon potential
+	void write_gas_potential( const grid_hierarchy& gh )
+	{
+	}
+	
+	//! Intentionally empty: this plugin writes nothing for gas positions
+	void write_gas_position( int coord, const grid_hierarchy& gh )
+	{
+	}
+	
+	//! Intentionally empty: nothing is done when output is finalized
+	void finalize( void )
+	{
+	}
+	
+};
+
+namespace
+{
+	// File-local creator object that ties the name "grafic2" to
+	// grafic2_output_plugin.
+	output_plugin_creator_concrete< grafic2_output_plugin > creator( "grafic2" );
+}
+
--- a/plugins/output_tipsy.cc
+++ b/plugins/output_tipsy.cc
--- a/plugins/output_tipsy_resample.cc
+++ b/plugins/output_tipsy_resample.cc
--- a/plugins/point_file_reader.hh
+++ b/plugins/point_file_reader.hh
@ -0,0 +1,135 @@
+#ifndef POINT_FILE_READER_HH
+#define POINT_FILE_READER_HH
+
+#include <cstdlib>
+#include <fstream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+#include "log.hh"
+
+//! Reader for text files listing the points that define a zoom region
+/*!
+ * Every line holds whitespace-separated numbers. Files with 3 columns are
+ * treated as positions; files with 6 columns as positions followed by
+ * velocities. All values end up in one flat vector, see
+ * read_points_from_file().
+ */
+struct point_reader{
+    
+    int num_columns;   //!< column count found on the first line of the last file read; 0 before any read
+    
+    point_reader( void )
+    : num_columns( 0 )
+    { }
+    
+    //! Map a coordinate difference into [-0.5,0.5] by one shift of the unit period
+    static double wrap_periodic( double dx )
+    {
+        if( dx < -0.5 ) dx += 1.0;
+        else if( dx > 0.5 ) dx -= 1.0;
+        return dx;
+    }
+    
+    //! Check whether a token parses completely as a floating point number
+    bool isFloat( std::string myString )
+    {
+        std::istringstream iss(myString);
+        double f;
+        // the whole token must be consumed without a conversion failure
+        iss >> f;
+        return iss.eof() && !iss.fail();
+    }
+    
+    //! Read all numeric values of a point file into a flat vector
+    /*!
+     * Tokens that do not parse as numbers are skipped. The first point
+     * serves as reference: every other position is replaced by its nearest
+     * periodic image (unit period) relative to that reference. For 6-column
+     * files each velocity triplet is replaced by position - velocity/vfac_,
+     * likewise wrapped relative to the reference, so p then holds two
+     * position triplets per input line.
+     * @param fname name of the point file
+     * @param vfac_ divisor converting velocities into displacements
+     * @param p     output; cleared first, then filled with the values
+     * @throws std::runtime_error if the file cannot be opened, holds no
+     *         numbers, or its value count does not fit the column layout
+     */
+    template< typename real_t >
+    void read_points_from_file( std::string fname, float vfac_, std::vector<real_t>& p )
+    {
+        std::ifstream ifs(fname.c_str());
+        if( !ifs )
+        {
+            LOGERR("region_ellipsoid_plugin::read_points_from_file : Could not open file \'%s\'",fname.c_str());
+            throw std::runtime_error("region_ellipsoid_plugin::read_points_from_file : cannot open point file.");
+        }
+        
+        int colcount = 0, row = 0;
+        p.clear();
+        
+        std::string line, token;
+        while( std::getline(ifs,line) )
+        {
+            // Split on any whitespace. Splitting on ' ' alone rejected
+            // tab-separated values and the last value of lines ending in
+            // '\r', because such tokens are not consumed completely.
+            std::istringstream ss(line);
+            int colcount1 = 0;
+            while( ss >> token )
+            {
+                if( !isFloat( token ) ) continue;
+                p.push_back( strtod(token.c_str(),NULL) );
+                ++colcount1;
+            }
+            
+            // the first line defines the expected number of columns
+            if( row == 0 )
+                colcount = colcount1;
+            ++row;
+            
+            if( row>1 && colcount != colcount1 )
+                LOGERR("error on line %d of input file",row);
+        }
+        
+        LOGINFO("region point file appears to contain %d columns",colcount);
+        
+        // an empty file used to pass the triplet test below and then read
+        // p[0..2] out of bounds
+        if( p.empty() )
+        {
+            LOGERR("Region point file \'%s\' does not contain any points",fname.c_str());
+            throw std::runtime_error("region_ellipsoid_plugin::read_points_from_file : file does not contain any points.");
+        }
+        
+        // a multiple of 6 is also a multiple of 3, so one test covers both
+        if( p.size()%3 != 0 )
+        {
+            LOGERR("Region point file \'%s\' does not contain triplets (%lu elems)",fname.c_str(),(unsigned long)p.size());
+            throw std::runtime_error("region_ellipsoid_plugin::read_points_from_file : file does not contain triplets.");
+        }
+        
+        // the 6-column branch reads six values at a time
+        if( colcount == 6 && p.size()%6 != 0 )
+        {
+            LOGERR("Region point file \'%s\' does not contain complete 6-column rows (%lu elems)",fname.c_str(),(unsigned long)p.size());
+            throw std::runtime_error("region_ellipsoid_plugin::read_points_from_file : file does not contain complete 6-column rows.");
+        }
+        
+        // reference point for the periodic wrapping
+        double x0[3] = { p[0],p[1],p[2] }, dx;
+        
+        if( colcount == 3 )
+        {
+            // only positions are given
+            
+            for( size_t i=3; i<p.size(); i+=3 )
+            {
+                for( size_t j=0; j<3; ++j )
+                {
+                    dx = wrap_periodic( p[i+j]-x0[j] );
+                    p[i+j] = x0[j] + dx;
+                }
+            }
+        }
+        else if( colcount == 6 )
+        {
+            // positions and velocities are given
+            
+            //... include the velocities to unapply Zeldovich approx.
+            
+            // first row: its position is the reference and stays untouched
+            for( size_t j=3; j<6; ++j )
+            {
+                dx = wrap_periodic( (p[j-3]-p[j]/vfac_)-x0[j-3] );
+                p[j] = x0[j-3] + dx;
+            }
+            
+            for( size_t i=6; i<p.size(); i+=6 )
+            {
+                // wrap the position first: the velocity step uses the result
+                for( size_t j=0; j<3; ++j )
+                {
+                    dx = wrap_periodic( p[i+j]-x0[j] );
+                    p[i+j] = x0[j] + dx;
+                }
+                
+                for( size_t j=3; j<6; ++j )
+                {
+                    dx = wrap_periodic( (p[i+j-3]-p[i+j]/vfac_)-x0[j-3] );
+                    p[i+j] = x0[j-3] + dx;
+                }
+            }
+        }
+        else
+            LOGERR("Problem interpreting the region point file \'%s\'", fname.c_str() );
+        
+        num_columns = colcount;
+    }
+    
+    
+};
+
+
+#endif
--- a/plugins/region_convex_hull.cc
+++ b/plugins/region_convex_hull.cc
@ -0,0 +1,144 @@
+#include <vector>
+#include <iostream>
+#include <cmath>
+#include <cassert>
+#include <fstream>
+#include <sstream>
+#include <cctype>
+#include <algorithm>
+
+#include "region_generator.hh"
+#include "convex_hull.hh"
+#include "point_file_reader.hh"
+
+
+//! Convex hull region plugin
+//! Convex hull region plugin
+/*!
+ * Defines the refinement region as the convex hull of the points listed in
+ * the file named by "setup/region_point_file". If "setup/region_point_shift"
+ * is present, that shift (in cells of "setup/region_point_levelmin") is
+ * subtracted from the points before the hull is built.
+ */
+class region_convex_hull_plugin : public region_generator_plugin{
+private:
+    
+    convex_hull<double> *phull_;   //!< owned: allocated in the constructor, freed in the destructor
+    int shift[3], shift_level, padding_;
+    double vfac_;
+    bool do_extra_padding_;
+    
+    // phull_ is an owning raw pointer, so a copy would delete it twice.
+    // Copying is disabled (declared private, never defined).
+    region_convex_hull_plugin( const region_convex_hull_plugin& );
+    region_convex_hull_plugin& operator=( const region_convex_hull_plugin& );
+    
+    //! Subtract a shift given in cells of level levelmin from Np points (3 values each)
+    void apply_shift( size_t Np, double *p, int *shift, int levelmin )
+    {
+        double dx = 1.0/(1<<levelmin);
+        LOGINFO("unapplying shift of previous zoom region to region particles :\n" \
+                "\t [%d,%d,%d] = (%f,%f,%f)",shift[0],shift[1],shift[2],shift[0]*dx,shift[1]*dx,shift[2]*dx);
+        
+        for( size_t i=0,i3=0; i<Np; i++,i3+=3 )
+            for( size_t j=0; j<3; ++j )
+                p[i3+j] = p[i3+j]-shift[j]*dx;
+    }
+    
+public:
+    //! Read the region points, build the convex hull and expand it
+    /*!
+     * @param cf configuration file object
+     * @throws std::runtime_error if "setup/region_point_shift" is present
+     *         but is not three comma-separated integers
+     */
+    explicit region_convex_hull_plugin( config_file& cf )
+    : region_generator_plugin( cf )
+    {
+        std::vector<double> pp;
+        
+        // Default to "no shift". These used to stay uninitialised when no
+        // region_point_shift was configured, although get_center_unshifted()
+        // reads them (shift_level even as a shift count).
+        shift[0] = shift[1] = shift[2] = 0;
+        shift_level = 0;
+        
+        vfac_ = cf.getValue<double>("cosmology","vfact");
+        padding_ = cf.getValue<int>("setup","padding");
+        
+        
+        std::string point_file = cf.getValue<std::string>("setup","region_point_file");
+        
+        point_reader pfr;
+        pfr.read_points_from_file( point_file, vfac_, pp );
+        
+        if( cf.containsKey("setup","region_point_shift") )
+        {
+            std::string point_shift = cf.getValue<std::string>("setup","region_point_shift");
+            // a partially parsed shift would silently move the region
+            if( sscanf( point_shift.c_str(), "%d,%d,%d", &shift[0],&shift[1],&shift[2] ) != 3 )
+            {
+                LOGERR("region_convex_hull_plugin: cannot parse region_point_shift=\'%s\', expected three comma-separated integers",point_shift.c_str());
+                throw std::runtime_error("region_convex_hull_plugin: invalid region_point_shift.");
+            }
+            unsigned point_levelmin = cf.getValue<unsigned>("setup","region_point_levelmin");
+            
+            apply_shift( pp.size()/3, &pp[0], shift, point_levelmin );
+            shift_level = point_levelmin;
+        }
+        
+        // compute the convex hull
+        phull_ =  new convex_hull<double>(  &pp[0], pp.size()/3 );
+        
+        // expand the hull by sqrt(3) cells of the finest level
+        unsigned levelmax = cf.getValue<unsigned>("setup","levelmax");
+        double dx = 1.0/(1ul<<levelmax);
+        phull_->expand( sqrt(3.)*dx );
+        
+        // output the center
+        double c[3] = { phull_->centroid_[0], phull_->centroid_[1], phull_->centroid_[2] };
+        LOGINFO("Region center from convex hull centroid determined at\n\t (%f,%f,%f)",c[0],c[1],c[2]);
+        
+        //-----------------------------------------------------------------
+        // when querying the bounding box, do we need extra padding?
+        do_extra_padding_ = false;
+        
+        // conditions should be added here
+        {
+            std::string output_plugin = cf.getValue<std::string>("output","format");
+            if( output_plugin == std::string("grafic2") )
+                do_extra_padding_ = true;
+        }
+    }
+    
+    ~region_convex_hull_plugin()
+    {
+        delete phull_;
+    }
+    
+    //! Axis-aligned bounding box of the hull, enlarged for the given level
+    /*!
+     * The hull's box is grown on every side by sqrt(3) cells of the level
+     * and, for the grafic2 output format, by a further padding_+1 cells.
+     * @param left  output: lower corner (3 values)
+     * @param right output: upper corner (3 values)
+     * @param level refinement level that sets the cell size
+     */
+    void get_AABB( double *left, double *right, unsigned level )
+    {
+        for( int i=0; i<3; ++i )
+        {
+            left[i] = phull_->left_[i];
+            right[i] = phull_->right_[i];
+        }
+        double dx = 1.0/(1ul<<level);
+        double pad = (double)(padding_+1) * dx;
+        
+        if( ! do_extra_padding_ ) pad = 0.0;
+        
+        double ext = sqrt(3.0)*dx + pad;
+        
+        for( int i=0;i<3;++i )
+        {
+            left[i]  -= ext;
+            right[i] += ext;
+        }
+        
+    }
+    
+    void update_AABB( double *left, double *right )
+    {
+        // we ignore this, the grid generator must have generated a grid that contains the hull
+        // it might have enlarged it, but who cares...
+    }
+
+    //! Ask the hull whether point x (3 values) belongs to the region
+    bool query_point( double *x )
+    {   return phull_->check_point( x );   }
+    
+    //! This region never forces the grid dimensions
+    bool is_grid_dim_forced( size_t* ndims )
+    {   return false;   }
+    
+    //! Centroid of the hull
+    void get_center( double *xc )
+    {
+        xc[0] = phull_->centroid_[0];
+        xc[1] = phull_->centroid_[1];
+        xc[2] = phull_->centroid_[2];
+    }
+    
+    //! Centroid of the hull with the configured point shift added back
+    void get_center_unshifted( double *xc )
+    {
+        // the sum is formed directly from the stored centroid; the former
+        // intermediate float copy discarded precision
+        double dx = 1.0/(1<<shift_level);
+        xc[0] = phull_->centroid_[0]+shift[0]*dx;
+        xc[1] = phull_->centroid_[1]+shift[1]*dx;
+        xc[2] = phull_->centroid_[2]+shift[2]*dx;
+        
+    }
+};
+
+namespace{
+    region_generator_plugin_creator_concrete< region_convex_hull_plugin > creator("convex_hull");
+}
+
--- a/plugins/region_ellipsoid.cc
+++ b/plugins/region_ellipsoid.cc
@ -0,0 +1,681 @@
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cmath>
+#include <cstdio>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <gsl/gsl_math.h>
+#include <gsl/gsl_eigen.h>
+
+#include "region_generator.hh"
+
+
+/***** Math helper functions ******/
+
+//! Square of the argument, x*x, for any type providing operator*
+template <typename X>
+inline X sqr( X x )
+{
+    const X squared = x*x;
+    return squared;
+}
+
+//! Determinant of a 3x3 matrix
+/*!
+ \param data the nine matrix elements in row-major order, data[3*row+col]
+ \return the determinant, from a cofactor expansion along the first row
+ 
+ The elements are promoted to double before any arithmetic. The function
+ returns double, but the previous version formed all products and
+ differences in single precision, so cancellation errors were baked into the
+ result before it was widened.
+ */
+inline double Determinant_3x3( const float *data )
+{
+    const double a = data[0], b = data[1], c = data[2];
+    const double d = data[3], e = data[4], f = data[5];
+    const double g = data[6], h = data[7], i = data[8];
+    
+    return a*(e*i - h*f)
+         - b*(d*i - f*g)
+         + c*(d*h - e*g);
+}
+
+//! Inverse of a 3x3 matrix
+/*!
+ \param data input matrix, nine elements in row-major order
+ \param m    output: the inverse, same layout; may point to the same storage as data
+ 
+ The inverse is formed as adjugate/determinant. All input elements are read
+ (and promoted to double) before anything is written, so the routine also
+ works in place; previously m[0] was stored before data[0] was read again,
+ which corrupted the result when m aliased data. No singularity check is
+ made: a zero determinant yields inf/NaN entries, as before.
+ */
+inline void Inverse_3x3( const float *data, float *m )
+{
+    const double a = data[0], b = data[1], c = data[2];
+    const double d = data[3], e = data[4], f = data[5];
+    const double g = data[6], h = data[7], i = data[8];
+    
+    // adjugate (transposed cofactor matrix), row-major
+    const double adj[9] = {
+         (e*i - h*f), -(b*i - c*h),  (b*f - c*e),
+        -(d*i - f*g),  (a*i - c*g), -(a*f - c*d),
+         (d*h - e*g), -(a*h - b*g),  (a*e - b*d)
+    };
+    
+    // determinant by expansion along the first row, reusing the cofactors above
+    const double det    = a*adj[0] + b*adj[3] + c*adj[6];
+    const double invdet = 1.0/det;
+    
+    for( int k=0; k<9; ++k )
+        m[k] = (float)(adj[k]*invdet);
+}
+
+void Inverse_4x4( float *mat ) /*! In-place inverse of the 4x4 matrix stored in mat[0..15].
+ *  The matrix is first copied transposed into src, the 16 cofactors are
+ *  accumulated in dst using precomputed pairwise products (tmp), the
+ *  determinant is obtained from the first four cofactors, and finally
+ *  dst/det is written back to mat. All intermediate arithmetic is done in
+ *  double. There is no check for a vanishing determinant: a singular input
+ *  leads to a division by zero in "det = 1/det".
+ */
+{
+    double tmp[12]; /* pairwise products of source elements, reused by several cofactors */
+    double src[16]; /* transposed copy of the input matrix */
+    double det; /* determinant, later overwritten by its reciprocal */
+    double dst[16]; /* cofactors, later scaled into the inverse */
+                
+/* transpose matrix: src[i + 4*c] = mat[4*i + c] */
+    for (int i = 0; i < 4; i++)
+    {
+        src[i] = mat[i*4];
+        src[i + 4] = mat[i*4 + 1];
+        src[i + 8] = mat[i*4 + 2];
+        src[i + 12] = mat[i*4 + 3];
+    }
+    
+    tmp[0] = src[10] * src[15]; // pairs for the first 8 cofactors, built from src[8..15]
+    tmp[1] = src[11] * src[14];
+    tmp[2] = src[9] * src[15];
+    tmp[3] = src[11] * src[13];
+    tmp[4] = src[9] * src[14];
+    tmp[5] = src[10] * src[13];
+    tmp[6] = src[8] * src[15];
+    tmp[7] = src[11] * src[12];
+    tmp[8] = src[8] * src[14];
+    tmp[9] = src[10] * src[12];
+    tmp[10] = src[8] * src[13];
+    tmp[11] = src[9] * src[12];
+    
+    /* calculate first 8 elements (cofactors); each is built as "positive terms" then "-= negative terms" */
+    dst[0] = tmp[0]*src[5] + tmp[3]*src[6] + tmp[4]*src[7];
+    dst[0] -= tmp[1]*src[5] + tmp[2]*src[6] + tmp[5]*src[7];
+    dst[1] = tmp[1]*src[4] + tmp[6]*src[6] + tmp[9]*src[7];
+    dst[1] -= tmp[0]*src[4] + tmp[7]*src[6] + tmp[8]*src[7];
+    dst[2] = tmp[2]*src[4] + tmp[7]*src[5] + tmp[10]*src[7];
+    dst[2] -= tmp[3]*src[4] + tmp[6]*src[5] + tmp[11]*src[7];
+    dst[3] = tmp[5]*src[4] + tmp[8]*src[5] + tmp[11]*src[6];
+    dst[3] -= tmp[4]*src[4] + tmp[9]*src[5] + tmp[10]*src[6];
+    dst[4] = tmp[1]*src[1] + tmp[2]*src[2] + tmp[5]*src[3];
+    dst[4] -= tmp[0]*src[1] + tmp[3]*src[2] + tmp[4]*src[3];
+    dst[5] = tmp[0]*src[0] + tmp[7]*src[2] + tmp[8]*src[3];
+    dst[5] -= tmp[1]*src[0] + tmp[6]*src[2] + tmp[9]*src[3];
+    dst[6] = tmp[3]*src[0] + tmp[6]*src[1] + tmp[11]*src[3];
+    dst[6] -= tmp[2]*src[0] + tmp[7]*src[1] + tmp[10]*src[3];
+    dst[7] = tmp[4]*src[0] + tmp[9]*src[1] + tmp[10]*src[2];
+    dst[7] -= tmp[5]*src[0] + tmp[8]*src[1] + tmp[11]*src[2];
+    
+    /* calculate pairs for second 8 elements (cofactors), built from src[0..7]; tmp is reused */
+    tmp[0] = src[2]*src[7];
+    tmp[1] = src[3]*src[6];
+    tmp[2] = src[1]*src[7];
+    tmp[3] = src[3]*src[5];
+    tmp[4] = src[1]*src[6];
+    tmp[5] = src[2]*src[5];
+    tmp[6] = src[0]*src[7];
+    tmp[7] = src[3]*src[4];
+    tmp[8] = src[0]*src[6];
+    tmp[9] = src[2]*src[4];
+    tmp[10] = src[0]*src[5];
+    tmp[11] = src[1]*src[4];
+    
+    /* calculate second 8 elements (cofactors) */
+    dst[8] = tmp[0]*src[13] + tmp[3]*src[14] + tmp[4]*src[15];
+    dst[8] -= tmp[1]*src[13] + tmp[2]*src[14] + tmp[5]*src[15];
+    dst[9] = tmp[1]*src[12] + tmp[6]*src[14] + tmp[9]*src[15];
+    dst[9] -= tmp[0]*src[12] + tmp[7]*src[14] + tmp[8]*src[15];
+    dst[10] = tmp[2]*src[12] + tmp[7]*src[13] + tmp[10]*src[15];
+    dst[10]-= tmp[3]*src[12] + tmp[6]*src[13] + tmp[11]*src[15];
+    dst[11] = tmp[5]*src[12] + tmp[8]*src[13] + tmp[11]*src[14];
+    dst[11]-= tmp[4]*src[12] + tmp[9]*src[13] + tmp[10]*src[14];
+    dst[12] = tmp[2]*src[10] + tmp[5]*src[11] + tmp[1]*src[9];
+    dst[12]-= tmp[4]*src[11] + tmp[0]*src[9] + tmp[3]*src[10];
+    dst[13] = tmp[8]*src[11] + tmp[0]*src[8] + tmp[7]*src[10];
+    dst[13]-= tmp[6]*src[10] + tmp[9]*src[11] + tmp[1]*src[8];
+    dst[14] = tmp[6]*src[9] + tmp[11]*src[11] + tmp[3]*src[8];
+    dst[14]-= tmp[10]*src[11] + tmp[2]*src[8] + tmp[7]*src[9];
+    dst[15] = tmp[10]*src[10] + tmp[4]*src[8] + tmp[9]*src[9];
+    dst[15]-= tmp[8]*src[9] + tmp[11]*src[10] + tmp[5]*src[8];
+    
+    /* calculate determinant from the first four source elements and their cofactors */
+    det=src[0]*dst[0]+src[1]*dst[1]+src[2]*dst[2]+src[3]*dst[3];
+    
+    /* calculate matrix inverse: scale every cofactor by 1/det and store back (narrowed to float) */
+    det = 1/det; // NOTE(review): no guard against det == 0
+    for (int j = 0; j < 16; j++)
+    {    dst[j] *= det;
+        mat[j] = dst[j];
+    }
+  
+}
+
+/***** Minimum Volume Bounding Ellipsoid Implementation ******/
+/*
+ * Finds the minimum volume enclosing ellipsoid (MVEE) of a set of data
+ * points stored in matrix P. The following optimization problem is solved:
+ *
+ *     minimize -log(det(A))      (i.e. log(det(A^-1)); the volume scales as det(A)^(-1/2))
+ *     s.t. (P_i - c)'*A*(P_i - c)<= 1
+ *
+ * in variables A and c, where P_i is the i-th column of the matrix P.
+ * The solver is based on Khachiyan Algorithm, and the final solution is
+ * different from the optimal value by the pre-specified amount of 'tolerance'.
+ *
+ * The ellipsoid equation is given in the canonical form
+ *     (x-c)' A (x-c) <= 1
+ *
+ * Code was adapted from the MATLAB version by Nima Moshtagh (nima@seas.upenn.edu)
+ */
+class min_ellipsoid
+{
+protected:
+    size_t N;
+    float X[16];
+    float c[3];
+    float A[9], Ainv[9];
+    float *Q;
+    float *u;
+    
+    float v1[3],v2[3],v3[3],r1,r2,r3;
+    
+    float V[9], mu[3];
+    
+    bool axes_computed;
+    
+    //! Eigen-decomposition of the ellipsoid matrix A
+    /*!
+     Fills, with the eigenvalues sorted in ascending order:
+       - mu[n]      : n-th eigenvalue
+       - V[3*n+d]   : component d of the n-th eigenvector
+       - v1,v2,v3   : the same three eigenvectors as separate arrays
+       - r1,r2,r3   : 1/sqrt(eigenvalue)
+     and sets axes_computed.
+     */
+    void compute_axes( void )
+    {
+        // GSL operates on doubles, so work on a promoted copy of A
+        double Acopy[9];
+        for( int k=0; k<9; ++k ) Acopy[k] = (double)A[k];
+        
+        gsl_vector *evals = gsl_vector_alloc(3);
+        gsl_matrix *evecs = gsl_matrix_alloc(3, 3);
+        gsl_eigen_symmv_workspace *work = gsl_eigen_symmv_alloc(3);
+        
+        gsl_matrix_view Aview = gsl_matrix_view_array( Acopy, 3, 3 );
+        gsl_eigen_symmv( &Aview.matrix, evals, evecs, work );
+        gsl_eigen_symmv_sort( evals, evecs, GSL_EIGEN_SORT_VAL_ASC );
+        
+        float *axis[3]   = { v1, v2, v3 };
+        float *radius[3] = { &r1, &r2, &r3 };
+        
+        for( int n=0; n<3; ++n )
+        {
+            const double lambda = gsl_vector_get( evals, n );
+            mu[n]      = lambda;
+            *radius[n] = 1.0 / sqrt( lambda );
+            
+            // the n-th eigenvector is the n-th column of evecs
+            for( int d=0; d<3; ++d )
+            {
+                const double comp = gsl_matrix_get( evecs, d, n );
+                V[3*n+d]   = comp;
+                axis[n][d] = comp;
+            }
+        }
+        
+        gsl_eigen_symmv_free( work );
+        gsl_matrix_free( evecs );
+        gsl_vector_free( evals );
+        
+        axes_computed = true;
+    }
+  
+    /*! Use the Khachiyan Algorithm to find the minimum bounding ellipsoid.
+     *  Iteratively updates the per-point weights u[0..N-1], using the lifted
+     *  points Q (4 floats per point).
+     *  \param tol    iteration stops once the Euclidean norm of the change in u is <= tol
+     *  \param maxit  maximum number of iterations; an error is logged if it is reached
+     */
+    void compute( double tol = 0.001, int maxit = 10000 )
+    {
+        double err = 10.0 * tol; // start above tol so the loop is entered
+        float *unew = new float[N]; // scratch buffer for the updated weights
+        int count = 0;
+        double temp;
+        
+        while( err > tol && count < maxit )
+        {
+            for( int i=0; i<4; ++i ) // X(i,j) = sum_l Q(l,i) * u(l) * Q(l,j)
+                for( int j=0,i4=4*i; j<4; ++j )
+                {
+                    const int k = i4+j;
+                    temp = 0.0;
+                    for( size_t l=0; l<N; ++l )
+                        temp += (double)(Q[4*l+i] * u[l] * Q[4*l+j]); // product is formed in float, the sum in double
+                    X[k] = temp;
+                }
+            
+            Inverse_4x4(X); // X now holds its own inverse
+            
+            int imax = 0; float Mmax = -1e30;
+            double m;
+            for( size_t i=0; i<N; ++i ) // find the point with the largest quadratic form Q_i' X^-1 Q_i
+            {
+                m = 0.0;
+                for( int k=0; k<4; ++k )
+                    for( int l=0; l<4; ++l )
+                        m += (double)(Q[4*i+k] * X[4*l+k] * Q[4*i+l]);
+                if( m > Mmax )
+                {
+                    imax = i;
+                    Mmax = m;
+                }
+            }
+            
+            float step_size = (Mmax-4.0f)/(4.0f*(Mmax-1.0f)), step_size1 = 1.0f-step_size; // presumably the Khachiyan step (M-d-1)/((d+1)(M-1)) with d=3 -- TODO confirm
+            for( size_t i=0; i<N; ++i ) // shrink all weights ...
+                unew[i] = u[i] * step_size1;
+            unew[imax] += step_size; // ... and move the freed weight onto the extreme point
+            
+            err = 0.0;
+            for( size_t i=0; i<N; ++i ) // err = |unew - u|, then accept the new weights
+            {
+                err += sqr(unew[i]-u[i]);
+                u[i] = unew[i];
+            }
+            err = sqrt(err);
+            ++count;
+        }
+        
+        if( count >= maxit )
+            LOGERR("No convergence in min_ellipsoid::compute: maximum number of iterations reached!");
+        
+        delete[] unew;
+    }
+    
+public:
+    //! Construct the minimum volume enclosing ellipsoid of a set of points
+    /*!
+     \param N_ number of points
+     \param P  point coordinates, three per point: P[3*i+j] is component j of point i
+     \throws std::runtime_error if fewer than 4 points are given
+     
+     With fewer than 4 points the 4x4 system inverted in compute() is
+     singular, and with N_ == 0 compute() would write outside its weight
+     buffer; such inputs are therefore rejected up front.
+     */
+    min_ellipsoid( size_t N_, double* P )
+    : N( N_ ), Q( 0 ), u( 0 ), axes_computed( false )
+    {
+        // --- initialize ---
+        // N is a size_t; cast explicitly so that it matches the %lld conversion
+        LOGINFO("computing minimum bounding ellipsoid from %lld points",(long long)N);
+      
+        if( N < 4 )
+        {
+            LOGERR("min_ellipsoid: need at least 4 points to compute a bounding ellipsoid");
+            throw std::runtime_error("min_ellipsoid: need at least 4 points to compute a bounding ellipsoid");
+        }
+      
+        Q = new float[4*N];
+        u = new float[N];
+        
+        // start from uniform weights
+        for( size_t i=0; i<N; ++i )
+            u[i] = 1.0/N;
+        
+        // lifted points: (x,y,z,1); size_t offsets avoid int overflow for large N
+        for( size_t i=0; i<N; ++i )
+        {
+            const size_t i4=4*i, i3=3*i;
+            for( size_t j=0; j<3; ++j )
+                Q[i4+j] = P[i3+j];
+            Q[i4+3] = 1.0f;
+        }
+        
+        //--- compute the actual ellipsoid using the Khachiyan Algorithm ---
+        compute();
+        
+        //--- determine the ellipsoid A matrix ---
+        // weighted mean of the points
+        double Pu[3];
+        for( int j=0; j<3; ++j )
+        {
+            Pu[j] = 0.0;
+            for( size_t i=0; i<N; ++i )
+                Pu[j] += P[3*i+j] * u[i];
+        }
+      
+        // determine center
+        c[0] = Pu[0]; c[1] = Pu[1]; c[2] = Pu[2];
+      
+        // need to do summation in double precision due to
+        // possible catastrophic cancellation issues when
+        // using many input points
+        double Atmp[9];
+        for( int i=0; i<3; ++i )
+            for( int j=0,i3=3*i; j<3; ++j )
+            {
+                const int k = i3+j;
+                Atmp[k] = 0.0;
+                for( size_t l=0; l<N; ++l )
+                    Atmp[k] += P[3*l+i] * u[l] * P[3*l+j];
+                Atmp[k] -= Pu[i]*Pu[j];
+            }
+      
+        for( int i=0;i<9;++i)
+          Ainv[i] = Atmp[i];
+        
+        // A = (1/3) * inverse of the weighted scatter matrix; Ainv is scaled to match
+        Inverse_3x3( Ainv, A );
+        for( size_t i=0; i<9; ++i ){ A[i] /= 3.0; Ainv[i] *= 3.0; }
+    }
+    
+    //! Construct the ellipsoid directly from its equation (x-c)' A (x-c) <= 1
+    /*!
+     \param A_ the nine elements of the matrix A
+     \param c_ the three coordinates of the center
+     
+     No point data is attached to such an object, so Q and u are set to null;
+     they used to be left uninitialised and were then passed to delete[] in
+     the destructor. Ainv is filled with the inverse of A instead of zeros so
+     that get_AABB() and print() are meaningful straight after construction.
+     */
+    min_ellipsoid( const double* A_, const double *c_ )
+    : N( 0 ), Q( 0 ), u( 0 ), axes_computed( false )
+    {
+        for( int i=0; i<9; ++i )
+            A[i] = A_[i];
+        for( int i=0; i<3; ++i )
+            c[i] = c_[i];
+        
+        Inverse_3x3( A, Ainv );
+    }
+    
+    //! Destructor: releases the point buffer Q and the weight buffer u
+    ~min_ellipsoid()
+    {
+        delete[] Q;
+        delete[] u;
+    }
+    
+    //! Test whether a point satisfies (x-c)' A (x-c) <= 1
+    /*!
+     \param x the three coordinates of the point
+     \return true if the quadratic form evaluates to at most 1
+     
+     The offset from the center is held in double: the former float array was
+     brace-initialised from x[i]-c[i], which is a narrowing conversion for
+     T=double and threw away the precision of the caller's coordinates.
+     */
+    template<typename T>
+    bool check_point( const T *x )
+    {
+        const double q[3] = { (double)x[0]-c[0], (double)x[1]-c[1], (double)x[2]-c[2] };
+        
+        double r = 0.0;
+        for( int i=0; i<3; ++i )
+            for( int j=0; j<3; ++j )
+                r += q[i]*A[3*j+i]*q[j];
+        
+        return r <= 1.0;
+    }
+    
+    //! Write A, Ainv (three values per line) and the center c to std::cout
+    void print( void )
+    {
+        const float *matrix[2] = { A, Ainv };
+        const char  *title[2]  = { "A = \n", "Ainv = \n" };
+        
+        for( int m=0; m<2; ++m )
+        {
+            std::cout << title[m];
+            for( int row=0; row<3; ++row )
+            {
+                std::cout << std::endl;
+                for( int col=0; col<3; ++col )
+                    std::cout << matrix[m][3*row+col] << "   ";
+            }
+            std::cout << std::endl;
+        }
+        
+        std::cout << "c = (" << c[0] << ", " << c[1] << ", " << c[2] << ")\n";
+    }
+    
+    //! Axis-aligned bounding box of the ellipsoid
+    /*!
+     \param left  output: c[d] - sqrt(Ainv(d,d)) for each axis d
+     \param right output: c[d] + sqrt(Ainv(d,d)) for each axis d
+     */
+    template<typename T>
+    void get_AABB( T *left, T *right )
+    {
+        for( int d=0; d<3; ++d )
+        {
+            const int dd = 3*d+d;   // index of the diagonal element (d,d)
+            right[d] = c[d] + sqrt(Ainv[dd]);
+            left[d]  = c[d] - sqrt(Ainv[dd]);
+        }
+    }
+    
+    //! Copy the three center coordinates into xc
+    void get_center( float* xc )
+    {
+        std::copy( c, c+3, xc );
+    }
+  
+    //! Copy the nine elements of the ellipsoid matrix A into AA
+    void get_matrix( float* AA )
+    {
+        std::copy( A, A+9, AA );
+    }
+    
+    //! Sign of x: -1 for negative values, +1 otherwise (zero included)
+    double sgn( double x )
+    {
+      return ( x < 0.0 ) ? -1.0 : 1.0;
+    }
+  
+    //! Enlarge every semi-axis of the ellipsoid by dr
+    /*!
+     \param dr length added to each semi-axis
+     
+     The principal axes are computed on first use. Each eigenvalue mu of A is
+     replaced by sgn(mu)/(1/sqrt(|mu|)+dr)^2, A is rebuilt from the unchanged
+     eigenvectors, and Ainv is recomputed. The cached eigenvalues and radii
+     are updated as well: previously they kept describing the ellipsoid before
+     the expansion, so a second call started again from the original axes
+     instead of the current ones.
+     */
+    void expand_ellipsoid( float dr )
+    {
+        if( !axes_computed )
+        {
+            std::cerr << "computing axes.....\n";
+            compute_axes();
+        }
+        
+        float munew[3];
+        for( int i=0; i<3; ++i )
+          munew[i] = sgn(mu[i])/sqr(1.0/sqrt(fabs(mu[i]))+dr);
+      
+        // A = sum_k mu_k v_k v_k', with the eigenvectors v_k stored as rows of V
+        float Anew[9];
+        for(int i=0; i<3; ++i )
+            for( int j=0; j<3; ++j )
+            {
+                Anew[3*i+j] = 0.0;
+                for( int k=0; k<3; ++k )
+                    Anew[3*i+j] += V[3*k+i] * munew[k] * V[3*k+j];
+            }
+        
+        for( int i=0; i<9; ++i )
+            A[i] = Anew[i];
+        
+        Inverse_3x3( A, Ainv );
+        
+        // keep the cached spectrum consistent with the new A
+        for( int i=0; i<3; ++i )
+            mu[i] = munew[i];
+        r1 = 1.0 / sqrt( mu[0] );
+        r2 = 1.0 / sqrt( mu[1] );
+        r3 = 1.0 / sqrt( mu[2] );
+    }
+};
+
+
+#include "point_file_reader.hh"
+#include "convex_hull.hh"
+
+//! Minimum volume enclosing ellipsoid plugin
+class region_ellipsoid_plugin : public region_generator_plugin{
+private:
+    
+    min_ellipsoid *pellip_;
+    int shift[3], shift_level, padding_;
+    double vfac_;
+    bool do_extra_padding_;
+  
+    
+    
+    //! Subtract a grid shift from a list of points
+    /*!
+     \param Np       number of points
+     \param p        coordinates, three per point, modified in place
+     \param shift    shift per axis, in cells of size 1/2^levelmin
+     \param levelmin level defining the cell size of the shift
+     */
+    void apply_shift( size_t Np, double *p, int *shift, int levelmin )
+    {
+        const double cell = 1.0/(1<<levelmin);
+        LOGINFO("unapplying shift of previous zoom region to region particles :\n" \
+                "\t [%d,%d,%d] = (%f,%f,%f)",shift[0],shift[1],shift[2],shift[0]*cell,shift[1]*cell,shift[2]*cell);
+      
+        for( size_t n=0; n<Np; ++n )
+        {
+            double *pt = p + 3*n;
+            for( size_t d=0; d<3; ++d )
+                pt[d] = pt[d]-shift[d]*cell;
+        }
+    }
+  
+public:
+    //! Build the ellipsoidal region from the configuration
+    /*!
+     \param cf configuration file object
+     \throws std::runtime_error if the region is insufficiently specified, if
+             the point file yields fewer than 4 points, or if a shift, matrix
+             row or center string cannot be parsed
+     
+     The ellipsoid is taken either from the points in
+     [setup]/region_point_file (minimum volume enclosing ellipsoid) or from the
+     explicit equation given by [setup]/region_ellipsoid_matrix[0..2] and
+     [setup]/region_ellipsoid_center. It is then expanded by one cell of
+     [setup]/levelmax.
+     
+     shift and shift_level are now always initialised (zero shift) because
+     get_center_unshifted() reads them even when no region_point_shift was
+     configured. Results of sscanf are checked. The unused flag bfrom_file
+     and the disabled ("if( false )") convex hull experiment, which had no
+     effect, have been removed.
+     */
+    explicit region_ellipsoid_plugin( config_file& cf )
+    : region_generator_plugin( cf ), pellip_( 0 ), shift_level( 0 )
+    {
+        shift[0] = shift[1] = shift[2] = 0;
+        
+        // sanity check
+        if( !cf.containsKey("setup", "region_point_file") &&
+           !( cf.containsKey("setup","region_ellipsoid_matrix[0]") &&
+              cf.containsKey("setup","region_ellipsoid_matrix[1]") &&
+              cf.containsKey("setup","region_ellipsoid_matrix[2]") &&
+              cf.containsKey("setup","region_ellipsoid_center") ) )
+        {
+            LOGERR("Insufficient data for region=ellipsoid\n Need to specify either \'region_point_file\' or the ellipsoid equation.");
+            throw std::runtime_error("Insufficient data for region=ellipsoid");
+        }
+      
+        vfac_ = cf.getValue<double>("cosmology","vfact");
+        padding_ = cf.getValue<int>("setup","padding");
+        
+        if( cf.containsKey("setup", "region_point_file") )
+        {
+            std::vector<double> pp;
+            const std::string point_file = cf.getValue<std::string>("setup","region_point_file");
+            
+            point_reader pfr;
+            pfr.read_points_from_file( point_file, vfac_, pp );
+            
+            // if file has more than three columns, just take first three
+            // at the moment...
+            if( pfr.num_columns > 3 )
+            {
+                std::vector<double> xx;
+                xx.reserve( 3 * pp.size()/pfr.num_columns );
+                
+                for( size_t i=0; i<pp.size()/pfr.num_columns; ++i )
+                    for( size_t j=0; j<3; ++j )
+                        xx.push_back( pp[ pfr.num_columns * i + j ] );
+                
+                pp.swap( xx );
+            }
+            
+            // an ellipsoid in 3D needs at least 4 points; this also protects &pp[0] below
+            if( pp.size() < 12 )
+            {
+                LOGERR("region=ellipsoid: the region point file must contain at least 4 points");
+                throw std::runtime_error("region=ellipsoid: the region point file must contain at least 4 points");
+            }
+            
+            if( cf.containsKey("setup","region_point_shift") )
+            {
+                std::string point_shift = cf.getValue<std::string>("setup","region_point_shift");
+                if( sscanf( point_shift.c_str(), "%d,%d,%d", &shift[0],&shift[1],&shift[2] ) != 3 )
+                    throw std::runtime_error("region=ellipsoid: could not parse \'region_point_shift\', expected three comma separated integers");
+                unsigned point_levelmin = cf.getValue<unsigned>("setup","region_point_levelmin");
+                
+                apply_shift( pp.size()/3, &pp[0], shift, point_levelmin );
+                shift_level = point_levelmin;
+            }
+            
+            pellip_ = new min_ellipsoid( pp.size()/3, &pp[0] );
+            
+        } else {
+            double A[9] = {0}, c[3] = {0};
+            std::string strtmp;
+            bool parsed = true;
+            
+            strtmp = cf.getValue<std::string>("setup","region_ellipsoid_matrix[0]");
+            parsed = parsed && ( sscanf( strtmp.c_str(), "%lf,%lf,%lf", &A[0],&A[1],&A[2] ) == 3 );
+            strtmp = cf.getValue<std::string>("setup","region_ellipsoid_matrix[1]");
+            parsed = parsed && ( sscanf( strtmp.c_str(), "%lf,%lf,%lf", &A[3],&A[4],&A[5] ) == 3 );
+            strtmp = cf.getValue<std::string>("setup","region_ellipsoid_matrix[2]");
+            parsed = parsed && ( sscanf( strtmp.c_str(), "%lf,%lf,%lf", &A[6],&A[7],&A[8] ) == 3 );
+            
+            strtmp = cf.getValue<std::string>("setup","region_ellipsoid_center");
+            parsed = parsed && ( sscanf( strtmp.c_str(), "%lf,%lf,%lf", &c[0],&c[1],&c[2] ) == 3 );
+            
+            if( !parsed )
+                throw std::runtime_error("region=ellipsoid: could not parse the ellipsoid matrix/center, expected three comma separated numbers per entry");
+            
+            pellip_ = new min_ellipsoid( A, c );
+        }
+        
+        // output the center
+        float c[3], A[9];
+        pellip_->get_center( c );
+        pellip_->get_matrix( A );
+        
+        LOGINFO("Region center for ellipsoid determined at\n\t xc = ( %f %f %f )",c[0],c[1],c[2]);
+        LOGINFO("Ellipsoid matrix determined as\n\t      ( %f %f %f )\n\t  A = ( %f %f %f )\n\t      ( %f %f %f )",
+                A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[8] );
+        
+        //expand the ellipsoid by one grid cell
+        unsigned levelmax = cf.getValue<unsigned>("setup","levelmax");
+        double dx = 1.0/(1ul<<levelmax);
+        pellip_->expand_ellipsoid( dx );
+        
+        //-----------------------------------------------------------------
+        // when querying the bounding box, do we need extra padding?
+        do_extra_padding_ = false;
+      
+        // conditions should be added here
+        {
+          std::string output_plugin = cf.getValue<std::string>("output","format");
+          if( output_plugin == std::string("grafic2") )
+            do_extra_padding_ = true;
+        }
+    }
+    
+    //! Destructor: frees the ellipsoid object held through pellip_
+    ~region_ellipsoid_plugin()
+    {   delete pellip_;   }
+    
+    //! Axis-aligned bounding box of the ellipsoid, enlarged by a safety margin
+    /*!
+     \param left   output: lower corner of the box (3 components)
+     \param right  output: upper corner of the box (3 components)
+     \param level  refinement level; the cell size used for the margin is 1/2^level
+     */
+    void get_AABB( double *left, double *right, unsigned level )
+    {
+        const double cell = 1.0/(1ul<<level);
+        
+        // (padding_+1) extra cells are only added when extra padding was requested
+        const double extra = do_extra_padding_ ? (double)(padding_+1) * cell : 0.0;
+        
+        // the margin is one cell diagonal plus the optional padding
+        const double margin = sqrt(3)*cell + extra;
+        
+        pellip_->get_AABB( left, right );
+        
+        for( int d=0; d<3; ++d )
+        {
+            right[d] += margin;
+            left[d]  -= margin;
+        }
+    }
+  
+    //! Intentionally a no-op for this region type
+    /*!
+     The bounding box handed in is ignored: the grid generator is expected to
+     have produced a grid that contains the ellipsoid; a possibly enlarged box
+     does no harm.
+     */
+    void update_AABB( double *left, double *right )
+    { }
+  
+    //! Returns the result of the ellipsoid's check_point() test for the point x
+    bool query_point( double *x )
+    {
+        const bool inside = pellip_->check_point( x );
+        return inside;
+    }
+    
+    //! This region type never prescribes grid dimensions; ndims is left untouched
+    bool is_grid_dim_forced( size_t* ndims )
+    {
+        return false;
+    }
+    
+    //! Copy the ellipsoid center into xc (3 components)
+    void get_center( double *xc )
+    {
+        float center[3];
+        pellip_->get_center( center );
+        
+        for( int d=0; d<3; ++d )
+            xc[d] = center[d];
+    }
+
+  //! Ellipsoid center with the stored shift added back
+  /*!
+   \param xc output: center[d] + shift[d]/2^shift_level for d = 0,1,2
+   */
+  void get_center_unshifted( double *xc )
+  {
+    float center[3];
+    pellip_->get_center( center );
+
+    const double cell = 1.0/(1<<shift_level);
+
+    for( int d=0; d<3; ++d )
+      xc[d] = center[d]+shift[d]*cell;
+  }
+
+};
+
+namespace{ // anonymous namespace: the object below is local to this translation unit
+    region_generator_plugin_creator_concrete< region_ellipsoid_plugin > creator("ellipsoid"); // presumably registers this plugin under the name "ellipsoid" -- confirm in region_generator.hh
+}
--- a/plugins/transfer_bbks.cc
+++ b/plugins/transfer_bbks.cc
@ -0,0 +1,85 @@
+/*
+ 
+ transfer_bbks.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#include "transfer_function.hh"
+
+//! Transfer function plugin implementing the BBKS fitting formula
+/*!
+ Analytical fit to the matter transfer function by Bardeen, Bond,
+ Kaiser & Szalay (BBKS), see Bardeen et al. (1986). The fit depends on a
+ single shape parameter Gamma that is fixed at construction.
+ */
+class transfer_bbks_plugin : public transfer_function_plugin{
+private:
+	double      m_Gamma;	//!< shape parameter; compute() evaluates the fit at q = k/m_Gamma
+	
+public:
+	//! Constructor
+	/*!
+	 \param cf configuration file object
+	 
+	 Reads [cosmology]/sugiyama_corr (mandatory; a std::runtime_error is thrown
+	 if it cannot be read) and [cosmology]/gamma (optional). A positive gamma
+	 is used as the shape parameter directly; otherwise it is set to
+	 Omega_m*0.01*H0, multiplied by exp(-Omega_b*(1+sqrt(2*0.01*H0)/Omega_m))
+	 when the Sugiyama correction is switched on.
+	 */
+	transfer_bbks_plugin( config_file& cf )
+    : transfer_function_plugin( cf )
+	{  
+		bool apply_sugiyama = true;
+		try{
+			apply_sugiyama = pcf_->getValue<bool>( "cosmology", "sugiyama_corr" );
+		}catch(...){
+			throw std::runtime_error("Error in \'tranfer_bbks_plugin\': need to specify \'[cosmology]/sugiyama_corr = [true/false]");
+		}
+		
+		// a non-positive value means "not given by the user"
+		const double gamma_cfg = pcf_->getValueSafe<double>( "cosmology", "gamma", -1.0 );
+		
+		if( gamma_cfg <= 0.0 ){
+			const double Omega_m = cosmo_.Omega_m;
+			m_Gamma = Omega_m*0.01*cosmo_.H0;
+			if( apply_sugiyama )
+				m_Gamma *= exp(-cosmo_.Omega_b*(1.0+sqrt(2.0*0.01*cosmo_.H0)/Omega_m));
+		}else{
+			m_Gamma = gamma_cfg;
+		}
+		
+		tf_withvel_  = false;
+		tf_distinct_ = false;
+	}
+	
+	//! computes the value of the BBKS transfer function for mode k (in h/Mpc)
+	/*!
+	 \param k    wave number
+	 \param type ignored, the same fit is returned for every type
+	 \return 1 for k < 1e-7, otherwise the value of the fitting formula
+	 */
+	inline double compute( double k, tf_type type ){
+		if(k < 1e-7 )
+			return 1.0;
+		
+		const double q = k/(m_Gamma);
+		const double log_term = log(1.0 + 2.34*q)/(2.34*q);
+		const double poly = 1.0 + q*(3.89 + q*(259.21 + q*(162.771336 + q*2027.16958081)));
+		
+		// poly^(-1/4) written as two nested square roots
+		return log_term/sqrt(sqrt(poly));
+	}
+	
+	//! smallest wave number reported for this transfer function
+	inline double get_kmin( void ){
+		return 1e-4;
+	}
+	
+	//! largest wave number reported for this transfer function
+	inline double get_kmax( void ){
+		return 1.e4;
+	}
+};
+
+
+namespace{ // anonymous namespace: the object below is local to this translation unit
+	transfer_function_plugin_creator_concrete< transfer_bbks_plugin > creator("bbks"); // presumably registers this plugin under the name "bbks" -- confirm in transfer_function.hh
+}
+
--- a/plugins/transfer_camb.cc
+++ b/plugins/transfer_camb.cc
@ -0,0 +1,216 @@
+/*
+ 
+ transfer_camb.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+*/
+
+#include "transfer_function.hh"
+
+class transfer_CAMB_plugin : public transfer_function_plugin
+{
+	
+private:
+	std::string m_filename_Pk, m_filename_Tk;
+	std::vector<double> m_tab_k, m_tab_Tk_tot, m_tab_Tk_cdm, m_tab_Tk_baryon;
+	gsl_interp_accel *acc_tot, *acc_cdm, *acc_baryon;
+	gsl_spline *spline_tot, *spline_cdm, *spline_baryon;
+	
+    double m_kmin, m_kmax;
+    unsigned m_nlines;
+	
+	//! Read the tabulated transfer functions from the file m_filename_Tk
+	/*!
+	 Each usable line must start with seven numbers; the first (k), second
+	 (CDM), third (baryon) and seventh (total) are stored as log10 values.
+	 m_kmin, m_kmax and m_nlines are set from the rows read.
+	 
+	 \throws std::runtime_error if the file cannot be opened or contains fewer
+	         than 3 usable rows
+	 
+	 Changes with respect to the previous version:
+	  - the read loop tests the result of getline, so a last line without a
+	    trailing newline is no longer dropped;
+	  - lines that do not provide seven numbers (headers, blank lines) are
+	    skipped instead of pushing uninitialised values into the tables;
+	  - a table with fewer than 3 rows is rejected before it reaches the
+	    spline setup;
+	  - with MPI, m_kmin, m_kmax and m_nlines are broadcast as well; they were
+	    only ever set on rank 0 although compute() uses them on every rank.
+	 */
+	void read_table( void ){
+#ifdef WITH_MPI
+		if( MPI::COMM_WORLD.Get_rank() == 0 ){
+#endif
+			std::cerr 
+			<< " - reading tabulated transfer function data from file \n"
+			<< "    \'" << m_filename_Tk << "\'\n";
+			
+			std::string line;
+			std::ifstream ifs( m_filename_Tk.c_str() );
+			
+			if(! ifs.good() )
+				throw std::runtime_error("Could not find transfer function file \'"+m_filename_Tk+"\'");
+			
+			m_tab_k.clear();
+			m_tab_Tk_tot.clear();
+			m_tab_Tk_cdm.clear();
+			m_tab_Tk_baryon.clear();
+          
+            m_kmin = 1e30;
+            m_kmax = -1e30;
+            m_nlines = 0;
+			
+			while( std::getline(ifs,line) ){
+				std::stringstream ss(line);
+				
+				double k, Tkc, Tkb, Tkg, Tkr, Tknu, Tktot;
+				
+				// skip anything that is not a complete data row
+				if( !( ss >> k >> Tkc >> Tkb >> Tkg >> Tkr >> Tknu >> Tktot ) )
+					continue;
+              
+                if( k < m_kmin ) m_kmin = k;
+                if( k > m_kmax ) m_kmax = k;
+				
+				m_tab_k.push_back( log10(k) );
+				
+				m_tab_Tk_tot.push_back( log10(Tktot) );
+				m_tab_Tk_baryon.push_back( log10(Tkb) );
+				m_tab_Tk_cdm.push_back( log10(Tkc) );
+                ++m_nlines;
+			}
+			
+			ifs.close();
+			
+			if( m_tab_k.size() < 3 )
+				throw std::runtime_error("Transfer function file \'"+m_filename_Tk+"\' contains fewer than 3 usable data rows");
+			
+#ifdef WITH_MPI
+		}
+		
+		unsigned n=m_tab_k.size();
+		MPI::COMM_WORLD.Bcast( &n, 1, MPI_UNSIGNED, 0 );
+		
+		if( MPI::COMM_WORLD.Get_rank() > 0 ){
+			m_tab_k.assign(n,0);
+			m_tab_Tk_tot.assign(n,0);
+			m_tab_Tk_cdm.assign(n,0);
+			m_tab_Tk_baryon.assign(n,0);
+
+		}
+		
+		MPI::COMM_WORLD.Bcast( &m_tab_k[0],  n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk_tot[0], n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk_cdm[0], n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk_baryon[0], n, MPI_DOUBLE, 0 );
+		
+		// the k-range is needed by compute() on every rank
+		MPI::COMM_WORLD.Bcast( &m_kmin, 1, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_kmax, 1, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_nlines, 1, MPI_UNSIGNED, 0 );
+
+#endif
+		
+	}
+	
+public:
+	//! Constructor: reads the table named by [cosmology]/transfer_file and sets up the splines
+	/*!
+	 \param cf configuration file object
+	 
+	 One cubic spline (with its accelerator) is created per tabulated
+	 component: total, CDM and baryon, all as functions of log10(k).
+	 */
+	transfer_CAMB_plugin( config_file& cf )
+	: transfer_function_plugin( cf )
+	{
+		m_filename_Tk = pcf_->getValue<std::string>("cosmology","transfer_file");
+		
+		read_table( );
+		
+		const size_t npts  = m_tab_k.size();
+		const double *lg_k = &m_tab_k[0];
+		
+		// total matter
+		acc_tot    = gsl_interp_accel_alloc();
+		spline_tot = gsl_spline_alloc( gsl_interp_cspline, npts );
+		gsl_spline_init (spline_tot, lg_k, &m_tab_Tk_tot[0], npts );
+		
+		// cold dark matter
+		acc_cdm    = gsl_interp_accel_alloc();
+		spline_cdm = gsl_spline_alloc( gsl_interp_cspline, npts );
+		gsl_spline_init (spline_cdm, lg_k, &m_tab_Tk_cdm[0], npts );
+		
+		// baryons
+		acc_baryon    = gsl_interp_accel_alloc();
+		spline_baryon = gsl_spline_alloc( gsl_interp_cspline, npts );
+		gsl_spline_init (spline_baryon, lg_k, &m_tab_Tk_baryon[0], npts );
+		
+		tf_withvel_  = false;
+		tf_distinct_ = true;
+	}
+	
+	//! Destructor: frees the three splines and their accelerators
+	~transfer_CAMB_plugin()
+	{
+		gsl_spline_free (spline_tot);
+		gsl_interp_accel_free (acc_tot);
+		
+		gsl_spline_free (spline_cdm);
+		gsl_interp_accel_free (acc_cdm);
+		
+		gsl_spline_free (spline_baryon);
+		gsl_interp_accel_free (acc_baryon);
+	}
+
+    //! Extrapolate beyond the last tabulated k, linearly in log-log space
+    /*!
+     \param k    wave number
+     \param type component: cdm and baryon use their own table, vcdm, vbaryon
+                 and total use the total table
+     \return 10^(straight line through the last two table entries, evaluated at log10(k))
+     \throws std::runtime_error for any other type
+     */
+    inline double extrap_right( double k, const tf_type& type )
+    {
+      const std::vector<double> *table = 0;
+      
+      switch( type )
+      {
+        case cdm:
+          table = &m_tab_Tk_cdm;
+          break;
+        case baryon:
+          table = &m_tab_Tk_baryon;
+          break;
+        case vcdm:
+        case vbaryon:
+        case total:
+          table = &m_tab_Tk_tot;
+          break;
+          
+        default:
+          throw std::runtime_error("Invalid type requested in transfer function evaluation");
+      }
+      
+      const int last = m_tab_k.size()-1, prev = last-1;
+      const double lg_T_prev = (*table)[prev];
+      const double lg_T_last = (*table)[last];
+      
+      const double lk   = log10(k);
+      const double dk   = m_tab_k[last]-m_tab_k[prev];
+      const double delk = lk-m_tab_k[last];
+      
+      return pow(10.0,(lg_T_last-lg_T_prev)/dk*(delk)+lg_T_last);
+    }
+    
+	//! Evaluate the tabulated transfer function at wave number k
+	/*!
+	 \param k    wave number
+	 \param type cdm or baryon select their own table, anything else the total
+	 
+	 Below m_kmin the first table entry is returned, above m_kmax the result
+	 of extrap_right(), in between the value of the cubic spline.
+	 */
+	inline double compute( double k, tf_type type )
+    {
+        // constant continuation on the left side of the tabulated values
+        if( k < m_kmin )
+        {
+            const std::vector<double>& tab =
+                ( type == cdm )    ? m_tab_Tk_cdm :
+                ( type == baryon ) ? m_tab_Tk_baryon : m_tab_Tk_tot;
+            return pow(10.0,tab[0]);
+        }
+        
+        // log-log linear continuation on the right side
+        if( k>m_kmax )
+            return extrap_right( k, type );
+        
+        gsl_spline       *spl = spline_tot;
+        gsl_interp_accel *acc = acc_tot;
+        
+        if( type == cdm )
+        {   spl = spline_cdm;    acc = acc_cdm;      }
+        else if( type == baryon )
+        {   spl = spline_baryon; acc = acc_baryon;   }
+        
+        const double lk = log10(k);
+        return pow(10.0, gsl_spline_eval (spl, lk, acc) );
+	}
+	
+	//! Lower k limit reported to callers: the second tabulated wave number
+	inline double get_kmin( void ){
+		const double lg_k = m_tab_k[1];
+		return pow(10.0,lg_k);
+	}
+	
+	//! Upper k limit reported to callers: the second-to-last tabulated wave number
+	inline double get_kmax( void ){
+		const double lg_k = m_tab_k[m_tab_k.size()-2];
+		return pow(10.0,lg_k);
+	}
+	
+};
+
+namespace{ // anonymous namespace: the object below is local to this translation unit
+	transfer_function_plugin_creator_concrete< transfer_CAMB_plugin > creator("camb_file"); // presumably registers this plugin under the name "camb_file" -- confirm in transfer_function.hh
+}
+
+
--- a/plugins/transfer_eisenstein.cc
+++ b/plugins/transfer_eisenstein.cc
@ -0,0 +1,325 @@
+/*
+ 
+ transfer_eisenstein.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ */
+
+#include "transfer_function.hh"
+
+// forward declaration of WDM class
+class transfer_eisenstein_wdm_plugin;
+
+
+//! Analytic Eisenstein & Hu fit to the matter transfer function
+/*!
+ Holds the scalar parameters of the fit and evaluates the CDM, baryon and
+ combined transfer functions. set_parameters() has to be called before at_k()
+ or TFfit_onek(); the default constructor leaves every member unset.
+ */
+struct eisenstein_transfer
+{
+  //Cosmology m_Cosmology;
+  double  m_h0;		/* cosmo.H0*0.01, used to convert k before calling TFfit_onek() */
+  double	omhh,		/* Omega_matter*h^2 */
+  obhh,		/* Omega_baryon*h^2 */
+  theta_cmb,	/* Tcmb in units of 2.7 K */
+  z_equality,	/* Redshift of matter-radiation equality, really 1+z */
+  k_equality,	/* Scale of equality, in Mpc^-1 */
+  z_drag,		/* Redshift of drag epoch */
+  R_drag,		/* Photon-baryon ratio at drag epoch */
+  R_equality,	/* Photon-baryon ratio at equality epoch */
+  sound_horizon,	/* Sound horizon at drag epoch, in Mpc */
+  k_silk,		/* Silk damping scale, in Mpc^-1 */
+  alpha_c,	/* CDM suppression */
+  beta_c,		/* CDM log shift */
+  alpha_b,	/* Baryon suppression */
+  beta_b,		/* Baryon envelope shift */
+  beta_node,	/* Sound horizon shift */
+  k_peak,		/* Fit to wavenumber of first peak, in Mpc^-1 */
+  sound_horizon_fit,	/* Fit to sound horizon, in Mpc */
+  alpha_gamma;	/* Gamma suppression in approximate TF */
+  
+  //! private member function: sets internal quantities for Eisenstein & Hu fitting
+  void TFset_parameters(double omega0hh, double f_baryon, double Tcmb)
+  /* Set all the scalars quantities for Eisenstein & Hu 1997 fitting formula */
+  /* Input: omega0hh -- The density of CDM and baryons, in units of critical dens,
+   multiplied by the square of the Hubble constant, in units
+   of 100 km/s/Mpc */
+  /* 	  f_baryon -- The baryon fraction of the matter density,
+   Omega_baryon/Omega_matter (obhh is computed as omhh*f_baryon) */
+  /*        Tcmb -- The temperature of the CMB in Kelvin.  Tcmb<=0 forces use
+   of the COBE value of  2.728 K. */
+  /* Output: Nothing, but sets the member variables used in TFfit_onek(). */
+  /* Throws: std::runtime_error if f_baryon<=0 or omega0hh<=0. */
+  /* Note: Units are always Mpc, never h^-1 Mpc. */
+  {
+    double z_drag_b1, z_drag_b2;
+    double alpha_c_a1, alpha_c_a2, beta_c_b1, beta_c_b2, alpha_b_G, y;
+    
+    // report bad input through an exception, like the plugins in this file do,
+    // instead of terminating the whole process from inside a helper
+    if (f_baryon<=0.0 || omega0hh<=0.0)
+      throw std::runtime_error("TFset_parameters(): Illegal input.");
+    
+    omhh = omega0hh;
+    obhh = omhh*f_baryon;
+    if (Tcmb<=0.0) Tcmb=2.728;	/* COBE FIRAS */
+    theta_cmb = Tcmb/2.7;
+    
+    z_equality = 2.50e4*omhh/POW4(theta_cmb);  /* Really 1+z */
+    k_equality = 0.0746*omhh/SQR(theta_cmb);
+    
+    z_drag_b1 = 0.313*pow((double)omhh,-0.419)*(1+0.607*pow((double)omhh,0.674));
+    z_drag_b2 = 0.238*pow((double)omhh,0.223);
+    z_drag = 1291*pow(omhh,0.251)/(1+0.659*pow((double)omhh,0.828))*
+    (1+z_drag_b1*pow((double)obhh,(double)z_drag_b2));
+    
+    R_drag = 31.5*obhh/POW4(theta_cmb)*(1000/(1+z_drag));
+    R_equality = 31.5*obhh/POW4(theta_cmb)*(1000/z_equality);
+    
+    sound_horizon = 2./3./k_equality*sqrt(6./R_equality)*
+    log((sqrt(1+R_drag)+sqrt(R_drag+R_equality))/(1+sqrt(R_equality)));
+    
+    k_silk = 1.6*pow((double)obhh,0.52)*pow((double)omhh,0.73)*(1+pow((double)10.4*omhh,-0.95));
+    
+    alpha_c_a1 = pow((double)46.9*omhh,0.670)*(1+pow(32.1*omhh,-0.532));
+    alpha_c_a2 = pow((double)12.0*omhh,0.424)*(1+pow(45.0*omhh,-0.582));
+    alpha_c = pow(alpha_c_a1,-f_baryon)*
+    pow(alpha_c_a2,-CUBE(f_baryon));
+    
+    beta_c_b1 = 0.944/(1+pow(458*omhh,-0.708));
+    beta_c_b2 = pow(0.395*omhh, -0.0266);
+    beta_c = 1.0/(1+beta_c_b1*(pow(1-f_baryon, beta_c_b2)-1));
+    
+    y = z_equality/(1+z_drag);
+    alpha_b_G = y*(-6.*sqrt(1+y)+(2.+3.*y)*log((sqrt(1+y)+1)/(sqrt(1+y)-1)));
+    alpha_b = 2.07*k_equality*sound_horizon*pow(1+R_drag,-0.75)*alpha_b_G;
+    
+    beta_node = 8.41*pow(omhh, 0.435);
+    beta_b = 0.5+f_baryon+(3.-2.*f_baryon)*sqrt(pow(17.2*omhh,2.0)+1);
+    
+    k_peak = 2.5*3.14159*(1+0.217*omhh)/sound_horizon;
+    sound_horizon_fit = 44.5*log(9.83/omhh)/sqrt(1+10.0*pow(obhh,0.75));
+    
+    alpha_gamma = 1-0.328*log(431.0*omhh)*f_baryon + 0.38*log(22.3*omhh)*
+    SQR(f_baryon);
+    
+    return;
+  }
+  
+  //! private member function: computes transfer function for mode k (k in Mpc)
+  inline double TFfit_onek(double k, double *tf_baryon, double *tf_cdm)
+  /* Input: k -- Wavenumber at which to calculate transfer function, in Mpc^-1.
+   *tf_baryon, *tf_cdm -- Input value not used; replaced on output if
+   the input was not NULL. */
+  /* Output: Returns the value of the full transfer function fitting formula.
+   This is the form given in Section 3 of Eisenstein & Hu (1997).
+   *tf_baryon -- The baryonic contribution to the full fit.
+   *tf_cdm -- The CDM contribution to the full fit. */
+  /* Notes: Units are Mpc, not h^-1 Mpc. */
+  {
+    double T_c_ln_beta, T_c_ln_nobeta, T_c_C_alpha, T_c_C_noalpha;
+    double q, xx, xx_tilde;//, q_eff;
+    double T_c_f, T_c, s_tilde, T_b_T0, T_b, f_baryon, T_full;
+    //double T_0_L0, T_0_C0, T_0, gamma_eff;
+    //double T_nowiggles_L0, T_nowiggles_C0, T_nowiggles;
+    
+    k = fabs(k);	/* Just define negative k as positive */
+    if (k==0.0) {
+      /* all three transfer functions are defined as 1 at k=0; this also
+       avoids the divisions by xx and xx_tilde below */
+      if (tf_baryon!=NULL) *tf_baryon = 1.0;
+      if (tf_cdm!=NULL) *tf_cdm = 1.0;
+      return 1.0;
+    }
+    
+    q = k/13.41/k_equality;
+    xx = k*sound_horizon;
+    
+    T_c_ln_beta = log(2.718282+1.8*beta_c*q);
+    T_c_ln_nobeta = log(2.718282+1.8*q);
+    T_c_C_alpha = 14.2/alpha_c + 386.0/(1+69.9*pow(q,1.08));
+    T_c_C_noalpha = 14.2 + 386.0/(1+69.9*pow(q,1.08));
+    
+    T_c_f = 1.0/(1.0+POW4(xx/5.4));
+    T_c = T_c_f*T_c_ln_beta/(T_c_ln_beta+T_c_C_noalpha*SQR(q)) +
+    (1-T_c_f)*T_c_ln_beta/(T_c_ln_beta+T_c_C_alpha*SQR(q));
+    
+    s_tilde = sound_horizon*pow(1.+CUBE(beta_node/xx),-1./3.);
+    xx_tilde = k*s_tilde;
+    
+    T_b_T0 = T_c_ln_nobeta/(T_c_ln_nobeta+T_c_C_noalpha*SQR(q));
+    T_b = sin(xx_tilde)/(xx_tilde)*(T_b_T0/(1.+SQR(xx/5.2))+
+                                    alpha_b/(1.+CUBE(beta_b/xx))*exp(-pow(k/k_silk,1.4)));
+    
+    f_baryon = obhh/omhh;
+    T_full = f_baryon*T_b + (1-f_baryon)*T_c;
+    
+    /* Now to store these transfer functions */
+    if (tf_baryon!=NULL) *tf_baryon = T_b;
+    if (tf_cdm!=NULL) *tf_cdm = T_c;
+    return T_full;
+  }
+  
+  double fb_, fc_;	/* Omega_b/Omega_m and (Omega_m-Omega_b)/Omega_m, weights used in at_k() */
+  
+  //! Default constructor; leaves all members unset until set_parameters() is called
+  eisenstein_transfer()
+  {  }
+  
+  //! Derives all fit parameters from the cosmology and the CMB temperature
+  /*!
+   \param cosmo cosmological parameters; H0, Omega_m and Omega_b are read
+   \param Tcmb  CMB temperature handed on to TFset_parameters()
+   */
+  void set_parameters( const cosmology& cosmo, double Tcmb )
+  {
+    m_h0 = cosmo.H0*0.01;
+    
+    // TFset_parameters() computes obhh = omhh*f_baryon with omhh = Omega_m*h^2,
+    // so the fraction handed over has to be Omega_b/Omega_m; a baryon-to-CDM
+    // ratio here would overestimate Omega_b*h^2.
+    TFset_parameters( (cosmo.Omega_m)*cosmo.H0*cosmo.H0*(0.01*0.01),
+                     cosmo.Omega_b/cosmo.Omega_m, Tcmb);
+    
+    fb_ = cosmo.Omega_b/(cosmo.Omega_m);
+    fc_ = (cosmo.Omega_m-cosmo.Omega_b)/(cosmo.Omega_m) ;
+  }
+  
+  //! Mass-weighted sum of the baryon and CDM fits at wave number k
+  /*!
+   k is multiplied by m_h0 before it is passed to TFfit_onek(), which works in Mpc^-1.
+   */
+  inline double at_k( double k )
+  {
+    double tfb, tfcdm;
+    TFfit_onek( k*m_h0, &tfb, &tfcdm );
+    return fb_*tfb+fc_*tfcdm;
+  }
+};
+
+
+//! Transfer function plugin backed by the Eisenstein & Hu analytic fit
+/*!
+ Thin adapter around eisenstein_transfer: the constructor configures the fit from
+ the plugin's cosmology and the optional "Tcmb" entry of the "cosmology" section,
+ and compute() returns the same fitted value for every requested type.
+ */
+class transfer_eisenstein_plugin : public transfer_function_plugin
+{
+protected:
+	using transfer_function_plugin::cosmo_;
+	eisenstein_transfer etf_;
+	
+public:
+	//! Builds the plugin from the configuration file
+	/*!
+	 \param cf configuration file; "cosmology"/"Tcmb" is read with a default of 2.726
+	 */
+	transfer_eisenstein_plugin( config_file &cf )
+	: transfer_function_plugin(cf)
+	{
+		// a single fit serves all types and no velocity transfer function is provided
+		tf_distinct_ = false;
+		tf_withvel_  = false;
+		
+		const double cmb_temperature = pcf_->getValueSafe("cosmology","Tcmb",2.726);
+		etf_.set_parameters( cosmo_, cmb_temperature );
+	}
+	
+	//! Returns the fitted transfer function at k; the type argument is ignored
+	inline double compute( double k, tf_type type ){
+		const double fitted = etf_.at_k( k );
+		return fitted;
+	}
+	
+	//! Fixed lower bound of the supported k range
+	inline double get_kmin( void ){
+		return 1.0e-4;
+	}
+	
+	//! Fixed upper bound of the supported k range
+	inline double get_kmax( void ){
+		return 1.0e4;
+	}
+	
+};
+
+
+#include <map>
+//! Eisenstein & Hu transfer function with a warm dark matter cut-off
+/*!
+ The Eisenstein & Hu fit is multiplied by
+   [ 1 + (alpha*k)^(2*nu) ]^(-5/nu)
+ where alpha is computed in the constructor from the parameterisation selected
+ with "cosmology"/"WDMtftype" (BODE, VIEL or BODE_WRONG; compared without
+ regard to case, default BODE).
+ */
+class transfer_eisenstein_wdm_plugin : public transfer_function_plugin
+{
+protected:
+	real_t m_WDMalpha, m_h0;
+	double omegam_, wdmm_, wdmgx_, wdmnu_, H0_, omegab_;
+    std::string type_;
+    std::map< std::string, int > typemap_;
+  
+    eisenstein_transfer etf_;
+  
+    enum wdmtyp { wdm_bode, wdm_viel, wdm_bode_wrong=99};
+    
+public:
+	//! Reads the WDM settings from the configuration and computes the cut-off scale
+	/*!
+	 \param cf configuration file; reads Omega_m, Omega_b, WDMmass, H0 and the
+	 optional Tcmb, WDMtftype, WDMnu and WDMg_x entries of the "cosmology" section
+	 \throws std::runtime_error if WDMtftype names no known parameterisation
+	 */
+	transfer_eisenstein_wdm_plugin( config_file &cf )
+	: transfer_function_plugin(cf), m_h0( cosmo_.H0*0.01 )
+	{
+        double Tcmb = pcf_->getValueSafe("cosmology","Tcmb",2.726);
+        etf_.set_parameters( cosmo_, Tcmb );
+      
+        typemap_.insert( std::pair<std::string,int>( "BODE", wdm_bode ) );
+        typemap_.insert( std::pair<std::string,int>( "VIEL", wdm_viel ) );
+        typemap_.insert( std::pair<std::string,int>( "BODE_WRONG", wdm_bode_wrong ) );
+        
+		omegam_ = cf.getValue<double>("cosmology","Omega_m");
+		omegab_ = cf.getValue<double>("cosmology","Omega_b");
+		wdmm_   = cf.getValue<double>("cosmology","WDMmass");
+      
+		
+        H0_     = cf.getValue<double>("cosmology","H0");
+        type_   = cf.getValueSafe<std::string>("cosmology","WDMtftype","BODE");
+        
+        // the map keys are upper case, so fold ASCII letters of the requested
+        // name to upper case to accept e.g. "bode" or "Viel" as well
+        for( std::string::size_type i=0; i<type_.size(); ++i )
+            if( type_[i] >= 'a' && type_[i] <= 'z' )
+                type_[i] = static_cast<char>( type_[i] - 'a' + 'A' );
+        
+        const std::map< std::string, int >::const_iterator selected = typemap_.find( type_ );
+        if( selected == typemap_.end() )
+            throw std::runtime_error("unknown transfer function fit for WDM");
+        
+        m_WDMalpha = 1.0;
+        
+        // the VIEL branch does not read WDMg_x; give the member a defined value
+        wdmgx_ = 1.5;
+        
+        switch( selected->second )
+        {
+            //... parameterisation from Viel et al. (2005), Phys Rev D, 71
+            case wdm_viel:
+                    wdmnu_  = cf.getValueSafe<double>("cosmology","WDMnu",1.12);
+                    m_WDMalpha = 0.049 * pow( omegam_/0.25,0.11)
+                        *pow(H0_*0.01/0.7,1.22)*pow(wdmm_,-1.11);
+                break;
+            
+            
+            //.... below is for historical reasons due to the buggy parameterisation
+            //.... in early versions of MUSIC, but apart from H instead of h, Bode et al.
+            case wdm_bode_wrong:
+                    wdmnu_  = cf.getValueSafe<double>("cosmology","WDMnu",1.0);
+                    wdmgx_  = cf.getValueSafe<double>("cosmology","WDMg_x",1.5);
+                    m_WDMalpha = 0.05 * pow( omegam_/0.4,0.15)
+                        *pow(H0_/0.65,1.3)*pow(wdmm_,-1.15)
+                        *pow(1.5/wdmgx_,0.29);
+                break;
+                
+            //... parameterisation from Bode et al. (2001), ApJ, 556, 93
+            //... (also the fallback, which used to be a verbatim copy of this branch)
+            case wdm_bode:
+            default:
+                    wdmnu_  = cf.getValueSafe<double>("cosmology","WDMnu",1.0);
+                    wdmgx_  = cf.getValueSafe<double>("cosmology","WDMg_x",1.5);
+                    m_WDMalpha = 0.05 * pow( omegam_/0.4,0.15)
+                        *pow(H0_*0.01/0.65,1.3)*pow(wdmm_,-1.15)
+                        *pow(1.5/wdmgx_,0.29);
+                break;
+        }
+	}
+	
+	//! Eisenstein & Hu fit at k times the WDM suppression factor; type is ignored
+	inline double compute( double k, tf_type type )
+	{
+		return etf_.at_k( k )*pow(1.0+pow(m_WDMalpha*k,2.0*wdmnu_),-5.0/wdmnu_);
+	}
+  
+    //! Fixed lower bound of the supported k range
+    inline double get_kmin( void ){
+      return 1e-4;
+    }
+    
+    //! Fixed upper bound of the supported k range
+    inline double get_kmax( void ){
+      return 1.e4;
+    }
+	
+};
+
+
+
+// File-local creator objects that tie the two plugin classes of this file to the
+// names "eisenstein" and "eisenstein_wdm".
+// NOTE(review): presumably their constructors register the plugins with the
+// transfer-function factory during static initialisation -- confirm in
+// transfer_function.hh.
+namespace{
+	transfer_function_plugin_creator_concrete< transfer_eisenstein_plugin > creator("eisenstein");
+	transfer_function_plugin_creator_concrete< transfer_eisenstein_wdm_plugin > creator2("eisenstein_wdm");
+}
+
--- a/plugins/transfer_eisenstein_suppressLSS.cc
+++ b/plugins/transfer_eisenstein_suppressLSS.cc
@ -0,0 +1,243 @@
+/*
+ 
+ transfer_eisenstein_suppressLSS.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ */
+
+#include "transfer_function.hh"
+
+// forward declaration of WDM class
+class transfer_eisenstein_wdm_plugin;
+
+
+//! Analytic Eisenstein & Hu fit to the matter transfer function
+/*!
+ Holds the scalar parameters of the fit and evaluates the CDM, baryon and
+ combined transfer functions. set_parameters() has to be called before at_k()
+ or TFfit_onek(); the default constructor leaves every member unset.
+ */
+struct eisenstein_transfer
+{
+  //Cosmology m_Cosmology;
+  double  m_h0;		/* cosmo.H0*0.01, used to convert k before calling TFfit_onek() */
+  double	omhh,		/* Omega_matter*h^2 */
+  obhh,		/* Omega_baryon*h^2 */
+  theta_cmb,	/* Tcmb in units of 2.7 K */
+  z_equality,	/* Redshift of matter-radiation equality, really 1+z */
+  k_equality,	/* Scale of equality, in Mpc^-1 */
+  z_drag,		/* Redshift of drag epoch */
+  R_drag,		/* Photon-baryon ratio at drag epoch */
+  R_equality,	/* Photon-baryon ratio at equality epoch */
+  sound_horizon,	/* Sound horizon at drag epoch, in Mpc */
+  k_silk,		/* Silk damping scale, in Mpc^-1 */
+  alpha_c,	/* CDM suppression */
+  beta_c,		/* CDM log shift */
+  alpha_b,	/* Baryon suppression */
+  beta_b,		/* Baryon envelope shift */
+  beta_node,	/* Sound horizon shift */
+  k_peak,		/* Fit to wavenumber of first peak, in Mpc^-1 */
+  sound_horizon_fit,	/* Fit to sound horizon, in Mpc */
+  alpha_gamma;	/* Gamma suppression in approximate TF */
+  
+  //! private member function: sets internal quantities for Eisenstein & Hu fitting
+  void TFset_parameters(double omega0hh, double f_baryon, double Tcmb)
+  /* Set all the scalars quantities for Eisenstein & Hu 1997 fitting formula */
+  /* Input: omega0hh -- The density of CDM and baryons, in units of critical dens,
+   multiplied by the square of the Hubble constant, in units
+   of 100 km/s/Mpc */
+  /* 	  f_baryon -- The baryon fraction of the matter density,
+   Omega_baryon/Omega_matter (obhh is computed as omhh*f_baryon) */
+  /*        Tcmb -- The temperature of the CMB in Kelvin.  Tcmb<=0 forces use
+   of the COBE value of  2.728 K. */
+  /* Output: Nothing, but sets the member variables used in TFfit_onek(). */
+  /* Throws: std::runtime_error if f_baryon<=0 or omega0hh<=0. */
+  /* Note: Units are always Mpc, never h^-1 Mpc. */
+  {
+    double z_drag_b1, z_drag_b2;
+    double alpha_c_a1, alpha_c_a2, beta_c_b1, beta_c_b2, alpha_b_G, y;
+    
+    // report bad input through an exception instead of terminating the whole
+    // process from inside a helper
+    if (f_baryon<=0.0 || omega0hh<=0.0)
+      throw std::runtime_error("TFset_parameters(): Illegal input.");
+    
+    omhh = omega0hh;
+    obhh = omhh*f_baryon;
+    if (Tcmb<=0.0) Tcmb=2.728;	/* COBE FIRAS */
+    theta_cmb = Tcmb/2.7;
+    
+    z_equality = 2.50e4*omhh/POW4(theta_cmb);  /* Really 1+z */
+    k_equality = 0.0746*omhh/SQR(theta_cmb);
+    
+    z_drag_b1 = 0.313*pow((double)omhh,-0.419)*(1+0.607*pow((double)omhh,0.674));
+    z_drag_b2 = 0.238*pow((double)omhh,0.223);
+    z_drag = 1291*pow(omhh,0.251)/(1+0.659*pow((double)omhh,0.828))*
+    (1+z_drag_b1*pow((double)obhh,(double)z_drag_b2));
+    
+    R_drag = 31.5*obhh/POW4(theta_cmb)*(1000/(1+z_drag));
+    R_equality = 31.5*obhh/POW4(theta_cmb)*(1000/z_equality);
+    
+    sound_horizon = 2./3./k_equality*sqrt(6./R_equality)*
+    log((sqrt(1+R_drag)+sqrt(R_drag+R_equality))/(1+sqrt(R_equality)));
+    
+    k_silk = 1.6*pow((double)obhh,0.52)*pow((double)omhh,0.73)*(1+pow((double)10.4*omhh,-0.95));
+    
+    alpha_c_a1 = pow((double)46.9*omhh,0.670)*(1+pow(32.1*omhh,-0.532));
+    alpha_c_a2 = pow((double)12.0*omhh,0.424)*(1+pow(45.0*omhh,-0.582));
+    alpha_c = pow(alpha_c_a1,-f_baryon)*
+    pow(alpha_c_a2,-CUBE(f_baryon));
+    
+    beta_c_b1 = 0.944/(1+pow(458*omhh,-0.708));
+    beta_c_b2 = pow(0.395*omhh, -0.0266);
+    beta_c = 1.0/(1+beta_c_b1*(pow(1-f_baryon, beta_c_b2)-1));
+    
+    y = z_equality/(1+z_drag);
+    alpha_b_G = y*(-6.*sqrt(1+y)+(2.+3.*y)*log((sqrt(1+y)+1)/(sqrt(1+y)-1)));
+    alpha_b = 2.07*k_equality*sound_horizon*pow(1+R_drag,-0.75)*alpha_b_G;
+    
+    beta_node = 8.41*pow(omhh, 0.435);
+    beta_b = 0.5+f_baryon+(3.-2.*f_baryon)*sqrt(pow(17.2*omhh,2.0)+1);
+    
+    k_peak = 2.5*3.14159*(1+0.217*omhh)/sound_horizon;
+    sound_horizon_fit = 44.5*log(9.83/omhh)/sqrt(1+10.0*pow(obhh,0.75));
+    
+    alpha_gamma = 1-0.328*log(431.0*omhh)*f_baryon + 0.38*log(22.3*omhh)*
+    SQR(f_baryon);
+    
+    return;
+  }
+  
+  //! private member function: computes transfer function for mode k (k in Mpc)
+  inline double TFfit_onek(double k, double *tf_baryon, double *tf_cdm)
+  /* Input: k -- Wavenumber at which to calculate transfer function, in Mpc^-1.
+   *tf_baryon, *tf_cdm -- Input value not used; replaced on output if
+   the input was not NULL. */
+  /* Output: Returns the value of the full transfer function fitting formula.
+   This is the form given in Section 3 of Eisenstein & Hu (1997).
+   *tf_baryon -- The baryonic contribution to the full fit.
+   *tf_cdm -- The CDM contribution to the full fit. */
+  /* Notes: Units are Mpc, not h^-1 Mpc. */
+  {
+    double T_c_ln_beta, T_c_ln_nobeta, T_c_C_alpha, T_c_C_noalpha;
+    double q, xx, xx_tilde;//, q_eff;
+    double T_c_f, T_c, s_tilde, T_b_T0, T_b, f_baryon, T_full;
+    //double T_0_L0, T_0_C0, T_0, gamma_eff;
+    //double T_nowiggles_L0, T_nowiggles_C0, T_nowiggles;
+    
+    k = fabs(k);	/* Just define negative k as positive */
+    if (k==0.0) {
+      /* all three transfer functions are defined as 1 at k=0; this also
+       avoids the divisions by xx and xx_tilde below */
+      if (tf_baryon!=NULL) *tf_baryon = 1.0;
+      if (tf_cdm!=NULL) *tf_cdm = 1.0;
+      return 1.0;
+    }
+    
+    q = k/13.41/k_equality;
+    xx = k*sound_horizon;
+    
+    T_c_ln_beta = log(2.718282+1.8*beta_c*q);
+    T_c_ln_nobeta = log(2.718282+1.8*q);
+    T_c_C_alpha = 14.2/alpha_c + 386.0/(1+69.9*pow(q,1.08));
+    T_c_C_noalpha = 14.2 + 386.0/(1+69.9*pow(q,1.08));
+    
+    T_c_f = 1.0/(1.0+POW4(xx/5.4));
+    T_c = T_c_f*T_c_ln_beta/(T_c_ln_beta+T_c_C_noalpha*SQR(q)) +
+    (1-T_c_f)*T_c_ln_beta/(T_c_ln_beta+T_c_C_alpha*SQR(q));
+    
+    s_tilde = sound_horizon*pow(1.+CUBE(beta_node/xx),-1./3.);
+    xx_tilde = k*s_tilde;
+    
+    T_b_T0 = T_c_ln_nobeta/(T_c_ln_nobeta+T_c_C_noalpha*SQR(q));
+    T_b = sin(xx_tilde)/(xx_tilde)*(T_b_T0/(1.+SQR(xx/5.2))+
+                                    alpha_b/(1.+CUBE(beta_b/xx))*exp(-pow(k/k_silk,1.4)));
+    
+    f_baryon = obhh/omhh;
+    T_full = f_baryon*T_b + (1-f_baryon)*T_c;
+    
+    /* Now to store these transfer functions */
+    if (tf_baryon!=NULL) *tf_baryon = T_b;
+    if (tf_cdm!=NULL) *tf_cdm = T_c;
+    return T_full;
+  }
+  
+  double fb_, fc_;	/* Omega_b/Omega_m and (Omega_m-Omega_b)/Omega_m, weights used in at_k() */
+  
+  //! Default constructor; leaves all members unset until set_parameters() is called
+  eisenstein_transfer()
+  {  }
+  
+  //! Derives all fit parameters from the cosmology and the CMB temperature
+  /*!
+   \param cosmo cosmological parameters; H0, Omega_m and Omega_b are read
+   \param Tcmb  CMB temperature handed on to TFset_parameters()
+   */
+  void set_parameters( const cosmology& cosmo, double Tcmb )
+  {
+    m_h0 = cosmo.H0*0.01;
+    
+    // TFset_parameters() computes obhh = omhh*f_baryon with omhh = Omega_m*h^2,
+    // so the fraction handed over has to be Omega_b/Omega_m; a baryon-to-CDM
+    // ratio here would overestimate Omega_b*h^2.
+    TFset_parameters( (cosmo.Omega_m)*cosmo.H0*cosmo.H0*(0.01*0.01),
+                     cosmo.Omega_b/cosmo.Omega_m, Tcmb);
+    
+    fb_ = cosmo.Omega_b/(cosmo.Omega_m);
+    fc_ = (cosmo.Omega_m-cosmo.Omega_b)/(cosmo.Omega_m) ;
+  }
+  
+  //! Mass-weighted sum of the baryon and CDM fits at wave number k
+  /*!
+   k is multiplied by m_h0 before it is passed to TFfit_onek(), which works in Mpc^-1.
+   */
+  inline double at_k( double k )
+  {
+    double tfb, tfcdm;
+    TFfit_onek( k*m_h0, &tfb, &tfcdm );
+    return fb_*tfb+fc_*tfcdm;
+  }
+};
+
+#include "cosmology.hh"
+
+//! Eisenstein & Hu transfer function plugin with damped large-scale power
+/*!
+ Wraps eisenstein_transfer and multiplies the fit by (2/pi)*atan(k/ktrunc); for a
+ positive ktrunc this factor tends to zero for k much smaller than ktrunc and to
+ one for k much larger. Requests for the total0 type skip that factor and return
+ the plain fit divided by the growth-factor ratio stored in dplus_.
+ */
+class transfer_eisensteinS_plugin : public transfer_function_plugin
+{
+protected:
+	using transfer_function_plugin::cosmo_;
+	eisenstein_transfer etf_;
+	double ktrunc_, normfac_;
+	double dplus_;
+	
+public:
+	//! Builds the plugin from the configuration file
+	/*!
+	 \param cf configuration file; reads "cosmology"/"Tcmb" (default 2.726) and the
+	 mandatory "cosmology"/"ktrunc"
+	 */
+	transfer_eisensteinS_plugin( config_file &cf )
+	: transfer_function_plugin(cf)
+	{
+		const double cmb_temperature = pcf_->getValueSafe<double>("cosmology","Tcmb",2.726);
+		ktrunc_  = pcf_->getValue<double>("cosmology","ktrunc");
+		normfac_ = 2.0/M_PI;	// scales atan(), whose upper limit is pi/2, to at most one
+		
+		etf_.set_parameters( cosmo_, cmb_temperature );
+		
+		tf_distinct_   = false;
+		tf_withvel_    = false;
+		tf_withtotal0_ = true;
+		
+		// ratio of the growth factor at the starting expansion factor to that at a=1
+		cosmology local_cosmo( cf );
+		CosmoCalc growth_calc( local_cosmo, this );
+		dplus_ = growth_calc.CalcGrowthFactor( local_cosmo.astart )/growth_calc.CalcGrowthFactor( 1.0 );
+	}
+	
+	//! Returns the (possibly damped) transfer function at wave number k
+	inline double compute( double k, tf_type type ){
+		const double fitted = etf_.at_k( k );
+		
+		// total0 is served undamped, rescaled by the growth-factor ratio
+		if( type == total0 )
+			return fitted/dplus_;
+		
+		return fitted * atan(k/ktrunc_)*normfac_;
+	}
+	
+	//! Fixed lower bound of the supported k range
+	inline double get_kmin( void ){
+		return 1.0e-4;
+	}
+	
+	//! Fixed upper bound of the supported k range
+	inline double get_kmax( void ){
+		return 1.0e4;
+	}
+	
+};
+
+
+
+// File-local creator object that ties transfer_eisensteinS_plugin to the name
+// "eisenstein_suppress".
+// NOTE(review): presumably its constructor registers the plugin with the
+// transfer-function factory during static initialisation -- confirm in
+// transfer_function.hh.
+namespace{
+	transfer_function_plugin_creator_concrete< transfer_eisensteinS_plugin > creator("eisenstein_suppress");
+}
+
--- a/plugins/transfer_inflation.cc
+++ b/plugins/transfer_inflation.cc
@ -0,0 +1,64 @@
+/*
+ 
+ transfer_inflation.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#include "transfer_function.hh"
+
+//! Pure power-law transfer function plugin, T(k) = k^(nspec/2)
+class transfer_inflation_plugin : public transfer_function_plugin
+{
+protected:
+	
+	//! half of the "nspec" value read from the "cosmology" section
+	double ns2_;
+	
+public:
+	
+	//! Reads "cosmology"/"nspec" from the configuration and stores half of it
+	transfer_inflation_plugin( config_file& cf ) 
+	: transfer_function_plugin( cf ), ns2_( 0.5*cf.getValue<double>("cosmology","nspec") )
+	{ 
+		tf_distinct_ = true;
+	}
+	
+	~transfer_inflation_plugin(){ }
+	
+	//! Returns k raised to the power ns2_; the type argument is ignored
+	double compute( double k, tf_type type=baryon)
+	{
+		const double power_law = pow(k,ns2_);
+		return power_law;
+	}
+	
+	//! Fixed upper bound of the supported k range
+	double get_kmax( void )
+	{
+		return 1.0e10;
+	}
+	
+	//! Fixed lower bound of the supported k range
+	double get_kmin( void )
+	{
+		return 1.0e-30;
+	}
+	
+};
+
+
+// File-local creator object that ties transfer_inflation_plugin to the name "inflation".
+// NOTE(review): presumably its constructor registers the plugin with the
+// transfer-function factory during static initialisation -- confirm in
+// transfer_function.hh.
+namespace{
+	transfer_function_plugin_creator_concrete< transfer_inflation_plugin > creator("inflation");
+}
+
--- a/plugins/transfer_linger++.cc
+++ b/plugins/transfer_linger++.cc
@ -0,0 +1,329 @@
+/*
+ 
+ transfer_linger++.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#include "transfer_function.hh"
+
+//! Transfer function plugin for tabulated transfer functions (registered as "linger++")
+/*!
+ Reads a whitespace-separated table whose columns are parsed in the order
+   k, T_total, T_cdm, T_baryon, Tv_cdm, Tv_baryon [, T_total0]
+ stores log10 of every column and evaluates GSL cubic splines of log10(T) over
+ log10(k). Outside the tabulated k range the two outermost table rows are
+ extrapolated linearly in log-log space.
+ */
+class transfer_LINGERpp_plugin : public transfer_function_plugin
+{
+	
+private:
+	std::string m_filename_Pk, m_filename_Tk;
+	std::vector<double> m_tab_k, m_tab_Tk_tot, m_tab_Tk_cdm, m_tab_Tk_baryon, m_tab_Tvk_cdm, m_tab_Tvk_baryon, m_tab_Tk_tot0;
+	gsl_interp_accel *acc_dtot, *acc_dcdm, *acc_dbaryon, *acc_vcdm, *acc_vbaryon, *acc_dtot0;
+	gsl_spline *spline_dtot, *spline_dcdm, *spline_dbaryon, *spline_vcdm, *spline_vbaryon, *spline_dtot0;
+	
+	bool m_bnovrel;	
+	bool m_bz0norm;
+	
+	//! Fills the m_tab_* vectors from the file m_filename_Tk
+	/*!
+	 With WITH_MPI defined only rank 0 reads the file and the tables are then
+	 broadcast to all ranks. Lines that do not start with six numbers (blank
+	 lines, headers) are skipped. m_bnovrel and m_bz0norm must be set before
+	 this function is called.
+	 \throws std::runtime_error if the file cannot be opened, if the seventh
+	 column is missing although m_bz0norm is set, or if fewer than three data
+	 rows were found (the cubic splines need at least three points)
+	 */
+	void read_table( void ){
+#ifdef WITH_MPI
+		if( MPI::COMM_WORLD.Get_rank() == 0 ){
+#endif
+			std::cerr 
+			<< " - reading tabulated transfer function data from file \n"
+			<< "    \'" << m_filename_Tk << "\'\n";
+			
+			std::string line;
+			std::ifstream ifs( m_filename_Tk.c_str() );
+			
+			if(! ifs.good() )
+				throw std::runtime_error("Could not find transfer function file \'"+m_filename_Tk+"\'");
+			
+			// announce this once instead of once per table row
+			if( m_bnovrel )
+				std::cerr << " - transfer_linger++ : disabling baryon-DM relative velocity\n";
+			
+			m_tab_k.clear();
+			m_tab_Tk_tot.clear();
+			m_tab_Tk_cdm.clear();
+			m_tab_Tk_baryon.clear();
+			m_tab_Tvk_cdm.clear();
+			m_tab_Tvk_baryon.clear();
+			m_tab_Tk_tot0.clear();
+			
+			// lower clamp applied before taking log10 of the transfer functions
+			const double zero = 1e-10;
+			
+			// loop on the result of getline so that a last line without a
+			// trailing newline is still processed
+			while( std::getline(ifs,line) ){
+				std::stringstream ss(line);
+				
+				double k, Tkc, Tkb, Tktot, Tkvc, Tkvb, Tktot0;
+				
+				// skip anything that is not a data row instead of storing
+				// values that were never read
+				if( !(ss >> k >> Tktot >> Tkc >> Tkb >> Tkvc >> Tkvb) )
+					continue;
+				
+				// the seventh column is only needed for the z0norm normalisation
+				if( !(ss >> Tktot0) )
+				{
+					if( m_bz0norm )
+						throw std::runtime_error("Transfer function file \'"+m_filename_Tk
+							+"\' has no seventh column, which is required when z0norm is enabled");
+					Tktot0 = Tktot;
+				}
+				
+				if( m_bnovrel )
+					Tkvb = Tkvc;
+				
+				Tktot = std::max(zero,Tktot);
+				Tkc   = std::max(zero,Tkc);
+				Tkb   = std::max(zero,Tkb);
+				Tkvc  = std::max(zero,Tkvc);
+				Tkvb  = std::max(zero,Tkvb);
+				Tktot0= std::max(zero,Tktot0);
+				
+				m_tab_k.push_back( log10(k) );
+				
+				m_tab_Tk_tot.push_back( log10(Tktot) );
+				m_tab_Tk_baryon.push_back( log10(Tkb) );
+				m_tab_Tk_cdm.push_back( log10(Tkc) );
+				m_tab_Tvk_cdm.push_back( log10(Tkvc) );
+				m_tab_Tvk_baryon.push_back( log10(Tkvb) );
+				m_tab_Tk_tot0.push_back( log10(Tktot0) );
+				
+			}
+			
+			ifs.close();			
+#ifdef WITH_MPI
+		}
+#endif
+		
+		// number of table rows; with MPI only rank 0 knows it at this point
+		unsigned n=m_tab_k.size();
+#ifdef WITH_MPI
+		MPI::COMM_WORLD.Bcast( &n, 1, MPI_UNSIGNED, 0 );
+#endif
+		
+		// guards the &m_tab_*[0] accesses below and in the constructor
+		if( n < 3 )
+			throw std::runtime_error("Transfer function file \'"+m_filename_Tk
+				+"\' contains fewer than 3 valid data rows");
+		
+#ifdef WITH_MPI
+		if( MPI::COMM_WORLD.Get_rank() > 0 ){
+			m_tab_k.assign(n,0);
+			m_tab_Tk_tot.assign(n,0);
+			m_tab_Tk_cdm.assign(n,0);
+			m_tab_Tk_baryon.assign(n,0);
+			m_tab_Tvk_cdm.assign(n,0);
+			m_tab_Tvk_baryon.assign(n,0);
+			m_tab_Tk_tot0.assign(n,0);
+		}
+		
+		MPI::COMM_WORLD.Bcast( &m_tab_k[0],  n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk_tot[0], n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk_cdm[0], n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk_baryon[0], n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tvk_cdm[0], n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tvk_baryon[0], n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk_tot0[0], n, MPI_DOUBLE, 0 );
+		
+#endif
+		
+	}
+	
+public:
+	//! Reads the configuration, loads the table and sets up the splines
+	/*!
+	 \param cf configuration file; reads "transfer_file" and the optional
+	 "no_vrel" (default false) and "z0norm" (default true) entries of the
+	 "cosmology" section
+	 */
+	transfer_LINGERpp_plugin( config_file& cf )
+	: transfer_function_plugin( cf )
+	{
+		m_filename_Tk	= pcf_->getValue<std::string>("cosmology","transfer_file");
+		
+		//.. disable the baryon-CDM relative velocity (both follow the total matter potential)
+		m_bnovrel		= pcf_->getValueSafe<bool>("cosmology","no_vrel",false);
+		
+		//.. normalize at z=0 rather than using the linearly scaled zini spectrum
+		//.. this can be different due to radiation still being non-negligible at
+		//.. high redshifts
+		m_bz0norm		= pcf_->getValueSafe<bool>("cosmology","z0norm",true);
+		
+		tf_distinct_   = true;
+		tf_withvel_    = true;
+		tf_velunits_   = true;
+
+		//.. normalize with z=0 spectrum rather than zini spectrum?
+		tf_withtotal0_ = m_bz0norm;
+		
+		
+		read_table( );
+		
+		acc_dtot = gsl_interp_accel_alloc();
+		acc_dcdm = gsl_interp_accel_alloc();
+		acc_dbaryon = gsl_interp_accel_alloc();
+		acc_vcdm = gsl_interp_accel_alloc();
+		acc_vbaryon = gsl_interp_accel_alloc();
+		acc_dtot0 = gsl_interp_accel_alloc();
+		
+		spline_dtot = gsl_spline_alloc( gsl_interp_cspline, m_tab_k.size() );
+		spline_dcdm = gsl_spline_alloc( gsl_interp_cspline, m_tab_k.size() );
+		spline_dbaryon = gsl_spline_alloc( gsl_interp_cspline, m_tab_k.size() );
+		spline_vcdm = gsl_spline_alloc( gsl_interp_cspline, m_tab_k.size() );
+		spline_vbaryon = gsl_spline_alloc( gsl_interp_cspline, m_tab_k.size() );
+		spline_dtot0 = gsl_spline_alloc( gsl_interp_cspline, m_tab_k.size() );
+		
+		gsl_spline_init (spline_dtot, &m_tab_k[0], &m_tab_Tk_tot[0], m_tab_k.size() );
+		gsl_spline_init (spline_dcdm, &m_tab_k[0], &m_tab_Tk_cdm[0], m_tab_k.size() );
+		gsl_spline_init (spline_dbaryon, &m_tab_k[0], &m_tab_Tk_baryon[0], m_tab_k.size() );
+		gsl_spline_init (spline_vcdm, &m_tab_k[0], &m_tab_Tvk_cdm[0], m_tab_k.size() );
+		gsl_spline_init (spline_vbaryon, &m_tab_k[0], &m_tab_Tvk_baryon[0], m_tab_k.size() );
+		
+		// without z0norm the total0 spline simply mirrors the total spline
+		if( tf_withtotal0_ )
+			gsl_spline_init (spline_dtot0, &m_tab_k[0], &m_tab_Tk_tot0[0], m_tab_k.size() );
+		else
+			gsl_spline_init (spline_dtot0, &m_tab_k[0], &m_tab_Tk_tot[0], m_tab_k.size() );
+	}
+	
+	//! Releases the GSL splines and accelerators allocated in the constructor
+	~transfer_LINGERpp_plugin()
+	{
+		gsl_spline_free (spline_dtot);
+		gsl_spline_free (spline_dcdm);
+		gsl_spline_free (spline_dbaryon);
+		gsl_spline_free (spline_vcdm);
+		gsl_spline_free (spline_vbaryon);
+		gsl_spline_free (spline_dtot0);
+		
+		gsl_interp_accel_free (acc_dtot);
+		gsl_interp_accel_free (acc_dcdm);
+		gsl_interp_accel_free (acc_dbaryon);
+		gsl_interp_accel_free (acc_vcdm);
+		gsl_interp_accel_free (acc_vbaryon);
+        gsl_interp_accel_free (acc_dtot0);
+	}
+	
+	//! Log-log linear extrapolation through the first two table rows
+	/*!
+	 Returns 1.0 for k below 1e-8.
+	 \throws std::runtime_error for a type that has no table
+	 */
+	inline double extrap_left( double k, const tf_type& type ) 
+	{
+		if( k<1e-8 )
+			return 1.0;
+		
+		double v1(1.0), v2(1.0);
+		switch( type )
+		{
+			case cdm:
+				v1 = m_tab_Tk_cdm[0];
+				v2 = m_tab_Tk_cdm[1];
+				break;
+			case baryon:
+				v1 = m_tab_Tk_baryon[0];
+				v2 = m_tab_Tk_baryon[1];
+				break;
+			case vcdm:
+				v1 = m_tab_Tvk_cdm[0];
+				v2 = m_tab_Tvk_cdm[1];
+				break;
+			case vbaryon:
+				v1 = m_tab_Tvk_baryon[0];
+				v2 = m_tab_Tvk_baryon[1];
+				break;
+			case total: 
+				v1 = m_tab_Tk_tot[0];
+				v2 = m_tab_Tk_tot[1];
+				break;
+            case total0:
+                v1 = m_tab_Tk_tot0[0];
+				v2 = m_tab_Tk_tot0[1];
+				break;
+				
+			default:
+				throw std::runtime_error("Invalid type requested in transfer function evaluation");
+		}
+		
+		double lk = log10(k);
+		double dk = m_tab_k[1]-m_tab_k[0];
+		double delk = lk-m_tab_k[0];
+		
+		// slope between the first two rows, anchored at the first row
+		return pow(10.0,(v2-v1)/dk*(delk)+v1);
+	}
+	
+	//! Log-log linear extrapolation through the last two table rows
+	/*!
+	 \throws std::runtime_error for a type that has no table
+	 */
+	inline double extrap_right( double k, const tf_type& type ) 
+	{
+		double v1(1.0), v2(1.0);
+		
+		int n=m_tab_k.size()-1, n1=n-1;
+		switch( type )
+		{
+			case cdm:
+				v1 = m_tab_Tk_cdm[n1];
+				v2 = m_tab_Tk_cdm[n];
+				break;
+			case baryon:
+				v1 = m_tab_Tk_baryon[n1];
+				v2 = m_tab_Tk_baryon[n];
+				break;
+			case vcdm:
+				v1 = m_tab_Tvk_cdm[n1];
+				v2 = m_tab_Tvk_cdm[n];
+				break;
+			case vbaryon:
+				v1 = m_tab_Tvk_baryon[n1];
+				v2 = m_tab_Tvk_baryon[n];
+				break;
+			case total: 
+				v1 = m_tab_Tk_tot[n1];
+				v2 = m_tab_Tk_tot[n];
+				break;
+            case total0:
+                v1 = m_tab_Tk_tot0[n1];
+                v2 = m_tab_Tk_tot0[n];
+                break;
+				
+			default:
+				throw std::runtime_error("Invalid type requested in transfer function evaluation");
+		}
+		
+		double lk = log10(k);
+		double dk = m_tab_k[n]-m_tab_k[n1];
+		double delk = lk-m_tab_k[n];
+		
+		// slope between the last two rows, anchored at the last row
+		return pow(10.0,(v2-v1)/dk*(delk)+v2);
+	}
+	
+	//! Evaluates the transfer function of the given type at wave number k
+	/*!
+	 Uses extrap_left()/extrap_right() outside [get_kmin(), get_kmax()] and the
+	 matching spline inside.
+	 \throws std::runtime_error for a type that has no table
+	 */
+	inline double compute( double k, tf_type type ){
+		
+		double lk = log10(k);
+		
+		if( k<get_kmin() )
+			return extrap_left(k, type );
+		
+		if( k>get_kmax() )
+			return extrap_right(k,type );
+		
+		
+		switch( type )
+		{
+			case cdm:
+				return pow(10.0, gsl_spline_eval (spline_dcdm, lk, acc_dcdm) );
+			case baryon:
+				return pow(10.0, gsl_spline_eval (spline_dbaryon, lk, acc_dbaryon) );
+			case vcdm:
+				return pow(10.0, gsl_spline_eval (spline_vcdm, lk, acc_vcdm) );
+			case vbaryon:
+				return pow(10.0, gsl_spline_eval (spline_vbaryon, lk, acc_vbaryon) );
+			case total: 
+				return pow(10.0, gsl_spline_eval (spline_dtot, lk, acc_dtot) );
+            case total0:
+                return pow(10.0, gsl_spline_eval (spline_dtot0, lk, acc_dtot0) );
+                
+			default:
+				throw std::runtime_error("Invalid type requested in transfer function evaluation");
+		}
+		
+		return 1.0;
+	}
+	
+	//! Smallest tabulated wave number
+	inline double get_kmin( void ){
+		return pow(10.0,m_tab_k[0]);
+	}
+	
+	//! Largest tabulated wave number
+	inline double get_kmax( void ){
+		return pow(10.0,m_tab_k[m_tab_k.size()-1]);
+	}
+	
+};
+
+// File-local creator object that ties transfer_LINGERpp_plugin to the name "linger++".
+// NOTE(review): presumably its constructor registers the plugin with the
+// transfer-function factory during static initialisation -- confirm in
+// transfer_function.hh.
+namespace{
+	transfer_function_plugin_creator_concrete< transfer_LINGERpp_plugin > creator("linger++");
+}
+
+
--- a/plugins/transfer_music.cc
+++ b/plugins/transfer_music.cc
@ -0,0 +1,296 @@
+/*
+ 
+ transfer_music.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#include "transfer_function.hh"
+
+/*!
+ * @brief transfer function plug-in that reads a tabulated six-column text file
+ *
+ * Every data row of the file named by [cosmology]/transfer_file is read as
+ *     k  T_tot  T_cdm  T_baryon  Tv_cdm  Tv_baryon
+ * The table is kept as log10(k) versus log10(T) and interpolated with GSL
+ * cubic splines; outside the tabulated k range it is continued linearly in
+ * log-log space (see extrap_left() / extrap_right()).
+ */
+class transfer_MUSIC_plugin : public transfer_function_plugin
+{
+	
+private:
+	std::string m_filename_Pk, m_filename_Tk;
+	//! log10(k) and log10 of the five tabulated functions (same length each)
+	std::vector<double> m_tab_k, m_tab_Tk_tot, m_tab_Tk_cdm, m_tab_Tk_baryon, m_tab_Tvk_cdm, m_tab_Tvk_baryon;
+	//! GSL lookup accelerators and cubic splines, one pair per tabulated function
+	gsl_interp_accel *acc_dtot, *acc_dcdm, *acc_dbaryon, *acc_vcdm, *acc_vbaryon;
+	gsl_spline *spline_dtot, *spline_dcdm, *spline_dbaryon, *spline_vcdm, *spline_vbaryon;
+	
+	//! not copyable: the GSL handles are freed in the destructor, a copy would free them twice
+	transfer_MUSIC_plugin( const transfer_MUSIC_plugin& );
+	transfer_MUSIC_plugin& operator=( const transfer_MUSIC_plugin& );
+	
+	//! remember the smallest strictly positive value T seen so far and the k it occurred at
+	static void track_min_positive( double T, double k, double& Tmin, double& kmin )
+	{
+		if( T > 0.0 && T < Tmin ){ Tmin = T; kmin = k; }
+	}
+	
+	/*!
+	 * Replace every entry of tab by its log10. Entries that are not positive
+	 * are first patched with the power law Tmin*(kmin/k)^2, i.e. a k^-2 law
+	 * passing through the smallest positive sample (Tmin at kmin).
+	 * Requires m_tab_k (log10 k) to be filled and as long as tab.
+	 */
+	void take_log_with_patch( std::vector<double>& tab, double Tmin, double kmin ) const
+	{
+		for( size_t i=0; i<tab.size(); ++i )
+		{
+			const double ik2 = 1.0/pow(10.0,2.0*m_tab_k[i]);
+			tab[i] = log10( (tab[i]>0.0)? tab[i] : Tmin*kmin*kmin*ik2 );
+		}
+	}
+	
+	//! table of log10(T) belonging to the requested type; throws for types without a table
+	const std::vector<double>& table_for( const tf_type& type ) const
+	{
+		switch( type )
+		{
+			case cdm:     return m_tab_Tk_cdm;
+			case baryon:  return m_tab_Tk_baryon;
+			case vcdm:    return m_tab_Tvk_cdm;
+			case vbaryon: return m_tab_Tvk_baryon;
+			case total:   return m_tab_Tk_tot;
+			
+			default:
+				throw std::runtime_error("Invalid type requested in transfer function evaluation");
+		}
+	}
+	
+	/*!
+	 * Read the transfer function file into the m_tab_* vectors (on rank 0 when
+	 * built WITH_MPI, followed by a broadcast to all other ranks).
+	 * Rows that do not start with six parseable numbers (blank lines, headers,
+	 * truncated rows) are skipped. A last row without trailing newline is kept.
+	 * @throws std::runtime_error if the file cannot be opened
+	 */
+	void read_table( void ){
+#ifdef WITH_MPI
+		if( MPI::COMM_WORLD.Get_rank() == 0 ){
+#endif
+			std::cerr 
+			<< " - reading tabulated transfer function data from file \n"
+			<< "    \'" << m_filename_Tk << "\'\n";
+			
+			std::ifstream ifs( m_filename_Tk.c_str() );
+			
+			if(! ifs.good() )
+				throw std::runtime_error("Could not find transfer function file \'"+m_filename_Tk+"\'");
+			
+			m_tab_k.clear();
+			m_tab_Tk_tot.clear();
+			m_tab_Tk_cdm.clear();
+			m_tab_Tk_baryon.clear();
+			m_tab_Tvk_cdm.clear();
+			m_tab_Tvk_baryon.clear();
+			
+			// smallest positive value of each column and the k where it occurs
+			double Tktotmin = 1e30, Tkcmin = 1e30, Tkbmin = 1e30, Tkvcmin = 1e30, Tkvbmin = 1e30;
+			double ktotmin = 1e30, kcmin = 1e30, kbmin = 1e30, kvcmin = 1e30, kvbmin = 1e30;
+			
+			std::string line;
+			while( std::getline(ifs,line) ){
+				std::stringstream ss(line);
+				
+				double k, Tkc, Tkb, Tktot, Tkvc, Tkvb;
+				if( !( ss >> k >> Tktot >> Tkc >> Tkb >> Tkvc >> Tkvb ) )
+					continue; // not a complete data row
+				
+				// store log(k)
+				m_tab_k.push_back( log10(k) );
+				
+				// store linear TF values now, logs are taken below
+				m_tab_Tk_tot.push_back( Tktot );
+				m_tab_Tk_baryon.push_back( Tkb );
+				m_tab_Tk_cdm.push_back( Tkc );
+				m_tab_Tvk_cdm.push_back( Tkvc );
+				m_tab_Tvk_baryon.push_back( Tkvb );
+				
+				track_min_positive( Tktot, k, Tktotmin, ktotmin );
+				track_min_positive( Tkc,   k, Tkcmin,   kcmin );
+				track_min_positive( Tkb,   k, Tkbmin,   kbmin );
+				track_min_positive( Tkvc,  k, Tkvcmin,  kvcmin );
+				track_min_positive( Tkvb,  k, Tkvbmin,  kvbmin );
+			}
+			
+			ifs.close();
+			
+			// take logarithms; non-positive values are replaced by a k**(-2) law
+			// this should disappear again upon integration with linger++
+			take_log_with_patch( m_tab_Tk_tot,     Tktotmin, ktotmin );
+			take_log_with_patch( m_tab_Tk_cdm,     Tkcmin,   kcmin );
+			take_log_with_patch( m_tab_Tk_baryon,  Tkbmin,   kbmin );
+			take_log_with_patch( m_tab_Tvk_cdm,    Tkvcmin,  kvcmin );
+			take_log_with_patch( m_tab_Tvk_baryon, Tkvbmin,  kvbmin );
+#ifdef WITH_MPI
+		}
+		
+		unsigned n=m_tab_k.size();
+		MPI::COMM_WORLD.Bcast( &n, 1, MPI_UNSIGNED, 0 );
+		
+		if( MPI::COMM_WORLD.Get_rank() > 0 ){
+			m_tab_k.assign(n,0);
+			m_tab_Tk_tot.assign(n,0);
+			m_tab_Tk_cdm.assign(n,0);
+			m_tab_Tk_baryon.assign(n,0);
+			m_tab_Tvk_cdm.assign(n,0);
+			m_tab_Tvk_baryon.assign(n,0);
+		}
+		
+		MPI::COMM_WORLD.Bcast( &m_tab_k[0],  n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk_tot[0], n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk_cdm[0], n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk_baryon[0], n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tvk_cdm[0], n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tvk_baryon[0], n, MPI_DOUBLE, 0 );
+#endif
+		
+	}
+	
+public:
+	/*!
+	 * Read the table named by [cosmology]/transfer_file and build the splines.
+	 * @throws std::runtime_error if the file is missing or holds fewer than
+	 *         three usable rows (the minimum a GSL cubic spline accepts)
+	 */
+	transfer_MUSIC_plugin( config_file& cf )
+	: transfer_function_plugin( cf )
+	{
+		m_filename_Tk = pcf_->getValue<std::string>("cosmology","transfer_file");
+		
+		read_table( );
+		
+		// checked before any GSL allocation so that nothing leaks on failure
+		if( m_tab_k.size() < 3 )
+			throw std::runtime_error("Transfer function file \'"+m_filename_Tk+"\' contains fewer than 3 valid data rows");
+		
+		const size_t npoints = m_tab_k.size();
+		
+		acc_dtot = gsl_interp_accel_alloc();
+		acc_dcdm = gsl_interp_accel_alloc();
+		acc_dbaryon = gsl_interp_accel_alloc();
+		acc_vcdm = gsl_interp_accel_alloc();
+		acc_vbaryon = gsl_interp_accel_alloc();
+		
+		spline_dtot = gsl_spline_alloc( gsl_interp_cspline, npoints );
+		spline_dcdm = gsl_spline_alloc( gsl_interp_cspline, npoints );
+		spline_dbaryon = gsl_spline_alloc( gsl_interp_cspline, npoints );
+		spline_vcdm = gsl_spline_alloc( gsl_interp_cspline, npoints );
+		spline_vbaryon = gsl_spline_alloc( gsl_interp_cspline, npoints );
+		
+		gsl_spline_init (spline_dtot, &m_tab_k[0], &m_tab_Tk_tot[0], npoints );
+		gsl_spline_init (spline_dcdm, &m_tab_k[0], &m_tab_Tk_cdm[0], npoints );
+		gsl_spline_init (spline_dbaryon, &m_tab_k[0], &m_tab_Tk_baryon[0], npoints );
+		gsl_spline_init (spline_vcdm, &m_tab_k[0], &m_tab_Tvk_cdm[0], npoints );
+		gsl_spline_init (spline_vbaryon, &m_tab_k[0], &m_tab_Tvk_baryon[0], npoints );
+		
+		tf_distinct_ = true;
+		tf_withvel_  = true;
+	}
+	
+	//! release all GSL splines and accelerators
+	~transfer_MUSIC_plugin()
+	{
+		gsl_spline_free (spline_dtot);
+		gsl_spline_free (spline_dcdm);
+		gsl_spline_free (spline_dbaryon);
+		gsl_spline_free (spline_vcdm);
+		gsl_spline_free (spline_vbaryon);
+		
+		gsl_interp_accel_free (acc_dtot);
+		gsl_interp_accel_free (acc_dcdm);
+		gsl_interp_accel_free (acc_dbaryon);
+		gsl_interp_accel_free (acc_vcdm);
+		gsl_interp_accel_free (acc_vbaryon);
+	}
+	
+	/*!
+	 * Continue the table below its first point, linearly in log-log space
+	 * through the first two samples. Returns 1.0 for k < 1e-8 (checked before
+	 * the type, so no exception is raised in that case).
+	 * @throws std::runtime_error if type has no table
+	 */
+	inline double extrap_left( double k, const tf_type& type ) 
+	{
+		if( k<1e-8 )
+			return 1.0;
+		
+		const std::vector<double>& tab = table_for( type );
+		const double v1 = tab[0], v2 = tab[1];
+		
+		const double lk = log10(k);
+		const double dk = m_tab_k[1]-m_tab_k[0];
+		const double delk = lk-m_tab_k[0];
+		
+		return pow(10.0,(v2-v1)/dk*(delk)+v1);
+	}
+	
+	/*!
+	 * Continue the table beyond its last point, linearly in log-log space
+	 * through the last two samples.
+	 * @throws std::runtime_error if type has no table
+	 */
+	inline double extrap_right( double k, const tf_type& type ) 
+	{
+		const std::vector<double>& tab = table_for( type );
+		
+		const int n=m_tab_k.size()-1, n1=n-1;
+		const double v1 = tab[n1], v2 = tab[n];
+		
+		const double lk = log10(k);
+		const double dk = m_tab_k[n]-m_tab_k[n1];
+		const double delk = lk-m_tab_k[n];
+		
+		return pow(10.0,(v2-v1)/dk*(delk)+v2);
+	}
+	
+	/*!
+	 * Evaluate the transfer function of the requested type at wave number k:
+	 * spline interpolation inside [get_kmin(), get_kmax()], extrapolation
+	 * via extrap_left() / extrap_right() outside.
+	 * @throws std::runtime_error if type has no table
+	 */
+	inline double compute( double k, tf_type type ){
+		
+		const double lk = log10(k);
+		
+		if( k<get_kmin() )
+			return extrap_left(k, type );
+		
+		if( k>get_kmax() )
+			return extrap_right(k,type );
+		
+		gsl_spline *spline = 0;
+		gsl_interp_accel *acc = 0;
+		
+		switch( type )
+		{
+			case cdm:     spline = spline_dcdm;    acc = acc_dcdm;    break;
+			case baryon:  spline = spline_dbaryon; acc = acc_dbaryon; break;
+			case vcdm:    spline = spline_vcdm;    acc = acc_vcdm;    break;
+			case vbaryon: spline = spline_vbaryon; acc = acc_vbaryon; break;
+			case total:   spline = spline_dtot;    acc = acc_dtot;    break;
+			
+			default:
+				throw std::runtime_error("Invalid type requested in transfer function evaluation");
+		}
+		
+		return pow(10.0, gsl_spline_eval (spline, lk, acc) );
+	}
+	
+	//! smallest tabulated wave number (the table stores log10 k)
+	inline double get_kmin( void ){
+		return pow(10.0,m_tab_k[0]);
+	}
+	
+	//! largest tabulated wave number (the table stores log10 k)
+	inline double get_kmax( void ){
+		return pow(10.0,m_tab_k[m_tab_k.size()-1]);
+	}
+	
+};
+
+namespace{ // file-local creator object; presumably registers the plug-in in its constructor, like the other *_creator_concrete types -- confirm in transfer_function.hh
+	transfer_function_plugin_creator_concrete< transfer_MUSIC_plugin > creator("music"); // "music" is the name handed to the creator
+}
+
+
--- a/poisson.cc
+++ b/poisson.cc
--- a/poisson.hh
+++ b/poisson.hh
@ -0,0 +1,189 @@
+/*
+ 
+ poisson.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ */
+
+#ifndef __POISSON_HH
+#define __POISSON_HH
+
+#include <string>
+#include <map>
+
+#include "general.hh"
+#include "mesh.hh"
+
+//! abstract base class for Poisson solvers and gradient calculations; concrete solvers override the three pure virtual functions below
+class poisson_plugin
+{
+protected:
+	
+	//! reference to the config_file object that holds all configuration options (not owned, must outlive the plug-in)
+	config_file& cf_;
+	
+public:
+	
+	//! constructor; only binds the reference cf_ to cf
+	explicit poisson_plugin( config_file& cf )
+	: cf_(cf)
+	{ }
+	
+	//! virtual destructor, so derived plug-ins can be destroyed through a base pointer
+	virtual ~poisson_plugin()
+	{ }
+	
+	//! solve Poisson's equation Du=f; the meaning of the returned double is defined by the implementation
+	virtual double solve( grid_hierarchy& f, grid_hierarchy& u ) = 0;
+	
+	//! compute the gradient of u along direction dir; presumably written to Du -- confirm in the implementations
+	virtual double gradient( int dir, grid_hierarchy& u, grid_hierarchy& Du ) = 0;
+	
+	//! compute the gradient and add; presumably accumulates into Du instead of overwriting it -- confirm in the implementations
+	virtual double gradient_add( int dir, grid_hierarchy& u, grid_hierarchy& Du ) = 0;
+	
+};
+
+#pragma mark -
+
+/*!
+ * @brief implements abstract factory design pattern for poisson solver plug-ins
+ *
+ * One creator object exists per plug-in type; create() builds a new plug-in
+ * instance from a configuration.
+ */
+struct poisson_plugin_creator
+{
+	//! create an instance of a plug-in (see poisson_plugin_creator_concrete for the implementation)
+	virtual poisson_plugin * create( config_file& cf ) const = 0;
+	
+	//! virtual destructor of the creator itself (it does not destroy any plug-in instance)
+	virtual ~poisson_plugin_creator() { }
+};
+
+//! returns the registry that maps the name of a Poisson plug-in to its creator object
+std::map< std::string, poisson_plugin_creator *>& get_poisson_plugin_map();
+
+//! print a list of all registered Poisson plug-ins
+void print_poisson_plugins();
+
+
+/*!
+ * @brief concrete factory pattern for Poisson solver plug-ins
+ *
+ * Constructing an object of this type stores its address in the registry
+ * returned by get_poisson_plugin_map() under the given name. The registry
+ * holds a raw pointer, so the creator object must stay alive while it is
+ * registered.
+ */
+template< class Derived >
+struct poisson_plugin_creator_concrete : public poisson_plugin_creator
+{
+	//! register the plug-in by its name (overwrites an existing entry of the same name)
+	poisson_plugin_creator_concrete( const std::string& plugin_name )
+	{
+		get_poisson_plugin_map()[ plugin_name ] = this;
+	}
+	
+	//! create an instance of the plug-in; allocated with new, the caller receives the raw pointer
+	poisson_plugin * create( config_file& cf ) const
+	{
+		return new Derived( cf );
+	}
+};
+
+/**************************************************************************************/
+/**************************************************************************************/
+#pragma mark -
+
+//! adaptive FAS multigrid implementation of abstract base class poisson_plugin (member functions are defined outside this header)
+class multigrid_poisson_plugin : public poisson_plugin
+{
+public:
+	
+	//! constructor; forwards cf to the poisson_plugin base
+	explicit multigrid_poisson_plugin( config_file& cf )
+	: poisson_plugin( cf )
+	{ }
+	
+	//! solve Poisson's equation Du=f
+	double solve( grid_hierarchy& f, grid_hierarchy& u );
+	
+	//! compute the gradient of u along direction dir
+	double gradient( int dir, grid_hierarchy& u, grid_hierarchy& Du );
+	
+	//! compute the gradient along direction dir and add
+	double gradient_add( int dir, grid_hierarchy& u, grid_hierarchy& Du );
+	
+protected:
+	
+	//! various FD approximation implementations, one solve/gradient/gradient_add set per order (2, 4, 6)
+	struct implementation
+	{
+		//! solve 2nd order FD approximation to Poisson's equation
+		double solve_O2( grid_hierarchy& f, grid_hierarchy& u );
+		
+		//! solve 4th order FD approximation to Poisson's equation
+		double solve_O4( grid_hierarchy& f, grid_hierarchy& u );
+		
+		//! solve 6th order FD approximation to Poisson's equation		
+		double solve_O6( grid_hierarchy& f, grid_hierarchy& u );
+		
+		//! compute 2nd order FD gradient
+		void gradient_O2( int dir, grid_hierarchy& u, grid_hierarchy& Du );
+
+		//! compute and add 2nd order FD gradient
+		void gradient_add_O2( int dir, grid_hierarchy& u, grid_hierarchy& Du );
+		
+		//! compute 4th order FD gradient
+		void gradient_O4( int dir, grid_hierarchy& u, grid_hierarchy& Du );
+		
+		//! compute and add 4th order FD gradient
+		void gradient_add_O4( int dir, grid_hierarchy& u, grid_hierarchy& Du );
+		
+		//! compute 6th order FD gradient
+		void gradient_O6( int dir, grid_hierarchy& u, grid_hierarchy& Du );
+		
+		//! compute and add 6th order FD gradient
+		void gradient_add_O6( int dir, grid_hierarchy& u, grid_hierarchy& Du );
+	};
+};
+
+/**************************************************************************************/
+/**************************************************************************************/
+#pragma mark -
+
+//! FFT based implementation of abstract base class poisson_plugin (solve and gradient are defined outside this header)
+class fft_poisson_plugin : public poisson_plugin
+{
+public:
+	
+	//! constructor; forwards cf to the poisson_plugin base
+	explicit fft_poisson_plugin( config_file& cf )
+	: poisson_plugin( cf )
+	{ }
+	
+	//! solve Poisson's equation Du=f
+	double solve( grid_hierarchy& f, grid_hierarchy& u );
+	
+	//! compute the gradient of u along direction dir
+	double gradient( int dir, grid_hierarchy& u, grid_hierarchy& Du );
+	
+	//! compute the gradient and add -- NOTE(review): this is a no-op here; u and Du are not touched and 0.0 is returned
+	double gradient_add( int dir, grid_hierarchy& u, grid_hierarchy& Du ){ return 0.0; }
+	
+	
+};
+
+/**************************************************************************************/
+/**************************************************************************************/
+#pragma mark -
+
+template< typename T > // declaration only; the definition and the accepted types T live outside this header
+void poisson_hybrid( T& f, int idir, int order, bool periodic, bool deconvolve_cic ); // NOTE(review): presumably idir = direction, order = FD order (cf. the O2/O4/O6 routines) -- confirm against the definition
+
+
+
+
+
+
+
+
+
+#endif // __POISSON_HH
+
--- a/random.cc
+++ b/random.cc
--- a/random.hh
+++ b/random.hh
@ -0,0 +1,394 @@
+/*
+ 
+ random.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+*/
+
+//... for testing purposes.............
+//#define DEGRADE_RAND1
+//#define DEGRADE_RAND2
+//.....................................
+
+#ifndef __RANDOM_HH
+#define __RANDOM_HH
+
+#define DEF_RAN_CUBE_SIZE	32
+
+#include <fstream>
+#include <algorithm>
+#include <map>
+#include <omp.h>
+
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+
+#include "general.hh"
+#include "mesh.hh"
+#include "mg_operators.hh"
+#include "constraints.hh"
+
+
+/*!
+ * @brief encapsulates all things random number generator related
+ *
+ * The full mesh is covered by ncubes_^3 cubes of cubesize_^3 cells. Cubes
+ * are looked up through cubemap_ (flattened 3D cube index -> slot in rnums_);
+ * a slot of rnums_ may be NULL while its cube is not allocated.
+ */
+template< typename T >
+class random_numbers
+{
+public:
+	unsigned 
+		res_,		//!< resolution of the full mesh
+		cubesize_,	//!< size of one independent random number cube
+		ncubes_;	//!< number of random number cubes to cover the full mesh
+	long baseseed_;	//!< base seed from which cube seeds are computed 
+	
+    
+protected:
+	//! vector of 3D meshes (the random number cubes) with random numbers; entries are owned and deleted by this class
+	std::vector< Meshvar<T>* > rnums_;
+    
+    //! map of flattened 3D cube index ((i*ncubes_+j)*ncubes_+k) to slot in rnums_
+    std::map<size_t,size_t> cubemap_;
+    
+    typedef std::map<size_t,size_t>::iterator cubemap_iterator;
+	
+protected:
+    
+    //! register a cube with the hash map
+    void register_cube( int i, int j, int k);
+	
+	//! fills a subcube with random numbers
+	double fill_cube( int i, int j, int k);
+	
+	//! subtract a constant from an entire cube
+	void subtract_from_cube( int i, int j, int k, double val );
+	
+	//! copy random numbers from cube (i,j,k) to a full grid array; throws std::runtime_error if the cube is not registered
+	template< class C >
+	void copy_cube( int i, int j, int k, C& dat )
+	{
+		int offi, offj, offk;
+		
+		// destination offsets use the cube indices as passed in, before they are wrapped below
+		offi = i*cubesize_;
+		offj = j*cubesize_;
+		offk = k*cubesize_;
+		
+		i = (i+ncubes_)%ncubes_;
+		j = (j+ncubes_)%ncubes_;
+		k = (k+ncubes_)%ncubes_;
+		
+		size_t icube = (i*ncubes_+j)*ncubes_+k;
+        cubemap_iterator it = cubemap_.find( icube );
+        
+        if( it == cubemap_.end() )
+        {
+            LOGERR("attempting to copy data from non-existing RND cube %d,%d,%d",i,j,k);
+            throw std::runtime_error("attempting to copy data from non-existing RND cube");
+        }
+        
+        size_t cubeidx = it->second;
+		
+		// NOTE(review): rnums_[cubeidx] is dereferenced without a NULL check, unlike in operator()
+		for( int ii=0; ii<(int)cubesize_; ++ii )
+			for( int jj=0; jj<(int)cubesize_; ++jj )
+				for( int kk=0; kk<(int)cubesize_; ++kk )
+					dat(offi+ii,offj+jj,offk+kk) = (*rnums_[cubeidx])(ii,jj,kk);
+	}
+	
+	//! free the memory associated with a subcube
+	void free_cube( int i, int j, int k );
+	
+	//! initialize member variables and allocate memory
+	void initialize( void );
+	
+	//! fill a cubic subvolume of the full grid with random numbers
+	double fill_subvolume( int *i0, int *n );
+	
+	//! fill an entire grid with random numbers
+	double fill_all( void );
+	
+	//! fill an external array instead of the internal field; returns the sum of the fill_cube() results divided by ncubes_^3
+	template< class C >
+	double fill_all( C& dat )
+	{
+		double sum = 0.0;
+		
+        // first pass (serial): register every cube
+        for( int i=0; i<(int)ncubes_; ++i )
+			for( int j=0; j<(int)ncubes_; ++j )
+				for( int k=0; k<(int)ncubes_; ++k )
+				{
+					int ii(i),jj(j),kk(k);
+                    register_cube(ii,jj,kk);
+                }
+        
+		// second pass (OpenMP over i): fill each cube, copy it into dat, free it again
+		#pragma omp parallel for reduction(+:sum)
+		for( int i=0; i<(int)ncubes_; ++i )
+			for( int j=0; j<(int)ncubes_; ++j )
+				for( int k=0; k<(int)ncubes_; ++k )
+				{
+					int ii(i),jj(j),kk(k);
+					
+					ii = (ii+ncubes_)%ncubes_;
+					jj = (jj+ncubes_)%ncubes_;
+					kk = (kk+ncubes_)%ncubes_;
+					
+					sum+=fill_cube(ii, jj, kk);
+					copy_cube(ii,jj,kk,dat);
+					free_cube(ii, jj, kk);
+				}
+		
+		return sum/(ncubes_*ncubes_*ncubes_);
+	}
+	
+	//! write the number of allocated random number cubes to stdout
+	void print_allocated( void );
+	
+public:
+	
+	//! constructor
+	random_numbers( unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx );	
+	
+	//! constructor for constrained fine field
+	random_numbers( random_numbers<T>& rc, unsigned cubesize, long baseseed, 
+			bool kspace=false, bool isolated=false, int *x0_=NULL, int *lx_=NULL, bool zeromean=true );
+	
+	//! constructor
+	random_numbers( unsigned res, unsigned cubesize, long baseseed, bool zeromean=true );
+	
+	
+	//! constructor to read white noise from file
+	random_numbers( unsigned res, std::string randfname, bool rndsign );
+	
+
+	//! copy constructor for averaged field (not copying) hence explicit!
+	explicit random_numbers( /*const*/ random_numbers <T>& rc, bool kdegrade = true );
+	
+	//! destructor; deletes every cube that is still allocated
+	~random_numbers()
+	{
+		for( unsigned i=0; i<rnums_.size(); ++i )
+			if( rnums_[i] != NULL )
+				delete rnums_[i];
+		rnums_.clear();
+	}
+	
+	//! access the random number at cell (i,j,k); the cube holding it must be registered and allocated, otherwise std::runtime_error is thrown. NOTE(review): fillrand is not used in this function
+	inline T& operator()( int i, int j, int k, bool fillrand=true )
+	{
+		int ic, jc, kc, is, js, ks;
+		
+		if( ncubes_ == 0 )
+			throw std::runtime_error("random_numbers: internal error, not properly initialized");
+		
+		//... determine cube (ncubes_ is added before the modulo to wrap cell indices that lie slightly below zero)
+		ic = (int)((double)i/cubesize_ + ncubes_) % ncubes_;
+		jc = (int)((double)j/cubesize_ + ncubes_) % ncubes_;
+		kc = (int)((double)k/cubesize_ + ncubes_) % ncubes_;
+		
+		size_t icube = ((size_t)ic*ncubes_+(size_t)jc)*ncubes_+(size_t)kc;
+		
+        cubemap_iterator it = cubemap_.find( icube );
+        
+        if( it == cubemap_.end() )
+        {
+            LOGERR("Attempting to copy data from non-existing RND cube %d,%d,%d @ %d,%d,%d",ic,jc,kc,i,j,k);
+            throw std::runtime_error("attempting to copy data from non-existing RND cube");
+            
+        }
+        
+        size_t cubeidx = it->second;
+        
+		if( rnums_[ cubeidx ] == NULL )
+		{
+            LOGERR("Attempting to access data from non-allocated RND cube %d,%d,%d",ic,jc,kc);
+            throw std::runtime_error("attempting to access data from non-allocated RND cube");
+		}
+		
+		//... determine cell in cube (NOTE(review): evaluated in unsigned arithmetic because cubesize_ is unsigned)
+		is = (i - ic * cubesize_ + cubesize_) % cubesize_;
+		js = (j - jc * cubesize_ + cubesize_) % cubesize_;
+		ks = (k - kc * cubesize_ + cubesize_) % cubesize_;
+		
+        return (*rnums_[ cubeidx ])(is,js,ks);
+	}
+	
+	//! free all cubes; the slots in rnums_ are kept and reset to NULL
+	void free_all_mem( void )
+	{
+		for( unsigned i=0; i<rnums_.size(); ++i )
+			if( rnums_[i] != NULL )
+			{
+				delete rnums_[i];	
+				rnums_[i] = NULL;
+			}
+	}
+	
+	
+};
+
+
+/*!
+ * @brief encapsulates all things for multi-scale white noise generation
+ *
+ * The generated white-noise fields are kept per refinement level, either in
+ * files named wnoise_NNNN.bin (disk_cached_) or in mem_cache_, and are handed
+ * out through load().
+ */
+template< typename rng, typename T >
+class random_number_generator
+{
+protected:
+	config_file						* pcf_;
+	refinement_hierarchy			* prefh_;
+	constraint_set					constraints;
+	
+	int								levelmin_, 
+									levelmax_, 
+									levelmin_seed_;
+	std::vector<long>				rngseeds_;
+	std::vector<std::string>		rngfnames_;
+	
+	bool							disk_cached_;
+	bool							restart_;
+	//! per-level memory cache, indexed by ilevel-levelmin_; an entry is deleted and set to NULL once load() has consumed it
+	std::vector< std::vector<T>* >	mem_cache_;
+	
+	unsigned						ran_cube_size_;
+	
+
+protected:
+	
+	//! checks if the specified string is numeric
+	bool is_number(const std::string& s);
+	
+	//! parses the random number parameters in the conf file
+	void parse_rand_parameters( void );
+	
+	//! correct coarse grid averages for the change in small scale when using Fourier interpolation
+	void correct_avg( int icoarse, int ifine );
+	
+	//! the main driver routine for multi-scale white noise generation
+	void compute_random_numbers( void );
+	
+	//! store the white noise fields in memory or on disk
+	void store_rnd( int ilevel, rng* prng );
+	
+
+public:
+	
+	//! constructor
+	random_number_generator( config_file& cf, refinement_hierarchy& refh, transfer_function *ptf = NULL );	
+	
+	//! destructor
+	~random_number_generator();
+	
+	/*!
+	 * Load the white noise of level ilevel into the array A.
+	 *
+	 * With disk caching the field is read from wnoise_NNNN.bin. The file must
+	 * have either the dimensions of A, or exactly twice the dimensions of A,
+	 * in which case only its central half per dimension is copied. Without
+	 * disk caching the field is copied from the memory cache and the cache
+	 * entry is released afterwards.
+	 *
+	 * @param A      destination; needs size(d) and operator()(i,j,k)
+	 * @param ilevel refinement level to load
+	 * @throws std::runtime_error if the file is missing, truncated or has the
+	 *         wrong dimensions, or if no matching memory cache entry exists
+	 */
+	template< typename array >
+	void load( array& A, int ilevel )
+	{
+		if( restart_ )
+			LOGINFO("Attempting to restart using random numbers for level %d\n     from file \'wnoise_%04d.bin\'.",ilevel,ilevel);
+		
+		if( disk_cached_ )
+		{
+			char fname[128];
+			sprintf(fname,"wnoise_%04d.bin",ilevel);
+			
+			LOGUSER("Loading white noise from file \'%s\'...",fname);
+			
+			std::ifstream ifs( fname, std::ios::binary );
+			if( !ifs.good() )
+			{	
+				LOGERR("White noise file \'%s\'was not found.",fname);
+				throw std::runtime_error("A white noise file was not found. This is an internal inconsistency and bad.");
+				
+			}
+			
+			int nx,ny,nz;
+			ifs.read( reinterpret_cast<char*> (&nx), sizeof(int) );
+			ifs.read( reinterpret_cast<char*> (&ny), sizeof(int) );
+			ifs.read( reinterpret_cast<char*> (&nz), sizeof(int) );
+			
+			if( !ifs.good() )
+			{
+				LOGERR("White noise file \'%s\' is truncated: could not read its dimensions.",fname);
+				throw std::runtime_error("A white noise file is truncated. This is an internal inconsistency and bad.");
+			}
+			
+			const int ax = (int)A.size(0), ay = (int)A.size(1), az = (int)A.size(2);
+			const bool same_size   = ( nx==ax && ny==ay && nz==az );
+			const bool double_size = ( nx==ax*2 && ny==ay*2 && nz==az*2 );
+			
+			if( !same_size && !double_size )
+			{
+				LOGERR("White noise file is not aligned with array. File: [%d,%d,%d]. Mem: [%d,%d,%d].",
+				       nx,ny,nz,ax,ay,az);
+				throw std::runtime_error("White noise file is not aligned with array. This is an internal inconsistency and bad.");
+			}
+			
+			// one x-slice of the file, reused for every i; size computed in size_t to avoid int overflow
+			const size_t slice_len = (size_t)ny*(size_t)nz;
+			std::vector<T> slice( slice_len, 0.0 );
+			
+			if( double_size )
+			{
+				std::cerr << "CHECKPOINT" << std::endl;
+				
+				// copy the central half of the file in every dimension
+				int ox = nx/4, oy = ny/4, oz = nz/4;
+				
+				for( int i=0; i<nx; ++i )
+				{
+					if( i>=3*ox ) break;
+					
+					ifs.read( reinterpret_cast<char*> ( &slice[0] ), slice_len*sizeof(T) );
+					if( !ifs )
+					{
+						LOGERR("White noise file \'%s\' is truncated: could not read slice %d.",fname,i);
+						throw std::runtime_error("A white noise file is truncated. This is an internal inconsistency and bad.");
+					}
+					
+					if( i<ox ) continue;
+					
+					#pragma omp parallel for
+					for( int j=oy; j<3*oy; ++j )
+						for( int k=oz; k<3*oz; ++k )
+							A(i-ox,j-oy,k-oz) = slice[(size_t)j*nz+(size_t)k];
+				}
+			}
+			else
+			{
+				for( int i=0; i<nx; ++i )
+				{
+					ifs.read( reinterpret_cast<char*> ( &slice[0] ), slice_len*sizeof(T) );
+					if( !ifs )
+					{
+						LOGERR("White noise file \'%s\' is truncated: could not read slice %d.",fname,i);
+						throw std::runtime_error("A white noise file is truncated. This is an internal inconsistency and bad.");
+					}
+					
+					#pragma omp parallel for
+					for( int j=0; j<ny; ++j )
+						for( int k=0; k<nz; ++k )
+							A(i,j,k) = slice[(size_t)j*nz+(size_t)k];
+				}
+			}
+			
+			ifs.close();
+		}
+		else
+		{
+			LOGUSER("Copying white noise from memory cache...");
+			
+			// a missing cache entry used to be logged and then dereferenced; fail cleanly instead
+			const int icache = ilevel-levelmin_;
+			if( icache < 0 || (size_t)icache >= mem_cache_.size() || mem_cache_[icache] == NULL )
+			{
+				LOGERR("Tried to access mem-cached random numbers for level %d. But these are not available!\n",ilevel);
+				throw std::runtime_error("Tried to access mem-cached random numbers that are not available. This is an internal inconsistency and bad");
+			}
+			
+			std::vector<T>& cache = *mem_cache_[icache];
+			
+			int nx( A.size(0) ), ny( A.size(1) ), nz( A.size(2) );
+			
+			if ( (size_t)nx*(size_t)ny*(size_t)nz != cache.size() )
+			{
+				LOGERR("Mem-cached white noise is not aligned with array. Cache: %lu elements. Mem: [%d,%d,%d].",
+				       (unsigned long)cache.size(),nx,ny,nz);
+				throw std::runtime_error("White noise file is not aligned with array. This is an internal inconsistency and bad");
+			}
+			
+			#pragma omp parallel for
+			for( int i=0; i<nx; ++i )
+				for( int j=0; j<ny; ++j )
+					for( int k=0; k<nz; ++k )
+						A(i,j,k) = cache[((size_t)i*ny+(size_t)j)*nz+(size_t)k];
+			
+			// the cached field is consumed: release it and mark the slot as empty
+			delete mem_cache_[icache];
+			mem_cache_[icache] = NULL;
+			
+		}
+
+		
+	}
+};
+
+typedef random_numbers<real_t> rand_nums; //!< random_numbers instantiated for the code-wide real_t type
+typedef random_number_generator< rand_nums,real_t> rand_gen; //!< multi-scale generator that uses rand_nums as its rng type
+
+
+#endif //__RANDOM_HH
+
--- a/region_generator.cc
+++ b/region_generator.cc
@ -0,0 +1,215 @@
+#include <algorithm>
+#include "region_generator.hh"
+
+//! Access the registry that maps a region generator name to its creator.
+//! The registry is a function-local static, i.e. it is created on first use.
+std::map< std::string, region_generator_plugin_creator *>&
+get_region_generator_plugin_map()
+{
+	typedef std::map< std::string, region_generator_plugin_creator* > registry_type;
+	static registry_type the_registry;
+	return the_registry;
+}
+
+//! Write the names of all registered region generator plug-ins to stdout.
+//! Registry entries without a creator (NULL pointer) are not listed.
+void print_region_generator_plugins()
+{
+	typedef std::map< std::string, region_generator_plugin_creator *> registry_type;
+	registry_type& registry = get_region_generator_plugin_map();
+	
+	std::cout << " - Available region generator plug-ins:\n";
+	for( registry_type::iterator entry = registry.begin(); entry != registry.end(); ++entry )
+	{
+		if( !entry->second )
+			continue;
+		std::cout << "\t\'" << entry->first << "\'\n";
+	}
+}
+
+/*!
+ * Create the region generator plug-in selected by [setup]/region in the
+ * configuration (default: "box").
+ *
+ * The registry is searched with find(), so asking for an unknown name does
+ * not add an empty entry to it.
+ *
+ * @param cf configuration, also handed to the created plug-in
+ * @return newly created plug-in, as returned by the creator's create()
+ * @throws std::runtime_error if no plug-in of that name is registered
+ */
+region_generator_plugin *select_region_generator_plugin( config_file& cf )
+{
+	std::string rgname = cf.getValueSafe<std::string>( "setup", "region", "box" );
+	
+	typedef std::map< std::string, region_generator_plugin_creator *> registry_type;
+	registry_type& registry = get_region_generator_plugin_map();
+	registry_type::iterator it = registry.find( rgname );
+	
+	region_generator_plugin_creator *the_region_generator_plugin_creator
+	= ( it != registry.end() )? it->second : 0;
+	
+	if( !the_region_generator_plugin_creator )
+	{
+		std::cerr << " - Error: region generator plug-in \'" << rgname << "\' not found." << std::endl;
+		LOGERR("Invalid/Unregistered region generator plug-in encountered : %s",rgname.c_str() );
+		print_region_generator_plugins();
+		throw std::runtime_error("Unknown region generator plug-in");
+		
+	}else
+	{
+		std::cout << " - Selecting region generator plug-in \'" << rgname << "\'..." << std::endl;
+		LOGUSER("Selecting region generator plug-in  : %s",rgname.c_str() );
+	}
+	
+	region_generator_plugin *the_region_generator_plugin
+	= the_region_generator_plugin_creator->create( cf );
+	
+	return the_region_generator_plugin;
+}
+
+/*******************************************************************************/
+/*******************************************************************************/
+/*******************************************************************************/
+
+#include <cmath>
+
+/*!
+ * @brief region generator for an axis-aligned box in the periodic unit cube
+ *
+ * For levelmin != levelmax the box is read from the [setup] section:
+ * its size from ref_extent (fractions of the box) or ref_dims (cells at
+ * levelmax), its position from ref_center or ref_offset. For
+ * levelmin == levelmax the region is the whole unit cube.
+ */
+class region_box_plugin : public region_generator_plugin{
+private:
+    double
+        x0ref_[3],      //!< coordinates of refinement region origin (in [0..1[)
+        lxref_[3],      //!< extent of refinement region (in [0..1[)
+        xcref_[3];      //!< coordinates of refinement region centre
+    size_t lnref_[3];   //!< extent in cells at levelmax (only meaningful if bhave_nref_)
+    bool bhave_nref_;   //!< true if the extent was given via ref_dims
+    unsigned levelmin_, levelmax_;
+    bool do_extra_padding_; //!< true for output format "grafic2"
+    int padding_;           //!< value of [setup]/padding
+    double padding_fine_;   //!< (padding_+1) cells at levelmax if do_extra_padding_, else 0
+    
+    //! return the value of [setup]/key with all whitespace characters removed
+    std::string read_stripped( const char *key )
+    {
+        std::string temp = pcf_->getValue<std::string>( "setup", key );
+        // remove_if only shifts the kept characters to the front; erase drops the tail
+        temp.erase( std::remove_if(temp.begin(),temp.end(),isspace), temp.end() );
+        return temp;
+    }
+    
+    //! log and throw for a [setup]/key value that is not a comma-separated triple
+    static void fail_parse( const char *key, const std::string& value )
+    {
+        LOGERR("Could not parse three comma-separated values from setup/%s = \'%s\'",key,value.c_str());
+        throw std::runtime_error(std::string("Could not parse three comma-separated values from setup/")+key);
+    }
+    
+    //! read [setup]/key as three comma-separated doubles into v[0..2]
+    void read_triple( const char *key, double *v )
+    {
+        const std::string temp = read_stripped( key );
+        if( sscanf( temp.c_str(), "%lf,%lf,%lf", &v[0],&v[1],&v[2] ) != 3 )
+            fail_parse( key, temp );
+    }
+
+public:
+    /*!
+     * Read the region definition from the configuration.
+     * @throws std::runtime_error if levelmin != levelmax and the position or
+     *         size of the region is missing or cannot be parsed
+     */
+    region_box_plugin( config_file& cf )
+    : region_generator_plugin( cf )
+    {
+        // defaults, so that every member is defined on every code path
+        for( int i=0; i<3; ++i )
+            lnref_[i] = 0;
+        bhave_nref_       = false;
+        do_extra_padding_ = false;
+        padding_          = 0;
+        padding_fine_     = 0.0;
+        
+        levelmin_ = pcf_->getValue<unsigned>("setup","levelmin");
+        levelmax_ = pcf_->getValue<unsigned>("setup","levelmax");
+        
+        if( levelmin_ != levelmax_ )
+        {
+            padding_ = cf.getValue<int>("setup","padding");
+            
+            if( !pcf_->containsKey("setup","ref_offset") && !pcf_->containsKey("setup","ref_center") )
+            {
+                LOGERR("Found levelmin!=levelmax but neither ref_offset nor ref_center was specified.");
+                throw std::runtime_error("Found levelmin!=levelmax but neither ref_offset nor ref_center was specified.");
+            }
+            if( !pcf_->containsKey("setup","ref_extent") && !pcf_->containsKey("setup","ref_dims") )
+            {
+                LOGERR("Found levelmin!=levelmax but neither ref_extent nor ref_dims was specified.");
+                throw std::runtime_error("Found levelmin!=levelmax but neither ref_extent nor ref_dims was specified.");
+            }
+            
+            // size of the region: ref_extent takes precedence over ref_dims
+            if( pcf_->containsKey("setup","ref_extent") )
+            {
+                read_triple( "ref_extent", lxref_ );
+                bhave_nref_ = false;
+            }else if( pcf_->containsKey("setup","ref_dims") ){
+                const std::string temp = read_stripped( "ref_dims" );
+                // scan into unsigned long and convert; size_t has no portable pre-C99 format
+                unsigned long ndims[3];
+                if( sscanf( temp.c_str(), "%lu,%lu,%lu", &ndims[0],&ndims[1],&ndims[2] ) != 3 )
+                    fail_parse( "ref_dims", temp );
+                bhave_nref_ = true;
+                
+                for( int i=0; i<3; ++i )
+                {
+                    lnref_[i] = ndims[i];
+                    lxref_[i] = lnref_[i] * 1.0/(double)(1ul<<levelmax_);
+                }
+            }
+            
+            // position of the region: ref_center takes precedence over ref_offset
+            if( pcf_->containsKey("setup","ref_center") )
+            {
+                read_triple( "ref_center", xcref_ );
+                for( int i=0; i<3; ++i )
+                    x0ref_[i] = fmod( xcref_[i]-0.5*lxref_[i]+1.0,1.0);
+                
+            }else if( pcf_->containsKey("setup","ref_offset") ){
+                read_triple( "ref_offset", x0ref_ );
+                for( int i=0; i<3; ++i )
+                    xcref_[i] = fmod( x0ref_[i]+0.5*lxref_[i], 1.0 );
+            }
+          
+            // conditions should be added here
+            {
+                std::string output_plugin = cf.getValue<std::string>("output","format");
+                if( output_plugin == std::string("grafic2") )
+                    do_extra_padding_ = true;
+                if( do_extra_padding_ )
+                    padding_fine_ = (double)(padding_+1) * 1.0/(1ul<<levelmax_);
+            }
+        }
+        else
+        {
+            x0ref_[0] = x0ref_[1] = x0ref_[2] = 0.0;
+            lxref_[0] = lxref_[1] = lxref_[2] = 1.0;
+            xcref_[0] = xcref_[1] = xcref_[2] = 0.5;
+            
+        }
+    }
+    
+    //! bounding box of the region, enlarged by (padding_+1) cells of the given level if extra padding is active
+    void get_AABB( double *left, double *right, unsigned level )
+    { 
+        double dx = 1.0/(1ul<<level);
+        double pad = (double)(padding_+1) * dx;
+      
+        if( ! do_extra_padding_ ) pad = 0.0;
+        
+        for( int i=0; i<3; ++i )
+        {
+            left[i] = x0ref_[i] - pad;
+            right[i] = x0ref_[i] + lxref_[i] + pad;
+        }
+    }
+  
+    //! replace origin, extent and centre by the box [left,right]; the extent is wrapped into [-0.5,0.5]
+    void update_AABB( double *left, double *right )
+    {
+      for( int i=0; i<3; ++i )
+      {
+        double dx = right[i] - left[i];
+        if( dx < -0.5 ) dx += 1.0; else if (dx > 0.5 ) dx -= 1.0;
+        x0ref_[i] = left[i];
+        lxref_[i] = dx;
+        xcref_[i] = left[i] + 0.5 * dx;
+      }
+    }
+    
+    //! true if x lies inside the box shrunk by padding_fine_ on every side (periodic distance to the origin)
+    bool query_point( double *x )
+    {
+        bool check = true;
+        double dx;
+        for( int i=0; i<3; ++i )
+        {
+            dx = x[i] - x0ref_[i];
+            if( dx < -0.5 ) dx += 1.0;
+            else if (dx > 0.5 ) dx -= 1.0;
+            
+            check &= ((dx >= padding_fine_) & (dx <= lxref_[i]-padding_fine_));
+        }
+        return check;
+    }
+    
+    //! copy the cell dimensions to ndims (zeros unless ref_dims was given) and return whether they are forced
+    bool is_grid_dim_forced( size_t* ndims )
+    {
+        for( int i=0; i<3; ++i )
+            ndims[i] = lnref_[i];
+        return bhave_nref_;
+    }
+    
+    //! centre of the region
+    void get_center( double *xc )
+    {
+        xc[0] = xcref_[0];
+        xc[1] = xcref_[1];
+        xc[2] = xcref_[2];
+    }
+
+  //! same as get_center(): this plug-in applies no re-centering
+  void get_center_unshifted( double *xc )
+  {
+    get_center( xc );
+  }
+};
+
+namespace{ // file-local creator object; presumably registers the plug-in in its constructor -- the creator_concrete definition is cut off in this view
+    region_generator_plugin_creator_concrete< region_box_plugin > creator("box"); // "box" is also the default of [setup]/region in select_region_generator_plugin()
+}
+
+
--- a/region_generator.hh
+++ b/region_generator.hh
@ -0,0 +1,80 @@
+#ifndef __REGION_GENERATOR_HH
+#define __REGION_GENERATOR_HH
+
+#include <vector>
+#include "config_file.hh"
+
+//! Abstract base class for region generators
+/*!
+ This class declares a pure virtual interface from which the concrete
+ region generator plug-ins are derived. The base class itself only keeps
+ a pointer to the configuration object; every geometry query has to be
+ implemented by the derived class.
+ */
+class region_generator_plugin{
+public:
+    config_file *pcf_; //!< address of the config_file handed to the constructor
+public:
+    region_generator_plugin( config_file& cf ) // remembers where the configuration lives, nothing is read here
+    : pcf_( &cf )
+    {
+    }
+    
+    //! destructor, virtual so that plug-ins can be destroyed through a base class pointer
+    virtual ~region_generator_plugin() { };
+    
+    //! compute the bounding box of the region for the given level (left/right: lower/upper corner)
+    virtual void get_AABB( double *left, double *right, unsigned level) = 0;
+    
+    //! query whether the point x lies inside the region
+    virtual bool query_point( double *x ) = 0;
+    
+    //! query whether the region generator explicitly forces the grid dimensions (written to ndims)
+    virtual bool is_grid_dim_forced( size_t *ndims ) = 0;
+    
+    //! get the center of the region
+    virtual void get_center( double *xc ) = 0;
+
+    //! get the center of the region with a possible re-centering unapplied
+    virtual void get_center_unshifted( double *xc ) = 0;
+  
+    //! update the highres bounding box to what the grid generator actually uses
+    virtual void update_AABB( double *left, double *right ) = 0;
+};
+
+//! Implements abstract factory design pattern for region generator plug-ins
+struct region_generator_plugin_creator
+{
+	//! create an instance of a region generator plug-in, configured from cf
+	virtual region_generator_plugin * create( config_file& cf ) const = 0;
+	
+	//! destroy the creator object (virtual, so deletion through a base pointer is well defined)
+	virtual ~region_generator_plugin_creator() { }
+};
+
+//! Return the map from plug-in name to creator object (entries are added by region_generator_plugin_creator_concrete)
+std::map< std::string, region_generator_plugin_creator *>& get_region_generator_plugin_map();
+void print_region_generator_plugins( void ); //!< Write names of registered region generator plug-ins to stdout
+
+//! Concrete factory pattern for region generator plug-ins
+/*!
+ Constructing one object of this template for a plug-in class Derived
+ enters that object into the plug-in map, so that the plug-in can later be
+ created through its name.
+ */
+template< class Derived >
+struct region_generator_plugin_creator_concrete : public region_generator_plugin_creator
+{
+	//! enter this creator into the plug-in map under the given name
+	region_generator_plugin_creator_concrete( const std::string& plugin_name )
+	{
+		std::map< std::string, region_generator_plugin_creator *>& registry
+			= get_region_generator_plugin_map();
+		registry[ plugin_name ] = this;
+	}
+	
+	//! allocate a new Derived with operator new and hand the raw pointer to the caller
+	region_generator_plugin * create( config_file& cf ) const
+	{
+		region_generator_plugin *instance = new Derived( cf );
+		return instance;
+	}
+};
+
+typedef region_generator_plugin region_generator; //!< shorter alias for the plug-in base class
+
+region_generator_plugin *select_region_generator_plugin( config_file& cf ); // defined elsewhere; presumably picks a plug-in from the registered ones based on cf -- TODO confirm
+
+extern region_generator_plugin *the_region_generator; // global region generator pointer, defined in another translation unit
+
+#endif
--- a/schemes.hh
+++ b/schemes.hh
@ -0,0 +1,275 @@
+/*
+ *  schemes.hh
+ *  GravitySolver
+ *
+ *  Created by Oliver Hahn on 2/1/10.
+ *  Copyright 2010 KIPAC/Stanford University. All rights reserved.
+ *
+ */
+
+#ifndef __SCHEME_HH
+#define __SCHEME_HH
+
+#include <vector>
+#include <stdexcept>
+
+#include "solver.hh"
+
+//... abstract implementation of the Poisson/Force scheme
+/*!
+ Bundles one Laplacian object (type L) and one gradient object (type G)
+ and forwards all calls to them. L has to provide apply(), rhs() and
+ ccoeff(); G has to provide apply_x(), apply_y() and apply_z().
+ */
+template< class L, class G, typename real_t=double >
+class scheme
+{
+public:
+	typedef L laplacian;
+	typedef G gradient;
+	
+	laplacian m_laplacian;
+	gradient m_gradient;
+	
+	//! x-component of the gradient of c at cell (i,j,k)
+	template< class C >
+	real_t grad_x( const C& c, const int i, const int j, const int k )
+	{
+		return m_gradient.apply_x( c, i, j, k );
+	}
+	
+	//! y-component of the gradient of c at cell (i,j,k)
+	template< class C >
+	real_t grad_y( const C& c, const int i, const int j, const int k )
+	{
+		return m_gradient.apply_y( c, i, j, k );
+	}
+	
+	//! z-component of the gradient of c at cell (i,j,k)
+	template< class C >
+	real_t grad_z( const C& c, const int i, const int j, const int k )
+	{
+		return m_gradient.apply_z( c, i, j, k );
+	}
+	
+	//! full Laplacian stencil applied to c at cell (i,j,k)
+	template< class C >
+	real_t L_apply( const C& c, const int i, const int j, const int k )
+	{
+		return m_laplacian.apply( c, i, j, k );
+	}
+	
+	//! result of the Laplacian object's rhs() for c at cell (i,j,k)
+	template< class C >
+	real_t L_rhs( const C& c, const int i, const int j, const int k )
+	{
+		return m_laplacian.rhs( c, i, j, k );
+	}
+	
+	//! result of the Laplacian object's ccoeff()
+	real_t ccoeff( void )
+	{
+		return m_laplacian.ccoeff();
+	}
+	
+};
+
+
+//! One-dimensional finite difference stencil with 2*nextent+1 coefficients
+/*!
+ The coefficients are addressed by their offset -nextent..+nextent via
+ operator(). apply() evaluates the stencil along one coordinate direction
+ for every cell of a field and divides by the grid spacing of that
+ direction, which is taken to be 1/(n+1) for n cells.
+ */
+template< int nextent, typename T >
+class gradient
+{
+	typedef T real_t;
+	std::vector<real_t> m_stencil;
+	const unsigned nl;
+public:
+	
+	//! all coefficients start out as zero
+	gradient()
+	: nl( 2*nextent+1 )
+	{ 
+		//... the stencil is one-dimensional, so nl entries are all that can be addressed
+		m_stencil.assign(nl,(real_t)0.0);
+	}
+	
+	//! coefficient for offset i, with -nextent <= i <= nextent
+	real_t& operator()(int i)
+	{ return m_stencil[i+nextent]; }
+	
+	//! coefficient for offset i, with -nextent <= i <= nextent (read only)
+	const real_t& operator()(int i) const
+	{ return m_stencil[i+nextent]; }
+	
+	//! compute the derivative of c along direction dir (0,1,2 = x,y,z) into f
+	/*!
+	 f is overwritten. c is read at up to nextent cells outside of
+	 [0,size) along the chosen direction, so it has to provide these
+	 cells. For any other value of dir, f is left at zero.
+	 */
+	template< class C >
+	inline void apply( const C& c, C& f, int dir )
+	{
+		//... copy first so that f takes over the layout of c, it is cleared below
+		f = c;
+		
+		int nx=c.size(0), ny=c.size(1), nz=c.size(2);		
+		double hx = 1.0/(nx+1.0), hy = 1.0/(ny+1.0), hz = 1.0/(nz+1.0);
+		
+		f.zero();
+		
+		if( dir == 0 )
+		{
+			for( int i=0; i<nx; ++i )
+				for( int j=0; j<ny; ++j )
+					for( int k=0; k<nz; ++k )
+						for( int ii = -nextent; ii<=nextent; ++ii )
+							f(i,j,k) += (*this)(ii) * c(i+ii,j,k)/hx;
+		}
+		else if( dir == 1 )
+		{
+			for( int i=0; i<nx; ++i )
+				for( int j=0; j<ny; ++j )
+					for( int k=0; k<nz; ++k )
+						for( int jj = -nextent; jj<=nextent; ++jj )
+							f(i,j,k) += (*this)(jj) * c(i,j+jj,k)/hy;
+		}
+		else if( dir == 2 )
+		{
+			for( int i=0; i<nx; ++i )
+				for( int j=0; j<ny; ++j )
+					for( int k=0; k<nz; ++k )
+						for( int kk = -nextent; kk<=nextent; ++kk )
+							f(i,j,k) += (*this)(kk) * c(i,j,k+kk)/hz;
+		}
+		
+	}
+};
+
+//! Generic three-dimensional stencil with (2*nextent+1)^3 coefficients
+/*!
+ The coefficients are addressed by their offsets (i,j,k), each in the range
+ -nextent..+nextent, and are all zero after construction. Derived classes
+ fill in the coefficients and may hide apply(), rhs() and ccoeff() with
+ specialised versions.
+ */
+template< int nextent, typename real_t >
+class base_stencil
+{
+protected:
+	std::vector<real_t> m_stencil;
+	const unsigned nl;
+public:
+	bool m_modsource;	//!< flag stored from the constructor argument, not evaluated in this class
+	
+public:
+	base_stencil( bool amodsource = false )
+	: nl( 2*nextent+1 ), m_modsource( amodsource )
+	{
+		m_stencil.assign(nl*nl*nl,(real_t)0.0);
+	}
+	
+	//! coefficient for the offset (i,j,k)
+	real_t& operator()(int i, int j, int k)
+	{ return m_stencil[((i+nextent)*nl+(j+nextent))*nl+(k+nextent)]; }
+	
+	//! coefficient for the offset (i,j,k), read only
+	/*!
+	 The offsets are signed (they run from -nextent to +nextent), so they
+	 are taken as int exactly as in the non-const overload.
+	 */
+	const real_t& operator()(int i, int j, int k) const
+	{ return m_stencil[((i+nextent)*nl+(j+nextent))*nl+(k+nextent)]; }
+	
+	//! stencil applied to c at cell (i,j,k) without the contribution of the central coefficient
+	template< class C >
+	inline real_t rhs( const C& c, const int i, const int j, const int k ) const
+	{
+		real_t sum = this->apply( c, i, j, k );
+		sum -= (*this)(0,0,0) * c(i,j,k);
+		return sum;
+	}
+	
+	//! the central coefficient, i.e. the one for offset (0,0,0)
+	inline real_t ccoeff( void ) const
+	{
+		return (*this)(0,0,0);
+	}
+	
+	
+	//! sum over all offsets of coefficient times field value, evaluated around cell (i,j,k)
+	template< class C >
+	inline real_t apply( const C& c, const int i, const int j, const int k ) const
+	{
+		real_t sum = 0.0;
+		
+		for( int ii=-nextent; ii<=nextent; ++ii )
+			for( int jj=-nextent; jj<=nextent; ++jj )
+				for( int kk=-nextent; kk<=nextent; ++kk )
+					sum += (*this)(ii,jj,kk) * c(i+ii,j+jj,k+kk);
+		
+		return sum;
+	}
+	
+	//! source term modification; the generic stencil has none and returns zero
+	template< class C >
+	inline real_t modsource( const C& c, const int i, const int j, const int k ) const
+	{
+		return 0.0;
+	}
+	
+};
+
+
+/***************************************************************************************/
+/***************************************************************************************/
+/***************************************************************************************/
+
+
+//... Implementation of the Gradient schemes............................................
+
+
+//! Two-point central difference: coefficients -1/2, 0, +1/2 for the offsets -1, 0, +1
+template< typename real_t >
+class deriv_2P : public gradient<1,real_t>
+{
+public:
+	//! fill in the three coefficients of the stencil
+	deriv_2P( void )
+	{
+		gradient<1,real_t>& self = *this;
+		
+		self(-1 ) = -0.5;
+		self( 0 ) =  0.0;
+		self(+1 ) = +0.5;
+	}
+};
+
+//... Implementation of the Laplacian schemes..........................................
+
+/*! 7-point Laplacian stencil: coefficient -6 for the cell itself and +1 for
+ * each of its six face neighbours. apply(), rhs() and ccoeff() hide the
+ * generic base_stencil versions and spell out the same sums explicitly.
+ */
+template< typename real_t >
+class stencil_7P : public base_stencil<1,real_t>
+{
+	// the constructor stores the seven non-zero coefficients in the base class table
+public:
+	stencil_7P( void )
+	{
+		(*this)( 0, 0, 0) = -6.0;
+		(*this)(-1, 0, 0) = +1.0;
+		(*this)(+1, 0, 0) = +1.0;
+		(*this)( 0,-1, 0) = +1.0;
+		(*this)( 0,+1, 0) = +1.0;
+		(*this)( 0, 0,-1) = +1.0;
+		(*this)( 0, 0,+1) = +1.0;
+	}
+	//! sum of the six face neighbours of cell (i,j,k) minus six times the cell value
+	template< class C >
+	inline real_t apply( const C& c, const int i, const int j, const int k ) const
+	{
+		return c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)-6.0*c(i,j,k);
+	}
+	//! sum of the six face neighbours only, i.e. apply() without the central term
+	template< class C >
+	inline real_t rhs( const C& c, const int i, const int j, const int k ) const
+	{
+		return c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1);
+	}
+	//! the central coefficient as a literal, same value as stored for offset (0,0,0)
+	inline real_t ccoeff( void )
+	{
+		return -6.0;
+	}
+};
+
+/*! 13-point Laplacian stencil: coefficient -90/12 for the cell itself, 16/12
+ * for the six neighbours at distance one along the axes and -1/12 for the
+ * six neighbours at distance two along the axes. apply(), rhs() and ccoeff()
+ * hide the generic base_stencil versions and spell out the sums explicitly.
+ */
+template< typename real_t >
+class stencil_13P : public base_stencil<2,real_t>
+{
+	// the constructor stores the thirteen non-zero coefficients in the base class table
+public:
+	stencil_13P( void )
+	{
+		(*this)( 0, 0, 0) = -90.0/12.;
+		// chained assignment: all six distance-one neighbours get 16/12
+		(*this)(-1, 0, 0) = 
+		(*this)(+1, 0, 0) = 
+		(*this)( 0,-1, 0) = 
+		(*this)( 0,+1, 0) = 
+		(*this)( 0, 0,-1) = 
+		(*this)( 0, 0,+1) = 16./12.;
+		// chained assignment: all six distance-two neighbours get -1/12
+		(*this)(-2, 0, 0) = 
+		(*this)(+2, 0, 0) = 
+		(*this)( 0,-2, 0) = 
+		(*this)( 0,+2, 0) = 
+		(*this)( 0, 0,-2) = 
+		(*this)( 0, 0,+2) = -1./12.;
+	}
+	//! full stencil applied to c at cell (i,j,k); reads c up to two cells away along each axis
+	template< class C >
+	inline real_t apply( const C& c, const int i, const int j, const int k )
+	{
+		return 
+			(-1.0*(c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2))
+			 +16.0*(c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1))
+			 -90.0*c(i,j,k))/12.0;
+	}
+	//! same as apply() but without the term of the cell (i,j,k) itself
+	template< class C >
+	inline real_t rhs( const C& c, const int i, const int j, const int k )
+	{
+		return 
+			(-1.0*(c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2))
+			 +16.0*(c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)))/12.0;
+	}
+	//! the central coefficient as a literal, same value as stored for offset (0,0,0)
+	inline real_t ccoeff( void )
+	{
+		return -90.0/12.0;
+	}
+};
+
+#endif
+
+
--- a/solver.hh
+++ b/solver.hh
--- a/tests.hh
+++ b/tests.hh
@ -0,0 +1,296 @@
+/*
+ 
+ tests.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+*/
+
+
+#ifndef __TESTS_HH
+#define __TESTS_HH
+
+#include <math.h>
+
+
+//! Tri-linear (cloud-in-cell) interpolation of the field A to the position (x,y,z)
+/*!
+ The position is given in units of grid cells. The value is the weighted
+ sum over the eight cells (ix..ix+1, iy..iy+1, iz..iz+1), where ix,iy,iz is
+ the position rounded down and the weights are products of the fractional
+ distances. All eight cells have to be accessible in A.
+ */
+inline double CIC_interp_back( const MeshvarBnd<double>& A, double x, double y, double z )
+{
+	//... round down with floor(): a plain (int) cast truncates towards zero, which
+	//... for negative coordinates selects the wrong cell and yields weights outside [0,1]
+	int 
+		ix  = (int)floor(x),
+		iy  = (int)floor(y),
+		iz  = (int)floor(z),
+		ix1 = (ix+1),
+		iy1 = (iy+1),
+		iz1 = (iz+1);
+	
+	
+	//... fractional distance to the lower cell (d*) and its complement (t*)
+    double
+		dx = (double)(x - (double)ix),
+		dy = (double)(y - (double)iy),
+		dz = (double)(z - (double)iz),
+		tx = 1.0-dx,
+		ty = 1.0-dy,
+		tz = 1.0-dz;
+	
+	//... contributions of the eight cells, upper case marks the upper cell along that axis
+    double
+		f_xyz = A(ix,iy,iz)*tx*ty*tz,
+		f_Xyz = A(ix1,iy,iz)*dx*ty*tz,
+		f_xYz = A(ix,iy1,iz)*tx*dy*tz,
+		f_xyZ = A(ix,iy,iz1)*tx*ty*dz,
+		f_XYz = A(ix1,iy1,iz)*dx*dy*tz,
+		f_XyZ = A(ix1,iy,iz1)*dx*ty*dz,
+		f_xYZ = A(ix,iy1,iz1)*tx*dy*dz,
+		f_XYZ = A(ix1,iy1,iz1)*dx*dy*dz;
+	
+    return f_xyz + f_Xyz + f_xYz + f_xyZ + f_XYz + f_XyZ + f_xYZ + f_XYZ;
+}
+
+//! Interpolation of the field A to the position (x,y,z) with quadratic weights
+/*!
+ The position is given in units of grid cells. The 3x3x3 cells around the
+ base cell contribute; along each axis the base cell gets the weight
+ 0.75-d^2 and its two neighbours 1.125-1.5*d+0.5*d^2, where d is the
+ distance between the position and the cell index.
+ 
+ NOTE(review): the base cell is obtained by truncating the coordinates
+ ((int)x) rather than by rounding to the nearest cell -- confirm that this
+ matches the intended grid convention.
+ */
+inline double TSC_interp_back( const MeshvarBnd<double>& A, double x, double y, double z )
+{
+	const double pos[3]  = { x, y, z };
+	const int    base[3] = { (int)x, (int)y, (int)z };
+	
+	//... tabulate the one-dimensional weights for every axis and every offset
+	double w[3][3];
+	for( int axis=0; axis<3; ++axis )
+		for( int s=0; s<3; ++s )
+		{
+			const int cell = base[axis]-1+s;
+			const double dist = fabs(pos[axis]-(double)cell);
+			
+			if( cell==base[axis] )
+				w[axis][s] = 0.75-dist*dist;
+			else
+				w[axis][s] = 1.125 - 1.5*dist + 0.5*dist*dist;
+		}
+	
+	//... accumulate the 27 contributions, x outermost and z innermost
+	double val = 0.0;
+	for( int a=0; a<3; ++a )
+		for( int b=0; b<3; ++b )
+			for( int c=0; c<3; ++c )
+				val += A(base[0]-1+a,base[1]-1+b,base[2]-1+c) * (w[0][a]*w[1][b]*w[2][c]);
+	
+	return val;
+}
+/*! Common data of the test problems: six fields that are all constructed
+ * with the arguments (nb, nres), plus the grid parameters. The derived
+ * classes fill the fields in their constructors.
+ */
+class TestProblem{
+public:
+	MeshvarBnd<double> m_rho, m_uana, m_ubnd, m_xgrad, m_ygrad, m_zgrad; // as filled by the derived tests: source density, analytic potential, boundary potential, analytic gradient components
+	int m_nb, m_nres; // copies of the constructor arguments nb and nres
+	double m_h; // set to 1/nres by the constructor
+	
+	TestProblem( int nb, int nres )
+	: m_rho( nb, nres ), m_uana( nb, nres ), m_ubnd( nb, nres ),
+	m_xgrad( nb, nres ), m_ygrad( nb, nres ), m_zgrad( nb, nres ),
+	m_nb( nb ), m_nres( nres ), m_h( 1.0/((double)nres ) )//m_h( 1.0/((double)nres+1.0 ) )
+	{ }
+	
+};
+
+//! Test problem: a charge q distributed over 3x3x3 cells with fixed weights
+/*!
+ The charge is placed around the cell (nres/2-1) along every axis and
+ spread over the 27 surrounding cells with the weights of TSCcube. The
+ potential and its gradient are set to the superposition of the 27 point
+ contributions q*w/(4*pi*r); contributions at (numerically) zero distance
+ are left out.
+ */
+class TSC_Test : public TestProblem{
+public:
+	double m_q;	//!< total charge, copy of the constructor argument q
+	
+	//! 3x3x3 table of weights, addressed by offsets -1..+1 along each axis
+	/*!
+	 The values are 27/64 (centre), 9/128 (6 faces), 3/256 (12 edges) and
+	 1/512 (8 corners); they add up to one.
+	 */
+	class TSCcube{
+	public:
+		std::vector<double> m_data;
+		
+		
+		TSCcube()
+		{
+			m_data.assign(27,0.0);
+			
+			//.. center
+			(*this)( 0, 0, 0) = 27./64.;
+			
+			//.. faces
+			(*this)(-1, 0, 0) = 
+			(*this)(+1, 0, 0) = 
+			(*this)( 0,-1, 0) = 
+			(*this)( 0,+1, 0) = 
+			(*this)( 0, 0,-1) = 
+			(*this)( 0, 0,+1) = 9./128.;
+			
+			//.. edges
+			(*this)(-1,-1, 0) =
+			(*this)(-1,+1, 0) =
+			(*this)(+1,-1, 0) =
+			(*this)(+1,+1, 0) =
+			(*this)(-1, 0,-1) =
+			(*this)(-1, 0,+1) =
+			(*this)(+1, 0,-1) =
+			(*this)(+1, 0,+1) =
+			(*this)( 0,-1,-1) =
+			(*this)( 0,-1,+1) =
+			(*this)( 0,+1,-1) =
+			(*this)( 0,+1,+1) = 3./256.;
+			
+			//.. corners
+			(*this)(-1,-1,-1) =
+			(*this)(-1,+1,-1) =
+			(*this)(-1,-1,+1) =
+			(*this)(-1,+1,+1) =
+			(*this)(+1,-1,-1) =
+			(*this)(+1,+1,-1) =
+			(*this)(+1,-1,+1) =
+			(*this)(+1,+1,+1) = 1./512.;
+			
+		}
+		
+		//! weight for the offset (i,j,k), each in -1..+1
+		double& operator()(int i, int j, int k)
+		{ return m_data[ ((i+1)*3+(j+1))*3 +(k+1)]; }
+		
+		//! weight for the offset (i,j,k), each in -1..+1 (read only)
+		const double& operator()(int i, int j, int k) const
+		{ return m_data[ ((i+1)*3+(j+1))*3 +(k+1)]; }
+	};
+	
+	//! set up density, analytic potential and analytic gradient
+	/*!
+	 \param nb    passed on to TestProblem; the loops below run from -nb to nres+nb-1
+	 \param nres  passed on to TestProblem; the grid spacing is 1/nres
+	 \param q     total charge
+	 */
+	TSC_Test( int nb, int nres, double q=-1.0 )
+	: TestProblem(nb, nres), m_q(q)
+	{
+		TSCcube c;
+		int xm(nres/2-1), ym(nres/2-1), zm(nres/2-1);
+		double xxm((double)xm*m_h), yym((double)ym*m_h), zzm((double)zm*m_h);
+		
+		double fourpi = 4.0*M_PI;
+		
+		//... only 27 cells of the density are assigned below, so clear it like the other fields
+		m_rho.zero();
+		m_uana.zero();
+		m_ubnd.zero();
+		m_xgrad.zero();
+		m_ygrad.zero();
+		m_zgrad.zero();
+		
+		for( int i=-nb; i<nres+nb; ++i )
+			for( int j=-nb; j<nres+nb; ++j )
+				for( int k=-nb; k<nres+nb; ++k )
+				{
+					
+					//double xxm((double)xm), yym((double)ym), zzm((double)zm);
+					double xx((double)i*m_h), yy((double)j*m_h), zz((double)k*m_h);
+					
+					
+					for( int ix=-1; ix<=1; ++ix )
+						for( int iy=-1; iy<=1; ++iy )
+							for( int iz=-1; iz<=1; ++iz )
+							{
+								double dx(xx-(xxm+ix*m_h)), dy(yy-(yym+iy*m_h)), dz(zz-(zzm+iz*m_h));
+								double d3 = pow(dx*dx+dy*dy+dz*dz,1.5);
+								
+								//... the density is written once, when the outer loops pass the central cell
+								if( i==xm && j==ym && k==zm )
+									m_rho(i+ix,j+iy,k+iz) = m_q*c(ix,iy,iz)/(m_h*m_h*m_h);
+								
+								//... skip the singular self-contribution
+								if( d3 < 1e-10 )
+									continue;
+								
+								//... evaluated only after the guard, so no division by a zero distance
+								double dphi = m_q*c(ix,iy,iz)/sqrt(dx*dx+dy*dy+dz*dz);
+								
+								m_uana(i,j,k) += dphi/fourpi;
+								m_ubnd(i,j,k) += dphi/fourpi;
+								
+								m_xgrad(i,j,k) -= m_q*c(ix,iy,iz)*dx/d3/fourpi;
+								m_ygrad(i,j,k) -= m_q*c(ix,iy,iz)*dy/d3/fourpi;
+								m_zgrad(i,j,k) -= m_q*c(ix,iy,iz)*dz/d3/fourpi;
+								
+								
+							}
+				}
+		
+		
+		//m_rho(xm,ym,zm) = 4.0*M_PI*m_q/(m_h*m_h*m_h);
+		
+	}
+};
+
+/*! Test problem with a single point charge q in the cell (nres/2) along
+ * every axis. The constructor fills the analytic potential q/(4*pi*r) and
+ * its gradient on all cells from -nb to nres+nb-1, sets the boundary field
+ * to zero except on the six faces just outside the grid (index -1 and nres),
+ * and puts the density q/h^3 into the central cell.
+ * Note: at the central cell itself the distance is zero, so the expressions
+ * for the potential and the gradient divide by zero there.
+ */
+class PointMassTest : public TestProblem{
+public:
+	double m_q;
+	
+	PointMassTest( int nb, int nres, double q=-1.0 )
+	: TestProblem(nb, nres), m_q( q )
+	{
+		//int xm(nres/2-1), ym(nres/2-1), zm(nres/2-1);
+		int xm(nres/2), ym(nres/2), zm(nres/2);
+		double xxm((double)xm*m_h), yym((double)ym*m_h), zzm((double)zm*m_h);
+		m_rho.zero();
+		
+		double fourpi = 4.0*M_PI;
+		
+		for( int i=-nb; i<nres+nb; ++i )
+			for( int j=-nb; j<nres+nb; ++j )
+				for( int k=-nb; k<nres+nb; ++k )
+				{
+					
+					//double xxm((double)xm), yym((double)ym), zzm((double)zm);
+					double xx((double)i*m_h), yy((double)j*m_h), zz((double)k*m_h);
+					
+					double dx(xx-xxm), dy(yy-yym), dz(zz-zzm);
+					m_uana(i,j,k) = m_q/sqrt(dx*dx+dy*dy+dz*dz)/fourpi;///2.0/M_PI;
+					m_ubnd(i,j,k) = 0.0;//m_uana(i,j,k);
+					
+					double d3 = pow(dx*dx+dy*dy+dz*dz,1.5);
+					m_xgrad(i,j,k) = -m_q*dx/d3/fourpi;
+					m_ygrad(i,j,k) = -m_q*dy/d3/fourpi;
+					m_zgrad(i,j,k) = -m_q*dz/d3/fourpi;
+				}
+		
+		// boundary values on the two faces perpendicular to x (index -1 and nres)
+		for( int iy=0; iy<nres; ++iy )
+			for( int iz=0; iz<nres; ++iz )
+			{
+				double dx=0.5+0.5*m_h, dy=((double)iy+0.5)*m_h-0.5, dz=((double)iz+0.5)*m_h-0.5, d=sqrt(dx*dx+dy*dy+dz*dz);
+				m_ubnd(-1,iy,iz) = m_q/d/fourpi;
+				dx = 0.5-0.5*m_h;
+				d = sqrt(dx*dx+dy*dy+dz*dz);
+				m_ubnd(nres,iy,iz) = m_q/d/fourpi;
+			}
+		// boundary values on the two faces perpendicular to y
+		for( int ix=0; ix<nres; ++ix )
+			for( int iz=0; iz<nres; ++iz )
+			{
+				double dx=((double)ix+0.5)*m_h-0.5, dy=0.5+0.5*m_h, dz=((double)iz+0.5)*m_h-0.5, d=sqrt(dx*dx+dy*dy+dz*dz);
+				m_ubnd(ix,-1,iz) = m_q/d/fourpi;
+				dy=0.5-0.5*m_h;
+				d = sqrt(dx*dx+dy*dy+dz*dz);
+				m_ubnd(ix,nres,iz) = m_q/d/fourpi;
+			}
+		// boundary values on the two faces perpendicular to z
+		for( int ix=0; ix<nres; ++ix )
+			for( int iy=0; iy<nres; ++iy )
+			{
+				double dx=((double)ix+0.5)*m_h-0.5, dy=((double)iy+0.5)*m_h-0.5, dz=0.5+0.5*m_h, d=sqrt(dx*dx+dy*dy+dz*dz);
+				m_ubnd(ix,iy,-1) = m_q/d/fourpi;
+				dz=0.5-0.5*m_h;
+				d = sqrt(dx*dx+dy*dy+dz*dz);
+				m_ubnd(ix,iy,nres) = m_q/d/fourpi;
+			}
+		
+		// the point charge itself: density q/h^3 in the single central cell
+		m_rho(xm,ym,zm) = m_q/(m_h*m_h*m_h);
+	}	
+};
+
+
+#endif
--- a/transfer_function.cc
+++ b/transfer_function.cc
@ -0,0 +1,63 @@
+/*
+ 
+ transfer_function.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+*/
+
+
+#include "transfer_function.hh"
+
+
+//! Return the table that maps plug-in names to their creator objects
+/*!
+ The table is a function-local static, so every call returns a reference
+ to the same object.
+ */
+std::map< std::string, transfer_function_plugin_creator *>& 
+get_transfer_function_plugin_map()
+{
+	typedef std::map< std::string, transfer_function_plugin_creator* > plugin_map_t;
+	
+	static plugin_map_t the_plugin_map;
+	return the_plugin_map;
+}
+
+//! Write the names of all registered transfer function plug-ins to stdout
+/*!
+ Names whose creator pointer is null are not listed.
+ */
+void print_transfer_function_plugins()
+{
+	typedef std::map< std::string, transfer_function_plugin_creator *> plugin_map_t;
+	plugin_map_t& registry = get_transfer_function_plugin_map();
+	
+	std::cout << " - Available transfer function plug-ins:\n";
+	
+	for( plugin_map_t::iterator entry = registry.begin(); entry != registry.end(); ++entry )
+	{
+		if( !entry->second )
+			continue;
+		
+		std::cout << "\t\'" << entry->first << "\'\n";
+	}
+}
+
+//! Create the transfer function plug-in that is named in the configuration
+/*!
+ The name is read from the key "transfer" in section "cosmology" and looked
+ up among the registered plug-ins.
+ 
+ \param cf  configuration; also handed on to the plug-in that is created
+ \return    pointer to the newly created plug-in as returned by its creator
+ \throws std::runtime_error if no plug-in is registered under that name
+         (the available plug-ins are listed on stdout before throwing)
+ */
+transfer_function_plugin *select_transfer_function_plugin( config_file& cf )
+{
+	std::string tfname = cf.getValue<std::string>( "cosmology", "transfer" );
+	
+	//... look the name up with find(): operator[] would insert an empty entry
+	//... into the global map for every unknown name that is asked for
+	std::map< std::string, transfer_function_plugin_creator *>& plugin_map 
+	= get_transfer_function_plugin_map();
+	std::map< std::string, transfer_function_plugin_creator *>::iterator it 
+	= plugin_map.find( tfname );
+	
+	transfer_function_plugin_creator *the_transfer_function_plugin_creator 
+	= ( it != plugin_map.end() ) ? (*it).second : 0;
+	
+	if( !the_transfer_function_plugin_creator )
+	{	
+		std::cerr << " - Error: transfer function plug-in \'" << tfname << "\' not found." << std::endl;
+		LOGERR("Invalid/Unregistered transfer function plug-in encountered : %s",tfname.c_str() );
+		print_transfer_function_plugins();
+		throw std::runtime_error("Unknown transfer function plug-in");
+		
+	}else
+	{	
+		std::cout << " - Selecting transfer function plug-in \'" << tfname << "\'..." << std::endl;
+		LOGUSER("Selecting transfer function plug-in  : %s",tfname.c_str() );
+	}
+	
+	transfer_function_plugin *the_transfer_function_plugin 
+	= the_transfer_function_plugin_creator->create( cf );
+	
+	return the_transfer_function_plugin;
+}
+
--- a/transfer_function.hh
+++ b/transfer_function.hh
@ -0,0 +1,684 @@
+/*
+ 
+ transfer_function.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+*/
+
+#ifndef __TRANSFERFUNCTION_HH
+#define __TRANSFERFUNCTION_HH
+
+#include <vector>
+#include <sstream>
+#include <fstream>
+#include <iostream>
+#include <cmath>
+#include <stdexcept>
+#include <complex>
+
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_spline.h>
+#include <gsl/gsl_sf_gamma.h>
+
+#include "Numerics.hh"
+#include "config_file.hh"
+
+/*! Selects which transfer function is requested from
+ *  transfer_function_plugin::compute(). Judging by the names only (TODO
+ *  confirm against the plug-ins): total, cdm and baryon are density transfer
+ *  functions, vcdm and vbaryon the corresponding velocity ones, and total0
+ *  the total one at z=0.
+ */
+enum tf_type{
+	total, cdm, baryon, vcdm, vbaryon, total0
+};
+
+#define GSL_INTEGRATION_ERR 1e-5 // handed to gsl_integration_qags as the relative error bound
+
+//! Abstract base class for transfer functions
+/*!
+ This class declares a pure virtual interface from which the concrete
+ transfer function plug-ins are derived. The constructor reads the
+ cosmological parameters from the configuration; the four capability
+ flags start out as false and can be changed by the derived class.
+ */ 
+class transfer_function_plugin{
+public:
+	Cosmology cosmo_;		//!< cosmological parameters, read from config_file in the constructor
+	config_file *pcf_;		//!< pointer to config_file from which to read parameters
+	bool tf_distinct_;		//!< bool if density transfer function is distinct for baryons and DM
+	bool tf_withvel_;		//!< bool if also have velocity transfer functions
+	bool tf_withtotal0_;	//!< have the z=0 spectrum for normalisation purposes
+	bool tf_velunits_;		//!< velocities are in velocity units (km/s)
+public:
+	
+	//! constructor, reads zstart from section "setup" and the other parameters from section "cosmology"
+	transfer_function_plugin( config_file& cf ) 
+	: pcf_( &cf ), tf_distinct_(false), tf_withvel_(false), tf_withtotal0_(false), tf_velunits_(false)
+	{
+		real_t zstart;
+		zstart				= pcf_->getValue<real_t>( "setup", "zstart" );
+		cosmo_.astart		= 1.0/(1.0+zstart); // expansion factor a = 1/(1+z) at the starting redshift
+		cosmo_.Omega_b		= pcf_->getValue<real_t>( "cosmology", "Omega_b" );
+		cosmo_.Omega_m		= pcf_->getValue<real_t>( "cosmology", "Omega_m" );
+		cosmo_.Omega_L		= pcf_->getValue<real_t>( "cosmology", "Omega_L" );
+		cosmo_.H0			= pcf_->getValue<real_t>( "cosmology", "H0" );
+		cosmo_.sigma8		= pcf_->getValue<real_t>( "cosmology", "sigma_8" );
+		cosmo_.nspect		= pcf_->getValue<real_t>( "cosmology", "nspec" );
+	}
+	
+	//! destructor, virtual so that plug-ins can be destroyed through a base class pointer
+	virtual ~transfer_function_plugin(){ };
+	
+	//! compute value of the transfer function of the given type at wave number k
+	virtual double compute( double k, tf_type type) = 0;
+
+	//! return maximum wave number allowed
+	virtual double get_kmax( void ) = 0;
+	
+	//! return minimum wave number allowed
+	virtual double get_kmin( void ) = 0;
+	
+	//! return if density transfer function is distinct for baryons and DM
+	bool tf_is_distinct( void )
+	{	return tf_distinct_;	}
+	
+	//! return if we also have velocity transfer functions
+	bool tf_has_velocities( void )
+	{	return tf_withvel_; }
+    
+    //! return if we also have a z=0 transfer function for normalisation
+    bool tf_has_total0( void )
+    {   return tf_withtotal0_; }
+	
+	//! return if velocity returned is in velocity or in displacement units
+	bool tf_velocity_units( void )
+	{	return tf_velunits_; }
+};
+
+
+//! Implements abstract factory design pattern for transfer function plug-ins
+struct transfer_function_plugin_creator
+{
+	//! create an instance of a transfer function plug-in, configured from cf
+	virtual transfer_function_plugin * create( config_file& cf ) const = 0;
+	
+	//! destroy the creator object (virtual, so deletion through a base pointer is well defined)
+	virtual ~transfer_function_plugin_creator() { }
+};
+
+//! Return the map from plug-in name to creator object (entries are added by transfer_function_plugin_creator_concrete)
+std::map< std::string, transfer_function_plugin_creator *>& get_transfer_function_plugin_map();
+void print_transfer_function_plugins( void ); //!< Write names of registered transfer function plug-ins to stdout
+
+//! Concrete factory pattern for transfer function plug-ins
+/*!
+ Constructing one object of this template for a plug-in class Derived
+ enters that object into the plug-in map, so that the plug-in can later be
+ created through its name.
+ */
+template< class Derived >
+struct transfer_function_plugin_creator_concrete : public transfer_function_plugin_creator
+{
+	//! enter this creator into the plug-in map under the given name
+	transfer_function_plugin_creator_concrete( const std::string& plugin_name )
+	{
+		std::map< std::string, transfer_function_plugin_creator *>& registry
+			= get_transfer_function_plugin_map();
+		registry[ plugin_name ] = this;
+	}
+	
+	//! allocate a new Derived with operator new and hand the raw pointer to the caller
+	transfer_function_plugin * create( config_file& cf ) const
+	{
+		transfer_function_plugin *instance = new Derived( cf );
+		return instance;
+	}
+};
+
+typedef transfer_function_plugin transfer_function; //!< shorter alias for the plug-in base class
+
+transfer_function_plugin *select_transfer_function_plugin( config_file& cf ); // creates the plug-in named by cosmology/transfer in cf (defined in transfer_function.cc)
+
+
+/**********************************************************************/
+/**********************************************************************/
+/**********************************************************************/
+
+//! k-space transfer function
+/*!
+ Evaluates sqrt(pnorm) * k^(nspec/2) * T(k) for one transfer function type.
+ The plug-in pointer, the spectral index and the type are static members,
+ i.e. they are shared by all objects and overwritten by every constructor
+ call. For the types cdm, baryon and total the constructor additionally
+ writes the square of that expression at 100 logarithmically spaced wave
+ numbers, starting at 1e-3, to a text file.
+ */
+class TransferFunction_k
+{
+public:
+	static transfer_function *ptf_;
+	static real_t nspec_;
+	double pnorm_, sqrtpnorm_;
+	static tf_type type_;
+	
+	TransferFunction_k( tf_type type, transfer_function *tf, real_t nspec, real_t pnorm )
+	: pnorm_(pnorm)
+	{
+		ptf_ = tf;
+		nspec_ = nspec;
+		sqrtpnorm_ = sqrt( pnorm_ );
+		type_ = type;
+
+		//... only the density-type spectra are written out
+		const char *fname = 0;
+		switch( type )
+		{
+			case cdm:
+				fname = "input_powerspec_cdm.txt"; break;
+			case baryon:
+				fname = "input_powerspec_baryon.txt"; break;
+			case total:
+				fname = "input_powerspec_total.txt"; break;
+			default:
+				break;
+		}
+		
+		if( fname != 0 )
+		{
+			std::ofstream ofs(fname);
+			const double lgkmin=-3, lgkmax=3;
+			const double dlgk=(lgkmax-lgkmin)/100.;
+			
+			for( int n=0; n<100; ++n )
+			{ 
+				const double k = pow(10.0,lgkmin+n*dlgk);
+				ofs << std::setw(16) << k 
+			    << std::setw(16) << pow(sqrtpnorm_*pow(k,0.5*nspec_)*ptf_->compute(k,type_),2)
+			    << std::endl;
+			}
+		}
+	}
+	
+	//! sqrt(pnorm) * k^(nspec/2) * T(k) for the type chosen at construction
+	real_t compute( real_t k ) const
+	{
+		return sqrtpnorm_*pow(k,0.5*nspec_)*ptf_->compute(k,type_);
+	}
+};
+
+
+/**********************************************************************/
+/**********************************************************************/
+/**********************************************************************/
+
+#define NZERO_Q
+typedef std::complex<double> complex;
+class TransferFunction_real
+{
+	
+public:
+	
+	double Tr0_;
+	real_t Tmin_, Tmax_, Tscale_;
+	real_t rneg_, rneg2_, kny_;
+	static transfer_function *ptf_;
+	static real_t nspec_;
+	
+protected:
+	
+	//! Adjust the product k0*r0 such that the transform does not ring
+	/*!
+	 Computes arg = ln(2/kr)/dlnr + (phase of Gamma((mu+1+q)/2 + i*y)
+	 + phase of Gamma((mu+1-q)/2 + i*y))/pi with y = pi/(2*dlnr). If arg is
+	 not an integer, kr is multiplied by exp((arg-iarg)*dlnr), where iarg is
+	 (int)(arg+0.5).
+	 
+	 \param mu    order of the transform
+	 \param q     bias exponent of the transform
+	 \param dlnr  logarithmic spacing of the grid
+	 \param kr    proposed value of k0*r0
+	 \return      the adjusted value of k0*r0
+	 */
+	real_t krgood( real_t mu, real_t q, real_t dlnr, real_t kr )
+	{
+		gsl_sf_result lnabs, phase;
+		
+		const double xpos = 0.5*(mu+1.0+q);
+		const double xneg = 0.5*(mu+1.0-q);
+		const double y = M_PI/(2.0*dlnr);
+		
+		//... only the phases of the two Gamma functions enter, the moduli are not needed
+		gsl_sf_lngamma_complex_e (xpos, y, &lnabs, &phase);
+		real_t zpa = phase.val;
+
+		gsl_sf_lngamma_complex_e (xneg, y, &lnabs, &phase);
+		real_t zma = phase.val;
+		
+		const double arg=log(2.0/kr)/dlnr+(zpa+zma)/M_PI;
+		const double iarg=(real_t)((int)(arg + 0.5));
+		
+		double krnew = kr;
+		if( arg!=iarg )
+			krnew=kr*exp((arg-iarg)*dlnr);
+		
+		return krnew;
+	}
+	
+	//! FFTLog-type transform of sqrt(pnorm) * k^(nspec/2) * T(k) to real space
+	/*!
+	 The kernel is sampled on a logarithmic grid between 1e-7 and 1e+7,
+	 Fourier transformed, multiplied with the transform kernel and transformed
+	 back. On return rr holds the radii in ascending order and TT the
+	 transformed function at these radii. Two text files are written on the
+	 way: the sampled input spectrum (input_powerspec_*.txt) and the result
+	 (transfer_real_*.txt), named after the static member type_.
+	 
+	 \param pnorm  normalisation of the power spectrum
+	 \param N      not used: the number of samples is reset to 4096 inside
+	 \param q      not used: the bias exponent is reset inside (0.4 if NZERO_Q
+	               is defined, 0 otherwise)
+	 \param rr     receives the radii
+	 \param TT     receives the transformed function
+	 \throws std::runtime_error if type_ is not one of cdm, baryon, total,
+	         vcdm, vbaryon
+	 */
+	void transform( real_t pnorm, unsigned N, real_t q, std::vector<double>& rr, std::vector<double>& TT )
+	{
+		const double mu = 0.5;
+		double qmin = 1.0e-7, qmax = 1.0e+7;
+		
+		q = 0.0;
+		
+		//N = 16384;
+		N = 1<<12;
+		
+#ifdef NZERO_Q
+		q=0.4;
+		//q=-0.1;
+#endif
+		
+		double kmin = qmin, kmax=qmax;
+		double rmin = qmin, rmax = qmax;
+		double k0 = exp(0.5*(log(kmax)+log(kmin)));
+		double r0 = exp(0.5*(log(rmax)+log(rmin)));
+		double L = log(rmax)-log(rmin);
+		double k0r0 = k0*r0;
+		double dlnk = L/N, dlnr = L/N;
+		
+		double sqrtpnorm = sqrt(pnorm);
+		
+		double dir = 1.0;
+		
+		double fftnorm = 1.0/N;
+		
+		//... perform anti-ringing correction from Hamilton (2000)
+		k0r0 = krgood( mu, q, dlnr, k0r0 );
+		
+		std::string ofname;
+		switch( type_ )
+		{
+			case cdm:		
+				ofname = "input_powerspec_cdm.txt"; break;
+			case baryon:	
+				ofname = "input_powerspec_baryon.txt"; break;
+			case total:		
+				ofname = "input_powerspec_total.txt"; break;
+			case vcdm:		
+				ofname = "input_powerspec_vcdm.txt"; break;
+			case vbaryon:	
+				ofname = "input_powerspec_vbaryon.txt"; break;
+			default:
+				throw std::runtime_error("Unknown transfer function type in TransferFunction_real::transform");
+		}
+		
+		//... allocate the work arrays only now that the type is known to be valid,
+		//... so that the exception above cannot leak them
+		fftw_complex *in, *out;
+		
+		in = new fftw_complex[N];
+		out = new fftw_complex[N];
+		
+		std::ofstream ofsk(ofname.c_str());
+		
+		//... sample the kernel on the logarithmic k grid and log it to file
+		for( unsigned i=0; i<N; ++i )
+		{
+			double k = k0*exp(((int)i - (int)N/2+1) * dlnk);
+			double T = ptf_->compute( k, type_ );
+			double del = sqrtpnorm*T*pow(k,0.5*nspec_);
+			
+			RE(in[i]) = del*pow(k,1.5-q);
+			IM(in[i]) = 0.0;
+			
+			ofsk << std::setw(16) << k <<std::setw(16) << del*del << std::setw(16) << T << std::endl;
+		}
+		ofsk.close();
+		
+#ifdef FFTW3
+	#ifdef SINGLE_PRECISION
+		fftwf_plan p,ip;
+		p = fftwf_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
+		ip = fftwf_plan_dft_1d(N, out, in, FFTW_BACKWARD, FFTW_ESTIMATE);
+		fftwf_execute(p);
+	#else
+		fftw_plan p,ip;
+		p = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE);
+		ip = fftw_plan_dft_1d(N, out, in, FFTW_BACKWARD, FFTW_ESTIMATE);
+		fftw_execute(p);
+	#endif
+#else
+		fftw_plan p,ip;
+		p = fftw_create_plan(N, FFTW_FORWARD, FFTW_ESTIMATE);
+		ip = fftw_create_plan(N, FFTW_BACKWARD, FFTW_ESTIMATE);
+		fftw_one(p, in, out);
+#endif
+		
+		//... compute the Hankel transform by convolution with the Bessel function
+		for( unsigned i=0; i<N; ++i )
+		{
+			//... map the array index to a signed frequency index
+			int ii=i;
+			if( ii > (int)N/2 )
+				ii -= N;
+			
+#ifndef NZERO_Q
+			double y=ii*M_PI/L;
+			complex zp((mu+1.0)*0.5,y);
+			gsl_sf_result g_a, g_p;
+			gsl_sf_lngamma_complex_e(zp.real(), zp.imag(), &g_a, &g_p);
+			
+			double arg = 2.0*(log(2.0/k0r0)*y+g_p.val);
+			//complex cu = complex(out[i].re,out[i].im)*std::polar(1.0,arg);
+			//out[i].re = cu.real()*fftnorm;
+			//out[i].im = cu.imag()*fftnorm;
+			
+			complex cu = complex( RE(out[i]), IM(out[i]) ) * std::polar(1.0,arg);
+			RE(out[i]) = cu.real()*fftnorm;
+			IM(out[i]) = cu.imag()*fftnorm;
+			
+			
+#else		
+			complex x(dir*q, (double)ii*2.0*M_PI/L);
+			gsl_sf_result g_a, g_p;
+			
+			complex g1, g2, garg, U, phase;						
+			complex twotox = pow(complex(2.0,0.0),x);
+			
+			/////////////////////////////////////////////////////////
+			//.. evaluate complex Gamma functions
+			
+			garg = 0.5*(mu+1.0+x);
+			gsl_sf_lngamma_complex_e (garg.real(), garg.imag(), &g_a, &g_p);
+			g1 = std::polar(exp(g_a.val),g_p.val);
+
+			
+			garg = 0.5*(mu+1.0-x);
+			gsl_sf_lngamma_complex_e (garg.real(), garg.imag(), &g_a, &g_p);
+			g2 = std::polar(exp(g_a.val),g_p.val);
+
+			/////////////////////////////////////////////////////////
+			//.. compute U
+			
+			//.. a vanishing denominator is replaced by U = 2^x
+			if( (fabs(g2.real()) < 1e-19 && fabs(g2.imag()) < 1e-19) )
+			{
+				//std::cerr << "Warning : encountered possible singularity in TransferFunction_real::transform!\n";
+				g1 = 1.0; g2 = 1.0;
+			}
+			
+			
+			U = twotox * g1 / g2;
+			phase = pow(complex(k0r0,0.0),complex(0.0,2.0*M_PI*(double)ii/L));
+			
+			complex cu = complex(RE(out[i]),IM(out[i]))*U*phase*fftnorm;
+			
+			RE(out[i]) = cu.real();
+			IM(out[i]) = cu.imag();
+
+			/*if( (RE(out[i]) != RE(out[i]))||(IM(out[i]) != IM(out[i])) )
+			{	std::cerr << "NaN @ i=" << i << ", U= " << U << ", phase = " << phase << ", g1 = " << g1 << ", g2 = " << g2 << std::endl;
+				std::cerr << "mu+1+q = " << mu+1.0+q << std::endl;
+				//break;
+			}*/
+			
+#endif
+
+		}
+		
+#ifdef FFTW3
+	#ifdef SINGLE_PRECISION
+		fftwf_execute(ip);
+	#else
+		fftw_execute(ip);
+	#endif
+#else
+		fftw_one(ip, out, in);
+#endif
+		
+		rr.assign(N,0.0);
+		TT.assign(N,0.0);
+		
+		r0 = k0r0/k0;
+		
+		//... store the result in reversed order, i.e. with ascending radius
+		for( unsigned i=0; i<N; ++i )
+		{
+			int ii = i;
+			ii -= N/2-1;
+			double r = r0*exp(-ii*dlnr);
+			
+			rr[N-i-1] = r;
+			TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) *  RE(in[i]) * pow(r,-(1.5+q));
+		}
+		
+		
+		//... write the real space function to file, in array order
+		{
+			std::string fname;
+			if(type_==total) fname = "transfer_real_total.txt";
+			if(type_==cdm) fname = "transfer_real_cdm.txt";
+			if(type_==baryon) fname = "transfer_real_baryon.txt";
+			if(type_==vcdm) fname = "transfer_real_vcdm.txt";
+			if(type_==vbaryon) fname = "transfer_real_vbaryon.txt";
+			
+			std::ofstream ofs(fname.c_str());
+							  
+			for( unsigned i=0; i<N; ++i )
+			{
+				int ii = i;
+				ii -= N/2-1;
+				
+				double r = r0*exp(-ii*dlnr);//r0*exp(ii*dlnr);
+
+				double T = 4.0*M_PI* sqrt(M_PI/2.0) *  RE(in[i]) * pow(r,-(1.5+q));
+				ofs << r << "\t\t" << T << "\t\t" << IM(in[i]) << std::endl;				
+			}
+		}
+		
+		delete[] in;
+		delete[] out;
+		
+#if defined(FFTW3) && defined(SINGLE_PRECISION)
+		fftwf_destroy_plan(p);
+		fftwf_destroy_plan(ip);
+#else
+		fftw_destroy_plan(p);
+		fftw_destroy_plan(ip);
+#endif
+	}
+	std::vector<real_t> m_xtable,m_ytable,m_dytable;
+	double m_xmin, m_xmax, m_dx, m_rdx;
+	static tf_type type_;
+	
+	
+public:
+	
+	//... Build the real-space kernel lookup tables for one transfer function component.
+	//...
+	//... The constructor
+	//...   (1) stores tf, nspec, type and knymax in the members that the static GSL
+	//...       callbacks (call_x/call_y/call_z) read,
+	//...   (2) calls transform() to obtain the radii r and the kernel T(r),
+	//...   (3) computes Tr0_ from nested 1D integrations over kx, ky, kz,
+	//...   (4) tabulates r^2*T(r) on nr points uniform in log10(r) between rmin and
+	//...       rmax, plus a finite-difference table of dT/dr used by get_grad().
+	//...
+	//... boxlength : sets kmin = 2*pi/boxlength and, with nfull, kmax
+	//... nfull     : sets kmax = nfull*pi/boxlength
+	//... type      : component selector passed on to tf->compute()
+	//... tf        : transfer function object; only the pointer is stored
+	//... nspec     : spectral index, integrands use k^(nspec/2)
+	//... pnorm     : normalisation, passed to transform(); sqrt(pnorm) scales Tr0_
+	//... rmin,rmax : radial range covered by the lookup tables
+	//... knymax    : stored in kny_
+	//... nr        : number of table entries (also passed to transform())
+	TransferFunction_real( double boxlength, int nfull, tf_type type, transfer_function *tf, 
+						   real_t nspec, real_t pnorm, real_t rmin, real_t rmax, real_t knymax, unsigned nr )
+	{
+		real_t q = 0.8;
+
+		//... these members are read by the static integrand callbacks below
+		ptf_	= tf;
+		nspec_	= nspec;
+		type_	= type;
+		kny_	= knymax;
+
+
+		/*****************************************************************/
+		//... compute the FFTlog transform of the k^n T(k) kernel
+
+		std::vector<double> r,T;
+		transform( pnorm, nr, q, r, T );
+
+		//... from here on GSL errors are reported via return values only
+		gsl_set_error_handler_off ();
+
+
+		/*****************************************************************/
+		//... compute T(r=0) by 3D k-space integration
+		{
+			const double REL_PRECISION=1.e-5;
+
+			gsl_integration_workspace * ws = gsl_integration_workspace_alloc (1000);
+			gsl_function ff;
+
+			double kmin = 2.0*M_PI/boxlength;
+			double kmax = nfull*M_PI/boxlength;
+
+			//... a[0],a[1] carry kx,ky down to call_z; a[3],a[4] are the
+			//... integration limits used on every axis by call_x/call_y
+			double a[6];
+
+			//... integrate 0.1*kmin..kmax
+			a[3] = 0.1*kmin;
+			a[4] = kmax;
+
+			ff.function = &call_x;
+			ff.params = reinterpret_cast<void*> (a);
+			double res, err, res2, err2;
+
+			gsl_integration_qags( &ff, a[3], a[4], 0.0, GSL_INTEGRATION_ERR, 1000, ws, &res, &err );
+
+			//... compare magnitudes so that a negative integral cannot hide a
+			//... failed convergence
+			if( fabs(err/res) > REL_PRECISION )
+				std::cerr << " - Warning: no convergence in \'TransferFunction_real\', rel. error=" << fabs(err/res) << std::endl;
+
+			//... integrate 0.1*kmin..kmin
+			a[3] = 0.1*kmin;
+			a[4] = kmin;
+			gsl_integration_qags( &ff, a[3], a[4], 0.0, GSL_INTEGRATION_ERR, 1000, ws, &res2, &err2 );
+
+			if( fabs(err2/res2) > 10*REL_PRECISION )
+				std::cerr << " - Warning: no convergence in \'TransferFunction_real\', rel. error=" << fabs(err2/res2) << std::endl;
+
+			gsl_integration_workspace_free ( ws );
+
+			//.. get kmin..kmax
+			res -= res2;
+			//.. *8 because we only integrated one octant
+			res *= 8.0*sqrt(pnorm);
+			Tr0_ = res;
+		}
+
+		/*****************************************************************/
+		//... store as table for spline interpolation
+
+		gsl_interp_accel *accp;
+		gsl_spline *splinep;
+
+		std::vector<double> xsp, ysp;
+		xsp.reserve( r.size() );
+		ysp.reserve( r.size() );
+
+		//... keep only radii strictly inside (rmin,rmax); nodes are log10(r), r^2*T
+		for( unsigned i=0; i<r.size(); ++i )
+		{
+			if( r[i] > rmin && r[i] < rmax )
+			{
+				xsp.push_back( log10(r[i]) );
+				ysp.push_back( T[i]*r[i]*r[i] );
+			}
+
+		}
+
+		accp = gsl_interp_accel_alloc ();
+
+		//... spline interpolation is only marginally slower here
+		//... NOTE(review): Akima interpolation needs a minimum number of nodes;
+		//... nothing here checks xsp.size() before allocating -- confirm callers
+		//... always provide a wide enough (rmin,rmax) range
+		splinep = gsl_spline_alloc (gsl_interp_akima, xsp.size() );
+
+		//... set up everything for spline interpolation
+		gsl_spline_init (splinep, &xsp[0], &ysp[0], xsp.size() );
+
+		//.. build lookup table using spline interpolation
+		m_xmin = log10(rmin);
+		m_xmax = log10(rmax);
+		m_dx   = (m_xmax-m_xmin)/nr;
+		m_rdx  = 1.0/m_dx;
+
+		m_xtable.reserve( nr );
+		m_ytable.reserve( nr );
+
+		//... NOTE(review): the first abscissa is m_xmin, which lies below the first
+		//... spline node because only r > rmin was kept above; how gsl_spline_eval
+		//... treats such out-of-range arguments depends on the GSL version -- confirm
+		for(unsigned i=0; i<nr; ++i )
+		{
+			m_xtable.push_back( m_xmin+i*m_dx );
+			m_ytable.push_back( gsl_spline_eval(splinep, (m_xtable.back()), accp) );
+		}
+
+		if( nr > 1 )
+			m_dytable.reserve( nr-1 );
+
+		//... finite-difference dT/dr between neighbouring table points.
+		//... m_xtable holds log10(r) and m_ytable holds r^2*T, so T is recovered by
+		//... dividing by r^2 = (10^x)^2, not by x^2.
+		//... 'i+1<nr' avoids the unsigned wrap-around of 'nr-1' for nr==0.
+		for(unsigned i=0; i+1<nr; ++i )
+		{
+			const double rlo = pow(10.0,m_xtable[i]);
+			const double rhi = pow(10.0,m_xtable[i+1]);
+
+			real_t dy,dr;
+			dy = m_ytable[i+1]/(rhi*rhi)-m_ytable[i]/(rlo*rlo);
+			dr = rhi-rlo;
+			m_dytable.push_back(dy/dr);
+		}
+
+		gsl_spline_free (splinep);
+		gsl_interp_accel_free (accp);
+	}
+	
+	
+	//... GSL-style integrand: 4*pi * a[0] * T(k) * k^(nspec/2) * k^2,
+	//... where a[0] is the first entry of the double array passed via 'arg'.
+	static double call_wrapper( double k, void *arg )
+	{
+		const double *params = static_cast<const double*>(arg);
+
+		//... evaluate the transfer function first, then the power-law factor
+		const double transfer = ptf_->compute( k, type_ );
+		const double kpow     = pow(k,0.5*nspec_);
+
+		return 4.0*M_PI*params[0]*transfer*kpow*k*k;
+	}
+	
+	
+	
+	//... Outermost integrand of the nested k-space integral: for a fixed kx,
+	//... integrates call_y over ky between the limits stored in arg[3] and arg[4].
+	//... kx is written to arg[0] so that call_z can pick it up.
+	static double call_x( double kx, void *arg )
+	{
+		double *params = static_cast<double*>(arg);
+
+		const double lower = params[3];
+		const double upper = params[4];
+
+		//... publish the current kx to the inner integrands
+		params[0] = kx;
+
+		gsl_function inner;
+		inner.function = &call_y;
+		inner.params   = reinterpret_cast<void*> (params);
+
+		//... each nesting level needs a workspace of its own
+		gsl_integration_workspace *work = gsl_integration_workspace_alloc (1000);
+
+		double integral, abserr;
+		gsl_integration_qags( &inner, lower, upper, 0.0, GSL_INTEGRATION_ERR, 1000, work, &integral, &abserr );
+
+		gsl_integration_workspace_free (work);
+
+		return integral;
+	}
+	
+	//... Middle integrand of the nested k-space integral: for a fixed ky,
+	//... integrates call_z over kz between the limits stored in arg[3] and arg[4].
+	//... ky is written to arg[1] so that call_z can pick it up.
+	static double call_y( double ky, void *arg )
+	{
+		double *params = static_cast<double*>(arg);
+
+		const double lower = params[3];
+		const double upper = params[4];
+
+		//... publish the current ky to the innermost integrand
+		params[1] = ky;
+
+		gsl_function inner;
+		inner.function = &call_z;
+		inner.params   = reinterpret_cast<void*> (params);
+
+		//... each nesting level needs a workspace of its own
+		gsl_integration_workspace *work = gsl_integration_workspace_alloc (1000);
+
+		double integral, abserr;
+		gsl_integration_qags( &inner, lower, upper, 0.0, GSL_INTEGRATION_ERR, 1000, work, &integral, &abserr );
+
+		gsl_integration_workspace_free (work);
+
+		return integral;
+	}
+	
+	//... Innermost integrand: |k|^(nspec/2) * T(|k|) with |k| built from
+	//... arg[0] (kx), arg[1] (ky) and the integration variable kz.
+	static double call_z( double kz, void *arg )
+	{
+		const double *params = static_cast<const double*>(arg);
+		const double kx = params[0];
+		const double ky = params[1];
+
+		const double kmag     = sqrt(kx*kx+ky*ky+kz*kz);
+		const double transfer = ptf_->compute( kmag, type_ );
+
+		return pow(kmag,0.5*nspec_)*transfer;
+	}
+	
+	//... nothing to release here: the lookup tables are std::vector members
+	~TransferFunction_real() {}
+
+	//... Look up the tabulated gradient for squared radius r2.
+	//... The bin index is clamped to [0, m_xtable.size()-2]; no interpolation is done.
+	inline real_t get_grad( real_t r2 ) const
+	{
+		//... log10(r) = 0.5*log10(r^2)
+		const double logr = 0.5*fast_log10(r2);
+		const double pos  = (logr-m_xmin)*m_rdx;
+
+		const int last = static_cast<int>(m_xtable.size())-2;
+		int bin = static_cast<int>(pos);
+
+		if( bin < 0 )
+			bin = 0;
+		if( bin > last )
+			bin = last;
+
+		return static_cast<real_t>(m_dytable[bin]);
+	}
+	
+	//... fast version
+	//... fast version
+	//... Evaluate the kernel at squared radius r2 by linear interpolation in the
+	//... log10(r) table of r^2*T. Below r2 = (1e-8)^2 the r=0 value Tr0_ is returned.
+	//... The bin index is clamped to the table but the interpolation weight is not,
+	//... so arguments outside the table are extrapolated linearly from the end bins.
+	inline real_t compute_real( real_t r2 ) const
+	{
+		const double tiny_r  = 1e-8;
+		const double tiny_r2 = tiny_r*tiny_r;
+
+		if( r2 <tiny_r2 )
+			return Tr0_;
+
+		//... log10(r) = 0.5*log10(r^2)
+		const double logr = 0.5*fast_log10(r2);
+		const double pos  = (logr-m_xmin)*m_rdx;
+
+		const int last = static_cast<int>(m_xtable.size())-2;
+		int bin = static_cast<int>(pos);
+
+		if( bin < 0 )
+			bin = 0;
+		if( bin > last )
+			bin = last;
+
+		const double ylo = m_ytable[bin];
+		const double yhi = m_ytable[bin+1];
+		const double weight = pos-static_cast<double>(bin);
+
+		//divide by r**2 because r^2 T is tabulated
+		return static_cast<real_t>((ylo + (yhi-ylo)*weight)/r2);
+	}
+};
+
+
+#endif