From c48c73488abc7f64345c4397d9c175381dc4721d Mon Sep 17 00:00:00 2001
From: Oliver Hahn <oliverjhahn@gmail.com>
Date: Fri, 2 Jul 2010 11:49:30 -0700
Subject: [PATCH] Initial commit, beta version release candidate

---
 Makefile                       |   61 ++
 Numerics.cc                    |  154 ++++
 Numerics.hh                    |  204 +++++
 TransferFunction.hh            | 1318 +++++++++++++++++++++++++++++
 config_file.hh                 |  327 ++++++++
 convolution_kernel.cc          |  583 +++++++++++++
 convolution_kernel.hh          |  116 +++
 cosmology.cc                   |  222 +++++
 cosmology.hh                   |  224 +++++
 densities.cc                   |  837 +++++++++++++++++++
 densities.hh                   |  827 +++++++++++++++++++
 fd_schemes.hh                  |  408 +++++++++
 general.hh                     |  121 +++
 main.cc                        |  605 ++++++++++++++
 mesh.hh                        | 1181 ++++++++++++++++++++++++++
 mg_interp.hh                   | 1267 ++++++++++++++++++++++++++++
 mg_operators.hh                | 1362 ++++++++++++++++++++++++++++++
 mg_solver.hh                   | 1051 +++++++++++++++++++++++
 output.cc                      |   73 ++
 output.hh                      |  176 ++++
 plugins/HDF_IO.hh              | 1024 +++++++++++++++++++++++
 plugins/output_enzo.cc         |  396 +++++++++
 plugins/output_gadget2.cc      |  670 +++++++++++++++
 plugins/output_generic.cc      |  192 +++++
 plugins/output_grafic2.cc      |  325 ++++++++
 plugins/transfer_bbks.cc       |   95 +++
 plugins/transfer_camb.cc       |  167 ++++
 plugins/transfer_eisenstein.cc |  218 +++++
 plugins/transfer_inflation.cc  |   64 ++
 poisson.cc                     |  499 +++++++++++
 poisson.hh                     |  140 ++++
 random.hh                      |  459 +++++++++++
 schemes.hh                     |  275 +++++++
 solver.hh                      | 1110 +++++++++++++++++++++++++
 tests.hh                       |  309 +++++++
 transfer_function.cc           |   72 ++
 transfer_function.hh           | 1420 ++++++++++++++++++++++++++++++++
 37 files changed, 18552 insertions(+)
 create mode 100644 Makefile
 create mode 100644 Numerics.cc
 create mode 100644 Numerics.hh
 create mode 100644 TransferFunction.hh
 create mode 100644 config_file.hh
 create mode 100644 convolution_kernel.cc
 create mode 100644 convolution_kernel.hh
 create mode 100644 cosmology.cc
 create mode 100644 cosmology.hh
 create mode 100644 densities.cc
 create mode 100644 densities.hh
 create mode 100644 fd_schemes.hh
 create mode 100644 general.hh
 create mode 100644 main.cc
 create mode 100644 mesh.hh
 create mode 100644 mg_interp.hh
 create mode 100644 mg_operators.hh
 create mode 100644 mg_solver.hh
 create mode 100644 output.cc
 create mode 100644 output.hh
 create mode 100755 plugins/HDF_IO.hh
 create mode 100644 plugins/output_enzo.cc
 create mode 100644 plugins/output_gadget2.cc
 create mode 100644 plugins/output_generic.cc
 create mode 100644 plugins/output_grafic2.cc
 create mode 100644 plugins/transfer_bbks.cc
 create mode 100644 plugins/transfer_camb.cc
 create mode 100644 plugins/transfer_eisenstein.cc
 create mode 100644 plugins/transfer_inflation.cc
 create mode 100644 poisson.cc
 create mode 100644 poisson.hh
 create mode 100644 random.hh
 create mode 100644 schemes.hh
 create mode 100644 solver.hh
 create mode 100644 tests.hh
 create mode 100644 transfer_function.cc
 create mode 100644 transfer_function.hh

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..93a2de6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,61 @@
+##############################################################################
+### compile time configuration options
+MULTITHREADFFTW	= yes
+SINGLEPRECISION	= no
+HAVEHDF5        = yes
+
+##############################################################################
+### compiler and path settings
+CC      = g++
+OPT     = -O3
+CFLAGS  = -Wall -fopenmp 
+LFLAGS  = -fopenmp -lgsl -lgslcblas 
+CPATHS  = -I. -I$(HOME)/local/include -I/opt/local/include -I/usr/local/include
+LPATHS  = -L$(HOME)/local/lib -L/opt/local/lib -L/usr/local/lib
+
+##############################################################################
+# if you have FFTW 2.1.5 with multi-thread support, you can enable the option
+ifeq ($(MULTITHREADFFTW), yes)
+  ifeq ($(SINGLEPRECISION), yes)
+    LFLAGS  += -lsfftw_threads -lsrfftw_threads
+  else
+    LFLAGS  += -ldfftw_threads -ldrfftw_threads
+  endif
+else
+  CFLAGS  += -DSINGLETHREAD_FFTW
+endif
+
+##############################################################################
+# this section makes sure that the correct FFTW libraries are linked
+ifeq ($(SINGLEPRECISION), yes)
+  CFLAGS  += -DSINGLE_PRECISION
+  LFLAGS  += -lsrfftw -lsfftw
+else
+  LFLAGS  += -ldrfftw -ldfftw
+endif
+
+##############################################################################
+#if you have HDF5 installed, you can also enable the following options
+ifeq ($(HAVEHDF5), yes)
+  OPT += -DH5_USE_16_API -DHAVE_HDF5
+  LFLAGS += -lhdf5
+endif
+
+##############################################################################
+CFLAGS += $(OPT)
+TARGET  = MUSIC
+OBJS    = output.o transfer_function.o Numerics.o \
+		convolution_kernel.o densities.o cosmology.o main.o \
+		$(patsubst plugins/%.cc,plugins/%.o,$(wildcard plugins/*.cc))
+
+##############################################################################
+all: $(OBJS) $(TARGET)
+
+$(TARGET): $(OBJS)
+	$(CC) $(LPATHS) -o $@ $^ $(LFLAGS)
+%.o: %.cc *.hh Makefile 
+	$(CC) $(CFLAGS) $(CPATHS) -c $< -o $@
+
+clean:
+	rm -rf $(OBJS)
+
diff --git a/Numerics.cc b/Numerics.cc
new file mode 100644
index 0000000..8692ce0
--- /dev/null
+++ b/Numerics.cc
@@ -0,0 +1,154 @@
+/*
+ 
+ numerics.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+#ifdef WITH_MPI
+  #ifdef MANNO
+    #include <mpi.h>
+  #else
+    #include <mpi++.h>
+  #endif
+#endif
+#include <iostream>
+#include "Numerics.hh"
+
+#ifndef REL_PRECISION
+#define REL_PRECISION 1.e-4
+#endif
+
+int Base_interp::locate(const double x)
+{
+	int ju,jm,jl; 
+	if (n < 2 || mm < 2 || mm > n) throw("locate size error");
+	bool ascnd=(xx[n-1] >= xx[0]); 
+	jl=0; 
+	ju=n-1; 
+	while (ju-jl > 1) {
+		jm = (ju+jl) >> 1; 
+		if ((x >= xx[jm]) == ascnd)
+			jl=jm; 
+		else
+			ju=jm;
+	}
+	cor = abs(jl-jsav) > dj ? 0 : 1; 
+	jsav = jl; 
+	return std::max(0,std::min(n-mm,jl-((mm-2)>>1)));
+}
+
+int Base_interp::hunt(const double x)
+{
+	int jl=jsav, jm, ju, inc=1; 
+	if (n < 2 || mm < 2 || mm > n) throw("hunt size error");
+	
+	bool ascnd=(xx[n-1] >= xx[0]); 
+	
+	if (jl < 0 || jl > n-1) {
+		jl=0;
+		ju=n-1; 
+	} else {
+		if ((x >= xx[jl]) == ascnd) {
+			for (;;) {
+				ju = jl + inc; 
+				if (ju >= n-1) { 
+					ju = n-1; 
+					break;
+				} else if ((x < xx[ju]) == ascnd) break;
+				else {
+					jl = ju;
+					inc += inc;
+				}
+			}
+		} else {
+			ju = jl; 
+			for (;;) {
+				jl = jl - inc;
+				if (jl <= 0) { jl = 0; break;}
+				else if ((x >= xx[jl]) == ascnd) break;
+				else {
+					ju = jl; 
+					inc += inc;
+				}
+			}
+		}
+	}
+	
+	while (ju-jl > 1) {
+		jm = (ju+jl) >> 1; 
+		if ((x >= xx[jm]) == ascnd)
+			jl=jm; 
+		else
+			ju=jm;
+		
+	}
+	
+	cor = abs(jl-jsav) > dj ? 0 : 1; 
+	jsav = jl; 
+	
+	return std::max(0,std::min(n-mm,jl-((mm-2)>>1)));
+}
+
+#if 1
+real_t integrate( double (* func) (double x, void * params), double a, double b, void *params )
+{
+  gsl_function F;
+  F.function = func;
+  F.params = params;//NULL;
+
+  double result;
+  double error;
+	//size_t neval;
+  gsl_integration_workspace *w = gsl_integration_workspace_alloc(100000);
+  gsl_integration_qag( &F, a, b, 0, REL_PRECISION, 100000, 6, w, &result, &error );
+	//gsl_integration_qng( &F, a, b, 0, REL_PRECISION, &result, &error, &neval );
+
+	//gsl_integration_qags( &F, a, b, 0, REL_PRECISION, 10000, w, &result, &error );
+	
+  gsl_integration_workspace_free(w);
+
+
+  if( error > 10*REL_PRECISION )
+    std::cerr << " - Warning: no convergence in function 'integrate', rel. error=" << error/result << std::endl;
+
+  return (real_t)result;
+}
+#else
+
+real_t integrate( double (* func) (double x, void * params), double a, double b, void *params )
+{
+	unsigned nn = 1000;
+	double la = log10(a), lb = log10(b), dlk = (lb-la)/(nn-1);
+	
+	double sum = 0.0;
+	for( unsigned i=1; i<nn; ++i )
+	{
+		
+		double xr = pow(10.0, la+i*dlk );
+		double xl = pow(10.0, la+(i-1)*dlk );
+		
+		
+		sum += (xr-xl)*func(0.5*(xl+xr),params);
+		
+	}
+	return sum;
+}
+
+#endif
diff --git a/Numerics.hh b/Numerics.hh
new file mode 100644
index 0000000..37c46ea
--- /dev/null
+++ b/Numerics.hh
@@ -0,0 +1,204 @@
+/*
+ 
+ numerics.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#ifndef __NUMERICS_HH
+#define __NUMERICS_HH
+
+#ifdef WITH_MPI
+  #ifdef MANNO
+    #include <mpi.h>
+  #else
+    #include <mpi++.h>
+  #endif
+#endif
+
+#include <cmath>
+#include <gsl/gsl_integration.h>
+#include <vector>
+#include <algorithm>
+#include "general.hh"
+
+
+real_t integrate( double (* func) (double x, void * params), double a, double b, void *params=NULL);
+
+struct Base_interp
+{
+	int n, mm, jsav, cor, dj; 
+	const double *xx, *yy; 
+	
+	Base_interp(std::vector<double> &x, const double *y, int m)
+	: n(x.size()), mm(m), jsav(0), cor(0), dj(0), xx(&x[0]), yy(y) 
+	{ 
+		dj = std::min(1,(int)pow((double)n,0.25));
+	}
+
+	double interp(double x) {
+		int jlo = cor ? hunt(x) : locate(x); 
+		return rawinterp(jlo,x);
+	}
+	
+	virtual ~Base_interp()
+	{ }
+	
+	int locate(const double x);
+	int hunt(const double x);
+
+	double virtual rawinterp(int jlo, double x) = 0;
+
+};
+
+
+
+
+class Spline_interp : public Base_interp
+{
+protected:
+	std::vector<double> y2;
+	
+	void sety2( const double *xv, const double *yv, double yp1, double ypn)
+	{
+		int i,k; 
+		double p,qn,sig,un; 
+		//int n=y2.size(); 
+		std::vector<double> u(n-1,0.0); 
+		
+		if (yp1 > 0.99e99)
+			y2[0]=u[0]=0.0; 
+		else {
+			y2[0] = -0.5; 
+			u[0]=(3.0/(xv[1]-xv[0]))*((yv[1]-yv[0])/(xv[1]-xv[0])-yp1);
+		} 
+		
+		for (i=1;i<n-1;i++) {
+			sig=(xv[i]-xv[i-1])/(xv[i+1]-xv[i-1]); 
+			p=sig*y2[i-1]+2.0; 
+			y2[i]=(sig-1.0)/p; 
+			u[i]=(yv[i+1]-yv[i])/(xv[i+1]-xv[i]) - (yv[i]-yv[i-1])/(xv[i]-xv[i-1]); 
+			u[i]=(6.0*u[i]/(xv[i+1]-xv[i-1])-sig*u[i-1])/p;
+		} 
+		
+		if (ypn > 0.99e99)
+			qn=un=0.0; 
+		else {
+			qn=0.5; 
+			un=(3.0/(xv[n-1]-xv[n-2]))*(ypn-(yv[n-1]-yv[n-2])/(xv[n-1]-xv[n-2]));
+		} 
+		
+		y2[n-1]=(un-qn*u[n-2])/(qn*y2[n-2]+1.0); 
+		
+		for (k=n-2;k>=0;k--)
+			y2[k]=y2[k]*y2[k+1]+u[k];
+	}
+	
+	
+public:
+	
+	Spline_interp( std::vector<double> &xv, std::vector<double> &yv, double yp1=1e99, double ypn=1.e99 )
+	: Base_interp( xv, &yv[0], xv.size() ), y2( xv.size(), 0.0 )
+	{
+		sety2( &xv[0], &yv[0], yp1, ypn );
+	}
+	
+	virtual ~Spline_interp()
+	{ }
+
+	double rawinterp( int jl, double x )
+	{
+		int klo=jl,khi=jl+1; 
+		double y,h,b,a; 
+		
+		h=xx[khi]-xx[klo]; 
+		
+		if (h == 0.0) throw("Bad input to routine splint"); 
+		
+		a=(xx[khi]-x)/h;
+		b=(x-xx[klo])/h;
+		y=a*yy[klo]+b*yy[khi]+((a*a*a-a)*y2[klo] +(b*b*b-b)*y2[khi])*(h*h)/6.0; 
+		
+		return y;
+	}
+
+};
+
+
+
+#if 1
+inline unsigned locate( const double x, const std::vector<double> vx )
+{
+	long unsigned ju,jm,jl;
+	bool ascnd=(vx[vx.size()-1]>=vx[0]);
+	jl = 0;
+	ju = vx.size()-1;
+	while( ju-jl > 1 ) {
+		jm = (ju+jl)>>1;
+		if( (x >= vx[jm]) == ascnd )
+			jl = jm;
+		else
+			ju = jm;
+	}
+	return std::max((long unsigned)0,std::min((long unsigned)(vx.size()-2),(long unsigned)jl));
+}
+#else
+inline unsigned locate( const real_t x, const std::vector<double> vx )
+{
+	unsigned i=0;
+	while( vx[i]<x && (unsigned)i< vx.size() ) ++i;
+  if(i>0)i=i-1;
+	return i;
+}
+#endif
+
+
+inline real_t linint( const double x, const std::vector<double>& xx, const std::vector<double>& yy )
+{
+	unsigned i = locate(x,xx);
+/*	if(i==xx.size()-1) //--i;
+		return 0.0;
+	if( x<xx[0] )
+		return 0.0;*/
+	if( x<xx[0] )
+		return yy[0];
+	if( x>=xx[xx.size()-1] )
+		return yy[yy.size()-1]; 
+	double a  = 1.0/(xx[i+1]-xx[i]);
+	double dy = (yy[i+1]-yy[i])*a;
+	double y0 = (yy[i]*xx[i+1]-xx[i]*yy[i+1])*a;
+  return dy*x+y0;
+}
+/*
+inline float linint( float x, const std::vector<float>& xx, const std::vector<float>& yy )
+{
+
+        int i=0;
+        while( xx[i]<x && (unsigned)i< xx.size() ) ++i;
+        if(i>0)i=i-1;
+        float dy = (yy[i+1]-yy[i])/(xx[i+1]-xx[i]);
+        float y0 = (yy[i]*xx[i+1]-xx[i]*yy[i+1])/(xx[i+1]-xx[i]);
+        return dy*x+y0;
+}
+*/
+
+
+#endif
+
+
diff --git a/TransferFunction.hh b/TransferFunction.hh
new file mode 100644
index 0000000..dc7ea52
--- /dev/null
+++ b/TransferFunction.hh
@@ -0,0 +1,1318 @@
+/*
+ This file is part of FROLIC -
+ a tool to generate initial conditions for cosmological simulations
+ 
+ Copyright (C) 2008-09  Oliver Hahn, ojha@gmx.de
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __TRANSFERFUNCTION_HH
+#define __TRANSFERFUNCTION_HH
+
+#include <vector>
+#include <sstream>
+#include <fstream>
+#include <iostream>
+#include <cmath>
+#include <stdexcept>
+
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_spline.h>
+#include <gsl/gsl_sf_gamma.h>
+
+#include "Numerics.hh"
+#include "general.hh"
+
+#include <complex>
+
+#define NZERO_Q
+
+typedef std::complex<double> complex;
+
+//! Abstract base class for transfer functions
+/*!
+    This class implements a purely virtual interface that can be
+    used to derive instances implementing various transfer functions.
+*/ 
+class TransferFunction{
+public:
+	Cosmology m_Cosmology;
+	
+public:
+  
+	TransferFunction( Cosmology acosm ) : m_Cosmology( acosm ) { };
+	virtual double compute( double k ) = 0;
+	virtual ~TransferFunction(){ };
+	virtual double get_kmax( void ) = 0;
+	virtual double get_kmin( void ) = 0;
+};
+
+class TransferFunction_real
+{
+	
+public:
+	gsl_interp_accel *accp, *accn;
+	gsl_spline *splinep, *splinen;
+	double Tr0_, Tmin_, Tmax_, Tscale_;
+	double rneg_, rneg2_;
+	static TransferFunction *ptf_;
+	static double nspec_;
+	
+protected:
+	
+	double krgood( double mu, double q, double dlnr, double kr )
+	{
+		double krnew = kr;
+		complex cdgamma, zm, zp;
+		double arg, iarg, xm, xp, y;
+		gsl_sf_result g_a, g_p;
+		
+		xp = 0.5*(mu+1.0+q);
+		xm = 0.5*(mu+1.0-q);
+		y = M_PI/(2.0*dlnr);
+		zp=complex(xp,y);
+		zm=complex(xm,y);
+		
+		gsl_sf_lngamma_complex_e (zp.real(), zp.imag(), &g_a, &g_p);
+		zp=std::polar(exp(g_a.val),g_p.val);
+		double zpa = g_p.val;
+
+		gsl_sf_lngamma_complex_e (zm.real(), zm.imag(), &g_a, &g_p);
+		zm=std::polar(exp(g_a.val),g_p.val);
+		double zma = g_p.val;
+		
+		arg=log(2.0/kr)/dlnr+(zpa+zma)/M_PI;
+		iarg=(double)((int)(arg + 0.5));
+		
+		if( arg!=iarg )
+			krnew=kr*exp((arg-iarg)*dlnr);
+		
+		return krnew;
+	}
+	
+	void transform( double pnorm, double dplus, unsigned N, double q, std::vector<double>& rr, std::vector<double>& TT )
+	{
+		const double mu = 0.5;
+		double qmin = 1.0e-6, qmax = 1.0e+6;
+		
+		q = 0.0;
+		
+		N = 16384;
+		
+#ifdef NZERO_Q
+		//q = 0.4;
+		q = 0.2;
+#endif
+		
+		double kmin = qmin, kmax=qmax;
+		double rmin = qmin, rmax = qmax;
+		double k0 = exp(0.5*(log(kmax)+log(kmin)));
+		double r0 = exp(0.5*(log(rmax)+log(rmin)));
+		double L = log(rmax)-log(rmin);
+		double k0r0 = k0*r0;
+		double dlnk = L/N, dlnr = L/N;
+		
+		double sqrtpnorm = sqrt(pnorm);
+		
+		double dir = 1.0;
+		
+		double fftnorm = 1.0/N;
+		
+		fftw_complex in[N], out[N];
+		fftw_plan p,ip;
+		
+		//... perform anti-ringing correction from Hamilton (2000)
+		k0r0 = krgood( mu, q, dlnr, k0r0 );
+
+		std::ofstream ofsk("transfer_k.txt");
+		double sum_in = 0.0;
+		for( unsigned i=0; i<N; ++i )
+		{
+			
+			double k = k0*exp(((int)i - (int)N/2+1) * dlnk);
+			//double k = k0*exp(((int)i - (int)N/2) * dlnk);
+			//double k = k0*exp(ii * dlnk);
+			
+			//... some constants missing ...//
+			in[i].re = dplus*sqrtpnorm*ptf_->compute( k )*pow(k,0.5*nspec_)*pow(k,1.5-q);
+			in[i].im = 0.0;
+			
+			sum_in += in[i].re;
+			ofsk << std::setw(16) << k <<std::setw(16) << in[i].re << std::endl;
+		}
+		ofsk.close();
+		
+		
+		p = fftw_create_plan(N, FFTW_FORWARD, FFTW_ESTIMATE);
+		ip = fftw_create_plan(N, FFTW_BACKWARD, FFTW_ESTIMATE);
+		
+		//fftw_one(p, in, out);
+		fftw_one(p, in, out);
+		
+		//... compute the Hankel transform by convolution with the Bessel function
+		for( unsigned i=0; i<N; ++i )
+		{
+			int ii=i;
+			if( ii > (int)N/2 )
+				ii -= N;
+			
+#ifndef NZERO_Q
+			double y=ii*M_PI/L;
+			complex zp((mu+1.0)*0.5,y);
+			gsl_sf_result g_a, g_p;
+			gsl_sf_lngamma_complex_e(zp.real(), zp.imag(), &g_a, &g_p);
+			
+			double arg = 2.0*(log(2.0/k0r0)*y+g_p.val);
+			complex cu = complex(out[i].re,out[i].im)*std::polar(1.0,arg);
+			out[i].re = cu.real()*fftnorm;
+			out[i].im = cu.imag()*fftnorm;
+			
+#else		
+			//complex x(dir*q, (double)ii*2.0*M_PI/L);
+			complex x(dir*q, (double)ii*2.0*M_PI/L);
+			gsl_sf_result g_a, g_p;
+			
+			complex g1, g2, garg, U, phase;						
+			complex twotox = pow(complex(2.0,0.0),x);
+			
+			/////////////////////////////////////////////////////////
+			//.. evaluate complex Gamma functions
+			
+			garg = 0.5*(mu+1.0+x);
+			gsl_sf_lngamma_complex_e (garg.real(), garg.imag(), &g_a, &g_p);
+			g1 = std::polar(exp(g_a.val),g_p.val);
+
+			
+			garg = 0.5*(mu+1.0-x);
+			gsl_sf_lngamma_complex_e (garg.real(), garg.imag(), &g_a, &g_p);
+			g2 = std::polar(exp(g_a.val),g_p.val);
+
+			/////////////////////////////////////////////////////////
+			//.. compute U
+			
+			if( (fabs(g2.real()) < 1e-19 && fabs(g2.imag()) < 1e-19) )
+			{
+				//std::cerr << "Warning : encountered possible singularity in TransferFunction_real::transform!\n";
+				g1 = 1.0; g2 = 1.0;
+			}
+			
+			
+			U = twotox * g1 / g2;
+			phase = pow(complex(k0r0,0.0),complex(0.0,2.0*M_PI*(double)ii/L));
+			
+			complex cu = complex(out[i].re,out[i].im)*U*phase*fftnorm;
+			
+			out[i].re = cu.real();
+			out[i].im = cu.imag();
+			
+			if( (out[i].re != out[i].re)||(out[i].im != out[i].im) )
+			{	std::cerr << "NaN @ i=" << i << ", U= " << U << ", phase = " << phase << ", g1 = " << g1 << ", g2 = " << g2 << std::endl;
+				std::cerr << "mu+1+q = " << mu+1.0+q << std::endl;
+				//break;
+			}
+			
+#endif
+
+		}
+			
+		/*out[N/2].im = 0.0;
+		out[N/2+1].im = 0.0;
+		out[N/2+1].re = out[N/2].re;
+		out[N/2].im = 0.0;*/
+		
+		fftw_one(ip, out, in);
+		
+		rr.assign(N,0.0);
+		TT.assign(N,0.0);
+		
+		r0 = k0r0/k0;
+		
+		for( unsigned i=0; i<N; ++i )
+		{
+			int ii = i;
+			ii -= N/2-1;
+			//ii -= N/2;
+			//if( ii>N/2)
+			//	ii-=N;
+			
+			
+			
+			double r = r0*exp(-ii*dlnr);
+			rr[N-i-1] = r;
+			TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*pow(r,-(1.5+q));
+			
+			//TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*exp( -dir*(q+1.5)*ii*dlnr +q*log(k0r0))/r0;
+			
+			//rr[i] = r;
+			//TT[i] = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*pow(r,-(1.5+q));
+			
+		}
+		
+		
+		{
+			std::ofstream ofs("transfer_real_new.txt");
+			for( unsigned i=0; i<N; ++i )
+			{
+				int ii = i;
+				ii -= N/2-1;
+				
+				double r = r0*exp(-ii*dlnr);//r0*exp(ii*dlnr);
+				double T = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*pow(r,-(1.5+q));
+				ofs << r << "\t\t" << T << "\t\t" << in[i].im << std::endl;
+			}
+		}
+		
+
+		fftw_destroy_plan(p);
+		fftw_destroy_plan(ip);
+	}
+	
+public:
+	TransferFunction_real( TransferFunction *tf, double nspec, double pnorm, double dplus, double rmin, double rmax, double knymax, unsigned nr )
+	{
+				
+		ptf_ = tf;
+		nspec_ = nspec;
+	
+		double q = 0.8;
+		
+		std::vector<double> r,T,xp,yp,xn,yn;
+		
+		transform( pnorm, dplus, nr, q, r, T );
+		
+		//... determine r=0 zero component by integrating up to the Nyquist frequency
+		gsl_integration_workspace * wp; 
+		gsl_function F;
+		wp = gsl_integration_workspace_alloc(20000);
+		F.function = &call_wrapper;
+		double par[2]; par[0] = dplus*sqrt(pnorm); //par[1] = M_PI/kny;
+		F.params = (void*)par;
+		double error;
+		
+		//#warning factor of sqrt(1.5) needs to be adjusted for non-equilateral boxes
+		//.. need a factor sqrt( 2*kny^2_x + 2*kny^2_y + 2*kny^2_z )/2 = sqrt(3/2)kny (in equilateral case)
+		gsl_integration_qag (&F, 0.0, sqrt(1.5)*knymax, 0, 1e-8, 20000, GSL_INTEG_GAUSS21, wp, &Tr0_, &error); 
+		//Tr0_ = 0.0;
+		gsl_integration_workspace_free(wp);
+				
+		
+		for( unsigned i=0; i<r.size(); ++i )
+		{
+			// spline positive and negative part separately
+			/*if( T[i] > 0.0 )
+			{
+				xp.push_back( 2.0*log10(r[i]) );
+				yp.push_back( log10(T[i]) );
+				rneg_ = r[i];
+				rneg2_ = rneg_*rneg_;
+			}else {
+				xn.push_back( 2.0*log10(r[i]) );
+				yn.push_back( log10(-T[i]) );
+			}*/
+			
+			
+			if( r[i] > rmin && r[i] < rmax )
+			{
+				xp.push_back( 2.0*log10(r[i]) );
+				yp.push_back( log10(fabs(T[i])) );
+				xn.push_back( 2.0*log10(r[i]) );
+				if( T[i] >= 0.0 ) 
+					yn.push_back( 1.0 );
+				else
+					yn.push_back( -1.0 );
+				
+				
+				//ofs << std::setw(16) << xp.back() << std::setw(16) << yp.back() << std::endl;
+			}
+			
+		}
+		
+
+		
+		
+		
+		accp = gsl_interp_accel_alloc ();
+		accn = gsl_interp_accel_alloc ();
+		
+		//... spline interpolation is only marginally slower here
+		splinep = gsl_spline_alloc (gsl_interp_cspline, xp.size() );
+		splinen = gsl_spline_alloc (gsl_interp_cspline, xn.size() );
+
+		//... set up everything for spline interpolation
+		gsl_spline_init (splinep, &xp[0], &yp[0], xp.size() );
+		gsl_spline_init (splinen, &xn[0], &yn[0], xn.size() );		
+		
+
+		
+		
+		{
+			double dlogr = (log10(rmax)-log10(rmin))/100;
+			std::ofstream ofs("transfer_splinep.txt");			
+			
+			for( int i=0; i< 100; ++i ) 
+			{
+				double r = rmin*pow(10.0,i*dlogr);
+				ofs << std::setw(16) << r << std::setw(16) << compute_real(r*r) << std::endl;
+			}
+		}
+		
+	}
+	
+	static double call_wrapper( double k, void *arg )
+	{
+		double *a = (double*)arg;
+		return 4.0*M_PI*a[0]*ptf_->compute( k )*pow(k,0.5*nspec_)*k*k;
+	}
+	
+	~TransferFunction_real()
+	{
+		gsl_spline_free (splinen);
+		gsl_interp_accel_free (accn);
+		gsl_spline_free (splinep);
+		gsl_interp_accel_free (accp);
+
+	}
+	
+	inline double compute_real( double r2 ) const
+	{
+		const double EPS = 1e-8;
+		const double Reps2 = EPS*EPS;
+		
+		if( r2 <Reps2 )
+			return Tr0_;
+		double q;
+		/*if( r2 < rneg2_ )
+			q = pow(10.0,gsl_spline_eval (splinep, log10(r2), accp));
+		else
+			q = -pow(10.0,gsl_spline_eval(splinen, log10(r2), accn));*/
+		
+		double logr2 = log10(r2);
+		q = pow(10.0,gsl_spline_eval(splinep, logr2, accp));
+		double sign = 1.0;
+		if( gsl_spline_eval(splinen, logr2, accn) < 0.0 )
+			sign = -1.0;
+		return q*sign;
+	}
+};
+
+class TransferFunction_real_old
+{
+protected:
+	bool m_breal_init;
+	static double nspec;
+	gsl_integration_workspace * wp; 
+	gsl_integration_workspace * c_wp;
+	gsl_integration_qawo_table * wf;
+	gsl_function F;
+	
+	gsl_interp_accel *acc;
+	gsl_spline *spline;
+	
+	static TransferFunction *ptf;
+	
+public:
+	TransferFunction_real_old( TransferFunction *tf, double ns, double pnorm, double dplus, double rmin, double rmax, unsigned nr )
+	: m_breal_init(false)
+	{ 
+		
+		ptf = tf;
+		nspec = ns;
+		wf = gsl_integration_qawo_table_alloc(0.0, 10000.0, GSL_INTEG_SINE, 10000);            
+		
+		
+		wp = gsl_integration_workspace_alloc(20000);
+		c_wp = gsl_integration_workspace_alloc(20000);
+		
+		acc = gsl_interp_accel_alloc ();
+		spline = gsl_spline_alloc (gsl_interp_cspline, nr+1);
+		
+		double dlogr = (log10(rmax)-log10(rmin))/nr;
+		
+		std::vector<double> x(nr+1,0.0),y(nr+1,0.0);
+		
+		
+		/*std::ofstream ofs("transfer_fourier.txt");
+		for( int i=0; i<1000; ++i )
+		{
+			double k=1.e-3*pow(10.0,i*6.0/1000.0);
+			ofs << k << "\t\t" << call_wrapper(k, NULL) << std::endl;
+		}*/
+		
+
+		double fac = sqrt(pnorm)*dplus;
+		
+		std::ofstream ofs("transfer_real_old.txt");
+		
+		for( unsigned i=0; i<nr+1; ++i )
+		{
+			double r = rmin*pow(10.0,(double)i*dlogr);
+			double result1, result2, error;
+			
+			//... integrate from zero to first point ...//
+			double k0 = 1e-2;
+
+			F.function = &call_wrapper2;
+			double k[1]; k[0] = r;
+			F.params = (void*)k;
+			
+			gsl_integration_qags (&F, 0.0, k0, 0, 1e-7, 20000,  wp, &result1, &error); 
+			
+			//... integrate further ...//
+			F.function = &call_wrapper;
+			F.params = NULL;
+
+			//std::cerr << "r = " << r << std::endl; 
+
+			gsl_integration_qawo_table_set(wf,r,100000.0,GSL_INTEG_SINE);
+			gsl_integration_qawf(&F,k0,1e-8,20000,wp,c_wp,wf,&result2,&error);
+			
+			result1 = 4.0*M_PI*fac*(result1+result2)/r;
+			
+			x[i] = log10(r*r);
+			y[i] = log10(result1);
+			
+			ofs << std::setw(16) << r << std::setw(16) << result1 << std::endl;
+			
+		}
+		
+		gsl_spline_init (spline, &x[0], &y[0], nr+1);
+		
+		gsl_integration_qawo_table_free(wf);
+		gsl_integration_workspace_free(wp);
+		gsl_integration_workspace_free(c_wp);
+		
+	}
+	
+	~TransferFunction_real_old()
+	{
+		gsl_spline_free (spline);
+		gsl_interp_accel_free (acc);
+	}
+	
+	static double call_wrapper( double k, void *arg )
+	{
+		//return ptf->compute( k );
+		return ptf->compute( k )*k*pow(k,0.5*nspec);
+		//return ptf->compute(k);
+	}
+	
+	static double call_wrapper2( double k, void *arg )
+	{
+		//return ptf->compute( k );
+		double r = ((double*)arg)[0];
+		return ptf->compute( k )*k*pow(k,0.5*nspec) *sin(k*r);
+		//return ptf->compute(k);
+	}
+	
+	double compute_real( double r2 )
+	{
+		if( r2 < 1e-2 )
+			return 0.0;
+		return pow(10.0,gsl_spline_eval (spline, log10(r2), acc));
+	}
+};
+
+//! Implementation of class TransferFunction_tab for the BBKS transfer function 
+/*!
+    
+*/
+class TransferFunction_tab : public TransferFunction{
+private:
+  //Cosmology m_Cosmology;
+
+  std::string m_filename;
+  std::vector<double> m_tab_k;
+  std::vector<double> m_tab_Tk;
+	
+
+  void read_table( void ){
+#ifdef WITH_MPI
+    if( MPI::COMM_WORLD.Get_rank() == 0 ){
+#endif
+      std::cerr << " - reading tabulated transfer function from file \'"
+                << m_filename << "\'\n";
+      std::string line;
+      std::ifstream ifs( m_filename.c_str() );
+      
+      m_tab_k.clear();
+      m_tab_Tk.clear();
+      
+      while( !ifs.eof() ){
+        getline(ifs,line);
+        std::stringstream ss(line);
+        
+        double k, Tk;
+        ss >> k;
+        ss >> Tk;
+        
+        m_tab_k.push_back( k );
+        m_tab_Tk.push_back( Tk );
+        
+      }
+#ifdef WITH_MPI
+    }
+    
+    unsigned n=m_tab_k.size();
+    MPI::COMM_WORLD.Bcast( &n, 1, MPI_UNSIGNED, 0 );
+    
+    if( MPI::COMM_WORLD.Get_rank() > 0 ){
+      m_tab_k.assign(n,0);
+      m_tab_Tk.assign(n,0);
+    }
+    
+    MPI::COMM_WORLD.Bcast( &m_tab_k[0],  n, MPI_DOUBLE, 0 );
+    MPI::COMM_WORLD.Bcast( &m_tab_Tk[0], n, MPI_DOUBLE, 0 );
+#endif
+    
+  }
+
+public:
+  TransferFunction_tab( Cosmology aCosm, std::string filename )
+  : TransferFunction( aCosm ), m_filename( filename )
+  {
+    read_table();
+  }
+  
+  virtual inline double compute( double k ){
+    
+    return linint( k, m_tab_k, m_tab_Tk );
+  }
+  
+  inline double get_kmin( void ){
+    return m_tab_k[1];
+  }
+  
+  inline double get_kmax( void ){
+    return m_tab_k[m_tab_k.size()-2];
+  }
+  
+};
+
+class TransferFunction_CAMB : public TransferFunction{
+public:
+	enum TFtype{
+		TF_baryon, TF_cdm, TF_total
+	};
+	
+	
+private:
+	//Cosmology m_Cosmology;
+	
+	std::string m_filename_Pk, m_filename_Tk;
+	std::vector<double> m_tab_k, m_tab_Tk;
+	
+	Spline_interp *m_psinterp;
+	gsl_interp_accel *acc;
+	gsl_spline *spline;
+	
+	
+	
+	void read_table( TFtype iwhich ){
+#ifdef WITH_MPI
+		if( MPI::COMM_WORLD.Get_rank() == 0 ){
+#endif
+			std::cerr 
+				<< " - reading tabulated transfer function data from file \n"
+				<< "    \'" << m_filename_Tk << "\'\n";
+			
+			std::string line;
+			std::ifstream ifs( m_filename_Tk.c_str() );
+			
+			m_tab_k.clear();
+			m_tab_Tk.clear();
+			
+			while( !ifs.eof() ){
+				getline(ifs,line);
+				
+				if(ifs.eof()) break;
+				
+				std::stringstream ss(line);
+				
+				double k, Tkc, Tkb, Tkg, Tkr, Tknu, Tktot;
+				ss >> k;
+				ss >> Tkc;
+				ss >> Tkb;
+				ss >> Tkg;
+				ss >> Tkr;
+				ss >> Tknu;
+				ss >> Tktot;
+				
+				m_tab_k.push_back( log10(k) );
+				
+				switch( iwhich ){
+					case TF_total:
+						m_tab_Tk.push_back( log10(Tktot) );
+						break;
+					case TF_baryon:
+						m_tab_Tk.push_back( log10(Tkb) );
+						break;
+					case TF_cdm:
+						m_tab_Tk.push_back( log10(Tkc) );
+						break;
+				}
+				//m_tab_Tk_cdm.push_back( Tkc );
+				//m_tab_Tk_baryon.push_back( Tkb );
+				//m_tab_Tk_tot.push_back( Tktot );
+			}
+			
+			ifs.close();
+			
+			
+			
+			
+#ifdef WITH_MPI
+		}
+		
+		unsigned n=m_tab_k.size();
+		MPI::COMM_WORLD.Bcast( &n, 1, MPI_UNSIGNED, 0 );
+		
+		if( MPI::COMM_WORLD.Get_rank() > 0 ){
+			m_tab_k.assign(n,0);
+			m_tab_Tk.assign(n,0);
+		}
+		
+		MPI::COMM_WORLD.Bcast( &m_tab_k[0],  n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk[0], n, MPI_DOUBLE, 0 );
+#endif
+		
+	}
+	
+public:
+	TransferFunction_CAMB( Cosmology aCosm, std::string filename_Tk, TFtype iwhich )
+	: TransferFunction( aCosm ), m_filename_Tk( filename_Tk ), m_psinterp( NULL )
+	{
+		read_table( iwhich );
+		
+		//m_psinterp = new Spline_interp( m_tab_k, m_tab_Tk );
+		
+		acc = gsl_interp_accel_alloc();
+		//spline = gsl_spline_alloc( gsl_interp_cspline, m_tab_k.size() );
+		spline = gsl_spline_alloc( gsl_interp_akima, m_tab_k.size() );
+		
+		gsl_spline_init (spline, &m_tab_k[0], &m_tab_Tk[0], m_tab_k.size() );
+		
+	}
+	
+	virtual ~TransferFunction_CAMB()
+	{
+		//delete m_psinterp;
+		
+		gsl_spline_free (spline);
+		gsl_interp_accel_free (acc);
+	}
+	
+	virtual inline double compute( double k ){
+		//double Tkc = linint( k, m_tab_k, m_tab_Tk_cdm );
+		//double Tkb = linint( k, m_tab_k, m_tab_Tk_baryon );
+		
+		//return pow(10.0, linint( log10(k), m_tab_k, m_tab_Tk ));
+		
+		//return pow(10.0, m_psinterp->interp(log10(k)) );
+		
+		//if( k<get_kmin() )
+		//	return pow(10.0,m_tab_k[1]);
+		
+		return pow(10.0, gsl_spline_eval (spline, log10(k), acc) );
+	}
+	
+	inline double get_kmin( void ){
+		return pow(10.0,m_tab_k[1]);
+	}
+	
+	inline double get_kmax( void ){
+		return pow(10.0,m_tab_k[m_tab_k.size()-2]);
+	}
+	
+};
+
+
+//! Implementation of class TransferFunction_BBKS for the BBKS transfer function 
+/*!
+    This class implements the analytical fit to the matter transfer
+    function by Bardeen, Bond, Kaiser & Szalay (BBKS).
+    ( see Bardeen et al. (1986) )
+*/
+class TransferFunction_BBKS : public TransferFunction{
+private:
+  //Cosmology m_Cosmology;
+  double      m_Gamma;
+
+public:
+  //! Constructor
+  /*!
+    \param aCosm Structure of type Cosmology carrying the cosmological parameters
+	\param bSugiyama flag whether the Sugiyama (1995) correction shall be applied (default=true)
+  */
+  TransferFunction_BBKS( Cosmology aCosm, bool bSugiyama=true, double FreeGamma=-1.0 )
+    : TransferFunction( aCosm )
+  {  
+    double Omega0 = m_Cosmology.Omega_m;
+	  
+    if( FreeGamma <= 0.0 ){
+      m_Gamma = Omega0*0.01*m_Cosmology.H0;
+      if( bSugiyama )
+        m_Gamma *= exp(-m_Cosmology.Omega_b*(1.0+sqrt(2.0*0.01*m_Cosmology.H0)/Omega0));
+    }else
+      m_Gamma = FreeGamma;
+	
+	
+  }
+
+  //! computes the value of the BBKS transfer function for mode k (in h/Mpc)
+  virtual inline double compute( double k ){
+    double q, f1, f2;
+    
+    q = k/(m_Gamma);
+    f1 = log(1.0 + 2.34*q)/(2.34*q);
+    f2 = 1.0 + q*(3.89 + q*(259.21 + q*(162.771336 + q*2027.16958081)));
+    
+    return f1/sqrt(sqrt(f2));
+
+  }
+  
+  inline double get_kmin( void ){
+    return 0.0;
+  }
+  
+  inline double get_kmax( void ){
+    return 1.e3;
+  }
+};
+
+//! Implementation of abstract base class TransferFunction for the Eisenstein & Hu transfer function 
+/*!
+    This class implements the analytical fit to the matter transfer
+    function by Eisenstein & Hu (1999).
+*/
+class TransferFunction_Eisenstein : public TransferFunction{
+protected:
+  //Cosmology m_Cosmology;
+  double  m_h0;
+  double	omhh,		/* Omega_matter*h^2 */
+	obhh,		/* Omega_baryon*h^2 */
+	theta_cmb,	/* Tcmb in units of 2.7 K */
+	z_equality,	/* Redshift of matter-radiation equality, really 1+z */
+	k_equality,	/* Scale of equality, in Mpc^-1 */
+	z_drag,		/* Redshift of drag epoch */
+	R_drag,		/* Photon-baryon ratio at drag epoch */
+	R_equality,	/* Photon-baryon ratio at equality epoch */
+	sound_horizon,	/* Sound horizon at drag epoch, in Mpc */
+	k_silk,		/* Silk damping scale, in Mpc^-1 */
+	alpha_c,	/* CDM suppression */
+	beta_c,		/* CDM log shift */
+	alpha_b,	/* Baryon suppression */
+	beta_b,		/* Baryon envelope shift */
+	beta_node,	/* Sound horizon shift */
+	k_peak,		/* Fit to wavenumber of first peak, in Mpc^-1 */
+	sound_horizon_fit,	/* Fit to sound horizon, in Mpc */
+	alpha_gamma;	/* Gamma suppression in approximate TF */
+
+  //! private member function: sets internal quantities for Eisenstein & Hu fitting
+  void TFset_parameters(double omega0hh, double f_baryon, double Tcmb)
+  /* Set all the scalars quantities for Eisenstein & Hu 1997 fitting formula */
+  /* Input: omega0hh -- The density of CDM and baryons, in units of critical dens,
+     multiplied by the square of the Hubble constant, in units
+     of 100 km/s/Mpc */
+  /* 	  f_baryon -- The fraction of baryons to CDM */
+  /*        Tcmb -- The temperature of the CMB in Kelvin.  Tcmb<=0 forces use
+	    of the COBE value of  2.728 K. */
+  /* Output: Nothing, but set many global variables used in TFfit_onek(). 
+     You can access them yourself, if you want. */
+  /* Note: Units are always Mpc, never h^-1 Mpc. */
+  {
+    double z_drag_b1, z_drag_b2;
+    double alpha_c_a1, alpha_c_a2, beta_c_b1, beta_c_b2, alpha_b_G, y;
+	  
+    if (f_baryon<=0.0 || omega0hh<=0.0) {
+      fprintf(stderr, "TFset_parameters(): Illegal input.\n");
+      exit(1);
+    }
+    omhh = omega0hh;
+    obhh = omhh*f_baryon;
+    if (Tcmb<=0.0) Tcmb=2.728;	/* COBE FIRAS */
+    theta_cmb = Tcmb/2.7;
+    
+    z_equality = 2.50e4*omhh/POW4(theta_cmb);  /* Really 1+z */
+    k_equality = 0.0746*omhh/SQR(theta_cmb);
+
+    z_drag_b1 = 0.313*pow((double)omhh,-0.419)*(1+0.607*pow((double)omhh,0.674));
+    z_drag_b2 = 0.238*pow((double)omhh,0.223);
+    z_drag = 1291*pow(omhh,0.251)/(1+0.659*pow((double)omhh,0.828))*
+		(1+z_drag_b1*pow((double)obhh,(double)z_drag_b2));
+    
+    R_drag = 31.5*obhh/POW4(theta_cmb)*(1000/(1+z_drag));
+    R_equality = 31.5*obhh/POW4(theta_cmb)*(1000/z_equality);
+
+    sound_horizon = 2./3./k_equality*sqrt(6./R_equality)*
+	    log((sqrt(1+R_drag)+sqrt(R_drag+R_equality))/(1+sqrt(R_equality)));
+
+    k_silk = 1.6*pow((double)obhh,0.52)*pow((double)omhh,0.73)*(1+pow((double)10.4*omhh,-0.95));
+
+    alpha_c_a1 = pow((double)46.9*omhh,0.670)*(1+pow(32.1*omhh,-0.532));
+    alpha_c_a2 = pow((double)12.0*omhh,0.424)*(1+pow(45.0*omhh,-0.582));
+    alpha_c = pow(alpha_c_a1,-f_baryon)*
+		pow(alpha_c_a2,-CUBE(f_baryon));
+    
+    beta_c_b1 = 0.944/(1+pow(458*omhh,-0.708));
+    beta_c_b2 = pow(0.395*omhh, -0.0266);
+    beta_c = 1.0/(1+beta_c_b1*(pow(1-f_baryon, beta_c_b2)-1));
+
+    y = z_equality/(1+z_drag);
+    alpha_b_G = y*(-6.*sqrt(1+y)+(2.+3.*y)*log((sqrt(1+y)+1)/(sqrt(1+y)-1)));
+    alpha_b = 2.07*k_equality*sound_horizon*pow(1+R_drag,-0.75)*alpha_b_G;
+
+    beta_node = 8.41*pow(omhh, 0.435);
+    beta_b = 0.5+f_baryon+(3.-2.*f_baryon)*sqrt(pow(17.2*omhh,2.0)+1);
+
+    k_peak = 2.5*3.14159*(1+0.217*omhh)/sound_horizon;
+    sound_horizon_fit = 44.5*log(9.83/omhh)/sqrt(1+10.0*pow(obhh,0.75));
+
+    alpha_gamma = 1-0.328*log(431.0*omhh)*f_baryon + 0.38*log(22.3*omhh)*
+		SQR(f_baryon);
+    
+    return;
+  }
+
+  //! private member function: computes transfer function for mode k (k in Mpc)
+  inline double TFfit_onek(double k, double *tf_baryon, double *tf_cdm)
+  /* Input: k -- Wavenumber at which to calculate transfer function, in Mpc^-1.
+   *tf_baryon, *tf_cdm -- Input value not used; replaced on output if
+   the input was not NULL. */
+  /* Output: Returns the value of the full transfer function fitting formula.
+     This is the form given in Section 3 of Eisenstein & Hu (1997).
+     *tf_baryon -- The baryonic contribution to the full fit.
+     *tf_cdm -- The CDM contribution to the full fit. */
+  /* Notes: Units are Mpc, not h^-1 Mpc. */
+  {
+    double T_c_ln_beta, T_c_ln_nobeta, T_c_C_alpha, T_c_C_noalpha;
+    double q, xx, xx_tilde;//, q_eff;
+    double T_c_f, T_c, s_tilde, T_b_T0, T_b, f_baryon, T_full;
+    //double T_0_L0, T_0_C0, T_0, gamma_eff; 
+    //double T_nowiggles_L0, T_nowiggles_C0, T_nowiggles;
+
+    k = fabs(k);	/* Just define negative k as positive */
+    if (k==0.0) {
+	if (tf_baryon!=NULL) *tf_baryon = 1.0;
+	if (tf_cdm!=NULL) *tf_cdm = 1.0;
+	return 1.0;
+    }
+
+    q = k/13.41/k_equality;
+    xx = k*sound_horizon;
+
+    T_c_ln_beta = log(2.718282+1.8*beta_c*q);
+    T_c_ln_nobeta = log(2.718282+1.8*q);
+    T_c_C_alpha = 14.2/alpha_c + 386.0/(1+69.9*pow(q,1.08));
+    T_c_C_noalpha = 14.2 + 386.0/(1+69.9*pow(q,1.08));
+
+    T_c_f = 1.0/(1.0+POW4(xx/5.4));
+    T_c = T_c_f*T_c_ln_beta/(T_c_ln_beta+T_c_C_noalpha*SQR(q)) +
+	    (1-T_c_f)*T_c_ln_beta/(T_c_ln_beta+T_c_C_alpha*SQR(q));
+    
+    s_tilde = sound_horizon*pow(1+CUBE(beta_node/xx),-1./3.);
+    xx_tilde = k*s_tilde;
+
+    T_b_T0 = T_c_ln_nobeta/(T_c_ln_nobeta+T_c_C_noalpha*SQR(q));
+    T_b = sin(xx_tilde)/(xx_tilde)*(T_b_T0/(1+SQR(xx/5.2))+
+		alpha_b/(1+CUBE(beta_b/xx))*exp(-pow(k/k_silk,1.4)));
+    
+    f_baryon = obhh/omhh;
+    T_full = f_baryon*T_b + (1-f_baryon)*T_c;
+
+    /* Now to store these transfer functions */
+    if (tf_baryon!=NULL) *tf_baryon = T_b;
+    if (tf_cdm!=NULL) *tf_cdm = T_c;
+    return T_full;
+}
+
+public:
+  //! Constructor for Eisenstein & Hu fitting for transfer function
+  /*!
+    \param aCosm structure of type Cosmology carrying the cosmological parameters
+    \param Tcmb mean temperature of the CMB fluctuations (defaults to
+    Tcmb = 2.726 if not specified)
+  */
+  TransferFunction_Eisenstein( Cosmology aCosm, double Tcmb = 2.726 )
+    :  TransferFunction(aCosm), m_h0( aCosm.H0*0.01 )
+  {
+    TFset_parameters( (aCosm.Omega_m)*aCosm.H0*aCosm.H0*(0.01*0.01), 
+		      aCosm.Omega_b/(aCosm.Omega_m-aCosm.Omega_b),//-aCosm.Omega_b), 
+		    Tcmb);
+  }
+
+  //! Computes the transfer function for k in Mpc/h by calling TFfit_onek
+  virtual inline double compute( double k ){
+    double tfb, tfcdm, fb, fc; //, tfull
+    TFfit_onek( k*m_h0, &tfb, &tfcdm );
+    
+    fb = m_Cosmology.Omega_b/(m_Cosmology.Omega_m);
+    fc = (m_Cosmology.Omega_m-m_Cosmology.Omega_b)/(m_Cosmology.Omega_m) ;
+    
+    return fb*tfb+fc*tfcdm;
+    //return 1.0;
+  }
+	
+	
+  inline void compute( double k, double &tf_dm, double &tf_baryon ){
+	double tfb, tfcdm, fb, fc; //, tfull
+	TFfit_onek( k*m_h0, &tfb, &tfcdm );
+	  
+	fb = m_Cosmology.Omega_b/(m_Cosmology.Omega_m);
+	fc = (m_Cosmology.Omega_m-m_Cosmology.Omega_b)/(m_Cosmology.Omega_m) ;
+	  
+	tf_dm = fc*tfcdm;
+	tf_baryon = fb*tfb;
+  }
+  
+	
+  inline double get_kmin( void ){
+    return 0.0;
+  }
+  
+  inline double get_kmax( void ){
+    return 1.e3;
+  }
+
+};
+
+
+
+class TransferFunction_EisensteinWDM : public TransferFunction_Eisenstein
+{
+protected:
+	using TransferFunction_Eisenstein::TFfit_onek;
+	double m_WDMalpha; 
+	
+public:
+	TransferFunction_EisensteinWDM( Cosmology aCosm, double Tcmb = 2.726 )
+	: TransferFunction_Eisenstein(aCosm,Tcmb)
+	{
+		m_WDMalpha = 0.05 * pow( aCosm.Omega_m/0.4,0.15)
+					*pow(aCosm.H0/0.65,1.3)*pow(aCosm.WDMmass,-1.15)
+					*pow(1.5/aCosm.WDMg_x,0.29);
+	}
+	
+	inline double compute( double k )
+	{
+		double tfb, tfcdm, fb, fc; //, tfull
+		TFfit_onek( k*m_h0, &tfb, &tfcdm );
+		
+		fb = m_Cosmology.Omega_b/(m_Cosmology.Omega_m);
+		fc = (m_Cosmology.Omega_m-m_Cosmology.Omega_b)/(m_Cosmology.Omega_m) ;
+	
+		double tf = fb*tfb+fc*tfcdm;
+		
+		return tf*pow(1.0+(m_WDMalpha*k)*(m_WDMalpha*k),-5.0);
+		
+	}
+	
+};
+
+
+
+
+class TransferFunction_EisensteinNeutrino : public TransferFunction
+{
+	/* Fitting Formulae for CDM + Baryon + Massive Neutrino (MDM) cosmologies. */
+	/* Daniel J. Eisenstein & Wayne Hu, Institute for Advanced Study */
+	
+	/* There are two primary routines here, one to set the cosmology, the
+	 other to construct the transfer function for a single wavenumber k. 
+	 You should call the former once (per cosmology) and the latter as 
+	 many times as you want. */
+	
+	/* TFmdm_set_cosm() -- User passes all the cosmological parameters as
+	 arguments; the routine sets up all of the scalar quantites needed 
+	 computation of the fitting formula.  The input parameters are: 
+	 1) omega_matter -- Density of CDM, baryons, and massive neutrinos,
+	 in units of the critical density. 
+	 2) omega_baryon -- Density of baryons, in units of critical. 
+	 3) omega_hdm    -- Density of massive neutrinos, in units of critical 
+	 4) degen_hdm    -- (Int) Number of degenerate massive neutrino species 
+	 5) omega_lambda -- Cosmological constant 
+	 6) hubble       -- Hubble constant, in units of 100 km/s/Mpc 
+	 7) redshift     -- The redshift at which to evaluate */
+	
+	/* TFmdm_onek_mpc() -- User passes a single wavenumber, in units of Mpc^-1.
+	 Routine returns the transfer function from the Eisenstein & Hu
+	 fitting formula, based on the cosmology currently held in the 
+	 internal variables.  The routine returns T_cb (the CDM+Baryon
+	 density-weighted transfer function), although T_cbn (the CDM+
+	 Baryon+Neutrino density-weighted transfer function) is stored
+	 in the global variable tf_cbnu. */
+	
+	/* We also supply TFmdm_onek_hmpc(), which is identical to the previous
+	 routine, but takes the wavenumber in units of h Mpc^-1. */
+	
+	/* We hold the internal scalar quantities in global variables, so that
+	 the user may access them in an external program, via "extern" declarations. */
+	
+	/* Please note that all internal length scales are in Mpc, not h^-1 Mpc! */
+	
+	/* -------------------------- Prototypes ----------------------------- */
+	
+/*	int TFmdm_set_cosm(float omega_matter, float omega_baryon, float omega_hdm,
+					   int degen_hdm, float omega_lambda, float hubble, float redshift);
+	float TFmdm_onek_mpc(float kk);
+	float TFmdm_onek_hmpc(float kk);
+*/	
+	
+	/* ------------------------- Global Variables ------------------------ */
+	
+	/* The following are set in TFmdm_set_cosm() */
+	float   alpha_gamma,	/* sqrt(alpha_nu) */
+	alpha_nu,	/* The small-scale suppression */
+	beta_c,		/* The correction to the log in the small-scale */
+	num_degen_hdm,	/* Number of degenerate massive neutrino species */
+	f_baryon,	/* Baryon fraction */
+	f_bnu,		/* Baryon + Massive Neutrino fraction */
+	f_cb,		/* Baryon + CDM fraction */
+	f_cdm,		/* CDM fraction */
+	f_hdm,		/* Massive Neutrino fraction */
+	growth_k0,	/* D_1(z) -- the growth function as k->0 */
+	growth_to_z0,	/* D_1(z)/D_1(0) -- the growth relative to z=0 */
+	hhubble,	/* Need to pass Hubble constant to TFmdm_onek_hmpc() */
+	k_equality,	/* The comoving wave number of the horizon at equality*/
+	obhh,		/* Omega_baryon * hubble^2 */
+	omega_curv,	/* = 1 - omega_matter - omega_lambda */
+	omega_lambda_z, /* Omega_lambda at the given redshift */
+	omega_matter_z,	/* Omega_matter at the given redshift */
+	omhh,		/* Omega_matter * hubble^2 */
+	onhh,		/* Omega_hdm * hubble^2 */
+	p_c,		/* The correction to the exponent before drag epoch */
+	p_cb,		/* The correction to the exponent after drag epoch */
+	sound_horizon_fit,  /* The sound horizon at the drag epoch */
+	theta_cmb,	/* The temperature of the CMB, in units of 2.7 K */
+	y_drag,		/* Ratio of z_equality to z_drag */
+	z_drag,		/* Redshift of the drag epoch */
+	z_equality;	/* Redshift of matter-radiation equality */
+	
+	/* The following are set in TFmdm_onek_mpc() */
+	float	gamma_eff,	/* Effective \Gamma */
+	growth_cb,	/* Growth factor for CDM+Baryon perturbations */
+	growth_cbnu,	/* Growth factor for CDM+Baryon+Neutrino pert. */
+	max_fs_correction,  /* Correction near maximal free streaming */
+	qq,		/* Wavenumber rescaled by \Gamma */
+	qq_eff,		/* Wavenumber rescaled by effective Gamma */
+	qq_nu,		/* Wavenumber compared to maximal free streaming */
+	tf_master,	/* Master TF */
+	tf_sup,		/* Suppressed TF */
+	y_freestream; 	/* The epoch of free-streaming for a given scale */
+	
+	/* Finally, TFmdm_onek_mpc() and TFmdm_onek_hmpc() give their answers as */
+	float   tf_cb,		/* The transfer function for density-weighted
+						 CDM + Baryon perturbations. */
+	tf_cbnu;	/* The transfer function for density-weighted
+				 CDM + Baryon + Massive Neutrino perturbations. */
+	
+	/* By default, these functions return tf_cb */
+	
+	/* ------------------------- TFmdm_set_cosm() ------------------------ */
+	int TFmdm_set_cosm(float omega_matter, float omega_baryon, float omega_hdm,
+					   int degen_hdm, float omega_lambda, float hubble, float redshift)
+	/* This routine takes cosmological parameters and a redshift and sets up
+	 all the internal scalar quantities needed to compute the transfer function. */
+	/* INPUT: omega_matter -- Density of CDM, baryons, and massive neutrinos,
+	 in units of the critical density. */
+	/* 	  omega_baryon -- Density of baryons, in units of critical. */
+	/* 	  omega_hdm    -- Density of massive neutrinos, in units of critical */
+	/* 	  degen_hdm    -- (Int) Number of degenerate massive neutrino species */
+	/*        omega_lambda -- Cosmological constant */
+	/* 	  hubble       -- Hubble constant, in units of 100 km/s/Mpc */
+	/*        redshift     -- The redshift at which to evaluate */
+	/* OUTPUT: Returns 0 if all is well, 1 if a warning was issued.  Otherwise,
+	 sets many global variables for use in TFmdm_onek_mpc() */
+	{
+		float z_drag_b1, z_drag_b2, omega_denom;
+		int qwarn;
+		qwarn = 0;
+		
+		theta_cmb = 2.728/2.7;	/* Assuming T_cmb = 2.728 K */
+		
+		/* Look for strange input */
+		if (omega_baryon<0.0) {
+			fprintf(stderr,
+					"TFmdm_set_cosm(): Negative omega_baryon set to trace amount.\n");
+			qwarn = 1;
+		}
+		if (omega_hdm<0.0) {
+			fprintf(stderr,
+					"TFmdm_set_cosm(): Negative omega_hdm set to trace amount.\n");
+			qwarn = 1;
+		}
+		if (hubble<=0.0) {
+			fprintf(stderr,"TFmdm_set_cosm(): Negative Hubble constant illegal.\n");
+			exit(1);  /* Can't recover */
+		} else if (hubble>2.0) {
+			fprintf(stderr,"TFmdm_set_cosm(): Hubble constant should be in units of 100 km/s/Mpc.\n");
+			qwarn = 1;
+		}
+		if (redshift<=-1.0) {
+			fprintf(stderr,"TFmdm_set_cosm(): Redshift < -1 is illegal.\n");
+			exit(1);
+		} else if (redshift>99.0) {
+			fprintf(stderr,
+					"TFmdm_set_cosm(): Large redshift entered.  TF may be inaccurate.\n");
+			qwarn = 1;
+		}
+		if (degen_hdm<1) degen_hdm=1;
+		num_degen_hdm = (float) degen_hdm;	
+		/* Have to save this for TFmdm_onek_mpc() */
+		/* This routine would crash if baryons or neutrinos were zero, 
+		 so don't allow that */
+		if (omega_baryon<=0) omega_baryon=1e-5;
+		if (omega_hdm<=0) omega_hdm=1e-5;
+		
+		omega_curv = 1.0-omega_matter-omega_lambda;
+		omhh = omega_matter*SQR(hubble);
+		obhh = omega_baryon*SQR(hubble);
+		onhh = omega_hdm*SQR(hubble);
+		f_baryon = omega_baryon/omega_matter;
+		f_hdm = omega_hdm/omega_matter;
+		f_cdm = 1.0-f_baryon-f_hdm;
+		f_cb = f_cdm+f_baryon;
+		f_bnu = f_baryon+f_hdm;
+		
+		/* Compute the equality scale. */
+		z_equality = 25000.0*omhh/SQR(SQR(theta_cmb));	/* Actually 1+z_eq */
+		k_equality = 0.0746*omhh/SQR(theta_cmb);
+		
+		/* Compute the drag epoch and sound horizon */
+		z_drag_b1 = 0.313*pow(omhh,-0.419)*(1+0.607*pow(omhh,0.674));
+		z_drag_b2 = 0.238*pow(omhh,0.223);
+		z_drag = 1291*pow(omhh,0.251)/(1.0+0.659*pow(omhh,0.828))*
+		(1.0+z_drag_b1*pow(obhh,z_drag_b2));
+		y_drag = z_equality/(1.0+z_drag);
+		
+		sound_horizon_fit = 44.5*log(9.83/omhh)/sqrt(1.0+10.0*pow(obhh,0.75));
+		
+		/* Set up for the free-streaming & infall growth function */
+		p_c = 0.25*(5.0-sqrt(1+24.0*f_cdm));
+		p_cb = 0.25*(5.0-sqrt(1+24.0*f_cb));
+		
+		omega_denom = omega_lambda+SQR(1.0+redshift)*(omega_curv+
+													  omega_matter*(1.0+redshift));
+		omega_lambda_z = omega_lambda/omega_denom;
+		omega_matter_z = omega_matter*SQR(1.0+redshift)*(1.0+redshift)/omega_denom;
+		growth_k0 = z_equality/(1.0+redshift)*2.5*omega_matter_z/
+	    (pow(omega_matter_z,4.0/7.0)-omega_lambda_z+
+		 (1.0+omega_matter_z/2.0)*(1.0+omega_lambda_z/70.0));
+		growth_to_z0 = z_equality*2.5*omega_matter/(pow(omega_matter,4.0/7.0)
+													-omega_lambda + (1.0+omega_matter/2.0)*(1.0+omega_lambda/70.0));
+		growth_to_z0 = growth_k0/growth_to_z0;	
+		
+		/* Compute small-scale suppression */
+		alpha_nu = f_cdm/f_cb*(5.0-2.*(p_c+p_cb))/(5.-4.*p_cb)*
+		pow(1+y_drag,p_cb-p_c)*
+		(1+f_bnu*(-0.553+0.126*f_bnu*f_bnu))/
+		(1-0.193*sqrt(f_hdm*num_degen_hdm)+0.169*f_hdm*pow(num_degen_hdm,0.2))*
+		(1+(p_c-p_cb)/2*(1+1/(3.-4.*p_c)/(7.-4.*p_cb))/(1+y_drag));
+		alpha_gamma = sqrt(alpha_nu);
+		beta_c = 1/(1-0.949*f_bnu);
+		/* Done setting scalar variables */
+		hhubble = hubble;	/* Need to pass Hubble constant to TFmdm_onek_hmpc() */
+		return qwarn;
+	}
+	
+	/* ---------------------------- TFmdm_onek_mpc() ---------------------- */
+	
+	float TFmdm_onek_mpc(float kk)
+	/* Given a wavenumber in Mpc^-1, return the transfer function for the
+	 cosmology held in the global variables. */
+	/* Input: kk -- Wavenumber in Mpc^-1 */
+	/* Output: The following are set as global variables:
+	 growth_cb -- the transfer function for density-weighted
+	 CDM + Baryon perturbations. 
+	 growth_cbnu -- the transfer function for density-weighted
+	 CDM + Baryon + Massive Neutrino perturbations. */
+	/* The function returns growth_cb */
+	{
+		float tf_sup_L, tf_sup_C;
+		float temp1, temp2;
+		
+		qq = kk/omhh*SQR(theta_cmb);
+		
+		/* Compute the scale-dependent growth functions */
+		y_freestream = 17.2*f_hdm*(1+0.488*pow(f_hdm,-7.0/6.0))*
+		SQR(num_degen_hdm*qq/f_hdm);
+		temp1 = pow(growth_k0, 1.0-p_cb);
+		temp2 = pow(growth_k0/(1+y_freestream),0.7);
+		growth_cb = pow(1.0+temp2, p_cb/0.7)*temp1;
+		growth_cbnu = pow(pow(f_cb,0.7/p_cb)+temp2, p_cb/0.7)*temp1;
+		
+		/* Compute the master function */
+		gamma_eff =omhh*(alpha_gamma+(1-alpha_gamma)/
+						 (1+SQR(SQR(kk*sound_horizon_fit*0.43))));
+		qq_eff = qq*omhh/gamma_eff;
+		
+		tf_sup_L = log(2.71828+1.84*beta_c*alpha_gamma*qq_eff);
+		tf_sup_C = 14.4+325/(1+60.5*pow(qq_eff,1.11));
+		tf_sup = tf_sup_L/(tf_sup_L+tf_sup_C*SQR(qq_eff));
+		
+		qq_nu = 3.92*qq*sqrt(num_degen_hdm/f_hdm);
+		max_fs_correction = 1+1.2*pow(f_hdm,0.64)*pow(num_degen_hdm,0.3+0.6*f_hdm)/
+		(pow(qq_nu,-1.6)+pow(qq_nu,0.8));
+		tf_master = tf_sup*max_fs_correction;
+		
+		/* Now compute the CDM+HDM+baryon transfer functions */
+		tf_cb = tf_master*growth_cb/growth_k0;
+		tf_cbnu = tf_master*growth_cbnu/growth_k0;
+		return tf_cb;
+	}
+	
+	/* ---------------------------- TFmdm_onek_hmpc() ---------------------- */
+	
+	float TFmdm_onek_hmpc(float kk)
+	/* Given a wavenumber in h Mpc^-1, return the transfer function for the
+	 cosmology held in the global variables. */
+	/* Input: kk -- Wavenumber in h Mpc^-1 */
+	/* Output: The following are set as global variables:
+	 growth_cb -- the transfer function for density-weighted
+	 CDM + Baryon perturbations. 
+	 growth_cbnu -- the transfer function for density-weighted
+	 CDM + Baryon + Massive Neutrino perturbations. */
+	/* The function returns growth_cb */
+	{
+		return TFmdm_onek_mpc(kk*hhubble);
+	}
+	
+	
+	
+	TransferFunction_EisensteinNeutrino( Cosmology aCosm, double Omega_HDM, int degen_HDM, double Tcmb = 2.726 )
+    :  TransferFunction(aCosm)//, m_h0( aCosm.H0*0.01 )
+	{
+		TFmdm_set_cosm( aCosm.Omega_m, aCosm.Omega_b, Omega_HDM, degen_HDM, aCosm.Omega_L, aCosm.H0, aCosm.astart );
+		
+	}
+	
+	//! Computes the transfer function for k in Mpc/h by calling TFfit_onek
+	virtual inline double compute( double k ){
+		return TFmdm_onek_hmpc( k );
+		
+		
+		/*double tfb, tfcdm, fb, fc; //, tfull
+		TFfit_onek( k*m_h0, &tfb, &tfcdm );
+		
+		fb = m_Cosmology.Omega_b/(m_Cosmology.Omega_m);
+		fc = (m_Cosmology.Omega_m-m_Cosmology.Omega_b)/(m_Cosmology.Omega_m) ;
+		
+		return fb*tfb+fc*tfcdm;*/
+		//return 1.0;
+	}
+	
+	
+};
+
+#endif
diff --git a/config_file.hh b/config_file.hh
new file mode 100644
index 0000000..69dad12
--- /dev/null
+++ b/config_file.hh
@@ -0,0 +1,327 @@
+/*
+ 
+ config_file.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#ifndef __CONFIG_FILE_HH
+#define __CONFIG_FILE_HH
+
+#include <string>
+#include <sstream>
+#include <map>
+#include <fstream>
+#include <iostream>
+#include <iomanip>
+#include <stdexcept>
+#include <typeinfo>
+
+/*!
+ * @class config_file
+ * @brief provides read/write access to configuration options
+ *
+ * This class provides access to the configuration file. The 
+ * configuration is stored in hash-pairs and can be queried and
+ * validated by the responsible class/routine
+ */
+class config_file {
+  
+  //! current line number
+  unsigned m_iLine;
+
+  //! hash table for key/value pairs, stored as strings
+  std::map<std::string, std::string> m_Items;
+
+public:
+	
+  //! removes all white space from string source
+  /*!
+   * @param source the string to be trimmed
+   * @param delims a string of delimiting characters
+   * @return trimmed string
+   */
+  std::string trim(std::string const& source, char const* delims = " \t\r\n") const{
+    std::string result(source);
+    //... skip initial whitespace ...
+    std::string::size_type index = result.find_last_not_of(delims);
+    if(index != std::string::npos)
+      result.erase(++index);
+    //... find beginning of trailing whitespace ...
+    index = result.find_first_not_of(delims);
+    //... remove trailing whitespace ...
+    if(index != std::string::npos)
+      result.erase(0, index);
+    else
+      result.erase();
+    return result;
+  }
+  
+  //! converts between different variable types
+  /*!
+   *  The main purpose of this function is to parse and convert
+   *  a string argument into numbers, booleans, etc...
+   * @param ival the input value (typically a std::string)
+   * @param oval the interpreted/converted value
+   */
+  template <class in_value, class out_value>
+  void convert( const in_value & ival, out_value & oval) const
+  {
+		std::stringstream ss;
+		ss << ival; //.. insert value into stream
+		ss >> oval; //.. retrieve value from stream
+
+		if (! ss.eof()) {
+		  //.. conversion error
+			std::cerr << "Error: conversion of \'" << ival << "\' failed." << std::endl;
+		  throw ErrInvalidConversion(std::string("invalid conversion to ")+typeid(out_value).name()+'.');
+		}
+  }
+
+  //! constructor of class config_file
+  /*! @param FileName the path/name of the configuration file to be parsed
+   */
+  config_file( std::string const& FileName )
+	: m_iLine(0), m_Items()	
+  {
+    std::ifstream file(FileName.c_str());
+	  
+	if( !file.is_open() )
+		throw std::runtime_error(std::string("Error: Could not open config file \'")+FileName+std::string("\'"));
+
+    std::string line;
+    std::string name;
+    std::string value;
+    std::string inSection;
+    int posEqual;
+    m_iLine=0;
+    //.. walk through all lines ..
+    while (std::getline(file,line)) {
+      ++m_iLine;
+      //.. encounterd EOL ?
+      if (! line.length()) continue;
+      
+      //.. encountered comment ?
+      unsigned long idx;
+      if( (idx=line.find_first_of("#;%")) != std::string::npos )
+        line.erase(idx);
+
+      //.. encountered section tag ?
+      if (line[0] == '[') {
+        inSection=trim(line.substr(1,line.find(']')-1));
+        continue;
+      }
+
+      //.. seek end of entry name ..
+      posEqual=line.find('=');
+      name  = trim(line.substr(0,posEqual));
+      value = trim(line.substr(posEqual+1));
+      
+      //.. add key/value pair to hash table ..
+      m_Items[inSection+'/'+name] = value;
+    }
+  }
+	
+	//! inserts a key/value pair in the hash map
+	/*! @param key the key value, usually "section/key"
+	 *  @param value the value of the key, also a string
+	 */
+	void insertValue( std::string const& key, std::string const& value )
+	{
+		m_Items[key] = value;
+	}
+	
+	//! inserts a key/value pair in the hash map
+	/*! @param section section name. values are stored under "section/key"
+	 *  @param key the key value usually "section/key"
+	 *  @param value the value of the key, also a string
+	 */
+	void insertValue( std::string const& section, std::string const& key, std::string const& value )
+	{
+		m_Items[section+'/'+key] = value;
+	}
+	
+	//! checks if a key is part of the hash map
+	/*! @param section the section name of the key
+	 *  @param key the key name to be checked
+	 *  @return true if the key is present, false otherwise
+	 */
+	bool containsKey( std::string const& section, std::string const& key )
+	{
+		std::map<std::string,std::string>::const_iterator i = m_Items.find(section+'/'+key);
+		if ( i == m_Items.end() ) 
+			return false;
+		return true;
+	}
+	
+	//! checks if a key is part of the hash map
+	/*! @param key the key name to be checked
+	 *  @return true if the key is present, false otherwise
+	 */
+	bool containsKey( std::string const& key )
+	{
+		std::map<std::string,std::string>::const_iterator i = m_Items.find(key);
+		if ( i == m_Items.end() ) 
+			return false;
+		return true;
+	}
+	
+	
+	//! return value of a key
+	/*! returns the value of a given key, throws a ErrItemNotFound
+	*  exception if the key is not available in the hash map.
+	*  @param key the key name
+	*  @return the value of the key
+	*  @sa ErrItemNotFound
+	*/
+	template<class T> T getValue( std::string const& key ) const{
+		return getValue<T>( "", key );
+	}
+
+	//! return value of a key
+	/*! returns the value of a given key, throws a ErrItemNotFound
+	 *  exception if the key is not available in the hash map.
+	 *  @param section the section name for the key
+	 *  @param key the key name
+	 *  @return the value of the key
+	 *  @sa ErrItemNotFound
+	 */
+	template<class T> T getValue( std::string const& section, std::string const& key ) const
+	{
+		T r;
+		std::map<std::string,std::string>::const_iterator i = m_Items.find(section + '/' + key);
+		if ( i == m_Items.end() ) 
+		  throw ErrItemNotFound('\'' + section + '/' + key + std::string("\' not found."));
+		  
+		convert(i->second,r);
+		return r;
+	}
+
+	//! exception safe version of getValue
+	/*! returns the value of a given key, returns a default value rather
+	 *  than a ErrItemNotFound exception if the key is not found.
+	 *  @param section the section name for the key
+	 *  @param key the key name
+	 *  @param default_value the value that is returned if the key is not found
+	 *  @return the key value (if key found) otherwise default_value
+	 */
+	template<class T> T getValueSafe( std::string const& section, std::string const& key, T default_value ) const
+	{
+		T r;
+		try{
+		  r = getValue<T>( section, key );
+		} catch( ErrItemNotFound ) {
+		  r = default_value;
+		}
+		return r;
+	}
+
+	
+	//! exception safe version of getValue
+	/*! returns the value of a given key, returns a default value rather
+	 *  than a ErrItemNotFound exception if the key is not found.
+	 *  @param key the key name
+	 *  @param default_value the value that is returned if the key is not found
+	 *  @return the key value (if key found) otherwise default_value
+	 */
+	template<class T> T getValueSafe( std::string const& key, T default_value ) const
+	{
+		return getValueSafe( "", key, default_value );
+	}
+
+	
+	//! dumps all key-value pairs to a std::ostream
+	void dump( std::ostream& out )
+	{
+		std::map<std::string,std::string>::const_iterator i = m_Items.begin();
+		while( i!=m_Items.end() )
+		{
+			if( i->second.length() > 0 )
+				out << std::setw(24) << std::left << i->first << "  =  " << i->second  << std::endl;
+			++i;
+		}
+	}
+
+  //--- EXCEPTIONS ---
+
+	//! runtime error that is thrown if key is not found in getValue
+	class ErrItemNotFound : public std::runtime_error{
+	public:
+		ErrItemNotFound( std::string itemname )
+		  : std::runtime_error( itemname.c_str() )
+		{}
+	};
+
+	//! runtime error that is thrown if type conversion fails
+	class ErrInvalidConversion : public std::runtime_error{
+	public:
+		ErrInvalidConversion( std::string errmsg )
+		  : std::runtime_error( errmsg )
+		{}
+	};
+
+	
+	class ErrIllegalIdentifier : public std::runtime_error{
+	public:
+		ErrIllegalIdentifier( std::string errmsg )
+		  : std::runtime_error( errmsg )
+		{}
+	};
+  
+};
+
+//==== below are template specialisations =======================================//
+
+//... Function: getValue( strSection, strEntry ) ...
+//... Descript: specialization of getValue for type boolean to interpret strings ...
+//...           like "true" and "false" etc.
+//...           converts the string to type bool, returns type bool ...
+template<> 
+inline bool config_file::getValue<bool>( std::string const& strSection, std::string const& strEntry ) const{
+  std::string r1 = getValue<std::string>( strSection, strEntry );
+    if( r1=="true" || r1=="yes" || r1=="on" || r1=="1" )
+      return true;
+    if( r1=="false" || r1=="no" || r1=="off" || r1=="0" )
+      return false;
+    throw ErrIllegalIdentifier(std::string("Illegal identifier \'")+r1+std::string("\' in \'")+strEntry+std::string("\'."));
+    //return false;
+  }
+
+template<>
+inline bool config_file::getValueSafe<bool>( std::string const& strSection, std::string const& strEntry, bool defaultValue ) const{
+  std::string r1;
+  try{
+    r1 = getValue<std::string>( strSection, strEntry );
+    if( r1=="true" || r1=="yes" || r1=="on" || r1=="1" )
+      return true;
+    if( r1=="false" || r1=="no" || r1=="off" || r1=="0" )
+      return false;
+  } catch( ErrItemNotFound ) {
+    return defaultValue;
+  }
+  return defaultValue;
+}
+
+template<>
+inline void config_file::convert<std::string,std::string>( const std::string & ival, std::string & oval) const
+{
+	oval = ival;
+}
+
+#endif //__CONFIG_FILE_HH
diff --git a/convolution_kernel.cc b/convolution_kernel.cc
new file mode 100644
index 0000000..b96ddd1
--- /dev/null
+++ b/convolution_kernel.cc
@@ -0,0 +1,583 @@
+/*
+ 
+ convolution_kernel.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+#ifdef SINGLE_PRECISION
+	#ifdef SINGLETHREAD_FFTW
+		#include <srfftw.h>
+	#else
+		#include <srfftw_threads.h>
+	#endif
+#else
+	#ifdef SINGLETHREAD_FFTW
+		#include <drfftw.h>
+	#else
+		#include <drfftw_threads.h>
+	#endif
+#endif
+
+
+#include "densities.hh"
+#include "convolution_kernel.hh"
+
+
+
+namespace convolution{
+
+	std::map< std::string, kernel_creator *>& 
+	get_kernel_map()
+	{
+		static std::map< std::string, kernel_creator* > kernel_map;
+		return kernel_map;
+	}
+	
+	template< typename real_t >
+	void perform( kernel * pk, void *pd )
+	{
+		parameters cparam_ = pk->cparam_;
+		double fftnorm = pow(2.0*M_PI,1.5)/sqrt(cparam_.lx*cparam_.ly*cparam_.lz)/sqrt((double)(cparam_.nx*cparam_.ny*cparam_.nz));
+		
+		fftw_complex	*cdata,*ckernel;
+		fftw_real		*data;
+		
+		data		= reinterpret_cast<fftw_real*>(pd);
+		cdata		= reinterpret_cast<fftw_complex*>(data);
+		ckernel		= reinterpret_cast<fftw_complex*>( pk->get_ptr() );
+		
+		rfftwnd_plan	iplan, plan;
+		
+		plan  = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
+									FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
+		
+		iplan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
+									FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
+		
+		
+		#ifndef SINGLETHREAD_FFTW		
+		rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, data, NULL );
+		#else
+		rfftwnd_one_real_to_complex( plan, data, NULL );
+		#endif
+		
+		#pragma omp parallel for
+		for( int i=0; i<cparam_.nx; ++i )
+			for( int j=0; j<cparam_.ny; ++j )
+				for( int k=0; k<cparam_.nz/2+1; ++k )
+				{
+					unsigned ii = (i*cparam_.ny + j) * (cparam_.nz/2+1) + k;
+					
+					complex ccdata(cdata[ii].re,cdata[ii].im), cckernel(ckernel[ii].re,ckernel[ii].im);
+					ccdata = ccdata * cckernel *fftnorm;
+					
+					cdata[ii].re = ccdata.real();
+					cdata[ii].im = ccdata.imag();
+				}
+		
+		#ifndef SINGLETHREAD_FFTW		
+		rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL);
+		#else		
+		rfftwnd_one_complex_to_real(iplan, cdata, NULL);
+		#endif
+		
+		rfftwnd_destroy_plan(plan);
+		rfftwnd_destroy_plan(iplan);
+	}
+	
+	
+	template< typename real_t >
+	void perform_filtered( kernel * pk, void *pd )
+	{
+		parameters cparam_ = pk->cparam_;
+		double 
+			kny, kmax,
+			fftnorm = pow(2.0*M_PI,1.5)/sqrt(cparam_.lx*cparam_.ly*cparam_.lz)/sqrt((double)(cparam_.nx*cparam_.ny*cparam_.nz));
+		
+		fftw_complex	*cdata,*ckernel;
+		fftw_real		*data;
+		
+		kny			= cparam_.nx*M_PI/cparam_.lx;
+		kmax        = M_PI/2.0;
+		
+		data		= reinterpret_cast<fftw_real*>(pd);
+		cdata		= reinterpret_cast<fftw_complex*>(data);
+		ckernel		= reinterpret_cast<fftw_complex*>( pk->get_ptr() );
+		
+		rfftwnd_plan	iplan, plan;
+		
+		plan  = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
+									FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
+		
+		iplan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
+									FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
+		
+		
+		#ifndef SINGLETHREAD_FFTW		
+		rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, data, NULL );
+		#else
+		rfftwnd_one_real_to_complex( plan, data, NULL );
+		#endif
+		
+		//double kmax2 = kmax*kmax;
+		
+		#pragma omp parallel for
+		for( int i=0; i<cparam_.nx; ++i )
+			for( int j=0; j<cparam_.ny; ++j )
+				for( int k=0; k<cparam_.nz/2+1; ++k )
+				{
+					unsigned ii = (i*cparam_.ny + j) * (cparam_.nz/2+1) + k;
+					
+					complex ccdata(cdata[ii].re,cdata[ii].im), cckernel(ckernel[ii].re,ckernel[ii].im);
+					
+					int ik(i), jk(j);
+					
+					if( ik > cparam_.nx/2 )
+						ik -= cparam_.nx;
+					if( jk > cparam_.ny/2 )
+						jk -= cparam_.ny;
+					
+					double 
+						kx( M_PI*(double)ik/cparam_.nx ),
+						ky( M_PI*(double)jk/cparam_.ny ),
+					kz( M_PI*(double)k/cparam_.nz );//,
+													//						kk( kx*kx+ky*ky+kz*kz );
+					
+					//... cos(k) is the Hanning filter function (cf. Bertschinger 2001)
+					double filter = 0.0;
+					if( true ){//kk <= kmax2 ){
+						//filter = cos( kx )*cos( ky )*cos( kz );
+						filter = cos( 0.5*kx )*cos( 0.5*ky )*cos( 0.5*kz );
+							   //filter = 1.0;
+						//filter *=filter;
+					}
+					//filter = 1.0;
+					
+					ccdata = ccdata * cckernel *fftnorm * filter;
+					
+					cdata[ii].re = ccdata.real();
+					cdata[ii].im = ccdata.imag();
+				}
+		
+		#ifndef SINGLETHREAD_FFTW		
+		rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL);
+		#else		
+		rfftwnd_one_complex_to_real(iplan, cdata, NULL);
+		#endif
+		
+		rfftwnd_destroy_plan(plan);
+		rfftwnd_destroy_plan(iplan);
+	}
+	
+	
+	template void perform<double>( kernel* pk, void *pd );
+	template void perform<float>( kernel* pk, void *pd );
+	template void perform_filtered<double>( kernel* pk, void *pd );
+	template void perform_filtered<float>( kernel* pk, void *pd );
+	
+	
+	void truncate( kernel* pk, double R, double alpha )
+	{
+		parameters cparam_ = pk->cparam_;
+
+		double 
+			dx		= cparam_.lx/cparam_.nx, 
+			dy		= cparam_.ly/cparam_.ny, 
+			dz		= cparam_.lz/cparam_.nz;
+		
+		double fftnorm = 1.0/(cparam_.nx * cparam_.ny * cparam_.nz);
+		
+		rfftwnd_plan	iplan, plan;
+		
+		plan  = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
+									FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
+		
+		iplan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
+									FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
+		
+		fftw_real		*rkernel	= reinterpret_cast<fftw_real*>( pk->get_ptr() );
+		fftw_complex	*ckernel	= reinterpret_cast<fftw_complex*>( pk->get_ptr() );
+		
+		#ifndef SINGLETHREAD_FFTW		
+		rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, ckernel, NULL);
+		#else		
+		rfftwnd_one_complex_to_real(iplan, ckernel, NULL);
+		#endif
+		
+		
+		#pragma omp parallel for
+		for( int i=0; i<cparam_.nx; ++i )
+			for( int j=0; j<cparam_.ny; ++j )
+				for( int k=0; k<cparam_.nz; ++k )
+				{
+					int iix(i), iiy(j), iiz(k);
+					double rr[3], rr2;
+					
+					if( iix > (int)cparam_.nx/2 ) iix -= cparam_.nx;
+					if( iiy > (int)cparam_.ny/2 ) iiy -= cparam_.ny;
+					if( iiz > (int)cparam_.nz/2 ) iiz -= cparam_.nz;
+					
+					rr[0] = (double)iix * dx;
+					rr[1] = (double)iiy * dy;
+					rr[2] = (double)iiz * dz;
+					
+					rr2 = rr[0]*rr[0] + rr[1]*rr[1] + rr[2]*rr[2];
+					
+					unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
+					rkernel[idx] *= 0.5*(erfc((sqrt(rr2)-R)*alpha))*fftnorm;
+				}
+		
+		
+		
+		#ifndef SINGLETHREAD_FFTW		
+		rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, rkernel, NULL );
+		#else
+		rfftwnd_one_real_to_complex( plan, rkernel, NULL );
+		#endif
+		
+	}
+	
+	void truncate_sharp( kernel* pk, double R )
+	{
+		parameters cparam_ = pk->cparam_;
+		
+		double 
+		dx		= cparam_.lx/cparam_.nx, 
+		dy		= cparam_.ly/cparam_.ny, 
+		dz		= cparam_.lz/cparam_.nz,
+		R2		= R*R;
+		
+		double fftnorm = 1.0/(cparam_.nx * cparam_.ny * cparam_.nz);
+		
+		rfftwnd_plan	iplan, plan;
+		
+		plan  = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
+									FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
+		
+		iplan = rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
+									FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
+		
+		fftw_real		*rkernel	= reinterpret_cast<fftw_real*>( pk->get_ptr() );
+		fftw_complex	*ckernel	= reinterpret_cast<fftw_complex*>( pk->get_ptr() );
+		
+#ifndef SINGLETHREAD_FFTW		
+		rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, ckernel, NULL);
+#else		
+		rfftwnd_one_complex_to_real(iplan, ckernel, NULL);
+#endif
+		
+		
+#pragma omp parallel for
+		for( int i=0; i<cparam_.nx; ++i )
+			for( int j=0; j<cparam_.ny; ++j )
+				for( int k=0; k<cparam_.nz; ++k )
+				{
+					int iix(i), iiy(j), iiz(k);
+					double rr[3], rr2;
+					
+					if( iix > (int)cparam_.nx/2 ) iix -= cparam_.nx;
+					if( iiy > (int)cparam_.ny/2 ) iiy -= cparam_.ny;
+					if( iiz > (int)cparam_.nz/2 ) iiz -= cparam_.nz;
+					
+					rr[0] = (double)iix * dx;
+					rr[1] = (double)iiy * dy;
+					rr[2] = (double)iiz * dz;
+					
+					rr2 = rr[0]*rr[0] + rr[1]*rr[1] + rr[2]*rr[2];
+					unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
+					if( rr2 > R2 )
+					{
+						
+						rkernel[idx] = 0.0;
+					}else {
+						rkernel[idx] *= fftnorm;
+					}
+
+				}
+		
+		
+		
+#ifndef SINGLETHREAD_FFTW		
+		rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, rkernel, NULL );
+#else
+		rfftwnd_one_real_to_complex( plan, rkernel, NULL );
+#endif
+		
+	}
+
+
+	/*****************************************************************************************\
+	 ***    SPECIFIC KERNEL IMPLEMENTATIONS      *********************************************
+	\*****************************************************************************************/	
+	
+	template< typename real_t >
+	class kernel_real : public kernel
+	{
+	protected:
+		std::vector<real_t> kdata_;
+
+		void compute_kernel( void );
+		
+	public:
+		kernel_real( const parameters& cp )
+		: kernel( cp )
+		{
+			kdata_.assign( cparam_.nx*cparam_.ny*2*(cparam_.nz/2+1), 0.0 );
+			compute_kernel();
+		}
+		
+		void *get_ptr()
+		{	return reinterpret_cast<void*> (&kdata_[0]);	}
+		
+		~kernel_real()
+		{	std::vector<real_t>().swap( kdata_ );	}
+		
+	};
+
+
+	
+	
+	
+	template< typename real_t >
+	void kernel_real<real_t>::compute_kernel( void )
+	{
+		double 
+			kny			= cparam_.nx*M_PI/cparam_.lx,
+			fac			= cparam_.lx*cparam_.ly*cparam_.lz/pow(2.0*M_PI,3)/(cparam_.nx*cparam_.ny*cparam_.nz),
+			dx			= cparam_.lx/cparam_.nx, 
+			dy			= cparam_.ly/cparam_.ny, 
+			dz			= cparam_.lz/cparam_.nz,
+			boxlength	= cparam_.pcf->getValue<double>("setup","boxlength"),
+			nspec		= cparam_.pcf->getValue<double>("cosmology","nspec"),//cosmo.nspect,
+			pnorm		= cparam_.pcf->getValue<double>("cosmology","pnorm"),//cosmo.pnorm,
+		dplus		= cparam_.pcf->getValue<double>("cosmology","dplus");//,//cosmo.dplus;
+																		 //			t00f        = cparam_.t0scale;
+		unsigned 
+		levelmax	= cparam_.pcf->getValue<unsigned>("setup","levelmax");
+		
+		bool
+		bperiodic	= cparam_.pcf->getValueSafe<bool>("setup","periodic_TF",true);
+		
+		//if( cparam_.normalize )
+		//	kny *= 2.0;
+		
+		TransferFunction_real *tfr = new TransferFunction_real(cparam_.ptf,nspec,pnorm,dplus,0.25*cparam_.lx/(double)cparam_.nx,2.0*boxlength,kny, (int)pow(2,levelmax+2));
+		
+		if( bperiodic )
+		{
+			
+			#pragma omp parallel for
+			for( int i=0; i<cparam_.nx; ++i )
+				for( int j=0; j<cparam_.ny; ++j )
+					for( int k=0; k<cparam_.nz; ++k )
+					{
+						int iix(i), iiy(j), iiz(k);
+						double rr[3], rr2;
+						
+						if( iix > (int)cparam_.nx/2 ) iix -= cparam_.nx;
+						if( iiy > (int)cparam_.ny/2 ) iiy -= cparam_.ny;
+						if( iiz > (int)cparam_.nz/2 ) iiz -= cparam_.nz;
+						
+						unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
+						
+						for( int ii=-1; ii<=1; ++ii )
+							for( int jj=-1; jj<=1; ++jj )
+								for( int kk=-1; kk<=1; ++kk )
+								{
+									rr[0] = ((double)iix ) * dx + ii*boxlength;
+									rr[1] = ((double)iiy ) * dy + jj*boxlength;
+									rr[2] = ((double)iiz ) * dz + kk*boxlength;
+									
+									if( fabs(rr[0]) < boxlength
+									 && fabs(rr[1]) < boxlength
+									 && fabs(rr[2]) < boxlength )
+									{
+										rr2 = rr[0]*rr[0]+rr[1]*rr[1]+rr[2]*rr[2];
+										kdata_[idx] += tfr->compute_real(rr2);
+									}
+								}
+						
+						kdata_[idx] *= fac;
+						
+					}
+		}else{
+			#pragma omp parallel for
+			for( int i=0; i<cparam_.nx; ++i )
+				for( int j=0; j<cparam_.ny; ++j )
+					for( int k=0; k<cparam_.nz; ++k )
+					{
+						int iix(i), iiy(j), iiz(k);
+						double rr[3], rr2;
+						
+						if( iix > (int)cparam_.nx/2 ) iix -= cparam_.nx;
+						if( iiy > (int)cparam_.ny/2 ) iiy -= cparam_.ny;
+						if( iiz > (int)cparam_.nz/2 ) iiz -= cparam_.nz;
+						
+						unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
+						
+						rr[0] = ((double)iix ) * dx;
+						rr[1] = ((double)iiy ) * dy;
+						rr[2] = ((double)iiz ) * dz;
+						
+						rr2 = rr[0]*rr[0]+rr[1]*rr[1]+rr[2]*rr[2];
+						kdata_[idx] = tfr->compute_real(rr2)*fac;
+						
+						
+					}
+		}		
+		//kdata_[0] = tfr->compute_real(dx*dx*0.25)*fac;
+		
+		//std::cerr << "T(r=0) = " << kdata_[0]/fac << std::endl;
+		//if( cparam_.normalize )
+		//	kdata_[0] *= 0.125;//= 0.0;//*= 0.125;
+		
+		//... determine normalization
+		double sum = 0.0;
+		#pragma omp parallel for
+		for( int i=0; i<cparam_.nx; ++i )
+			for( int j=0; j<cparam_.ny; ++j )
+				for( int k=0; k<cparam_.nz; ++k )
+				{	
+					unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
+					//if( idx > 0 )
+						sum += kdata_[idx];
+				}
+		
+		
+		//std::cerr << " - The convolution kernel has avg of " << (sum+kdata_[0])/(cparam_.nx*cparam_.ny*cparam_.nz) << std::endl;
+		
+		sum /= cparam_.nx*cparam_.ny*cparam_.nz;//-1;
+		
+		if( cparam_.normalize )
+		{
+			
+			/*for( int i=0; i<cparam_.nx; ++i )
+				for( int j=0; j<cparam_.ny; ++j )
+					for( int k=0; k<cparam_.nz; ++k )
+					{
+						unsigned idx = (i*cparam_.ny + j) * 2*(cparam_.nz/2+1) + k;
+						//if( idx > 0 )
+							kdata_[idx] -= sum;
+					}*/
+		}
+		
+		
+		//if( t00f < 0.0 )
+		//	kdata_[0] += (tfr->compute_real(0.125*dx*dx) - tfr->compute_real(0.0))*fac;
+		
+		fftw_real		*rkernel	= reinterpret_cast<fftw_real*>( &kdata_[0] );
+		rfftwnd_plan	plan		= rfftw3d_create_plan( cparam_.nx, cparam_.ny, cparam_.nz,
+												FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE);
+		
+		#ifndef SINGLETHREAD_FFTW		
+		rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, rkernel, NULL );
+		#else
+		rfftwnd_one_real_to_complex( plan, rkernel, NULL );
+		#endif
+	
+		rfftwnd_destroy_plan(plan);
+		
+		delete tfr;
+	}
+	
+
+
+
+	template< typename real_t >
+	class kernel_k : public kernel
+	{
+	protected:
+		std::vector<real_t> kdata_;
+		
+		void compute_kernel( void );
+		
+	public:
+		kernel_k( const parameters& cp )
+		: kernel( cp )
+		{
+			kdata_.assign( cparam_.nx*cparam_.ny*2*(cparam_.nz/2+1), 0.0 );
+			compute_kernel();
+		}
+		
+		void *get_ptr()
+		{	return reinterpret_cast<void*> (&kdata_[0]);	}
+		
+		~kernel_k()
+		{	std::vector<real_t>().swap( kdata_ );	}
+		
+	};
+	
+	template< typename real_t >
+	void kernel_k<real_t>::compute_kernel( void )
+	{
+		double 
+		//kny			= cparam_.nx*M_PI/cparam_.lx,
+		fac			= cparam_.lx*cparam_.ly*cparam_.lz/pow(2.0*M_PI,3),// /(cparam_.nx*cparam_.ny*cparam_.nz),
+																	   //	dx			= cparam_.lx/cparam_.nx, 
+																	   //			dy			= cparam_.ly/cparam_.ny, 
+																	   //			dz			= cparam_.lz/cparam_.nz,
+			boxlength	= cparam_.pcf->getValue<double>("setup","boxlength"),
+			nspec		= cparam_.pcf->getValue<double>("cosmology","nspec"),
+			pnorm		= cparam_.pcf->getValue<double>("cosmology","pnorm"),
+			dplus		= cparam_.pcf->getValue<double>("cosmology","dplus");
+		
+		TransferFunction_k *tfk = new TransferFunction_k(cparam_.ptf,nspec,pnorm,dplus);
+		
+		fftw_complex *kdata = reinterpret_cast<fftw_complex*> ( this->get_ptr() );
+		
+		unsigned nx = cparam_.nx, ny = cparam_.ny, nz = cparam_.nz, nzp = (nz/2+1);
+		fac =1.0;//*= 1.0/sqrt(nx*ny*nz);
+		
+		double kfac = 2.0*M_PI/boxlength;
+		unsigned q=0;
+		for( int i=0; i<cparam_.nx; ++i )
+			for( int j=0; j<cparam_.ny; ++j )
+				for( int k=0; k<cparam_.nz/2+1; ++k )
+				{
+					double kx,ky,kz;
+					
+					kx = (double)i;
+					ky = (double)j;
+					kz = (double)k;
+					
+					if( kx > nx/2 ) kx -= nx;
+					if( ky > ny/2 ) ky -= ny;
+					
+					q = (i*ny+j)*nzp+k;
+					kdata[q].re = fac*tfk->compute(kfac*sqrt(kx*kx+ky*ky+kz*kz));
+					kdata[q].im = 0.0;
+					
+				}
+		
+		
+		delete tfk;
+	}
+};
+	
+namespace{
+	convolution::kernel_creator_concrete< convolution::kernel_real<double> > creator_d("tf_kernel_real_double");
+	convolution::kernel_creator_concrete< convolution::kernel_real<float> > creator_f("tf_kernel_real_float");
+	convolution::kernel_creator_concrete< convolution::kernel_k<double> > creator_kd("tf_kernel_k_double");
+	convolution::kernel_creator_concrete< convolution::kernel_k<float> > creator_kf("tf_kernel_k_float");
+
+}
+
+
+
diff --git a/convolution_kernel.hh b/convolution_kernel.hh
new file mode 100644
index 0000000..201f415
--- /dev/null
+++ b/convolution_kernel.hh
@@ -0,0 +1,116 @@
+/*
+ 
+ convolution_kernel.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+#ifndef __CONVOLUTION_KERNELS_HH
+#define __CONVOLUTION_KERNELS_HH
+
+#include <string>
+#include <map>
+
+#include "config_file.hh"
+#include "densities.hh"
+#include "transfer_function.hh"
+
+namespace convolution{
+
+	//! encapsulates all parameters required for transfer function convolution
+	struct parameters
+	{
+		int nx,ny,nz;
+		double lx,ly,lz;//,boxlength;
+		config_file *pcf;
+		transfer_function* ptf;
+		bool normalize;
+	};
+
+	
+	//! abstract base class for a transfer function convolution kernel
+	class kernel{
+	public:
+		
+		//! all parameters (physical/numerical)
+		parameters cparam_;
+		
+		//! constructor
+		kernel( const parameters& cp )
+		: cparam_( cp )
+		{	}
+		
+		//! virtual destructor
+		virtual ~kernel(){ };
+		
+		//! purely virtual method to obtain a pointer to the underlying data
+		virtual void* get_ptr() = 0;	
+	};
+
+	
+	//! abstract factory class to create convolution kernels
+	struct kernel_creator
+	{
+		//! creates a convolution kernel object
+		virtual kernel * create( const parameters& cp ) const = 0;
+		
+		//! destructor
+		virtual ~kernel_creator() { }
+	};
+	
+	
+	//! access map to the various kernel classes through the factory
+	std::map< std::string, kernel_creator *>& get_kernel_map();
+	
+	
+	//! actual implementation of the factory class for kernel objects
+	template< class Derived >
+	struct kernel_creator_concrete : public kernel_creator
+	{
+		//! constructor inserts the kernel class in the map
+		kernel_creator_concrete( const std::string& kernel_name )
+		{	get_kernel_map()[ kernel_name ] = this; 	}
+		
+		//! creates an instance of the kernel object
+		kernel * create( const parameters& cp ) const
+		{	return new Derived( cp );	}
+	};
+
+	
+	//! actual implementation of the FFT convolution (independent of the actual kernel)
+	template< typename real_t >
+	void perform( kernel* pk, void *pd );
+
+	//! actual implementation of the FFT convolution (independent of the actual kernel)
+	//! with Hann (cf. Bertschinger 2001) window filter applied
+	template< typename real_t >
+	void perform_filtered( kernel* pk, void *pd );
+	
+	
+	//! truncate the kernel softly at R, used for boundary corrections
+	void truncate( kernel*pk, double R, double alpha );
+	
+	//! truncate the kernel sharply at R, used for boundary corrections
+	void truncate_sharp( kernel*pk, double R );
+	
+}; //namespace convolution
+
+
+	
+#endif //__CONVOLUTION_KERNELS_HH
diff --git a/cosmology.cc b/cosmology.cc
new file mode 100644
index 0000000..b40c9e1
--- /dev/null
+++ b/cosmology.cc
@@ -0,0 +1,222 @@
+/*
+ 
+ cosmology.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#include "cosmology.hh"
+#include "mesh.hh"
+#include "mg_operators.hh"
+
+#define ACC(i,j,k) ((*u.get_grid((ilevel)))((i),(j),(k)))
+#define SQR(x)	((x)*(x))
+
+
+void compute_LLA_density( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order )
+{
+	fnew = u;
+	
+	for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel )
+	{
+		double h = pow(2.0,ilevel), h2 = h*h, h2_4 = 0.25*h2;
+		meshvar_bnd *pvar = fnew.get_grid(ilevel);
+		
+		
+		if( order == 2 )
+		{
+			#pragma omp parallel for //reduction(+:sum_corr,sum,sum2)
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+					{
+						double D[3][3];
+						
+						D[0][0] = (ACC(ix-1,iy,iz)-2.0*ACC(ix,iy,iz)+ACC(ix+1,iy,iz)) * h2;
+						D[1][1] = (ACC(ix,iy-1,iz)-2.0*ACC(ix,iy,iz)+ACC(ix,iy+1,iz)) * h2;
+						D[2][2] = (ACC(ix,iy,iz-1)-2.0*ACC(ix,iy,iz)+ACC(ix,iy,iz+1)) * h2;
+											
+						D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4;
+						D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4;
+						D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4;
+						
+						(*pvar)(ix,iy,iz) = D[0][0]+D[1][1]+D[2][2] +
+							( D[0][0]*D[1][1] + D[0][0]*D[2][2] + D[1][1]*D[2][2] +
+							  D[0][1]*D[1][0] + D[0][2]*D[2][0] + D[1][2]*D[2][1] +
+							  D[0][0]*D[0][0] + D[1][1]*D[1][1] + D[2][2]*D[2][2] );
+						
+					}
+		}
+		else if ( order == 4 )
+		{
+			#pragma omp parallel for 
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+					{
+						double D[3][3];
+						
+						D[0][0] = (-ACC(ix-2,iy,iz)+16.*ACC(ix-1,iy,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix+1,iy,iz)-ACC(ix+2,iy,iz)) * h2/12.0;
+						D[1][1] = (-ACC(ix,iy-2,iz)+16.*ACC(ix,iy-1,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy+1,iz)-ACC(ix,iy+2,iz)) * h2/12.0;
+						D[2][2] = (-ACC(ix,iy,iz-2)+16.*ACC(ix,iy,iz-1)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy,iz+1)-ACC(ix,iy,iz+2)) * h2/12.0;
+						
+						D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4;
+						D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4;
+						D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4;
+						
+						(*pvar)(ix,iy,iz) = D[0][0]+D[1][1]+D[2][2] +
+						( D[0][0]*D[1][1] + D[0][0]*D[2][2] + D[1][1]*D[2][2] +
+						 D[0][1]*D[1][0] + D[0][2]*D[2][0] + D[1][2]*D[2][1] +
+						 D[0][0]*D[0][0] + D[1][1]*D[1][1] + D[2][2]*D[2][2] );
+						
+					}
+		}else
+			throw std::runtime_error("compute_LLA_density : invalid operator order specified");
+
+	}
+	
+}
+
+
+void compute_Lu_density( const grid_hierarchy& u, grid_hierarchy& fnew )
+{
+	fnew = u;
+	
+	for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel )
+	{
+		double h = pow(2.0,ilevel), h2 = h*h;
+		meshvar_bnd *pvar = fnew.get_grid(ilevel);
+		
+		#pragma omp parallel for
+		for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+			for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+				for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+				{
+					double D[3][3];
+					
+					D[0][0] = 1.0 + (ACC(ix-1,iy,iz)-2.0*ACC(ix,iy,iz)+ACC(ix+1,iy,iz)) * h2;
+					D[1][1] = 1.0 + (ACC(ix,iy-1,iz)-2.0*ACC(ix,iy,iz)+ACC(ix,iy+1,iz)) * h2;
+					D[2][2] = 1.0 + (ACC(ix,iy,iz-1)-2.0*ACC(ix,iy,iz)+ACC(ix,iy,iz+1)) * h2;
+					
+					(*pvar)(ix,iy,iz) = D[0][0]+D[1][1]+D[2][2] - 3.0;
+					
+				}
+	}
+	
+}
+
+
+void compute_2LPT_source( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order )
+{
+	fnew = u;
+	
+	for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel )
+	{
+		double h = pow(2.0,ilevel), h2 = h*h, h2_4 = 0.25*h2;
+		meshvar_bnd *pvar = fnew.get_grid(ilevel);
+		
+		if ( order == 2 )
+		{
+			
+			#pragma omp parallel for
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+					{
+						double D[3][3];
+						
+						D[0][0] = (ACC(ix-1,iy,iz)-2.0*ACC(ix,iy,iz)+ACC(ix+1,iy,iz)) * h2;
+						D[1][1] = (ACC(ix,iy-1,iz)-2.0*ACC(ix,iy,iz)+ACC(ix,iy+1,iz)) * h2;
+						D[2][2] = (ACC(ix,iy,iz-1)-2.0*ACC(ix,iy,iz)+ACC(ix,iy,iz+1)) * h2;
+						D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4;
+						D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4;
+						D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4;
+						
+						
+						(*pvar)(ix,iy,iz) =   D[0][0]*D[1][1] - SQR( D[0][1] )
+											+ D[0][0]*D[2][2] - SQR( D[0][2] )
+											+ D[1][1]*D[2][2] - SQR( D[1][2] );
+						
+					}
+		}
+		else if ( order == 4 )
+		{
+			#pragma omp parallel for
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+					{
+						double D[3][3];
+						
+						D[0][0] = (-ACC(ix-2,iy,iz)+16.*ACC(ix-1,iy,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix+1,iy,iz)-ACC(ix+2,iy,iz)) * h2/12.0;
+						D[1][1] = (-ACC(ix,iy-2,iz)+16.*ACC(ix,iy-1,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy+1,iz)-ACC(ix,iy+2,iz)) * h2/12.0;
+						D[2][2] = (-ACC(ix,iy,iz-2)+16.*ACC(ix,iy,iz-1)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy,iz+1)-ACC(ix,iy,iz+2)) * h2/12.0;
+						D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4;
+						D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4;
+						D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4;
+						
+						
+						(*pvar)(ix,iy,iz) =   D[0][0]*D[1][1] - SQR( D[0][1] )
+						+ D[0][0]*D[2][2] - SQR( D[0][2] )
+						+ D[1][1]*D[2][2] - SQR( D[1][2] );
+						
+					}
+		}
+		else
+			throw std::runtime_error("compute_2LPT_source : invalid operator order specified");
+
+
+	}
+	
+	
+	//.. subtract global mean so the multi-grid poisson solver behaves well
+	
+	for( int i=fnew.levelmax(); i>(int)fnew.levelmin(); --i )
+		mg_straight().restrict( (*fnew.get_grid(i)), (*fnew.get_grid(i-1)) );
+	
+	double sum = 0.0;
+	int nx,ny,nz;
+	
+	nx = fnew.get_grid(fnew.levelmin())->size(0);
+	ny = fnew.get_grid(fnew.levelmin())->size(1);
+	nz = fnew.get_grid(fnew.levelmin())->size(2);
+	
+	for( int ix=0; ix<nx; ++ix )
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+				sum += (*fnew.get_grid(fnew.levelmin()))(ix,iy,iz);
+	
+	sum /= (nx*ny*nz);
+	
+	for( unsigned i=fnew.levelmin(); i<=fnew.levelmax(); ++i )
+	{		
+		nx = fnew.get_grid(i)->size(0);
+		ny = fnew.get_grid(i)->size(1);
+		nz = fnew.get_grid(i)->size(2);
+		
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+				for( int iz=0; iz<nz; ++iz )
+					(*fnew.get_grid(i))(ix,iy,iz) -= sum;
+	}
+	
+}
+#undef SQR
+#undef ACC
+
diff --git a/cosmology.hh b/cosmology.hh
new file mode 100644
index 0000000..333735a
--- /dev/null
+++ b/cosmology.hh
@@ -0,0 +1,224 @@
+/*
+ 
+ cosmology.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#ifndef _COSMOLOGY_HH
+#define _COSMOLOGY_HH
+
+#include "mesh.hh"
+#include "general.hh"
+#include "transfer_function.hh"
+
+/*!
+ * @class CosmoCalc
+ * @brief provides functions to compute cosmological quantities
+ *
+ * This class provides member functions to compute cosmological quantities
+ * related to the Friedmann equations and linear perturbation theory
+ */
+class CosmoCalc
+{
+public:
+	//! data structure to store cosmological parameters
+	Cosmology m_Cosmology;
+	
+	//! pointer to an instance of a transfer function plugin
+	transfer_function_plugin *m_pTransferFunction;
+	
+	
+	//! constructor for a cosmology calculator object
+	/*!
+	 * @params acosmo a cosmological parameters structure
+	 * @params pTransferFunction pointer to an instance of a transfer function object
+	 */
+	 
+	CosmoCalc( const Cosmology acosmo, transfer_function_plugin *pTransferFunction )
+	{
+		m_Cosmology = acosmo;
+		m_pTransferFunction = pTransferFunction;
+	}
+	
+	//! returns the amplitude of amplitude of the power spectrum
+	/*!
+	 * @params k the wave number in h/Mpc
+	 * @params a the expansion factor of the universe
+	 * @return power spectrum amplitude for wave number k at time a
+	 */
+	inline real_t Power( real_t k, real_t a ){
+		real_t m_Dplus    = CalcGrowthFactor( a );
+		real_t m_DplusOne = CalcGrowthFactor( 1.0 );
+		real_t m_pNorm = ComputePNorm( 1e4 );
+		m_Dplus    /= m_DplusOne;
+		m_DplusOne = 1.0;
+		real_t scale = m_Dplus/m_DplusOne;
+		return m_pNorm*scale*scale*TransferSq(k)*pow((double)k,(double)m_Cosmology.nspect);
+	}
+	
+	//! integrand function for Calc_fPeebles
+	/*!
+	 * @sa Calc_fPeebles
+	 */
+	inline static double fy( double a, void *Params )
+	{
+		Cosmology *cosm = (Cosmology*)Params;
+		double y = cosm->Omega_m*(1.0/a-1.0) + cosm->Omega_L*(a*a-1.0) + 1.0;
+		return 1.0/pow(y,1.5);
+	}
+	
+	//! calculates d log D+/d log a
+	/*! this version follows the Peebles (TBD: add citation)
+	 *  formula to compute Bertschinger's vfact
+	 */
+	inline real_t Calc_fPeebles( real_t a )
+	{
+		real_t y = m_Cosmology.Omega_m*(1.0/a-1.0) + m_Cosmology.Omega_L*(a*a-1.0) + 1.0;
+		real_t fact = integrate( &fy, 1e-6, a, (void*)&m_Cosmology );
+		return (m_Cosmology.Omega_L*a*a-0.5*m_Cosmology.Omega_m/a)/y - 1.0 + a*fy(a,(void*)&m_Cosmology)/fact;
+	}
+	
+	//! Computes the linear theory growth factor D+
+	/*! Function integrates over member function GrowthIntegrand */
+	inline real_t CalcGrowthFactor( real_t a )
+	{ 
+		real_t eta =  sqrt((double)(m_Cosmology.Omega_m/a+m_Cosmology.Omega_L*a*a
+								  +1.0-m_Cosmology.Omega_m-m_Cosmology.Omega_L));
+		
+		real_t integral = integrate( &GrowthIntegrand, 0.0, a, (void*)&m_Cosmology );
+		return eta/a*integral;
+	}
+    
+	//! Integrand used by function CalcGrowthFactor to determine the linear growth factor D+
+	inline static double GrowthIntegrand( double a, void *Params )
+	{
+		Cosmology *cosm = (Cosmology*)Params;
+		double eta = sqrt((double)(cosm->Omega_m/a+cosm->Omega_L*a*a
+								   +1.0-cosm->Omega_m-cosm->Omega_L));
+		return 2.5/(eta*eta*eta);
+	}
+	
+	//! Compute the factor relating particle displacement and velocity
+	real_t ComputeVFact_old( real_t a ){
+		real_t fomega, dlogadt, eta;
+		real_t Omega_k = 1.0 - m_Cosmology.Omega_m - m_Cosmology.Omega_L;
+		
+		real_t Dplus = CalcGrowthFactor( a );
+		
+		eta     = sqrt( (double)(m_Cosmology.Omega_m/a+ m_Cosmology.Omega_L*a*a + Omega_k ));
+		fomega  = (2.5/Dplus-1.5*m_Cosmology.Omega_m/a-Omega_k)/eta/eta;
+		dlogadt = a*eta;
+		
+		//... /100.0 since we would have to multiply by H0 to convert
+		//... the displacement to velocity units. But displacement is
+		//... in Mpc/h, and H0 in units of h is 100.
+		return fomega * dlogadt/a *100.0;
+	}
+	
+	real_t ComputeVFact( real_t a ){
+		return Calc_fPeebles(a)*sqrt(m_Cosmology.Omega_m/a+m_Cosmology.Omega_L*a*a+1.0-m_Cosmology.Omega_m-m_Cosmology.Omega_L)*100.0;
+	}
+	
+	real_t ComputedDdt( real_t a )
+	{
+		return Calc_fPeebles(a);
+	}
+	
+	
+	//! Integrand for the sigma_8 normalization of the power spectrum
+	/*! Returns the value of the primordial power spectrum multiplied with 
+	 the transfer function and the window function of 8 Mpc/h at wave number k */
+	static double dSigma8( double k, void *Params )
+	{
+		if( k<=0.0 )
+			return 0.0f;
+		
+		transfer_function *ptf = (transfer_function *)Params;
+		
+		double x = k*8.0;
+		double w = 3.0*(sin(x)-x*cos(x))/(x*x*x);
+		static double nspect = (double)ptf->cosmo_.nspect;
+		
+		double tf = ptf->compute(k);
+		
+		//... no growth factor since we compute at z=0 and normalize so that D+(z=0)=1
+		return k*k * w*w * pow((double)k,(double)nspect) * tf*tf;
+		
+	}
+	
+	
+	//! Computes the square of the transfer function
+	/*! Function evaluates the supplied transfer function m_pTransferFunction
+	 and returns the square of its value at wave number k
+	 \param k wave number at which to evaluate the transfer function
+	 */
+	inline real_t TransferSq( real_t k ){
+		//.. parameter supplied transfer function
+		real_t tf1 = m_pTransferFunction->compute(k);
+		return tf1*tf1;
+	}
+	
+	
+	//! Computes the normalization for the power spectrum
+	/*!
+	 * integrates the power spectrum to fix the normalization to that given
+	 * by the sigma_8 parameter
+	 */
+	real_t ComputePNorm( real_t kmax )
+	{
+		real_t sigma0, kmin;
+		kmax = m_pTransferFunction->get_kmax();//m_Cosmology.H0/8.0;
+		kmin = m_pTransferFunction->get_kmin();//0.0;
+		sigma0 = 4.0 * M_PI * integrate( &dSigma8, (double)kmin, (double)kmax, (void*)m_pTransferFunction );
+		
+		return m_Cosmology.sigma8*m_Cosmology.sigma8/sigma0;
+		
+	}
+	
+	
+};
+
+
+//! compute the jeans sound speed
+/*! given a density in g/cm^-3 and a mass in g it gives back the sound
+ *  speed in cm/s for which the input mass is equal to the jeans mass
+ *  @params rho density 
+ *  @params mass mass scale
+ *  @return jeans sound speed
+ */
+inline double jeans_sound_speed( double rho, double mass )
+{
+	const double G = 6.67e-8;
+	return pow( 6.0*mass/M_PI*sqrt(rho)*pow(G,1.5), 1.0/3.0 );
+}
+
+//! computes the density from the potential using the Laplacian
+void compute_Lu_density( const grid_hierarchy& u, grid_hierarchy& fnew );
+
+//! computes the 2nd order density perturbations using also off-diagonal terms in the potential Hessian 
+void compute_LLA_density( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order=4 );
+
+//! computes the source term for the 2nd order perturbations in the displacements
+void compute_2LPT_source( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order=4 );
+
+
+
+#endif // _COSMOLOGY_HH
+
diff --git a/densities.cc b/densities.cc
new file mode 100644
index 0000000..88bc784
--- /dev/null
+++ b/densities.cc
@@ -0,0 +1,837 @@
+/*
+ 
+ densities.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#include "densities.hh"
+#include "convolution_kernel.hh"
+
+bool is_number(const std::string& s)
+{
+	for (unsigned i = 0; i < s.length(); i++)
+		if (!std::isdigit(s[i])&&s[i]!='-' )
+			return false;
+	
+	return true;
+}
+
+// TODO: use optimized convolution routine when in unigrid mode 
+void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type type, refinement_hierarchy& refh, grid_hierarchy& delta, bool kspace )
+{
+	unsigned    levelmin,levelmax,levelminPoisson;
+	real_t		boxlength;
+	std::vector<long> rngseeds;
+	std::vector<std::string> rngfnames;
+	
+	
+	levelminPoisson	= cf.getValue<unsigned>("setup","levelmin");
+	levelmin	= cf.getValueSafe<unsigned>("setup","levelminTF",levelminPoisson);
+	levelmax	= cf.getValue<unsigned>("setup","levelmax");
+	boxlength   = cf.getValue<real_t>( "setup", "boxlength" );
+	
+	//... parse random number options
+	for( int i=0; i<=100; ++i )
+	{
+		char seedstr[128];
+		std::string tempstr;
+		sprintf(seedstr,"seed[%d]",i);
+		if( cf.containsKey( "random", seedstr ) )
+			tempstr = cf.getValue<std::string>( "random", seedstr );
+		else
+			tempstr = std::string("-2");
+		
+		if( is_number( tempstr ) )
+		{	
+			long ltemp;
+			cf.convert( tempstr, ltemp );
+			rngfnames.push_back( "" );
+			rngseeds.push_back( ltemp );
+		}else{
+			rngfnames.push_back( tempstr );
+			rngseeds.push_back(-1);
+			std::cout << " - Random numbers for level " << std::setw(3) << i << " will be read from file.\n";
+		}
+		
+	}
+	
+	//... parse grid setup parameters
+	unsigned	nbase	= (unsigned)pow(2,levelmin);
+	float lxref[3];
+	std::string temp	= cf.getValue<std::string>( "setup", "ref_extent" );
+	sscanf( temp.c_str(), "%g,%g,%g", &lxref[0],&lxref[1],&lxref[2] );
+	
+	int shift[3];
+	shift[0] = cf.getValue<int>("setup","shift_x");
+	shift[1] = cf.getValue<int>("setup","shift_y");
+	shift[2] = cf.getValue<int>("setup","shift_z");
+	
+	convolution::kernel_creator *the_kernel_creator;
+	
+	if( kspace )
+	{
+		#ifdef SINGLE_PRECISION	
+		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_float" ];
+		#else
+		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_k_double" ];
+		#endif
+	}
+	else
+	{
+		#ifdef SINGLE_PRECISION	
+		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_float" ];
+		#else
+		the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_double" ];
+		#endif
+	}	
+		
+	convolution::parameters conv_param;
+	conv_param.ptf = ptf;
+	conv_param.pcf = &cf;
+	
+		
+	//.. determine for which levels random seeds/random number files are given
+	int lmaxread = -1, lmingiven = -1;
+	for( unsigned ilevel = 0; ilevel < rngseeds.size(); ++ilevel )
+	{	
+		if( rngfnames[ilevel].size() > 0 )
+			lmaxread = ilevel;
+		if( rngseeds[ilevel] > 0 && lmingiven == -1 )
+			lmingiven = ilevel;
+	}
+	
+	if( (unsigned)lmingiven!=levelmin || levelmin!=levelminPoisson )
+	{
+		std::cerr <<" - Internal error: GenerateDensityUnigrid was called for a non-trivial\n"
+				  <<"       problem set-up. This should not happen, GenerateDensityHierarchy\n"
+				  <<"       should be called instead\n";
+		throw std::runtime_error("Internal error");
+	}
+	
+	std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmax << " ..." << std::endl;
+	
+	DensityGrid<real_t> *top = new DensityGrid<real_t>( nbase, nbase, nbase );
+	
+	double x0[3] = { refh.offset(levelmin,0), refh.offset(levelmin,1), refh.offset(levelmin,2) };
+	double lx[3] = { refh.size(levelmin,0), refh.size(levelmin,1), refh.size(levelmin,2) };
+	x0[0] /= pow(2,levelmin); x0[1] /= pow(2,levelmin); x0[2] /= pow(2,levelmin);
+	lx[0] /= pow(2,levelmin); lx[1] /= pow(2,levelmin); lx[2] /= pow(2,levelmin);
+	
+	random_numbers<real_t> *rc = new random_numbers<real_t>( nbase, 32, rngseeds[levelmin], true );//, x0, lx );
+
+
+	if( shift[0]!=0||shift[1]!=0||shift[2]!=0 )
+		std::cout << " - WARNING: will ignore non-zero shift in unigrid mode!\n";
+	
+	//top->fill_rand( rc, 1.0, 0, 0, 0, true );
+	rc->fill_all(*top);
+	delete rc;
+	
+	conv_param.lx = boxlength;
+	conv_param.ly = boxlength;
+	conv_param.lz = boxlength;
+	conv_param.nx = top->nx_;
+	conv_param.ny = top->ny_;
+	conv_param.nz = top->nz_;
+	conv_param.normalize = false;
+	
+	convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
+	convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
+	delete the_tf_kernel;
+	
+	delta.create_base_hierarchy(levelmin);
+	top->copy( *delta.get_grid(levelmin) );
+	delete top;
+
+	
+	for( int i=levelmax; i>0; --i )
+		mg_straight().restrict( (*delta.get_grid(i)), (*delta.get_grid(i-1)) );
+	
+	double sum = 0.0;
+	{
+		int nx,ny,nz;
+		
+		nx = delta.get_grid(levelmin)->size(0);
+		ny = delta.get_grid(levelmin)->size(1);
+		nz = delta.get_grid(levelmin)->size(2);
+		
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+				for( int iz=0; iz<nz; ++iz )
+					sum += (*delta.get_grid(levelmin))(ix,iy,iz);
+		
+		sum /= (nx*ny*nz);
+	}
+	
+	std::cout << " - Top grid mean density is off by " << sum << ", correcting..." << std::endl;
+	
+	for( unsigned i=levelmin; i<=levelmax; ++i )
+	{		
+		int nx,ny,nz;
+		nx = delta.get_grid(i)->size(0);
+		ny = delta.get_grid(i)->size(1);
+		nz = delta.get_grid(i)->size(2);
+		
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+				for( int iz=0; iz<nz; ++iz )
+					(*delta.get_grid(i))(ix,iy,iz) -= sum;
+	}
+}
+
+void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type type, refinement_hierarchy& refh, grid_hierarchy& delta )
+{
+	unsigned    levelmin,levelmax,levelminPoisson;
+	real_t		boxlength;
+	std::vector<long> rngseeds;
+	std::vector<std::string> rngfnames;
+	
+	
+	levelminPoisson	= cf.getValue<unsigned>("setup","levelmin");
+	levelmin	= cf.getValueSafe<unsigned>("setup","levelminTF",levelminPoisson);
+	levelmax	= cf.getValue<unsigned>("setup","levelmax");
+	boxlength   = cf.getValue<real_t>( "setup", "boxlength" );
+	
+	// TODO: need to make sure unigrid gets called whenever possible
+	if( levelmin == levelmax && levelmin==levelminPoisson )
+	{	
+		GenerateDensityUnigrid(cf,ptf,type,refh,delta);
+		return;
+	}
+	
+	
+	//... parse random number options
+	for( int i=0; i<=100; ++i )
+	{
+		char seedstr[128];
+		std::string tempstr;
+		sprintf(seedstr,"seed[%d]",i);
+		if( cf.containsKey( "random", seedstr ) )
+			tempstr = cf.getValue<std::string>( "random", seedstr );
+		else
+			tempstr = std::string("-2");
+
+		if( is_number( tempstr ) )
+		{	
+			long ltemp;
+			cf.convert( tempstr, ltemp );
+			rngfnames.push_back( "" );
+			rngseeds.push_back( ltemp );
+		}else{
+			rngfnames.push_back( tempstr );
+			rngseeds.push_back(-1);
+			std::cout << " - Random numbers for level " << std::setw(3) << i << " will be read from file.\n";
+		}
+			
+	}
+	
+	//... parse grid setup parameters
+	unsigned	nbase	= (unsigned)pow(2,levelmin);
+	float lxref[3];
+	std::string temp	= cf.getValue<std::string>( "setup", "ref_extent" );
+	sscanf( temp.c_str(), "%g,%g,%g", &lxref[0],&lxref[1],&lxref[2] );
+	//double      lextmin = std::min(lxref[0],std::min(lxref[1],lxref[2]));
+	real_t		alpha	= 0.1*(real_t)nbase/boxlength;//0.45*lextmin*(double)nbase;///boxlength;
+	real_t		cutoff	= 0.25;//lextmin*0.9;
+	
+	
+	int shift[3];
+	shift[0] = cf.getValue<int>("setup","shift_x");
+	shift[1] = cf.getValue<int>("setup","shift_y");
+	shift[2] = cf.getValue<int>("setup","shift_z");
+	
+#ifdef SINGLE_PRECISION	
+	convolution::kernel_creator *the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_float" ];
+#else
+	convolution::kernel_creator *the_kernel_creator = convolution::get_kernel_map()[ "tf_kernel_real_double" ];
+#endif
+	
+	convolution::parameters conv_param;
+	//conv_param.boxlength = boxlength;
+	conv_param.ptf = ptf;
+	conv_param.pcf = &cf;
+	//conv_param.cosmo = cosmo;
+	
+	
+	
+	
+	//... compute absolute grid offsets
+	std::vector<int> offtotx(levelmax+1,0),offtoty(levelmax+1,0),offtotz(levelmax+1,0);
+	for( unsigned ilevel = levelmin+1; ilevel<=levelmax; ++ilevel )
+	{
+		//... build a partial sum to get absolute offsets
+		offtotx[ilevel] = 2*(offtotx[ilevel-1]+refh.offset(ilevel,0));
+		offtoty[ilevel] = 2*(offtoty[ilevel-1]+refh.offset(ilevel,1));
+		offtotz[ilevel] = 2*(offtotz[ilevel-1]+refh.offset(ilevel,2));
+	}
+	for( unsigned ilevel = levelmin+1; ilevel<=levelmax; ++ilevel )
+	{
+		//... the arrays are doubled in size for the isolated BCs
+		offtotx[ilevel] -= refh.size(ilevel,0)/2;
+		offtoty[ilevel] -= refh.size(ilevel,1)/2;
+		offtotz[ilevel] -= refh.size(ilevel,2)/2;
+	}
+	
+	//.. determine for which levels random seeds/random number files are given
+	int lmaxread = -1, lmingiven = -1;
+	for( unsigned ilevel = 0; ilevel < rngseeds.size(); ++ilevel )
+	{	
+		if( rngfnames[ilevel].size() > 0 )
+			lmaxread = ilevel;
+		if( rngseeds[ilevel] > 0 && lmingiven == -1 )
+			lmingiven = ilevel;
+	}
+	
+	//... if random numbers are to be read from file, do this now
+	std::vector< random_numbers<real_t>* > randc;
+	randc.assign(std::max(lmaxread,std::max(lmingiven,(int)levelmax))+1,(random_numbers<real_t>*)NULL);
+	
+	if( lmaxread >= (int)levelmin )
+	{
+		randc[lmaxread] = new random_numbers<real_t>( (unsigned)pow(2,lmaxread), rngfnames[lmaxread] );
+		for( int ilevel = lmaxread-1; ilevel >= (int)levelmin; --ilevel )
+			randc[ilevel] = new random_numbers<real_t>( *randc[ilevel+1] );
+	}
+	
+	
+	//... if random numbers are not given for lower levels, obtain them by averaging
+	if( lmingiven >= (int)levelmin )
+	{
+		//double x0[3] = { 0.0, 0.0, 0.0 };
+		//double lx[3] = { 1.0, 1.0, 1.0 };
+		
+		randc[lmingiven] = new random_numbers<real_t>( (unsigned)pow(2,lmingiven), 32, rngseeds[lmingiven], true );//, x0, lx );
+		
+		for( int ilevel = lmingiven-1; ilevel >= (int)levelmin; --ilevel ){
+			if( rngseeds[ilevel-levelmin] > 0 )
+				std::cerr << " - Warning: random seed for level " << ilevel << " will be ignored.\n"
+				<< "            consistency requires that it is obtained by restriction from level " << lmingiven << std::endl;
+			
+			randc[ilevel] = new random_numbers<real_t>( *randc[ilevel+1] );
+		}
+	}
+	
+	//... if random seeds are given for levels coarser than levelmin, use them as constraints
+	if( lmingiven < (int)levelmin )
+	{
+		throw std::runtime_error("You provided a seed for a level below levelmin, this is not supported yet.");
+		//double x0[3] = { 0.0, 0.0, 0.0 };
+		//double lx[3] = { 1.0, 1.0, 1.0 };
+		
+		randc[lmingiven] = new random_numbers<real_t>( (unsigned)pow(2,lmingiven), 32, rngseeds[lmingiven], true );//, x0, lx );
+		
+		for( int ilevel = lmingiven+1; ilevel <= (int)levelmin; ++ilevel )
+		{
+			long seed = rngseeds[ilevel];
+			if( seed <= 0 )
+				seed = rngseeds[lmingiven+ilevel];
+			
+			randc[ilevel] = new random_numbers<real_t>( *randc[ilevel-1] );
+		}
+		
+		delete randc[lmingiven];
+		randc[lmingiven] = NULL;
+	}	
+	
+		//... create and initialize density grids with white noise
+		
+	PaddedDensitySubGrid<real_t>* coarse(NULL), *fine(NULL);
+	DensityGrid<real_t>* top(NULL);
+		
+	//... clean up ...//
+	for( unsigned i=0; i<randc.size(); ++i )
+	{
+		if( i<levelmin || i>levelmax )
+			if( randc[i] != NULL )
+			{	
+				delete randc[i];
+				randc[i]=NULL;
+			}
+	}
+		
+		
+		//... perform convolutions ...//
+	if( levelmax == levelmin )
+	{
+		std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmax << " ..." << std::endl;
+		
+		top = new DensityGrid<real_t>( nbase, nbase, nbase );
+			
+		real_t x0[3] = { refh.offset(levelmin,0), refh.offset(levelmin,1), refh.offset(levelmin,2) };
+		real_t lx[3] = { refh.size(levelmin,0), refh.size(levelmin,1), refh.size(levelmin,2) };
+		x0[0] /= pow(2,levelmin); x0[1] /= pow(2,levelmin); x0[2] /= pow(2,levelmin);
+		lx[0] /= pow(2,levelmin); lx[1] /= pow(2,levelmin); lx[2] /= pow(2,levelmin);
+		
+		random_numbers<real_t> *rc;
+		
+		if( randc[levelmin] == NULL )
+			rc = new random_numbers<real_t>( nbase, 32, rngseeds[levelmin], true );//, x0, lx );
+		else
+			rc = randc[levelmin];
+		
+#if 1
+		if( shift[0]!=0||shift[1]!=0||shift[2]!=0 )
+			std::cout << " - WARNING: will ignore non-zero shift in unigrid mode!\n";
+
+		top->fill_rand( rc, 1.0, 0, 0, 0, true );
+#else		
+		
+		top->fill_rand( rc, 1.0, -shift[0], -shift[1], -shift[2], true );
+#endif
+		delete rc;
+		
+		conv_param.lx = boxlength;
+		conv_param.ly = boxlength;
+		conv_param.lz = boxlength;
+		conv_param.nx = top->nx_;
+		conv_param.ny = top->ny_;
+		conv_param.nz = top->nz_;
+		conv_param.normalize = false;
+		
+		convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
+		convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
+		delete the_tf_kernel;
+		
+		delta.create_base_hierarchy(levelmin);
+		top->copy( *delta.get_grid(levelmin) );
+		delete top;
+	}
+	//}		
+		
+	
+	for( int i=0; i< (int)levelmax-(int)levelmin; ++i )
+	{
+		//.......................................................................................................//
+		//... GENERATE/FILL WITH RANDOM NUMBERS .................................................................//
+		//.......................................................................................................//
+		
+		
+		if( i==0 )
+		{
+			top = new DensityGrid<real_t>( nbase, nbase, nbase );
+			
+			random_numbers<real_t> *rc;
+			int x0[3] = {	refh.offset_abs(levelmin,0)-shift[0], 
+				refh.offset_abs(levelmin,1)-shift[1], 
+				refh.offset_abs(levelmin,2)-shift[2] };
+			
+			int lx[3] = {	refh.size(levelmin,0), 
+				refh.size(levelmin,1), 
+				refh.size(levelmin,2) };
+			
+			if( randc[levelmin] == NULL )
+				rc = randc[levelmin] = new random_numbers<real_t>( nbase, 32, rngseeds[levelmin], x0, lx );
+			else
+				rc = randc[levelmin];
+			
+			top->fill_rand( rc, 1.0, -shift[0], -shift[1], -shift[2], true );
+			delete rc;
+			randc[levelmin] = NULL;
+			
+		}
+		
+		fine = new PaddedDensitySubGrid<real_t>(	refh.offset(levelmin+i+1,0), 
+												refh.offset(levelmin+i+1,1), 
+												refh.offset(levelmin+i+1,2), 
+												refh.size(levelmin+i+1,0), 
+												refh.size(levelmin+i+1,1), 
+												refh.size(levelmin+i+1,2) );
+		
+		{
+			random_numbers<real_t> *rc;
+			int x0[3],lx[3];
+			int lfac = (int)pow(2.0,i+1);
+			x0[0] = refh.offset_abs(levelmin+i+1,0)-lfac*shift[0]; //((real_t)(offtotx[levelmin+i+1]+lfac*shift[0]))/pow(2.0,levelmin+i+1);
+			x0[1] = refh.offset_abs(levelmin+i+1,1)-lfac*shift[1]; //((real_t)(offtoty[levelmin+i+1]+lfac*shift[1]))/pow(2.0,levelmin+i+1);
+			x0[2] = refh.offset_abs(levelmin+i+1,2)-lfac*shift[2]; //((real_t)(offtotz[levelmin+i+1]+lfac*shift[2]))/pow(2.0,levelmin+i+1);
+			lx[0] = refh.size(levelmin+i+1,0); // /pow(2.0,levelmin+i+1);
+			lx[1] = refh.size(levelmin+i+1,1); // /pow(2.0,levelmin+i+1);
+			lx[2] = refh.size(levelmin+i+1,2); // /pow(2.0,levelmin+i+1);
+			
+			//x0[0] -= 0.5*lx[0];  lx[0] *= 2.0;
+			//x0[1] -= 0.5*lx[1];  lx[1] *= 2.0;
+			//x0[2] -= 0.5*lx[2];  lx[2] *= 2.0;
+			
+			if( randc[levelmin+i+1] == NULL )
+				rc = randc[levelmin+i+1] = new random_numbers<real_t>((unsigned)pow(2,levelmin+i+1), 32, rngseeds[levelmin+i+1], x0, lx);
+			else
+				rc = randc[levelmin+i+1];
+			
+			
+			{
+				/*int llfac = (int)pow(2.0,i+1);
+				fine->fill_rand( rc, 1.0,	offtotx[levelmin+i+1]-llfac*shift[0], 
+								offtoty[levelmin+i+1]-llfac*shift[1], 
+								offtotz[levelmin+i+1]-llfac*shift[2] );*/
+				
+				fine->fill_rand( rc, 1.0, x0[0]-fine->nx_/4, x0[1]-fine->ny_/4, x0[2]-fine->nz_/4 );
+				
+				if( i+levelmin+1 > (unsigned)lmingiven )
+				{	
+					if(i==0)
+						fine->constrain( *top );
+					else
+						fine->constrain( *coarse );
+				}				
+			}
+			delete rc;
+			rc = NULL;
+		}
+		
+		
+		//.......................................................................................................//
+		//... PERFORM CONVOLUTIONS ..............................................................................//
+		//.......................................................................................................//		
+		if( i==0 )
+		{
+			/**********************************************************************************************************\
+			 *	multi-grid: top-level grid grids .....
+			 \**********************************************************************************************************/ 
+			std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmin+i << " ..." << std::endl;
+			
+			delta.create_base_hierarchy(levelmin);
+			
+			DensityGrid<real_t> top_save( *top );
+			
+			
+			conv_param.lx = boxlength;
+			conv_param.ly = boxlength;
+			conv_param.lz = boxlength;
+			conv_param.nx = top->nx_;
+			conv_param.ny = top->ny_;
+			conv_param.nz = top->nz_;
+			conv_param.normalize = true;
+			
+			convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
+			
+			
+			//... 1) compute standard convolution for levelmin
+			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
+			//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
+			top->copy( *delta.get_grid(levelmin) );
+			
+			//... 2) compute contribution to finer grids from non-refined region
+			*top = top_save;
+			top_save.clear();
+			top->zero_subgrid(refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2), 
+							  refh.size(levelmin+i+1,0)/2, refh.size(levelmin+i+1,1)/2, refh.size(levelmin+i+1,2)/2 );
+
+			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
+			//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*>( top->get_data_ptr() ) );
+			
+			
+			
+			meshvar_bnd delta_longrange( *delta.get_grid(levelmin) );
+			top->copy( delta_longrange );
+			delete the_tf_kernel;
+			delete top;
+			
+			//... restrict these contributions to the next level
+			delta.add_patch( refh.offset(levelmin+1,0), refh.offset(levelmin+1,1), refh.offset(levelmin+1,2), 
+							refh.size(levelmin+1,0), refh.size(levelmin+1,1), refh.size(levelmin+1,2) );
+			
+			//mg_linear().prolong( delta_longrange, *delta.get_grid(levelmin+1) );
+			mg_cubic().prolong( delta_longrange, *delta.get_grid(levelmin+1) );
+			
+			
+		}
+		else
+		{
+			/**********************************************************************************************************\
+			 *	multi-grid: intermediate sub-grids .....
+			 \**********************************************************************************************************/ 
+			
+			std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmin+i << " ..." << std::endl;
+			
+			delta.add_patch( refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2), 
+							refh.size(levelmin+i+1,0), refh.size(levelmin+i+1,1), refh.size(levelmin+i+1,2) );
+			
+			//mg_linear().prolong( *delta.get_grid(levelmin+i), *delta.get_grid(levelmin+i+1) );
+			mg_cubic().prolong( *delta.get_grid(levelmin+i), *delta.get_grid(levelmin+i+1) );
+			
+			real_t dx,lx,ly,lz;
+			dx = boxlength/pow(2.0,levelmin+i);
+			
+			lx = dx * coarse->nx_; 
+			ly = dx * coarse->ny_; 
+			lz = dx * coarse->nz_; 
+			
+			real_t lmin = std::min( lx, std::min(ly,lz) );
+			
+			
+			conv_param.lx = lx;
+			conv_param.ly = ly;
+			conv_param.lz = lz;
+			conv_param.nx = coarse->nx_;
+			conv_param.ny = coarse->ny_;
+			conv_param.nz = coarse->nz_;
+			conv_param.normalize = false;
+			
+			convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
+			
+			PaddedDensitySubGrid<real_t> coarse_save( *coarse );
+					
+			//... 2) the inner region
+			
+			coarse->zero_boundary();
+			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
+			//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
+			
+			
+			coarse->copy_add_unpad( *delta.get_grid(levelmin+i) );
+			
+			
+			//... 3) the 'BC' for the next finer grid
+			*coarse = coarse_save;
+
+			coarse->zero_subgrid(refh.offset(levelmin+i+1,0), refh.offset(levelmin+i+1,1), refh.offset(levelmin+i+1,2), 
+								 refh.size(levelmin+i+1,0)/2, refh.size(levelmin+i+1,1)/2, refh.size(levelmin+i+1,2)/2 );
+			coarse->zero_boundary();
+			
+			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
+			//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
+			
+			
+			
+			meshvar_bnd delta_longrange( *delta.get_grid(levelmin+i) );
+			coarse->copy_unpad( delta_longrange );
+			//mg_linear().prolong_add( delta_longrange, *delta.get_grid(levelmin+i+1) );
+			mg_cubic().prolong_add( delta_longrange, *delta.get_grid(levelmin+i+1) );
+			
+			
+#if 1
+			//... FFT (isolated) boundary noise contribution
+			//convolution::truncate( the_tf_kernel, cutoff*lmin, 1e-8*alpha );
+			convolution::truncate_sharp( the_tf_kernel, cutoff*lmin );
+			*coarse = coarse_save;
+			//coarse_save.clear();
+			coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2);
+			coarse->subtract_oct_mean();
+			
+			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
+			//convolution::perform_filtered<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
+			
+			coarse->copy_add_unpad( *delta.get_grid(levelmin+i) );
+#endif
+			
+#if 0
+			*coarse = coarse_save;
+			coarse->zero_boundary();
+			coarse->subtract_oct_mean();
+			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
+			//coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2);
+			
+			coarse->upload_bnd_add( *delta.get_grid(levelmin+i-1) );
+#endif
+			
+			/**coarse = coarse_save;
+			coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2);
+			coarse->set_to_oct_mean();
+			
+			convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
+			coarse->copy_subtract_unpad( *delta.get_grid(levelmin+i) );*/
+			
+			
+			delete the_tf_kernel;
+			delete coarse;
+		}
+		
+		
+		coarse = fine;
+	}
+	
+	//... and convolution for finest grid (outside loop)
+	if( levelmax > levelmin )
+	{
+		/**********************************************************************************************************\
+		 *	multi-grid: finest sub-grid .....
+		 \**********************************************************************************************************/ 
+		std::cout << " - Performing noise convolution on level " << std::setw(2) << levelmax << " ..." << std::endl;
+
+		real_t dx,lx,ly,lz;
+		dx = boxlength/pow(2.0,levelmax);
+		
+		lx = dx * coarse->nx_; 
+		ly = dx * coarse->ny_; 
+		lz = dx * coarse->nz_; 
+		real_t lmin = std::min( lx, std::min(ly,lz) );
+		
+		conv_param.lx = lx;
+		conv_param.ly = ly;
+		conv_param.lz = lz;
+		conv_param.nx = coarse->nx_;
+		conv_param.ny = coarse->ny_;
+		conv_param.nz = coarse->nz_;
+		conv_param.normalize = false;
+		
+		//... with LR/SR splitting and full subgrid		
+		PaddedDensitySubGrid<real_t> coarse_save( *coarse );
+		
+		convolution::kernel *the_tf_kernel = the_kernel_creator->create( conv_param );
+		//... 2) the inner region
+		
+		coarse->zero_boundary();
+		convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
+		coarse->copy_add_unpad( *delta.get_grid(levelmax) );
+		
+		
+		//... compute convolution for isolated grid
+		//... 1) the padded boundary region
+	
+	
+		
+		
+#if 1
+		convolution::truncate( the_tf_kernel, cutoff*lmin, alpha );
+		
+		*coarse = coarse_save;
+		coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2);
+		coarse->subtract_oct_mean();
+		
+		convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
+		coarse->copy_add_unpad( *delta.get_grid(levelmax) );
+
+#endif
+#if 0
+		// coarse correction
+		*coarse = coarse_save;
+		coarse->zero_boundary();
+		coarse->subtract_oct_mean();
+		convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
+		//coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2);
+		
+		coarse->upload_bnd_add( *delta.get_grid(levelmax-1) );
+		
+		//	meshvar_bnd delta_longrange( *delta.get_grid(levelmax) );
+		//coarse->copy_unpad( delta_longrange );
+		//mg_straight().restrict_add( delta_longrange, *delta.get_grid(levelmax-1) );
+#endif	
+		
+		
+		
+		/**coarse = coarse_save;
+		coarse->zero_subgrid(0,0,0,coarse->nx_/2,coarse->ny_/2,coarse->nz_/2);
+		coarse->set_to_oct_mean();
+		
+		convolution::perform<real_t>( the_tf_kernel, reinterpret_cast<void*> (coarse->get_data_ptr()) );
+		coarse->copy_subtract_unpad( *delta.get_grid(levelmax) );*/
+		
+		
+		delete the_tf_kernel;		
+		delete coarse;
+	}
+	
+	//... clean up ...//
+	for( unsigned i=0; i<randc.size(); ++i )
+	{
+		if( i<levelmin || i>levelmax )
+			if( randc[i] != NULL )
+			{	
+				delete randc[i];
+				randc[i]=NULL;
+			}
+	}
+	
+	//... enforce mean condition
+	for( int i=levelmin; i<(int)levelmax; ++i )
+			enforce_mean( (*delta.get_grid(i+1)), (*delta.get_grid(i)) );
+	
+	
+	
+	
+	
+	
+	double sum;
+	
+#if 0
+	//... subtract the box mean.... this will otherwise add
+	//... a constant curvature term to the potential
+	sum = 0.0;
+	{
+		int nx,ny,nz;
+		
+		nx = delta.get_grid(levelmin)->size(0);
+		ny = delta.get_grid(levelmin)->size(1);
+		nz = delta.get_grid(levelmin)->size(2);
+		
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+				for( int iz=0; iz<nz; ++iz )
+					sum += (*delta.get_grid(levelmin))(ix,iy,iz);
+		
+		sum /= (nx*ny*nz);
+	}
+	
+	std::cout << " - Top grid mean density is off by " << sum << ", correcting..." << std::endl;
+
+	for( unsigned i=levelmin; i<=levelmax; ++i )
+	{		
+		int nx,ny,nz;
+		nx = delta.get_grid(i)->size(0);
+		ny = delta.get_grid(i)->size(1);
+		nz = delta.get_grid(i)->size(2);
+		
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+				for( int iz=0; iz<nz; ++iz )
+					(*delta.get_grid(i))(ix,iy,iz) -= sum;
+	}
+#endif
+	
+	//... fill coarser levels with data from finer ones...
+	//	for( int i=levelmax; i>(int)levelmin; --i )
+	for( int i=levelmax; i>0; --i )
+		mg_straight().restrict( (*delta.get_grid(i)), (*delta.get_grid(i-1)) );
+	
+#if 1
+	sum = 0.0;
+	{
+		int nx,ny,nz;
+		
+		nx = delta.get_grid(levelmin)->size(0);
+		ny = delta.get_grid(levelmin)->size(1);
+		nz = delta.get_grid(levelmin)->size(2);
+		
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+				for( int iz=0; iz<nz; ++iz )
+					sum += (*delta.get_grid(levelmin))(ix,iy,iz);
+		
+		sum /= (nx*ny*nz);
+	}
+	
+	std::cout << " - Top grid mean density is off by " << sum << ", correcting..." << std::endl;
+
+	for( unsigned i=levelmin; i<=levelmax; ++i )
+	{		
+		int nx,ny,nz;
+		nx = delta.get_grid(i)->size(0);
+		ny = delta.get_grid(i)->size(1);
+		nz = delta.get_grid(i)->size(2);
+		
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+				for( int iz=0; iz<nz; ++iz )
+					(*delta.get_grid(i))(ix,iy,iz) -= sum;
+	}
+#endif
+	
+}
+
diff --git a/densities.hh b/densities.hh
new file mode 100644
index 0000000..8e1d699
--- /dev/null
+++ b/densities.hh
@@ -0,0 +1,827 @@
+/*
+ 
+ densities.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#ifndef __DENSITIES_HH
+#define __DENSITIES_HH
+
+
+#ifdef SINGLE_PRECISION
+	#ifdef SINGLETHREAD_FFTW
+	#include <srfftw.h>
+	#else
+	#include <srfftw_threads.h>
+	#endif
+#else
+	#ifdef SINGLETHREAD_FFTW
+	#include <drfftw.h>
+	#else
+	#include <drfftw_threads.h>
+	#endif
+#endif
+
+#include "config_file.hh"
+#include "random.hh"
+#include "cosmology.hh"
+#include "transfer_function.hh"
+
+
+void GenerateDensityHierarchy(	config_file& cf, transfer_function *ptf, tf_type type, 
+							  refinement_hierarchy& refh, grid_hierarchy& delta );
+
+void GenerateDensityUnigrid( config_file& cf, transfer_function *ptf, tf_type type, 
+							refinement_hierarchy& refh, grid_hierarchy& delta, bool kspace=false );
+
+/*!
+ * @class DensityGrid
+ * @brief provides infrastructure for computing the initial density field
+ *
+ * This class provides access and data management member functions that
+ * are used when computing the initial density field by convolution with
+ * transfer functions.
+ */
+template< typename real_t >
+class DensityGrid
+{
+public:
+	
+	int nx_;	//!< number of grid cells in x-direction
+	int ny_;	//!< number of grid cells in y-direction
+	int nz_;	//!< number of grid cells in z-direction
+	int nzp_;	//!< number of cells in memory (z-dir), used for Nyquist padding
+	
+	//! the actual data container in the form of a 1D array
+	std::vector< real_t > data_;
+	
+	//! constructor
+	/*! constructs an instance given the dimensions of the density field
+	 * @params nx the number of cells in x
+	 * @params ny the number of cells in y
+	 * @params nz the number of cells in z
+	 */
+	DensityGrid( unsigned nx, unsigned ny, unsigned nz )
+	: nx_(nx), ny_(ny), nz_(nz)
+	{
+		data_.assign(nx_*ny_*2*(nz_/2+1),0.0);
+		nzp_ = 2*(nz_/2+1);
+	}
+	
+	//! copy constructor
+	explicit DensityGrid( const DensityGrid<real_t> & g )
+	: nx_(g.nx_), ny_(g.ny_), nz_(g.nz_), nzp_(g.nzp_)
+	{
+		data_ = g.data_;
+	}
+	
+	//!destructor
+	~DensityGrid()
+	{	}
+	
+	//! clears the density object
+	/*! sets all dimensions to zero and frees the memory
+	 */
+	void clear( void )
+	{
+		nx_ = ny_ = nz_ = nzp_ = 0;
+		data_.clear();
+		std::vector<real_t>().swap(data_);
+	}
+	
+	//! assigns the contents of another DensityGrid to this
+	DensityGrid& operator=( const DensityGrid<real_t>& g )
+	{
+		nx_ = g.nx_;
+		ny_ = g.ny_;
+		nz_ = g.nz_;
+		nzp_= g.nzp_;
+		data_ = g.data_;
+		
+		return *this;
+	}
+	
+	//! 3D index based data access operator
+	inline real_t& operator()( int i, int j, int k )
+	{	return data_[(i*ny_+j)*nzp_+k]; 	}
+	
+	//! 3D index based const data access operator 
+	inline const real_t& operator()( int i, int j, int k ) const
+	{	return data_[(i*ny_+j)*nzp_+k]; 	}
+	
+	//! recover the pointer to the 1D data array
+	inline real_t * get_data_ptr( void )
+	{	return &data_[0];	}
+	
+	
+	//! fills the density field with random number values
+	/*! given a pointer to a random_numbers object, fills the field with random values
+	 *  @params prc pointer to a random_numbers object
+	 *  @params variance the variance of the random numbers (the values returned by prc are multiplied by this)
+	 *  @params i0 x-offset (shift) in cells of the density field with respect to the random number field
+	 *  @params j0 y-offset (shift) in cells of the density field with respect to the random number field
+	 *  @params k0 z-offset (shift) in cells of the density field with respect to the random number field
+	 *  @params zero boolean, if true, the global mean will be subtracted
+	 */
+	void fill_rand( /*const*/ random_numbers<real_t>* prc, real_t variance, int i0, int j0, int k0, bool zero=false )
+	{
+		double sum = 0.0;
+		
+		#pragma omp parallel for reduction(+:sum)
+		for( int i=0; i<nx_; ++i )
+			for( int j=0; j<ny_; ++j )
+				for( int k=0; k<nz_; ++k )
+				{
+					(*this)(i,j,k) = (*prc)(i0+i,j0+j,k0+k) * variance;
+					sum += (*this)(i,j,k);
+				}
+		
+		sum /= nx_*ny_*nz_;
+		
+		if( zero )
+		{
+			#pragma omp parallel for
+			for( int i=0; i<nx_; ++i )
+				for( int j=0; j<ny_; ++j )
+					for( int k=0; k<nz_; ++k )
+						(*this)(i,j,k) -= sum;
+						
+		}
+					
+	}
+	
+	//! copies the data from another field with 3D index-based access operator
+	template< class array3 >
+	void copy( array3& v )
+	{
+		for( int ix=0; ix<(int)nx_; ++ix )
+			for( int iy=0; iy<(int)ny_; ++iy )	
+				for( int iz=0; iz<(int)nz_; ++iz )
+					v(ix,iy,iz) = (*this)(ix,iy,iz);
+						
+	}
+
+	//! adds the data from another field with 3D index-based access operator
+	template< class array3 >
+	void copy_add( array3& v )
+	{
+		for( int ix=0; ix<(int)nx_; ++ix )
+			for( int iy=0; iy<(int)ny_; ++iy )	
+				for( int iz=0; iz<(int)nz_; ++iz )
+					v(ix,iy,iz) += (*this)(ix,iy,iz);
+	}
+	
+	//! subtract the mean of each oct of the field 
+	/*! subtracting the mean of each oct implies that a restriction of the field to the coarser level
+	 *  is zero everywhere (needed for Hoffman-Ribak constraints)
+	 */
+	void subtract_oct_mean( void )
+	{
+		for( int i=0; i<nx_; i+=2 )
+			for( int j=0; j<ny_; j+=2 )
+				for( int k=0; k<nz_; k+=2 )
+				{
+					real_t avg = 
+						0.125*((*this)(i,j,k)+(*this)(i+1,j,k)+(*this)(i,j+1,k)+(*this)(i,j,k+1)+
+							   (*this)(i+1,j+1,k)+(*this)(i+1,j,k+1)+(*this)(i,j+1,k+1)+(*this)(i+1,j+1,k+1));
+						
+					(*this)(i,j,k)			-= avg;
+					(*this)(i+1,j,k)		-= avg;
+					(*this)(i,j+1,k)		-= avg;
+					(*this)(i,j,k+1)		-= avg;
+					(*this)(i+1,j+1,k)		-= avg;
+					(*this)(i+1,j,k+1)		-= avg;
+					(*this)(i,j+1,k+1)		-= avg;
+					(*this)(i+1,j+1,k+1)	-= avg;
+					
+				}
+		
+	}
+	
+	//! replaces the value of all cells in an oct with the average value of the oct
+	void set_to_oct_mean( void )
+	{
+		for( int i=0; i<nx_; i+=2 )
+			for( int j=0; j<ny_; j+=2 )
+				for( int k=0; k<nz_; k+=2 )
+				{
+					real_t avg = 
+					0.125*((*this)(i,j,k)+(*this)(i+1,j,k)+(*this)(i,j+1,k)+(*this)(i,j,k+1)+
+						   (*this)(i+1,j+1,k)+(*this)(i+1,j,k+1)+(*this)(i,j+1,k+1)+(*this)(i+1,j+1,k+1));
+					
+					(*this)(i,j,k)			= avg;
+					(*this)(i+1,j,k)		= avg;
+					(*this)(i,j+1,k)		= avg;
+					(*this)(i,j,k+1)		= avg;
+					(*this)(i+1,j+1,k)		= avg;
+					(*this)(i+1,j,k+1)		= avg;
+					(*this)(i,j+1,k+1)		= avg;
+					(*this)(i+1,j+1,k+1)	= avg;
+					
+				}
+		
+	}
+	
+	//! sets the field to zero inside of a rectangular box
+	void zero_subgrid( unsigned oxsub, unsigned oysub, unsigned ozsub, unsigned lxsub, unsigned lysub, unsigned lzsub )
+	{
+		//... correct offsets for padding (not needed for top grid)
+		for( int ix=oxsub; ix<(int)(oxsub+lxsub); ++ix )
+			for( int iy=oysub; iy<(int)(oysub+lysub); ++iy )	
+				for( int iz=ozsub; iz<(int)(ozsub+lzsub); ++iz )
+					(*this)(ix,iy,iz) = 0.0;
+		
+	}
+	
+	
+	//! sets the field to zero inside of a rectangular box including the boundary of the box (used for isolated FFT)
+	void zero_padded_subgrid( unsigned oxsub, unsigned oysub, unsigned ozsub, unsigned lxsub, unsigned lysub, unsigned lzsub )
+	{
+		//... correct offsets for padding (not needed for top grid)
+		oxsub -= lxsub/2;
+		oysub -= lysub/2;
+		ozsub -= lzsub/2;
+		lxsub *= 2;
+		lysub *= 2;
+		lzsub *= 2;
+		for( int ix=oxsub; ix<(int)(oxsub+lxsub); ++ix )
+			for( int iy=oysub; iy<(int)(oysub+lysub); ++iy )	
+				for( int iz=ozsub; iz<(int)(ozsub+lzsub); ++iz )
+					(*this)(ix,iy,iz) = 0.0;
+					
+	}
+	
+	//! sets the field to zero outside of a rectangular box including the boundary of the box (used for isolated FFT)
+	void zero_but_padded_subgrid( int oxsub, int oysub, int ozsub, int lxsub, int lysub, int lzsub )
+	{
+		//... correct offsets for padding (not needed for top grid)
+		oxsub -= lxsub/2;
+		oysub -= lysub/2;
+		ozsub -= lzsub/2;
+		lxsub *= 2;
+		lysub *= 2;
+		lzsub *= 2;
+		
+		for( int ix=0; ix<nx_; ++ix )
+			for( int iy=0; iy<ny_; ++iy )
+				for( int iz=0; iz<nz_; ++iz )		
+				{
+					if( (ix<oxsub||ix>=oxsub+lxsub)
+					   ||  (iy<oysub||iy>=oysub+lysub)
+					   ||  (iz<ozsub||iz>=ozsub+lzsub) )
+					(*this)(ix,iy,iz) = 0.0;	
+					   
+				}
+	}
+	
+	//! sets the field to zero outside of a rectangular box
+	void zero_but_subgrid( int oxsub, int oysub, int ozsub, int lxsub, int lysub, int lzsub )
+	{
+		for( int ix=0; ix<nx_; ++ix )
+			for( int iy=0; iy<ny_; ++iy )
+				for( int iz=0; iz<nz_; ++iz )		
+				{
+					if( (ix<oxsub||ix>=oxsub+lxsub)
+					   ||  (iy<oysub||iy>=oysub+lysub)
+					   ||  (iz<ozsub||iz>=ozsub+lzsub) )
+						(*this)(ix,iy,iz) = 0.0;	
+					
+				}
+	}
+	
+	//! sets the field to zero except on the boundary of a rectangular box
+	void zero_but_subgrid_bnd( unsigned oxsub, unsigned oysub, unsigned ozsub, unsigned lxsub, unsigned lysub, unsigned lzsub )
+	{
+		//... correct offsets for padding (not needed for top grid)
+		
+		// zero the subgrid
+		for( int ix=oxsub; ix<(int)(oxsub+lxsub); ++ix )
+			for( int iy=oysub; iy<(int)(oysub+lysub); ++iy )	
+				for( int iz=ozsub; iz<(int)(ozsub+lzsub); ++iz )
+					(*this)(ix,iy,iz) = 0.0;
+		
+		oxsub -= lxsub/2;
+		oysub -= lysub/2;
+		ozsub -= lzsub/2;
+		lxsub *= 2;
+		lysub *= 2;
+		lzsub *= 2;
+		
+		// zero the outside, except the boundary
+		//#pragma parallel nowait
+		{
+			
+		for( int ix=0; ix<oxsub; ++ix )
+			for( int iy=0; iy<ny_; ++iy )
+				for( int iz=0; iz<nz_; ++iz )
+					(*this)(ix,iy,iz) = 0.0;
+			
+		for( int ix=oxsub+lxsub; ix<nx_; ++ix )
+			for( int iy=0; iy<ny_; ++iy )
+				for( int iz=0; iz<nz_; ++iz )
+					(*this)(ix,iy,iz) = 0.0;
+		
+		for( int ix=0; ix<nx_; ++ix )
+			for( int iy=0; iy<oysub; ++iy )
+				for( int iz=0; iz<nz_; ++iz )
+					(*this)(ix,iy,iz) = 0.0;
+		for( int ix=0; ix<nx_; ++ix )
+			for( int iy=oysub+lysub; iy<ny_; ++iy )
+				for( int iz=0; iz<nz_; ++iz )
+					(*this)(ix,iy,iz) = 0.0;
+		
+		for( int ix=0; ix<nx_; ++ix )
+			for( int iy=0; iy<ny_; ++iy )
+				for( int iz=0; iz<ozsub; ++iz )
+					(*this)(ix,iy,iz) = 0.0;
+		for( int ix=0; ix<nx_; ++ix )
+			for( int iy=0; iy<ny_; ++iy )
+				for( int iz=ozsub+lzsub; iz<nz_; ++iz )
+					(*this)(ix,iy,iz) = 0.0;
+		
+		}
+		
+	}
+	
+};
+
+template< typename real_t >
+class PaddedDensitySubGrid : public DensityGrid<real_t>
+{
+public:
+	using DensityGrid<real_t>::nx_;
+	using DensityGrid<real_t>::ny_;
+	using DensityGrid<real_t>::nz_;
+	using DensityGrid<real_t>::data_;	
+	
+	using DensityGrid<real_t>::fill_rand;
+	using DensityGrid<real_t>::get_data_ptr;
+	
+	int ox_, oy_, oz_;
+public:
+	
+	PaddedDensitySubGrid( int ox, int oy, int oz, unsigned nx, unsigned ny, unsigned nz)
+	: DensityGrid<real_t>(2*nx,2*ny,2*nz), ox_(ox), oy_(oy), oz_(oz)
+	{
+		if( ox-nx/4 < 0 || oy-ny/4 < 0 || oz-nz/4 < 0 )
+			throw std::runtime_error("subgrid extends across top grid");
+		
+		//.. the size in top grid cells is nx/2,ny/2,nz/2, so padding starts at ox-nx/4...
+		//.. loop over relevant part of top grid and copy down grid values
+		/*for( int ix=ox-nx/4,ixs=0; ix<ox+3*nx/4; ix++,ixs+=2 )
+		 for( int iy=oy-ny/4,iys=0; iy<oy+3*ny/4; iy++,iys+=2 )
+		 for( int iz=oz-nz/4,izs=0; iz<oz+3*nz/4; iz++,izs+=2 )
+		 {
+		 (*this)(ixs,iys,izs)		=
+		 (*this)(ixs+1,iys,izs)		=
+		 (*this)(ixs,iys+1,izs)		=
+		 (*this)(ixs,iys,izs+1)		=
+		 (*this)(ixs+1,iys+1,izs)	=
+		 (*this)(ixs+1,iys,izs+1)	=
+		 (*this)(ixs,iys+1,izs+1)	=
+		 (*this)(ixs+1,iys+1,izs+1)	=	top(ix,iy,iz);
+		 }*/
+	}
+	
+	PaddedDensitySubGrid( const PaddedDensitySubGrid<real_t>& o )
+	: DensityGrid<real_t>( o ), ox_(o.ox_), oy_(o.oy_), oz_(o.oz_)
+	{ }
+	
+	void zero_padded_subgrid( unsigned oxsub, unsigned oysub, unsigned ozsub, unsigned lxsub, unsigned lysub, unsigned lzsub )
+	{
+		//... correct offsets for padding (not needed for top grid)
+		
+		oxsub += nx_/4;
+		oysub += ny_/4;
+		ozsub += nz_/4;
+		
+		//... correct offsets for padding (not needed for top grid)
+		for( int ix=oxsub; ix<(int)(oxsub+lxsub); ++ix )
+			for( int iy=oysub; iy<(int)(oysub+lysub); ++iy )	
+				for( int iz=ozsub; iz<(int)(ozsub+lzsub); ++iz )
+					(*this)(ix,iy,iz) = 0.0;
+		
+	}
+	
+	void zero_subgrid( unsigned oxsub, unsigned oysub, unsigned ozsub, unsigned lxsub, unsigned lysub, unsigned lzsub )
+	{
+		//... correct offsets for padding (not needed for top grid)
+		
+		oxsub += nx_/4;
+		oysub += ny_/4;
+		ozsub += nz_/4;
+		
+		for( int ix=oxsub; ix<(int)(oxsub+lxsub); ++ix )
+			for( int iy=oysub; iy<(int)(oysub+lysub); ++iy )	
+				for( int iz=ozsub; iz<(int)(ozsub+lzsub); ++iz )
+					(*this)(ix,iy,iz) = 0.0;
+		
+	}
+	
+	void zero_but_subgrid_bnd( unsigned oxsub, unsigned oysub, unsigned ozsub, unsigned lxsub, unsigned lysub, unsigned lzsub )
+	{
+		//... correct offsets for padding (not needed for top grid)
+		
+		oxsub += nx_/4;
+		oysub += ny_/4;
+		ozsub += nz_/4;
+		
+		//lxsub += nx_/4;
+		//lysub += nx_/4;
+		//lzsub += nx_/4;
+		
+		// zero the subgrid
+		for( int ix=oxsub; ix<(int)(oxsub+lxsub); ++ix )
+			for( int iy=oysub; iy<(int)(oysub+lysub); ++iy )	
+				for( int iz=ozsub; iz<(int)(ozsub+lzsub); ++iz )
+					(*this)(ix,iy,iz) = 0.0;
+		
+		oxsub -= lxsub/2;
+		oysub -= lysub/2;
+		ozsub -= lzsub/2;
+		lxsub *= 2;
+		lysub *= 2;
+		lzsub *= 2;
+		
+		// zero the outside, except the boundary
+		//#pragma parallel block nowait
+		{
+			
+			for( int ix=0; ix<oxsub; ++ix )
+				for( int iy=0; iy<ny_; ++iy )
+					for( int iz=0; iz<nz_; ++iz )
+						(*this)(ix,iy,iz) = 0.0;
+			
+			std::cerr << oxsub+lxsub << " -> " << nx_ << std::endl;
+			for( int ix=oxsub+lxsub; ix<nx_; ++ix )
+				for( int iy=0; iy<ny_; ++iy )
+					for( int iz=0; iz<nz_; ++iz )
+						(*this)(ix,iy,iz) = 0.0;
+			
+			for( int ix=0; ix<nx_; ++ix )
+				for( int iy=0; iy<oysub; ++iy )
+					for( int iz=0; iz<nz_; ++iz )
+						(*this)(ix,iy,iz) = 0.0;
+			for( int ix=0; ix<nx_; ++ix )
+				for( int iy=oysub+lysub; iy<ny_; ++iy )
+					for( int iz=0; iz<nz_; ++iz )
+						(*this)(ix,iy,iz) = 0.0;
+			
+			for( int ix=0; ix<nx_; ++ix )
+				for( int iy=0; iy<ny_; ++iy )
+					for( int iz=0; iz<ozsub; ++iz )
+						(*this)(ix,iy,iz) = 0.0;
+			for( int ix=0; ix<nx_; ++ix )
+				for( int iy=0; iy<ny_; ++iy )
+					for( int iz=ozsub+lzsub; iz<nz_; ++iz )
+						(*this)(ix,iy,iz) = 0.0;
+			
+		}
+	}		
+	
+	
+	template< class array3 >
+	void upload_bnd_add( array3& top )
+	{
+		int ox=ox_-nx_/8, oy=oy_-ny_/8, oz=oz_-nz_/8;
+		int lx=ox+nx_/2, ly=oy+ny_/2, lz=oz+nz_/2;
+		ox = std::max(ox,0);
+		oy = std::max(oy,0);
+		oz = std::max(oz,0);
+		lx = std::min(lx,(int)top.size(0));
+		ly = std::min(ly,(int)top.size(1));
+		lz = std::min(lz,(int)top.size(2));
+		
+		
+		if( ox < 0 || oy < 0 || oz < 0 ){
+			
+			std::cerr << "offset = " << ox << ", " << oy << ", " << oz << std::endl;
+			std::cerr << "nx     = " << nx_ << ", " << ny_ << ", " << nz_ << std::endl;
+			
+			throw std::runtime_error("Subgrid extends beyond top grid. Increase levelmin or reduce size of refinement region!");
+		}
+		for( int ix=0,ixu=ox;ix<nx_&&ixu<lx;ix+=2,++ixu )
+			for( int iy=0,iyu=oy;iy<ny_&&iyu<ly;iy+=2,++iyu )
+				for( int iz=0,izu=oz;iz<nz_&&izu<lz;iz+=2,++izu )
+				{
+					if( (ix<nx_/4||ix>=3*nx_/4)
+					   || (iy<ny_/4||iy>=3*ny_/4)
+					   || (iz<nz_/4||iz>=3*nz_/4) )
+					{
+						top(ixu,iyu,izu) += 0.125* ((*this)(ix,iy,iz)+(*this)(ix+1,iy,iz)+(*this)(ix,iy+1,iz)+(*this)(ix,iy,iz+1)
+											+(*this)(ix+1,iy+1,iz)+(*this)(ix+1,iy,iz+1)+(*this)(ix,iy+1,iz+1)+(*this)(ix+1,iy+1,iz+1) );
+												 //						std::cerr << "*";
+					}
+				}
+	}
+	
+	void constrain( const DensityGrid<real_t>& top  )
+	{
+		int ox=ox_-nx_/8, oy=oy_-ny_/8, oz=oz_-nz_/8;
+		
+		if( ox < 0 || oy < 0 || oz < 0 ){
+		
+			std::cerr << "offset = " << ox << ", " << oy << ", " << oz << std::endl;
+			std::cerr << "nx     = " << nx_ << ", " << ny_ << ", " << nz_ << std::endl;
+			
+			throw std::runtime_error("Subgrid extends beyond top grid. Increase levelmin or reduce size of refinement region!");
+		}
+			
+		//...top grid is not padded
+		
+		//.. fine grid will be multiplied by sqrt(8) later to account for increase in variance
+		//.. so, we need to divide the coarse grid value by it
+		
+		real_t fac = 1.0/sqrt(8);
+		for( int ixs=0,ix=ox;ixs<nx_;ixs+=2,++ix )
+			for( int iys=0,iy=oy;iys<ny_;iys+=2,++iy )
+				for( int izs=0,iz=oz;izs<nz_;izs+=2,++iz )
+				{
+					real_t mean = 0.125 * 
+						((*this)(ixs,iys,izs)+
+						 (*this)(ixs+1,iys,izs)+
+						 (*this)(ixs,iys+1,izs)+
+						 (*this)(ixs,iys,izs+1)+
+						 (*this)(ixs+1,iys+1,izs)+
+						 (*this)(ixs+1,iys,izs+1)+
+						 (*this)(ixs,iys+1,izs+1)+
+						 (*this)(ixs+1,iys+1,izs+1));
+					 
+					
+					double aa = top(ix,iy,iz)*fac - mean;
+					
+					(*this)(ixs,iys,izs)		+=	aa;
+					(*this)(ixs+1,iys,izs)		+=	aa;
+					(*this)(ixs,iys+1,izs)		+=	aa;
+					(*this)(ixs,iys,izs+1)		+=	aa;
+					(*this)(ixs+1,iys+1,izs)	+=	aa;
+					(*this)(ixs+1,iys,izs+1)	+=	aa;
+					(*this)(ixs,iys+1,izs+1)	+=	aa;
+					(*this)(ixs+1,iys+1,izs+1)	+=	aa;
+				}
+		
+#if 0		
+		unsigned nx=nx_/2, ny=ny_/2, nz=nz_/2;
+		
+		int ox=ox_, oy=oy_, oz=oz_;
+		
+		if( ox-nx/4 < 0 || oy-ny/4 < 0 || oz-nz/4 < 0 )
+			throw("subgrid extends across top grid");
+		
+		//.. the size in top grid cells is nx/2,ny/2,nz/2, so padding starts at ox-nx/4...
+		//.. loop over relevant part of top grid and copy down grid values
+		for( int ix=ox-nx/4,ixs=0; ix<ox+3*nx/4; ix++,ixs+=2 )
+			for( int iy=oy-ny/4,iys=0; iy<oy+3*ny/4; iy++,iys+=2 )
+				for( int iz=oz-nz/4,izs=0; iz<oz+3*nz/4; iz++,izs+=2 )
+				{
+					//... removed -r, as the mean will be taken out right at the beginning
+					(*this)(ixs,iys,izs)		+=	top(ix,iy,iz);
+					(*this)(ixs+1,iys,izs)		+=	top(ix,iy,iz);
+					(*this)(ixs,iys+1,izs)		+=	top(ix,iy,iz);
+					(*this)(ixs,iys,izs+1)		+=	top(ix,iy,iz);
+					(*this)(ixs+1,iys+1,izs)	+=	top(ix,iy,iz);
+					(*this)(ixs+1,iys,izs+1)	+=	top(ix,iy,iz);
+					(*this)(ixs,iys+1,izs+1)	+=	top(ix,iy,iz);
+					(*this)(ixs+1,iys+1,izs+1)	+=	top(ix,iy,iz);
+				}
+#endif
+	}
+	
+	void constrain( const PaddedDensitySubGrid<real_t>& top )
+	{
+		///... top grid is padded too...
+		//.. ox is shifted by nx_/4, padded overlap starts at -nx_/8
+		int ox=ox_+top.nx_/4-nx_/8, oy=oy_+top.ny_/4-ny_/8, oz=oz_+top.nz_/4-nz_/8;
+		if( ox < 0 || oy < 0 || oz < 0 )
+			throw std::runtime_error("subgrid extends beyond top grid");
+		double fac = 1.0/sqrt(8.0);
+		for( int ixs=0,ix=ox;ixs<nx_;ixs+=2,++ix )
+			for( int iys=0,iy=oy;iys<ny_;iys+=2,++iy )
+				for( int izs=0,iz=oz;izs<nz_;izs+=2,++iz )
+				{
+					
+					double mean = 0.125 * 
+					((*this)(ixs,iys,izs)+
+					 (*this)(ixs+1,iys,izs)+
+					 (*this)(ixs,iys+1,izs)+
+					 (*this)(ixs,iys,izs+1)+
+					 (*this)(ixs+1,iys+1,izs)+
+					 (*this)(ixs+1,iys,izs+1)+
+					 (*this)(ixs,iys+1,izs+1)+
+					 (*this)(ixs+1,iys+1,izs+1));
+					
+					
+					double aa = top(ix,iy,iz)*fac - mean;
+					
+					(*this)(ixs,iys,izs)		+=	aa;
+					(*this)(ixs+1,iys,izs)		+=	aa;
+					(*this)(ixs,iys+1,izs)		+=	aa;
+					(*this)(ixs,iys,izs+1)		+=	aa;
+					(*this)(ixs+1,iys+1,izs)	+=	aa;
+					(*this)(ixs+1,iys,izs+1)	+=	aa;
+					(*this)(ixs,iys+1,izs+1)	+=	aa;
+					(*this)(ixs+1,iys+1,izs+1)	+=	aa;
+				}
+
+	}
+	
+	
+	template< class array3 >
+	void upload_bnd( unsigned nbnd, array3& v )
+	{
+		for( int ix=nx_/4-2*nbnd,ixu=-nbnd; ix<3*nx_/4+2*nbnd; ix+=2,++ixu)
+			for( int iy=ny_/4-2*nbnd,iyu=-nbnd; iy<3*ny_/4+2*nbnd; iy+=2,++iyu )	
+				for( int iz=nz_/4-2*nbnd,izu=-nbnd; iz<3*nz_/4+2*nbnd; iz+=2,++izu )
+				{
+					if( (ix<nx_/4||ix>=3*nx_/4)
+					   || (iy<ny_/4||iy>=3*ny_/4)
+					   || (iz<nz_/4||iz>=3*nz_/4) )
+					   {
+						   v(ixu+ox_,iyu+oy_,izu+oz_) 
+						   = 1.0;
+					   }
+				}
+	}
+	
+	template< class array3 >
+	void copy_unpad( array3& v )
+	{
+		for( int ix=nx_/4,ixu=0; ix<3*nx_/4; ++ix,++ixu )
+			for( int iy=ny_/4,iyu=0; iy<3*ny_/4; ++iy,++iyu )	
+				for( int iz=nz_/4,izu=0; iz<3*nz_/4; ++iz,++izu )
+					v(ixu,iyu,izu) = (*this)(ix,iy,iz);
+	}
+	
+	template< class array3 >
+	void copy_add_unpad( array3& v )
+	{
+		for( int ix=nx_/4,ixu=0; ix<3*nx_/4; ++ix,++ixu )
+			for( int iy=ny_/4,iyu=0; iy<3*ny_/4; ++iy,++iyu )	
+				for( int iz=nz_/4,izu=0; iz<3*nz_/4; ++iz,++izu )
+					v(ixu,iyu,izu) += (*this)(ix,iy,iz);
+	}
+	
+	template< class array3 >
+	void copy_subtract_unpad( array3& v )
+	{
+		for( int ix=nx_/4,ixu=0; ix<3*nx_/4; ++ix,++ixu )
+			for( int iy=ny_/4,iyu=0; iy<3*ny_/4; ++iy,++iyu )	
+				for( int iz=nz_/4,izu=0; iz<3*nz_/4; ++iz,++izu )
+					v(ixu,iyu,izu) -= (*this)(ix,iy,iz);
+	}
+	
+	void zero_boundary( void )
+	{
+		for( int ix=0; ix<nx_/4; ++ix )
+			for( int iy=0; iy<ny_; ++iy )
+				for( int iz=0; iz<nz_; ++iz )
+			{
+				(*this)(ix,iy,iz) = 0.0;
+				(*this)(ix+3*nx_/4,iy,iz) = 0.0;
+			}
+		
+		for( int ix=0; ix<nx_; ++ix )
+			for( int iy=0; iy<ny_/4; ++iy )
+				for( int iz=0; iz<nz_; ++iz )
+				{
+					(*this)(ix,iy,iz) = 0.0;
+					(*this)(ix,iy+3*ny_/4,iz) = 0.0;
+				}
+
+		
+		for( int ix=0; ix<nx_; ++ix )
+			for( int iy=0; iy<ny_; ++iy )
+				for( int iz=0; iz<nz_/4; ++iz )
+				{
+					(*this)(ix,iy,iz) = 0.0;
+					(*this)(ix,iy,iz+3*nz_/4) = 0.0;
+				}
+		
+	}
+	
+
+};
+
+template< typename M >
+inline void enforce_coarse_mean( M& v, M& V )
+{
+	double outersum =0.0, innersum = 0.0;
+	int 
+	nx = v.size(0)/2, 
+	ny = v.size(1)/2, 
+	nz = v.size(2)/2,
+	ox = v.offset(0),
+	oy = v.offset(1),
+	oz = v.offset(2);
+
+	unsigned innercount = 0, outercount = 0;
+	for( int i=0; i<V.size(0); ++i )
+		for( int j=0; j<V.size(1); ++j )
+			for( int k=0; k<V.size(2); ++k )
+			{
+				if( (i > ox && i < ox+nx) && 
+				    (j > oy && j < oy+ny) &&
+				    (k > oz && k < oz+nz ))
+				{	
+					innersum += V(i,j,k);
+					++innercount;
+				}
+				else 
+				{
+					outersum += V(i,j,k);
+					++outercount;
+				}
+
+			}
+	innersum /= innercount;
+	outersum /= outercount;
+	
+	double dcorr = innersum * innercount / outercount;
+	
+	for( int i=0; i<V.size(0); ++i )
+		for( int j=0; j<V.size(1); ++j )
+			for( int k=0; k<V.size(2); ++k )
+			{
+				if( !((i > ox && i < ox+nx) && 
+				   (j > oy && j < oy+ny) &&
+				   (k > oz && k < oz+nz )))
+					V(i,j,k) -= outersum + dcorr;//innersum;
+			}
+	
+}
+
+template< typename M >
+inline void enforce_mean( M& v, M& V )
+{
+
+	int 
+	nx = v.size(0)/2, 
+	ny = v.size(1)/2, 
+	nz = v.size(2)/2,
+	ox = v.offset(0),
+	oy = v.offset(1),
+	oz = v.offset(2);
+
+	
+	double coarsemean = 0.0, finemean = 0.0;
+	unsigned count = 0;
+	#pragma omp parallel for reduction(+:coarsemean,finemean,count)
+	for( int i=0; i<nx; ++i )
+	{
+		int i2 = 2*i;
+		for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+			for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+			{
+				
+				finemean += 0.125 * ( v(i2+1,j2,k2) + v(i2,j2+1,k2) + v(i2,j2,k2+1) + v(i2+1,j2+1,k2) +
+											v(i2+1,j2,k2+1) + v(i2+1,j2+1,k2+1) + v(i2,j2+1,k2+1) + v(i2,j2,k2) );
+				
+				coarsemean += V(i+ox,j+oy,k+oz);
+				++count;
+			}
+	}
+	
+	coarsemean /= count;
+	finemean /= count;
+	
+	double dmean = coarsemean-finemean;
+	
+#pragma omp parallel for reduction(+:coarsemean,finemean)
+	for( int i=0; i<nx; ++i )
+	{
+		int i2 = 2*i;
+		for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+			for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+			{
+				
+				v(i2+1,j2,k2)		+= dmean;
+				v(i2,j2+1,k2)		+= dmean; 
+				v(i2,j2,k2+1)		+= dmean;
+				v(i2+1,j2+1,k2)		+= dmean;
+				v(i2+1,j2,k2+1)		+= dmean;
+				v(i2+1,j2+1,k2+1)	+= dmean;
+				v(i2,j2+1,k2+1)		+= dmean;
+				v(i2,j2,k2)			+= dmean;
+			}
+	}
+	
+}
+#endif
+
diff --git a/fd_schemes.hh b/fd_schemes.hh
new file mode 100644
index 0000000..4c7b051
--- /dev/null
+++ b/fd_schemes.hh
@@ -0,0 +1,408 @@
+/*
+ 
+ fd_schemes.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+#ifndef __FD_SCHEMES_HH
+#define __FD_SCHEMES_HH
+
+#include <vector>
+#include <stdexcept>
+
+
+//... abstract implementation of the Poisson/Force scheme
+template< class L, class G, typename real_t=double >
+class scheme
+{
+public:
+	typedef L laplacian;
+	typedef G gradient;
+	
+	laplacian m_laplacian;
+	gradient m_gradient;
+	
+	template< class C >
+	inline real_t grad_x( const C&c, const int i, const int j, const int k )
+	{ return m_gradient.apply_x( c,i,j,k ); }
+	
+	template< class C >
+	inline real_t grad_y( const C&c, const int i, const int j, const int k )
+	{ return m_gradient.apply_y( c,i,j,k ); }
+	
+	template< class C >
+	inline real_t grad_z( const C&c, const int i, const int j, const int k )
+	{ return m_gradient.apply_z( c,i,j,k ); }
+	
+	template< class C >
+	inline real_t L_apply( const C&c, const int i, const int j, const int k ) 
+	{ return m_laplacian.apply( c,i,j,k ); }
+	
+	template< class C >
+	inline real_t L_rhs( const C&c, const int i, const int j, const int k ) 
+	{ return m_laplacian.rhs( c,i,j,k ); }
+	
+	inline real_t ccoeff( void )
+	{ return m_laplacian.ccoeff(); }
+	
+};
+
+
+template< int nextent, typename T >
+class gradient
+{
+	typedef T real_t;
+	std::vector<real_t> m_stencil;
+	const unsigned nl;
+public:
+	
+	gradient()
+	: nl( 2*nextent+1 )
+	{ 
+		m_stencil.assign(nl*nl*nl,(real_t)0.0);
+	}
+	
+	real_t& operator()(int i)
+	{ return m_stencil[i+nextent]; }
+	
+	const real_t& operator()(int i) const
+	{ return m_stencil[i+nextent]; }
+	
+	template< class C >
+	inline void apply( const C& c, C& f, int dir )
+	{
+		f = c;
+		
+		int nx=c.size(0), ny=c.size(1), nz=c.size(2);		
+		double hx = 1.0/(nx+1.0), hy = 1.0/(ny+1.0), hz = 1.0/(nz+1.0);
+		
+		f.zero();
+		
+		if( dir == 0 )
+			for( int i=0; i<nx; ++i )
+				for( int j=0; j<ny; ++j )
+					for( int k=0; k<nz; ++k )
+						for( int ii = -nextent; ii<=nextent; ++ii )
+							f(i,j,k) += (*this)(ii) * c(i+ii,j,k)/hx;
+		else if( dir == 1 )
+			for( int i=0; i<nx; ++i )
+				for( int j=0; j<ny; ++j )
+					for( int k=0; k<nz; ++k )
+						for( int jj = -nextent; jj<=nextent; ++jj )
+							f(i,j,k) += (*this)(jj) * c(i,j+jj,k)/hy;
+		else if( dir == 2 )
+			for( int i=0; i<nx; ++i )
+				for( int j=0; j<ny; ++j )
+					for( int k=0; k<nz; ++k )
+						for( int kk = -nextent; kk<=nextent; ++kk )
+							f(i,j,k) += (*this)(kk) * c(i,j,k+kk)/hz;
+		
+	}
+};
+
+template< int nextent, typename real_t >
+class base_stencil
+{
+protected:
+	std::vector<real_t> m_stencil;
+	const unsigned nl;
+public:
+	bool m_modsource;
+	
+public:
+	base_stencil( bool amodsource = false )
+	: nl( 2*nextent+1 ), m_modsource( amodsource )
+	{
+		m_stencil.assign(nl*nl*nl,(real_t)0.0);
+	}
+	
+	real_t& operator()(int i, int j, int k)
+	{ return m_stencil[((i+nextent)*nl+(j+nextent))*nl+(k+nextent)]; }
+	
+	const real_t& operator()(unsigned i, unsigned j, unsigned k) const
+	{ return m_stencil[((i+nextent)*nl+(j+nextent))*nl+(k+nextent)]; }
+	
+	template< class C >
+	inline real_t rhs( const C& c, const int i, const int j, const int k )
+	{
+		real_t sum = this->apply( c, i, j, k );
+		sum -= (*this)(0,0,0) * c(i,j,k);
+		return sum;
+	}
+	
+	inline real_t ccoeff( void )
+	{
+		return (*this)(0,0,0);
+	}
+	
+	
+	template< class C >
+	inline real_t apply( const C& c, const int i, const int j, const int k )
+	{
+		real_t sum = 0.0;
+		
+		for( int ii=-nextent; ii<=nextent; ++ii )
+			for( int jj=-nextent; jj<=nextent; ++jj )
+				for( int kk=-nextent; kk<=nextent; ++kk )
+					sum += (*this)(ii,jj,kk) * c(i+ii,j+jj,k+kk);
+		
+		return sum;
+	}
+	
+	template< class C >
+	inline real_t modsource( const C& c, const int i, const int j, const int k )
+	{
+		return 0.0;
+	}
+	
+};
+
+
+/***************************************************************************************/
+/***************************************************************************************/
+/***************************************************************************************/
+
+
+//... Implementation of the Gradient schemes............................................
+
+
+template< typename real_t >
+class deriv_2P : public gradient<1,real_t>
+{
+	
+public:
+	deriv_2P( void )
+	{
+		(*this)( 0 ) =  0.0;
+		(*this)(-1 ) = -0.5;
+		(*this)(+1 ) = +0.5;		
+	}
+	
+	
+};
+
+//... Implementation of the Laplacian schemes..........................................
+
+
+template< typename real_t >
+class stencil_7P : public base_stencil<1,real_t>
+{
+	
+public:
+	stencil_7P( void )
+	{
+		(*this)( 0, 0, 0) = -6.0;
+		(*this)(-1, 0, 0) = +1.0;
+		(*this)(+1, 0, 0) = +1.0;
+		(*this)( 0,-1, 0) = +1.0;
+		(*this)( 0,+1, 0) = +1.0;
+		(*this)( 0, 0,-1) = +1.0;
+		(*this)( 0, 0,+1) = +1.0;
+	}
+	
+	template< class C >
+	inline real_t apply( const C& c, const int i, const int j, const int k ) const
+	{
+		return c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)-6.0*c(i,j,k);
+	}
+	
+	template< class C >
+	inline real_t rhs( const C& c, const int i, const int j, const int k ) const
+	{
+		return c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1);
+	}
+	
+	inline real_t ccoeff( void )
+	{
+		return -6.0;
+	}
+};
+
+
+template< typename real_t >
+class stencil_13P : public base_stencil<2,real_t>
+{
+	
+public:
+	stencil_13P( void )
+	{
+		(*this)( 0, 0, 0) = -90.0/12.;
+		
+		(*this)(-1, 0, 0) = 
+		(*this)(+1, 0, 0) = 
+		(*this)( 0,-1, 0) = 
+		(*this)( 0,+1, 0) = 
+		(*this)( 0, 0,-1) = 
+		(*this)( 0, 0,+1) = 16./12.;
+		
+		(*this)(-2, 0, 0) = 
+		(*this)(+2, 0, 0) = 
+		(*this)( 0,-2, 0) = 
+		(*this)( 0,+2, 0) = 
+		(*this)( 0, 0,-2) = 
+		(*this)( 0, 0,+2) = -1./12.;
+	}
+	
+	template< class C >
+	inline real_t apply( const C& c, const int i, const int j, const int k )
+	{
+		return 
+			(-1.0*(c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2))
+			 +16.0*(c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1))
+			 -90.0*c(i,j,k))/12.0;
+	}
+	
+	template< class C >
+	inline real_t rhs( const C& c, const int i, const int j, const int k )
+	{
+		return 
+			(-1.0*(c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2))
+			 +16.0*(c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)))/12.0;
+	}
+	
+	inline real_t ccoeff( void )
+	{
+		return -90.0/12.0;
+	}
+};
+
+template< typename real_t >
+class stencil_19P : public base_stencil<3,real_t>
+{
+	
+public:
+	stencil_19P( void )
+	{
+		(*this)( 0, 0, 0) = -1470./180.;
+		
+		(*this)(-1, 0, 0) = 
+		(*this)(+1, 0, 0) = 
+		(*this)( 0,-1, 0) = 
+		(*this)( 0,+1, 0) = 
+		(*this)( 0, 0,-1) = 
+		(*this)( 0, 0,+1) = 270./180.;
+		
+		(*this)(-2, 0, 0) = 
+		(*this)(+2, 0, 0) = 
+		(*this)( 0,-2, 0) = 
+		(*this)( 0,+2, 0) = 
+		(*this)( 0, 0,-2) = 
+		(*this)( 0, 0,+2) = -27./180.;
+		
+		(*this)(-3, 0, 0) = 
+		(*this)(+3, 0, 0) = 
+		(*this)( 0,-3, 0) = 
+		(*this)( 0,+3, 0) = 
+		(*this)( 0, 0,-3) = 
+		(*this)( 0, 0,+3) = 2./180.;
+		
+	}
+	
+	template< class C >
+	inline real_t apply( const C& c, const int i, const int j, const int k )
+	{
+		return 
+		(2.0*(c(i-3,j,k)+c(i+3,j,k)+c(i,j-3,k)+c(i,j+3,k)+c(i,j,k-3)+c(i,j,k+3))
+		 -27.0*(c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2))
+		 +270.0*(c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1))
+		 -1470.0*c(i,j,k))/180.0;
+	}
+	
+	template< class C >
+	inline real_t rhs( const C& c, const int i, const int j, const int k )
+	{
+		return 
+		(2.0*(c(i-3,j,k)+c(i+3,j,k)+c(i,j-3,k)+c(i,j+3,k)+c(i,j,k-3)+c(i,j,k+3))
+		 -27.0*(c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2))
+		 +270.0*(c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)))/180.0;
+	}
+	
+	inline real_t ccoeff( void )
+	{
+		return -1470.0/180.0;
+	}
+	
+};
+
+
+template< typename real_t >
+class Laplace_flux_O4
+{
+public:
+	
+	//.. idir is -1 for left boundary, +1 for right boundary
+	template< class C >
+	inline double apply_x( int idir, const C& c, const int i, const int j, const int k )
+	{
+		double fac = -((double)idir)/12.0;
+		return fac*(-c(i-2,j,k)+15.0*c(i-1,j,k)-15.0*c(i,j,k)+c(i+1,j,k));
+	}
+	
+	template< class C >
+	inline double apply_y( int idir, const C& c, const int i, const int j, const int k )
+	{
+		double fac = -((double)idir)/12.0;
+		return fac*(-c(i,j-2,k)+15.0*c(i,j-1,k)-15.0*c(i,j,k)+c(i,j+1,k));
+	}
+	
+	template< class C >
+	inline double apply_z( int idir, const C& c, const int i, const int j, const int k )
+	{
+		double fac = -((double)idir)/12.0;
+		return fac*(-c(i,j,k-2)+15.0*c(i,j,k-1)-15.0*c(i,j,k)+c(i,j,k+1));
+	}
+	
+};
+
+
+template< typename real_t >
+class Laplace_flux_O6
+{
+public:
+	
+	//.. idir is -1 for left boundary, +1 for right boundary
+	template< class C >
+	inline double apply_x( int idir, const C& c, const int i, const int j, const int k )
+	{
+		double fac = -((double)idir)/180.0;
+		return fac*(2.*c(i-3,j,k)-25.*c(i-2,j,k)+245.*c(i-1,j,k)-245.0*c(i,j,k)+25.*c(i+1,j,k)-2.*c(i+2,j,k));
+	}
+	
+	template< class C >
+	inline double apply_y( int idir, const C& c, const int i, const int j, const int k )
+	{
+		double fac = -((double)idir)/180.0;
+		return fac*(2.*c(i,j-3,k)-25.*c(i,j-2,k)+245.*c(i,j-1,k)-245.0*c(i,j,k)+25.*c(i,j+1,k)-2.*c(i,j+2,k));
+	}
+	
+	template< class C >
+	inline double apply_z( int idir, const C& c, const int i, const int j, const int k )
+	{
+		double fac = -((double)idir)/180.0;
+		return fac*(2.*c(i,j,k-3)-25.*c(i,j,k-2)+245.*c(i,j,k-1)-245.0*c(i,j,k)+25.*c(i,j,k+1)-2.*c(i,j,k+2));
+	}
+	
+};
+
+
+
+#endif
+
+
diff --git a/general.hh b/general.hh
new file mode 100644
index 0000000..a7e498e
--- /dev/null
+++ b/general.hh
@@ -0,0 +1,121 @@
+/*
+ 
+ general.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+#ifndef __GENERAL_HH
+#define __GENERAL_HH
+
+
+#include <omp.h>
+
+#ifdef WITH_MPI
+  #ifdef MANNO
+    #include <mpi.h>
+  #else
+    #include <mpi++.h>
+  #endif
+#else
+#include <time.h>
+#endif
+
+#ifdef SINGLE_PRECISION
+ #ifdef WITH_MPI
+  #include "srfftw_mpi.h"
+  #define MPI_TREAL MPI_FLOAT
+ #else
+  #include "srfftw.h"
+ #endif
+  typedef float real_t;
+#else
+ #ifdef WITH_MPI
+  #include "drfftw_mpi.h"
+  #define MPI_TREAL MPI_DOUBLE
+ #else
+  #include "drfftw.h"
+ #endif
+  typedef double real_t;
+#endif
+
+#include <vector>
+
+
+#include "mesh.hh"
+typedef GridHierarchy<real_t> grid_hierarchy;
+typedef MeshvarBnd<real_t> meshvar_bnd;
+typedef Meshvar<real_t> meshvar;
+
+
+/* Convenience from Numerical Recipes in C, 2nd edition */
+template< typename T >
+inline T SQR( T a ){
+  return a*a;
+}
+
+template< typename T >
+inline T CUBE( T a ){
+  return a*a*a;
+}
+
+template< typename T >
+inline T POW4( T a ){
+  return a*a*a*a;
+}
+
+
+// --- structure for cosmological parameters --- //
+typedef struct cosmology{
+  double 
+    Omega_m, // baryon+dark matter density
+    Omega_b, // baryon matter density
+    Omega_L, // dark energy density
+    H0,      // Hubble constant
+    nspect,  // long-wave spectral index (scale free is nspect=1) 
+    sigma8,  // power spectrum normalization
+	//Gamma,   // shape parameter (of historical interest, as a free parameter)
+    //fnl,     // non-gaussian contribution parameter
+	//w0,      // dark energy equation of state parameter (not implemented, i.e. =1 at the moment)
+	//wa,      // dark energy equation of state parameter (not implemented, i.e. =1 at the moment)
+	dplus,
+	pnorm,
+	vfact,
+	WDMmass,
+	WDMg_x,
+	astart;  // expansion factor a for which to generate initial conditions
+}Cosmology;
+
+typedef struct {
+	unsigned levelmin, levelmax;
+	double boxlength;
+	std::vector<unsigned> offx,offy,offz,llx,lly,llz;
+}Parameters;
+
+
+inline double time_seconds( void )
+{
+  #ifdef WITH_MPI
+    return MPI_Wtime();
+  #else
+    return ((double) clock()) / CLOCKS_PER_SEC;
+  #endif
+}
+
+#endif
diff --git a/main.cc b/main.cc
new file mode 100644
index 0000000..244ecb7
--- /dev/null
+++ b/main.cc
@@ -0,0 +1,605 @@
+/*
+ 
+ main.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+
+#include <stdio.h>
+#include <iostream>
+#include <iomanip>
+#include <math.h>
+
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+#include <gsl/gsl_integration.h>
+
+
+
+#include "general.hh"
+#include "output.hh"
+
+#include "config_file.hh"
+
+#include "poisson.hh"
+#include "mg_solver.hh"
+#include "fd_schemes.hh"
+#include "random.hh"
+#include "densities.hh"
+
+#include "cosmology.hh"
+#include "transfer_function.hh"
+
+#define THE_CODE_NAME "music!"
+#define THE_CODE_VERSION "0.2a"
+
+
+namespace music
+{
+
+	struct framework
+	{
+		transfer_function *the_transfer_function;
+		//poisson_solver *the_poisson_solver;
+		config_file *the_config_file;
+		refinement_hierarchy *the_refinement_hierarchy;
+	};
+	
+}
+ 
+transfer_function *TransferFunction_real::ptf_ = NULL;
+transfer_function *TransferFunction_k::ptf_ = NULL;
+
+real_t TransferFunction_real::nspec_ = -1.0;
+real_t TransferFunction_k::nspec_ = -1.0;
+
+void splash(void)
+{
+	
+	std::cout 
+	<< "\n    __    __     __  __     ______     __     ______      \n"
+	<< "   /\\ \"-./  \\   /\\ \\/\\ \\   /\\  ___\\   /\\ \\   /\\  ___\\  \n"   
+	<< "   \\ \\ \\-./\\ \\  \\ \\ \\_\\ \\  \\ \\___  \\  \\ \\ \\  \\ \\ \\____ \n"  
+	<< "    \\ \\_\\ \\ \\_\\  \\ \\_____\\  \\/\\_____\\  \\ \\_\\  \\ \\_____\\ \n"
+	<< "     \\/_/  \\/_/   \\/_____/   \\/_____/   \\/_/   \\/_____/ \n\n"
+	<< "                            this is " << THE_CODE_NAME << " version " << THE_CODE_VERSION << "\n\n\n";
+	
+	
+}
+
+void modify_grid_for_TF( const refinement_hierarchy& rh_full, refinement_hierarchy& rh_TF, config_file& cf )
+{
+	unsigned lbase, lbaseTF, lmax;
+	
+	lbase				= cf.getValue<unsigned>( "setup", "levelmin" );
+	lbaseTF				= cf.getValueSafe<unsigned>( "setup", "levelminTF", lbase );
+	lmax				= cf.getValue<unsigned>( "setup", "levelmax" );
+	
+	rh_TF = rh_full;
+	
+	unsigned pad = 4;
+	
+	for( unsigned i=lbase+1; i<=lmax; ++i )
+	{
+		rh_TF.adjust_level(i, rh_TF.size(i,0)+2*pad, rh_TF.size(i,1)+2*pad, rh_TF.size(i,2)+2*pad, 
+						   rh_TF.offset_abs(i,0)-pad, rh_TF.offset_abs(i,1)-pad, rh_TF.offset_abs(i,2)-pad);
+	}
+	
+	if( lbaseTF > lbase )
+	{
+		std::cout << " - Will use levelmin = " << lbaseTF << " to compute density field...\n";
+	
+		for( unsigned i=lbase; i<=lbaseTF; ++i )
+		{
+			unsigned nfull = (unsigned)pow(2,i);
+			rh_TF.adjust_level(i, nfull, nfull, nfull, 0, 0, 0);
+		}
+	}
+	
+	
+		
+}
+
+void coarsen_density( const refinement_hierarchy& rh, grid_hierarchy& u )
+{
+	for( unsigned i=1; i<=rh.levelmax(); ++i )
+	{
+		if(	  rh.offset(i,0) != u.get_grid(i)->offset(0)
+   		   || rh.offset(i,1) != u.get_grid(i)->offset(1)
+		   || rh.offset(i,2) != u.get_grid(i)->offset(2)
+		   || rh.size(i,0) != u.get_grid(i)->size(0)
+		   || rh.size(i,1) != u.get_grid(i)->size(1)
+		   || rh.size(i,2) != u.get_grid(i)->size(2) )
+		{
+			u.cut_patch(i, rh.offset_abs(i,0), rh.offset_abs(i,1), rh.offset_abs(i,2), 
+						rh.size(i,0), rh.size(i,1), rh.size(i,2) );
+		}
+	}
+}
+
+void store_grid_structure( config_file& cf, const refinement_hierarchy& rh )
+{
+	char str1[128], str2[128];
+	for( unsigned i=rh.levelmin(); i<=rh.levelmax(); ++i )
+	{
+		for( int j=0; j<3; ++j )
+		{
+			sprintf(str1,"offset(%d,%d)",i,j);	
+			sprintf(str2,"%d",rh.offset(i,j));
+			cf.insertValue("setup",str1,str2);
+
+			sprintf(str1,"size(%d,%d)",i,j);	
+			sprintf(str2,"%d",rh.size(i,j));
+			cf.insertValue("setup",str1,str2);
+			
+		}		
+	}
+}
+
+//... this is just for testing purposes ...
+void compute_Lu( const grid_hierarchy& u, grid_hierarchy& Du )
+{
+	Du = u;
+	
+	for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel )
+	{
+		double h = pow(2.0,ilevel);
+		h = h*h;
+		meshvar_bnd *pvar = Du.get_grid(ilevel);
+		
+		/*#pragma omp parallel for
+		for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+			for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+				for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+					(*pvar)(ix,iy,iz) = -((*u.get_grid(ilevel))(ix+1,iy,iz)+(*u.get_grid(ilevel))(ix-1,iy,iz)+
+										 (*u.get_grid(ilevel))(ix,iy+1,iz)+(*u.get_grid(ilevel))(ix,iy-1,iz)+
+										 (*u.get_grid(ilevel))(ix,iy,iz+1)+(*u.get_grid(ilevel))(ix,iy,iz-1)-
+										 6.0*(*u.get_grid(ilevel))(ix,iy,iz))*h;
+		 */
+		
+	/*#pragma omp parallel for
+		for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+			for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+				for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+					(*pvar)(ix,iy,iz) = -stencil_13P<double>().apply(*u.get_grid(ilevel),ix,iy,iz)*h;
+	*/	
+	
+	#pragma omp parallel for
+		for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+			for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+				for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+					(*pvar)(ix,iy,iz) = -stencil_19P<double>().apply(*u.get_grid(ilevel),ix,iy,iz)*h;
+	}
+}
+
+void subtract_finest_mean( grid_hierarchy& u )
+{
+	std::cout << " - Subtracting component mean...\n";
+	double sum = 0.0;
+	for( int ix = 0; ix < (int)(*u.get_grid(u.levelmax())).size(0); ++ix )
+		for( int iy = 0; iy < (int)(*u.get_grid(u.levelmax())).size(1); ++iy )
+			for( int iz = 0; iz < (int)(*u.get_grid(u.levelmax())).size(2); ++iz )
+				sum += 0.5*(*u.get_grid(u.levelmax()))(ix,iy,iz);
+	
+	sum /= (*u.get_grid(u.levelmax())).size(0)
+			* (*u.get_grid(u.levelmax())).size(1)
+			* (*u.get_grid(u.levelmax())).size(2);
+	
+	for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel )
+		#pragma omp parallel for
+		for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+			for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+				for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+					(*u.get_grid(ilevel))(ix,iy,iz) -= sum;
+	
+}
+
+
+/*****************************************************************************************************/
+/*****************************************************************************************************/
+/*****************************************************************************************************/
+
+
+
+int main (int argc, const char * argv[]) 
+{
+	const unsigned nbnd = 4;
+	
+	unsigned lbase, lmax, lbaseTF;
+	double   err;
+	
+	cosmology cosmo;
+	double boxlength, zstart;
+	std::vector<long> rngseeds;
+	std::vector<std::string> rngfnames;
+	
+	double x0[3], lx[3];
+	unsigned npad = 8;
+	
+	splash();
+	if( argc != 2 ){
+		std::cout << " This version is compiled with the following plug-ins:\n";
+		
+		print_transfer_function_plugins();
+		print_output_plugins();
+		
+		std::cerr << "\n In order to run, you need to specify a parameter file!\n\n";
+		exit(0);
+	}
+	
+	
+	/******************************************************************************************************/
+	/* read and interpret config file *********************************************************************/
+	/******************************************************************************************************/
+	config_file cf(argv[1]);
+	std::string tfname,randfname,temp, outformat, outfname, poisson_solver_name;;
+	bool shift_back(false), align_top(false), kspace(false);
+	float tf0,tf1,tf2;
+	
+	
+	boxlength           = cf.getValue<double>( "setup", "boxlength" );
+	lbase				= cf.getValue<unsigned>( "setup", "levelmin" );
+	lmax				= cf.getValue<unsigned>( "setup", "levelmax" );
+	lbaseTF				= cf.getValueSafe<unsigned>( "setup", "levelminTF", lbase );
+	
+	temp				= cf.getValue<std::string>( "setup", "ref_offset" );
+	sscanf( temp.c_str(), "%g,%g,%g", &tf0, &tf1, &tf2 ); x0[0] = tf0; x0[1] = tf1; x0[2] = tf2;
+	
+	
+	temp				= cf.getValue<std::string>( "setup", "ref_extent" );
+	sscanf( temp.c_str(), "%g,%g,%g", &tf0, &tf1, &tf2 ); lx[0] = tf0; lx[1] = tf1; lx[2] = tf2;
+	
+	npad                = cf.getValue<unsigned>( "setup", "padding" );
+	align_top			= cf.getValueSafe<bool>( "setup", "align_top", false );
+	
+	
+	kspace				= cf.getValueSafe<bool>( "setup", "kspace", false );
+	if( kspace )
+		poisson_solver_name = std::string("fft_poisson");
+	else
+		poisson_solver_name = std::string("mg_poisson");
+	
+	// TODO: move cosmology parameters reading to cosmo_calc
+	zstart				= cf.getValue<double>( "setup", "zstart" );
+	cosmo.astart		= 1.0/(1.0+zstart);
+	cosmo.Omega_b		= cf.getValue<double>( "cosmology", "Omega_b" );
+	cosmo.Omega_m		= cf.getValue<double>( "cosmology", "Omega_m" );
+	cosmo.Omega_L		= cf.getValue<double>( "cosmology", "Omega_L" );
+	cosmo.H0			= cf.getValue<double>( "cosmology", "H0" );
+	cosmo.sigma8		= cf.getValue<double>( "cosmology", "sigma_8" );
+	cosmo.nspect		= cf.getValue<double>( "cosmology", "nspec" );
+	cosmo.WDMg_x		= cf.getValueSafe<double>( "cosmology", "WDMg_x", 1.5 );
+	cosmo.WDMmass		= cf.getValueSafe<double>( "cosmology", "WDMmass", 0.0 );
+	
+	//cosmo.Gamma			= cf.getValueSafe<double>( "cosmology", "Gamma", -1.0 );
+	
+	/******************************************************************************************************/
+	/******************************************************************************************************/
+	
+	shift_back			= cf.getValueSafe<bool>( "output", "shift_back", shift_back );
+	outformat			= cf.getValue<std::string>( "output", "format" );
+	outfname			= cf.getValue<std::string>( "output", "filename" );
+	
+	
+	
+	/******************************************************************************************************/
+	/******************************************************************************************************/
+	/******************************************************************************************************/
+
+#ifndef SINGLETHREAD_FFTW	
+	fftw_threads_init();
+#endif
+	
+	transfer_function_plugin *the_transfer_function_plugin
+		= select_transfer_function_plugin( cf );
+	
+	CosmoCalc ccalc(cosmo,the_transfer_function_plugin);
+	cosmo.pnorm	= ccalc.ComputePNorm( 2.0*M_PI/boxlength );
+	cosmo.dplus	= ccalc.CalcGrowthFactor( cosmo.astart )/ccalc.CalcGrowthFactor( 1.0 );
+	cosmo.vfact = ccalc.ComputeVFact( cosmo.astart );
+	{
+		char tmpstr[128];
+		sprintf(tmpstr,"%.12g",cosmo.pnorm);
+		cf.insertValue("cosmology","pnorm",tmpstr);
+		sprintf(tmpstr,"%.12g",cosmo.dplus);
+		cf.insertValue("cosmology","dplus",tmpstr);
+		sprintf(tmpstr,"%.12g",cosmo.vfact);
+		cf.insertValue("cosmology","vfact",tmpstr);
+		
+	}
+	
+	/******************************************************************************************************/
+	/******************************************************************************************************/
+	
+	bool 
+		do_baryons	= cf.getValue<bool>("setup","baryons"),
+		do_2LPT		= cf.getValue<bool>("setup","use_2LPT"),
+		do_LLA		= cf.getValue<bool>("setup","use_LLA"),
+		do_CVM		= cf.getValueSafe<bool>("setup","center_velocities",false),
+		do_UVA		= false;//FIXME: cf.getValue<bool>("setup","use_UVA");
+	
+
+	refinement_hierarchy rh_Poisson( cf );
+	store_grid_structure(cf, rh_Poisson);
+	rh_Poisson.output();
+	
+	
+	refinement_hierarchy rh_TF( rh_Poisson );
+	modify_grid_for_TF( rh_Poisson, rh_TF, cf );
+	//rh_TF.output();
+
+	
+	if( !the_transfer_function_plugin->tf_is_distinct() && do_baryons )
+		std::cout	<< " - WARNING: The selected transfer function does not support\n"
+					<< "            distinct amplitudes for baryon and DM fields!\n"
+					<< "            Perturbation amplitudes will be identical!" << std::endl;
+	
+	
+	
+	output_plugin *the_output_plugin = select_output_plugin( cf );
+	
+	
+		
+	poisson_plugin_creator *the_poisson_plugin_creator = get_poisson_plugin_map()[ poisson_solver_name ];
+	poisson_plugin *the_poisson_solver = the_poisson_plugin_creator->create( cf );
+	
+	//... THIS IS THE MAIN DRIVER BRANCHING TREE RUNNING THE VARIOUS PARTS OF THE CODE
+	bool bfatal = false;
+	try{
+		if( ! do_2LPT )
+		{
+			//.. don't use 2LPT	...
+		
+			if( !the_transfer_function_plugin->tf_is_distinct() || !do_baryons )
+			{	
+				std::cerr << " - Computing ICs from the total matter power spectrum..." << std::endl;
+				
+				grid_hierarchy f( nbnd ), u(nbnd);
+
+				GenerateDensityHierarchy(	cf, the_transfer_function_plugin, total , rh_TF, f );
+				coarsen_density(rh_Poisson, f);
+				
+				u = f;	u.zero();
+				err = the_poisson_solver->solve(f, u);
+
+	#if 0
+				grid_hierarchy u2( nbnd );
+				compute_Lu( u, u2 );
+				u=u2;
+	#endif
+				
+				the_output_plugin->write_dm_density(f);
+				the_output_plugin->write_dm_mass(f);
+				
+				if( do_baryons )
+				{
+					if( do_LLA )
+						compute_LLA_density( u, f );
+					
+					the_output_plugin->write_gas_density(f);
+				}
+				f.deallocate();
+				
+				the_output_plugin->write_dm_potential(u);
+				
+				grid_hierarchy data_forIO(u);
+				for( int icoord = 0; icoord < 3; ++icoord )
+				{
+					
+					//... displacement
+					the_poisson_solver->gradient(icoord, u, data_forIO );
+
+					if( do_CVM)
+						subtract_finest_mean( data_forIO );
+					the_output_plugin->write_dm_position(icoord, data_forIO );
+					
+					//... velocity
+					if( do_UVA )
+					{
+						//FIXME: this still needs to be added
+						throw std::runtime_error("Updated Velocity Approximation not yet supported!");
+					}
+					else
+					{
+						data_forIO *= cosmo.vfact;
+						the_output_plugin->write_dm_velocity(icoord, data_forIO);					
+					}
+
+					if( do_baryons )
+						the_output_plugin->write_gas_velocity(icoord, data_forIO);				
+
+				}	
+				
+			}else{
+				
+				//... cdm density and displacements
+				grid_hierarchy f( nbnd ), u(nbnd);
+				
+				GenerateDensityHierarchy(	cf, the_transfer_function_plugin, cdm , rh_TF, f );
+				u = f;	u.zero();
+				
+				err = the_poisson_solver->solve(f, u);
+
+				the_output_plugin->write_dm_density(f);
+				the_output_plugin->write_dm_mass(f);
+				f.deallocate();
+				
+				the_output_plugin->write_dm_potential(u);
+				
+				
+				//... DM displacements
+				{
+					grid_hierarchy data_forIO(u);
+					for( int icoord = 0; icoord < 3; ++icoord )
+					{
+						//... displacement
+						the_poisson_solver->gradient(icoord, u, data_forIO );
+						the_output_plugin->write_dm_position(icoord, data_forIO );
+					}
+				}
+				
+				//... gas density
+				GenerateDensityHierarchy(	cf, the_transfer_function_plugin, baryon , rh_TF, f );
+				
+				if( do_LLA )
+				{
+					u = f;	u.zero();
+					err = the_poisson_solver->solve(f, u);
+					compute_LLA_density( u, f );
+				}
+				
+				the_output_plugin->write_gas_density(f);
+				
+				//... velocities
+				if( do_UVA )
+				{
+					//... need to co-add potentials...
+					throw std::runtime_error("Updated Velocity Approximation not yet supported!");
+				}
+				else
+				{
+					GenerateDensityHierarchy(	cf, the_transfer_function_plugin, total , rh_TF, f );
+					u = f;	u.zero();
+					err = the_poisson_solver->solve(f, u);
+					
+					grid_hierarchy data_forIO(u);
+					for( int icoord = 0; icoord < 3; ++icoord )
+					{
+						//... displacement
+						the_poisson_solver->gradient(icoord, u, data_forIO );
+						data_forIO *= cosmo.vfact;
+						the_output_plugin->write_dm_velocity(icoord, data_forIO);
+						the_output_plugin->write_gas_velocity(icoord, data_forIO);
+					}
+				}
+			}
+			
+		}else {
+			//.. use 2LPT ...
+			if( !the_transfer_function_plugin->tf_is_distinct() || !do_baryons )
+			{	
+				grid_hierarchy f( nbnd ), u1(nbnd), u2(nbnd);
+				
+				GenerateDensityHierarchy(	cf, the_transfer_function_plugin, total , rh_TF, f );
+				u1 = f;	u1.zero();
+
+				the_output_plugin->write_dm_density(f);
+				the_output_plugin->write_dm_mass(f);
+				
+				//... compute 1LPT term
+				err = the_poisson_solver->solve(f, u1);
+
+				the_output_plugin->write_dm_potential(u1);
+				
+				//... compute 2LPT term
+				u2 = f; u2.zero();
+				
+				compute_2LPT_source(u1, f);
+				err = the_poisson_solver->solve(f, u2);
+				u2 *= 3.0/7.0;
+					
+				
+				grid_hierarchy data_forIO(u1);
+				for( int icoord = 0; icoord < 3; ++icoord )
+				{
+					
+					//... displacement
+					the_poisson_solver->gradient(icoord, u1, data_forIO );
+					the_poisson_solver->gradient(icoord, u2, f );
+
+					//... co-add for the displacement
+					data_forIO += f;
+					the_output_plugin->write_dm_position(icoord, data_forIO );
+					
+					//... velocity
+					if( do_UVA )
+					{
+						throw std::runtime_error("Updated Velocity Approximation not yet supported!");
+					}
+					else
+					{
+						//... for velocities, the 2LPT term has a factor of 2, so add a second time
+						data_forIO += f;
+						data_forIO *= cosmo.vfact;
+						
+						the_output_plugin->write_dm_velocity(icoord, data_forIO);					
+					}
+					
+					if( do_baryons )
+						the_output_plugin->write_gas_velocity(icoord, data_forIO);				
+				}
+				
+				//...
+				u1 += u2;
+
+				if( do_baryons )
+				{	
+					if( do_LLA )
+						compute_LLA_density( u1, f );
+					else
+						compute_Lu_density( u1, f );
+					
+					the_output_plugin->write_gas_density(f);
+				}
+				
+			}
+			else
+			{
+				throw std::runtime_error("2LPT for distinct baryon/CDM not implemented yet.");			
+			}
+
+		}
+		the_output_plugin->finalize();
+	}catch(...)
+	{
+		std::cerr << " - A fatal error occured. We need to exit...\n";
+		bfatal;
+	}
+	
+	//... clean up
+	delete the_output_plugin;
+	
+	std::cout << "-------------------------------------------------------------\n";
+
+	if( !bfatal )
+		std::cout << " - Wrote output file \'" << outfname << "\'\n     using plugin \'" << outformat << "\'...\n";
+	
+	delete the_transfer_function_plugin;
+	delete the_poisson_solver;
+	
+	/** we are done ! **/
+	std::cout << " - Done!" << std::endl << std::endl;
+	
+	
+	///*****************************************///
+	
+	std::string save_fname(std::string(argv[1])+std::string("_stats"));
+	std::ofstream ofs(save_fname.c_str());
+	time_t ltime=time(NULL);
+	
+	ofs << "Parameter dump for the run on " << asctime( localtime(&ltime) );
+	ofs << "You ran " << THE_CODE_NAME << " version " << THE_CODE_VERSION << std::endl << std::endl;
+	
+	cf.dump( ofs );
+
+	
+	
+	return 0;
+}
diff --git a/mesh.hh b/mesh.hh
new file mode 100644
index 0000000..56de243
--- /dev/null
+++ b/mesh.hh
@@ -0,0 +1,1181 @@
+/*
+ 
+ mesh.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+#ifndef __MESH_HH
+#define __MESH_HH
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <stdexcept>
+
+#include <math.h>
+
+#include "config_file.hh"
+
+
+//! base class for all things that have rectangular mesh structure
+template<typename T>
+class Meshvar{
+public:
+	typedef T real_t;
+	
+	unsigned 
+		m_nx,	//!< x-extent of the rectangular mesh
+		m_ny,	//!< y-extent of the rectangular mesh
+		m_nz;	//!< z-extent of the rectangular mesh
+	
+	unsigned 
+		m_offx, //!< x-offset of the grid (just as a helper, not used inside the class)
+		m_offy, //!< y-offset of the grid (just as a helper, not used inside the class)
+		m_offz;	//!< z-offset of the grid (just as a helper, not used inside the class)
+	
+	real_t * m_pdata; //!< pointer to the dynamic data array
+	
+	//! constructor for cubic mesh
+	explicit Meshvar( unsigned n, unsigned offx, unsigned offy, unsigned offz )
+	: m_nx( n ), m_ny( n ), m_nz( n ), m_offx( offx ), m_offy( offy ), m_offz( offz )
+	{
+		m_pdata = new real_t[m_nx*m_ny*m_nz];
+	}
+	
+	//! constructor for rectangular mesh
+	Meshvar( unsigned nx, unsigned ny, unsigned nz, unsigned offx, unsigned offy, unsigned offz )
+	: m_nx( nx ), m_ny( ny ), m_nz( nz ), m_offx( offx ), m_offy( offy ), m_offz( offz )
+	{
+		m_pdata = new real_t[m_nx*m_ny*m_nz];
+	}
+	
+	//! variant copy constructor with optional copying of the actual data
+	Meshvar( const Meshvar<real_t>& m, bool copy_over=true )
+	{
+		m_nx = m.m_nx;
+		m_ny = m.m_ny;
+		m_nz = m.m_nz;
+		
+		m_offx = m.m_offx;
+		m_offy = m.m_offy;
+		m_offz = m.m_offz;
+		
+		m_pdata = new real_t[m_nx*m_ny*m_nz];
+		
+		if( copy_over )
+			for( unsigned i=0; i<m_nx*m_ny*m_nz; ++i )
+				m_pdata[i] = m.m_pdata[i];
+	}
+	
+	//! standard copy constructor
+	explicit Meshvar( const Meshvar<real_t>& m )
+	{
+		m_nx = m.m_nx;
+		m_ny = m.m_ny;
+		m_nz = m.m_nz;
+		
+		m_offx = m.m_offx;
+		m_offy = m.m_offy;
+		m_offz = m.m_offz;
+		
+		m_pdata = new real_t[m_nx*m_ny*m_nz];
+		
+		for( unsigned i=0; i<m_nx*m_ny*m_nz; ++i )
+			m_pdata[i] = m.m_pdata[i];
+	}
+	
+	//! destructor
+	~Meshvar()
+	{
+		if( m_pdata != NULL )
+			delete[] m_pdata;
+	}
+	
+	//! deallocate the data, but keep the structure
+	inline void deallocate( void )
+	{
+		if( m_pdata != NULL )
+			delete[] m_pdata;
+		m_pdata = NULL;
+	}
+	
+	//! get extent of the mesh along a specified dimension (const)
+	inline unsigned size( unsigned dim ) const
+	{
+		if( dim == 0 ) return m_nx;
+		if( dim == 1 ) return m_ny;
+		return m_nz;
+	}
+	
+	//! get extent of the mesh along a specified dimension
+	inline unsigned& size( unsigned dim )
+	{
+		if( dim == 0 ) return m_nx;
+		if( dim == 1 ) return m_ny;
+		return m_nz;
+	}
+	
+	//! get offset of the mesh along a specified dimension  (const)
+	inline unsigned offset( unsigned dim ) const
+	{
+		if( dim == 0 ) return m_offx;
+		if( dim == 1 ) return m_offy;
+		return m_offz;
+	}
+	
+	//! get extent of the mesh along a specified dimension
+	inline unsigned& offset( unsigned dim )
+	{
+		if( dim == 0 ) return m_offx;
+		if( dim == 1 ) return m_offy;
+		return m_offz;
+	}
+	
+	//! set all the data to zero values
+	void zero( void )
+	{
+		for( unsigned i=0; i<m_nx*m_ny*m_nz; ++i )
+			m_pdata[i] = 0.0;
+	}
+	
+	//! direct array random acces to the data block
+	inline real_t * operator[]( const int i )
+	{	return &m_pdata[i];	}
+
+	//! direct array random acces to the data block (const)
+	inline const real_t * operator[]( const int i ) const
+	{	return &m_pdata[i];	}
+	
+	//! 3D random access to the data block via index 3-tuples
+	inline real_t& operator()(const int ix, const int iy, const int iz )
+	{	return m_pdata[ (ix*m_ny+iy)*m_nz + iz ];	}
+	
+	//! 3D random access to the data block via index 3-tuples (const)
+	inline const real_t& operator()(const int ix, const int iy, const int iz ) const
+	{	return m_pdata[ (ix*m_ny+iy)*m_nz + iz ];	}
+	
+	//! direct multiplication of the whole data block with a number
+	Meshvar<real_t>& operator*=( real_t x )
+	{
+		for( unsigned i=0; i<m_nx*m_ny*m_nz; ++i )
+			m_pdata[i] *= x;
+		return *this;
+	}
+	
+	//! direct addition of a number to the whole data block
+	Meshvar<real_t>& operator+=( real_t x )
+	{
+		for( unsigned i=0; i<m_nx*m_ny*m_nz; ++i )
+			m_pdata[i] += x;
+		return *this;
+	}
+
+	//! direct element-wise division of the whole data block by a number
+	Meshvar<real_t>& operator/=( real_t x )
+	{
+		for( unsigned i=0; i<m_nx*m_ny*m_nz; ++i )
+			m_pdata[i] /= x;
+		return *this;
+	}
+	
+	
+	//! direct subtraction of a number from the whole data block
+	Meshvar<real_t>& operator-=( real_t x )
+	{
+		for( unsigned i=0; i<m_nx*m_ny*m_nz; ++i )
+			m_pdata[i] -= x;
+		return *this;
+	}
+	
+	//! direct element-wise multiplication with another compatible mesh
+	Meshvar<real_t>& operator*=( const Meshvar<real_t>& v )
+	{
+		if( v.m_nx*v.m_ny*v.m_nz != m_nx*m_ny*m_nz )
+			throw std::runtime_error("Meshvar::operator*= : attempt to operate on incompatible data");
+		
+		for( unsigned i=0; i<m_nx*m_ny*m_nz; ++i )
+			m_pdata[i] *= v.m_pdata[i];
+		
+		return *this;
+	}
+	
+	//! direct element-wise division with another compatible mesh
+	Meshvar<real_t>& operator/=( const Meshvar<real_t>& v )
+	{
+		if( v.m_nx*v.m_ny*v.m_nz != m_nx*m_ny*m_nz )
+			throw std::runtime_error("Meshvar::operator/= : attempt to operate on incompatible data");
+		
+		for( unsigned i=0; i<m_nx*m_ny*m_nz; ++i )
+			m_pdata[i] /= v.m_pdata[i];
+		
+		return *this;
+	}
+	
+	//! direct element-wise addition of another compatible mesh
+	Meshvar<real_t>& operator+=( const Meshvar<real_t>& v )
+	{
+		if( v.m_nx*v.m_ny*v.m_nz != m_nx*m_ny*m_nz )
+			throw std::runtime_error("Meshvar::operator+= : attempt to operate on incompatible data");
+		
+		for( unsigned i=0; i<m_nx*m_ny*m_nz; ++i )
+			m_pdata[i] += v.m_pdata[i];
+		
+		return *this;
+	}
+	
+	//! direct element-wise subtraction of another compatible mesh
+	Meshvar<real_t>& operator-=( const Meshvar<real_t>& v )
+	{
+		if( v.m_nx*v.m_ny*v.m_nz != m_nx*m_ny*m_nz )
+			throw std::runtime_error("Meshvar::operator-= : attempt to operate on incompatible data");
+		
+		for( unsigned i=0; i<m_nx*m_ny*m_nz; ++i )
+			m_pdata[i] -= v.m_pdata[i];
+		
+		return *this;
+	}
+	
+	//! assignment operator for rectangular meshes
+	Meshvar<real_t>& operator=( const Meshvar<real_t>& m )
+	{
+		m_nx = m.m_nx;
+		m_ny = m.m_ny;
+		m_nz = m.m_nz;
+		
+		m_offx = m.m_offx;
+		m_offy = m.m_offy;
+		m_offz = m.m_offz;
+		
+		if( m_pdata != NULL )
+			delete m_pdata;
+		
+		m_pdata = new real_t[m_nx*m_ny*m_nz];
+		
+		for( unsigned i=0; i<m_nx*m_ny*m_nz; ++i )
+			m_pdata[i] = m.m_pdata[i];
+		
+		return *this;
+	}
+};
+
+//! MeshvarBnd derived class adding boundary ghost cell functionality
+template< typename T >
+class MeshvarBnd : public Meshvar< T >{
+	using Meshvar<T>::m_nx;
+	using Meshvar<T>::m_ny;
+	using Meshvar<T>::m_nz;
+	using Meshvar<T>::m_pdata;
+	
+	
+	
+public:
+	typedef T real_t;
+	unsigned m_nbnd;
+	
+	//! most general constructor
+	MeshvarBnd( unsigned nbnd, unsigned nx, unsigned ny, unsigned nz, unsigned xoff, unsigned yoff, unsigned zoff )
+	: Meshvar<real_t>( nx+2*nbnd, ny+2*nbnd, nz+2*nbnd, xoff, yoff, zoff ), m_nbnd( nbnd )
+	{ 	}
+
+	//! zero-offset constructor
+	MeshvarBnd( unsigned nbnd, unsigned nx, unsigned ny, unsigned nz )
+	: Meshvar<real_t>( nx+2*nbnd, ny+2*nbnd, nz+2*nbnd, 0, 0, 0 ), m_nbnd( nbnd )
+	{ 	}
+	
+	//! constructor for cubic meshes
+	MeshvarBnd( unsigned nbnd, unsigned n, unsigned xoff, unsigned yoff, unsigned zoff )
+	: Meshvar<real_t>( n+2*nbnd, xoff, yoff, zoff ), m_nbnd( nbnd )
+	{ 	}
+	
+	//! constructor for cubic meshes with zero offset
+	MeshvarBnd( unsigned nbnd, unsigned n )
+	: Meshvar<real_t>( n+2*nbnd, 0, 0, 0 ), m_nbnd( nbnd )
+	{ 	}
+	
+	//! modified copy constructor, allows to avoid copying actual data
+	MeshvarBnd( const MeshvarBnd<real_t>& v, bool copyover )
+	: Meshvar<real_t>( v, copyover ), m_nbnd( v.m_nbnd )
+	{   }
+	
+	//! copy constructor
+	explicit MeshvarBnd( const MeshvarBnd<real_t>& v )
+	: Meshvar<real_t>( v, true ), m_nbnd( v.m_nbnd )
+	{   }
+	
+	//! get extent of the mesh along a specified dimension
+	inline unsigned size( unsigned dim=0 ) const
+	{
+		if( dim == 0 ) return m_nx-2*m_nbnd;
+		if( dim == 1 ) return m_ny-2*m_nbnd;
+		return m_nz-2*m_nbnd;
+	}
+
+	//! 3D random access to the data block via index 3-tuples
+	inline real_t& operator()(const int ix, const int iy, const int iz )
+	{
+		int iix(ix+m_nbnd), iiy(iy+m_nbnd), iiz(iz+m_nbnd);
+		return m_pdata[ (iix*m_ny+iiy)*m_nz + iiz ];
+	}
+	
+	//! 3D random access to the data block via index 3-tuples (const)
+	inline const real_t& operator()(const int ix, const int iy, const int iz ) const
+	{
+		int iix(ix+m_nbnd), iiy(iy+m_nbnd), iiz(iz+m_nbnd);
+		return m_pdata[ (iix*m_ny+iiy)*m_nz + iiz ];
+	}
+	
+	//! assignment operator for rectangular meshes with ghost zones
+	MeshvarBnd<real_t>& operator=( const MeshvarBnd<real_t>& m )
+	{
+		this->m_nx = m.m_nx;
+		this->m_ny = m.m_ny;
+		this->m_nz = m.m_nz;
+		
+		if( m_pdata != NULL )
+			delete[] m_pdata;
+			
+		m_pdata = new real_t[m_nx*m_ny*m_nz];
+		
+		for( unsigned i=0; i<m_nx*m_ny*m_nz; ++i )
+			this->m_pdata[i] = m.m_pdata[i];
+		
+		return *this;
+	}
+
+	//! sets the value of all ghost zones to zero
+	void zero_bnd( void )
+	{
+		
+		int nx,ny,nz;
+		nx = this->size(0);
+		ny = this->size(1);
+		nz = this->size(2);
+		
+		
+		for( int j=-m_nbnd; j<ny+m_nbnd; ++j )
+			for( int k=-m_nbnd; k<nz+m_nbnd; ++k ){
+				for( int i=-m_nbnd;i<0;++i )
+				{
+					(*this)(i,j,k)  = 0.0;
+					(*this)(nx-1-i,j,k) = 0.0;	
+				}
+				
+			}
+		
+		for( int i=-m_nbnd; i<nx+m_nbnd; ++i )
+			for( int k=-m_nbnd; k<nz+m_nbnd; ++k ){
+				for( int j=-m_nbnd;j<0;++j )
+				{
+					(*this)(i,j,k) = 0.0;
+					(*this)(i,ny-j-1,k) = 0.0;
+				}
+				
+			}
+		
+		for( int i=-m_nbnd; i<nx+m_nbnd; ++i )
+			for( int j=-m_nbnd; j<ny+m_nbnd; ++j ){
+				for( int k=-m_nbnd;k<0;++k )
+				{
+					(*this)(i,j,k) = 0.0;
+					(*this)(i,j,nz-k-1) = 0.0;	
+				}
+				
+			}
+	}
+	
+	//! outputs the data, for debugging only, not practical for large datasets
+	void print( void ) const
+	{
+		int nbnd = m_nbnd;
+		
+		std::cout << "size is [" << this->size(0) << ", " << this->size(1) << ", " << this->size(2) << "]\n";
+		std::cout << "ghost region has length of " << nbnd << std::endl;
+		
+		std::cout.precision(3);
+		for(int i=-nbnd; i<(int)this->size(0)+nbnd; ++i )
+		{
+			std::cout << "ix = " << i << ": \n";
+			
+			for (int j=-nbnd; j<(int)this->size(1)+nbnd; ++j) {
+				for (int k=-nbnd; k<(int)this->size(2)+nbnd; ++k) {
+					if( i<0||i>=this->size(0)||j<0||j>=this->size(1)||k<0||k>=this->size(2))
+						std::cout << "[" << std::setw(6) << (*this)(i,j,k) << "] ";
+					else
+						std::cout << std::setw(8) <<  (*this)(i,j,k) << " ";
+				}
+				std::cout << std::endl;
+			}
+			
+			std::cout << std::endl;
+			
+		}
+	}
+};
+
+
+
+//! class that subsumes a nested grid collection
+template< typename T >
+class GridHierarchy
+{
+public:
+	
+	//! number of ghost cells on boundary
+	unsigned m_nbnd;
+	
+	//! highest level without adaptive refinement
+	unsigned m_levelmin;
+	
+	//! vector of pointers to the underlying rectangular mesh data for each level
+	std::vector< MeshvarBnd<T>* > m_pgrids;
+	
+	std::vector<int> 
+		m_xoffabs,		//!< vector of x-offsets of a level mesh relative to the coarser level
+		m_yoffabs,		//!< vector of x-offsets of a level mesh relative to the coarser level
+		m_zoffabs;		//!< vector of x-offsets of a level mesh relative to the coarser level
+	
+protected:
+	
+	//! check whether a given grid has identical hierarchy, dimensions to this 
+	bool is_consistent( const GridHierarchy<T>& gh )
+	{
+		if( gh.levelmax()!=levelmax() )
+			return false;
+		
+		if( gh.levelmin()!=levelmin() )
+			return false;
+		
+		for( unsigned i=levelmin(); i<=levelmax(); ++i )
+			for( int j=0; j<3; ++j )
+			{
+				if( size(i,j) != gh.size(i,j) )
+					return false;
+				if( offset(i,j) != gh.offset(i,j) )
+					return false;
+			}
+		
+		return true;
+	}
+	
+public:
+	
+	
+	//! return a pointer to the MeshvarBnd object representing data for one level
+	MeshvarBnd<T> *get_grid( unsigned ilevel )
+	{	
+
+		if( ilevel >= m_pgrids.size() )
+		{
+			std::cerr << "Attempt to access level " << ilevel << " but maxlevel = " << m_pgrids.size()-1 << std::endl;
+			throw std::runtime_error("Fatal: attempt to access non-existent grid");
+		}
+		return m_pgrids[ilevel];  
+	}
+
+	//! return a pointer to the MeshvarBnd object representing data for one level (const)	
+	const MeshvarBnd<T> *get_grid( unsigned ilevel ) const
+	{	
+		if( ilevel >= m_pgrids.size() )
+		{
+			std::cerr << "Attempt to access level " << ilevel << " but maxlevel = " << m_pgrids.size()-1 << std::endl;
+			throw std::runtime_error("Fatal: attempt to access non-existent grid");
+		}
+
+		return m_pgrids[ilevel];  
+	}
+	
+	
+	//! constructor for a collection of rectangular grids representing a multi-level hierarchy
+	/*! creates an empty hierarchy, levelmin is initially zero, no grids are stored
+	 * @param nbnd number of ghost zones added at the boundary
+	 */
+	explicit GridHierarchy( unsigned nbnd )
+	: m_nbnd( nbnd ), m_levelmin( 0 )
+	{
+		m_pgrids.clear();
+	}
+	
+	//! copy constructor
+	explicit GridHierarchy( const GridHierarchy<T> & gh )
+	{
+		for( unsigned i=0; i<=gh.levelmax(); ++i )
+			m_pgrids.push_back( new MeshvarBnd<T>( *gh.get_grid(i) ) );
+		
+		m_nbnd = gh.m_nbnd;
+		m_levelmin = gh.m_levelmin;
+		
+		m_xoffabs = gh.m_xoffabs;
+		m_yoffabs = gh.m_yoffabs;
+		m_zoffabs = gh.m_zoffabs;
+	}
+	
+	//! destructor
+	~GridHierarchy()
+	{
+		this->deallocate();
+		
+	}
+	
+	//! free all memory occupied by the grid hierarchy
+	void deallocate()
+	{
+		for( unsigned i=0; i<m_pgrids.size(); ++i )
+			delete m_pgrids[i];
+		m_pgrids.clear();
+		
+		m_xoffabs.clear();
+		m_yoffabs.clear();
+		m_zoffabs.clear();
+		m_levelmin = 0;
+	}
+	
+	
+	//! get offset of a grid at specified refinement level
+	/*! the offset describes the shift of a refinement grid with respect to its coarser parent grid
+	 *  @param ilevel the level for which the offset is to be determined
+	 *  @param idim the dimension along which the offset is to be determined
+	 *  @return integer value denoting the offset in units of coarse grid cells
+	 *  @sa offset_abs
+	 */
+	int offset( int ilevel, int idim ) const
+	{
+		return m_pgrids[ilevel]->offset(idim);
+	}
+	
+	//! get size of a grid at specified refinement level
+	/*! the size describes the number of cells along one dimension of a grid
+	 *  @param ilevel the level for which the size is to be determined
+	 *  @param idim the dimension along which the size is to be determined
+	 *  @return integer value denoting the size of refinement grid at level ilevel along dimension idim
+	 */
+	int size( int ilevel, int idim ) const
+	{
+		return m_pgrids[ilevel]->size(idim);
+	}
+	
+	
+	//! get the absolute offset of a grid at specified refinement level
+	/*! the absolute offset describes the shift of a refinement grid with respect to the simulation volume
+	 *  @param ilevel the level for which the offset is to be determined
+	 *  @param idim the dimension along which the offset is to be determined
+	 *  @return integer value denoting the absolute offset in units of fine grid cells
+	 *  @sa offset
+	 */
+	int offset_abs( int ilevel, int idim ) const
+	{
+		if( idim == 0 ) return m_xoffabs[ilevel];
+		if( idim == 1 ) return m_yoffabs[ilevel];
+		return m_zoffabs[ilevel];
+	}
+	
+	
+	//! get the coordinate posisition of a grid cell 
+	/*! returns the position of a grid cell at specified level relative to the simulation volume
+	 *  @param ilevel the refinement level of the grid cell
+	 *  @param i the x-index of the cell in the level grid
+	 *  @param j the y-index of the cell in the level grid
+	 *  @param k the z-index of the cell in the level grid
+	 *  @param ppos pointer to a double[3] array to which the coordinates are written
+	 *  @return none
+	 */
+	void cell_pos( unsigned ilevel, int i, int j, int k, double* ppos ) const
+	{
+		double h = 1.0/pow(2,ilevel);//, htop = h*2.0;
+		ppos[0] = h*((double)offset_abs(ilevel,0)+(double)i+0.5);
+		ppos[1] = h*((double)offset_abs(ilevel,1)+(double)j+0.5);
+		ppos[2] = h*((double)offset_abs(ilevel,2)+(double)k+0.5);
+		
+		if( ppos[0] >= 1.0 || ppos[1] >= 1.0 || ppos[2] >= 1.0 )
+			std::cerr << " - Cell seems outside domain! : (" << ppos[0] << ", " << ppos[1] << ", " << ppos[2] << "\n";
+	}
+	
+	//! checks whether a given grid cell is refined
+	/*! a grid cell counts as refined if it is divided into 8 cells at the next higher level
+	 *  @param ilevel the refinement level of the grid cell
+	 *  @param i the x-index of the cell in the level grid
+	 *  @param j the y-index of the cell in the level grid
+	 *  @param k the z-index of the cell in the level grid
+	 *  @return true if cell is refined, false otherwise
+	 */
+	bool is_refined( unsigned ilevel, int i, int j, int k ) const
+	{
+		if( ilevel == levelmax() ) return false;
+		
+		if( i < offset(ilevel+1,0) || i >= offset(ilevel+1, 0)+size(ilevel+1,0)/2 ||
+		    j < offset(ilevel+1,1) || j >= offset(ilevel+1, 1)+size(ilevel+1,1)/2 ||
+		    k < offset(ilevel+1,2) || k >= offset(ilevel+1, 2)+size(ilevel+1,2)/2 )
+			return false;
+		
+		return true;
+	}
+	
+	//! sets the values of all grids on all levels to zero
+	void zero( void )
+	{
+		for( unsigned i=0; i<m_pgrids.size(); ++i )
+			m_pgrids[i]->zero();
+	}
+	
+	
+	//! count the number of cells that are not further refined (=leafs)
+	/*! for allocation purposes it is useful to query the number of cells to be expected
+	 *  @param lmin the minimum refinement level to consider
+	 *  @param lmax the maximum refinement level to consider
+	 *  @return the integer number of cells between lmin and lmax that are not further refined
+	 */
+	unsigned count_leaf_cells( unsigned lmin, unsigned lmax ) const
+	{
+		unsigned npcount = 0;
+		
+		for( int ilevel=lmax; ilevel>=(int)lmin; --ilevel )
+			for( unsigned i=0; i<get_grid(ilevel)->size(0); ++i )
+				for( unsigned j=0; j<get_grid(ilevel)->size(1); ++j )
+					for( unsigned k=0; k<get_grid(ilevel)->size(2); ++k )
+						if( ! is_refined(ilevel,i,j,k) )
+							++npcount;
+		
+		return npcount;
+	}
+	
+	//! count the number of cells that are not further refined (=leafs)
+	/*! for allocation purposes it is useful to query the number of cells to be expected
+	 *  @return the integer number of cells in the hierarchy that are not further refined
+	 */
+	unsigned count_leaf_cells( void ) const
+	{
+		return count_leaf_cells( levelmin(), levelmax() );
+	}
+	
+	//! creates a hierarchy of coextensive grids, refined by factors of 2
+	/*! creates a hierarchy of lmax grids, each extending over the whole simulation volume with
+	 *  grid length 2^n for level 0<=n<=lmax
+	 *  @param lmax the maximum refinement level to be added (sets the resolution to 2^lmax for each dim)
+	 *  @return none
+	 */
+	void create_base_hierarchy( unsigned lmax )
+	{
+		unsigned n=1;
+		m_pgrids.clear();
+		for( unsigned i=0; i<= lmax; ++i )
+		{
+			//std::cout << "....adding level " << i << " (" << n << ", " << n << ", " << n << ")" << std::endl;
+			m_pgrids.push_back( new MeshvarBnd<T>( m_nbnd, n, n, n, 0, 0, 0 ) );
+			m_pgrids[i]->zero();
+     		n *= 2;
+			
+			m_xoffabs.push_back( 0 );
+			m_yoffabs.push_back( 0 );
+			m_zoffabs.push_back( 0 );
+		}
+		
+		m_levelmin = lmax;
+	}
+	
+	GridHierarchy<T>& operator*=( T x )
+	{
+		for( unsigned i=0; i<m_pgrids.size(); ++i )
+			(*m_pgrids[i]) *= x;
+		return *this;
+	}
+	
+	GridHierarchy<T>& operator/=( T x )
+	{
+		for( unsigned i=0; i<m_pgrids.size(); ++i )
+			(*m_pgrids[i]) /= x;
+		return *this;
+	}
+	
+	GridHierarchy<T>& operator+=( T x )
+	{
+		for( unsigned i=0; i<m_pgrids.size(); ++i )
+			(*m_pgrids[i]) += x;
+		return *this;
+	}
+	
+	GridHierarchy<T>& operator-=( T x )
+	{
+		for( unsigned i=0; i<m_pgrids.size(); ++i )
+			(*m_pgrids[i]) -= x;
+		return *this;
+	}
+	
+	GridHierarchy<T>& operator*=( const GridHierarchy& gh )
+	{
+		if( !is_consistent(gh) )
+			throw std::runtime_error("GridHierarchy::operator*= : attempt to operate on incompatible data");
+		
+		for( unsigned i=0; i<m_pgrids.size(); ++i )
+			(*m_pgrids[i]) *= *gh.get_grid(i);
+		return *this;
+	}
+	
+	GridHierarchy<T>& operator/=( const GridHierarchy& gh )
+	{
+		if( !is_consistent(gh) )
+			throw std::runtime_error("GridHierarchy::operator/= : attempt to operate on incompatible data");
+		
+		for( unsigned i=0; i<m_pgrids.size(); ++i )
+			(*m_pgrids[i]) /= *gh.get_grid(i);
+		return *this;
+	}
+	
+	GridHierarchy<T>& operator+=( const GridHierarchy& gh )
+	{
+		if( !is_consistent(gh) )
+			throw std::runtime_error("GridHierarchy::operator+= : attempt to operate on incompatible data");
+		
+		for( unsigned i=0; i<m_pgrids.size(); ++i )
+			(*m_pgrids[i]) += *gh.get_grid(i);
+		return *this;
+	}
+	
+	GridHierarchy<T>& operator-=( const GridHierarchy& gh )
+	{
+		if( !is_consistent(gh) )
+			throw std::runtime_error("GridHierarchy::operator-= : attempt to operate on incompatible data");
+		
+		for( unsigned i=0; i<m_pgrids.size(); ++i )
+			(*m_pgrids[i]) -= *gh.get_grid(i);
+		return *this;
+	}
+	
+	
+	
+	//... Xoff is in units of the coarser grid,
+	//... nX is the extent in fine grid cells
+	void add_patch( unsigned xoff, unsigned yoff, unsigned zoff, unsigned nx, unsigned ny, unsigned nz )
+	{
+		m_pgrids.push_back( new MeshvarBnd<T>( m_nbnd, nx, ny, nz, xoff, yoff, zoff ) );
+		m_pgrids.back()->zero();
+		
+		//.. add absolute offsets (in units of current level grid cells)
+		m_xoffabs.push_back( 2*(m_xoffabs.back() + xoff) );
+		m_yoffabs.push_back( 2*(m_yoffabs.back() + yoff) );
+		m_zoffabs.push_back( 2*(m_zoffabs.back() + zoff) );
+	}
+	
+	void cut_patch( unsigned ilevel, unsigned xoff, unsigned yoff, unsigned zoff, unsigned nx, unsigned ny, unsigned nz)
+	{
+		
+		unsigned dx,dy,dz,dxtop,dytop,dztop;
+		dx = xoff-m_xoffabs[ilevel];
+		dy = yoff-m_yoffabs[ilevel];
+		dz = zoff-m_zoffabs[ilevel];
+		
+		dxtop = m_pgrids[ilevel]->offset(0)+dx/2;
+		dytop = m_pgrids[ilevel]->offset(1)+dy/2;
+		dztop = m_pgrids[ilevel]->offset(2)+dz/2;
+		
+		
+		MeshvarBnd<T> *mnew = new MeshvarBnd<T>( m_nbnd, nx, ny, nz, dxtop, dytop, dztop );
+		
+		
+		//... copy data
+		for( unsigned i=0; i<nx; ++i )
+			for( unsigned j=0; j<ny; ++j )
+				for( unsigned k=0; k<nz; ++k )
+					//(*mnew)(i,j,k) = (*m_pgrids[ilevel])(i+xoff,j+yoff,k+zoff);
+					(*mnew)(i,j,k) = (*m_pgrids[ilevel])(i+dx,j+dy,k+dz);
+
+		//... replace in hierarchy
+		delete m_pgrids[ilevel];
+		m_pgrids[ilevel] = mnew;
+		
+		//... update offsets
+
+		m_xoffabs[ilevel] += dx;//xoff;
+		m_yoffabs[ilevel] += dy;//yoff;
+		m_zoffabs[ilevel] += dz;//zoff;
+		
+		
+		if( ilevel < levelmax() )
+		{
+			m_pgrids[ilevel+1]->offset(0) -= dx;//xoff;
+			m_pgrids[ilevel+1]->offset(1) -= dy;//yoff;
+			m_pgrids[ilevel+1]->offset(2) -= dz;//zoff;
+		}
+		
+		find_new_levelmin();
+	}
+	
+	void find_new_levelmin( void )
+	{
+		for( unsigned i=0; i<=levelmax(); ++i )
+		{
+			unsigned n = (unsigned)pow(2,i);
+			if(	m_pgrids[i]->size(0) == n &&
+			   	m_pgrids[i]->size(1) == n &&
+			   	m_pgrids[i]->size(2) == n )
+			{
+				m_levelmin=i;
+			}
+		}
+	}
+	
+	unsigned levelmax( void ) const
+	{
+		return m_pgrids.size()-1;
+	}
+	
+	unsigned levelmin( void ) const
+	{
+		return m_levelmin;
+	}
+	
+	GridHierarchy& operator=( const GridHierarchy<T>& gh )
+	{
+		for( unsigned i=0; i<m_pgrids.size(); ++i )
+			delete m_pgrids[i];
+		m_pgrids.clear();
+		
+		for( unsigned i=0; i<=gh.levelmax(); ++i )
+			m_pgrids.push_back( new MeshvarBnd<T>( *gh.get_grid(i) ) );
+		m_levelmin = gh.levelmin();
+		m_nbnd = gh.m_nbnd;
+		
+		m_xoffabs = gh.m_xoffabs;
+		m_yoffabs = gh.m_yoffabs;
+		m_zoffabs = gh.m_zoffabs;
+		
+		
+		return *this;
+	}
+};
+
+//! class that computes the refinement structure given parameters
+class refinement_hierarchy
+{
+	std::vector<double> x0_,y0_,z0_,xl_,yl_,zl_;
+	std::vector<unsigned> ox_,oy_,oz_,oax_,oay_,oaz_;
+	std::vector<unsigned> nx_,ny_,nz_;
+	unsigned levelmin_, levelmax_, padding_;
+	config_file& cf_;
+	bool align_top_;
+	float x0ref_[3], lxref_[3];
+	int xshift_[3];
+	
+public:
+	
+	refinement_hierarchy( const refinement_hierarchy& rh )
+	: cf_( rh.cf_ )
+	{
+		*this = rh;
+	}
+	
+	explicit refinement_hierarchy( config_file& cf )
+	: cf_( cf )
+	{
+		//... query the parameter data we need
+		levelmin_	= cf_.getValue<unsigned>("setup","levelmin");
+		levelmax_	= cf_.getValue<unsigned>("setup","levelmax");
+		padding_	= cf_.getValue<unsigned>("setup","padding");
+		align_top_	= cf_.getValue<bool>("setup","align_top");
+		
+		std::string temp;
+		
+		temp		= cf_.getValue<std::string>( "setup", "ref_offset" );
+		sscanf( temp.c_str(), "%g,%g,%g", &x0ref_[0], &x0ref_[1], &x0ref_[2] ); 
+		
+		temp		= cf_.getValue<std::string>( "setup", "ref_extent" );
+		sscanf( temp.c_str(), "%g,%g,%g", &lxref_[0],&lxref_[1],&lxref_[2] );
+		
+		unsigned ncoarse = (unsigned)pow(2,levelmin_);
+		
+		//... determine shift
+
+		double xc[3];
+		xc[0] = fmod(x0ref_[0]+0.5*lxref_[0],1.0);
+		xc[1] = fmod(x0ref_[1]+0.5*lxref_[1],1.0);
+		xc[2] = fmod(x0ref_[2]+0.5*lxref_[2],1.0);
+		xshift_[0] = (int)((0.5-xc[0])*ncoarse);
+		xshift_[1] = (int)((0.5-xc[1])*ncoarse);
+		xshift_[2] = (int)((0.5-xc[2])*ncoarse);
+		
+		char strtmp[32];
+		sprintf( strtmp, "%d", xshift_[0] );	cf_.insertValue( "setup", "shift_x", strtmp );
+		sprintf( strtmp, "%d", xshift_[1] );	cf_.insertValue( "setup", "shift_y", strtmp );
+		sprintf( strtmp, "%d", xshift_[2] );	cf_.insertValue( "setup", "shift_z", strtmp );
+		
+		
+		x0ref_[0] += (double)xshift_[0]/ncoarse;
+		x0ref_[1] += (double)xshift_[1]/ncoarse;
+		x0ref_[2] += (double)xshift_[2]/ncoarse;
+		
+		
+		//... initialize arrays 
+		x0_.assign(levelmax_+1,0.0);	xl_.assign(levelmax_+1,1.0);
+		y0_.assign(levelmax_+1,0.0);	yl_.assign(levelmax_+1,1.0);
+		z0_.assign(levelmax_+1,0.0);	zl_.assign(levelmax_+1,1.0);
+		ox_.assign(levelmax_+1,0);		nx_.assign(levelmax_+1,0);
+		oy_.assign(levelmax_+1,0);		ny_.assign(levelmax_+1,0);
+		oz_.assign(levelmax_+1,0);		nz_.assign(levelmax_+1,0);
+		
+		oax_.assign(levelmax_+1,0);
+		oay_.assign(levelmax_+1,0);
+		oaz_.assign(levelmax_+1,0);
+		
+		
+		nx_[levelmin_] = ncoarse;
+		ny_[levelmin_] = ncoarse;
+		nz_[levelmin_] = ncoarse;
+		
+		
+		//... determine the position of the refinement region on the finest grid
+		int il,jl,kl,ir,jr,kr;
+		int nresmax = (int)pow(2,levelmax_);
+		il = (int)(x0ref_[0] * nresmax);
+		jl = (int)(x0ref_[1] * nresmax);
+		kl = (int)(x0ref_[2] * nresmax);
+		ir = (int)((x0ref_[0]+lxref_[0]) * nresmax + 1.0);
+		jr = (int)((x0ref_[0]+lxref_[0]) * nresmax + 1.0);
+		kr = (int)((x0ref_[0]+lxref_[0]) * nresmax + 1.0);
+		
+		//... align with coarser grids ...
+		if( align_top_ )
+		{
+			//... require alignment with top grid
+			unsigned nref = (unsigned)pow(2,levelmax_-levelmin_)*2;
+			
+			il = (int)((double)il/nref)*nref;
+			jl = (int)((double)jl/nref)*nref;
+			kl = (int)((double)kl/nref)*nref;
+			
+			ir = (int)((double)ir/nref+1.0)*nref;
+			jr = (int)((double)jr/nref+1.0)*nref;
+			kr = (int)((double)kr/nref+1.0)*nref;
+			
+		}else{
+			//... require alignment with coarser grid
+			il -= il%2; jl -= jl%2; kl -= kl%2;
+			ir += ir%2; jr += jr%2; kr += kr%2; 
+		}
+		
+		if( levelmin_ != levelmax_ )
+		{
+			oax_[levelmax_] = (il+nresmax)%nresmax;
+			oay_[levelmax_] = (jl+nresmax)%nresmax;
+			oaz_[levelmax_] = (kl+nresmax)%nresmax;
+			nx_[levelmax_]  = ir-il;	
+			ny_[levelmax_]  = jr-jl;	
+			nz_[levelmax_]  = kr-kl;
+		}
+		
+		//... determine position of coarser grids
+		for( unsigned ilevel=levelmax_-1; ilevel> levelmin_; --ilevel )
+		{
+			il = (int)((double)il * 0.5 - padding_);
+			jl = (int)((double)jl * 0.5 - padding_);
+			kl = (int)((double)kl * 0.5 - padding_);
+			
+			ir = (int)((double)ir * 0.5 + padding_);
+			jr = (int)((double)jr * 0.5 + padding_);
+			kr = (int)((double)kr * 0.5 + padding_);
+			
+			//... align with coarser grids ...
+			if( align_top_ )
+			{
+				//... require alignment with top grid
+				unsigned nref = (unsigned)pow(2,ilevel-levelmin_);
+				
+				il = (int)((double)il/nref)*nref;
+				jl = (int)((double)jl/nref)*nref;
+				kl = (int)((double)kl/nref)*nref;
+				
+				ir = (int)((double)ir/nref+1.0)*nref;
+				jr = (int)((double)jr/nref+1.0)*nref;
+				kr = (int)((double)kr/nref+1.0)*nref;
+				
+			}else{
+				//... require alignment with coarser grid
+				il -= il%2; jl -= jl%2; kl -= kl%2;
+				ir += ir%2; jr += jr%2; kr += kr%2; 
+			}
+			
+			oax_[ilevel] = il;		oay_[ilevel] = jl;		oaz_[ilevel] = kl;
+			nx_[ilevel]  = ir-il;	ny_[ilevel]  = jr-jl;	nz_[ilevel]  = kr-kl;
+			
+		}
+		
+		//... determine relative offsets between grids
+		for( unsigned ilevel=levelmax_; ilevel>levelmin_; --ilevel )
+		{
+			ox_[ilevel] = (oax_[ilevel]/2 - oax_[ilevel-1]);
+			oy_[ilevel] = (oay_[ilevel]/2 - oay_[ilevel-1]);
+			oz_[ilevel] = (oaz_[ilevel]/2 - oaz_[ilevel-1]);
+		}
+		
+		for( unsigned ilevel=levelmin_+1; ilevel<=levelmax_; ++ilevel )
+		{
+			double h = 1.0/pow(2,ilevel);
+			
+			x0_[ilevel] = h*(double)oax_[ilevel];
+			y0_[ilevel] = h*(double)oay_[ilevel];
+			z0_[ilevel] = h*(double)oaz_[ilevel];
+			
+			xl_[ilevel] = h*(double)nx_[ilevel];
+			yl_[ilevel] = h*(double)ny_[ilevel];
+			zl_[ilevel] = h*(double)nz_[ilevel];
+		}
+		
+		for( unsigned ilevel=0; ilevel <=levelmin_; ++ilevel )
+		{
+			unsigned n = (unsigned)pow(2,ilevel);
+			
+			xl_[ilevel] = yl_[ilevel] = zl_[ilevel] = 1.0;
+			nx_[ilevel] = ny_[ilevel] = nz_[ilevel] = n;
+		}
+	}
+	
+	refinement_hierarchy& operator=( const refinement_hierarchy& o )
+	{
+		levelmin_ = o.levelmin_;
+		levelmax_ = o.levelmax_;
+		padding_  = o.padding_;
+		cf_ = o.cf_;
+		align_top_ = o.align_top_;
+		for( int i=0; i<3; ++i )
+		{
+			x0ref_[i] = o.x0ref_[i];
+			lxref_[i] = o.lxref_[i];
+			xshift_[i] = o.xshift_[i];
+		}
+		
+		x0_ = o.x0_; y0_ = o.y0_; z0_ = o.z0_;
+		xl_ = o.xl_; yl_ = o.yl_; zl_ = o.zl_;
+		ox_ = o.ox_; oy_ = o.oy_; oz_ = o.oz_;
+		oax_= o.oax_; oay_ = o.oay_; oaz_ = o.oaz_;
+		nx_ = o.nx_; ny_=o.ny_; nz_=o.nz_;
+		
+		return *this;
+	}
+	
+	void adjust_level( unsigned ilevel, int nx, int ny, int nz, int oax, int oay, int oaz )
+	{
+		double h = 1.0/pow(2,ilevel);
+		
+		int dx,dy,dz;
+		
+		dx = oax_[ilevel] - oax;
+		dy = oay_[ilevel] - oay;
+		dz = oaz_[ilevel] - oaz;
+		
+		ox_[ilevel] -= dx/2;
+		oy_[ilevel] -= dy/2;
+		oz_[ilevel] -= dz/2;
+		
+		oax_[ilevel] = oax; 
+		oay_[ilevel] = oay; 
+		oaz_[ilevel] = oaz;
+		
+		nx_[ilevel] = nx;
+		ny_[ilevel] = ny;
+		nz_[ilevel] = nz;
+		
+		x0_[ilevel] = h*oax;
+		y0_[ilevel] = h*oay;
+		z0_[ilevel] = h*oaz;
+		
+		xl_[ilevel] = h*nx;
+		yl_[ilevel] = h*ny;
+		zl_[ilevel] = h*nz;
+		
+		if( ilevel < levelmax_ )
+		{
+			ox_[ilevel+1] += dx;
+			oy_[ilevel+1] += dy;
+			oz_[ilevel+1] += dz;
+		}
+		
+		find_new_levelmin();
+		
+	}
+	
+	void find_new_levelmin( void )
+	{
+		for( unsigned i=0; i<=levelmax(); ++i )
+		{
+			unsigned n = (unsigned)pow(2,i);
+			if(	oax_[i]==0 && oay_[i]==0 && oaz_[i]==0
+			   && nx_[i]==n && ny_[i]==n && nz_[i]==n )
+			{
+				levelmin_=i;
+			}
+		}
+		
+		std::cerr << " - refinement_hierarchy: set new levelmin to " << levelmin_ << std::endl;
+	}
+	
+	unsigned offset_abs( unsigned ilevel, int dim ) const
+	{
+		if( dim==0 )
+			return oax_.at(ilevel);
+		if( dim==1 )
+			return oay_.at(ilevel);
+		return oaz_.at(ilevel);
+	}
+	
+	unsigned offset( unsigned ilevel, int dim ) const
+	{
+		if( dim==0 )
+			return ox_.at(ilevel);
+		if( dim==1 )
+			return oy_.at(ilevel);
+		return oz_.at(ilevel);
+	}
+	
+	unsigned size( unsigned ilevel, int dim ) const
+	{
+		if( dim==0 )
+			return nx_.at(ilevel);
+		if( dim==1 )
+			return ny_.at(ilevel);
+		return nz_.at(ilevel);
+	}
+	
+	unsigned levelmin( void ) const
+	{	return levelmin_;	}
+	
+	unsigned levelmax( void ) const
+	{	return levelmax_;	}
+	
+	
+	
+	void output( void )
+	{
+		std::cout << "-------------------------------------------------------------\n";
+		
+		if( xshift_[0]!=0||xshift_[1]!=0||xshift_[2]!=0 )
+			std::cout 
+				<< " - Domain will be shifted by (" << xshift_[0] << ", " << xshift_[1] << ", " << xshift_[2] << ")\n" << std::endl
+				<< " - Grid structure:\n";
+		
+		for( unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel )
+		{
+			std::cout 
+			<< "     Level " << std::setw(3) << ilevel << " :   offset = (" << std::setw(5) << ox_[ilevel] << ", " << std::setw(5) << oy_[ilevel] << ", " << std::setw(5) << oz_[ilevel] << ")\n"
+			<< "                   size   = (" << std::setw(5) << nx_[ilevel] << ", " << std::setw(5) << ny_[ilevel] << ", " << std::setw(5) << nz_[ilevel] << ")\n";
+		}
+		std::cout << "-------------------------------------------------------------\n";
+		
+	}
+};
+
+#endif
+
diff --git a/mg_interp.hh b/mg_interp.hh
new file mode 100644
index 0000000..9afcad5
--- /dev/null
+++ b/mg_interp.hh
@@ -0,0 +1,1267 @@
+/*
+ *  mg_interp.hh
+ *  FROLIC_mg
+ *
+ *  Created by Oliver Hahn on 5/27/10.
+ *  Copyright 2010 KIPAC/Stanford University. All rights reserved.
+ *
+ */
+
+#ifndef __MG_INTERP_HH
+#define __MG_INTERP_HH
+
+#include "mg_operators.hh"
+
+
+struct coarse_fine_interpolation
+{
+	template< class G >
+	void interp_coarse_fine( unsigned ilevel, G& coarse, G& fine )
+	{ }
+};
+
+
+inline double interp2( double x1, double x2, double x3, double f1, double f2, double f3, double x )
+{
+	double a,b,c;	
+	a = (x1 * f3 - x3 * f1 - x2 * f3 - x1 * f2 + x2 * f1 + x3 * f2) / (x1 * x3 * x3 - x2 * x3 * x3 + x2 * x1 * x1 - x3 * x1 * x1 + x3 * x2 * x2 - x1 * x2 * x2);
+	b = -(x1 * x1 * f3 - x1 * x1 * f2 - f1 * x3 * x3 + f2 * x3 * x3 - x2 * x2 * f3 + f1 * x2 * x2) / (x1 - x2) / (x1 * x2 - x1 * x3 + x3 * x3 - x2 * x3);
+	c = (x1 * x1 * x2 * f3 - x1 * x1 * x3 * f2 - x2 * x2 * x1 * f3 + f2 * x1 * x3 * x3 + x2 * x2 * x3 * f1 - f1 * x2 * x3 * x3) / (x1 - x2) / (x1 * x2 - x1 * x3 + x3 * x3 - x2 * x3);
+	
+	return a*x*x+b*x+c;
+}
+
+
+
+
+
+
+inline double interp4left( double f0, double f1, double f2, double f3, double f4 )
+{
+	//return -4.0/231.0*f0+4.0/7.0*f1+5.0/7.0*f2-1.0/3.0*f3+5./77.0*f4; 
+	return 1.0/13.0*f0-21./55.*f1+7.0/9.0*f2+8./15.*f3-8./1287*f4;
+}
+
+inline double interp4right( double f0, double f1, double f2, double f3, double f4 )
+{
+	return interp4left(f4,f3,f2,f1,f0);
+}
+
+inline double interp4lleft( double f0, double f1, double f2, double f3, double f4 )
+{
+	//return 16./231.*f0+48.0/35.0*f1-6.0/7.0*f2-8.0/15.0*f3-9./77.*f4;
+	return -15./91.*f0+8./11.*f1+-10./9.*f2+32./21.*f3+32./1287.*f4;
+}
+
+inline double interp4rright( double f0, double f1, double f2, double f3, double f4 )
+{
+	return interp4lleft(f4,f3,f2,f1,f0);
+}
+
+inline double interp4( double fm2, double fm1, double f0, double fp1, double fp2, double x )
+{
+	double x2 = x*x, x3=x2*x, x4=x3*x;
+	double a,b,c,d,e;
+	
+	a= 1.0/24.0*fm2-1.0/6.0*fm1+0.25*f0-1.0/6.0*fp1+1.0/24.0*fp2;
+	b=-1.0/12.0*fm2+1.0/6.0*fm1-1.0/6.0*fp1+1.0/12.0*fp2;
+	c=-1.0/24.0*fm2+2.0/3.0*fm1-5.0/4.0*f0+2.0/3.0*fp1-1.0/24.0*fp2;
+	d= 1.0/12.0*fm2-2.0/3.0*fm1+2.0/3.0*fp1-1.0/12.0*fp2;
+	e=f0;
+	
+	return a*x4+b*x3+c*x2+d*x+e;
+}
+
+inline double interp4( double* f, double x )
+{
+	double x2 = x*x, x3=x2*x, x4=x3*x;
+	double a,b,c,d,e;
+	
+	a= 1.0/24.0*f[0]-1.0/6.0*f[1]+0.25*f[2]-1.0/6.0*f[3]+1.0/24.0*f[4];
+	b=-1.0/12.0*f[0]+1.0/6.0*f[1]-1.0/6.0*f[3]+1.0/12.0*f[4];
+	c=-1.0/24.0*f[0]+2.0/3.0*f[1]-5.0/4.0*f[2]+2.0/3.0*f[3]-1.0/24.0*f[4];
+	d= 1.0/12.0*f[0]-2.0/3.0*f[1]+2.0/3.0*f[3]-1.0/12.0*f[4];
+	e=f[2];
+	
+	return a*x4+b*x3+c*x2+d*x+e;
+}
+
+
+inline double interp6( double *f, double x )
+{
+	double x2 = x*x, x3=x2*x, x4=x3*x, x5=x4*x, x6=x5*x;
+	double a,b,c,d,e,g,h;
+	
+	a=(f[0]-6.*f[1]+15.*f[2]-20.*f[3]+15.*f[4]-6.*f[5]+f[6])/720.;
+	b=(-3.*f[0]+12.*f[1]-15.*f[2]+15*f[4]-12.*f[5]+3.*f[6])/720.;
+	c=(-5.*f[0]+60.*f[1]-195.*f[2]+280.*f[3]-195.*f[4]+60.*f[5]-5.*f[6])/720.;
+	d=(15.*f[0]-120.*f[1]+195.*f[2]-195.*f[4]+120.*f[5]-15.*f[6])/720.;
+	e=(4.*f[0]-54.*f[1]+540.*f[2]-980.*f[3]+540.*f[4]-54.*f[5]+4.*f[6])/720.;
+	g=(-12.*f[0]+108.*f[1]-540.*f[2]+540.*f[4]-108.*f[5]+12.*f[6])/720.;
+	h=f[3];
+	
+	return a*x6+b*x5+c*x4+d*x3+e*x2+g*x+h;
+	
+}
+
+inline double interp6( double f0, double f1, double f2, double f3, double f4, double f5, double f6, double x )
+{
+	double x2 = x*x, x3=x2*x, x4=x3*x, x5=x4*x, x6=x5*x;
+	double a,b,c,d,e,g,h;
+	double f[7]={
+		f0,f1,f2,f3,f4,f5,f6
+	};
+	
+	a=(f[0]-6.*f[1]+15.*f[2]-20.*f[3]+15.*f[4]-6.*f[5]+f[6])/720.;
+	b=(-3.*f[0]+12.*f[1]-15.*f[2]+15*f[4]-12.*f[5]+3.*f[6])/720.;
+	c=(-5.*f[0]+60.*f[1]-195.*f[2]+280.*f[3]-195.*f[4]+60.*f[5]-5.*f[6])/720.;
+	d=(15.*f[0]-120.*f[1]+195.*f[2]-195.*f[4]+120.*f[5]-15.*f[6])/720.;
+	e=(4.*f[0]-54.*f[1]+540.*f[2]-980.*f[3]+540.*f[4]-54.*f[5]+4.*f[6])/720.;
+	g=(-12.*f[0]+108.*f[1]-540.*f[2]+540.*f[4]-108.*f[5]+12.*f[6])/720.;
+	h=f[3];
+	
+	return a*x6+b*x5+c*x4+d*x3+e*x2+g*x+h;
+	
+}
+
+inline double interp2( double fleft, double fcenter, double fright, double x )
+{
+	double a,b,c;
+	a = 0.5*(fleft+fright)-fcenter;
+	b = 0.5*(fright-fleft);
+	c = fcenter;
+	
+	return a*x*x+b*x+c;
+}
+
+
+inline double interp2left( double fleft, double fcenter, double fright )
+{
+	double a,b,c;
+	a = (6.0*fright-10.0*fcenter+4.0*fleft)/15.0;
+	b = (-4.0*fleft+9.0*fright-5.0*fcenter)/15.0;
+	c = fcenter;
+	
+	return a-b+c;
+}
+
+inline double interp2right( double fleft, double fcenter, double fright )
+{
+	double a,b,c;
+	a = (6.0*fleft-10.0*fcenter+4.0*fright)/15.0;
+	b = (4.0*fright-9.0*fleft+5.0*fcenter)/15.0;
+	c = fcenter;
+	
+	return a+b+c;
+}
+
+inline double interp2lleft( double fleft, double fcenter, double fright )
+{
+	double a,b,c;
+	a = (-10.0*fcenter+4.0*fleft+6.0*fright)/15.0;
+	b = (-12.0*fleft+15.0*fcenter-3.0*fright)/15.0;
+	c = (-3.0*fright+10.0*fcenter+8.0*fleft)/15.0;
+	
+	return a-b+c;
+}
+
+inline double interp2rright( double fleft, double fcenter, double fright )
+{
+	double a,b,c;
+	a = (-10.0*fcenter+4.0*fleft+6.0*fright)/15.0;
+	b = (-12.0*fleft+15.0*fcenter-3.0*fright)/15.0;
+	c = (-3.0*fright+10.0*fcenter+8.0*fleft)/15.0;
+	
+	return a-b+c;
+}
+
+
+inline double interp6left( double f0, double f1, double f2, double f3, double f4, double f5, double f6 )
+{
+	//return -77./2565.*f6+44./221.*f5-14./25.*f4+308./351.*f3+352./675.*f2-16./1755.*f1+112./104975.*f0;
+	return 4./2431.*f0-24./1001.*f1+4./7.*f2+60./77.*f3-6./13.*f4+12./77.*f5-5./221.*f6;
+}
+
+inline double interp6lleft( double f0, double f1, double f2, double f3, double f4, double f5, double f6 )
+{
+	//return -16./4199.*f0+16./429.*f1+32./21.*f2-675./1547.*f5-50./39.*f3+12./11.*f4+4./57.*f6;
+	return -12./2431.*f0+40./429.*f1+4./3.*f2-10./11.*f3+28./39.*f4-3./11.*f5+28./663.*f6;
+}
+
+inline double interp6llleft( double f0, double f1, double f2, double f3, double f4, double f5, double f6 )
+{
+	//return -432./20995.*f0+112./429.*f1+32./15.*f2-324./221.*f5-140./39.*f3+189./55.*f4+14./57.*f6;
+	return -36./2431.*f0+600./1001.*f1+20./21.*f2-100./77.*f3+15./13.*f4-36./77.*f5+50./663.*f6;
+}
+
+inline double interp6right( double f0, double f1, double f2, double f3, double f4, double f5, double f6 )
+{
+	return interp6left(f6,f5,f4,f3,f2,f1,f0);
+}
+inline double interp6rright( double f0, double f1, double f2, double f3, double f4, double f5, double f6 )
+{
+	return interp6lleft(f6,f5,f4,f3,f2,f1,f0);
+}
+inline double interp6rrright( double f0, double f1, double f2, double f3, double f4, double f5, double f6 )
+{
+	return interp6llleft(f6,f5,f4,f3,f2,f1,f0);
+}
+
+
+
+
+#include "fd_schemes.hh"
+
+struct cubic_interp
+: public coarse_fine_interpolation
+{
+	template< class G >
+	void interp_coarse_fine( unsigned ilevel, G& coarse, G& fine )
+	{
+		mg_cubic().prolong_bnd( coarse, fine );
+		
+		
+		//return;
+		//......................................................
+		
+		G *u    = &fine;
+		G *utop = &coarse;
+		
+		int
+		xoff = u->offset(0),
+		yoff = u->offset(1),
+		zoff = u->offset(2);
+		
+		//... don't do anything if we are not an additional refinement region
+		if( xoff == 0 && yoff == 0 && zoff == 0 )
+			return;
+		
+		int
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+		
+#pragma omp parallel for schedule(dynamic)
+		for( int ix=-1; ix<=nx; ++ix )
+			for( int iy=-1; iy<=ny; ++iy )
+				for( int iz=-1; iz<=nz; ++iz )
+				{
+					bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz);
+					
+					//if(ix==-1||ix==nx||iy==-1||iy==ny||iz==-1||iz==nz)
+					if( xbnd || ybnd || zbnd )
+						//if( xbnd ^ ybnd ^ zbnd )
+					{
+						
+						//... only deal with proper ghostzones
+						if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd))
+							continue;
+						
+						int ixtop = (int)(0.5*(double)(ix))+xoff;
+						int iytop = (int)(0.5*(double)(iy))+yoff;
+						int iztop = (int)(0.5*(double)(iz))+zoff;
+						
+						if( ix==-1 ) ixtop=xoff-1;
+						if( iy==-1 ) iytop=yoff-1;
+						if( iz==-1 ) iztop=zoff-1;
+
+						double coarse_flux, fine_flux, dflux;
+				
+						//double fac = 0.125*27.0/24.0, fac2 = -0.125*1.0/24.0;//0.125;//24.0/26.0*0.125*0.5;
+						double fac = 0.5*0.125*27.0/24.0, fac2 = -0.5*0.125*1.0/24.0;
+						//double fac = 0.125*15.0/12.0, fac2 = -0.125*1.0/12.0;
+						
+						if(xbnd)
+						{
+							if( ix==-1 )
+							{
+								
+								fine_flux = 0.0;
+								fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy,iz);
+								fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy+1,iz);
+								fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy,iz+1);
+								fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy+1,iz+1);
+								
+								coarse_flux = Laplace_flux_O4<double>().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
+								fine_flux /= 4.0;
+							
+								dflux = coarse_flux - fine_flux;
+								
+								for(int j=0;j<2;++j)
+									for( int k=0;k<2;++k)
+									{
+										(*u)(ix,iy+j,iz+k)   += fac*dflux;
+										(*u)(ix-1,iy+j,iz+k) += fac2*dflux;
+									}
+								//(*u)(ix,iy+j,iz+k) += 24.0/27.0*dflux;
+							}
+							else if( ix==nx )									
+							{
+								
+								fine_flux = 0.0;
+								fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy,iz);
+								fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy+1,iz);
+								fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy,iz+1);
+								fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy+1,iz+1);
+								
+								coarse_flux = Laplace_flux_O4<double>().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
+								fine_flux /= 4.0;
+								
+								dflux = coarse_flux - fine_flux;
+								
+								for(int j=0;j<2;++j)
+									for( int k=0;k<2;++k)
+									{
+										(*u)(ix,iy+j,iz+k)   += fac*dflux;
+										(*u)(ix+1,iy+j,iz+k) += fac2*dflux;
+									}
+								//(*u)(ix,iy+j,iz+k) += 24.0/27.0*dflux;
+							}
+						}
+						
+						if(ybnd)
+						{
+							if( iy==-1 )
+							{
+								
+								fine_flux = 0.0;
+								fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix,iy+1,iz);
+								fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix+1,iy+1,iz);
+								fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix,iy+1,iz+1);
+								fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix+1,iy+1,iz+1);
+								
+								coarse_flux = Laplace_flux_O4<double>().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
+								fine_flux /= 4.0;
+								
+								dflux = coarse_flux - fine_flux;
+								
+								for(int i=0;i<2;++i)
+									for( int k=0;k<2;++k)
+									{
+										(*u)(ix+i,iy,iz+k)   += fac*dflux;
+										(*u)(ix+i,iy-1,iz+k) += fac2*dflux;
+									}
+							}
+							else if( iy==ny )									
+							{
+								
+								fine_flux = 0.0;
+								fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix,iy,iz);
+								fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix+1,iy,iz);
+								fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix,iy,iz+1);
+								fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix+1,iy,iz+1);
+								
+								coarse_flux = Laplace_flux_O4<double>().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
+								fine_flux /= 4.0;
+								
+								dflux = coarse_flux - fine_flux;
+								
+								for(int i=0;i<2;++i)
+									for( int k=0;k<2;++k)
+									{
+										(*u)(ix+i,iy,iz+k)   += fac*dflux;
+										(*u)(ix+i,iy+1,iz+k) += fac2*dflux;
+									}
+							}
+						}
+						
+						if(zbnd)
+						{
+							if( iz==-1 )
+							{
+								
+								fine_flux = 0.0;
+								fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix,iy,iz+1);
+								fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix+1,iy,iz+1);
+								fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix,iy+1,iz+1);
+								fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix+1,iy+1,iz+1);
+								
+								coarse_flux = Laplace_flux_O4<double>().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
+								fine_flux /= 4.0;
+								
+								dflux = coarse_flux - fine_flux;
+								
+								for(int i=0;i<2;++i)
+									for( int j=0;j<2;++j)
+									{
+										(*u)(ix+i,iy+j,iz)   += fac*dflux;
+										(*u)(ix+i,iy+j,iz-1) += fac2*dflux;
+									}
+							}
+							else if( iz==nz )									
+							{
+								
+								fine_flux = 0.0;
+								fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix,iy,iz);
+								fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix+1,iy,iz);
+								fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix,iy+1,iz);
+								fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix+1,iy+1,iz);
+								
+								coarse_flux = Laplace_flux_O4<double>().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
+								fine_flux /= 4.0;
+								
+								dflux = coarse_flux - fine_flux;
+								
+								for(int i=0;i<2;++i)
+									for( int j=0;j<2;++j)
+									{
+										(*u)(ix+i,iy+j,iz)   += fac*dflux;
+										(*u)(ix+i,iy+j,iz+1) += fac2*dflux;
+									}
+							}
+						}
+					}
+				}
+		
+	}
+};
+
+
+
+struct interp_O3_fluxcorr
+: public coarse_fine_interpolation
+{
+	template< class G >
+	void interp_coarse_fine( unsigned ilevel, G& coarse, G& fine )
+	{
+		
+		//... use cubic interpolation to get all values on boundary
+		mg_cubic().prolong_bnd( coarse, fine );
+
+		//... use flux corrected quadratic interpolation for the
+		//... the boundary overlapped by the Laplace stencil
+		G *u    = &fine;
+		G *utop = &coarse;
+		
+		int
+			xoff = u->offset(0),
+			yoff = u->offset(1),
+			zoff = u->offset(2);
+		
+		//... don't do anything if we are not an additional refinement region
+		if( xoff == 0 && yoff == 0 && zoff == 0 )
+			return;
+		
+		int
+			nx = u->size(0), 
+			ny = u->size(1), 
+			nz = u->size(2);
+		
+		//... set boundary condition for fine grid
+		#pragma omp parallel for schedule(dynamic)
+		for( int ix=-1; ix<=nx; ++ix )
+			for( int iy=-1; iy<=ny; ++iy )
+				for( int iz=-1; iz<=nz; ++iz )
+				{
+					bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz);
+					
+					if( xbnd || ybnd || zbnd )
+					{
+						
+						//... only deal with proper ghostzones
+						if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd))
+							continue;
+						
+						int ixtop = (int)(0.5*(double)(ix))+xoff;
+						int iytop = (int)(0.5*(double)(iy))+yoff;
+						int iztop = (int)(0.5*(double)(iz))+zoff;
+						
+						if( ix==-1 ) ixtop=xoff-1;
+						if( iy==-1 ) iytop=yoff-1;
+						if( iz==-1 ) iztop=zoff-1;
+						
+						double ustar1, ustar2, ustar3, uhat;			
+						double fac = 0.5;//0.25;
+						double flux;;
+						if( ix == -1 && iy%2==0 && iz%2==0 )
+						{
+							flux = 0.0;
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{		
+									ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) );
+									ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) );
+									ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) );
+									
+									uhat   = interp2( ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+									
+									(*u)(ix,iy+j,iz+k) = interp2left( uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k) );
+									
+									flux += ((*u)(ix+1,iy+j,iz+k)-(*u)(ix,iy+j,iz+k));
+								}
+							
+							flux /= 4.0;
+							
+							double dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+									(*u)(ix,iy+j,iz+k) -= dflux;
+						}
+						// right boundary
+						if( ix == nx && iy%2==0 && iz%2==0 )
+						{
+							flux = 0.0;
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{		
+									ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) );
+									ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) );
+									ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) );
+									
+									uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+									
+									(*u)(ix,iy+j,iz+k) = interp2right( (*u)(ix-2,iy+j,iz+k), (*u)(ix-1,iy+j,iz+k), uhat );
+									flux += ((*u)(ix,iy+j,iz+k)-(*u)(ix-1,iy+j,iz+k));
+								}
+							flux /= 4.0;
+							double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop-1,iytop,iztop))/2.0 - flux;
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+									(*u)(ix,iy+j,iz+k) += dflux;
+						}
+
+						// bottom boundary
+						if( iy == -1 && ix%2==0 && iz%2==0 )
+						{
+							flux = 0.0;
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{
+									ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) );
+									ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
+									ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) );
+									
+									uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+									
+									(*u)(ix+j,iy,iz+k) = interp2left( uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k) );
+									
+									flux += ((*u)(ix+j,iy+1,iz+k)-(*u)(ix+j,iy,iz+k));
+								}
+							flux /= 4.0;
+							double dflux = ((*utop)(ixtop,iytop+1,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+									(*u)(ix+j,iy,iz+k) -= dflux;
+						}
+						// top boundary
+						if( iy == ny && ix%2==0 && iz%2==0 )
+						{
+							flux = 0.0;
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{		
+									ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) );
+									ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
+									ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) );
+									
+									uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+									
+									(*u)(ix+j,iy,iz+k) = interp2right( (*u)(ix+j,iy-2,iz+k), (*u)(ix+j,iy-1,iz+k), uhat  );
+									
+									flux += ((*u)(ix+j,iy,iz+k)-(*u)(ix+j,iy-1,iz+k));
+								}
+							flux /= 4.0;
+							double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop-1,iztop))/2.0 - flux;
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+									(*u)(ix+j,iy,iz+k) += dflux;
+						}
+
+						// front boundary
+						if( iz == -1 && ix%2==0 && iy%2==0 )
+						{
+							flux = 0.0;
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{		
+									ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) );
+									ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
+									ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) );
+									
+									uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+									
+									(*u)(ix+j,iy+k,iz) = interp2left( uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2) );
+									
+									flux += ((*u)(ix+j,iy+k,iz+1)-(*u)(ix+j,iy+k,iz));
+								}
+							flux /= 4.0;
+							double dflux = ((*utop)(ixtop,iytop,iztop+1)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+									(*u)(ix+j,iy+k,iz) -= dflux;
+						}
+
+						// back boundary
+						if( iz == nz && ix%2==0 && iy%2==0 )
+						{
+							flux = 0.0;
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{		
+									ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) );
+									ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
+									ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) );
+									
+									uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+									
+									(*u)(ix+j,iy+k,iz) = interp2right( (*u)(ix+j,iy+k,iz-2), (*u)(ix+j,iy+k,iz-1), uhat );
+									
+									flux += ((*u)(ix+j,iy+k,iz)-(*u)(ix+j,iy+k,iz-1));
+								}
+							flux /= 4.0;
+							double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop,iztop-1))/2.0 - flux;
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+									(*u)(ix+j,iy+k,iz) += dflux;
+						}
+						
+					}
+				}
+		
+	}
+};
+
+struct interp_O5_fluxcorr
+: public coarse_fine_interpolation
+{
+	
+		
+	template< class G >
+	void interp_coarse_fine( unsigned ilevel, G& coarse, G& fine )
+	{
+		
+		//... use cubic interpolation to get all values on boundary
+		mg_cubic().prolong_bnd( coarse, fine );
+		
+		//... use flux corrected quadratic interpolation for the
+		//... the boundary overlapped by the Laplace stencil
+		G *u    = &fine;
+		G *utop = &coarse;
+		
+		
+		int
+			xoff = u->offset(0),
+			yoff = u->offset(1),
+			zoff = u->offset(2);
+			
+		//... don't do anything if we are not an additional refinement region
+		if( xoff == 0 && yoff == 0 && zoff == 0 )
+			return;
+		
+		int
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+		
+		//... set boundary condition for fine grid
+		#pragma omp parallel for schedule(dynamic)
+		for( int ix=-1; ix<=nx; ++ix )
+			for( int iy=-1; iy<=ny; ++iy )
+				for( int iz=-1; iz<=nz; ++iz )
+				{
+					bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz);
+					bool bnd=xbnd|ybnd|zbnd;
+					
+					if( bnd )
+					{
+						
+						int ixtop = (int)(0.5*(double)(ix))+xoff;
+						int iytop = (int)(0.5*(double)(iy))+yoff;
+						int iztop = (int)(0.5*(double)(iz))+zoff;
+						
+						if( ix==-1 ) ixtop=xoff-1;
+						if( iy==-1 ) iytop=yoff-1;
+						if( iz==-1 ) iztop=zoff-1;
+						
+						double ustar[5], uhat[2];			
+						double fac = 0.5;
+						
+						double coarse_flux, fine_flux, dflux;
+						
+						double ffac = 12./14.;
+									
+						// left boundary
+						if( ix == -1 && iy%2==0 && iz%2==0 )
+						{
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{		
+									for( int p=0; p<2; ++p )
+									{
+										for( int q=-2;q<=2;++q )
+											ustar[q+2] = interp4( (*utop)(ixtop+p-1,iytop-2,iztop+q), (*utop)(ixtop+p-1,iytop-1,iztop+q), 
+																(*utop)(ixtop+p-1,iytop,iztop+q),   (*utop)(ixtop+p-1,iytop+1,iztop+q), 
+																(*utop)(ixtop+p-1,iytop+2,iztop+q), fac*((double)j-0.5) );
+										uhat[p] = interp4( ustar, fac*((double)k-0.5)-1.5 );
+									}
+									
+									(*u)(ix,iy+j,iz+k)   = interp4left( uhat[0], uhat[1], (*u)(ix+1,iy+j,iz+k), 
+																	   (*u)(ix+2,iy+j,iz+k), (*u)(ix+3,iy+j,iz+k) );
+									(*u)(ix-1,iy+j,iz+k) = interp4lleft( uhat[0], uhat[1], (*u)(ix+1,iy+j,iz+k), 
+																		(*u)(ix+2,iy+j,iz+k), (*u)(ix+3,iy+j,iz+k) );
+								}
+							
+							fine_flux = 0.0;
+							fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy,iz);
+							fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy+1,iz);
+							fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy,iz+1);
+							fine_flux += Laplace_flux_O4<double>().apply_x(-1,*u,ix+1,iy+1,iz+1);
+							fine_flux /= 4.0;
+							
+							coarse_flux = Laplace_flux_O4<double>().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
+							
+							dflux = coarse_flux - fine_flux;
+							
+							for(int j=0;j<2;++j)
+								for( int k=0;k<2;++k)
+								{
+									(*u)(ix,iy+j,iz+k)   += ffac*dflux;
+									(*u)(ix-1,iy+j,iz+k) += ffac*dflux;
+								}
+							
+							
+						}
+						// right boundary
+						if( ix == nx && iy%2==0 && iz%2==0 )
+						{
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{		
+									for( int p=0; p<2; ++p )
+									{
+										for( int q=-2;q<=2;++q )
+											ustar[q+2] = interp4( (*utop)(ixtop+p,iytop-2,iztop+q), (*utop)(ixtop+p,iytop-1,iztop+q), 
+																 (*utop)(ixtop+p,iytop,iztop+q),   (*utop)(ixtop+p,iytop+1,iztop+q), 
+																 (*utop)(ixtop+p,iytop+2,iztop+q), fac*((double)j-0.5) );
+										uhat[p] = interp4( ustar, fac*((double)k-0.5)-1.5 );
+									}
+									
+									(*u)(ix,iy+j,iz+k)   = interp4right( (*u)(ix-3,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), 
+																		(*u)(ix-1,iy+j,iz+k), uhat[0], uhat[1] );
+									(*u)(ix+1,iy+j,iz+k) = interp4rright( (*u)(ix-3,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), 
+																		 (*u)(ix-1,iy+j,iz+k), uhat[0], uhat[1] );
+								}
+							
+							fine_flux = 0.0;
+							fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy,iz);
+							fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy+1,iz);
+							fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy,iz+1);
+							fine_flux += Laplace_flux_O4<double>().apply_x(+1,*u,ix,iy+1,iz+1);
+							
+							coarse_flux = Laplace_flux_O4<double>().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
+							fine_flux /= 4.0;
+							
+							dflux = coarse_flux - fine_flux;
+							
+							for(int j=0;j<2;++j)
+								for( int k=0;k<2;++k)
+								{
+									(*u)(ix,iy+j,iz+k)   += ffac*dflux;
+									(*u)(ix+1,iy+j,iz+k) += ffac*dflux;
+								}
+							
+						}
+						// bottom boundary
+						if( iy == -1 && ix%2==0 && iz%2==0 )
+						{
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{	
+									for( int p=0; p<2; ++p )
+									{
+										for( int q=-2;q<=2;++q )
+											ustar[q+2] = interp4( (*utop)(ixtop-2,iytop+p-1,iztop+q), (*utop)(ixtop-1,iytop+p-1,iztop+q), 
+																 (*utop)(ixtop,iytop+p-1,iztop+q),   (*utop)(ixtop+1,iytop+p-1,iztop+q), 
+																 (*utop)(ixtop+2,iytop+p-1,iztop+q), fac*((double)j-0.5) );
+										uhat[p] = interp4( ustar, fac*((double)k-0.5)-1.5 );
+									}
+									
+									(*u)(ix+j,iy,iz+k)   = interp4left( uhat[0], uhat[1], (*u)(ix+j,iy+1,iz+k), 
+																	   (*u)(ix+j,iy+2,iz+k), (*u)(ix+j,iy+3,iz+k) );									
+									(*u)(ix+j,iy-1,iz+k) = interp4lleft( uhat[0], uhat[1], (*u)(ix+j,iy+1,iz+k), 
+																		(*u)(ix+j,iy+2,iz+k), (*u)(ix+j,iy+3,iz+k) );
+								}
+							
+							fine_flux = 0.0;
+							fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix,iy+1,iz);
+							fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix+1,iy+1,iz);
+							fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix,iy+1,iz+1);
+							fine_flux += Laplace_flux_O4<double>().apply_y(-1,*u,ix+1,iy+1,iz+1);
+							
+							coarse_flux = Laplace_flux_O4<double>().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
+							fine_flux /= 4.0;
+							
+							dflux = coarse_flux - fine_flux;
+							
+							for(int i=0;i<2;++i)
+								for( int k=0;k<2;++k)
+								{
+									(*u)(ix+i,iy,iz+k)   += ffac*dflux;
+									(*u)(ix+i,iy-1,iz+k) += ffac*dflux;
+								}
+							
+						}
+						// top boundary
+						if( iy == ny && ix%2==0 && iz%2==0 )
+						{
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{	
+									for( int p=0; p<2; ++p )
+									{
+										for( int q=-2;q<=2;++q )
+											ustar[q+2] = interp4( (*utop)(ixtop-2,iytop+p,iztop+q), (*utop)(ixtop-1,iytop+p,iztop+q), 
+																 (*utop)(ixtop,iytop+p,iztop+q),   (*utop)(ixtop+1,iytop+p,iztop+q), 
+																 (*utop)(ixtop+2,iytop+p,iztop+q), fac*((double)j-0.5) );
+										uhat[p] = interp4( ustar, fac*((double)k-0.5)+1.5 );
+									}
+									
+									(*u)(ix+j,iy,iz+k)   = interp4right( (*u)(ix+j,iy-3,iz+k), (*u)(ix+j,iy-2,iz+k), 
+																		(*u)(ix+j,iy-1,iz+k), uhat[0], uhat[1] );									
+									(*u)(ix+j,iy+1,iz+k) = interp4rright( (*u)(ix+j,iy-3,iz+k), (*u)(ix+j,iy-2,iz+k), 
+																		 (*u)(ix+j,iy-1,iz+k), uhat[0], uhat[1] );									
+								}
+							
+							fine_flux = 0.0;
+							fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix,iy,iz);
+							fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix+1,iy,iz);
+							fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix,iy,iz+1);
+							fine_flux += Laplace_flux_O4<double>().apply_y(+1,*u,ix+1,iy,iz+1);
+							
+							coarse_flux = Laplace_flux_O4<double>().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
+							fine_flux /= 4.0;
+							
+							dflux = coarse_flux - fine_flux;
+							
+							for(int i=0;i<2;++i)
+								for( int k=0;k<2;++k)
+								{
+									(*u)(ix+i,iy,iz+k)   += ffac*dflux;
+									(*u)(ix+i,iy+1,iz+k) += ffac*dflux;
+								}
+							
+							
+						}
+						// front boundary
+						if( iz == -1 && ix%2==0 && iy%2==0 )
+						{
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{		
+									for( int p=0; p<2; ++p )
+									{
+										for( int q=-2;q<=2;++q )
+											ustar[q+2] = interp4( (*utop)(ixtop-2,iytop+q,iztop+p-1), (*utop)(ixtop-1,iytop+q,iztop+p-1), 
+																 (*utop)(ixtop,iytop+q,iztop+p-1),   (*utop)(ixtop+1,iytop+q,iztop+p-1), 
+																 (*utop)(ixtop+2,iytop+q,iztop+p-1), fac*((double)j-0.5) );
+										uhat[p] = interp4( ustar, fac*((double)k-0.5)-1.5 );
+									}
+									
+									(*u)(ix+j,iy+k,iz)   = interp4left( uhat[0], uhat[1], (*u)(ix+j,iy+k,iz+1), 
+																	   (*u)(ix+j,iy+k,iz+2), (*u)(ix+j,iy+k,iz+3) );									
+									(*u)(ix+j,iy+k,iz-1) = interp4lleft( uhat[0], uhat[1], (*u)(ix+j,iy+k,iz+1), 
+																		(*u)(ix+j,iy+k,iz+2), (*u)(ix+j,iy+k,iz+3) );
+								}
+
+							
+							fine_flux = 0.0;
+							fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix,iy,iz+1);
+							fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix+1,iy,iz+1);
+							fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix,iy+1,iz+1);
+							fine_flux += Laplace_flux_O4<double>().apply_z(-1,*u,ix+1,iy+1,iz+1);
+							
+							coarse_flux = Laplace_flux_O4<double>().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
+							fine_flux /= 4.0;
+							
+							dflux = coarse_flux - fine_flux;
+							
+							for(int i=0;i<2;++i)
+								for( int j=0;j<2;++j)
+								{
+									(*u)(ix+i,iy+j,iz)   += ffac*dflux;
+									(*u)(ix+i,iy+j,iz-1) += ffac*dflux;
+								}
+							
+						}
+						// back boundary
+						if( iz == nz && ix%2==0 && iy%2==0 )
+						{
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{		
+									for( int p=0; p<2; ++p )
+									{
+										for( int q=-2;q<=2;++q )
+											ustar[q+2] = interp4( (*utop)(ixtop-2,iytop+q,iztop+p), (*utop)(ixtop-1,iytop+q,iztop+p), 
+																 (*utop)(ixtop,iytop+q,iztop+p),   (*utop)(ixtop+1,iytop+q,iztop+p), 
+																 (*utop)(ixtop+2,iytop+q,iztop+p), fac*((double)j-0.5) );
+										uhat[p] = interp4( ustar, fac*((double)k-0.5)+1.5 );
+									}
+									
+									(*u)(ix+j,iy+k,iz)   = interp4right( (*u)(ix+j,iy+k,iz-3), (*u)(ix+j,iy+k,iz-2), 
+																		(*u)(ix+j,iy+k,iz-1), uhat[0], uhat[1] );									
+									(*u)(ix+j,iy+k,iz+1) = interp4rright((*u)(ix+j,iy+k,iz-3), (*u)(ix+j,iy+k,iz-2), 
+																		 (*u)(ix+j,iy+k,iz-1), uhat[0], uhat[1] );
+								}
+							
+							fine_flux = 0.0;
+							fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix,iy,iz);
+							fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix+1,iy,iz);
+							fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix,iy+1,iz);
+							fine_flux += Laplace_flux_O4<double>().apply_z(+1,*u,ix+1,iy+1,iz);
+							
+							coarse_flux = Laplace_flux_O4<double>().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
+							fine_flux /= 4.0;
+							
+							dflux = coarse_flux - fine_flux;
+							
+							for(int i=0;i<2;++i)
+								for( int j=0;j<2;++j)
+								{
+									(*u)(ix+i,iy+j,iz)   += ffac*dflux;
+									(*u)(ix+i,iy+j,iz+1) += ffac*dflux;
+								}
+						}
+					}
+				}
+	}
+};
+
+
+struct interp_O7_fluxcorr
+: public coarse_fine_interpolation
+{
+	
+	
+	template< class G >
+	void interp_coarse_fine( unsigned ilevel, G& coarse, G& fine )
+	{
+		
+		//... use cubic interpolation to get all values on boundary
+		mg_cubic().prolong_bnd( coarse, fine );
+		
+		//... use flux corrected quadratic interpolation for the
+		//... the boundary overlapped by the Laplace stencil
+		G *u    = &fine;
+		G *utop = &coarse;
+		
+		
+		int
+			xoff = u->offset(0),
+			yoff = u->offset(1),
+			zoff = u->offset(2);
+			
+		//... don't do anything if we are not an additional refinement region
+		if( xoff == 0 && yoff == 0 && zoff == 0 )
+			return;
+		
+		int
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+		
+		//... set boundary condition for fine grid
+#pragma omp parallel for schedule(dynamic)
+		for( int ix=-1; ix<=nx; ++ix )
+			for( int iy=-1; iy<=ny; ++iy )
+				for( int iz=-1; iz<=nz; ++iz )
+				{
+					bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz);
+					bool bnd=xbnd|ybnd|zbnd;
+					
+					if( bnd )
+					{
+						
+						int ixtop = (int)(0.5*(double)(ix))+xoff;
+						int iytop = (int)(0.5*(double)(iy))+yoff;
+						int iztop = (int)(0.5*(double)(iz))+zoff;
+						
+						if( ix==-1 ) ixtop=xoff-1;
+						if( iy==-1 ) iytop=yoff-1;
+						if( iz==-1 ) iztop=zoff-1;
+						
+						double ustar[7], uhat[3];			
+						double fac = 0.5;
+						
+						double coarse_flux, fine_flux, dflux;
+						
+						double ffac = 180./222.;
+						
+						// left boundary
+						if( ix == -1 && iy%2==0 && iz%2==0 )
+						{
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{		
+									for( int p=0; p<3; ++p )
+									{
+										for( int q=-3;q<=3;++q )
+											ustar[q+3] = interp6( (*utop)(ixtop+p-2,iytop-3,iztop+q), (*utop)(ixtop+p-2,iytop-2,iztop+q), 
+																 (*utop)(ixtop+p-2,iytop-1,iztop+q), (*utop)(ixtop+p-2,iytop,iztop+q),   
+																 (*utop)(ixtop+p-2,iytop+1,iztop+q), (*utop)(ixtop+p-2,iytop+2,iztop+q), 
+																 (*utop)(ixtop+p-2,iytop+3,iztop+q), fac*((double)j-0.5) );
+										uhat[p] = interp6( ustar, fac*((double)k-0.5)-1.5 );
+									}
+									
+									(*u)(ix,iy+j,iz+k)   = interp6left( uhat[0], uhat[1], uhat[2], (*u)(ix+1,iy+j,iz+k), 
+																	   (*u)(ix+2,iy+j,iz+k), (*u)(ix+3,iy+j,iz+k), (*u)(ix+4,iy+j,iz+k) );
+									(*u)(ix-1,iy+j,iz+k) = interp6lleft( uhat[0], uhat[1], uhat[2], (*u)(ix+1,iy+j,iz+k), 
+																		(*u)(ix+2,iy+j,iz+k), (*u)(ix+3,iy+j,iz+k),(*u)(ix+4,iy+j,iz+k) );
+									(*u)(ix-2,iy+j,iz+k) = interp6llleft( uhat[0], uhat[1], uhat[2], (*u)(ix+1,iy+j,iz+k), 
+																		(*u)(ix+2,iy+j,iz+k), (*u)(ix+3,iy+j,iz+k),(*u)(ix+4,iy+j,iz+k) );
+								}
+							
+							fine_flux = 0.0;
+							fine_flux += Laplace_flux_O6<double>().apply_x(-1,*u,ix+1,iy,iz);
+							fine_flux += Laplace_flux_O6<double>().apply_x(-1,*u,ix+1,iy+1,iz);
+							fine_flux += Laplace_flux_O6<double>().apply_x(-1,*u,ix+1,iy,iz+1);
+							fine_flux += Laplace_flux_O6<double>().apply_x(-1,*u,ix+1,iy+1,iz+1);
+							fine_flux /= 4.0;
+							
+							coarse_flux = Laplace_flux_O6<double>().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
+							
+							dflux = coarse_flux - fine_flux;
+							
+							for(int j=0;j<2;++j)
+								for( int k=0;k<2;++k)
+								{
+									(*u)(ix,iy+j,iz+k)   += ffac*dflux;
+									(*u)(ix-1,iy+j,iz+k) += ffac*dflux;
+									(*u)(ix-2,iy+j,iz+k) += ffac*dflux;
+								}
+							
+							
+						}
+						// right boundary
+						if( ix == nx && iy%2==0 && iz%2==0 )
+						{
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{		
+									for( int p=0; p<3; ++p )
+									{
+										for( int q=-3;q<=3;++q )
+											ustar[q+3] = interp6( (*utop)(ixtop+p,iytop-3,iztop+q), (*utop)(ixtop+p,iytop-2,iztop+q), 
+																 (*utop)(ixtop+p,iytop-1,iztop+q), (*utop)(ixtop+p,iytop,iztop+q),
+																 (*utop)(ixtop+p,iytop+1,iztop+q), (*utop)(ixtop+p,iytop+2,iztop+q),
+																 (*utop)(ixtop+p,iytop+3,iztop+q), fac*((double)j-0.5) );
+										uhat[p] = interp6( ustar, fac*((double)k-0.5)-1.5 );
+									}
+									
+									(*u)(ix,iy+j,iz+k)   = interp6right( (*u)(ix-4,iy+j,iz+k), (*u)(ix-3,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), 
+																		(*u)(ix-1,iy+j,iz+k), uhat[0], uhat[1], uhat[2] );
+									(*u)(ix+1,iy+j,iz+k)   = interp6rright( (*u)(ix-4,iy+j,iz+k), (*u)(ix-3,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), 
+																		(*u)(ix-1,iy+j,iz+k), uhat[0], uhat[1], uhat[2] );
+									(*u)(ix+2,iy+j,iz+k)   = interp6rrright( (*u)(ix-4,iy+j,iz+k), (*u)(ix-3,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), 
+																		(*u)(ix-1,iy+j,iz+k), uhat[0], uhat[1], uhat[2] );
+
+									
+								}
+							
+							fine_flux = 0.0;
+							fine_flux += Laplace_flux_O6<double>().apply_x(+1,*u,ix,iy,iz);
+							fine_flux += Laplace_flux_O6<double>().apply_x(+1,*u,ix,iy+1,iz);
+							fine_flux += Laplace_flux_O6<double>().apply_x(+1,*u,ix,iy,iz+1);
+							fine_flux += Laplace_flux_O6<double>().apply_x(+1,*u,ix,iy+1,iz+1);
+							
+							coarse_flux = Laplace_flux_O6<double>().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
+							fine_flux /= 4.0;
+							
+							dflux = coarse_flux - fine_flux;
+							
+							for(int j=0;j<2;++j)
+								for( int k=0;k<2;++k)
+								{
+									(*u)(ix,iy+j,iz+k)   += ffac*dflux;
+									(*u)(ix+1,iy+j,iz+k) += ffac*dflux;
+									(*u)(ix+2,iy+j,iz+k) += ffac*dflux;
+								}
+							
+						}
+						// bottom boundary
+						if( iy == -1 && ix%2==0 && iz%2==0 )
+						{
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{	
+									for( int p=0; p<3; ++p )
+									{
+										for( int q=-3;q<=3;++q )
+											ustar[q+3] = interp6( (*utop)(ixtop-3,iytop+p-2,iztop+q), (*utop)(ixtop-2,iytop+p-2,iztop+q),
+																 (*utop)(ixtop-1,iytop+p-2,iztop+q), (*utop)(ixtop,iytop+p-2,iztop+q),   
+																 (*utop)(ixtop+1,iytop+p-2,iztop+q), (*utop)(ixtop+2,iytop+p-2,iztop+q),
+																 (*utop)(ixtop+3,iytop+p-2,iztop+q), fac*((double)j-0.5) );
+										uhat[p] = interp6( ustar, fac*((double)k-0.5)-1.5 );
+									}
+									
+									(*u)(ix+j,iy,iz+k)   = interp6left( uhat[0], uhat[1], uhat[2], (*u)(ix+j,iy+1,iz+k), 
+																	   (*u)(ix+j,iy+2,iz+k), (*u)(ix+j,iy+3,iz+k),(*u)(ix+j,iy+4,iz+k) );									
+									(*u)(ix+j,iy-1,iz+k)   = interp6lleft( uhat[0], uhat[1], uhat[2], (*u)(ix+j,iy+1,iz+k), 
+																	   (*u)(ix+j,iy+2,iz+k), (*u)(ix+j,iy+3,iz+k),(*u)(ix+j,iy+4,iz+k) );									
+									(*u)(ix+j,iy-2,iz+k)   = interp6llleft( uhat[0], uhat[1], uhat[2], (*u)(ix+j,iy+1,iz+k), 
+																	   (*u)(ix+j,iy+2,iz+k), (*u)(ix+j,iy+3,iz+k),(*u)(ix+j,iy+4,iz+k) );									
+
+								}
+							
+							fine_flux = 0.0;
+							fine_flux += Laplace_flux_O6<double>().apply_y(-1,*u,ix,iy+1,iz);
+							fine_flux += Laplace_flux_O6<double>().apply_y(-1,*u,ix+1,iy+1,iz);
+							fine_flux += Laplace_flux_O6<double>().apply_y(-1,*u,ix,iy+1,iz+1);
+							fine_flux += Laplace_flux_O6<double>().apply_y(-1,*u,ix+1,iy+1,iz+1);
+							
+							coarse_flux = Laplace_flux_O6<double>().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
+							fine_flux /= 4.0;
+							
+							dflux = coarse_flux - fine_flux;
+							
+							for(int i=0;i<2;++i)
+								for( int k=0;k<2;++k)
+								{
+									(*u)(ix+i,iy,iz+k)   += ffac*dflux;
+									(*u)(ix+i,iy-1,iz+k) += ffac*dflux;
+									(*u)(ix+i,iy-2,iz+k) += ffac*dflux;
+								}
+							
+						}
+						// top boundary
+						if( iy == ny && ix%2==0 && iz%2==0 )
+						{
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{	
+									for( int p=0; p<3; ++p )
+									{
+										for( int q=-3;q<=3;++q )
+											ustar[q+3] = interp6( (*utop)(ixtop-3,iytop+p,iztop+q),  (*utop)(ixtop-2,iytop+p,iztop+q), 
+																 (*utop)(ixtop-1,iytop+p,iztop+q), (*utop)(ixtop,iytop+p,iztop+q), 
+																 (*utop)(ixtop+1,iytop+p,iztop+q), (*utop)(ixtop+2,iytop+p,iztop+q),
+																  (*utop)(ixtop+3,iytop+p,iztop+q), fac*((double)j-0.5) );
+										uhat[p] = interp6( ustar, fac*((double)k-0.5)+1.5 );
+									}
+									
+									(*u)(ix+j,iy,iz+k)   = interp6right( (*u)(ix+j,iy-4,iz+k), (*u)(ix+j,iy-3,iz+k), (*u)(ix+j,iy-2,iz+k), 
+																		(*u)(ix+j,iy-1,iz+k), uhat[0], uhat[1], uhat[2] );									
+									(*u)(ix+j,iy+1,iz+k)   = interp6rright( (*u)(ix+j,iy-4,iz+k), (*u)(ix+j,iy-3,iz+k), (*u)(ix+j,iy-2,iz+k), 
+																		(*u)(ix+j,iy-1,iz+k), uhat[0], uhat[1], uhat[2] );									
+									(*u)(ix+j,iy+2,iz+k)   = interp6rrright( (*u)(ix+j,iy-4,iz+k), (*u)(ix+j,iy-3,iz+k), (*u)(ix+j,iy-2,iz+k), 
+																		(*u)(ix+j,iy-1,iz+k), uhat[0], uhat[1], uhat[2] );									
+
+								}
+							
+							fine_flux = 0.0;
+							fine_flux += Laplace_flux_O6<double>().apply_y(+1,*u,ix,iy,iz);
+							fine_flux += Laplace_flux_O6<double>().apply_y(+1,*u,ix+1,iy,iz);
+							fine_flux += Laplace_flux_O6<double>().apply_y(+1,*u,ix,iy,iz+1);
+							fine_flux += Laplace_flux_O6<double>().apply_y(+1,*u,ix+1,iy,iz+1);
+							
+							coarse_flux = Laplace_flux_O6<double>().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
+							fine_flux /= 4.0;
+							
+							dflux = coarse_flux - fine_flux;
+							
+							for(int i=0;i<2;++i)
+								for( int k=0;k<2;++k)
+								{
+									(*u)(ix+i,iy,iz+k)   += ffac*dflux;
+									(*u)(ix+i,iy+1,iz+k) += ffac*dflux;
+									(*u)(ix+i,iy+2,iz+k) += ffac*dflux;
+								}
+							
+							
+						}
+						// front boundary
+						if( iz == -1 && ix%2==0 && iy%2==0 )
+						{
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{		
+									for( int p=0; p<3; ++p )
+									{
+										for( int q=-3;q<=3;++q )
+											ustar[q+3] = interp6( (*utop)(ixtop-3,iytop+q,iztop+p-2), (*utop)(ixtop-2,iytop+q,iztop+p-2),
+																 (*utop)(ixtop-1,iytop+q,iztop+p-2), (*utop)(ixtop,iytop+q,iztop+p-2), 
+																 (*utop)(ixtop+1,iytop+q,iztop+p-2), (*utop)(ixtop+2,iytop+q,iztop+p-2),
+																 (*utop)(ixtop+3,iytop+q,iztop+p-2), fac*((double)j-0.5) );
+										uhat[p] = interp6( ustar, fac*((double)k-0.5)-1.5 );
+									}
+									
+									(*u)(ix+j,iy+k,iz)   = interp6left( uhat[0], uhat[1], uhat[2], (*u)(ix+j,iy+k,iz+1), 
+																	   (*u)(ix+j,iy+k,iz+2), (*u)(ix+j,iy+k,iz+3),(*u)(ix+j,iy+k,iz+4) );									
+									(*u)(ix+j,iy+k,iz-1)   = interp6lleft( uhat[0], uhat[1], uhat[2], (*u)(ix+j,iy+k,iz+1), 
+																	   (*u)(ix+j,iy+k,iz+2), (*u)(ix+j,iy+k,iz+3), (*u)(ix+j,iy+k,iz+4) );									
+									(*u)(ix+j,iy+k,iz-2)   = interp6llleft( uhat[0], uhat[1], uhat[2], (*u)(ix+j,iy+k,iz+1), 
+																	   (*u)(ix+j,iy+k,iz+2), (*u)(ix+j,iy+k,iz+3), (*u)(ix+j,iy+k,iz+4) );									
+								}
+							
+							
+							fine_flux = 0.0;
+							fine_flux += Laplace_flux_O6<double>().apply_z(-1,*u,ix,iy,iz+1);
+							fine_flux += Laplace_flux_O6<double>().apply_z(-1,*u,ix+1,iy,iz+1);
+							fine_flux += Laplace_flux_O6<double>().apply_z(-1,*u,ix,iy+1,iz+1);
+							fine_flux += Laplace_flux_O6<double>().apply_z(-1,*u,ix+1,iy+1,iz+1);
+							
+							coarse_flux = Laplace_flux_O6<double>().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
+							fine_flux /= 4.0;
+							
+							dflux = coarse_flux - fine_flux;
+							
+							for(int i=0;i<2;++i)
+								for( int j=0;j<2;++j)
+								{
+									(*u)(ix+i,iy+j,iz)   += ffac*dflux;
+									(*u)(ix+i,iy+j,iz-1) += ffac*dflux;
+									(*u)(ix+i,iy+j,iz-2) += ffac*dflux;
+								}
+							
+						}
+						// back boundary
+						if( iz == nz && ix%2==0 && iy%2==0 )
+						{
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+								{		
+									for( int p=0; p<3; ++p )
+									{
+										for( int q=-3;q<=3;++q )
+											ustar[q+3] = interp6( (*utop)(ixtop-3,iytop+q,iztop+p), (*utop)(ixtop-2,iytop+q,iztop+p), 
+																 (*utop)(ixtop-1,iytop+q,iztop+p), (*utop)(ixtop,iytop+q,iztop+p),   
+																 (*utop)(ixtop+1,iytop+q,iztop+p), (*utop)(ixtop+2,iytop+q,iztop+p), 
+																 (*utop)(ixtop+3,iytop+q,iztop+p), fac*((double)j-0.5) );
+										uhat[p] = interp6( ustar, fac*((double)k-0.5)+1.5 );
+									}
+									
+									(*u)(ix+j,iy+k,iz)   = interp6right( (*u)(ix+j,iy+k,iz-4), (*u)(ix+j,iy+k,iz-3), (*u)(ix+j,iy+k,iz-2), 
+																		(*u)(ix+j,iy+k,iz-1), uhat[0], uhat[1], uhat[2] );
+									(*u)(ix+j,iy+k,iz+1)   = interp6rright( (*u)(ix+j,iy+k,iz-4), (*u)(ix+j,iy+k,iz-3), (*u)(ix+j,iy+k,iz-2), 
+																		(*u)(ix+j,iy+k,iz-1), uhat[0], uhat[1], uhat[2] );
+									(*u)(ix+j,iy+k,iz+2)   = interp6rrright( (*u)(ix+j,iy+k,iz-4), (*u)(ix+j,iy+k,iz-3), (*u)(ix+j,iy+k,iz-2), 
+																		(*u)(ix+j,iy+k,iz-1), uhat[0], uhat[1], uhat[2] );
+
+								}
+							
+							fine_flux = 0.0;
+							fine_flux += Laplace_flux_O6<double>().apply_z(+1,*u,ix,iy,iz);
+							fine_flux += Laplace_flux_O6<double>().apply_z(+1,*u,ix+1,iy,iz);
+							fine_flux += Laplace_flux_O6<double>().apply_z(+1,*u,ix,iy+1,iz);
+							fine_flux += Laplace_flux_O6<double>().apply_z(+1,*u,ix+1,iy+1,iz);
+							
+							coarse_flux = Laplace_flux_O6<double>().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
+							fine_flux /= 4.0;
+							
+							dflux = coarse_flux - fine_flux;
+							
+							for(int i=0;i<2;++i)
+								for( int j=0;j<2;++j)
+								{
+									(*u)(ix+i,iy+j,iz)   += ffac*dflux;
+									(*u)(ix+i,iy+j,iz+1) += ffac*dflux;
+									(*u)(ix+i,iy+j,iz+2) += ffac*dflux;
+								}
+						}
+					}
+				}
+	}
+};
+
+
+#endif // __MG_INTERP_HH
diff --git a/mg_operators.hh b/mg_operators.hh
new file mode 100644
index 0000000..f3d235f
--- /dev/null
+++ b/mg_operators.hh
@@ -0,0 +1,1362 @@
+/*
+ 
+ mg_operators.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+#ifndef __MG_OPERATORS_HH
+#define __MG_OPERATORS_HH
+
+
+class mg_cubic
+{
+protected:
+	template< typename real_t >
+	inline double CUBE( real_t x ) const
+	{ return x*x*x; }
+	
+	template< typename T >
+	inline T SQR( T x ) const
+	{ return x*x; }
+
+public:
+	template< int sx, int sy, int sz, typename M >
+	inline double interp_cubic( int x, int y, int z, M& V, double s=1.0 ) const
+	{
+		int				i, j, k;
+		double           dx, dy, dz;
+		double           u[4], v[4], w[4];
+		double           r[4], q[4];
+		double           vox = 0;
+		
+		dx = 0.5*((double)sz - 0.5)*s;
+		dy = 0.5*((double)sy - 0.5)*s;
+		dz = 0.5*((double)sx - 0.5)*s;
+		
+		//int xDim = V.m_nx, yDim = V.m_ny;
+		//int xyDim = xDim * yDim;
+		
+		//double *pv = &V(x-1,y-1,z-1);
+		
+		/* factors for Catmull-Rom interpolation */
+		
+		/*
+		u[0] = -0.5 * CUBE (dx) + SQR (dx) - 0.5 * dx;
+		u[1] = 1.5 * CUBE (dx) - 2.5 * SQR (dx) + 1;
+		u[2] = -1.5 * CUBE (dx) + 2 * SQR (dx) + 0.5 * dx;
+		u[3] = 0.5 * CUBE (dx) - 0.5 * SQR (dx);
+		
+		v[0] = -0.5 * CUBE (dy) + SQR (dy) - 0.5 * dy;
+		v[1] = 1.5 * CUBE (dy) - 2.5 * SQR (dy) + 1;
+		v[2] = -1.5 * CUBE (dy) + 2 * SQR (dy) + 0.5 * dy;
+		v[3] = 0.5 * CUBE (dy) - 0.5 * SQR (dy);
+		
+		w[0] = -0.5 * CUBE (dz) + SQR (dz) - 0.5 * dz;
+		w[1] = 1.5 * CUBE (dz) - 2.5 * SQR (dz) + 1;
+		w[2] = -1.5 * CUBE (dz) + 2 * SQR (dz) + 0.5 * dz;
+		w[3] = 0.5 * CUBE (dz) - 0.5 * SQR (dz);
+		*/
+		
+		if( sz == 1 )
+		{	
+			u[0] = -4.5/64.0;
+			u[1] = 55.5/64.0;
+			u[2] = 14.5/64.0;
+			u[3] = -1.5/64.0;
+		} else {
+			u[0] = -1.5/64.0;
+			u[1] = 14.5/64.0;
+			u[2] = 55.5/64.0;
+			u[3] = -4.5/64.0;
+		}
+		
+		if( sy == 1 )
+		{
+			v[0] = -4.5/64.0;
+			v[1] = 55.5/64.0;
+			v[2] = 14.5/64.0;
+			v[3] = -1.5/64.0;
+		} else{
+			v[0] = -1.5/64.0;
+			v[1] = 14.5/64.0;
+			v[2] = 55.5/64.0;
+			v[3] = -4.5/64.0;
+		}
+		
+		if( sx == 1 )
+		{
+			w[0] = -4.5/64.0;
+			w[1] = 55.5/64.0;
+			w[2] = 14.5/64.0;
+			w[3] = -1.5/64.0;
+		} else {
+			w[0] = -1.5/64.0;
+			w[1] = 14.5/64.0;
+			w[2] = 55.5/64.0;
+			w[3] = -4.5/64.0;
+		}
+
+
+		 
+		 
+		/*for (k = 0; k < 4; k++)
+		{
+			q[k] = 0;
+			for (j = 0; j < 4; j++)
+			{
+				r[j] = 0;
+				for (i = 0; i < 4; i++)
+				{
+					r[j] += u[i] * *pv;
+					pv++;
+				}
+				q[k] += v[j] * r[j];
+				pv += xDim - 4;
+			}
+			vox += w[k] * q[k];
+			pv += xyDim - 4 * xDim;
+		}*/
+		
+		for (k = 0; k < 4; k++)
+		{
+			q[k] = 0;
+			for (j = 0; j < 4; j++)
+			{
+				r[j] = 0;
+				for (i = 0; i < 4; i++)
+				{
+					//r[j] += u[i] * V(x+k-1,y+j-1,z+i-1);
+					r[j] += u[i] * V(x+k-2+sx,y+j-2+sy,z+i-2+sz);
+					//pv++;
+				}
+				q[k] += v[j] * r[j];
+				//pv += xDim - 4;
+			}
+			vox += w[k] * q[k];
+			//pv += xyDim - 4 * xDim;
+		}
+		
+		
+		return vox;
+		
+	}
+	
+public:
+	
+	
+	//... restricts v to V
+	template< typename m1, typename m2 >
+	inline void restrict( m1& v, m2& V )
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+		/*for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+					V(ox+i,oy+j,oz+k) = 0.125 * ( v(i2+1,j2,k2) + v(i2,j2+1,k2) + v(i2,j2,k2+1) + v(i2+1,j2+1,k2) +
+									    v(i2+1,j2,k2+1) + v(i2+1,j2+1,k2+1) + v(i2,j2+1,k2+1) + v(i2,j2,k2) );*/
+		/*#pragma omp parallel for
+		for( int i=0; i<nx; ++i )
+		{
+			int i2 = 2*i;
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+					V(ox+i,oy+j,oz+k) = 0.125 * ( interp_cubic<1,0,0>(i2+1,j2,k2,v)
+												 +interp_cubic<0,1,0>(i2,j2+1,k2,v)
+												 +interp_cubic<0,0,1>(i2,j2,k2+1,v)
+												 +interp_cubic<1,1,0>(i2+1,j2+1,k2,v)
+												 +interp_cubic<0,1,1>(i2,j2+1,k2+1,v)
+												 +interp_cubic<1,0,1>(i2+1,j2,k2+1,v)
+												 +interp_cubic<1,1,1>(i2+1,j2+1,k2+1,v)
+												 +interp_cubic<0,0,0>(i2,j2,k2,v));
+		} */
+		
+#pragma omp parallel for
+		for( int i=0; i<nx; ++i )
+		{
+			int i2 = 2*i;
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+					V(ox+i,oy+j,oz+k) = 0.125 * ( interp_cubic<1,0,0>(i2,j2,k2,v)
+												 +interp_cubic<0,1,0>(i2,j2,k2,v)
+												 +interp_cubic<0,0,1>(i2,j2,k2,v)
+												 +interp_cubic<1,1,0>(i2,j2,k2,v)
+												 +interp_cubic<0,1,1>(i2,j2,k2,v)
+												 +interp_cubic<1,0,1>(i2,j2,k2,v)
+												 +interp_cubic<1,1,1>(i2,j2,k2,v)
+												 +interp_cubic<0,0,0>(i2,j2,k2,v));
+		} 
+												 		
+		
+	}
+	
+	//... restricts v to V
+	template< typename m1, typename m2 >
+	inline void restrict_add( m1& v, m2& V )
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+		/*for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+		 for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+		 for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+		 V(ox+i,oy+j,oz+k) = 0.125 * ( v(i2+1,j2,k2) + v(i2,j2+1,k2) + v(i2,j2,k2+1) + v(i2+1,j2+1,k2) +
+		 v(i2+1,j2,k2+1) + v(i2+1,j2+1,k2+1) + v(i2,j2+1,k2+1) + v(i2,j2,k2) );*/
+#pragma omp parallel for
+		for( int i=0; i<nx; ++i )
+		{
+			int i2 = 2*i;
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+					V(ox+i,oy+j,oz+k) += 0.125 * ( interp_cubic<1,0,0>(i2+1,j2,k2,v)
+												 +interp_cubic<0,1,0>(i2,j2+1,k2,v)
+												 +interp_cubic<0,0,1>(i2,j2,k2+1,v)
+												 +interp_cubic<1,1,0>(i2+1,j2+1,k2,v)
+												 +interp_cubic<0,1,1>(i2,j2+1,k2+1,v)
+												 +interp_cubic<1,0,1>(i2+1,j2,k2+1,v)
+												 +interp_cubic<1,1,1>(i2+1,j2+1,k2+1,v)
+												 +interp_cubic<0,0,0>(i2,j2,k2,v));
+		} 
+		
+		
+	}
+	
+	/*template< typename m1, typename m2 >
+	inline void restrict( m1& v, m2& V )
+	{
+		int 
+		nx = V.size(0), 
+		ny = V.size(1), 
+		nz = V.size(2);
+		
+		for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+					V(i,j,k)  = interp_cubic<1,1,1>(i2, j2, k2, v, 2.0);
+	}*/
+	
+	template< typename m1, typename m2 >
+	inline void restrict_bnd( const m1& v, m2& V ) const
+	{	
+		unsigned 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+		
+		//... boundary points
+		for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+			for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
+				V(ox-1,j+oy,k+oz)  = 0.125*(v(-1,j2,k2)+v(-1,j2+1,k2)+v(-1,j2,k2+1)+v(-1,j2+1,k2+1)+
+											v(-2,j2,k2)+v(-2,j2+1,k2)+v(-2,j2,k2+1)+v(-2,j2+1,k2+1));
+				V(ox+nx,j+oy,k+oz) = 0.125*(v(2*nx,j2,k2)+v(2*nx,j2+1,k2)+v(2*nx,j2,k2+1)+v(2*nx,j2+1,k2+1)+
+											v(2*nx+1,j2,k2)+v(2*nx+1,j2+1,k2)+v(2*nx+1,j2,k2+1)+v(2*nx+1,j2+1,k2+1));
+			}
+		
+		for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+			for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
+				V(i+ox,oy-1,k+oz)  = 0.125*(v(i2,-1,k2)+v(i2+1,-1,k2)+v(i2,-1,k2+1)+v(i2+1,-1,k2+1)+
+										    v(i2,-2,k2)+v(i2+1,-2,k2)+v(i2,-2,k2+1)+v(i2+1,-2,k2+1) );
+				V(i+ox,oy+ny,k+oz) = 0.125*(v(i2,2*ny,k2)+v(i2+1,2*ny,k2)+v(i2,2*ny,k2+1)+v(i2+1,2*ny,k2+1)+
+											v(i2,2*ny+1,k2)+v(i2+1,2*ny+1,k2)+v(i2,2*ny+1,k2+1)+v(i2+1,2*ny+1,k2+1));
+			}
+		
+		for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 ){
+				V(i+ox,j+oy,oz-1)  = 0.125*(v(i2,j2,-1)+v(i2+1,j2,-1)+v(i2,j2+1,-1)+v(i2+1,j2+1,-1)+
+											v(i2,j2,-2)+v(i2+1,j2,-2)+v(i2,j2+1,-2)+v(i2+1,j2+1,-2));
+				V(i+ox,j+oy,oz+nz) = 0.125*(v(i2,j2,2*nz)+v(i2+1,j2,2*nz)+v(i2,j2+1,2*nz)+v(i2+1,j2+1,2*nz)+
+											v(i2,j2,2*nz+1)+v(i2+1,j2,2*nz+1)+v(i2,j2+1,2*nz+1)+v(i2+1,j2+1,2*nz+1));
+			}
+		
+		
+	}
+	
+	template< typename m1, typename m2 >
+	inline void restrict_bnd_add( const m1& v, m2& V ) const
+	{
+		
+		unsigned 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+		
+		//... boundary points
+		for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+			for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
+				V(ox-1,j+oy,k+oz)  += 0.125*(v(-1,j2,k2)+v(-1,j2+1,k2)+v(-1,j2,k2+1)+v(-1,j2+1,k2+1)+
+									 		 v(-2,j2,k2)+v(-2,j2+1,k2)+v(-2,j2,k2+1)+v(-2,j2+1,k2+1));
+				V(ox+nx,j+oy,k+oz) += 0.125*(v(2*nx,j2,k2)+v(2*nx,j2+1,k2)+v(2*nx,j2,k2+1)+v(2*nx,j2+1,k2+1)+
+											 v(2*nx+1,j2,k2)+v(2*nx+1,j2+1,k2)+v(2*nx+1,j2,k2+1)+v(2*nx+1,j2+1,k2+1));
+			}
+		
+		for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+			for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
+				V(i+ox,oy-1,k+oz)  += 0.125*(v(i2,-1,k2)+v(i2+1,-1,k2)+v(i2,-1,k2+1)+v(i2+1,-1,k2+1)+
+										     v(i2,-2,k2)+v(i2+1,-2,k2)+v(i2,-2,k2+1)+v(i2+1,-2,k2+1) );
+				V(i+ox,oy+ny,k+oz) += 0.125*(v(i2,2*ny,k2)+v(i2+1,2*ny,k2)+v(i2,2*ny,k2+1)+v(i2+1,2*ny,k2+1)+
+											 v(i2,2*ny+1,k2)+v(i2+1,2*ny+1,k2)+v(i2,2*ny+1,k2+1)+v(i2+1,2*ny+1,k2+1));
+			}
+		
+		for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 ){
+				V(i+ox,j+oy,oz-1)  += 0.125*(v(i2,j2,-1)+v(i2+1,j2,-1)+v(i2,j2+1,-1)+v(i2+1,j2+1,-1)+
+											 v(i2,j2,-2)+v(i2+1,j2,-2)+v(i2,j2+1,-2)+v(i2+1,j2+1,-2));
+				V(i+ox,j+oy,oz+nz) += 0.125*(v(i2,j2,2*nz)+v(i2+1,j2,2*nz)+v(i2,j2+1,2*nz)+v(i2+1,j2+1,2*nz)+
+											 v(i2,j2,2*nz+1)+v(i2+1,j2,2*nz+1)+v(i2,j2+1,2*nz+1)+v(i2+1,j2+1,2*nz+1));
+			}
+		
+		
+	}
+	
+	
+	
+
+			
+	template< typename m1, typename m2 >
+	inline void prolong( m1& V, m2& v ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+		#pragma omp parallel for
+		for( int i=0; i<nx; ++i )
+		{
+			int i2(2*i);
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+				{
+					v(i2+1,j2,k2)		= interp_cubic<1,0,0>(i+ox,j+oy,k+oz,V);
+					v(i2,j2+1,k2)		= interp_cubic<0,1,0>(i+ox,j+oy,k+oz,V);
+					v(i2,j2,k2+1)		= interp_cubic<0,0,1>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2+1,k2)		= interp_cubic<1,1,0>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2,k2+1)		= interp_cubic<1,0,1>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2+1,k2+1)	= interp_cubic<1,1,1>(i+ox,j+oy,k+oz,V);
+					v(i2,j2+1,k2+1)		= interp_cubic<0,1,1>(i+ox,j+oy,k+oz,V);
+					v(i2,j2,k2)			= interp_cubic<0,0,0>(i+ox,j+oy,k+oz,V);
+				}
+		}
+	}
+
+	template< typename m1, typename m2 >
+	inline void prolong_add( m1& V, m2& v ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+#pragma omp parallel for
+		for( int i=0; i<nx; ++i )
+		{
+			int i2(2*i);
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+				{
+					v(i2+1,j2,k2)		+= interp_cubic<1,0,0>(i+ox,j+oy,k+oz,V);
+					v(i2,j2+1,k2)		+= interp_cubic<0,1,0>(i+ox,j+oy,k+oz,V);
+					v(i2,j2,k2+1)		+= interp_cubic<0,0,1>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2+1,k2)		+= interp_cubic<1,1,0>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2,k2+1)		+= interp_cubic<1,0,1>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2+1,k2+1)	+= interp_cubic<1,1,1>(i+ox,j+oy,k+oz,V);
+					v(i2,j2+1,k2+1)		+= interp_cubic<0,1,1>(i+ox,j+oy,k+oz,V);
+					v(i2,j2,k2)			+= interp_cubic<0,0,0>(i+ox,j+oy,k+oz,V);
+				}
+		}
+	}
+	
+	template< typename m1, typename m2 >
+	inline void prolong_bnd( m1& V, m2& v )
+	{
+		//		int 
+		//		nx = V.size(0), 
+		//		ny = V.size(1), 
+		//		nz = V.size(2);
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+		int nbnd = V.m_nbnd;
+		int nbndtop = nbnd/2;
+		
+		
+		#pragma omp parallel for
+		for( int i=-nbndtop; i<nx+nbndtop; ++i )
+		{	
+			int i2(2*i);
+			for( int j=-nbndtop,j2=-2*nbndtop; j<ny+nbndtop; ++j,j2+=2 )
+				for( int k=-nbndtop,k2=-2*nbndtop; k<nz+nbndtop; ++k,k2+=2 )
+				{
+					
+					if( i>=0&&i<nx&&j>=0&&j<ny&&k>=0&&k<nz )
+						continue;
+					
+					v(i2+1,j2,k2)		= interp_cubic<1,0,0>(i+ox,j+oy,k+oz,V);
+					v(i2,j2+1,k2)		= interp_cubic<0,1,0>(i+ox,j+oy,k+oz,V);
+					v(i2,j2,k2+1)		= interp_cubic<0,0,1>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2+1,k2)		= interp_cubic<1,1,0>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2,k2+1)		= interp_cubic<1,0,1>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2+1,k2+1)	= interp_cubic<1,1,1>(i+ox,j+oy,k+oz,V);
+					v(i2,j2+1,k2+1)		= interp_cubic<0,1,1>(i+ox,j+oy,k+oz,V);
+					v(i2,j2,k2)			= interp_cubic<0,0,0>(i+ox,j+oy,k+oz,V);
+				}
+		}
+	}
+	
+	template< typename m1, typename m2 >
+	inline void prolong_add_bnd( m1& V, m2& v )
+	{
+		unsigned 
+		nx = V.size(0), 
+		ny = V.size(1), 
+		nz = V.size(2);
+		
+		#pragma omp parallel for
+		for( int i=-1; i<=nx; ++i )
+		{	
+			int i2(2*i);
+			for( int j=-1,j2=-2; j<=ny; ++j,j2+=2 )
+				for( int k=-1,k2=-2; k<=nz; ++k,k2+=2 )
+				{
+					
+					if( i>=0&&i<nx&&j>=0&&j<ny&&k>=0&&k<nz )
+						continue;
+					
+					v(i2+1,j2,k2)		+= interp_cubic<1,0,0>(i,j,k,V);
+					v(i2,j2+1,k2)		+= interp_cubic<0,1,0>(i,j,k,V);
+					v(i2,j2,k2+1)		+= interp_cubic<0,0,1>(i,j,k,V);
+					v(i2+1,j2+1,k2)		+= interp_cubic<1,1,0>(i,j,k,V);
+					v(i2+1,j2,k2+1)		+= interp_cubic<1,0,1>(i,j,k,V);
+					v(i2+1,j2+1,k2+1)	+= interp_cubic<1,1,1>(i,j,k,V);
+					v(i2,j2+1,k2+1)		+= interp_cubic<0,1,1>(i,j,k,V);
+					v(i2,j2,k2)			+= interp_cubic<0,0,0>(i,j,k,V);
+				}
+		}
+	}
+	
+	
+};
+
+
+
+class mg_lin
+{
+public:
+	
+	template< typename M >
+	inline double restr_lin( int x, int y, int z, M& V ) const
+	{
+		double w[4] = { 1.0/4.0, 3.0/4.0, 3.0/4.0, 1.0/4.0 };
+		double vox = 0.0;
+		int i,j,k;
+		
+		for( i=0; i<4; ++i )
+			for( j=0; j<4; ++j )
+				for( k=0; k<4; ++k )
+				{
+					vox += w[i]*w[j]*w[k] * V(x+i-1,y+j-1,z+k-1);
+					//std::cerr << "[" << i << "," << j << "," << k << "] -- " << u[i]*v[j]*w[k] << std::endl;
+				}
+		//vox = (V(x+1,y,z)+V(x,y+1,z)+V(x,y,z+1)+V(x,y,z)+V(x+1,y+1,z)+V(x+1,y,z+1)+V(x,y+1,z+1)+V(x+1,y+1,z+1));
+		return vox;
+	}
+	
+	template< int sx, int sy, int sz, typename M >
+	inline double interp_lin( int x, int y, int z, M& V, double s=1.0 ) const
+	{
+		double           u[2], v[2], w[2];
+		double			 r[2], q[2];
+		double           vox = 0;
+		int				 i,j,k;
+		
+		
+		if( sx==0 ){
+			u[0] = 1.0/4.0;
+			u[1] = 3.0/4.0;
+		}else{
+			u[0] = 3.0/4.0;
+			u[1] = 1.0/4.0;
+		}
+		if( sy==0 ){
+			v[0] = 1.0/4.0;
+			v[1] = 3.0/4.0;
+		}else{
+			v[0] = 3.0/4.0;
+			v[1] = 1.0/4.0;
+		}
+		if( sz==0 ){
+			w[0] = 1.0/4.0;
+			w[1] = 3.0/4.0;
+		}else{
+			w[0] = 3.0/4.0;
+			w[1] = 1.0/4.0;
+		}
+		
+		/*if( sx==0 ){
+		 u[0] = 1.0/64.0;
+		 u[1] = 3.0/64.0;
+		 }else{
+		 u[0] = 3.0/64.0;
+		 u[1] = 1.0/64.0;
+		 }
+		 if( sy==0 ){
+		 v[0] = 1.0/64.0;
+		 v[1] = 3.0/64.0;
+		 }else{
+		 v[0] = 3.0/64.0;
+		 v[1] = 1.0/64.0;
+		 }
+		 if( sz==0 ){
+		 w[0] = 1.0/64.0;
+		 w[1] = 3.0/64.0;
+		 }else{
+		 w[0] = 3.0/64.0;
+		 w[1] = 1.0/64.0;
+		 }*/
+		
+		//std::cerr << "(" << sx << "," << sy << "," << sz << ") : " << std::endl;
+		/*for (k = 0; k < 2; k++)
+		{
+			q[k] = 0;
+			for (j = 0; j < 2; j++)
+			{
+				r[j] = 0;
+				for (i = 0; i < 2; i++)
+					r[j] += u[i] * V(x+k+sx-1,y+j+sy-1,z+i+sz-1);
+				q[k] += v[j] * r[j];
+			}
+			//std::cerr << "[" << i << "," << j << "," << k << "] -- " << w[k]*q[k] << std::endl;
+			vox += w[k] * q[k];
+		}*/
+		for( i=0; i<2; ++i )
+			for( j=0; j<2; ++j )
+				for( k=0; k<2; ++k )
+				{
+					vox += u[i]*v[j]*w[k] * V(x+i+sx-1,y+j+sy-1,z+k+sz-1);
+					//std::cerr << "[" << i << "," << j << "," << k << "] -- " << u[i]*v[j]*w[k] << std::endl;
+				}
+		//std::cerr << "(" << sx << "," << sy << "," << sz << ") : " << vox <<std::endl;
+		//throw std::runtime_error("break");
+		return vox;
+	}
+	
+	
+	template< typename m1, typename m2 >
+	inline void prolong( const m1& V, m2& v ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+		#pragma omp parallel for
+		for( int i=0; i<nx; ++i )
+		{
+			int i2(2*i);
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+				{
+					v(i2+1,j2,k2)		= interp_lin<1,0,0>(i+ox,j+oy,k+oz,V);
+					v(i2,j2+1,k2)		= interp_lin<0,1,0>(i+ox,j+oy,k+oz,V);
+					v(i2,j2,k2+1)		= interp_lin<0,0,1>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2+1,k2)		= interp_lin<1,1,0>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2,k2+1)		= interp_lin<1,0,1>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2+1,k2+1)	= interp_lin<1,1,1>(i+ox,j+oy,k+oz,V);
+					v(i2,j2+1,k2+1)		= interp_lin<0,1,1>(i+ox,j+oy,k+oz,V);
+					v(i2,j2,k2)			= interp_lin<0,0,0>(i+ox,j+oy,k+oz,V);
+				}
+		}
+	}
+	
+	template< typename m1, typename m2 >
+	inline void prolong_add( const m1& V, m2& v ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+#pragma omp parallel for
+		for( int i=0; i<nx; ++i )
+		{
+			int i2(2*i);
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+				{
+					v(i2+1,j2,k2)		+= interp_lin<1,0,0>(i+ox,j+oy,k+oz,V);
+					v(i2,j2+1,k2)		+= interp_lin<0,1,0>(i+ox,j+oy,k+oz,V);
+					v(i2,j2,k2+1)		+= interp_lin<0,0,1>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2+1,k2)		+= interp_lin<1,1,0>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2,k2+1)		+= interp_lin<1,0,1>(i+ox,j+oy,k+oz,V);
+					v(i2+1,j2+1,k2+1)	+= interp_lin<1,1,1>(i+ox,j+oy,k+oz,V);
+					v(i2,j2+1,k2+1)		+= interp_lin<0,1,1>(i+ox,j+oy,k+oz,V);
+					v(i2,j2,k2)			+= interp_lin<0,0,0>(i+ox,j+oy,k+oz,V);
+				}
+		}
+	}
+	
+	
+	template< typename m1, typename m2 >
+	inline void restrict( const m1& v, m2& V ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+	#pragma omp parallel for
+		for( int i=0; i<nx; ++i )
+		{
+			int i2 = 2*i;
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+					V(i+ox,j+oy,k+oz) = 0.125 * restr_lin(i2, j2, k2, v);
+					/*V(i+ox,j+oy,k+oz) = 0.125*(interp_lin<1,0,0>(i2,j2,k2,v)+interp_lin<0,1,0>(i2,j2,k2,v)+
+												 interp_lin<0,0,1>(i2,j2,k2,v)+interp_lin<1,1,0>(i2,j2,k2,v)+
+												 interp_lin<1,0,1>(i2,j2,k2,v)+interp_lin<0,1,1>(i2,j2,k2,v)+
+												 interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2,j2,k2,v));*/
+		}
+		
+		/*
+		for( int i=0; i<nx; ++i )
+		{
+			int i2 = 2*i;
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+				{
+					V(i+ox,j+oy,k+oz) = 0.125 * (interp_lin<1,0,0>(i2+2,j2,k2,v)+interp_lin<0,1,0>(i2,j2+2,k2,v)+
+												  interp_lin<0,0,1>(i2,j2,k2+2,v)+interp_lin<1,1,0>(i2+1,j2+2,k2,v)+
+												  interp_lin<1,0,1>(i2+2,j2,k2+2,v)+interp_lin<0,1,1>(i2,j2+2,k2+2,v)+
+												  interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2+2,j2+2,k2+2,v));
+										
+				}
+		}
+		 */
+	}	
+#if 0	
+	template< typename m1, typename m2 >
+	inline void restrict_add( const m1& v, m2& V ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+		/*#pragma omp parallel for
+		for( int i=0; i<nx; ++i )
+		{
+			int i2 = 2*i;
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+					V(i+ox,j+oy,k+oz) += 0.125 * (interp_lin<1,0,0>(i2,j2,k2,v)+interp_lin<0,1,0>(i2,j2,k2,v)+
+												 interp_lin<0,0,1>(i2,j2,k2,v)+interp_lin<1,1,0>(i2,j2,k2,v)+
+												 interp_lin<1,0,1>(i2,j2,k2,v)+interp_lin<0,1,1>(i2,j2,k2,v)+
+												 interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2,j2,k2,v));
+		}*/
+		
+		for( int i=0; i<nx; ++i )
+		{
+			int i2 = 2*i;
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+				{
+					V(i+ox,j+oy,k+oz) += 0.125 * (interp_lin<1,0,0>(i2+2,j2,k2,v)+interp_lin<0,1,0>(i2,j2+2,k2,v)+
+												 interp_lin<0,0,1>(i2,j2,k2+2,v)+interp_lin<1,1,0>(i2+1,j2+2,k2,v)+
+												 interp_lin<1,0,1>(i2+2,j2,k2+2,v)+interp_lin<0,1,1>(i2,j2+2,k2+2,v)+
+												 interp_lin<0,0,0>(i2,j2,k2,v)+interp_lin<1,1,1>(i2+2,j2+2,k2+2,v));
+					
+				}
+		}
+	}	
+#endif
+	
+};
+
+
+
+//template< typename T >
+class mg_linear
+{
+public:
+	//typedef T real_t;
+	
+	
+	//inline void prolong_bnd( const MeshvarBnd<real_t>& V, MeshvarBnd<real_t>& v ) const
+	template< typename m1, typename m2 >
+	inline void prolong_bnd( const m1& V, m2& v ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+#pragma omp parallel 
+		{
+			
+			{
+				for(int q=0;q<2; ++q)
+				{
+					int i=nx;
+					if(q==0) i=-1;
+					
+					int i2 = 2*i;
+					for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+						for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+						{
+							int ii=i+ox,jj=j+oy,kk=k+oz;
+							v(i2,j2,k2)			= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
+							v(i2+1,j2,k2)		= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
+							v(i2,j2+1,k2)		= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
+							v(i2,j2,k2+1)		= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
+							
+							v(i2+1,j2+1,k2)		= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
+							v(i2+1,j2,k2+1)		= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
+							v(i2,j2+1,k2+1)		= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
+							v(i2+1,j2+1,k2+1)	= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
+						}
+				}
+			}
+			
+			{
+				for(int q=0;q<2; ++q)
+				{
+					int j=ny;
+					if(q==0) j=-1;
+					
+					int j2 = 2*j;
+					for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+						for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+						{
+							int ii=i+ox,jj=j+oy,kk=k+oz;
+							v(i2,j2,k2)			= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
+							v(i2+1,j2,k2)		= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
+							v(i2,j2+1,k2)		= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
+							v(i2,j2,k2+1)		= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
+							
+							v(i2+1,j2+1,k2)		= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
+							v(i2+1,j2,k2+1)		= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
+							v(i2,j2+1,k2+1)		= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
+							v(i2+1,j2+1,k2+1)	= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
+						}
+				}
+			}
+			
+			{
+				for(int q=0;q<2; ++q)
+				{
+					int k = nz;
+					if(q==0) k=-1;
+					
+					int k2 = 2*k;
+					for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+						for( int j=0,j2=0; j<nx; ++i,i2+=2 )
+						{
+							int ii=i+ox,jj=j+oy,kk=k+oz;
+							v(i2,j2,k2)			= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
+							v(i2+1,j2,k2)		= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
+							v(i2,j2+1,k2)		= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
+							v(i2,j2,k2+1)		= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
+							
+							v(i2+1,j2+1,k2)		= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
+							v(i2+1,j2,k2+1)		= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
+							v(i2,j2+1,k2+1)		= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
+							v(i2+1,j2+1,k2+1)	= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
+							
+							
+						}
+				}
+			}
+			
+			
+#pragma omp barrier
+		}
+	}
+	
+	template< typename m1, typename m2 >
+	inline void prolong_add_bnd( const m1& V, m2& v ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+#pragma omp parallel 
+		{
+			
+			{
+				for(int q=0;q<2; ++q)
+				{
+					int i=nx;
+					if(q==0) i=-1;
+					
+					int i2 = 2*i;
+					for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+						for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+						{
+							int ii=i+ox,jj=j+oy,kk=k+oz;
+							v(i2,j2,k2)			+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
+							v(i2+1,j2,k2)		+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
+							v(i2,j2+1,k2)		+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
+							v(i2,j2,k2+1)		+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
+							
+							v(i2+1,j2+1,k2)		+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
+							v(i2+1,j2,k2+1)		+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
+							v(i2,j2+1,k2+1)		+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
+							v(i2+1,j2+1,k2+1)	+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
+						}
+				}
+			}
+			
+			{
+				for(int q=0;q<2; ++q)
+				{
+					int j=ny;
+					if(q==0) j=-1;
+					
+					int j2 = 2*j;
+					for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+						for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+						{
+							int ii=i+ox,jj=j+oy,kk=k+oz;
+							v(i2,j2,k2)			+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
+							v(i2+1,j2,k2)		+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
+							v(i2,j2+1,k2)		+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
+							v(i2,j2,k2+1)		+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
+							
+							v(i2+1,j2+1,k2)		+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
+							v(i2+1,j2,k2+1)		+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
+							v(i2,j2+1,k2+1)		+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
+							v(i2+1,j2+1,k2+1)	+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
+						}
+				}
+			}
+			
+			{
+				for(int q=0;q<2; ++q)
+				{
+					int k = nz;
+					if(q==0) k=-1;
+					
+					int k2 = 2*k;
+					for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+						for( int j=0,j2=0; j<nx; ++i,i2+=2 )
+						{
+							int ii=i+ox,jj=j+oy,kk=k+oz;
+							v(i2,j2,k2)			+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
+							v(i2+1,j2,k2)		+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
+							v(i2,j2+1,k2)		+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
+							v(i2,j2,k2+1)		+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
+							
+							v(i2+1,j2+1,k2)		+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
+							v(i2+1,j2,k2+1)		+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
+							v(i2,j2+1,k2+1)		+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
+							v(i2+1,j2+1,k2+1)	+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
+							
+							
+						}
+				}
+			}
+			
+			
+#pragma omp barrier
+		}
+	}
+	
+	template< typename m1, typename m2 >
+	inline void prolong( const m1& V, m2& v ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+#pragma omp parallel for
+		for( int i=0; i<nx; ++i ){
+			int i2 = 2*i;
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+				{
+					int ii=i+ox,jj=j+oy,kk=k+oz;
+					v(i2,j2,k2)			= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
+					v(i2+1,j2,k2)		= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
+					v(i2,j2+1,k2)		= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
+					v(i2,j2,k2+1)		= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
+					
+					v(i2+1,j2+1,k2)		= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
+					v(i2+1,j2,k2+1)		= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
+					v(i2,j2+1,k2+1)		= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
+					v(i2+1,j2+1,k2+1)	= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
+					
+					
+				}
+		}
+	}
+	
+	template< typename m1, typename m2 >
+	inline void prolong_add( const m1& V, m2& v ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+#pragma omp parallel for
+		for( int i=0; i<nx; ++i ){
+			int i2 = 2*i;
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+				{
+					int ii=i+ox,jj=j+oy,kk=k+oz;
+					v(i2,j2,k2)			+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
+					v(i2+1,j2,k2)		+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
+					v(i2,j2+1,k2)		+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
+					v(i2,j2,k2+1)		+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
+					
+					v(i2+1,j2+1,k2)		+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
+					v(i2+1,j2,k2+1)		+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
+					v(i2,j2+1,k2+1)		+= (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
+					v(i2+1,j2+1,k2+1)	+= (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
+					
+					
+				}
+		}
+	}
+	
+};
+
+
+//template< typename T >
+class mg_straight
+{
+public:
+	
+	//typedef T real_t;
+	
+	template< typename m1, typename m2 >
+	inline void restrict_bnd( const m1& v, m2& V ) const
+	{	
+		unsigned 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+		
+		//... boundary points
+		for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+			for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
+				V(ox-1,j+oy,k+oz)  = 0.25*(v(-1,j2,k2)+v(-1,j2+1,k2)+v(-1,j2,k2+1)+v(-1,j2+1,k2+1));
+				V(ox+nx,j+oy,k+oz) = 0.25*(v(2*nx,j2,k2)+v(2*nx,j2+1,k2)+v(2*nx,j2,k2+1)+v(2*nx,j2+1,k2+1));
+			}
+		
+		for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+			for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
+				V(i+ox,oy-1,k+oz)  = 0.25*(v(i2,-1,k2)+v(i2+1,-1,k2)+v(i2,-1,k2+1)+v(i2+1,-1,k2+1));
+				V(i+ox,oy+ny,k+oz) = 0.25*(v(i2,2*ny,k2)+v(i2+1,2*ny,k2)+v(i2,2*ny,k2+1)+v(i2+1,2*ny,k2+1));
+			}
+		
+		for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 ){
+				V(i+ox,j+oy,oz-1)  = 0.25*(v(i2,j2,-1)+v(i2+1,j2,-1)+v(i2,j2+1,-1)+v(i2+1,j2+1,-1));
+				V(i+ox,j+oy,oz+nz) = 0.25*(v(i2,j2,2*nz)+v(i2+1,j2,2*nz)+v(i2,j2+1,2*nz)+v(i2+1,j2+1,2*nz));
+			}
+		
+		
+	}
+	
+	
+	//... restricts v to V
+	/*	inline void restrict( const MeshvarBnd<real_t>& v, MeshvarBnd<real_t>& V ) const
+	 {
+	 int 
+	 nx = v.size(0)/2, 
+	 ny = v.size(1)/2, 
+	 nz = v.size(2)/2,
+	 ox = v.offset(0),
+	 oy = v.offset(1),
+	 oz = v.offset(2);
+	 
+	 #pragma omp parallel for
+	 for( int i=0; i<nx; ++i )
+	 {
+	 int i2 = 2*i;
+	 for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+	 for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+	 V(i+ox,j+oy,k+oz) = 0.125 * ( v(i2+1,j2,k2) + v(i2,j2+1,k2) + v(i2,j2,k2+1) + v(i2+1,j2+1,k2) +
+	 v(i2+1,j2,k2+1) + v(i2+1,j2+1,k2+1) + v(i2,j2+1,k2+1) + v(i2,j2,k2) );
+	 }
+	 }	
+	 */
+	
+	template< typename m1, typename m2 >
+	inline void restrict( const m1& v, m2& V ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+#pragma omp parallel for
+		for( int i=0; i<nx; ++i )
+		{
+			int i2 = 2*i;
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+					V(i+ox,j+oy,k+oz) = 0.125 * ( v(i2+1,j2,k2) + v(i2,j2+1,k2) + v(i2,j2,k2+1) + v(i2+1,j2+1,k2) +
+												 v(i2+1,j2,k2+1) + v(i2+1,j2+1,k2+1) + v(i2,j2+1,k2+1) + v(i2,j2,k2) );
+		}
+	}	
+	
+	template< typename m1, typename m2 >
+	inline void restrict_add( const m1& v, m2& V ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+#pragma omp parallel for
+		for( int i=0; i<nx; ++i )
+		{
+			int i2 = 2*i;
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+					V(i+ox,j+oy,k+oz) += 0.125 * ( v(i2+1,j2,k2) + v(i2,j2+1,k2) + v(i2,j2,k2+1) + v(i2+1,j2+1,k2) +
+												 v(i2+1,j2,k2+1) + v(i2+1,j2+1,k2+1) + v(i2,j2+1,k2+1) + v(i2,j2,k2) );
+		}
+	}	
+	
+	template< typename m1, typename m2 >
+	inline void restrict_bnd_add( const m1& v, m2& V ) const
+	{	
+		unsigned 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+		
+		//... boundary points
+		for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+			for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
+				V(ox-1,j+oy,k+oz)  += 0.25*(v(-1,j2,k2)+v(-1,j2+1,k2)+v(-1,j2,k2+1)+v(-1,j2+1,k2+1));
+				V(ox+nx,j+oy,k+oz) += 0.25*(v(2*nx,j2,k2)+v(2*nx,j2+1,k2)+v(2*nx,j2,k2+1)+v(2*nx,j2+1,k2+1));
+			}
+		
+		for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+			for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
+				V(i+ox,oy-1,k+oz)  += 0.25*(v(i2,-1,k2)+v(i2+1,-1,k2)+v(i2,-1,k2+1)+v(i2+1,-1,k2+1));
+				V(i+ox,oy+ny,k+oz) += 0.25*(v(i2,2*ny,k2)+v(i2+1,2*ny,k2)+v(i2,2*ny,k2+1)+v(i2+1,2*ny,k2+1));
+			}
+		
+		for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 ){
+				V(i+ox,j+oy,oz-1)  += 0.25*(v(i2,j2,-1)+v(i2+1,j2,-1)+v(i2,j2+1,-1)+v(i2+1,j2+1,-1));
+				V(i+ox,j+oy,oz+nz) += 0.25*(v(i2,j2,2*nz)+v(i2+1,j2,2*nz)+v(i2,j2+1,2*nz)+v(i2+1,j2+1,2*nz));
+			}
+		
+		
+	}
+	
+	//... prolongs V to v
+	template< typename m1, typename m2 >
+	inline void prolong( const m1& V, m2& v ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+#pragma omp parallel 
+		{
+			
+#pragma omp for nowait
+			for( int i=0; i<nx; ++i ){
+				int i2 = 2*i;
+				for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+					for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+					{
+						v(i2+1,j2,k2)		= 
+						v(i2,j2+1,k2)		= 
+						v(i2,j2,k2+1)		= 
+						v(i2+1,j2+1,k2)		= 
+						v(i2+1,j2,k2+1)		= 
+						v(i2+1,j2+1,k2+1)	= 
+						v(i2,j2+1,k2+1)		= 
+						v(i2,j2,k2)			= V(i+ox,j+oy,k+oz);
+					}
+			}
+			
+			
+#pragma omp barrier
+		}
+		
+	}
+	
+	//... prolongs V to v
+	template< typename m1, typename m2 >
+	inline void prolong_bnd( const m1& V, m2& v ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+#pragma omp parallel 
+		{
+			
+			
+			
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
+					v(-1,j2,k2) = 
+					v(-1,j2+1,k2)=
+					v(-1,j2,k2+1)=
+					v(-1,j2+1,k2+1) = V(ox-1,j+oy,k+oz);
+					
+					v(2*nx,j2,k2)=
+					v(2*nx,j2+1,k2)=
+					v(2*nx,j2,k2+1)=
+					v(2*nx,j2+1,k2+1)=V(ox+nx,j+oy,k+oz);
+				}
+			
+			for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
+					v(i2,-1,k2)=
+					v(i2+1,-1,k2)=
+					v(i2,-1,k2+1)=
+					v(i2+1,-1,k2+1)=V(i+ox,oy-1,k+oz);
+					v(i2,2*ny,k2)=
+					v(i2+1,2*ny,k2)=
+					v(i2,2*ny,k2+1)=
+					v(i2+1,2*ny,k2+1)=V(i+ox,oy+ny,k+oz);
+				}
+			
+			for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+				for( int j=0,j2=0; j<ny; ++j,j2+=2 ){
+					v(i2,j2,-1)=
+					v(i2+1,j2,-1)=
+					v(i2,j2+1,-1)=
+					v(i2+1,j2+1,-1)=V(i+ox,j+oy,oz-1);
+					
+					v(i2,j2,2*nz)=
+					v(i2+1,j2,2*nz)=
+					v(i2,j2+1,2*nz)=
+					v(i2+1,j2+1,2*nz)=V(i+ox,j+oy,oz+nz);
+				}
+			
+#pragma omp barrier
+		}
+		
+	}
+	
+	
+	//... prolongs V to v
+	template< typename m1, typename m2 >
+	inline void prolong_add_bnd( const m1& V, m2& v ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+#pragma omp parallel 
+		{
+			
+			
+			
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
+					v(-1,j2,k2) +=V(ox-1,j+oy,k+oz);
+					v(-1,j2+1,k2)+=V(ox-1,j+oy,k+oz);
+					v(-1,j2,k2+1)+=V(ox-1,j+oy,k+oz);
+					v(-1,j2+1,k2+1)+=V(ox-1,j+oy,k+oz);
+					
+					v(2*nx,j2,k2)+=V(ox+nx,j+oy,k+oz);
+					v(2*nx,j2+1,k2)+=V(ox+nx,j+oy,k+oz);
+					v(2*nx,j2,k2+1)+=V(ox+nx,j+oy,k+oz);
+					v(2*nx,j2+1,k2+1)+=V(ox+nx,j+oy,k+oz);
+				}
+			
+			for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
+					v(i2,-1,k2)+=V(i+ox,oy-1,k+oz);
+					v(i2+1,-1,k2)+=V(i+ox,oy-1,k+oz);
+					v(i2,-1,k2+1)+=V(i+ox,oy-1,k+oz);
+					v(i2+1,-1,k2+1)+=V(i+ox,oy-1,k+oz);
+					v(i2,2*ny,k2)+=V(i+ox,oy+ny,k+oz);
+					v(i2+1,2*ny,k2)+=V(i+ox,oy+ny,k+oz);
+					v(i2,2*ny,k2+1)+=V(i+ox,oy+ny,k+oz);
+					v(i2+1,2*ny,k2+1)+=V(i+ox,oy+ny,k+oz);
+				}
+			
+			for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+				for( int j=0,j2=0; j<ny; ++j,j2+=2 ){
+					v(i2,j2,-1)+=V(i+ox,j+oy,oz-1);
+					v(i2+1,j2,-1)+=V(i+ox,j+oy,oz-1);
+					v(i2,j2+1,-1)+=V(i+ox,j+oy,oz-1);
+					v(i2+1,j2+1,-1)+=V(i+ox,j+oy,oz-1);
+					
+					v(i2,j2,2*nz)+=V(i+ox,j+oy,oz+nz);
+					v(i2+1,j2,2*nz)+=V(i+ox,j+oy,oz+nz);
+					v(i2,j2+1,2*nz)+=V(i+ox,j+oy,oz+nz);
+					v(i2+1,j2+1,2*nz)+=V(i+ox,j+oy,oz+nz);
+				}
+			
+#pragma omp barrier
+		}
+		
+	}
+	
+	template< typename m1, typename m2 >
+	inline void prolong_add( const m1& V, m2& v ) const
+	{
+		int 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+		#pragma omp parallel for
+		for( int i=0; i<nx; ++i ){
+			int i2 = 2*i;
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+				{
+					v(i2+1,j2,k2)		+= V(i+ox,j+oy,k+oz);
+					v(i2,j2+1,k2)		+= V(i+ox,j+oy,k+oz);
+					v(i2,j2,k2+1)		+= V(i+ox,j+oy,k+oz);
+					v(i2+1,j2+1,k2)		+= V(i+ox,j+oy,k+oz);
+					v(i2+1,j2,k2+1)		+= V(i+ox,j+oy,k+oz);
+					v(i2+1,j2+1,k2+1)	+= V(i+ox,j+oy,k+oz);
+					v(i2,j2+1,k2+1)		+= V(i+ox,j+oy,k+oz);
+					v(i2,j2,k2)			+= V(i+ox,j+oy,k+oz);
+				}
+		}
+	}
+	
+	//... prolongs V to v
+	/*template< typename m1, typename m2 >
+	inline void prolong_add( m1& V, m2& v )
+	{
+		unsigned 
+		nx = v.size(0)/2, 
+		ny = v.size(1)/2, 
+		nz = v.size(2)/2,
+		ox = v.offset(0),
+		oy = v.offset(1),
+		oz = v.offset(2);
+		
+		
+		for( int i=0,i2=0; i<nx; ++i,i2+=2 )
+			for( int j=0,j2=0; j<ny; ++j,j2+=2 )
+				for( int k=0,k2=0; k<nz; ++k,k2+=2 )
+				{
+					v(i2+1,j2,k2)		+= V(i+ox,j+oy,k+oz);
+					v(i2,j2+1,k2)		+= V(i+ox,j+oy,k+oz);
+					v(i2,j2,k2+1)		+= V(i+ox,j+oy,k+oz);
+					v(i2+1,j2+1,k2)		+= V(i+ox,j+oy,k+oz);
+					v(i2+1,j2,k2+1)		+= V(i+ox,j+oy,k+oz);
+					v(i2+1,j2+1,k2+1)	+= V(i+ox,j+oy,k+oz);
+					v(i2,j2+1,k2+1)		+= V(i+ox,j+oy,k+oz);
+					v(i2,j2,k2)			+= V(i+ox,j+oy,k+oz);
+				}
+		
+	}*/
+};
+
+
+#endif //__MG_OPERATORS_HH
+
diff --git a/mg_solver.hh b/mg_solver.hh
new file mode 100644
index 0000000..b21e09a
--- /dev/null
+++ b/mg_solver.hh
@@ -0,0 +1,1051 @@
+/*
+ 
+ mg_solver.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+#ifndef __MG_SOLVER_HH
+#define __MG_SOLVER_HH
+
+#include <cmath>
+#include <iostream>
+
+#include "mg_operators.hh"
+#include "mg_interp.hh"
+
+#include "mesh.hh"
+
+#define BEGIN_MULTIGRID_NAMESPACE namespace multigrid {
+#define END_MULTIGRID_NAMESPACE }
+
+BEGIN_MULTIGRID_NAMESPACE
+	
+namespace opt {
+	enum smtype { sm_jacobi, sm_gauss_seidel, sm_sor };
+}
+
+
+
+template< class S, class I, class O, typename T=double >
+class solver
+{
+public:
+	typedef S scheme;
+	typedef O mgop;
+	typedef I interp;
+
+protected:
+	scheme				m_scheme;
+	mgop				m_gridop;
+	unsigned			m_npresmooth, m_npostsmooth;
+	opt::smtype			m_smoother;
+	unsigned			m_ilevelmin;
+	
+	const static bool	m_bperiodic = true;
+	
+	std::vector<double> m_residu_ini;
+	bool m_is_ini;
+
+	GridHierarchy<T>	*m_pu, *m_pf, *m_pfsave;	
+	//GridHierarchy<bool> *m_pmask;
+	const MeshvarBnd<T> *m_pubnd;
+	
+	double compute_error( const MeshvarBnd<T>& u, const MeshvarBnd<T>& unew );
+	
+	double compute_error( const GridHierarchy<T>& uh, const GridHierarchy<T>& uhnew, bool verbose );
+	
+	double compute_RMS_resid( const GridHierarchy<T>& uh, const GridHierarchy<T>& fh, bool verbose );
+
+protected:
+	
+	void Jacobi( T h, MeshvarBnd<T>* u, const MeshvarBnd<T>* f );
+	
+	void GaussSeidel( T h, MeshvarBnd<T>* u, const MeshvarBnd<T>* f );
+	
+	void SOR( T h, MeshvarBnd<T>* u, const MeshvarBnd<T>* f );
+	
+	void twoGrid( unsigned ilevel );
+	
+	//void interp_coarse_fine( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine, bool bcf=true );
+	
+	void setBC( unsigned ilevel );
+	
+	void make_periodic( MeshvarBnd<T> *u );
+	
+	void interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine );
+		
+public:
+	solver( GridHierarchy<T>& f, //const MeshvarBnd<T>& uBC_top, 
+				   opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth );
+	
+	~solver()
+	{ /*delete m_pmask;*/ }
+	
+	double solve( GridHierarchy<T>& u, double accuracy, double h=-1.0, bool verbose=false );
+	
+	double solve( GridHierarchy<T>& u, double accuracy, bool verbose=false )
+	{
+		return this->solve ( u, accuracy, -1.0, verbose );
+	}
+	
+	
+	
+};
+
+
+template< class S, class I, class O, typename T >
+solver<S,I,O,T>::solver( GridHierarchy<T>& f, //const MeshvarBnd<T>& ubnd, 
+					opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth )
+:	m_scheme(), m_gridop(), m_npresmooth( npresmooth ), m_npostsmooth( npostsmooth ), 
+m_smoother( smoother ), m_ilevelmin( f.levelmin() ), m_pf( &f ), m_is_ini( true )
+{ 
+	m_is_ini = true;
+	
+	// TODO: maybe later : add more than one refinement region
+	//... initialize the refinement mask
+	//m_pmask = new GridHierarchy<bool>( f.m_nbnd );
+	//m_pmask->create_base_hierarchy(f.levelmin());
+			
+	/*for( unsigned ilevel=f.levelmin()+1; ilevel<=f.levelmax(); ++ilevel )
+	{
+		meshvar_bnd* pf = f.get_grid(ilevel);
+		m_pmask->add_patch( pf->offset(0), pf->offset(1), pf->offset(2), pf->size(0), pf->size(1), pf->size(2) );
+	}
+	
+	m_pmask->zero();
+	
+	for( unsigned ilevel=0; ilevel<f.levelmin(); ++ilevel )
+	{
+		MeshvarBnd<T> *pf = f.get_grid(ilevel);
+		for( int ix=0; ix < (int)pf->size(0); ++ix )
+			for( int iy=0; iy < (int)pf->size(1); ++iy )
+				for( int iz=0; iz < (int)pf->size(2); ++iz )
+					(*m_pmask->get_grid(ilevel))(ix,iy,iz) = true;
+	}
+	
+	for( unsigned ilevel=m_ilevelmin; ilevel<f.levelmax(); ++ilevel )
+	{
+		MeshvarBnd<T>* pf = f.get_grid(ilevel+1);//, *pfc = f.get_grid(ilevel);
+		
+		for( int ix=pf->offset(0); ix < (int)(pf->offset(0)+pf->size(0)/2); ++ix )
+			for( int iy=pf->offset(1); iy < (int)(pf->offset(1)+pf->size(1)/2); ++iy )
+				for( int iz=pf->offset(2); iz < (int)(pf->offset(2)+pf->size(2)/2); ++iz )
+					(*m_pmask->get_grid(ilevel))(ix,iy,iz) = true;
+	}
+	*/	
+}
+
+
+template< class S, class I, class O, typename T >
+void solver<S,I,O,T>::Jacobi( T h, MeshvarBnd<T> *u, const MeshvarBnd<T>* f )
+{
+	int
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+	
+	double 
+		c0 = -1.0/m_scheme.ccoeff(),
+		h2 = h*h; 
+	
+	MeshvarBnd<T> uold(*u);
+	
+	double alpha = 0.95, ialpha = 1.0-alpha;
+	
+	#pragma omp parallel for
+	for( int ix=0; ix<nx; ++ix )
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+				(*u)(ix,iy,iz) = ialpha * uold(ix,iy,iz) + alpha * (m_scheme.rhs( uold, ix, iy, iz ) + h2 * (*f)(ix,iy,iz))*c0;
+	
+}
+
+template< class S, class I, class O, typename T >
+void solver<S,I,O,T>::SOR( T h, MeshvarBnd<T> *u, const MeshvarBnd<T>* f )
+{
+	int
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+
+	double 
+		c0 = -1.0/m_scheme.ccoeff(),
+		h2 = h*h; 
+		
+	MeshvarBnd<T> uold(*u);
+	
+	double 
+		alpha = 1.2, 
+	//alpha = 2 / (1 + 4 * atan(1.0) / double(u->size(0)))-1.0,
+		ialpha = 1.0-alpha;
+	
+	#pragma omp parallel for
+	for( int ix=0; ix<nx; ++ix )
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+				if( (ix+iy+iz)%2==0 )
+					(*u)(ix,iy,iz) = ialpha * uold(ix,iy,iz) + alpha * (m_scheme.rhs( uold, ix, iy, iz ) + h2 * (*f)(ix,iy,iz))*c0;
+	
+	
+	#pragma omp parallel for
+	for( int ix=0; ix<nx; ++ix )
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+				if( (ix+iy+iz)%2!=0 )
+					(*u)(ix,iy,iz) = ialpha * uold(ix,iy,iz) + alpha * (m_scheme.rhs( *u, ix, iy, iz ) + h2 * (*f)(ix,iy,iz))*c0;
+	
+	
+	
+}
+
+template< class S, class I, class O, typename T >
+void solver<S,I,O,T>::GaussSeidel( T h, MeshvarBnd<T>* u, const MeshvarBnd<T>* f )
+{
+	int 
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+	
+	T
+		c0 = -1.0/m_scheme.ccoeff(),
+		h2 = h*h; 
+	
+	for( int color=0; color < 2; ++color )
+		#pragma omp parallel for
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+				for( int iz=0; iz<nz; ++iz )
+					if( (ix+iy+iz)%2 == color )
+						(*u)(ix,iy,iz) = (m_scheme.rhs( *u, ix, iy, iz ) + h2 * (*f)(ix,iy,iz))*c0;
+	
+}
+
+
+template< class S, class I, class O, typename T >
+void solver<S,I,O,T>::twoGrid( unsigned ilevel )
+{
+	MeshvarBnd<T> *uf, *uc, *ff, *fc;
+	
+	T 
+		h = 1.0/(pow(2.0,ilevel)),
+		c0 = -1.0/m_scheme.ccoeff(),
+		h2 = h*h; 
+	
+	uf = m_pu->get_grid(ilevel);
+	ff = m_pf->get_grid(ilevel);	
+	
+	uc = m_pu->get_grid(ilevel-1);
+	fc = m_pf->get_grid(ilevel-1);	
+	
+	
+	int 
+		nx = uf->size(0), 
+		ny = uf->size(1), 
+		nz = uf->size(2);
+	
+	if( m_bperiodic && ilevel <= m_ilevelmin)
+		make_periodic( uf );
+	else if(!m_bperiodic)
+		setBC( ilevel );
+	
+	//... do smoothing sweeps with specified solver
+	for( unsigned i=0; i<m_npresmooth; ++i ){
+		
+		if( ilevel > m_ilevelmin )
+			interp().interp_coarse_fine(ilevel,*uc,*uf);
+		
+		if( m_smoother == opt::sm_gauss_seidel )
+			GaussSeidel( h, uf, ff );
+			
+		else if( m_smoother == opt::sm_jacobi )
+			Jacobi( h, uf, ff);		
+			
+		else if( m_smoother == opt::sm_sor )
+			SOR( h, uf, ff );
+		
+		if( m_bperiodic && ilevel <= m_ilevelmin )
+			make_periodic( uf );
+	}
+			
+	
+	m_gridop.restrict( *uf, *uc );
+	
+	//... essential!!
+	if( m_bperiodic && ilevel <= m_ilevelmin )
+		make_periodic( uc );
+	else if( ilevel > m_ilevelmin )
+		interp().interp_coarse_fine(ilevel,*uc,*uf);
+		
+	
+
+	meshvar_bnd Lu(*uf,false);
+	Lu.zero();
+
+	#pragma omp parallel for
+	for( int ix=0; ix<nx; ++ix )
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+				Lu(ix,iy,iz) = m_scheme.apply( (*uf), ix, iy, iz )/h2;
+	
+	meshvar_bnd tLu(*uc,false);
+	
+	
+	//... restrict Lu
+	m_gridop.restrict( Lu, tLu );
+	//mg_straight().restrict( Lu, tLu );
+	Lu.deallocate();
+	
+	//... restrict source term
+	m_gridop.restrict( *ff, *fc );
+	//mg_straight().restrict( *ff, *fc );
+	
+	int oi, oj, ok;
+	oi = ff->offset(0);
+	oj = ff->offset(1);
+	ok = ff->offset(2);
+	
+	#pragma omp parallel for
+	for( int ix=oi; ix<oi+(int)ff->size(0)/2; ++ix )
+		for( int iy=oj; iy<oj+(int)ff->size(1)/2; ++iy )
+			for( int iz=ok; iz<ok+(int)ff->size(2)/2; ++iz )
+				(*fc)(ix,iy,iz) += ((tLu( ix, iy, iz ) - (m_scheme.apply( *uc, ix, iy, iz )/(4.0*h2))));
+					
+	tLu.deallocate();
+	
+	meshvar_bnd ucsave(*uc,true);
+						
+	//... have we reached the end of the recursion or do we need to go up one level?
+	if( ilevel == 1 )
+		if( m_bperiodic )
+			(*uc)(0,0,0) = 0.0;
+		else 
+			(*uc)(0,0,0) = (m_scheme.rhs( (*uc), 0, 0, 0 ) + 4.0 * h2 * (*fc)(0,0,0))*c0;
+	else
+		twoGrid( ilevel-1 );
+	
+	meshvar_bnd cc(*uc,false);
+	
+	//... compute correction on coarse grid
+	#pragma omp parallel for
+	for( int ix=0; ix<(int)cc.size(0); ++ix )
+		for( int iy=0; iy<(int)cc.size(1); ++iy )
+			for( int iz=0; iz<(int)cc.size(2); ++iz )
+				cc(ix,iy,iz) = (*uc)(ix,iy,iz) - ucsave(ix,iy,iz);
+		
+	ucsave.deallocate();
+
+	if( m_bperiodic && ilevel <= m_ilevelmin )
+		make_periodic( &cc );
+
+	m_gridop.prolong_add( cc, *uf );
+	
+	//... interpolate and apply coarse-fine boundary conditions on fine level
+	if( m_bperiodic && ilevel <= m_ilevelmin )
+		make_periodic( uf );
+	else if(!m_bperiodic)
+		setBC( ilevel );
+	
+	//... do smoothing sweeps with specified solver
+	for( unsigned i=0; i<m_npostsmooth; ++i ){
+		
+		if( ilevel > m_ilevelmin )
+			interp().interp_coarse_fine(ilevel,*uc,*uf);
+
+		if( m_smoother == opt::sm_gauss_seidel )
+			GaussSeidel( h, uf, ff );
+		
+		else if( m_smoother == opt::sm_jacobi )
+			Jacobi( h, uf, ff);		
+		
+		else if( m_smoother == opt::sm_sor )
+			SOR( h, uf, ff );
+		
+		if( m_bperiodic && ilevel <= m_ilevelmin )
+			make_periodic( uf );
+
+	}
+}
+
+template< class S, class I, class O, typename T >
+double solver<S,I,O,T>::compute_error( const MeshvarBnd<T>& u, const MeshvarBnd<T>& unew )
+{
+	int 
+		nx = u.size(0), 
+		ny = u.size(1), 
+		nz = u.size(2);
+	
+	double err = 0.0;
+	unsigned count = 0;
+	
+	#pragma omp parallel for reduction(+:err,count)
+	for( int ix=0; ix<nx; ++ix )
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+				if( fabs(unew(ix,iy,iz)) > 0.0 )//&& u(ix,iy,iz) != unew(ix,iy,iz) )
+				{
+					err += fabs(1.0 - u(ix,iy,iz)/unew(ix,iy,iz));
+					
+					++count;
+				}
+	
+	if( count != 0 )
+		err /= count;
+	
+	return err;
+}
+
+template< class S, class I, class O, typename T >
+double solver<S,I,O,T>::compute_error( const GridHierarchy<T>& uh, const GridHierarchy<T>& uhnew, bool verbose )
+{
+	double maxerr = 0.0;
+	
+	for( unsigned ilevel=uh.levelmin(); ilevel <= uh.levelmax(); ++ilevel )
+	{
+		double err = 0.0;
+		err = compute_error( *uh.get_grid(ilevel), *uhnew.get_grid(ilevel) );
+		
+		if( verbose )
+			std::cout << "      Level " << std::setw(6) << ilevel << ",   Error = " << err << std::endl;
+		maxerr = std::max(maxerr,err);
+		
+	}
+	return maxerr;
+}
+
+template< class S, class I, class O, typename T >
+double solver<S,I,O,T>::compute_RMS_resid( const GridHierarchy<T>& uh, const GridHierarchy<T>& fh, bool verbose )
+{
+	if( m_is_ini )
+		m_residu_ini.assign( uh.levelmax()+1, 0.0 );
+	
+	double maxerr=0.0;
+	
+	for( unsigned ilevel=uh.levelmin(); ilevel <= uh.levelmax(); ++ilevel )
+	{
+		int 
+		nx = uh.get_grid(ilevel)->size(0), 
+		ny = uh.get_grid(ilevel)->size(1), 
+		nz = uh.get_grid(ilevel)->size(2);
+		
+		double h = 1.0/pow(2,ilevel), h2=h*h, err;
+		double sum = 0.0;
+		unsigned count = 0;
+		
+		#pragma omp parallel for reduction(+:sum,count)
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+				for( int iz=0; iz<nz; ++iz )
+				{
+					double r = (m_scheme.apply( *uh.get_grid(ilevel), ix, iy, iz )/h2 + (*fh.get_grid(ilevel))(ix,iy,iz));
+					sum += r*r;
+					++count;
+				}
+		
+		if( m_is_ini )
+			m_residu_ini[ilevel] =  sqrt(sum)/count;
+		
+		err = sqrt(sum)/count/m_residu_ini[ilevel];
+		
+		if( verbose && !m_is_ini )
+			std::cout << "      Level " << std::setw(6) << ilevel << ",   Error = " << err << std::endl;
+		
+		if( err > maxerr )
+			maxerr = err;
+		
+	}
+	
+	if( m_is_ini )
+		m_is_ini = false;
+	
+	return maxerr;
+}
+
+
+template< class S, class I, class O, typename T >
+double solver<S,I,O,T>::solve( GridHierarchy<T>& uh, double acc, double h, bool verbose )
+{
+
+	double err;
+	
+	//GridHierarchy<T> uhnew(uh);//, fsave(*m_pf);
+	m_pu = &uh;
+	
+    unsigned niter = 0;
+	
+	//... iterate ...//
+	while (true)
+	{
+		
+		
+		twoGrid( uh.levelmax() );
+		//err = compute_error( *m_pu, uhnew, verbose );
+		err = compute_RMS_resid( *m_pu, *m_pf, verbose );
+		++niter;
+		
+		if( verbose ){
+			std::cout << "  --> Step No. " << std::setw(3) << niter << ", Max Err = " << err << std::endl;
+			std::cout << "      ---------------------------------------------------\n";
+		}
+			
+		if( (niter > 1) && ((err < acc) || (niter > 8)) )
+			break;
+		
+		//uhnew = *m_pu;
+		//*m_pf = fsave;
+	}		
+	
+	if( err > acc )
+		std::cout << "Error : no convergence in Poisson solver" << std::endl;
+	else if( verbose )
+		std::cout << " - Converged in " << niter << " steps to req. acc. of " << acc << std::endl;
+
+	
+	//uh = uhnew;
+	//*m_pf = fsave;
+	
+	//.. make sure that the RHS does not contain the FAS corrections any more
+	for( int i=m_pf->levelmax(); i>0; --i )//(int)m_pf->levelmin(); --i )
+		m_gridop.restrict( *m_pf->get_grid(i), *m_pf->get_grid(i-1) );
+	
+	
+	return err;
+}
+
+template< class S, class I, class O, typename T >
+void solver<S,I,O,T>::interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine )
+{
+	MeshvarBnd<T> *u    = &fine;
+	MeshvarBnd<T> *utop = &coarse;
+	
+	int
+	xoff = u->offset(0),
+	yoff = u->offset(1),
+	zoff = u->offset(2);
+	
+	//... don't do anything if we are not an additional refinement region
+	if( xoff == 0 && yoff == 0 && zoff == 0 )
+		return;
+	
+	int
+	nx = u->size(0), 
+	ny = u->size(1), 
+	nz = u->size(2);
+	
+	//.. perform cubic interpolation
+	mg_cubic().prolong_bnd( *utop, *u );
+	
+	//.. match fluxes
+	#pragma omp parallel for schedule(dynamic)
+	for( int ix=-1; ix<=nx; ++ix )
+		for( int iy=-1; iy<=ny; ++iy )
+			for( int iz=-1; iz<=nz; ++iz )
+			{
+				bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz);
+				
+				if( xbnd || ybnd || zbnd )
+				{
+					
+					//... only deal with proper ghostzones
+					if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd))
+						continue;
+					
+					int ixtop = (int)(0.5*(double)(ix))+xoff;
+					int iytop = (int)(0.5*(double)(iy))+yoff;
+					int iztop = (int)(0.5*(double)(iz))+zoff;
+					
+					if( ix==-1 ) ixtop=xoff-1;
+					if( iy==-1 ) iytop=yoff-1;
+					if( iz==-1 ) iztop=zoff-1;
+					
+					double flux;;
+				    if( ix == -1 && iy%2==0 && iz%2==0 )
+					{					
+						flux = 0.0;
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+							{		
+								flux += ((*u)(ix+1,iy+j,iz+k)-(*u)(ix,iy+j,iz+k));
+							}
+						
+						
+						
+						flux /= 4.0;
+						
+						double dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
+					}
+				}
+			}
+	
+}
+
+
+
+
+
+template< class S, class I, class O, typename T >
+void solver<S,I,O,T>::setBC( unsigned ilevel )
+{
+	//... set only on level before additional refinement starts
+	//if( ilevel == m_ilevelmin )
+	if( ilevel == m_ilevelmin )
+	{
+		MeshvarBnd<T> *u = m_pu->get_grid(ilevel);
+		//int nbnd = u->m_nbnd,
+		int
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+		
+		/*for( int ix=-nbnd; ix<nx+nbnd; ++ix )
+			for( int iy=-nbnd; iy<ny+nbnd; ++iy )
+				for( int iz=-nbnd; iz<nz+nbnd; ++iz )
+					if( ix<0||ix>=nx||iy<0||iy>=ny||iz<0||iz>=nz )
+						(*u)(ix,iy,iz) = (*m_pubnd)(ix,iy,iz);*/
+		
+		
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+			{
+				(*u)(-1,iy,iz) = 2.0*(*m_pubnd)(-1,iy,iz) - (*u)(0,iy,iz);
+				(*u)(nx,iy,iz) = 2.0*(*m_pubnd)(nx,iy,iz) - (*u)(nx-1,iy,iz);;
+			}
+		
+		for( int ix=0; ix<nx; ++ix )
+			for( int iz=0; iz<nz; ++iz )
+			{
+				(*u)(ix,-1,iz) = 2.0*(*m_pubnd)(ix,-1,iz) - (*u)(ix,0,iz);
+				(*u)(ix,ny,iz) = 2.0*(*m_pubnd)(ix,ny,iz) - (*u)(ix,ny-1,iz);
+			}
+		
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+			{
+				(*u)(ix,iy,-1) = 2.0*(*m_pubnd)(ix,iy,-1) - (*u)(ix,iy,0);
+				(*u)(ix,iy,nz) = 2.0*(*m_pubnd)(ix,iy,nz) - (*u)(ix,iy,nz-1);
+			}		
+		
+		
+		
+	}/*else if( ilevel < m_ilevelmin ) {
+		MeshvarBnd<T> *u = m_pu->get_grid(ilevel);
+		int nbnd = u->m_nbnd,
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+		
+		for( int ix=-nbnd; ix<nx+nbnd; ++ix )
+			for( int iy=-nbnd; iy<ny+nbnd; ++iy )
+				for( int iz=-nbnd; iz<nz+nbnd; ++iz )
+					if( ix<0||ix>=nx||iy<0||iy>=ny||iz<0||iz>=nz )
+						(*u)(ix,iy,iz) = 0.0;
+	}*/
+
+
+}
+
+
+
+//... enforce periodic boundary conditions
+template< class S, class I, class O, typename T >
+void solver<S,I,O,T>::make_periodic( MeshvarBnd<T> *u )
+{
+	
+
+	int
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+	int nb = u->m_nbnd;
+	
+	
+	//if( u->offset(0) == 0 )
+		for( int iy=-nb; iy<ny+nb; ++iy )
+			for( int iz=-nb; iz<nz+nb; ++iz )
+			{
+				int iiy( (iy+ny)%ny ), iiz( (iz+nz)%nz );
+				
+				for( int i=-nb; i<0; ++i )
+				{
+					(*u)(i,iy,iz) = (*u)(nx+i,iiy,iiz);
+					(*u)(nx-1-i,iy,iz) = (*u)(-1-i,iiy,iiz);	
+				}
+				
+			}
+	
+	//if( u->offset(1) == 0 )
+		for( int ix=-nb; ix<nx+nb; ++ix )
+			for( int iz=-nb; iz<nz+nb; ++iz )
+			{
+				int iix( (ix+nx)%nx ), iiz( (iz+nz)%nz );
+				
+				for( int i=-nb; i<0; ++i )
+				{
+					(*u)(ix,i,iz) = (*u)(iix,ny+i,iiz);
+					(*u)(ix,ny-1-i,iz) = (*u)(iix,-1-i,iiz);
+				}
+			}
+	
+	//if( u->offset(2) == 0 )
+		for( int ix=-nb; ix<nx+nb; ++ix )
+			for( int iy=-nb; iy<ny+nb; ++iy )
+			{
+				int iix( (ix+nx)%nx ), iiy( (iy+ny)%ny );
+				
+				for( int i=-nb; i<0; ++i )
+				{
+					(*u)(ix,iy,i) = (*u)(iix,iiy,nz+i);
+					(*u)(ix,iy,nz-1-i) = (*u)(iix,iiy,-1-i);
+				}
+			}
+	
+#if 0
+	if( u->m_nbnd == 1 )
+	{
+			
+		#pragma omp parallel
+		{
+			
+			if( u->offset(0) == 0 )
+				for( int iy=-1; iy<=ny; ++iy )
+					for( int iz=-1; iz<=nz; ++iz )
+					{
+						int iiy( (iy+ny)%ny ), iiz( (iz+nz)%nz );
+
+						(*u)(-1,iy,iz) = (*u)(nx-1,iiy,iiz);
+						(*u)(nx,iy,iz) = (*u)(0,iiy,iiz);
+					}
+			
+			if( u->offset(1) == 0 )
+				for( int ix=-1; ix<=nx; ++ix )
+					for( int iz=-1; iz<=nz; ++iz )
+					{
+						int iix( (ix+nx)%nx ), iiz( (iz+nz)%nz );
+
+						(*u)(ix,-1,iz) = (*u)(iix,ny-1,iiz);
+						(*u)(ix,ny,iz) = (*u)(iix,0,iiz);
+					}
+			
+			if( u->offset(2) == 0 )
+				for( int ix=-1; ix<=nx; ++ix )
+					for( int iy=-1; iy<=ny; ++iy )
+					{
+						int iix( (ix+nx)%nx ), iiy( (iy+ny)%ny );
+
+						(*u)(ix,iy,-1) = (*u)(iix,iiy,nz-1);
+						(*u)(ix,iy,nz) = (*u)(iix,iiy,0);
+					}									  
+			
+			
+		}
+	}else if( u->m_nbnd == 2 ){
+		if( u->offset(0) == 0 )
+			for( int iy=-2; iy<=ny+1; ++iy )
+				for( int iz=-2; iz<=nz+1; ++iz )
+				{
+					int iiy( (iy+ny)%ny ), iiz( (iz+nz)%nz );
+					
+					(*u)(-1,iy,iz) = (*u)(nx-1,iiy,iiz);
+					(*u)(-2,iy,iz) = (*u)(nx-2,iiy,iiz);
+					(*u)(nx,iy,iz) = (*u)(0,iiy,iiz);
+					(*u)(nx+1,iy,iz) = (*u)(1,iiy,iiz);
+				}
+		
+		if( u->offset(1) == 0 )
+			for( int ix=-2; ix<=nx+1; ++ix )
+				for( int iz=-2; iz<=nz+1; ++iz )
+				{
+					int iix( (ix+nx)%nx ), iiz( (iz+nz)%nz );
+					
+					(*u)(ix,-1,iz) = (*u)(iix,ny-1,iiz);
+					(*u)(ix,-2,iz) = (*u)(iix,ny-2,iiz);
+					(*u)(ix,ny,iz) = (*u)(iix,0,iiz);
+					(*u)(ix,ny+1,iz) = (*u)(iix,1,iiz);
+				}
+		
+		if( u->offset(2) == 0 )
+			for( int ix=-2; ix<=nx+1; ++ix )
+				for( int iy=-2; iy<=ny+1; ++iy )
+				{
+					int iix( (ix+nx)%nx ), iiy( (iy+ny)%ny );
+					
+					(*u)(ix,iy,-1) = (*u)(iix,iiy,nz-1);
+					(*u)(ix,iy,-2) = (*u)(iix,iiy,nz-2);
+					(*u)(ix,iy,nz) = (*u)(iix,iiy,0);
+					(*u)(ix,iy,nz+1) = (*u)(iix,iiy,1);
+				}			
+	}else
+		throw std::runtime_error("solver<S,I,O,T>::make_periodic: don't know how to deal with boundary!");
+#endif
+}
+
+#if 0
+template< class S, class I, class O, typename T >
+void solver<S,I,O,T>::interp_coarse_fine( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine, bool bcf )
+{
+	MeshvarBnd<T> *u    = &fine;
+	MeshvarBnd<T> *utop = &coarse;
+	
+	
+	bcf = true;;
+	//bcf = false;
+	
+	int
+	xoff = u->offset(0),
+	yoff = u->offset(1),
+	zoff = u->offset(2);
+	
+	//... don't do anything if we are not an additional refinement region
+	if( xoff == 0 && yoff == 0 && zoff == 0 )
+		return;
+	
+	int
+	nx = u->size(0), 
+	ny = u->size(1), 
+	nz = u->size(2);
+	
+	//... set boundary condition for fine grid
+	
+#pragma omp parallel for schedule(dynamic)
+	for( int ix=-1; ix<=nx; ++ix )
+		for( int iy=-1; iy<=ny; ++iy )
+			for( int iz=-1; iz<=nz; ++iz )
+			{
+				bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz);
+				
+				//if(ix==-1||ix==nx||iy==-1||iy==ny||iz==-1||iz==nz)
+				if( xbnd || ybnd || zbnd )
+					//if( xbnd ^ ybnd ^ zbnd )
+				{
+					
+					//... only deal with proper ghostzones
+					if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd))
+						continue;
+					
+					/*int ixtop = (int)(0.5*(double)(ix+2*xoff)+1e-3);
+					 int iytop = (int)(0.5*(double)(iy+2*yoff)+1e-3);
+					 int iztop = (int)(0.5*(double)(iz+2*zoff)+1e-3);*/
+					
+					int ixtop = (int)(0.5*(double)(ix))+xoff;
+					int iytop = (int)(0.5*(double)(iy))+yoff;
+					int iztop = (int)(0.5*(double)(iz))+zoff;
+					
+					if( ix==-1 ) ixtop=xoff-1;
+					if( iy==-1 ) iytop=yoff-1;
+					if( iz==-1 ) iztop=zoff-1;
+					
+					double ustar1, ustar2, ustar3, uhat;			
+					double fac = 0.5;//0.25;
+					double flux;;
+				    if( ix == -1 && iy%2==0 && iz%2==0 )
+					{
+						flux = 0.0;
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+							{		
+								ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) );
+								ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) );
+								ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) );
+								
+								uhat   = interp2( /*-1.0, 0.0, 1.0, */ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+								
+								//(*u)(ix,iy+j,iz+k) = 0.0;//(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k), -1.0 );
+								
+								(*u)(ix,iy+j,iz+k) = interp2left( uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k) );
+								
+								flux += ((*u)(ix+1,iy+j,iz+k)-(*u)(ix,iy+j,iz+k));
+							}
+						
+						
+						
+						flux /= 4.0;
+						
+						double dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
+						
+						//dflux *= 2.0;
+						
+						if( bcf )
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+									(*u)(ix,iy+j,iz+k) -= dflux;
+						else
+							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop+1,iytop,iztop) - 2.0*flux;
+						
+						
+					}
+					// right boundary
+					if( ix == nx && iy%2==0 && iz%2==0 )
+					{
+						flux = 0.0;
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+							{		
+								ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) );
+								ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) );
+								ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) );
+								
+								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+								
+								//(*u)(ix,iy+j,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix-1,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), 1.0 );
+								(*u)(ix,iy+j,iz+k) = interp2right( (*u)(ix-2,iy+j,iz+k), (*u)(ix-1,iy+j,iz+k), uhat );
+								flux += ((*u)(ix,iy+j,iz+k)-(*u)(ix-1,iy+j,iz+k));
+							}
+						flux /= 4.0;
+						
+						
+						double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop-1,iytop,iztop))/2.0 - flux;
+						//dflux *= 2.0;
+						
+						if( bcf )
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+									(*u)(ix,iy+j,iz+k) += dflux;
+						else
+							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop-1,iytop,iztop) + 2.0*flux;
+						
+					}
+					// bottom boundary
+					if( iy == -1 && ix%2==0 && iz%2==0 )
+					{
+						flux = 0.0;
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+							{
+								ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) );
+								ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
+								ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) );
+								
+								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+								
+								//(*u)(ix+j,iy,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k), -1.0 );
+								(*u)(ix+j,iy,iz+k) = interp2left( uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k) );
+								
+								flux += ((*u)(ix+j,iy+1,iz+k)-(*u)(ix+j,iy,iz+k));
+							}
+						flux /= 4.0;
+						//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop+1,iztop) - flux;
+						double dflux = ((*utop)(ixtop,iytop+1,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
+						//dflux *= 2.0;
+						if( bcf )
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+									(*u)(ix+j,iy,iz+k) -= dflux;
+						else
+							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop+1,iztop) - 2.0*flux;
+						
+					}
+					// top boundary
+					if( iy == ny && ix%2==0 && iz%2==0 )
+					{
+						flux = 0.0;
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+							{		
+								ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) );
+								ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
+								ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) );
+								
+								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+								
+								//(*u)(ix+j,iy,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix+j,iy-1,iz+k), (*u)(ix+j,iy-2,iz+k), 1.0 );
+								(*u)(ix+j,iy,iz+k) = interp2right( (*u)(ix+j,iy-2,iz+k), (*u)(ix+j,iy-1,iz+k), uhat  );
+								
+								flux += ((*u)(ix+j,iy,iz+k)-(*u)(ix+j,iy-1,iz+k));
+							}
+						flux /= 4.0;
+						//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop-1,iztop) + flux;
+						double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop-1,iztop))/2.0 - flux;
+						//dflux *= 2.0;
+						if( bcf )
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+									(*u)(ix+j,iy,iz+k) += dflux;
+						else
+							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop-1,iztop) + 2.0*flux;
+						
+					}
+					// front boundary
+					if( iz == -1 && ix%2==0 && iy%2==0 )
+					{
+						flux = 0.0;
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+							{		
+								ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) );
+								ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
+								ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) );
+								
+								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+								
+								//(*u)(ix+j,iy+k,iz) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2), -1.0 );
+								(*u)(ix+j,iy+k,iz) = interp2left( uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2) );
+								
+								flux += ((*u)(ix+j,iy+k,iz+1)-(*u)(ix+j,iy+k,iz));
+							}
+						flux /= 4.0;
+						//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop+1) - flux;
+						double dflux = ((*utop)(ixtop,iytop,iztop+1)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
+						//dflux *= 2.0;
+						if( bcf )
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+									(*u)(ix+j,iy+k,iz) -= dflux;
+						else
+							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop+1) - 2.0*flux;
+						
+					}
+					// back boundary
+					if( iz == nz && ix%2==0 && iy%2==0 )
+					{
+						flux = 0.0;
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+							{		
+								ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) );
+								ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
+								ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) );
+								
+								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+								
+								//(*u)(ix+j,iy+k,iz) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix+j,iy+k,iz-1), (*u)(ix+j,iy+k,iz-2), 1.0 );
+								(*u)(ix+j,iy+k,iz) = interp2right( (*u)(ix+j,iy+k,iz-2), (*u)(ix+j,iy+k,iz-1), uhat );
+								
+								flux += ((*u)(ix+j,iy+k,iz)-(*u)(ix+j,iy+k,iz-1));
+							}
+						flux /= 4.0;
+						//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop-1) + flux;
+						double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop,iztop-1))/2.0 - flux;
+						//dflux *= 2.0;
+						if( bcf )
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+									(*u)(ix+j,iy+k,iz) += dflux;
+						else
+							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop-1) + 2.0*flux;
+					}
+					
+				}
+			}
+	
+}
+#endif
+
+END_MULTIGRID_NAMESPACE
+ 
+#endif
diff --git a/output.cc b/output.cc
new file mode 100644
index 0000000..9975204
--- /dev/null
+++ b/output.cc
@@ -0,0 +1,73 @@
+/*
+ 
+ output.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+#include "output.hh"
+
+
+std::map< std::string, output_plugin_creator *>& 
+get_output_plugin_map()
+{
+	static std::map< std::string, output_plugin_creator* > output_plugin_map;
+	return output_plugin_map;
+}
+
+void print_output_plugins()
+{
+	std::map< std::string, output_plugin_creator *>& m = get_output_plugin_map();
+	
+	std::map< std::string, output_plugin_creator *>::iterator it;
+	it = m.begin();
+	std::cout << " - Available output plug-ins:\n";
+	while( it!=m.end() )
+	{
+		if( (*it).second )
+			std::cout << "\t\'" << (*it).first << "\'\n";
+		++it;
+	}
+		
+}
+
+output_plugin *select_output_plugin( config_file& cf )
+{
+	std::string formatname = cf.getValue<std::string>( "output", "format" );
+	
+	output_plugin_creator *the_output_plugin_creator 
+	= get_output_plugin_map()[ formatname ];
+	
+	if( !the_output_plugin_creator )
+	{	
+		std::cerr << " - Error: output plug-in \'" << formatname << "\' not found." << std::endl;
+		print_output_plugins();
+		throw std::runtime_error("Unknown output plug-in");
+		
+	}else
+		std::cout << " - Selecting output plug-in \'" << formatname << "\'..." << std::endl;
+	
+	output_plugin *the_output_plugin 
+	= the_output_plugin_creator->create( cf );
+	
+	return the_output_plugin;
+}
+
+
+
diff --git a/output.hh b/output.hh
new file mode 100644
index 0000000..0c6c7e9
--- /dev/null
+++ b/output.hh
@@ -0,0 +1,176 @@
+/*
+ 
+ output.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+#ifndef __OUTPUT_HH
+#define __OUTPUT_HH
+
+#include <string>
+#include <map>
+
+#include "general.hh"
+
+
+/*!
+ * @class output_plugin
+ * @brief abstract base class for output plug-ins
+ *
+ * This class provides the abstract base class for all output plug-ins.
+ * All output plug-ins need to derive from it and implement the purely
+ * virtual member functions.
+ */
+class output_plugin
+{
+protected:
+	
+	//! reference to the config_file object that holds all configuration options
+	config_file& cf_;
+	
+	//! output file or directory name
+	std::string fname_;
+	
+	//! minimum refinement level
+	unsigned levelmin_;
+	
+	//! maximum refinement level
+	unsigned levelmax_;
+	
+	std::vector<unsigned> 
+		offx_,		//!< vector describing the x-offset of each level
+		offy_,		//!< vector describing the y-offset of each level
+		offz_,		//!< vector describing the z-offset of each level
+		sizex_,		//!< vector describing the extent in x of each level
+		sizey_,		//!< vector describing the extent in y of each level
+		sizez_;		//!< vector describing the extent in z of each level
+	
+	//! quick access function to query properties of the refinement grid from the configuration options
+	/*! @param name	name of the config property
+	 *  @param icomp component index (0=x, 1=y, 2=z)
+	 *  @param oit output iterator (e.g. std::back_inserter for vectors)
+	 */
+	template< typename output_iterator >
+	void query_grid_prop( std::string name, int icomp, output_iterator oit )
+	{
+		char str[128];
+		for( unsigned i=levelmin_; i<=levelmax_; ++i )
+		{
+			sprintf( str, "%s(%d,%d)", name.c_str(), i, icomp );
+			*oit = cf_.getValue<unsigned>( "setup", str );
+			++oit;
+		}
+	}
+	
+public:
+	
+	//! constructor
+	explicit output_plugin( config_file& cf )
+	: cf_(cf)
+	{ 
+		fname_		= cf.getValue<std::string>("output","filename");
+		levelmin_	= cf.getValue<unsigned>( "setup", "levelmin" );
+		levelmax_	= cf.getValue<unsigned>( "setup", "levelmax" );
+
+		query_grid_prop( "offset", 0, std::back_inserter(offx_) );
+		query_grid_prop( "offset", 1, std::back_inserter(offy_) );
+		query_grid_prop( "offset", 2, std::back_inserter(offz_) );
+		
+		query_grid_prop( "size", 0, std::back_inserter(sizex_) );
+		query_grid_prop( "size", 1, std::back_inserter(sizey_) );
+		query_grid_prop( "size", 2, std::back_inserter(sizez_) );
+	}
+	
+	//! destructor
+	virtual ~output_plugin()
+	{ }
+	
+	//! purely virtual prototype to write the masses for each dark matter particle
+	virtual void write_dm_mass( const grid_hierarchy& gh ) = 0;
+	
+	//! purely virtual prototype to write the dark matter density field 
+	virtual void write_dm_density( const grid_hierarchy& gh ) = 0;
+	
+	//! purely virtual prototype to write the dark matter gravitational potential (from which displacements are computed in 1LPT)
+	virtual void write_dm_potential( const grid_hierarchy& gh ) = 0;
+	
+	//! purely virtual prototype to write dark matter particle velocities
+	virtual void write_dm_velocity( int coord, const grid_hierarchy& gh ) = 0;
+	
+	//! purely virtual prototype to write dark matter particle positions
+	virtual void write_dm_position( int coord, const grid_hierarchy& gh ) = 0;
+
+	//! purely virtual prototype to write the baryon velocities
+	virtual void write_gas_velocity( int coord, const grid_hierarchy& gh ) = 0;
+
+	//! purely virtual prototype to write the baryon coordinates
+	virtual void write_gas_position( int coord, const grid_hierarchy& gh ) = 0;
+	
+	//! purely virtual prototype to write the baryon density field
+	virtual void write_gas_density( const grid_hierarchy& gh )  = 0;
+	
+	//! purely virtual prototype to write the baryon gravitational potential (from which displacements are computed in 1LPT)
+	virtual void write_gas_potential( const grid_hierarchy& gh )  = 0;
+	
+	//! purely virtual prototype for all things to be done at the very end
+	virtual void finalize( void ) = 0;
+};
+
+/*!
+ * @brief implements abstract factory design pattern for output plug-ins
+ */
+struct output_plugin_creator
+{
+	//! create an instance of a plug-in
+	virtual output_plugin * create( config_file& cf ) const = 0;
+	
+	//! destroy an instance of a plug-in
+	virtual ~output_plugin_creator() { }
+};
+
+//! maps the name of a plug-in to a pointer of the factory pattern 
+std::map< std::string, output_plugin_creator *>& get_output_plugin_map();
+
+//! print a list of all registered output plug-ins
+void print_output_plugins();
+
+/*!
+ * @brief concrete factory pattern for output plug-ins
+ */
+template< class Derived >
+struct output_plugin_creator_concrete : public output_plugin_creator
+{
+	//! register the plug-in by its name
+	output_plugin_creator_concrete( const std::string& plugin_name )
+	{
+		get_output_plugin_map()[ plugin_name ] = this;
+	}
+	
+	//! create an instance of the plug-in
+	output_plugin * create( config_file& cf ) const
+	{
+		return new Derived( cf );
+	}
+};
+
+//! failsafe version to select the output plug-in
+output_plugin *select_output_plugin( config_file& cf );
+
+#endif // __OUTPUT_HH
diff --git a/plugins/HDF_IO.hh b/plugins/HDF_IO.hh
new file mode 100755
index 0000000..f0cee50
--- /dev/null
+++ b/plugins/HDF_IO.hh
@@ -0,0 +1,1024 @@
+#ifndef __HDF_IO_HH
+#define __HDF_IO_HH
+
+/*
+    HDF_IO.hh  --  templated C++ HDF5 front-end functions, v1.2b
+      
+    Copyright (C) 2006-7  Oliver Hahn  --  ojha@gmx.de
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <typeinfo>
+#include <vector>
+#include <cstdlib>
+#include "hdf5.h"
+
+template<typename T>
+hid_t GetDataType( void )
+{
+  if( typeid(T) == typeid(int) )
+    return H5T_NATIVE_INT;
+
+  if( typeid(T) == typeid(unsigned) )
+    return H5T_NATIVE_UINT;
+
+  if( typeid(T) == typeid(float) )
+    return H5T_NATIVE_FLOAT;
+
+  if( typeid(T) == typeid(double) )
+    return H5T_NATIVE_DOUBLE;
+  
+	//if( typeid(T) == typeid(long long) )
+	//  return H5T_NATIVE_LLONG;
+  
+  std::cerr << " - Error: [HDF_IO] trying to evaluate unsupported type in GetDataType\n\n";
+  return -1;
+}
+
+#include <stdexcept>
+ 
+class HDFException : public std::runtime_error {
+	public:
+	 HDFException( const std::string &errtxt ) : std::runtime_error(errtxt) { }
+};
+
+
+inline bool DoesFileExist( std::string Filename ){
+        bool flag = false;
+        std::fstream fin(Filename.c_str(),std::ios::in|std::ios::binary);
+        if( fin.is_open() )
+                flag=true;
+        fin.close();
+        return flag;
+}
+
+inline void AssertFileOpen( std::string Filename )
+{
+  if( !DoesFileExist( Filename ) ){
+    std::fstream fout( Filename.c_str(), std::ios::out|std::ios::binary);
+    fout.close();
+  }
+}
+
+inline void HDFCreateFile( std::string Filename )
+{
+  hid_t HDF_FileID;
+  HDF_FileID = H5Fcreate( Filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT );
+  H5Fclose( HDF_FileID );
+}
+
+template< typename T>
+inline void HDFReadVector( const std::string Filename, const std::string ObjName, std::vector<T> &Data )
+{
+  HDFReadDataset( Filename, ObjName, Data );
+}
+
+
+inline void HDFGetDatasetExtent( const std::string Filename, const std::string ObjName, std::vector<int> &Extent )
+{
+  hid_t HDF_FileID, HDF_DatasetID, HDF_DataspaceID;
+
+  HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT );
+
+  //... save old error handler
+  herr_t   (*old_func)(void*);
+  void     *old_client_data;
+
+  H5Eget_auto(&old_func, &old_client_data);
+
+  //... turn off error handling by hdf5 library
+  H5Eset_auto(NULL, NULL);
+
+  //... probe dataset opening
+  HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() );
+
+  //... restore previous error handler
+  H5Eset_auto(old_func, old_client_data);
+
+  //... dataset did not exist or was empty
+  if( HDF_DatasetID < 0 ){
+	  std::stringstream ss;
+	  ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n";
+	  H5Fclose( HDF_FileID );
+	  throw HDFException(ss.str());
+	  return;
+  }
+
+  //... get space associated with dataset and its extensions
+  HDF_DataspaceID = H5Dget_space( HDF_DatasetID );
+
+  int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID );
+  
+  hsize_t *dimsize = new hsize_t[ndims];
+
+  H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL );
+
+  Extent.clear();
+  for(int i=0; i<ndims; ++i )
+    Extent.push_back( dimsize[i] );
+
+  delete[] dimsize;
+	
+  H5Sclose( HDF_DataspaceID );
+  H5Dclose( HDF_DatasetID );
+  H5Fclose( HDF_FileID );
+}
+
+template< typename T >
+inline void HDFReadDataset( const std::string Filename, const std::string ObjName, std::vector<T> &Data )
+{
+ 
+  hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID;
+  hsize_t HDF_StorageSize;
+
+  HDF_Type = GetDataType<T>();
+
+  HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT );
+
+
+  //... save old error handler
+  herr_t   (*old_func)(void*);
+  void     *old_client_data;
+
+  H5Eget_auto(&old_func, &old_client_data);
+
+  //... turn off error handling by hdf5 library
+  H5Eset_auto(NULL, NULL);
+
+  //... probe dataset opening
+  HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() );
+
+  //... restore previous error handler
+  H5Eset_auto(old_func, old_client_data);
+
+  //... dataset did not exist or was empty
+  if( HDF_DatasetID < 0 ){
+	  std::stringstream ss;
+	  ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n";
+	  Data.clear();
+	  H5Fclose( HDF_FileID );
+	  throw HDFException(ss.str());
+	  return;
+  }
+
+  //... get space associated with dataset and its extensions
+  HDF_DataspaceID = H5Dget_space( HDF_DatasetID );
+
+  int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID );
+  
+  hsize_t dimsize[ndims];
+
+  H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL );
+
+  HDF_StorageSize = 1;
+  for(int i=0; i<ndims; ++i )
+    HDF_StorageSize *= dimsize[i];
+
+  //... adjust the array size to hold the data
+  Data.clear();
+  Data.reserve( HDF_StorageSize );
+  Data.assign( HDF_StorageSize, (T)1 );
+
+  if( Data.capacity() < HDF_StorageSize ){
+    std::cerr << " - Error: not enough memory to store all data in HDFReadDataset!\n\n";
+    abort();
+  }
+
+  //... read the dataset
+  H5Dread( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, H5P_DEFAULT, &Data[0] );
+
+  if( Data.size() != HDF_StorageSize ){
+    std::cerr << " - Error: something went wrong while reading!\n\n";
+    abort();
+  }
+
+  H5Sclose( HDF_DataspaceID );
+  H5Dclose( HDF_DatasetID );
+  H5Fclose( HDF_FileID );
+}
+
+template<typename T >
+inline void HDFReadSelect( const std::string Filename, const std::string ObjName, const std::vector<unsigned>& ii, std::vector<T> &Data ){
+
+  hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID, HDF_MemspaceID;
+  hsize_t HDF_StorageSize;
+
+  HDF_Type = GetDataType<T>();
+
+  HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT );
+
+
+  //... save old error handler
+  herr_t   (*old_func)(void*);
+  void     *old_client_data;
+
+  H5Eget_auto(&old_func, &old_client_data);
+
+  //... turn off error handling by hdf5 library
+  H5Eset_auto(NULL, NULL);
+
+  //... probe dataset opening
+  HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() );
+
+  //... restore previous error handler
+  H5Eset_auto(old_func, old_client_data);
+
+  //... dataset did not exist or was empty
+  if( HDF_DatasetID < 0 ){
+    std::stringstream ss;
+    ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n";
+    Data.clear();
+    H5Fclose( HDF_FileID );
+    throw HDFException(ss.str());
+  }
+
+  //... get space associated with dataset and its extensions
+  HDF_DataspaceID = H5Dget_space( HDF_DatasetID );
+ 
+  hsize_t block[2];
+  block[0]  = ii.size();
+  block[1]  = 1;
+
+
+  Data.clear();
+  Data.reserve( block[0]*block[1] );
+  Data.assign( block[0]*block[1], (T)1 );
+  
+  HDF_MemspaceID = H5Screate_simple( 2, block, NULL );
+  //  H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block );
+  H5Sselect_elements( HDF_DataspaceID, H5S_SELECT_SET, ii.size(), (const hsize_t *)&ii[0] );
+
+  H5Dread( HDF_DatasetID, HDF_Type, HDF_MemspaceID, HDF_DataspaceID, H5P_DEFAULT, &Data[0] );
+  
+  H5Sclose( HDF_DataspaceID );
+  H5Sclose( HDF_MemspaceID );
+  H5Dclose( HDF_DatasetID );
+  H5Fclose( HDF_FileID );
+
+}
+
+template<typename T >
+inline void HDFReadVectorSelect( const std::string Filename, const std::string ObjName, const std::vector<unsigned>& ii, std::vector<T> &Data ){
+
+  hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID, HDF_MemspaceID;
+//  hsize_t HDF_StorageSize;
+
+  HDF_Type = GetDataType<T>();
+
+  HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT );
+
+
+  //... save old error handler
+  herr_t   (*old_func)(void*);
+  void     *old_client_data;
+
+  H5Eget_auto(&old_func, &old_client_data);
+
+  //... turn off error handling by hdf5 library
+  H5Eset_auto(NULL, NULL);
+
+  //... probe dataset opening
+  HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() );
+
+  //... restore previous error handler
+  H5Eset_auto(old_func, old_client_data);
+
+  //... dataset did not exist or was empty
+  if( HDF_DatasetID < 0 ){
+    std::stringstream ss;
+    ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n";
+    Data.clear();
+    H5Fclose( HDF_FileID );
+    throw HDFException(ss.str());
+    return;
+  }
+
+  //... get space associated with dataset and its extensions
+  HDF_DataspaceID = H5Dget_space( HDF_DatasetID );
+  int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID );
+  hsize_t dimsize[ndims];
+  H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL );
+
+  hsize_t block[2];
+  block[0]  = ii.size();
+  block[1]  = 3;
+  
+  std::vector<hsize_t> coord;
+  for( unsigned i=0; i<ii.size(); ++i )
+    for( unsigned k=0; k<3; ++k ){
+      coord.push_back(ii[i]);
+      coord.push_back(k);
+    }
+  //std::vector<unsigned>().swap(ii);
+  
+
+  
+
+  if( ii.size() == 0 ){
+    std::cerr << "attempted to read empty block. skipping....\n"; 
+    return;
+  }
+  //std::cerr << "starting 2 read...\n";
+  H5Sselect_none( HDF_DataspaceID );
+  if( H5Sselect_elements( HDF_DataspaceID, H5S_SELECT_SET, coord.size()/2, (const hsize_t *)&coord[0] ) < 0 )//(const hsize_t**)&coord[0] ) < 0 )
+    std::cerr << " - could not select elements properly\n";
+
+  if(H5Sselect_valid( HDF_DataspaceID )<=0 ){
+    std::cerr << "\n - sorry, invalid element selection in file \'"<< Filename.c_str() << "\'. \n - dumping 10 first indices...\n";
+
+    /*for( unsigned i=0; i<10; ++i ){
+      for( unsigned k=0; k<3; ++k ){
+        std::cerr << coord[3*i+k] << " ";
+      }
+      std::cerr << "\n";
+    }*/
+
+    return;
+  }
+
+  std::vector<hsize_t>().swap(coord);
+  Data.assign( block[0]*block[1], (T)0 );
+  HDF_MemspaceID = H5Screate_simple( 2, &block[0], NULL );
+
+  H5Dread( HDF_DatasetID, HDF_Type, HDF_MemspaceID, HDF_DataspaceID, H5P_DEFAULT, &Data[0] );
+  
+
+  H5Sclose( HDF_DataspaceID );
+  H5Sclose( HDF_MemspaceID );
+  H5Dclose( HDF_DatasetID );
+  H5Fclose( HDF_FileID );
+
+}
+
+template< typename T >
+inline void HDFReadVectorSlab( const std::string Filename, const std::string ObjName, unsigned nStart, unsigned nCount, std::vector<T> &Data )
+{
+  hsize_t 
+      offset[2],
+      stride[2],
+      count[2],
+      block[2];
+    
+  hid_t MemspaceID, FilespaceID, DatasetID, FileID;
+  hid_t Type =  GetDataType<T>();
+  
+  FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT );
+
+
+
+ //... save old error handler
+  herr_t   (*old_func)(void*);
+  void     *old_client_data;
+
+  H5Eget_auto(&old_func, &old_client_data);
+
+  //... turn off error handling by hdf5 library
+  H5Eset_auto(NULL, NULL);
+
+  //... probe dataset opening
+  DatasetID =  H5Dopen( FileID, ObjName.c_str() );
+
+  //... restore previous error handler
+  H5Eset_auto(old_func, old_client_data);
+
+  //... dataset did not exist or was empty
+  if( DatasetID < 0 ){
+    std::stringstream ss;
+    ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n";
+    Data.clear();
+    H5Fclose( FileID );
+    throw HDFException(ss.str());
+    return;
+  }
+
+  FilespaceID = H5Dget_space( DatasetID );
+  
+  offset[0] = nStart;
+  offset[1] = 0;
+  
+  count[0]  = 1;
+  count[1]  = 1;
+  
+  stride[0] = 1;
+  stride[1] = 1;
+  
+  block[0]  = nCount;
+  block[1]  = 3;
+
+
+  Data.clear();
+  Data.reserve( block[0]*block[1] );
+  Data.assign( block[0]*block[1], (T)1 );
+  
+  MemspaceID = H5Screate_simple( 2, block, NULL );
+  H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block );
+
+  H5Dread( DatasetID, Type, MemspaceID, FilespaceID, H5P_DEFAULT, &Data[0] );
+
+  H5Sclose( FilespaceID );
+  H5Sclose( MemspaceID );
+  H5Dclose( DatasetID );
+  H5Fclose( FileID );
+}
+
+template< typename T >
+inline void HDFReadDatasetSlab( const std::string Filename, const std::string ObjName, unsigned nStart, unsigned nCount, std::vector<T> &Data )
+{
+  hsize_t 
+      offset[2],
+      stride[2],
+      count[2],
+      block[2];
+    
+  hid_t MemspaceID, FilespaceID, DatasetID, FileID;
+  hid_t Type =  GetDataType<T>();
+  
+  FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT );
+
+
+
+ //... save old error handler
+  herr_t   (*old_func)(void*);
+  void     *old_client_data;
+
+  H5Eget_auto(&old_func, &old_client_data);
+
+  //... turn off error handling by hdf5 library
+  H5Eset_auto(NULL, NULL);
+
+  //... probe dataset opening
+  DatasetID =  H5Dopen( FileID, ObjName.c_str() );
+
+  //... restore previous error handler
+  H5Eset_auto(old_func, old_client_data);
+
+  //... dataset did not exist or was empty
+  if( DatasetID < 0 ){
+    std::stringstream ss;
+    ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n";
+    Data.clear();
+    H5Fclose( FileID );
+    throw HDFException(ss.str());
+    return;
+  }
+
+  FilespaceID = H5Dget_space( DatasetID );
+  
+  offset[0] = nStart;
+  offset[1] = 0;
+  
+  count[0]  = 1;
+  count[1]  = 1;
+  
+  stride[0] = 1;
+  stride[1] = 1;
+  
+  block[0]  = nCount;
+  block[1]  = 1;
+
+
+  Data.clear();
+  Data.reserve( block[0]*block[1] );
+  Data.assign( block[0]*block[1], (T)1 );
+  
+  MemspaceID = H5Screate_simple( 2, block, NULL );
+  H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block );
+
+  H5Dread( DatasetID, Type, MemspaceID, FilespaceID, H5P_DEFAULT, &Data[0] );
+
+  H5Sclose( FilespaceID );
+  H5Sclose( MemspaceID );
+  H5Dclose( DatasetID );
+  H5Fclose( FileID );
+}
+
+template< typename T>
+inline void HDFReadGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, T &Data )
+{
+
+  hid_t HDF_Type, HDF_FileID, HDF_GroupID, HDF_AttributeID;
+  //  hsize_t HDF_StorageSize;
+
+  HDF_Type = GetDataType<T>();
+
+  //... save old error handler
+  herr_t   (*old_func)(void*);
+  void     *old_client_data;
+
+  H5Eget_auto(&old_func, &old_client_data);
+
+  //... turn off error handling by hdf5 library
+  H5Eset_auto(NULL, NULL);
+
+  //... attempt to open attribute
+  
+  HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT );
+  HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() );
+  HDF_AttributeID = H5Aopen_name( HDF_GroupID, ObjName.c_str() );
+
+  if( HDF_FileID < 0 || HDF_GroupID < 0 || HDF_AttributeID < 0 ){
+    std::stringstream ss;
+    ss << " - Warning: attribute \'" << GroupName.c_str() << "/" << ObjName.c_str() << "\' does not exist or is empty.\n";
+    H5Fclose( HDF_FileID );
+    throw HDFException(ss.str());
+    return;
+  }
+
+
+  H5Aread( HDF_AttributeID, HDF_Type, &Data );
+
+  //... restore previous error handler
+  H5Eset_auto(old_func, old_client_data);
+
+
+  H5Aclose( HDF_AttributeID );
+  H5Gclose( HDF_GroupID );
+  H5Fclose( HDF_FileID );
+
+}
+
+template< typename T>
+inline void HDFReadGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, std::vector<T> &Data )
+{
+
+  hid_t HDF_Type, HDF_FileID, HDF_GroupID, HDF_AttributeID, HDF_DataspaceID;
+  hsize_t HDF_StorageSize;
+
+  HDF_Type = GetDataType<T>();
+
+  //... save old error handler
+  herr_t   (*old_func)(void*);
+  void     *old_client_data;
+
+  H5Eget_auto(&old_func, &old_client_data);
+
+  //... turn off error handling by hdf5 library
+  H5Eset_auto(NULL, NULL);
+
+  //... attempt to open attribute
+
+  HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT );
+  HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() );
+  HDF_AttributeID = H5Aopen_name( HDF_GroupID, ObjName.c_str() );
+
+  if( HDF_FileID < 0 || HDF_GroupID < 0 || HDF_AttributeID < 0 ){
+    std::stringstream ss;
+    ss << " - Warning: attribute \'" << GroupName.c_str() << "/" << ObjName.c_str() << "\' does not exist or is empty.\n";
+    H5Fclose( HDF_FileID );
+    throw HDFException(ss.str());
+    return;
+  }
+
+  //... get space associated with dataset and its extensions
+  HDF_DataspaceID = H5Aget_space( HDF_AttributeID );
+
+  int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID );
+  
+  hsize_t dimsize[ndims];
+
+  H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL );
+
+  HDF_StorageSize = 1;
+  for(int i=0; i<ndims; ++i )
+    HDF_StorageSize *= dimsize[i];
+
+  //... adjust the array size to hold the data
+  Data.clear();
+  Data.reserve( HDF_StorageSize );
+  Data.assign( HDF_StorageSize, (T)1 );
+
+  H5Aread( HDF_AttributeID, HDF_Type, &Data[0] );
+
+  //... restore previous error handler
+  H5Eset_auto(old_func, old_client_data);
+
+  H5Aclose( HDF_AttributeID );
+  H5Sclose( HDF_DataspaceID );
+  H5Gclose( HDF_GroupID );
+  H5Fclose( HDF_FileID );
+}
+
+template< typename T >
+inline void HDFWriteDataset( const std::string Filename, const std::string ObjName, const std::vector<T> &Data )
+{
+
+  hid_t
+    HDF_FileID,
+    HDF_DatasetID,
+    HDF_DataspaceID,
+    HDF_Type;
+
+  hsize_t HDF_Dims;
+
+  HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );
+
+  HDF_Type                = GetDataType<T>();
+
+  HDF_Dims                = Data.size();
+  HDF_DataspaceID         = H5Screate_simple(1, &HDF_Dims, NULL);
+  HDF_DatasetID           = H5Dcreate( HDF_FileID, ObjName.c_str(), HDF_Type,
+                                       HDF_DataspaceID, H5P_DEFAULT );
+  H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL,
+            H5P_DEFAULT, &Data[0] );
+  H5Dclose( HDF_DatasetID );
+  H5Sclose( HDF_DataspaceID );
+
+  H5Fclose( HDF_FileID );
+}
+
+template< typename T >
+inline void HDFWriteGroupDataset( const std::string Filename, const std::string GrpName, const std::string ObjName, const std::vector<T> &Data )
+{
+
+  hid_t
+    HDF_FileID,
+    HDF_GroupID,
+    HDF_DatasetID,
+    HDF_DataspaceID,
+    HDF_Type;
+
+  hsize_t HDF_Dims;
+
+  HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );
+  
+  HDF_GroupID = H5Gopen( HDF_FileID, GrpName.c_str() );
+
+  HDF_Type                = GetDataType<T>();
+
+  HDF_Dims                = Data.size();
+  HDF_DataspaceID         = H5Screate_simple(1, &HDF_Dims, NULL);
+  HDF_DatasetID           = H5Dcreate( HDF_GroupID, ObjName.c_str(), HDF_Type,
+                                       HDF_DataspaceID, H5P_DEFAULT );
+  H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL,
+            H5P_DEFAULT, &Data[0] );
+  H5Dclose( HDF_DatasetID );
+  H5Sclose( HDF_DataspaceID );
+  
+  H5Gclose( HDF_GroupID );
+
+  H5Fclose( HDF_FileID );
+}
+
+
+template< typename T >
+inline void HDFWriteDataset2D( const std::string Filename, const std::string ObjName, const std::vector<  std::vector<T> > &Data )
+{
+
+  hid_t
+    HDF_FileID,
+    HDF_DatasetID,
+    HDF_DataspaceID,
+    HDF_Type;
+
+  hsize_t HDF_Dims[2];
+
+  HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );
+
+  HDF_Type                = GetDataType<T>();
+
+  HDF_Dims[0]             = Data.size();
+  HDF_Dims[1]             = Data[0].size();
+  HDF_DataspaceID         = H5Screate_simple(2, HDF_Dims, NULL);
+  HDF_DatasetID           = H5Dcreate( HDF_FileID, ObjName.c_str(), HDF_Type,
+                                       HDF_DataspaceID, H5P_DEFAULT );
+
+  T *tmp = new T[HDF_Dims[0]*HDF_Dims[1]];
+
+  unsigned k=0;
+  for(unsigned i=0; i<HDF_Dims[0]; ++i )
+    for(unsigned j=0; j<HDF_Dims[1]; ++j ){
+      tmp[k++] = (Data[i])[j];
+    }
+
+  H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL,
+            H5P_DEFAULT, tmp );
+
+  delete[] tmp;
+
+  H5Dclose( HDF_DatasetID );
+  H5Sclose( HDF_DataspaceID );
+
+  H5Fclose( HDF_FileID );
+}
+
+template< typename T >
+inline void HDFWriteDataset3D( const std::string Filename, const std::string ObjName, unsigned nd[3], const std::vector< T > &Data )
+{
+
+  hid_t
+    HDF_FileID,
+    HDF_DatasetID,
+    HDF_DataspaceID,
+    HDF_Type;
+
+  hsize_t HDF_Dims[3];
+
+  HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );
+
+  HDF_Type                = GetDataType<T>();
+
+  HDF_Dims[0]             = nd[0];
+  HDF_Dims[1]             = nd[1];
+  HDF_Dims[2]             = nd[2];
+  
+  //std::cerr << nd[0]<<nd[1]<<nd[2]<<"\n";
+  HDF_DataspaceID         = H5Screate_simple(3, HDF_Dims, NULL);
+  HDF_DatasetID           = H5Dcreate( HDF_FileID, ObjName.c_str(), HDF_Type,
+                                       HDF_DataspaceID, H5P_DEFAULT );
+
+  H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL,
+            H5P_DEFAULT, &Data[0] );
+
+  H5Dclose( HDF_DatasetID );
+  H5Sclose( HDF_DataspaceID );
+
+  H5Fclose( HDF_FileID );
+}
+
+template< typename T >
+inline void HDFWriteDataset3Ds( const std::string Filename, const std::string ObjName, unsigned nd[3], const std::vector< T > &Data )
+{
+	
+	hid_t
+    HDF_FileID,
+    HDF_DatasetID,
+    HDF_DataspaceID,
+    HDF_Type;
+	
+	hsize_t HDF_Dims[4];
+	
+	HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );
+	
+	HDF_Type                = GetDataType<T>();
+	
+	HDF_Dims[0]             = 1;
+	HDF_Dims[1]             = nd[0];
+	HDF_Dims[2]             = nd[1];
+	HDF_Dims[3]             = nd[2];
+	
+	//std::cerr << nd[0]<<nd[1]<<nd[2]<<"\n";
+	HDF_DataspaceID         = H5Screate_simple(4, HDF_Dims, NULL);
+	HDF_DatasetID           = H5Dcreate( HDF_FileID, ObjName.c_str(), HDF_Type,
+										HDF_DataspaceID, H5P_DEFAULT );
+	
+	H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL,
+			 H5P_DEFAULT, &Data[0] );
+	
+	H5Dclose( HDF_DatasetID );
+	H5Sclose( HDF_DataspaceID );
+	
+	H5Fclose( HDF_FileID );
+}
+
+
+template< typename T >
+inline void HDFWriteDatasetVector( const std::string Filename, const std::string ObjName, const std::vector<T> &Data )
+{
+
+  hid_t
+    HDF_FileID,
+    HDF_DatasetID,
+    HDF_DataspaceID,
+    HDF_Type;
+
+  hsize_t HDF_Dims[2];
+
+  //  hsize_t HDF_Dims;
+
+  HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );
+
+  HDF_Type                = GetDataType<T>();
+
+  HDF_Dims[0]             = (hsize_t)(Data.size()/3);
+  HDF_Dims[1]             = 3;
+
+  if( Data.size() % 3 != 0 ){
+    std::cerr << " - Warning: Trying to write vector data in HDFWriteDatasetVector\n"
+              << "            but array length not divisible by 3!\n\n";
+
+  }
+
+  HDF_DataspaceID         = H5Screate_simple(2, HDF_Dims, NULL);
+  HDF_DatasetID           = H5Dcreate( HDF_FileID, ObjName.c_str(), H5T_NATIVE_FLOAT,
+                                       HDF_DataspaceID, H5P_DEFAULT );
+  H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL,
+            H5P_DEFAULT, &Data[0] );
+  H5Dclose( HDF_DatasetID );
+  H5Sclose( HDF_DataspaceID );
+
+  H5Fclose( HDF_FileID );
+}
+
+inline void HDFCreateGroup( const std::string Filename, const std::string GroupName )
+{
+	hid_t HDF_FileID, HDF_GroupID;
+
+	HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );
+	HDF_GroupID = H5Gcreate( HDF_FileID, GroupName.c_str(), 0 );
+	H5Gclose( HDF_GroupID );
+	H5Fclose( HDF_FileID );
+
+}
+
+inline void HDFCreateSubGroup( const std::string Filename, const std::string SuperGroupName, const std::string GroupName )
+{
+	hid_t HDF_FileID, HDF_GroupID, HDF_SuperGroupID;
+
+	HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );
+  HDF_SuperGroupID = H5Gopen( HDF_FileID, SuperGroupName.c_str() );
+	HDF_GroupID = H5Gcreate( HDF_SuperGroupID, GroupName.c_str(), 0 );
+	H5Gclose( HDF_GroupID );
+  H5Gclose( HDF_SuperGroupID );
+	H5Fclose( HDF_FileID );
+
+}
+
+template< typename T >
+inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, const std::vector< T > &Data )
+{
+ hid_t    HDF_FileID,    
+	  HDF_GroupID,    
+	  HDF_AttributeID,    
+	  HDF_DataspaceID,    
+	  HDF_DatatypeID;
+ 
+ hsize_t  HDF_Dims;
+ 
+ HDF_DatatypeID = GetDataType<T>();  
+ 
+ HDF_Dims = (hsize_t)(Data.size());
+
+ 
+ HDF_FileID      = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );  
+ HDF_GroupID     = H5Gopen( HDF_FileID, GroupName.c_str() );  
+ HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL);
+
+ HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), HDF_DatatypeID, HDF_DataspaceID, H5P_DEFAULT);  
+ H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data[0] );  
+ H5Aclose( HDF_AttributeID );  
+ H5Sclose( HDF_DataspaceID );  
+ H5Gclose( HDF_GroupID );  
+ H5Fclose( HDF_FileID ); 
+}
+
+template< typename T >
+inline void HDFWriteDatasetAttribute( const std::string Filename, const std::string DatasetName, const std::string ObjName, const std::vector< T > &Data )
+{
+	hid_t    HDF_FileID,    
+	HDF_DatasetID,    
+	HDF_AttributeID,    
+	HDF_DataspaceID,    
+	HDF_DatatypeID;
+	
+	hsize_t  HDF_Dims;
+	
+	HDF_DatatypeID = GetDataType<T>();  
+	
+	HDF_Dims = (hsize_t)(Data.size());
+	
+	
+	HDF_FileID      = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );  
+	HDF_DatasetID   = H5Dopen( HDF_FileID, DatasetName.c_str() );  
+	HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL);
+	
+	HDF_AttributeID = H5Acreate(HDF_DatasetID, ObjName.c_str(), HDF_DatatypeID, HDF_DataspaceID, H5P_DEFAULT);  
+	H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data[0] );  
+	H5Aclose( HDF_AttributeID );  
+	H5Sclose( HDF_DataspaceID );  
+	H5Dclose( HDF_DatasetID );  
+	H5Fclose( HDF_FileID ); 
+}
+
+
+template< typename T >
+inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, T
+Data )
+{
+
+  hid_t
+    HDF_FileID,
+    HDF_GroupID,
+    HDF_AttributeID,
+    HDF_DataspaceID,
+    HDF_DatatypeID;
+
+  HDF_DatatypeID = GetDataType<T>();
+
+  
+  
+  HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );
+  HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() );
+  HDF_DataspaceID         = H5Screate(H5S_SCALAR);
+  HDF_AttributeID         = H5Acreate(HDF_GroupID, ObjName.c_str(), HDF_DatatypeID,
+                                      HDF_DataspaceID, H5P_DEFAULT);
+  H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data );
+  H5Aclose( HDF_AttributeID );
+  H5Sclose( HDF_DataspaceID );
+  H5Gclose( HDF_GroupID );
+  H5Fclose( HDF_FileID );
+}
+
+template< typename T >
+inline void HDFWriteDatasetAttribute( const std::string Filename, const std::string DatasetName, const std::string ObjName, T Data )
+{
+	
+	hid_t
+    HDF_FileID,
+    HDF_DatasetID,
+    HDF_AttributeID,
+    HDF_DataspaceID,
+    HDF_DatatypeID;
+	
+	HDF_DatatypeID = GetDataType<T>();
+	
+	
+	
+	HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );
+	HDF_DatasetID = H5Dopen( HDF_FileID, DatasetName.c_str() );
+	HDF_DataspaceID         = H5Screate(H5S_SCALAR);
+	HDF_AttributeID         = H5Acreate(HDF_DatasetID, ObjName.c_str(), HDF_DatatypeID,
+										HDF_DataspaceID, H5P_DEFAULT);
+	H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data );
+	H5Aclose( HDF_AttributeID );
+	H5Sclose( HDF_DataspaceID );
+	H5Dclose( HDF_DatasetID );
+	H5Fclose( HDF_FileID );
+}
+
+template< typename T >
+inline void HDFWriteSubGroupAttribute( const std::string Filename, const std::string GroupName,  const std::string SubGroupName, const std::string ObjName, T
+Data )
+{
+
+  hid_t
+    HDF_FileID,
+    HDF_GroupID,
+    HDF_SubGroupID,
+    HDF_AttributeID,
+    HDF_DataspaceID,
+    HDF_DatatypeID;
+
+  HDF_DatatypeID = GetDataType<T>();
+
+  
+  
+  HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );
+  std::cerr << "opening " << GroupName.c_str() << std::endl;
+  HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() );
+  std::cerr << "opening " << SubGroupName.c_str() << std::endl;
+  HDF_SubGroupID = H5Gopen( HDF_GroupID, SubGroupName.c_str() );
+  HDF_DataspaceID         = H5Screate(H5S_SCALAR);
+  HDF_AttributeID         = H5Acreate(HDF_SubGroupID, ObjName.c_str(), HDF_DatatypeID,
+                                      HDF_DataspaceID, H5P_DEFAULT);
+  H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data );
+  H5Aclose( HDF_AttributeID );
+  H5Sclose( HDF_DataspaceID );
+  H5Gclose( HDF_SubGroupID );
+  H5Gclose( HDF_GroupID );
+  H5Fclose( HDF_FileID );
+}
+
+template<>
+inline void HDFWriteGroupAttribute<std::string>( const std::string Filename, const std::string GroupName, const std::string ObjName, std::string Data )
+{
+
+  hid_t
+    HDF_FileID,
+    HDF_GroupID,
+    HDF_AttributeID,
+    HDF_DataspaceID,
+    HDF_DatatypeID;
+
+  HDF_DatatypeID = H5Tcopy( H5T_C_S1 );
+
+  H5Tset_size( HDF_DatatypeID, Data.size() );
+  H5Tset_strpad(HDF_DatatypeID, H5T_STR_NULLPAD);
+  
+  HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT );
+  HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() );
+  HDF_DataspaceID         = H5Screate(H5S_SCALAR);
+  HDF_AttributeID         = H5Acreate(HDF_GroupID, ObjName.c_str(), HDF_DatatypeID,
+                                      HDF_DataspaceID, H5P_DEFAULT);
+  H5Awrite( HDF_AttributeID, HDF_DatatypeID, Data.c_str() );
+  H5Aclose( HDF_AttributeID );
+  H5Sclose( HDF_DataspaceID );
+  H5Gclose( HDF_GroupID );
+  H5Fclose( HDF_FileID );
+}
+#endif
diff --git a/plugins/output_enzo.cc b/plugins/output_enzo.cc
new file mode 100644
index 0000000..a66928b
--- /dev/null
+++ b/plugins/output_enzo.cc
@@ -0,0 +1,396 @@
+/*
+ 
+ output_enzo.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#ifdef HAVE_HDF5
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "output.hh"
+
+#include "HDF_IO.hh"
+
+class enzo_output_plugin : public output_plugin
+{
+protected:
+	
+	struct patch_header{
+		int component_rank;
+		int component_size;
+		std::vector<int> dimensions;
+		int rank;
+		std::vector<int> top_grid_dims;
+		std::vector<int> top_grid_end;
+		std::vector<int> top_grid_start;
+	};
+	
+	struct sim_header{
+		std::vector<int> dimensions;
+		std::vector<int> offset;
+		float a_start;
+		float dx;
+		float h0;
+		float omega_b;
+		float omega_m;
+		float omega_v;
+		float vfact;
+	};
+	
+	
+	sim_header the_sim_header;
+	
+	void write_sim_header( std::string fname, const sim_header& h )
+	{
+		HDFWriteGroupAttribute( fname, "/", "Dimensions", h.dimensions );
+		HDFWriteGroupAttribute( fname, "/", "Offset", h.offset );
+		HDFWriteGroupAttribute( fname, "/", "a_start", h.a_start );
+		HDFWriteGroupAttribute( fname, "/", "dx", h.dx );
+		HDFWriteGroupAttribute( fname, "/", "h0", h.h0 );
+		HDFWriteGroupAttribute( fname, "/", "omega_b", h.omega_b );
+		HDFWriteGroupAttribute( fname, "/", "omega_m", h.omega_m );
+		HDFWriteGroupAttribute( fname, "/", "omega_v", h.omega_v );
+		HDFWriteGroupAttribute( fname, "/", "vfact", h.vfact );
+	}
+	
+	void write_patch_header( std::string fname, std::string dsetname, const patch_header& h )
+	{
+		HDFWriteDatasetAttribute( fname, dsetname, "Component_Rank", h.component_rank );
+		HDFWriteDatasetAttribute( fname, dsetname, "Component_Size", h.component_size );
+		HDFWriteDatasetAttribute( fname, dsetname, "Dimensions", h.dimensions );
+		HDFWriteDatasetAttribute( fname, dsetname, "Rank", h.rank );
+		HDFWriteDatasetAttribute( fname, dsetname, "TopGridDims", h.top_grid_dims );
+		HDFWriteDatasetAttribute( fname, dsetname, "TopGridEnd", h.top_grid_end );
+		HDFWriteDatasetAttribute( fname, dsetname, "TopGridStart", h.top_grid_start );
+	}
+	
+	
+	void dump_grid_data( std::string fieldname, const grid_hierarchy& gh, double factor = 1.0, double add = 0.0 )
+	{
+		char enzoname[256], filename[256];
+		
+		for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel )
+		{
+			std::vector<int> ng;
+			ng.push_back( gh.get_grid(ilevel)->size(0) );
+			ng.push_back( gh.get_grid(ilevel)->size(1) );
+			ng.push_back( gh.get_grid(ilevel)->size(2) );
+			
+			//... need to copy data because we need to get rid of the ghost zones
+			std::vector<double> data;
+			data.reserve( ng[0]*ng[1]*ng[2] );
+			
+			for( int k=0; k<ng[2]; ++k )
+				for( int j=0; j<ng[1]; ++j )
+					for( int i=0; i<ng[0]; ++i )
+						data.push_back( (add+(*gh.get_grid(ilevel))(i,j,k))*factor );
+			
+			sprintf( enzoname, "%s.%d", fieldname.c_str(), ilevel-levelmin_ );
+			sprintf( filename, "%s/%s", fname_.c_str(), enzoname );
+			
+			HDFCreateFile( filename );
+			write_sim_header( filename, the_sim_header );
+			HDFWriteDataset3Ds( filename, enzoname, reinterpret_cast<unsigned*>(&ng[0]), data );
+			
+			//... header data for the patch
+			patch_header ph;
+			
+			ph.component_rank	= 1;
+			ph.component_size	= ng[0]*ng[1]*ng[2];
+			ph.dimensions		= ng;
+			ph.rank				= 3;
+			
+			ph.top_grid_dims.assign(3,(int)pow(2,levelmin_));
+			
+			//... offset_abs is in units of the current level cell size
+			
+			double rfac = 1.0/pow(2,ilevel-levelmin_);
+			
+			ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 0)*rfac) );
+			ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 1)*rfac) );
+			ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 2)*rfac) );
+			 
+			ph.top_grid_end.push_back( ph.top_grid_start[0] + (int)(ng[0]*rfac) );
+			ph.top_grid_end.push_back( ph.top_grid_start[1] + (int)(ng[1]*rfac) );
+			ph.top_grid_end.push_back( ph.top_grid_start[2] + (int)(ng[2]*rfac) );
+			
+			write_patch_header( filename, enzoname, ph );
+		}
+	}
+	
+public:
+	
+	enzo_output_plugin( config_file& cf )
+	: output_plugin( cf )
+	{ 
+		if( mkdir( fname_.c_str(), 0777 ) )
+		{
+			perror( fname_.c_str() );
+			throw std::runtime_error("Error in enzo_output_plugin!");
+		}
+		
+		bool bhave_hydro = cf_.getValue<bool>("setup","baryons");
+		
+		the_sim_header.dimensions.push_back( (int)pow(2,levelmin_) );
+		the_sim_header.dimensions.push_back( (int)pow(2,levelmin_) );
+		the_sim_header.dimensions.push_back( (int)pow(2,levelmin_) );
+		
+		the_sim_header.offset.push_back( 0 );
+		the_sim_header.offset.push_back( 0 );
+		the_sim_header.offset.push_back( 0 );
+		
+		the_sim_header.a_start		= 1.0/(1.0+cf.getValue<double>("setup","zstart"));
+		the_sim_header.dx			= cf.getValue<double>("setup","boxlength")/the_sim_header.dimensions[0]/(cf.getValue<double>("cosmology","H0")*0.01); // not sure?!?
+		the_sim_header.h0			= cf.getValue<double>("cosmology","H0")*0.01;
+		
+		if( bhave_hydro )
+			the_sim_header.omega_b		= cf.getValue<double>("cosmology","Omega_b");
+		else
+			the_sim_header.omega_b		= 0.0;
+		
+		the_sim_header.omega_m		= cf.getValue<double>("cosmology","Omega_m");
+		the_sim_header.omega_v		= cf.getValue<double>("cosmology","Omega_L");
+		the_sim_header.vfact		= cf.getValue<double>("cosmology","vfact")*the_sim_header.h0;   //.. need to multiply by h, ENZO wants this factor for non h-1 units
+		
+	}
+	
+	~enzo_output_plugin()
+	{ }
+	
+	void write_dm_mass( const grid_hierarchy& gh )
+	{	/* do nothing, not needed */	}
+	
+	
+	void write_dm_density( const grid_hierarchy& gh )
+	{	/* do nothing, not needed */	
+	
+		bool bhave_hydro = cf_.getValue<bool>("setup","baryons");
+		char filename[256];
+		unsigned nbase = (unsigned)pow(2,levelmin_);
+		
+		sprintf( filename, "%s/parameter_file.txt", fname_.c_str() );
+		
+		std::ofstream ofs( filename, std::ios::trunc );
+		
+		ofs
+		<< "#\n"
+		<< "ProblemType                = 30      // cosmology simulation\n"
+		<< "TopGridRank                = 3\n"
+		<< "TopGridDimensions          = " << nbase << " " << nbase << " " << nbase << "\n"
+		<< "SelfGravity                = 1       // gravity on\n"
+		<< "TopGridGravityBoundary     = 0       // Periodic BC for gravity\n"
+		<< "LeftFaceBoundaryCondition  = 3 3 3   // same for fluid\n"
+		<< "RightFaceBoundaryCondition = 3 3 3\n"
+		<< "\n"
+		<< "#\n";
+		
+		if( bhave_hydro )
+			ofs
+			<< "CosmologySimulationOmegaBaryonNow       = " << the_sim_header.omega_b << "\n"
+			<< "CosmologySimulationOmegaCDMNow          = " << the_sim_header.omega_m-the_sim_header.omega_b << "\n";
+		else
+			ofs
+			<< "CosmologySimulationOmegaBaryonNow       = " << 0.0 << "\n"
+			<< "CosmologySimulationOmegaCDMNow          = " << the_sim_header.omega_m << "\n";
+		
+		if( bhave_hydro )
+			ofs
+			<< "CosmologySimulationDensityName          = GridDensity\n"
+			<< "CosmologySimulationVelocity1Name        = GridVelocities_x\n"
+			<< "CosmologySimulationVelocity2Name        = GridVelocities_y\n"
+			<< "CosmologySimulationVelocity3Name        = GridVelocities_z\n";
+		
+		ofs
+		<< "CosmologySimulationCalculatePositions    = 1\n"
+		<< "CosmologySimulationParticleVelocity1Name = ParticleVelocities_x\n"
+		<< "CosmologySimulationParticleVelocity2Name = ParticleVelocities_y\n"
+		<< "CosmologySimulationParticleVelocity3Name = ParticleVelocities_z\n"
+		<< "\n"
+		<< "#\n"
+		<< "#  define cosmology parameters\n"
+		<< "#\n"
+		<< "ComovingCoordinates        = 1       // Expansion ON\n"
+		<< "CosmologyOmegaMatterNow    = " << the_sim_header.omega_m << "\n"
+		<< "CosmologyOmegaLambdaNow    = " << the_sim_header.omega_v << "\n"
+		<< "CosmologyHubbleConstantNow = " << the_sim_header.h0 << "     // in 100 km/s/Mpc\n"
+		<< "CosmologyComovingBoxSize   = " << cf_.getValue<double>("setup","boxlength") << "    // in Mpc/h\n"
+		<< "CosmologyMaxExpansionRate  = 0.015   // maximum allowed delta(a)/a\n"
+		<< "CosmologyInitialRedshift   = " << cf_.getValue<double>("setup","zstart") << "      //\n"
+		<< "CosmologyFinalRedshift     = 0       //\n"
+		<< "GravitationalConstant      = 1       // this must be true for cosmology\n"
+		<< "#\n"
+		<< "#\n"
+		<< "CosmologySimulationNumberOfInitialGrids = " << 1+levelmax_-levelmin_ << "\n";
+		
+		
+		//... only for additionally refined grids
+		for( unsigned ilevel = 0; ilevel< levelmax_-levelmin_; ++ilevel )
+		{
+			double h = 1.0/pow(2,levelmin_+1+ilevel);
+			
+			ofs
+			
+			<< "CosmologySimulationGridDimension[" << 1+ilevel << "] = " 
+				<< gh.size( levelmin_+ilevel+1, 0 ) << " "
+				<< gh.size( levelmin_+ilevel+1, 1 ) << " "
+				<< gh.size( levelmin_+ilevel+1, 2 ) << "\n"
+			
+			<< "CosmologySimulationGridLeftEdge[" << 1+ilevel << "] = "
+				<< h*gh.offset_abs(levelmin_+ilevel+1, 0) << " "
+				<< h*gh.offset_abs(levelmin_+ilevel+1, 1) << " "
+				<< h*gh.offset_abs(levelmin_+ilevel+1, 2) << "\n"
+			
+			<< "CosmologySimulationGridRightEdge[" << 1+ilevel << "] = "
+				<< h*(gh.offset_abs(levelmin_+ilevel+1, 0)+gh.size( levelmin_+ilevel+1, 0 )) << " "
+				<< h*(gh.offset_abs(levelmin_+ilevel+1, 1)+gh.size( levelmin_+ilevel+1, 1 )) << " "
+				<< h*(gh.offset_abs(levelmin_+ilevel+1, 2)+gh.size( levelmin_+ilevel+1, 2 )) << "\n"
+			
+			<< "CosmologySimulationGridLevel[" << 1+ilevel << "] = " << 1+ilevel << "\n";
+		}
+		
+		
+		
+		
+		
+	
+	}
+	
+	
+	void write_dm_velocity( int coord, const grid_hierarchy& gh )
+	{
+		char enzoname[256];
+		sprintf( enzoname, "ParticleVelocities_%c", (char)('x'+coord) );
+		
+		double vunit = 1.0/(1.225e2*sqrt(the_sim_header.omega_m/the_sim_header.a_start));// /the_sim_header.h0;
+		
+		dump_grid_data( enzoname, gh, vunit );
+	}
+	
+	
+	void write_dm_position( int coord, const grid_hierarchy& gh )
+	{
+		char fieldname[256];
+		sprintf( fieldname, "ParticlePositions_%c", (char)('x'+coord) );
+		
+		char enzoname[256], filename[256];
+		
+		for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel )
+		{
+			std::vector<int> ng;
+			ng.push_back( gh.get_grid(ilevel)->size(0) );
+			ng.push_back( gh.get_grid(ilevel)->size(1) );
+			ng.push_back( gh.get_grid(ilevel)->size(2) );
+			
+			//... need to copy data because we need to get rid of the ghost zones
+			std::vector<double> data;
+			data.reserve( ng[0]*ng[1]*ng[2] );
+
+			for( int k=0; k<ng[2]; ++k )
+				for( int j=0; j<ng[1]; ++j )
+					for( int i=0; i<ng[0]; ++i )
+					{
+						double xx[3];
+						gh.cell_pos(ilevel, i, j, k, xx);
+						data.push_back( fmod( 1.0 + xx[coord] + (*gh.get_grid(ilevel))(k,j,i), 1.0 ) );
+					}
+						
+			//--------------------------------------------------------------------------
+			
+			sprintf( enzoname, "%s.%d", fieldname, ilevel-levelmin_ );
+			sprintf( filename, "%s/%s", fname_.c_str(), enzoname );
+			
+			HDFCreateFile( filename );
+			write_sim_header( filename, the_sim_header );
+			HDFWriteDataset3Ds( filename, enzoname, reinterpret_cast<unsigned*>(&ng[0]), data );
+			
+			//--------------------------------------------------------------------------
+			//... header data for the patch
+			patch_header ph;
+			
+			ph.component_rank	= 1;
+			ph.component_size	= ng[0]*ng[1]*ng[2];
+			ph.dimensions		= ng;
+			ph.rank				= 3;
+			
+			ph.top_grid_dims.assign(3,(int)pow(2,levelmin_));
+			
+			//... offset_abs is in units of the current level cell size
+			
+			double rfac = 1.0/pow(2,ilevel-levelmin_);
+			
+			ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 0)*rfac) );
+			ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 1)*rfac) );
+			ph.top_grid_start.push_back( (int)(gh.offset_abs(ilevel, 2)*rfac) );
+			
+			ph.top_grid_end.push_back( ph.top_grid_start[0] + (int)(ng[0]*rfac) );
+			ph.top_grid_end.push_back( ph.top_grid_start[1] + (int)(ng[1]*rfac) );
+			ph.top_grid_end.push_back( ph.top_grid_start[2] + (int)(ng[2]*rfac) );
+			
+			write_patch_header( filename, enzoname, ph );
+			//--------------------------------------------------------------------------
+		}
+	}
+	
+	void write_dm_potential( const grid_hierarchy& gh )
+	{ }
+	
+	void write_gas_potential( const grid_hierarchy& gh )
+	{ }
+	
+	
+	void write_gas_velocity( int coord, const grid_hierarchy& gh )
+	{
+		double vunit = 1.0/(1.225e2*sqrt(the_sim_header.omega_m/the_sim_header.a_start));
+		
+		char enzoname[256];
+		sprintf( enzoname, "GridVelocities_%c", (char)('x'+coord) );
+		dump_grid_data( enzoname, gh, vunit );
+	}
+	
+	
+	void write_gas_position( int coord, const grid_hierarchy& gh )
+	{	
+		/* do nothing, not needed */	
+	}
+	
+	
+	void write_gas_density( const grid_hierarchy& gh )
+	{
+		
+		char enzoname[256];
+		sprintf( enzoname, "GridDensity" );
+		dump_grid_data( enzoname, gh, the_sim_header.omega_b/the_sim_header.omega_m, 1.0 );
+	}
+	
+	
+	void finalize( void )
+	{	}
+		
+	
+};
+
+namespace{
+	output_plugin_creator_concrete<enzo_output_plugin> creator("enzo");
+}
+
+#endif
+
diff --git a/plugins/output_gadget2.cc b/plugins/output_gadget2.cc
new file mode 100644
index 0000000..1e538ca
--- /dev/null
+++ b/plugins/output_gadget2.cc
@@ -0,0 +1,670 @@
+/*
+ 
+ output_gadget2.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#include <fstream>
+#include "output.hh"
+
+template< typename T_store=float >
+class gadget2_output_plugin : public output_plugin
+{
+protected:
+	
+	std::ofstream ofs_;
+	bool bmultimass_;
+	
+	
+	typedef struct io_header
+	{
+		int npart[6];                        
+		double mass[6];                      
+		double time;                         
+		double redshift;                     
+		int flag_sfr;                        
+		int flag_feedback;                   
+		unsigned int npartTotal[6];          
+		int flag_cooling;                    
+		int num_files;                       
+		double BoxSize;                      
+		double Omega0;                       
+		double OmegaLambda;                  
+		double HubbleParam;                  
+		int flag_stellarage;                 
+		int flag_metals;                     
+		unsigned int npartTotalHighWord[6];  
+		int  flag_entropy_instead_u;         
+		char fill[60];                       
+	}header;                       
+	
+	
+	header header_;
+	
+	std::string fname;
+	
+	enum iofields {
+		id_dm_mass, id_dm_vel, id_dm_pos, id_gas_vel, id_gas_rho, id_gas_temp
+	};
+	
+	unsigned block_buf_size_;
+	//bool bbndparticles_;
+	bool bmorethan2bnd_;
+	
+	
+	void assemble_scalar( unsigned nptot, std::string ifname )
+	{
+		T_store *tmp;
+		
+		std::ifstream 
+		ifs( ifname.c_str(), std::ios::binary );
+		
+		int
+		npleft = nptot, 
+		n2read = std::min((int)block_buf_size_,npleft);
+		
+		tmp = new T_store[block_buf_size_];
+		
+		std::vector<T_store> adata;
+		adata.reserve( block_buf_size_ );
+		
+		unsigned blk;
+		ifs.read( (char *)&blk, sizeof(int) );
+		if( blk != nptot*sizeof(T_store) ){
+			delete[] tmp;
+			throw std::runtime_error("Internal consistency error in gadget2 output plug-in");
+		}
+		
+		npleft  = nptot;
+		n2read  = std::min((int)block_buf_size_,npleft);
+		int blksize = nptot*sizeof(T_store);
+		
+		ofs_.write( reinterpret_cast<char*>(&blksize), sizeof(int) );
+		while( n2read > 0 )
+		{
+			ifs.read( reinterpret_cast<char*>(&tmp[0]), n2read*sizeof(T_store) );
+			ofs_.write( reinterpret_cast<char*>(&tmp[0]), n2read*sizeof(T_store) );
+			npleft -= n2read;
+			n2read = std::min( (int)block_buf_size_,npleft );
+		}
+		ofs_.write( reinterpret_cast<char*>(&blksize), sizeof(int) );
+		
+		remove( fname.c_str() );
+		
+		
+	}
+	
+	void assemble_vector( unsigned nptot, std::string fname1, std::string fname2,std::string fname3 )
+	{
+		T_store *tmp1, *tmp2, *tmp3;
+
+		std::ifstream 
+			ifs1( fname1.c_str(), std::ios::binary ),	
+			ifs2( fname2.c_str(), std::ios::binary ),	
+			ifs3( fname3.c_str(), std::ios::binary );
+		
+		int
+			npleft = nptot, 
+			n2read = std::min((int)block_buf_size_,npleft);
+		
+		
+		tmp1 = new T_store[block_buf_size_];
+		tmp2 = new T_store[block_buf_size_];
+		tmp3 = new T_store[block_buf_size_];
+		
+		std::vector<T_store> adata3;
+		adata3.reserve( 3*block_buf_size_ );
+		
+		unsigned blk;
+		ifs1.read( (char *)&blk, sizeof(int) );
+		if( blk != nptot*sizeof(T_store) ){
+			delete[] tmp1;
+			delete[] tmp2;
+			delete[] tmp3;
+			throw std::runtime_error("Internal consistency error in gadget2 output plug-in");
+		}
+			
+		
+		ifs2.read( (char *)&blk, sizeof(int) );
+		if( blk != nptot*sizeof(T_store) )
+		{	
+			delete[] tmp1;
+			delete[] tmp2;
+			delete[] tmp3;
+			throw std::runtime_error("Internal consistency error in gadget2 output plug-in");
+		}
+		
+		ifs3.read( (char *)&blk, sizeof(int) );
+		if( blk != nptot*sizeof(T_store) )
+		{	
+			delete[] tmp1;
+			delete[] tmp2;
+			delete[] tmp3;
+			throw std::runtime_error("Internal consistency error in gadget2 output plug-in");
+		}
+		
+		
+		//int blksize = 3*nptot*sizeof(T_store);
+		//ofs_.write( (char *)&blksize, sizeof(int) );
+		
+		while( n2read > 0 )
+		{
+			//ifs1.read( (char*)&tmp1[0], n2read*sizeof(T_store) );
+			ifs1.read( reinterpret_cast<char*>(&tmp1[0]), n2read*sizeof(T_store) );
+			ifs2.read( reinterpret_cast<char*>(&tmp2[0]), n2read*sizeof(T_store) );
+			ifs3.read( reinterpret_cast<char*>(&tmp3[0]), n2read*sizeof(T_store) );
+			
+			for( int i=0; i<n2read; ++i )
+			{
+				adata3.push_back( tmp1[i] );
+				adata3.push_back( tmp2[i] );
+				adata3.push_back( tmp3[i] );
+			}
+			ofs_.write( reinterpret_cast<char*>(&adata3[0]), 3*n2read*sizeof(T_store) );
+			
+			adata3.clear();
+			npleft -= n2read;
+			n2read = std::min( (int)block_buf_size_,npleft );
+		}
+		//ofs_.write( reinterpret_cast<char*>(&blksize), sizeof(int) );
+		
+		remove( fname1.c_str() );
+		remove( fname2.c_str() );
+		remove( fname3.c_str() );
+		
+		
+		delete[] tmp1;
+		delete[] tmp2;
+		delete[] tmp3;
+		
+	}
+	
+	
+	void assemble_gadget_file( void )
+	{
+		int blksize = sizeof(header);
+		
+		//... write the header .......................................................
+		ofs_.write( (char *)&blksize, sizeof(int) );
+		ofs_.write( (char *)&header_, sizeof(header) );
+		ofs_.write( (char *)&blksize, sizeof(int) );
+		
+		//............................................................................
+		//... copy from the temporary files, interleave the data and save ............
+		
+		char fnx[256],fny[256],fnz[256],fnvx[256],fnvy[256],fnvz[256],fnm[256];
+		
+		sprintf( fnx,  "___ic_temp_%05d.bin", 100*id_dm_pos+0 );
+		sprintf( fny,  "___ic_temp_%05d.bin", 100*id_dm_pos+1 );
+		sprintf( fnz,  "___ic_temp_%05d.bin", 100*id_dm_pos+2 );
+		sprintf( fnvx, "___ic_temp_%05d.bin", 100*id_dm_vel+0 );
+		sprintf( fnvy, "___ic_temp_%05d.bin", 100*id_dm_vel+1 );
+		sprintf( fnvz, "___ic_temp_%05d.bin", 100*id_dm_vel+2 );
+		sprintf( fnm,  "___ic_temp_%05d.bin", 100*id_dm_mass  );
+		
+		std::ifstream 
+		ifs1( fnx, std::ios::binary ),	
+		ifs2( fny, std::ios::binary ),	
+		ifs3( fnz, std::ios::binary );
+		
+		
+		const int 
+		nptot = header_.npart[1]+header_.npart[5];
+		int
+		npleft = nptot, 
+		n2read = std::min((int)block_buf_size_,npleft);
+		
+		
+		//... particle coordinates ..................................................
+		unsigned blk;
+		ifs1.read( (char *)&blk, sizeof(int) );
+		if( blk != nptot*sizeof(T_store) )
+			throw std::runtime_error("Internal consistency error in gadget2 output plug-in");
+		
+		ifs2.read( (char *)&blk, sizeof(int) );
+		if( blk != nptot*sizeof(T_store) )
+			throw std::runtime_error("Internal consistency error in gadget2 output plug-in");
+		
+		ifs3.read( (char *)&blk, sizeof(int) );
+		if( blk != nptot*sizeof(T_store) )
+			throw std::runtime_error("Internal consistency error in gadget2 output plug-in");
+		
+		std::vector<T_store> adata3;
+		adata3.reserve( 3*block_buf_size_ );
+		T_store *tmp1, *tmp2, *tmp3;
+		
+		tmp1 = new T_store[block_buf_size_];
+		tmp2 = new T_store[block_buf_size_];
+		tmp3 = new T_store[block_buf_size_];
+		
+		blksize = 3*nptot*sizeof(T_store);
+		ofs_.write( (char *)&blksize, sizeof(int) );
+		
+		while( n2read > 0 )
+		{
+			//ifs1.read( (char*)&tmp1[0], n2read*sizeof(T_store) );
+			ifs1.read( reinterpret_cast<char*>(&tmp1[0]), n2read*sizeof(T_store) );
+			ifs2.read( reinterpret_cast<char*>(&tmp2[0]), n2read*sizeof(T_store) );
+			ifs3.read( reinterpret_cast<char*>(&tmp3[0]), n2read*sizeof(T_store) );
+			
+			for( int i=0; i<n2read; ++i )
+			{
+				adata3.push_back( tmp1[i] );
+				adata3.push_back( tmp2[i] );
+				adata3.push_back( tmp3[i] );
+			}
+			ofs_.write( reinterpret_cast<char*>(&adata3[0]), 3*n2read*sizeof(T_store) );
+			
+			adata3.clear();
+			npleft -= n2read;
+			n2read = std::min( (int)block_buf_size_,npleft );
+		}
+		ofs_.write( reinterpret_cast<char*>(&blksize), sizeof(int) );
+		
+		remove( fnx );
+		remove( fny );
+		remove( fnz );
+		
+		//... particle velocities ..................................................
+		ifs1.close(); ifs1.open( fnvx, std::ios::binary );
+		ifs2.close(); ifs2.open( fnvy, std::ios::binary );
+		ifs3.close(); ifs3.open( fnvz, std::ios::binary );
+		
+		ifs1.read( (char *)&blk, sizeof(int) );
+		if( blk != nptot*sizeof(T_store)){
+			delete[] tmp1;
+			delete[] tmp2;
+			delete[] tmp3;
+			throw std::runtime_error("Internal consistency error in gadget2 output plug-in");
+		}
+		
+		ifs2.read( (char *)&blk, sizeof(int) );
+		if( blk != nptot*sizeof(T_store)){
+			delete[] tmp1;
+			delete[] tmp2;
+			delete[] tmp3;
+			throw std::runtime_error("Internal consistency error in gadget2 output plug-in");
+		}
+		
+		ifs3.read( (char *)&blk, sizeof(int) );
+		if( blk != nptot*sizeof(T_store)){
+			delete[] tmp1;
+			delete[] tmp2;
+			delete[] tmp3;
+			throw std::runtime_error("Internal consistency error in gadget2 output plug-in");
+		}
+		
+		npleft = nptot;
+		n2read = std::min((int)block_buf_size_,npleft);
+		
+		
+		ofs_.write( reinterpret_cast<char*>(&blksize), sizeof(int) );
+		
+		while( n2read > 0 )
+		{
+			ifs1.read( reinterpret_cast<char*>(&tmp1[0]), n2read*sizeof(T_store) );
+			ifs2.read( reinterpret_cast<char*>(&tmp2[0]), n2read*sizeof(T_store) );
+			ifs3.read( reinterpret_cast<char*>(&tmp3[0]), n2read*sizeof(T_store) );
+			
+			for( int i=0; i<n2read; ++i )
+			{
+				adata3.push_back( tmp1[i] );
+				adata3.push_back( tmp2[i] );
+				adata3.push_back( tmp3[i] );
+			}
+			
+			ofs_.write( reinterpret_cast<char*>(&adata3[0]), 3*n2read*sizeof(T_store) );
+			
+			adata3.clear();
+			npleft -= n2read;
+			n2read = std::min( (int)block_buf_size_,npleft );
+		}
+		ofs_.write( reinterpret_cast<char*>(&blksize), sizeof(int) );
+		remove( fnvx );
+		remove( fnvy );
+		remove( fnvz );
+		
+		
+		delete[] tmp2;
+		delete[] tmp3;
+		
+		
+		//... particle IDs ..........................................................
+		std::vector<unsigned> ids(block_buf_size_,0);
+		
+		unsigned idcount = 0;
+		npleft	= nptot;
+		n2read	= std::min((int)block_buf_size_,npleft);
+		blksize = sizeof(unsigned)*nptot;
+		
+		//... generate contiguous IDs and store in file .......................//
+		ofs_.write( reinterpret_cast<char*>(&blksize), sizeof(int) );
+		while( n2read > 0 )
+		{
+			for( int i=0; i<n2read; ++i )
+				ids[i] = idcount++;
+			ofs_.write( reinterpret_cast<char*>(&ids[0]), n2read*sizeof(unsigned) );
+			ids.clear();
+			npleft -= n2read;
+			n2read = std::min( (int)block_buf_size_,npleft );
+		}
+		ofs_.write( reinterpret_cast<char*>(&blksize), sizeof(int) );
+		
+		
+		//... particle masses .......................................................
+		if( bmultimass_ && bmorethan2bnd_ )
+		{
+			unsigned npcoarse = header_.npart[5];
+			
+			ifs1.close();
+			ifs1.open( fnm, std::ios::binary );
+			
+			if( !ifs1.good() )
+				throw std::runtime_error("Could not open buffer file in gadget2 output plug-in");
+			
+			ifs1.read( (char *)&blk, sizeof(int) );
+			if( blk != npcoarse*sizeof(T_store)){
+				delete[] tmp1;
+				throw std::runtime_error("Internal consistency error in gadget2 output plug-in");
+			}
+			
+			
+			npleft  = npcoarse;
+			n2read  = std::min((int)block_buf_size_,npleft);
+			blksize = npcoarse*sizeof(T_store);
+			
+			ofs_.write( reinterpret_cast<char*>(&blksize), sizeof(int) );
+			while( n2read > 0 )
+			{
+				ifs1.read( reinterpret_cast<char*>(&tmp1[0]), n2read*sizeof(T_store) );
+				ofs_.write( reinterpret_cast<char*>(&tmp1[0]), n2read*sizeof(T_store) );
+				npleft -= n2read;
+				n2read = std::min( (int)block_buf_size_,npleft );
+			}
+			ofs_.write( reinterpret_cast<char*>(&blksize), sizeof(int) );
+			
+			remove( fnm );
+		}
+		
+		delete[] tmp1;
+		ofs_.flush();
+	}
+	
+	
+public:
+	
+	gadget2_output_plugin( config_file& cf )//std::string afname, Cosmology cosm, Parameters param, unsigned block_buf_size = 100000 )
+	: output_plugin( cf ), ofs_( fname_.c_str(), std::ios::binary|std::ios::trunc )	
+	{
+		block_buf_size_ = cf_.getValueSafe<unsigned>("output","gadget_blksize",100000);
+		//bbndparticles_  = !cf_.getValueSafe<bool>("output","gadget_nobndpart",false);
+		
+		bmorethan2bnd_ = false;
+		if( levelmax_ > levelmin_ +1)
+			bmorethan2bnd_ = true;
+
+		bmultimass_ = true;
+		if( levelmax_ == levelmin_ )
+			bmultimass_ = false;
+			
+		
+		for( int i=0; i<6; ++i )
+		{
+			header_.npart[i] = 0;
+			header_.npartTotal[i] = 0;
+			header_.npartTotalHighWord[i] = 0;
+			header_.mass[i] = 0.0;
+		}
+		
+		//... set time ......................................................
+		header_.redshift = cf.getValue<double>("setup","zstart");
+		header_.time = 1.0/(1.0+header_.redshift);
+		
+		//... SF flags
+		header_.flag_sfr = 0;
+		header_.flag_feedback = 0;
+		header_.flag_cooling = 0;
+		
+		//... 
+		header_.num_files = 1;
+		header_.BoxSize = cf.getValue<double>("setup","boxlength");
+		header_.Omega0 = cf.getValue<double>("cosmology","Omega_m");
+		header_.OmegaLambda = cf.getValue<double>("cosmology","Omega_L");
+		header_.HubbleParam = cf.getValue<double>("cosmology","H0");
+		
+		header_.flag_stellarage = 0;
+		header_.flag_metals = 0;
+		
+		
+		header_.flag_entropy_instead_u = 0;
+	}
+	
+	
+	void write_dm_mass( const grid_hierarchy& gh )
+	{
+		double rhoc = 27.7519737; // h^2 1e10 M_sol / Mpc^3
+		
+		header_.mass[1] = header_.Omega0 * rhoc * pow(header_.BoxSize,3.)/pow(2,3*levelmax_);
+		
+		if( bmorethan2bnd_ )
+		{
+			
+			std::vector<T_store> temp_dat;
+			temp_dat.clear();
+						
+			for( int ilevel=gh.levelmax()-1; ilevel>=(int)gh.levelmin(); --ilevel )
+			{
+				double pmass = header_.Omega0 * rhoc * pow(header_.BoxSize,3.)/pow(2,3*ilevel);			
+				
+				for( unsigned i=0; i<gh.get_grid(ilevel)->size(0); ++i )
+					for( unsigned j=0; j<gh.get_grid(ilevel)->size(1); ++j )
+						for( unsigned k=0; k<gh.get_grid(ilevel)->size(2); ++k )
+							if( ! gh.is_refined(ilevel,i,j,k) )
+								temp_dat.push_back( pmass );
+			}
+			
+			char temp_fname[256];
+			sprintf( temp_fname, "___ic_temp_%05d.bin", 100*id_dm_mass );
+			std::ofstream ofs_temp( temp_fname, std::ios::binary|std::ios::trunc );
+			
+			int blksize = sizeof(T_store)*temp_dat.size();
+			ofs_temp.write( (char *)&blksize, sizeof(int) );
+			ofs_temp.write( (char*)&temp_dat[0], blksize );		
+			ofs_temp.write( (char *)&blksize, sizeof(int) );
+			
+			
+		}
+		else 
+		{
+			header_.mass[5] = header_.Omega0 * rhoc * pow(header_.BoxSize,3.)/pow(2,3*levelmin_);
+		}
+	}
+	
+	
+	void write_dm_position( int coord, const grid_hierarchy& gh )
+	{
+		//... count number of leaf cells ...//
+		unsigned npcoarse = 0, npfine = 0;
+		
+		npfine   = gh.count_leaf_cells(gh.levelmax(), gh.levelmax());
+		if( bmultimass_ )
+			npcoarse = gh.count_leaf_cells(gh.levelmin(), gh.levelmax()-1);
+		
+		
+		//... determine if we need to shift the coordinates back
+		double *shift = NULL;
+		
+		if( cf_.getValueSafe<bool>("output","shift_back",false ) )
+		{
+			if( coord == 0 )
+				std::cout << " - gadget2 output plug-in will shift particle positions back...\n";
+			
+			double h = 1.0/pow(2,levelmin_);
+			shift = new double[3];
+			shift[0] = -(double)cf_.getValue<int>( "setup", "shift_x" )*h;
+			shift[1] = -(double)cf_.getValue<int>( "setup", "shift_y" )*h;
+			shift[2] = -(double)cf_.getValue<int>( "setup", "shift_z" )*h;
+		}
+		
+		
+		//...
+		header_.npart[1] = npfine;
+		header_.npart[5] = npcoarse;
+		header_.npartTotal[1] = npfine;
+		header_.npartTotal[5] = npcoarse;
+		header_.npartTotalHighWord[1] = 0;
+		header_.npartTotalHighWord[5] = 0;
+		
+		
+		//... collect displacements and convert to absolute coordinates with correct
+		//... units
+		std::vector<T_store> temp_data;
+		temp_data.reserve( npfine );
+		
+		double xfac = header_.BoxSize;
+		
+		for( int ilevel=gh.levelmax(); ilevel>=(int)gh.levelmin(); --ilevel )
+			for( unsigned i=0; i<gh.get_grid(ilevel)->size(0); ++i )
+				for( unsigned j=0; j<gh.get_grid(ilevel)->size(1); ++j )
+					for( unsigned k=0; k<gh.get_grid(ilevel)->size(2); ++k )
+						if( ! gh.is_refined(ilevel,i,j,k) )
+						{
+							double xx[3];
+							gh.cell_pos(ilevel, i, j, k, xx);
+							if( shift != NULL )
+								xx[coord] += shift[coord];
+							xx[coord] = fmod( (xx[coord]+(*gh.get_grid(ilevel))(i,j,k))*xfac + header_.BoxSize, header_.BoxSize );
+							temp_data.push_back( xx[coord] );
+						}
+		
+		
+		//... dump to temporary file
+		char temp_fname[256];
+		
+		sprintf( temp_fname, "___ic_temp_%05d.bin", 100*id_dm_pos+coord );
+		std::ofstream ofs_temp( temp_fname, std::ios::binary|std::ios::trunc );
+		
+		int blksize = sizeof(T_store)*temp_data.size();
+		ofs_temp.write( (char *)&blksize, sizeof(int) );
+		ofs_temp.write( (char*)&temp_data[0], blksize );
+		ofs_temp.write( (char *)&blksize, sizeof(int) );
+		ofs_temp.close();
+		
+		
+				
+		if( shift != NULL )
+			delete[] shift;
+		
+	}
+	
+	void write_dm_velocity( int coord, const grid_hierarchy& gh )
+	{
+		//... count number of leaf cells ...//
+		unsigned npcoarse = 0, npfine = 0;
+		
+		npfine   = gh.count_leaf_cells(gh.levelmax(), gh.levelmax());
+		if( bmultimass_ )
+			npcoarse = gh.count_leaf_cells(gh.levelmin(), gh.levelmax()-1);
+		
+		header_.npart[1] = npfine;
+		header_.npart[5] = npcoarse;
+		header_.npartTotal[1] = npfine;
+		header_.npartTotal[5] = npcoarse;
+		header_.npartTotalHighWord[1] = 0;
+		header_.npartTotalHighWord[5] = 0;
+		
+		//... collect displacements and convert to absolute coordinates with correct
+		//... units
+		std::vector<T_store> temp_data;
+		temp_data.reserve( npfine+npcoarse );
+		
+		float isqrta = 1.0f/sqrt(header_.time);
+		//float vfac = cosm_.astart/100.0*isqrta*param_.boxlength;
+		float vfac = isqrta*header_.BoxSize;
+		
+		for( int ilevel=levelmax_; ilevel>=(int)levelmin_; --ilevel )
+			for( unsigned i=0; i<gh.get_grid(ilevel)->size(0); ++i )
+				for( unsigned j=0; j<gh.get_grid(ilevel)->size(1); ++j )
+					for( unsigned k=0; k<gh.get_grid(ilevel)->size(2); ++k )
+						if( ! gh.is_refined(ilevel,i,j,k) )
+							temp_data.push_back( (*gh.get_grid(ilevel))(i,j,k) * vfac );
+							
+		//... dump to temporary file
+		char temp_fname[256];
+		
+		sprintf( temp_fname, "___ic_temp_%05d.bin", 100*id_dm_vel+coord );
+		std::ofstream ofs_temp( temp_fname, std::ios::binary|std::ios::trunc );
+		
+		int blksize = sizeof(T_store)*temp_data.size();
+		ofs_temp.write( (char *)&blksize, sizeof(int) );
+		ofs_temp.write( (char*)&temp_data[0], blksize );
+		ofs_temp.write( (char *)&blksize, sizeof(int) );
+		ofs_temp.close();
+
+		
+	}
+	
+	void write_dm_density( const grid_hierarchy& gh )
+	{
+		//... we don't care about DM density for Gadget
+	}
+	
+	void write_dm_potential( const grid_hierarchy& gh )
+	{ }
+	
+	void write_gas_potential( const grid_hierarchy& gh )
+	{ }
+	
+	
+	
+	//... write data for gas -- don't do this
+	void write_gas_velocity( int coord, const grid_hierarchy& gh )
+	{	
+		std::cout << " - WARNING: Gadget-2 output plug-in does not support baryons yet!\n"
+				  << "            Baryon data is not written to file!" << std::endl;
+	}
+	
+	void write_gas_position( int coord, const grid_hierarchy& gh )
+	{	
+		std::cout << " - WARNING: Gadget-2 output plug-in does not support baryons yet!\n"
+				  << "            Baryon data is not written to file!" << std::endl;
+	}
+	
+	void write_gas_density( const grid_hierarchy& gh )
+	{	
+		std::cout << " - WARNING: Gadget-2 output plug-in does not support baryons yet!\n"
+				  << "            Baryon data is not written to file!" << std::endl;
+	}
+	
+	void finalize( void )
+	{	
+		this->assemble_gadget_file();
+	}
+};
+
+
+
+namespace{
+	output_plugin_creator_concrete< gadget2_output_plugin<float> > creator1("gadget2");
+#ifndef SINGLE_PRECISION
+	output_plugin_creator_concrete< gadget2_output_plugin<double> > creator2("gadget2_double");
+#endif
+}
+
diff --git a/plugins/output_generic.cc b/plugins/output_generic.cc
new file mode 100644
index 0000000..8308ccd
--- /dev/null
+++ b/plugins/output_generic.cc
@@ -0,0 +1,192 @@
+/*
+ 
+ output_generic.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+
+#ifdef HAVE_HDF5
+
+#include "output.hh"
+#include "HDF_IO.hh"
+
+
+class generic_output_plugin : public output_plugin
+{
+protected:
+	
+	using output_plugin::cf_;
+		
+	template< typename Tt >
+	void write2HDF5( std::string fname, std::string dname, const MeshvarBnd<Tt>& data )
+	{
+		int nres = data.size(0), nb = data.m_nbnd;
+		std::vector<Tt> vdata;
+		vdata.reserve((unsigned)pow(nres+2*nb,3));
+		for(int i=-nb; i<nres+nb; ++i )
+			for(int j=-nb; j<nres+nb; ++j )
+				for(int k=-nb; k<nres+nb; ++k )
+					vdata.push_back( data(i,j,k) );
+		
+		unsigned nd[3] = { nres+2*nb,nres+2*nb,nres+2*nb	};
+		HDFWriteDataset3D( fname, dname, nd, vdata);
+		
+	}
+	
+public:
+	generic_output_plugin( config_file& cf )//std::string fname, Cosmology cosm, Parameters param )
+	: output_plugin( cf )//fname, cosm, param )
+	{
+
+		HDFCreateFile(fname_);
+		
+		HDFCreateGroup(fname_, "header");
+
+		HDFWriteDataset(fname_,"/header/grid_off_x",offx_);
+		HDFWriteDataset(fname_,"/header/grid_off_y",offy_);
+		HDFWriteDataset(fname_,"/header/grid_off_z",offz_);
+		
+		HDFWriteDataset(fname_,"/header/grid_len_x",sizex_);
+		HDFWriteDataset(fname_,"/header/grid_len_y",sizey_);
+		HDFWriteDataset(fname_,"/header/grid_len_z",sizez_);
+		
+		HDFWriteGroupAttribute(fname_, "header", "levelmin", levelmin_ );
+		HDFWriteGroupAttribute(fname_, "header", "levelmax", levelmax_ );
+	}
+	
+	~generic_output_plugin()
+	{	}
+	
+	void write_dm_mass( const grid_hierarchy& gh )
+	{	}
+	
+	void write_dm_velocity( int coord, const grid_hierarchy& gh )
+	{
+		char sstr[128];
+		
+		for( unsigned ilevel=0; ilevel<=levelmax_; ++ilevel )
+		{
+			if( coord == 0 )
+				sprintf(sstr,"level_%03d_DM_vx",ilevel);
+			else if( coord == 1 )
+				sprintf(sstr,"level_%03d_DM_vy",ilevel);
+			else if( coord == 2 )
+				sprintf(sstr,"level_%03d_DM_vz",ilevel);
+			
+			write2HDF5( fname_, sstr, *gh.get_grid(ilevel) );
+		}
+	}
+	
+	void write_dm_position( int coord, const grid_hierarchy& gh )
+	{
+		char sstr[128];
+		
+		for( unsigned ilevel=0; ilevel<=levelmax_; ++ilevel )
+		{
+			if( coord == 0 )
+				sprintf(sstr,"level_%03d_DM_dx",ilevel);
+			else if( coord == 1 )
+				sprintf(sstr,"level_%03d_DM_dy",ilevel);
+			else if( coord == 2 )
+				sprintf(sstr,"level_%03d_DM_dz",ilevel);
+			
+			write2HDF5( fname_, sstr, *gh.get_grid(ilevel) );
+		}
+	}
+	
+	void write_dm_density( const grid_hierarchy& gh )
+	{
+		char sstr[128];
+		
+		for( unsigned ilevel=0; ilevel<=levelmax_; ++ilevel )
+		{
+			sprintf(sstr,"level_%03d_DM_rho",ilevel);
+			write2HDF5( fname_, sstr, *gh.get_grid(ilevel) );
+		}
+	}
+	
+	void write_dm_potential( const grid_hierarchy& gh )
+	{ 
+		char sstr[128];
+		
+		for( unsigned ilevel=0; ilevel<=levelmax_; ++ilevel )
+		{
+			sprintf(sstr,"level_%03d_DM_potential",ilevel);
+			write2HDF5( fname_, sstr, *gh.get_grid(ilevel) );
+		}
+	}
+	
+	void write_gas_potential( const grid_hierarchy& gh )
+	{ 
+		char sstr[128];
+		
+		for( unsigned ilevel=0; ilevel<=levelmax_; ++ilevel )
+		{
+			sprintf(sstr,"level_%03d_BA_potential",ilevel);
+			write2HDF5( fname_, sstr, *gh.get_grid(ilevel) );
+		}
+	}
+	
+	
+	
+	void write_gas_velocity( int coord, const grid_hierarchy& gh )
+	{	
+		char sstr[128];
+		
+		for( unsigned ilevel=0; ilevel<=levelmax_; ++ilevel )
+		{
+			if( coord == 0 )
+				sprintf(sstr,"level_%03d_BA_vx",ilevel);
+			else if( coord == 1 )
+				sprintf(sstr,"level_%03d_BA_vy",ilevel);
+			else if( coord == 2 )
+				sprintf(sstr,"level_%03d_BA_vz",ilevel);
+			
+			write2HDF5( fname_, sstr, *gh.get_grid(ilevel) );
+		}
+	}
+	
+	void write_gas_position( int coord, const grid_hierarchy& gh )
+	{	}
+	
+	void write_gas_density( const grid_hierarchy& gh )
+	{	
+		char sstr[128];
+		
+		for( unsigned ilevel=0; ilevel<=levelmax_; ++ilevel )
+		{
+			sprintf(sstr,"level_%03d_BA_rho",ilevel);
+			write2HDF5( fname_, sstr, *gh.get_grid(ilevel) );
+		}
+	}
+	
+	void finalize( void )
+	{	}
+};
+
+
+
+namespace{
+	output_plugin_creator_concrete< generic_output_plugin > creator("generic");
+}
+
+
+#endif
+
diff --git a/plugins/output_grafic2.cc b/plugins/output_grafic2.cc
new file mode 100644
index 0000000..58f4f6c
--- /dev/null
+++ b/plugins/output_grafic2.cc
@@ -0,0 +1,325 @@
+/*
+ 
+ output_grafic2.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fstream>
+#include "output.hh"
+
+
+//! Implementation of class grafic2_output_plugin 
+/*!
+ This class implements a grafic-2 (cf. Bertschinger 2001) compatible
+ output format. With some RAMSES extras.
+*/
+class grafic2_output_plugin : public output_plugin
+{
+protected:
+	
+	
+	typedef struct{
+		int n1, n2, n3;
+		float dxini0;
+		float xoff10,xoff20,xoff30;
+		float astart0,omega_m0,omega_l0,h00;
+		
+	}header;
+	
+	bool bhavehydro_;
+	
+	
+	void write_file_header( std::ofstream& ofs, unsigned ilevel, const grid_hierarchy& gh )
+	{
+		header loc_head;
+		
+		double 
+			boxlength	= cf_.getValue<double>("setup","boxlength"),
+			H0			= cf_.getValue<double>("cosmology","H0"),
+			zstart		= cf_.getValue<double>("setup","zstart"),
+			astart		= 1.0/(1.0+zstart),
+			omegam		= cf_.getValue<double>("cosmology","Omega_m"),
+			omegaL		= cf_.getValue<double>("cosmology","Omega_L");
+		
+		loc_head.n1 = gh.get_grid(ilevel)->size(0);
+		loc_head.n2 = gh.get_grid(ilevel)->size(1);
+		loc_head.n3 = gh.get_grid(ilevel)->size(2);
+		
+		loc_head.dxini0 = boxlength / (H0*0.01) / pow(2.0,ilevel);
+		
+		loc_head.xoff10 = gh.offset_abs(ilevel,0) * loc_head.dxini0;
+		loc_head.xoff20 = gh.offset_abs(ilevel,1) * loc_head.dxini0;
+		loc_head.xoff30 = gh.offset_abs(ilevel,2) * loc_head.dxini0;
+		
+		loc_head.astart0 = astart;
+		loc_head.omega_m0 = omegam;
+		loc_head.omega_l0 = omegaL;
+		loc_head.h00 = H0;
+		
+		
+		int blksz = sizeof(header);
+		ofs.write( reinterpret_cast<char*> (&blksz), sizeof(int) );
+		ofs.write( reinterpret_cast<char*> (&loc_head), blksz );
+		ofs.write( reinterpret_cast<char*> (&blksz), sizeof(int) );
+		
+	}
+	
+	void write_sliced_array( std::ofstream& ofs, unsigned ilevel, const grid_hierarchy& gh, float fac = 1.0f )
+	{
+		unsigned n1,n2,n3;
+		n1 = gh.get_grid(ilevel)->size(0);
+		n2 = gh.get_grid(ilevel)->size(1);
+		n3 = gh.get_grid(ilevel)->size(2);
+		
+		std::vector<float> data(n1*n2,0.0f);
+		
+		for( unsigned i=0; i<n1; ++i )
+		{
+			
+			data.clear();
+			
+			for( unsigned j=0; j<n2; ++j )
+				for( unsigned k=0; k<n3; ++k )
+					data[j*n3+k] = (*gh.get_grid(ilevel))(k,j,i) * fac;
+			
+			unsigned blksize = n2*n3*sizeof(float);
+			
+			ofs.write( reinterpret_cast<char*> (&blksize), sizeof(unsigned) );
+			ofs.write( reinterpret_cast<char*> (&data[0]), blksize );
+			ofs.write( reinterpret_cast<char*> (&blksize), sizeof(unsigned) );
+			
+		}
+	}
+	
+	void write_ramses_namelist( const grid_hierarchy& gh )
+	{
+		//... also write the refinement options to a dummy namelist file
+		char ff[256];
+		sprintf(ff,"%s/ramses.nml",fname_.c_str() );
+		
+		std::ofstream ofst(ff,std::ios::trunc);
+		
+		ofst 
+		<< "&INIT_PARAMS\n"
+		<< "filetype=\'grafic\'\n";
+		for( unsigned i=gh.levelmin();i<=gh.levelmax(); ++i)
+		{
+			sprintf(ff,"initfile(%d)=\'%s/level_%03d\'\n",i-gh.levelmin()+1,fname_.c_str(), i );
+			ofst << std::string(ff);
+		}
+		ofst << "/\n\n";
+		
+		
+		double xc,yc,zc,l;
+		
+		
+		
+		ofst
+		<< "&AMR_PARAMS\n"
+		<< "levelmin=" << gh.levelmin() << "\n"
+		<< "levelmax=" << gh.levelmax() << "\n"
+		<< "ngridtot=2000000\n"
+		<< "nparttot=3000000\n"
+		<< "nexpand=1\n/\n\n";
+		
+		
+		if( gh.levelmax() > gh.levelmin() )
+		{
+			l = pow(2.0,gh.levelmin()+1);
+			xc = ((double)gh.offset_abs(gh.levelmin()+1,0)+0.5*(double)gh.size(gh.levelmin()+1,0))/l;
+			yc = ((double)gh.offset_abs(gh.levelmin()+1,1)+0.5*(double)gh.size(gh.levelmin()+1,1))/l;
+			zc = ((double)gh.offset_abs(gh.levelmin()+1,2)+0.5*(double)gh.size(gh.levelmin()+1,2))/l;	
+		
+			ofst << "&REFINE_PARAMS\n"
+				<< "m_refine=0.";
+			
+			
+			for( unsigned i=gh.levelmin()+1;i<gh.levelmax(); ++i)
+				ofst << ",0.";
+			ofst << "\nx_refine="<< std::setw(12) << xc;
+			for( unsigned i=gh.levelmin()+1;i<gh.levelmax(); ++i)
+			{	
+				l = pow(2.0,i+1);
+				xc = ((double)gh.offset_abs(i+1,0)+0.5*(double)gh.size(i+1,0))/l;
+				ofst << ","<< std::setw(12) << xc;
+			}
+			ofst << "\ny_refine="<< std::setw(12)<< yc;
+			for( unsigned i=gh.levelmin()+1;i<gh.levelmax(); ++i)
+			{	
+				l = pow(2.0,i+1);
+				yc = ((double)gh.offset_abs(i+1,1)+0.5*(double)gh.size(i+1,1))/l;
+				ofst << ","<< std::setw(12) << yc;
+			}
+			ofst << "\nz_refine="<< std::setw(12) << zc;
+			for( unsigned i=gh.levelmin()+1;i<gh.levelmax(); ++i)
+			{	
+				l = pow(2.0,i+1);
+				zc = ((double)gh.offset_abs(i+1,2)+0.5*(double)gh.size(i+1,2))/l;
+				ofst << ","<< std::setw(12) << zc;
+			}
+			
+			ofst << "\nr_refine=";
+			for(unsigned i=gh.levelmin();i<gh.levelmax(); ++i )
+			{
+				double r = (gh.size(i+1,0)-4.0)/pow(2.0,i+1);
+				if( i==gh.levelmin() )
+					ofst << std::setw(12) << r;
+				else
+					ofst << "," << std::setw(12) << r;
+			}
+			ofst << "\nexp_refine=10.0";
+			for( unsigned i=gh.levelmin()+1;i<gh.levelmax(); ++i)
+				ofst << ",10.0";
+			ofst << "\n/\n";
+		}
+		
+		sprintf(ff,"%s/ramses.nml",fname_.c_str() );
+		std::cout	<< " - The grafic2 output plug-in wrote the grid data to a partial\n"
+		<< "   RAMSES namelist file \'" << ff << "\'\n"; 
+	}
+	
+public:
+	
+	grafic2_output_plugin( config_file& cf )
+	: output_plugin( cf )
+	{
+		// create directory structure
+		remove( fname_.c_str() );
+		mkdir( fname_.c_str(), 0777 );
+		for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel )
+		{
+			char fp[256];
+			sprintf(fp,"%s/level_%03d",fname_.c_str(), ilevel );
+			mkdir( fp, 0777 );
+		}
+		
+		
+		bhavehydro_ = cf.getValue<bool>("setup","baryons");
+	}
+	
+	/*~grafic2_output_plugin()
+	 { }*/
+	
+	
+	void write_dm_position( int coord, const grid_hierarchy& gh  )
+	{
+		double 
+		boxlength	= cf_.getValue<double>("setup","boxlength");
+		//			H0			= cf_.getValue<double>("cosmology","H0");
+		
+		for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel )
+		{
+			
+			char ff[256];
+			sprintf(ff,"%s/level_%03d/ic_posc%c",fname_.c_str(), ilevel, (char)('x'+coord) );
+			
+			std::ofstream ofs(ff,std::ios::binary|std::ios::trunc);
+			
+			write_file_header( ofs, ilevel, gh );
+			write_sliced_array( ofs, ilevel, gh, boxlength );
+		}
+	}
+	
+	void write_dm_velocity( int coord, const grid_hierarchy& gh )
+	{
+		double 
+		boxlength	= cf_.getValue<double>("setup","boxlength");
+		
+		for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel )
+		{
+			
+			char ff[256];
+			sprintf(ff,"%s/level_%03d/ic_velc%c",fname_.c_str(), ilevel, (char)('x'+coord) );
+			
+			std::ofstream ofs(ff,std::ios::binary|std::ios::trunc);
+			
+			write_file_header( ofs, ilevel, gh );
+			write_sliced_array( ofs, ilevel, gh, boxlength );
+		}
+	}
+	
+	void write_gas_velocity( int coord, const grid_hierarchy& gh )
+	{	
+		double 
+		boxlength	= cf_.getValue<double>("setup","boxlength");
+		
+		for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel )
+		{
+			
+			char ff[256];
+			sprintf(ff,"%s/level_%03d/ic_velb%c",fname_.c_str(), ilevel, (char)('x'+coord) );
+			
+			std::ofstream ofs(ff,std::ios::binary|std::ios::trunc);
+			
+			write_file_header( ofs, ilevel, gh );
+			write_sliced_array( ofs, ilevel, gh, boxlength );
+		}
+	}
+	
+	void write_gas_density( const grid_hierarchy& gh )
+	{	
+		for(unsigned ilevel=levelmin_; ilevel<=levelmax_; ++ilevel )
+		{
+			
+			char ff[256];
+			sprintf(ff,"%s/level_%03d/ic_deltab",fname_.c_str(), ilevel );
+			
+			std::ofstream ofs(ff,std::ios::binary|std::ios::trunc);
+			
+			write_file_header( ofs, ilevel, gh );
+			write_sliced_array( ofs, ilevel, gh );
+		}
+		
+	}
+	
+	
+	void write_dm_density( const grid_hierarchy& gh )
+	{	
+		if(! bhavehydro_ )
+			write_gas_density(gh);
+		
+		if( cf_.getValueSafe<bool>("output","ramses_nml",true) )
+			write_ramses_namelist(gh);
+		
+	}
+	
+	void write_dm_mass( const grid_hierarchy& gh )
+	{	/* do nothing, not used... */ }
+	
+	void write_dm_potential( const grid_hierarchy& gh )
+	{	/* do nothing, not used... */ }
+	
+	void write_gas_potential( const grid_hierarchy& gh )
+	{	/* do nothing, not used... */ }
+	
+	void write_gas_position( int coord, const grid_hierarchy& gh )
+	{	/* do nothing, not used... */ }
+	
+	void finalize( void )
+	{	}
+	
+};
+
+namespace{
+	output_plugin_creator_concrete<grafic2_output_plugin> creator("grafic2");
+}
+
diff --git a/plugins/transfer_bbks.cc b/plugins/transfer_bbks.cc
new file mode 100644
index 0000000..3266d9b
--- /dev/null
+++ b/plugins/transfer_bbks.cc
@@ -0,0 +1,95 @@
+/*
+ 
+ transfer_bbks.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#include "transfer_function.hh"
+
+//! Implementation of class TransferFunction_BBKS for the BBKS transfer function 
+/*!
+ This class implements the analytical fit to the matter transfer
+ function by Bardeen, Bond, Kaiser & Szalay (BBKS).
+ ( see Bardeen et al. (1986) )
+ */
+class transfer_bbks_plugin : public transfer_function_plugin{
+private:
+	double      m_Gamma;
+	
+public:
+	//! Constructor
+	/*!
+	 \param aCosm Structure of type Cosmology carrying the cosmological parameters
+	 \param bSugiyama flag whether the Sugiyama (1995) correction shall be applied (default=true)
+	 */
+	transfer_bbks_plugin( config_file& cf )
+    : transfer_function_plugin( cf )
+	{  
+		double Omega0 = cosmo_.Omega_m;
+		double FreeGamma = -1.0;
+		
+		bool bSugiyama(true);
+		
+		try{
+			bSugiyama= pcf_->getValue<bool>( "cosmology", "sugiyama_corr" );
+		}catch(...){
+			throw std::runtime_error("Error in \'tranfer_bbks_plugin\': need to specify \'[cosmology]/sugiyama_corr = [true/false]");
+		}
+		
+		FreeGamma = pcf_->getValueSafe<double>( "cosmology", "gamma", FreeGamma );
+		
+		if( FreeGamma <= 0.0 ){
+			m_Gamma = Omega0*0.01*cosmo_.H0;
+			if( bSugiyama )
+				m_Gamma *= exp(-cosmo_.Omega_b*(1.0+sqrt(2.0*0.01*cosmo_.H0)/Omega0));
+		}else
+			m_Gamma = FreeGamma;
+		
+		tf_distinct_ = false;
+	
+		
+	}
+	
+	//! computes the value of the BBKS transfer function for mode k (in h/Mpc)
+	inline double compute( double k, tf_type type ){
+		double q, f1, f2;
+		
+		q = k/(m_Gamma);
+		f1 = log(1.0 + 2.34*q)/(2.34*q);
+		f2 = 1.0 + q*(3.89 + q*(259.21 + q*(162.771336 + q*2027.16958081)));
+		
+		return f1/sqrt(sqrt(f2));
+		
+	}
+	
+	inline double get_kmin( void ){
+		return 1e-4;
+	}
+	
+	inline double get_kmax( void ){
+		return 1.e4;
+	}
+};
+
+
+namespace{
+	transfer_function_plugin_creator_concrete< transfer_bbks_plugin > creator("bbks");
+}
+
diff --git a/plugins/transfer_camb.cc b/plugins/transfer_camb.cc
new file mode 100644
index 0000000..5085250
--- /dev/null
+++ b/plugins/transfer_camb.cc
@@ -0,0 +1,167 @@
+/*
+ 
+ transfer_camb.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#include "transfer_function.hh"
+
+class transfer_CAMB_plugin : public transfer_function_plugin
+{
+	
+private:
+	//Cosmology m_Cosmology;
+	
+	std::string m_filename_Pk, m_filename_Tk;
+	std::vector<double> m_tab_k, m_tab_Tk_tot, m_tab_Tk_cdm, m_tab_Tk_baryon;
+	
+	Spline_interp *m_psinterp;
+	gsl_interp_accel *acc_tot, *acc_cdm, *acc_baryon;
+	gsl_spline *spline_tot, *spline_cdm, *spline_baryon;
+	
+	
+	
+	void read_table( void ){
+#ifdef WITH_MPI
+		if( MPI::COMM_WORLD.Get_rank() == 0 ){
+#endif
+			std::cerr 
+			<< " - reading tabulated transfer function data from file \n"
+			<< "    \'" << m_filename_Tk << "\'\n";
+			
+			std::string line;
+			std::ifstream ifs( m_filename_Tk.c_str() );
+			
+			m_tab_k.clear();
+			m_tab_Tk_tot.clear();
+			m_tab_Tk_cdm.clear();
+			m_tab_Tk_baryon.clear();
+			
+			while( !ifs.eof() ){
+				getline(ifs,line);
+				
+				if(ifs.eof()) break;
+				
+				std::stringstream ss(line);
+				
+				double k, Tkc, Tkb, Tkg, Tkr, Tknu, Tktot;
+				ss >> k;
+				ss >> Tkc;
+				ss >> Tkb;
+				ss >> Tkg;
+				ss >> Tkr;
+				ss >> Tknu;
+				ss >> Tktot;
+				
+				m_tab_k.push_back( log10(k) );
+				
+				m_tab_Tk_tot.push_back( log10(Tktot) );
+				m_tab_Tk_baryon.push_back( log10(Tkb) );
+				m_tab_Tk_cdm.push_back( log10(Tkc) );
+				
+			}
+			
+			ifs.close();
+			
+			
+			
+			
+#ifdef WITH_MPI
+		}
+		
+		unsigned n=m_tab_k.size();
+		MPI::COMM_WORLD.Bcast( &n, 1, MPI_UNSIGNED, 0 );
+		
+		if( MPI::COMM_WORLD.Get_rank() > 0 ){
+			m_tab_k.assign(n,0);
+			m_tab_Tk_tot.assign(n,0);
+			m_tab_Tk_cdm.assign(n,0);
+			m_tab_Tk_baryon.assign(n,0);
+
+		}
+		
+		MPI::COMM_WORLD.Bcast( &m_tab_k[0],  n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk_tot[0], n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk_cdm[0], n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk_baryon[0], n, MPI_DOUBLE, 0 );
+
+#endif
+		
+	}
+	
+public:
+	transfer_CAMB_plugin( config_file& cf )//Cosmology aCosm, std::string filename_Tk, TFtype iwhich )
+	: transfer_function_plugin( cf )//, m_filename_Tk( filename_Tk ), m_psinterp( NULL )
+	{
+		m_filename_Tk = pcf_->getValue<std::string>("cosmology","transfer_file");
+		
+		read_table( );
+		
+		acc_tot = gsl_interp_accel_alloc();
+		acc_cdm = gsl_interp_accel_alloc();
+		acc_baryon = gsl_interp_accel_alloc();
+		
+		
+		spline_tot = gsl_spline_alloc( gsl_interp_akima, m_tab_k.size() );
+		spline_cdm = gsl_spline_alloc( gsl_interp_akima, m_tab_k.size() );
+		spline_baryon = gsl_spline_alloc( gsl_interp_akima, m_tab_k.size() );
+		
+		gsl_spline_init (spline_tot, &m_tab_k[0], &m_tab_Tk_tot[0], m_tab_k.size() );
+		gsl_spline_init (spline_cdm, &m_tab_k[0], &m_tab_Tk_cdm[0], m_tab_k.size() );
+		gsl_spline_init (spline_baryon, &m_tab_k[0], &m_tab_Tk_baryon[0], m_tab_k.size() );
+		
+		tf_distinct_ = true;
+	}
+	
+	~transfer_CAMB_plugin()
+	{
+		gsl_spline_free (spline_tot);
+		gsl_spline_free (spline_cdm);
+		gsl_spline_free (spline_baryon);
+		
+		gsl_interp_accel_free (acc_tot);
+		gsl_interp_accel_free (acc_cdm);
+		gsl_interp_accel_free (acc_baryon);
+	}
+	
+	inline double compute( double k, tf_type type ){
+		if( type == total )
+			return pow(10.0, gsl_spline_eval (spline_tot, log10(k), acc_tot) );
+		if( type == cdm )
+			return pow(10.0, gsl_spline_eval (spline_cdm, log10(k), acc_cdm) );
+		
+		return pow(10.0, gsl_spline_eval (spline_baryon, log10(k), acc_baryon) );
+	}
+	
+	inline double get_kmin( void ){
+		return pow(10.0,m_tab_k[1]);
+	}
+	
+	inline double get_kmax( void ){
+		return pow(10.0,m_tab_k[m_tab_k.size()-2]);
+	}
+	
+};
+
+namespace{
+	transfer_function_plugin_creator_concrete< transfer_CAMB_plugin > creator("camb_file");
+}
+
+
diff --git a/plugins/transfer_eisenstein.cc b/plugins/transfer_eisenstein.cc
new file mode 100644
index 0000000..70d7e8a
--- /dev/null
+++ b/plugins/transfer_eisenstein.cc
@@ -0,0 +1,218 @@
+/*
+ 
+ transfer_eisenstein.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#include "transfer_function.hh"
+
+//! Implementation of abstract base class TransferFunction for the Eisenstein & Hu transfer function 
+/*!
+ This class implements the analytical fit to the matter transfer
+ function by Eisenstein & Hu (1999). In fact it is their code.
+ */
+class transfer_eisenstein_plugin : public transfer_function_plugin
+{
+protected:
+	using transfer_function_plugin::cosmo_;
+	
+	//Cosmology m_Cosmology;
+	double  m_h0;
+	double	omhh,		/* Omega_matter*h^2 */
+	obhh,		/* Omega_baryon*h^2 */
+	theta_cmb,	/* Tcmb in units of 2.7 K */
+	z_equality,	/* Redshift of matter-radiation equality, really 1+z */
+	k_equality,	/* Scale of equality, in Mpc^-1 */
+	z_drag,		/* Redshift of drag epoch */
+	R_drag,		/* Photon-baryon ratio at drag epoch */
+	R_equality,	/* Photon-baryon ratio at equality epoch */
+	sound_horizon,	/* Sound horizon at drag epoch, in Mpc */
+	k_silk,		/* Silk damping scale, in Mpc^-1 */
+	alpha_c,	/* CDM suppression */
+	beta_c,		/* CDM log shift */
+	alpha_b,	/* Baryon suppression */
+	beta_b,		/* Baryon envelope shift */
+	beta_node,	/* Sound horizon shift */
+	k_peak,		/* Fit to wavenumber of first peak, in Mpc^-1 */
+	sound_horizon_fit,	/* Fit to sound horizon, in Mpc */
+	alpha_gamma;	/* Gamma suppression in approximate TF */
+	
+	//! private member function: sets internal quantities for Eisenstein & Hu fitting
+	void TFset_parameters(double omega0hh, double f_baryon, double Tcmb)
+	/* Set all the scalars quantities for Eisenstein & Hu 1997 fitting formula */
+	/* Input: omega0hh -- The density of CDM and baryons, in units of critical dens,
+     multiplied by the square of the Hubble constant, in units
+     of 100 km/s/Mpc */
+	/* 	  f_baryon -- The fraction of baryons to CDM */
+	/*        Tcmb -- The temperature of the CMB in Kelvin.  Tcmb<=0 forces use
+	 of the COBE value of  2.728 K. */
+	/* Output: Nothing, but set many global variables used in TFfit_onek(). 
+     You can access them yourself, if you want. */
+	/* Note: Units are always Mpc, never h^-1 Mpc. */
+	{
+		double z_drag_b1, z_drag_b2;
+		double alpha_c_a1, alpha_c_a2, beta_c_b1, beta_c_b2, alpha_b_G, y;
+		
+		if (f_baryon<=0.0 || omega0hh<=0.0) {
+			fprintf(stderr, "TFset_parameters(): Illegal input.\n");
+			exit(1);
+		}
+		omhh = omega0hh;
+		obhh = omhh*f_baryon;
+		if (Tcmb<=0.0) Tcmb=2.728;	/* COBE FIRAS */
+		theta_cmb = Tcmb/2.7;
+		
+		z_equality = 2.50e4*omhh/POW4(theta_cmb);  /* Really 1+z */
+		k_equality = 0.0746*omhh/SQR(theta_cmb);
+		
+		z_drag_b1 = 0.313*pow((double)omhh,-0.419)*(1+0.607*pow((double)omhh,0.674));
+		z_drag_b2 = 0.238*pow((double)omhh,0.223);
+		z_drag = 1291*pow(omhh,0.251)/(1+0.659*pow((double)omhh,0.828))*
+		(1+z_drag_b1*pow((double)obhh,(double)z_drag_b2));
+		
+		R_drag = 31.5*obhh/POW4(theta_cmb)*(1000/(1+z_drag));
+		R_equality = 31.5*obhh/POW4(theta_cmb)*(1000/z_equality);
+		
+		sound_horizon = 2./3./k_equality*sqrt(6./R_equality)*
+	    log((sqrt(1+R_drag)+sqrt(R_drag+R_equality))/(1+sqrt(R_equality)));
+		
+		k_silk = 1.6*pow((double)obhh,0.52)*pow((double)omhh,0.73)*(1+pow((double)10.4*omhh,-0.95));
+		
+		alpha_c_a1 = pow((double)46.9*omhh,0.670)*(1+pow(32.1*omhh,-0.532));
+		alpha_c_a2 = pow((double)12.0*omhh,0.424)*(1+pow(45.0*omhh,-0.582));
+		alpha_c = pow(alpha_c_a1,-f_baryon)*
+		pow(alpha_c_a2,-CUBE(f_baryon));
+		
+		beta_c_b1 = 0.944/(1+pow(458*omhh,-0.708));
+		beta_c_b2 = pow(0.395*omhh, -0.0266);
+		beta_c = 1.0/(1+beta_c_b1*(pow(1-f_baryon, beta_c_b2)-1));
+		
+		y = z_equality/(1+z_drag);
+		alpha_b_G = y*(-6.*sqrt(1+y)+(2.+3.*y)*log((sqrt(1+y)+1)/(sqrt(1+y)-1)));
+		alpha_b = 2.07*k_equality*sound_horizon*pow(1+R_drag,-0.75)*alpha_b_G;
+		
+		beta_node = 8.41*pow(omhh, 0.435);
+		beta_b = 0.5+f_baryon+(3.-2.*f_baryon)*sqrt(pow(17.2*omhh,2.0)+1);
+		
+		k_peak = 2.5*3.14159*(1+0.217*omhh)/sound_horizon;
+		sound_horizon_fit = 44.5*log(9.83/omhh)/sqrt(1+10.0*pow(obhh,0.75));
+		
+		alpha_gamma = 1-0.328*log(431.0*omhh)*f_baryon + 0.38*log(22.3*omhh)*
+		SQR(f_baryon);
+		
+		return;
+	}
+	
+	//! private member function: computes transfer function for mode k (k in Mpc)
+	inline double TFfit_onek(double k, double *tf_baryon, double *tf_cdm)
+	/* Input: k -- Wavenumber at which to calculate transfer function, in Mpc^-1.
+	 *tf_baryon, *tf_cdm -- Input value not used; replaced on output if
+	 the input was not NULL. */
+	/* Output: Returns the value of the full transfer function fitting formula.
+     This is the form given in Section 3 of Eisenstein & Hu (1997).
+     *tf_baryon -- The baryonic contribution to the full fit.
+     *tf_cdm -- The CDM contribution to the full fit. */
+	/* Notes: Units are Mpc, not h^-1 Mpc. */
+	{
+		double T_c_ln_beta, T_c_ln_nobeta, T_c_C_alpha, T_c_C_noalpha;
+		double q, xx, xx_tilde;//, q_eff;
+		double T_c_f, T_c, s_tilde, T_b_T0, T_b, f_baryon, T_full;
+		//double T_0_L0, T_0_C0, T_0, gamma_eff; 
+		//double T_nowiggles_L0, T_nowiggles_C0, T_nowiggles;
+		
+		k = fabs(k);	/* Just define negative k as positive */
+		if (k==0.0) {
+			if (tf_baryon!=NULL) *tf_baryon = 1.0;
+			if (tf_cdm!=NULL) *tf_cdm = 1.0;
+			return 1.0;
+		}
+		
+		q = k/13.41/k_equality;
+		xx = k*sound_horizon;
+		
+		T_c_ln_beta = log(2.718282+1.8*beta_c*q);
+		T_c_ln_nobeta = log(2.718282+1.8*q);
+		T_c_C_alpha = 14.2/alpha_c + 386.0/(1+69.9*pow(q,1.08));
+		T_c_C_noalpha = 14.2 + 386.0/(1+69.9*pow(q,1.08));
+		
+		T_c_f = 1.0/(1.0+POW4(xx/5.4));
+		T_c = T_c_f*T_c_ln_beta/(T_c_ln_beta+T_c_C_noalpha*SQR(q)) +
+	    (1-T_c_f)*T_c_ln_beta/(T_c_ln_beta+T_c_C_alpha*SQR(q));
+		
+		s_tilde = sound_horizon*pow(1+CUBE(beta_node/xx),-1./3.);
+		xx_tilde = k*s_tilde;
+		
+		T_b_T0 = T_c_ln_nobeta/(T_c_ln_nobeta+T_c_C_noalpha*SQR(q));
+		T_b = sin(xx_tilde)/(xx_tilde)*(T_b_T0/(1+SQR(xx/5.2))+
+										alpha_b/(1+CUBE(beta_b/xx))*exp(-pow(k/k_silk,1.4)));
+		
+		f_baryon = obhh/omhh;
+		T_full = f_baryon*T_b + (1-f_baryon)*T_c;
+		
+		/* Now to store these transfer functions */
+		if (tf_baryon!=NULL) *tf_baryon = T_b;
+		if (tf_cdm!=NULL) *tf_cdm = T_c;
+		return T_full;
+	}
+	
+public:
+	//! Constructor for Eisenstein & Hu fitting for transfer function
+	/*!
+	 \param aCosm structure of type Cosmology carrying the cosmological parameters
+	 \param Tcmb mean temperature of the CMB fluctuations (defaults to
+	 Tcmb = 2.726 if not specified)
+	 */
+	transfer_eisenstein_plugin( config_file &cf )//Cosmology aCosm, double Tcmb = 2.726 )
+    :  transfer_function_plugin(cf), m_h0( cosmo_.H0*0.01 )
+	{
+		double Tcmb = pcf_->getValueSafe("cosmology","Tcmb",2.726);
+		TFset_parameters( (cosmo_.Omega_m)*cosmo_.H0*cosmo_.H0*(0.01*0.01), 
+						 cosmo_.Omega_b/(cosmo_.Omega_m-cosmo_.Omega_b),//-aCosm.Omega_b), 
+						 Tcmb);
+		tf_distinct_ = false;
+	}
+	
+	//! Computes the transfer function for k in Mpc/h by calling TFfit_onek
+	inline double compute( double k, tf_type type ){
+		double tfb, tfcdm, fb, fc; //, tfull
+		TFfit_onek( k*m_h0, &tfb, &tfcdm );
+		
+		fb = cosmo_.Omega_b/(cosmo_.Omega_m);
+		fc = (cosmo_.Omega_m-cosmo_.Omega_b)/(cosmo_.Omega_m) ;
+		
+		return fb*tfb+fc*tfcdm;
+		//return 1.0;
+	}
+	
+	
+	inline double get_kmin( void ){
+		return 1e-4;
+	}
+	
+	inline double get_kmax( void ){
+		return 1.e4;
+	}
+	
+};
+
+namespace{
+	transfer_function_plugin_creator_concrete< transfer_eisenstein_plugin > creator("eisenstein");
+}
+
diff --git a/plugins/transfer_inflation.cc b/plugins/transfer_inflation.cc
new file mode 100644
index 0000000..79a8b6c
--- /dev/null
+++ b/plugins/transfer_inflation.cc
@@ -0,0 +1,64 @@
+/*
+ 
+ tranfer_inflation.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#include "transfer_function.hh"
+
+class transfer_inflation_plugin : public transfer_function_plugin
+{
+protected:
+	
+	double ns2_;
+	
+public:
+	
+	transfer_inflation_plugin( config_file& cf ) 
+	: transfer_function_plugin( cf )
+	{ 
+		ns2_ = 0.5*cf.getValue<double>("cosmology","nspec");
+		tf_distinct_ = true;
+	}
+	
+	~transfer_inflation_plugin(){ };
+	
+	double compute( double k, tf_type type=baryon)
+	{
+		return pow(k,ns2_);
+	}
+	
+	double get_kmax( void )
+	{
+		return 1e10;
+	}
+	
+	double get_kmin( void )
+	{
+		return 1e-30;
+	}
+	
+};
+
+
+namespace{
+	transfer_function_plugin_creator_concrete< transfer_inflation_plugin > creator("inflation");
+}
+
diff --git a/poisson.cc b/poisson.cc
new file mode 100644
index 0000000..a5c6243
--- /dev/null
+++ b/poisson.cc
@@ -0,0 +1,499 @@
+/*
+ 
+ poisson.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+/****** ABSTRACT FACTORY PATTERN IMPLEMENTATION *******/
+
+#include "poisson.hh"
+std::map< std::string, poisson_plugin_creator *>& 
+get_poisson_plugin_map()
+{
+	static std::map< std::string, poisson_plugin_creator* > poisson_plugin_map;
+	return poisson_plugin_map;
+}
+
+void print_poisson_plugins()
+{
+	std::map< std::string, poisson_plugin_creator *>& m = get_poisson_plugin_map();
+	
+	std::map< std::string, poisson_plugin_creator *>::iterator it;
+	it = m.begin();
+	std::cout << " - Available poisson solver plug-ins:\n";
+	while( it!=m.end() )
+	{
+		if( (*it).second )
+			std::cout << "\t\'" << (*it).first << "\'\n";
+		++it;
+	}
+	
+	
+}
+
+
+/****** CALL IMPLEMENTATIONS OF POISSON SOLVER CLASSES ******/
+
+#include "mg_solver.hh"
+#include "fd_schemes.hh"
+
+#ifdef SINGLE_PRECISION
+typedef multigrid::solver< stencil_7P<float>, interp_O3_fluxcorr, mg_straight, float > poisson_solver_O2;
+typedef multigrid::solver< stencil_13P<float>, interp_O5_fluxcorr, mg_straight, float > poisson_solver_O4;
+typedef multigrid::solver< stencil_19P<float>, interp_O7_fluxcorr, mg_straight, float > poisson_solver_O6;
+#else
+typedef multigrid::solver< stencil_7P<double>, interp_O3_fluxcorr, mg_straight, double > poisson_solver_O2;
+typedef multigrid::solver< stencil_13P<double>, interp_O5_fluxcorr, mg_straight, double > poisson_solver_O4;
+typedef multigrid::solver< stencil_19P<double>, interp_O7_fluxcorr, mg_straight, double > poisson_solver_O6;
+#endif
+
+
+/**************************************************************************************/
+/**************************************************************************************/
+#pragma mark -
+
+
+double multigrid_poisson_plugin::solve( grid_hierarchy& f, grid_hierarchy& u )
+{
+	unsigned verbosity = cf_.getValueSafe<unsigned>("setup","verbosity",2);
+	
+	if( verbosity > 0 )
+		std::cout << " - Invoking multi-grid Poisson solver..." << std::endl;
+	
+	double acc = 1e-5, err;
+	std::string ps_smoother_name;
+	unsigned ps_presmooth, ps_postsmooth, order;
+	
+	acc                 = cf_.getValueSafe<double>("poisson","accuracy",acc);
+	ps_presmooth		= cf_.getValueSafe<unsigned>("poisson","pre_smooth",3);
+	ps_postsmooth		= cf_.getValueSafe<unsigned>("poisson","post_smooth",3);
+	ps_smoother_name	= cf_.getValueSafe<std::string>("poisson","smoother","gs");
+	order				= cf_.getValueSafe<unsigned>( "setup", "laplace_order", 4 );
+	
+	multigrid::opt::smtype ps_smtype = multigrid::opt::sm_gauss_seidel;
+	
+	if ( ps_smoother_name == std::string("gs") )
+		ps_smtype = multigrid::opt::sm_gauss_seidel;
+	else if ( ps_smoother_name == std::string("jacobi") )
+		ps_smtype = multigrid::opt::sm_jacobi;
+	else if ( ps_smoother_name == std::string("sor") )
+		ps_smtype = multigrid::opt::sm_sor;
+	else
+		std::cerr << " - Warning: unknown smoother \'" << ps_smoother_name << "\' for multigrid solver!\n"
+		<< "            reverting to \'gs\' (Gauss-Seidel)" << std::endl;
+	
+	
+	double tstart, tend;
+	tstart = omp_get_wtime();
+	
+	
+	//----- run Poisson solver -----//
+	if( order == 2 )
+	{
+		poisson_solver_O2 ps( f, ps_smtype, ps_presmooth, ps_postsmooth );
+		err = ps.solve( u, acc, true );	
+	}
+	else if( order == 4 )
+	{
+		poisson_solver_O4 ps( f, ps_smtype, ps_presmooth, ps_postsmooth );
+		err = ps.solve( u, acc, true );	
+	}
+	else if( order == 6 )
+	{
+		poisson_solver_O6 ps( f, ps_smtype, ps_presmooth, ps_postsmooth );
+		err = ps.solve( u, acc, true );	
+	}	
+	else
+	{	
+		throw std::runtime_error("Invalid order specified for Laplace operator");
+	}
+	
+	//------------------------------//
+	
+	tend = omp_get_wtime();
+	if( verbosity > 1 )
+		std::cout << " - Poisson solver took " << tend-tstart << "s with " << omp_get_max_threads() << " threads." << std::endl;
+	
+	return err;
+}
+
+double multigrid_poisson_plugin::gradient( int dir, grid_hierarchy& u, grid_hierarchy& Du )
+{
+	Du = u;
+	
+	unsigned order = cf_.getValueSafe<unsigned>( "setup", "grad_order", 4 );
+	
+	switch( order )
+	{
+		case 2: 
+			implementation().gradient_O2( dir, u, Du );
+			break;	
+		case 4: 
+			implementation().gradient_O4( dir, u, Du );
+			break;
+		case 6: 
+			implementation().gradient_O6( dir, u, Du );
+			break;
+		default:
+			throw std::runtime_error("Invalid order specified for gradient operator!");
+	}
+	
+	return 0.0;
+}
+
+void multigrid_poisson_plugin::implementation::gradient_O2( int dir, grid_hierarchy& u, grid_hierarchy& Du )
+{
+	for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel )
+	{
+		double h = pow(2.0,ilevel);
+		meshvar_bnd *pvar = Du.get_grid(ilevel);
+		
+		if( dir == 0 )
+#pragma omp parallel for
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+						(*pvar)(ix,iy,iz) = 0.5*((*u.get_grid(ilevel))(ix+1,iy,iz)-(*u.get_grid(ilevel))(ix-1,iy,iz))*h;
+		
+		else if( dir == 1 )
+#pragma omp parallel for
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+						(*pvar)(ix,iy,iz) = 0.5*((*u.get_grid(ilevel))(ix,iy+1,iz)-(*u.get_grid(ilevel))(ix,iy-1,iz))*h;
+		
+		else if( dir == 2 )
+#pragma omp parallel for
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+						(*pvar)(ix,iy,iz) = 0.5*((*u.get_grid(ilevel))(ix,iy,iz+1)-(*u.get_grid(ilevel))(ix,iy,iz-1))*h;	
+	}
+}
+
+void multigrid_poisson_plugin::implementation::gradient_O4( int dir, grid_hierarchy& u, grid_hierarchy& Du )
+{
+	for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel )
+	{
+		double h = pow(2.0,ilevel);
+		meshvar_bnd *pvar = Du.get_grid(ilevel);
+		
+		h /= 12.0;
+		
+		if( dir == 0 )
+#pragma omp parallel for
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+						(*pvar)(ix,iy,iz) = ((*u.get_grid(ilevel))(ix-2,iy,iz)
+											-8.0*(*u.get_grid(ilevel))(ix-1,iy,iz)
+											 +8.0*(*u.get_grid(ilevel))(ix+1,iy,iz)
+											 -(*u.get_grid(ilevel))(ix+2,iy,iz))*h;
+		
+		else if( dir == 1 )
+#pragma omp parallel for
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+						(*pvar)(ix,iy,iz) = ((*u.get_grid(ilevel))(ix,iy-2,iz)
+											 -8.0*(*u.get_grid(ilevel))(ix,iy-1,iz)
+											 +8.0*(*u.get_grid(ilevel))(ix,iy+1,iz)
+											 -(*u.get_grid(ilevel))(ix,iy+2,iz))*h;
+		
+		else if( dir == 2 )
+#pragma omp parallel for
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+						(*pvar)(ix,iy,iz) = ((*u.get_grid(ilevel))(ix,iy,iz-2)
+											 -8.0*(*u.get_grid(ilevel))(ix,iy,iz-1)
+											 +8.0*(*u.get_grid(ilevel))(ix,iy,iz+1)
+											 -(*u.get_grid(ilevel))(ix,iy,iz+2))*h;
+	}		
+	
+}
+
+void multigrid_poisson_plugin::implementation::gradient_O6( int dir, grid_hierarchy& u, grid_hierarchy& Du )
+{
+	for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel )
+	{
+		double h = pow(2.0,ilevel);
+		meshvar_bnd *pvar = Du.get_grid(ilevel);
+		
+		h /= 60.;
+		if( dir == 0 )
+#pragma omp parallel for
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+						(*pvar)(ix,iy,iz) = 
+						(-(*u.get_grid(ilevel))(ix-3,iy,iz)
+						 +9.0*(*u.get_grid(ilevel))(ix-2,iy,iz)
+						 -45.0*(*u.get_grid(ilevel))(ix-1,iy,iz)
+						 +45.0*(*u.get_grid(ilevel))(ix+1,iy,iz)
+						 -9.0*(*u.get_grid(ilevel))(ix+2,iy,iz)
+						 +(*u.get_grid(ilevel))(ix+3,iy,iz))*h;
+		
+		else if( dir == 1 )
+#pragma omp parallel for
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+						(*pvar)(ix,iy,iz) = 
+						(-(*u.get_grid(ilevel))(ix,iy-3,iz)
+						 +9.0*(*u.get_grid(ilevel))(ix,iy-2,iz)
+						 -45.0*(*u.get_grid(ilevel))(ix,iy-1,iz)
+						 +45.0*(*u.get_grid(ilevel))(ix,iy+1,iz)
+						 -9.0*(*u.get_grid(ilevel))(ix,iy+2,iz)
+						 +(*u.get_grid(ilevel))(ix,iy+3,iz))*h;
+		
+		else if( dir == 2 )
+#pragma omp parallel for
+			for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
+				for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
+					for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+						(*pvar)(ix,iy,iz) = 
+						(-(*u.get_grid(ilevel))(ix,iy,iz-3)
+						 +9.0*(*u.get_grid(ilevel))(ix,iy,iz-2)
+						 -45.0*(*u.get_grid(ilevel))(ix,iy,iz-1)
+						 +45.0*(*u.get_grid(ilevel))(ix,iy,iz+1)
+						 -9.0*(*u.get_grid(ilevel))(ix,iy,iz+2)
+						 +(*u.get_grid(ilevel))(ix,iy,iz+3))*h;
+	}
+		
+}
+	
+/**************************************************************************************/
+/**************************************************************************************/
+#pragma mark -
+
+#ifdef SINGLE_PRECISION
+#ifdef SINGLETHREAD_FFTW
+#include <srfftw.h>
+#else
+#include <srfftw_threads.h>
+#endif
+#else
+#ifdef SINGLETHREAD_FFTW
+#include <drfftw.h>
+#else
+#include <drfftw_threads.h>
+#endif
+#endif
+
+double fft_poisson_plugin::solve( grid_hierarchy& f, grid_hierarchy& u )
+{
+	unsigned verbosity = cf_.getValueSafe<unsigned>("setup","verbosity",2);
+	
+	if( f.levelmin() != f.levelmax() )
+		throw std::runtime_error("fft_poisson_plugin::solve : k-space method can only be used in unigrid mode (levelmin=levelmax)");
+	
+	if( verbosity > 0 )
+		std::cout << " - Invoking unigrid FFT Poisson solver..." << std::endl;
+	
+	int nx,ny,nz,nzp;
+	nx = f.get_grid(f.levelmax())->size(0);
+	ny = f.get_grid(f.levelmax())->size(1);
+	nz = f.get_grid(f.levelmax())->size(2);
+	nzp = 2*(nz/2+1);
+	
+	
+	//... copy data ..................................................
+	fftw_real *data = new fftw_real[nx*ny*nzp];
+	fftw_complex *cdata = reinterpret_cast<fftw_complex*> (data);
+	
+	for( int i=0; i<nx; ++i )
+		for( int j=0; j<ny; ++j )	
+			for( int k=0; k<nz; ++k )
+			{
+				unsigned idx = (i*ny+j)*nzp+k;
+				data[idx] = (*f.get_grid(f.levelmax()))(i,j,k);
+			}
+	
+	//... perform FFT and Poisson solve................................
+	rfftwnd_plan 
+	plan = rfftw3d_create_plan( nx,ny,nz,
+							   FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
+	iplan = rfftw3d_create_plan( nx,ny,nz,
+								FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
+	
+	
+#ifndef SINGLETHREAD_FFTW		
+	rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, data, NULL );
+#else
+	rfftwnd_one_real_to_complex( plan, data, NULL );
+#endif
+	
+	double boxlength = cf_.getValue<double>("setup","boxlength");
+	double kfac = 2.0*M_PI/boxlength;
+	double fac = -1.0/(nx*ny*nz)/boxlength;
+	
+	for( int i=0; i<nx; ++i )
+		for( int j=0; j<ny; ++j )	
+			for( int k=0; k<nz/2+1; ++k )
+			{
+				int ii = i; if(ii>nx/2) ii-=nx;
+				int jj = j; if(jj>ny/2) jj-=ny;
+				double ki = (double)ii;
+				double kj = (double)jj;
+				double kk = (double)k;
+				
+				double kk2 = kfac*kfac*(ki*ki+kj*kj+kk*kk);
+				
+				unsigned idx = (i*ny+j)*nzp/2+k;
+				double re = cdata[idx].re;
+				double im = cdata[idx].im;
+				
+				cdata[idx].re = -re/kk2*fac;
+				cdata[idx].im = -im/kk2*fac;
+			}
+	
+	cdata[0].re = 0.0;
+	cdata[0].im = 0.0;
+	
+#ifndef SINGLETHREAD_FFTW		
+	rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL );
+#else
+	rfftwnd_one_complex_to_real( iplan, cdata, NULL );
+#endif
+	
+	rfftwnd_destroy_plan(plan);
+	rfftwnd_destroy_plan(iplan);
+	
+	//... copy data ..........................................
+	for( int i=0; i<nx; ++i )
+		for( int j=0; j<ny; ++j )	
+			for( int k=0; k<nz; ++k )
+			{
+				unsigned idx = (i*ny+j)*nzp+k;
+				(*u.get_grid(u.levelmax()))(i,j,k) = data[idx];
+			}
+	
+	delete[] data;
+	
+	return 0.0;
+}
+
+
+double fft_poisson_plugin::gradient( int dir, grid_hierarchy& u, grid_hierarchy& Du )
+{
+	
+	if( u.levelmin() != u.levelmax() )
+		throw std::runtime_error("fft_poisson_plugin::gradient : k-space method can only be used in unigrid mode (levelmin=levelmax)");
+	
+	Du = u;
+	int nx,ny,nz,nzp;
+	nx = u.get_grid(u.levelmax())->size(0);
+	ny = u.get_grid(u.levelmax())->size(1);
+	nz = u.get_grid(u.levelmax())->size(2);
+	nzp = 2*(nz/2+1);
+	
+	//... copy data ..................................................
+	fftw_real *data = new fftw_real[nx*ny*nzp];
+	fftw_complex *cdata = reinterpret_cast<fftw_complex*> (data);
+	
+	for( int i=0; i<nx; ++i )
+		for( int j=0; j<ny; ++j )	
+			for( int k=0; k<nz; ++k )
+			{
+				unsigned idx = (i*ny+j)*nzp+k;
+				data[idx] = (*u.get_grid(u.levelmax()))(i,j,k);
+			}
+	
+	//... perform FFT and Poisson solve................................
+	rfftwnd_plan 
+	plan = rfftw3d_create_plan( nx,ny,nz,
+							   FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
+	iplan = rfftw3d_create_plan( nx,ny,nz,
+								FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
+	
+	
+#ifndef SINGLETHREAD_FFTW		
+	rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, data, NULL );
+#else
+	rfftwnd_one_real_to_complex( plan, data, NULL );
+#endif
+	
+	double fac = -1.0/(nx*ny*nz);
+	double boxlength = cf_.getValue<double>("setup","boxlength");
+	double kfac = 2.0*M_PI/boxlength;
+	
+	for( int i=0; i<nx; ++i )
+		for( int j=0; j<ny; ++j )	
+			for( int k=0; k<nz/2+1; ++k )
+			{
+				int ii = i; if(ii>nx/2) ii-=nx;
+				int jj = j; if(jj>ny/2) jj-=ny;
+				double ki = (double)ii;
+				double kj = (double)jj;
+				double kk = (double)k;
+				
+				double kdir;
+				if( dir == 0 )
+					kdir = kfac*ki;
+				else if( dir == 1 )
+					kdir = kfac*kj;
+				else if( dir == 2 )
+					kdir = kfac*kk;
+				
+				unsigned idx = (i*ny+j)*nzp/2+k;
+				double re = cdata[idx].re;
+				double im = cdata[idx].im;
+				
+				cdata[idx].re = fac*im*kdir;
+				cdata[idx].im = -fac*re*kdir;
+			}
+	
+	cdata[0].re = 0.0;
+	cdata[0].im = 0.0;
+	
+#ifndef SINGLETHREAD_FFTW		
+	rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL );
+#else
+	rfftwnd_one_complex_to_real( iplan, cdata, NULL );
+#endif
+	
+	rfftwnd_destroy_plan(plan);
+	rfftwnd_destroy_plan(iplan);
+	
+	//... copy data ..........................................
+	for( int i=0; i<nx; ++i )
+		for( int j=0; j<ny; ++j )	
+			for( int k=0; k<nz; ++k )
+			{
+				unsigned idx = (i*ny+j)*nzp+k;
+				(*Du.get_grid(u.levelmax()))(i,j,k) = data[idx];
+			}
+	
+	delete[] data;
+	return 0.0;
+}
+
+
+/**************************************************************************************/
+/**************************************************************************************/
+#pragma mark -
+
+
+
+namespace{
+	poisson_plugin_creator_concrete<multigrid_poisson_plugin> multigrid_poisson_creator("mg_poisson");
+	poisson_plugin_creator_concrete<fft_poisson_plugin> fft_poisson_creator("fft_poisson");
+}
diff --git a/poisson.hh b/poisson.hh
new file mode 100644
index 0000000..9c4d5cf
--- /dev/null
+++ b/poisson.hh
@@ -0,0 +1,140 @@
+/*
+ 
+ poisson.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+ */
+
+#ifndef __POISSON_HH
+#define __POISSON_HH
+
+#include <string>
+#include <map>
+
+#include "general.hh"
+
+class poisson_plugin
+{
+protected:
+	
+	//! reference to the config_file object that holds all configuration options
+	config_file& cf_;
+	
+public:
+	
+	//! constructor
+	explicit poisson_plugin( config_file& cf )
+	: cf_(cf)
+	{ }
+	
+	//! destructor
+	virtual ~poisson_plugin()
+	{ }
+	
+	virtual double solve( grid_hierarchy& f, grid_hierarchy& u ) = 0;
+	virtual double gradient( int dir, grid_hierarchy& u, grid_hierarchy& Du ) = 0;
+	
+};
+
+#pragma mark -
+
+/*!
+ * @brief implements abstract factory design pattern for poisson solver plug-ins
+ */
+struct poisson_plugin_creator
+{
+	//! create an instance of a plug-in
+	virtual poisson_plugin * create( config_file& cf ) const = 0;
+	
+	//! destroy an instance of a plug-in
+	virtual ~poisson_plugin_creator() { }
+};
+
+//! maps the name of a plug-in to a pointer of the factory pattern 
+std::map< std::string, poisson_plugin_creator *>& get_poisson_plugin_map();
+
+//! print a list of all registered output plug-ins
+void print_poisson_plugins();
+
+
+/*!
+ * @brief concrete factory pattern for output plug-ins
+ */
+template< class Derived >
+struct poisson_plugin_creator_concrete : public poisson_plugin_creator
+{
+	//! register the plug-in by its name
+	poisson_plugin_creator_concrete( const std::string& plugin_name )
+	{
+		get_poisson_plugin_map()[ plugin_name ] = this;
+	}
+	
+	//! create an instance of the plug-in
+	poisson_plugin * create( config_file& cf ) const
+	{
+		return new Derived( cf );
+	}
+};
+
+/**************************************************************************************/
+/**************************************************************************************/
+#pragma mark -
+
+class multigrid_poisson_plugin : public poisson_plugin
+{
+public:
+	
+	explicit multigrid_poisson_plugin( config_file& cf )
+	: poisson_plugin( cf )
+	{ }
+	
+	double solve( grid_hierarchy& f, grid_hierarchy& u );
+	double gradient( int dir, grid_hierarchy& u, grid_hierarchy& Du );
+	
+protected:
+	
+	struct implementation
+	{
+		double solve_O2( grid_hierarchy& f, grid_hierarchy& u );
+		double solve_O4( grid_hierarchy& f, grid_hierarchy& u );
+		void gradient_O2( int dir, grid_hierarchy& u, grid_hierarchy& Du );
+		void gradient_O4( int dir, grid_hierarchy& u, grid_hierarchy& Du );
+		void gradient_O6( int dir, grid_hierarchy& u, grid_hierarchy& Du );
+	};
+};
+
+/**************************************************************************************/
+/**************************************************************************************/
+#pragma mark -
+
+class fft_poisson_plugin : public poisson_plugin
+{
+public:
+	
+	explicit fft_poisson_plugin( config_file& cf )
+	: poisson_plugin( cf )
+	{ }
+	
+	double solve( grid_hierarchy& f, grid_hierarchy& u );
+	double gradient( int dir, grid_hierarchy& u, grid_hierarchy& Du );
+	
+};
+
+#endif // __POISSON_HH
+
diff --git a/random.hh b/random.hh
new file mode 100644
index 0000000..63597b3
--- /dev/null
+++ b/random.hh
@@ -0,0 +1,459 @@
+/*
+ 
+ random.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+#ifndef __RANDOM_HH
+#define __RANDOM_HH
+
+#include <fstream>
+#include <algorithm>
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+
+#include "mesh.hh"
+#include "mg_operators.hh"
+
+/*!
+ * @brief encapsulates all things random number generator related
+ */
+template< typename T >
+class random_numbers
+{
+public:
+	unsigned 
+		res_,		//!< resolution of the full mesh
+		cubesize_,	//!< size of one independent random number cube
+		ncubes_;	//!< number of random number cubes to cover the full mesh
+	long baseseed_;	//!< base seed from which cube seeds are computed 
+	
+	//! vector of 3D meshes (the random number cbues) with random numbers
+	std::vector< Meshvar<T>* > rnums_;	
+	
+protected:
+	
+	//! fills a subcube with random numbers
+	double fill_cube( int i, int j, int k)
+	{
+		
+		gsl_rng	*RNG = gsl_rng_alloc( gsl_rng_mt19937 );
+		
+		i = (i+ncubes_)%ncubes_;
+		j = (j+ncubes_)%ncubes_;
+		k = (k+ncubes_)%ncubes_;
+		
+		long icube = (i*ncubes_+j)*ncubes_+k;
+		long cubeseed = baseseed_+icube; //... each cube gets its unique seed
+		
+		gsl_rng_set( RNG, cubeseed );
+		
+		if( rnums_[icube] == NULL )
+			rnums_[icube] =  new Meshvar<T>( cubesize_, 0, 0, 0 );
+		
+		double mean = 0.0;
+		
+		for( int ii=0; ii<(int)cubesize_; ++ii )
+			for( int jj=0; jj<(int)cubesize_; ++jj )
+				for( int kk=0; kk<(int)cubesize_; ++kk )
+				{	
+					(*rnums_[icube])(ii,jj,kk) = gsl_ran_ugaussian_ratio_method( RNG );
+					mean += (*rnums_[icube])(ii,jj,kk);
+				}
+		
+		gsl_rng_free( RNG );
+		
+		return mean/(cubesize_*cubesize_*cubesize_);
+	}
+	
+	//! copy random numbers from a cube to a full grid array
+	template< class C >
+	void copy_cube( int i, int j, int k, C& dat )
+	{
+		int offi, offj, offk;
+		
+		offi = i*cubesize_;
+		offj = j*cubesize_;
+		offk = k*cubesize_;
+		
+		i = (i+ncubes_)%ncubes_;
+		j = (j+ncubes_)%ncubes_;
+		k = (k+ncubes_)%ncubes_;
+		
+		long icube = (i*ncubes_+j)*ncubes_+k;
+		
+		for( int ii=0; ii<(int)cubesize_; ++ii )
+			for( int jj=0; jj<(int)cubesize_; ++jj )
+				for( int kk=0; kk<(int)cubesize_; ++kk )
+					dat(offi+ii,offj+jj,offk+kk) = (*rnums_[icube])(ii,jj,kk);
+	}
+	
+	//! free the memory associated with a subcube
+	void free_cube( int i, int j, int k ) 
+	{
+
+		i = (i+ncubes_)%ncubes_;
+		j = (j+ncubes_)%ncubes_;
+		k = (k+ncubes_)%ncubes_;
+		long icube = (i*ncubes_+j)*ncubes_+k;
+		
+		if( rnums_[icube] != NULL )
+		{
+			delete rnums_[icube];
+			rnums_[icube] = NULL;
+		}
+	}
+	
+	//! initialize member variables and allocate memory
+	void initialize( void )
+	{
+		ncubes_ = std::max((int)((double)res_/cubesize_),1);
+		if( res_ < cubesize_ )
+		{	
+			ncubes_ = 1;
+			cubesize_ = res_;
+		}
+		
+		rnums_.assign( ncubes_*ncubes_*ncubes_, NULL );
+	}
+	
+	//! fill a cubic subvolume of the full grid with random numbers
+	double fill_subvolume( int *i0, int *n )
+	{
+		unsigned i0cube[3], ncube[3];
+		
+		i0cube[0] = (int)((double)i0[0]/cubesize_);
+		i0cube[1] = (int)((double)i0[1]/cubesize_);
+		i0cube[2] = (int)((double)i0[2]/cubesize_);
+		
+		ncube[0] = (int)((double)(i0[0]+n[0])/cubesize_+1.0)-i0cube[0];
+		ncube[1] = (int)((double)(i0[1]+n[1])/cubesize_+1.0)-i0cube[1];
+		ncube[2] = (int)((double)(i0[2]+n[2])/cubesize_+1.0)-i0cube[2];
+		
+		double mean = 0.0;
+		
+		#pragma omp parallel for reduction(+:mean)
+		for( int i=i0cube[0]; i<(int)ncube[0]; ++i )
+			for( int j=i0cube[1]; j<(int)ncube[1]; ++j )
+				for( int k=i0cube[2]; k<(int)ncube[2]; ++k )
+				{
+					int ii(i),jj(j),kk(k);
+					
+					ii = (ii+ncubes_)%ncubes_;
+					jj = (jj+ncubes_)%ncubes_;
+					kk = (kk+ncubes_)%ncubes_;
+					
+					mean += fill_cube(ii, jj, kk);
+				}
+		return mean/(ncube[0]*ncube[1]*ncube[2]);
+	}
+	
+	//! fill an entire grid with random numbers
+	double fill_all( void )
+	{
+		double sum = 0.0;
+		
+		#pragma omp parallel for reduction(+:sum)
+		for( int i=0; i<(int)ncubes_; ++i )
+			for( int j=0; j<(int)ncubes_; ++j )
+				for( int k=0; k<(int)ncubes_; ++k )
+				{
+					int ii(i),jj(j),kk(k);
+					
+					ii = (ii+ncubes_)%ncubes_;
+					jj = (jj+ncubes_)%ncubes_;
+					kk = (kk+ncubes_)%ncubes_;
+					
+					sum+=fill_cube(ii, jj, kk);
+				}
+		
+		return sum/(ncubes_*ncubes_*ncubes_);
+	}
+
+public:
+	template< class C >
+	double fill_all( C& dat )
+	{
+		double sum = 0.0;
+		
+		#pragma omp parallel for reduction(+:sum)
+		for( int i=0; i<(int)ncubes_; ++i )
+			for( int j=0; j<(int)ncubes_; ++j )
+				for( int k=0; k<(int)ncubes_; ++k )
+				{
+					int ii(i),jj(j),kk(k);
+					
+					ii = (ii+ncubes_)%ncubes_;
+					jj = (jj+ncubes_)%ncubes_;
+					kk = (kk+ncubes_)%ncubes_;
+					
+					sum+=fill_cube(ii, jj, kk);
+					copy_cube(ii,jj,kk,dat);
+					free_cube(ii, jj, kk);
+				}
+		
+		return sum/(ncubes_*ncubes_*ncubes_);
+	}
+	
+	
+public:
+	
+	//! constructor
+	random_numbers( unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx )
+	: res_( res ), cubesize_( cubesize ), ncubes_( 1 ), baseseed_( baseseed )
+	{
+		initialize();
+		fill_subvolume( x0, lx );
+	}
+	
+	//! constructor
+	random_numbers( unsigned res, unsigned cubesize, long baseseed, bool zeromean=true )
+	: res_( res ), cubesize_( cubesize ), ncubes_( 1 ), baseseed_( baseseed )
+	{
+		double mean = 0.0;
+		initialize();
+		mean = fill_all();
+		
+		if( zeromean )
+		{
+			for(unsigned i=0; i<res_; ++i )
+				for( unsigned j=0; j<res_; ++j )
+					for( unsigned k=0; k<res_; ++k )
+						(*this)(i,j,k) -= mean;
+		}
+	}
+	
+	//! constructor
+	random_numbers( unsigned res, std::string randfname )
+	: res_( res ), cubesize_( res ), ncubes_(1)
+	{
+		rnums_.push_back( new Meshvar<T>( res, 0, 0, 0 ) );
+		
+		std::ifstream ifs(randfname.c_str(), std::ios::binary);
+		if( !ifs )
+			throw std::runtime_error(std::string("Could not open random number file \'")+randfname+std::string("\'!"));
+		
+		unsigned vartype;
+		unsigned nx,ny,nz,blksz;
+		int iseed;
+		long seed;
+		//ifs.read( (char*)&vartype, sizeof(unsigned) );
+		
+		//... read header .../
+		ifs.read( reinterpret_cast<char*> (&blksz), sizeof(int) );
+		
+		if( blksz != 4*sizeof(int) )
+		   throw std::runtime_error("corrupt random number file");
+		   
+		ifs.read( reinterpret_cast<char*> (&nx), sizeof(unsigned) );
+		ifs.read( reinterpret_cast<char*> (&ny), sizeof(unsigned) );
+		ifs.read( reinterpret_cast<char*> (&nz), sizeof(unsigned) );
+		ifs.read( reinterpret_cast<char*> (&iseed), sizeof(int) );
+		seed = (long)iseed;
+		
+		ifs.read( reinterpret_cast<char*> (&blksz), sizeof(int) );
+		
+		//... read data ...//
+		
+		ifs.read( reinterpret_cast<char*> (&blksz), sizeof(int) );
+		if( blksz == nx*ny*sizeof(float) )
+			vartype = 4;
+		else if( blksz == nx*ny*sizeof(double) )
+			vartype = 8;
+		else
+			throw std::runtime_error("corrupt random number file");
+		
+			
+		std::vector<float> in_float;
+		std::vector<double> in_double;
+		
+		std::cout << " - Random number file \'" << randfname << "\'\n"
+				  << "   contains " << nx*ny*nz << " numbers. Reading..." << std::endl;
+		
+		double sum = 0.0, sum2 = 0.0;
+		unsigned count = 0;
+		
+		if( vartype == 4 )
+		{
+			for( int ii=0; ii<(int)nz; ++ii )
+			{
+				in_float.assign(nx*ny,0.0f);
+				ifs.read( (char*)&in_float[0], nx*ny*sizeof(float) );
+				
+				for( int jj=0,q=0; jj<(int)ny; ++jj )
+					for( int kk=0; kk<(int)nx; ++kk ){
+						sum += in_float[q];
+						sum2 += in_float[q]*in_float[q];
+						++count;
+						
+#ifdef RAND_DEBUG
+						(*rnums_[0])(ii,jj,kk) = 0.0;
+#else
+						(*rnums_[0])(ii,jj,kk) = in_float[q++];
+#endif
+						
+					}
+				
+			}
+		}
+		else if( vartype == 8 )
+		{
+			for( int ii=0; ii<(int)nz; ++ii )
+			{
+				in_double.assign(nx*ny,0.0f);				
+				ifs.read( (char*)&in_double[0], nx*ny*sizeof(double) );
+				
+				for( int jj=0,q=0; jj<(int)ny; ++jj )
+					for( int kk=0; kk<(int)nx; ++kk )
+					{
+						sum += in_double[q];
+						sum2 += in_double[q]*in_double[q];
+						++count;
+#ifdef RAND_DEBUG						
+						(*rnums_[0])(ii,jj,kk) = 0.0;
+#else
+						(*rnums_[0])(ii,jj,kk) = in_double[q++];
+#endif
+						
+					}
+			}
+		}
+		
+		double mean, var;
+		mean = sum/count;
+		var = sum2/count-mean*mean;
+#ifdef RAND_DEBUG
+		(*rnums_[0])(cubesize_/2-1,cubesize_/2-1,cubesize_/2-1) = 1.0;
+#endif
+		
+		std::cout << " - Random numbers in file have \n"
+				  << "     mean = " << mean << " and var = " << var << std::endl;
+		
+	}
+	
+	//... copy construct by averaging down
+	explicit random_numbers( /*const*/ random_numbers <T>& rc )
+	{
+		//if( res > rc.m_res || (res/rc.m_res)%2 != 0 )
+		//			throw std::runtime_error("Invalid restriction in random number container copy constructor.");
+		
+		double sum = 0.0, sum2 = 0.0;
+		unsigned count = 0;
+		
+		//... initialize properties of container
+		if( rc.rnums_.size() == 1 )
+		{
+			res_		= rc.res_/2;
+			cubesize_	= res_;
+			ncubes_		= 1;
+			baseseed_	= -2;
+			
+			//... use restriction to get consistent random numbers on coarser grid
+			mg_straight gop;
+			rnums_.push_back( new Meshvar<T>( res_, 0, 0, 0 ) );		
+			gop.restrict( *rc.rnums_[0], *rnums_[0] );
+			
+			
+			for( unsigned i=0; i< rnums_[0]->size(0); ++i )
+				for( unsigned j=0; j< rnums_[0]->size(1); ++j )
+					for( unsigned k=0; k< rnums_[0]->size(2); ++k )
+					{
+						(*rnums_[0])(i,j,k) *= sqrt(8); //.. maintain that var(delta)=1
+						sum += (*rnums_[0])(i,j,k);
+						sum2+= (*rnums_[0])(i,j,k) * (*rnums_[0])(i,j,k);
+						++count;
+					}
+		}
+		else 
+		{
+			res_		= rc.res_/2;
+			cubesize_	= res_;
+			ncubes_		= 1;
+			baseseed_	= -2;
+			
+			rnums_.push_back( new Meshvar<T>( res_, 0, 0, 0 ) );
+			double fac = 1.0/sqrt(8);
+			
+			for( unsigned i=0,ii=0; i<rc.res_; i+=2,++ii )
+				for( unsigned j=0,jj=0; j<rc.res_; j+=2,++jj )
+					for( unsigned k=0,kk=0; k<rc.res_; k+=2,++kk )
+					{
+						(*rnums_[0])(ii,jj,kk) = fac * 
+						( rc(i,j,k)+rc(i+1,j,k)+rc(i,j+1,k)+rc(i,j,k+1)+
+						 rc(i+1,j+1,k)+rc(i+1,j,k+1)+rc(i,j+1,k+1)+rc(i+1,j+1,k+1));
+						
+						sum += (*rnums_[0])(ii,jj,kk);
+						sum2+= (*rnums_[0])(ii,jj,kk) * (*rnums_[0])(ii,jj,kk);
+						++count;
+					}
+			
+		}
+		
+		
+		
+		double rmean, rvar;
+		rmean = sum/count;
+		rvar = sum2/count-rmean*rmean;
+		
+		std::cout << " - Restricted random numbers have mean = " << rmean << " and var = " << rvar << std::endl;
+	}
+	
+	
+	~random_numbers()
+	{
+		for( unsigned i=0; i<rnums_.size(); ++i )
+			if( rnums_[i] != NULL )
+				delete rnums_[i];
+		rnums_.clear();
+	}
+	
+	T& operator()( int i, int j, int k )
+	{
+		int ic, jc, kc, is, js, ks;
+		
+		if( ncubes_ == 0 )
+			throw std::runtime_error("random_numbers: internal error, not properly initialized");
+		
+		//... determine cube
+		ic = (int)((double)i/cubesize_ + ncubes_) % ncubes_;
+		jc = (int)((double)j/cubesize_ + ncubes_) % ncubes_;
+		kc = (int)((double)k/cubesize_ + ncubes_) % ncubes_;
+		
+		long icube = (ic*ncubes_+jc)*ncubes_+kc;
+		if( rnums_[ icube ] == NULL )
+		{	
+			rnums_[ icube ] = new Meshvar<T>( cubesize_, 0, 0, 0 );
+			fill_cube(ic, jc, kc);
+		}
+		
+		//... determine cell in cube
+		is = (i - ic * cubesize_ + cubesize_) % cubesize_;
+		js = (j - jc * cubesize_ + cubesize_) % cubesize_;
+		ks = (k - kc * cubesize_ + cubesize_) % cubesize_;
+		
+		return (*rnums_[ icube ])(is,js,ks);
+	}
+	
+	
+};
+
+
+#endif //__RANDOM_HH
+
diff --git a/schemes.hh b/schemes.hh
new file mode 100644
index 0000000..900118a
--- /dev/null
+++ b/schemes.hh
@@ -0,0 +1,275 @@
+/*
+ *  schemes.hh
+ *  GravitySolver
+ *
+ *  Created by Oliver Hahn on 2/1/10.
+ *  Copyright 2010 KIPAC/Stanford University. All rights reserved.
+ *
+ */
+
+#ifndef __SCHEME_HH
+#define __SCHEME_HH
+
+#include <vector>
+#include <stdexcept>
+
+#include "solver.hh"
+
+//... abstract implementation of the Poisson/Force scheme
+template< class L, class G, typename real_t=double >
+class scheme
+{
+public:
+	typedef L laplacian;
+	typedef G gradient;
+	
+	laplacian m_laplacian;
+	gradient m_gradient;
+	
+	template< class C >
+	inline real_t grad_x( const C&c, const int i, const int j, const int k )
+	{ return m_gradient.apply_x( c,i,j,k ); }
+	
+	template< class C >
+	inline real_t grad_y( const C&c, const int i, const int j, const int k )
+	{ return m_gradient.apply_y( c,i,j,k ); }
+	
+	template< class C >
+	inline real_t grad_z( const C&c, const int i, const int j, const int k )
+	{ return m_gradient.apply_z( c,i,j,k ); }
+	
+	template< class C >
+	inline real_t L_apply( const C&c, const int i, const int j, const int k ) 
+	{ return m_laplacian.apply( c,i,j,k ); }
+	
+	template< class C >
+	inline real_t L_rhs( const C&c, const int i, const int j, const int k ) 
+	{ return m_laplacian.rhs( c,i,j,k ); }
+	
+	inline real_t ccoeff( void )
+	{ return m_laplacian.ccoeff(); }
+	
+};
+
+
+template< int nextent, typename T >
+class gradient
+{
+	typedef T real_t;
+	std::vector<real_t> m_stencil;
+	const unsigned nl;
+public:
+	
+	gradient()
+	: nl( 2*nextent+1 )
+	{ 
+		m_stencil.assign(nl*nl*nl,(real_t)0.0);
+	}
+	
+	real_t& operator()(int i)
+	{ return m_stencil[i+nextent]; }
+	
+	const real_t& operator()(int i) const
+	{ return m_stencil[i+nextent]; }
+	
+	template< class C >
+	inline void apply( const C& c, C& f, int dir )
+	{
+		f = c;
+		
+		int nx=c.size(0), ny=c.size(1), nz=c.size(2);		
+		double hx = 1.0/(nx+1.0), hy = 1.0/(ny+1.0), hz = 1.0/(nz+1.0);
+		
+		f.zero();
+		
+		if( dir == 0 )
+			for( int i=0; i<nx; ++i )
+				for( int j=0; j<ny; ++j )
+					for( int k=0; k<nz; ++k )
+						for( int ii = -nextent; ii<=nextent; ++ii )
+							f(i,j,k) += (*this)(ii) * c(i+ii,j,k)/hx;
+		else if( dir == 1 )
+			for( int i=0; i<nx; ++i )
+				for( int j=0; j<ny; ++j )
+					for( int k=0; k<nz; ++k )
+						for( int jj = -nextent; jj<=nextent; ++jj )
+							f(i,j,k) += (*this)(jj) * c(i,j+jj,k)/hy;
+		else if( dir == 2 )
+			for( int i=0; i<nx; ++i )
+				for( int j=0; j<ny; ++j )
+					for( int k=0; k<nz; ++k )
+						for( int kk = -nextent; kk<=nextent; ++kk )
+							f(i,j,k) += (*this)(kk) * c(i,j,k+kk)/hz;
+		
+	}
+};
+
+template< int nextent, typename real_t >
+class base_stencil
+{
+protected:
+	std::vector<real_t> m_stencil;
+	const unsigned nl;
+public:
+	bool m_modsource;
+	
+public:
+	base_stencil( bool amodsource = false )
+	: nl( 2*nextent+1 ), m_modsource( amodsource )
+	{
+		m_stencil.assign(nl*nl*nl,(real_t)0.0);
+	}
+	
+	real_t& operator()(int i, int j, int k)
+	{ return m_stencil[((i+nextent)*nl+(j+nextent))*nl+(k+nextent)]; }
+	
+	const real_t& operator()(unsigned i, unsigned j, unsigned k) const
+	{ return m_stencil[((i+nextent)*nl+(j+nextent))*nl+(k+nextent)]; }
+	
+	template< class C >
+	inline real_t rhs( const C& c, const int i, const int j, const int k )
+	{
+		real_t sum = this->apply( c, i, j, k );
+		sum -= (*this)(0,0,0) * c(i,j,k);
+		return sum;
+	}
+	
+	inline real_t ccoeff( void )
+	{
+		return (*this)(0,0,0);
+	}
+	
+	
+	template< class C >
+	inline real_t apply( const C& c, const int i, const int j, const int k )
+	{
+		real_t sum = 0.0;
+		
+		for( int ii=-nextent; ii<=nextent; ++ii )
+			for( int jj=-nextent; jj<=nextent; ++jj )
+				for( int kk=-nextent; kk<=nextent; ++kk )
+					sum += (*this)(ii,jj,kk) * c(i+ii,j+jj,k+kk);
+		
+		return sum;
+	}
+	
+	template< class C >
+	inline real_t modsource( const C& c, const int i, const int j, const int k )
+	{
+		return 0.0;
+	}
+	
+};
+
+
+/***************************************************************************************/
+/***************************************************************************************/
+/***************************************************************************************/
+
+
+//... Implementation of the Gradient schemes............................................
+
+
+template< typename real_t >
+class deriv_2P : public gradient<1,real_t>
+{
+	
+public:
+	deriv_2P( void )
+	{
+		(*this)( 0 ) =  0.0;
+		(*this)(-1 ) = -0.5;
+		(*this)(+1 ) = +0.5;		
+	}
+	
+	
+};
+
+//... Implementation of the Laplacian schemes..........................................
+
+
+template< typename real_t >
+class stencil_7P : public base_stencil<1,real_t>
+{
+	
+public:
+	stencil_7P( void )
+	{
+		(*this)( 0, 0, 0) = -6.0;
+		(*this)(-1, 0, 0) = +1.0;
+		(*this)(+1, 0, 0) = +1.0;
+		(*this)( 0,-1, 0) = +1.0;
+		(*this)( 0,+1, 0) = +1.0;
+		(*this)( 0, 0,-1) = +1.0;
+		(*this)( 0, 0,+1) = +1.0;
+	}
+	
+	template< class C >
+	inline real_t apply( const C& c, const int i, const int j, const int k ) const
+	{
+		return c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)-6.0*c(i,j,k);
+	}
+	
+	template< class C >
+	inline real_t rhs( const C& c, const int i, const int j, const int k ) const
+	{
+		return c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1);
+	}
+	
+	inline real_t ccoeff( void )
+	{
+		return -6.0;
+	}
+};
+
+
+template< typename real_t >
+class stencil_13P : public base_stencil<2,real_t>
+{
+	
+public:
+	stencil_13P( void )
+	{
+		(*this)( 0, 0, 0) = -90.0/12.;
+		
+		(*this)(-1, 0, 0) = 
+		(*this)(+1, 0, 0) = 
+		(*this)( 0,-1, 0) = 
+		(*this)( 0,+1, 0) = 
+		(*this)( 0, 0,-1) = 
+		(*this)( 0, 0,+1) = 16./12.;
+		
+		(*this)(-2, 0, 0) = 
+		(*this)(+2, 0, 0) = 
+		(*this)( 0,-2, 0) = 
+		(*this)( 0,+2, 0) = 
+		(*this)( 0, 0,-2) = 
+		(*this)( 0, 0,+2) = -1./12.;
+	}
+	
+	template< class C >
+	inline real_t apply( const C& c, const int i, const int j, const int k )
+	{
+		return 
+			(-1.0*(c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2))
+			 +16.0*(c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1))
+			 -90.0*c(i,j,k))/12.0;
+	}
+	
+	template< class C >
+	inline real_t rhs( const C& c, const int i, const int j, const int k )
+	{
+		return 
+			(-1.0*(c(i-2,j,k)+c(i+2,j,k)+c(i,j-2,k)+c(i,j+2,k)+c(i,j,k-2)+c(i,j,k+2))
+			 +16.0*(c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)))/12.0;
+	}
+	
+	inline real_t ccoeff( void )
+	{
+		return -90.0/12.0;
+	}
+};
+
+#endif
+
+
diff --git a/solver.hh b/solver.hh
new file mode 100644
index 0000000..148eb55
--- /dev/null
+++ b/solver.hh
@@ -0,0 +1,1110 @@
+/*
+ *  solver.h
+ *  GravitySolver
+ *
+ *  Created by Oliver Hahn on 1/20/10.
+ *  Copyright 2010 KIPAC/Stanford University. All rights reserved.
+ *
+ */
+
+#ifndef __SOLVER_HH
+#define __SOLVER_HH
+
+#include <cmath>
+#include <iostream>
+#include "mesh.hh"
+
+#define BEGIN_MULTIGRID_NAMESPACE namespace multigrid {
+#define END_MULTIGRID_NAMESPACE }
+
+BEGIN_MULTIGRID_NAMESPACE
+	
+namespace opt {
+	enum smtype { sm_jacobi, sm_gauss_seidel, sm_sor };
+}
+
+template< class S, class O, typename T=double >
+class solver
+{
+public:
+	typedef S scheme;
+	typedef O mgop;
+
+protected:
+	scheme				m_scheme;
+	mgop				m_gridop;
+	unsigned			m_npresmooth, m_npostsmooth;
+	opt::smtype			m_smoother;
+	unsigned			m_ilevelmin;
+	
+	const static bool	m_bperiodic = true;
+
+	GridHierarchy<T>	*m_pu, *m_pf, *m_pfsave;	
+	GridHierarchy<bool> *m_pmask;
+	const MeshvarBnd<T> *m_pubnd;
+	
+	double compute_error( const MeshvarBnd<T>& u, const MeshvarBnd<T>& unew );
+	
+	double compute_error( const GridHierarchy<T>& uh, const GridHierarchy<T>& uhnew, bool verbose );
+
+protected:
+	
+	void Jacobi( T h, MeshvarBnd<T>* u, const MeshvarBnd<T>* f );
+	
+	void GaussSeidel( T h, MeshvarBnd<T>* u, const MeshvarBnd<T>* f );
+	
+	void SOR( T h, MeshvarBnd<T>* u, const MeshvarBnd<T>* f );
+	
+	void twoGrid( unsigned ilevel );
+	
+	void interp_coarse_fine( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine, bool bcf=true );
+	
+	void setBC( unsigned ilevel );
+	
+	void make_periodic( MeshvarBnd<T> *u );
+	
+	void interp_cubic( MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine, int itop, int jtop, int ktop, int i, int j, int k );
+	void interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine, bool bcf );
+	
+public:
+	solver( GridHierarchy<T>& f, //const MeshvarBnd<T>& uBC_top, 
+				   opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth );
+	
+	~solver()
+	{ delete m_pmask; }
+	
+	double solve( GridHierarchy<T>& u, double accuracy, double h=-1.0, bool verbose=false );
+	
+	double solve( GridHierarchy<T>& u, double accuracy, bool verbose=false )
+	{
+		return this->solve ( u, accuracy, -1.0, verbose );
+	}
+	
+	
+	
+};
+
+
+template< class S, class O, typename T >
+solver<S,O,T>::solver( GridHierarchy<T>& f, //const MeshvarBnd<T>& ubnd, 
+					opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth )
+:	m_scheme(), m_gridop(), m_npresmooth( npresmooth ), m_npostsmooth( npostsmooth ), 
+m_smoother( smoother ), m_ilevelmin( f.levelmin() ), m_pf( &f )//, m_pubnd( &ubnd )
+{ 
+	//... initialize the refinement mask
+	m_pmask = new GridHierarchy<bool>( f.m_nbnd );
+	m_pmask->create_base_hierarchy(f.levelmin());
+			
+	for( unsigned ilevel=f.levelmin()+1; ilevel<=f.levelmax(); ++ilevel )
+	{
+		meshvar_bnd* pf = f.get_grid(ilevel);
+		m_pmask->add_patch( pf->offset(0), pf->offset(1), pf->offset(2), pf->size(0), pf->size(1), pf->size(2) );
+	}
+	
+	m_pmask->zero();
+	
+	for( unsigned ilevel=0; ilevel<f.levelmin(); ++ilevel )
+	{
+		MeshvarBnd<T> *pf = f.get_grid(ilevel);
+		for( int ix=0; ix < (int)pf->size(0); ++ix )
+			for( int iy=0; iy < (int)pf->size(1); ++iy )
+				for( int iz=0; iz < (int)pf->size(2); ++iz )
+					(*m_pmask->get_grid(ilevel))(ix,iy,iz) = true;
+	}
+	
+	for( unsigned ilevel=m_ilevelmin; ilevel<f.levelmax(); ++ilevel )
+	{
+		MeshvarBnd<T>* pf = f.get_grid(ilevel+1);//, *pfc = f.get_grid(ilevel);
+		
+		for( int ix=pf->offset(0); ix < (int)(pf->offset(0)+pf->size(0)/2); ++ix )
+			for( int iy=pf->offset(1); iy < (int)(pf->offset(1)+pf->size(1)/2); ++iy )
+				for( int iz=pf->offset(2); iz < (int)(pf->offset(2)+pf->size(2)/2); ++iz )
+					(*m_pmask->get_grid(ilevel))(ix,iy,iz) = true;
+	}
+		
+}
+
+
+template< class S, class O, typename T >
+void solver<S,O,T>::Jacobi( T h, MeshvarBnd<T> *u, const MeshvarBnd<T>* f )
+{
+	int
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+	
+	double 
+		c0 = -1.0/m_scheme.ccoeff(),
+		h2 = h*h; 
+	
+	MeshvarBnd<T> uold(*u);
+	
+	double alpha = 0.95, ialpha = 1.0-alpha;
+	
+	#pragma omp parallel for
+	for( int ix=0; ix<nx; ++ix )
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+				(*u)(ix,iy,iz) = ialpha * uold(ix,iy,iz) + alpha * (m_scheme.rhs( uold, ix, iy, iz ) + h2 * (*f)(ix,iy,iz))*c0;
+	
+	
+	
+	
+}
+
+template< class S, class O, typename T >
+void solver<S,O,T>::SOR( T h, MeshvarBnd<T> *u, const MeshvarBnd<T>* f )
+{
+	int
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+
+	double 
+		c0 = -1.0/m_scheme.ccoeff(),
+		h2 = h*h; 
+		
+	MeshvarBnd<T> uold(*u);
+	
+	double 
+		alpha = 1.2, 
+	//alpha = 2 / (1 + 4 * atan(1.0) / double(u->size(0)))-1.0,
+		ialpha = 1.0-alpha;
+	
+	//std::cerr << "omega_opt = " << alpha << std::endl;
+	
+	#pragma omp parallel for
+	for( int ix=0; ix<nx; ++ix )
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+				if( (ix+iy+iz)%2==0 )
+					(*u)(ix,iy,iz) = ialpha * uold(ix,iy,iz) + alpha * (m_scheme.rhs( uold, ix, iy, iz ) + h2 * (*f)(ix,iy,iz))*c0;
+	
+	
+	#pragma omp parallel for
+	for( int ix=0; ix<nx; ++ix )
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+				if( (ix+iy+iz)%2!=0 )
+					(*u)(ix,iy,iz) = ialpha * uold(ix,iy,iz) + alpha * (m_scheme.rhs( *u, ix, iy, iz ) + h2 * (*f)(ix,iy,iz))*c0;
+	
+	
+	
+}
+
+template< class S, class O, typename T >
+void solver<S,O,T>::GaussSeidel( T h, MeshvarBnd<T>* u, const MeshvarBnd<T>* f )
+{
+	int 
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+	
+	T
+		c0 = -1.0/m_scheme.ccoeff(),
+		h2 = h*h; 
+	
+	for( int color=0; color < 2; ++color )
+		#pragma omp parallel for
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+				for( int iz=0; iz<nz; ++iz )
+					if( (ix+iy+iz)%2 == color )
+						(*u)(ix,iy,iz) = (m_scheme.rhs( *u, ix, iy, iz ) + h2 * (*f)(ix,iy,iz))*c0;
+	
+}
+
+
+template< class S, class O, typename T >
+void solver<S,O,T>::twoGrid( unsigned ilevel )
+{
+	MeshvarBnd<T> *uf, *uc, *ff, *fc;
+	
+	T 
+		h = 1.0/(pow(2.0,ilevel)),
+		c0 = -1.0/m_scheme.ccoeff(),
+		h2 = h*h; 
+	
+	uf = m_pu->get_grid(ilevel);
+	ff = m_pf->get_grid(ilevel);	
+	
+	uc = m_pu->get_grid(ilevel-1);
+	fc = m_pf->get_grid(ilevel-1);	
+	
+	int 
+		nx = uf->size(0), 
+		ny = uf->size(1), 
+		nz = uf->size(2);
+	
+	if( m_bperiodic && ilevel <= m_ilevelmin)
+		make_periodic( uf );
+	else if(!m_bperiodic)
+		setBC( ilevel );
+	
+	//... do smoothing sweeps with specified solver
+	for( unsigned i=0; i<m_npresmooth; ++i ){
+		
+		if( ilevel > m_ilevelmin )
+			interp_coarse_fine(ilevel, *uc, *uf );
+		
+		if( m_smoother == opt::sm_gauss_seidel )
+			GaussSeidel( h, uf, ff );
+			
+		else if( m_smoother == opt::sm_jacobi )
+			Jacobi( h, uf, ff);		
+			
+		else if( m_smoother == opt::sm_sor )
+			SOR( h, uf, ff );
+		
+		if( m_bperiodic && ilevel <= m_ilevelmin )
+			make_periodic( uf );
+	}
+			
+	
+	m_gridop.restrict( *uf, *uc );
+	
+	//... essential!!
+	if( m_bperiodic && ilevel <= m_ilevelmin )
+		make_periodic( uc );
+	else if( m_bperiodic )
+		interp_coarse_fine(ilevel,*uc,*uf);
+	
+	meshvar_bnd Lu(*uf,false);
+	Lu.zero();
+	#pragma omp parallel for
+	for( int ix=0; ix<nx; ++ix )
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+				Lu(ix,iy,iz) = m_scheme.apply( (*uf), ix, iy, iz )/h2;
+	
+	meshvar_bnd tLu(*uc,false);
+	
+	//... restrict Lu
+	m_gridop.restrict( Lu, tLu );
+	Lu.deallocate();
+	
+	//... restrict source term
+	m_gridop.restrict( *ff, *fc );
+	
+	//... compute RHS tau-correction
+	#pragma omp parallel for schedule(dynamic)
+	for( int ix=0; ix<(int)uc->size(0); ++ix )
+		for( int iy=0; iy<(int)uc->size(1); ++iy )
+			for( int iz=0; iz<(int)uc->size(2); ++iz )
+				if( (*m_pmask->get_grid(ilevel-1))(ix,iy,iz) == true )
+					(*fc)(ix,iy,iz) += ((tLu( ix, iy, iz ) - (m_scheme.apply( *uc, ix, iy, iz )/(4.0*h2))));
+				
+					
+	tLu.deallocate();
+	
+	meshvar_bnd ucsave(*uc,true);
+						
+	//... have we reached the end of the recursion or do we need to go up one level?
+	if( ilevel == 1 )
+		if( m_bperiodic )
+			(*uc)(0,0,0) = 0.0;
+		else 
+			(*uc)(0,0,0) = (m_scheme.rhs( (*uc), 0, 0, 0 ) + 4.0 * h2 * (*fc)(0,0,0))*c0;
+	else
+		twoGrid( ilevel-1 );
+	
+	meshvar_bnd cc(*uc,false);
+	
+	//... compute correction on coarse grid
+	#pragma omp parallel for
+	for( int ix=0; ix<(int)cc.size(0); ++ix )
+		for( int iy=0; iy<(int)cc.size(1); ++iy )
+			for( int iz=0; iz<(int)cc.size(2); ++iz )
+				cc(ix,iy,iz) = (*uc)(ix,iy,iz) - ucsave(ix,iy,iz);
+		
+	ucsave.deallocate();
+
+
+	//... prolongate correction to fine grid	
+	meshvar_bnd cf(*uf,false);
+	m_gridop.prolong( cc, cf );
+	
+	cc.deallocate();
+	
+	
+	#pragma omp parallel for 
+	for( int ix=0; ix<nx; ++ix )
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+				(*uf)(ix,iy,iz) += cf(ix,iy,iz);
+	
+
+	cf.deallocate();
+				
+	//... interpolate and apply coarse-fine boundary conditions on fine level
+	if( m_bperiodic && ilevel <= m_ilevelmin )
+		make_periodic( uf );
+	else if(!m_bperiodic)
+		setBC( ilevel );
+	
+	//if( ilevel > m_ilevelmin )
+	//	interp_coarse_fine(ilevel, *uc, *uf );
+
+	//... do smoothing sweeps with specified solver
+	for( unsigned i=0; i<m_npostsmooth; ++i ){
+		
+		if( ilevel > m_ilevelmin )
+			interp_coarse_fine(ilevel, *uc, *uf );
+
+		if( m_smoother == opt::sm_gauss_seidel )
+			GaussSeidel( h, uf, ff );
+		
+		else if( m_smoother == opt::sm_jacobi )
+			Jacobi( h, uf, ff);		
+		
+		else if( m_smoother == opt::sm_sor )
+			SOR( h, uf, ff );
+		
+		if( m_bperiodic && ilevel <= m_ilevelmin )
+			make_periodic( uf );
+
+	}
+}
+
+template< class S, class O, typename T >
+double solver<S,O,T>::compute_error( const MeshvarBnd<T>& u, const MeshvarBnd<T>& unew )
+{
+	int 
+		nx = u.size(0), 
+		ny = u.size(1), 
+		nz = u.size(2);
+	
+	double err = 0.0;
+	unsigned count = 0;
+	
+#pragma omp parallel for reduction(+:err,count)
+	for( int ix=0; ix<nx; ++ix )
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+				if( fabs(unew(ix,iy,iz)) > 0.0 )//&& u(ix,iy,iz) != unew(ix,iy,iz) )
+				{
+					err += fabs(1.0 - u(ix,iy,iz)/unew(ix,iy,iz));
+					++count;
+				}
+	
+	if( count != 0 )
+		err /= count;
+	
+	return err;
+}
+
+template< class S, class O, typename T >
+double solver<S,O,T>::compute_error( const GridHierarchy<T>& uh, const GridHierarchy<T>& uhnew, bool verbose )
+{
+	double maxerr = 0.0;
+	
+	for( unsigned ilevel=uh.levelmin(); ilevel <= uh.levelmax(); ++ilevel )
+	{
+		double err = 0.0;
+		err = compute_error( *uh.get_grid(ilevel), *uhnew.get_grid(ilevel) );
+		
+		if( verbose )
+			std::cout << "    Level " << std::setw(6) << ilevel << ",   Error = " << err << std::endl;
+		maxerr = std::max(maxerr,err);
+		
+	}
+	return maxerr;
+}
+
+template< class S, class O, typename T >
+double solver<S,O,T>::solve( GridHierarchy<T>& uh, double acc, double h, bool verbose )
+{
+
+	double err;
+	
+	GridHierarchy<T> uhnew(uh);//, fsave(*m_pf);
+	m_pu = &uh;
+	
+    unsigned niter = 0;
+	
+	//... iterate ...//
+	while (true)
+	{
+		
+		
+		twoGrid( uh.levelmax() );
+		err = compute_error( *m_pu, uhnew, verbose );
+		++niter;
+		
+		if( verbose ){
+			std::cout << "--> Step No. " << std::setw(3) << niter << ", Max Err = " << err << std::endl;
+			std::cout << "-------------------------------------------------------------\n";
+		}
+			
+		if( (niter > 1) && ((err < acc) || (niter > 20)) )
+			break;
+		
+		uhnew = *m_pu;
+		//*m_pf = fsave;
+	}		
+	
+	if( err > acc )
+		std::cout << "Error : no convergence in Poisson solver" << std::endl;
+	else if( verbose )
+		std::cout << " - Converged in " << niter << " steps to req. acc. of " << acc << std::endl;
+
+	
+	//uh = uhnew;
+	//*m_pf = fsave;
+	return err;
+}
+
+inline double interp2( double x1, double x2, double x3, double f1, double f2, double f3, double x )
+{
+	double a,b,c;	
+	a = (x1 * f3 - x3 * f1 - x2 * f3 - x1 * f2 + x2 * f1 + x3 * f2) / (x1 * x3 * x3 - x2 * x3 * x3 + x2 * x1 * x1 - x3 * x1 * x1 + x3 * x2 * x2 - x1 * x2 * x2);
+	b = -(x1 * x1 * f3 - x1 * x1 * f2 - f1 * x3 * x3 + f2 * x3 * x3 - x2 * x2 * f3 + f1 * x2 * x2) / (x1 - x2) / (x1 * x2 - x1 * x3 + x3 * x3 - x2 * x3);
+	c = (x1 * x1 * x2 * f3 - x1 * x1 * x3 * f2 - x2 * x2 * x1 * f3 + f2 * x1 * x3 * x3 + x2 * x2 * x3 * f1 - f1 * x2 * x3 * x3) / (x1 - x2) / (x1 * x2 - x1 * x3 + x3 * x3 - x2 * x3);
+	
+	return a*x*x+b*x+c;
+}
+
+inline double interp2( double fleft, double fcenter, double fright, double x )
+{
+	double a,b,c;
+	a = 0.5*(fleft+fright)-fcenter;
+	b = 0.5*(fright-fleft);
+	c = fcenter;
+	
+	return a*x*x+b*x+c;
+}
+
+
+inline double interp2left( double fleft, double fcenter, double fright )
+{
+	double a,b,c;
+	a = (6.0*fright-10.0*fcenter+4.0*fleft)/15.0;
+	b = (-4.0*fleft+9.0*fright-5.0*fcenter)/15.0;
+	c = fcenter;
+	
+	return a-b+c;
+}
+
+inline double interp2right( double fleft, double fcenter, double fright )
+{
+	double a,b,c;
+	a = (6.0*fleft-10.0*fcenter+4.0*fright)/15.0;
+	b = (4.0*fright-9.0*fleft+5.0*fcenter)/15.0;
+	c = fcenter;
+	
+	return a+b+c;
+}
+
+template< class S, class O, typename T >
+void solver<S,O,T>::interp_cubic( MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine, int i, int j, int k, int itop, int jtop, int ktop )
+{
+	MeshvarBnd<T> &u    = fine;
+	MeshvarBnd<T> &utop = coarse;
+	
+	/*
+	u(i+0,j+0,k+0) = ( -125.*utop(itop-2,jtop-2,ktop-2) +875.*utop(itop-2,jtop-2,ktop-1) +2625.*utop(itop-2,jtop-2,ktop) 
+					  -175.*utop(itop-2,jtop-2,ktop+1) +875.*utop(itop-2,jtop-1,ktop-2) -6125.*utop(itop-2,jtop-1,ktop-1) 
+					  -18375.*utop(itop-2,jtop-1,ktop) +1225.*utop(itop-2,jtop-1,ktop+1) +2625.*utop(itop-2,jtop,ktop-2) 
+					  -18375.*utop(itop-2,jtop,ktop-1) -55125.*utop(itop-2,jtop,ktop) +3675.*utop(itop-2,jtop,ktop+1) 
+					  -175.*utop(itop-2,jtop+1,ktop-2) +1225.*utop(itop-2,jtop+1,ktop-1) +3675.*utop(itop-2,jtop+1,ktop) 
+					  -245.*utop(itop-2,jtop+1,ktop+1) +875.*utop(itop-1,jtop-2,ktop-2) -6125.*utop(itop-1,jtop-2,ktop-1) 
+					  -18375.*utop(itop-1,jtop-2,ktop) +1225.*utop(itop-1,jtop-2,ktop+1) -6125.*utop(itop-1,jtop-1,ktop-2) 
+					  +42875.*utop(itop-1,jtop-1,ktop-1) +128625.*utop(itop-1,jtop-1,ktop) -8575.*utop(itop-1,jtop-1,ktop+1) 
+					  -18375.*utop(itop-1,jtop,ktop-2) +128625.*utop(itop-1,jtop,ktop-1) +385875.*utop(itop-1,jtop,ktop) 
+					  -25725.*utop(itop-1,jtop,ktop+1) +1225.*utop(itop-1,jtop+1,ktop-2) -8575.*utop(itop-1,jtop+1,ktop-1) 
+					  -25725.*utop(itop-1,jtop+1,ktop) +1715.*utop(itop-1,jtop+1,ktop+1) +2625.*utop(itop,jtop-2,ktop-2) 
+					  -18375.*utop(itop,jtop-2,ktop-1) -55125.*utop(itop,jtop-2,ktop) +3675.*utop(itop,jtop-2,ktop+1) 
+					  -18375.*utop(itop,jtop-1,ktop-2) +128625.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) 
+					  -25725.*utop(itop,jtop-1,ktop+1) -55125.*utop(itop,jtop,ktop-2) +385875.*utop(itop,jtop,ktop-1) 
+					  +1157625.*utop(itop,jtop,ktop) -77175.*utop(itop,jtop,ktop+1) +3675.*utop(itop,jtop+1,ktop-2) 
+					  -25725.*utop(itop,jtop+1,ktop-1) -77175.*utop(itop,jtop+1,ktop) +5145.*utop(itop,jtop+1,ktop+1) 
+					  -175.*utop(itop+1,jtop-2,ktop-2) +1225.*utop(itop+1,jtop-2,ktop-1) +3675.*utop(itop+1,jtop-2,ktop) 
+					  -245.*utop(itop+1,jtop-2,ktop+1) +1225.*utop(itop+1,jtop-1,ktop-2) -8575.*utop(itop+1,jtop-1,ktop-1) 
+					  -25725.*utop(itop+1,jtop-1,ktop) +1715.*utop(itop+1,jtop-1,ktop+1) +3675.*utop(itop+1,jtop,ktop-2) 
+					  -25725.*utop(itop+1,jtop,ktop-1) -77175.*utop(itop+1,jtop,ktop) +5145.*utop(itop+1,jtop,ktop+1) 
+					  -245.*utop(itop+1,jtop+1,ktop-2) +1715.*utop(itop+1,jtop+1,ktop-1) +5145.*utop(itop+1,jtop+1,ktop) 
+					  -343.*utop(itop+1,jtop+1,ktop+1) )/2097152.;
+	u(i+0,j+0,k+1) = ( -175.*utop(itop-2,jtop-2,ktop-1) +2625.*utop(itop-2,jtop-2,ktop) +875.*utop(itop-2,jtop-2,ktop+1) 
+					  -125.*utop(itop-2,jtop-2,ktop+2) +1225.*utop(itop-2,jtop-1,ktop-1) -18375.*utop(itop-2,jtop-1,ktop) 
+					  -6125.*utop(itop-2,jtop-1,ktop+1) +875.*utop(itop-2,jtop-1,ktop+2) +3675.*utop(itop-2,jtop,ktop-1) 
+					  -55125.*utop(itop-2,jtop,ktop) -18375.*utop(itop-2,jtop,ktop+1) +2625.*utop(itop-2,jtop,ktop+2) 
+					  -245.*utop(itop-2,jtop+1,ktop-1) +3675.*utop(itop-2,jtop+1,ktop) +1225.*utop(itop-2,jtop+1,ktop+1) 
+					  -175.*utop(itop-2,jtop+1,ktop+2) +1225.*utop(itop-1,jtop-2,ktop-1) -18375.*utop(itop-1,jtop-2,ktop) 
+					  -6125.*utop(itop-1,jtop-2,ktop+1) +875.*utop(itop-1,jtop-2,ktop+2) -8575.*utop(itop-1,jtop-1,ktop-1) 
+					  +128625.*utop(itop-1,jtop-1,ktop) +42875.*utop(itop-1,jtop-1,ktop+1) -6125.*utop(itop-1,jtop-1,ktop+2) 
+					  -25725.*utop(itop-1,jtop,ktop-1) +385875.*utop(itop-1,jtop,ktop) +128625.*utop(itop-1,jtop,ktop+1) 
+					  -18375.*utop(itop-1,jtop,ktop+2) +1715.*utop(itop-1,jtop+1,ktop-1) -25725.*utop(itop-1,jtop+1,ktop) 
+					  -8575.*utop(itop-1,jtop+1,ktop+1) +1225.*utop(itop-1,jtop+1,ktop+2) +3675.*utop(itop,jtop-2,ktop-1) 
+					  -55125.*utop(itop,jtop-2,ktop) -18375.*utop(itop,jtop-2,ktop+1) +2625.*utop(itop,jtop-2,ktop+2) 
+					  -25725.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) +128625.*utop(itop,jtop-1,ktop+1) 
+					  -18375.*utop(itop,jtop-1,ktop+2) -77175.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) 
+					  +385875.*utop(itop,jtop,ktop+1) -55125.*utop(itop,jtop,ktop+2) +5145.*utop(itop,jtop+1,ktop-1) 
+					  -77175.*utop(itop,jtop+1,ktop) -25725.*utop(itop,jtop+1,ktop+1) +3675.*utop(itop,jtop+1,ktop+2) 
+					  -245.*utop(itop+1,jtop-2,ktop-1) +3675.*utop(itop+1,jtop-2,ktop) +1225.*utop(itop+1,jtop-2,ktop+1) 
+					  -175.*utop(itop+1,jtop-2,ktop+2) +1715.*utop(itop+1,jtop-1,ktop-1) -25725.*utop(itop+1,jtop-1,ktop) 
+					  -8575.*utop(itop+1,jtop-1,ktop+1) +1225.*utop(itop+1,jtop-1,ktop+2) +5145.*utop(itop+1,jtop,ktop-1) 
+					  -77175.*utop(itop+1,jtop,ktop) -25725.*utop(itop+1,jtop,ktop+1) +3675.*utop(itop+1,jtop,ktop+2) 
+					  -343.*utop(itop+1,jtop+1,ktop-1) +5145.*utop(itop+1,jtop+1,ktop) +1715.*utop(itop+1,jtop+1,ktop+1) 
+					  -245.*utop(itop+1,jtop+1,ktop+2) )/2097152.;
+	u(i+0,j+1,k+0) = ( -175.*utop(itop-2,jtop-1,ktop-2) +1225.*utop(itop-2,jtop-1,ktop-1) +3675.*utop(itop-2,jtop-1,ktop) 
+					  -245.*utop(itop-2,jtop-1,ktop+1) +2625.*utop(itop-2,jtop,ktop-2) -18375.*utop(itop-2,jtop,ktop-1) 
+					  -55125.*utop(itop-2,jtop,ktop) +3675.*utop(itop-2,jtop,ktop+1) +875.*utop(itop-2,jtop+1,ktop-2) 
+					  -6125.*utop(itop-2,jtop+1,ktop-1) -18375.*utop(itop-2,jtop+1,ktop) +1225.*utop(itop-2,jtop+1,ktop+1) 
+					  -125.*utop(itop-2,jtop+2,ktop-2) +875.*utop(itop-2,jtop+2,ktop-1) +2625.*utop(itop-2,jtop+2,ktop) 
+					  -175.*utop(itop-2,jtop+2,ktop+1) +1225.*utop(itop-1,jtop-1,ktop-2) -8575.*utop(itop-1,jtop-1,ktop-1) 
+					  -25725.*utop(itop-1,jtop-1,ktop) +1715.*utop(itop-1,jtop-1,ktop+1) -18375.*utop(itop-1,jtop,ktop-2) 
+					  +128625.*utop(itop-1,jtop,ktop-1) +385875.*utop(itop-1,jtop,ktop) -25725.*utop(itop-1,jtop,ktop+1) 
+					  -6125.*utop(itop-1,jtop+1,ktop-2) +42875.*utop(itop-1,jtop+1,ktop-1) +128625.*utop(itop-1,jtop+1,ktop) 
+					  -8575.*utop(itop-1,jtop+1,ktop+1) +875.*utop(itop-1,jtop+2,ktop-2) -6125.*utop(itop-1,jtop+2,ktop-1) 
+					  -18375.*utop(itop-1,jtop+2,ktop) +1225.*utop(itop-1,jtop+2,ktop+1) +3675.*utop(itop,jtop-1,ktop-2) 
+					  -25725.*utop(itop,jtop-1,ktop-1) -77175.*utop(itop,jtop-1,ktop) +5145.*utop(itop,jtop-1,ktop+1) 
+					  -55125.*utop(itop,jtop,ktop-2) +385875.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) 
+					  -77175.*utop(itop,jtop,ktop+1) -18375.*utop(itop,jtop+1,ktop-2) +128625.*utop(itop,jtop+1,ktop-1) 
+					  +385875.*utop(itop,jtop+1,ktop) -25725.*utop(itop,jtop+1,ktop+1) +2625.*utop(itop,jtop+2,ktop-2) 
+					  -18375.*utop(itop,jtop+2,ktop-1) -55125.*utop(itop,jtop+2,ktop) +3675.*utop(itop,jtop+2,ktop+1) 
+					  -245.*utop(itop+1,jtop-1,ktop-2) +1715.*utop(itop+1,jtop-1,ktop-1) +5145.*utop(itop+1,jtop-1,ktop) 
+					  -343.*utop(itop+1,jtop-1,ktop+1) +3675.*utop(itop+1,jtop,ktop-2) -25725.*utop(itop+1,jtop,ktop-1) 
+					  -77175.*utop(itop+1,jtop,ktop) +5145.*utop(itop+1,jtop,ktop+1) +1225.*utop(itop+1,jtop+1,ktop-2) 
+					  -8575.*utop(itop+1,jtop+1,ktop-1) -25725.*utop(itop+1,jtop+1,ktop) +1715.*utop(itop+1,jtop+1,ktop+1) 
+					  -175.*utop(itop+1,jtop+2,ktop-2) +1225.*utop(itop+1,jtop+2,ktop-1) +3675.*utop(itop+1,jtop+2,ktop) 
+					  -245.*utop(itop+1,jtop+2,ktop+1) )/2097152.;
+	u(i+0,j+1,k+1) = ( -245.*utop(itop-2,jtop-1,ktop-1) +3675.*utop(itop-2,jtop-1,ktop) +1225.*utop(itop-2,jtop-1,ktop+1) 
+					  -175.*utop(itop-2,jtop-1,ktop+2) +3675.*utop(itop-2,jtop,ktop-1) -55125.*utop(itop-2,jtop,ktop) 
+					  -18375.*utop(itop-2,jtop,ktop+1) +2625.*utop(itop-2,jtop,ktop+2) +1225.*utop(itop-2,jtop+1,ktop-1) 
+					  -18375.*utop(itop-2,jtop+1,ktop) -6125.*utop(itop-2,jtop+1,ktop+1) +875.*utop(itop-2,jtop+1,ktop+2) 
+					  -175.*utop(itop-2,jtop+2,ktop-1) +2625.*utop(itop-2,jtop+2,ktop) +875.*utop(itop-2,jtop+2,ktop+1) 
+					  -125.*utop(itop-2,jtop+2,ktop+2) +1715.*utop(itop-1,jtop-1,ktop-1) -25725.*utop(itop-1,jtop-1,ktop) 
+					  -8575.*utop(itop-1,jtop-1,ktop+1) +1225.*utop(itop-1,jtop-1,ktop+2) -25725.*utop(itop-1,jtop,ktop-1) 
+					  +385875.*utop(itop-1,jtop,ktop) +128625.*utop(itop-1,jtop,ktop+1) -18375.*utop(itop-1,jtop,ktop+2) 
+					  -8575.*utop(itop-1,jtop+1,ktop-1) +128625.*utop(itop-1,jtop+1,ktop) +42875.*utop(itop-1,jtop+1,ktop+1) 
+					  -6125.*utop(itop-1,jtop+1,ktop+2) +1225.*utop(itop-1,jtop+2,ktop-1) -18375.*utop(itop-1,jtop+2,ktop) 
+					  -6125.*utop(itop-1,jtop+2,ktop+1) +875.*utop(itop-1,jtop+2,ktop+2) +5145.*utop(itop,jtop-1,ktop-1) 
+					  -77175.*utop(itop,jtop-1,ktop) -25725.*utop(itop,jtop-1,ktop+1) +3675.*utop(itop,jtop-1,ktop+2) 
+					  -77175.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) +385875.*utop(itop,jtop,ktop+1) 
+					  -55125.*utop(itop,jtop,ktop+2) -25725.*utop(itop,jtop+1,ktop-1) +385875.*utop(itop,jtop+1,ktop) 
+					  +128625.*utop(itop,jtop+1,ktop+1) -18375.*utop(itop,jtop+1,ktop+2) +3675.*utop(itop,jtop+2,ktop-1) 
+					  -55125.*utop(itop,jtop+2,ktop) -18375.*utop(itop,jtop+2,ktop+1) +2625.*utop(itop,jtop+2,ktop+2) 
+					  -343.*utop(itop+1,jtop-1,ktop-1) +5145.*utop(itop+1,jtop-1,ktop) +1715.*utop(itop+1,jtop-1,ktop+1) 
+					  -245.*utop(itop+1,jtop-1,ktop+2) +5145.*utop(itop+1,jtop,ktop-1) -77175.*utop(itop+1,jtop,ktop) 
+					  -25725.*utop(itop+1,jtop,ktop+1) +3675.*utop(itop+1,jtop,ktop+2) +1715.*utop(itop+1,jtop+1,ktop-1) 
+					  -25725.*utop(itop+1,jtop+1,ktop) -8575.*utop(itop+1,jtop+1,ktop+1) +1225.*utop(itop+1,jtop+1,ktop+2) 
+					  -245.*utop(itop+1,jtop+2,ktop-1) +3675.*utop(itop+1,jtop+2,ktop) +1225.*utop(itop+1,jtop+2,ktop+1) 
+					  -175.*utop(itop+1,jtop+2,ktop+2) )/2097152.;
+	u(i+1,j+0,k+0) = ( -175.*utop(itop-1,jtop-2,ktop-2) +1225.*utop(itop-1,jtop-2,ktop-1) +3675.*utop(itop-1,jtop-2,ktop) 
+					  -245.*utop(itop-1,jtop-2,ktop+1) +1225.*utop(itop-1,jtop-1,ktop-2) -8575.*utop(itop-1,jtop-1,ktop-1) 
+					  -25725.*utop(itop-1,jtop-1,ktop) +1715.*utop(itop-1,jtop-1,ktop+1) +3675.*utop(itop-1,jtop,ktop-2) 
+					  -25725.*utop(itop-1,jtop,ktop-1) -77175.*utop(itop-1,jtop,ktop) +5145.*utop(itop-1,jtop,ktop+1) 
+					  -245.*utop(itop-1,jtop+1,ktop-2) +1715.*utop(itop-1,jtop+1,ktop-1) +5145.*utop(itop-1,jtop+1,ktop) 
+					  -343.*utop(itop-1,jtop+1,ktop+1) +2625.*utop(itop,jtop-2,ktop-2) -18375.*utop(itop,jtop-2,ktop-1) 
+					  -55125.*utop(itop,jtop-2,ktop) +3675.*utop(itop,jtop-2,ktop+1) -18375.*utop(itop,jtop-1,ktop-2) 
+					  +128625.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) -25725.*utop(itop,jtop-1,ktop+1) 
+					  -55125.*utop(itop,jtop,ktop-2) +385875.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) 
+					  -77175.*utop(itop,jtop,ktop+1) +3675.*utop(itop,jtop+1,ktop-2) -25725.*utop(itop,jtop+1,ktop-1) 
+					  -77175.*utop(itop,jtop+1,ktop) +5145.*utop(itop,jtop+1,ktop+1) +875.*utop(itop+1,jtop-2,ktop-2) 
+					  -6125.*utop(itop+1,jtop-2,ktop-1) -18375.*utop(itop+1,jtop-2,ktop) +1225.*utop(itop+1,jtop-2,ktop+1) 
+					  -6125.*utop(itop+1,jtop-1,ktop-2) +42875.*utop(itop+1,jtop-1,ktop-1) +128625.*utop(itop+1,jtop-1,ktop) 
+					  -8575.*utop(itop+1,jtop-1,ktop+1) -18375.*utop(itop+1,jtop,ktop-2) +128625.*utop(itop+1,jtop,ktop-1) 
+					  +385875.*utop(itop+1,jtop,ktop) -25725.*utop(itop+1,jtop,ktop+1) +1225.*utop(itop+1,jtop+1,ktop-2) 
+					  -8575.*utop(itop+1,jtop+1,ktop-1) -25725.*utop(itop+1,jtop+1,ktop) +1715.*utop(itop+1,jtop+1,ktop+1) 
+					  -125.*utop(itop+2,jtop-2,ktop-2) +875.*utop(itop+2,jtop-2,ktop-1) +2625.*utop(itop+2,jtop-2,ktop) 
+					  -175.*utop(itop+2,jtop-2,ktop+1) +875.*utop(itop+2,jtop-1,ktop-2) -6125.*utop(itop+2,jtop-1,ktop-1) 
+					  -18375.*utop(itop+2,jtop-1,ktop) +1225.*utop(itop+2,jtop-1,ktop+1) +2625.*utop(itop+2,jtop,ktop-2) 
+					  -18375.*utop(itop+2,jtop,ktop-1) -55125.*utop(itop+2,jtop,ktop) +3675.*utop(itop+2,jtop,ktop+1) 
+					  -175.*utop(itop+2,jtop+1,ktop-2) +1225.*utop(itop+2,jtop+1,ktop-1) +3675.*utop(itop+2,jtop+1,ktop) 
+					  -245.*utop(itop+2,jtop+1,ktop+1) )/2097152.;
+	u(i+1,j+0,k+1) = ( -245.*utop(itop-1,jtop-2,ktop-1) +3675.*utop(itop-1,jtop-2,ktop) +1225.*utop(itop-1,jtop-2,ktop+1) -175.*utop(itop-1,jtop-2,ktop+2) +1715.*utop(itop-1,jtop-1,ktop-1) -25725.*utop(itop-1,jtop-1,ktop) -8575.*utop(itop-1,jtop-1,ktop+1) +1225.*utop(itop-1,jtop-1,ktop+2) +5145.*utop(itop-1,jtop,ktop-1) -77175.*utop(itop-1,jtop,ktop) -25725.*utop(itop-1,jtop,ktop+1) +3675.*utop(itop-1,jtop,ktop+2) -343.*utop(itop-1,jtop+1,ktop-1) +5145.*utop(itop-1,jtop+1,ktop) +1715.*utop(itop-1,jtop+1,ktop+1) -245.*utop(itop-1,jtop+1,ktop+2) +3675.*utop(itop,jtop-2,ktop-1) -55125.*utop(itop,jtop-2,ktop) -18375.*utop(itop,jtop-2,ktop+1) +2625.*utop(itop,jtop-2,ktop+2) -25725.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) +128625.*utop(itop,jtop-1,ktop+1) -18375.*utop(itop,jtop-1,ktop+2) -77175.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) +385875.*utop(itop,jtop,ktop+1) -55125.*utop(itop,jtop,ktop+2) +5145.*utop(itop,jtop+1,ktop-1) -77175.*utop(itop,jtop+1,ktop) -25725.*utop(itop,jtop+1,ktop+1) +3675.*utop(itop,jtop+1,ktop+2) +1225.*utop(itop+1,jtop-2,ktop-1) -18375.*utop(itop+1,jtop-2,ktop) -6125.*utop(itop+1,jtop-2,ktop+1) +875.*utop(itop+1,jtop-2,ktop+2) -8575.*utop(itop+1,jtop-1,ktop-1) +128625.*utop(itop+1,jtop-1,ktop) +42875.*utop(itop+1,jtop-1,ktop+1) -6125.*utop(itop+1,jtop-1,ktop+2) -25725.*utop(itop+1,jtop,ktop-1) +385875.*utop(itop+1,jtop,ktop) +128625.*utop(itop+1,jtop,ktop+1) -18375.*utop(itop+1,jtop,ktop+2) +1715.*utop(itop+1,jtop+1,ktop-1) -25725.*utop(itop+1,jtop+1,ktop) -8575.*utop(itop+1,jtop+1,ktop+1) +1225.*utop(itop+1,jtop+1,ktop+2) -175.*utop(itop+2,jtop-2,ktop-1) +2625.*utop(itop+2,jtop-2,ktop) +875.*utop(itop+2,jtop-2,ktop+1) -125.*utop(itop+2,jtop-2,ktop+2) +1225.*utop(itop+2,jtop-1,ktop-1) -18375.*utop(itop+2,jtop-1,ktop) -6125.*utop(itop+2,jtop-1,ktop+1) +875.*utop(itop+2,jtop-1,ktop+2) +3675.*utop(itop+2,jtop,ktop-1) -55125.*utop(itop+2,jtop,ktop) -18375.*utop(itop+2,jtop,ktop+1) +2625.*utop(itop+2,jtop,ktop+2) -245.*utop(itop+2,jtop+1,ktop-1) +3675.*utop(itop+2,jtop+1,ktop) +1225.*utop(itop+2,jtop+1,ktop+1) -175.*utop(itop+2,jtop+1,ktop+2) )/2097152.;
+	u(i+1,j+1,k+0) = ( -245.*utop(itop-1,jtop-1,ktop-2) +1715.*utop(itop-1,jtop-1,ktop-1) +5145.*utop(itop-1,jtop-1,ktop) -343.*utop(itop-1,jtop-1,ktop+1) +3675.*utop(itop-1,jtop,ktop-2) -25725.*utop(itop-1,jtop,ktop-1) -77175.*utop(itop-1,jtop,ktop) +5145.*utop(itop-1,jtop,ktop+1) +1225.*utop(itop-1,jtop+1,ktop-2) -8575.*utop(itop-1,jtop+1,ktop-1) -25725.*utop(itop-1,jtop+1,ktop) +1715.*utop(itop-1,jtop+1,ktop+1) -175.*utop(itop-1,jtop+2,ktop-2) +1225.*utop(itop-1,jtop+2,ktop-1) +3675.*utop(itop-1,jtop+2,ktop) -245.*utop(itop-1,jtop+2,ktop+1) +3675.*utop(itop,jtop-1,ktop-2) -25725.*utop(itop,jtop-1,ktop-1) -77175.*utop(itop,jtop-1,ktop) +5145.*utop(itop,jtop-1,ktop+1) -55125.*utop(itop,jtop,ktop-2) +385875.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) -77175.*utop(itop,jtop,ktop+1) -18375.*utop(itop,jtop+1,ktop-2) +128625.*utop(itop,jtop+1,ktop-1) +385875.*utop(itop,jtop+1,ktop) -25725.*utop(itop,jtop+1,ktop+1) +2625.*utop(itop,jtop+2,ktop-2) -18375.*utop(itop,jtop+2,ktop-1) -55125.*utop(itop,jtop+2,ktop) +3675.*utop(itop,jtop+2,ktop+1) +1225.*utop(itop+1,jtop-1,ktop-2) -8575.*utop(itop+1,jtop-1,ktop-1) -25725.*utop(itop+1,jtop-1,ktop) +1715.*utop(itop+1,jtop-1,ktop+1) -18375.*utop(itop+1,jtop,ktop-2) +128625.*utop(itop+1,jtop,ktop-1) +385875.*utop(itop+1,jtop,ktop) -25725.*utop(itop+1,jtop,ktop+1) -6125.*utop(itop+1,jtop+1,ktop-2) +42875.*utop(itop+1,jtop+1,ktop-1) +128625.*utop(itop+1,jtop+1,ktop) -8575.*utop(itop+1,jtop+1,ktop+1) +875.*utop(itop+1,jtop+2,ktop-2) -6125.*utop(itop+1,jtop+2,ktop-1) -18375.*utop(itop+1,jtop+2,ktop) +1225.*utop(itop+1,jtop+2,ktop+1) -175.*utop(itop+2,jtop-1,ktop-2) +1225.*utop(itop+2,jtop-1,ktop-1) +3675.*utop(itop+2,jtop-1,ktop) -245.*utop(itop+2,jtop-1,ktop+1) +2625.*utop(itop+2,jtop,ktop-2) -18375.*utop(itop+2,jtop,ktop-1) -55125.*utop(itop+2,jtop,ktop) +3675.*utop(itop+2,jtop,ktop+1) +875.*utop(itop+2,jtop+1,ktop-2) -6125.*utop(itop+2,jtop+1,ktop-1) -18375.*utop(itop+2,jtop+1,ktop) +1225.*utop(itop+2,jtop+1,ktop+1) -125.*utop(itop+2,jtop+2,ktop-2) +875.*utop(itop+2,jtop+2,ktop-1) +2625.*utop(itop+2,jtop+2,ktop) -175.*utop(itop+2,jtop+2,ktop+1) )/2097152.;
+	u(i+1,j+1,k+1) = ( -343.*utop(itop-1,jtop-1,ktop-1) +5145.*utop(itop-1,jtop-1,ktop) +1715.*utop(itop-1,jtop-1,ktop+1) -245.*utop(itop-1,jtop-1,ktop+2) +5145.*utop(itop-1,jtop,ktop-1) -77175.*utop(itop-1,jtop,ktop) -25725.*utop(itop-1,jtop,ktop+1) +3675.*utop(itop-1,jtop,ktop+2) +1715.*utop(itop-1,jtop+1,ktop-1) -25725.*utop(itop-1,jtop+1,ktop) -8575.*utop(itop-1,jtop+1,ktop+1) +1225.*utop(itop-1,jtop+1,ktop+2) -245.*utop(itop-1,jtop+2,ktop-1) +3675.*utop(itop-1,jtop+2,ktop) +1225.*utop(itop-1,jtop+2,ktop+1) -175.*utop(itop-1,jtop+2,ktop+2) +5145.*utop(itop,jtop-1,ktop-1) -77175.*utop(itop,jtop-1,ktop) -25725.*utop(itop,jtop-1,ktop+1) +3675.*utop(itop,jtop-1,ktop+2) -77175.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) +385875.*utop(itop,jtop,ktop+1) -55125.*utop(itop,jtop,ktop+2) -25725.*utop(itop,jtop+1,ktop-1) +385875.*utop(itop,jtop+1,ktop) +128625.*utop(itop,jtop+1,ktop+1) -18375.*utop(itop,jtop+1,ktop+2) +3675.*utop(itop,jtop+2,ktop-1) -55125.*utop(itop,jtop+2,ktop) -18375.*utop(itop,jtop+2,ktop+1) +2625.*utop(itop,jtop+2,ktop+2) +1715.*utop(itop+1,jtop-1,ktop-1) -25725.*utop(itop+1,jtop-1,ktop) -8575.*utop(itop+1,jtop-1,ktop+1) +1225.*utop(itop+1,jtop-1,ktop+2) -25725.*utop(itop+1,jtop,ktop-1) +385875.*utop(itop+1,jtop,ktop) +128625.*utop(itop+1,jtop,ktop+1) -18375.*utop(itop+1,jtop,ktop+2) -8575.*utop(itop+1,jtop+1,ktop-1) +128625.*utop(itop+1,jtop+1,ktop) +42875.*utop(itop+1,jtop+1,ktop+1) -6125.*utop(itop+1,jtop+1,ktop+2) +1225.*utop(itop+1,jtop+2,ktop-1) -18375.*utop(itop+1,jtop+2,ktop) -6125.*utop(itop+1,jtop+2,ktop+1) +875.*utop(itop+1,jtop+2,ktop+2) -245.*utop(itop+2,jtop-1,ktop-1) +3675.*utop(itop+2,jtop-1,ktop) +1225.*utop(itop+2,jtop-1,ktop+1) -175.*utop(itop+2,jtop-1,ktop+2) +3675.*utop(itop+2,jtop,ktop-1) -55125.*utop(itop+2,jtop,ktop) -18375.*utop(itop+2,jtop,ktop+1) +2625.*utop(itop+2,jtop,ktop+2) +1225.*utop(itop+2,jtop+1,ktop-1) -18375.*utop(itop+2,jtop+1,ktop) -6125.*utop(itop+2,jtop+1,ktop+1) +875.*utop(itop+2,jtop+1,ktop+2) -175.*utop(itop+2,jtop+2,ktop-1) +2625.*utop(itop+2,jtop+2,ktop) +875.*utop(itop+2,jtop+2,ktop+1) -125.*utop(itop+2,jtop+2,ktop+2) )/2097152.;
+	*/
+	
+	u(i+0,j+0,k+0) = ( -1.060835e-05*utop(itop-2,jtop-2,ktop-2) +9.901123e-05*utop(itop-2,jtop-2,ktop-1) +4.455505e-04*utop(itop-2,jtop-2,ktop) -5.940674e-05*utop(itop-2,jtop-2,ktop+1) +8.250936e-06*utop(itop-2,jtop-2,ktop+2) +9.901123e-05*utop(itop-2,jtop-1,ktop-2) -9.241048e-04*utop(itop-2,jtop-1,ktop-1) -4.158472e-03*utop(itop-2,jtop-1,ktop) +5.544629e-04*utop(itop-2,jtop-1,ktop+1) -7.700874e-05*utop(itop-2,jtop-1,ktop+2) +4.455505e-04*utop(itop-2,jtop,ktop-2) -4.158472e-03*utop(itop-2,jtop,ktop-1) -1.871312e-02*utop(itop-2,jtop,ktop) +2.495083e-03*utop(itop-2,jtop,ktop+1) -3.465393e-04*utop(itop-2,jtop,ktop+2) -5.940674e-05*utop(itop-2,jtop+1,ktop-2) +5.544629e-04*utop(itop-2,jtop+1,ktop-1) +2.495083e-03*utop(itop-2,jtop+1,ktop) -3.326777e-04*utop(itop-2,jtop+1,ktop+1) +4.620524e-05*utop(itop-2,jtop+1,ktop+2) +8.250936e-06*utop(itop-2,jtop+2,ktop-2) -7.700874e-05*utop(itop-2,jtop+2,ktop-1) -3.465393e-04*utop(itop-2,jtop+2,ktop) +4.620524e-05*utop(itop-2,jtop+2,ktop+1) -6.417395e-06*utop(itop-2,jtop+2,ktop+2) +9.901123e-05*utop(itop-1,jtop-2,ktop-2) -9.241048e-04*utop(itop-1,jtop-2,ktop-1) -4.158472e-03*utop(itop-1,jtop-2,ktop) +5.544629e-04*utop(itop-1,jtop-2,ktop+1) -7.700874e-05*utop(itop-1,jtop-2,ktop+2) -9.241048e-04*utop(itop-1,jtop-1,ktop-2) +8.624978e-03*utop(itop-1,jtop-1,ktop-1) +3.881240e-02*utop(itop-1,jtop-1,ktop) -5.174987e-03*utop(itop-1,jtop-1,ktop+1) +7.187482e-04*utop(itop-1,jtop-1,ktop+2) -4.158472e-03*utop(itop-1,jtop,ktop-2) +3.881240e-02*utop(itop-1,jtop,ktop-1) +1.746558e-01*utop(itop-1,jtop,ktop) -2.328744e-02*utop(itop-1,jtop,ktop+1) +3.234367e-03*utop(itop-1,jtop,ktop+2) +5.544629e-04*utop(itop-1,jtop+1,ktop-2) -5.174987e-03*utop(itop-1,jtop+1,ktop-1) -2.328744e-02*utop(itop-1,jtop+1,ktop) +3.104992e-03*utop(itop-1,jtop+1,ktop+1) -4.312489e-04*utop(itop-1,jtop+1,ktop+2) -7.700874e-05*utop(itop-1,jtop+2,ktop-2) +7.187482e-04*utop(itop-1,jtop+2,ktop-1) +3.234367e-03*utop(itop-1,jtop+2,ktop) -4.312489e-04*utop(itop-1,jtop+2,ktop+1) +5.989568e-05*utop(itop-1,jtop+2,ktop+2) +4.455505e-04*utop(itop,jtop-2,ktop-2) -4.158472e-03*utop(itop,jtop-2,ktop-1) -1.871312e-02*utop(itop,jtop-2,ktop) +2.495083e-03*utop(itop,jtop-2,ktop+1) -3.465393e-04*utop(itop,jtop-2,ktop+2) -4.158472e-03*utop(itop,jtop-1,ktop-2) +3.881240e-02*utop(itop,jtop-1,ktop-1) +1.746558e-01*utop(itop,jtop-1,ktop) -2.328744e-02*utop(itop,jtop-1,ktop+1) +3.234367e-03*utop(itop,jtop-1,ktop+2) -1.871312e-02*utop(itop,jtop,ktop-2) +1.746558e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) -1.047935e-01*utop(itop,jtop,ktop+1) +1.455465e-02*utop(itop,jtop,ktop+2) +2.495083e-03*utop(itop,jtop+1,ktop-2) -2.328744e-02*utop(itop,jtop+1,ktop-1) -1.047935e-01*utop(itop,jtop+1,ktop) +1.397246e-02*utop(itop,jtop+1,ktop+1) -1.940620e-03*utop(itop,jtop+1,ktop+2) -3.465393e-04*utop(itop,jtop+2,ktop-2) +3.234367e-03*utop(itop,jtop+2,ktop-1) +1.455465e-02*utop(itop,jtop+2,ktop) -1.940620e-03*utop(itop,jtop+2,ktop+1) +2.695306e-04*utop(itop,jtop+2,ktop+2) -5.940674e-05*utop(itop+1,jtop-2,ktop-2) +5.544629e-04*utop(itop+1,jtop-2,ktop-1) +2.495083e-03*utop(itop+1,jtop-2,ktop) -3.326777e-04*utop(itop+1,jtop-2,ktop+1) +4.620524e-05*utop(itop+1,jtop-2,ktop+2) +5.544629e-04*utop(itop+1,jtop-1,ktop-2) -5.174987e-03*utop(itop+1,jtop-1,ktop-1) -2.328744e-02*utop(itop+1,jtop-1,ktop) +3.104992e-03*utop(itop+1,jtop-1,ktop+1) -4.312489e-04*utop(itop+1,jtop-1,ktop+2) +2.495083e-03*utop(itop+1,jtop,ktop-2) -2.328744e-02*utop(itop+1,jtop,ktop-1) -1.047935e-01*utop(itop+1,jtop,ktop) +1.397246e-02*utop(itop+1,jtop,ktop+1) -1.940620e-03*utop(itop+1,jtop,ktop+2) -3.326777e-04*utop(itop+1,jtop+1,ktop-2) +3.104992e-03*utop(itop+1,jtop+1,ktop-1) +1.397246e-02*utop(itop+1,jtop+1,ktop) -1.862995e-03*utop(itop+1,jtop+1,ktop+1) +2.587494e-04*utop(itop+1,jtop+1,ktop+2) +4.620524e-05*utop(itop+1,jtop+2,ktop-2) -4.312489e-04*utop(itop+1,jtop+2,ktop-1) -1.940620e-03*utop(itop+1,jtop+2,ktop) +2.587494e-04*utop(itop+1,jtop+2,ktop+1) -3.593741e-05*utop(itop+1,jtop+2,ktop+2) +8.250936e-06*utop(itop+2,jtop-2,ktop-2) -7.700874e-05*utop(itop+2,jtop-2,ktop-1) -3.465393e-04*utop(itop+2,jtop-2,ktop) +4.620524e-05*utop(itop+2,jtop-2,ktop+1) -6.417395e-06*utop(itop+2,jtop-2,ktop+2) -7.700874e-05*utop(itop+2,jtop-1,ktop-2) +7.187482e-04*utop(itop+2,jtop-1,ktop-1) +3.234367e-03*utop(itop+2,jtop-1,ktop) -4.312489e-04*utop(itop+2,jtop-1,ktop+1) +5.989568e-05*utop(itop+2,jtop-1,ktop+2) -3.465393e-04*utop(itop+2,jtop,ktop-2) +3.234367e-03*utop(itop+2,jtop,ktop-1) +1.455465e-02*utop(itop+2,jtop,ktop) -1.940620e-03*utop(itop+2,jtop,ktop+1) +2.695306e-04*utop(itop+2,jtop,ktop+2) +4.620524e-05*utop(itop+2,jtop+1,ktop-2) -4.312489e-04*utop(itop+2,jtop+1,ktop-1) -1.940620e-03*utop(itop+2,jtop+1,ktop) +2.587494e-04*utop(itop+2,jtop+1,ktop+1) -3.593741e-05*utop(itop+2,jtop+1,ktop+2) -6.417395e-06*utop(itop+2,jtop+2,ktop-2) +5.989568e-05*utop(itop+2,jtop+2,ktop-1) +2.695306e-04*utop(itop+2,jtop+2,ktop) -3.593741e-05*utop(itop+2,jtop+2,ktop+1) +4.991307e-06*utop(itop+2,jtop+2,ktop+2));
+	u(i+0,j+0,k+1) = ( +8.250936e-06*utop(itop-2,jtop-2,ktop-2) -5.940674e-05*utop(itop-2,jtop-2,ktop-1) +4.455505e-04*utop(itop-2,jtop-2,ktop) +9.901123e-05*utop(itop-2,jtop-2,ktop+1) -1.060835e-05*utop(itop-2,jtop-2,ktop+2) -7.700874e-05*utop(itop-2,jtop-1,ktop-2) +5.544629e-04*utop(itop-2,jtop-1,ktop-1) -4.158472e-03*utop(itop-2,jtop-1,ktop) -9.241048e-04*utop(itop-2,jtop-1,ktop+1) +9.901123e-05*utop(itop-2,jtop-1,ktop+2) -3.465393e-04*utop(itop-2,jtop,ktop-2) +2.495083e-03*utop(itop-2,jtop,ktop-1) -1.871312e-02*utop(itop-2,jtop,ktop) -4.158472e-03*utop(itop-2,jtop,ktop+1) +4.455505e-04*utop(itop-2,jtop,ktop+2) +4.620524e-05*utop(itop-2,jtop+1,ktop-2) -3.326777e-04*utop(itop-2,jtop+1,ktop-1) +2.495083e-03*utop(itop-2,jtop+1,ktop) +5.544629e-04*utop(itop-2,jtop+1,ktop+1) -5.940674e-05*utop(itop-2,jtop+1,ktop+2) -6.417395e-06*utop(itop-2,jtop+2,ktop-2) +4.620524e-05*utop(itop-2,jtop+2,ktop-1) -3.465393e-04*utop(itop-2,jtop+2,ktop) -7.700874e-05*utop(itop-2,jtop+2,ktop+1) +8.250936e-06*utop(itop-2,jtop+2,ktop+2) -7.700874e-05*utop(itop-1,jtop-2,ktop-2) +5.544629e-04*utop(itop-1,jtop-2,ktop-1) -4.158472e-03*utop(itop-1,jtop-2,ktop) -9.241048e-04*utop(itop-1,jtop-2,ktop+1) +9.901123e-05*utop(itop-1,jtop-2,ktop+2) +7.187482e-04*utop(itop-1,jtop-1,ktop-2) -5.174987e-03*utop(itop-1,jtop-1,ktop-1) +3.881240e-02*utop(itop-1,jtop-1,ktop) +8.624978e-03*utop(itop-1,jtop-1,ktop+1) -9.241048e-04*utop(itop-1,jtop-1,ktop+2) +3.234367e-03*utop(itop-1,jtop,ktop-2) -2.328744e-02*utop(itop-1,jtop,ktop-1) +1.746558e-01*utop(itop-1,jtop,ktop) +3.881240e-02*utop(itop-1,jtop,ktop+1) -4.158472e-03*utop(itop-1,jtop,ktop+2) -4.312489e-04*utop(itop-1,jtop+1,ktop-2) +3.104992e-03*utop(itop-1,jtop+1,ktop-1) -2.328744e-02*utop(itop-1,jtop+1,ktop) -5.174987e-03*utop(itop-1,jtop+1,ktop+1) +5.544629e-04*utop(itop-1,jtop+1,ktop+2) +5.989568e-05*utop(itop-1,jtop+2,ktop-2) -4.312489e-04*utop(itop-1,jtop+2,ktop-1) +3.234367e-03*utop(itop-1,jtop+2,ktop) +7.187482e-04*utop(itop-1,jtop+2,ktop+1) -7.700874e-05*utop(itop-1,jtop+2,ktop+2) -3.465393e-04*utop(itop,jtop-2,ktop-2) +2.495083e-03*utop(itop,jtop-2,ktop-1) -1.871312e-02*utop(itop,jtop-2,ktop) -4.158472e-03*utop(itop,jtop-2,ktop+1) +4.455505e-04*utop(itop,jtop-2,ktop+2) +3.234367e-03*utop(itop,jtop-1,ktop-2) -2.328744e-02*utop(itop,jtop-1,ktop-1) +1.746558e-01*utop(itop,jtop-1,ktop) +3.881240e-02*utop(itop,jtop-1,ktop+1) -4.158472e-03*utop(itop,jtop-1,ktop+2) +1.455465e-02*utop(itop,jtop,ktop-2) -1.047935e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) +1.746558e-01*utop(itop,jtop,ktop+1) -1.871312e-02*utop(itop,jtop,ktop+2) -1.940620e-03*utop(itop,jtop+1,ktop-2) +1.397246e-02*utop(itop,jtop+1,ktop-1) -1.047935e-01*utop(itop,jtop+1,ktop) -2.328744e-02*utop(itop,jtop+1,ktop+1) +2.495083e-03*utop(itop,jtop+1,ktop+2) +2.695306e-04*utop(itop,jtop+2,ktop-2) -1.940620e-03*utop(itop,jtop+2,ktop-1) +1.455465e-02*utop(itop,jtop+2,ktop) +3.234367e-03*utop(itop,jtop+2,ktop+1) -3.465393e-04*utop(itop,jtop+2,ktop+2) +4.620524e-05*utop(itop+1,jtop-2,ktop-2) -3.326777e-04*utop(itop+1,jtop-2,ktop-1) +2.495083e-03*utop(itop+1,jtop-2,ktop) +5.544629e-04*utop(itop+1,jtop-2,ktop+1) -5.940674e-05*utop(itop+1,jtop-2,ktop+2) -4.312489e-04*utop(itop+1,jtop-1,ktop-2) +3.104992e-03*utop(itop+1,jtop-1,ktop-1) -2.328744e-02*utop(itop+1,jtop-1,ktop) -5.174987e-03*utop(itop+1,jtop-1,ktop+1) +5.544629e-04*utop(itop+1,jtop-1,ktop+2) -1.940620e-03*utop(itop+1,jtop,ktop-2) +1.397246e-02*utop(itop+1,jtop,ktop-1) -1.047935e-01*utop(itop+1,jtop,ktop) -2.328744e-02*utop(itop+1,jtop,ktop+1) +2.495083e-03*utop(itop+1,jtop,ktop+2) +2.587494e-04*utop(itop+1,jtop+1,ktop-2) -1.862995e-03*utop(itop+1,jtop+1,ktop-1) +1.397246e-02*utop(itop+1,jtop+1,ktop) +3.104992e-03*utop(itop+1,jtop+1,ktop+1) -3.326777e-04*utop(itop+1,jtop+1,ktop+2) -3.593741e-05*utop(itop+1,jtop+2,ktop-2) +2.587494e-04*utop(itop+1,jtop+2,ktop-1) -1.940620e-03*utop(itop+1,jtop+2,ktop) -4.312489e-04*utop(itop+1,jtop+2,ktop+1) +4.620524e-05*utop(itop+1,jtop+2,ktop+2) -6.417395e-06*utop(itop+2,jtop-2,ktop-2) +4.620524e-05*utop(itop+2,jtop-2,ktop-1) -3.465393e-04*utop(itop+2,jtop-2,ktop) -7.700874e-05*utop(itop+2,jtop-2,ktop+1) +8.250936e-06*utop(itop+2,jtop-2,ktop+2) +5.989568e-05*utop(itop+2,jtop-1,ktop-2) -4.312489e-04*utop(itop+2,jtop-1,ktop-1) +3.234367e-03*utop(itop+2,jtop-1,ktop) +7.187482e-04*utop(itop+2,jtop-1,ktop+1) -7.700874e-05*utop(itop+2,jtop-1,ktop+2) +2.695306e-04*utop(itop+2,jtop,ktop-2) -1.940620e-03*utop(itop+2,jtop,ktop-1) +1.455465e-02*utop(itop+2,jtop,ktop) +3.234367e-03*utop(itop+2,jtop,ktop+1) -3.465393e-04*utop(itop+2,jtop,ktop+2) -3.593741e-05*utop(itop+2,jtop+1,ktop-2) +2.587494e-04*utop(itop+2,jtop+1,ktop-1) -1.940620e-03*utop(itop+2,jtop+1,ktop) -4.312489e-04*utop(itop+2,jtop+1,ktop+1) +4.620524e-05*utop(itop+2,jtop+1,ktop+2) +4.991307e-06*utop(itop+2,jtop+2,ktop-2) -3.593741e-05*utop(itop+2,jtop+2,ktop-1) +2.695306e-04*utop(itop+2,jtop+2,ktop) +5.989568e-05*utop(itop+2,jtop+2,ktop+1) -6.417395e-06*utop(itop+2,jtop+2,ktop+2));
+	u(i+0,j+1,k+0) = ( +8.250936e-06*utop(itop-2,jtop-2,ktop-2) -7.700874e-05*utop(itop-2,jtop-2,ktop-1) -3.465393e-04*utop(itop-2,jtop-2,ktop) +4.620524e-05*utop(itop-2,jtop-2,ktop+1) -6.417395e-06*utop(itop-2,jtop-2,ktop+2) -5.940674e-05*utop(itop-2,jtop-1,ktop-2) +5.544629e-04*utop(itop-2,jtop-1,ktop-1) +2.495083e-03*utop(itop-2,jtop-1,ktop) -3.326777e-04*utop(itop-2,jtop-1,ktop+1) +4.620524e-05*utop(itop-2,jtop-1,ktop+2) +4.455505e-04*utop(itop-2,jtop,ktop-2) -4.158472e-03*utop(itop-2,jtop,ktop-1) -1.871312e-02*utop(itop-2,jtop,ktop) +2.495083e-03*utop(itop-2,jtop,ktop+1) -3.465393e-04*utop(itop-2,jtop,ktop+2) +9.901123e-05*utop(itop-2,jtop+1,ktop-2) -9.241048e-04*utop(itop-2,jtop+1,ktop-1) -4.158472e-03*utop(itop-2,jtop+1,ktop) +5.544629e-04*utop(itop-2,jtop+1,ktop+1) -7.700874e-05*utop(itop-2,jtop+1,ktop+2) -1.060835e-05*utop(itop-2,jtop+2,ktop-2) +9.901123e-05*utop(itop-2,jtop+2,ktop-1) +4.455505e-04*utop(itop-2,jtop+2,ktop) -5.940674e-05*utop(itop-2,jtop+2,ktop+1) +8.250936e-06*utop(itop-2,jtop+2,ktop+2) -7.700874e-05*utop(itop-1,jtop-2,ktop-2) +7.187482e-04*utop(itop-1,jtop-2,ktop-1) +3.234367e-03*utop(itop-1,jtop-2,ktop) -4.312489e-04*utop(itop-1,jtop-2,ktop+1) +5.989568e-05*utop(itop-1,jtop-2,ktop+2) +5.544629e-04*utop(itop-1,jtop-1,ktop-2) -5.174987e-03*utop(itop-1,jtop-1,ktop-1) -2.328744e-02*utop(itop-1,jtop-1,ktop) +3.104992e-03*utop(itop-1,jtop-1,ktop+1) -4.312489e-04*utop(itop-1,jtop-1,ktop+2) -4.158472e-03*utop(itop-1,jtop,ktop-2) +3.881240e-02*utop(itop-1,jtop,ktop-1) +1.746558e-01*utop(itop-1,jtop,ktop) -2.328744e-02*utop(itop-1,jtop,ktop+1) +3.234367e-03*utop(itop-1,jtop,ktop+2) -9.241048e-04*utop(itop-1,jtop+1,ktop-2) +8.624978e-03*utop(itop-1,jtop+1,ktop-1) +3.881240e-02*utop(itop-1,jtop+1,ktop) -5.174987e-03*utop(itop-1,jtop+1,ktop+1) +7.187482e-04*utop(itop-1,jtop+1,ktop+2) +9.901123e-05*utop(itop-1,jtop+2,ktop-2) -9.241048e-04*utop(itop-1,jtop+2,ktop-1) -4.158472e-03*utop(itop-1,jtop+2,ktop) +5.544629e-04*utop(itop-1,jtop+2,ktop+1) -7.700874e-05*utop(itop-1,jtop+2,ktop+2) -3.465393e-04*utop(itop,jtop-2,ktop-2) +3.234367e-03*utop(itop,jtop-2,ktop-1) +1.455465e-02*utop(itop,jtop-2,ktop) -1.940620e-03*utop(itop,jtop-2,ktop+1) +2.695306e-04*utop(itop,jtop-2,ktop+2) +2.495083e-03*utop(itop,jtop-1,ktop-2) -2.328744e-02*utop(itop,jtop-1,ktop-1) -1.047935e-01*utop(itop,jtop-1,ktop) +1.397246e-02*utop(itop,jtop-1,ktop+1) -1.940620e-03*utop(itop,jtop-1,ktop+2) -1.871312e-02*utop(itop,jtop,ktop-2) +1.746558e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) -1.047935e-01*utop(itop,jtop,ktop+1) +1.455465e-02*utop(itop,jtop,ktop+2) -4.158472e-03*utop(itop,jtop+1,ktop-2) +3.881240e-02*utop(itop,jtop+1,ktop-1) +1.746558e-01*utop(itop,jtop+1,ktop) -2.328744e-02*utop(itop,jtop+1,ktop+1) +3.234367e-03*utop(itop,jtop+1,ktop+2) +4.455505e-04*utop(itop,jtop+2,ktop-2) -4.158472e-03*utop(itop,jtop+2,ktop-1) -1.871312e-02*utop(itop,jtop+2,ktop) +2.495083e-03*utop(itop,jtop+2,ktop+1) -3.465393e-04*utop(itop,jtop+2,ktop+2) +4.620524e-05*utop(itop+1,jtop-2,ktop-2) -4.312489e-04*utop(itop+1,jtop-2,ktop-1) -1.940620e-03*utop(itop+1,jtop-2,ktop) +2.587494e-04*utop(itop+1,jtop-2,ktop+1) -3.593741e-05*utop(itop+1,jtop-2,ktop+2) -3.326777e-04*utop(itop+1,jtop-1,ktop-2) +3.104992e-03*utop(itop+1,jtop-1,ktop-1) +1.397246e-02*utop(itop+1,jtop-1,ktop) -1.862995e-03*utop(itop+1,jtop-1,ktop+1) +2.587494e-04*utop(itop+1,jtop-1,ktop+2) +2.495083e-03*utop(itop+1,jtop,ktop-2) -2.328744e-02*utop(itop+1,jtop,ktop-1) -1.047935e-01*utop(itop+1,jtop,ktop) +1.397246e-02*utop(itop+1,jtop,ktop+1) -1.940620e-03*utop(itop+1,jtop,ktop+2) +5.544629e-04*utop(itop+1,jtop+1,ktop-2) -5.174987e-03*utop(itop+1,jtop+1,ktop-1) -2.328744e-02*utop(itop+1,jtop+1,ktop) +3.104992e-03*utop(itop+1,jtop+1,ktop+1) -4.312489e-04*utop(itop+1,jtop+1,ktop+2) -5.940674e-05*utop(itop+1,jtop+2,ktop-2) +5.544629e-04*utop(itop+1,jtop+2,ktop-1) +2.495083e-03*utop(itop+1,jtop+2,ktop) -3.326777e-04*utop(itop+1,jtop+2,ktop+1) +4.620524e-05*utop(itop+1,jtop+2,ktop+2) -6.417395e-06*utop(itop+2,jtop-2,ktop-2) +5.989568e-05*utop(itop+2,jtop-2,ktop-1) +2.695306e-04*utop(itop+2,jtop-2,ktop) -3.593741e-05*utop(itop+2,jtop-2,ktop+1) +4.991307e-06*utop(itop+2,jtop-2,ktop+2) +4.620524e-05*utop(itop+2,jtop-1,ktop-2) -4.312489e-04*utop(itop+2,jtop-1,ktop-1) -1.940620e-03*utop(itop+2,jtop-1,ktop) +2.587494e-04*utop(itop+2,jtop-1,ktop+1) -3.593741e-05*utop(itop+2,jtop-1,ktop+2) -3.465393e-04*utop(itop+2,jtop,ktop-2) +3.234367e-03*utop(itop+2,jtop,ktop-1) +1.455465e-02*utop(itop+2,jtop,ktop) -1.940620e-03*utop(itop+2,jtop,ktop+1) +2.695306e-04*utop(itop+2,jtop,ktop+2) -7.700874e-05*utop(itop+2,jtop+1,ktop-2) +7.187482e-04*utop(itop+2,jtop+1,ktop-1) +3.234367e-03*utop(itop+2,jtop+1,ktop) -4.312489e-04*utop(itop+2,jtop+1,ktop+1) +5.989568e-05*utop(itop+2,jtop+1,ktop+2) +8.250936e-06*utop(itop+2,jtop+2,ktop-2) -7.700874e-05*utop(itop+2,jtop+2,ktop-1) -3.465393e-04*utop(itop+2,jtop+2,ktop) +4.620524e-05*utop(itop+2,jtop+2,ktop+1) -6.417395e-06*utop(itop+2,jtop+2,ktop+2));
+	u(i+0,j+1,k+1) = ( -6.417395e-06*utop(itop-2,jtop-2,ktop-2) +4.620524e-05*utop(itop-2,jtop-2,ktop-1) -3.465393e-04*utop(itop-2,jtop-2,ktop) -7.700874e-05*utop(itop-2,jtop-2,ktop+1) +8.250936e-06*utop(itop-2,jtop-2,ktop+2) +4.620524e-05*utop(itop-2,jtop-1,ktop-2) -3.326777e-04*utop(itop-2,jtop-1,ktop-1) +2.495083e-03*utop(itop-2,jtop-1,ktop) +5.544629e-04*utop(itop-2,jtop-1,ktop+1) -5.940674e-05*utop(itop-2,jtop-1,ktop+2) -3.465393e-04*utop(itop-2,jtop,ktop-2) +2.495083e-03*utop(itop-2,jtop,ktop-1) -1.871312e-02*utop(itop-2,jtop,ktop) -4.158472e-03*utop(itop-2,jtop,ktop+1) +4.455505e-04*utop(itop-2,jtop,ktop+2) -7.700874e-05*utop(itop-2,jtop+1,ktop-2) +5.544629e-04*utop(itop-2,jtop+1,ktop-1) -4.158472e-03*utop(itop-2,jtop+1,ktop) -9.241048e-04*utop(itop-2,jtop+1,ktop+1) +9.901123e-05*utop(itop-2,jtop+1,ktop+2) +8.250936e-06*utop(itop-2,jtop+2,ktop-2) -5.940674e-05*utop(itop-2,jtop+2,ktop-1) +4.455505e-04*utop(itop-2,jtop+2,ktop) +9.901123e-05*utop(itop-2,jtop+2,ktop+1) -1.060835e-05*utop(itop-2,jtop+2,ktop+2) +5.989568e-05*utop(itop-1,jtop-2,ktop-2) -4.312489e-04*utop(itop-1,jtop-2,ktop-1) +3.234367e-03*utop(itop-1,jtop-2,ktop) +7.187482e-04*utop(itop-1,jtop-2,ktop+1) -7.700874e-05*utop(itop-1,jtop-2,ktop+2) -4.312489e-04*utop(itop-1,jtop-1,ktop-2) +3.104992e-03*utop(itop-1,jtop-1,ktop-1) -2.328744e-02*utop(itop-1,jtop-1,ktop) -5.174987e-03*utop(itop-1,jtop-1,ktop+1) +5.544629e-04*utop(itop-1,jtop-1,ktop+2) +3.234367e-03*utop(itop-1,jtop,ktop-2) -2.328744e-02*utop(itop-1,jtop,ktop-1) +1.746558e-01*utop(itop-1,jtop,ktop) +3.881240e-02*utop(itop-1,jtop,ktop+1) -4.158472e-03*utop(itop-1,jtop,ktop+2) +7.187482e-04*utop(itop-1,jtop+1,ktop-2) -5.174987e-03*utop(itop-1,jtop+1,ktop-1) +3.881240e-02*utop(itop-1,jtop+1,ktop) +8.624978e-03*utop(itop-1,jtop+1,ktop+1) -9.241048e-04*utop(itop-1,jtop+1,ktop+2) -7.700874e-05*utop(itop-1,jtop+2,ktop-2) +5.544629e-04*utop(itop-1,jtop+2,ktop-1) -4.158472e-03*utop(itop-1,jtop+2,ktop) -9.241048e-04*utop(itop-1,jtop+2,ktop+1) +9.901123e-05*utop(itop-1,jtop+2,ktop+2) +2.695306e-04*utop(itop,jtop-2,ktop-2) -1.940620e-03*utop(itop,jtop-2,ktop-1) +1.455465e-02*utop(itop,jtop-2,ktop) +3.234367e-03*utop(itop,jtop-2,ktop+1) -3.465393e-04*utop(itop,jtop-2,ktop+2) -1.940620e-03*utop(itop,jtop-1,ktop-2) +1.397246e-02*utop(itop,jtop-1,ktop-1) -1.047935e-01*utop(itop,jtop-1,ktop) -2.328744e-02*utop(itop,jtop-1,ktop+1) +2.495083e-03*utop(itop,jtop-1,ktop+2) +1.455465e-02*utop(itop,jtop,ktop-2) -1.047935e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) +1.746558e-01*utop(itop,jtop,ktop+1) -1.871312e-02*utop(itop,jtop,ktop+2) +3.234367e-03*utop(itop,jtop+1,ktop-2) -2.328744e-02*utop(itop,jtop+1,ktop-1) +1.746558e-01*utop(itop,jtop+1,ktop) +3.881240e-02*utop(itop,jtop+1,ktop+1) -4.158472e-03*utop(itop,jtop+1,ktop+2) -3.465393e-04*utop(itop,jtop+2,ktop-2) +2.495083e-03*utop(itop,jtop+2,ktop-1) -1.871312e-02*utop(itop,jtop+2,ktop) -4.158472e-03*utop(itop,jtop+2,ktop+1) +4.455505e-04*utop(itop,jtop+2,ktop+2) -3.593741e-05*utop(itop+1,jtop-2,ktop-2) +2.587494e-04*utop(itop+1,jtop-2,ktop-1) -1.940620e-03*utop(itop+1,jtop-2,ktop) -4.312489e-04*utop(itop+1,jtop-2,ktop+1) +4.620524e-05*utop(itop+1,jtop-2,ktop+2) +2.587494e-04*utop(itop+1,jtop-1,ktop-2) -1.862995e-03*utop(itop+1,jtop-1,ktop-1) +1.397246e-02*utop(itop+1,jtop-1,ktop) +3.104992e-03*utop(itop+1,jtop-1,ktop+1) -3.326777e-04*utop(itop+1,jtop-1,ktop+2) -1.940620e-03*utop(itop+1,jtop,ktop-2) +1.397246e-02*utop(itop+1,jtop,ktop-1) -1.047935e-01*utop(itop+1,jtop,ktop) -2.328744e-02*utop(itop+1,jtop,ktop+1) +2.495083e-03*utop(itop+1,jtop,ktop+2) -4.312489e-04*utop(itop+1,jtop+1,ktop-2) +3.104992e-03*utop(itop+1,jtop+1,ktop-1) -2.328744e-02*utop(itop+1,jtop+1,ktop) -5.174987e-03*utop(itop+1,jtop+1,ktop+1) +5.544629e-04*utop(itop+1,jtop+1,ktop+2) +4.620524e-05*utop(itop+1,jtop+2,ktop-2) -3.326777e-04*utop(itop+1,jtop+2,ktop-1) +2.495083e-03*utop(itop+1,jtop+2,ktop) +5.544629e-04*utop(itop+1,jtop+2,ktop+1) -5.940674e-05*utop(itop+1,jtop+2,ktop+2) +4.991307e-06*utop(itop+2,jtop-2,ktop-2) -3.593741e-05*utop(itop+2,jtop-2,ktop-1) +2.695306e-04*utop(itop+2,jtop-2,ktop) +5.989568e-05*utop(itop+2,jtop-2,ktop+1) -6.417395e-06*utop(itop+2,jtop-2,ktop+2) -3.593741e-05*utop(itop+2,jtop-1,ktop-2) +2.587494e-04*utop(itop+2,jtop-1,ktop-1) -1.940620e-03*utop(itop+2,jtop-1,ktop) -4.312489e-04*utop(itop+2,jtop-1,ktop+1) +4.620524e-05*utop(itop+2,jtop-1,ktop+2) +2.695306e-04*utop(itop+2,jtop,ktop-2) -1.940620e-03*utop(itop+2,jtop,ktop-1) +1.455465e-02*utop(itop+2,jtop,ktop) +3.234367e-03*utop(itop+2,jtop,ktop+1) -3.465393e-04*utop(itop+2,jtop,ktop+2) +5.989568e-05*utop(itop+2,jtop+1,ktop-2) -4.312489e-04*utop(itop+2,jtop+1,ktop-1) +3.234367e-03*utop(itop+2,jtop+1,ktop) +7.187482e-04*utop(itop+2,jtop+1,ktop+1) -7.700874e-05*utop(itop+2,jtop+1,ktop+2) -6.417395e-06*utop(itop+2,jtop+2,ktop-2) +4.620524e-05*utop(itop+2,jtop+2,ktop-1) -3.465393e-04*utop(itop+2,jtop+2,ktop) -7.700874e-05*utop(itop+2,jtop+2,ktop+1) +8.250936e-06*utop(itop+2,jtop+2,ktop+2));
+	u(i+1,j+0,k+0) = ( +8.250936e-06*utop(itop-2,jtop-2,ktop-2) -7.700874e-05*utop(itop-2,jtop-2,ktop-1) -3.465393e-04*utop(itop-2,jtop-2,ktop) +4.620524e-05*utop(itop-2,jtop-2,ktop+1) -6.417395e-06*utop(itop-2,jtop-2,ktop+2) -7.700874e-05*utop(itop-2,jtop-1,ktop-2) +7.187482e-04*utop(itop-2,jtop-1,ktop-1) +3.234367e-03*utop(itop-2,jtop-1,ktop) -4.312489e-04*utop(itop-2,jtop-1,ktop+1) +5.989568e-05*utop(itop-2,jtop-1,ktop+2) -3.465393e-04*utop(itop-2,jtop,ktop-2) +3.234367e-03*utop(itop-2,jtop,ktop-1) +1.455465e-02*utop(itop-2,jtop,ktop) -1.940620e-03*utop(itop-2,jtop,ktop+1) +2.695306e-04*utop(itop-2,jtop,ktop+2) +4.620524e-05*utop(itop-2,jtop+1,ktop-2) -4.312489e-04*utop(itop-2,jtop+1,ktop-1) -1.940620e-03*utop(itop-2,jtop+1,ktop) +2.587494e-04*utop(itop-2,jtop+1,ktop+1) -3.593741e-05*utop(itop-2,jtop+1,ktop+2) -6.417395e-06*utop(itop-2,jtop+2,ktop-2) +5.989568e-05*utop(itop-2,jtop+2,ktop-1) +2.695306e-04*utop(itop-2,jtop+2,ktop) -3.593741e-05*utop(itop-2,jtop+2,ktop+1) +4.991307e-06*utop(itop-2,jtop+2,ktop+2) -5.940674e-05*utop(itop-1,jtop-2,ktop-2) +5.544629e-04*utop(itop-1,jtop-2,ktop-1) +2.495083e-03*utop(itop-1,jtop-2,ktop) -3.326777e-04*utop(itop-1,jtop-2,ktop+1) +4.620524e-05*utop(itop-1,jtop-2,ktop+2) +5.544629e-04*utop(itop-1,jtop-1,ktop-2) -5.174987e-03*utop(itop-1,jtop-1,ktop-1) -2.328744e-02*utop(itop-1,jtop-1,ktop) +3.104992e-03*utop(itop-1,jtop-1,ktop+1) -4.312489e-04*utop(itop-1,jtop-1,ktop+2) +2.495083e-03*utop(itop-1,jtop,ktop-2) -2.328744e-02*utop(itop-1,jtop,ktop-1) -1.047935e-01*utop(itop-1,jtop,ktop) +1.397246e-02*utop(itop-1,jtop,ktop+1) -1.940620e-03*utop(itop-1,jtop,ktop+2) -3.326777e-04*utop(itop-1,jtop+1,ktop-2) +3.104992e-03*utop(itop-1,jtop+1,ktop-1) +1.397246e-02*utop(itop-1,jtop+1,ktop) -1.862995e-03*utop(itop-1,jtop+1,ktop+1) +2.587494e-04*utop(itop-1,jtop+1,ktop+2) +4.620524e-05*utop(itop-1,jtop+2,ktop-2) -4.312489e-04*utop(itop-1,jtop+2,ktop-1) -1.940620e-03*utop(itop-1,jtop+2,ktop) +2.587494e-04*utop(itop-1,jtop+2,ktop+1) -3.593741e-05*utop(itop-1,jtop+2,ktop+2) +4.455505e-04*utop(itop,jtop-2,ktop-2) -4.158472e-03*utop(itop,jtop-2,ktop-1) -1.871312e-02*utop(itop,jtop-2,ktop) +2.495083e-03*utop(itop,jtop-2,ktop+1) -3.465393e-04*utop(itop,jtop-2,ktop+2) -4.158472e-03*utop(itop,jtop-1,ktop-2) +3.881240e-02*utop(itop,jtop-1,ktop-1) +1.746558e-01*utop(itop,jtop-1,ktop) -2.328744e-02*utop(itop,jtop-1,ktop+1) +3.234367e-03*utop(itop,jtop-1,ktop+2) -1.871312e-02*utop(itop,jtop,ktop-2) +1.746558e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) -1.047935e-01*utop(itop,jtop,ktop+1) +1.455465e-02*utop(itop,jtop,ktop+2) +2.495083e-03*utop(itop,jtop+1,ktop-2) -2.328744e-02*utop(itop,jtop+1,ktop-1) -1.047935e-01*utop(itop,jtop+1,ktop) +1.397246e-02*utop(itop,jtop+1,ktop+1) -1.940620e-03*utop(itop,jtop+1,ktop+2) -3.465393e-04*utop(itop,jtop+2,ktop-2) +3.234367e-03*utop(itop,jtop+2,ktop-1) +1.455465e-02*utop(itop,jtop+2,ktop) -1.940620e-03*utop(itop,jtop+2,ktop+1) +2.695306e-04*utop(itop,jtop+2,ktop+2) +9.901123e-05*utop(itop+1,jtop-2,ktop-2) -9.241048e-04*utop(itop+1,jtop-2,ktop-1) -4.158472e-03*utop(itop+1,jtop-2,ktop) +5.544629e-04*utop(itop+1,jtop-2,ktop+1) -7.700874e-05*utop(itop+1,jtop-2,ktop+2) -9.241048e-04*utop(itop+1,jtop-1,ktop-2) +8.624978e-03*utop(itop+1,jtop-1,ktop-1) +3.881240e-02*utop(itop+1,jtop-1,ktop) -5.174987e-03*utop(itop+1,jtop-1,ktop+1) +7.187482e-04*utop(itop+1,jtop-1,ktop+2) -4.158472e-03*utop(itop+1,jtop,ktop-2) +3.881240e-02*utop(itop+1,jtop,ktop-1) +1.746558e-01*utop(itop+1,jtop,ktop) -2.328744e-02*utop(itop+1,jtop,ktop+1) +3.234367e-03*utop(itop+1,jtop,ktop+2) +5.544629e-04*utop(itop+1,jtop+1,ktop-2) -5.174987e-03*utop(itop+1,jtop+1,ktop-1) -2.328744e-02*utop(itop+1,jtop+1,ktop) +3.104992e-03*utop(itop+1,jtop+1,ktop+1) -4.312489e-04*utop(itop+1,jtop+1,ktop+2) -7.700874e-05*utop(itop+1,jtop+2,ktop-2) +7.187482e-04*utop(itop+1,jtop+2,ktop-1) +3.234367e-03*utop(itop+1,jtop+2,ktop) -4.312489e-04*utop(itop+1,jtop+2,ktop+1) +5.989568e-05*utop(itop+1,jtop+2,ktop+2) -1.060835e-05*utop(itop+2,jtop-2,ktop-2) +9.901123e-05*utop(itop+2,jtop-2,ktop-1) +4.455505e-04*utop(itop+2,jtop-2,ktop) -5.940674e-05*utop(itop+2,jtop-2,ktop+1) +8.250936e-06*utop(itop+2,jtop-2,ktop+2) +9.901123e-05*utop(itop+2,jtop-1,ktop-2) -9.241048e-04*utop(itop+2,jtop-1,ktop-1) -4.158472e-03*utop(itop+2,jtop-1,ktop) +5.544629e-04*utop(itop+2,jtop-1,ktop+1) -7.700874e-05*utop(itop+2,jtop-1,ktop+2) +4.455505e-04*utop(itop+2,jtop,ktop-2) -4.158472e-03*utop(itop+2,jtop,ktop-1) -1.871312e-02*utop(itop+2,jtop,ktop) +2.495083e-03*utop(itop+2,jtop,ktop+1) -3.465393e-04*utop(itop+2,jtop,ktop+2) -5.940674e-05*utop(itop+2,jtop+1,ktop-2) +5.544629e-04*utop(itop+2,jtop+1,ktop-1) +2.495083e-03*utop(itop+2,jtop+1,ktop) -3.326777e-04*utop(itop+2,jtop+1,ktop+1) +4.620524e-05*utop(itop+2,jtop+1,ktop+2) +8.250936e-06*utop(itop+2,jtop+2,ktop-2) -7.700874e-05*utop(itop+2,jtop+2,ktop-1) -3.465393e-04*utop(itop+2,jtop+2,ktop) +4.620524e-05*utop(itop+2,jtop+2,ktop+1) -6.417395e-06*utop(itop+2,jtop+2,ktop+2));
+	u(i+1,j+0,k+1) = ( -6.417395e-06*utop(itop-2,jtop-2,ktop-2) +4.620524e-05*utop(itop-2,jtop-2,ktop-1) -3.465393e-04*utop(itop-2,jtop-2,ktop) -7.700874e-05*utop(itop-2,jtop-2,ktop+1) +8.250936e-06*utop(itop-2,jtop-2,ktop+2) +5.989568e-05*utop(itop-2,jtop-1,ktop-2) -4.312489e-04*utop(itop-2,jtop-1,ktop-1) +3.234367e-03*utop(itop-2,jtop-1,ktop) +7.187482e-04*utop(itop-2,jtop-1,ktop+1) -7.700874e-05*utop(itop-2,jtop-1,ktop+2) +2.695306e-04*utop(itop-2,jtop,ktop-2) -1.940620e-03*utop(itop-2,jtop,ktop-1) +1.455465e-02*utop(itop-2,jtop,ktop) +3.234367e-03*utop(itop-2,jtop,ktop+1) -3.465393e-04*utop(itop-2,jtop,ktop+2) -3.593741e-05*utop(itop-2,jtop+1,ktop-2) +2.587494e-04*utop(itop-2,jtop+1,ktop-1) -1.940620e-03*utop(itop-2,jtop+1,ktop) -4.312489e-04*utop(itop-2,jtop+1,ktop+1) +4.620524e-05*utop(itop-2,jtop+1,ktop+2) +4.991307e-06*utop(itop-2,jtop+2,ktop-2) -3.593741e-05*utop(itop-2,jtop+2,ktop-1) +2.695306e-04*utop(itop-2,jtop+2,ktop) +5.989568e-05*utop(itop-2,jtop+2,ktop+1) -6.417395e-06*utop(itop-2,jtop+2,ktop+2) +4.620524e-05*utop(itop-1,jtop-2,ktop-2) -3.326777e-04*utop(itop-1,jtop-2,ktop-1) +2.495083e-03*utop(itop-1,jtop-2,ktop) +5.544629e-04*utop(itop-1,jtop-2,ktop+1) -5.940674e-05*utop(itop-1,jtop-2,ktop+2) -4.312489e-04*utop(itop-1,jtop-1,ktop-2) +3.104992e-03*utop(itop-1,jtop-1,ktop-1) -2.328744e-02*utop(itop-1,jtop-1,ktop) -5.174987e-03*utop(itop-1,jtop-1,ktop+1) +5.544629e-04*utop(itop-1,jtop-1,ktop+2) -1.940620e-03*utop(itop-1,jtop,ktop-2) +1.397246e-02*utop(itop-1,jtop,ktop-1) -1.047935e-01*utop(itop-1,jtop,ktop) -2.328744e-02*utop(itop-1,jtop,ktop+1) +2.495083e-03*utop(itop-1,jtop,ktop+2) +2.587494e-04*utop(itop-1,jtop+1,ktop-2) -1.862995e-03*utop(itop-1,jtop+1,ktop-1) +1.397246e-02*utop(itop-1,jtop+1,ktop) +3.104992e-03*utop(itop-1,jtop+1,ktop+1) -3.326777e-04*utop(itop-1,jtop+1,ktop+2) -3.593741e-05*utop(itop-1,jtop+2,ktop-2) +2.587494e-04*utop(itop-1,jtop+2,ktop-1) -1.940620e-03*utop(itop-1,jtop+2,ktop) -4.312489e-04*utop(itop-1,jtop+2,ktop+1) +4.620524e-05*utop(itop-1,jtop+2,ktop+2) -3.465393e-04*utop(itop,jtop-2,ktop-2) +2.495083e-03*utop(itop,jtop-2,ktop-1) -1.871312e-02*utop(itop,jtop-2,ktop) -4.158472e-03*utop(itop,jtop-2,ktop+1) +4.455505e-04*utop(itop,jtop-2,ktop+2) +3.234367e-03*utop(itop,jtop-1,ktop-2) -2.328744e-02*utop(itop,jtop-1,ktop-1) +1.746558e-01*utop(itop,jtop-1,ktop) +3.881240e-02*utop(itop,jtop-1,ktop+1) -4.158472e-03*utop(itop,jtop-1,ktop+2) +1.455465e-02*utop(itop,jtop,ktop-2) -1.047935e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) +1.746558e-01*utop(itop,jtop,ktop+1) -1.871312e-02*utop(itop,jtop,ktop+2) -1.940620e-03*utop(itop,jtop+1,ktop-2) +1.397246e-02*utop(itop,jtop+1,ktop-1) -1.047935e-01*utop(itop,jtop+1,ktop) -2.328744e-02*utop(itop,jtop+1,ktop+1) +2.495083e-03*utop(itop,jtop+1,ktop+2) +2.695306e-04*utop(itop,jtop+2,ktop-2) -1.940620e-03*utop(itop,jtop+2,ktop-1) +1.455465e-02*utop(itop,jtop+2,ktop) +3.234367e-03*utop(itop,jtop+2,ktop+1) -3.465393e-04*utop(itop,jtop+2,ktop+2) -7.700874e-05*utop(itop+1,jtop-2,ktop-2) +5.544629e-04*utop(itop+1,jtop-2,ktop-1) -4.158472e-03*utop(itop+1,jtop-2,ktop) -9.241048e-04*utop(itop+1,jtop-2,ktop+1) +9.901123e-05*utop(itop+1,jtop-2,ktop+2) +7.187482e-04*utop(itop+1,jtop-1,ktop-2) -5.174987e-03*utop(itop+1,jtop-1,ktop-1) +3.881240e-02*utop(itop+1,jtop-1,ktop) +8.624978e-03*utop(itop+1,jtop-1,ktop+1) -9.241048e-04*utop(itop+1,jtop-1,ktop+2) +3.234367e-03*utop(itop+1,jtop,ktop-2) -2.328744e-02*utop(itop+1,jtop,ktop-1) +1.746558e-01*utop(itop+1,jtop,ktop) +3.881240e-02*utop(itop+1,jtop,ktop+1) -4.158472e-03*utop(itop+1,jtop,ktop+2) -4.312489e-04*utop(itop+1,jtop+1,ktop-2) +3.104992e-03*utop(itop+1,jtop+1,ktop-1) -2.328744e-02*utop(itop+1,jtop+1,ktop) -5.174987e-03*utop(itop+1,jtop+1,ktop+1) +5.544629e-04*utop(itop+1,jtop+1,ktop+2) +5.989568e-05*utop(itop+1,jtop+2,ktop-2) -4.312489e-04*utop(itop+1,jtop+2,ktop-1) +3.234367e-03*utop(itop+1,jtop+2,ktop) +7.187482e-04*utop(itop+1,jtop+2,ktop+1) -7.700874e-05*utop(itop+1,jtop+2,ktop+2) +8.250936e-06*utop(itop+2,jtop-2,ktop-2) -5.940674e-05*utop(itop+2,jtop-2,ktop-1) +4.455505e-04*utop(itop+2,jtop-2,ktop) +9.901123e-05*utop(itop+2,jtop-2,ktop+1) -1.060835e-05*utop(itop+2,jtop-2,ktop+2) -7.700874e-05*utop(itop+2,jtop-1,ktop-2) +5.544629e-04*utop(itop+2,jtop-1,ktop-1) -4.158472e-03*utop(itop+2,jtop-1,ktop) -9.241048e-04*utop(itop+2,jtop-1,ktop+1) +9.901123e-05*utop(itop+2,jtop-1,ktop+2) -3.465393e-04*utop(itop+2,jtop,ktop-2) +2.495083e-03*utop(itop+2,jtop,ktop-1) -1.871312e-02*utop(itop+2,jtop,ktop) -4.158472e-03*utop(itop+2,jtop,ktop+1) +4.455505e-04*utop(itop+2,jtop,ktop+2) +4.620524e-05*utop(itop+2,jtop+1,ktop-2) -3.326777e-04*utop(itop+2,jtop+1,ktop-1) +2.495083e-03*utop(itop+2,jtop+1,ktop) +5.544629e-04*utop(itop+2,jtop+1,ktop+1) -5.940674e-05*utop(itop+2,jtop+1,ktop+2) -6.417395e-06*utop(itop+2,jtop+2,ktop-2) +4.620524e-05*utop(itop+2,jtop+2,ktop-1) -3.465393e-04*utop(itop+2,jtop+2,ktop) -7.700874e-05*utop(itop+2,jtop+2,ktop+1) +8.250936e-06*utop(itop+2,jtop+2,ktop+2));
+	u(i+1,j+1,k+0) = ( -6.417395e-06*utop(itop-2,jtop-2,ktop-2) +5.989568e-05*utop(itop-2,jtop-2,ktop-1) +2.695306e-04*utop(itop-2,jtop-2,ktop) -3.593741e-05*utop(itop-2,jtop-2,ktop+1) +4.991307e-06*utop(itop-2,jtop-2,ktop+2) +4.620524e-05*utop(itop-2,jtop-1,ktop-2) -4.312489e-04*utop(itop-2,jtop-1,ktop-1) -1.940620e-03*utop(itop-2,jtop-1,ktop) +2.587494e-04*utop(itop-2,jtop-1,ktop+1) -3.593741e-05*utop(itop-2,jtop-1,ktop+2) -3.465393e-04*utop(itop-2,jtop,ktop-2) +3.234367e-03*utop(itop-2,jtop,ktop-1) +1.455465e-02*utop(itop-2,jtop,ktop) -1.940620e-03*utop(itop-2,jtop,ktop+1) +2.695306e-04*utop(itop-2,jtop,ktop+2) -7.700874e-05*utop(itop-2,jtop+1,ktop-2) +7.187482e-04*utop(itop-2,jtop+1,ktop-1) +3.234367e-03*utop(itop-2,jtop+1,ktop) -4.312489e-04*utop(itop-2,jtop+1,ktop+1) +5.989568e-05*utop(itop-2,jtop+1,ktop+2) +8.250936e-06*utop(itop-2,jtop+2,ktop-2) -7.700874e-05*utop(itop-2,jtop+2,ktop-1) -3.465393e-04*utop(itop-2,jtop+2,ktop) +4.620524e-05*utop(itop-2,jtop+2,ktop+1) -6.417395e-06*utop(itop-2,jtop+2,ktop+2) +4.620524e-05*utop(itop-1,jtop-2,ktop-2) -4.312489e-04*utop(itop-1,jtop-2,ktop-1) -1.940620e-03*utop(itop-1,jtop-2,ktop) +2.587494e-04*utop(itop-1,jtop-2,ktop+1) -3.593741e-05*utop(itop-1,jtop-2,ktop+2) -3.326777e-04*utop(itop-1,jtop-1,ktop-2) +3.104992e-03*utop(itop-1,jtop-1,ktop-1) +1.397246e-02*utop(itop-1,jtop-1,ktop) -1.862995e-03*utop(itop-1,jtop-1,ktop+1) +2.587494e-04*utop(itop-1,jtop-1,ktop+2) +2.495083e-03*utop(itop-1,jtop,ktop-2) -2.328744e-02*utop(itop-1,jtop,ktop-1) -1.047935e-01*utop(itop-1,jtop,ktop) +1.397246e-02*utop(itop-1,jtop,ktop+1) -1.940620e-03*utop(itop-1,jtop,ktop+2) +5.544629e-04*utop(itop-1,jtop+1,ktop-2) -5.174987e-03*utop(itop-1,jtop+1,ktop-1) -2.328744e-02*utop(itop-1,jtop+1,ktop) +3.104992e-03*utop(itop-1,jtop+1,ktop+1) -4.312489e-04*utop(itop-1,jtop+1,ktop+2) -5.940674e-05*utop(itop-1,jtop+2,ktop-2) +5.544629e-04*utop(itop-1,jtop+2,ktop-1) +2.495083e-03*utop(itop-1,jtop+2,ktop) -3.326777e-04*utop(itop-1,jtop+2,ktop+1) +4.620524e-05*utop(itop-1,jtop+2,ktop+2) -3.465393e-04*utop(itop,jtop-2,ktop-2) +3.234367e-03*utop(itop,jtop-2,ktop-1) +1.455465e-02*utop(itop,jtop-2,ktop) -1.940620e-03*utop(itop,jtop-2,ktop+1) +2.695306e-04*utop(itop,jtop-2,ktop+2) +2.495083e-03*utop(itop,jtop-1,ktop-2) -2.328744e-02*utop(itop,jtop-1,ktop-1) -1.047935e-01*utop(itop,jtop-1,ktop) +1.397246e-02*utop(itop,jtop-1,ktop+1) -1.940620e-03*utop(itop,jtop-1,ktop+2) -1.871312e-02*utop(itop,jtop,ktop-2) +1.746558e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) -1.047935e-01*utop(itop,jtop,ktop+1) +1.455465e-02*utop(itop,jtop,ktop+2) -4.158472e-03*utop(itop,jtop+1,ktop-2) +3.881240e-02*utop(itop,jtop+1,ktop-1) +1.746558e-01*utop(itop,jtop+1,ktop) -2.328744e-02*utop(itop,jtop+1,ktop+1) +3.234367e-03*utop(itop,jtop+1,ktop+2) +4.455505e-04*utop(itop,jtop+2,ktop-2) -4.158472e-03*utop(itop,jtop+2,ktop-1) -1.871312e-02*utop(itop,jtop+2,ktop) +2.495083e-03*utop(itop,jtop+2,ktop+1) -3.465393e-04*utop(itop,jtop+2,ktop+2) -7.700874e-05*utop(itop+1,jtop-2,ktop-2) +7.187482e-04*utop(itop+1,jtop-2,ktop-1) +3.234367e-03*utop(itop+1,jtop-2,ktop) -4.312489e-04*utop(itop+1,jtop-2,ktop+1) +5.989568e-05*utop(itop+1,jtop-2,ktop+2) +5.544629e-04*utop(itop+1,jtop-1,ktop-2) -5.174987e-03*utop(itop+1,jtop-1,ktop-1) -2.328744e-02*utop(itop+1,jtop-1,ktop) +3.104992e-03*utop(itop+1,jtop-1,ktop+1) -4.312489e-04*utop(itop+1,jtop-1,ktop+2) -4.158472e-03*utop(itop+1,jtop,ktop-2) +3.881240e-02*utop(itop+1,jtop,ktop-1) +1.746558e-01*utop(itop+1,jtop,ktop) -2.328744e-02*utop(itop+1,jtop,ktop+1) +3.234367e-03*utop(itop+1,jtop,ktop+2) -9.241048e-04*utop(itop+1,jtop+1,ktop-2) +8.624978e-03*utop(itop+1,jtop+1,ktop-1) +3.881240e-02*utop(itop+1,jtop+1,ktop) -5.174987e-03*utop(itop+1,jtop+1,ktop+1) +7.187482e-04*utop(itop+1,jtop+1,ktop+2) +9.901123e-05*utop(itop+1,jtop+2,ktop-2) -9.241048e-04*utop(itop+1,jtop+2,ktop-1) -4.158472e-03*utop(itop+1,jtop+2,ktop) +5.544629e-04*utop(itop+1,jtop+2,ktop+1) -7.700874e-05*utop(itop+1,jtop+2,ktop+2) +8.250936e-06*utop(itop+2,jtop-2,ktop-2) -7.700874e-05*utop(itop+2,jtop-2,ktop-1) -3.465393e-04*utop(itop+2,jtop-2,ktop) +4.620524e-05*utop(itop+2,jtop-2,ktop+1) -6.417395e-06*utop(itop+2,jtop-2,ktop+2) -5.940674e-05*utop(itop+2,jtop-1,ktop-2) +5.544629e-04*utop(itop+2,jtop-1,ktop-1) +2.495083e-03*utop(itop+2,jtop-1,ktop) -3.326777e-04*utop(itop+2,jtop-1,ktop+1) +4.620524e-05*utop(itop+2,jtop-1,ktop+2) +4.455505e-04*utop(itop+2,jtop,ktop-2) -4.158472e-03*utop(itop+2,jtop,ktop-1) -1.871312e-02*utop(itop+2,jtop,ktop) +2.495083e-03*utop(itop+2,jtop,ktop+1) -3.465393e-04*utop(itop+2,jtop,ktop+2) +9.901123e-05*utop(itop+2,jtop+1,ktop-2) -9.241048e-04*utop(itop+2,jtop+1,ktop-1) -4.158472e-03*utop(itop+2,jtop+1,ktop) +5.544629e-04*utop(itop+2,jtop+1,ktop+1) -7.700874e-05*utop(itop+2,jtop+1,ktop+2) -1.060835e-05*utop(itop+2,jtop+2,ktop-2) +9.901123e-05*utop(itop+2,jtop+2,ktop-1) +4.455505e-04*utop(itop+2,jtop+2,ktop) -5.940674e-05*utop(itop+2,jtop+2,ktop+1) +8.250936e-06*utop(itop+2,jtop+2,ktop+2));
+	u(i+1,j+1,k+1) = ( +4.991307e-06*utop(itop-2,jtop-2,ktop-2) -3.593741e-05*utop(itop-2,jtop-2,ktop-1) +2.695306e-04*utop(itop-2,jtop-2,ktop) +5.989568e-05*utop(itop-2,jtop-2,ktop+1) -6.417395e-06*utop(itop-2,jtop-2,ktop+2) -3.593741e-05*utop(itop-2,jtop-1,ktop-2) +2.587494e-04*utop(itop-2,jtop-1,ktop-1) -1.940620e-03*utop(itop-2,jtop-1,ktop) -4.312489e-04*utop(itop-2,jtop-1,ktop+1) +4.620524e-05*utop(itop-2,jtop-1,ktop+2) +2.695306e-04*utop(itop-2,jtop,ktop-2) -1.940620e-03*utop(itop-2,jtop,ktop-1) +1.455465e-02*utop(itop-2,jtop,ktop) +3.234367e-03*utop(itop-2,jtop,ktop+1) -3.465393e-04*utop(itop-2,jtop,ktop+2) +5.989568e-05*utop(itop-2,jtop+1,ktop-2) -4.312489e-04*utop(itop-2,jtop+1,ktop-1) +3.234367e-03*utop(itop-2,jtop+1,ktop) +7.187482e-04*utop(itop-2,jtop+1,ktop+1) -7.700874e-05*utop(itop-2,jtop+1,ktop+2) -6.417395e-06*utop(itop-2,jtop+2,ktop-2) +4.620524e-05*utop(itop-2,jtop+2,ktop-1) -3.465393e-04*utop(itop-2,jtop+2,ktop) -7.700874e-05*utop(itop-2,jtop+2,ktop+1) +8.250936e-06*utop(itop-2,jtop+2,ktop+2) -3.593741e-05*utop(itop-1,jtop-2,ktop-2) +2.587494e-04*utop(itop-1,jtop-2,ktop-1) -1.940620e-03*utop(itop-1,jtop-2,ktop) -4.312489e-04*utop(itop-1,jtop-2,ktop+1) +4.620524e-05*utop(itop-1,jtop-2,ktop+2) +2.587494e-04*utop(itop-1,jtop-1,ktop-2) -1.862995e-03*utop(itop-1,jtop-1,ktop-1) +1.397246e-02*utop(itop-1,jtop-1,ktop) +3.104992e-03*utop(itop-1,jtop-1,ktop+1) -3.326777e-04*utop(itop-1,jtop-1,ktop+2) -1.940620e-03*utop(itop-1,jtop,ktop-2) +1.397246e-02*utop(itop-1,jtop,ktop-1) -1.047935e-01*utop(itop-1,jtop,ktop) -2.328744e-02*utop(itop-1,jtop,ktop+1) +2.495083e-03*utop(itop-1,jtop,ktop+2) -4.312489e-04*utop(itop-1,jtop+1,ktop-2) +3.104992e-03*utop(itop-1,jtop+1,ktop-1) -2.328744e-02*utop(itop-1,jtop+1,ktop) -5.174987e-03*utop(itop-1,jtop+1,ktop+1) +5.544629e-04*utop(itop-1,jtop+1,ktop+2) +4.620524e-05*utop(itop-1,jtop+2,ktop-2) -3.326777e-04*utop(itop-1,jtop+2,ktop-1) +2.495083e-03*utop(itop-1,jtop+2,ktop) +5.544629e-04*utop(itop-1,jtop+2,ktop+1) -5.940674e-05*utop(itop-1,jtop+2,ktop+2) +2.695306e-04*utop(itop,jtop-2,ktop-2) -1.940620e-03*utop(itop,jtop-2,ktop-1) +1.455465e-02*utop(itop,jtop-2,ktop) +3.234367e-03*utop(itop,jtop-2,ktop+1) -3.465393e-04*utop(itop,jtop-2,ktop+2) -1.940620e-03*utop(itop,jtop-1,ktop-2) +1.397246e-02*utop(itop,jtop-1,ktop-1) -1.047935e-01*utop(itop,jtop-1,ktop) -2.328744e-02*utop(itop,jtop-1,ktop+1) +2.495083e-03*utop(itop,jtop-1,ktop+2) +1.455465e-02*utop(itop,jtop,ktop-2) -1.047935e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) +1.746558e-01*utop(itop,jtop,ktop+1) -1.871312e-02*utop(itop,jtop,ktop+2) +3.234367e-03*utop(itop,jtop+1,ktop-2) -2.328744e-02*utop(itop,jtop+1,ktop-1) +1.746558e-01*utop(itop,jtop+1,ktop) +3.881240e-02*utop(itop,jtop+1,ktop+1) -4.158472e-03*utop(itop,jtop+1,ktop+2) -3.465393e-04*utop(itop,jtop+2,ktop-2) +2.495083e-03*utop(itop,jtop+2,ktop-1) -1.871312e-02*utop(itop,jtop+2,ktop) -4.158472e-03*utop(itop,jtop+2,ktop+1) +4.455505e-04*utop(itop,jtop+2,ktop+2) +5.989568e-05*utop(itop+1,jtop-2,ktop-2) -4.312489e-04*utop(itop+1,jtop-2,ktop-1) +3.234367e-03*utop(itop+1,jtop-2,ktop) +7.187482e-04*utop(itop+1,jtop-2,ktop+1) -7.700874e-05*utop(itop+1,jtop-2,ktop+2) -4.312489e-04*utop(itop+1,jtop-1,ktop-2) +3.104992e-03*utop(itop+1,jtop-1,ktop-1) -2.328744e-02*utop(itop+1,jtop-1,ktop) -5.174987e-03*utop(itop+1,jtop-1,ktop+1) +5.544629e-04*utop(itop+1,jtop-1,ktop+2) +3.234367e-03*utop(itop+1,jtop,ktop-2) -2.328744e-02*utop(itop+1,jtop,ktop-1) +1.746558e-01*utop(itop+1,jtop,ktop) +3.881240e-02*utop(itop+1,jtop,ktop+1) -4.158472e-03*utop(itop+1,jtop,ktop+2) +7.187482e-04*utop(itop+1,jtop+1,ktop-2) -5.174987e-03*utop(itop+1,jtop+1,ktop-1) +3.881240e-02*utop(itop+1,jtop+1,ktop) +8.624978e-03*utop(itop+1,jtop+1,ktop+1) -9.241048e-04*utop(itop+1,jtop+1,ktop+2) -7.700874e-05*utop(itop+1,jtop+2,ktop-2) +5.544629e-04*utop(itop+1,jtop+2,ktop-1) -4.158472e-03*utop(itop+1,jtop+2,ktop) -9.241048e-04*utop(itop+1,jtop+2,ktop+1) +9.901123e-05*utop(itop+1,jtop+2,ktop+2) -6.417395e-06*utop(itop+2,jtop-2,ktop-2) +4.620524e-05*utop(itop+2,jtop-2,ktop-1) -3.465393e-04*utop(itop+2,jtop-2,ktop) -7.700874e-05*utop(itop+2,jtop-2,ktop+1) +8.250936e-06*utop(itop+2,jtop-2,ktop+2) +4.620524e-05*utop(itop+2,jtop-1,ktop-2) -3.326777e-04*utop(itop+2,jtop-1,ktop-1) +2.495083e-03*utop(itop+2,jtop-1,ktop) +5.544629e-04*utop(itop+2,jtop-1,ktop+1) -5.940674e-05*utop(itop+2,jtop-1,ktop+2) -3.465393e-04*utop(itop+2,jtop,ktop-2) +2.495083e-03*utop(itop+2,jtop,ktop-1) -1.871312e-02*utop(itop+2,jtop,ktop) -4.158472e-03*utop(itop+2,jtop,ktop+1) +4.455505e-04*utop(itop+2,jtop,ktop+2) -7.700874e-05*utop(itop+2,jtop+1,ktop-2) +5.544629e-04*utop(itop+2,jtop+1,ktop-1) -4.158472e-03*utop(itop+2,jtop+1,ktop) -9.241048e-04*utop(itop+2,jtop+1,ktop+1) +9.901123e-05*utop(itop+2,jtop+1,ktop+2) +8.250936e-06*utop(itop+2,jtop+2,ktop-2) -5.940674e-05*utop(itop+2,jtop+2,ktop-1) +4.455505e-04*utop(itop+2,jtop+2,ktop) +9.901123e-05*utop(itop+2,jtop+2,ktop+1) -1.060835e-05*utop(itop+2,jtop+2,ktop+2));
+	
+
+}
+
+
+template< class S, class O, typename T >
+void solver<S,O,T>::interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine, bool bcf=false )
+{
+	
+	MeshvarBnd<T> *u    = &fine;
+	MeshvarBnd<T> *utop = &coarse;
+	
+	
+	bcf = true;
+	
+	int
+	xoff = u->offset(0),
+	yoff = u->offset(1),
+	zoff = u->offset(2);
+	
+	//... don't do anything if we are not an additional refinement region
+	if( ilevel <= m_ilevelmin )
+		return;
+	
+	int
+	nx = u->size(0), 
+	ny = u->size(1), 
+	nz = u->size(2);
+	
+	for( int j=0; j<ny; ++j )
+		for( int k=0; k<nz; ++k )
+		{
+			int jtop = (int)(0.5*(double)(j))+yoff;
+			int ktop = (int)(0.5*(double)(k))+zoff;
+			
+			interp_cubic( coarse, fine, -2, j, k, xoff-1,    jtop, ktop );
+			interp_cubic( coarse, fine, nz, j, k, xoff+nz/2, jtop, ktop );
+			
+		}
+	
+	for( int i=0; i<nx; ++i )
+		for( int k=0; k<nz; ++k )
+		{
+			int itop = (int)(0.5*(double)(i))+xoff;
+			int ktop = (int)(0.5*(double)(k))+zoff;
+			
+			interp_cubic( coarse, fine, i, -2, k, itop, yoff-1,    ktop );
+			interp_cubic( coarse, fine, i, ny, k, itop, yoff+ny/2, ktop );
+			
+		}
+	
+	for( int i=0; i<nx; ++i )
+		for( int j=0; j<ny; ++j )
+		{
+			int itop = (int)(0.5*(double)(i))+xoff;
+			int jtop = (int)(0.5*(double)(j))+yoff;
+			
+			interp_cubic( coarse, fine, i, j, -2, itop, jtop, zoff-1 );
+			interp_cubic( coarse, fine, i, j, nz, itop, jtop, zoff+nz/2 );
+			
+		}
+}
+
+
+template< class S, class O, typename T >
+void solver<S,O,T>::interp_coarse_fine( unsigned ilevel, MeshvarBnd<T>& coarse, MeshvarBnd<T>& fine, bool bcf )
+{
+	MeshvarBnd<T> *u    = &fine;
+	MeshvarBnd<T> *utop = &coarse;
+	
+	
+	bcf = true;;
+	//bcf = false;
+	
+	int
+		xoff = u->offset(0),
+		yoff = u->offset(1),
+		zoff = u->offset(2);
+
+	//... don't do anything if we are not an additional refinement region
+	if( xoff == 0 && yoff == 0 && zoff == 0 )
+		return;
+	
+	int
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+	
+	//... set boundary condition for fine grid
+	
+	#pragma omp parallel for schedule(dynamic)
+	for( int ix=-1; ix<=nx; ++ix )
+		for( int iy=-1; iy<=ny; ++iy )
+			for( int iz=-1; iz<=nz; ++iz )
+			{
+				bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz);
+				
+				//if(ix==-1||ix==nx||iy==-1||iy==ny||iz==-1||iz==nz)
+				if( xbnd || ybnd || zbnd )
+				//if( xbnd ^ ybnd ^ zbnd )
+				{
+					
+					//... only deal with proper ghostzones
+					if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd))
+						continue;
+					
+					/*int ixtop = (int)(0.5*(double)(ix+2*xoff)+1e-3);
+					int iytop = (int)(0.5*(double)(iy+2*yoff)+1e-3);
+					int iztop = (int)(0.5*(double)(iz+2*zoff)+1e-3);*/
+					
+					int ixtop = (int)(0.5*(double)(ix))+xoff;
+					int iytop = (int)(0.5*(double)(iy))+yoff;
+					int iztop = (int)(0.5*(double)(iz))+zoff;
+					
+					if( ix==-1 ) ixtop=xoff-1;
+					if( iy==-1 ) iytop=yoff-1;
+					if( iz==-1 ) iztop=zoff-1;
+					
+					double ustar1, ustar2, ustar3, uhat;			
+					double fac = 0.5;//0.25;
+					double flux;;
+				    if( ix == -1 && iy%2==0 && iz%2==0 )
+					{
+						flux = 0.0;
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+							{		
+								ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) );
+								ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) );
+								ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) );
+									
+								uhat   = interp2( /*-1.0, 0.0, 1.0, */ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+							
+								//(*u)(ix,iy+j,iz+k) = 0.0;//(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k), -1.0 );
+								
+								(*u)(ix,iy+j,iz+k) = interp2left( uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k) );
+								
+								flux += ((*u)(ix+1,iy+j,iz+k)-(*u)(ix,iy+j,iz+k));
+							}
+						
+						
+						
+						flux /= 4.0;
+						
+						double dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
+						
+						//dflux *= 2.0;
+						
+						if( bcf )
+							for( int j=0;j<=1;j++)
+								for( int k=0;k<=1;k++)
+									(*u)(ix,iy+j,iz+k) -= dflux;
+						else
+							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop+1,iytop,iztop) - 2.0*flux;
+						
+						
+					}
+					// right boundary
+					if( ix == nx && iy%2==0 && iz%2==0 )
+					{
+						flux = 0.0;
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+							{		
+								ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) );
+								ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) );
+								ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) );
+								
+								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+								
+								//(*u)(ix,iy+j,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix-1,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), 1.0 );
+								(*u)(ix,iy+j,iz+k) = interp2right( (*u)(ix-2,iy+j,iz+k), (*u)(ix-1,iy+j,iz+k), uhat );
+								flux += ((*u)(ix,iy+j,iz+k)-(*u)(ix-1,iy+j,iz+k));
+							}
+						flux /= 4.0;
+						
+						
+						double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop-1,iytop,iztop))/2.0 - flux;
+						//dflux *= 2.0;
+						
+						if( bcf )
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+								(*u)(ix,iy+j,iz+k) += dflux;
+						else
+							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop-1,iytop,iztop) + 2.0*flux;
+						
+					}
+					// bottom boundary
+					if( iy == -1 && ix%2==0 && iz%2==0 )
+					{
+						flux = 0.0;
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+							{
+								ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) );
+								ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
+								ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) );
+								
+								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+								
+								//(*u)(ix+j,iy,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k), -1.0 );
+								(*u)(ix+j,iy,iz+k) = interp2left( uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k) );
+								
+								flux += ((*u)(ix+j,iy+1,iz+k)-(*u)(ix+j,iy,iz+k));
+							}
+						flux /= 4.0;
+						//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop+1,iztop) - flux;
+						double dflux = ((*utop)(ixtop,iytop+1,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
+						//dflux *= 2.0;
+						if( bcf )
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+								(*u)(ix+j,iy,iz+k) -= dflux;
+						else
+							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop+1,iztop) - 2.0*flux;
+						
+					}
+					// top boundary
+					if( iy == ny && ix%2==0 && iz%2==0 )
+					{
+						flux = 0.0;
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+							{		
+								ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) );
+								ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
+								ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) );
+								
+								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+								
+								//(*u)(ix+j,iy,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix+j,iy-1,iz+k), (*u)(ix+j,iy-2,iz+k), 1.0 );
+								(*u)(ix+j,iy,iz+k) = interp2right( (*u)(ix+j,iy-2,iz+k), (*u)(ix+j,iy-1,iz+k), uhat  );
+								
+								flux += ((*u)(ix+j,iy,iz+k)-(*u)(ix+j,iy-1,iz+k));
+							}
+						flux /= 4.0;
+						//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop-1,iztop) + flux;
+						double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop-1,iztop))/2.0 - flux;
+						//dflux *= 2.0;
+						if( bcf )
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+								(*u)(ix+j,iy,iz+k) += dflux;
+						else
+							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop-1,iztop) + 2.0*flux;
+						
+					}
+					// front boundary
+					if( iz == -1 && ix%2==0 && iy%2==0 )
+					{
+						flux = 0.0;
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+							{		
+								ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) );
+								ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
+								ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) );
+								
+								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+								
+								//(*u)(ix+j,iy+k,iz) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2), -1.0 );
+								(*u)(ix+j,iy+k,iz) = interp2left( uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2) );
+								
+								flux += ((*u)(ix+j,iy+k,iz+1)-(*u)(ix+j,iy+k,iz));
+							}
+						flux /= 4.0;
+						//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop+1) - flux;
+						double dflux = ((*utop)(ixtop,iytop,iztop+1)-(*utop)(ixtop,iytop,iztop))/2.0 - flux;
+						//dflux *= 2.0;
+						if( bcf )
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+								(*u)(ix+j,iy+k,iz) -= dflux;
+						else
+							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop+1) - 2.0*flux;
+						
+					}
+					// back boundary
+					if( iz == nz && ix%2==0 && iy%2==0 )
+					{
+						flux = 0.0;
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+							{		
+								ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) );
+								ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) );
+								ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) );
+								
+								uhat   = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) );
+								
+								//(*u)(ix+j,iy+k,iz) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix+j,iy+k,iz-1), (*u)(ix+j,iy+k,iz-2), 1.0 );
+								(*u)(ix+j,iy+k,iz) = interp2right( (*u)(ix+j,iy+k,iz-2), (*u)(ix+j,iy+k,iz-1), uhat );
+								
+								flux += ((*u)(ix+j,iy+k,iz)-(*u)(ix+j,iy+k,iz-1));
+							}
+						flux /= 4.0;
+						//(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop-1) + flux;
+						double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop,iztop-1))/2.0 - flux;
+						//dflux *= 2.0;
+						if( bcf )
+						for( int j=0;j<=1;j++)
+							for( int k=0;k<=1;k++)
+								(*u)(ix+j,iy+k,iz) += dflux;
+						else
+							(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop-1) + 2.0*flux;
+					}
+					
+				}
+			}
+	
+}
+
+
+#if 1
+template< class S, class O, typename T >
+void solver<S,O,T>::setBC( unsigned ilevel )
+{
+	//... set only on level before additional refinement starts
+	//if( ilevel == m_ilevelmin )
+	if( ilevel == m_ilevelmin )
+	{
+		MeshvarBnd<T> *u = m_pu->get_grid(ilevel);
+		//int nbnd = u->m_nbnd,
+		int
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+		
+		/*for( int ix=-nbnd; ix<nx+nbnd; ++ix )
+			for( int iy=-nbnd; iy<ny+nbnd; ++iy )
+				for( int iz=-nbnd; iz<nz+nbnd; ++iz )
+					if( ix<0||ix>=nx||iy<0||iy>=ny||iz<0||iz>=nz )
+						(*u)(ix,iy,iz) = (*m_pubnd)(ix,iy,iz);*/
+		
+		
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+			{
+				(*u)(-1,iy,iz) = 2.0*(*m_pubnd)(-1,iy,iz) - (*u)(0,iy,iz);
+				(*u)(nx,iy,iz) = 2.0*(*m_pubnd)(nx,iy,iz) - (*u)(nx-1,iy,iz);;
+			}
+		
+		for( int ix=0; ix<nx; ++ix )
+			for( int iz=0; iz<nz; ++iz )
+			{
+				(*u)(ix,-1,iz) = 2.0*(*m_pubnd)(ix,-1,iz) - (*u)(ix,0,iz);
+				(*u)(ix,ny,iz) = 2.0*(*m_pubnd)(ix,ny,iz) - (*u)(ix,ny-1,iz);
+			}
+		
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+			{
+				(*u)(ix,iy,-1) = 2.0*(*m_pubnd)(ix,iy,-1) - (*u)(ix,iy,0);
+				(*u)(ix,iy,nz) = 2.0*(*m_pubnd)(ix,iy,nz) - (*u)(ix,iy,nz-1);
+			}		
+		
+		
+		
+	}/*else if( ilevel < m_ilevelmin ) {
+		MeshvarBnd<T> *u = m_pu->get_grid(ilevel);
+		int nbnd = u->m_nbnd,
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+		
+		for( int ix=-nbnd; ix<nx+nbnd; ++ix )
+			for( int iy=-nbnd; iy<ny+nbnd; ++iy )
+				for( int iz=-nbnd; iz<nz+nbnd; ++iz )
+					if( ix<0||ix>=nx||iy<0||iy>=ny||iz<0||iz>=nz )
+						(*u)(ix,iy,iz) = 0.0;
+	}*/
+
+
+}
+
+#else
+
+//... enforce periodic boundary conditions
+template< class S, class O, typename T >
+void solver<S,O,T>::setBC( unsigned ilevel )
+{
+	MeshvarBnd<T> *u = m_pu->get_grid(ilevel);
+	
+	//... set only on level before additional refinement starts
+	if( ilevel <= m_ilevelmin )
+	{
+		
+		int nbnd = u->m_nbnd,
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+		
+		//(*u)(0,0,0) = 0.0;
+		
+		
+		double sum = 0.0;
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+				for( int iz=0; iz<nz; ++iz )
+					sum += (*u)(ix,iy,iz);
+		sum /= (nx*ny*nz);
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+				for( int iz=0; iz<nz; ++iz )
+					(*u)(ix,iy,iz) -= sum;
+		
+		
+		
+		
+		for( int iy=0; iy<ny; ++iy )
+			for( int iz=0; iz<nz; ++iz )
+			{
+				(*u)(-1,iy,iz) = (*u)(nx-1,iy,iz);
+				(*u)(nx,iy,iz) = (*u)(0,iy,iz);
+			}
+		
+		for( int ix=0; ix<nx; ++ix )
+			for( int iz=0; iz<nz; ++iz )
+			{
+				(*u)(ix,-1,iz) = (*u)(ix,ny-1,iz);
+				(*u)(ix,ny,iz) = (*u)(ix,0,iz);
+			}
+					
+		for( int ix=0; ix<nx; ++ix )
+			for( int iy=0; iy<ny; ++iy )
+			{
+				(*u)(ix,iy,-1) = (*u)(ix,iy,nz-1);
+				(*u)(ix,iy,nz) = (*u)(ix,iy,0);
+			}	
+		
+		
+		
+	}
+	
+}
+#endif
+
+
+//... enforce periodic boundary conditions
+template< class S, class O, typename T >
+void solver<S,O,T>::make_periodic( MeshvarBnd<T> *u )
+{
+
+	int
+		nx = u->size(0), 
+		ny = u->size(1), 
+		nz = u->size(2);
+		
+
+	#pragma omp parallel
+	{
+		
+		if( u->offset(0) == 0 )
+			for( int iy=0; iy<ny; ++iy )
+				for( int iz=0; iz<nz; ++iz )
+				{
+					(*u)(-1,iy,iz) = (*u)(nx-1,iy,iz);
+					(*u)(nx,iy,iz) = (*u)(0,iy,iz);
+				}
+		
+		if( u->offset(1) == 0 )
+			for( int ix=0; ix<nx; ++ix )
+				for( int iz=0; iz<nz; ++iz )
+				{
+					(*u)(ix,-1,iz) = (*u)(ix,ny-1,iz);
+					(*u)(ix,ny,iz) = (*u)(ix,0,iz);
+				}
+		
+		if( u->offset(2) == 0 )
+			for( int ix=0; ix<nx; ++ix )
+				for( int iy=0; iy<ny; ++iy )
+				{
+					(*u)(ix,iy,-1) = (*u)(ix,iy,nz-1);
+					(*u)(ix,iy,nz) = (*u)(ix,iy,0);
+				}									  
+			
+	}
+}
+
+END_MULTIGRID_NAMESPACE
+ 
+#endif
diff --git a/tests.hh b/tests.hh
new file mode 100644
index 0000000..e1c6adc
--- /dev/null
+++ b/tests.hh
@@ -0,0 +1,309 @@
+/*
+ 
+ tests.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+
+#ifndef __TESTS_HH
+#define __TESTS_HH
+
+#include <math.h>
+
+
+inline double CIC_interp_back( const MeshvarBnd<double>& A, double x, double y, double z )
+{
+	int 
+		ix  = (int)x,
+		iy  = (int)y,
+		iz  = (int)z,
+		ix1 = (ix+1),
+		iy1 = (iy+1),
+		iz1 = (iz+1);
+	
+	
+    double
+		dx = (double)(x - (double)ix),
+		dy = (double)(y - (double)iy),
+		dz = (double)(z - (double)iz),
+		tx = 1.0-dx,
+		ty = 1.0-dy,
+		tz = 1.0-dz;
+	
+    double
+		f_xyz = A(ix,iy,iz)*tx*ty*tz,
+		f_Xyz = A(ix1,iy,iz)*dx*ty*tz,
+		f_xYz = A(ix,iy1,iz)*tx*dy*tz,
+		f_xyZ = A(ix,iy,iz1)*tx*ty*dz,
+		f_XYz = A(ix1,iy1,iz)*dx*dy*tz,
+		f_XyZ = A(ix1,iy,iz1)*dx*ty*dz,
+		f_xYZ = A(ix,iy1,iz1)*tx*dy*dz,
+		f_XYZ = A(ix1,iy1,iz1)*dx*dy*dz;
+	
+    return f_xyz + f_Xyz + f_xYz + f_xyZ + f_XYz + f_XyZ + f_xYZ + f_XYZ;
+}
+
+inline double TSC_interp_back( const MeshvarBnd<double>& A, double x, double y, double z )
+{
+	double val = 0.0;
+    int xngp = (int)x, yngp = (int)y, zngp = (int)z;
+        
+	for( int xx = xngp-1; xx <= xngp+1; ++xx )
+	{
+		double weightx = 1.0;
+		double dx = fabs(x-(double)xx);
+		int axx(xx);
+		
+		if( xx==xngp )
+			weightx *= 0.75-dx*dx;
+		else{
+			weightx *= 1.125 - 1.5*dx + 0.5*dx*dx;
+		}
+		
+		for( int yy = yngp-1; yy <= yngp+1; ++yy )
+		{
+			double weighty = weightx;
+			double dy = fabs(y-(double)yy);
+			int ayy(yy);
+			
+			if( yy==yngp )
+				weighty *= 0.75-dy*dy;
+			else{
+				weighty *= 1.125 - 1.5*dy + 0.5*dy*dy;
+			}
+			
+			for( int zz = zngp-1; zz <= zngp+1; ++zz )
+			{
+				double weightz = weighty;
+				double dz = fabs(z-(double)zz);
+				int azz(zz);
+				
+				if( zz==zngp )
+					weightz *= 0.75-dz*dz;
+				else{
+					weightz *= 1.125 - 1.5*dz + 0.5*dz*dz;
+				}
+				
+				val += A(axx,ayy,azz) * weightz;
+			}
+		}
+	}
+	
+	return val;
+}
+
+class TestProblem{
+public:
+	MeshvarBnd<double> m_rho, m_uana, m_ubnd, m_xgrad, m_ygrad, m_zgrad;
+	int m_nb, m_nres;
+	double m_h;
+	
+	TestProblem( int nb, int nres )
+	: m_rho( nb, nres ), m_uana( nb, nres ), m_ubnd( nb, nres ),
+	m_xgrad( nb, nres ), m_ygrad( nb, nres ), m_zgrad( nb, nres ),
+	m_nb( nb ), m_nres( nres ), m_h( 1.0/((double)nres ) )//m_h( 1.0/((double)nres+1.0 ) )
+	{ }
+	
+};
+
+class TSC_Test : public TestProblem{
+public:
+	double m_q;
+	
+	class TSCcube{
+	public:
+		std::vector<double> m_data;
+		
+		
+		TSCcube()
+		{
+			m_data.assign(27,0.0);
+			
+			//.. center
+			(*this)( 0, 0, 0) = 27./64.;
+			
+			//.. faces
+			(*this)(-1, 0, 0) = 
+			(*this)(+1, 0, 0) = 
+			(*this)( 0,-1, 0) = 
+			(*this)( 0,+1, 0) = 
+			(*this)( 0, 0,-1) = 
+			(*this)( 0, 0,+1) = 9./128.;
+			
+			//.. edges
+			(*this)(-1,-1, 0) =
+			(*this)(-1,+1, 0) =
+			(*this)(+1,-1, 0) =
+			(*this)(+1,+1, 0) =
+			(*this)(-1, 0,-1) =
+			(*this)(-1, 0,+1) =
+			(*this)(+1, 0,-1) =
+			(*this)(+1, 0,+1) =
+			(*this)( 0,-1,-1) =
+			(*this)( 0,-1,+1) =
+			(*this)( 0,+1,-1) =
+			(*this)( 0,+1,+1) = 3./256.;
+			
+			//.. corners
+			(*this)(-1,-1,-1) =
+			(*this)(-1,+1,-1) =
+			(*this)(-1,-1,+1) =
+			(*this)(-1,+1,+1) =
+			(*this)(+1,-1,-1) =
+			(*this)(+1,+1,-1) =
+			(*this)(+1,-1,+1) =
+			(*this)(+1,+1,+1) = 1./512.;
+			
+		}
+		
+		double& operator()(int i, int j, int k)
+		{ return m_data[ ((i+1)*3+(j+1))*3 +(k+1)]; }
+		
+		const double& operator()(int i, int j, int k) const
+		{ return m_data[ ((i+1)*3+(j+1))*3 +(k+1)]; }
+	};
+	
+	TSC_Test( int nb, int nres, double q=-1.0 )
+	: TestProblem(nb, nres), m_q(q)
+	{
+		TSCcube c;
+		int xm(nres/2-1), ym(nres/2-1), zm(nres/2-1);
+		double xxm((double)xm*m_h), yym((double)ym*m_h), zzm((double)zm*m_h);
+		
+		double fourpi = 4.0*M_PI;
+		
+		m_uana.zero();
+		m_ubnd.zero();
+		m_xgrad.zero();
+		m_ygrad.zero();
+		m_zgrad.zero();
+		
+		for( int i=-nb; i<nres+nb; ++i )
+			for( int j=-nb; j<nres+nb; ++j )
+				for( int k=-nb; k<nres+nb; ++k )
+				{
+					
+					//double xxm((double)xm), yym((double)ym), zzm((double)zm);
+					double xx((double)i*m_h), yy((double)j*m_h), zz((double)k*m_h);
+					
+					
+					for( int ix=-1; ix<=1; ++ix )
+						for( int iy=-1; iy<=1; ++iy )
+							for( int iz=-1; iz<=1; ++iz )
+							{
+								double dx(xx-(xxm+ix*m_h)), dy(yy-(yym+iy*m_h)), dz(zz-(zzm+iz*m_h));
+								double d3 = pow(dx*dx+dy*dy+dz*dz,1.5);
+								
+								double dphi = m_q*c(ix,iy,iz)/sqrt(dx*dx+dy*dy+dz*dz);
+								
+								if( i==xm && j==ym && k==zm )
+									m_rho(i+ix,j+iy,k+iz) = m_q*c(ix,iy,iz)/(m_h*m_h*m_h);
+								
+								if( d3 < 1e-10 )
+									continue;
+								
+								m_uana(i,j,k) += dphi/fourpi;
+								m_ubnd(i,j,k) += dphi/fourpi;
+								
+								m_xgrad(i,j,k) -= m_q*c(ix,iy,iz)*dx/d3/fourpi;
+								m_ygrad(i,j,k) -= m_q*c(ix,iy,iz)*dy/d3/fourpi;
+								m_zgrad(i,j,k) -= m_q*c(ix,iy,iz)*dz/d3/fourpi;
+								
+								
+							}
+				}
+		
+		
+		//m_rho(xm,ym,zm) = 4.0*M_PI*m_q/(m_h*m_h*m_h);
+		
+	}
+};
+
+
+class PointMassTest : public TestProblem{
+public:
+	double m_q;
+	
+	PointMassTest( int nb, int nres, double q=-1.0 )
+	: TestProblem(nb, nres), m_q( q )
+	{
+		//int xm(nres/2-1), ym(nres/2-1), zm(nres/2-1);
+		int xm(nres/2), ym(nres/2), zm(nres/2);
+		double xxm((double)xm*m_h), yym((double)ym*m_h), zzm((double)zm*m_h);
+		m_rho.zero();
+		
+		double fourpi = 4.0*M_PI;
+		
+		for( int i=-nb; i<nres+nb; ++i )
+			for( int j=-nb; j<nres+nb; ++j )
+				for( int k=-nb; k<nres+nb; ++k )
+				{
+					
+					//double xxm((double)xm), yym((double)ym), zzm((double)zm);
+					double xx((double)i*m_h), yy((double)j*m_h), zz((double)k*m_h);
+					
+					double dx(xx-xxm), dy(yy-yym), dz(zz-zzm);
+					m_uana(i,j,k) = m_q/sqrt(dx*dx+dy*dy+dz*dz)/fourpi;///2.0/M_PI;
+					m_ubnd(i,j,k) = 0.0;//m_uana(i,j,k);
+					
+					double d3 = pow(dx*dx+dy*dy+dz*dz,1.5);
+					m_xgrad(i,j,k) = -m_q*dx/d3/fourpi;
+					m_ygrad(i,j,k) = -m_q*dy/d3/fourpi;
+					m_zgrad(i,j,k) = -m_q*dz/d3/fourpi;
+				}
+		
+		
+		for( int iy=0; iy<nres; ++iy )
+			for( int iz=0; iz<nres; ++iz )
+			{
+				double dx=0.5+0.5*m_h, dy=((double)iy+0.5)*m_h-0.5, dz=((double)iz+0.5)*m_h-0.5, d=sqrt(dx*dx+dy*dy+dz*dz);
+				m_ubnd(-1,iy,iz) = m_q/d/fourpi;
+				dx = 0.5-0.5*m_h;
+				d = sqrt(dx*dx+dy*dy+dz*dz);
+				m_ubnd(nres,iy,iz) = m_q/d/fourpi;
+			}
+		
+		for( int ix=0; ix<nres; ++ix )
+			for( int iz=0; iz<nres; ++iz )
+			{
+				double dx=((double)ix+0.5)*m_h-0.5, dy=0.5+0.5*m_h, dz=((double)iz+0.5)*m_h-0.5, d=sqrt(dx*dx+dy*dy+dz*dz);
+				m_ubnd(ix,-1,iz) = m_q/d/fourpi;
+				dy=0.5-0.5*m_h;
+				d = sqrt(dx*dx+dy*dy+dz*dz);
+				m_ubnd(ix,nres,iz) = m_q/d/fourpi;
+			}
+		
+		for( int ix=0; ix<nres; ++ix )
+			for( int iy=0; iy<nres; ++iy )
+			{
+				double dx=((double)ix+0.5)*m_h-0.5, dy=((double)iy+0.5)*m_h-0.5, dz=0.5+0.5*m_h, d=sqrt(dx*dx+dy*dy+dz*dz);
+				m_ubnd(ix,iy,-1) = m_q/d/fourpi;
+				dz=0.5-0.5*m_h;
+				d = sqrt(dx*dx+dy*dy+dz*dz);
+				m_ubnd(ix,iy,nres) = m_q/d/fourpi;
+			}
+		
+		
+		m_rho(xm,ym,zm) = m_q/(m_h*m_h*m_h);
+	}	
+};
+
+
+#endif
\ No newline at end of file
diff --git a/transfer_function.cc b/transfer_function.cc
new file mode 100644
index 0000000..34a1888
--- /dev/null
+++ b/transfer_function.cc
@@ -0,0 +1,72 @@
+/*
+ 
+ transfer_function.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+
+#include "transfer_function.hh"
+
+
+std::map< std::string, transfer_function_plugin_creator *>& 
+get_transfer_function_plugin_map()
+{
+	static std::map< std::string, transfer_function_plugin_creator* > transfer_function_plugin_map;
+	return transfer_function_plugin_map;
+}
+
+void print_transfer_function_plugins()
+{
+	std::map< std::string, transfer_function_plugin_creator *>& m = get_transfer_function_plugin_map();
+	std::map< std::string, transfer_function_plugin_creator *>::iterator it;
+	it = m.begin();
+	std::cout << " - Available transfer function plug-ins:\n";
+	while( it!=m.end() )
+	{
+		if( (*it).second )
+			std::cout << "\t\'" << (*it).first << "\'\n";
+		++it;
+	}
+	
+	
+}
+
+transfer_function_plugin *select_transfer_function_plugin( config_file& cf )
+{
+	std::string tfname = cf.getValue<std::string>( "cosmology", "transfer" );
+	
+	transfer_function_plugin_creator *the_transfer_function_plugin_creator 
+	= get_transfer_function_plugin_map()[ tfname ];
+	
+	if( !the_transfer_function_plugin_creator )
+	{	
+		std::cerr << " - Error: transfer function plug-in \'" << tfname << "\' not found." << std::endl;
+		print_transfer_function_plugins();
+		throw std::runtime_error("Unknown transfer function plug-in");
+		
+	}else
+		std::cout << " - Selecting transfer function plug-in \'" << tfname << "\'..." << std::endl;
+	
+	transfer_function_plugin *the_transfer_function_plugin 
+	= the_transfer_function_plugin_creator->create( cf );
+	
+	return the_transfer_function_plugin;
+}
+
diff --git a/transfer_function.hh b/transfer_function.hh
new file mode 100644
index 0000000..b8a9aa6
--- /dev/null
+++ b/transfer_function.hh
@@ -0,0 +1,1420 @@
+/*
+ 
+ transfer_function.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2010  Oliver Hahn
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ 
+*/
+
+#ifndef __TRANSFERFUNCTION_HH
+#define __TRANSFERFUNCTION_HH
+
+#include <vector>
+#include <sstream>
+#include <fstream>
+#include <iostream>
+#include <cmath>
+#include <stdexcept>
+
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_spline.h>
+#include <gsl/gsl_sf_gamma.h>
+
+#include "Numerics.hh"
+#include "general.hh"
+
+#include <complex>
+
+
+
+#include "config_file.hh"
+
+enum tf_type{
+	total, cdm, baryon
+};
+
+//! Abstract base class for transfer functions
+/*!
+ This class implements a purely virtual interface that can be
+ used to derive instances implementing various transfer functions.
+ */ 
+class transfer_function_plugin{
+public:
+	Cosmology cosmo_;
+	config_file *pcf_;
+	bool tf_distinct_;
+	
+public:
+	
+	transfer_function_plugin( config_file& cf ) 
+	: pcf_( &cf )
+	{
+		real_t zstart;
+		zstart				= pcf_->getValue<real_t>( "setup", "zstart" );
+		cosmo_.astart		= 1.0/(1.0+zstart);
+		cosmo_.Omega_b		= pcf_->getValue<real_t>( "cosmology", "Omega_b" );
+		cosmo_.Omega_m		= pcf_->getValue<real_t>( "cosmology", "Omega_m" );
+		cosmo_.Omega_L		= pcf_->getValue<real_t>( "cosmology", "Omega_L" );
+		cosmo_.H0			= pcf_->getValue<real_t>( "cosmology", "H0" );
+		cosmo_.sigma8		= pcf_->getValue<real_t>( "cosmology", "sigma_8" );
+		cosmo_.nspect		= pcf_->getValue<real_t>( "cosmology", "nspec" );
+	}
+	virtual ~transfer_function_plugin(){ };
+	
+	virtual double compute( double k, tf_type type=baryon) = 0;
+
+	virtual double get_kmax( void ) = 0;
+	virtual double get_kmin( void ) = 0;
+	bool tf_is_distinct( void )
+	{	return tf_distinct_;	}
+};
+
+
+struct transfer_function_plugin_creator
+{
+	virtual transfer_function_plugin * create( config_file& cf ) const = 0;
+	virtual ~transfer_function_plugin_creator() { }
+};
+
+std::map< std::string, transfer_function_plugin_creator *>& get_transfer_function_plugin_map();
+void print_transfer_function_plugins( void );
+
+template< class Derived >
+struct transfer_function_plugin_creator_concrete : public transfer_function_plugin_creator
+{
+	transfer_function_plugin_creator_concrete( const std::string& plugin_name )
+	{
+		get_transfer_function_plugin_map()[ plugin_name ] = this;
+	}
+	
+	transfer_function_plugin * create( config_file& cf ) const
+	{
+		return new Derived( cf );
+	}
+};
+
+typedef transfer_function_plugin transfer_function;
+
+transfer_function_plugin *select_transfer_function_plugin( config_file& cf );
+
+
+/**********************************************************************/
+/**********************************************************************/
+/**********************************************************************/
+
+class TransferFunction_k
+{
+public:
+	static transfer_function *ptf_;
+	static real_t nspec_;
+	double dplus_, pnorm_, sqrtpnorm_;
+	
+	TransferFunction_k( transfer_function *tf, real_t nspec, real_t pnorm, real_t dplus )
+	: dplus_(dplus), pnorm_(pnorm)
+	{
+		ptf_ = tf;
+		nspec_ = nspec;
+		sqrtpnorm_ = sqrt( pnorm_ );
+	}
+	
+	inline real_t compute( real_t k ) const
+	{
+		return dplus_*sqrtpnorm_*pow(k,0.5*nspec_)*ptf_->compute(k);
+	}
+};
+
+
+/**********************************************************************/
+/**********************************************************************/
+/**********************************************************************/
+
+#define NZERO_Q
+typedef std::complex<double> complex;
+class TransferFunction_real
+{
+	
+public:
+	
+	double Tr0_;
+	real_t Tmin_, Tmax_, Tscale_;
+	real_t rneg_, rneg2_;
+	static transfer_function *ptf_;
+	static real_t nspec_;
+	
+protected:
+	
+	real_t krgood( real_t mu, real_t q, real_t dlnr, real_t kr )
+	{
+		double krnew = kr;
+		complex cdgamma, zm, zp;
+		double arg, iarg, xneg, xpos, y;
+		gsl_sf_result g_a, g_p;
+		
+		xpos = 0.5*(mu+1.0+q);
+		xneg = 0.5*(mu+1.0-q);
+		y = M_PI/(2.0*dlnr);
+		zp=complex(xpos,y);
+		zm=complex(xneg,y);
+		
+		gsl_sf_lngamma_complex_e (zp.real(), zp.imag(), &g_a, &g_p);
+		zp=std::polar(exp(g_a.val),g_p.val);
+		real_t zpa = g_p.val;
+
+		gsl_sf_lngamma_complex_e (zm.real(), zm.imag(), &g_a, &g_p);
+		zm=std::polar(exp(g_a.val),g_p.val);
+		real_t zma = g_p.val;
+		
+		arg=log(2.0/kr)/dlnr+(zpa+zma)/M_PI;
+		iarg=(real_t)((int)(arg + 0.5));
+		
+		if( arg!=iarg )
+			krnew=kr*exp((arg-iarg)*dlnr);
+		
+		return krnew;
+	}
+	
+	void transform( real_t pnorm, real_t dplus, unsigned N, real_t q, std::vector<double>& rr, std::vector<double>& TT )
+	{
+		const double mu = 0.5;
+		double qmin = 1.0e-6, qmax = 1.0e+6;
+		
+		q = 0.0;
+		
+		N = 16384;
+		
+#ifdef NZERO_Q
+		//q = 0.4;
+		q = 0.5;
+#endif
+		
+		double kmin = qmin, kmax=qmax;
+		double rmin = qmin, rmax = qmax;
+		double k0 = exp(0.5*(log(kmax)+log(kmin)));
+		double r0 = exp(0.5*(log(rmax)+log(rmin)));
+		double L = log(rmax)-log(rmin);
+		double k0r0 = k0*r0;
+		double dlnk = L/N, dlnr = L/N;
+		
+		double sqrtpnorm = sqrt(pnorm);
+		
+		double dir = 1.0;
+		
+		double fftnorm = 1.0/N;
+		
+		fftw_complex *in, *out;
+		fftw_plan p,ip;
+		
+		in = new fftw_complex[N];
+		out = new fftw_complex[N];
+		
+		//... perform anti-ringing correction from Hamilton (2000)
+		k0r0 = krgood( mu, q, dlnr, k0r0 );
+
+		std::ofstream ofsk("transfer_k.txt");
+		double sum_in = 0.0;
+		for( unsigned i=0; i<N; ++i )
+		{
+			
+			double k = k0*exp(((int)i - (int)N/2+1) * dlnk);
+			//double k = k0*exp(((int)i - (int)N/2) * dlnk);
+			//double k = k0*exp(ii * dlnk);
+			
+			//... some constants missing ...//
+			
+			in[i].re = dplus*sqrtpnorm*ptf_->compute( k )*pow(k,0.5*nspec_)*pow(k,1.5-q);
+			in[i].im = 0.0;
+			
+			sum_in += in[i].re;
+			ofsk << std::setw(16) << k <<std::setw(16) << in[i].re << std::endl;
+		}
+		ofsk.close();
+		
+		
+		p = fftw_create_plan(N, FFTW_FORWARD, FFTW_ESTIMATE);
+		ip = fftw_create_plan(N, FFTW_BACKWARD, FFTW_ESTIMATE);
+		
+		//fftw_one(p, in, out);
+		fftw_one(p, in, out);
+		
+		//... compute the Hankel transform by convolution with the Bessel function
+		for( unsigned i=0; i<N; ++i )
+		{
+			int ii=i;
+			if( ii > (int)N/2 )
+				ii -= N;
+			
+#ifndef NZERO_Q
+			double y=ii*M_PI/L;
+			complex zp((mu+1.0)*0.5,y);
+			gsl_sf_result g_a, g_p;
+			gsl_sf_lngamma_complex_e(zp.real(), zp.imag(), &g_a, &g_p);
+			
+			double arg = 2.0*(log(2.0/k0r0)*y+g_p.val);
+			complex cu = complex(out[i].re,out[i].im)*std::polar(1.0,arg);
+			out[i].re = cu.real()*fftnorm;
+			out[i].im = cu.imag()*fftnorm;
+			
+#else		
+			//complex x(dir*q, (double)ii*2.0*M_PI/L);
+			complex x(dir*q, (double)ii*2.0*M_PI/L);
+			gsl_sf_result g_a, g_p;
+			
+			complex g1, g2, garg, U, phase;						
+			complex twotox = pow(complex(2.0,0.0),x);
+			
+			/////////////////////////////////////////////////////////
+			//.. evaluate complex Gamma functions
+			
+			garg = 0.5*(mu+1.0+x);
+			gsl_sf_lngamma_complex_e (garg.real(), garg.imag(), &g_a, &g_p);
+			g1 = std::polar(exp(g_a.val),g_p.val);
+
+			
+			garg = 0.5*(mu+1.0-x);
+			gsl_sf_lngamma_complex_e (garg.real(), garg.imag(), &g_a, &g_p);
+			g2 = std::polar(exp(g_a.val),g_p.val);
+
+			/////////////////////////////////////////////////////////
+			//.. compute U
+			
+			if( (fabs(g2.real()) < 1e-19 && fabs(g2.imag()) < 1e-19) )
+			{
+				//std::cerr << "Warning : encountered possible singularity in TransferFunction_real::transform!\n";
+				g1 = 1.0; g2 = 1.0;
+			}
+			
+			
+			U = twotox * g1 / g2;
+			phase = pow(complex(k0r0,0.0),complex(0.0,2.0*M_PI*(double)ii/L));
+			
+			complex cu = complex(out[i].re,out[i].im)*U*phase*fftnorm;
+			
+			out[i].re = cu.real();
+			out[i].im = cu.imag();
+			
+			if( (out[i].re != out[i].re)||(out[i].im != out[i].im) )
+			{	std::cerr << "NaN @ i=" << i << ", U= " << U << ", phase = " << phase << ", g1 = " << g1 << ", g2 = " << g2 << std::endl;
+				std::cerr << "mu+1+q = " << mu+1.0+q << std::endl;
+				//break;
+			}
+			
+#endif
+
+		}
+			
+		/*out[N/2].im = 0.0;
+		out[N/2+1].im = 0.0;
+		out[N/2+1].re = out[N/2].re;
+		out[N/2].im = 0.0;*/
+		
+		fftw_one(ip, out, in);
+		
+		rr.assign(N,0.0);
+		TT.assign(N,0.0);
+		
+		r0 = k0r0/k0;
+		
+		for( unsigned i=0; i<N; ++i )
+		{
+			int ii = i;
+			ii -= N/2-1;
+			//ii -= N/2;
+			//if( ii>N/2)
+			//	ii-=N;
+			
+			
+			
+			double r = r0*exp(-ii*dlnr);
+			rr[N-i-1] = r;
+			TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*pow(r,-(1.5+q));
+			
+			//TT[N-i-1] = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*exp( -dir*(q+1.5)*ii*dlnr +q*log(k0r0))/r0;
+			
+			//rr[i] = r;
+			//TT[i] = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*pow(r,-(1.5+q));
+			
+		}
+		
+		
+		{
+			std::ofstream ofs("transfer_real_new.txt");
+			for( unsigned i=0; i<N; ++i )
+			{
+				int ii = i;
+				ii -= N/2-1;
+				
+				double r = r0*exp(-ii*dlnr);//r0*exp(ii*dlnr);
+				double T = 4.0*M_PI* sqrt(M_PI/2.0) *  in[i].re*pow(r,-(1.5+q));
+				ofs << r << "\t\t" << T << "\t\t" << in[i].im << std::endl;
+			}
+		}
+		
+		delete[] in;
+		delete[] out;
+		fftw_destroy_plan(p);
+		fftw_destroy_plan(ip);
+	}
+	std::vector<real_t> m_xtable,m_ytable;
+	double m_xmin, m_xmax, m_dx;
+public:
+	TransferFunction_real( transfer_function *tf, real_t nspec, real_t pnorm, real_t dplus, real_t rmin, real_t rmax, real_t knymax, unsigned nr )
+	{
+				
+		ptf_ = tf;
+		nspec_ = nspec;
+	
+		real_t q = 0.8;
+		
+		std::vector<double> r,T;
+		
+		transform( pnorm, dplus, nr, q, r, T );
+		
+		//... determine r=0 zero component by integrating up to the Nyquist frequency
+		gsl_integration_workspace * wp; 
+		gsl_function F;
+		wp = gsl_integration_workspace_alloc(20000);
+		F.function = &call_wrapper;
+		double par[2]; par[0] = dplus*sqrt(pnorm); //par[1] = M_PI/kny;
+		F.params = (void*)par;
+		double error;
+		
+		//.. need a factor sqrt( 2*kny^2_x + 2*kny^2_y + 2*kny^2_z )/2 = sqrt(3/2)kny (in equilateral case)
+		//#warning Need to integrate from Boxsize (see below)?
+		gsl_integration_qag (&F, 0.0, sqrt(1.5)*knymax, 0, 1e-8, 20000, GSL_INTEG_GAUSS21, wp, &Tr0_, &error); 
+		//gsl_integration_qag (&F, 2.0*M_PI/100.0, sqrt(1.5)*knymax, 0, 1e-8, 20000, GSL_INTEG_GAUSS21, wp, &Tr0_, &error); 
+		
+		gsl_integration_workspace_free(wp);
+				
+		gsl_interp_accel *accp;
+		gsl_spline *splinep;
+		
+		std::vector<double> xsp, ysp;
+		
+		for( unsigned i=0; i<r.size(); ++i )
+		{
+			// spline positive and negative part separately
+			/*if( T[i] > 0.0 )
+			{
+				xp.push_back( 2.0*log10(r[i]) );
+				yp.push_back( log10(T[i]) );
+				rneg_ = r[i];
+				rneg2_ = rneg_*rneg_;
+			}else {
+				xn.push_back( 2.0*log10(r[i]) );
+				yn.push_back( log10(-T[i]) );
+			}*/
+			
+			
+			if( r[i] > rmin && r[i] < rmax )
+			{
+				xsp.push_back( log10(r[i]) );
+				ysp.push_back( T[i]*r[i]*r[i] );
+			}
+			
+		}
+		
+		
+		
+		
+		
+		accp = gsl_interp_accel_alloc ();
+		//accn = gsl_interp_accel_alloc ();
+		
+		//... spline interpolation is only marginally slower here
+		//splinep = gsl_spline_alloc (gsl_interp_cspline, xsp.size() );
+		splinep = gsl_spline_alloc (gsl_interp_akima, xsp.size() );
+
+		//... set up everything for spline interpolation
+		gsl_spline_init (splinep, &xsp[0], &ysp[0], xsp.size() );
+		
+
+		
+		//.. build lookup table using spline interpolation
+		
+		
+		m_xmin = log10(rmin);//m_xtable[0];
+		m_xmax = log10(rmax);//m_xtable.back();
+		m_dx   = (m_xmax-m_xmin)/nr;//(m_xtable.size()-1);
+		
+		for(unsigned i=0; i<nr; ++i )
+		{
+			m_xtable.push_back( m_xmin+i*m_dx );
+			m_ytable.push_back( gsl_spline_eval(splinep, (m_xtable.back()), accp) );
+		}
+		
+		{
+			real_t dlogr = (log10(rmax)-log10(rmin))/1000;
+			std::ofstream ofs("transfer_splinep.txt");			
+			
+			for( int i=0; i< 1000; ++i ) 
+			{
+				real_t ri = rmin*pow(10.0,i*dlogr);
+				//				ofs << std::setw(16) << ri << std::setw(16) << compute_real(ri*ri) << std::endl;
+				ofs << std::setw(16) << ri << std::setw(16) << compute_real(ri*ri) << std::endl;
+			}
+		}
+		
+		
+		gsl_spline_free (splinep);
+		gsl_interp_accel_free (accp);
+	}
+	
+	static double call_wrapper( double k, void *arg )
+	{
+		double *a = (double*)arg;
+		return 4.0*M_PI*a[0]*ptf_->compute( k )*pow(k,0.5*nspec_)*k*k;
+	}
+	
+	~TransferFunction_real()
+	{ }
+	
+#if 1
+	inline real_t compute_real( real_t r2 ) const
+	{
+		const double EPS = 1e-8;
+		const double Reps2 = EPS*EPS;
+		
+		if( r2 <Reps2 )
+			return Tr0_;
+		
+		/*real_t logr = log10(r2);
+		int i = (int)((logr-m_xmin)/m_dx);
+		
+		if( i < 0 ) i = 0;
+		else if( i >= m_xtable.size()-1 ) i = m_xtable.size()-2;
+		
+		real_t y1,y2;
+		y1 = m_ytable[i];
+		y2 = m_ytable[i+1];
+		
+		//std::cerr << y1 << " " << y2 << std::endl;
+		return y1 + (y2-y1)*(logr/m_dx-(real_t)i);*/
+		
+		double r=0.5*log10(r2);//sqrt(r2);
+		double ii = (r-m_xmin)/m_dx;
+		int i = (int)ii;//((r-m_xmin)/m_dx);
+		
+		if( i < 0 ) i = 0;
+		else if( i >= (int)m_xtable.size()-1 ) i = m_xtable.size()-2;
+		
+		double y1,y2;
+		y1 = m_ytable[i];
+		y2 = m_ytable[i+1];
+		
+		//		return y1 + (y2-y1)*((r-m_xmin)/m_dx-(real_t)i);
+		return (real_t)((y1 + (y2-y1)*(ii-(double)i))/r2);
+	}
+#else	
+	inline real_t compute_real( real_t r2 ) const
+	{
+		const real_t EPS = 1e-8;
+		const real_t Reps2 = EPS*EPS;
+		
+		if( r2 <Reps2 )
+			return Tr0_;
+
+		/*if( r2 < rneg2_ )
+			q = pow(10.0,gsl_spline_eval (splinep, log10(r2), accp));
+		else
+			q = -pow(10.0,gsl_spline_eval(splinen, log10(r2), accn));*/
+		
+		real_t q;		
+		real_t logr2 = log10(r2);
+		q = pow(10.0,gsl_spline_eval(splinep, logr2, accp));
+		real_t sign = 1.0;
+		if( gsl_spline_eval(splinen, logr2, accn) < 0.0 )
+			sign = -1.0;
+		return q*sign;
+	}
+#endif
+};
+
+
+#if 0
+class TransferFunction_real_old
+{
+protected:
+	bool m_breal_init;
+	static real_t nspec;
+	gsl_integration_workspace * wp; 
+	gsl_integration_workspace * c_wp;
+	gsl_integration_qawo_table * wf;
+	gsl_function F;
+	
+	gsl_interp_accel *acc;
+	gsl_spline *spline;
+	
+	static TransferFunction *ptf;
+	
+public:
+	TransferFunction_real_old( TransferFunction *tf, real_t ns, real_t pnorm, real_t dplus, real_t rmin, real_t rmax, unsigned nr )
+	: m_breal_init(false)
+	{ 
+		
+		ptf = tf;
+		nspec = ns;
+		wf = gsl_integration_qawo_table_alloc(0.0, 10000.0, GSL_INTEG_SINE, 10000);            
+		
+		
+		wp = gsl_integration_workspace_alloc(20000);
+		c_wp = gsl_integration_workspace_alloc(20000);
+		
+		acc = gsl_interp_accel_alloc ();
+		spline = gsl_spline_alloc (gsl_interp_cspline, nr+1);
+		
+		real_t dlogr = (log10(rmax)-log10(rmin))/nr;
+		
+		std::vector<real_t> x(nr+1,0.0),y(nr+1,0.0);
+		
+		
+		/*std::ofstream ofs("transfer_fourier.txt");
+		for( int i=0; i<1000; ++i )
+		{
+			real_t k=1.e-3*pow(10.0,i*6.0/1000.0);
+			ofs << k << "\t\t" << call_wrapper(k, NULL) << std::endl;
+		}*/
+		
+
+		real_t fac = sqrt(pnorm)*dplus;
+		
+		std::ofstream ofs("transfer_real_old.txt");
+		
+		for( unsigned i=0; i<nr+1; ++i )
+		{
+			real_t r = rmin*pow(10.0,(real_t)i*dlogr);
+			real_t result1, result2, error;
+			
+			//... integrate from zero to first point ...//
+			real_t k0 = 1e-2;
+
+			F.function = &call_wrapper2;
+			real_t k[1]; k[0] = r;
+			F.params = (void*)k;
+			
+			gsl_integration_qags (&F, 0.0, k0, 0, 1e-7, 20000,  wp, &result1, &error); 
+			
+			//... integrate further ...//
+			F.function = &call_wrapper;
+			F.params = NULL;
+
+			//std::cerr << "r = " << r << std::endl; 
+
+			gsl_integration_qawo_table_set(wf,r,100000.0,GSL_INTEG_SINE);
+			gsl_integration_qawf(&F,k0,1e-8,20000,wp,c_wp,wf,&result2,&error);
+			
+			result1 = 4.0*M_PI*fac*(result1+result2)/r;
+			
+			x[i] = log10(r*r);
+			y[i] = log10(result1);
+			
+			ofs << std::setw(16) << r << std::setw(16) << result1 << std::endl;
+			
+		}
+		
+		gsl_spline_init (spline, &x[0], &y[0], nr+1);
+		
+		gsl_integration_qawo_table_free(wf);
+		gsl_integration_workspace_free(wp);
+		gsl_integration_workspace_free(c_wp);
+		
+	}
+	
+	~TransferFunction_real_old()
+	{
+		gsl_spline_free (spline);
+		gsl_interp_accel_free (acc);
+	}
+	
+	static double call_wrapper( double k, void *arg )
+	{
+		//return ptf->compute( k );
+		return ptf->compute( k )*k*pow(k,0.5*nspec);
+		//return ptf->compute(k);
+	}
+	
+	static double call_wrapper2( double k, void *arg )
+	{
+		//return ptf->compute( k );
+		double r = ((double*)arg)[0];
+		return ptf->compute( k )*k*pow(k,0.5*nspec) *sin(k*r);
+		//return ptf->compute(k);
+	}
+	
+	real_t compute_real( real_t r2 )
+	{
+		if( r2 < 1e-2 )
+			return 0.0;
+		return pow(10.0,gsl_spline_eval (spline, log10(r2), acc));
+	}
+};
+#endif
+
+
+
+#if 0
+//! Implementation of class TransferFunction_tab for the BBKS transfer function 
+/*!
+    
+*/
+class TransferFunction_tab : public TransferFunction{
+private:
+  //Cosmology m_Cosmology;
+
+  std::string m_filename;
+  std::vector<real_t> m_tab_k;
+  std::vector<real_t> m_tab_Tk;
+	
+
+  void read_table( void ){
+#ifdef WITH_MPI
+    if( MPI::COMM_WORLD.Get_rank() == 0 ){
+#endif
+      std::cerr << " - reading tabulated transfer function from file \'"
+                << m_filename << "\'\n";
+      std::string line;
+      std::ifstream ifs( m_filename.c_str() );
+      
+      m_tab_k.clear();
+      m_tab_Tk.clear();
+      
+      while( !ifs.eof() ){
+        getline(ifs,line);
+        std::stringstream ss(line);
+        
+        real_t k, Tk;
+        ss >> k;
+        ss >> Tk;
+        
+        m_tab_k.push_back( k );
+        m_tab_Tk.push_back( Tk );
+        
+      }
+#ifdef WITH_MPI
+    }
+    
+    unsigned n=m_tab_k.size();
+    MPI::COMM_WORLD.Bcast( &n, 1, MPI_UNSIGNED, 0 );
+    
+    if( MPI::COMM_WORLD.Get_rank() > 0 ){
+      m_tab_k.assign(n,0);
+      m_tab_Tk.assign(n,0);
+    }
+#ifdef SINGLE_PRECISION    
+    MPI::COMM_WORLD.Bcast( &m_tab_k[0],  n, MPI_FLOAT, 0 );
+    MPI::COMM_WORLD.Bcast( &m_tab_Tk[0], n, MPI_FLOAT, 0 );
+#else
+	  MPI::COMM_WORLD.Bcast( &m_tab_k[0],  n, MPI_DOUBLE, 0 );
+	  MPI::COMM_WORLD.Bcast( &m_tab_Tk[0], n, MPI_DOUBLE, 0 );
+#endif
+#endif
+    
+  }
+
+public:
+  TransferFunction_tab( Cosmology aCosm, std::string filename )
+  : TransferFunction( aCosm ), m_filename( filename )
+  {
+    read_table();
+  }
+  
+  virtual inline real_t compute( real_t k ){
+    
+    return linint( k, m_tab_k, m_tab_Tk );
+  }
+  
+  inline real_t get_kmin( void ){
+    return m_tab_k[1];
+  }
+  
+  inline real_t get_kmax( void ){
+    return m_tab_k[m_tab_k.size()-2];
+  }
+  
+};
+
+class TransferFunction_CAMB : public TransferFunction{
+public:
+	enum TFtype{
+		TF_baryon, TF_cdm, TF_total
+	};
+	
+	
+private:
+	//Cosmology m_Cosmology;
+	
+	std::string m_filename_Pk, m_filename_Tk;
+	std::vector<real_t> m_tab_k, m_tab_Tk;
+	
+	Spline_interp *m_psinterp;
+	gsl_interp_accel *acc;
+	gsl_spline *spline;
+	
+	
+	
+	void read_table( TFtype iwhich ){
+#ifdef WITH_MPI
+		if( MPI::COMM_WORLD.Get_rank() == 0 ){
+#endif
+			std::cerr 
+				<< " - reading tabulated transfer function data from file \n"
+				<< "    \'" << m_filename_Tk << "\'\n";
+			
+			std::string line;
+			std::ifstream ifs( m_filename_Tk.c_str() );
+			
+			m_tab_k.clear();
+			m_tab_Tk.clear();
+			
+			while( !ifs.eof() ){
+				getline(ifs,line);
+				
+				if(ifs.eof()) break;
+				
+				std::stringstream ss(line);
+				
+				real_t k, Tkc, Tkb, Tkg, Tkr, Tknu, Tktot;
+				ss >> k;
+				ss >> Tkc;
+				ss >> Tkb;
+				ss >> Tkg;
+				ss >> Tkr;
+				ss >> Tknu;
+				ss >> Tktot;
+				
+				m_tab_k.push_back( log10(k) );
+				
+				switch( iwhich ){
+					case TF_total:
+						m_tab_Tk.push_back( log10(Tktot) );
+						break;
+					case TF_baryon:
+						m_tab_Tk.push_back( log10(Tkb) );
+						break;
+					case TF_cdm:
+						m_tab_Tk.push_back( log10(Tkc) );
+						break;
+				}
+				//m_tab_Tk_cdm.push_back( Tkc );
+				//m_tab_Tk_baryon.push_back( Tkb );
+				//m_tab_Tk_tot.push_back( Tktot );
+			}
+			
+			ifs.close();
+			
+			
+			
+			
+#ifdef WITH_MPI
+		}
+		
+		unsigned n=m_tab_k.size();
+		MPI::COMM_WORLD.Bcast( &n, 1, MPI_UNSIGNED, 0 );
+		
+		if( MPI::COMM_WORLD.Get_rank() > 0 ){
+			m_tab_k.assign(n,0);
+			m_tab_Tk.assign(n,0);
+		}
+#ifdef SINGLE_PRECISION		
+		MPI::COMM_WORLD.Bcast( &m_tab_k[0],  n, MPI_FLOAT, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk[0], n, MPI_FLOAT, 0 );
+#else
+		MPI::COMM_WORLD.Bcast( &m_tab_k[0],  n, MPI_DOUBLE, 0 );
+		MPI::COMM_WORLD.Bcast( &m_tab_Tk[0], n, MPI_DOUBLE, 0 );
+#endif
+#endif
+		
+	}
+	
+public:
+	TransferFunction_CAMB( Cosmology aCosm, std::string filename_Tk, TFtype iwhich )
+	: TransferFunction( aCosm ), m_filename_Tk( filename_Tk ), m_psinterp( NULL )
+	{
+		read_table( iwhich );
+		
+		//m_psinterp = new Spline_interp( m_tab_k, m_tab_Tk );
+		
+		acc = gsl_interp_accel_alloc();
+		//spline = gsl_spline_alloc( gsl_interp_cspline, m_tab_k.size() );
+		spline = gsl_spline_alloc( gsl_interp_akima, m_tab_k.size() );
+		
+		gsl_spline_init (spline, &m_tab_k[0], &m_tab_Tk[0], m_tab_k.size() );
+		
+	}
+	
+	virtual ~TransferFunction_CAMB()
+	{
+		//delete m_psinterp;
+		
+		gsl_spline_free (spline);
+		gsl_interp_accel_free (acc);
+	}
+	
+	virtual inline real_t compute( real_t k ){
+		//real_t Tkc = linint( k, m_tab_k, m_tab_Tk_cdm );
+		//real_t Tkb = linint( k, m_tab_k, m_tab_Tk_baryon );
+		
+		//return pow(10.0, linint( log10(k), m_tab_k, m_tab_Tk ));
+		
+		//return pow(10.0, m_psinterp->interp(log10(k)) );
+		
+		//if( k<get_kmin() )
+		//	return pow(10.0,m_tab_k[1]);
+		
+		return pow(10.0, gsl_spline_eval (spline, log10(k), acc) );
+	}
+	
+	inline real_t get_kmin( void ){
+		return pow(10.0,m_tab_k[1]);
+	}
+	
+	inline real_t get_kmax( void ){
+		return pow(10.0,m_tab_k[m_tab_k.size()-2]);
+	}
+	
+};
+
+
+
+
+//! Implementation of abstract base class TransferFunction for the Eisenstein & Hu transfer function 
+/*!
+    This class implements the analytical fit to the matter transfer
+    function by Eisenstein & Hu (1999).
+*/
+class TransferFunction_Eisenstein : public TransferFunction{
+protected:
+  //Cosmology m_Cosmology;
+  double  m_h0;
+  double	omhh,		/* Omega_matter*h^2 */
+	obhh,		/* Omega_baryon*h^2 */
+	theta_cmb,	/* Tcmb in units of 2.7 K */
+	z_equality,	/* Redshift of matter-radiation equality, really 1+z */
+	k_equality,	/* Scale of equality, in Mpc^-1 */
+	z_drag,		/* Redshift of drag epoch */
+	R_drag,		/* Photon-baryon ratio at drag epoch */
+	R_equality,	/* Photon-baryon ratio at equality epoch */
+	sound_horizon,	/* Sound horizon at drag epoch, in Mpc */
+	k_silk,		/* Silk damping scale, in Mpc^-1 */
+	alpha_c,	/* CDM suppression */
+	beta_c,		/* CDM log shift */
+	alpha_b,	/* Baryon suppression */
+	beta_b,		/* Baryon envelope shift */
+	beta_node,	/* Sound horizon shift */
+	k_peak,		/* Fit to wavenumber of first peak, in Mpc^-1 */
+	sound_horizon_fit,	/* Fit to sound horizon, in Mpc */
+	alpha_gamma;	/* Gamma suppression in approximate TF */
+
+  //! private member function: sets internal quantities for Eisenstein & Hu fitting
+  void TFset_parameters(double omega0hh, double f_baryon, double Tcmb)
+  /* Set all the scalars quantities for Eisenstein & Hu 1997 fitting formula */
+  /* Input: omega0hh -- The density of CDM and baryons, in units of critical dens,
+     multiplied by the square of the Hubble constant, in units
+     of 100 km/s/Mpc */
+  /* 	  f_baryon -- The fraction of baryons to CDM */
+  /*        Tcmb -- The temperature of the CMB in Kelvin.  Tcmb<=0 forces use
+	    of the COBE value of  2.728 K. */
+  /* Output: Nothing, but set many global variables used in TFfit_onek(). 
+     You can access them yourself, if you want. */
+  /* Note: Units are always Mpc, never h^-1 Mpc. */
+  {
+    double z_drag_b1, z_drag_b2;
+    double alpha_c_a1, alpha_c_a2, beta_c_b1, beta_c_b2, alpha_b_G, y;
+	  
+    if (f_baryon<=0.0 || omega0hh<=0.0) {
+      fprintf(stderr, "TFset_parameters(): Illegal input.\n");
+      exit(1);
+    }
+    omhh = omega0hh;
+    obhh = omhh*f_baryon;
+    if (Tcmb<=0.0) Tcmb=2.728;	/* COBE FIRAS */
+    theta_cmb = Tcmb/2.7;
+    
+    z_equality = 2.50e4*omhh/POW4(theta_cmb);  /* Really 1+z */
+    k_equality = 0.0746*omhh/SQR(theta_cmb);
+
+    z_drag_b1 = 0.313*pow((double)omhh,-0.419)*(1+0.607*pow((double)omhh,0.674));
+    z_drag_b2 = 0.238*pow((double)omhh,0.223);
+    z_drag = 1291*pow(omhh,0.251)/(1+0.659*pow((double)omhh,0.828))*
+		(1+z_drag_b1*pow((double)obhh,(double)z_drag_b2));
+    
+    R_drag = 31.5*obhh/POW4(theta_cmb)*(1000/(1+z_drag));
+    R_equality = 31.5*obhh/POW4(theta_cmb)*(1000/z_equality);
+
+    sound_horizon = 2./3./k_equality*sqrt(6./R_equality)*
+	    log((sqrt(1+R_drag)+sqrt(R_drag+R_equality))/(1+sqrt(R_equality)));
+
+    k_silk = 1.6*pow((double)obhh,0.52)*pow((double)omhh,0.73)*(1+pow((double)10.4*omhh,-0.95));
+
+    alpha_c_a1 = pow((double)46.9*omhh,0.670)*(1+pow(32.1*omhh,-0.532));
+    alpha_c_a2 = pow((double)12.0*omhh,0.424)*(1+pow(45.0*omhh,-0.582));
+    alpha_c = pow(alpha_c_a1,-f_baryon)*
+		pow(alpha_c_a2,-CUBE(f_baryon));
+    
+    beta_c_b1 = 0.944/(1+pow(458*omhh,-0.708));
+    beta_c_b2 = pow(0.395*omhh, -0.0266);
+    beta_c = 1.0/(1+beta_c_b1*(pow(1-f_baryon, beta_c_b2)-1));
+
+    y = z_equality/(1+z_drag);
+    alpha_b_G = y*(-6.*sqrt(1+y)+(2.+3.*y)*log((sqrt(1+y)+1)/(sqrt(1+y)-1)));
+    alpha_b = 2.07*k_equality*sound_horizon*pow(1+R_drag,-0.75)*alpha_b_G;
+
+    beta_node = 8.41*pow(omhh, 0.435);
+    beta_b = 0.5+f_baryon+(3.-2.*f_baryon)*sqrt(pow(17.2*omhh,2.0)+1);
+
+    k_peak = 2.5*3.14159*(1+0.217*omhh)/sound_horizon;
+    sound_horizon_fit = 44.5*log(9.83/omhh)/sqrt(1+10.0*pow(obhh,0.75));
+
+    alpha_gamma = 1-0.328*log(431.0*omhh)*f_baryon + 0.38*log(22.3*omhh)*
+		SQR(f_baryon);
+    
+    return;
+  }
+
+  //! private member function: computes transfer function for mode k (k in Mpc)
+  inline double TFfit_onek(double k, double *tf_baryon, double *tf_cdm)
+  /* Input: k -- Wavenumber at which to calculate transfer function, in Mpc^-1.
+   *tf_baryon, *tf_cdm -- Input value not used; replaced on output if
+   the input was not NULL. */
+  /* Output: Returns the value of the full transfer function fitting formula.
+     This is the form given in Section 3 of Eisenstein & Hu (1997).
+     *tf_baryon -- The baryonic contribution to the full fit.
+     *tf_cdm -- The CDM contribution to the full fit. */
+  /* Notes: Units are Mpc, not h^-1 Mpc. */
+  {
+    double T_c_ln_beta, T_c_ln_nobeta, T_c_C_alpha, T_c_C_noalpha;
+    double q, xx, xx_tilde;//, q_eff;
+    double T_c_f, T_c, s_tilde, T_b_T0, T_b, f_baryon, T_full;
+    //double T_0_L0, T_0_C0, T_0, gamma_eff; 
+    //double T_nowiggles_L0, T_nowiggles_C0, T_nowiggles;
+
+    k = fabs(k);	/* Just define negative k as positive */
+    if (k==0.0) {
+	if (tf_baryon!=NULL) *tf_baryon = 1.0;
+	if (tf_cdm!=NULL) *tf_cdm = 1.0;
+	return 1.0;
+    }
+
+    q = k/13.41/k_equality;
+    xx = k*sound_horizon;
+
+    T_c_ln_beta = log(2.718282+1.8*beta_c*q);
+    T_c_ln_nobeta = log(2.718282+1.8*q);
+    T_c_C_alpha = 14.2/alpha_c + 386.0/(1+69.9*pow(q,1.08));
+    T_c_C_noalpha = 14.2 + 386.0/(1+69.9*pow(q,1.08));
+
+    T_c_f = 1.0/(1.0+POW4(xx/5.4));
+    T_c = T_c_f*T_c_ln_beta/(T_c_ln_beta+T_c_C_noalpha*SQR(q)) +
+	    (1-T_c_f)*T_c_ln_beta/(T_c_ln_beta+T_c_C_alpha*SQR(q));
+    
+    s_tilde = sound_horizon*pow(1+CUBE(beta_node/xx),-1./3.);
+    xx_tilde = k*s_tilde;
+
+    T_b_T0 = T_c_ln_nobeta/(T_c_ln_nobeta+T_c_C_noalpha*SQR(q));
+    T_b = sin(xx_tilde)/(xx_tilde)*(T_b_T0/(1+SQR(xx/5.2))+
+		alpha_b/(1+CUBE(beta_b/xx))*exp(-pow(k/k_silk,1.4)));
+    
+    f_baryon = obhh/omhh;
+    T_full = f_baryon*T_b + (1-f_baryon)*T_c;
+
+    /* Now to store these transfer functions */
+    if (tf_baryon!=NULL) *tf_baryon = T_b;
+    if (tf_cdm!=NULL) *tf_cdm = T_c;
+    return T_full;
+}
+
+public:
+  //! Constructor for Eisenstein & Hu fitting for transfer function
+  /*!
+    \param aCosm structure of type Cosmology carrying the cosmological parameters
+    \param Tcmb mean temperature of the CMB fluctuations (defaults to
+    Tcmb = 2.726 if not specified)
+  */
+  TransferFunction_Eisenstein( Cosmology aCosm, double Tcmb = 2.726 )
+    :  TransferFunction(aCosm), m_h0( aCosm.H0*0.01 )
+  {
+    TFset_parameters( (aCosm.Omega_m)*aCosm.H0*aCosm.H0*(0.01*0.01), 
+		      aCosm.Omega_b/(aCosm.Omega_m-aCosm.Omega_b),//-aCosm.Omega_b), 
+		    Tcmb);
+  }
+
+  //! Computes the transfer function for k in Mpc/h by calling TFfit_onek
+  virtual inline real_t compute( real_t k ){
+    double tfb, tfcdm, fb, fc; //, tfull
+    TFfit_onek( k*m_h0, &tfb, &tfcdm );
+    
+    fb = m_Cosmology.Omega_b/(m_Cosmology.Omega_m);
+    fc = (m_Cosmology.Omega_m-m_Cosmology.Omega_b)/(m_Cosmology.Omega_m) ;
+    
+    return fb*tfb+fc*tfcdm;
+    //return 1.0;
+  }
+	
+	
+  inline void compute( real_t k, real_t &tf_dm, real_t &tf_baryon ){
+	double tfb, tfcdm, fb, fc; //, tfull
+	TFfit_onek( k*m_h0, &tfb, &tfcdm );
+	  
+	fb = m_Cosmology.Omega_b/(m_Cosmology.Omega_m);
+	fc = (m_Cosmology.Omega_m-m_Cosmology.Omega_b)/(m_Cosmology.Omega_m) ;
+	  
+	tf_dm = fc*tfcdm;
+	tf_baryon = fb*tfb;
+  }
+  
+	
+  inline real_t get_kmin( void ){
+    return 0.0;
+  }
+  
+  inline real_t get_kmax( void ){
+    return 1.e3;
+  }
+
+};
+
+
+
+class TransferFunction_EisensteinWDM : public TransferFunction_Eisenstein
+{
+protected:
+	using TransferFunction_Eisenstein::TFfit_onek;
+	real_t m_WDMalpha; 
+	
+public:
+	TransferFunction_EisensteinWDM( Cosmology aCosm, double Tcmb = 2.726 )
+	: TransferFunction_Eisenstein(aCosm,Tcmb)
+	{
+		m_WDMalpha = 0.05 * pow( aCosm.Omega_m/0.4,0.15)
+					*pow(aCosm.H0/0.65,1.3)*pow(aCosm.WDMmass,-1.15)
+					*pow(1.5/aCosm.WDMg_x,0.29);
+	}
+	
+	inline real_t compute( real_t k )
+	{
+		double tfb, tfcdm, fb, fc; //, tfull
+		TFfit_onek( k*m_h0, &tfb, &tfcdm );
+		
+		fb = m_Cosmology.Omega_b/(m_Cosmology.Omega_m);
+		fc = (m_Cosmology.Omega_m-m_Cosmology.Omega_b)/(m_Cosmology.Omega_m) ;
+	
+		double tf = fb*tfb+fc*tfcdm;
+		
+		return tf*pow(1.0+(m_WDMalpha*k)*(m_WDMalpha*k),-5.0);
+		
+	}
+	
+};
+
+
+
+
+class TransferFunction_EisensteinNeutrino : public TransferFunction
+{
+	/* Fitting Formulae for CDM + Baryon + Massive Neutrino (MDM) cosmologies. */
+	/* Daniel J. Eisenstein & Wayne Hu, Institute for Advanced Study */
+	
+	/* There are two primary routines here, one to set the cosmology, the
+	 other to construct the transfer function for a single wavenumber k. 
+	 You should call the former once (per cosmology) and the latter as 
+	 many times as you want. */
+	
+	/* TFmdm_set_cosm() -- User passes all the cosmological parameters as
+	 arguments; the routine sets up all of the scalar quantites needed 
+	 computation of the fitting formula.  The input parameters are: 
+	 1) omega_matter -- Density of CDM, baryons, and massive neutrinos,
+	 in units of the critical density. 
+	 2) omega_baryon -- Density of baryons, in units of critical. 
+	 3) omega_hdm    -- Density of massive neutrinos, in units of critical 
+	 4) degen_hdm    -- (Int) Number of degenerate massive neutrino species 
+	 5) omega_lambda -- Cosmological constant 
+	 6) hubble       -- Hubble constant, in units of 100 km/s/Mpc 
+	 7) redshift     -- The redshift at which to evaluate */
+	
+	/* TFmdm_onek_mpc() -- User passes a single wavenumber, in units of Mpc^-1.
+	 Routine returns the transfer function from the Eisenstein & Hu
+	 fitting formula, based on the cosmology currently held in the 
+	 internal variables.  The routine returns T_cb (the CDM+Baryon
+	 density-weighted transfer function), although T_cbn (the CDM+
+	 Baryon+Neutrino density-weighted transfer function) is stored
+	 in the global variable tf_cbnu. */
+	
+	/* We also supply TFmdm_onek_hmpc(), which is identical to the previous
+	 routine, but takes the wavenumber in units of h Mpc^-1. */
+	
+	/* We hold the internal scalar quantities in global variables, so that
+	 the user may access them in an external program, via "extern" declarations. */
+	
+	/* Please note that all internal length scales are in Mpc, not h^-1 Mpc! */
+	
+	/* -------------------------- Prototypes ----------------------------- */
+	
+/*	int TFmdm_set_cosm(float omega_matter, float omega_baryon, float omega_hdm,
+					   int degen_hdm, float omega_lambda, float hubble, float redshift);
+	float TFmdm_onek_mpc(float kk);
+	float TFmdm_onek_hmpc(float kk);
+*/	
+	
+	/* ------------------------- Global Variables ------------------------ */
+	
+	/* The following are set in TFmdm_set_cosm() */
+	float   alpha_gamma,	/* sqrt(alpha_nu) */
+	alpha_nu,	/* The small-scale suppression */
+	beta_c,		/* The correction to the log in the small-scale */
+	num_degen_hdm,	/* Number of degenerate massive neutrino species */
+	f_baryon,	/* Baryon fraction */
+	f_bnu,		/* Baryon + Massive Neutrino fraction */
+	f_cb,		/* Baryon + CDM fraction */
+	f_cdm,		/* CDM fraction */
+	f_hdm,		/* Massive Neutrino fraction */
+	growth_k0,	/* D_1(z) -- the growth function as k->0 */
+	growth_to_z0,	/* D_1(z)/D_1(0) -- the growth relative to z=0 */
+	hhubble,	/* Need to pass Hubble constant to TFmdm_onek_hmpc() */
+	k_equality,	/* The comoving wave number of the horizon at equality*/
+	obhh,		/* Omega_baryon * hubble^2 */
+	omega_curv,	/* = 1 - omega_matter - omega_lambda */
+	omega_lambda_z, /* Omega_lambda at the given redshift */
+	omega_matter_z,	/* Omega_matter at the given redshift */
+	omhh,		/* Omega_matter * hubble^2 */
+	onhh,		/* Omega_hdm * hubble^2 */
+	p_c,		/* The correction to the exponent before drag epoch */
+	p_cb,		/* The correction to the exponent after drag epoch */
+	sound_horizon_fit,  /* The sound horizon at the drag epoch */
+	theta_cmb,	/* The temperature of the CMB, in units of 2.7 K */
+	y_drag,		/* Ratio of z_equality to z_drag */
+	z_drag,		/* Redshift of the drag epoch */
+	z_equality;	/* Redshift of matter-radiation equality */
+	
+	/* The following are set in TFmdm_onek_mpc() */
+	float	gamma_eff,	/* Effective \Gamma */
+	growth_cb,	/* Growth factor for CDM+Baryon perturbations */
+	growth_cbnu,	/* Growth factor for CDM+Baryon+Neutrino pert. */
+	max_fs_correction,  /* Correction near maximal free streaming */
+	qq,		/* Wavenumber rescaled by \Gamma */
+	qq_eff,		/* Wavenumber rescaled by effective Gamma */
+	qq_nu,		/* Wavenumber compared to maximal free streaming */
+	tf_master,	/* Master TF */
+	tf_sup,		/* Suppressed TF */
+	y_freestream; 	/* The epoch of free-streaming for a given scale */
+	
+	/* Finally, TFmdm_onek_mpc() and TFmdm_onek_hmpc() give their answers as */
+	float   tf_cb,		/* The transfer function for density-weighted
+						 CDM + Baryon perturbations. */
+	tf_cbnu;	/* The transfer function for density-weighted
+				 CDM + Baryon + Massive Neutrino perturbations. */
+	
+	/* By default, these functions return tf_cb */
+	
+	/* ------------------------- TFmdm_set_cosm() ------------------------ */
+	int TFmdm_set_cosm(float omega_matter, float omega_baryon, float omega_hdm,
+					   int degen_hdm, float omega_lambda, float hubble, float redshift)
+	/* This routine takes cosmological parameters and a redshift and sets up
+	 all the internal scalar quantities needed to compute the transfer function. */
+	/* INPUT: omega_matter -- Density of CDM, baryons, and massive neutrinos,
+	 in units of the critical density. */
+	/* 	  omega_baryon -- Density of baryons, in units of critical. */
+	/* 	  omega_hdm    -- Density of massive neutrinos, in units of critical */
+	/* 	  degen_hdm    -- (Int) Number of degenerate massive neutrino species */
+	/*        omega_lambda -- Cosmological constant */
+	/* 	  hubble       -- Hubble constant, in units of 100 km/s/Mpc */
+	/*        redshift     -- The redshift at which to evaluate */
+	/* OUTPUT: Returns 0 if all is well, 1 if a warning was issued.  Otherwise,
+	 sets many global variables for use in TFmdm_onek_mpc() */
+	{
+		float z_drag_b1, z_drag_b2, omega_denom;
+		int qwarn;
+		qwarn = 0;
+		
+		theta_cmb = 2.728/2.7;	/* Assuming T_cmb = 2.728 K */
+		
+		/* Look for strange input */
+		if (omega_baryon<0.0) {
+			fprintf(stderr,
+					"TFmdm_set_cosm(): Negative omega_baryon set to trace amount.\n");
+			qwarn = 1;
+		}
+		if (omega_hdm<0.0) {
+			fprintf(stderr,
+					"TFmdm_set_cosm(): Negative omega_hdm set to trace amount.\n");
+			qwarn = 1;
+		}
+		if (hubble<=0.0) {
+			fprintf(stderr,"TFmdm_set_cosm(): Negative Hubble constant illegal.\n");
+			exit(1);  /* Can't recover */
+		} else if (hubble>2.0) {
+			fprintf(stderr,"TFmdm_set_cosm(): Hubble constant should be in units of 100 km/s/Mpc.\n");
+			qwarn = 1;
+		}
+		if (redshift<=-1.0) {
+			fprintf(stderr,"TFmdm_set_cosm(): Redshift < -1 is illegal.\n");
+			exit(1);
+		} else if (redshift>99.0) {
+			fprintf(stderr,
+					"TFmdm_set_cosm(): Large redshift entered.  TF may be inaccurate.\n");
+			qwarn = 1;
+		}
+		if (degen_hdm<1) degen_hdm=1;
+		num_degen_hdm = (float) degen_hdm;	
+		/* Have to save this for TFmdm_onek_mpc() */
+		/* This routine would crash if baryons or neutrinos were zero, 
+		 so don't allow that */
+		if (omega_baryon<=0) omega_baryon=1e-5;
+		if (omega_hdm<=0) omega_hdm=1e-5;
+		
+		omega_curv = 1.0-omega_matter-omega_lambda;
+		omhh = omega_matter*SQR(hubble);
+		obhh = omega_baryon*SQR(hubble);
+		onhh = omega_hdm*SQR(hubble);
+		f_baryon = omega_baryon/omega_matter;
+		f_hdm = omega_hdm/omega_matter;
+		f_cdm = 1.0-f_baryon-f_hdm;
+		f_cb = f_cdm+f_baryon;
+		f_bnu = f_baryon+f_hdm;
+		
+		/* Compute the equality scale. */
+		z_equality = 25000.0*omhh/SQR(SQR(theta_cmb));	/* Actually 1+z_eq */
+		k_equality = 0.0746*omhh/SQR(theta_cmb);
+		
+		/* Compute the drag epoch and sound horizon */
+		z_drag_b1 = 0.313*pow(omhh,-0.419)*(1+0.607*pow(omhh,0.674));
+		z_drag_b2 = 0.238*pow(omhh,0.223);
+		z_drag = 1291*pow(omhh,0.251)/(1.0+0.659*pow(omhh,0.828))*
+		(1.0+z_drag_b1*pow(obhh,z_drag_b2));
+		y_drag = z_equality/(1.0+z_drag);
+		
+		sound_horizon_fit = 44.5*log(9.83/omhh)/sqrt(1.0+10.0*pow(obhh,0.75));
+		
+		/* Set up for the free-streaming & infall growth function */
+		p_c = 0.25*(5.0-sqrt(1+24.0*f_cdm));
+		p_cb = 0.25*(5.0-sqrt(1+24.0*f_cb));
+		
+		omega_denom = omega_lambda+SQR(1.0+redshift)*(omega_curv+
+													  omega_matter*(1.0+redshift));
+		omega_lambda_z = omega_lambda/omega_denom;
+		omega_matter_z = omega_matter*SQR(1.0+redshift)*(1.0+redshift)/omega_denom;
+		growth_k0 = z_equality/(1.0+redshift)*2.5*omega_matter_z/
+	    (pow(omega_matter_z,4.0/7.0)-omega_lambda_z+
+		 (1.0+omega_matter_z/2.0)*(1.0+omega_lambda_z/70.0));
+		growth_to_z0 = z_equality*2.5*omega_matter/(pow(omega_matter,4.0/7.0)
+													-omega_lambda + (1.0+omega_matter/2.0)*(1.0+omega_lambda/70.0));
+		growth_to_z0 = growth_k0/growth_to_z0;	
+		
+		/* Compute small-scale suppression */
+		alpha_nu = f_cdm/f_cb*(5.0-2.*(p_c+p_cb))/(5.-4.*p_cb)*
+		pow(1+y_drag,p_cb-p_c)*
+		(1+f_bnu*(-0.553+0.126*f_bnu*f_bnu))/
+		(1-0.193*sqrt(f_hdm*num_degen_hdm)+0.169*f_hdm*pow(num_degen_hdm,0.2))*
+		(1+(p_c-p_cb)/2*(1+1/(3.-4.*p_c)/(7.-4.*p_cb))/(1+y_drag));
+		alpha_gamma = sqrt(alpha_nu);
+		beta_c = 1/(1-0.949*f_bnu);
+		/* Done setting scalar variables */
+		hhubble = hubble;	/* Need to pass Hubble constant to TFmdm_onek_hmpc() */
+		return qwarn;
+	}
+	
+	/* ---------------------------- TFmdm_onek_mpc() ---------------------- */
+	
+	float TFmdm_onek_mpc(float kk)
+	/* Given a wavenumber in Mpc^-1, return the transfer function for the
+	 cosmology held in the global variables. */
+	/* Input: kk -- Wavenumber in Mpc^-1 */
+	/* Output: The following are set as global variables:
+	 growth_cb -- the transfer function for density-weighted
+	 CDM + Baryon perturbations. 
+	 growth_cbnu -- the transfer function for density-weighted
+	 CDM + Baryon + Massive Neutrino perturbations. */
+	/* The function returns growth_cb */
+	{
+		float tf_sup_L, tf_sup_C;
+		float temp1, temp2;
+		
+		qq = kk/omhh*SQR(theta_cmb);
+		
+		/* Compute the scale-dependent growth functions */
+		y_freestream = 17.2*f_hdm*(1+0.488*pow(f_hdm,-7.0/6.0))*
+		SQR(num_degen_hdm*qq/f_hdm);
+		temp1 = pow(growth_k0, 1.0-p_cb);
+		temp2 = pow(growth_k0/(1+y_freestream),0.7);
+		growth_cb = pow(1.0+temp2, p_cb/0.7)*temp1;
+		growth_cbnu = pow(pow(f_cb,0.7/p_cb)+temp2, p_cb/0.7)*temp1;
+		
+		/* Compute the master function */
+		gamma_eff =omhh*(alpha_gamma+(1-alpha_gamma)/
+						 (1+SQR(SQR(kk*sound_horizon_fit*0.43))));
+		qq_eff = qq*omhh/gamma_eff;
+		
+		tf_sup_L = log(2.71828+1.84*beta_c*alpha_gamma*qq_eff);
+		tf_sup_C = 14.4+325/(1+60.5*pow(qq_eff,1.11));
+		tf_sup = tf_sup_L/(tf_sup_L+tf_sup_C*SQR(qq_eff));
+		
+		qq_nu = 3.92*qq*sqrt(num_degen_hdm/f_hdm);
+		max_fs_correction = 1+1.2*pow(f_hdm,0.64)*pow(num_degen_hdm,0.3+0.6*f_hdm)/
+		(pow(qq_nu,-1.6)+pow(qq_nu,0.8));
+		tf_master = tf_sup*max_fs_correction;
+		
+		/* Now compute the CDM+HDM+baryon transfer functions */
+		tf_cb = tf_master*growth_cb/growth_k0;
+		tf_cbnu = tf_master*growth_cbnu/growth_k0;
+		return tf_cb;
+	}
+	
+	/* ---------------------------- TFmdm_onek_hmpc() ---------------------- */
+	
+	float TFmdm_onek_hmpc(float kk)
+	/* Given a wavenumber in h Mpc^-1, return the transfer function for the
+	 cosmology held in the global variables. */
+	/* Input: kk -- Wavenumber in h Mpc^-1 */
+	/* Output: The following are set as global variables:
+	 growth_cb -- the transfer function for density-weighted
+	 CDM + Baryon perturbations. 
+	 growth_cbnu -- the transfer function for density-weighted
+	 CDM + Baryon + Massive Neutrino perturbations. */
+	/* The function returns growth_cb */
+	{
+		return TFmdm_onek_mpc(kk*hhubble);
+	}
+	
+	
+	
+	TransferFunction_EisensteinNeutrino( Cosmology aCosm, real_t Omega_HDM, int degen_HDM, real_t Tcmb = 2.726 )
+    :  TransferFunction(aCosm)//, m_h0( aCosm.H0*0.01 )
+	{
+		TFmdm_set_cosm( aCosm.Omega_m, aCosm.Omega_b, Omega_HDM, degen_HDM, aCosm.Omega_L, aCosm.H0, aCosm.astart );
+		
+	}
+	
+	//! Computes the transfer function for k in Mpc/h by calling TFfit_onek
+	virtual inline real_t compute( real_t k ){
+		return TFmdm_onek_hmpc( k );
+		
+		
+		/*real_t tfb, tfcdm, fb, fc; //, tfull
+		TFfit_onek( k*m_h0, &tfb, &tfcdm );
+		
+		fb = m_Cosmology.Omega_b/(m_Cosmology.Omega_m);
+		fc = (m_Cosmology.Omega_m-m_Cosmology.Omega_b)/(m_Cosmology.Omega_m) ;
+		
+		return fb*tfb+fc*tfcdm;*/
+		//return 1.0;
+	}
+	
+	
+};
+
+#endif
+
+#endif