diff --git a/.gitignore b/.gitignore
index 60035a0..bcbdff2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,56 +1,14 @@
-build
+.DS_Store
 .vscode
-src/CMakeFiles/3.12.2/CompilerIdC/CMakeCCompilerId.c
-src/CMakeFiles/feature_tests.c
-src/CMakeFiles/feature_tests.cxx
-src/CMakeFiles/progress.marks
-src/CMakeFiles/3.12.2/CMakeCCompiler.cmake
-src/CMakeFiles/3.12.2/CMakeCXXCompiler.cmake
-src/CMakeFiles/3.12.2/CMakeDetermineCompilerABI_C.bin
-src/CMakeFiles/3.12.2/CMakeDetermineCompilerABI_CXX.bin
-src/CMakeFiles/3.12.2/CMakeSystem.cmake
-src/CMakeFiles/fastLPT.dir/build.make
-src/CMakeFiles/FindMPI/test_mpi.cpp
-src/CMakeFiles/FindMPI/test_mpi_C.bin
-src/CMakeFiles/FindMPI/test_mpi_CXX.bin
-src/CMakeFiles/FindOpenMP/OpenMPCheckVersion.c
-src/CMakeFiles/FindOpenMP/OpenMPCheckVersion.cpp
-src/CMakeFiles/FindOpenMP/OpenMPTryFlag.c
-src/CMakeFiles/FindOpenMP/OpenMPTryFlag.cpp
-src/CMakeFiles/FindOpenMP/ompver_C.bin
-src/CMakeFiles/FindOpenMP/ompver_CXX.bin
-src/CMakeFiles/fastLPT.dir/CXX.includecache
-src/CMakeFiles/fastLPT.dir/DependInfo.cmake
-src/CMakeFiles/fastLPT.dir/plugins/transfer_eisenstein.cc.o
-src/CMakeFiles/3.12.2/CompilerIdCXX/a.out
-src/CMakeFiles/fastLPT.dir/cmake_clean.cmake
-src/CMakeFiles/fastLPT.dir/depend.internal
-src/CMakeFiles/fastLPT.dir/depend.make
-src/CMakeFiles/fastLPT.dir/flags.make
-src/CMakeFiles/fastLPT.dir/grid_fft.cc.o
-src/CMakeFiles/fastLPT.dir/link.txt
-src/CMakeFiles/fastLPT.dir/logger.cc.o
-src/CMakeFiles/fastLPT.dir/main.cc.o
-src/CMakeFiles/fastLPT.dir/progress.make
-src/CMakeFiles/fastLPT.dir/random_plugin.cc.o
-src/CMakeFiles/fastLPT.dir/transfer_function_plugin.cc.o
-src/CMakeFiles/fastLPT.dir/plugins/random_music.cc.o
-src/CMakeFiles/fastLPT.dir/plugins/random_music_wnoise_generator.cc.o
-src/CMakeFiles/feature_tests.bin
-src/CMakeFiles/CMakeDirectoryInformation.cmake
-src/CMakeFiles/CMakeOutput.log
-src/CMakeFiles/Makefile.cmake
-src/CMakeFiles/Makefile2
-src/CMakeFiles/TargetDirectories.txt
-src/CMakeFiles/cmake.check_cache
-src/CMakeFiles/3.12.2/CompilerIdC/a.out
-src/CMakeFiles/3.12.2/CompilerIdCXX/CMakeCXXCompilerId.cpp
-src/CMakeFiles/hdf5/cmake_hdf5_test.c
-src/fastLPT.dSYM/Contents/Info.plist
-src/fastLPT.dSYM/Contents/Resources/DWARF/fastLPT
+build
+include/cmake_config.hh
+src/input_powerspec.txt
+CMakeCache.txt
+CMakeFiles/cmake.check_cache
+src/CMakeFiles
 src/cmake_install.cmake
 src/CMakeCache.txt
-src/fastLPT
-src/input_powerspec.txt
 src/Makefile
-.DS_Store
+external/panphasia/rand_base.mod
+external/panphasia/rand_int.mod
+external/panphasia/rand.mod
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 875fc91..be14271 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,16 +1,42 @@
 cmake_minimum_required(VERSION 3.9)
 set(PRGNAME monofonIC)
-project(monofonIC)
 
+project(monofonIC C CXX)
+
+# Per-configuration flags. Every value is a fixed string: FORCE rewrites the cache entry on each configure, so appending to the cached value would duplicate the flags on every re-run.
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -march=native -Wall -pedantic" CACHE STRING "Flags used by the compiler during Release builds." FORCE)
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG -march=native -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during RelWithDebInfo builds." FORCE)
+set(CMAKE_CXX_FLAGS_DEBUG "-g -O1 -march=native -DDEBUG -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during Debug builds." FORCE)
+set(CMAKE_CXX_FLAGS_DEBUGSANADD "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address " CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for address." FORCE)
+set(CMAKE_CXX_FLAGS_DEBUGSANUNDEF "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for undefineds." FORCE)
+set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "Flags used by the compiler during Release builds." FORCE)
+set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}" CACHE STRING "Flags used by the compiler during RelWithDebInfo builds." FORCE)
+set(CMAKE_C_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" CACHE STRING "Flags used by the compiler during Debug builds." FORCE)
+set(CMAKE_C_FLAGS_DEBUGSANADD "${CMAKE_CXX_FLAGS_DEBUGSANADD}" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for address." FORCE)
+set(CMAKE_C_FLAGS_DEBUGSANUNDEF "${CMAKE_CXX_FLAGS_DEBUGSANUNDEF}" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for undefineds." FORCE)
+
+
+set(default_build_type "Release")
+if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
+  message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
+  set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE
+      STRING "Choose the type of build." FORCE)
+  # Set the possible values of build type for cmake-gui
+  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
+    "Debug" "Release" "RelWithDebInfo" "DebugSanAdd" "DebugSanUndef")
+endif()
+mark_as_advanced(CMAKE_CXX_FLAGS_DEBUGSANADD CMAKE_CXX_FLAGS_DEBUGSANUNDEF)
+mark_as_advanced(CMAKE_C_FLAGS_DEBUGSANADD CMAKE_C_FLAGS_DEBUGSANUNDEF)
+mark_as_advanced(CMAKE_EXECUTABLE_FORMAT CMAKE_OSX_ARCHITECTURES CMAKE_OSX_DEPLOYMENT_TARGET CMAKE_OSX_SYSROOT)
+
+
+########################################################################################################################
 # include class submodule
 include(${CMAKE_CURRENT_SOURCE_DIR}/external/class.cmake)
 
-# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -march=native -Wall -fno-omit-frame-pointer -g  -fsanitize=address")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic")
 find_package(PkgConfig REQUIRED)
 
-set(CMAKE_MODULE_PATH
-        "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}")
+set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}")
 
 
 ########################################################################################################################
@@ -48,21 +74,70 @@ if(ENABLE_MPI)
   endif(MPI_CXX_FOUND)
 endif(ENABLE_MPI)
 
+########################################################################################################################
+# floating point precision
+set (
+  CODE_PRECISION "DOUBLE"
+  CACHE STRING "Floating point type used for internal computations and FFTs"
+)
+set_property (
+  CACHE CODE_PRECISION
+  PROPERTY STRINGS FLOAT DOUBLE LONGDOUBLE
+)
 
+########################################################################################################################
+# convolver type, right now only orszag or naive
+set (
+  CONVOLVER_TYPE "ORSZAG"
+  CACHE STRING "Convolution algorithm to be used (Naive=no dealiasing, Orszag=dealiased)"
+)
+set_property (
+  CACHE CONVOLVER_TYPE
+  PROPERTY STRINGS ORSZAG NAIVE
+)
+
+########################################################################################################################
+# PLT options, right now only on/off
+option(ENABLE_PLT "Enable PLT (particle linear theory) corrections" OFF)
+
+
+########################################################################################################################
 # FFTW
-cmake_policy(SET CMP0074 NEW)
+if(POLICY CMP0074)
+    cmake_policy(SET CMP0074 NEW)
+endif()
 if(ENABLE_MPI)
-  find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS MPI)
+  find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS MPI)
 else()
-  find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS)
+  find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS)
 endif(ENABLE_MPI)
+mark_as_advanced(FFTW3_SINGLE_MPI_LIBRARY FFTW3_SINGLE_OPENMP_LIBRARY FFTW3_SINGLE_SERIAL_LIBRARY FFTW3_SINGLE_THREADS_LIBRARY)
+mark_as_advanced(FFTW3_DOUBLE_MPI_LIBRARY FFTW3_DOUBLE_OPENMP_LIBRARY FFTW3_DOUBLE_SERIAL_LIBRARY FFTW3_DOUBLE_THREADS_LIBRARY)
+mark_as_advanced(FFTW3_LONGDOUBLE_MPI_LIBRARY FFTW3_LONGDOUBLE_OPENMP_LIBRARY FFTW3_LONGDOUBLE_SERIAL_LIBRARY FFTW3_LONGDOUBLE_THREADS_LIBRARY)
+mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_MPI_INCLUDE_DIR)
+mark_as_advanced(pkgcfg_lib_PC_FFTW_fftw3)
 
+########################################################################################################################
 # GSL
 find_package(GSL REQUIRED)
+mark_as_advanced(pkgcfg_lib_GSL_gsl pkgcfg_lib_GSL_gslcblas pkgcfg_lib_GSL_m)
 
+########################################################################################################################
 # HDF5
 find_package(HDF5 REQUIRED)
+mark_as_advanced(HDF5_C_LIBRARY_dl HDF5_C_LIBRARY_hdf5 HDF5_C_LIBRARY_m HDF5_C_LIBRARY_pthread HDF5_C_LIBRARY_z HDF5_C_LIBRARY_sz)
 
+########################################################################################################################
+# PANPHASIA (optional, ON by default): enables the Fortran language and adds compiler-specific Fortran flags
+option(ENABLE_PANPHASIA "Enable PANPHASIA random number generator" ON)
+if(ENABLE_PANPHASIA)
+enable_language(Fortran)  # fails the configure step if no Fortran compiler is available
+if ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "Intel")
+  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -132 -implicit-none")
+elseif("${CMAKE_Fortran_COMPILER_ID}" MATCHES "GNU")
+  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -ffixed-line-length-132 -fimplicit-none")
+endif()  # no extra flags are added for any other Fortran compiler
+endif(ENABLE_PANPHASIA)
 ########################################################################################################################
 # INCLUDES
 include_directories(${PROJECT_SOURCE_DIR}/include)
@@ -81,28 +156,68 @@ file( GLOB PLUGINS
   ${PROJECT_SOURCE_DIR}/src/plugins/*.cc
 )
 
+if(ENABLE_PANPHASIA)
+list (APPEND SOURCES 
+  ${PROJECT_SOURCE_DIR}/external/panphasia/panphasia_routines.f
+  ${PROJECT_SOURCE_DIR}/external/panphasia/generic_lecuyer.f90
+)
+endif()
+
+# project configuration header
+configure_file(
+  ${PROJECT_SOURCE_DIR}/include/cmake_config.hh.in
+  ${PROJECT_SOURCE_DIR}/include/cmake_config.hh
+)
+
 add_executable(${PRGNAME} ${SOURCES} ${PLUGINS})
 target_setup_class(${PRGNAME})
 
-set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 17)
+set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 14)
+
 
 # mpi flags
 if(MPI_CXX_FOUND)
-  if(FFTW3_DOUBLE_MPI_FOUND)
-    target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_MPI_LIBRARY})
-    target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL})
-    target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI")
-  endif(FFTW3_DOUBLE_MPI_FOUND)
+  if(CODE_PRECISION STREQUAL "FLOAT")
+    if(FFTW3_SINGLE_MPI_FOUND)
+      target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_MPI_LIBRARY})
+      target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL})
+      target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI")
+    else()
+      message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for single precision!")
+    endif()
+  elseif(CODE_PRECISION STREQUAL "DOUBLE")
+    if(FFTW3_DOUBLE_MPI_FOUND)
+      target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_MPI_LIBRARY})
+      target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL})
+      target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI")
+    else()
+      message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for double precision!")
+    endif()
+  elseif(CODE_PRECISION STREQUAL "LONGDOUBLE")
+    if(FFTW3_LONGDOUBLE_MPI_FOUND)
+      target_link_libraries(${PRGNAME} ${FFTW3_LONGDOUBLE_MPI_LIBRARY})
+      target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL})
+      target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI")
+    else()
+      message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for long double precision!")
+    endif()
+  endif()
 
   target_include_directories(${PRGNAME} PRIVATE ${MPI_CXX_INCLUDE_PATH})
   target_compile_options(${PRGNAME} PRIVATE "-DUSE_MPI")
   target_link_libraries(${PRGNAME} ${MPI_LIBRARIES})
 endif(MPI_CXX_FOUND)
 
-if(FFTW3_DOUBLE_THREADS_FOUND) 
+if(CODE_PRECISION STREQUAL "FLOAT" AND FFTW3_SINGLE_THREADS_FOUND) 
+  target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_THREADS_LIBRARY})
+  target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS")
+elseif(CODE_PRECISION STREQUAL "DOUBLE" AND FFTW3_DOUBLE_THREADS_FOUND) 
   target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_THREADS_LIBRARY})
   target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS")
-endif(FFTW3_DOUBLE_THREADS_FOUND)
+elseif(CODE_PRECISION STREQUAL "LONGDOUBLE" AND FFTW3_LONGDOUBLE_THREADS_FOUND) 
+  target_link_libraries(${PRGNAME} ${FFTW3_LONGDOUBLE_THREADS_LIBRARY})
+  target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS")
+endif()
 
 if(HDF5_FOUND)
   # target_link_libraries(${PRGNAME} ${HDF5_C_LIBRARY_DIRS})
@@ -111,6 +226,10 @@ if(HDF5_FOUND)
   target_compile_options(${PRGNAME} PRIVATE "-DUSE_HDF5")
 endif(HDF5_FOUND)
 
+if(ENABLE_PANPHASIA)
+target_compile_options(${PRGNAME} PRIVATE "-DUSE_PANPHASIA")
+endif(ENABLE_PANPHASIA)
+
 target_link_libraries(${PRGNAME} ${FFTW3_LIBRARIES})
 target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIRS})
 
diff --git a/README.md b/README.md
index e34dce2..3d3be7b 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ High order LPT/QPT tool for single resolution simulations
 ## Build Instructions
 Clone code including submodules (currently only CLASS is used as a submodule):
 
-    git clone --recurse-submodules https://ohahn@bitbucket.org/ohahn/monofonic.git
+    git clone --recurse-submodules https://<username>@bitbucket.org/ohahn/monofonic.git
 
 
 Create build directory, configure, and build:
@@ -17,4 +17,30 @@ Create build directory, configure, and build:
     make
 
 this should create an executable in the build directory. 
-There is an example parameter file 'example.conf' in the main directory
+
+If you run into problems with CMake not being able to find your local FFTW3 or HDF5 installation, it is best to give the path directly as
+
+    FFTW3_ROOT=<path> HDF5_ROOT=<path> ccmake ..
+
+Make sure to delete any files previously generated by CMake before reconfiguring like this.
+
+If you want to build on macOS, then it is strongly recommended to use GNU (or Intel) compilers instead of Apple's Clang. Install them e.g. 
+via homebrew and then configure cmake to use them instead of the macOS default compiler via
+
+    CC=gcc-9 CXX=g++-9 ccmake ..
+    
+This is necessary since Apple's compilers haven't supported OpenMP for years.
+
+## Running
+
+There is an example parameter file 'example.conf' in the main directory, which explains the possible options. Pass it to the
+executable as its only argument, e.g. from within the build directory:
+
+     ./monofonIC ../example.conf
+
+If you want to run with MPI, you need to enable MPI support via ccmake. Then you can launch in hybrid MPI+threads mode by 
+specifying the desired number of threads per task in the config file, and the number of tasks to be launched via
+
+     mpirun -np 16 ./monofonIC <path to config file>
+     
+It will then run with 16 tasks times the number of threads per task specified in the config file.
\ No newline at end of file
diff --git a/example.conf b/example.conf
index 3b6d07e..073b887 100644
--- a/example.conf
+++ b/example.conf
@@ -1,58 +1,71 @@
 [setup]
 # number of grid cells per linear dimension for calculations = particles for sc initial load
-GridRes      = 128
+GridRes         = 128
 # length of the box in Mpc/h
-BoxLength    = 250
+BoxLength       = 125
 # starting redshift
-zstart       = 49.0
+zstart          = 49.0
 # order of the LPT to be used (1,2 or 3)
-LPTorder     = 3
+LPTorder        = 1
 # also do baryon ICs?
-DoBaryons    = no
+DoBaryons       = no
 # do mode fixing à la Angulo&Pontzen
-DoFixing     = no
+DoFixing        = yes
 # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!)
-ParticleLoad = sc
+ParticleLoad    = sc
+# Add a possible constraint field here:
+#ConstraintFieldFile = initial_conditions.h5
+#ConstraintFieldName = ic_white_noise
+
+[cosmology]
+transfer        = CLASS
+ztarget         = 2.5
+# transfer        = eisenstein
+# transfer        = file_CAMB
+# transfer_file   = wmap5_transfer_out_z0.dat
+Omega_m         = 0.302
+Omega_b         = 0.045
+Omega_L         = 0.698
+H0              = 70.3
+sigma_8         = 0.811
+nspec           = 0.961
+
+# anisotropic large scale tidal field
+# LSS_aniso_lx    = +0.1
+# LSS_aniso_ly    = +0.1
+# LSS_aniso_lz    = -0.2
+
+[random]
+generator       = NGENIC
+seed            = 9001
 
 [testing]
 # enables diagnostic output
 # can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence'
-test = convergence
+test            = none
 
 [execution]
-NumThreads   = 4
+NumThreads      = 8
 
 [output]
-fname_hdf5   = output_sch.hdf5
-fbase_analysis = output
+fname_hdf5      = output_sch.hdf5
+fbase_analysis  = output
 
-format       = gadget2
-filename     = ics_gadget.dat
+# format          = gadget2
+# filename        = ics_gadget.dat
+# UseLongids      = false
 
-#format       = generic
-#filename     = debug.hdf5
-#generic_out_eulerian = yes
+format          = gadget_hdf5
+filename        = ics_gadget.hdf5
 
-#format	       = grafic2
-#filename       = ics_ramses
-#grafic_use_SPT = yes
+# format          = AREPO
+# filename        = ics_arepo.hdf5
 
-[random]
-generator    = NGENIC
-seed         = 9001
+# format          = generic
+# filename        = debug.hdf5
+# generic_out_eulerian = yes
 
-[cosmology]
-#transfer     = CLASS 
-transfer     = eisenstein
-Omega_m      = 0.302
-Omega_b      = 0.045
-Omega_L      = 0.698
-H0           = 70.3
-sigma_8      = 0.811
-nspec        = 0.961
-
-# anisotropic large scale tidal field
-#LSS_aniso_lx = 0.1
-#LSS_aniso_ly = 0.1
-#LSS_aniso_lz = -0.2
+# format	        = grafic2
+# filename        = ics_ramses
+# grafic_use_SPT  = yes
 
diff --git a/example_testing.conf b/example_testing.conf
new file mode 100644
index 0000000..2890286
--- /dev/null
+++ b/example_testing.conf
@@ -0,0 +1,33 @@
+[setup]
+GridRes      = 256
+BoxLength    = 6.28318530718
+zstart       = 0.0
+LPTorder     = 1
+SymplecticPT = no
+DoFixing     = no
+
+[execution]
+NumThreads   = 4
+
+[output]
+fname_hdf5   = output.hdf5
+fbase_analysis = output
+#format       = gadget2
+#filename     = ics_gadget.dat
+format	     = generic
+filename     = debug.hdf5
+generic_out_eulerian = yes
+
+[random]
+generator    = NGENIC
+seed         = 9001
+
+[cosmology]
+#transfer     = CLASS 
+transfer     = eisenstein
+Omega_m      = 1.0
+Omega_b      = 0.045
+Omega_L      = 0.0
+H0           = 70.3
+sigma_8      = 0.811
+nspec        = 0.961
diff --git a/external/class b/external/class
index b34d7f6..6adecae 160000
--- a/external/class
+++ b/external/class
@@ -1 +1 @@
-Subproject commit b34d7f6c2b72eab3a347c28e62298d62ca9dd69b
+Subproject commit 6adecae2f30172a94e003155090791abf509d995
diff --git a/external/class.cmake b/external/class.cmake
index 0a3f3c2..a2e5057 100644
--- a/external/class.cmake
+++ b/external/class.cmake
@@ -32,6 +32,7 @@ if(ENABLE_CLASS)
       ${CMAKE_CURRENT_LIST_DIR}/class/build/history.o
       ${CMAKE_CURRENT_LIST_DIR}/class/build/hydrogen.o
       ${CMAKE_CURRENT_LIST_DIR}/class/build/hyperspherical.o
+      ${CMAKE_CURRENT_LIST_DIR}/class/tools/trigonometric_integrals.o
       ${CMAKE_CURRENT_LIST_DIR}/class/build/hyrectools.o
       ${CMAKE_CURRENT_LIST_DIR}/class/build/input.o
       ${CMAKE_CURRENT_LIST_DIR}/class/build/lensing.o
@@ -78,6 +79,7 @@ if(ENABLE_CLASS)
       ${CMAKE_CURRENT_LIST_DIR}/class/tools/parser.c
       ${CMAKE_CURRENT_LIST_DIR}/class/tools/quadrature.c
       ${CMAKE_CURRENT_LIST_DIR}/class/tools/hyperspherical.c
+      ${CMAKE_CURRENT_LIST_DIR}/class/tools/trigonometric_integrals.c
       ${CMAKE_CURRENT_LIST_DIR}/class/tools/common.c
       ${CMAKE_CURRENT_LIST_DIR}/class/source/input.c
       ${CMAKE_CURRENT_LIST_DIR}/class/source/background.c
@@ -131,9 +133,9 @@ macro(target_setup_class target_name)
   endif(ENABLE_CLASS)
 endmacro(target_setup_class)
 
-if(ENABLE_CLASS)
-  # test executable
-  add_executable(testTk
-    ${CMAKE_CURRENT_LIST_DIR}/class/cpp/testTk.cc)
-  target_setup_class(testTk)
-endif(ENABLE_CLASS)
\ No newline at end of file
+# if(ENABLE_CLASS)
+#   # test executable
+#   add_executable(testTk
+#     ${CMAKE_CURRENT_LIST_DIR}/class/cpp/testTk.cc)
+#   target_setup_class(testTk)
+# endif(ENABLE_CLASS)
\ No newline at end of file
diff --git a/external/fftwpp b/external/fftwpp
new file mode 160000
index 0000000..ec6b82c
--- /dev/null
+++ b/external/fftwpp
@@ -0,0 +1 @@
+Subproject commit ec6b82cc1122ba029a7a7142cf836014e992e68c
diff --git a/external/panphasia/generic_lecuyer.f90 b/external/panphasia/generic_lecuyer.f90
new file mode 100644
index 0000000..13f53ed
--- /dev/null
+++ b/external/panphasia/generic_lecuyer.f90
@@ -0,0 +1,683 @@
+!=====================================================================================c
+!        
+! The code below was written by: Stephen Booth
+!                                Edinburgh Parallel Computing Centre
+!                                The University of Edinburgh
+!                                JCMB
+!                                Mayfield Road
+!                                Edinburgh EH9 3JZ
+!                                United Kingdom
+!
+! This file is part of the software made public in
+! Jenkins and Booth 2013  - arXiv:1306.XXXX
+!
+! The software computes the Panphasia Gaussian white noise field
+! realisation described in detail in Jenkins 2013 - arXiv:1306.XXXX
+! 
+!
+!
+! This software is free, subject to a agreeing licence conditions:
+!
+!
+! (i)  you will publish the phase descriptors and reference Jenkins (13) 
+!      for any new simulations that use Panphasia phases. You will pass on this 
+!      condition to others for any software or data you make available publically 
+!      or privately that makes use of Panphasia. 
+!
+! (ii) that you will ensure any publications using results derived from Panphasia 
+!      will be submitted as a final version to arXiv prior to or coincident with
+!      publication in a journal. 
+!
+!
+! (iii) that you report any bugs in this software as soon as confirmed to 
+!       A.R.Jenkins@durham.ac.uk 
+!
+! (iv)  that you understand that this software comes with no warranty and that is 
+!       your responsibility to ensure that it is suitable for the purpose that 
+!       you intend. 
+!
+!=====================================================================================c
+!{{{Rand_base (define kind types) 
+MODULE Rand_base
+! This module only declares the integer kinds (Sint, Dint, Ctype) used below.
+! They may have to be edited to match the target machine.
+! What we really need is a power-of-2 selected int kind; in Fortran 95 we could
+! do this with a PURE function I think.
+
+!
+! 10 decimal digits will hold 2^31
+!
+
+   INTEGER, PARAMETER :: Sint = SELECTED_INT_KIND(9)
+!  INTEGER, PARAMETER :: Sint = SELECTED_INT_KIND(10)
+!  INTEGER, PARAMETER :: Sint = 4
+
+!
+! 18-19 decimal digits will hold 2^63
+! but all 19 digit numbers require 2^65 :-(
+!
+
+   INTEGER, PARAMETER :: Dint = SELECTED_INT_KIND(17)
+!  INTEGER, PARAMETER :: Dint = SELECTED_INT_KIND(18)
+!  INTEGER, PARAMETER :: Dint = 8
+
+! Kind for index counters; it must be able to hold Nstore.
+  INTEGER, PARAMETER :: Ctype = SELECTED_INT_KIND(3)
+END MODULE Rand_base
+!}}}
+
+!{{{Rand_int (random integers mod 2^31-1) 
+
+MODULE Rand_int
+  USE Rand_base
+  IMPLICIT NONE
+! The general approach of this module is to have
+! two types Sint and Dint
+!
+! Sint should have at least 31 bits
+! Dint should have at least 63 bits
+
+!{{{constants
+
+  INTEGER(KIND=Ctype), PARAMETER :: Nstate=5_Ctype
+  INTEGER(KIND=Ctype), PRIVATE, PARAMETER :: Nbatch=128_Ctype
+  INTEGER(KIND=Ctype), PRIVATE, PARAMETER :: Nstore=Nstate+Nbatch
+
+  INTEGER(KIND=Sint), PRIVATE, PARAMETER  :: M = 2147483647_Sint
+  INTEGER(KIND=Dint), PRIVATE, PARAMETER  :: Mask = 2147483647_Dint
+  INTEGER(KIND=Dint), PRIVATE, PARAMETER  :: A1 = 107374182_Dint
+  INTEGER(KIND=Dint), PRIVATE, PARAMETER  :: A5 = 104480_Dint
+  LOGICAL, PARAMETER :: Can_step_int=.TRUE.
+  LOGICAL, PARAMETER :: Can_reverse_int=.TRUE.
+
+!}}}
+
+!{{{Types
+!
+! This type holds the state of the generator
+!
+!{{{TYPE RAND_state
+
+TYPE RAND_state
+  PRIVATE
+  INTEGER(KIND=Sint) :: state(Nstore) 
+! do we need to re-fill state table this is reset when we initialise state.
+  LOGICAL :: need_fill 
+! position of the next state variable to output
+  INTEGER(KIND=Ctype) :: pos
+END TYPE RAND_state
+
+!}}}
+
+!
+! This type defines the offset type used for stepping.
+!
+!{{{TYPE RAND_offset
+
+TYPE RAND_offset
+  PRIVATE
+  INTEGER(KIND=Sint) :: poly(Nstate)
+END TYPE RAND_offset
+
+!}}}
+
+!}}}
+
+!{{{interface and overloads
+!
+! Allow automatic conversion between integers and offsets
+!
+INTERFACE ASSIGNMENT(=)
+  MODULE PROCEDURE Rand_set_offset
+  MODULE PROCEDURE Rand_load
+  MODULE PROCEDURE Rand_save
+  MODULE PROCEDURE Rand_seed
+END INTERFACE
+INTERFACE OPERATOR(+)
+  MODULE PROCEDURE Rand_add_offset
+END INTERFACE
+INTERFACE OPERATOR(*)
+  MODULE PROCEDURE Rand_mul_offset
+END INTERFACE
+
+!
+! overload + as the boost/stepping operator
+!
+INTERFACE OPERATOR(+)
+  MODULE PROCEDURE Rand_step
+  MODULE PROCEDURE Rand_boost
+END INTERFACE
+!}}}
+
+
+!{{{PUBLIC/PRIVATE 
+  PRIVATE reduce,mod_saxpy,mod_sdot,p_saxpy,p_sdot,poly_mult
+  PRIVATE poly_square, poly_power
+  PRIVATE fill_state, repack_state
+
+  PUBLIC Rand_sint, Rand_sint_vec
+
+  PUBLIC Rand_save, Rand_load
+  PUBLIC Rand_set_offset, Rand_add_offset, Rand_mul_offset
+  PUBLIC Rand_step, Rand_boost, Rand_seed
+!}}}
+
+CONTAINS
+  !{{{Internals
+  !{{{RECURSIVE FUNCTION reduce(A)
+  RECURSIVE FUNCTION reduce(A)
+  ! Reduce the Dint value A to an Sint value modulo M (M = 2^31-1).
+  ! While tmp has bits above bit 30, add them (tmp shifted right by 31) to the
+  ! low 31 bits (tmp AND Mask); finally subtract M once if tmp is still >= M.
+   INTEGER(KIND=Dint), INTENT(IN) :: A
+   INTEGER(KIND=Sint) reduce
+   INTEGER(KIND=Dint) tmp
+  
+    tmp = A  
+    DO WHILE( ISHFT(tmp, -31) .GT. 0 )
+      tmp = IAND(tmp,Mask) + ISHFT(tmp, -31)
+    END DO
+    IF( tmp .GE. M ) THEN
+      reduce = tmp - M
+    ELSE
+      reduce = tmp
+    END IF
+  END FUNCTION reduce
+  !}}}
+  !{{{RECURSIVE SUBROUTINE fill_state(x)
+  RECURSIVE SUBROUTINE fill_state(x)  ! compute entries Nstate+1..Nstore of x%state from the entries before them
+  TYPE(RAND_state), INTENT(INOUT) ::  x
+  INTEGER(KIND=Ctype) i
+  INTRINSIC IAND, ISHFT
+  INTEGER(KIND=Dint)  tmp
+    DO i=Nstate+1,Nstore
+      tmp = (x%state(i-5) * A5) + (x%state(i-1)*A1)
+      ! tmp now holds A5*state(i-5) + A1*state(i-1) as a Dint value.
+      ! Reduce it down to mod M efficiently with shifts and masks; these are
+      ! the same steps as in reduce(), written out here in place of the call
+      ! below (original note: "really hope the compiler in-lines this").
+      ! x%state(i) = reduce(tmp)
+      DO WHILE( ISHFT(tmp, -31) .GT. 0 )
+        tmp = IAND(tmp,Mask) + ISHFT(tmp, -31)
+      END DO
+      IF( tmp .GE. M ) THEN
+        x%state(i) = tmp - M
+      ELSE
+        x%state(i) = tmp
+      END IF
+  
+    END DO
+    x%need_fill = .FALSE.
+  END SUBROUTINE fill_state
+  !}}}
+  !{{{RECURSIVE SUBROUTINE repack_state(x)
+  RECURSIVE SUBROUTINE repack_state(x)  ! restart the state table from its current position
+  TYPE(RAND_state), INTENT(INOUT) ::  x
+  INTEGER(KIND=Ctype) i
+    DO i=1,Nstate
+      x%state(i) = x%state(i+x%pos-(Nstate+1))  ! copy state(pos-Nstate .. pos-1) to state(1 .. Nstate)
+    END DO
+    x%pos = Nstate + 1
+    x%need_fill = .TRUE.  ! request a re-fill of the rest of the state table
+  END SUBROUTINE repack_state
+  !}}}
+  !{{{RECURSIVE SUBROUTINE mod_saxpy(y,a,x)
+  RECURSIVE SUBROUTINE mod_saxpy(y,a,x)
+   INTEGER(KIND=Ctype) i
+   INTEGER(KIND=Sint) y(Nstate)
+   INTEGER(KIND=Sint) a
+   INTEGER(KIND=Sint) x(Nstate)
+   INTEGER(KIND=Dint) tx,ty,ta
+   ! In place: y(i) = reduce(y(i) + a*x(i)) for i = 1..Nstate; y is left untouched when a is 0.
+     IF( a .EQ. 0_Sint ) RETURN
+  
+     ! We use KIND=Dint temporaries here to ensure
+     ! that we don't overflow in the expression
+  
+     ta = a
+     DO i=1,Nstate
+       ty=y(i)
+       tx=x(i)
+       y(i) = reduce(ty + ta * tx)
+     END DO
+  
+  END SUBROUTINE 
+  !}}}
+  !{{{RECURSIVE SUBROUTINE mod_sdot(res,x,y)
+  RECURSIVE SUBROUTINE mod_sdot(res,x,y)
+  INTEGER(KIND=Sint), INTENT(OUT) :: res
+  INTEGER(KIND=Sint), INTENT(IN) :: x(Nstate) , y(Nstate)
+  INTEGER(KIND=Dint) dx, dy, dtmp
+  INTEGER(KIND=Sint) tmp
+  INTEGER(KIND=Ctype) i
+  ! res = sum over i of x(i)*y(i), with reduce() applied after each term; products are formed in Dint.
+    tmp = 0
+    DO i=1,Nstate
+     dx = x(i)
+     dy = y(i)
+     dtmp = tmp
+     tmp = reduce(dtmp + dx * dy)
+    END DO
+    res = tmp
+  END SUBROUTINE
+  !}}}
+  !{{{RECURSIVE SUBROUTINE p_saxpy(y,a)
+  RECURSIVE SUBROUTINE p_saxpy(y,a)
+   ! Calculates mod_saxpy(y,a,P) for the fixed vector P = (A5,0,0,0,A1): only y(1) and y(5) are updated.
+   INTEGER(KIND=Sint), INTENT(INOUT) :: y(Nstate)
+   INTEGER(KIND=Sint), INTENT(IN) :: a
+   INTEGER(KIND=Dint) tmp, dy, da
+     dy = y(1)
+     da = a
+     tmp = dy + da*A5
+     y(1) = reduce(tmp)
+     dy = y(5)
+     da = a
+     tmp = dy + da*A1
+     y(5) = reduce(tmp)
+  
+  END SUBROUTINE
+  !}}}
+  !{{{RECURSIVE SUBROUTINE p_sdot(res,x)
+  RECURSIVE SUBROUTINE p_sdot(res,x)
+  INTEGER(KIND=Sint), INTENT(OUT) :: res
+  INTEGER(KIND=Sint), INTENT(IN) :: x(Nstate)
+  INTEGER(KIND=Dint) dx1, dx5, dtmp
+    dx1 = x(1)
+    dx5 = x(5)
+    ! res = reduce(A1*x(5) + A5*x(1)); only entries 1 and 5 of x are read.
+    dtmp = A1*dx5 + A5*dx1
+    res = reduce(dtmp)
+  END SUBROUTINE
+  !}}}
+  !{{{RECURSIVE SUBROUTINE poly_mult(a,b)
+  RECURSIVE SUBROUTINE poly_mult(a,b)
+    INTEGER(KIND=Sint), INTENT(INOUT) :: a(Nstate)
+    INTEGER(KIND=Sint), INTENT(IN) :: b(Nstate)
+    INTEGER(KIND=Sint) tmp((2*Nstate) - 1)
+    INTEGER(KIND=Ctype) i
+    ! a <- a*b. First loop: add a(i)*b(j) into tmp(i+j-1) for all i,j (via mod_saxpy).
+    tmp = 0_Sint
+    ! Second loop: from the top entry down, fold tmp(i) into the Nstate entries below it (via p_saxpy).
+    DO i=1,Nstate
+      CALL mod_saxpy(tmp(i:Nstate+i-1),a(i), b)
+    END DO
+    DO i=(2*Nstate)-1, Nstate+1, -1
+      CALL P_SAXPY(tmp(i-Nstate:i-1),tmp(i))
+    END DO
+    a = tmp(1:Nstate)
+  END SUBROUTINE
+  !}}}
+  !{{{RECURSIVE SUBROUTINE poly_square(a)
+  RECURSIVE SUBROUTINE poly_square(a)
+    INTEGER(KIND=Sint), INTENT(INOUT) :: a(Nstate)
+    INTEGER(KIND=Sint) tmp((2*Nstate) - 1)
+    INTEGER(KIND=Ctype) i
+    ! a <- a*a, using the same two loops as poly_mult with both factors equal to a.
+    tmp = 0_Sint
+  
+    DO i=1,Nstate
+      CALL mod_saxpy(tmp(i:Nstate+i-1),a(i), a)
+    END DO
+    DO i=(2*Nstate)-1, Nstate+1, -1
+      CALL P_SAXPY(tmp(i-Nstate:i-1),tmp(i))
+    END DO
+    a = tmp(1:Nstate)
+  END SUBROUTINE
+  !}}}
+  !{{{RECURSIVE SUBROUTINE poly_power(poly,n)
+  RECURSIVE SUBROUTINE poly_power(poly,n)
+   INTEGER(KIND=Sint), INTENT(INOUT) :: poly(Nstate)
+   INTEGER, INTENT(IN) :: n
+   INTEGER nn
+   INTEGER(KIND=Sint) x(Nstate), out(Nstate)
+   ! Replace poly by its n-th power under poly_mult: n = 0 gives (1,0,...,0), n < 0 gives all zeros.
+   IF( n .EQ. 0 )THEN
+     poly = 0_Sint
+     poly(1) = 1_Sint
+     RETURN
+   ELSE IF( n .LT. 0 )THEN
+     poly = 0_Sint
+     RETURN
+   END IF
+   ! Square-and-multiply over the bits of n: out accumulates the result, x holds the repeated squares of poly.
+   out = 0_sint
+   out(1) = 1_Sint
+   x = poly
+   nn = n
+   DO WHILE( nn .GT. 0 )
+     IF( MOD(nn,2) .EQ. 1 )THEN
+       call poly_mult(out,x)
+     END IF
+     nn = nn/2
+     IF( nn .GT. 0 )THEN
+       call poly_square(x)
+     END IF
+   END DO 
+   poly = out
+  
+  END SUBROUTINE poly_power
+  !}}}
+  !}}}
+
+  !{{{RECURSIVE SUBROUTINE  Rand_seed( state, n )
+  RECURSIVE SUBROUTINE  Rand_seed( state, n )
+    TYPE(Rand_state), INTENT(OUT) :: state
+    INTEGER, INTENT(IN) :: n
+    ! Initialise the generator using a single integer:
+    ! first initialise to an arbitrary state, then boost by a multiple
+    ! of a long distance.
+    !
+    ! state is moved forward by P^n steps
+    ! we want this to be ok for separating parallel sequences on MPP machines
+    ! P is taken as a prime number as this should prevent strong correlations
+    ! when the generators are operated in tight lockstep.
+    ! equivalent points on different processors will also be related by a
+    ! primitive polynomial
+    ! P is 2^48-59
+    TYPE(Rand_state) tmp
+    TYPE(Rand_offset), PARAMETER ::  P = &
+         Rand_offset( (/ 1509238949_Sint ,2146167999_Sint ,1539340803_Sint , &
+                     1041407428_Sint ,666274987_Sint /) )
+  
+    CALL Rand_load( tmp, (/ 5, 4, 3, 2, 1 /) )
+    state = Rand_boost( tmp, Rand_mul_offset(P, n ))
+  
+  END SUBROUTINE Rand_seed
+  !}}}
+  !{{{RECURSIVE SUBROUTINE Rand_load( state, input )
+  RECURSIVE SUBROUTINE Rand_load( state, input )
+  TYPE(RAND_state), INTENT(OUT) :: state
+  INTEGER, INTENT(IN) :: input(Nstate)
+  ! Initialise state from Nstate integers: each is converted to Sint and taken MOD M; all other entries are zeroed.
+  INTEGER(KIND=Ctype) i
+  ! On return pos is Nstate+1 and need_fill is .TRUE.
+    state%state = 0_Sint
+    DO i=1,Nstate
+      state%state(i) = MOD(INT(input(i),KIND=Sint),M)
+    END DO
+    state%need_fill = .TRUE.
+    state%pos = Nstate + 1
+  END SUBROUTINE Rand_load
+  !}}}
+  !{{{RECURSIVE SUBROUTINE Rand_save( save_vec, x )
+  RECURSIVE SUBROUTINE Rand_save( save_vec, x ) 
+  ! Export the Nstate buffer entries that immediately precede the read
+  ! position of x, i.e. x%state(x%pos-Nstate : x%pos-1).
+  !
+  !   save_vec (out) receives the Nstate values (converted to default
+  !                  INTEGER by assignment)
+  !   x        (in)  generator state to save; not modified
+  INTEGER, INTENT(OUT) ::  save_vec(Nstate)
+  TYPE(RAND_state), INTENT(IN) ::  x
+  
+  INTEGER(KIND=Ctype) first
+
+    first = x%pos - Nstate
+    save_vec = x%state(first:first+Nstate-1)
+  END SUBROUTINE Rand_save
+  !}}}
+
+  !{{{RECURSIVE SUBROUTINE Rand_set_offset( offset, n )
+  RECURSIVE SUBROUTINE Rand_set_offset( offset, n )
+  ! Set offset to the polynomial for a step of n positions.
+  !
+  !   offset (out) receives the base polynomial raised to the power |n|
+  !   n      (in)  number of steps; a negative n starts from the stored
+  !                coefficients of X^-1 instead of X
+  TYPE(Rand_offset), INTENT(OUT) :: offset
+  INTEGER, INTENT(IN) :: n
+  
+    offset%poly = 0_Sint
+    IF ( n .LT. 0 ) THEN
+      !
+      ! This is X^-1 
+      !
+      offset%poly(4) = 858869107_Sint
+      offset%poly(5) = 1840344978_Sint    
+    ELSE
+      ! This is X
+      offset%poly(2) = 1_Sint
+    END IF
+    ! Raise the chosen base to the |n|-th power in place.
+    call poly_power(offset%poly,ABS(n))
+  END SUBROUTINE Rand_set_offset
+  !}}}
+  !{{{TYPE(Rand_offset) RECURSIVE FUNCTION Rand_add_offset( a, b )
+  TYPE(Rand_offset) RECURSIVE FUNCTION Rand_add_offset( a, b ) RESULT( combined )
+  ! Combine two offsets: the result polynomial is a%poly multiplied by
+  ! b%poly via poly_mult.
+  TYPE(Rand_offset), INTENT(IN) :: a, b
+  
+    combined = a
+    CALL poly_mult(combined%poly,b%poly)
+  END FUNCTION Rand_add_offset
+  !}}}
+  !{{{TYPE(Rand_offset) RECURSIVE  FUNCTION Rand_mul_offset( a, n )
+  TYPE(Rand_offset) RECURSIVE  FUNCTION Rand_mul_offset( a, n ) RESULT( scaled )
+  ! Multiply an offset by the integer n: the result polynomial is a%poly
+  ! raised to the n-th power via poly_power.
+  TYPE(Rand_offset), INTENT(IN) :: a
+  INTEGER, INTENT(IN) :: n
+
+    scaled = a
+    CALL poly_power(scaled%poly,n)
+  END FUNCTION Rand_mul_offset
+  !}}}
+  !{{{RECURSIVE FUNCTION Rand_boost(x, offset)
+  RECURSIVE FUNCTION Rand_boost(x, offset)
+  ! Return a copy of generator x advanced by the given offset.
+  !
+  !   x      (in) state to start from; not modified
+  !   offset (in) offset polynomial to apply
+  !
+  ! The Nstate entries preceding x%pos are extended by Nstate-1 further
+  ! entries using P_SDOT, and each new state entry is then formed by
+  ! mod_sdot of offset%poly with a sliding window of length Nstate.
+  ! The result has need_fill set and pos = Nstate + 1.
+  TYPE(Rand_state) Rand_boost
+  TYPE(Rand_state), INTENT(IN) ::  x
+  TYPE(Rand_offset), INTENT(IN) :: offset
+  INTEGER(KIND=Sint) window(2*Nstate-1), boosted(Nstate)
+  INTEGER(KIND=Ctype) k, first
+  
+    ! Current Nstate values, followed by zeroed room for the extension.
+    first = x%pos - Nstate
+    window(1:Nstate) = x%state(first:first+Nstate-1)
+    window(Nstate+1:) = 0_Sint
+  
+    ! Each new entry depends on the previous ones, so order matters here.
+    DO k=1,Nstate-1
+      call P_SDOT(window(k+Nstate),window(k:Nstate+k-1))
+    END DO
+  
+    DO k=1,Nstate
+      call mod_sdot(boosted(k),offset%poly,window(k:Nstate+k-1))
+    END DO
+
+    Rand_boost%state = 0_Sint
+    Rand_boost%state(1:Nstate) = boosted
+    Rand_boost%pos = Nstate + 1
+    Rand_boost%need_fill = .TRUE.
+  
+  END FUNCTION Rand_boost
+  !}}}
+  !{{{RECURSIVE FUNCTION Rand_step(x, n)
+  RECURSIVE FUNCTION Rand_step(x, n)
+  ! Return a copy of generator x moved by n positions: the offset for n
+  ! is built with Rand_set_offset and applied with Rand_boost.
+  TYPE(Rand_state) Rand_step
+  TYPE(RAND_state), INTENT(IN) ::  x
+  INTEGER, INTENT(IN) :: n
+  TYPE(Rand_offset) jump
+  
+    CALL Rand_set_offset(jump,n)
+    Rand_step = Rand_boost(x,jump)
+  
+  END FUNCTION Rand_step
+  !}}}
+  
+  !{{{RECURSIVE FUNCTION Rand_sint(x)
+  RECURSIVE FUNCTION Rand_sint(x) RESULT( next_val )
+    ! Return the next integer from generator x and advance its read
+    ! position by one.  The buffer is repacked first when the position
+    ! is past Nstore, and refilled when need_fill is set.
+    TYPE(RAND_state), INTENT(INOUT) :: x
+    INTEGER(KIND=Sint)  next_val
+
+    IF( x%pos .GT. Nstore ) CALL repack_state(x)
+    IF( x%need_fill ) CALL fill_state(x)
+
+    next_val = x%state(x%pos)
+    x%pos = x%pos + 1
+  END FUNCTION Rand_sint
+  !}}}
+  !{{{RECURSIVE SUBROUTINE Rand_sint_vec(iv,x)
+  RECURSIVE SUBROUTINE Rand_sint_vec(iv,x)
+    ! Fill iv with successive integers from generator x.
+    !
+    !   iv (out)   receives SIZE(iv) values
+    !   x  (inout) generator; its read position advances by SIZE(iv)
+    !
+    ! Values are copied out of the state buffer in runs that end at
+    ! Nstore; the buffer is repacked / refilled between runs as needed.
+    INTEGER(KIND=Sint), INTENT(OUT)  :: iv(:)
+    TYPE(RAND_state), INTENT(INOUT)  ::  x
+    INTEGER remaining, dest, ncopy
+  
+    dest = 1
+    remaining = SIZE(iv)
+    DO WHILE( remaining .GT. 0 )
+      IF( x%pos .GT. Nstore ) CALL repack_state(x)
+      IF( x%need_fill ) CALL fill_state(x)
+  
+      ! Take as much as is available before the end of the buffer.
+      ncopy = MIN(remaining,Nstore-x%pos+1)
+      iv(dest:dest+ncopy-1) = x%state(x%pos:x%pos+ncopy-1)
+
+      dest = dest + ncopy
+      x%pos = x%pos + ncopy
+      remaining = remaining - ncopy
+    END DO
+  
+  END SUBROUTINE Rand_sint_vec
+  !}}}
+
+
+END MODULE Rand_int
+
+!}}}
+
+!{{{Rand (use Rand_int to make random reals)
+
+MODULE Rand
+  ! Real-valued front end to Rand_int.
+  !
+  ! The generic Rand_real draws integers with Rand_sint / Rand_sint_vec
+  ! and maps each draw Z to (Z-0.5) times a reciprocal of 2147483647.
+  ! A draw of 0 is replaced by M = 2147483647 before scaling.
+  ! Scalar and rank-1 forms exist for the two real kinds RAND_kind1 and
+  ! RAND_kind2.
+  USE Rand_int
+  IMPLICIT NONE
+
+!{{{Parameters
+
+  ! Real kinds supported by Rand_real.
+  INTEGER, PARAMETER :: RAND_kind1 = SELECTED_REAL_KIND(10)
+  INTEGER, PARAMETER :: RAND_kind2 = SELECTED_REAL_KIND(6)
+
+  ! Largest number of integers requested per Rand_sint_vec call by the
+  ! vector routines (also bounds their scratch array).
+  INTEGER, PARAMETER, PRIVATE :: Max_block=100
+  INTEGER(KIND=Sint), PRIVATE, PARAMETER  :: M = 2147483647
+  ! Reciprocal of 2147483647 evaluated in each real kind.
+  REAL(KIND=RAND_kind1), PRIVATE, PARAMETER :: INVMP1_1 = ( 1.0_RAND_kind1 / 2147483647.0_RAND_kind1 )
+  REAL(KIND=RAND_kind2), PRIVATE, PARAMETER :: INVMP1_2 = ( 1.0_RAND_kind2 / 2147483647.0_RAND_kind2 )
+
+  ! Capability flags re-exported from Rand_int.
+  LOGICAL, PARAMETER :: Can_step = Can_step_int
+  LOGICAL, PARAMETER :: Can_reverse = Can_reverse_int
+
+!}}}
+  PUBLIC Rand_real
+
+
+INTERFACE Rand_real
+  MODULE PROCEDURE Rand_real1
+  MODULE PROCEDURE Rand_real2
+  MODULE PROCEDURE Rand_real_vec1
+  MODULE PROCEDURE Rand_real_vec2
+END INTERFACE
+
+
+CONTAINS
+
+  !{{{RECURSIVE SUBROUTINE Rand_real1(y,x)
+  RECURSIVE SUBROUTINE Rand_real1(y,x)
+    ! Draw one RAND_kind1 real from generator x.
+    !   y (out)   (Z-0.5)*INVMP1_1, with a zero draw Z replaced by M
+    !   x (inout) generator state, advanced by one draw
+    REAL(KIND=RAND_kind1), INTENT(OUT) :: y
+    TYPE(RAND_state), INTENT(INOUT) ::  x
+    INTEGER(KIND=Sint) Z
+  
+    Z = Rand_sint(x)
+    IF (Z .EQ. 0) Z = M
+  
+    y = ((Z-0.5d0)*INVMP1_1)
+    RETURN
+  END SUBROUTINE Rand_real1
+  !}}}
+  !{{{RECURSIVE SUBROUTINE Rand_real2(y,x)
+  RECURSIVE SUBROUTINE Rand_real2(y,x)
+    ! Draw one RAND_kind2 real from generator x.
+    !   y (out)   (Z-0.5)*INVMP1_1 converted to RAND_kind2 on assignment
+    !   x (inout) generator state, advanced by one draw
+    REAL(KIND=RAND_kind2), INTENT(OUT) :: y
+    TYPE(RAND_state), INTENT(INOUT) ::  x
+    INTEGER(KIND=Sint) Z
+  
+    Z = Rand_sint(x)
+    IF (Z .EQ. 0) Z = M
+  
+    y = ((Z-0.5d0)*INVMP1_1)  ! generate in double and truncate.
+    RETURN
+  END SUBROUTINE Rand_real2
+  !}}}
+
+  !{{{RECURSIVE SUBROUTINE Rand_real_vec1(rv,x)
+  RECURSIVE SUBROUTINE Rand_real_vec1(rv,x)
+    ! Fill rv with RAND_kind1 reals from generator x.
+    !   rv (out)   every element is assigned; same mapping as Rand_real1
+    !   x  (inout) generator state, advanced by SIZE(rv) draws
+    ! Integers are fetched in blocks of at most Max_block.
+    TYPE(RAND_state), INTENT(INOUT) ::  x
+    REAL(KIND=RAND_kind1), INTENT(OUT) ::  rv(:)
+    INTEGER left,start, chunk, i
+    INTEGER(KIND=Sint) Z
+    INTEGER(KIND=Sint) temp(MIN(SIZE(rv),Max_block))
+  
+    start=0
+    left=SIZE(rv)
+    DO WHILE( left .GT. 0 )
+      chunk = MIN(left,Max_block)
+      CALL Rand_sint_vec(temp(1:chunk),x)
+      DO i=1,chunk
+       Z = temp(i)
+       IF (Z .EQ. 0) Z = M
+       rv(start+i) = (Z-0.5d0)*INVMP1_1
+      END DO 
+      start = start + chunk
+      left = left - chunk
+    END DO
+  
+    RETURN
+  END SUBROUTINE Rand_real_vec1
+  !}}}
+  !{{{RECURSIVE SUBROUTINE Rand_real_vec2(rv,x)
+  RECURSIVE SUBROUTINE Rand_real_vec2(rv,x)
+    ! Fill rv with RAND_kind2 reals from generator x.
+    !   rv (out)   every element is assigned
+    !   x  (inout) generator state, advanced by SIZE(rv) draws
+    ! Integers are fetched in blocks of at most Max_block.
+    !
+    ! NOTE(review): this routine scales by INVMP1_2 whereas the scalar
+    ! Rand_real2 scales by INVMP1_1 before truncating, so the two paths
+    ! may not round identically.  Left unchanged so that existing
+    ! output streams are preserved - confirm which one is intended.
+    TYPE(RAND_state), INTENT(INOUT) ::  x
+    REAL(KIND=RAND_kind2), INTENT(OUT) ::  rv(:)
+    INTEGER left,start, chunk, i
+    INTEGER(KIND=Sint) Z
+    INTEGER(KIND=Sint) temp(MIN(SIZE(rv),Max_block))
+  
+    start=0
+    left=SIZE(rv)
+    DO WHILE( left .GT. 0 )
+      chunk = MIN(left,Max_block)
+      CALL Rand_sint_vec(temp(1:chunk),x)
+      DO i=1,chunk
+       Z = temp(i)
+       IF (Z .EQ. 0) Z = M
+       rv(start+i) = (Z-0.5d0)*INVMP1_2
+      END DO 
+      start = start + chunk
+      left = left - chunk
+    END DO
+  
+    RETURN
+  END SUBROUTINE Rand_real_vec2
+  !}}}
+END MODULE Rand
+
+!}}}
+
+!{{{test program
+! PROGRAM test_random
+! use Rand
+!     TYPE(RAND_state) x
+!     REAL y
+!      CALL Rand_load(x,(/5,4,3,2,1/)) 
+!      DO I=0,10
+!       CALL Rand_real(y,x)
+!       WRITE(*,10) I,y
+!      END DO
+!
+!10    FORMAT(I10,E25.16)
+!
+!     END
+
+!         0   0.5024326127022505E-01
+!         1   0.8260946767404675E-01
+!         2   0.2123264316469431E-01
+!         3   0.6926658791489899E+00
+!         4   0.2076155943796039E+00
+!         5   0.4327449947595596E-01
+!         6   0.2204052871093154E-01
+!         7   0.1288446951657534E+00
+!         8   0.4859915426932275E+00
+!         9   0.5721384193748236E-01
+!        10   0.7996825082227588E+00
+!
+
+
+!}}}
+
diff --git a/external/panphasia/panphasia_routines.f b/external/panphasia/panphasia_routines.f
new file mode 100644
index 0000000..2e1bfbd
--- /dev/null
+++ b/external/panphasia/panphasia_routines.f
@@ -0,0 +1,3334 @@
+c=====================================================================================c
+c        
+c The code below was written by: Adrian Jenkins,                                            
+c                                Institute for Computational Cosmology
+c                                Department of Physics
+c                                South Road
+c                                Durham, DH1 3LE
+c                                United Kingdom
+c
+c This file is part of the software made public in
+c Jenkins and Booth 2013  - arXiv:1306.XXXX
+c
+c The software computes the Panphasia Gaussian white noise field
+c realisation described in detail in Jenkins 2013 - arXiv:1306.XXXX
+c 
+c
+c
+c This software is free, subject to a agreeing licence conditions:
+c
+c
+c (i)  you will publish the phase descriptors and reference Jenkins (13) 
+c      for any new simulations that use Panphasia phases. You will pass on this 
+c      condition to others for any software or data you make available publically 
+c      or privately that makes use of Panphasia. 
+c
+c (ii) that you will ensure any publications using results derived from Panphasia 
+c      will be submitted as a final version to arXiv prior to or coincident with
+c      publication in a journal. 
+c
+c (iii) that you report any bugs in this software as soon as confirmed to 
+c       A.R.Jenkins@durham.ac.uk 
+c
+c (iv)  that you understand that this software comes with no warranty and that is 
+c       your responsibility to ensure that it is suitable for the purpose that 
+c       you intend. 
+c
+c=====================================================================================c
+
+c=====================================================================================
+c       List of subroutines and arguments.  Each of these is documented in           c
+c       arXiV/1306.XXXX                                                              c
+c                                                                                    c
+c       Adrian Jenkins, 24/6/2013.                                                   c
+c-------------------------------------------------------------------------------------
+c  Version 1.000
+c===================================================================================
+
+      module pan_state
+c     Compile-time limits and the per-instance state record (state_data)
+c     shared by the Panphasia routines in this file.
+c
+c     NOTE(review): the component order of state_data may be relied on
+c     by code outside this file - confirm before reordering or
+c     inserting components.
+      use Rand
+      implicit none
+c     maxdim_ bounds the per-level arrays, maxlev_ is the largest level
+c     accepted by set_phases_and_rel_origin, maxpow_ bounds the table
+c     of power offsets.
+      integer maxdim_, maxlev_, maxpow_
+      parameter (maxdim_=60,maxlev_=50, maxpow_ = 3*maxdim_)
+c     Exponent applied to every entry of the power-offset table.
+      integer nmulti_
+      parameter (nmulti_=64)
+      integer range_max
+      parameter(range_max=10000)
+c     Index bounds of the p_xcursor/p_ycursor/p_zcursor arrays.
+      integer indmin,indmax
+      parameter (indmin=-1, indmax=60)
+
+
+      type state_data
+c     Saved generator states: the base state and one start state per
+c     level (both written by initialise_panphasia).
+      integer base_state(5), base_lev_start(5,0:maxdim_)
+c     poweroffset(i): offset of 2**i steps raised to the power nmulti_.
+      TYPE(Rand_offset) :: poweroffset(0:maxpow_)
+c     Offset of 2**137 steps (see initialise_panphasia).
+      TYPE(Rand_offset) :: superjump
+c     Working generator state per level; index -1 is the root cell.
+      TYPE(Rand_state) :: current_state(-1:maxpow_)
+
+c     Default octree range and field switch, set by start_panphasia.
+      integer  layer_min,layer_max,indep_field
+
+!  This module stores information needed to access the part of Panphasia
+!  selected by a particular descriptor.
+c     Origins (at level lev_common-1) for the eight periodic images,
+c     indexed by (px,py,pz).
+      integer*8 xorigin_store(0:1,0:1,0:1)
+      integer*8 yorigin_store(0:1,0:1,0:1)
+      integer*8 zorigin_store(0:1,0:1,0:1)
+
+c     Level the box was initialised at, and the layer range used for
+c     the most recent evaluation.
+      integer*4 lev_common
+      integer*4 layer_min_store,layer_max_store
+
+c     Absolute origin, period and relative origin of the box.
+      integer*8 ix_abs_store,iy_abs_store,iz_abs_store    
+      integer*8 ix_per_store,iy_per_store,iz_per_store
+      integer*8 ix_rel_store,iy_rel_store,iz_rel_store
+
+c     Legendre block coefficients per level, and the cell coordinates
+c     they were last computed for (reset to -999 on initialisation).
+      real*8 exp_coeffs(8,0:7,-1:maxdim_)
+      integer*8 xcursor(0:maxdim_),ycursor(0:maxdim_),zcursor(0:maxdim_)
+
+c    Local box parameters
+
+      integer*4 ixshift(0:1,0:1,0:1)
+      integer*4 iyshift(0:1,0:1,0:1)
+      integer*4 izshift(0:1,0:1,0:1)
+
+
+c     more state variables
+c     Cache of the last parent cell evaluated by
+c     adv_panphasia_cell_properties and its half-resolution indices.
+      real*8 cell_data(9,0:7)
+      integer*4 ixh_last,iyh_last,izh_last
+c     First-call flags, tested against 0 and set to 1 by the routines.
+c     NOTE(review): no default initialisation is given here, so the
+c     creator of a state_data presumably has to zero them - confirm.
+      integer init
+
+      integer return_cell_props_init
+      integer reset_lecuyer_state_init
+      integer*8 p_xcursor(indmin:indmax),p_ycursor(indmin:indmax),p_zcursor(indmin:indmax)
+
+
+
+      end type state_data
+
+
+
+c     Switch for enabling custom spherical function
+c     Set isub_spherical_function = 1 to turn on the spherical function
+      integer*4 isub_spherical_function
+      parameter (isub_spherical_function=0)
+
+      end module pan_state
+
+
+c================================================================================
+c       Begin white noise routines
+c================================================================================
+      recursive subroutine start_panphasia(ldata,descriptor,ngrid,VERBOSE)
+c---------------------------------------------------------------------
+c  Set up ldata for the region named by descriptor, refined so that
+c  the region is covered by ngrid cells per side.
+c
+c  ldata       (inout) Panphasia state to initialise
+c  descriptor  (in)    phase descriptor string
+c  ngrid       (in)    cells per side; must equal the descriptor's
+c                      base size times a power of two
+c  VERBOSE     (in)    passed on to set_phases_and_rel_origin
+c
+c  Stops if ngrid is inconsistent with the descriptor.  On return the
+c  default octree range is layer_min = 0, layer_max = refinement level
+c  and indep_field = 1.
+c---------------------------------------------------------------------
+      use pan_state
+      implicit none
+      type(state_data), intent(inout) :: ldata
+      character*100 descriptor
+      integer ngrid
+      integer VERBOSE
+
+      integer*4 wn_level_base,i_base,i_base_y,i_base_z
+      integer*8 i_xorigin_base,i_yorigin_base,i_zorigin_base, check_rand
+      character*20 name
+
+      integer ratio
+      integer lextra
+      integer level_p
+
+      integer*8 ix_rel,iy_rel,iz_rel
+
+      call parse_descriptor(descriptor ,wn_level_base,i_xorigin_base,i_yorigin_base,
+     &                      i_zorigin_base,i_base,i_base_y,i_base_z,check_rand,name)
+
+c     The logarithm below is only defined for positive sizes.
+      if ((ngrid.le.0).or.(i_base.le.0))
+     &stop 'Error: ngrid and descriptor size must be positive'
+
+c     Number of extra refinement levels: log2(ngrid/i_base).  The small
+c     0.001 guards against round-off just below an exact integer.
+      lextra = (log10(real(ngrid)/real(i_base))+0.001)/log10(2.0)
+      ratio = 2**lextra
+
+      if (ratio*i_base.ne.ngrid) 
+     &stop 'Value of ngrid inconsistent with dim of region in Panphasia'
+
+      level_p = wn_level_base + lextra
+
+c     Set the refinement position at the origin.  The absolute origin
+c     and period are derived from the descriptor inside
+c     set_phases_and_rel_origin, so they are not computed here.
+   
+      ix_rel = 0
+      iy_rel = 0
+      iz_rel = 0
+
+      call set_phases_and_rel_origin(ldata,descriptor,level_p,ix_rel,iy_rel,iz_rel,VERBOSE)
+
+c    Finally set the octree functions required for making cosmological
+c    initial conditions.  These are stored in ldata.
+
+      ldata%layer_min = 0
+      ldata%layer_max = level_p
+      ldata%indep_field  = 1
+
+      end
+c=================================================================================
+      recursive subroutine set_phases_and_rel_origin(ldata,descriptor,lev,ix_rel,iy_rel,iz_rel,VERBOSE)
+c---------------------------------------------------------------------
+c  Initialise ldata for the Panphasia region named by descriptor at
+c  octree level lev, with the local box origin displaced by
+c  (ix_rel,iy_rel,iz_rel) cells from the descriptor origin.
+c
+c  ldata       (inout) state; re-initialised by initialise_panphasia
+c  descriptor  (in)    phase descriptor; validated, then parsed
+c  lev         (in)    level, must satisfy 0 <= lev <= maxlev_
+c  ix_rel,..   (in)    relative origin at level lev, non-negative
+c  VERBOSE     (in)    a summary is printed when VERBOSE > 1
+c
+c  Stops with a message if any range check fails.  On return the
+c  origins/shifts for the eight periodic images, the box geometry and
+c  lev are stored in ldata and all level cursors are reset to -999.
+c---------------------------------------------------------------------
+      use pan_state
+      !use descriptor_phases
+      implicit none
+      type(state_data), intent(inout) :: ldata
+      character*100 descriptor
+      integer lev
+      integer*8 ix_abs,iy_abs,iz_abs
+      integer*8 ix_per,iy_per,iz_per
+      integer*8 ix_rel,iy_rel,iz_rel
+      integer*8 xorigin,yorigin,zorigin
+
+      integer VERBOSE
+      integer MYID
+      integer*8 maxco
+      integer i
+      integer px,py,pz
+
+c     Zero of the same kind as the coordinates, so that max() is not
+c     called with arguments of different kinds.
+      integer*8 i8zero
+      parameter(i8zero = 0)
+
+      integer*4 wn_level_base,i_base,i_base_y,i_base_z
+      integer*8 i_xorigin_base,i_yorigin_base,i_zorigin_base, check_rand
+      integer lextra
+c     64-bit so that i_base*ratio cannot overflow before it is stored
+c     in the integer*8 periods.
+      integer*8 ratio
+      character*20 phase_name
+
+c-----------------------------------------------------------------------------------------------
+
+      call initialise_panphasia(ldata)
+
+      call validate_descriptor(ldata, descriptor,-1,check_rand)
+
+      call parse_descriptor(descriptor ,wn_level_base,i_xorigin_base,i_yorigin_base,
+     &                      i_zorigin_base,i_base,i_base_y,i_base_z,check_rand,phase_name)
+      lextra = lev - wn_level_base
+      ratio  = 2_Dint**lextra
+      
+      ix_abs = ishft(i_xorigin_base,lextra)
+      iy_abs = ishft(i_yorigin_base,lextra)
+      iz_abs = ishft(i_zorigin_base,lextra)
+
+      ix_per = i_base*ratio
+      iy_per = i_base*ratio
+      iz_per = i_base*ratio
+
+c-------------------------------------------------------------------------
+c    Error checking
+c-------------------------------------------------------------------------
+      if ((lev.lt.0).or.(lev.gt.maxlev_)) stop 'Level out of range! (1)'
+      
+
+      maxco = 2_dint**lev
+
+      if (ix_abs.lt.0) stop 'Error: ix_abs negative (1)'
+      if (iy_abs.lt.0) stop 'Error: iy_abs negative (1)'
+      if (iz_abs.lt.0) stop 'Error: iz_abs negative (1)'
+
+      if (ix_rel.lt.0) stop 'Error: ix_rel negative (1)'
+      if (iy_rel.lt.0) stop 'Error: iy_rel negative (1)'
+      if (iz_rel.lt.0) stop 'Error: iz_rel negative (1)'
+
+
+      if (ix_abs+ix_rel.ge.maxco)
+     &   stop 'Error: ix_abs + ix_rel out of range. (1)'
+      if (iy_abs+iy_rel.ge.maxco) 
+     &   stop 'Error: iy_abs + iy_rel out of range. (1)'
+      if (iz_abs+iz_rel.ge.maxco) 
+     &   stop 'Error: iz_abs + iz_rel out of range. (1)'
+
+c----------------------------------------------------------------------------------------
+c  To allow the local box to wrap around, if needed, define a series of eight
+c  'origins'.  For many purposes (ix,iy,iz) = (0,0,0) is the only origin needed.
+
+
+      do px=0,1
+       do py=0,1
+        do pz=0,1
+
+c        Origin of this periodic image at level lev-1 (clamped at 0).
+         xorigin = max(i8zero,( ix_abs + ix_rel - px*ix_per )/2)
+         yorigin = max(i8zero,( iy_abs + iy_rel - py*iy_per )/2)
+         zorigin = max(i8zero,( iz_abs + iz_rel - pz*iz_per )/2)
+
+c        Remainder of the level-lev origin relative to twice the
+c        level lev-1 origin.
+         ldata%ixshift(px,py,pz) = max(i8zero, ix_abs + ix_rel -px*ix_per) - 2*xorigin
+         ldata%iyshift(px,py,pz) = max(i8zero, iy_abs + iy_rel -py*iy_per) - 2*yorigin
+         ldata%izshift(px,py,pz) = max(i8zero, iz_abs + iz_rel -pz*iz_per) - 2*zorigin
+
+
+c        Store box details:  store the positions at level lev-1
+  
+
+         ldata%xorigin_store(px,py,pz) = xorigin
+         ldata%yorigin_store(px,py,pz) = yorigin
+         ldata%zorigin_store(px,py,pz) = zorigin
+
+        enddo
+       enddo
+      enddo
+
+      ldata%lev_common = lev
+
+
+      ldata%ix_abs_store = ix_abs
+      ldata%iy_abs_store = iy_abs
+      ldata%iz_abs_store = iz_abs
+
+      ldata%ix_per_store = ix_per
+      ldata%iy_per_store = iy_per
+      ldata%iz_per_store = iz_per
+
+      ldata%ix_rel_store = ix_rel
+      ldata%iy_rel_store = iy_rel
+      ldata%iz_rel_store = iz_rel
+
+ 
+c  Reset all cursor values to negative numbers.
+
+      do i=0,maxdim_
+       ldata%xcursor(i) = -999
+       ldata%ycursor(i) = -999
+       ldata%zcursor(i) = -999
+      enddo
+
+c  No process rank is available in this routine; MYID used to be read
+c  without ever being assigned.  Treat every caller as rank 0 so that
+c  the summary is printed deterministically when requested.
+      MYID = 0
+
+      if (VERBOSE.gt.1) then
+         if (MYID.lt.1) then
+            print*,'----------------------------------------------------------'
+            print*,'Successfully initialised Panphasia box at level ',lev
+            write (6,105) ix_abs,iy_abs,iz_abs
+            write (6,106) ix_rel,iy_rel,iz_rel
+            write (6,107) ix_per,iy_per,iz_per
+            write (6,*)  'Phases used: ',descriptor(1:len_trim(descriptor))
+            print*,'----------------------------------------------------------'
+         endif
+      endif
+ 105  format(' Abs origin: (',i12,',',i12,',',i12,')')
+ 106  format(' Rel origin: (',i12,',',i12,',',i12,')')
+ 107  format(' Periods   : (',i12,',',i12,',',i12,')') 
+      end 
+c================================================================================
+      recursive subroutine initialise_panphasia( ldata )
+c---------------------------------------------------------------------
+c  Fill the generator tables held in ldata:
+c    base_state      state obtained from Rand_seed(.,ninitialise)
+c    poweroffset(i)  offset of 2**i steps raised to the power nmulti_
+c    base_lev_start  start state of every level 0..maxdim_
+c    superjump       offset of 2**137 steps
+c  and verify one known random number; stops if the check fails.
+c---------------------------------------------------------------------
+      use Rand
+      use pan_state
+      implicit none
+
+      type(state_data), intent(inout) :: ldata
+
+      integer ninitialise
+      parameter (ninitialise=218)
+c     Value expected from the level-34 start state (run-time check).
+      real*8 rand_target
+      parameter (rand_target=0.828481889948473d0)
+
+      TYPE(Rand_state) :: lec_state
+      TYPE(Rand_offset) :: lec_offset
+      integer ilev
+      real*8 rand_num
+
+
+      call Rand_seed(lec_state,ninitialise)
+      call Rand_save(ldata%base_state,lec_state)
+
+c   Calculate offsets of powers of 2 times nmulti.  lec_offset is
+c   squared on every pass, so it holds the offset for 2**ilev steps.
+
+      call Rand_set_offset(lec_offset,1)
+      do ilev=0,maxpow_
+        ldata%poweroffset(ilev) = Rand_mul_offset(lec_offset,nmulti_)
+        lec_offset = Rand_mul_offset(lec_offset,2)
+      enddo
+
+c   Compute the base state for each level: start 8 steps past the base
+c   state, then boost by poweroffset(3*level) to reach the next level.
+
+      call Rand_load(lec_state,ldata%base_state)
+      lec_state = Rand_step(lec_state,8)
+
+      do ilev=0,maxdim_
+       call Rand_save(ldata%base_lev_start(1,ilev),lec_state)
+       lec_state = Rand_boost(lec_state,ldata%poweroffset(3*ilev))
+      enddo
+
+c   Set superjump to value 2**137 - used occasionally in computing
+c   Gaussian variables when the value of the returned random number is
+c   less than 10^-6.
+
+      call Rand_set_offset(ldata%superjump,1)
+      do ilev=1,137
+        ldata%superjump = Rand_mul_offset(ldata%superjump,2)
+      enddo
+
+c   Run time test to see if one particular value can be recovered.
+
+      call Rand_load(lec_state,ldata%base_lev_start(1,34))
+      call Rand_real(rand_num,lec_state)
+
+      if (abs(rand_num-rand_target).gt.1.e-14) then
+        print*,'Error in initialisation!'
+        print*,'Rand_num     = ',rand_num
+        print*,'Target value = ', rand_target
+        stop
+      endif
+      end
+c=================================================================================
+      recursive subroutine panphasia_cell_properties(ldata,ixcell,iycell,izcell,cell_prop)
+c---------------------------------------------------------------------
+c  Return the nine cell properties of cell (ixcell,iycell,izcell)
+c  using the layer range and field switch stored in ldata.  This is a
+c  thin wrapper around adv_panphasia_cell_properties.
+c---------------------------------------------------------------------
+      use pan_state
+      implicit none
+      type(state_data), intent(inout) :: ldata
+      integer*4 ixcell,iycell,izcell
+      real*8 cell_prop(9)
+
+      call adv_panphasia_cell_properties(ldata,ixcell,iycell,izcell,
+     &     ldata%layer_min,ldata%layer_max,ldata%indep_field,
+     &     cell_prop)
+      end
+c=================================================================================
+      recursive subroutine adv_panphasia_cell_properties(ldata,ixcell,iycell,izcell,layer_min,
+     &                                           layer_max,indep_field,cell_prop)
+c---------------------------------------------------------------------
+c  Return the nine cell properties of cell (ixcell,iycell,izcell) for
+c  an explicit octree layer range.
+c
+c  ldata        (inout) Panphasia state; caches the last parent cell
+c  ixcell,..    (in)    cell indices relative to the box origin
+c  layer_min,
+c  layer_max    (in)    layer range; layer_max <= ldata%lev_common.
+c                       layer_min = layer_max+1 returns zeros.
+c  indep_field  (in)    must lie in -1..1
+c  cell_prop    (out)   the nine values for the requested cell
+c
+c  The eight children of a parent cell are evaluated together by
+c  return_cell_props and kept in ldata%cell_data, so a request for a
+c  sibling of the previous cell is served from that cache.
+c---------------------------------------------------------------------
+      use pan_state
+      !use descriptor_phases
+      implicit none
+
+      type(state_data), intent(inout) :: ldata
+
+      integer*4 ixcell,iycell,izcell
+      integer layer_min,layer_max,indep_field
+      real*8 cell_prop(9)
+
+      integer*4 lev
+      integer*4 icell
+      integer*4 px,py,pz
+      integer*4 ixs,iys,izs
+      integer*4 ixh,iyh,izh
+      logical cached
+
+      lev = ldata%lev_common
+
+c------- Error checking -----------------------------
+
+      if (layer_min.gt.layer_max) then
+
+c        Not necessarily bad: no octree basis functions are required
+c        at this level and position, so return zeros.
+         if (layer_min-layer_max.eq.1) then
+            cell_prop = 0.0d0
+            return
+         endif
+
+         print*,'Warning: layer_min.gt.layer_max!'
+         print*,'layer_min = ',layer_min
+         print*,'layer_max = ',layer_max
+         print*,'ixcell,iycell,izcell',ixcell,iycell,izcell
+
+         call flush(6)
+         stop 'Error: layer_min.gt.layer_max'
+      endif
+
+      if (layer_max.gt.ldata%lev_common) then
+         print*,'lev_common = ',ldata%lev_common
+         print*,'layer_min  = ',layer_min
+         print*,'layer_max  = ',layer_max
+         stop 'Error: layer_max.gt.lev_common'
+      endif
+
+      if ((indep_field.lt.-1).or.(indep_field.gt.1)) 
+     & stop 'Error: indep_field out of range'
+
+c----------------------------------------------------
+c  Check which 'origin' to use: index 1 along an axis means the cell
+c  lies beyond the periodic boundary in that direction.
+
+      px = 0
+      py = 0
+      pz = 0
+
+      if (ldata%ix_rel_store+ixcell.ge.ldata%ix_per_store) px = 1
+      if (ldata%iy_rel_store+iycell.ge.ldata%iy_per_store) py = 1
+      if (ldata%iz_rel_store+izcell.ge.ldata%iz_per_store) pz = 1
+
+c  Shifted cell indices, parent (half-resolution) indices, and the
+c  child number 4*lx + 2*ly + lz within the parent.
+
+      ixs = ixcell+ldata%ixshift(px,py,pz)
+      iys = iycell+ldata%iyshift(px,py,pz)
+      izs = izcell+ldata%izshift(px,py,pz)
+
+      ixh = ixs/2
+      iyh = iys/2
+      izh = izs/2
+
+      icell = 4*mod(ixs,2) + 2*mod(iys,2) + mod(izs,2)
+
+c  Re-evaluate only when the parent cell or the layer range changed.
+
+      cached = (ldata%init.eq.1).and.(ixh.eq.ldata%ixh_last).and.
+     &   (iyh.eq.ldata%iyh_last).and.(izh.eq.ldata%izh_last).and.
+     &   (layer_min.eq.ldata%layer_min_store).and.
+     &   (layer_max.eq.ldata%layer_max_store)
+
+      if (.not.cached) then
+         call return_cell_props(ldata,lev,ixh,iyh,izh,px,py,pz,
+     &        layer_min,layer_max,indep_field,ldata%cell_data)
+
+c        Remember previous values.
+         ldata%ixh_last = ixh
+         ldata%iyh_last = iyh
+         ldata%izh_last = izh
+      endif
+
+c  Copy the required data.
+      cell_prop = ldata%cell_data(:,icell)
+
+      if (ldata%init.eq.0) ldata%init=1
+
+      end
+c=================================================================================
+      recursive subroutine return_cell_props(ldata,lev_input,ix_half,iy_half,iz_half,
+     &  px,py,pz,layer_min,layer_max,indep_field,cell_data)
+c---------------------------------------------------------------------
+c  Evaluate the data of the eight child cells of one parent cell.
+c
+c  ldata        (inout) Panphasia state; per-level cursors and
+c                       expansion coefficients are updated
+c  lev_input    (in)    level; must equal ldata%lev_common
+c  ix_half,..   (in)    parent-cell indices (level lev_input-1)
+c                       relative to the stored origin; non-negative
+c  px,py,pz     (in)    selects which of the eight stored origins
+c  layer_min,
+c  layer_max,
+c  indep_field  (in)    passed on to evaluate_panphasia
+c  cell_data    (out)   filled by evaluate_panphasia
+c
+c  Levels whose cursor already matches the requested position are
+c  not recomputed; the octree is descended from the first level that
+c  differs.  A change of layer_min/layer_max forces a rebuild from
+c  level max(1,layer_min-1).
+c---------------------------------------------------------------------
+      use Rand
+      use pan_state
+      !use descriptor_phases
+      implicit none
+      type(state_data), intent(inout) :: ldata
+      integer lev_input,ix_half,iy_half,iz_half,px,py,pz
+      integer layer_min,layer_max,indep_field 
+      real*8 cell_data(9,0:7)
+
+      real*8 garray(0:63)
+      integer lev
+      integer*8 xarray,yarray,zarray
+
+      integer i,istart,icell_name
+
+
+c      integer init
+c      data init/0/
+c      save init
+
+ 
+
+c--------------------------------------------------------
+c--------------------------- Initialise level -1 --------
+c--------------------------------------------------------
+
+      if (ldata%return_cell_props_init.eq.0) then                          ! First time called. Set up the Legendre coefficients    
+      ldata%return_cell_props_init = 1                                     ! for the root cell.   This is the first term on the
+      call Rand_load(ldata%current_state(-1),ldata%base_state) ! right hand side of the equation in appendix C of
+      call return_gaussian_array(ldata,-1,8,garray)      ! Jenkins 2013 that defines PANPHASIA.
+      ldata%exp_coeffs(1,0,-1) = garray(0)
+      ldata%exp_coeffs(2,0,-1) = garray(1)
+      ldata%exp_coeffs(3,0,-1) = garray(2)
+      ldata%exp_coeffs(4,0,-1) = garray(3)
+      ldata%exp_coeffs(5,0,-1) = garray(4)
+      ldata%exp_coeffs(6,0,-1) = garray(5)
+      ldata%exp_coeffs(7,0,-1) = garray(6)
+      ldata%exp_coeffs(8,0,-1) = garray(7)
+
+c     Record the layer range so that the change test further down
+c     does not fire on this first call.
+      ldata%layer_min_store = layer_min
+      ldata%layer_max_store = layer_max
+          
+      endif
+
+c--------------------------------------------------------
+c---------------------------- Error checking ------------
+c--------------------------------------------------------
+
+c     The parent cells live one level above the requested level.
+      lev = lev_input-1
+
+      if (lev_input.ne.ldata%lev_common) stop 'Box initialised at a different level !'
+      if (ix_half.lt.0) then
+          print*,'ix_half negative',ix_half
+          stop 'ix_half out of range!'
+      endif
+      if (iy_half.lt.0) stop 'iy_half out of range!'
+      if (iz_half.lt.0) then
+          print*,'iz_half negative',iz_half
+          stop 'iz_half out of range!' 
+      endif
+
+
+c     Absolute parent-cell coordinates at level lev.
+      xarray = ldata%xorigin_store(px,py,pz) + ix_half
+      yarray = ldata%yorigin_store(px,py,pz) + iy_half
+      zarray = ldata%zorigin_store(px,py,pz) + iz_half
+
+
+c   If layer_max or layer_min have changed, rebuild from the start and reset the
+c   recorded value of layer_max and layer_min
+
+      if ((layer_max.ne.ldata%layer_max_store).or.(layer_min.ne.ldata%layer_min_store)) then
+
+         if (layer_min.gt.layer_max) stop 'layer_min > layer_max : 2'
+
+         istart = max(1,layer_min-1)
+
+         ldata%layer_max_store = layer_max
+         ldata%layer_min_store = layer_min
+
+         goto 10
+
+      endif
+
+
+      if ((xarray.eq.ldata%xcursor(lev)).and.(yarray.eq.ldata%ycursor(lev)).and.(zarray.eq.ldata%zcursor(lev))) return ! Nothing to do.
+
+c===========================================================================================================
+c------------- First determine which levels need to be (re)computed
+c===========================================================================================================
+
+c     Search upwards from level lev-1 for the deepest level whose
+c     cursor matches the ancestor of the requested cell; recomputation
+c     starts one level below it, or at level 0 if none matches.
+      istart = 0
+      do i=lev-1,0,-1
+        if ((ishft(xarray,i-lev).eq.ldata%xcursor(i)).and.(ishft(yarray,i-lev).eq.ldata%ycursor(i)).and.
+     &         (ishft(zarray,i-lev).eq.ldata%zcursor(i))) then
+            istart = i+1
+            goto 10
+        endif
+      enddo
+
+ 10   continue
+
+
+c====================================================================================
+c------------- Now compute each level as required and update (x,y,z) cursor variables
+c====================================================================================
+
+      do i=istart,lev
+
+       icell_name = 0
+
+c      Ancestor coordinates at level i (right shift by lev-i bits).
+       ldata%xcursor(i) = ishft(xarray,i-lev)
+       ldata%ycursor(i) = ishft(yarray,i-lev)
+       ldata%zcursor(i) = ishft(zarray,i-lev)
+
+c      Child number within its parent, from the lowest coordinate bits.
+       if (btest(ldata%xcursor(i),0)) icell_name = icell_name + 4
+       if (btest(ldata%ycursor(i),0)) icell_name = icell_name + 2
+       if (btest(ldata%zcursor(i),0)) icell_name = icell_name + 1
+
+       call reset_lecuyer_state(ldata,i,ldata%xcursor(i),ldata%ycursor(i),ldata%zcursor(i))
+
+c      The spherical-function branch is compiled out unless
+c      isub_spherical_function is set to 1 in pan_state.
+       if (isub_spherical_function.ne.1) then
+           call return_gaussian_array(ldata,i,64,garray)
+       else
+           call return_oct_sf_expansion(ldata,i,lev,ldata%xcursor(i),ldata%ycursor(i),ldata%zcursor(i),
+     &                                    64,garray)
+       endif
+
+
+       call evaluate_panphasia(ldata,i,maxdim_,garray,layer_min,
+     &    layer_max, indep_field, icell_name,cell_data,ldata%exp_coeffs)
+
+      enddo
+      return
+      end
+c=================================================================================
+      recursive subroutine  evaluate_panphasia(ldata,nlev,maxdim,g,
+     &   layer_min,layer_max,indep_field,icell_name,cell_data,leg_coeff)
+      use pan_state
+      implicit none
+c---------------------------------------------------------------------------------
+c    This subroutine calculates the Legendre block coefficients for the eight child
+c    cells of an octree cell.
+c    Reads leg_coeff(:,icell_name,nlev-1) and g(1:64); writes leg_coeff(:,:,nlev) and cell_data.
+c----------------- Define subroutine arguments -----------------------------------
+      type(state_data), intent(inout) :: ldata   ! Not referenced in the body of this routine.
+      integer nlev,maxdim                        ! nlev: level written; maxdim: upper bound of leg_coeff's last index.
+      integer layer_min,layer_max,indep_field    ! Level thresholds and independent-field switch (applied below).
+      integer icell_name                         ! Second index of leg_coeff(:,:,nlev-1) selecting the parent cell.
+      real*8 leg_coeff(0:7,0:7,-1:maxdim),cell_data(0:8,0:7)   ! cell_data(0:7,i): child i coefficients; cell_data(8,i): g(57+i).
+      real*8 g(*)                                ! g(1:56): octree coefficients; g(57:64): independent field values.
+c    NOTE: g is modified in place (entries set to zero) when nlev >= layer_max or indep_field = 0.
+c----------------- Define constants using notation from appendix A of Jenkins 2013
+ 
+      real*8 a1,a2,b1,b2,b3,c1,c2,c3,c4
+
+      parameter(a1 = 0.5d0*sqrt(3.0d0),      a2 = 0.5d0)
+
+      parameter(b1 = 0.75d0,                 b2 = 0.25d0*sqrt(3.0d0))
+      parameter(b3 = 0.25d0)
+
+      parameter(c1 = sqrt(27.0d0/64.0d0),    c2 = 0.375d0)
+      parameter(c3 = sqrt(3.0d0/64.0d0),     c4 = 0.125d0)
+
+c----------------- Define octree variables --------------------------------
+
+      real*8 coeff_p000, coeff_p001, coeff_p010, coeff_p011
+      real*8 coeff_p100, coeff_p101, coeff_p110, coeff_p111
+
+      real*8 positive_octant_lc(0:7,0:1,0:1,0:1),temp_value(0:7,0:7)
+      integer i,j,ix,iy,iz
+      integer icx,icy,icz
+      integer iox,ioy,ioz
+      real*8 parity,isig
+      real*8 usually_rooteighth_factor
+c--------------------------------------------------------------------------
+
+c-------------  Set the Legendre block coefficients for the parent cell
+c               itself. These are either inherited from the octree above
+c               or set to zero depending on which levels of the octree
+c               have been selected to be populated with the octree
+c               basis functions.
+c---------------------------------------------------------------------------
+      if (nlev.ge.layer_min) then
+             coeff_p000  = leg_coeff(0,icell_name,nlev-1)
+             coeff_p001  = leg_coeff(1,icell_name,nlev-1)
+             coeff_p010  = leg_coeff(2,icell_name,nlev-1)
+             coeff_p011  = leg_coeff(3,icell_name,nlev-1)
+             coeff_p100  = leg_coeff(4,icell_name,nlev-1)
+             coeff_p101  = leg_coeff(5,icell_name,nlev-1)
+             coeff_p110  = leg_coeff(6,icell_name,nlev-1)
+             coeff_p111  = leg_coeff(7,icell_name,nlev-1)
+      else
+             coeff_p000  = 0.0d0
+             coeff_p001  = 0.0d0
+             coeff_p010  = 0.0d0
+             coeff_p011  = 0.0d0
+             coeff_p100  = 0.0d0
+             coeff_p101  = 0.0d0
+             coeff_p110  = 0.0d0
+             coeff_p111  = 0.0d0 
+      endif
+
+c   Apply layer_max and indep_field inputs ---------------------------------
+
+      if (indep_field.ne.-1) then
+         usually_rooteighth_factor = sqrt(0.125d0)
+      else
+         usually_rooteighth_factor = 0.0d0  ! This option returns only the indep field.
+      endif                                 ! For use in testing only.
+
+      if (nlev.ge.layer_max) then
+        do i=1,56
+        g(i) = 0.0d0               ! Set octree coefficients to zero as not required.
+        enddo
+      endif
+
+      if (indep_field.eq.0) then   ! Set the independent field to zero as not required.
+        do i=57,64
+        g(i) = 0.0d0
+        enddo
+      endif
+c-----------------------------------------------------------------------------
+c
+c
+c    The calculations immediately below evaluate the eight Legendre block coefficients for the
+c    child cell that is furthest from the absolute coordinate origin of the octree - we call
+c    this the positive octant cell.
+c
+c    The coefficients are given by a set of matrix equations which combine the
+c    coefficients of the Legendre basis functions of the parent cell itself, with
+c    the coefficients from the octree basis functions that occupy the
+c    parent cell.   
+c
+c    The Legendre basis function coefficients of the parent cell are stored in
+c    the variables, coeff_p000 - coeff_p111 and are initialised above.
+c
+c    The coefficients of the octree basis functions are determined by the
+c    first 56 entries of the array g, which is passed down into this
+c    subroutine.
+c
+c    These two sources of information are combined using a set of linear equations.
+c    The coefficients of these linear equations are taken from the inverses or
+c    equivalently transposes of the matrices given in appendix A of Jenkins 2013.
+c    The matrices in appendix A define the PANPHASIA octree basis functions
+c    in terms of Legendre blocks.
+c
+c    All of the Legendre block functions of the parent cell, and the octree basis
+c    functions of the parent cell share one of eight distinct symmetries with respect to
+c    reflection about the x1=0,x2=0,x3=0 planes (where the origin is taken as the parent 
+c    cell centre and x1,x2,x3 are parallel to the cell edges).
+c
+c    Each function has either purely reflectional symmetry (even parity) or
+c    reflectional symmetry with a sign change (odd parity) about each of the three principal
+c    planes through the cell centre. There are therefore 8 parity types. We can label each 
+c    parity type with a binary triplet. So 000 is pure reflectional symmetry about 
+c    all of the principal planes.
+c  
+c    In the code below the parent cell Legendre block functions, and octree functions are 
+c    organised into eight groups each with eight members. Each group has a common
+c    parity type.
+c
+c    We keep the contributions of each parity type to each of the eight Legendre basis
+c    functions occupying the positive octant cell separate. Once they have all been
+c    computed, we can apply the different symmetry operations and determine the
+c    Legendre block basis functions for all eight child cells at the same time.
+c---------------------------------------------------------------------------------------
+c    000  parity
+
+      positive_octant_lc(0, 0,0,0) =  1.0d0*coeff_p000
+      positive_octant_lc(1, 0,0,0) = -1.0d0*g(1)
+      positive_octant_lc(2, 0,0,0) = -1.0d0*g(2)
+      positive_octant_lc(3, 0,0,0) =  1.0d0*g(3)
+      positive_octant_lc(4, 0,0,0) = -1.0d0*g(4)
+      positive_octant_lc(5, 0,0,0) =  1.0d0*g(5)
+      positive_octant_lc(6, 0,0,0) =  1.0d0*g(6)
+      positive_octant_lc(7, 0,0,0) = -1.0d0*g(7)
+
+c    100 parity
+
+      positive_octant_lc(0, 1,0,0) =  a1*coeff_p100  - a2*g(8)
+      positive_octant_lc(1, 1,0,0) =  g(9)
+      positive_octant_lc(2, 1,0,0) =  g(10)
+      positive_octant_lc(3, 1,0,0) = -g(11)
+      positive_octant_lc(4, 1,0,0) =  a2*coeff_p100  + a1*g(8)
+      positive_octant_lc(5, 1,0,0) = -g(12) 
+      positive_octant_lc(6, 1,0,0) = -g(13)
+      positive_octant_lc(7, 1,0,0) =  g(14)
+
+c     010 parity
+
+      positive_octant_lc(0, 0,1,0) =  a1*coeff_p010 - a2*g(15)
+      positive_octant_lc(1, 0,1,0) =  g(16) 
+      positive_octant_lc(2, 0,1,0) =  a2*coeff_p010 + a1*g(15) 
+      positive_octant_lc(3, 0,1,0) = -g(17)
+      positive_octant_lc(4, 0,1,0) =  g(18)
+      positive_octant_lc(5, 0,1,0) = -g(19)
+      positive_octant_lc(6, 0,1,0) = -g(20)
+      positive_octant_lc(7, 0,1,0) =  g(21)
+
+
+c     001 parity
+
+      positive_octant_lc(0, 0,0,1) =  a1*coeff_p001 - a2*g(22)
+      positive_octant_lc(1, 0,0,1) =  a2*coeff_p001 + a1*g(22)
+      positive_octant_lc(2, 0,0,1) =  g(23)
+      positive_octant_lc(3, 0,0,1) = -g(24)
+      positive_octant_lc(4, 0,0,1) =  g(25)
+      positive_octant_lc(5, 0,0,1) = -g(26)
+      positive_octant_lc(6, 0,0,1) = -g(27)
+      positive_octant_lc(7, 0,0,1) =  g(28)
+
+c    110 parity
+
+      positive_octant_lc(0, 1,1,0) = b1*coeff_p110 - b2*g(29) + b3*g(30) - b2*g(31)
+      positive_octant_lc(1, 1,1,0) = -g(32)
+      positive_octant_lc(2, 1,1,0) = b2*coeff_p110 - b3*g(29) - b2*g(30) + b1*g(31)
+      positive_octant_lc(3, 1,1,0) =  g(33)
+      positive_octant_lc(4, 1,1,0) = b2*coeff_p110 + b1*g(29) + b2*g(30) + b3*g(31)
+      positive_octant_lc(5, 1,1,0) =  g(34)
+      positive_octant_lc(6, 1,1,0) = b3*coeff_p110 + b2*g(29) - b1*g(30) - b2*g(31)
+      positive_octant_lc(7, 1,1,0) = -g(35) 
+
+
+c     011 parity
+
+      positive_octant_lc(0, 0,1,1) = b1*coeff_p011 - b2*g(36) + b3*g(37) - b2*g(38)
+      positive_octant_lc(1, 0,1,1) = b2*coeff_p011 - b3*g(36) - b2*g(37) + b1*g(38)
+      positive_octant_lc(2, 0,1,1) = b2*coeff_p011 + b1*g(36) + b2*g(37) + b3*g(38)
+      positive_octant_lc(3, 0,1,1) = b3*coeff_p011 + b2*g(36) - b1*g(37) - b2*g(38)
+      positive_octant_lc(4, 0,1,1) = -g(39) 
+      positive_octant_lc(5, 0,1,1) =  g(40)
+      positive_octant_lc(6, 0,1,1) =  g(41)
+      positive_octant_lc(7, 0,1,1) = -g(42)
+
+c     101 parity
+
+      positive_octant_lc(0, 1,0,1) = b1*coeff_p101 - b2*g(43) + b3*g(44) - b2*g(45)
+      positive_octant_lc(1, 1,0,1) = b2*coeff_p101 - b3*g(43) - b2*g(44) + b1*g(45) 
+      positive_octant_lc(2, 1,0,1) = -g(46) 
+      positive_octant_lc(3, 1,0,1) =  g(47)
+      positive_octant_lc(4, 1,0,1) = b2*coeff_p101 + b1*g(43) + b2*g(44) + b3*g(45)
+      positive_octant_lc(5, 1,0,1) = b3*coeff_p101 + b2*g(43) - b1*g(44) - b2*g(45)
+      positive_octant_lc(6, 1,0,1) =  g(48)
+      positive_octant_lc(7, 1,0,1) = -g(49)
+
+c     111 parity
+
+      positive_octant_lc(0, 1,1,1) = c1*coeff_p111 - c2*g(50) - c2*g(51) - c2*g(52) + c3*g(53) + c3*g(54) + c3*g(55) - c4*g(56)
+      positive_octant_lc(1, 1,1,1) = c2*coeff_p111 + c1*g(50) - c2*g(51) + c2*g(52) - c3*g(53) + c3*g(54) + c4*g(55) + c3*g(56) 
+      positive_octant_lc(2, 1,1,1) = c2*coeff_p111 + c2*g(50) + c1*g(51) - c2*g(52) - c3*g(53) - c4*g(54) + c3*g(55) - c3*g(56)
+      positive_octant_lc(3, 1,1,1) = c3*coeff_p111 - c3*g(50) - c3*g(51) + c4*g(52) - c1*g(53) - c2*g(54) - c2*g(55) - c2*g(56) 
+      positive_octant_lc(4, 1,1,1) = c2*coeff_p111 - c2*g(50) + c2*g(51) + c1*g(52) + c4*g(53) - c3*g(54) + c3*g(55) + c3*g(56)
+      positive_octant_lc(5, 1,1,1) = c3*coeff_p111 + c3*g(50) - c4*g(51) - c3*g(52) + c2*g(53) - c1*g(54) - c2*g(55) + c2*g(56)
+      positive_octant_lc(6, 1,1,1) = c3*coeff_p111 + c4*g(50) + c3*g(51) + c3*g(52) + c2*g(53) + c2*g(54) - c1*g(55) - c2*g(56)
+      positive_octant_lc(7, 1,1,1) = c4*coeff_p111 - c3*g(50) + c3*g(51) - c3*g(52) - c2*g(53) + c2*g(54) - c2*g(55) + c1*g(56)
+c--------------------------------------------------------------------------------------------
+c 
+c
+c   We now calculate the Legendre basis coefficients for all eight child cells
+c   by applying the appropriate reflectional parities to the coefficients 
+c   calculated above for the positive octant child cell.
+c
+c   See equations A2 and A3 in appendix A of Jenkins 2013.
+c
+c   The reflectional parity is given by (ix,iy,iz) loops below.
+c
+c   The (icx,icy,icz) loops below, loop over the eight child cells.
+c
+c   The positive octant child cell is given below by  (icx=icy=icz=0) or i=7.
+c
+c   The combination ix*icx +iy*icy +iz*icz is either even or odd, depending
+c   on whether the parity change is even or odd.
+c
+c   The variables iox,ioy,ioz are used to loop over the different
+c   types of Legendre basis function.
+c
+c   The combination iox*icx + ioy*icy + ioz*icz is either even or odd
+c   and identifies which coefficients keep or change sign respectively
+c   due to a pure reflection about the principal planes.
+c--------------------------------------------------------------------------------------------
+
+      do iz=0,7
+       do iy=0,7
+        temp_value(iy,iz) = 0.0d0      ! Zero temporary sums
+       enddo
+      enddo
+c--------------------------------------------------------------------------------------------
+      do iz=0,1              ! Loop over z parity (0=keep sign, 1=change sign)
+       do iy=0,1             ! Loop over y parity (0=keep sign, 1=change sign)
+        do ix=0,1            ! Loop over x parity (0=keep sign, 1=change sign)
+
+
+         do icx=0,1                      ! Loop over x-child cells
+          do icy=0,1                     ! Loop over y-child cells
+           do icz=0,1                    ! Loop over z-child cells
+
+             if (mod(ix*icx+iy*icy+iz*icz,2).eq.0) then
+                  parity = 1.0d0
+             else
+                  parity =-1.0d0
+             endif
+
+             i = 7 - 4*icx -2*icy - icz               ! Calculate which child cell this is.
+
+
+             do iox=0,1                               ! Loop over Legendre basis function type                     
+              do ioy=0,1                              ! Loop over Legendre basis function type
+               do ioz=0,1                             ! Loop over Legendre basis function type
+
+                  j = 4*iox + 2*ioy + ioz
+
+                  if (mod(iox*icx + ioy*icy + ioz*icz,2).eq.0) then
+                       isig =  parity
+                  else
+                       isig = -parity
+                  endif
+
+                  temp_value(j,i) = temp_value(j,i) + isig*positive_octant_lc(j,ix,iy,iz)
+
+               enddo
+              enddo
+             enddo
+
+           enddo   
+          enddo
+         enddo
+
+        enddo
+       enddo
+      enddo
+
+
+c   Assign values of the output variables
+
+      do i=0,7
+       do j=0,7
+         leg_coeff(j,i,nlev) = temp_value(j,i)*usually_rooteighth_factor
+         cell_data(j,i)      = leg_coeff(j,i,nlev)
+       enddo
+      enddo
+
+c   Finally set the independent field values
+    
+      cell_data(8,0) = g(57)
+      cell_data(8,1) = g(58)
+      cell_data(8,2) = g(59)
+      cell_data(8,3) = g(60)
+      cell_data(8,4) = g(61)
+      cell_data(8,5) = g(62)
+      cell_data(8,6) = g(63)
+      cell_data(8,7) = g(64)
+
+
+      return
+      end
+c=================================================================================
+      recursive subroutine reset_lecuyer_state(ldata,lev,xcursor,ycursor,zcursor)
+      use pan_state
+      implicit none
+c---------------------------------------------------------------------------------
+c    Bring the generator state of level lev to the cell (xcursor,ycursor,zcursor)
+c    by calling advance_current_state, and record that cell in
+c    ldata%p_xcursor/p_ycursor/p_zcursor(lev).
+c
+c    The call is skipped when the requested cell has the same x and y as the
+c    cell recorded for this level by the previous call and its z is exactly
+c    one larger; only the record is updated in that case.
+c
+c    ldata   : state; the recorded cursors and the init flag live here.
+c    lev     : level whose cursor record is consulted and updated.
+c    xcursor,ycursor,zcursor : requested cell (not modified here).
+c---------------------------------------------------------------------------------
+      type(state_data), intent(inout) :: ldata
+      integer lev
+      integer*8 xcursor,ycursor,zcursor
+
+      logical next_in_z
+
+c    First call only: initialise the recorded cursors with negative values.
+      if (ldata%reset_lecuyer_state_init.eq.0) then
+          ldata%reset_lecuyer_state_init = 1
+          ldata%p_xcursor(indmin:indmax) = -9999
+          ldata%p_ycursor(indmin:indmax) = -9999
+          ldata%p_zcursor(indmin:indmax) = -9999
+      endif
+
+      next_in_z = (xcursor.eq.ldata%p_xcursor(lev)).and.
+     &            (ycursor.eq.ldata%p_ycursor(lev)).and.
+     &            (zcursor.eq.ldata%p_zcursor(lev)+1)
+
+      if (.not.next_in_z) then
+          call advance_current_state(ldata,lev,xcursor,ycursor,zcursor)
+      endif
+
+      ldata%p_xcursor(lev) = xcursor
+      ldata%p_ycursor(lev) = ycursor
+      ldata%p_zcursor(lev) = zcursor
+      return
+      end
+c=================================================================================
+      recursive subroutine advance_current_state(ldata,lev,x,y,z)
+      use Rand
+      use pan_state
+      implicit none
+c---------------------------------------------------------------------------------
+c    Load ldata%current_state(lev) from ldata%base_lev_start(1,lev) and, for
+c    lev > 0, boost it by an offset built from the cell coordinates (x,y,z).
+c
+c    lev   : octree level; the program stops unless 0 <= lev <= maxlev_.
+c    x,y,z : cell coordinates; the program stops unless each lies in
+c            0 .. 2**lev - 1.
+c
+c    The Rand offset routines are called with integer*4 values, so each
+c    coordinate is split as  coord = ndiv*nfactor + nrem.  The remainder is
+c    formed in 64-bit arithmetic (the integer*4 product ndiv*nfactor would
+c    overflow for coordinates of 2**31 or more) and the split is verified
+c    in 64-bit arithmetic before it is used.
+c---------------------------------------------------------------------------------
+      type(state_data), intent(inout) :: ldata
+      integer lev
+      integer*8 x,y,z
+
+      integer*8 lev_range
+      TYPE(Rand_offset) :: offset1,offset2
+      TYPE(Rand_offset) :: offset_x,offset_y,offset_z,offset_total
+      integer ndiv,nrem
+      integer*8 ndiv8,nrem8
+      integer nfactor
+      parameter (nfactor=291071) ! Value unimportant except has to be > 262144
+
+c-----   First some error checking ------------------------------------------
+      if ((lev.lt.0).or.(lev.gt.maxlev_)) stop 'Level out of range! (2)'
+      lev_range = 2_dint**lev
+
+      if ((x.lt.0).or.(x.ge.lev_range)) then
+          print*,'x,lev,lev_range',x,lev,lev_range
+          call flush(6)
+          stop 'x out of range!'
+      endif
+      if ((y.lt.0).or.(y.ge.lev_range)) then
+          print*,'y,lev,lev_range',y,lev,lev_range
+          call flush(6)
+          stop 'y out of range!'
+      endif
+      if ((z.lt.0).or.(z.ge.lev_range)) then
+          print*,'z,lev,lev_range',z,lev,lev_range
+          call flush(6)
+          stop 'z out of range!'
+      endif
+
+      call Rand_load(ldata%current_state(lev),ldata%base_lev_start(1,lev))
+
+      if (lev.eq.0) return
+
+c     Calculate z-offset
+      ndiv  = z/nfactor
+      ndiv8 = ndiv
+      nrem  = z - ndiv8*nfactor
+      nrem8 = nrem
+
+      if (ndiv8*nfactor+nrem8.ne.z)  stop 'Error in z ndiv nrem'
+
+      call Rand_set_offset(offset1,ndiv)
+      offset1 = Rand_mul_offset(offset1,nfactor)
+      call Rand_set_offset(offset2,nrem)
+      offset2 = Rand_add_offset(offset1,offset2)
+      offset_z = Rand_mul_offset(offset2,nmulti_)
+
+c     Calculate y-offset
+      ndiv  = y/nfactor
+      ndiv8 = ndiv
+      nrem  = y - ndiv8*nfactor
+      nrem8 = nrem
+
+      if (ndiv8*nfactor+nrem8.ne.y) stop 'Error in y ndiv nrem'
+
+      offset1 =  Rand_mul_offset(ldata%poweroffset(lev),ndiv)
+      offset1 =  Rand_mul_offset(offset1,nfactor)
+      offset2 =  Rand_mul_offset(ldata%poweroffset(lev),nrem)
+      offset_y = Rand_add_offset(offset1,offset2)
+
+c     Calculate x-offset
+      ndiv  = x/nfactor
+      ndiv8 = ndiv
+      nrem  = x - ndiv8*nfactor
+      nrem8 = nrem
+
+      if (ndiv8*nfactor+nrem8.ne.x) then
+           print*,'ndiv,nfactor,nrem,x',ndiv,nfactor,nrem,x
+           print*,'ndiv*nfactor+nrem',ndiv8*nfactor+nrem8
+           print*,'x-ndiv*nfactor-nrem',x-ndiv8*nfactor-nrem8
+           stop 'Error in x ndiv nrem'
+      endif
+
+      offset1 = Rand_mul_offset(ldata%poweroffset(2*lev),ndiv)
+      offset1 = Rand_mul_offset(offset1,nfactor)
+      offset2 = Rand_mul_offset(ldata%poweroffset(2*lev),nrem)
+      offset_x = Rand_add_offset(offset1,offset2)
+
+      offset1      = Rand_add_offset(offset_x,offset_y)
+      offset_total = Rand_add_offset(offset1, offset_z)
+
+      ldata%current_state(lev) = Rand_boost(ldata%current_state(lev),offset_total)
+
+      return
+      end
+c=================================================================================
+      recursive subroutine return_gaussian_array(ldata,lev,ngauss,garray)
+      use Rand
+      use pan_state
+      implicit none
+c---------------------------------------------------------------------------------
+c    Fill garray(0:ngauss-1) with Gaussian pseudorandom numbers generated from
+c    ldata%current_state(lev).
+c
+c    ldata  : state; current_state(lev) is the state handed to Rand_real and
+c             superjump is the offset used for the extra draws described below.
+c    lev    : level whose generator state is used.
+c    ngauss : number of values wanted; must be even (the program stops if not).
+c    garray : output array, indexed from 0.
+c---------------------------------------------------------------------------------
+      type(state_data), intent(inout) :: ldata
+      integer lev,ngauss
+      real*8 garray(0:*)
+
+      TYPE(Rand_state) :: side_state
+      real*8 pi_val,ufloor
+      parameter (pi_val=3.1415926535897932384d0, ufloor=1.d-6)
+      real*8 draw,radius,phase
+      integer k,nretry
+
+      if (mod(ngauss,2).ne.0) then
+         stop 'Error in return_gaussian_array - even pairs only'
+      endif
+
+c   Stage 1: uniformly distributed pseudorandom numbers between 0 and 1
+c   (method of appendix B of Jenkins 2013).  A draw below ufloor is replaced
+c   by ufloor times a further draw taken from a copy of the state boosted by
+c   superjump; this repeats, multiplying by ufloor each time, while the further
+c   draws are themselves below ufloor.
+      do k=0,ngauss-1
+       call Rand_real(garray(k),ldata%current_state(lev))
+       if (.not.(garray(k).lt.ufloor)) cycle
+       garray(k)  = ufloor
+       side_state = Rand_boost(ldata%current_state(lev),ldata%superjump)
+       nretry     = 0
+       do
+          call Rand_real(draw,side_state)
+          nretry = nretry+1
+          if (.not.(draw.lt.ufloor)) exit
+          garray(k)  = garray(k)*ufloor
+          side_state = Rand_boost(side_state,ldata%superjump)
+          if (nretry.gt.100) then
+             print*,'Too may iterations in return_gaussian_array!'
+             call flush(6)
+             stop
+          endif
+       enddo
+       garray(k) = garray(k)*draw
+      enddo
+
+c   Stage 2: Box-Muller transformation of consecutive pairs (k,k+1).
+      do k=0,ngauss-2,2
+       radius = sqrt(-2.0d0*log(garray(k)))
+       phase  = 2.0d0*pi_val*garray(k+1)
+       garray(k)   = radius*cos(phase)
+       garray(k+1) = radius*sin(phase)
+      enddo
+      end
+c=================================================================================
+      recursive subroutine parse_descriptor(string,l,ix,iy,iz,side1,side2,side3,check_int,name)
+      implicit none
+c---------------------------------------------------------------------------------
+c    Parse a Panphasia descriptor of the form
+c        [Panph1,L<l>,(<ix>,<iy>,<iz>),S<side>,CH<check_int>,<name>]
+c    or  [Panph1,L<l>,(<ix>,<iy>,<iz>),D(<side1>,<side2>,<side3>),CH<check_int>,<name>]
+c
+c    string    (in)  : descriptor text; leading blanks are skipped.
+c    l         (out) : level read after 'L'.
+c    ix,iy,iz  (out) : the three integers read from the first bracket.
+c    side1-3   (out) : side lengths; the S form sets all three to <side>.
+c    check_int (out) : integer read after 'CH'.
+c    name      (out) : text between the last comma and ']', at most 20 characters.
+c
+c    On a malformed descriptor the error number is printed and the program stops:
+c       1  '[Panph1,' not found          7  '(' missing after 'D'
+c       2  'L' missing                   8  ')' missing after the D sides
+c       3  level field missing or empty  9  ',CH' missing after the side field
+c       4  '(' missing before ix        10  ',' missing after the check number
+c       5  ',' missing after ')'        11  closing ']' missing
+c       6  side field not 'S' or 'D'    12  numeric field missing or unreadable
+c---------------------------------------------------------------------------------
+      integer nchar
+      parameter(nchar=100)
+      character*100  string
+      integer*4 l,side1,side2,side3,ierror
+      integer*8 ix,iy,iz
+      integer*8 check_int
+      character*20 name
+      integer ip,iq,ir,ios
+      character*(nchar+20) buf
+
+      ierror = 0
+c    Work on a blank-padded copy so that look-ahead substrings such as
+c    buf(ip:ip+7) stay inside the variable even at the end of the text.
+      buf = string
+      ip = 1
+      do while ((ip.le.nchar).and.(buf(ip:ip).eq.' '))
+       ip = ip + 1
+      enddo
+
+      if (buf(ip:ip+7).ne.'[Panph1,') then
+           ierror = 1
+           print*,buf(ip:ip+7)
+           goto 10
+      endif
+
+      ip = ip+8
+      if (buf(ip:ip).ne.'L') then
+          ierror = 2
+          goto 10
+      endif
+
+c    Level: buf(ip:iq) runs up to and including the terminating comma.
+      ip = ip+1
+      iq  = ip + scan( buf(ip:nchar),',') -1
+      if (iq.le.ip) then
+          ierror = 3
+          goto 10
+      endif
+      read (buf(ip:iq),*,iostat=ios) l
+      if (ios.ne.0) goto 20
+
+      ip = iq+1
+      if (buf(ip:ip).ne.'(') then
+         ierror = 4
+         goto 10
+      endif
+
+c    Coordinates: buf(ip:iq) is the text strictly inside the brackets.
+      ip = ip+1
+      iq = ip + scan( buf(ip:nchar),')') -2
+      if (iq.lt.ip) goto 20
+      read(buf(ip:iq),*,iostat=ios) ix,iy,iz
+      if (ios.ne.0) goto 20
+
+      ip = iq+2
+      if (buf(ip:ip).ne.',') then
+         ierror = 5
+         goto 10
+      endif
+
+      ip = ip+1
+      if ((buf(ip:ip).ne.'S').and.(buf(ip:ip).ne.'D')) then
+         ierror = 6
+         goto 10
+      endif
+
+      if (buf(ip:ip).eq.'S') then
+c       Cubic region: one side length, terminated by the comma before 'CH'.
+        ip = ip + 1
+        iq = ip + scan( buf(ip:nchar),',') -2
+        if (iq.lt.ip) goto 20
+        read (buf(ip:iq),*,iostat=ios) side1
+        if (ios.ne.0) goto 20
+        side2 = side1
+        side3 = side1
+        iq = iq+1
+        if (buf(iq:iq+2).ne.',CH') then
+           print*,buf(ip:iq),buf(iq:iq+2)
+           ierror = 9
+           goto 10
+        endif
+      else
+c       Cuboid region: three side lengths inside brackets.
+        ip = ip + 1
+        if (buf(ip:ip).ne.'(') then
+           ierror = 7
+           goto 10
+        endif
+        ip = ip + 1
+        iq = ip + scan( buf(ip:nchar),')') -2
+        if (iq.eq.ip-2) then
+           ierror = 8
+           goto 10
+        endif
+        if (iq.lt.ip) goto 20
+        read (buf(ip:iq),*,iostat=ios) side1,side2,side3
+        if (ios.ne.0) goto 20
+c       Step over the closing bracket.
+        iq = iq + 2
+        if (buf(iq:iq+2).ne.',CH') then
+            ierror = 9
+            goto 10
+        endif
+      endif
+
+c    Check number: the digits between 'CH' and the next comma.
+      ip = iq + 3
+      iq = ip + scan( buf(ip:nchar),',') -2
+      if (iq.eq.ip-2) then
+          ierror = 10
+          goto 10
+      endif
+      if (iq.lt.ip) goto 20
+      read (buf(ip:iq),*,iostat=ios) check_int
+      if (ios.ne.0) goto 20
+
+c    Name: everything up to ']', truncated to the 20 characters name can hold.
+      ip = iq + 2
+      ir = ip + scan( buf(ip:nchar),']') -2
+      iq = min(ir,ip+19)
+      name = ' '
+      if (iq.ge.ip) name = buf(ip:iq)
+      iq = ir + 1
+      if (buf(iq:iq).ne.']') ierror = 11
+      goto 10
+c    A numeric field was missing, empty or could not be read.
+ 20   ierror = 12
+ 10   continue
+      if (ierror.eq.0) return
+      print*,'Error reading panphasian descriptor. Error number:',ierror
+      stop
+      end
+c=================================================================================
+      recursive subroutine compose_descriptor(l,ix,iy,iz,side,check_int,name,string)
+      implicit none
+c---------------------------------------------------------------------------------
+c    Compose the descriptor text
+c        [Panph1,L<level>,(<ix>,<iy>,<iz>),S<side>,CH<check_int>,<name>]
+c    While ix, iy, iz and side are all even they are halved and the level is
+c    lowered by one, so the descriptor is written at the coarsest level.
+c    NOTE: ix, iy, iz and side are returned in this reduced form; l is unchanged.
+c    An all-zero (ix,iy,iz,side) would halve for ever, so it is left unreduced.
+c    check_int may be negative (its sign is kept); name loses trailing blanks.
+c    string is intent(out) and receives the composed text, blank padded.
+c---------------------------------------------------------------------------------
+      character*100,intent(out)::string
+      character*20 name
+      integer*4 l,ltemp
+      integer*8 side
+      integer*8 ix,iy,iz
+      integer*8 check_int
+      character*50 temp1,temp2,temp3,temp4,temp5,temp6
+      integer ip1,ip2,ip3,ip4,ip5,ip6
+
+      ltemp = l
+      do while ((mod(ix,2).eq.0).and.(mod(iy,2).eq.0).and.(mod(iz,2).eq.0).and.
+     &          (mod(side,2).eq.0).and.
+     &          ((ix.ne.0).or.(iy.ne.0).or.(iz.ne.0).or.(side.ne.0)))
+        ix = ix/2
+        iy = iy/2
+        iz = iz/2
+        side = side/2
+        ltemp = ltemp-1
+      enddo
+
+c    List-directed writes pad on the left: ipN marks the first digit (or sign).
+      write (temp1,*) ltemp
+      ip1= scan(temp1,'0123456789')
+      write (temp2,*) ix
+      ip2= scan(temp2,'0123456789')
+      write (temp3,*) iy
+      ip3= scan(temp3,'0123456789')
+      write (temp4,*) iz
+      ip4= scan(temp4,'0123456789')
+      write (temp5,*) side
+      ip5= scan(temp5,'0123456789')
+      write (temp6,*) check_int
+      ip6= scan(temp6,'-0123456789')
+      string='[Panph1,L'//temp1(ip1:len_trim(temp1))//',('//temp2(ip2:len_trim(temp2))
+     &   //','//temp3(ip3:len_trim(temp3))//','//temp4(ip4:len_trim(temp4))//'),S'
+     &   // temp5(ip5:len_trim(temp5))//',CH'//temp6(ip6:len_trim(temp6))//
+     &  ','//name(1:len_trim(name))//']'
+      return
+      end
+c=================================================================================
+      recursive subroutine validate_descriptor(ldata,string,MYID,check_number)
+      use pan_state
+      implicit none
+c---------------------------------------------------------------------------------
+c    Parse the descriptor in string, check that its level and coordinates are
+c    admissible, and compare the check number it carries with one recomputed
+c    here from three generator states and the descriptor name.
+c
+c    ldata        : state passed to initialise_panphasia and advance_current_state.
+c    string       : descriptor text handed to parse_descriptor.
+c    MYID         : diagnostics are printed only when MYID is 0 (the mismatch
+c                   report is printed for every MYID).
+c    check_number : set to the recomputed value only when the descriptor carries
+c                   the override value -999; otherwise it is left untouched.
+c
+c    The program stops if any check fails or the check numbers disagree.
+c---------------------------------------------------------------------------------
+      type(state_data), intent(inout) :: ldata
+      character*100 string
+      integer*8 check_number
+      integer MYID
+
+      character*20 phase_name
+      integer*4 lev
+      integer*8 ix_abs,iy_abs,iz_abs
+      integer*4 ix_base,iy_base,iz_base
+      integer*8 xval,yval,zval
+      integer*8 maxco
+      logical   all_even
+
+      TYPE(Rand_state) :: state
+      real*8 rand_num
+      integer val_state(5)
+      integer ascii_list(0:255)
+      integer*8 mconst,check_total,check_rand
+      parameter(mconst = 2147483647_Dint)
+      integer i
+      integer*8 ii
+
+      call parse_descriptor(string,lev,ix_abs,iy_abs,iz_abs,
+     &                  ix_base,iy_base,iz_base,check_rand,phase_name)
+
+c-------------------------------------------------------------------------
+c    Admissibility checks, in the same order as the stop messages below
+c-------------------------------------------------------------------------
+      if ((lev.lt.0).or.(lev.gt.maxlev_)) then
+            print*,'lev,maxlev',lev,maxlev_
+            call flush(6)
+            stop 'Level out of range! (3)'
+      endif
+
+c    A descriptor whose three coordinates and three sides are all even is rejected.
+      all_even = (mod(ix_abs,2).eq.0).and.(mod(iy_abs,2).eq.0).and.(mod(iz_abs,2).eq.0)
+     &      .and.(mod(ix_base,2).eq.0).and.(mod(iy_base,2).eq.0).and.(mod(iz_base,2).eq.0)
+      if (all_even) stop 'Parameters not at lowest level'
+
+      maxco = 2_dint**lev
+
+      if (ix_abs.lt.0) stop 'Error: ix_abs negative (2)'
+      if (iy_abs.lt.0) stop 'Error: iy_abs negative (2)'
+      if (iz_abs.lt.0) stop 'Error: iz_abs negative (2)'
+
+      if (ix_abs+ix_base.ge.maxco) stop 'Error: ix_abs + ix_per out of range.'
+      if (iy_abs+iy_base.ge.maxco) stop 'Error: iy_abs + iy_per out of range.'
+      if (iz_abs+iz_base.ge.maxco) stop 'Error: iz_abs + iz_per out of range.'
+
+      check_total = 0
+      call initialise_panphasia(ldata)
+
+c    First corner: last cell of the region along x
+      xval = ix_abs + ix_base - 1
+      yval = iy_abs
+      zval = iz_abs
+      call advance_current_state(ldata,lev,xval,yval,zval)
+      call Rand_real(rand_num,ldata%current_state(lev))
+      call Rand_save(val_state,ldata%current_state(lev))
+      check_total = check_total + val_state(5)
+      if (MYID.eq.0) then
+         print*,'--------------------------------------'
+         print*,'X-corner rand = ',rand_num
+         print*,'State:',val_state
+      endif
+
+c    Second corner: last cell of the region along y
+      xval = ix_abs
+      yval = iy_abs + iy_base - 1
+      zval = iz_abs
+      call advance_current_state(ldata,lev,xval,yval,zval)
+      call Rand_real(rand_num,ldata%current_state(lev))
+      call Rand_save(val_state,ldata%current_state(lev))
+      check_total = check_total + val_state(5)
+      if (MYID.eq.0) then
+         print*,'Y-corner rand = ',rand_num
+         print*,'State:',val_state
+      endif
+
+c    Third corner: last cell of the region along z
+      xval = ix_abs
+      yval = iy_abs
+      zval = iz_abs + iz_base - 1
+      call advance_current_state(ldata,lev,xval,yval,zval)
+      call Rand_real(rand_num,ldata%current_state(lev))
+      call Rand_save(val_state,ldata%current_state(lev))
+      check_total = check_total + val_state(5)
+      if (MYID.eq.0) then
+         print*,'z-corner rand = ',rand_num
+         print*,'State:',val_state
+         print*,'--------------------------------------'
+      endif
+
+c     Now encode the name.  Starting from the base state (the state which
+c     gives r0, the first random number in Panphasia) one integer is taken
+c     for each of the 256 character codes.  For every non-blank character of
+c     the name the integer belonging to its ascii value is added to the sum,
+c     weighted by the position of the character in the name so that simple
+c     anagrams do not give the same score.  The sum is finally reduced
+c     mod m to give the check number.
+      call Rand_load(state,ldata%base_state)
+      do i=0,255
+        call Rand_real(rand_num,state)
+        call Rand_save(val_state,state)
+        ascii_list(i) = val_state(5)
+      enddo
+
+      do ii=1,len_trim(phase_name)
+       check_total = check_total + ii*ascii_list(iachar(phase_name(ii:ii)))
+      enddo
+      check_total = mod(check_total,mconst)
+
+      if (check_rand.eq.-999) then
+          check_number = check_total     ! override the safety check number.
+      else if (check_rand.ne.check_total) then
+          print*,'Inconsistency in the input panphasia descriptor ',MYID
+          print*,'Check_rand  = ',check_rand
+          print*,'val_state(5) =',val_state(5)
+          print*,'xval,yval,zval',xval,yval,zval
+          print*,'lev_val =  ',lev
+          call flush(6)
+          stop
+      endif
+      return
+      end
+c=================================================================================
+      recursive subroutine generate_random_descriptor(ldata,string)
+      use Rand
+      use pan_state
+      implicit none
+      type(state_data), intent(inout) :: ldata
+      character*100  string
+      character*100  instring
+      character*20   name
+      integer*4 unix_timestamp
+
+      real*8 lbox
+      real*8 lpanphasia
+      parameter (lpanphasia = 25000000.0)  ! Units of Mpc/h
+      integer level
+      integer*8 cell_dim
+      integer val_state(5)
+
+      TYPE(Rand_state) :: state
+      TYPE(Rand_offset) :: offset
+
+      real*8 rand_num1,rand_num2
+      integer*8 mconst,check_int
+      parameter(mconst = 2147483647_Dint)
+      integer*8 mfac,imajor,iminor
+      parameter(mfac=33554332_Dint)
+      integer ascii_list(0:255)
+      integer i,lnblnk
+      integer*8 ii
+      integer mult
+
+      integer*8 ixco,iyco,izco,irange
+
+      print*,'___________________________________________________________'
+      print*
+      print*,'            Generate a random descriptor                   '
+      print*
+      print*,'The code uses the time (the unix timestamp) plus some extra '
+      print*,'information personal to the user to choose a random region  '
+      print*,'within PANPHASIA.  The user must also specify the side length'
+      print*,'of the cosmological volume. The code assumes that the whole of'
+      print*,'PANPHASIA is 25000 Gpc/h on a side and selects an appropriate '
+      print*,'level in the octree for the descriptor.  '
+      print*,'Assuming this scaling the small scale power is defined down '
+      print*,'to a mass scale of around 10^{-12} solar masses.'
+      print*      
+      print*,'The user must also specify a human readable label for the '
+      print*,'descriptor of less than 21 characters.'
+      print*,'___________________________________________________________'
+      print*
+      print*,'Press return to continue '
+      read (*,*) 
+      print*
+      print*,'___________________________________________________________'
+      print*,'Enter the box side-length in Mpc/h units'
+      read (*,*) lbox
+      print*,'___________________________________________________________'
+      print*
+      print*
+ 5    continue
+      print*,'Enter up to 20 character name to label the descriptor (no spaces)'
+      read (*,'(a)') name
+      if ((len_trim(instring).lt.21).or.(scan(name,' ').le.len_trim(name))) goto 5
+      print*,'___________________________________________________________'
+      print*
+      print* 
+      print*,'___________________________________________________________'
+      print*,'The phases for the simulation are described by whole octree '
+      print*,'cells. Enter an odd integer that defines the number of cells '
+      print*,'you require in one dimension.  Choose this number carefully  '
+      print*,'as it will limit the possible 1-D sizes of  the of the Fourier '
+      print*,'transforms that can be used to make initial conditions to a product '
+      print*,'of this integer times any power of two. In which case the only'
+      print*,'choice is 1.)'
+      print*,'(I would recommend 3 unless the initial condition code is'
+      print*,'incapable of using grid sizes that are not purely powers of two.'
+      print*,'___________________________________________________________'
+      print*
+ 7    continue
+      print*,'Enter number of octree cells on an edge (positive odd number only) '
+      read (*,*) cell_dim
+      if ((cell_dim.le.0).or.(mod(cell_dim,2).eq.0)) goto 7
+      print*,'___________________________________________________________' 
+      call system('date +%s>tempfile_42526037646')
+      open(16,file='tempfile_42526037646',status='old')
+      read (16,*) unix_timestamp
+      close(16)
+      call system('/bin/rm tempfile_42526037646') 
+
+      print*,'Unix_timestamp determined. Value: ',unix_timestamp
+      print*,'___________________________________________________________'
+      print*
+      print*
+      print*
+      print*,'___________________________________________________________'
+      print*,'The code has just read the unix timestamp and will use this'
+      print*,'to help choose a random region in PANPHASIA.  Although it is'
+      print*,'perhaps unlikely that someone else is also running this code at '
+      print*,'the same time to the nearest second, to make it more likely'
+      print*,' still that the desciptor to be generated is unique'
+      print*,'please enter your name or some other piece of information'
+      print*,'below that you think is unlikely to be used by anyone else'
+      print*,'___________________________________________________________'
+
+      print*
+
+ 10   continue
+      print*,'Please enter your name (a minimum of six characters)'
+      read (*,'(a)') instring                         !'
+      if (len_trim(instring).lt.6) goto 10
+
+      level =  int(log10(dble(cell_dim)*lpanphasia/lbox)/log10(2.0d0))
+
+      if (level.gt.50) stop 'level >50 '
+
+
+
+c      'd' lines allow the generation of a large set of
+c       descriptors. Use to check that they are randomly
+c       positioned over the available volume.
+
+
+c    First use the unix timestamp to initialises the
+c    random generator.
+
+      call Rand_seed(state,unix_timestamp)
+      
+      call Rand_save(ldata%base_state,state)
+     
+
+c   First generate an integer from the user data.
+      call Rand_load(state,ldata%base_state)
+
+      do i=0,255
+      call Rand_real(rand_num1,state)
+      call Rand_save(val_state,state)
+      ascii_list(i) = val_state(5)
+      enddo
+
+      call Rand_set_offset(offset,1)
+
+      do ii=1,lnblnk(instring)
+       mult = mod(ii*ascii_list(iachar(instring(ii:ii))),mconst)
+       offset =  Rand_mul_offset(offset,mult)
+      enddo
+
+      call Rand_load(state,ldata%base_state)
+      state = Rand_boost(state,offset)          ! Starting point for choosing location.
+
+ 20   continue
+
+      irange = 2_Dint**level
+      imajor = irange/mfac
+      iminor = mod(irange,mfac)
+
+      call Rand_real(rand_num1,state)
+      call Rand_real(rand_num2,state)
+
+      ixco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor)
+
+      if (ixco+cell_dim.ge.irange) goto 20      ! Invalid descriptor
+
+      call Rand_real(rand_num1,state)
+      call Rand_real(rand_num2,state)
+
+      iyco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor)
+
+      if (iyco+cell_dim.ge.irange) goto 20      ! Invalid descriptor
+
+      call Rand_real(rand_num1,state)
+      call Rand_real(rand_num2,state)
+
+      izco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor)
+
+      if (izco+cell_dim.ge.irange) goto 20      ! Invalid descriptor
+
+
+c     Value of the check digit is not known. Use validate_descriptor to compute it.
+
+      check_int = -999  ! Special value required to make validate_descriptor 
+                        ! return the check digit.
+
+      call compose_descriptor(level,ixco,iyco,izco,cell_dim,check_int,name,string)
+
+      call validate_descriptor(ldata,string,-1,check_int)
+
+      call compose_descriptor(level,ixco,iyco,izco,cell_dim,check_int,name,string)
+
+
+      return
+      end
+c=================================================================================
+      recursive subroutine demo_basis_function_allocator
+c---------------------------------------------------------------------
+c  Demonstration driver for layer_choice.
+c
+c  Three trial refinements are defined (octree levels 22, 23 and 24,
+c  each 768 cells on a side).  For every refinement one plane of
+c  constant ix is scanned over iy,iz = 0..767, and the values of
+c  layer_min, layer_max and indep_field returned by layer_choice are
+c  written, together with iy and iz, to the list-directed text file
+c  ascii_dump_r<n>, where <n> is the refinement number.
+c
+c  The routine takes no arguments; its only effect is the three files.
+c---------------------------------------------------------------------
+      implicit none
+      integer nmax
+      parameter (nmax=10)
+
+      integer*4 wn_level(nmax)
+      integer*8 ix_abs(nmax),iy_abs(nmax),iz_abs(nmax)
+      integer*8 ix_per(nmax),iy_per(nmax),iz_per(nmax)
+      integer*8 ix_rel(nmax),iy_rel(nmax),iz_rel(nmax)
+      integer*8 ix_dim(nmax),iy_dim(nmax),iz_dim(nmax)
+
+      integer ix,iy,iz,nref
+      integer layer_min,layer_max,indep_field
+      integer inv_open
+
+      integer iref
+      integer ix_plane(3)
+      character*13 dump_name(3)
+
+c  Number of refinements and the value handed to layer_choice as its
+c  x_fact argument.
+      nref     = 3
+      inv_open = 9
+
+c  Octree level of each refinement.
+      wn_level(1:3) = (/ 22, 23, 24 /)
+
+c  Absolute origins.
+      ix_abs(1:3) = (/ 2000000, 4000000,  8000000 /)
+      iy_abs(1:3) = (/ 1500032, 3000064,  6000128 /)
+      iz_abs(1:3) = (/ 2500032, 5000064, 10000128 /)
+
+c  Periods.
+      ix_per(1:3) = (/ 768, 1536, 3072 /)
+      iy_per(1:3) = (/ 768, 1536, 3072 /)
+      iz_per(1:3) = (/ 768, 1536, 3072 /)
+
+c  Relative origins.
+      ix_rel(1:3) = (/ 0, 256,  896 /)
+      iy_rel(1:3) = (/ 0,  16,  432 /)
+      iz_rel(1:3) = (/ 0, 720, 1840 /)
+
+c  Refinement dimensions.
+      ix_dim(1:3) = (/ 768, 768, 768 /)
+      iy_dim(1:3) = (/ 768, 768, 768 /)
+      iz_dim(1:3) = (/ 768, 768, 768 /)
+
+c  Plane of constant ix that is dumped for each refinement, and the
+c  file that receives it.
+      ix_plane(1) = 320
+      ix_plane(2) = 384
+      ix_plane(3) = 384
+
+      dump_name(1) = 'ascii_dump_r1'
+      dump_name(2) = 'ascii_dump_r2'
+      dump_name(3) = 'ascii_dump_r3'
+
+      do iref=1,nref
+         open(10,file=dump_name(iref),status='unknown')
+         ix = ix_plane(iref)
+         do iy=0,767
+            do iz=0,767
+               call layer_choice(ix,iy,iz,iref,nref,
+     &          ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per,
+     &          ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,
+     &          wn_level,inv_open,layer_min,layer_max,indep_field)
+               write(10,*) iy,iz,layer_min,layer_max,indep_field
+            enddo
+         enddo
+         close(10)
+      enddo
+      end
+c=================================================================================    
+      recursive subroutine layer_choice(ix0,iy0,iz0,iref,nref,
+     &  ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per,
+     &  ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,
+     &  wn_level,x_fact,layer_min,layer_max,indep_field)
+c---------------------------------------------------------------------
+c  For the cell (ix0,iy0,iz0) of refinement iref, return the range of
+c  octree layers layer_min..layer_max and the flag indep_field.
+c
+c  Inputs:
+c    ix0,iy0,iz0    cell coordinates relative to refinement iref
+c    iref,nref      refinement index and number of refinements
+c    i?_abs,i?_rel,i?_per,i?_dim
+c                   per-refinement geometry arrays, passed on to inref
+c    wn_level       octree level of each refinement
+c    x_fact         boundary factor, passed on to inref
+c  Outputs:
+c    layer_min,layer_max,indep_field
+c
+c  Method: the cell is enclosed in blocks of side 2**ibase, aligned
+c  in absolute coordinates, for ibase = 1,2,...  The block is grown
+c  while inref reports interior=1 and iboundary=1 for it.
+c
+c   * nref = 1:  layer_min=0, layer_max=wn_level(1), indep_field=1.
+c   * iref = 1:  the growth is tested against refinement
+c                min(iref+1,nref); layer_min=0 and
+c                layer_max=wn_level(iref)-ibase+1.  indep_field is 1
+c                only when layer_max equals wn_level(iref).
+c   * iref > 1:  a growth tested against refinement iref itself gives
+c                layer_min = wn_level(iref)-max(ibase-2,0).  When
+c                iref < nref a second growth, tested against the next
+c                refinement, gives layer_max as above and indep_field
+c                is 1 only when layer_min equals wn_level(iref).  For
+c                the last refinement layer_max=wn_level(iref) and
+c                indep_field=1.
+c
+c  Passing iref = 9999 switches on the diagnostic prints.
+c---------------------------------------------------------------------
+      implicit none
+
+      integer ix0,iy0,iz0,iref,nref
+      integer*8 ix_abs(nref),iy_abs(nref),iz_abs(nref)
+      integer*8 ix_per(nref),iy_per(nref),iz_per(nref)
+      integer*8 ix_rel(nref),iy_rel(nref),iz_rel(nref)
+      integer*8 ix_dim(nref),iy_dim(nref),iz_dim(nref)
+      integer wn_level(nref)
+      integer x_fact,layer_min,layer_max,indep_field
+
+      integer ix,iy,iz,isize,ibase,inext,iunit
+      integer*8 xoff,yoff,zoff
+      integer interior,iboundary
+      logical verbose
+
+      verbose = (iref.eq.9999)
+      iunit   = 1
+      inext   = min(iref+1,nref)
+
+c  Simplest case: a single refinement uses every layer.
+      if (nref.eq.1) then
+         layer_min   = 0
+         layer_max   = wn_level(1)
+         indep_field = 1
+         if (verbose) print*,'return 1'
+         return
+      endif
+
+c  Absolute position of the origin of refinement iref.
+      xoff = ix_abs(iref)+ix_rel(iref)
+      yoff = iy_abs(iref)+iy_rel(iref)
+      zoff = iz_abs(iref)+iz_rel(iref)
+
+c  Top refinement: find how large a block still fits, with margin,
+c  inside the next refinement.
+      if ((iref.eq.1).and.(nref.gt.1)) then
+         ibase = 1
+         do
+            ix = ishft(ishft(xoff+ix0,-ibase),ibase)-xoff
+            iy = ishft(ishft(yoff+iy0,-ibase),ibase)-yoff
+            iz = ishft(ishft(zoff+iz0,-ibase),ibase)-zoff
+            isize = ishft(iunit,ibase)
+
+            call inref(ix,iy,iz,isize,iref,inext,nref,wn_level,
+     &       ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per,
+     &       ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact,
+     &       interior,iboundary)
+
+            if ((interior.ne.1).or.(iboundary.ne.1)) exit
+            ibase = ibase + 1
+         enddo
+
+         layer_min   = 0
+         layer_max   = wn_level(iref) - ibase + 1
+         indep_field = 0
+         if (layer_max.eq.wn_level(iref)) indep_field = 1
+
+         if (verbose) then
+            print*,'iref,wn_level(iref)',iref,wn_level(iref)
+            print*,'Return 2',layer_min,layer_max,indep_field
+         endif
+         return
+      endif
+
+c  Second or higher refinement: layer_min follows from the largest
+c  block that still fits, with margin, inside refinement iref itself.
+      ibase = 1
+      do
+         ix = ishft(ishft(xoff+ix0,-ibase),ibase)-xoff
+         iy = ishft(ishft(yoff+iy0,-ibase),ibase)-yoff
+         iz = ishft(ishft(zoff+iz0,-ibase),ibase)-zoff
+         isize = ishft(iunit,ibase)
+
+         call inref(ix,iy,iz,isize,iref,iref,nref,wn_level,
+     &    ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per,
+     &    ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact,
+     &    interior,iboundary)
+
+         if ((interior.ne.1).or.(iboundary.ne.1)) exit
+         ibase = ibase + 1
+      enddo
+
+c  Take the last block size that was accepted.
+      layer_min = wn_level(iref) - max(ibase-2,0)
+
+      if (iref.lt.nref) then
+c        Intermediate refinement: layer_max is set by the next one.
+         ibase = 1
+         do
+            ix = ishft(ishft(xoff+ix0,-ibase),ibase)-xoff
+            iy = ishft(ishft(yoff+iy0,-ibase),ibase)-yoff
+            iz = ishft(ishft(zoff+iz0,-ibase),ibase)-zoff
+            isize = ishft(iunit,ibase)
+
+            call inref(ix,iy,iz,isize,iref,inext,nref,wn_level,
+     &       ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per,
+     &       ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact,
+     &       interior,iboundary)
+
+            if ((interior.ne.1).or.(iboundary.ne.1)) exit
+            ibase = ibase + 1
+         enddo
+
+         layer_max   = wn_level(iref) - ibase + 1
+         indep_field = 0
+         if (layer_min.eq.wn_level(iref)) indep_field = 1
+      else
+c        Last refinement: every layer up to its own level is used.
+         layer_max   = wn_level(iref)
+         indep_field = 1
+      endif
+
+      if (verbose) then
+         print*,'Return 3'
+         print*,'layer_min,layer_max,indep_field',
+     &          layer_min,layer_max,indep_field
+         print*,'interior,iboundary',interior,iboundary
+         print*,'ibase = ',ibase
+         print*,'iref,nref,wn_level(iref)',iref,nref,wn_level(iref)
+      endif
+
+      return
+
+      end
+
+
+
+
+c   The function takes a given basis function specified by a corner ixc,iyc,izc
+c   and a size isz at level wn_c in the oct-tree and returns two integer values.
+c   (i)  interior:     
+c                  Value 1 if the basis function is completely within the given
+c                  refinement.
+c
+c                  Value 0 if the basis function is outside the refinement, or
+c                  overlaps the edges of the refinement, or the edges of the
+c                  primary white noise patch.
+c
+c   (ii) iboundary:
+c                  Value 1 if the basis function is sufficiently far from the
+c                  refinement boundary.
+c
+c                  Value 0 otherwise.
+c   The given refinement is defined at level wn_r in the oct-tree and by the variables
+c   (ix_rel,iy_rel,iz_rel) which give the location of the refinement relative to
+c   corner of the white noise patch, (ix_per,iy_per,iz_per) which define the
+c   periodicity of the white noise patch, and (ix_dim,iy_dim,iz_dim) which
+c   define the size of the refinement.
+c
+c
+c
+c=================================================================================
+      recursive subroutine inref(ixc,iyc,izc,isz,ir1,ir2,nref,wn_level,
+     &   ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per,
+     &   ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact,
+     &   interior,iboundary)
+c---------------------------------------------------------------------
+c  Test a cubic basis function, given by its corner (ixc,iyc,izc) and
+c  side isz in the coordinates of refinement ir1, against refinement
+c  ir2 (ir1 <= ir2 is required; both must lie in 1..nref).
+c
+c  Outputs:
+c    interior  = 1 when the cube lies inside refinement ir2 and does
+c                not wrap around the period; 0 otherwise.
+c    iboundary = 1 when, in addition, the smallest distance from the
+c                cube to a face of the refinement exceeds
+c                x_fact*(cube side); 0 otherwise.
+c  Whenever interior is returned as 0, iboundary is 0 as well.
+c---------------------------------------------------------------------
+      implicit none
+
+      integer nref
+      integer ixc,iyc,izc,isz,ir1,ir2
+      integer wn_level(nref)
+      integer*8 ix_abs(nref),iy_abs(nref),iz_abs(nref)
+      integer*8 ix_per(nref),iy_per(nref),iz_per(nref)
+      integer*8 ix_rel(nref),iy_rel(nref),iz_rel(nref)
+      integer*8 ix_dim(nref),iy_dim(nref),iz_dim(nref)
+      integer interior, iboundary
+      integer x_fact
+
+      integer*8 pos(3),per(3),ext(3),rel(3)
+      integer*8 lo(3),hi(3)
+      integer*8 width,edge
+      integer lshift,k
+
+c  Argument checks
+      if (ir2.lt.ir1) stop 'ir2<ir1'
+      if ((ir1.lt.1).or.(ir2.gt.nref))
+     &  stop 'Either/or ir1,ir2 out of range'
+
+c  Number of octree levels between the two refinements.
+      lshift = wn_level(ir2)-wn_level(ir1)
+
+c  Geometry of the target refinement, gathered per axis.
+      per(1) = ix_per(ir2)
+      per(2) = iy_per(ir2)
+      per(3) = iz_per(ir2)
+
+      ext(1) = ix_dim(ir2)
+      ext(2) = iy_dim(ir2)
+      ext(3) = iz_dim(ir2)
+
+      rel(1) = ix_rel(ir2)
+      rel(2) = iy_rel(ir2)
+      rel(3) = iz_rel(ir2)
+
+c  Corner and side of the cube at the level of refinement ir2, with
+c  the corner expressed relative to the origin of refinement ir2.
+      pos(1) = ishft(ix_abs(ir1)+ix_rel(ir1)+ixc,lshift)
+     &         -ix_abs(ir2)-ix_rel(ir2)
+      pos(2) = ishft(iy_abs(ir1)+iy_rel(ir1)+iyc,lshift)
+     &         -iy_abs(ir2)-iy_rel(ir2)
+      pos(3) = ishft(iz_abs(ir1)+iz_rel(ir1)+izc,lshift)
+     &         -iz_abs(ir2)-iz_rel(ir2)
+
+      width = isz
+      width = ishft(width,lshift)
+
+c  Lower corner must fall inside the refinement ...
+      do k=1,3
+         lo(k) = mod(per(k)+pos(k),per(k))
+      enddo
+      if (any(lo.ge.ext)) then
+         interior  = 0
+         iboundary = 0
+         return
+      endif
+
+c  ... and so must the upper corner.
+      do k=1,3
+         hi(k) = mod(per(k)+pos(k)+width,per(k))
+      enddo
+      if (any(hi.ge.ext)) then
+         interior  = 0
+         iboundary = 0
+         return
+      endif
+
+c  Smallest perpendicular distance from the cube to a face of the
+c  refinement decides the boundary flag.
+      edge = min(lo(1),ext(1)-hi(1),lo(2),ext(2)-hi(2),
+     &           lo(3),ext(3)-hi(3))
+
+      iboundary = 0
+      if (edge.gt.x_fact*width) iboundary = 1
+
+c  Last test: the cube must not wrap around the period.
+      do k=1,3
+         lo(k) = mod(rel(k)+pos(k)      ,per(k))
+         hi(k) = mod(rel(k)+pos(k)+width,per(k))
+      enddo
+      if (any(hi.le.lo)) then
+         interior  = 0
+         iboundary = 0
+         return
+      endif
+
+      interior = 1
+
+      return
+      end
+c==========================================================================================
+      recursive subroutine set_local_box(ldata,lev,ix_abs,iy_abs,iz_abs,
+     & ix_per,iy_per,iz_per,ix_rel,iy_rel,iz_rel,
+     & wn_level_base,check_rand,phase_name,MYID)
+c---------------------------------------------------------------------
+c  Initialise the Panphasia state held in ldata for a local box.
+c
+c  Inputs:
+c    lev            octree level of the box (0..maxlev_)
+c    i?_abs         absolute origin at level lev   (must be >= 0)
+c    i?_rel         relative origin at level lev   (must be >= 0,
+c                   and i?_abs+i?_rel must be < 2**lev)
+c    i?_per         periods at level lev
+c    wn_level_base  level (<= lev) at which the descriptor is composed
+c                   and validated
+c    check_rand     check number handed to compose_descriptor and
+c                   validate_descriptor
+c    phase_name     label handed to compose_descriptor
+c    MYID           the summary is printed only when MYID < 1
+c  In/out:
+c    ldata          receives the eight wrap-around origins and shifts,
+c                   the stored box geometry, reset cursors and the
+c                   default layer_min/layer_max/indep_field values
+c
+c  The routine stops with a message when an argument is out of range.
+c---------------------------------------------------------------------
+      use pan_state
+      implicit none
+
+      type(state_data), intent(inout) :: ldata
+
+      integer lev
+      integer*8 ix_abs,iy_abs,iz_abs
+      integer*8 ix_per,iy_per,iz_per
+      integer*8 ix_rel,iy_rel,iz_rel
+      integer wn_level_base
+      integer*8 check_rand
+      character*20 phase_name
+      integer MYID
+
+      integer*8 xorigin,yorigin,zorigin
+      integer*8 xcorner,ycorner,zcorner
+      integer*8 maxco,downscale
+      integer*8 xval,yval,zval,val_side
+      integer i
+      integer px,py,pz
+      integer lev_val
+      character*100 outstring
+c-------------------------------------------------------------------------
+
+      call initialise_panphasia(ldata)
+
+c-------------------------------------------------------------------------
+c    Error checking
+c-------------------------------------------------------------------------
+      if ((lev.lt.0).or.(lev.gt.maxlev_)) stop 'Level out of range! (4)'
+
+c  A base level above lev would make 2**(lev-wn_level_base) evaluate
+c  to zero in integer arithmetic and the divisions below would fail.
+      if (wn_level_base.gt.lev)
+     &   stop 'Error: wn_level_base greater than lev'
+
+      maxco = 2_dint**lev
+
+      if (ix_abs.lt.0) stop 'Error: ix_abs negative (3)'
+      if (iy_abs.lt.0) stop 'Error: iy_abs negative (3)'
+      if (iz_abs.lt.0) stop 'Error: iz_abs negative (3)'
+
+      if (ix_rel.lt.0) stop 'Error: ix_rel negative (2)'
+      if (iy_rel.lt.0) stop 'Error: iy_rel negative (2)'
+      if (iz_rel.lt.0) stop 'Error: iz_rel negative (2)'
+
+      if (ix_abs+ix_rel.ge.maxco)
+     &   stop 'Error: ix_abs + ix_rel out of range. (2)'
+      if (iy_abs+iy_rel.ge.maxco)
+     &   stop 'Error: iy_abs + iy_rel out of range. (2)'
+      if (iz_abs+iz_rel.ge.maxco)
+     &   stop 'Error: iz_abs + iz_rel out of range. (2)'
+c-----------------------------------------------------------------------------
+c  To allow the local box to wrap around, if needed, define a series of eight
+c  'origins'.  For many purposes (ix,iy,iz) = (0,0,0) is the only origin needed.
+c-----------------------------------------------------------------------------
+
+      do px=0,1
+       do py=0,1
+        do pz=0,1
+
+c        Corner of the (possibly wrapped) box at level lev.
+         xcorner = ix_abs + ix_rel - px*ix_per
+         ycorner = iy_abs + iy_rel - py*iy_per
+         zcorner = iz_abs + iz_rel - pz*iz_per
+
+c        The literal zero carries the same integer kind as the other
+c        argument, as the MAX intrinsic requires.
+         xorigin = max(0_Dint, xcorner/2)
+         yorigin = max(0_Dint, ycorner/2)
+         zorigin = max(0_Dint, zcorner/2)
+
+         ldata%ixshift(px,py,pz) = max(0_Dint, xcorner) - 2*xorigin
+         ldata%iyshift(px,py,pz) = max(0_Dint, ycorner) - 2*yorigin
+         ldata%izshift(px,py,pz) = max(0_Dint, zcorner) - 2*zorigin
+
+c        Store box details:  store the positions at level lev-1
+
+         ldata%xorigin_store(px,py,pz) = xorigin
+         ldata%yorigin_store(px,py,pz) = yorigin
+         ldata%zorigin_store(px,py,pz) = zorigin
+
+        enddo
+       enddo
+      enddo
+
+      ldata%lev_common = lev
+
+      ldata%ix_abs_store = ix_abs
+      ldata%iy_abs_store = iy_abs
+      ldata%iz_abs_store = iz_abs
+
+      ldata%ix_per_store = ix_per
+      ldata%iy_per_store = iy_per
+      ldata%iz_per_store = iz_per
+
+      ldata%ix_rel_store = ix_rel
+      ldata%iy_rel_store = iy_rel
+      ldata%iz_rel_store = iz_rel
+
+c------  Now validate the panphasian descriptor ---------------------------------
+c-----   Use lowest level possible
+      lev_val   = wn_level_base
+      downscale = 2_dint**(lev-lev_val)
+      xval      = ix_abs/downscale
+      yval      = iy_abs/downscale
+      zval      = iz_abs/downscale
+      val_side  = ix_per/downscale
+      call compose_descriptor(lev_val,xval,yval,zval,val_side,
+     &   check_rand,phase_name,outstring)
+c  Report the descriptor before validation, on the reporting rank only.
+      if (MYID.lt.1) then
+         print*,'Validating descriptor: ',outstring
+      endif
+      call validate_descriptor(ldata,outstring,-1,check_rand)
+c--------------------------------------------------------------------------------
+
+c  Reset all cursor values to negative numbers.
+
+      do i=0,maxdim_
+       ldata%xcursor(i) = -999
+       ldata%ycursor(i) = -999
+       ldata%zcursor(i) = -999
+      enddo
+
+      if (MYID.lt.1) then
+         print*,'----------------------------------------------------------'
+         print*,'Successfully initialised Panphasia box at level ',lev
+         write (6,105) ix_abs,iy_abs,iz_abs
+         write (6,106) ix_rel,iy_rel,iz_rel
+         write (6,107) ix_per,iy_per,iz_per
+         write (6,*)  'Phases used: ',outstring(1:len_trim(outstring))
+         print*,'----------------------------------------------------------'
+      endif
+ 105  format(' Abs origin: (',i12,',',i12,',',i12,')')
+ 106  format(' Rel origin: (',i12,',',i12,',',i12,')')
+ 107  format(' Periods   : (',i12,',',i12,',',i12,')')
+
+c  Set default values
+
+      ldata%layer_min =  0
+      ldata%layer_max =  lev
+      ldata%indep_field= 1
+      end
+c=================================================================================
+
+
+
+
+c-------------------------------------------------------------------------------
+c    The goal of this function is to replace the call to return_gaussian_array in
+c    return_cell_props with an equivalent call that returns the Legendre
+c    blocks for a special spherically symmetric function defined below.
+c-------------------------------------------------------------------------------
+      recursive subroutine return_oct_sf_expansion(ldata,ii,lev,
+     &   x,y,z,ndim,garray)
+c---------------------------------------------------------------------
+c  Fill garray(0:ndim-1) with the octree_expansion coefficients of
+c  the cell (x,y,z).
+c
+c  The cell side and centre are derived from the box period and
+c  absolute origin stored in ldata, rescaled by 2**(1+lev-ii).  The
+c  routine stops when lev is outside 0..maxlev_ or when x, y or z is
+c  outside 0..2**lev-1.
+c---------------------------------------------------------------------
+      use pan_state
+      implicit none
+
+      type(state_data), intent(inout) :: ldata
+
+      integer ii,lev,ndim
+      integer*8 x,y,z
+      real*8 garray(0:ndim-1)
+
+      integer*8 ncell
+      integer k
+      real*8 scale,side
+      real*8 coord(3),origin(3),centre(3)
+
+c-----   Argument checks ----------------------------------------------
+      if ((lev.lt.0).or.(lev.gt.maxlev_)) stop 'Level out of range! (2)'
+
+      ncell = 2_dint**lev
+
+      if ((x.lt.0).or.(x.ge.ncell)) then
+         print*,'x,lev,lev_range',x,lev,ncell
+         call flush(6)
+         stop 'x out of range!'
+      endif
+      if ((y.lt.0).or.(y.ge.ncell)) then
+         print*,'y,lev,lev_range',y,lev,ncell
+         stop 'y out of range!'
+      endif
+      if ((z.lt.0).or.(z.ge.ncell)) stop 'z out of range!'
+
+c-----   Cell side, and the stored origin, in rescaled units ----------
+      scale = 2.0d0**(1+lev-ii)
+      side  = 1.0d0/dble(ldata%ix_per_store)*scale
+
+      origin(1) = dble(ldata%ix_abs_store)/scale
+      origin(2) = dble(ldata%iy_abs_store)/scale
+      origin(3) = dble(ldata%iz_abs_store)/scale
+
+      coord(1) = dble(x)
+      coord(2) = dble(y)
+      coord(3) = dble(z)
+
+c-----   Centre of the cell -------------------------------------------
+      do k=1,3
+         centre(k) = (coord(k)-origin(k)+0.5d0)*side
+      enddo
+
+      call octree_expansion(centre,side,ndim,garray)
+
+      return
+      end
+
+
+
+c-------------------------------------------------------------------------------
+c   Expand function of interest in octree basis functions. The
+c   result returned is the superposition of the octree functions
+c   at a single octree level, expressed as Legendre block
+c   functions
+c-------------------------------------------------------------------------------    
+      recursive subroutine octree_expansion(cube_centre,length,ndim,q)
+c---------------------------------------------------------------------
+c  Evaluate spherical_perturbation on the eight half-size sub-cubes
+c  of the cube with centre cube_centre and side length, then hand
+c  the 9 x 8 table of results to expand_octree_coefficients, which
+c  delivers q(1:ndim).
+c
+c  Sub-cube number 4*i1+2*i2+i3 (each index 0 or 1) is displaced from
+c  the cube centre by -length/4 (index 0) or +length/4 (index 1)
+c  along x, y and z respectively.
+c---------------------------------------------------------------------
+      implicit none
+      real*8 cube_centre(3),length
+      integer ndim
+      real*8 q(ndim)
+
+      real*8 sub_centre(3),sub_len
+      real*8 sub_coef(0:8),sub_data(0:8,0:7)
+      real*8 p(0:7)
+      integer ioct,k,m
+      integer ibit(3)
+
+      sub_len = 0.5d0*length
+
+      do ioct=0,7
+c        Split the octant number into its x, y and z bits.
+         ibit(1) = ioct/4
+         ibit(2) = mod(ioct/2,2)
+         ibit(3) = mod(ioct,2)
+
+         do k=1,3
+            sub_centre(k) = cube_centre(k)
+     &                    + 0.25d0*dble(2*ibit(k)-1)*length
+         enddo
+
+         call spherical_perturbation(sub_centre,sub_len,sub_coef)
+
+         do m=0,8
+            sub_data(m,ioct) = sub_coef(m)
+         enddo
+      enddo
+
+      call expand_octree_coefficients(sub_data,p,q)
+
+      return
+      end
+
+
+
+
+
+      recursive subroutine spherical_perturbation(cube_centre,length,
+     &   cell_data)
+c---------------------------------------------------------------------
+c  Accumulate into cell_data(0:7) the contributions of the first nuse
+c  of nfeature tabulated features, all sharing one centre, for the
+c  cube with centre cube_centre and side length.  cell_data(8) is
+c  returned as zero.
+c
+c  For each feature the cube is rescaled by the feature's sigma and
+c  passed to evaluate_3d_integrals.  Entry m of the result is then
+c  multiplied by amplitude/scaled_length**(3+ipow(m)) and by
+c  sqrt(length**3) before being added to cell_data(m).
+c---------------------------------------------------------------------
+      implicit none
+      real*8 cube_centre(3),length,cell_data(0:8)
+      integer nfeature, nuse
+      parameter (nfeature=5,nuse=1)
+
+      real*8 centre(3), amplitude(nfeature), sigma(nfeature)
+      real*8 offset(3),len_sc,root_vol
+      real*8 raw(0:8),pref(0:3)
+      integer ifeat,k,m
+
+c  Which of the four prefactors applies to each of the eight entries
+c  (p000,p001,p010,p011,p100,p101,p110,p111).
+      integer ipow(0:7)
+      parameter (ipow = (/ 0,1,1,2,1,2,2,3 /))
+
+c   Set the parameters of the perturbation.  The periodic volume is
+c   a cube of unit length, occupying the positive coordinate octant
+
+      centre = (/ 0.60226666666d0, 0.4025d0, 0.5393d0 /)
+
+      amplitude = (/ 1.0d0,  1.5d0,  0.2d0,   0.25d0,    0.3d0 /)
+      sigma     = (/ 0.05d0, 0.02d0, 0.002d0, 0.00024d0,
+     &               0.00003d0 /)
+
+      cell_data = 0.0d0
+
+      do ifeat=1,nuse
+
+c        Cube centre and side in units of this feature's sigma.
+         do k=1,3
+            offset(k) = (cube_centre(k)-centre(k))/sigma(ifeat)
+         enddo
+         len_sc   = length/sigma(ifeat)
+         root_vol = sqrt(length**3)
+
+         call evaluate_3d_integrals(offset,len_sc,raw)
+
+c   Scaling factors for change of variables in 3-D integration from length to scaled_length
+         pref(0) = amplitude(ifeat)/len_sc**3
+         pref(1) = amplitude(ifeat)/len_sc**4
+         pref(2) = amplitude(ifeat)/len_sc**5
+         pref(3) = amplitude(ifeat)/len_sc**6
+
+         do m=0,7
+            cell_data(m) = cell_data(m)
+     &                   + pref(ipow(m))*raw(m)*root_vol
+         enddo
+
+         cell_data(8) = 0.0d0
+
+      enddo
+
+      return
+      end
+
+
+
+
+
+
+c---------------------------------------------------------------------------
+c  CODE WRITTEN BY ADRIAN JENKINS -   AUGUST 2015
+c  ORCiD:    http://orcid.org/0000-0003-4389-2232
+c---------------------------------------------------------------------------
+      recursive subroutine evaluate_3d_integrals(pos_cen,len,cell_data)
+c---------------------------------------------------------------------------
+c  GOAL
+c---------------------------------------------------------------------------
+c
+c  To expand the overdensity
+c
+c            rho = (3-r^2)exp(-r^2/2)
+c
+c  in terms of Legendre basis functions (Jenkins 2013) over a single cell.
+c
+c  Input:   pos_cen(3)     - coordinates of the cell centre
+c           len            - side length of the cell (same for x, y and z)
+c  Output:  cell_data(0:7) - the eight expansion coefficients
+c           cell_data(8)   - always set to zero
+c
+c  This overdensity function is taken from Jenkins 2010.
+c  It is easy to compute the Zeldovich and 2lpt displacements generated
+c  by this function.  It can be used to test initial condition
+c  generator codes.
+c
+c  The coefficients are computed by a 3-d integral over a cell volume.
+c  However the integral can be written as a sum of products of 1-d integrals over x,y or z
+c  coordinates, and the 1-d integrals can all be expressed in terms
+c  of incomplete Gamma functions
+c---------------------------------------------------------------------------
+      implicit none
+      real*8 pos_cen(3),len,cell_data(0:8)
+
+      real*8 pos_min(3),pos_max(3)
+      real*8 abs_u_min(3),abs_u_max(3)
+      real*8 a(0:3),c(0:3),s(0:3),p_min,p_max
+      real*8 stretch
+      real*8 gammp
+
+      real*8 si(0:3,3),ti(0:3,3)
+
+      real*8 Coeff000,Coeff001,Coeff010,Coeff011
+      real*8 Coeff100,Coeff101,Coeff110,Coeff111
+
+      integer i,j,n
+
+c---------------------------------------------------------------------------
+c  SET UP ALL COEFFICIENTS
+c---------------------------------------------------------------------------
+c   Normalising coefficients for each integral
+c
+c   c(n) = 2**((n-1)/2) * \Gamma[(n+1)/2]  is the prefactor of the integral
+c
+c        \int x^n exp(-x^2/2) dx
+c
+c   which comes from the substitution u = x^2/2
+c   and the definition of the incomplete Gamma function
+c
+c      P(a,x) = 1/\Gamma(a) * \int_0^x exp(-t) t^{a-1} dt
+c
+c   The values set below are this factor alone: c(0)=c(2)=sqrt(pi/2),
+c   c(1)=1 and c(3)=2.  The Legendre block normalisation (Jenkins 2013)
+c   of sqrt(12) for the first moments is applied further down, where
+c   the origin is shifted to the cell centre.
+c---------------------------------------------------------------------------
+c
+ 
+
+      c(0) = sqrt(3.1415926535897932d0/2.0d0)
+      c(1) = 1.0d0
+      c(2) = c(0)
+      c(3) = 2.0d0
+
+c   Define a(n) = (n+1)/2
+
+      a(0) = 0.5d0
+      a(1) = 1.0d0
+      a(2) = 1.5d0
+      a(3) = 2.0d0
+
+c   The substitution used to convert the desired integral into the incomplete
+c   Gamma function (shown above) 'loses' the signs of the limits.  For n=0 and 2 the
+c   desired integrand is symmetric about the origin, while for n=1 and 3,
+c   it is antisymmetric.  The array 's' below encodes this information so
+c   that the definite integral is evaluated correctly for positive or
+c   negative values in pos_min and pos_max.  The factor s(n) is applied in
+c   the loop over n and j below whenever the corresponding limit is negative.
+
+      s(0) = -1.0d0
+      s(1) = 1.0d0
+      s(2) = -1.0d0
+      s(3) = 1.0d0
+
+c----------------------------------------------------------------------------
+c    Change of variable - from substitution in the integral above  (u=x^2/2)
+c    stretch is the squared distance of the cell centre from the origin
+      stretch = pos_cen(1)**2 + pos_cen(2)**2 + pos_cen(3)**2
+
+
+      do i=1,3
+        pos_min(i) = pos_cen(i) - 0.5d0*len
+        pos_max(i) = pos_cen(i) + 0.5d0*len
+
+        abs_u_min(i) = 0.5d0 * pos_min(i)**2
+        abs_u_max(i) = 0.5d0 * pos_max(i)**2
+      enddo
+c----------------------------------------------------------------------------
+
+
+
+c----------------------------------------------------------------------------
+c    Create a 4x3 matrix of integrals  
+c    First index n, second index coordinate (1=x,2=y,3=z)
+c
+c    si(n,j) = \int p^n exp(-p*p/2) dp     (for j=1,2,3, p = x,y,z)
+c              from pos_min(j) to pos_max(j)    n=0,1,2,3
+c----------------------------------------------------------------------------
+
+      do n=0,3  
+       do j=1,3
+
+        if (pos_min(j).lt.0.0) then
+           p_min = s(n)
+        else 
+           p_min = 1.0d0
+        endif
+
+        if (pos_max(j).lt.0.0) then
+           p_max = s(n)
+        else 
+           p_max = 1.0d0
+        endif
+
+c    Cells with stretch >= 200 are assigned zero integrals
+       if (stretch.lt.200.0d0) then
+     
+       si(n,j) = c(n)*
+     &  ( p_max*gammp(a(n),abs_u_max(j)) 
+     &   -p_min*gammp(a(n),abs_u_min(j))) 
+   
+       else
+         si(n,j) = 0.0d0
+       endif
+
+
+c    ti starts as a copy of si; its n=1 and n=3 rows are modified below
+       ti(n,j) = si(n,j)
+       enddo
+      enddo
+c----------------------------------------------------------------------------
+
+
+
+
+c----------------------------------------------------------------------------
+c     Compute integrals with respect to the Legendre block. Each block
+c     factorises into x,y and z directions
+c     p_j1j2j3    -  where j1,j2,j3 are either zero or 1
+c
+c     p_0(x)  = 1                   Zeroth moment
+c     p_1(x)  = sqrt(12)(x-xcen)    First moment
+c
+c----------------------------------------------------------------------------
+
+      do n=1,3,2  ! Shift origins to cell centre for first moments
+       do j=1,3
+          ti(n,j) = sqrt(12.0d0) * (ti(n,j) - pos_cen(j)*ti(n-1,j))
+       enddo
+      enddo
+
+
+
+c----------------------------------------------------------------------------
+c   Combine the computed integrals to give the 8 Legendre block
+c   expansion coefficients. 
+c----------------------------------------------------------------------------
+
+
+      Coeff000 = 3.0 * ti(0,1) * ti(0,2) * ti(0,3) 
+     &  - ti(2,1) * ti(0,2) * ti(0,3)
+     &  - ti(0,1) * ti(2,2) * ti(0,3)
+     &  - ti(0,1) * ti(0,2) * ti(2,3)
+
+ 
+      Coeff100 = 3.0 * ti(1,1) * ti(0,2) * ti(0,3) 
+     &  - ti(3,1) * ti(0,2) * ti(0,3)
+     &  - ti(1,1) * ti(2,2) * ti(0,3)
+     &  - ti(1,1) * ti(0,2) * ti(2,3)
+
+      Coeff010 = 3.0 * ti(0,1) * ti(1,2) * ti(0,3) 
+     &  - ti(2,1) * ti(1,2) * ti(0,3)
+     &  - ti(0,1) * ti(3,2) * ti(0,3)
+     &  - ti(0,1) * ti(1,2) * ti(2,3)
+
+
+      Coeff001 = 3.0 * ti(0,1) * ti(0,2) * ti(1,3) 
+     &  - ti(2,1) * ti(0,2) * ti(1,3)
+     &  - ti(0,1) * ti(2,2) * ti(1,3)
+     &  - ti(0,1) * ti(0,2) * ti(3,3)
+
+
+      Coeff110 = 3.0 * ti(1,1) * ti(1,2) * ti(0,3) 
+     &  - ti(3,1) * ti(1,2) * ti(0,3)
+     &  - ti(1,1) * ti(3,2) * ti(0,3)
+     &  - ti(1,1) * ti(1,2) * ti(2,3)
+
+      Coeff101 = 3.0 * ti(1,1) * ti(0,2) * ti(1,3) 
+     &  - ti(3,1) * ti(0,2) * ti(1,3)
+     &  - ti(1,1) * ti(2,2) * ti(1,3)
+     &  - ti(1,1) * ti(0,2) * ti(3,3)
+
+
+      Coeff011 = 3.0 * ti(0,1) * ti(1,2) * ti(1,3) 
+     &  - ti(2,1) * ti(1,2) * ti(1,3)
+     &  - ti(0,1) * ti(3,2) * ti(1,3)
+     &  - ti(0,1) * ti(1,2) * ti(3,3)
+
+
+
+      Coeff111 = 3.0 * ti(1,1) * ti(1,2) * ti(1,3) 
+     &  - ti(3,1) * ti(1,2) * ti(1,3)
+     &  - ti(1,1) * ti(3,2) * ti(1,3)
+     &  - ti(1,1) * ti(1,2) * ti(3,3)     
+
+c--------------------------------------------------------------------------
+c     Copy into output structure - ordering matches the Panphasia code
+c     (see Jenkins & Booth 2013)
+c--------------------------------------------------------------------------
+
+
+      cell_data(0) = Coeff000     ! Scales as len**1.5
+      cell_data(1) = Coeff001     ! Scales as len**3.5
+      cell_data(2) = Coeff010
+      cell_data(3) = Coeff011     ! Scales as len**5.5
+      cell_data(4) = Coeff100
+      cell_data(5) = Coeff101
+      cell_data(6) = Coeff110
+      cell_data(7) = Coeff111     ! Scales as len**7.5
+
+      cell_data(8) = 0.0d0      ! Set the 'independent' field to zero (J&B13)
+
+
+
+      return
+      end
+
+
+c=========================================================================
+c   NUMERICAL RECIPES ROUTINES BELOW - modified to make the
+c   output double precision.  Routines taken from the Blue f77 Book
+c   Value of EPS changed from 3e-7 to 3e-15, ITMAX increased from 100 to 200
+c=========================================================================
+
+      real*8 recursive function gammp(a,x)
+c     Incomplete gamma function P(a,x): taken from the series routine
+c     gser when x < a+1, otherwise as one minus the result of gcf.
+      real*8 a,x
+      real*8 series_val,cfrac_val,lngam
+      if (a.le.0.0d0 .or. x.lt.0.0d0) stop 'bad arguments in gammp'
+      if (x.lt.a+1.0d0) then
+        call gser(series_val,a,x,lngam)
+        gammp = series_val
+      else
+        call gcf(cfrac_val,a,x,lngam)
+        gammp = 1.0d0-cfrac_val
+      endif
+      end
+
+      recursive subroutine gcf(gammcf,a,x,gln)
+c     Continued-fraction branch of gammp; also returns gln = gammln(a).
+      integer ITMAX,iter
+      real*8 gammcf,a,x,gln,gammln,EPS,FPMIN
+      parameter (ITMAX=200,EPS=3.d-15,FPMIN=1.d-290)
+      real*8 coef,bterm,cterm,dterm,factor,frac
+      gln = gammln(a)
+      bterm = x+1.0d0-a
+      cterm = 1.0d0/FPMIN
+      dterm = 1.0d0/bterm
+      frac = dterm
+      do iter=1,ITMAX
+        coef = -iter*(iter-a)
+        bterm = bterm+2.0d0
+        dterm = coef*dterm+bterm
+        if (abs(dterm).lt.FPMIN) dterm = FPMIN   ! keep 1/dterm finite
+        cterm = bterm+coef/cterm
+        if (abs(cterm).lt.FPMIN) cterm = FPMIN   ! keep coef/cterm finite
+        dterm = 1.0d0/dterm
+        factor = dterm*cterm
+        frac = frac*factor
+        if (abs(factor-1.0d0).lt.EPS) then       ! converged
+          gammcf = exp(-x+a*log(x)-gln)*frac
+          return
+        endif
+      enddo
+      stop 'a too large, ITMAX too small in gcf'
+      end
+
+      recursive SUBROUTINE gser(gamser,a,x,gln)
+c     Series branch of gammp: returns the series result in gamser and
+c     gammln(a) in gln.  Declared recursive like gammp, gcf and gammln.
+      INTEGER ITMAX,n
+      REAL*8 a,gamser,gln,x,EPS
+      PARAMETER (ITMAX=200,EPS=3.d-15)
+      REAL*8 ap,del,sum,gammln
+      gln=gammln(a)
+      if(x.le.0.)then
+        if(x.lt.0.)stop 'x < 0 in gser'
+        gamser=0.
+        return
+      endif
+      ap=a
+      sum=1./a
+      del=sum
+      do 11 n=1,ITMAX
+        ap=ap+1.
+        del=del*x/ap
+        sum=sum+del
+        if(abs(del).lt.abs(sum)*EPS)goto 1
+11    continue
+      stop 'a too large, ITMAX too small in gser'
+1     gamser=sum*exp(-x+a*log(x)-gln)
+      return
+      END
+
+
+      real*8 recursive function gammln(xx)
+c     Numerical Recipes gammln: natural logarithm of the gamma function
+c     of xx, from a six-term series with fixed coefficients.
+      real*8 xx
+      integer k
+      real*8 acc,root2pi,shift,denom,coef(6)
+      save coef,root2pi
+      data coef,root2pi/76.18009172947146d0,-86.50532032941677d0,
+     *24.01409824083091d0,-1.231739572450155d0,.1208650973866179d-2,
+     *-.5395239384953d-5,2.5066282746310005d0/
+      shift=xx+5.5d0
+      shift=(xx+0.5d0)*log(shift)-shift
+      acc=1.000000000190015d0
+      denom=xx
+      do k=1,6
+        denom=denom+1.d0
+        acc=acc+coef(k)/denom
+      enddo
+      gammln=shift+log(root2pi*acc/xx)
+      end
+c======================  END NR ================================================
+c===============================================================================
+      recursive subroutine expand_octree_coefficients(cell_data, p,q)
+      implicit none
+c-----------------------------------------------------------------------------
+c   Convert the Legendre block coefficients of the eight child cells of a
+c   parent cell into the eight Legendre block coefficients of the parent
+c   cell itself, p, and the coefficients of the octree basis functions
+c   that occupy the parent cell, q.  The linear combinations used are the
+c   transposes of those in compound_octree_coefficients, which performs
+c   the conversion in the opposite direction.
+c
+c   Input:   cell_data(j,i) - coefficient j=0..7 of child cell i=0..7
+c                             (the entries cell_data(8,i) are not read)
+c   Output:  p(0:7), q(1:56)
+c----------------- Define subroutine arguments -----------------------------------
+      real*8 cell_data(0:8,0:7)
+      real*8 p(0:7),q(56)
+
+c----------------- Define constants using notation from appendix A of Jenkins 2013
+ 
+      real*8 a1,a2,b1,b2,b3,c1,c2,c3,c4,rooteighth_factor
+
+      parameter(a1 = 0.5d0*sqrt(3.0d0),      a2 = 0.5d0)
+
+      parameter(b1 = 0.75d0,                 b2 = 0.25d0*sqrt(3.0d0))
+      parameter(b3 = 0.25d0)
+
+      parameter(c1 = sqrt(27.0d0/64.0d0),    c2 = 0.375d0)
+      parameter(c3 = sqrt(3.0d0/64.0d0),     c4 = 0.125d0)
+
+      parameter(rooteighth_factor = sqrt(0.125d0))
+
+c----------------- Define octree variables --------------------------------
+
+      real*8 po(0:7,0:7)
+      integer iparity
+      integer i,j,ix,iy,iz
+      integer icx,icy,icz
+      integer iox,ioy,ioz
+      real*8 parity,isig
+c-----------------------------------------------------------------------------
+
+c
+c   First decompose the child cell coefficients by reflectional parity:
+c   po(j,iparity) accumulates coefficient j over the eight child cells,
+c   with the sign appropriate to parity type iparity, scaled by sqrt(1/8).
+c
+c   See equations A2 and A3 in appendix A of Jenkins 2013.
+c
+c   The reflectional parity is given by (ix,iy,iz) loops below.
+c
+c   The (icx,icy,icz) loops below, loop over the eight child cells.
+c
+c   The positive octant child cell is given below by  (icx=icy=icz=0) or i=7.
+c
+c   The combination ix*icx +iy*icy +iz*icz is either even or odd, depending
+c   on whether the parity change is even or odd.
+c
+c   The variables iox,ioy,ioz are used to loop over the different
+c   types of Legendre basis function.
+c
+c   The combination iox*icx + ioy*icy + ioz*icz is either even or odd
+c   and identifies which coefficients keep or change sign respectively
+c   due to a pure reflection about the principal planes.
+c
+c   p and q are preset to sentinel values here; every entry is assigned
+c   its final value further down.
+c--------------------------------------------------------------------------------------------
+      do i=0,7
+       p(i) = -9999.0d0
+      enddo
+
+      do i=1,56
+       q(i) = -999.0d0
+      enddo
+
+
+      do iz=0,7
+       do iy=0,7
+        po(iy,iz) = 0.0d0      ! Set positive octant coefficients to zero
+       enddo
+      enddo
+c--------------------------------------------------------------------------------------------
+      do iz=0,1              ! Loop over z parity (0=keep sign, 1=change sign)
+       do iy=0,1             ! Loop over y parity (0=keep sign, 1=change sign)
+        do ix=0,1            ! Loop over x parity (0=keep sign, 1=change sign)
+        iparity = 4*ix + 2*iy + iz
+
+         do icx=0,1                      ! Loop over x-child cells
+          do icy=0,1                     ! Loop over y-child cells
+           do icz=0,1                    ! Loop over z-child cells
+
+             if (mod(ix*icx+iy*icy+iz*icz,2).eq.0) then
+                  parity = 1.0d0
+             else
+                  parity =-1.0d0
+             endif
+
+             i = 7 - 4*icx -2*icy - icz               ! Calculate which child cell this is.
+
+
+             do iox=0,1                               ! Loop over Legendre basis function type                     
+              do ioy=0,1                              ! Loop over Legendre basis function type
+               do ioz=0,1                             ! Loop over Legendre basis function type
+
+                  j = 4*iox + 2*ioy + ioz
+
+                  if (mod(iox*icx + ioy*icy + ioz*icz,2).eq.0) then
+                       isig =  parity
+                  else
+                       isig = -parity
+                  endif
+
+                  po(j,iparity) = po(j,iparity) + isig*cell_data(j,i)*rooteighth_factor
+
+               enddo
+              enddo
+             enddo
+
+           enddo   
+          enddo
+         enddo
+
+        enddo
+       enddo
+      enddo
+
+
+c
+c    The calculations immediately below evaluate the eight Legendre block coefficients
+c    p(0) - p(7) of the parent cell, and the coefficients q(1) - q(56) of the octree
+c    basis functions that occupy the parent cell, from the array po computed above.
+c
+c    Each group of equations below is the transpose of the corresponding group
+c    in compound_octree_coefficients.  The underlying matrices, which define the
+c    PANPHASIA octree basis functions in terms of Legendre blocks, are given in
+c    appendix A of Jenkins 2013.
+c
+c    All of the Legendre block functions of the parent cell, and the octree basis
+c    functions of the parent cell share one of eight distinct symmetries with respect to
+c    reflection about the x1=0,x2=0,x3=0 planes (where the origin is taken as the parent 
+c    cell centre and x1,x2,x3 are parallel to the cell edges).
+c
+c    Each function has either purely reflectional symmetry (even parity) or
+c    reflectional symmetry with a sign change (odd parity) about each of the three principal
+c    planes through the cell centre. There are therefore 8 parity types. We can label each 
+c    parity type with a binary triplet. So 000 is pure reflectional symmetry about 
+c    all of the principal planes.
+c  
+c    In the code below the parent cell Legendre block functions, and octree functions are 
+c    organised into eight groups each with eight members. Each group has a common
+c    parity type.
+c
+c    The second index of po selects the parity type, so each group below reads
+c    only the eight entries po(0:7,iparity) of its own parity type.
+c
+c    Each heading gives the binary triplet (ix,iy,iz) / the value of iparity.
+c---------------------------------------------------------------------------------------
+c    000/ 0-parity
+
+      p(0) =  1.0d0*po(0,0)
+      q(1) = -1.0d0*po(1,0)
+      q(2) = -1.0d0*po(2,0)
+      q(3) =  1.0d0*po(3,0)
+      q(4) = -1.0d0*po(4,0)
+      q(5) =  1.0d0*po(5,0)
+      q(6) =  1.0d0*po(6,0)
+      q(7) = -1.0d0*po(7,0)
+
+c    100/ 4-parity
+
+      p(4)  =  a1*po(0,4) +  a2*po(4,4)
+      q(8)  = -a2*po(0,4)  + a1*po(4,4)
+      q(9)  =  po(1,4)
+      q(11) = -po(3,4)
+      q(10) =  po(2,4)
+      q(12) = -po(5,4)
+      q(13) = -po(6,4)
+      q(14) =  po(7,4)
+
+c     010/ 2-parity
+
+      p(2)  =  a1*po(0,2) + a2*po(2,2)
+      q(15) = -a2*po(0,2) + a1*po(2,2) 
+      q(16) =  po(1,2) 
+      q(17) = -po(3,2)
+      q(18) =  po(4,2)
+      q(19) = -po(5,2)
+      q(20) = -po(6,2)
+      q(21) =  po(7,2)
+
+
+c     001/ 1-parity
+
+      p(1)  =  a1*po(0,1) + a2*po(1,1)
+      q(22) = -a2*po(0,1) + a1*po(1,1)
+      q(23) =  po(2,1)
+      q(24) = -po(3,1)
+      q(25) =  po(4,1)
+      q(26) = -po(5,1)
+      q(27) = -po(6,1)
+      q(28) =  po(7,1)
+
+c    110/ 6-parity
+
+      p(6)  =  b1*po(0,6) + b2*po(2,6) + b2*po(4,6) + b3*po(6,6)
+      q(29) = -b2*po(0,6) - b3*po(2,6) + b1*po(4,6) + b2*po(6,6)
+      q(30) =  b3*po(0,6) - b2*po(2,6) + b2*po(4,6) - b1*po(6,6)
+      q(31) = -b2*po(0,6) + b1*po(2,6) + b3*po(4,6) - b2*po(6,6)
+      q(32) = -po(1,6)
+      q(33) =  po(3,6)
+      q(34) =  po(5,6)
+      q(35) = -po(7,6)
+
+
+c     011/ 3-parity
+
+      p(3)  =  b1*po(0,3) + b2*po(1,3) + b2*po(2,3) + b3*po(3,3)
+      q(36) = -b2*po(0,3) - b3*po(1,3) + b1*po(2,3) + b2*po(3,3)
+      q(37) =  b3*po(0,3) - b2*po(1,3) + b2*po(2,3) - b1*po(3,3)
+      q(38) = -b2*po(0,3) + b1*po(1,3) + b3*po(2,3) - b2*po(3,3)
+      q(39) = -po(4,3)
+      q(40) =  po(5,3)
+      q(41) =  po(6,3)
+      q(42) = -po(7,3)
+
+c     101/ 5-parity
+
+
+      p(5)  =  b1*po(0,5) + b2*po(1,5) + b2*po(4,5) + b3*po(5,5)
+      q(43) = -b2*po(0,5) - b3*po(1,5) + b1*po(4,5) + b2*po(5,5)
+      q(44) =  b3*po(0,5) - b2*po(1,5) + b2*po(4,5) - b1*po(5,5)
+      q(45) = -b2*po(0,5) + b1*po(1,5) + b3*po(4,5) - b2*po(5,5)
+      q(46) = -po(2,5)
+      q(47) =  po(3,5)
+      q(48) =  po(6,5)
+      q(49) = -po(7,5)
+
+c     111/ 7-parity
+
+      p(7) = c1*po(0,7)+c2*po(1,7)+c2*po(2,7)+c3*po(3,7)+c2*po(4,7)+c3*po(5,7)+c3*po(6,7)+c4*po(7,7)
+      q(50)=-c2*po(0,7)+c1*po(1,7)+c2*po(2,7)-c3*po(3,7)-c2*po(4,7)+c3*po(5,7)+c4*po(6,7)-c3*po(7,7)
+      q(51)=-c2*po(0,7)-c2*po(1,7)+c1*po(2,7)-c3*po(3,7)+c2*po(4,7)-c4*po(5,7)+c3*po(6,7)+c3*po(7,7)
+      q(52)=-c2*po(0,7)+c2*po(1,7)-c2*po(2,7)+c4*po(3,7)+c1*po(4,7)-c3*po(5,7)+c3*po(6,7)-c3*po(7,7)
+      q(53)= c3*po(0,7)-c3*po(1,7)-c3*po(2,7)-c1*po(3,7)+c4*po(4,7)+c2*po(5,7)+c2*po(6,7)-c2*po(7,7)
+      q(54)= c3*po(0,7)+c3*po(1,7)-c4*po(2,7)-c2*po(3,7)-c3*po(4,7)-c1*po(5,7)+c2*po(6,7)+c2*po(7,7)
+      q(55)= c3*po(0,7)+c4*po(1,7)+c3*po(2,7)-c2*po(3,7)+c3*po(4,7)-c2*po(5,7)-c1*po(6,7)-c2*po(7,7)
+      q(56)=-c4*po(0,7)+c3*po(1,7)-c3*po(2,7)-c2*po(3,7)+c3*po(4,7)+c2*po(5,7)-c2*po(6,7)+c1*po(7,7)
+
+
+      return
+      end
+c===============================================================================
+
+
+      recursive subroutine compound_octree_coefficients(p,q,cell_data)
+      implicit none
+c----------------- Define subroutine arguments -----------------------------------
+      real*8 cell_data(0:8,0:7)
+      real*8 p(0:7),q(56)
+
+c----------------- Define constants using notation from appendix A of Jenkins 2013
+ 
+      real*8 a1,a2,b1,b2,b3,c1,c2,c3,c4,rooteighth_factor
+
+      parameter(a1 = 0.5d0*sqrt(3.0d0),      a2 = 0.5d0)
+
+      parameter(b1 = 0.75d0,                 b2 = 0.25d0*sqrt(3.0d0))
+      parameter(b3 = 0.25d0)
+
+      parameter(c1 = sqrt(27.0d0/64.0d0),    c2 = 0.375d0)
+      parameter(c3 = sqrt(3.0d0/64.0d0),     c4 = 0.125d0)
+
+      parameter(rooteighth_factor = sqrt(0.125d0))
+
+c----------------- Define octree variables --------------------------------
+
+      real*8 po(0:7,0:7),tsum(0:7,0:7)
+      integer iparity
+      integer i,j,ix,iy,iz
+      integer icx,icy,icz
+      integer iox,ioy,ioz
+      real*8 parity,isig
+c-----------------------------------------------------------------------------
+c    Combine the parent cell coefficients p(0:7) and q(1:56) into the Legendre
+c    block coefficients cell_data(0:7,i) of the eight child cells i=0..7.
+c
+c    The calculations immediately below evaluate the eight Legendre block coefficients for the
+c    child cell furthest from the absolute coordinate origin of the octree - the positive octant cell.
+c
+c    The coefficients are given by a set of matrix equations which combine the
+c    coefficients of the Legendre basis functions of the parent cell itself, with
+c    the coefficients from the octree basis functions that occupy the
+c    parent cell.   
+c
+c    The Legendre basis function coefficients of the parent cell are passed in
+c    as the argument p(0) - p(7).
+c
+c    The coefficients of the octree basis functions are passed in as the
+c    argument q(1) - q(56).
+c
+c
+c    These two sources of information are combined using a set of linear equations.
+c    The coefficients of these linear equations are taken from the inverses or
+c    equivalently transposes of the matrices given in appendix A of Jenkins 2013.
+c    The matrices in appendix A define the PANPHASIA octree basis functions
+c    in terms of Legendre blocks.
+c
+c    All of the Legendre block functions of the parent cell, and the octree basis
+c    functions of the parent cell share one of eight distinct symmetries with respect to
+c    reflection about the x1=0,x2=0,x3=0 planes (where the origin is taken as the parent 
+c    cell centre and x1,x2,x3 are parallel to the cell edges).
+c
+c    Each function has either purely reflectional symmetry (even parity) or
+c    reflectional symmetry with a sign change (odd parity) about each of the three principal
+c    planes through the cell centre. There are therefore 8 parity types. We can label each 
+c    parity type with a binary triplet. So 000 is pure reflectional symmetry about 
+c    all of the principal planes.
+c  
+c    In the code below the parent cell Legendre block functions, and octree functions are 
+c    organised into eight groups each with eight members. Each group has a common
+c    parity type.
+c
+c    We keep the contributions of each parity type to each of the eight Legendre basis
+c    functions occupying the positive octant cell separate. Once they have all been
+c    computed, we can apply the different symmetry operations and determine the
+c    Legendre block basis functions for all eight child cells at the same time.
+c---------------------------------------------------------------------------------------
+c    000/ 0-parity
+
+      po(0,0) =  1.0d0*p(0)
+      po(1,0) = -1.0d0*q(1)
+      po(2,0) = -1.0d0*q(2)
+      po(3,0) =  1.0d0*q(3)
+      po(4,0) = -1.0d0*q(4)
+      po(5,0) =  1.0d0*q(5)
+      po(6,0) =  1.0d0*q(6)
+      po(7,0) = -1.0d0*q(7)
+
+c    100/ 4-parity
+
+      po(0,4) =  a1*p(4)  - a2*q(8)
+      po(1,4) =  q(9)
+      po(2,4) =  q(10)
+      po(3,4) = -q(11)
+      po(4,4) =  a2*p(4)  + a1*q(8)
+      po(5,4) = -q(12) 
+      po(6,4) = -q(13)
+      po(7,4) =  q(14)
+
+c     010/ 2-parity
+
+      po(0,2) =  a1*p(2) - a2*q(15)
+      po(1,2) =  q(16) 
+      po(2,2) =  a2*p(2) + a1*q(15) 
+      po(3,2) = -q(17)
+      po(4,2) =  q(18)
+      po(5,2) = -q(19)
+      po(6,2) = -q(20)
+      po(7,2) =  q(21)
+
+
+c     001/ 1-parity
+
+      po(0,1) =  a1*p(1) - a2*q(22)
+      po(1,1) =  a2*p(1) + a1*q(22)
+      po(2,1) =  q(23)
+      po(3,1) = -q(24)
+      po(4,1) =  q(25)
+      po(5,1) = -q(26)
+      po(6,1) = -q(27)
+      po(7,1) =  q(28)
+
+c    110/ 6-parity
+
+      po(0,6) = b1*p(6) - b2*q(29) + b3*q(30) - b2*q(31)
+      po(1,6) = -q(32)
+      po(2,6) = b2*p(6) - b3*q(29) - b2*q(30) + b1*q(31)
+      po(3,6) =  q(33)
+      po(4,6) = b2*p(6) + b1*q(29) + b2*q(30) + b3*q(31)
+      po(5,6) =  q(34)
+      po(6,6) = b3*p(6) + b2*q(29) - b1*q(30) - b2*q(31)
+      po(7,6) = -q(35) 
+
+
+c     011/ 3-parity
+
+      po(0,3) = b1*p(3) - b2*q(36) + b3*q(37) - b2*q(38)
+      po(1,3) = b2*p(3) - b3*q(36) - b2*q(37) + b1*q(38)
+      po(2,3) = b2*p(3) + b1*q(36) + b2*q(37) + b3*q(38)
+      po(3,3) = b3*p(3) + b2*q(36) - b1*q(37) - b2*q(38)
+      po(4,3) = -q(39) 
+      po(5,3) =  q(40)
+      po(6,3) =  q(41)
+      po(7,3) = -q(42)
+
+c     101/ 5-parity
+
+      po(0,5) = b1*p(5) - b2*q(43) + b3*q(44) - b2*q(45)
+      po(1,5) = b2*p(5) - b3*q(43) - b2*q(44) + b1*q(45) 
+      po(2,5) = -q(46) 
+      po(3,5) =  q(47)
+      po(4,5) = b2*p(5) + b1*q(43) + b2*q(44) + b3*q(45)
+      po(5,5) = b3*p(5) + b2*q(43) - b1*q(44) - b2*q(45)
+      po(6,5) =  q(48)
+      po(7,5) = -q(49)
+
+c     111/ 7-parity
+
+      po(0,7) = c1*p(7) - c2*q(50) - c2*q(51) - c2*q(52) + c3*q(53) + c3*q(54) + c3*q(55) - c4*q(56)
+      po(1,7) = c2*p(7) + c1*q(50) - c2*q(51) + c2*q(52) - c3*q(53) + c3*q(54) + c4*q(55) + c3*q(56) 
+      po(2,7) = c2*p(7) + c2*q(50) + c1*q(51) - c2*q(52) - c3*q(53) - c4*q(54) + c3*q(55) - c3*q(56)
+      po(3,7) = c3*p(7) - c3*q(50) - c3*q(51) + c4*q(52) - c1*q(53) - c2*q(54) - c2*q(55) - c2*q(56) 
+      po(4,7) = c2*p(7) - c2*q(50) + c2*q(51) + c1*q(52) + c4*q(53) - c3*q(54) + c3*q(55) + c3*q(56)
+      po(5,7) = c3*p(7) + c3*q(50) - c4*q(51) - c3*q(52) + c2*q(53) - c1*q(54) - c2*q(55) + c2*q(56)
+      po(6,7) = c3*p(7) + c4*q(50) + c3*q(51) + c3*q(52) + c2*q(53) + c2*q(54) - c1*q(55) - c2*q(56)
+      po(7,7) = c4*p(7) - c3*q(50) + c3*q(51) - c3*q(52) - c2*q(53) + c2*q(54) - c2*q(55) + c1*q(56)
+c--------------------------------------------------------------------------------------------
+c 
+c
+c   We now calculate the Legendre basis coefficients for all eight child cells
+c   by applying the appropriate reflectional parities to the coefficients 
+c   calculated above for the positive octant child cell.
+c
+c   See equations A2 and A3 in appendix A of Jenkins 2013.
+c
+c   The reflectional parity is given by (ix,iy,iz) loops below.
+c
+c   The (icx,icy,icz) loops below, loop over the eight child cells.
+c
+c   The positive octant child cell is given below by  (icx=icy=icz=0) or i=7.
+c
+c   The combination ix*icx +iy*icy +iz*icz is either even or odd, depending
+c   on whether the parity change is even or odd.
+c
+c   The variables iox,ioy,ioz are used to loop over the different
+c   types of Legendre basis function.
+c
+c   The combination iox*icx + ioy*icy + ioz*icz is either even or odd
+c   and identifies which coefficients keep or change sign respectively
+c   due to a pure reflection about the principal planes.
+c--------------------------------------------------------------------------------------------
+
+      do iz=0,7
+       do iy=0,7
+        tsum(iy,iz) = 0.0d0      ! Zero temporary sums
+       enddo
+      enddo
+c--------------------------------------------------------------------------------------------
+      do iz=0,1              ! Loop over z parity (0=keep sign, 1=change sign)
+       do iy=0,1             ! Loop over y parity (0=keep sign, 1=change sign)
+        do ix=0,1            ! Loop over x parity (0=keep sign, 1=change sign)
+        iparity = 4*ix + 2*iy + iz
+
+         do icx=0,1                      ! Loop over x-child cells
+          do icy=0,1                     ! Loop over y-child cells
+           do icz=0,1                    ! Loop over z-child cells
+
+             if (mod(ix*icx+iy*icy+iz*icz,2).eq.0) then
+                  parity = 1.0d0
+             else
+                  parity =-1.0d0
+             endif
+
+             i = 7 - 4*icx -2*icy - icz               ! Calculate which child cell this is.
+
+
+             do iox=0,1                               ! Loop over Legendre basis function type                     
+              do ioy=0,1                              ! Loop over Legendre basis function type
+               do ioz=0,1                             ! Loop over Legendre basis function type
+
+                  j = 4*iox + 2*ioy + ioz
+
+                  if (mod(iox*icx + ioy*icy + ioz*icz,2).eq.0) then
+                       isig =  parity
+                  else
+                       isig = -parity
+                  endif
+
+                  tsum(j,i) = tsum(j,i) + isig*po(j,iparity)
+
+               enddo
+              enddo
+             enddo
+
+           enddo   
+          enddo
+         enddo
+
+        enddo
+       enddo
+      enddo
+
+
+c   Assign values of the output variables and set the independent field to zero
+
+      do i=0,7
+       do j=0,7
+         cell_data(j,i) = tsum(j,i)*rooteighth_factor
+        enddo
+         cell_data(8,i) = 0.0d0
+      enddo
+
+
+      return
+      end
+
+
+
+
diff --git a/ics.conf b/ics.conf
new file mode 100644
index 0000000..d867cb9
--- /dev/null
+++ b/ics.conf
@@ -0,0 +1,62 @@
+[setup]
+# number of grid cells per linear dimension for calculations = particles for sc initial load
+GridRes      = 128 
+# length of the box in Mpc/h
+BoxLength    = 200
+# starting redshift
+zstart       = 24.0 
+# order of the LPT to be used (1,2 or 3)
+LPTorder     = 1
+# also do baryon ICs?
+DoBaryons    = no
+# do mode fixing à la Angulo&Pontzen
+DoFixing     = yes
+# particle load, can be 'sc' (1x), 'bcc' (2x), 'fcc' (4x), or 'rsc' (8x)
+ParticleLoad = sc
+
+[testing]
+# enables diagnostic output
+# can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence'
+#test = potentials_and_densities
+#test = convergence
+test = none
+
+[execution]
+NumThreads   = 1
+
+[output]
+fname_hdf5   = output.hdf5
+fbase_analysis = output
+
+#format       = gadget2
+#filename     = ics_gadget.dat
+
+format       = generic
+filename     = debug.hdf5
+#generic_out_eulerian = yes
+
+#format	       = grafic2
+#filename       = ics_ramses
+#grafic_use_SPT = yes
+
+[random]
+generator    = NGENIC
+seed         = 9001
+
+[cosmology]
+transfer     = eisenstein
+#transfer     = CLASS 
+#transfer     = eisenstein_wdm
+#WDMmass      = 0.1
+Omega_m      = 0.302
+Omega_b      = 0.045
+Omega_L      = 0.698
+H0           = 70.3
+sigma_8      = 0.811
+nspec        = 0.961
+
+# anisotropic large scale tidal field
+#LSS_aniso_lx = 0.1
+#LSS_aniso_ly = 0.1
+#LSS_aniso_lz = -0.2
+
diff --git a/src/plugins/HDF_IO.hh b/include/HDF_IO.hh
similarity index 99%
rename from src/plugins/HDF_IO.hh
rename to include/HDF_IO.hh
index 965dac9..1b15b34 100755
--- a/src/plugins/HDF_IO.hh
+++ b/include/HDF_IO.hh
@@ -1,5 +1,5 @@
-#ifndef __HDF_IO_HH
-#define __HDF_IO_HH
+#pragma once
+#if defined(USE_HDF5)
 
 #define H5_USE_16_API
 
@@ -193,9 +193,9 @@ inline void HDFReadDataset( const std::string Filename, const std::string ObjNam
 
   int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID );
   
-  hsize_t dimsize[ndims];
+  std::vector<hsize_t> dimsize(ndims,0);
 
-  H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL );
+  H5Sget_simple_extent_dims( HDF_DataspaceID, &dimsize[0], NULL );
 
   HDF_StorageSize = 1;
   for(int i=0; i<ndims; ++i )
@@ -1082,4 +1082,5 @@ inline void HDFWriteGroupAttribute<std::string>( const std::string Filename, con
   H5Gclose( HDF_GroupID );
   H5Fclose( HDF_FileID );
 }
-#endif
+
+#endif // USE_HDF5
diff --git a/include/bounding_box.hh b/include/bounding_box.hh
index db0f481..6b70bcf 100644
--- a/include/bounding_box.hh
+++ b/include/bounding_box.hh
@@ -1,16 +1,16 @@
 #pragma once
 
-#include <vec3.hh>
+#include <math/vec3.hh>
 
 template <typename T>
 struct bounding_box
 {
-    vec3<T> x1_, x2_;
+    vec3_t<T> x1_, x2_;
 
     bounding_box(void)
     { }
 
-    bounding_box( const vec3<T>& x1, const vec3<T>& x2)
+    bounding_box( const vec3_t<T>& x1, const vec3_t<T>& x2)
     : x1_(x1), x2_(x2)
     { }
 
diff --git a/include/cmake_config.hh.in b/include/cmake_config.hh.in
new file mode 100644
index 0000000..03768a3
--- /dev/null
+++ b/include/cmake_config.hh.in
@@ -0,0 +1,34 @@
+#pragma once
+
+constexpr char CMAKE_BUILDTYPE_STR[] = "${CMAKE_BUILD_TYPE}";
+
+#define USE_PRECISION_${CODE_PRECISION}
+#if defined(USE_PRECISION_FLOAT)
+  constexpr char CMAKE_PRECISION_STR[] = "single";
+#elif defined(USE_PRECISION_DOUBLE)
+  constexpr char CMAKE_PRECISION_STR[] = "double";
+#elif defined(USE_PRECISION_LONGDOUBLE)
+  constexpr char CMAKE_PRECISION_STR[] = "long double";
+#endif 
+
+#define USE_CONVOLVER_${CONVOLVER_TYPE}
+#if defined(USE_CONVOLVER_ORSZAG)
+  constexpr char CMAKE_CONVOLVER_STR[] = "Orszag3/2";
+#elif defined(USE_CONVOLVER_NAIVE)
+  constexpr char CMAKE_CONVOLVER_STR[] = "Aliased";
+#endif
+
+#if defined(ENABLE_PLT)
+  constexpr char CMAKE_PLT_STR[] = "PLT corr. on";
+#else
+  constexpr char CMAKE_PLT_STR[] = "PLT corr. off";
+#endif
+
+// These variables are autogenerated and compiled
+// into the library by the version.cmake script. do not touch!
+extern "C"
+{
+  extern const char *GIT_TAG;
+  extern const char *GIT_REV;
+  extern const char *GIT_BRANCH;
+}
\ No newline at end of file
diff --git a/include/config_file.hh b/include/config_file.hh
index b0d6401..ab30ea2 100644
--- a/include/config_file.hh
+++ b/include/config_file.hh
@@ -12,20 +12,20 @@
 #include <logger.hh>
 
 /*!
- * @class ConfigFile
+ * @class config_file
  * @brief provides read/write access to configuration options
  *
  * This class provides access to the configuration file. The
  * configuration is stored in hash-pairs and can be queried and
  * validated by the responsible class/routine
  */
-class ConfigFile {
+class config_file {
 
   //! current line number
-  unsigned m_iLine;
+  unsigned iline_;
 
   //! hash table for key/value pairs, stored as strings
-  std::map<std::string, std::string> m_Items;
+  std::map<std::string, std::string> items_;
 
 public:
   //! removes all white space from string source
@@ -59,42 +59,42 @@ public:
    * @param oval the interpreted/converted value
    */
   template <class in_value, class out_value>
-  void Convert(const in_value &ival, out_value &oval) const {
+  void convert(const in_value &ival, out_value &oval) const {
     std::stringstream ss;
     ss << ival; //.. insert value into stream
     ss >> oval; //.. retrieve value from stream
 
     if (!ss.eof()) {
       //.. conversion error
-      csoca::elog << "Error: conversion of \'" << ival << "\' failed."
+      music::elog << "Error: conversion of \'" << ival << "\' failed."
                 << std::endl;
-      throw ErrInvalidConversion(std::string("invalid conversion to ") +
+      throw except_invalid_conversion(std::string("invalid conversion to ") +
                                  typeid(out_value).name() + '.');
     }
   }
 
   //! constructor of class config_file
-  /*! @param FileName the path/name of the configuration file to be parsed
+  /*! @param filename the path/name of the configuration file to be parsed
    */
-  explicit ConfigFile(std::string const &FileName) : m_iLine(0), m_Items() {
-    std::ifstream file(FileName.c_str());
+  explicit config_file(std::string const &filename) : iline_(0), items_() {
+    std::ifstream file(filename.c_str());
 
     if (!file.is_open()){
-      csoca::elog << "Could not open config file \'" << FileName << "\'." << std::endl;
+      music::elog << "Could not open config file \'" << filename << "\'." << std::endl;
       throw std::runtime_error(
-          std::string("Error: Could not open config file \'") + FileName +
+          std::string("Error: Could not open config file \'") + filename +
           std::string("\'"));
     }
 
     std::string line;
     std::string name;
     std::string value;
-    std::string inSection;
-    int posEqual;
-    m_iLine = 0;
+    std::string in_section;
+    int pos_equal;
+    iline_ = 0;
     //.. walk through all lines ..
     while (std::getline(file, line)) {
-      ++m_iLine;
+      ++iline_;
       //.. encounterd EOL ?
       if (!line.length())
         continue;
@@ -106,31 +106,31 @@ public:
 
       //.. encountered section tag ?
       if (line[0] == '[') {
-        inSection = trim(line.substr(1, line.find(']') - 1));
+        in_section = trim(line.substr(1, line.find(']') - 1));
         continue;
       }
 
       //.. seek end of entry name ..
-      posEqual = line.find('=');
-      name = trim(line.substr(0, posEqual));
-      value = trim(line.substr(posEqual + 1));
+      pos_equal = line.find('=');
+      name = trim(line.substr(0, pos_equal));
+      value = trim(line.substr(pos_equal + 1));
 
-      if ((size_t)posEqual == std::string::npos &&
+      if ((size_t)pos_equal == std::string::npos &&
           (name.size() != 0 || value.size() != 0)) {
-        csoca::wlog << "Ignoring non-assignment in " << FileName << ":"
-                  << m_iLine << std::endl;
+        music::wlog << "Ignoring non-assignment in " << filename << ":"
+                  << iline_ << std::endl;
         continue;
       }
 
       if (name.length() == 0 && value.size() != 0) {
-        csoca::wlog << "Ignoring assignment missing entry name in "
-                  << FileName << ":" << m_iLine << std::endl;
+        music::wlog << "Ignoring assignment missing entry name in "
+                  << filename << ":" << iline_ << std::endl;
         continue;
       }
 
       if (value.length() == 0 && name.size() != 0) {
-        csoca::wlog << "Empty entry will be ignored in " << FileName << ":"
-                  << m_iLine << std::endl;
+        music::wlog << "Empty entry will be ignored in " << filename << ":"
+                  << iline_ << std::endl;
         continue;
       }
 
@@ -138,12 +138,12 @@ public:
         continue;
 
       //.. add key/value pair to hash table ..
-      if (m_Items.find(inSection + '/' + name) != m_Items.end()) {
-        csoca::wlog << "Redeclaration overwrites previous value in "
-                  << FileName << ":" << m_iLine << std::endl;
+      if (items_.find(in_section + '/' + name) != items_.end()) {
+        music::wlog << "Redeclaration overwrites previous value in "
+                  << filename << ":" << iline_ << std::endl;
       }
 
-      m_Items[inSection + '/' + name] = value;
+      items_[in_section + '/' + name] = value;
     }
   }
 
@@ -151,8 +151,8 @@ public:
   /*! @param key the key value, usually "section/key"
    *  @param value the value of the key, also a string
    */
-  void InsertValue(std::string const &key, std::string const &value) {
-    m_Items[key] = value;
+  void insert_value(std::string const &key, std::string const &value) {
+    items_[key] = value;
   }
 
   //! inserts a key/value pair in the hash map
@@ -160,9 +160,9 @@ public:
    *  @param key the key value usually "section/key"
    *  @param value the value of the key, also a string
    */
-  void InsertValue(std::string const &section, std::string const &key,
+  void insert_value(std::string const &section, std::string const &key,
                    std::string const &value) {
-    m_Items[section + '/' + key] = value;
+    items_[section + '/' + key] = value;
   }
 
   //! checks if a key is part of the hash map
@@ -170,10 +170,10 @@ public:
    *  @param key the key name to be checked
    *  @return true if the key is present, false otherwise
    */
-  bool ContainsKey(std::string const &section, std::string const &key) {
+  bool contains_key(std::string const &section, std::string const &key) {
     std::map<std::string, std::string>::const_iterator i =
-        m_Items.find(section + '/' + key);
-    if (i == m_Items.end())
+        items_.find(section + '/' + key);
+    if (i == items_.end())
       return false;
     return true;
   }
@@ -182,57 +182,57 @@ public:
   /*! @param key the key name to be checked
    *  @return true if the key is present, false otherwise
    */
-  bool ContainsKey(std::string const &key) {
-    std::map<std::string, std::string>::const_iterator i = m_Items.find(key);
-    if (i == m_Items.end())
+  bool contains_key(std::string const &key) {
+    std::map<std::string, std::string>::const_iterator i = items_.find(key);
+    if (i == items_.end())
       return false;
     return true;
   }
 
   //! return value of a key
-  /*! returns the value of a given key, throws a ErrItemNotFound
+  /*! returns the value of a given key, throws an except_item_not_found
    *  exception if the key is not available in the hash map.
    *  @param key the key name
    *  @return the value of the key
-   *  @sa ErrItemNotFound
+   *  @sa except_item_not_found
    */
-  template <class T> T GetValue(std::string const &key) const {
-    return GetValue<T>("", key);
+  template <class T> T get_value(std::string const &key) const {
+    return get_value<T>("", key);
   }
 
   //! return value of a key
-  /*! returns the value of a given key, throws a ErrItemNotFound
+  /*! returns the value of a given key, throws an except_item_not_found
    *  exception if the key is not available in the hash map.
    *  @param section the section name for the key
    *  @param key the key name
    *  @return the value of the key
-   *  @sa ErrItemNotFound
+   *  @sa except_item_not_found
    */
   template <class T>
-  T GetValueBasic(std::string const &section, std::string const &key) const {
+  T get_value_basic(std::string const &section, std::string const &key) const {
     T r;
     std::map<std::string, std::string>::const_iterator i =
-        m_Items.find(section + '/' + key);
-    if (i == m_Items.end()){
-      throw ErrItemNotFound('\'' + section + '/' + key +
+        items_.find(section + '/' + key);
+    if (i == items_.end()){
+      throw except_item_not_found('\'' + section + '/' + key +
                             std::string("\' not found."));
     }
 
-    Convert(i->second, r);
+    convert(i->second, r);
     return r;
   }
 
   template <class T>
-  T GetValue(std::string const &section, std::string const &key) const
+  T get_value(std::string const &section, std::string const &key) const
   {
     T r;
     try
     {
-      r = GetValueBasic<T>(section, key);
+      r = get_value_basic<T>(section, key);
     }
-    catch (ErrItemNotFound& e)
+    catch (except_item_not_found& e)
     {
-      csoca::elog << e.what() << std::endl;
+      music::elog << e.what() << std::endl;
       throw;
     }
     return r;
@@ -240,40 +240,41 @@ public:
 
   //! exception safe version of getValue
   /*! returns the value of a given key, returns a default value rather
-   *  than a ErrItemNotFound exception if the key is not found.
+   *  than an except_item_not_found exception if the key is not found.
    *  @param section the section name for the key
    *  @param key the key name
    *  @param default_value the value that is returned if the key is not found
    *  @return the key value (if key found) otherwise default_value
    */
   template <class T>
-  T GetValueSafe(std::string const &section, std::string const &key,
+  T get_value_safe(std::string const &section, std::string const &key,
                  T default_value) const {
     T r;
     try {
-      r = GetValueBasic<T>(section, key);
-    } catch (ErrItemNotFound&) {
+      r = get_value_basic<T>(section, key);
+    } catch (except_item_not_found&) {
       r = default_value;
+      music::dlog << "Item \'" << section << "/" << key << "\' not found in config. Default = \'" << default_value << "\'" << std::endl;
     }
     return r;
   }
 
   //! exception safe version of getValue
   /*! returns the value of a given key, returns a default value rather
-   *  than a ErrItemNotFound exception if the key is not found.
+   *  than an except_item_not_found exception if the key is not found.
    *  @param key the key name
    *  @param default_value the value that is returned if the key is not found
    *  @return the key value (if key found) otherwise default_value
    */
   template <class T>
-  T GetValueSafe(std::string const &key, T default_value) const {
-    return GetValueSafe("", key, default_value);
+  T get_value_safe(std::string const &key, T default_value) const {
+    return get_value_safe("", key, default_value);
   }
 
   //! dumps all key-value pairs to a std::ostream
-  void Dump(std::ostream &out) {
-    std::map<std::string, std::string>::const_iterator i = m_Items.begin();
-    while (i != m_Items.end()) {
+  void dump(std::ostream &out) {
+    std::map<std::string, std::string>::const_iterator i = items_.begin();
+    while (i != items_.end()) {
       if (i->second.length() > 0)
         out << std::setw(24) << std::left << i->first << "  =  " << i->second
             << std::endl;
@@ -281,12 +282,12 @@ public:
     }
   }
 
-  void LogDump(void) {
-    csoca::ilog << "List of all configuration options:" << std::endl;
-    std::map<std::string, std::string>::const_iterator i = m_Items.begin();
-    while (i != m_Items.end()) {
+  void dump_to_log(void) {
+    music::ilog << "List of all configuration options:" << std::endl;
+    std::map<std::string, std::string>::const_iterator i = items_.begin();
+    while (i != items_.end()) {
       if (i->second.length() > 0)
-        csoca::ilog << std::setw(28) << i->first << " = " << i->second
+        music::ilog << std::setw(28) << i->first << " = " << i->second
                   << std::endl;
       ++i;
     }
@@ -295,16 +296,16 @@ public:
   //--- EXCEPTIONS ---
 
   //! runtime error that is thrown if key is not found in getValue
-  class ErrItemNotFound : public std::runtime_error {
+  class except_item_not_found : public std::runtime_error {
   public:
-    ErrItemNotFound(std::string itemname)
+    except_item_not_found(std::string itemname)
         : std::runtime_error(itemname.c_str()) {}
   };
 
   //! runtime error that is thrown if type conversion fails
-  class ErrInvalidConversion : public std::runtime_error {
+  class except_invalid_conversion : public std::runtime_error {
   public:
-    ErrInvalidConversion(std::string errmsg) : std::runtime_error(errmsg) {}
+    except_invalid_conversion(std::string errmsg) : std::runtime_error(errmsg) {}
   };
 
   //! runtime error that is thrown if identifier is not found in keys
@@ -323,14 +324,14 @@ public:
 //...           like "true" and "false" etc.
 //...           converts the string to type bool, returns type bool ...
 template <>
-inline bool ConfigFile::GetValue<bool>(std::string const &strSection,
+inline bool config_file::get_value<bool>(std::string const &strSection,
                                        std::string const &strEntry) const {
-  std::string r1 = GetValue<std::string>(strSection, strEntry);
+  std::string r1 = get_value<std::string>(strSection, strEntry);
   if (r1 == "true" || r1 == "yes" || r1 == "on" || r1 == "1")
     return true;
   if (r1 == "false" || r1 == "no" || r1 == "off" || r1 == "0")
     return false;
-  csoca::elog << "Illegal identifier \'" << r1 << "\' in \'" << strEntry << "\'." << std::endl;
+  music::elog << "Illegal identifier \'" << r1 << "\' in \'" << strEntry << "\'." << std::endl;
   throw ErrIllegalIdentifier(std::string("Illegal identifier \'") + r1 +
                              std::string("\' in \'") + strEntry +
                              std::string("\'."));
@@ -338,17 +339,17 @@ inline bool ConfigFile::GetValue<bool>(std::string const &strSection,
 }
 
 template <>
-inline bool ConfigFile::GetValueSafe<bool>(std::string const &strSection,
+inline bool config_file::get_value_safe<bool>(std::string const &strSection,
                                            std::string const &strEntry,
                                            bool defaultValue) const {
   std::string r1;
   try {
-    r1 = GetValueBasic<std::string>(strSection, strEntry);
+    r1 = get_value_basic<std::string>(strSection, strEntry);
     if (r1 == "true" || r1 == "yes" || r1 == "on" || r1 == "1")
       return true;
     if (r1 == "false" || r1 == "no" || r1 == "off" || r1 == "0")
       return false;
-  } catch (ErrItemNotFound&) {
+  } catch (except_item_not_found&) {
     return defaultValue;
   }
   return defaultValue;
@@ -356,7 +357,7 @@ inline bool ConfigFile::GetValueSafe<bool>(std::string const &strSection,
 
 template <>
 inline void
-ConfigFile::Convert<std::string, std::string>(const std::string &ival,
+config_file::convert<std::string, std::string>(const std::string &ival,
                                               std::string &oval) const {
   oval = ival;
 }
diff --git a/include/convolution.hh b/include/convolution.hh
index 2145445..90736b1 100644
--- a/include/convolution.hh
+++ b/include/convolution.hh
@@ -333,7 +333,7 @@ public:
         crecvbuf_ = new ccomplex_t[maxslicesz_ / 2];
         recvbuf_ = reinterpret_cast<real_t *>(&crecvbuf_[0]);
 
-        int ntasks(MPI_Get_size());
+        int ntasks(MPI::get_size());
 
         offsets_.assign(ntasks, 0);
         offsetsp_.assign(ntasks, 0);
@@ -415,12 +415,12 @@ private:
     {
         assert(fp.space_ == kspace_id);
 
-        const double rfac = std::pow(1.5, 1.5);
+        const real_t rfac = std::pow(1.5, 1.5);
 
         fp.zero();
 
 #if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////
-        size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3};
+        const size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3};
 
 #pragma omp parallel for
         for (size_t i = 0; i < 2 * fp.size(0) / 3; ++i)
@@ -429,10 +429,9 @@ private:
             for (size_t j = 0; j < 2 * fp.size(1) / 3; ++j)
             {
                 size_t jp = (j > nhalf[1]) ? j + nhalf[1] : j;
-                for (size_t k = 0; k < 2 * fp.size(2) / 3; ++k)
+                for (size_t k = 0; k < nhalf[2]+1; ++k)
                 {
                     size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k;
-                    // if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue;
                     fp.kelem(ip, jp, kp) = kfunc(i, j, k) * rfac;
                 }
             }
@@ -445,7 +444,7 @@ private:
         /////////////////////////////////////////////////////////////////////
 
         double tstart = get_wtime();
-        csoca::dlog << "[MPI] Started scatter for convolution" << std::endl;
+        music::dlog << "[MPI] Started scatter for convolution" << std::endl;
 
         //... collect offsets
 
@@ -460,7 +459,10 @@ private:
         size_t slicesz = fbuf_->size(1) * fbuf_->size(3);
 
         MPI_Datatype datatype =
-            (typeid(data_t) == typeid(float)) ? MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE;
+            (typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX 
+            : (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX 
+            : (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX
+            : MPI_BYTE;
 
         // fill MPI send buffer with results of kfunc
 
@@ -587,7 +589,7 @@ private:
         // std::cerr << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed! <<<<<"
         // << std::endl;  ofs << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed!
         // <<<<<" << std::endl;
-        csoca::dlog.Print("[MPI] Completed scatter for convolution, took %fs\n",
+        music::dlog.Print("[MPI] Completed scatter for convolution, took %fs\n",
                           get_wtime() - tstart);
 
 #endif /// end of ifdef/ifndef USE_MPI ///////////////////////////////////////////////////////////////
@@ -596,7 +598,7 @@ private:
     template <typename operator_t>
     void unpad(const Grid_FFT<data_t> &fp, operator_t output_op)
     {
-        const double rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(fbuf_->n_[0] * fbuf_->n_[1] * fbuf_->n_[2]);
+        const real_t rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(fbuf_->n_[0] * fbuf_->n_[1] * fbuf_->n_[2]);
 
         // make sure we're in Fourier space...
         assert(fp.space_ == kspace_id);
@@ -615,8 +617,11 @@ private:
                 for (size_t k = 0; k < fbuf_->size(2); ++k)
                 {
                     size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k;
-                    // if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue;
                     fbuf_->kelem(i, j, k) = fp.kelem(ip, jp, kp) / rfac;
+                    // zero Nyquist modes since they are not unique after convolution
+                    if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]){
+                        fbuf_->kelem(i, j, k) = 0.0; 
+                    }
                 }
             }
         }
@@ -634,7 +639,7 @@ private:
 
         double tstart = get_wtime();
 
-        csoca::dlog << "[MPI] Started gather for convolution";
+        music::dlog << "[MPI] Started gather for convolution";
 
         MPI_Barrier(MPI_COMM_WORLD);
 
@@ -645,7 +650,10 @@ private:
         size_t slicesz = fp.size(1) * fp.size(3);
 
         MPI_Datatype datatype =
-            (typeid(data_t) == typeid(float)) ? MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE;
+            (typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX 
+            : (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX 
+            : (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX 
+            : MPI_BYTE;
 
         MPI_Status status;
 
@@ -685,7 +693,7 @@ private:
             int recvfrom = 0;
             if (iglobal <= fny[0])
             {
-                real_t wi = (iglobal == fny[0]) ? 0.5 : 1.0;
+                real_t wi = (iglobal == fny[0]) ? 0.0 : 1.0;
 
                 recvfrom = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size);
                 MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, (int)iglobal,
@@ -693,7 +701,7 @@ private:
 
                 for (size_t j = 0; j < nf[1]; ++j)
                 {
-                    real_t wj = (j == fny[1]) ? 0.5 : 1.0;
+                    real_t wj = (j == fny[1]) ? 0.0 : 1.0;
                     if (j <= fny[1])
                     {
                         size_t jp = j;
@@ -701,21 +709,22 @@ private:
                         {
                             if (typeid(data_t) == typeid(real_t))
                             {
-                                real_t w = wi * wj;
+                                real_t wk = (k == fny[2]) ? 0.0 : 1.0;
+                                real_t w = wi * wj * wk;
                                 fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
                             }
                             else
                             {
-                                real_t wk = (k == fny[2]) ? 0.5 : 1.0;
+                                real_t wk = (k == fny[2]) ? 0.0 : 1.0;
                                 real_t w = wi * wj * wk;
-                                if (k <= fny[2])
+                                if (k < fny[2])
                                     fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
-                                if (k >= fny[2])
+                                if (k > fny[2])
                                     fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
-                                if (w < 1.0)
-                                {
-                                    fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
-                                }
+                                // if (w < 1.0)
+                                // {
+                                //     fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
+                                // }
                             }
                         }
                     }
@@ -726,21 +735,22 @@ private:
                         {
                             if (typeid(data_t) == typeid(real_t))
                             {
-                                real_t w = wi * wj;
+                                real_t wk = (k == fny[2]) ? 0.0 : 1.0;
+                                real_t w = wi * wj * wk;
                                 fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
                             }
                             else
                             {
-                                real_t wk = (k == fny[2]) ? 0.5 : 1.0;
+                                real_t wk = (k == fny[2]) ? 0.0 : 1.0;
                                 real_t w = wi * wj * wk;
-                                if (k <= fny[2])
+                                if (k < fny[2])
                                     fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
-                                if (k >= fny[2])
+                                if (k > fny[2])
                                     fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
-                                if (w < 1.0)
-                                {
-                                    fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
-                                }
+                                // if (w < 1.0)
+                                // {
+                                //     fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
+                                // }
                             }
                         }
                     }
@@ -748,7 +758,7 @@ private:
             }
             if (iglobal >= fny[0])
             {
-                real_t wi = (iglobal == fny[0]) ? 0.5 : 1.0;
+                real_t wi = (iglobal == fny[0]) ? 0.0 : 1.0;
 
                 recvfrom = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size);
                 MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom,
@@ -756,29 +766,26 @@ private:
 
                 for (size_t j = 0; j < nf[1]; ++j)
                 {
-                    real_t wj = (j == fny[1]) ? 0.5 : 1.0;
+                    real_t wj = (j == fny[1]) ? 0.0 : 1.0;
                     if (j <= fny[1])
                     {
                         size_t jp = j;
                         for (size_t k = 0; k < nf[2]; ++k)
                         {
+                            const real_t wk = (k == fny[2]) ? 0.0 : 1.0;
+                            const real_t w = wi * wj * wk;
                             if (typeid(data_t) == typeid(real_t))
                             {
-                                real_t w = wi * wj;
+                                real_t wk = (k == fny[2]) ? 0.0 : 1.0;
+                                real_t w = wi * wj * wk;
                                 fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
                             }
                             else
                             {
-                                real_t wk = (k == fny[2]) ? 0.5 : 1.0;
-                                real_t w = wi * wj * wk;
-                                if (k <= fny[2])
+                                if (k < fny[2])
                                     fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
-                                if (k >= fny[2])
+                                if (k > fny[2])
                                     fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
-                                if (w < 1.0)
-                                {
-                                    fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
-                                }
                             }
                         }
                     }
@@ -787,23 +794,18 @@ private:
                         size_t jp = j + fny[1];
                         for (size_t k = 0; k < nf[2]; ++k)
                         {
+                            const real_t wk = (k == fny[2]) ? 0.0 : 1.0;
+                            const real_t w = wi * wj * wk;
                             if (typeid(data_t) == typeid(real_t))
                             {
-                                real_t w = wi * wj;
                                 fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
                             }
                             else
                             {
-                                real_t wk = (k == fny[2]) ? 0.5 : 1.0;
-                                real_t w = wi * wj * wk;
-                                if (k <= fny[2])
+                                if (k < fny[2])
                                     fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
-                                if (k >= fny[2])
+                                if (k > fny[2])
                                     fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
-                                if (w < 1.0)
-                                {
-                                    fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
-                                }
                             }
                         }
                     }
@@ -811,8 +813,8 @@ private:
             }
         }
 
-//... copy data back
-#pragma omp parallel for
+        //... copy data back
+        #pragma omp parallel for
         for (size_t i = 0; i < fbuf_->ntot_; ++i)
         {
             output_op(i, (*fbuf_)[i]);
@@ -831,7 +833,7 @@ private:
 
         MPI_Barrier(MPI_COMM_WORLD);
 
-        csoca::dlog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart);
+        music::dlog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart);
 
 #endif /// end of ifdef/ifndef USE_MPI //////////////////////////////////////////////////////////////
     }
diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh
index f74039d..6f1fd7f 100644
--- a/include/cosmology_calculator.hh
+++ b/include/cosmology_calculator.hh
@@ -1,25 +1,43 @@
 #pragma once
 
 #include <array>
+#include <vec.hh>
 
 #include <cosmology_parameters.hh>
+#include <physical_constants.hh>
 #include <transfer_function_plugin.hh>
+#include <math/ode_integrate.hh>
 #include <logger.hh>
 
+#include <math/interpolate.hh>
+
 #include <gsl/gsl_integration.h>
+// #include <gsl/gsl_spline.h>
 #include <gsl/gsl_errno.h>
 
+namespace cosmology
+{
+
 /*!
- * @class CosmologyCalculator
+ * @class cosmology::calculator
  * @brief provides functions to compute cosmological quantities
  *
  * This class provides member functions to compute cosmological quantities
  * related to the Friedmann equations and linear perturbation theory
  */
-class CosmologyCalculator
+class calculator
 {
+public:
+    //! data structure to store cosmological parameters
+    cosmology::parameters cosmo_param_;
+
+    //! pointer to an instance of a transfer function plugin
+    std::unique_ptr<TransferFunction_plugin> transfer_function_;
+
 private:
-    static constexpr double REL_PRECISION = 1e-5;
+    static constexpr double REL_PRECISION = 1e-10;
+    interpolated_function_1d<true,true,false> D_of_a_, f_of_a_, a_of_D_;
+    double Dnow_, Dplus_start_, Dplus_target_, astart_, atarget_;
 
     real_t integrate(double (*func)(double x, void *params), double a, double b, void *params) const
     {
@@ -39,167 +57,207 @@ private:
         gsl_set_error_handler(NULL);
 
         if (error / result > REL_PRECISION)
-            csoca::wlog << "no convergence in function 'integrate', rel. error=" << error / result << std::endl;
+            music::wlog << "no convergence in function 'integrate', rel. error=" << error / result << std::endl;
 
         return (real_t)result;
     }
 
+    void compute_growth( std::vector<double>& tab_a, std::vector<double>& tab_D, std::vector<double>& tab_f )
+    {
+        using v_t = vec_t<3, double>;
+
+        // set ICs
+        const double a0 = 1e-10;
+        const double D0 = a0;
+        const double Dprime0 = 2.0 * D0 * H_of_a(a0) / std::pow(phys_const::c_SI, 2);
+        const double t0 = 1.0 / (a0 * H_of_a(a0));
+
+        v_t y0({a0, D0, Dprime0});
+
+        // set up integration
+        double dt = 1e-9;
+        double dtdid, dtnext;
+        const double amax = 2.0;
+
+        v_t yy(y0);
+        double t = t0;
+        const double eps = 1e-10;
+
+        while (yy[0] < amax)
+        {
+            // RHS of ODEs
+            auto rhs = [&](double t, v_t y) -> v_t {
+                auto a = y[0];
+                auto D = y[1];
+                auto Dprime = y[2];
+                v_t dy;
+                // da/dtau = a^2 H(a)
+                dy[0] = a * a * H_of_a(a);
+                // d D/dtau
+                dy[1] = Dprime;
+                // d^2 D / dtau^2
+                dy[2] = -a * H_of_a(a) * Dprime + 3.0 / 2.0 * cosmo_param_.Omega_m * std::pow(cosmo_param_.H0, 2) * D / a;
+                return dy;
+            };
+
+            // scale by predicted value to get approx. constant fractional errors
+            v_t yyscale = yy.abs() + dt * rhs(t, yy).abs();
+            
+            // call integrator
+            ode_integrate::rk_step_qs(dt, t, yy, yyscale, rhs, eps, dtdid, dtnext);
+
+            tab_a.push_back(yy[0]);
+            tab_D.push_back(yy[1]);
+            tab_f.push_back(yy[2]);
+
+            dt = dtnext;
+        }
+
+        // tab_f has held D' up to here; convert it to the growth rate f = D' / (a H(a) D)
+        for (size_t i = 0; i < tab_a.size(); ++i)
+        {
+            tab_f[i] = tab_f[i] / (tab_a[i] * H_of_a(tab_a[i]) * tab_D[i]);
+            tab_D[i] = tab_D[i];
+            tab_a[i] = tab_a[i];
+        }
+    }
+
 public:
-    //! data structure to store cosmological parameters
-    CosmologyParameters cosmo_param_;
-
-    //! pointer to an instance of a transfer function plugin
-    //TransferFunction_plugin *ptransfer_fun_;
-    std::unique_ptr<TransferFunction_plugin> transfer_function_;
-
-
+    calculator() = delete;
+    calculator(const calculator& c) = delete;
     //! constructor for a cosmology calculator object
     /*!
 	 * @param acosmo a cosmological parameters structure
 	 * @param pTransferFunction pointer to an instance of a transfer function object
 	 */
 
-    explicit CosmologyCalculator(ConfigFile &cf)
-    : cosmo_param_(cf)
-    {   
+    explicit calculator(config_file &cf)
+        : cosmo_param_(cf), astart_( 1.0/(1.0+cf.get_value<double>("setup","zstart")) ),
+            atarget_( 1.0/(1.0+cf.get_value_safe<double>("cosmology","ztarget",1./astart_-1.)))
+    {
+        // pre-compute growth factors and store for interpolation
+        std::vector<double> tab_a, tab_D, tab_f;
+        this->compute_growth(tab_a, tab_D, tab_f);
+        D_of_a_.set_data(tab_a,tab_D);
+        f_of_a_.set_data(tab_a,tab_f);
+        a_of_D_.set_data(tab_D,tab_a);
+        Dnow_ = D_of_a_(1.0);
+
+        Dplus_start_ = D_of_a_( astart_ ) / Dnow_;
+        Dplus_target_ = D_of_a_( atarget_ ) / Dnow_;
+
+        // set up transfer functions and compute normalisation
         transfer_function_ = std::move(select_TransferFunction_plugin(cf));
         transfer_function_->intialise();
-        cosmo_param_.pnorm = this->ComputePNorm();
+        if( !transfer_function_->tf_isnormalised_ )
+            cosmo_param_.pnorm = this->compute_pnorm_from_sigma8();
+        else{
+            cosmo_param_.pnorm = 1.0/Dplus_target_/Dplus_target_;
+            auto sigma8 = this->compute_sigma8();
+            music::ilog << "Measured sigma_8 for given PS normalisation is " <<  sigma8 << std::endl;
+        }
         cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm);
-        csoca::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons" << " : " << (transfer_function_->tf_is_distinct()? "yes" : "no") << std::endl;
-        csoca::ilog << std::setw(32) << std::left << "TF maximum wave number" << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl;
+
+        music::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons"
+                    << " : " << (transfer_function_->tf_is_distinct() ? "yes" : "no") << std::endl;
+        music::ilog << std::setw(32) << std::left << "TF maximum wave number"
+                    << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl;
+    }
+
+    ~calculator()
+    {
     }
 
     //! Write out a correctly scaled power spectrum at time a
-    void WritePowerspectrum( real_t a, std::string fname ) const
+    void write_powerspectrum(real_t a, std::string fname) const
     {
-        const real_t Dplus0 = this->CalcGrowthFactor(a) / this->CalcGrowthFactor(1.0);
+        // const real_t Dplus0 = this->get_growth_factor(a);
 
-        if( CONFIG::MPI_task_rank==0 )
+        if (CONFIG::MPI_task_rank == 0)
         {
-            double kmin = std::max(1e-4,transfer_function_->get_kmin());
+            double kmin = std::max(1e-4, transfer_function_->get_kmin());
 
             // write power spectrum to a file
             std::ofstream ofs(fname.c_str());
-            std::stringstream ss; ss << " (a=" << a <<")";
+            std::stringstream ss;
+            ss << " ,ap=" << a << "";
             ofs << "# " << std::setw(18) << "k [h/Mpc]"
-                        << std::setw(20) << ("P_dtot(k)"+ss.str()) 
-                        << std::setw(20) << ("P_dcdm(k)"+ss.str())
-                        << std::setw(20) << ("P_dbar(k)"+ss.str())
-                        << std::setw(20) << ("P_dtot(K) (a=1)")
-                        << std::setw(20) << ("P_tcdm(k)"+ss.str()) 
-                        << std::setw(20) << ("P_tbar(k)"+ss.str())
-                        << std::endl;
-            for( double k=kmin; k<transfer_function_->get_kmax(); k*=1.05 ){
+                << std::setw(20) << ("P_dtot(k,a=ap)")
+                << std::setw(20) << ("P_dcdm(k,a=ap)")
+                << std::setw(20) << ("P_dbar(k,a=ap)")
+                << std::setw(20) << ("P_tcdm(k,a=ap)")
+                << std::setw(20) << ("P_tbar(k,a=ap)")
+                << std::setw(20) << ("P_dtot(k,a=1)")
+                << std::setw(20) << ("P_dcdm(k,a=1)")
+                << std::setw(20) << ("P_dbar(k,a=1)")
+                << std::setw(20) << ("P_tcdm(k,a=1)")
+                << std::setw(20) << ("P_tbar(k,a=1)")
+                << std::setw(20) << ("P_dtot(K,a=1)")
+                << std::endl;
+            for (double k = kmin; k < transfer_function_->get_kmax(); k *= 1.05)
+            {
                 ofs << std::setw(20) << std::setprecision(10) << k
-                    << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total) * Dplus0, 2.0)
-                    << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, cdm) * Dplus0, 2.0)
-                    << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, baryon) * Dplus0, 2.0)
-                    << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total), 2.0)
-                    << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vcdm) * Dplus0, 2.0)
-                    << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vbaryon) * Dplus0, 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total)*Dplus_start_, 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm)*Dplus_start_, 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon)*Dplus_start_, 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vcdm)*Dplus_start_, 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon)*Dplus_start_, 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total0), 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm0), 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon0), 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vcdm0), 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon0), 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vtotal), 2.0)
                     << std::endl;
             }
         }
-
-        csoca::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl;
+        music::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl;
     }
 
-    const CosmologyParameters &GetParams(void) const
+    const cosmology::parameters &get_parameters(void) const noexcept
     {
         return cosmo_param_;
     }
 
-    //! returns the amplitude of amplitude of the power spectrum
-    /*!
-	 * @param k the wave number in h/Mpc
-	 * @param a the expansion factor of the universe
-	 * @returns power spectrum amplitude for wave number k at time a
-	 */
-    inline real_t Power(real_t k, real_t a)
+    //! Returns the Hubble function H(a) = dlog(a)/dt, in the same units as H0
+    inline double H_of_a(double a) const noexcept
     {
-        real_t Dplus = CalcGrowthFactor(a);
-        real_t DplusOne = CalcGrowthFactor(1.0);
-        real_t pNorm = ComputePNorm();
-        Dplus /= DplusOne;
-        DplusOne = 1.0;
-        real_t scale = Dplus / DplusOne;
-        return pNorm * scale * scale * TransferSq(k) * pow((double)k, (double)cosmo_param_.nspect);
+        double HH2 = 0.0;
+        HH2 += cosmo_param_.Omega_r / (a * a * a * a);
+        HH2 += cosmo_param_.Omega_m / (a * a * a);
+        HH2 += cosmo_param_.Omega_k / (a * a);
+        HH2 += cosmo_param_.Omega_DE * std::pow(a, -3. * (1. + cosmo_param_.w_0 + cosmo_param_.w_a)) * exp(-3. * (1.0 - a) * cosmo_param_.w_a);
+        return cosmo_param_.H0 * std::sqrt(HH2);
     }
 
-    inline static double H_of_a(double a, void *Params)
+    //! Computes the linear theory growth factor D+, normalised to D+(a=1)=1
+    real_t get_growth_factor(real_t a) const noexcept
     {
-        CosmologyParameters *cosm = (CosmologyParameters *)Params;
-        double a2 = a * a;
-        double Ha = sqrt(cosm->Omega_m / (a2 * a) + cosm->Omega_k / a2 + cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a));
-        return Ha;
+        return D_of_a_(a) / Dnow_;
     }
 
-    inline static double Hprime_of_a(double a, void *Params) 
+    //! Computes the inverse of get_growth_factor
+    real_t get_a( real_t Dplus ) const noexcept
     {
-        CosmologyParameters *cosm = (CosmologyParameters *)Params;
-        double a2 = a * a;
-        double H = H_of_a(a, Params);
-        double Hprime = 1 / (a * H) * (-1.5 * cosm->Omega_m / (a2 * a) - cosm->Omega_k / a2 - 1.5 * cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a) * (1. + cosm->w_0 + (1. - a) * cosm->w_a));
-        return Hprime;
+        return a_of_D_( Dplus * Dnow_ );
     }
 
-    //! Integrand used by function CalcGrowthFactor to determine the linear growth factor D+
-    inline static double GrowthIntegrand(double a, void *Params) 
+    //! Computes the linear theory growth rate f
+    /*! Function computes (by interpolating on precalculated table)
+     *   f = dlog D+ / dlog a
+     */
+    real_t get_f(real_t a) const noexcept
     {
-        double Ha = a * H_of_a(a, Params);
-        return 2.5 / (Ha * Ha * Ha);
-    }
-
-    //! integrand function for Calc_fPeebles
-	/*!
-	 * @sa Calc_fPeebles
-	 */
-	inline static double fIntegrand( double a, void *Params )
-	{
-		CosmologyParameters *cosm = (CosmologyParameters *)Params;
-		double y = cosm->Omega_m*(1.0/a-1.0) + cosm->Omega_DE*(a*a-1.0) + 1.0;
-		return 1.0/pow(y,1.5);
-	}
-	
-	//! calculates d log D+/d log a
-	/*! this version follows the Peebles (TBD: add citation)
-	 *  formula to compute Bertschinger's vfact
-	 */
-	inline real_t CalcGrowthRate( real_t a )
-	{
-        #warning CalcGrowthRate is only correct if dark energy is a cosmological constant, need to upgrade calculator...
-		real_t y = cosmo_param_.Omega_m*(1.0/a-1.0) + cosmo_param_.Omega_DE*(a*a-1.0) + 1.0;
-		real_t fact = integrate( &fIntegrand, 1e-6, a, (void*)&cosmo_param_ );
-		return (cosmo_param_.Omega_DE*a*a-0.5*cosmo_param_.Omega_m/a)/y - 1.0 + a*fIntegrand(a,(void*)&cosmo_param_)/fact;
-	}
-
-    //! Computes the linear theory growth factor D+
-    /*! Function integrates over member function GrowthIntegrand and computes
-    *                      /a
-    *   D+(a) = 5/2 H(a) * |  [a'^3 * H(a')^3]^(-1) da'
-    *                      /0
-    */
-    real_t CalcGrowthFactor(real_t a) const
-    {
-        real_t integral = integrate(&GrowthIntegrand, 0.0, a, (void *)&cosmo_param_);
-        return H_of_a(a, (void *)&cosmo_param_) * integral;
+        return f_of_a_(a);
     }
 
     //! Compute the factor relating particle displacement and velocity
     /*! Function computes
-    *
-    *  vfac = a^2 * H(a) * dlogD+ / d log a = a^2 * H'(a) + 5/2 * [ a * D+(a) * H(a) ]^(-1)
-    *
-    */
-    real_t CalcVFact(real_t a) const
+     *  vfac = a * (H(a)/h) * dlogD+ / dlog a 
+     */
+    real_t get_vfact(real_t a) const noexcept
     {
-        real_t Dp = CalcGrowthFactor(a);
-        real_t H = H_of_a(a, (void *)&cosmo_param_);
-        real_t Hp = Hprime_of_a(a, (void *)&cosmo_param_);
-        real_t a2 = a * a;
-
-        return (a2 * Hp + 2.5 / (a * Dp * H)) * 100.0;
+        return f_of_a_(a) * a * H_of_a(a) / cosmo_param_.h;
     }
 
     //! Integrand for the sigma_8 normalization of the power spectrum
@@ -210,8 +268,8 @@ public:
         if (k <= 0.0)
             return 0.0f;
 
-        CosmologyCalculator *pcc = reinterpret_cast<CosmologyCalculator*>(pParams);
-        
+        cosmology::calculator *pcc = reinterpret_cast<cosmology::calculator *>(pParams);
+
         double x = k * 8.0;
         double w = 3.0 * (sin(x) - x * cos(x)) / (x * x * x);
         static double nspect = (double)pcc->cosmo_param_.nspect;
@@ -229,8 +287,8 @@ public:
         if (k <= 0.0)
             return 0.0f;
 
-        CosmologyCalculator *pcc = reinterpret_cast<CosmologyCalculator*>(pParams);
-       
+        cosmology::calculator *pcc = reinterpret_cast<cosmology::calculator *>(pParams);
+
         double x = k * 8.0;
         double w = 3.0 * (sin(x) - x * cos(x)) / (x * x * x);
         static double nspect = (double)pcc->cosmo_param_.nspect;
@@ -240,24 +298,12 @@ public:
         return k * k * w * w * pow((double)k, (double)nspect) * tf * tf;
     }
 
-    //! Computes the square of the transfer function
-    /*! Function evaluates the supplied transfer function ptransfer_fun_
-	 * and returns the square of its value at wave number k
-	 * @param k wave number at which to evaluate the transfer function
-	 */
-    inline real_t TransferSq(real_t k) const
-    {
-        //.. parameter supplied transfer function
-        real_t tf1 = transfer_function_->compute(k, total);
-        return tf1 * tf1;
-    }
-
     //! Computes the amplitude of a mode from the power spectrum
     /*! Function evaluates the supplied transfer function ptransfer_fun_
 	 * and returns the amplitude of fluctuations at wave number k at z=0
 	 * @param k wave number at which to evaluate
 	 */
-    inline real_t GetAmplitude(real_t k, tf_type type) const
+    inline real_t get_amplitude(real_t k, tf_type type) const
     {
         return std::pow(k, 0.5 * cosmo_param_.nspect) * transfer_function_->compute(k, type) * cosmo_param_.sqrtpnorm;
     }
@@ -267,18 +313,30 @@ public:
 	 * integrates the power spectrum to fix the normalization to that given
 	 * by the sigma_8 parameter
 	 */
-    real_t ComputePNorm(void)
+    real_t compute_sigma8(void)
     {
         real_t sigma0, kmin, kmax;
         kmax = transfer_function_->get_kmax();
         kmin = transfer_function_->get_kmin();
 
         if (!transfer_function_->tf_has_total0())
-            sigma0 = 4.0 * M_PI * integrate(&dSigma8, (double)kmin, (double)kmax, this );
-        else
-            sigma0 = 4.0 * M_PI * integrate(&dSigma8_0, (double)kmin, (double)kmax, this );
+            sigma0 = 4.0 * M_PI * integrate(&dSigma8, (double)kmin, (double)kmax, this);
+        else{
+            sigma0 = 4.0 * M_PI * integrate(&dSigma8_0, (double)kmin, (double)kmax, this);
+        }
 
-        return cosmo_param_.sigma8 * cosmo_param_.sigma8 / sigma0;
+        return std::sqrt(sigma0);
+    }
+
+    //! Computes the normalization for the power spectrum
+    /*!
+	 * integrates the power spectrum to fix the normalization to that given
+	 * by the sigma_8 parameter
+	 */
+    real_t compute_pnorm_from_sigma8(void)
+    {
+        auto measured_sigma8 = this->compute_sigma8();
+        return cosmo_param_.sigma8 * cosmo_param_.sigma8 / (measured_sigma8  * measured_sigma8);
     }
 };
 
@@ -293,4 +351,6 @@ inline double jeans_sound_speed(double rho, double mass)
 {
     const double G = 6.67e-8;
     return pow(6.0 * mass / M_PI * sqrt(rho) * pow(G, 1.5), 1.0 / 3.0);
-}
\ No newline at end of file
+}
+
+} // namespace cosmology
\ No newline at end of file
diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh
index 0c4efcd..0d3a3ad 100644
--- a/include/cosmology_parameters.hh
+++ b/include/cosmology_parameters.hh
@@ -1,10 +1,21 @@
 #pragma once
+/*******************************************************************************\
+ cosmology_parameters.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations 
+ 
+ CHANGELOG (only majors, for details see repo):
+    06/2019 - Oliver Hahn - first implementation
+\*******************************************************************************/
 
+#include <physical_constants.hh>
 #include <config_file.hh>
 
-//! structure for cosmological parameters
-struct CosmologyParameters
+namespace cosmology
 {
+//! structure for cosmological parameters
+struct parameters
+{
+
     double
         Omega_m,  //!< baryon+dark matter density
         Omega_b,  //!< baryon matter density
@@ -12,38 +23,88 @@ struct CosmologyParameters
         Omega_r,  //!< photon + relativistic particle density
         Omega_k,  //!< curvature density
         H0,       //!< Hubble constant in km/s/Mpc
+        h,        //!< dimensionless Hubble parameter, h = H0 / 100
         nspect,   //!< long-wave spectral index (scale free is nspect=1)
         sigma8,   //!< power spectrum normalization
+        Tcmb,     //!< CMB temperature (used to set Omega_r)
+        Neff,     //!< effective number of neutrino species (used to set Omega_r)
         w_0,      //!< dark energy equation of state parameter 1: w = w0 + a * wa
         w_a,      //!< dark energy equation of state parameter 2: w = w0 + a * wa
 
         // below are helpers to store additional information
-        dplus, //!< linear perturbation growth factor
-        pnorm, //!< actual power spectrum normalisation factor
+        dplus,     //!< linear perturbation growth factor
+        f,         //!< growth factor logarithmic derivative
+        pnorm,     //!< actual power spectrum normalisation factor
         sqrtpnorm, //!< sqrt of power spectrum normalisation factor
-        vfact; //!< velocity<->displacement conversion factor in Zel'dovich approx.
+        vfact;     //!< velocity<->displacement conversion factor in Zel'dovich approx.
 
-    explicit CosmologyParameters(ConfigFile cf)
+    parameters() = delete;
+    
+    parameters( const parameters& ) = default;
+    
+    explicit parameters(config_file cf)
     {
-        Omega_b = cf.GetValue<double>("cosmology", "Omega_b");
-        Omega_m = cf.GetValue<double>("cosmology", "Omega_m");
-        Omega_DE = cf.GetValue<double>("cosmology", "Omega_L");
-        w_0 = cf.GetValueSafe<double>("cosmology", "w0", -1.0);
-        w_a = cf.GetValueSafe<double>("cosmology", "wa", 0.0);
+        H0 = cf.get_value<double>("cosmology", "H0");
+        h  = H0 / 100.0;
 
-        Omega_r = cf.GetValueSafe<double>("cosmology", "Omega_r", 0.0); // no longer default to nonzero (8.3e-5)
+        nspect = cf.get_value<double>("cosmology", "nspec");
+
+        Omega_b = cf.get_value<double>("cosmology", "Omega_b");
+
+        Omega_m = cf.get_value<double>("cosmology", "Omega_m");
+
+        Omega_DE = cf.get_value<double>("cosmology", "Omega_L");
+
+        w_0 = cf.get_value_safe<double>("cosmology", "w0", -1.0);
+
+        w_a = cf.get_value_safe<double>("cosmology", "wa", 0.0);
+
+        Tcmb = cf.get_value_safe<double>("cosmology", "Tcmb", 2.7255);
+
+        Neff = cf.get_value_safe<double>("cosmology", "Neff", 3.046);
+
+        sigma8 = cf.get_value<double>("cosmology", "sigma_8");
+
+        // calculate energy density in ultrarelativistic species from Tcmb and Neff
+        double Omega_gamma = 4 * phys_const::sigma_SI / std::pow(phys_const::c_SI, 3) * std::pow(Tcmb, 4.0) / phys_const::rhocrit_h2_SI / (h * h);
+        double Omega_nu = Neff * Omega_gamma * 7. / 8. * std::pow(4. / 11., 4. / 3.);
+        Omega_r = Omega_gamma + Omega_nu;
+
+        if (cf.get_value_safe<bool>("cosmology", "ZeroRadiation", false))
+        {
+            Omega_r = 0.0;
+        }
+#if 1
+        // assume zero curvature: absorb the residual 1 - Omega_m - Omega_DE - Omega_r into dark energy
+        Omega_DE += 1.0 - Omega_m - Omega_DE - Omega_r;
+        Omega_k  = 0.0;
+#else
+        // allow for curvature 
         Omega_k = 1.0 - Omega_m - Omega_DE - Omega_r;
-
-        H0 = cf.GetValue<double>("cosmology", "H0");
-        sigma8 = cf.GetValue<double>("cosmology", "sigma_8");
-        nspect = cf.GetValue<double>("cosmology", "nspec");
+#endif
 
         dplus = 0.0;
         pnorm = 0.0;
         vfact = 0.0;
+
+        music::ilog << "-------------------------------------------------------------------------------" << std::endl;
+        music::ilog << "Cosmological parameters are: " << std::endl;
+        music::ilog << " H0       = " << std::setw(16) << H0          << "sigma_8  = " << std::setw(16) << sigma8 << std::endl;
+        music::ilog << " Omega_c  = " << std::setw(16) << Omega_m-Omega_b << "Omega_b  = " << std::setw(16) << Omega_b << std::endl;
+        if (!cf.get_value_safe<bool>("cosmology", "ZeroRadiation", false)){
+            music::ilog << " Omega_g  = " << std::setw(16) << Omega_gamma << "Omega_nu = " << std::setw(16) << Omega_nu << std::endl;
+        }else{
+            music::ilog << " Omega_r  = " << std::setw(16) << Omega_r << std::endl;
+        }
+        music::ilog << " Omega_DE = " << std::setw(16) << Omega_DE    << "nspect   = " << std::setw(16) << nspect << std::endl;
+        music::ilog << " w0       = " << std::setw(16) << w_0         << "w_a      = " << std::setw(16) << w_a << std::endl;
+
+        if( Omega_r > 0.0 )
+        {
+            music::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally."<< std::endl;
+            music::wlog << "Make sure your sim code supports this..." << std::endl;
+        }
     }
 
-    CosmologyParameters(void)
-    {
-    }
-};
\ No newline at end of file
+};
+} // namespace cosmology
\ No newline at end of file
diff --git a/include/general.hh b/include/general.hh
index 71e521a..88eb2f7 100644
--- a/include/general.hh
+++ b/include/general.hh
@@ -7,24 +7,49 @@
 
 #if defined(USE_MPI)
 #include <mpi.h>
-  #include <fftw3-mpi.h>
+#include <fftw3-mpi.h>
 #else
-  #include <fftw3.h>
+#include <fftw3.h>
 #endif
 
-#ifdef USE_SINGLEPRECISION
+#include <config_file.hh>
+
+#define _unused(x) ((void)(x))
+
+// include CMake controlled configuration settings
+#include <cmake_config.hh>
+
+#if defined(USE_PRECISION_FLOAT)
 using real_t = float;
 using complex_t = fftwf_complex;
 #define FFTW_PREFIX fftwf
-#else
+#elif defined(USE_PRECISION_DOUBLE)
 using real_t = double;
 using complex_t = fftw_complex;
 #define FFTW_PREFIX fftw
+#elif defined(USE_PRECISION_LONGDOUBLE)
+using real_t = long double;
+using complex_t = fftwl_complex;
+#define FFTW_PREFIX fftwl
 #endif
 
-enum class fluid_component { density, vx, vy, vz, dx, dy, dz };
-enum class cosmo_species { dm, baryon, neutrino };
-extern std::map<cosmo_species,std::string> cosmo_species_name;
+enum class fluid_component
+{
+  density,
+  vx,
+  vy,
+  vz,
+  dx,
+  dy,
+  dz
+};
+enum class cosmo_species
+{
+  dm,
+  baryon,
+  neutrino
+};
+extern std::map<cosmo_species, std::string> cosmo_species_name;
 
 using ccomplex_t = std::complex<real_t>;
 
@@ -45,52 +70,64 @@ using fftw_plan_t = FFTW_GEN_NAME(FFTW_PREFIX, plan);
 #if defined(USE_MPI)
 inline double get_wtime()
 {
-    return MPI_Wtime();
+  return MPI_Wtime();
 }
 
-inline int MPI_Get_rank( void ){
-    int rank, ret;
-    ret = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	assert( ret==MPI_SUCCESS );
-    return rank;
-}
-
-inline int MPI_Get_size( void ){
-    int size, ret;
-    ret = MPI_Comm_size(MPI_COMM_WORLD, &size);
-	assert( ret==MPI_SUCCESS );
-    return size;
-}
-
-template<typename T>
-MPI_Datatype GetMPIDatatype( void )
+namespace MPI
 {
-  if( typeid(T) == typeid(std::complex<float>) )
-    return MPI_COMPLEX;
-  
-  if( typeid(T) == typeid(std::complex<double>) )
-    return MPI_DOUBLE_COMPLEX;
 
-  if( typeid(T) == typeid(int) )
+inline int get_rank(void)
+{
+  int rank, ret;
+  ret = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  assert(ret == MPI_SUCCESS);
+  _unused(ret);
+  return rank;
+}
+
+inline int get_size(void)
+{
+  int size, ret;
+  ret = MPI_Comm_size(MPI_COMM_WORLD, &size);
+  assert(ret == MPI_SUCCESS);
+  _unused(ret);
+  return size;
+}
+
+template <typename T>
+inline MPI_Datatype get_datatype(void)
+{
+  if (typeid(T) == typeid(std::complex<float>))
+    return MPI_C_FLOAT_COMPLEX;
+
+  if (typeid(T) == typeid(std::complex<double>))
+    return MPI_C_DOUBLE_COMPLEX;
+
+  if (typeid(T) == typeid(std::complex<long double>))
+    return MPI_C_LONG_DOUBLE_COMPLEX;
+
+  if (typeid(T) == typeid(int))
     return MPI_INT;
 
-  if( typeid(T) == typeid(unsigned) )
+  if (typeid(T) == typeid(unsigned))
     return MPI_UNSIGNED;
 
-  if( typeid(T) == typeid(float) )
+  if (typeid(T) == typeid(float))
     return MPI_FLOAT;
 
-  if( typeid(T) == typeid(double) )
+  if (typeid(T) == typeid(double))
     return MPI_DOUBLE;
 
-  if( typeid(T) == typeid(char) )
+  if (typeid(T) == typeid(long double))
+    return MPI_LONG_DOUBLE;
+
+  if (typeid(T) == typeid(char))
     return MPI_CHAR;
 
   abort();
-
 }
 
-inline std::string GetMPIversion( void )
+inline std::string get_version(void)
 {
   int len;
   char mpi_lib_ver[MPI_MAX_LIBRARY_VERSION_STRING];
@@ -98,33 +135,31 @@ inline std::string GetMPIversion( void )
   MPI_Get_library_version(mpi_lib_ver, &len);
   return std::string(mpi_lib_ver);
 }
-
+} // namespace MPI
 
 #else
-  #if defined(_OPENMP)
-    #include <omp.h>
-    inline double get_wtime()
-    {
-      return omp_get_wtime();
-    }
-  #else
-    #include <ctime>
-    inline double get_wtime()
-    {
-      return std::clock() / double(CLOCKS_PER_SEC);
-    }
-  #endif
+#if defined(_OPENMP)
+#include <omp.h>
+inline double get_wtime()
+{
+  return omp_get_wtime();
+}
+#else
+#include <ctime>
+inline double get_wtime()
+{
+  return std::clock() / double(CLOCKS_PER_SEC);
+}
+#endif
 #endif
 
-inline void multitask_sync_barrier( void )
+inline void multitask_sync_barrier(void)
 {
 #if defined(USE_MPI)
-  MPI_Barrier( MPI_COMM_WORLD );
+  MPI_Barrier(MPI_COMM_WORLD);
 #endif
 }
 
-
-
 namespace CONFIG
 {
 extern int MPI_thread_support;
@@ -134,14 +169,4 @@ extern bool MPI_ok;
 extern bool MPI_threads_ok;
 extern bool FFTW_threads_ok;
 extern int num_threads;
-} // namespace CONFIG
-
-
-// These variables are autogenerated and compiled
-// into the library by the version.cmake script
-extern "C"
-{
-    extern const char* GIT_TAG;
-    extern const char* GIT_REV;
-    extern const char* GIT_BRANCH;
-}
\ No newline at end of file
+} // namespace CONFIG
\ No newline at end of file
diff --git a/include/grid_fft.hh b/include/grid_fft.hh
index dcb3cb4..e07e5ed 100644
--- a/include/grid_fft.hh
+++ b/include/grid_fft.hh
@@ -4,7 +4,7 @@
 #include <array>
 #include <vector>
 
-#include <vec3.hh>
+#include <math/vec3.hh>
 #include <general.hh>
 #include <bounding_box.hh>
 #include <typeinfo>
@@ -16,22 +16,26 @@ enum space_t
 };
 
 
-template <typename data_t>
+#ifdef USE_MPI
+template <typename data_t_, bool bdistributed=true>
+#else
+template <typename data_t_, bool bdistributed=false>
+#endif
 class Grid_FFT
 {
+public:
+    using data_t = data_t_;
+    static constexpr bool is_distributed_trait{bdistributed};
+
 protected:
-#if defined(USE_MPI)
-    const MPI_Datatype MPI_data_t_type = (typeid(data_t) == typeid(double)) ? MPI_DOUBLE
-                                                                            : (typeid(data_t) == typeid(float)) ? MPI_FLOAT
-                                                                                                                : (typeid(data_t) == typeid(std::complex<float>)) ? MPI_COMPLEX
-                                                                                                                                                                  : (typeid(data_t) == typeid(std::complex<double>)) ? MPI_DOUBLE_COMPLEX : MPI_INT;
-#endif
+    using grid_fft_t = Grid_FFT<data_t,bdistributed>;
+    
 public:
     std::array<size_t, 3> n_, nhalf_;
     std::array<size_t, 4> sizes_;
     size_t npr_, npc_;
     size_t ntot_;
-    std::array<real_t, 3> length_, kfac_, dx_;
+    std::array<real_t, 3> length_, kfac_, kny_, dx_;
 
     space_t space_;
     data_t *data_;
@@ -54,7 +58,7 @@ public:
     }
 
     // avoid implicit copying of data
-    Grid_FFT(const Grid_FFT<data_t> &g) = delete;
+    Grid_FFT(const grid_fft_t &g) = delete;
 
     ~Grid_FFT()
     {
@@ -64,34 +68,48 @@ public:
         }
     }
 
-    const Grid_FFT<data_t> *get_grid(size_t ilevel) const { return this; }
+    const grid_fft_t *get_grid(size_t ilevel) const { return this; }
+
+    bool is_distributed( void ) const noexcept { return bdistributed; }
 
     void Setup();
 
+    //! return the number of data_t elements that we store in the container
+    size_t memsize( void ) const noexcept { return ntot_; }
+
     //! return the (local) size of dimension i
-    size_t size(size_t i) const { return sizes_[i]; }
+    size_t size(size_t i) const noexcept { assert(i<4); return sizes_[i]; }
 
     //! return the (global) size of dimension i
-    size_t global_size(size_t i) const { return n_[i]; }
+    size_t global_size(size_t i) const noexcept { assert(i<3); return n_[i]; }
 
     //! return locally stored number of elements of field
-    size_t local_size(void) const { return local_0_size_ * n_[1] * n_[2]; }
+    size_t local_size(void) const noexcept { return local_0_size_ * n_[1] * n_[2]; }
 
     //! return a bounding box of the global extent of the field
-    const bounding_box<size_t> &get_global_range(void) const
+    const bounding_box<size_t> &get_global_range(void) const noexcept
     {
         return global_range_;
     }
 
+    bool is_nyquist_mode( size_t i, size_t j, size_t k ) const
+    {
+        assert( this->space_ == kspace_id );
+        bool bres = (i+local_1_start_ == n_[1]/2);
+        bres |= (j == n_[0]/2);
+        bres |= (k == n_[2]/2);
+        return bres;
+    }
+
     //! set all field elements to zero
-    void zero()
+    void zero() noexcept
     {
 #pragma omp parallel for
         for (size_t i = 0; i < ntot_; ++i)
             data_[i] = 0.0;
     }
 
-    void copy_from(const Grid_FFT<data_t> &g)
+    void copy_from(const grid_fft_t &g)
     {
         // make sure the two fields are in the same space
         if (g.space_ != this->space_)
@@ -113,49 +131,49 @@ public:
             data_[i] = g.data_[i];
     }
 
-    data_t &operator[](size_t i)
+    data_t &operator[](size_t i) noexcept
     {
         return data_[i];
     }
 
-    data_t &relem(size_t i, size_t j, size_t k)
+    data_t &relem(size_t i, size_t j, size_t k) noexcept 
     {
         size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
         return data_[idx];
     }
 
-    const data_t &relem(size_t i, size_t j, size_t k) const
+    const data_t &relem(size_t i, size_t j, size_t k) const noexcept
     {
         size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
         return data_[idx];
     }
 
-    ccomplex_t &kelem(size_t i, size_t j, size_t k)
+    ccomplex_t &kelem(size_t i, size_t j, size_t k) noexcept
     {
         size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
         return cdata_[idx];
     }
 
-    const ccomplex_t &kelem(size_t i, size_t j, size_t k) const
+    const ccomplex_t &kelem(size_t i, size_t j, size_t k) const noexcept
     {
         size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
         return cdata_[idx];
     }
 
-    ccomplex_t &kelem(size_t idx) { return cdata_[idx]; }
-    const ccomplex_t &kelem(size_t idx) const { return cdata_[idx]; }
-    data_t &relem(size_t idx) { return data_[idx]; }
-    const data_t &relem(size_t idx) const { return data_[idx]; }
+    ccomplex_t &kelem(size_t idx) noexcept { return cdata_[idx]; }
+    const ccomplex_t &kelem(size_t idx) const noexcept { return cdata_[idx]; }
+    data_t &relem(size_t idx) noexcept { return data_[idx]; }
+    const data_t &relem(size_t idx) const noexcept { return data_[idx]; }
 
-    size_t get_idx(size_t i, size_t j, size_t k) const
+    size_t get_idx(size_t i, size_t j, size_t k) const noexcept
     {
         return (i * sizes_[1] + j) * sizes_[3] + k;
     }
 
     template <typename ft>
-    vec3<ft> get_r(const size_t i, const size_t j, const size_t k) const
+    vec3_t<ft> get_r(const size_t i, const size_t j, const size_t k) const noexcept
     {
-        vec3<ft> rr;
+        vec3_t<ft> rr;
 
         rr[0] = real_t(i + local_0_start_) * dx_[0];
         rr[1] = real_t(j) * dx_[1];
@@ -165,9 +183,9 @@ public:
     }
 
     template <typename ft>
-    vec3<ft> get_unit_r(const size_t i, const size_t j, const size_t k) const
+    vec3_t<ft> get_unit_r(const size_t i, const size_t j, const size_t k) const noexcept
     {
-        vec3<ft> rr;
+        vec3_t<ft> rr;
 
         rr[0] = real_t(i + local_0_start_) / real_t(n_[0]);
         rr[1] = real_t(j) / real_t(n_[1]);
@@ -177,91 +195,155 @@ public:
     }
 
     template <typename ft>
-    vec3<ft> get_unit_r_staggered(const size_t i, const size_t j, const size_t k) const
+    vec3_t<ft> get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3_t<real_t> s) const noexcept
     {
-        vec3<ft> rr;
+        vec3_t<ft> rr;
 
-        rr[0] = (real_t(i + local_0_start_) + 0.5) / real_t(n_[0]);
-        rr[1] = (real_t(j) + 0.5) / real_t(n_[1]);
-        rr[2] = (real_t(k) + 0.5) / real_t(n_[2]);
+        rr[0] = (real_t(i + local_0_start_) + s.x) / real_t(n_[0]);
+        rr[1] = (real_t(j) + s.y) / real_t(n_[1]);
+        rr[2] = (real_t(k) + s.z) / real_t(n_[2]);
 
         return rr;
     }
 
-    template <typename ft>
-    vec3<ft> get_unit_r_shifted(const size_t i, const size_t j, const size_t k, double sx, double sy, double sz) const
+    vec3_t<size_t> get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const noexcept
     {
-        vec3<ft> rr;
-
-        rr[0] = (real_t(i + local_0_start_) + sx) / real_t(n_[0]);
-        rr[1] = (real_t(j) + sy) / real_t(n_[1]);
-        rr[2] = (real_t(k) + sz) / real_t(n_[2]);
-
-        return rr;
+        return vec3_t<size_t>({i + local_0_start_, j, k});
     }
 
-    void cell_pos(int ilevel, size_t i, size_t j, size_t k, double *x) const
-    {
-        x[0] = double(i + local_0_start_) / size(0);
-        x[1] = double(j) / size(1);
-        x[2] = double(k) / size(2);
-    }
-
-    vec3<size_t> get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const
-    {
-        return vec3<size_t>({i + local_0_start_, j, k});
-    }
-
-    size_t get_cell_idx_1d(const size_t i, const size_t j, const size_t k) const
+    size_t get_cell_idx_1d(const size_t i, const size_t j, const size_t k) const noexcept
     {
         return ((i + local_0_start_) * size(1) + j) * size(2) + k;
     }
 
-    size_t count_leaf_cells(int, int) const
+    //! deprecated function, was needed for old output plugin
+    size_t count_leaf_cells(int, int) const noexcept
     {
         return n_[0] * n_[1] * n_[2];
     }
 
-    real_t get_dx(int idim) const
+    real_t get_dx(int idim) const noexcept
     {
+        assert(idim<3&&idim>=0);
         return dx_[idim];
     }
 
-    const std::array<real_t, 3> &get_dx(void) const
+    const std::array<real_t, 3> &get_dx(void) const noexcept
     {
         return dx_;
     }
 
     template <typename ft>
-    vec3<ft> get_k(const size_t i, const size_t j, const size_t k) const
+    vec3_t<ft> get_k(const size_t i, const size_t j, const size_t k) const noexcept
     {
-        vec3<ft> kk;
-
-#if defined(USE_MPI)
-        auto ip = i + local_1_start_;
-        kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0];
-        kk[1] = (real_t(ip) - real_t(ip > nhalf_[1]) * n_[1]) * kfac_[1];
-#else
-        kk[0] = (real_t(i) - real_t(i > nhalf_[0]) * n_[0]) * kfac_[0];
-        kk[1] = (real_t(j) - real_t(j > nhalf_[1]) * n_[1]) * kfac_[1];
-#endif
+        vec3_t<ft> kk;
+        if( bdistributed ){
+            auto ip = i + local_1_start_;
+            kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0];
+            kk[1] = (real_t(ip) - real_t(ip > nhalf_[1]) * n_[1]) * kfac_[1];
+        }else{
+            kk[0] = (real_t(i) - real_t(i > nhalf_[0]) * n_[0]) * kfac_[0];
+            kk[1] = (real_t(j) - real_t(j > nhalf_[1]) * n_[1]) * kfac_[1];
+        }
         kk[2] = (real_t(k) - real_t(k > nhalf_[2]) * n_[2]) * kfac_[2];
 
         return kk;
     }
 
+    //! wave vector for fractional (real-valued) local k-space indices (i,j,k)
+    template <typename ft>
+    vec3_t<ft> get_k(const real_t i, const real_t j, const real_t k) const noexcept
+    {
+        // signed frequency of index x along dimension d: indices above nhalf_[d] wrap by n_[d]
+        auto freq = [this](const real_t x, const int d) {
+            return (x - real_t(x > real_t(nhalf_[d])) * n_[d]) * kfac_[d];
+        };
+        // distributed layout: j runs along dimension 0, the offset i along dimension 1
+        const real_t i0 = bdistributed ? j : i;
+        const real_t i1 = bdistributed ? i + real_t(local_1_start_) : j;
+        vec3_t<ft> kk;
+        kk[0] = freq(i0, 0); kk[1] = freq(i1, 1); kk[2] = freq(k, 2);
+
+        return kk;
+    }
+
+    std::array<size_t,3> get_k3(const size_t i, const size_t j, const size_t k) const noexcept
+    {   // integer mode indices in axis order; the distributed layout swaps (and offsets) the first two
+        if( bdistributed ){ return std::array<size_t,3>({j,i+local_1_start_,k}); } return std::array<size_t,3>({i,j,k});
+    }
+
+    data_t get_cic( const vec3_t<real_t>& v ) const noexcept
+    {
+        // warning! this doesn't work with MPI
+        // Periodic cloud-in-cell (trilinear) interpolation of the real-space field at v.
+        // std::fmod keeps the sign of its argument, so coordinates below -length_ need a second shift.
+        auto wrap = []( const auto u, const size_t n ){ const auto t = std::fmod(u+1.0,1.0); return ((t<0.0)? t+1.0 : t)*n; };
+        const real_t px = wrap(v.x/length_[0],n_[0]), py = wrap(v.y/length_[1],n_[1]), pz = wrap(v.z/length_[2],n_[2]);
+        size_t ix = static_cast<size_t>(px), iy = static_cast<size_t>(py), iz = static_cast<size_t>(pz);
+        real_t dx = px-real_t(ix), tx = 1.0-dx;
+        real_t dy = py-real_t(iy), ty = 1.0-dy;
+        real_t dz = pz-real_t(iz), tz = 1.0-dz;
+        // neighbours wrap periodically; the base index is folded too, as rounding can yield a coordinate equal to n_
+        size_t ix1 = (ix+1)%n_[0]; ix %= n_[0];
+        size_t iy1 = (iy+1)%n_[1]; iy %= n_[1];
+        size_t iz1 = (iz+1)%n_[2]; iz %= n_[2];
+        data_t val = 0.0;
+        val += this->relem(ix ,iy ,iz ) * tx * ty * tz;
+        val += this->relem(ix ,iy ,iz1) * tx * ty * dz;
+        val += this->relem(ix ,iy1,iz ) * tx * dy * tz;
+        val += this->relem(ix ,iy1,iz1) * tx * dy * dz;
+        val += this->relem(ix1,iy ,iz ) * dx * ty * tz;
+        val += this->relem(ix1,iy ,iz1) * dx * ty * dz;
+        val += this->relem(ix1,iy1,iz ) * dx * dy * tz;
+        val += this->relem(ix1,iy1,iz1) * dx * dy * dz;
+        return val;
+    }
+
+    ccomplex_t get_cic_kspace( const vec3_t<real_t> x ) const noexcept
+    {
+        // warning! this doesn't work with MPI
+        int ix = static_cast<int>(std::floor(x.x)); // lower corner; x is used directly as a fractional index position
+        int iy = static_cast<int>(std::floor(x.y));
+        int iz = static_cast<int>(std::floor(x.z));
+        real_t dx = x.x-real_t(ix), tx = 1.0-dx; // d*: weight of the upper cell, t*: weight of the lower cell
+        real_t dy = x.y-real_t(iy), ty = 1.0-dy;
+        real_t dz = x.z-real_t(iz), tz = 1.0-dz;
+        size_t ix1 = (ix+1)%size(0); // upper neighbour wraps around in the first two dimensions
+        size_t iy1 = (iy+1)%size(1);
+        size_t iz1 = std::min((iz+1),int(size(2))-1); // ...and is clamped to the last cell in the third
+        ccomplex_t val = 0.0;
+        val += this->kelem(ix ,iy ,iz ) * tx * ty * tz;
+        val += this->kelem(ix ,iy ,iz1) * tx * ty * dz;
+        val += this->kelem(ix ,iy1,iz ) * tx * dy * tz;
+        val += this->kelem(ix ,iy1,iz1) * tx * dy * dz;
+        val += this->kelem(ix1,iy ,iz ) * dx * ty * tz;
+        val += this->kelem(ix1,iy ,iz1) * dx * ty * dz;
+        val += this->kelem(ix1,iy1,iz ) * dx * dy * tz;
+        val += this->kelem(ix1,iy1,iz1) * dx * dy * dz;
+        // NOTE(review): ix, iy and iz are neither wrapped nor range-checked before they are
+        // passed to kelem(); a negative or too large component of x would index outside the
+        // array. Presumably callers guarantee 0 <= x.d < size(d) -- TODO confirm. A result
+        // that is NaN (val != val) is returned unchanged.
+        return val;
+    }
+
     inline ccomplex_t gradient( const int idim, std::array<size_t,3> ijk ) const
     {
-#if defined(USE_MPI)
-        ijk[0] += local_1_start_;
-        std::swap(ijk[0],ijk[1]);
-#endif
+        if( bdistributed ){
+            ijk[0] += local_1_start_;
+            std::swap(ijk[0],ijk[1]);
+        }
         real_t rgrad = 
             (ijk[idim]!=nhalf_[idim])? (real_t(ijk[idim]) - real_t(ijk[idim] > nhalf_[idim]) * n_[idim]) * kfac_[idim] : 0.0; 
         return ccomplex_t(0.0,rgrad);
     }
 
-    Grid_FFT<data_t> &operator*=(data_t x)
+    inline real_t laplacian( const std::array<size_t,3>& ijk ) const noexcept
+    {   // Fourier-space Laplacian factor of local cell ijk: minus the squared norm of get_k()
+        const auto kvec = this->get_k<real_t>(ijk[0],ijk[1],ijk[2]); return -kvec.norm_squared();
+    }
+
+    grid_fft_t &operator*=(data_t x)
     {
         if (space_ == kspace_id)
         {
@@ -274,7 +356,7 @@ public:
         return *this;
     }
 
-    Grid_FFT<data_t> &operator/=(data_t x)
+    grid_fft_t &operator/=(data_t x)
     {
         if (space_ == kspace_id)
         {
@@ -287,7 +369,7 @@ public:
         return *this;
     }
 
-    Grid_FFT<data_t> &apply_Laplacian(void)
+    grid_fft_t &apply_Laplacian(void)
     {
         this->FourierTransformForward();
         this->apply_function_k_dep([&](auto x, auto k) {
@@ -298,7 +380,7 @@ public:
         return *this;
     }
 
-    Grid_FFT<data_t> &apply_negative_Laplacian(void)
+    grid_fft_t &apply_negative_Laplacian(void)
     {
         this->FourierTransformForward();
         this->apply_function_k_dep([&](auto x, auto k) {
@@ -309,7 +391,7 @@ public:
         return *this;
     }
 
-    Grid_FFT<data_t> &apply_InverseLaplacian(void)
+    grid_fft_t &apply_InverseLaplacian(void)
     {
         this->FourierTransformForward();
         this->apply_function_k_dep([&](auto x, auto k) {
@@ -354,11 +436,10 @@ public:
         }
     }
 
-    double compute_2norm(void)
+    real_t compute_2norm(void) const
     {
         real_t sum1{0.0};
-#pragma omp parallel for reduction(+ \
-                                   : sum1)
+        #pragma omp parallel for reduction(+ : sum1)
         for (size_t i = 0; i < sizes_[0]; ++i)
         {
             for (size_t j = 0; j < sizes_[1]; ++j)
@@ -377,60 +458,60 @@ public:
         return sum1;
     }
 
-    double std(void)
+    real_t std(void) const
     {
         double sum1{0.0}, sum2{0.0};
         size_t count{0};
 
-#pragma omp parallel for reduction(+ \
-                                   : sum1, sum2)
+        #pragma omp parallel for reduction(+ : sum1, sum2)
         for (size_t i = 0; i < sizes_[0]; ++i)
         {
             for (size_t j = 0; j < sizes_[1]; ++j)
             {
                 for (size_t k = 0; k < sizes_[2]; ++k)
                 {
-                    const auto elem = std::real(this->relem(i, j, k));
-                    sum1 += elem;
-                    sum2 += elem * elem;
+                    const auto elem = (space_==kspace_id)? this->kelem(i, j, k) : this->relem(i, j, k);
+                    sum1 += std::real(elem);
+                    sum2 += std::norm(elem);// * elem;
                 }
             }
         }
         count = sizes_[0] * sizes_[1] * sizes_[2];
 
 #ifdef USE_MPI
-        double globsum1{0.0}, globsum2{0.0};
-        size_t globcount{0};
+        if( bdistributed ){
+            double globsum1{0.0}, globsum2{0.0};
+            size_t globcount{0};
 
-        MPI_Allreduce(reinterpret_cast<const void *>(&sum1),
-                      reinterpret_cast<void *>(&globsum1),
-                      1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+            MPI_Allreduce(reinterpret_cast<const void *>(&sum1),
+                        reinterpret_cast<void *>(&globsum1),
+                        1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 
-        MPI_Allreduce(reinterpret_cast<const void *>(&sum2),
-                      reinterpret_cast<void *>(&globsum2),
-                      1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+            MPI_Allreduce(reinterpret_cast<const void *>(&sum2),
+                        reinterpret_cast<void *>(&globsum2),
+                        1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 
-        MPI_Allreduce(reinterpret_cast<const void *>(&count),
-                      reinterpret_cast<void *>(&globcount),
-                      1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
+            MPI_Allreduce(reinterpret_cast<const void *>(&count),
+                        reinterpret_cast<void *>(&globcount),
+                        1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
 
-        sum1 = globsum1;
-        sum2 = globsum2;
-        count = globcount;
+            sum1 = globsum1;
+            sum2 = globsum2;
+            count = globcount;
+        }
 #endif
         sum1 /= count;
         sum2 /= count;
 
-        return std::sqrt(sum2 - sum1 * sum1);
+        return real_t(std::sqrt(sum2 - sum1 * sum1));
     }
 
-    double mean(void)
+    real_t mean(void) const
     {
         double sum1{0.0};
         size_t count{0};
 
-#pragma omp parallel for reduction(+ \
-                                   : sum1)
+        #pragma omp parallel for reduction(+ : sum1)
         for (size_t i = 0; i < sizes_[0]; ++i)
         {
             for (size_t j = 0; j < sizes_[1]; ++j)
@@ -445,32 +526,34 @@ public:
         count = sizes_[0] * sizes_[1] * sizes_[2];
 
 #ifdef USE_MPI
-        double globsum1{0.0};
-        size_t globcount{0};
+        if( bdistributed ){
+            double globsum1{0.0};
+            size_t globcount{0};
 
-        MPI_Allreduce(reinterpret_cast<const void *>(&sum1),
-                      reinterpret_cast<void *>(&globsum1),
-                      1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+            MPI_Allreduce(reinterpret_cast<const void *>(&sum1),
+                        reinterpret_cast<void *>(&globsum1),
+                        1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
 
-        MPI_Allreduce(reinterpret_cast<const void *>(&count),
-                      reinterpret_cast<void *>(&globcount),
-                      1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
+            MPI_Allreduce(reinterpret_cast<const void *>(&count),
+                        reinterpret_cast<void *>(&globcount),
+                        1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
 
-        sum1 = globsum1;
-        count = globcount;
+            sum1 = globsum1;
+            count = globcount;
+        }
 #endif
 
         sum1 /= count;
 
-        return sum1;
+        return real_t(sum1);
     }
 
     template <typename functional, typename grid_t>
     void assign_function_of_grids_r(const functional &f, const grid_t &g)
     {
-        assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) );
+        assert(g.size(0) == size(0) && g.size(1) == size(1)); 
 
-#pragma omp parallel for
+        #pragma omp parallel for
         for (size_t i = 0; i < sizes_[0]; ++i)
         {
             for (size_t j = 0; j < sizes_[1]; ++j)
@@ -489,10 +572,10 @@ public:
     template <typename functional, typename grid1_t, typename grid2_t>
     void assign_function_of_grids_r(const functional &f, const grid1_t &g1, const grid2_t &g2)
     {
-        assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g1.size(2) == size(2));
-        assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g2.size(2) == size(2));
+        assert(g1.size(0) == size(0) && g1.size(1) == size(1)); 
+        assert(g2.size(0) == size(0) && g2.size(1) == size(1)); 
 
-#pragma omp parallel for
+        #pragma omp parallel for
         for (size_t i = 0; i < sizes_[0]; ++i)
         {
             for (size_t j = 0; j < sizes_[1]; ++j)
@@ -518,7 +601,7 @@ public:
         assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g2.size(2) == size(2));
         assert(g3.size(0) == size(0) && g3.size(1) == size(1)); // && g3.size(2) == size(2));
 
-#pragma omp parallel for
+        #pragma omp parallel for
         for (size_t i = 0; i < sizes_[0]; ++i)
         {
             for (size_t j = 0; j < sizes_[1]; ++j)
@@ -543,7 +626,7 @@ public:
     {
         assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) );
 
-#pragma omp parallel for
+        #pragma omp parallel for
         for (size_t i = 0; i < sizes_[0]; ++i)
         {
             for (size_t j = 0; j < sizes_[1]; ++j)
@@ -565,7 +648,7 @@ public:
         assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g.size(2) == size(2) );
         assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g.size(2) == size(2) );
 
-#pragma omp parallel for
+        #pragma omp parallel for
         for (size_t i = 0; i < sizes_[0]; ++i)
         {
             for (size_t j = 0; j < sizes_[1]; ++j)
@@ -582,18 +665,39 @@ public:
         }
     }
 
-    template <typename functional, typename grid1_t, typename grid2_t>
-    void assign_function_of_grids_kdep(const functional &f, const grid1_t &g1, const grid2_t &g2)
+    template <typename functional, typename grid_t>
+    void assign_function_of_grids_kdep(const functional &f, const grid_t &g)
     {
-        assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g.size(2) == size(2) );
-        assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g.size(2) == size(2) );
+        assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) );
 
-#pragma omp parallel for
+        #pragma omp parallel for
         for (size_t i = 0; i < sizes_[0]; ++i)
         {
             for (size_t j = 0; j < sizes_[1]; ++j)
             {
                 for (size_t k = 0; k < sizes_[2]; ++k)
+                {
+                    auto &elem = this->kelem(i, j, k);
+                    const auto &elemg = g.kelem(i, j, k);
+
+                    elem = f(this->get_k<real_t>(i, j, k), elemg);
+                }
+            }
+        }
+    }
+
+    template <typename functional, typename grid1_t, typename grid2_t>
+    void assign_function_of_grids_kdep(const functional &f, const grid1_t &g1, const grid2_t &g2)
+    {
+        assert(g1.size(0) == size(0) && g1.size(1) == size(1) && g1.size(2) == size(2) );
+        assert(g2.size(0) == size(0) && g2.size(1) == size(1) && g2.size(2) == size(2) );
+
+        #pragma omp parallel for
+        for (size_t i = 0; i < size(0); ++i)
+        {
+            for (size_t j = 0; j < size(1); ++j)
+            {
+                for (size_t k = 0; k < size(2); ++k)
                 {
                     auto &elem = this->kelem(i, j, k);
                     const auto &elemg1 = g1.kelem(i, j, k);
@@ -608,7 +712,7 @@ public:
     template <typename functional>
     void apply_function_k_dep(const functional &f)
     {
-#pragma omp parallel for
+        #pragma omp parallel for
         for (size_t i = 0; i < sizes_[0]; ++i)
         {
             for (size_t j = 0; j < sizes_[1]; ++j)
@@ -625,7 +729,7 @@ public:
     template <typename functional>
     void apply_function_r_dep(const functional &f)
     {
-#pragma omp parallel for
+        #pragma omp parallel for
         for (size_t i = 0; i < sizes_[0]; ++i)
         {
             for (size_t j = 0; j < sizes_[1]; ++j)
@@ -649,48 +753,31 @@ public:
 
     void Write_to_HDF5(std::string fname, std::string datasetname) const;
 
+    void Read_from_HDF5( std::string fname, std::string datasetname );
+
     void Write_PowerSpectrum(std::string ofname);
 
     void Compute_PowerSpectrum(std::vector<double> &bin_k, std::vector<double> &bin_P, std::vector<double> &bin_eP, std::vector<size_t> &bin_count);
 
     void Write_PDF(std::string ofname, int nbins = 1000, double scale = 1.0, double rhomin = 1e-3, double rhomax = 1e3);
 
-    // void stagger_field(void)
-    // {
-    //     FourierTransformForward();
-    //     apply_function_k_dep([&](auto x, auto k) -> ccomplex_t {
-    //         real_t shift = k[0] * get_dx()[0] + k[1] * get_dx()[1] + k[2] * get_dx()[2];
-    //         return x * std::exp(ccomplex_t(0.0, 0.5 * shift));
-    //     });
-    //     FourierTransformBackward();
-    // }
-
-    void shift_field( double sx, double sy, double sz )
+    void shift_field( const vec3_t<real_t>& s, bool transform_back=true )
     {
         FourierTransformForward();
         apply_function_k_dep([&](auto x, auto k) -> ccomplex_t {
-#ifdef WITH_MPI
-            real_t shift = sy * k[0] * get_dx()[0] + sx * k[1] * get_dx()[1] + sz * k[2] * get_dx()[2];
-#else
-            real_t shift = sx * k[0] * get_dx()[0] + sy * k[1] * get_dx()[1] + sz * k[2] * get_dx()[2];
-#endif
+            real_t shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2];
             return x * std::exp(ccomplex_t(0.0, shift));
         });
-        FourierTransformBackward();
-    }
-
-    void stagger_field(void)
-    {
-        this->shift_field( 0.5, 0.5, 0.5 );
+        if( transform_back ){
+            FourierTransformBackward();
+        }
     }
 
     void zero_DC_mode(void)
     {
         if (space_ == kspace_id)
         {
-#ifdef USE_MPI
-            if (CONFIG::MPI_task_rank == 0)
-#endif
+            if (CONFIG::MPI_task_rank == 0 || !bdistributed )
                 cdata_[0] = (data_t)0.0;
         }
         else
@@ -707,12 +794,14 @@ public:
                     }
                 }
             }
+            if( bdistributed ){
 #if defined(USE_MPI)
-            data_t glob_sum = 0.0;
-            MPI_Allreduce(reinterpret_cast<void *>(&sum), reinterpret_cast<void *>(&glob_sum),
-                          1, GetMPIDatatype<data_t>(), MPI_SUM, MPI_COMM_WORLD);
-            sum = glob_sum;
+                data_t glob_sum = 0.0;
+                MPI_Allreduce(reinterpret_cast<void *>(&sum), reinterpret_cast<void *>(&glob_sum),
+                            1, MPI::get_datatype<data_t>(), MPI_SUM, MPI_COMM_WORLD);
+                sum = glob_sum;
 #endif
+            }
             sum /= sizes_[0] * sizes_[1] * sizes_[2];
 
 #pragma omp parallel for
diff --git a/include/grid_interpolate.hh b/include/grid_interpolate.hh
new file mode 100644
index 0000000..5304fab
--- /dev/null
+++ b/include/grid_interpolate.hh
@@ -0,0 +1,191 @@
+#pragma once
+
+#include <array>
+#include <vector>
+
+#include <general.hh>
+
+#include <math/vec3.hh>
+
+//! Interpolation helper bound to one grid: NGP/CIC lookups plus the MPI ghost-plane and position exchange.
+template <int interp_order, typename grid_t>
+struct grid_interpolate
+{
+  using data_t = typename grid_t::data_t;
+  using vec3 = std::array<real_t, 3>;
+
+  static constexpr bool is_distributed_trait = grid_t::is_distributed_trait;
+  static constexpr int interpolation_order = interp_order;
+
+  std::vector<data_t> boundary_;  //!< ghost x-planes received in update_ghosts()
+  std::vector<int> local0starts_; //!< local_0_start_ of every task, gathered in update_ghosts()
+  const grid_t &gridref;          //!< grid that is interpolated (referenced, not owned)
+  size_t nx_, ny_, nz_;           //!< copies of g.n_[0], g.n_[1], g.n_[2]
+
+  //! bind to grid g; for distributed grids this immediately calls update_ghosts(g)
+  explicit grid_interpolate(const grid_t &g)
+      : gridref(g), nx_(g.n_[0]), ny_(g.n_[1]), nz_(g.n_[2])
+  {
+    static_assert(interpolation_order >= 0 && interpolation_order <= 2, "Interpolation order needs to be 0 (NGP), 1 (CIC), or 2 (TSC).");
+
+    if (is_distributed_trait)
+    {
+      update_ghosts( g );
+    }
+  }
+
+  //! Refresh the ghost planes: every task hands its first interpolation_order+1 x-planes
+  //! of g to the task with the next lower rank (periodically). Does nothing without USE_MPI.
+  void update_ghosts( const grid_t &g )
+  {
+#if defined(USE_MPI)
+    const int ntasks = MPI::get_size(), rank = MPI::get_rank();
+    // collect the slab offsets of all tasks (used by get_task)
+    int local_0_start = int(gridref.local_0_start_);
+    local0starts_.assign(ntasks, 0);
+    MPI_Allgather(&local_0_start, 1, MPI_INT, local0starts_.data(), 1, MPI_INT, MPI_COMM_WORLD);
+
+    //... exchange boundary
+    const size_t nx = interpolation_order + 1;
+    const size_t ny = g.n_[1];
+    const size_t nz = g.n_[2];
+    boundary_.assign(nx * ny * nz, data_t{0.0});
+    for (size_t i = 0; i < nx; ++i)
+    {
+      for (size_t j = 0; j < ny; ++j)
+      {
+        for (size_t k = 0; k < nz; ++k)
+        {
+          boundary_[(i * ny + j) * nz + k] = g.relem(i, j, k);
+        }
+      }
+    }
+
+    const int sendto = (rank + ntasks - 1) % ntasks;
+    const int recvfrom = (rank + ntasks + 1) % ntasks;
+    MPI_Status status;
+    status.MPI_ERROR = MPI_SUCCESS;
+    // tags encode the sending rank; MPI takes the element count as int, so convert explicitly
+    int err = MPI_Sendrecv_replace(boundary_.data(), static_cast<int>(boundary_.size()), MPI::get_datatype<data_t>(), sendto,
+                          rank + 1000, recvfrom, recvfrom + 1000, MPI_COMM_WORLD, &status);
+
+    if( err != MPI_SUCCESS ){
+      char errstr[MPI_MAX_ERROR_STRING]; int errlen=0; // MPI requires a buffer of this size
+      MPI_Error_string(err, errstr, &errlen );
+      music::elog << "MPI_ERROR #" << err << " : " << errstr << std::endl;
+    }
+#else
+    (void)g; // nothing to exchange in a build without MPI
+#endif
+  }
+
+  //! value of the cell containing pos (indices are truncated); the second argument is unused
+  data_t get_ngp_at(const std::array<real_t, 3> &pos, std::vector<data_t> & /*val*/) const noexcept
+  {
+    const size_t ix = static_cast<size_t>(pos[0]);
+    const size_t iy = static_cast<size_t>(pos[1]);
+    const size_t iz = static_cast<size_t>(pos[2]);
+    return gridref.relem(ix - gridref.local_0_start_, iy, iz);
+  }
+
+  //! Cloud-in-cell (trilinear) value at pos, given in global grid index units. For distributed
+  //! grids the upper x-plane is read from boundary_ when it lies beyond the local slab.
+  data_t get_cic_at(const std::array<real_t, 3> &pos) const noexcept
+  {
+    const size_t ix = static_cast<size_t>(pos[0]);
+    const size_t iy = static_cast<size_t>(pos[1]);
+    const size_t iz = static_cast<size_t>(pos[2]);
+    const real_t dx = pos[0] - real_t(ix), tx = 1.0 - dx;
+    const real_t dy = pos[1] - real_t(iy), ty = 1.0 - dy;
+    const real_t dz = pos[2] - real_t(iz), tz = 1.0 - dz;
+    const size_t iy1 = (iy + 1) % ny_;
+    const size_t iz1 = (iz + 1) % nz_;
+
+    data_t val{0.0};
+
+    // add the four (y,z) corners of one x-plane; at(j,k) reads that plane, wx is its x-weight
+    auto add_plane = [&](auto &&at, const real_t wx) {
+      val += at(iy, iz) * wx * ty * tz;
+      val += at(iy, iz1) * wx * ty * dz;
+      val += at(iy1, iz) * wx * dy * tz;
+      val += at(iy1, iz1) * wx * dy * dz;
+    };
+
+    if( is_distributed_trait ){
+      const ptrdiff_t localix = ix-gridref.local_0_start_;
+      add_plane([&](size_t j, size_t k) { return gridref.relem(localix, j, k); }, tx);
+
+      // compare as signed values: localix is signed, the slab size need not be
+      if( localix+1 >= static_cast<ptrdiff_t>(gridref.local_0_size_) ){
+        const size_t ghostix = localix+1 - gridref.local_0_size_; // plane index inside boundary_
+        add_plane([&](size_t j, size_t k) { return boundary_[(ghostix*ny_+j)*nz_+k]; }, dx);
+      }else{
+        const size_t localix1 = localix+1;
+        add_plane([&](size_t j, size_t k) { return gridref.relem(localix1, j, k); }, dx);
+      }
+    }else{
+      const size_t ix1 = (ix + 1) % nx_;
+      add_plane([&](size_t j, size_t k) { return gridref.relem(ix, j, k); }, tx);
+      add_plane([&](size_t j, size_t k) { return gridref.relem(ix1, j, k); }, dx);
+    }
+    return val;
+  }
+
+  //! index of the last task whose local_0_start_ is <= int(x[0]); -1 if there is none
+  int get_task(const vec3 &x) const noexcept
+  {
+    const auto it = std::upper_bound(local0starts_.begin(), local0starts_.end(), int(x[0]));
+    return static_cast<int>(std::distance(local0starts_.begin(), it)) - 1;
+  }
+
+  //! Send every position to the task returned by get_task() and replace pos by the positions
+  //! received from all tasks (collective call). Does nothing for serial grids or without USE_MPI.
+  void domain_decompose_pos(std::vector<vec3> &pos) const noexcept
+  {
+    if (is_distributed_trait)
+    {
+#if defined(USE_MPI)
+      const int ntasks = MPI::get_size();
+      // group positions by destination task so that every send segment is contiguous
+      std::sort(pos.begin(), pos.end(), [&](const vec3 &x1, const vec3 &x2) { return get_task(x1) < get_task(x2); });
+      std::vector<int> sendcounts(ntasks, 0), sendoffsets(ntasks, 0);
+      std::vector<int> recvcounts(ntasks, 0), recvoffsets(ntasks, 0);
+      for (const auto &x : pos)
+      {
+        sendcounts[get_task(x)] += 3; // counts and offsets are in units of real_t
+      }
+
+      MPI_Alltoall(sendcounts.data(), 1, MPI_INT, recvcounts.data(), 1, MPI_INT, MPI_COMM_WORLD);
+
+      size_t tot_receive = recvcounts[0];
+      for (int i = 1; i < ntasks; ++i)
+      {
+        sendoffsets[i] = sendcounts[i - 1] + sendoffsets[i - 1];
+        recvoffsets[i] = recvcounts[i - 1] + recvoffsets[i - 1];
+        tot_receive += recvcounts[i];
+      }
+
+      std::vector<vec3> recvbuf(tot_receive/3,{0.,0.,0.});
+      // data() is well defined for empty vectors (a task may send or receive nothing), &v[0] is not
+      MPI_Alltoallv(pos.data(), sendcounts.data(), sendoffsets.data(), MPI::get_datatype<real_t>(),
+                    recvbuf.data(), recvcounts.data(), recvoffsets.data(), MPI::get_datatype<real_t>(), MPI_COMM_WORLD);
+      pos.swap( recvbuf );
+#endif
+    }
+  }
+
+  //! returns exp(i*0.5*(k.dx)) / (sinc_x*sinc_y*sinc_z)^(interpolation_order+1), where
+  //! sinc_d = sinc(0.5*pi*k[d]/kny_[d]) and dx are the grid spacings from gridref.get_dx()
+  ccomplex_t compensation_kernel( const vec3_t<real_t>& k ) const noexcept
+  {
+    auto sinc = []( real_t x ){ return (std::abs(x)>1e-10)? std::sin(x)/x : 1.0; };
+    real_t dfx = sinc(0.5*M_PI*k[0]/gridref.kny_[0]);
+    real_t dfy = sinc(0.5*M_PI*k[1]/gridref.kny_[1]);
+    real_t dfz = sinc(0.5*M_PI*k[2]/gridref.kny_[2]);
+    real_t del = std::pow(dfx*dfy*dfz,1+interpolation_order);
+    // phase of a shift by half a grid spacing along every dimension
+    real_t shift = 0.5 * k[0] * gridref.get_dx()[0] + 0.5 * k[1] * gridref.get_dx()[1] + 0.5 * k[2] * gridref.get_dx()[2];
+    return std::exp(ccomplex_t(0.0, shift)) / del;
+  }
+
+};
\ No newline at end of file
diff --git a/include/ic_generator.hh b/include/ic_generator.hh
index 59471b0..3a637e8 100644
--- a/include/ic_generator.hh
+++ b/include/ic_generator.hh
@@ -9,12 +9,12 @@
 
 namespace ic_generator{
 
-    int Run( ConfigFile& the_config );
+    int Run( config_file& the_config );
     
-    int Initialise( ConfigFile& the_config );
+    int Initialise( config_file& the_config );
 
     extern std::unique_ptr<RNG_plugin> the_random_number_generator;
     extern std::unique_ptr<output_plugin> the_output_plugin;
-    extern std::unique_ptr<CosmologyCalculator>  the_cosmo_calc;
+    extern std::unique_ptr<cosmology::calculator>  the_cosmo_calc;
 
 }
diff --git a/include/logger.hh b/include/logger.hh
index 41fc287..e13012f 100644
--- a/include/logger.hh
+++ b/include/logger.hh
@@ -6,35 +6,35 @@
 #include <fstream>
 #include <iostream>
 
-namespace csoca {
+namespace music {
 
-enum LogLevel : int {
-  Off     = 0,
-  Fatal   = 1,
-  Error   = 2,
-  Warning = 3,
-  Info    = 4,
-  Debug   = 5
+enum log_level : int {
+  off     = 0,
+  fatal   = 1,
+  error   = 2,
+  warning = 3,
+  info    = 4,
+  debug   = 5
 };
 
-class Logger {
+class logger {
 private:
-  static LogLevel log_level_;
+  static log_level log_level_;
   static std::ofstream output_file_;
 
 public:
-  Logger()  = default;
-  ~Logger() = default;
+  logger()  = default;
+  ~logger() = default;
 
-  static void SetLevel(const LogLevel &level);
-  static LogLevel GetLevel();
+  static void set_level(const log_level &level);
+  static log_level get_level();
 
-  static void SetOutput(const std::string filename);
-  static void UnsetOutput();
+  static void set_output(const std::string filename);
+  static void unset_output();
 
-  static std::ofstream &GetOutput();
+  static std::ofstream &get_output();
 
-  template <typename T> Logger &operator<<(const T &item) {
+  template <typename T> logger &operator<<(const T &item) {
     std::cout << item;
     if (output_file_.is_open()) {
       output_file_ << item;
@@ -42,7 +42,7 @@ public:
     return *this;
   }
 
-  Logger &operator<<(std::ostream &(*fp)(std::ostream &)) {
+  logger &operator<<(std::ostream &(*fp)(std::ostream &)) {
     std::cout << fp;
     if (output_file_.is_open()) {
       output_file_ << fp;
@@ -51,32 +51,32 @@ public:
   }
 };
 
-class LogStream {
+class log_stream {
 private:
-  Logger &logger_;
-  LogLevel stream_level_;
+  logger &logger_;
+  log_level stream_level_;
   std::string line_prefix_, line_postfix_;
 
   bool newline;
 
 public:
-  LogStream(Logger &logger, const LogLevel &level)
+  log_stream(logger &logger, const log_level &level)
     : logger_(logger), stream_level_(level), newline(true) {
     switch (stream_level_) {
-      case LogLevel::Fatal:
+      case log_level::fatal:
         line_prefix_ = "\033[31mFatal : ";
         break;
-      case LogLevel::Error:
+      case log_level::error:
         line_prefix_ = "\033[31mError : ";
         break;
-      case LogLevel::Warning:
+      case log_level::warning:
         line_prefix_ = "\033[33mWarning : ";
         break;
-      case LogLevel::Info:
+      case log_level::info:
         //line_prefix_ = " | Info    | ";
         line_prefix_ = " \033[0m";
         break;
-      case LogLevel::Debug:
+      case log_level::debug:
         line_prefix_ = "Debug : \033[0m";
         break;
       default:
@@ -85,14 +85,14 @@ public:
     }
     line_postfix_ = "\033[0m";
   }
-  ~LogStream() = default;
+  ~log_stream() = default;
 
   inline std::string GetPrefix() const {
     return line_prefix_;
   }
 
-  template <typename T> LogStream &operator<<(const T &item) {
-    if (Logger::GetLevel() >= stream_level_) {
+  template <typename T> log_stream &operator<<(const T &item) {
+    if (logger::get_level() >= stream_level_) {
       if (newline) {
         logger_ << line_prefix_;
         newline = false;
@@ -102,8 +102,8 @@ public:
     return *this;
   }
 
-  LogStream &operator<<(std::ostream &(*fp)(std::ostream &)) {
-    if (Logger::GetLevel() >= stream_level_) {
+  log_stream &operator<<(std::ostream &(*fp)(std::ostream &)) {
+    if (logger::get_level() >= stream_level_) {
       logger_ << fp;
       logger_ << line_postfix_;
       newline = true;
@@ -125,11 +125,11 @@ public:
 };
 
 // global instantiations for different levels
-extern Logger glogger;
-extern LogStream flog;
-extern LogStream elog;
-extern LogStream wlog;
-extern LogStream ilog;
-extern LogStream dlog;
+extern logger glogger;
+extern log_stream flog;
+extern log_stream elog;
+extern log_stream wlog;
+extern log_stream ilog;
+extern log_stream dlog;
 
-} // namespace csoca
+} // namespace music
diff --git a/include/math/interpolate.hh b/include/math/interpolate.hh
new file mode 100644
index 0000000..41fe8d4
--- /dev/null
+++ b/include/math/interpolate.hh
@@ -0,0 +1,68 @@
+#pragma once
+
+#include <vector>
+#include <cassert>
+#include <gsl/gsl_spline.h>
+#include <gsl/gsl_errno.h>
+
+//! Cubic-spline interpolation of a tabulated function using GSL; logx/logy select log-space interpolation
+template <bool logx, bool logy, bool periodic>
+class interpolated_function_1d
+{
+private:
+  bool isinit_;                         //!< true while the GSL objects below are allocated
+  std::vector<double> data_x_, data_y_; //!< private copies of the table (log-transformed if requested)
+  gsl_interp_accel *gsl_ia_{nullptr};
+  gsl_spline *gsl_sp_{nullptr};
+
+  //! free the GSL spline and accelerator; callers guard this with isinit_
+  void deallocate()
+  {
+    gsl_spline_free(gsl_sp_);
+    gsl_interp_accel_free(gsl_ia_);
+  }
+
+public:
+  // This class owns raw GSL handles: copying by construction *or* by assignment would free them twice.
+  interpolated_function_1d(const interpolated_function_1d &) = delete;
+  interpolated_function_1d &operator=(const interpolated_function_1d &) = delete;
+
+  interpolated_function_1d() : isinit_(false){}
+  //! construct and immediately build the spline from the table (data_x, data_y)
+  interpolated_function_1d(const std::vector<double> &data_x, const std::vector<double> &data_y)
+  : isinit_(false)
+  {
+    this->set_data( data_x, data_y );
+  }
+
+  ~interpolated_function_1d()
+  {
+    if (isinit_) this->deallocate();
+  }
+
+  //! (re)build the spline; asserts equally sized tables with more than 5 entries
+  void set_data(const std::vector<double> &data_x, const std::vector<double> &data_y)
+  {
+    data_x_ = data_x;
+    data_y_ = data_y;
+    assert(data_x_.size() == data_y_.size());
+    assert(data_x_.size() > 5);
+    assert(!(logx & periodic));
+    if (logx) for (auto &d : data_x_) d = std::log(d);
+    if (logy) for (auto &d : data_y_) d = std::log(d);
+    if (isinit_) this->deallocate(); // release the previous spline before building a new one
+    gsl_ia_ = gsl_interp_accel_alloc();
+    gsl_sp_ = gsl_spline_alloc(periodic ? gsl_interp_cspline_periodic : gsl_interp_cspline, data_x_.size());
+    gsl_spline_init(gsl_sp_, &data_x_[0], &data_y_[0], data_x_.size());
+    isinit_ = true;
+  }
+
+  //! evaluate the interpolant at x; requires set_data() to have been called and x > 0 if logx
+  double operator()(double x) const noexcept
+  {
+    assert( isinit_ && !(logx&&x<=0.0) );
+    double xa = logx ? std::log(x) : x;
+    double y(gsl_spline_eval(gsl_sp_, xa, gsl_ia_));
+    return logy ? std::exp(y) : y;
+  }
+};
\ No newline at end of file
diff --git a/include/math/mat3.hh b/include/math/mat3.hh
new file mode 100644
index 0000000..75458ea
--- /dev/null
+++ b/include/math/mat3.hh
@@ -0,0 +1,146 @@
+#pragma once
+#include <array>
+#include <gsl/gsl_math.h>
+#include <gsl/gsl_eigen.h>
+#include <math/vec3.hh>
+//! 3x3 matrix of scalar type T (row-major storage) with a GSL based symmetric eigen-solver
+template<typename T>
+class mat3_t{
+protected:
+    std::array<T,9> data_;            //!< matrix elements, (i,j) is stored at index 3*i+j
+    gsl_matrix_view m_;               //!< GSL view onto data_, created lazily by init_gsl()
+    gsl_vector *eval_;
+    gsl_matrix *evec_;
+    gsl_eigen_symmv_workspace * wsp_;
+    bool bdid_alloc_gsl_;             //!< true while the GSL objects above are allocated
+
+    //! allocate memory for GSL operations if we haven't done so yet
+    void init_gsl(){
+        if( !bdid_alloc_gsl_ )
+        {
+            m_ = gsl_matrix_view_array (&data_[0], 3, 3);
+            eval_ = gsl_vector_alloc (3);
+            evec_ = gsl_matrix_alloc (3, 3);
+            wsp_ = gsl_eigen_symmv_alloc (3);
+            bdid_alloc_gsl_ = true;
+        }
+    }
+
+    //! free memory for GSL operations if it was allocated
+    void free_gsl(){
+        if( bdid_alloc_gsl_ )
+        {
+            gsl_eigen_symmv_free (wsp_);
+            gsl_vector_free (eval_);
+            gsl_matrix_free (evec_);
+            bdid_alloc_gsl_ = false; // a repeated call must not free twice
+        }
+    }
+
+public:
+    //! default constructor, the elements are left uninitialised
+    mat3_t()
+    : bdid_alloc_gsl_(false) 
+    {}
+
+    //! copy constructor, copies the elements only (GSL scratch space is per object)
+    mat3_t( const mat3_t<T> &m)
+    : data_(m.data_), bdid_alloc_gsl_(false) 
+    {}
+
+    //! copy constructor for non-const reference, needed to avoid variadic template being called for non-const reference
+    mat3_t( mat3_t<T> &m)
+    : data_(m.data_), bdid_alloc_gsl_(false)
+    {}
+
+    //! move constructor
+    mat3_t( mat3_t<T> &&m)
+    : data_(std::move(m.data_)), bdid_alloc_gsl_(false) 
+    {}
+
+    //! construct mat3_t from a list of elements given in storage (row-major) order
+    template<typename ...E>
+    mat3_t(E&&...e) 
+    : data_{{std::forward<E>(e)...}}, bdid_alloc_gsl_(false)
+    {}
+
+    //! copy assignment, copies the elements only
+    mat3_t<T>& operator=(const mat3_t<T>& m) noexcept{
+        data_ = m.data_;
+        return *this;
+    }
+
+    //! move assignment (takes a non-const rvalue so that the elements can actually be moved)
+    mat3_t<T>& operator=(mat3_t<T>&& m) noexcept{
+        data_ = std::move(m.data_);
+        return *this;
+    }
+
+    //! destructor
+    ~mat3_t(){ this->free_gsl(); }
+
+    //! flat index access to the elements in storage order
+    T &operator[](size_t i) noexcept { return data_[i];}
+    //! const flat index access to the elements in storage order
+    const T &operator[](size_t i) const noexcept { return data_[i]; }
+    //! matrix 2d index access
+    T &operator()(size_t i, size_t j) noexcept { return data_[3*i+j]; }
+    //! const matrix 2d index access
+    const T &operator()(size_t i, size_t j) const noexcept { return data_[3*i+j]; }
+
+    //! in-place addition
+    mat3_t<T>& operator+=( const mat3_t<T>& rhs ) noexcept{
+        for (size_t i = 0; i < 9; ++i) (*this)[i] += rhs[i];
+        return *this;
+    }
+
+    //! in-place subtraction
+    mat3_t<T>& operator-=( const mat3_t<T>& rhs ) noexcept{
+        for (size_t i = 0; i < 9; ++i) (*this)[i] -= rhs[i];
+        return *this;
+    }
+
+    //! set all elements to zero
+    void zero() noexcept{ for (size_t i = 0; i < 9; ++i) data_[i]=0; }
+    //! eigen-decomposition of the (assumed symmetric) matrix: eigenvalues in ascending order in
+    //! evals, the matching eigenvectors in evec1, evec2, evec3_t; the matrix itself is left unchanged
+    void eigen( vec3_t<T>& evals, vec3_t<T>& evec1, vec3_t<T>& evec2, vec3_t<T>& evec3_t )
+    {
+        this->init_gsl();
+        const std::array<T,9> saved(data_); // gsl_eigen_symmv overwrites parts of its input matrix
+        gsl_eigen_symmv (&m_.matrix, eval_, evec_, wsp_);
+        gsl_eigen_symmv_sort (eval_, evec_, GSL_EIGEN_SORT_VAL_ASC);
+        data_ = saved;
+        for( int i=0; i<3; ++i ){
+            evals[i] = gsl_vector_get( eval_, i );
+            evec1[i] = gsl_matrix_get( evec_, i, 0 );
+            evec2[i] = gsl_matrix_get( evec_, i, 1 );
+            evec3_t[i] = gsl_matrix_get( evec_, i, 2 );
+        }
+    }
+};
+
+//! element-wise sum of two matrices
+template<typename T>
+inline mat3_t<T> operator+(const mat3_t<T> &lhs, const mat3_t<T> &rhs) noexcept
+{
+    // not constexpr: mat3_t has a user-provided destructor and is therefore not a literal type
+    mat3_t<T> result;
+    for (size_t i = 0; i < 9; ++i) result[i] = lhs[i] + rhs[i];
+    return result;
+}
+
+//! matrix-vector product: component row of the result is the sum over col of A(row,col)*v[col]
+template<typename T>
+inline vec3_t<T> operator*( const mat3_t<T> &A, const vec3_t<T> &v ) noexcept
+{
+    vec3_t<T> product;
+    for( int row=0; row<3; ++row )
+    {
+        T &out = product[row];
+        out = 0.0;
+        for( int col=0; col<3; ++col ) out += A(row,col)*v[col];
+    }
+    return product;
+}
+
diff --git a/include/math/ode_integrate.hh b/include/math/ode_integrate.hh
new file mode 100644
index 0000000..3858b85
--- /dev/null
+++ b/include/math/ode_integrate.hh
@@ -0,0 +1,103 @@
+#pragma once
+/*******************************************************************************\
+ ode_integrate.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations 
+ 
+ CHANGELOG (only majors, for details see repo):
+    06/2019 - Oliver Hahn - first implementation
+\*******************************************************************************/
+
+namespace ode_integrate
+{
+
+// Classical 4th order Runge-Kutta step without error estimate: advances y and t in place by h
+template <typename vector_t, typename function_t>
+inline void rk4_step(double h, double &t, vector_t &y, function_t f)
+{
+    const double t_mid = t + h / 2, t_end = t + h;
+    vector_t inc_a(h * f(t, y));
+    vector_t inc_b(h * f(t_mid, y + inc_a / 2));
+    vector_t inc_c(h * f(t_mid, y + inc_b / 2));
+    vector_t inc_d(h * f(t_end, y + inc_c));
+    y += (inc_a + 2 * inc_b + 2 * inc_c + inc_d) / 6;
+    t = t_end;
+}
+
+// Cash-Karp Runge-Kutta step: advances y in place (5th order) and returns the embedded 4th order error estimate.
+// NOTE: t is only read here and is NOT advanced; the caller has to do t += h. See Press & Teukolsky (1992):
+// "Adaptive Stepsize Runge-Kutta Integration", Computers in Physics 6, 188 (1992); doi: 10.1063/1.4823060
+template <typename vector_t, typename function_t>
+inline vector_t ckrk5_step(double h, double &t, vector_t &y, function_t f)
+{
+  static constexpr double
+      a2 = 0.20,
+      a3 = 0.30, a4 = 0.60, a5 = 1.0, a6 = 0.8750,
+      b21 = 0.20,
+      b31 = 3.0 / 40.0, b32 = 9.0 / 40.0,
+      b41 = 0.30, b42 = -0.90, b43 = 1.20,
+      b51 = -11.0 / 54.0, b52 = 2.50, b53 = -70.0 / 27.0, b54 = 35.0 / 27.0,
+      b61 = 1631.0 / 55296.0, b62 = 175.0 / 512.0, b63 = 575.0 / 13824.0, b64 = 44275.0 / 110592.0, b65 = 253.0 / 4096.0,
+      c1 = 37.0 / 378.0, c3 = 250.0 / 621.0, c4 = 125.0 / 594.0, c6 = 512.0 / 1771.0,
+      dc1 = c1 - 2825.0 / 27648.0, dc3 = c3 - 18575.0 / 48384.0,
+      dc4 = c4 - 13525.0 / 55296.0, dc5 = -277.0 / 14336.0, dc6 = c6 - 0.250;
+  // the six stage increments k1..k6, each already multiplied by the step size h
+  vector_t k1(h * f(t, y));
+  vector_t k2(h * f(t + a2 * h, y + b21 * k1));
+  vector_t k3(h * f(t + a3 * h, y + b31 * k1 + b32 * k2));
+  vector_t k4(h * f(t + a4 * h, y + b41 * k1 + b42 * k2 + b43 * k3));
+  vector_t k5(h * f(t + a5 * h, y + b51 * k1 + b52 * k2 + b53 * k3 + b54 * k4));
+  vector_t k6(h * f(t + a6 * h, y + b61 * k1 + b62 * k2 + b63 * k3 + b64 * k4 + b65 * k5));
+  // update y in place; k2 and k5 do not enter this combination
+  y += c1 * k1 + c3 * k3 + c4 * k4 + c6 * k6;
+  // error estimate, built from the weight differences dc1..dc6 defined above
+  return dc1 * k1 + dc3 * k3 + dc4 * k4 + dc5 * k5 + dc6 * k6;
+}
+
+// Adaptive step-size quality-controlled routine for ckrk5_step, see
+// Press & Teukolsky (1992): "Adaptive Stepsize Runge-Kutta Integration"
+// in Computers in Physics 6, 188 (1992); doi: 10.1063/1.4823060
+// Tries a step of size htry and shrinks it until the scaled error max_i |yerr_i/yscale_i| / eps is <= 1.
+// On return t and y are advanced by hdid, and hnext holds the suggested size of the next step.
+template <typename vector_t, typename function_t>
+inline void rk_step_qs(double htry, double &t, vector_t &y, vector_t &yscale, function_t f, double eps, double &hdid, double &hnext)
+{
+  static constexpr double SAFETY{0.9};
+  static constexpr double PSHRNK{-0.25};
+  static constexpr double PGROW{-0.2};
+  static constexpr double ERRCON{1.89e-4};
+
+  auto h(htry);
+  vector_t ytemp(y);
+  vector_t yerr;
+  double errmax;
+
+  for (;;)
+  {
+    // ckrk5_step advances its argument in place, so every trial has to restart from the
+    // unmodified y; otherwise a retry would integrate on top of the rejected step
+    ytemp = y;
+    yerr = ckrk5_step(h, t, ytemp, f);
+    errmax = 0.0;
+    for (size_t i = 0; i < yerr.size(); ++i)
+    {
+      errmax = std::max(errmax, std::abs(yerr[i] / yscale[i]));
+    }
+    errmax = errmax / eps;
+    if (!(errmax > 1.0)) break; // step accepted
+
+    // step rejected: shrink h, but by no more than a factor of 10 per trial
+    h *= std::max(0.1, SAFETY*std::pow(errmax, PSHRNK));
+    if (t + h == t)
+    {
+      std::cerr << "stepsize underflow in rkqs" << std::endl;
+      abort();
+    }
+  }
+  // suggest the next step size; for very small errors the growth is fixed to a factor of 5
+  hnext = (errmax > ERRCON) ? h * SAFETY * std::pow(errmax, PGROW) : 5*h;
+  hdid = h;
+  t += h;
+  y = ytemp;
+}
+
+
+} // namespace ode_integrate
\ No newline at end of file
diff --git a/include/math/vec3.hh b/include/math/vec3.hh
new file mode 100644
index 0000000..3d1fe44
--- /dev/null
+++ b/include/math/vec3.hh
@@ -0,0 +1,118 @@
+/*******************************************************************\
+ vec3.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations 
+ 
+ CHANGELOG (only majors, for details see repo):
+    06/2019 - Oliver Hahn - first implementation
+\*******************************************************************/
+#pragma once
+
+//! 3-component vector of arbitrary scalar type T with the usual arithmetic operators
+template< typename T >
+class vec3_t{
+private:
+    //! component storage
+    std::array<T,3> data_;
+
+public:
+    //! named references to the three components; every constructor binds them
+    //! to this object's own storage
+    T &x,&y,&z;
+
+    //! default constructor, the components are left uninitialised
+    vec3_t()
+    : x(data_[0]),y(data_[1]),z(data_[2]){}
+
+    //! copy constructor
+    vec3_t( const vec3_t<T> &other)
+    : data_(other.data_), x(data_[0]),y(data_[1]),z(data_[2]){}
+
+    //! copy constructor for non-const lvalues, keeps the variadic constructor below from being selected for them
+    vec3_t( vec3_t<T>& other)
+    : data_(other.data_), x(data_[0]),y(data_[1]),z(data_[2]){}
+
+    //! move constructor
+    vec3_t( vec3_t<T> &&other)
+    : data_(std::move(other.data_)), x(data_[0]), y(data_[1]), z(data_[2]){}
+
+    //! construct from a list of components, which is forwarded to the storage array
+    template<typename ...E>
+    vec3_t(E&&...e) 
+    : data_{{std::forward<E>(e)...}}, x{data_[0]}, y{data_[1]}, z{data_[2]}
+    {}
+
+    //! component access by index
+    T &operator[](size_t i) noexcept{ return data_[i];}
+
+    //! read-only component access by index
+    const T &operator[](size_t i) const noexcept { return data_[i]; }
+
+    //! assignment copies the components only, the references stay bound to this object
+    vec3_t<T>& operator=( const vec3_t<T>& v ) noexcept { data_=v.data_; return *this; }
+
+    //! component-wise sum
+    vec3_t<T> operator+( const vec3_t<T>& v ) const noexcept{ return vec3_t<T>({data_[0]+v[0],data_[1]+v[1],data_[2]+v[2]}); }
+
+    //! component-wise difference
+    vec3_t<T> operator-( const vec3_t<T>& v ) const noexcept{ return vec3_t<T>({data_[0]-v[0],data_[1]-v[1],data_[2]-v[2]}); }
+
+    //! negation of all components
+    vec3_t<T> operator-() const noexcept{ return vec3_t<T>({-data_[0],-data_[1],-data_[2]}); }
+
+    //! product with a scalar (scalar on the right)
+    vec3_t<T> operator*( T s ) const noexcept{ return vec3_t<T>({data_[0]*s,data_[1]*s,data_[2]*s}); }
+
+    //! quotient by a scalar
+    vec3_t<T> operator/( T s ) const noexcept{ return vec3_t<T>({data_[0]/s,data_[1]/s,data_[2]/s}); }
+
+    //! add another vector in place
+    vec3_t<T>& operator+=( const vec3_t<T>& v ) noexcept{ for( size_t i=0; i<3; ++i ) data_[i]+=v[i]; return *this; }
+
+    //! subtract another vector in place
+    vec3_t<T>& operator-=( const vec3_t<T>& v ) noexcept{ for( size_t i=0; i<3; ++i ) data_[i]-=v[i]; return *this; }
+
+    //! scale by a scalar in place
+    vec3_t<T>& operator*=( T s ) noexcept{ for( auto& c : data_ ) c*=s; return *this; }
+
+    //! divide by a scalar in place
+    vec3_t<T>& operator/=( T s ) noexcept{ for( auto& c : data_ ) c/=s; return *this; }
+
+    //! dot product with another vector
+    T dot(const vec3_t<T> &a) const noexcept
+    {
+        return x * a.x + y * a.y + z * a.z;
+    }
+
+    //! squared 2-norm
+    T norm_squared(void) const noexcept { return this->dot(*this); }
+
+    //! 2-norm
+    T norm(void) const noexcept { return std::sqrt( this->norm_squared() ); }
+
+    //! wrap absolute vector to box of size p: every component c becomes fmod(2*p + c, p)
+    vec3_t<T>& wrap_abs( T p = 1.0 ) noexcept{
+        for( size_t i=0; i<3; ++i ) data_[i] = std::fmod( 2*p + data_[i], p );
+        return *this;
+    }
+
+    //! wrap relative vector: components below -p/2 are raised by p, components >= p/2 are lowered by p
+    vec3_t<T>& wrap_rel( T p = 1.0 ) noexcept{
+        for( auto& c : data_ ){
+            if( c<-p/2 ) c = c+p; else if( c>=p/2 ) c = c-p;
+        }
+        return *this;
+    }
+
+    //! lexicographic ordering (x first, then y, then z), allows 3d sorting of vec3_ts
+    bool operator<( const vec3_t<T>& o ) const noexcept{
+        if( x!=o.x ) return x<o.x;
+        if( y!=o.y ) return y<o.y;
+        return z<o.z;
+    }
+};
+
+//! multiplication with a scalar on the left-hand side: s * v
+//! (delegates to the member operator v * s, which forms the same three products)
+template<typename T>
+vec3_t<T> operator*( T s, const vec3_t<T>& v )
+{ return v * s; }
diff --git a/include/operators.hh b/include/operators.hh
index cc0ed67..e2f4c8e 100644
--- a/include/operators.hh
+++ b/include/operators.hh
@@ -1,9 +1,54 @@
 #pragma once
+/*
+ 
+ operators.hh - This file is part of MUSIC2 -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ Copyright (C) 2019  Oliver Hahn
+ 
+*/
+#include <general.hh>
 
 namespace op{
-inline auto assign_to = [](auto &g){return [&](auto i, auto v){ g[i] = v; };};
-inline auto add_to = [](auto &g){return [&](auto i, auto v){ g[i] += v; };};
-inline auto add_twice_to = [](auto &g){return [&](auto i, auto v){ g[i] += 2*v; };};
-inline auto subtract_from = [](auto &g){return [&](auto i, auto v){ g[i] -= v; };};
-inline auto subtract_twice_from = [](auto &g){return [&](auto i, auto v){ g[i] -= 2*v; };};
+
+//!== list of primitive operators to work on fields ==!//
+//! returns a functor (index, value) that stores value in g[index]; g is captured by reference
+template< typename field>
+inline auto assign_to( field& g ){ return [&g](auto idx, auto val){ g[idx] = val; }; }
+//! returns a functor (index, value) that adds value*x to g[index]; g by reference, x by value
+template< typename field, typename val >
+inline auto multiply_add_to( field& g, val x ){ return [&g,x](auto idx, auto term){ g[idx] += term*x; }; }
+//! returns a functor (index, value) that adds value to g[index]; g is captured by reference
+template< typename field>
+inline auto add_to( field& g ){ return [&g](auto idx, auto val){ g[idx] += val; }; }
+//! returns a functor (index, value) that subtracts value from g[index]; g is captured by reference
+template< typename field>
+inline auto subtract_from( field& g ){ return [&g](auto idx, auto val){ g[idx] -= val; }; }
+
+//! vanilla standard gradient: returns the purely imaginary factor i*k for a Fourier mode
+class fourier_gradient{
+private:
+    real_t boxlen_, k0_; //!< box length and the wave number unit 2*pi/boxlen_
+    size_t n_, nhalf_;   //!< grid resolution and n_/2 (integer division)
+public:
+    //! reads "BoxLength" and "GridRes" from the section "setup" of the configuration
+    explicit fourier_gradient( const config_file& the_config )
+    : boxlen_( the_config.get_value<double>("setup", "BoxLength") ), 
+      k0_(2.0*M_PI/boxlen_),
+      n_( the_config.get_value<size_t>("setup","GridRes") ),
+      nhalf_( n_/2 )
+    {}
+    //! gradient factor along dimension idim for the mode with indices ijk: indices above nhalf_
+    //! are shifted down by n_, and the mode exactly at nhalf_ yields zero
+    inline ccomplex_t gradient( const int idim, std::array<size_t,3> ijk ) const
+    {
+        const size_t idx = ijk[idim];
+        real_t rgrad = 0.0;
+        if( idx != nhalf_ ) rgrad = real_t(idx) - real_t(idx > nhalf_) * n_;
+        return ccomplex_t(0.0,rgrad * k0_);
+    }
+    //! always returns 1.0 for this operator, ijk is ignored
+    inline real_t vfac_corr( std::array<size_t,3> ijk ) const { return 1.0; }
+};
 }
diff --git a/include/output_plugin.hh b/include/output_plugin.hh
index cc092d3..fff657c 100644
--- a/include/output_plugin.hh
+++ b/include/output_plugin.hh
@@ -21,11 +21,12 @@
 
 enum class output_type {particles,field_lagrangian,field_eulerian};
 
+
 class output_plugin
 {
 protected:
-	//! reference to the ConfigFile object that holds all configuration options
-	ConfigFile &cf_;
+	//! reference to the config_file object that holds all configuration options
+	config_file &cf_;
 
 	//! output file or directory name
 	std::string fname_;
@@ -34,17 +35,17 @@ protected:
 	std::string interface_name_;
 public:
 	//! constructor
-	output_plugin(ConfigFile &cf, std::string interface_name )
+	output_plugin(config_file &cf, std::string interface_name )
 		: cf_(cf), interface_name_(interface_name)
 	{
-		fname_ = cf_.GetValue<std::string>("output", "filename");
+		fname_ = cf_.get_value<std::string>("output", "filename");
 	}
 
 	//! virtual destructor
 	virtual ~output_plugin(){}
 
 	//! routine to write particle data for a species
-	virtual void write_particle_data(const particle::container &pc, const cosmo_species &s ) {};
+	virtual void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species ) {};
 
 	//! routine to write gridded fluid component data for a species
 	virtual void write_grid_data(const Grid_FFT<real_t> &g, const cosmo_species &s, const fluid_component &c ) {};
@@ -57,6 +58,12 @@ public:
 
 	//! routine to query whether species is written as particle data
 	// virtual bool write_species_as_particles( const cosmo_species &s ){ return !write_species_as_grid(s); }
+
+	//! query if output wants 64bit precision for real values
+	virtual bool has_64bit_reals() const = 0;
+
+	//! query if output wants 64bit precision for integer values
+	virtual bool has_64bit_ids() const = 0;
 	
 	//! routine to return a multiplicative factor that contains the desired position units for the output
 	virtual real_t position_unit() const = 0;
@@ -71,7 +78,7 @@ public:
 struct output_plugin_creator
 {
 	//! create an instance of a plug-in
-	virtual std::unique_ptr<output_plugin> create(ConfigFile &cf) const = 0;
+	virtual std::unique_ptr<output_plugin> create(config_file &cf) const = 0;
 
 	//! destroy an instance of a plug-in
 	virtual ~output_plugin_creator() {}
@@ -96,12 +103,12 @@ struct output_plugin_creator_concrete : public output_plugin_creator
 	}
 
 	//! create an instance of the plug-in
-	std::unique_ptr<output_plugin> create(ConfigFile &cf) const
+	std::unique_ptr<output_plugin> create(config_file &cf) const
 	{
 		return std::make_unique<Derived>(cf); // Derived( cf );
 	}
 };
 
 //! failsafe version to select the output plug-in
-std::unique_ptr<output_plugin> select_output_plugin(ConfigFile &cf);
+std::unique_ptr<output_plugin> select_output_plugin(config_file &cf);
 
diff --git a/include/particle_container.hh b/include/particle_container.hh
index fb05889..92b683c 100644
--- a/include/particle_container.hh
+++ b/include/particle_container.hh
@@ -1,3 +1,10 @@
+/*******************************************************************\
+ particle_container.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations 
+ 
+ CHANGELOG (only majors, for details see repo):
+    10/2019 - Oliver Hahn - first implementation
+\*******************************************************************/
 #pragma once
 
 #ifdef USE_MPI
@@ -13,57 +20,96 @@ namespace particle{
 class container
 {
 public:
-	std::vector<float> positions_, velocities_;
-	std::vector<int> ids_;
+	std::vector<float > positions32_, velocities32_;
+	std::vector<double> positions64_, velocities64_;
+	
+	std::vector<uint32_t> ids32_;
+	std::vector<uint64_t> ids64_;
+	
 
-	container()
-	{
-	}
+	container(){ }
 
 	container(const container &) = delete;
 
-	const void* get_pos_ptr() const{
-		return reinterpret_cast<const void*>( &positions_[0] );
-	}
-
-	const void* get_vel_ptr() const{
-		return reinterpret_cast<const void*>( &velocities_[0] );
-	}
-
-	const void* get_ids_ptr() const{
-		return reinterpret_cast<const void*>( &ids_[0] );
-	}
-
-	void allocate(size_t nump)
+	void allocate(size_t nump, bool b64reals, bool b64ids)
 	{
-		positions_.resize(3 * nump);
-		velocities_.resize(3 * nump);
-		ids_.resize(nump);
+		if( b64reals ){
+			positions64_.resize(3 * nump);
+			velocities64_.resize(3 * nump);
+			positions32_.clear();
+			velocities32_.clear();
+		}else{
+			positions32_.resize(3 * nump);
+			velocities32_.resize(3 * nump);
+			positions64_.clear();
+			velocities64_.clear();
+		}
+
+		if( b64ids ){
+			ids64_.resize(nump);
+			ids32_.clear();
+		}else{
+			ids32_.resize(nump);
+			ids64_.clear();
+		}
 	}
 
-	void set_pos(size_t ipart, size_t idim, real_t p)
-	{
-		positions_[3 * ipart + idim] = p;
+	const void* get_pos32_ptr() const{
+		return positions32_.data(); // unlike &v[0], data() is well defined when the vector is empty
+	}
 
-	void set_vel(size_t ipart, size_t idim, real_t p)
-	{
-		velocities_[3 * ipart + idim] = p;
+	void set_pos32(size_t ipart, size_t idim, float p){
+		positions32_[3 * ipart + idim] = p;
 	}
 
-	void set_id(size_t ipart, id_t id)
-	{
-		ids_[ipart] = id;
+	const void* get_pos64_ptr() const{
+		return positions64_.data(); // unlike &v[0], data() is well defined when the vector is empty
+	}
+
+	inline void set_pos64(size_t ipart, size_t idim, double p){
+		positions64_[3 * ipart + idim] = p;
+	}
+
+	inline const void* get_vel32_ptr() const{
+		return velocities32_.data(); // unlike &v[0], data() is well defined when the vector is empty
+	}
+	
+	inline void set_vel32(size_t ipart, size_t idim, float p){
+		velocities32_[3 * ipart + idim] = p;
+	}
+
+	const void* get_vel64_ptr() const{
+		return velocities64_.data(); // unlike &v[0], data() is well defined when the vector is empty
+	}
+
+	inline void set_vel64(size_t ipart, size_t idim, double p){
+		velocities64_[3 * ipart + idim] = p;
+	}
+
+	const void* get_ids32_ptr() const{
+		return ids32_.data(); // unlike &v[0], data() is well defined when the vector is empty
+	}
+
+	void set_id32(size_t ipart, uint32_t id){
+		ids32_[ipart] = id;
+	}
+
+	const void* get_ids64_ptr() const{
+		return ids64_.data(); // unlike &v[0], data() is well defined when the vector is empty
+	}
+
+	void set_id64(size_t ipart, uint64_t id){
+		ids64_[ipart] = id;
 	}
 
 	size_t get_local_num_particles(void) const
 	{
-		return ids_.size();
+		return std::max(ids32_.size(),ids64_.size());
 	}
 
 	size_t get_global_num_particles(void) const
 	{
-		size_t local_nump = ids_.size(), global_nump;
+		size_t local_nump = this->get_local_num_particles(), global_nump;
 #ifdef USE_MPI
 		MPI_Allreduce(reinterpret_cast<void *>(&local_nump), reinterpret_cast<void *>(&global_nump), 1,
 					  MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
@@ -97,11 +143,11 @@ public:
 
 	void dump(void)
 	{
-		for (size_t i = 0; i < ids_.size(); ++i)
+		/*for (size_t i = 0; i < ids_.size(); ++i)
 		{
 			std::cout << positions_[3 * i + 0] << " " << positions_[3 * i + 1] << " " << positions_[3 * i + 2] << " "
 					  << velocities_[3 * i + 0] << " " << velocities_[3 * i + 1] << " " << velocities_[3 * i + 2] << std::endl;
-		}
+		}*/
 	}
 };
 
diff --git a/include/particle_generator.hh b/include/particle_generator.hh
index da88813..5fe68d1 100644
--- a/include/particle_generator.hh
+++ b/include/particle_generator.hh
@@ -1,150 +1,325 @@
+/*******************************************************************\
+ particle_generator.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations 
+ 
+ CHANGELOG (only majors, for details see repo):
+    10/2019 - Oliver Hahn - first implementation
+\*******************************************************************/
 #pragma once
 
-namespace particle {
+#include <math/vec3.hh>
+#include <grid_interpolate.hh>
 
-enum lattice{
-    lattice_sc=0, lattice_bcc=1, lattice_fcc=2
-};
+#if defined(USE_HDF5)
+#include "HDF_IO.hh"
+#endif
 
-template<typename field_t>
-void initialize_lattice( container& particles, lattice lattice_type, const field_t& field ){
-    const size_t num_p_in_load = field.local_size();
-    const size_t overload = 1<<lattice_type; // 1 for sc, 2 for bcc, 4 for fcc
-
-    particles.allocate( overload * num_p_in_load );
-
-    for( size_t i=0,ipcount=0; i<field.size(0); ++i ){
-        for( size_t j=0; j<field.size(1); ++j){
-            for( size_t k=0; k<field.size(2); ++k,++ipcount){
-                for( size_t iload=0; iload<overload; ++iload ){
-                    particles.set_id( ipcount+iload*num_p_in_load, overload*field.get_cell_idx_1d(i,j,k)+iload );
-                }
-            }
-        }
-    }
-}
-
-// invalidates field, phase shifted to unspecified position after return
-template<typename field_t>
-void set_positions( container& particles, lattice lattice_type, int idim, real_t lunit, field_t& field )
+namespace particle
 {
-    const size_t num_p_in_load = field.local_size();
+    using vec3 = std::array<real_t,3>;
 
-    for( size_t i=0,ipcount=0; i<field.size(0); ++i ){
-        for( size_t j=0; j<field.size(1); ++j){
-            for( size_t k=0; k<field.size(2); ++k){
-                auto pos = field.template get_unit_r<real_t>(i,j,k);
-                particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) );
-            }
-        }
-    }
+    enum lattice // values >= 0 index lattice_shifts and second_lattice_shift; 2^value sub-lattices are generated per type
+    {
+        lattice_glass = -1, // glass: handled separately from the Bravais types, positions are read from a glass file
+        lattice_sc = 0,  // SC : simple cubic
+        lattice_bcc = 1, // BCC: body-centered cubic
+        lattice_fcc = 2, // FCC: face-centered cubic
+        lattice_rsc = 3, // RSC: refined simple cubic
+    };
 
-    if( lattice_type == particle::lattice_bcc ){
-        field.shift_field( 0.5, 0.5, 0.5 );
-        auto ipcount0 = num_p_in_load;
-        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
-            for( size_t j=0; j<field.size(1); ++j){
-                for( size_t k=0; k<field.size(2); ++k){
-                    auto pos = field.template get_unit_r_shifted<real_t>(i,j,k,0.5,0.5,0.5);
-                    particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) );
-                }
-            }
-        }
-    }
-    else if( lattice_type == particle::lattice_fcc ){ 
-        // 0.5 0.5 0.0
-        field.shift_field( 0.5, 0.5, 0.0 );
-        auto ipcount0 = num_p_in_load;
-        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
-            for( size_t j=0; j<field.size(1); ++j){
-                for( size_t k=0; k<field.size(2); ++k){
-                    auto pos = field.template get_unit_r_shifted<real_t>(i,j,k,0.5,0.5,0.0);
-                    particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) );
-                }
-            }
-        }
-        // 0.0 0.5 0.5
-        field.shift_field( -0.5, 0.0, 0.5 );
-        ipcount0 = 2*num_p_in_load;
-        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
-            for( size_t j=0; j<field.size(1); ++j){
-                for( size_t k=0; k<field.size(2); ++k){
-                    auto pos = field.template get_unit_r_shifted<real_t>(i,j,k,0.0,0.5,0.5);
-                    particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) );
-                }
-            }
-        }
-        // 0.5 0.0 0.5
-        field.shift_field( 0.5, -0.5, 0.0 );
-        ipcount0 = 3*num_p_in_load;
-        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
-            for( size_t j=0; j<field.size(1); ++j){
-                for( size_t k=0; k<field.size(2); ++k){
-                    auto pos = field.template get_unit_r_shifted<real_t>(i,j,k,0.5,0.0,0.5);
-                    particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) );
-                }
-            }
-        }
-    }
-}
+    const std::vector<std::vector<vec3_t<real_t>>> lattice_shifts = // indexed as lattice_shifts[lattice_type][ishift]; presumably in units of one grid cell — TODO confirm against shift_field
+        {
+            // first shift must always be zero! (otherwise set_positions and set_velocities break)
+            /* SC : */ {{0.0, 0.0, 0.0}},
+            /* BCC: */ {{0.0, 0.0, 0.0}, {0.5, 0.5, 0.5}},
+            /* FCC: */ {{0.0, 0.0, 0.0}, {0.0, 0.5, 0.5}, {0.5, 0.0, 0.5}, {0.5, 0.5, 0.0}},
+            /* RSC: */ {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.5}, {0.0, 0.5, 0.0}, {0.0, 0.5, 0.5}, {0.5, 0.0, 0.0}, {0.5, 0.0, 0.5}, {0.5, 0.5, 0.0}, {0.5, 0.5, 0.5}},
+    };
 
-template<typename field_t>
-void set_velocities( container& particles, lattice lattice_type, int idim, field_t& field )
-{
-    const size_t num_p_in_load = field.local_size();
+    const std::vector<vec3_t<real_t>> second_lattice_shift = // indexed by lattice_type (>= 0); global shift applied when is_second_lattice is set
+        {
+            /* SC : */ {0.5, 0.5, 0.5}, // this corresponds to CsCl lattice
+            /* BCC: */ {0.5, 0.5, 0.0}, // is there a diatomic lattice with BCC base?!?
+            /* FCC: */ {0.5, 0.5, 0.5}, // this corresponds to NaCl lattice
+                                        // /* FCC: */ {0.25, 0.25, 0.25}, // this corresponds to Zincblende/GaAs lattice
+            /* RSC: */ {0.25, 0.25, 0.25},
+    };
 
-    for( size_t i=0,ipcount=0; i<field.size(0); ++i ){
-        for( size_t j=0; j<field.size(1); ++j){
-            for( size_t k=0; k<field.size(2); ++k){
-                particles.set_vel( ipcount++, idim, field.relem(i,j,k) );
+    template <typename field_t>
+    class lattice_generator
+    {
+        protected:
+
+        struct glass
+        {
+            using data_t = typename field_t::data_t;
+            size_t num_p, off_p;
+            grid_interpolate<1, field_t> interp_;
+            std::vector<vec3> glass_posr;
+
+            glass( config_file& cf, const field_t &field ) // loads the glass file, tiles it ntiles^3 times and stores the local positions in grid-cell units
+            : num_p(0), off_p(0), interp_( field )
+            {
+                std::vector<real_t> glass_pos;
+                real_t lglassbox = 1.0;
+                // file name and tiling factor are read from the "setup" section of the config file
+                std::string glass_fname = cf.get_value<std::string>("setup", "GlassFileName");
+                size_t ntiles = cf.get_value<size_t>("setup", "GlassTiles");
+
+#if defined(USE_HDF5)
+                HDFReadGroupAttribute(glass_fname, "Header", "BoxSize", lglassbox);
+                HDFReadDataset(glass_fname, "/PartType1/Coordinates", glass_pos);
+#else
+                throw std::runtime_error("Glass lattice requires HDF5 support. Enable and recompile.");
+#endif
+                const size_t np_in_file = glass_pos.size() / 3; // coordinates are stored as flat x,y,z triples
+                const size_t np_total = np_in_file * ntiles * ntiles * ntiles; // particle count of the whole tiled box
+#if defined(USE_MPI)
+                off_p = MPI::get_rank() * (np_total / MPI::get_size());
+                num_p = (MPI::get_rank() + 1 == MPI::get_size()) ? np_total - off_p : np_total / MPI::get_size(); // last task also takes the remainder so no particle is dropped
+#else
+                num_p = np_total;
+                off_p = 0;
+#endif
+
+                music::ilog << "Glass file contains " << np_in_file << " particles." << std::endl;
+
+                glass_posr.assign(num_p, {0.0, 0.0, 0.0});
+
+                std::array<real_t, 3> ng({real_t(field.n_[0]), real_t(field.n_[1]), real_t(field.n_[2])});
+
+                #pragma omp parallel for
+                for (size_t i = 0; i < num_p; ++i)
+                {
+                    size_t idxpart = off_p + i;
+                    size_t idx_in_glass = idxpart % np_in_file;
+                    size_t idxtile = idxpart / np_in_file; // linear tile index in [0, ntiles^3), decomposed below with z fastest
+                    size_t tile_z = idxtile % ntiles;
+                    size_t tile_y = (idxtile / ntiles) % ntiles;
+                    size_t tile_x = idxtile / (ntiles * ntiles);
+                    glass_posr[i][0] = std::fmod((glass_pos[3 * idx_in_glass + 0] / lglassbox + real_t(tile_x)) / ntiles * ng[0] + ng[0], ng[0]);
+                    glass_posr[i][1] = std::fmod((glass_pos[3 * idx_in_glass + 1] / lglassbox + real_t(tile_y)) / ntiles * ng[1] + ng[1], ng[1]);
+                    glass_posr[i][2] = std::fmod((glass_pos[3 * idx_in_glass + 2] / lglassbox + real_t(tile_z)) / ntiles * ng[2] + ng[2], ng[2]);
+                }
+
+#if defined(USE_MPI)
+                interp_.domain_decompose_pos(glass_posr);
+                // the local particle count is re-read after the decomposition, and the ID offset is rebuilt from all tasks' counts
+                num_p = glass_posr.size();
+                std::vector<size_t> all_num_p( MPI::get_size(), 0 );
+                MPI_Allgather( &num_p, 1, MPI_UNSIGNED_LONG_LONG, &all_num_p[0], 1, MPI_UNSIGNED_LONG_LONG, MPI_COMM_WORLD );
+                off_p = 0;
+                for( int itask=0; itask<MPI::get_rank(); ++itask ){ // exclusive prefix sum: including the own count would let ID ranges of neighbouring tasks overlap
+                    off_p += all_num_p[itask];
+                }
+#endif
+            }
-        }
-    }
 
-    if( lattice_type == particle::lattice_bcc ){
-        field.shift_field( 0.5, 0.5, 0.5 );
-        auto ipcount0 = num_p_in_load;
-        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
-            for( size_t j=0; j<field.size(1); ++j){
-                for( size_t k=0; k<field.size(2); ++k){
-                    particles.set_vel( ipcount++, idim, field.relem(i,j,k) );
+            void update_ghosts( const field_t &field ) // forwards to the interpolator's update_ghosts; called before every get_at sweep in set_positions/set_velocities
+            {
+                interp_.update_ghosts( field );
+            }
+
+            data_t get_at( const vec3& x ) const noexcept // field value at position x via the interpolator's get_cic_at; x is presumably in grid-cell units like glass_posr — TODO confirm
+            {
+                return interp_.get_cic_at( x );
+            }
+
+            size_t size() const noexcept // number of glass particles held by this task (num_p)
+            {
+                return num_p;
+            }
+
+            size_t offset() const noexcept // offset (off_p) that callers add to the local particle index when building global IDs
+            {
+                return off_p;
+            }
+        };
+
+        std::unique_ptr<glass> glass_ptr_;
+
+        private:
+        particle::container particles_;
+
+        public:
+        lattice_generator(lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t &field, config_file &cf) // allocates the local particles and assigns their IDs; cf is only used for the glass case
+        {
+            if (lattice_type != lattice_glass)
+            {
+                // number of modes present in the field
+                const size_t num_p_in_load = field.local_size();
+                // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load):
+                const size_t overload = 1ull << std::max<int>(0, lattice_type); // 1 for sc, 2 for bcc, 4 for fcc, 8 for rsc
+                // allocate memory for all local particles
+                particles_.allocate(overload * num_p_in_load, b64reals, b64ids);
+                // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well
+
+                for (size_t i = 0, ipcount = 0; i < field.size(0); ++i) // ipcount runs over the cells in (i,j,k) order
+                {
+                    for (size_t j = 0; j < field.size(1); ++j)
+                    {
+                        for (size_t k = 0; k < field.size(2); ++k, ++ipcount)
+                        {
+                            for (size_t iload = 0; iload < overload; ++iload) // sub-lattice iload is stored as a contiguous slab of num_p_in_load particles
+                            {
+                                if (b64ids)
+                                {
+                                    particles_.set_id64(ipcount + iload * num_p_in_load, IDoffset + overload * field.get_cell_idx_1d(i, j, k) + iload);
+                                }
+                                else
+                                {
+                                    particles_.set_id32(ipcount + iload * num_p_in_load, IDoffset + overload * field.get_cell_idx_1d(i, j, k) + iload); // NOTE(review): the size_t value is narrowed to uint32_t here — confirm callers only pick 32-bit IDs when they fit
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            else
+            {
+                glass_ptr_ = std::make_unique<glass>( cf, field ); // glass case: positions come from the glass file, one particle per glass point
+                particles_.allocate(glass_ptr_->size(), b64reals, b64ids);
+
+                #pragma omp parallel for
+                for (size_t i = 0; i < glass_ptr_->size(); ++i)
+                {
+                    if (b64ids)
+                    {
+                        particles_.set_id64(i, IDoffset + i + glass_ptr_->offset());
+                    }
+                    else
+                    {
+                        particles_.set_id32(i, IDoffset + i + glass_ptr_->offset());
+                    }
                 }
             }
         }
-    }
-    else if( lattice_type == particle::lattice_fcc ){ 
-        // 0.5 0.5 0.0
-        field.shift_field( 0.5, 0.5, 0.0 );
-        auto ipcount0 = num_p_in_load;
-        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
-            for( size_t j=0; j<field.size(1); ++j){
-                for( size_t k=0; k<field.size(2); ++k){
-                    particles.set_vel( ipcount++, idim, field.relem(i,j,k) );
-                }
-            }
-        }
-        // 0.0 0.5 0.5
-        field.shift_field( -0.5, 0.0, 0.5 );
-        ipcount0 = 2*num_p_in_load;
-        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
-            for( size_t j=0; j<field.size(1); ++j){
-                for( size_t k=0; k<field.size(2); ++k){
-                    particles.set_vel( ipcount++, idim, field.relem(i,j,k) );
-                }
-            }
-        }
-        // 0.5 0.0 0.5
-        field.shift_field( 0.5, -0.5, 0.0 );
-        ipcount0 = 3*num_p_in_load;
-        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
-            for( size_t j=0; j<field.size(1); ++j){
-                for( size_t k=0; k<field.size(2); ++k){
-                    particles.set_vel( ipcount++, idim, field.relem(i,j,k) );
-                }
-            }
-        }
-    }
-}
 
+        // invalidates field, phase shifted to unspecified position after return
+        void set_positions(const lattice lattice_type, bool is_second_lattice, int idim, real_t lunit, const bool b64reals, field_t &field, config_file &cf) // writes component idim of every local particle position as unperturbed position * lunit + field value; cf is not used here
+        {
+            // works only for Bravais types
+            if (lattice_type >= 0)
+            {
+                const size_t num_p_in_load = field.local_size();
+                for (int ishift = 0; ishift < (1 << lattice_type); ++ishift) // one pass per sub-lattice, same 2^lattice_type count as in the constructor
+                {
+                    // if we are dealing with the secondary lattice, apply a global shift
+                    if (ishift == 0 && is_second_lattice)
+                    {
+                        field.shift_field(second_lattice_shift[lattice_type]);
+                    }
 
-} // end namespace particles
+                    // can omit first shift since zero by convention, unless shifted already above, otherwise apply relative phase shift
+                    if (ishift > 0)
+                    {
+                        field.shift_field(lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift - 1]); // shifts accumulate, so only the difference to the previous sub-lattice is applied
+                    }
+                    // read out values from phase shifted field and set assoc. particle's value
+                    const auto ipcount0 = ishift * num_p_in_load; // first particle index of this sub-lattice's slab
+                    for (size_t i = 0, ipcount = ipcount0; i < field.size(0); ++i)
+                    {
+                        for (size_t j = 0; j < field.size(1); ++j)
+                        {
+                            for (size_t k = 0; k < field.size(2); ++k)
+                            {
+                                auto pos = field.template get_unit_r_shifted<real_t>(i, j, k, lattice_shifts[lattice_type][ishift] + (is_second_lattice ? second_lattice_shift[lattice_type] : vec3_t<real_t>{0., 0., 0.}));
+                                if (b64reals)
+                                {
+                                    particles_.set_pos64(ipcount++, idim, pos[idim] * lunit + field.relem(i, j, k));
+                                }
+                                else
+                                {
+                                    particles_.set_pos32(ipcount++, idim, pos[idim] * lunit + field.relem(i, j, k));
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            else
+            {
+                glass_ptr_->update_ghosts( field ); // glass case: the field is interpolated to each stored glass position instead of being phase shifted
+                #pragma omp parallel for
+                for (size_t i = 0; i < glass_ptr_->size(); ++i)
+                {
+                    auto pos = glass_ptr_->glass_posr[i];
+                    real_t disp = glass_ptr_->get_at(pos);
+                    if (b64reals)
+                    {
+                        particles_.set_pos64(i, idim, pos[idim] / field.n_[idim] * lunit + disp); // pos is divided by the grid size n_ before scaling with lunit
+                    }
+                    else
+                    {
+                        particles_.set_pos32(i, idim, pos[idim] / field.n_[idim] * lunit + disp);
+                    }
+                }
+            }
+        }
+
+        void set_velocities(lattice lattice_type, bool is_second_lattice, int idim, const bool b64reals, field_t &field, config_file &cf) // writes component idim of every local particle velocity from the field; shifts field like set_positions does; cf is not used here
+        {
+            // works only for Bravais types
+            if (lattice_type >= 0)
+            {
+                const size_t num_p_in_load = field.local_size();
+                for (int ishift = 0; ishift < (1 << lattice_type); ++ishift) // one pass per sub-lattice, same 2^lattice_type count as in the constructor
+                {
+                    // if we are dealing with the secondary lattice, apply a global shift
+                    if (ishift == 0 && is_second_lattice)
+                    {
+                        field.shift_field(second_lattice_shift[lattice_type]);
+                    }
+                    // can omit first shift since zero by convention, unless shifted already above, otherwise apply relative phase shift
+                    if (ishift > 0)
+                    {
+                        field.shift_field(lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift - 1]); // shifts accumulate, so only the difference to the previous sub-lattice is applied
+                    }
+                    // read out values from phase shifted field and set assoc. particle's value
+                    const auto ipcount0 = ishift * num_p_in_load; // first particle index of this sub-lattice's slab
+                    for (size_t i = 0, ipcount = ipcount0; i < field.size(0); ++i)
+                    {
+                        for (size_t j = 0; j < field.size(1); ++j)
+                        {
+                            for (size_t k = 0; k < field.size(2); ++k)
+                            {
+                                if (b64reals)
+                                {
+                                    particles_.set_vel64(ipcount++, idim, field.relem(i, j, k));
+                                }
+                                else
+                                {
+                                    particles_.set_vel32(ipcount++, idim, field.relem(i, j, k));
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            else
+            {
+                glass_ptr_->update_ghosts( field ); // glass case: the field is interpolated to each stored glass position instead of being phase shifted
+                #pragma omp parallel for
+                for (size_t i = 0; i < glass_ptr_->size(); ++i)
+                {
+                    auto pos = glass_ptr_->glass_posr[i];
+                    real_t vel = glass_ptr_->get_at(pos);
+                    if (b64reals)
+                    {
+                        particles_.set_vel64(i, idim, vel);
+                    }
+                    else
+                    {
+                        particles_.set_vel32(i, idim, vel);
+                    }
+                }
+            }
+        }
+
+        const particle::container& get_particles() const noexcept{ // read-only access to the particle container filled by the constructor and the set_* methods
+            return particles_;
+        }
+
+    }; // class lattice_generator
+
+} // namespace particle
diff --git a/include/particle_plt.hh b/include/particle_plt.hh
new file mode 100644
index 0000000..a452559
--- /dev/null
+++ b/include/particle_plt.hh
@@ -0,0 +1,568 @@
+#pragma once
+
+#include <general.hh>
+#include <unistd.h> // for unlink
+
+#include <iostream>
+#include <fstream>
+
+#include <random>
+#include <map>
+
+#include <cassert>
+
+#include <particle_generator.hh>
+#include <grid_fft.hh>
+#include <math/mat3.hh>
+
+#include <gsl/gsl_sf_hyperg.h>
+inline double Hypergeometric2F1( double a, double b, double c, double x ) // thin wrapper: Gauss hypergeometric function 2F1(a,b;c;x) evaluated by GSL
+{
+  return gsl_sf_hyperg_2F1( a, b, c, x); // NOTE(review): GSL documents this routine for |x| < 1 — confirm callers stay in that range
+}
+
+#define PRODUCTION
+
+namespace particle{
+//! implement Joyce, Marcos et al. PLT calculation
+
+class lattice_gradient{
+private:
+    const real_t boxlen_, aini_;
+    const size_t ngmapto_, ngrid_, ngrid32_;
+    const real_t mapratio_, XmL_;
+    Grid_FFT<real_t,false> D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_;
+    Grid_FFT<real_t,false> grad_x_, grad_y_, grad_z_;
+    std::vector<vec3_t<real_t>> vectk_;
+    std::vector<vec3_t<int>> ico_, vecitk_;
+
+    bool is_even( int i ){ return !(i % 2); } // parity test; a zero remainder means even, also for negative i
+
+    bool is_in( int i, int j, int k, const mat3_t<int>& M ){ // true iff every component of M*(i,j,k) is even
+        vec3_t<int> idx({i,j,k});
+        auto mapped = M * idx;
+        return !( !is_even(mapped.x) || !is_even(mapped.y) || !is_even(mapped.z) );
+    }
+
+    void init_D( lattice lattice_type )
+    {
+        constexpr real_t pi     = M_PI;
+        constexpr real_t twopi  = 2.0*M_PI;
+        constexpr real_t fourpi = 4.0*M_PI;
+        const     real_t sqrtpi = std::sqrt(M_PI);
+        const     real_t pi32   = std::pow(M_PI,1.5);
+
+        //! === vectors, reciprocals and normals for the SC lattice ===
+        const int charge_fac_sc = 1;
+        const mat3_t<real_t> mat_bravais_sc{
+            1.0, 0.0, 0.0,
+            0.0, 1.0, 0.0,
+            0.0, 0.0, 1.0, 
+        };
+        const mat3_t<real_t> mat_reciprocal_sc{
+            twopi, 0.0, 0.0,
+            0.0, twopi, 0.0,
+            0.0, 0.0, twopi,
+        };
+        const mat3_t<int> mat_invrecip_sc{
+            2, 0, 0,
+            0, 2, 0,
+            0, 0, 2,
+        };
+        const std::vector<vec3_t<real_t>> normals_sc{
+            {pi,0.,0.},{-pi,0.,0.},
+            {0.,pi,0.},{0.,-pi,0.},
+            {0.,0.,pi},{0.,0.,-pi},
+        };
+        
+
+        //! === vectors, reciprocals and normals for the BCC lattice ===
+        const int charge_fac_bcc = 2;
+        const mat3_t<real_t> mat_bravais_bcc{
+            1.0, 0.0, 0.5,
+            0.0, 1.0, 0.5,
+            0.0, 0.0, 0.5, 
+        };
+        const mat3_t<real_t> mat_reciprocal_bcc{
+            twopi, 0.0, 0.0,
+            0.0, twopi, 0.0,
+            -twopi, -twopi, fourpi,
+        };
+        const mat3_t<int> mat_invrecip_bcc{
+            2, 0, 0,
+            0, 2, 0,
+            1, 1, 1,
+        };
+        const std::vector<vec3_t<real_t>> normals_bcc{
+            {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi},
+            {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi},
+            {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.}
+        };
+        
+
+        //! === vectors, reciprocals and normals for the FCC lattice ===
+        const int charge_fac_fcc = 4;
+        const mat3_t<real_t> mat_bravais_fcc{
+            0.0, 0.5, 0.0,
+            0.5, 0.0, 1.0,
+            0.5, 0.5, 0.0, 
+        };
+        const mat3_t<real_t> mat_reciprocal_fcc{
+            -fourpi, fourpi, twopi,
+            0.0, 0.0, twopi,
+            fourpi, 0.0, -twopi,
+        };
+        const mat3_t<int> mat_invrecip_fcc{
+            0, 1, 1,
+            1, 0, 1,
+            0, 2, 0,
+        };
+        const std::vector<vec3_t<real_t>> normals_fcc{
+            {twopi,0.,0.},{-twopi,0.,0.},
+            {0.,twopi,0.},{0.,-twopi,0.},
+            {0.,0.,twopi},{0.,0.,-twopi},
+            {+pi,+pi,+pi},{+pi,+pi,-pi},
+            {+pi,-pi,+pi},{+pi,-pi,-pi},
+            {-pi,+pi,+pi},{-pi,+pi,-pi},
+            {-pi,-pi,+pi},{-pi,-pi,-pi},
+        };
+        
+        //! select the properties for the chosen lattice
+        const int ilat = lattice_type; // 0 = sc, 1 = bcc, 2 = fcc
+
+        const auto mat_bravais     = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc;
+        const auto mat_reciprocal  = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc;
+        const auto mat_invrecip    = (ilat==2)? mat_invrecip_fcc : (ilat==1)? mat_invrecip_bcc : mat_invrecip_sc;
+        const auto normals         = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc;
+        const auto charge_fac      = (ilat==2)? charge_fac_fcc : (ilat==1)? charge_fac_bcc : charge_fac_sc;
+
+        const ptrdiff_t nlattice = ngrid_;
+        const real_t dx = 1.0/real_t(nlattice);
+
+        const real_t eta = 4.0; // Ewald cutoff shall be 4 cells
+        const real_t alpha = 1.0/std::sqrt(2)/eta;
+        const real_t alpha2 = alpha*alpha;
+        const real_t alpha3 = alpha2*alpha;
+        
+        const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_fac;
+        const real_t fft_norm   = 1.0/std::pow(real_t(nlattice),3.0);
+        const real_t fft_norm12 = 1.0/std::pow(real_t(nlattice),1.5);
+
+        //! just a Kronecker \delta_ij
+        auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; };
+
+        //! Ewald summation: short-range Green's function
+        auto add_greensftide_sr = [&]( mat3_t<real_t>& D, const vec3_t<real_t>& d ) -> void {
+            auto r = d.norm();
+            if( r< 1e-14 ) return; // return zero for r=0
+
+            const real_t r2(r*r), r3(r2*r), r5(r3*r2);
+            const real_t K1( -alpha3/pi32 * std::exp(-alpha2*r2)/r2 );
+            const real_t K2( (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha2*r2)*r)/fourpi );
+            
+            for( int mu=0; mu<3; ++mu ){
+                for( int nu=mu; nu<3; ++nu ){
+                    real_t dd( d[mu]*d[nu] * K1 + (kronecker(mu,nu)/r3 - 3.0 * (d[mu]*d[nu])/r5) * K2 );
+                    D(mu,nu) += dd;
+                    D(nu,mu) += (mu!=nu)? dd : 0.0;
+                }
+            }
+        };
+
+        //! Ewald summation: long-range Green's function
+        auto add_greensftide_lr = [&]( mat3_t<real_t>& D, const vec3_t<real_t>& k, const vec3_t<real_t>& r ) -> void {
+            real_t kmod2 = k.norm_squared();
+            real_t term = std::exp(-kmod2/(4*alpha2))*std::cos(k.dot(r)) / kmod2 * fft_norm;
+            for( int mu=0; mu<3; ++mu ){
+                for( int nu=mu; nu<3; ++nu ){
+                    auto dd = k[mu] * k[nu] * term;
+                    D(mu,nu) += dd;
+                    D(nu,mu) += (mu!=nu)? dd : 0.0;
+                }
+            }
+        };
+
+        //! checks if 'vec' is in the FBZ with FBZ normal vectors given in 'normals'
+        auto check_FBZ = []( const auto& normals, const auto& vec ) -> bool {
+            for( const auto& n : normals ){ 
+                if( n.dot( vec ) > 1.0001 * n.dot(n) ){
+                    return false;
+                }
+            }
+            return true;
+        };
+        
+        constexpr ptrdiff_t lnumber = 3, knumber = 3;
+        const int numb = 1; //!< search radius when shifting vectors into FBZ
+
+        vectk_.assign(D_xx_.memsize(),vec3_t<real_t>());
+        ico_.assign(D_xx_.memsize(),vec3_t<int>());
+        vecitk_.assign(D_xx_.memsize(),vec3_t<int>());
+
+        #pragma omp parallel 
+        {
+            //... temporary to hold values of the dynamical matrix 
+            mat3_t<real_t> matD(0.0);
+
+            #pragma omp for
+            for( ptrdiff_t i=0; i<nlattice; ++i ){
+                for( ptrdiff_t j=0; j<nlattice; ++j ){
+                    for( ptrdiff_t k=0; k<nlattice; ++k ){
+                        // compute lattice site vector from (i,j,k) multiplying Bravais base matrix, and wrap back to box
+                        const vec3_t<real_t> x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)});
+                        const vec3_t<real_t> ar = (mat_bravais * x_ijk).wrap_abs();
+
+                        //... zero temporary matrix
+                        matD.zero();        
+
+                        // add real-space part of dynamical matrix, periodic copies
+                        for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){
+                            for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){
+                                for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){      
+                                    const vec3_t<real_t> n_ijk({real_t(ix),real_t(iy),real_t(iz)});            
+                                    const vec3_t<real_t> dr(ar - mat_bravais * n_ijk);
+                                    add_greensftide_sr(matD, dr);
+                                }
+                            }
+                        }
+
+                        // add k-space part of dynamical matrix
+                        for( ptrdiff_t ix=-knumber; ix<=knumber; ix++ ){
+                            for( ptrdiff_t iy=-knumber; iy<=knumber; iy++ ){
+                                for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){                      
+                                    if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){
+                                        const vec3_t<real_t> k_ijk({real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice});
+                                        const vec3_t<real_t> ak( mat_reciprocal * k_ijk);
+
+                                        add_greensftide_lr(matD, ak, ar );
+                                    }
+                                }
+                            }
+                        } 
+
+                        D_xx_.relem(i,j,k) = matD(0,0) * charge;
+                        D_xy_.relem(i,j,k) = matD(0,1) * charge;
+                        D_xz_.relem(i,j,k) = matD(0,2) * charge;
+                        D_yy_.relem(i,j,k) = matD(1,1) * charge;
+                        D_yz_.relem(i,j,k) = matD(1,2) * charge;
+                        D_zz_.relem(i,j,k) = matD(2,2) * charge;
+                    }
+                }
+            }
+        } // end omp parallel region
+
+        // fix r=0 with background density (added later in Fourier space)
+        D_xx_.relem(0,0,0) = 1.0/3.0;
+        D_xy_.relem(0,0,0) = 0.0;
+        D_xz_.relem(0,0,0) = 0.0;
+        D_yy_.relem(0,0,0) = 1.0/3.0;
+        D_yz_.relem(0,0,0) = 0.0;
+        D_zz_.relem(0,0,0) = 1.0/3.0;
+
+        D_xx_.FourierTransformForward();
+        D_xy_.FourierTransformForward();
+        D_xz_.FourierTransformForward();
+        D_yy_.FourierTransformForward();
+        D_yz_.FourierTransformForward();
+        D_zz_.FourierTransformForward();
+
+#ifndef PRODUCTION
+        if (CONFIG::MPI_task_rank == 0)
+            unlink("debug.hdf5");
+        D_xx_.Write_to_HDF5("debug.hdf5","Dxx");
+        D_xy_.Write_to_HDF5("debug.hdf5","Dxy");
+        D_xz_.Write_to_HDF5("debug.hdf5","Dxz");
+        D_yy_.Write_to_HDF5("debug.hdf5","Dyy");
+        D_yz_.Write_to_HDF5("debug.hdf5","Dyz");
+        D_zz_.Write_to_HDF5("debug.hdf5","Dzz");
+
+        std::ofstream ofs2("test_brillouin.txt");
+#endif
+        using map_t = std::map<vec3_t<int>,size_t>;
+        map_t iimap;
+            
+        //!=== Make temporary copies before resorting to std. Fourier grid ========!//
+        Grid_FFT<real_t,false> 
+            temp1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
+            temp2({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
+            temp3({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0});
+
+        temp1.FourierTransformForward(false);
+        temp2.FourierTransformForward(false);
+        temp3.FourierTransformForward(false);
+            
+        #pragma omp parallel for
+        for( size_t i=0; i<D_xx_.size(0); i++ )
+        {
+            for( size_t j=0; j<D_xx_.size(1); j++ )
+            {
+                for( size_t k=0; k<D_xx_.size(2); k++ )
+                {
+                    temp1.kelem(i,j,k) = ccomplex_t(std::real(D_xx_.kelem(i,j,k)),std::real(D_xy_.kelem(i,j,k)));
+                    temp2.kelem(i,j,k) = ccomplex_t(std::real(D_xz_.kelem(i,j,k)),std::real(D_yy_.kelem(i,j,k)));
+                    temp3.kelem(i,j,k) = ccomplex_t(std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k)));
+                }
+            }
+        }
+        D_xx_.zero(); D_xy_.zero(); D_xz_.zero();
+        D_yy_.zero(); D_yz_.zero(); D_zz_.zero();
+
+        
+        //!=== Diagonalise and resort to std. Fourier grid ========!//
+        #pragma omp parallel 
+        {
+            // thread private matrix representation
+            mat3_t<real_t> D;
+            vec3_t<real_t> eval, evec1, evec2, evec3_t;
+
+            #pragma omp for
+            for( size_t i=0; i<D_xx_.size(0); i++ )
+            {
+                for( size_t j=0; j<D_xx_.size(1); j++ )
+                {
+                    for( size_t k=0; k<D_xx_.size(2); k++ )
+                    {
+                        vec3_t<real_t> kv = D_xx_.get_k<real_t>(i,j,k);
+                        
+                        // put matrix elements into actual matrix
+                        D(0,0) = std::real(temp1.kelem(i,j,k)) / fft_norm12;
+                        D(0,1) = D(1,0) = std::imag(temp1.kelem(i,j,k)) / fft_norm12;
+                        D(0,2) = D(2,0) = std::real(temp2.kelem(i,j,k)) / fft_norm12;
+                        D(1,1) = std::imag(temp2.kelem(i,j,k)) / fft_norm12;
+                        D(1,2) = D(2,1) = std::real(temp3.kelem(i,j,k)) / fft_norm12;
+                        D(2,2) = std::imag(temp3.kelem(i,j,k)) / fft_norm12;
+
+                        // compute eigenstructure of matrix
+                        D.eigen(eval, evec1, evec2, evec3_t);
+                        evec3_t /= (twopi*ngrid_);
+
+                        // now determine to which modes on the regular lattice this contributes
+                        vec3_t<real_t> ar = kv / (twopi*ngrid_);
+                        vec3_t<real_t> a(mat_reciprocal * ar);
+                        
+                        // translate the k-vectors into the "candidate" FBZ
+                        for( int l1=-numb; l1<=numb; ++l1 ){
+                            for( int l2=-numb; l2<=numb; ++l2 ){
+                                for( int l3=-numb; l3<=numb; ++l3 ){
+                                    // need both halfs of Fourier space since we use real transforms
+                                    for( int isign=0; isign<=1; ++isign ){
+                                        const real_t sign = 2.0*real_t(isign)-1.0; 
+                                        const vec3_t<real_t> vshift({real_t(l1),real_t(l2),real_t(l3)});
+
+                                        vec3_t<real_t> vectk = sign * a + mat_reciprocal * vshift;
+
+                                        if( check_FBZ( normals, vectk ) )
+                                        {
+                                            int ix = std::round(vectk.x*(ngrid_)/twopi);
+                                            int iy = std::round(vectk.y*(ngrid_)/twopi);
+                                            int iz = std::round(vectk.z*(ngrid_)/twopi);
+
+                                            #pragma omp critical
+                                            {iimap.insert( std::pair<vec3_t<int>,size_t>({ix,iy,iz}, D_xx_.get_idx(i,j,k)) );}
+
+                                            temp1.kelem(i,j,k) = ccomplex_t(eval[2],eval[1]);
+                                            temp2.kelem(i,j,k) = ccomplex_t(eval[0],evec3_t.x);
+                                            temp3.kelem(i,j,k) = ccomplex_t(evec3_t.y,evec3_t.z);
+                                        }
+                                    }//sign
+                                } //l3
+                            } //l2
+                        } //l1
+                    } //k
+                } //j
+            } //i
+        }
+
+        D_xx_.kelem(0,0,0) = 1.0;
+        D_xy_.kelem(0,0,0) = 0.0;
+        D_xz_.kelem(0,0,0) = 0.0;
+
+        D_yy_.kelem(0,0,0) = 1.0;
+        D_yz_.kelem(0,0,0) = 0.0;
+        D_zz_.kelem(0,0,0) = 0.0;
+
+        //... approximate infinite lattice by inerpolating to sites not convered by current resolution...
+        #pragma omp parallel for
+        for( size_t i=0; i<D_xx_.size(0); i++ ){
+            for( size_t j=0; j<D_xx_.size(1); j++ ){
+                for( size_t k=0; k<D_xx_.size(2); k++ ){
+                    int ii = (int(i)>nlattice/2)? int(i)-nlattice : int(i);
+                    int jj = (int(j)>nlattice/2)? int(j)-nlattice : int(j);
+                    int kk = (int(k)>nlattice/2)? int(k)-nlattice : int(k);
+                    vec3_t<real_t> kv({real_t(ii),real_t(jj),real_t(kk)});
+
+                    auto align_with_k = [&]( const vec3_t<real_t>& v ) -> vec3_t<real_t>{
+                        return v*((v.dot(kv)<0.0)?-1.0:1.0);
+                    };
+
+                    vec3_t<real_t> v, l;
+                    map_t::iterator it;
+                    
+                    if( !is_in(i,j,k,mat_invrecip)  ){
+                        auto average_lv = [&]( const auto& t1, const auto& t2, const auto& t3, vec3_t<real_t>& v, vec3_t<real_t>& l ) {
+                            v = 0.0; l = 0.0;
+                            int count(0);
+                            
+                            auto add_lv = [&]( auto it ) -> void {
+                                auto q = it->second;++count;
+                                l += vec3_t<real_t>({std::real(t1.kelem(q)),std::imag(t1.kelem(q)),std::real(t2.kelem(q))});
+                                v += align_with_k(vec3_t<real_t>({std::imag(t2.kelem(q)),std::real(t3.kelem(q)),std::imag(t3.kelem(q))}));
+                            };
+                            map_t::iterator it;
+                            if( (it = iimap.find({ii-1,jj,kk}))!=iimap.end() ){ add_lv(it); }
+                            if( (it = iimap.find({ii+1,jj,kk}))!=iimap.end() ){ add_lv(it); }
+                            if( (it = iimap.find({ii,jj-1,kk}))!=iimap.end() ){ add_lv(it); }
+                            if( (it = iimap.find({ii,jj+1,kk}))!=iimap.end() ){ add_lv(it); }
+                            if( (it = iimap.find({ii,jj,kk-1}))!=iimap.end() ){ add_lv(it); }
+                            if( (it = iimap.find({ii,jj,kk+1}))!=iimap.end() ){ add_lv(it); }
+                            l/=real_t(count); v/=real_t(count);
+                        };
+                        
+                        average_lv(temp1,temp2,temp3,v,l);
+                        
+                    }else{
+                        if( (it = iimap.find({ii,jj,kk}))!=iimap.end() ){
+                            auto q = it->second;
+                            l = vec3_t<real_t>({std::real(temp1.kelem(q)),std::imag(temp1.kelem(q)),std::real(temp2.kelem(q))});
+                            v = align_with_k(vec3_t<real_t>({std::imag(temp2.kelem(q)),std::real(temp3.kelem(q)),std::imag(temp3.kelem(q))}));
+                        }
+                    }
+                    D_xx_.kelem(i,j,k) = l[0];
+                    D_xy_.kelem(i,j,k) = l[1];
+                    D_xz_.kelem(i,j,k) = l[2];
+                    D_yy_.kelem(i,j,k) = v[0];
+                    D_yz_.kelem(i,j,k) = v[1];
+                    D_zz_.kelem(i,j,k) = v[2];
+                }
+            }
+        }
+        
+#ifdef PRODUCTION
+        #pragma omp parallel for
+        for( size_t i=0; i<D_xx_.size(0); i++ ){
+            for( size_t j=0; j<D_xx_.size(1); j++ ){
+                for( size_t k=0; k<D_xx_.size(2); k++ )
+                {
+                    vec3_t<real_t> kv = D_xx_.get_k<real_t>(i,j,k);
+
+                    double mu1 = std::real(D_xx_.kelem(i,j,k));
+                    // double mu2 = std::real(D_xy_.kelem(i,j,k));
+                    // double mu3 = std::real(D_xz_.kelem(i,j,k));
+
+                    vec3_t<real_t> evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))});
+                    evec1 /= evec1.norm();
+
+                    // ///////////////////////////////////
+                    // // project onto spherical coordinate vectors
+                    
+                    real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ): 0.0;
+                    real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi);
+                    vec3_t<real_t> e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 );
+
+                    // re-normalise to that longitudinal amplitude is exact
+                    double renorm = evec1.dot( e_r ); if( renorm < 0.01 ) renorm = 1.0;
+
+                    // -- store in diagonal components of D_ij
+                    D_xx_.kelem(i,j,k) = 1.0;
+                    D_yy_.kelem(i,j,k) = evec1.dot( e_theta ) / renorm;
+                    D_zz_.kelem(i,j,k) = evec1.dot( e_phi ) / renorm;
+
+                    // spatially dependent correction to vfact = \dot{D_+}/D_+
+                    D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.));
+                }
+            }
+        }
+        D_xy_.kelem(0,0,0) = 1.0;
+        D_xx_.kelem(0,0,0) = 1.0;
+        D_yy_.kelem(0,0,0) = 0.0;
+        D_zz_.kelem(0,0,0) = 0.0;
+
+        // unlink("debug.hdf5");
+        // D_xy_.Write_to_HDF5("debug.hdf5","mu1");
+        // D_xx_.Write_to_HDF5("debug.hdf5","e1x");
+        // D_yy_.Write_to_HDF5("debug.hdf5","e1y");
+        // D_zz_.Write_to_HDF5("debug.hdf5","e1z");
+
+#else
+        D_xx_.Write_to_HDF5("debug.hdf5","mu1");
+        D_xy_.Write_to_HDF5("debug.hdf5","mu2");
+        D_xz_.Write_to_HDF5("debug.hdf5","mu3");
+        D_yy_.Write_to_HDF5("debug.hdf5","e1x");
+        D_yz_.Write_to_HDF5("debug.hdf5","e1y");
+        D_zz_.Write_to_HDF5("debug.hdf5","e1z");
+#endif   
+    }
+
+
+public:
+    //! Reads BoxLength, zstart, GridRes and Omega_L/Omega_m from the_config, constructs the nine ngridself^3 grids (D_ij, grad_i) and fills D_ij via init_D().
+    explicit lattice_gradient( config_file& the_config, size_t ngridself=64 )
+    : boxlen_( the_config.get_value<double>("setup", "BoxLength") ), 
+      aini_ ( 1.0/(1.0+the_config.get_value<double>("setup", "zstart")) ), // scale factor a = 1/(1+z) at zstart
+      ngmapto_( the_config.get_value<size_t>("setup", "GridRes") ), 
+      ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), // NOTE(review): ngrid32_ and mapratio_ read ngrid_/ngmapto_; correct only if those members are declared before them - confirm
+      XmL_ ( the_config.get_value<double>("cosmology", "Omega_L") / the_config.get_value<double>("cosmology", "Omega_m") ), // ratio Omega_L / Omega_m
+      D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
+      D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
+      D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
+      grad_x_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_y_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
+      grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0})
+    { 
+        music::ilog << "-------------------------------------------------------------------------------" << std::endl;
+        std::string lattice_str = the_config.get_value_safe<std::string>("setup","ParticleLoad","sc"); // "sc" is presumably the fallback when the key is absent - confirm against config_file
+        const lattice lattice_type = 
+            ((lattice_str=="bcc")? lattice_bcc 
+            : ((lattice_str=="fcc")? lattice_fcc 
+            : ((lattice_str=="rsc")? lattice_rsc 
+            : lattice_sc))); // any other string (including typos) silently selects lattice_sc
+
+        music::ilog << "PLT corrections for " << lattice_str << " lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; // prints the raw config string, not the lattice actually selected
+
+        double wtime = get_wtime(); // start of the timing interval reported below
+        music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush;
+        
+        init_D( lattice_type );
+        // init_D__old();
+
+        music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; // resets the fill character to ' ' and reports the elapsed time of init_D
+    }
+
+    inline ccomplex_t gradient( const int idim, std::array<size_t,3> ijk ) const
+    {
+        // Cartesian component idim of the (purely imaginary) gradient kernel for mode ijk; ijk is first rescaled by mapratio_
+        real_t px = ijk[0]*mapratio_, py = ijk[1]*mapratio_, pz = ijk[2]*mapratio_;
+        auto kvec = D_xx_.get_k<real_t>( px, py, pz );
+        auto kscaled = kvec.norm() / mapratio_ / boxlen_;
+
+        // radial / polar / azimuthal amplitudes, looked up with get_cic_kspace at the rescaled position
+        auto amp_r     = std::real(D_xx_.get_cic_kspace({px,py,pz}));
+        auto amp_theta = std::real(D_yy_.get_cic_kspace({px,py,pz}));
+        auto amp_phi   = std::real(D_zz_.get_cic_kspace({px,py,pz}));
+
+        // spherical angles of k; both are set to zero when |k| is not positive
+        const real_t knorm = kvec.norm();
+        const real_t phi   = knorm>0.0? std::atan2(kvec.y,kvec.x) : 0.0;
+        const real_t theta = knorm>0.0? std::acos( kvec.z / knorm ) : 0.0;
+        const real_t sin_t = std::sin(theta), cos_t = std::cos(theta), sin_p = std::sin(phi), cos_p = std::cos(phi);
+
+        // project the spherical components back onto x, y or z; any idim other than 0 or 1 yields z
+        if( idim == 0 ) return ccomplex_t(0.0, kscaled*(amp_r * sin_t * cos_p + amp_theta * cos_t * cos_p - amp_phi * sin_p));
+        if( idim == 1 ) return ccomplex_t(0.0, kscaled*(amp_r  * sin_t * sin_p + amp_theta * cos_t * sin_p + amp_phi * cos_p));
+        return ccomplex_t(0.0, kscaled*(amp_r  * cos_t - amp_theta * sin_t));
+    }
+
+    inline real_t vfac_corr( std::array<size_t,3> ijk  ) const // returns the D_xy_ table value at the mapratio_-rescaled position of mode ijk
+    {
+        real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_;
+        const real_t alpha = 1.0/std::real(D_xy_.get_cic_kspace({ix,iy,iz})); // in PRODUCTION builds init_D stores 1/(0.25*(sqrt(1+24*mu1)-1)) in D_xy_, so alpha is its reciprocal
+        return 1.0/alpha; // the tabulated value again (up to rounding); alpha is kept as a name for the disabled LCDM formula below
+        // // below is for LCDM, but it is a tiny correction for typical starting redshifts:
+        //! X = \Omega_\Lambda / \Omega_m
+        // return 1.0 / (alpha - (2*std::pow(aini_,3)*alpha*(2 + alpha)*XmL_*Hypergeometric2F1((3 + alpha)/3.,(5 + alpha)/3.,
+        //     (13 + 4*alpha)/6.,-(std::pow(aini_,3)*XmL_)))/
+        //     ((7 + 4*alpha)*Hypergeometric2F1(alpha/3.,(2 + alpha)/3.,(7 + 4*alpha)/6.,-(std::pow(aini_,3)*XmL_))));
+    }
+
+};
+
+}
\ No newline at end of file
diff --git a/include/physical_constants.hh b/include/physical_constants.hh
new file mode 100644
index 0000000..594eb0d
--- /dev/null
+++ b/include/physical_constants.hh
@@ -0,0 +1,62 @@
+#pragma once
+/*******************************************************************************\
+ physical_constants.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations 
+ 
+ CHANGELOG (only majors, for details see repo):
+    06/2019 - Oliver Hahn - first implementation
+\*******************************************************************************/
+
+// physical constants for convenience, all values have been taken from
+// the 2018 edition of the Particle Data Group Booklet,
+// http://pdg.lbl.gov/2019/mobile/reviews/pdf/rpp2018-rev-phys-constants-m.pdf
+
+namespace phys_const // compile-time physical constants and unit conversions, all expressed in SI units
+{
+// helper value of pi so that we don't need to include any other header just for this
+static constexpr double pi_ = 3.141592653589793115997963468544185161590576171875;
+
+//--- unit conversions ---------------------------------------------------
+
+// 1 Mpc in m
+static constexpr double Mpc_SI = 3.0857e22;
+
+// 1 Gyr in s (10^9 Julian years of 365.25 days, not 365-day years)
+static constexpr double Gyr_SI = 3.15576e16;
+
+// 1 eV in J (CODATA 2014, consistent with the other constants below)
+static constexpr double eV_SI = 1.6021766208e-19;
+
+// 1 erg in J
+static constexpr double erg_SI = 1e-7;
+
+//--- physical constants ------------------------------------------------
+
+// speed of light c in m/s
+static constexpr double c_SI = 2.99792458e8;
+
+// gravitational constant G in m^3/s^2/kg
+static constexpr double G_SI = 6.6740800e-11;
+
+// Boltzmann constant k_B in kg m^2/s^2/K
+static constexpr double kB_SI = 1.38064852e-23;
+
+// reduced Planck's quantum \hbar in kg m^2/s
+static constexpr double hbar_SI = 1.054571800e-34;
+
+// Stefan-Boltzmann constant sigma = pi^2 k_B^4 / (60 hbar^3 c^2) in J/m^2/s/K^4
+static constexpr double sigma_SI = (pi_ * pi_) * (kB_SI * kB_SI * kB_SI * kB_SI) / 60. / (hbar_SI * hbar_SI * hbar_SI) / (c_SI * c_SI);
+
+// electron mass in kg
+static constexpr double me_SI = 9.10938356e-31;
+
+// proton mass in kg
+static constexpr double mp_SI = 1.672621898e-27;
+
+// unified atomic mass unit (u) in kg
+static constexpr double u_SI = 1.660539040e-27;
+
+// critical density 3 H0^2 / (8 pi G) for H0 = 100 h km/s/Mpc (H0^2 = 1e10 m^2/s^2/Mpc^2), in h^2 kg/m^3
+static constexpr double rhocrit_h2_SI = 3 * 1e10 / (8 * pi_ * G_SI) / Mpc_SI / Mpc_SI;
+
+} // namespace phys_const
\ No newline at end of file
diff --git a/include/random_plugin.hh b/include/random_plugin.hh
index 3e7b77c..a91ab7e 100644
--- a/include/random_plugin.hh
+++ b/include/random_plugin.hh
@@ -10,21 +10,21 @@
 class RNG_plugin
 {
   protected:
-    ConfigFile *pcf_; //!< pointer to config_file from which to read parameters
+    config_file *pcf_; //!< pointer to config_file from which to read parameters
   public:
-    explicit RNG_plugin(ConfigFile &cf)
+    explicit RNG_plugin(config_file &cf)
         : pcf_(&cf)
     {
     }
     virtual ~RNG_plugin() {}
     virtual bool isMultiscale() const = 0;
-    virtual void Fill_Grid( Grid_FFT<real_t>& g ) const = 0;
+    virtual void Fill_Grid( Grid_FFT<real_t>& g ) = 0;//const = 0;
     //virtual void FillGrid(int level, DensityGrid<real_t> &R) = 0;
 };
 
 struct RNG_plugin_creator
 {
-    virtual std::unique_ptr<RNG_plugin> Create(ConfigFile &cf) const = 0;
+    virtual std::unique_ptr<RNG_plugin> Create(config_file &cf) const = 0;
     virtual ~RNG_plugin_creator() {}
 };
 
@@ -42,14 +42,14 @@ struct RNG_plugin_creator_concrete : public RNG_plugin_creator
     }
 
     //! create an instance of the plugin
-    std::unique_ptr<RNG_plugin> Create(ConfigFile &cf) const
+    std::unique_ptr<RNG_plugin> Create(config_file &cf) const
     {
         return std::make_unique<Derived>(cf);
     }
 };
 
 typedef RNG_plugin RNG_instance;
-std::unique_ptr<RNG_plugin> select_RNG_plugin( ConfigFile &cf);
+std::unique_ptr<RNG_plugin> select_RNG_plugin( config_file &cf);
 
 // /*!
 //  * @brief encapsulates all things for multi-scale white noise generation
@@ -58,18 +58,18 @@ std::unique_ptr<RNG_plugin> select_RNG_plugin( ConfigFile &cf);
 // class random_number_generator
 // {
 //   protected:
-//     ConfigFile *pcf_;
+//     config_file *pcf_;
 //     //const refinement_hierarchy * prefh_;
 //     RNG_plugin *generator_;
 //     int levelmin_, levelmax_;
 
 //   public:
 //     //! constructor
-//     random_number_generator( ConfigFile &cf )
+//     random_number_generator( config_file &cf )
 //         : pcf_(&cf) //, prefh_( &refh )
 //     {
-//         levelmin_ = pcf_->GetValue<int>("setup", "levelmin");
-//         levelmax_ = pcf_->GetValue<int>("setup", "levelmax");
+//         levelmin_ = pcf_->get_value<int>("setup", "levelmin");
+//         levelmax_ = pcf_->get_value<int>("setup", "levelmax");
 //         generator_ = select_RNG_plugin(cf);
 //     }
 
diff --git a/include/system_stat.hh b/include/system_stat.hh
index f911a42..fb7f6f3 100644
--- a/include/system_stat.hh
+++ b/include/system_stat.hh
@@ -1,3 +1,10 @@
+/*******************************************************************\
+ system_stat.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations 
+ 
+ CHANGELOG (only majors, for details see repo):
+    08/2019 - Oliver Hahn - first implementation
+\*******************************************************************/
 #pragma once
 
 #include <string>
diff --git a/include/testing.hh b/include/testing.hh
index 53bc571..aaaae39 100644
--- a/include/testing.hh
+++ b/include/testing.hh
@@ -1,13 +1,21 @@
+/*******************************************************************\
+ testing.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations 
+ 
+ CHANGELOG (only majors, for details see repo):
+    10/2019 - Michael Michaux & Oliver Hahn - first implementation
+\*******************************************************************/
 #pragma once
 
 #include <array>
 #include <general.hh>
 #include <config_file.hh>
 #include <grid_fft.hh>
+#include <cosmology_calculator.hh>
 
 namespace testing{
     void output_potentials_and_densities( 
-        ConfigFile& the_config,
+        config_file& the_config,
         size_t ngrid, real_t boxlen,
         Grid_FFT<real_t>& phi,
         Grid_FFT<real_t>& phi2,
@@ -16,7 +24,7 @@ namespace testing{
         std::array< Grid_FFT<real_t>*,3 >& A3 );
 
     void output_velocity_displacement_symmetries(
-        ConfigFile &the_config,
+        config_file &the_config,
         size_t ngrid, real_t boxlen, real_t vfac, real_t dplus,
         Grid_FFT<real_t> &phi,
         Grid_FFT<real_t> &phi2,
@@ -26,7 +34,8 @@ namespace testing{
         bool bwrite_out_fields=false);
 
     void output_convergence(
-        ConfigFile &the_config,
+        config_file &the_config,
+        cosmology::calculator* the_cosmo_calc,
         std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus,
         Grid_FFT<real_t> &phi,
         Grid_FFT<real_t> &phi2,
diff --git a/include/transfer_function_plugin.hh b/include/transfer_function_plugin.hh
index cd7c762..942a7ea 100644
--- a/include/transfer_function_plugin.hh
+++ b/include/transfer_function_plugin.hh
@@ -13,22 +13,29 @@ enum tf_type
     vtotal,
     vcdm,
     vbaryon,
-    total0
+    total0,
+    cdm0,
+    baryon0,
+    vtotal0,
+    vcdm0,
+    vbaryon0,
 };
 
 class TransferFunction_plugin
 {
   public:
     // Cosmology cosmo_;    //!< cosmological parameter, read from config_file
-    ConfigFile *pcf_;   //!< pointer to config_file from which to read parameters
+    config_file *pcf_;   //!< pointer to config_file from which to read parameters
     bool tf_distinct_;   //!< bool if density transfer function is distinct for baryons and DM
     bool tf_withvel_;    //!< bool if also have velocity transfer functions
     bool tf_withtotal0_; //!< have the z=0 spectrum for normalisation purposes
     bool tf_velunits_;   //!< velocities are in velocity units (km/s)
+    bool tf_isnormalised_; //!< assume that transfer functions come already correctly normalised and need be re-normalised to a specified value
+    
   public:
     //! constructor
-    TransferFunction_plugin(ConfigFile &cf)
-        : pcf_(&cf), tf_distinct_(false), tf_withvel_(false), tf_withtotal0_(false), tf_velunits_(false)
+    TransferFunction_plugin(config_file &cf)
+        : pcf_(&cf), tf_distinct_(false), tf_withvel_(false), tf_withtotal0_(false), tf_velunits_(false), tf_isnormalised_(false)
     { }
 
     //! destructor
@@ -75,7 +82,7 @@ class TransferFunction_plugin
 struct TransferFunction_plugin_creator
 {
     //! create an instance of a transfer function plug-in
-    virtual std::unique_ptr<TransferFunction_plugin> create(ConfigFile &cf) const = 0;
+    virtual std::unique_ptr<TransferFunction_plugin> create(config_file &cf) const = 0;
 
     //! destroy an instance of a plug-in
     virtual ~TransferFunction_plugin_creator() {}
@@ -96,7 +103,7 @@ struct TransferFunction_plugin_creator_concrete : public TransferFunction_plugin
     }
 
     //! create an instance of the plug-in
-    std::unique_ptr<TransferFunction_plugin> create(ConfigFile &cf) const
+    std::unique_ptr<TransferFunction_plugin> create(config_file &cf) const
     {
         return std::make_unique<Derived>(cf);
     }
@@ -104,4 +111,4 @@ struct TransferFunction_plugin_creator_concrete : public TransferFunction_plugin
 
 // typedef TransferFunction_plugin TransferFunction;
 
-std::unique_ptr<TransferFunction_plugin> select_TransferFunction_plugin(ConfigFile &cf);
+std::unique_ptr<TransferFunction_plugin> select_TransferFunction_plugin(config_file &cf);
diff --git a/include/vec.hh b/include/vec.hh
new file mode 100644
index 0000000..dd914b0
--- /dev/null
+++ b/include/vec.hh
@@ -0,0 +1,147 @@
+#pragma once
+/*******************************************************************************\
+ vec.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations 
+ 
+ CHANGELOG (only majors, for details see repo):
+    06/2019 - Oliver Hahn - first implementation
+\*******************************************************************************/
+
+#include <array>
+#include <cmath>
+#include <cstddef>
+#include <utility>
+
+//! implements general N-dim vectors of arbitrary primitive type with some arithmetic ops
+template <int N, typename T = double>
+struct vec_t
+{
+  std::array<T, N> data_;
+
+  //! default constructor: components are left uninitialised
+  vec_t() {}
+
+  //! copy and move construction are compiler-generated so that vec_t stays
+  //! trivially copyable whenever T is
+  vec_t(const vec_t<N, T> &v) = default;
+  vec_t(vec_t<N, T> &&v) = default;
+
+  //! component-wise construction from exactly N values, e.g. vec_t<3>(x, y, z)
+  template <typename... E>
+  vec_t(E... e) : data_{{std::forward<E>(e)...}}
+  {
+    static_assert(sizeof...(E) == N, "Brace-enclosed initialiser list doesn't match vec_t length!");
+  }
+
+  //! bracket index access to vector components (no bounds check)
+  T &operator[](size_t i) noexcept { return data_[i]; }
+
+  //! const bracket index access to vector components (no bounds check)
+  const T &operator[](size_t i) const noexcept { return data_[i]; }
+
+  //! copy assignment (compiler-generated, component-wise)
+  vec_t<N, T> &operator=(const vec_t<N, T> &v) = default;
+
+  //! move assignment (compiler-generated); declared explicitly because a
+  //! user-declared copy assignment would otherwise suppress it
+  vec_t<N, T> &operator=(vec_t<N, T> &&v) = default;
+
+  //! implementation of summation of vec_t
+  vec_t<N, T> operator+(const vec_t<N, T> &v) const noexcept
+  {
+    vec_t<N, T> res;
+    for (int i = 0; i < N; ++i)
+      res[i] = data_[i] + v[i];
+    return res;
+  }
+
+  //! implementation of difference of vec_t
+  vec_t<N, T> operator-(const vec_t<N, T> &v) const noexcept
+  {
+    vec_t<N, T> res;
+    for (int i = 0; i < N; ++i)
+      res[i] = data_[i] - v[i];
+    return res;
+  }
+
+  //! implementation of unary negative
+  vec_t<N, T> operator-() const noexcept
+  {
+    vec_t<N, T> res;
+    for (int i = 0; i < N; ++i)
+      res[i] = -data_[i];
+    return res;
+  }
+
+  //! implementation of scalar multiplication (vector * scalar); the scalar type is free
+  template <typename T2>
+  vec_t<N, T> operator*(T2 s) const noexcept
+  {
+    vec_t<N, T> res;
+    for (int i = 0; i < N; ++i)
+      res[i] = data_[i] * s;
+    return res;
+  }
+
+  //! implementation of scalar division; the divisor is converted to T first
+  vec_t<N, T> operator/(T s) const noexcept
+  {
+    vec_t<N, T> res;
+    for (int i = 0; i < N; ++i)
+      res[i] = data_[i] / s;
+    return res;
+  }
+
+  //! takes the absolute value of each element
+  vec_t<N, T> abs(void) const noexcept
+  {
+    vec_t<N, T> res;
+    for (int i = 0; i < N; ++i)
+      res[i] = std::abs(data_[i]);
+    return res;
+  }
+
+  //! in-place component-wise addition of another vec_t
+  vec_t<N, T> &operator+=(const vec_t<N, T> &v) noexcept
+  {
+    for (int i = 0; i < N; ++i)
+      data_[i] += v[i];
+    return *this;
+  }
+
+  //! in-place component-wise subtraction of another vec_t
+  vec_t<N, T> &operator-=(const vec_t<N, T> &v) noexcept
+  {
+    for (int i = 0; i < N; ++i)
+      data_[i] -= v[i];
+    return *this;
+  }
+
+  //! in-place multiplication of every component by a scalar
+  vec_t<N, T> &operator*=(T s) noexcept
+  {
+    for (int i = 0; i < N; ++i)
+      data_[i] *= s;
+    return *this;
+  }
+
+  //! in-place division of every component by a scalar
+  vec_t<N, T> &operator/=(T s) noexcept
+  {
+    for (int i = 0; i < N; ++i)
+      data_[i] /= s;
+    return *this;
+  }
+
+  size_t size(void) const noexcept { return N; } //!< number of components, i.e. N
+};
+
+//! left-multiplication of a vector by a scalar (s * v), applied component-wise
+template <typename T2, int N, typename T = double>
+inline vec_t<N, T> operator*(T2 s, const vec_t<N, T> &v)
+{
+  vec_t<N, T> scaled;
+  for (int d = 0; d != N; ++d)
+    scaled[d] = v[d] * s;
+  return scaled;
+}
diff --git a/include/vec3.hh b/include/vec3.hh
deleted file mode 100644
index 9295722..0000000
--- a/include/vec3.hh
+++ /dev/null
@@ -1,41 +0,0 @@
-#pragma once
-
-template< typename T >
-class vec3{
-private:
-    std::array<T,3> data_;
-    T &x,&y,&z;
-public:    
-    vec3()
-    : x(data_[0]),y(data_[1]),z(data_[2]){}
-
-    vec3( const vec3<T> &v)
-    : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){}
-
-    vec3( std::array<T,3>&& d )
-    : data_(std::move(d)), x(data_[0]),y(data_[1]),z(data_[2]){}
-    
-    vec3( vec3<T> &&v)
-    : data_(std::move(v.data_)), x(data_[0]),y(data_[1]),z(data_[2]){}
-
-    T &operator[](size_t i){ return data_[i];}
-    
-    const T &operator[](size_t i) const { return data_[i]; }
-    
-    T dot(const vec3<T> &a) const 
-    {
-        return data_[0] * a.data_[0] + data_[1] * a.data_[1] + data_[2] * a.data_[2];
-    }
-    
-    T norm_squared(void) const
-    {
-        return this->dot(*this);
-    }
-
-    T norm(void) const
-    {
-        return std::sqrt( this->norm_squared() );
-    }
-
-    
-};
diff --git a/new/FindFFTW3.cmake b/new/FindFFTW3.cmake
deleted file mode 100644
index 80aa67b..0000000
--- a/new/FindFFTW3.cmake
+++ /dev/null
@@ -1,232 +0,0 @@
-# - Try to find FFTW
-#
-# By default, it will look only for the serial libraries with single, double,
-# and long double precision. Any combination of precision (SINGLE, DOUBLE,
-# LONGDOUBLE) and library type (SERIAL, [THREADS|OPENMP], MPI) is possible by
-# using the COMPONENTS keyword. For example,
-#
-# find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP MPI)
-#
-# Once done this will define
-#  FFTW3_FOUND - System has FFTW3
-#  FFTW3_INCLUDE_DIRS - The FFTW3 include directories
-#  FFTW3_LIBRARIES - The libraries needed to use FFTW3
-#  FFTW3_DEFINITIONS - Compiler switches required for using FFTW3
-#  FFTW3_$KIND_$PARALLEL_FOUND- Set if FFTW3 exists in KIND precision format for PARALLEL mode.
-#                             where KIND can be: SINGLE, DOUBLE, LONGDOUBLE
-#                             and PARALLEL: SERIAL, OPENMP, MPI, THREADS.
-#  FFTW3_$KIND_$PARALLEL_LIBRARY - The libraries needed to use.
-#  FFTW3_INCLUDE_DIR_PARALLEL - The FFTW3 include directories for parallels mode.
-
-cmake_policy(SET CMP0054 NEW)
-
-if(FFTW3_FOUND)
-  return()
-endif()
-
-if(FFTW3_INCLUDE_DIR AND FFTW3_LIBRARIES)
-  set(FFTW3_FOUND TRUE)
-  foreach(component ${FFTW3_FIND_COMPONENTS})
-    if("${FFTW3_${component}_LIBRARY}" STREQUAL "")
-        set(FFTW3_${component}_LIBRARY "${FFTW3_LIBRARIES}")
-    endif()
-  endforeach()
-  return()
-endif()
-
-macro(find_specific_libraries KIND PARALLEL)
-  list(APPEND FFTW3_FIND_COMPONENTS ${KIND}_${PARALLEL})
-  if(NOT (${PARALLEL} STREQUAL "SERIAL") AND NOT ${PARALLEL}_FOUND)
-    message(FATAL_ERROR "Please, find ${PARALLEL} libraries before FFTW")
-  endif()
-
-  find_library(FFTW3_${KIND}_${PARALLEL}_LIBRARY NAMES
-    fftw3${SUFFIX_${KIND}}${SUFFIX_${PARALLEL}}${SUFFIX_FINAL} HINTS ${HINT_DIRS})
-  if(FFTW3_${KIND}_${PARALLEL}_LIBRARY MATCHES fftw3)
-    list(APPEND FFTW3_LIBRARIES ${FFTW3_${KIND}_${PARALLEL}_LIBRARY})
-    set(FFTW3_${KIND}_${PARALLEL}_FOUND TRUE)
-
-    STRING(TOLOWER "${KIND}" kind)
-    STRING(TOLOWER "${PARALLEL}" parallel)
-    if(FFTW3_${kind}_${parallel}_LIBRARY MATCHES "\\.a$")
-      add_library(fftw3::${kind}::${parallel} STATIC IMPORTED GLOBAL)
-    else()
-      add_library(fftw3::${kind}::${parallel} SHARED IMPORTED GLOBAL)
-    endif()
-
-    # MPI Has a different included library than the others
-    # FFTW3_INCLUDE_DIR_PARALLEL will change depending of which on is used.
-    set(FFTW3_INCLUDE_DIR_PARALLEL ${FFTW3_INCLUDE_DIR} )
-    if(PARALLEL STREQUAL "MPI")
-      set(FFTW3_INCLUDE_DIR_PARALLEL ${FFTW3_${PARALLEL}_INCLUDE_DIR})
-    endif()
-
-    set_target_properties(fftw3::${kind}::${parallel} PROPERTIES
-      IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}"
-      INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}")
-
-    # adding target properties to the different cases
-    ##   MPI
-    if(PARALLEL STREQUAL "MPI")
-      if(MPI_C_LIBRARIES)
-        set_target_properties(fftw3::${kind}::mpi PROPERTIES
-          IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}"
-          INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}"
-          IMPORTED_LINK_INTERFACE_LIBRARIES ${MPI_C_LIBRARIES})
-      endif()
-    endif()
-    ##   OpenMP
-    if(PARALLEL STREQUAL "OPENMP")
-      if(OPENMP_C_FLAGS)
-        set_target_properties(fftw3::${kind}::${parallel} PROPERTIES
-           IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}"
-           INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}"
-           INTERFACE_COMPILE_OPTIONS "${OPENMP_C_FLAGS}")
-        endif()
-    endif()
-    ##  THREADS
-    if(PARALLEL STREQUAL "THREADS")
-      if(CMAKE_THREAD_LIBS_INIT) # TODO: this is not running
-        set_target_properties(fftw3::${kind}::${parallel} PROPERTIES
-          IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}"
-          INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}"
-          INTERFACE_COMPILE_OPTIONS "${CMAKE_THREAD_LIBS_INIT}")
-      endif()
-    endif()
-  endif()
-endmacro()
-
-
-
-
-if(NOT FFTW3_FIND_COMPONENTS)
-  set(FFTW3_FIND_COMPONENTS SINGLE DOUBLE LONGDOUBLE SERIAL)
-endif()
-
-string(TOUPPER "${FFTW3_FIND_COMPONENTS}" FFTW3_FIND_COMPONENTS)
-
-list(FIND FFTW3_FIND_COMPONENTS SINGLE LOOK_FOR_SINGLE)
-list(FIND FFTW3_FIND_COMPONENTS DOUBLE LOOK_FOR_DOUBLE)
-list(FIND FFTW3_FIND_COMPONENTS LONGDOUBLE LOOK_FOR_LONGDOUBLE)
-list(FIND FFTW3_FIND_COMPONENTS THREADS LOOK_FOR_THREADS)
-list(FIND FFTW3_FIND_COMPONENTS OPENMP LOOK_FOR_OPENMP)
-list(FIND FFTW3_FIND_COMPONENTS MPI LOOK_FOR_MPI)
-list(FIND FFTW3_FIND_COMPONENTS SERIAL LOOK_FOR_SERIAL)
-
-# FIXME - This may fail in computers wihtout serial
-# Default serial to obtain version number
-set(LOOK_FOR_SERIAL 1)
-
-# set serial as default if none parallel component has been set
-if((LOOK_FOR_THREADS LESS 0) AND (LOOK_FOR_MPI LESS 0) AND
-    (LOOK_FOR_OPENMP LESS 0))
-  set(LOOK_FOR_SERIAL 1)
-endif()
-
-if(MPI_C_FOUND)
-  set(MPI_FOUND ${MPI_C_FOUND})
-endif()
-unset(FFTW3_FIND_COMPONENTS)
-
-
-
-
-if(WIN32)
-  set(HINT_DIRS ${FFTW3_DIRECTORY} $ENV{FFTW3_DIRECTORY})
-else()
-  find_package(PkgConfig)
-  if(PKG_CONFIG_FOUND)
-    pkg_check_modules(PC_FFTW QUIET fftw3)
-    set(FFTW3_DEFINITIONS ${PC_FFTW3_CFLAGS_OTHER})
-  endif()
-  set(HINT_DIRS ${PC_FFTW3_INCLUDEDIR} ${PC_FFTW3_INCLUDE_DIRS}
-    ${FFTW3_INCLUDE_DIR} $ENV{FFTW3_INCLUDE_DIR} )
-endif()
-
-find_path(FFTW3_INCLUDE_DIR NAMES fftw3.h HINTS ${HINT_DIRS})
-if (LOOK_FOR_MPI)  # Probably is going to be the same as fftw3.h
-  find_path(FFTW3_MPI_INCLUDE_DIR NAMES fftw3-mpi.h HINTS ${HINT_DIRS})
-endif()
-
-function(find_version OUTVAR LIBRARY SUFFIX)
-    file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/main.c
-      # TODO: do we need to add include for mpi headers?
-      "#include <fftw3.h>
-       #include <stdio.h>
-       int main(int nargs, char const *argv[]) {
-           printf(\"%s\", fftw${SUFFIX}_version);
-           return 0;
-       }"
-  )
-if(NOT CMAKE_CROSSCOMPILING)
-    try_run(RUN_RESULT COMPILE_RESULT
-        "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/"
-        "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/main.c"
-        CMAKE_FLAGS
-          -DLINK_LIBRARIES=${LIBRARY}
-          -DINCLUDE_DIRECTORIES=${FFTW3_INCLUDE_DIR}
-        RUN_OUTPUT_VARIABLE OUTPUT
-        COMPILE_OUTPUT_VARIABLE COUTPUT
-    )
-  endif()
-  if(RUN_RESULT EQUAL 0)
-    string(REGEX REPLACE
-        ".*([0-9]+\\.[0-9]+\\.[0-9]+).*"
-        "\\1" VERSION_STRING "${OUTPUT}"
-    )
-    set(${OUTVAR} ${VERSION_STRING} PARENT_SCOPE)
-  endif()
-endfunction()
-
-set(SUFFIX_DOUBLE "")
-set(SUFFIX_SINGLE "f")
-set(SUFFIX_LONGDOUBLE "l")
-set(SUFFIX_SERIAL "")
-set(SUFFIX_OPENMP "_omp")
-set(SUFFIX_MPI "_mpi")
-set(SUFFIX_THREADS "_threads")
-set(SUFFIX_FINAL "")
-
-if(WIN32)
-  set(SUFFIX_FINAL "-3")
-else()
-  set(HINT_DIRS ${PC_FFTW3_LIBDIR} ${PC_FFTW3_LIBRARY_DIRS}
-    $ENV{FFTW3_LIBRARY_DIR} ${FFTW3_LIBRARY_DIR} )
-endif(WIN32)
-
-unset(FFTW3_LIBRARIES)
-set(FFTW3_INCLUDE_DIRS ${FFTW3_INCLUDE_DIR} ) # TODO what's for?
-set(FFTW3_FLAGS_C "")
-foreach(KIND SINGLE DOUBLE LONGDOUBLE)
-  if(LOOK_FOR_${KIND} LESS 0)
-    continue()
-  endif()
-  foreach(PARALLEL SERIAL MPI OPENMP THREADS)
-    if(LOOK_FOR_${PARALLEL} LESS 0)
-      continue()
-    endif()
-    find_specific_libraries(${KIND} ${PARALLEL})
-  endforeach()
-endforeach()
-
-if(FFTW3_INCLUDE_DIR)
-  list(GET FFTW3_FIND_COMPONENTS 0 smallerrun)
-  string(REPLACE "_" ";" RUNLIST ${smallerrun})
-  list(GET RUNLIST 0 KIND)
-  list(GET RUNLIST 1 PARALLEL)
-  unset(smallerrun)
-  unset(RUNLIST)
-  # suffix is quoted so it pass empty in the case of double as it's empty
-  find_version(FFTW3_VERSION_STRING ${FFTW3_${KIND}_${PARALLEL}_LIBRARY}
-    "${SUFFIX_${KIND}}")
-endif()
-
-# FIXME: fails if use REQUIRED.
-include(FindPackageHandleStandardArgs)
-# handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE
-# if all listed variables are TRUE
-find_package_handle_standard_args(FFTW3
-    REQUIRED_VARS FFTW3_LIBRARIES FFTW3_INCLUDE_DIR
-    VERSION_VAR FFTW3_VERSION_STRING
-    HANDLE_COMPONENTS
-)
diff --git a/src/grid_fft.cc b/src/grid_fft.cc
index d5f103a..eeba708 100644
--- a/src/grid_fft.cc
+++ b/src/grid_fft.cc
@@ -2,192 +2,173 @@
 #include <grid_fft.hh>
 #include <thread>
 
-#include <gsl/gsl_rng.h>
-#include <gsl/gsl_randist.h>
-
-template <typename data_t>
-void Grid_FFT<data_t>::FillRandomReal(unsigned long int seed)
+template <typename data_t, bool bdistributed>
+void Grid_FFT<data_t, bdistributed>::Setup(void)
 {
-    gsl_rng *RNG = gsl_rng_alloc(gsl_rng_mt19937);
-#if defined(USE_MPI)
-    seed += 17321 * CONFIG::MPI_task_rank;
-#endif
-    gsl_rng_set(RNG, seed);
-
-    for (size_t i = 0; i < sizes_[0]; ++i)
+    if (!bdistributed)
     {
-        for (size_t j = 0; j < sizes_[1]; ++j)
+        ntot_ = (n_[2] + 2) * n_[1] * n_[0];
+
+        music::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
+        if (typeid(data_t) == typeid(real_t))
         {
-            for (size_t k = 0; k < sizes_[2]; ++k)
-            {
-                this->relem(i, j, k) = gsl_ran_ugaussian_ratio_method(RNG);
-            }
+            data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(real_t)));
+            cdata_ = reinterpret_cast<ccomplex_t *>(data_);
+
+            plan_ = FFTW_API(plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, FFTW_RUNMODE);
+            iplan_ = FFTW_API(plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, FFTW_RUNMODE);
+        }
+        else if (typeid(data_t) == typeid(ccomplex_t))
+        {
+            data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(ccomplex_t)));
+            cdata_ = reinterpret_cast<ccomplex_t *>(data_);
+
+            plan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_FORWARD, FFTW_RUNMODE);
+            iplan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_BACKWARD, FFTW_RUNMODE);
+        }
+        else
+        {
+            music::elog.Print("invalid data type in Grid_FFT<data_t>::setup_fft_interface\n");
+        }
+
+        fft_norm_fac_ = 1.0 / std::sqrt((real_t)((size_t)n_[0] * (real_t)n_[1] * (real_t)n_[2]));
+
+        if (typeid(data_t) == typeid(real_t))
+        {
+            npr_ = n_[2] + 2;
+            npc_ = n_[2] / 2 + 1;
+        }
+        else
+        {
+            npr_ = n_[2];
+            npc_ = n_[2];
+        }
+
+        for (int i = 0; i < 3; ++i)
+        {
+            nhalf_[i] = n_[i] / 2;
+            kfac_[i] = 2.0 * M_PI / length_[i];
+            kny_[i] = kfac_[i] * n_[i]/2;
+            dx_[i] = length_[i] / n_[i];
+
+            global_range_.x1_[i] = 0;
+            global_range_.x2_[i] = n_[i];
+        }
+
+        local_0_size_ = n_[0];
+        local_1_size_ = n_[1];
+        local_0_start_ = 0;
+        local_1_start_ = 0;
+
+        if (space_ == rspace_id)
+        {
+            sizes_[0] = n_[0];
+            sizes_[1] = n_[1];
+            sizes_[2] = n_[2];
+            sizes_[3] = npr_;
+        }
+        else
+        {
+            sizes_[0] = n_[1];
+            sizes_[1] = n_[0];
+            sizes_[2] = npc_;
+            sizes_[3] = npc_;
         }
     }
-
-    gsl_rng_free(RNG);
-}
-
-template <typename data_t>
-void Grid_FFT<data_t>::Setup(void)
-{
-#if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////////////
-
-    ntot_ = (n_[2] + 2) * n_[1] * n_[0];
-
-    csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
-    if (typeid(data_t) == typeid(real_t))
-    {
-        data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(real_t)));
-        cdata_ = reinterpret_cast<ccomplex_t *>(data_);
-
-        plan_ = FFTW_API(plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, FFTW_RUNMODE);
-        iplan_ = FFTW_API(plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, FFTW_RUNMODE);
-    }
-    else if (typeid(data_t) == typeid(ccomplex_t))
-    {
-        data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(ccomplex_t)));
-        cdata_ = reinterpret_cast<ccomplex_t *>(data_);
-
-        plan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_FORWARD, FFTW_RUNMODE);
-        iplan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_BACKWARD, FFTW_RUNMODE);
-    }
     else
     {
-        csoca::elog.Print("invalid data type in Grid_FFT<data_t>::setup_fft_interface\n");
-    }
+#ifdef USE_MPI //// i.e. ifdef USE_MPI ////////////////////////////////////////////////////////////////////////////////////
+        size_t cmplxsz;
 
-    fft_norm_fac_ = 1.0 / std::sqrt((double)((size_t)n_[0] * (double)n_[1] * (double)n_[2]));
+        if (typeid(data_t) == typeid(real_t))
+        {
+            cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD,
+                                                             &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
+            ntot_ = 2 * cmplxsz;
+            data_ = (data_t *)fftw_malloc(ntot_ * sizeof(real_t));
+            cdata_ = reinterpret_cast<ccomplex_t *>(data_);
+            plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_,
+                                                  MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
+            iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_,
+                                                   MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
+        }
+        else if (typeid(data_t) == typeid(ccomplex_t))
+        {
+            cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD,
+                                                             &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
+            ntot_ = cmplxsz;
+            data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t));
+            cdata_ = reinterpret_cast<ccomplex_t *>(data_);
+            plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
+                                              MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
+            iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
+                                               MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
+        }
+        else
+        {
+            music::elog.Print("unknown data type in Grid_FFT<data_t>::setup_fft_interface\n");
+            abort();
+        }
 
-    if (typeid(data_t) == typeid(real_t))
-    {
-        npr_ = n_[2] + 2;
-        npc_ = n_[2] / 2 + 1;
-    }
-    else
-    {
-        npr_ = n_[2];
-        npc_ = n_[2];
-    }
+        music::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
 
-    for (int i = 0; i < 3; ++i)
-    {
-        nhalf_[i] = n_[i] / 2;
-        kfac_[i] = 2.0 * M_PI / length_[i];
-        dx_[i] = length_[i] / n_[i];
+        fft_norm_fac_ = 1.0 / sqrt((real_t)n_[0] * (real_t)n_[1] * (real_t)n_[2]);
 
-        global_range_.x1_[i] = 0;
-        global_range_.x2_[i] = n_[i];
-    }
+        if (typeid(data_t) == typeid(real_t))
+        {
+            npr_ = n_[2] + 2;
+            npc_ = n_[2] / 2 + 1;
+        }
+        else
+        {
+            npr_ = n_[2];
+            npc_ = n_[2];
+        }
 
-    local_0_size_ = n_[0];
-    local_1_size_ = n_[1];
-    local_0_start_ = 0;
-    local_1_start_ = 0;
+        for (int i = 0; i < 3; ++i)
+        {
+            nhalf_[i] = n_[i] / 2;
+            kfac_[i] = 2.0 * M_PI / length_[i];
+            kny_[i] = kfac_[i] * n_[i]/2;
+            dx_[i] = length_[i] / n_[i];
 
-    if (space_ == rspace_id)
-    {
-        sizes_[0] = n_[0];
-        sizes_[1] = n_[1];
-        sizes_[2] = n_[2];
-        sizes_[3] = npr_;
-    }
-    else
-    {
-        sizes_[0] = n_[1];
-        sizes_[1] = n_[0];
-        sizes_[2] = npc_;
-        sizes_[3] = npc_;
-    }
-
-#else //// i.e. ifdef USE_MPI ////////////////////////////////////////////////////////////////////////////////////
-
-    size_t cmplxsz;
-
-    if (typeid(data_t) == typeid(real_t))
-    {
-        cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD,
-                                                         &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
-        ntot_ = 2 * cmplxsz;
-        data_ = (data_t *)fftw_malloc(ntot_ * sizeof(real_t));
-        cdata_ = reinterpret_cast<ccomplex_t *>(data_);
-        plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_,
-                                              MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
-        iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_,
-                                               MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
-    }
-    else if (typeid(data_t) == typeid(ccomplex_t))
-    {
-        cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD,
-                                                         &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
-        ntot_ = cmplxsz;
-        data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t));
-        cdata_ = reinterpret_cast<ccomplex_t *>(data_);
-        plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
-                                          MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
-        iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
-                                           MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
-    }
-    else
-    {
-        csoca::elog.Print("unknown data type in Grid_FFT<data_t>::setup_fft_interface\n");
-        abort();
-    }
-
-    csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
-    fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]);
-
-    if (typeid(data_t) == typeid(real_t))
-    {
-        npr_ = n_[2] + 2;
-        npc_ = n_[2] / 2 + 1;
-    }
-    else
-    {
-        npr_ = n_[2];
-        npc_ = n_[2];
-    }
-
-    for (int i = 0; i < 3; ++i)
-    {
-        nhalf_[i] = n_[i] / 2;
-        kfac_[i] = 2.0 * M_PI / length_[i];
-        dx_[i] = length_[i] / n_[i];
-
-        global_range_.x1_[i] = 0;
-        global_range_.x2_[i] = n_[i];
-    }
-    global_range_.x1_[0] = (int)local_0_start_;
-    global_range_.x2_[0] = (int)(local_0_start_ + local_0_size_);
-
-    if (space_ == rspace_id)
-    {
-        sizes_[0] = (int)local_0_size_;
-        sizes_[1] = n_[1];
-        sizes_[2] = n_[2];
-        sizes_[3] = npr_; // holds the physical memory size along the 3rd dimension
-    }
-    else
-    {
-        sizes_[0] = (int)local_1_size_;
-        sizes_[1] = n_[0];
-        sizes_[2] = npc_;
-        sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension
-    }
+            global_range_.x1_[i] = 0;
+            global_range_.x2_[i] = n_[i];
+        }
+        global_range_.x1_[0] = (int)local_0_start_;
+        global_range_.x2_[0] = (int)(local_0_start_ + local_0_size_);
 
+        if (space_ == rspace_id)
+        {
+            sizes_[0] = (int)local_0_size_;
+            sizes_[1] = n_[1];
+            sizes_[2] = n_[2];
+            sizes_[3] = npr_; // holds the physical memory size along the 3rd dimension
+        }
+        else
+        {
+            sizes_[0] = (int)local_1_size_;
+            sizes_[1] = n_[0];
+            sizes_[2] = npc_;
+            sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension
+        }
+#else
+        music::flog << "MPI is required for distributed FFT arrays!" << std::endl;
+        throw std::runtime_error("MPI is required for distributed FFT arrays!");
 #endif //// of #ifdef #else USE_MPI ////////////////////////////////////////////////////////////////////////////////////
+    }
 }
 
-template <typename data_t>
-void Grid_FFT<data_t>::ApplyNorm(void)
+template <typename data_t, bool bdistributed>
+void Grid_FFT<data_t, bdistributed>::ApplyNorm(void)
 {
 #pragma omp parallel for
     for (size_t i = 0; i < ntot_; ++i)
         data_[i] *= fft_norm_fac_;
 }
 
-template <typename data_t>
-void Grid_FFT<data_t>::FourierTransformForward(bool do_transform)
+template <typename data_t, bool bdistributed>
+void Grid_FFT<data_t, bdistributed>::FourierTransformForward(bool do_transform)
 {
 #if defined(USE_MPI)
     MPI_Barrier(MPI_COMM_WORLD);
@@ -199,12 +180,13 @@ void Grid_FFT<data_t>::FourierTransformForward(bool do_transform)
         if (do_transform)
         {
             double wtime = get_wtime();
-            csoca::dlog.Print("[FFT] Calling Grid_FFT::to_kspace (%lux%lux%lu)", sizes_[0], sizes_[1], sizes_[2]);
-            FFTW_API(execute)(plan_);
+            music::dlog.Print("[FFT] Calling Grid_FFT::to_kspace (%lux%lux%lu)", sizes_[0], sizes_[1], sizes_[2]);
+            FFTW_API(execute)
+            (plan_);
             this->ApplyNorm();
 
             wtime = get_wtime() - wtime;
-            csoca::dlog.Print("[FFT] Completed Grid_FFT::to_kspace (%lux%lux%lu), took %f s", sizes_[0], sizes_[1], sizes_[2], wtime);
+            music::dlog.Print("[FFT] Completed Grid_FFT::to_kspace (%lux%lux%lu), took %f s", sizes_[0], sizes_[1], sizes_[2], wtime);
         }
 
         sizes_[0] = local_1_size_;
@@ -217,8 +199,8 @@ void Grid_FFT<data_t>::FourierTransformForward(bool do_transform)
     }
 }
 
-template <typename data_t>
-void Grid_FFT<data_t>::FourierTransformBackward(bool do_transform)
+template <typename data_t, bool bdistributed>
+void Grid_FFT<data_t, bdistributed>::FourierTransformBackward(bool do_transform)
 {
 #if defined(USE_MPI)
     MPI_Barrier(MPI_COMM_WORLD);
@@ -229,14 +211,14 @@ void Grid_FFT<data_t>::FourierTransformBackward(bool do_transform)
         //.............................
         if (do_transform)
         {
-            csoca::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]);
+            music::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]);
             double wtime = get_wtime();
 
             FFTW_API(execute)(iplan_);
             this->ApplyNorm();
 
             wtime = get_wtime() - wtime;
-            csoca::dlog.Print("[FFT] Completed Grid_FFT::to_rspace (%dx%dx%d), took %f s\n", sizes_[0], sizes_[1], sizes_[2], wtime);
+            music::dlog.Print("[FFT] Completed Grid_FFT::to_rspace (%dx%dx%d), took %f s\n", sizes_[0], sizes_[1], sizes_[2], wtime);
         }
         sizes_[0] = local_0_size_;
         sizes_[1] = n_[1];
@@ -269,9 +251,293 @@ void create_hdf5(std::string Filename)
     H5Fclose(HDF_FileID);
 }
 
-template <typename data_t>
-void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname) const
+//! map the C++ type T to the native HDF5 datatype id used for I/O (returns -1 for unsupported T)
+template <typename T>
+hid_t hdf5_get_data_type(void)
 {
+    if (typeid(T) == typeid(int))
+        return H5T_NATIVE_INT;
+
+    if (typeid(T) == typeid(unsigned))
+        return H5T_NATIVE_UINT;
+
+    if (typeid(T) == typeid(float))
+        return H5T_NATIVE_FLOAT;
+
+    if (typeid(T) == typeid(double))
+        return H5T_NATIVE_DOUBLE;
+
+    if (typeid(T) == typeid(long double))
+        return H5T_NATIVE_LDOUBLE;
+
+    if (typeid(T) == typeid(long long))
+        return H5T_NATIVE_LLONG;
+
+    if (typeid(T) == typeid(unsigned long long))
+        return H5T_NATIVE_ULLONG;
+    // NOTE(review): size_t is mapped to unsigned long long, which assumes equal width - confirm
+    if (typeid(T) == typeid(size_t))
+        return H5T_NATIVE_ULLONG;
+    // no mapping known for T: report it and hand back an invalid id
+    music::elog << "[HDF_IO] trying to evaluate unsupported type in GetDataType" << std::endl;
+    return -1;
+}
+
+//! Read a 3D dataset from an HDF5 file into this (non-distributed) grid, resize the grid to
+//! the dataset extents and divide the field by its standard deviation; aborts on failure.
+template <typename data_t, bool bdistributed>
+void Grid_FFT<data_t, bdistributed>::Read_from_HDF5(const std::string Filename, const std::string ObjName)
+{
+    if (bdistributed)
+    {
+        music::elog << "Attempt to read from HDF5 into MPI-distributed array. This is not supported yet!" << std::endl;
+        abort();
+    }
+
+    hid_t HDF_Type = hdf5_get_data_type<data_t>();
+
+    hid_t HDF_FileID = H5Fopen(Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+
+    //... save old error handler
+    herr_t (*old_func)(void *);
+    void *old_client_data;
+
+    H5Eget_auto(&old_func, &old_client_data);
+
+    //... turn off error handling by hdf5 library
+    H5Eset_auto(NULL, NULL);
+
+    //... probe dataset opening
+    hid_t HDF_DatasetID = H5Dopen(HDF_FileID, ObjName.c_str());
+
+    //... restore previous error handler
+    H5Eset_auto(old_func, old_client_data);
+
+    //... dataset did not exist or was empty
+    if (HDF_DatasetID < 0)
+    {
+        music::elog << "Dataset \'" << ObjName.c_str() << "\' does not exist or is empty." << std::endl;
+        H5Fclose(HDF_FileID);
+        abort();
+    }
+
+    //... get space associated with dataset and its extensions
+    hid_t HDF_DataspaceID = H5Dget_space(HDF_DatasetID);
+
+    int ndims = H5Sget_simple_extent_ndims(HDF_DataspaceID);
+
+    //... dimsize below has room for exactly three extents, so reject any other rank first
+    if (ndims != 3)
+    {
+        music::elog << "Dataset \'" << ObjName.c_str() << "\' is not a three-dimensional array." << std::endl;
+        H5Sclose(HDF_DataspaceID);
+        H5Dclose(HDF_DatasetID);
+        H5Fclose(HDF_FileID);
+        abort();
+    }
+
+    hsize_t dimsize[3];
+
+    H5Sget_simple_extent_dims(HDF_DataspaceID, dimsize, NULL);
+
+    hsize_t HDF_StorageSize = 1;
+    for (int i = 0; i < ndims; ++i)
+        HDF_StorageSize *= dimsize[i];
+
+    //... zero-initialised buffer for the whole dataset (allocation failure throws std::bad_alloc)
+    std::vector<data_t> Data(HDF_StorageSize, (data_t)0);
+
+    //... read the dataset; a negative return value signals failure
+    if (H5Dread(HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, H5P_DEFAULT, Data.data()) < 0)
+    {
+        music::elog << "Something went wrong while reading!" << std::endl;
+        H5Sclose(HDF_DataspaceID);
+        H5Dclose(HDF_DatasetID);
+        H5Fclose(HDF_FileID);
+        abort();
+    }
+
+    H5Sclose(HDF_DataspaceID);
+    H5Dclose(HDF_DatasetID);
+    H5Fclose(HDF_FileID);
+
+    assert(dimsize[0] == dimsize[1] && dimsize[0] == dimsize[2]);
+    music::ilog << "Read external constraint data of dimensions " << dimsize[0] << "**3." << std::endl;
+
+    for (size_t i = 0; i < 3; ++i)
+        this->n_[i] = dimsize[i];
+    this->space_ = rspace_id;
+
+    if (data_ != nullptr)
+    {
+        fftw_free(data_);
+    }
+    this->Setup();
+
+    //... copy data to internal array and accumulate first and second moments ...
+    real_t sum1{0.0}, sum2{0.0};
+    #pragma omp parallel for reduction(+ : sum1, sum2)
+    for (size_t i = 0; i < size(0); ++i)
+    {
+        for (size_t j = 0; j < size(1); ++j)
+        {
+            for (size_t k = 0; k < size(2); ++k)
+            {
+                this->relem(i, j, k) = Data[(i * size(1) + j) * size(2) + k];
+                sum2 += std::real(this->relem(i, j, k) * this->relem(i, j, k));
+                sum1 += std::real(this->relem(i, j, k));
+            }
+        }
+    }
+    sum1 /= Data.size();
+    sum2 /= Data.size();
+    auto stdw = std::sqrt(sum2 - sum1 * sum1);
+    music::ilog << "Constraint field has <W>=" << sum1 << ", <W^2>-<W>^2=" << stdw << std::endl;
+
+    //... divide the field by its standard deviation (nothing is accumulated, so no reduction clause)
+    #pragma omp parallel for
+    for (size_t i = 0; i < size(0); ++i)
+    {
+        for (size_t j = 0; j < size(1); ++j)
+        {
+            for (size_t k = 0; k < size(2); ++k)
+            {
+                this->relem(i, j, k) /= stdw;
+            }
+        }
+    }
+}
+
+template <typename data_t, bool bdistributed>
+void Grid_FFT<data_t, bdistributed>::Write_to_HDF5(std::string fname, std::string datasetname) const
+{
+    // FIXME: cleanup duplicate code in this function!
+    if (!bdistributed && CONFIG::MPI_task_rank == 0)
+    {
+        // serial path: rank 0 writes the whole field, one i-slice of size(1) x size(2) at a time
+        hid_t file_id, dset_id;    /* file and dataset identifiers */
+        hid_t filespace, memspace; /* file and memory dataspace identifiers */
+        hsize_t offset[3], count[3];
+        hid_t dtype_id = H5T_NATIVE_FLOAT;
+        hid_t plist_id = H5P_DEFAULT;
+
+        if (!file_exists(fname))
+            create_hdf5(fname);
+
+        file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id);
+
+        for (int i = 0; i < 3; ++i)
+            count[i] = size(i);
+
+        if (typeid(data_t) == typeid(float))
+            dtype_id = H5T_NATIVE_FLOAT;
+        else if (typeid(data_t) == typeid(double))
+            dtype_id = H5T_NATIVE_DOUBLE;
+        else if (typeid(data_t) == typeid(long double))
+            dtype_id = H5T_NATIVE_LDOUBLE;
+        else if (typeid(data_t) == typeid(std::complex<float>))
+            dtype_id = H5T_NATIVE_FLOAT;
+        else if (typeid(data_t) == typeid(std::complex<double>))
+            dtype_id = H5T_NATIVE_DOUBLE;
+        else if (typeid(data_t) == typeid(std::complex<long double>))
+            dtype_id = H5T_NATIVE_LDOUBLE;
+
+        filespace = H5Screate_simple(3, count, NULL);
+        dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace,
+                             H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+        H5Sclose(filespace);
+
+        hsize_t slice_sz = size(1) * size(2);
+
+        real_t *buf = new real_t[slice_sz];
+
+        count[0] = 1;
+        count[1] = size(1);
+        count[2] = size(2);
+
+        offset[1] = 0;
+        offset[2] = 0;
+
+        memspace = H5Screate_simple(3, count, NULL);
+        filespace = H5Dget_space(dset_id);
+
+        for (size_t i = 0; i < size(0); ++i)
+        {
+            offset[0] = i;
+            for (size_t j = 0; j < size(1); ++j)
+            {
+                for (size_t k = 0; k < size(2); ++k)
+                {
+                    if (this->space_ == rspace_id)
+                        buf[j * size(2) + k] = std::real(relem(i, j, k));
+                    else
+                        buf[j * size(2) + k] = std::real(kelem(i, j, k));
+                }
+            }
+
+            H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL);
+            H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf);
+        }
+
+        H5Sclose(filespace);
+        H5Sclose(memspace);
+
+        // real part written; the dataspaces were released above, now close the dataset
+        H5Dclose(dset_id);
+
+        if (typeid(data_t) == typeid(std::complex<float>) ||
+            typeid(data_t) == typeid(std::complex<double>) ||
+            typeid(data_t) == typeid(std::complex<long double>) ||
+            this->space_ == kspace_id)
+        {
+            datasetname += std::string(".im");
+
+            for (int i = 0; i < 3; ++i)
+                count[i] = size(i);
+
+            filespace = H5Screate_simple(3, count, NULL);
+            dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace,
+                                 H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+            H5Sclose(filespace);
+
+            count[0] = 1;
+
+            for (size_t i = 0; i < size(0); ++i)
+            {
+                offset[0] = i;
+
+                for (size_t j = 0; j < size(1); ++j)
+                    for (size_t k = 0; k < size(2); ++k)
+                    {
+                        if (this->space_ == rspace_id)
+                            buf[j * size(2) + k] = std::imag(relem(i, j, k));
+                        else
+                            buf[j * size(2) + k] = std::imag(kelem(i, j, k));
+                    }
+
+                memspace = H5Screate_simple(3, count, NULL);
+                filespace = H5Dget_space(dset_id);
+
+                H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count,
+                                    NULL);
+
+                H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf);
+
+                H5Sclose(memspace);
+                H5Sclose(filespace);
+            }
+
+            H5Dclose(dset_id);
+        }
+
+        delete[] buf; // free the slice buffer on every path, not only when an imaginary part was written
+
+        H5Fclose(file_id);
+        return;
+    }
+
+    if (!bdistributed && CONFIG::MPI_task_rank != 0)
+        return;
+
     hid_t file_id, dset_id;    /* file and dataset identifiers */
     hid_t filespace, memspace; /* file and memory dataspace identifiers */
     hsize_t offset[3], count[3];
@@ -282,8 +548,8 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
 
     int mpi_size, mpi_rank;
 
-    mpi_size = MPI_Get_size();
-    mpi_rank = MPI_Get_rank();
+    mpi_size = MPI::get_size();
+    mpi_rank = MPI::get_rank();
 
     if (!file_exists(fname) && mpi_rank == 0)
         create_hdf5(fname);
@@ -329,14 +595,14 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
             dtype_id = H5T_NATIVE_FLOAT;
         else if (typeid(data_t) == typeid(double))
             dtype_id = H5T_NATIVE_DOUBLE;
+        else if (typeid(data_t) == typeid(long double))
+            dtype_id = H5T_NATIVE_LDOUBLE;
         else if (typeid(data_t) == typeid(std::complex<float>))
-        {
             dtype_id = H5T_NATIVE_FLOAT;
-        }
         else if (typeid(data_t) == typeid(std::complex<double>))
-        {
             dtype_id = H5T_NATIVE_DOUBLE;
-        }
+        else if (typeid(data_t) == typeid(std::complex<long double>))
+            dtype_id = H5T_NATIVE_LDOUBLE;
 
 #if defined(USE_MPI) && !defined(USE_MPI_IO)
         if (itask == 0)
@@ -391,7 +657,10 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
             {
                 for (size_t k = 0; k < size(2); ++k)
                 {
-                    buf[j * size(2) + k] = std::real(relem(i, j, k));
+                    if (this->space_ == rspace_id)
+                        buf[j * size(2) + k] = std::real(relem(i, j, k));
+                    else
+                        buf[j * size(2) + k] = std::real(kelem(i, j, k));
                 }
             }
 
@@ -410,7 +679,9 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
         H5Dclose(dset_id);
 
         if (typeid(data_t) == typeid(std::complex<float>) ||
-            typeid(data_t) == typeid(std::complex<double>))
+            typeid(data_t) == typeid(std::complex<double>) ||
+            typeid(data_t) == typeid(std::complex<long double>) ||
+            this->space_ == kspace_id)
         {
             datasetname += std::string(".im");
 
@@ -460,7 +731,10 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
                 for (size_t j = 0; j < size(1); ++j)
                     for (size_t k = 0; k < size(2); ++k)
                     {
-                        buf[j * size(2) + k] = std::imag(relem(i, j, k));
+                        if (this->space_ == rspace_id)
+                            buf[j * size(2) + k] = std::imag(relem(i, j, k));
+                        else
+                            buf[j * size(2) + k] = std::imag(kelem(i, j, k));
                     }
 
                 memspace = H5Screate_simple(3, count, NULL);
@@ -493,8 +767,8 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
 
 #include <iomanip>
 
-template <typename data_t>
-void Grid_FFT<data_t>::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax)
+template <typename data_t, bool bdistributed>
+void Grid_FFT<data_t, bdistributed>::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax)
 {
     double logvmin = std::log10(vmin);
     double logvmax = std::log10(vmax);
@@ -545,13 +819,12 @@ void Grid_FFT<data_t>::Write_PDF(std::string ofname, int nbins, double scale, do
 #endif
 }
 
-template <typename data_t>
-void Grid_FFT<data_t>::Write_PowerSpectrum(std::string ofname)
+template <typename data_t, bool bdistributed>
+void Grid_FFT<data_t, bdistributed>::Write_PowerSpectrum(std::string ofname)
 {
     std::vector<double> bin_k, bin_P, bin_eP;
     std::vector<size_t> bin_count;
-    int nbins = 4 * std::max(nhalf_[0], std::max(nhalf_[1], nhalf_[2]));
-    this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count );
+    this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count);
 #if defined(USE_MPI)
     if (CONFIG::MPI_task_rank == 0)
     {
@@ -576,8 +849,8 @@ void Grid_FFT<data_t>::Write_PowerSpectrum(std::string ofname)
 #endif
 }
 
-template <typename data_t>
-void Grid_FFT<data_t>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::vector<double> &bin_P, std::vector<double> &bin_eP, std::vector<size_t> &bin_count )
+template <typename data_t, bool bdistributed>
+void Grid_FFT<data_t, bdistributed>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::vector<double> &bin_P, std::vector<double> &bin_eP, std::vector<size_t> &bin_count)
 {
     this->FourierTransformForward();
 
@@ -597,7 +870,7 @@ void Grid_FFT<data_t>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::ve
         for (size_t iy = 0; iy < size(1); iy++)
             for (size_t iz = 0; iz < size(2); iz++)
             {
-                vec3<double> k3 = get_k<double>(ix, iy, iz);
+                vec3_t<double> k3 = get_k<double>(ix, iy, iz);
                 double k = k3.norm();
                 int idx2 = k / dk; //int((1.0f / dklog * std::log10(k / kmin)));
                 auto z = this->kelem(ix, iy, iz);
@@ -657,5 +930,7 @@ void Grid_FFT<data_t>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::ve
 
 /********************************************************************************************/
 
-template class Grid_FFT<real_t>;
-template class Grid_FFT<ccomplex_t>;
+template class Grid_FFT<real_t, true>;
+template class Grid_FFT<real_t, false>;
+template class Grid_FFT<ccomplex_t, true>;
+template class Grid_FFT<ccomplex_t, false>;
diff --git a/src/ic_generator.cc b/src/ic_generator.cc
index ba0e209..f677551 100644
--- a/src/ic_generator.cc
+++ b/src/ic_generator.cc
@@ -7,6 +7,7 @@
 
 #include <ic_generator.hh>
 #include <particle_generator.hh>
+#include <particle_plt.hh>
 
 #include <unistd.h> // for unlink
 
@@ -21,18 +22,18 @@ namespace ic_generator{
 
 std::unique_ptr<RNG_plugin> the_random_number_generator;
 std::unique_ptr<output_plugin> the_output_plugin;
-std::unique_ptr<CosmologyCalculator>  the_cosmo_calc;
+std::unique_ptr<cosmology::calculator>  the_cosmo_calc;
 
-int Initialise( ConfigFile& the_config )
+int Initialise( config_file& the_config )
 {
     the_random_number_generator = std::move(select_RNG_plugin(the_config));
     the_output_plugin           = std::move(select_output_plugin(the_config));
-    the_cosmo_calc              = std::make_unique<CosmologyCalculator>(the_config);
+    the_cosmo_calc              = std::make_unique<cosmology::calculator>(the_config);
 
     return 0;
 }
 
-int Run( ConfigFile& the_config )
+int Run( config_file& the_config )
 {
     //--------------------------------------------------------------------------------------------------------
     // Read run parameters
@@ -40,56 +41,75 @@ int Run( ConfigFile& the_config )
 
     //--------------------------------------------------------------------------------------------------------
     //! number of resolution elements per dimension
-    const size_t ngrid = the_config.GetValue<size_t>("setup", "GridRes");
+    const size_t ngrid = the_config.get_value<size_t>("setup", "GridRes");
 
     //--------------------------------------------------------------------------------------------------------
     //! box side length in h-1 Mpc
-    const real_t boxlen = the_config.GetValue<double>("setup", "BoxLength");
+    const real_t boxlen = the_config.get_value<double>("setup", "BoxLength");
 
     //--------------------------------------------------------------------------------------------------------
     //! starting redshift
-    const real_t zstart = the_config.GetValue<double>("setup", "zstart");
+    const real_t zstart = the_config.get_value<double>("setup", "zstart");
 
     //--------------------------------------------------------------------------------------------------------
     //! order of the LPT approximation 
-    int LPTorder = the_config.GetValueSafe<double>("setup","LPTorder",100);
+    int LPTorder = the_config.get_value_safe<double>("setup","LPTorder",100);
 
     //--------------------------------------------------------------------------------------------------------
     //! initialice particles on a bcc or fcc lattice instead of a standard sc lattice (doubles and quadruples the number of particles) 
-    std::string lattice_str = the_config.GetValueSafe<std::string>("setup","ParticleLoad","sc");
-    const particle::lattice lattice_type = (lattice_str=="bcc")? particle::lattice_bcc 
-        : ((lattice_str=="fcc")? particle::lattice_fcc : particle::lattice_sc);
+    std::string lattice_str = the_config.get_value_safe<std::string>("setup","ParticleLoad","sc");
+    const particle::lattice lattice_type = 
+          ((lattice_str=="bcc")? particle::lattice_bcc 
+        : ((lattice_str=="fcc")? particle::lattice_fcc 
+        : ((lattice_str=="rsc")? particle::lattice_rsc 
+        : ((lattice_str=="glass")? particle::lattice_glass
+        : particle::lattice_sc))));
 
     //--------------------------------------------------------------------------------------------------------
     //! apply fixing of the complex mode amplitude following Angulo & Pontzen (2016) [https://arxiv.org/abs/1603.05253]
-    const bool bDoFixing = the_config.GetValueSafe<bool>("setup", "DoFixing", false);
+    const bool bDoFixing = the_config.get_value_safe<bool>("setup", "DoFixing", false);
 
     //--------------------------------------------------------------------------------------------------------
     //! do baryon ICs?
-    const bool bDoBaryons = the_config.GetValueSafe<bool>("setup", "DoBaryons", false );
+    const bool bDoBaryons = the_config.get_value_safe<bool>("setup", "DoBaryons", false );
+    std::map< cosmo_species, double > Omega;
+    if( bDoBaryons ){
+        double Om = the_config.get_value<double>("cosmology", "Omega_m");
+        double Ob = the_config.get_value<double>("cosmology", "Omega_b");
+        Omega[cosmo_species::dm] = Om-Ob;
+        Omega[cosmo_species::baryon] = Ob;
+    }else{
+        double Om = the_config.get_value<double>("cosmology", "Omega_m");
+        Omega[cosmo_species::dm] = Om;
+        Omega[cosmo_species::baryon] = 0.0;
+    }
+
+    //--------------------------------------------------------------------------------------------------------
+    //! do constrained ICs?
+    const bool bAddConstrainedModes =  the_config.contains_key("setup", "ConstraintFieldFile" );
 
     //--------------------------------------------------------------------------------------------------------
     //! add beyond box tidal field modes following Schmidt et al. (2018) [https://arxiv.org/abs/1803.03274]
-    bool bAddExternalTides = the_config.ContainsKey("cosmology", "LSS_aniso_lx") 
-                           & the_config.ContainsKey("cosmology", "LSS_aniso_ly") 
-                           & the_config.ContainsKey("cosmology", "LSS_aniso_lz");
+    bool bAddExternalTides = the_config.contains_key("cosmology", "LSS_aniso_lx")
+                          || the_config.contains_key("cosmology", "LSS_aniso_ly")
+                          || the_config.contains_key("cosmology", "LSS_aniso_lz"); // requested if ANY component is given
 
-    if( bAddExternalTides && !(  the_config.ContainsKey("cosmology", "LSS_aniso_lx") 
-                               | the_config.ContainsKey("cosmology", "LSS_aniso_ly") 
-                               | the_config.ContainsKey("cosmology", "LSS_aniso_lz") ))
+    if( bAddExternalTides && !(  the_config.contains_key("cosmology", "LSS_aniso_lx")
+                              && the_config.contains_key("cosmology", "LSS_aniso_ly")
+                              && the_config.contains_key("cosmology", "LSS_aniso_lz") )) // ...but usable only if ALL are given
     {
-        csoca::elog << "Not all dimensions of LSS_aniso_l{x,y,z} specified! Will ignore external tidal field!" << std::endl;
+        music::elog << "Not all dimensions of LSS_aniso_l{x,y,z} specified! Will ignore external tidal field!" << std::endl;
         bAddExternalTides = false;
     }
     // Anisotropy parameters for beyond box tidal field 
     std::array<real_t,3> lss_aniso_lambda = {
-        the_config.GetValueSafe<double>("cosmology", "LSS_aniso_lx", 0.0),
-        the_config.GetValueSafe<double>("cosmology", "LSS_aniso_ly", 0.0),
-        the_config.GetValueSafe<double>("cosmology", "LSS_aniso_lz", 0.0),
+        the_config.get_value_safe<double>("cosmology", "LSS_aniso_lx", 0.0),
+        the_config.get_value_safe<double>("cosmology", "LSS_aniso_ly", 0.0),
+        the_config.get_value_safe<double>("cosmology", "LSS_aniso_lz", 0.0),
     };  
     
     if( std::abs(lss_aniso_lambda[0]+lss_aniso_lambda[1]+lss_aniso_lambda[2]) > 1e-10 ){
-        csoca::elog << "External tidal field is not trace-free! Will subtract trace!" << std::endl;
+        music::elog << "External tidal field is not trace-free! Will subtract trace!" << std::endl;
         auto tr_l_3 = (lss_aniso_lambda[0]+lss_aniso_lambda[1]+lss_aniso_lambda[2])/3.0;
         lss_aniso_lambda[0] -= tr_l_3;
         lss_aniso_lambda[1] -= tr_l_3;
@@ -101,20 +121,20 @@ int Run( ConfigFile& the_config )
     const real_t astart = 1.0/(1.0+zstart);
     const real_t volfac(std::pow(boxlen / ngrid / 2.0 / M_PI, 1.5));
 
-    the_cosmo_calc->WritePowerspectrum(astart, "input_powerspec.txt" );
+    the_cosmo_calc->write_powerspectrum(astart, "input_powerspec.txt" );
 
-    //csoca::ilog << "-----------------------------------------------------------------------------" << std::endl;
+    //music::ilog << "-----------------------------------------------------------------------------" << std::endl;
 
     // if( bSymplecticPT && LPTorder!=2 ){
-    //     csoca::wlog << "SymplecticPT has been selected and will overwrite chosen order of LPT to 2" << std::endl;
+    //     music::wlog << "SymplecticPT has been selected and will overwrite chosen order of LPT to 2" << std::endl;
     //     LPTorder = 2;
     // }
 
     //--------------------------------------------------------------------
     // Compute LPT time coefficients
     //--------------------------------------------------------------------
-    const real_t Dplus0 = the_cosmo_calc->CalcGrowthFactor(astart) / the_cosmo_calc->CalcGrowthFactor(1.0);
-    const real_t vfac   = the_cosmo_calc->CalcVFact(astart);
+    const real_t Dplus0 = the_cosmo_calc->get_growth_factor(astart);
+    const real_t vfac   = the_cosmo_calc->get_vfact(astart);
 
     const double g1  = -Dplus0;
     const double g2  = ((LPTorder>1)? -3.0/7.0*Dplus0*Dplus0 : 0.0);
@@ -132,7 +152,7 @@ int Run( ConfigFile& the_config )
     // coefficients needed for anisotropic external tides
     const double ai3 = std::pow(astart,-3);
     const double Omega_m_of_a = the_cosmo_calc->cosmo_param_.Omega_m * ai3 / (the_cosmo_calc->cosmo_param_.Omega_m * ai3 + the_cosmo_calc->cosmo_param_.Omega_DE);
-    const double f1 = the_cosmo_calc->CalcGrowthRate(astart);
+    const double f1 = the_cosmo_calc->get_f(astart);
     const double f_aniso = -4.0/3.0 * f1 * f1 / Omega_m_of_a;
 
     const std::array<real_t,3> lss_aniso_alpha = {
@@ -151,200 +171,300 @@ int Run( ConfigFile& the_config )
     Grid_FFT<real_t> A3x({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
     Grid_FFT<real_t> A3y({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
     Grid_FFT<real_t> A3z({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+
     //... array [.] access to components of A3:
-    std::array< Grid_FFT<real_t>*,3 > A3({&A3x,&A3y,&A3z});
+    std::array<Grid_FFT<real_t> *, 3> A3({&A3x, &A3y, &A3z});
+
+    // white noise field 
+    Grid_FFT<real_t> wnoise({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+    
+    //--------------------------------------------------------------------
+    // Fill the grid with a Gaussian white noise field
+    //--------------------------------------------------------------------
+    music::ilog << "-------------------------------------------------------------------------------" << std::endl;
+    music::ilog << "Generating white noise field...." << std::endl;
+
+    the_random_number_generator->Fill_Grid(wnoise);
+    
+    wnoise.FourierTransformForward();
+
+    //--------------------------------------------------------------------
+    // If requested, overwrite large-scale modes with an externally specified constraint field
+    //--------------------------------------------------------------------
+    if( bAddConstrainedModes ){
+        Grid_FFT<real_t,false> cwnoise({8,8,8}, {boxlen,boxlen,boxlen});
+        cwnoise.Read_from_HDF5( the_config.get_value<std::string>("setup", "ConstraintFieldFile"), 
+                the_config.get_value<std::string>("setup", "ConstraintFieldName") );
+        cwnoise.FourierTransformForward();
+
+        size_t ngrid_c = cwnoise.size(0), ngrid_c_2 = ngrid_c/2;
+
+        // accumulate sums over old/new mode values and copy the constraint modes into wnoise
+        double rs1{0.0},rs2{0.0},is1{0.0},is2{0.0};
+        double nrs1{0.0},nrs2{0.0},nis1{0.0},nis2{0.0};
+        size_t count{0};
+
+        #pragma omp parallel for reduction(+:rs1,rs2,is1,is2,nrs1,nrs2,nis1,nis2,count)
+        for( size_t i=0; i<ngrid_c; ++i ){
+            size_t il = size_t(-1);
+            if( i<ngrid_c_2 && i<ngrid/2 ) il = i;
+            if( i>ngrid_c_2 && i+ngrid-ngrid_c>ngrid/2) il = ngrid-ngrid_c+i;
+            if( il == size_t(-1) ) continue;
+            if( il<size_t(wnoise.local_1_start_) || il>=size_t(wnoise.local_1_start_+wnoise.local_1_size_)) continue;
+            il -= wnoise.local_1_start_;
+            for( size_t j=0; j<ngrid_c; ++j ){
+                size_t jl = size_t(-1);
+                if( j<ngrid_c_2 && j<ngrid/2 ) jl = j;
+                if( j>ngrid_c_2 && j+ngrid-ngrid_c>ngrid/2 ) jl = ngrid-ngrid_c+j;
+                if( jl == size_t(-1) ) continue;
+                for( size_t k=0; k<ngrid_c/2+1; ++k ){
+                    if( k>ngrid/2 ) continue;
+                    size_t kl = k;
+                    
+                    ++count;
+
+                    nrs1 += std::real(cwnoise.kelem(i,j,k));
+                    nrs2 += std::real(cwnoise.kelem(i,j,k))*std::real(cwnoise.kelem(i,j,k));
+                    nis1 += std::imag(cwnoise.kelem(i,j,k));
+                    nis2 += std::imag(cwnoise.kelem(i,j,k))*std::imag(cwnoise.kelem(i,j,k));
+
+                    rs1 += std::real(wnoise.kelem(il,jl,kl));
+                    rs2 += std::real(wnoise.kelem(il,jl,kl))*std::real(wnoise.kelem(il,jl,kl));
+                    is1 += std::imag(wnoise.kelem(il,jl,kl));
+                    is2 += std::imag(wnoise.kelem(il,jl,kl))*std::imag(wnoise.kelem(il,jl,kl));
+                    
+                #if defined(USE_MPI)
+                    wnoise.kelem(il,jl,kl) = cwnoise.kelem(j,i,k);
+                #else
+                    wnoise.kelem(il,jl,kl) = cwnoise.kelem(i,j,k);
+                #endif
+                }
+            }
+        }
+
+        // music::ilog << "  ... old field: re <w>=" << rs1/count << " <w^2>-<w>^2=" << rs2/count-rs1*rs1/count/count << std::endl;
+        // music::ilog << "  ... old field: im <w>=" << is1/count << " <w^2>-<w>^2=" << is2/count-is1*is1/count/count << std::endl;
+        // music::ilog << "  ... new field: re <w>=" << nrs1/count << " <w^2>-<w>^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl;
+        // music::ilog << "  ... new field: im <w>=" << nis1/count << " <w^2>-<w>^2=" << nis2/count-nis1*nis1/count/count << std::endl;
+        music::ilog << "White noise field large-scale modes overwritten with external field." << std::endl;
+    }
+
+    //--------------------------------------------------------------------
+    // Apply Normalisation factor and Angulo&Pontzen fixing or not
+    //--------------------------------------------------------------------
+
+    wnoise.apply_function_k( [&](auto wn){
+        if (bDoFixing)
+            wn = (std::abs(wn) != 0.0) ? wn / std::abs(wn) : wn;
+        return wn / volfac;
+    });
+
+
+    //--------------------------------------------------------------------
+    // Compute the LPT terms....
+    //--------------------------------------------------------------------
 
     //--------------------------------------------------------------------
     // Create convolution class instance for non-linear terms
     //--------------------------------------------------------------------
+#if defined(USE_CONVOLVER_ORSZAG)
     OrszagConvolver<real_t> Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
-    // NaiveConvolver<real_t> Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+#elif defined(USE_CONVOLVER_NAIVE)
+    NaiveConvolver<real_t> Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+#endif
     //--------------------------------------------------------------------
 
-    std::vector<cosmo_species> species_list;
-    species_list.push_back( cosmo_species::dm );
-    if( bDoBaryons ) species_list.push_back( cosmo_species::baryon );
+    //--------------------------------------------------------------------
+    // Create PLT gradient operator
+    //--------------------------------------------------------------------
+#if defined(ENABLE_PLT)
+    particle::lattice_gradient lg( the_config );
+#else
+    op::fourier_gradient lg( the_config );
+#endif
+
+    //--------------------------------------------------------------------
+    std::vector<cosmo_species> species_list;
+    species_list.push_back(cosmo_species::dm);
+    if (bDoBaryons)
+        species_list.push_back(cosmo_species::baryon);
+
+    //======================================================================
+    //... compute 1LPT displacement potential ....
+    //======================================================================
+    // phi = - delta / k^2
+
+    music::ilog << "-------------------------------------------------------------------------------" << std::endl;
+    music::ilog << "Generating LPT fields...." << std::endl;
+
+    double wtime = get_wtime();
+    music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush;
+
+    phi.FourierTransformForward(false);
+    phi.assign_function_of_grids_kdep([&](auto k, auto wn) {
+        real_t kmod = k.norm();
+        ccomplex_t delta = wn * the_cosmo_calc->get_amplitude(kmod, total);
+        return -delta / (kmod * kmod);
+    }, wnoise);
+
+    phi.zero_DC_mode();
+
+    music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
+
+    //======================================================================
+    //... compute 2LPT displacement potential ....
+    //======================================================================
+    if (LPTorder > 1)
+    {
+        wtime = get_wtime();
+        music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush;
+        phi2.FourierTransformForward(false);
+        Conv.convolve_SumOfHessians(phi, {0, 0}, phi, {1, 1}, {2, 2}, op::assign_to(phi2));
+        Conv.convolve_Hessians(phi, {1, 1}, phi, {2, 2}, op::add_to(phi2));
+        Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, op::subtract_from(phi2));
+        Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, op::subtract_from(phi2));
+        Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, op::subtract_from(phi2));
+
+        if (bAddExternalTides)
+        {
+            phi2.assign_function_of_grids_kdep([&](vec3_t<real_t> kvec, ccomplex_t pphi, ccomplex_t pphi2) {
+                // sign in front of f_aniso is reversed since phi1 = -phi
+                return pphi2 + f_aniso * (kvec[0] * kvec[0] * lss_aniso_lambda[0] + kvec[1] * kvec[1] * lss_aniso_lambda[1] + kvec[2] * kvec[2] * lss_aniso_lambda[2]) * pphi;
+            },
+                                               phi, phi2);
+        }
+
+        phi2.apply_InverseLaplacian();
+        music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
+
+        if (bAddExternalTides)
+        {
+            music::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" << std::endl;
+            music::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl;
+        }
+    }
+
+    //======================================================================
+    //... compute 3LPT displacement potential
+    //======================================================================
+    if (LPTorder > 2)
+    {
+        //... 3a term ...
+        wtime = get_wtime();
+        music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush;
+        phi3a.FourierTransformForward(false);
+        Conv.convolve_Hessians(phi, {0, 0}, phi, {1, 1}, phi, {2, 2}, op::assign_to(phi3a));
+        Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::multiply_add_to(phi3a,2.0));
+        Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, phi, {0, 0}, op::subtract_from(phi3a));
+        Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, phi, {1, 1}, op::subtract_from(phi3a));
+        Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, phi, {2, 2}, op::subtract_from(phi3a));
+        phi3a.apply_InverseLaplacian();
+        music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
+
+        //... 3b term ...
+        wtime = get_wtime();
+        music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush;
+        phi3b.FourierTransformForward(false);
+        Conv.convolve_SumOfHessians(phi, {0, 0}, phi2, {1, 1}, {2, 2}, op::assign_to(phi3b));
+        Conv.convolve_SumOfHessians(phi, {1, 1}, phi2, {2, 2}, {0, 0}, op::add_to(phi3b));
+        Conv.convolve_SumOfHessians(phi, {2, 2}, phi2, {0, 0}, {1, 1}, op::add_to(phi3b));
+        Conv.convolve_Hessians(phi, {0, 1}, phi2, {0, 1}, op::multiply_add_to(phi3b,-2.0));
+        Conv.convolve_Hessians(phi, {0, 2}, phi2, {0, 2}, op::multiply_add_to(phi3b,-2.0));
+        Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::multiply_add_to(phi3b,-2.0));
+        phi3b.apply_InverseLaplacian();
+        phi3b *= 0.5; // factor 1/2 from definition of phi(3b)!
+        music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
+
+        //... transversal term ...
+        wtime = get_wtime();
+        music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush;
+        for (int idim = 0; idim < 3; ++idim)
+        {
+            // cyclic rotations of indices
+            int idimp = (idim + 1) % 3, idimpp = (idim + 2) % 3;
+            A3[idim]->FourierTransformForward(false);
+            Conv.convolve_Hessians(phi2, {idim, idimp}, phi, {idim, idimpp}, op::assign_to(*A3[idim]));
+            Conv.convolve_Hessians(phi2, {idim, idimpp}, phi, {idim, idimp}, op::subtract_from(*A3[idim]));
+            Conv.convolve_DifferenceOfHessians(phi, {idimp, idimpp}, phi2, {idimp, idimp}, {idimpp, idimpp}, op::add_to(*A3[idim]));
+            Conv.convolve_DifferenceOfHessians(phi2, {idimp, idimpp}, phi, {idimp, idimp}, {idimpp, idimpp}, op::subtract_from(*A3[idim]));
+            A3[idim]->apply_InverseLaplacian();
+        }
+        music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
+    }
+
+    // if( bSymplecticPT ){
+    //     //... transversal term ...
+    //     wtime = get_wtime();
+    //     music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing vNLO(3) term" << std::flush;
+    //     for( int idim=0; idim<3; ++idim ){
+    //         // cyclic rotations of indices
+    //         A3[idim]->FourierTransformForward(false);
+    //         Conv.convolve_Gradient_and_Hessian( phi, {0},  phi2, {idim,0}, assign_to(*A3[idim]) );
+    //         Conv.convolve_Gradient_and_Hessian( phi, {1},  phi2, {idim,1}, add_to(*A3[idim]) );
+    //         Conv.convolve_Gradient_and_Hessian( phi, {2},  phi2, {idim,2}, add_to(*A3[idim]) );
+    //     }
+    //     music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
+
+    // }
+
+    ///... scale all potentials with respective growth factors
+    phi *= g1;
+    phi2 *= g2;
+    phi3a *= g3a;
+    phi3b *= g3b;
+    (*A3[0]) *= g3c;
+    (*A3[1]) *= g3c;
+    (*A3[2]) *= g3c;
+
+    music::ilog << "-------------------------------------------------------------------------------" << std::endl;
+
+    ///////////////////////////////////////////////////////////////////////
+    // we store the densities here if we compute them
+    //======================================================================
+
+    // Testing
+    const std::string testing = the_config.get_value_safe<std::string>("testing", "test", "none");
+
+    if (testing != "none")
+    {
+        music::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" << std::endl;
+        if (testing == "potentials_and_densities"){
+            testing::output_potentials_and_densities(the_config, ngrid, boxlen, phi, phi2, phi3a, phi3b, A3);
+        }
+        else if (testing == "velocity_displacement_symmetries"){
+            testing::output_velocity_displacement_symmetries(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3);
+        }
+        else if (testing == "convergence"){
+            testing::output_convergence(the_config, the_cosmo_calc.get(), ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3);
+        }
+        else{
+            music::flog << "unknown test '" << testing << "'" << std::endl;
+            std::abort();
+        }
+    }
 
-    csoca::ilog << "-------------------------------------------------------------------------------" << std::endl;
-    
     for( auto& this_species : species_list )
     {
-        csoca::ilog << std::endl
+        music::ilog << std::endl
                     << ">>> Computing ICs for species \'" << cosmo_species_name[this_species] << "\' <<<\n" << std::endl;
 
-        //======================================================================
-        //... compute 1LPT displacement potential ....
-        //======================================================================
-        // phi = - delta / k^2
-        double wtime = get_wtime();
-        csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush;
-
-        #if 1 //  random ICs
-        //--------------------------------------------------------------------
-        // Fill the grid with a Gaussian white noise field
-        //--------------------------------------------------------------------
-        the_random_number_generator->Fill_Grid( phi );
-
-        phi.FourierTransformForward();
-
-        phi.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t {
-            real_t kmod = k.norm();
-            if( bDoFixing ) x = (std::abs(x)!=0.0)? x / std::abs(x) : x; 
-            ccomplex_t delta = x * the_cosmo_calc->GetAmplitude(kmod, total);
-            return -delta / (kmod * kmod) / volfac;
-        });
-
-        phi.zero_DC_mode();
-        #else // ICs with a given phi(1) potential function
-        constexpr real_t twopi{2.0*M_PI};
-        constexpr real_t epsilon_q1d{0.25};
-
-        constexpr real_t epsy{0.25};
-        constexpr real_t epsz{0.0};//epsz{0.25};
-        
-        phi.FourierTransformBackward(false);
-
-        phi.apply_function_r_dep([&](auto v, auto r) -> real_t {
-            real_t q1 = r[0]-0.5*boxlen;//r[0]/boxlen * twopi - M_PI;
-            real_t q2 = r[1]-0.5*boxlen;//r[1]/boxlen * twopi - M_PI;
-            real_t q3 = r[2]-0.5*boxlen;//r[1]/boxlen * twopi - M_PI;
-
-            // std::cerr << q1  << " " << q2 << std::endl;
-            
-            return -2.0*std::cos(q1+std::cos(q2));
-            // return (-std::cos(q1) + epsilon_q1d * std::sin(q2));
-            // return (-std::cos(q1) + epsy * std::sin(q2) + epsz * std::cos(q1) * std::sin(q3));
-        });
-        phi.FourierTransformForward();
-
-
-        #endif
-        csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
-
-        //======================================================================
-        //... compute 2LPT displacement potential ....
-        //======================================================================
-        if( LPTorder > 1 ){
-            wtime = get_wtime();
-            csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush;
-            phi2.FourierTransformForward(false);
-            Conv.convolve_SumOfHessians( phi, {0,0}, phi, {1,1}, {2,2}, op::assign_to( phi2 ) );
-            Conv.convolve_Hessians( phi, {1,1}, phi, {2,2}, op::add_to(phi2) );
-            Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, op::subtract_from(phi2) );
-            Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, op::subtract_from(phi2) );
-            Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, op::subtract_from(phi2) );
-
-            if( bAddExternalTides ){
-                phi2.assign_function_of_grids_kdep([&]( vec3<real_t> kvec, ccomplex_t pphi, ccomplex_t pphi2 ){
-                    // sign in front of f_aniso is reversed since phi1 = -phi
-                    return pphi2 + f_aniso * (kvec[0]*kvec[0]*lss_aniso_lambda[0]+kvec[1]*kvec[1]*lss_aniso_lambda[1]+kvec[2]*kvec[2]*lss_aniso_lambda[2])*pphi;
-                }, phi, phi2 );
-            }
-
-            phi2.apply_InverseLaplacian();
-            csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
-
-            if( bAddExternalTides ){
-                csoca::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" << std::endl;
-                csoca::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl;
-            }
-        }
-
-        //======================================================================
-        //... compute 3LPT displacement potential
-        //======================================================================
-        if( LPTorder > 2 ){
-            //... 3a term ...
-            wtime = get_wtime();
-            csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush;
-            phi3a.FourierTransformForward(false);
-            Conv.convolve_Hessians( phi, {0,0}, phi, {1,1}, phi, {2,2}, op::assign_to(phi3a) );
-            Conv.convolve_Hessians( phi, {0,1}, phi, {0,2}, phi, {1,2}, op::add_twice_to(phi3a) );
-            Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, phi, {0,0}, op::subtract_from(phi3a) );
-            Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, phi, {1,1}, op::subtract_from(phi3a) );
-            Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, phi, {2,2}, op::subtract_from(phi3a) );
-            phi3a.apply_InverseLaplacian();
-            csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
-
-            //... 3b term ...
-            wtime = get_wtime();
-            csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush;
-            phi3b.FourierTransformForward(false);
-            Conv.convolve_SumOfHessians( phi, {0,0}, phi2, {1,1}, {2,2}, op::assign_to(phi3b) );
-            Conv.convolve_SumOfHessians( phi, {1,1}, phi2, {2,2}, {0,0}, op::add_to(phi3b) );
-            Conv.convolve_SumOfHessians( phi, {2,2}, phi2, {0,0}, {1,1}, op::add_to(phi3b) );
-            Conv.convolve_Hessians( phi, {0,1}, phi2, {0,1}, op::subtract_twice_from(phi3b) );
-            Conv.convolve_Hessians( phi, {0,2}, phi2, {0,2}, op::subtract_twice_from(phi3b) );
-            Conv.convolve_Hessians( phi, {1,2}, phi2, {1,2}, op::subtract_twice_from(phi3b) );
-            phi3b.apply_InverseLaplacian();
-            phi3b *= 0.5; // factor 1/2 from definition of phi(3b)!
-            csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
-
-            //... transversal term ...
-            wtime = get_wtime();
-            csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush;
-            for( int idim=0; idim<3; ++idim ){
-                // cyclic rotations of indices
-                int idimp = (idim+1)%3, idimpp = (idim+2)%3;
-                A3[idim]->FourierTransformForward(false);
-                Conv.convolve_Hessians( phi2, {idim,idimp},  phi, {idim,idimpp}, op::assign_to(*A3[idim]) );
-                Conv.convolve_Hessians( phi2, {idim,idimpp}, phi, {idim,idimp},  op::subtract_from(*A3[idim]) );
-                Conv.convolve_DifferenceOfHessians( phi, {idimp,idimpp}, phi2,{idimp,idimp}, {idimpp,idimpp}, op::add_to(*A3[idim]) );
-                Conv.convolve_DifferenceOfHessians( phi2,{idimp,idimpp}, phi, {idimp,idimp}, {idimpp,idimpp}, op::subtract_from(*A3[idim]) );
-                A3[idim]->apply_InverseLaplacian();
-            }
-            csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
-        }
-
-        // if( bSymplecticPT ){
-        //     //... transversal term ...
-        //     wtime = get_wtime();
-        //     csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing vNLO(3) term" << std::flush;
-        //     for( int idim=0; idim<3; ++idim ){
-        //         // cyclic rotations of indices
-        //         A3[idim]->FourierTransformForward(false);
-        //         Conv.convolve_Gradient_and_Hessian( phi, {0},  phi2, {idim,0}, assign_to(*A3[idim]) );
-        //         Conv.convolve_Gradient_and_Hessian( phi, {1},  phi2, {idim,1}, add_to(*A3[idim]) );
-        //         Conv.convolve_Gradient_and_Hessian( phi, {2},  phi2, {idim,2}, add_to(*A3[idim]) );
-        //     }
-        //     csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
-
-        // }
-
-        ///... scale all potentials with respective growth factors
-        phi *= g1;
-        phi2 *= g2;
-        phi3a *= g3a;
-        phi3b *= g3b;
-        (*A3[0]) *= g3c;
-        (*A3[1]) *= g3c;
-        (*A3[2]) *= g3c;
-
-        csoca::ilog << "-------------------------------------------------------------------------------" << std::endl;
-
-        ///////////////////////////////////////////////////////////////////////
-        // we store the densities here if we compute them
-        //======================================================================
-
-        // Testing
-        const std::string testing = the_config.GetValueSafe<std::string>("testing", "test", "none");
-
-        if(testing != "none") {
-            csoca::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" << std::endl;
-            if(testing == "potentials_and_densities") {
-                testing::output_potentials_and_densities(the_config, ngrid, boxlen, phi, phi2, phi3a, phi3b, A3);
-            } else if(testing == "velocity_displacement_symmetries") {
-                testing::output_velocity_displacement_symmetries(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3);
-            } else if(testing == "convergence") {
-                testing::output_convergence(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3);
-            } else {
-                csoca::flog << "unknown test '" << testing << "'" << std::endl;
-                std::abort();
-        }
-        } else {
+        {
             // temporary storage of data
             Grid_FFT<real_t> tmp({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
 
+            std::unique_ptr<particle::lattice_generator<Grid_FFT<real_t>>> particle_lattice_generator_ptr;
+
+            // if the output plugin wants particles, create the lattice generator that stores them along with their IDs
+            if( the_output_plugin->write_species_as( this_species ) == output_type::particles )
+            {
+                // somewhat arbitrarily, start baryon particle IDs from 2**31 (32-bit IDs) or from 2**56 (64-bit IDs);
+                // use 1ull: with a 32-bit unsigned long (e.g. LLP64 platforms) the shift by 56 in 1ul<<56 is undefined
+                size_t IDoffset = (this_species == cosmo_species::baryon)? ((the_output_plugin->has_64bit_ids())? 1ull<<56 : 1ull<<31): 0 ;
+                // allocate particle structure and generate particle IDs
+                particle_lattice_generator_ptr = 
+                std::make_unique<particle::lattice_generator<Grid_FFT<real_t>>>( lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp, the_config );
+            }
+
 
             //if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_eulerian ){
             if( the_output_plugin->write_species_as(this_species) == output_type::field_eulerian )
@@ -362,7 +482,7 @@ int Run( ConfigFile& the_config )
                 real_t std_phi1 = phi.std();
 
                 const real_t hbar = 2.0 * M_PI/ngrid * (2*std_phi1/Dplus0); //3sigma, but this might rather depend on gradients of phi...
-                csoca::ilog << "Semiclassical PT : hbar = " << hbar << " from sigma(phi1) = " << std_phi1 << std::endl;
+                music::ilog << "Semiclassical PT : hbar = " << hbar << " from sigma(phi1) = " << std_phi1 << std::endl;
                 
                 if( LPTorder == 1 ){
                     psi.assign_function_of_grids_r([hbar,Dplus0]( real_t pphi ){
@@ -435,14 +555,21 @@ int Run( ConfigFile& the_config )
                 //===================================================================================
                 // we store displacements and velocities here if we compute them
                 //===================================================================================
-                particle::container particles;
+                
+
+                bool shifted_lattice = (this_species == cosmo_species::baryon) // true only for baryons that are written as particles
+                                    && (the_output_plugin->write_species_as(this_species) == output_type::particles);
+
+                
+
+                grid_interpolate<1,Grid_FFT<real_t>> interp( tmp );
 
                 // if output plugin wants particles, then we need to store them, along with their IDs
-                if( the_output_plugin->write_species_as( this_species ) == output_type::particles )
-                {
-                    // allocate particle structure and generate particle IDs
-                    particle::initialize_lattice( particles, lattice_type, tmp );
-                }
+                // if( the_output_plugin->write_species_as( this_species ) == output_type::particles )
+                // {
+                //     // allocate particle structure and generate particle IDs
+                //     particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp, the_config );
+                // }
             
                 // write out positions
                 for( int idim=0; idim<3; ++idim ){
@@ -459,17 +586,37 @@ int Run( ConfigFile& the_config )
                                 size_t idx = phi.get_idx(i,j,k);
                                 auto phitot = phi.kelem(idx) + phi2.kelem(idx) + phi3a.kelem(idx) + phi3b.kelem(idx);
                                 // divide by Lbox, because displacement is in box units for output plugin
-                                tmp.kelem(idx) = lunit / boxlen * ( phi.gradient(idim,{i,j,k}) * phitot 
-                                    + phi.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - phi.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx) );
+                                tmp.kelem(idx) = lunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot 
+                                    + lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx) );
+
+                                if( the_output_plugin->write_species_as( this_species ) == output_type::particles && lattice_type == particle::lattice_glass){
+                                    tmp.kelem(idx) *= interp.compensation_kernel( tmp.get_k<real_t>(i,j,k) );
+                                }
+
+                                if( bDoBaryons ){
+                                    vec3_t<real_t> kvec = phi.get_k<real_t>(i,j,k);
+                                    real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2);
+                                    // double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, cdm) :
+                                    //  (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, baryon) : 
+                                    // //   the_cosmo_calc->get_amplitude(kmod, total)) - the_cosmo_calc->get_amplitude(kmod, total);
+                                    //  the_cosmo_calc->get_amplitude(kmod, total)*(-g1)) - the_cosmo_calc->get_amplitude(kmod, total)*(-g1);
+
+                                    real_t ampldiff = (((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, cdm) 
+                                        : (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, baryon) : 
+                                           the_cosmo_calc->get_amplitude(kmod, total)) - the_cosmo_calc->get_amplitude(kmod, total)) * (-g1);
+
+                                    tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * lunit * ampldiff / k2 / boxlen;
+                                }
                             }
                         }
                     }
+                    tmp.zero_DC_mode();
                     tmp.FourierTransformBackward();
 
                     // if we write particle data, store particle data in particle structure
                     if( the_output_plugin->write_species_as( this_species ) == output_type::particles )
                     {
-                        particle::set_positions( particles, lattice_type, idim, lunit, tmp );
+                        particle_lattice_generator_ptr->set_positions( lattice_type, shifted_lattice, idim, lunit, the_output_plugin->has_64bit_reals(), tmp, the_config );
                     } 
                     // otherwise write out the grid data directly to the output plugin
                     // else if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_lagrangian )
@@ -496,8 +643,29 @@ int Run( ConfigFile& the_config )
                                 // divide by Lbox, because displacement is in box units for output plugin
                                 auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx) + vfac3 * (phi3a.kelem(idx) + phi3b.kelem(idx));
 
-                                tmp.kelem(idx) = vunit / boxlen * ( phi.gradient(idim,{i,j,k}) * phitot_v 
-                                        + vfac3 * (phi.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - phi.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx)) );
+                                tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot_v 
+                                        + vfac3 * (lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx)) );
+
+                                if( the_output_plugin->write_species_as( this_species ) == output_type::particles && lattice_type == particle::lattice_glass){
+                                    tmp.kelem(idx) *= interp.compensation_kernel( tmp.get_k<real_t>(i,j,k) );
+                                }
+
+                                if( bDoBaryons ){
+                                    vec3_t<real_t> kvec = phi.get_k<real_t>(i,j,k);
+                                    real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2);
+                                    // double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, vcdm0) :
+                                    //  (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, vbaryon0) : 
+                                    //      the_cosmo_calc->get_amplitude(kmod, vtotal0)) - the_cosmo_calc->get_amplitude(kmod, vtotal0);
+                                    // // the_cosmo_calc->get_amplitude(kmod, total)*(-g1)) - the_cosmo_calc->get_amplitude(kmod, total)*(-g1);
+                                    real_t ampldiff = (((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, vcdm) 
+                                        : (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, vbaryon) : 
+                                           the_cosmo_calc->get_amplitude(kmod, vtotal)) - the_cosmo_calc->get_amplitude(kmod, vtotal)) * (-g1);
+                                    tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * vfac1 * vunit / boxlen * ampldiff / k2 ;
+                                }
+
+                                // correct velocity with PLT mode growth rate
+                                tmp.kelem(idx) *= lg.vfac_corr(tmp.get_k3(i,j,k));
+
 
                                 if( bAddExternalTides ){
                                     // modify velocities with anisotropic expansion factor**2
@@ -510,12 +678,13 @@ int Run( ConfigFile& the_config )
                             }
                         }
                     }
+                    tmp.zero_DC_mode();
                     tmp.FourierTransformBackward();
 
                     // if we write particle data, store particle data in particle structure
                     if( the_output_plugin->write_species_as( this_species ) == output_type::particles )
                     {
-                        particle::set_velocities( particles, lattice_type, idim, tmp );
+                        particle_lattice_generator_ptr->set_velocities( lattice_type, shifted_lattice, idim, the_output_plugin->has_64bit_reals(), tmp, the_config );
                     }
                     // otherwise write out the grid data directly to the output plugin
                     else if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian )
@@ -527,7 +696,7 @@ int Run( ConfigFile& the_config )
 
                 if( the_output_plugin->write_species_as( this_species ) == output_type::particles )
                 {
-                    the_output_plugin->write_particle_data( particles, this_species );
+                    the_output_plugin->write_particle_data( particle_lattice_generator_ptr->get_particles(), this_species, Omega[this_species] );
                 }
                 
                 if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian )
diff --git a/src/logger.cc b/src/logger.cc
index 2b93b89..26c34a5 100644
--- a/src/logger.cc
+++ b/src/logger.cc
@@ -1,19 +1,19 @@
 #include <logger.hh>
 
-namespace csoca {
+namespace music {
 
-std::ofstream Logger::output_file_;
-LogLevel Logger::log_level_ = LogLevel::Off;
+std::ofstream logger::output_file_;
+log_level logger::log_level_ = log_level::off;
 
-void Logger::SetLevel(const LogLevel &level) {
+void logger::set_level(const log_level &level) {
   log_level_ = level;
 }
 
-LogLevel Logger::GetLevel() {
+log_level logger::get_level() {
   return log_level_;
 }
 
-void Logger::SetOutput(const std::string filename) {
+void logger::set_output(const std::string filename) {
   if (output_file_.is_open()) {
     output_file_.close();
   }
@@ -21,22 +21,22 @@ void Logger::SetOutput(const std::string filename) {
   assert(output_file_.is_open());
 }
 
-void Logger::UnsetOutput() {
+void logger::unset_output() {
   if (output_file_.is_open()) {
     output_file_.close();
   }
 }
 
-std::ofstream &Logger::GetOutput() {
+std::ofstream &logger::get_output() {
   return output_file_;
 }
 
 // global instantiations for different levels
-Logger glogger;
-LogStream flog(glogger, LogLevel::Fatal);
-LogStream elog(glogger, LogLevel::Error);
-LogStream wlog(glogger, LogLevel::Warning);
-LogStream ilog(glogger, LogLevel::Info);
-LogStream dlog(glogger, LogLevel::Debug);
+logger the_logger;
+log_stream flog(the_logger, log_level::fatal);
+log_stream elog(the_logger, log_level::error);
+log_stream wlog(the_logger, log_level::warning);
+log_stream ilog(the_logger, log_level::info);
+log_stream dlog(the_logger, log_level::debug);
 
-} // namespace csoca
+} // namespace music
diff --git a/src/main.cc b/src/main.cc
index 72e9a38..c609a4a 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -3,6 +3,7 @@
 #include <iostream>
 #include <fstream>
 #include <thread>
+#include <cfenv>
 
 #if defined(_OPENMP)
 #include <omp.h>
@@ -10,6 +11,7 @@
 
 #include <general.hh>
 #include <ic_generator.hh>
+#include <particle_plt.hh>
 
 
 // initialise with "default" values
@@ -26,10 +28,28 @@ int  num_threads = 1;
 
 #include "system_stat.hh"
 
+#include <exception>
+#include <stdexcept>
+ 
+void handle_eptr(std::exception_ptr eptr) // logs the captured exception, if any; passing by value is ok
+{
+    try {
+        if (eptr) std::rethrow_exception(eptr);
+    } catch(const std::exception& e) {
+        music::elog << "This happened: \"" << e.what() << "\"" << std::endl;
+    } catch(...) { // never let anything escape: this is called from inside a catch(...) handler
+        music::elog << "This happened: an exception not derived from std::exception" << std::endl;
+    }
+}
+
 int main( int argc, char** argv )
 {
-    csoca::Logger::SetLevel(csoca::LogLevel::Info);
-    // csoca::Logger::SetLevel(csoca::LogLevel::Debug);
+
+#if defined(NDEBUG)
+    music::logger::set_level(music::log_level::info);
+#else
+    music::logger::set_level(music::log_level::debug);
+#endif
 
     //------------------------------------------------------------------------------
     // initialise MPI 
@@ -45,20 +65,39 @@ int main( int argc, char** argv )
     // set up lower logging levels for other tasks
     if( CONFIG::MPI_task_rank!=0 )
     {
-        csoca::Logger::SetLevel(csoca::LogLevel::Error);
+        music::logger::set_level(music::log_level::error);
     }
 #endif
 
-    csoca::ilog << "\n"
-                << " unigrid MUSIC                          .8888b                   dP  a88888b. \n"
+    // ASCII art logo, generated via http://patorjk.com/software/taag/#p=display&f=Nancyj&t=monofonIC
+    music::ilog << "\n"
+                << " The unigrid version of MUSIC-2         .8888b                   dP  a88888b. \n"
                 << "                                        88   \"                   88 d8\'   `88 \n"
                 << "  88d8b.d8b. .d8888b. 88d888b. .d8888b. 88aaa  .d8888b. 88d888b. 88 88        \n"
                 << "  88\'`88\'`88 88\'  `88 88\'  `88 88\'  `88 88     88\'  `88 88\'  `88 88 88        \n"
                 << "  88  88  88 88.  .88 88    88 88.  .88 88     88.  .88 88    88 88 Y8.   .88 \n"
-                << "  dP  dP  dP `88888P\' dP    dP `88888P\' dP     `88888P\' dP    dP dP  Y88888P\' \n" << std::endl
-                << "version  : v0.1a, git rev. : " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl
-                << "-------------------------------------------------------------------------------" << std::endl;
+                << "  dP  dP  dP `88888P\' dP    dP `88888P\' dP     `88888P\' dP    dP dP  Y88888P\' \n" << std::endl;
+
+    // git and versioning info:
+    music::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl;
     
+    // CMake build type and the time and date of compilation:
+    music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " <<  __DATE__ << std::endl;
+
+#if defined(__GNUC__) && !defined(__clang__) // Clang also defines __GNUC__, so exclude it from the GNU banner
+    music::ilog << "Compiled with GNU C++ version " << __VERSION__ <<std::endl;
+#elif defined(__VERSION__) // __VERSION__ is a GCC/Clang extension; only reference it where it is defined
+    music::ilog << "Compiled with " << __VERSION__ << std::endl;
+#endif
+
+    
+    music::ilog << "-------------------------------------------------------------------------------" << std::endl;
+    music::ilog << "Compile time options : " << std::endl;
+    music::ilog << "                       Precision : " << CMAKE_PRECISION_STR << std::endl;
+    music::ilog << "                    Convolutions : " << CMAKE_CONVOLVER_STR << std::endl;
+    music::ilog << "                             PLT : " << CMAKE_PLT_STR << std::endl;
+    music::ilog << "-------------------------------------------------------------------------------" << std::endl;
+
 
     //------------------------------------------------------------------------------
     // Parse command line options
@@ -71,12 +110,12 @@ int main( int argc, char** argv )
         print_RNG_plugins();
         print_output_plugins();
 
-        csoca::elog << "In order to run, you need to specify a parameter file!" << std::endl;
+        music::elog << "In order to run, you need to specify a parameter file!\n" << std::endl;
         exit(0);
     }
 
     // open the configuration file 
-    ConfigFile the_config(argv[1]);
+    config_file the_config(argv[1]);
 
     //------------------------------------------------------------------------------
     // Set up FFTW
@@ -95,7 +134,7 @@ int main( int argc, char** argv )
     FFTW_API(mpi_init)();
 #endif
 
-    CONFIG::num_threads = the_config.GetValueSafe<unsigned>("execution", "NumThreads",std::thread::hardware_concurrency());
+    CONFIG::num_threads = the_config.get_value_safe<unsigned>("execution", "NumThreads",std::thread::hardware_concurrency());
     
 #if defined(USE_FFTW_THREADS)
     if (CONFIG::FFTW_threads_ok)
@@ -110,14 +149,16 @@ int main( int argc, char** argv )
     omp_set_num_threads(CONFIG::num_threads);
 #endif
 
+    // std::feclearexcept(FE_ALL_EXCEPT);
+
     //------------------------------------------------------------------------------
     // Write code configuration to screen
     //------------------------------------------------------------------------------
     // hardware related infos
-    csoca::ilog << std::setw(32) << std::left << "CPU vendor string" << " : " << SystemStat::Cpu().get_CPUstring() << std::endl;
+    music::ilog << std::setw(32) << std::left << "CPU vendor string" << " : " << SystemStat::Cpu().get_CPUstring() << std::endl;
     
     // multi-threading related infos
-    csoca::ilog << std::setw(32) << std::left << "Available HW threads / task" << " : " << std::thread::hardware_concurrency() << " (" << CONFIG::num_threads << " used)" << std::endl;
+    music::ilog << std::setw(32) << std::left << "Available HW threads / task" << " : " << std::thread::hardware_concurrency() << " (" << CONFIG::num_threads << " used)" << std::endl;
 
     // memory related infos
     SystemStat::Memory mem;
@@ -134,34 +175,34 @@ int main( int argc, char** argv )
     MPI_Allreduce(&minupmem,&temp,1,MPI_UNSIGNED,MPI_MIN,MPI_COMM_WORLD); minupmem = temp;
     MPI_Allreduce(&maxupmem,&temp,1,MPI_UNSIGNED,MPI_MAX,MPI_COMM_WORLD); maxupmem = temp;
 #endif
-    csoca::ilog << std::setw(32) << std::left << "Total system memory (phys)" << " : " << mem.get_TotalMem()/1024/1024 << " Mb" << std::endl;
-    csoca::ilog << std::setw(32) << std::left << "Used system memory (phys)" << " : " << "Max: " << maxupmem << " Mb, Min: " << minupmem << " Mb" << std::endl;
-    csoca::ilog << std::setw(32) << std::left << "Available system memory (phys)" << " : " <<  "Max: " << maxpmem << " Mb, Min: " << minpmem << " Mb" << std::endl;
+    music::ilog << std::setw(32) << std::left << "Total system memory (phys)" << " : " << mem.get_TotalMem()/1024/1024 << " Mb" << std::endl;
+    music::ilog << std::setw(32) << std::left << "Used system memory (phys)" << " : " << "Max: " << maxupmem << " Mb, Min: " << minupmem << " Mb" << std::endl;
+    music::ilog << std::setw(32) << std::left << "Available system memory (phys)" << " : " <<  "Max: " << maxpmem << " Mb, Min: " << minpmem << " Mb" << std::endl;
     
     // MPI related infos
 #if defined(USE_MPI)
-    csoca::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "yes (" << CONFIG::MPI_task_size << " tasks)" << std::endl;
-    csoca::dlog << std::setw(32) << std::left << "MPI version" << " : " << GetMPIversion() << std::endl;
+    music::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "yes (" << CONFIG::MPI_task_size << " tasks)" << std::endl;
+    music::dlog << std::setw(32) << std::left << "MPI version" << " : " << MPI::get_version() << std::endl;
 #else
-    csoca::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "no" << std::endl;
+    music::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "no" << std::endl;
 #endif
-    csoca::ilog << std::setw(32) << std::left << "MPI supports multi-threading" << " : " << (CONFIG::MPI_threads_ok? "yes" : "no") << std::endl;
+    music::ilog << std::setw(32) << std::left << "MPI supports multi-threading" << " : " << (CONFIG::MPI_threads_ok? "yes" : "no") << std::endl;
     
     // Kernel related infos
     SystemStat::Kernel kern;
     auto kinfo = kern.get_kernel_info();
-    csoca::ilog << std::setw(32) << std::left << "OS/Kernel version" << " : " << kinfo.kernel << " version " << kinfo.major << "." << kinfo.minor << " build " << kinfo.build_number << std::endl;
+    music::ilog << std::setw(32) << std::left << "OS/Kernel version" << " : " << kinfo.kernel << " version " << kinfo.major << "." << kinfo.minor << " build " << kinfo.build_number << std::endl;
 
     // FFTW related infos
-    csoca::ilog << std::setw(32) << std::left << "FFTW version" << " : " << fftw_version << std::endl;
-    csoca::ilog << std::setw(32) << std::left << "FFTW supports multi-threading" << " : " << (CONFIG::FFTW_threads_ok? "yes" : "no") << std::endl;
-    csoca::ilog << std::setw(32) << std::left << "FFTW mode" << " : ";
+    music::ilog << std::setw(32) << std::left << "FFTW version" << " : " << fftw_version << std::endl;
+    music::ilog << std::setw(32) << std::left << "FFTW supports multi-threading" << " : " << (CONFIG::FFTW_threads_ok? "yes" : "no") << std::endl;
+    music::ilog << std::setw(32) << std::left << "FFTW mode" << " : ";
 #if defined(FFTW_MODE_PATIENT)
-	csoca::ilog << "FFTW_PATIENT" << std::endl;
+	music::ilog << "FFTW_PATIENT" << std::endl;
 #elif defined(FFTW_MODE_MEASURE)
-    csoca::ilog << "FFTW_MEASURE" << std::endl;
+    music::ilog << "FFTW_MEASURE" << std::endl;
 #else
-	csoca::ilog << "FFTW_ESTIMATE" << std::endl;
+	music::ilog << "FFTW_ESTIMATE" << std::endl;
 #endif
     //--------------------------------------------------------------------
     // Initialise plug-ins
@@ -170,7 +211,8 @@ int main( int argc, char** argv )
     {
         ic_generator::Initialise( the_config );
     }catch(...){
-        csoca::elog << "Problem during initialisation. See error(s) above. Exiting..." << std::endl;
+        handle_eptr( std::current_exception() );
+        music::elog << "Problem during initialisation. See error(s) above. Exiting..." << std::endl;
         #if defined(USE_MPI) 
         MPI_Finalize();
         #endif
@@ -181,6 +223,8 @@ int main( int argc, char** argv )
     // do the job...
     ///////////////////////////////////////////////////////////////////////
     ic_generator::Run( the_config );
+
+    // particle::test_plt();
     ///////////////////////////////////////////////////////////////////////
 
 #if defined(USE_MPI)
@@ -188,8 +232,8 @@ int main( int argc, char** argv )
     MPI_Finalize();
 #endif
 
-    csoca::ilog << "-------------------------------------------------------------------------------" << std::endl;
-    csoca::ilog << "Done." << std::endl;
+    music::ilog << "-------------------------------------------------------------------------------" << std::endl;
+    music::ilog << "Done. Have a nice day!\n" << std::endl;
 
     return 0;
 }
diff --git a/src/output_plugin.cc b/src/output_plugin.cc
index 763336e..d0a7c5d 100644
--- a/src/output_plugin.cc
+++ b/src/output_plugin.cc
@@ -23,31 +23,32 @@ void print_output_plugins()
 	
 	std::map< std::string, output_plugin_creator *>::iterator it;
 	it = m.begin();
-	csoca::ilog << "Available output plug-ins:\n";
+	music::ilog << "Available output plug-ins:\n";
 	while( it!=m.end() )
 	{
 		if( it->second )
-			csoca::ilog << "\t\'" << it->first << "\'\n";
+			music::ilog << "\t\'" << it->first << "\'\n";
 		++it;
 	}
+	music::ilog << std::endl;
 }
 
-std::unique_ptr<output_plugin> select_output_plugin( ConfigFile& cf )
+std::unique_ptr<output_plugin> select_output_plugin( config_file& cf )
 {
-	std::string formatname = cf.GetValue<std::string>( "output", "format" );
+	std::string formatname = cf.get_value<std::string>( "output", "format" );
 	
 	output_plugin_creator *the_output_plugin_creator 
 	= get_output_plugin_map()[ formatname ];
 	
 	if( !the_output_plugin_creator )
 	{	
-		csoca::elog << "Error: output plug-in \'" << formatname << "\' not found." << std::endl;
+		music::elog << "Output plug-in \'" << formatname << "\' not found." << std::endl;
 		print_output_plugins();
 		throw std::runtime_error("Unknown output plug-in");
 		
 	}else{
-		csoca::ilog << "-------------------------------------------------------------------------------" << std::endl;
-        csoca::ilog << std::setw(32) << std::left << "Output plugin" << " : " << formatname << std::endl;
+		music::ilog << "-------------------------------------------------------------------------------" << std::endl;
+		music::ilog << std::setw(32) << std::left << "Output plugin" << " : " << formatname << std::endl;
 	}
 	
 	return std::move(the_output_plugin_creator->create( cf ));
diff --git a/src/plugins/output_arepo.cc b/src/plugins/output_arepo.cc
new file mode 100644
index 0000000..1674604
--- /dev/null
+++ b/src/plugins/output_arepo.cc
@@ -0,0 +1,241 @@
+
+#ifdef USE_HDF5
+#include <unistd.h> // for unlink
+#include <output_plugin.hh>
+#include "HDF_IO.hh"
+
+template <typename T>
+std::vector<T> from_6array(const T *a) // a must point to at least six readable elements
+{
+  return std::vector<T>{{a[0], a[1], a[2], a[3], a[4], a[5]}}; // copies a[0]..a[5] into a new vector
+}
+
+template <typename T>
+std::vector<T> from_value(const T a) // wraps a scalar so it can be passed where a vector is expected
+{
+  return std::vector<T>{{a}}; // for the int/double uses in this file: a vector holding just a
+}
+
+// Unnamed namespace: output_gadget_hdf5.cc defines a different class template under the
+// same name, so internal linkage is needed to keep the two from colliding (ODR).
+namespace
+{
+//! Output plugin registered as "AREPO": writes particle data and a Gadget-style header to HDF5.
+template <typename write_real_t>
+class gadget_hdf5_output_plugin : public output_plugin
+{
+  struct header_t
+  {
+    unsigned npart[6];
+    double mass[6];
+    double time;
+    double redshift;
+    int flag_sfr;
+    int flag_feedback;
+    unsigned int npartTotal[6];
+    int flag_cooling;
+    int num_files;
+    double BoxSize;
+    double Omega0;
+    double OmegaLambda;
+    double HubbleParam;
+    int flag_stellarage;
+    int flag_metals;
+    unsigned int npartTotalHighWord[6];
+    int flag_entropy_instead_u;
+    int flag_doubleprecision;
+  };
+
+protected:
+  int num_files_, num_simultaneous_writers_;
+  header_t header_;
+  real_t lunit_, vunit_;
+  bool blongids_;
+  std::string this_fname_;
+  double Tini_;
+  unsigned pmgrid_;
+  unsigned gridboost_;
+  int doublePrec_;
+  int doBaryons_;
+  double softening_;
+
+public:
+  //! constructor: reads units and cosmology from the config and creates an empty output file
+  explicit gadget_hdf5_output_plugin(config_file &cf)
+      : output_plugin(cf, "GADGET-HDF5")
+  {
+    num_files_ = 1;
+#ifdef USE_MPI
+    // use as many output files as we have MPI tasks
+    MPI_Comm_size(MPI_COMM_WORLD, &num_files_);
+#endif
+    real_t astart = 1.0 / (1.0 + cf_.get_value<double>("setup", "zstart"));
+    lunit_ = cf_.get_value<double>("setup", "BoxLength");
+    vunit_ = lunit_ / std::sqrt(astart);
+    blongids_ = cf_.get_value_safe<bool>("output", "UseLongids", false);
+    num_simultaneous_writers_ = cf_.get_value_safe<int>("output", "NumSimWriters", num_files_);
+
+    for (int i = 0; i < 6; ++i)
+    {
+      header_.npart[i] = 0;
+      header_.npartTotal[i] = 0;
+      header_.npartTotalHighWord[i] = 0;
+      header_.mass[i] = 0.0;
+    }
+
+    header_.time = astart;
+    header_.redshift = 1.0 / astart - 1.0;
+    header_.flag_sfr = 0;
+    header_.flag_feedback = 0;
+    header_.flag_cooling = 0;
+    header_.num_files = num_files_;
+    header_.BoxSize = lunit_;
+    header_.Omega0 = cf_.get_value<double>("cosmology", "Omega_m");
+    header_.OmegaLambda = cf_.get_value<double>("cosmology", "Omega_L");
+    header_.HubbleParam = cf_.get_value<double>("cosmology", "H0") / 100.0;
+    header_.flag_stellarage = 0;
+    header_.flag_metals = 0;
+    header_.flag_entropy_instead_u = 0;
+    header_.flag_doubleprecision = (typeid(write_real_t) == typeid(double)) ? true : false;
+
+    // initial gas temperature
+    double Tcmb0 = 2.726;
+    double Omegab = cf_.get_value<double>("cosmology", "Omega_b");
+    double h = cf_.get_value<double>("cosmology", "H0") / 100.0, h2 = h*h;
+    double adec = 1.0 / (160.0 * pow(Omegab * h2 / 0.022, 2.0 / 5.0));
+    Tini_ = astart < adec ? Tcmb0 / astart : Tcmb0 / astart / astart * adec;
+
+    // suggested PM res
+    pmgrid_ = 2*cf_.get_value<double>("setup", "GridRes");
+    gridboost_ = 1;
+    softening_ = cf_.get_value<double>("setup", "BoxLength")/pmgrid_/20;
+    doBaryons_ = cf_.get_value<bool>("setup", "DoBaryons");
+#if !defined(USE_SINGLEPRECISION)
+    doublePrec_ = 1;
+#else
+    doublePrec_ = 0;
+#endif
+
+    this_fname_ = fname_;
+#ifdef USE_MPI
+    int thisrank = 0;
+    MPI_Comm_rank(MPI_COMM_WORLD, &thisrank);
+    if (num_files_ > 1)
+      this_fname_ += "." + std::to_string(thisrank);
+#endif
+
+    unlink(this_fname_.c_str());
+    HDFCreateFile(this_fname_);
+  }
+
+  // the header is written from the destructor, after write_particle_data has filled in the counts
+  ~gadget_hdf5_output_plugin()
+  {
+    // do not write to the file or report success while the stack is unwinding after an error
+    if (std::uncaught_exception())
+      return;
+
+    HDFCreateGroup(this_fname_, "Header");
+    HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_ThisFile", from_6array<unsigned>(header_.npart));
+    HDFWriteGroupAttribute(this_fname_, "Header", "MassTable", from_6array<double>(header_.mass));
+    HDFWriteGroupAttribute(this_fname_, "Header", "Time", from_value<double>(header_.time));
+    HDFWriteGroupAttribute(this_fname_, "Header", "Redshift", from_value<double>(header_.redshift));
+    HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total", from_6array<unsigned>(header_.npartTotal));
+    HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array<unsigned>(header_.npartTotalHighWord));
+    HDFWriteGroupAttribute(this_fname_, "Header", "NumFilesPerSnapshot", from_value<int>(header_.num_files));
+    HDFWriteGroupAttribute(this_fname_, "Header", "BoxSize", from_value<double>(header_.BoxSize));
+    HDFWriteGroupAttribute(this_fname_, "Header", "Omega0", from_value<double>(header_.Omega0));
+    HDFWriteGroupAttribute(this_fname_, "Header", "OmegaLambda", from_value<double>(header_.OmegaLambda));
+    HDFWriteGroupAttribute(this_fname_, "Header", "HubbleParam", from_value<double>(header_.HubbleParam));
+    HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Sfr", from_value<int>(0));
+    HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Cooling", from_value<int>(0));
+    HDFWriteGroupAttribute(this_fname_, "Header", "Flag_StellarAge", from_value<int>(0));
+    HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Metals", from_value<int>(0));
+    HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Feedback", from_value<int>(0));
+    HDFWriteGroupAttribute(this_fname_, "Header", "Flag_DoublePrecision", (int)doublePrec_);
+    HDFWriteGroupAttribute(this_fname_, "Header", "haveBaryons", from_value<int>((int)doBaryons_));
+    HDFWriteGroupAttribute(this_fname_, "Header", "longIDs", from_value<int>((int)blongids_));
+    HDFWriteGroupAttribute(this_fname_, "Header", "suggested_pmgrid", from_value<int>(pmgrid_));
+    HDFWriteGroupAttribute(this_fname_, "Header", "suggested_gridboost", from_value<int>(gridboost_));
+    HDFWriteGroupAttribute(this_fname_, "Header", "suggested_highressoft", from_value<double>(softening_));
+    HDFWriteGroupAttribute(this_fname_, "Header", "suggested_gas_Tinit", from_value<double>(Tini_));
+
+    music::ilog << "Wrote AREPO HDF5 file(s) to " << this_fname_ << std::endl;
+  }
+
+  output_type write_species_as(const cosmo_species &) const { return output_type::particles; }
+
+  real_t position_unit() const { return lunit_; }
+
+  real_t velocity_unit() const { return vunit_; }
+
+  bool has_64bit_reals() const
+  {
+    return typeid(write_real_t) == typeid(double);
+  }
+
+  bool has_64bit_ids() const
+  {
+    return blongids_;
+  }
+
+  //! Particle-type index used for a species (baryon 0, dm 1, neutrino 3); -1 if unhandled.
+  int get_species_idx(const cosmo_species &s) const
+  {
+    switch (s)
+    {
+    case cosmo_species::dm:
+      return 1;
+    case cosmo_species::baryon:
+      return 0;
+    case cosmo_species::neutrino:
+      return 3;
+    }
+    return -1;
+  }
+
+  //! Write positions, velocities and IDs of one species; records its counts and mass in the header.
+  void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species)
+  {
+    int sid = get_species_idx(s);
+
+    assert(sid != -1);
+
+    header_.npart[sid] = (pc.get_local_num_particles());
+    header_.npartTotal[sid] = (uint32_t)(pc.get_global_num_particles());
+    header_.npartTotalHighWord[sid] = (uint32_t)((pc.get_global_num_particles()) >> 32);
+
+    double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3
+    double boxmass = Omega_species * rhoc * std::pow(header_.BoxSize, 3);
+    header_.mass[sid] = boxmass / pc.get_global_num_particles();
+
+    HDFCreateGroup(this_fname_, std::string("PartType") + std::to_string(sid));
+
+    //... write positions and velocities.....
+    if (this->has_64bit_reals())
+    {
+      HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions64_);
+      HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities64_);
+    }
+    else
+    {
+      HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions32_);
+      HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities32_);
+    }
+
+    //... write ids.....
+    if (this->has_64bit_ids())
+      HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids64_);
+    else
+      HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids32_);
+  }
+};
+
+#if !defined(USE_SINGLEPRECISION)
+output_plugin_creator_concrete<gadget_hdf5_output_plugin<double>> creator1("AREPO");
+#else
+output_plugin_creator_concrete<gadget_hdf5_output_plugin<float>> creator1("AREPO");
+#endif
+} // namespace
+
+#endif
\ No newline at end of file
diff --git a/src/plugins/output_gadget2.cc b/src/plugins/output_gadget2.cc
index fddb734..0a3afbb 100644
--- a/src/plugins/output_gadget2.cc
+++ b/src/plugins/output_gadget2.cc
@@ -3,6 +3,7 @@
 
 constexpr int empty_fill_bytes{56};
 
+template <typename write_real_t>
 class gadget2_output_plugin : public output_plugin
 {
 public:
@@ -33,32 +34,48 @@ protected:
 	int num_files_;
 	header this_header_;
 	real_t lunit_, vunit_;
+	bool blongids_;
 
 public:
 	//! constructor
-	explicit gadget2_output_plugin(ConfigFile &cf )
-	: output_plugin(cf, "GADGET-2")
+	explicit gadget2_output_plugin(config_file &cf)
+			: output_plugin(cf, "GADGET-2")
 	{
 		num_files_ = 1;
 #ifdef USE_MPI
 		// use as many output files as we have MPI tasks
 		MPI_Comm_size(MPI_COMM_WORLD, &num_files_);
 #endif
-		real_t astart = 1.0/(1.0+cf_.GetValue<double>("setup", "zstart"));
-		lunit_ = cf_.GetValue<double>("setup", "BoxLength");
+		real_t astart = 1.0 / (1.0 + cf_.get_value<double>("setup", "zstart"));
+		lunit_ = cf_.get_value<double>("setup", "BoxLength");
 		vunit_ = lunit_ / std::sqrt(astart);
+		blongids_ = cf_.get_value_safe<bool>("output", "UseLongids", false);
 	}
 
-    output_type write_species_as( const cosmo_species & ) const { return output_type::particles; }
+	output_type write_species_as(const cosmo_species &) const { return output_type::particles; }
 
 	real_t position_unit() const { return lunit_; }
 
 	real_t velocity_unit() const { return vunit_; }
 
-	void write_particle_data(const particle::container &pc, const cosmo_species &s )
+	bool has_64bit_reals() const
 	{
-			// fill the Gadget-2 header
-		memset(reinterpret_cast<void*>(&this_header_),0,sizeof(header));
+		if (typeid(write_real_t) == typeid(double))
+			return true;
+		return false;
+	}
+
+	bool has_64bit_ids() const
+	{
+		if (blongids_)
+			return true;
+		return false;
+	}
+
+	void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species)
+	{
+		// fill the Gadget-2 header
+		memset(reinterpret_cast<void *>(&this_header_), 0, sizeof(header));
 
 		for (int i = 0; i < 6; ++i)
 		{
@@ -73,7 +90,7 @@ public:
 
 		/////
 		//... set time ......................................................
-		this_header_.redshift = cf_.GetValue<double>("setup", "zstart");
+		this_header_.redshift = cf_.get_value<double>("setup", "zstart");
 		this_header_.time = 1.0 / (1.0 + this_header_.redshift);
 
 		//... SF flags
@@ -83,10 +100,10 @@ public:
 
 		//...
 		this_header_.num_files = num_files_; //1;
-		this_header_.BoxSize = cf_.GetValue<double>("setup", "BoxLength");
-		this_header_.Omega0 = cf_.GetValue<double>("cosmology", "Omega_m");
-		this_header_.OmegaLambda = cf_.GetValue<double>("cosmology", "Omega_L");
-		this_header_.HubbleParam = cf_.GetValue<double>("cosmology", "H0") / 100.0;
+		this_header_.BoxSize = cf_.get_value<double>("setup", "BoxLength");
+		this_header_.Omega0 = cf_.get_value<double>("cosmology", "Omega_m");
+		this_header_.OmegaLambda = cf_.get_value<double>("cosmology", "Omega_L");
+		this_header_.HubbleParam = cf_.get_value<double>("cosmology", "H0") / 100.0;
 
 		this_header_.flag_stellarage = 0;
 		this_header_.flag_metals = 0;
@@ -100,50 +117,73 @@ public:
 
 		//... set masses
 		double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3
-		double boxmass = this_header_.Omega0 * rhoc * std::pow(this_header_.BoxSize,3);
+		double boxmass = Omega_species * rhoc * std::pow(this_header_.BoxSize, 3);
 		this_header_.mass[1] = boxmass / pc.get_global_num_particles();
-	
+
 		std::string fname = fname_;
 		int thisrank = 0;
-		
+
 #ifdef USE_MPI
-		MPI_Comm_rank(MPI_COMM_WORLD,&thisrank);
-		if( num_files_ > 1 )
+		MPI_Comm_rank(MPI_COMM_WORLD, &thisrank);
+		if (num_files_ > 1)
 			fname += "." + std::to_string(thisrank);
 #endif
 		uint32_t blocksz;
 		std::ofstream ofs(fname.c_str(), std::ios::binary);
 
-		csoca::ilog << "Writer \'" << this->interface_name_ << "\' : Writing data for " << pc.get_global_num_particles() << " particles." << std::endl;
+		music::ilog << "Writer \'" << this->interface_name_ << "\' : Writing data for " << pc.get_global_num_particles() << " particles." << std::endl;
 
 		blocksz = sizeof(header);
-		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
-		ofs.write( reinterpret_cast<char*>(&this_header_), sizeof(header) );
-		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
-		
-		blocksz = 3 * sizeof(float) * pc.get_local_num_particles();
-		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
-		ofs.write( reinterpret_cast<const char*>(pc.get_pos_ptr()), blocksz );
-		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
-		
-		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
-		ofs.write( reinterpret_cast<const char*>(pc.get_vel_ptr()), blocksz );
-		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
-		
-		blocksz = sizeof(float) * pc.get_local_num_particles();
-		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
-		ofs.write( reinterpret_cast<const char*>(pc.get_ids_ptr()), blocksz );
-		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
-		
+		ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
+		ofs.write(reinterpret_cast<char *>(&this_header_), sizeof(header));
+		ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
+
+		// we write double precision
+		if (this->has_64bit_reals())
+		{
+			blocksz = 3 * sizeof(double) * pc.get_local_num_particles();
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
+			ofs.write(reinterpret_cast<const char *>(pc.get_pos64_ptr()), blocksz);
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
+
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
+			ofs.write(reinterpret_cast<const char *>(pc.get_vel64_ptr()), blocksz);
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
+		}
+		else
+		{
+			blocksz = 3 * sizeof(float) * pc.get_local_num_particles();
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
+			ofs.write(reinterpret_cast<const char *>(pc.get_pos32_ptr()), blocksz);
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
+
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
+			ofs.write(reinterpret_cast<const char *>(pc.get_vel32_ptr()), blocksz);
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
+		}
+
+		// we write long IDs
+		if (this->has_64bit_ids())
+		{
+			blocksz = sizeof(uint64_t) * pc.get_local_num_particles();
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
+			ofs.write(reinterpret_cast<const char *>(pc.get_ids64_ptr()), blocksz);
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
+		}
+		else
+		{
+			blocksz = sizeof(uint32_t) * pc.get_local_num_particles();
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
+			ofs.write(reinterpret_cast<const char *>(pc.get_ids32_ptr()), blocksz);
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
+		}
 	}
 };
 
-
 namespace
 {
-   output_plugin_creator_concrete<gadget2_output_plugin> creator1("gadget2"); 
-// output_plugin_creator_concrete<gadget2_output_plugin<float>> creator1("gadget2");
-// #ifndef SINGLE_PRECISION
-// output_plugin_creator_concrete<gadget2_output_plugin<double>> creator2("gadget2_double");
-// #endif
+output_plugin_creator_concrete<gadget2_output_plugin<float>> creator1("gadget2");
+#if !defined(USE_SINGLEPRECISION)
+output_plugin_creator_concrete<gadget2_output_plugin<double>> creator3("gadget2_double");
+#endif
 } // namespace
diff --git a/src/plugins/output_gadget_hdf5.cc b/src/plugins/output_gadget_hdf5.cc
new file mode 100644
index 0000000..3908e64
--- /dev/null
+++ b/src/plugins/output_gadget_hdf5.cc
@@ -0,0 +1,210 @@
+
+#ifdef USE_HDF5
+#include <unistd.h> // for unlink
+#include <output_plugin.hh>
+#include "HDF_IO.hh"
+
+template <typename T>
+std::vector<T> from_6array(const T *a) // a must point to at least six readable elements
+{
+  return std::vector<T>{{a[0], a[1], a[2], a[3], a[4], a[5]}}; // copies a[0]..a[5] into a new vector
+}
+
+template <typename T>
+std::vector<T> from_value(const T a) // wraps a scalar so it can be passed where a vector is expected
+{
+  return std::vector<T>{{a}}; // for the int/double uses in this file: a vector holding just a
+}
+
+// The plugin lives in an unnamed namespace: output_arepo.cc defines a different class
+// template under the same name, and internal linkage keeps the two definitions from
+// colliding at link time (one-definition rule).
+namespace
+{
+//! Output plugin registered as "gadget_hdf5" / "gadget_hdf5_double": particles plus Gadget header in HDF5.
+template <typename write_real_t>
+class gadget_hdf5_output_plugin : public output_plugin
+{
+  struct header_t
+  {
+    unsigned npart[6];
+    double mass[6];
+    double time;
+    double redshift;
+    int flag_sfr;
+    int flag_feedback;
+    unsigned int npartTotal[6];
+    int flag_cooling;
+    int num_files;
+    double BoxSize;
+    double Omega0;
+    double OmegaLambda;
+    double HubbleParam;
+    int flag_stellarage;
+    int flag_metals;
+    unsigned int npartTotalHighWord[6];
+    int flag_entropy_instead_u;
+    int flag_doubleprecision;
+  };
+
+protected:
+  int num_files_, num_simultaneous_writers_;
+  header_t header_;
+  real_t lunit_, vunit_;
+  bool blongids_;
+  std::string this_fname_;
+
+public:
+  //! constructor: reads units and cosmology from the config and creates an empty output file
+  explicit gadget_hdf5_output_plugin(config_file &cf)
+      : output_plugin(cf, "GADGET-HDF5")
+  {
+    num_files_ = 1;
+#ifdef USE_MPI
+    // use as many output files as we have MPI tasks
+    MPI_Comm_size(MPI_COMM_WORLD, &num_files_);
+#endif
+    real_t astart = 1.0 / (1.0 + cf_.get_value<double>("setup", "zstart"));
+    lunit_ = cf_.get_value<double>("setup", "BoxLength");
+    vunit_ = lunit_ / std::sqrt(astart);
+    blongids_ = cf_.get_value_safe<bool>("output", "UseLongids", false);
+    num_simultaneous_writers_ = cf_.get_value_safe<int>("output", "NumSimWriters", num_files_);
+
+    for (int i = 0; i < 6; ++i)
+    {
+      header_.npart[i] = 0;
+      header_.npartTotal[i] = 0;
+      header_.npartTotalHighWord[i] = 0;
+      header_.mass[i] = 0.0;
+    }
+
+    header_.time = astart;
+    header_.redshift = 1.0 / astart - 1.0;
+    header_.flag_sfr = 0;
+    header_.flag_feedback = 0;
+    header_.flag_cooling = 0;
+    header_.num_files = num_files_;
+    header_.BoxSize = lunit_;
+    header_.Omega0 = cf_.get_value<double>("cosmology", "Omega_m");
+    header_.OmegaLambda = cf_.get_value<double>("cosmology", "Omega_L");
+    header_.HubbleParam = cf_.get_value<double>("cosmology", "H0") / 100.0;
+    header_.flag_stellarage = 0;
+    header_.flag_metals = 0;
+    header_.flag_entropy_instead_u = 0;
+    header_.flag_doubleprecision = (typeid(write_real_t) == typeid(double)) ? true : false;
+
+    this_fname_ = fname_;
+#ifdef USE_MPI
+    int thisrank = 0;
+    MPI_Comm_rank(MPI_COMM_WORLD, &thisrank);
+    if (num_files_ > 1)
+      this_fname_ += "." + std::to_string(thisrank);
+#endif
+
+    unlink(this_fname_.c_str());
+    HDFCreateFile(this_fname_);
+  }
+
+  // the header is written from the destructor, after write_particle_data has filled in the counts
+  ~gadget_hdf5_output_plugin()
+  {
+    if (!std::uncaught_exception())
+    {
+      HDFCreateGroup(this_fname_, "Header");
+      HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_ThisFile", from_6array<unsigned>(header_.npart));
+      HDFWriteGroupAttribute(this_fname_, "Header", "MassTable", from_6array<double>(header_.mass));
+      HDFWriteGroupAttribute(this_fname_, "Header", "Time", from_value<double>(header_.time));
+      HDFWriteGroupAttribute(this_fname_, "Header", "Redshift", from_value<double>(header_.redshift));
+      HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Sfr", from_value<int>(header_.flag_sfr));
+      HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Feedback", from_value<int>(header_.flag_feedback));
+      HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total", from_6array<unsigned>(header_.npartTotal));
+      HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Cooling", from_value<int>(header_.flag_cooling));
+      HDFWriteGroupAttribute(this_fname_, "Header", "NumFilesPerSnapshot", from_value<int>(header_.num_files));
+      HDFWriteGroupAttribute(this_fname_, "Header", "BoxSize", from_value<double>(header_.BoxSize));
+      HDFWriteGroupAttribute(this_fname_, "Header", "Omega0", from_value<double>(header_.Omega0));
+      HDFWriteGroupAttribute(this_fname_, "Header", "OmegaLambda", from_value<double>(header_.OmegaLambda));
+      HDFWriteGroupAttribute(this_fname_, "Header", "HubbleParam", from_value<double>(header_.HubbleParam));
+      HDFWriteGroupAttribute(this_fname_, "Header", "Flag_StellarAge", from_value<int>(header_.flag_stellarage));
+      HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Metals", from_value<int>(header_.flag_metals));
+      HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array<unsigned>(header_.npartTotalHighWord));
+      HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Entropy_ICs", from_value<int>(header_.flag_entropy_instead_u));
+
+      music::ilog << "Wrote Gadget-HDF5 file(s) to " << this_fname_ << std::endl;
+    }
+  }
+
+  output_type write_species_as(const cosmo_species &) const { return output_type::particles; }
+
+  real_t position_unit() const { return lunit_; }
+
+  real_t velocity_unit() const { return vunit_; }
+
+  bool has_64bit_reals() const
+  {
+    return typeid(write_real_t) == typeid(double);
+  }
+
+  bool has_64bit_ids() const
+  {
+    return blongids_;
+  }
+
+  //! Particle-type index used for a species (baryon 0, dm 1, neutrino 3); -1 if unhandled.
+  int get_species_idx(const cosmo_species &s) const
+  {
+    switch (s)
+    {
+    case cosmo_species::dm:
+      return 1;
+    case cosmo_species::baryon:
+      return 0;
+    case cosmo_species::neutrino:
+      return 3;
+    }
+    return -1;
+  }
+
+  //! Write positions, velocities and IDs of one species; records its counts and mass in the header.
+  void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species)
+  {
+    int sid = get_species_idx(s);
+
+    assert(sid != -1);
+
+    header_.npart[sid] = (pc.get_local_num_particles());
+    header_.npartTotal[sid] = (uint32_t)(pc.get_global_num_particles());
+    header_.npartTotalHighWord[sid] = (uint32_t)((pc.get_global_num_particles()) >> 32);
+
+    double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3
+    double boxmass = Omega_species * rhoc * std::pow(header_.BoxSize, 3);
+    header_.mass[sid] = boxmass / pc.get_global_num_particles();
+
+    HDFCreateGroup(this_fname_, std::string("PartType") + std::to_string(sid));
+
+    //... write positions and velocities.....
+    if (this->has_64bit_reals())
+    {
+      HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions64_);
+      HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities64_);
+    }
+    else
+    {
+      HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions32_);
+      HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities32_);
+    }
+
+    //... write ids.....
+    if (this->has_64bit_ids())
+      HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids64_);
+    else
+      HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids32_);
+  }
+};
+
+output_plugin_creator_concrete<gadget_hdf5_output_plugin<float>> creator1("gadget_hdf5");
+#if !defined(USE_SINGLEPRECISION)
+output_plugin_creator_concrete<gadget_hdf5_output_plugin<double>> creator3("gadget_hdf5_double");
+#endif
+} // namespace
+
+#endif
\ No newline at end of file
diff --git a/src/plugins/output_generic.cc b/src/plugins/output_generic.cc
index 10eacfc..79c2139 100644
--- a/src/plugins/output_generic.cc
+++ b/src/plugins/output_generic.cc
@@ -21,13 +21,13 @@ protected:
 	bool out_eulerian_;
 public:
 	//! constructor
-	explicit generic_output_plugin(ConfigFile &cf )
+	explicit generic_output_plugin(config_file &cf )
 	: output_plugin(cf, "Generic HDF5")
 	{
-		real_t astart   = 1.0/(1.0+cf_.GetValue<double>("setup", "zstart"));
-		real_t boxsize  = cf_.GetValue<double>("setup", "BoxLength");
+		real_t astart   = 1.0/(1.0+cf_.get_value<double>("setup", "zstart"));
+		real_t boxsize  = cf_.get_value<double>("setup", "BoxLength");
 
-		out_eulerian_   = cf_.GetValueSafe<bool>("output", "generic_out_eulerian",false);
+		out_eulerian_   = cf_.get_value_safe<bool>("output", "generic_out_eulerian",false);
 
 		if( CONFIG::MPI_task_rank == 0 )
 		{
@@ -50,6 +50,10 @@ public:
 		return output_type::field_lagrangian;
 	}
 
+	bool has_64bit_reals() const{ return true; }
+
+	bool has_64bit_ids() const{ return true; }
+
 	real_t position_unit() const { return 1.0; }
 	
 	real_t velocity_unit() const { return 1.0; }
@@ -95,7 +99,7 @@ void generic_output_plugin::write_grid_data(const Grid_FFT<real_t> &g, const cos
 {
 	std::string field_name = this->get_field_name( s, c );
 	g.Write_to_HDF5(fname_, field_name);
-	csoca::ilog << interface_name_ << " : Wrote field \'" << field_name << "\' to file \'" << fname_ << "\'" << std::endl;
+	music::ilog << interface_name_ << " : Wrote field \'" << field_name << "\' to file \'" << fname_ << "\'" << std::endl;
 }
 
 namespace
diff --git a/src/plugins/output_grafic2.cc b/src/plugins/output_grafic2.cc
index b511dd9..b3f3f04 100644
--- a/src/plugins/output_grafic2.cc
+++ b/src/plugins/output_grafic2.cc
@@ -40,31 +40,31 @@ protected:
 
 public:
     //! constructor
-    explicit grafic2_output_plugin(ConfigFile &cf)
+    explicit grafic2_output_plugin(config_file &cf)
         : output_plugin(cf, "GRAFIC2/RAMSES")
     {
         lunit_ = 1.0;
         vunit_ = 1.0;
 
         double
-            boxlength = cf_.GetValue<double>("setup", "BoxLength"),
-            H0 = cf_.GetValue<double>("cosmology", "H0"),
-            zstart = cf_.GetValue<double>("setup", "zstart"),
+            boxlength = cf_.get_value<double>("setup", "BoxLength"),
+            H0 = cf_.get_value<double>("cosmology", "H0"),
+            zstart = cf_.get_value<double>("setup", "zstart"),
             astart = 1.0 / (1.0 + zstart),
-            omegam = cf_.GetValue<double>("cosmology", "Omega_m"),
-            omegaL = cf_.GetValue<double>("cosmology", "Omega_L");
-        uint32_t ngrid = cf_.GetValue<int>("setup", "GridRes");
+            omegam = cf_.get_value<double>("cosmology", "Omega_m"),
+            omegaL = cf_.get_value<double>("cosmology", "Omega_L");
+        uint32_t ngrid = cf_.get_value<int>("setup", "GridRes");
 
-        bUseSPT_ = cf_.GetValueSafe<bool>("output", "grafic_use_SPT", false);
+        bUseSPT_ = cf_.get_value_safe<bool>("output", "grafic_use_SPT", false);
         levelmin_ = uint32_t(std::log2(double(ngrid)) + 1e-6);
 
         if (std::abs(std::pow(2.0, levelmin_) - double(ngrid)) > 1e-4)
         {
-            csoca::elog << interface_name_ << " plugin requires setup/GridRes to be power of 2!" << std::endl;
+            music::elog << interface_name_ << " plugin requires setup/GridRes to be power of 2!" << std::endl;
             abort();
         }
 
-        bhavebaryons_ = cf_.GetValueSafe<bool>("setup", "baryons", false);
+        bhavebaryons_ = cf_.get_value_safe<bool>("setup", "baryons", false);
 
         header_.n1 = ngrid;
         header_.n2 = ngrid;
@@ -89,7 +89,7 @@ public:
         mkdir(dirname_.c_str(), 0777);
 
         // write RAMSES namelist file? if so only with one task
-        if (cf_.GetValueSafe<bool>("output", "ramses_nml", true) && CONFIG::MPI_task_rank==0 )
+        if (cf_.get_value_safe<bool>("output", "ramses_nml", true) && CONFIG::MPI_task_rank==0 )
         {
             write_ramses_namelist();
         }
@@ -102,6 +102,10 @@ public:
         return output_type::field_lagrangian;
     }
 
+    bool has_64bit_reals() const{ return false; }
+
+	bool has_64bit_ids() const{ return false; }
+
     real_t position_unit() const { return lunit_; }
 
     real_t velocity_unit() const { return vunit_; }
@@ -192,7 +196,7 @@ void grafic2_output_plugin::write_grid_data(const Grid_FFT<real_t> &g, const cos
             }
 
             // check field size against buffer size...
-            uint32_t ngrid = cf_.GetValue<int>("setup", "GridRes");
+            uint32_t ngrid = cf_.get_value<int>("setup", "GridRes");
             assert( g.global_size(0) == ngrid && g.global_size(1) == ngrid && g.global_size(2) == ngrid);
             assert( g.size(1) == ngrid && g.size(2) == ngrid);
             // write actual field slice by slice
@@ -219,7 +223,7 @@ void grafic2_output_plugin::write_grid_data(const Grid_FFT<real_t> &g, const cos
 
     } // end loop over write_rank
 
-    csoca::ilog << interface_name_ << " : Wrote field to file \'" << file_name << "\'" << std::endl;
+    music::ilog << interface_name_ << " : Wrote field to file \'" << file_name << "\'" << std::endl;
 }
 
 void grafic2_output_plugin::write_ramses_namelist(void) const
@@ -275,7 +279,7 @@ void grafic2_output_plugin::write_ramses_namelist(void) const
          << "m_refine=" << 1 + naddref << "*8.,\n"
          << "/\n";
 
-    csoca::ilog << interface_name_ << " wrote partial RAMSES namelist file \'" << fname_ << "\'" << std::endl;
+    music::ilog << interface_name_ << " wrote partial RAMSES namelist file \'" << fname_ << "\'" << std::endl;
 }
 
 namespace
diff --git a/src/plugins/random_music.cc b/src/plugins/random_music.cc
index a13726f..28486b5 100644
--- a/src/plugins/random_music.cc
+++ b/src/plugins/random_music.cc
@@ -34,29 +34,29 @@ protected:
   //void store_rnd(int ilevel, rng *prng);
 
 public:
-  explicit RNG_music(ConfigFile &cf) : RNG_plugin(cf), initialized_(false) {}
+  explicit RNG_music(config_file &cf) : RNG_plugin(cf), initialized_(false) {}
 
   ~RNG_music() {}
 
   bool isMultiscale() const { return true; }
 
-  void Fill_Grid( Grid_FFT<real_t>& g ) const { }
+  void Fill_Grid( Grid_FFT<real_t>& g ) {} //const { }
 
   void initialize_for_grid_structure()//const refinement_hierarchy &refh)
   {
     //prefh_ = &refh;
-    levelmin_ = pcf_->GetValue<unsigned>("setup", "levelmin");
-    levelmax_ = pcf_->GetValue<unsigned>("setup", "levelmax");
+    levelmin_ = pcf_->get_value<unsigned>("setup", "levelmin");
+    levelmax_ = pcf_->get_value<unsigned>("setup", "levelmax");
 
-    ran_cube_size_ = pcf_->GetValueSafe<unsigned>("random", "cubesize", DEF_RAN_CUBE_SIZE);
-    disk_cached_ = pcf_->GetValueSafe<bool>("random", "disk_cached", true);
-    restart_ = pcf_->GetValueSafe<bool>("random", "restart", false);
+    ran_cube_size_ = pcf_->get_value_safe<unsigned>("random", "cubesize", DEF_RAN_CUBE_SIZE);
+    disk_cached_ = pcf_->get_value_safe<bool>("random", "disk_cached", true);
+    restart_ = pcf_->get_value_safe<bool>("random", "restart", false);
 
     mem_cache_.assign(levelmax_ - levelmin_ + 1, (std::vector<real_t> *)NULL);
 
     if (restart_ && !disk_cached_)
     {
-      csoca::elog.Print("Cannot restart from mem cached random numbers.");
+      music::elog.Print("Cannot restart from mem cached random numbers.");
       throw std::runtime_error("Cannot restart from mem cached random numbers.");
     }
 
@@ -93,8 +93,8 @@ void RNG_music::parse_random_parameters(void)
     std::string tempstr;
     bool noseed = false;
     sprintf(seedstr, "seed[%d]", i);
-    if (pcf_->ContainsKey("random", seedstr))
-      tempstr = pcf_->GetValue<std::string>("random", seedstr);
+    if (pcf_->contains_key("random", seedstr))
+      tempstr = pcf_->get_value<std::string>("random", seedstr);
     else
     {
       // "-2" means that no seed entry was found for that level
@@ -105,7 +105,7 @@ void RNG_music::parse_random_parameters(void)
     if (is_number(tempstr))
     {
       long ltemp;
-      pcf_->Convert(tempstr, ltemp);
+      pcf_->convert(tempstr, ltemp);
       rngfnames_.push_back("");
       if (noseed) // ltemp < 0 )
         //... generate some dummy seed which only depends on the level, negative so we know it's not
@@ -116,7 +116,7 @@ void RNG_music::parse_random_parameters(void)
       {
         if (ltemp <= 0)
         {
-          csoca::elog.Print("Specified seed [random]/%s needs to be a number >0!", seedstr);
+          music::elog.Print("Specified seed [random]/%s needs to be a number >0!", seedstr);
           throw std::runtime_error("Seed values need to be >0");
         }
         rngseeds_.push_back(ltemp);
@@ -126,7 +126,7 @@ void RNG_music::parse_random_parameters(void)
     {
       rngfnames_.push_back(tempstr);
       rngseeds_.push_back(-1);
-      csoca::ilog.Print("Random numbers for level %3d will be read from file.", i);
+      music::ilog.Print("Random numbers for level %3d will be read from file.", i);
     }
   }
 
@@ -141,7 +141,7 @@ void RNG_music::parse_random_parameters(void)
 
 void RNG_music::compute_random_numbers(void)
 {
-  bool rndsign = pcf_->GetValueSafe<bool>("random", "grafic_sign", false);
+  bool rndsign = pcf_->get_value_safe<bool>("random", "grafic_sign", false);
 
   std::vector<rng *> randc(std::max(levelmax_, levelmin_seed_) + 1, (rng *)NULL);
 
@@ -160,7 +160,7 @@ void RNG_music::compute_random_numbers(void)
       //#warning add possibility to read noise from file also here!
 
       if (rngfnames_[i].size() > 0)
-        csoca::ilog.Print("Warning: Cannot use filenames for higher levels currently! Ignoring!");
+        music::ilog.Print("Warning: Cannot use filenames for higher levels currently! Ignoring!");
 
       randc[i] = new rng(*randc[i - 1], ran_cube_size_, rngseeds_[i], true);
       delete randc[i - 1];
@@ -180,7 +180,7 @@ void RNG_music::compute_random_numbers(void)
     for (int ilevel = levelmin_seed_ - 1; ilevel >= (int)levelmin_; --ilevel)
     {
       if (rngseeds_[ilevel - levelmin_] > 0)
-        csoca::ilog.Print("Warning: random seed for level %d will be ignored.\n"
+        music::ilog.Print("Warning: random seed for level %d will be ignored.\n"
                 "            consistency requires that it is obtained by restriction from level %d",
                 ilevel, levelmin_seed_);
 
@@ -227,11 +227,11 @@ void RNG_music::compute_random_numbers(void)
   // {
   //   int lx[3], x0[3];
   //   int shift[3], levelmin_poisson;
-  //   shift[0] = pcf_->GetValue<int>("setup", "shift_x");
-  //   shift[1] = pcf_->GetValue<int>("setup", "shift_y");
-  //   shift[2] = pcf_->GetValue<int>("setup", "shift_z");
+  //   shift[0] = pcf_->get_value<int>("setup", "shift_x");
+  //   shift[1] = pcf_->get_value<int>("setup", "shift_y");
+  //   shift[2] = pcf_->get_value<int>("setup", "shift_z");
 
-  //   levelmin_poisson = pcf_->GetValue<unsigned>("setup", "levelmin");
+  //   levelmin_poisson = pcf_->get_value<unsigned>("setup", "levelmin");
 
   //   int lfac = 1 << (ilevel - levelmin_poisson);
 
diff --git a/src/plugins/random_music_wnoise_generator.cc b/src/plugins/random_music_wnoise_generator.cc
index a6d4c35..18e287f 100644
--- a/src/plugins/random_music_wnoise_generator.cc
+++ b/src/plugins/random_music_wnoise_generator.cc
@@ -11,7 +11,7 @@ template <typename T>
 music_wnoise_generator<T>::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx)
     : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed)
 {
-  csoca::ilog.Print("Generating random numbers (1) with seed %ld", baseseed);
+  music::ilog.Print("Generating random numbers (1) with seed %ld", baseseed);
 
   initialize();
   fill_subvolume(x0, lx);
@@ -21,7 +21,7 @@ template <typename T>
 music_wnoise_generator<T>::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, bool zeromean)
     : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed)
 {
-  csoca::ilog.Print("Generating random numbers (2) with seed %ld", baseseed);
+  music::ilog.Print("Generating random numbers (2) with seed %ld", baseseed);
 
   double mean = 0.0;
   size_t res_l = res;
@@ -31,7 +31,7 @@ music_wnoise_generator<T>::music_wnoise_generator(unsigned res, unsigned cubesiz
     cubesize_ = res_;
 
   if (!musicnoise)
-    csoca::elog.Print("This currently breaks compatibility. Need to disable by hand! Make sure to not check into repo");
+    music::elog.Print("This currently breaks compatibility. Need to disable by hand! Make sure to not check into repo");
 
   initialize();
 
@@ -90,7 +90,7 @@ music_wnoise_generator<T>::music_wnoise_generator(unsigned res, std::string rand
   std::ifstream ifs(randfname.c_str(), std::ios::binary);
   if (!ifs)
   {
-    csoca::elog.Print("Could not open random number file \'%s\'!", randfname.c_str());
+    music::elog.Print("Could not open random number file \'%s\'!", randfname.c_str());
     throw std::runtime_error(std::string("Could not open random number file \'") + randfname + std::string("\'!"));
   }
 
@@ -186,7 +186,7 @@ music_wnoise_generator<T>::music_wnoise_generator(unsigned res, std::string rand
   std::vector<float> in_float;
   std::vector<double> in_double;
 
-  csoca::ilog.Print("Random number file \'%s\'\n   contains %ld numbers. Reading...", randfname.c_str(), nx * ny * nz);
+  music::ilog.Print("Random number file \'%s\'\n   contains %ld numbers. Reading...", randfname.c_str(), nx * ny * nz);
 
   long double sum = 0.0, sum2 = 0.0;
   size_t count = 0;
@@ -285,7 +285,7 @@ music_wnoise_generator<T>::music_wnoise_generator(unsigned res, std::string rand
   mean = sum / count;
   var = sum2 / count - mean * mean;
 
-  csoca::ilog.Print("Random numbers in file have \n     mean = %f and var = %f", mean, var);
+  music::ilog.Print("Random numbers in file have \n     mean = %f and var = %f", mean, var);
 }
 
 //... copy construct by averaging down
@@ -298,7 +298,7 @@ music_wnoise_generator<T>::music_wnoise_generator(/*const*/ music_wnoise_generat
   long double sum = 0.0, sum2 = 0.0;
   size_t count = 0;
 
-  csoca::ilog.Print("Generating a coarse white noise field by k-space degrading");
+  music::ilog.Print("Generating a coarse white noise field by k-space degrading");
   //... initialize properties of container
   res_ = rc.res_ / 2;
   cubesize_ = res_;
@@ -307,7 +307,7 @@ music_wnoise_generator<T>::music_wnoise_generator(/*const*/ music_wnoise_generat
 
   if (sizeof(real_t) != sizeof(T))
   {
-    csoca::elog.Print("type mismatch with real_t in k-space averaging");
+    music::elog.Print("type mismatch with real_t in k-space averaging");
     throw std::runtime_error("type mismatch with real_t in k-space averaging");
   }
 
@@ -405,7 +405,7 @@ music_wnoise_generator<T>::music_wnoise_generator(/*const*/ music_wnoise_generat
   rmean = sum / count;
   rvar = sum2 / count - rmean * rmean;
 
-  csoca::ilog.Print("Restricted random numbers have\n       mean = %f, var = %f", rmean, rvar);
+  music::ilog.Print("Restricted random numbers have\n       mean = %f, var = %f", rmean, rvar);
 }
 
 template <typename T>
@@ -438,7 +438,7 @@ music_wnoise_generator<T>::music_wnoise_generator(music_wnoise_generator<T> &rc,
   if (kspace)
   {
 
-    csoca::ilog.Print("Generating a constrained random number set with seed %ld\n    using coarse mode replacement...", baseseed);
+    music::ilog.Print("Generating a constrained random number set with seed %ld\n    using coarse mode replacement...", baseseed);
     assert(lx[0] % 2 == 0 && lx[1] % 2 == 0 && lx[2] % 2 == 0);
     size_t nx = lx[0], ny = lx[1], nz = lx[2],
            nxc = lx[0] / 2, nyc = lx[1] / 2, nzc = lx[2] / 2;
@@ -573,7 +573,7 @@ music_wnoise_generator<T>::music_wnoise_generator(music_wnoise_generator<T> &rc,
   }
   else
   {
-    csoca::ilog.Print("Generating a constrained random number set with seed %ld\n    using Hoffman-Ribak constraints...", baseseed);
+    music::ilog.Print("Generating a constrained random number set with seed %ld\n    using Hoffman-Ribak constraints...", baseseed);
 
     double fac = 1.0 / sqrt(8.0); //1./sqrt(8.0);
 
@@ -613,7 +613,7 @@ void music_wnoise_generator<T>::register_cube(int i, int j, int k)
     rnums_.push_back(NULL);
     cubemap_[icube] = rnums_.size() - 1;
 #ifdef DEBUG
-    LOGDEBUG("registering new cube %d,%d,%d . ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]);
+    music::dlog.Print("registering new cube %d,%d,%d . ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]);
 #endif
   }
 }
@@ -637,7 +637,7 @@ double music_wnoise_generator<T>::fill_cube(int i, int j, int k)
 
   if (it == cubemap_.end())
   {
-    csoca::elog.Print("Attempt to access non-registered random number cube!");
+    music::elog.Print("Attempt to access non-registered random number cube!");
     throw std::runtime_error("Attempt to access non-registered random number cube!");
   }
 
@@ -674,7 +674,7 @@ void music_wnoise_generator<T>::subtract_from_cube(int i, int j, int k, double v
 
   if (it == cubemap_.end())
   {
-    csoca::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::subtract_from_cube", i, j, k);
+    music::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::subtract_from_cube", i, j, k);
     throw std::runtime_error("Attempt to access unallocated RND cube in music_wnoise_generator::subtract_from_cube");
   }
 
@@ -700,7 +700,7 @@ void music_wnoise_generator<T>::free_cube(int i, int j, int k)
 
   if (it == cubemap_.end())
   {
-    csoca::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::free_cube", i, j, k);
+    music::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::free_cube", i, j, k);
     throw std::runtime_error("Attempt to access unallocated RND cube in music_wnoise_generator::free_cube");
   }
 
@@ -724,7 +724,7 @@ void music_wnoise_generator<T>::initialize(void)
     cubesize_ = res_;
   }
 
-  csoca::ilog.Print("Generating random numbers w/ sample cube size of %d", cubesize_);
+  music::ilog.Print("Generating random numbers w/ sample cube size of %d", cubesize_);
 }
 
 template <typename T>
@@ -741,8 +741,8 @@ double music_wnoise_generator<T>::fill_subvolume(int *i0, int *n)
   ncube[2] = (int)(n[2] / cubesize_) + 2;
 
 #ifdef DEBUG
-  LOGDEBUG("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]);
-  LOGDEBUG("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]);
+  music::dlog.Print("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]);
+  music::dlog.Print("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]);
 #endif
 
   double mean = 0.0;
@@ -836,7 +836,7 @@ void music_wnoise_generator<T>::print_allocated(void)
     if (rnums_[i] != NULL)
       ncount++;
 
-  csoca::ilog.Print(" -> %d of %d random number cubes currently allocated", ncount, ntot);
+  music::ilog.Print(" -> %d of %d random number cubes currently allocated", ncount, ntot);
 }
 
 template class music_wnoise_generator<float>;
diff --git a/src/plugins/random_music_wnoise_generator.hh b/src/plugins/random_music_wnoise_generator.hh
index 5b9cb36..4dd1b37 100644
--- a/src/plugins/random_music_wnoise_generator.hh
+++ b/src/plugins/random_music_wnoise_generator.hh
@@ -80,7 +80,7 @@ protected:
 
     if (it == cubemap_.end())
     {
-      csoca::elog.Print("attempting to copy data from non-existing RND cube %d,%d,%d", i, j, k);
+      music::elog.Print("attempting to copy data from non-existing RND cube %d,%d,%d", i, j, k);
       throw std::runtime_error("attempting to copy data from non-existing RND cube");
     }
 
@@ -186,7 +186,7 @@ public:
 
     if (it == cubemap_.end())
     {
-      csoca::elog.Print("Attempting to copy data from non-existing RND cube %d,%d,%d @ %d,%d,%d", ic, jc, kc, i, j, k);
+      music::elog.Print("Attempting to copy data from non-existing RND cube %d,%d,%d @ %d,%d,%d", ic, jc, kc, i, j, k);
       throw std::runtime_error("attempting to copy data from non-existing RND cube");
     }
 
@@ -194,7 +194,7 @@ public:
 
     if (rnums_[cubeidx] == NULL)
     {
-      csoca::elog.Print("Attempting to access data from non-allocated RND cube %d,%d,%d", ic, jc, kc);
+      music::elog.Print("Attempting to access data from non-allocated RND cube %d,%d,%d", ic, jc, kc);
       throw std::runtime_error("attempting to access data from non-allocated RND cube");
     }
 
diff --git a/src/plugins/random_ngenic.cc b/src/plugins/random_ngenic.cc
index 1aa1942..1498d4b 100644
--- a/src/plugins/random_ngenic.cc
+++ b/src/plugins/random_ngenic.cc
@@ -18,11 +18,11 @@ private:
     std::vector<unsigned int> SeedTable_;
 
 public:
-    explicit RNG_ngenic(ConfigFile &cf) : RNG_plugin(cf)
+    explicit RNG_ngenic(config_file &cf) : RNG_plugin(cf)
     {
 
-        RandomSeed_ = cf.GetValue<long>("random", "seed");
-        nres_ = cf.GetValue<size_t>("setup", "GridRes");
+        RandomSeed_ = cf.get_value<long>("random", "seed");
+        nres_ = cf.get_value<size_t>("setup", "GridRes");
         pRandomGenerator_ = gsl_rng_alloc(gsl_rng_ranlxd1);
         gsl_rng_set(pRandomGenerator_, RandomSeed_);
 
@@ -63,7 +63,7 @@ public:
 
     bool isMultiscale() const { return false; }
 
-    void Fill_Grid(Grid_FFT<real_t> &g) const
+    void Fill_Grid(Grid_FFT<real_t> &g) //const
     {
         g.zero();
         g.FourierTransformForward(false);
@@ -82,7 +82,11 @@ public:
                 for (size_t j = 0; j < nres_; ++j) 
                 {                   
                     ptrdiff_t jj = (j>0)? nres_ - j : 0;
-                    gsl_rng_set( pRandomGenerator_, SeedTable_[i * nres_ + j]);
+                    if( g.is_distributed() )
+                        gsl_rng_set( pRandomGenerator_, SeedTable_[j * nres_ + i]);
+                    else
+                        gsl_rng_set( pRandomGenerator_, SeedTable_[i * nres_ + j]);
+                    
                     for (size_t k = 0; k < g.size(2); ++k) 
                     {
                         double phase = gsl_rng_uniform(pRandomGenerator_) * 2 * M_PI;
@@ -101,15 +105,28 @@ public:
                         if (k > 0) {
                             if (i_in_range) g.kelem(ip,j,k) = zrand;
                         } else{ /* k=0 plane needs special treatment */
-                            if (i == 0) {
-                                if (j < nres_ / 2 && i_in_range)
-                                {
-                                    g.kelem(ip,j,k) = zrand;
-                                    g.kelem(ip,jj,k) = std::conj(zrand);
+                            if( g.is_distributed() ){
+                                if (j == 0) {
+                                    if (i < nres_ / 2 && i_in_range)
+                                    {
+                                        if(i_in_range) g.kelem(ip,jj,k) = zrand;
+                                        if(ii_in_range) g.kelem(iip,j,k) = std::conj(zrand);
+                                    }
+                                } else if (j < nres_ / 2) {
+                                    if(i_in_range) g.kelem(ip,j,k) = zrand;
+                                    if(ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand);
+                                }
+                            }else{
+                                if (i == 0) {
+                                    if (j < nres_ / 2 && i_in_range)
+                                    {
+                                        g.kelem(ip,j,k) = zrand;
+                                        g.kelem(ip,jj,k) = std::conj(zrand);
+                                    }
+                                } else if (i < nres_ / 2) {
+                                    if(i_in_range) g.kelem(ip,j,k) = zrand;
+                                    if (ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand);
                                 }
-                            } else if (i < nres_ / 2) {
-                                if(i_in_range) g.kelem(ip,j,k) = zrand;
-                                if (ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand);
                             }
                         }
                     }
diff --git a/src/plugins/random_panphasia.cc b/src/plugins/random_panphasia.cc
new file mode 100644
index 0000000..1489f59
--- /dev/null
+++ b/src/plugins/random_panphasia.cc
@@ -0,0 +1,522 @@
+#if defined(USE_PANPHASIA)
+
+#include <general.hh>
+#include <random_plugin.hh>
+#include <config_file.hh>
+
+#include <vector>
+#include <cmath>
+#include <cstring>
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#include <grid_fft.hh>
+
+const int maxdim = 60, maxlev = 50, maxpow = 3 * maxdim; // maxdim and maxpow size the arrays inside pan_state_ below
+typedef int rand_offset_[5]; // block of five ints; element type of pan_state_::poweroffset and ::superjump
+typedef struct
+{
+  int state[133]; // Nstore = Nstate (=5) + Nbatch (=128)
+  int need_fill;
+  int pos;
+} rand_state_; // element type of pan_state_::current_state
+
+/* pan_state_ struct -- corresponds to the respective Fortran module in panphasia_routines.f.
+ * Data structure that contains all PANPHASIA state variables. It is handed to the
+ * external routines on every call (one instance per thread) to enable thread-safe
+ * execution. Presumably the member layout must match the Fortran side -- do not reorder.
+ */
+typedef struct
+{
+  int base_state[5], base_lev_start[5][maxdim + 1];
+  rand_offset_ poweroffset[maxpow + 1], superjump;
+  rand_state_ current_state[maxpow + 2];
+  // the next three are set by the C++ caller and forwarded to adv_panphasia_cell_properties_
+  int layer_min, layer_max, indep_field;
+
+  long long xorigin_store[2][2][2], yorigin_store[2][2][2], zorigin_store[2][2][2];
+  int lev_common, layer_min_store, layer_max_store;
+  long long ix_abs_store, iy_abs_store, iz_abs_store, ix_per_store, iy_per_store, iz_per_store, ix_rel_store,
+      iy_rel_store, iz_rel_store;
+  double exp_coeffs[8][8][maxdim + 2];
+  long long xcursor[maxdim + 1], ycursor[maxdim + 1], zcursor[maxdim + 1];
+  int ixshift[2][2][2], iyshift[2][2][2], izshift[2][2][2];
+
+  double cell_data[9][8];
+  int ixh_last, iyh_last, izh_last;
+  int init;
+  // init, init_cell_props and init_lecuyer_state are zeroed by clear_panphasia_thread_states()
+  int init_cell_props;
+  int init_lecuyer_state;
+  long long p_xcursor[62], p_ycursor[62], p_zcursor[62];
+
+} pan_state_;
+
+extern "C" // PANPHASIA routines implemented outside this file; every argument is passed by pointer
+{
+  void start_panphasia_(pan_state_ *lstate, const char *descriptor, int *ngrid, int *bverbose);
+  // parse_descriptor_: fills the output fields from a descriptor string; callers here pass a 100-byte, blank-filled `name`
+  void parse_descriptor_(const char *descriptor, int16_t *l, int32_t *ix, int32_t *iy, int32_t *iz, int16_t *side1,
+                         int16_t *side2, int16_t *side3, int32_t *check_int, char *name);
+
+  void panphasia_cell_properties_(pan_state_ *lstate, int *ixcell, int *iycell, int *izcell, double *cell_prop);
+  // adv_panphasia_cell_properties_: per-cell query; callers here supply a buffer of 9 doubles as `cell_prop`
+  void adv_panphasia_cell_properties_(pan_state_ *lstate, int *ixcell, int *iycell, int *izcell, int *layer_min,
+                                      int *layer_max, int *indep_field, double *cell_prop);
+  // set_phases_and_rel_origin_: called right after start_panphasia_ for each per-thread state
+  void set_phases_and_rel_origin_(pan_state_ *lstate, const char *descriptor, int *lev, long long *ix_rel,
+                                  long long *iy_rel, long long *iz_rel, int *VERBOSE);
+}
+
+struct panphasia_descriptor // parsed form of a PANPHASIA descriptor string
+{
+  int16_t wn_level_base;
+  int32_t i_xorigin_base, i_yorigin_base, i_zorigin_base;
+  int16_t i_base, i_base_y, i_base_z;
+  int32_t check_rand;
+  std::string name;
+  // Parses `dstring` with parse_descriptor_ and stores the name up to its first blank.
+  explicit panphasia_descriptor(std::string dstring)
+  {
+    // parse_descriptor_ writes the name into this blank-prefilled buffer; nothing
+    // guarantees a NUL terminator, so the length is determined explicitly below.
+    char tmp[100];
+    std::memset(tmp, ' ', sizeof(tmp));
+    parse_descriptor_(dstring.c_str(), &wn_level_base, &i_xorigin_base, &i_yorigin_base, &i_zorigin_base, &i_base,
+                      &i_base_y, &i_base_z, &check_rand, tmp);
+    // The name ends at the first blank (or NUL); if neither occurs it spans the whole
+    // buffer. Copying by explicit length never reads past the end of tmp.
+    size_t len = 0;
+    while (len < sizeof(tmp) && tmp[len] != ' ' && tmp[len] != '\0')
+      ++len;
+    name.assign(tmp, len);
+  }
+};
+
+// greatest common divisor via Euclid's algorithm (iterative form)
+int gcd(int a, int b)
+{
+  for (int r; b != 0; a = b, b = r)
+    r = a % b;
+  return a;
+}
+
+// least common multiple, never negative; 0 if either argument is 0 (divides before multiplying to limit overflow)
+int lcm(int a, int b) { return (a == 0 || b == 0) ? 0 : abs(a / gcd(a, b) * b); }
+
+// Largest power of two strictly below b (so 8 -> 4, despite the `_lte` name); 1 for every b <= 2
+int largest_power_two_lte(int b)
+{
+  int pow2 = 1;
+  // Doubling stops as soon as the next power would reach b. For b <= 1 the
+  // test fails on entry, which makes a separate early return unnecessary.
+  while (2 * pow2 < b)
+    pow2 *= 2;
+  return pow2;
+}
+
+class RNG_panphasia : public RNG_plugin
+{
+private:
+protected:
+  std::string descriptor_string_;
+  int num_threads_;
+  int levelmin_, levelmin_final_, levelmax_, ngrid_;
+  bool incongruent_fields_;
+  double inter_grid_phase_adjustment_;
+  // double translation_phase_;
+  pan_state_ *lstate;
+  int grid_p_, grid_m_;
+  double grid_rescale_fac_;
+  int coordinate_system_shift_[3];
+  int ix_abs_[3], ix_per_[3], ix_rel_[3], level_p_, lextra_;
+
+  // Zero the three init flags of every per-thread PANPHASIA state.
+  void clear_panphasia_thread_states(void)
+  {
+    for (int tid = 0; tid < num_threads_; ++tid)
+    {
+      pan_state_ &ps = lstate[tid];
+      ps.init = ps.init_cell_props = ps.init_lecuyer_state = 0;
+    }
+  }
+
+  void initialize_for_grid_structure(void) // derives grid bookkeeping members from the config file and the descriptor
+  {
+    clear_panphasia_thread_states();
+    music::ilog.Print("PANPHASIA: running with %d threads", num_threads_);
+    // [setup]/GridRes is read as size_t and narrowed into the int member ngrid_
+    // if ngrid is not a multiple of i_base, then we need to enlarge and then sample down
+    ngrid_ = pcf_->get_value<size_t>("setup", "GridRes");
+    // grid_p_ is the descriptor base size, grid_m_ the largest power of two strictly below it (1 for base sizes <= 2)
+    grid_p_ = pdescriptor_->i_base;
+    grid_m_ = largest_power_two_lte(grid_p_);
+    // lextra_ = log2(ngrid_ / i_base) plus a small round-off guard, truncated to int; assumes ngrid_ >= i_base -- TODO confirm
+    lextra_ = (log10((double)ngrid_ / (double)pdescriptor_->i_base) + 0.001) / log10(2.0);
+    int ratio = 1 << lextra_;
+    grid_rescale_fac_ = 1.0;
+    // the configured shifts are stored with inverted sign; missing keys default to 0
+    coordinate_system_shift_[0] = -pcf_->get_value_safe<int>("setup", "shift_x", 0);
+    coordinate_system_shift_[1] = -pcf_->get_value_safe<int>("setup", "shift_y", 0);
+    coordinate_system_shift_[2] = -pcf_->get_value_safe<int>("setup", "shift_z", 0);
+    // the fields count as incongruent when ngrid_ is not exactly i_base * 2^lextra_
+    incongruent_fields_ = false;
+    if (ngrid_ != ratio * pdescriptor_->i_base)
+    {
+      incongruent_fields_ = true;
+      ngrid_ = 2 * ratio * pdescriptor_->i_base;
+      grid_rescale_fac_ = (double)ngrid_ / (1 << levelmin_); // NOTE(review): levelmin_ is not assigned before the constructor calls this method -- confirm/initialise
+      music::ilog << "PANPHASIA: will use a higher resolution (using Fourier interpolation)" << std::endl;
+      music::ilog << "     (" << grid_m_ << " -> " << grid_p_ << ") * 2**ref to be compatible with PANPHASIA" << std::endl;
+    }
+  }
+
+  std::unique_ptr<panphasia_descriptor> pdescriptor_;
+
+public:
+  explicit RNG_panphasia(config_file &cf) : RNG_plugin(cf)
+  {
+    // Thread count first: it depends only on the OpenMP runtime, not on the config.
+#ifdef _OPENMP
+    num_threads_ = omp_get_max_threads();
+#else
+    num_threads_ = 1;
+#endif
+
+    // Fetch the descriptor text from [random]/descriptor before allocating anything.
+    descriptor_string_ = pcf_->get_value<std::string>("random", "descriptor");
+
+    // One pan_state_ per thread; released again in the destructor.
+    lstate = new pan_state_[num_threads_];
+
+    // Let the descriptor parser extract name, base size, etc. from the text.
+    pdescriptor_ = std::make_unique<panphasia_descriptor>(descriptor_string_);
+    const panphasia_descriptor &desc = *pdescriptor_;
+    music::ilog.Print("PANPHASIA: descriptor \'%s\' is base %d,", desc.name.c_str(), desc.i_base);
+
+    // Publish two values through the config file: the gridding unit is fixed to 2
+    // ([setup]/gridding_unit) and the descriptor base size goes to [random]/base_unit.
+    std::stringstream gridding_unit, base_unit;
+    gridding_unit << 2;
+    base_unit << desc.i_base;
+    pcf_->insert_value("setup", "gridding_unit", gridding_unit.str());
+    pcf_->insert_value("random", "base_unit", base_unit.str());
+
+    // finish with the grid-dependent part of the setup
+    initialize_for_grid_structure();
+  }
+
+  ~RNG_panphasia() { delete[] lstate; } // frees the per-thread state array allocated in the constructor
+
+  bool isMultiscale() const { return true; } // this generator always reports itself as multiscale
+
+  void Fill_Grid(Grid_FFT<real_t> &g)
+  {
+    auto sinc = [](real_t x) { return (std::abs(x) > 1e-16) ? std::sin(x) / x : 1.0; };
+    auto dsinc = [](real_t x) { return (std::abs(x) > 1e-16) ? (x * std::cos(x) - std::sin(x)) / (x * x) : 0.0; };
+    const real_t sqrt3{std::sqrt(3.0)}, sqrt27{std::sqrt(27.0)};
+
+    // make sure we're in the right space
+    Grid_FFT<real_t> &g0 = g;
+    g0.FourierTransformBackward(false);
+
+    // temporaries
+    Grid_FFT<real_t> g1(g.n_, g.length_);
+    Grid_FFT<real_t> g2(g.n_, g.length_);
+    Grid_FFT<real_t> g3(g.n_, g.length_);
+    Grid_FFT<real_t> g4(g.n_, g.length_);
+
+    clear_panphasia_thread_states();
+    music::ilog.Print("PANPHASIA: running with %d threads", num_threads_);
+
+    ngrid_ = pcf_->get_value<size_t>("setup", "GridRes");
+
+    grid_p_ = pdescriptor_->i_base;
+    // grid_m_ = largest_power_two_lte(grid_p_);
+    if (ngrid_ % grid_p_ != 0)
+    {
+      music::elog << "Grid resolution " << ngrid_ << " is not divisible by PANPHASIA descriptor length " << grid_p_ << std::endl;
+      throw std::runtime_error("Chosen [setup] / GridRes is not compatible with PANPHASIA descriptor length!");
+    }
+
+    double t1 = get_wtime();
+    // double tp = t1;
+
+#pragma omp parallel
+    {
+#ifdef _OPENMP
+      const int mythread = omp_get_thread_num();
+#else
+      const int mythread = 0;
+#endif
+
+      //int odd_x, odd_y, odd_z;
+      //int ng_level = ngrid_ * (1 << (level - levelmin_)); // full resolution of current level
+
+      int verbosity = (mythread == 0);
+      char descriptor[100];
+      std::memset(descriptor, 0, 100);
+      std::memcpy(descriptor, descriptor_string_.c_str(), descriptor_string_.size());
+
+      start_panphasia_(&lstate[mythread], descriptor, &ngrid_, &verbosity);
+
+      {
+        panphasia_descriptor d(descriptor_string_);
+
+        int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0);
+        int level_p = d.wn_level_base + lextra;
+        int ratio = 1 << lextra;
+
+        lstate[mythread].layer_min = 0;
+        lstate[mythread].layer_max = level_p;
+        lstate[mythread].indep_field = 1;
+
+        assert(ngrid_ == ratio * d.i_base);
+
+        long long ix_rel[3];
+        ix_rel[0] = 0; //ileft_corner_p[0];
+        ix_rel[1] = 0; //ileft_corner_p[1];
+        ix_rel[2] = 0; //ileft_corner_p[2];
+
+        set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2],
+                                   &verbosity);
+      }
+
+      if (verbosity)
+        t1 = get_wtime();
+
+      std::array<double, 9> cell_prop;
+      pan_state_ *ps = &lstate[mythread];
+
+#pragma omp for //nowait
+      for (size_t i = 0; i < g.size(0); i += 2)
+      {
+        for (size_t j = 0; j < g.size(1); j += 2)
+        {
+          for (size_t k = 0; k < g.size(2); k += 2)
+          {
+
+            // ARJ - added inner set of loops to speed up evaluation of Panphasia
+
+            for (int ix = 0; ix < 2; ++ix)
+            {
+              for (int iy = 0; iy < 2; ++iy)
+              {
+                for (int iz = 0; iz < 2; ++iz)
+                {
+                  int ilocal = i + ix;
+                  int jlocal = j + iy;
+                  int klocal = k + iz;
+
+                  int iglobal = ilocal + g.local_0_start_;
+                  int jglobal = jlocal;
+                  int kglobal = klocal;
+
+                  adv_panphasia_cell_properties_(ps, &iglobal, &jglobal, &kglobal, &ps->layer_min,
+                                                 &ps->layer_max, &ps->indep_field, &cell_prop[0]);
+
+                  g0.relem(ilocal, jlocal, klocal) = cell_prop[0];
+                  g1.relem(ilocal, jlocal, klocal) = cell_prop[4];
+                  g2.relem(ilocal, jlocal, klocal) = cell_prop[2];
+                  g3.relem(ilocal, jlocal, klocal) = cell_prop[1];
+                  g4.relem(ilocal, jlocal, klocal) = cell_prop[8];
+                }
+              }
+            }
+          }
+        }
+      }
+    } // end omp parallel region
+
+    g0.FourierTransformForward();
+    g1.FourierTransformForward();
+    g2.FourierTransformForward();
+    g3.FourierTransformForward();
+    g4.FourierTransformForward();
+
+#pragma omp parallel for
+    for (size_t i = 0; i < g0.size(0); i++)
+    {
+      for (size_t j = 0; j < g0.size(1); j++)
+      {
+        for (size_t k = 0; k < g0.size(2); k++)
+        {
+          if (!g0.is_nyquist_mode(i, j, k))
+          {
+            auto kvec = g0.get_k<real_t>(i, j, k);
+
+            auto argx = 0.5 * M_PI * kvec[0] / g.kny_[0];
+            auto argy = 0.5 * M_PI * kvec[1] / g.kny_[1];
+            auto argz = 0.5 * M_PI * kvec[2] / g.kny_[2];
+
+            auto fx = sinc(argx);
+            auto gx = ccomplex_t(0.0, dsinc(argx));
+            auto fy = sinc(argy);
+            auto gy = ccomplex_t(0.0, dsinc(argy));
+            auto fz = sinc(argz);
+            auto gz = ccomplex_t(0.0, dsinc(argz));
+
+            auto temp = (fx + sqrt3 * gx) * (fy + sqrt3 * gy) * (fz + sqrt3 * gz);
+            auto magnitude = std::sqrt(1.0 - std::abs(temp * temp));
+
+            auto y0(g0.kelem(i, j, k)), y1(g1.kelem(i, j, k)), y2(g2.kelem(i, j, k)), y3(g3.kelem(i, j, k)), y4(g4.kelem(i, j, k));
+
+            g0.kelem(i, j, k) = y0 * fx * fy * fz 
+                              + sqrt3 * (y1 * gx * fy * fz + y2 * fx * gy * fz + y3 * fx * fy * gz) 
+                              + y4 * magnitude;
+          }
+          else
+          {
+            g0.kelem(i, j, k) = 0.0;
+          }
+        }
+      }
+    }
+
+    // music::ilog.Print("\033[31mtiming [build panphasia field]: %f s\033[0m", get_wtime() - tp);
+    // tp = get_wtime();
+
+    g1.FourierTransformBackward(false);
+    g2.FourierTransformBackward(false);
+    g3.FourierTransformBackward(false);
+    g4.FourierTransformBackward(false);
+
+#pragma omp parallel
+    {
+#ifdef _OPENMP
+      const int mythread = omp_get_thread_num();
+#else
+      const int mythread = 0;
+#endif
+
+      // int odd_x, odd_y, odd_z;
+      int verbosity = (mythread == 0);
+      char descriptor[100];
+      std::memset(descriptor, 0, 100);
+      std::memcpy(descriptor, descriptor_string_.c_str(), descriptor_string_.size());
+
+      start_panphasia_(&lstate[mythread], descriptor, &ngrid_, &verbosity);
+
+      {
+        panphasia_descriptor d(descriptor_string_);
+
+        int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0);
+        int level_p = d.wn_level_base + lextra;
+        int ratio = 1 << lextra;
+
+        lstate[mythread].layer_min = 0;
+        lstate[mythread].layer_max = level_p;
+        lstate[mythread].indep_field = 1;
+
+        assert(ngrid_ == ratio * d.i_base);
+
+        long long ix_rel[3];
+        ix_rel[0] = 0; //ileft_corner_p[0];
+        ix_rel[1] = 0; //ileft_corner_p[1];
+        ix_rel[2] = 0; //ileft_corner_p[2];
+
+        set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2],
+                                   &verbosity);
+      }
+
+      if (verbosity)
+        t1 = get_wtime();
+
+      //***************************************************************
+      // Process Panphasia values: p110, p011, p101, p111
+      //****************************************************************
+      std::array<double,9> cell_prop;
+      pan_state_ *ps = &lstate[mythread];
+
+#pragma omp for //nowait
+      for (size_t i = 0; i < g1.size(0); i += 2)
+      {
+        for (size_t j = 0; j < g1.size(1); j += 2)
+        {
+          for (size_t k = 0; k < g1.size(2); k += 2)
+          {
+            // ARJ - added inner set of loops to speed up evaluation of Panphasia
+            for (int ix = 0; ix < 2; ++ix)
+            {
+              for (int iy = 0; iy < 2; ++iy)
+              {
+                for (int iz = 0; iz < 2; ++iz)
+                {
+                  int ilocal = i + ix;
+                  int jlocal = j + iy;
+                  int klocal = k + iz;
+
+                  int iglobal = ilocal + g.local_0_start_;
+                  int jglobal = jlocal;
+                  int kglobal = klocal;
+
+                  adv_panphasia_cell_properties_(ps, &iglobal, &jglobal, &kglobal, &ps->layer_min,
+                                                 &ps->layer_max, &ps->indep_field, &cell_prop[0]);
+
+                  g1.relem(ilocal, jlocal, klocal) = cell_prop[6];
+                  g2.relem(ilocal, jlocal, klocal) = cell_prop[3];
+                  g3.relem(ilocal, jlocal, klocal) = cell_prop[5];
+                  g4.relem(ilocal, jlocal, klocal) = cell_prop[7];
+                }
+              }
+            }
+          }
+        }
+      }
+    } // end omp parallel region
+
+    // music::ilog.Print("\033[31mtiming [adv_panphasia_cell_properties2]: %f s \033[0m", get_wtime() - tp);
+    // tp = get_wtime();
+
+    /////////////////////////////////////////////////////////////////////////
+    // transform and convolve with Legendres
+    g1.FourierTransformForward();
+    g2.FourierTransformForward();
+    g3.FourierTransformForward();
+    g4.FourierTransformForward();
+
+    #pragma omp parallel for 
+    for (size_t i = 0; i < g1.size(0); i++)
+    {
+      for (size_t j = 0; j < g1.size(1); j++)
+      {
+        for (size_t k = 0; k < g1.size(2); k++)
+        {
+          if (!g1.is_nyquist_mode(i, j, k))
+          {
+            auto kvec = g1.get_k<real_t>(i, j, k);
+
+            auto argx = 0.5 * M_PI * kvec[0] / g.kny_[0];
+            auto argy = 0.5 * M_PI * kvec[1] / g.kny_[1];
+            auto argz = 0.5 * M_PI * kvec[2] / g.kny_[2];
+
+            auto fx = sinc(argx);
+            auto gx = ccomplex_t(0.0, dsinc(argx));
+            auto fy = sinc(argy);
+            auto gy = ccomplex_t(0.0, dsinc(argy));
+            auto fz = sinc(argz);
+            auto gz = ccomplex_t(0.0, dsinc(argz));
+
+            auto y1(g1.kelem(i, j, k)), y2(g2.kelem(i, j, k)), y3(g3.kelem(i, j, k)), y4(g4.kelem(i, j, k));
+
+            g0.kelem(i, j, k) += 3.0 * (y1 * gx * gy * fz + y2 * fx * gy * gz + y3 * gx * fy * gz) + sqrt27 * y4 * gx * gy * gz;
+          }
+        }
+      }
+    }
+
+    // music::ilog.Print("\033[31mtiming [build panphasia field2]: %f s\033[0m", get_wtime() - tp);
+    // tp = get_wtime();
+    music::ilog.Print("time for calculating PANPHASIA field : %f s, %f µs/cell", get_wtime() - t1,
+                          1e6 * (get_wtime() - t1) / g.global_size(0) / g.global_size(1) / g.global_size(2));
+    music::ilog.Print("PANPHASIA k-space statistices: mean Re = %f, std = %f", g0.mean(), g0.std());
+  }
+};
+
+// File-local creator object constructed with the name "PANPHASIA".
+// NOTE(review): presumably its constructor registers RNG_panphasia with the RNG
+// plugin factory under that name -- RNG_plugin_creator_concrete is defined elsewhere.
+namespace
+{
+  RNG_plugin_creator_concrete<RNG_panphasia> creator("PANPHASIA");
+}
+#endif // defined(USE_PANPHASIA)
\ No newline at end of file
diff --git a/src/plugins/transfer_CAMB_file.cc b/src/plugins/transfer_CAMB_file.cc
new file mode 100644
index 0000000..4a2baf3
--- /dev/null
+++ b/src/plugins/transfer_CAMB_file.cc
@@ -0,0 +1,344 @@
+//  transfer_CAMB.cc - This file is part of MUSIC -
+//  a code to generate multi-scale initial conditions for cosmological simulations
+
+//  Copyright (C) 2019  Oliver Hahn
+
+#include <gsl/gsl_errno.h>
+#include <gsl/gsl_spline.h>
+
+#include <vector>
+
+#include "transfer_function_plugin.hh"
+
+// Lower bound applied by extrap_right() when a linearly stored baryon transfer
+// function is extrapolated beyond the table, so the result is never below this value.
+const double tiny = 1e-30;
+
+//! Transfer function plugin backed by a tabulated CAMB transfer function file.
+//!
+//! The table is read once in the constructor (see read_table()) and evaluated
+//! with GSL cubic splines in log10(k). Below the tabulated range the first table
+//! value is returned; above it the last two table points are extrapolated
+//! linearly in the stored representation (see extrap_right()).
+class transfer_CAMB_file_plugin : public TransferFunction_plugin
+{
+
+private:
+  std::string m_filename_Pk, m_filename_Tk;
+  // tabulated log10(k) and the density (Tk) / velocity (Tvk) transfer functions
+  std::vector<double> m_tab_k, m_tab_Tk_tot, m_tab_Tk_cdm, m_tab_Tk_baryon;
+  std::vector<double> m_tab_Tvk_tot, m_tab_Tvk_cdm, m_tab_Tvk_baryon;
+  gsl_interp_accel *acc_tot, *acc_cdm, *acc_baryon;
+  gsl_interp_accel *acc_vtot, *acc_vcdm, *acc_vbaryon;
+  gsl_spline *spline_tot, *spline_cdm, *spline_baryon;
+  gsl_spline *spline_vtot, *spline_vcdm, *spline_vbaryon;
+
+  double m_kmin, m_kmax, m_Omega_b, m_Omega_m, m_zstart;
+  unsigned m_nlines;
+
+  // true if any baryon entry of the file is negative; the two baryon tables are
+  // then kept linear instead of being converted to log10
+  bool m_linbaryoninterp;
+
+  // the raw GSL handles are freed in the destructor, so copies must not exist
+  transfer_CAMB_file_plugin(const transfer_CAMB_file_plugin &) = delete;
+  transfer_CAMB_file_plugin &operator=(const transfer_CAMB_file_plugin &) = delete;
+
+  //! Reads the transfer function file m_filename_Tk into the m_tab_* tables.
+  //!
+  //! Each data row must provide 13 numeric columns; columns 1, 2, 3, 7, 11 and 12
+  //! are used as k, T_cdm, T_baryon, T_total, Tv_cdm and Tv_baryon. Lines that
+  //! contain '#' and blank lines are skipped. The total velocity transfer
+  //! function is the Omega-weighted mean of the CDM and baryon ones (or T_total
+  //! if m_Omega_b < 1e-6). With WITH_MPI only rank 0 reads the file and the
+  //! result is broadcast to all other ranks.
+  //!
+  //! \throws std::runtime_error if the file cannot be opened or a row cannot be parsed
+  void read_table(void)
+  {
+
+    m_nlines = 0;
+    m_linbaryoninterp = false;
+    m_kmin = 1e30;
+    m_kmax = -1e30;
+
+#ifdef WITH_MPI
+    if (MPI::COMM_WORLD.Get_rank() == 0)
+    {
+#endif
+      music::ilog.Print("Reading tabulated transfer function data from file \n    \'%s\'", m_filename_Tk.c_str());
+
+      std::ifstream ifs(m_filename_Tk.c_str());
+
+      if (!ifs.good())
+        throw std::runtime_error("Could not find transfer function file \'" + m_filename_Tk + "\'");
+
+      m_tab_k.clear();
+      m_tab_Tk_tot.clear();
+      m_tab_Tk_cdm.clear();
+      m_tab_Tk_baryon.clear();
+      m_tab_Tvk_tot.clear();
+      m_tab_Tvk_cdm.clear();
+      m_tab_Tvk_baryon.clear();
+
+      // looping on getline itself keeps the final row even if the file does not
+      // end with a newline (testing eof() after the read would drop it)
+      std::string line;
+      while (std::getline(ifs, line))
+      {
+        // OH: ignore line if it has a comment:
+        if (line.find("#") != std::string::npos)
+          continue;
+
+        // ignore blank lines (including a stray '\r' from DOS line endings)
+        if (line.find_first_not_of(" \t\r\n") == std::string::npos)
+          continue;
+
+        std::stringstream ss(line);
+
+        double k, Tkc, Tkb, Tktot, Tkvtot, Tkvc, Tkvb, dummy;
+
+        ss >> k;
+        ss >> Tkc;   // cdm
+        ss >> Tkb;   // baryon
+        ss >> dummy; // photon
+        ss >> dummy; // nu
+        ss >> dummy; // mass_nu
+        ss >> Tktot; // total
+        ss >> dummy; // no_nu
+        ss >> dummy; // total_de
+        ss >> dummy; // Weyl
+        ss >> Tkvc;  // v_cdm
+        ss >> Tkvb;  // v_b
+        ss >> dummy; // v_b-v_cdm
+
+        if (ss.fail())
+        {
+          music::elog.Print("error reading the transfer function file (corrupt or not in expected format)!");
+          throw std::runtime_error("error reading transfer function file \'" +
+                                   m_filename_Tk + "\'");
+        }
+
+        if (m_Omega_b < 1e-6)
+          Tkvtot = Tktot;
+        else
+          Tkvtot = ((m_Omega_m - m_Omega_b) * Tkvc + m_Omega_b * Tkvb) / m_Omega_m; //MvD
+
+        m_linbaryoninterp |= Tkb < 0.0 || Tkvb < 0.0;
+
+        m_tab_k.push_back(log10(k));
+
+        m_tab_Tk_tot.push_back(Tktot);
+        m_tab_Tk_baryon.push_back(Tkb);
+        m_tab_Tk_cdm.push_back(Tkc);
+        m_tab_Tvk_tot.push_back(Tkvtot);
+        m_tab_Tvk_baryon.push_back(Tkvb);
+        m_tab_Tvk_cdm.push_back(Tkvc);
+
+        ++m_nlines;
+
+        if (k < m_kmin)
+          m_kmin = k;
+        if (k > m_kmax)
+          m_kmax = k;
+      }
+
+      // store log10 of the transfer functions; the baryon tables can only be
+      // converted once the whole file has been scanned for negative entries
+      for (size_t i = 0; i < m_tab_k.size(); ++i)
+      {
+        m_tab_Tk_tot[i] = log10(m_tab_Tk_tot[i]);
+        m_tab_Tk_cdm[i] = log10(m_tab_Tk_cdm[i]);
+        m_tab_Tvk_cdm[i] = log10(m_tab_Tvk_cdm[i]);
+        m_tab_Tvk_tot[i] = log10(m_tab_Tvk_tot[i]);
+
+        if (!m_linbaryoninterp)
+        {
+          m_tab_Tk_baryon[i] = log10(m_tab_Tk_baryon[i]);
+          m_tab_Tvk_baryon[i] = log10(m_tab_Tvk_baryon[i]);
+        }
+      }
+
+      music::ilog.Print("Read CAMB transfer function table with %u rows", m_nlines);
+
+      if (m_linbaryoninterp)
+        music::ilog.Print("Using log-lin interpolation for baryons\n    (TF is not "
+                          "positive definite)");
+
+#ifdef WITH_MPI
+    }
+
+    unsigned n = m_tab_k.size();
+    MPI::COMM_WORLD.Bcast(&n, 1, MPI_UNSIGNED, 0);
+
+    if (MPI::COMM_WORLD.Get_rank() > 0)
+    {
+      m_tab_k.assign(n, 0);
+      m_tab_Tk_tot.assign(n, 0);
+      m_tab_Tk_cdm.assign(n, 0);
+      m_tab_Tk_baryon.assign(n, 0);
+      m_tab_Tvk_tot.assign(n, 0);
+      m_tab_Tvk_cdm.assign(n, 0);
+      m_tab_Tvk_baryon.assign(n, 0);
+    }
+
+    // n is identical on all ranks after the broadcast above, so this branch is
+    // taken collectively; it avoids touching the storage of empty vectors
+    if (n > 0)
+    {
+      MPI::COMM_WORLD.Bcast(m_tab_k.data(), n, MPI_DOUBLE, 0);
+      MPI::COMM_WORLD.Bcast(m_tab_Tk_tot.data(), n, MPI_DOUBLE, 0);
+      MPI::COMM_WORLD.Bcast(m_tab_Tk_cdm.data(), n, MPI_DOUBLE, 0);
+      MPI::COMM_WORLD.Bcast(m_tab_Tk_baryon.data(), n, MPI_DOUBLE, 0);
+      MPI::COMM_WORLD.Bcast(m_tab_Tvk_tot.data(), n, MPI_DOUBLE, 0);
+      MPI::COMM_WORLD.Bcast(m_tab_Tvk_cdm.data(), n, MPI_DOUBLE, 0);
+      MPI::COMM_WORLD.Bcast(m_tab_Tvk_baryon.data(), n, MPI_DOUBLE, 0);
+    }
+
+    // the scalar state derived from the file is needed by compute() on every rank
+    MPI::COMM_WORLD.Bcast(&m_nlines, 1, MPI_UNSIGNED, 0);
+    MPI::COMM_WORLD.Bcast(&m_kmin, 1, MPI_DOUBLE, 0);
+    MPI::COMM_WORLD.Bcast(&m_kmax, 1, MPI_DOUBLE, 0);
+    int linbaryon_flag = m_linbaryoninterp ? 1 : 0;
+    MPI::COMM_WORLD.Bcast(&linbaryon_flag, 1, MPI_INT, 0);
+    m_linbaryoninterp = (linbaryon_flag != 0);
+
+#endif
+  }
+
+  //! Allocates a cubic spline over (m_tab_k, \p y) and initialises it.
+  gsl_spline *make_spline(const std::vector<double> &y) const
+  {
+    gsl_spline *spline = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size());
+    gsl_spline_init(spline, &m_tab_k[0], &y[0], m_tab_k.size());
+    return spline;
+  }
+
+  //! Value at log10(k) of the straight line through the last two points of \p tab.
+  inline double extrap_line(const std::vector<double> &tab, double k) const
+  {
+    const size_t n = m_tab_k.size() - 1, n1 = n - 1;
+
+    const double dk = m_tab_k[n] - m_tab_k[n1];
+    const double delk = log10(k) - m_tab_k[n];
+
+    return (tab[n] - tab[n1]) / dk * (delk) + tab[n];
+  }
+
+public:
+  //! Reads the file name and cosmological parameters from the config file,
+  //! loads the table and builds one cubic spline per transfer function.
+  //!
+  //! \throws std::runtime_error if the table has fewer than 3 rows (or on any
+  //!         error raised by read_table())
+  transfer_CAMB_file_plugin(config_file &cf)
+      : TransferFunction_plugin(cf)
+  {
+    m_filename_Tk = pcf_->get_value<std::string>("cosmology", "transfer_file");
+    m_Omega_m = cf.get_value<double>("cosmology", "Omega_m"); //MvD
+    m_Omega_b = cf.get_value<double>("cosmology", "Omega_b"); //MvD
+    m_zstart = cf.get_value<double>("setup", "zstart");       //MvD
+
+    read_table();
+
+    // a cubic spline needs at least 3 points; extrap_right() and
+    // get_kmin()/get_kmax() also index the second and second-to-last entries
+    if (m_tab_k.size() < 3)
+      throw std::runtime_error("Transfer function file \'" + m_filename_Tk +
+                               "\' contains fewer than 3 data rows");
+
+    acc_tot = gsl_interp_accel_alloc();
+    acc_cdm = gsl_interp_accel_alloc();
+    acc_baryon = gsl_interp_accel_alloc();
+    acc_vtot = gsl_interp_accel_alloc();
+    acc_vcdm = gsl_interp_accel_alloc();
+    acc_vbaryon = gsl_interp_accel_alloc();
+
+    spline_tot = make_spline(m_tab_Tk_tot);
+    spline_cdm = make_spline(m_tab_Tk_cdm);
+    spline_baryon = make_spline(m_tab_Tk_baryon);
+    spline_vtot = make_spline(m_tab_Tvk_tot);
+    spline_vcdm = make_spline(m_tab_Tvk_cdm);
+    spline_vbaryon = make_spline(m_tab_Tvk_baryon);
+
+    tf_distinct_ = true; // different density between CDM v.s. Baryon
+    tf_withvel_ = true;  // using velocity transfer function
+  }
+
+  //! Releases the GSL splines and accelerators allocated by the constructor.
+  ~transfer_CAMB_file_plugin()
+  {
+    gsl_spline_free(spline_tot);
+    gsl_spline_free(spline_cdm);
+    gsl_spline_free(spline_baryon);
+    gsl_spline_free(spline_vtot);
+    gsl_spline_free(spline_vcdm);
+    gsl_spline_free(spline_vbaryon);
+
+    gsl_interp_accel_free(acc_tot);
+    gsl_interp_accel_free(acc_cdm);
+    gsl_interp_accel_free(acc_baryon);
+    gsl_interp_accel_free(acc_vtot);
+    gsl_interp_accel_free(acc_vcdm);
+    gsl_interp_accel_free(acc_vbaryon);
+  }
+
+  //! Extrapolates transfer function \p type to a wavenumber \p k above the table.
+  //!
+  //! The last two table points are continued linearly in the stored
+  //! representation: log-log for log10 tables, log-lin (clamped from below by
+  //! `tiny`) for the baryon tables when m_linbaryoninterp is set.
+  //!
+  //! \throws std::runtime_error for a transfer function type that is not tabulated
+  inline double extrap_right(double k, const tf_type &type) const
+  {
+    switch (type)
+    {
+    case cdm:
+      return pow(10.0, extrap_line(m_tab_Tk_cdm, k));
+    case baryon:
+      if (m_linbaryoninterp)
+        return std::max(extrap_line(m_tab_Tk_baryon, k), tiny);
+      return pow(10.0, extrap_line(m_tab_Tk_baryon, k));
+    case vtotal:
+      return pow(10.0, extrap_line(m_tab_Tvk_tot, k));
+    case vcdm:
+      return pow(10.0, extrap_line(m_tab_Tvk_cdm, k));
+    case vbaryon:
+      if (m_linbaryoninterp)
+        return std::max(extrap_line(m_tab_Tvk_baryon, k), tiny);
+      return pow(10.0, extrap_line(m_tab_Tvk_baryon, k));
+    case total:
+      return pow(10.0, extrap_line(m_tab_Tk_tot, k));
+    default:
+      throw std::runtime_error(
+          "Invalid type requested in transfer function evaluation");
+    }
+  }
+
+  //! Evaluates transfer function \p type at wavenumber \p k.
+  //!
+  //! \throws std::runtime_error for a transfer function type that is not tabulated
+  inline double compute(double k, tf_type type) const
+  {
+    // use constant interpolation on the left side of the tabulated values
+    if (k < m_kmin)
+    {
+      switch (type)
+      {
+      case cdm:
+        return pow(10.0, m_tab_Tk_cdm[0]);
+      case baryon:
+        if (m_linbaryoninterp)
+          return m_tab_Tk_baryon[0];
+        return pow(10.0, m_tab_Tk_baryon[0]);
+      case vtotal:
+        return pow(10.0, m_tab_Tvk_tot[0]);
+      case vcdm:
+        return pow(10.0, m_tab_Tvk_cdm[0]);
+      case vbaryon:
+        if (m_linbaryoninterp)
+          return m_tab_Tvk_baryon[0];
+        return pow(10.0, m_tab_Tvk_baryon[0]);
+      case total:
+        return pow(10.0, m_tab_Tk_tot[0]);
+      default:
+        throw std::runtime_error(
+            "Invalid type requested in transfer function evaluation");
+      }
+    }
+    // use linear interpolation on the right side of the tabulated values
+    else if (k > m_kmax)
+      return extrap_right(k, type);
+
+    // inside the table: spline in log10(k); undo the log10 unless the baryon
+    // tables were stored linearly
+    double lk = log10(k);
+    switch (type)
+    {
+    case cdm:
+      return pow(10.0, gsl_spline_eval(spline_cdm, lk, acc_cdm));
+    case baryon:
+      if (m_linbaryoninterp)
+        return gsl_spline_eval(spline_baryon, lk, acc_baryon);
+      return pow(10.0, gsl_spline_eval(spline_baryon, lk, acc_baryon));
+    case vtotal:
+      return pow(10.0, gsl_spline_eval(spline_vtot, lk, acc_vtot)); //MvD
+    case vcdm:
+      return pow(10.0, gsl_spline_eval(spline_vcdm, lk, acc_vcdm));
+    case vbaryon:
+      if (m_linbaryoninterp)
+        return gsl_spline_eval(spline_vbaryon, lk, acc_vbaryon);
+      return pow(10.0, gsl_spline_eval(spline_vbaryon, lk, acc_vbaryon));
+    case total:
+      return pow(10.0, gsl_spline_eval(spline_tot, lk, acc_tot));
+    default:
+      throw std::runtime_error(
+          "Invalid type requested in transfer function evaluation");
+    }
+  }
+
+  //! Wavenumber of the second table entry (not the first one).
+  inline double get_kmin(void) const { return pow(10.0, m_tab_k[1]); }
+
+  //! Wavenumber of the second-to-last table entry (not the last one).
+  inline double get_kmax(void) const { return pow(10.0, m_tab_k[m_tab_k.size() - 2]); }
+};
+
+// File-local creator object constructed with the name "CAMB_file".
+// NOTE(review): presumably its constructor registers transfer_CAMB_file_plugin with
+// the transfer function plugin factory -- the creator template is defined elsewhere.
+namespace
+{
+TransferFunction_plugin_creator_concrete<transfer_CAMB_file_plugin> creator("CAMB_file");
+}
diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc
index 85b65b8..a842736 100644
--- a/src/plugins/transfer_CLASS.cc
+++ b/src/plugins/transfer_CLASS.cc
@@ -9,145 +9,328 @@
 #include <string>
 #include <vector>
 #include <memory>
+#include <sstream>
 
 #include <ClassEngine.hh>
 
 #include <general.hh>
 #include <config_file.hh>
 #include <transfer_function_plugin.hh>
+#include <math/interpolate.hh>
 
-#include <gsl/gsl_errno.h>
-#include <gsl/gsl_spline.h>
-
-class transfer_CLASS_plugin : public TransferFunction_plugin {
+class transfer_CLASS_plugin : public TransferFunction_plugin
+{
 
 private:
-    std::vector<double> tab_lnk_, tab_dtot_, tab_dc_, tab_db_, tab_ttot_, tab_tc_, tab_tb_;
-    gsl_interp_accel *gsl_ia_dtot_, *gsl_ia_dc_, *gsl_ia_db_, *gsl_ia_ttot_, *gsl_ia_tc_, *gsl_ia_tb_;
-    gsl_spline *gsl_sp_dtot_, *gsl_sp_dc_, *gsl_sp_db_, *gsl_sp_ttot_, *gsl_sp_tc_, *gsl_sp_tb_;
-    double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_;
+  interpolated_function_1d<true, true, false> delta_c_, delta_b_, delta_n_, delta_m_, theta_c_, theta_b_, theta_n_, theta_m_;
+  interpolated_function_1d<true, true, false> delta_c0_, delta_b0_, delta_n0_, delta_m0_, theta_c0_, theta_b0_, theta_n0_, theta_m0_;
 
-    void ClassEngine_get_data( void ){
-        std::vector<double> d_ncdm, t_ncdm, phi, psi;
+  // single fluid growing/decaying mode decomposition
+  // gsl_interp_accel *gsl_ia_Cplus_, *gsl_ia_Cminus_;
+  // gsl_spline *gsl_sp_Cplus_, *gsl_sp_Cminus_;
+  // std::vector<double> tab_Cplus_, tab_Cminus_;
 
-        csoca::ilog << "Computing TF via ClassEngine..." << std::endl << " ztarget = " << ztarget_ << ", zstart = " << zstart_ << " ..." << std::flush;
-        double wtime = get_wtime();
-        
-        ClassParams pars;
-        pars.add("extra metric transfer functions", "yes");
-        pars.add("z_pk",ztarget_);
-        pars.add("P_k_max_h/Mpc", kmax_);
-        pars.add("h",h_);
-        pars.add("Omega_b",Omega_b_);
-        // pars.add("Omega_k",0.0);
-        // pars.add("Omega_ur",0.0);
-        pars.add("N_ur",N_ur_);
-        pars.add("Omega_cdm",Omega_m_-Omega_b_);
-        pars.add("Omega_Lambda",1.0-Omega_m_);
-        // pars.add("Omega_fld",0.0);
-        // pars.add("Omega_scf",0.0);
-        pars.add("A_s",2.42e-9);
-        pars.add("n_s",.96); // tnis doesn't matter for TF
-        pars.add("output","dTk,vTk");
-        pars.add("YHe",0.248);
+  double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_, astart_, atarget_, A_s_, n_s_, sigma8_, Tcmb_, tnorm_;
 
-        pars.add("k_per_decade_for_pk",50);
-        pars.add("k_per_decade_for_bao",50);
-        pars.add("compute damping scale","yes");
-        pars.add("z_reio",-1.0); // make sure reionisation is not included
+  ClassParams pars_;
+  std::unique_ptr<ClassEngine> the_ClassEngine_;
+  std::ofstream ofs_class_input_;
 
-        std::unique_ptr<ClassEngine> CE = std::make_unique<ClassEngine>(pars, false);
+  //! Adds one (name, value) pair to the CLASS parameter set pars_ and mirrors it
+  //! as a "name = value" line in the ofs_class_input_ stream, which is flushed
+  //! after every line.
+  template <typename T>
+  void add_class_parameter(std::string parameter_name, const T parameter_value)
+  {
+    pars_.add(parameter_name, parameter_value);
+    ofs_class_input_ << parameter_name << " = " << parameter_value << '\n'
+                     << std::flush;
+  }
 
-        CE->getTk(ztarget_, tab_lnk_, tab_dc_, tab_db_, d_ncdm, tab_dtot_,
-                tab_tc_, tab_tb_, t_ncdm, tab_ttot_, phi, psi );
+  //! Set up class parameters from MUSIC cosmological parameters and create the
+  //! ClassEngine instance (the_ClassEngine_) from them.
+  //!
+  //! Every parameter goes through add_class_parameter(), so it is also written
+  //! to the control .ini stream. The wall-clock time spent constructing the
+  //! engine is logged.
+  void init_ClassEngine(void)
+  {
+    //--- general parameters ------------------------------------------
+    // tabulate up to the larger of zstart_ and ztarget_, but never below z = 199
+    add_class_parameter("z_max_pk", std::max(std::max(zstart_, ztarget_),199.0));
+    add_class_parameter("P_k_max_h/Mpc", kmax_);
+    add_class_parameter("output", "dTk,vTk");
+    add_class_parameter("extra metric transfer functions","yes");
+    // add_class_parameter("lensing", "no");
 
-        wtime = get_wtime() - wtime;
-        csoca::ilog << "   took " << wtime << " s / " << tab_lnk_.size() << " modes."  << std::endl;
+    //--- choose gauge ------------------------------------------------
+    add_class_parameter("gauge", "synchronous");
+
+    //--- cosmological parameters, densities --------------------------
+    add_class_parameter("h", h_);
+
+    add_class_parameter("Omega_b", Omega_b_);
+    add_class_parameter("Omega_cdm", Omega_m_ - Omega_b_);
+    add_class_parameter("Omega_k", 0.0);
+    // add_class_parameter("Omega_Lambda",1.0-Omega_m_);
+    add_class_parameter("Omega_fld", 0.0);
+    add_class_parameter("Omega_scf", 0.0);
+    // add_class_parameter("fluid_equation_of_state","CLP");
+    // add_class_parameter("w0_fld", -1 );
+    // add_class_parameter("wa_fld", 0. );
+    // add_class_parameter("cs2_fld", 1);
+
+    //--- massive neutrinos -------------------------------------------
+#if 1
+    //default off
+    // add_class_parameter("Omega_ur",0.0);
+    add_class_parameter("N_ur", N_ur_);
+    add_class_parameter("N_ncdm", 0);
+
+#else
+    // change above to enable
+    add_class_parameter("N_ur", 0);
+    add_class_parameter("N_ncdm", 1);
+    add_class_parameter("m_ncdm", "0.4");
+    add_class_parameter("T_ncdm", 0.71611);
+#endif
+
+    //--- cosmological parameters, primordial -------------------------
+    add_class_parameter("P_k_ini type", "analytic_Pk");
+
+    // normalise with A_s if it was given (positive), otherwise with sigma8
+    if( A_s_ > 0.0 ){
+      add_class_parameter("A_s", A_s_);
+    }else{
+      add_class_parameter("sigma8", sigma8_);
     }
+    add_class_parameter("n_s", n_s_);
+    add_class_parameter("alpha_s", 0.0);
+    add_class_parameter("T_cmb", Tcmb_);
+    add_class_parameter("YHe", 0.248);
+
+    // precision parameters
+    add_class_parameter("k_per_decade_for_pk", 100);
+    add_class_parameter("k_per_decade_for_bao", 100);
+    add_class_parameter("compute damping scale", "yes");
+    add_class_parameter("tol_perturb_integration", 1.e-8);
+    add_class_parameter("tol_background_integration", 1e-9);
+
+    // high precision options from cl_permille.pre:
+    // precision file to be passed as input in order to achieve at least percent precision on scalar Cls
+    add_class_parameter("hyper_flat_approximation_nu", 7000.);
+    add_class_parameter("transfer_neglect_delta_k_S_t0", 0.17);
+    add_class_parameter("transfer_neglect_delta_k_S_t1", 0.05);
+    add_class_parameter("transfer_neglect_delta_k_S_t2", 0.17);
+    add_class_parameter("transfer_neglect_delta_k_S_e", 0.13);
+    add_class_parameter("delta_l_max", 1000);
+
+    int class_verbosity = 0;
+
+    add_class_parameter("background_verbose", class_verbosity);
+    add_class_parameter("thermodynamics_verbose", class_verbosity);
+    add_class_parameter("perturbations_verbose", class_verbosity);
+    add_class_parameter("transfer_verbose", class_verbosity);
+    add_class_parameter("primordial_verbose", class_verbosity);
+    add_class_parameter("spectra_verbose", class_verbosity);
+    add_class_parameter("nonlinear_verbose", class_verbosity);
+    add_class_parameter("lensing_verbose", class_verbosity);
+    add_class_parameter("output_verbose", class_verbosity);
+
+    // output parameters, only needed for the control CLASS .ini file that we output
+    std::stringstream zlist;
+    if (ztarget_ == zstart_)
+      zlist << ztarget_ << ((ztarget_!=0.0)? ", 0.0" : "");
+    else
+      zlist << std::max(ztarget_, zstart_) << ", " << std::min(ztarget_, zstart_) << ", 0.0";
+    add_class_parameter("z_pk", zlist.str());
+
+    music::ilog << "Computing transfer function via ClassEngine..." << std::endl;
+    double wtime = get_wtime();
+
+    // make_unique already yields a temporary, so no std::move is needed
+    the_ClassEngine_ = std::make_unique<ClassEngine>(pars_, false);
+
+    wtime = get_wtime() - wtime;
+    music::ilog << "CLASS took " << wtime << " s." << std::endl;
+  }
+
+  //! Query the already initialised ClassEngine for the transfer functions at
+  //! redshift z and convert them in place.
+  //!
+  //! All output vectors are emptied first and then filled by getTk(). Every
+  //! CDM, baryon and neutrino entry is multiplied by -h_^2 / k^2; the total
+  //! matter entries (dm, tm) are overwritten with the CDM/baryon mix weighted
+  //! by (Omega_m_ - Omega_b_) / Omega_m_ and Omega_b_ / Omega_m_.
+  void run_ClassEngine(double z, std::vector<double> &k, std::vector<double> &dc, std::vector<double> &tc, std::vector<double> &db, std::vector<double> &tb,
+                       std::vector<double> &dn, std::vector<double> &tn, std::vector<double> &dm, std::vector<double> &tm)
+  {
+    // start from empty tables
+    for (auto *table : {&k, &dc, &db, &dn, &dm, &tc, &tb, &tn, &tm})
+    {
+      table->clear();
+    }
+
+    the_ClassEngine_->getTk(z, k, dc, db, dn, dm, tc, tb, tn, tm);
+
+    // mass fractions of CDM and baryons in the total matter density
+    const real_t frac_cdm = (Omega_m_ - Omega_b_) / Omega_m_;
+    const real_t frac_baryon = Omega_b_ / Omega_m_;
+
+    const size_t num_modes = k.size();
+    for (size_t m = 0; m < num_modes; ++m)
+    {
+      // convert to 'CAMB' format, since the tables are interpolated in log-log
+      // and negative numbers are not wanted
+      const auto scale = 1.0 / (k[m] * k[m]) * h_ * h_;
+
+      // density transfer functions
+      dc[m] = -dc[m] * scale;
+      db[m] = -db[m] * scale;
+      dm[m] = frac_cdm * dc[m] + frac_baryon * db[m];
+      dn[m] = -dn[m] * scale;
+
+      // velocity divergence transfer functions
+      tc[m] = -tc[m] * scale;
+      tb[m] = -tb[m] * scale;
+      tm[m] = frac_cdm * tc[m] + frac_baryon * tb[m];
+      tn[m] = -tn[m] * scale;
+    }
+  }
 
 public:
-  explicit transfer_CLASS_plugin( ConfigFile &cf)
-  : TransferFunction_plugin(cf)
-  { 
-    h_       = pcf_->GetValue<double>("cosmology","H0") / 100.0; 
-    Omega_m_ = pcf_->GetValue<double>("cosmology","Omega_m"); 
-    Omega_b_ = pcf_->GetValue<double>("cosmology","Omega_b");
-    N_ur_    = pcf_->GetValueSafe<double>("cosmology","N_ur", 3.046);
-    ztarget_ = pcf_->GetValueSafe<double>("cosmology","ztarget",0.0);
-    zstart_  = pcf_->GetValue<double>("setup","zstart");
-    double lbox = pcf_->GetValue<double>("setup","BoxLength");
-    int nres = pcf_->GetValue<double>("setup","GridRes");
-    kmax_    = 2.0*M_PI/lbox * nres/2 * sqrt(3) * 2.0; // 120% of spatial diagonal
+  //! Reads the cosmological parameters from the config file, initialises CLASS
+  //! via init_ClassEngine() and tabulates the transfer functions twice: at
+  //! z = 0 (the *0_ tables) and at ztarget_ (the tables without the 0 suffix).
+  //!
+  //! \throws std::runtime_error if neither a positive A_s nor sigma_8 is given
+  explicit transfer_CLASS_plugin(config_file &cf)
+      : TransferFunction_plugin(cf)
+  {
+    this->tf_isnormalised_ = true;
 
-    this->ClassEngine_get_data();
-    
-    gsl_ia_dtot_ = gsl_interp_accel_alloc();
-    gsl_ia_dc_   = gsl_interp_accel_alloc();
-    gsl_ia_db_   = gsl_interp_accel_alloc();
-    gsl_ia_ttot_ = gsl_interp_accel_alloc();
-    gsl_ia_tc_   = gsl_interp_accel_alloc();
-    gsl_ia_tb_   = gsl_interp_accel_alloc();
+    ofs_class_input_.open("input_class_parameters.ini", std::ios::trunc);
 
-    gsl_sp_dtot_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
-    gsl_sp_dc_   = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
-    gsl_sp_db_   = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
-    gsl_sp_ttot_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
-    gsl_sp_tc_   = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
-    gsl_sp_tb_   = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
+    h_ = pcf_->get_value<double>("cosmology", "H0") / 100.0;
+    Omega_m_ = pcf_->get_value<double>("cosmology", "Omega_m");
+    Omega_b_ = pcf_->get_value<double>("cosmology", "Omega_b");
+    N_ur_ = pcf_->get_value_safe<double>("cosmology", "Neff", 3.046);
+    ztarget_ = pcf_->get_value_safe<double>("cosmology", "ztarget", 0.0);
+    atarget_ = 1.0 / (1.0 + ztarget_);
+    zstart_ = pcf_->get_value<double>("setup", "zstart");
+    astart_ = 1.0 / (1.0 + zstart_);
+    A_s_ = pcf_->get_value_safe<double>("cosmology", "A_s", -1.0);
+    n_s_ = pcf_->get_value<double>("cosmology", "nspec");
+    Tcmb_ = cf.get_value_safe<double>("cosmology", "Tcmb", 2.7255);
 
-    gsl_spline_init(gsl_sp_dtot_, &tab_lnk_[0], &tab_dtot_[0], tab_lnk_.size());
-    gsl_spline_init(gsl_sp_dc_,   &tab_lnk_[0], &tab_dc_[0],   tab_lnk_.size());
-    gsl_spline_init(gsl_sp_db_,   &tab_lnk_[0], &tab_db_[0],   tab_lnk_.size());
-    gsl_spline_init(gsl_sp_ttot_, &tab_lnk_[0], &tab_ttot_[0], tab_lnk_.size());
-    gsl_spline_init(gsl_sp_tc_,   &tab_lnk_[0], &tab_tc_[0],   tab_lnk_.size());
-    gsl_spline_init(gsl_sp_tb_,   &tab_lnk_[0], &tab_tb_[0],   tab_lnk_.size());
-
-    kmin_ = std::exp(tab_lnk_[0]);
-  
-    tf_distinct_ = true; 
-    tf_withvel_  = true; 
-  }
-    
-  ~transfer_CLASS_plugin(){
-    gsl_spline_free(gsl_sp_dtot_);
-    gsl_spline_free(gsl_sp_dc_);
-    gsl_spline_free(gsl_sp_db_);
-    gsl_spline_free(gsl_sp_ttot_);
-    gsl_spline_free(gsl_sp_tc_);
-    gsl_spline_free(gsl_sp_tb_);
-
-    gsl_interp_accel_free(gsl_ia_dtot_);
-    gsl_interp_accel_free(gsl_ia_dc_);
-    gsl_interp_accel_free(gsl_ia_db_);
-    gsl_interp_accel_free(gsl_ia_ttot_);
-    gsl_interp_accel_free(gsl_ia_tc_);
-    gsl_interp_accel_free(gsl_ia_tb_);
-  }
-
-  inline double compute(double k, tf_type type) const {
-      gsl_spline *splineT = nullptr;
-      gsl_interp_accel *accT = nullptr;
-      switch(type){
-          case total:   splineT = gsl_sp_dtot_; accT = gsl_ia_dtot_; break;
-          case cdm:     splineT = gsl_sp_dc_;   accT = gsl_ia_dc_;   break;
-          case baryon:  splineT = gsl_sp_db_;   accT = gsl_ia_db_;   break;
-          case vtotal:  splineT = gsl_sp_ttot_; accT = gsl_ia_ttot_; break;
-          case vcdm:    splineT = gsl_sp_tc_;   accT = gsl_ia_tc_;   break;
-          case vbaryon: splineT = gsl_sp_tb_;   accT = gsl_ia_tb_;   break;
-          default:
-            throw std::runtime_error("Invalid type requested in transfer function evaluation");
+    // a positive A_s takes precedence; sigma_8 is only read when A_s is absent
+    if (A_s_ > 0) {
+      music::ilog << "CLASS: Using A_s=" << A_s_<< " to normalise the transfer function." << std::endl;
+    }else{
+      sigma8_ = pcf_->get_value_safe<double>("cosmology", "sigma_8", -1.0);
+      if( sigma8_ < 0 ){
+        throw std::runtime_error("Need to specify either A_s or sigma_8 for CLASS plugin...");
       }
+      music::ilog << "CLASS: Using sigma8_ =" << sigma8_<< " to normalise the transfer function." << std::endl;
+    }
 
-      double d = (k<=kmin_)? gsl_spline_eval(splineT, std::log(kmin_), accT) 
-        : gsl_spline_eval(splineT, std::log(k*h_), accT);
-      return -d/(k*k);
+    // determine highest k we will need for the resolution selected
+    double lbox = pcf_->get_value<double>("setup", "BoxLength");
+    int nres = pcf_->get_value<double>("setup", "GridRes");
+    // twice the grid Nyquist wavenumber (pi * nres / lbox) along the spatial
+    // diagonal (factor sqrt(3)), but never less than 20
+    // NOTE(review): units are presumably h/Mpc (passed on as "P_k_max_h/Mpc") -- confirm
+    kmax_ = std::max(20.0, 2.0 * M_PI / lbox * nres / 2 * sqrt(3) * 2.0);
+
+    // initialise CLASS and get the normalisation
+    this->init_ClassEngine();
+    A_s_ = the_ClassEngine_->get_A_s(); // this is either the input one, or the one computed from sigma8
+    
+    // compute the normalisation to interface with MUSIC
+    double k_p = pcf_->get_value_safe<double>("cosmology", "k_p", 0.05);
+    tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p * h_, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0));
+
+    // compute the transfer function at z=0 using CLASS engine
+    std::vector<double> k, dc, tc, db, tb, dn, tn, dm, tm;
+    this->run_ClassEngine(0.0, k, dc, tc, db, tb, dn, tn, dm, tm);
+
+    delta_c0_.set_data(k, dc);
+    theta_c0_.set_data(k, tc);
+    delta_b0_.set_data(k, db);
+    theta_b0_.set_data(k, tb);
+    delta_n0_.set_data(k, dn);
+    theta_n0_.set_data(k, tn);
+    delta_m0_.set_data(k, dm);
+    theta_m0_.set_data(k, tm);
+
+     // compute the transfer function at z=z_target using CLASS engine
+    this->run_ClassEngine(ztarget_, k, dc, tc, db, tb, dn, tn, dm, tm);
+    delta_c_.set_data(k, dc);
+    theta_c_.set_data(k, tc);
+    delta_b_.set_data(k, db);
+    theta_b_.set_data(k, tb);
+    delta_n_.set_data(k, dn);
+    theta_n_.set_data(k, tn);
+    delta_m_.set_data(k, dm);
+    theta_m_.set_data(k, tm);
+
+    // from here on kmin_/kmax_ hold the range actually tabulated by CLASS
+    // (taken from the ztarget_ run), replacing the requested kmax_
+    kmin_ = k[0];
+    kmax_ = k.back();
+
+    music::ilog << "CLASS table contains k = " << this->get_kmin() << " to " << this->get_kmax() << " h Mpc-1." << std::endl;
+
+    //--------------------------------------------------------------------------
+    // single fluid growing/decaying mode decomposition
+    //--------------------------------------------------------------------------
+    /*gsl_ia_Cplus_ = gsl_interp_accel_alloc();
+    gsl_ia_Cminus_ = gsl_interp_accel_alloc();
+
+    gsl_sp_Cplus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
+    gsl_sp_Cminus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
+
+    tab_Cplus_.assign(tab_lnk_.size(), 0);
+    tab_Cminus_.assign(tab_lnk_.size(), 0);
+
+    std::ofstream ofs("grow_decay.txt");
+
+    for (size_t i = 0; i < tab_lnk_.size(); ++i)
+    {
+      tab_Cplus_[i] = (3.0 / 5.0 * tab_dtot_[i] / atarget_ - 2.0 / 5.0 * tab_ttot_[i] / atarget_);
+      tab_Cminus_[i] = (2.0 / 5.0 * std::pow(atarget_, 1.5) * (tab_dtot_[i] + tab_ttot_[i]));
+
+      ofs << std::exp(tab_lnk_[i]) << " " << tab_Cplus_[i] << " " << tab_Cminus_[i] << " " << tab_dtot_[i] << " " << tab_ttot_[i] << std::endl;
+    }
+
+    gsl_spline_init(gsl_sp_Cplus_, &tab_lnk_[0], &tab_Cplus_[0], tab_lnk_.size());
+    gsl_spline_init(gsl_sp_Cminus_, &tab_lnk_[0], &tab_Cminus_[0], tab_lnk_.size());*/
+    //--------------------------------------------------------------------------
+
+    tf_distinct_ = true;
+    tf_withvel_ = true;
+    tf_withtotal0_ = true;
   }
 
-  inline double get_kmin(void) const { return std::exp(tab_lnk_[0])/h_; }
-  inline double get_kmax(void) const { return std::exp(tab_lnk_[tab_lnk_.size()-1])/h_; }
+  //! Nothing is released explicitly here; the CLASS engine is held in a
+  //! std::unique_ptr member (the_ClassEngine_).
+  ~transfer_CLASS_plugin()
+  {
+  }
+
+  //! Evaluate transfer function `type` at wavenumber k.
+  //!
+  //! k is multiplied by h_ before the lookup. Outside the tabulated range
+  //! [kmin_, kmax_] the result is 0; inside it the interpolated table value is
+  //! multiplied by tnorm_.
+  //!
+  //! \throws std::runtime_error for a type that has no table (only when k is in range)
+  inline double compute(double k, tf_type type) const
+  {
+    const double k_scaled = k * h_;
+
+    const bool inside_table = !(k_scaled < kmin_ || k_scaled > kmax_);
+    if (!inside_table)
+      return 0.0;
+
+    // pick the table belonging to the requested type
+    const interpolated_function_1d<true, true, false> *table = nullptr;
+    switch (type)
+    {
+    // tables filled from the ztarget_ run in the constructor
+    case total:
+      table = &delta_m_;
+      break;
+    case cdm:
+      table = &delta_c_;
+      break;
+    case baryon:
+      table = &delta_b_;
+      break;
+    case vtotal:
+      table = &theta_m_;
+      break;
+    case vcdm:
+      table = &theta_c_;
+      break;
+    case vbaryon:
+      table = &theta_b_;
+      break;
+
+    // tables filled from the z = 0 run in the constructor
+    case total0:
+      table = &delta_m0_;
+      break;
+    case cdm0:
+      table = &delta_c0_;
+      break;
+    case baryon0:
+      table = &delta_b0_;
+      break;
+    case vtotal0:
+      table = &theta_m0_;
+      break;
+    case vcdm0:
+      table = &theta_c0_;
+      break;
+    case vbaryon0:
+      table = &theta_b0_;
+      break;
+
+    default:
+      throw std::runtime_error("Invalid type requested in transfer function evaluation");
+    }
+
+    // the interpolated value passes through real_t before the normalisation
+    const real_t interpolated = (*table)(k_scaled);
+    return interpolated * tnorm_;
+  }
+
+  //! Lower end of the tabulated range: kmin_ divided by h_.
+  inline double get_kmin(void) const
+  {
+    return kmin_ / h_;
+  }
+  //! Upper end of the tabulated range: kmax_ divided by h_.
+  inline double get_kmax(void) const
+  {
+    return kmax_ / h_;
+  }
 };
 
-namespace {
+// File-local creator object constructed with the name "CLASS".
+// NOTE(review): presumably its constructor registers transfer_CLASS_plugin with the
+// transfer function plugin factory -- the creator template is defined elsewhere.
+namespace
+{
 TransferFunction_plugin_creator_concrete<transfer_CLASS_plugin> creator("CLASS");
 }
 
diff --git a/src/plugins/transfer_eisenstein.cc b/src/plugins/transfer_eisenstein.cc
index 9d4c032..adc9e06 100644
--- a/src/plugins/transfer_eisenstein.cc
+++ b/src/plugins/transfer_eisenstein.cc
@@ -207,13 +207,13 @@ public:
 	 \param Tcmb mean temperature of the CMB fluctuations (defaults to
 	 Tcmb = 2.726 if not specified)
 	 */
-  transfer_eisenstein_plugin(ConfigFile &cf)
+  transfer_eisenstein_plugin(config_file &cf)
       : TransferFunction_plugin(cf)
   {
-    double Tcmb = pcf_->GetValueSafe<double>("cosmology", "Tcmb", 2.726);
-    double H0 = pcf_->GetValue<double>("cosmology", "H0");
-    double Omega_m = pcf_->GetValue<double>("cosmology", "Omega_m");
-    double Omega_b = pcf_->GetValue<double>("cosmology", "Omega_b");
+    double Tcmb = pcf_->get_value_safe<double>("cosmology", "Tcmb", 2.726);
+    double H0 = pcf_->get_value<double>("cosmology", "H0");
+    double Omega_m = pcf_->get_value<double>("cosmology", "Omega_m");
+    double Omega_b = pcf_->get_value<double>("cosmology", "Omega_b");
 
     etf_.set_parameters(H0, Omega_m, Omega_b, Tcmb);
     
@@ -257,15 +257,15 @@ protected:
   };
 
 public:
-  transfer_eisenstein_wdm_plugin(ConfigFile &cf)
+  transfer_eisenstein_wdm_plugin(config_file &cf)
       : TransferFunction_plugin(cf)
   {
-    double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726);
-    omegam_ = pcf_->GetValue<double>("cosmology", "Omega_m");
-    omegab_ = pcf_->GetValue<double>("cosmology", "Omega_b");
-    H0_ = pcf_->GetValue<double>("cosmology", "H0");
+    double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726);
+    omegam_ = pcf_->get_value<double>("cosmology", "Omega_m");
+    omegab_ = pcf_->get_value<double>("cosmology", "Omega_b");
+    H0_ = pcf_->get_value<double>("cosmology", "H0");
     m_h0 = H0_ / 100.0;
-    wdmm_ = pcf_->GetValue<double>("cosmology", "WDMmass");
+    wdmm_ = pcf_->get_value<double>("cosmology", "WDMmass");
 
     etf_.set_parameters(H0_, omegam_, omegab_, Tcmb);
 
@@ -273,7 +273,7 @@ public:
     typemap_.insert(std::pair<std::string, int>("VIEL", wdm_viel));             // add the other types
     typemap_.insert(std::pair<std::string, int>("BODE_WRONG", wdm_bode_wrong)); // add the other types
 
-    type_ = pcf_->GetValueSafe<std::string>("cosmology", "WDMtftype", "BODE");
+    type_ = pcf_->get_value_safe<std::string>("cosmology", "WDMtftype", "BODE");
 
     //type_ = std::string( toupper( type_.c_str() ) );
 
@@ -286,29 +286,29 @@ public:
     {
     //... parameterisation from Bode et al. (2001), ApJ, 556, 93
     case wdm_bode:
-      wdmnu_ = pcf_->GetValueSafe<double>("cosmology", "WDMnu", 1.0);
-      wdmgx_ = pcf_->GetValueSafe<double>("cosmology", "WDMg_x", 1.5);
+      wdmnu_ = pcf_->get_value_safe<double>("cosmology", "WDMnu", 1.0);
+      wdmgx_ = pcf_->get_value_safe<double>("cosmology", "WDMg_x", 1.5);
       m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ * 0.01 / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29);
 
       break;
 
     //... parameterisation from Viel et al. (2005), Phys Rev D, 71
     case wdm_viel:
-      wdmnu_ = pcf_->GetValueSafe<double>("cosmology", "WDMnu", 1.12);
+      wdmnu_ = pcf_->get_value_safe<double>("cosmology", "WDMnu", 1.12);
       m_WDMalpha = 0.049 * pow(omegam_ / 0.25, 0.11) * pow(H0_ * 0.01 / 0.7, 1.22) * pow(wdmm_, -1.11);
       break;
 
     //.... below is for historical reasons due to the buggy parameterisation
     //.... in early versions of MUSIC, but apart from H instead of h, Bode et al.
     case wdm_bode_wrong:
-      wdmnu_ = pcf_->GetValueSafe<double>("cosmology", "WDMnu", 1.0);
-      wdmgx_ = pcf_->GetValueSafe<double>("cosmology", "WDMg_x", 1.5);
+      wdmnu_ = pcf_->get_value_safe<double>("cosmology", "WDMnu", 1.0);
+      wdmgx_ = pcf_->get_value_safe<double>("cosmology", "WDMg_x", 1.5);
       m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29);
       break;
 
     default:
-      wdmnu_ = pcf_->GetValueSafe<double>("cosmology", "WDMnu", 1.0);
-      wdmgx_ = pcf_->GetValueSafe<double>("cosmology", "WDMg_x", 1.5);
+      wdmnu_ = pcf_->get_value_safe<double>("cosmology", "WDMnu", 1.0);
+      wdmgx_ = pcf_->get_value_safe<double>("cosmology", "WDMg_x", 1.5);
       m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ * 0.01 / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29);
       break;
     }
@@ -340,20 +340,20 @@ protected:
   eisenstein_transfer etf_;
 
 public:
-  transfer_eisenstein_cdmbino_plugin(ConfigFile &cf)
+  transfer_eisenstein_cdmbino_plugin(config_file &cf)
       : TransferFunction_plugin(cf)
   { 
-    double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726);
+    double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726);
 
-    omegam_ = pcf_->GetValue<double>("cosmology", "Omega_m");
-    omegab_ = pcf_->GetValue<double>("cosmology", "Omega_b");
-    H0_ = pcf_->GetValue<double>("cosmology", "H0");
+    omegam_ = pcf_->get_value<double>("cosmology", "Omega_m");
+    omegab_ = pcf_->get_value<double>("cosmology", "Omega_b");
+    H0_ = pcf_->get_value<double>("cosmology", "H0");
     m_h0 = H0_ / 100.0;
 
     etf_.set_parameters(H0_, omegam_, omegab_, Tcmb);
 
-    mcdm_ = pcf_->GetValueSafe<double>("cosmology", "CDM_mass", 100.0); // bino particle mass in GeV
-    Tkd_ = pcf_->GetValueSafe<double>("cosmology", "CDM_Tkd", 33.0);    // temperature at which CDM particle kinetically decouples (in MeV)
+    mcdm_ = pcf_->get_value_safe<double>("cosmology", "CDM_mass", 100.0); // bino particle mass in GeV
+    Tkd_ = pcf_->get_value_safe<double>("cosmology", "CDM_Tkd", 33.0);    // temperature at which CDM particle kinetically decouples (in MeV)
 
     kfs_ = 1.7e6 / m_h0 * sqrt(mcdm_ / 100. * Tkd_ / 30.) / (1.0 + log(Tkd_ / 30.) / 19.2);
     kd_ = 3.8e7 / m_h0 * sqrt(mcdm_ / 100. * Tkd_ / 30.);
@@ -395,19 +395,19 @@ protected:
   eisenstein_transfer etf_;
 
 public:
-  transfer_eisenstein_cutoff_plugin(ConfigFile &cf)
+  transfer_eisenstein_cutoff_plugin(config_file &cf)
       : TransferFunction_plugin(cf)
   { 
-    double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726);
+    double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726);
 
-    omegam_ = pcf_->GetValue<double>("cosmology", "Omega_m");
-    omegab_ = pcf_->GetValue<double>("cosmology", "Omega_b");
-    H0_ = pcf_->GetValue<double>("cosmology", "H0");
+    omegam_ = pcf_->get_value<double>("cosmology", "Omega_m");
+    omegab_ = pcf_->get_value<double>("cosmology", "Omega_b");
+    H0_ = pcf_->get_value<double>("cosmology", "H0");
     m_h0 = H0_ / 100.0;
 
     etf_.set_parameters(H0_, omegam_, omegab_, Tcmb);
 
-    Rcut_ = pcf_->GetValueSafe<double>("cosmology", "Rcut", 1.0);
+    Rcut_ = pcf_->get_value_safe<double>("cosmology", "Rcut", 1.0);
   }
 
   inline double compute(double k, tf_type type) const
@@ -434,5 +434,5 @@ namespace
 TransferFunction_plugin_creator_concrete<transfer_eisenstein_plugin> creator("eisenstein");
 TransferFunction_plugin_creator_concrete<transfer_eisenstein_wdm_plugin> creator2("eisenstein_wdm");
 TransferFunction_plugin_creator_concrete<transfer_eisenstein_cdmbino_plugin> creator3("eisenstein_cdmbino");
-TransferFunction_plugin_creator_concrete<transfer_eisenstein_cutoff_plugin> creator4("eisenstein_cutoff");
+// TransferFunction_plugin_creator_concrete<transfer_eisenstein_cutoff_plugin> creator4("eisenstein_cutoff");
 } // namespace
diff --git a/src/random_plugin.cc b/src/random_plugin.cc
index 045978f..5121efa 100644
--- a/src/random_plugin.cc
+++ b/src/random_plugin.cc
@@ -13,32 +13,38 @@ void print_RNG_plugins()
     std::map<std::string, RNG_plugin_creator *> &m = get_RNG_plugin_map();
     std::map<std::string, RNG_plugin_creator *>::iterator it;
     it = m.begin();
-    csoca::ilog << "- Available random number generator plug-ins:" << std::endl;
+    music::ilog << "Available random number generator plug-ins:" << std::endl;
     while (it != m.end())
     {
         if ((*it).second){
-            csoca::ilog.Print("\t\'%s\'\n", (*it).first.c_str());
+            music::ilog.Print("\t\'%s\'\n", (*it).first.c_str());
         }
         ++it;
     }
+    music::ilog << std::endl;
 }
 
+// Look up the RNG plug-in named by [random]/generator (default "MUSIC") in the
+// plug-in map and return a new instance from its creator. An unknown name is
+// reported on the error log, the available plug-ins are listed, and
+// std::runtime_error is thrown.
-std::unique_ptr<RNG_plugin> select_RNG_plugin(ConfigFile &cf)
+std::unique_ptr<RNG_plugin> select_RNG_plugin(config_file &cf)
 {
-    std::string rngname = cf.GetValueSafe<std::string>("random", "generator", "MUSIC");
+    std::string rngname = cf.get_value_safe<std::string>("random", "generator", "MUSIC");
 
+    // operator[] inserts a null entry for an unknown name; print_RNG_plugins() skips null creators
     RNG_plugin_creator *the_RNG_plugin_creator = get_RNG_plugin_map()[rngname];
 
     if (!the_RNG_plugin_creator)
     {
-        csoca::ilog.Print("Invalid/Unregistered random number generator plug-in encountered : %s", rngname.c_str());
+        music::elog << "Invalid/Unregistered random number generator plug-in encountered : " << rngname << std::endl;
         print_RNG_plugins();
         throw std::runtime_error("Unknown random number generator plug-in");
     }
     else
     {
-        csoca::ilog << "-------------------------------------------------------------------------------" << std::endl;
-        csoca::ilog << std::setw(32) << std::left << "Random number generator plugin" << " : " << rngname << std::endl;
+        music::ilog << "-------------------------------------------------------------------------------" << std::endl;
+        music::ilog << std::setw(32) << std::left << "Random number generator plugin" << " : " << rngname << std::endl;
     }
 
     return std::move(the_RNG_plugin_creator->Create(cf));
diff --git a/src/testing.cc b/src/testing.cc
index bfd088d..8e88e17 100644
--- a/src/testing.cc
+++ b/src/testing.cc
@@ -9,7 +9,7 @@ namespace testing
 {
 
 void output_potentials_and_densities(
-    ConfigFile &the_config,
+    config_file &the_config,
     size_t ngrid, real_t boxlen,
     Grid_FFT<real_t> &phi,
     Grid_FFT<real_t> &phi2,
@@ -17,8 +17,8 @@ void output_potentials_and_densities(
     Grid_FFT<real_t> &phi3b,
     std::array<Grid_FFT<real_t> *, 3> &A3)
 {
-    const std::string fname_hdf5 = the_config.GetValueSafe<std::string>("output", "fname_hdf5", "output.hdf5");
-    const std::string fname_analysis = the_config.GetValueSafe<std::string>("output", "fbase_analysis", "output");
+    const std::string fname_hdf5 = the_config.get_value_safe<std::string>("output", "fname_hdf5", "output.hdf5");
+    const std::string fname_analysis = the_config.get_value_safe<std::string>("output", "fbase_analysis", "output");
 
     Grid_FFT<real_t> delta({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
     Grid_FFT<real_t> delta2({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
@@ -98,7 +98,7 @@ void output_potentials_and_densities(
 }
 
 void output_velocity_displacement_symmetries(
-    ConfigFile &the_config,
+    config_file &the_config,
     size_t ngrid, real_t boxlen, real_t vfac, real_t dplus,
     Grid_FFT<real_t> &phi,
     Grid_FFT<real_t> &phi2,
@@ -107,8 +107,8 @@ void output_velocity_displacement_symmetries(
     std::array<Grid_FFT<real_t> *, 3> &A3,
     bool bwrite_out_fields)
 {
-    const std::string fname_hdf5 = the_config.GetValueSafe<std::string>("output", "fname_hdf5", "output.hdf5");
-    const std::string fname_analysis = the_config.GetValueSafe<std::string>("output", "fbase_analysis", "output");
+    const std::string fname_hdf5 = the_config.get_value_safe<std::string>("output", "fname_hdf5", "output.hdf5");
+    const std::string fname_analysis = the_config.get_value_safe<std::string>("output", "fbase_analysis", "output");
 
     real_t vfac1 = vfac;
     real_t vfac2 = 2 * vfac;
@@ -232,7 +232,7 @@ void output_velocity_displacement_symmetries(
     }
 
 
-    csoca::ilog << "std. deviation of invariant : ( D+ | I_xy | I_yz | I_zx ) \n"
+    music::ilog << "std. deviation of invariant : ( D+ | I_xy | I_yz | I_zx ) \n"
                     << std::setw(16) << dplus << " "
                     << std::setw(16) << Icomp[0] << " "
                     << std::setw(16) << Icomp[1] << " "
@@ -241,7 +241,8 @@ void output_velocity_displacement_symmetries(
 }
 
 void output_convergence(
-    ConfigFile &the_config,
+    config_file &the_config,
+    cosmology::calculator* the_cosmo_calc,
     std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus,
     Grid_FFT<real_t> &phi,
     Grid_FFT<real_t> &phi2,
@@ -249,7 +250,6 @@ void output_convergence(
     Grid_FFT<real_t> &phi3b,
     std::array<Grid_FFT<real_t> *, 3> &A3)
 {
-
     // scale all potentials to remove dplus0
     phi /= dplus;
     phi2 /= dplus * dplus;
@@ -259,11 +259,95 @@ void output_convergence(
     (*A3[1]) /= dplus * dplus * dplus;
     (*A3[2]) /= dplus * dplus * dplus;
 
+    ////////////////////// theoretical convergence radius //////////////////////
+    // Builds t_eds = 0.0204 / norm, where norm is the per-cell root of the summed squares of the scaled, k_m*k_n-weighted potential.
+    // compute phi_code: Fourier-space copy of phi with the sign flipped
+    Grid_FFT<real_t> phi_code({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+    phi_code.FourierTransformForward(false); // NOTE(review): presumably `false` only switches the grid to k-space without transforming data — confirm
+    #pragma omp parallel for //collapse(3)
+    for (std::size_t i = 0; i < phi_code.size(0); ++i) {
+        for (std::size_t j = 0; j < phi_code.size(1); ++j) {
+            for (std::size_t k = 0; k < phi_code.size(2); ++k) {
+                std::size_t idx = phi_code.get_idx(i, j, k);
+                phi_code.kelem(idx) = -phi.kelem(idx); // reads phi.kelem: assumes phi is held in Fourier space at this point — TODO confirm
+            }
+        }
+    }
+
+    // initialize norm to 0: nabla_vini_norm accumulates the squared components below
+    Grid_FFT<real_t> nabla_vini_norm({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+    #pragma omp parallel for //collapse(3)
+    for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) {
+        for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) {
+            for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) {
+                std::size_t idx = nabla_vini_norm.get_idx(i, j, k);
+                nabla_vini_norm.relem(idx) = 0.0;
+            }
+        }
+    }
+    // loop over the components (m, n) with n >= m; nabla_vini_mn is reused as scratch for each one
+    Grid_FFT<real_t> nabla_vini_mn({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+    for(std::size_t m = 0; m < 3; m++) {
+        for(std::size_t n = m; n < 3; n++) {
+            nabla_vini_mn.FourierTransformForward(false);
+            #pragma omp parallel for //collapse(3)
+            for (std::size_t i = 0; i < phi_code.size(0); ++i) {
+                for (std::size_t j = 0; j < phi_code.size(1); ++j) {
+                    for (std::size_t k = 0; k < phi_code.size(2); ++k) {
+                        std::size_t idx = phi_code.get_idx(i, j, k);
+                        auto kk = phi_code.get_k<real_t>(i, j, k);
+                        nabla_vini_mn.kelem(idx) = phi_code.kelem(idx) * (kk[m] * kk[n]); // weight each mode by k_m * k_n
+                    }
+                }
+            }
+            nabla_vini_mn.FourierTransformBackward();
+            nabla_vini_mn *= (3.2144004915 / the_cosmo_calc->get_growth_factor(1.0)); // NOTE(review): the origin of the constant 3.2144004915 is not documented here — confirm
+            // sum of squares; m != n terms count twice because only n >= m is visited and kk[m] * kk[n] is symmetric
+            #pragma omp parallel for //collapse(3)
+            for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) {
+                for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) {
+                    for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) {
+                        std::size_t idx = nabla_vini_norm.get_idx(i, j, k);
+                        if(m != n) {
+                            nabla_vini_norm.relem(idx) += (2.0 * nabla_vini_mn.relem(idx) * nabla_vini_mn.relem(idx));
+                        } else {
+                            nabla_vini_norm.relem(idx) += (nabla_vini_mn.relem(idx) * nabla_vini_mn.relem(idx));
+                        }
+                    }
+                }
+            }
+        }
+    }
+    // square root: turn the accumulated sum of squares into a norm
+    #pragma omp parallel for //collapse(3)
+    for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) {
+        for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) {
+            for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) {
+                std::size_t idx = nabla_vini_norm.get_idx(i, j, k);
+                nabla_vini_norm.relem(idx) = std::sqrt(nabla_vini_norm.relem(idx));
+            }
+        }
+    }
+
+    // get t_eds = 0.0204 / nabla_vini_norm, cell by cell
+    Grid_FFT<real_t> t_eds({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+    #pragma omp parallel for //collapse(3)
+    for (std::size_t i = 0; i < t_eds.size(0); ++i) {
+        for (std::size_t j = 0; j < t_eds.size(1); ++j) {
+            for (std::size_t k = 0; k < t_eds.size(2); ++k) {
+                std::size_t idx = t_eds.get_idx(i, j, k);
+                t_eds.relem(idx) = 0.0204 / nabla_vini_norm.relem(idx); // NOTE(review): 0.0204 is an undocumented constant, and a zero norm divides by zero here — confirm
+            }
+        }
+    }
+
+    ////////////////////////// 3lpt convergence test ///////////////////////////
+
     // initialize grids to 0
     Grid_FFT<real_t> psi_1({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
     Grid_FFT<real_t> psi_2({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
     Grid_FFT<real_t> psi_3({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
-#pragma omp parallel for collapse(3)
+    #pragma omp parallel for //collapse(3)
     for (std::size_t i = 0; i < psi_1.size(0); ++i) {
         for (std::size_t j = 0; j < psi_1.size(1); ++j) {
             for (std::size_t k = 0; k < psi_1.size(2); ++k) {
@@ -290,7 +374,7 @@ void output_convergence(
         psi_2_tmp.FourierTransformForward(false);
         psi_3_tmp.FourierTransformForward(false);
 
-#pragma omp parallel for collapse(3)
+        #pragma omp parallel for //collapse(3)
         for (std::size_t i = 0; i < phi.size(0); ++i) {
             for (std::size_t j = 0; j < phi.size(1); ++j) {
                 for (std::size_t k = 0; k < phi.size(2); ++k) {
@@ -311,7 +395,7 @@ void output_convergence(
         psi_3_tmp.FourierTransformBackward();
 
         // sum of squares
-#pragma omp parallel for collapse(3)
+        #pragma omp parallel for //collapse(3)
         for (std::size_t i = 0; i < psi_1.size(0); ++i) {
             for (std::size_t j = 0; j < psi_1.size(1); ++j) {
                 for (std::size_t k = 0; k < psi_1.size(2); ++k) {
@@ -325,7 +409,7 @@ void output_convergence(
     } // loop on dimensions
 
     // apply square root for the L2 norm
-#pragma omp parallel for collapse(3)
+#pragma omp parallel for //collapse(3)
     for (std::size_t i = 0; i < psi_1.size(0); ++i) {
         for (std::size_t j = 0; j < psi_1.size(1); ++j) {
             for (std::size_t k = 0; k < psi_1.size(2); ++k) {
@@ -339,7 +423,7 @@ void output_convergence(
 
     // convergence radius
     Grid_FFT<real_t> inv_convergence_radius({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
-#pragma omp parallel for collapse(3)
+    #pragma omp parallel for //collapse(3)
     for (std::size_t i = 0; i < psi_1.size(0); ++i) {
         for (std::size_t j = 0; j < psi_1.size(1); ++j) {
             for (std::size_t k = 0; k < psi_1.size(2); ++k) {
@@ -351,13 +435,17 @@ void output_convergence(
         }
     }
 
-    // write results
-    unlink("convergence_test.hdf5");
-    inv_convergence_radius.Write_to_HDF5("convergence_test.hdf5", "inv_convergence_radius");
-    psi_1.Write_to_HDF5("convergence_test.hdf5", "psi_1_norm");
-    psi_2.Write_to_HDF5("convergence_test.hdf5", "psi_2_norm");
-    psi_3.Write_to_HDF5("convergence_test.hdf5", "psi_3_norm");
-
+    ////////////////////////////// write results ///////////////////////////////
+    std::string convergence_test_filename("convergence_test.hdf5");
+    unlink(convergence_test_filename.c_str());
+#if defined(USE_MPI)
+    MPI_Barrier(MPI_COMM_WORLD);
+#endif
+    t_eds.Write_to_HDF5(convergence_test_filename, "t_eds");
+    inv_convergence_radius.Write_to_HDF5(convergence_test_filename, "inv_convergence_radius");
+    // psi_1.Write_to_HDF5(convergence_test_filename, "psi_1_norm");
+    // psi_2.Write_to_HDF5(convergence_test_filename, "psi_2_norm");
+    // psi_3.Write_to_HDF5(convergence_test_filename, "psi_3_norm");
 }
 
 } // namespace testing
diff --git a/src/transfer_function_plugin.cc b/src/transfer_function_plugin.cc
index e9d3748..5b2ec9e 100644
--- a/src/transfer_function_plugin.cc
+++ b/src/transfer_function_plugin.cc
@@ -13,31 +13,32 @@ void print_TransferFunction_plugins()
     std::map<std::string, TransferFunction_plugin_creator *> &m = get_TransferFunction_plugin_map();
     std::map<std::string, TransferFunction_plugin_creator *>::iterator it;
     it = m.begin();
-    csoca::ilog << "Available transfer function plug-ins:" << std::endl;
+    music::ilog << "Available transfer function plug-ins:" << std::endl;
     while (it != m.end())
     {
         if ((*it).second)
-            csoca::ilog << "\t\'" << (*it).first << "\'" << std::endl;
+            music::ilog << "\t\'" << (*it).first << "\'" << std::endl;
         ++it;
     }
+    music::ilog << std::endl;
 }
 
-std::unique_ptr<TransferFunction_plugin> select_TransferFunction_plugin(ConfigFile &cf)
+std::unique_ptr<TransferFunction_plugin> select_TransferFunction_plugin(config_file &cf)
 {
-    std::string tfname = cf.GetValue<std::string>("cosmology", "transfer");
+    std::string tfname = cf.get_value<std::string>("cosmology", "transfer");
 
     TransferFunction_plugin_creator *the_TransferFunction_plugin_creator = get_TransferFunction_plugin_map()[tfname];
 
     if (!the_TransferFunction_plugin_creator)
     {
-        csoca::elog << "Invalid/Unregistered transfer function plug-in encountered : " << tfname << std::endl;
+        music::elog << "Invalid/Unregistered transfer function plug-in encountered : " << tfname << std::endl;
         print_TransferFunction_plugins();
         throw std::runtime_error("Unknown transfer function plug-in");
     }
     else
     {
-        csoca::ilog << "-------------------------------------------------------------------------------" << std::endl;
-        csoca::ilog << std::setw(32) << std::left << "Transfer function plugin" << " : " << tfname << std::endl;
+        music::ilog << "-------------------------------------------------------------------------------" << std::endl;
+        music::ilog << std::setw(32) << std::left << "Transfer function plugin" << " : " << tfname << std::endl;
     }
 
     return std::move(the_TransferFunction_plugin_creator->create(cf));