Merge branch 'develop' into 'master'

Develop See merge request ohahn/fastlpt!3
2024-09-19 17:03:45 +02:00 · 2020-05-06 13:03:42 +02:00 · 2020-05-06 13:03:42 +02:00 · 38320d2150
commit 38320d2150
parent a58fbc9778 0937242a1b
61 changed files with 9993 additions and 1922 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,56 +1,14 @@
-build
+.DS_Store
 .vscode
-src/CMakeFiles/3.12.2/CompilerIdC/CMakeCCompilerId.c
+build
-src/CMakeFiles/feature_tests.c
+include/cmake_config.hh
-src/CMakeFiles/feature_tests.cxx
+src/input_powerspec.txt
-src/CMakeFiles/progress.marks
+CMakeCache.txt
-src/CMakeFiles/3.12.2/CMakeCCompiler.cmake
+CMakeFiles/cmake.check_cache
-src/CMakeFiles/3.12.2/CMakeCXXCompiler.cmake
+src/CMakeFiles
 src/CMakeFiles/3.12.2/CMakeDetermineCompilerABI_C.bin
 src/CMakeFiles/3.12.2/CMakeDetermineCompilerABI_CXX.bin
 src/CMakeFiles/3.12.2/CMakeSystem.cmake
 src/CMakeFiles/fastLPT.dir/build.make
 src/CMakeFiles/FindMPI/test_mpi.cpp
 src/CMakeFiles/FindMPI/test_mpi_C.bin
 src/CMakeFiles/FindMPI/test_mpi_CXX.bin
 src/CMakeFiles/FindOpenMP/OpenMPCheckVersion.c
 src/CMakeFiles/FindOpenMP/OpenMPCheckVersion.cpp
 src/CMakeFiles/FindOpenMP/OpenMPTryFlag.c
 src/CMakeFiles/FindOpenMP/OpenMPTryFlag.cpp
 src/CMakeFiles/FindOpenMP/ompver_C.bin
 src/CMakeFiles/FindOpenMP/ompver_CXX.bin
 src/CMakeFiles/fastLPT.dir/CXX.includecache
 src/CMakeFiles/fastLPT.dir/DependInfo.cmake
 src/CMakeFiles/fastLPT.dir/plugins/transfer_eisenstein.cc.o
 src/CMakeFiles/3.12.2/CompilerIdCXX/a.out
 src/CMakeFiles/fastLPT.dir/cmake_clean.cmake
 src/CMakeFiles/fastLPT.dir/depend.internal
 src/CMakeFiles/fastLPT.dir/depend.make
 src/CMakeFiles/fastLPT.dir/flags.make
 src/CMakeFiles/fastLPT.dir/grid_fft.cc.o
 src/CMakeFiles/fastLPT.dir/link.txt
 src/CMakeFiles/fastLPT.dir/logger.cc.o
 src/CMakeFiles/fastLPT.dir/main.cc.o
 src/CMakeFiles/fastLPT.dir/progress.make
 src/CMakeFiles/fastLPT.dir/random_plugin.cc.o
 src/CMakeFiles/fastLPT.dir/transfer_function_plugin.cc.o
 src/CMakeFiles/fastLPT.dir/plugins/random_music.cc.o
 src/CMakeFiles/fastLPT.dir/plugins/random_music_wnoise_generator.cc.o
 src/CMakeFiles/feature_tests.bin
 src/CMakeFiles/CMakeDirectoryInformation.cmake
 src/CMakeFiles/CMakeOutput.log
 src/CMakeFiles/Makefile.cmake
 src/CMakeFiles/Makefile2
 src/CMakeFiles/TargetDirectories.txt
 src/CMakeFiles/cmake.check_cache
 src/CMakeFiles/3.12.2/CompilerIdC/a.out
 src/CMakeFiles/3.12.2/CompilerIdCXX/CMakeCXXCompilerId.cpp
 src/CMakeFiles/hdf5/cmake_hdf5_test.c
 src/fastLPT.dSYM/Contents/Info.plist
 src/fastLPT.dSYM/Contents/Resources/DWARF/fastLPT
 src/cmake_install.cmake
 src/CMakeCache.txt
 src/fastLPT
 src/input_powerspec.txt
 src/Makefile
-.DS_Store
+external/panphasia/rand_base.mod
 external/panphasia/rand_int.mod
 external/panphasia/rand.mod
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -1,16 +1,42 @@
 cmake_minimum_required(VERSION 3.9)
 set(PRGNAME monofonIC)
 project(monofonIC)
 project(monofonIC C CXX)
 #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g  -fsanitize=address")
 set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -march=native -Wall -pedantic" CACHE STRING "Flags used by the compiler during Release builds." FORCE)
 set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -march=native -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during RelWithDebInfo builds." FORCE)
 set(CMAKE_CXX_FLAGS_DEBUG "-g -O1 -march=native -DDEBUG -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during Debug builds." FORCE)
 set(CMAKE_CXX_FLAGS_DEBUGSANADD "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address " CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for address." FORCE)
 set(CMAKE_CXX_FLAGS_DEBUGSANUNDEF "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for undefineds." FORCE)
 set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "Flags used by the compiler during Release builds." FORCE)
 set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}" CACHE STRING "Flags used by the compiler during RelWithDebInfo builds." FORCE)
 set(CMAKE_C_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" CACHE STRING "Flags used by the compiler during Debug builds." FORCE)
 set(CMAKE_C_FLAGS_DEBUGSANADD "${CMAKE_CXX_FLAGS_DEBUGSANADD}" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for address." FORCE)
 set(CMAKE_C_FLAGS_DEBUGSANUNDEF "${CMAKE_CXX_FLAGS_DEBUGSANUNDEF}" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for undefineds." FORCE)
 set(default_build_type "Release")
 if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
   # Neither a build type nor a list of configuration types was given:
   # fall back to the default and store it in the cache.
   message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
   set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE STRING "Choose the type of build." FORCE)
   # Offer the known build types as a selection list in cmake-gui
   set_property(
     CACHE CMAKE_BUILD_TYPE
     PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo" "DebugSanAdd" "DebugSanUndef"
   )
 endif()
 mark_as_advanced(CMAKE_CXX_FLAGS_DEBUGSANADD CMAKE_CXX_FLAGS_DEBUGSANUNDEF)
 mark_as_advanced(CMAKE_C_FLAGS_DEBUGSANADD CMAKE_C_FLAGS_DEBUGSANUNDEF)
 mark_as_advanced(CMAKE_EXECUTABLE_FORMAT CMAKE_OSX_ARCHITECTURES CMAKE_OSX_DEPLOYMENT_TARGET CMAKE_OSX_SYSROOT)
 ########################################################################################################################
 # include class submodule
 include(${CMAKE_CURRENT_SOURCE_DIR}/external/class.cmake)
 # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -march=native -Wall -fno-omit-frame-pointer -g  -fsanitize=address")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic")
 find_package(PkgConfig REQUIRED)
-set(CMAKE_MODULE_PATH
+set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}")
        "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}")
 ########################################################################################################################
@ -48,21 +74,70 @@ if(ENABLE_MPI)
  endif(MPI_CXX_FOUND)
 endif(ENABLE_MPI)
 ########################################################################################################################
 # floating point precision
 set (
  CODE_PRECISION "DOUBLE"
  CACHE STRING "Floating point type used for internal computations and FFTs"
 )
 set_property (
  CACHE CODE_PRECISION
  PROPERTY STRINGS FLOAT DOUBLE LONGDOUBLE
 )
 ########################################################################################################################
 # convolver type, right now only orszag or naive
 set (
  CONVOLVER_TYPE "ORSZAG"
  CACHE STRING "Convolution algorithm to be used (Naive=no dealiasing, Orszag=dealiased)"
 )
 set_property (
  CACHE CONVOLVER_TYPE
  PROPERTY STRINGS ORSZAG NAIVE
 )
 ########################################################################################################################
 # PLT options, right now only on/off
 option(ENABLE_PLT "Enable PLT (particle linear theory) corrections" OFF)
 ########################################################################################################################
 # FFTW
-cmake_policy(SET CMP0074 NEW)
+if(POLICY CMP0074)
    cmake_policy(SET CMP0074 NEW)
 endif()
 if(ENABLE_MPI)
-  find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS MPI)
+  find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS MPI)
 else()
-  find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS)
+  find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS)
 endif(ENABLE_MPI)
 mark_as_advanced(FFTW3_SINGLE_MPI_LIBRARY FFTW3_SINGLE_OPENMP_LIBRARY FFTW3_SINGLE_SERIAL_LIBRARY FFTW3_SINGLE_THREADS_LIBRARY)
 mark_as_advanced(FFTW3_DOUBLE_MPI_LIBRARY FFTW3_DOUBLE_OPENMP_LIBRARY FFTW3_DOUBLE_SERIAL_LIBRARY FFTW3_DOUBLE_THREADS_LIBRARY)
 mark_as_advanced(FFTW3_LONGDOUBLE_MPI_LIBRARY FFTW3_LONGDOUBLE_OPENMP_LIBRARY FFTW3_LONGDOUBLE_SERIAL_LIBRARY FFTW3_LONGDOUBLE_THREADS_LIBRARY)
 mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_MPI_INCLUDE_DIR)
 mark_as_advanced(pkgcfg_lib_PC_FFTW_fftw3)
 ########################################################################################################################
 # GSL
 find_package(GSL REQUIRED)
 mark_as_advanced(pkgcfg_lib_GSL_gsl pkgcfg_lib_GSL_gslcblas pkgcfg_lib_GSL_m)
 ########################################################################################################################
 # HDF5
 find_package(HDF5 REQUIRED)
 mark_as_advanced(HDF5_C_LIBRARY_dl HDF5_C_LIBRARY_hdf5 HDF5_C_LIBRARY_m HDF5_C_LIBRARY_pthread HDF5_C_LIBRARY_z HDF5_C_LIBRARY_sz)
 ########################################################################################################################
 # PANPHASIA
 option(ENABLE_PANPHASIA "Enable PANPHASIA random number generator" ON)
 if(ENABLE_PANPHASIA)
   # The PANPHASIA sources are Fortran, so the language is only enabled on demand
   enable_language(Fortran)
   # Compiler-specific flags: 132-column source lines and no implicit typing
   if("${CMAKE_Fortran_COMPILER_ID}" MATCHES "Intel")
     string(APPEND CMAKE_Fortran_FLAGS " -132 -implicit-none")
   elseif("${CMAKE_Fortran_COMPILER_ID}" MATCHES "GNU")
     string(APPEND CMAKE_Fortran_FLAGS " -ffixed-line-length-132 -fimplicit-none")
   endif()
 endif()
 ########################################################################################################################
 # INCLUDES
 include_directories(${PROJECT_SOURCE_DIR}/include)
@ -81,28 +156,68 @@ file( GLOB PLUGINS
  ${PROJECT_SOURCE_DIR}/src/plugins/*.cc
 )
 if(ENABLE_PANPHASIA)
 list (APPEND SOURCES 
  ${PROJECT_SOURCE_DIR}/external/panphasia/panphasia_routines.f
  ${PROJECT_SOURCE_DIR}/external/panphasia/generic_lecuyer.f90
 )
 endif()
 # project configuration header
 configure_file(
  ${PROJECT_SOURCE_DIR}/include/cmake_config.hh.in
  ${PROJECT_SOURCE_DIR}/include/cmake_config.hh
 )
 add_executable(${PRGNAME} ${SOURCES} ${PLUGINS})
 target_setup_class(${PRGNAME})
-set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 17)
+set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 14)
 # mpi flags
 if(MPI_CXX_FOUND)
  if(CODE_PRECISION STREQUAL "FLOAT")
    if(FFTW3_SINGLE_MPI_FOUND)
      target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_MPI_LIBRARY})
      target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL})
      target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI")
    else()
      message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for single precision!")
    endif()
  elseif(CODE_PRECISION STREQUAL "DOUBLE")
    if(FFTW3_DOUBLE_MPI_FOUND)
      target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_MPI_LIBRARY})
      target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL})
      target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI")
-  endif(FFTW3_DOUBLE_MPI_FOUND)
+    else()
      message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for double precision!")
    endif()
  elseif(CODE_PRECISION STREQUAL "LONGDOUBLE")
    if(FFTW3_LONGDOUBLE_MPI_FOUND)
      target_link_libraries(${PRGNAME} ${FFTW3_LONGDOUBLE_MPI_LIBRARY})
      target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL})
      target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI")
    else()
      message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for long double precision!")
    endif()
  endif()
  target_include_directories(${PRGNAME} PRIVATE ${MPI_CXX_INCLUDE_PATH})
  target_compile_options(${PRGNAME} PRIVATE "-DUSE_MPI")
  target_link_libraries(${PRGNAME} ${MPI_LIBRARIES})
 endif(MPI_CXX_FOUND)
-if(FFTW3_DOUBLE_THREADS_FOUND) 
+if(CODE_PRECISION STREQUAL "FLOAT" AND FFTW3_SINGLE_THREADS_FOUND) 
  target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_THREADS_LIBRARY})
  target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS")
 elseif(CODE_PRECISION STREQUAL "DOUBLE" AND FFTW3_DOUBLE_THREADS_FOUND) 
  target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_THREADS_LIBRARY})
  target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS")
-endif(FFTW3_DOUBLE_THREADS_FOUND)
+elseif(CODE_PRECISION STREQUAL "LONGDOUBLE" AND FFTW3_LONGDOUBLE_THREADS_FOUND) 
  target_link_libraries(${PRGNAME} ${FFTW3_LONGDOUBLE_THREADS_LIBRARY})
  target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS")
 endif()
 if(HDF5_FOUND)
  # target_link_libraries(${PRGNAME} ${HDF5_C_LIBRARY_DIRS})
@ -111,6 +226,10 @@ if(HDF5_FOUND)
  target_compile_options(${PRGNAME} PRIVATE "-DUSE_HDF5")
 endif(HDF5_FOUND)
 if(ENABLE_PANPHASIA)
 target_compile_options(${PRGNAME} PRIVATE "-DUSE_PANPHASIA")
 endif(ENABLE_PANPHASIA)
 target_link_libraries(${PRGNAME} ${FFTW3_LIBRARIES})
 target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIRS})
--- a/README.md
+++ b/README.md
@ -5,7 +5,7 @@ High order LPT/QPT tool for single resolution simulations
 ## Build Instructions
 Clone code including submodules (currently only CLASS is used as a submodule):
-    git clone --recurse-submodules https://ohahn@bitbucket.org/ohahn/monofonic.git
+    git clone --recurse-submodules https://<username>@bitbucket.org/ohahn/monofonic.git
 Create build directory, configure, and build:
@ -17,4 +17,30 @@ Create build directory, configure, and build:
    make
 this should create an executable in the build directory. 
-There is an example parameter file 'example.conf' in the main directory
+
 If you run into problems with CMake not being able to find your local FFTW3 or HDF5 installation, it is best to give the path directly as
    FFTW3_ROOT=<path> HDF5_ROOT=<path> ccmake ..
 Make sure to delete any files previously generated by CMake before reconfiguring in this way.
 If you want to build on macOS, then it is strongly recommended to use GNU (or Intel) compilers instead of Apple's Clang. Install them e.g. 
 via homebrew and then configure cmake to use them instead of the macOS default compiler via
    CC=gcc-9 CXX=g++-9 ccmake ..
 This is necessary since Apple's compilers haven't supported OpenMP for years.
 ## Running
 There is an example parameter file 'example.conf' in the main directory; the possible options are explained in it. Pass the
 parameter file as the only argument to the executable, e.g. from within the build directory:
     ./monofonIC ../example.conf
 If you want to run with MPI, you need to enable MPI support via ccmake. Then you can launch in hybrid MPI+threads mode by 
 specifying the desired number of threads per task in the config file, and the number of tasks to be launched via
     mpirun -np 16 ./monofonIC <path to config file>
 It will then run with 16 tasks times the number of threads per task specified in the config file.
--- a/example.conf
+++ b/example.conf
@ -2,48 +2,27 @@
 # number of grid cells per linear dimension for calculations = particles for sc initial load
 GridRes         = 128
 # length of the box in Mpc/h
-BoxLength    = 250
+BoxLength       = 125
 # starting redshift
 zstart          = 49.0
 # order of the LPT to be used (1,2 or 3)
-LPTorder     = 3
+LPTorder        = 1
 # also do baryon ICs?
 DoBaryons       = no
 # do mode fixing à la Angulo&Pontzen
-DoFixing     = no
+DoFixing        = yes
 # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!)
 ParticleLoad    = sc
-
+# Add a possible constraint field here:
-[testing]
+#ConstraintFieldFile = initial_conditions.h5
-# enables diagnostic output
+#ConstraintFieldName = ic_white_noise
 # can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence'
 test = convergence
 [execution]
 NumThreads   = 4
 [output]
 fname_hdf5   = output_sch.hdf5
 fbase_analysis = output
 format       = gadget2
 filename     = ics_gadget.dat
 #format       = generic
 #filename     = debug.hdf5
 #generic_out_eulerian = yes
 #format	       = grafic2
 #filename       = ics_ramses
 #grafic_use_SPT = yes
 [random]
 generator    = NGENIC
 seed         = 9001
 [cosmology]
-#transfer     = CLASS 
+transfer        = CLASS
-transfer     = eisenstein
+ztarget         = 2.5
 # transfer        = eisenstein
 # transfer        = file_CAMB
 # transfer_file   = wmap5_transfer_out_z0.dat
 Omega_m         = 0.302
 Omega_b         = 0.045
 Omega_L         = 0.698
@ -52,7 +31,41 @@ sigma_8      = 0.811
 nspec           = 0.961
 # anisotropic large scale tidal field
-#LSS_aniso_lx = 0.1
+# LSS_aniso_lx    = +0.1
-#LSS_aniso_ly = 0.1
+# LSS_aniso_ly    = +0.1
-#LSS_aniso_lz = -0.2
+# LSS_aniso_lz    = -0.2
 [random]
 generator       = NGENIC
 seed            = 9001
 [testing]
 # enables diagnostic output
 # can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence'
 test            = none
 [execution]
 NumThreads      = 8
 [output]
 fname_hdf5      = output_sch.hdf5
 fbase_analysis  = output
 # format          = gadget2
 # filename        = ics_gadget.dat
 # UseLongids      = false
 format          = gadget_hdf5
 filename        = ics_gadget.hdf5
 # format          = AREPO
 # filename        = ics_arepo.hdf5
 # format          = generic
 # filename        = debug.hdf5
 # generic_out_eulerian = yes
 # format	        = grafic2
 # filename        = ics_ramses
 # grafic_use_SPT  = yes
--- a/example_testing.conf
+++ b/example_testing.conf
@ -0,0 +1,33 @@
 [setup]
 GridRes      = 256
 BoxLength    = 6.28318530718
 zstart       = 0.0
 LPTorder     = 1
 SymplecticPT = no
 DoFixing     = no
 [execution]
 NumThreads   = 4
 [output]
 fname_hdf5   = output.hdf5
 fbase_analysis = output
 #format       = gadget2
 #filename     = ics_gadget.dat
 format	     = generic
 filename     = debug.hdf5
 generic_out_eulerian = yes
 [random]
 generator    = NGENIC
 seed         = 9001
 [cosmology]
 #transfer     = CLASS 
 transfer     = eisenstein
 Omega_m      = 1.0
 Omega_b      = 0.045
 Omega_L      = 0.0
 H0           = 70.3
 sigma_8      = 0.811
 nspec        = 0.961
--- a/external/class
+++ b/external/class
@ -1 +1 @@
-Subproject commit b34d7f6c2b72eab3a347c28e62298d62ca9dd69b
+Subproject commit 6adecae2f30172a94e003155090791abf509d995
--- a/external/class.cmake
+++ b/external/class.cmake
@ -32,6 +32,7 @@ if(ENABLE_CLASS)
      ${CMAKE_CURRENT_LIST_DIR}/class/build/history.o
      ${CMAKE_CURRENT_LIST_DIR}/class/build/hydrogen.o
      ${CMAKE_CURRENT_LIST_DIR}/class/build/hyperspherical.o
      ${CMAKE_CURRENT_LIST_DIR}/class/tools/trigonometric_integrals.o
      ${CMAKE_CURRENT_LIST_DIR}/class/build/hyrectools.o
      ${CMAKE_CURRENT_LIST_DIR}/class/build/input.o
      ${CMAKE_CURRENT_LIST_DIR}/class/build/lensing.o
@ -78,6 +79,7 @@ if(ENABLE_CLASS)
      ${CMAKE_CURRENT_LIST_DIR}/class/tools/parser.c
      ${CMAKE_CURRENT_LIST_DIR}/class/tools/quadrature.c
      ${CMAKE_CURRENT_LIST_DIR}/class/tools/hyperspherical.c
      ${CMAKE_CURRENT_LIST_DIR}/class/tools/trigonometric_integrals.c
      ${CMAKE_CURRENT_LIST_DIR}/class/tools/common.c
      ${CMAKE_CURRENT_LIST_DIR}/class/source/input.c
      ${CMAKE_CURRENT_LIST_DIR}/class/source/background.c
@ -131,9 +133,9 @@ macro(target_setup_class target_name)
  endif(ENABLE_CLASS)
 endmacro(target_setup_class)
-if(ENABLE_CLASS)
+# if(ENABLE_CLASS)
-  # test executable
+#   # test executable
-  add_executable(testTk
+#   add_executable(testTk
-    ${CMAKE_CURRENT_LIST_DIR}/class/cpp/testTk.cc)
+#     ${CMAKE_CURRENT_LIST_DIR}/class/cpp/testTk.cc)
-  target_setup_class(testTk)
+#   target_setup_class(testTk)
-endif(ENABLE_CLASS)
+# endif(ENABLE_CLASS)
--- a/external/fftwpp
+++ b/external/fftwpp
@ -0,0 +1 @@
 Subproject commit ec6b82cc1122ba029a7a7142cf836014e992e68c
--- a/external/panphasia/generic_lecuyer.f90
+++ b/external/panphasia/generic_lecuyer.f90
@ -0,0 +1,683 @@
 !=====================================================================================c
 !        
 ! The code below was written by: Stephen Booth
 !                                Edinburgh Parallel Computing Centre
 !                                The University of Edinburgh
 !                                JCMB
 !                                Mayfield Road
 !                                Edinburgh EH9 3JZ
 !                                United Kingdom
 !
 ! This file is part of the software made public in
 ! Jenkins and Booth 2013  - arXiv:1306.XXXX
 !
 ! The software computes the Panphasia Gaussian white noise field
 ! realisation described in detail in Jenkins 2013 - arXiv:1306.XXXX
 ! 
 !
 !
 ! This software is free, subject to a agreeing licence conditions:
 !
 !
 ! (i)  you will publish the phase descriptors and reference Jenkins (13) 
 !      for any new simulations that use Panphasia phases. You will pass on this 
 !      condition to others for any software or data you make available publically 
 !      or privately that makes use of Panphasia. 
 !
 ! (ii) that you will ensure any publications using results derived from Panphasia 
 !      will be submitted as a final version to arXiv prior to or coincident with
 !      publication in a journal. 
 !
 !
 ! (iii) that you report any bugs in this software as soon as confirmed to 
 !       A.R.Jenkins@durham.ac.uk 
 !
 ! (iv)  that you understand that this software comes with no warranty and that is 
 !       your responsibility to ensure that it is suitable for the purpose that 
 !       you intend. 
 !
 !=====================================================================================c
 !{{{Rand_base (define kind types) 
 MODULE Rand_base
 ! This module just declares the base integer kinds used by the generator.
 ! We may have to edit this to match the target machine.
 ! We really need a power-of-2 selected int kind; in Fortran 95 we could
 ! do this with a PURE function, I think.
 !
 ! Sint: the narrow integer kind.
 ! 10 decimal digits will hold 2^31, but only 9 digits are requested here.
 ! NOTE(review): this presumably relies on the compiler mapping the
 ! 9-digit request onto a 32-bit integer - confirm on new targets.
 !
   INTEGER, PARAMETER :: Sint = SELECTED_INT_KIND(9)
 !  INTEGER, PARAMETER :: Sint = SELECTED_INT_KIND(10)
 !  INTEGER, PARAMETER :: Sint = 4
 !
 ! Dint: the wide integer kind.
 ! 18-19 decimal digits will hold 2^63
 ! but all 19 digit numbers require 2^65 :-(
 ! so only 17 digits are requested here.
 !
   INTEGER, PARAMETER :: Dint = SELECTED_INT_KIND(17)
 !  INTEGER, PARAMETER :: Dint = SELECTED_INT_KIND(18)
 !  INTEGER, PARAMETER :: Dint = 8
 ! Ctype: kind for index counters; it must be able to hold Nstore
  INTEGER, PARAMETER :: Ctype = SELECTED_INT_KIND(3)
 END MODULE Rand_base
 !}}}
 !{{{Rand_int (random integers mod 2^31-1) 
 MODULE Rand_int
  USE Rand_base
  IMPLICIT NONE
 ! The general approach of this module is to have
 ! two integer types, Sint and Dint
 !
 ! Sint should have at least 31 bits
 ! Dint should have at least 63
 !{{{constants
  INTEGER(KIND=Ctype), PARAMETER :: Nstate=5_Ctype
  INTEGER(KIND=Ctype), PRIVATE, PARAMETER :: Nbatch=128_Ctype
  INTEGER(KIND=Ctype), PRIVATE, PARAMETER :: Nstore=Nstate+Nbatch
  INTEGER(KIND=Sint), PRIVATE, PARAMETER  :: M = 2147483647_Sint
  INTEGER(KIND=Dint), PRIVATE, PARAMETER  :: Mask = 2147483647_Dint
  INTEGER(KIND=Dint), PRIVATE, PARAMETER  :: A1 = 107374182_Dint
  INTEGER(KIND=Dint), PRIVATE, PARAMETER  :: A5 = 104480_Dint
  LOGICAL, PARAMETER :: Can_step_int=.TRUE.
  LOGICAL, PARAMETER :: Can_reverse_int=.TRUE.
 !}}}
 !{{{Types
 !
 ! This type holds the state of the generator
 !
 !{{{TYPE RAND_state
 TYPE RAND_state
  PRIVATE
  INTEGER(KIND=Sint) :: state(Nstore) 
 ! do we need to re-fill state table this is reset when we initialise state.
  LOGICAL :: need_fill 
 ! position of the next state variable to output
  INTEGER(KIND=Ctype) :: pos
 END TYPE RAND_state
 !}}}
 !
 ! This type defines the offset type used for stepping.
 !
 !{{{TYPE RAND_offset
 TYPE RAND_offset
  PRIVATE
  INTEGER(KIND=Sint) :: poly(Nstate)
 END TYPE RAND_offset
 !}}}
 !}}}
 !{{{interface and overloads
 !
 ! Allow automatic conversion between integers and offsets
 !
 INTERFACE ASSIGNMENT(=)
  MODULE PROCEDURE Rand_set_offset
  MODULE PROCEDURE Rand_load
  MODULE PROCEDURE Rand_save
  MODULE PROCEDURE Rand_seed
 END INTERFACE
 INTERFACE OPERATOR(+)
  MODULE PROCEDURE Rand_add_offset
 END INTERFACE
 INTERFACE OPERATOR(*)
  MODULE PROCEDURE Rand_mul_offset
 END INTERFACE
 !
 ! overload + as the boost/stepping operator
 !
 INTERFACE OPERATOR(+)
  MODULE PROCEDURE Rand_step
  MODULE PROCEDURE Rand_boost
 END INTERFACE
 !}}}
 !{{{PUBLIC/PRIVATE 
  PRIVATE reduce,mod_saxpy,mod_sdot,p_saxpy,p_sdot,poly_mult
  PRIVATE poly_square, poly_power
  PRIVATE fill_state, repack_state
  PUBLIC Rand_sint, Rand_sint_vec
  PUBLIC Rand_save, Rand_load
  PUBLIC Rand_set_offset, Rand_add_offset, Rand_mul_offset
  PUBLIC Rand_step, Rand_boost, Rand_seed
 !}}}
 CONTAINS
  !{{{Internals
  !{{{RECURSIVE FUNCTION reduce(A)
  RECURSIVE FUNCTION reduce(A)
  !
  ! Fold a Dint value A down to an Sint result MOD M.
  !
  ! M is 2**31-1, so 2**31 is congruent to 1 (MOD M): the bits above
  ! bit 30 can be added back onto the low 31 bits (selected by Mask)
  ! without changing the residue. This is repeated until nothing is
  ! left above bit 30; a final conditional subtraction of M follows.
  !
   INTEGER(KIND=Dint), INTENT(IN) :: A
   INTEGER(KIND=Sint) reduce
   INTEGER(KIND=Dint) acc, carry
    acc = A
    carry = ISHFT(acc, -31)
    DO WHILE( carry > 0 )
      acc = IAND(acc,Mask) + carry
      carry = ISHFT(acc, -31)
    END DO
    IF( acc < M ) THEN
      reduce = acc
    ELSE
      reduce = acc - M
    END IF
  END FUNCTION reduce
  !}}}
  !{{{RECURSIVE SUBROUTINE fill_state(x)
  RECURSIVE SUBROUTINE fill_state(x)
  ! Complete the state table: starting from the values already held in
  ! x%state(1:Nstate), generate entries Nstate+1..Nstore with
  !   s(k) = ( A5*s(k-5) + A1*s(k-1) ) MOD M
  ! and clear the need_fill flag.
  TYPE(RAND_state), INTENT(INOUT) ::  x
  INTEGER(KIND=Ctype) k
  INTRINSIC IAND, ISHFT
  INTEGER(KIND=Dint)  acc
    DO k=Nstate+1,Nstore
      ! A5 and A1 are Dint constants, so the products are formed in Dint
      acc = (x%state(k-5) * A5) + (x%state(k-1)*A1)
      !
      ! Reduce MOD M in place rather than through reduce(): fold the
      ! bits above bit 30 back onto the low 31 bits until none remain.
      !
      DO
        IF( ISHFT(acc, -31) <= 0 ) EXIT
        acc = IAND(acc,Mask) + ISHFT(acc, -31)
      END DO
      IF( acc < M ) THEN
        x%state(k) = acc
      ELSE
        x%state(k) = acc - M
      END IF
    END DO
    x%need_fill = .FALSE.
  END SUBROUTINE fill_state
  !}}}
  !{{{RECURSIVE SUBROUTINE repack_state(x)
  RECURSIVE SUBROUTINE repack_state(x)
  ! Move the Nstate values immediately preceding x%pos to the front of
  ! the table, point x%pos at the first slot after them and flag the
  ! rest of the table for regeneration.
  TYPE(RAND_state), INTENT(INOUT) ::  x
  INTEGER(KIND=Ctype) k, shift
    ! Distance between the window to keep and the front of the table
    shift = x%pos-(Nstate+1)
    DO k=1,Nstate
      x%state(k) = x%state(k+shift)
    END DO
    x%need_fill = .TRUE.
    x%pos = Nstate + 1
  END SUBROUTINE repack_state
  !}}}
  !{{{RECURSIVE SUBROUTINE mod_saxpy(y,a,x)
  RECURSIVE SUBROUTINE mod_saxpy(y,a,x)
   ! y(k) <- ( y(k) + a*x(k) ) MOD M   for k = 1..Nstate.
   ! Nothing is done when a is zero.
   INTEGER(KIND=Ctype) k
   INTEGER(KIND=Sint) y(Nstate)
   INTEGER(KIND=Sint) a
   INTEGER(KIND=Sint) x(Nstate)
   INTEGER(KIND=Dint) wide_a
     IF( a /= 0_Sint ) THEN
       ! Convert the operands to Dint before multiplying so that the
       ! expression does not overflow
       wide_a = INT(a, KIND=Dint)
       DO k=1,Nstate
         y(k) = reduce(INT(y(k), KIND=Dint) + wide_a * INT(x(k), KIND=Dint))
       END DO
     END IF
  END SUBROUTINE 
  !}}}
  !{{{RECURSIVE SUBROUTINE mod_sdot(res,x,y)
  RECURSIVE SUBROUTINE mod_sdot(res,x,y)
  ! res <- ( x(1)*y(1) + ... + x(Nstate)*y(Nstate) ) MOD M
  INTEGER(KIND=Sint), INTENT(OUT) :: res
  INTEGER(KIND=Sint), INTENT(IN) :: x(Nstate) , y(Nstate)
  INTEGER(KIND=Sint) acc
  INTEGER(KIND=Ctype) k
    acc = 0
    DO k=1,Nstate
     ! Operands are widened to Dint; the running sum is reduced after every term
     acc = reduce(INT(acc, KIND=Dint) + INT(x(k), KIND=Dint) * INT(y(k), KIND=Dint))
    END DO
    res = acc
  END SUBROUTINE
  !}}}
  !{{{RECURSIVE SUBROUTINE p_saxpy(y,a)
  RECURSIVE SUBROUTINE p_saxpy(y,a)
   ! Calculates mod_saxpy(y,a,P) for the fixed vector P whose only
   ! non-zero entries are P(1) = A5 and P(5) = A1, so only y(1) and
   ! y(5) are updated.
   INTEGER(KIND=Sint), INTENT(INOUT) :: y(Nstate)
   INTEGER(KIND=Sint), INTENT(IN) :: a
   INTEGER(KIND=Dint) wide_a
     ! Work in Dint so that the products with A5 / A1 cannot overflow
     wide_a = a
     y(1) = reduce(INT(y(1), KIND=Dint) + wide_a*A5)
     y(5) = reduce(INT(y(5), KIND=Dint) + wide_a*A1)
  END SUBROUTINE
  !}}}
  !{{{RECURSIVE SUBROUTINE p_sdot(res,n,x)
  RECURSIVE SUBROUTINE p_sdot(res,x)
  ! res <- ( A1*x(5) + A5*x(1) ) MOD M
  INTEGER(KIND=Sint), INTENT(OUT) :: res
  INTEGER(KIND=Sint), INTENT(IN) :: x(Nstate)
  INTEGER(KIND=Dint) first, last
    ! Widen the two contributing entries to Dint before multiplying
    first = x(1)
    last = x(5)
    res = reduce(A1*last + A5*first)
  END SUBROUTINE
  !}}}
  !{{{RECURSIVE SUBROUTINE poly_mult(a,b)
  RECURSIVE SUBROUTINE poly_mult(a,b)
    ! Replace a by the product a*b. Both polynomials are stored as
    ! Nstate coefficients; the raw product is accumulated in
    ! 2*Nstate-1 coefficients and then folded back to Nstate
    ! coefficients with p_saxpy.
    INTEGER(KIND=Sint), INTENT(INOUT) :: a(Nstate)
    INTEGER(KIND=Sint), INTENT(IN) :: b(Nstate)
    INTEGER(KIND=Sint) prod((2*Nstate) - 1)
    INTEGER(KIND=Ctype) k
    prod = 0_Sint
    ! Accumulate a(k)*b, shifted by k-1 positions, for every coefficient of a
    DO k=1,Nstate
      CALL mod_saxpy(prod(k:k+Nstate-1),a(k), b)
    END DO
    ! Fold the coefficients above Nstate into the lower ones, top down
    DO k=(2*Nstate)-1, Nstate+1, -1
      CALL P_SAXPY(prod(k-Nstate:k-1),prod(k))
    END DO
    a = prod(1:Nstate)
  END SUBROUTINE
  !}}}
  !{{{RECURSIVE SUBROUTINE poly_square(a)
  RECURSIVE SUBROUTINE poly_square(a)
    ! Replace a by a*a, using the same accumulate-then-fold scheme as
    ! poly_mult with both factors taken from a.
    INTEGER(KIND=Sint), INTENT(INOUT) :: a(Nstate)
    INTEGER(KIND=Sint) prod((2*Nstate) - 1)
    INTEGER(KIND=Ctype) k
    prod = 0_Sint
    ! Accumulate a(k)*a, shifted by k-1 positions
    DO k=1,Nstate
      CALL mod_saxpy(prod(k:k+Nstate-1),a(k), a)
    END DO
    ! Fold the coefficients above Nstate into the lower ones, top down
    DO k=(2*Nstate)-1, Nstate+1, -1
      CALL P_SAXPY(prod(k-Nstate:k-1),prod(k))
    END DO
    a = prod(1:Nstate)
  END SUBROUTINE
  !}}}
  !{{{RECURSIVE SUBROUTINE poly_power(poly,n)
  RECURSIVE SUBROUTINE poly_power(poly,n)
   ! Replace poly by poly**n using repeated squaring built on
   ! poly_mult and poly_square.
   !   n = 0 : poly becomes the unit polynomial (1,0,...,0)
   !   n < 0 : poly is set to zero
   INTEGER(KIND=Sint), INTENT(INOUT) :: poly(Nstate)
   INTEGER, INTENT(IN) :: n
   INTEGER bits
   INTEGER(KIND=Sint) base(Nstate), acc(Nstate)
   IF( n < 0 )THEN
     poly = 0_Sint
     RETURN
   END IF
   ! Start from the unit polynomial; for n = 0 the loop below is skipped
   acc = 0_Sint
   acc(1) = 1_Sint
   base = poly
   bits = n
   DO WHILE( bits > 0 )
     ! Multiply in the current power of the base for every set bit of n
     IF( MOD(bits,2) == 1 ) CALL poly_mult(acc,base)
     bits = bits/2
     ! Skip the last squaring, whose result would never be used
     IF( bits > 0 ) CALL poly_square(base)
   END DO
   poly = acc
  END SUBROUTINE poly_power
  !}}}
  !}}}
  !{{{RECURSIVE SUBROUTINE  Rand_seed( state, n )
  RECURSIVE SUBROUTINE  Rand_seed( state, n )
    TYPE(Rand_state), INTENT(OUT) :: state
    INTEGER, INTENT(IN) :: n
    ! Initialise the generator from a single integer:
    ! first load a fixed, arbitrary state, then boost it by n times a
    ! long jump distance (Rand_mul_offset raises the jump polynomial P
    ! to the n-th power).
    !
    ! Original author's notes on the choice of jump distance:
    ! we want this to be ok for separating parallel sequences on MPP
    ! machines. The distance is taken as a prime number as this should
    ! prevent strong correlations when the generators are operated in
    ! tight lockstep; equivalent points on different processors will
    ! also be related by a primitive polynomial.
    ! The distance is 2^48-59.
    !
    ! NOTE(review): for n < 0 poly_power yields the zero polynomial, so
    ! the boosted state would be all zeros - callers should pass n >= 0.
    TYPE(Rand_state) start
    TYPE(Rand_offset) jump
    TYPE(Rand_offset), PARAMETER ::  P = &
         Rand_offset( (/ 1509238949_Sint ,2146167999_Sint ,1539340803_Sint , &
                     1041407428_Sint ,666274987_Sint /) )
    CALL Rand_load( start, (/ 5, 4, 3, 2, 1 /) )
    jump = Rand_mul_offset(P, n )
    state = Rand_boost( start, jump )
  END SUBROUTINE Rand_seed
  !}}}
  !{{{RECURSIVE SUBROUTINE Rand_load( state, input )
  RECURSIVE SUBROUTINE Rand_load( state, input )
  ! Build a generator state from Nstate default integers: every input
  ! value is converted to Sint and taken MOD M, the remainder of the
  ! table is zeroed and flagged for regeneration.
  TYPE(RAND_state), INTENT(OUT) :: state
  INTEGER, INTENT(IN) :: input(Nstate)
    state%pos = Nstate + 1
    state%need_fill = .TRUE.
    state%state(1:Nstate) = MOD(INT(input,KIND=Sint),M)
    state%state(Nstate+1:) = 0_Sint
  END SUBROUTINE Rand_load
  !}}}
  !{{{RECURSIVE SUBROUTINE Rand_save( save_vec,state )
  RECURSIVE SUBROUTINE Rand_save( save_vec, x ) 
  ! Copy the Nstate table entries immediately preceding x%pos into
  ! save_vec (converted to default INTEGER).
  INTEGER, INTENT(OUT) ::  save_vec(Nstate)
  TYPE(RAND_state), INTENT(IN) ::  x
    save_vec = x%state(x%pos-Nstate : x%pos-1)
  END SUBROUTINE Rand_save
  !}}}
  !{{{RECURSIVE SUBROUTINE Rand_set_offset( offset, n )
  RECURSIVE SUBROUTINE Rand_set_offset( offset, n )
  ! Build the offset polynomial for the integer n: X raised to the
  ! n-th power for n >= 0, or X^-1 raised to the (-n)-th power for
  ! n < 0.
  TYPE(Rand_offset), INTENT(OUT) :: offset
  INTEGER, INTENT(IN) :: n
    offset%poly = 0_Sint
    IF ( n < 0 ) THEN
      !
      ! This is X^-1 
      !
      offset%poly(4) = 858869107_Sint
      offset%poly(5) = 1840344978_Sint    
      call poly_power(offset%poly,-n)
    ELSE
      ! A single 1 in the second coefficient: the polynomial X
      offset%poly(2) = 1_Sint
      call poly_power(offset%poly,n)
    END IF
  END SUBROUTINE Rand_set_offset
  !}}}
  !{{{TYPE(Rand_offset) RECURSIVE FUNCTION Rand_add_offset( a, b )
  TYPE(Rand_offset) RECURSIVE FUNCTION Rand_add_offset( a, b )
  ! Combine two offsets: the polynomial of the result is the product
  ! a%poly * b%poly as computed by poly_mult.
  TYPE(Rand_offset), INTENT(IN) :: a, b
  TYPE(Rand_offset) total
    total = a
    CALL poly_mult(total%poly,b%poly)
    Rand_add_offset = total
  END FUNCTION Rand_add_offset
  !}}}
  !{{{TYPE(Rand_offset) RECURSIVE  FUNCTION Rand_mul_offset( a, n )
  TYPE(Rand_offset) RECURSIVE  FUNCTION Rand_mul_offset( a, n )
  ! Scale an offset by the integer n: the polynomial of the result is
  ! a%poly raised to the n-th power as computed by poly_power
  ! (which returns the zero polynomial for n < 0).
  TYPE(Rand_offset), INTENT(IN) :: a
  INTEGER, INTENT(IN) :: n
  TYPE(Rand_offset) scaled
    scaled = a
    CALL poly_power(scaled%poly,n)
    Rand_mul_offset = scaled
  END FUNCTION Rand_mul_offset
  !}}}
  !{{{RECURSIVE FUNCTION Rand_boost(x, offset)
  RECURSIVE FUNCTION Rand_boost(x, offset)
  ! Return a new generator state obtained by applying offset to the
  ! current position of x. x itself is not modified.
  TYPE(Rand_state) Rand_boost
  TYPE(Rand_state), INTENT(IN) ::  x
  TYPE(Rand_offset), INTENT(IN) :: offset
  INTEGER(KIND=Sint) window(2*Nstate-1), res(Nstate)
  INTEGER(KIND=Ctype) k
    ! The Nstate values just before x%pos ...
    window(1:Nstate) = x%state(x%pos-Nstate : x%pos-1)
    window(Nstate+1:) = 0_Sint
    ! ... extended by Nstate-1 further values computed with p_sdot
    DO k=1,Nstate-1
      call P_SDOT(window(k+Nstate),window(k:Nstate+k-1))
    END DO
    ! Each new state value is the dot product (MOD M) of the offset
    ! polynomial with a window of Nstate consecutive values
    DO k=1,Nstate
      call mod_sdot(res(k),offset%poly,window(k:Nstate+k-1))
    END DO
    Rand_boost%state(1:Nstate) = res
    Rand_boost%state(Nstate+1:) = 0_Sint
    Rand_boost%need_fill = .TRUE.
    Rand_boost%pos = Nstate + 1
  END FUNCTION Rand_boost
  !}}}
  !{{{RECURSIVE FUNCTION Rand_step(x, n)
  RECURSIVE FUNCTION Rand_step(x, n)
  ! Return the state obtained by applying the offset for n steps to x:
  ! the offset is built with Rand_set_offset and applied with Rand_boost.
  ! x itself is left unchanged.
  TYPE(Rand_state) Rand_step
  TYPE(RAND_state), INTENT(IN) ::  x
  INTEGER, INTENT(IN) :: n
  TYPE(Rand_offset) jump
    CALL Rand_set_offset(jump,n)
    Rand_step = Rand_boost(x,jump)
  END FUNCTION Rand_step
  !}}}
  !{{{RECURSIVE FUNCTION Rand_sint(x)
  RECURSIVE FUNCTION Rand_sint(x) RESULT( word )
    ! Draw the next integer from generator x and move its read position on
    ! by one.
    TYPE(RAND_state), INTENT(INOUT) :: x
    INTEGER(KIND=Sint)  word
    ! read position beyond the stored block: repack before reading
    IF( x%pos .GT. Nstore ) CALL repack_state(x)
    ! (re)generate the stored block whenever the state is flagged for it
    IF( x%need_fill ) CALL fill_state(x)
    word = x%state(x%pos)
    x%pos = x%pos + 1
  END FUNCTION Rand_sint
  !}}}
  !{{{RECURSIVE SUBROUTINE Rand_sint_vec(iv,x)
  RECURSIVE SUBROUTINE Rand_sint_vec(iv,x)
    ! Fill iv with consecutive integers drawn from generator x.
    !   iv : (out)   receives SIZE(iv) integers
    !   x  : (inout) generator state; its read position moves on by SIZE(iv)
    ! The values are copied in chunks, each limited to what is left in the
    ! stored block (up to index Nstore) of the state.
    INTEGER(KIND=Sint), INTENT(OUT)  :: iv(:)
    TYPE(RAND_state), INTENT(INOUT)  ::  x
    INTEGER remaining, first, ncopy
    first = 1
    remaining = SIZE(iv)
    DO
      IF( remaining .LE. 0 ) EXIT
      ! make sure the state has readable values at x%pos
      IF( x%pos .GT. Nstore ) CALL repack_state(x)
      IF( x%need_fill ) CALL fill_state(x)
      ! copy as much as is both wanted and available
      ncopy = MIN(remaining,Nstore-x%pos+1)
      iv(first:first+ncopy-1) = x%state(x%pos:x%pos+ncopy-1)
      first = first + ncopy
      x%pos = x%pos + ncopy
      remaining = remaining - ncopy
    END DO
  END SUBROUTINE Rand_sint_vec
  !}}}
 END MODULE Rand_int
 !}}}
 !{{{Rand (use Rand_int to make random reals)
 MODULE Rand
  ! Real-valued front end for the integer generator in Rand_int.
  !
  ! Every integer Z obtained from Rand_sint / Rand_sint_vec is mapped to
  !     (Z - 0.5) / 2147483647
  ! after a zero has been replaced by M = 2147483647.
  !
  ! The generic Rand_real(y,x) accepts a scalar or a rank-1 array y of kind
  ! RAND_kind1 or RAND_kind2.  All four variants evaluate the mapping in
  ! RAND_kind1; the RAND_kind2 variants then round on assignment, so the
  ! scalar and the array form return identical values for the same integer.
  ! NOTE(review): after rounding to RAND_kind2 the largest values can become
  ! exactly 1.0 - confirm callers of the RAND_kind2 variants tolerate that.
  USE Rand_int
  IMPLICIT NONE
 !{{{Parameters
  ! real kinds offering at least 10 and at least 6 decimal digits
  INTEGER, PARAMETER :: RAND_kind1 = SELECTED_REAL_KIND(10)
  INTEGER, PARAMETER :: RAND_kind2 = SELECTED_REAL_KIND(6)
  ! upper bound on the integer scratch buffer used by the array variants
  INTEGER, PARAMETER, PRIVATE :: Max_block=100
  INTEGER(KIND=Sint), PRIVATE, PARAMETER  :: M = 2147483647
  ! scale factor 1/2147483647 in the working kind RAND_kind1
  REAL(KIND=RAND_kind1), PRIVATE, PARAMETER :: INVMP1_1 = ( 1.0_RAND_kind1 / 2147483647.0_RAND_kind1 )
  LOGICAL, PARAMETER :: Can_step = Can_step_int
  LOGICAL, PARAMETER :: Can_reverse = Can_reverse_int
 !}}}
  PUBLIC Rand_real
 INTERFACE Rand_real
  MODULE PROCEDURE Rand_real1
  MODULE PROCEDURE Rand_real2
  MODULE PROCEDURE Rand_real_vec1
  MODULE PROCEDURE Rand_real_vec2
 END INTERFACE
 CONTAINS
  !{{{RECURSIVE SUBROUTINE Rand_real1(y,x)
  RECURSIVE SUBROUTINE Rand_real1(y,x)
    ! Scalar RAND_kind1 variant: draw one integer from x and map it to y.
    REAL(KIND=RAND_kind1), INTENT(OUT) :: y
    TYPE(RAND_state), INTENT(INOUT) ::  x
    INTEGER(KIND=Sint) Z
    Z = Rand_sint(x)
    IF (Z .EQ. 0) Z = M
    y = (Z-0.5_RAND_kind1)*INVMP1_1
  END SUBROUTINE Rand_real1
  !}}}
  !{{{RECURSIVE SUBROUTINE Rand_real2(y,x)
  RECURSIVE SUBROUTINE Rand_real2(y,x)
    ! Scalar RAND_kind2 variant: draw one integer from x and map it to y.
    REAL(KIND=RAND_kind2), INTENT(OUT) :: y
    TYPE(RAND_state), INTENT(INOUT) ::  x
    INTEGER(KIND=Sint) Z
    Z = Rand_sint(x)
    IF (Z .EQ. 0) Z = M
    ! generate in RAND_kind1 and let the assignment round to RAND_kind2
    y = (Z-0.5_RAND_kind1)*INVMP1_1
  END SUBROUTINE Rand_real2
  !}}}
  !{{{RECURSIVE SUBROUTINE Rand_real_vec1(rv,x)
  RECURSIVE SUBROUTINE Rand_real_vec1(rv,x)
    ! Array RAND_kind1 variant: fill rv with SIZE(rv) values drawn from x.
    ! The integers are fetched in blocks of at most Max_block so that the
    ! scratch buffer stays small for large rv.
    TYPE(RAND_state), INTENT(INOUT) ::  x
    REAL(KIND=RAND_kind1), INTENT(OUT) ::  rv(:)
    INTEGER left,start, chunk, i
    INTEGER(KIND=Sint) Z
    INTEGER(KIND=Sint) temp(MIN(SIZE(rv),Max_block))
    start=0
    left=SIZE(rv)
    DO WHILE( left .GT. 0 )
      chunk = MIN(left,Max_block)
      CALL Rand_sint_vec(temp(1:chunk),x)
      DO i=1,chunk
       Z = temp(i)
       IF (Z .EQ. 0) Z = M
       rv(start+i) = (Z-0.5_RAND_kind1)*INVMP1_1
      END DO
      start = start + chunk
      left = left - chunk
    END DO
  END SUBROUTINE Rand_real_vec1
  !}}}
  !{{{RECURSIVE SUBROUTINE Rand_real_vec2(rv,x)
  RECURSIVE SUBROUTINE Rand_real_vec2(rv,x)
    ! Array RAND_kind2 variant: fill rv with SIZE(rv) values drawn from x.
    ! The integers are fetched in blocks of at most Max_block so that the
    ! scratch buffer stays small for large rv.
    TYPE(RAND_state), INTENT(INOUT) ::  x
    REAL(KIND=RAND_kind2), INTENT(OUT) ::  rv(:)
    INTEGER left,start, chunk, i
    INTEGER(KIND=Sint) Z
    INTEGER(KIND=Sint) temp(MIN(SIZE(rv),Max_block))
    start=0
    left=SIZE(rv)
    DO WHILE( left .GT. 0 )
      chunk = MIN(left,Max_block)
      CALL Rand_sint_vec(temp(1:chunk),x)
      DO i=1,chunk
       Z = temp(i)
       IF (Z .EQ. 0) Z = M
       ! same RAND_kind1 scaling as Rand_real2, rounded on assignment, so
       ! scalar and array calls agree bit for bit
       rv(start+i) = (Z-0.5_RAND_kind1)*INVMP1_1
      END DO
      start = start + chunk
      left = left - chunk
    END DO
  END SUBROUTINE Rand_real_vec2
  !}}}
 END MODULE Rand
 !}}}
 !{{{test program
 ! PROGRAM test_random
 ! use Rand
 !     TYPE(RAND_state) x
 !     REAL y
 !      CALL Rand_load(x,(/5,4,3,2,1/)) 
 !      DO I=0,10
 !       CALL Rand_real(y,x)
 !       WRITE(*,10) I,y
 !      END DO
 !
 !10    FORMAT(I10,E25.16)
 !
 !     END
 !         0   0.5024326127022505E-01
 !         1   0.8260946767404675E-01
 !         2   0.2123264316469431E-01
 !         3   0.6926658791489899E+00
 !         4   0.2076155943796039E+00
 !         5   0.4327449947595596E-01
 !         6   0.2204052871093154E-01
 !         7   0.1288446951657534E+00
 !         8   0.4859915426932275E+00
 !         9   0.5721384193748236E-01
 !        10   0.7996825082227588E+00
 !
 !}}}
--- a/external/panphasia/panphasia_routines.f
+++ b/external/panphasia/panphasia_routines.f
--- a/ics.conf
+++ b/ics.conf
@ -0,0 +1,62 @@
 [setup]
 # number of grid cells per linear dimension used for the calculations (= number of particles per dimension for the 'sc' particle load)
 GridRes      = 128 
 # length of the box in Mpc/h
 BoxLength    = 200
 # starting redshift
 zstart       = 24.0 
 # order of the LPT to be used (1,2 or 3)
 LPTorder     = 1
 # also do baryon ICs?
 DoBaryons    = no
 # do mode fixing à la Angulo&Pontzen
 DoFixing     = yes
 # particle load, can be 'sc' (1x), 'bcc' (2x), 'fcc' (4x), or 'rsc' (8x)
 ParticleLoad = sc
 [testing]
 # enables diagnostic output
 # can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence'
 #test = potentials_and_densities
 #test = convergence
 test = none
 [execution]
 NumThreads   = 1
 [output]
 fname_hdf5   = output.hdf5
 fbase_analysis = output
 #format       = gadget2
 #filename     = ics_gadget.dat
 format       = generic
 filename     = debug.hdf5
 #generic_out_eulerian = yes
 #format	       = grafic2
 #filename       = ics_ramses
 #grafic_use_SPT = yes
 [random]
 generator    = NGENIC
 seed         = 9001
 [cosmology]
 transfer     = eisenstein
 #transfer     = CLASS 
 #transfer     = eisenstein_wdm
 #WDMmass      = 0.1
 Omega_m      = 0.302
 Omega_b      = 0.045
 Omega_L      = 0.698
 H0           = 70.3
 sigma_8      = 0.811
 nspec        = 0.961
 # anisotropic large scale tidal field
 #LSS_aniso_lx = 0.1
 #LSS_aniso_ly = 0.1
 #LSS_aniso_lz = -0.2
--- a/src/plugins/HDF_IO.hh
+++ b/src/plugins/HDF_IO.hh
@ -1,5 +1,5 @@
-#ifndef __HDF_IO_HH
+#pragma once
-#define __HDF_IO_HH
+#if defined(USE_HDF5)
 #define H5_USE_16_API
@ -193,9 +193,9 @@ inline void HDFReadDataset( const std::string Filename, const std::string ObjNam
  int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID );
-  hsize_t dimsize[ndims];
+  std::vector<hsize_t> dimsize(ndims,0);
-  H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL );
+  H5Sget_simple_extent_dims( HDF_DataspaceID, &dimsize[0], NULL );
  HDF_StorageSize = 1;
  for(int i=0; i<ndims; ++i )
@ -1082,4 +1082,5 @@ inline void HDFWriteGroupAttribute<std::string>( const std::string Filename, con
  H5Gclose( HDF_GroupID );
  H5Fclose( HDF_FileID );
 }
-#endif
+
 #endif // USE_HDF5
--- a/include/bounding_box.hh
+++ b/include/bounding_box.hh
@ -1,16 +1,16 @@
 #pragma once
-#include <vec3.hh>
+#include <math/vec3.hh>
 template <typename T>
 struct bounding_box
 {
-    vec3<T> x1_, x2_;
+    vec3_t<T> x1_, x2_;
    bounding_box(void)
    { }
-    bounding_box( const vec3<T>& x1, const vec3<T>& x2)
+    bounding_box( const vec3_t<T>& x1, const vec3_t<T>& x2)
    : x1_(x1), x2_(x2)
    { }
--- a/include/cmake_config.hh.in
+++ b/include/cmake_config.hh.in
@ -0,0 +1,34 @@
 #pragma once
 // Build-configuration header template.
 // The ${...} placeholders below are presumably substituted at configure time
 // (e.g. by CMake's configure_file) to produce cmake_config.hh - TODO confirm.
 // It exposes the chosen build options as preprocessor switches and as
 // human-readable strings.

 // Build type exactly as given in CMAKE_BUILD_TYPE.
 constexpr char CMAKE_BUILDTYPE_STR[] = "${CMAKE_BUILD_TYPE}";
 // Floating-point precision switch: defines USE_PRECISION_<CODE_PRECISION>.
 // Only FLOAT, DOUBLE and LONGDOUBLE are recognised below; for any other
 // value CMAKE_PRECISION_STR is left undeclared.
 #define USE_PRECISION_${CODE_PRECISION}
 #if defined(USE_PRECISION_FLOAT)
  constexpr char CMAKE_PRECISION_STR[] = "single";
 #elif defined(USE_PRECISION_DOUBLE)
  constexpr char CMAKE_PRECISION_STR[] = "double";
 #elif defined(USE_PRECISION_LONGDOUBLE)
  constexpr char CMAKE_PRECISION_STR[] = "long double";
 #endif 
 // Convolver switch: defines USE_CONVOLVER_<CONVOLVER_TYPE>.
 // Only ORSZAG and NAIVE are recognised below; for any other value
 // CMAKE_CONVOLVER_STR is left undeclared.
 #define USE_CONVOLVER_${CONVOLVER_TYPE}
 #if defined(USE_CONVOLVER_ORSZAG)
  constexpr char CMAKE_CONVOLVER_STR[] = "Orszag3/2";
 #elif defined(USE_CONVOLVER_NAIVE)
  constexpr char CMAKE_CONVOLVER_STR[] = "Aliased";
 #endif
 // ENABLE_PLT is not defined in this header; it has to come from elsewhere
 // (presumably a compile definition set by the build system - TODO confirm).
 #if defined(ENABLE_PLT)
  constexpr char CMAKE_PLT_STR[] = "PLT corr. on";
 #else
  constexpr char CMAKE_PLT_STR[] = "PLT corr. off";
 #endif
 // These variables are autogenerated and compiled
 // into the library by the version.cmake script. do not touch!
 extern "C"
 {
  extern const char *GIT_TAG;
  extern const char *GIT_REV;
  extern const char *GIT_BRANCH;
 }
--- a/include/config_file.hh
+++ b/include/config_file.hh
@ -12,20 +12,20 @@
 #include <logger.hh>
 /*!
- * @class ConfigFile
+ * @class config_file
 * @brief provides read/write access to configuration options
 *
 * This class provides access to the configuration file. The
 * configuration is stored in hash-pairs and can be queried and
 * validated by the responsible class/routine
 */
-class ConfigFile {
+class config_file {
  //! current line number
-  unsigned m_iLine;
+  unsigned iline_;
  //! hash table for key/value pairs, stored as strings
-  std::map<std::string, std::string> m_Items;
+  std::map<std::string, std::string> items_;
 public:
  //! removes all white space from string source
@ -59,42 +59,42 @@ public:
   * @param oval the interpreted/converted value
   */
  template <class in_value, class out_value>
-  void Convert(const in_value &ival, out_value &oval) const {
+  void convert(const in_value &ival, out_value &oval) const {
    std::stringstream ss;
    ss << ival; //.. insert value into stream
    ss >> oval; //.. retrieve value from stream
    if (!ss.eof()) {
      //.. conversion error
-      csoca::elog << "Error: conversion of \'" << ival << "\' failed."
+      music::elog << "Error: conversion of \'" << ival << "\' failed."
                << std::endl;
-      throw ErrInvalidConversion(std::string("invalid conversion to ") +
+      throw except_invalid_conversion(std::string("invalid conversion to ") +
                                 typeid(out_value).name() + '.');
    }
  }
  //! constructor of class config_file
-  /*! @param FileName the path/name of the configuration file to be parsed
+  /*! @param filename the path/name of the configuration file to be parsed
   */
-  explicit ConfigFile(std::string const &FileName) : m_iLine(0), m_Items() {
+  explicit config_file(std::string const &filename) : iline_(0), items_() {
-    std::ifstream file(FileName.c_str());
+    std::ifstream file(filename.c_str());
    if (!file.is_open()){
-      csoca::elog << "Could not open config file \'" << FileName << "\'." << std::endl;
+      music::elog << "Could not open config file \'" << filename << "\'." << std::endl;
      throw std::runtime_error(
-          std::string("Error: Could not open config file \'") + FileName +
+          std::string("Error: Could not open config file \'") + filename +
          std::string("\'"));
    }
    std::string line;
    std::string name;
    std::string value;
-    std::string inSection;
+    std::string in_section;
-    int posEqual;
+    int pos_equal;
-    m_iLine = 0;
+    iline_ = 0;
    //.. walk through all lines ..
    while (std::getline(file, line)) {
-      ++m_iLine;
+      ++iline_;
      //.. encounterd EOL ?
      if (!line.length())
        continue;
@ -106,31 +106,31 @@ public:
      //.. encountered section tag ?
      if (line[0] == '[') {
-        inSection = trim(line.substr(1, line.find(']') - 1));
+        in_section = trim(line.substr(1, line.find(']') - 1));
        continue;
      }
      //.. seek end of entry name ..
-      posEqual = line.find('=');
+      pos_equal = line.find('=');
-      name = trim(line.substr(0, posEqual));
+      name = trim(line.substr(0, pos_equal));
-      value = trim(line.substr(posEqual + 1));
+      value = trim(line.substr(pos_equal + 1));
-      if ((size_t)posEqual == std::string::npos &&
+      if ((size_t)pos_equal == std::string::npos &&
          (name.size() != 0 || value.size() != 0)) {
-        csoca::wlog << "Ignoring non-assignment in " << FileName << ":"
+        music::wlog << "Ignoring non-assignment in " << filename << ":"
-                  << m_iLine << std::endl;
+                  << iline_ << std::endl;
        continue;
      }
      if (name.length() == 0 && value.size() != 0) {
-        csoca::wlog << "Ignoring assignment missing entry name in "
+        music::wlog << "Ignoring assignment missing entry name in "
-                  << FileName << ":" << m_iLine << std::endl;
+                  << filename << ":" << iline_ << std::endl;
        continue;
      }
      if (value.length() == 0 && name.size() != 0) {
-        csoca::wlog << "Empty entry will be ignored in " << FileName << ":"
+        music::wlog << "Empty entry will be ignored in " << filename << ":"
-                  << m_iLine << std::endl;
+                  << iline_ << std::endl;
        continue;
      }
@ -138,12 +138,12 @@ public:
        continue;
      //.. add key/value pair to hash table ..
-      if (m_Items.find(inSection + '/' + name) != m_Items.end()) {
+      if (items_.find(in_section + '/' + name) != items_.end()) {
-        csoca::wlog << "Redeclaration overwrites previous value in "
+        music::wlog << "Redeclaration overwrites previous value in "
-                  << FileName << ":" << m_iLine << std::endl;
+                  << filename << ":" << iline_ << std::endl;
      }
-      m_Items[inSection + '/' + name] = value;
+      items_[in_section + '/' + name] = value;
    }
  }
@ -151,8 +151,8 @@ public:
  /*! @param key the key value, usually "section/key"
   *  @param value the value of the key, also a string
   */
-  void InsertValue(std::string const &key, std::string const &value) {
+  void insert_value(std::string const &key, std::string const &value) {
-    m_Items[key] = value;
+    items_[key] = value;
  }
  //! inserts a key/value pair in the hash map
@ -160,9 +160,9 @@ public:
   *  @param key the key value usually "section/key"
   *  @param value the value of the key, also a string
   */
-  void InsertValue(std::string const &section, std::string const &key,
+  void insert_value(std::string const &section, std::string const &key,
                   std::string const &value) {
-    m_Items[section + '/' + key] = value;
+    items_[section + '/' + key] = value;
  }
  //! checks if a key is part of the hash map
@ -170,10 +170,10 @@ public:
   *  @param key the key name to be checked
   *  @return true if the key is present, false otherwise
   */
-  bool ContainsKey(std::string const &section, std::string const &key) {
+  bool contains_key(std::string const &section, std::string const &key) {
    std::map<std::string, std::string>::const_iterator i =
-        m_Items.find(section + '/' + key);
+        items_.find(section + '/' + key);
-    if (i == m_Items.end())
+    if (i == items_.end())
      return false;
    return true;
  }
@ -182,57 +182,57 @@ public:
  /*! @param key the key name to be checked
   *  @return true if the key is present, false otherwise
   */
-  bool ContainsKey(std::string const &key) {
+  bool contains_key(std::string const &key) {
-    std::map<std::string, std::string>::const_iterator i = m_Items.find(key);
+    std::map<std::string, std::string>::const_iterator i = items_.find(key);
-    if (i == m_Items.end())
+    if (i == items_.end())
      return false;
    return true;
  }
  //! return value of a key
-  /*! returns the value of a given key, throws a ErrItemNotFound
+  /*! returns the value of a given key, throws a except_item_not_found
   *  exception if the key is not available in the hash map.
   *  @param key the key name
   *  @return the value of the key
-   *  @sa ErrItemNotFound
+   *  @sa except_item_not_found
   */
-  template <class T> T GetValue(std::string const &key) const {
+  template <class T> T get_value(std::string const &key) const {
-    return GetValue<T>("", key);
+    return get_value<T>("", key);
  }
  //! return value of a key
-  /*! returns the value of a given key, throws a ErrItemNotFound
+  /*! returns the value of a given key, throws a except_item_not_found
   *  exception if the key is not available in the hash map.
   *  @param section the section name for the key
   *  @param key the key name
   *  @return the value of the key
-   *  @sa ErrItemNotFound
+   *  @sa except_item_not_found
   */
  template <class T>
-  T GetValueBasic(std::string const &section, std::string const &key) const {
+  T get_value_basic(std::string const &section, std::string const &key) const {
    T r;
    std::map<std::string, std::string>::const_iterator i =
-        m_Items.find(section + '/' + key);
+        items_.find(section + '/' + key);
-    if (i == m_Items.end()){
+    if (i == items_.end()){
-      throw ErrItemNotFound('\'' + section + '/' + key +
+      throw except_item_not_found('\'' + section + '/' + key +
                            std::string("\' not found."));
    }
-    Convert(i->second, r);
+    convert(i->second, r);
    return r;
  }
  template <class T>
-  T GetValue(std::string const &section, std::string const &key) const
+  T get_value(std::string const &section, std::string const &key) const
  {
    T r;
    try
    {
-      r = GetValueBasic<T>(section, key);
+      r = get_value_basic<T>(section, key);
    }
-    catch (ErrItemNotFound& e)
+    catch (except_item_not_found& e)
    {
-      csoca::elog << e.what() << std::endl;
+      music::elog << e.what() << std::endl;
      throw;
    }
    return r;
@ -240,40 +240,41 @@ public:
  //! exception safe version of getValue
  /*! returns the value of a given key, returns a default value rather
-   *  than a ErrItemNotFound exception if the key is not found.
+   *  than a except_item_not_found exception if the key is not found.
   *  @param section the section name for the key
   *  @param key the key name
   *  @param default_value the value that is returned if the key is not found
   *  @return the key value (if key found) otherwise default_value
   */
  template <class T>
-  T GetValueSafe(std::string const &section, std::string const &key,
+  T get_value_safe(std::string const &section, std::string const &key,
                 T default_value) const {
    T r;
    try {
-      r = GetValueBasic<T>(section, key);
+      r = get_value_basic<T>(section, key);
-    } catch (ErrItemNotFound&) {
+    } catch (except_item_not_found&) {
      r = default_value;
      music::dlog << "Item \'" << section << "/" << key << " not found in config. Default = \'" << default_value << "\'" << std::endl;
    }
    return r;
  }
  //! exception safe version of getValue
  /*! returns the value of a given key, returns a default value rather
-   *  than a ErrItemNotFound exception if the key is not found.
+   *  than a except_item_not_found exception if the key is not found.
   *  @param key the key name
   *  @param default_value the value that is returned if the key is not found
   *  @return the key value (if key found) otherwise default_value
   */
  template <class T>
-  T GetValueSafe(std::string const &key, T default_value) const {
+  T get_value_safe(std::string const &key, T default_value) const {
-    return GetValueSafe("", key, default_value);
+    return get_value_safe("", key, default_value);
  }
  //! dumps all key-value pairs to a std::ostream
-  void Dump(std::ostream &out) {
+  void dump(std::ostream &out) {
-    std::map<std::string, std::string>::const_iterator i = m_Items.begin();
+    std::map<std::string, std::string>::const_iterator i = items_.begin();
-    while (i != m_Items.end()) {
+    while (i != items_.end()) {
      if (i->second.length() > 0)
        out << std::setw(24) << std::left << i->first << "  =  " << i->second
            << std::endl;
@ -281,12 +282,12 @@ public:
    }
  }
-  void LogDump(void) {
+  void dump_to_log(void) {
-    csoca::ilog << "List of all configuration options:" << std::endl;
+    music::ilog << "List of all configuration options:" << std::endl;
-    std::map<std::string, std::string>::const_iterator i = m_Items.begin();
+    std::map<std::string, std::string>::const_iterator i = items_.begin();
-    while (i != m_Items.end()) {
+    while (i != items_.end()) {
      if (i->second.length() > 0)
-        csoca::ilog << std::setw(28) << i->first << " = " << i->second
+        music::ilog << std::setw(28) << i->first << " = " << i->second
                  << std::endl;
      ++i;
    }
@ -295,16 +296,16 @@ public:
  //--- EXCEPTIONS ---
  //! runtime error that is thrown if key is not found in getValue
-  class ErrItemNotFound : public std::runtime_error {
+  class except_item_not_found : public std::runtime_error {
  public:
-    ErrItemNotFound(std::string itemname)
+    except_item_not_found(std::string itemname)
        : std::runtime_error(itemname.c_str()) {}
  };
  //! runtime error that is thrown if type conversion fails
-  class ErrInvalidConversion : public std::runtime_error {
+  class except_invalid_conversion : public std::runtime_error {
  public:
-    ErrInvalidConversion(std::string errmsg) : std::runtime_error(errmsg) {}
+    except_invalid_conversion(std::string errmsg) : std::runtime_error(errmsg) {}
  };
  //! runtime error that is thrown if identifier is not found in keys
@ -323,14 +324,14 @@ public:
 //...           like "true" and "false" etc.
 //...           converts the string to type bool, returns type bool ...
 template <>
-inline bool ConfigFile::GetValue<bool>(std::string const &strSection,
+inline bool config_file::get_value<bool>(std::string const &strSection,
                                       std::string const &strEntry) const {
-  std::string r1 = GetValue<std::string>(strSection, strEntry);
+  std::string r1 = get_value<std::string>(strSection, strEntry);
  if (r1 == "true" || r1 == "yes" || r1 == "on" || r1 == "1")
    return true;
  if (r1 == "false" || r1 == "no" || r1 == "off" || r1 == "0")
    return false;
-  csoca::elog << "Illegal identifier \'" << r1 << "\' in \'" << strEntry << "\'." << std::endl;
+  music::elog << "Illegal identifier \'" << r1 << "\' in \'" << strEntry << "\'." << std::endl;
  throw ErrIllegalIdentifier(std::string("Illegal identifier \'") + r1 +
                             std::string("\' in \'") + strEntry +
                             std::string("\'."));
@ -338,17 +339,17 @@ inline bool ConfigFile::GetValue<bool>(std::string const &strSection,
 }
 template <>
-inline bool ConfigFile::GetValueSafe<bool>(std::string const &strSection,
+inline bool config_file::get_value_safe<bool>(std::string const &strSection,
                                           std::string const &strEntry,
                                           bool defaultValue) const {
  std::string r1;
  try {
-    r1 = GetValueBasic<std::string>(strSection, strEntry);
+    r1 = get_value_basic<std::string>(strSection, strEntry);
    if (r1 == "true" || r1 == "yes" || r1 == "on" || r1 == "1")
      return true;
    if (r1 == "false" || r1 == "no" || r1 == "off" || r1 == "0")
      return false;
-  } catch (ErrItemNotFound&) {
+  } catch (except_item_not_found&) {
    return defaultValue;
  }
  return defaultValue;
@ -356,7 +357,7 @@ inline bool ConfigFile::GetValueSafe<bool>(std::string const &strSection,
 template <>
 inline void
-ConfigFile::Convert<std::string, std::string>(const std::string &ival,
+config_file::convert<std::string, std::string>(const std::string &ival,
                                              std::string &oval) const {
  oval = ival;
 }
--- a/include/convolution.hh
+++ b/include/convolution.hh
@ -333,7 +333,7 @@ public:
        crecvbuf_ = new ccomplex_t[maxslicesz_ / 2];
        recvbuf_ = reinterpret_cast<real_t *>(&crecvbuf_[0]);
-        int ntasks(MPI_Get_size());
+        int ntasks(MPI::get_size());
        offsets_.assign(ntasks, 0);
        offsetsp_.assign(ntasks, 0);
@ -415,12 +415,12 @@ private:
    {
        assert(fp.space_ == kspace_id);
-        const double rfac = std::pow(1.5, 1.5);
+        const real_t rfac = std::pow(1.5, 1.5);
        fp.zero();
 #if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////
-        size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3};
+        const size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3};
 #pragma omp parallel for
        for (size_t i = 0; i < 2 * fp.size(0) / 3; ++i)
@ -429,10 +429,9 @@ private:
            for (size_t j = 0; j < 2 * fp.size(1) / 3; ++j)
            {
                size_t jp = (j > nhalf[1]) ? j + nhalf[1] : j;
-                for (size_t k = 0; k < 2 * fp.size(2) / 3; ++k)
+                for (size_t k = 0; k < nhalf[2]+1; ++k)
                {
                    size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k;
                    // if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue;
                    fp.kelem(ip, jp, kp) = kfunc(i, j, k) * rfac;
                }
            }
@ -445,7 +444,7 @@ private:
        /////////////////////////////////////////////////////////////////////
        double tstart = get_wtime();
-        csoca::dlog << "[MPI] Started scatter for convolution" << std::endl;
+        music::dlog << "[MPI] Started scatter for convolution" << std::endl;
        //... collect offsets
@ -460,7 +459,10 @@ private:
        size_t slicesz = fbuf_->size(1) * fbuf_->size(3);
        MPI_Datatype datatype =
-            (typeid(data_t) == typeid(float)) ? MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE;
+            (typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX 
            : (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX 
            : (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX
            : MPI_BYTE;
        // fill MPI send buffer with results of kfunc
@ -587,7 +589,7 @@ private:
        // std::cerr << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed! <<<<<"
        // << std::endl;  ofs << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed!
        // <<<<<" << std::endl;
-        csoca::dlog.Print("[MPI] Completed scatter for convolution, took %fs\n",
+        music::dlog.Print("[MPI] Completed scatter for convolution, took %fs\n",
                          get_wtime() - tstart);
 #endif /// end of ifdef/ifndef USE_MPI ///////////////////////////////////////////////////////////////
@ -596,7 +598,7 @@ private:
    template <typename operator_t>
    void unpad(const Grid_FFT<data_t> &fp, operator_t output_op)
    {
-        const double rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(fbuf_->n_[0] * fbuf_->n_[1] * fbuf_->n_[2]);
+        const real_t rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(fbuf_->n_[0] * fbuf_->n_[1] * fbuf_->n_[2]);
        // make sure we're in Fourier space...
        assert(fp.space_ == kspace_id);
@ -615,8 +617,11 @@ private:
                for (size_t k = 0; k < fbuf_->size(2); ++k)
                {
                    size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k;
                    // if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue;
                    fbuf_->kelem(i, j, k) = fp.kelem(ip, jp, kp) / rfac;
                    // zero Nyquist modes since they are not unique after convolution
                    if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]){
                        fbuf_->kelem(i, j, k) = 0.0; 
                    }
                }
            }
        }
@ -634,7 +639,7 @@ private:
        double tstart = get_wtime();
-        csoca::dlog << "[MPI] Started gather for convolution";
+        music::dlog << "[MPI] Started gather for convolution";
        MPI_Barrier(MPI_COMM_WORLD);
@ -645,7 +650,10 @@ private:
        size_t slicesz = fp.size(1) * fp.size(3);
        MPI_Datatype datatype =
-            (typeid(data_t) == typeid(float)) ? MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE;
+            (typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX 
            : (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX 
            : (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX 
            : MPI_BYTE;
        MPI_Status status;
@ -685,7 +693,7 @@ private:
            int recvfrom = 0;
            if (iglobal <= fny[0])
            {
-                real_t wi = (iglobal == fny[0]) ? 0.5 : 1.0;
+                real_t wi = (iglobal == fny[0]) ? 0.0 : 1.0;
                recvfrom = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size);
                MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, (int)iglobal,
@ -693,7 +701,7 @@ private:
                for (size_t j = 0; j < nf[1]; ++j)
                {
-                    real_t wj = (j == fny[1]) ? 0.5 : 1.0;
+                    real_t wj = (j == fny[1]) ? 0.0 : 1.0;
                    if (j <= fny[1])
                    {
                        size_t jp = j;
@ -701,21 +709,22 @@ private:
                        {
                            if (typeid(data_t) == typeid(real_t))
                            {
-                                real_t w = wi * wj;
+                                real_t wk = (k == fny[2]) ? 0.0 : 1.0;
                                real_t w = wi * wj * wk;
                                fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
                            }
                            else
                            {
-                                real_t wk = (k == fny[2]) ? 0.5 : 1.0;
+                                real_t wk = (k == fny[2]) ? 0.0 : 1.0;
                                real_t w = wi * wj * wk;
-                                if (k <= fny[2])
+                                if (k < fny[2])
                                    fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
-                                if (k >= fny[2])
+                                if (k > fny[2])
                                    fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
-                                if (w < 1.0)
+                                // if (w < 1.0)
-                                {
+                                // {
-                                    fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
+                                //     fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
-                                }
+                                // }
                            }
                        }
                    }
@ -726,21 +735,22 @@ private:
                        {
                            if (typeid(data_t) == typeid(real_t))
                            {
-                                real_t w = wi * wj;
+                                real_t wk = (k == fny[2]) ? 0.0 : 1.0;
                                real_t w = wi * wj * wk;
                                fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
                            }
                            else
                            {
-                                real_t wk = (k == fny[2]) ? 0.5 : 1.0;
+                                real_t wk = (k == fny[2]) ? 0.0 : 1.0;
                                real_t w = wi * wj * wk;
-                                if (k <= fny[2])
+                                if (k < fny[2])
                                    fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
-                                if (k >= fny[2])
+                                if (k > fny[2])
                                    fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
-                                if (w < 1.0)
+                                // if (w < 1.0)
-                                {
+                                // {
-                                    fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
+                                //     fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
-                                }
+                                // }
                            }
                        }
                    }
@ -748,7 +758,7 @@ private:
            }
            if (iglobal >= fny[0])
            {
-                real_t wi = (iglobal == fny[0]) ? 0.5 : 1.0;
+                real_t wi = (iglobal == fny[0]) ? 0.0 : 1.0;
                recvfrom = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size);
                MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom,
@ -756,29 +766,26 @@ private:
                for (size_t j = 0; j < nf[1]; ++j)
                {
-                    real_t wj = (j == fny[1]) ? 0.5 : 1.0;
+                    real_t wj = (j == fny[1]) ? 0.0 : 1.0;
                    if (j <= fny[1])
                    {
                        size_t jp = j;
                        for (size_t k = 0; k < nf[2]; ++k)
                        {
                            const real_t wk = (k == fny[2]) ? 0.0 : 1.0;
                            const real_t w = wi * wj * wk;
                            if (typeid(data_t) == typeid(real_t))
                            {
-                                real_t w = wi * wj;
+                                real_t wk = (k == fny[2]) ? 0.0 : 1.0;
                                real_t w = wi * wj * wk;
                                fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
                            }
                            else
                            {
-                                real_t wk = (k == fny[2]) ? 0.5 : 1.0;
+                                if (k < fny[2])
                                real_t w = wi * wj * wk;
                                if (k <= fny[2])
                                    fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
-                                if (k >= fny[2])
+                                if (k > fny[2])
                                    fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
                                if (w < 1.0)
                                {
                                    fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
                                }
                            }
                        }
                    }
@ -787,23 +794,18 @@ private:
                        size_t jp = j + fny[1];
                        for (size_t k = 0; k < nf[2]; ++k)
                        {
                            const real_t wk = (k == fny[2]) ? 0.0 : 1.0;
                            const real_t w = wi * wj * wk;
                            if (typeid(data_t) == typeid(real_t))
                            {
                                real_t w = wi * wj;
                                fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
                            }
                            else
                            {
-                                real_t wk = (k == fny[2]) ? 0.5 : 1.0;
+                                if (k < fny[2])
                                real_t w = wi * wj * wk;
                                if (k <= fny[2])
                                    fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac;
-                                if (k >= fny[2])
+                                if (k > fny[2])
                                    fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac;
                                if (w < 1.0)
                                {
                                    fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k));
                                }
                            }
                        }
                    }
@ -811,8 +813,8 @@ private:
            }
        }
-//... copy data back
+        //... copy data back
-#pragma omp parallel for
+        #pragma omp parallel for
        for (size_t i = 0; i < fbuf_->ntot_; ++i)
        {
            output_op(i, (*fbuf_)[i]);
@ -831,7 +833,7 @@ private:
        MPI_Barrier(MPI_COMM_WORLD);
-        csoca::dlog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart);
+        music::dlog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart);
 #endif /// end of ifdef/ifndef USE_MPI //////////////////////////////////////////////////////////////
    }
--- a/include/cosmology_calculator.hh
+++ b/include/cosmology_calculator.hh
@ -1,25 +1,43 @@
 #pragma once
 #include <array>
 #include <vec.hh>
 #include <cosmology_parameters.hh>
 #include <physical_constants.hh>
 #include <transfer_function_plugin.hh>
 #include <math/ode_integrate.hh>
 #include <logger.hh>
 #include <math/interpolate.hh>
 #include <gsl/gsl_integration.h>
 // #include <gsl/gsl_spline.h>
 #include <gsl/gsl_errno.h>
 namespace cosmology
 {
 /*!
- * @class CosmologyCalculator
+ * @class cosmology::calculator
 * @brief provides functions to compute cosmological quantities
 *
 * This class provides member functions to compute cosmological quantities
 * related to the Friedmann equations and linear perturbation theory
 */
-class CosmologyCalculator
+class calculator
 {
 public:
    //! data structure to store cosmological parameters
    cosmology::parameters cosmo_param_;
    //! pointer to an instance of a transfer function plugin
    std::unique_ptr<TransferFunction_plugin> transfer_function_;
 private:
-    static constexpr double REL_PRECISION = 1e-5;
+    static constexpr double REL_PRECISION = 1e-10;
    interpolated_function_1d<true,true,false> D_of_a_, f_of_a_, a_of_D_;
    double Dnow_, Dplus_start_, Dplus_target_, astart_, atarget_;
    real_t integrate(double (*func)(double x, void *params), double a, double b, void *params) const
    {
@ -39,167 +57,207 @@ private:
        gsl_set_error_handler(NULL);
        if (error / result > REL_PRECISION)
-            csoca::wlog << "no convergence in function 'integrate', rel. error=" << error / result << std::endl;
+            music::wlog << "no convergence in function 'integrate', rel. error=" << error / result << std::endl;
        return (real_t)result;
    }
    void compute_growth( std::vector<double>& tab_a, std::vector<double>& tab_D, std::vector<double>& tab_f )
    {
        using v_t = vec_t<3, double>;
        // set ICs
        const double a0 = 1e-10;
        const double D0 = a0;
        const double Dprime0 = 2.0 * D0 * H_of_a(a0) / std::pow(phys_const::c_SI, 2);
        const double t0 = 1.0 / (a0 * H_of_a(a0));
        v_t y0({a0, D0, Dprime0});
        // set up integration
        double dt = 1e-9;
        double dtdid, dtnext;
        const double amax = 2.0;
        v_t yy(y0);
        double t = t0;
        const double eps = 1e-10;
        while (yy[0] < amax)
        {
            // RHS of ODEs
            auto rhs = [&](double t, v_t y) -> v_t {
                auto a = y[0];
                auto D = y[1];
                auto Dprime = y[2];
                v_t dy;
                // da/dtau = a^2 H(a)
                dy[0] = a * a * H_of_a(a);
                // d D/dtau
                dy[1] = Dprime;
                // d^2 D / dtau^2
                dy[2] = -a * H_of_a(a) * Dprime + 3.0 / 2.0 * cosmo_param_.Omega_m * std::pow(cosmo_param_.H0, 2) * D / a;
                return dy;
            };
            // scale by predicted value to get approx. constant fractional errors
            v_t yyscale = yy.abs() + dt * rhs(t, yy).abs();
            // call integrator
            ode_integrate::rk_step_qs(dt, t, yy, yyscale, rhs, eps, dtdid, dtnext);
            tab_a.push_back(yy[0]);
            tab_D.push_back(yy[1]);
            tab_f.push_back(yy[2]);
            dt = dtnext;
        }
        // tab_f currently holds D'; convert it to the growth rate f = D' / (a * H(a) * D)
        for (size_t i = 0; i < tab_a.size(); ++i)
        {
            tab_f[i] = tab_f[i] / (tab_a[i] * H_of_a(tab_a[i]) * tab_D[i]);
            tab_D[i] = tab_D[i];
            tab_a[i] = tab_a[i];
        }
    }
 public:
-    //! data structure to store cosmological parameters
+    calculator() = delete;
-    CosmologyParameters cosmo_param_;
+    calculator(const calculator& c) = delete;
    //! pointer to an instance of a transfer function plugin
    //TransferFunction_plugin *ptransfer_fun_;
    std::unique_ptr<TransferFunction_plugin> transfer_function_;
    //! constructor for a cosmology calculator object
    /*!
 	 * @param cf configuration file from which the cosmological parameters are read
 	 *           and from which the transfer function plugin is selected
 	 */
-    explicit CosmologyCalculator(ConfigFile &cf)
+    explicit calculator(config_file &cf)
-    : cosmo_param_(cf)
+        : cosmo_param_(cf), astart_( 1.0/(1.0+cf.get_value<double>("setup","zstart")) ),
            atarget_( 1.0/(1.0+cf.get_value_safe<double>("cosmology","ztarget",1./astart_-1.)))
    {
        // pre-compute growth factors and store for interpolation
        std::vector<double> tab_a, tab_D, tab_f;
        this->compute_growth(tab_a, tab_D, tab_f);
        D_of_a_.set_data(tab_a,tab_D);
        f_of_a_.set_data(tab_a,tab_f);
        a_of_D_.set_data(tab_D,tab_a);
        Dnow_ = D_of_a_(1.0);
        Dplus_start_ = D_of_a_( astart_ ) / Dnow_;
        Dplus_target_ = D_of_a_( atarget_ ) / Dnow_;
        // set up transfer functions and compute normalisation
        transfer_function_ = std::move(select_TransferFunction_plugin(cf));
        transfer_function_->intialise();
-        cosmo_param_.pnorm = this->ComputePNorm();
+        if( !transfer_function_->tf_isnormalised_ )
            cosmo_param_.pnorm = this->compute_pnorm_from_sigma8();
        else{
            cosmo_param_.pnorm = 1.0/Dplus_target_/Dplus_target_;
            auto sigma8 = this->compute_sigma8();
            music::ilog << "Measured sigma_8 for given PS normalisation is " <<  sigma8 << std::endl;
        }
        cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm);
-        csoca::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons" << " : " << (transfer_function_->tf_is_distinct()? "yes" : "no") << std::endl;
+
-        csoca::ilog << std::setw(32) << std::left << "TF maximum wave number" << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl;
+        music::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons"
                    << " : " << (transfer_function_->tf_is_distinct() ? "yes" : "no") << std::endl;
        music::ilog << std::setw(32) << std::left << "TF maximum wave number"
                    << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl;
    }
    ~calculator()
    {
    }
    //! Write out a correctly scaled power spectrum at time a
-    void WritePowerspectrum( real_t a, std::string fname ) const
+    void write_powerspectrum(real_t a, std::string fname) const
    {
-        const real_t Dplus0 = this->CalcGrowthFactor(a) / this->CalcGrowthFactor(1.0);
+        // const real_t Dplus0 = this->get_growth_factor(a);
-        if( CONFIG::MPI_task_rank==0 )
+        if (CONFIG::MPI_task_rank == 0)
        {
-            double kmin = std::max(1e-4,transfer_function_->get_kmin());
+            double kmin = std::max(1e-4, transfer_function_->get_kmin());
            // write power spectrum to a file
            std::ofstream ofs(fname.c_str());
-            std::stringstream ss; ss << " (a=" << a <<")";
+            std::stringstream ss;
            ss << " ,ap=" << a << "";
            ofs << "# " << std::setw(18) << "k [h/Mpc]"
-                        << std::setw(20) << ("P_dtot(k)"+ss.str()) 
+                << std::setw(20) << ("P_dtot(k,a=ap)")
-                        << std::setw(20) << ("P_dcdm(k)"+ss.str())
+                << std::setw(20) << ("P_dcdm(k,a=ap)")
-                        << std::setw(20) << ("P_dbar(k)"+ss.str())
+                << std::setw(20) << ("P_dbar(k,a=ap)")
-                        << std::setw(20) << ("P_dtot(K) (a=1)")
+                << std::setw(20) << ("P_tcdm(k,a=ap)")
-                        << std::setw(20) << ("P_tcdm(k)"+ss.str()) 
+                << std::setw(20) << ("P_tbar(k,a=ap)")
-                        << std::setw(20) << ("P_tbar(k)"+ss.str())
+                << std::setw(20) << ("P_dtot(k,a=1)")
                << std::setw(20) << ("P_dcdm(k,a=1)")
                << std::setw(20) << ("P_dbar(k,a=1)")
                << std::setw(20) << ("P_tcdm(k,a=1)")
                << std::setw(20) << ("P_tbar(k,a=1)")
                << std::setw(20) << ("P_dtot(K,a=1)")
                << std::endl;
-            for( double k=kmin; k<transfer_function_->get_kmax(); k*=1.05 ){
+            for (double k = kmin; k < transfer_function_->get_kmax(); k *= 1.05)
            {
                ofs << std::setw(20) << std::setprecision(10) << k
-                    << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total) * Dplus0, 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total)*Dplus_start_, 2.0)
-                    << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, cdm) * Dplus0, 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm)*Dplus_start_, 2.0)
-                    << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, baryon) * Dplus0, 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon)*Dplus_start_, 2.0)
-                    << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total), 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vcdm)*Dplus_start_, 2.0)
-                    << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vcdm) * Dplus0, 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon)*Dplus_start_, 2.0)
-                    << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vbaryon) * Dplus0, 2.0)
+                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total0), 2.0)
                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm0), 2.0)
                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon0), 2.0)
                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vcdm0), 2.0)
                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon0), 2.0)
                    << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vtotal), 2.0)
                    << std::endl;
            }
        }
-
+        music::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl;
        csoca::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl;
    }
-    const CosmologyParameters &GetParams(void) const
+    const cosmology::parameters &get_parameters(void) const noexcept
    {
        return cosmo_param_;
    }
-    //! returns the amplitude of amplitude of the power spectrum
+    //! return the value of the Hubble function H(a) = dloga/dt 
-    /*!
+    inline double H_of_a(double a) const noexcept
-	 * @param k the wave number in h/Mpc
+    {
-	 * @param a the expansion factor of the universe
+        double HH2 = 0.0;
-	 * @returns power spectrum amplitude for wave number k at time a
+        HH2 += cosmo_param_.Omega_r / (a * a * a * a);
        HH2 += cosmo_param_.Omega_m / (a * a * a);
        HH2 += cosmo_param_.Omega_k / (a * a);
        HH2 += cosmo_param_.Omega_DE * std::pow(a, -3. * (1. + cosmo_param_.w_0 + cosmo_param_.w_a)) * exp(-3. * (1.0 - a) * cosmo_param_.w_a);
        return cosmo_param_.H0 * std::sqrt(HH2);
    }
    //! Computes the linear theory growth factor D+, normalised to D+(a=1)=1
    real_t get_growth_factor(real_t a) const noexcept
    {
        return D_of_a_(a) / Dnow_;
    }
    //! Computes the inverse of get_growth_factor
    real_t get_a( real_t Dplus ) const noexcept
    {
        return a_of_D_( Dplus * Dnow_ );
    }
    //! Computes the linear theory growth rate f
    /*! Function computes (by interpolating on precalculated table)
     *   f = dlog D+ / dlog a
     */
-    inline real_t Power(real_t k, real_t a)
+    real_t get_f(real_t a) const noexcept
    {
-        real_t Dplus = CalcGrowthFactor(a);
+        return f_of_a_(a);
        real_t DplusOne = CalcGrowthFactor(1.0);
        real_t pNorm = ComputePNorm();
        Dplus /= DplusOne;
        DplusOne = 1.0;
        real_t scale = Dplus / DplusOne;
        return pNorm * scale * scale * TransferSq(k) * pow((double)k, (double)cosmo_param_.nspect);
    }
    inline static double H_of_a(double a, void *Params)
    {
        CosmologyParameters *cosm = (CosmologyParameters *)Params;
        double a2 = a * a;
        double Ha = sqrt(cosm->Omega_m / (a2 * a) + cosm->Omega_k / a2 + cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a));
        return Ha;
    }
    inline static double Hprime_of_a(double a, void *Params) 
    {
        CosmologyParameters *cosm = (CosmologyParameters *)Params;
        double a2 = a * a;
        double H = H_of_a(a, Params);
        double Hprime = 1 / (a * H) * (-1.5 * cosm->Omega_m / (a2 * a) - cosm->Omega_k / a2 - 1.5 * cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a) * (1. + cosm->w_0 + (1. - a) * cosm->w_a));
        return Hprime;
    }
    //! Integrand used by function CalcGrowthFactor to determine the linear growth factor D+
    inline static double GrowthIntegrand(double a, void *Params) 
    {
        double Ha = a * H_of_a(a, Params);
        return 2.5 / (Ha * Ha * Ha);
    }
    //! integrand function for Calc_fPeebles
 	/*!
 	 * @sa Calc_fPeebles
 	 */
 	inline static double fIntegrand( double a, void *Params )
 	{
 		CosmologyParameters *cosm = (CosmologyParameters *)Params;
 		double y = cosm->Omega_m*(1.0/a-1.0) + cosm->Omega_DE*(a*a-1.0) + 1.0;
 		return 1.0/pow(y,1.5);
 	}
 	//! calculates d log D+/d log a
 	/*! this version follows the Peebles (TBD: add citation)
 	 *  formula to compute Bertschinger's vfact
 	 */
 	inline real_t CalcGrowthRate( real_t a )
 	{
        #warning CalcGrowthRate is only correct if dark energy is a cosmological constant, need to upgrade calculator...
 		real_t y = cosmo_param_.Omega_m*(1.0/a-1.0) + cosmo_param_.Omega_DE*(a*a-1.0) + 1.0;
 		real_t fact = integrate( &fIntegrand, 1e-6, a, (void*)&cosmo_param_ );
 		return (cosmo_param_.Omega_DE*a*a-0.5*cosmo_param_.Omega_m/a)/y - 1.0 + a*fIntegrand(a,(void*)&cosmo_param_)/fact;
 	}
    //! Computes the linear theory growth factor D+
    /*! Function integrates over member function GrowthIntegrand and computes
    *                      /a
    *   D+(a) = 5/2 H(a) * |  [a'^3 * H(a')^3]^(-1) da'
    *                      /0
    */
    real_t CalcGrowthFactor(real_t a) const
    {
        real_t integral = integrate(&GrowthIntegrand, 0.0, a, (void *)&cosmo_param_);
        return H_of_a(a, (void *)&cosmo_param_) * integral;
    }
    //! Compute the factor relating particle displacement and velocity
    /*! Function computes
-    *
+     *  vfac = a * (H(a)/h) * dlogD+ / dlog a 
    *  vfac = a^2 * H(a) * dlogD+ / d log a = a^2 * H'(a) + 5/2 * [ a * D+(a) * H(a) ]^(-1)
    *
     */
-    real_t CalcVFact(real_t a) const
+    real_t get_vfact(real_t a) const noexcept
    {
-        real_t Dp = CalcGrowthFactor(a);
+        return f_of_a_(a) * a * H_of_a(a) / cosmo_param_.h;
        real_t H = H_of_a(a, (void *)&cosmo_param_);
        real_t Hp = Hprime_of_a(a, (void *)&cosmo_param_);
        real_t a2 = a * a;
        return (a2 * Hp + 2.5 / (a * Dp * H)) * 100.0;
    }
    //! Integrand for the sigma_8 normalization of the power spectrum
@ -210,7 +268,7 @@ public:
        if (k <= 0.0)
            return 0.0f;
-        CosmologyCalculator *pcc = reinterpret_cast<CosmologyCalculator*>(pParams);
+        cosmology::calculator *pcc = reinterpret_cast<cosmology::calculator *>(pParams);
        double x = k * 8.0;
        double w = 3.0 * (sin(x) - x * cos(x)) / (x * x * x);
@ -229,7 +287,7 @@ public:
        if (k <= 0.0)
            return 0.0f;
-        CosmologyCalculator *pcc = reinterpret_cast<CosmologyCalculator*>(pParams);
+        cosmology::calculator *pcc = reinterpret_cast<cosmology::calculator *>(pParams);
        double x = k * 8.0;
        double w = 3.0 * (sin(x) - x * cos(x)) / (x * x * x);
@ -240,24 +298,12 @@ public:
        return k * k * w * w * pow((double)k, (double)nspect) * tf * tf;
    }
    //! Computes the square of the transfer function
    /*! Function evaluates the supplied transfer function ptransfer_fun_
 	 * and returns the square of its value at wave number k
 	 * @param k wave number at which to evaluate the transfer function
 	 */
    inline real_t TransferSq(real_t k) const
    {
        //.. parameter supplied transfer function
        real_t tf1 = transfer_function_->compute(k, total);
        return tf1 * tf1;
    }
    //! Computes the amplitude of a mode from the power spectrum
    /*! Function evaluates the transfer function plugin transfer_function_
 	 * and returns the amplitude of fluctuations at wave number k at z=0
 	 * @param k wave number at which to evaluate
 	 */
-    inline real_t GetAmplitude(real_t k, tf_type type) const
+    inline real_t get_amplitude(real_t k, tf_type type) const
    {
        return std::pow(k, 0.5 * cosmo_param_.nspect) * transfer_function_->compute(k, type) * cosmo_param_.sqrtpnorm;
    }
@ -267,18 +313,30 @@ public:
 	 * integrates the power spectrum between the transfer function's kmin and kmax
 	 * and returns the square root of the result (the measured sigma_8)
 	 */
-    real_t ComputePNorm(void)
+    real_t compute_sigma8(void)
    {
        real_t sigma0, kmin, kmax;
        kmax = transfer_function_->get_kmax();
        kmin = transfer_function_->get_kmin();
        if (!transfer_function_->tf_has_total0())
-            sigma0 = 4.0 * M_PI * integrate(&dSigma8, (double)kmin, (double)kmax, this );
+            sigma0 = 4.0 * M_PI * integrate(&dSigma8, (double)kmin, (double)kmax, this);
-        else
+        else{
-            sigma0 = 4.0 * M_PI * integrate(&dSigma8_0, (double)kmin, (double)kmax, this );
+            sigma0 = 4.0 * M_PI * integrate(&dSigma8_0, (double)kmin, (double)kmax, this);
        }
-        return cosmo_param_.sigma8 * cosmo_param_.sigma8 / sigma0;
+        return std::sqrt(sigma0);
    }
    //! Computes the normalization for the power spectrum
    /*!
 	 * integrates the power spectrum to fix the normalization to that given
 	 * by the sigma_8 parameter
 	 */
    real_t compute_pnorm_from_sigma8(void)
    {
        auto measured_sigma8 = this->compute_sigma8();
        return cosmo_param_.sigma8 * cosmo_param_.sigma8 / (measured_sigma8  * measured_sigma8);
    }
 };
@ -294,3 +352,5 @@ inline double jeans_sound_speed(double rho, double mass)
    const double G = 6.67e-8;
    return pow(6.0 * mass / M_PI * sqrt(rho) * pow(G, 1.5), 1.0 / 3.0);
 }
 } // namespace cosmology
--- a/include/cosmology_parameters.hh
+++ b/include/cosmology_parameters.hh
@ -1,10 +1,21 @@
 #pragma once
 /*******************************************************************************\
 cosmology_parameters.hh - This file is part of MUSIC2 -
 a code to generate initial conditions for cosmological simulations 
 CHANGELOG (only majors, for details see repo):
    06/2019 - Oliver Hahn - first implementation
 \*******************************************************************************/
 #include <physical_constants.hh>
 #include <config_file.hh>
-//! structure for cosmological parameters
+namespace cosmology
 struct CosmologyParameters
 {
 //! structure for cosmological parameters
 struct parameters
 {
    double
        Omega_m,  //!< baryon+dark matter density
        Omega_b,  //!< baryon matter density
@ -12,38 +23,88 @@ struct CosmologyParameters
        Omega_r,  //!< photon + relativistic particle density
        Omega_k,  //!< curvature density
        H0,       //!< Hubble constant in km/s/Mpc
        h,        //!< hubble parameter
        nspect,   //!< long-wave spectral index (scale free is nspect=1)
        sigma8,   //!< power spectrum normalization
        Tcmb,     //!< CMB temperature (used to set Omega_r)
        Neff,     //!< effective number of neutrino species (used to set Omega_r)
        w_0,      //!< dark energy equation of state parameter 1: w = w0 + (1-a) * wa
        w_a,      //!< dark energy equation of state parameter 2: w = w0 + (1-a) * wa
        // below are helpers to store additional information
        dplus,     //!< linear perturbation growth factor
        f,         //!< growth factor logarithmic derivative
        pnorm,     //!< actual power spectrum normalisation factor
        sqrtpnorm, //!< sqrt of power spectrum normalisation factor
        vfact;     //!< velocity<->displacement conversion factor in Zel'dovich approx.
-    explicit CosmologyParameters(ConfigFile cf)
+    parameters() = delete;
    parameters( const parameters& ) = default;
    explicit parameters(config_file cf)
    {
-        Omega_b = cf.GetValue<double>("cosmology", "Omega_b");
+        H0 = cf.get_value<double>("cosmology", "H0");
-        Omega_m = cf.GetValue<double>("cosmology", "Omega_m");
+        h  = H0 / 100.0;
        Omega_DE = cf.GetValue<double>("cosmology", "Omega_L");
        w_0 = cf.GetValueSafe<double>("cosmology", "w0", -1.0);
        w_a = cf.GetValueSafe<double>("cosmology", "wa", 0.0);
-        Omega_r = cf.GetValueSafe<double>("cosmology", "Omega_r", 0.0); // no longer default to nonzero (8.3e-5)
+        nspect = cf.get_value<double>("cosmology", "nspec");
        Omega_b = cf.get_value<double>("cosmology", "Omega_b");
        Omega_m = cf.get_value<double>("cosmology", "Omega_m");
        Omega_DE = cf.get_value<double>("cosmology", "Omega_L");
        w_0 = cf.get_value_safe<double>("cosmology", "w0", -1.0);
        w_a = cf.get_value_safe<double>("cosmology", "wa", 0.0);
        Tcmb = cf.get_value_safe<double>("cosmology", "Tcmb", 2.7255);
        Neff = cf.get_value_safe<double>("cosmology", "Neff", 3.046);
        sigma8 = cf.get_value<double>("cosmology", "sigma_8");
        // calculate energy density in ultrarelativistic species from Tcmb and Neff
        double Omega_gamma = 4 * phys_const::sigma_SI / std::pow(phys_const::c_SI, 3) * std::pow(Tcmb, 4.0) / phys_const::rhocrit_h2_SI / (h * h);
        double Omega_nu = Neff * Omega_gamma * 7. / 8. * std::pow(4. / 11., 4. / 3.);
        Omega_r = Omega_gamma + Omega_nu;
        if (cf.get_value_safe<bool>("cosmology", "ZeroRadiation", false))
        {
            Omega_r = 0.0;
        }
 #if 1
        // assume zero curvature, take difference from dark energy
        Omega_DE += 1.0 - Omega_m - Omega_DE - Omega_r;
        Omega_k  = 0.0;
 #else
        // allow for curvature 
        Omega_k = 1.0 - Omega_m - Omega_DE - Omega_r;
-
+#endif
        H0 = cf.GetValue<double>("cosmology", "H0");
        sigma8 = cf.GetValue<double>("cosmology", "sigma_8");
        nspect = cf.GetValue<double>("cosmology", "nspec");
        dplus = 0.0;
        pnorm = 0.0;
        vfact = 0.0;
        music::ilog << "-------------------------------------------------------------------------------" << std::endl;
        music::ilog << "Cosmological parameters are: " << std::endl;
        music::ilog << " H0       = " << std::setw(16) << H0          << "sigma_8  = " << std::setw(16) << sigma8 << std::endl;
        music::ilog << " Omega_c  = " << std::setw(16) << Omega_m-Omega_b << "Omega_b  = " << std::setw(16) << Omega_b << std::endl;
        if (!cf.get_value_safe<bool>("cosmology", "ZeroRadiation", false)){
            music::ilog << " Omega_g  = " << std::setw(16) << Omega_gamma << "Omega_nu = " << std::setw(16) << Omega_nu << std::endl;
        }else{
            music::ilog << " Omega_r  = " << std::setw(16) << Omega_r << std::endl;
        }
        music::ilog << " Omega_DE = " << std::setw(16) << Omega_DE    << "nspect   = " << std::setw(16) << nspect << std::endl;
        music::ilog << " w0       = " << std::setw(16) << w_0         << "w_a      = " << std::setw(16) << w_a << std::endl;
        if( Omega_r > 0.0 )
        {
            music::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally."<< std::endl;
            music::wlog << "Make sure your sim code supports this..." << std::endl;
        }
    }
    CosmologyParameters(void)
    {
    }
 };
 } // namespace cosmology
--- a/include/general.hh
+++ b/include/general.hh
@ -7,24 +7,49 @@
 #if defined(USE_MPI)
 #include <mpi.h>
-  #include <fftw3-mpi.h>
+#include <fftw3-mpi.h>
 #else
-  #include <fftw3.h>
+#include <fftw3.h>
 #endif
-#ifdef USE_SINGLEPRECISION
+#include <config_file.hh>
 #define _unused(x) ((void)(x))
 // include CMake controlled configuration settings
 #include <cmake_config.hh>
 #if defined(USE_PRECISION_FLOAT)
 using real_t = float;
 using complex_t = fftwf_complex;
 #define FFTW_PREFIX fftwf
-#else
+#elif defined(USE_PRECISION_DOUBLE)
 using real_t = double;
 using complex_t = fftw_complex;
 #define FFTW_PREFIX fftw
 #elif defined(USE_PRECISION_LONGDOUBLE)
 using real_t = long double;
 using complex_t = fftwl_complex;
 #define FFTW_PREFIX fftwl
 #endif
-enum class fluid_component { density, vx, vy, vz, dx, dy, dz };
+enum class fluid_component
-enum class cosmo_species { dm, baryon, neutrino };
+{
-extern std::map<cosmo_species,std::string> cosmo_species_name;
+  density,
  vx,
  vy,
  vz,
  dx,
  dy,
  dz
 };
 enum class cosmo_species
 {
  dm,
  baryon,
  neutrino
 };
 extern std::map<cosmo_species, std::string> cosmo_species_name;
 using ccomplex_t = std::complex<real_t>;
@ -48,49 +73,61 @@ inline double get_wtime()
  return MPI_Wtime();
 }
-inline int MPI_Get_rank( void ){
+namespace MPI
 {
 inline int get_rank(void)
 {
  int rank, ret;
  ret = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-	assert( ret==MPI_SUCCESS );
+  assert(ret == MPI_SUCCESS);
  _unused(ret);
  return rank;
 }
-inline int MPI_Get_size( void ){
+inline int get_size(void)
 {
  int size, ret;
  ret = MPI_Comm_size(MPI_COMM_WORLD, &size);
-	assert( ret==MPI_SUCCESS );
+  assert(ret == MPI_SUCCESS);
  _unused(ret);
  return size;
 }
-template<typename T>
+template <typename T>
-MPI_Datatype GetMPIDatatype( void )
+inline MPI_Datatype get_datatype(void)
 {
-  if( typeid(T) == typeid(std::complex<float>) )
+  if (typeid(T) == typeid(std::complex<float>))
-    return MPI_COMPLEX;
+    return MPI_C_FLOAT_COMPLEX;
-  if( typeid(T) == typeid(std::complex<double>) )
+  if (typeid(T) == typeid(std::complex<double>))
-    return MPI_DOUBLE_COMPLEX;
+    return MPI_C_DOUBLE_COMPLEX;
-  if( typeid(T) == typeid(int) )
+  if (typeid(T) == typeid(std::complex<long double>))
    return MPI_C_LONG_DOUBLE_COMPLEX;
  if (typeid(T) == typeid(int))
    return MPI_INT;
-  if( typeid(T) == typeid(unsigned) )
+  if (typeid(T) == typeid(unsigned))
    return MPI_UNSIGNED;
-  if( typeid(T) == typeid(float) )
+  if (typeid(T) == typeid(float))
    return MPI_FLOAT;
-  if( typeid(T) == typeid(double) )
+  if (typeid(T) == typeid(double))
    return MPI_DOUBLE;
-  if( typeid(T) == typeid(char) )
+  if (typeid(T) == typeid(long double))
    return MPI_LONG_DOUBLE;
  if (typeid(T) == typeid(char))
    return MPI_CHAR;
  abort();
 }
-inline std::string GetMPIversion( void )
+inline std::string get_version(void)
 {
  int len;
  char mpi_lib_ver[MPI_MAX_LIBRARY_VERSION_STRING];
@ -98,33 +135,31 @@ inline std::string GetMPIversion( void )
  MPI_Get_library_version(mpi_lib_ver, &len);
  return std::string(mpi_lib_ver);
 }
-
+} // namespace MPI
 #else
-  #if defined(_OPENMP)
+#if defined(_OPENMP)
-    #include <omp.h>
+#include <omp.h>
-    inline double get_wtime()
+inline double get_wtime()
-    {
+{
  return omp_get_wtime();
-    }
+}
-  #else
+#else
-    #include <ctime>
+#include <ctime>
-    inline double get_wtime()
+inline double get_wtime()
-    {
+{
  return std::clock() / double(CLOCKS_PER_SEC);
-    }
+}
-  #endif
+#endif
 #endif
-inline void multitask_sync_barrier( void )
+inline void multitask_sync_barrier(void)
 {
 #if defined(USE_MPI)
-  MPI_Barrier( MPI_COMM_WORLD );
+  MPI_Barrier(MPI_COMM_WORLD);
 #endif
 }
 namespace CONFIG
 {
 extern int MPI_thread_support;
@ -135,13 +170,3 @@ extern bool MPI_threads_ok;
 extern bool FFTW_threads_ok;
 extern int num_threads;
 } // namespace CONFIG
 // These variables are autogenerated and compiled
 // into the library by the version.cmake script
 extern "C"
 {
    extern const char* GIT_TAG;
    extern const char* GIT_REV;
    extern const char* GIT_BRANCH;
 }
--- a/include/grid_fft.hh
+++ b/include/grid_fft.hh
@ -4,7 +4,7 @@
 #include <array>
 #include <vector>
-#include <vec3.hh>
+#include <math/vec3.hh>
 #include <general.hh>
 #include <bounding_box.hh>
 #include <typeinfo>
@ -16,22 +16,26 @@ enum space_t
 };
-template <typename data_t>
+#ifdef USE_MPI
 template <typename data_t_, bool bdistributed=true>
 #else
 template <typename data_t_, bool bdistributed=false>
 #endif
 class Grid_FFT
 {
 public:
    using data_t = data_t_;
    static constexpr bool is_distributed_trait{bdistributed};
 protected:
-#if defined(USE_MPI)
+    using grid_fft_t = Grid_FFT<data_t,bdistributed>;
-    const MPI_Datatype MPI_data_t_type = (typeid(data_t) == typeid(double)) ? MPI_DOUBLE
+    
                                                                            : (typeid(data_t) == typeid(float)) ? MPI_FLOAT
                                                                                                                : (typeid(data_t) == typeid(std::complex<float>)) ? MPI_COMPLEX
                                                                                                                                                                  : (typeid(data_t) == typeid(std::complex<double>)) ? MPI_DOUBLE_COMPLEX : MPI_INT;
 #endif
 public:
    std::array<size_t, 3> n_, nhalf_;
    std::array<size_t, 4> sizes_;
    size_t npr_, npc_;
    size_t ntot_;
-    std::array<real_t, 3> length_, kfac_, dx_;
+    std::array<real_t, 3> length_, kfac_, kny_, dx_;
    space_t space_;
    data_t *data_;
@ -54,7 +58,7 @@ public:
    }
    // avoid implicit copying of data
-    Grid_FFT(const Grid_FFT<data_t> &g) = delete;
+    Grid_FFT(const grid_fft_t &g) = delete;
    ~Grid_FFT()
    {
@ -64,34 +68,48 @@ public:
        }
    }
-    const Grid_FFT<data_t> *get_grid(size_t ilevel) const { return this; }
+    const grid_fft_t *get_grid(size_t ilevel) const { return this; }
    bool is_distributed( void ) const noexcept { return bdistributed; }
    void Setup();
    //! return the number of data_t elements that we store in the container
    size_t memsize( void ) const noexcept { return ntot_; }
    //! return the (local) size of dimension i
-    size_t size(size_t i) const { return sizes_[i]; }
+    size_t size(size_t i) const noexcept { assert(i<4); return sizes_[i]; }
    //! return the (global) size of dimension i
-    size_t global_size(size_t i) const { return n_[i]; }
+    size_t global_size(size_t i) const noexcept { assert(i<3); return n_[i]; }
    //! return locally stored number of elements of field
-    size_t local_size(void) const { return local_0_size_ * n_[1] * n_[2]; }
+    size_t local_size(void) const noexcept { return local_0_size_ * n_[1] * n_[2]; }
    //! return a bounding box of the global extent of the field
-    const bounding_box<size_t> &get_global_range(void) const
+    const bounding_box<size_t> &get_global_range(void) const noexcept
    {
        return global_range_;
    }
    //! test whether the Fourier-space index triple (i,j,k) lies on a Nyquist plane
    /*!
     * Returns true as soon as one of the three indices equals half the grid extent it is
     * compared against. The grid must currently be in Fourier space (asserted).
     * NOTE(review): the first index (offset by local_1_start_) is compared with n_[1]/2 and the
     * second with n_[0]/2, i.e. the transposed ordering that get_k uses when bdistributed is
     * set; for a non-distributed grid with n_[0] != n_[1] this looks swapped -- confirm.
     */
    bool is_nyquist_mode( size_t i, size_t j, size_t k ) const
    {
        assert( this->space_ == kspace_id );
        return (i+local_1_start_ == n_[1]/2) || (j == n_[0]/2) || (k == n_[2]/2);
    }
    //! set all field elements to zero
-    void zero()
+    void zero() noexcept
    {
 #pragma omp parallel for
        for (size_t i = 0; i < ntot_; ++i)
            data_[i] = 0.0;
    }
-    void copy_from(const Grid_FFT<data_t> &g)
+    void copy_from(const grid_fft_t &g)
    {
        // make sure the two fields are in the same space
        if (g.space_ != this->space_)
@ -113,49 +131,49 @@ public:
            data_[i] = g.data_[i];
    }
-    data_t &operator[](size_t i)
+    data_t &operator[](size_t i) noexcept
    {
        return data_[i];
    }
-    data_t &relem(size_t i, size_t j, size_t k)
+    data_t &relem(size_t i, size_t j, size_t k) noexcept 
    {
        size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
        return data_[idx];
    }
-    const data_t &relem(size_t i, size_t j, size_t k) const
+    const data_t &relem(size_t i, size_t j, size_t k) const noexcept
    {
        size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
        return data_[idx];
    }
-    ccomplex_t &kelem(size_t i, size_t j, size_t k)
+    ccomplex_t &kelem(size_t i, size_t j, size_t k) noexcept
    {
        size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
        return cdata_[idx];
    }
-    const ccomplex_t &kelem(size_t i, size_t j, size_t k) const
+    const ccomplex_t &kelem(size_t i, size_t j, size_t k) const noexcept
    {
        size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
        return cdata_[idx];
    }
-    ccomplex_t &kelem(size_t idx) { return cdata_[idx]; }
+    ccomplex_t &kelem(size_t idx) noexcept { return cdata_[idx]; }
-    const ccomplex_t &kelem(size_t idx) const { return cdata_[idx]; }
+    const ccomplex_t &kelem(size_t idx) const noexcept { return cdata_[idx]; }
-    data_t &relem(size_t idx) { return data_[idx]; }
+    data_t &relem(size_t idx) noexcept { return data_[idx]; }
-    const data_t &relem(size_t idx) const { return data_[idx]; }
+    const data_t &relem(size_t idx) const noexcept { return data_[idx]; }
-    size_t get_idx(size_t i, size_t j, size_t k) const
+    size_t get_idx(size_t i, size_t j, size_t k) const noexcept
    {
        return (i * sizes_[1] + j) * sizes_[3] + k;
    }
    template <typename ft>
-    vec3<ft> get_r(const size_t i, const size_t j, const size_t k) const
+    vec3_t<ft> get_r(const size_t i, const size_t j, const size_t k) const noexcept
    {
-        vec3<ft> rr;
+        vec3_t<ft> rr;
        rr[0] = real_t(i + local_0_start_) * dx_[0];
        rr[1] = real_t(j) * dx_[1];
@ -165,9 +183,9 @@ public:
    }
    template <typename ft>
-    vec3<ft> get_unit_r(const size_t i, const size_t j, const size_t k) const
+    vec3_t<ft> get_unit_r(const size_t i, const size_t j, const size_t k) const noexcept
    {
-        vec3<ft> rr;
+        vec3_t<ft> rr;
        rr[0] = real_t(i + local_0_start_) / real_t(n_[0]);
        rr[1] = real_t(j) / real_t(n_[1]);
@ -177,91 +195,155 @@ public:
    }
    template <typename ft>
-    vec3<ft> get_unit_r_staggered(const size_t i, const size_t j, const size_t k) const
+    vec3_t<ft> get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3_t<real_t> s) const noexcept
    {
-        vec3<ft> rr;
+        vec3_t<ft> rr;
-        rr[0] = (real_t(i + local_0_start_) + 0.5) / real_t(n_[0]);
+        rr[0] = (real_t(i + local_0_start_) + s.x) / real_t(n_[0]);
-        rr[1] = (real_t(j) + 0.5) / real_t(n_[1]);
+        rr[1] = (real_t(j) + s.y) / real_t(n_[1]);
-        rr[2] = (real_t(k) + 0.5) / real_t(n_[2]);
+        rr[2] = (real_t(k) + s.z) / real_t(n_[2]);
        return rr;
    }
-    template <typename ft>
+    vec3_t<size_t> get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const noexcept
    vec3<ft> get_unit_r_shifted(const size_t i, const size_t j, const size_t k, double sx, double sy, double sz) const
    {
-        vec3<ft> rr;
+        return vec3_t<size_t>({i + local_0_start_, j, k});
        rr[0] = (real_t(i + local_0_start_) + sx) / real_t(n_[0]);
        rr[1] = (real_t(j) + sy) / real_t(n_[1]);
        rr[2] = (real_t(k) + sz) / real_t(n_[2]);
        return rr;
    }
-    void cell_pos(int ilevel, size_t i, size_t j, size_t k, double *x) const
+    size_t get_cell_idx_1d(const size_t i, const size_t j, const size_t k) const noexcept
    {
        x[0] = double(i + local_0_start_) / size(0);
        x[1] = double(j) / size(1);
        x[2] = double(k) / size(2);
    }
    vec3<size_t> get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const
    {
        return vec3<size_t>({i + local_0_start_, j, k});
    }
    size_t get_cell_idx_1d(const size_t i, const size_t j, const size_t k) const
    {
        return ((i + local_0_start_) * size(1) + j) * size(2) + k;
    }
-    size_t count_leaf_cells(int, int) const
+    //! deprecated function, was needed for old output plugin
    size_t count_leaf_cells(int, int) const noexcept
    {
        return n_[0] * n_[1] * n_[2];
    }
-    real_t get_dx(int idim) const
+    real_t get_dx(int idim) const noexcept
    {
        assert(idim<3&&idim>=0);
        return dx_[idim];
    }
-    const std::array<real_t, 3> &get_dx(void) const
+    const std::array<real_t, 3> &get_dx(void) const noexcept
    {
        return dx_;
    }
    template <typename ft>
-    vec3<ft> get_k(const size_t i, const size_t j, const size_t k) const
+    vec3_t<ft> get_k(const size_t i, const size_t j, const size_t k) const noexcept
    {
-        vec3<ft> kk;
+        vec3_t<ft> kk;
-
+        if( bdistributed ){
 #if defined(USE_MPI)
            auto ip = i + local_1_start_;
            kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0];
            kk[1] = (real_t(ip) - real_t(ip > nhalf_[1]) * n_[1]) * kfac_[1];
-#else
+        }else{
            kk[0] = (real_t(i) - real_t(i > nhalf_[0]) * n_[0]) * kfac_[0];
            kk[1] = (real_t(j) - real_t(j > nhalf_[1]) * n_[1]) * kfac_[1];
-#endif
+        }
        kk[2] = (real_t(k) - real_t(k > nhalf_[2]) * n_[2]) * kfac_[2];
        return kk;
    }
    //! wave vector for real-valued (fractional) Fourier-grid indices
    /*!
     * Same mapping as the integer-index overload: an index larger than nhalf_ along a
     * dimension is shifted down by n_ before being scaled with kfac_. For distributed
     * grids the first two indices are interpreted in transposed order and the first index
     * is offset by local_1_start_.
     * @tparam ft element type of the returned vector
     * @return wave vector (k_0, k_1, k_2)
     */
    template <typename ft>
    vec3_t<ft> get_k(const real_t i, const real_t j, const real_t k) const noexcept
    {
        // converts a (fractional) index along dimension 'dim' into a wave number
        const auto wavenumber = [this](const real_t idx, const int dim) -> real_t {
            return (idx - real_t(idx > real_t(nhalf_[dim])) * n_[dim]) * kfac_[dim];
        };
        vec3_t<ft> kk;
        if( bdistributed ){
            // transposed layout: second index runs along dimension 0, first (plus offset) along dimension 1
            kk[0] = wavenumber(j, 0);
            kk[1] = wavenumber(i + real_t(local_1_start_), 1);
        }else{
            kk[0] = wavenumber(i, 0);
            kk[1] = wavenumber(j, 1);
        }
        kk[2] = wavenumber(k, 2);
        return kk;
    }
    //! integer Fourier-grid index triple in (dim 0, dim 1, dim 2) order
    /*!
     * For distributed grids the first two local indices are swapped and the first one is
     * offset by local_1_start_; otherwise (i,j,k) is returned unchanged.
     */
    std::array<size_t,3> get_k3(const size_t i, const size_t j, const size_t k) const noexcept
    {
        if( bdistributed ){
            return std::array<size_t,3>({j,i+local_1_start_,k});
        }
        return std::array<size_t,3>({i,j,k});
    }
    //! cloud-in-cell (trilinear) interpolation of the real-space field at position v
    /*!
     * Each coordinate is divided by the box length of its dimension, wrapped periodically
     * into the unit interval and scaled to grid-cell units. The result is the weighted sum
     * over the eight surrounding cells; the upper neighbours wrap around periodically.
     * Indices are used directly with relem, without subtracting a local offset, so this
     * is only valid for grids that are not distributed over MPI tasks.
     * @param v position, in the same units as length_
     * @return interpolated value of the field
     */
    data_t get_cic( const vec3_t<real_t>& v ) const noexcept
    {
        // warning! this doesn't work with MPI
        // Maps one coordinate into [0,n] in cell units. std::fmod keeps the sign of its first
        // argument, so positions more than one box length below zero would come out negative
        // (and the cast to size_t below would be undefined); shift those up by one period.
        auto to_cell_units = []( auto pos, auto len, size_t n ) -> real_t {
            auto u = std::fmod(pos/len+1.0,1.0);
            if( u < 0.0 ) u += 1.0;
            return real_t(u*n);
        };
        const real_t xc = to_cell_units(v.x, length_[0], n_[0]);
        const real_t yc = to_cell_units(v.y, length_[1], n_[1]);
        const real_t zc = to_cell_units(v.z, length_[2], n_[2]);
        size_t ix = static_cast<size_t>(xc);
        size_t iy = static_cast<size_t>(yc);
        size_t iz = static_cast<size_t>(zc);
        real_t dx = xc-real_t(ix), tx = 1.0-dx;
        real_t dy = yc-real_t(iy), ty = 1.0-dy;
        real_t dz = zc-real_t(iz), tz = 1.0-dz;
        // rounding in the scaling can land exactly on n, which is the periodic image of
        // cell 0 (the offsets above are then zero); fold it back to stay inside the array
        ix %= n_[0];
        iy %= n_[1];
        iz %= n_[2];
        size_t ix1 = (ix+1)%n_[0];
        size_t iy1 = (iy+1)%n_[1];
        size_t iz1 = (iz+1)%n_[2];
        data_t val = 0.0;
        val += this->relem(ix ,iy ,iz ) * tx * ty * tz;
        val += this->relem(ix ,iy ,iz1) * tx * ty * dz;
        val += this->relem(ix ,iy1,iz ) * tx * dy * tz;
        val += this->relem(ix ,iy1,iz1) * tx * dy * dz;
        val += this->relem(ix1,iy ,iz ) * dx * ty * tz;
        val += this->relem(ix1,iy ,iz1) * dx * ty * dz;
        val += this->relem(ix1,iy1,iz ) * dx * dy * tz;
        val += this->relem(ix1,iy1,iz1) * dx * dy * dz;
        return val;
    }
    //! cloud-in-cell (trilinear) interpolation of the Fourier-space data at fractional index x
    /*!
     * x is given directly in units of array indices. The lower corner is floor(x); the upper
     * neighbour wraps around modulo size(0) and size(1) in the first two dimensions and is
     * clamped to size(2)-1 in the third.
     * NOTE(review): a component of x below zero yields a negative lower index, which is
     * converted to an unsigned array index -- callers presumably never pass that; confirm.
     * @param x fractional index triple
     * @return interpolated complex value
     */
    ccomplex_t get_cic_kspace( const vec3_t<real_t> x ) const noexcept
    {
        // warning! this doesn't work with MPI
        const int i0 = static_cast<int>(std::floor(x.x));
        const int j0 = static_cast<int>(std::floor(x.y));
        const int k0 = static_cast<int>(std::floor(x.z));
        // weight of the upper neighbour is the fractional offset, the lower one gets the rest
        const real_t wx1 = x.x-real_t(i0), wx0 = 1.0-wx1;
        const real_t wy1 = x.y-real_t(j0), wy0 = 1.0-wy1;
        const real_t wz1 = x.z-real_t(k0), wz0 = 1.0-wz1;
        // lower/upper index per dimension
        const std::array<size_t,2> xi{{ static_cast<size_t>(i0), (i0+1)%size(0) }};
        const std::array<size_t,2> yi{{ static_cast<size_t>(j0), (j0+1)%size(1) }};
        const std::array<size_t,2> zi{{ static_cast<size_t>(k0), static_cast<size_t>(std::min((k0+1),int(size(2))-1)) }};
        const std::array<real_t,2> wx{{ wx0, wx1 }};
        const std::array<real_t,2> wy{{ wy0, wy1 }};
        const std::array<real_t,2> wz{{ wz0, wz1 }};
        // accumulate the eight corners, last dimension varying fastest
        ccomplex_t val = 0.0;
        for( int a=0; a<2; ++a ){
            for( int b=0; b<2; ++b ){
                for( int c=0; c<2; ++c ){
                    val += this->kelem(xi[a],yi[b],zi[c]) * wx[a] * wy[b] * wz[c];
                }
            }
        }
        return val;
    }
    inline ccomplex_t gradient( const int idim, std::array<size_t,3> ijk ) const
    {
-#if defined(USE_MPI)
+        if( bdistributed ){
            ijk[0] += local_1_start_;
            std::swap(ijk[0],ijk[1]);
-#endif
+        }
        real_t rgrad = 
            (ijk[idim]!=nhalf_[idim])? (real_t(ijk[idim]) - real_t(ijk[idim] > nhalf_[idim]) * n_[idim]) * kfac_[idim] : 0.0; 
        return ccomplex_t(0.0,rgrad);
    }
-    Grid_FFT<data_t> &operator*=(data_t x)
+    inline real_t laplacian( const std::array<size_t,3>& ijk ) const noexcept
    {
        return -this->get_k<real_t>(ijk[0],ijk[1],ijk[2]).norm_squared();
    }
    grid_fft_t &operator*=(data_t x)
    {
        if (space_ == kspace_id)
        {
@ -274,7 +356,7 @@ public:
        return *this;
    }
-    Grid_FFT<data_t> &operator/=(data_t x)
+    grid_fft_t &operator/=(data_t x)
    {
        if (space_ == kspace_id)
        {
@ -287,7 +369,7 @@ public:
        return *this;
    }
-    Grid_FFT<data_t> &apply_Laplacian(void)
+    grid_fft_t &apply_Laplacian(void)
    {
        this->FourierTransformForward();
        this->apply_function_k_dep([&](auto x, auto k) {
@ -298,7 +380,7 @@ public:
        return *this;
    }
-    Grid_FFT<data_t> &apply_negative_Laplacian(void)
+    grid_fft_t &apply_negative_Laplacian(void)
    {
        this->FourierTransformForward();
        this->apply_function_k_dep([&](auto x, auto k) {
@ -309,7 +391,7 @@ public:
        return *this;
    }
-    Grid_FFT<data_t> &apply_InverseLaplacian(void)
+    grid_fft_t &apply_InverseLaplacian(void)
    {
        this->FourierTransformForward();
        this->apply_function_k_dep([&](auto x, auto k) {
@ -354,11 +436,10 @@ public:
        }
    }
-    double compute_2norm(void)
+    real_t compute_2norm(void) const
    {
        real_t sum1{0.0};
-#pragma omp parallel for reduction(+ \
+        #pragma omp parallel for reduction(+ : sum1)
                                   : sum1)
        for (size_t i = 0; i < sizes_[0]; ++i)
        {
            for (size_t j = 0; j < sizes_[1]; ++j)
@ -377,28 +458,28 @@ public:
        return sum1;
    }
-    double std(void)
+    real_t std(void) const
    {
        double sum1{0.0}, sum2{0.0};
        size_t count{0};
-#pragma omp parallel for reduction(+ \
+        #pragma omp parallel for reduction(+ : sum1, sum2)
                                   : sum1, sum2)
        for (size_t i = 0; i < sizes_[0]; ++i)
        {
            for (size_t j = 0; j < sizes_[1]; ++j)
            {
                for (size_t k = 0; k < sizes_[2]; ++k)
                {
-                    const auto elem = std::real(this->relem(i, j, k));
+                    const auto elem = (space_==kspace_id)? this->kelem(i, j, k) : this->relem(i, j, k);
-                    sum1 += elem;
+                    sum1 += std::real(elem);
-                    sum2 += elem * elem;
+                    sum2 += std::norm(elem);// * elem;
                }
            }
        }
        count = sizes_[0] * sizes_[1] * sizes_[2];
 #ifdef USE_MPI
        if( bdistributed ){
            double globsum1{0.0}, globsum2{0.0};
            size_t globcount{0};
@ -417,20 +498,20 @@ public:
            sum1 = globsum1;
            sum2 = globsum2;
            count = globcount;
        }
 #endif
        sum1 /= count;
        sum2 /= count;
-        return std::sqrt(sum2 - sum1 * sum1);
+        return real_t(std::sqrt(sum2 - sum1 * sum1));
    }
-    double mean(void)
+    real_t mean(void) const
    {
        double sum1{0.0};
        size_t count{0};
-#pragma omp parallel for reduction(+ \
+        #pragma omp parallel for reduction(+ : sum1)
                                   : sum1)
        for (size_t i = 0; i < sizes_[0]; ++i)
        {
            for (size_t j = 0; j < sizes_[1]; ++j)
@ -445,6 +526,7 @@ public:
        count = sizes_[0] * sizes_[1] * sizes_[2];
 #ifdef USE_MPI
        if( bdistributed ){
            double globsum1{0.0};
            size_t globcount{0};
@ -458,19 +540,20 @@ public:
            sum1 = globsum1;
            count = globcount;
        }
 #endif
        sum1 /= count;
-        return sum1;
+        return real_t(sum1);
    }
    template <typename functional, typename grid_t>
    void assign_function_of_grids_r(const functional &f, const grid_t &g)
    {
-        assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) );
+        assert(g.size(0) == size(0) && g.size(1) == size(1)); 
-#pragma omp parallel for
+        #pragma omp parallel for
        for (size_t i = 0; i < sizes_[0]; ++i)
        {
            for (size_t j = 0; j < sizes_[1]; ++j)
@ -489,10 +572,10 @@ public:
    template <typename functional, typename grid1_t, typename grid2_t>
    void assign_function_of_grids_r(const functional &f, const grid1_t &g1, const grid2_t &g2)
    {
-        assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g1.size(2) == size(2));
+        assert(g1.size(0) == size(0) && g1.size(1) == size(1)); 
-        assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g2.size(2) == size(2));
+        assert(g2.size(0) == size(0) && g2.size(1) == size(1)); 
-#pragma omp parallel for
+        #pragma omp parallel for
        for (size_t i = 0; i < sizes_[0]; ++i)
        {
            for (size_t j = 0; j < sizes_[1]; ++j)
@ -518,7 +601,7 @@ public:
        assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g2.size(2) == size(2));
        assert(g3.size(0) == size(0) && g3.size(1) == size(1)); // && g3.size(2) == size(2));
-#pragma omp parallel for
+        #pragma omp parallel for
        for (size_t i = 0; i < sizes_[0]; ++i)
        {
            for (size_t j = 0; j < sizes_[1]; ++j)
@ -543,7 +626,7 @@ public:
    {
        assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) );
-#pragma omp parallel for
+        #pragma omp parallel for
        for (size_t i = 0; i < sizes_[0]; ++i)
        {
            for (size_t j = 0; j < sizes_[1]; ++j)
@ -565,7 +648,7 @@ public:
        assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g.size(2) == size(2) );
        assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g.size(2) == size(2) );
-#pragma omp parallel for
+        #pragma omp parallel for
        for (size_t i = 0; i < sizes_[0]; ++i)
        {
            for (size_t j = 0; j < sizes_[1]; ++j)
@ -582,18 +665,39 @@ public:
        }
    }
-    template <typename functional, typename grid1_t, typename grid2_t>
+    template <typename functional, typename grid_t>
-    void assign_function_of_grids_kdep(const functional &f, const grid1_t &g1, const grid2_t &g2)
+    void assign_function_of_grids_kdep(const functional &f, const grid_t &g)
    {
-        assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g.size(2) == size(2) );
+        assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) );
        assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g.size(2) == size(2) );
-#pragma omp parallel for
+        #pragma omp parallel for
        for (size_t i = 0; i < sizes_[0]; ++i)
        {
            for (size_t j = 0; j < sizes_[1]; ++j)
            {
                for (size_t k = 0; k < sizes_[2]; ++k)
                {
                    auto &elem = this->kelem(i, j, k);
                    const auto &elemg = g.kelem(i, j, k);
                    elem = f(this->get_k<real_t>(i, j, k), elemg);
                }
            }
        }
    }
    template <typename functional, typename grid1_t, typename grid2_t>
    void assign_function_of_grids_kdep(const functional &f, const grid1_t &g1, const grid2_t &g2)
    {
        assert(g1.size(0) == size(0) && g1.size(1) == size(1) && g1.size(2) == size(2) );
        assert(g2.size(0) == size(0) && g2.size(1) == size(1) && g2.size(2) == size(2) );
        #pragma omp parallel for
        for (size_t i = 0; i < size(0); ++i)
        {
            for (size_t j = 0; j < size(1); ++j)
            {
                for (size_t k = 0; k < size(2); ++k)
                {
                    auto &elem = this->kelem(i, j, k);
                    const auto &elemg1 = g1.kelem(i, j, k);
@ -608,7 +712,7 @@ public:
    template <typename functional>
    void apply_function_k_dep(const functional &f)
    {
-#pragma omp parallel for
+        #pragma omp parallel for
        for (size_t i = 0; i < sizes_[0]; ++i)
        {
            for (size_t j = 0; j < sizes_[1]; ++j)
@ -625,7 +729,7 @@ public:
    template <typename functional>
    void apply_function_r_dep(const functional &f)
    {
-#pragma omp parallel for
+        #pragma omp parallel for
        for (size_t i = 0; i < sizes_[0]; ++i)
        {
            for (size_t j = 0; j < sizes_[1]; ++j)
@ -649,48 +753,31 @@ public:
    void Write_to_HDF5(std::string fname, std::string datasetname) const;
    void Read_from_HDF5( std::string fname, std::string datasetname );
    void Write_PowerSpectrum(std::string ofname);
    void Compute_PowerSpectrum(std::vector<double> &bin_k, std::vector<double> &bin_P, std::vector<double> &bin_eP, std::vector<size_t> &bin_count);
    void Write_PDF(std::string ofname, int nbins = 1000, double scale = 1.0, double rhomin = 1e-3, double rhomax = 1e3);
-    // void stagger_field(void)
+    void shift_field( const vec3_t<real_t>& s, bool transform_back=true )
    // {
    //     FourierTransformForward();
    //     apply_function_k_dep([&](auto x, auto k) -> ccomplex_t {
    //         real_t shift = k[0] * get_dx()[0] + k[1] * get_dx()[1] + k[2] * get_dx()[2];
    //         return x * std::exp(ccomplex_t(0.0, 0.5 * shift));
    //     });
    //     FourierTransformBackward();
    // }
    void shift_field( double sx, double sy, double sz )
    {
        FourierTransformForward();
        apply_function_k_dep([&](auto x, auto k) -> ccomplex_t {
-#ifdef WITH_MPI
+            real_t shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2];
            real_t shift = sy * k[0] * get_dx()[0] + sx * k[1] * get_dx()[1] + sz * k[2] * get_dx()[2];
 #else
            real_t shift = sx * k[0] * get_dx()[0] + sy * k[1] * get_dx()[1] + sz * k[2] * get_dx()[2];
 #endif
            return x * std::exp(ccomplex_t(0.0, shift));
        });
        if( transform_back ){
            FourierTransformBackward();
        }
    void stagger_field(void)
    {
        this->shift_field( 0.5, 0.5, 0.5 );
    }
    void zero_DC_mode(void)
    {
        if (space_ == kspace_id)
        {
-#ifdef USE_MPI
+            if (CONFIG::MPI_task_rank == 0 || !bdistributed )
            if (CONFIG::MPI_task_rank == 0)
 #endif
                cdata_[0] = (data_t)0.0;
        }
        else
@ -707,12 +794,14 @@ public:
                    }
                }
            }
            if( bdistributed ){
 #if defined(USE_MPI)
                data_t glob_sum = 0.0;
                MPI_Allreduce(reinterpret_cast<void *>(&sum), reinterpret_cast<void *>(&glob_sum),
-                          1, GetMPIDatatype<data_t>(), MPI_SUM, MPI_COMM_WORLD);
+                            1, MPI::get_datatype<data_t>(), MPI_SUM, MPI_COMM_WORLD);
                sum = glob_sum;
 #endif
            }
            sum /= sizes_[0] * sizes_[1] * sizes_[2];
 #pragma omp parallel for
--- a/include/grid_interpolate.hh
+++ b/include/grid_interpolate.hh
@ -0,0 +1,191 @@
 #pragma once
 #include <array>
 #include <vector>
 #include <general.hh>
 #include <math/vec3.hh>
 //! Interpolation of a grid's real-space values at arbitrary positions
 /*!
  * Holds a reference to a grid and, for distributed grids, a copy of the first
  * interp_order+1 planes (along dimension 0) of the neighbouring task, so that
  * interpolation stencils reaching past the local slab can be evaluated locally.
  * Positions are expected in units of grid cells: they are truncated and used as indices.
  * @tparam interp_order 0 (NGP), 1 (CIC) or 2 (TSC); enforced by a static_assert
  * @tparam grid_t grid type providing data_t, is_distributed_trait, n_, relem(), local_0_start_, ...
  */
 template <int interp_order, typename grid_t>
 struct grid_interpolate
 {
  using data_t = typename grid_t::data_t;
  using vec3 = std::array<real_t, 3>;
  static constexpr bool is_distributed_trait = grid_t::is_distributed_trait;
  static constexpr int interpolation_order = interp_order;
  std::vector<data_t> boundary_;  //!< planes received from the next task, laid out as (plane, y, z)
  std::vector<int> local0starts_; //!< local_0_start_ of every task, indexed by task rank
  const grid_t &gridref;          //!< grid that is interpolated (not owned)
  size_t nx_, ny_, nz_;           //!< copies of the grid's n_[0], n_[1], n_[2]
  //! bind to grid g; for distributed grids the boundary planes are exchanged right away
  explicit grid_interpolate(const grid_t &g)
      : gridref(g), nx_(g.n_[0]), ny_(g.n_[1]), nz_(g.n_[2])
  {
    static_assert(interpolation_order >= 0 && interpolation_order <= 2, "Interpolation order needs to be 0 (NGP), 1 (CIC), or 2 (TSC).");
    if (is_distributed_trait)
    {
      update_ghosts( g );
    }
  }
  //! gather all tasks' slab offsets and exchange boundary planes with the neighbouring tasks
  /*!
   * Does nothing unless compiled with USE_MPI. The first interp_order+1 planes of g are
   * sent to the previous task and replaced in place by the planes received from the next
   * task (both with periodic wrap-around in the task rank). An MPI error is logged, not thrown.
   * NOTE(review): assumes every task holds at least interp_order+1 planes -- confirm.
   */
  void update_ghosts( const grid_t &g )
  {
  #if defined(USE_MPI)
    const int ntasks = MPI::get_size();
    const int myrank = MPI::get_rank();
    int local_0_start = int(gridref.local_0_start_);
    local0starts_.assign(ntasks, 0);
    MPI_Allgather(&local_0_start, 1, MPI_INT, local0starts_.data(), 1, MPI_INT, MPI_COMM_WORLD);
    //... exchange boundary
    size_t nx = interpolation_order + 1;
    size_t ny = g.n_[1];
    size_t nz = g.n_[2];
    boundary_.assign(nx * ny * nz, data_t{0.0});
    for (size_t i = 0; i < nx; ++i)
    {
      for (size_t j = 0; j < ny; ++j)
      {
        for (size_t k = 0; k < nz; ++k)
        {
          boundary_[(i * ny + j) * nz + k] = g.relem(i, j, k);
        }
      }
    }
    int sendto = (myrank + ntasks - 1) % ntasks;
    int recvfrom = (myrank + ntasks + 1) % ntasks;
    MPI_Status status;
    status.MPI_ERROR = MPI_SUCCESS;
    // message tags are the sender's rank + 1000 on both sides of the exchange
    int err = MPI_Sendrecv_replace(boundary_.data(), static_cast<int>(nx * ny * nz), MPI::get_datatype<data_t>(), sendto,
                          myrank + 1000, recvfrom, recvfrom + 1000, MPI_COMM_WORLD, &status);
    if( err != MPI_SUCCESS ){
      char errstr[256]; int errlen=256;
      MPI_Error_string(err, errstr, &errlen ); 
      music::elog << "MPI_ERROR #" << err << " : " << errstr << std::endl;
    }
 #endif
  }
  //! nearest-grid-point value: the cell whose (global) index is the truncated position
  /*! The second argument is not used by the body. */
  data_t get_ngp_at(const std::array<real_t, 3> &pos, std::vector<data_t> &val) const noexcept
  {
    size_t ix = static_cast<size_t>(pos[0]);
    size_t iy = static_cast<size_t>(pos[1]);
    size_t iz = static_cast<size_t>(pos[2]);
    return gridref.relem(ix - gridref.local_0_start_, iy, iz);
  }
  //! cloud-in-cell (trilinear) interpolation at pos, given in units of grid cells
  /*!
   * Neighbours in dimensions 1 and 2 wrap periodically. In dimension 0 a distributed grid
   * takes the upper neighbour from the exchanged boundary planes when it lies beyond the
   * local slab; a non-distributed grid wraps periodically.
   */
  data_t get_cic_at(const std::array<real_t, 3> &pos) const noexcept
  {
    size_t ix = static_cast<size_t>(pos[0]);
    size_t iy = static_cast<size_t>(pos[1]);
    size_t iz = static_cast<size_t>(pos[2]);
    real_t dx = pos[0] - real_t(ix), tx = 1.0 - dx;
    real_t dy = pos[1] - real_t(iy), ty = 1.0 - dy;
    real_t dz = pos[2] - real_t(iz), tz = 1.0 - dz;
    size_t iy1 = (iy + 1) % ny_;
    size_t iz1 = (iz + 1) % nz_;
    data_t val{0.0};
    if( is_distributed_trait ){
      // index along dimension 0 relative to the start of the local slab
      ptrdiff_t localix = ix-gridref.local_0_start_;
      val += gridref.relem(localix, iy, iz) * tx * ty * tz;
      val += gridref.relem(localix, iy, iz1) * tx * ty * dz;
      val += gridref.relem(localix, iy1, iz) * tx * dy * tz;
      val += gridref.relem(localix, iy1, iz1) * tx * dy * dz;
      if( localix+1 >= gridref.local_0_size_ ){
        // upper neighbour is past the local slab: read it from the received planes
        size_t localix1 = localix+1 - gridref.local_0_size_;
        val += boundary_[(localix1*ny_+iy)*nz_+iz] * dx * ty * tz;
        val += boundary_[(localix1*ny_+iy)*nz_+iz1] * dx * ty * dz;
        val += boundary_[(localix1*ny_+iy1)*nz_+iz] * dx * dy * tz;
        val += boundary_[(localix1*ny_+iy1)*nz_+iz1] * dx * dy * dz;
      }else{
        size_t localix1 = localix+1;
        val += gridref.relem(localix1, iy, iz) * dx * ty * tz;
        val += gridref.relem(localix1, iy, iz1) * dx * ty * dz;
        val += gridref.relem(localix1, iy1, iz) * dx * dy * tz;
        val += gridref.relem(localix1, iy1, iz1) * dx * dy * dz;
      }
    }else{
      size_t ix1 = (ix + 1) % nx_;
      val += gridref.relem(ix, iy, iz) * tx * ty * tz;
      val += gridref.relem(ix, iy, iz1) * tx * ty * dz;
      val += gridref.relem(ix, iy1, iz) * tx * dy * tz;
      val += gridref.relem(ix, iy1, iz1) * tx * dy * dz;
      val += gridref.relem(ix1, iy, iz) * dx * ty * tz;
      val += gridref.relem(ix1, iy, iz1) * dx * ty * dz;
      val += gridref.relem(ix1, iy1, iz) * dx * dy * tz;
      val += gridref.relem(ix1, iy1, iz1) * dx * dy * dz;
    }
    return val;
  }
  // data_t get_tsc_at(const std::array<real_t, 3> &pos, std::vector<data_t> &val) const
  // {
  // }
  //! rank of the task whose slab contains the truncated first coordinate of x
  /*!
   * Returns the index of the last entry of local0starts_ that is <= int(x[0]).
   * NOTE(review): relies on local0starts_ being sorted ascending, and yields -1 when
   * int(x[0]) is below its first entry -- confirm callers only pass in-range positions.
   */
  int get_task(const vec3 &x) const noexcept
  {
    const auto it = std::upper_bound(local0starts_.begin(), local0starts_.end(), int(x[0]));
    return std::distance(local0starts_.begin(), it)-1;
  }
  //! redistribute positions so that each task ends up with those falling into its own slab
  /*!
   * No-op unless the grid is distributed and USE_MPI is defined. Positions are sorted by
   * destination task, per-task counts (three reals per position) are exchanged, and the
   * positions themselves are exchanged with MPI_Alltoallv; pos is replaced by what was received.
   */
  void domain_decompose_pos(std::vector<vec3> &pos) const noexcept
  {
    if (is_distributed_trait)
    {
 #if defined(USE_MPI)
      const int ntasks = MPI::get_size();
      std::sort(pos.begin(), pos.end(), [&](const vec3 &x1, const vec3 &x2) { return get_task(x1) < get_task(x2); });
      std::vector<int> sendcounts(ntasks, 0), sendoffsets(ntasks, 0);
      std::vector<int> recvcounts(ntasks, 0), recvoffsets(ntasks, 0);
      for (const auto &x : pos)
      {
        sendcounts[get_task(x)] += 3;
      }
      MPI_Alltoall(sendcounts.data(), 1, MPI_INT, recvcounts.data(), 1, MPI_INT, MPI_COMM_WORLD);
      // offsets are the exclusive prefix sums of the counts
      size_t tot_receive = recvcounts[0];
      for (int i = 1; i < ntasks; ++i)
      {
        sendoffsets[i] = sendcounts[i - 1] + sendoffsets[i - 1];
        recvoffsets[i] = recvcounts[i - 1] + recvoffsets[i - 1];
        tot_receive += recvcounts[i];
      }
      std::vector<vec3> recvbuf(tot_receive/3,{0.,0.,0.});
      // data() instead of &v[0]: either vector may be empty on a task that sends or receives nothing
      MPI_Alltoallv(pos.data(), sendcounts.data(), sendoffsets.data(), MPI::get_datatype<real_t>(),
                    recvbuf.data(), recvcounts.data(), recvoffsets.data(), MPI::get_datatype<real_t>(), MPI_COMM_WORLD);
      pos.swap( recvbuf );
 #endif
    }
  }
  //! Fourier-space factor for wave vector k: a half-cell phase shift divided by the window
  /*!
   * The window is the product of sinc(0.5*pi*k_i/kny_i) over the three dimensions, raised
   * to the power interpolation_order+1; the phase is 0.5 * sum_i k_i * dx_i.
   */
  ccomplex_t compensation_kernel( const vec3_t<real_t>& k ) const noexcept
  {
    // sinc with its limit value 1 used for very small arguments
    auto sinc = []( real_t x ){ return (std::abs(x)>1e-10)? std::sin(x)/x : 1.0; };
    real_t dfx = sinc(0.5*M_PI*k[0]/gridref.kny_[0]);
    real_t dfy = sinc(0.5*M_PI*k[1]/gridref.kny_[1]);
    real_t dfz = sinc(0.5*M_PI*k[2]/gridref.kny_[2]);
    real_t del = std::pow(dfx*dfy*dfz,1+interpolation_order);
    real_t shift = 0.5 * k[0] * gridref.get_dx()[0] + 0.5 * k[1] * gridref.get_dx()[1] + 0.5 * k[2] * gridref.get_dx()[2];
    return std::exp(ccomplex_t(0.0, shift)) / del;
  }
 };
--- a/include/ic_generator.hh
+++ b/include/ic_generator.hh
@ -9,12 +9,12 @@
 namespace ic_generator{
-    int Run( ConfigFile& the_config );
+    int Run( config_file& the_config );
-    int Initialise( ConfigFile& the_config );
+    int Initialise( config_file& the_config );
    extern std::unique_ptr<RNG_plugin> the_random_number_generator;
    extern std::unique_ptr<output_plugin> the_output_plugin;
-    extern std::unique_ptr<CosmologyCalculator>  the_cosmo_calc;
+    extern std::unique_ptr<cosmology::calculator>  the_cosmo_calc;
 }
--- a/include/logger.hh
+++ b/include/logger.hh
@ -6,35 +6,35 @@
 #include <fstream>
 #include <iostream>
-namespace csoca {
+namespace music {
-enum LogLevel : int {
+enum log_level : int {
-  Off     = 0,
+  off     = 0,
-  Fatal   = 1,
+  fatal   = 1,
-  Error   = 2,
+  error   = 2,
-  Warning = 3,
+  warning = 3,
-  Info    = 4,
+  info    = 4,
-  Debug   = 5
+  debug   = 5
 };
-class Logger {
+class logger {
 private:
-  static LogLevel log_level_;
+  static log_level log_level_;
  static std::ofstream output_file_;
 public:
-  Logger()  = default;
+  logger()  = default;
-  ~Logger() = default;
+  ~logger() = default;
-  static void SetLevel(const LogLevel &level);
+  static void set_level(const log_level &level);
-  static LogLevel GetLevel();
+  static log_level get_level();
-  static void SetOutput(const std::string filename);
+  static void set_output(const std::string filename);
-  static void UnsetOutput();
+  static void unset_output();
-  static std::ofstream &GetOutput();
+  static std::ofstream &get_output();
-  template <typename T> Logger &operator<<(const T &item) {
+  template <typename T> logger &operator<<(const T &item) {
    std::cout << item;
    if (output_file_.is_open()) {
      output_file_ << item;
@ -42,7 +42,7 @@ public:
    return *this;
  }
-  Logger &operator<<(std::ostream &(*fp)(std::ostream &)) {
+  logger &operator<<(std::ostream &(*fp)(std::ostream &)) {
    std::cout << fp;
    if (output_file_.is_open()) {
      output_file_ << fp;
@ -51,32 +51,32 @@ public:
  }
 };
-class LogStream {
+class log_stream {
 private:
-  Logger &logger_;
+  logger &logger_;
-  LogLevel stream_level_;
+  log_level stream_level_;
  std::string line_prefix_, line_postfix_;
  bool newline;
 public:
-  LogStream(Logger &logger, const LogLevel &level)
+  log_stream(logger &logger, const log_level &level)
    : logger_(logger), stream_level_(level), newline(true) {
    switch (stream_level_) {
-      case LogLevel::Fatal:
+      case log_level::fatal:
        line_prefix_ = "\033[31mFatal : ";
        break;
-      case LogLevel::Error:
+      case log_level::error:
        line_prefix_ = "\033[31mError : ";
        break;
-      case LogLevel::Warning:
+      case log_level::warning:
        line_prefix_ = "\033[33mWarning : ";
        break;
-      case LogLevel::Info:
+      case log_level::info:
        //line_prefix_ = " | Info    | ";
        line_prefix_ = " \033[0m";
        break;
-      case LogLevel::Debug:
+      case log_level::debug:
        line_prefix_ = "Debug : \033[0m";
        break;
      default:
@ -85,14 +85,14 @@ public:
    }
    line_postfix_ = "\033[0m";
  }
-  ~LogStream() = default;
+  ~log_stream() = default;
  inline std::string GetPrefix() const {
    return line_prefix_;
  }
-  template <typename T> LogStream &operator<<(const T &item) {
+  template <typename T> log_stream &operator<<(const T &item) {
-    if (Logger::GetLevel() >= stream_level_) {
+    if (logger::get_level() >= stream_level_) {
      if (newline) {
        logger_ << line_prefix_;
        newline = false;
@ -102,8 +102,8 @@ public:
    return *this;
  }
-  LogStream &operator<<(std::ostream &(*fp)(std::ostream &)) {
+  log_stream &operator<<(std::ostream &(*fp)(std::ostream &)) {
-    if (Logger::GetLevel() >= stream_level_) {
+    if (logger::get_level() >= stream_level_) {
      logger_ << fp;
      logger_ << line_postfix_;
      newline = true;
@ -125,11 +125,11 @@ public:
 };
 // global instantiations for different levels
-extern Logger glogger;
+extern logger glogger;
-extern LogStream flog;
+extern log_stream flog;
-extern LogStream elog;
+extern log_stream elog;
-extern LogStream wlog;
+extern log_stream wlog;
-extern LogStream ilog;
+extern log_stream ilog;
-extern LogStream dlog;
+extern log_stream dlog;
-} // namespace csoca
+} // namespace music
--- a/include/math/interpolate.hh
+++ b/include/math/interpolate.hh
@ -0,0 +1,68 @@
 #pragma once
 #include <vector>
 #include <cassert>
 #include <gsl/gsl_spline.h>
 #include <gsl/gsl_errno.h>
 template <bool logx, bool logy, bool periodic>
 class interpolated_function_1d
 {
 private:
  bool isinit_;
  std::vector<double> data_x_, data_y_;
  gsl_interp_accel *gsl_ia_;
  gsl_spline *gsl_sp_;
  void deallocate()
  {
    gsl_spline_free(gsl_sp_);
    gsl_interp_accel_free(gsl_ia_);
  }
 public:
  interpolated_function_1d(const interpolated_function_1d &) = delete;
  interpolated_function_1d() : isinit_(false){}
  interpolated_function_1d(const std::vector<double> &data_x, const std::vector<double> &data_y)
  : isinit_(false)
  {
    this->set_data( data_x, data_y );
  }
  ~interpolated_function_1d()
  {
    if (isinit_) this->deallocate();
  }
  void set_data(const std::vector<double> &data_x, const std::vector<double> &data_y)
  {
    data_x_ = data_x;
    data_y_ = data_y;
    assert(data_x_.size() == data_y_.size());
    assert(data_x_.size() > 5);
    assert(!(logx & periodic));
    if (logx) for (auto &d : data_x_) d = std::log(d);
    if (logy) for (auto &d : data_y_) d = std::log(d);
    if (isinit_) this->deallocate();
    gsl_ia_ = gsl_interp_accel_alloc();
    gsl_sp_ = gsl_spline_alloc(periodic ? gsl_interp_cspline_periodic : gsl_interp_cspline, data_x_.size());
    gsl_spline_init(gsl_sp_, &data_x_[0], &data_y_[0], data_x_.size());
    isinit_ = true;
  }
  double operator()(double x) const noexcept
  {
    assert( isinit_ && !(logx&&x<=0.0) );
    double xa = logx ? std::log(x) : x;
    double y(gsl_spline_eval(gsl_sp_, xa, gsl_ia_));
    return logy ? std::exp(y) : y;
  }
 };
--- a/include/math/mat3.hh
+++ b/include/math/mat3.hh
@ -0,0 +1,146 @@
 #pragma once
 #include <array>
 #include <utility>
 #include <gsl/gsl_math.h>
 #include <gsl/gsl_eigen.h>
 #include <math/vec3.hh>
 //! 3x3 matrix with elements of type T, stored row by row in a flat array of nine values.
 //! The GSL objects needed by eigen() are allocated lazily on first use; copies and moves
 //! transfer only the matrix elements and allocate their own GSL objects when needed.
 template<typename T>
 class mat3_t{
 protected:
    std::array<T,9> data_;
    gsl_matrix_view m_;
    gsl_vector *eval_ = nullptr;
    gsl_matrix *evec_ = nullptr;
    gsl_eigen_symmv_workspace * wsp_ = nullptr;
    bool bdid_alloc_gsl_;
    void init_gsl(){
        // allocate memory for GSL operations if we haven't done so yet
        if( !bdid_alloc_gsl_ )
        {
            // m_ is a view onto data_, it does not own any memory
            m_ = gsl_matrix_view_array (&data_[0], 3, 3);
            eval_ = gsl_vector_alloc (3);
            evec_ = gsl_matrix_alloc (3, 3);
            wsp_ = gsl_eigen_symmv_alloc (3);
            bdid_alloc_gsl_ = true;
        }
    }
    void free_gsl(){
        // free memory for GSL operations if it was allocated
        if( bdid_alloc_gsl_ )
        {
            gsl_eigen_symmv_free (wsp_);
            gsl_vector_free (eval_);
            gsl_matrix_free (evec_);
            wsp_ = nullptr;
            eval_ = nullptr;
            evec_ = nullptr;
            // make a second call harmless
            bdid_alloc_gsl_ = false;
        }
    }
 public:
    //! default constructor, elements are default-initialised
    mat3_t()
    : bdid_alloc_gsl_(false) 
    {}
    //! copy constructor
    mat3_t( const mat3_t<T> &m)
    : data_(m.data_), bdid_alloc_gsl_(false) 
    {}
    //! copy constructor for non-const reference, needed to avoid variadic template being called for non-const reference
    mat3_t( mat3_t<T> &m)
    : data_(m.data_), bdid_alloc_gsl_(false) 
    {}
    //! move constructor
    mat3_t( mat3_t<T> &&m)
    : data_(std::move(m.data_)), bdid_alloc_gsl_(false) 
    {}
    //! construct mat3_t from a list of element values (row by row)
    template<typename ...E>
    mat3_t(E&&...e) 
    : data_{{std::forward<E>(e)...}}, bdid_alloc_gsl_(false)
    {}
    //! copy assignment, only the elements are copied
    mat3_t<T>& operator=(const mat3_t<T>& m) noexcept{
        data_ = m.data_;
        return *this;
    }
    //! move assignment, only the elements are moved
    mat3_t<T>& operator=(mat3_t<T>&& m) noexcept{
        data_ = std::move(m.data_);
        return *this;
    }
    //! destructor
    ~mat3_t(){
        this->free_gsl();
    }
    //! bracket index access to the flat element array
    T &operator[](size_t i) noexcept { return data_[i];}
    //! const bracket index access to the flat element array
    const T &operator[](size_t i) const noexcept { return data_[i]; }
    //! matrix 2d index access (row i, column j)
    T &operator()(size_t i, size_t j) noexcept { return data_[3*i+j]; }
    //! const matrix 2d index access (row i, column j)
    const T &operator()(size_t i, size_t j) const noexcept { return data_[3*i+j]; }
    //! in-place addition
    mat3_t<T>& operator+=( const mat3_t<T>& rhs ) noexcept{
        for (size_t i = 0; i < 9; ++i) {
           (*this)[i] += rhs[i];
        }
        return *this;
    }
    //! in-place subtraction
    mat3_t<T>& operator-=( const mat3_t<T>& rhs ) noexcept{
        for (size_t i = 0; i < 9; ++i) {
           (*this)[i] -= rhs[i];
        }
        return *this;
    }
    //! set all elements to zero
    void zero() noexcept{
        for (size_t i = 0; i < 9; ++i) data_[i]=0;
    }
    //! Eigen-decomposition of the matrix, treated as symmetric.
    //! evals receives the eigenvalues in ascending order; evec1, evec2 and evec3_t receive
    //! the eigenvectors belonging to evals[0], evals[1] and evals[2] respectively.
    //! NOTE(review): gsl_matrix_view_array operates on a double buffer, so this presumably
    //! only instantiates for T=double - confirm.
    void eigen( vec3_t<T>& evals, vec3_t<T>& evec1, vec3_t<T>& evec2, vec3_t<T>& evec3_t )
    {
        this->init_gsl();
        // gsl_eigen_symmv overwrites part of its input matrix, and m_ aliases data_:
        // keep a copy and put it back so that the matrix is unchanged by this call
        const std::array<T,9> saved(data_);
        gsl_eigen_symmv (&m_.matrix, eval_, evec_, wsp_);
        data_ = saved;
        gsl_eigen_symmv_sort (eval_, evec_, GSL_EIGEN_SORT_VAL_ASC);
        for( int i=0; i<3; ++i ){
            evals[i] = gsl_vector_get( eval_, i );
            // eigenvectors are stored in the columns of evec_
            evec1[i] = gsl_matrix_get( evec_, i, 0 );
            evec2[i] = gsl_matrix_get( evec_, i, 1 );
            evec3_t[i] = gsl_matrix_get( evec_, i, 2 );
        }
    }
 };
 //! element-wise sum of two matrices
 // (not constexpr: mat3_t has user-provided constructors and a destructor, so this can never
 //  be a constant expression; the result is returned non-const so that it can be moved from)
 template<typename T>
 mat3_t<T> operator+(const mat3_t<T> &lhs, const mat3_t<T> &rhs) noexcept
 {
    mat3_t<T> result(lhs);
    result += rhs;
    return result;
 }
 //! matrix-vector product: component mu of the result is sum over nu of A(mu,nu)*v[nu]
 template<typename T>
 inline vec3_t<T> operator*( const mat3_t<T> &A, const vec3_t<T> &v ) noexcept
 {
    vec3_t<T> product;
    for( int row=0; row<3; ++row ){
        // accumulate directly in the output component
        T &acc = product[row];
        acc = 0.0;
        for( int col=0; col<3; ++col ){
            acc += A(row,col)*v[col];
        }
    }
    return product;
 }
--- a/include/math/ode_integrate.hh
+++ b/include/math/ode_integrate.hh
@ -0,0 +1,103 @@
 #pragma once
 /*******************************************************************************\
 ode_integrate.hh - This file is part of MUSIC2 -
 a code to generate initial conditions for cosmological simulations 
 CHANGELOG (only majors, for details see repo):
    06/2019 - Oliver Hahn - first implementation
 \*******************************************************************************/
 namespace ode_integrate
 {
 //! Simple Runge-Kutta 4th order step without error estimate.
 //! f(t, y) returns dy/dt. On return y holds the state after the step and t has been advanced by h.
 template <typename vector_t, typename function_t>
 inline void rk4_step(double h, double &t, vector_t &y, function_t f)
 {
    vector_t k1(h * f(t, y));
    vector_t k2(h * f(t + h / 2, y + k1 / 2));
    vector_t k3(h * f(t + h / 2, y + k2 / 2));
    vector_t k4(h * f(t + h, y + k3));
    y += (k1 + 2 * k2 + 2 * k3 + k4) / 6;
    t += h;
 }
 //! Cash-Karp modified Runge-Kutta scheme, 5th order with 4th order error estimate
 //! see Press & Teukolsky (1992): "Adaptive Stepsize Runge-Kutta Integration"
 //! in Computers in Physics 6, 188 (1992); doi: 10.1063/1.4823060
 //! f(t, y) returns dy/dt. y is advanced in place by the 5th-order step of size h;
 //! t is only read, the caller is responsible for advancing it.
 //! Returns the estimated error of the step (difference of the 5th and 4th order results).
 template <typename vector_t, typename function_t>
 inline vector_t ckrk5_step(double h, double &t, vector_t &y, function_t f)
 {
  static constexpr double
      a2 = 0.20,
      a3 = 0.30, a4 = 0.60, a5 = 1.0, a6 = 0.8750,
      b21 = 0.20,
      b31 = 3.0 / 40.0, b32 = 9.0 / 40.0,
      b41 = 0.30, b42 = -0.90, b43 = 1.20,
      b51 = -11.0 / 54.0, b52 = 2.50, b53 = -70.0 / 27.0, b54 = 35.0 / 27.0,
      b61 = 1631.0 / 55296.0, b62 = 175.0 / 512.0, b63 = 575.0 / 13824.0, b64 = 44275.0 / 110592.0, b65 = 253.0 / 4096.0,
      c1 = 37.0 / 378.0, c3 = 250.0 / 621.0, c4 = 125.0 / 594.0, c6 = 512.0 / 1771.0,
      dc1 = c1 - 2825.0 / 27648.0, dc3 = c3 - 18575.0 / 48384.0,
      dc4 = c4 - 13525.0 / 55296.0, dc5 = -277.0 / 14336.0, dc6 = c6 - 0.250;
  vector_t k1(h * f(t, y));
  vector_t k2(h * f(t + a2 * h, y + b21 * k1));
  vector_t k3(h * f(t + a3 * h, y + b31 * k1 + b32 * k2));
  vector_t k4(h * f(t + a4 * h, y + b41 * k1 + b42 * k2 + b43 * k3));
  vector_t k5(h * f(t + a5 * h, y + b51 * k1 + b52 * k2 + b53 * k3 + b54 * k4));
  vector_t k6(h * f(t + a6 * h, y + b61 * k1 + b62 * k2 + b63 * k3 + b64 * k4 + b65 * k5));
  y += c1 * k1 + c3 * k3 + c4 * k4 + c6 * k6;
  return dc1 * k1 + dc3 * k3 + dc4 * k4 + dc5 * k5 + dc6 * k6;
 }
 //! Adaptive step-size quality-controlled routine for ckrk5_step, see
 //! Press & Teukolsky (1992): "Adaptive Stepsize Runge-Kutta Integration"
 //! in Computers in Physics 6, 188 (1992); doi: 10.1063/1.4823060
 //! Tries a step of size htry and shrinks it until max_i |yerr[i]/yscale[i]| <= eps.
 //! On return y and t hold the accepted state and time, hdid the step size actually taken
 //! and hnext the suggested size for the next step. Aborts the program on step size underflow.
 template <typename vector_t, typename function_t>
 inline void rk_step_qs(double htry, double &t, vector_t &y, vector_t &yscale, function_t f, double eps, double &hdid, double &hnext)
 {
  static constexpr double SAFETY{0.9};
  static constexpr double PSHRNK{-0.25};
  static constexpr double PGROW{-0.2};
  static constexpr double ERRCON{1.89e-4};
  auto h(htry);
  for (;;)
  {
    // every trial has to start from the unmodified state y: ckrk5_step advances its
    // argument in place, so a rejected trial must not leak into the next attempt
    vector_t ytemp(y);
    const vector_t yerr = ckrk5_step(h, t, ytemp, f);
    // largest error relative to the requested scale, in units of the tolerance eps
    double errmax = 0.0;
    for (size_t i = 0; i < yerr.size(); ++i)
    {
      errmax = std::max(errmax, std::abs(yerr[i] / yscale[i]));
    }
    errmax = errmax / eps;
    if (errmax > 1.0)
    {
      // step rejected: shrink, but by no more than a factor of ten, and try again
      h *= std::max(0.1, SAFETY*std::pow(errmax, PSHRNK));
      if (t + h == t)
      {
        std::cerr << "stepsize underflow in rkqs" << std::endl;
        std::abort();
      }
      continue;
    }
    // step accepted: suggest the next step size, growing by at most a factor of five
    if( errmax > ERRCON ){
      hnext = h * SAFETY * std::pow(errmax, PGROW);
    }else{
      hnext = 5*h;
    }
    hdid = h;
    t += h;
    y = ytemp;
    return;
  }
 }
 } // namespace ode_integrate
--- a/include/math/vec3.hh
+++ b/include/math/vec3.hh
@ -0,0 +1,118 @@
 /*******************************************************************\
 vec3.hh - This file is part of MUSIC2 -
 a code to generate initial conditions for cosmological simulations 
 CHANGELOG (only majors, for details see repo):
    06/2019 - Oliver Hahn - first implementation
 \*******************************************************************/
 #pragma once
 //! minimal 3-component vector class for an arbitrary scalar type T
 template< typename T >
 class vec3_t{
 private:
    //! storage of the three components
    std::array<T,3> data_;
 public: 
    //! named aliases of the three components (references into data_)
    T &x,&y,&z;
    //! default constructor, components are default-initialised
    vec3_t()
    : x(data_[0]),y(data_[1]),z(data_[2]){}
    //! copy constructor
    vec3_t( const vec3_t<T> &v)
    : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){}
    //! copy constructor taking a non-const reference; without it the variadic constructor below would be chosen for non-const arguments
    vec3_t( vec3_t<T>& v)
    : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){}
    //! move constructor
    vec3_t( vec3_t<T> &&v)
    : data_(std::move(v.data_)), x(data_[0]), y(data_[1]), z(data_[2]){}
    //! construct from a list of component values
    template<typename ...E>
    vec3_t(E&&...e) 
    : data_{{std::forward<E>(e)...}}, x{data_[0]}, y{data_[1]}, z{data_[2]}
    {}
    //! indexed access to a component
    T &operator[](size_t i) noexcept{ return data_[i];}
    //! indexed read-only access to a component
    const T &operator[](size_t i) const noexcept { return data_[i]; }
    //! assignment copies the components; the aliases keep referring to this object's own storage
    vec3_t<T>& operator=( const vec3_t<T>& v ) noexcept {
        data_=v.data_;
        return *this;
    }
    //! component-wise sum
    vec3_t<T> operator+( const vec3_t<T>& v ) const noexcept{
        return vec3_t<T>(data_[0]+v.data_[0], data_[1]+v.data_[1], data_[2]+v.data_[2]);
    }
    //! component-wise difference
    vec3_t<T> operator-( const vec3_t<T>& v ) const noexcept{
        return vec3_t<T>(data_[0]-v.data_[0], data_[1]-v.data_[1], data_[2]-v.data_[2]);
    }
    //! unary minus
    vec3_t<T> operator-() const noexcept{
        return vec3_t<T>(-data_[0], -data_[1], -data_[2]);
    }
    //! product with a scalar
    vec3_t<T> operator*( T s ) const noexcept{
        return vec3_t<T>(data_[0]*s, data_[1]*s, data_[2]*s);
    }
    //! division by a scalar
    vec3_t<T> operator/( T s ) const noexcept{
        return vec3_t<T>(data_[0]/s, data_[1]/s, data_[2]/s);
    }
    //! in-place addition of another vector
    vec3_t<T>& operator+=( const vec3_t<T>& v ) noexcept{
        for( size_t i=0; i<3; ++i ) data_[i] += v.data_[i];
        return *this;
    }
    //! in-place subtraction of another vector
    vec3_t<T>& operator-=( const vec3_t<T>& v ) noexcept{
        for( size_t i=0; i<3; ++i ) data_[i] -= v.data_[i];
        return *this;
    }
    //! in-place multiplication with a scalar
    vec3_t<T>& operator*=( T s ) noexcept{
        for( size_t i=0; i<3; ++i ) data_[i] *= s;
        return *this;
    }
    //! in-place division by a scalar
    vec3_t<T>& operator/=( T s ) noexcept{
        for( size_t i=0; i<3; ++i ) data_[i] /= s;
        return *this;
    }
    //! dot product with another vector
    T dot(const vec3_t<T> &a) const noexcept
    {
        return x * a.x + y * a.y + z * a.z;
    }
    //! squared 2-norm
    T norm_squared(void) const noexcept { return dot(*this); }
    //! 2-norm
    T norm(void) const noexcept { return std::sqrt( norm_squared() ); }
    //! wrap an absolute position into a box of size p: each component c becomes fmod(2*p + c, p)
    vec3_t<T>& wrap_abs( T p = 1.0 ) noexcept{
        for( size_t i=0; i<3; ++i ){
            data_[i] = std::fmod( 2*p + data_[i], p );
        }
        return *this;
    }
    //! wrap a relative vector: components below -p/2 are shifted up by p, components at or above p/2 are shifted down by p
    vec3_t<T>& wrap_rel( T p = 1.0 ) noexcept{
        for( size_t i=0; i<3; ++i ){
            T &c = data_[i];
            if( c<-p/2 ){
                c = c+p;
            }else if( c>=p/2 ){
                c = c-p;
            }
        }
        return *this;
    }
    //! lexicographic ordering (x first, then y, then z), allows sorting of vec3_ts
    bool operator<( const vec3_t<T>& o ) const noexcept{
        for( size_t i=0; i<3; ++i ){
            // the first differing component decides
            if( data_[i]!=o.data_[i] ) return data_[i]<o.data_[i];
        }
        return false;
    }
 };
 //! scalar * vector, yields the same result as the member operator v * s
 template<typename T>
 vec3_t<T> operator*( T s, const vec3_t<T>& v ){
    return v * s;
 }
--- a/include/operators.hh
+++ b/include/operators.hh
@ -1,9 +1,54 @@
 #pragma once
 /*
 operators.hh - This file is part of MUSIC2 -
 a code to generate multi-scale initial conditions 
 for cosmological simulations 
 Copyright (C) 2019  Oliver Hahn
 */
 #include <general.hh>
 namespace op{
-inline auto assign_to = [](auto &g){return [&](auto i, auto v){ g[i] = v; };};
+
-inline auto add_to = [](auto &g){return [&](auto i, auto v){ g[i] += v; };};
+//!== list of primitive operators to work on fields ==!//
-inline auto add_twice_to = [](auto &g){return [&](auto i, auto v){ g[i] += 2*v; };};
+
-inline auto subtract_from = [](auto &g){return [&](auto i, auto v){ g[i] -= v; };};
+template< typename field>
-inline auto subtract_twice_from = [](auto &g){return [&](auto i, auto v){ g[i] -= 2*v; };};
+inline auto assign_to( field& g ){return [&g](auto i, auto v){ g[i] = v; };}
 //! returns a functor taking (index, value) that performs g[index] += value*x; g is captured by reference, x by value
 template< typename field, typename val >
 inline auto multiply_add_to( field& g, val x )
 {
    return [&g,x](auto idx, auto value){ g[idx] += value*x; };
 }
 //! returns a functor taking (index, value) that performs g[index] += value; g is captured by reference
 template< typename field>
 inline auto add_to( field& g )
 {
    return [&g](auto idx, auto value){ g[idx] += value; };
 }
 //! returns a functor taking (index, value) that performs g[index] -= value; g is captured by reference
 template< typename field>
 inline auto subtract_from( field& g )
 {
    return [&g](auto idx, auto value){ g[idx] -= value; };
 }
 //! plain Fourier-space gradient operator: multiplication by i*k
 class fourier_gradient{
 private:
    real_t boxlen_, k0_;
    size_t n_, nhalf_;
 public:
    //! takes the box length ("setup","BoxLength") and the grid resolution ("setup","GridRes")
    //! from the_config; k0_ is set to 2*pi/boxlen_ and nhalf_ to n_/2
    explicit fourier_gradient( const config_file& the_config )
    : boxlen_( the_config.get_value<double>("setup", "BoxLength") ), 
      k0_(2.0*M_PI/boxlen_),
      n_( the_config.get_value<size_t>("setup","GridRes") ),
      nhalf_( n_/2 )
    {}
    //! returns i*k for dimension idim of the mode with indices ijk; indices above nhalf_ are
    //! mapped to negative wave numbers by subtracting n_, and the mode at nhalf_ yields zero
    inline ccomplex_t gradient( const int idim, std::array<size_t,3> ijk ) const
    {
        const size_t mode = ijk[idim];
        real_t kint = 0.0;
        if( mode != nhalf_ ){
            kint = real_t(mode) - real_t(mode > nhalf_) * n_;
        }
        return ccomplex_t(0.0,kint * k0_);
    }
    //! correction factor for this operator, always 1; ijk is not used
    inline real_t vfac_corr( std::array<size_t,3> ijk ) const
    {
        return 1.0;
    }
 };
 }
--- a/include/output_plugin.hh
+++ b/include/output_plugin.hh
@ -21,11 +21,12 @@
 enum class output_type {particles,field_lagrangian,field_eulerian};
 class output_plugin
 {
 protected:
-	//! reference to the ConfigFile object that holds all configuration options
+	//! reference to the config_file object that holds all configuration options
-	ConfigFile &cf_;
+	config_file &cf_;
 	//! output file or directory name
 	std::string fname_;
@ -34,17 +35,17 @@ protected:
 	std::string interface_name_;
 public:
 	//! constructor
-	output_plugin(ConfigFile &cf, std::string interface_name )
+	output_plugin(config_file &cf, std::string interface_name )
 		: cf_(cf), interface_name_(interface_name)
 	{
-		fname_ = cf_.GetValue<std::string>("output", "filename");
+		fname_ = cf_.get_value<std::string>("output", "filename");
 	}
 	//! virtual destructor
 	virtual ~output_plugin(){}
 	//! routine to write particle data for a species
-	virtual void write_particle_data(const particle::container &pc, const cosmo_species &s ) {};
+	virtual void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species ) {};
 	//! routine to write gridded fluid component data for a species
 	virtual void write_grid_data(const Grid_FFT<real_t> &g, const cosmo_species &s, const fluid_component &c ) {};
@ -58,6 +59,12 @@ public:
 	//! routine to query whether species is written as particle data
 	// virtual bool write_species_as_particles( const cosmo_species &s ){ return !write_species_as_grid(s); }
 	//! query if output wants 64bit precision for real values
 	virtual bool has_64bit_reals() const = 0;
 	//! query if output wants 64bit precision for integer values
 	virtual bool has_64bit_ids() const = 0;
 	//! routine to return a multiplicative factor that contains the desired position units for the output
 	virtual real_t position_unit() const = 0;
@ -71,7 +78,7 @@ public:
 struct output_plugin_creator
 {
 	//! create an instance of a plug-in
-	virtual std::unique_ptr<output_plugin> create(ConfigFile &cf) const = 0;
+	virtual std::unique_ptr<output_plugin> create(config_file &cf) const = 0;
 	//! destroy an instance of a plug-in
 	virtual ~output_plugin_creator() {}
@ -96,12 +103,12 @@ struct output_plugin_creator_concrete : public output_plugin_creator
 	}
 	//! create an instance of the plug-in
-	std::unique_ptr<output_plugin> create(ConfigFile &cf) const
+	std::unique_ptr<output_plugin> create(config_file &cf) const
 	{
 		return std::make_unique<Derived>(cf); // Derived( cf );
 	}
 };
 //! failsafe version to select the output plug-in
-std::unique_ptr<output_plugin> select_output_plugin(ConfigFile &cf);
+std::unique_ptr<output_plugin> select_output_plugin(config_file &cf);
--- a/include/particle_container.hh
+++ b/include/particle_container.hh
@ -1,3 +1,10 @@
 /*******************************************************************\
 particle_container.hh - This file is part of MUSIC2 -
 a code to generate initial conditions for cosmological simulations 
 CHANGELOG (only majors, for details see repo):
    10/2019 - Oliver Hahn - first implementation
 \*******************************************************************/
 #pragma once
 #ifdef USE_MPI
@ -13,57 +20,96 @@ namespace particle{
 class container
 {
 public:
-	std::vector<float> positions_, velocities_;
+	std::vector<float > positions32_, velocities32_;
-	std::vector<int> ids_;
+	std::vector<double> positions64_, velocities64_;
-	container()
+	std::vector<uint32_t> ids32_;
-	{
+	std::vector<uint64_t> ids64_;
-	}
+	
 	container(){ }
 	container(const container &) = delete;
-	const void* get_pos_ptr() const{
+	void allocate(size_t nump, bool b64reals, bool b64ids)
 		return reinterpret_cast<const void*>( &positions_[0] );
 	}
 	const void* get_vel_ptr() const{
 		return reinterpret_cast<const void*>( &velocities_[0] );
 	}
 	const void* get_ids_ptr() const{
 		return reinterpret_cast<const void*>( &ids_[0] );
 	}
 	void allocate(size_t nump)
 	{
-		positions_.resize(3 * nump);
+		if( b64reals ){
-		velocities_.resize(3 * nump);
+			positions64_.resize(3 * nump);
-		ids_.resize(nump);
+			velocities64_.resize(3 * nump);
 			positions32_.clear();
 			velocities32_.clear();
 		}else{
 			positions32_.resize(3 * nump);
 			velocities32_.resize(3 * nump);
 			positions64_.clear();
 			velocities64_.clear();
 		}
-	void set_pos(size_t ipart, size_t idim, real_t p)
+		if( b64ids ){
-	{
+			ids64_.resize(nump);
-		positions_[3 * ipart + idim] = p;
+			ids32_.clear();
 		}else{
 			ids32_.resize(nump);
 			ids64_.clear();
 		}
 	}
-	void set_vel(size_t ipart, size_t idim, real_t p)
+	const void* get_pos32_ptr() const{
-	{
+		return reinterpret_cast<const void*>( &positions32_[0] );
 		velocities_[3 * ipart + idim] = p;
 	}
-	void set_id(size_t ipart, id_t id)
+	void set_pos32(size_t ipart, size_t idim, float p){
-	{
+		positions32_[3 * ipart + idim] = p;
-		ids_[ipart] = id;
+	}
 	const void* get_pos64_ptr() const{
 		return reinterpret_cast<const void*>( &positions64_[0] );
 	}
 	inline void set_pos64(size_t ipart, size_t idim, double p){
 		positions64_[3 * ipart + idim] = p;
 	}
 	inline const void* get_vel32_ptr() const{
 		return reinterpret_cast<const void*>( &velocities32_[0] );
 	}
 	inline void set_vel32(size_t ipart, size_t idim, float p){
 		velocities32_[3 * ipart + idim] = p;
 	}
 	const void* get_vel64_ptr() const{
 		return reinterpret_cast<const void*>( &velocities64_[0] );
 	}
 	inline void set_vel64(size_t ipart, size_t idim, double p){
 		velocities64_[3 * ipart + idim] = p;
 	}
 	const void* get_ids32_ptr() const{
 		return reinterpret_cast<const void*>( &ids32_[0] );
 	}
 	void set_id32(size_t ipart, uint32_t id){
 		ids32_[ipart] = id;
 	}
 	const void* get_ids64_ptr() const{
 		return reinterpret_cast<const void*>( &ids64_[0] );
 	}
 	void set_id64(size_t ipart, uint64_t id){
 		ids64_[ipart] = id;
 	}
 	size_t get_local_num_particles(void) const
 	{
-		return ids_.size();
+		return std::max(ids32_.size(),ids64_.size());
 	}
 	size_t get_global_num_particles(void) const
 	{
-		size_t local_nump = ids_.size(), global_nump;
+		size_t local_nump = this->get_local_num_particles(), global_nump;
 #ifdef USE_MPI
 		MPI_Allreduce(reinterpret_cast<void *>(&local_nump), reinterpret_cast<void *>(&global_nump), 1,
 					  MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
@ -97,11 +143,11 @@ public:
 	void dump(void)
 	{
-		for (size_t i = 0; i < ids_.size(); ++i)
+		/*for (size_t i = 0; i < ids_.size(); ++i)
 		{
 			std::cout << positions_[3 * i + 0] << " " << positions_[3 * i + 1] << " " << positions_[3 * i + 2] << " "
 					  << velocities_[3 * i + 0] << " " << velocities_[3 * i + 1] << " " << velocities_[3 * i + 2] << std::endl;
-		}
+		}*/
 	}
 };
--- a/include/particle_generator.hh
+++ b/include/particle_generator.hh
@ -1,150 +1,325 @@
 /*******************************************************************\
 particle_generator.hh - This file is part of MUSIC2 -
 a code to generate initial conditions for cosmological simulations 
 CHANGELOG (only majors, for details see repo):
    10/2019 - Oliver Hahn - first implementation
 \*******************************************************************/
 #pragma once
-namespace particle {
+#include <math/vec3.hh>
 #include <grid_interpolate.hh>
-enum lattice{
+#if defined(USE_HDF5)
-    lattice_sc=0, lattice_bcc=1, lattice_fcc=2
+#include "HDF_IO.hh"
-};
+#endif
-template<typename field_t>
+namespace particle
 // Allocates the particle container for the given lattice type and assigns particle IDs.
 // One particle per sub-lattice is created for every local cell of `field`; the ID encodes
 // the 1D cell index (scaled by the number of sub-lattices) plus the sub-lattice number.
 void initialize_lattice( container& particles, lattice lattice_type, const field_t& field ){
    const size_t num_p_in_load = field.local_size();
    const size_t overload = 1<<lattice_type; // 1 for sc, 2 for bcc, 4 for fcc
    particles.allocate( overload * num_p_in_load );
    // ipcount runs over local cells in (i,j,k) loop order
    for( size_t i=0,ipcount=0; i<field.size(0); ++i ){
        for( size_t j=0; j<field.size(1); ++j){
            for( size_t k=0; k<field.size(2); ++k,++ipcount){
                for( size_t iload=0; iload<overload; ++iload ){
                    // particles of sub-lattice iload are stored in a contiguous chunk of num_p_in_load entries
                    particles.set_id( ipcount+iload*num_p_in_load, overload*field.get_cell_idx_1d(i,j,k)+iload );
                }
            }
        }
    }
 }
 // invalidates field, phase shifted to unspecified position after return
 template<typename field_t>
 void set_positions( container& particles, lattice lattice_type, int idim, real_t lunit, field_t& field )
 {
    using vec3 = std::array<real_t,3>;
    enum lattice
    {
        lattice_glass = -1,
        lattice_sc = 0,  // SC : simple cubic
        lattice_bcc = 1, // BCC: body-centered cubic
        lattice_fcc = 2, // FCC: face-centered cubic
        lattice_rsc = 3, // RSC: refined simple cubic
    };
     // Sub-lattice shift vectors for each Bravais lattice, indexed by the lattice enum value
     // (0 = SC, 1 = BCC, 2 = FCC, 3 = RSC). Entry [type][ishift] is the shift of sub-lattice
     // ishift; the field is moved between consecutive sub-lattices by the difference of
     // neighbouring entries.
     // NOTE(review): shifts are presumably in units of one grid cell -- confirm against shift_field.
     const std::vector<std::vector<vec3_t<real_t>>> lattice_shifts =
         {
             // first shift must always be zero! (otherwise set_positions and set_velocities break)
             /* SC : */ {{0.0, 0.0, 0.0}},
             /* BCC: */ {{0.0, 0.0, 0.0}, {0.5, 0.5, 0.5}},
             /* FCC: */ {{0.0, 0.0, 0.0}, {0.0, 0.5, 0.5}, {0.5, 0.0, 0.5}, {0.5, 0.5, 0.0}},
             /* RSC: */ {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.5}, {0.0, 0.5, 0.0}, {0.0, 0.5, 0.5}, {0.5, 0.0, 0.0}, {0.5, 0.0, 0.5}, {0.5, 0.5, 0.0}, {0.5, 0.5, 0.5}},
     };
     // Global shift applied to the whole particle load when a second (interpenetrating)
     // lattice is generated, indexed by the lattice enum value (0 = SC, 1 = BCC, 2 = FCC, 3 = RSC).
     const std::vector<vec3_t<real_t>> second_lattice_shift =
         {
             /* SC : */ {0.5, 0.5, 0.5}, // this corresponds to CsCl lattice
             /* BCC: */ {0.5, 0.5, 0.0}, // is there a diatomic lattice with BCC base?!?
             /* FCC: */ {0.5, 0.5, 0.5}, // this corresponds to NaCl lattice
                                         // /* FCC: */ {0.25, 0.25, 0.25}, // this corresponds to Zincblende/GaAs lattice
             /* RSC: */ {0.25, 0.25, 0.25},
     };
    template <typename field_t>
    class lattice_generator
    {
        protected:
        struct glass
        {
            using data_t = typename field_t::data_t;
            size_t num_p, off_p;
            grid_interpolate<1, field_t> interp_;
            std::vector<vec3> glass_posr;
            glass( config_file& cf, const field_t &field )
            : num_p(0), off_p(0), interp_( field )
            {
                std::vector<real_t> glass_pos;
                real_t lglassbox = 1.0;
                std::string glass_fname = cf.get_value<std::string>("setup", "GlassFileName");
                size_t ntiles = cf.get_value<size_t>("setup", "GlassTiles");
 #if defined(USE_HDF5)
                HDFReadGroupAttribute(glass_fname, "Header", "BoxSize", lglassbox);
                HDFReadDataset(glass_fname, "/PartType1/Coordinates", glass_pos);
 #else
                throw std::runtime_error("Class lattice requires HDF5 support. Enable and recompile.");
 #endif
                size_t np_in_file = glass_pos.size() / 3;
 #if defined(USE_MPI)
                num_p = np_in_file * ntiles * ntiles * ntiles / MPI::get_size();
                off_p = MPI::get_rank() * num_p;
 #else
                num_p = np_in_file * ntiles * ntiles * ntiles;
                off_p = 0;
 #endif
                music::ilog << "Glass file contains " << np_in_file << " particles." << std::endl;
                glass_posr.assign(num_p, {0.0, 0.0, 0.0});
                std::array<real_t, 3> ng({real_t(field.n_[0]), real_t(field.n_[1]), real_t(field.n_[2])});
                #pragma omp parallel for
                for (size_t i = 0; i < num_p; ++i)
                {
                    size_t idxpart = off_p + i;
                    size_t idx_in_glass = idxpart % np_in_file;
                    size_t idxtile = idxpart / np_in_file;
                    size_t tile_z = idxtile % (ntiles * ntiles);
                    size_t tile_y = ((idxtile - tile_z) / ntiles) % ntiles;
                    size_t tile_x = (((idxtile - tile_z) / ntiles) - tile_y) / ntiles;
                    glass_posr[i][0] = std::fmod((glass_pos[3 * idx_in_glass + 0] / lglassbox + real_t(tile_x)) / ntiles * ng[0] + ng[0], ng[0]);
                    glass_posr[i][1] = std::fmod((glass_pos[3 * idx_in_glass + 1] / lglassbox + real_t(tile_y)) / ntiles * ng[1] + ng[1], ng[1]);
                    glass_posr[i][2] = std::fmod((glass_pos[3 * idx_in_glass + 2] / lglassbox + real_t(tile_z)) / ntiles * ng[2] + ng[2], ng[2]);
                }
 #if defined(USE_MPI)
                interp_.domain_decompose_pos(glass_posr);
                num_p = glass_posr.size();
                std::vector<size_t> all_num_p( MPI::get_size(), 0 );
                MPI_Allgather( &num_p, 1, MPI_UNSIGNED_LONG_LONG, &all_num_p[0], 1, MPI_UNSIGNED_LONG_LONG, MPI_COMM_WORLD );
                off_p = 0;
                for( int itask=0; itask<=MPI::get_rank(); ++itask ){
                    off_p += all_num_p[itask];
                }
 #endif
            }
            void update_ghosts( const field_t &field )
            {
                interp_.update_ghosts( field );
            }
            data_t get_at( const vec3& x ) const noexcept
            {
                return interp_.get_cic_at( x );
            }
            size_t size() const noexcept
            {
                return num_p;
            }
            size_t offset() const noexcept
            {
                return off_p;
            }
        };
        std::unique_ptr<glass> glass_ptr_;
        private:
        particle::container particles_;
        public:
        lattice_generator(lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t &field, config_file &cf)
        {
            if (lattice_type != lattice_glass)
            {
                // number of modes present in the field
                const size_t num_p_in_load = field.local_size();
                // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load):
                const size_t overload = 1ull << std::max<int>(0, lattice_type); // 1 for sc, 2 for bcc, 4 for fcc, 8 for rsc
                // allocate memory for all local particles
                particles_.allocate(overload * num_p_in_load, b64reals, b64ids);
                // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well
-    for( size_t i=0,ipcount=0; i<field.size(0); ++i ){
+                for (size_t i = 0, ipcount = 0; i < field.size(0); ++i)
-        for( size_t j=0; j<field.size(1); ++j){
+                {
-            for( size_t k=0; k<field.size(2); ++k){
+                    for (size_t j = 0; j < field.size(1); ++j)
-                auto pos = field.template get_unit_r<real_t>(i,j,k);
+                    {
-                particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) );
+                        for (size_t k = 0; k < field.size(2); ++k, ++ipcount)
                        {
                            for (size_t iload = 0; iload < overload; ++iload)
                            {
                                if (b64ids)
                                {
                                    particles_.set_id64(ipcount + iload * num_p_in_load, IDoffset + overload * field.get_cell_idx_1d(i, j, k) + iload);
                                }
                                else
                                {
                                    particles_.set_id32(ipcount + iload * num_p_in_load, IDoffset + overload * field.get_cell_idx_1d(i, j, k) + iload);
                                }
                            }
                        }
                    }
                }
            }
            else
            {
                glass_ptr_ = std::make_unique<glass>( cf, field );
                particles_.allocate(glass_ptr_->size(), b64reals, b64ids);
                #pragma omp parallel for
                for (size_t i = 0; i < glass_ptr_->size(); ++i)
                {
                    if (b64ids)
                    {
                        particles_.set_id64(i, IDoffset + i + glass_ptr_->offset());
                    }
                    else
                    {
                        particles_.set_id32(i, IDoffset + i + glass_ptr_->offset());
                    }
                }
            }
        }
-    if( lattice_type == particle::lattice_bcc ){
+        // invalidates field, phase shifted to unspecified position after return
-        field.shift_field( 0.5, 0.5, 0.5 );
+        void set_positions(const lattice lattice_type, bool is_second_lattice, int idim, real_t lunit, const bool b64reals, field_t &field, config_file &cf)
-        auto ipcount0 = num_p_in_load;
+        {
-        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
+            // works only for Bravais types
-            for( size_t j=0; j<field.size(1); ++j){
+            if (lattice_type >= 0)
-                for( size_t k=0; k<field.size(2); ++k){
+            {
                    auto pos = field.template get_unit_r_shifted<real_t>(i,j,k,0.5,0.5,0.5);
                    particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) );
                }
            }
        }
    }
    else if( lattice_type == particle::lattice_fcc ){ 
        // 0.5 0.5 0.0
        field.shift_field( 0.5, 0.5, 0.0 );
        auto ipcount0 = num_p_in_load;
        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
            for( size_t j=0; j<field.size(1); ++j){
                for( size_t k=0; k<field.size(2); ++k){
                    auto pos = field.template get_unit_r_shifted<real_t>(i,j,k,0.5,0.5,0.0);
                    particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) );
                }
            }
        }
        // 0.0 0.5 0.5
        field.shift_field( -0.5, 0.0, 0.5 );
        ipcount0 = 2*num_p_in_load;
        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
            for( size_t j=0; j<field.size(1); ++j){
                for( size_t k=0; k<field.size(2); ++k){
                    auto pos = field.template get_unit_r_shifted<real_t>(i,j,k,0.0,0.5,0.5);
                    particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) );
                }
            }
        }
        // 0.5 0.0 0.5
        field.shift_field( 0.5, -0.5, 0.0 );
        ipcount0 = 3*num_p_in_load;
        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
            for( size_t j=0; j<field.size(1); ++j){
                for( size_t k=0; k<field.size(2); ++k){
                    auto pos = field.template get_unit_r_shifted<real_t>(i,j,k,0.5,0.0,0.5);
                    particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) );
                }
            }
        }
    }
 }
 template<typename field_t>
 void set_velocities( container& particles, lattice lattice_type, int idim, field_t& field )
 {
                const size_t num_p_in_load = field.local_size();
                for (int ishift = 0; ishift < (1 << lattice_type); ++ishift)
                {
                    // if we are dealing with the secondary lattice, apply a global shift
                    if (ishift == 0 && is_second_lattice)
                    {
                        field.shift_field(second_lattice_shift[lattice_type]);
                    }
-    for( size_t i=0,ipcount=0; i<field.size(0); ++i ){
+                    // can omit first shift since zero by convention, unless shifted already above, otherwise apply relative phase shift
-        for( size_t j=0; j<field.size(1); ++j){
+                    if (ishift > 0)
-            for( size_t k=0; k<field.size(2); ++k){
+                    {
-                particles.set_vel( ipcount++, idim, field.relem(i,j,k) );
+                        field.shift_field(lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift - 1]);
                    }
                    // read out values from phase shifted field and set assoc. particle's value
                    const auto ipcount0 = ishift * num_p_in_load;
                    for (size_t i = 0, ipcount = ipcount0; i < field.size(0); ++i)
                    {
                        for (size_t j = 0; j < field.size(1); ++j)
                        {
                            for (size_t k = 0; k < field.size(2); ++k)
                            {
                                auto pos = field.template get_unit_r_shifted<real_t>(i, j, k, lattice_shifts[lattice_type][ishift] + (is_second_lattice ? second_lattice_shift[lattice_type] : vec3_t<real_t>{0., 0., 0.}));
                                if (b64reals)
                                {
                                    particles_.set_pos64(ipcount++, idim, pos[idim] * lunit + field.relem(i, j, k));
                                }
                                else
                                {
                                    particles_.set_pos32(ipcount++, idim, pos[idim] * lunit + field.relem(i, j, k));
                                }
                            }
                        }
                    }
                }
            }
            else
            {
                glass_ptr_->update_ghosts( field );
                #pragma omp parallel for
                for (size_t i = 0; i < glass_ptr_->size(); ++i)
                {
                    auto pos = glass_ptr_->glass_posr[i];
                    real_t disp = glass_ptr_->get_at(pos);
                    if (b64reals)
                    {
                        particles_.set_pos64(i, idim, pos[idim] / field.n_[idim] * lunit + disp);
                    }
                    else
                    {
                        particles_.set_pos32(i, idim, pos[idim] / field.n_[idim] * lunit + disp);
                    }
                }
            }
        }
-    if( lattice_type == particle::lattice_bcc ){
+        void set_velocities(lattice lattice_type, bool is_second_lattice, int idim, const bool b64reals, field_t &field, config_file &cf)
-        field.shift_field( 0.5, 0.5, 0.5 );
+        {
-        auto ipcount0 = num_p_in_load;
+            // works only for Bravais types
-        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
+            if (lattice_type >= 0)
-            for( size_t j=0; j<field.size(1); ++j){
+            {
-                for( size_t k=0; k<field.size(2); ++k){
+                const size_t num_p_in_load = field.local_size();
-                    particles.set_vel( ipcount++, idim, field.relem(i,j,k) );
+                for (int ishift = 0; ishift < (1 << lattice_type); ++ishift)
                {
                    // if we are dealing with the secondary lattice, apply a global shift
                    if (ishift == 0 && is_second_lattice)
                    {
                        field.shift_field(second_lattice_shift[lattice_type]);
                    }
                    // can omit first shift since zero by convention, unless shifted already above, otherwise apply relative phase shift
                    if (ishift > 0)
                    {
                        field.shift_field(lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift - 1]);
                    }
                    // read out values from phase shifted field and set assoc. particle's value
                    const auto ipcount0 = ishift * num_p_in_load;
                    for (size_t i = 0, ipcount = ipcount0; i < field.size(0); ++i)
                    {
                        for (size_t j = 0; j < field.size(1); ++j)
                        {
                            for (size_t k = 0; k < field.size(2); ++k)
                            {
                                if (b64reals)
                                {
                                    particles_.set_vel64(ipcount++, idim, field.relem(i, j, k));
                                }
                                else
                                {
                                    particles_.set_vel32(ipcount++, idim, field.relem(i, j, k));
                                }
                            }
                        }
                    }
    else if( lattice_type == particle::lattice_fcc ){ 
        // 0.5 0.5 0.0
        field.shift_field( 0.5, 0.5, 0.0 );
        auto ipcount0 = num_p_in_load;
        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
            for( size_t j=0; j<field.size(1); ++j){
                for( size_t k=0; k<field.size(2); ++k){
                    particles.set_vel( ipcount++, idim, field.relem(i,j,k) );
                }
            }
            else
            {
                glass_ptr_->update_ghosts( field );
                #pragma omp parallel for
                for (size_t i = 0; i < glass_ptr_->size(); ++i)
                {
                    auto pos = glass_ptr_->glass_posr[i];
                    real_t vel = glass_ptr_->get_at(pos);
                    if (b64reals)
                    {
                        particles_.set_vel64(i, idim, vel);
                    }
-        // 0.0 0.5 0.5
+                    else
-        field.shift_field( -0.5, 0.0, 0.5 );
+                    {
-        ipcount0 = 2*num_p_in_load;
+                        particles_.set_vel32(i, idim, vel);
        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
            for( size_t j=0; j<field.size(1); ++j){
                for( size_t k=0; k<field.size(2); ++k){
                    particles.set_vel( ipcount++, idim, field.relem(i,j,k) );
                }
            }
        }
        // 0.5 0.0 0.5
        field.shift_field( 0.5, -0.5, 0.0 );
        ipcount0 = 3*num_p_in_load;
        for( size_t i=0,ipcount=ipcount0; i<field.size(0); ++i ){
            for( size_t j=0; j<field.size(1); ++j){
                for( size_t k=0; k<field.size(2); ++k){
                    particles.set_vel( ipcount++, idim, field.relem(i,j,k) );
                    }
                }
            }
        }
 }
         //! Read-only access to the particle container filled by this generator.
         const particle::container& get_particles() const noexcept{
             return particles_;
         }
-} // end namespace particles
+    }; // struct lattice
 } // namespace particle
--- a/include/particle_plt.hh
+++ b/include/particle_plt.hh
@ -0,0 +1,568 @@
 #pragma once
 #include <general.hh>
 #include <unistd.h> // for unlink
 #include <iostream>
 #include <fstream>
 #include <random>
 #include <map>
 #include <cassert>
 #include <particle_generator.hh>
 #include <grid_fft.hh>
 #include <math/mat3.hh>
 #include <gsl/gsl_sf_hyperg.h>
 // Thin wrapper: forwards (a, b, c, x) unchanged to GSL's gsl_sf_hyperg_2F1 and returns its result.
 inline double Hypergeometric2F1( double a, double b, double c, double x )
 {
  return gsl_sf_hyperg_2F1( a, b, c, x);
 }
 #define PRODUCTION
 namespace particle{
 //! implement Joyce, Marcos et al. PLT calculation
 class lattice_gradient{
 private:
    const real_t boxlen_, aini_;
    const size_t ngmapto_, ngrid_, ngrid32_;
    const real_t mapratio_, XmL_;
    Grid_FFT<real_t,false> D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_;
    Grid_FFT<real_t,false> grad_x_, grad_y_, grad_z_;
    std::vector<vec3_t<real_t>> vectk_;
    std::vector<vec3_t<int>> ico_, vecitk_;
    bool is_even( int i ){ return (i%2)==0; }
    bool is_in( int i, int j, int k, const mat3_t<int>& M ){
        vec3_t<int> v({i,j,k});
        auto vv = M * v;
        return is_even(vv.x)&&is_even(vv.y)&&is_even(vv.z);
    }
    void init_D( lattice lattice_type )
    {
        constexpr real_t pi     = M_PI;
        constexpr real_t twopi  = 2.0*M_PI;
        constexpr real_t fourpi = 4.0*M_PI;
        const     real_t sqrtpi = std::sqrt(M_PI);
        const     real_t pi32   = std::pow(M_PI,1.5);
        //! === vectors, reciprocals and normals for the SC lattice ===
        const int charge_fac_sc = 1;
        const mat3_t<real_t> mat_bravais_sc{
            1.0, 0.0, 0.0,
            0.0, 1.0, 0.0,
            0.0, 0.0, 1.0, 
        };
        const mat3_t<real_t> mat_reciprocal_sc{
            twopi, 0.0, 0.0,
            0.0, twopi, 0.0,
            0.0, 0.0, twopi,
        };
        const mat3_t<int> mat_invrecip_sc{
            2, 0, 0,
            0, 2, 0,
            0, 0, 2,
        };
        const std::vector<vec3_t<real_t>> normals_sc{
            {pi,0.,0.},{-pi,0.,0.},
            {0.,pi,0.},{0.,-pi,0.},
            {0.,0.,pi},{0.,0.,-pi},
        };
        //! === vectors, reciprocals and normals for the BCC lattice ===
        const int charge_fac_bcc = 2;
        const mat3_t<real_t> mat_bravais_bcc{
            1.0, 0.0, 0.5,
            0.0, 1.0, 0.5,
            0.0, 0.0, 0.5, 
        };
        const mat3_t<real_t> mat_reciprocal_bcc{
            twopi, 0.0, 0.0,
            0.0, twopi, 0.0,
            -twopi, -twopi, fourpi,
        };
        const mat3_t<int> mat_invrecip_bcc{
            2, 0, 0,
            0, 2, 0,
            1, 1, 1,
        };
        const std::vector<vec3_t<real_t>> normals_bcc{
            {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi},
            {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi},
            {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.}
        };
        //! === vectors, reciprocals and normals for the FCC lattice ===
        const int charge_fac_fcc = 4;
        const mat3_t<real_t> mat_bravais_fcc{
            0.0, 0.5, 0.0,
            0.5, 0.0, 1.0,
            0.5, 0.5, 0.0, 
        };
        const mat3_t<real_t> mat_reciprocal_fcc{
            -fourpi, fourpi, twopi,
            0.0, 0.0, twopi,
            fourpi, 0.0, -twopi,
        };
        const mat3_t<int> mat_invrecip_fcc{
            0, 1, 1,
            1, 0, 1,
            0, 2, 0,
        };
        const std::vector<vec3_t<real_t>> normals_fcc{
            {twopi,0.,0.},{-twopi,0.,0.},
            {0.,twopi,0.},{0.,-twopi,0.},
            {0.,0.,twopi},{0.,0.,-twopi},
            {+pi,+pi,+pi},{+pi,+pi,-pi},
            {+pi,-pi,+pi},{+pi,-pi,-pi},
            {-pi,+pi,+pi},{-pi,+pi,-pi},
            {-pi,-pi,+pi},{-pi,-pi,-pi},
        };
        //! select the properties for the chosen lattice
        const int ilat = lattice_type; // 0 = sc, 1 = bcc, 2 = fcc
        const auto mat_bravais     = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc;
        const auto mat_reciprocal  = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc;
        const auto mat_invrecip    = (ilat==2)? mat_invrecip_fcc : (ilat==1)? mat_invrecip_bcc : mat_invrecip_sc;
        const auto normals         = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc;
        const auto charge_fac      = (ilat==2)? charge_fac_fcc : (ilat==1)? charge_fac_bcc : charge_fac_sc;
        const ptrdiff_t nlattice = ngrid_;
        const real_t dx = 1.0/real_t(nlattice);
        const real_t eta = 4.0; // Ewald cutoff shall be 4 cells
        const real_t alpha = 1.0/std::sqrt(2)/eta;
        const real_t alpha2 = alpha*alpha;
        const real_t alpha3 = alpha2*alpha;
        const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_fac;
        const real_t fft_norm   = 1.0/std::pow(real_t(nlattice),3.0);
        const real_t fft_norm12 = 1.0/std::pow(real_t(nlattice),1.5);
        //! just a Kronecker \delta_ij
        auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; };
        //! Ewald summation: short-range Green's function
        auto add_greensftide_sr = [&]( mat3_t<real_t>& D, const vec3_t<real_t>& d ) -> void {
            auto r = d.norm();
            if( r< 1e-14 ) return; // return zero for r=0
            const real_t r2(r*r), r3(r2*r), r5(r3*r2);
            const real_t K1( -alpha3/pi32 * std::exp(-alpha2*r2)/r2 );
            const real_t K2( (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha2*r2)*r)/fourpi );
            for( int mu=0; mu<3; ++mu ){
                for( int nu=mu; nu<3; ++nu ){
                    real_t dd( d[mu]*d[nu] * K1 + (kronecker(mu,nu)/r3 - 3.0 * (d[mu]*d[nu])/r5) * K2 );
                    D(mu,nu) += dd;
                    D(nu,mu) += (mu!=nu)? dd : 0.0;
                }
            }
        };
        //! Ewald summation: long-range Green's function
        auto add_greensftide_lr = [&]( mat3_t<real_t>& D, const vec3_t<real_t>& k, const vec3_t<real_t>& r ) -> void {
            real_t kmod2 = k.norm_squared();
            real_t term = std::exp(-kmod2/(4*alpha2))*std::cos(k.dot(r)) / kmod2 * fft_norm;
            for( int mu=0; mu<3; ++mu ){
                for( int nu=mu; nu<3; ++nu ){
                    auto dd = k[mu] * k[nu] * term;
                    D(mu,nu) += dd;
                    D(nu,mu) += (mu!=nu)? dd : 0.0;
                }
            }
        };
        //! checks if 'vec' is in the FBZ with FBZ normal vectors given in 'normals'
        auto check_FBZ = []( const auto& normals, const auto& vec ) -> bool {
            for( const auto& n : normals ){ 
                if( n.dot( vec ) > 1.0001 * n.dot(n) ){
                    return false;
                }
            }
            return true;
        };
        constexpr ptrdiff_t lnumber = 3, knumber = 3;
        const int numb = 1; //!< search radius when shifting vectors into FBZ
        vectk_.assign(D_xx_.memsize(),vec3_t<real_t>());
        ico_.assign(D_xx_.memsize(),vec3_t<int>());
        vecitk_.assign(D_xx_.memsize(),vec3_t<int>());
        #pragma omp parallel 
        {
            //... temporary to hold values of the dynamical matrix 
            mat3_t<real_t> matD(0.0);
            #pragma omp for
            for( ptrdiff_t i=0; i<nlattice; ++i ){
                for( ptrdiff_t j=0; j<nlattice; ++j ){
                    for( ptrdiff_t k=0; k<nlattice; ++k ){
                        // compute lattice site vector from (i,j,k) multiplying Bravais base matrix, and wrap back to box
                        const vec3_t<real_t> x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)});
                        const vec3_t<real_t> ar = (mat_bravais * x_ijk).wrap_abs();
                        //... zero temporary matrix
                        matD.zero();        
                        // add real-space part of dynamical matrix, periodic copies
                        for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){
                            for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){
                                for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){      
                                    const vec3_t<real_t> n_ijk({real_t(ix),real_t(iy),real_t(iz)});            
                                    const vec3_t<real_t> dr(ar - mat_bravais * n_ijk);
                                    add_greensftide_sr(matD, dr);
                                }
                            }
                        }
                        // add k-space part of dynamical matrix
                        for( ptrdiff_t ix=-knumber; ix<=knumber; ix++ ){
                            for( ptrdiff_t iy=-knumber; iy<=knumber; iy++ ){
                                for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){                      
                                    if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){
                                        const vec3_t<real_t> k_ijk({real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice});
                                        const vec3_t<real_t> ak( mat_reciprocal * k_ijk);
                                        add_greensftide_lr(matD, ak, ar );
                                    }
                                }
                            }
                        } 
                        D_xx_.relem(i,j,k) = matD(0,0) * charge;
                        D_xy_.relem(i,j,k) = matD(0,1) * charge;
                        D_xz_.relem(i,j,k) = matD(0,2) * charge;
                        D_yy_.relem(i,j,k) = matD(1,1) * charge;
                        D_yz_.relem(i,j,k) = matD(1,2) * charge;
                        D_zz_.relem(i,j,k) = matD(2,2) * charge;
                    }
                }
            }
        } // end omp parallel region
        // fix r=0 with background density (added later in Fourier space)
        D_xx_.relem(0,0,0) = 1.0/3.0;
        D_xy_.relem(0,0,0) = 0.0;
        D_xz_.relem(0,0,0) = 0.0;
        D_yy_.relem(0,0,0) = 1.0/3.0;
        D_yz_.relem(0,0,0) = 0.0;
        D_zz_.relem(0,0,0) = 1.0/3.0;
        D_xx_.FourierTransformForward();
        D_xy_.FourierTransformForward();
        D_xz_.FourierTransformForward();
        D_yy_.FourierTransformForward();
        D_yz_.FourierTransformForward();
        D_zz_.FourierTransformForward();
 #ifndef PRODUCTION
        if (CONFIG::MPI_task_rank == 0)
            unlink("debug.hdf5");
        D_xx_.Write_to_HDF5("debug.hdf5","Dxx");
        D_xy_.Write_to_HDF5("debug.hdf5","Dxy");
        D_xz_.Write_to_HDF5("debug.hdf5","Dxz");
        D_yy_.Write_to_HDF5("debug.hdf5","Dyy");
        D_yz_.Write_to_HDF5("debug.hdf5","Dyz");
        D_zz_.Write_to_HDF5("debug.hdf5","Dzz");
        std::ofstream ofs2("test_brillouin.txt");
 #endif
        using map_t = std::map<vec3_t<int>,size_t>;
        map_t iimap;
        //!=== Make temporary copies before resorting to std. Fourier grid ========!//
        Grid_FFT<real_t,false> 
            temp1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
            temp2({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
            temp3({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0});
        temp1.FourierTransformForward(false);
        temp2.FourierTransformForward(false);
        temp3.FourierTransformForward(false);
        #pragma omp parallel for
        for( size_t i=0; i<D_xx_.size(0); i++ )
        {
            for( size_t j=0; j<D_xx_.size(1); j++ )
            {
                for( size_t k=0; k<D_xx_.size(2); k++ )
                {
                    temp1.kelem(i,j,k) = ccomplex_t(std::real(D_xx_.kelem(i,j,k)),std::real(D_xy_.kelem(i,j,k)));
                    temp2.kelem(i,j,k) = ccomplex_t(std::real(D_xz_.kelem(i,j,k)),std::real(D_yy_.kelem(i,j,k)));
                    temp3.kelem(i,j,k) = ccomplex_t(std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k)));
                }
            }
        }
        D_xx_.zero(); D_xy_.zero(); D_xz_.zero();
        D_yy_.zero(); D_yz_.zero(); D_zz_.zero();
        //!=== Diagonalise and resort to std. Fourier grid ========!//
        #pragma omp parallel 
        {
            // thread private matrix representation
            mat3_t<real_t> D;
            vec3_t<real_t> eval, evec1, evec2, evec3_t;
            #pragma omp for
            for( size_t i=0; i<D_xx_.size(0); i++ )
            {
                for( size_t j=0; j<D_xx_.size(1); j++ )
                {
                    for( size_t k=0; k<D_xx_.size(2); k++ )
                    {
                        vec3_t<real_t> kv = D_xx_.get_k<real_t>(i,j,k);
                        // put matrix elements into actual matrix
                        D(0,0) = std::real(temp1.kelem(i,j,k)) / fft_norm12;
                        D(0,1) = D(1,0) = std::imag(temp1.kelem(i,j,k)) / fft_norm12;
                        D(0,2) = D(2,0) = std::real(temp2.kelem(i,j,k)) / fft_norm12;
                        D(1,1) = std::imag(temp2.kelem(i,j,k)) / fft_norm12;
                        D(1,2) = D(2,1) = std::real(temp3.kelem(i,j,k)) / fft_norm12;
                        D(2,2) = std::imag(temp3.kelem(i,j,k)) / fft_norm12;
                        // compute eigenstructure of matrix
                        D.eigen(eval, evec1, evec2, evec3_t);
                        evec3_t /= (twopi*ngrid_);
                        // now determine to which modes on the regular lattice this contributes
                        vec3_t<real_t> ar = kv / (twopi*ngrid_);
                        vec3_t<real_t> a(mat_reciprocal * ar);
                        // translate the k-vectors into the "candidate" FBZ
                        for( int l1=-numb; l1<=numb; ++l1 ){
                            for( int l2=-numb; l2<=numb; ++l2 ){
                                for( int l3=-numb; l3<=numb; ++l3 ){
                                    // need both halfs of Fourier space since we use real transforms
                                    for( int isign=0; isign<=1; ++isign ){
                                        const real_t sign = 2.0*real_t(isign)-1.0; 
                                        const vec3_t<real_t> vshift({real_t(l1),real_t(l2),real_t(l3)});
                                        vec3_t<real_t> vectk = sign * a + mat_reciprocal * vshift;
                                        if( check_FBZ( normals, vectk ) )
                                        {
                                            int ix = std::round(vectk.x*(ngrid_)/twopi);
                                            int iy = std::round(vectk.y*(ngrid_)/twopi);
                                            int iz = std::round(vectk.z*(ngrid_)/twopi);
                                            #pragma omp critical
                                            {iimap.insert( std::pair<vec3_t<int>,size_t>({ix,iy,iz}, D_xx_.get_idx(i,j,k)) );}
                                            temp1.kelem(i,j,k) = ccomplex_t(eval[2],eval[1]);
                                            temp2.kelem(i,j,k) = ccomplex_t(eval[0],evec3_t.x);
                                            temp3.kelem(i,j,k) = ccomplex_t(evec3_t.y,evec3_t.z);
                                        }
                                    }//sign
                                } //l3
                            } //l2
                        } //l1
                    } //k
                } //j
            } //i
        }
        D_xx_.kelem(0,0,0) = 1.0;
        D_xy_.kelem(0,0,0) = 0.0;
        D_xz_.kelem(0,0,0) = 0.0;
        D_yy_.kelem(0,0,0) = 1.0;
        D_yz_.kelem(0,0,0) = 0.0;
        D_zz_.kelem(0,0,0) = 0.0;
        //... approximate infinite lattice by interpolating to sites not covered by current resolution...
        #pragma omp parallel for
        for( size_t i=0; i<D_xx_.size(0); i++ ){
            for( size_t j=0; j<D_xx_.size(1); j++ ){
                for( size_t k=0; k<D_xx_.size(2); k++ ){
                    int ii = (int(i)>nlattice/2)? int(i)-nlattice : int(i);
                    int jj = (int(j)>nlattice/2)? int(j)-nlattice : int(j);
                    int kk = (int(k)>nlattice/2)? int(k)-nlattice : int(k);
                    vec3_t<real_t> kv({real_t(ii),real_t(jj),real_t(kk)});
                    auto align_with_k = [&]( const vec3_t<real_t>& v ) -> vec3_t<real_t>{
                        return v*((v.dot(kv)<0.0)?-1.0:1.0);
                    };
                    vec3_t<real_t> v, l;
                    map_t::iterator it;
                    if( !is_in(i,j,k,mat_invrecip)  ){
                        auto average_lv = [&]( const auto& t1, const auto& t2, const auto& t3, vec3_t<real_t>& v, vec3_t<real_t>& l ) {
                            v = 0.0; l = 0.0;
                            int count(0);
                            auto add_lv = [&]( auto it ) -> void {
                                auto q = it->second;++count;
                                l += vec3_t<real_t>({std::real(t1.kelem(q)),std::imag(t1.kelem(q)),std::real(t2.kelem(q))});
                                v += align_with_k(vec3_t<real_t>({std::imag(t2.kelem(q)),std::real(t3.kelem(q)),std::imag(t3.kelem(q))}));
                            };
                            map_t::iterator it;
                            if( (it = iimap.find({ii-1,jj,kk}))!=iimap.end() ){ add_lv(it); }
                            if( (it = iimap.find({ii+1,jj,kk}))!=iimap.end() ){ add_lv(it); }
                            if( (it = iimap.find({ii,jj-1,kk}))!=iimap.end() ){ add_lv(it); }
                            if( (it = iimap.find({ii,jj+1,kk}))!=iimap.end() ){ add_lv(it); }
                            if( (it = iimap.find({ii,jj,kk-1}))!=iimap.end() ){ add_lv(it); }
                            if( (it = iimap.find({ii,jj,kk+1}))!=iimap.end() ){ add_lv(it); }
                            l/=real_t(count); v/=real_t(count);
                        };
                        average_lv(temp1,temp2,temp3,v,l);
                    }else{
                        if( (it = iimap.find({ii,jj,kk}))!=iimap.end() ){
                            auto q = it->second;
                            l = vec3_t<real_t>({std::real(temp1.kelem(q)),std::imag(temp1.kelem(q)),std::real(temp2.kelem(q))});
                            v = align_with_k(vec3_t<real_t>({std::imag(temp2.kelem(q)),std::real(temp3.kelem(q)),std::imag(temp3.kelem(q))}));
                        }
                    }
                    D_xx_.kelem(i,j,k) = l[0];
                    D_xy_.kelem(i,j,k) = l[1];
                    D_xz_.kelem(i,j,k) = l[2];
                    D_yy_.kelem(i,j,k) = v[0];
                    D_yz_.kelem(i,j,k) = v[1];
                    D_zz_.kelem(i,j,k) = v[2];
                }
            }
        }
 #ifdef PRODUCTION
        #pragma omp parallel for
        for( size_t i=0; i<D_xx_.size(0); i++ ){
            for( size_t j=0; j<D_xx_.size(1); j++ ){
                for( size_t k=0; k<D_xx_.size(2); k++ )
                {
                    vec3_t<real_t> kv = D_xx_.get_k<real_t>(i,j,k);
                    double mu1 = std::real(D_xx_.kelem(i,j,k));
                    // double mu2 = std::real(D_xy_.kelem(i,j,k));
                    // double mu3 = std::real(D_xz_.kelem(i,j,k));
                    vec3_t<real_t> evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))});
                    evec1 /= evec1.norm();
                    // ///////////////////////////////////
                    // // project onto spherical coordinate vectors
                    real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ): 0.0;
                    real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi);
                    vec3_t<real_t> e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 );
                    // re-normalise so that longitudinal amplitude is exact
                    double renorm = evec1.dot( e_r ); if( renorm < 0.01 ) renorm = 1.0;
                    // -- store in diagonal components of D_ij
                    D_xx_.kelem(i,j,k) = 1.0;
                    D_yy_.kelem(i,j,k) = evec1.dot( e_theta ) / renorm;
                    D_zz_.kelem(i,j,k) = evec1.dot( e_phi ) / renorm;
                    // spatially dependent correction to vfact = \dot{D_+}/D_+
                    D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.));
                }
            }
        }
        D_xy_.kelem(0,0,0) = 1.0;
        D_xx_.kelem(0,0,0) = 1.0;
        D_yy_.kelem(0,0,0) = 0.0;
        D_zz_.kelem(0,0,0) = 0.0;
        // unlink("debug.hdf5");
        // D_xy_.Write_to_HDF5("debug.hdf5","mu1");
        // D_xx_.Write_to_HDF5("debug.hdf5","e1x");
        // D_yy_.Write_to_HDF5("debug.hdf5","e1y");
        // D_zz_.Write_to_HDF5("debug.hdf5","e1z");
 #else
        D_xx_.Write_to_HDF5("debug.hdf5","mu1");
        D_xy_.Write_to_HDF5("debug.hdf5","mu2");
        D_xz_.Write_to_HDF5("debug.hdf5","mu3");
        D_yy_.Write_to_HDF5("debug.hdf5","e1x");
        D_yz_.Write_to_HDF5("debug.hdf5","e1y");
        D_zz_.Write_to_HDF5("debug.hdf5","e1z");
 #endif   
    }
 public:
    // real_t boxlen, size_t ngridother
    //! Builds the lattice gradient operator from the run configuration.
    //! Reads BoxLength, zstart and GridRes from section "setup" and Omega_L,
    //! Omega_m from section "cosmology", allocates all internal grids with
    //! ngridself cells per dimension and finally calls init_D() for the lattice
    //! selected by the optional key setup/ParticleLoad ("sc" when absent).
    //! @param the_config configuration object the parameters are read from
    //! @param ngridself  number of cells per dimension of the internal mesh
    explicit lattice_gradient( config_file& the_config, size_t ngridself=64 )
    : boxlen_( the_config.get_value<double>("setup", "BoxLength") ),
      aini_ ( 1.0/(1.0+the_config.get_value<double>("setup", "zstart")) ),
      ngmapto_( the_config.get_value<size_t>("setup", "GridRes") ),
      ngrid_( ngridself ),
      ngrid32_( std::pow(ngrid_, 1.5) ),
      mapratio_(real_t(ngrid_)/real_t(ngmapto_)),
      XmL_ ( the_config.get_value<double>("cosmology", "Omega_L") / the_config.get_value<double>("cosmology", "Omega_m") ),
      D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
      D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
      D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
      D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
      D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
      D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
      grad_x_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
      grad_y_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
      grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0})
    {
        music::ilog << "-------------------------------------------------------------------------------" << std::endl;
        std::string lattice_str = the_config.get_value_safe<std::string>("setup","ParticleLoad","sc");
        // translate the lattice name into its enum value; any unrecognised
        // name falls through to the simple cubic lattice
        const lattice lattice_type = [&lattice_str]() -> lattice {
            if( lattice_str=="bcc" ){ return lattice_bcc; }
            if( lattice_str=="fcc" ){ return lattice_fcc; }
            if( lattice_str=="rsc" ){ return lattice_rsc; }
            return lattice_sc;
        }();
        music::ilog << "PLT corrections for " << lattice_str << " lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl;
        // wall-clock time stamp taken right before the eigenmode computation
        const double t_start = get_wtime();
        music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush;
        init_D( lattice_type );
        // init_D__old();
        music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-t_start << "s" << std::endl;
    }
    //! Returns component idim of the lattice-corrected gradient in Fourier space.
    //! The grid index ijk is rescaled by mapratio_, the fields D_xx_, D_yy_ and
    //! D_zz_ are sampled there through get_cic_kspace() and taken as the radial,
    //! polar and azimuthal parts of a vector in spherical coordinates around the
    //! wave vector; that vector is projected onto Cartesian axis idim.
    //! @param idim Cartesian component (0 -> x, 1 -> y, anything else -> z)
    //! @param ijk  grid index at which the operator is evaluated
    //! @return purely imaginary number i * kmod * (projected component)
    inline ccomplex_t gradient( const int idim, std::array<size_t,3> ijk ) const
    {
        // index position rescaled by mapratio_
        real_t ix = ijk[0]*mapratio_;
        real_t iy = ijk[1]*mapratio_;
        real_t iz = ijk[2]*mapratio_;
        auto kv = D_xx_.get_k<real_t>( ix, iy, iz );
        auto kmod = kv.norm() / mapratio_ / boxlen_;
        // spherical components of the stored direction field
        auto D_r = std::real(D_xx_.get_cic_kspace({ix,iy,iz}));
        auto D_theta = std::real(D_yy_.get_cic_kspace({ix,iy,iz}));
        auto D_phi = std::real(D_zz_.get_cic_kspace({ix,iy,iz}));
        // polar angles of the wave vector; both default to zero for |k| = 0
        real_t kr = kv.norm();
        real_t kphi = 0.0;
        real_t ktheta = 0.0;
        if( kr>0.0 ){
            kphi = std::atan2(kv.y,kv.x);
            ktheta = std::acos( kv.z / kr );
        }
        real_t st = std::sin(ktheta);
        real_t ct = std::cos(ktheta);
        real_t sp = std::sin(kphi);
        real_t cp = std::cos(kphi);
        switch( idim ){
            case 0:
                return ccomplex_t(0.0, kmod*(D_r * st * cp + D_theta * ct * cp - D_phi * sp));
            case 1:
                return ccomplex_t(0.0, kmod*(D_r  * st * sp + D_theta * ct * sp + D_phi * cp));
            default:
                return ccomplex_t(0.0, kmod*(D_r  * ct - D_theta * st));
        }
    }
    //! Returns the spatially dependent correction to the velocity factor.
    //! The real part of D_xy_ is sampled at the grid index rescaled by
    //! mapratio_ (via get_cic_kspace()); the result is the reciprocal of alpha,
    //! where alpha is the reciprocal of that sample.
    //! @param ijk grid index at which the correction is evaluated
    inline real_t vfac_corr( std::array<size_t,3> ijk  ) const
    {
        real_t ix = ijk[0]*mapratio_;
        real_t iy = ijk[1]*mapratio_;
        real_t iz = ijk[2]*mapratio_;
        const auto dxy_sample = std::real(D_xy_.get_cic_kspace({ix,iy,iz}));
        const real_t alpha = 1.0/dxy_sample;
        return 1.0/alpha;
        // // below is for LCDM, but it is a tiny correction for typical starting redshifts:
        //! X = \Omega_\Lambda / \Omega_m
        // return 1.0 / (alpha - (2*std::pow(aini_,3)*alpha*(2 + alpha)*XmL_*Hypergeometric2F1((3 + alpha)/3.,(5 + alpha)/3.,
        //     (13 + 4*alpha)/6.,-(std::pow(aini_,3)*XmL_)))/
        //     ((7 + 4*alpha)*Hypergeometric2F1(alpha/3.,(2 + alpha)/3.,(7 + 4*alpha)/6.,-(std::pow(aini_,3)*XmL_))));
    }
 };
 }
--- a/include/physical_constants.hh
+++ b/include/physical_constants.hh
@ -0,0 +1,62 @@
 #pragma once
 /*******************************************************************************\
 physical_constants.hh - This file is part of MUSIC2 -
 a code to generate initial conditions for cosmological simulations 
 CHANGELOG (only majors, for details see repo):
    06/2019 - Oliver Hahn - first implementation
 \*******************************************************************************/
 // physical constants for convenience, all values have been taken from
 // the 2018 edition of the Particle Data Group Booklet,
 // http://pdg.lbl.gov/2019/mobile/reviews/pdf/rpp2018-rev-phys-constants-m.pdf
 // Compile-time physical constants and unit conversion factors. Everything is
 // a double expressed in SI units; derived quantities (sigma_SI, rhocrit_h2_SI)
 // are computed from the primary constants defined further up.
 namespace phys_const
 {
 // helper value of pi so that we don't need to include any other header just for this
 static constexpr double pi_ = 3.141592653589793115997963468544185161590576171875;
 //--- unit conversions ---------------------------------------------------
 // 1 Mpc in m
 static constexpr double Mpc_SI = 3.0857e22;
 // 1 Gyr in s (this value equals 1e9 years of 365 days: 365 * 86400 s * 1e9)
 static constexpr double Gyr_SI = 3.1536e16;
 // 1 eV in J
 // NOTE(review): the digits look like the older CODATA 2006 value rather than
 // the one in the 2018 PDG booklet cited above -- confirm which is intended
 static constexpr double eV_SI = 1.602176487e-19;
 // 1 erg in J
 static constexpr double erg_SI = 1e-7;
 //--- physical constants ------------------------------------------------
 // speed of light c in m/s
 static constexpr double c_SI = 2.99792458e8;
 // gravitational constant G in m^3/s^2/kg
 static constexpr double G_SI = 6.6740800e-11;
 // Boltzmann constant k_B in kg m^2/s^2/K
 static constexpr double kB_SI = 1.38064852e-23;
 // reduced Planck's quantum \hbar in kg m^2/s
 static constexpr double hbar_SI = 1.054571800e-34;
 // Stefan-Boltzmann constant sigma in J/m^2/s/K^4,
 // evaluated as pi^2 k_B^4 / (60 hbar^3 c^2)
 static constexpr double sigma_SI = (pi_ * pi_) * (kB_SI * kB_SI * kB_SI * kB_SI) / 60. / (hbar_SI * hbar_SI * hbar_SI) / (c_SI * c_SI);
 // electron mass in kg
 static constexpr double me_SI = 9.10938356e-31;
 // proton mass in kg
 static constexpr double mp_SI = 1.672621898e-27;
 // unified atomic mass unit (u) in kg
 static constexpr double u_SI = 1.660539040e-27;
 // critical density of the Universe in h^2 kg/m^3,
 // evaluated as 3 * 1e10 / (8 pi G) / Mpc^2; the factor 1e10 is (100 km/s)^2 in m^2/s^2
 static constexpr double rhocrit_h2_SI = 3 * 1e10 / (8 * pi_ * G_SI) / Mpc_SI / Mpc_SI;
 } // namespace phys_const
--- a/include/random_plugin.hh
+++ b/include/random_plugin.hh
@ -10,21 +10,21 @@
 class RNG_plugin
 {
  protected:
-    ConfigFile *pcf_; //!< pointer to config_file from which to read parameters
+    config_file *pcf_; //!< pointer to config_file from which to read parameters
  public:
-    explicit RNG_plugin(ConfigFile &cf)
+    explicit RNG_plugin(config_file &cf)
        : pcf_(&cf)
    {
    }
    virtual ~RNG_plugin() {}
    virtual bool isMultiscale() const = 0;
-    virtual void Fill_Grid( Grid_FFT<real_t>& g ) const = 0;
+    virtual void Fill_Grid( Grid_FFT<real_t>& g ) = 0;//const = 0;
    //virtual void FillGrid(int level, DensityGrid<real_t> &R) = 0;
 };
 struct RNG_plugin_creator
 {
-    virtual std::unique_ptr<RNG_plugin> Create(ConfigFile &cf) const = 0;
+    virtual std::unique_ptr<RNG_plugin> Create(config_file &cf) const = 0;
    virtual ~RNG_plugin_creator() {}
 };
@ -42,14 +42,14 @@ struct RNG_plugin_creator_concrete : public RNG_plugin_creator
    }
    //! create an instance of the plugin
-    std::unique_ptr<RNG_plugin> Create(ConfigFile &cf) const
+    std::unique_ptr<RNG_plugin> Create(config_file &cf) const
    {
        return std::make_unique<Derived>(cf);
    }
 };
 typedef RNG_plugin RNG_instance;
-std::unique_ptr<RNG_plugin> select_RNG_plugin( ConfigFile &cf);
+std::unique_ptr<RNG_plugin> select_RNG_plugin( config_file &cf);
 // /*!
 //  * @brief encapsulates all things for multi-scale white noise generation
@ -58,18 +58,18 @@ std::unique_ptr<RNG_plugin> select_RNG_plugin( ConfigFile &cf);
 // class random_number_generator
 // {
 //   protected:
-//     ConfigFile *pcf_;
+//     config_file *pcf_;
 //     //const refinement_hierarchy * prefh_;
 //     RNG_plugin *generator_;
 //     int levelmin_, levelmax_;
 //   public:
 //     //! constructor
-//     random_number_generator( ConfigFile &cf )
+//     random_number_generator( config_file &cf )
 //         : pcf_(&cf) //, prefh_( &refh )
 //     {
-//         levelmin_ = pcf_->GetValue<int>("setup", "levelmin");
+//         levelmin_ = pcf_->get_value<int>("setup", "levelmin");
-//         levelmax_ = pcf_->GetValue<int>("setup", "levelmax");
+//         levelmax_ = pcf_->get_value<int>("setup", "levelmax");
 //         generator_ = select_RNG_plugin(cf);
 //     }
--- a/include/system_stat.hh
+++ b/include/system_stat.hh
@ -1,3 +1,10 @@
 /*******************************************************************\
 system_stat.hh - This file is part of MUSIC2 -
 a code to generate initial conditions for cosmological simulations 
 CHANGELOG (only majors, for details see repo):
    08/2019 - Oliver Hahn - first implementation
 \*******************************************************************/
 #pragma once
 #include <string>
--- a/include/testing.hh
+++ b/include/testing.hh
@ -1,13 +1,21 @@
 /*******************************************************************\
 testing.hh - This file is part of MUSIC2 -
 a code to generate initial conditions for cosmological simulations 
 CHANGELOG (only majors, for details see repo):
    10/2019 - Michael Michaux & Oliver Hahn - first implementation
 \*******************************************************************/
 #pragma once
 #include <array>
 #include <general.hh>
 #include <config_file.hh>
 #include <grid_fft.hh>
 #include <cosmology_calculator.hh>
 namespace testing{
    void output_potentials_and_densities( 
-        ConfigFile& the_config,
+        config_file& the_config,
        size_t ngrid, real_t boxlen,
        Grid_FFT<real_t>& phi,
        Grid_FFT<real_t>& phi2,
@ -16,7 +24,7 @@ namespace testing{
        std::array< Grid_FFT<real_t>*,3 >& A3 );
    void output_velocity_displacement_symmetries(
-        ConfigFile &the_config,
+        config_file &the_config,
        size_t ngrid, real_t boxlen, real_t vfac, real_t dplus,
        Grid_FFT<real_t> &phi,
        Grid_FFT<real_t> &phi2,
@ -26,7 +34,8 @@ namespace testing{
        bool bwrite_out_fields=false);
    void output_convergence(
-        ConfigFile &the_config,
+        config_file &the_config,
        cosmology::calculator* the_cosmo_calc,
        std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus,
        Grid_FFT<real_t> &phi,
        Grid_FFT<real_t> &phi2,
--- a/include/transfer_function_plugin.hh
+++ b/include/transfer_function_plugin.hh
@ -13,22 +13,29 @@ enum tf_type
    vtotal,
    vcdm,
    vbaryon,
-    total0
+    total0,
    cdm0,
    baryon0,
    vtotal0,
    vcdm0,
    vbaryon0,
 };
 class TransferFunction_plugin
 {
  public:
    // Cosmology cosmo_;    //!< cosmological parameter, read from config_file
-    ConfigFile *pcf_;   //!< pointer to config_file from which to read parameters
+    config_file *pcf_;   //!< pointer to config_file from which to read parameters
    bool tf_distinct_;   //!< bool if density transfer function is distinct for baryons and DM
    bool tf_withvel_;    //!< bool if also have velocity transfer functions
    bool tf_withtotal0_; //!< have the z=0 spectrum for normalisation purposes
    bool tf_velunits_;   //!< velocities are in velocity units (km/s)
    bool tf_isnormalised_; //!< assume that transfer functions come already correctly normalised and need be re-normalised to a specified value
  public:
    //! constructor
-    TransferFunction_plugin(ConfigFile &cf)
+    TransferFunction_plugin(config_file &cf)
-        : pcf_(&cf), tf_distinct_(false), tf_withvel_(false), tf_withtotal0_(false), tf_velunits_(false)
+        : pcf_(&cf), tf_distinct_(false), tf_withvel_(false), tf_withtotal0_(false), tf_velunits_(false), tf_isnormalised_(false)
    { }
    //! destructor
@ -75,7 +82,7 @@ class TransferFunction_plugin
 struct TransferFunction_plugin_creator
 {
    //! create an instance of a transfer function plug-in
-    virtual std::unique_ptr<TransferFunction_plugin> create(ConfigFile &cf) const = 0;
+    virtual std::unique_ptr<TransferFunction_plugin> create(config_file &cf) const = 0;
    //! destroy an instance of a plug-in
    virtual ~TransferFunction_plugin_creator() {}
@ -96,7 +103,7 @@ struct TransferFunction_plugin_creator_concrete : public TransferFunction_plugin
    }
    //! create an instance of the plug-in
-    std::unique_ptr<TransferFunction_plugin> create(ConfigFile &cf) const
+    std::unique_ptr<TransferFunction_plugin> create(config_file &cf) const
    {
        return std::make_unique<Derived>(cf);
    }
@ -104,4 +111,4 @@ struct TransferFunction_plugin_creator_concrete : public TransferFunction_plugin
 // typedef TransferFunction_plugin TransferFunction;
-std::unique_ptr<TransferFunction_plugin> select_TransferFunction_plugin(ConfigFile &cf);
+std::unique_ptr<TransferFunction_plugin> select_TransferFunction_plugin(config_file &cf);
--- a/include/vec.hh
+++ b/include/vec.hh
@ -0,0 +1,144 @@
 #pragma once
 /*******************************************************************************\
 vec.hh - This file is part of MUSIC2 -
 a code to generate initial conditions for cosmological simulations 
 CHANGELOG (only majors, for details see repo):
    06/2019 - Oliver Hahn - first implementation
 \*******************************************************************************/
 #include <array>
 #include <cmath>
 #include <cstddef>
 #include <utility>
 //! implements general N-dim vectors of arbitrary primitive type with some arithmetic ops
 //! All arithmetic is element-wise. A default constructed vector has every
 //! component value-initialised (zero for arithmetic types), so it is safe to
 //! read or accumulate into before any component has been assigned.
 template <int N, typename T = double>
 struct vec_t
 {
  //! storage of the N components
  std::array<T, N> data_;
  //! default constructor, value-initialises all components
  vec_t() : data_{} {}
  //! copy constructor (component-wise copy)
  vec_t(const vec_t<N, T> &v) = default;
  //! move constructor (component-wise move)
  vec_t(vec_t<N, T> &&v) = default;
  //! constructs the vector from exactly N component values;
  //! any other number of arguments is rejected at compile time
  template <typename... E>
  vec_t(E... e)
      : data_{{std::forward<E>(e)...}}
  {
    static_assert(sizeof...(E) == N, "Brace-enclosed initialiser list doesn't match vec_t length!");
  }
  //! bracket index access to vector components (no bounds checking)
  T &operator[](std::size_t i) noexcept { return data_[i]; }
  //! const bracket index access to vector components (no bounds checking)
  const T &operator[](std::size_t i) const noexcept { return data_[i]; }
  //! copy assignment operator
  vec_t<N, T> &operator=(const vec_t<N, T> &v) = default;
  //! move assignment operator
  vec_t<N, T> &operator=(vec_t<N, T> &&v) = default;
  //! implementation of summation of vec_t
  vec_t<N, T> operator+(const vec_t<N, T> &v) const noexcept
  {
    vec_t<N, T> res;
    for (int i = 0; i < N; ++i)
      res[i] = data_[i] + v[i];
    return res;
  }
  //! implementation of difference of vec_t
  vec_t<N, T> operator-(const vec_t<N, T> &v) const noexcept
  {
    vec_t<N, T> res;
    for (int i = 0; i < N; ++i)
      res[i] = data_[i] - v[i];
    return res;
  }
  //! implementation of unary negative
  vec_t<N, T> operator-() const noexcept
  {
    vec_t<N, T> res;
    for (int i = 0; i < N; ++i)
      res[i] = -data_[i];
    return res;
  }
  //! implementation of scalar multiplication (vector * scalar);
  //! each product is converted back to the component type T
  template <typename T2>
  vec_t<N, T> operator*(T2 s) const noexcept
  {
    vec_t<N, T> res;
    for (int i = 0; i < N; ++i)
      res[i] = data_[i] * s;
    return res;
  }
  //! implementation of scalar division
  vec_t<N, T> operator/(T s) const noexcept
  {
    vec_t<N, T> res;
    for (int i = 0; i < N; ++i)
      res[i] = data_[i] / s;
    return res;
  }
  //! takes the absolute value of each element
  vec_t<N, T> abs(void) const noexcept
  {
    vec_t<N, T> res;
    for (int i = 0; i < N; ++i)
      res[i] = std::abs(data_[i]);
    return res;
  }
  //! implementation of implicit summation of vec_t
  vec_t<N, T> &operator+=(const vec_t<N, T> &v) noexcept
  {
    for (int i = 0; i < N; ++i)
      data_[i] += v[i];
    return *this;
  }
  //! implementation of implicit subtraction of vec_t
  vec_t<N, T> &operator-=(const vec_t<N, T> &v) noexcept
  {
    for (int i = 0; i < N; ++i)
      data_[i] -= v[i];
    return *this;
  }
  //! implementation of implicit scalar multiplication of vec_t
  vec_t<N, T> &operator*=(T s) noexcept
  {
    for (int i = 0; i < N; ++i)
      data_[i] *= s;
    return *this;
  }
  //! implementation of implicit scalar division of vec_t
  vec_t<N, T> &operator/=(T s) noexcept
  {
    for (int i = 0; i < N; ++i)
      data_[i] /= s;
    return *this;
  }
  //! number of components (always N)
  std::size_t size(void) const noexcept { return N; }
 };
 //! scalar * vector: returns a copy of v with every component multiplied by s
 //! (each product is converted back to the component type T)
 template <typename T2, int N, typename T = double>
 inline vec_t<N, T> operator*(T2 s, const vec_t<N, T> &v)
 {
  vec_t<N, T> scaled(v);
  for (auto &component : scaled.data_)
    component = component * s;
  return scaled;
 }
--- a/include/vec3.hh
+++ b/include/vec3.hh
@ -1,41 +0,0 @@
 #pragma once
 //! Minimal three-component vector offering index access, dot product and norm.
 //! The components live in a single std::array; a default constructed vector
 //! has all components value-initialised (zero for arithmetic types). The
 //! class is copyable, movable and assignable.
 template< typename T >
 class vec3{
 private:
    //! the three components
    std::array<T,3> data_;
 public:    
    //! default constructor, value-initialises all components
    vec3()
    : data_{} {}
    //! copy constructor
    vec3( const vec3<T> &v) = default;
    //! takes over the components of a temporary array
    vec3( std::array<T,3>&& d )
    : data_(std::move(d)) {}
    //! move constructor
    vec3( vec3<T> &&v) = default;
    //! copy assignment
    vec3<T> &operator=( const vec3<T> &v ) = default;
    //! move assignment
    vec3<T> &operator=( vec3<T> &&v ) = default;
    //! index access to component i (no bounds checking)
    T &operator[](size_t i){ return data_[i];}
    //! const index access to component i (no bounds checking)
    const T &operator[](size_t i) const { return data_[i]; }
    //! Euclidean dot product with a
    T dot(const vec3<T> &a) const 
    {
        return data_[0] * a.data_[0] + data_[1] * a.data_[1] + data_[2] * a.data_[2];
    }
    //! squared Euclidean length
    T norm_squared(void) const
    {
        return this->dot(*this);
    }
    //! Euclidean length
    T norm(void) const
    {
        return std::sqrt( this->norm_squared() );
    }
 };
--- a/new/FindFFTW3.cmake
+++ b/new/FindFFTW3.cmake
@ -1,232 +0,0 @@
 # - Try to find FFTW
 #
 # By default, it will look only for the serial libraries with single, double,
 # and long double precision. Any combination of precision (SINGLE, DOUBLE,
 # LONGDOUBLE) and library type (SERIAL, [THREADS|OPENMP], MPI) is possible by
 # using the COMPONENTS keyword. For example,
 #
 # find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP MPI)
 #
 # Once done this will define
 #  FFTW3_FOUND - System has FFTW3
 #  FFTW3_INCLUDE_DIRS - The FFTW3 include directories
 #  FFTW3_LIBRARIES - The libraries needed to use FFTW3
 #  FFTW3_DEFINITIONS - Compiler switches required for using FFTW3
 #  FFTW3_$KIND_$PARALLEL_FOUND- Set if FFTW3 exists in KIND precision format for PARALLEL mode.
 #                             where KIND can be: SINGLE, DOUBLE, LONGDOUBLE
 #                             and PARALLEL: SERIAL, OPENMP, MPI, THREADS.
 #  FFTW3_$KIND_$PARALLEL_LIBRARY - The libraries needed to use.
 #  FFTW3_INCLUDE_DIR_PARALLEL - The FFTW3 include directories for parallel mode.
 # Only interpret if() arguments as variables or keywords when they are
 # unquoted, so the quoted string comparisons below compare literally.
 cmake_policy(SET CMP0054 NEW)
 # Nothing to do when FFTW3 has already been located.
 if(FFTW3_FOUND)
  return()
 endif()
 # Include directory and libraries are both already set: accept them as they
 # are and skip the search below.
 if(FFTW3_INCLUDE_DIR AND FFTW3_LIBRARIES)
  set(FFTW3_FOUND TRUE)
  # every requested component without a library variable of its own falls
  # back to the complete FFTW3_LIBRARIES list
  foreach(component ${FFTW3_FIND_COMPONENTS})
    if("${FFTW3_${component}_LIBRARY}" STREQUAL "")
        set(FFTW3_${component}_LIBRARY "${FFTW3_LIBRARIES}")
    endif()
  endforeach()
  return()
 endif()
 # find_specific_libraries(KIND PARALLEL)
 #
 # Searches the FFTW3 library of precision KIND (SINGLE, DOUBLE, LONGDOUBLE)
 # built for parallel mode PARALLEL (SERIAL, MPI, OPENMP, THREADS).
 #
 # The pair is always appended to FFTW3_FIND_COMPONENTS as ${KIND}_${PARALLEL}.
 # When the library is found it is appended to FFTW3_LIBRARIES,
 # FFTW3_${KIND}_${PARALLEL}_FOUND is set and the imported target
 # fftw3::<kind>::<parallel> (lower case) is created.
 #
 # This is a macro, so every variable it sets (including the helpers `kind`,
 # `parallel` and FFTW3_INCLUDE_DIR_PARALLEL) lands in the caller's scope.
 # It relies on SUFFIX_<KIND>, SUFFIX_<PARALLEL>, SUFFIX_FINAL and HINT_DIRS
 # being defined by the caller.
 #
 # Example: find_specific_libraries(DOUBLE OPENMP)
 macro(find_specific_libraries KIND PARALLEL)
  list(APPEND FFTW3_FIND_COMPONENTS ${KIND}_${PARALLEL})
  # quoted so the macro argument is compared as text and never dereferenced
  if(NOT ("${PARALLEL}" STREQUAL "SERIAL") AND NOT ${PARALLEL}_FOUND)
    message(FATAL_ERROR "Please, find ${PARALLEL} libraries before FFTW")
  endif()
  find_library(FFTW3_${KIND}_${PARALLEL}_LIBRARY NAMES
    fftw3${SUFFIX_${KIND}}${SUFFIX_${PARALLEL}}${SUFFIX_FINAL} HINTS ${HINT_DIRS})
  # a found path contains "fftw3"; the <VAR>-NOTFOUND placeholder does not
  if(FFTW3_${KIND}_${PARALLEL}_LIBRARY MATCHES fftw3)
    list(APPEND FFTW3_LIBRARIES ${FFTW3_${KIND}_${PARALLEL}_LIBRARY})
    set(FFTW3_${KIND}_${PARALLEL}_FOUND TRUE)
    string(TOLOWER "${KIND}" kind)
    string(TOLOWER "${PARALLEL}" parallel)
    # do not define the imported target twice if the module is processed again
    if(NOT TARGET fftw3::${kind}::${parallel})
      # The library variable is spelled with the upper-case names; testing the
      # lower-case spelling never matched and made every library SHARED.
      if(FFTW3_${KIND}_${PARALLEL}_LIBRARY MATCHES "\\.a$")
        add_library(fftw3::${kind}::${parallel} STATIC IMPORTED GLOBAL)
      else()
        add_library(fftw3::${kind}::${parallel} SHARED IMPORTED GLOBAL)
      endif()
    endif()
    # MPI Has a different included library than the others
    # FFTW3_INCLUDE_DIR_PARALLEL will change depending on which one is used.
    set(FFTW3_INCLUDE_DIR_PARALLEL ${FFTW3_INCLUDE_DIR} )
    if("${PARALLEL}" STREQUAL "MPI")
      set(FFTW3_INCLUDE_DIR_PARALLEL ${FFTW3_${PARALLEL}_INCLUDE_DIR})
    endif()
    set_target_properties(fftw3::${kind}::${parallel} PROPERTIES
      IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}"
      INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}")
    # adding target properties to the different cases
    ##   MPI: consumers must also link the MPI C libraries; the list is quoted
    ##   so that several libraries stay one property value
    if("${PARALLEL}" STREQUAL "MPI")
      if(MPI_C_LIBRARIES)
        set_target_properties(fftw3::${kind}::${parallel} PROPERTIES
          INTERFACE_LINK_LIBRARIES "${MPI_C_LIBRARIES}")
      endif()
    endif()
    ##   OpenMP
    ##   NOTE(review): FindOpenMP documents OpenMP_C_FLAGS (mixed case); unless
    ##   the project defines OPENMP_C_FLAGS itself this branch never runs -- confirm
    if("${PARALLEL}" STREQUAL "OPENMP")
      if(OPENMP_C_FLAGS)
        set_target_properties(fftw3::${kind}::${parallel} PROPERTIES
           INTERFACE_COMPILE_OPTIONS "${OPENMP_C_FLAGS}")
      endif()
    endif()
    ##  THREADS: CMAKE_THREAD_LIBS_INIT holds link items, so it is exported
    ##  as a link requirement rather than as a compile option
    if("${PARALLEL}" STREQUAL "THREADS")
      if(CMAKE_THREAD_LIBS_INIT) # may be empty when no separate thread library is needed
        set_target_properties(fftw3::${kind}::${parallel} PROPERTIES
          INTERFACE_LINK_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
      endif()
    endif()
  endif()
 endmacro()
 # Without an explicit COMPONENTS list look for all three precisions of the
 # serial library.
 if(NOT FFTW3_FIND_COMPONENTS)
  set(FFTW3_FIND_COMPONENTS SINGLE DOUBLE LONGDOUBLE SERIAL)
 endif()
 string(TOUPPER "${FFTW3_FIND_COMPONENTS}" FFTW3_FIND_COMPONENTS)
 # list(FIND) stores the index of the entry, or -1 when it is absent; every
 # LOOK_FOR_<X> therefore has to be tested with "LESS 0", never for truth
 # (index 0 would read as false and -1 as true).
 list(FIND FFTW3_FIND_COMPONENTS SINGLE LOOK_FOR_SINGLE)
 list(FIND FFTW3_FIND_COMPONENTS DOUBLE LOOK_FOR_DOUBLE)
 list(FIND FFTW3_FIND_COMPONENTS LONGDOUBLE LOOK_FOR_LONGDOUBLE)
 list(FIND FFTW3_FIND_COMPONENTS THREADS LOOK_FOR_THREADS)
 list(FIND FFTW3_FIND_COMPONENTS OPENMP LOOK_FOR_OPENMP)
 list(FIND FFTW3_FIND_COMPONENTS MPI LOOK_FOR_MPI)
 list(FIND FFTW3_FIND_COMPONENTS SERIAL LOOK_FOR_SERIAL)
 # FIXME - This may fail on computers without serial
 # Always look for the serial library: it is used to obtain the version number
 set(LOOK_FOR_SERIAL 1)
 if(MPI_C_FOUND)
  set(MPI_FOUND ${MPI_C_FOUND})
 endif()
 # from here on the list is rebuilt by find_specific_libraries() with one
 # <KIND>_<PARALLEL> entry per searched library
 unset(FFTW3_FIND_COMPONENTS)
 if(WIN32)
  set(HINT_DIRS ${FFTW3_DIRECTORY} $ENV{FFTW3_DIRECTORY})
 else()
  find_package(PkgConfig)
  if(PKG_CONFIG_FOUND)
    # the prefix has to be PC_FFTW3: all result variables read in this module
    # are spelled PC_FFTW3_*
    pkg_check_modules(PC_FFTW3 QUIET fftw3)
    set(FFTW3_DEFINITIONS ${PC_FFTW3_CFLAGS_OTHER})
  endif()
  set(HINT_DIRS ${PC_FFTW3_INCLUDEDIR} ${PC_FFTW3_INCLUDE_DIRS}
    ${FFTW3_INCLUDE_DIR} $ENV{FFTW3_INCLUDE_DIR} )
 endif()
 find_path(FFTW3_INCLUDE_DIR NAMES fftw3.h HINTS ${HINT_DIRS})
 if(NOT LOOK_FOR_MPI LESS 0)  # Probably is going to be the same as fftw3.h
  find_path(FFTW3_MPI_INCLUDE_DIR NAMES fftw3-mpi.h HINTS ${HINT_DIRS})
 endif()
 # find_version(OUTVAR LIBRARY SUFFIX)
 #
 # Compiles and runs a tiny program that prints the fftw<SUFFIX>_version string
 # of LIBRARY and stores the first "major.minor.patch" triple found in it in
 # OUTVAR (in the caller's scope). OUTVAR is left untouched when cross
 # compiling, when the program does not build or run, or when its output
 # contains no version triple.
 #
 #   OUTVAR  - name of the variable receiving the version
 #   LIBRARY - FFTW3 library to link the test program against
 #   SUFFIX  - precision suffix of the symbol ("" double, "f" single, "l" long double)
 #
 # Example: find_version(FFTW3_VERSION_STRING "${FFTW3_DOUBLE_SERIAL_LIBRARY}" "")
 function(find_version OUTVAR LIBRARY SUFFIX)
    # TODO: do we need to add include for mpi headers?
    file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/main.c"
      "#include <fftw3.h>
       #include <stdio.h>
       int main(int nargs, char const *argv[]) {
           printf(\"%s\", fftw${SUFFIX}_version);
           return 0;
       }"
  )
  # the test program cannot be executed on the build host when cross compiling
  if(CMAKE_CROSSCOMPILING)
    return()
  endif()
  # the flags are quoted so paths with spaces or lists stay single arguments
  try_run(RUN_RESULT COMPILE_RESULT
      "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/"
      "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/main.c"
      CMAKE_FLAGS
        "-DLINK_LIBRARIES=${LIBRARY}"
        "-DINCLUDE_DIRECTORIES=${FFTW3_INCLUDE_DIR}"
      RUN_OUTPUT_VARIABLE OUTPUT
      COMPILE_OUTPUT_VARIABLE COUTPUT
  )
  if(COMPILE_RESULT AND RUN_RESULT EQUAL 0)
    # REGEX MATCH yields an empty string when nothing matches, so unexpected
    # program output is never reported as a version
    string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" VERSION_STRING "${OUTPUT}")
    if(NOT "${VERSION_STRING}" STREQUAL "")
      set(${OUTVAR} "${VERSION_STRING}" PARENT_SCOPE)
    endif()
  endif()
 endfunction()
 # Library name pieces: fftw3<precision suffix><parallel suffix><final suffix>
 set(SUFFIX_DOUBLE "")
 set(SUFFIX_SINGLE "f")
 set(SUFFIX_LONGDOUBLE "l")
 set(SUFFIX_SERIAL "")
 set(SUFFIX_OPENMP "_omp")
 set(SUFFIX_MPI "_mpi")
 set(SUFFIX_THREADS "_threads")
 set(SUFFIX_FINAL "")
 if(WIN32)
  set(SUFFIX_FINAL "-3")
 else()
  # from here on HINT_DIRS points at library directories instead of include
  # directories
  set(HINT_DIRS ${PC_FFTW3_LIBDIR} ${PC_FFTW3_LIBRARY_DIRS}
    $ENV{FFTW3_LIBRARY_DIR} ${FFTW3_LIBRARY_DIR} )
 endif()
 unset(FFTW3_LIBRARIES)
 set(FFTW3_INCLUDE_DIRS ${FFTW3_INCLUDE_DIR} ) # TODO what's for?
 set(FFTW3_FLAGS_C "")
 # search every requested precision / parallel-mode combination; LOOK_FOR_<X>
 # is a list index and is negative when <X> was not requested
 foreach(KIND SINGLE DOUBLE LONGDOUBLE)
  if(LOOK_FOR_${KIND} LESS 0)
    continue()
  endif()
  foreach(PARALLEL SERIAL MPI OPENMP THREADS)
    if(LOOK_FOR_${PARALLEL} LESS 0)
      continue()
    endif()
    find_specific_libraries(${KIND} ${PARALLEL})
  endforeach()
 endforeach()
 # Determine the version from the first searched combination. The component
 # list is empty when no precision was requested, in which case list(GET)
 # would raise an error.
 if(FFTW3_INCLUDE_DIR AND FFTW3_FIND_COMPONENTS)
  list(GET FFTW3_FIND_COMPONENTS 0 smallerrun)
  string(REPLACE "_" ";" RUNLIST "${smallerrun}")
  list(GET RUNLIST 0 KIND)
  list(GET RUNLIST 1 PARALLEL)
  unset(smallerrun)
  unset(RUNLIST)
  # only probe a library that was actually found; both arguments are quoted so
  # an empty value cannot shift the argument positions of find_version()
  if(FFTW3_${KIND}_${PARALLEL}_LIBRARY)
    find_version(FFTW3_VERSION_STRING "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}"
      "${SUFFIX_${KIND}}")
  endif()
 endif()
 # FIXME: fails if use REQUIRED.
 include(FindPackageHandleStandardArgs)
 # handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE
 # if all listed variables are TRUE
 find_package_handle_standard_args(FFTW3
    REQUIRED_VARS FFTW3_LIBRARIES FFTW3_INCLUDE_DIR
    VERSION_VAR FFTW3_VERSION_STRING
    HANDLE_COMPONENTS
 )
--- a/src/grid_fft.cc
+++ b/src/grid_fft.cc
@ -2,40 +2,14 @@
 #include <grid_fft.hh>
 #include <thread>
-#include <gsl/gsl_rng.h>
+template <typename data_t, bool bdistributed>
-#include <gsl/gsl_randist.h>
+void Grid_FFT<data_t, bdistributed>::Setup(void)
 template <typename data_t>
 void Grid_FFT<data_t>::FillRandomReal(unsigned long int seed)
 {
-    gsl_rng *RNG = gsl_rng_alloc(gsl_rng_mt19937);
+    if (!bdistributed)
 #if defined(USE_MPI)
    seed += 17321 * CONFIG::MPI_task_rank;
 #endif
    gsl_rng_set(RNG, seed);
    for (size_t i = 0; i < sizes_[0]; ++i)
    {
        for (size_t j = 0; j < sizes_[1]; ++j)
        {
            for (size_t k = 0; k < sizes_[2]; ++k)
            {
                this->relem(i, j, k) = gsl_ran_ugaussian_ratio_method(RNG);
            }
        }
    }
    gsl_rng_free(RNG);
 }
 template <typename data_t>
 void Grid_FFT<data_t>::Setup(void)
 {
 #if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////////////
        ntot_ = (n_[2] + 2) * n_[1] * n_[0];
-    csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
+        music::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
        if (typeid(data_t) == typeid(real_t))
        {
            data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(real_t)));
@ -54,10 +28,10 @@ void Grid_FFT<data_t>::Setup(void)
        }
        else
        {
-        csoca::elog.Print("invalid data type in Grid_FFT<data_t>::setup_fft_interface\n");
+            music::elog.Print("invalid data type in Grid_FFT<data_t>::setup_fft_interface\n");
        }
-    fft_norm_fac_ = 1.0 / std::sqrt((double)((size_t)n_[0] * (double)n_[1] * (double)n_[2]));
+        fft_norm_fac_ = 1.0 / std::sqrt((real_t)((size_t)n_[0] * (real_t)n_[1] * (real_t)n_[2]));
        if (typeid(data_t) == typeid(real_t))
        {
@ -74,6 +48,7 @@ void Grid_FFT<data_t>::Setup(void)
        {
            nhalf_[i] = n_[i] / 2;
            kfac_[i] = 2.0 * M_PI / length_[i];
            kny_[i] = kfac_[i] * n_[i]/2;
            dx_[i] = length_[i] / n_[i];
            global_range_.x1_[i] = 0;
@ -99,9 +74,10 @@ void Grid_FFT<data_t>::Setup(void)
            sizes_[2] = npc_;
            sizes_[3] = npc_;
        }
-
+    }
-#else //// i.e. ifdef USE_MPI ////////////////////////////////////////////////////////////////////////////////////
+    else
-
+    {
 #ifdef USE_MPI //// i.e. ifdef USE_MPI ////////////////////////////////////////////////////////////////////////////////////
        size_t cmplxsz;
        if (typeid(data_t) == typeid(real_t))
@ -130,12 +106,13 @@ void Grid_FFT<data_t>::Setup(void)
        }
        else
        {
-        csoca::elog.Print("unknown data type in Grid_FFT<data_t>::setup_fft_interface\n");
+            music::elog.Print("unknown data type in Grid_FFT<data_t>::setup_fft_interface\n");
            abort();
        }
-    csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
+        music::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
-    fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]);
+
        fft_norm_fac_ = 1.0 / sqrt((real_t)n_[0] * (real_t)n_[1] * (real_t)n_[2]);
        if (typeid(data_t) == typeid(real_t))
        {
@ -152,6 +129,7 @@ void Grid_FFT<data_t>::Setup(void)
        {
            nhalf_[i] = n_[i] / 2;
            kfac_[i] = 2.0 * M_PI / length_[i];
            kny_[i] = kfac_[i] * n_[i]/2;
            dx_[i] = length_[i] / n_[i];
            global_range_.x1_[i] = 0;
@ -174,20 +152,23 @@ void Grid_FFT<data_t>::Setup(void)
            sizes_[2] = npc_;
            sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension
        }
-
+#else
        music::flog << "MPI is required for distributed FFT arrays!" << std::endl;
        throw std::runtime_error("MPI is required for distributed FFT arrays!");
 #endif //// of #ifdef #else USE_MPI ////////////////////////////////////////////////////////////////////////////////////
    }
 }
-template <typename data_t>
+template <typename data_t, bool bdistributed>
-void Grid_FFT<data_t>::ApplyNorm(void)
+void Grid_FFT<data_t, bdistributed>::ApplyNorm(void)
 {
 #pragma omp parallel for
    for (size_t i = 0; i < ntot_; ++i)
        data_[i] *= fft_norm_fac_;
 }
-template <typename data_t>
+template <typename data_t, bool bdistributed>
-void Grid_FFT<data_t>::FourierTransformForward(bool do_transform)
+void Grid_FFT<data_t, bdistributed>::FourierTransformForward(bool do_transform)
 {
 #if defined(USE_MPI)
    MPI_Barrier(MPI_COMM_WORLD);
@ -199,12 +180,13 @@ void Grid_FFT<data_t>::FourierTransformForward(bool do_transform)
        if (do_transform)
        {
            double wtime = get_wtime();
-            csoca::dlog.Print("[FFT] Calling Grid_FFT::to_kspace (%lux%lux%lu)", sizes_[0], sizes_[1], sizes_[2]);
+            music::dlog.Print("[FFT] Calling Grid_FFT::to_kspace (%lux%lux%lu)", sizes_[0], sizes_[1], sizes_[2]);
-            FFTW_API(execute)(plan_);
+            FFTW_API(execute)
            (plan_);
            this->ApplyNorm();
            wtime = get_wtime() - wtime;
-            csoca::dlog.Print("[FFT] Completed Grid_FFT::to_kspace (%lux%lux%lu), took %f s", sizes_[0], sizes_[1], sizes_[2], wtime);
+            music::dlog.Print("[FFT] Completed Grid_FFT::to_kspace (%lux%lux%lu), took %f s", sizes_[0], sizes_[1], sizes_[2], wtime);
        }
        sizes_[0] = local_1_size_;
@ -217,8 +199,8 @@ void Grid_FFT<data_t>::FourierTransformForward(bool do_transform)
    }
 }
-template <typename data_t>
+template <typename data_t, bool bdistributed>
-void Grid_FFT<data_t>::FourierTransformBackward(bool do_transform)
+void Grid_FFT<data_t, bdistributed>::FourierTransformBackward(bool do_transform)
 {
 #if defined(USE_MPI)
    MPI_Barrier(MPI_COMM_WORLD);
@ -229,14 +211,14 @@ void Grid_FFT<data_t>::FourierTransformBackward(bool do_transform)
        //.............................
        if (do_transform)
        {
-            csoca::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]);
+            music::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]);
            double wtime = get_wtime();
            FFTW_API(execute)(iplan_);
            this->ApplyNorm();
            wtime = get_wtime() - wtime;
-            csoca::dlog.Print("[FFT] Completed Grid_FFT::to_rspace (%dx%dx%d), took %f s\n", sizes_[0], sizes_[1], sizes_[2], wtime);
+            music::dlog.Print("[FFT] Completed Grid_FFT::to_rspace (%dx%dx%d), took %f s\n", sizes_[0], sizes_[1], sizes_[2], wtime);
        }
        sizes_[0] = local_0_size_;
        sizes_[1] = n_[1];
@ -269,9 +251,293 @@ void create_hdf5(std::string Filename)
    H5Fclose(HDF_FileID);
 }
-template <typename data_t>
+template <typename T>
-void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname) const
+hid_t hdf5_get_data_type(void)
 {
    if (typeid(T) == typeid(int))
        return H5T_NATIVE_INT;
    if (typeid(T) == typeid(unsigned))
        return H5T_NATIVE_UINT;
    if (typeid(T) == typeid(float))
        return H5T_NATIVE_FLOAT;
    if (typeid(T) == typeid(double))
        return H5T_NATIVE_DOUBLE;
    if (typeid(T) == typeid(long double))
        return H5T_NATIVE_LDOUBLE;
    if (typeid(T) == typeid(long long))
        return H5T_NATIVE_LLONG;
    if (typeid(T) == typeid(unsigned long long))
        return H5T_NATIVE_ULLONG;
    if (typeid(T) == typeid(size_t))
        return H5T_NATIVE_ULLONG;
    music::elog << "[HDF_IO] trying to evaluate unsupported type in GetDataType";
    return -1;
 }
 /**
  * @brief Load a 3D dataset from an HDF5 file into this (non-distributed) grid.
  *
  * The grid is resized to the extent of the dataset via Setup(), switched to
  * real space, filled with the dataset values and finally divided by the
  * sample standard deviation of the field. All HDF5 failure paths log to
  * music::elog and call abort(); reading into an MPI-distributed grid is
  * rejected the same way.
  *
  * @param Filename path of the HDF5 file, opened read-only
  * @param ObjName  name of the dataset inside the file
  */
 template <typename data_t, bool bdistributed>
 void Grid_FFT<data_t, bdistributed>::Read_from_HDF5(const std::string Filename, const std::string ObjName)
 {
    if (bdistributed)
    {
        music::elog << "Attempt to read from HDF5 into MPI-distributed array. This is not supported yet!" << std::endl;
        abort();
    }
    hid_t HDF_Type = hdf5_get_data_type<data_t>();
    hid_t HDF_FileID = H5Fopen(Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
    //... a negative id means the file could not be opened; report that rather
    //... than falling through to a misleading "dataset does not exist" error
    if (HDF_FileID < 0)
    {
        music::elog << "Could not open HDF5 file \'" << Filename.c_str() << "\' for reading." << std::endl;
        abort();
    }
    //... save old error handler
    herr_t (*old_func)(void *);
    void *old_client_data;
    H5Eget_auto(&old_func, &old_client_data);
    //... turn off error handling by hdf5 library
    H5Eset_auto(NULL, NULL);
    //... probe dataset opening
    hid_t HDF_DatasetID = H5Dopen(HDF_FileID, ObjName.c_str());
    //... restore previous error handler
    H5Eset_auto(old_func, old_client_data);
    //... dataset did not exist or was empty
    if (HDF_DatasetID < 0)
    {
        music::elog << "Dataset \'" << ObjName.c_str() << "\' does not exist or is empty." << std::endl;
        H5Fclose(HDF_FileID);
        abort();
    }
    //... get space associated with dataset and its extensions
    hid_t HDF_DataspaceID = H5Dget_space(HDF_DatasetID);
    int ndims = H5Sget_simple_extent_ndims(HDF_DataspaceID);
    //... dimsize holds exactly three extents: any other rank would either
    //... overflow it (rank > 3) or leave entries uninitialised (rank < 3)
    if (ndims != 3)
    {
        music::elog << "Dataset \'" << ObjName.c_str() << "\' has rank " << ndims << ", but a 3-dimensional dataset is required." << std::endl;
        H5Sclose(HDF_DataspaceID);
        H5Dclose(HDF_DatasetID);
        H5Fclose(HDF_FileID);
        abort();
    }
    hsize_t dimsize[3] = {0, 0, 0};
    H5Sget_simple_extent_dims(HDF_DataspaceID, dimsize, NULL);
    hsize_t HDF_StorageSize = 1;
    for (int i = 0; i < ndims; ++i)
        HDF_StorageSize *= dimsize[i];
    //... staging buffer for the whole dataset (allocation failure throws)
    std::vector<data_t> Data(HDF_StorageSize, (data_t)0);
    //... read the dataset and check the status reported by the library
    herr_t HDF_ReadStatus = H5Dread(HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, H5P_DEFAULT, &Data[0]);
    if (HDF_ReadStatus < 0)
    {
        music::elog << "Something went wrong while reading!" << std::endl;
        H5Sclose(HDF_DataspaceID);
        H5Dclose(HDF_DatasetID);
        H5Fclose(HDF_FileID);
        abort();
    }
    H5Sclose(HDF_DataspaceID);
    H5Dclose(HDF_DatasetID);
    H5Fclose(HDF_FileID);
    assert(dimsize[0] == dimsize[1] && dimsize[0] == dimsize[2]);
    music::ilog << "Read external constraint data of dimensions " << dimsize[0] << "**3." << std::endl;
    //... adopt the dataset extent and rebuild the grid storage in real space
    for (size_t i = 0; i < 3; ++i)
        this->n_[i] = dimsize[i];
    this->space_ = rspace_id;
    if (data_ != nullptr)
    {
        fftw_free(data_);
        data_ = nullptr; // never leave a dangling pointer behind for Setup()
    }
    this->Setup();
    //... copy data to internal array and accumulate first and second moments
    real_t sum1{0.0}, sum2{0.0};
    #pragma omp parallel for reduction(+ : sum1, sum2)
    for (size_t i = 0; i < size(0); ++i)
    {
        for (size_t j = 0; j < size(1); ++j)
        {
            for (size_t k = 0; k < size(2); ++k)
            {
                this->relem(i, j, k) = Data[(i * size(1) + j) * size(2) + k];
                sum2 += std::real(this->relem(i, j, k) * this->relem(i, j, k));
                sum1 += std::real(this->relem(i, j, k));
            }
        }
    }
    sum1 /= Data.size();
    sum2 /= Data.size();
    //... note: stdw is the standard deviation, i.e. the square root of the
    //... variance named in the log label below
    auto stdw = std::sqrt(sum2 - sum1 * sum1);
    music::ilog << "Constraint field has <W>=" << sum1 << ", <W^2>-<W>^2=" << stdw << std::endl;
    //... rescale the field by its standard deviation (nothing is reduced here)
    #pragma omp parallel for
    for (size_t i = 0; i < size(0); ++i)
    {
        for (size_t j = 0; j < size(1); ++j)
        {
            for (size_t k = 0; k < size(2); ++k)
            {
                this->relem(i, j, k) /= stdw;
            }
        }
    }
 }
 template <typename data_t, bool bdistributed>
 void Grid_FFT<data_t, bdistributed>::Write_to_HDF5(std::string fname, std::string datasetname) const
 {
    // FIXME: cleanup duplicate code in this function!
    if (!bdistributed && CONFIG::MPI_task_rank == 0)
    {
        hid_t file_id, dset_id;    /* file and dataset identifiers */
        hid_t filespace, memspace; /* file and memory dataspace identifiers */
        hsize_t offset[3], count[3];
        hid_t dtype_id = H5T_NATIVE_FLOAT;
        hid_t plist_id = H5P_DEFAULT;
        if (!file_exists(fname))
            create_hdf5(fname);
        file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id);
        for (int i = 0; i < 3; ++i)
            count[i] = size(i);
        if (typeid(data_t) == typeid(float))
            dtype_id = H5T_NATIVE_FLOAT;
        else if (typeid(data_t) == typeid(double))
            dtype_id = H5T_NATIVE_DOUBLE;
        else if (typeid(data_t) == typeid(long double))
            dtype_id = H5T_NATIVE_LDOUBLE;    
        else if (typeid(data_t) == typeid(std::complex<float>))
            dtype_id = H5T_NATIVE_FLOAT;
        else if (typeid(data_t) == typeid(std::complex<double>))
            dtype_id = H5T_NATIVE_DOUBLE;
        else if (typeid(data_t) == typeid(std::complex<long double>))
            dtype_id = H5T_NATIVE_LDOUBLE;
        filespace = H5Screate_simple(3, count, NULL);
        dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace,
                             H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
        H5Sclose(filespace);
        hsize_t slice_sz = size(1) * size(2);
        real_t *buf = new real_t[slice_sz];
        count[0] = 1;
        count[1] = size(1);
        count[2] = size(2);
        offset[1] = 0;
        offset[2] = 0;
        memspace = H5Screate_simple(3, count, NULL);
        filespace = H5Dget_space(dset_id);
        for (size_t i = 0; i < size(0); ++i)
        {
            offset[0] = i;
            for (size_t j = 0; j < size(1); ++j)
            {
                for (size_t k = 0; k < size(2); ++k)
                {
                    if (this->space_ == rspace_id)
                        buf[j * size(2) + k] = std::real(relem(i, j, k));
                    else
                        buf[j * size(2) + k] = std::real(kelem(i, j, k));
                }
            }
            H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL);
            H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf);
        }
        H5Sclose(filespace);
        H5Sclose(memspace);
        // H5Sclose(filespace);
        H5Dclose(dset_id);
        if (typeid(data_t) == typeid(std::complex<float>) ||
            typeid(data_t) == typeid(std::complex<double>) ||
            typeid(data_t) == typeid(std::complex<long double>) ||
            this->space_ == kspace_id)
        {
            datasetname += std::string(".im");
            for (int i = 0; i < 3; ++i)
                count[i] = size(i);
            filespace = H5Screate_simple(3, count, NULL);
            dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace,
                                 H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
            H5Sclose(filespace);
            count[0] = 1;
            for (size_t i = 0; i < size(0); ++i)
            {
                offset[0] = i;
                for (size_t j = 0; j < size(1); ++j)
                    for (size_t k = 0; k < size(2); ++k)
                    {
                        if (this->space_ == rspace_id)
                            buf[j * size(2) + k] = std::imag(relem(i, j, k));
                        else
                            buf[j * size(2) + k] = std::imag(kelem(i, j, k));
                    }
                memspace = H5Screate_simple(3, count, NULL);
                filespace = H5Dget_space(dset_id);
                H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count,
                                    NULL);
                H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf);
                H5Sclose(memspace);
                H5Sclose(filespace);
            }
            H5Dclose(dset_id);
            delete[] buf;
        }
        H5Fclose(file_id);
        return;
    }
    if (!bdistributed && CONFIG::MPI_task_rank != 0)
        return;
    hid_t file_id, dset_id;    /* file and dataset identifiers */
    hid_t filespace, memspace; /* file and memory dataspace identifiers */
    hsize_t offset[3], count[3];
@ -282,8 +548,8 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
    int mpi_size, mpi_rank;
-    mpi_size = MPI_Get_size();
+    mpi_size = MPI::get_size();
-    mpi_rank = MPI_Get_rank();
+    mpi_rank = MPI::get_rank();
    if (!file_exists(fname) && mpi_rank == 0)
        create_hdf5(fname);
@ -329,14 +595,14 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
            dtype_id = H5T_NATIVE_FLOAT;
        else if (typeid(data_t) == typeid(double))
            dtype_id = H5T_NATIVE_DOUBLE;
        else if (typeid(data_t) == typeid(long double))
            dtype_id = H5T_NATIVE_LDOUBLE;
        else if (typeid(data_t) == typeid(std::complex<float>))
        {
            dtype_id = H5T_NATIVE_FLOAT;
        }
        else if (typeid(data_t) == typeid(std::complex<double>))
        {
            dtype_id = H5T_NATIVE_DOUBLE;
-        }
+        else if (typeid(data_t) == typeid(std::complex<long double>))
            dtype_id = H5T_NATIVE_LDOUBLE;
 #if defined(USE_MPI) && !defined(USE_MPI_IO)
        if (itask == 0)
@ -391,7 +657,10 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
            {
                for (size_t k = 0; k < size(2); ++k)
                {
                    if (this->space_ == rspace_id)
                        buf[j * size(2) + k] = std::real(relem(i, j, k));
                    else
                        buf[j * size(2) + k] = std::real(kelem(i, j, k));
                }
            }
@ -410,7 +679,9 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
        H5Dclose(dset_id);
        if (typeid(data_t) == typeid(std::complex<float>) ||
-            typeid(data_t) == typeid(std::complex<double>))
+            typeid(data_t) == typeid(std::complex<double>) ||
            typeid(data_t) == typeid(std::complex<long double>) ||
            this->space_ == kspace_id)
        {
            datasetname += std::string(".im");
@ -460,7 +731,10 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
                for (size_t j = 0; j < size(1); ++j)
                    for (size_t k = 0; k < size(2); ++k)
                    {
                        if (this->space_ == rspace_id)
                            buf[j * size(2) + k] = std::imag(relem(i, j, k));
                        else
                            buf[j * size(2) + k] = std::imag(kelem(i, j, k));
                    }
                memspace = H5Screate_simple(3, count, NULL);
@ -493,8 +767,8 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
 #include <iomanip>
-template <typename data_t>
+template <typename data_t, bool bdistributed>
-void Grid_FFT<data_t>::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax)
+void Grid_FFT<data_t, bdistributed>::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax)
 {
    double logvmin = std::log10(vmin);
    double logvmax = std::log10(vmax);
@ -545,13 +819,12 @@ void Grid_FFT<data_t>::Write_PDF(std::string ofname, int nbins, double scale, do
 #endif
 }
-template <typename data_t>
+template <typename data_t, bool bdistributed>
-void Grid_FFT<data_t>::Write_PowerSpectrum(std::string ofname)
+void Grid_FFT<data_t, bdistributed>::Write_PowerSpectrum(std::string ofname)
 {
    std::vector<double> bin_k, bin_P, bin_eP;
    std::vector<size_t> bin_count;
-    int nbins = 4 * std::max(nhalf_[0], std::max(nhalf_[1], nhalf_[2]));
+    this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count);
    this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count );
 #if defined(USE_MPI)
    if (CONFIG::MPI_task_rank == 0)
    {
@ -576,8 +849,8 @@ void Grid_FFT<data_t>::Write_PowerSpectrum(std::string ofname)
 #endif
 }
-template <typename data_t>
+template <typename data_t, bool bdistributed>
-void Grid_FFT<data_t>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::vector<double> &bin_P, std::vector<double> &bin_eP, std::vector<size_t> &bin_count )
+void Grid_FFT<data_t, bdistributed>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::vector<double> &bin_P, std::vector<double> &bin_eP, std::vector<size_t> &bin_count)
 {
    this->FourierTransformForward();
@ -597,7 +870,7 @@ void Grid_FFT<data_t>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::ve
        for (size_t iy = 0; iy < size(1); iy++)
            for (size_t iz = 0; iz < size(2); iz++)
            {
-                vec3<double> k3 = get_k<double>(ix, iy, iz);
+                vec3_t<double> k3 = get_k<double>(ix, iy, iz);
                double k = k3.norm();
                int idx2 = k / dk; //int((1.0f / dklog * std::log10(k / kmin)));
                auto z = this->kelem(ix, iy, iz);
@ -657,5 +930,7 @@ void Grid_FFT<data_t>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::ve
 /********************************************************************************************/
-template class Grid_FFT<real_t>;
+template class Grid_FFT<real_t, true>;
-template class Grid_FFT<ccomplex_t>;
+template class Grid_FFT<real_t, false>;
 template class Grid_FFT<ccomplex_t, true>;
 template class Grid_FFT<ccomplex_t, false>;
--- a/src/ic_generator.cc
+++ b/src/ic_generator.cc
@ -7,6 +7,7 @@
 #include <ic_generator.hh>
 #include <particle_generator.hh>
 #include <particle_plt.hh>
 #include <unistd.h> // for unlink
@ -21,18 +22,18 @@ namespace ic_generator{
 std::unique_ptr<RNG_plugin> the_random_number_generator;
 std::unique_ptr<output_plugin> the_output_plugin;
-std::unique_ptr<CosmologyCalculator>  the_cosmo_calc;
+std::unique_ptr<cosmology::calculator>  the_cosmo_calc;
-int Initialise( ConfigFile& the_config )
+int Initialise( config_file& the_config )
 {
    the_random_number_generator = std::move(select_RNG_plugin(the_config));
    the_output_plugin           = std::move(select_output_plugin(the_config));
-    the_cosmo_calc              = std::make_unique<CosmologyCalculator>(the_config);
+    the_cosmo_calc              = std::make_unique<cosmology::calculator>(the_config);
    return 0;
 }
-int Run( ConfigFile& the_config )
+int Run( config_file& the_config )
 {
    //--------------------------------------------------------------------------------------------------------
    // Read run parameters
@ -40,56 +41,75 @@ int Run( ConfigFile& the_config )
    //--------------------------------------------------------------------------------------------------------
    //! number of resolution elements per dimension
-    const size_t ngrid = the_config.GetValue<size_t>("setup", "GridRes");
+    const size_t ngrid = the_config.get_value<size_t>("setup", "GridRes");
    //--------------------------------------------------------------------------------------------------------
    //! box side length in h-1 Mpc
-    const real_t boxlen = the_config.GetValue<double>("setup", "BoxLength");
+    const real_t boxlen = the_config.get_value<double>("setup", "BoxLength");
    //--------------------------------------------------------------------------------------------------------
    //! starting redshift
-    const real_t zstart = the_config.GetValue<double>("setup", "zstart");
+    const real_t zstart = the_config.get_value<double>("setup", "zstart");
    //--------------------------------------------------------------------------------------------------------
    //! order of the LPT approximation 
-    int LPTorder = the_config.GetValueSafe<double>("setup","LPTorder",100);
+    int LPTorder = the_config.get_value_safe<double>("setup","LPTorder",100);
    //--------------------------------------------------------------------------------------------------------
    //! initialice particles on a bcc or fcc lattice instead of a standard sc lattice (doubles and quadruples the number of particles) 
-    std::string lattice_str = the_config.GetValueSafe<std::string>("setup","ParticleLoad","sc");
+    std::string lattice_str = the_config.get_value_safe<std::string>("setup","ParticleLoad","sc");
-    const particle::lattice lattice_type = (lattice_str=="bcc")? particle::lattice_bcc 
+    const particle::lattice lattice_type = 
-        : ((lattice_str=="fcc")? particle::lattice_fcc : particle::lattice_sc);
+          ((lattice_str=="bcc")? particle::lattice_bcc 
        : ((lattice_str=="fcc")? particle::lattice_fcc 
        : ((lattice_str=="rsc")? particle::lattice_rsc 
        : ((lattice_str=="glass")? particle::lattice_glass
        : particle::lattice_sc))));
    //--------------------------------------------------------------------------------------------------------
    //! apply fixing of the complex mode amplitude following Angulo & Pontzen (2016) [https://arxiv.org/abs/1603.05253]
-    const bool bDoFixing = the_config.GetValueSafe<bool>("setup", "DoFixing", false);
+    const bool bDoFixing = the_config.get_value_safe<bool>("setup", "DoFixing", false);
    //--------------------------------------------------------------------------------------------------------
    //! do baryon ICs?
-    const bool bDoBaryons = the_config.GetValueSafe<bool>("setup", "DoBaryons", false );
+    const bool bDoBaryons = the_config.get_value_safe<bool>("setup", "DoBaryons", false );
    std::map< cosmo_species, double > Omega;
    if( bDoBaryons ){
        double Om = the_config.get_value<double>("cosmology", "Omega_m");
        double Ob = the_config.get_value<double>("cosmology", "Omega_b");
        Omega[cosmo_species::dm] = Om-Ob;
        Omega[cosmo_species::baryon] = Ob;
    }else{
        double Om = the_config.get_value<double>("cosmology", "Omega_m");
        Omega[cosmo_species::dm] = Om;
        Omega[cosmo_species::baryon] = 0.0;
    }
    //--------------------------------------------------------------------------------------------------------
    //! do constrained ICs?
    const bool bAddConstrainedModes =  the_config.contains_key("setup", "ConstraintFieldFile" );
    //--------------------------------------------------------------------------------------------------------
    //! add beyond box tidal field modes following Schmidt et al. (2018) [https://arxiv.org/abs/1803.03274]
-    bool bAddExternalTides = the_config.ContainsKey("cosmology", "LSS_aniso_lx") 
+    bool bAddExternalTides = the_config.contains_key("cosmology", "LSS_aniso_lx") 
-                           & the_config.ContainsKey("cosmology", "LSS_aniso_ly") 
+                           & the_config.contains_key("cosmology", "LSS_aniso_ly") 
-                           & the_config.ContainsKey("cosmology", "LSS_aniso_lz");
+                           & the_config.contains_key("cosmology", "LSS_aniso_lz");
-    if( bAddExternalTides && !(  the_config.ContainsKey("cosmology", "LSS_aniso_lx") 
+    if( bAddExternalTides && !(  the_config.contains_key("cosmology", "LSS_aniso_lx") 
-                               | the_config.ContainsKey("cosmology", "LSS_aniso_ly") 
+                               | the_config.contains_key("cosmology", "LSS_aniso_ly") 
-                               | the_config.ContainsKey("cosmology", "LSS_aniso_lz") ))
+                               | the_config.contains_key("cosmology", "LSS_aniso_lz") ))
    {
-        csoca::elog << "Not all dimensions of LSS_aniso_l{x,y,z} specified! Will ignore external tidal field!" << std::endl;
+        music::elog << "Not all dimensions of LSS_aniso_l{x,y,z} specified! Will ignore external tidal field!" << std::endl;
        bAddExternalTides = false;
    }
    // Anisotropy parameters for beyond box tidal field 
    std::array<real_t,3> lss_aniso_lambda = {
-        the_config.GetValueSafe<double>("cosmology", "LSS_aniso_lx", 0.0),
+        the_config.get_value_safe<double>("cosmology", "LSS_aniso_lx", 0.0),
-        the_config.GetValueSafe<double>("cosmology", "LSS_aniso_ly", 0.0),
+        the_config.get_value_safe<double>("cosmology", "LSS_aniso_ly", 0.0),
-        the_config.GetValueSafe<double>("cosmology", "LSS_aniso_lz", 0.0),
+        the_config.get_value_safe<double>("cosmology", "LSS_aniso_lz", 0.0),
    };  
    if( std::abs(lss_aniso_lambda[0]+lss_aniso_lambda[1]+lss_aniso_lambda[2]) > 1e-10 ){
-        csoca::elog << "External tidal field is not trace-free! Will subtract trace!" << std::endl;
+        music::elog << "External tidal field is not trace-free! Will subtract trace!" << std::endl;
        auto tr_l_3 = (lss_aniso_lambda[0]+lss_aniso_lambda[1]+lss_aniso_lambda[2])/3.0;
        lss_aniso_lambda[0] -= tr_l_3;
        lss_aniso_lambda[1] -= tr_l_3;
@ -101,20 +121,20 @@ int Run( ConfigFile& the_config )
    const real_t astart = 1.0/(1.0+zstart);
    const real_t volfac(std::pow(boxlen / ngrid / 2.0 / M_PI, 1.5));
-    the_cosmo_calc->WritePowerspectrum(astart, "input_powerspec.txt" );
+    the_cosmo_calc->write_powerspectrum(astart, "input_powerspec.txt" );
-    //csoca::ilog << "-----------------------------------------------------------------------------" << std::endl;
+    //music::ilog << "-----------------------------------------------------------------------------" << std::endl;
    // if( bSymplecticPT && LPTorder!=2 ){
-    //     csoca::wlog << "SymplecticPT has been selected and will overwrite chosen order of LPT to 2" << std::endl;
+    //     music::wlog << "SymplecticPT has been selected and will overwrite chosen order of LPT to 2" << std::endl;
    //     LPTorder = 2;
    // }
    //--------------------------------------------------------------------
    // Compute LPT time coefficients
    //--------------------------------------------------------------------
-    const real_t Dplus0 = the_cosmo_calc->CalcGrowthFactor(astart) / the_cosmo_calc->CalcGrowthFactor(1.0);
+    const real_t Dplus0 = the_cosmo_calc->get_growth_factor(astart);
-    const real_t vfac   = the_cosmo_calc->CalcVFact(astart);
+    const real_t vfac   = the_cosmo_calc->get_vfact(astart);
    const double g1  = -Dplus0;
    const double g2  = ((LPTorder>1)? -3.0/7.0*Dplus0*Dplus0 : 0.0);
@ -132,7 +152,7 @@ int Run( ConfigFile& the_config )
    // coefficients needed for anisotropic external tides
    const double ai3 = std::pow(astart,-3);
    const double Omega_m_of_a = the_cosmo_calc->cosmo_param_.Omega_m * ai3 / (the_cosmo_calc->cosmo_param_.Omega_m * ai3 + the_cosmo_calc->cosmo_param_.Omega_DE);
-    const double f1 = the_cosmo_calc->CalcGrowthRate(astart);
+    const double f1 = the_cosmo_calc->get_f(astart);
    const double f_aniso = -4.0/3.0 * f1 * f1 / Omega_m_of_a;
    const std::array<real_t,3> lss_aniso_alpha = {
@ -151,155 +171,231 @@ int Run( ConfigFile& the_config )
    Grid_FFT<real_t> A3x({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
    Grid_FFT<real_t> A3y({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
    Grid_FFT<real_t> A3z({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
    //... array [.] access to components of A3:
-    std::array< Grid_FFT<real_t>*,3 > A3({&A3x,&A3y,&A3z});
+    std::array<Grid_FFT<real_t> *, 3> A3({&A3x, &A3y, &A3z});
    // white noise field 
    Grid_FFT<real_t> wnoise({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
    //--------------------------------------------------------------------
    // Fill the grid with a Gaussian white noise field
    //--------------------------------------------------------------------
    music::ilog << "-------------------------------------------------------------------------------" << std::endl;
    music::ilog << "Generating white noise field...." << std::endl;
    the_random_number_generator->Fill_Grid(wnoise);
    wnoise.FourierTransformForward();
    //--------------------------------------------------------------------
    // If requested, use externally specified large-scale modes from a constraint field
    //--------------------------------------------------------------------
    if( bAddConstrainedModes ){
        Grid_FFT<real_t,false> cwnoise({8,8,8}, {boxlen,boxlen,boxlen});
        cwnoise.Read_from_HDF5( the_config.get_value<std::string>("setup", "ConstraintFieldFile"), 
                the_config.get_value<std::string>("setup", "ConstraintFieldName") );
        cwnoise.FourierTransformForward();
        size_t ngrid_c = cwnoise.size(0), ngrid_c_2 = ngrid_c/2;
        // copy the constraint-field modes into wnoise, accumulating sums of the old and new mode values
        double rs1{0.0},rs2{0.0},is1{0.0},is2{0.0};
        double nrs1{0.0},nrs2{0.0},nis1{0.0},nis2{0.0};
        size_t count{0};
        #pragma omp parallel for reduction(+:rs1,rs2,is1,is2,nrs1,nrs2,nis1,nis2,count)
        for( size_t i=0; i<ngrid_c; ++i ){
            size_t il = size_t(-1);
            if( i<ngrid_c_2 && i<ngrid/2 ) il = i;
            if( i>ngrid_c_2 && i+ngrid-ngrid_c>ngrid/2) il = ngrid-ngrid_c+i;
            if( il == size_t(-1) ) continue;
            if( il<size_t(wnoise.local_1_start_) || il>=size_t(wnoise.local_1_start_+wnoise.local_1_size_)) continue;
            il -= wnoise.local_1_start_;
            for( size_t j=0; j<ngrid_c; ++j ){
                size_t jl = size_t(-1);
                if( j<ngrid_c_2 && j<ngrid/2 ) jl = j;
                if( j>ngrid_c_2 && j+ngrid-ngrid_c>ngrid/2 ) jl = ngrid-ngrid_c+j;
                if( jl == size_t(-1) ) continue;
                for( size_t k=0; k<ngrid_c/2+1; ++k ){
                    if( k>ngrid/2 ) continue;
                    size_t kl = k;
                    ++count;
                    nrs1 += std::real(cwnoise.kelem(i,j,k));
                    nrs2 += std::real(cwnoise.kelem(i,j,k))*std::real(cwnoise.kelem(i,j,k));
                    nis1 += std::imag(cwnoise.kelem(i,j,k));
                    nis2 += std::imag(cwnoise.kelem(i,j,k))*std::imag(cwnoise.kelem(i,j,k));
                    rs1 += std::real(wnoise.kelem(il,jl,kl));
                    rs2 += std::real(wnoise.kelem(il,jl,kl))*std::real(wnoise.kelem(il,jl,kl));
                    is1 += std::imag(wnoise.kelem(il,jl,kl));
                    is2 += std::imag(wnoise.kelem(il,jl,kl))*std::imag(wnoise.kelem(il,jl,kl));
                #if defined(USE_MPI)
                    wnoise.kelem(il,jl,kl) = cwnoise.kelem(j,i,k);
                #else
                    wnoise.kelem(il,jl,kl) = cwnoise.kelem(i,j,k);
                #endif
                }
            }
        }
        // music::ilog << "  ... old field: re <w>=" << rs1/count << " <w^2>-<w>^2=" << rs2/count-rs1*rs1/count/count << std::endl;
        // music::ilog << "  ... old field: im <w>=" << is1/count << " <w^2>-<w>^2=" << is2/count-is1*is1/count/count << std::endl;
        // music::ilog << "  ... new field: re <w>=" << nrs1/count << " <w^2>-<w>^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl;
        // music::ilog << "  ... new field: im <w>=" << nis1/count << " <w^2>-<w>^2=" << nis2/count-nis1*nis1/count/count << std::endl;
        music::ilog << "White noise field large-scale modes overwritten with external field." << std::endl;
    }
    //--------------------------------------------------------------------
    // Apply normalisation factor and, if enabled, Angulo & Pontzen amplitude fixing
    //--------------------------------------------------------------------
    wnoise.apply_function_k( [&](auto wn){
        if (bDoFixing)
            wn = (std::abs(wn) != 0.0) ? wn / std::abs(wn) : wn;
        return wn / volfac;
    });
    //--------------------------------------------------------------------
    // Compute the LPT terms....
    //--------------------------------------------------------------------
    //--------------------------------------------------------------------
    // Create convolution class instance for non-linear terms
    //--------------------------------------------------------------------
 #if defined(USE_CONVOLVER_ORSZAG)
    OrszagConvolver<real_t> Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
-    // NaiveConvolver<real_t> Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+#elif defined(USE_CONVOLVER_NAIVE)
    NaiveConvolver<real_t> Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
 #endif
    //--------------------------------------------------------------------
    //--------------------------------------------------------------------
    // Create PLT gradient operator
    //--------------------------------------------------------------------
 #if defined(ENABLE_PLT)
    particle::lattice_gradient lg( the_config );
 #else
    op::fourier_gradient lg( the_config );
 #endif
    //--------------------------------------------------------------------
    std::vector<cosmo_species> species_list;
-    species_list.push_back( cosmo_species::dm );
+    species_list.push_back(cosmo_species::dm);
-    if( bDoBaryons ) species_list.push_back( cosmo_species::baryon );
+    if (bDoBaryons)
-
+        species_list.push_back(cosmo_species::baryon);
    csoca::ilog << "-------------------------------------------------------------------------------" << std::endl;
    for( auto& this_species : species_list )
    {
        csoca::ilog << std::endl
                    << ">>> Computing ICs for species \'" << cosmo_species_name[this_species] << "\' <<<\n" << std::endl;
    //======================================================================
    //... compute 1LPT displacement potential ....
    //======================================================================
    // phi = - delta / k^2
    music::ilog << "-------------------------------------------------------------------------------" << std::endl;
    music::ilog << "Generating white noise field...." << std::endl;
    double wtime = get_wtime();
-        csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush;
+    music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush;
-        #if 1 //  random ICs
+    phi.FourierTransformForward(false);
-        //--------------------------------------------------------------------
+    phi.assign_function_of_grids_kdep([&](auto k, auto wn) {
        // Fill the grid with a Gaussian white noise field
        //--------------------------------------------------------------------
        the_random_number_generator->Fill_Grid( phi );
        phi.FourierTransformForward();
        phi.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t {
        real_t kmod = k.norm();
-            if( bDoFixing ) x = (std::abs(x)!=0.0)? x / std::abs(x) : x; 
+        ccomplex_t delta = wn * the_cosmo_calc->get_amplitude(kmod, total);
-            ccomplex_t delta = x * the_cosmo_calc->GetAmplitude(kmod, total);
+        return -delta / (kmod * kmod);
-            return -delta / (kmod * kmod) / volfac;
+    }, wnoise);
        });
    phi.zero_DC_mode();
        #else // ICs with a given phi(1) potential function
        constexpr real_t twopi{2.0*M_PI};
        constexpr real_t epsilon_q1d{0.25};
-        constexpr real_t epsy{0.25};
+    music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
        constexpr real_t epsz{0.0};//epsz{0.25};
        phi.FourierTransformBackward(false);
        phi.apply_function_r_dep([&](auto v, auto r) -> real_t {
            real_t q1 = r[0]-0.5*boxlen;//r[0]/boxlen * twopi - M_PI;
            real_t q2 = r[1]-0.5*boxlen;//r[1]/boxlen * twopi - M_PI;
            real_t q3 = r[2]-0.5*boxlen;//r[1]/boxlen * twopi - M_PI;
            // std::cerr << q1  << " " << q2 << std::endl;
            return -2.0*std::cos(q1+std::cos(q2));
            // return (-std::cos(q1) + epsilon_q1d * std::sin(q2));
            // return (-std::cos(q1) + epsy * std::sin(q2) + epsz * std::cos(q1) * std::sin(q3));
        });
        phi.FourierTransformForward();
        #endif
        csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
    //======================================================================
    //... compute 2LPT displacement potential ....
    //======================================================================
-        if( LPTorder > 1 ){
+    if (LPTorder > 1)
    {
        wtime = get_wtime();
-            csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush;
+        music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush;
        phi2.FourierTransformForward(false);
-            Conv.convolve_SumOfHessians( phi, {0,0}, phi, {1,1}, {2,2}, op::assign_to( phi2 ) );
+        Conv.convolve_SumOfHessians(phi, {0, 0}, phi, {1, 1}, {2, 2}, op::assign_to(phi2));
-            Conv.convolve_Hessians( phi, {1,1}, phi, {2,2}, op::add_to(phi2) );
+        Conv.convolve_Hessians(phi, {1, 1}, phi, {2, 2}, op::add_to(phi2));
-            Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, op::subtract_from(phi2) );
+        Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, op::subtract_from(phi2));
-            Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, op::subtract_from(phi2) );
+        Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, op::subtract_from(phi2));
-            Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, op::subtract_from(phi2) );
+        Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, op::subtract_from(phi2));
-            if( bAddExternalTides ){
+        if (bAddExternalTides)
-                phi2.assign_function_of_grids_kdep([&]( vec3<real_t> kvec, ccomplex_t pphi, ccomplex_t pphi2 ){
+        {
            phi2.assign_function_of_grids_kdep([&](vec3_t<real_t> kvec, ccomplex_t pphi, ccomplex_t pphi2) {
                // sign in front of f_aniso is reversed since phi1 = -phi
-                    return pphi2 + f_aniso * (kvec[0]*kvec[0]*lss_aniso_lambda[0]+kvec[1]*kvec[1]*lss_aniso_lambda[1]+kvec[2]*kvec[2]*lss_aniso_lambda[2])*pphi;
+                return pphi2 + f_aniso * (kvec[0] * kvec[0] * lss_aniso_lambda[0] + kvec[1] * kvec[1] * lss_aniso_lambda[1] + kvec[2] * kvec[2] * lss_aniso_lambda[2]) * pphi;
-                }, phi, phi2 );
+            },
                                               phi, phi2);
        }
        phi2.apply_InverseLaplacian();
-            csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
+        music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
-            if( bAddExternalTides ){
+        if (bAddExternalTides)
-                csoca::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" << std::endl;
+        {
-                csoca::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl;
+            music::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" << std::endl;
            music::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl;
        }
    }
    //======================================================================
    //... compute 3LPT displacement potential
    //======================================================================
-        if( LPTorder > 2 ){
+    if (LPTorder > 2)
    {
        //... 3a term ...
        wtime = get_wtime();
-            csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush;
+        music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush;
        phi3a.FourierTransformForward(false);
-            Conv.convolve_Hessians( phi, {0,0}, phi, {1,1}, phi, {2,2}, op::assign_to(phi3a) );
+        Conv.convolve_Hessians(phi, {0, 0}, phi, {1, 1}, phi, {2, 2}, op::assign_to(phi3a));
-            Conv.convolve_Hessians( phi, {0,1}, phi, {0,2}, phi, {1,2}, op::add_twice_to(phi3a) );
+        Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::multiply_add_to(phi3a,2.0));
-            Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, phi, {0,0}, op::subtract_from(phi3a) );
+        Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, phi, {0, 0}, op::subtract_from(phi3a));
-            Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, phi, {1,1}, op::subtract_from(phi3a) );
+        Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, phi, {1, 1}, op::subtract_from(phi3a));
-            Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, phi, {2,2}, op::subtract_from(phi3a) );
+        Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, phi, {2, 2}, op::subtract_from(phi3a));
        phi3a.apply_InverseLaplacian();
-            csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
+        music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
        //... 3b term ...
        wtime = get_wtime();
-            csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush;
+        music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush;
        phi3b.FourierTransformForward(false);
-            Conv.convolve_SumOfHessians( phi, {0,0}, phi2, {1,1}, {2,2}, op::assign_to(phi3b) );
+        Conv.convolve_SumOfHessians(phi, {0, 0}, phi2, {1, 1}, {2, 2}, op::assign_to(phi3b));
-            Conv.convolve_SumOfHessians( phi, {1,1}, phi2, {2,2}, {0,0}, op::add_to(phi3b) );
+        Conv.convolve_SumOfHessians(phi, {1, 1}, phi2, {2, 2}, {0, 0}, op::add_to(phi3b));
-            Conv.convolve_SumOfHessians( phi, {2,2}, phi2, {0,0}, {1,1}, op::add_to(phi3b) );
+        Conv.convolve_SumOfHessians(phi, {2, 2}, phi2, {0, 0}, {1, 1}, op::add_to(phi3b));
-            Conv.convolve_Hessians( phi, {0,1}, phi2, {0,1}, op::subtract_twice_from(phi3b) );
+        Conv.convolve_Hessians(phi, {0, 1}, phi2, {0, 1}, op::multiply_add_to(phi3b,-2.0));
-            Conv.convolve_Hessians( phi, {0,2}, phi2, {0,2}, op::subtract_twice_from(phi3b) );
+        Conv.convolve_Hessians(phi, {0, 2}, phi2, {0, 2}, op::multiply_add_to(phi3b,-2.0));
-            Conv.convolve_Hessians( phi, {1,2}, phi2, {1,2}, op::subtract_twice_from(phi3b) );
+        Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::multiply_add_to(phi3b,-2.0));
        phi3b.apply_InverseLaplacian();
        phi3b *= 0.5; // factor 1/2 from definition of phi(3b)!
-            csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
+        music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
        //... transversal term ...
        wtime = get_wtime();
-            csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush;
+        music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush;
-            for( int idim=0; idim<3; ++idim ){
+        for (int idim = 0; idim < 3; ++idim)
        {
            // cyclic rotations of indices
-                int idimp = (idim+1)%3, idimpp = (idim+2)%3;
+            int idimp = (idim + 1) % 3, idimpp = (idim + 2) % 3;
            A3[idim]->FourierTransformForward(false);
-                Conv.convolve_Hessians( phi2, {idim,idimp},  phi, {idim,idimpp}, op::assign_to(*A3[idim]) );
+            Conv.convolve_Hessians(phi2, {idim, idimp}, phi, {idim, idimpp}, op::assign_to(*A3[idim]));
-                Conv.convolve_Hessians( phi2, {idim,idimpp}, phi, {idim,idimp},  op::subtract_from(*A3[idim]) );
+            Conv.convolve_Hessians(phi2, {idim, idimpp}, phi, {idim, idimp}, op::subtract_from(*A3[idim]));
-                Conv.convolve_DifferenceOfHessians( phi, {idimp,idimpp}, phi2,{idimp,idimp}, {idimpp,idimpp}, op::add_to(*A3[idim]) );
+            Conv.convolve_DifferenceOfHessians(phi, {idimp, idimpp}, phi2, {idimp, idimp}, {idimpp, idimpp}, op::add_to(*A3[idim]));
-                Conv.convolve_DifferenceOfHessians( phi2,{idimp,idimpp}, phi, {idimp,idimp}, {idimpp,idimpp}, op::subtract_from(*A3[idim]) );
+            Conv.convolve_DifferenceOfHessians(phi2, {idimp, idimpp}, phi, {idimp, idimp}, {idimpp, idimpp}, op::subtract_from(*A3[idim]));
            A3[idim]->apply_InverseLaplacian();
        }
-            csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
+        music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
    }
    // if( bSymplecticPT ){
    //     //... transversal term ...
    //     wtime = get_wtime();
-        //     csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing vNLO(3) term" << std::flush;
+    //     music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing vNLO(3) term" << std::flush;
    //     for( int idim=0; idim<3; ++idim ){
    //         // cyclic rotations of indices
    //         A3[idim]->FourierTransformForward(false);
@ -307,7 +403,7 @@ int Run( ConfigFile& the_config )
    //         Conv.convolve_Gradient_and_Hessian( phi, {1},  phi2, {idim,1}, add_to(*A3[idim]) );
    //         Conv.convolve_Gradient_and_Hessian( phi, {2},  phi2, {idim,2}, add_to(*A3[idim]) );
    //     }
-        //     csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
+    //     music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
    // }
@ -320,31 +416,55 @@ int Run( ConfigFile& the_config )
    (*A3[1]) *= g3c;
    (*A3[2]) *= g3c;
-        csoca::ilog << "-------------------------------------------------------------------------------" << std::endl;
+    music::ilog << "-------------------------------------------------------------------------------" << std::endl;
    ///////////////////////////////////////////////////////////////////////
    // we store the densities here if we compute them
    //======================================================================
    // Testing
-        const std::string testing = the_config.GetValueSafe<std::string>("testing", "test", "none");
+    const std::string testing = the_config.get_value_safe<std::string>("testing", "test", "none");
-        if(testing != "none") {
+    if (testing != "none")
-            csoca::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" << std::endl;
+    {
-            if(testing == "potentials_and_densities") {
+        music::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" << std::endl;
        if (testing == "potentials_and_densities"){
            testing::output_potentials_and_densities(the_config, ngrid, boxlen, phi, phi2, phi3a, phi3b, A3);
-            } else if(testing == "velocity_displacement_symmetries") {
+        }
        else if (testing == "velocity_displacement_symmetries"){
            testing::output_velocity_displacement_symmetries(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3);
-            } else if(testing == "convergence") {
+        }
-                testing::output_convergence(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3);
+        else if (testing == "convergence"){
-            } else {
+            testing::output_convergence(the_config, the_cosmo_calc.get(), ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3);
-                csoca::flog << "unknown test '" << testing << "'" << std::endl;
+        }
        else{
            music::flog << "unknown test '" << testing << "'" << std::endl;
            std::abort();
        }
-        } else {
+    }
    for( auto& this_species : species_list )
    {
        music::ilog << std::endl
                    << ">>> Computing ICs for species \'" << cosmo_species_name[this_species] << "\' <<<\n" << std::endl;
        {
            // temporary storage of data
            Grid_FFT<real_t> tmp({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
            std::unique_ptr<particle::lattice_generator<Grid_FFT<real_t>>> particle_lattice_generator_ptr;
            // if output plugin wants particles, then we need to store them, along with their IDs
            if( the_output_plugin->write_species_as( this_species ) == output_type::particles )
            {
                // somewhat arbitrarily, start baryon particle IDs from 2**31 for 32-bit IDs and from 2**56 for 64-bit IDs
                size_t IDoffset = (this_species == cosmo_species::baryon)? ((the_output_plugin->has_64bit_ids())? 1ul<<56 : 1ul<<31): 0 ;
                // allocate particle structure and generate particle IDs
                particle_lattice_generator_ptr = 
                std::make_unique<particle::lattice_generator<Grid_FFT<real_t>>>( lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp, the_config );
            }
            //if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_eulerian ){
            if( the_output_plugin->write_species_as(this_species) == output_type::field_eulerian )
@ -362,7 +482,7 @@ int Run( ConfigFile& the_config )
                real_t std_phi1 = phi.std();
                const real_t hbar = 2.0 * M_PI/ngrid * (2*std_phi1/Dplus0); //3sigma, but this might rather depend on gradients of phi...
-                csoca::ilog << "Semiclassical PT : hbar = " << hbar << " from sigma(phi1) = " << std_phi1 << std::endl;
+                music::ilog << "Semiclassical PT : hbar = " << hbar << " from sigma(phi1) = " << std_phi1 << std::endl;
                if( LPTorder == 1 ){
                    psi.assign_function_of_grids_r([hbar,Dplus0]( real_t pphi ){
@ -435,14 +555,21 @@ int Run( ConfigFile& the_config )
                //===================================================================================
                // we store displacements and velocities here if we compute them
                //===================================================================================
-                particle::container particles;
+                
                bool shifted_lattice = (this_species == cosmo_species::baryon &&
                                        the_output_plugin->write_species_as(this_species) == output_type::particles) ? true : false;
                grid_interpolate<1,Grid_FFT<real_t>> interp( tmp );
                // if output plugin wants particles, then we need to store them, along with their IDs
-                if( the_output_plugin->write_species_as( this_species ) == output_type::particles )
+                // if( the_output_plugin->write_species_as( this_species ) == output_type::particles )
-                {
+                // {
-                    // allocate particle structure and generate particle IDs
+                //     // allocate particle structure and generate particle IDs
-                    particle::initialize_lattice( particles, lattice_type, tmp );
+                //     particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp, the_config );
-                }
+                // }
                // write out positions
                for( int idim=0; idim<3; ++idim ){
@ -459,17 +586,37 @@ int Run( ConfigFile& the_config )
                                size_t idx = phi.get_idx(i,j,k);
                                auto phitot = phi.kelem(idx) + phi2.kelem(idx) + phi3a.kelem(idx) + phi3b.kelem(idx);
                                // divide by Lbox, because displacement is in box units for output plugin
-                                tmp.kelem(idx) = lunit / boxlen * ( phi.gradient(idim,{i,j,k}) * phitot 
+                                tmp.kelem(idx) = lunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot 
-                                    + phi.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - phi.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx) );
+                                    + lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx) );
                                if( the_output_plugin->write_species_as( this_species ) == output_type::particles && lattice_type == particle::lattice_glass){
                                    tmp.kelem(idx) *= interp.compensation_kernel( tmp.get_k<real_t>(i,j,k) );
                                }
                                if( bDoBaryons ){
                                    vec3_t<real_t> kvec = phi.get_k<real_t>(i,j,k);
                                    real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2);
                                    // double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, cdm) :
                                    //  (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, baryon) : 
                                    // //   the_cosmo_calc->get_amplitude(kmod, total)) - the_cosmo_calc->get_amplitude(kmod, total);
                                    //  the_cosmo_calc->get_amplitude(kmod, total)*(-g1)) - the_cosmo_calc->get_amplitude(kmod, total)*(-g1);
                                    real_t ampldiff = (((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, cdm) 
                                        : (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, baryon) : 
                                           the_cosmo_calc->get_amplitude(kmod, total)) - the_cosmo_calc->get_amplitude(kmod, total)) * (-g1);
                                    tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * lunit * ampldiff / k2 / boxlen;
                                }
                            }
                        }
                    }
                    tmp.zero_DC_mode();
                    tmp.FourierTransformBackward();
                    // if we write particle data, store particle data in particle structure
                    if( the_output_plugin->write_species_as( this_species ) == output_type::particles )
                    {
-                        particle::set_positions( particles, lattice_type, idim, lunit, tmp );
+                        particle_lattice_generator_ptr->set_positions( lattice_type, shifted_lattice, idim, lunit, the_output_plugin->has_64bit_reals(), tmp, the_config );
                    } 
                    // otherwise write out the grid data directly to the output plugin
                    // else if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_lagrangian )
@ -496,8 +643,29 @@ int Run( ConfigFile& the_config )
                                // divide by Lbox, because displacement is in box units for output plugin
                                auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx) + vfac3 * (phi3a.kelem(idx) + phi3b.kelem(idx));
-                                tmp.kelem(idx) = vunit / boxlen * ( phi.gradient(idim,{i,j,k}) * phitot_v 
+                                tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot_v 
-                                        + vfac3 * (phi.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - phi.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx)) );
+                                        + vfac3 * (lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx)) );
                                if( the_output_plugin->write_species_as( this_species ) == output_type::particles && lattice_type == particle::lattice_glass){
                                    tmp.kelem(idx) *= interp.compensation_kernel( tmp.get_k<real_t>(i,j,k) );
                                }
                                if( bDoBaryons ){
                                    vec3_t<real_t> kvec = phi.get_k<real_t>(i,j,k);
                                    real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2);
                                    // double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, vcdm0) :
                                    //  (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, vbaryon0) : 
                                    //      the_cosmo_calc->get_amplitude(kmod, vtotal0)) - the_cosmo_calc->get_amplitude(kmod, vtotal0);
                                    // // the_cosmo_calc->get_amplitude(kmod, total)*(-g1)) - the_cosmo_calc->get_amplitude(kmod, total)*(-g1);
                                    real_t ampldiff = (((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, vcdm) 
                                        : (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, vbaryon) : 
                                           the_cosmo_calc->get_amplitude(kmod, vtotal)) - the_cosmo_calc->get_amplitude(kmod, vtotal)) * (-g1);
                                    tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * vfac1 * vunit / boxlen * ampldiff / k2 ;
                                }
                                // correct velocity with PLT mode growth rate
                                tmp.kelem(idx) *= lg.vfac_corr(tmp.get_k3(i,j,k));
                                if( bAddExternalTides ){
                                    // modify velocities with anisotropic expansion factor**2
@ -510,12 +678,13 @@ int Run( ConfigFile& the_config )
                            }
                        }
                    }
                    tmp.zero_DC_mode();
                    tmp.FourierTransformBackward();
                    // if we write particle data, store particle data in particle structure
                    if( the_output_plugin->write_species_as( this_species ) == output_type::particles )
                    {
-                        particle::set_velocities( particles, lattice_type, idim, tmp );
+                        particle_lattice_generator_ptr->set_velocities( lattice_type, shifted_lattice, idim, the_output_plugin->has_64bit_reals(), tmp, the_config );
                    }
                    // otherwise write out the grid data directly to the output plugin
                    else if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian )
@ -527,7 +696,7 @@ int Run( ConfigFile& the_config )
                if( the_output_plugin->write_species_as( this_species ) == output_type::particles )
                {
-                    the_output_plugin->write_particle_data( particles, this_species );
+                    the_output_plugin->write_particle_data( particle_lattice_generator_ptr->get_particles(), this_species, Omega[this_species] );
                }
                if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian )
--- a/src/logger.cc
+++ b/src/logger.cc
@ -1,19 +1,19 @@
 #include <logger.hh>
-namespace csoca {
+namespace music {
-std::ofstream Logger::output_file_;
+std::ofstream logger::output_file_;
-LogLevel Logger::log_level_ = LogLevel::Off;
+log_level logger::log_level_ = log_level::off;
-void Logger::SetLevel(const LogLevel &level) {
+void logger::set_level(const log_level &level) {
  log_level_ = level;
 }
-LogLevel Logger::GetLevel() {
+log_level logger::get_level() {
  return log_level_;
 }
-void Logger::SetOutput(const std::string filename) {
+void logger::set_output(const std::string filename) {
  if (output_file_.is_open()) {
    output_file_.close();
  }
@ -21,22 +21,22 @@ void Logger::SetOutput(const std::string filename) {
  assert(output_file_.is_open());
 }
-void Logger::UnsetOutput() {
+void logger::unset_output() {
  if (output_file_.is_open()) {
    output_file_.close();
  }
 }
-std::ofstream &Logger::GetOutput() {
+std::ofstream &logger::get_output() {
  return output_file_;
 }
 // global instantiations for different levels
-Logger glogger;
+logger the_logger;
-LogStream flog(glogger, LogLevel::Fatal);
+log_stream flog(the_logger, log_level::fatal);
-LogStream elog(glogger, LogLevel::Error);
+log_stream elog(the_logger, log_level::error);
-LogStream wlog(glogger, LogLevel::Warning);
+log_stream wlog(the_logger, log_level::warning);
-LogStream ilog(glogger, LogLevel::Info);
+log_stream ilog(the_logger, log_level::info);
-LogStream dlog(glogger, LogLevel::Debug);
+log_stream dlog(the_logger, log_level::debug);
-} // namespace csoca
+} // namespace music
--- a/src/main.cc
+++ b/src/main.cc
@ -3,6 +3,7 @@
 #include <iostream>
 #include <fstream>
 #include <thread>
 #include <cfenv>
 #if defined(_OPENMP)
 #include <omp.h>
@ -10,6 +11,7 @@
 #include <general.hh>
 #include <ic_generator.hh>
 #include <particle_plt.hh>
 // initialise with "default" values
@ -26,10 +28,28 @@ int  num_threads = 1;
 #include "system_stat.hh"
 #include <exception>
 #include <stdexcept>
 //! Report the exception held by `eptr` on the error log stream.
 //!
 //! \param eptr  exception handle, e.g. obtained from std::current_exception();
 //!              a null handle is silently ignored.
 //!
 //! The exception is rethrown locally and caught again so its message can be
 //! printed. Exceptions derived from std::exception are reported with their
 //! what() text; any other exception type is reported generically instead of
 //! escaping from this helper (which would skip the caller's own cleanup).
 void handle_eptr(std::exception_ptr eptr) // passing by value is ok
 {
    // nothing to report when no exception is in flight
    if (!eptr) {
        return;
    }
    try {
        std::rethrow_exception(eptr);
    } catch(const std::exception& e) {
        music::elog << "This happened: \"" << e.what() << "\"" << std::endl;
    } catch(...) {
        // not derived from std::exception: there is no what() to print, but we
        // must still swallow it here so the caller can finish shutting down
        music::elog << "This happened: unknown exception (not derived from std::exception)" << std::endl;
    }
 }
 int main( int argc, char** argv )
 {
-    csoca::Logger::SetLevel(csoca::LogLevel::Info);
+
-    // csoca::Logger::SetLevel(csoca::LogLevel::Debug);
+#if defined(NDEBUG)
    music::logger::set_level(music::log_level::info);
 #else
    music::logger::set_level(music::log_level::debug);
 #endif
    //------------------------------------------------------------------------------
    // initialise MPI 
@ -45,19 +65,38 @@ int main( int argc, char** argv )
    // set up lower logging levels for other tasks
    if( CONFIG::MPI_task_rank!=0 )
    {
-        csoca::Logger::SetLevel(csoca::LogLevel::Error);
+        music::logger::set_level(music::log_level::error);
    }
 #endif
-    csoca::ilog << "\n"
+    // Ascii ART logo. generated via http://patorjk.com/software/taag/#p=display&f=Nancyj&t=monofonIC
-                << " unigrid MUSIC                          .8888b                   dP  a88888b. \n"
+    music::ilog << "\n"
                << " The unigrid version of MUSIC-2         .8888b                   dP  a88888b. \n"
                << "                                        88   \"                   88 d8\'   `88 \n"
                << "  88d8b.d8b. .d8888b. 88d888b. .d8888b. 88aaa  .d8888b. 88d888b. 88 88        \n"
                << "  88\'`88\'`88 88\'  `88 88\'  `88 88\'  `88 88     88\'  `88 88\'  `88 88 88        \n"
                << "  88  88  88 88.  .88 88    88 88.  .88 88     88.  .88 88    88 88 Y8.   .88 \n"
-                << "  dP  dP  dP `88888P\' dP    dP `88888P\' dP     `88888P\' dP    dP dP  Y88888P\' \n" << std::endl
+                << "  dP  dP  dP `88888P\' dP    dP `88888P\' dP     `88888P\' dP    dP dP  Y88888P\' \n" << std::endl;
-                << "version  : v0.1a, git rev. : " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl
+
-                << "-------------------------------------------------------------------------------" << std::endl;
+    // git and versioning info:
    music::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl;
    // Compilation CMake configuration, time etc info:
    music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " <<  __DATE__ << std::endl;
 #ifdef __GNUC__
    music::ilog << "Compiled with GNU C++ version " << __VERSION__ <<std::endl;
 #else
    music::ilog << "Compiled with " << __VERSION__ << std::endl;
 #endif
    music::ilog << "-------------------------------------------------------------------------------" << std::endl;
    music::ilog << "Compile time options : " << std::endl;
    music::ilog << "                       Precision : " << CMAKE_PRECISION_STR << std::endl;
    music::ilog << "                    Convolutions : " << CMAKE_CONVOLVER_STR << std::endl;
    music::ilog << "                             PLT : " << CMAKE_PLT_STR << std::endl;
    music::ilog << "-------------------------------------------------------------------------------" << std::endl;
    //------------------------------------------------------------------------------
@ -71,12 +110,12 @@ int main( int argc, char** argv )
        print_RNG_plugins();
        print_output_plugins();
-        csoca::elog << "In order to run, you need to specify a parameter file!" << std::endl;
+        music::elog << "In order to run, you need to specify a parameter file!\n" << std::endl;
        exit(0);
    }
    // open the configuration file 
-    ConfigFile the_config(argv[1]);
+    config_file the_config(argv[1]);
    //------------------------------------------------------------------------------
    // Set up FFTW
@ -95,7 +134,7 @@ int main( int argc, char** argv )
    FFTW_API(mpi_init)();
 #endif
-    CONFIG::num_threads = the_config.GetValueSafe<unsigned>("execution", "NumThreads",std::thread::hardware_concurrency());
+    CONFIG::num_threads = the_config.get_value_safe<unsigned>("execution", "NumThreads",std::thread::hardware_concurrency());
 #if defined(USE_FFTW_THREADS)
    if (CONFIG::FFTW_threads_ok)
@ -110,14 +149,16 @@ int main( int argc, char** argv )
    omp_set_num_threads(CONFIG::num_threads);
 #endif
    // std::feclearexcept(FE_ALL_EXCEPT);
    //------------------------------------------------------------------------------
    // Write code configuration to screen
    //------------------------------------------------------------------------------
    // hardware related infos
-    csoca::ilog << std::setw(32) << std::left << "CPU vendor string" << " : " << SystemStat::Cpu().get_CPUstring() << std::endl;
+    music::ilog << std::setw(32) << std::left << "CPU vendor string" << " : " << SystemStat::Cpu().get_CPUstring() << std::endl;
    // multi-threading related infos
-    csoca::ilog << std::setw(32) << std::left << "Available HW threads / task" << " : " << std::thread::hardware_concurrency() << " (" << CONFIG::num_threads << " used)" << std::endl;
+    music::ilog << std::setw(32) << std::left << "Available HW threads / task" << " : " << std::thread::hardware_concurrency() << " (" << CONFIG::num_threads << " used)" << std::endl;
    // memory related infos
    SystemStat::Memory mem;
@ -134,34 +175,34 @@ int main( int argc, char** argv )
    MPI_Allreduce(&minupmem,&temp,1,MPI_UNSIGNED,MPI_MIN,MPI_COMM_WORLD); minupmem = temp;
    MPI_Allreduce(&maxupmem,&temp,1,MPI_UNSIGNED,MPI_MAX,MPI_COMM_WORLD); maxupmem = temp;
 #endif
-    csoca::ilog << std::setw(32) << std::left << "Total system memory (phys)" << " : " << mem.get_TotalMem()/1024/1024 << " Mb" << std::endl;
+    music::ilog << std::setw(32) << std::left << "Total system memory (phys)" << " : " << mem.get_TotalMem()/1024/1024 << " Mb" << std::endl;
-    csoca::ilog << std::setw(32) << std::left << "Used system memory (phys)" << " : " << "Max: " << maxupmem << " Mb, Min: " << minupmem << " Mb" << std::endl;
+    music::ilog << std::setw(32) << std::left << "Used system memory (phys)" << " : " << "Max: " << maxupmem << " Mb, Min: " << minupmem << " Mb" << std::endl;
-    csoca::ilog << std::setw(32) << std::left << "Available system memory (phys)" << " : " <<  "Max: " << maxpmem << " Mb, Min: " << minpmem << " Mb" << std::endl;
+    music::ilog << std::setw(32) << std::left << "Available system memory (phys)" << " : " <<  "Max: " << maxpmem << " Mb, Min: " << minpmem << " Mb" << std::endl;
    // MPI related infos
 #if defined(USE_MPI)
-    csoca::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "yes (" << CONFIG::MPI_task_size << " tasks)" << std::endl;
+    music::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "yes (" << CONFIG::MPI_task_size << " tasks)" << std::endl;
-    csoca::dlog << std::setw(32) << std::left << "MPI version" << " : " << GetMPIversion() << std::endl;
+    music::dlog << std::setw(32) << std::left << "MPI version" << " : " << MPI::get_version() << std::endl;
 #else
-    csoca::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "no" << std::endl;
+    music::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "no" << std::endl;
 #endif
-    csoca::ilog << std::setw(32) << std::left << "MPI supports multi-threading" << " : " << (CONFIG::MPI_threads_ok? "yes" : "no") << std::endl;
+    music::ilog << std::setw(32) << std::left << "MPI supports multi-threading" << " : " << (CONFIG::MPI_threads_ok? "yes" : "no") << std::endl;
    // Kernel related infos
    SystemStat::Kernel kern;
    auto kinfo = kern.get_kernel_info();
-    csoca::ilog << std::setw(32) << std::left << "OS/Kernel version" << " : " << kinfo.kernel << " version " << kinfo.major << "." << kinfo.minor << " build " << kinfo.build_number << std::endl;
+    music::ilog << std::setw(32) << std::left << "OS/Kernel version" << " : " << kinfo.kernel << " version " << kinfo.major << "." << kinfo.minor << " build " << kinfo.build_number << std::endl;
    // FFTW related infos
-    csoca::ilog << std::setw(32) << std::left << "FFTW version" << " : " << fftw_version << std::endl;
+    music::ilog << std::setw(32) << std::left << "FFTW version" << " : " << fftw_version << std::endl;
-    csoca::ilog << std::setw(32) << std::left << "FFTW supports multi-threading" << " : " << (CONFIG::FFTW_threads_ok? "yes" : "no") << std::endl;
+    music::ilog << std::setw(32) << std::left << "FFTW supports multi-threading" << " : " << (CONFIG::FFTW_threads_ok? "yes" : "no") << std::endl;
-    csoca::ilog << std::setw(32) << std::left << "FFTW mode" << " : ";
+    music::ilog << std::setw(32) << std::left << "FFTW mode" << " : ";
 #if defined(FFTW_MODE_PATIENT)
-	csoca::ilog << "FFTW_PATIENT" << std::endl;
+	music::ilog << "FFTW_PATIENT" << std::endl;
 #elif defined(FFTW_MODE_MEASURE)
-    csoca::ilog << "FFTW_MEASURE" << std::endl;
+    music::ilog << "FFTW_MEASURE" << std::endl;
 #else
-	csoca::ilog << "FFTW_ESTIMATE" << std::endl;
+	music::ilog << "FFTW_ESTIMATE" << std::endl;
 #endif
    //--------------------------------------------------------------------
    // Initialise plug-ins
@ -170,7 +211,8 @@ int main( int argc, char** argv )
    {
        ic_generator::Initialise( the_config );
    }catch(...){
-        csoca::elog << "Problem during initialisation. See error(s) above. Exiting..." << std::endl;
+        handle_eptr( std::current_exception() );
        music::elog << "Problem during initialisation. See error(s) above. Exiting..." << std::endl;
        #if defined(USE_MPI) 
        MPI_Finalize();
        #endif
@ -181,6 +223,8 @@ int main( int argc, char** argv )
    // do the job...
    ///////////////////////////////////////////////////////////////////////
    ic_generator::Run( the_config );
    // particle::test_plt();
    ///////////////////////////////////////////////////////////////////////
 #if defined(USE_MPI)
@ -188,8 +232,8 @@ int main( int argc, char** argv )
    MPI_Finalize();
 #endif
-    csoca::ilog << "-------------------------------------------------------------------------------" << std::endl;
+    music::ilog << "-------------------------------------------------------------------------------" << std::endl;
-    csoca::ilog << "Done." << std::endl;
+    music::ilog << "Done. Have a nice day!\n" << std::endl;
    return 0;
 }
--- a/src/output_plugin.cc
+++ b/src/output_plugin.cc
@ -23,31 +23,32 @@ void print_output_plugins()
 	std::map< std::string, output_plugin_creator *>::iterator it;
 	it = m.begin();
-	csoca::ilog << "Available output plug-ins:\n";
+	music::ilog << "Available output plug-ins:\n";
 	while( it!=m.end() )
 	{
 		if( it->second )
-			csoca::ilog << "\t\'" << it->first << "\'\n";
+			music::ilog << "\t\'" << it->first << "\'\n";
 		++it;
 	}
 	music::ilog << std::endl;
 }
-std::unique_ptr<output_plugin> select_output_plugin( ConfigFile& cf )
+std::unique_ptr<output_plugin> select_output_plugin( config_file& cf )
 {
-	std::string formatname = cf.GetValue<std::string>( "output", "format" );
+	std::string formatname = cf.get_value<std::string>( "output", "format" );
 	output_plugin_creator *the_output_plugin_creator 
 	= get_output_plugin_map()[ formatname ];
 	if( !the_output_plugin_creator )
 	{	
-		csoca::elog << "Error: output plug-in \'" << formatname << "\' not found." << std::endl;
+		music::elog << "Output plug-in \'" << formatname << "\' not found." << std::endl;
 		print_output_plugins();
 		throw std::runtime_error("Unknown output plug-in");
 	}else{
-		csoca::ilog << "-------------------------------------------------------------------------------" << std::endl;
+		music::ilog << "-------------------------------------------------------------------------------" << std::endl;
-        csoca::ilog << std::setw(32) << std::left << "Output plugin" << " : " << formatname << std::endl;
+		music::ilog << std::setw(32) << std::left << "Output plugin" << " : " << formatname << std::endl;
 	}
 	return std::move(the_output_plugin_creator->create( cf ));
--- a/src/plugins/output_arepo.cc
+++ b/src/plugins/output_arepo.cc
@ -0,0 +1,241 @@
 #ifdef USE_HDF5
 #include <unistd.h> // for unlink
 #include <output_plugin.hh>
 #include "HDF_IO.hh"
 //! Copy the six consecutive elements starting at `a` into a std::vector<T>.
 //! The caller must guarantee that a[0]..a[5] are readable.
 template <typename T>
 std::vector<T> from_6array(const T *a)
 {
   // iterator-range construction over the half-open range [a, a + 6)
   std::vector<T> six(a, a + 6);
   return six;
 }
 //! Wrap a single value `a` in a std::vector<T> holding exactly one element.
 template <typename T>
 std::vector<T> from_value(const T a)
 {
   std::vector<T> single;
   single.push_back(a);
   return single;
 }
 //! Output plugin that writes particle data (positions, velocities, IDs) and a
 //! "Header" attribute group to an HDF5 file through the HDF_IO helper functions.
 //!
 //! \tparam write_real_t  floating point type written to disk; only its identity
 //!                       (double or not) is inspected, via typeid.
 //!
 //! The file is created empty in the constructor, particle groups are appended by
 //! write_particle_data(), and the header is written last, in the destructor.
 template <typename write_real_t>
 class gadget_hdf5_output_plugin : public output_plugin
 {
  //! In-memory copy of the header values; the per-type arrays have six slots
  //! (presumably one per Gadget/AREPO particle type -- TODO confirm).
  struct header_t
  {
    unsigned npart[6];                   // particles of each type in this file
    double mass[6];                      // particle mass per type
    double time;                         // set to the starting scale factor
    double redshift;                     // set to 1/astart - 1
    int flag_sfr;
    int flag_feedback;
    unsigned int npartTotal[6];          // low 32 bits of the global particle count
    int flag_cooling;
    int num_files;
    double BoxSize;
    double Omega0;
    double OmegaLambda;
    double HubbleParam;
    int flag_stellarage;
    int flag_metals;
    unsigned int npartTotalHighWord[6];  // high 32 bits of the global particle count
    int flag_entropy_instead_u;
    int flag_doubleprecision;
  };
 protected:
  // number of output files; num_simultaneous_writers_ is read from the config
  // but not used anywhere else in this class
  int num_files_, num_simultaneous_writers_;
  header_t header_;
  real_t lunit_, vunit_;   // values returned by position_unit() / velocity_unit()
  bool blongids_;          // config option output/UseLongids (default false)
  std::string this_fname_; // file written by this task (rank suffix added under MPI)
  double Tini_;            // value written as "suggested_gas_Tinit"
  unsigned pmgrid_;        // value written as "suggested_pmgrid"
  unsigned gridboost_;     // value written as "suggested_gridboost"
  int doublePrec_;         // 1 unless USE_SINGLEPRECISION is defined
  int doBaryons_;          // config option setup/DoBaryons
  double softening_;       // value written as "suggested_highressoft"
 public:
  //! constructor
  //! Reads all header quantities from the configuration `cf`, derives the
  //! suggested run-time parameters and (re)creates the output HDF5 file.
  explicit gadget_hdf5_output_plugin(config_file &cf)
      : output_plugin(cf, "GADGET-HDF5")
  {
    num_files_ = 1;
 #ifdef USE_MPI
    // use as many output files as we have MPI tasks
    MPI_Comm_size(MPI_COMM_WORLD, &num_files_);
 #endif
    // starting scale factor from the starting redshift
    real_t astart = 1.0 / (1.0 + cf_.get_value<double>("setup", "zstart"));
    lunit_ = cf_.get_value<double>("setup", "BoxLength");
    vunit_ = lunit_ / std::sqrt(astart);
    blongids_ = cf_.get_value_safe<bool>("output", "UseLongids", false);
    num_simultaneous_writers_ = cf_.get_value_safe<int>("output", "NumSimWriters", num_files_);
    // start with empty per-type counters and masses; write_particle_data fills them
    for (int i = 0; i < 6; ++i)
    {
      header_.npart[i] = 0;
      header_.npartTotal[i] = 0;
      header_.npartTotalHighWord[i] = 0;
      header_.mass[i] = 0.0;
    }
    header_.time = astart;
    header_.redshift = 1.0 / astart - 1.0;
    header_.flag_sfr = 0;
    header_.flag_feedback = 0;
    header_.flag_cooling = 0;
    header_.num_files = num_files_;
    header_.BoxSize = lunit_;
    header_.Omega0 = cf_.get_value<double>("cosmology", "Omega_m");
    header_.OmegaLambda = cf_.get_value<double>("cosmology", "Omega_L");
    header_.HubbleParam = cf_.get_value<double>("cosmology", "H0") / 100.0;
    header_.flag_stellarage = 0;
    header_.flag_metals = 0;
    header_.flag_entropy_instead_u = 0;
    // NOTE(review): this field is derived from write_real_t, but the destructor
    // writes doublePrec_ (derived from USE_SINGLEPRECISION) as "Flag_DoublePrecision"
    header_.flag_doubleprecision = (typeid(write_real_t) == typeid(double)) ? true : false;
    // initial gas temperature
    // presumably Tcmb0 is today's CMB temperature in K and adec the scale factor
    // at which gas and CMB temperatures decouple -- TODO confirm against reference
    double Tcmb0 = 2.726;
    double Omegab = cf_.get_value<double>("cosmology", "Omega_b");
    double h = cf_.get_value<double>("cosmology", "H0") / 100.0, h2 = h*h;
    double adec = 1.0 / (160.0 * pow(Omegab * h2 / 0.022, 2.0 / 5.0));
    // scales as 1/a before adec and as adec/a^2 afterwards
    Tini_ = astart < adec ? Tcmb0 / astart : Tcmb0 / astart / astart * adec;
    // suggested PM res
    // twice the configured grid resolution
    pmgrid_ = 2*cf_.get_value<double>("setup", "GridRes");
    gridboost_ = 1;
    // one twentieth of the suggested PM cell size
    softening_ = cf_.get_value<double>("setup", "BoxLength")/pmgrid_/20;
    doBaryons_ = cf_.get_value<bool>("setup", "DoBaryons");
 #if !defined(USE_SINGLEPRECISION)
    doublePrec_ = 1;
 #else
    doublePrec_ = 0;
 #endif
    this_fname_ = fname_;
 #ifdef USE_MPI
    // with more than one file, each task writes "<fname>.<rank>"
    int thisrank = 0;
    MPI_Comm_rank(MPI_COMM_WORLD, &thisrank);
    if (num_files_ > 1)
      this_fname_ += "." + std::to_string(thisrank);
 #endif
    // remove any stale file of the same name before creating a fresh one
    unlink(this_fname_.c_str());
    HDFCreateFile(this_fname_);
  }
  // use destructor to write header post factum
  // (the per-type counts and masses are only known after write_particle_data ran)
  ~gadget_hdf5_output_plugin()
  {
    HDFCreateGroup(this_fname_, "Header");
    HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_ThisFile", from_6array<unsigned>(header_.npart));
    HDFWriteGroupAttribute(this_fname_, "Header", "MassTable", from_6array<double>(header_.mass));
    HDFWriteGroupAttribute(this_fname_, "Header", "Time", from_value<double>(header_.time));
    HDFWriteGroupAttribute(this_fname_, "Header", "Redshift", from_value<double>(header_.redshift));
    HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total", from_6array<unsigned>(header_.npartTotal));
    HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array<unsigned>(header_.npartTotalHighWord));
    HDFWriteGroupAttribute(this_fname_, "Header", "NumFilesPerSnapshot", from_value<int>(header_.num_files));
    HDFWriteGroupAttribute(this_fname_, "Header", "BoxSize", from_value<double>(header_.BoxSize));
    HDFWriteGroupAttribute(this_fname_, "Header", "Omega0", from_value<double>(header_.Omega0));
    HDFWriteGroupAttribute(this_fname_, "Header", "OmegaLambda", from_value<double>(header_.OmegaLambda));
    HDFWriteGroupAttribute(this_fname_, "Header", "HubbleParam", from_value<double>(header_.HubbleParam));
    // the flags below are written as literal zeros rather than from header_
    // (the constructor sets the corresponding header_ fields to 0 as well)
    HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Sfr", from_value<int>(0));
    HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Cooling", from_value<int>(0));
    HDFWriteGroupAttribute(this_fname_, "Header", "Flag_StellarAge", from_value<int>(0));
    HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Metals", from_value<int>(0));
    HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Feedback", from_value<int>(0));
    // NOTE(review): passed as a plain int, unlike the from_value<>() wrapped
    // attributes around it -- confirm which HDFWriteGroupAttribute overload is intended
    HDFWriteGroupAttribute(this_fname_, "Header", "Flag_DoublePrecision", (int)doublePrec_);
    // HDFWriteGroupAttribute(this_fname_, "Header", "Music_levelmin", levelmin_);
    // HDFWriteGroupAttribute(this_fname_, "Header", "Music_levelmax", levelmax_);
    // HDFWriteGroupAttribute(this_fname_, "Header", "Music_levelcounts", levelcounts);
    HDFWriteGroupAttribute(this_fname_, "Header", "haveBaryons", from_value<int>((int)doBaryons_));
    HDFWriteGroupAttribute(this_fname_, "Header", "longIDs", from_value<int>((int)blongids_));
    HDFWriteGroupAttribute(this_fname_, "Header", "suggested_pmgrid", from_value<int>(pmgrid_));
    HDFWriteGroupAttribute(this_fname_, "Header", "suggested_gridboost", from_value<int>(gridboost_));
    HDFWriteGroupAttribute(this_fname_, "Header", "suggested_highressoft", from_value<double>(softening_));
    HDFWriteGroupAttribute(this_fname_, "Header", "suggested_gas_Tinit", from_value<double>(Tini_));
    music::ilog << "Wrote" << std::endl;
  }
  //! Every species is written as particles by this plugin.
  output_type write_species_as(const cosmo_species &) const { return output_type::particles; }
  //! Returns lunit_, i.e. the configured setup/BoxLength.
  real_t position_unit() const { return lunit_; }
  //! Returns vunit_, i.e. lunit_ / sqrt(astart).
  real_t velocity_unit() const { return vunit_; }
  //! True exactly when write_real_t is double.
  bool has_64bit_reals() const
  {
    if (typeid(write_real_t) == typeid(double))
      return true;
    return false;
  }
  //! True when long IDs were requested via output/UseLongids.
  bool has_64bit_ids() const
  {
    if (blongids_)
      return true;
    return false;
  }
  //! Map a species to its slot in the six-element header arrays and to the
  //! number used in the "PartType<N>" group name; -1 for an unhandled species.
  int get_species_idx(const cosmo_species &s) const
  {
    switch (s)
    {
    case cosmo_species::dm:
      return 1;
    case cosmo_species::baryon:
      return 0;
    case cosmo_species::neutrino:
      return 3;
    }
    return -1;
  }
  //! Write one species' particles to the group "PartType<N>" and record its
  //! counts and particle mass in the header.
  //! \param pc             particle container providing counts, positions, velocities and IDs
  //! \param s              species being written; must map to a valid index (asserted)
  //! \param Omega_species  density parameter used to compute this species' particle mass
  void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species)
  {
    int sid = get_species_idx(s);
    assert(sid != -1);
    header_.npart[sid] = (pc.get_local_num_particles());
    // split the global count into low and high 32-bit words
    header_.npartTotal[sid] = (uint32_t)(pc.get_global_num_particles());
    header_.npartTotalHighWord[sid] = (uint32_t)((pc.get_global_num_particles()) >> 32);
    double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3
    // total mass of this species in the box, shared equally among all its particles
    double boxmass = Omega_species * rhoc * std::pow(header_.BoxSize, 3);
    header_.mass[sid] = boxmass / pc.get_global_num_particles();
    HDFCreateGroup(this_fname_, std::string("PartType") + std::to_string(sid));
    //... write positions and velocities.....
    // the 64-bit or 32-bit container arrays are chosen to match write_real_t
    if (this->has_64bit_reals())
    {
      HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions64_);
      HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities64_);
    }
    else
    {
      HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions32_);
      HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities32_);
    }
    //... write ids.....
    if (this->has_64bit_ids())
      HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids64_);
    else
      HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids32_);
    // std::cout << ">>>A> " << header_.npart[sid] << std::endl;
  }
 };
 namespace
 {
 #if !defined(USE_SINGLEPRECISION)
 output_plugin_creator_concrete<gadget_hdf5_output_plugin<double>> creator1("AREPO");
 #else
 output_plugin_creator_concrete<gadget_hdf5_output_plugin<float>> creator1("AREPO");
 #endif
 } // namespace
 #endif
--- a/src/plugins/output_gadget2.cc
+++ b/src/plugins/output_gadget2.cc
@ -3,6 +3,7 @@
 constexpr int empty_fill_bytes{56};
 template <typename write_real_t>
 class gadget2_output_plugin : public output_plugin
 {
 public:
@ -33,10 +34,11 @@ protected:
 	int num_files_;
 	header this_header_;
 	real_t lunit_, vunit_;
 	bool blongids_;
 public:
 	//! constructor
-	explicit gadget2_output_plugin(ConfigFile &cf )
+	explicit gadget2_output_plugin(config_file &cf)
 			: output_plugin(cf, "GADGET-2")
 	{
 		num_files_ = 1;
@ -44,21 +46,36 @@ public:
 		// use as many output files as we have MPI tasks
 		MPI_Comm_size(MPI_COMM_WORLD, &num_files_);
 #endif
-		real_t astart = 1.0/(1.0+cf_.GetValue<double>("setup", "zstart"));
+		real_t astart = 1.0 / (1.0 + cf_.get_value<double>("setup", "zstart"));
-		lunit_ = cf_.GetValue<double>("setup", "BoxLength");
+		lunit_ = cf_.get_value<double>("setup", "BoxLength");
 		vunit_ = lunit_ / std::sqrt(astart);
 		blongids_ = cf_.get_value_safe<bool>("output", "UseLongids", false);
 	}
-    output_type write_species_as( const cosmo_species & ) const { return output_type::particles; }
+	output_type write_species_as(const cosmo_species &) const { return output_type::particles; }
 	real_t position_unit() const { return lunit_; }
 	real_t velocity_unit() const { return vunit_; }
-	void write_particle_data(const particle::container &pc, const cosmo_species &s )
+	bool has_64bit_reals() const
 	{
 		if (typeid(write_real_t) == typeid(double))
 			return true;
 		return false;
 	}
 	bool has_64bit_ids() const
 	{
 		if (blongids_)
 			return true;
 		return false;
 	}
 	void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species)
 	{
 		// fill the Gadget-2 header
-		memset(reinterpret_cast<void*>(&this_header_),0,sizeof(header));
+		memset(reinterpret_cast<void *>(&this_header_), 0, sizeof(header));
 		for (int i = 0; i < 6; ++i)
 		{
@ -73,7 +90,7 @@ public:
 		/////
 		//... set time ......................................................
-		this_header_.redshift = cf_.GetValue<double>("setup", "zstart");
+		this_header_.redshift = cf_.get_value<double>("setup", "zstart");
 		this_header_.time = 1.0 / (1.0 + this_header_.redshift);
 		//... SF flags
@ -83,10 +100,10 @@ public:
 		//...
 		this_header_.num_files = num_files_; //1;
-		this_header_.BoxSize = cf_.GetValue<double>("setup", "BoxLength");
+		this_header_.BoxSize = cf_.get_value<double>("setup", "BoxLength");
-		this_header_.Omega0 = cf_.GetValue<double>("cosmology", "Omega_m");
+		this_header_.Omega0 = cf_.get_value<double>("cosmology", "Omega_m");
-		this_header_.OmegaLambda = cf_.GetValue<double>("cosmology", "Omega_L");
+		this_header_.OmegaLambda = cf_.get_value<double>("cosmology", "Omega_L");
-		this_header_.HubbleParam = cf_.GetValue<double>("cosmology", "H0") / 100.0;
+		this_header_.HubbleParam = cf_.get_value<double>("cosmology", "H0") / 100.0;
 		this_header_.flag_stellarage = 0;
 		this_header_.flag_metals = 0;
@ -100,50 +117,73 @@ public:
 		//... set masses
 		double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3
-		double boxmass = this_header_.Omega0 * rhoc * std::pow(this_header_.BoxSize,3);
+		double boxmass = Omega_species * rhoc * std::pow(this_header_.BoxSize, 3);
 		this_header_.mass[1] = boxmass / pc.get_global_num_particles();
 		std::string fname = fname_;
 		int thisrank = 0;
 #ifdef USE_MPI
-		MPI_Comm_rank(MPI_COMM_WORLD,&thisrank);
+		MPI_Comm_rank(MPI_COMM_WORLD, &thisrank);
-		if( num_files_ > 1 )
+		if (num_files_ > 1)
 			fname += "." + std::to_string(thisrank);
 #endif
 		uint32_t blocksz;
 		std::ofstream ofs(fname.c_str(), std::ios::binary);
-		csoca::ilog << "Writer \'" << this->interface_name_ << "\' : Writing data for " << pc.get_global_num_particles() << " particles." << std::endl;
+		music::ilog << "Writer \'" << this->interface_name_ << "\' : Writing data for " << pc.get_global_num_particles() << " particles." << std::endl;
 		blocksz = sizeof(header);
-		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
+		ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
-		ofs.write( reinterpret_cast<char*>(&this_header_), sizeof(header) );
+		ofs.write(reinterpret_cast<char *>(&this_header_), sizeof(header));
-		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
+		ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
 		// we write double precision
 		if (this->has_64bit_reals())
 		{
 			blocksz = 3 * sizeof(double) * pc.get_local_num_particles();
 			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
 			ofs.write(reinterpret_cast<const char *>(pc.get_pos64_ptr()), blocksz);
 			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
 			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
 			ofs.write(reinterpret_cast<const char *>(pc.get_vel64_ptr()), blocksz);
 			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
 		}
 		else
 		{
 			blocksz = 3 * sizeof(float) * pc.get_local_num_particles();
-		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
-		ofs.write( reinterpret_cast<const char*>(pc.get_pos_ptr()), blocksz );
+			ofs.write(reinterpret_cast<const char *>(pc.get_pos32_ptr()), blocksz);
-		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
-		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
-		ofs.write( reinterpret_cast<const char*>(pc.get_vel_ptr()), blocksz );
+			ofs.write(reinterpret_cast<const char *>(pc.get_vel32_ptr()), blocksz);
-		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
+			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
-		
+		}
 		blocksz = sizeof(float) * pc.get_local_num_particles();
 		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
 		ofs.write( reinterpret_cast<const char*>(pc.get_ids_ptr()), blocksz );
 		ofs.write( reinterpret_cast<char*>(&blocksz), sizeof(uint32_t) );
 		// we write long IDs
 		if (this->has_64bit_ids())
 		{
 			blocksz = sizeof(uint64_t) * pc.get_local_num_particles();
 			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
 			ofs.write(reinterpret_cast<const char *>(pc.get_ids64_ptr()), blocksz);
 			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
 		}
 		else
 		{
 			blocksz = sizeof(uint32_t) * pc.get_local_num_particles();
 			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
 			ofs.write(reinterpret_cast<const char *>(pc.get_ids32_ptr()), blocksz);
 			ofs.write(reinterpret_cast<char *>(&blocksz), sizeof(uint32_t));
 		}
 	}
 };
 namespace
 {
-   output_plugin_creator_concrete<gadget2_output_plugin> creator1("gadget2"); 
+output_plugin_creator_concrete<gadget2_output_plugin<float>> creator1("gadget2");
-// output_plugin_creator_concrete<gadget2_output_plugin<float>> creator1("gadget2");
+#if !defined(USE_SINGLEPRECISION)
-// #ifndef SINGLE_PRECISION
+output_plugin_creator_concrete<gadget2_output_plugin<double>> creator3("gadget2_double");
-// output_plugin_creator_concrete<gadget2_output_plugin<double>> creator2("gadget2_double");
+#endif
 // #endif
 } // namespace
--- a/src/plugins/output_gadget_hdf5.cc
+++ b/src/plugins/output_gadget_hdf5.cc
@ -0,0 +1,210 @@
 #ifdef USE_HDF5
 #include <unistd.h> // for unlink
 #include <exception> // for std::uncaught_exception
 #include <stdexcept> // for std::runtime_error
 #include <typeinfo>  // for typeid
 #include <output_plugin.hh>
 #include "HDF_IO.hh"
 //! Copy the six consecutive elements starting at \p a into a new std::vector.
 /*!
  * @param a pointer to at least six readable elements of type T
  * @return vector holding a[0] .. a[5] in order
  */
 template <typename T>
 std::vector<T> from_6array(const T *a)
 {
  const T *const first = a;
  const T *const last = a + 6;
  return std::vector<T>(first, last);
 }
 //! Wrap a single value into a std::vector with exactly one element.
 /*!
  * @param a value to store
  * @return vector of size one whose only element equals \p a
  */
 template <typename T>
 std::vector<T> from_value(const T a)
 {
  std::vector<T> single;
  single.push_back(a);
  return single;
 }
 //! Output plugin writing particle data into Gadget-style HDF5 files.
 /*!
  * The template parameter selects the floating point type used for positions
  * and velocities (float or double). One file is written per MPI task when
  * compiled with USE_MPI and more than one task is present. Particle data is
  * written by write_particle_data(); the "Header" group is written by the
  * destructor, because particle counts and masses are only known after all
  * species have been written.
  */
 template <typename write_real_t>
 class gadget_hdf5_output_plugin : public output_plugin
 {
  //! in-memory copy of the values stored as attributes of the "Header" group
  struct header_t
  {
    unsigned npart[6];
    double mass[6];
    double time;
    double redshift;
    int flag_sfr;
    int flag_feedback;
    unsigned int npartTotal[6];
    int flag_cooling;
    int num_files;
    double BoxSize;
    double Omega0;
    double OmegaLambda;
    double HubbleParam;
    int flag_stellarage;
    int flag_metals;
    unsigned int npartTotalHighWord[6];
    int flag_entropy_instead_u;
    int flag_doubleprecision;
  };

 protected:
  int num_files_, num_simultaneous_writers_;
  header_t header_;
  real_t lunit_, vunit_;
  bool blongids_;
  std::string this_fname_;

 public:
  //! constructor
  /*!
   * Reads box size, starting redshift and cosmological parameters from the
   * configuration, fills the header with defaults, determines the per-task
   * file name and creates an empty HDF5 file (replacing any existing one).
   */
  explicit gadget_hdf5_output_plugin(config_file &cf)
      : output_plugin(cf, "GADGET-HDF5")
  {
    num_files_ = 1;

 #ifdef USE_MPI
    // use as many output files as we have MPI tasks
    MPI_Comm_size(MPI_COMM_WORLD, &num_files_);
 #endif

    real_t astart = 1.0 / (1.0 + cf_.get_value<double>("setup", "zstart"));
    lunit_ = cf_.get_value<double>("setup", "BoxLength");
    vunit_ = lunit_ / std::sqrt(astart);
    blongids_ = cf_.get_value_safe<bool>("output", "UseLongids", false);
    num_simultaneous_writers_ = cf_.get_value_safe<int>("output", "NumSimWriters", num_files_);

    // per-type entries are filled in later by write_particle_data()
    for (int i = 0; i < 6; ++i)
    {
      header_.npart[i] = 0;
      header_.npartTotal[i] = 0;
      header_.npartTotalHighWord[i] = 0;
      header_.mass[i] = 0.0;
    }

    header_.time = astart;
    header_.redshift = 1.0 / astart - 1.0;
    header_.flag_sfr = 0;
    header_.flag_feedback = 0;
    header_.flag_cooling = 0;
    header_.num_files = num_files_;
    header_.BoxSize = lunit_;
    header_.Omega0 = cf_.get_value<double>("cosmology", "Omega_m");
    header_.OmegaLambda = cf_.get_value<double>("cosmology", "Omega_L");
    header_.HubbleParam = cf_.get_value<double>("cosmology", "H0") / 100.0;
    header_.flag_stellarage = 0;
    header_.flag_metals = 0;
    header_.flag_entropy_instead_u = 0;
    header_.flag_doubleprecision = (typeid(write_real_t) == typeid(double)) ? 1 : 0;

    this_fname_ = fname_;
 #ifdef USE_MPI
    int thisrank = 0;
    MPI_Comm_rank(MPI_COMM_WORLD, &thisrank);
    // with several tasks every task writes its own file "<fname>.<rank>"
    if (num_files_ > 1)
      this_fname_ += "." + std::to_string(thisrank);
 #endif

    // best effort removal of a stale file; failure (e.g. file absent) is ignored on purpose
    unlink(this_fname_.c_str());
    HDFCreateFile(this_fname_);
  }

  // use destructor to write header post factum
  // NOTE(review): std::uncaught_exception() is deprecated since C++17 and removed in C++20;
  // switch to std::uncaught_exceptions() once the project requires C++17.
  ~gadget_hdf5_output_plugin()
  {
    // skip the header if we are being destroyed during stack unwinding
    if (!std::uncaught_exception())
    {
      HDFCreateGroup(this_fname_, "Header");
      HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_ThisFile", from_6array<unsigned>(header_.npart));
      HDFWriteGroupAttribute(this_fname_, "Header", "MassTable", from_6array<double>(header_.mass));
      HDFWriteGroupAttribute(this_fname_, "Header", "Time", from_value<double>(header_.time));
      HDFWriteGroupAttribute(this_fname_, "Header", "Redshift", from_value<double>(header_.redshift));
      HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Sfr", from_value<int>(header_.flag_sfr));
      HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Feedback", from_value<int>(header_.flag_feedback));
      HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total", from_6array<unsigned>(header_.npartTotal));
      HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Cooling", from_value<int>(header_.flag_cooling));
      HDFWriteGroupAttribute(this_fname_, "Header", "NumFilesPerSnapshot", from_value<int>(header_.num_files));
      HDFWriteGroupAttribute(this_fname_, "Header", "BoxSize", from_value<double>(header_.BoxSize));
      HDFWriteGroupAttribute(this_fname_, "Header", "Omega0", from_value<double>(header_.Omega0));
      HDFWriteGroupAttribute(this_fname_, "Header", "OmegaLambda", from_value<double>(header_.OmegaLambda));
      HDFWriteGroupAttribute(this_fname_, "Header", "HubbleParam", from_value<double>(header_.HubbleParam));
      HDFWriteGroupAttribute(this_fname_, "Header", "Flag_StellarAge", from_value<int>(header_.flag_stellarage));
      HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Metals", from_value<int>(header_.flag_metals));
      HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array<unsigned>(header_.npartTotalHighWord));
      HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Entropy_ICs", from_value<int>(header_.flag_entropy_instead_u));
      // previously computed but never stored: tells readers the precision of the datasets
      HDFWriteGroupAttribute(this_fname_, "Header", "Flag_DoublePrecision", from_value<int>(header_.flag_doubleprecision));

      music::ilog << "Wrote Gadget-HDF5 file(s) to " << this_fname_ << std::endl;
    }
  }

  //! every species is written as particles by this plugin
  output_type write_species_as(const cosmo_species &) const { return output_type::particles; }

  //! length unit: the box length read from setup/BoxLength
  real_t position_unit() const { return lunit_; }

  //! velocity unit: box length divided by sqrt of the starting scale factor
  real_t velocity_unit() const { return vunit_; }

  //! true iff positions/velocities are written in double precision
  bool has_64bit_reals() const
  {
    return typeid(write_real_t) == typeid(double);
  }

  //! true iff 64bit particle IDs were requested via output/UseLongids
  bool has_64bit_ids() const
  {
    return blongids_;
  }

  //! map a species to its Gadget particle type index, or -1 if it has none
  int get_species_idx(const cosmo_species &s) const
  {
    switch (s)
    {
    case cosmo_species::dm:
      return 1;
    case cosmo_species::baryon:
      return 0;
    case cosmo_species::neutrino:
      return 3;
    }
    return -1;
  }

  //! write positions, velocities and IDs of one species and record its header entries
  /*!
   * @param pc            particle container holding the data of this task
   * @param s             species the particles belong to
   * @param Omega_species density parameter of the species, used for the particle mass
   * @throws std::runtime_error if the species has no Gadget particle type
   */
  void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species)
  {
    const int sid = get_species_idx(s);

    // explicit check instead of assert: with NDEBUG an index of -1 would
    // silently write outside the six-element header arrays
    if (sid < 0 || sid >= 6)
      throw std::runtime_error("gadget_hdf5_output_plugin: species cannot be mapped to a Gadget particle type");

    // widen first so that the shift by 32 below is always well defined
    const uint64_t nglobal = pc.get_global_num_particles();

    header_.npart[sid] = (pc.get_local_num_particles());
    header_.npartTotal[sid] = (uint32_t)(nglobal);
    header_.npartTotalHighWord[sid] = (uint32_t)(nglobal >> 32);

    double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3
    double boxmass = Omega_species * rhoc * std::pow(header_.BoxSize, 3);
    // avoid storing inf/nan in the mass table when the species has no particles
    header_.mass[sid] = (nglobal > 0) ? boxmass / nglobal : 0.0;

    const std::string group = std::string("PartType") + std::to_string(sid);
    HDFCreateGroup(this_fname_, group);

    //... write positions and velocities.....
    if (this->has_64bit_reals())
    {
      HDFWriteDatasetVector(this_fname_, group + std::string("/Coordinates"), pc.positions64_);
      HDFWriteDatasetVector(this_fname_, group + std::string("/Velocities"), pc.velocities64_);
    }
    else
    {
      HDFWriteDatasetVector(this_fname_, group + std::string("/Coordinates"), pc.positions32_);
      HDFWriteDatasetVector(this_fname_, group + std::string("/Velocities"), pc.velocities32_);
    }

    //... write ids.....
    if (this->has_64bit_ids())
      HDFWriteDataset(this_fname_, group + std::string("/ParticleIDs"), pc.ids64_);
    else
      HDFWriteDataset(this_fname_, group + std::string("/ParticleIDs"), pc.ids32_);
  }
 };
 // File-local objects whose construction at static-initialisation time
 // presumably registers the plugin with the output plugin factory under the
 // given name (see output_plugin_creator_concrete -- TODO confirm).
 namespace
 {
 // single precision variant, always available
 output_plugin_creator_concrete<gadget_hdf5_output_plugin<float>> creator1("gadget_hdf5");
 #if !defined(USE_SINGLEPRECISION)
 // double precision variant, only when the code is not built with USE_SINGLEPRECISION
 output_plugin_creator_concrete<gadget_hdf5_output_plugin<double>> creator3("gadget_hdf5_double");
 #endif
 } // namespace
 #endif
--- a/src/plugins/output_generic.cc
+++ b/src/plugins/output_generic.cc
@ -21,13 +21,13 @@ protected:
 	bool out_eulerian_;
 public:
 	//! constructor
-	explicit generic_output_plugin(ConfigFile &cf )
+	explicit generic_output_plugin(config_file &cf )
 	: output_plugin(cf, "Generic HDF5")
 	{
-		real_t astart   = 1.0/(1.0+cf_.GetValue<double>("setup", "zstart"));
+		real_t astart   = 1.0/(1.0+cf_.get_value<double>("setup", "zstart"));
-		real_t boxsize  = cf_.GetValue<double>("setup", "BoxLength");
+		real_t boxsize  = cf_.get_value<double>("setup", "BoxLength");
-		out_eulerian_   = cf_.GetValueSafe<bool>("output", "generic_out_eulerian",false);
+		out_eulerian_   = cf_.get_value_safe<bool>("output", "generic_out_eulerian",false);
 		if( CONFIG::MPI_task_rank == 0 )
 		{
@ -50,6 +50,10 @@ public:
 		return output_type::field_lagrangian;
 	}
 	bool has_64bit_reals() const{ return true; }
 	bool has_64bit_ids() const{ return true; }
 	real_t position_unit() const { return 1.0; }
 	real_t velocity_unit() const { return 1.0; }
@ -95,7 +99,7 @@ void generic_output_plugin::write_grid_data(const Grid_FFT<real_t> &g, const cos
 {
 	std::string field_name = this->get_field_name( s, c );
 	g.Write_to_HDF5(fname_, field_name);
-	csoca::ilog << interface_name_ << " : Wrote field \'" << field_name << "\' to file \'" << fname_ << "\'" << std::endl;
+	music::ilog << interface_name_ << " : Wrote field \'" << field_name << "\' to file \'" << fname_ << "\'" << std::endl;
 }
 namespace
--- a/src/plugins/output_grafic2.cc
+++ b/src/plugins/output_grafic2.cc
@ -40,31 +40,31 @@ protected:
 public:
    //! constructor
-    explicit grafic2_output_plugin(ConfigFile &cf)
+    explicit grafic2_output_plugin(config_file &cf)
        : output_plugin(cf, "GRAFIC2/RAMSES")
    {
        lunit_ = 1.0;
        vunit_ = 1.0;
        double
-            boxlength = cf_.GetValue<double>("setup", "BoxLength"),
+            boxlength = cf_.get_value<double>("setup", "BoxLength"),
-            H0 = cf_.GetValue<double>("cosmology", "H0"),
+            H0 = cf_.get_value<double>("cosmology", "H0"),
-            zstart = cf_.GetValue<double>("setup", "zstart"),
+            zstart = cf_.get_value<double>("setup", "zstart"),
            astart = 1.0 / (1.0 + zstart),
-            omegam = cf_.GetValue<double>("cosmology", "Omega_m"),
+            omegam = cf_.get_value<double>("cosmology", "Omega_m"),
-            omegaL = cf_.GetValue<double>("cosmology", "Omega_L");
+            omegaL = cf_.get_value<double>("cosmology", "Omega_L");
-        uint32_t ngrid = cf_.GetValue<int>("setup", "GridRes");
+        uint32_t ngrid = cf_.get_value<int>("setup", "GridRes");
-        bUseSPT_ = cf_.GetValueSafe<bool>("output", "grafic_use_SPT", false);
+        bUseSPT_ = cf_.get_value_safe<bool>("output", "grafic_use_SPT", false);
        levelmin_ = uint32_t(std::log2(double(ngrid)) + 1e-6);
        if (std::abs(std::pow(2.0, levelmin_) - double(ngrid)) > 1e-4)
        {
-            csoca::elog << interface_name_ << " plugin requires setup/GridRes to be power of 2!" << std::endl;
+            music::elog << interface_name_ << " plugin requires setup/GridRes to be power of 2!" << std::endl;
            abort();
        }
-        bhavebaryons_ = cf_.GetValueSafe<bool>("setup", "baryons", false);
+        bhavebaryons_ = cf_.get_value_safe<bool>("setup", "baryons", false);
        header_.n1 = ngrid;
        header_.n2 = ngrid;
@ -89,7 +89,7 @@ public:
        mkdir(dirname_.c_str(), 0777);
        // write RAMSES namelist file? if so only with one task
-        if (cf_.GetValueSafe<bool>("output", "ramses_nml", true) && CONFIG::MPI_task_rank==0 )
+        if (cf_.get_value_safe<bool>("output", "ramses_nml", true) && CONFIG::MPI_task_rank==0 )
        {
            write_ramses_namelist();
        }
@ -102,6 +102,10 @@ public:
        return output_type::field_lagrangian;
    }
    bool has_64bit_reals() const{ return false; }
 	bool has_64bit_ids() const{ return false; }
    real_t position_unit() const { return lunit_; }
    real_t velocity_unit() const { return vunit_; }
@ -192,7 +196,7 @@ void grafic2_output_plugin::write_grid_data(const Grid_FFT<real_t> &g, const cos
            }
            // check field size against buffer size...
-            uint32_t ngrid = cf_.GetValue<int>("setup", "GridRes");
+            uint32_t ngrid = cf_.get_value<int>("setup", "GridRes");
            assert( g.global_size(0) == ngrid && g.global_size(1) == ngrid && g.global_size(2) == ngrid);
            assert( g.size(1) == ngrid && g.size(2) == ngrid);
            // write actual field slice by slice
@ -219,7 +223,7 @@ void grafic2_output_plugin::write_grid_data(const Grid_FFT<real_t> &g, const cos
    } // end loop over write_rank
-    csoca::ilog << interface_name_ << " : Wrote field to file \'" << file_name << "\'" << std::endl;
+    music::ilog << interface_name_ << " : Wrote field to file \'" << file_name << "\'" << std::endl;
 }
 void grafic2_output_plugin::write_ramses_namelist(void) const
@ -275,7 +279,7 @@ void grafic2_output_plugin::write_ramses_namelist(void) const
         << "m_refine=" << 1 + naddref << "*8.,\n"
         << "/\n";
-    csoca::ilog << interface_name_ << " wrote partial RAMSES namelist file \'" << fname_ << "\'" << std::endl;
+    music::ilog << interface_name_ << " wrote partial RAMSES namelist file \'" << fname_ << "\'" << std::endl;
 }
 namespace
--- a/src/plugins/random_music.cc
+++ b/src/plugins/random_music.cc
@ -34,29 +34,29 @@ protected:
  //void store_rnd(int ilevel, rng *prng);
 public:
-  explicit RNG_music(ConfigFile &cf) : RNG_plugin(cf), initialized_(false) {}
+  explicit RNG_music(config_file &cf) : RNG_plugin(cf), initialized_(false) {}
  ~RNG_music() {}
  bool isMultiscale() const { return true; }
-  void Fill_Grid( Grid_FFT<real_t>& g ) const { }
+  void Fill_Grid( Grid_FFT<real_t>& g ) {} //const { }
  void initialize_for_grid_structure()//const refinement_hierarchy &refh)
  {
    //prefh_ = &refh;
-    levelmin_ = pcf_->GetValue<unsigned>("setup", "levelmin");
+    levelmin_ = pcf_->get_value<unsigned>("setup", "levelmin");
-    levelmax_ = pcf_->GetValue<unsigned>("setup", "levelmax");
+    levelmax_ = pcf_->get_value<unsigned>("setup", "levelmax");
-    ran_cube_size_ = pcf_->GetValueSafe<unsigned>("random", "cubesize", DEF_RAN_CUBE_SIZE);
+    ran_cube_size_ = pcf_->get_value_safe<unsigned>("random", "cubesize", DEF_RAN_CUBE_SIZE);
-    disk_cached_ = pcf_->GetValueSafe<bool>("random", "disk_cached", true);
+    disk_cached_ = pcf_->get_value_safe<bool>("random", "disk_cached", true);
-    restart_ = pcf_->GetValueSafe<bool>("random", "restart", false);
+    restart_ = pcf_->get_value_safe<bool>("random", "restart", false);
    mem_cache_.assign(levelmax_ - levelmin_ + 1, (std::vector<real_t> *)NULL);
    if (restart_ && !disk_cached_)
    {
-      csoca::elog.Print("Cannot restart from mem cached random numbers.");
+      music::elog.Print("Cannot restart from mem cached random numbers.");
      throw std::runtime_error("Cannot restart from mem cached random numbers.");
    }
@ -93,8 +93,8 @@ void RNG_music::parse_random_parameters(void)
    std::string tempstr;
    bool noseed = false;
    sprintf(seedstr, "seed[%d]", i);
-    if (pcf_->ContainsKey("random", seedstr))
+    if (pcf_->contains_key("random", seedstr))
-      tempstr = pcf_->GetValue<std::string>("random", seedstr);
+      tempstr = pcf_->get_value<std::string>("random", seedstr);
    else
    {
      // "-2" means that no seed entry was found for that level
@ -105,7 +105,7 @@ void RNG_music::parse_random_parameters(void)
    if (is_number(tempstr))
    {
      long ltemp;
-      pcf_->Convert(tempstr, ltemp);
+      pcf_->convert(tempstr, ltemp);
      rngfnames_.push_back("");
      if (noseed) // ltemp < 0 )
        //... generate some dummy seed which only depends on the level, negative so we know it's not
@ -116,7 +116,7 @@ void RNG_music::parse_random_parameters(void)
      {
        if (ltemp <= 0)
        {
-          csoca::elog.Print("Specified seed [random]/%s needs to be a number >0!", seedstr);
+          music::elog.Print("Specified seed [random]/%s needs to be a number >0!", seedstr);
          throw std::runtime_error("Seed values need to be >0");
        }
        rngseeds_.push_back(ltemp);
@ -126,7 +126,7 @@ void RNG_music::parse_random_parameters(void)
    {
      rngfnames_.push_back(tempstr);
      rngseeds_.push_back(-1);
-      csoca::ilog.Print("Random numbers for level %3d will be read from file.", i);
+      music::ilog.Print("Random numbers for level %3d will be read from file.", i);
    }
  }
@ -141,7 +141,7 @@ void RNG_music::parse_random_parameters(void)
 void RNG_music::compute_random_numbers(void)
 {
-  bool rndsign = pcf_->GetValueSafe<bool>("random", "grafic_sign", false);
+  bool rndsign = pcf_->get_value_safe<bool>("random", "grafic_sign", false);
  std::vector<rng *> randc(std::max(levelmax_, levelmin_seed_) + 1, (rng *)NULL);
@ -160,7 +160,7 @@ void RNG_music::compute_random_numbers(void)
      //#warning add possibility to read noise from file also here!
      if (rngfnames_[i].size() > 0)
-        csoca::ilog.Print("Warning: Cannot use filenames for higher levels currently! Ignoring!");
+        music::ilog.Print("Warning: Cannot use filenames for higher levels currently! Ignoring!");
      randc[i] = new rng(*randc[i - 1], ran_cube_size_, rngseeds_[i], true);
      delete randc[i - 1];
@ -180,7 +180,7 @@ void RNG_music::compute_random_numbers(void)
    for (int ilevel = levelmin_seed_ - 1; ilevel >= (int)levelmin_; --ilevel)
    {
      if (rngseeds_[ilevel - levelmin_] > 0)
-        csoca::ilog.Print("Warning: random seed for level %d will be ignored.\n"
+        music::ilog.Print("Warning: random seed for level %d will be ignored.\n"
                "            consistency requires that it is obtained by restriction from level %d",
                ilevel, levelmin_seed_);
@ -227,11 +227,11 @@ void RNG_music::compute_random_numbers(void)
  // {
  //   int lx[3], x0[3];
  //   int shift[3], levelmin_poisson;
-  //   shift[0] = pcf_->GetValue<int>("setup", "shift_x");
+  //   shift[0] = pcf_->get_value<int>("setup", "shift_x");
-  //   shift[1] = pcf_->GetValue<int>("setup", "shift_y");
+  //   shift[1] = pcf_->get_value<int>("setup", "shift_y");
-  //   shift[2] = pcf_->GetValue<int>("setup", "shift_z");
+  //   shift[2] = pcf_->get_value<int>("setup", "shift_z");
-  //   levelmin_poisson = pcf_->GetValue<unsigned>("setup", "levelmin");
+  //   levelmin_poisson = pcf_->get_value<unsigned>("setup", "levelmin");
  //   int lfac = 1 << (ilevel - levelmin_poisson);
--- a/src/plugins/random_music_wnoise_generator.cc
+++ b/src/plugins/random_music_wnoise_generator.cc
@ -11,7 +11,7 @@ template <typename T>
 music_wnoise_generator<T>::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx)
    : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed)
 {
-  csoca::ilog.Print("Generating random numbers (1) with seed %ld", baseseed);
+  music::ilog.Print("Generating random numbers (1) with seed %ld", baseseed);
  initialize();
  fill_subvolume(x0, lx);
@ -21,7 +21,7 @@ template <typename T>
 music_wnoise_generator<T>::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, bool zeromean)
    : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed)
 {
-  csoca::ilog.Print("Generating random numbers (2) with seed %ld", baseseed);
+  music::ilog.Print("Generating random numbers (2) with seed %ld", baseseed);
  double mean = 0.0;
  size_t res_l = res;
@ -31,7 +31,7 @@ music_wnoise_generator<T>::music_wnoise_generator(unsigned res, unsigned cubesiz
    cubesize_ = res_;
  if (!musicnoise)
-    csoca::elog.Print("This currently breaks compatibility. Need to disable by hand! Make sure to not check into repo");
+    music::elog.Print("This currently breaks compatibility. Need to disable by hand! Make sure to not check into repo");
  initialize();
@ -90,7 +90,7 @@ music_wnoise_generator<T>::music_wnoise_generator(unsigned res, std::string rand
  std::ifstream ifs(randfname.c_str(), std::ios::binary);
  if (!ifs)
  {
-    csoca::elog.Print("Could not open random number file \'%s\'!", randfname.c_str());
+    music::elog.Print("Could not open random number file \'%s\'!", randfname.c_str());
    throw std::runtime_error(std::string("Could not open random number file \'") + randfname + std::string("\'!"));
  }
@ -186,7 +186,7 @@ music_wnoise_generator<T>::music_wnoise_generator(unsigned res, std::string rand
  std::vector<float> in_float;
  std::vector<double> in_double;
-  csoca::ilog.Print("Random number file \'%s\'\n   contains %ld numbers. Reading...", randfname.c_str(), nx * ny * nz);
+  music::ilog.Print("Random number file \'%s\'\n   contains %ld numbers. Reading...", randfname.c_str(), nx * ny * nz);
  long double sum = 0.0, sum2 = 0.0;
  size_t count = 0;
@ -285,7 +285,7 @@ music_wnoise_generator<T>::music_wnoise_generator(unsigned res, std::string rand
  mean = sum / count;
  var = sum2 / count - mean * mean;
-  csoca::ilog.Print("Random numbers in file have \n     mean = %f and var = %f", mean, var);
+  music::ilog.Print("Random numbers in file have \n     mean = %f and var = %f", mean, var);
 }
 //... copy construct by averaging down
@ -298,7 +298,7 @@ music_wnoise_generator<T>::music_wnoise_generator(/*const*/ music_wnoise_generat
  long double sum = 0.0, sum2 = 0.0;
  size_t count = 0;
-  csoca::ilog.Print("Generating a coarse white noise field by k-space degrading");
+  music::ilog.Print("Generating a coarse white noise field by k-space degrading");
  //... initialize properties of container
  res_ = rc.res_ / 2;
  cubesize_ = res_;
@ -307,7 +307,7 @@ music_wnoise_generator<T>::music_wnoise_generator(/*const*/ music_wnoise_generat
  if (sizeof(real_t) != sizeof(T))
  {
-    csoca::elog.Print("type mismatch with real_t in k-space averaging");
+    music::elog.Print("type mismatch with real_t in k-space averaging");
    throw std::runtime_error("type mismatch with real_t in k-space averaging");
  }
@ -405,7 +405,7 @@ music_wnoise_generator<T>::music_wnoise_generator(/*const*/ music_wnoise_generat
  rmean = sum / count;
  rvar = sum2 / count - rmean * rmean;
-  csoca::ilog.Print("Restricted random numbers have\n       mean = %f, var = %f", rmean, rvar);
+  music::ilog.Print("Restricted random numbers have\n       mean = %f, var = %f", rmean, rvar);
 }
 template <typename T>
@ -438,7 +438,7 @@ music_wnoise_generator<T>::music_wnoise_generator(music_wnoise_generator<T> &rc,
  if (kspace)
  {
-    csoca::ilog.Print("Generating a constrained random number set with seed %ld\n    using coarse mode replacement...", baseseed);
+    music::ilog.Print("Generating a constrained random number set with seed %ld\n    using coarse mode replacement...", baseseed);
    assert(lx[0] % 2 == 0 && lx[1] % 2 == 0 && lx[2] % 2 == 0);
    size_t nx = lx[0], ny = lx[1], nz = lx[2],
           nxc = lx[0] / 2, nyc = lx[1] / 2, nzc = lx[2] / 2;
@ -573,7 +573,7 @@ music_wnoise_generator<T>::music_wnoise_generator(music_wnoise_generator<T> &rc,
  }
  else
  {
-    csoca::ilog.Print("Generating a constrained random number set with seed %ld\n    using Hoffman-Ribak constraints...", baseseed);
+    music::ilog.Print("Generating a constrained random number set with seed %ld\n    using Hoffman-Ribak constraints...", baseseed);
    double fac = 1.0 / sqrt(8.0); //1./sqrt(8.0);
@ -613,7 +613,7 @@ void music_wnoise_generator<T>::register_cube(int i, int j, int k)
    rnums_.push_back(NULL);
    cubemap_[icube] = rnums_.size() - 1;
 #ifdef DEBUG
-    LOGDEBUG("registering new cube %d,%d,%d . ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]);
+    music::dlog.Print("registering new cube %d,%d,%d . ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]);
 #endif
  }
 }
@ -637,7 +637,7 @@ double music_wnoise_generator<T>::fill_cube(int i, int j, int k)
  if (it == cubemap_.end())
  {
-    csoca::elog.Print("Attempt to access non-registered random number cube!");
+    music::elog.Print("Attempt to access non-registered random number cube!");
    throw std::runtime_error("Attempt to access non-registered random number cube!");
  }
@ -674,7 +674,7 @@ void music_wnoise_generator<T>::subtract_from_cube(int i, int j, int k, double v
  if (it == cubemap_.end())
  {
-    csoca::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::subtract_from_cube", i, j, k);
+    music::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::subtract_from_cube", i, j, k);
    throw std::runtime_error("Attempt to access unallocated RND cube in music_wnoise_generator::subtract_from_cube");
  }
@ -700,7 +700,7 @@ void music_wnoise_generator<T>::free_cube(int i, int j, int k)
  if (it == cubemap_.end())
  {
-    csoca::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::free_cube", i, j, k);
+    music::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::free_cube", i, j, k);
    throw std::runtime_error("Attempt to access unallocated RND cube in music_wnoise_generator::free_cube");
  }
@ -724,7 +724,7 @@ void music_wnoise_generator<T>::initialize(void)
    cubesize_ = res_;
  }
-  csoca::ilog.Print("Generating random numbers w/ sample cube size of %d", cubesize_);
+  music::ilog.Print("Generating random numbers w/ sample cube size of %d", cubesize_);
 }
 template <typename T>
@ -741,8 +741,8 @@ double music_wnoise_generator<T>::fill_subvolume(int *i0, int *n)
  ncube[2] = (int)(n[2] / cubesize_) + 2;
 #ifdef DEBUG
-  LOGDEBUG("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]);
+  music::dlog.Print("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]);
-  LOGDEBUG("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]);
+  music::dlog.Print("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]);
 #endif
  double mean = 0.0;
@ -836,7 +836,7 @@ void music_wnoise_generator<T>::print_allocated(void)
    if (rnums_[i] != NULL)
      ncount++;
-  csoca::ilog.Print(" -> %d of %d random number cubes currently allocated", ncount, ntot);
+  music::ilog.Print(" -> %d of %d random number cubes currently allocated", ncount, ntot);
 }
 template class music_wnoise_generator<float>;
--- a/src/plugins/random_music_wnoise_generator.hh
+++ b/src/plugins/random_music_wnoise_generator.hh
@ -80,7 +80,7 @@ protected:
    if (it == cubemap_.end())
    {
-      csoca::elog.Print("attempting to copy data from non-existing RND cube %d,%d,%d", i, j, k);
+      music::elog.Print("attempting to copy data from non-existing RND cube %d,%d,%d", i, j, k);
      throw std::runtime_error("attempting to copy data from non-existing RND cube");
    }
@ -186,7 +186,7 @@ public:
    if (it == cubemap_.end())
    {
-      csoca::elog.Print("Attempting to copy data from non-existing RND cube %d,%d,%d @ %d,%d,%d", ic, jc, kc, i, j, k);
+      music::elog.Print("Attempting to copy data from non-existing RND cube %d,%d,%d @ %d,%d,%d", ic, jc, kc, i, j, k);
      throw std::runtime_error("attempting to copy data from non-existing RND cube");
    }
@ -194,7 +194,7 @@ public:
    if (rnums_[cubeidx] == NULL)
    {
-      csoca::elog.Print("Attempting to access data from non-allocated RND cube %d,%d,%d", ic, jc, kc);
+      music::elog.Print("Attempting to access data from non-allocated RND cube %d,%d,%d", ic, jc, kc);
      throw std::runtime_error("attempting to access data from non-allocated RND cube");
    }
--- a/src/plugins/random_ngenic.cc
+++ b/src/plugins/random_ngenic.cc
@ -18,11 +18,11 @@ private:
    std::vector<unsigned int> SeedTable_;
 public:
-    explicit RNG_ngenic(ConfigFile &cf) : RNG_plugin(cf)
+    explicit RNG_ngenic(config_file &cf) : RNG_plugin(cf)
    {
-        RandomSeed_ = cf.GetValue<long>("random", "seed");
+        RandomSeed_ = cf.get_value<long>("random", "seed");
-        nres_ = cf.GetValue<size_t>("setup", "GridRes");
+        nres_ = cf.get_value<size_t>("setup", "GridRes");
        pRandomGenerator_ = gsl_rng_alloc(gsl_rng_ranlxd1);
        gsl_rng_set(pRandomGenerator_, RandomSeed_);
@ -63,7 +63,7 @@ public:
    bool isMultiscale() const { return false; }
-    void Fill_Grid(Grid_FFT<real_t> &g) const
+    void Fill_Grid(Grid_FFT<real_t> &g) //const
    {
        g.zero();
        g.FourierTransformForward(false);
@ -82,7 +82,11 @@ public:
                for (size_t j = 0; j < nres_; ++j) 
                {                   
                    ptrdiff_t jj = (j>0)? nres_ - j : 0;
                    if( g.is_distributed() )
                        gsl_rng_set( pRandomGenerator_, SeedTable_[j * nres_ + i]);
                    else
                        gsl_rng_set( pRandomGenerator_, SeedTable_[i * nres_ + j]);
                    for (size_t k = 0; k < g.size(2); ++k) 
                    {
                        double phase = gsl_rng_uniform(pRandomGenerator_) * 2 * M_PI;
@ -101,6 +105,18 @@ public:
                        if (k > 0) {
                            if (i_in_range) g.kelem(ip,j,k) = zrand;
                        } else{ /* k=0 plane needs special treatment */
                            if( g.is_distributed() ){
                                if (j == 0) {
                                    if (i < nres_ / 2 && i_in_range)
                                    {
                                        if(i_in_range) g.kelem(ip,jj,k) = zrand;
                                        if(ii_in_range) g.kelem(iip,j,k) = std::conj(zrand);
                                    }
                                } else if (j < nres_ / 2) {
                                    if(i_in_range) g.kelem(ip,j,k) = zrand;
                                    if(ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand);
                                }
                            }else{
                                if (i == 0) {
                                    if (j < nres_ / 2 && i_in_range)
                                    {
@ -117,6 +133,7 @@ public:
                }
            }
        }
    }
 };
 namespace
--- a/src/plugins/random_panphasia.cc
+++ b/src/plugins/random_panphasia.cc
@ -0,0 +1,522 @@
 #if defined(USE_PANPHASIA)
 #include <general.hh>
 #include <random_plugin.hh>
 #include <config_file.hh>
 #include <vector>
 #include <cmath>
 #include <cstring>
 #ifdef _OPENMP
 #include <omp.h>
 #endif
 #include <grid_fft.hh>
 // Compile-time dimensions used for the array members of pan_state_ below.
 // NOTE(review): presumably these have to agree with the matching parameters in
 // panphasia_routines.f -- confirm before changing any of them.
 const int maxdim = 60, maxlev = 50, maxpow = 3 * maxdim;
 // Fixed block of five integers; used for the poweroffset and superjump members of pan_state_.
 typedef int rand_offset_[5];
 // State of one random number stream: a buffer of stored integers plus two bookkeeping fields.
 // NOTE(review): presumably mirrors a data structure on the Fortran side, so the member
 // order and sizes should be treated as fixed -- confirm.
 typedef struct
 {
  int state[133]; // Nstore = Nstate (=5) + Nbatch (=128)
  int need_fill;
  int pos;
 } rand_state_;
 /* pan_state_ -- corresponds to the respective Fortran module in panphasia_routines.f.
  * Data structure that contains all PANPHASIA state variables. A pointer to one
  * instance is handed to every external PANPHASIA routine, so that each thread can
  * operate on its own copy, which enables thread-safe execution.
  * NOTE(review): the member layout presumably has to match the Fortran side exactly;
  * do not reorder or resize members without checking panphasia_routines.f.
  */
 typedef struct
 {
  int base_state[5], base_lev_start[5][maxdim + 1];
  rand_offset_ poweroffset[maxpow + 1], superjump;
  rand_state_ current_state[maxpow + 2];
  int layer_min, layer_max, indep_field; // set per thread in RNG_panphasia::Fill_Grid
  long long xorigin_store[2][2][2], yorigin_store[2][2][2], zorigin_store[2][2][2];
  int lev_common, layer_min_store, layer_max_store;
  long long ix_abs_store, iy_abs_store, iz_abs_store, ix_per_store, iy_per_store, iz_per_store, ix_rel_store,
      iy_rel_store, iz_rel_store;
  double exp_coeffs[8][8][maxdim + 2];
  long long xcursor[maxdim + 1], ycursor[maxdim + 1], zcursor[maxdim + 1];
  int ixshift[2][2][2], iyshift[2][2][2], izshift[2][2][2];
  double cell_data[9][8];
  int ixh_last, iyh_last, izh_last;
  // the three flags below are zeroed by RNG_panphasia::clear_panphasia_thread_states()
  int init;
  int init_cell_props;
  int init_lecuyer_state;
  long long p_xcursor[62], p_ycursor[62], p_zcursor[62];
 } pan_state_;
 // Routines implemented outside this file and called with C linkage; every argument is
 // passed by pointer. NOTE(review): these are presumably the Fortran routines from
 // panphasia_routines.f -- verify the prototypes (in particular the expected length of the
 // character arguments) against the Fortran source.
 extern "C"
 {
  // called once per thread at the start of each parallel region in RNG_panphasia::Fill_Grid
  void start_panphasia_(pan_state_ *lstate, const char *descriptor, int *ngrid, int *bverbose);
  // called by the panphasia_descriptor constructor; results are returned through the out-pointers
  void parse_descriptor_(const char *descriptor, int16_t *l, int32_t *ix, int32_t *iy, int32_t *iz, int16_t *side1,
                         int16_t *side2, int16_t *side3, int32_t *check_int, char *name);
  // declared here but not called in the code visible in this file
  void panphasia_cell_properties_(pan_state_ *lstate, int *ixcell, int *iycell, int *izcell, double *cell_prop);
  // called once per grid cell in RNG_panphasia::Fill_Grid; the caller provides room for nine doubles in cell_prop
  void adv_panphasia_cell_properties_(pan_state_ *lstate, int *ixcell, int *iycell, int *izcell, int *layer_min,
                                      int *layer_max, int *indep_field, double *cell_prop);
  // called once per thread in RNG_panphasia::Fill_Grid right after start_panphasia_
  void set_phases_and_rel_origin_(pan_state_ *lstate, const char *descriptor, int *lev, long long *ix_rel,
                                  long long *iy_rel, long long *iz_rel, int *VERBOSE);
 }
 // Parsed form of a PANPHASIA descriptor string. All numeric members are filled in by
 // the external parse_descriptor_() routine; `name` receives the descriptor name with
 // the blank padding stripped.
 struct panphasia_descriptor
 {
  int16_t wn_level_base;
  int32_t i_xorigin_base, i_yorigin_base, i_zorigin_base;
  int16_t i_base, i_base_y, i_base_z;
  int32_t check_rand;
  std::string name;
  // Parse `dstring` and populate all members.
  // NOTE(review): dstring.c_str() is handed to the external routine as-is, whereas
  // RNG_panphasia::Fill_Grid passes a zero-padded 100-byte buffer to the other routines --
  // confirm which length the external routine expects for its character argument.
  explicit panphasia_descriptor(std::string dstring)
  {
    // the buffer is pre-filled with blanks; the name is taken to end at the first blank
    char tmp[100];
    std::memset(tmp, ' ', sizeof(tmp));
    parse_descriptor_(dstring.c_str(), &wn_level_base, &i_xorigin_base, &i_yorigin_base, &i_zorigin_base, &i_base,
                      &i_base_y, &i_base_z, &check_rand, tmp);
    // Measure the name explicitly instead of relying on a terminator being present:
    // if the routine fills all 100 characters there is no blank to turn into '\0',
    // and constructing the string from the bare array would read past its end.
    size_t len = 0;
    while (len < sizeof(tmp) && tmp[len] != ' ' && tmp[len] != '\0')
      ++len;
    name.assign(tmp, len);
  }
 };
 // greatest common divisor (Euclid's algorithm); gcd(a, 0) == a
 int gcd(int a, int b)
 {
  while (b != 0)
  {
    const int r = a % b;
    a = b;
    b = r;
  }
  return a;
 }
 // least common multiple; always non-negative, and 0 if either argument is 0
 int lcm(int a, int b)
 {
  // gcd(0, 0) is 0, so the zero case has to be handled before dividing
  if (a == 0 || b == 0)
    return 0;
  // divide before multiplying so that the intermediate value does not overflow
  // whenever the result itself is representable
  return abs(a / gcd(a, b) * b);
 }
 // Largest power of two that is less than or equal to the argument.
 // Returns 1 for every b <= 1.
 int largest_power_two_lte(int b)
 {
  int a = 1;
  // "a <= b / 2" is the overflow-safe form of "2 * a <= b"; for b <= 1 the
  // condition is false right away and 1 is returned
  while (a <= b / 2)
    a *= 2;
  return a;
 }
 class RNG_panphasia : public RNG_plugin
 {
 private:
 protected:
  // descriptor text read from [random] / descriptor in the constructor
  std::string descriptor_string_;
  // omp_get_max_threads() when built with OpenMP, otherwise 1; also the length of lstate
  int num_threads_;
  // ngrid_ is read from [setup] / GridRes.
  // NOTE(review): levelmin_ is read in initialize_for_grid_structure() but no assignment to
  // levelmin_, levelmin_final_ or levelmax_ is visible in this class -- confirm they are
  // initialised somewhere.
  int levelmin_, levelmin_final_, levelmax_, ngrid_;
  // set in initialize_for_grid_structure() when GridRes is not ratio * i_base
  bool incongruent_fields_;
  double inter_grid_phase_adjustment_;
  // double translation_phase_;
  // array of num_threads_ states, allocated in the constructor and released in the destructor
  pan_state_ *lstate;
  // grid_p_ is the descriptor's i_base; grid_m_ is largest_power_two_lte(grid_p_)
  int grid_p_, grid_m_;
  double grid_rescale_fac_;
  // negated [setup] / shift_x, shift_y, shift_z values (default 0)
  int coordinate_system_shift_[3];
  int ix_abs_[3], ix_per_[3], ix_rel_[3], level_p_, lextra_;
  void clear_panphasia_thread_states(void)
  {
    for (int i = 0; i < num_threads_; ++i)
    {
      lstate[i].init = 0;
      lstate[i].init_cell_props = 0;
      lstate[i].init_lecuyer_state = 0;
    }
  }
  // Reset the per-thread states and derive the grid bookkeeping members from the
  // configuration ([setup] / GridRes and shift_x/y/z) and the parsed descriptor.
  // When GridRes is not (2^lextra_) * i_base, the fields are flagged as incongruent
  // and ngrid_ is replaced by twice that product.
  // NOTE(review): levelmin_ is read below, but no assignment to it is visible in this
  // class -- confirm it is initialised before this method runs.
  void initialize_for_grid_structure(void)
  {
    clear_panphasia_thread_states();
    music::ilog.Print("PANPHASIA: running with %d threads", num_threads_);
    // if ngrid is not a multiple of i_base, then we need to enlarge and then sample down
    ngrid_ = pcf_->get_value<size_t>("setup", "GridRes");
    const int base = pdescriptor_->i_base;
    grid_p_ = base;
    grid_m_ = largest_power_two_lte(grid_p_);
    // number of whole factor-two steps between the descriptor base and the grid
    lextra_ = (log10((double)ngrid_ / (double)base) + 0.001) / log10(2.0);
    const int ratio = 1 << lextra_;
    grid_rescale_fac_ = 1.0;
    const char *const shift_keys[3] = {"shift_x", "shift_y", "shift_z"};
    for (int axis = 0; axis < 3; ++axis)
      coordinate_system_shift_[axis] = -pcf_->get_value_safe<int>("setup", shift_keys[axis], 0);
    incongruent_fields_ = (ngrid_ != ratio * base);
    if (!incongruent_fields_)
      return;
    ngrid_ = 2 * ratio * base;
    grid_rescale_fac_ = (double)ngrid_ / (1 << levelmin_);
    music::ilog << "PANPHASIA: will use a higher resolution (using Fourier interpolation)" << std::endl;
    music::ilog << "     (" << grid_m_ << " -> " << grid_p_ << ") * 2**ref to be compatible with PANPHASIA" << std::endl;
  }
  // parsed form of descriptor_string_; created in the constructor
  std::unique_ptr<panphasia_descriptor> pdescriptor_;
 public:
  // Construct the generator from the configuration:
  //  - reads [random] / descriptor and parses it,
  //  - allocates one pan_state_ per thread,
  //  - writes [setup] / gridding_unit (always "2") and [random] / base_unit (the
  //    descriptor's i_base) into the configuration for the grid construction,
  //  - runs initialize_for_grid_structure().
  // Any exception raised after the state array has been allocated is rethrown
  // after the array has been released.
  explicit RNG_panphasia(config_file &cf) : RNG_plugin(cf)
  {
    descriptor_string_ = pcf_->get_value<std::string>("random", "descriptor");
 #ifdef _OPENMP
    num_threads_ = omp_get_max_threads();
 #else
    num_threads_ = 1;
 #endif
    // create independent state descriptions for each thread
    lstate = new pan_state_[num_threads_];
    try
    {
      // parse the descriptor for its properties
      pdescriptor_ = std::make_unique<panphasia_descriptor>(descriptor_string_);
      music::ilog.Print("PANPHASIA: descriptor \'%s\' is base %d,", pdescriptor_->name.c_str(), pdescriptor_->i_base);
      // write panphasia base size into config file for the grid construction;
      // the gridding unit is fixed to two (ARJ) rather than derived from i_base
      pcf_->insert_value("setup", "gridding_unit", std::string("2"));
      std::stringstream ss;
      ss << pdescriptor_->i_base;
      pcf_->insert_value("random", "base_unit", ss.str());
      this->initialize_for_grid_structure();
    }
    catch (...)
    {
      // the destructor is not run for an object whose constructor throws,
      // so the state array has to be released here
      delete[] lstate;
      lstate = nullptr;
      throw;
    }
  }
  // Release the per-thread state array allocated in the constructor.
  ~RNG_panphasia()
  {
    delete[] lstate;
  }
  // This generator always reports itself as multi-scale.
  bool isMultiscale() const
  {
    return true;
  }
  // Fill `g` with the PANPHASIA field for the resolution given by [setup] / GridRes.
  //
  // The field is assembled in two passes over the grid. In each pass every thread
  // initialises its own pan_state_, evaluates adv_panphasia_cell_properties_() for each
  // cell (in 2x2x2 blocks), stores selected entries of the nine returned cell properties
  // in real-space grids, transforms them to Fourier space and accumulates them into `g`
  // weighted by products of sinc / d(sinc) factors. Nyquist modes are set to zero.
  // On return `g` holds the Fourier-space result.
  //
  // Throws std::runtime_error when GridRes is not i_base times a power of two, or when
  // the descriptor string does not fit into the fixed-size buffer handed to the
  // external routines.
  void Fill_Grid(Grid_FFT<real_t> &g)
  {
    auto sinc = [](real_t x) { return (std::abs(x) > 1e-16) ? std::sin(x) / x : 1.0; };
    auto dsinc = [](real_t x) { return (std::abs(x) > 1e-16) ? (x * std::cos(x) - std::sin(x)) / (x * x) : 0.0; };
    const real_t sqrt3{std::sqrt(3.0)}, sqrt27{std::sqrt(27.0)};
    // make sure we're in the right space
    Grid_FFT<real_t> &g0 = g;
    g0.FourierTransformBackward(false);
    // temporaries
    Grid_FFT<real_t> g1(g.n_, g.length_);
    Grid_FFT<real_t> g2(g.n_, g.length_);
    Grid_FFT<real_t> g3(g.n_, g.length_);
    Grid_FFT<real_t> g4(g.n_, g.length_);
    clear_panphasia_thread_states();
    music::ilog.Print("PANPHASIA: running with %d threads", num_threads_);
    ngrid_ = pcf_->get_value<size_t>("setup", "GridRes");
    grid_p_ = pdescriptor_->i_base;
    // the non-positive checks guard the modulo and the logarithm below
    if (ngrid_ <= 0 || grid_p_ <= 0 || ngrid_ % grid_p_ != 0)
    {
      music::elog << "Grid resolution " << ngrid_ << " is not divisible by PANPHASIA descriptor length " << grid_p_ << std::endl;
      throw std::runtime_error("Chosen [setup] / GridRes is not compatible with PANPHASIA descriptor length!");
    }
    // number of factor-two refinements between the descriptor base and the grid;
    // computed once here from the already parsed descriptor instead of re-parsing
    // the descriptor string in every thread
    const int lextra = (log10((double)ngrid_ / (double)grid_p_) + 0.001) / log10(2.0);
    const int level_p = pdescriptor_->wn_level_base + lextra;
    // checked at run time (and outside the parallel regions) so that the constraint
    // is also enforced in builds that compile assertions out
    if (ngrid_ != (1 << lextra) * grid_p_)
    {
      music::elog << "Grid resolution " << ngrid_ << " is not a power-of-two multiple of PANPHASIA descriptor length " << grid_p_ << std::endl;
      throw std::runtime_error("Chosen [setup] / GridRes is not compatible with PANPHASIA descriptor length!");
    }
    // the descriptor is handed to the external routines in a zero-padded fixed-size buffer
    constexpr size_t descriptor_buffer_size = 100;
    if (descriptor_string_.size() > descriptor_buffer_size)
    {
      music::elog << "PANPHASIA descriptor is longer than " << descriptor_buffer_size << " characters" << std::endl;
      throw std::runtime_error("PANPHASIA descriptor string is too long!");
    }
    // Initialise the state of one thread and return a pointer to it. `verbosity` is
    // passed on by address to the external routines, as in the per-cell calls below.
    auto start_thread_state = [&](int mythread, int &verbosity) -> pan_state_ * {
      char descriptor[descriptor_buffer_size];
      std::memset(descriptor, 0, sizeof(descriptor));
      std::memcpy(descriptor, descriptor_string_.c_str(), descriptor_string_.size());
      pan_state_ *state = &lstate[mythread];
      start_panphasia_(state, descriptor, &ngrid_, &verbosity);
      state->layer_min = 0;
      state->layer_max = level_p;
      state->indep_field = 1;
      int level = level_p;
      long long ix_rel[3] = {0, 0, 0};
      set_phases_and_rel_origin_(state, descriptor, &level, &ix_rel[0], &ix_rel[1], &ix_rel[2], &verbosity);
      return state;
    };
    double t1 = get_wtime();
    //***************************************************************
    // First pass: cell properties 0, 4, 2, 1 and 8
    //***************************************************************
 #pragma omp parallel
    {
 #ifdef _OPENMP
      const int mythread = omp_get_thread_num();
 #else
      const int mythread = 0;
 #endif
      int verbosity = (mythread == 0);
      pan_state_ *ps = start_thread_state(mythread, verbosity);
      // only thread 0 has verbosity set, so t1 is written by a single thread
      if (verbosity)
        t1 = get_wtime();
      std::array<double, 9> cell_prop;
      // NOTE(review): the 2x2x2 blocking touches index size-1+1 when a grid extent is
      // odd -- confirm that g.size(0..2) is always even here.
 #pragma omp for //nowait
      for (size_t i = 0; i < g.size(0); i += 2)
      {
        for (size_t j = 0; j < g.size(1); j += 2)
        {
          for (size_t k = 0; k < g.size(2); k += 2)
          {
            // ARJ - added inner set of loops to speed up evaluation of Panphasia
            for (int ix = 0; ix < 2; ++ix)
            {
              for (int iy = 0; iy < 2; ++iy)
              {
                for (int iz = 0; iz < 2; ++iz)
                {
                  int ilocal = i + ix;
                  int jlocal = j + iy;
                  int klocal = k + iz;
                  // only the first index is offset by the start of the local slab
                  int iglobal = ilocal + g.local_0_start_;
                  int jglobal = jlocal;
                  int kglobal = klocal;
                  adv_panphasia_cell_properties_(ps, &iglobal, &jglobal, &kglobal, &ps->layer_min,
                                                 &ps->layer_max, &ps->indep_field, &cell_prop[0]);
                  g0.relem(ilocal, jlocal, klocal) = cell_prop[0];
                  g1.relem(ilocal, jlocal, klocal) = cell_prop[4];
                  g2.relem(ilocal, jlocal, klocal) = cell_prop[2];
                  g3.relem(ilocal, jlocal, klocal) = cell_prop[1];
                  g4.relem(ilocal, jlocal, klocal) = cell_prop[8];
                }
              }
            }
          }
        }
      }
    } // end omp parallel region
    g0.FourierTransformForward();
    g1.FourierTransformForward();
    g2.FourierTransformForward();
    g3.FourierTransformForward();
    g4.FourierTransformForward();
 #pragma omp parallel for
    for (size_t i = 0; i < g0.size(0); i++)
    {
      for (size_t j = 0; j < g0.size(1); j++)
      {
        for (size_t k = 0; k < g0.size(2); k++)
        {
          if (!g0.is_nyquist_mode(i, j, k))
          {
            auto kvec = g0.get_k<real_t>(i, j, k);
            auto argx = 0.5 * M_PI * kvec[0] / g.kny_[0];
            auto argy = 0.5 * M_PI * kvec[1] / g.kny_[1];
            auto argz = 0.5 * M_PI * kvec[2] / g.kny_[2];
            auto fx = sinc(argx);
            auto gx = ccomplex_t(0.0, dsinc(argx));
            auto fy = sinc(argy);
            auto gy = ccomplex_t(0.0, dsinc(argy));
            auto fz = sinc(argz);
            auto gz = ccomplex_t(0.0, dsinc(argz));
            auto temp = (fx + sqrt3 * gx) * (fy + sqrt3 * gy) * (fz + sqrt3 * gz);
            // NOTE(review): this becomes NaN if |temp|^2 exceeds 1 through rounding -- confirm it cannot
            auto magnitude = std::sqrt(1.0 - std::abs(temp * temp));
            auto y0(g0.kelem(i, j, k)), y1(g1.kelem(i, j, k)), y2(g2.kelem(i, j, k)), y3(g3.kelem(i, j, k)), y4(g4.kelem(i, j, k));
            g0.kelem(i, j, k) = y0 * fx * fy * fz
                              + sqrt3 * (y1 * gx * fy * fz + y2 * fx * gy * fz + y3 * fx * fy * gz)
                              + y4 * magnitude;
          }
          else
          {
            g0.kelem(i, j, k) = 0.0;
          }
        }
      }
    }
    // back to real space so that the temporaries can be refilled in the second pass
    g1.FourierTransformBackward(false);
    g2.FourierTransformBackward(false);
    g3.FourierTransformBackward(false);
    g4.FourierTransformBackward(false);
    //***************************************************************
    // Process Panphasia values: p110, p011, p101, p111
    //***************************************************************
 #pragma omp parallel
    {
 #ifdef _OPENMP
      const int mythread = omp_get_thread_num();
 #else
      const int mythread = 0;
 #endif
      int verbosity = (mythread == 0);
      pan_state_ *ps = start_thread_state(mythread, verbosity);
      // as before, t1 is restarted here, so the time reported at the end is measured
      // from this point
      if (verbosity)
        t1 = get_wtime();
      std::array<double, 9> cell_prop;
 #pragma omp for //nowait
      for (size_t i = 0; i < g1.size(0); i += 2)
      {
        for (size_t j = 0; j < g1.size(1); j += 2)
        {
          for (size_t k = 0; k < g1.size(2); k += 2)
          {
            // ARJ - added inner set of loops to speed up evaluation of Panphasia
            for (int ix = 0; ix < 2; ++ix)
            {
              for (int iy = 0; iy < 2; ++iy)
              {
                for (int iz = 0; iz < 2; ++iz)
                {
                  int ilocal = i + ix;
                  int jlocal = j + iy;
                  int klocal = k + iz;
                  int iglobal = ilocal + g.local_0_start_;
                  int jglobal = jlocal;
                  int kglobal = klocal;
                  adv_panphasia_cell_properties_(ps, &iglobal, &jglobal, &kglobal, &ps->layer_min,
                                                 &ps->layer_max, &ps->indep_field, &cell_prop[0]);
                  g1.relem(ilocal, jlocal, klocal) = cell_prop[6];
                  g2.relem(ilocal, jlocal, klocal) = cell_prop[3];
                  g3.relem(ilocal, jlocal, klocal) = cell_prop[5];
                  g4.relem(ilocal, jlocal, klocal) = cell_prop[7];
                }
              }
            }
          }
        }
      }
    } // end omp parallel region
    /////////////////////////////////////////////////////////////////////////
    // transform and convolve with Legendres
    g1.FourierTransformForward();
    g2.FourierTransformForward();
    g3.FourierTransformForward();
    g4.FourierTransformForward();
 #pragma omp parallel for
    for (size_t i = 0; i < g1.size(0); i++)
    {
      for (size_t j = 0; j < g1.size(1); j++)
      {
        for (size_t k = 0; k < g1.size(2); k++)
        {
          if (!g1.is_nyquist_mode(i, j, k))
          {
            auto kvec = g1.get_k<real_t>(i, j, k);
            auto argx = 0.5 * M_PI * kvec[0] / g.kny_[0];
            auto argy = 0.5 * M_PI * kvec[1] / g.kny_[1];
            auto argz = 0.5 * M_PI * kvec[2] / g.kny_[2];
            auto fx = sinc(argx);
            auto gx = ccomplex_t(0.0, dsinc(argx));
            auto fy = sinc(argy);
            auto gy = ccomplex_t(0.0, dsinc(argy));
            auto fz = sinc(argz);
            auto gz = ccomplex_t(0.0, dsinc(argz));
            auto y1(g1.kelem(i, j, k)), y2(g2.kelem(i, j, k)), y3(g3.kelem(i, j, k)), y4(g4.kelem(i, j, k));
            g0.kelem(i, j, k) += 3.0 * (y1 * gx * gy * fz + y2 * fx * gy * gz + y3 * gx * fy * gz) + sqrt27 * y4 * gx * gy * gz;
          }
        }
      }
    }
    music::ilog.Print("time for calculating PANPHASIA field : %f s, %f µs/cell", get_wtime() - t1,
                          1e6 * (get_wtime() - t1) / g.global_size(0) / g.global_size(1) / g.global_size(2));
    music::ilog.Print("PANPHASIA k-space statistices: mean Re = %f, std = %f", g0.mean(), g0.std());
  }
 };
 // File-local creator object that associates the name "PANPHASIA" with RNG_panphasia.
 // NOTE(review): presumably its constructor registers the plugin with the RNG plugin
 // factory at static-initialisation time -- confirm in random_plugin.hh.
 namespace
 {
  RNG_plugin_creator_concrete<RNG_panphasia> creator("PANPHASIA");
 }
 #endif // defined(USE_PANPHASIA)
--- a/src/plugins/transfer_CAMB_file.cc
+++ b/src/plugins/transfer_CAMB_file.cc
@ -0,0 +1,344 @@
 //  transfer_CAMB_file.cc - This file is part of MUSIC -
 //  a code to generate multi-scale initial conditions for cosmological simulations
 //  Copyright (C) 2019  Oliver Hahn
 #include <gsl/gsl_errno.h>
 #include <gsl/gsl_spline.h>
 #include <vector>
 #include "transfer_function_plugin.hh"
 // Lower bound applied in extrap_right() to linearly extrapolated baryon transfer
 // function values, so that the returned value is never below this threshold.
 const double tiny = 1e-30;
 class transfer_CAMB_file_plugin : public TransferFunction_plugin
 {
 private:
  // m_filename_Tk is read from [cosmology] / transfer_file; m_filename_Pk is not used in the visible code
  std::string m_filename_Pk, m_filename_Tk;
  // Tabulated data filled by read_table(): m_tab_k holds log10(k); the other tables hold
  // log10 of the transfer function values, except the two baryon tables, which keep the
  // raw values when m_linbaryoninterp is set.
  std::vector<double> m_tab_k, m_tab_Tk_tot, m_tab_Tk_cdm, m_tab_Tk_baryon;
  std::vector<double> m_tab_Tvk_tot, m_tab_Tvk_cdm, m_tab_Tvk_baryon;
  // one GSL accelerator and one cubic spline per table, allocated in the constructor
  // and freed in the destructor
  gsl_interp_accel *acc_tot, *acc_cdm, *acc_baryon;
  gsl_interp_accel *acc_vtot, *acc_vcdm, *acc_vbaryon;
  gsl_spline *spline_tot, *spline_cdm, *spline_baryon;
  gsl_spline *spline_vtot, *spline_vcdm, *spline_vbaryon;
  // m_kmin / m_kmax: smallest and largest k found in the file (not log10);
  // the remaining values are read from the configuration in the constructor
  double m_kmin, m_kmax, m_Omega_b, m_Omega_m, m_zstart;
  // number of data rows read from the file
  unsigned m_nlines;
  // true when any baryon density or velocity entry in the file is negative
  bool m_linbaryoninterp;
  // Read the tabulated transfer functions from m_filename_Tk into the m_tab_* vectors.
  //
  // Each data line must provide 13 whitespace-separated columns; columns 1, 2, 3, 7,
  // 11 and 12 are used as k, T_cdm, T_baryon, T_total, Tv_cdm and Tv_baryon. Lines
  // containing '#' and blank lines are ignored. k and all transfer functions are stored
  // as log10 values, except that the baryon tables stay linear if any baryon entry is
  // negative (m_linbaryoninterp). The total velocity transfer function is the
  // Omega-weighted mean of the CDM and baryon ones (or T_total if Omega_b < 1e-6).
  //
  // With WITH_MPI only rank 0 reads the file; the tables and the derived scalars
  // (m_kmin, m_kmax, m_nlines, m_linbaryoninterp) are then broadcast to all ranks.
  //
  // Throws std::runtime_error if the file cannot be opened, a data line cannot be
  // parsed, or fewer than three data rows are found.
  void read_table(void)
  {
    m_nlines = 0;
    m_linbaryoninterp = false;
    m_kmin = 1e30;
    m_kmax = -1e30;
 #ifdef WITH_MPI
    if (MPI::COMM_WORLD.Get_rank() == 0)
    {
 #endif
      music::ilog.Print("Reading tabulated transfer function data from file \n    \'%s\'", m_filename_Tk.c_str());
      std::string line;
      std::ifstream ifs(m_filename_Tk.c_str());
      if (!ifs.good())
        throw std::runtime_error("Could not find transfer function file \'" + m_filename_Tk + "\'");
      m_tab_k.clear();
      m_tab_Tk_tot.clear();
      m_tab_Tk_cdm.clear();
      m_tab_Tk_baryon.clear();
      m_tab_Tvk_tot.clear();
      m_tab_Tvk_cdm.clear();
      m_tab_Tvk_baryon.clear();
      // testing the result of getline itself also processes a last line that is not
      // terminated by a newline
      while (std::getline(ifs, line))
      {
        // OH: ignore line if it has a comment:
        if (line.find("#") != std::string::npos)
          continue;
        // blank lines carry no data
        if (line.find_first_not_of(" \t\r\n") == std::string::npos)
          continue;
        std::stringstream ss(line);
        double k, Tkc, Tkb, Tktot, Tkvtot, Tkvc, Tkvb, dummy;
        ss >> k;
        ss >> Tkc;   // cdm
        ss >> Tkb;   // baryon
        ss >> dummy; // photon
        ss >> dummy; // nu
        ss >> dummy; // mass_nu
        ss >> Tktot; // total
        ss >> dummy; // no_nu
        ss >> dummy; // total_de
        ss >> dummy; // Weyl
        ss >> Tkvc;  // v_cdm
        ss >> Tkvb;  // v_b
        ss >> dummy; // v_b-v_cdm
        if (ss.fail())
        {
          music::elog.Print("error reading the transfer function file (corrupt or not in expected format)!");
          throw std::runtime_error("error reading transfer function file \'" +
                                   m_filename_Tk + "\'");
        }
        if (m_Omega_b < 1e-6)
          Tkvtot = Tktot;
        else
          Tkvtot = ((m_Omega_m - m_Omega_b) * Tkvc + m_Omega_b * Tkvb) / m_Omega_m; //MvD
        m_linbaryoninterp |= Tkb < 0.0 || Tkvb < 0.0;
        m_tab_k.push_back(log10(k));
        m_tab_Tk_tot.push_back(Tktot);
        m_tab_Tk_baryon.push_back(Tkb);
        m_tab_Tk_cdm.push_back(Tkc);
        m_tab_Tvk_tot.push_back(Tkvtot);
        m_tab_Tvk_baryon.push_back(Tkvb);
        m_tab_Tvk_cdm.push_back(Tkvc);
        ++m_nlines;
        if (k < m_kmin)
          m_kmin = k;
        if (k > m_kmax)
          m_kmax = k;
      }
      ifs.close();
      // the cubic splines built from these tables need at least three points, and the
      // accessors of this class index the second and second-to-last entries
      if (m_tab_k.size() < 3)
        throw std::runtime_error("transfer function file \'" + m_filename_Tk +
                                 "\' contains fewer than three data rows");
      // the baryon tables are only converted once it is known that no entry is negative
      for (size_t i = 0; i < m_tab_k.size(); ++i)
      {
        m_tab_Tk_tot[i] = log10(m_tab_Tk_tot[i]);
        m_tab_Tk_cdm[i] = log10(m_tab_Tk_cdm[i]);
        m_tab_Tvk_cdm[i] = log10(m_tab_Tvk_cdm[i]);
        m_tab_Tvk_tot[i] = log10(m_tab_Tvk_tot[i]);
        if (!m_linbaryoninterp)
        {
          m_tab_Tk_baryon[i] = log10(m_tab_Tk_baryon[i]);
          m_tab_Tvk_baryon[i] = log10(m_tab_Tvk_baryon[i]);
        }
      }
      music::ilog.Print("Read CAMB transfer function table with %d rows", m_nlines);
      if (m_linbaryoninterp)
        music::ilog.Print("Using log-lin interpolation for baryons\n    (TF is not "
                          "positive definite)");
 #ifdef WITH_MPI
    }
    unsigned n = m_tab_k.size();
    MPI::COMM_WORLD.Bcast(&n, 1, MPI_UNSIGNED, 0);
    if (MPI::COMM_WORLD.Get_rank() > 0)
    {
      m_tab_k.assign(n, 0);
      m_tab_Tk_tot.assign(n, 0);
      m_tab_Tk_cdm.assign(n, 0);
      m_tab_Tk_baryon.assign(n, 0);
      m_tab_Tvk_tot.assign(n, 0);
      m_tab_Tvk_cdm.assign(n, 0);
      m_tab_Tvk_baryon.assign(n, 0);
    }
    MPI::COMM_WORLD.Bcast(m_tab_k.data(), n, MPI_DOUBLE, 0);
    MPI::COMM_WORLD.Bcast(m_tab_Tk_tot.data(), n, MPI_DOUBLE, 0);
    MPI::COMM_WORLD.Bcast(m_tab_Tk_cdm.data(), n, MPI_DOUBLE, 0);
    MPI::COMM_WORLD.Bcast(m_tab_Tk_baryon.data(), n, MPI_DOUBLE, 0);
    MPI::COMM_WORLD.Bcast(m_tab_Tvk_tot.data(), n, MPI_DOUBLE, 0);
    MPI::COMM_WORLD.Bcast(m_tab_Tvk_cdm.data(), n, MPI_DOUBLE, 0);
    MPI::COMM_WORLD.Bcast(m_tab_Tvk_baryon.data(), n, MPI_DOUBLE, 0);
    // the scalars derived while reading are needed by compute() on every rank
    MPI::COMM_WORLD.Bcast(&m_kmin, 1, MPI_DOUBLE, 0);
    MPI::COMM_WORLD.Bcast(&m_kmax, 1, MPI_DOUBLE, 0);
    MPI::COMM_WORLD.Bcast(&m_nlines, 1, MPI_UNSIGNED, 0);
    int linbaryoninterp = m_linbaryoninterp ? 1 : 0;
    MPI::COMM_WORLD.Bcast(&linbaryoninterp, 1, MPI_INT, 0);
    m_linbaryoninterp = (linbaryoninterp != 0);
 #endif
  }
 public:
  // Read the required parameters from the configuration, load the table and build one
  // GSL cubic spline (with its own accelerator) over log10(k) for each tabulated quantity.
  transfer_CAMB_file_plugin(config_file &cf)
      : TransferFunction_plugin(cf)
  {
    m_filename_Tk = pcf_->get_value<std::string>("cosmology", "transfer_file");
    m_Omega_m = cf.get_value<double>("cosmology", "Omega_m"); //MvD
    m_Omega_b = cf.get_value<double>("cosmology", "Omega_b"); //MvD
    m_zstart = cf.get_value<double>("setup", "zstart");       //MvD
    read_table();
    const size_t npoints = m_tab_k.size();
    const double *const log10_k = &m_tab_k[0];
    // allocate accelerator and spline for one table and initialise the spline with it
    auto build_spline = [&](gsl_interp_accel *&acc, gsl_spline *&spline, const std::vector<double> &values) {
      acc = gsl_interp_accel_alloc();
      spline = gsl_spline_alloc(gsl_interp_cspline, npoints);
      gsl_spline_init(spline, log10_k, &values[0], npoints);
    };
    build_spline(acc_tot, spline_tot, m_tab_Tk_tot);
    build_spline(acc_cdm, spline_cdm, m_tab_Tk_cdm);
    build_spline(acc_baryon, spline_baryon, m_tab_Tk_baryon);
    build_spline(acc_vtot, spline_vtot, m_tab_Tvk_tot);
    build_spline(acc_vcdm, spline_vcdm, m_tab_Tvk_cdm);
    build_spline(acc_vbaryon, spline_vbaryon, m_tab_Tvk_baryon);
    tf_distinct_ = true; // different density between CDM v.s. Baryon
    tf_withvel_ = true;  // using velocity transfer function
  }
  // Free all splines first, then all accelerators, that the constructor allocated.
  ~transfer_CAMB_file_plugin()
  {
    gsl_spline *const splines[] = {spline_tot, spline_cdm, spline_baryon,
                                   spline_vtot, spline_vcdm, spline_vbaryon};
    for (gsl_spline *spline : splines)
      gsl_spline_free(spline);
    gsl_interp_accel *const accelerators[] = {acc_tot, acc_cdm, acc_baryon,
                                              acc_vtot, acc_vcdm, acc_vbaryon};
    for (gsl_interp_accel *acc : accelerators)
      gsl_interp_accel_free(acc);
  }
  // linear interpolation in log-log
  inline double extrap_right(double k, const tf_type &type) const
  {
    int n = m_tab_k.size() - 1, n1 = n - 1;
    double v1(1.0), v2(1.0);
    double lk = log10(k);
    double dk = m_tab_k[n] - m_tab_k[n1];
    double delk = lk - m_tab_k[n];
    switch (type)
    {
    case cdm:
      v1 = m_tab_Tk_cdm[n1];
      v2 = m_tab_Tk_cdm[n];
      return pow(10.0, (v2 - v1) / dk * (delk) + v2);
    case baryon:
      v1 = m_tab_Tk_baryon[n1];
      v2 = m_tab_Tk_baryon[n];
      if (m_linbaryoninterp)
        return std::max((v2 - v1) / dk * (delk) + v2, tiny);
      return pow(10.0, (v2 - v1) / dk * (delk) + v2);
    case vtotal: //>[150609SH: add]
      v1 = m_tab_Tvk_tot[n1];
      v2 = m_tab_Tvk_tot[n];
      return pow(10.0, (v2 - v1) / dk * (delk) + v2);
    case vcdm: //>[150609SH: add]
      v1 = m_tab_Tvk_cdm[n1];
      v2 = m_tab_Tvk_cdm[n];
      return pow(10.0, (v2 - v1) / dk * (delk) + v2);
    case vbaryon: //>[150609SH: add]
      v1 = m_tab_Tvk_baryon[n1];
      v2 = m_tab_Tvk_baryon[n];
      if (m_linbaryoninterp)
        return std::max((v2 - v1) / dk * (delk) + v2, tiny);
      return pow(10.0, (v2 - v1) / dk * (delk) + v2);
    case total:
      v1 = m_tab_Tk_tot[n1];
      v2 = m_tab_Tk_tot[n];
      return pow(10.0, (v2 - v1) / dk * (delk) + v2);
    default:
      throw std::runtime_error(
          "Invalid type requested in transfer function evaluation");
    }
    return 0.0;
  }
  inline double compute(double k, tf_type type) const
  {
    // use constant interpolation on the left side of the tabulated values
    if (k < m_kmin)
    {
      switch (type)
      {
      case cdm:
        return pow(10.0, m_tab_Tk_cdm[0]);
      case baryon:
        if (m_linbaryoninterp)
          return m_tab_Tk_baryon[0];
        return pow(10.0, m_tab_Tk_baryon[0]);
      case vtotal:
        return pow(10.0, m_tab_Tvk_tot[0]);
      case vcdm:
        return pow(10.0, m_tab_Tvk_cdm[0]);
      case vbaryon:
        if (m_linbaryoninterp)
          return m_tab_Tvk_baryon[0];
        return pow(10.0, m_tab_Tvk_baryon[0]);
      case total:
        return pow(10.0, m_tab_Tk_tot[0]);
      default:
        throw std::runtime_error(
            "Invalid type requested in transfer function evaluation");
      }
    }
    // use linear interpolation on the right side of the tabulated values
    else if (k > m_kmax)
      return extrap_right(k, type);
    double lk = log10(k);
    switch (type)
    {
    case cdm:
      return pow(10.0, gsl_spline_eval(spline_cdm, lk, acc_cdm));
    case baryon:
      if (m_linbaryoninterp)
        return gsl_spline_eval(spline_baryon, lk, acc_baryon);
      return pow(10.0, gsl_spline_eval(spline_baryon, lk, acc_baryon));
    case vtotal:
      return pow(10.0, gsl_spline_eval(spline_vtot, lk, acc_vtot)); //MvD
    case vcdm:
      return pow(10.0, gsl_spline_eval(spline_vcdm, lk, acc_vcdm));
    case vbaryon:
      if (m_linbaryoninterp)
        return gsl_spline_eval(spline_vbaryon, lk, acc_vbaryon);
      return pow(10.0, gsl_spline_eval(spline_vbaryon, lk, acc_vbaryon));
    case total:
      return pow(10.0, gsl_spline_eval(spline_tot, lk, acc_tot));
    default:
      throw std::runtime_error(
          "Invalid type requested in transfer function evaluation");
    }
  }
  // Lower k limit reported to callers: the second tabulated point (index 1), converted from log10.
  inline double get_kmin(void) const
  {
    const double log10_k = m_tab_k[1];
    return pow(10.0, log10_k);
  }
  // Upper k limit reported to callers: the second-to-last tabulated point, converted from log10.
  inline double get_kmax(void) const
  {
    const double log10_k = m_tab_k[m_tab_k.size() - 2];
    return pow(10.0, log10_k);
  }
 };
 // File-local creator object that associates the name "CAMB_file" with transfer_CAMB_file_plugin.
 // NOTE(review): presumably its constructor registers the plugin with the transfer function
 // plugin factory at static-initialisation time -- confirm in transfer_function_plugin.hh.
 namespace
 {
 TransferFunction_plugin_creator_concrete<transfer_CAMB_file_plugin> creator("CAMB_file");
 }
--- a/src/plugins/transfer_CLASS.cc
+++ b/src/plugins/transfer_CLASS.cc
@ -9,145 +9,328 @@
 #include <string>
 #include <vector>
 #include <memory>
 #include <sstream>
 #include <ClassEngine.hh>
 #include <general.hh>
 #include <config_file.hh>
 #include <transfer_function_plugin.hh>
 #include <math/interpolate.hh>
-#include <gsl/gsl_errno.h>
+class transfer_CLASS_plugin : public TransferFunction_plugin
-#include <gsl/gsl_spline.h>
+{
 class transfer_CLASS_plugin : public TransferFunction_plugin {
 private:
-    std::vector<double> tab_lnk_, tab_dtot_, tab_dc_, tab_db_, tab_ttot_, tab_tc_, tab_tb_;
+  interpolated_function_1d<true, true, false> delta_c_, delta_b_, delta_n_, delta_m_, theta_c_, theta_b_, theta_n_, theta_m_;
-    gsl_interp_accel *gsl_ia_dtot_, *gsl_ia_dc_, *gsl_ia_db_, *gsl_ia_ttot_, *gsl_ia_tc_, *gsl_ia_tb_;
+  interpolated_function_1d<true, true, false> delta_c0_, delta_b0_, delta_n0_, delta_m0_, theta_c0_, theta_b0_, theta_n0_, theta_m0_;
    gsl_spline *gsl_sp_dtot_, *gsl_sp_dc_, *gsl_sp_db_, *gsl_sp_ttot_, *gsl_sp_tc_, *gsl_sp_tb_;
    double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_;
-    void ClassEngine_get_data( void ){
+  // single fluid growing/decaying mode decomposition
-        std::vector<double> d_ncdm, t_ncdm, phi, psi;
+  // gsl_interp_accel *gsl_ia_Cplus_, *gsl_ia_Cminus_;
  // gsl_spline *gsl_sp_Cplus_, *gsl_sp_Cminus_;
  // std::vector<double> tab_Cplus_, tab_Cminus_;
-        csoca::ilog << "Computing TF via ClassEngine..." << std::endl << " ztarget = " << ztarget_ << ", zstart = " << zstart_ << " ..." << std::flush;
+  double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_, astart_, atarget_, A_s_, n_s_, sigma8_, Tcmb_, tnorm_;
  ClassParams pars_;
  std::unique_ptr<ClassEngine> the_ClassEngine_;
  std::ofstream ofs_class_input_;
  //! Add one CLASS input parameter and mirror it as a "name = value" line in the control parameter stream
  template <typename T>
  void add_class_parameter(std::string parameter_name, const T parameter_value)
  {
    // store the setting in the parameter set that init_ClassEngine() hands to ClassEngine
    pars_.add(parameter_name, parameter_value);

    // echo the same setting to ofs_class_input_; std::endl terminates the line and flushes
    ofs_class_input_ << parameter_name;
    ofs_class_input_ << " = ";
    ofs_class_input_ << parameter_value;
    ofs_class_input_ << std::endl;
  }
  //! Set up class parameters from MUSIC cosmological parameters and create the ClassEngine
  //  Every setting goes through add_class_parameter(), so it is stored in pars_ and also
  //  written to the control parameter stream. The engine is constructed from pars_ at the
  //  end and the elapsed wall-clock time is logged.
  void init_ClassEngine(void)
  {
    //--- general parameters ------------------------------------------
    add_class_parameter("z_max_pk", std::max(std::max(zstart_, ztarget_),199.0)); // largest of zstart, ztarget and 199
    add_class_parameter("P_k_max_h/Mpc", kmax_);
    add_class_parameter("output", "dTk,vTk");
    add_class_parameter("extra metric transfer functions","yes");
    // add_class_parameter("lensing", "no");
    //--- choose gauge ------------------------------------------------
    // add_class_parameter("extra metric transfer functions", "yes");
    add_class_parameter("gauge", "synchronous");
    //--- cosmological parameters, densities --------------------------
    add_class_parameter("h", h_);
    add_class_parameter("Omega_b", Omega_b_);
    add_class_parameter("Omega_cdm", Omega_m_ - Omega_b_);
    add_class_parameter("Omega_k", 0.0);
    // add_class_parameter("Omega_Lambda",1.0-Omega_m_);
    add_class_parameter("Omega_fld", 0.0);
    add_class_parameter("Omega_scf", 0.0);
    // add_class_parameter("fluid_equation_of_state","CLP");
    // add_class_parameter("w0_fld", -1 );
    // add_class_parameter("wa_fld", 0. );
    // add_class_parameter("cs2_fld", 1);
    //--- massive neutrinos -------------------------------------------
 #if 1
    //default off
    // add_class_parameter("Omega_ur",0.0);
    add_class_parameter("N_ur", N_ur_);
    add_class_parameter("N_ncdm", 0);
 #else
    // change above to enable
    add_class_parameter("N_ur", 0);
    add_class_parameter("N_ncdm", 1);
    add_class_parameter("m_ncdm", "0.4");
    add_class_parameter("T_ncdm", 0.71611);
 #endif
    //--- cosmological parameters, primordial -------------------------
    add_class_parameter("P_k_ini type", "analytic_Pk");
    // normalise with A_s when a positive value was supplied, otherwise with sigma8
    if( A_s_ > 0.0 ){
      add_class_parameter("A_s", A_s_);
    }else{
      add_class_parameter("sigma8", sigma8_);
    }
    add_class_parameter("n_s", n_s_);
    add_class_parameter("alpha_s", 0.0);
    add_class_parameter("T_cmb", Tcmb_);
    add_class_parameter("YHe", 0.248);
    // precision parameters
    add_class_parameter("k_per_decade_for_pk", 100);
    add_class_parameter("k_per_decade_for_bao", 100);
    add_class_parameter("compute damping scale", "yes");
    add_class_parameter("tol_perturb_integration", 1.e-8);
    add_class_parameter("tol_background_integration", 1e-9);
    // high precision options from cl_permille.pre:
    // precision file to be passed as input in order to achieve at least percent precision on scalar Cls
    add_class_parameter("hyper_flat_approximation_nu", 7000.);
    add_class_parameter("transfer_neglect_delta_k_S_t0", 0.17);
    add_class_parameter("transfer_neglect_delta_k_S_t1", 0.05);
    add_class_parameter("transfer_neglect_delta_k_S_t2", 0.17);
    add_class_parameter("transfer_neglect_delta_k_S_e", 0.13);
    add_class_parameter("delta_l_max", 1000);
    int class_verbosity = 0;
    add_class_parameter("background_verbose", class_verbosity);
    add_class_parameter("thermodynamics_verbose", class_verbosity);
    add_class_parameter("perturbations_verbose", class_verbosity);
    add_class_parameter("transfer_verbose", class_verbosity);
    add_class_parameter("primordial_verbose", class_verbosity);
    add_class_parameter("spectra_verbose", class_verbosity);
    add_class_parameter("nonlinear_verbose", class_verbosity);
    add_class_parameter("lensing_verbose", class_verbosity);
    add_class_parameter("output_verbose", class_verbosity);
    // output parameters, only needed for the control CLASS .ini file that we output
    // z_pk lists the redshifts in descending order and always ends with 0.0
    std::stringstream zlist;
    if (ztarget_ == zstart_)
      zlist << ztarget_ << ((ztarget_!=0.0)? ", 0.0" : "");
    else
      zlist << std::max(ztarget_, zstart_) << ", " << std::min(ztarget_, zstart_) << ", 0.0";
    add_class_parameter("z_pk", zlist.str());
    music::ilog << "Computing transfer function via ClassEngine..." << std::endl;
    double wtime = get_wtime();
-        ClassParams pars;
+    the_ClassEngine_ = std::make_unique<ClassEngine>(pars_, false);
        pars.add("extra metric transfer functions", "yes");
        pars.add("z_pk",ztarget_);
        pars.add("P_k_max_h/Mpc", kmax_);
        pars.add("h",h_);
        pars.add("Omega_b",Omega_b_);
        // pars.add("Omega_k",0.0);
        // pars.add("Omega_ur",0.0);
        pars.add("N_ur",N_ur_);
        pars.add("Omega_cdm",Omega_m_-Omega_b_);
        pars.add("Omega_Lambda",1.0-Omega_m_);
        // pars.add("Omega_fld",0.0);
        // pars.add("Omega_scf",0.0);
        pars.add("A_s",2.42e-9);
        pars.add("n_s",.96); // this doesn't matter for TF
        pars.add("output","dTk,vTk");
        pars.add("YHe",0.248);
        pars.add("k_per_decade_for_pk",50);
        pars.add("k_per_decade_for_bao",50);
        pars.add("compute damping scale","yes");
        pars.add("z_reio",-1.0); // make sure reionisation is not included
        std::unique_ptr<ClassEngine> CE = std::make_unique<ClassEngine>(pars, false);
        CE->getTk(ztarget_, tab_lnk_, tab_dc_, tab_db_, d_ncdm, tab_dtot_,
                tab_tc_, tab_tb_, t_ncdm, tab_ttot_, phi, psi );
    wtime = get_wtime() - wtime;
-        csoca::ilog << "   took " << wtime << " s / " << tab_lnk_.size() << " modes."  << std::endl;
+    music::ilog << "CLASS took " << wtime << " s." << std::endl;
  }
  //! run ClassEngine with parameters set up
  void run_ClassEngine(double z, std::vector<double> &k, std::vector<double> &dc, std::vector<double> &tc, std::vector<double> &db, std::vector<double> &tb,
                       std::vector<double> &dn, std::vector<double> &tn, std::vector<double> &dm, std::vector<double> &tm)
  {
    k.clear(); 
    dc.clear(); db.clear(); dn.clear(); dm.clear();
    tc.clear(); tb.clear(); tn.clear(); tm.clear();
    the_ClassEngine_->getTk(z, k, dc, db, dn, dm, tc, tb, tn, tm);
    real_t fc = (Omega_m_ - Omega_b_) / Omega_m_;
    real_t fb = Omega_b_ / Omega_m_;
    for (size_t i = 0; i < k.size(); ++i)
    {
      // convert to 'CAMB' format, since we interpolate loglog and
      // don't want negative numbers...
      auto ik2 = 1.0 / (k[i] * k[i]) * h_ * h_;
      dc[i] = -dc[i] * ik2;
      db[i] = -db[i] * ik2;
      dn[i] = -dn[i] * ik2;
      dm[i] = fc * dc[i] + fb * db[i];
      tc[i] = -tc[i] * ik2;
      tb[i] = -tb[i] * ik2;
      tn[i] = -tn[i] * ik2;
      tm[i] = fc * tc[i] + fb * tb[i];
    }
  }
 public:
-  explicit transfer_CLASS_plugin( ConfigFile &cf)
+  explicit transfer_CLASS_plugin(config_file &cf)
      : TransferFunction_plugin(cf)
  {
    // Reads the cosmology/setup entries from the config, initialises CLASS through
    // init_ClassEngine(), derives the normalisation tnorm_, and tabulates all species
    // twice: at z=0 (the *0_ members) and at ztarget_ (the members without the 0 suffix).
-    h_       = pcf_->GetValue<double>("cosmology","H0") / 100.0; 
+    this->tf_isnormalised_ = true;
    Omega_m_ = pcf_->GetValue<double>("cosmology","Omega_m"); 
    Omega_b_ = pcf_->GetValue<double>("cosmology","Omega_b");
    N_ur_    = pcf_->GetValueSafe<double>("cosmology","N_ur", 3.046);
    ztarget_ = pcf_->GetValueSafe<double>("cosmology","ztarget",0.0);
    zstart_  = pcf_->GetValue<double>("setup","zstart");
    double lbox = pcf_->GetValue<double>("setup","BoxLength");
    int nres = pcf_->GetValue<double>("setup","GridRes");
    kmax_    = 2.0*M_PI/lbox * nres/2 * sqrt(3) * 2.0; // 120% of spatial diagonal
-    this->ClassEngine_get_data();
+    ofs_class_input_.open("input_class_parameters.ini", std::ios::trunc);
-    gsl_ia_dtot_ = gsl_interp_accel_alloc();
+    h_ = pcf_->get_value<double>("cosmology", "H0") / 100.0;
-    gsl_ia_dc_   = gsl_interp_accel_alloc();
+    Omega_m_ = pcf_->get_value<double>("cosmology", "Omega_m");
-    gsl_ia_db_   = gsl_interp_accel_alloc();
+    Omega_b_ = pcf_->get_value<double>("cosmology", "Omega_b");
-    gsl_ia_ttot_ = gsl_interp_accel_alloc();
+    N_ur_ = pcf_->get_value_safe<double>("cosmology", "Neff", 3.046);
-    gsl_ia_tc_   = gsl_interp_accel_alloc();
+    ztarget_ = pcf_->get_value_safe<double>("cosmology", "ztarget", 0.0);
-    gsl_ia_tb_   = gsl_interp_accel_alloc();
+    atarget_ = 1.0 / (1.0 + ztarget_);
    zstart_ = pcf_->get_value<double>("setup", "zstart");
    astart_ = 1.0 / (1.0 + zstart_);
    A_s_ = pcf_->get_value_safe<double>("cosmology", "A_s", -1.0);
    n_s_ = pcf_->get_value<double>("cosmology", "nspec");
    Tcmb_ = cf.get_value_safe<double>("cosmology", "Tcmb", 2.7255);
-    gsl_sp_dtot_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
+    if (A_s_ > 0) {
-    gsl_sp_dc_   = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
+      music::ilog << "CLASS: Using A_s=" << A_s_<< " to normalise the transfer function." << std::endl;
-    gsl_sp_db_   = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
+    }else{
-    gsl_sp_ttot_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
+      sigma8_ = pcf_->get_value_safe<double>("cosmology", "sigma_8", -1.0);
-    gsl_sp_tc_   = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
+      if( sigma8_ < 0 ){
-    gsl_sp_tb_   = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
+        throw std::runtime_error("Need to specify either A_s or sigma_8 for CLASS plugin...");
      }
      music::ilog << "CLASS: Using sigma8_ =" << sigma8_<< " to normalise the transfer function." << std::endl;
    }
-    gsl_spline_init(gsl_sp_dtot_, &tab_lnk_[0], &tab_dtot_[0], tab_lnk_.size());
+    // determine highest k we will need for the resolution selected
-    gsl_spline_init(gsl_sp_dc_,   &tab_lnk_[0], &tab_dc_[0],   tab_lnk_.size());
+    double lbox = pcf_->get_value<double>("setup", "BoxLength");
-    gsl_spline_init(gsl_sp_db_,   &tab_lnk_[0], &tab_db_[0],   tab_lnk_.size());
+    int nres = pcf_->get_value<double>("setup", "GridRes"); // NOTE(review): read as double, then implicitly truncated to int
-    gsl_spline_init(gsl_sp_ttot_, &tab_lnk_[0], &tab_ttot_[0], tab_lnk_.size());
+    kmax_ = std::max(20.0, 2.0 * M_PI / lbox * nres / 2 * sqrt(3) * 2.0); // factor 2.0 on the spatial diagonal, but never below 20.0 (handed to CLASS as "P_k_max_h/Mpc")
    gsl_spline_init(gsl_sp_tc_,   &tab_lnk_[0], &tab_tc_[0],   tab_lnk_.size());
    gsl_spline_init(gsl_sp_tb_,   &tab_lnk_[0], &tab_tb_[0],   tab_lnk_.size());
-    kmin_ = std::exp(tab_lnk_[0]);
+    // initialise CLASS and get the normalisation
    this->init_ClassEngine();
    A_s_ = the_ClassEngine_->get_A_s(); // this is either the input one, or the one computed from sigma8
    // compute the normalisation to interface with MUSIC
    double k_p = pcf_->get_value_safe<double>("cosmology", "k_p", 0.05);
    tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p * h_, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0));
    // compute the transfer function at z=0 using CLASS engine
    std::vector<double> k, dc, tc, db, tb, dn, tn, dm, tm;
    this->run_ClassEngine(0.0, k, dc, tc, db, tb, dn, tn, dm, tm);
    delta_c0_.set_data(k, dc);
    theta_c0_.set_data(k, tc);
    delta_b0_.set_data(k, db);
    theta_b0_.set_data(k, tb);
    delta_n0_.set_data(k, dn);
    theta_n0_.set_data(k, tn);
    delta_m0_.set_data(k, dm);
    theta_m0_.set_data(k, tm);
     // compute the transfer function at z=z_target using CLASS engine
    this->run_ClassEngine(ztarget_, k, dc, tc, db, tb, dn, tn, dm, tm);
    delta_c_.set_data(k, dc);
    theta_c_.set_data(k, tc);
    delta_b_.set_data(k, db);
    theta_b_.set_data(k, tb);
    delta_n_.set_data(k, dn);
    theta_n_.set_data(k, tn);
    delta_m_.set_data(k, dm);
    theta_m_.set_data(k, tm);
    // from here on kmin_/kmax_ hold the first and last wavenumber of the ztarget_ table
    kmin_ = k[0];
    kmax_ = k.back();
    music::ilog << "CLASS table contains k = " << this->get_kmin() << " to " << this->get_kmax() << " h Mpc-1." << std::endl;
    //--------------------------------------------------------------------------
    // single fluid growing/decaying mode decomposition
    //--------------------------------------------------------------------------
    /*gsl_ia_Cplus_ = gsl_interp_accel_alloc();
    gsl_ia_Cminus_ = gsl_interp_accel_alloc();
    gsl_sp_Cplus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
    gsl_sp_Cminus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size());
    tab_Cplus_.assign(tab_lnk_.size(), 0);
    tab_Cminus_.assign(tab_lnk_.size(), 0);
    std::ofstream ofs("grow_decay.txt");
    for (size_t i = 0; i < tab_lnk_.size(); ++i)
    {
      tab_Cplus_[i] = (3.0 / 5.0 * tab_dtot_[i] / atarget_ - 2.0 / 5.0 * tab_ttot_[i] / atarget_);
      tab_Cminus_[i] = (2.0 / 5.0 * std::pow(atarget_, 1.5) * (tab_dtot_[i] + tab_ttot_[i]));
      ofs << std::exp(tab_lnk_[i]) << " " << tab_Cplus_[i] << " " << tab_Cminus_[i] << " " << tab_dtot_[i] << " " << tab_ttot_[i] << std::endl;
    }
    gsl_spline_init(gsl_sp_Cplus_, &tab_lnk_[0], &tab_Cplus_[0], tab_lnk_.size());
    gsl_spline_init(gsl_sp_Cminus_, &tab_lnk_[0], &tab_Cminus_[0], tab_lnk_.size());*/
    //--------------------------------------------------------------------------
    tf_distinct_ = true;
    tf_withvel_ = true;
    tf_withtotal0_ = true;
  }
-  ~transfer_CLASS_plugin(){
+  ~transfer_CLASS_plugin()
-    gsl_spline_free(gsl_sp_dtot_);
+  {
    // NOTE(review): the gsl_spline_free / gsl_interp_accel_free calls below release the
    // gsl_sp_* / gsl_ia_* handles of the spline-based implementation; the updated constructor
    // no longer allocates them -- confirm these calls are absent from the final file.
    gsl_spline_free(gsl_sp_dc_);
    gsl_spline_free(gsl_sp_db_);
    gsl_spline_free(gsl_sp_ttot_);
    gsl_spline_free(gsl_sp_tc_);
    gsl_spline_free(gsl_sp_tb_);
    gsl_interp_accel_free(gsl_ia_dtot_);
    gsl_interp_accel_free(gsl_ia_dc_);
    gsl_interp_accel_free(gsl_ia_db_);
    gsl_interp_accel_free(gsl_ia_ttot_);
    gsl_interp_accel_free(gsl_ia_tc_);
    gsl_interp_accel_free(gsl_ia_tb_);
  }
-  inline double compute(double k, tf_type type) const {
+  inline double compute(double k, tf_type type) const
-      gsl_spline *splineT = nullptr;
+  {
-      gsl_interp_accel *accT = nullptr;
+    k *= h_; // the tables and kmin_/kmax_ are compared against k * h_ (get_kmin/get_kmax divide by h_ again)
-      switch(type){
+
-          case total:   splineT = gsl_sp_dtot_; accT = gsl_ia_dtot_; break;
+    if (k < kmin_ || k > kmax_)
-          case cdm:     splineT = gsl_sp_dc_;   accT = gsl_ia_dc_;   break;
+    {
-          case baryon:  splineT = gsl_sp_db_;   accT = gsl_ia_db_;   break;
+      return 0.0; // outside the tabulated range the transfer function is reported as zero
-          case vtotal:  splineT = gsl_sp_ttot_; accT = gsl_ia_ttot_; break;
+    }
-          case vcdm:    splineT = gsl_sp_tc_;   accT = gsl_ia_tc_;   break;
+
-          case vbaryon: splineT = gsl_sp_tb_;   accT = gsl_ia_tb_;   break;
+    real_t val(0.0);
    // pick the interpolation table that belongs to the requested type
    switch (type)
    {
      // values at ztarget:
    case total:
      val = delta_m_(k); break;
    case cdm:
      val = delta_c_(k); break;
    case baryon:
      val = delta_b_(k); break;
    case vtotal:
      val = theta_m_(k); break;
    case vcdm:
      val = theta_c_(k); break;
    case vbaryon:
      val = theta_b_(k); break;
      // values at z=0 (the *0_ tables are filled from run_ClassEngine(0.0, ...) in the constructor):
    case total0:
      val = delta_m0_(k); break;
    case cdm0:
      val = delta_c0_(k); break;
    case baryon0:
      val = delta_b0_(k); break;
    case vtotal0:
      val = theta_m0_(k); break;
    case vcdm0:
      val = theta_c0_(k); break;
    case vbaryon0:
      val = theta_b0_(k); break;
    default:
      throw std::runtime_error("Invalid type requested in transfer function evaluation");
    }
-
+    return val * tnorm_; // apply the normalisation computed in the constructor
      double d = (k<=kmin_)? gsl_spline_eval(splineT, std::log(kmin_), accT) 
        : gsl_spline_eval(splineT, std::log(k*h_), accT);
      return -d/(k*k);
  }
-  inline double get_kmin(void) const { return std::exp(tab_lnk_[0])/h_; }
+  inline double get_kmin(void) const { return kmin_ / h_; } // smallest tabulated k divided by h_; compute() multiplies its argument by h_ before comparing with kmin_
-  inline double get_kmax(void) const { return std::exp(tab_lnk_[tab_lnk_.size()-1])/h_; }
+  inline double get_kmax(void) const { return kmax_ / h_; } // largest tabulated k divided by h_; compute() multiplies its argument by h_ before comparing with kmax_
 };
-namespace {
+namespace
 {
 // File-local creator object for transfer_CLASS_plugin, constructed with the name "CLASS".
 // NOTE(review): presumably this is what makes the plugin selectable by that name -- confirm in transfer_function_plugin.hh.
 TransferFunction_plugin_creator_concrete<transfer_CLASS_plugin> creator("CLASS");
 }
--- a/src/plugins/transfer_eisenstein.cc
+++ b/src/plugins/transfer_eisenstein.cc
@ -207,13 +207,13 @@ public:
 	 \param Tcmb mean temperature of the CMB fluctuations (defaults to
 	 Tcmb = 2.726 if not specified)
 	 */
-  transfer_eisenstein_plugin(ConfigFile &cf)
+  transfer_eisenstein_plugin(config_file &cf)
      : TransferFunction_plugin(cf)
  {
-    double Tcmb = pcf_->GetValueSafe<double>("cosmology", "Tcmb", 2.726);
+    double Tcmb = pcf_->get_value_safe<double>("cosmology", "Tcmb", 2.726);
-    double H0 = pcf_->GetValue<double>("cosmology", "H0");
+    double H0 = pcf_->get_value<double>("cosmology", "H0");
-    double Omega_m = pcf_->GetValue<double>("cosmology", "Omega_m");
+    double Omega_m = pcf_->get_value<double>("cosmology", "Omega_m");
-    double Omega_b = pcf_->GetValue<double>("cosmology", "Omega_b");
+    double Omega_b = pcf_->get_value<double>("cosmology", "Omega_b");
    etf_.set_parameters(H0, Omega_m, Omega_b, Tcmb);
@ -257,15 +257,15 @@ protected:
  };
 public:
-  transfer_eisenstein_wdm_plugin(ConfigFile &cf)
+  transfer_eisenstein_wdm_plugin(config_file &cf)
      : TransferFunction_plugin(cf)
  {
-    double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726);
+    double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726);
-    omegam_ = pcf_->GetValue<double>("cosmology", "Omega_m");
+    omegam_ = pcf_->get_value<double>("cosmology", "Omega_m");
-    omegab_ = pcf_->GetValue<double>("cosmology", "Omega_b");
+    omegab_ = pcf_->get_value<double>("cosmology", "Omega_b");
-    H0_ = pcf_->GetValue<double>("cosmology", "H0");
+    H0_ = pcf_->get_value<double>("cosmology", "H0");
    m_h0 = H0_ / 100.0;
-    wdmm_ = pcf_->GetValue<double>("cosmology", "WDMmass");
+    wdmm_ = pcf_->get_value<double>("cosmology", "WDMmass");
    etf_.set_parameters(H0_, omegam_, omegab_, Tcmb);
@ -273,7 +273,7 @@ public:
    typemap_.insert(std::pair<std::string, int>("VIEL", wdm_viel));             // add the other types
    typemap_.insert(std::pair<std::string, int>("BODE_WRONG", wdm_bode_wrong)); // add the other types
-    type_ = pcf_->GetValueSafe<std::string>("cosmology", "WDMtftype", "BODE");
+    type_ = pcf_->get_value_safe<std::string>("cosmology", "WDMtftype", "BODE");
    //type_ = std::string( toupper( type_.c_str() ) );
@ -286,29 +286,29 @@ public:
    {
    //... parameterisation from Bode et al. (2001), ApJ, 556, 93
    case wdm_bode:
-      wdmnu_ = pcf_->GetValueSafe<double>("cosmology", "WDMnu", 1.0);
+      wdmnu_ = pcf_->get_value_safe<double>("cosmology", "WDMnu", 1.0);
-      wdmgx_ = pcf_->GetValueSafe<double>("cosmology", "WDMg_x", 1.5);
+      wdmgx_ = pcf_->get_value_safe<double>("cosmology", "WDMg_x", 1.5);
      m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ * 0.01 / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29);
      break;
    //... parameterisation from Viel et al. (2005), Phys Rev D, 71
    case wdm_viel:
-      wdmnu_ = pcf_->GetValueSafe<double>("cosmology", "WDMnu", 1.12);
+      wdmnu_ = pcf_->get_value_safe<double>("cosmology", "WDMnu", 1.12);
      m_WDMalpha = 0.049 * pow(omegam_ / 0.25, 0.11) * pow(H0_ * 0.01 / 0.7, 1.22) * pow(wdmm_, -1.11);
      break;
    //.... below is for historical reasons due to the buggy parameterisation
    //.... in early versions of MUSIC, but apart from H instead of h, Bode et al.
    case wdm_bode_wrong:
-      wdmnu_ = pcf_->GetValueSafe<double>("cosmology", "WDMnu", 1.0);
+      wdmnu_ = pcf_->get_value_safe<double>("cosmology", "WDMnu", 1.0);
-      wdmgx_ = pcf_->GetValueSafe<double>("cosmology", "WDMg_x", 1.5);
+      wdmgx_ = pcf_->get_value_safe<double>("cosmology", "WDMg_x", 1.5);
      m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29);
      break;
    default:
-      wdmnu_ = pcf_->GetValueSafe<double>("cosmology", "WDMnu", 1.0);
+      wdmnu_ = pcf_->get_value_safe<double>("cosmology", "WDMnu", 1.0);
-      wdmgx_ = pcf_->GetValueSafe<double>("cosmology", "WDMg_x", 1.5);
+      wdmgx_ = pcf_->get_value_safe<double>("cosmology", "WDMg_x", 1.5);
      m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ * 0.01 / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29);
      break;
    }
@ -340,20 +340,20 @@ protected:
  eisenstein_transfer etf_;
 public:
-  transfer_eisenstein_cdmbino_plugin(ConfigFile &cf)
+  transfer_eisenstein_cdmbino_plugin(config_file &cf)
      : TransferFunction_plugin(cf)
  { 
-    double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726);
+    double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726);
-    omegam_ = pcf_->GetValue<double>("cosmology", "Omega_m");
+    omegam_ = pcf_->get_value<double>("cosmology", "Omega_m");
-    omegab_ = pcf_->GetValue<double>("cosmology", "Omega_b");
+    omegab_ = pcf_->get_value<double>("cosmology", "Omega_b");
-    H0_ = pcf_->GetValue<double>("cosmology", "H0");
+    H0_ = pcf_->get_value<double>("cosmology", "H0");
    m_h0 = H0_ / 100.0;
    etf_.set_parameters(H0_, omegam_, omegab_, Tcmb);
-    mcdm_ = pcf_->GetValueSafe<double>("cosmology", "CDM_mass", 100.0); // bino particle mass in GeV
+    mcdm_ = pcf_->get_value_safe<double>("cosmology", "CDM_mass", 100.0); // bino particle mass in GeV
-    Tkd_ = pcf_->GetValueSafe<double>("cosmology", "CDM_Tkd", 33.0);    // temperature at which CDM particle kinetically decouples (in MeV)
+    Tkd_ = pcf_->get_value_safe<double>("cosmology", "CDM_Tkd", 33.0);    // temperature at which CDM particle kinetically decouples (in MeV)
    kfs_ = 1.7e6 / m_h0 * sqrt(mcdm_ / 100. * Tkd_ / 30.) / (1.0 + log(Tkd_ / 30.) / 19.2);
    kd_ = 3.8e7 / m_h0 * sqrt(mcdm_ / 100. * Tkd_ / 30.);
@ -395,19 +395,19 @@ protected:
  eisenstein_transfer etf_;
 public:
-  transfer_eisenstein_cutoff_plugin(ConfigFile &cf)
+  transfer_eisenstein_cutoff_plugin(config_file &cf)
      : TransferFunction_plugin(cf)
  { 
    // Reads Tcmb (default 2.726), Omega_m, Omega_b and H0 from the "cosmology" section,
    // passes them to etf_.set_parameters(), and reads the cutoff parameter Rcut (default 1.0).
    // NOTE(review): the creator for "eisenstein_cutoff" is commented out at the end of this
    // file in the same change, so this plugin appears to be unregistered -- confirm intent.
-    double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726);
+    double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726);
-    omegam_ = pcf_->GetValue<double>("cosmology", "Omega_m");
+    omegam_ = pcf_->get_value<double>("cosmology", "Omega_m");
-    omegab_ = pcf_->GetValue<double>("cosmology", "Omega_b");
+    omegab_ = pcf_->get_value<double>("cosmology", "Omega_b");
-    H0_ = pcf_->GetValue<double>("cosmology", "H0");
+    H0_ = pcf_->get_value<double>("cosmology", "H0");
    m_h0 = H0_ / 100.0;
    etf_.set_parameters(H0_, omegam_, omegab_, Tcmb);
-    Rcut_ = pcf_->GetValueSafe<double>("cosmology", "Rcut", 1.0);
+    Rcut_ = pcf_->get_value_safe<double>("cosmology", "Rcut", 1.0);
  }
  inline double compute(double k, tf_type type) const
@ -434,5 +434,5 @@ namespace
 TransferFunction_plugin_creator_concrete<transfer_eisenstein_plugin> creator("eisenstein");
 TransferFunction_plugin_creator_concrete<transfer_eisenstein_wdm_plugin> creator2("eisenstein_wdm");
 TransferFunction_plugin_creator_concrete<transfer_eisenstein_cdmbino_plugin> creator3("eisenstein_cdmbino");
-TransferFunction_plugin_creator_concrete<transfer_eisenstein_cutoff_plugin> creator4("eisenstein_cutoff");
+// TransferFunction_plugin_creator_concrete<transfer_eisenstein_cutoff_plugin> creator4("eisenstein_cutoff");
 } // namespace
--- a/src/random_plugin.cc
+++ b/src/random_plugin.cc
@ -13,32 +13,33 @@ void print_RNG_plugins()
    std::map<std::string, RNG_plugin_creator *> &m = get_RNG_plugin_map();
    std::map<std::string, RNG_plugin_creator *>::iterator it;
    it = m.begin();
-    csoca::ilog << "- Available random number generator plug-ins:" << std::endl;
+    music::ilog << "Available random number generator plug-ins:" << std::endl;
    while (it != m.end())
    {
        if ((*it).second){
-            csoca::ilog.Print("\t\'%s\'\n", (*it).first.c_str());
+            music::ilog.Print("\t\'%s\'\n", (*it).first.c_str());
        }
        ++it;
    }
    music::ilog << std::endl;
 }
-std::unique_ptr<RNG_plugin> select_RNG_plugin(ConfigFile &cf)
+std::unique_ptr<RNG_plugin> select_RNG_plugin(config_file &cf)
 {
-    std::string rngname = cf.GetValueSafe<std::string>("random", "generator", "MUSIC");
+    std::string rngname = cf.get_value_safe<std::string>("random", "generator", "MUSIC");
    RNG_plugin_creator *the_RNG_plugin_creator = get_RNG_plugin_map()[rngname];
    if (!the_RNG_plugin_creator)
    {
-        csoca::ilog.Print("Invalid/Unregistered random number generator plug-in encountered : %s", rngname.c_str());
+        music::ilog.Print("Invalid/Unregistered random number generator plug-in encountered : %s", rngname.c_str());
        print_RNG_plugins();
        throw std::runtime_error("Unknown random number generator plug-in");
    }
    else
    {
-        csoca::ilog << "-------------------------------------------------------------------------------" << std::endl;
+        music::ilog << "-------------------------------------------------------------------------------" << std::endl;
-        csoca::ilog << std::setw(32) << std::left << "Random number generator plugin" << " : " << rngname << std::endl;
+        music::ilog << std::setw(32) << std::left << "Random number generator plugin" << " : " << rngname << std::endl;
    }
    return std::move(the_RNG_plugin_creator->Create(cf));
--- a/src/testing.cc
+++ b/src/testing.cc
@ -9,7 +9,7 @@ namespace testing
 {
 void output_potentials_and_densities(
-    ConfigFile &the_config,
+    config_file &the_config,
    size_t ngrid, real_t boxlen,
    Grid_FFT<real_t> &phi,
    Grid_FFT<real_t> &phi2,
@ -17,8 +17,8 @@ void output_potentials_and_densities(
    Grid_FFT<real_t> &phi3b,
    std::array<Grid_FFT<real_t> *, 3> &A3)
 {
-    const std::string fname_hdf5 = the_config.GetValueSafe<std::string>("output", "fname_hdf5", "output.hdf5");
+    const std::string fname_hdf5 = the_config.get_value_safe<std::string>("output", "fname_hdf5", "output.hdf5");
-    const std::string fname_analysis = the_config.GetValueSafe<std::string>("output", "fbase_analysis", "output");
+    const std::string fname_analysis = the_config.get_value_safe<std::string>("output", "fbase_analysis", "output");
    Grid_FFT<real_t> delta({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
    Grid_FFT<real_t> delta2({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
@ -98,7 +98,7 @@ void output_potentials_and_densities(
 }
 void output_velocity_displacement_symmetries(
-    ConfigFile &the_config,
+    config_file &the_config,
    size_t ngrid, real_t boxlen, real_t vfac, real_t dplus,
    Grid_FFT<real_t> &phi,
    Grid_FFT<real_t> &phi2,
@ -107,8 +107,8 @@ void output_velocity_displacement_symmetries(
    std::array<Grid_FFT<real_t> *, 3> &A3,
    bool bwrite_out_fields)
 {
-    const std::string fname_hdf5 = the_config.GetValueSafe<std::string>("output", "fname_hdf5", "output.hdf5");
+    const std::string fname_hdf5 = the_config.get_value_safe<std::string>("output", "fname_hdf5", "output.hdf5");
-    const std::string fname_analysis = the_config.GetValueSafe<std::string>("output", "fbase_analysis", "output");
+    const std::string fname_analysis = the_config.get_value_safe<std::string>("output", "fbase_analysis", "output");
    real_t vfac1 = vfac;
    real_t vfac2 = 2 * vfac;
@ -232,7 +232,7 @@ void output_velocity_displacement_symmetries(
    }
-    csoca::ilog << "std. deviation of invariant : ( D+ | I_xy | I_yz | I_zx ) \n"
+    music::ilog << "std. deviation of invariant : ( D+ | I_xy | I_yz | I_zx ) \n"
                    << std::setw(16) << dplus << " "
                    << std::setw(16) << Icomp[0] << " "
                    << std::setw(16) << Icomp[1] << " "
@ -241,7 +241,8 @@ void output_velocity_displacement_symmetries(
 }
 void output_convergence(
-    ConfigFile &the_config,
+    config_file &the_config,
    cosmology::calculator* the_cosmo_calc,
    std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus,
    Grid_FFT<real_t> &phi,
    Grid_FFT<real_t> &phi2,
@ -249,7 +250,6 @@ void output_convergence(
    Grid_FFT<real_t> &phi3b,
    std::array<Grid_FFT<real_t> *, 3> &A3)
 {
    // scale all potentials to remove dplus0
    phi /= dplus;
    phi2 /= dplus * dplus;
@ -259,11 +259,95 @@ void output_convergence(
    (*A3[1]) /= dplus * dplus * dplus;
    (*A3[2]) /= dplus * dplus * dplus;
    ////////////////////// theoretical convergence radius //////////////////////
    // compute phi_code
    Grid_FFT<real_t> phi_code({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
    phi_code.FourierTransformForward(false);
    #pragma omp parallel for //collapse(3)
    for (std::size_t i = 0; i < phi_code.size(0); ++i) {
        for (std::size_t j = 0; j < phi_code.size(1); ++j) {
            for (std::size_t k = 0; k < phi_code.size(2); ++k) {
                std::size_t idx = phi_code.get_idx(i, j, k);
                phi_code.kelem(idx) = -phi.kelem(idx);
            }
        }
    }
    // initialize norm to 0
    Grid_FFT<real_t> nabla_vini_norm({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
    #pragma omp parallel for //collapse(3)
    for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) {
        for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) {
            for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) {
                std::size_t idx = nabla_vini_norm.get_idx(i, j, k);
                nabla_vini_norm.relem(idx) = 0.0;
            }
        }
    }
    Grid_FFT<real_t> nabla_vini_mn({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
    for(std::size_t m = 0; m < 3; m++) {
        for(std::size_t n = m; n < 3; n++) {
            nabla_vini_mn.FourierTransformForward(false);
            #pragma omp parallel for //collapse(3)
            for (std::size_t i = 0; i < phi_code.size(0); ++i) {
                for (std::size_t j = 0; j < phi_code.size(1); ++j) {
                    for (std::size_t k = 0; k < phi_code.size(2); ++k) {
                        std::size_t idx = phi_code.get_idx(i, j, k);
                        auto kk = phi_code.get_k<real_t>(i, j, k);
                        nabla_vini_mn.kelem(idx) = phi_code.kelem(idx) * (kk[m] * kk[n]);
                    }
                }
            }
            nabla_vini_mn.FourierTransformBackward();
            nabla_vini_mn *= (3.2144004915 / the_cosmo_calc->get_growth_factor(1.0));
            // sum of squares
            #pragma omp parallel for //collapse(3)
            for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) {
                for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) {
                    for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) {
                        std::size_t idx = nabla_vini_norm.get_idx(i, j, k);
                        if(m != n) {
                            nabla_vini_norm.relem(idx) += (2.0 * nabla_vini_mn.relem(idx) * nabla_vini_mn.relem(idx));
                        } else {
                            nabla_vini_norm.relem(idx) += (nabla_vini_mn.relem(idx) * nabla_vini_mn.relem(idx));
                        }
                    }
                }
            }
        }
    }
    // square root
    #pragma omp parallel for //collapse(3)
    for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) {
        for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) {
            for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) {
                std::size_t idx = nabla_vini_norm.get_idx(i, j, k);
                nabla_vini_norm.relem(idx) = std::sqrt(nabla_vini_norm.relem(idx));
            }
        }
    }
    // get t_eds
    Grid_FFT<real_t> t_eds({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
    #pragma omp parallel for //collapse(3)
    for (std::size_t i = 0; i < t_eds.size(0); ++i) {
        for (std::size_t j = 0; j < t_eds.size(1); ++j) {
            for (std::size_t k = 0; k < t_eds.size(2); ++k) {
                std::size_t idx = t_eds.get_idx(i, j, k);
                t_eds.relem(idx) = 0.0204 / nabla_vini_norm.relem(idx);
            }
        }
    }
    ////////////////////////// 3lpt convergence test ///////////////////////////
    // initialize grids to 0
    Grid_FFT<real_t> psi_1({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
    Grid_FFT<real_t> psi_2({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
    Grid_FFT<real_t> psi_3({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
-#pragma omp parallel for collapse(3)
+    #pragma omp parallel for //collapse(3)
    for (std::size_t i = 0; i < psi_1.size(0); ++i) {
        for (std::size_t j = 0; j < psi_1.size(1); ++j) {
            for (std::size_t k = 0; k < psi_1.size(2); ++k) {
@ -290,7 +374,7 @@ void output_convergence(
        psi_2_tmp.FourierTransformForward(false);
        psi_3_tmp.FourierTransformForward(false);
-#pragma omp parallel for collapse(3)
+        #pragma omp parallel for //collapse(3)
        for (std::size_t i = 0; i < phi.size(0); ++i) {
            for (std::size_t j = 0; j < phi.size(1); ++j) {
                for (std::size_t k = 0; k < phi.size(2); ++k) {
@ -311,7 +395,7 @@ void output_convergence(
        psi_3_tmp.FourierTransformBackward();
        // sum of squares
-#pragma omp parallel for collapse(3)
+        #pragma omp parallel for //collapse(3)
        for (std::size_t i = 0; i < psi_1.size(0); ++i) {
            for (std::size_t j = 0; j < psi_1.size(1); ++j) {
                for (std::size_t k = 0; k < psi_1.size(2); ++k) {
@ -325,7 +409,7 @@ void output_convergence(
    } // loop on dimensions
    // apply square root for the L2 norm
-#pragma omp parallel for collapse(3)
+#pragma omp parallel for //collapse(3)
    for (std::size_t i = 0; i < psi_1.size(0); ++i) {
        for (std::size_t j = 0; j < psi_1.size(1); ++j) {
            for (std::size_t k = 0; k < psi_1.size(2); ++k) {
@ -339,7 +423,7 @@ void output_convergence(
    // convergence radius
    Grid_FFT<real_t> inv_convergence_radius({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
-#pragma omp parallel for collapse(3)
+    #pragma omp parallel for //collapse(3)
    for (std::size_t i = 0; i < psi_1.size(0); ++i) {
        for (std::size_t j = 0; j < psi_1.size(1); ++j) {
            for (std::size_t k = 0; k < psi_1.size(2); ++k) {
@ -351,13 +435,17 @@ void output_convergence(
        }
    }
-    // write results
+    ////////////////////////////// write results ///////////////////////////////
-    unlink("convergence_test.hdf5");
+    std::string convergence_test_filename("convergence_test.hdf5");
-    inv_convergence_radius.Write_to_HDF5("convergence_test.hdf5", "inv_convergence_radius");
+    unlink(convergence_test_filename.c_str());
-    psi_1.Write_to_HDF5("convergence_test.hdf5", "psi_1_norm");
+#if defined(USE_MPI)
-    psi_2.Write_to_HDF5("convergence_test.hdf5", "psi_2_norm");
+    MPI_Barrier(MPI_COMM_WORLD);
-    psi_3.Write_to_HDF5("convergence_test.hdf5", "psi_3_norm");
+#endif
-
+    t_eds.Write_to_HDF5(convergence_test_filename, "t_eds");
    inv_convergence_radius.Write_to_HDF5(convergence_test_filename, "inv_convergence_radius");
    // psi_1.Write_to_HDF5(convergence_test_filename, "psi_1_norm");
    // psi_2.Write_to_HDF5(convergence_test_filename, "psi_2_norm");
    // psi_3.Write_to_HDF5(convergence_test_filename, "psi_3_norm");
 }
 } // namespace testing
--- a/src/transfer_function_plugin.cc
+++ b/src/transfer_function_plugin.cc
@ -13,31 +13,32 @@ void print_TransferFunction_plugins()
    std::map<std::string, TransferFunction_plugin_creator *> &m = get_TransferFunction_plugin_map();
    std::map<std::string, TransferFunction_plugin_creator *>::iterator it;
    it = m.begin();
-    csoca::ilog << "Available transfer function plug-ins:" << std::endl;
+    music::ilog << "Available transfer function plug-ins:" << std::endl;
    while (it != m.end())
    {
        if ((*it).second)
-            csoca::ilog << "\t\'" << (*it).first << "\'" << std::endl;
+            music::ilog << "\t\'" << (*it).first << "\'" << std::endl;
        ++it;
    }
    music::ilog << std::endl;
 }
-std::unique_ptr<TransferFunction_plugin> select_TransferFunction_plugin(ConfigFile &cf)
+std::unique_ptr<TransferFunction_plugin> select_TransferFunction_plugin(config_file &cf)
 {
-    std::string tfname = cf.GetValue<std::string>("cosmology", "transfer");
+    std::string tfname = cf.get_value<std::string>("cosmology", "transfer");
    TransferFunction_plugin_creator *the_TransferFunction_plugin_creator = get_TransferFunction_plugin_map()[tfname];
    if (!the_TransferFunction_plugin_creator)
    {
-        csoca::elog << "Invalid/Unregistered transfer function plug-in encountered : " << tfname << std::endl;
+        music::elog << "Invalid/Unregistered transfer function plug-in encountered : " << tfname << std::endl;
        print_TransferFunction_plugins();
        throw std::runtime_error("Unknown transfer function plug-in");
    }
    else
    {
-        csoca::ilog << "-------------------------------------------------------------------------------" << std::endl;
+        music::ilog << "-------------------------------------------------------------------------------" << std::endl;
-        csoca::ilog << std::setw(32) << std::left << "Transfer function plugin" << " : " << tfname << std::endl;
+        music::ilog << std::setw(32) << std::left << "Transfer function plugin" << " : " << tfname << std::endl;
    }
    return std::move(the_TransferFunction_plugin_creator->create(cf));
		`@ -1 +1 @@`
			`Subproject commit b34d7f6c2b72eab3a347c28e62298d62ca9dd69b`				`Subproject commit 6adecae2f30172a94e003155090791abf509d995`
		`@ -0,0 +1 @@`
							`Subproject commit ec6b82cc1122ba029a7a7142cf836014e992e68c`