commit 9549f5f19596db9cea7ee199081a9ef4c9c31813
Author: Oliver Hahn <oliverjhahn@gmail.com>
Date:   Tue May 7 01:05:16 2019 +0200

    initial commit

diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..4e2dc4f
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,89 @@
+cmake_minimum_required(VERSION 3.9)
+set(PRGNAME fastLPT)
+project(fastLPT)
+
+
+#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -march=native -Wall -fno-omit-frame-pointer -g  -fsanitize=address")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall")
+find_package(PkgConfig REQUIRED)
+
+set(CMAKE_MODULE_PATH
+        "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}")
+
+
+########################################################################################################################
+# OpenMP
+find_package(OpenMP REQUIRED)
+if(OPENMP_FOUND)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+endif()
+
+########################################################################################################################
+# mpi support
+option(ENABLE_MPI "Enable MPI support" ON)
+if(ENABLE_MPI)
+  find_package(MPI)
+  if(MPI_CXX_FOUND)
+    message(STATUS "MPI_CXX_COMPILER = ${MPI_CXX_COMPILER}")
+    message(STATUS "MPI_CXX_INCLUDE_PATH = ${MPI_CXX_INCLUDE_PATH}")
+    message(STATUS "MPI_CXX_LIBRARIES = ${MPI_CXX_LIBRARIES}")
+  endif(MPI_CXX_FOUND)
+endif(ENABLE_MPI)
+
+
+# FFTW
+find_package(FFTW3 REQUIRED)
+
+# GSL
+find_package(GSL REQUIRED)
+
+# HDF5
+find_package(HDF5 REQUIRED)
+
+########################################################################################################################
+# INCLUDES
+include_directories(${PROJECT_SOURCE_DIR}/include)
+
+# SOURCES
+# get all the *.cc files in the subfolders
+file( GLOB SOURCES
+  ${PROJECT_SOURCE_DIR}/src/*.cc
+)
+
+# PLUGINS
+# get all the *.cc files in the plugin subfolder
+file( GLOB PLUGINS
+  ${PROJECT_SOURCE_DIR}/src/plugins/*.cc
+)
+
+add_executable(${PRGNAME} ${SOURCES} ${PLUGINS})
+
+set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 14)
+
+# mpi flags
+if(MPI_CXX_FOUND)
+  if(FFTW_MPI_FOUND)
+    target_link_libraries(${PRGNAME} ${FFTW_MPI_LIBRARIES})
+    target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI")
+  endif(FFTW_MPI_FOUND)
+
+  target_include_directories(${PRGNAME} PRIVATE ${MPI_CXX_INCLUDE_PATH})
+  target_compile_options(${PRGNAME} PRIVATE "-DUSE_MPI")
+  target_link_libraries(${PRGNAME} ${MPI_LIBRARIES})
+endif(MPI_CXX_FOUND)
+
+if(FFTW_THREADS_FOUND) 
+  target_link_libraries(${PRGNAME} ${FFTW_THREADS_LIBRARIES})
+  target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS")
+endif(FFTW_THREADS_FOUND)
+
+target_link_libraries(${PRGNAME} ${FFTW_LIBRARIES})
+target_include_directories(${PRGNAME} PRIVATE ${FFTW_INCLUDE_DIR})
+
+target_link_libraries(${PRGNAME} ${GSL_LIBRARIES})
+target_include_directories(${PRGNAME} PRIVATE ${GSL_INCLUDE_DIR})
+
+target_link_libraries(${PRGNAME} ${HDF5_LIBRARIES})
+target_include_directories(${PRGNAME} PRIVATE ${HDF5_INCLUDE_DIR})
diff --git a/FindFFTW3.cmake b/FindFFTW3.cmake
new file mode 100644
index 0000000..4a72c90
--- /dev/null
+++ b/FindFFTW3.cmake
@@ -0,0 +1,126 @@
+# - Find FFTW
+# Find the native FFTW includes and library
+# This module defines
+# FFTW_INCLUDE_DIR, where to find fftw3.h, etc.
+# FFTW_LIBRARIES, the libraries needed to use FFTW.
+# FFTW_FOUND, If false, do not try to use FFTW.
+# also defined, but not for general use are
+# FFTW_LIBRARY, where to find the FFTW library.
+
+#UNSET(FFTW_INCLUDE_DIR CACHE)
+#UNSET(FFTW_LIBRARY CACHE)
+#UNSET(FFTW_THREADS_NAMES CACHE)
+#UNSET(FFTW_MPI_LIBRARY CACHE)
+
+FIND_PATH(FFTW_INCLUDE_DIR fftw3.h
+  PATHS ${FFTW3_DIR}
+  PATH_SUFFIXES include
+  DOC "Directory where the FFTW3 header files are located"
+  NO_DEFAULT_PATH
+)
+
+SET(FFTW_NAMES ${FFTW_NAMES} fftw3 fftw3-3)
+find_library(FFTW_LIBRARY
+  NAMES ${FFTW_NAMES} 
+  PATHS ${FFTW3_DIR}
+  PATH_SUFFIXES lib
+  DOC "Directory where the FFTW3 library is located"
+  NO_DEFAULT_PATH
+)
+
+SET(FFTW_THREADS_NAMES ${FFTW_THREADS_NAMES} fftw3_threads fftw3-3_threads)
+find_library(FFTW_THREADS_LIBRARY
+  NAMES ${FFTW_THREADS_NAMES} 
+  PATHS ${FFTW3_DIR}
+  PATH_SUFFIXES lib
+  DOC "Directory where the FFTW3-threads library is located"
+  NO_DEFAULT_PATH
+)
+
+SET(FFTW_MPI_NAMES ${FFTW_MPI_NAMES} fftw3_mpi fftw3-3_mpi)
+find_library(FFTW_MPI_LIBRARY
+  NAMES ${FFTW_MPI_NAMES} 
+  PATHS ${FFTW3_DIR}
+  PATH_SUFFIXES lib
+  DOC "Directory where the FFTW3-MPI library is located"
+  NO_DEFAULT_PATH
+)
+
+FIND_PATH(FFTW_INCLUDE_DIR fftw3.h
+${FFTW3_DIR} ${FFTW3_DIR}/include ${FFTW3_DIR}/lib )# /usr/local/include /usr/include /opt/local/lib )
+
+FIND_LIBRARY(FFTW_LIBRARY
+NAMES ${FFTW_NAMES}
+PATHS ${FFTW3_DIR} ${FFTW3_DIR}/include ${FFTW3_DIR}/lib )#/usr/lib /usr/local/lib /opt/locala/lib )
+
+# Find threads part of FFTW
+FIND_LIBRARY(FFTW_THREADS_LIBRARY
+NAMES ${FFTW_THREADS_NAMES}
+PATHS ${FFTW3_DIR} ${FFTW3_DIR}/include ${FFTW3_DIR}/lib )#/usr/lib /usr/local/lib /opt/local/lib )
+
+# Find MPI part of FFTW
+FIND_LIBRARY(FFTW_MPI_LIBRARY
+NAMES ${FFTW_MPI_NAMES}
+PATHS ${FFTW3_DIR} ${FFTW3_DIR}/include ${FFTW3_DIR}/lib )#/usr/lib /usr/local/lib /opt/local/lib )
+
+IF (FFTW_THREADS_LIBRARY AND FFTW_INCLUDE_DIR)
+SET(FFTW_THREADS_LIBRARIES ${FFTW_THREADS_LIBRARY})
+SET(FFTW_THREADS_FOUND "YES")
+ELSE (FFTW_THREADS_LIBRARY AND FFTW_INCLUDE_DIR)
+SET(FFTW_THREADS_FOUND "NO")
+ENDIF (FFTW_THREADS_LIBRARY AND FFTW_INCLUDE_DIR)
+
+
+IF (FFTW_THREADS_FOUND)
+IF (NOT FFTW_THREADS_FIND_QUIETLY)
+MESSAGE(STATUS "Found FFTW threads: ${FFTW_THREADS_LIBRARIES}")
+ENDIF (NOT FFTW_THREADS_FIND_QUIETLY)
+ELSE (FFTW_THREADS_FOUND)
+IF (FFTW_THREADS_FIND_REQUIRED)
+MESSAGE(FATAL_ERROR "Could not find FFTW threads library")
+ENDIF (FFTW_THREADS_FIND_REQUIRED)
+ENDIF (FFTW_THREADS_FOUND)
+
+
+IF (FFTW_MPI_LIBRARY AND FFTW_INCLUDE_DIR)
+SET(FFTW_MPI_LIBRARIES ${FFTW_MPI_LIBRARY})
+SET(FFTW_MPI_FOUND "YES")
+ELSE (FFTW_MPI_LIBRARY AND FFTW_INCLUDE_DIR)
+SET(FFTW_MPI_FOUND "NO")
+ENDIF (FFTW_MPI_LIBRARY AND FFTW_INCLUDE_DIR)
+
+
+IF (FFTW_MPI_FOUND)
+IF (NOT FFTW_MPI_FIND_QUIETLY)
+MESSAGE(STATUS "Found FFTW MPI: ${FFTW_MPI_LIBRARIES}")
+ENDIF (NOT FFTW_MPI_FIND_QUIETLY)
+ELSE (FFTW_MPI_FOUND)
+IF (FFTW_MPI_FIND_REQUIRED)
+MESSAGE(FATAL_ERROR "Could not find FFTW MPI library")
+ENDIF (FFTW_MPI_FIND_REQUIRED)
+ENDIF (FFTW_MPI_FOUND)
+
+
+IF (FFTW_LIBRARY AND FFTW_INCLUDE_DIR)
+SET(FFTW_LIBRARIES ${FFTW_LIBRARY})
+SET(FFTW_FOUND "YES")
+ELSE (FFTW_LIBRARY AND FFTW_INCLUDE_DIR)
+SET(FFTW_FOUND "NO")
+ENDIF (FFTW_LIBRARY AND FFTW_INCLUDE_DIR)
+
+
+IF (FFTW_FOUND)
+IF (NOT FFTW_FIND_QUIETLY)
+MESSAGE(STATUS "Found FFTW: ${FFTW_LIBRARIES}")
+ENDIF (NOT FFTW_FIND_QUIETLY)
+ELSE (FFTW_FOUND)
+IF (FFTW_FIND_REQUIRED)
+MESSAGE(FATAL_ERROR "Could not find FFTW library")
+ENDIF (FFTW_FIND_REQUIRED)
+ENDIF (FFTW_FOUND)
+
+SET (ON_UNIX ${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR
+${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
+IF (${ON_UNIX})
+SET (FFTW_EXECUTABLE_LIBRARIES fftw3f fftw3f_threads)
+ENDIF (${ON_UNIX})
diff --git a/example.conf b/example.conf
new file mode 100644
index 0000000..0e63a79
--- /dev/null
+++ b/example.conf
@@ -0,0 +1,17 @@
+[setup]
+GridRes = 128
+BoxLength = 300
+Dplus0 = 1.0
+
+[output]
+fname_hdf5 = output.hdf5
+fbase_analysis = output
+
+[cosmology]
+transfer = eisenstein
+Omega_m  = 0.302
+Omega_b  = 0.045
+Omega_L  = 0.698
+H0       = 70.3
+sigma_8  = 0.811
+nspec    = 0.961
diff --git a/include/bounding_box.hh b/include/bounding_box.hh
new file mode 100644
index 0000000..db0f481
--- /dev/null
+++ b/include/bounding_box.hh
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <vec3.hh>
+
+template <typename T>
+struct bounding_box
+{
+    vec3<T> x1_, x2_;
+
+    bounding_box(void)
+    { }
+
+    bounding_box( const vec3<T>& x1, const vec3<T>& x2)
+    : x1_(x1), x2_(x2)
+    { }
+
+    bounding_box(const bounding_box &a)
+    : x1_(a.x1_), x2_(a.x2_)
+    { }
+
+    bounding_box &operator/=(const bounding_box<T> &b)
+    {
+        for (int i = 0; i < 3; ++i)
+        {
+            x1_[i] = std::max<T>(x1_[i], b.x1_[i]);
+            x2_[i] = std::min<T>(x2_[i], b.x2_[i]);
+        }
+        return *this;
+    }
+};
+
diff --git a/include/config_file.hh b/include/config_file.hh
new file mode 100644
index 0000000..fe67b00
--- /dev/null
+++ b/include/config_file.hh
@@ -0,0 +1,362 @@
+#pragma once
+
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <typeinfo>
+
+#include <logger.hh>
+
+/*!
+ * @class ConfigFile
+ * @brief provides read/write access to configuration options
+ *
+ * This class provides access to the configuration file. The
+ * configuration is stored in hash-pairs and can be queried and
+ * validated by the responsible class/routine
+ */
+class ConfigFile {
+
+  //! current line number
+  unsigned m_iLine;
+
+  //! hash table for key/value pairs, stored as strings
+  std::map<std::string, std::string> m_Items;
+
+public:
+  //! removes all white space from string source
+  /*!
+   * @param source the string to be trimmed
+   * @param delims a string of delimiting characters
+   * @return trimmed string
+   */
+  std::string trim(std::string const &source,
+                   char const *delims = " \t\r\n") const {
+    std::string result(source);
+    //... skip initial whitespace ...
+    std::string::size_type index = result.find_last_not_of(delims);
+    if (index != std::string::npos)
+      result.erase(++index);
+    //... find beginning of trailing whitespace ...
+    index = result.find_first_not_of(delims);
+    //... remove trailing whitespace ...
+    if (index != std::string::npos)
+      result.erase(0, index);
+    else
+      result.erase();
+    return result;
+  }
+
+  //! converts between different variable types
+  /*!
+   *  The main purpose of this function is to parse and convert
+   *  a string argument into numbers, booleans, etc...
+   * @param ival the input value (typically a std::string)
+   * @param oval the interpreted/converted value
+   */
+  template <class in_value, class out_value>
+  void Convert(const in_value &ival, out_value &oval) const {
+    std::stringstream ss;
+    ss << ival; //.. insert value into stream
+    ss >> oval; //.. retrieve value from stream
+
+    if (!ss.eof()) {
+      //.. conversion error
+      csoca::elog << "Error: conversion of \'" << ival << "\' failed."
+                << std::endl;
+      throw ErrInvalidConversion(std::string("invalid conversion to ") +
+                                 typeid(out_value).name() + '.');
+    }
+  }
+
+  //! constructor of class config_file
+  /*! @param FileName the path/name of the configuration file to be parsed
+   */
+  explicit ConfigFile(std::string const &FileName) : m_iLine(0), m_Items() {
+    std::ifstream file(FileName.c_str());
+
+    if (!file.is_open()){
+      csoca::elog << "Could not open config file \'" << FileName << "\'." << std::endl;
+      throw std::runtime_error(
+          std::string("Error: Could not open config file \'") + FileName +
+          std::string("\'"));
+    }
+
+    std::string line;
+    std::string name;
+    std::string value;
+    std::string inSection;
+    int posEqual;
+    m_iLine = 0;
+    //.. walk through all lines ..
+    while (std::getline(file, line)) {
+      ++m_iLine;
+      //.. encounterd EOL ?
+      if (!line.length())
+        continue;
+
+      //.. encountered comment ?
+      unsigned long idx;
+      if ((idx = line.find_first_of("#;%")) != std::string::npos)
+        line.erase(idx);
+
+      //.. encountered section tag ?
+      if (line[0] == '[') {
+        inSection = trim(line.substr(1, line.find(']') - 1));
+        continue;
+      }
+
+      //.. seek end of entry name ..
+      posEqual = line.find('=');
+      name = trim(line.substr(0, posEqual));
+      value = trim(line.substr(posEqual + 1));
+
+      if ((size_t)posEqual == std::string::npos &&
+          (name.size() != 0 || value.size() != 0)) {
+        csoca::wlog << "Ignoring non-assignment in " << FileName << ":"
+                  << m_iLine << std::endl;
+        continue;
+      }
+
+      if (name.length() == 0 && value.size() != 0) {
+        csoca::wlog << "Ignoring assignment missing entry name in "
+                  << FileName << ":" << m_iLine << std::endl;
+        continue;
+      }
+
+      if (value.length() == 0 && name.size() != 0) {
+        csoca::wlog << "Empty entry will be ignored in " << FileName << ":"
+                  << m_iLine << std::endl;
+        continue;
+      }
+
+      if (value.length() == 0 && name.size() == 0)
+        continue;
+
+      //.. add key/value pair to hash table ..
+      if (m_Items.find(inSection + '/' + name) != m_Items.end()) {
+        csoca::wlog << "Redeclaration overwrites previous value in "
+                  << FileName << ":" << m_iLine << std::endl;
+      }
+
+      m_Items[inSection + '/' + name] = value;
+    }
+  }
+
+  //! inserts a key/value pair in the hash map
+  /*! @param key the key value, usually "section/key"
+   *  @param value the value of the key, also a string
+   */
+  void InsertValue(std::string const &key, std::string const &value) {
+    m_Items[key] = value;
+  }
+
+  //! inserts a key/value pair in the hash map
+  /*! @param section section name. values are stored under "section/key"
+   *  @param key the key value usually "section/key"
+   *  @param value the value of the key, also a string
+   */
+  void InsertValue(std::string const &section, std::string const &key,
+                   std::string const &value) {
+    m_Items[section + '/' + key] = value;
+  }
+
+  //! checks if a key is part of the hash map
+  /*! @param section the section name of the key
+   *  @param key the key name to be checked
+   *  @return true if the key is present, false otherwise
+   */
+  bool ContainsKey(std::string const &section, std::string const &key) {
+    std::map<std::string, std::string>::const_iterator i =
+        m_Items.find(section + '/' + key);
+    if (i == m_Items.end())
+      return false;
+    return true;
+  }
+
+  //! checks if a key is part of the hash map
+  /*! @param key the key name to be checked
+   *  @return true if the key is present, false otherwise
+   */
+  bool ContainsKey(std::string const &key) {
+    std::map<std::string, std::string>::const_iterator i = m_Items.find(key);
+    if (i == m_Items.end())
+      return false;
+    return true;
+  }
+
+  //! return value of a key
+  /*! returns the value of a given key, throws a ErrItemNotFound
+   *  exception if the key is not available in the hash map.
+   *  @param key the key name
+   *  @return the value of the key
+   *  @sa ErrItemNotFound
+   */
+  template <class T> T GetValue(std::string const &key) const {
+    return GetValue<T>("", key);
+  }
+
+  //! return value of a key
+  /*! returns the value of a given key, throws a ErrItemNotFound
+   *  exception if the key is not available in the hash map.
+   *  @param section the section name for the key
+   *  @param key the key name
+   *  @return the value of the key
+   *  @sa ErrItemNotFound
+   */
+  template <class T>
+  T GetValueBasic(std::string const &section, std::string const &key) const {
+    T r;
+    std::map<std::string, std::string>::const_iterator i =
+        m_Items.find(section + '/' + key);
+    if (i == m_Items.end()){
+      throw ErrItemNotFound('\'' + section + '/' + key +
+                            std::string("\' not found."));
+    }
+
+    Convert(i->second, r);
+    return r;
+  }
+
+  template <class T>
+  T GetValue(std::string const &section, std::string const &key) const
+  {
+    T r;
+    try
+    {
+      r = GetValueBasic<T>(section, key);
+    }
+    catch (ErrItemNotFound& e)
+    {
+      csoca::elog << e.what() << std::endl;
+      throw;
+    }
+    return r;
+  }
+
+  //! exception safe version of getValue
+  /*! returns the value of a given key, returns a default value rather
+   *  than a ErrItemNotFound exception if the key is not found.
+   *  @param section the section name for the key
+   *  @param key the key name
+   *  @param default_value the value that is returned if the key is not found
+   *  @return the key value (if key found) otherwise default_value
+   */
+  template <class T>
+  T GetValueSafe(std::string const &section, std::string const &key,
+                 T default_value) const {
+    T r;
+    try {
+      r = GetValueBasic<T>(section, key);
+    } catch (ErrItemNotFound&) {
+      r = default_value;
+    }
+    return r;
+  }
+
+  //! exception safe version of getValue
+  /*! returns the value of a given key, returns a default value rather
+   *  than a ErrItemNotFound exception if the key is not found.
+   *  @param key the key name
+   *  @param default_value the value that is returned if the key is not found
+   *  @return the key value (if key found) otherwise default_value
+   */
+  template <class T>
+  T GetValueSafe(std::string const &key, T default_value) const {
+    return GetValueSafe("", key, default_value);
+  }
+
+  //! dumps all key-value pairs to a std::ostream
+  void Dump(std::ostream &out) {
+    std::map<std::string, std::string>::const_iterator i = m_Items.begin();
+    while (i != m_Items.end()) {
+      if (i->second.length() > 0)
+        out << std::setw(24) << std::left << i->first << "  =  " << i->second
+            << std::endl;
+      ++i;
+    }
+  }
+
+  void LogDump(void) {
+    csoca::ilog << "List of all configuration options:" << std::endl;
+    std::map<std::string, std::string>::const_iterator i = m_Items.begin();
+    while (i != m_Items.end()) {
+      if (i->second.length() > 0)
+        csoca::ilog << std::setw(28) << i->first << " = " << i->second
+                  << std::endl;
+      ++i;
+    }
+  }
+
+  //--- EXCEPTIONS ---
+
+  //! runtime error that is thrown if key is not found in getValue
+  class ErrItemNotFound : public std::runtime_error {
+  public:
+    ErrItemNotFound(std::string itemname)
+        : std::runtime_error(itemname.c_str()) {}
+  };
+
+  //! runtime error that is thrown if type conversion fails
+  class ErrInvalidConversion : public std::runtime_error {
+  public:
+    ErrInvalidConversion(std::string errmsg) : std::runtime_error(errmsg) {}
+  };
+
+  //! runtime error that is thrown if identifier is not found in keys
+  class ErrIllegalIdentifier : public std::runtime_error {
+  public:
+    ErrIllegalIdentifier(std::string errmsg) : std::runtime_error(errmsg) {}
+  };
+};
+
+//==== below are template specialisations
+//=======================================//
+
+//... Function: getValue( strSection, strEntry ) ...
+//... Descript: specialization of getValue for type boolean to interpret strings
+//...
+//...           like "true" and "false" etc.
+//...           converts the string to type bool, returns type bool ...
+template <>
+inline bool ConfigFile::GetValue<bool>(std::string const &strSection,
+                                       std::string const &strEntry) const {
+  std::string r1 = GetValue<std::string>(strSection, strEntry);
+  if (r1 == "true" || r1 == "yes" || r1 == "on" || r1 == "1")
+    return true;
+  if (r1 == "false" || r1 == "no" || r1 == "off" || r1 == "0")
+    return false;
+  csoca::elog << "Illegal identifier \'" << r1 << "\' in \'" << strEntry << "\'." << std::endl;
+  throw ErrIllegalIdentifier(std::string("Illegal identifier \'") + r1 +
+                             std::string("\' in \'") + strEntry +
+                             std::string("\'."));
+  // return false;
+}
+
+template <>
+inline bool ConfigFile::GetValueSafe<bool>(std::string const &strSection,
+                                           std::string const &strEntry,
+                                           bool defaultValue) const {
+  std::string r1;
+  try {
+    r1 = GetValue<std::string>(strSection, strEntry);
+    if (r1 == "true" || r1 == "yes" || r1 == "on" || r1 == "1")
+      return true;
+    if (r1 == "false" || r1 == "no" || r1 == "off" || r1 == "0")
+      return false;
+  } catch (ErrItemNotFound&) {
+    return defaultValue;
+  }
+  return defaultValue;
+}
+
+template <>
+inline void
+ConfigFile::Convert<std::string, std::string>(const std::string &ival,
+                                              std::string &oval) const {
+  oval = ival;
+}
diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh
new file mode 100644
index 0000000..23fd4e2
--- /dev/null
+++ b/include/cosmology_calculator.hh
@@ -0,0 +1,248 @@
+#pragma once
+
+#include <array>
+
+#include <cosmology_parameters.hh>
+#include <transfer_function_plugin.hh>
+#include <logger.hh>
+
+#include <gsl/gsl_integration.h>
+#include <gsl/gsl_errno.h>
+
+/*!
+ * @class CosmologyCalculator
+ * @brief provides functions to compute cosmological quantities
+ *
+ * This class provides member functions to compute cosmological quantities
+ * related to the Friedmann equations and linear perturbation theory
+ */
+class CosmologyCalculator
+{
+  private:
+    static constexpr double REL_PRECISION = 1e-5;
+
+    real_t integrate(double (*func)(double x, void *params), double a, double b, void *params)
+    {
+        gsl_function F;
+        F.function = func;
+        F.params = params;
+
+        double result;
+        double error;
+
+        gsl_set_error_handler_off();
+        gsl_integration_workspace *w = gsl_integration_workspace_alloc(100000);
+        gsl_integration_qag(&F, a, b, 0, REL_PRECISION, 100000, 6, w, &result, &error);
+
+        gsl_integration_workspace_free(w);
+
+        gsl_set_error_handler(NULL);
+
+        if (error / result > REL_PRECISION)
+            csoca::wlog << "no convergence in function 'integrate', rel. error=" << error / result << std::endl;
+
+        return (real_t)result;
+    }
+
+  public:
+    //! data structure to store cosmological parameters
+    CosmologyParameters cosmo_param_;
+
+    //! pointer to an instance of a transfer function plugin
+    TransferFunction_plugin *ptransfer_fun_;
+
+    //! constructor for a cosmology calculator object
+    /*!
+	 * @param acosmo a cosmological parameters structure
+	 * @param pTransferFunction pointer to an instance of a transfer function object
+	 */
+
+    CosmologyCalculator(const CosmologyParameters& cp, TransferFunction_plugin *ptransfer_fun)
+    {
+        cosmo_param_ = cp;
+        ptransfer_fun_ = ptransfer_fun;
+        cosmo_param_.pnorm = this->ComputePNorm();
+        cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm);
+    }
+
+    CosmologyCalculator(ConfigFile &cf, TransferFunction_plugin *ptransfer_fun)
+    : cosmo_param_(cf), ptransfer_fun_(ptransfer_fun)
+    {
+        cosmo_param_.pnorm = this->ComputePNorm();
+        cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm);
+    }
+
+    const CosmologyParameters& GetParams( void ) const{
+        return cosmo_param_;
+    }
+
+    //! returns the amplitude of amplitude of the power spectrum
+    /*!
+	 * @param k the wave number in h/Mpc
+	 * @param a the expansion factor of the universe
+	 * @returns power spectrum amplitude for wave number k at time a
+	 */
+    inline real_t Power(real_t k, real_t a)
+    {
+        real_t Dplus = CalcGrowthFactor(a);
+        real_t DplusOne = CalcGrowthFactor(1.0);
+        real_t pNorm = ComputePNorm();
+        Dplus /= DplusOne;
+        DplusOne = 1.0;
+        real_t scale = Dplus / DplusOne;
+        return pNorm * scale * scale * TransferSq(k) * pow((double)k, (double)cosmo_param_.nspect);
+    }
+
+    inline static double H_of_a(double a, void *Params)
+    {
+        CosmologyParameters *cosm = (CosmologyParameters *)Params;
+        double a2 = a * a;
+        double Ha = sqrt(cosm->Omega_m / (a2 * a) + cosm->Omega_k / a2 + cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a));
+        return Ha;
+    }
+
+    inline static double Hprime_of_a(double a, void *Params)
+    {
+        CosmologyParameters *cosm = (CosmologyParameters *)Params;
+        double a2 = a * a;
+        double H = H_of_a(a, Params);
+        double Hprime = 1 / (a * H) * (-1.5 * cosm->Omega_m / (a2 * a) - cosm->Omega_k / a2 - 1.5 * cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a) * (1. + cosm->w_0 + (1. - a) * cosm->w_a));
+        return Hprime;
+    }
+
+    //! Integrand used by function CalcGrowthFactor to determine the linear growth factor D+
+    inline static double GrowthIntegrand(double a, void *Params)
+    {
+        double Ha = a * H_of_a(a, Params);
+        return 2.5 / (Ha * Ha * Ha);
+    }
+
+    //! Computes the linear theory growth factor D+
+    /*! Function integrates over member function GrowthIntegrand and computes
+    *                      /a
+    *   D+(a) = 5/2 H(a) * |  [a'^3 * H(a')^3]^(-1) da'
+    *                      /0
+    */
+    real_t CalcGrowthFactor(real_t a)
+    {
+        real_t integral = integrate(&GrowthIntegrand, 0.0, a, (void *)&cosmo_param_);
+        return H_of_a(a, (void *)&cosmo_param_) * integral;
+    }
+
+    //! Compute the factor relating particle displacement and velocity
+    /*! Function computes
+    *
+    *  vfac = a^2 * H(a) * dlogD+ / d log a = a^2 * H'(a) + 5/2 * [ a * D+(a) * H(a) ]^(-1)
+    *
+    */
+    real_t CalcVFact(real_t a)
+    {
+        real_t Dp = CalcGrowthFactor(a);
+        real_t H = H_of_a(a, (void *)&cosmo_param_);
+        real_t Hp = Hprime_of_a(a, (void *)&cosmo_param_);
+        real_t a2 = a * a;
+
+        return (a2 * Hp + 2.5 / (a * Dp * H)) * 100.0;
+    }
+
+    //! Integrand for the sigma_8 normalization of the power spectrum
+    /*! Returns the value of the primordial power spectrum multiplied with 
+     the transfer function and the window function of 8 Mpc/h at wave number k */
+    static double dSigma8(double k, void *pParams)
+    {
+        if (k <= 0.0)
+            return 0.0f;
+
+        std::array<void *, 2> *Params = (std::array<void *, 2> *)pParams;
+        TransferFunction_plugin *ptf = (TransferFunction_plugin *)(*Params)[0];
+        CosmologyParameters *pcosmo = (CosmologyParameters *)(*Params)[1];
+
+        double x = k * 8.0;
+        double w = 3.0 * (sin(x) - x * cos(x)) / (x * x * x);
+        static double nspect = (double)pcosmo->nspect;
+
+        double tf = ptf->compute(k, total);
+
+        //... no growth factor since we compute at z=0 and normalize so that D+(z=0)=1
+        return k * k * w * w * pow((double)k, (double)nspect) * tf * tf;
+    }
+
+    //! Integrand for the sigma_8 normalization of the power spectrum
+    /*! Returns the value of the primordial power spectrum multiplied with 
+	 the transfer function and the window function of 8 Mpc/h at wave number k */
+    static double dSigma8_0(double k, void *pParams)
+    {
+        if (k <= 0.0)
+            return 0.0f;
+
+        std::array<void *, 2> *Params = (std::array<void *, 2> *)pParams;
+        TransferFunction_plugin *ptf = (TransferFunction_plugin *)(*Params)[0];
+        CosmologyParameters *pcosmo = (CosmologyParameters*)(*Params)[1];
+
+        double x = k * 8.0;
+        double w = 3.0 * (sin(x) - x * cos(x)) / (x * x * x);
+        static double nspect = (double)pcosmo->nspect;
+
+        double tf = ptf->compute(k, total0);
+
+        //... no growth factor since we compute at z=0 and normalize so that D+(z=0)=1
+        return k * k * w * w * pow((double)k, (double)nspect) * tf * tf;
+    }
+
+    //! Computes the square of the transfer function
+    /*! Function evaluates the supplied transfer function ptransfer_fun_
+	 * and returns the square of its value at wave number k
+	 * @param k wave number at which to evaluate the transfer function
+	 */
+    inline real_t TransferSq(real_t k) const
+    {
+        //.. parameter supplied transfer function
+        real_t tf1 = ptransfer_fun_->compute(k, total);
+        return tf1 * tf1;
+    }
+
+    //! Computes the amplitude of a mode from the power spectrum
+    /*! Function evaluates the supplied transfer function ptransfer_fun_
+	 * and returns the amplitude of fluctuations at wave number k at z=0
+	 * @param k wave number at which to evaluate
+	 */
+    inline real_t GetAmplitude(real_t k, tf_type type) const
+    {
+        return std::pow(k, 0.5 * cosmo_param_.nspect) * ptransfer_fun_->compute(k, type) * cosmo_param_.sqrtpnorm;
+    }
+
+    //! Computes the normalization for the power spectrum
+    /*!
+	 * integrates the power spectrum to fix the normalization to that given
+	 * by the sigma_8 parameter
+	 */
+    real_t ComputePNorm(void)
+    {
+        real_t sigma0, kmin, kmax;
+        kmax = ptransfer_fun_->get_kmax(); //cosmo_param_.H0/8.0;
+        kmin = ptransfer_fun_->get_kmin(); //0.0;
+
+        std::array<void *, 2> Params = {(void *)ptransfer_fun_,
+                                        (void *)&cosmo_param_ };
+
+        if (!ptransfer_fun_->tf_has_total0())
+            sigma0 = 4.0 * M_PI * integrate(&dSigma8, (double)kmin, (double)kmax, (void *)&Params);//ptransfer_fun_);
+        else
+            sigma0 = 4.0 * M_PI * integrate(&dSigma8_0, (double)kmin, (double)kmax, (void *)&Params);//ptransfer_fun_);
+
+        return cosmo_param_.sigma8 * cosmo_param_.sigma8 / sigma0;
+    }
+};
+
+//! compute the jeans sound speed
+/*! given a density in g/cm^-3 and a mass in g it gives back the sound
+ *  speed in cm/s for which the input mass is equal to the jeans mass
+ *  @param rho density 
+ *  @param mass mass scale
+ *  @returns jeans sound speed
+ */
+inline double jeans_sound_speed(double rho, double mass)
+{
+    const double G = 6.67e-8;
+    return pow(6.0 * mass / M_PI * sqrt(rho) * pow(G, 1.5), 1.0 / 3.0);
+}
\ No newline at end of file
diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh
new file mode 100644
index 0000000..754a729
--- /dev/null
+++ b/include/cosmology_parameters.hh
@@ -0,0 +1,49 @@
+#pragma once
+
+#include <config_file.hh>
+
+//! structure for cosmological parameters
+struct CosmologyParameters
+{
+    double
+        Omega_m,  //!< baryon+dark matter density
+        Omega_b,  //!< baryon matter density
+        Omega_DE, //!< dark energy density (cosmological constant or parameterised)
+        Omega_r,  //!< photon + relativistic particle density
+        Omega_k,  //!< curvature density
+        H0,       //!< Hubble constant in km/s/Mpc
+        nspect,   //!< long-wave spectral index (scale free is nspect=1)
+        sigma8,   //!< power spectrum normalization
+        w_0,      //!< dark energy equation of state parameter 1: w = w0 + a * wa
+        w_a,      //!< dark energy equation of state parameter 2: w = w0 + a * wa
+
+        // below are helpers to store additional information
+        dplus, //!< linear perturbation growth factor
+        pnorm, //!< actual power spectrum normalisation factor
+        sqrtpnorm, //!< sqrt of power spectrum normalisation factor
+        vfact; //!< velocity<->displacement conversion factor in Zel'dovich approx.
+
+    CosmologyParameters(ConfigFile cf)
+    {
+        Omega_b = cf.GetValue<double>("cosmology", "Omega_b");
+        Omega_m = cf.GetValue<double>("cosmology", "Omega_m");
+        Omega_DE = cf.GetValue<double>("cosmology", "Omega_L");
+        w_0 = cf.GetValueSafe<double>("cosmology", "w0", -1.0);
+        w_a = cf.GetValueSafe<double>("cosmology", "wa", 0.0);
+
+        Omega_r = cf.GetValueSafe<double>("cosmology", "Omega_r", 0.0); // no longer default to nonzero (8.3e-5)
+        Omega_k = 1.0 - Omega_m - Omega_DE - Omega_r;
+
+        H0 = cf.GetValue<double>("cosmology", "H0");
+        sigma8 = cf.GetValue<double>("cosmology", "sigma_8");
+        nspect = cf.GetValue<double>("cosmology", "nspec");
+
+        dplus = 0.0;
+        pnorm = 0.0;
+        vfact = 0.0;
+    }
+
+    CosmologyParameters(void)
+    {
+    }
+};
\ No newline at end of file
diff --git a/include/general.hh b/include/general.hh
new file mode 100644
index 0000000..46e99b7
--- /dev/null
+++ b/include/general.hh
@@ -0,0 +1,69 @@
+#pragma once
+
+#include <logger.hh>
+
+#include <complex>
+
+#if defined(USE_MPI)
+#include <mpi.h>
+  #include <fftw3-mpi.h>
+#else
+  #include <fftw3.h>
+#endif
+
+#ifdef USE_SINGLEPRECISION
+using real_t = float;
+using complex_t = fftwf_complex;
+#define FFTW_PREFIX fftwf
+#else
+using real_t = double;
+using complex_t = fftw_complex;
+#define FFTW_PREFIX fftw
+#endif
+
+using ccomplex_t = std::complex<real_t>;
+
+#define FFTW_GEN_NAME_PRIM(a, b) a##_##b
+#define FFTW_GEN_NAME(a, b) FFTW_GEN_NAME_PRIM(a, b)
+#define FFTW_API(x) FFTW_GEN_NAME(FFTW_PREFIX, x)
+
+using fftw_plan_t = FFTW_GEN_NAME(FFTW_PREFIX, plan);
+
+#if defined(FFTW_MODE_PATIENT)
+#define FFTW_RUNMODE FFTW_PATIENT
+#elif defined(FFTW_MODE_MEASURE)
+#define FFTW_RUNMODE FFTW_MEASURE
+#else
+#define FFTW_RUNMODE FFTW_ESTIMATE
+#endif
+
+#if defined(USE_MPI)
+inline double get_wtime()
+{
+    return MPI_Wtime();
+}
+#else
+  #if defined(_OPENMP)
+    #include <omp.h>
+    inline double get_wtime()
+    {
+      return omp_get_wtime();
+    }
+  #else
+    #include <ctime>
+    inline double get_wtime()
+    {
+      return std::clock() / double(CLOCKS_PER_SEC);
+    }
+  #endif
+#endif
+
+namespace CONFIG
+{
+extern int MPI_thread_support;
+extern int MPI_task_rank;
+extern int MPI_task_size;
+extern bool MPI_ok;
+extern bool MPI_threads_ok;
+extern bool FFTW_threads_ok;
+}; // namespace CONFIG
diff --git a/include/grid_fft.hh b/include/grid_fft.hh
new file mode 100644
index 0000000..afbc023
--- /dev/null
+++ b/include/grid_fft.hh
@@ -0,0 +1,394 @@
+#pragma once 
+
+#include <cmath>
+#include <array>
+#include <vector>
+
+#include <vec3.hh>
+#include <general.hh>
+#include <bounding_box.hh>
+
+enum space_t
+{
+    kspace_id,
+    rspace_id
+};
+
+template <typename data_t>
+class Grid_FFT
+{
+  protected:
+#if defined(USE_MPI)
+  const MPI_Datatype MPI_data_t_type = (typeid(data_t) == typeid(double)) ? MPI_DOUBLE
+    : (typeid(data_t) == typeid(float)) ? MPI_FLOAT
+    : (typeid(data_t) == typeid(std::complex<float>)) ? MPI_COMPLEX
+    : (typeid(data_t) == typeid(std::complex<double>)) ? MPI_DOUBLE_COMPLEX : MPI_INT;
+#endif
+public:
+  std::array<size_t,3> n_, nhalf_;
+  std::array<size_t,4> sizes_;
+  size_t npr_, npc_;
+  size_t ntot_;
+  std::array<real_t,3> length_, kfac_, dx_;
+
+  space_t space_;
+  data_t *data_;
+  ccomplex_t *cdata_;
+
+  bounding_box<size_t> global_range_;
+
+  fftw_plan_t plan_, iplan_;
+
+  real_t fft_norm_fac_;
+
+  ptrdiff_t local_0_start_, local_1_start_;
+  ptrdiff_t local_0_size_, local_1_size_;
+
+  
+
+  Grid_FFT(const std::array<size_t, 3> &N, const std::array<real_t, 3> &L, space_t initialspace = rspace_id)
+      : n_(N), length_(L), space_(initialspace), data_(nullptr), cdata_(nullptr) //, RV_(*this), KV_(*this)
+  {
+    for(int i=0; i<3; ++i){
+      kfac_[i] = 2.0 * M_PI / length_[i];
+      dx_[i]   = length_[i] / n_[i];
+    }
+    //invalidated = true;
+    this->Setup();
+  }
+
+  Grid_FFT(const Grid_FFT<data_t> &g)
+      : n_(g.n_), length_(g.length_), space_(g.space_), data_(nullptr), cdata_(nullptr)
+  {
+      for (int i = 0; i < 3; ++i)
+      {
+          kfac_[i] = g.kfac_[i];
+          dx_[i] = g.dx_[i];
+      }
+      //invalidated = true;
+      this->Setup();
+
+      for (size_t i = 0; i < ntot_; ++i ){
+          data_[i] = g.data_[i];
+      }
+  }
+
+  ~Grid_FFT()
+  {
+      if( data_!=nullptr){
+          fftw_free( data_ );
+      }
+  }
+
+  void Setup();
+
+  size_t size( size_t i ) const{ return sizes_[i]; }
+
+  void zero() {
+      for( size_t i=0; i<ntot_; ++i ) data_[i] = 0.0;
+  }
+
+  data_t &relem(size_t i, size_t j, size_t k)
+  {
+      size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
+      return data_[idx];
+  }
+
+  const data_t &relem(size_t i, size_t j, size_t k) const
+  {
+      size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
+      return data_[idx];
+  }
+
+  ccomplex_t &kelem(size_t i, size_t j, size_t k)
+  {
+      size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
+      return cdata_[idx];
+  }
+
+  const ccomplex_t &kelem(size_t i, size_t j, size_t k) const 
+  {
+      size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
+      return cdata_[idx];
+  }
+
+  ccomplex_t &kelem(size_t idx) { return cdata_[idx]; }
+  const ccomplex_t &kelem(size_t idx) const { return cdata_[idx]; }
+  data_t &relem(size_t idx) { return data_[idx]; }
+  const data_t &relem(size_t idx) const { return data_[idx]; }
+
+  size_t get_idx( size_t i, size_t j, size_t k ) const
+  {
+      return (i * sizes_[1] + j) * sizes_[3] + k;
+  }
+
+  template <typename ft>
+  vec3<ft> get_r(const size_t& i, const size_t& j, const size_t& k) const
+  {
+      vec3<ft> rr;
+
+#if defined(USE_MPI)
+      rr[0] = real_t(i + local_0_start_) * dx_[0];
+#else
+      rr[0] = real_t(i) * dx_[0];
+#endif
+      rr[1] = real_t(j) * dx_[1];
+      rr[2] = real_t(k) * dx_[2];
+
+      return rr;
+  }
+
+  template <typename ft>
+  vec3<ft> get_k(const size_t &i, const size_t &j, const size_t &k) const
+  {
+      vec3<ft> kk;
+
+#if defined(USE_MPI)
+      auto ip = i+local_1_start_;
+      kk[0] = (real_t(j) - real_t(j > nhalf_[0])*n_[0]) * kfac_[0];
+      kk[1] = (real_t(ip) - real_t(ip > nhalf_[1])*n_[1]) * kfac_[1];
+#else
+      kk[0] = (real_t(i) - real_t(i > nhalf_[0]) * n_[0]) * kfac_[0];
+      kk[1] = (real_t(j) - real_t(j > nhalf_[1]) * n_[1]) * kfac_[1];
+#endif
+      kk[2] = (real_t(k) - real_t(k>nhalf_[2])*n_[2])* kfac_[2];
+
+      return kk;
+  }
+
+  template <typename functional>
+  void apply_function_k(const functional &f)
+  {
+    #pragma omp parallel for
+      for (size_t i = 0; i < sizes_[0]; ++i)
+      {
+          for (size_t j = 0; j < sizes_[1]; ++j)
+          {
+              for (size_t k = 0; k < sizes_[2]; ++k)
+              {
+                  auto &elem = this->kelem(i, j, k);
+                  elem = f(elem);
+              }
+          }
+      }
+  }
+
+  template <typename functional>
+  void apply_function_r(const functional &f)
+  {
+    #pragma omp parallel for
+      for (size_t i = 0; i < sizes_[0]; ++i)
+      {
+          for (size_t j = 0; j < sizes_[1]; ++j)
+          {
+              for (size_t k = 0; k < sizes_[2]; ++k)
+              {
+                  auto &elem = this->relem(i, j, k);
+                  elem = f(elem);
+              }
+          }
+      }
+  }
+
+  double compute_2norm(void)
+  {
+      real_t sum1{0.0};
+#pragma omp parallel for reduction(+ : sum1)
+      for (size_t i = 0; i < sizes_[0]; ++i)
+      {
+          for (size_t j = 0; j < sizes_[1]; ++j)
+          {
+              for (size_t k = 0; k < sizes_[2]; ++k)
+              {
+                  const auto re = std::real(this->relem(i, j, k));
+                  const auto im = std::imag(this->relem(i, j, k));
+                  sum1 += re*re+im*im;
+              }
+          }
+      }
+
+      sum1 /= sizes_[0] * sizes_[1] * sizes_[2];
+
+      return sum1;
+  }
+
+  double std( void )
+  {
+    real_t sum1{0.0}, sum2{0.0};
+    #pragma omp parallel for reduction(+:sum1,sum2)
+      for (size_t i = 0; i < sizes_[0]; ++i)
+      {
+          for (size_t j = 0; j < sizes_[1]; ++j)
+          {
+              for (size_t k = 0; k < sizes_[2]; ++k)
+              {
+                  const auto elem = std::real(this->relem(i, j, k));
+                  sum1 += elem;
+                  sum2 += elem*elem;
+              }
+          }
+      }
+
+    sum1 /= sizes_[0] * sizes_[1] * sizes_[2];
+    sum2 /= sizes_[0] * sizes_[1] * sizes_[2];
+
+    return std::sqrt( sum2 - sum1*sum1 );
+  }
+  double mean(void)
+  {
+      real_t sum1{0.0};
+#pragma omp parallel for reduction(+ : sum1)
+      for (size_t i = 0; i < sizes_[0]; ++i)
+      {
+          for (size_t j = 0; j < sizes_[1]; ++j)
+          {
+              for (size_t k = 0; k < sizes_[2]; ++k)
+              {
+                  const auto elem = std::real(this->relem(i, j, k));
+                  sum1 += elem;
+              }
+          }
+      }
+
+      sum1 /= sizes_[0] * sizes_[1] * sizes_[2];
+
+      return sum1;
+  }
+
+  template <typename functional, typename grid_t>
+  void assign_function_of_grids_r(const functional &f, const grid_t &g)
+  {
+    assert(g.size(0) == size(0) && g.size(1) == size(1));// && g.size(2) == size(2) );
+
+    #pragma omp parallel for
+    for (size_t i = 0; i < sizes_[0]; ++i)
+    {
+        for (size_t j = 0; j < sizes_[1]; ++j)
+        {
+            for (size_t k = 0; k < sizes_[2]; ++k)
+            {
+                auto &elem = this->relem(i, j, k);
+                const auto &elemg = g.relem(i, j, k);
+
+                elem = f(elemg);
+            }
+        }
+    }
+  }
+
+  
+
+  template <typename functional, typename grid1_t, typename grid2_t>
+  void assign_function_of_grids_r(const functional &f, const grid1_t &g1, const grid2_t &g2)
+  {
+      assert(g1.size(0) == size(0) && g1.size(1) == size(1));// && g1.size(2) == size(2));
+      assert(g2.size(0) == size(0) && g2.size(1) == size(1));// && g2.size(2) == size(2));
+
+#pragma omp parallel for
+      for (size_t i = 0; i < sizes_[0]; ++i)
+      {
+          for (size_t j = 0; j < sizes_[1]; ++j)
+          {
+              for (size_t k = 0; k < sizes_[2]; ++k)
+              {
+                  //auto idx = this->get_idx(i,j,k);
+                  auto &elem = this->relem(i,j,k);
+
+                  const auto &elemg1 = g1.relem(i,j,k);
+                  const auto &elemg2 = g2.relem(i,j,k);
+
+                  elem = f(elemg1,elemg2);
+              }
+          }
+      }
+  }
+
+  template <typename functional, typename grid1_t, typename grid2_t, typename grid3_t>
+  void assign_function_of_grids_r(const functional &f, const grid1_t &g1, const grid2_t &g2, const grid3_t &g3)
+  {
+      assert(g1.size(0) == size(0) && g1.size(1) == size(1));// && g1.size(2) == size(2));
+      assert(g2.size(0) == size(0) && g2.size(1) == size(1));// && g2.size(2) == size(2));
+      assert(g3.size(0) == size(0) && g3.size(1) == size(1));// && g3.size(2) == size(2));
+
+#pragma omp parallel for
+      for (size_t i = 0; i < sizes_[0]; ++i)
+      {
+          for (size_t j = 0; j < sizes_[1]; ++j)
+          {
+              for (size_t k = 0; k < sizes_[2]; ++k)
+              {
+                  //auto idx = this->get_idx(i,j,k);
+                  auto &elem = this->relem(i, j, k);
+
+                  const auto &elemg1 = g1.relem(i, j, k);
+                  const auto &elemg2 = g2.relem(i, j, k);
+                  const auto &elemg3 = g3.relem(i, j, k);
+
+                  elem = f(elemg1, elemg2, elemg3);
+              }
+          }
+      }
+  }
+
+  template< typename functional >
+  void apply_function_k_dep( const functional& f )
+  {
+      #pragma omp parallel for
+      for( size_t i = 0; i < sizes_[0]; ++i )
+      {
+          for (size_t j = 0; j < sizes_[1]; ++j)
+          {
+              for (size_t k = 0; k < sizes_[2]; ++k)
+              {
+                auto& elem = this->kelem(i,j,k);
+                elem = f(elem,this->get_k<real_t>(i,j,k));
+              }
+          }
+      }
+  }
+
+  template <typename functional>
+  void apply_function_r_dep( const functional& f)
+  {
+      #pragma omp parallel for
+      for (size_t i = 0; i < sizes_[0]; ++i)
+      {
+          for (size_t j = 0; j < sizes_[1]; ++j)
+          {
+              for (size_t k = 0; k < sizes_[2]; ++k)
+              {
+                  auto &elem = this->relem(i, j, k);
+                  elem = f(elem, this->get_r<real_t>(i, j, k));
+              }
+          }
+      }
+  }
+
+  void FourierTransformBackward(bool do_transform = true);
+
+  void FourierTransformForward(bool do_transform = true);
+
+  void ApplyNorm(void);
+
+  void FillRandomReal( unsigned long int seed = 123456ul );
+
+  void Write_to_HDF5(std::string fname, std::string datasetname);
+
+  void ComputePowerSpectrum(std::vector<double> &bin_k, std::vector<double> &bin_P, std::vector<double> &bin_eP, int nbins);
+
+  void Compute_PDF(std::string ofname, int nbins = 1000, double scale = 1.0, double rhomin = 1e-3, double rhomax = 1e3);
+
+  void zero_DC_mode(void)
+  {
+#ifdef USE_MPI
+      if (CONFIG::MPI_task_rank == 0)
+#endif
+        cdata_[0] = (data_t)0.0;
+  }
+};
+
+template <typename data_t>
+void unpad(const Grid_FFT<data_t> &fp, Grid_FFT<data_t> &f);
+
+template <typename data_t>
+void pad_insert(const Grid_FFT<data_t> &f, Grid_FFT<data_t> &fp);
\ No newline at end of file
diff --git a/include/logger.hh b/include/logger.hh
new file mode 100644
index 0000000..41fc287
--- /dev/null
+++ b/include/logger.hh
@@ -0,0 +1,135 @@
+#pragma once
+
+#include <algorithm>
+#include <cassert>
+#include <cstdarg>
+#include <fstream>
+#include <iostream>
+
+namespace csoca {
+
+enum LogLevel : int {
+  Off     = 0,
+  Fatal   = 1,
+  Error   = 2,
+  Warning = 3,
+  Info    = 4,
+  Debug   = 5
+};
+
+class Logger {
+private:
+  static LogLevel log_level_;
+  static std::ofstream output_file_;
+
+public:
+  Logger()  = default;
+  ~Logger() = default;
+
+  static void SetLevel(const LogLevel &level);
+  static LogLevel GetLevel();
+
+  static void SetOutput(const std::string filename);
+  static void UnsetOutput();
+
+  static std::ofstream &GetOutput();
+
+  template <typename T> Logger &operator<<(const T &item) {
+    std::cout << item;
+    if (output_file_.is_open()) {
+      output_file_ << item;
+    }
+    return *this;
+  }
+
+  Logger &operator<<(std::ostream &(*fp)(std::ostream &)) {
+    std::cout << fp;
+    if (output_file_.is_open()) {
+      output_file_ << fp;
+    }
+    return *this;
+  }
+};
+
+class LogStream {
+private:
+  Logger &logger_;
+  LogLevel stream_level_;
+  std::string line_prefix_, line_postfix_;
+
+  bool newline;
+
+public:
+  LogStream(Logger &logger, const LogLevel &level)
+    : logger_(logger), stream_level_(level), newline(true) {
+    switch (stream_level_) {
+      case LogLevel::Fatal:
+        line_prefix_ = "\033[31mFatal : ";
+        break;
+      case LogLevel::Error:
+        line_prefix_ = "\033[31mError : ";
+        break;
+      case LogLevel::Warning:
+        line_prefix_ = "\033[33mWarning : ";
+        break;
+      case LogLevel::Info:
+        //line_prefix_ = " | Info    | ";
+        line_prefix_ = " \033[0m";
+        break;
+      case LogLevel::Debug:
+        line_prefix_ = "Debug : \033[0m";
+        break;
+      default:
+        line_prefix_ = "\033[0m";
+        break;
+    }
+    line_postfix_ = "\033[0m";
+  }
+  ~LogStream() = default;
+
+  inline std::string GetPrefix() const {
+    return line_prefix_;
+  }
+
+  template <typename T> LogStream &operator<<(const T &item) {
+    if (Logger::GetLevel() >= stream_level_) {
+      if (newline) {
+        logger_ << line_prefix_;
+        newline = false;
+      }
+      logger_ << item;
+    }
+    return *this;
+  }
+
+  LogStream &operator<<(std::ostream &(*fp)(std::ostream &)) {
+    if (Logger::GetLevel() >= stream_level_) {
+      logger_ << fp;
+      logger_ << line_postfix_;
+      newline = true;
+    }
+    return *this;
+  }
+
+  inline void Print(const char *str, ...) {
+    char out[1024];
+    va_list argptr;
+    va_start(argptr, str);
+    vsprintf(out, str, argptr);
+    va_end(argptr);
+    std::string out_string = std::string(out);
+    out_string.erase(std::remove(out_string.begin(), out_string.end(), '\n'),
+                     out_string.end());
+    (*this) << out_string << std::endl;
+  }
+};
+
+// global instantiations for different levels
+extern Logger glogger;
+extern LogStream flog;
+extern LogStream elog;
+extern LogStream wlog;
+extern LogStream ilog;
+extern LogStream dlog;
+
+} // namespace csoca
diff --git a/include/random_plugin.hh b/include/random_plugin.hh
new file mode 100644
index 0000000..4bc8f6d
--- /dev/null
+++ b/include/random_plugin.hh
@@ -0,0 +1,92 @@
+#pragma once
+
+#include <map>
+#include <config_file.hh>
+
+#define DEF_RAN_CUBE_SIZE 32
+
+class RNG_plugin
+{
+  protected:
+    ConfigFile *pcf_; //!< pointer to config_file from which to read parameters
+  public:
+    explicit RNG_plugin(ConfigFile &cf)
+        : pcf_(&cf)
+    {
+    }
+    virtual ~RNG_plugin() {}
+    virtual bool isMultiscale() const = 0;
+    //virtual void FillGrid(int level, DensityGrid<real_t> &R) = 0;
+};
+
+struct RNG_plugin_creator
+{
+    virtual RNG_plugin *Create(ConfigFile &cf) const = 0;
+    virtual ~RNG_plugin_creator() {}
+};
+
+std::map<std::string, RNG_plugin_creator *> & get_RNG_plugin_map();
+
+void print_RNG_plugins(void);
+
+template <class Derived>
+struct RNG_plugin_creator_concrete : public RNG_plugin_creator
+{
+    //! register the plugin by its name
+    RNG_plugin_creator_concrete(const std::string &plugin_name)
+    {
+        get_RNG_plugin_map()[plugin_name] = this;
+    }
+
+    //! create an instance of the plugin
+    RNG_plugin *Create(ConfigFile &cf) const
+    {
+        return new Derived(cf);
+    }
+};
+
+typedef RNG_plugin RNG_instance;
+RNG_plugin *select_RNG_plugin( ConfigFile &cf);
+
+// /*!
+//  * @brief encapsulates all things for multi-scale white noise generation
+//  */
+// template <typename T>
+// class random_number_generator
+// {
+//   protected:
+//     ConfigFile *pcf_;
+//     //const refinement_hierarchy * prefh_;
+//     RNG_plugin *generator_;
+//     int levelmin_, levelmax_;
+
+//   public:
+//     //! constructor
+//     random_number_generator( ConfigFile &cf )
+//         : pcf_(&cf) //, prefh_( &refh )
+//     {
+//         levelmin_ = pcf_->GetValue<int>("setup", "levelmin");
+//         levelmax_ = pcf_->GetValue<int>("setup", "levelmax");
+//         generator_ = select_RNG_plugin(cf);
+//     }
+
+//     //! destructor
+//     ~random_number_generator()
+//     {
+//     }
+
+//     //! initialize_for_grid_structure
+//     /*void initialize_for_grid_structure(const refinement_hierarchy &refh)
+//     {
+//         generator_->initialize_for_grid_structure(refh);
+//     }*/
+
+//     //! load random numbers to a new array
+//     template <typename array>
+//     void load(array &A, int ilevel)
+//     {
+//         generator_->FillGrid(ilevel, A);
+//     }
+// };
+
+// typedef random_number_generator<real_t> noise_generator;
\ No newline at end of file
diff --git a/include/transfer_function_plugin.hh b/include/transfer_function_plugin.hh
new file mode 100644
index 0000000..302fafe
--- /dev/null
+++ b/include/transfer_function_plugin.hh
@@ -0,0 +1,114 @@
+#pragma once
+
+#include <map>
+#include <string>
+#include <general.hh>
+#include <config_file.hh>
+
+enum tf_type
+{
+    total,
+    cdm,
+    baryon,
+    vtotal,
+    vcdm,
+    vbaryon,
+    total0
+};
+
+class TransferFunction_plugin
+{
+  public:
+    // Cosmology cosmo_;    //!< cosmological parameter, read from config_file
+    ConfigFile *pcf_;   //!< pointer to config_file from which to read parameters
+    bool tf_distinct_;   //!< bool if density transfer function is distinct for baryons and DM
+    bool tf_withvel_;    //!< bool if also have velocity transfer functions
+    bool tf_withtotal0_; //!< have the z=0 spectrum for normalisation purposes
+    bool tf_velunits_;   //!< velocities are in velocity units (km/s)
+  public:
+    //! constructor
+    TransferFunction_plugin(ConfigFile &cf)
+        : pcf_(&cf), tf_distinct_(false), tf_withvel_(false), tf_withtotal0_(false), tf_velunits_(false)
+    {
+        // real_t zstart;
+        // zstart = pcf_->getValue<real_t>("setup", "zstart");
+        // cosmo_.astart = 1.0 / (1.0 + zstart);
+        // cosmo_.Omega_b = pcf_->getValue<real_t>("cosmology", "Omega_b");
+        // cosmo_.Omega_m = pcf_->getValue<real_t>("cosmology", "Omega_m");
+        // cosmo_.Omega_DE = pcf_->getValue<real_t>("cosmology", "Omega_L");
+        // cosmo_.H0 = pcf_->getValue<real_t>("cosmology", "H0");
+        // cosmo_.sigma8 = pcf_->getValue<real_t>("cosmology", "sigma_8");
+        // cosmo_.nspect = pcf_->getValue<real_t>("cosmology", "nspec");
+    }
+
+    //! destructor
+    virtual ~TransferFunction_plugin(){};
+
+    //! compute value of transfer function at waven umber
+    virtual double compute(double k, tf_type type) = 0;
+
+    //! return maximum wave number allowed
+    virtual double get_kmax(void) = 0;
+
+    //! return minimum wave number allowed
+    virtual double get_kmin(void) = 0;
+
+    //! return if density transfer function is distinct for baryons and DM
+    bool tf_is_distinct(void)
+    {
+        return tf_distinct_;
+    }
+
+    //! return if we also have velocity transfer functions
+    bool tf_has_velocities(void)
+    {
+        return tf_withvel_;
+    }
+
+    //! return if we also have a z=0 transfer function for normalisation
+    bool tf_has_total0(void)
+    {
+        return tf_withtotal0_;
+    }
+
+    //! return if velocity returned is in velocity or in displacement units
+    bool tf_velocity_units(void)
+    {
+        return tf_velunits_;
+    }
+};
+
+//! Implements abstract factory design pattern for transfer function plug-ins
+struct TransferFunction_plugin_creator
+{
+    //! create an instance of a transfer function plug-in
+    virtual TransferFunction_plugin *create(ConfigFile &cf) const = 0;
+
+    //! destroy an instance of a plug-in
+    virtual ~TransferFunction_plugin_creator() {}
+};
+
+//! Write names of registered transfer function plug-ins to stdout
+std::map<std::string, TransferFunction_plugin_creator *> &get_TransferFunction_plugin_map();
+void print_TransferFunction_plugins(void);
+
+//! Concrete factory pattern for transfer function plug-ins
+template <class Derived>
+struct TransferFunction_plugin_creator_concrete : public TransferFunction_plugin_creator
+{
+    //! register the plug-in by its name
+    TransferFunction_plugin_creator_concrete(const std::string &plugin_name)
+    {
+        get_TransferFunction_plugin_map()[plugin_name] = this;
+    }
+
+    //! create an instance of the plug-in
+    TransferFunction_plugin *create(ConfigFile &cf) const
+    {
+        return new Derived(cf);
+    }
+};
+
+// typedef TransferFunction_plugin TransferFunction;
+
+TransferFunction_plugin *select_TransferFunction_plugin(ConfigFile &cf);
diff --git a/include/vec3.hh b/include/vec3.hh
new file mode 100644
index 0000000..1af7a50
--- /dev/null
+++ b/include/vec3.hh
@@ -0,0 +1,29 @@
+#pragma once
+
+template< typename T >
+class vec3{
+private:
+    std::array<T,3> data_;
+
+public:    
+    T &operator[](size_t i){ return data_[i];}
+    
+    const T &operator[](size_t i) const { return data_[i]; }
+    
+    T dot(const vec3<T> &a) const 
+    {
+        return data_[0] * a.data_[0] + data_[1] * a.data_[1] + data_[2] * a.data_[2];
+    }
+    
+    T norm_squared(void) const
+    {
+        return this->dot(*this);
+    }
+
+    T norm(void) const
+    {
+        return std::sqrt( this->norm_squared() );
+    }
+
+    
+};
diff --git a/src/grid_fft.cc b/src/grid_fft.cc
new file mode 100644
index 0000000..6036273
--- /dev/null
+++ b/src/grid_fft.cc
@@ -0,0 +1,1001 @@
+#include <general.hh>
+#include <grid_fft.hh>
+#include <thread>
+
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+
+template <typename data_t>
+void Grid_FFT<data_t>::FillRandomReal( unsigned long int seed )
+{
+    gsl_rng *RNG = gsl_rng_alloc(gsl_rng_mt19937);
+    gsl_rng_set(RNG, seed);
+
+    for (size_t i = 0; i < sizes_[0]; ++i)
+    {
+        for (size_t j = 0; j < sizes_[1]; ++j)
+        {
+            for (size_t k = 0; k < sizes_[2]; ++k)
+            {
+                this->relem(i,j,k) = gsl_ran_ugaussian_ratio_method(RNG);
+            }
+        }
+    }
+
+    gsl_rng_free(RNG);
+}
+
+template <typename data_t>
+void Grid_FFT<data_t>::Setup(void)
+{
+    if (CONFIG::FFTW_threads_ok)
+        fftw_plan_with_nthreads(std::thread::hardware_concurrency());
+
+#if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////////////
+
+    ntot_ = (n_[2] + 2) * n_[1] * n_[0];
+
+    
+    csoca::ilog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
+    if (typeid(data_t) == typeid(real_t))
+    {
+        data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(real_t)));
+        cdata_ = reinterpret_cast<ccomplex_t *>(data_);
+
+        plan_ = FFTW_API(plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, FFTW_RUNMODE);
+        iplan_ = FFTW_API(plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, FFTW_RUNMODE);
+    }
+    else if (typeid(data_t) == typeid(ccomplex_t))
+    {
+        data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(ccomplex_t)));
+        cdata_ = reinterpret_cast<ccomplex_t *>(data_);
+
+        plan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_FORWARD, FFTW_RUNMODE);
+        iplan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_BACKWARD, FFTW_RUNMODE);
+    }
+    else
+    {
+        csoca::elog.Print("invalid data type in field3d<data_t>::setup_fft_interface\n");
+    }
+
+    fft_norm_fac_ = 1.f / sqrtf((float)((size_t)n_[0] * (size_t)n_[1] * (size_t)n_[2]));
+
+    if (typeid(data_t) == typeid(real_t))
+    {
+        npr_ = n_[2] + 2;
+        npc_ = n_[2] / 2 + 1;
+    }
+    else
+    {
+        npr_ = n_[2];
+        npc_ = n_[2];
+    }
+
+    nhalf_[0] = n_[0] / 2;
+    nhalf_[1] = n_[1] / 2;
+    nhalf_[2] = n_[2] / 2;
+
+    kfac_[0] = (real_t)(2.0 * M_PI) / length_[0];
+    kfac_[1] = (real_t)(2.0 * M_PI) / length_[1];
+    kfac_[2] = (real_t)(2.0 * M_PI) / length_[2];
+
+    local_0_size_ = n_[0];
+    local_1_size_ = n_[1];
+
+    if (space_ == rspace_id)
+    {
+        sizes_[0] = n_[0];
+        sizes_[1] = n_[1];
+        sizes_[2] = n_[2];
+        sizes_[3] = npr_;
+    }
+    else
+    {
+        sizes_[0] = n_[1];
+        sizes_[1] = n_[0];
+        sizes_[2] = npc_;
+        sizes_[3] = npc_;
+    }
+
+    global_range_.x1_[0] = 0;
+    global_range_.x1_[1] = 0;
+    global_range_.x1_[2] = 0;
+
+    global_range_.x2_[0] = n_[0];
+    global_range_.x2_[1] = n_[1];
+    global_range_.x2_[2] = n_[2];
+
+#else //// i.e. ifdef USE_MPI ////////////////////////////////////////////////////////////////////////////////////
+
+    size_t cmplxsz;
+
+    if (typeid(data_t) == typeid(real_t))
+    {
+        cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD,
+                                                         &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
+        ntot_ = 2 * cmplxsz;
+        data_ = (data_t*)fftw_malloc(ntot_ * sizeof(real_t));
+        cdata_ = reinterpret_cast<ccomplex_t *>(data_);
+        plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_,
+                                              MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
+        iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_,
+                                               MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
+    }
+    else if (typeid(data_t) == typeid(ccomplex_t))
+    {
+        cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD,
+                                                         &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
+        ntot_ = cmplxsz;
+        data_ = (data_t*)fftw_malloc(ntot_ * sizeof(ccomplex_t));
+        cdata_ = reinterpret_cast<ccomplex_t *>(data_);
+        plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
+                                            MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
+        iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
+                                           MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
+    }
+    else
+    {
+        csoca::elog.Print("unknown data type in field3d<data_t>::setup_fft_interface\n");
+        abort();
+    }
+
+    
+    csoca::ilog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
+
+
+    fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]);
+
+    if (typeid(data_t) == typeid(real_t))
+    {
+        npr_ = n_[2] + 2;
+        npc_ = n_[2] / 2 + 1;
+    }
+    else
+    {
+        npr_ = n_[2];
+        npc_ = n_[2];
+    }
+
+    nhalf_[0] = n_[0] / 2;
+    nhalf_[1] = n_[1] / 2;
+    nhalf_[2] = n_[2] / 2;
+
+    kfac_[0] = (real_t)(2.0 * M_PI) / length_[0];
+    kfac_[1] = (real_t)(2.0 * M_PI) / length_[1];
+    kfac_[2] = (real_t)(2.0 * M_PI) / length_[2];
+
+    if (space_ == rspace_id)
+    {
+        sizes_[0] = (int)local_0_size_;
+        sizes_[1] = n_[1];
+        sizes_[2] = n_[2];
+        sizes_[3] = npr_; // holds the physical memory size along the 3rd dimension
+    }
+    else
+    {
+        sizes_[0] = (int)local_1_size_;
+        sizes_[1] = n_[0];
+        sizes_[2] = npc_;
+        sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension
+    }
+
+    global_range_.x1_[0] = (int)local_0_start_;
+    global_range_.x1_[1] = 0;
+    global_range_.x1_[2] = 0;
+
+    global_range_.x2_[0] = (int)(local_0_start_ + local_0_size_);
+    global_range_.x2_[1] = n_[1];
+    global_range_.x2_[2] = n_[2];
+
+#endif //// of #ifdef #else USE_MPI ////////////////////////////////////////////////////////////////////////////////////
+}
+
+template <typename data_t>
+void Grid_FFT<data_t>::ApplyNorm(void)
+{
+    #pragma omp parallel for
+    for (size_t i = 0; i < ntot_; ++i)
+        data_[i] *= fft_norm_fac_;
+}
+
+template <typename data_t>
+void Grid_FFT<data_t>::FourierTransformForward(bool do_transform)
+{
+
+    if (space_ != kspace_id)
+    {
+        //.............................
+        if (do_transform)
+        {
+            double wtime = get_wtime();
+
+            FFTW_API(execute)
+            (plan_);
+            this->ApplyNorm();
+
+            wtime = get_wtime() - wtime;
+            csoca::ilog.Print("[FFT] Completed field3d::to_kspace (%lux%lux%lu), took %f s", sizes_[0], sizes_[1], sizes_[2], wtime);
+        }
+
+        sizes_[0] = local_1_size_;
+        sizes_[1] = n_[0];
+        sizes_[2] = (int)npc_;
+        sizes_[3] = npc_;
+
+        space_ = kspace_id;
+        //.............................
+    }
+}
+
+template <typename data_t>
+void Grid_FFT<data_t>::FourierTransformBackward(bool do_transform)
+{
+    if (space_ != rspace_id)
+    {
+        //.............................
+        if (do_transform)
+        {
+            double wtime = get_wtime();
+
+            FFTW_API(execute)
+            (iplan_);
+            this->ApplyNorm();
+
+            wtime = get_wtime() - wtime;
+            csoca::ilog.Print("[FFT] Completed field3d::to_rspace (%dx%dx%d), took %f s\n", sizes_[0], sizes_[1], sizes_[2], wtime);
+        }
+        sizes_[0] = local_0_size_;
+        sizes_[1] = n_[1];
+        sizes_[2] = n_[2];
+        sizes_[3] = npr_;
+
+        space_ = rspace_id;
+        //.............................
+    }
+}
+
+#define H5_USE_16_API
+#include <hdf5.h>
+
+bool file_exists(std::string Filename)
+{
+    bool flag = false;
+    std::fstream fin(Filename.c_str(), std::ios::in | std::ios::binary);
+    if (fin.is_open())
+        flag = true;
+    fin.close();
+    return flag;
+}
+
+void create_hdf5(std::string Filename)
+{
+    hid_t HDF_FileID;
+    HDF_FileID =
+        H5Fcreate(Filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+    H5Fclose(HDF_FileID);
+}
+
+template <typename data_t>
+void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
+{
+    const bool bComplexType(typeid(data_t) == typeid(std::complex<double>) ||
+                            typeid(data_t) == typeid(std::complex<float>));
+
+    std::string datasetname_real = datasetname + std::string("_real");
+    std::string datasetname_imag = datasetname + std::string("_imag");
+
+    hid_t file_id, dset_id;    /* file and dataset identifiers */
+    hid_t filespace, memspace; /* file and memory dataspace identifiers */
+    hsize_t offset[3], count[3];
+    hid_t dtype_id;
+    hid_t plist_id;
+
+#if defined(USE_MPI)
+
+    if (!file_exists(fname) && CONFIG::MPI_task_rank == 0)
+        create_hdf5(fname);
+    MPI_Barrier(MPI_COMM_WORLD);
+
+#ifndef NOMPIIO
+    plist_id = H5Pcreate(H5P_FILE_ACCESS);
+#else
+    plist_id = H5P_DEFAULT;
+#endif
+
+#else
+
+    if (!file_exists(fname))
+        create_hdf5(fname);
+
+    plist_id = H5P_DEFAULT;
+
+#endif
+
+#if defined(USE_MPI) && defined(NOMPIIO)
+    for (int itask = 0; itask < CONFIG::MPI_task_size; ++itask)
+    {
+        MPI_Barrier(MPI_COMM_WORLD);
+        if (itask != CONFIG::MPI_task_rank)
+            continue;
+
+#endif
+
+        // file_id = H5Fcreate( fname.c_str(), H5F_ACC_RDWR, H5P_DEFAULT,
+        // H5P_DEFAULT );
+        file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id);
+
+        for (int i = 0; i < 3; ++i)
+            count[i] = size(i);
+
+        assert(typeid(real_t) == typeid(float) || typeid(real_t) == typeid(double));
+
+        if (typeid(real_t) == typeid(float))
+            dtype_id = H5T_NATIVE_FLOAT;
+        else
+            dtype_id = H5T_NATIVE_DOUBLE;
+
+        hsize_t slice_sz = size(1) * size(2);
+
+        count[0] = size(0);
+        count[1] = size(1);
+        count[2] = size(2);
+
+        offset[1] = 0;
+        offset[2] = 0;
+
+#if defined(USE_MPI) && !defined(NOMPIIO)
+        H5Pclose(plist_id);
+        plist_id = H5Pcreate(H5P_DATASET_XFER);
+// H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
+#else
+    plist_id = H5P_DEFAULT;
+#endif
+
+        real_t *buf = new real_t[slice_sz];
+
+        //-------- write real part
+        //-----------------------------------------------------------
+
+#if defined(USE_MPI) && defined(NOMPIIO)
+        if (itask == 0)
+        {
+            filespace = H5Screate_simple(3, count, NULL);
+            dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace,
+                                 H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+            H5Sclose(filespace);
+        }
+        else
+        {
+            dset_id = H5Dopen1(file_id, datasetname.c_str());
+        }
+#else
+    filespace = H5Screate_simple(3, count, NULL);
+    dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace,
+                         H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+    H5Sclose(filespace);
+#endif
+
+        count[0] = 1;
+
+        for (size_t i = 0; i < size(0); ++i)
+        {
+            offset[0] = i;
+
+            for (size_t j = 0; j < size(1); ++j)
+                for (size_t k = 0; k < size(2); ++k){
+                    buf[j * size(2) + k] = std::real(relem(i, j, k));
+                }
+
+            memspace = H5Screate_simple(3, count, NULL);
+
+            filespace = H5Dget_space(dset_id);
+            H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL);
+            // H5Dwrite( dset_id, H5T_NATIVE_DOUBLE, memspace, filespace, H5P_DEFAULT,
+            // m_Data );
+
+            H5Dwrite(dset_id, dtype_id, memspace, filespace, plist_id, buf);
+            H5Sclose(memspace);
+            H5Sclose(filespace);
+        }
+
+        H5Dclose(dset_id);
+
+        //-------- write imaginary part
+        //-----------------------------------------------------------
+        if( bComplexType ){
+
+            count[0] = size(0);
+
+#if defined(USE_MPI) && defined(NOMPIIO)
+            if (itask == 0)
+            {
+                filespace = H5Screate_simple(3, count, NULL);
+                dset_id = H5Dcreate2(file_id, datasetname_imag.c_str(), dtype_id,
+                                     filespace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+                H5Sclose(filespace);
+            }
+            else
+            {
+                dset_id = H5Dopen1(file_id, datasetname_imag.c_str());
+            }
+#else
+            filespace = H5Screate_simple(3, count, NULL);
+            dset_id = H5Dcreate2(file_id, datasetname_imag.c_str(), dtype_id, filespace,
+                                 H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+            H5Sclose(filespace);
+#endif
+            count[0] = 1;
+
+            for (size_t i = 0; i < size(0); ++i)
+            {
+                offset[0] = i;
+
+                for (size_t j = 0; j < size(1); ++j)
+                    for (size_t k = 0; k < size(2); ++k)
+                        buf[j * size(2) + k] = std::imag(relem(i, j, k));
+
+                memspace = H5Screate_simple(3, count, NULL);
+
+                filespace = H5Dget_space(dset_id);
+                H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL);
+                // H5Dwrite( dset_id, H5T_NATIVE_DOUBLE, memspace, filespace, H5P_DEFAULT,
+                // m_Data );
+                H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf);
+                H5Sclose(memspace);
+                H5Sclose(filespace);
+            }
+
+            H5Dclose(dset_id);
+        }
+        //------------------------------------------------------------------------------------
+
+#if defined(USE_MPI) && !defined(NOMPIIO)
+        H5Pclose(plist_id);
+#endif
+
+        H5Fclose(file_id);
+
+        delete[] buf;
+
+#if defined(USE_MPI) && defined(NOMPIIO)
+    }
+#endif
+}
+
+#include <iomanip>
+
+template <typename data_t>
+void Grid_FFT<data_t>::Compute_PDF( std::string ofname, int nbins, double scale, double vmin, double vmax )
+{
+    double logvmin = std::log10(vmin);
+    double logvmax = std::log10(vmax);
+    double idv = double(nbins) / (logvmax - logvmin);
+
+    std::vector<double> count( nbins, 0.0 ), scount( nbins, 0.0 );
+
+    for( size_t ix=0; ix<size(0); ix++ )
+        for( size_t iy=0; iy<size(1); iy++ )
+            for (size_t iz = 0; iz < size(2); iz++)
+            {
+                auto v = this->relem(ix,iy,iz);
+                int ibin = int( (std::log10(std::abs(v))-logvmin)*idv );
+                if( ibin >= 0 && ibin < nbins ){
+                    count[ibin] += 1.0;
+                }
+                ibin = int(((std::log10((std::abs(v)-1.0) * scale + 1.0 )) - logvmin) * idv);
+                if (ibin >= 0 && ibin < nbins)
+                {
+                    scount[ibin] += 1.0;
+                }
+            }
+
+#if defined(USE_MPI)
+    if (CONFIG::MPI_task_rank == 0)
+    {
+#endif
+        std::ofstream ofs(ofname.c_str());
+        std::size_t numcells = size(0) * size(1) * size(2);
+
+        //ofs << "# a = " << aexp << std::endl;
+        ofs << "# " << std::setw(14) << "rho" << std::setw(16) << "d rho / dV" << std::setw(16) << "d (rho/D+) / dV"
+            << "\n";
+
+        for( int ibin=0; ibin<nbins; ++ibin ){
+            double vmean = std::pow(10.0, logvmin + (double(ibin)+0.5)/idv );
+            double dv = std::pow(10.0, logvmin + (double(ibin) + 1.0) / idv) - std::pow(10.0, logvmin + (double(ibin)) / idv);
+
+            ofs << std::setw(16) << vmean
+                << std::setw(16) << count[ibin] / dv / numcells
+                << std::setw(16) << scount[ibin] / dv / numcells
+                << std::endl;
+        }
+#if defined(USE_MPI)
+    }
+#endif
+}
+
+template <typename data_t>
+void Grid_FFT<data_t>::ComputePowerSpectrum(std::vector<double> &bin_k, std::vector<double> &bin_P, std::vector<double> &bin_eP, int nbins)
+{
+    real_t kmax = std::max(std::max(kfac_[0] * nhalf_[0], kfac_[1] * nhalf_[1]),
+                           kfac_[2] * nhalf_[2]),
+           kmin = std::min(std::min(kfac_[0], kfac_[1]), kfac_[2]),
+           dklog = log10(kmax / kmin) / nbins;
+
+    std::vector<size_t> bin_count;
+
+    bin_count.assign(nbins,0);
+    bin_k.assign(nbins, 0);
+    bin_P.assign(nbins, 0);
+    bin_eP.assign(nbins, 0);
+
+    for (size_t ix = 0; ix < size(0); ix++)
+        for (size_t iy = 0; iy < size(1); iy++)
+            for (size_t iz = 0; iz < size(2); iz++)
+            {
+                vec3<double> k3 = get_k<double>(ix, iy, iz);
+                double k = k3.norm();
+                int idx2 = int((1.0f / dklog * std::log10(k / kmin)));
+                auto z = this->kelem(ix, iy, iz);
+                double vabs = z.real()*z.real()+z.imag()*z.imag();
+
+                if (k >= kmin && k < kmax)
+                {
+                    if (iz == 0)
+                    {
+                        bin_P[idx2] += vabs;
+                        bin_eP[idx2] += vabs * vabs;
+                        bin_k[idx2] += k;
+                        bin_count[idx2]++;
+                    }
+                    else
+                    {
+                        bin_P[idx2] += 2.0 * vabs;
+                        bin_eP[idx2] += 2.0 * vabs * vabs;
+                        bin_k[idx2] += 2.0 * k;
+                        bin_count[idx2] += 2;
+                    }
+                }
+            }
+
+#if defined(USE_MPI)
+    std::vector<double> tempv(nbins, 0);
+    std::vector<size_t> tempvi(nbins, 0);
+
+    MPI_Allreduce(reinterpret_cast<void *>(&bin_k[0]), reinterpret_cast<void *>(&tempv[0]),
+                  nbins, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+    bin_k.swap(tempv);
+    MPI_Allreduce(reinterpret_cast<void *>(&bin_P[0]), reinterpret_cast<void *>(&tempv[0]),
+                  nbins, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+    bin_P.swap(tempv);
+    MPI_Allreduce(reinterpret_cast<void *>(&bin_eP[0]), reinterpret_cast<void *>(&tempv[0]),
+                  nbins, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+    bin_eP.swap(tempv);
+    MPI_Allreduce(reinterpret_cast<void *>(&bin_count[0]), reinterpret_cast<void *>(&tempvi[0]),
+                  nbins, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD);
+    bin_count.swap(tempvi);
+
+#endif
+
+    const real_t volfac(length_[0] * length_[1] * length_[2] / std::pow(2.0 * M_PI, 3.0));
+    const real_t fftfac(std::pow(double(size(0)), 3.0));
+
+    for( int i=0; i<nbins; ++i ){
+        bin_k[i]  /= bin_count[i];
+        bin_P[i]  = bin_P[i] / bin_count[i] * volfac / fftfac;
+        bin_eP[i] = std::sqrt(bin_eP[i] / bin_count[i] - bin_P[i] * bin_P[i])/std::sqrt(bin_count[i]) * volfac / fftfac;
+    }
+}
+
+template <typename array_type>
+int get_task(ptrdiff_t index, const array_type &offsets, const array_type& sizes,
+             const int ntasks )
+{
+    int itask = 0;
+    while (itask < ntasks - 1 && offsets[itask + 1] <= index)
+        ++itask;
+    return itask;
+}
+
+template <typename data_t>
+void pad_insert(const Grid_FFT<data_t> &f, Grid_FFT<data_t> &fp)
+{
+
+    // assert(fp.n_[0] == 3 * f.n_[0] / 2);
+    // assert(fp.n_[1] == 3 * f.n_[1] / 2);
+    // assert(fp.n_[2] == 3 * f.n_[2] / 2);
+
+    size_t dn[3] = {
+        fp.n_[0] - f.n_[0],
+        fp.n_[1] - f.n_[1],
+        fp.n_[2] - f.n_[2],
+    };
+    const double rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(f.n_[0] * f.n_[1] * f.n_[2]);
+
+    fp.zero();
+
+#if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////
+
+    size_t nhalf[3] = {f.n_[0] / 2, f.n_[1] / 2, f.n_[2] / 2};
+
+    for (size_t i = 0; i < f.size(0); ++i)
+    {
+        size_t ip = (i > nhalf[0]) ? i + dn[0] : i;
+        for (size_t j = 0; j < f.size(1); ++j)
+        {
+            size_t jp = (j > nhalf[1]) ? j + dn[1] : j;
+            for (size_t k = 0; k < f.size(2); ++k)
+            {
+                size_t kp = (k > nhalf[2]) ? k + dn[2] : k;
+                // if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue;
+                fp.kelem(ip, jp, kp) = f.kelem(i, j, k) * rfac;
+            }
+        }
+    }
+
+
+#else /// then USE_MPI is defined ////////////////////////////////////////////////////////////
+
+    MPI_Barrier(MPI_COMM_WORLD);
+
+    /////////////////////////////////////////////////////////////////////
+    size_t maxslicesz = fp.sizes_[1] * fp.sizes_[3] * 2;
+
+    std::vector<ccomplex_t> crecvbuf_(maxslicesz / 2, 0);
+    real_t *recvbuf_ = reinterpret_cast<real_t *>(&crecvbuf_[0]);
+
+    std::vector<ptrdiff_t>
+        offsets_(CONFIG::MPI_task_size, 0),
+        offsetsp_(CONFIG::MPI_task_size, 0),
+        sizes_(CONFIG::MPI_task_size, 0),
+        sizesp_(CONFIG::MPI_task_size, 0);
+
+    size_t tsize = f.size(0), tsizep = fp.size(0);
+
+    MPI_Allgather(&f.local_1_start_, 1, MPI_LONG_LONG, &offsets_[0], 1,
+                  MPI_LONG_LONG, MPI_COMM_WORLD);
+    MPI_Allgather(&fp.local_1_start_, 1, MPI_LONG_LONG, &offsetsp_[0], 1,
+                  MPI_LONG_LONG, MPI_COMM_WORLD);
+    MPI_Allgather(&tsize, 1, MPI_LONG_LONG, &sizes_[0], 1, MPI_LONG_LONG,
+                  MPI_COMM_WORLD);
+    MPI_Allgather(&tsizep, 1, MPI_LONG_LONG, &sizesp_[0], 1, MPI_LONG_LONG,
+                  MPI_COMM_WORLD);
+    /////////////////////////////////////////////////////////////////////
+
+    double tstart = get_wtime();
+    csoca::dlog << "[MPI] Started scatter for convolution" << std::endl;
+
+    //... collect offsets
+
+    assert(f.space_ == kspace_id);
+
+    size_t nf[3] = {f.size(0), f.size(1), f.size(2)};
+    size_t nfp[3] = {fp.size(0), fp.size(1), fp.size(2)};
+    size_t fny[3] = {f.n_[1] / 2, f.n_[0] / 2, f.n_[2] / 2};
+
+    //... local size must be divisible by 2, otherwise this gets too complicated
+    assert(f.n_[1] % 2 == 0);
+
+    size_t slicesz = f.size(1) * f.size(3); //*2;
+
+    // comunicate
+    if (typeid(data_t) == typeid(real_t))
+        slicesz *= 2; // then sizeof(real_t) gives only half of a complex
+
+    MPI_Datatype datatype =
+        (typeid(data_t) == typeid(float))
+            ? MPI_FLOAT
+            : (typeid(data_t) == typeid(double))
+                  ? MPI_DOUBLE
+                  : (typeid(data_t) == typeid(std::complex<float>))
+                        ? MPI_COMPLEX
+                        : (typeid(data_t) == typeid(std::complex<double>))
+                              ? MPI_DOUBLE_COMPLEX
+                              : MPI_INT;
+
+    MPI_Status status;
+
+    std::vector<MPI_Request> req;
+    MPI_Request temp_req;
+
+    for (size_t i = 0; i < nf[0]; ++i)
+    {
+        size_t iglobal = i + offsets_[CONFIG::MPI_task_rank];
+
+        if (iglobal < fny[0])
+        {
+            int sendto = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size);
+            MPI_Isend(&f.kelem(i * slicesz), (int)slicesz, datatype, sendto,
+                      (int)iglobal, MPI_COMM_WORLD, &temp_req);
+            req.push_back(temp_req);
+            // ofs << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ":
+            // Isend #" << iglobal << " to task " << sendto << std::endl;
+        }
+        if (iglobal > fny[0])
+        {
+            int sendto = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size);
+            MPI_Isend(&f.kelem(i * slicesz), (int)slicesz, datatype, sendto,
+                      (int)(iglobal + fny[0]), MPI_COMM_WORLD, &temp_req);
+            req.push_back(temp_req);
+            // ofs << "task " << CONFIG::MPI_task_rank << " : added request No" << req.size()-1 << ":
+            // Isend #" << iglobal+fny[0] << " to task " << sendto << std::endl;
+        }
+    }
+
+    for (size_t i = 0; i < nfp[0]; ++i)
+    {
+        size_t iglobal = i + offsetsp_[CONFIG::MPI_task_rank];
+
+        if (iglobal < fny[0] || iglobal > 2 * fny[0])
+        {
+            int recvfrom = 0;
+            if (iglobal <= fny[0])
+                recvfrom = get_task(iglobal, offsets_, sizes_, CONFIG::MPI_task_size);
+            else
+                recvfrom = get_task(iglobal - fny[0], offsets_, sizes_, CONFIG::MPI_task_size);
+
+            // ofs << "task " << CONFIG::MPI_task_rank << " : receive #" << iglobal << " from task "
+            // << recvfrom << std::endl;
+
+            MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, (int)iglobal,
+                     MPI_COMM_WORLD, &status);
+            // ofs << "---> ok!  " << (bool)(status.Get_error()==MPI::SUCCESS) <<
+            // std::endl;
+
+            assert(status.MPI_ERROR == MPI_SUCCESS);
+
+            for (size_t j = 0; j < nf[1]; ++j)
+            {
+                if (j < fny[1])
+                {
+                    size_t jp = j;
+                    for (size_t k = 0; k < nf[2]; ++k)
+                    {
+                        // size_t kp = (k>fny[2])? k+fny[2] : k;
+                        if (k < fny[2])
+                            fp.kelem(i, jp, k) = crecvbuf_[j * f.sizes_[3] + k];
+                        else if (k > fny[2])
+                            fp.kelem(i, jp, k + fny[2]) = crecvbuf_[j * f.sizes_[3] + k];
+                    }
+                }
+
+                else if (j > fny[1])
+                {
+                    size_t jp = j + fny[1];
+                    for (size_t k = 0; k < nf[2]; ++k)
+                    {
+                        // size_t kp = (k>fny[2])? k+fny[2] : k;
+                        // fp.kelem(i,jp,kp) = crecvbuf_[j*f.sizes_[3]+k];
+                        if (k < fny[2])
+                            fp.kelem(i, jp, k) = crecvbuf_[j * f.sizes_[3] + k];
+                        else if (k > fny[2])
+                            fp.kelem(i, jp, k + fny[2]) = crecvbuf_[j * f.sizes_[3] + k];
+                    }
+                }
+            }
+        }
+    }
+
+    for (size_t i = 0; i < req.size(); ++i)
+    {
+        // need to set status as wait does not necessarily modify it
+        // c.f. http://www.open-mpi.org/community/lists/devel/2007/04/1402.php
+        status.MPI_ERROR = MPI_SUCCESS;
+        // ofs << "task " << CONFIG::MPI_task_rank << " : checking request No" << i << std::endl;
+        MPI_Wait(&req[i], &status);
+        // ofs << "---> ok!" << std::endl;
+        assert(status.MPI_ERROR == MPI_SUCCESS);
+    }
+
+    // usleep(1000);
+
+    MPI_Barrier(MPI_COMM_WORLD);
+
+    // std::cerr << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed! <<<<<"
+    // << std::endl;  ofs << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed!
+    // <<<<<" << std::endl;
+    csoca::dlog.Print("[MPI] Completed scatter for convolution, took %fs\n",
+                get_wtime() - tstart);
+
+#endif /// end of ifdef/ifndef USE_MPI ///////////////////////////////////////////////////////////////
+}
+
+
+template <typename data_t>
+void unpad(const Grid_FFT<data_t> &fp, Grid_FFT<data_t> &f)
+{
+    // assert(fp.n_[0] == 3 * f.n_[0] / 2);
+    // assert(fp.n_[1] == 3 * f.n_[1] / 2);
+    // assert(fp.n_[2] == 3 * f.n_[2] / 2);
+
+    size_t dn[3] = {
+        fp.n_[0] - f.n_[0],
+        fp.n_[1] - f.n_[1],
+        fp.n_[2] - f.n_[2],
+    };
+
+    const double rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(f.n_[0] * f.n_[1] * f.n_[2]);
+
+#if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////
+
+    size_t nhalf[3] = {f.n_[0] / 2, f.n_[1] / 2, f.n_[2] / 2};
+
+    for (size_t i = 0; i < f.size(0); ++i)
+    {
+        size_t ip = (i > nhalf[0]) ? i + dn[0] : i;
+        for (size_t j = 0; j < f.size(1); ++j)
+        {
+            size_t jp = (j > nhalf[1]) ? j + dn[1] : j;
+            for (size_t k = 0; k < f.size(2); ++k)
+            {
+                size_t kp = (k > nhalf[2]) ? k + dn[2] : k;
+                // if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue;
+                f.kelem(i, j, k) = fp.kelem(ip, jp, kp) / rfac;
+            }
+        }
+    }
+
+#else /// then USE_MPI is defined //////////////////////////////////////////////////////////////
+
+                        /////////////////////////////////////////////////////////////////////
+                        size_t maxslicesz = fp.sizes_[1] * fp.sizes_[3] * 2;
+
+    std::vector<ccomplex_t> crecvbuf_(maxslicesz / 2,0);
+    real_t* recvbuf_ = reinterpret_cast<real_t *>(&crecvbuf_[0]);
+
+    std::vector<ptrdiff_t> 
+        offsets_(CONFIG::MPI_task_size, 0), 
+        offsetsp_(CONFIG::MPI_task_size, 0), 
+        sizes_(CONFIG::MPI_task_size, 0), 
+        sizesp_(CONFIG::MPI_task_size, 0);
+
+    size_t tsize = f.size(0), tsizep = fp.size(0);
+
+    MPI_Allgather(&f.local_1_start_, 1, MPI_LONG_LONG, &offsets_[0], 1,
+                  MPI_LONG_LONG, MPI_COMM_WORLD);
+    MPI_Allgather(&fp.local_1_start_, 1, MPI_LONG_LONG, &offsetsp_[0], 1,
+                  MPI_LONG_LONG, MPI_COMM_WORLD);
+    MPI_Allgather(&tsize, 1, MPI_LONG_LONG, &sizes_[0], 1, MPI_LONG_LONG,
+                  MPI_COMM_WORLD);
+    MPI_Allgather(&tsizep, 1, MPI_LONG_LONG, &sizesp_[0], 1, MPI_LONG_LONG,
+                  MPI_COMM_WORLD);
+    /////////////////////////////////////////////////////////////////////
+
+    double tstart = get_wtime();
+
+    csoca::ilog << "[MPI] Started gather for convolution";
+
+    MPI_Barrier(MPI_COMM_WORLD);
+
+    size_t nf[3] = {f.size(0), f.size(1), f.size(2)};
+    size_t nfp[4] = {fp.size(0), fp.size(1), fp.size(2), fp.size(3)};
+    size_t fny[3] = {f.n_[1] / 2, f.n_[0] / 2, f.n_[2] / 2};
+
+    size_t slicesz = fp.size(1) * fp.size(3);
+
+    if (typeid(data_t) == typeid(real_t))
+        slicesz *= 2; // then sizeof(real_t) gives only half of a complex
+
+    MPI_Datatype datatype =
+        (typeid(data_t) == typeid(float))
+            ? MPI_FLOAT
+            : (typeid(data_t) == typeid(double))
+                  ? MPI_DOUBLE
+                  : (typeid(data_t) == typeid(std::complex<float>))
+                        ? MPI_COMPLEX
+                        : (typeid(data_t) == typeid(std::complex<double>))
+                              ? MPI_DOUBLE_COMPLEX
+                              : MPI_INT;
+
+    MPI_Status status;
+
+    //... local size must be divisible by 2, otherwise this gets too complicated
+    // assert( tsize%2 == 0 );
+
+    f.zero();
+
+    std::vector<MPI_Request> req;
+    MPI_Request temp_req;
+
+    for (size_t i = 0; i < nfp[0]; ++i)
+    {
+        size_t iglobal = i + offsetsp_[CONFIG::MPI_task_rank];
+
+        //... sending
+        if (iglobal < fny[0])
+        {
+            int sendto = get_task(iglobal, offsets_, sizes_, CONFIG::MPI_task_size);
+
+            MPI_Isend(&fp.kelem(i * slicesz), (int)slicesz, datatype, sendto, (int)iglobal,
+                      MPI_COMM_WORLD, &temp_req);
+            req.push_back(temp_req);
+        }
+        else if (iglobal > 2 * fny[0])
+        {
+            int sendto = get_task(iglobal - fny[0], offsets_, sizes_, CONFIG::MPI_task_size);
+            MPI_Isend(&fp.kelem(i * slicesz), (int)slicesz, datatype, sendto, (int)iglobal,
+                      MPI_COMM_WORLD, &temp_req);
+            req.push_back(temp_req);
+        }
+    }
+
+    for (size_t i = 0; i < nf[0]; ++i)
+    {
+        size_t iglobal = i + offsets_[CONFIG::MPI_task_rank];
+
+        int recvfrom = 0;
+        if (iglobal < fny[0])
+        {
+            recvfrom = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size);
+            MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, (int)iglobal,
+                     MPI_COMM_WORLD, &status);
+        }
+        else if (iglobal > fny[0])
+        {
+            recvfrom = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size);
+            MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom,
+                     (int)(iglobal + fny[0]), MPI_COMM_WORLD, &status);
+        }
+        else
+            continue;
+
+        assert(status.MPI_ERROR == MPI_SUCCESS);
+
+        for (size_t j = 0; j < nf[1]; ++j)
+        {
+
+            if (j < fny[1])
+            {
+                size_t jp = j;
+                for (size_t k = 0; k < nf[2]; ++k)
+                {
+                    // size_t kp = (k>fny[2])? k+fny[2] : k;
+                    // f.kelem(i,j,k) = crecvbuf_[jp*nfp[3]+kp];
+                    if (k < fny[2])
+                        f.kelem(i, j, k) = crecvbuf_[jp * nfp[3] + k];
+                    else if (k > fny[2])
+                        f.kelem(i, j, k) = crecvbuf_[jp * nfp[3] + k + fny[2]];
+                }
+            }
+            if (j > fny[1])
+            {
+                size_t jp = j + fny[1];
+                for (size_t k = 0; k < nf[2]; ++k)
+                {
+                    // size_t kp = (k>fny[2])? k+fny[2] : k;
+                    // f.kelem(i,j,k) = crecvbuf_[jp*nfp[3]+kp];
+                    if (k < fny[2])
+                        f.kelem(i, j, k) = crecvbuf_[jp * nfp[3] + k];
+                    else if (k > fny[2])
+                        f.kelem(i, j, k) = crecvbuf_[jp * nfp[3] + k + fny[2]];
+                }
+            }
+        }
+    }
+
+    for (size_t i = 0; i < req.size(); ++i)
+    {
+        // need to preset status as wait does not necessarily modify it to reflect
+        // success c.f.
+        // http://www.open-mpi.org/community/lists/devel/2007/04/1402.php
+        status.MPI_ERROR = MPI_SUCCESS;
+
+        MPI_Wait(&req[i], &status);
+        assert(status.MPI_ERROR == MPI_SUCCESS);
+    }
+
+    MPI_Barrier(MPI_COMM_WORLD);
+
+    csoca::ilog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart);
+
+#endif /// end of ifdef/ifndef USE_MPI //////////////////////////////////////////////////////////////
+}
+
+template class Grid_FFT<real_t>;
+template class Grid_FFT<ccomplex_t>;
+
+template void unpad(const Grid_FFT<real_t> &fp, Grid_FFT<real_t> &f);
+template void unpad(const Grid_FFT<ccomplex_t> &fp, Grid_FFT<ccomplex_t> &f);
+
+template void pad_insert(const Grid_FFT<real_t> &f, Grid_FFT<real_t> &fp);
+template void pad_insert(const Grid_FFT<ccomplex_t> &f, Grid_FFT<ccomplex_t> &fp);
diff --git a/src/logger.cc b/src/logger.cc
new file mode 100644
index 0000000..2b93b89
--- /dev/null
+++ b/src/logger.cc
@@ -0,0 +1,42 @@
+#include <logger.hh>
+
+namespace csoca {
+
+std::ofstream Logger::output_file_;
+LogLevel Logger::log_level_ = LogLevel::Off;
+
+void Logger::SetLevel(const LogLevel &level) {
+  log_level_ = level;
+}
+
+LogLevel Logger::GetLevel() {
+  return log_level_;
+}
+
+void Logger::SetOutput(const std::string filename) {
+  if (output_file_.is_open()) {
+    output_file_.close();
+  }
+  output_file_.open(filename, std::ofstream::out);
+  assert(output_file_.is_open());
+}
+
+void Logger::UnsetOutput() {
+  if (output_file_.is_open()) {
+    output_file_.close();
+  }
+}
+
+std::ofstream &Logger::GetOutput() {
+  return output_file_;
+}
+
+// global instantiations for different levels
+Logger glogger;
+LogStream flog(glogger, LogLevel::Fatal);
+LogStream elog(glogger, LogLevel::Error);
+LogStream wlog(glogger, LogLevel::Warning);
+LogStream ilog(glogger, LogLevel::Info);
+LogStream dlog(glogger, LogLevel::Debug);
+
+} // namespace csoca
diff --git a/src/main.cc b/src/main.cc
new file mode 100644
index 0000000..7e0ba45
--- /dev/null
+++ b/src/main.cc
@@ -0,0 +1,359 @@
+#include <cmath>
+#include <complex>
+#include <iostream>
+#include <fstream>
+#include <thread>
+
+#include <unistd.h> // for unlink
+
+#include <general.hh>
+#include <grid_fft.hh>
+
+#include <transfer_function_plugin.hh>
+#include <random_plugin.hh>
+#include <cosmology_calculator.hh>
+
+namespace CONFIG{
+int  MPI_thread_support = -1;
+int  MPI_task_rank = 0;
+int  MPI_task_size = 1;
+bool MPI_ok = false;
+bool MPI_threads_ok = false;
+bool FFTW_threads_ok = false;
+};
+
+RNG_plugin *the_random_number_generator;
+TransferFunction_plugin *the_transfer_function;
+
+int main( int argc, char** argv )
+{
+    csoca::Logger::SetLevel(csoca::LogLevel::Info);
+    // csoca::Logger::SetLevel(csoca::LogLevel::Debug);
+
+    // initialise MPI and multi-threading
+#if defined(USE_MPI)
+    MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &CONFIG::MPI_thread_support);
+    CONFIG::MPI_threads_ok = CONFIG::MPI_thread_support >= MPI_THREAD_FUNNELED;
+    MPI_Comm_rank(MPI_COMM_WORLD, &CONFIG::MPI_task_rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &CONFIG::MPI_task_size);
+    CONFIG::MPI_ok = true;
+#endif
+
+#if defined(USE_FFTW_THREADS)
+  #if defined(USE_MPI)
+    if (CONFIG::MPI_threads_ok)
+        CONFIG::FFTW_threads_ok = fftw_init_threads();
+  #else
+    CONFIG::FFTW_threads_ok = fftw_init_threads();
+  #endif 
+#endif
+
+#if defined(USE_FFTW_MPI)
+    fftw_mpi_init();
+    csoca::ilog << "MPI is enabled                : " << "yes" << std::endl;
+#endif
+
+    csoca::ilog << "MPI supports multi-threading  : " << CONFIG::MPI_threads_ok << std::endl;
+    csoca::ilog << "FFTW supports multi-threading : " << CONFIG::FFTW_threads_ok << std::endl;
+    csoca::ilog << "Available HW threads / task   : " << std::thread::hardware_concurrency() << std::endl;
+
+    //------------------------------------------------------------------------------
+    // Parse command line options
+    //------------------------------------------------------------------------------
+
+    if (argc != 2)
+    {
+        // print_region_generator_plugins();
+        print_TransferFunction_plugins();
+        // print_RNG_plugins();
+        // print_output_plugins();
+
+        csoca::elog << "In order to run, you need to specify a parameter file!" << std::endl;
+        exit(0);
+    }
+
+    
+    //--------------------------------------------------------------------
+    // Initialise parameters
+    ConfigFile the_config(argv[1]);
+
+    const size_t ngrid = the_config.GetValue<size_t>("setup", "GridRes");
+    const real_t boxlen = the_config.GetValue<double>("setup", "BoxLength");
+    const real_t volfac(std::pow(boxlen / ngrid / 2.0 / M_PI, 1.5));
+    const real_t phifac = 1.0 / boxlen / boxlen; // to have potential in box units
+
+    real_t Dplus0 = the_config.GetValue<real_t>("setup", "Dplus0");
+
+    const std::string fname_hdf5 = the_config.GetValueSafe<std::string>("output", "fname_hdf5", "output.hdf5");
+    //////////////////////////////////////////////////////////////////////////////////////////////
+
+    std::unique_ptr<CosmologyCalculator>
+        the_cosmo_calc;
+
+    try
+    {
+        the_random_number_generator = select_RNG_plugin(the_config);
+        the_transfer_function       = select_TransferFunction_plugin(the_config);
+
+        the_cosmo_calc = std::make_unique<CosmologyCalculator>(the_config, the_transfer_function);
+
+        //double pnorm = the_cosmo_calc->ComputePNorm();
+        //Dplus = the_cosmo_calc->CalcGrowthFactor(astart) / the_cosmo_calc->CalcGrowthFactor(1.0);
+
+        csoca::ilog << "power spectrum is output for D+ =" << Dplus0 << std::endl;
+        //csoca::ilog << "power spectrum normalisation is " << pnorm << std::endl;
+        //csoca::ilog << "power spectrum normalisation is " << pnorm*Dplus*Dplus << std::endl;
+
+        // write power spectrum to a file
+        std::ofstream ofs("input_powerspec.txt");
+        for( double k=1e-4; k<1e4; k*=1.1 ){
+            ofs << std::setw(16) << k
+                << std::setw(16) << std::pow(the_cosmo_calc->GetAmplitude(k, total) * Dplus0, 2.0)
+                << std::setw(16) << std::pow(the_cosmo_calc->GetAmplitude(k, total), 2.0)
+                << std::endl;
+        }
+
+    }catch(...){
+        csoca::elog << "Problem during initialisation. See error(s) above. Exiting..." << std::endl;
+        #if defined(USE_MPI) 
+        MPI_Finalize();
+        #endif
+        return 1;
+    }
+    //--------------------------------------------------------------------
+    // Create arrays
+    Grid_FFT<real_t> phi({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+    Grid_FFT<real_t> phi2({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+    Grid_FFT<real_t> phi3a({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+    Grid_FFT<real_t> phi3b({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+    
+    phi.FillRandomReal(6519);
+
+    //======================================================================
+    //... compute 1LPT displacement potential ....
+    // phi = - delta / k^2
+    phi.FourierTransformForward();
+    
+    phi.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t {
+        real_t kmod = k.norm();
+        ccomplex_t delta = x * the_cosmo_calc->GetAmplitude(kmod, total);
+        return -delta / (kmod * kmod) * phifac / volfac;
+    });
+
+    phi.zero_DC_mode();
+
+    //======================================================================
+    //... compute 2LPT displacement potential
+    Grid_FFT<real_t>
+        phi_xx({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}),
+        phi_xy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}),
+        phi_xz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}),
+        phi_yy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}),
+        phi_yz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}),
+        phi_zz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+
+    phi_xx.FourierTransformForward(false);
+    phi_xy.FourierTransformForward(false);
+    phi_xz.FourierTransformForward(false);
+    phi_yy.FourierTransformForward(false);
+    phi_yz.FourierTransformForward(false);
+    phi_zz.FourierTransformForward(false);
+
+    #pragma omp parallel for
+    for (size_t i = 0; i < phi.size(0); ++i)
+    {
+        for (size_t j = 0; j < phi.size(1); ++j)
+        {
+            for (size_t k = 0; k < phi.size(2); ++k)
+            {
+                auto kk = phi.get_k<real_t>(i,j,k);
+                size_t idx = phi.get_idx(i,j,k);
+
+                phi_xx.kelem(idx) = -kk[0] * kk[0] * phi.kelem(idx) / phifac;
+                phi_xy.kelem(idx) = -kk[0] * kk[1] * phi.kelem(idx) / phifac;
+                phi_xz.kelem(idx) = -kk[0] * kk[2] * phi.kelem(idx) / phifac;
+                phi_yy.kelem(idx) = -kk[1] * kk[1] * phi.kelem(idx) / phifac;
+                phi_yz.kelem(idx) = -kk[1] * kk[2] * phi.kelem(idx) / phifac;
+                phi_zz.kelem(idx) = -kk[2] * kk[2] * phi.kelem(idx) / phifac;
+            }
+        }
+    }
+
+    phi_xx.FourierTransformBackward();
+    phi_xy.FourierTransformBackward();
+    phi_xz.FourierTransformBackward();
+    phi_yy.FourierTransformBackward();
+    phi_yz.FourierTransformBackward();
+    phi_zz.FourierTransformBackward();
+
+    
+    for (size_t i = 0; i < phi2.size(0); ++i)
+    {
+        for (size_t j = 0; j < phi2.size(1); ++j)
+        {
+            for (size_t k = 0; k < phi2.size(2); ++k)
+            {
+                size_t idx = phi2.get_idx(i, j, k);
+
+                phi2.relem(idx) = ((phi_xx.relem(idx)*phi_yy.relem(idx)-phi_xy.relem(idx)*phi_xy.relem(idx))
+                                  +(phi_xx.relem(idx)*phi_zz.relem(idx)-phi_xz.relem(idx)*phi_xz.relem(idx))
+                                  +(phi_yy.relem(idx)*phi_zz.relem(idx)-phi_yz.relem(idx)*phi_yz.relem(idx)));
+            }
+        }
+    }
+
+    phi2.FourierTransformForward();
+    phi2.apply_function_k_dep([&](auto x, auto k) {
+        real_t kmod2 = k.norm_squared();
+        return x * (-1.0 / kmod2) * phifac;
+    });
+    phi2.zero_DC_mode();
+    
+    //======================================================================
+    //... compute 3LPT displacement potential
+    Grid_FFT<real_t>
+        phi2_xx({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}),
+        phi2_xy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}),
+        phi2_xz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}),
+        phi2_yy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}),
+        phi2_yz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}),
+        phi2_zz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
+
+    phi2_xx.FourierTransformForward(false);
+    phi2_xy.FourierTransformForward(false);
+    phi2_xz.FourierTransformForward(false);
+    phi2_yy.FourierTransformForward(false);
+    phi2_yz.FourierTransformForward(false);
+    phi2_zz.FourierTransformForward(false);
+
+    #pragma omp parallel for
+    for (size_t i = 0; i < phi2.size(0); ++i)
+    {
+        for (size_t j = 0; j < phi2.size(1); ++j)
+        {
+            for (size_t k = 0; k < phi2.size(2); ++k)
+            {
+                auto kk = phi2.get_k<real_t>(i,j,k);
+                size_t idx = phi2.get_idx(i,j,k);
+
+                phi2_xx.kelem(idx) = -kk[0] * kk[0] * phi2.kelem(idx) / phifac;
+                phi2_xy.kelem(idx) = -kk[0] * kk[1] * phi2.kelem(idx) / phifac;
+                phi2_xz.kelem(idx) = -kk[0] * kk[2] * phi2.kelem(idx) / phifac;
+                phi2_yy.kelem(idx) = -kk[1] * kk[1] * phi2.kelem(idx) / phifac;
+                phi2_yz.kelem(idx) = -kk[1] * kk[2] * phi2.kelem(idx) / phifac;
+                phi2_zz.kelem(idx) = -kk[2] * kk[2] * phi2.kelem(idx) / phifac;
+            }
+        }
+    }
+
+    phi2_xx.FourierTransformBackward();
+    phi2_xy.FourierTransformBackward();
+    phi2_xz.FourierTransformBackward();
+    phi2_yy.FourierTransformBackward();
+    phi2_yz.FourierTransformBackward();
+    phi2_zz.FourierTransformBackward();
+
+    for (size_t i = 0; i < phi3a.size(0); ++i)
+    {
+        for (size_t j = 0; j < phi3a.size(1); ++j)
+        {
+            for (size_t k = 0; k < phi3a.size(2); ++k)
+            {
+                size_t idx = phi3a.get_idx(i, j, k);
+
+                phi3a.relem(idx) = 0.5 * (
+                    + phi_xx.relem(idx) * ( phi2_yy.relem(idx) + phi2_zz.relem(idx) )
+                    + phi_yy.relem(idx) * ( phi2_zz.relem(idx) + phi2_xx.relem(idx) )
+                    + phi_zz.relem(idx) * ( phi2_xx.relem(idx) + phi2_yy.relem(idx) )
+                    - phi_xy.relem(idx) * phi2_xy.relem(idx) * 2.0
+                    - phi_xz.relem(idx) * phi2_xz.relem(idx) * 2.0
+                    - phi_yz.relem(idx) * phi2_yz.relem(idx) * 2.0
+                );
+                
+                phi3b.relem(idx) = 
+                    + phi_xx.relem(idx)*phi_yy.relem(idx)*phi_zz.relem(idx)
+                    + phi_xy.relem(idx)*phi_xz.relem(idx)*phi_yz.relem(idx) * 2.0
+                    - phi_yz.relem(idx)*phi_yz.relem(idx)*phi_xx.relem(idx)
+                    - phi_xz.relem(idx)*phi_xz.relem(idx)*phi_yy.relem(idx)
+                    - phi_xy.relem(idx)*phi_xy.relem(idx)*phi_zz.relem(idx);
+            }
+        }
+    }
+
+    phi3a.FourierTransformForward();
+    phi3a.apply_function_k_dep([&](auto x, auto k) {
+        real_t kmod2 = k.norm_squared();
+        return x * (-1.0 / kmod2) * phifac;
+    });
+    phi3a.zero_DC_mode();
+    
+    phi3b.FourierTransformForward();
+    phi3b.apply_function_k_dep([&](auto x, auto k) {
+        real_t kmod2 = k.norm_squared();
+        return x * (-1.0 / kmod2) * phifac;
+    });
+    phi3b.zero_DC_mode();
+    
+    ///////////////////////////////////////////////////////////////////////
+
+    Grid_FFT<real_t> &delta = phi_xx;
+    Grid_FFT<real_t> &delta2 = phi_xy;
+    Grid_FFT<real_t> &delta3a = phi_xz;
+    Grid_FFT<real_t> &delta3b = phi_yy;
+
+    delta.FourierTransformForward(false);
+    delta2.FourierTransformForward(false);
+    delta3a.FourierTransformForward(false);
+    delta3b.FourierTransformForward(false);
+
+    #pragma omp parallel for
+    for (size_t i = 0; i < phi.size(0); ++i)
+    {
+        for (size_t j = 0; j < phi.size(1); ++j)
+        {
+            for (size_t k = 0; k < phi.size(2); ++k)
+            {
+                auto kk = phi.get_k<real_t>(i,j,k);
+                size_t idx = phi.get_idx(i,j,k);
+                auto laplace = -kk.norm_squared();
+
+                delta.kelem(idx) = laplace * phi.kelem(idx) / phifac;
+                delta2.kelem(idx) = laplace * phi2.kelem(idx) / phifac;
+                delta3a.kelem(idx) = laplace * phi3a.kelem(idx) / phifac;
+                delta3b.kelem(idx) = laplace * phi3b.kelem(idx) / phifac;
+            }
+        }
+    }
+
+    ///////////////////////////////////////////////////////////////////////
+    phi.FourierTransformBackward();
+    phi2.FourierTransformBackward();
+    phi3a.FourierTransformBackward();
+    phi3b.FourierTransformBackward();
+
+    delta.FourierTransformBackward();
+    delta2.FourierTransformBackward();
+    delta3a.FourierTransformBackward();
+    delta3b.FourierTransformBackward();
+
+
+    //... write output .....
+    unlink(fname_hdf5.c_str());
+    phi.Write_to_HDF5(fname_hdf5, "phi");
+    phi2.Write_to_HDF5(fname_hdf5, "phi2");
+    phi3a.Write_to_HDF5(fname_hdf5, "phi3a");
+    phi3b.Write_to_HDF5(fname_hdf5, "phi3b");
+    delta.Write_to_HDF5(fname_hdf5, "delta");
+    delta2.Write_to_HDF5(fname_hdf5, "delta2");
+    delta3a.Write_to_HDF5(fname_hdf5, "delta3a");
+    delta3b.Write_to_HDF5(fname_hdf5, "delta3b");
+    
+    
+
+#if defined(USE_MPI)
+    MPI_Barrier(MPI_COMM_WORLD);
+    MPI_Finalize();
+#endif
+
+    return 0;
+}
diff --git a/src/plugins/random_music.cc b/src/plugins/random_music.cc
new file mode 100644
index 0000000..b0c3fd7
--- /dev/null
+++ b/src/plugins/random_music.cc
@@ -0,0 +1,283 @@
+#include <random_plugin.hh>
+#include "random_music_wnoise_generator.hh"
+
+typedef music_wnoise_generator<real_t> rng;
+
+class RNG_music : public RNG_plugin
+{
+protected:
+  std::vector<long> rngseeds_;
+  std::vector<std::string> rngfnames_;
+  unsigned ran_cube_size_;
+
+  int levelmin_, levelmax_, levelmin_seed_;
+
+  bool disk_cached_;
+  bool restart_;
+  bool initialized_;
+
+  std::vector<std::vector<real_t> *> mem_cache_;
+
+  //! checks if the specified string is numeric
+  bool is_number(const std::string &s);
+
+  //! parses the random number parameters in the conf file
+  void parse_random_parameters(void);
+
+  //! computes the white noise fields and keeps them either in memory or on disk
+  void compute_random_numbers(void);
+
+  //! adjusts averages
+  //void correct_avg(int icoarse, int ifine);
+
+  //! store the white noise fields in memory or on disk
+  //void store_rnd(int ilevel, rng *prng);
+
+public:
+  explicit RNG_music(ConfigFile &cf) : RNG_plugin(cf), initialized_(false) {}
+
+  ~RNG_music() {}
+
+  bool isMultiscale() const { return true; }
+
+  void initialize_for_grid_structure()//const refinement_hierarchy &refh)
+  {
+    //prefh_ = &refh;
+    levelmin_ = pcf_->GetValue<unsigned>("setup", "levelmin");
+    levelmax_ = pcf_->GetValue<unsigned>("setup", "levelmax");
+
+    ran_cube_size_ = pcf_->GetValueSafe<unsigned>("random", "cubesize", DEF_RAN_CUBE_SIZE);
+    disk_cached_ = pcf_->GetValueSafe<bool>("random", "disk_cached", true);
+    restart_ = pcf_->GetValueSafe<bool>("random", "restart", false);
+
+    mem_cache_.assign(levelmax_ - levelmin_ + 1, (std::vector<real_t> *)NULL);
+
+    if (restart_ && !disk_cached_)
+    {
+      csoca::elog.Print("Cannot restart from mem cached random numbers.");
+      throw std::runtime_error("Cannot restart from mem cached random numbers.");
+    }
+
+    //... determine seed/white noise file data to be applied
+    parse_random_parameters();
+
+    if (!restart_)
+    {
+      //... compute the actual random numbers
+      compute_random_numbers();
+    }
+
+    initialized_ = true;
+  }
+
+  //void fill_grid(int level, DensityGrid<real_t> &R);
+};
+
+bool RNG_music::is_number(const std::string &s)
+{
+  for (size_t i = 0; i < s.length(); i++)
+    if (!std::isdigit(s[i]) && s[i] != '-')
+      return false;
+
+  return true;
+}
+
+void RNG_music::parse_random_parameters(void)
+{
+  //... parse random number options
+  for (int i = 0; i <= 100; ++i)
+  {
+    char seedstr[128];
+    std::string tempstr;
+    bool noseed = false;
+    sprintf(seedstr, "seed[%d]", i);
+    if (pcf_->ContainsKey("random", seedstr))
+      tempstr = pcf_->GetValue<std::string>("random", seedstr);
+    else
+    {
+      // "-2" means that no seed entry was found for that level
+      tempstr = std::string("-2");
+      noseed = true;
+    }
+
+    if (is_number(tempstr))
+    {
+      long ltemp;
+      pcf_->Convert(tempstr, ltemp);
+      rngfnames_.push_back("");
+      if (noseed) // ltemp < 0 )
+        //... generate some dummy seed which only depends on the level, negative so we know it's not
+        //... an actual seed and thus should not be used as a constraint for coarse levels
+        // rngseeds_.push_back( -abs((unsigned)(ltemp-i)%123+(unsigned)(ltemp+827342523521*i)%123456789) );
+        rngseeds_.push_back(-abs((long)(ltemp - i) % 123 + (long)(ltemp + 7342523521 * i) % 123456789));
+      else
+      {
+        if (ltemp <= 0)
+        {
+          csoca::elog.Print("Specified seed [random]/%s needs to be a number >0!", seedstr);
+          throw std::runtime_error("Seed values need to be >0");
+        }
+        rngseeds_.push_back(ltemp);
+      }
+    }
+    else
+    {
+      rngfnames_.push_back(tempstr);
+      rngseeds_.push_back(-1);
+      csoca::ilog.Print("Random numbers for level %3d will be read from file.", i);
+    }
+  }
+
+  //.. determine for which levels random seeds/random number files are given
+  levelmin_seed_ = -1;
+  for (unsigned ilevel = 0; ilevel < rngseeds_.size(); ++ilevel)
+  {
+    if (levelmin_seed_ < 0 && (rngfnames_[ilevel].size() > 0 || rngseeds_[ilevel] >= 0))
+      levelmin_seed_ = ilevel;
+  }
+}
+
+void RNG_music::compute_random_numbers(void)
+{
+  bool rndsign = pcf_->GetValueSafe<bool>("random", "grafic_sign", false);
+
+  std::vector<rng *> randc(std::max(levelmax_, levelmin_seed_) + 1, (rng *)NULL);
+
+  //--- FILL ALL WHITE NOISE ARRAYS FOR WHICH WE NEED THE FULL FIELD ---//
+
+  //... seeds are given for a level coarser than levelmin
+  if (levelmin_seed_ < levelmin_)
+  {
+    if (rngfnames_[levelmin_seed_].size() > 0)
+      randc[levelmin_seed_] = new rng(1 << levelmin_seed_, rngfnames_[levelmin_seed_], rndsign);
+    else
+      randc[levelmin_seed_] = new rng(1 << levelmin_seed_, ran_cube_size_, rngseeds_[levelmin_seed_], true);
+
+    for (int i = levelmin_seed_ + 1; i <= levelmin_; ++i)
+    {
+      //#warning add possibility to read noise from file also here!
+
+      if (rngfnames_[i].size() > 0)
+        csoca::ilog.Print("Warning: Cannot use filenames for higher levels currently! Ignoring!");
+
+      randc[i] = new rng(*randc[i - 1], ran_cube_size_, rngseeds_[i], true);
+      delete randc[i - 1];
+      randc[i - 1] = NULL;
+    }
+  }
+
+  //... seeds are given for a level finer than levelmin, obtain by averaging
+  if (levelmin_seed_ > levelmin_)
+  {
+    if (rngfnames_[levelmin_seed_].size() > 0)
+      randc[levelmin_seed_] = new rng(1 << levelmin_seed_, rngfnames_[levelmin_seed_], rndsign);
+    else
+      randc[levelmin_seed_] =
+          new rng(1 << levelmin_seed_, ran_cube_size_, rngseeds_[levelmin_seed_], true); //, x0, lx );
+
+    for (int ilevel = levelmin_seed_ - 1; ilevel >= (int)levelmin_; --ilevel)
+    {
+      if (rngseeds_[ilevel - levelmin_] > 0)
+        csoca::ilog.Print("Warning: random seed for level %d will be ignored.\n"
+                "            consistency requires that it is obtained by restriction from level %d",
+                ilevel, levelmin_seed_);
+
+      // if( brealspace_tf && ilevel < levelmax_ )
+      //  randc[ilevel] = new rng( *randc[ilevel+1], false );
+      // else // do k-space averaging
+      randc[ilevel] = new rng(*randc[ilevel + 1], true);
+
+      if (ilevel + 1 > levelmax_)
+      {
+        delete randc[ilevel + 1];
+        randc[ilevel + 1] = NULL;
+      }
+    }
+  }
+
+  //--- GENERATE AND STORE ALL LEVELS, INCLUDING REFINEMENTS ---//
+
+  //... levelmin
+  if (randc[levelmin_] == NULL)
+  {
+    if (rngfnames_[levelmin_].size() > 0)
+      randc[levelmin_] = new rng(1 << levelmin_, rngfnames_[levelmin_], rndsign);
+    else
+      randc[levelmin_] = new rng(1 << levelmin_, ran_cube_size_, rngseeds_[levelmin_], true);
+  }
+
+// if( levelmax_ == levelmin_ )
+#if 0
+  {
+    //... apply constraints to coarse grid
+    //... if no constraints are specified, or not for this level
+    //... constraints.apply will return without doing anything
+    int x0[3] = { 0, 0, 0 };
+    int lx[3] = { 1<<levelmin_, 1<<levelmin_, 1<<levelmin_ };
+    constraints.apply( levelmin_, x0, lx, randc[levelmin_] );
+  }
+#endif
+
+  // store_rnd(levelmin_, randc[levelmin_]);
+
+  //... refinement levels
+  // for (int ilevel = levelmin_ + 1; ilevel <= levelmax_; ++ilevel)
+  // {
+  //   int lx[3], x0[3];
+  //   int shift[3], levelmin_poisson;
+  //   shift[0] = pcf_->GetValue<int>("setup", "shift_x");
+  //   shift[1] = pcf_->GetValue<int>("setup", "shift_y");
+  //   shift[2] = pcf_->GetValue<int>("setup", "shift_z");
+
+  //   levelmin_poisson = pcf_->GetValue<unsigned>("setup", "levelmin");
+
+  //   int lfac = 1 << (ilevel - levelmin_poisson);
+
+  //   lx[0] = 2 * prefh_->size(ilevel, 0);
+  //   lx[1] = 2 * prefh_->size(ilevel, 1);
+  //   lx[2] = 2 * prefh_->size(ilevel, 2);
+  //   x0[0] = prefh_->offset_abs(ilevel, 0) - lfac * shift[0] - lx[0] / 4;
+  //   x0[1] = prefh_->offset_abs(ilevel, 1) - lfac * shift[1] - lx[1] / 4;
+  //   x0[2] = prefh_->offset_abs(ilevel, 2) - lfac * shift[2] - lx[2] / 4;
+
+  //   if (randc[ilevel] == NULL)
+  //     randc[ilevel] =
+  //         new rng(*randc[ilevel - 1], ran_cube_size_, rngseeds_[ilevel], kavg, ilevel == levelmin_ + 1, x0, lx);
+  //   delete randc[ilevel - 1];
+  //   randc[ilevel - 1] = NULL;
+
+  //   //... apply constraints to this level, if any
+  //   // if( ilevel == levelmax_ )
+  //   // constraints.apply( ilevel, x0, lx, randc[ilevel] );
+
+  //   //... store numbers
+  //   store_rnd(ilevel, randc[ilevel]);
+  // }
+
+  delete randc[levelmax_];
+  randc[levelmax_] = NULL;
+
+  //... make sure that the coarse grid contains oct averages where it overlaps with a fine grid
+  //... this also ensures that constraints enforced on fine grids are carried to the coarser grids
+  // if (brealspace_tf)
+  // {
+  //   for (int ilevel = levelmax_; ilevel > levelmin_; --ilevel)
+  //     correct_avg(ilevel - 1, ilevel);
+  // }
+
+  //.. we do not have random numbers for a coarse level, generate them
+  /*if( levelmax_rand_ >= (int)levelmin_ )
+    {
+    std::cerr << "lmaxread >= (int)levelmin\n";
+    randc[levelmax_rand_] = new rng( (unsigned)pow(2,levelmax_rand_), rngfnames_[levelmax_rand_] );
+    for( int ilevel = levelmax_rand_-1; ilevel >= (int)levelmin_; --ilevel )
+    randc[ilevel] = new rng( *randc[ilevel+1] );
+         }*/
+}
+
+
+
+namespace
+{
+RNG_plugin_creator_concrete<RNG_music> creator("MUSIC");
+}
\ No newline at end of file
diff --git a/src/plugins/random_music_wnoise_generator.cc b/src/plugins/random_music_wnoise_generator.cc
new file mode 100644
index 0000000..a6d4c35
--- /dev/null
+++ b/src/plugins/random_music_wnoise_generator.cc
@@ -0,0 +1,843 @@
+
+#include <complex>
+
+#include <gsl/gsl_rng.h>
+#include <gsl/gsl_randist.h>
+
+#include <random_plugin.hh>
+#include "random_music_wnoise_generator.hh"
+
+template <typename T>
+music_wnoise_generator<T>::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx)
+    : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed)
+{
+  csoca::ilog.Print("Generating random numbers (1) with seed %ld", baseseed);
+
+  initialize();
+  fill_subvolume(x0, lx);
+}
+
+template <typename T>
+music_wnoise_generator<T>::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, bool zeromean)
+    : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed)
+{
+  csoca::ilog.Print("Generating random numbers (2) with seed %ld", baseseed);
+
+  double mean = 0.0;
+  size_t res_l = res;
+
+  bool musicnoise = true;
+  if (!musicnoise)
+    cubesize_ = res_;
+
+  if (!musicnoise)
+    csoca::elog.Print("This currently breaks compatibility. Need to disable by hand! Make sure to not check into repo");
+
+  initialize();
+
+  if (musicnoise)
+    mean = fill_all();
+  else
+  {
+    rnums_.push_back(new Meshvar<T>(res, 0, 0, 0));
+    cubemap_[0] = 0; // create dummy map index
+    register_cube(0, 0, 0);
+    //rapid_proto_ngenic_rng( res_, baseseed_, *this );
+  }
+
+  /*
+
+    if( musicnoise )
+    mean = fill_all();
+    else
+    {
+    rnums_.push_back( new Meshvar<T>( res, 0, 0, 0 ) );
+    cubemap_[0] = 0; // create dummy map index
+    register_cube(0,0,0);
+    rapid_proto_ngenic_rng( res_, baseseed_, *this );
+    }
+     
+  */
+
+  if (zeromean)
+  {
+    mean = 0.0;
+
+#pragma omp parallel for reduction(+ \
+                                   : mean)
+    for (int i = 0; i < (int)res_; ++i)
+      for (unsigned j = 0; j < res_; ++j)
+        for (unsigned k = 0; k < res_; ++k)
+          mean += (*this)(i, j, k);
+
+    mean *= 1.0 / (double)(res_l * res_l * res_l);
+
+#pragma omp parallel for
+    for (int i = 0; i < (int)res_; ++i)
+      for (unsigned j = 0; j < res_; ++j)
+        for (unsigned k = 0; k < res_; ++k)
+          (*this)(i, j, k) = (*this)(i, j, k) - mean;
+  }
+}
+
+template <typename T>
+music_wnoise_generator<T>::music_wnoise_generator(unsigned res, std::string randfname, bool randsign)
+    : res_(res), cubesize_(res), ncubes_(1)
+{
+  rnums_.push_back(new Meshvar<T>(res, 0, 0, 0));
+  cubemap_[0] = 0; // create dummy map index
+
+  std::ifstream ifs(randfname.c_str(), std::ios::binary);
+  if (!ifs)
+  {
+    csoca::elog.Print("Could not open random number file \'%s\'!", randfname.c_str());
+    throw std::runtime_error(std::string("Could not open random number file \'") + randfname + std::string("\'!"));
+  }
+
+  unsigned vartype;
+  unsigned nx, ny, nz, blksz32;
+  size_t blksz64;
+  int iseed;
+  //long seed;
+
+  float sign4 = -1.0f;
+  double sign8 = -1.0;
+
+  int addrtype = 32;
+
+  if (randsign) // use grafic2 sign convention
+  {
+    sign4 = 1.0f;
+    sign8 = 1.0;
+  }
+
+  //... read header and check if 32bit or 64bit block size .../
+  ifs.read(reinterpret_cast<char *>(&blksz32), sizeof(int));
+  ifs.read(reinterpret_cast<char *>(&nx), sizeof(unsigned));
+  if (blksz32 != 4 * sizeof(int) || nx != res_)
+  {
+    addrtype = 64;
+
+    ifs.seekg(0);
+    ifs.read(reinterpret_cast<char *>(&blksz64), sizeof(size_t));
+    ifs.read(reinterpret_cast<char *>(&nx), sizeof(unsigned));
+
+    if (blksz64 != 4 * sizeof(int) || nx != res_)
+      addrtype = -1;
+  }
+  ifs.seekg(0);
+
+  if (addrtype < 0)
+    throw std::runtime_error("corrupt random number file");
+
+  if (addrtype == 32)
+    ifs.read(reinterpret_cast<char *>(&blksz32), sizeof(int));
+  else
+    ifs.read(reinterpret_cast<char *>(&blksz64), sizeof(size_t));
+
+  ifs.read(reinterpret_cast<char *>(&nx), sizeof(unsigned));
+  ifs.read(reinterpret_cast<char *>(&ny), sizeof(unsigned));
+  ifs.read(reinterpret_cast<char *>(&nz), sizeof(unsigned));
+  ifs.read(reinterpret_cast<char *>(&iseed), sizeof(int));
+  //seed = (long)iseed;
+
+  if (nx != res_ || ny != res_ || nz != res_)
+  {
+    char errmsg[128];
+    sprintf(errmsg, "White noise file dimensions do not match level dimensions: %ux%ux%u vs. %u**3", nx, ny, nz, res_);
+    throw std::runtime_error(errmsg);
+  }
+
+  if (addrtype == 32)
+    ifs.read(reinterpret_cast<char *>(&blksz32), sizeof(int));
+  else
+    ifs.read(reinterpret_cast<char *>(&blksz64), sizeof(size_t));
+
+  //... read data ...//
+  //check whether random numbers are single or double precision numbers
+  if (addrtype == 32)
+  {
+    ifs.read(reinterpret_cast<char *>(&blksz32), sizeof(int));
+    if (blksz32 == nx * ny * sizeof(float))
+      vartype = 4;
+    else if (blksz32 == nx * ny * sizeof(double))
+      vartype = 8;
+    else
+      throw std::runtime_error("corrupt random number file");
+  }
+  else
+  {
+
+    ifs.read(reinterpret_cast<char *>(&blksz64), sizeof(size_t));
+    if (blksz64 == nx * ny * sizeof(float))
+      vartype = 4;
+    else if (blksz64 == nx * ny * sizeof(double))
+      vartype = 8;
+    else
+      throw std::runtime_error("corrupt random number file");
+  }
+
+  //rewind to beginning of block
+  if (addrtype == 32)
+    ifs.seekg(-sizeof(int), std::ios::cur);
+  else
+    ifs.seekg(-sizeof(size_t), std::ios::cur);
+
+  std::vector<float> in_float;
+  std::vector<double> in_double;
+
+  csoca::ilog.Print("Random number file \'%s\'\n   contains %ld numbers. Reading...", randfname.c_str(), nx * ny * nz);
+
+  long double sum = 0.0, sum2 = 0.0;
+  size_t count = 0;
+
+  //perform actual reading
+  if (vartype == 4)
+  {
+    for (int ii = 0; ii < (int)nz; ++ii)
+    {
+
+      if (addrtype == 32)
+      {
+        ifs.read(reinterpret_cast<char *>(&blksz32), sizeof(int));
+        if (blksz32 != nx * ny * sizeof(float))
+          throw std::runtime_error("corrupt random number file");
+      }
+      else
+      {
+        ifs.read(reinterpret_cast<char *>(&blksz64), sizeof(size_t));
+        if (blksz64 != nx * ny * sizeof(float))
+          throw std::runtime_error("corrupt random number file");
+      }
+
+      in_float.assign(nx * ny, 0.0f);
+      ifs.read((char *)&in_float[0], nx * ny * sizeof(float));
+
+      for (int jj = 0, q = 0; jj < (int)ny; ++jj)
+        for (int kk = 0; kk < (int)nx; ++kk)
+        {
+          sum += in_float[q];
+          sum2 += in_float[q] * in_float[q];
+          ++count;
+
+          (*rnums_[0])(kk, jj, ii) = sign4 * in_float[q++];
+        }
+
+      if (addrtype == 32)
+      {
+        ifs.read(reinterpret_cast<char *>(&blksz32), sizeof(int));
+        if (blksz32 != nx * ny * sizeof(float))
+          throw std::runtime_error("corrupt random number file");
+      }
+      else
+      {
+        ifs.read(reinterpret_cast<char *>(&blksz64), sizeof(size_t));
+        if (blksz64 != nx * ny * sizeof(float))
+          throw std::runtime_error("corrupt random number file");
+      }
+    }
+  }
+  else if (vartype == 8)
+  {
+    for (int ii = 0; ii < (int)nz; ++ii)
+    {
+      if (addrtype == 32)
+      {
+        ifs.read(reinterpret_cast<char *>(&blksz32), sizeof(int));
+        if (blksz32 != nx * ny * sizeof(double))
+          throw std::runtime_error("corrupt random number file");
+      }
+      else
+      {
+        ifs.read(reinterpret_cast<char *>(&blksz64), sizeof(size_t));
+        if (blksz64 != nx * ny * sizeof(double))
+          throw std::runtime_error("corrupt random number file");
+      }
+
+      in_double.assign(nx * ny, 0.0f);
+      ifs.read((char *)&in_double[0], nx * ny * sizeof(double));
+
+      for (int jj = 0, q = 0; jj < (int)ny; ++jj)
+        for (int kk = 0; kk < (int)nx; ++kk)
+        {
+          sum += in_double[q];
+          sum2 += in_double[q] * in_double[q];
+          ++count;
+          (*rnums_[0])(kk, jj, ii) = sign8 * in_double[q++];
+        }
+
+      if (addrtype == 32)
+      {
+        ifs.read(reinterpret_cast<char *>(&blksz32), sizeof(int));
+        if (blksz32 != nx * ny * sizeof(double))
+          throw std::runtime_error("corrupt random number file");
+      }
+      else
+      {
+        ifs.read(reinterpret_cast<char *>(&blksz64), sizeof(size_t));
+        if (blksz64 != nx * ny * sizeof(double))
+          throw std::runtime_error("corrupt random number file");
+      }
+    }
+  }
+
+  double mean, var;
+  mean = sum / count;
+  var = sum2 / count - mean * mean;
+
+  csoca::ilog.Print("Random numbers in file have \n     mean = %f and var = %f", mean, var);
+}
+
+//... copy construct by averaging down
+template <typename T>
+music_wnoise_generator<T>::music_wnoise_generator(/*const*/ music_wnoise_generator<T> &rc, bool kdegrade)
+{
+  //if( res > rc.m_res || (res/rc.m_res)%2 != 0 )
+  //			throw std::runtime_error("Invalid restriction in random number container copy constructor.");
+
+  long double sum = 0.0, sum2 = 0.0;
+  size_t count = 0;
+
+  csoca::ilog.Print("Generating a coarse white noise field by k-space degrading");
+  //... initialize properties of container
+  res_ = rc.res_ / 2;
+  cubesize_ = res_;
+  ncubes_ = 1;
+  baseseed_ = -2;
+
+  if (sizeof(real_t) != sizeof(T))
+  {
+    csoca::elog.Print("type mismatch with real_t in k-space averaging");
+    throw std::runtime_error("type mismatch with real_t in k-space averaging");
+  }
+
+  real_t
+      *rfine = new real_t[(size_t)rc.res_ * (size_t)rc.res_ * 2 * ((size_t)rc.res_ / 2 + 1)],
+      *rcoarse = new real_t[(size_t)res_ * (size_t)res_ * 2 * ((size_t)res_ / 2 + 1)];
+
+  fftw_complex
+      *ccoarse = reinterpret_cast<fftw_complex *>(rcoarse),
+      *cfine = reinterpret_cast<fftw_complex *>(rfine);
+
+  int nx(rc.res_), ny(rc.res_), nz(rc.res_), nxc(res_), nyc(res_), nzc(res_);
+
+#ifdef USE_SINGLEPRECISION
+  fftwf_plan
+      pf = fftwf_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE),
+      ipc = fftwf_plan_dft_c2r_3d(nxc, nyc, nzc, ccoarse, rcoarse, FFTW_ESTIMATE);
+#else
+  fftw_plan
+      pf = fftw_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE),
+      ipc = fftw_plan_dft_c2r_3d(nxc, nyc, nzc, ccoarse, rcoarse, FFTW_ESTIMATE);
+#endif
+
+#pragma omp parallel for
+  for (int i = 0; i < nx; i++)
+    for (int j = 0; j < ny; j++)
+      for (int k = 0; k < nz; k++)
+      {
+        size_t q = ((size_t)i * ny + (size_t)j) * (nz + 2) + (size_t)k;
+        rfine[q] = rc(i, j, k);
+      }
+
+  FFTW_API(execute)(pf);
+
+  double fftnorm = 1.0 / ((double)nxc * (double)nyc * (double)nzc);
+
+#pragma omp parallel for
+  for (int i = 0; i < nxc; i++)
+    for (int j = 0; j < nyc; j++)
+      for (int k = 0; k < nzc / 2 + 1; k++)
+      {
+        int ii(i), jj(j), kk(k);
+
+        if (i > nxc / 2)
+          ii += nx / 2;
+        if (j > nyc / 2)
+          jj += ny / 2;
+
+        size_t qc, qf;
+
+        double kx = (i <= (int)nxc / 2) ? (double)i : (double)(i - (int)nxc);
+        double ky = (j <= (int)nyc / 2) ? (double)j : (double)(j - (int)nyc);
+        double kz = (k <= (int)nzc / 2) ? (double)k : (double)(k - (int)nzc);
+
+        qc = ((size_t)i * nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k;
+        qf = ((size_t)ii * ny + (size_t)jj) * (nz / 2 + 1) + (size_t)kk;
+
+        std::complex<double> val_fine(cfine[qf][0],cfine[qf][1]);
+        double phase = (kx / nxc + ky / nyc + kz / nzc) * 0.5 * M_PI;
+        std::complex<double> val_phas(cos(phase), sin(phase));
+
+        val_fine *= val_phas * fftnorm / sqrt(8.0);
+
+        ccoarse[qc][0] = val_fine.real();
+        ccoarse[qc][1] = val_fine.imag();
+      }
+
+  delete[] rfine;
+
+  FFTW_API(execute)(ipc);
+
+  rnums_.push_back(new Meshvar<T>(res_, 0, 0, 0));
+  cubemap_[0] = 0; // map all to single array
+
+#pragma omp parallel for reduction(+ \
+                                  : sum, sum2, count)
+  for (int i = 0; i < nxc; i++)
+    for (int j = 0; j < nyc; j++)
+      for (int k = 0; k < nzc; k++)
+      {
+        size_t q = ((size_t)i * nyc + (size_t)j) * (nzc + 2) + (size_t)k;
+        (*rnums_[0])(i, j, k) = rcoarse[q];
+        sum += (*rnums_[0])(i, j, k);
+        sum2 += (*rnums_[0])(i, j, k) * (*rnums_[0])(i, j, k);
+        ++count;
+      }
+
+  delete[] rcoarse;
+
+  FFTW_API(destroy_plan)(pf);
+  FFTW_API(destroy_plan)(ipc);
+
+
+  double rmean, rvar;
+  rmean = sum / count;
+  rvar = sum2 / count - rmean * rmean;
+
+  csoca::ilog.Print("Restricted random numbers have\n       mean = %f, var = %f", rmean, rvar);
+}
+
+template <typename T>
+music_wnoise_generator<T>::music_wnoise_generator(music_wnoise_generator<T> &rc, unsigned cubesize, long baseseed,
+                                                  bool kspace, bool isolated, int *x0_, int *lx_, bool zeromean)
+    : res_(2 * rc.res_), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed)
+{
+  initialize();
+
+  int x0[3], lx[3];
+  if (x0_ == NULL || lx_ == NULL)
+  {
+    for (int i = 0; i < 3; ++i)
+    {
+      x0[i] = 0;
+      lx[i] = res_;
+    }
+    fill_all();
+  }
+  else
+  {
+    for (int i = 0; i < 3; ++i)
+    {
+      x0[i] = x0_[i];
+      lx[i] = lx_[i];
+    }
+    fill_subvolume(x0, lx);
+  }
+
+  if (kspace)
+  {
+
+    csoca::ilog.Print("Generating a constrained random number set with seed %ld\n    using coarse mode replacement...", baseseed);
+    assert(lx[0] % 2 == 0 && lx[1] % 2 == 0 && lx[2] % 2 == 0);
+    size_t nx = lx[0], ny = lx[1], nz = lx[2],
+           nxc = lx[0] / 2, nyc = lx[1] / 2, nzc = lx[2] / 2;
+
+    real_t *rfine = new real_t[nx * ny * (nz + 2l)];
+    fftw_complex *cfine = reinterpret_cast<fftw_complex *>(rfine);
+
+
+#ifdef USE_SINGLEPRECISION
+    fftwf_plan
+        pf = fftwf_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE),
+        ipf = fftwf_plan_dft_c2r_3d(nx, ny, nz, cfine, rfine, FFTW_ESTIMATE);
+#else
+    fftw_plan
+        pf = fftw_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE),
+        ipf = fftw_plan_dft_c2r_3d(nx, ny, nz, cfine, rfine, FFTW_ESTIMATE);
+#endif
+
+
+#pragma omp parallel for
+    for (int i = 0; i < (int)nx; i++)
+      for (int j = 0; j < (int)ny; j++)
+        for (int k = 0; k < (int)nz; k++)
+        {
+          size_t q = ((size_t)i * (size_t)ny + (size_t)j) * (size_t)(nz + 2) + (size_t)k;
+          rfine[q] = (*this)(x0[0] + i, x0[1] + j, x0[2] + k);
+        }
+    //this->free_all_mem();	// temporarily free memory, allocate again later
+
+    real_t *rcoarse = new real_t[nxc * nyc * (nzc + 2)];
+    fftw_complex *ccoarse = reinterpret_cast<fftw_complex *>(rcoarse);
+
+
+#ifdef USE_SINGLEPRECISION
+    fftwf_plan pc = fftwf_plan_dft_r2c_3d(nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE);
+#else
+    fftw_plan pc = fftw_plan_dft_r2c_3d(nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE);
+#endif
+
+#pragma omp parallel for
+    for (int i = 0; i < (int)nxc; i++)
+      for (int j = 0; j < (int)nyc; j++)
+        for (int k = 0; k < (int)nzc; k++)
+        {
+          size_t q = ((size_t)i * (size_t)nyc + (size_t)j) * (size_t)(nzc + 2) + (size_t)k;
+          rcoarse[q] = rc(x0[0] / 2 + i, x0[1] / 2 + j, x0[2] / 2 + k);
+        }
+
+    FFTW_API(execute)(pc);
+    FFTW_API(execute)(pf);
+
+    double fftnorm = 1.0 / ((double)nx * (double)ny * (double)nz);
+    double sqrt8 = sqrt(8.0);
+    double phasefac = -0.5; //-1.0;//-0.125;
+
+    //if( isolated ) phasefac *= 1.5;
+
+    // embedding of coarse white noise by fourier interpolation
+
+#pragma omp parallel for
+    for (int i = 0; i < (int)nxc; i++)
+      for (int j = 0; j < (int)nyc; j++)
+        for (int k = 0; k < (int)nzc / 2 + 1; k++)
+        {
+          int ii(i), jj(j), kk(k);
+
+          //if( i==(int)nxc/2 ) continue;
+          //if( j==(int)nyc/2 ) continue;
+
+          if (i > (int)nxc / 2)
+            ii += (int)nx / 2;
+          if (j > (int)nyc / 2)
+            jj += (int)ny / 2;
+
+          size_t qc, qf;
+
+          double kx = (i <= (int)nxc / 2) ? (double)i : (double)(i - (int)nxc);
+          double ky = (j <= (int)nyc / 2) ? (double)j : (double)(j - (int)nyc);
+          double kz = (k <= (int)nzc / 2) ? (double)k : (double)(k - (int)nzc);
+
+          qc = ((size_t)i * nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k;
+          qf = ((size_t)ii * ny + (size_t)jj) * (nz / 2 + 1) + (size_t)kk;
+
+          std::complex<double> val(ccoarse[qc][0], ccoarse[qc][1]);
+          double phase = (kx / nxc + ky / nyc + kz / nzc) * phasefac * M_PI;
+
+          std::complex<double> val_phas(cos(phase), sin(phase));
+
+          val *= val_phas * sqrt8;
+
+          if (i != (int)nxc / 2 && j != (int)nyc / 2 && k != (int)nzc / 2)
+          {
+            cfine[qf][0] = val.real();
+            cfine[qf][1] = val.imag();
+          }
+          else
+          {
+            //RE(cfine[qf]) = val.real();
+            //IM(cfine[qf]) = 0.0;
+          }
+        }
+
+    delete[] rcoarse;
+
+#pragma omp parallel for
+    for (int i = 0; i < (int)nx; i++)
+      for (int j = 0; j < (int)ny; j++)
+        for (int k = 0; k < (int)nz / 2 + 1; k++)
+        {
+          size_t q = ((size_t)i * ny + (size_t)j) * (nz / 2 + 1) + (size_t)k;
+
+          cfine[q][0] *= fftnorm;
+          cfine[q][1] *= fftnorm;
+        }
+
+    FFTW_API(execute)(ipf);
+
+#pragma omp parallel for
+    for (int i = 0; i < (int)nx; i++)
+      for (int j = 0; j < (int)ny; j++)
+        for (int k = 0; k < (int)nz; k++)
+        {
+          size_t q = ((size_t)i * ny + (size_t)j) * (nz + 2) + (size_t)k;
+          (*this)(x0[0] + i, x0[1] + j, x0[2] + k, false) = rfine[q];
+        }
+
+    delete[] rfine;
+
+    FFTW_API(destroy_plan)(pf);
+    FFTW_API(destroy_plan)(pc);
+    FFTW_API(destroy_plan)(ipf);
+  }
+  else
+  {
+    csoca::ilog.Print("Generating a constrained random number set with seed %ld\n    using Hoffman-Ribak constraints...", baseseed);
+
+    double fac = 1.0 / sqrt(8.0); //1./sqrt(8.0);
+
+    for (int i = x0[0], ii = x0[0] / 2; i < x0[0] + lx[0]; i += 2, ++ii)
+      for (int j = x0[1], jj = x0[1] / 2; j < x0[1] + lx[1]; j += 2, ++jj)
+        for (int k = x0[2], kk = x0[2] / 2; k < x0[2] + lx[2]; k += 2, ++kk)
+        {
+          double topval = rc(ii, jj, kk);
+          double locmean = 0.125 * ((*this)(i, j, k) + (*this)(i + 1, j, k) + (*this)(i, j + 1, k) + (*this)(i, j, k + 1) +
+                                    (*this)(i + 1, j + 1, k) + (*this)(i + 1, j, k + 1) + (*this)(i, j + 1, k + 1) + (*this)(i + 1, j + 1, k + 1));
+          double dif = fac * topval - locmean;
+
+          (*this)(i, j, k) += dif;
+          (*this)(i + 1, j, k) += dif;
+          (*this)(i, j + 1, k) += dif;
+          (*this)(i, j, k + 1) += dif;
+          (*this)(i + 1, j + 1, k) += dif;
+          (*this)(i + 1, j, k + 1) += dif;
+          (*this)(i, j + 1, k + 1) += dif;
+          (*this)(i + 1, j + 1, k + 1) += dif;
+        }
+  }
+}
+
+template <typename T>
+void music_wnoise_generator<T>::register_cube(int i, int j, int k)
+{
+  i = (i + ncubes_) % ncubes_;
+  j = (j + ncubes_) % ncubes_;
+  k = (k + ncubes_) % ncubes_;
+  size_t icube = ((size_t)i * ncubes_ + (size_t)j) * ncubes_ + (size_t)k;
+
+  cubemap_iterator it = cubemap_.find(icube);
+
+  if (it == cubemap_.end())
+  {
+    rnums_.push_back(NULL);
+    cubemap_[icube] = rnums_.size() - 1;
+#ifdef DEBUG
+    LOGDEBUG("registering new cube %d,%d,%d . ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]);
+#endif
+  }
+}
+
+template <typename T>
+double music_wnoise_generator<T>::fill_cube(int i, int j, int k)
+{
+
+  gsl_rng *RNG = gsl_rng_alloc(gsl_rng_mt19937);
+
+  i = (i + ncubes_) % ncubes_;
+  j = (j + ncubes_) % ncubes_;
+  k = (k + ncubes_) % ncubes_;
+
+  size_t icube = ((size_t)i * ncubes_ + (size_t)j) * ncubes_ + (size_t)k;
+  long cubeseed = baseseed_ + icube; //... each cube gets its unique seed
+
+  gsl_rng_set(RNG, cubeseed);
+
+  cubemap_iterator it = cubemap_.find(icube);
+
+  if (it == cubemap_.end())
+  {
+    csoca::elog.Print("Attempt to access non-registered random number cube!");
+    throw std::runtime_error("Attempt to access non-registered random number cube!");
+  }
+
+  size_t cubeidx = it->second;
+
+  if (rnums_[cubeidx] == NULL)
+    rnums_[cubeidx] = new Meshvar<T>(cubesize_, 0, 0, 0);
+
+  double mean = 0.0;
+
+  for (int ii = 0; ii < (int)cubesize_; ++ii)
+    for (int jj = 0; jj < (int)cubesize_; ++jj)
+      for (int kk = 0; kk < (int)cubesize_; ++kk)
+      {
+        (*rnums_[cubeidx])(ii, jj, kk) = gsl_ran_ugaussian_ratio_method(RNG);
+        mean += (*rnums_[cubeidx])(ii, jj, kk);
+      }
+
+  gsl_rng_free(RNG);
+
+  return mean / (cubesize_ * cubesize_ * cubesize_);
+}
+
+template <typename T>
+void music_wnoise_generator<T>::subtract_from_cube(int i, int j, int k, double val)
+{
+  i = (i + ncubes_) % ncubes_;
+  j = (j + ncubes_) % ncubes_;
+  k = (k + ncubes_) % ncubes_;
+
+  size_t icube = ((size_t)i * ncubes_ + (size_t)j) * ncubes_ + (size_t)k;
+
+  cubemap_iterator it = cubemap_.find(icube);
+
+  if (it == cubemap_.end())
+  {
+    csoca::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::subtract_from_cube", i, j, k);
+    throw std::runtime_error("Attempt to access unallocated RND cube in music_wnoise_generator::subtract_from_cube");
+  }
+
+  size_t cubeidx = it->second;
+
+  for (int ii = 0; ii < (int)cubesize_; ++ii)
+    for (int jj = 0; jj < (int)cubesize_; ++jj)
+      for (int kk = 0; kk < (int)cubesize_; ++kk)
+        (*rnums_[cubeidx])(ii, jj, kk) -= val;
+}
+
+template <typename T>
+void music_wnoise_generator<T>::free_cube(int i, int j, int k)
+{
+
+  i = (i + ncubes_) % ncubes_;
+  j = (j + ncubes_) % ncubes_;
+  k = (k + ncubes_) % ncubes_;
+
+  size_t icube = ((size_t)i * (size_t)ncubes_ + (size_t)j) * (size_t)ncubes_ + (size_t)k;
+
+  cubemap_iterator it = cubemap_.find(icube);
+
+  if (it == cubemap_.end())
+  {
+    csoca::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::free_cube", i, j, k);
+    throw std::runtime_error("Attempt to access unallocated RND cube in music_wnoise_generator::free_cube");
+  }
+
+  size_t cubeidx = it->second;
+
+  if (rnums_[cubeidx] != NULL)
+  {
+    delete rnums_[cubeidx];
+    rnums_[cubeidx] = NULL;
+  }
+}
+
+template <typename T>
+void music_wnoise_generator<T>::initialize(void)
+{
+
+  ncubes_ = std::max((int)((double)res_ / cubesize_), 1);
+  if (res_ < cubesize_)
+  {
+    ncubes_ = 1;
+    cubesize_ = res_;
+  }
+
+  csoca::ilog.Print("Generating random numbers w/ sample cube size of %d", cubesize_);
+}
+
+template <typename T>
+double music_wnoise_generator<T>::fill_subvolume(int *i0, int *n)
+{
+  int i0cube[3], ncube[3];
+
+  i0cube[0] = (int)((double)(res_ + i0[0]) / cubesize_);
+  i0cube[1] = (int)((double)(res_ + i0[1]) / cubesize_);
+  i0cube[2] = (int)((double)(res_ + i0[2]) / cubesize_);
+
+  ncube[0] = (int)(n[0] / cubesize_) + 2;
+  ncube[1] = (int)(n[1] / cubesize_) + 2;
+  ncube[2] = (int)(n[2] / cubesize_) + 2;
+
+#ifdef DEBUG
+  LOGDEBUG("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]);
+  LOGDEBUG("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]);
+#endif
+
+  double mean = 0.0;
+
+  for (int i = i0cube[0]; i < i0cube[0] + ncube[0]; ++i)
+    for (int j = i0cube[1]; j < i0cube[1] + ncube[1]; ++j)
+      for (int k = i0cube[2]; k < i0cube[2] + ncube[2]; ++k)
+      {
+        int ii(i), jj(j), kk(k);
+
+        ii = (ii + ncubes_) % ncubes_;
+        jj = (jj + ncubes_) % ncubes_;
+        kk = (kk + ncubes_) % ncubes_;
+
+        register_cube(ii, jj, kk);
+      }
+
+#pragma omp parallel for reduction(+ \
+                                   : mean)
+  for (int i = i0cube[0]; i < i0cube[0] + ncube[0]; ++i)
+    for (int j = i0cube[1]; j < i0cube[1] + ncube[1]; ++j)
+      for (int k = i0cube[2]; k < i0cube[2] + ncube[2]; ++k)
+      {
+        int ii(i), jj(j), kk(k);
+
+        ii = (ii + ncubes_) % ncubes_;
+        jj = (jj + ncubes_) % ncubes_;
+        kk = (kk + ncubes_) % ncubes_;
+
+        mean += fill_cube(ii, jj, kk);
+      }
+  return mean / (ncube[0] * ncube[1] * ncube[2]);
+}
+
+template <typename T>
+double music_wnoise_generator<T>::fill_all(void)
+{
+  double sum = 0.0;
+
+  for (int i = 0; i < (int)ncubes_; ++i)
+    for (int j = 0; j < (int)ncubes_; ++j)
+      for (int k = 0; k < (int)ncubes_; ++k)
+      {
+        int ii(i), jj(j), kk(k);
+
+        ii = (ii + ncubes_) % ncubes_;
+        jj = (jj + ncubes_) % ncubes_;
+        kk = (kk + ncubes_) % ncubes_;
+
+        register_cube(ii, jj, kk);
+      }
+
+#pragma omp parallel for reduction(+ \
+                                   : sum)
+  for (int i = 0; i < (int)ncubes_; ++i)
+    for (int j = 0; j < (int)ncubes_; ++j)
+      for (int k = 0; k < (int)ncubes_; ++k)
+      {
+        int ii(i), jj(j), kk(k);
+
+        ii = (ii + ncubes_) % ncubes_;
+        jj = (jj + ncubes_) % ncubes_;
+        kk = (kk + ncubes_) % ncubes_;
+
+        sum += fill_cube(ii, jj, kk);
+      }
+
+//... subtract mean
+#pragma omp parallel for reduction(+ \
+                                   : sum)
+  for (int i = 0; i < (int)ncubes_; ++i)
+    for (int j = 0; j < (int)ncubes_; ++j)
+      for (int k = 0; k < (int)ncubes_; ++k)
+      {
+        int ii(i), jj(j), kk(k);
+
+        ii = (ii + ncubes_) % ncubes_;
+        jj = (jj + ncubes_) % ncubes_;
+        kk = (kk + ncubes_) % ncubes_;
+        subtract_from_cube(ii, jj, kk, sum / (ncubes_ * ncubes_ * ncubes_));
+      }
+
+  return sum / (ncubes_ * ncubes_ * ncubes_);
+}
+
+template <typename T>
+void music_wnoise_generator<T>::print_allocated(void)
+{
+  unsigned ncount = 0, ntot = rnums_.size();
+  for (size_t i = 0; i < rnums_.size(); ++i)
+    if (rnums_[i] != NULL)
+      ncount++;
+
+  csoca::ilog.Print(" -> %d of %d random number cubes currently allocated", ncount, ntot);
+}
+
+template class music_wnoise_generator<float>;
+template class music_wnoise_generator<double>;
diff --git a/src/plugins/random_music_wnoise_generator.hh b/src/plugins/random_music_wnoise_generator.hh
new file mode 100644
index 0000000..5b9cb36
--- /dev/null
+++ b/src/plugins/random_music_wnoise_generator.hh
@@ -0,0 +1,219 @@
+#pragma once
+
+#include <vector>
+#include <map>
+#include <general.hh>
+//#include "mesh.hh"
+
+#define DEF_RAN_CUBE_SIZE 32
+
+/*!
+ * @brief encapsulates all things random number generator related
+ */
+template <typename T>
+class music_wnoise_generator
+{
+public:
+  unsigned
+      res_,       //!< resolution of the full mesh
+      cubesize_,  //!< size of one independent random number cube
+      ncubes_;    //!< number of random number cubes to cover the full mesh
+  long baseseed_; //!< base seed from which cube seeds are computed
+
+protected:
+  template <typename data_t>
+  struct Meshvar
+  {
+    std::vector<data_t> data_;
+    size_t n_;
+    Meshvar(size_t n, ptrdiff_t ox, ptrdiff_t oy, ptrdiff_t oz)
+        : n_(n)
+    {
+      data_.assign(n * n * n, (data_t)0);
+    }
+
+    T &operator()(size_t i, size_t j, size_t k)
+    {
+      return data_[(i * n_ + j) * n_ + k];
+    }
+
+    const T &operator()(size_t i, size_t j, size_t k) const
+    {
+      return data_[(i * n_ + j) * n_ + k];
+    }
+  };
+
+  //! vector of 3D meshes (the random number cubes) with random numbers
+  std::vector<Meshvar<T> *> rnums_;
+
+  //! map of 3D indices to cube index
+  std::map<size_t, size_t> cubemap_;
+
+  typedef std::map<size_t, size_t>::iterator cubemap_iterator;
+
+protected:
+  //! register a cube with the hash map
+  void register_cube(int i, int j, int k);
+
+  //! fills a subcube with random numbers
+  double fill_cube(int i, int j, int k);
+
+  //! subtract a constant from an entire cube
+  void subtract_from_cube(int i, int j, int k, double val);
+
+  //! copy random numbers from a cube to a full grid array
+  template <class C>
+  void copy_cube(int i, int j, int k, C &dat)
+  {
+    int offi, offj, offk;
+
+    offi = i * cubesize_;
+    offj = j * cubesize_;
+    offk = k * cubesize_;
+
+    i = (i + ncubes_) % ncubes_;
+    j = (j + ncubes_) % ncubes_;
+    k = (k + ncubes_) % ncubes_;
+
+    size_t icube = (i * ncubes_ + j) * ncubes_ + k;
+    cubemap_iterator it = cubemap_.find(icube);
+
+    if (it == cubemap_.end())
+    {
+      csoca::elog.Print("attempting to copy data from non-existing RND cube %d,%d,%d", i, j, k);
+      throw std::runtime_error("attempting to copy data from non-existing RND cube");
+    }
+
+    size_t cubeidx = it->second;
+
+    for (int ii = 0; ii < (int)cubesize_; ++ii)
+      for (int jj = 0; jj < (int)cubesize_; ++jj)
+        for (int kk = 0; kk < (int)cubesize_; ++kk)
+          dat(offi + ii, offj + jj, offk + kk) = (*rnums_[cubeidx])(ii, jj, kk);
+  }
+
+  //! free the memory associated with a subcube
+  void free_cube(int i, int j, int k);
+
+  //! initialize member variables and allocate memory
+  void initialize(void);
+
+  //! fill a cubic subvolume of the full grid with random numbers
+  double fill_subvolume(int *i0, int *n);
+
+  //! fill an entire grid with random numbers
+  double fill_all(void);
+
+  //! fill an external array instead of the internal field
+  template <class C>
+  double fill_all(C &dat)
+  {
+    double sum = 0.0;
+
+    for (int i = 0; i < (int)ncubes_; ++i)
+      for (int j = 0; j < (int)ncubes_; ++j)
+        for (int k = 0; k < (int)ncubes_; ++k)
+        {
+          int ii(i), jj(j), kk(k);
+          register_cube(ii, jj, kk);
+        }
+
+#pragma omp parallel for reduction(+ \
+                                   : sum)
+    for (int i = 0; i < (int)ncubes_; ++i)
+      for (int j = 0; j < (int)ncubes_; ++j)
+        for (int k = 0; k < (int)ncubes_; ++k)
+        {
+          int ii(i), jj(j), kk(k);
+
+          ii = (ii + ncubes_) % ncubes_;
+          jj = (jj + ncubes_) % ncubes_;
+          kk = (kk + ncubes_) % ncubes_;
+
+          sum += fill_cube(ii, jj, kk);
+          copy_cube(ii, jj, kk, dat);
+          free_cube(ii, jj, kk);
+        }
+
+    return sum / (ncubes_ * ncubes_ * ncubes_);
+  }
+
+  //! write the number of allocated random number cubes to stdout
+  void print_allocated(void);
+
+public:
+  //! constructor
+  music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx);
+
+  //! constructor for constrained fine field
+  music_wnoise_generator(music_wnoise_generator<T> &rc, unsigned cubesize, long baseseed,
+                         bool kspace = false, bool isolated = false, int *x0_ = NULL, int *lx_ = NULL, bool zeromean = true);
+
+  //! constructor
+  music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, bool zeromean = true);
+
+  //! constructor to read white noise from file
+  music_wnoise_generator(unsigned res, std::string randfname, bool rndsign);
+
+  //! copy constructor for averaged field (not copying) hence explicit!
+  explicit music_wnoise_generator(/*const*/ music_wnoise_generator<T> &rc, bool kdegrade = true);
+
+  //! destructor
+  ~music_wnoise_generator()
+  {
+    for (unsigned i = 0; i < rnums_.size(); ++i)
+      if (rnums_[i] != NULL)
+        delete rnums_[i];
+    rnums_.clear();
+  }
+
+  //! access a random number, this allocates a cube and fills it with consistent random numbers
+  inline T &operator()(int i, int j, int k, bool fillrand = true)
+  {
+    int ic, jc, kc, is, js, ks;
+
+    if (ncubes_ == 0)
+      throw std::runtime_error("music_wnoise_generator: internal error, not properly initialized");
+
+    //... determine cube
+    ic = (int)((double)i / cubesize_ + ncubes_) % ncubes_;
+    jc = (int)((double)j / cubesize_ + ncubes_) % ncubes_;
+    kc = (int)((double)k / cubesize_ + ncubes_) % ncubes_;
+
+    size_t icube = ((size_t)ic * ncubes_ + (size_t)jc) * ncubes_ + (size_t)kc;
+
+    cubemap_iterator it = cubemap_.find(icube);
+
+    if (it == cubemap_.end())
+    {
+      csoca::elog.Print("Attempting to copy data from non-existing RND cube %d,%d,%d @ %d,%d,%d", ic, jc, kc, i, j, k);
+      throw std::runtime_error("attempting to copy data from non-existing RND cube");
+    }
+
+    size_t cubeidx = it->second;
+
+    if (rnums_[cubeidx] == NULL)
+    {
+      csoca::elog.Print("Attempting to access data from non-allocated RND cube %d,%d,%d", ic, jc, kc);
+      throw std::runtime_error("attempting to access data from non-allocated RND cube");
+    }
+
+    //... determine cell in cube
+    is = (i - ic * cubesize_ + cubesize_) % cubesize_;
+    js = (j - jc * cubesize_ + cubesize_) % cubesize_;
+    ks = (k - kc * cubesize_ + cubesize_) % cubesize_;
+
+    return (*rnums_[cubeidx])(is, js, ks);
+  }
+
+  //! free all cubes
+  void free_all_mem(void)
+  {
+    for (unsigned i = 0; i < rnums_.size(); ++i)
+      if (rnums_[i] != NULL)
+      {
+        delete rnums_[i];
+        rnums_[i] = NULL;
+      }
+  }
+};
diff --git a/src/plugins/transfer_eisenstein.cc b/src/plugins/transfer_eisenstein.cc
new file mode 100644
index 0000000..1868e4a
--- /dev/null
+++ b/src/plugins/transfer_eisenstein.cc
@@ -0,0 +1,392 @@
+/*
+ 
+ transfer_eisenstein.cc - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions 
+ for cosmological simulations 
+ 
+ */
+
+#include "transfer_function_plugin.hh"
+
+// forward declaration of WDM class
+class transfer_eisenstein_wdm_plugin;
+
+
+struct eisenstein_transfer
+{
+  //Cosmology m_Cosmology;
+  double  m_h0;
+  double	omhh,		/* Omega_matter*h^2 */
+  obhh,		/* Omega_baryon*h^2 */
+  theta_cmb,	/* Tcmb in units of 2.7 K */
+  z_equality,	/* Redshift of matter-radiation equality, really 1+z */
+  k_equality,	/* Scale of equality, in Mpc^-1 */
+  z_drag,		/* Redshift of drag epoch */
+  R_drag,		/* Photon-baryon ratio at drag epoch */
+  R_equality,	/* Photon-baryon ratio at equality epoch */
+  sound_horizon,	/* Sound horizon at drag epoch, in Mpc */
+  k_silk,		/* Silk damping scale, in Mpc^-1 */
+  alpha_c,	/* CDM suppression */
+  beta_c,		/* CDM log shift */
+  alpha_b,	/* Baryon suppression */
+  beta_b,		/* Baryon envelope shift */
+  beta_node,	/* Sound horizon shift */
+  k_peak,		/* Fit to wavenumber of first peak, in Mpc^-1 */
+  sound_horizon_fit,	/* Fit to sound horizon, in Mpc */
+  alpha_gamma;	/* Gamma suppression in approximate TF */
+
+  template <typename T>
+  inline T SQR(T x) { return x * x; };
+
+  template <typename T>
+  inline T CUBE(T x) { return x * SQR(x); };
+
+  template<typename T>
+  inline T POW4( T x ){ return SQR(SQR(x)); };
+  
+  //! private member function: sets internal quantities for Eisenstein & Hu fitting
+  void TFset_parameters(double omega0hh, double f_baryon, double Tcmb)
+  /* Set all the scalars quantities for Eisenstein & Hu 1997 fitting formula */
+  /* Input: omega0hh -- The density of CDM and baryons, in units of critical dens,
+   multiplied by the square of the Hubble constant, in units
+   of 100 km/s/Mpc */
+  /* 	  f_baryon -- The fraction of baryons to CDM */
+  /*        Tcmb -- The temperature of the CMB in Kelvin.  Tcmb<=0 forces use
+   of the COBE value of  2.728 K. */
+  /* Output: Nothing, but set many global variables used in TFfit_onek().
+   You can access them yourself, if you want. */
+  /* Note: Units are always Mpc, never h^-1 Mpc. */
+  {
+    double z_drag_b1, z_drag_b2;
+    double alpha_c_a1, alpha_c_a2, beta_c_b1, beta_c_b2, alpha_b_G, y;
+    
+    if (f_baryon<=0.0 || omega0hh<=0.0) {
+      fprintf(stderr, "TFset_parameters(): Illegal input.\n");
+      exit(1);
+    }
+    omhh = omega0hh;
+    obhh = omhh*f_baryon;
+    if (Tcmb<=0.0) Tcmb=2.728;	/* COBE FIRAS */
+    theta_cmb = Tcmb/2.7;
+    
+    z_equality = 2.50e4*omhh/POW4(theta_cmb);  /* Really 1+z */
+    k_equality = 0.0746*omhh/SQR(theta_cmb);
+    
+    z_drag_b1 = 0.313*pow((double)omhh,-0.419)*(1+0.607*pow((double)omhh,0.674));
+    z_drag_b2 = 0.238*pow((double)omhh,0.223);
+    z_drag = 1291*pow(omhh,0.251)/(1+0.659*pow((double)omhh,0.828))*
+    (1+z_drag_b1*pow((double)obhh,(double)z_drag_b2));
+    
+    R_drag = 31.5*obhh/POW4(theta_cmb)*(1000/(1+z_drag));
+    R_equality = 31.5*obhh/POW4(theta_cmb)*(1000/z_equality);
+    
+    sound_horizon = 2./3./k_equality*sqrt(6./R_equality)*
+    log((sqrt(1+R_drag)+sqrt(R_drag+R_equality))/(1+sqrt(R_equality)));
+    
+    k_silk = 1.6*pow((double)obhh,0.52)*pow((double)omhh,0.73)*(1+pow((double)10.4*omhh,-0.95));
+    
+    alpha_c_a1 = pow((double)46.9*omhh,0.670)*(1+pow(32.1*omhh,-0.532));
+    alpha_c_a2 = pow((double)12.0*omhh,0.424)*(1+pow(45.0*omhh,-0.582));
+    alpha_c = pow(alpha_c_a1,-f_baryon)*
+    pow(alpha_c_a2,-CUBE(f_baryon));
+    
+    beta_c_b1 = 0.944/(1+pow(458*omhh,-0.708));
+    beta_c_b2 = pow(0.395*omhh, -0.0266);
+    beta_c = 1.0/(1+beta_c_b1*(pow(1-f_baryon, beta_c_b2)-1));
+    
+    y = z_equality/(1+z_drag);
+    alpha_b_G = y*(-6.*sqrt(1+y)+(2.+3.*y)*log((sqrt(1+y)+1)/(sqrt(1+y)-1)));
+    alpha_b = 2.07*k_equality*sound_horizon*pow(1+R_drag,-0.75)*alpha_b_G;
+    
+    beta_node = 8.41*pow(omhh, 0.435);
+    beta_b = 0.5+f_baryon+(3.-2.*f_baryon)*sqrt(pow(17.2*omhh,2.0)+1);
+    
+    k_peak = 2.5*3.14159*(1+0.217*omhh)/sound_horizon;
+    sound_horizon_fit = 44.5*log(9.83/omhh)/sqrt(1+10.0*pow(obhh,0.75));
+    
+    alpha_gamma = 1-0.328*log(431.0*omhh)*f_baryon + 0.38*log(22.3*omhh)*
+    SQR(f_baryon);
+    
+    return;
+  }
+  
+  //! private member function: computes transfer function for mode k (k in Mpc)
+  inline double TFfit_onek(double k, double *tf_baryon, double *tf_cdm)
+  /* Input: k -- Wavenumber at which to calculate transfer function, in Mpc^-1.
+   *tf_baryon, *tf_cdm -- Input value not used; replaced on output if
+   the input was not NULL. */
+  /* Output: Returns the value of the full transfer function fitting formula.
+   This is the form given in Section 3 of Eisenstein & Hu (1997).
+   *tf_baryon -- The baryonic contribution to the full fit.
+   *tf_cdm -- The CDM contribution to the full fit. */
+  /* Notes: Units are Mpc, not h^-1 Mpc. */
+  {
+    double T_c_ln_beta, T_c_ln_nobeta, T_c_C_alpha, T_c_C_noalpha;
+    double q, xx, xx_tilde;//, q_eff;
+    double T_c_f, T_c, s_tilde, T_b_T0, T_b, f_baryon, T_full;
+    //double T_0_L0, T_0_C0, T_0, gamma_eff;
+    //double T_nowiggles_L0, T_nowiggles_C0, T_nowiggles;
+    
+    k = fabs(k);	/* Just define negative k as positive */
+    if (k==0.0) {
+      if (tf_baryon!=NULL) *tf_baryon = 1.0;
+      if (tf_cdm!=NULL) *tf_cdm = 1.0;
+      return 1.0;
+    }
+    
+    q = k/13.41/k_equality;
+    xx = k*sound_horizon;
+    
+    T_c_ln_beta = log(2.718282+1.8*beta_c*q);
+    T_c_ln_nobeta = log(2.718282+1.8*q);
+    T_c_C_alpha = 14.2/alpha_c + 386.0/(1+69.9*pow(q,1.08));
+    T_c_C_noalpha = 14.2 + 386.0/(1+69.9*pow(q,1.08));
+    
+    T_c_f = 1.0/(1.0+POW4(xx/5.4));
+    T_c = T_c_f*T_c_ln_beta/(T_c_ln_beta+T_c_C_noalpha*SQR(q)) +
+    (1-T_c_f)*T_c_ln_beta/(T_c_ln_beta+T_c_C_alpha*SQR(q));
+    
+    s_tilde = sound_horizon*pow(1.+CUBE(beta_node/xx),-1./3.);
+    xx_tilde = k*s_tilde;
+    
+    T_b_T0 = T_c_ln_nobeta/(T_c_ln_nobeta+T_c_C_noalpha*SQR(q));
+    T_b = sin(xx_tilde)/(xx_tilde)*(T_b_T0/(1.+SQR(xx/5.2))+
+                                    alpha_b/(1.+CUBE(beta_b/xx))*exp(-pow(k/k_silk,1.4)));
+    
+    f_baryon = obhh/omhh;
+    T_full = f_baryon*T_b + (1-f_baryon)*T_c;
+    
+    /* Now to store these transfer functions */
+    if (tf_baryon!=NULL) *tf_baryon = T_b;
+    if (tf_cdm!=NULL) *tf_cdm = T_c;
+    return T_full;
+  }
+  
+  double fb_, fc_;
+  
+  eisenstein_transfer()
+  {  }
+  
+  void set_parameters( double H0, double Omega_m, double Omega_b, double Tcmb )
+  {
+    m_h0 = H0*0.01;
+    TFset_parameters( (Omega_m)*H0*H0*(0.01*0.01),
+                     Omega_b/Omega_m, Tcmb);
+    
+    fb_ = Omega_b/(Omega_m);
+    fc_ = (Omega_m-Omega_b)/(Omega_m) ;
+  }
+  
+  inline double at_k( double k )
+  {
+    double tfb, tfcdm;
+    TFfit_onek( k*m_h0, &tfb, &tfcdm );
+    return fb_*tfb+fc_*tfcdm;
+  }
+};
+
+
+//! Implementation of abstract base class TransferFunction for the Eisenstein & Hu transfer function 
+/*!
+ This class implements the analytical fit to the matter transfer
+ function by Eisenstein & Hu (1999). In fact it is their code.
+ */
+class transfer_eisenstein_plugin : public TransferFunction_plugin
+{
+protected:
+   eisenstein_transfer etf_;
+	
+public:
+	//! Constructor for Eisenstein & Hu fitting for transfer function
+	/*!
+	 \param aCosm structure of type Cosmology carrying the cosmological parameters
+	 \param Tcmb mean temperature of the CMB fluctuations (defaults to
+	 Tcmb = 2.726 if not specified)
+	 */
+	transfer_eisenstein_plugin( ConfigFile &cf )
+    :  TransferFunction_plugin(cf)
+	{
+		double Tcmb    = pcf_->GetValueSafe<double>("cosmology", "Tcmb", 2.726);
+        double H0      = pcf_->GetValue<double>("cosmology", "H0");
+        double Omega_m = pcf_->GetValue<double>("cosmology", "Omega_m");
+        double Omega_b = pcf_->GetValue<double>("cosmology", "Omega_b");
+
+        etf_.set_parameters( H0, Omega_m, Omega_b, Tcmb );
+		
+		tf_distinct_ = false;
+		tf_withvel_  = false;
+	}
+	
+	//! Computes the transfer function for k in Mpc/h by calling TFfit_onek
+	inline double compute( double k, tf_type type ){
+        return etf_.at_k( k );
+	}
+	
+	inline double get_kmin( void ){
+		return 1e-4;
+	}
+	
+	inline double get_kmax( void ){
+		return 1.e4;
+	}
+	
+};
+
+
+#include <map>
+class transfer_eisenstein_wdm_plugin : public TransferFunction_plugin
+{
+protected:
+	real_t m_WDMalpha, m_h0;
+	double omegam_, wdmm_, wdmgx_, wdmnu_, H0_, omegab_;
+    std::string type_;
+    std::map< std::string, int > typemap_;
+  
+    eisenstein_transfer etf_;
+  
+    enum wdmtyp { wdm_bode, wdm_viel, wdm_bode_wrong=99};
+    
+public:
+	transfer_eisenstein_wdm_plugin( ConfigFile &cf )
+	: TransferFunction_plugin(cf)
+	{
+        double Tcmb = pcf_->GetValueSafe("cosmology","Tcmb",2.726);
+        omegam_ = cf.GetValue<double>("cosmology", "Omega_m");
+        omegab_ = cf.GetValue<double>("cosmology", "Omega_b");
+        H0_ = cf.GetValue<double>("cosmology", "H0");
+        m_h0 = H0_ / 100.0;
+        wdmm_ = cf.GetValue<double>("cosmology", "WDMmass");
+
+        etf_.set_parameters(H0_, omegam_, omegab_, Tcmb);
+
+        typemap_.insert( std::pair<std::string,int>( "BODE", wdm_bode ) );
+        typemap_.insert( std::pair<std::string,int>( "VIEL", wdm_viel ) ); // add the other types
+        typemap_.insert( std::pair<std::string,int>( "BODE_WRONG", wdm_bode_wrong ) ); // add the other types
+        
+        type_   = cf.GetValueSafe<std::string>("cosmology","WDMtftype","BODE");
+        
+        //type_ = std::string( toupper( type_.c_str() ) );
+        
+        if( typemap_.find( type_ ) == typemap_.end() )
+            throw std::runtime_error("unknown transfer function fit for WDM");
+        
+        m_WDMalpha = 1.0;
+        
+        switch( typemap_[type_] )
+        {
+            //... parameterisation from Bode et al. (2001), ApJ, 556, 93
+            case wdm_bode:
+                    wdmnu_  = cf.GetValueSafe<double>("cosmology","WDMnu",1.0);
+                    wdmgx_  = cf.GetValueSafe<double>("cosmology","WDMg_x",1.5);
+                    m_WDMalpha = 0.05 * pow( omegam_/0.4,0.15)
+                        *pow(H0_*0.01/0.65,1.3)*pow(wdmm_,-1.15)
+                        *pow(1.5/wdmgx_,0.29);
+                
+                break;
+            
+            //... parameterisation from Viel et al. (2005), Phys Rev D, 71
+            case wdm_viel:
+                    wdmnu_  = cf.GetValueSafe<double>("cosmology","WDMnu",1.12);
+                    m_WDMalpha = 0.049 * pow( omegam_/0.25,0.11)
+                        *pow(H0_*0.01/0.7,1.22)*pow(wdmm_,-1.11);
+                break;
+            
+            
+            //.... below is for historical reasons due to the buggy parameterisation
+            //.... in early versions of MUSIC, but apart from H instead of h, Bode et al.
+            case wdm_bode_wrong:
+                    wdmnu_  = cf.GetValueSafe<double>("cosmology","WDMnu",1.0);
+                    wdmgx_  = cf.GetValueSafe<double>("cosmology","WDMg_x",1.5);
+                    m_WDMalpha = 0.05 * pow( omegam_/0.4,0.15)
+                        *pow(H0_/0.65,1.3)*pow(wdmm_,-1.15)
+                        *pow(1.5/wdmgx_,0.29);
+                break;
+                
+            default:
+                    wdmnu_  = cf.GetValueSafe<double>("cosmology","WDMnu",1.0);
+                    wdmgx_  = cf.GetValueSafe<double>("cosmology","WDMg_x",1.5);
+                    m_WDMalpha = 0.05 * pow( omegam_/0.4,0.15)
+                        *pow(H0_*0.01/0.65,1.3)*pow(wdmm_,-1.15)
+                        *pow(1.5/wdmgx_,0.29);
+                break;
+        }
+        std::cerr << "WDM alpha = " << m_WDMalpha << std::endl;
+	}
+	
+	inline double compute( double k, tf_type type )
+	{
+		return etf_.at_k( k )*pow(1.0+pow(m_WDMalpha*k,2.0*wdmnu_),-5.0/wdmnu_);
+	}
+  
+    inline double get_kmin( void ){
+      return 1e-4;
+    }
+    
+    inline double get_kmax( void ){
+      return 1.e4;
+    }
+	
+};
+
+// CDM Bino type WIMP small-scale damped spectrum from Green, Hofmann & Schwarz (2004)
+class transfer_eisenstein_cdmbino_plugin : public TransferFunction_plugin
+{
+protected:
+	real_t m_h0;
+	double omegam_, H0_, omegab_, mcdm_, Tkd_, kfs_, kd_;
+    eisenstein_transfer etf_;
+    
+public:
+	transfer_eisenstein_cdmbino_plugin( ConfigFile &cf )
+	: TransferFunction_plugin(cf)
+	{
+        double Tcmb = pcf_->GetValueSafe("cosmology","Tcmb",2.726);
+        
+		omegam_ = cf.GetValue<double>("cosmology","Omega_m");
+		omegab_ = cf.GetValue<double>("cosmology","Omega_b");
+		H0_     = cf.GetValue<double>("cosmology","H0");
+        m_h0    = H0_ / 100.0;
+
+        etf_.set_parameters(H0_, omegam_, omegab_, Tcmb);
+
+        mcdm_ = cf.GetValueSafe<double>("cosmology", "CDM_mass", 100.0); // bino particle mass in GeV
+        Tkd_    = cf.GetValueSafe<double>("cosmology","CDM_Tkd", 33.0); // temperature at which CDM particle kinetically decouples (in MeV)
+        
+        kfs_    = 1.7e6 / m_h0 * sqrt( mcdm_ / 100. * Tkd_ / 30. ) / (1.0 + log( Tkd_ / 30. ) / 19.2 );
+        kd_     = 3.8e7 / m_h0 * sqrt( mcdm_ / 100. * Tkd_ / 30. );
+        
+        //LOGINFO(" bino CDM: k_fs = %g, k_d = %g", kfs_, kd_ );
+	}
+	
+	inline double compute( double k, tf_type type )
+	{
+        double kkfs = k/kfs_;
+        double kkfs2 = kkfs*kkfs;
+        double kkd2 = (k/kd_)*(k/kd_);
+        
+        // in principle the Green et al. (2004) works only up to k/k_fs < 1
+        // the fit crosses zero at (k/k_fs)**2 = 3/2, we just zero it there...
+        if( kkfs2 < 1.5 )
+            return etf_.at_k( k ) * (1.0-2.0/3.0*kkfs2) * exp( -kkfs2 - kkd2 );
+        else
+            return 0.0;
+	}
+    
+    inline double get_kmin( void ){
+        return 1e-4;
+    }
+    
+    inline double get_kmax( void ){
+        return 1.e8;
+    }
+	
+};
+
+
+
+namespace{
+	TransferFunction_plugin_creator_concrete< transfer_eisenstein_plugin > creator("eisenstein");
+	TransferFunction_plugin_creator_concrete< transfer_eisenstein_wdm_plugin > creator2("eisenstein_wdm");
+    TransferFunction_plugin_creator_concrete< transfer_eisenstein_cdmbino_plugin > creator3("eisenstein_cdmbino");
+}
+
diff --git a/src/random_plugin.cc b/src/random_plugin.cc
new file mode 100644
index 0000000..77f351d
--- /dev/null
+++ b/src/random_plugin.cc
@@ -0,0 +1,46 @@
+#include <general.hh>
+#include <random_plugin.hh>
+
+std::map<std::string, RNG_plugin_creator *> &
+get_RNG_plugin_map()
+{
+    static std::map<std::string, RNG_plugin_creator *> RNG_plugin_map;
+    return RNG_plugin_map;
+}
+
+void print_RNG_plugins()
+{
+    std::map<std::string, RNG_plugin_creator *> &m = get_RNG_plugin_map();
+    std::map<std::string, RNG_plugin_creator *>::iterator it;
+    it = m.begin();
+    csoca::ilog << "- Available random number generator plug-ins:" << std::endl;
+    while (it != m.end())
+    {
+        if ((*it).second){
+            csoca::ilog.Print("\t\'%s\'\n", (*it).first.c_str());
+        }
+        ++it;
+    }
+}
+
+RNG_plugin *select_RNG_plugin(ConfigFile &cf)
+{
+    std::string rngname = cf.GetValueSafe<std::string>("random", "generator", "MUSIC");
+
+    RNG_plugin_creator *the_RNG_plugin_creator = get_RNG_plugin_map()[rngname];
+
+    if (!the_RNG_plugin_creator)
+    {
+        csoca::ilog.Print("Invalid/Unregistered random number generator plug-in encountered : %s", rngname.c_str());
+        print_RNG_plugins();
+        throw std::runtime_error("Unknown random number generator plug-in");
+    }
+    else
+    {
+        csoca::ilog.Print("Selecting random number generator plug-in  : %s", rngname.c_str());
+    }
+
+    RNG_plugin *the_RNG_plugin = the_RNG_plugin_creator->Create(cf);
+
+    return the_RNG_plugin;
+}
\ No newline at end of file
diff --git a/src/transfer_function_plugin.cc b/src/transfer_function_plugin.cc
new file mode 100644
index 0000000..30ecbba
--- /dev/null
+++ b/src/transfer_function_plugin.cc
@@ -0,0 +1,45 @@
+
+#include <transfer_function_plugin.hh>
+
+std::map<std::string, TransferFunction_plugin_creator *> &
+get_TransferFunction_plugin_map()
+{
+    static std::map<std::string, TransferFunction_plugin_creator *> TransferFunction_plugin_map;
+    return TransferFunction_plugin_map;
+}
+
+void print_TransferFunction_plugins()
+{
+    std::map<std::string, TransferFunction_plugin_creator *> &m = get_TransferFunction_plugin_map();
+    std::map<std::string, TransferFunction_plugin_creator *>::iterator it;
+    it = m.begin();
+    csoca::ilog << "- Available transfer function plug-ins:" << std::endl;
+    while (it != m.end())
+    {
+        if ((*it).second)
+            csoca::ilog << "\t\'" << (*it).first << "\'" << std::endl;
+        ++it;
+    }
+}
+
+TransferFunction_plugin *select_TransferFunction_plugin(ConfigFile &cf)
+{
+    std::string tfname = cf.GetValue<std::string>("cosmology", "transfer");
+
+    TransferFunction_plugin_creator *the_TransferFunction_plugin_creator = get_TransferFunction_plugin_map()[tfname];
+
+    if (!the_TransferFunction_plugin_creator)
+    {
+        csoca::elog << "Invalid/Unregistered transfer function plug-in encountered : " << tfname << std::endl;
+        print_TransferFunction_plugins();
+        throw std::runtime_error("Unknown transfer function plug-in");
+    }
+    else
+    {
+        csoca::ilog << "Selecting transfer function plug-in \'" << tfname << "\'..." << std::endl;
+    }
+
+    TransferFunction_plugin *the_TransferFunction_plugin = the_TransferFunction_plugin_creator->create(cf);
+
+    return the_TransferFunction_plugin;
+}