From 01b22b76a32aecdcabf43c3536573e7c87a2de78 Mon Sep 17 00:00:00 2001 From: Michael Michaux Date: Thu, 24 Oct 2019 14:44:06 +0200 Subject: [PATCH 001/130] Added theoretical convergence test. --- include/testing.hh | 2 + src/ic_generator.cc | 2 +- src/testing.cc | 104 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 99 insertions(+), 9 deletions(-) diff --git a/include/testing.hh b/include/testing.hh index 53bc571..2395db3 100644 --- a/include/testing.hh +++ b/include/testing.hh @@ -4,6 +4,7 @@ #include #include #include +#include namespace testing{ void output_potentials_and_densities( @@ -27,6 +28,7 @@ namespace testing{ void output_convergence( ConfigFile &the_config, + CosmologyCalculator* the_cosmo_calc, std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, diff --git a/src/ic_generator.cc b/src/ic_generator.cc index a915db9..bc2a956 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -335,7 +335,7 @@ int Run( ConfigFile& the_config ) } else if(testing == "velocity_displacement_symmetries") { testing::output_velocity_displacement_symmetries(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); } else if(testing == "convergence") { - testing::output_convergence(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); + testing::output_convergence(the_config, the_cosmo_calc.get(), ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); } else { csoca::flog << "unknown test '" << testing << "'" << std::endl; std::abort(); diff --git a/src/testing.cc b/src/testing.cc index bfd088d..533855a 100644 --- a/src/testing.cc +++ b/src/testing.cc @@ -242,6 +242,7 @@ void output_velocity_displacement_symmetries( void output_convergence( ConfigFile &the_config, + CosmologyCalculator* the_cosmo_calc, std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, @@ -249,7 +250,6 @@ void output_convergence( Grid_FFT &phi3b, std::array *, 3> &A3) { - // scale all potentials to remove dplus0 phi /= dplus; phi2 /= dplus * dplus; @@ -259,6 +259,90 @@ void output_convergence( (*A3[1]) /= dplus * dplus * dplus; (*A3[2]) /= dplus * dplus * dplus; + ////////////////////// theoretical convergence radius ////////////////////// + + // compute phi_code + Grid_FFT phi_code({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + phi_code.FourierTransformForward(false); + #pragma omp parallel for collapse(3) + for (std::size_t i = 0; i < phi_code.size(0); ++i) { + for (std::size_t j = 0; j < phi_code.size(1); ++j) { + for (std::size_t k = 0; k < phi_code.size(2); ++k) { + std::size_t idx = phi_code.get_idx(i, j, k); + phi_code.kelem(idx) = -phi.kelem(idx); + } + } + } + + // initialize norm to 0 + Grid_FFT nabla_vini_norm({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + #pragma omp parallel for collapse(3) + for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { + for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { + for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { + std::size_t idx = nabla_vini_norm.get_idx(i, j, k); + nabla_vini_norm.relem(idx) = 0.0; + } + } + } + + Grid_FFT nabla_vini_mn({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + for(std::size_t m = 0; m < 3; m++) { + for(std::size_t n = m; n < 3; n++) { + nabla_vini_mn.FourierTransformForward(false); + #pragma omp parallel for collapse(3) + for (std::size_t i = 0; i < phi_code.size(0); ++i) { + for (std::size_t j = 0; j < phi_code.size(1); ++j) { + for (std::size_t k = 0; k < phi_code.size(2); ++k) { + 
std::size_t idx = phi_code.get_idx(i, j, k); + auto kk = phi_code.get_k(i, j, k); + nabla_vini_mn.kelem(idx) = phi_code.kelem(idx) * (kk[m] * kk[n]); + } + } + } + nabla_vini_mn.FourierTransformBackward(); + nabla_vini_mn *= (3.2144004915 / the_cosmo_calc->CalcGrowthFactor(1.0)); + // sum of squares + #pragma omp parallel for collapse(3) + for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { + for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { + for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { + std::size_t idx = nabla_vini_norm.get_idx(i, j, k); + if(m != n) { + nabla_vini_norm.relem(idx) += (2.0 * nabla_vini_mn.relem(idx) * nabla_vini_mn.relem(idx)); + } else { + nabla_vini_norm.relem(idx) += (nabla_vini_mn.relem(idx) * nabla_vini_mn.relem(idx)); + } + } + } + } + } + } + // square root + #pragma omp parallel for collapse(3) + for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { + for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { + for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { + std::size_t idx = nabla_vini_norm.get_idx(i, j, k); + nabla_vini_norm.relem(idx) = std::sqrt(nabla_vini_norm.relem(idx)); + } + } + } + + // get t_eds + Grid_FFT t_eds({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + #pragma omp parallel for collapse(3) + for (std::size_t i = 0; i < t_eds.size(0); ++i) { + for (std::size_t j = 0; j < t_eds.size(1); ++j) { + for (std::size_t k = 0; k < t_eds.size(2); ++k) { + std::size_t idx = t_eds.get_idx(i, j, k); + t_eds.relem(idx) = 0.0204 / nabla_vini_norm.relem(idx); + } + } + } + + ////////////////////////// 3lpt convergence test /////////////////////////// + // initialize grids to 0 Grid_FFT psi_1({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT psi_2({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); @@ -351,13 +435,17 @@ void output_convergence( } } - // write results - unlink("convergence_test.hdf5"); - inv_convergence_radius.Write_to_HDF5("convergence_test.hdf5", "inv_convergence_radius"); - psi_1.Write_to_HDF5("convergence_test.hdf5", "psi_1_norm"); - psi_2.Write_to_HDF5("convergence_test.hdf5", "psi_2_norm"); - psi_3.Write_to_HDF5("convergence_test.hdf5", "psi_3_norm"); - + ////////////////////////////// write results /////////////////////////////// + std::string convergence_test_filename("convergence_test.hdf5"); + unlink(convergence_test_filename.c_str()); +#if defined(USE_MPI) + MPI_Barrier(MPI_COMM_WORLD); +#endif + t_eds.Write_to_HDF5(convergence_test_filename, "t_eds"); + inv_convergence_radius.Write_to_HDF5(convergence_test_filename, "inv_convergence_radius"); + // psi_1.Write_to_HDF5(convergence_test_filename, "psi_1_norm"); + // psi_2.Write_to_HDF5(convergence_test_filename, "psi_2_norm"); + // psi_3.Write_to_HDF5(convergence_test_filename, "psi_3_norm"); } } // namespace testing From 4de579ca7831f44897ba54406f97391cb45da2c0 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 24 Oct 2019 17:12:43 +0200 Subject: [PATCH 002/130] revert back to C++14 by replacing inline lambdas with template funcs --- CMakeLists.txt | 2 +- include/operators.hh | 27 ++++++++++++++++++++++----- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 875fc91..fcc57e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,7 +84,7 @@ file( GLOB PLUGINS add_executable(${PRGNAME} ${SOURCES} ${PLUGINS}) target_setup_class(${PRGNAME}) -set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 17) +set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 14) # mpi flags 
if(MPI_CXX_FOUND) diff --git a/include/operators.hh b/include/operators.hh index cc0ed67..c78526e 100644 --- a/include/operators.hh +++ b/include/operators.hh @@ -1,9 +1,26 @@ #pragma once namespace op{ -inline auto assign_to = [](auto &g){return [&](auto i, auto v){ g[i] = v; };}; -inline auto add_to = [](auto &g){return [&](auto i, auto v){ g[i] += v; };}; -inline auto add_twice_to = [](auto &g){return [&](auto i, auto v){ g[i] += 2*v; };}; -inline auto subtract_from = [](auto &g){return [&](auto i, auto v){ g[i] -= v; };}; -inline auto subtract_twice_from = [](auto &g){return [&](auto i, auto v){ g[i] -= 2*v; };}; + +template< typename grid> +inline auto assign_to( grid& g ){return [&](auto i, auto v){ g[i] = v; };} + +template< typename grid> +inline auto add_to( grid& g ){return [&](auto i, auto v){ g[i] += v; };} + +template< typename grid> +inline auto add_twice_to( grid& g ){return [&](auto i, auto v){ g[i] += 2*v; };} + +template< typename grid> +inline auto subtract_from( grid& g ){return [&](auto i, auto v){ g[i] -= v; };} + +template< typename grid> +inline auto subtract_twice_from( grid& g ){return [&](auto i, auto v){ g[i] -= 2*v; };} + +// above template functions can be written as C++17 inline lambdas... but we're using C++14... +// inline auto assign_to = [](auto &g){return [&](auto i, auto v){ g[i] = v; };}; +// inline auto add_to = [](auto &g){return [&](auto i, auto v){ g[i] += v; };}; +// inline auto add_twice_to = [](auto &g){return [&](auto i, auto v){ g[i] += 2*v; };}; +// inline auto subtract_from = [](auto &g){return [&](auto i, auto v){ g[i] -= v; };}; +// inline auto subtract_twice_from = [](auto &g){return [&](auto i, auto v){ g[i] -= 2*v; };}; } From 80da0a4ff2558644b3b83971bce4fce55268de68 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 24 Oct 2019 17:26:44 +0200 Subject: [PATCH 003/130] removed openmp loop collapse, problems with intel c++ --- src/testing.cc | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/testing.cc b/src/testing.cc index 533855a..e99fbb4 100644 --- a/src/testing.cc +++ b/src/testing.cc @@ -264,7 +264,7 @@ void output_convergence( // compute phi_code Grid_FFT phi_code({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); phi_code.FourierTransformForward(false); - #pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < phi_code.size(0); ++i) { for (std::size_t j = 0; j < phi_code.size(1); ++j) { for (std::size_t k = 0; k < phi_code.size(2); ++k) { @@ -276,7 +276,7 @@ void output_convergence( // initialize norm to 0 Grid_FFT nabla_vini_norm({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - #pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { @@ -290,7 +290,7 @@ void output_convergence( for(std::size_t m = 0; m < 3; m++) { for(std::size_t n = m; n < 3; n++) { nabla_vini_mn.FourierTransformForward(false); - #pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < phi_code.size(0); ++i) { for (std::size_t j = 0; j < phi_code.size(1); ++j) { for (std::size_t k = 0; k < phi_code.size(2); ++k) { @@ -303,7 +303,7 @@ void output_convergence( nabla_vini_mn.FourierTransformBackward(); nabla_vini_mn *= (3.2144004915 / the_cosmo_calc->CalcGrowthFactor(1.0)); // sum of squares - #pragma omp parallel for 
collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { @@ -319,7 +319,7 @@ void output_convergence( } } // square root - #pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { @@ -331,7 +331,7 @@ void output_convergence( // get t_eds Grid_FFT t_eds({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - #pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < t_eds.size(0); ++i) { for (std::size_t j = 0; j < t_eds.size(1); ++j) { for (std::size_t k = 0; k < t_eds.size(2); ++k) { @@ -347,7 +347,7 @@ void output_convergence( Grid_FFT psi_1({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT psi_2({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT psi_3({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); -#pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < psi_1.size(0); ++i) { for (std::size_t j = 0; j < psi_1.size(1); ++j) { for (std::size_t k = 0; k < psi_1.size(2); ++k) { @@ -374,7 +374,7 @@ void output_convergence( psi_2_tmp.FourierTransformForward(false); psi_3_tmp.FourierTransformForward(false); -#pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < phi.size(0); ++i) { for (std::size_t j = 0; j < phi.size(1); ++j) { for (std::size_t k = 0; k < phi.size(2); ++k) { @@ -395,7 +395,7 @@ void output_convergence( psi_3_tmp.FourierTransformBackward(); // sum of squares -#pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < psi_1.size(0); ++i) { for (std::size_t j = 0; j < psi_1.size(1); ++j) { for (std::size_t k = 0; k < psi_1.size(2); ++k) { @@ -409,7 +409,7 @@ void output_convergence( } // loop on dimensions // apply square root for the L2 norm -#pragma omp parallel for collapse(3) +#pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < psi_1.size(0); ++i) { for (std::size_t j = 0; j < psi_1.size(1); ++j) { for (std::size_t k = 0; k < psi_1.size(2); ++k) { @@ -423,7 +423,7 @@ void output_convergence( // convergence radius Grid_FFT inv_convergence_radius({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); -#pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < psi_1.size(0); ++i) { for (std::size_t j = 0; j < psi_1.size(1); ++j) { for (std::size_t k = 0; k < psi_1.size(2); ++k) { From fa1abffd0d352c68f978799998646ba5d7f2e995 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 27 Oct 2019 19:49:54 +0100 Subject: [PATCH 004/130] minor cleanup --- include/operators.hh | 29 +++++++++++++---------------- src/ic_generator.cc | 2 +- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/include/operators.hh b/include/operators.hh index c78526e..63d94f4 100644 --- a/include/operators.hh +++ b/include/operators.hh @@ -2,25 +2,22 @@ namespace op{ -template< typename grid> -inline auto assign_to( grid& g ){return [&](auto i, auto v){ g[i] = v; };} +template< typename field> +inline auto assign_to( field& g ){return [&g](auto i, auto v){ g[i] = v; };} -template< typename grid> -inline auto add_to( grid& g ){return [&](auto i, auto v){ g[i] += v; 
};} +template< typename field, typename val > +inline auto multiply_add_to( field& g, val x ){return [&g,x](auto i, auto v){ g[i] += v*x; };} -template< typename grid> -inline auto add_twice_to( grid& g ){return [&](auto i, auto v){ g[i] += 2*v; };} +template< typename field> +inline auto add_to( field& g ){return [&g](auto i, auto v){ g[i] += v; };} -template< typename grid> -inline auto subtract_from( grid& g ){return [&](auto i, auto v){ g[i] -= v; };} +template< typename field> +inline auto add_twice_to( field& g ){return [&g](auto i, auto v){ g[i] += 2*v; };} -template< typename grid> -inline auto subtract_twice_from( grid& g ){return [&](auto i, auto v){ g[i] -= 2*v; };} +template< typename field> +inline auto subtract_from( field& g ){return [&g](auto i, auto v){ g[i] -= v; };} + +template< typename field> +inline auto subtract_twice_from( field& g ){return [&g](auto i, auto v){ g[i] -= 2*v; };} -// above template functions can be written as C++17 inline lambdas... but we're using C++14... -// inline auto assign_to = [](auto &g){return [&](auto i, auto v){ g[i] = v; };}; -// inline auto add_to = [](auto &g){return [&](auto i, auto v){ g[i] += v; };}; -// inline auto add_twice_to = [](auto &g){return [&](auto i, auto v){ g[i] += 2*v; };}; -// inline auto subtract_from = [](auto &g){return [&](auto i, auto v){ g[i] -= v; };}; -// inline auto subtract_twice_from = [](auto &g){return [&](auto i, auto v){ g[i] -= 2*v; };}; } diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 2446b4f..d4d160c 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -340,7 +340,7 @@ int Run( ConfigFile& the_config ) } else { csoca::flog << "unknown test '" << testing << "'" << std::endl; std::abort(); - } + } } else { // temporary storage of data Grid_FFT tmp({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); From 9a3ebc2bec85be141cfefb26ceaa0223afc2b5b5 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 1 Nov 2019 04:45:34 +0100 Subject: [PATCH 005/130] updates to vec3 class, mostly arithmetics --- include/vec3.hh | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/include/vec3.hh b/include/vec3.hh index 9295722..8efb1b2 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -3,24 +3,45 @@ template< typename T > class vec3{ private: + //! holds the data std::array data_; + +public: + //! expose access to elements via references T &x,&y,&z; -public: + + //! empty constructor vec3() : x(data_[0]),y(data_[1]),z(data_[2]){} + //! copy constructor vec3( const vec3 &v) : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} - - vec3( std::array&& d ) - : data_(std::move(d)), x(data_[0]),y(data_[1]),z(data_[2]){} + //! move constructor vec3( vec3 &&v) - : data_(std::move(v.data_)), x(data_[0]),y(data_[1]),z(data_[2]){} + : data_(std::move(v.data_)), x(data_[0]), y(data_[1]), z(data_[2]){} + //! 
construct from initialiser list + template + vec3(E&&...e) + : data_{{std::forward(e)...}}, x(data_[0]), y(data_[1]), z(data_[2]){} + T &operator[](size_t i){ return data_[i];} const T &operator[](size_t i) const { return data_[i]; } + + vec3 operator+( const vec3& v ) const{ return vec3({x+v.x,y+v.y,z+v.z}); } + + vec3 operator-( const vec3& v ) const{ return vec3({x-v.x,y-v.y,z-v.z}); } + + vec3 operator*( T s ) const{ return vec3({x*s,y*s,z*s}); } + + vec3& operator+=( const vec3& v ) const{ x+=v.x; y+=v.y; z+=v.z; return *this; } + + vec3& operator-=( const vec3& v ) const{ x-=v.x; y-=v.y; z-=v.z; return *this; } + + vec3& operator*=( T s ) const{ x*=s; y*=s; z*=s; return *this; } T dot(const vec3 &a) const { From 248c460b716bb204edf7ca2a4a72e2e758fd1f52 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 1 Nov 2019 04:47:02 +0100 Subject: [PATCH 006/130] rewrote particle load generator, added new load (refined sc lattice) --- include/grid_fft.hh | 27 ++------------ include/particle_generator.hh | 69 +++++++++++++++++++++++++++++++++-- src/grid_fft.cc | 1 - src/ic_generator.cc | 7 +++- 4 files changed, 74 insertions(+), 30 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index dcb3cb4..b66be1b 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -177,36 +177,17 @@ public: } template - vec3 get_unit_r_staggered(const size_t i, const size_t j, const size_t k) const + vec3 get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3 s) const { vec3 rr; - rr[0] = (real_t(i + local_0_start_) + 0.5) / real_t(n_[0]); - rr[1] = (real_t(j) + 0.5) / real_t(n_[1]); - rr[2] = (real_t(k) + 0.5) / real_t(n_[2]); + rr[0] = (real_t(i + local_0_start_) + s.x) / real_t(n_[0]); + rr[1] = (real_t(j) + s.y) / real_t(n_[1]); + rr[2] = (real_t(k) + s.z) / real_t(n_[2]); return rr; } - template - vec3 get_unit_r_shifted(const size_t i, const size_t j, const size_t k, double sx, double sy, double sz) const - { - vec3 rr; - - rr[0] = (real_t(i + local_0_start_) + sx) / real_t(n_[0]); - rr[1] = (real_t(j) + sy) / real_t(n_[1]); - rr[2] = (real_t(k) + sz) / real_t(n_[2]); - - return rr; - } - - void cell_pos(int ilevel, size_t i, size_t j, size_t k, double *x) const - { - x[0] = double(i + local_0_start_) / size(0); - x[1] = double(j) / size(1); - x[2] = double(k) / size(2); - } - vec3 get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const { return vec3({i + local_0_start_, j, k}); diff --git a/include/particle_generator.hh b/include/particle_generator.hh index da88813..1aa96b8 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -1,15 +1,29 @@ #pragma once +#include + namespace particle { enum lattice{ - lattice_sc=0, lattice_bcc=1, lattice_fcc=2 + lattice_sc = 0, // SC : simple cubic + lattice_bcc = 1, // BCC: body-centered cubic + lattice_fcc = 2, // FCC: face-centered cubic + lattice_rsc = 3, // RSC: refined simple cubic +}; + +const std::vector< std::vector> > lattice_shifts = +{ + // first shift must always be zero! 
(otherwise set_positions and set_velocities break) + /* SC : */ {{0.0,0.0,0.0}}, + /* BCC: */ {{0.0,0.0,0.0},{0.5,0.5,0.5}}, + /* FCC: */ {{0.0,0.0,0.0},{0.5,0.5,0.0},{0.5,0.0,0.5},{0.0,0.5,0.5}}, + /* RSC: */ {{0.0,0.0,0.0},{0.0,0.0,0.5},{0.0,0.5,0.0},{0.0,0.5,0.5},{0.5,0.0,0.0},{0.5,0.0,0.5},{0.5,0.5,0.0},{0.5,0.5,0.5}}, }; template void initialize_lattice( container& particles, lattice lattice_type, const field_t& field ){ const size_t num_p_in_load = field.local_size(); - const size_t overload = 1< -void set_positions( container& particles, lattice lattice_type, int idim, real_t lunit, field_t& field ) +void set_positions( container& particles, const lattice lattice_type, int idim, real_t lunit, field_t& field ) +{ + const size_t num_p_in_load = field.local_size(); + for( int ishift=0; ishift<(1<0 ){ + vec3 shift = lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1]; + field.shift_field( shift.x, shift.y, shift.z ); + } + auto ipcount0 = ishift * num_p_in_load; + for( size_t i=0,ipcount=ipcount0; i(i,j,k,lattice_shifts[lattice_type][ishift]); + particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + } + } + } + } +} + +template +void set_velocities( container& particles, lattice lattice_type, int idim, field_t& field ) +{ + const size_t num_p_in_load = field.local_size(); + for( int ishift=0; ishift<(1<0 ){ + vec3 shift = lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1]; + field.shift_field( shift.x, shift.y, shift.z ); + } + auto ipcount0 = ishift * num_p_in_load; + for( size_t i=0,ipcount=ipcount0; i +void set_positions_old( container& particles, lattice lattice_type, int idim, real_t lunit, field_t& field ) { const size_t num_p_in_load = field.local_size(); @@ -89,7 +150,7 @@ void set_positions( container& particles, lattice lattice_type, int idim, real_t } template -void set_velocities( container& particles, lattice lattice_type, int idim, field_t& field ) +void set_velocities_old( container& particles, lattice lattice_type, int idim, field_t& field ) { const size_t num_p_in_load = field.local_size(); diff --git a/src/grid_fft.cc b/src/grid_fft.cc index d5f103a..2881010 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -550,7 +550,6 @@ void Grid_FFT::Write_PowerSpectrum(std::string ofname) { std::vector bin_k, bin_P, bin_eP; std::vector bin_count; - int nbins = 4 * std::max(nhalf_[0], std::max(nhalf_[1], nhalf_[2])); this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count ); #if defined(USE_MPI) if (CONFIG::MPI_task_rank == 0) diff --git a/src/ic_generator.cc b/src/ic_generator.cc index d4d160c..d3a7ece 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -57,8 +57,11 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------------------------------------------- //! initialice particles on a bcc or fcc lattice instead of a standard sc lattice (doubles and quadruples the number of particles) std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); - const particle::lattice lattice_type = (lattice_str=="bcc")? particle::lattice_bcc - : ((lattice_str=="fcc")? particle::lattice_fcc : particle::lattice_sc); + const particle::lattice lattice_type = + ((lattice_str=="bcc")? particle::lattice_bcc + : ((lattice_str=="fcc")? particle::lattice_fcc + : ((lattice_str=="rsc")? particle::lattice_rsc + : particle::lattice_sc))); //-------------------------------------------------------------------------------------------------------- //! 
apply fixing of the complex mode amplitude following Angulo & Pontzen (2016) [https://arxiv.org/abs/1603.05253] From c593a7067d72d5b31d7b020c8d465644badeb415 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 1 Nov 2019 04:49:40 +0100 Subject: [PATCH 007/130] cleanup particle generator --- include/particle_generator.hh | 127 +--------------------------------- 1 file changed, 1 insertion(+), 126 deletions(-) diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 1aa96b8..7c69e7d 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -16,7 +16,7 @@ const std::vector< std::vector> > lattice_shifts = // first shift must always be zero! (otherwise set_positions and set_velocities break) /* SC : */ {{0.0,0.0,0.0}}, /* BCC: */ {{0.0,0.0,0.0},{0.5,0.5,0.5}}, - /* FCC: */ {{0.0,0.0,0.0},{0.5,0.5,0.0},{0.5,0.0,0.5},{0.0,0.5,0.5}}, + /* FCC: */ {{0.0,0.0,0.0},{0.0,0.5,0.5},{0.5,0.0,0.5},{0.5,0.5,0.0}}, /* RSC: */ {{0.0,0.0,0.0},{0.0,0.0,0.5},{0.0,0.5,0.0},{0.0,0.5,0.5},{0.5,0.0,0.0},{0.5,0.0,0.5},{0.5,0.5,0.0},{0.5,0.5,0.5}}, }; @@ -83,129 +83,4 @@ void set_velocities( container& particles, lattice lattice_type, int idim, field } -///// deprecated code below //////////////////////////////////////////////////// - -// invalidates field, phase shifted to unspecified position after return -template -void set_positions_old( container& particles, lattice lattice_type, int idim, real_t lunit, field_t& field ) -{ - const size_t num_p_in_load = field.local_size(); - - for( size_t i=0,ipcount=0; i(i,j,k); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } - - if( lattice_type == particle::lattice_bcc ){ - field.shift_field( 0.5, 0.5, 0.5 ); - auto ipcount0 = num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,0.5,0.5,0.5); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } - } - else if( lattice_type == particle::lattice_fcc ){ - // 0.5 0.5 0.0 - field.shift_field( 0.5, 0.5, 0.0 ); - auto ipcount0 = num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,0.5,0.5,0.0); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } - // 0.0 0.5 0.5 - field.shift_field( -0.5, 0.0, 0.5 ); - ipcount0 = 2*num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,0.0,0.5,0.5); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } - // 0.5 0.0 0.5 - field.shift_field( 0.5, -0.5, 0.0 ); - ipcount0 = 3*num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,0.5,0.0,0.5); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } - } -} - -template -void set_velocities_old( container& particles, lattice lattice_type, int idim, field_t& field ) -{ - const size_t num_p_in_load = field.local_size(); - - for( size_t i=0,ipcount=0; i Date: Fri, 1 Nov 2019 04:58:08 +0100 Subject: [PATCH 008/130] removed saved findfftw3 cmake --- new/FindFFTW3.cmake | 232 -------------------------------------------- 1 file changed, 232 deletions(-) delete mode 100644 new/FindFFTW3.cmake diff --git a/new/FindFFTW3.cmake b/new/FindFFTW3.cmake deleted file mode 100644 index 80aa67b..0000000 --- a/new/FindFFTW3.cmake +++ /dev/null @@ -1,232 +0,0 @@ -# - Try to find FFTW -# -# By default, it will look only for the serial libraries with single, double, -# and long double precision. 
Any combination of precision (SINGLE, DOUBLE, -# LONGDOUBLE) and library type (SERIAL, [THREADS|OPENMP], MPI) is possible by -# using the COMPONENTS keyword. For example, -# -# find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP MPI) -# -# Once done this will define -# FFTW3_FOUND - System has FFTW3 -# FFTW3_INCLUDE_DIRS - The FFTW3 include directories -# FFTW3_LIBRARIES - The libraries needed to use FFTW3 -# FFTW3_DEFINITIONS - Compiler switches required for using FFTW3 -# FFTW3_$KIND_$PARALLEL_FOUND- Set if FFTW3 exists in KIND precision format for PARALLEL mode. -# where KIND can be: SINGLE, DOUBLE, LONGDOUBLE -# and PARALLEL: SERIAL, OPENMP, MPI, THREADS. -# FFTW3_$KIND_$PARALLEL_LIBRARY - The libraries needed to use. -# FFTW3_INCLUDE_DIR_PARALLEL - The FFTW3 include directories for parallels mode. - -cmake_policy(SET CMP0054 NEW) - -if(FFTW3_FOUND) - return() -endif() - -if(FFTW3_INCLUDE_DIR AND FFTW3_LIBRARIES) - set(FFTW3_FOUND TRUE) - foreach(component ${FFTW3_FIND_COMPONENTS}) - if("${FFTW3_${component}_LIBRARY}" STREQUAL "") - set(FFTW3_${component}_LIBRARY "${FFTW3_LIBRARIES}") - endif() - endforeach() - return() -endif() - -macro(find_specific_libraries KIND PARALLEL) - list(APPEND FFTW3_FIND_COMPONENTS ${KIND}_${PARALLEL}) - if(NOT (${PARALLEL} STREQUAL "SERIAL") AND NOT ${PARALLEL}_FOUND) - message(FATAL_ERROR "Please, find ${PARALLEL} libraries before FFTW") - endif() - - find_library(FFTW3_${KIND}_${PARALLEL}_LIBRARY NAMES - fftw3${SUFFIX_${KIND}}${SUFFIX_${PARALLEL}}${SUFFIX_FINAL} HINTS ${HINT_DIRS}) - if(FFTW3_${KIND}_${PARALLEL}_LIBRARY MATCHES fftw3) - list(APPEND FFTW3_LIBRARIES ${FFTW3_${KIND}_${PARALLEL}_LIBRARY}) - set(FFTW3_${KIND}_${PARALLEL}_FOUND TRUE) - - STRING(TOLOWER "${KIND}" kind) - STRING(TOLOWER "${PARALLEL}" parallel) - if(FFTW3_${kind}_${parallel}_LIBRARY MATCHES "\\.a$") - add_library(fftw3::${kind}::${parallel} STATIC IMPORTED GLOBAL) - else() - add_library(fftw3::${kind}::${parallel} SHARED IMPORTED GLOBAL) - endif() - - # MPI Has a different included library than the others - # FFTW3_INCLUDE_DIR_PARALLEL will change depending of which on is used. 
- set(FFTW3_INCLUDE_DIR_PARALLEL ${FFTW3_INCLUDE_DIR} ) - if(PARALLEL STREQUAL "MPI") - set(FFTW3_INCLUDE_DIR_PARALLEL ${FFTW3_${PARALLEL}_INCLUDE_DIR}) - endif() - - set_target_properties(fftw3::${kind}::${parallel} PROPERTIES - IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}") - - # adding target properties to the different cases - ## MPI - if(PARALLEL STREQUAL "MPI") - if(MPI_C_LIBRARIES) - set_target_properties(fftw3::${kind}::mpi PROPERTIES - IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}" - IMPORTED_LINK_INTERFACE_LIBRARIES ${MPI_C_LIBRARIES}) - endif() - endif() - ## OpenMP - if(PARALLEL STREQUAL "OPENMP") - if(OPENMP_C_FLAGS) - set_target_properties(fftw3::${kind}::${parallel} PROPERTIES - IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}" - INTERFACE_COMPILE_OPTIONS "${OPENMP_C_FLAGS}") - endif() - endif() - ## THREADS - if(PARALLEL STREQUAL "THREADS") - if(CMAKE_THREAD_LIBS_INIT) # TODO: this is not running - set_target_properties(fftw3::${kind}::${parallel} PROPERTIES - IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}" - INTERFACE_COMPILE_OPTIONS "${CMAKE_THREAD_LIBS_INIT}") - endif() - endif() - endif() -endmacro() - - - - -if(NOT FFTW3_FIND_COMPONENTS) - set(FFTW3_FIND_COMPONENTS SINGLE DOUBLE LONGDOUBLE SERIAL) -endif() - -string(TOUPPER "${FFTW3_FIND_COMPONENTS}" FFTW3_FIND_COMPONENTS) - -list(FIND FFTW3_FIND_COMPONENTS SINGLE LOOK_FOR_SINGLE) -list(FIND FFTW3_FIND_COMPONENTS DOUBLE LOOK_FOR_DOUBLE) -list(FIND FFTW3_FIND_COMPONENTS LONGDOUBLE LOOK_FOR_LONGDOUBLE) -list(FIND FFTW3_FIND_COMPONENTS THREADS LOOK_FOR_THREADS) -list(FIND FFTW3_FIND_COMPONENTS OPENMP LOOK_FOR_OPENMP) -list(FIND FFTW3_FIND_COMPONENTS MPI LOOK_FOR_MPI) -list(FIND FFTW3_FIND_COMPONENTS SERIAL LOOK_FOR_SERIAL) - -# FIXME - This may fail in computers wihtout serial -# Default serial to obtain version number -set(LOOK_FOR_SERIAL 1) - -# set serial as default if none parallel component has been set -if((LOOK_FOR_THREADS LESS 0) AND (LOOK_FOR_MPI LESS 0) AND - (LOOK_FOR_OPENMP LESS 0)) - set(LOOK_FOR_SERIAL 1) -endif() - -if(MPI_C_FOUND) - set(MPI_FOUND ${MPI_C_FOUND}) -endif() -unset(FFTW3_FIND_COMPONENTS) - - - - -if(WIN32) - set(HINT_DIRS ${FFTW3_DIRECTORY} $ENV{FFTW3_DIRECTORY}) -else() - find_package(PkgConfig) - if(PKG_CONFIG_FOUND) - pkg_check_modules(PC_FFTW QUIET fftw3) - set(FFTW3_DEFINITIONS ${PC_FFTW3_CFLAGS_OTHER}) - endif() - set(HINT_DIRS ${PC_FFTW3_INCLUDEDIR} ${PC_FFTW3_INCLUDE_DIRS} - ${FFTW3_INCLUDE_DIR} $ENV{FFTW3_INCLUDE_DIR} ) -endif() - -find_path(FFTW3_INCLUDE_DIR NAMES fftw3.h HINTS ${HINT_DIRS}) -if (LOOK_FOR_MPI) # Probably is going to be the same as fftw3.h - find_path(FFTW3_MPI_INCLUDE_DIR NAMES fftw3-mpi.h HINTS ${HINT_DIRS}) -endif() - -function(find_version OUTVAR LIBRARY SUFFIX) - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/main.c - # TODO: do we need to add include for mpi headers? 
- "#include - #include - int main(int nargs, char const *argv[]) { - printf(\"%s\", fftw${SUFFIX}_version); - return 0; - }" - ) -if(NOT CMAKE_CROSSCOMPILING) - try_run(RUN_RESULT COMPILE_RESULT - "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/" - "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/main.c" - CMAKE_FLAGS - -DLINK_LIBRARIES=${LIBRARY} - -DINCLUDE_DIRECTORIES=${FFTW3_INCLUDE_DIR} - RUN_OUTPUT_VARIABLE OUTPUT - COMPILE_OUTPUT_VARIABLE COUTPUT - ) - endif() - if(RUN_RESULT EQUAL 0) - string(REGEX REPLACE - ".*([0-9]+\\.[0-9]+\\.[0-9]+).*" - "\\1" VERSION_STRING "${OUTPUT}" - ) - set(${OUTVAR} ${VERSION_STRING} PARENT_SCOPE) - endif() -endfunction() - -set(SUFFIX_DOUBLE "") -set(SUFFIX_SINGLE "f") -set(SUFFIX_LONGDOUBLE "l") -set(SUFFIX_SERIAL "") -set(SUFFIX_OPENMP "_omp") -set(SUFFIX_MPI "_mpi") -set(SUFFIX_THREADS "_threads") -set(SUFFIX_FINAL "") - -if(WIN32) - set(SUFFIX_FINAL "-3") -else() - set(HINT_DIRS ${PC_FFTW3_LIBDIR} ${PC_FFTW3_LIBRARY_DIRS} - $ENV{FFTW3_LIBRARY_DIR} ${FFTW3_LIBRARY_DIR} ) -endif(WIN32) - -unset(FFTW3_LIBRARIES) -set(FFTW3_INCLUDE_DIRS ${FFTW3_INCLUDE_DIR} ) # TODO what's for? -set(FFTW3_FLAGS_C "") -foreach(KIND SINGLE DOUBLE LONGDOUBLE) - if(LOOK_FOR_${KIND} LESS 0) - continue() - endif() - foreach(PARALLEL SERIAL MPI OPENMP THREADS) - if(LOOK_FOR_${PARALLEL} LESS 0) - continue() - endif() - find_specific_libraries(${KIND} ${PARALLEL}) - endforeach() -endforeach() - -if(FFTW3_INCLUDE_DIR) - list(GET FFTW3_FIND_COMPONENTS 0 smallerrun) - string(REPLACE "_" ";" RUNLIST ${smallerrun}) - list(GET RUNLIST 0 KIND) - list(GET RUNLIST 1 PARALLEL) - unset(smallerrun) - unset(RUNLIST) - # suffix is quoted so it pass empty in the case of double as it's empty - find_version(FFTW3_VERSION_STRING ${FFTW3_${KIND}_${PARALLEL}_LIBRARY} - "${SUFFIX_${KIND}}") -endif() - -# FIXME: fails if use REQUIRED. 
-include(FindPackageHandleStandardArgs)
-# handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE
-# if all listed variables are TRUE
-find_package_handle_standard_args(FFTW3
- REQUIRED_VARS FFTW3_LIBRARIES FFTW3_INCLUDE_DIR
- VERSION_VAR FFTW3_VERSION_STRING
- HANDLE_COMPONENTS
-)

From 1ebc5f2ff777002fbe3708abbc668163a8b789a8 Mon Sep 17 00:00:00 2001
From: Oliver Hahn
Date: Fri, 1 Nov 2019 10:46:31 +0100
Subject: [PATCH 009/130] removed deprecated member function

---
 include/grid_fft.hh | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/include/grid_fft.hh b/include/grid_fft.hh
index b66be1b..4c8464c 100644
--- a/include/grid_fft.hh
+++ b/include/grid_fft.hh
@@ -636,16 +636,6 @@ public:
 void Write_PDF(std::string ofname, int nbins = 1000, double scale = 1.0, double rhomin = 1e-3, double rhomax = 1e3);
- // void stagger_field(void)
- // {
- // FourierTransformForward();
- // apply_function_k_dep([&](auto x, auto k) -> ccomplex_t {
- // real_t shift = k[0] * get_dx()[0] + k[1] * get_dx()[1] + k[2] * get_dx()[2];
- // return x * std::exp(ccomplex_t(0.0, 0.5 * shift));
- // });
- // FourierTransformBackward();
- // }
-
 void shift_field( double sx, double sy, double sz )

From eb2743c61b5ac880d5c8fa146d7a53e67e6c92c9 Mon Sep 17 00:00:00 2001
From: Oliver Hahn
Date: Fri, 1 Nov 2019 12:01:39 +0100
Subject: [PATCH 010/130] removal of more deprecated member functions

---
 include/grid_fft.hh | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/include/grid_fft.hh b/include/grid_fft.hh
index 4c8464c..4848f0e 100644
--- a/include/grid_fft.hh
+++ b/include/grid_fft.hh
@@ -636,25 +636,20 @@ public:
 void Write_PDF(std::string ofname, int nbins = 1000, double scale = 1.0, double rhomin = 1e-3, double rhomax = 1e3);
- void shift_field( double sx, double sy, double sz )
+ void shift_field( const vec3& s )
 {
 FourierTransformForward();
 apply_function_k_dep([&](auto x, auto k) -> ccomplex_t {
 #ifdef WITH_MPI
- real_t shift = sy * k[0] * get_dx()[0] + sx * k[1] * get_dx()[1] + sz * k[2] * get_dx()[2];
+ real_t shift = s.y * k[0] * get_dx()[0] + s.x * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2];
 #else
- real_t shift = sx * k[0] * get_dx()[0] + sy * k[1] * get_dx()[1] + sz * k[2] * get_dx()[2];
+ real_t shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2];
 #endif
 return x * std::exp(ccomplex_t(0.0, shift));
 });
 FourierTransformBackward();
 }
- void stagger_field(void)
- {
- this->shift_field( 0.5, 0.5, 0.5 );
- }
 void zero_DC_mode(void)
 {
 if (space_ == kspace_id)

From d80bf34c105fda150ccf7b776223bb523a453d1e Mon Sep 17 00:00:00 2001
From: Oliver Hahn
Date: Fri, 1 Nov 2019 12:02:09 +0100
Subject: [PATCH 011/130] added file headers

---
 include/system_stat.hh | 7 +++++++
 include/testing.hh | 7 +++++++
 include/vec3.hh | 7 +++++++
 3 files changed, 21 insertions(+)

diff --git a/include/system_stat.hh b/include/system_stat.hh
index f911a42..fb7f6f3 100644
--- a/include/system_stat.hh
+++ b/include/system_stat.hh
@@ -1,3 +1,10 @@
+/*******************************************************************\
+ system_stat.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations
+
+ CHANGELOG (only majors, for details see repo):
+ 08/2019 - Oliver Hahn - first implementation
+\*******************************************************************/
 #pragma once
 #include
diff --git a/include/testing.hh b/include/testing.hh
index 2395db3..e5d2a99 100644
--- a/include/testing.hh
+++ b/include/testing.hh
@@ -1,3 +1,10 @@
+/*******************************************************************\
+ testing.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations
+
+ CHANGELOG (only majors, for details see repo):
+ 10/2019 - Michael Michaux & Oliver Hahn - first implementation
+\*******************************************************************/
 #pragma once
 #include
diff --git a/include/vec3.hh b/include/vec3.hh
index 8efb1b2..ea7a2f2 100644
--- a/include/vec3.hh
+++ b/include/vec3.hh
@@ -1,3 +1,10 @@
+/*******************************************************************\
+ vec3.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations
+
+ CHANGELOG (only majors, for details see repo):
+ 06/2019 - Oliver Hahn - first implementation
+\*******************************************************************/
 #pragma once
 template< typename T >

From 68080d2545a81ba9991fd7ec9c35b7a5bfb8e4eb Mon Sep 17 00:00:00 2001
From: Oliver Hahn
Date: Fri, 1 Nov 2019 12:03:02 +0100
Subject: [PATCH 012/130] added possibility to write 32 or 64bit positions and ids

---
 include/output_plugin.hh | 7 +++
 include/particle_container.hh | 114 ++++++++++++++++++++++++----------
 include/particle_generator.hh | 55 +++++++++++-----
 src/ic_generator.cc | 6 +-
 src/plugins/output_gadget2.cc | 66 +++++++++++++++-----
 src/plugins/output_generic.cc | 4 ++
 src/plugins/output_grafic2.cc | 4 ++
 7 files changed, 185 insertions(+), 71 deletions(-)

diff --git a/include/output_plugin.hh b/include/output_plugin.hh
index cc092d3..b7c23e1 100644
--- a/include/output_plugin.hh
+++ b/include/output_plugin.hh
@@ -21,6 +21,7 @@
 enum class output_type {particles,field_lagrangian,field_eulerian};
+
 class output_plugin
 {
 protected:
@@ -57,6 +58,12 @@ public:
 //! routine to query whether species is written as particle data
 // virtual bool write_species_as_particles( const cosmo_species &s ){ return !write_species_as_grid(s); }
+
+ //! query if output wants 64bit precision for real values
+ virtual bool has_64bit_reals() const = 0;
+
+ //! query if output wants 64bit precision for integer values
+ virtual bool has_64bit_ids() const = 0;
 //!
routine to return a multiplicative factor that contains the desired position units for the output virtual real_t position_unit() const = 0; diff --git a/include/particle_container.hh b/include/particle_container.hh index fb05889..92b683c 100644 --- a/include/particle_container.hh +++ b/include/particle_container.hh @@ -1,3 +1,10 @@ +/*******************************************************************\ + particle_container.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 10/2019 - Oliver Hahn - first implementation +\*******************************************************************/ #pragma once #ifdef USE_MPI @@ -13,57 +20,96 @@ namespace particle{ class container { public: - std::vector positions_, velocities_; - std::vector ids_; + std::vector positions32_, velocities32_; + std::vector positions64_, velocities64_; + + std::vector ids32_; + std::vector ids64_; + - container() - { - } + container(){ } container(const container &) = delete; - const void* get_pos_ptr() const{ - return reinterpret_cast( &positions_[0] ); - } - - const void* get_vel_ptr() const{ - return reinterpret_cast( &velocities_[0] ); - } - - const void* get_ids_ptr() const{ - return reinterpret_cast( &ids_[0] ); - } - - void allocate(size_t nump) + void allocate(size_t nump, bool b64reals, bool b64ids) { - positions_.resize(3 * nump); - velocities_.resize(3 * nump); - ids_.resize(nump); + if( b64reals ){ + positions64_.resize(3 * nump); + velocities64_.resize(3 * nump); + positions32_.clear(); + velocities32_.clear(); + }else{ + positions32_.resize(3 * nump); + velocities32_.resize(3 * nump); + positions64_.clear(); + velocities64_.clear(); + } + + if( b64ids ){ + ids64_.resize(nump); + ids32_.clear(); + }else{ + ids32_.resize(nump); + ids64_.clear(); + } } - void set_pos(size_t ipart, size_t idim, real_t p) - { - positions_[3 * ipart + idim] = p; + const void* get_pos32_ptr() const{ + return reinterpret_cast( &positions32_[0] ); } - void set_vel(size_t ipart, size_t idim, real_t p) - { - velocities_[3 * ipart + idim] = p; + void set_pos32(size_t ipart, size_t idim, float p){ + positions32_[3 * ipart + idim] = p; } - void set_id(size_t ipart, id_t id) - { - ids_[ipart] = id; + const void* get_pos64_ptr() const{ + return reinterpret_cast( &positions64_[0] ); + } + + inline void set_pos64(size_t ipart, size_t idim, double p){ + positions64_[3 * ipart + idim] = p; + } + + inline const void* get_vel32_ptr() const{ + return reinterpret_cast( &velocities32_[0] ); + } + + inline void set_vel32(size_t ipart, size_t idim, float p){ + velocities32_[3 * ipart + idim] = p; + } + + const void* get_vel64_ptr() const{ + return reinterpret_cast( &velocities64_[0] ); + } + + inline void set_vel64(size_t ipart, size_t idim, double p){ + velocities64_[3 * ipart + idim] = p; + } + + const void* get_ids32_ptr() const{ + return reinterpret_cast( &ids32_[0] ); + } + + void set_id32(size_t ipart, uint32_t id){ + ids32_[ipart] = id; + } + + const void* get_ids64_ptr() const{ + return reinterpret_cast( &ids64_[0] ); + } + + void set_id64(size_t ipart, uint64_t id){ + ids64_[ipart] = id; } size_t get_local_num_particles(void) const { - return ids_.size(); + return std::max(ids32_.size(),ids64_.size()); } size_t get_global_num_particles(void) const { - size_t local_nump = ids_.size(), global_nump; + size_t local_nump = this->get_local_num_particles(), global_nump; #ifdef USE_MPI MPI_Allreduce(reinterpret_cast(&local_nump), 
reinterpret_cast(&global_nump), 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); @@ -97,11 +143,11 @@ public: void dump(void) { - for (size_t i = 0; i < ids_.size(); ++i) + /*for (size_t i = 0; i < ids_.size(); ++i) { std::cout << positions_[3 * i + 0] << " " << positions_[3 * i + 1] << " " << positions_[3 * i + 2] << " " << velocities_[3 * i + 0] << " " << velocities_[3 * i + 1] << " " << velocities_[3 * i + 2] << std::endl; - } + }*/ } }; diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 7c69e7d..1c176eb 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -1,3 +1,10 @@ +/*******************************************************************\ + particle_generator.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 10/2019 - Oliver Hahn - first implementation +\*******************************************************************/ #pragma once #include @@ -21,17 +28,23 @@ const std::vector< std::vector> > lattice_shifts = }; template -void initialize_lattice( container& particles, lattice lattice_type, const field_t& field ){ +void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const field_t& field ){ + // number of modes present in the field const size_t num_p_in_load = field.local_size(); + // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load): const size_t overload = 1ull< -void set_positions( container& particles, const lattice lattice_type, int idim, real_t lunit, field_t& field ) +void set_positions( container& particles, const lattice lattice_type, int idim, real_t lunit, const bool b64reals, field_t& field ) { const size_t num_p_in_load = field.local_size(); for( int ishift=0; ishift<(1<0 ){ - vec3 shift = lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1]; - field.shift_field( shift.x, shift.y, shift.z ); + field.shift_field( lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift-1] ); } - auto ipcount0 = ishift * num_p_in_load; + // read out values from phase shifted field and set assoc. particle's value + const auto ipcount0 = ishift * num_p_in_load; for( size_t i=0,ipcount=ipcount0; i(i,j,k,lattice_shifts[lattice_type][ishift]); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + if( b64reals ){ + particles.set_pos64( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + }else{ + particles.set_pos32( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + } } } } @@ -62,20 +79,24 @@ void set_positions( container& particles, const lattice lattice_type, int idim, } template -void set_velocities( container& particles, lattice lattice_type, int idim, field_t& field ) +void set_velocities( container& particles, lattice lattice_type, int idim, const bool b64reals, field_t& field ) { const size_t num_p_in_load = field.local_size(); for( int ishift=0; ishift<(1<0 ){ - vec3 shift = lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1]; - field.shift_field( shift.x, shift.y, shift.z ); + field.shift_field( lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1] ); } - auto ipcount0 = ishift * num_p_in_load; + // read out values from phase shifted field and set assoc. 
particle's value + const auto ipcount0 = ishift * num_p_in_load; for( size_t i=0,ipcount=ipcount0; iwrite_species_as( this_species ) == output_type::particles ) { // allocate particle structure and generate particle IDs - particle::initialize_lattice( particles, lattice_type, tmp ); + particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), tmp ); } // write out positions @@ -472,7 +472,7 @@ int Run( ConfigFile& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_positions( particles, lattice_type, idim, lunit, tmp ); + particle::set_positions( particles, lattice_type, idim, lunit, the_output_plugin->has_64bit_reals(), tmp ); } // otherwise write out the grid data directly to the output plugin // else if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_lagrangian ) @@ -518,7 +518,7 @@ int Run( ConfigFile& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_velocities( particles, lattice_type, idim, tmp ); + particle::set_velocities( particles, lattice_type, idim, the_output_plugin->has_64bit_reals(), tmp ); } // otherwise write out the grid data directly to the output plugin else if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian ) diff --git a/src/plugins/output_gadget2.cc b/src/plugins/output_gadget2.cc index fddb734..57d9cc1 100644 --- a/src/plugins/output_gadget2.cc +++ b/src/plugins/output_gadget2.cc @@ -3,6 +3,7 @@ constexpr int empty_fill_bytes{56}; +template class gadget2_output_plugin : public output_plugin { public: @@ -33,6 +34,7 @@ protected: int num_files_; header this_header_; real_t lunit_, vunit_; + bool blongids_; public: //! 
constructor @@ -47,6 +49,7 @@ public: real_t astart = 1.0/(1.0+cf_.GetValue("setup", "zstart")); lunit_ = cf_.GetValue("setup", "BoxLength"); vunit_ = lunit_ / std::sqrt(astart); + blongids_ = cf_.GetValueSafe("output","UseLongids",false); } output_type write_species_as( const cosmo_species & ) const { return output_type::particles; } @@ -55,6 +58,16 @@ public: real_t velocity_unit() const { return vunit_; } + bool has_64bit_reals() const{ + if( typeid(write_real_t)==typeid(double) ) return true; + return false; + } + + bool has_64bit_ids() const{ + if( blongids_ ) return true; + return false; + } + void write_particle_data(const particle::container &pc, const cosmo_species &s ) { // fill the Gadget-2 header @@ -121,19 +134,39 @@ public: ofs.write( reinterpret_cast(&this_header_), sizeof(header) ); ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - blocksz = 3 * sizeof(float) * pc.get_local_num_particles(); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_pos_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + // we write double precision + if( this->has_64bit_reals() ){ + blocksz = 3 * sizeof(double) * pc.get_local_num_particles(); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write( reinterpret_cast(pc.get_pos64_ptr()), blocksz ); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write( reinterpret_cast(pc.get_vel64_ptr()), blocksz ); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + }else{ + blocksz = 3 * sizeof(float) * pc.get_local_num_particles(); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write( reinterpret_cast(pc.get_pos32_ptr()), blocksz ); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write( reinterpret_cast(pc.get_vel32_ptr()), blocksz ); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + } - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_vel_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - - blocksz = sizeof(float) * pc.get_local_num_particles(); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_ids_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + // we write long IDs + if( this->has_64bit_ids() ){ + blocksz = sizeof(uint64_t) * pc.get_local_num_particles(); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write( reinterpret_cast(pc.get_ids64_ptr()), blocksz ); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + }else{ + blocksz = sizeof(uint32_t) * pc.get_local_num_particles(); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write( reinterpret_cast(pc.get_ids32_ptr()), blocksz ); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + } } }; @@ -141,9 +174,8 @@ public: namespace { - output_plugin_creator_concrete creator1("gadget2"); -// output_plugin_creator_concrete> creator1("gadget2"); -// #ifndef SINGLE_PRECISION -// output_plugin_creator_concrete> creator2("gadget2_double"); -// #endif + output_plugin_creator_concrete> creator1("gadget2"); +#if !defined(USE_SINGLEPRECISION) + output_plugin_creator_concrete> creator3("gadget2_double"); +#endif } // namespace diff --git a/src/plugins/output_generic.cc b/src/plugins/output_generic.cc index 
10eacfc..1a53e84 100644 --- a/src/plugins/output_generic.cc +++ b/src/plugins/output_generic.cc @@ -50,6 +50,10 @@ public: return output_type::field_lagrangian; } + bool has_64bit_reals() const{ return true; } + + bool has_64bit_ids() const{ return true; } + real_t position_unit() const { return 1.0; } real_t velocity_unit() const { return 1.0; } diff --git a/src/plugins/output_grafic2.cc b/src/plugins/output_grafic2.cc index b511dd9..31e8a04 100644 --- a/src/plugins/output_grafic2.cc +++ b/src/plugins/output_grafic2.cc @@ -102,6 +102,10 @@ public: return output_type::field_lagrangian; } + bool has_64bit_reals() const{ return false; } + + bool has_64bit_ids() const{ return false; } + real_t position_unit() const { return lunit_; } real_t velocity_unit() const { return vunit_; } From 3ee9dfd6ddd580116ff618d95857a3a76d164211 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 1 Nov 2019 12:19:55 +0100 Subject: [PATCH 013/130] adjusted example conf file --- example.conf | 57 +++++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/example.conf b/example.conf index 3b6d07e..58fc969 100644 --- a/example.conf +++ b/example.conf @@ -14,33 +14,6 @@ DoFixing = no # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!) ParticleLoad = sc -[testing] -# enables diagnostic output -# can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence' -test = convergence - -[execution] -NumThreads = 4 - -[output] -fname_hdf5 = output_sch.hdf5 -fbase_analysis = output - -format = gadget2 -filename = ics_gadget.dat - -#format = generic -#filename = debug.hdf5 -#generic_out_eulerian = yes - -#format = grafic2 -#filename = ics_ramses -#grafic_use_SPT = yes - -[random] -generator = NGENIC -seed = 9001 - [cosmology] #transfer = CLASS transfer = eisenstein @@ -56,3 +29,33 @@ nspec = 0.961 #LSS_aniso_ly = 0.1 #LSS_aniso_lz = -0.2 +[random] +generator = NGENIC +seed = 9001 + +[testing] +# enables diagnostic output +# can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence' +test = convergence + +[execution] +NumThreads = 4 + +[output] +fname_hdf5 = output.hdf5 +fbase_analysis = output + +format = gadget2 +filename = ics_gadget.dat +UseLongids = false + +#format = generic +#filename = debug.hdf5 +#generic_out_eulerian = yes + +#format = grafic2 +#filename = ics_ramses +#grafic_use_SPT = yes + + + From a5253bcace1d074a5d6159b95389e1d9a6b1651a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 1 Nov 2019 13:16:05 +0100 Subject: [PATCH 014/130] cosmetic changes --- include/vec3.hh | 38 +++++++++++++++++++++------------ src/main.cc | 23 ++++++++++---------- src/output_plugin.cc | 1 + src/random_plugin.cc | 3 ++- src/transfer_function_plugin.cc | 1 + 5 files changed, 40 insertions(+), 26 deletions(-) diff --git a/include/vec3.hh b/include/vec3.hh index ea7a2f2..3f48967 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -7,6 +7,7 @@ \*******************************************************************/ #pragma once +//! implements a simple class of 3-vectors of arbitrary scalar type template< typename T > class vec3{ private: @@ -29,41 +30,50 @@ public: vec3( vec3 &&v) : data_(std::move(v.data_)), x(data_[0]), y(data_[1]), z(data_[2]){} - //! construct from initialiser list + //! construct vec3 from initializer list template vec3(E&&...e) : data_{{std::forward(e)...}}, x(data_[0]), y(data_[1]), z(data_[2]){} + //! 
braket index access to vector components T &operator[](size_t i){ return data_[i];} + //! const braket index access to vector components const T &operator[](size_t i) const { return data_[i]; } - vec3 operator+( const vec3& v ) const{ return vec3({x+v.x,y+v.y,z+v.z}); } + //! implementation of summation of vec3 + vec3 operator+( const vec3& v ) const{ return vec3({x+v.x,y+v.y,z+v.z}); } - vec3 operator-( const vec3& v ) const{ return vec3({x-v.x,y-v.y,z-v.z}); } + //! implementation of difference of vec3 + vec3 operator-( const vec3& v ) const{ return vec3({x-v.x,y-v.y,z-v.z}); } - vec3 operator*( T s ) const{ return vec3({x*s,y*s,z*s}); } + //! implementation of scalar multiplication + vec3 operator*( T s ) const{ return vec3({x*s,y*s,z*s}); } + //! implementation of += operator vec3& operator+=( const vec3& v ) const{ x+=v.x; y+=v.y; z+=v.z; return *this; } + //! implementation of -= operator vec3& operator-=( const vec3& v ) const{ x-=v.x; y-=v.y; z-=v.z; return *this; } + //! multiply with scalar vec3& operator*=( T s ) const{ x*=s; y*=s; z*=s; return *this; } + //! compute dot product with another vector T dot(const vec3 &a) const { return data_[0] * a.data_[0] + data_[1] * a.data_[1] + data_[2] * a.data_[2]; } - T norm_squared(void) const - { - return this->dot(*this); - } + //! returns 2-norm squared of vector + T norm_squared(void) const { return this->dot(*this); } - T norm(void) const - { - return std::sqrt( this->norm_squared() ); - } - - + //! returns 2-norm of vector + T norm(void) const { return std::sqrt( this->norm_squared() ); } }; + +//! multiplication with scalar +template +vec3 operator*( T s, const vec3& v ){ + return vec3({v.x*s,v.y*s,v.z*s}); +} diff --git a/src/main.cc b/src/main.cc index 72e9a38..2416a20 100644 --- a/src/main.cc +++ b/src/main.cc @@ -49,16 +49,17 @@ int main( int argc, char** argv ) } #endif + // Ascii ART logo. generated via http://patorjk.com/software/taag/#p=display&f=Nancyj&t=monofonIC csoca::ilog << "\n" - << " unigrid MUSIC .8888b dP a88888b. \n" - << " 88 \" 88 d8\' `88 \n" - << " 88d8b.d8b. .d8888b. 88d888b. .d8888b. 88aaa .d8888b. 88d888b. 88 88 \n" - << " 88\'`88\'`88 88\' `88 88\' `88 88\' `88 88 88\' `88 88\' `88 88 88 \n" - << " 88 88 88 88. .88 88 88 88. .88 88 88. .88 88 88 88 Y8. .88 \n" - << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl - << "version : v0.1a, git rev. : " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl - << "-------------------------------------------------------------------------------" << std::endl; - + << " The unigrid version of MUSIC-2 .8888b dP a88888b. \n" + << " 88 \" 88 d8\' `88 \n" + << " 88d8b.d8b. .d8888b. 88d888b. .d8888b. 88aaa .d8888b. 88d888b. 88 88 \n" + << " 88\'`88\'`88 88\' `88 88\' `88 88\' `88 88 88\' `88 88\' `88 88 88 \n" + << " 88 88 88 88. .88 88 88 88. .88 88 88. .88 88 88 88 Y8. 
.88 \n" + << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl + << "Build was compiled on " << __DATE__ << " at " << __TIME__ << std::endl + << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl + << "-------------------------------------------------------------------------------\n" << std::endl; //------------------------------------------------------------------------------ // Parse command line options @@ -71,7 +72,7 @@ int main( int argc, char** argv ) print_RNG_plugins(); print_output_plugins(); - csoca::elog << "In order to run, you need to specify a parameter file!" << std::endl; + csoca::elog << "In order to run, you need to specify a parameter file!\n" << std::endl; exit(0); } @@ -189,7 +190,7 @@ int main( int argc, char** argv ) #endif csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Done." << std::endl; + csoca::ilog << "Done.\n" << std::endl; return 0; } diff --git a/src/output_plugin.cc b/src/output_plugin.cc index 763336e..35664dc 100644 --- a/src/output_plugin.cc +++ b/src/output_plugin.cc @@ -30,6 +30,7 @@ void print_output_plugins() csoca::ilog << "\t\'" << it->first << "\'\n"; ++it; } + csoca::ilog << std::endl; } std::unique_ptr select_output_plugin( ConfigFile& cf ) diff --git a/src/random_plugin.cc b/src/random_plugin.cc index 045978f..5cfea9a 100644 --- a/src/random_plugin.cc +++ b/src/random_plugin.cc @@ -13,7 +13,7 @@ void print_RNG_plugins() std::map &m = get_RNG_plugin_map(); std::map::iterator it; it = m.begin(); - csoca::ilog << "- Available random number generator plug-ins:" << std::endl; + csoca::ilog << "Available random number generator plug-ins:" << std::endl; while (it != m.end()) { if ((*it).second){ @@ -21,6 +21,7 @@ void print_RNG_plugins() } ++it; } + csoca::ilog << std::endl; } std::unique_ptr select_RNG_plugin(ConfigFile &cf) diff --git a/src/transfer_function_plugin.cc b/src/transfer_function_plugin.cc index e9d3748..6101ada 100644 --- a/src/transfer_function_plugin.cc +++ b/src/transfer_function_plugin.cc @@ -20,6 +20,7 @@ void print_TransferFunction_plugins() csoca::ilog << "\t\'" << (*it).first << "\'" << std::endl; ++it; } + csoca::ilog << std::endl; } std::unique_ptr select_TransferFunction_plugin(ConfigFile &cf) From 40be27c36fb4726cb0075dd6dc0ec0f70e555bfa Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 Nov 2019 15:54:17 +0100 Subject: [PATCH 015/130] started implementation of PLT (working commit) --- include/grid_fft.hh | 47 ++++++++++ include/mat3.hh | 98 +++++++++++++++++++++ include/particle_plt.hh | 184 ++++++++++++++++++++++++++++++++++++++++ include/vec3.hh | 4 +- src/main.cc | 5 +- 5 files changed, 335 insertions(+), 3 deletions(-) create mode 100644 include/mat3.hh create mode 100644 include/particle_plt.hh diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 4848f0e..3b760b2 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -152,6 +152,32 @@ public: return (i * sizes_[1] + j) * sizes_[3] + k; } + data_t get_cic( const vec3& v ) const{ + // warning! 
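
(Aside on the interpolation added in this hunk: get_cic is standard trilinear cloud-in-cell interpolation of the real-space grid. With (i,j,k) the cell containing the point in grid units, (d_x,d_y,d_z) the fractional offsets inside that cell, and the upper neighbours wrapped periodically, the weighting is

    f(\mathbf{x}) \simeq \sum_{a,b,c\in\{0,1\}} w_a(d_x)\, w_b(d_y)\, w_c(d_z)\; f_{i+a,\,j+b,\,k+c}, \qquad w_0(d)=1-d,\quad w_1(d)=d . )
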
this doesn't work with MPI + vec3 x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], + std::fmod(v.y/length_[1]+1.0,1.0)*n_[1], + std::fmod(v.z/length_[2]+1.0,1.0)*n_[2] }); + size_t ix = static_cast(x.x); + size_t iy = static_cast(x.y); + size_t iz = static_cast(x.z); + real_t dx = x.x-real_t(ix), tx = 1.0-dx; + real_t dy = x.y-real_t(iy), ty = 1.0-dy; + real_t dz = x.z-real_t(iz), tz = 1.0-dz; + size_t ix1 = (ix+1)%n_[0]; + size_t iy1 = (iy+1)%n_[1]; + size_t iz1 = (iz+1)%n_[2]; + data_t val = 0.0; + val += this->relem(ix ,iy ,iz ) * tx * ty * tz; + val += this->relem(ix ,iy ,iz1) * tx * ty * dz; + val += this->relem(ix ,iy1,iz ) * tx * dy * tz; + val += this->relem(ix ,iy1,iz1) * tx * dy * dz; + val += this->relem(ix1,iy ,iz ) * dx * ty * tz; + val += this->relem(ix1,iy ,iz1) * dx * ty * dz; + val += this->relem(ix1,iy1,iz ) * dx * dy * tz; + val += this->relem(ix1,iy1,iz1) * dx * dy * dz; + return val; + } + template vec3 get_r(const size_t i, const size_t j, const size_t k) const { @@ -563,6 +589,27 @@ public: } } + template + void assign_function_of_grids_kdep(const functional &f, const grid_t &g) + { + assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) ); + +#pragma omp parallel for + for (size_t i = 0; i < sizes_[0]; ++i) + { + for (size_t j = 0; j < sizes_[1]; ++j) + { + for (size_t k = 0; k < sizes_[2]; ++k) + { + auto &elem = this->kelem(i, j, k); + const auto &elemg = g.kelem(i, j, k); + + elem = f(this->get_k(i, j, k), elemg); + } + } + } + } + template void assign_function_of_grids_kdep(const functional &f, const grid1_t &g1, const grid2_t &g2) { diff --git a/include/mat3.hh b/include/mat3.hh new file mode 100644 index 0000000..9f72305 --- /dev/null +++ b/include/mat3.hh @@ -0,0 +1,98 @@ +#include +#include + +#include + +template +class mat3s{ +protected: + std::array data_; + gsl_matrix_view m_; + gsl_vector *eval_; + gsl_matrix *evec_; + gsl_eigen_symmv_workspace * wsp_; + + void init_gsl(){ + m_ = gsl_matrix_view_array (&data_[0], 3, 3); + eval_ = gsl_vector_alloc (3); + evec_ = gsl_matrix_alloc (3, 3); + wsp_ = gsl_eigen_symmv_alloc (3); + } + + void free_gsl(){ + gsl_eigen_symmv_free (wsp_); + gsl_vector_free (eval_); + gsl_matrix_free (evec_); + } + +public: + + mat3s(){ + this->init_gsl(); + } + + //! copy constructor + mat3s( const mat3s &m) + : data_(m.data_){ + this->init_gsl(); + } + + //! move constructor + mat3s( mat3s &&m) + : data_(std::move(m.data_)){ + this->init_gsl(); + } + + //! construct vec3 from initializer list + template + mat3s(E&&...e) + : data_{{std::forward(e)...}}{ + // resort into symmetrix matrix + data_[8] = data_[5]; + data_[7] = data_[4]; + data_[6] = data_[2]; + data_[5] = data_[4]; + data_[4] = data_[3]; + data_[3] = data_[1]; + this->init_gsl(); + } + + mat3s& operator=(const mat3s& m){ + data_ = m.data_; + return *this; + } + + mat3s& operator=(const mat3s&& m){ + data_ = std::move(m.data_); + return *this; + } + + //! bracket index access to vector components + T &operator[](size_t i){ return data_[i];} + + //! const bracket index access to vector components + const T &operator[](size_t i) const { return data_[i]; } + + //! matrix 2d index access + T &operator()(size_t i, size_t j){ return data_[3*i+j]; } + + //! const matrix 2d index access + const T &operator()(size_t i, size_t j) const { return data_[3*i+j]; } + + //! 
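
A minimal usage sketch of this helper class (the six values d_xx ... d_zz are placeholders for the independent components; the initializer takes the upper triangle xx, xy, xz, yy, yz, zz, which the constructor mirrors into the full symmetric matrix, and eigen(), defined just below, wraps the GSL symmetric eigensolver with ascending eigenvalue order):

    mat3s<real_t> D;
    vec3<real_t>  eval, evec1, evec2, evec3;
    D = { d_xx, d_xy, d_xz, d_yy, d_yz, d_zz };   // upper triangle only
    D.eigen(eval, evec1, evec2, evec3);           // eval[0] <= eval[1] <= eval[2],
                                                  // evec3 belongs to the largest eigenvalue eval[2]
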
destructor + ~mat3s(){ + this->free_gsl(); + } + + void eigen( vec3& evals, vec3& evec1, vec3& evec2, vec3& evec3 ){ + gsl_eigen_symmv (&m_.matrix, eval_, evec_, wsp_); + gsl_eigen_symmv_sort (eval_, evec_, GSL_EIGEN_SORT_VAL_ASC); + + for( int i=0; i<3; ++i ){ + evals[i] = gsl_vector_get( eval_, i ); + evec1[i] = gsl_matrix_get( evec_, 0, i ); + evec2[i] = gsl_matrix_get( evec_, 1, i ); + evec3[i] = gsl_matrix_get( evec_, 2, i ); + } + } +}; \ No newline at end of file diff --git a/include/particle_plt.hh b/include/particle_plt.hh new file mode 100644 index 0000000..1390a42 --- /dev/null +++ b/include/particle_plt.hh @@ -0,0 +1,184 @@ +#pragma once + +#include +#include // for unlink + +#include +#include + +#include + +#include + +namespace particle{ +//! implement Marcos et al. PLT calculation + +inline void test_plt( void ){ + + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + csoca::ilog << "Testing PLT implementation..." << std::endl; + + real_t boxlen = 1.0; + + size_t ngrid = 64; + size_t npgrid = 1; + size_t dpg = ngrid/npgrid; + size_t nump = npgrid*npgrid*npgrid; + + real_t pweight = 1.0/real_t(nump); + real_t eta = 2.0 * boxlen/ngrid; + + const real_t alpha = 1.0/std::sqrt(2)/eta; + const real_t alpha2 = alpha*alpha; + const real_t alpha3 = alpha2*alpha; + const real_t sqrtpi = std::sqrt(M_PI); + const real_t pi3halfs = std::pow(M_PI,1.5); + + const real_t dV( std::pow( boxlen/ngrid, 3 ) ); + Grid_FFT rho({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + std::vector< vec3 > gpos ; + + auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; + + auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { + auto d = vR-vP; + d.x = (d.x>0.5)? d.x-1.0 : (d.x<-0.5)? d.x+1.0 : d.x; + d.y = (d.y>0.5)? d.y-1.0 : (d.y<-0.5)? d.y+1.0 : d.y; + d.z = (d.z>0.5)? d.z-1.0 : (d.z<-0.5)? 
d.z+1.0 : d.z; + auto r = d.norm(); + + if( r< 1e-14 ) return 0.0; + + real_t val = 0.0; + + val -= d[mu]*d[nu]/(r*r) * alpha3/pi3halfs * std::exp(-alpha*alpha*r*r); + val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * + (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); + + return pweight * val; + }; + + gpos.reserve(nump); + + // sc + for( size_t i=0; i ccomplex_t { + real_t kmod = k.norm(); + return -x * std::exp(-0.5*eta*eta*kmod*kmod) / (kmod*kmod); + }); + rho.zero_DC_mode(); + + auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ + real_t sr = 0.0; + for( auto& p : gpos ){ + sr += greensftide_sr( mu, nu, v, p); + } + if( v.norm()<1e-14 ) return 0.0; + + return sr; + }; + + + // std::random_device rd; //Will be used to obtain a seed for the random number engine + // std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd() + // std::uniform_real_distribution<> dis(-0.25,0.25); + + Grid_FFT D_xx({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_xy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_xz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_yy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_yz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_zz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + + #pragma omp parallel for + for( size_t i=0; i p; + p.x = real_t(i)/ngrid; + for( size_t j=0; j D; + vec3 eval, evec1, evec2, evec3; + for( size_t j=0; j kv = D_xx.get_k(i,j,k); + + D = { std::real(D_xx.kelem(i,j,k) - kv[0]*kv[0] * rho.kelem(i,j,k) ), + std::real(D_xy.kelem(i,j,k) - kv[0]*kv[1] * rho.kelem(i,j,k) ), + std::real(D_xz.kelem(i,j,k) - kv[0]*kv[2] * rho.kelem(i,j,k) ), + std::real(D_yy.kelem(i,j,k) - kv[1]*kv[1] * rho.kelem(i,j,k) ), + std::real(D_yz.kelem(i,j,k) - kv[1]*kv[2] * rho.kelem(i,j,k) ), + std::real(D_zz.kelem(i,j,k) - kv[2]*kv[2] * rho.kelem(i,j,k) ) }; + D.eigen(eval, evec1, evec2, evec3); + + + ofs << std::setw(16) << kv.norm() / kNyquist + << std::setw(16) << eval[0] *nfac + 1.0/3.0 + << std::setw(16) << eval[1] *nfac + 1.0/3.0 + << std::setw(16) << eval[2] *nfac + 1.0/3.0 + << std::setw(16) << kv[0] + << std::setw(16) << kv[1] + << std::setw(16) << kv[2] + << std::endl; + } + } + } + +// std::string filename("plt_test.hdf5"); +// unlink(filename.c_str()); +// #if defined(USE_MPI) +// MPI_Barrier(MPI_COMM_WORLD); +// #endif +// rho.Write_to_HDF5(filename, "rho"); + +} + + +} \ No newline at end of file diff --git a/include/vec3.hh b/include/vec3.hh index 3f48967..057fbcd 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -35,10 +35,10 @@ public: vec3(E&&...e) : data_{{std::forward(e)...}}, x(data_[0]), y(data_[1]), z(data_[2]){} - //! braket index access to vector components + //! bracket index access to vector components T &operator[](size_t i){ return data_[i];} - //! const braket index access to vector components + //! const bracket index access to vector components const T &operator[](size_t i) const { return data_[i]; } //! implementation of summation of vec3 diff --git a/src/main.cc b/src/main.cc index 2416a20..01ee5be 100644 --- a/src/main.cc +++ b/src/main.cc @@ -10,6 +10,7 @@ #include #include +#include // initialise with "default" values @@ -181,7 +182,9 @@ int main( int argc, char** argv ) /////////////////////////////////////////////////////////////////////// // do the job... 
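
For reference, the real-space kernel accumulated in greensftide_sr above is the short-range piece of the Ewald-split tidal Green's function, eq. (A2) of Marcos (2008), while the long-range piece is applied in Fourier space through the Gaussian-damped potential stored in rho, \hat\phi_0(\mathbf{k}) = -\hat\rho(\mathbf{k})\, e^{-\eta^2 k^2/2}/k^2 with \alpha = 1/(\sqrt{2}\,\eta):

    D^{\rm sr}_{\mu\nu}(\mathbf{x}) = -\frac{x_\mu x_\nu}{x^2}\,\frac{\alpha^3}{\pi^{3/2}}\, e^{-\alpha^2 x^2}
      + \frac{1}{4\pi}\left(\frac{\delta_{\mu\nu}}{x^3} - \frac{3\,x_\mu x_\nu}{x^5}\right)
        \left[\operatorname{erfc}(\alpha x) + \frac{2\alpha x}{\sqrt{\pi}}\, e^{-\alpha^2 x^2}\right].
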
/////////////////////////////////////////////////////////////////////// - ic_generator::Run( the_config ); + // ic_generator::Run( the_config ); + + particle::test_plt(); /////////////////////////////////////////////////////////////////////// #if defined(USE_MPI) From c71b844e162f9563bd4af91dd144b2e72e0a55cd Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 Nov 2019 17:03:29 +0100 Subject: [PATCH 016/130] enabled output to HDF5 of complex data when in Fourier space --- src/grid_fft.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/grid_fft.cc b/src/grid_fft.cc index 2881010..54ddbbf 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -391,7 +391,10 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) { for (size_t k = 0; k < size(2); ++k) { - buf[j * size(2) + k] = std::real(relem(i, j, k)); + if( this->space_ == rspace_id ) + buf[j * size(2) + k] = std::real(relem(i, j, k)); + else + buf[j * size(2) + k] = std::real(kelem(i, j, k)); } } @@ -410,7 +413,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) H5Dclose(dset_id); if (typeid(data_t) == typeid(std::complex) || - typeid(data_t) == typeid(std::complex)) + typeid(data_t) == typeid(std::complex) || + this->space_ == kspace_id ) { datasetname += std::string(".im"); @@ -460,7 +464,10 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) for (size_t j = 0; j < size(1); ++j) for (size_t k = 0; k < size(2); ++k) { - buf[j * size(2) + k] = std::imag(relem(i, j, k)); + if( this->space_ == rspace_id ) + buf[j * size(2) + k] = std::imag(relem(i, j, k)); + else + buf[j * size(2) + k] = std::imag(kelem(i, j, k)); } memspace = H5Screate_simple(3, count, NULL); From 93568708410fa96c07d42fdedb8d490abe8921cc Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 Nov 2019 17:04:05 +0100 Subject: [PATCH 017/130] fixed bug where eigenvectors and coordinate indices were mixed up --- include/mat3.hh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/mat3.hh b/include/mat3.hh index 9f72305..04ac0ac 100644 --- a/include/mat3.hh +++ b/include/mat3.hh @@ -90,9 +90,9 @@ public: for( int i=0; i<3; ++i ){ evals[i] = gsl_vector_get( eval_, i ); - evec1[i] = gsl_matrix_get( evec_, 0, i ); - evec2[i] = gsl_matrix_get( evec_, 1, i ); - evec3[i] = gsl_matrix_get( evec_, 2, i ); + evec1[i] = gsl_matrix_get( evec_, i, 0 ); + evec2[i] = gsl_matrix_get( evec_, i, 1 ); + evec3[i] = gsl_matrix_get( evec_, i, 2 ); } } }; \ No newline at end of file From 06264bfb510a5a985fd2ac1390a962f2d5553cfd Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 Nov 2019 17:04:49 +0100 Subject: [PATCH 018/130] PLT: output of eigenvalues and vectors to HDF5 --- include/particle_plt.hh | 70 ++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 1390a42..7025362 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -20,7 +20,7 @@ inline void test_plt( void ){ real_t boxlen = 1.0; - size_t ngrid = 64; + size_t ngrid = 128; size_t npgrid = 1; size_t dpg = ngrid/npgrid; size_t nump = npgrid*npgrid*npgrid; @@ -87,11 +87,6 @@ inline void test_plt( void ){ return sr; }; - - // std::random_device rd; //Will be used to obtain a seed for the random number engine - // std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd() - // std::uniform_real_distribution<> dis(-0.25,0.25); - Grid_FFT D_xx({ngrid, ngrid, ngrid}, 
{boxlen, boxlen, boxlen}); Grid_FFT D_xy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT D_xz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); @@ -136,13 +131,13 @@ inline void test_plt( void ){ D_yz.FourierTransformForward(); D_zz.FourierTransformForward(); - std::ofstream ofs("test_ewald.txt"); + // std::ofstream ofs("test_ewald.txt"); real_t nfac = 1.0/std::pow(real_t(ngrid),1.5); real_t kNyquist = M_PI/boxlen * ngrid; - //#pragma omp parallel for + #pragma omp parallel for for( size_t i=0; i D; vec3 eval, evec1, evec2, evec3; @@ -150,33 +145,50 @@ inline void test_plt( void ){ for( size_t k=0; k kv = D_xx.get_k(i,j,k); - D = { std::real(D_xx.kelem(i,j,k) - kv[0]*kv[0] * rho.kelem(i,j,k) ), - std::real(D_xy.kelem(i,j,k) - kv[0]*kv[1] * rho.kelem(i,j,k) ), - std::real(D_xz.kelem(i,j,k) - kv[0]*kv[2] * rho.kelem(i,j,k) ), - std::real(D_yy.kelem(i,j,k) - kv[1]*kv[1] * rho.kelem(i,j,k) ), - std::real(D_yz.kelem(i,j,k) - kv[1]*kv[2] * rho.kelem(i,j,k) ), - std::real(D_zz.kelem(i,j,k) - kv[2]*kv[2] * rho.kelem(i,j,k) ) }; - D.eigen(eval, evec1, evec2, evec3); - + D_xx.kelem(i,j,k) = (D_xx.kelem(i,j,k) - kv[0]*kv[0] * rho.kelem(i,j,k))*nfac + 1.0/3.0; + D_xy.kelem(i,j,k) = (D_xy.kelem(i,j,k) - kv[0]*kv[1] * rho.kelem(i,j,k))*nfac; + D_xz.kelem(i,j,k) = (D_xz.kelem(i,j,k) - kv[0]*kv[2] * rho.kelem(i,j,k))*nfac; + D_yy.kelem(i,j,k) = (D_yy.kelem(i,j,k) - kv[1]*kv[1] * rho.kelem(i,j,k))*nfac + 1.0/3.0;; + D_yz.kelem(i,j,k) = (D_yz.kelem(i,j,k) - kv[1]*kv[2] * rho.kelem(i,j,k))*nfac; + D_zz.kelem(i,j,k) = (D_zz.kelem(i,j,k) - kv[2]*kv[2] * rho.kelem(i,j,k))*nfac + 1.0/3.0;; - ofs << std::setw(16) << kv.norm() / kNyquist - << std::setw(16) << eval[0] *nfac + 1.0/3.0 - << std::setw(16) << eval[1] *nfac + 1.0/3.0 - << std::setw(16) << eval[2] *nfac + 1.0/3.0 - << std::setw(16) << kv[0] - << std::setw(16) << kv[1] - << std::setw(16) << kv[2] - << std::endl; + D = { std::real(D_xx.kelem(i,j,k)), std::real(D_xy.kelem(i,j,k)), std::real(D_xz.kelem(i,j,k)), + std::real(D_yy.kelem(i,j,k)), std::real(D_yz.kelem(i,j,k)), std::real(D_zz.kelem(i,j,k)) }; + + D.eigen(eval, evec1, evec2, evec3); + + D_xx.kelem(i,j,k) = eval[2]; + D_yy.kelem(i,j,k) = eval[1]; + D_zz.kelem(i,j,k) = eval[0]; + + D_xy.kelem(i,j,k) = evec3[0]; + D_xz.kelem(i,j,k) = evec3[1]; + D_yz.kelem(i,j,k) = evec3[2]; + + // ofs << std::setw(16) << kv.norm() / kNyquist + // << std::setw(16) << eval[0] // *nfac + 1.0/3.0 + // << std::setw(16) << eval[1] // *nfac + 1.0/3.0 + // << std::setw(16) << eval[2] // *nfac + 1.0/3.0 + // << std::setw(16) << kv[0] + // << std::setw(16) << kv[1] + // << std::setw(16) << kv[2] + // << std::endl; } } } -// std::string filename("plt_test.hdf5"); -// unlink(filename.c_str()); -// #if defined(USE_MPI) -// MPI_Barrier(MPI_COMM_WORLD); -// #endif + std::string filename("plt_test.hdf5"); + unlink(filename.c_str()); +#if defined(USE_MPI) + MPI_Barrier(MPI_COMM_WORLD); +#endif // rho.Write_to_HDF5(filename, "rho"); + D_xx.Write_to_HDF5(filename, "omega1"); + D_yy.Write_to_HDF5(filename, "omega2"); + D_zz.Write_to_HDF5(filename, "omega3"); + D_xy.Write_to_HDF5(filename, "e1_x"); + D_xz.Write_to_HDF5(filename, "e1_y"); + D_yz.Write_to_HDF5(filename, "e1_z"); } From 8048825e02b9af50efff1454d857f07694fc8936 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 Nov 2019 17:05:28 +0100 Subject: [PATCH 019/130] cosmetics --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index fcc57e9..a3bffae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,7 @@ 
if(ENABLE_MPI) endif(ENABLE_MPI) +######################################################################################################################## # FFTW cmake_policy(SET CMP0074 NEW) if(ENABLE_MPI) @@ -57,9 +58,11 @@ else() find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS) endif(ENABLE_MPI) +######################################################################################################################## # GSL find_package(GSL REQUIRED) +######################################################################################################################## # HDF5 find_package(HDF5 REQUIRED) From 6d1a3bf7cc0b38b1515e3183f0935216a9c605a4 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 4 Nov 2019 00:25:45 +0100 Subject: [PATCH 020/130] minor cleanup, calculation of D works for SC --- include/grid_fft.hh | 16 ++++++----- include/particle_plt.hh | 63 ++++++++++++++++------------------------- 2 files changed, 34 insertions(+), 45 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 3b760b2..ad7920c 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -613,15 +613,15 @@ public: template void assign_function_of_grids_kdep(const functional &f, const grid1_t &g1, const grid2_t &g2) { - assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g.size(2) == size(2) ); - assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g.size(2) == size(2) ); + assert(g1.size(0) == size(0) && g1.size(1) == size(1) && g1.size(2) == size(2) ); + assert(g2.size(0) == size(0) && g2.size(1) == size(1) && g2.size(2) == size(2) ); #pragma omp parallel for - for (size_t i = 0; i < sizes_[0]; ++i) + for (size_t i = 0; i < size(0); ++i) { - for (size_t j = 0; j < sizes_[1]; ++j) + for (size_t j = 0; j < size(1); ++j) { - for (size_t k = 0; k < sizes_[2]; ++k) + for (size_t k = 0; k < size(2); ++k) { auto &elem = this->kelem(i, j, k); const auto &elemg1 = g1.kelem(i, j, k); @@ -683,7 +683,7 @@ public: void Write_PDF(std::string ofname, int nbins = 1000, double scale = 1.0, double rhomin = 1e-3, double rhomax = 1e3); - void shift_field( const vec3& s ) + void shift_field( const vec3& s, bool transform_back=true ) { FourierTransformForward(); apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { @@ -694,7 +694,9 @@ public: #endif return x * std::exp(ccomplex_t(0.0, shift)); }); - FourierTransformBackward(); + if( transform_back ){ + FourierTransformBackward(); + } } void zero_DC_mode(void) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 7025362..2c84051 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -8,6 +8,7 @@ #include +#include #include namespace particle{ @@ -20,7 +21,7 @@ inline void test_plt( void ){ real_t boxlen = 1.0; - size_t ngrid = 128; + size_t ngrid = 64; size_t npgrid = 1; size_t dpg = ngrid/npgrid; size_t nump = npgrid*npgrid*npgrid; @@ -36,40 +37,26 @@ inline void test_plt( void ){ const real_t dV( std::pow( boxlen/ngrid, 3 ) ); Grid_FFT rho({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - std::vector< vec3 > gpos ; auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { auto d = vR-vP; - d.x = (d.x>0.5)? d.x-1.0 : (d.x<-0.5)? d.x+1.0 : d.x; - d.y = (d.y>0.5)? d.y-1.0 : (d.y<-0.5)? d.y+1.0 : d.y; - d.z = (d.z>0.5)? d.z-1.0 : (d.z<-0.5)? 
d.z+1.0 : d.z; auto r = d.norm(); if( r< 1e-14 ) return 0.0; real_t val = 0.0; - val -= d[mu]*d[nu]/(r*r) * alpha3/pi3halfs * std::exp(-alpha*alpha*r*r); val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); - return pweight * val; }; - gpos.reserve(nump); - // sc - for( size_t i=0; i ccomplex_t { real_t kmod = k.norm(); @@ -79,11 +66,16 @@ inline void test_plt( void ){ auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ real_t sr = 0.0; - for( auto& p : gpos ){ - sr += greensftide_sr( mu, nu, v, p); + int N = 3; + for( int i=-N; i<=N; ++i ){ + for( int j=-N; j<=N; ++j ){ + for( int k=-N; k<=N; ++k ){ + if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ + sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); + } + } + } } - if( v.norm()<1e-14 ) return 0.0; - return sr; }; @@ -108,21 +100,16 @@ inline void test_plt( void ){ D_yy.relem(i,j,k) = evaluate_D(1,1,p); D_yz.relem(i,j,k) = evaluate_D(1,2,p); D_zz.relem(i,j,k) = evaluate_D(2,2,p); - - //D = {evaluate_D(0,0,p),evaluate_D(0,1,p),evaluate_D(0,2,p),evaluate_D(1,0,p),evaluate_D(1,1,p),evaluate_D(2,2,p)}; - //D.eigen(eval, evec1, evec2, evec3); - //rho.relem(i,j,k) = eval[2]; } } } + D_xx.relem(0,0,0) = 0.0; D_xy.relem(0,0,0) = 0.0; D_xz.relem(0,0,0) = 0.0; D_yy.relem(0,0,0) = 0.0; D_yz.relem(0,0,0) = 0.0; D_zz.relem(0,0,0) = 0.0; - - D_xx.FourierTransformForward(); D_xy.FourierTransformForward(); @@ -131,13 +118,13 @@ inline void test_plt( void ){ D_yz.FourierTransformForward(); D_zz.FourierTransformForward(); - // std::ofstream ofs("test_ewald.txt"); + std::ofstream ofs("test_ewald.txt"); real_t nfac = 1.0/std::pow(real_t(ngrid),1.5); real_t kNyquist = M_PI/boxlen * ngrid; - #pragma omp parallel for + // #pragma omp parallel for for( size_t i=0; i D; vec3 eval, evec1, evec2, evec3; @@ -165,14 +152,14 @@ inline void test_plt( void ){ D_xz.kelem(i,j,k) = evec3[1]; D_yz.kelem(i,j,k) = evec3[2]; - // ofs << std::setw(16) << kv.norm() / kNyquist - // << std::setw(16) << eval[0] // *nfac + 1.0/3.0 - // << std::setw(16) << eval[1] // *nfac + 1.0/3.0 - // << std::setw(16) << eval[2] // *nfac + 1.0/3.0 - // << std::setw(16) << kv[0] - // << std::setw(16) << kv[1] - // << std::setw(16) << kv[2] - // << std::endl; + ofs << std::setw(16) << kv.norm() / kNyquist + << std::setw(16) << eval[0] + << std::setw(16) << eval[1] + << std::setw(16) << eval[2] + << std::setw(16) << kv[0] + << std::setw(16) << kv[1] + << std::setw(16) << kv[2] + << std::endl; } } } From 747031bee2b36b94632285eef9c8e14122f2b1c1 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Tue, 5 Nov 2019 00:29:33 +0100 Subject: [PATCH 021/130] added calculation of Brillouin zone (from Bruno's Code) --- include/particle_plt.hh | 170 ++++++++++++++++++++++++++++++++++++++-- include/vec3.hh | 9 +++ 2 files changed, 173 insertions(+), 6 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 2c84051..bea2e30 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -19,6 +19,22 @@ inline void test_plt( void ){ csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; csoca::ilog << "Testing PLT implementation..." 
<< std::endl; + constexpr real_t pi = M_PI, twopi = 2.0*M_PI; + + const std::vector> bcc_normals{ + {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, + {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, + {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} + }; + + const std::vector> bcc_reciprocal{ + {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} + }; + + /*const std::vector> fcc_reciprocal{ + {-2.,0.,2.}, {2.,0.,0.}, {1.,1.,-1.} + };*/ + real_t boxlen = 1.0; size_t ngrid = 64; @@ -56,7 +72,9 @@ inline void test_plt( void ){ // sc rho.zero(); rho.relem(0,0,0) = pweight/dV; - + // rho.relem(0,0,0) = pweight/dV/2; + // rho.relem(ngrid/2,ngrid/2,ngrid/2) = pweight/dV/2; + rho.FourierTransformForward(); rho.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { real_t kmod = k.norm(); @@ -72,6 +90,17 @@ inline void test_plt( void ){ for( int k=-N; k<=N; ++k ){ if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); + + // sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} )/2; + + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; } } } @@ -118,13 +147,13 @@ inline void test_plt( void ){ D_yz.FourierTransformForward(); D_zz.FourierTransformForward(); - std::ofstream ofs("test_ewald.txt"); + real_t nfac = 1.0/std::pow(real_t(ngrid),1.5); real_t kNyquist = M_PI/boxlen * ngrid; - // #pragma omp parallel for + #pragma omp parallel for for( size_t i=0; i D; vec3 eval, evec1, evec2, evec3; @@ -151,11 +180,139 @@ inline void test_plt( void ){ D_xy.kelem(i,j,k) = evec3[0]; D_xz.kelem(i,j,k) = evec3[1]; D_yz.kelem(i,j,k) = evec3[2]; + } + } + } +#if 1 + std::vector> vectk; + std::vector> ico, vecitk; + vectk.assign(D_xx.size(0)*D_xx.size(1)*D_xx.size(2),vec3()); + ico.assign(D_xx.size(0)*D_xx.size(1)*D_xx.size(2),vec3()); + vecitk.assign(D_xx.size(0)*D_xx.size(1)*D_xx.size(2),vec3()); + + std::ofstream ofs2("test_brillouin.txt"); + + const int numb = 1; + for( size_t i=0; i D; + vec3 eval, evec1, evec2, evec3; + vec3 a({0.,0.,0.}); + + for( size_t j=0; j ar = D_xx.get_k(i,j,k) / (twopi*ngrid); + vec3 kv = D_xx.get_k(i,j,k); + + for( int l=0; l<3; l++ ){ + a[l] = 0.0; + for( int m=0; m<3; m++){ + // project k on reciprocal basis + a[l] += ar[m]*bcc_reciprocal[m][l]; + } + } + + // translate the k-vectors into the "candidate" FBZ + vec3 anum; + for( int l1=-numb; l1<=numb; ++l1 ){ + anum[0] = real_t(l1); + for( int l2=-numb; l2<=numb; ++l2 ){ + anum[1] = real_t(l2); + for( int l3=-numb; l3<=numb; ++l3 ){ + anum[2] = real_t(l3); + + vectk[idx] = a; + + for( int l=0; l<3; l++ ){ + for( int m=0; m<3; m++){ + // project k on reciprocal basis + vectk[idx][l] += anum[m]*bcc_reciprocal[m][l]; + } + } + // check if in first Brillouin zone + bool btest=true; + for( size_t l=0; l amod*1.0001 ){ btest=false; break; } + if( scalar > 1.01 * amod2 ){ btest=false; break; } + } + if( btest ){ + 
vecitk[idx][0] = std::round(vectk[idx][0]*(ngrid)/twopi); + vecitk[idx][1] = std::round(vectk[idx][1]*(ngrid)/twopi); + vecitk[idx][2] = std::round(vectk[idx][2]*(ngrid)/twopi); + + ico[idx][0] = int((ar[0]+l1) * ngrid+0.5); + ico[idx][1] = int((ar[1]+l2) * ngrid+0.5); + ico[idx][2] = int((ar[2]+l3) * ngrid+0.5); + if( ico[idx][2] < 0 ){ + ico[idx][0] = -ico[idx][0]; + ico[idx][1] = -ico[idx][1]; + ico[idx][2] = -ico[idx][2]; + } + + ico[idx][0] = (ico[idx][0]+ngrid)%ngrid; + ico[idx][1] = (ico[idx][1]+ngrid)%ngrid; + + if( vectk[idx][2] < 0 ){ + vectk[idx][0] = - vectk[idx][0]; + vectk[idx][1] = - vectk[idx][1]; + vectk[idx][2] = - vectk[idx][2]; + } + + if( vecitk[idx][2] < 0 ){ + vecitk[idx][0] = -vecitk[idx][0]; + vecitk[idx][1] = -vecitk[idx][1]; + vecitk[idx][2] = -vecitk[idx][2]; + } + vecitk[idx][0] = (vecitk[idx][0]+ngrid)%ngrid; + vecitk[idx][1] = (vecitk[idx][1]+ngrid)%ngrid; + vecitk[idx][2] = (vecitk[idx][2]+ngrid)%ngrid; + + + + //vecitk[idx][0] = (vecitk[idx][0]<0)? vecitk[idx][0]+ngrid : vecitk[idx][0];; + //vecitk[idx][1] = (vecitk[idx][1]<0)? vecitk[idx][1]+ngrid : vecitk[idx][1]; + + + + //ofs2 << kv.x << ", " << kv.y << ", " << kv.z << ", " << vectk[idx].x*(ngrid)/twopi << ", " << vectk[idx].y*(ngrid)/twopi << ", " << vectk[idx].z*(ngrid)/twopi << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; + ofs2 << kv.x << ", " << kv.y << ", " << kv.z << ", " << vecitk[idx].x << ", " << vecitk[idx].y << ", " << vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; + //std::cout << real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] << " " << vectk[idx][1] << std::endl; + goto endloop; + } + } + } + } + endloop: ; + + D_xx.kelem(i,j,k) = D_xx.kelem(ico[idx][0],ico[idx][1],ico[idx][2]); + // D_xx.kelem(ico[idx][0],ico[idx][1],ico[idx][2]) = D_xx.kelem(i,j,k); + // D_xx.kelem(i,j,k) = D_xx.kelem(i+vecitk[idx][0],j+vecitk[idx][1],k+vecitk[idx][2]); + } + } + + } + +#endif + + std::ofstream ofs("test_ewald.txt"); + for( size_t i=0; i kv = D_xx.get_k(i,j,k); ofs << std::setw(16) << kv.norm() / kNyquist - << std::setw(16) << eval[0] - << std::setw(16) << eval[1] - << std::setw(16) << eval[2] + << std::setw(16) << std::real(D_xx.kelem(i,j,k)) + << std::setw(16) << std::real(D_yy.kelem(i,j,k)) + << std::setw(16) << std::real(D_zz.kelem(i,j,k)) << std::setw(16) << kv[0] << std::setw(16) << kv[1] << std::setw(16) << kv[2] @@ -164,6 +321,7 @@ inline void test_plt( void ){ } } + std::string filename("plt_test.hdf5"); unlink(filename.c_str()); #if defined(USE_MPI) diff --git a/include/vec3.hh b/include/vec3.hh index 057fbcd..b6550ae 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -41,6 +41,12 @@ public: //! const bracket index access to vector components const T &operator[](size_t i) const { return data_[i]; } + // assignment operator + vec3& operator=( const vec3& v ) { data_=v.data_; return *this; } + + // assignment operator + const vec3& operator=( const vec3& v ) const { data_=v.data_; return *this; } + //! implementation of summation of vec3 vec3 operator+( const vec3& v ) const{ return vec3({x+v.x,y+v.y,z+v.z}); } @@ -50,6 +56,9 @@ public: //! implementation of scalar multiplication vec3 operator*( T s ) const{ return vec3({x*s,y*s,z*s}); } + //! implementation of scalar division + vec3 operator/( T s ) const{ return vec3({x/s,y/s,z/s}); } + //! 
implementation of += operator vec3& operator+=( const vec3& v ) const{ x+=v.x; y+=v.y; z+=v.z; return *this; } From 144d0d9e1ace23ce1a8ff9414b0199a37f044ffc Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Tue, 5 Nov 2019 19:14:14 +0100 Subject: [PATCH 022/130] added projection onto plt eigenmodes. (not working) --- include/grid_fft.hh | 79 +++++++---- include/particle_plt.hh | 282 +++++++++++++++++++++++++++++++++++----- src/ic_generator.cc | 48 +++++++ src/main.cc | 4 +- 4 files changed, 356 insertions(+), 57 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index ad7920c..00d2cb0 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -152,32 +152,6 @@ public: return (i * sizes_[1] + j) * sizes_[3] + k; } - data_t get_cic( const vec3& v ) const{ - // warning! this doesn't work with MPI - vec3 x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], - std::fmod(v.y/length_[1]+1.0,1.0)*n_[1], - std::fmod(v.z/length_[2]+1.0,1.0)*n_[2] }); - size_t ix = static_cast(x.x); - size_t iy = static_cast(x.y); - size_t iz = static_cast(x.z); - real_t dx = x.x-real_t(ix), tx = 1.0-dx; - real_t dy = x.y-real_t(iy), ty = 1.0-dy; - real_t dz = x.z-real_t(iz), tz = 1.0-dz; - size_t ix1 = (ix+1)%n_[0]; - size_t iy1 = (iy+1)%n_[1]; - size_t iz1 = (iz+1)%n_[2]; - data_t val = 0.0; - val += this->relem(ix ,iy ,iz ) * tx * ty * tz; - val += this->relem(ix ,iy ,iz1) * tx * ty * dz; - val += this->relem(ix ,iy1,iz ) * tx * dy * tz; - val += this->relem(ix ,iy1,iz1) * tx * dy * dz; - val += this->relem(ix1,iy ,iz ) * dx * ty * tz; - val += this->relem(ix1,iy ,iz1) * dx * ty * dz; - val += this->relem(ix1,iy1,iz ) * dx * dy * tz; - val += this->relem(ix1,iy1,iz1) * dx * dy * dz; - return val; - } - template vec3 get_r(const size_t i, const size_t j, const size_t k) const { @@ -257,6 +231,59 @@ public: return kk; } + data_t get_cic( const vec3& v ) const{ + // warning! this doesn't work with MPI + vec3 x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], + std::fmod(v.y/length_[1]+1.0,1.0)*n_[1], + std::fmod(v.z/length_[2]+1.0,1.0)*n_[2] }); + size_t ix = static_cast(x.x); + size_t iy = static_cast(x.y); + size_t iz = static_cast(x.z); + real_t dx = x.x-real_t(ix), tx = 1.0-dx; + real_t dy = x.y-real_t(iy), ty = 1.0-dy; + real_t dz = x.z-real_t(iz), tz = 1.0-dz; + size_t ix1 = (ix+1)%n_[0]; + size_t iy1 = (iy+1)%n_[1]; + size_t iz1 = (iz+1)%n_[2]; + data_t val = 0.0; + val += this->relem(ix ,iy ,iz ) * tx * ty * tz; + val += this->relem(ix ,iy ,iz1) * tx * ty * dz; + val += this->relem(ix ,iy1,iz ) * tx * dy * tz; + val += this->relem(ix ,iy1,iz1) * tx * dy * dz; + val += this->relem(ix1,iy ,iz ) * dx * ty * tz; + val += this->relem(ix1,iy ,iz1) * dx * ty * dz; + val += this->relem(ix1,iy1,iz ) * dx * dy * tz; + val += this->relem(ix1,iy1,iz1) * dx * dy * dz; + return val; + } + + ccomplex_t get_cic_kspace( const vec3& x ) const{ + // warning! 
this doesn't work with MPI + size_t ix = static_cast(x.x); + size_t iy = static_cast(x.y); + size_t iz = std::min(static_cast(x.z),size(2)-1); //static_cast(x.z); + real_t dx = x.x-real_t(ix), tx = 1.0-dx; + real_t dy = x.y-real_t(iy), ty = 1.0-dy; + real_t dz = x.z-real_t(iz), tz = 1.0-dz; + size_t ix1 = (ix+1)%size(0); + size_t iy1 = (iy+1)%size(1); + size_t iz1 = std::min((iz+1),size(2)-1); + ccomplex_t val = 0.0; + val += this->kelem(ix ,iy ,iz ) * tx * ty * tz; + val += this->kelem(ix ,iy ,iz1) * tx * ty * dz; + val += this->kelem(ix ,iy1,iz ) * tx * dy * tz; + val += this->kelem(ix ,iy1,iz1) * tx * dy * dz; + val += this->kelem(ix1,iy ,iz ) * dx * ty * tz; + val += this->kelem(ix1,iy ,iz1) * dx * ty * dz; + val += this->kelem(ix1,iy1,iz ) * dx * dy * tz; + val += this->kelem(ix1,iy1,iz1) * dx * dy * dz; + // if( val != val ){ + //auto k = this->get_k(ix,iy,iz); + //std::cerr << ix << " " << iy << " " << iz << " " << val << " " << this->gradient(0,{ix,iy,iz}) << " " << this->gradient(1,{ix,iy,iz}) << " " << this->gradient(2,{ix,iy,iz}) << std::endl; + // } + return val; + } + inline ccomplex_t gradient( const int idim, std::array ijk ) const { #if defined(USE_MPI) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index bea2e30..7ba2a55 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -14,11 +14,217 @@ namespace particle{ //! implement Marcos et al. PLT calculation +class lattice_gradient{ +private: + const real_t boxlen_; + const size_t ngmapto_, ngrid_, ngrid32_; + const real_t mapratio_; + Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; + Grid_FFT grad_x_, grad_y_, grad_z_; + + void init_D() + { + const real_t eta = 2.0/ngrid_; // Ewald cutoff shall be 2 cells + const real_t alpha = 1.0/std::sqrt(2)/eta; + const real_t alpha2 = alpha*alpha; + const real_t alpha3 = alpha2*alpha; + const real_t sqrtpi = std::sqrt(M_PI); + const real_t pi32 = std::pow(M_PI,1.5); + + //! just a Kronecker \delta_ij + auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; + + //! just a sign function + auto sign = []( real_t x ) -> real_t { return (x<0.0)? -1.0 : 1.0; }; + + //! short range component of Ewald sum, eq. (A2) of Marcos (2008) + auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { + auto d = vR-vP; + auto r = d.norm(); + // if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! + real_t val{0.0}; + val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r); + val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * + (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); + return val; + }; + + //! sums mirrored copies of short-range component of Ewald sum + auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ + real_t sr = 0.0; + constexpr int N = 3; // number of repeated copies ±N per dimension + for( int i=-N; i<=N; ++i ){ + for( int j=-N; j<=N; ++j ){ + for( int k=-N; k<=N; ++k ){ + if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ + sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); + } + } + } + } + return sr; + }; + + //! 
fill D_ij array with short range evaluated function + #pragma omp parallel for + for( size_t i=0; i p; + p.x = real_t(i)/ngrid_; + for( size_t j=0; j D; + vec3 eval, evec1, evec2, evec3; + + #pragma omp for + for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + const real_t kmod2 = kv.norm_squared(); + + // long range component of Ewald sum + real_t phi0 = -rho0 * std::exp(-0.5*eta*eta*kmod2) / kmod2; + phi0 = (phi0==phi0)? phi0 : 0.0; // catch NaN from division by zero when kmod2=0 + + // assemble short-range + long_range of Ewald sum and add DC component to trace + D_xx_.kelem(i,j,k) = (D_xx_.kelem(i,j,k) - kv[0]*kv[0] * phi0)*nfac + 1.0/3.0; + D_xy_.kelem(i,j,k) = (D_xy_.kelem(i,j,k) - kv[0]*kv[1] * phi0)*nfac; + D_xz_.kelem(i,j,k) = (D_xz_.kelem(i,j,k) - kv[0]*kv[2] * phi0)*nfac; + D_yy_.kelem(i,j,k) = (D_yy_.kelem(i,j,k) - kv[1]*kv[1] * phi0)*nfac + 1.0/3.0; + D_yz_.kelem(i,j,k) = (D_yz_.kelem(i,j,k) - kv[1]*kv[2] * phi0)*nfac; + D_zz_.kelem(i,j,k) = (D_zz_.kelem(i,j,k) - kv[2]*kv[2] * phi0)*nfac + 1.0/3.0; + + } + } + } + + D_xx_.kelem(0,0,0) = 1.0/3.0; + D_xy_.kelem(0,0,0) = 0.0; + D_xz_.kelem(0,0,0) = 0.0; + D_yy_.kelem(0,0,0) = 1.0/3.0; + D_yz_.kelem(0,0,0) = 0.0; + D_zz_.kelem(0,0,0) = 1.0/3.0; + + #pragma omp for + for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + const real_t kmod = kv.norm()/mapratio_/boxlen_; + + // put matrix elements into actual matrix + D = { std::real(D_xx_.kelem(i,j,k)), std::real(D_xy_.kelem(i,j,k)), std::real(D_xz_.kelem(i,j,k)), + std::real(D_yy_.kelem(i,j,k)), std::real(D_yz_.kelem(i,j,k)), std::real(D_zz_.kelem(i,j,k)) }; + + // compute eigenstructure of matrix + D.eigen(eval, evec1, evec2, evec3); + + // store in diagonal components of D_ij + // D_xx_.kelem(i,j,k) = (i!=D_xx_.size(0)/2)? ccomplex_t(0.0,kv.x/mapratio_/boxlen_) : 0.0; + // D_yy_.kelem(i,j,k) = (j!=D_yy_.size(1)/2)? ccomplex_t(0.0,kv.y/mapratio_/boxlen_) : 0.0; + // D_zz_.kelem(i,j,k) = (k!=D_zz_.size(2)-1)? 
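
In summary (reading off the loops above, and up to the FFT normalisation nfac used in the code), init_D() assembles the Fourier-space dynamical matrix from the transformed short-range sum, the damped long-range potential, and a trace term that adds back the DC contribution,

    D_{\mu\nu}(\mathbf{k}) = \Bigl[\widehat{D^{\rm sr}_{\mu\nu}}(\mathbf{k}) - k_\mu k_\nu\, \hat\phi_0(\mathbf{k})\Bigr]\, n_{\rm fac} + \tfrac{1}{3}\,\delta_{\mu\nu},
    \qquad \hat\phi_0(\mathbf{k}) = -\rho_0\, \frac{e^{-\eta^2 k^2/2}}{k^2},

and its eigenvalues and eigenvectors are then computed per k-mode with mat3s::eigen().
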
ccomplex_t(0.0,kv.z/mapratio_/boxlen_) : 0.0; + // D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kv.x/mapratio_/boxlen_); + // D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kv.y/mapratio_/boxlen_); + // D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kv.z/mapratio_/boxlen_); + + D_xx_.kelem(i,j,k) = sign(kv.dot(evec3)) * ccomplex_t(0.0,kmod) * evec3.x; + D_yy_.kelem(i,j,k) = sign(kv.dot(evec3)) * ccomplex_t(0.0,kmod) * evec3.y; + D_zz_.kelem(i,j,k) = sign(kv.dot(evec3)) * ccomplex_t(0.0,kmod) * evec3.z; + + if(std::fabs(kv.dot(evec3))>1e-16){ + D_xx_.kelem(i,j,k) /= (std::fabs(kv.dot(evec3))/kv.norm()); + D_yy_.kelem(i,j,k) /= (std::fabs(kv.dot(evec3))/kv.norm()); + D_zz_.kelem(i,j,k) /= (std::fabs(kv.dot(evec3))/kv.norm()); + } + } + } + } + } + + } + +public: + explicit lattice_gradient( real_t boxlen, size_t ngridother, size_t ngridself=64 ) + : boxlen_(boxlen), ngmapto_(ngridother), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), + D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + grad_x_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_y_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) + { + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + double wtime = get_wtime(); + csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT lattice eigenmodes "<< std::flush; + + init_D(); + + csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + } + + inline ccomplex_t gradient( const int idim, std::array ijk ) const + { + real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; + // std::cerr << ix << " " << ijk[0] << std::endl; + if( idim== 0 ){ + return D_xx_.get_cic_kspace({ix,iy,iz}); + } + else if( idim==1){ + return D_yy_.get_cic_kspace({ix,iy,iz}); + } + return D_zz_.get_cic_kspace({ix,iy,iz}); + } + +}; + +#if 0 inline void test_plt( void ){ csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; csoca::ilog << "Testing PLT implementation..." << std::endl; + lattice_gradient lg( 64 ); + + return; + constexpr real_t pi = M_PI, twopi = 2.0*M_PI; const std::vector> bcc_normals{ @@ -29,7 +235,7 @@ inline void test_plt( void ){ const std::vector> bcc_reciprocal{ {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} - }; + }; /*const std::vector> fcc_reciprocal{ {-2.,0.,2.}, {2.,0.,0.}, {1.,1.,-1.} @@ -78,6 +284,7 @@ inline void test_plt( void ){ rho.FourierTransformForward(); rho.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { real_t kmod = k.norm(); + std::cerr << x << std::endl; return -x * std::exp(-0.5*eta*eta*kmod*kmod) / (kmod*kmod); }); rho.zero_DC_mode(); @@ -246,36 +453,47 @@ inline void test_plt( void ){ if( scalar > 1.01 * amod2 ){ btest=false; break; } } if( btest ){ + // int is = (i>ngrid/2)? i-ngrid : i; + // int js = (j>ngrid/2)? j-ngrid : j; + // int ks = (k>ngrid/2)? 
k-ngrid : k; + vecitk[idx][0] = std::round(vectk[idx][0]*(ngrid)/twopi); vecitk[idx][1] = std::round(vectk[idx][1]*(ngrid)/twopi); vecitk[idx][2] = std::round(vectk[idx][2]*(ngrid)/twopi); - ico[idx][0] = int((ar[0]+l1) * ngrid+0.5); - ico[idx][1] = int((ar[1]+l2) * ngrid+0.5); - ico[idx][2] = int((ar[2]+l3) * ngrid+0.5); - if( ico[idx][2] < 0 ){ - ico[idx][0] = -ico[idx][0]; - ico[idx][1] = -ico[idx][1]; - ico[idx][2] = -ico[idx][2]; - } + ico[idx][0] = std::round((ar[0]+l1) * ngrid); + ico[idx][1] = std::round((ar[1]+l2) * ngrid); + ico[idx][2] = std::round((ar[2]+l3) * ngrid); - ico[idx][0] = (ico[idx][0]+ngrid)%ngrid; - ico[idx][1] = (ico[idx][1]+ngrid)%ngrid; + assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][0]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][0]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][0] - vectk[idx][0] ) < 1e-12 ); + assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] - vectk[idx][1] ) < 1e-12 ); + assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][2]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][2]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][2] - vectk[idx][2] ) < 1e-12 ); + - if( vectk[idx][2] < 0 ){ - vectk[idx][0] = - vectk[idx][0]; - vectk[idx][1] = - vectk[idx][1]; - vectk[idx][2] = - vectk[idx][2]; - } + + // if( ico[idx][2] < 0 ){ + // ico[idx][0] = -ico[idx][0]; + // ico[idx][1] = -ico[idx][1]; + // ico[idx][2] = -ico[idx][2]; + // } - if( vecitk[idx][2] < 0 ){ - vecitk[idx][0] = -vecitk[idx][0]; - vecitk[idx][1] = -vecitk[idx][1]; - vecitk[idx][2] = -vecitk[idx][2]; - } - vecitk[idx][0] = (vecitk[idx][0]+ngrid)%ngrid; - vecitk[idx][1] = (vecitk[idx][1]+ngrid)%ngrid; - vecitk[idx][2] = (vecitk[idx][2]+ngrid)%ngrid; + // ico[idx][0] = (ico[idx][0]+ngrid)%ngrid; + // ico[idx][1] = (ico[idx][1]+ngrid)%ngrid; + + // if( vectk[idx][2] < 0 ){ + // vectk[idx][0] = - vectk[idx][0]; + // vectk[idx][1] = - vectk[idx][1]; + // vectk[idx][2] = - vectk[idx][2]; + // } + + // if( vecitk[idx][2] < 0 ){ + // vecitk[idx][0] = -vecitk[idx][0]; + // vecitk[idx][1] = -vecitk[idx][1]; + // vecitk[idx][2] = -vecitk[idx][2]; + // } + //vecitk[idx][0] = (vecitk[idx][0]+ngrid)%ngrid; + //vecitk[idx][1] = (vecitk[idx][1]+ngrid)%ngrid; + //vecitk[idx][2] = (vecitk[idx][2]+ngrid)%ngrid; @@ -285,8 +503,14 @@ inline void test_plt( void ){ //ofs2 << kv.x << ", " << kv.y << ", " << kv.z << ", " << vectk[idx].x*(ngrid)/twopi << ", " << vectk[idx].y*(ngrid)/twopi << ", " << vectk[idx].z*(ngrid)/twopi << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; - ofs2 << kv.x << ", " << kv.y << ", " << kv.z << ", " << vecitk[idx].x << ", " << vecitk[idx].y << ", " << vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; - //std::cout << real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] << " " << vectk[idx][1] << std::endl; + ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << vecitk[idx].x << ", " << vecitk[idx].y << ", " << vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; + ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << -vecitk[idx].x << ", " << -vecitk[idx].y << ", " << -vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; + + // std::cerr << 
real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][0]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][0]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][0] << " " << vectk[idx][0] << std::endl; + + // std::cerr << real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][0]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][0]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][0] << " " << vectk[idx][0] << std::endl; + //std::cerr << real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] << " " << vectk[idx][1] << std::endl; + // assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] - vectk[idx][1] ) < 1e-12 ); goto endloop; } } @@ -294,9 +518,9 @@ inline void test_plt( void ){ } endloop: ; - D_xx.kelem(i,j,k) = D_xx.kelem(ico[idx][0],ico[idx][1],ico[idx][2]); + //D_xx.kelem(i,j,k) = D_xx.kelem(ico[idx][0],ico[idx][1],ico[idx][2]); // D_xx.kelem(ico[idx][0],ico[idx][1],ico[idx][2]) = D_xx.kelem(i,j,k); - // D_xx.kelem(i,j,k) = D_xx.kelem(i+vecitk[idx][0],j+vecitk[idx][1],k+vecitk[idx][2]); + //D_xx.kelem(i,j,k) = D_xx.kelem(i+vecitk[idx][0],j+vecitk[idx][1],k+vecitk[idx][2]); } } @@ -336,6 +560,6 @@ inline void test_plt( void ){ D_yz.Write_to_HDF5(filename, "e1_z"); } - +#endif } \ No newline at end of file diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 48d31dc..04682ad 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -7,6 +7,7 @@ #include #include +#include #include // for unlink @@ -164,6 +165,12 @@ int Run( ConfigFile& the_config ) // NaiveConvolver Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); //-------------------------------------------------------------------- + //-------------------------------------------------------------------- + // Create PLT gradient operator + //-------------------------------------------------------------------- + particle::lattice_gradient lg( boxlen, ngrid ); + + //-------------------------------------------------------------------- std::vector species_list; species_list.push_back( cosmo_species::dm ); if( bDoBaryons ) species_list.push_back( cosmo_species::baryon ); @@ -455,6 +462,7 @@ int Run( ConfigFile& the_config ) tmp.FourierTransformForward(false); // combine the various LPT potentials into one and take gradient + #if 0 // non PLT corrected version #pragma omp parallel for for (size_t i = 0; i < phi.size(0); ++i) { for (size_t j = 0; j < phi.size(1); ++j) { @@ -467,6 +475,21 @@ int Run( ConfigFile& the_config ) } } } + #else // non PLT corrected version + #pragma omp parallel for + for (size_t i = 0; i < phi.size(0); ++i) { + for (size_t j = 0; j < phi.size(1); ++j) { + for (size_t k = 0; k < phi.size(2); ++k) { + // std::cerr << i << " " << j << " " << k << " " << phi.gradient(idim,{i,j,k}) << " " << lg.gradient(idim,{i,j,k}) << std::endl; + size_t idx = phi.get_idx(i,j,k); + auto phitot = phi.kelem(idx) + phi2.kelem(idx) + phi3a.kelem(idx) + phi3b.kelem(idx); + // divide by Lbox, because displacement is in box units for output plugin + tmp.kelem(idx) = lunit / boxlen * ( lg.gradient(idim,{i,j,k}) * phitot + + lg.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx) ); + } + } + } + #endif tmp.FourierTransformBackward(); // if we write particle data, store particle data in particle structure @@ -491,6 +514,7 @@ int Run( ConfigFile& the_config ) tmp.FourierTransformForward(false); + #if 0 // non 
PLT corrected version #pragma omp parallel for for (size_t i = 0; i < phi.size(0); ++i) { for (size_t j = 0; j < phi.size(1); ++j) { @@ -513,6 +537,30 @@ int Run( ConfigFile& the_config ) } } } + #else // PLT corrected version + #pragma omp parallel for + for (size_t i = 0; i < phi.size(0); ++i) { + for (size_t j = 0; j < phi.size(1); ++j) { + for (size_t k = 0; k < phi.size(2); ++k) { + size_t idx = phi.get_idx(i,j,k); + // divide by Lbox, because displacement is in box units for output plugin + auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx) + vfac3 * (phi3a.kelem(idx) + phi3b.kelem(idx)); + + tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,{i,j,k}) * phitot_v + + vfac3 * (lg.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx)) ); + + if( bAddExternalTides ){ + // modify velocities with anisotropic expansion factor**2 + tmp.kelem(idx) *= std::pow(lss_aniso_alpha[idim],2.0); + } + // if( bSymplecticPT){ + // auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx); + // tmp.kelem(idx) = vunit*ccomplex_t(0.0,1.0) * (kk[idim] * phitot_v) + vfac1 * A3[idim]->kelem(idx); + // } + } + } + } + #endif tmp.FourierTransformBackward(); // if we write particle data, store particle data in particle structure diff --git a/src/main.cc b/src/main.cc index 01ee5be..c36943c 100644 --- a/src/main.cc +++ b/src/main.cc @@ -182,9 +182,9 @@ int main( int argc, char** argv ) /////////////////////////////////////////////////////////////////////// // do the job... /////////////////////////////////////////////////////////////////////// - // ic_generator::Run( the_config ); + ic_generator::Run( the_config ); - particle::test_plt(); + // particle::test_plt(); /////////////////////////////////////////////////////////////////////// #if defined(USE_MPI) From 88ac5ab19a4cbcd8726306f874975dc2ca4c3943 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 6 Nov 2019 11:44:32 +0100 Subject: [PATCH 023/130] fix PS normalisation with PLT proj correction --- include/particle_plt.hh | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 7ba2a55..91cd4d9 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -151,30 +151,24 @@ private: vec3 kv = D_xx_.get_k(i,j,k); const real_t kmod = kv.norm()/mapratio_/boxlen_; - // put matrix elements into actual matrix + // put matrix elements into actual matrix D = { std::real(D_xx_.kelem(i,j,k)), std::real(D_xy_.kelem(i,j,k)), std::real(D_xz_.kelem(i,j,k)), - std::real(D_yy_.kelem(i,j,k)), std::real(D_yz_.kelem(i,j,k)), std::real(D_zz_.kelem(i,j,k)) }; + std::real(D_yy_.kelem(i,j,k)), std::real(D_yz_.kelem(i,j,k)), std::real(D_zz_.kelem(i,j,k)) }; // compute eigenstructure of matrix D.eigen(eval, evec1, evec2, evec3); // store in diagonal components of D_ij - // D_xx_.kelem(i,j,k) = (i!=D_xx_.size(0)/2)? ccomplex_t(0.0,kv.x/mapratio_/boxlen_) : 0.0; - // D_yy_.kelem(i,j,k) = (j!=D_yy_.size(1)/2)? ccomplex_t(0.0,kv.y/mapratio_/boxlen_) : 0.0; - // D_zz_.kelem(i,j,k) = (k!=D_zz_.size(2)-1)? 
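
With this normalisation fix, the operator returned by lattice_gradient::gradient() can be read as replacing the fluid-limit gradient i k_j by the projection onto the unit eigenvector \hat e_3 of D(\mathbf{k}) belonging to the largest eigenvalue,

    \tilde\nabla_j(\mathbf{k}) = \frac{i\,|\mathbf{k}|\;\hat e_{3,j}}{\hat{\mathbf{k}}\cdot\hat e_3} \;\longrightarrow\; i\,k_j \quad \text{for} \quad \hat e_3 \to \hat{\mathbf{k}},

so that the longitudinal component \hat{\mathbf{k}}\cdot\tilde\nabla = i\,|\mathbf{k}| is unchanged, which is what preserves the input power-spectrum normalisation (the point of this commit); modes with \mathbf{k} nearly orthogonal to \hat e_3 are zeroed by the 1e-10 guard.
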
ccomplex_t(0.0,kv.z/mapratio_/boxlen_) : 0.0; - // D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kv.x/mapratio_/boxlen_); - // D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kv.y/mapratio_/boxlen_); - // D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kv.z/mapratio_/boxlen_); + D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.x; + D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.y; + D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.z; - D_xx_.kelem(i,j,k) = sign(kv.dot(evec3)) * ccomplex_t(0.0,kmod) * evec3.x; - D_yy_.kelem(i,j,k) = sign(kv.dot(evec3)) * ccomplex_t(0.0,kmod) * evec3.y; - D_zz_.kelem(i,j,k) = sign(kv.dot(evec3)) * ccomplex_t(0.0,kmod) * evec3.z; + auto norm = (kv.norm()/kv.dot(evec3)); + if ( std::abs(kv.dot(evec3)) < 1e-10 || kv.norm() < 1e-10 ) norm = 0.0; - if(std::fabs(kv.dot(evec3))>1e-16){ - D_xx_.kelem(i,j,k) /= (std::fabs(kv.dot(evec3))/kv.norm()); - D_yy_.kelem(i,j,k) /= (std::fabs(kv.dot(evec3))/kv.norm()); - D_zz_.kelem(i,j,k) /= (std::fabs(kv.dot(evec3))/kv.norm()); - } + D_xx_.kelem(i,j,k) *= norm; + D_yy_.kelem(i,j,k) *= norm; + D_zz_.kelem(i,j,k) *= norm; } } } From d075e496be9cc265c9b7a1aa4b7aa5653181c91d Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 6 Nov 2019 14:06:19 +0100 Subject: [PATCH 024/130] added correction of growth rates --- include/particle_plt.hh | 11 +++++++++++ src/ic_generator.cc | 3 +++ 2 files changed, 14 insertions(+) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 91cd4d9..81c5fcb 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -169,11 +169,16 @@ private: D_xx_.kelem(i,j,k) *= norm; D_yy_.kelem(i,j,k) *= norm; D_zz_.kelem(i,j,k) *= norm; + + // spatially dependent correction to vfact = \dot{D_+}/D_+ + D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*eval[2])-1.)); } } } } + D_xy_.kelem(0,0,0) = 1.0; + } public: @@ -207,6 +212,12 @@ public: return D_zz_.get_cic_kspace({ix,iy,iz}); } + inline ccomplex_t vfac_corr( std::array ijk ) const + { + real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; + return D_xy_.get_cic_kspace({ix,iy,iz}); + } + }; #if 0 diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 04682ad..90fbb0a 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -549,6 +549,9 @@ int Run( ConfigFile& the_config ) tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,{i,j,k}) * phitot_v + vfac3 * (lg.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx)) ); + // correct velocity with PLT mode growth rate + tmp.kelem(idx) *= lg.vfac_corr({i,j,k}); + if( bAddExternalTides ){ // modify velocities with anisotropic expansion factor**2 tmp.kelem(idx) *= std::pow(lss_aniso_alpha[idim],2.0); From dc2564994f9593866b91ba7e575151749e15ee4a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 6 Nov 2019 17:55:09 +0100 Subject: [PATCH 025/130] minor cleanup --- include/particle_plt.hh | 44 +++++++++++++++++++++++++++++------------ src/ic_generator.cc | 43 +--------------------------------------- 2 files changed, 32 insertions(+), 55 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 81c5fcb..abf464a 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -8,6 +8,7 @@ #include +#include #include #include @@ -34,9 +35,6 @@ private: //! just a Kronecker \delta_ij auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; - //! just a sign function - auto sign = []( real_t x ) -> real_t { return (x<0.0)? -1.0 : 1.0; }; - //! short range component of Ewald sum, eq. 
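
The growth-rate correction stored in D_xy_ here is the standard PLT result for an EdS background: a mode whose dynamical matrix has largest eigenvalue \lambda_3(\mathbf{k}) grows as

    D_+(\mathbf{k},a) \propto a^{\,\epsilon(\mathbf{k})}, \qquad \epsilon(\mathbf{k}) = \tfrac{1}{4}\Bigl(\sqrt{1+24\,\lambda_3(\mathbf{k})}-1\Bigr),

which reduces to \epsilon = 1 in the fluid limit \lambda_3 = 1; vfac_corr() returns the factor 1/\epsilon(\mathbf{k}) that the velocity assembly in ic_generator.cc multiplies in.
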
(A2) of Marcos (2008) auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { auto d = vR-vP; @@ -182,8 +180,11 @@ private: } public: - explicit lattice_gradient( real_t boxlen, size_t ngridother, size_t ngridself=64 ) - : boxlen_(boxlen), ngmapto_(ngridother), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), + // real_t boxlen, size_t ngridother + explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=64 ) + : boxlen_( the_config.GetValue("setup", "BoxLength") ), + ngmapto_( the_config.GetValue("setup", "GridRes") ), + ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), @@ -191,8 +192,30 @@ public: grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) { csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); + const lattice lattice_type = + ((lattice_str=="bcc")? lattice_bcc + : ((lattice_str=="fcc")? lattice_fcc + : ((lattice_str=="rsc")? lattice_rsc + : lattice_sc))); + + if( lattice_type != lattice_sc){ + csoca::elog << "PLT not implemented for chosen lattice type! Currently only SC." << std::endl; + abort(); + } + + csoca::ilog << "PLT corrections for SC lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; + +#if defined(USE_MPI) + if( CONFIG::MPI_task_size>1 ) + { + csoca::elog << "PLT not implemented for MPI, cannot run with more than 1 task currently!" 
<< std::endl; + abort(); + } +#endif + double wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT lattice eigenmodes "<< std::flush; + csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; init_D(); @@ -202,13 +225,8 @@ public: inline ccomplex_t gradient( const int idim, std::array ijk ) const { real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; - // std::cerr << ix << " " << ijk[0] << std::endl; - if( idim== 0 ){ - return D_xx_.get_cic_kspace({ix,iy,iz}); - } - else if( idim==1){ - return D_yy_.get_cic_kspace({ix,iy,iz}); - } + if( idim == 0 ) return D_xx_.get_cic_kspace({ix,iy,iz}); + else if( idim == 1 ) return D_yy_.get_cic_kspace({ix,iy,iz}); return D_zz_.get_cic_kspace({ix,iy,iz}); } diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 90fbb0a..4184e86 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -168,7 +168,7 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- // Create PLT gradient operator //-------------------------------------------------------------------- - particle::lattice_gradient lg( boxlen, ngrid ); + particle::lattice_gradient lg( the_config ); //-------------------------------------------------------------------- std::vector species_list; @@ -462,25 +462,10 @@ int Run( ConfigFile& the_config ) tmp.FourierTransformForward(false); // combine the various LPT potentials into one and take gradient - #if 0 // non PLT corrected version #pragma omp parallel for for (size_t i = 0; i < phi.size(0); ++i) { for (size_t j = 0; j < phi.size(1); ++j) { for (size_t k = 0; k < phi.size(2); ++k) { - size_t idx = phi.get_idx(i,j,k); - auto phitot = phi.kelem(idx) + phi2.kelem(idx) + phi3a.kelem(idx) + phi3b.kelem(idx); - // divide by Lbox, because displacement is in box units for output plugin - tmp.kelem(idx) = lunit / boxlen * ( phi.gradient(idim,{i,j,k}) * phitot - + phi.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - phi.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx) ); - } - } - } - #else // non PLT corrected version - #pragma omp parallel for - for (size_t i = 0; i < phi.size(0); ++i) { - for (size_t j = 0; j < phi.size(1); ++j) { - for (size_t k = 0; k < phi.size(2); ++k) { - // std::cerr << i << " " << j << " " << k << " " << phi.gradient(idim,{i,j,k}) << " " << lg.gradient(idim,{i,j,k}) << std::endl; size_t idx = phi.get_idx(i,j,k); auto phitot = phi.kelem(idx) + phi2.kelem(idx) + phi3a.kelem(idx) + phi3b.kelem(idx); // divide by Lbox, because displacement is in box units for output plugin @@ -489,7 +474,6 @@ int Run( ConfigFile& the_config ) } } } - #endif tmp.FourierTransformBackward(); // if we write particle data, store particle data in particle structure @@ -514,30 +498,6 @@ int Run( ConfigFile& the_config ) tmp.FourierTransformForward(false); - #if 0 // non PLT corrected version - #pragma omp parallel for - for (size_t i = 0; i < phi.size(0); ++i) { - for (size_t j = 0; j < phi.size(1); ++j) { - for (size_t k = 0; k < phi.size(2); ++k) { - size_t idx = phi.get_idx(i,j,k); - // divide by Lbox, because displacement is in box units for output plugin - auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx) + vfac3 * (phi3a.kelem(idx) + phi3b.kelem(idx)); - - tmp.kelem(idx) = vunit / boxlen * ( phi.gradient(idim,{i,j,k}) * phitot_v - + vfac3 * (phi.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - phi.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx)) ); - 
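                    // [clarifying note, not part of the original patch] The lines deleted in this hunk
                    // are the old velocity assembly that took gradients with the continuum operator
                    // phi.gradient(); the branch that survives below is identical except that it goes
                    // through the PLT lattice operator lg.gradient(). Schematically (vfac1..vfac3 are
                    // presumably the 1st- to 3rd-order velocity growth factors, defined outside this hunk):
                    //   v_i ~ vunit/Lbox * [ grad_i( vfac1*phi + vfac2*phi2 + vfac3*(phi3a+phi3b) )
                    //                        + vfac3 * ( grad_{i'} A3_{i''} - grad_{i''} A3_{i'} ) ]
                    // with (i, i', i'') presumably a cyclic permutation of the coordinate axes.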
- if( bAddExternalTides ){ - // modify velocities with anisotropic expansion factor**2 - tmp.kelem(idx) *= std::pow(lss_aniso_alpha[idim],2.0); - } - // if( bSymplecticPT){ - // auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx); - // tmp.kelem(idx) = vunit*ccomplex_t(0.0,1.0) * (kk[idim] * phitot_v) + vfac1 * A3[idim]->kelem(idx); - // } - } - } - } - #else // PLT corrected version #pragma omp parallel for for (size_t i = 0; i < phi.size(0); ++i) { for (size_t j = 0; j < phi.size(1); ++j) { @@ -563,7 +523,6 @@ int Run( ConfigFile& the_config ) } } } - #endif tmp.FourierTransformBackward(); // if we write particle data, store particle data in particle structure From 5de14003c00c72619b17b4d080ebcf6b4a14d5e3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 14 Nov 2019 14:09:24 +0100 Subject: [PATCH 026/130] working commit --- include/particle_plt.hh | 191 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 182 insertions(+), 9 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index abf464a..5e926a8 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -25,6 +25,18 @@ private: void init_D() { + constexpr real_t pi = M_PI, twopi = 2.0*M_PI; + + const std::vector> bcc_normals{ + {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, + {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, + {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} + }; + + const std::vector> bcc_reciprocal{ + {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} + }; + const real_t eta = 2.0/ngrid_; // Ewald cutoff shall be 2 cells const real_t alpha = 1.0/std::sqrt(2)/eta; const real_t alpha2 = alpha*alpha; @@ -56,6 +68,17 @@ private: for( int k=-N; k<=N; ++k ){ if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); + //sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ) * 0.5; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} ) * 0.5; + + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; } } } @@ -88,6 +111,7 @@ private: D_yy_.relem(0,0,0) = 0.0; D_yz_.relem(0,0,0) = 0.0; D_zz_.relem(0,0,0) = 0.0; + // Fourier transform all six components D_xx_.FourierTransformForward(); @@ -114,19 +138,42 @@ private: for( size_t k=0; k kv = D_xx_.get_k(i,j,k); + auto& b=bcc_reciprocal; + vec3 kvc = { b[0][0]*kvc[0]+b[1][0]*kvc[1]+b[2][0]*kvc[2], + b[0][1]*kvc[0]+b[1][1]*kvc[1]+b[2][1]*kvc[2], + b[0][2]*kvc[0]+b[1][2]*kvc[1]+b[2][2]*kvc[2] }; + // vec3 kv = {kvc.dot(bcc_reciprocal[0]),kvc.dot(bcc_reciprocal[1]),kvc.dot(bcc_reciprocal[2])}; const real_t kmod2 = kv.norm_squared(); // long range component of Ewald sum - real_t phi0 = -rho0 * std::exp(-0.5*eta*eta*kmod2) / kmod2; + ccomplex_t shift = 1.0;//std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])* D_xx_.get_dx()[0])); + ccomplex_t phi0 = -rho0 * (0.5+0.5*shift) * 
std::exp(-0.5*eta*eta*kmod2) / kmod2; phi0 = (phi0==phi0)? phi0 : 0.0; // catch NaN from division by zero when kmod2=0 + + // const int nn = 3; + // size_t nsum = 0; + // ccomplex_t ff = 0.0; + // for( int is=-nn;is<=nn;is++){ + // for( int js=-nn;js<=nn;js++){ + // for( int ks=-nn;ks<=nn;ks++){ + // if( std::abs(is)+std::abs(js)+std::abs(ks) <= nn ){ + // ff += std::exp(ccomplex_t(0.0,(((is)*kv[0] + (js)*kv[1] + (ks)*kv[2])))); + // ff += std::exp(ccomplex_t(0.0,(((0.5+is)*kv[0] + (0.5+js)*kv[1] + (0.5+ks)*kv[2])))); + // ++nsum; + // } + // } + // } + // } + // ff /= nsum; + ccomplex_t ff = 1.0; //(0.5+0.5*std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])))); // assemble short-range + long_range of Ewald sum and add DC component to trace - D_xx_.kelem(i,j,k) = (D_xx_.kelem(i,j,k) - kv[0]*kv[0] * phi0)*nfac + 1.0/3.0; - D_xy_.kelem(i,j,k) = (D_xy_.kelem(i,j,k) - kv[0]*kv[1] * phi0)*nfac; - D_xz_.kelem(i,j,k) = (D_xz_.kelem(i,j,k) - kv[0]*kv[2] * phi0)*nfac; - D_yy_.kelem(i,j,k) = (D_yy_.kelem(i,j,k) - kv[1]*kv[1] * phi0)*nfac + 1.0/3.0; - D_yz_.kelem(i,j,k) = (D_yz_.kelem(i,j,k) - kv[1]*kv[2] * phi0)*nfac; - D_zz_.kelem(i,j,k) = (D_zz_.kelem(i,j,k) - kv[2]*kv[2] * phi0)*nfac + 1.0/3.0; + D_xx_.kelem(i,j,k) = ff*((D_xx_.kelem(i,j,k) - kv[0]*kv[0] * phi0)*nfac) + 1.0/3.0; + D_xy_.kelem(i,j,k) = ff*((D_xy_.kelem(i,j,k) - kv[0]*kv[1] * phi0)*nfac); + D_xz_.kelem(i,j,k) = ff*((D_xz_.kelem(i,j,k) - kv[0]*kv[2] * phi0)*nfac); + D_yy_.kelem(i,j,k) = ff*((D_yy_.kelem(i,j,k) - kv[1]*kv[1] * phi0)*nfac) + 1.0/3.0; + D_yz_.kelem(i,j,k) = ff*((D_yz_.kelem(i,j,k) - kv[1]*kv[2] * phi0)*nfac); + D_zz_.kelem(i,j,k) = ff*((D_zz_.kelem(i,j,k) - kv[2]*kv[2] * phi0)*nfac) + 1.0/3.0; } } @@ -163,20 +210,145 @@ private: auto norm = (kv.norm()/kv.dot(evec3)); if ( std::abs(kv.dot(evec3)) < 1e-10 || kv.norm() < 1e-10 ) norm = 0.0; - +#ifdef PRODUCTION D_xx_.kelem(i,j,k) *= norm; D_yy_.kelem(i,j,k) *= norm; D_zz_.kelem(i,j,k) *= norm; // spatially dependent correction to vfact = \dot{D_+}/D_+ D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*eval[2])-1.)); +#else + + D_xx_.kelem(i,j,k) = eval[2]; + D_yy_.kelem(i,j,k) = eval[1]; + D_zz_.kelem(i,j,k) = eval[0]; + + D_xy_.kelem(i,j,k) = evec3[0]; + D_xz_.kelem(i,j,k) = evec3[1]; + D_yz_.kelem(i,j,k) = evec3[2]; +#endif } } } } - +#ifdef PRODUCTION D_xy_.kelem(0,0,0) = 1.0; +#endif + ////////////////////////////////////////// + std::string filename("plt_test.hdf5"); + unlink(filename.c_str()); + #if defined(USE_MPI) + MPI_Barrier(MPI_COMM_WORLD); + #endif + // rho.Write_to_HDF5(filename, "rho"); + D_xx_.Write_to_HDF5(filename, "omega1"); + D_yy_.Write_to_HDF5(filename, "omega2"); + D_zz_.Write_to_HDF5(filename, "omega3"); + D_xy_.Write_to_HDF5(filename, "e1_x"); + D_xz_.Write_to_HDF5(filename, "e1_y"); + D_yz_.Write_to_HDF5(filename, "e1_z"); + + } + + + void compute_vectk( ) + { + constexpr real_t pi = M_PI, twopi = 2.0*M_PI; + + const std::vector> bcc_normals{ + {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, + {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, + {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} + }; + + const std::vector> bcc_reciprocal{ + {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} + }; + + std::vector> vectk; + std::vector> ico, vecitk; + vectk.assign(D_xx_.size(0)*D_xx_.size(1)*D_xx_.size(2),vec3()); + ico.assign(D_xx_.size(0)*D_xx_.size(1)*D_xx_.size(2),vec3()); + vecitk.assign(D_xx_.size(0)*D_xx_.size(1)*D_xx_.size(2),vec3()); + + std::ofstream ofs2("test_brillouin.txt"); + + const int numb = 1; + for( size_t i=0; i D; + vec3 eval, 
evec1, evec2, evec3; + vec3 a({0.,0.,0.}); + + for( size_t j=0; j ar = D_xx_.get_k(i,j,k) / (twopi*ngrid_); + vec3 kv = D_xx_.get_k(i,j,k); + + for( int l=0; l<3; l++ ){ + a[l] = 0.0; + for( int m=0; m<3; m++){ + // project k on reciprocal basis + a[l] += ar[m]*bcc_reciprocal[m][l]; + } + } + + // translate the k-vectors into the "candidate" FBZ + vec3 anum; + for( int l1=-numb; l1<=numb; ++l1 ){ + anum[0] = real_t(l1); + for( int l2=-numb; l2<=numb; ++l2 ){ + anum[1] = real_t(l2); + for( int l3=-numb; l3<=numb; ++l3 ){ + anum[2] = real_t(l3); + + vectk[idx] = a; + + for( int l=0; l<3; l++ ){ + for( int m=0; m<3; m++){ + // project k on reciprocal basis + vectk[idx][l] += anum[m]*bcc_reciprocal[m][l]; + } + } + // check if in first Brillouin zone + bool btest=true; + for( size_t l=0; l amod*1.0001 ){ btest=false; break; } + if( scalar > 1.01 * amod2 ){ btest=false; break; } + } + if( btest ){ + + vecitk[idx][0] = std::round(vectk[idx][0]*(ngrid_)/twopi); + vecitk[idx][1] = std::round(vectk[idx][1]*(ngrid_)/twopi); + vecitk[idx][2] = std::round(vectk[idx][2]*(ngrid_)/twopi); + + ico[idx][0] = std::round((ar[0]+l1) * ngrid_); + ico[idx][1] = std::round((ar[1]+l2) * ngrid_); + ico[idx][2] = std::round((ar[2]+l3) * ngrid_); + + ofs2 << vectk[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; + // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << vecitk[idx].x << ", " << vecitk[idx].y << ", " << vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; + // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << -vecitk[idx].x << ", " << -vecitk[idx].y << ", " << -vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; + + goto endloop; + } + } + } + } + endloop: ; + } + } + } } public: @@ -218,6 +390,7 @@ public: csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; init_D(); + compute_vectk(); csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; } From 675ba19a955fca9e6ecbaf7185bbdf777b6a82ad Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 14 Nov 2019 15:36:39 +0100 Subject: [PATCH 027/130] working commit, fft grids can be non-distributed memory even when using MPI --- include/grid_fft.hh | 183 +++++++++++++---------- include/particle_plt.hh | 40 ++--- src/grid_fft.cc | 315 ++++++++++++++++++++-------------------- src/ic_generator.cc | 10 +- 4 files changed, 290 insertions(+), 258 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 00d2cb0..c7bec8f 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -16,16 +16,23 @@ enum space_t }; -template +#ifdef USE_MPI +template +#else +template +#endif class Grid_FFT { protected: #if defined(USE_MPI) - const MPI_Datatype MPI_data_t_type = (typeid(data_t) == typeid(double)) ? MPI_DOUBLE - : (typeid(data_t) == typeid(float)) ? MPI_FLOAT - : (typeid(data_t) == typeid(std::complex)) ? MPI_COMPLEX - : (typeid(data_t) == typeid(std::complex)) ? MPI_DOUBLE_COMPLEX : MPI_INT; + const MPI_Datatype MPI_data_t_type = + (typeid(data_t) == typeid(double)) ? MPI_DOUBLE + : (typeid(data_t) == typeid(float)) ? MPI_FLOAT + : (typeid(data_t) == typeid(std::complex)) ? MPI_COMPLEX + : (typeid(data_t) == typeid(std::complex)) ? 
MPI_DOUBLE_COMPLEX + : MPI_INT; #endif + using grid_fft_t = Grid_FFT; public: std::array n_, nhalf_; std::array sizes_; @@ -54,7 +61,7 @@ public: } // avoid implicit copying of data - Grid_FFT(const Grid_FFT &g) = delete; + Grid_FFT(const grid_fft_t &g) = delete; ~Grid_FFT() { @@ -64,7 +71,7 @@ public: } } - const Grid_FFT *get_grid(size_t ilevel) const { return this; } + const grid_fft_t *get_grid(size_t ilevel) const { return this; } void Setup(); @@ -91,7 +98,7 @@ public: data_[i] = 0.0; } - void copy_from(const Grid_FFT &g) + void copy_from(const grid_fft_t &g) { // make sure the two fields are in the same space if (g.space_ != this->space_) @@ -217,20 +224,34 @@ public: vec3 get_k(const size_t i, const size_t j, const size_t k) const { vec3 kk; - -#if defined(USE_MPI) - auto ip = i + local_1_start_; - kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0]; - kk[1] = (real_t(ip) - real_t(ip > nhalf_[1]) * n_[1]) * kfac_[1]; -#else - kk[0] = (real_t(i) - real_t(i > nhalf_[0]) * n_[0]) * kfac_[0]; - kk[1] = (real_t(j) - real_t(j > nhalf_[1]) * n_[1]) * kfac_[1]; -#endif + if( bdistributed ){ + auto ip = i + local_1_start_; + kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0]; + kk[1] = (real_t(ip) - real_t(ip > nhalf_[1]) * n_[1]) * kfac_[1]; + }else{ + kk[0] = (real_t(i) - real_t(i > nhalf_[0]) * n_[0]) * kfac_[0]; + kk[1] = (real_t(j) - real_t(j > nhalf_[1]) * n_[1]) * kfac_[1]; + } kk[2] = (real_t(k) - real_t(k > nhalf_[2]) * n_[2]) * kfac_[2]; return kk; } + std::array get_k3(const size_t i, const size_t j, const size_t k) const + { + return bdistributed? std::array({j,i+local_1_start_,k}) : std::array({i,j,k}); + // vec3 kk; + // if( bdistributed ){ + // kk[0] = j; + // kk[1] = i + local_1_start_; + // }else{ + // kk[0] = i; + // kk[1] = j; + // } + // kk[2] = k; + // return kk; + } + data_t get_cic( const vec3& v ) const{ // warning! this doesn't work with MPI vec3 x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], @@ -286,16 +307,16 @@ public: inline ccomplex_t gradient( const int idim, std::array ijk ) const { -#if defined(USE_MPI) - ijk[0] += local_1_start_; - std::swap(ijk[0],ijk[1]); -#endif + if( bdistributed ){ + ijk[0] += local_1_start_; + std::swap(ijk[0],ijk[1]); + } real_t rgrad = (ijk[idim]!=nhalf_[idim])? 
(real_t(ijk[idim]) - real_t(ijk[idim] > nhalf_[idim]) * n_[idim]) * kfac_[idim] : 0.0; return ccomplex_t(0.0,rgrad); } - Grid_FFT &operator*=(data_t x) + grid_fft_t &operator*=(data_t x) { if (space_ == kspace_id) { @@ -308,7 +329,7 @@ public: return *this; } - Grid_FFT &operator/=(data_t x) + grid_fft_t &operator/=(data_t x) { if (space_ == kspace_id) { @@ -321,7 +342,7 @@ public: return *this; } - Grid_FFT &apply_Laplacian(void) + grid_fft_t &apply_Laplacian(void) { this->FourierTransformForward(); this->apply_function_k_dep([&](auto x, auto k) { @@ -332,7 +353,7 @@ public: return *this; } - Grid_FFT &apply_negative_Laplacian(void) + grid_fft_t &apply_negative_Laplacian(void) { this->FourierTransformForward(); this->apply_function_k_dep([&](auto x, auto k) { @@ -343,7 +364,7 @@ public: return *this; } - Grid_FFT &apply_InverseLaplacian(void) + grid_fft_t &apply_InverseLaplacian(void) { this->FourierTransformForward(); this->apply_function_k_dep([&](auto x, auto k) { @@ -391,8 +412,7 @@ public: double compute_2norm(void) { real_t sum1{0.0}; -#pragma omp parallel for reduction(+ \ - : sum1) + #pragma omp parallel for reduction(+ : sum1) for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -416,8 +436,7 @@ public: double sum1{0.0}, sum2{0.0}; size_t count{0}; -#pragma omp parallel for reduction(+ \ - : sum1, sum2) + #pragma omp parallel for reduction(+ : sum1, sum2) for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -433,24 +452,26 @@ public: count = sizes_[0] * sizes_[1] * sizes_[2]; #ifdef USE_MPI - double globsum1{0.0}, globsum2{0.0}; - size_t globcount{0}; + if( bdistributed ){ + double globsum1{0.0}, globsum2{0.0}; + size_t globcount{0}; - MPI_Allreduce(reinterpret_cast(&sum1), - reinterpret_cast(&globsum1), - 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&sum1), + reinterpret_cast(&globsum1), + 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(reinterpret_cast(&sum2), - reinterpret_cast(&globsum2), - 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&sum2), + reinterpret_cast(&globsum2), + 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(reinterpret_cast(&count), - reinterpret_cast(&globcount), - 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&count), + reinterpret_cast(&globcount), + 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); - sum1 = globsum1; - sum2 = globsum2; - count = globcount; + sum1 = globsum1; + sum2 = globsum2; + count = globcount; + } #endif sum1 /= count; sum2 /= count; @@ -463,8 +484,7 @@ public: double sum1{0.0}; size_t count{0}; -#pragma omp parallel for reduction(+ \ - : sum1) + #pragma omp parallel for reduction(+ : sum1) for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -479,19 +499,21 @@ public: count = sizes_[0] * sizes_[1] * sizes_[2]; #ifdef USE_MPI - double globsum1{0.0}; - size_t globcount{0}; + if( bdistributed ){ + double globsum1{0.0}; + size_t globcount{0}; - MPI_Allreduce(reinterpret_cast(&sum1), - reinterpret_cast(&globsum1), - 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&sum1), + reinterpret_cast(&globsum1), + 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(reinterpret_cast(&count), - reinterpret_cast(&globcount), - 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&count), + reinterpret_cast(&globcount), + 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); - sum1 = 
globsum1; - count = globcount; + sum1 = globsum1; + count = globcount; + } #endif sum1 /= count; @@ -502,9 +524,9 @@ public: template void assign_function_of_grids_r(const functional &f, const grid_t &g) { - assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) ); + assert(g.size(0) == size(0) && g.size(1) == size(1)); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -523,10 +545,10 @@ public: template void assign_function_of_grids_r(const functional &f, const grid1_t &g1, const grid2_t &g2) { - assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g1.size(2) == size(2)); - assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g2.size(2) == size(2)); + assert(g1.size(0) == size(0) && g1.size(1) == size(1)); + assert(g2.size(0) == size(0) && g2.size(1) == size(1)); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -552,7 +574,7 @@ public: assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g2.size(2) == size(2)); assert(g3.size(0) == size(0) && g3.size(1) == size(1)); // && g3.size(2) == size(2)); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -577,7 +599,7 @@ public: { assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) ); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -599,7 +621,7 @@ public: assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g.size(2) == size(2) ); assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g.size(2) == size(2) ); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -621,7 +643,7 @@ public: { assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) ); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -643,7 +665,7 @@ public: assert(g1.size(0) == size(0) && g1.size(1) == size(1) && g1.size(2) == size(2) ); assert(g2.size(0) == size(0) && g2.size(1) == size(1) && g2.size(2) == size(2) ); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < size(0); ++i) { for (size_t j = 0; j < size(1); ++j) @@ -663,7 +685,7 @@ public: template void apply_function_k_dep(const functional &f) { -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -680,7 +702,7 @@ public: template void apply_function_r_dep(const functional &f) { -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -714,11 +736,12 @@ public: { FourierTransformForward(); apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { -#ifdef WITH_MPI - real_t shift = s.y * k[0] * get_dx()[0] + s.x * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; -#else - real_t shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; -#endif + real_t shift; + if( bdistributed ){ + shift = s.y * k[0] * get_dx()[0] + s.x * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; + }else{ + shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; + } return x * 
std::exp(ccomplex_t(0.0, shift)); }); if( transform_back ){ @@ -730,9 +753,7 @@ public: { if (space_ == kspace_id) { -#ifdef USE_MPI - if (CONFIG::MPI_task_rank == 0) -#endif + if (CONFIG::MPI_task_rank == 0 || !bdistributed ) cdata_[0] = (data_t)0.0; } else @@ -749,12 +770,14 @@ public: } } } + if( bdistributed ){ #if defined(USE_MPI) - data_t glob_sum = 0.0; - MPI_Allreduce(reinterpret_cast(&sum), reinterpret_cast(&glob_sum), - 1, GetMPIDatatype(), MPI_SUM, MPI_COMM_WORLD); - sum = glob_sum; + data_t glob_sum = 0.0; + MPI_Allreduce(reinterpret_cast(&sum), reinterpret_cast(&glob_sum), + 1, GetMPIDatatype(), MPI_SUM, MPI_COMM_WORLD); + sum = glob_sum; #endif + } sum /= sizes_[0] * sizes_[1] * sizes_[2]; #pragma omp parallel for diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 5e926a8..774d3ba 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -12,6 +12,8 @@ #include #include +#define PRODUCTION + namespace particle{ //! implement Marcos et al. PLT calculation @@ -20,8 +22,8 @@ private: const real_t boxlen_; const size_t ngmapto_, ngrid_, ngrid32_; const real_t mapratio_; - Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; - Grid_FFT grad_x_, grad_y_, grad_z_; + Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; + Grid_FFT grad_x_, grad_y_, grad_z_; void init_D() { @@ -146,8 +148,8 @@ private: const real_t kmod2 = kv.norm_squared(); // long range component of Ewald sum - ccomplex_t shift = 1.0;//std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])* D_xx_.get_dx()[0])); - ccomplex_t phi0 = -rho0 * (0.5+0.5*shift) * std::exp(-0.5*eta*eta*kmod2) / kmod2; + //ccomplex_t shift = 1.0;//std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])* D_xx_.get_dx()[0])); + ccomplex_t phi0 = -rho0 * std::exp(-0.5*eta*eta*kmod2) / kmod2; phi0 = (phi0==phi0)? phi0 : 0.0; // catch NaN from division by zero when kmod2=0 @@ -322,7 +324,7 @@ private: amod2 += bcc_normals[l][m]*bcc_normals[l][m]; scalar += bcc_normals[l][m]*vectk[idx][m]; } - real_t amod = std::sqrt(amod2); + //real_t amod = std::sqrt(amod2); //if( scalar/amod > amod*1.0001 ){ btest=false; break; } if( scalar > 1.01 * amod2 ){ btest=false; break; } } @@ -378,13 +380,13 @@ public: csoca::ilog << "PLT corrections for SC lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; -#if defined(USE_MPI) - if( CONFIG::MPI_task_size>1 ) - { - csoca::elog << "PLT not implemented for MPI, cannot run with more than 1 task currently!" << std::endl; - abort(); - } -#endif +// #if defined(USE_MPI) +// if( CONFIG::MPI_task_size>1 ) +// { +// csoca::elog << "PLT not implemented for MPI, cannot run with more than 1 task currently!" << std::endl; +// abort(); +// } +// #endif double wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; @@ -454,7 +456,7 @@ inline void test_plt( void ){ const real_t pi3halfs = std::pow(M_PI,1.5); const real_t dV( std::pow( boxlen/ngrid, 3 ) ); - Grid_FFT rho({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT rho({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 
1.0 : 0.0; }; @@ -511,12 +513,12 @@ inline void test_plt( void ){ return sr; }; - Grid_FFT D_xx({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_xy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_xz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_yy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_yz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_zz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_xx({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_xy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_xz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_yy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_yz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_zz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); #pragma omp parallel for for( size_t i=0; i #include -template -void Grid_FFT::FillRandomReal(unsigned long int seed) +template +void Grid_FFT::FillRandomReal(unsigned long int seed) { gsl_rng *RNG = gsl_rng_alloc(gsl_rng_mt19937); -#if defined(USE_MPI) - seed += 17321 * CONFIG::MPI_task_rank; -#endif + if( bdistributed ){ + seed += 17321 * CONFIG::MPI_task_rank; + } gsl_rng_set(RNG, seed); for (size_t i = 0; i < sizes_[0]; ++i) @@ -28,166 +28,169 @@ void Grid_FFT::FillRandomReal(unsigned long int seed) gsl_rng_free(RNG); } -template -void Grid_FFT::Setup(void) +template +void Grid_FFT::Setup(void) { -#if !defined(USE_MPI) //////////////////////////////////////////////////////////////////////////////////////////// + if( !bdistributed ){ + ntot_ = (n_[2] + 2) * n_[1] * n_[0]; - ntot_ = (n_[2] + 2) * n_[1] * n_[0]; + csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); + if (typeid(data_t) == typeid(real_t)) + { + data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(real_t))); + cdata_ = reinterpret_cast(data_); - csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); - if (typeid(data_t) == typeid(real_t)) - { - data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(real_t))); - cdata_ = reinterpret_cast(data_); + plan_ = FFTW_API(plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, FFTW_RUNMODE); + iplan_ = FFTW_API(plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, FFTW_RUNMODE); + } + else if (typeid(data_t) == typeid(ccomplex_t)) + { + data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(ccomplex_t))); + cdata_ = reinterpret_cast(data_); - plan_ = FFTW_API(plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, FFTW_RUNMODE); - iplan_ = FFTW_API(plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, FFTW_RUNMODE); - } - else if (typeid(data_t) == typeid(ccomplex_t)) - { - data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(ccomplex_t))); - cdata_ = reinterpret_cast(data_); + plan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_FORWARD, FFTW_RUNMODE); + iplan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_BACKWARD, FFTW_RUNMODE); + } + else + { + csoca::elog.Print("invalid data type in Grid_FFT::setup_fft_interface\n"); + } - plan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_FORWARD, FFTW_RUNMODE); - iplan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_BACKWARD, FFTW_RUNMODE); + fft_norm_fac_ = 1.0 
/ std::sqrt((double)((size_t)n_[0] * (double)n_[1] * (double)n_[2])); + + if (typeid(data_t) == typeid(real_t)) + { + npr_ = n_[2] + 2; + npc_ = n_[2] / 2 + 1; + } + else + { + npr_ = n_[2]; + npc_ = n_[2]; + } + + for (int i = 0; i < 3; ++i) + { + nhalf_[i] = n_[i] / 2; + kfac_[i] = 2.0 * M_PI / length_[i]; + dx_[i] = length_[i] / n_[i]; + + global_range_.x1_[i] = 0; + global_range_.x2_[i] = n_[i]; + } + + local_0_size_ = n_[0]; + local_1_size_ = n_[1]; + local_0_start_ = 0; + local_1_start_ = 0; + + if (space_ == rspace_id) + { + sizes_[0] = n_[0]; + sizes_[1] = n_[1]; + sizes_[2] = n_[2]; + sizes_[3] = npr_; + } + else + { + sizes_[0] = n_[1]; + sizes_[1] = n_[0]; + sizes_[2] = npc_; + sizes_[3] = npc_; + } } else { - csoca::elog.Print("invalid data type in Grid_FFT::setup_fft_interface\n"); - } +#ifdef USE_MPI //// i.e. ifdef USE_MPI //////////////////////////////////////////////////////////////////////////////////// + size_t cmplxsz; - fft_norm_fac_ = 1.0 / std::sqrt((double)((size_t)n_[0] * (double)n_[1] * (double)n_[2])); + if (typeid(data_t) == typeid(real_t)) + { + cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD, + &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); + ntot_ = 2 * cmplxsz; + data_ = (data_t *)fftw_malloc(ntot_ * sizeof(real_t)); + cdata_ = reinterpret_cast(data_); + plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, + MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); + iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, + MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); + } + else if (typeid(data_t) == typeid(ccomplex_t)) + { + cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD, + &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); + ntot_ = cmplxsz; + data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t)); + cdata_ = reinterpret_cast(data_); + plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, + MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); + iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, + MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); + } + else + { + csoca::elog.Print("unknown data type in Grid_FFT::setup_fft_interface\n"); + abort(); + } - if (typeid(data_t) == typeid(real_t)) - { - npr_ = n_[2] + 2; - npc_ = n_[2] / 2 + 1; - } - else - { - npr_ = n_[2]; - npc_ = n_[2]; - } + csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); + fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]); - for (int i = 0; i < 3; ++i) - { - nhalf_[i] = n_[i] / 2; - kfac_[i] = 2.0 * M_PI / length_[i]; - dx_[i] = length_[i] / n_[i]; + if (typeid(data_t) == typeid(real_t)) + { + npr_ = n_[2] + 2; + npc_ = n_[2] / 2 + 1; + } + else + { + npr_ = n_[2]; + npc_ = n_[2]; + } - global_range_.x1_[i] = 0; - global_range_.x2_[i] = n_[i]; - } + for (int i = 0; i < 3; ++i) + { + nhalf_[i] = n_[i] / 2; + kfac_[i] = 2.0 * M_PI / length_[i]; + dx_[i] = length_[i] / n_[i]; - local_0_size_ = n_[0]; - local_1_size_ = n_[1]; - local_0_start_ = 0; - local_1_start_ = 0; - - if (space_ == rspace_id) - { - sizes_[0] = n_[0]; - sizes_[1] = n_[1]; - sizes_[2] = n_[2]; - sizes_[3] = npr_; - } - else - { - sizes_[0] = n_[1]; - sizes_[1] = n_[0]; - sizes_[2] = npc_; - sizes_[3] = npc_; - } - 
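        // [clarifying note, not part of the original patch] Setup() now branches on the new bool
        // template parameter: the block above builds plain serial FFTW plans for grids that every
        // rank holds in full, while the block below keeps the previous fftw_mpi transposed-layout
        // setup for slab-distributed grids. A rough usage sketch (template argument names and
        // order are an assumption reconstructed from this patch):
        //   Grid_FFT<real_t, true>  g_dist({N, N, N}, {L, L, L});     // slab-distributed across MPI ranks
        //   Grid_FFT<real_t, false> g_local({n, n, n}, {1., 1., 1.}); // full local copy, e.g. the small PLT mesh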
-#else //// i.e. ifdef USE_MPI //////////////////////////////////////////////////////////////////////////////////// - - size_t cmplxsz; - - if (typeid(data_t) == typeid(real_t)) - { - cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD, - &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); - ntot_ = 2 * cmplxsz; - data_ = (data_t *)fftw_malloc(ntot_ * sizeof(real_t)); - cdata_ = reinterpret_cast(data_); - plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); - iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, - MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); - } - else if (typeid(data_t) == typeid(ccomplex_t)) - { - cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD, - &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); - ntot_ = cmplxsz; - data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t)); - cdata_ = reinterpret_cast(data_); - plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); - iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); - } - else - { - csoca::elog.Print("unknown data type in Grid_FFT::setup_fft_interface\n"); - abort(); - } - - csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); - fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]); - - if (typeid(data_t) == typeid(real_t)) - { - npr_ = n_[2] + 2; - npc_ = n_[2] / 2 + 1; - } - else - { - npr_ = n_[2]; - npc_ = n_[2]; - } - - for (int i = 0; i < 3; ++i) - { - nhalf_[i] = n_[i] / 2; - kfac_[i] = 2.0 * M_PI / length_[i]; - dx_[i] = length_[i] / n_[i]; - - global_range_.x1_[i] = 0; - global_range_.x2_[i] = n_[i]; - } - global_range_.x1_[0] = (int)local_0_start_; - global_range_.x2_[0] = (int)(local_0_start_ + local_0_size_); - - if (space_ == rspace_id) - { - sizes_[0] = (int)local_0_size_; - sizes_[1] = n_[1]; - sizes_[2] = n_[2]; - sizes_[3] = npr_; // holds the physical memory size along the 3rd dimension - } - else - { - sizes_[0] = (int)local_1_size_; - sizes_[1] = n_[0]; - sizes_[2] = npc_; - sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension - } + global_range_.x1_[i] = 0; + global_range_.x2_[i] = n_[i]; + } + global_range_.x1_[0] = (int)local_0_start_; + global_range_.x2_[0] = (int)(local_0_start_ + local_0_size_); + if (space_ == rspace_id) + { + sizes_[0] = (int)local_0_size_; + sizes_[1] = n_[1]; + sizes_[2] = n_[2]; + sizes_[3] = npr_; // holds the physical memory size along the 3rd dimension + } + else + { + sizes_[0] = (int)local_1_size_; + sizes_[1] = n_[0]; + sizes_[2] = npc_; + sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension + } +#else + csoca::flog << "MPI is required for distributed FFT arrays!" 
<< std::endl; + throw std::runtime_error("MPI is required for distributed FFT arrays!"); #endif //// of #ifdef #else USE_MPI //////////////////////////////////////////////////////////////////////////////////// + } } -template -void Grid_FFT::ApplyNorm(void) +template +void Grid_FFT::ApplyNorm(void) { #pragma omp parallel for for (size_t i = 0; i < ntot_; ++i) data_[i] *= fft_norm_fac_; } -template -void Grid_FFT::FourierTransformForward(bool do_transform) +template +void Grid_FFT::FourierTransformForward(bool do_transform) { #if defined(USE_MPI) MPI_Barrier(MPI_COMM_WORLD); @@ -217,8 +220,8 @@ void Grid_FFT::FourierTransformForward(bool do_transform) } } -template -void Grid_FFT::FourierTransformBackward(bool do_transform) +template +void Grid_FFT::FourierTransformBackward(bool do_transform) { #if defined(USE_MPI) MPI_Barrier(MPI_COMM_WORLD); @@ -269,8 +272,8 @@ void create_hdf5(std::string Filename) H5Fclose(HDF_FileID); } -template -void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) const +template +void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) const { hid_t file_id, dset_id; /* file and dataset identifiers */ hid_t filespace, memspace; /* file and memory dataspace identifiers */ @@ -278,6 +281,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) hid_t dtype_id = H5T_NATIVE_FLOAT; hid_t plist_id; + #warning "check if this works for non-distributed fft arrays with MPI" + #if defined(USE_MPI) int mpi_size, mpi_rank; @@ -500,8 +505,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) #include -template -void Grid_FFT::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax) +template +void Grid_FFT::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax) { double logvmin = std::log10(vmin); double logvmax = std::log10(vmax); @@ -552,8 +557,8 @@ void Grid_FFT::Write_PDF(std::string ofname, int nbins, double scale, do #endif } -template -void Grid_FFT::Write_PowerSpectrum(std::string ofname) +template +void Grid_FFT::Write_PowerSpectrum(std::string ofname) { std::vector bin_k, bin_P, bin_eP; std::vector bin_count; @@ -582,8 +587,8 @@ void Grid_FFT::Write_PowerSpectrum(std::string ofname) #endif } -template -void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count ) +template +void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count ) { this->FourierTransformForward(); @@ -663,5 +668,7 @@ void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::ve /********************************************************************************************/ -template class Grid_FFT; -template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 4184e86..4394947 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -469,8 +469,8 @@ int Run( ConfigFile& the_config ) size_t idx = phi.get_idx(i,j,k); auto phitot = phi.kelem(idx) + phi2.kelem(idx) + phi3a.kelem(idx) + phi3b.kelem(idx); // divide by Lbox, because displacement is in box units for output plugin - tmp.kelem(idx) = lunit / boxlen * ( lg.gradient(idim,{i,j,k}) * phitot - + lg.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx) ); + tmp.kelem(idx) = lunit / boxlen * ( 
lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot + + lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx) ); } } } @@ -506,11 +506,11 @@ int Run( ConfigFile& the_config ) // divide by Lbox, because displacement is in box units for output plugin auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx) + vfac3 * (phi3a.kelem(idx) + phi3b.kelem(idx)); - tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,{i,j,k}) * phitot_v - + vfac3 * (lg.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx)) ); + tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot_v + + vfac3 * (lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx)) ); // correct velocity with PLT mode growth rate - tmp.kelem(idx) *= lg.vfac_corr({i,j,k}); + tmp.kelem(idx) *= lg.vfac_corr(tmp.get_k3(i,j,k)); if( bAddExternalTides ){ // modify velocities with anisotropic expansion factor**2 From 07b430f25cf3536df85d49ce4ac586363eceaf21 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 15 Nov 2019 11:54:16 +0100 Subject: [PATCH 028/130] removed unused code to fill grids with random numbers --- include/grid_fft.hh | 10 ---------- src/grid_fft.cc | 26 -------------------------- 2 files changed, 36 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index c7bec8f..49584eb 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -240,16 +240,6 @@ public: std::array get_k3(const size_t i, const size_t j, const size_t k) const { return bdistributed? std::array({j,i+local_1_start_,k}) : std::array({i,j,k}); - // vec3 kk; - // if( bdistributed ){ - // kk[0] = j; - // kk[1] = i + local_1_start_; - // }else{ - // kk[0] = i; - // kk[1] = j; - // } - // kk[2] = k; - // return kk; } data_t get_cic( const vec3& v ) const{ diff --git a/src/grid_fft.cc b/src/grid_fft.cc index d2d2c6b..b7e3f52 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -2,32 +2,6 @@ #include #include -#include -#include - -template -void Grid_FFT::FillRandomReal(unsigned long int seed) -{ - gsl_rng *RNG = gsl_rng_alloc(gsl_rng_mt19937); - if( bdistributed ){ - seed += 17321 * CONFIG::MPI_task_rank; - } - gsl_rng_set(RNG, seed); - - for (size_t i = 0; i < sizes_[0]; ++i) - { - for (size_t j = 0; j < sizes_[1]; ++j) - { - for (size_t k = 0; k < sizes_[2]; ++k) - { - this->relem(i, j, k) = gsl_ran_ugaussian_ratio_method(RNG); - } - } - } - - gsl_rng_free(RNG); -} - template void Grid_FFT::Setup(void) { From 89ec1775f3689b6d8f6c8ddcb21584b7fcd34d23 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 15 Nov 2019 22:32:09 +0100 Subject: [PATCH 029/130] fixed inconsistency between NGENIC random numbers generated with MPI and without --- include/grid_fft.hh | 2 ++ src/plugins/random_ngenic.cc | 35 ++++++++++++++++++++++++++--------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 49584eb..88a938f 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -73,6 +73,8 @@ public: const grid_fft_t *get_grid(size_t ilevel) const { return this; } + bool is_distributed( void ) const { return bdistributed; } + void Setup(); //! 
return the (local) size of dimension i diff --git a/src/plugins/random_ngenic.cc b/src/plugins/random_ngenic.cc index 1aa1942..b84221e 100644 --- a/src/plugins/random_ngenic.cc +++ b/src/plugins/random_ngenic.cc @@ -82,7 +82,11 @@ public: for (size_t j = 0; j < nres_; ++j) { ptrdiff_t jj = (j>0)? nres_ - j : 0; - gsl_rng_set( pRandomGenerator_, SeedTable_[i * nres_ + j]); + if( g.is_distributed() ) + gsl_rng_set( pRandomGenerator_, SeedTable_[j * nres_ + i]); + else + gsl_rng_set( pRandomGenerator_, SeedTable_[i * nres_ + j]); + for (size_t k = 0; k < g.size(2); ++k) { double phase = gsl_rng_uniform(pRandomGenerator_) * 2 * M_PI; @@ -101,15 +105,28 @@ public: if (k > 0) { if (i_in_range) g.kelem(ip,j,k) = zrand; } else{ /* k=0 plane needs special treatment */ - if (i == 0) { - if (j < nres_ / 2 && i_in_range) - { - g.kelem(ip,j,k) = zrand; - g.kelem(ip,jj,k) = std::conj(zrand); + if( g.is_distributed() ){ + if (j == 0) { + if (i < nres_ / 2 && i_in_range) + { + if(i_in_range) g.kelem(ip,jj,k) = zrand; + if(ii_in_range) g.kelem(iip,j,k) = std::conj(zrand); + } + } else if (j < nres_ / 2) { + if(i_in_range) g.kelem(ip,j,k) = zrand; + if(ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand); + } + }else{ + if (i == 0) { + if (j < nres_ / 2 && i_in_range) + { + g.kelem(ip,j,k) = zrand; + g.kelem(ip,jj,k) = std::conj(zrand); + } + } else if (i < nres_ / 2) { + if(i_in_range) g.kelem(ip,j,k) = zrand; + if (ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand); } - } else if (i < nres_ / 2) { - if(i_in_range) g.kelem(ip,j,k) = zrand; - if (ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand); } } } From 9a36cc13fd1f3bf45c7ddb5d7124e6c155f11e14 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 15 Nov 2019 23:19:57 +0100 Subject: [PATCH 030/130] unified calculation of growth factor and growth rate --- include/cosmology_calculator.hh | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index f74039d..ea51299 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -127,14 +127,19 @@ public: return pNorm * scale * scale * TransferSq(k) * pow((double)k, (double)cosmo_param_.nspect); } - inline static double H_of_a(double a, void *Params) + inline static double H_of_a(double a, const void *Params) { - CosmologyParameters *cosm = (CosmologyParameters *)Params; + const CosmologyParameters *cosm = (CosmologyParameters *)Params; double a2 = a * a; double Ha = sqrt(cosm->Omega_m / (a2 * a) + cosm->Omega_k / a2 + cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a)); return Ha; } + inline double H_of_a( double a ) const + { + return 100.0 * this->H_of_a(a,reinterpret_cast(&this->cosmo_param_)); + } + inline static double Hprime_of_a(double a, void *Params) { CosmologyParameters *cosm = (CosmologyParameters *)Params; @@ -168,10 +173,7 @@ public: */ inline real_t CalcGrowthRate( real_t a ) { - #warning CalcGrowthRate is only correct if dark energy is a cosmological constant, need to upgrade calculator... - real_t y = cosmo_param_.Omega_m*(1.0/a-1.0) + cosmo_param_.Omega_DE*(a*a-1.0) + 1.0; - real_t fact = integrate( &fIntegrand, 1e-6, a, (void*)&cosmo_param_ ); - return (cosmo_param_.Omega_DE*a*a-0.5*cosmo_param_.Omega_m/a)/y - 1.0 + a*fIntegrand(a,(void*)&cosmo_param_)/fact; + return CalcVFact(a) / H_of_a(a) / a; } //! 
Computes the linear theory growth factor D+ From bcb301f3381ca4ee9ab5b6443c295f08cdfc1533 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 16 Nov 2019 04:42:38 +0100 Subject: [PATCH 031/130] can write non-distributed grids with MPI, but needs cleanup --- src/grid_fft.cc | 127 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 126 insertions(+), 1 deletion(-) diff --git a/src/grid_fft.cc b/src/grid_fft.cc index b7e3f52..67201ad 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -246,16 +246,141 @@ void create_hdf5(std::string Filename) H5Fclose(HDF_FileID); } + template void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) const { + // FIXME: cleanup duplicate code in this function! + if( !bdistributed && CONFIG::MPI_task_rank==0 ){ + + hid_t file_id, dset_id; /* file and dataset identifiers */ + hid_t filespace, memspace; /* file and memory dataspace identifiers */ + hsize_t offset[3], count[3]; + hid_t dtype_id = H5T_NATIVE_FLOAT; + hid_t plist_id = H5P_DEFAULT; + + if (!file_exists(fname)) + create_hdf5(fname); + + file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id); + + for (int i = 0; i < 3; ++i) + count[i] = size(i); + + if (typeid(data_t) == typeid(float)) + dtype_id = H5T_NATIVE_FLOAT; + else if (typeid(data_t) == typeid(double)) + dtype_id = H5T_NATIVE_DOUBLE; + else if (typeid(data_t) == typeid(std::complex)) + { + dtype_id = H5T_NATIVE_FLOAT; + } + else if (typeid(data_t) == typeid(std::complex)) + { + dtype_id = H5T_NATIVE_DOUBLE; + } + + filespace = H5Screate_simple(3, count, NULL); + dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Sclose(filespace); + + hsize_t slice_sz = size(1) * size(2); + + real_t *buf = new real_t[slice_sz]; + + count[0] = 1; + count[1] = size(1); + count[2] = size(2); + + offset[1] = 0; + offset[2] = 0; + + memspace = H5Screate_simple(3, count, NULL); + filespace = H5Dget_space(dset_id); + + for (size_t i = 0; i < size(0); ++i) + { + offset[0] = i; + for (size_t j = 0; j < size(1); ++j) + { + for (size_t k = 0; k < size(2); ++k) + { + if( this->space_ == rspace_id ) + buf[j * size(2) + k] = std::real(relem(i, j, k)); + else + buf[j * size(2) + k] = std::real(kelem(i, j, k)); + } + } + + H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL); + H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf); + } + + H5Sclose(filespace); + H5Sclose(memspace); + + // H5Sclose(filespace); + H5Dclose(dset_id); + + if (typeid(data_t) == typeid(std::complex) || + typeid(data_t) == typeid(std::complex) || + this->space_ == kspace_id ) + { + datasetname += std::string(".im"); + + for (int i = 0; i < 3; ++i) + count[i] = size(i); + + filespace = H5Screate_simple(3, count, NULL); + dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Sclose(filespace); + + count[0] = 1; + + for (size_t i = 0; i < size(0); ++i) + { + offset[0] = i; + + for (size_t j = 0; j < size(1); ++j) + for (size_t k = 0; k < size(2); ++k) + { + if( this->space_ == rspace_id ) + buf[j * size(2) + k] = std::imag(relem(i, j, k)); + else + buf[j * size(2) + k] = std::imag(kelem(i, j, k)); + } + + memspace = H5Screate_simple(3, count, NULL); + filespace = H5Dget_space(dset_id); + + H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, + NULL); + + H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf); + + H5Sclose(memspace); + H5Sclose(filespace); + } + + H5Dclose(dset_id); 
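            // [clarifying note, not part of the original patch] This early branch handles the
            // non-distributed case: every rank holds the full grid, so only MPI rank 0 writes and
            // the other ranks return a little further below. Note also that `buf` is released only
            // inside this imaginary-part block, so for a purely real-valued real-space grid the
            // slice buffer allocated above does not appear to be freed.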
+ + delete[] buf; + } + + H5Fclose(file_id); + return; + } + + if( !bdistributed && CONFIG::MPI_task_rank!=0 ) return; + hid_t file_id, dset_id; /* file and dataset identifiers */ hid_t filespace, memspace; /* file and memory dataspace identifiers */ hsize_t offset[3], count[3]; hid_t dtype_id = H5T_NATIVE_FLOAT; hid_t plist_id; - #warning "check if this works for non-distributed fft arrays with MPI" #if defined(USE_MPI) From 68d3aa4a4ca5a1af33ec4f409bdfcfa7b0985fca Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 27 Nov 2019 16:23:43 +0100 Subject: [PATCH 032/130] working commit PLT other lattices --- include/particle_plt.hh | 215 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 194 insertions(+), 21 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 774d3ba..96f07ff 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -12,7 +12,7 @@ #include #include -#define PRODUCTION +// #define PRODUCTION namespace particle{ //! implement Marcos et al. PLT calculation @@ -29,6 +29,176 @@ private: { constexpr real_t pi = M_PI, twopi = 2.0*M_PI; + const ptrdiff_t nlattice = 16; + const real_t dx = 1.0/real_t(nlattice); + + const real_t eta = 4.0/nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells + const real_t alpha = 1.0/std::sqrt(2)/eta; + const real_t alpha2 = alpha*alpha; + const real_t alpha3 = alpha2*alpha; + const real_t sqrtpi = std::sqrt(M_PI); + const real_t pi32 = std::pow(M_PI,1.5); + + //! just a Kronecker \delta_ij + auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; + + //! short range component of Ewald sum, eq. (A2) of Marcos (2008) + auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { + auto d = vR-vP; + auto r = d.norm(); + if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! + real_t val{0.0}; + val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r); + val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * + (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); + return val; + }; + + auto greensftide_sr2 = [&]( int mu, int nu, const vec3& d ) -> real_t { + auto r = d.norm(); + if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! 
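                // [clarifying note, not part of the original patch] The two terms accumulated in
                // `val` below are the real-space (short-range) half of the Ewald split of the
                // force-constant matrix, essentially eq. (A2) of Marcos (2008): with d = vR - vP
                // and screening parameter alpha,
                //   D^sr_{mu nu}(d) = - d_mu d_nu / d^2 * alpha^3 / pi^(3/2) * exp(-alpha^2 d^2)
                //                     + ( delta_{mu nu}/d^3 - 3 d_mu d_nu / d^5 ) / (4 pi)
                //                       * ( erfc(alpha*d) + 2*alpha*d/sqrt(pi) * exp(-alpha^2 d^2) ),
                // with the smooth long-range remainder handled in Fourier space via the
                // exp(-eta^2 k^2 / 2) / k^2 kernel used elsewhere in this file.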
+ real_t val{0.0}; + val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r); + val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * + (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); + return val; + }; + + const std::vector> bcc_bravais{ + {1.0,0.0,0.0},{0.0,1.0,0.0},{0.5,0.5,0.5} + }; + + const std::vector> bcc_reciprocal{ + {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} + }; + + const std::vector> bcc_normals{ + {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, + {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, + {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} + }; + + + std::vector> x; + for( ptrdiff_t i=-2*nlattice; i<=2*nlattice; i++ ){ + for( ptrdiff_t j=-2*nlattice; j<=2*nlattice; j++ ){ + for( ptrdiff_t k=-2*nlattice; k<=2*nlattice; k++ ){ + real_t dxp = dx*(real_t(i)*bcc_bravais[0][0]+real_t(j)*bcc_bravais[1][0]+real_t(k)*bcc_bravais[2][0]); + real_t dyp = dx*(real_t(i)*bcc_bravais[0][1]+real_t(j)*bcc_bravais[1][1]+real_t(k)*bcc_bravais[2][1]); + real_t dzp = dx*(real_t(i)*bcc_bravais[0][2]+real_t(j)*bcc_bravais[1][2]+real_t(k)*bcc_bravais[2][2]); + + if( dxp>-1e-10&&dxp<1.0&&dyp>-1e-10&&dyp<1.0&&dzp>-1e-10&&dzp<1.0) + { + x.push_back({dxp,dyp,dzp}); + } + } + } + } + std::vector> a(x.size(),{0.0}); + + + constexpr ptrdiff_t lnumber = 4, knumber = 4; + for( size_t i=0,j=0; j< x.size(); ++j ){ + // r-part + if( i==j ) + { + a[i](0,0) = 1.0/3.0; + a[i](1,1) = 1.0/3.0; + a[i](2,2) = 1.0/3.0; + }else{ + + for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ + for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ + for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ + vec3 ai = {real_t(ix)*nlattice,real_t(iy)*nlattice,real_t(iz)*nlattice}; + auto dr = x[i]-x[j]; + dr[0] -= ai.x*bcc_bravais[0][0]+ai.y*bcc_bravais[1][0]+ai.z*bcc_bravais[2][0]; + dr[1] -= ai.x*bcc_bravais[0][1]+ai.y*bcc_bravais[1][1]+ai.z*bcc_bravais[2][1]; + dr[2] -= ai.x*bcc_bravais[0][2]+ai.y*bcc_bravais[1][2]+ai.z*bcc_bravais[2][2]; + real_t d = dr.norm(); + // std::cerr << dr.x << " " << dr.y << " " << dr.z << " " << greensftide_sr2(0,0,dr) << std::endl; + for( int mu=0; mu<3; ++mu ){ + for( int nu=mu; nu<3; ++nu ){ + a[i](mu,nu) += greensftide_sr2(mu,nu,dr); + } + } + } + } + } + } + + // k-part + if( i!=j ){ + auto dr = x[i]-x[j]; + real_t d = dr.norm(); + for( ptrdiff_t ix=-knumber; ix<=knumber; ix++ ){ + for( ptrdiff_t iy=-knumber; iy<=knumber; iy++ ){ + for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){ + vec3 ak, bk = {real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}; + if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ + ak.x = bk.x*bcc_reciprocal[0][0]+bk.y*bcc_reciprocal[1][0]+bk.z*bcc_reciprocal[2][0]; + ak.y = bk.x*bcc_reciprocal[0][1]+bk.y*bcc_reciprocal[1][1]+bk.z*bcc_reciprocal[2][1]; + ak.z = bk.x*bcc_reciprocal[0][2]+bk.y*bcc_reciprocal[1][2]+bk.z*bcc_reciprocal[2][2]; + real_t amodk2 = ak.norm_squared(); + real_t term = std::exp(-amodk2/(4*alpha*alpha))*std::cos(ak.dot(dr)) / amodk2;// / std::pow(nlattice,3); + for( int mu=0; mu<3; ++mu ){ + for( int nu=mu; nu<3; ++nu ){ + a[i](mu,nu) += ak[mu]*ak[nu]*term; + } + } + } + } + } + } + + } + + + + } + + for( auto& m : a ){ + std::cout << m(0,0) << std::endl; + } + + + //! 
sums mirrored copies of short-range component of Ewald sum + auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ + real_t sr = 0.0; + constexpr int N = 3; // number of repeated copies ±N per dimension + int count = 0; + for( int i=-N; i<=N; ++i ){ + for( int j=-N; j<=N; ++j ){ + for( int k=-N; k<=N; ++k ){ + if( mu!=nu ){ + + } + + if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ + //sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); + sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); + sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} ); + count += 2; + + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; + } + } + } + } + return sr / count; + }; + } + void init_D__old() + { + constexpr real_t pi = M_PI, twopi = 2.0*M_PI; + const std::vector> bcc_normals{ {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, @@ -53,7 +223,7 @@ private: auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { auto d = vR-vP; auto r = d.norm(); - // if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! + if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! real_t val{0.0}; val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r); val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * @@ -65,13 +235,15 @@ private: auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ real_t sr = 0.0; constexpr int N = 3; // number of repeated copies ±N per dimension + int count = 0; for( int i=-N; i<=N; ++i ){ for( int j=-N; j<=N; ++j ){ for( int k=-N; k<=N; ++k ){ if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ + //sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); - //sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ) * 0.5; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} ) * 0.5; + sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} ); + count += 2; // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; @@ -85,7 +257,7 @@ private: } } } - return sr; + return sr / count; }; //! fill D_ij array with short range evaluated function @@ -153,22 +325,23 @@ private: phi0 = (phi0==phi0)? 
phi0 : 0.0; // catch NaN from division by zero when kmod2=0 - // const int nn = 3; - // size_t nsum = 0; - // ccomplex_t ff = 0.0; - // for( int is=-nn;is<=nn;is++){ - // for( int js=-nn;js<=nn;js++){ - // for( int ks=-nn;ks<=nn;ks++){ - // if( std::abs(is)+std::abs(js)+std::abs(ks) <= nn ){ - // ff += std::exp(ccomplex_t(0.0,(((is)*kv[0] + (js)*kv[1] + (ks)*kv[2])))); - // ff += std::exp(ccomplex_t(0.0,(((0.5+is)*kv[0] + (0.5+js)*kv[1] + (0.5+ks)*kv[2])))); - // ++nsum; - // } - // } - // } - // } - // ff /= nsum; - ccomplex_t ff = 1.0; //(0.5+0.5*std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])))); + const int nn = 3; + size_t nsum = 0; + ccomplex_t ff = 0.0; + for( int is=-nn;is<=nn;is++){ + for( int js=-nn;js<=nn;js++){ + for( int ks=-nn;ks<=nn;ks++){ + if( std::abs(is)+std::abs(js)+std::abs(ks) <= nn ){ + ff += std::exp(ccomplex_t(0.0,(((is)*kv[0] + (js)*kv[1] + (ks)*kv[2])))); + ff += std::exp(ccomplex_t(0.0,(((0.5+is)*kv[0] + (0.5+js)*kv[1] + (0.5+ks)*kv[2])))); + ++nsum; + } + } + } + } + ff /= nsum; + // ccomplex_t ff = 1.0; + // ccomplex_t ff = (0.5+0.5*std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])))); // assemble short-range + long_range of Ewald sum and add DC component to trace D_xx_.kelem(i,j,k) = ff*((D_xx_.kelem(i,j,k) - kv[0]*kv[0] * phi0)*nfac) + 1.0/3.0; D_xy_.kelem(i,j,k) = ff*((D_xy_.kelem(i,j,k) - kv[0]*kv[1] * phi0)*nfac); From 093363791eaf85a2db0daf5f9cb0dc410af8d26f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 27 Nov 2019 22:19:39 +0100 Subject: [PATCH 033/130] working commit, decaying mode integration for single fluid --- example.conf | 7 ++-- src/plugins/transfer_CLASS.cc | 70 +++++++++++++++++++++++++++++++---- 2 files changed, 67 insertions(+), 10 deletions(-) diff --git a/example.conf b/example.conf index 58fc969..2ebaeac 100644 --- a/example.conf +++ b/example.conf @@ -4,7 +4,7 @@ GridRes = 128 # length of the box in Mpc/h BoxLength = 250 # starting redshift -zstart = 49.0 +zstart = 100.0 # order of the LPT to be used (1,2 or 3) LPTorder = 3 # also do baryon ICs? 
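# Note on the hunk below: 'transfer = CLASS' switches to CLASS-computed transfer
# functions, and the new 'ztarget' key is forwarded to CLASS as z_pk in
# src/plugins/transfer_CLASS.cc further down, i.e. the redshift at which the transfer
# functions are tabulated; it defaults to 0 when the key is absent. For example,
#   [cosmology]
#   transfer = CLASS
#   ztarget  = 100.0   # tabulate T(k) at the starting redshift itself
# together with zstart = 100.0 above makes CLASS evaluate T(k) directly at the IC
# epoch, presumably so that the growing/decaying-mode split added in this patch can be
# anchored there instead of being rescaled from z = 0.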
@@ -15,8 +15,9 @@ DoFixing = no ParticleLoad = sc [cosmology] -#transfer = CLASS -transfer = eisenstein +transfer = CLASS +ztarget = 100.0 +#transfer = eisenstein Omega_m = 0.302 Omega_b = 0.045 Omega_L = 0.698 diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 85b65b8..3469b7e 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -25,7 +25,13 @@ private: std::vector tab_lnk_, tab_dtot_, tab_dc_, tab_db_, tab_ttot_, tab_tc_, tab_tb_; gsl_interp_accel *gsl_ia_dtot_, *gsl_ia_dc_, *gsl_ia_db_, *gsl_ia_ttot_, *gsl_ia_tc_, *gsl_ia_tb_; gsl_spline *gsl_sp_dtot_, *gsl_sp_dc_, *gsl_sp_db_, *gsl_sp_ttot_, *gsl_sp_tc_, *gsl_sp_tb_; - double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_; + + // single fluid growing/decaying mode decomposition + gsl_interp_accel *gsl_ia_Cplus_, *gsl_ia_Cminus_; + gsl_spline *gsl_sp_Cplus_, *gsl_sp_Cminus_; + std::vector tab_Cplus_, tab_Cminus_; + + double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_, astart_, atarget_; void ClassEngine_get_data( void ){ std::vector d_ncdm, t_ncdm, phi, psi; @@ -37,25 +43,46 @@ private: pars.add("extra metric transfer functions", "yes"); pars.add("z_pk",ztarget_); pars.add("P_k_max_h/Mpc", kmax_); + pars.add("h",h_); pars.add("Omega_b",Omega_b_); // pars.add("Omega_k",0.0); // pars.add("Omega_ur",0.0); pars.add("N_ur",N_ur_); pars.add("Omega_cdm",Omega_m_-Omega_b_); - pars.add("Omega_Lambda",1.0-Omega_m_); - // pars.add("Omega_fld",0.0); - // pars.add("Omega_scf",0.0); + pars.add("Omega_k",0.0); + // pars.add("Omega_Lambda",1.0-Omega_m_); + pars.add("Omega_fld",0.0); + pars.add("Omega_scf",0.0); + pars.add("A_s",2.42e-9); - pars.add("n_s",.96); // tnis doesn't matter for TF + pars.add("n_s",.961); // this doesn't matter for TF pars.add("output","dTk,vTk"); pars.add("YHe",0.248); + pars.add("lensing","no"); + pars.add("alpha_s",0.0); + pars.add("P_k_ini type","analytic_Pk"); + pars.add("gauge","synchronous"); + + pars.add("k_per_decade_for_pk",100); + pars.add("k_per_decade_for_bao",100); - pars.add("k_per_decade_for_pk",50); - pars.add("k_per_decade_for_bao",50); pars.add("compute damping scale","yes"); pars.add("z_reio",-1.0); // make sure reionisation is not included + pars.add("tol_perturb_integration",1.e-8); + pars.add("tol_background_integration",1e-9); + + // high precision options from cl_permille.pre: + // precision file to be passed as input in order to achieve at least percent precision on scalar Cls + pars.add("hyper_flat_approximation_nu", 7000. 
); + pars.add("transfer_neglect_delta_k_S_t0", 0.17 ); + pars.add("transfer_neglect_delta_k_S_t1", 0.05 ); + pars.add("transfer_neglect_delta_k_S_t2", 0.17 ); + pars.add("transfer_neglect_delta_k_S_e", 0.13 ); + pars.add("delta_l_max", 1000 ); + + std::unique_ptr CE = std::make_unique(pars, false); CE->getTk(ztarget_, tab_lnk_, tab_dc_, tab_db_, d_ncdm, tab_dtot_, @@ -74,7 +101,9 @@ public: Omega_b_ = pcf_->GetValue("cosmology","Omega_b"); N_ur_ = pcf_->GetValueSafe("cosmology","N_ur", 3.046); ztarget_ = pcf_->GetValueSafe("cosmology","ztarget",0.0); + atarget_ = 1.0/(1.0+ztarget_); zstart_ = pcf_->GetValue("setup","zstart"); + astart_ = 1.0/(1.0+zstart_); double lbox = pcf_->GetValue("setup","BoxLength"); int nres = pcf_->GetValue("setup","GridRes"); kmax_ = 2.0*M_PI/lbox * nres/2 * sqrt(3) * 2.0; // 120% of spatial diagonal @@ -102,6 +131,33 @@ public: gsl_spline_init(gsl_sp_tc_, &tab_lnk_[0], &tab_tc_[0], tab_lnk_.size()); gsl_spline_init(gsl_sp_tb_, &tab_lnk_[0], &tab_tb_[0], tab_lnk_.size()); + //-------------------------------------------------------------------------- + // single fluid growing/decaying mode decomposition + //-------------------------------------------------------------------------- + gsl_ia_Cplus_ = gsl_interp_accel_alloc(); + gsl_ia_Cminus_ = gsl_interp_accel_alloc(); + + gsl_sp_Cplus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_Cminus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + + tab_Cplus_.assign(tab_lnk_.size(),0); + tab_Cminus_.assign(tab_lnk_.size(),0); + + std::ofstream ofs("grow_decay.txt"); + + for( size_t i=0; i Date: Wed, 27 Nov 2019 22:55:30 +0100 Subject: [PATCH 034/130] updated class submodule --- external/class | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/class b/external/class index b34d7f6..6f3abba 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit b34d7f6c2b72eab3a347c28e62298d62ca9dd69b +Subproject commit 6f3abbab2608712029d740d6c69aad0ba853e507 From 2cce64977af3f2514adc986825e24edc19575da3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 29 Nov 2019 15:41:22 +0100 Subject: [PATCH 035/130] bcc plt working commit --- include/particle_plt.hh | 71 +++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 96f07ff..0942cef 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -32,7 +32,7 @@ private: const ptrdiff_t nlattice = 16; const real_t dx = 1.0/real_t(nlattice); - const real_t eta = 4.0/nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells + const real_t eta = 4.0;//nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells const real_t alpha = 1.0/std::sqrt(2)/eta; const real_t alpha2 = alpha*alpha; const real_t alpha3 = alpha2*alpha; @@ -80,46 +80,58 @@ private: std::vector> x; - for( ptrdiff_t i=-2*nlattice; i<=2*nlattice; i++ ){ - for( ptrdiff_t j=-2*nlattice; j<=2*nlattice; j++ ){ - for( ptrdiff_t k=-2*nlattice; k<=2*nlattice; k++ ){ + // for( ptrdiff_t i=-2*nlattice; i<=2*nlattice; i++ ){ + // for( ptrdiff_t j=-2*nlattice; j<=2*nlattice; j++ ){ + // for( ptrdiff_t k=-2*nlattice; k<=2*nlattice; k++ ){ + // real_t dxp = dx*(real_t(i)*bcc_bravais[0][0]+real_t(j)*bcc_bravais[1][0]+real_t(k)*bcc_bravais[2][0]); + // real_t dyp = dx*(real_t(i)*bcc_bravais[0][1]+real_t(j)*bcc_bravais[1][1]+real_t(k)*bcc_bravais[2][1]); + // real_t dzp = 
dx*(real_t(i)*bcc_bravais[0][2]+real_t(j)*bcc_bravais[1][2]+real_t(k)*bcc_bravais[2][2]); + + // if( dxp>-1e-10&&dxp<1.0&&dyp>-1e-10&&dyp<1.0&&dzp>-1e-10&&dzp<1.0) + // { + // x.push_back({dxp,dyp,dzp}); + // } + // } + // } + // } + for( size_t i=0; i-1e-10&&dxp<1.0&&dyp>-1e-10&&dyp<1.0&&dzp>-1e-10&&dzp<1.0) - { - x.push_back({dxp,dyp,dzp}); - } + dxp = std::fmod( 2.0+dxp, 1.0 ); + dyp = std::fmod( 2.0+dyp, 1.0 ); + dzp = std::fmod( 2.0+dzp, 1.0 ); + x.push_back( {dxp,dyp,dzp} ); } } } std::vector> a(x.size(),{0.0}); + std::ofstream ofs("debug.txt"); constexpr ptrdiff_t lnumber = 4, knumber = 4; for( size_t i=0,j=0; j< x.size(); ++j ){ // r-part if( i==j ) { - a[i](0,0) = 1.0/3.0; - a[i](1,1) = 1.0/3.0; - a[i](2,2) = 1.0/3.0; + a[j](0,0) = 1.0/3.0; + a[j](1,1) = 1.0/3.0; + a[j](2,2) = 1.0/3.0; }else{ for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ - vec3 ai = {real_t(ix)*nlattice,real_t(iy)*nlattice,real_t(iz)*nlattice}; + vec3 ai = {real_t(ix),real_t(iy),real_t(iz)}; auto dr = x[i]-x[j]; - dr[0] -= ai.x*bcc_bravais[0][0]+ai.y*bcc_bravais[1][0]+ai.z*bcc_bravais[2][0]; - dr[1] -= ai.x*bcc_bravais[0][1]+ai.y*bcc_bravais[1][1]+ai.z*bcc_bravais[2][1]; - dr[2] -= ai.x*bcc_bravais[0][2]+ai.y*bcc_bravais[1][2]+ai.z*bcc_bravais[2][2]; - real_t d = dr.norm(); - // std::cerr << dr.x << " " << dr.y << " " << dr.z << " " << greensftide_sr2(0,0,dr) << std::endl; + dr[0] -= (ai.x*bcc_bravais[0][0]+ai.y*bcc_bravais[1][0]+ai.z*bcc_bravais[2][0]); + dr[1] -= (ai.x*bcc_bravais[0][1]+ai.y*bcc_bravais[1][1]+ai.z*bcc_bravais[2][1]); + dr[2] -= (ai.x*bcc_bravais[0][2]+ai.y*bcc_bravais[1][2]+ai.z*bcc_bravais[2][2]); for( int mu=0; mu<3; ++mu ){ for( int nu=mu; nu<3; ++nu ){ - a[i](mu,nu) += greensftide_sr2(mu,nu,dr); + a[j](mu,nu) += greensftide_sr2(mu,nu,dr); } } } @@ -140,10 +152,10 @@ private: ak.y = bk.x*bcc_reciprocal[0][1]+bk.y*bcc_reciprocal[1][1]+bk.z*bcc_reciprocal[2][1]; ak.z = bk.x*bcc_reciprocal[0][2]+bk.y*bcc_reciprocal[1][2]+bk.z*bcc_reciprocal[2][2]; real_t amodk2 = ak.norm_squared(); - real_t term = std::exp(-amodk2/(4*alpha*alpha))*std::cos(ak.dot(dr)) / amodk2;// / std::pow(nlattice,3); + real_t term = std::exp(-amodk2/(4*alpha*alpha))*std::cos(ak.dot(dr)) / amodk2 / std::pow(nlattice,3); for( int mu=0; mu<3; ++mu ){ for( int nu=mu; nu<3; ++nu ){ - a[i](mu,nu) += ak[mu]*ak[nu]*term; + a[j](mu,nu) += ak[mu]*ak[nu]*term; } } } @@ -153,17 +165,22 @@ private: } - + ofs << x[j].x << " " << x[j].y << " " << x[j].z << " " + << a[j](0,0) << " " << a[j](0,1) << " " << a[j](0,2) << " " + << a[j](1,1) << " " << a[j](1,2) << " " << a[j](2,2) << std::endl; + } + + std::cout << "num grid points : " << x.size() << std::endl; - for( auto& m : a ){ - std::cout << m(0,0) << std::endl; - } + // for( auto& m : a ){ + // std::cout << m(0,1) << " "; + // } //! 
sums mirrored copies of short-range component of Ewald sum - auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ + /*auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ real_t sr = 0.0; constexpr int N = 3; // number of repeated copies ±N per dimension int count = 0; @@ -193,7 +210,7 @@ private: } } return sr / count; - }; + };*/ } void init_D__old() { From dcc59368edcc7e00acf8e5009f6f525a01c2631f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Dec 2019 11:28:17 +0100 Subject: [PATCH 036/130] working commit: dynamical matrix for bcc seems right, interpolation doesn't work yet, also too slow --- include/grid_fft.hh | 3 + include/particle_plt.hh | 744 ++++++++++------------------------------ 2 files changed, 190 insertions(+), 557 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 88a938f..66c1a6f 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -77,6 +77,9 @@ public: void Setup(); + //! return the number of data_t elements that we store in the container + size_t memsize( void ) const { return ntot_; } + //! return the (local) size of dimension i size_t size(size_t i) const { return sizes_[i]; } diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 0942cef..7e7979b 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -24,12 +24,14 @@ private: const real_t mapratio_; Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; Grid_FFT grad_x_, grad_y_, grad_z_; + std::vector> vectk_; + std::vector> ico_, vecitk_; void init_D() { constexpr real_t pi = M_PI, twopi = 2.0*M_PI; - const ptrdiff_t nlattice = 16; + const ptrdiff_t nlattice = ngrid_;//16; const real_t dx = 1.0/real_t(nlattice); const real_t eta = 4.0;//nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells @@ -58,12 +60,14 @@ private: auto r = d.norm(); if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! 
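        // Overview of the rewritten init_D() in this patch, as far as visible here:
        //  (1) tabulate the Ewald-summed dynamical matrix D_ij on the ngrid_^3 BCC lattice
        //      sites (generated from the Bravais basis and folded into the unit cube), using
        //      the short-range lambda here plus the reciprocal-space sum below,
        //  (2) Fourier-transform the six independent components D_xx_ ... D_zz_,
        //  (3) eigen-decompose D(k) mode by mode, storing the eigenvalues as mu1..mu3 and the
        //      leading eigenvector as e1 (written to debug.hdf5),
        //  (4) fold each FFT mode into the first Brillouin zone of the BCC lattice by adding
        //      reciprocal-lattice translations and testing against the 12 zone-face normals:
        //      a vector k lies in the FBZ iff  k . n <= |n|^2  for every normal n, i.e. k is
        //      closer to the origin than to any neighbouring reciprocal-lattice point.
        //      A minimal sketch of that membership test (kvec is a placeholder name; the patch
        //      inlines the loop with a ~1% tolerance):
        //
        //        bool in_fbz = true;
        //        for( const auto& n : bcc_normals )
        //            if( kvec.dot(n) > 1.01 * n.norm_squared() ){ in_fbz = false; break; }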
real_t val{0.0}; - val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r); + val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha2*r*r); val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * - (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); + (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha2*r*r)*r); return val; }; + const int charge_multiplicity = 2; + const std::vector> bcc_bravais{ {1.0,0.0,0.0},{0.0,1.0,0.0},{0.5,0.5,0.5} }; @@ -78,140 +82,213 @@ private: {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} }; + const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_multiplicity; + const real_t fft_norm12 = 1.0/std::pow(real_t(nlattice),1.5); std::vector> x; - // for( ptrdiff_t i=-2*nlattice; i<=2*nlattice; i++ ){ - // for( ptrdiff_t j=-2*nlattice; j<=2*nlattice; j++ ){ - // for( ptrdiff_t k=-2*nlattice; k<=2*nlattice; k++ ){ - // real_t dxp = dx*(real_t(i)*bcc_bravais[0][0]+real_t(j)*bcc_bravais[1][0]+real_t(k)*bcc_bravais[2][0]); - // real_t dyp = dx*(real_t(i)*bcc_bravais[0][1]+real_t(j)*bcc_bravais[1][1]+real_t(k)*bcc_bravais[2][1]); - // real_t dzp = dx*(real_t(i)*bcc_bravais[0][2]+real_t(j)*bcc_bravais[1][2]+real_t(k)*bcc_bravais[2][2]); + std::vector> a(x.size(),{0.0}); + constexpr ptrdiff_t lnumber = 4, knumber = 4; + const int numb = 1; - // if( dxp>-1e-10&&dxp<1.0&&dyp>-1e-10&&dyp<1.0&&dzp>-1e-10&&dzp<1.0) - // { - // x.push_back({dxp,dyp,dzp}); - // } - // } - // } - // } + vectk_.assign(D_xx_.memsize(),vec3()); + ico_.assign(D_xx_.memsize(),vec3()); + vecitk_.assign(D_xx_.memsize(),vec3()); + + D_xx_.zero(); + D_xy_.zero(); + D_xz_.zero(); + D_yy_.zero(); + D_yz_.zero(); + D_zz_.zero(); + + #pragma omp parallel for for( size_t i=0; i> a(x.size(),{0.0}); + const vec3 cdr( {std::fmod( 2.0+dxp, 1.0 ),std::fmod( 2.0+dyp, 1.0 ),std::fmod( 2.0+dzp, 1.0 )} ); + vec3 ak; - std::ofstream ofs("debug.txt"); + for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ + for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ + for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ + vec3 ai = {real_t(ix),real_t(iy),real_t(iz)}; + vec3 dr( cdr ); + dr[0] -= (ai.x*bcc_bravais[0][0]+ai.y*bcc_bravais[1][0]+ai.z*bcc_bravais[2][0]); + dr[1] -= (ai.x*bcc_bravais[0][1]+ai.y*bcc_bravais[1][1]+ai.z*bcc_bravais[2][1]); + dr[2] -= (ai.x*bcc_bravais[0][2]+ai.y*bcc_bravais[1][2]+ai.z*bcc_bravais[2][2]); - constexpr ptrdiff_t lnumber = 4, knumber = 4; - for( size_t i=0,j=0; j< x.size(); ++j ){ - // r-part - if( i==j ) - { - a[j](0,0) = 1.0/3.0; - a[j](1,1) = 1.0/3.0; - a[j](2,2) = 1.0/3.0; - }else{ - - for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ - for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ - for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ - vec3 ai = {real_t(ix),real_t(iy),real_t(iz)}; - auto dr = x[i]-x[j]; - dr[0] -= (ai.x*bcc_bravais[0][0]+ai.y*bcc_bravais[1][0]+ai.z*bcc_bravais[2][0]); - dr[1] -= (ai.x*bcc_bravais[0][1]+ai.y*bcc_bravais[1][1]+ai.z*bcc_bravais[2][1]); - dr[2] -= (ai.x*bcc_bravais[0][2]+ai.y*bcc_bravais[1][2]+ai.z*bcc_bravais[2][2]); - for( int mu=0; mu<3; ++mu ){ - for( int nu=mu; nu<3; ++nu ){ - a[j](mu,nu) += greensftide_sr2(mu,nu,dr); + D_xx_.relem(i,j,k) += greensftide_sr2(0,0,dr) * charge; + D_xy_.relem(i,j,k) += greensftide_sr2(0,1,dr) * charge; + D_xz_.relem(i,j,k) += greensftide_sr2(0,2,dr) * charge; + D_yy_.relem(i,j,k) += greensftide_sr2(1,1,dr) * charge; + D_yz_.relem(i,j,k) += greensftide_sr2(1,2,dr) * charge; + D_zz_.relem(i,j,k) += greensftide_sr2(2,2,dr) * charge; + + vec3 bk = 
{real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}; + if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ + ak.x = bk.x*bcc_reciprocal[0][0]+bk.y*bcc_reciprocal[1][0]+bk.z*bcc_reciprocal[2][0]; + ak.y = bk.x*bcc_reciprocal[0][1]+bk.y*bcc_reciprocal[1][1]+bk.z*bcc_reciprocal[2][1]; + ak.z = bk.x*bcc_reciprocal[0][2]+bk.y*bcc_reciprocal[1][2]+bk.z*bcc_reciprocal[2][2]; + real_t amodk2 = ak.norm_squared(); + real_t term = charge*std::exp(-amodk2/(4*alpha*alpha))*std::cos(ak.dot(cdr)) / amodk2 / std::pow(nlattice,3); + D_xx_.relem(i,j,k) += ak.x*ak.x*term; + D_xy_.relem(i,j,k) += ak.x*ak.y*term; + D_xz_.relem(i,j,k) += ak.x*ak.z*term; + D_yy_.relem(i,j,k) += ak.y*ak.y*term; + D_yz_.relem(i,j,k) += ak.y*ak.z*term; + D_zz_.relem(i,j,k) += ak.z*ak.z*term; } } } - } + } } } + } - // k-part - if( i!=j ){ - auto dr = x[i]-x[j]; - real_t d = dr.norm(); - for( ptrdiff_t ix=-knumber; ix<=knumber; ix++ ){ - for( ptrdiff_t iy=-knumber; iy<=knumber; iy++ ){ - for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){ - vec3 ak, bk = {real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}; - if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ - ak.x = bk.x*bcc_reciprocal[0][0]+bk.y*bcc_reciprocal[1][0]+bk.z*bcc_reciprocal[2][0]; - ak.y = bk.x*bcc_reciprocal[0][1]+bk.y*bcc_reciprocal[1][1]+bk.z*bcc_reciprocal[2][1]; - ak.z = bk.x*bcc_reciprocal[0][2]+bk.y*bcc_reciprocal[1][2]+bk.z*bcc_reciprocal[2][2]; - real_t amodk2 = ak.norm_squared(); - real_t term = std::exp(-amodk2/(4*alpha*alpha))*std::cos(ak.dot(dr)) / amodk2 / std::pow(nlattice,3); - for( int mu=0; mu<3; ++mu ){ - for( int nu=mu; nu<3; ++nu ){ - a[j](mu,nu) += ak[mu]*ak[nu]*term; + // fix r=0 with background density (added later in Fourier space) + D_xx_.relem(0,0,0) = 1.0/3.0; + D_xy_.relem(0,0,0) = 0.0; + D_xz_.relem(0,0,0) = 0.0; + D_yy_.relem(0,0,0) = 1.0/3.0; + D_yz_.relem(0,0,0) = 0.0; + D_zz_.relem(0,0,0) = 1.0/3.0; + + D_xx_.FourierTransformForward(); + D_xy_.FourierTransformForward(); + D_xz_.FourierTransformForward(); + D_yy_.FourierTransformForward(); + D_yz_.FourierTransformForward(); + D_zz_.FourierTransformForward(); + + if (CONFIG::MPI_task_rank == 0) + unlink("debug.hdf5"); + D_xx_.Write_to_HDF5("debug.hdf5","Dxx"); + D_xy_.Write_to_HDF5("debug.hdf5","Dxy"); + D_xz_.Write_to_HDF5("debug.hdf5","Dxz"); + D_yy_.Write_to_HDF5("debug.hdf5","Dyy"); + D_yz_.Write_to_HDF5("debug.hdf5","Dyz"); + D_zz_.Write_to_HDF5("debug.hdf5","Dzz"); + + std::ofstream ofs2("test_brillouin.txt"); + + #pragma omp parallel + { + // thread private matrix representation + mat3s D; + vec3 eval, evec1, evec2, evec3; + + #pragma omp for + for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + const real_t kmod = kv.norm()/mapratio_/boxlen_; + + // put matrix elements into actual matrix + D = { std::real(D_xx_.kelem(i,j,k))/fft_norm12, + std::real(D_xy_.kelem(i,j,k))/fft_norm12, + std::real(D_xz_.kelem(i,j,k))/fft_norm12, + std::real(D_yy_.kelem(i,j,k))/fft_norm12, + std::real(D_yz_.kelem(i,j,k))/fft_norm12, + std::real(D_zz_.kelem(i,j,k))/fft_norm12 }; + + // compute eigenstructure of matrix + D.eigen(eval, evec1, evec2, evec3); + + D_xx_.kelem(i,j,k) = eval[2]; + D_yy_.kelem(i,j,k) = eval[1]; + D_zz_.kelem(i,j,k) = eval[0]; + + D_xy_.kelem(i,j,k) = evec3[0]; + D_xz_.kelem(i,j,k) = evec3[1]; + D_yz_.kelem(i,j,k) = evec3[2]; + + + vec3 a({0.,0.,0.}); + + auto idx = D_xx_.get_idx(i,j,k); + + vec3 ar = D_xx_.get_k(i,j,k) / (twopi*ngrid_); + // vec3 kv = D_xx_.get_k(i,j,k); + + for( int l=0; l<3; l++ ){ + a[l] = 0.0; + for( int m=0; m<3; m++){ + // project k on reciprocal 
basis + a[l] += ar[m]*bcc_reciprocal[m][l]; + } + } + + // translate the k-vectors into the "candidate" FBZ + vec3 anum; + for( int l1=-numb; l1<=numb; ++l1 ){ + anum[0] = real_t(l1); + for( int l2=-numb; l2<=numb; ++l2 ){ + anum[1] = real_t(l2); + for( int l3=-numb; l3<=numb; ++l3 ){ + anum[2] = real_t(l3); + + vectk_[idx] = a; + + for( int l=0; l<3; l++ ){ + for( int m=0; m<3; m++){ + // project k on reciprocal basis + vectk_[idx][l] += anum[m]*bcc_reciprocal[m][l]; + } + } + // check if in first Brillouin zone + bool btest=true; + for( size_t l=0; l amod*1.0001 ){ btest=false; break; } + if( scalar > 1.01 * amod2 ){ btest=false; break; } + } + if( btest ){ + + vecitk_[idx][0] = std::round(vectk_[idx][0]*(ngrid_)/twopi); + vecitk_[idx][1] = std::round(vectk_[idx][1]*(ngrid_)/twopi); + vecitk_[idx][2] = std::round(vectk_[idx][2]*(ngrid_)/twopi); + + ico_[idx][0] = std::round((ar[0]+l1) * ngrid_); + ico_[idx][1] = std::round((ar[1]+l2) * ngrid_); + ico_[idx][2] = std::round((ar[2]+l3) * ngrid_); + + ofs2 << vectk_[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; + // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << vecitk_[idx].x << ", " << vecitk_[idx].y << ", " << vecitk_[idx].z << ", " << ico_[idx][0] << ", " << ico_[idx][1] << ", " << ico_[idx][2] << std::endl; + // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << -vecitk_[idx].x << ", " << -vecitk_[idx].y << ", " << -vecitk_[idx].z << ", " << ico_[idx][0] << ", " << ico_[idx][1] << ", " << ico_[idx][2] << std::endl; + + goto endloop; } } } - } + } endloop: ; } } - } - - ofs << x[j].x << " " << x[j].y << " " << x[j].z << " " - << a[j](0,0) << " " << a[j](0,1) << " " << a[j](0,2) << " " - << a[j](1,1) << " " << a[j](1,2) << " " << a[j](2,2) << std::endl; - - } - std::cout << "num grid points : " << x.size() << std::endl; - - // for( auto& m : a ){ - // std::cout << m(0,1) << " "; - // } - //! 
sums mirrored copies of short-range component of Ewald sum - /*auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ - real_t sr = 0.0; - constexpr int N = 3; // number of repeated copies ±N per dimension - int count = 0; - for( int i=-N; i<=N; ++i ){ - for( int j=-N; j<=N; ++j ){ - for( int k=-N; k<=N; ++k ){ - if( mu!=nu ){ - - } - - if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ - //sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); - sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); - sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} ); - count += 2; - - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; - } - } - } - } - return sr / count; - };*/ + D_xx_.Write_to_HDF5("debug.hdf5","mu1"); + D_xy_.Write_to_HDF5("debug.hdf5","mu2"); + D_xz_.Write_to_HDF5("debug.hdf5","mu3"); + D_yy_.Write_to_HDF5("debug.hdf5","e1x"); + D_yz_.Write_to_HDF5("debug.hdf5","e1y"); + D_zz_.Write_to_HDF5("debug.hdf5","e1z"); } + void init_D__old() { constexpr real_t pi = M_PI, twopi = 2.0*M_PI; @@ -444,108 +521,9 @@ private: } - void compute_vectk( ) - { - constexpr real_t pi = M_PI, twopi = 2.0*M_PI; - - const std::vector> bcc_normals{ - {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, - {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, - {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} - }; - - const std::vector> bcc_reciprocal{ - {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} - }; - - std::vector> vectk; - std::vector> ico, vecitk; - vectk.assign(D_xx_.size(0)*D_xx_.size(1)*D_xx_.size(2),vec3()); - ico.assign(D_xx_.size(0)*D_xx_.size(1)*D_xx_.size(2),vec3()); - vecitk.assign(D_xx_.size(0)*D_xx_.size(1)*D_xx_.size(2),vec3()); - - std::ofstream ofs2("test_brillouin.txt"); - - const int numb = 1; - for( size_t i=0; i D; - vec3 eval, evec1, evec2, evec3; - vec3 a({0.,0.,0.}); - - for( size_t j=0; j ar = D_xx_.get_k(i,j,k) / (twopi*ngrid_); - vec3 kv = D_xx_.get_k(i,j,k); - - for( int l=0; l<3; l++ ){ - a[l] = 0.0; - for( int m=0; m<3; m++){ - // project k on reciprocal basis - a[l] += ar[m]*bcc_reciprocal[m][l]; - } - } - - // translate the k-vectors into the "candidate" FBZ - vec3 anum; - for( int l1=-numb; l1<=numb; ++l1 ){ - anum[0] = real_t(l1); - for( int l2=-numb; l2<=numb; ++l2 ){ - anum[1] = real_t(l2); - for( int l3=-numb; l3<=numb; ++l3 ){ - anum[2] = real_t(l3); - - vectk[idx] = a; - - for( int l=0; l<3; l++ ){ - for( int m=0; m<3; m++){ - // project k on reciprocal basis - vectk[idx][l] += anum[m]*bcc_reciprocal[m][l]; - } - } - // check if in first Brillouin zone - bool btest=true; - for( size_t l=0; l amod*1.0001 ){ btest=false; break; } - if( scalar > 1.01 * amod2 ){ btest=false; break; } - } - if( btest ){ - - vecitk[idx][0] = std::round(vectk[idx][0]*(ngrid_)/twopi); - vecitk[idx][1] = std::round(vectk[idx][1]*(ngrid_)/twopi); - vecitk[idx][2] = 
std::round(vectk[idx][2]*(ngrid_)/twopi); - - ico[idx][0] = std::round((ar[0]+l1) * ngrid_); - ico[idx][1] = std::round((ar[1]+l2) * ngrid_); - ico[idx][2] = std::round((ar[2]+l3) * ngrid_); - - ofs2 << vectk[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; - // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << vecitk[idx].x << ", " << vecitk[idx].y << ", " << vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; - // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << -vecitk[idx].x << ", " << -vecitk[idx].y << ", " << -vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; - - goto endloop; - } - } - } - } - endloop: ; - } - } - } - } - public: // real_t boxlen, size_t ngridother - explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=64 ) + explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=16 ) : boxlen_( the_config.GetValue("setup", "BoxLength") ), ngmapto_( the_config.GetValue("setup", "GridRes") ), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), @@ -582,7 +560,6 @@ public: csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; init_D(); - compute_vectk(); csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; } @@ -603,351 +580,4 @@ public: }; -#if 0 -inline void test_plt( void ){ - - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Testing PLT implementation..." << std::endl; - - lattice_gradient lg( 64 ); - - return; - - constexpr real_t pi = M_PI, twopi = 2.0*M_PI; - - const std::vector> bcc_normals{ - {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, - {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, - {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} - }; - - const std::vector> bcc_reciprocal{ - {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} - }; - - /*const std::vector> fcc_reciprocal{ - {-2.,0.,2.}, {2.,0.,0.}, {1.,1.,-1.} - };*/ - - real_t boxlen = 1.0; - - size_t ngrid = 64; - size_t npgrid = 1; - size_t dpg = ngrid/npgrid; - size_t nump = npgrid*npgrid*npgrid; - - real_t pweight = 1.0/real_t(nump); - real_t eta = 2.0 * boxlen/ngrid; - - const real_t alpha = 1.0/std::sqrt(2)/eta; - const real_t alpha2 = alpha*alpha; - const real_t alpha3 = alpha2*alpha; - const real_t sqrtpi = std::sqrt(M_PI); - const real_t pi3halfs = std::pow(M_PI,1.5); - - const real_t dV( std::pow( boxlen/ngrid, 3 ) ); - Grid_FFT rho({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - - auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 
1.0 : 0.0; }; - - auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { - auto d = vR-vP; - auto r = d.norm(); - - if( r< 1e-14 ) return 0.0; - - real_t val = 0.0; - val -= d[mu]*d[nu]/(r*r) * alpha3/pi3halfs * std::exp(-alpha*alpha*r*r); - val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * - (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); - return pweight * val; - }; - - // sc - rho.zero(); - rho.relem(0,0,0) = pweight/dV; - // rho.relem(0,0,0) = pweight/dV/2; - // rho.relem(ngrid/2,ngrid/2,ngrid/2) = pweight/dV/2; - - rho.FourierTransformForward(); - rho.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { - real_t kmod = k.norm(); - std::cerr << x << std::endl; - return -x * std::exp(-0.5*eta*eta*kmod*kmod) / (kmod*kmod); - }); - rho.zero_DC_mode(); - - auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ - real_t sr = 0.0; - int N = 3; - for( int i=-N; i<=N; ++i ){ - for( int j=-N; j<=N; ++j ){ - for( int k=-N; k<=N; ++k ){ - if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ - sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); - - // sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} )/2; - - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; - } - } - } - } - return sr; - }; - - Grid_FFT D_xx({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_xy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_xz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_yy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_yz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_zz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - - #pragma omp parallel for - for( size_t i=0; i p; - p.x = real_t(i)/ngrid; - for( size_t j=0; j D; - vec3 eval, evec1, evec2, evec3; - for( size_t j=0; j kv = D_xx.get_k(i,j,k); - - D_xx.kelem(i,j,k) = (D_xx.kelem(i,j,k) - kv[0]*kv[0] * rho.kelem(i,j,k))*nfac + 1.0/3.0; - D_xy.kelem(i,j,k) = (D_xy.kelem(i,j,k) - kv[0]*kv[1] * rho.kelem(i,j,k))*nfac; - D_xz.kelem(i,j,k) = (D_xz.kelem(i,j,k) - kv[0]*kv[2] * rho.kelem(i,j,k))*nfac; - D_yy.kelem(i,j,k) = (D_yy.kelem(i,j,k) - kv[1]*kv[1] * rho.kelem(i,j,k))*nfac + 1.0/3.0;; - D_yz.kelem(i,j,k) = (D_yz.kelem(i,j,k) - kv[1]*kv[2] * rho.kelem(i,j,k))*nfac; - D_zz.kelem(i,j,k) = (D_zz.kelem(i,j,k) - kv[2]*kv[2] * rho.kelem(i,j,k))*nfac + 1.0/3.0;; - - D = { std::real(D_xx.kelem(i,j,k)), std::real(D_xy.kelem(i,j,k)), std::real(D_xz.kelem(i,j,k)), - std::real(D_yy.kelem(i,j,k)), std::real(D_yz.kelem(i,j,k)), std::real(D_zz.kelem(i,j,k)) }; - - D.eigen(eval, evec1, evec2, evec3); - - D_xx.kelem(i,j,k) = eval[2]; - D_yy.kelem(i,j,k) = eval[1]; - D_zz.kelem(i,j,k) = eval[0]; - - D_xy.kelem(i,j,k) = evec3[0]; - D_xz.kelem(i,j,k) = evec3[1]; - D_yz.kelem(i,j,k) = evec3[2]; - } - } - } - -#if 1 - 
std::vector> vectk; - std::vector> ico, vecitk; - vectk.assign(D_xx.size(0)*D_xx.size(1)*D_xx.size(2),vec3()); - ico.assign(D_xx.size(0)*D_xx.size(1)*D_xx.size(2),vec3()); - vecitk.assign(D_xx.size(0)*D_xx.size(1)*D_xx.size(2),vec3()); - - std::ofstream ofs2("test_brillouin.txt"); - - const int numb = 1; - for( size_t i=0; i D; - vec3 eval, evec1, evec2, evec3; - vec3 a({0.,0.,0.}); - - for( size_t j=0; j ar = D_xx.get_k(i,j,k) / (twopi*ngrid); - vec3 kv = D_xx.get_k(i,j,k); - - for( int l=0; l<3; l++ ){ - a[l] = 0.0; - for( int m=0; m<3; m++){ - // project k on reciprocal basis - a[l] += ar[m]*bcc_reciprocal[m][l]; - } - } - - // translate the k-vectors into the "candidate" FBZ - vec3 anum; - for( int l1=-numb; l1<=numb; ++l1 ){ - anum[0] = real_t(l1); - for( int l2=-numb; l2<=numb; ++l2 ){ - anum[1] = real_t(l2); - for( int l3=-numb; l3<=numb; ++l3 ){ - anum[2] = real_t(l3); - - vectk[idx] = a; - - for( int l=0; l<3; l++ ){ - for( int m=0; m<3; m++){ - // project k on reciprocal basis - vectk[idx][l] += anum[m]*bcc_reciprocal[m][l]; - } - } - // check if in first Brillouin zone - bool btest=true; - for( size_t l=0; l amod*1.0001 ){ btest=false; break; } - if( scalar > 1.01 * amod2 ){ btest=false; break; } - } - if( btest ){ - // int is = (i>ngrid/2)? i-ngrid : i; - // int js = (j>ngrid/2)? j-ngrid : j; - // int ks = (k>ngrid/2)? k-ngrid : k; - - vecitk[idx][0] = std::round(vectk[idx][0]*(ngrid)/twopi); - vecitk[idx][1] = std::round(vectk[idx][1]*(ngrid)/twopi); - vecitk[idx][2] = std::round(vectk[idx][2]*(ngrid)/twopi); - - ico[idx][0] = std::round((ar[0]+l1) * ngrid); - ico[idx][1] = std::round((ar[1]+l2) * ngrid); - ico[idx][2] = std::round((ar[2]+l3) * ngrid); - - assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][0]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][0]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][0] - vectk[idx][0] ) < 1e-12 ); - assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] - vectk[idx][1] ) < 1e-12 ); - assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][2]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][2]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][2] - vectk[idx][2] ) < 1e-12 ); - - - - // if( ico[idx][2] < 0 ){ - // ico[idx][0] = -ico[idx][0]; - // ico[idx][1] = -ico[idx][1]; - // ico[idx][2] = -ico[idx][2]; - // } - - // ico[idx][0] = (ico[idx][0]+ngrid)%ngrid; - // ico[idx][1] = (ico[idx][1]+ngrid)%ngrid; - - // if( vectk[idx][2] < 0 ){ - // vectk[idx][0] = - vectk[idx][0]; - // vectk[idx][1] = - vectk[idx][1]; - // vectk[idx][2] = - vectk[idx][2]; - // } - - // if( vecitk[idx][2] < 0 ){ - // vecitk[idx][0] = -vecitk[idx][0]; - // vecitk[idx][1] = -vecitk[idx][1]; - // vecitk[idx][2] = -vecitk[idx][2]; - // } - //vecitk[idx][0] = (vecitk[idx][0]+ngrid)%ngrid; - //vecitk[idx][1] = (vecitk[idx][1]+ngrid)%ngrid; - //vecitk[idx][2] = (vecitk[idx][2]+ngrid)%ngrid; - - - - //vecitk[idx][0] = (vecitk[idx][0]<0)? vecitk[idx][0]+ngrid : vecitk[idx][0];; - //vecitk[idx][1] = (vecitk[idx][1]<0)? 
vecitk[idx][1]+ngrid : vecitk[idx][1]; - - - - //ofs2 << kv.x << ", " << kv.y << ", " << kv.z << ", " << vectk[idx].x*(ngrid)/twopi << ", " << vectk[idx].y*(ngrid)/twopi << ", " << vectk[idx].z*(ngrid)/twopi << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; - ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << vecitk[idx].x << ", " << vecitk[idx].y << ", " << vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; - ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << -vecitk[idx].x << ", " << -vecitk[idx].y << ", " << -vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; - - // std::cerr << real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][0]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][0]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][0] << " " << vectk[idx][0] << std::endl; - - // std::cerr << real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][0]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][0]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][0] << " " << vectk[idx][0] << std::endl; - //std::cerr << real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] << " " << vectk[idx][1] << std::endl; - // assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] - vectk[idx][1] ) < 1e-12 ); - goto endloop; - } - } - } - } - endloop: ; - - //D_xx.kelem(i,j,k) = D_xx.kelem(ico[idx][0],ico[idx][1],ico[idx][2]); - // D_xx.kelem(ico[idx][0],ico[idx][1],ico[idx][2]) = D_xx.kelem(i,j,k); - //D_xx.kelem(i,j,k) = D_xx.kelem(i+vecitk[idx][0],j+vecitk[idx][1],k+vecitk[idx][2]); - } - } - - } - -#endif - - std::ofstream ofs("test_ewald.txt"); - for( size_t i=0; i kv = D_xx.get_k(i,j,k); - ofs << std::setw(16) << kv.norm() / kNyquist - << std::setw(16) << std::real(D_xx.kelem(i,j,k)) - << std::setw(16) << std::real(D_yy.kelem(i,j,k)) - << std::setw(16) << std::real(D_zz.kelem(i,j,k)) - << std::setw(16) << kv[0] - << std::setw(16) << kv[1] - << std::setw(16) << kv[2] - << std::endl; - } - } - } - - - std::string filename("plt_test.hdf5"); - unlink(filename.c_str()); -#if defined(USE_MPI) - MPI_Barrier(MPI_COMM_WORLD); -#endif -// rho.Write_to_HDF5(filename, "rho"); - D_xx.Write_to_HDF5(filename, "omega1"); - D_yy.Write_to_HDF5(filename, "omega2"); - D_zz.Write_to_HDF5(filename, "omega3"); - D_xy.Write_to_HDF5(filename, "e1_x"); - D_xz.Write_to_HDF5(filename, "e1_y"); - D_yz.Write_to_HDF5(filename, "e1_z"); - -} -#endif - } \ No newline at end of file From 0ea91247e2171a9d4fe7f68cf2ab969d85894800 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Dec 2019 14:34:28 +0100 Subject: [PATCH 037/130] working commit: optimizations/cleanup --- include/mat3.hh | 157 ++++++++++++++++++++---------- include/particle_plt.hh | 207 +++++++++++++++++++++------------------- include/vec3.hh | 49 +++++++--- 3 files changed, 254 insertions(+), 159 deletions(-) diff --git a/include/mat3.hh b/include/mat3.hh index 04ac0ac..3c28f13 100644 --- a/include/mat3.hh +++ b/include/mat3.hh @@ -4,87 +4,122 @@ #include template -class mat3s{ +class mat3{ protected: std::array data_; gsl_matrix_view m_; gsl_vector *eval_; gsl_matrix *evec_; gsl_eigen_symmv_workspace * wsp_; + bool bdid_alloc_gsl_; void init_gsl(){ - m_ = gsl_matrix_view_array (&data_[0], 3, 3); - eval_ = gsl_vector_alloc (3); - 
evec_ = gsl_matrix_alloc (3, 3); - wsp_ = gsl_eigen_symmv_alloc (3); + // allocate memory for GSL operations if we haven't done so yet + if( !bdid_alloc_gsl_ ) + { + m_ = gsl_matrix_view_array (&data_[0], 3, 3); + eval_ = gsl_vector_alloc (3); + evec_ = gsl_matrix_alloc (3, 3); + wsp_ = gsl_eigen_symmv_alloc (3); + bdid_alloc_gsl_ = true; + } } void free_gsl(){ - gsl_eigen_symmv_free (wsp_); - gsl_vector_free (eval_); - gsl_matrix_free (evec_); + // free memory for GSL operations if it was allocated + if( bdid_alloc_gsl_ ) + { + gsl_eigen_symmv_free (wsp_); + gsl_vector_free (eval_); + gsl_matrix_free (evec_); + } } public: - mat3s(){ - this->init_gsl(); - } + mat3() + : bdid_alloc_gsl_(false) + {} //! copy constructor - mat3s( const mat3s &m) - : data_(m.data_){ - this->init_gsl(); - } + mat3( const mat3 &m) + : data_(m.data_), bdid_alloc_gsl_(false) + {} //! move constructor - mat3s( mat3s &&m) - : data_(std::move(m.data_)){ - this->init_gsl(); - } + mat3( mat3 &&m) + : data_(std::move(m.data_)), bdid_alloc_gsl_(false) + {} - //! construct vec3 from initializer list + //! construct mat3 from initializer list template - mat3s(E&&...e) - : data_{{std::forward(e)...}}{ - // resort into symmetrix matrix - data_[8] = data_[5]; - data_[7] = data_[4]; - data_[6] = data_[2]; - data_[5] = data_[4]; - data_[4] = data_[3]; - data_[3] = data_[1]; - this->init_gsl(); - } + mat3(E&&...e) + : data_{{std::forward(e)...}}, bdid_alloc_gsl_(false) + {} - mat3s& operator=(const mat3s& m){ + mat3& operator=(const mat3& m) noexcept{ data_ = m.data_; return *this; } - mat3s& operator=(const mat3s&& m){ + mat3& operator=(const mat3&& m) noexcept{ data_ = std::move(m.data_); return *this; } - - //! bracket index access to vector components - T &operator[](size_t i){ return data_[i];} - - //! const bracket index access to vector components - const T &operator[](size_t i) const { return data_[i]; } - - //! matrix 2d index access - T &operator()(size_t i, size_t j){ return data_[3*i+j]; } - - //! const matrix 2d index access - const T &operator()(size_t i, size_t j) const { return data_[3*i+j]; } //! destructor - ~mat3s(){ + ~mat3(){ this->free_gsl(); } + + //! bracket index access to vector components + T &operator[](size_t i) noexcept { return data_[i];} + + //! const bracket index access to vector components + const T &operator[](size_t i) const noexcept { return data_[i]; } + + //! matrix 2d index access + T &operator()(size_t i, size_t j) noexcept { return data_[3*i+j]; } + + //! const matrix 2d index access + const T &operator()(size_t i, size_t j) const noexcept { return data_[3*i+j]; } + + //! in-place addition + mat3& operator+=( const mat3& rhs ) noexcept{ + for (size_t i = 0; i < 9; ++i) { + (*this)[i] += rhs[i]; + } + return *this; + } + + //! 
in-place subtraction + mat3& operator-=( const mat3& rhs ) noexcept{ + for (size_t i = 0; i < 9; ++i) { + (*this)[i] -= rhs[i]; + } + return *this; + } + + void zero() noexcept{ + for (size_t i = 0; i < 9; ++i) data_[i]=0; + } + + void eigen( vec3& evals, vec3& evec1, vec3& evec2, vec3& evec3 ) + { + // for( auto x : data_ ){ + // std::cerr << x << " " ; + // } + // std::cerr << std::endl; + // resort into symmetrix matrix + // data_[8] = data_[5]; + // data_[7] = data_[4]; + // data_[6] = data_[2]; + // data_[5] = data_[4]; + // data_[4] = data_[3]; + // data_[3] = data_[1]; + + this->init_gsl(); - void eigen( vec3& evals, vec3& evec1, vec3& evec2, vec3& evec3 ){ gsl_eigen_symmv (&m_.matrix, eval_, evec_, wsp_); gsl_eigen_symmv_sort (eval_, evec_, GSL_EIGEN_SORT_VAL_ASC); @@ -94,5 +129,31 @@ public: evec2[i] = gsl_matrix_get( evec_, i, 1 ); evec3[i] = gsl_matrix_get( evec_, i, 2 ); } + + // std::cerr << "(" << evals[0] << " " << evals[1] << " " << evals[2] << ")" << std::endl; } -}; \ No newline at end of file +}; + +template +constexpr const mat3 operator+(const mat3 &lhs, const mat3 &rhs) noexcept +{ + mat3 result; + for (size_t i = 0; i < 9; ++i) { + result[i] = lhs[i] + rhs[i]; + } + return result; +} + +// matrix - vector multiplication +template +vec3 operator*( const mat3 &A, const vec3 &v ) noexcept +{ + vec3 result; + for( int mu=0; mu<3; ++mu ){ + result[mu] = 0.0; + for( int nu=0; nu<3; ++nu ){ + result[mu] += A(mu,nu)*v[nu]; + } + } + return result; +} diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 7e7979b..051a6b3 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -31,45 +31,18 @@ private: { constexpr real_t pi = M_PI, twopi = 2.0*M_PI; - const ptrdiff_t nlattice = ngrid_;//16; - const real_t dx = 1.0/real_t(nlattice); - - const real_t eta = 4.0;//nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells - const real_t alpha = 1.0/std::sqrt(2)/eta; - const real_t alpha2 = alpha*alpha; - const real_t alpha3 = alpha2*alpha; - const real_t sqrtpi = std::sqrt(M_PI); - const real_t pi32 = std::pow(M_PI,1.5); - - //! just a Kronecker \delta_ij - auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; - - //! short range component of Ewald sum, eq. (A2) of Marcos (2008) - auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { - auto d = vR-vP; - auto r = d.norm(); - if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! - real_t val{0.0}; - val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r); - val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * - (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); - return val; - }; - - auto greensftide_sr2 = [&]( int mu, int nu, const vec3& d ) -> real_t { - auto r = d.norm(); - if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! 
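        // In this cleanup the per-component lambdas being removed here are superseded by
        // add_greensftide_sr() / add_greensftide_lr(), which accumulate the full symmetric 3x3
        // contribution into a thread-private mat3 in one pass, and the Bravais/reciprocal bases
        // become mat3 objects (mat_bcc_bravais / mat_bcc_reciprocal below) whose columns hold
        // the lattice vectors a_i and b_j. Since mat3 now allocates its GSL eigen workspace
        // lazily (only when eigen() is first called, see the mat3.hh hunk above), the
        // per-thread matD temporaries used further down stay allocation-free. The two basis
        // matrices must satisfy  a_i . b_j = 2*pi*delta_ij, and 1/|det A| is the number of
        // lattice sites per unit cube (the charge multiplicity, 2 for this BCC basis). An
        // illustrative consistency check, not part of the patch, could look like this:

        template< typename T >
        bool lattice_pair_is_consistent( const mat3<T>& A, const mat3<T>& B, T tol = T(1e-10) )
        {
            for( int i=0; i<3; ++i ){       // column i of A : Bravais vector a_i
                for( int j=0; j<3; ++j ){   // column j of B : reciprocal vector b_j
                    T dot = T(0);
                    for( int mu=0; mu<3; ++mu ) dot += A(mu,i) * B(mu,j);
                    const T expect = (i==j)? T(2.0*M_PI) : T(0);
                    if( std::fabs(dot - expect) > tol ) return false;
                }
            }
            return true;
        }
        // e.g. lattice_pair_is_consistent( mat_bcc_bravais, mat_bcc_reciprocal ) should hold.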
- real_t val{0.0}; - val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha2*r*r); - val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * - (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha2*r*r)*r); - return val; - }; - const int charge_multiplicity = 2; - const std::vector> bcc_bravais{ - {1.0,0.0,0.0},{0.0,1.0,0.0},{0.5,0.5,0.5} + const mat3 mat_bcc_bravais{ + 1.0, 0.0, 0.5, + 0.0, 1.0, 0.5, + 0.0, 0.0, 0.5, + }; + + const mat3 mat_bcc_reciprocal{ + twopi, 0.0, 0.0, + 0.0, twopi, 0.0, + -twopi, -twopi, 2.0*twopi, }; const std::vector> bcc_reciprocal{ @@ -82,11 +55,55 @@ private: {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} }; - const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_multiplicity; - const real_t fft_norm12 = 1.0/std::pow(real_t(nlattice),1.5); - std::vector> x; - std::vector> a(x.size(),{0.0}); + + const size_t nlattice = ngrid_;//16; + const real_t dx = 1.0/real_t(nlattice); + + const real_t eta = 4.0;//nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells + const real_t alpha = 1.0/std::sqrt(2)/eta; + const real_t alpha2 = alpha*alpha; + const real_t alpha3 = alpha2*alpha; + const real_t sqrtpi = std::sqrt(M_PI); + const real_t fourpi = 4.0*M_PI; + const real_t pi32 = std::pow(M_PI,1.5); + + const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_multiplicity; + const real_t fft_norm = 1.0/std::pow(real_t(nlattice),3.0); + const real_t fft_norm12 = 1.0/std::pow(real_t(nlattice),1.5); + + //! just a Kronecker \delta_ij + auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; + + auto add_greensftide_sr = [&]( mat3& D, const vec3& d ) -> void { + auto r = d.norm(); + if( r< 1e-14 ) return; // return zero for r=0 + + const real_t r2(r*r), r3(r2*r), r5(r3*r2); + const real_t K1( -alpha3/pi32 * std::exp(-alpha2*r2)/r2 ); + const real_t K2( (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha2*r2)*r)/fourpi ); + + for( int mu=0; mu<3; ++mu ){ + for( int nu=mu; nu<3; ++nu ){ + real_t dd( d[mu]*d[nu] * K1 + (kronecker(mu,nu)/r3 - 3.0 * (d[mu]*d[nu])/r5) * K2 ); + D(mu,nu) += dd; + D(nu,mu) += (mu!=nu)? dd : 0.0; + } + } + }; + + auto add_greensftide_lr = [&]( mat3& D, const vec3& k, const vec3& r ) -> void { + real_t kmod2 = k.norm_squared(); + real_t term = std::exp(-kmod2/(4*alpha2))*std::cos(k.dot(r)) / kmod2 * fft_norm; + for( int mu=0; mu<3; ++mu ){ + for( int nu=mu; nu<3; ++nu ){ + auto dd = k[mu] * k[nu] * term; + D(mu,nu) += dd; + D(nu,mu) += (mu!=nu)? dd : 0.0; + } + } + }; + constexpr ptrdiff_t lnumber = 4, knumber = 4; const int numb = 1; @@ -94,59 +111,57 @@ private: ico_.assign(D_xx_.memsize(),vec3()); vecitk_.assign(D_xx_.memsize(),vec3()); - D_xx_.zero(); - D_xy_.zero(); - D_xz_.zero(); - D_yy_.zero(); - D_yz_.zero(); - D_zz_.zero(); + #pragma omp parallel + { + //... 
temporary to hold values of the dynamical matrix + mat3 matD(0.0); - #pragma omp parallel for - for( size_t i=0; i cdr( {std::fmod( 2.0+dxp, 1.0 ),std::fmod( 2.0+dyp, 1.0 ),std::fmod( 2.0+dzp, 1.0 )} ); - vec3 ak; + #pragma omp for + for( size_t i=0; i x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)}); + const vec3 ar = (mat_bcc_bravais * x_ijk).wrap_abs(); - for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ - for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ - for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ - vec3 ai = {real_t(ix),real_t(iy),real_t(iz)}; - vec3 dr( cdr ); - dr[0] -= (ai.x*bcc_bravais[0][0]+ai.y*bcc_bravais[1][0]+ai.z*bcc_bravais[2][0]); - dr[1] -= (ai.x*bcc_bravais[0][1]+ai.y*bcc_bravais[1][1]+ai.z*bcc_bravais[2][1]); - dr[2] -= (ai.x*bcc_bravais[0][2]+ai.y*bcc_bravais[1][2]+ai.z*bcc_bravais[2][2]); + //... zero temporary matrix + matD.zero(); - D_xx_.relem(i,j,k) += greensftide_sr2(0,0,dr) * charge; - D_xy_.relem(i,j,k) += greensftide_sr2(0,1,dr) * charge; - D_xz_.relem(i,j,k) += greensftide_sr2(0,2,dr) * charge; - D_yy_.relem(i,j,k) += greensftide_sr2(1,1,dr) * charge; - D_yz_.relem(i,j,k) += greensftide_sr2(1,2,dr) * charge; - D_zz_.relem(i,j,k) += greensftide_sr2(2,2,dr) * charge; - - vec3 bk = {real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}; - if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ - ak.x = bk.x*bcc_reciprocal[0][0]+bk.y*bcc_reciprocal[1][0]+bk.z*bcc_reciprocal[2][0]; - ak.y = bk.x*bcc_reciprocal[0][1]+bk.y*bcc_reciprocal[1][1]+bk.z*bcc_reciprocal[2][1]; - ak.z = bk.x*bcc_reciprocal[0][2]+bk.y*bcc_reciprocal[1][2]+bk.z*bcc_reciprocal[2][2]; - real_t amodk2 = ak.norm_squared(); - real_t term = charge*std::exp(-amodk2/(4*alpha*alpha))*std::cos(ak.dot(cdr)) / amodk2 / std::pow(nlattice,3); - D_xx_.relem(i,j,k) += ak.x*ak.x*term; - D_xy_.relem(i,j,k) += ak.x*ak.y*term; - D_xz_.relem(i,j,k) += ak.x*ak.z*term; - D_yy_.relem(i,j,k) += ak.y*ak.y*term; - D_yz_.relem(i,j,k) += ak.y*ak.z*term; - D_zz_.relem(i,j,k) += ak.z*ak.z*term; + // add real-space part of dynamical matrix, periodic copies + for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ + for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ + for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ + const vec3 n_ijk({real_t(ix),real_t(iy),real_t(iz)}); + const vec3 dr(ar - mat_bcc_bravais * n_ijk); + add_greensftide_sr(matD, dr); } } } - } + + // add k-space part of dynamical matrix + for( ptrdiff_t ix=-knumber; ix<=knumber; ix++ ){ + for( ptrdiff_t iy=-knumber; iy<=knumber; iy++ ){ + for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){ + if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ + const vec3 k_ijk({real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}); + const vec3 ak( mat_bcc_reciprocal * k_ijk); + + add_greensftide_lr(matD, ak, ar ); + } + } + } + } + + D_xx_.relem(i,j,k) = matD(0,0) * charge; + D_xy_.relem(i,j,k) = matD(0,1) * charge; + D_xz_.relem(i,j,k) = matD(0,2) * charge; + D_yy_.relem(i,j,k) = matD(1,1) * charge; + D_yz_.relem(i,j,k) = matD(1,2) * charge; + D_zz_.relem(i,j,k) = matD(2,2) * charge; + } } } - } + } // end omp parallel region // fix r=0 with background density (added later in Fourier space) D_xx_.relem(0,0,0) = 1.0/3.0; @@ -177,7 +192,7 @@ private: #pragma omp parallel { // thread private matrix representation - mat3s D; + mat3 D; vec3 eval, evec1, evec2, evec3; #pragma omp for @@ -191,13 +206,13 @@ private: const real_t kmod = kv.norm()/mapratio_/boxlen_; // put matrix elements into actual matrix - D = { std::real(D_xx_.kelem(i,j,k))/fft_norm12, - 
std::real(D_xy_.kelem(i,j,k))/fft_norm12, - std::real(D_xz_.kelem(i,j,k))/fft_norm12, - std::real(D_yy_.kelem(i,j,k))/fft_norm12, - std::real(D_yz_.kelem(i,j,k))/fft_norm12, - std::real(D_zz_.kelem(i,j,k))/fft_norm12 }; - + D(0,0) = std::real(D_xx_.kelem(i,j,k)) / fft_norm12; + D(0,1) = D(1,0) = std::real(D_xy_.kelem(i,j,k)) / fft_norm12; + D(0,2) = D(2,0) = std::real(D_xz_.kelem(i,j,k)) / fft_norm12; + D(1,1) = std::real(D_yy_.kelem(i,j,k)) / fft_norm12; + D(1,2) = D(2,1) = std::real(D_yz_.kelem(i,j,k)) / fft_norm12; + D(2,2) = std::real(D_zz_.kelem(i,j,k)) / fft_norm12; + // compute eigenstructure of matrix D.eigen(eval, evec1, evec2, evec3); @@ -395,7 +410,7 @@ private: #pragma omp parallel { // thread private matrix representation - mat3s D; + mat3 D; vec3 eval, evec1, evec2, evec3; #pragma omp for @@ -523,7 +538,7 @@ private: public: // real_t boxlen, size_t ngridother - explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=16 ) + explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=32 ) : boxlen_( the_config.GetValue("setup", "BoxLength") ), ngmapto_( the_config.GetValue("setup", "GridRes") ), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), diff --git a/include/vec3.hh b/include/vec3.hh index b6550ae..af40bf3 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -25,6 +25,10 @@ public: //! copy constructor vec3( const vec3 &v) : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} + + //! copy constructor for non-const reference, needed to avoid variadic template being called for non-const reference + vec3( vec3& v) + : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} //! move constructor vec3( vec3 &&v) @@ -33,52 +37,67 @@ public: //! construct vec3 from initializer list template vec3(E&&...e) - : data_{{std::forward(e)...}}, x(data_[0]), y(data_[1]), z(data_[2]){} + : data_{{std::forward(e)...}}, x{data_[0]}, y{data_[1]}, z{data_[2]} + {} + // vec3( T a, T b, T c ) + // : data_{{a,b,c}}, x(data_[0]), y(data_[1]), z(data_[2]){} //! bracket index access to vector components - T &operator[](size_t i){ return data_[i];} + T &operator[](size_t i) noexcept{ return data_[i];} //! const bracket index access to vector components - const T &operator[](size_t i) const { return data_[i]; } + const T &operator[](size_t i) const noexcept { return data_[i]; } // assignment operator - vec3& operator=( const vec3& v ) { data_=v.data_; return *this; } + vec3& operator=( const vec3& v ) noexcept { data_=v.data_; return *this; } // assignment operator - const vec3& operator=( const vec3& v ) const { data_=v.data_; return *this; } + const vec3& operator=( const vec3& v ) const noexcept{ data_=v.data_; return *this; } //! implementation of summation of vec3 - vec3 operator+( const vec3& v ) const{ return vec3({x+v.x,y+v.y,z+v.z}); } + vec3 operator+( const vec3& v ) const noexcept{ return vec3({x+v.x,y+v.y,z+v.z}); } //! implementation of difference of vec3 - vec3 operator-( const vec3& v ) const{ return vec3({x-v.x,y-v.y,z-v.z}); } + vec3 operator-( const vec3& v ) const noexcept{ return vec3({x-v.x,y-v.y,z-v.z}); } //! implementation of scalar multiplication - vec3 operator*( T s ) const{ return vec3({x*s,y*s,z*s}); } + vec3 operator*( T s ) const noexcept{ return vec3({x*s,y*s,z*s}); } //! implementation of scalar division - vec3 operator/( T s ) const{ return vec3({x/s,y/s,z/s}); } + vec3 operator/( T s ) const noexcept{ return vec3({x/s,y/s,z/s}); } //! 
implementation of += operator - vec3& operator+=( const vec3& v ) const{ x+=v.x; y+=v.y; z+=v.z; return *this; } + vec3& operator+=( const vec3& v ) const noexcept{ x+=v.x; y+=v.y; z+=v.z; return *this; } //! implementation of -= operator - vec3& operator-=( const vec3& v ) const{ x-=v.x; y-=v.y; z-=v.z; return *this; } + vec3& operator-=( const vec3& v ) const noexcept{ x-=v.x; y-=v.y; z-=v.z; return *this; } //! multiply with scalar - vec3& operator*=( T s ) const{ x*=s; y*=s; z*=s; return *this; } + vec3& operator*=( T s ) const noexcept{ x*=s; y*=s; z*=s; return *this; } //! compute dot product with another vector - T dot(const vec3 &a) const + T dot(const vec3 &a) const noexcept { return data_[0] * a.data_[0] + data_[1] * a.data_[1] + data_[2] * a.data_[2]; } //! returns 2-norm squared of vector - T norm_squared(void) const { return this->dot(*this); } + T norm_squared(void) const noexcept { return this->dot(*this); } //! returns 2-norm of vector - T norm(void) const { return std::sqrt( this->norm_squared() ); } + T norm(void) const noexcept { return std::sqrt( this->norm_squared() ); } + + //! wrap absolute vector to box of size p + vec3& wrap_abs( T p = 1.0 ) noexcept{ + for( auto& x : data_ ) x = std::fmod( 2*p + x, p ); + return *this; + } + + //! wrap relative vector to box of size p + vec3& wrap_rel( T p = 1.0 ) noexcept{ + for( auto& x : data_ ) x = (x<-p/2)? x+p : (x>=p/2)? x-p : x; + return *this; + } }; //! multiplication with scalar From a71795cbb3859f30269180a49ddeceec5e6c855f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Dec 2019 15:04:25 +0100 Subject: [PATCH 038/130] added other lattice types to dynamical matrix calculation --- include/particle_plt.hh | 116 ++++++++++++++++++++++++++-------------- 1 file changed, 77 insertions(+), 39 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 051a6b3..934f544 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -29,34 +29,82 @@ private: void init_D() { - constexpr real_t pi = M_PI, twopi = 2.0*M_PI; + constexpr real_t pi = M_PI; + constexpr real_t twopi = 2.0*M_PI; + constexpr real_t fourpi = 4.0*M_PI; + constexpr real_t sqrtpi = std::sqrt(M_PI); + constexpr real_t pi32 = std::pow(M_PI,1.5); const int charge_multiplicity = 2; - const mat3 mat_bcc_bravais{ + //! === vectors, reciprocals and normals for the SC lattice === + const int charge_fac_sc = 1; + const mat3 mat_bravais_sc{ + 1.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0, + }; + const mat3 mat_reciprocal_sc{ + twopi, 0.0, 0.0, + 0.0, twopi, 0.0, + 0.0, 0.0, twopi, + }; + const std::vector> normals_sc{ + {pi,0.,0.},{-pi,0.,0.}, + {0.,pi,0.},{0.,-pi,0.}, + {0.,0.,pi},{0.,0.,-pi}, + }; + + + //! === vectors, reciprocals and normals for the BCC lattice === + const int charge_fac_bcc = 2; + const mat3 mat_bravais_bcc{ 1.0, 0.0, 0.5, 0.0, 1.0, 0.5, 0.0, 0.0, 0.5, }; - - const mat3 mat_bcc_reciprocal{ + const mat3 mat_reciprocal_bcc{ twopi, 0.0, 0.0, 0.0, twopi, 0.0, - -twopi, -twopi, 2.0*twopi, + -twopi, -twopi, fourpi, }; - - const std::vector> bcc_reciprocal{ - {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} - }; - - const std::vector> bcc_normals{ + const std::vector> normals_bcc{ {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} }; - + //! 
=== vectors, reciprocals and normals for the FCC lattice === + const int charge_fac_fcc = 4; + const mat3 mat_bravais_fcc{ + 0.0, 0.5, 0.0, + 0.5, 0.0, 1.0, + 0.5, 0.5, 0.0, + }; + const mat3 mat_reciprocal_fcc{ + -fourpi, fourpi, twopi, + 0.0, 0.0, twopi, + fourpi, 0.0, -twopi, + }; + const std::vector> normals_fcc{ + {twopi,0.,0.},{-twopi,0.,0.}, + {0.,twopi,0.},{0.,-twopi,0.}, + {0.,0.,twopi},{0.,0.,-twopi}, + {+pi,+pi,+pi},{+pi,+pi,-pi}, + {+pi,-pi,+pi},{+pi,-pi,-pi}, + {-pi,+pi,+pi},{-pi,+pi,-pi}, + {-pi,-pi,+pi},{-pi,-pi,-pi}, + }; + + //! select the properties for the chosen lattice + const int ilat = 2; // 0 = sc, 1 = bcc, 2 = fcc + + const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc; + const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc; + const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc; + const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? charge_fac_bcc : charge_fac_sc; + const size_t nlattice = ngrid_;//16; const real_t dx = 1.0/real_t(nlattice); @@ -64,11 +112,8 @@ private: const real_t alpha = 1.0/std::sqrt(2)/eta; const real_t alpha2 = alpha*alpha; const real_t alpha3 = alpha2*alpha; - const real_t sqrtpi = std::sqrt(M_PI); - const real_t fourpi = 4.0*M_PI; - const real_t pi32 = std::pow(M_PI,1.5); - const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_multiplicity; + const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_fac; const real_t fft_norm = 1.0/std::pow(real_t(nlattice),3.0); const real_t fft_norm12 = 1.0/std::pow(real_t(nlattice),1.5); @@ -122,7 +167,7 @@ private: for( size_t k=0; k x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)}); - const vec3 ar = (mat_bcc_bravais * x_ijk).wrap_abs(); + const vec3 ar = (mat_bravais * x_ijk).wrap_abs(); //... 
zero temporary matrix matD.zero(); @@ -132,7 +177,7 @@ private: for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ const vec3 n_ijk({real_t(ix),real_t(iy),real_t(iz)}); - const vec3 dr(ar - mat_bcc_bravais * n_ijk); + const vec3 dr(ar - mat_bravais * n_ijk); add_greensftide_sr(matD, dr); } } @@ -144,7 +189,7 @@ private: for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){ if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ const vec3 k_ijk({real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}); - const vec3 ak( mat_bcc_reciprocal * k_ijk); + const vec3 ak( mat_reciprocal * k_ijk); add_greensftide_lr(matD, ak, ar ); } @@ -230,15 +275,7 @@ private: auto idx = D_xx_.get_idx(i,j,k); vec3 ar = D_xx_.get_k(i,j,k) / (twopi*ngrid_); - // vec3 kv = D_xx_.get_k(i,j,k); - - for( int l=0; l<3; l++ ){ - a[l] = 0.0; - for( int m=0; m<3; m++){ - // project k on reciprocal basis - a[l] += ar[m]*bcc_reciprocal[m][l]; - } - } + a = mat_reciprocal * ar; // translate the k-vectors into the "candidate" FBZ vec3 anum; @@ -249,22 +286,23 @@ private: for( int l3=-numb; l3<=numb; ++l3 ){ anum[2] = real_t(l3); - vectk_[idx] = a; + // vectk_[idx] = a; + vectk_[idx] = a + mat_reciprocal * anum; - for( int l=0; l<3; l++ ){ - for( int m=0; m<3; m++){ - // project k on reciprocal basis - vectk_[idx][l] += anum[m]*bcc_reciprocal[m][l]; - } - } + // for( int l=0; l<3; l++ ){ + // for( int m=0; m<3; m++){ + // // project k on reciprocal basis + // vectk_[idx][l] += anum[m]*bcc_reciprocal[m][l]; + // } + // } // check if in first Brillouin zone bool btest=true; - for( size_t l=0; l amod*1.0001 ){ btest=false; break; } @@ -308,7 +346,7 @@ private: { constexpr real_t pi = M_PI, twopi = 2.0*M_PI; - const std::vector> bcc_normals{ + const std::vector> normals_bcc{ {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} From 1d10f5194157c7eb7e5cd9fb3baa2d204bbbf674 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Dec 2019 15:40:24 +0100 Subject: [PATCH 039/130] more minor cleanup --- include/particle_plt.hh | 81 ++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 49 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 934f544..0bfe52f 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -29,13 +29,11 @@ private: void init_D() { - constexpr real_t pi = M_PI; - constexpr real_t twopi = 2.0*M_PI; + constexpr real_t pi = M_PI; + constexpr real_t twopi = 2.0*M_PI; constexpr real_t fourpi = 4.0*M_PI; - constexpr real_t sqrtpi = std::sqrt(M_PI); - constexpr real_t pi32 = std::pow(M_PI,1.5); - - const int charge_multiplicity = 2; + const real_t sqrtpi = std::sqrt(M_PI); + const real_t pi32 = std::pow(M_PI,1.5); //! === vectors, reciprocals and normals for the SC lattice === const int charge_fac_sc = 1; @@ -98,14 +96,14 @@ private: }; //! select the properties for the chosen lattice - const int ilat = 2; // 0 = sc, 1 = bcc, 2 = fcc + const int ilat = 1; // 0 = sc, 1 = bcc, 2 = fcc const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc; const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc; const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc; const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? 
charge_fac_bcc : charge_fac_sc; - const size_t nlattice = ngrid_;//16; + const size_t nlattice = ngrid_; const real_t dx = 1.0/real_t(nlattice); const real_t eta = 4.0;//nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells @@ -120,6 +118,7 @@ private: //! just a Kronecker \delta_ij auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; + //! Ewald summation: short-range Green's function auto add_greensftide_sr = [&]( mat3& D, const vec3& d ) -> void { auto r = d.norm(); if( r< 1e-14 ) return; // return zero for r=0 @@ -137,6 +136,7 @@ private: } }; + //! Ewald summation: long-range Green's function auto add_greensftide_lr = [&]( mat3& D, const vec3& k, const vec3& r ) -> void { real_t kmod2 = k.norm_squared(); real_t term = std::exp(-kmod2/(4*alpha2))*std::cos(k.dot(r)) / kmod2 * fft_norm; @@ -148,8 +148,20 @@ private: } } }; + + //! checks if 'vec' is in the FBZ with FBZ normal vectors given in 'normals' + auto check_FBZ = []( const auto& normals, const auto& vec ) -> bool { + bool btest = true; + for( const auto& n : normals ){ + if( n.dot( vec ) > 1.01 * n.dot(n) ){ + btest = false; + break; + } + } + return btest; + }; - constexpr ptrdiff_t lnumber = 4, knumber = 4; + constexpr ptrdiff_t lnumber = 3, knumber = 3; const int numb = 1; vectk_.assign(D_xx_.memsize(),vec3()); @@ -247,8 +259,9 @@ private: { for( size_t k=0; k kv = D_xx_.get_k(i,j,k); - const real_t kmod = kv.norm()/mapratio_/boxlen_; + // const real_t kmod = kv.norm()/mapratio_/boxlen_; // put matrix elements into actual matrix D(0,0) = std::real(D_xx_.kelem(i,j,k)) / fft_norm12; @@ -269,46 +282,18 @@ private: D_xz_.kelem(i,j,k) = evec3[1]; D_yz_.kelem(i,j,k) = evec3[2]; - - vec3 a({0.,0.,0.}); - - auto idx = D_xx_.get_idx(i,j,k); - - vec3 ar = D_xx_.get_k(i,j,k) / (twopi*ngrid_); - a = mat_reciprocal * ar; + + vec3 ar = kv / (twopi*ngrid_); + vec3 a(mat_reciprocal * ar); // translate the k-vectors into the "candidate" FBZ - vec3 anum; for( int l1=-numb; l1<=numb; ++l1 ){ - anum[0] = real_t(l1); for( int l2=-numb; l2<=numb; ++l2 ){ - anum[1] = real_t(l2); for( int l3=-numb; l3<=numb; ++l3 ){ - anum[2] = real_t(l3); - // vectk_[idx] = a; - vectk_[idx] = a + mat_reciprocal * anum; + vectk_[idx] = a + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); - // for( int l=0; l<3; l++ ){ - // for( int m=0; m<3; m++){ - // // project k on reciprocal basis - // vectk_[idx][l] += anum[m]*bcc_reciprocal[m][l]; - // } - // } - // check if in first Brillouin zone - bool btest=true; - for( size_t l=0; l amod*1.0001 ){ btest=false; break; } - if( scalar > 1.01 * amod2 ){ btest=false; break; } - } - if( btest ){ + if( check_FBZ( normals, vectk_[idx]) ){ vecitk_[idx][0] = std::round(vectk_[idx][0]*(ngrid_)/twopi); vecitk_[idx][1] = std::round(vectk_[idx][1]*(ngrid_)/twopi); @@ -318,10 +303,10 @@ private: ico_[idx][1] = std::round((ar[1]+l2) * ngrid_); ico_[idx][2] = std::round((ar[2]+l3) * ngrid_); - ofs2 << vectk_[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; - // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << vecitk_[idx].x << ", " << vecitk_[idx].y << ", " << vecitk_[idx].z << ", " << ico_[idx][0] << ", " << ico_[idx][1] << ", " << ico_[idx][2] << std::endl; - // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << -vecitk_[idx].x << ", " << -vecitk_[idx].y << ", " << -vecitk_[idx].z << ", " << ico_[idx][0] << ", " << ico_[idx][1] << ", 
" << ico_[idx][2] << std::endl; - + #pragma omp critical + { + ofs2 << vectk_[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; + } goto endloop; } } @@ -332,8 +317,6 @@ private: } } - - D_xx_.Write_to_HDF5("debug.hdf5","mu1"); D_xy_.Write_to_HDF5("debug.hdf5","mu2"); D_xz_.Write_to_HDF5("debug.hdf5","mu3"); From 0de486f5525d3111ec6b1327fe6e0396f93e798d Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Dec 2019 18:52:53 +0100 Subject: [PATCH 040/130] added gridding back of dynamical matrix to ordinary Fourier space --- include/particle_plt.hh | 202 +++++++++++++++++++++++++++++++--------- include/vec3.hh | 5 +- 2 files changed, 162 insertions(+), 45 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 0bfe52f..92dea42 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -23,6 +23,7 @@ private: const size_t ngmapto_, ngrid_, ngrid32_; const real_t mapratio_; Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; + Grid_FFT mu1; Grid_FFT grad_x_, grad_y_, grad_z_; std::vector> vectk_; std::vector> ico_, vecitk_; @@ -103,7 +104,7 @@ private: const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc; const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? charge_fac_bcc : charge_fac_sc; - const size_t nlattice = ngrid_; + const ptrdiff_t nlattice = ngrid_; const real_t dx = 1.0/real_t(nlattice); const real_t eta = 4.0;//nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells @@ -153,7 +154,7 @@ private: auto check_FBZ = []( const auto& normals, const auto& vec ) -> bool { bool btest = true; for( const auto& n : normals ){ - if( n.dot( vec ) > 1.01 * n.dot(n) ){ + if( n.dot( vec ) > 1.0001 * n.dot(n) ){ btest = false; break; } @@ -162,7 +163,7 @@ private: }; constexpr ptrdiff_t lnumber = 3, knumber = 3; - const int numb = 1; + const int numb = 1, numb2 = 2; vectk_.assign(D_xx_.memsize(),vec3()); ico_.assign(D_xx_.memsize(),vec3()); @@ -234,6 +235,7 @@ private: D_yy_.FourierTransformForward(); D_yz_.FourierTransformForward(); D_zz_.FourierTransformForward(); + mu1.FourierTransformForward(false); if (CONFIG::MPI_task_rank == 0) unlink("debug.hdf5"); @@ -274,50 +276,160 @@ private: // compute eigenstructure of matrix D.eigen(eval, evec1, evec2, evec3); - D_xx_.kelem(i,j,k) = eval[2]; - D_yy_.kelem(i,j,k) = eval[1]; - D_zz_.kelem(i,j,k) = eval[0]; - - D_xy_.kelem(i,j,k) = evec3[0]; - D_xz_.kelem(i,j,k) = evec3[1]; - D_yz_.kelem(i,j,k) = evec3[2]; - + // now determine to which modes on the regular lattice this contributes + vec3 ar1 = kv / (twopi*ngrid_); + vec3 ar2 = -kv / (twopi*ngrid_); - vec3 ar = kv / (twopi*ngrid_); - vec3 a(mat_reciprocal * ar); + vec3 a1(mat_reciprocal * ar1); + vec3 a2(mat_reciprocal * ar2); // translate the k-vectors into the "candidate" FBZ for( int l1=-numb; l1<=numb; ++l1 ){ for( int l2=-numb; l2<=numb; ++l2 ){ for( int l3=-numb; l3<=numb; ++l3 ){ - vectk_[idx] = a + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); + vectk_[idx] = a1 + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); if( check_FBZ( normals, vectk_[idx]) ){ - vecitk_[idx][0] = std::round(vectk_[idx][0]*(ngrid_)/twopi); - vecitk_[idx][1] = std::round(vectk_[idx][1]*(ngrid_)/twopi); - vecitk_[idx][2] = std::round(vectk_[idx][2]*(ngrid_)/twopi); + int ix = std::round(vectk_[idx][0]*(ngrid_)/twopi); + int iy = std::round(vectk_[idx][1]*(ngrid_)/twopi); + int iz = 
std::round(vectk_[idx][2]*(ngrid_)/twopi); - ico_[idx][0] = std::round((ar[0]+l1) * ngrid_); - ico_[idx][1] = std::round((ar[1]+l2) * ngrid_); - ico_[idx][2] = std::round((ar[2]+l3) * ngrid_); + // for( int k1=-numb2; k1<=numb2; ++k1 ){ + // for( int k2=-numb2; k2<=numb2; ++k2 ){ + // for( int k3=-numb2; k3<=numb2; ++k3 ){ + {{{ int k1=0,k2=0,k3=0; - #pragma omp critical - { - ofs2 << vectk_[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; + auto d = mat_reciprocal * vec3({real_t(k1),real_t(k2),real_t(k3)}) / twopi * ngrid_; + int iix = ix;// + std::round(d.x); + int iiy = iy;// + std::round(d.y); + int iiz = iz;// + std::round(d.z); + + if( iix >= -nlattice/2 && iiy >= -nlattice/2 && iiz >= 0 && + // if( iix >= 0 && iiy >= 0 && iiz >= 0 && + iix < nlattice/2 && iiy < nlattice/2 && iiz <= nlattice/2){ + iix = (iix<0)? iix+nlattice : iix; + iiy = (iiy<0)? iiy+nlattice : iiy; + iiz = (iiz<0)? iiz+nlattice : iiz; + mu1.kelem(iix,iiy,iiz) = eval[2]; + } + } + } } - goto endloop; + } + + vectk_[idx] = a2 + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); + + if( check_FBZ( normals, vectk_[idx]) ){ + + int ix = std::round(vectk_[idx][0]*(ngrid_)/twopi); + int iy = std::round(vectk_[idx][1]*(ngrid_)/twopi); + int iz = std::round(vectk_[idx][2]*(ngrid_)/twopi); + + // for( int k1=-numb; k1<=numb2; ++k1 ){ + // for( int k2=-numb; k2<=numb2; ++k2 ){ + // for( int k3=-numb; k3<=numb2; ++k3 ){ + {{{ int k1=0,k2=0,k3=0; + + auto d = mat_reciprocal * vec3({real_t(k1),real_t(k2),real_t(k3)}) / twopi * ngrid_; + int iix = ix;// + std::round(d.x); + int iiy = iy;// + std::round(d.y); + int iiz = iz;// + std::round(d.z); + + + if( iix >= -nlattice/2 && iiy >= -nlattice/2 && iiz >= 0 && + // if( iix >= 0 && iiy >= 0 && iiz >= 0 && + iix < nlattice/2 && iiy < nlattice/2 && iiz <= nlattice/2){ + iix = (iix<0)? iix+nlattice : iix; + iiy = (iiy<0)? iiy+nlattice : iiy; + iiz = (iiz<0)? 
iiz+nlattice : iiz; + mu1.kelem(iix,iiy,iiz) = eval[2]; + } + } + } + } + } } } - } endloop: ; + } + + endloop: ; + } } } + + // #pragma omp for + // for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + // // const real_t kmod = kv.norm()/mapratio_/boxlen_; + + // // put matrix elements into actual matrix + // D(0,0) = std::real(D_xx_.kelem(i,j,k)) / fft_norm12; + // D(0,1) = D(1,0) = std::real(D_xy_.kelem(i,j,k)) / fft_norm12; + // D(0,2) = D(2,0) = std::real(D_xz_.kelem(i,j,k)) / fft_norm12; + // D(1,1) = std::real(D_yy_.kelem(i,j,k)) / fft_norm12; + // D(1,2) = D(2,1) = std::real(D_yz_.kelem(i,j,k)) / fft_norm12; + // D(2,2) = std::real(D_zz_.kelem(i,j,k)) / fft_norm12; + + // // compute eigenstructure of matrix + // D.eigen(eval, evec1, evec2, evec3); + + // D_xx_.kelem(i,j,k) = eval[2]; + // D_yy_.kelem(i,j,k) = eval[1]; + // D_zz_.kelem(i,j,k) = eval[0]; + + // D_xy_.kelem(i,j,k) = evec3[0]; + // D_xz_.kelem(i,j,k) = evec3[1]; + // D_yz_.kelem(i,j,k) = evec3[2]; + + + // vec3 ar = kv / (twopi*ngrid_); + // vec3 a(mat_reciprocal * ar); + + // // translate the k-vectors into the "candidate" FBZ + // for( int l1=-numb; l1<=numb; ++l1 ){ + // for( int l2=-numb; l2<=numb; ++l2 ){ + // for( int l3=-numb; l3<=numb; ++l3 ){ + + // vectk_[idx] = a + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); + + // if( check_FBZ( normals, vectk_[idx]) ){ + + // vecitk_[idx][0] = std::round(vectk_[idx][0]*(ngrid_)/twopi); + // vecitk_[idx][1] = std::round(vectk_[idx][1]*(ngrid_)/twopi); + // vecitk_[idx][2] = std::round(vectk_[idx][2]*(ngrid_)/twopi); + + // ico_[idx][0] = std::round((ar[0]+l1) * ngrid_); + // ico_[idx][1] = std::round((ar[1]+l2) * ngrid_); + // ico_[idx][2] = std::round((ar[2]+l3) * ngrid_); + + // #pragma omp critical + // { + // //ofs2 << vectk_[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; + // ofs2 << vecitk_[idx][0] << " " << vecitk_[idx][1] << " " << vecitk_[idx][2] << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; + + // } + // //goto endloop; + // } + // } + // } + // } endloop: ; + // } + // } + // } } - D_xx_.Write_to_HDF5("debug.hdf5","mu1"); + mu1.kelem(0,0,0) = 1.0; + mu1.Write_to_HDF5("debug.hdf5","mu1"); D_xy_.Write_to_HDF5("debug.hdf5","mu2"); D_xz_.Write_to_HDF5("debug.hdf5","mu3"); D_yy_.Write_to_HDF5("debug.hdf5","e1x"); @@ -455,23 +567,23 @@ private: phi0 = (phi0==phi0)? 
phi0 : 0.0; // catch NaN from division by zero when kmod2=0 - const int nn = 3; - size_t nsum = 0; - ccomplex_t ff = 0.0; - for( int is=-nn;is<=nn;is++){ - for( int js=-nn;js<=nn;js++){ - for( int ks=-nn;ks<=nn;ks++){ - if( std::abs(is)+std::abs(js)+std::abs(ks) <= nn ){ - ff += std::exp(ccomplex_t(0.0,(((is)*kv[0] + (js)*kv[1] + (ks)*kv[2])))); - ff += std::exp(ccomplex_t(0.0,(((0.5+is)*kv[0] + (0.5+js)*kv[1] + (0.5+ks)*kv[2])))); - ++nsum; - } - } - } - } - ff /= nsum; + // const int nn = 3; + // size_t nsum = 0; + // ccomplex_t ff = 0.0; + // for( int is=-nn;is<=nn;is++){ + // for( int js=-nn;js<=nn;js++){ + // for( int ks=-nn;ks<=nn;ks++){ + // if( std::abs(is)+std::abs(js)+std::abs(ks) <= nn ){ + // ff += std::exp(ccomplex_t(0.0,(((is)*kv[0] + (js)*kv[1] + (ks)*kv[2])))); + // ff += std::exp(ccomplex_t(0.0,(((0.5+is)*kv[0] + (0.5+js)*kv[1] + (0.5+ks)*kv[2])))); + // ++nsum; + // } + // } + // } + // } + // ff /= nsum; // ccomplex_t ff = 1.0; - // ccomplex_t ff = (0.5+0.5*std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])))); + ccomplex_t ff = (0.5+0.5*std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])))); // assemble short-range + long_range of Ewald sum and add DC component to trace D_xx_.kelem(i,j,k) = ff*((D_xx_.kelem(i,j,k) - kv[0]*kv[0] * phi0)*nfac) + 1.0/3.0; D_xy_.kelem(i,j,k) = ff*((D_xy_.kelem(i,j,k) - kv[0]*kv[1] * phi0)*nfac); @@ -559,7 +671,7 @@ private: public: // real_t boxlen, size_t ngridother - explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=32 ) + explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=64 ) : boxlen_( the_config.GetValue("setup", "BoxLength") ), ngmapto_( the_config.GetValue("setup", "GridRes") ), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), @@ -567,7 +679,8 @@ public: D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_x_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_y_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), - grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) + grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + mu1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) { csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); @@ -596,6 +709,7 @@ public: csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; init_D(); + // init_D__old(); csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; } diff --git a/include/vec3.hh b/include/vec3.hh index af40bf3..af2bb9a 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -29,7 +29,7 @@ public: //! copy constructor for non-const reference, needed to avoid variadic template being called for non-const reference vec3( vec3& v) : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} - + //! move constructor vec3( vec3 &&v) : data_(std::move(v.data_)), x(data_[0]), y(data_[1]), z(data_[2]){} @@ -60,6 +60,9 @@ public: //! implementation of difference of vec3 vec3 operator-( const vec3& v ) const noexcept{ return vec3({x-v.x,y-v.y,z-v.z}); } + //! implementation of unary negative + vec3 operator-() const noexcept{ return vec3({-x,-y,-z}); } + //! 
implementation of scalar multiplication vec3 operator*( T s ) const noexcept{ return vec3({x*s,y*s,z*s}); } From 06fa3c128ec407843ef88b38954e85ab6b916293 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Dec 2019 20:10:58 +0100 Subject: [PATCH 041/130] added interpolation to approximate infinite lattice --- include/particle_plt.hh | 108 ++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 66 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 92dea42..9c97694 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -62,6 +62,7 @@ private: 0.0, 1.0, 0.5, 0.0, 0.0, 0.5, }; + const mat3 mat_reciprocal_bcc{ twopi, 0.0, 0.0, 0.0, twopi, 0.0, @@ -97,7 +98,7 @@ private: }; //! select the properties for the chosen lattice - const int ilat = 1; // 0 = sc, 1 = bcc, 2 = fcc + const int ilat = 2; // 0 = sc, 1 = bcc, 2 = fcc const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc; const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc; @@ -355,80 +356,55 @@ private: } } + endloop: ; } } } - // #pragma omp for - // for( size_t i=0; i kv = D_xx_.get_k(i,j,k); - // // const real_t kmod = kv.norm()/mapratio_/boxlen_; + mu1.kelem(0,0,0) = 1.0; - // // put matrix elements into actual matrix - // D(0,0) = std::real(D_xx_.kelem(i,j,k)) / fft_norm12; - // D(0,1) = D(1,0) = std::real(D_xy_.kelem(i,j,k)) / fft_norm12; - // D(0,2) = D(2,0) = std::real(D_xz_.kelem(i,j,k)) / fft_norm12; - // D(1,1) = std::real(D_yy_.kelem(i,j,k)) / fft_norm12; - // D(1,2) = D(2,1) = std::real(D_yz_.kelem(i,j,k)) / fft_norm12; - // D(2,2) = std::real(D_zz_.kelem(i,j,k)) / fft_norm12; - - // // compute eigenstructure of matrix - // D.eigen(eval, evec1, evec2, evec3); - - // D_xx_.kelem(i,j,k) = eval[2]; - // D_yy_.kelem(i,j,k) = eval[1]; - // D_zz_.kelem(i,j,k) = eval[0]; - - // D_xy_.kelem(i,j,k) = evec3[0]; - // D_xz_.kelem(i,j,k) = evec3[1]; - // D_yz_.kelem(i,j,k) = evec3[2]; - - - // vec3 ar = kv / (twopi*ngrid_); - // vec3 a(mat_reciprocal * ar); - - // // translate the k-vectors into the "candidate" FBZ - // for( int l1=-numb; l1<=numb; ++l1 ){ - // for( int l2=-numb; l2<=numb; ++l2 ){ - // for( int l3=-numb; l3<=numb; ++l3 ){ - - // vectk_[idx] = a + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); - - // if( check_FBZ( normals, vectk_[idx]) ){ - - // vecitk_[idx][0] = std::round(vectk_[idx][0]*(ngrid_)/twopi); - // vecitk_[idx][1] = std::round(vectk_[idx][1]*(ngrid_)/twopi); - // vecitk_[idx][2] = std::round(vectk_[idx][2]*(ngrid_)/twopi); - - // ico_[idx][0] = std::round((ar[0]+l1) * ngrid_); - // ico_[idx][1] = std::round((ar[1]+l2) * ngrid_); - // ico_[idx][2] = std::round((ar[2]+l3) * ngrid_); - - // #pragma omp critical - // { - // //ofs2 << vectk_[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; - // ofs2 << vecitk_[idx][0] << " " << vecitk_[idx][1] << " " << vecitk_[idx][2] << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; - - // } - // //goto endloop; - // } - // } - // } - // } endloop: ; - // } - // } - // } + //... approximate infinite lattice by inerpolating to sites not convered by current resolution... 
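        // --- illustrative aside (added for clarity, not part of this patch) ---------
        // Which sites of the cubic ngrid_^3 Fourier mesh actually receive a folded
        // Bravais mode can be read off the reciprocal matrices defined above:
        //   BCC: its reciprocal lattice is FCC, so a mode (i,j,k) is covered iff
        //        i+j+k is even  -- one site in two, consistent with charge_fac_bcc = 2;
        //   FCC: its reciprocal lattice is BCC, so (i,j,k) is covered iff i, j and k
        //        all share the same parity -- one site in four, consistent with
        //        charge_fac_fcc = 4.
        // A hypothetical predicate making the counting explicit (names not used in
        // the code) would be:
        //   bool covered_bcc(int i, int j, int k){ return (i + j + k) % 2 == 0; }
        //   bool covered_fcc(int i, int j, int k){ return (i - j) % 2 == 0 && (j - k) % 2 == 0; }
        // The remaining, uncovered sites hold no eigenvalue yet; the neighbour
        // averaging below fills them in.
        // ---------------------------------------------------------------------------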
+ if( ilat==1 ){ + for( size_t i=0; i Date: Mon, 2 Dec 2019 01:04:03 +0100 Subject: [PATCH 042/130] added interpolation of all fields, plt seems to work for all lattices, projection needs testing still --- include/mat3.hh | 12 ++ include/particle_plt.hh | 383 +++++++++++++++++++++++----------------- 2 files changed, 235 insertions(+), 160 deletions(-) diff --git a/include/mat3.hh b/include/mat3.hh index 3c28f13..ac23069 100644 --- a/include/mat3.hh +++ b/include/mat3.hh @@ -157,3 +157,15 @@ vec3 operator*( const mat3 &A, const vec3 &v ) noexcept } return result; } + +// template +// vec3 operator*( const vec3 &v, const mat3 &A ) noexcept +// { +// vec3 result = 0.0; +// for( int mu=0; mu<3; ++mu ){ +// for( int nu=0; nu<3; ++nu ){ +// result[nu] += v[mu]*A(mu,nu); +// } +// } +// return result; +// } diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 9c97694..e48e356 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -23,12 +23,11 @@ private: const size_t ngmapto_, ngrid_, ngrid32_; const real_t mapratio_; Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; - Grid_FFT mu1; Grid_FFT grad_x_, grad_y_, grad_z_; std::vector> vectk_; std::vector> ico_, vecitk_; - void init_D() + void init_D( lattice lattice_type ) { constexpr real_t pi = M_PI; constexpr real_t twopi = 2.0*M_PI; @@ -62,7 +61,6 @@ private: 0.0, 1.0, 0.5, 0.0, 0.0, 0.5, }; - const mat3 mat_reciprocal_bcc{ twopi, 0.0, 0.0, 0.0, twopi, 0.0, @@ -98,17 +96,17 @@ private: }; //! select the properties for the chosen lattice - const int ilat = 2; // 0 = sc, 1 = bcc, 2 = fcc + const int ilat = lattice_type; // 0 = sc, 1 = bcc, 2 = fcc - const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc; - const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc; - const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc; - const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? charge_fac_bcc : charge_fac_sc; + const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc; + const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc; + const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc; + const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? 
charge_fac_bcc : charge_fac_sc; const ptrdiff_t nlattice = ngrid_; const real_t dx = 1.0/real_t(nlattice); - const real_t eta = 4.0;//nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells + const real_t eta = 4.0; // Ewald cutoff shall be 4 cells const real_t alpha = 1.0/std::sqrt(2)/eta; const real_t alpha2 = alpha*alpha; const real_t alpha3 = alpha2*alpha; @@ -164,7 +162,7 @@ private: }; constexpr ptrdiff_t lnumber = 3, knumber = 3; - const int numb = 1, numb2 = 2; + const int numb = 1; //!< search radius when shifting vectors into FBZ vectk_.assign(D_xx_.memsize(),vec3()); ico_.assign(D_xx_.memsize(),vec3()); @@ -176,9 +174,9 @@ private: mat3 matD(0.0); #pragma omp for - for( size_t i=0; i x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)}); const vec3 ar = (mat_bravais * x_ijk).wrap_abs(); @@ -236,8 +234,8 @@ private: D_yy_.FourierTransformForward(); D_yz_.FourierTransformForward(); D_zz_.FourierTransformForward(); - mu1.FourierTransformForward(false); +#ifndef PRODUCTION if (CONFIG::MPI_task_rank == 0) unlink("debug.hdf5"); D_xx_.Write_to_HDF5("debug.hdf5","Dxx"); @@ -248,169 +246,239 @@ private: D_zz_.Write_to_HDF5("debug.hdf5","Dzz"); std::ofstream ofs2("test_brillouin.txt"); - - #pragma omp parallel +#endif { - // thread private matrix representation - mat3 D; - vec3 eval, evec1, evec2, evec3; + //!=== Make temporary copies before resorting to std. Fourier grid ========!// + Grid_FFT + temp1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + temp2({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + temp3({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}); - #pragma omp for + temp1.FourierTransformForward(false); + temp2.FourierTransformForward(false); + temp3.FourierTransformForward(false); + + #pragma omp parallel for for( size_t i=0; i kv = D_xx_.get_k(i,j,k); - // const real_t kmod = kv.norm()/mapratio_/boxlen_; - - // put matrix elements into actual matrix - D(0,0) = std::real(D_xx_.kelem(i,j,k)) / fft_norm12; - D(0,1) = D(1,0) = std::real(D_xy_.kelem(i,j,k)) / fft_norm12; - D(0,2) = D(2,0) = std::real(D_xz_.kelem(i,j,k)) / fft_norm12; - D(1,1) = std::real(D_yy_.kelem(i,j,k)) / fft_norm12; - D(1,2) = D(2,1) = std::real(D_yz_.kelem(i,j,k)) / fft_norm12; - D(2,2) = std::real(D_zz_.kelem(i,j,k)) / fft_norm12; - - // compute eigenstructure of matrix - D.eigen(eval, evec1, evec2, evec3); - - // now determine to which modes on the regular lattice this contributes - vec3 ar1 = kv / (twopi*ngrid_); - vec3 ar2 = -kv / (twopi*ngrid_); - - vec3 a1(mat_reciprocal * ar1); - vec3 a2(mat_reciprocal * ar2); - - // translate the k-vectors into the "candidate" FBZ - for( int l1=-numb; l1<=numb; ++l1 ){ - for( int l2=-numb; l2<=numb; ++l2 ){ - for( int l3=-numb; l3<=numb; ++l3 ){ - - vectk_[idx] = a1 + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); - - if( check_FBZ( normals, vectk_[idx]) ){ - - int ix = std::round(vectk_[idx][0]*(ngrid_)/twopi); - int iy = std::round(vectk_[idx][1]*(ngrid_)/twopi); - int iz = std::round(vectk_[idx][2]*(ngrid_)/twopi); - - // for( int k1=-numb2; k1<=numb2; ++k1 ){ - // for( int k2=-numb2; k2<=numb2; ++k2 ){ - // for( int k3=-numb2; k3<=numb2; ++k3 ){ - {{{ int k1=0,k2=0,k3=0; - - auto d = mat_reciprocal * vec3({real_t(k1),real_t(k2),real_t(k3)}) / twopi * ngrid_; - int iix = ix;// + std::round(d.x); - int iiy = iy;// + std::round(d.y); - int iiz = iz;// + std::round(d.z); - - if( iix >= -nlattice/2 && iiy >= -nlattice/2 && iiz >= 0 && - // if( iix >= 0 && iiy >= 0 && iiz >= 0 && - iix < nlattice/2 && iiy < nlattice/2 && iiz <= nlattice/2){ - iix = (iix<0)? 
iix+nlattice : iix; - iiy = (iiy<0)? iiy+nlattice : iiy; - iiz = (iiz<0)? iiz+nlattice : iiz; - mu1.kelem(iix,iiy,iiz) = eval[2]; - } - } - } - } - } - - vectk_[idx] = a2 + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); - - if( check_FBZ( normals, vectk_[idx]) ){ - - int ix = std::round(vectk_[idx][0]*(ngrid_)/twopi); - int iy = std::round(vectk_[idx][1]*(ngrid_)/twopi); - int iz = std::round(vectk_[idx][2]*(ngrid_)/twopi); - - // for( int k1=-numb; k1<=numb2; ++k1 ){ - // for( int k2=-numb; k2<=numb2; ++k2 ){ - // for( int k3=-numb; k3<=numb2; ++k3 ){ - {{{ int k1=0,k2=0,k3=0; - - auto d = mat_reciprocal * vec3({real_t(k1),real_t(k2),real_t(k3)}) / twopi * ngrid_; - int iix = ix;// + std::round(d.x); - int iiy = iy;// + std::round(d.y); - int iiz = iz;// + std::round(d.z); - - - if( iix >= -nlattice/2 && iiy >= -nlattice/2 && iiz >= 0 && - // if( iix >= 0 && iiy >= 0 && iiz >= 0 && - iix < nlattice/2 && iiy < nlattice/2 && iiz <= nlattice/2){ - iix = (iix<0)? iix+nlattice : iix; - iiy = (iiy<0)? iiy+nlattice : iiy; - iiz = (iiz<0)? iiz+nlattice : iiz; - mu1.kelem(iix,iiy,iiz) = eval[2]; - } - } - } - } - - } - } - } - } - - - endloop: ; - + temp1.kelem(i,j,k) = ccomplex_t(std::real(D_xx_.kelem(i,j,k)),std::real(D_xy_.kelem(i,j,k))); + temp2.kelem(i,j,k) = ccomplex_t(std::real(D_xz_.kelem(i,j,k)),std::real(D_yy_.kelem(i,j,k))); + temp3.kelem(i,j,k) = ccomplex_t(std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))); } } } + D_xx_.zero(); D_xy_.zero(); D_xz_.zero(); + D_yy_.zero(); D_yz_.zero(); D_zz_.zero(); + + //!=== Diagonalise and resort to std. Fourier grid ========!// + #pragma omp parallel + { + // thread private matrix representation + mat3 D; + vec3 eval, evec1, evec2, evec3; - mu1.kelem(0,0,0) = 1.0; + #pragma omp for + for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + + // put matrix elements into actual matrix + D(0,0) = std::real(temp1.kelem(i,j,k)) / fft_norm12; + D(0,1) = D(1,0) = std::imag(temp1.kelem(i,j,k)) / fft_norm12; + D(0,2) = D(2,0) = std::real(temp2.kelem(i,j,k)) / fft_norm12; + D(1,1) = std::imag(temp2.kelem(i,j,k)) / fft_norm12; + D(1,2) = D(2,1) = std::real(temp3.kelem(i,j,k)) / fft_norm12; + D(2,2) = std::imag(temp3.kelem(i,j,k)) / fft_norm12; - //... approximate infinite lattice by inerpolating to sites not convered by current resolution... - if( ilat==1 ){ - for( size_t i=0; i ar1 = kv / (twopi*ngrid_); + vec3 ar2 = -kv / (twopi*ngrid_); + + vec3 a1(mat_reciprocal * ar1); + vec3 a2(mat_reciprocal * ar2); + + // translate the k-vectors into the "candidate" FBZ + for( int l1=-numb; l1<=numb; ++l1 ){ + for( int l2=-numb; l2<=numb; ++l2 ){ + for( int l3=-numb; l3<=numb; ++l3 ){ + const vec3 vshift({real_t(l1),real_t(l2),real_t(l3)}); + + // first half of Fourier space (due to real trafo we only have half in memory) + vec3 vectk = a1 + mat_reciprocal * vshift; + + if( check_FBZ( normals, vectk ) ) + { + int ix = std::round(vectk.x*(ngrid_)/twopi); + int iy = std::round(vectk.y*(ngrid_)/twopi); + int iz = std::round(vectk.z*(ngrid_)/twopi); + + if( ix >= -nlattice/2 && iy >= -nlattice/2 && iz >= 0 && + ix < nlattice/2 && iy < nlattice/2 && iz <= nlattice/2){ + ix = (ix<0)? ix+nlattice : ix; + iy = (iy<0)? 
iy+nlattice : iy; + D_xx_.kelem(ix,iy,iz) = eval[2]; + D_xy_.kelem(ix,iy,iz) = eval[1]; + D_xz_.kelem(ix,iy,iz) = eval[0]; + D_yy_.kelem(ix,iy,iz) = vvv.x; + D_yz_.kelem(ix,iy,iz) = vvv.y; + D_zz_.kelem(ix,iy,iz) = vvv.z; + } + } + // second half of Fourier space (due to real trafo we only have half in memory) + vectk = a2 + mat_reciprocal * vshift; + + if( check_FBZ( normals, vectk ) ) + { + int ix = std::round(vectk.x*(ngrid_)/twopi); + int iy = std::round(vectk.y*(ngrid_)/twopi); + int iz = std::round(vectk.z*(ngrid_)/twopi); + + if( ix >= -nlattice/2 && iy >= -nlattice/2 && iz >= 0 && + ix < nlattice/2 && iy < nlattice/2 && iz <= nlattice/2){ + ix = (ix<0)? ix+nlattice : ix; + iy = (iy<0)? iy+nlattice : iy; + D_xx_.kelem(ix,iy,iz) = eval[2]; + D_xy_.kelem(ix,iy,iz) = eval[1]; + D_xz_.kelem(ix,iy,iz) = eval[0]; + D_yy_.kelem(ix,iy,iz) = vvv.x; + D_yz_.kelem(ix,iy,iz) = vvv.y; + D_zz_.kelem(ix,iy,iz) = vvv.z; + } + } + } //l3 + } //l2 + } //l1 + } //k + } //j + } //i + } + + D_xx_.kelem(0,0,0) = 1.0; + D_xy_.kelem(0,0,0) = 0.0; + D_xz_.kelem(0,0,0) = 0.0; + } + + //... approximate infinite lattice by inerpolating to sites not convered by current resolution... + if( ilat==1 ){ + #pragma omp parallel for + for( size_t i=0; i ccomplex_t { + return 0.25 * ( + D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) + + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) ); + }; + + D_xx_.kelem(i,j,k) = avg( D_xx_ ); + D_xy_.kelem(i,j,k) = avg( D_xy_ ); + D_xz_.kelem(i,j,k) = avg( D_xz_ ); + D_yy_.kelem(i,j,k) = avg( D_yy_ ); + D_yz_.kelem(i,j,k) = avg( D_yz_ ); + D_zz_.kelem(i,j,k) = avg( D_zz_ ); } } } - }else if( ilat==2 ){ - for( size_t i=0; i ccomplex_t{ + return 0.5 * ( D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) ); + }; + + D_xx_.kelem(i,j,k) = avg( D_xx_ ); + D_xy_.kelem(i,j,k) = avg( D_xy_ ); + D_xz_.kelem(i,j,k) = avg( D_xz_ ); + D_yy_.kelem(i,j,k) = avg( D_yy_ ); + D_yz_.kelem(i,j,k) = avg( D_yz_ ); + D_zz_.kelem(i,j,k) = avg( D_zz_ ); } } } - for( size_t i=0; i ccomplex_t{ + return 0.5 * ( D.kelem((nlattice+i-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) ); + }; + + D_xx_.kelem(i,j,k) = avg( D_xx_ ); + D_xy_.kelem(i,j,k) = avg( D_xy_ ); + D_xz_.kelem(i,j,k) = avg( D_xz_ ); + D_yy_.kelem(i,j,k) = avg( D_yy_ ); + D_yz_.kelem(i,j,k) = avg( D_yz_ ); + D_zz_.kelem(i,j,k) = avg( D_zz_ ); } } } } } - - mu1.Write_to_HDF5("debug.hdf5","mu1"); +#ifdef PRODUCTION + #pragma omp parallel for + for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + const real_t kmod = kv.norm()/mapratio_/boxlen_; + + double mu1 = std::real(D_xx_.kelem(i,j,k)); + double mu2 = std::real(D_xy_.kelem(i,j,k)); + double mu3 = std::real(D_xz_.kelem(i,j,k)); + + vec evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}) + + // store in diagonal components of D_ij + D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec1.x; + D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec1.y; + D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec1.z; + + auto norm = (kv.norm()/kv.dot(evec1)); + if ( std::abs(kv.dot(evec1)) < 1e-10 || kv.norm() < 1e-10 ) norm = 0.0; + + D_xx_.kelem(i,j,k) *= norm; + D_yy_.kelem(i,j,k) *= norm; + D_zz_.kelem(i,j,k) *= norm; + + // spatially dependent correction to vfact = \dot{D_+}/D_+ + D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); + } + } + } + D_xy_.kelem(0,0,0) = 1.0; +#else + D_xx_.Write_to_HDF5("debug.hdf5","mu1"); D_xy_.Write_to_HDF5("debug.hdf5","mu2"); D_xz_.Write_to_HDF5("debug.hdf5","mu3"); 
D_yy_.Write_to_HDF5("debug.hdf5","e1x"); D_yz_.Write_to_HDF5("debug.hdf5","e1y"); D_zz_.Write_to_HDF5("debug.hdf5","e1z"); +#endif + + + } void init_D__old() @@ -586,15 +654,16 @@ private: { for( size_t k=0; k kv = D_xx_.get_k(i,j,k); - const real_t kmod = kv.norm()/mapratio_/boxlen_; - // put matrix elements into actual matrix D = { std::real(D_xx_.kelem(i,j,k)), std::real(D_xy_.kelem(i,j,k)), std::real(D_xz_.kelem(i,j,k)), std::real(D_yy_.kelem(i,j,k)), std::real(D_yz_.kelem(i,j,k)), std::real(D_zz_.kelem(i,j,k)) }; // compute eigenstructure of matrix D.eigen(eval, evec1, evec2, evec3); + +#ifdef PRODUCTION + vec3 kv = D_xx_.get_k(i,j,k); + const real_t kmod = kv.norm()/mapratio_/boxlen_; // store in diagonal components of D_ij D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.x; @@ -603,7 +672,7 @@ private: auto norm = (kv.norm()/kv.dot(evec3)); if ( std::abs(kv.dot(evec3)) < 1e-10 || kv.norm() < 1e-10 ) norm = 0.0; -#ifdef PRODUCTION + D_xx_.kelem(i,j,k) *= norm; D_yy_.kelem(i,j,k) *= norm; D_zz_.kelem(i,j,k) *= norm; @@ -655,8 +724,7 @@ public: D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_x_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_y_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), - grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), - mu1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) + grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) { csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); @@ -666,12 +734,7 @@ public: : ((lattice_str=="rsc")? lattice_rsc : lattice_sc))); - if( lattice_type != lattice_sc){ - csoca::elog << "PLT not implemented for chosen lattice type! Currently only SC." << std::endl; - abort(); - } - - csoca::ilog << "PLT corrections for SC lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; + csoca::ilog << "PLT corrections for " << lattice_str << " lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; // #if defined(USE_MPI) // if( CONFIG::MPI_task_size>1 ) @@ -684,7 +747,7 @@ public: double wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; - init_D(); + init_D( lattice_type ); // init_D__old(); csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; From beb40bfc352ad848430bc897d52af8ad9577ecb4 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 2 Dec 2019 18:47:34 +0100 Subject: [PATCH 043/130] added non-PLT back into the game. has to be switched by hand in ic_generator.cc right now... 
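The operator added below, op::fourier_gradient, is the plain pseudo-spectral
gradient: every mode is multiplied by i*k, grid indices above GridRes/2 are
mapped to negative frequencies, the Nyquist plane is zeroed, and vfac_corr()
simply returns 1, i.e. the fluid value with no particle-lattice correction.
A minimal standalone sketch of the same index-to-wavenumber convention
(illustrative only; signed_wavenumber is not a name used in the code):

#include <cmath>
#include <cstddef>
#include <cstdio>

// Map grid index m on an N^3 mesh of box size L to the signed wavenumber used
// by the i*k gradient kernel; the Nyquist index N/2 is zeroed so the kernel
// stays purely imaginary and antisymmetric under k -> -k.
double signed_wavenumber(std::size_t m, std::size_t N, double L)
{
    if (m == N / 2) return 0.0;
    const double im = double(m) - double(m > N / 2) * double(N); // folds into (-N/2, N/2)
    return im * 2.0 * M_PI / L;
}

int main()
{
    // for N = 8 and L = 2*pi this prints: 0 1 2 3 0 -3 -2 -1
    for (std::size_t m = 0; m < 8; ++m)
        std::printf("%g ", signed_wavenumber(m, 8, 2.0 * M_PI));
    std::printf("\n");
    return 0;
}

By contrast, the PLT operator of the previous patches stores a mode-dependent
velocity correction, 1/((sqrt(1+24*mu1)-1)/4), the inverse of the PLT
growing-mode exponent in an EdS background, which reduces to 1 in the fluid
limit mu1 = 1. As the subject says, switching between the two operators
currently requires editing ic_generator.cc by hand.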
--- include/general.hh | 2 ++ include/operators.hh | 29 +++++++++++++++++++++++++++++ include/particle_plt.hh | 4 ++-- src/ic_generator.cc | 3 ++- 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/include/general.hh b/include/general.hh index 71e521a..c77be01 100644 --- a/include/general.hh +++ b/include/general.hh @@ -12,6 +12,8 @@ #include #endif +#include + #ifdef USE_SINGLEPRECISION using real_t = float; using complex_t = fftwf_complex; diff --git a/include/operators.hh b/include/operators.hh index 63d94f4..83e17dc 100644 --- a/include/operators.hh +++ b/include/operators.hh @@ -1,7 +1,11 @@ #pragma once +#include + namespace op{ +//!== long list of primitive operators to work on fields ==!// + template< typename field> inline auto assign_to( field& g ){return [&g](auto i, auto v){ g[i] = v; };} @@ -20,4 +24,29 @@ inline auto subtract_from( field& g ){return [&g](auto i, auto v){ g[i] -= v; }; template< typename field> inline auto subtract_twice_from( field& g ){return [&g](auto i, auto v){ g[i] -= 2*v; };} +//! vanilla standard gradient +class fourier_gradient{ +private: + real_t boxlen_, k0_; + ptrdiff_t n_, nhalf_; +public: + explicit fourier_gradient( const ConfigFile& the_config ) + : boxlen_( the_config.GetValue("setup", "BoxLength") ), + n_( the_config.GetValue("setup","GridRes") ), + nhalf_( n_/2 ), + k0_(2.0*M_PI/boxlen_) + {} + + inline ccomplex_t gradient( const int idim, std::array ijk ) const + { + real_t rgrad = + (ijk[idim]!=nhalf_)? (real_t(ijk[idim]) - real_t(ijk[idim] > nhalf_) * n_) : 0.0; + return ccomplex_t(0.0,rgrad * k0_); + } + + inline real_t vfac_corr( std::array ijk ) const + { + return 1.0; + } +}; } diff --git a/include/particle_plt.hh b/include/particle_plt.hh index e48e356..9bed249 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -761,10 +761,10 @@ public: return D_zz_.get_cic_kspace({ix,iy,iz}); } - inline ccomplex_t vfac_corr( std::array ijk ) const + inline real_t vfac_corr( std::array ijk ) const { real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; - return D_xy_.get_cic_kspace({ix,iy,iz}); + return std::real(D_xy_.get_cic_kspace({ix,iy,iz})); } }; diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 4394947..56566ff 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -168,7 +168,8 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- // Create PLT gradient operator //-------------------------------------------------------------------- - particle::lattice_gradient lg( the_config ); + // particle::lattice_gradient lg( the_config ); + op::fourier_gradient lg( the_config ); //-------------------------------------------------------------------- std::vector species_list; From d8cbc4fca681395cc50dee85838e9f97e7a48f00 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 4 Dec 2019 14:26:42 +0100 Subject: [PATCH 044/130] improved PLT field interpolation, still has problems for FCC though --- include/operators.hh | 6 +- include/particle_plt.hh | 171 ++++++++++++++++++++++++++-------------- include/vec3.hh | 12 +-- 3 files changed, 119 insertions(+), 70 deletions(-) diff --git a/include/operators.hh b/include/operators.hh index 83e17dc..be6d1f7 100644 --- a/include/operators.hh +++ b/include/operators.hh @@ -28,13 +28,13 @@ inline auto subtract_twice_from( field& g ){return [&g](auto i, auto v){ g[i] -= class fourier_gradient{ private: real_t boxlen_, k0_; - ptrdiff_t n_, nhalf_; + size_t n_, nhalf_; public: explicit 
fourier_gradient( const ConfigFile& the_config ) : boxlen_( the_config.GetValue("setup", "BoxLength") ), + k0_(2.0*M_PI/boxlen_), n_( the_config.GetValue("setup","GridRes") ), - nhalf_( n_/2 ), - k0_(2.0*M_PI/boxlen_) + nhalf_( n_/2 ) {} inline ccomplex_t gradient( const int idim, std::array ijk ) const diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 9bed249..6e8d280 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -12,7 +12,7 @@ #include #include -// #define PRODUCTION +#define PRODUCTION namespace particle{ //! implement Marcos et al. PLT calculation @@ -27,6 +27,14 @@ private: std::vector> vectk_; std::vector> ico_, vecitk_; + bool is_even( int i ){ return (i%2)==0; } + + bool is_in( int i, int j, int k, const mat3& M ){ + vec3 v({i,j,k}); + auto vv = M * v; + return is_even(vv.x)&&is_even(vv.y)&&is_even(vv.z); + } + void init_D( lattice lattice_type ) { constexpr real_t pi = M_PI; @@ -47,6 +55,11 @@ private: 0.0, twopi, 0.0, 0.0, 0.0, twopi, }; + const mat3 mat_invrecip_sc{ + 2, 0, 0, + 0, 2, 0, + 0, 0, 2, + }; const std::vector> normals_sc{ {pi,0.,0.},{-pi,0.,0.}, {0.,pi,0.},{0.,-pi,0.}, @@ -66,6 +79,11 @@ private: 0.0, twopi, 0.0, -twopi, -twopi, fourpi, }; + const mat3 mat_invrecip_bcc{ + 2, 0, 0, + 0, 2, 0, + 1, 1, 1, + }; const std::vector> normals_bcc{ {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, @@ -85,6 +103,11 @@ private: 0.0, 0.0, twopi, fourpi, 0.0, -twopi, }; + const mat3 mat_invrecip_fcc{ + 0, 1, 1, + 1, 0, 1, + 0, 2, 0, + }; const std::vector> normals_fcc{ {twopi,0.,0.},{-twopi,0.,0.}, {0.,twopi,0.},{0.,-twopi,0.}, @@ -100,6 +123,7 @@ private: const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc; const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc; + const auto mat_invrecip = (ilat==2)? mat_invrecip_fcc : (ilat==1)? mat_invrecip_bcc : mat_invrecip_sc; const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc; const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? charge_fac_bcc : charge_fac_sc; @@ -300,9 +324,8 @@ private: // compute eigenstructure of matrix D.eigen(eval, evec1, evec2, evec3); + evec3 /= (twopi*ngrid_); - auto vvv = evec3 / (twopi*ngrid_); - // now determine to which modes on the regular lattice this contributes vec3 ar1 = kv / (twopi*ngrid_); vec3 ar2 = -kv / (twopi*ngrid_); @@ -324,17 +347,17 @@ private: int ix = std::round(vectk.x*(ngrid_)/twopi); int iy = std::round(vectk.y*(ngrid_)/twopi); int iz = std::round(vectk.z*(ngrid_)/twopi); - - if( ix >= -nlattice/2 && iy >= -nlattice/2 && iz >= 0 && - ix < nlattice/2 && iy < nlattice/2 && iz <= nlattice/2){ + if( ix > -nlattice/2 && iy > -nlattice/2 && iz >= 0 && + ix <= nlattice/2 && iy <= nlattice/2 && iz <= nlattice/2){ ix = (ix<0)? ix+nlattice : ix; iy = (iy<0)? 
iy+nlattice : iy; + real_t sign = (evec3.dot(vectk) >= 0.0)?1.0:-1.0; D_xx_.kelem(ix,iy,iz) = eval[2]; D_xy_.kelem(ix,iy,iz) = eval[1]; D_xz_.kelem(ix,iy,iz) = eval[0]; - D_yy_.kelem(ix,iy,iz) = vvv.x; - D_yz_.kelem(ix,iy,iz) = vvv.y; - D_zz_.kelem(ix,iy,iz) = vvv.z; + D_yy_.kelem(ix,iy,iz) = evec3.x*sign; + D_yz_.kelem(ix,iy,iz) = evec3.y*sign; + D_zz_.kelem(ix,iy,iz) = evec3.z*sign; } } // second half of Fourier space (due to real trafo we only have half in memory) @@ -345,17 +368,17 @@ private: int ix = std::round(vectk.x*(ngrid_)/twopi); int iy = std::round(vectk.y*(ngrid_)/twopi); int iz = std::round(vectk.z*(ngrid_)/twopi); - - if( ix >= -nlattice/2 && iy >= -nlattice/2 && iz >= 0 && - ix < nlattice/2 && iy < nlattice/2 && iz <= nlattice/2){ + if( ix > -nlattice/2 && iy > -nlattice/2 && iz >= 0 && + ix <= nlattice/2 && iy <= nlattice/2 && iz <= nlattice/2){ ix = (ix<0)? ix+nlattice : ix; iy = (iy<0)? iy+nlattice : iy; + real_t sign = (evec3.dot(vectk) >= 0.0)?1.0:-1.0; D_xx_.kelem(ix,iy,iz) = eval[2]; D_xy_.kelem(ix,iy,iz) = eval[1]; D_xz_.kelem(ix,iy,iz) = eval[0]; - D_yy_.kelem(ix,iy,iz) = vvv.x; - D_yz_.kelem(ix,iy,iz) = vvv.y; - D_zz_.kelem(ix,iy,iz) = vvv.z; + D_yy_.kelem(ix,iy,iz) = evec3.x*sign; + D_yz_.kelem(ix,iy,iz) = evec3.y*sign; + D_zz_.kelem(ix,iy,iz) = evec3.z*sign; } } } //l3 @@ -369,6 +392,10 @@ private: D_xx_.kelem(0,0,0) = 1.0; D_xy_.kelem(0,0,0) = 0.0; D_xz_.kelem(0,0,0) = 0.0; + + D_yy_.kelem(0,0,0) = 1.0; + D_yz_.kelem(0,0,0) = 0.0; + D_zz_.kelem(0,0,0) = 0.0; } //... approximate infinite lattice by inerpolating to sites not convered by current resolution... @@ -377,11 +404,20 @@ private: for( size_t i=0; i ccomplex_t { - return 0.25 * ( - D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) - + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) ); + if( k>0 && k< size_t(nlattice/2) ) return 1.0/6.0 * ( + D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) + + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) + + D.kelem(i,j,k-1) + D.kelem(i,j,k+1) ); + if( k==0 ) return 1.0/6.0 * ( + D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) + + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) + + D.kelem(i,j,k+1) + D.kelem(i,j,k+1) ); + return 1.0/6.0 * ( + D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) + + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) + + D.kelem(i,j,k-1) + D.kelem(i,j,k-1) ); }; D_xx_.kelem(i,j,k) = avg( D_xx_ ); @@ -399,30 +435,17 @@ private: for( size_t i=0; i ccomplex_t{ - return 0.5 * ( D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) ); - }; - - D_xx_.kelem(i,j,k) = avg( D_xx_ ); - D_xy_.kelem(i,j,k) = avg( D_xy_ ); - D_xz_.kelem(i,j,k) = avg( D_xz_ ); - D_yy_.kelem(i,j,k) = avg( D_yy_ ); - D_yz_.kelem(i,j,k) = avg( D_yz_ ); - D_zz_.kelem(i,j,k) = avg( D_zz_ ); - } - } - } - } - #pragma omp parallel for - for( size_t i=0; i ccomplex_t{ - return 0.5 * ( D.kelem((nlattice+i-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) ); - }; - + if( is_in( (i+1)%nlattice, j, k, mat_invrecip_fcc ) ){ + return 0.5 * ( D.kelem((i+nlattice-1)%nlattice,j,k) + D.kelem((i+1)%nlattice,j,k) ); + }else if( is_in( i, (j+1)%nlattice, k, mat_invrecip_fcc ) ){ + return 0.5 * ( D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) ); + }//else// + if( k>0 && k< size_t(nlattice/2) ) return 0.5 * ( D.kelem(i,j,k-1) + D.kelem(i,j,k+1) ); + if( k==0 ) return D.kelem(i,j,k+1); + return D.kelem(i,j,k-1); + }; 
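                    // Note on the FCC branch above (explanatory aside, not part of the patch):
                    // with mat_invrecip_fcc, is_in(i,j,k,...) holds only when i, j and k all
                    // share the same parity, so only one cubic mode in four receives a folded
                    // FCC mode (consistent with charge_fac_fcc = 4). Every uncovered site then
                    // has a covered nearest neighbour along x, y or z, which is what the three
                    // branches of avg() probe in turn, falling back to the single interior
                    // z-neighbour on the k = 0 and k = nlattice/2 planes.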
D_xx_.kelem(i,j,k) = avg( D_xx_ ); D_xy_.kelem(i,j,k) = avg( D_xy_ ); D_xz_.kelem(i,j,k) = avg( D_xz_ ); @@ -441,33 +464,62 @@ private: for( size_t j=0; jsize_t(nlattice/2))? int(i)-nlattice : i; + int jj = (j>size_t(nlattice/2))? int(j)-nlattice : j; + vec3 kv = D_xx_.get_k(i,j,k); const real_t kmod = kv.norm()/mapratio_/boxlen_; double mu1 = std::real(D_xx_.kelem(i,j,k)); - double mu2 = std::real(D_xy_.kelem(i,j,k)); - double mu3 = std::real(D_xz_.kelem(i,j,k)); + // double mu2 = std::real(D_xy_.kelem(i,j,k)); + // double mu3 = std::real(D_xz_.kelem(i,j,k)); - vec evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}) - - // store in diagonal components of D_ij - D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec1.x; - D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec1.y; - D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec1.z; + vec3 evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}); + evec1 /= evec1.norm(); - auto norm = (kv.norm()/kv.dot(evec1)); - if ( std::abs(kv.dot(evec1)) < 1e-10 || kv.norm() < 1e-10 ) norm = 0.0; + if(std::abs(ii)+std::abs(jj)+k<8){ + // small k modes, use usual pseudospectral derivative + // -- store in diagonal components of D_ij + D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kv.x/mapratio_/boxlen_); + D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kv.y/mapratio_/boxlen_); + D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kv.z/mapratio_/boxlen_); - D_xx_.kelem(i,j,k) *= norm; - D_yy_.kelem(i,j,k) *= norm; - D_zz_.kelem(i,j,k) *= norm; + // spatially dependent correction to vfact = \dot{D_+}/D_+ + D_xy_.kelem(i,j,k) = 1.0; + }else{ + // large k modes, use interpolated PLT results + // -- store in diagonal components of D_ij + D_xx_.kelem(i,j,k) = ccomplex_t(0.0,evec1.x * kmod); + D_yy_.kelem(i,j,k) = ccomplex_t(0.0,evec1.y * kmod); + D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod); - // spatially dependent correction to vfact = \dot{D_+}/D_+ - D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); + // re-normalise to that longitudinal amplitude is exact + auto kv_dot_e1 = (kv.norm()>1e-8)?kv.dot(evec1):kv.norm(); + auto norm = (kv.norm()/kv_dot_e1); + D_xx_.kelem(i,j,k) *= norm; + D_yy_.kelem(i,j,k) *= norm; + D_zz_.kelem(i,j,k) *= norm; + + // spatially dependent correction to vfact = \dot{D_+}/D_+ + D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); + } + if( i==size_t(nlattice/2) ) D_xx_.kelem(i,j,k)=0.0; + if( j==size_t(nlattice/2) ) D_yy_.kelem(i,j,k)=0.0; + if( k==size_t(nlattice/2) ) D_zz_.kelem(i,j,k)=0.0; } } } D_xy_.kelem(0,0,0) = 1.0; + D_xx_.kelem(0,0,0) = 0.0; + D_yy_.kelem(0,0,0) = 0.0; + D_zz_.kelem(0,0,0) = 0.0; + + // unlink("debug.hdf5"); + // D_xy_.Write_to_HDF5("debug.hdf5","mu1"); + // D_xx_.Write_to_HDF5("debug.hdf5","e1x"); + // D_yy_.Write_to_HDF5("debug.hdf5","e1y"); + // D_zz_.Write_to_HDF5("debug.hdf5","e1z"); + #else D_xx_.Write_to_HDF5("debug.hdf5","mu1"); D_xy_.Write_to_HDF5("debug.hdf5","mu2"); @@ -475,10 +527,7 @@ private: D_yy_.Write_to_HDF5("debug.hdf5","e1x"); D_yz_.Write_to_HDF5("debug.hdf5","e1y"); D_zz_.Write_to_HDF5("debug.hdf5","e1z"); -#endif - - - +#endif } void init_D__old() diff --git a/include/vec3.hh b/include/vec3.hh index af2bb9a..3d45c10 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -51,9 +51,6 @@ public: // assignment operator vec3& operator=( const vec3& v ) noexcept { data_=v.data_; return *this; } - // assignment operator - const vec3& operator=( const vec3& v ) const noexcept{ data_=v.data_; return *this; } - //! 
implementation of summation of vec3 vec3 operator+( const vec3& v ) const noexcept{ return vec3({x+v.x,y+v.y,z+v.z}); } @@ -70,14 +67,17 @@ public: vec3 operator/( T s ) const noexcept{ return vec3({x/s,y/s,z/s}); } //! implementation of += operator - vec3& operator+=( const vec3& v ) const noexcept{ x+=v.x; y+=v.y; z+=v.z; return *this; } + vec3& operator+=( const vec3& v ) noexcept{ x+=v.x; y+=v.y; z+=v.z; return *this; } //! implementation of -= operator - vec3& operator-=( const vec3& v ) const noexcept{ x-=v.x; y-=v.y; z-=v.z; return *this; } + vec3& operator-=( const vec3& v ) noexcept{ x-=v.x; y-=v.y; z-=v.z; return *this; } //! multiply with scalar - vec3& operator*=( T s ) const noexcept{ x*=s; y*=s; z*=s; return *this; } + vec3& operator*=( T s ) noexcept{ x*=s; y*=s; z*=s; return *this; } + //! divide by scalar + vec3& operator/=( T s ) noexcept{ x/=s; y/=s; z/=s; return *this; } + //! compute dot product with another vector T dot(const vec3 &a) const noexcept { From 6a998123c7cc179d3261dbe28155e1ad44ce3887 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 5 Dec 2019 05:43:49 +0100 Subject: [PATCH 045/130] improved implementation of plt field interpolation --- include/particle_plt.hh | 300 ++++++++++++++++++---------------------- include/vec3.hh | 8 ++ 2 files changed, 144 insertions(+), 164 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 6e8d280..6671028 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -15,7 +16,7 @@ #define PRODUCTION namespace particle{ -//! implement Marcos et al. PLT calculation +//! implement Joyce, Marcos et al. PLT calculation class lattice_gradient{ private: @@ -271,189 +272,161 @@ private: std::ofstream ofs2("test_brillouin.txt"); #endif - { - //!=== Make temporary copies before resorting to std. Fourier grid ========!// - Grid_FFT - temp1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), - temp2({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), - temp3({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}); - - temp1.FourierTransformForward(false); - temp2.FourierTransformForward(false); - temp3.FourierTransformForward(false); + using map_t = std::map,size_t>; + map_t iimap; - #pragma omp parallel for + //!=== Make temporary copies before resorting to std. 
Fourier grid ========!// + Grid_FFT + temp1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + temp2({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + temp3({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}); + + temp1.FourierTransformForward(false); + temp2.FourierTransformForward(false); + temp3.FourierTransformForward(false); + + #pragma omp parallel for + for( size_t i=0; i D; + vec3 eval, evec1, evec2, evec3; + + #pragma omp for for( size_t i=0; i D; - vec3 eval, evec1, evec2, evec3; + vec3 kv = D_xx_.get_k(i,j,k); + + // put matrix elements into actual matrix + D(0,0) = std::real(temp1.kelem(i,j,k)) / fft_norm12; + D(0,1) = D(1,0) = std::imag(temp1.kelem(i,j,k)) / fft_norm12; + D(0,2) = D(2,0) = std::real(temp2.kelem(i,j,k)) / fft_norm12; + D(1,1) = std::imag(temp2.kelem(i,j,k)) / fft_norm12; + D(1,2) = D(2,1) = std::real(temp3.kelem(i,j,k)) / fft_norm12; + D(2,2) = std::imag(temp3.kelem(i,j,k)) / fft_norm12; - #pragma omp for - for( size_t i=0; i kv = D_xx_.get_k(i,j,k); - - // put matrix elements into actual matrix - D(0,0) = std::real(temp1.kelem(i,j,k)) / fft_norm12; - D(0,1) = D(1,0) = std::imag(temp1.kelem(i,j,k)) / fft_norm12; - D(0,2) = D(2,0) = std::real(temp2.kelem(i,j,k)) / fft_norm12; - D(1,1) = std::imag(temp2.kelem(i,j,k)) / fft_norm12; - D(1,2) = D(2,1) = std::real(temp3.kelem(i,j,k)) / fft_norm12; - D(2,2) = std::imag(temp3.kelem(i,j,k)) / fft_norm12; + // compute eigenstructure of matrix + D.eigen(eval, evec1, evec2, evec3); + evec3 /= (twopi*ngrid_); - // compute eigenstructure of matrix - D.eigen(eval, evec1, evec2, evec3); - evec3 /= (twopi*ngrid_); - - // now determine to which modes on the regular lattice this contributes - vec3 ar1 = kv / (twopi*ngrid_); - vec3 ar2 = -kv / (twopi*ngrid_); - - vec3 a1(mat_reciprocal * ar1); - vec3 a2(mat_reciprocal * ar2); - - // translate the k-vectors into the "candidate" FBZ - for( int l1=-numb; l1<=numb; ++l1 ){ - for( int l2=-numb; l2<=numb; ++l2 ){ - for( int l3=-numb; l3<=numb; ++l3 ){ + // now determine to which modes on the regular lattice this contributes + vec3 ar = kv / (twopi*ngrid_); + vec3 a(mat_reciprocal * ar); + + // translate the k-vectors into the "candidate" FBZ + for( int l1=-numb; l1<=numb; ++l1 ){ + for( int l2=-numb; l2<=numb; ++l2 ){ + for( int l3=-numb; l3<=numb; ++l3 ){ + // need both halfs of Fourier space since we use real transforms + for( int isign=0; isign<=1; ++isign ){ + real_t sign = (isign==0)? +1.0 : -1.0; const vec3 vshift({real_t(l1),real_t(l2),real_t(l3)}); - // first half of Fourier space (due to real trafo we only have half in memory) - vec3 vectk = a1 + mat_reciprocal * vshift; + vec3 vectk = sign * a + mat_reciprocal * vshift; if( check_FBZ( normals, vectk ) ) { int ix = std::round(vectk.x*(ngrid_)/twopi); int iy = std::round(vectk.y*(ngrid_)/twopi); int iz = std::round(vectk.z*(ngrid_)/twopi); - if( ix > -nlattice/2 && iy > -nlattice/2 && iz >= 0 && - ix <= nlattice/2 && iy <= nlattice/2 && iz <= nlattice/2){ - ix = (ix<0)? ix+nlattice : ix; - iy = (iy<0)? 
iy+nlattice : iy; - real_t sign = (evec3.dot(vectk) >= 0.0)?1.0:-1.0; - D_xx_.kelem(ix,iy,iz) = eval[2]; - D_xy_.kelem(ix,iy,iz) = eval[1]; - D_xz_.kelem(ix,iy,iz) = eval[0]; - D_yy_.kelem(ix,iy,iz) = evec3.x*sign; - D_yz_.kelem(ix,iy,iz) = evec3.y*sign; - D_zz_.kelem(ix,iy,iz) = evec3.z*sign; - } + + #pragma omp critical + {iimap.insert( std::pair,size_t>({ix,iy,iz}, D_xx_.get_idx(i,j,k)) );} + + temp1.kelem(i,j,k) = ccomplex_t(eval[2],eval[1]); + temp2.kelem(i,j,k) = ccomplex_t(eval[0],evec3.x); + temp3.kelem(i,j,k) = ccomplex_t(evec3.y,evec3.z); } - // second half of Fourier space (due to real trafo we only have half in memory) - vectk = a2 + mat_reciprocal * vshift; - - if( check_FBZ( normals, vectk ) ) - { - int ix = std::round(vectk.x*(ngrid_)/twopi); - int iy = std::round(vectk.y*(ngrid_)/twopi); - int iz = std::round(vectk.z*(ngrid_)/twopi); - if( ix > -nlattice/2 && iy > -nlattice/2 && iz >= 0 && - ix <= nlattice/2 && iy <= nlattice/2 && iz <= nlattice/2){ - ix = (ix<0)? ix+nlattice : ix; - iy = (iy<0)? iy+nlattice : iy; - real_t sign = (evec3.dot(vectk) >= 0.0)?1.0:-1.0; - D_xx_.kelem(ix,iy,iz) = eval[2]; - D_xy_.kelem(ix,iy,iz) = eval[1]; - D_xz_.kelem(ix,iy,iz) = eval[0]; - D_yy_.kelem(ix,iy,iz) = evec3.x*sign; - D_yz_.kelem(ix,iy,iz) = evec3.y*sign; - D_zz_.kelem(ix,iy,iz) = evec3.z*sign; - } - } - } //l3 - } //l2 - } //l1 - } //k - } //j - } //i - } - - D_xx_.kelem(0,0,0) = 1.0; - D_xy_.kelem(0,0,0) = 0.0; - D_xz_.kelem(0,0,0) = 0.0; - - D_yy_.kelem(0,0,0) = 1.0; - D_yz_.kelem(0,0,0) = 0.0; - D_zz_.kelem(0,0,0) = 0.0; + }//sign + } //l3 + } //l2 + } //l1 + } //k + } //j + } //i } + D_xx_.kelem(0,0,0) = 1.0; + D_xy_.kelem(0,0,0) = 0.0; + D_xz_.kelem(0,0,0) = 0.0; + + D_yy_.kelem(0,0,0) = 1.0; + D_yz_.kelem(0,0,0) = 0.0; + D_zz_.kelem(0,0,0) = 0.0; + //... approximate infinite lattice by inerpolating to sites not convered by current resolution... - if( ilat==1 ){ - #pragma omp parallel for - for( size_t i=0; i ccomplex_t { - if( k>0 && k< size_t(nlattice/2) ) return 1.0/6.0 * ( - D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) - + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) - + D.kelem(i,j,k-1) + D.kelem(i,j,k+1) ); - if( k==0 ) return 1.0/6.0 * ( - D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) - + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) - + D.kelem(i,j,k+1) + D.kelem(i,j,k+1) ); - return 1.0/6.0 * ( - D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) - + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) - + D.kelem(i,j,k-1) + D.kelem(i,j,k-1) ); + #pragma omp parallel for + for( size_t i=0; inlattice/2)? int(i)-nlattice : int(i); + int jj = (int(j)>nlattice/2)? int(j)-nlattice : int(j); + int kk = (int(k)>nlattice/2)? 
int(k)-nlattice : int(k); + vec3 kv({real_t(ii),real_t(jj),real_t(kk)}); + + auto align_with_k = [&]( const vec3& v ) -> vec3{ + return v*((v.dot(kv)<0.0)?-1.0:1.0); + }; + + vec3 v, l; + map_t::iterator it; + + if( !is_in(i,j,k,mat_invrecip) ){ + auto average_lv = [&]( const auto& t1, const auto& t2, const auto& t3, vec3& v, vec3& l ) { + v = 0.0; l = 0.0; + int count(0); + + auto add_lv = [&]( auto it ) -> void { + auto q = it->second;++count; + l += vec3({std::real(t1.kelem(q)),std::imag(t1.kelem(q)),std::real(t2.kelem(q))}); + v += align_with_k(vec3({std::imag(t2.kelem(q)),std::real(t3.kelem(q)),std::imag(t3.kelem(q))})); }; + map_t::iterator it; + if( (it = iimap.find({ii-1,jj,kk}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii+1,jj,kk}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii,jj-1,kk}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii,jj+1,kk}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii,jj,kk-1}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii,jj,kk+1}))!=iimap.end() ){ add_lv(it); } + l/=real_t(count); v/=real_t(count); + }; - D_xx_.kelem(i,j,k) = avg( D_xx_ ); - D_xy_.kelem(i,j,k) = avg( D_xy_ ); - D_xz_.kelem(i,j,k) = avg( D_xz_ ); - D_yy_.kelem(i,j,k) = avg( D_yy_ ); - D_yz_.kelem(i,j,k) = avg( D_yz_ ); - D_zz_.kelem(i,j,k) = avg( D_zz_ ); - } - } - } - } - }else if( ilat==2 ){ - #pragma omp parallel for - for( size_t i=0; i ccomplex_t{ - if( is_in( (i+1)%nlattice, j, k, mat_invrecip_fcc ) ){ - return 0.5 * ( D.kelem((i+nlattice-1)%nlattice,j,k) + D.kelem((i+1)%nlattice,j,k) ); - }else if( is_in( i, (j+1)%nlattice, k, mat_invrecip_fcc ) ){ - return 0.5 * ( D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) ); - }//else// - if( k>0 && k< size_t(nlattice/2) ) return 0.5 * ( D.kelem(i,j,k-1) + D.kelem(i,j,k+1) ); - if( k==0 ) return D.kelem(i,j,k+1); - return D.kelem(i,j,k-1); - }; - D_xx_.kelem(i,j,k) = avg( D_xx_ ); - D_xy_.kelem(i,j,k) = avg( D_xy_ ); - D_xz_.kelem(i,j,k) = avg( D_xz_ ); - D_yy_.kelem(i,j,k) = avg( D_yy_ ); - D_yz_.kelem(i,j,k) = avg( D_yz_ ); - D_zz_.kelem(i,j,k) = avg( D_zz_ ); + average_lv(temp1,temp2,temp3,v,l); + + }else{ + if( (it = iimap.find({ii,jj,kk}))!=iimap.end() ){ + auto q = it->second; + l = vec3({std::real(temp1.kelem(q)),std::imag(temp1.kelem(q)),std::real(temp2.kelem(q))}); + v = align_with_k(vec3({std::imag(temp2.kelem(q)),std::real(temp3.kelem(q)),std::imag(temp3.kelem(q))})); } } + D_xx_.kelem(i,j,k) = l[0]; + D_xy_.kelem(i,j,k) = l[1]; + D_xz_.kelem(i,j,k) = l[2]; + D_yy_.kelem(i,j,k) = v[0]; + D_yz_.kelem(i,j,k) = v[1]; + D_zz_.kelem(i,j,k) = v[2]; } } } @@ -494,8 +467,7 @@ private: D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod); // re-normalise to that longitudinal amplitude is exact - auto kv_dot_e1 = (kv.norm()>1e-8)?kv.dot(evec1):kv.norm(); - auto norm = (kv.norm()/kv_dot_e1); + auto norm = (kv.norm()/kv.dot(evec1)); D_xx_.kelem(i,j,k) *= norm; D_yy_.kelem(i,j,k) *= norm; D_zz_.kelem(i,j,k) *= norm; @@ -503,9 +475,9 @@ private: // spatially dependent correction to vfact = \dot{D_+}/D_+ D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); } - if( i==size_t(nlattice/2) ) D_xx_.kelem(i,j,k)=0.0; - if( j==size_t(nlattice/2) ) D_yy_.kelem(i,j,k)=0.0; - if( k==size_t(nlattice/2) ) D_zz_.kelem(i,j,k)=0.0; + // if( i==size_t(nlattice/2) ) D_xx_.kelem(i,j,k)=0.0; + // if( j==size_t(nlattice/2) ) D_yy_.kelem(i,j,k)=0.0; + // if( k==size_t(nlattice/2) ) D_zz_.kelem(i,j,k)=0.0; } } } diff --git a/include/vec3.hh b/include/vec3.hh index 3d45c10..4e72d81 100644 
--- a/include/vec3.hh +++ b/include/vec3.hh @@ -101,6 +101,14 @@ public: for( auto& x : data_ ) x = (x<-p/2)? x+p : (x>=p/2)? x-p : x; return *this; } + + //! ordering, allows 3d sorting of vec3s + bool operator<( const vec3& o ) const noexcept{ + if( x!=o.x ) return x Date: Thu, 5 Dec 2019 22:26:16 +0100 Subject: [PATCH 046/130] zeroed nyquist modes in plt --- include/particle_plt.hh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 6671028..6948d19 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -475,9 +475,9 @@ private: // spatially dependent correction to vfact = \dot{D_+}/D_+ D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); } - // if( i==size_t(nlattice/2) ) D_xx_.kelem(i,j,k)=0.0; - // if( j==size_t(nlattice/2) ) D_yy_.kelem(i,j,k)=0.0; - // if( k==size_t(nlattice/2) ) D_zz_.kelem(i,j,k)=0.0; + if( i==size_t(nlattice/2) ) D_xx_.kelem(i,j,k)=0.0; + if( j==size_t(nlattice/2) ) D_yy_.kelem(i,j,k)=0.0; + if( k==size_t(nlattice/2) ) D_zz_.kelem(i,j,k)=0.0; } } } From 120cf21577253a1fa5e52c211a72468c42329fae Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 11:52:08 +0000 Subject: [PATCH 047/130] README.md edited online with Bitbucket --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index e34dce2..495a2c6 100644 --- a/README.md +++ b/README.md @@ -18,3 +18,9 @@ Create build directory, configure, and build: this should create an executable in the build directory. There is an example parameter file 'example.conf' in the main directory + +If you run into problems with CMake not being able to find your local FFTW3 or HDF5 installation, it is best to give the path directly as + + FFTW3_ROOT= HDF5_ROOT= ccmake .. + +make sure to delete previous files generated by CMake before reconfiguring like this. \ No newline at end of file From e3017dea955f981aabe187f13a3d0ac5c36f1ddd Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 11:55:56 +0000 Subject: [PATCH 048/130] README.md edited online with Bitbucket --- README.md | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 495a2c6..aac4f55 100644 --- a/README.md +++ b/README.md @@ -17,10 +17,23 @@ Create build directory, configure, and build: make this should create an executable in the build directory. -There is an example parameter file 'example.conf' in the main directory If you run into problems with CMake not being able to find your local FFTW3 or HDF5 installation, it is best to give the path directly as FFTW3_ROOT= HDF5_ROOT= ccmake .. -make sure to delete previous files generated by CMake before reconfiguring like this. \ No newline at end of file +make sure to delete previous files generated by CMake before reconfiguring like this. + +## Running + +There is an example parameter file 'example.conf' in the main directory. Possible options are explained in it, it can be run +as a simple argument, e.g. from within the build directory: + + ./monofonic ../example.conf + +If you want to run with MPI, you need to enable MPI support via ccmake. Then you can launch in hybrid MPI+threads mode by +specifying the desired number of threads per task in the config file, and the number of tasks to be launched via + + mpirun -np 16 ./monofonic + +It will then run with 16 tasks times the number of threads per task specified in the config file. 
\ No newline at end of file From cffea05dcd275e7911496d7d061fafd447083094 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 11:58:35 +0000 Subject: [PATCH 049/130] README.md edited online with Bitbucket --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index aac4f55..c7cc745 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,13 @@ If you run into problems with CMake not being able to find your local FFTW3 or H make sure to delete previous files generated by CMake before reconfiguring like this. +If you want to build on macOS, then it is strongly recommended to use GNU (or Intel) compilers instead of Apple's Clang. Install them e.g. +via homebrew and then configure cmake to use them instead of the macOS default compiler via + + CC=gcc-9 CXX=g++-9 ccmake .. + +This is necessary since Apple's compilers haven't supported OpenMP for years. + ## Running There is an example parameter file 'example.conf' in the main directory. Possible options are explained in it, it can be run From 2ef654f22ad271c73ec6bcd428ab471d74c4844a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 13:54:24 +0100 Subject: [PATCH 050/130] small optimizations --- CMakeLists.txt | 2 +- example.conf | 9 +- example_testing.conf | 33 + external/class | 2 +- external/fftwpp | 1 + ics.conf | 62 ++ include/particle_plt.hh | 8 +- src/old/output_gadget2___original.cc | 1408 ++++++++++++++++++++++++++ 8 files changed, 1514 insertions(+), 11 deletions(-) create mode 100644 example_testing.conf create mode 160000 external/fftwpp create mode 100644 ics.conf create mode 100644 src/old/output_gadget2___original.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index a3bffae..5df55a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ project(monofonIC) # include class submodule include(${CMAKE_CURRENT_SOURCE_DIR}/external/class.cmake) -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") +#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic") find_package(PkgConfig REQUIRED) diff --git a/example.conf b/example.conf index 58fc969..a537d40 100644 --- a/example.conf +++ b/example.conf @@ -4,13 +4,13 @@ GridRes = 128 # length of the box in Mpc/h BoxLength = 250 # starting redshift -zstart = 49.0 +zstart = 24.0 # order of the LPT to be used (1,2 or 3) -LPTorder = 3 +LPTorder = 1 # also do baryon ICs? DoBaryons = no # do mode fixing à la Angulo&Pontzen -DoFixing = no +DoFixing = yes # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!) 
ParticleLoad = sc @@ -36,7 +36,8 @@ seed = 9001 [testing] # enables diagnostic output # can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence' -test = convergence +#test = convergence +test = none [execution] NumThreads = 4 diff --git a/example_testing.conf b/example_testing.conf new file mode 100644 index 0000000..2890286 --- /dev/null +++ b/example_testing.conf @@ -0,0 +1,33 @@ +[setup] +GridRes = 256 +BoxLength = 6.28318530718 +zstart = 0.0 +LPTorder = 1 +SymplecticPT = no +DoFixing = no + +[execution] +NumThreads = 4 + +[output] +fname_hdf5 = output.hdf5 +fbase_analysis = output +#format = gadget2 +#filename = ics_gadget.dat +format = generic +filename = debug.hdf5 +generic_out_eulerian = yes + +[random] +generator = NGENIC +seed = 9001 + +[cosmology] +#transfer = CLASS +transfer = eisenstein +Omega_m = 1.0 +Omega_b = 0.045 +Omega_L = 0.0 +H0 = 70.3 +sigma_8 = 0.811 +nspec = 0.961 diff --git a/external/class b/external/class index b34d7f6..6f3abba 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit b34d7f6c2b72eab3a347c28e62298d62ca9dd69b +Subproject commit 6f3abbab2608712029d740d6c69aad0ba853e507 diff --git a/external/fftwpp b/external/fftwpp new file mode 160000 index 0000000..ec6b82c --- /dev/null +++ b/external/fftwpp @@ -0,0 +1 @@ +Subproject commit ec6b82cc1122ba029a7a7142cf836014e992e68c diff --git a/ics.conf b/ics.conf new file mode 100644 index 0000000..d867cb9 --- /dev/null +++ b/ics.conf @@ -0,0 +1,62 @@ +[setup] +# number of grid cells per linear dimension for calculations = particles for sc initial load +GridRes = 128 +# length of the box in Mpc/h +BoxLength = 200 +# starting redshift +zstart = 24.0 +# order of the LPT to be used (1,2 or 3) +LPTorder = 1 +# also do baryon ICs? +DoBaryons = no +# do mode fixing à la Angulo&Pontzen +DoFixing = yes +# particle load, can be 'sc' (1x), 'bcc' (2x), 'fcc' (4x), or 'rsc' (8x) +ParticleLoad = sc + +[testing] +# enables diagnostic output +# can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence' +#test = potentials_and_densities +#test = convergence +test = none + +[execution] +NumThreads = 1 + +[output] +fname_hdf5 = output.hdf5 +fbase_analysis = output + +#format = gadget2 +#filename = ics_gadget.dat + +format = generic +filename = debug.hdf5 +#generic_out_eulerian = yes + +#format = grafic2 +#filename = ics_ramses +#grafic_use_SPT = yes + +[random] +generator = NGENIC +seed = 9001 + +[cosmology] +transfer = eisenstein +#transfer = CLASS +#transfer = eisenstein_wdm +#WDMmass = 0.1 +Omega_m = 0.302 +Omega_b = 0.045 +Omega_L = 0.698 +H0 = 70.3 +sigma_8 = 0.811 +nspec = 0.961 + +# anisotropic large scale tidal field +#LSS_aniso_lx = 0.1 +#LSS_aniso_ly = 0.1 +#LSS_aniso_lz = -0.2 + diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 6948d19..e636dcc 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -176,14 +176,12 @@ private: //! 
checks if 'vec' is in the FBZ with FBZ normal vectors given in 'normals' auto check_FBZ = []( const auto& normals, const auto& vec ) -> bool { - bool btest = true; for( const auto& n : normals ){ if( n.dot( vec ) > 1.0001 * n.dot(n) ){ - btest = false; - break; + return false; } } - return btest; + return true; }; constexpr ptrdiff_t lnumber = 3, knumber = 3; @@ -340,7 +338,7 @@ private: for( int l3=-numb; l3<=numb; ++l3 ){ // need both halfs of Fourier space since we use real transforms for( int isign=0; isign<=1; ++isign ){ - real_t sign = (isign==0)? +1.0 : -1.0; + const real_t sign = 2.0*real_t(isign)-1.0; const vec3 vshift({real_t(l1),real_t(l2),real_t(l3)}); vec3 vectk = sign * a + mat_reciprocal * vshift; diff --git a/src/old/output_gadget2___original.cc b/src/old/output_gadget2___original.cc new file mode 100644 index 0000000..b5cbf41 --- /dev/null +++ b/src/old/output_gadget2___original.cc @@ -0,0 +1,1408 @@ +/* + + output_gadget2.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + */ + +#include +#include +#include "logger.hh" +// #include "region_generator.hh" +#include "output_plugin.hh" +// #include "mg_interp.hh" +// #include "mesh.hh" + + +template +class gadget2_output_plugin : public output_plugin +{ + // const int empty_fill_bytes = 56; + +public: + bool do_baryons_; + double omegab_; + double gamma_; + bool shift_halfcell_; + +protected: + std::ofstream ofs_; + bool blongids_; + bool bhave_particlenumbers_; + + std::map units_length_; + std::map units_mass_; + std::map units_vel_; + + double unit_length_chosen_; + double unit_mass_chosen_; + double unit_vel_chosen_; + + typedef struct io_header + { + int npart[6]; + double mass[6]; + double time; + double redshift; + int flag_sfr; + int flag_feedback; + unsigned int npartTotal[6]; + int flag_cooling; + int num_files; + double BoxSize; + double Omega0; + double OmegaLambda; + double HubbleParam; + int flag_stellarage; + int flag_metals; + unsigned int npartTotalHighWord[6]; + int flag_entropy_instead_u; + int flag_doubleprecision; + char fill[empty_fill_bytes]; + } header; + + header header_; + + std::string fname; + + enum iofields + { + id_dm_mass, + id_dm_vel, + id_dm_pos, + id_gas_vel, + id_gas_rho, + id_gas_temp, + id_gas_pos + }; + + size_t np_per_type_[6]; + + size_t block_buf_size_; + size_t npartmax_; + unsigned nfiles_; + + unsigned bndparticletype_; + bool bmorethan2bnd_; + bool kpcunits_; + bool msolunits_; + double YHe_; + bool spread_coarse_acrosstypes_; + + // refinement_mask refmask; + + void distribute_particles(unsigned nfiles, std::vector> &np_per_file, std::vector &np_tot_per_file) + { + np_per_file.assign(nfiles, std::vector(6, 0)); + np_tot_per_file.assign(nfiles, 0); + + size_t n2dist[6]; + size_t ntotal = 0; + for (int i = 0; i < 6; ++i) + { + ntotal += np_per_type_[i]; + n2dist[i] = np_per_type_[i]; + } + + size_t nnominal = (size_t)((double)ntotal / (double)nfiles); + size_t nlast = ntotal - nnominal * (nfiles - 1); + + for (unsigned i = 0; i < nfiles; ++i) + { + size_t nthisfile = 0; + + size_t nmax = (i == nfiles - 1) ? 
nlast : nnominal; + + for (int itype = 0; itype < 6; ++itype) + { + if (n2dist[itype] == 0) + continue; + np_per_file[i][itype] = std::min(n2dist[itype], nmax - nthisfile); + n2dist[itype] -= np_per_file[i][itype]; + nthisfile += np_per_file[i][itype]; + + if (nthisfile >= nmax) + break; + } + + np_tot_per_file[i] = nthisfile; + } + + for (int i = 0; i < 6; ++i) + assert(n2dist[i] == 0); + } + + std::ifstream &open_and_check(std::string ffname, size_t npart, size_t offset = 0) + { + std::ifstream ifs(ffname.c_str(), std::ios::binary); + size_t blk; + ifs.read((char *)&blk, sizeof(size_t)); + if (blk != npart * (size_t)sizeof(T_store)) + { + csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); + csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * (size_t)sizeof(T_store), blk); + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + ifs.seekg(offset, std::ios::cur); + + return ifs; + } + + class pistream : public std::ifstream + { + public: + pistream(std::string fname, size_t npart, size_t offset = 0) + : std::ifstream(fname.c_str(), std::ios::binary) + { + size_t blk; + + if (!this->good()) + { + csoca::elog.Print("Could not open buffer file in gadget2 output plug-in"); + throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); + } + + this->read((char *)&blk, sizeof(size_t)); + + if (blk != npart * sizeof(T_store)) + { + csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); + csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + this->seekg(offset + sizeof(size_t), std::ios::beg); + } + + pistream() + { + } + + void open(std::string fname, size_t npart, size_t offset = 0) + { + std::ifstream::open(fname.c_str(), std::ios::binary); + size_t blk; + + if (!this->good()) + { + csoca::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); + throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); + } + + this->read((char *)&blk, sizeof(size_t)); + + if (blk != npart * sizeof(T_store)) + { + csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); + csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + this->seekg(offset + sizeof(size_t), std::ios::beg); + } + }; + + class postream : public std::fstream + { + public: + postream(std::string fname, size_t npart, size_t offset = 0) + : std::fstream(fname.c_str(), std::ios::binary | std::ios::in | std::ios::out) + { + size_t blk; + + if (!this->good()) + { + csoca::elog.Print("Could not open buffer file in gadget2 output plug-in"); + throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); + } + + this->read((char *)&blk, sizeof(size_t)); + + if (blk != npart * sizeof(T_store)) + { + csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); + csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + this->seekg(offset, std::ios::cur); + this->seekp(offset + sizeof(size_t), std::ios::beg); + } + + postream() + { + } + + void open(std::string fname, size_t npart, size_t offset = 0) + { + if (is_open()) + 
this->close(); + + std::fstream::open(fname.c_str(), std::ios::binary | std::ios::in | std::ios::out); + size_t blk; + + if (!this->good()) + { + csoca::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); + throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); + } + + this->read((char *)&blk, sizeof(size_t)); + + if (blk != npart * sizeof(T_store)) + { + csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); + csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + this->seekg(offset, std::ios::cur); + this->seekp(offset + sizeof(size_t), std::ios::beg); + } + }; + + void combine_components_for_coarse(void) + { + const size_t + nptot = np_per_type_[1] + np_per_type_[2] + np_per_type_[3] + np_per_type_[4] + np_per_type_[5], + npfine = np_per_type_[1], + npcoarse = nptot - npfine; + + std::vector tmp1, tmp2; + + tmp1.assign(block_buf_size_, 0.0); + tmp2.assign(block_buf_size_, 0.0); + + double facb = omegab_ / header_.Omega0, facc = (header_.Omega0 - omegab_) / header_.Omega0; + + for (int icomp = 0; icomp < 3; ++icomp) + { + char fc[256], fb[256]; + postream iffs1, iffs2; + + /*** positions ***/ + + sprintf(fc, "___ic_temp_%05d.bin", 100 * id_dm_pos + icomp); + sprintf(fb, "___ic_temp_%05d.bin", 100 * id_gas_pos + icomp); + + iffs1.open(fc, nptot, npfine * sizeof(T_store)); + iffs2.open(fb, nptot, npfine * sizeof(T_store)); + + size_t npleft = npcoarse; + size_t n2read = std::min((size_t)block_buf_size_, npleft); + while (n2read > 0ul) + { + std::streampos sp = iffs1.tellg(); + iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) + { + tmp1[i] = facc * tmp1[i] + facb * tmp2[i]; + } + + iffs1.seekp(sp); + iffs1.write(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + + npleft -= n2read; + n2read = std::min((size_t)block_buf_size_, npleft); + } + + iffs1.close(); + iffs2.close(); + + /*** velocities ***/ + + sprintf(fc, "___ic_temp_%05d.bin", 100 * id_dm_vel + icomp); + sprintf(fb, "___ic_temp_%05d.bin", 100 * id_gas_vel + icomp); + + iffs1.open(fc, nptot, npfine * sizeof(T_store)); + iffs2.open(fb, nptot, npfine * sizeof(T_store)); + + npleft = npcoarse; + n2read = std::min((size_t)block_buf_size_, npleft); + + while (n2read > 0ul) + { + std::streampos sp = iffs1.tellg(); + iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) + { + tmp1[i] = facc * tmp1[i] + facb * tmp2[i]; + } + + iffs1.seekp(sp); + iffs1.write(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + + npleft -= n2read; + n2read = std::min((size_t)block_buf_size_, npleft); + } + + iffs1.close(); + iffs2.close(); + } + } + + void assemble_gadget_file(void) + { + + if (do_baryons_) + combine_components_for_coarse(); + + //............................................................................ + //... copy from the temporary files, interleave the data and save ............ 
+ + char fnx[256], fny[256], fnz[256], fnvx[256], fnvy[256], fnvz[256], fnm[256]; + char fnbx[256], fnby[256], fnbz[256], fnbvx[256], fnbvy[256], fnbvz[256]; + + sprintf(fnx, "___ic_temp_%05d.bin", 100 * id_dm_pos + 0); + sprintf(fny, "___ic_temp_%05d.bin", 100 * id_dm_pos + 1); + sprintf(fnz, "___ic_temp_%05d.bin", 100 * id_dm_pos + 2); + sprintf(fnvx, "___ic_temp_%05d.bin", 100 * id_dm_vel + 0); + sprintf(fnvy, "___ic_temp_%05d.bin", 100 * id_dm_vel + 1); + sprintf(fnvz, "___ic_temp_%05d.bin", 100 * id_dm_vel + 2); + sprintf(fnm, "___ic_temp_%05d.bin", 100 * id_dm_mass); + + sprintf(fnbx, "___ic_temp_%05d.bin", 100 * id_gas_pos + 0); + sprintf(fnby, "___ic_temp_%05d.bin", 100 * id_gas_pos + 1); + sprintf(fnbz, "___ic_temp_%05d.bin", 100 * id_gas_pos + 2); + sprintf(fnbvx, "___ic_temp_%05d.bin", 100 * id_gas_vel + 0); + sprintf(fnbvy, "___ic_temp_%05d.bin", 100 * id_gas_vel + 1); + sprintf(fnbvz, "___ic_temp_%05d.bin", 100 * id_gas_vel + 2); + + pistream iffs1, iffs2, iffs3; + + const size_t + nptot = np_per_type_[0] + np_per_type_[1] + np_per_type_[2] + np_per_type_[3] + np_per_type_[4] + np_per_type_[5], + //npgas = np_fine_gas_, + npcdm = nptot - np_per_type_[0]; + + size_t + wrote_coarse = 0, + wrote_gas = 0, + wrote_dm = 0; + + size_t + npleft = nptot, + n2read = std::min((size_t)block_buf_size_, npleft); + + std::cout << " - Gadget2 : writing " << nptot << " particles to file...\n"; + for (int i = 0; i < 6; ++i) + if (np_per_type_[i] > 0) + csoca::ilog.Print(" type %d : %12llu [m=%g]", i, np_per_type_[i], header_.mass[i]); + + bool bbaryons = np_per_type_[0] > 0; + + std::vector adata3; + adata3.reserve(3 * block_buf_size_); + T_store *tmp1, *tmp2, *tmp3; + + tmp1 = new T_store[block_buf_size_]; + tmp2 = new T_store[block_buf_size_]; + tmp3 = new T_store[block_buf_size_]; + + //... for multi-file output + //int fileno = 0; + //size_t npart_left = nptot; + + //std::vector nfdm_per_file, nfgas_per_file, nc_per_file; + + std::vector> np_per_file; + std::vector np_tot_per_file; + + distribute_particles(nfiles_, np_per_file, np_tot_per_file); + + if (nfiles_ > 1) + { + csoca::ilog.Print("Gadget2 : distributing particles to %d files", nfiles_); + //<< " " << std::setw(12) << "type 0" << "," << std::setw(12) << "type 1" << "," << std::setw(12) << "type " << bndparticletype_ << std::endl; + for (unsigned i = 0; i < nfiles_; ++i) + csoca::ilog.Print(" file %i : %12llu", i, np_tot_per_file[i], header_.mass[i]); + } + + size_t curr_block_buf_size = block_buf_size_; + + size_t idcount = 0; + bool bneed_long_ids = blongids_; + if (nptot >= 1ul << 32 && !bneed_long_ids) + { + bneed_long_ids = true; + csoca::wlog.Print("Need long particle IDs, will write 64bit, make sure to enable in Gadget!"); + } + + for (unsigned ifile = 0; ifile < nfiles_; ++ifile) + { + + if (nfiles_ > 1) + { + char ffname[256]; + sprintf(ffname, "%s.%d", fname_.c_str(), ifile); + ofs_.open(ffname, std::ios::binary | std::ios::trunc); + } + else + { + ofs_.open(fname_.c_str(), std::ios::binary | std::ios::trunc); + } + + size_t np_this_file = np_tot_per_file[ifile]; + + int blksize = sizeof(header); + + //... write the header ....................................................... 
+ + header this_header(header_); + for (int i = 0; i < 6; ++i) + { + this_header.npart[i] = np_per_file[ifile][i]; + this_header.npartTotal[i] = (unsigned)np_per_type_[i]; + this_header.npartTotalHighWord[i] = (unsigned)(np_per_type_[i] >> 32); + } + + ofs_.write((char *)&blksize, sizeof(int)); + ofs_.write((char *)&this_header, sizeof(header)); + ofs_.write((char *)&blksize, sizeof(int)); + + //... particle positions .................................................. + blksize = 3ul * np_this_file * sizeof(T_store); + ofs_.write((char *)&blksize, sizeof(int)); + + if (bbaryons && np_per_file[ifile][0] > 0ul) + { + + iffs1.open(fnbx, npcdm, wrote_gas * sizeof(T_store)); + iffs2.open(fnby, npcdm, wrote_gas * sizeof(T_store)); + iffs3.open(fnbz, npcdm, wrote_gas * sizeof(T_store)); + + npleft = np_per_file[ifile][0]; + n2read = std::min(curr_block_buf_size, npleft); + while (n2read > 0ul) + { + iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) + { + adata3.push_back(fmod(tmp1[i] + header_.BoxSize, header_.BoxSize)); + adata3.push_back(fmod(tmp2[i] + header_.BoxSize, header_.BoxSize)); + adata3.push_back(fmod(tmp3[i] + header_.BoxSize, header_.BoxSize)); + } + ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); + + adata3.clear(); + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); + } + iffs1.close(); + iffs2.close(); + iffs3.close(); + } + + npleft = np_this_file - np_per_file[ifile][0]; + n2read = std::min(curr_block_buf_size, npleft); + + iffs1.open(fnx, npcdm, wrote_dm * sizeof(T_store)); + iffs2.open(fny, npcdm, wrote_dm * sizeof(T_store)); + iffs3.open(fnz, npcdm, wrote_dm * sizeof(T_store)); + + while (n2read > 0ul) + { + iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) + { + adata3.push_back(fmod(tmp1[i] + header_.BoxSize, header_.BoxSize)); + adata3.push_back(fmod(tmp2[i] + header_.BoxSize, header_.BoxSize)); + adata3.push_back(fmod(tmp3[i] + header_.BoxSize, header_.BoxSize)); + } + ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); + + adata3.clear(); + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); + } + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + + iffs1.close(); + iffs2.close(); + iffs3.close(); + + //... particle velocities .................................................. 
+ blksize = 3ul * np_this_file * sizeof(T_store); + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + + if (bbaryons && np_per_file[ifile][0] > 0ul) + { + iffs1.open(fnbvx, npcdm, wrote_gas * sizeof(T_store)); + iffs2.open(fnbvy, npcdm, wrote_gas * sizeof(T_store)); + iffs3.open(fnbvz, npcdm, wrote_gas * sizeof(T_store)); + + npleft = np_per_file[ifile][0]; + n2read = std::min(curr_block_buf_size, npleft); + while (n2read > 0ul) + { + iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) + { + adata3.push_back(tmp1[i]); + adata3.push_back(tmp2[i]); + adata3.push_back(tmp3[i]); + } + + ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); + + adata3.clear(); + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); + } + + iffs1.close(); + iffs2.close(); + iffs3.close(); + } + + iffs1.open(fnvx, npcdm, wrote_dm * sizeof(T_store)); + iffs2.open(fnvy, npcdm, wrote_dm * sizeof(T_store)); + iffs3.open(fnvz, npcdm, wrote_dm * sizeof(T_store)); + + npleft = np_this_file - np_per_file[ifile][0]; + n2read = std::min(curr_block_buf_size, npleft); + while (n2read > 0ul) + { + iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) + { + adata3.push_back(tmp1[i]); + adata3.push_back(tmp2[i]); + adata3.push_back(tmp3[i]); + } + + ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); + + adata3.clear(); + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); + } + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + + iffs1.close(); + iffs2.close(); + iffs3.close(); + + //... particle IDs .......................................................... + std::vector short_ids; + std::vector long_ids; + + if (bneed_long_ids) + long_ids.assign(curr_block_buf_size, 0); + else + short_ids.assign(curr_block_buf_size, 0); + + npleft = np_this_file; + n2read = std::min(curr_block_buf_size, npleft); + blksize = sizeof(unsigned) * np_this_file; + + if (bneed_long_ids) + blksize = sizeof(size_t) * np_this_file; + + //... generate contiguous IDs and store in file .. + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + while (n2read > 0ul) + { + if (bneed_long_ids) + { + for (size_t i = 0; i < n2read; ++i) + long_ids[i] = idcount++; + ofs_.write(reinterpret_cast(&long_ids[0]), n2read * sizeof(size_t)); + } + else + { + for (size_t i = 0; i < n2read; ++i) + short_ids[i] = idcount++; + ofs_.write(reinterpret_cast(&short_ids[0]), n2read * sizeof(unsigned)); + } + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); + } + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + + std::vector().swap(short_ids); + std::vector().swap(long_ids); + + //... particle masses ....................................................... 
+ if (bmorethan2bnd_) //bmultimass_ && bmorethan2bnd_ && nc_per_file[ifile] > 0ul) + { + unsigned npcoarse = np_per_file[ifile][bndparticletype_]; // nc_per_file[ifile];//header_.npart[5]; + iffs1.open(fnm, np_per_type_[bndparticletype_], wrote_coarse * sizeof(T_store)); + + npleft = npcoarse; + n2read = std::min(curr_block_buf_size, npleft); + blksize = npcoarse * sizeof(T_store); + + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + while (n2read > 0ul) + { + iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + ofs_.write(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); + } + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + + iffs1.close(); + } + + //... initial internal energy for gas particles + if (bbaryons && np_per_file[ifile][0] > 0ul) + { + + std::vector eint(curr_block_buf_size, 0.0); + + const double astart = 1. / (1. + header_.redshift); + const double npol = (fabs(1.0 - gamma_) > 1e-7) ? 1.0 / (gamma_ - 1.) : 1.0; + const double unitv = 1e5; + const double h2 = header_.HubbleParam * header_.HubbleParam; //*0.0001; + const double adec = 1.0 / (160. * pow(omegab_ * h2 / 0.022, 2.0 / 5.0)); + const double Tcmb0 = 2.726; + const double Tini = astart < adec ? Tcmb0 / astart : Tcmb0 / astart / astart * adec; + const double mu = (Tini > 1.e4) ? 4.0 / (8. - 5. * YHe_) : 4.0 / (1. + 3. * (1. - YHe_)); + const double ceint = 1.3806e-16 / 1.6726e-24 * Tini * npol / mu / unitv / unitv; + + npleft = np_per_file[ifile][0]; + n2read = std::min(curr_block_buf_size, npleft); + blksize = sizeof(T_store) * np_per_file[ifile][0]; //*npgas + + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + while (n2read > 0ul) + { + for (size_t i = 0; i < n2read; ++i) + eint[i] = ceint; + ofs_.write(reinterpret_cast(&eint[0]), n2read * sizeof(T_store)); + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); + } + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + + static bool bdisplayed = false; + if (!bdisplayed) + { + csoca::ilog.Print("Gadget2 : set initial gas temperature to %.2f K/mu", Tini / mu); + bdisplayed = true; + } + } + + ofs_.flush(); + ofs_.close(); + + wrote_gas += np_per_file[ifile][0]; + wrote_dm += np_this_file - np_per_file[ifile][0]; + wrote_coarse += np_per_file[ifile][5]; + } + + delete[] tmp1; + delete[] tmp2; + delete[] tmp3; + + remove(fnbx); + remove(fnby); + remove(fnbz); + remove(fnx); + remove(fny); + remove(fnz); + remove(fnbvx); + remove(fnbvy); + remove(fnbvz); + remove(fnvx); + remove(fnvy); + remove(fnvz); + remove(fnm); + } + + void determine_particle_numbers(const grid_hierarchy &gh) + { + if (!bhave_particlenumbers_) + { + bhave_particlenumbers_ = true; + + double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 + + /*if( kpcunits_ ) + rhoc *= 1e-9; // in h^2 1e10 M_sol / kpc^3 + + if( msolunits_ ) + rhoc *= 1e10; // in h^2 M_sol / kpc^3*/ + + rhoc /= unit_mass_chosen_ / (unit_length_chosen_ * unit_length_chosen_ * unit_length_chosen_); + + // only type 1 are baryons + if (!do_baryons_) + header_.mass[1] = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / gh.count_leaf_cells(0, 0); ///pow(2,3*levelmax_); + else + { + header_.mass[0] = (omegab_)*rhoc * pow(header_.BoxSize, 3.) / gh.count_leaf_cells(0, 0); ///pow(2,3*levelmax_); + header_.mass[1] = (header_.Omega0 - omegab_) * rhoc * pow(header_.BoxSize, 3.) / gh.count_leaf_cells(0, 0); ///pow(2,3*levelmax_); + } + + //... 
+ for (int i = 0; i < 6; ++i) + np_per_type_[i] = 0; + + // determine how many particles per type exist, determine their mass + for (int ilevel = (int)gh.levelmax(); ilevel >= (int)gh.levelmin(); --ilevel) + { + int itype = std::min((int)gh.levelmax() - ilevel + 1, 5); + np_per_type_[itype] += gh.count_leaf_cells(ilevel, ilevel); + if (itype > 1) + header_.mass[itype] = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / pow(2, 3 * ilevel); + } + + // if coarse particles should not be spread across types, assign them all to type bndparticletype + if (!spread_coarse_acrosstypes_) + { + if (gh.levelmax() > gh.levelmin() + 1) + bmorethan2bnd_ = true; + else + bmorethan2bnd_ = false; + + for (unsigned itype = 2; itype < 6; ++itype) + { + if (itype == bndparticletype_) + continue; + np_per_type_[bndparticletype_] += np_per_type_[itype]; + if (!bmorethan2bnd_) + header_.mass[bndparticletype_] += header_.mass[itype]; + np_per_type_[itype] = 0; + header_.mass[itype] = 0.; + } + } + + if (do_baryons_) + np_per_type_[0] = np_per_type_[1]; + } + } + +public: + gadget2_output_plugin(ConfigFile &cf) + : output_plugin(cf) + { + + units_mass_.insert(std::pair("1e10Msol", 1.0)); // 1e10 M_o/h (default) + units_mass_.insert(std::pair("Msol", 1.0e-10)); // 1 M_o/h + units_mass_.insert(std::pair("Mearth", 3.002e-16)); // 1 M_earth/h + + units_length_.insert(std::pair("Mpc", 1.0)); // 1 Mpc/h (default) + units_length_.insert(std::pair("kpc", 1.0e-3)); // 1 kpc/h + units_length_.insert(std::pair("pc", 1.0e-6)); // 1 pc/h + + units_vel_.insert(std::pair("km/s", 1.0)); // 1 km/s (default) + units_vel_.insert(std::pair("m/s", 1.0e-3)); // 1 m/s + units_vel_.insert(std::pair("cm/s", 1.0e-5)); // 1 cm/s + + block_buf_size_ = cf_.GetValueSafe("output", "gadget_blksize", 1048576); + + //... 
ensure that everyone knows we want to do SPH + cf.InsertValue("setup", "do_SPH", "yes"); + + //bbndparticles_ = !cf_.GetValueSafe("output","gadget_nobndpart",false); + npartmax_ = 1 << 30; + + nfiles_ = cf.GetValueSafe("output", "gadget_num_files", 1); + + blongids_ = cf.GetValueSafe("output", "gadget_longids", false); + + shift_halfcell_ = cf.GetValueSafe("output", "gadget_cell_centered", false); + + //if( nfiles_ < (int)ceil((double)npart/(double)npartmax_) ) + // csoca::wlog.Print("Should use more files."); + + if (nfiles_ > 1) + { + for (unsigned ifile = 0; ifile < nfiles_; ++ifile) + { + char ffname[256]; + sprintf(ffname, "%s.%d", fname_.c_str(), ifile); + ofs_.open(ffname, std::ios::binary | std::ios::trunc); + if (!ofs_.good()) + { + csoca::elog.Print("gadget-2 output plug-in could not open output file \'%s\' for writing!", ffname); + throw std::runtime_error(std::string("gadget-2 output plug-in could not open output file \'") + std::string(ffname) + "\' for writing!\n"); + } + ofs_.close(); + } + } + else + { + ofs_.open(fname_.c_str(), std::ios::binary | std::ios::trunc); + if (!ofs_.good()) + { + csoca::elog.Print("gadget-2 output plug-in could not open output file \'%s\' for writing!", fname_.c_str()); + throw std::runtime_error(std::string("gadget-2 output plug-in could not open output file \'") + fname_ + "\' for writing!\n"); + } + ofs_.close(); + } + + bhave_particlenumbers_ = false; + + bmorethan2bnd_ = false; + if (false) //levelmax_ > levelmin_ +4) + bmorethan2bnd_ = true; + + for (int i = 0; i < 6; ++i) + { + header_.npart[i] = 0; + header_.npartTotal[i] = 0; + header_.npartTotalHighWord[i] = 0; + header_.mass[i] = 0.0; + } + + if (typeid(T_store) == typeid(float)) + header_.flag_doubleprecision = 0; + else if (typeid(T_store) == typeid(double)) + header_.flag_doubleprecision = 1; + else + { + csoca::elog.Print("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); + throw std::runtime_error("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); + } + + YHe_ = cf.GetValueSafe("cosmology", "YHe", 0.248); + gamma_ = cf.GetValueSafe("cosmology", "gamma", 5.0 / 3.0); + + do_baryons_ = cf.GetValueSafe("setup", "baryons", false); + omegab_ = cf.GetValueSafe("cosmology", "Omega_b", 0.045); + + //... new way + std::string lunitstr = cf.GetValueSafe("output", "gadget_lunit", "Mpc"); + std::string munitstr = cf.GetValueSafe("output", "gadget_munit", "1e10Msol"); + std::string vunitstr = cf.GetValueSafe("output", "gadget_vunit", "km/s"); + + std::map::iterator mapit; + + if ((mapit = units_length_.find(lunitstr)) != units_length_.end()) + unit_length_chosen_ = (*mapit).second; + else + { + csoca::elog.Print("Gadget: length unit \'%s\' unknown in gadget_lunit", lunitstr.c_str()); + throw std::runtime_error("Unknown length unit specified for Gadget output plugin"); + } + + if ((mapit = units_mass_.find(munitstr)) != units_mass_.end()) + unit_mass_chosen_ = (*mapit).second; + else + { + csoca::elog.Print("Gadget: mass unit \'%s\' unknown in gadget_munit", munitstr.c_str()); + throw std::runtime_error("Unknown mass unit specified for Gadget output plugin"); + } + + if ((mapit = units_vel_.find(vunitstr)) != units_vel_.end()) + unit_vel_chosen_ = (*mapit).second; + else + { + csoca::elog.Print("Gadget: velocity unit \'%s\' unknown in gadget_vunit", vunitstr.c_str()); + throw std::runtime_error("Unknown velocity unit specified for Gadget output plugin"); + } + + //... 
maintain compatibility with old way of setting units + if (cf.ContainsKey("output", "gadget_usekpc")) + { + kpcunits_ = cf.GetValueSafe("output", "gadget_usekpc", false); + if (kpcunits_) + unit_length_chosen_ = 1e-3; + csoca::wlog.Print("Deprecated option \'gadget_usekpc\' may override unit selection. Use \'gadget_lunit\' instead."); + } + if (cf.ContainsKey("output", "gadget_usemsol")) + { + msolunits_ = cf.GetValueSafe("output", "gadget_usemsol", false); + if (msolunits_) + unit_mass_chosen_ = 1e-10; + csoca::wlog.Print("Deprecated option \'gadget_usemsol\' may override unit selection. Use \'gadget_munit\' instead."); + } + + //... coarse particle properties... + + spread_coarse_acrosstypes_ = cf.GetValueSafe("output", "gadget_spreadcoarse", false); + bndparticletype_ = 5; + + if (!spread_coarse_acrosstypes_) + { + bndparticletype_ = cf.GetValueSafe("output", "gadget_coarsetype", 5); + + if (bndparticletype_ == 0 || //bndparticletype_ == 1 || bndparticletype_ == 4 || + bndparticletype_ > 5) + { + csoca::elog.Print("Coarse particles cannot be of Gadget particle type %d in output plugin.", bndparticletype_); + throw std::runtime_error("Specified illegal Gadget particle type for coarse particles"); + } + } + else + { + if (cf.GetValueSafe("output", "gadget_coarsetype", 5) != 5) + csoca::wlog.Print("Gadget: Option \'gadget_spreadcoarse\' forces \'gadget_coarsetype=5\'! Will override."); + } + + //... set time ...................................................... + header_.redshift = cf.GetValue("setup", "zstart"); + header_.time = 1.0 / (1.0 + header_.redshift); + + //... SF flags + header_.flag_sfr = 0; + header_.flag_feedback = 0; + header_.flag_cooling = 0; + + //... + header_.num_files = nfiles_; //1; + header_.BoxSize = cf.GetValue("setup", "BoxLength"); + header_.Omega0 = cf.GetValue("cosmology", "Omega_m"); + header_.OmegaLambda = cf.GetValue("cosmology", "Omega_L"); + header_.HubbleParam = cf.GetValue("cosmology", "H0") / 100.0; + + header_.flag_stellarage = 0; + header_.flag_metals = 0; + + header_.flag_entropy_instead_u = 0; + + //if( kpcunits_ ) + // header_.BoxSize *= 1000.0; + header_.BoxSize /= unit_length_chosen_; + + for (int i = 0; i < empty_fill_bytes; ++i) + header_.fill[i] = 0; + } + + void write_dm_mass(const grid_hierarchy &gh) + { + determine_particle_numbers(gh); + + double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 + + // adjust units + rhoc /= unit_mass_chosen_ / (unit_length_chosen_ * unit_length_chosen_ * unit_length_chosen_); + + /*if( kpcunits_ ) + rhoc *= 1e-9; // in h^2 1e10 M_sol / kpc^3 + + if( msolunits_ ) + rhoc *= 1e10; // in h^2 M_sol / kpc^3 + */ + + // if there are more than one kind of coarse particle assigned to the same type, + // we have to explicitly store their masses + if (bmorethan2bnd_) + { + header_.mass[bndparticletype_] = 0.; + + size_t npcoarse = np_per_type_[bndparticletype_]; + size_t nwritten = 0; + + std::vector temp_dat; + temp_dat.reserve(block_buf_size_); + + char temp_fname[256]; + sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_dm_mass); + std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); + + size_t blksize = sizeof(T_store) * npcoarse; + + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + // int levelmaxcoarse = gh.levelmax() - 4; + // if (!spread_coarse_acrosstypes_) + // levelmaxcoarse = gh.levelmax() - 1; + + //for( int ilevel=levelmaxcoarse; ilevel>=(int)gh.levelmin(); --ilevel ) + + { + int ilevel = 0; + // baryon particles live only on finest grid + // these particles here are total 
matter particles + double pmass = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / pow(2, 3 * ilevel); + + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) + { + if (temp_dat.size() < block_buf_size_) + temp_dat.push_back(pmass); + else + { + ofs_temp.write((char *)&temp_dat[0], sizeof(T_store) * block_buf_size_); + nwritten += block_buf_size_; + temp_dat.clear(); + temp_dat.push_back(pmass); + } + } + } + + if (temp_dat.size() > 0) + { + ofs_temp.write((char *)&temp_dat[0], sizeof(T_store) * temp_dat.size()); + nwritten += temp_dat.size(); + } + + if (nwritten != npcoarse) + { + csoca::elog.Print("nwritten = %llu != npcoarse = %llu\n", nwritten, npcoarse); + throw std::runtime_error("Internal consistency error while writing temporary file for masses"); + } + + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + if (ofs_temp.bad()) + throw std::runtime_error("I/O error while writing temporary file for masses"); + } + } + + void write_dm_position(int coord, const grid_hierarchy &gh) + { + //... count number of leaf cells ...// + determine_particle_numbers(gh); + + size_t npart = 0; + for (int i = 1; i < 6; ++i) + npart += np_per_type_[i]; + + //... determine if we need to shift the coordinates back + double *shift = NULL; + + if (shift_halfcell_) + { + double h = 0.0; //1.0/(1<<(levelmin_+1)); + shift = new double[3]; + shift[0] = shift[1] = shift[2] = -h; + } + + size_t nwritten = 0; + //... collect displacements and convert to absolute coordinates with correct + //... units + std::vector temp_data; + temp_data.reserve(block_buf_size_); + + char temp_fname[256]; + sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_dm_pos + coord); + std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); + + size_t blksize = sizeof(T_store) * npart; + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + double xfac = header_.BoxSize; + + //for( int ilevel=gh.levelmax(); ilevel>=(int)gh.levelmin(); --ilevel ) + unsigned ilevel = 0; + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) + { + double xx[3]; + gh.cell_pos(ilevel, i, j, k, xx); + if (shift != NULL) + xx[coord] += shift[coord]; + + + // std::cerr << i << " " << j << " " << k << " : " << xx[coord]*xfac << " " << (*gh.get_grid(ilevel)).relem(i, j, k) * xfac << std::endl; + + xx[coord] = (xx[coord] + (*gh.get_grid(ilevel)).relem(i, j, k)) * xfac; + + if (temp_data.size() < block_buf_size_) + temp_data.push_back(xx[coord]); + else + { + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); + nwritten += block_buf_size_; + temp_data.clear(); + temp_data.push_back(xx[coord]); + } + } + + if (temp_data.size() > 0) + { + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * temp_data.size()); + nwritten += temp_data.size(); + } + + if (nwritten != npart) + throw std::runtime_error("Internal consistency error while writing temporary file for positions"); + + //... 
dump to temporary file + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + if (ofs_temp.bad()) + throw std::runtime_error("I/O error while writing temporary file for positions"); + + ofs_temp.close(); + + if (shift != NULL) + delete[] shift; + } + + void write_dm_velocity(int coord, const grid_hierarchy &gh) + { + //... count number of leaf cells ...// + determine_particle_numbers(gh); + + size_t npart = 0; + for (int i = 1; i < 6; ++i) + npart += np_per_type_[i]; + + //... collect displacements and convert to absolute coordinates with correct + //... units + std::vector temp_data; + temp_data.reserve(block_buf_size_); + + float isqrta = 1.0f / sqrt(header_.time); + float vfac = isqrta * header_.BoxSize; + + //if( kpcunits_ ) + // vfac /= 1000.0; + vfac *= unit_length_chosen_ / unit_vel_chosen_; + + size_t nwritten = 0; + + char temp_fname[256]; + sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_dm_vel + coord); + std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); + + size_t blksize = sizeof(T_store) * npart; + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + //for( int ilevel=levelmax_; ilevel>=(int)levelmin_; --ilevel ) + int ilevel = 0; + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) + { + if (temp_data.size() < block_buf_size_) + temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); + else + { + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); + nwritten += block_buf_size_; + temp_data.clear(); + temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); + } + } + if (temp_data.size() > 0) + { + ofs_temp.write((char *)&temp_data[0], temp_data.size() * sizeof(T_store)); + nwritten += temp_data.size(); + } + + if (nwritten != npart) + throw std::runtime_error("Internal consistency error while writing temporary file for velocities"); + + ofs_temp.write((char *)&blksize, sizeof(int)); + + if (ofs_temp.bad()) + throw std::runtime_error("I/O error while writing temporary file for velocities"); + + ofs_temp.close(); + + + } + + void write_dm_density(const grid_hierarchy &gh) + { + //... we don't care about DM density for Gadget + } + + void write_dm_potential(const grid_hierarchy &gh) + { + //... we don't care about DM potential for Gadget + } + + void write_gas_potential(const grid_hierarchy &gh) + { + //... we don't care about gas potential for Gadget + } + + //... write data for gas -- don't do this + void write_gas_velocity(int coord, const grid_hierarchy &gh) + { + determine_particle_numbers(gh); + size_t npart = 0; + for (int i = 1; i < 6; ++i) + npart += np_per_type_[i]; + + //... collect velocities and convert to absolute coordinates with correct + //... 
units + std::vector temp_data; + temp_data.reserve(block_buf_size_); + + float isqrta = 1.0f / sqrt(header_.time); + float vfac = isqrta * header_.BoxSize; + + //if( kpcunits_ ) + // vfac /= 1000.0; + vfac *= unit_length_chosen_ / unit_vel_chosen_; + + //size_t npart = gh.count_leaf_cells(gh.levelmin(), gh.levelmax());;; + size_t nwritten = 0; + + char temp_fname[256]; + sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_gas_vel + coord); + std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); + + size_t blksize = sizeof(T_store) * npart; + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + //for( int ilevel=levelmax_; ilevel>=(int)levelmin_; --ilevel ) + int ilevel = 0; + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) + { + if (temp_data.size() < block_buf_size_) + temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); + else + { + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); + nwritten += block_buf_size_; + temp_data.clear(); + temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); + } + } + + if (temp_data.size() > 0) + { + ofs_temp.write((char *)&temp_data[0], temp_data.size() * sizeof(T_store)); + nwritten += temp_data.size(); + } + + if (nwritten != npart) + throw std::runtime_error("Internal consistency error while writing temporary file for gas velocities"); + + ofs_temp.write((char *)&blksize, sizeof(int)); + + if (ofs_temp.bad()) + throw std::runtime_error("I/O error while writing temporary file for gas velocities"); + + ofs_temp.close(); + } + + //... write only for fine level + void write_gas_position(int coord, const grid_hierarchy &gh) + { + //... count number of leaf cells ...// + determine_particle_numbers(gh); + + size_t npart = 0; + for (int i = 1; i < 6; ++i) + npart += np_per_type_[i]; + + //... determine if we need to shift the coordinates back + double *shift = NULL; + + if (shift_halfcell_) + { + double h = 0.0; //1.0/(1<<(levelmin_+1)); + shift = new double[3]; + shift[0] = shift[1] = shift[2] = -h; + } + + size_t nwritten = 0; + + //... + //... collect displacements and convert to absolute coordinates with correct + //... units + std::vector temp_data; + temp_data.reserve(block_buf_size_); + + char temp_fname[256]; + sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_gas_pos + coord); + std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); + + size_t blksize = sizeof(T_store) * npart; + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + double xfac = header_.BoxSize; + + double h = 1.0 / (1ul << gh.levelmax()); + + //for (int ilevel = gh.levelmax(); ilevel >= (int)gh.levelmin(); --ilevel) + int ilevel = 0; + { + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + //if( ! gh.is_refined(ilevel,i,j,k) ) + if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) + { + double xx[3]; + gh.cell_pos(ilevel, i, j, k, xx); + if (shift != NULL) + xx[coord] += shift[coord]; + + //... shift particle positions (this has to be done as the same shift + //... 
is used when computing the convolution kernel for SPH baryons) + xx[coord] += 0.5 * h; + + xx[coord] = (xx[coord] + (*gh.get_grid(ilevel)).relem(i, j, k)) * xfac; + + if (temp_data.size() < block_buf_size_) + temp_data.push_back(xx[coord]); + else + { + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); + nwritten += block_buf_size_; + temp_data.clear(); + temp_data.push_back(xx[coord]); + } + } + } + + if (temp_data.size() > 0) + { + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * temp_data.size()); + nwritten += temp_data.size(); + } + + if (nwritten != npart) + throw std::runtime_error("Internal consistency error while writing temporary file for gas positions"); + + //... dump to temporary file + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + if (ofs_temp.bad()) + throw std::runtime_error("I/O error while writing temporary file for gas positions"); + + ofs_temp.close(); + + if (shift != NULL) + delete[] shift; + } + + void write_gas_density(const grid_hierarchy &gh) + { + //do nothing as we write out positions + } + + void finalize(void) + { + this->assemble_gadget_file(); + } +}; + +// namespace +// { +// output_plugin_creator_concrete> creator1("gadget2"); +// #ifndef SINGLE_PRECISION +// output_plugin_creator_concrete> creator2("gadget2_double"); +// #endif +// } // namespace From f90778ba54131727224c2a07ed9d220a060b69d7 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 14:00:06 +0100 Subject: [PATCH 051/130] submodule update --- external/class | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/class b/external/class index b34d7f6..6f3abba 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit b34d7f6c2b72eab3a347c28e62298d62ca9dd69b +Subproject commit 6f3abbab2608712029d740d6c69aad0ba853e507 From 3797ff0325911bcc3645170e1a93c976f7ba5c3a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 14:08:35 +0100 Subject: [PATCH 052/130] avoid policy error on old versions of cmake --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fcc57e9..26eaa63 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,9 @@ endif(ENABLE_MPI) # FFTW -cmake_policy(SET CMP0074 NEW) +if(POLICY CMP0074) + cmake_policy(SET CMP0074 NEW) +endif() if(ENABLE_MPI) find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS MPI) else() From 4020d5b33f7d515036ba2a6b8cc6f703ef03b863 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 14:37:30 +0100 Subject: [PATCH 053/130] added old MUSIC1 plugin for tabulated CAMB transfer function files back in --- example.conf | 54 ++--- src/plugins/transfer_CAMB_file.cc | 344 +++++++++++++++++++++++++++++ src/plugins/transfer_eisenstein.cc | 2 +- 3 files changed, 373 insertions(+), 27 deletions(-) create mode 100644 src/plugins/transfer_CAMB_file.cc diff --git a/example.conf b/example.conf index 3b6d07e..1b5e530 100644 --- a/example.conf +++ b/example.conf @@ -1,33 +1,33 @@ [setup] # number of grid cells per linear dimension for calculations = particles for sc initial load -GridRes = 128 +GridRes = 128 # length of the box in Mpc/h -BoxLength = 250 +BoxLength = 250 # starting redshift -zstart = 49.0 +zstart = 49.0 # order of the LPT to be used (1,2 or 3) -LPTorder = 3 +LPTorder = 3 # also do baryon ICs? 
-DoBaryons = no +DoBaryons = no # do mode fixing à la Angulo&Pontzen -DoFixing = no +DoFixing = no # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!) -ParticleLoad = sc +ParticleLoad = sc [testing] # enables diagnostic output # can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence' -test = convergence +test = none [execution] -NumThreads = 4 +NumThreads = 4 [output] -fname_hdf5 = output_sch.hdf5 -fbase_analysis = output +fname_hdf5 = output_sch.hdf5 +fbase_analysis = output -format = gadget2 -filename = ics_gadget.dat +format = gadget2 +filename = ics_gadget.dat #format = generic #filename = debug.hdf5 @@ -38,21 +38,23 @@ filename = ics_gadget.dat #grafic_use_SPT = yes [random] -generator = NGENIC -seed = 9001 +generator = NGENIC +seed = 9001 [cosmology] -#transfer = CLASS -transfer = eisenstein -Omega_m = 0.302 -Omega_b = 0.045 -Omega_L = 0.698 -H0 = 70.3 -sigma_8 = 0.811 -nspec = 0.961 +transfer = CLASS +# transfer = eisenstein +# transfer = file_CAMB +# transfer_file = wmap5_transfer_out_z0.dat +Omega_m = 0.302 +Omega_b = 0.045 +Omega_L = 0.698 +H0 = 70.3 +sigma_8 = 0.811 +nspec = 0.961 # anisotropic large scale tidal field -#LSS_aniso_lx = 0.1 -#LSS_aniso_ly = 0.1 -#LSS_aniso_lz = -0.2 +#LSS_aniso_lx = +0.1 +#LSS_aniso_ly = +0.1 +#LSS_aniso_lz = -0.2 diff --git a/src/plugins/transfer_CAMB_file.cc b/src/plugins/transfer_CAMB_file.cc new file mode 100644 index 0000000..ddbf35e --- /dev/null +++ b/src/plugins/transfer_CAMB_file.cc @@ -0,0 +1,344 @@ +// transfer_CAMB.cc - This file is part of MUSIC - +// a code to generate multi-scale initial conditions for cosmological simulations + +// Copyright (C) 2019 Oliver Hahn + +#include +#include + +#include + +#include "transfer_function_plugin.hh" + +const double tiny = 1e-30; + +class transfer_CAMB_file_plugin : public TransferFunction_plugin +{ + +private: + std::string m_filename_Pk, m_filename_Tk; + std::vector m_tab_k, m_tab_Tk_tot, m_tab_Tk_cdm, m_tab_Tk_baryon; + std::vector m_tab_Tvk_tot, m_tab_Tvk_cdm, m_tab_Tvk_baryon; + gsl_interp_accel *acc_tot, *acc_cdm, *acc_baryon; + gsl_interp_accel *acc_vtot, *acc_vcdm, *acc_vbaryon; + gsl_spline *spline_tot, *spline_cdm, *spline_baryon; + gsl_spline *spline_vtot, *spline_vcdm, *spline_vbaryon; + + double m_kmin, m_kmax, m_Omega_b, m_Omega_m, m_zstart; + unsigned m_nlines; + + bool m_linbaryoninterp; + + void read_table(void) + { + + m_nlines = 0; + m_linbaryoninterp = false; + +#ifdef WITH_MPI + if (MPI::COMM_WORLD.Get_rank() == 0) + { +#endif + csoca::ilog.Print("Reading tabulated transfer function data from file \n \'%s\'", m_filename_Tk.c_str()); + + std::string line; + std::ifstream ifs(m_filename_Tk.c_str()); + + if (!ifs.good()) + throw std::runtime_error("Could not find transfer function file \'" + m_filename_Tk + "\'"); + + m_tab_k.clear(); + m_tab_Tk_tot.clear(); + m_tab_Tk_cdm.clear(); + m_tab_Tk_baryon.clear(); + m_tab_Tvk_tot.clear(); + m_tab_Tvk_cdm.clear(); //>[150609SH: add] + m_tab_Tvk_baryon.clear(); //>[150609SH: add] + + m_kmin = 1e30; + m_kmax = -1e30; + std::ofstream ofs("dump_transfer.txt"); + + while (!ifs.eof()) + { + getline(ifs, line); + if (ifs.eof()) + break; + + // OH: ignore line if it has a comment: + if (line.find("#") != std::string::npos) + continue; + + std::stringstream ss(line); + + double k, Tkc, Tkb, Tktot, Tkvtot, Tkvc, Tkvb, dummy; + + ss >> k; + ss >> Tkc; // cdm + ss >> Tkb; // baryon + ss >> dummy; // photon + ss >> dummy; // nu + ss >> dummy; // 
mass_nu + ss >> Tktot; // total + ss >> dummy; // no_nu + ss >> dummy; // total_de + ss >> dummy; // Weyl + ss >> Tkvc; // v_cdm + ss >> Tkvb; // v_b + ss >> dummy; // v_b-v_cdm + + if (ss.bad() || ss.fail()) + { + csoca::elog.Print("Error reading the transfer function file (corrupt or not in expected format)!"); + throw std::runtime_error("Error reading transfer function file \'" + + m_filename_Tk + "\'"); + } + + if (m_Omega_b < 1e-6) + Tkvtot = Tktot; + else + Tkvtot = ((m_Omega_m - m_Omega_b) * Tkvc + m_Omega_b * Tkvb) / m_Omega_m; //MvD + + m_linbaryoninterp |= Tkb < 0.0 || Tkvb < 0.0; + + m_tab_k.push_back(log10(k)); + + m_tab_Tk_tot.push_back(Tktot); + m_tab_Tk_baryon.push_back(Tkb); + m_tab_Tk_cdm.push_back(Tkc); + m_tab_Tvk_tot.push_back(Tkvtot); + m_tab_Tvk_baryon.push_back(Tkvb); + m_tab_Tvk_cdm.push_back(Tkvc); + + ++m_nlines; + + if (k < m_kmin) + m_kmin = k; + if (k > m_kmax) + m_kmax = k; + } + + for (size_t i = 0; i < m_tab_k.size(); ++i) + { + m_tab_Tk_tot[i] = log10(m_tab_Tk_tot[i]); + m_tab_Tk_cdm[i] = log10(m_tab_Tk_cdm[i]); + m_tab_Tvk_cdm[i] = log10(m_tab_Tvk_cdm[i]); + m_tab_Tvk_tot[i] = log10(m_tab_Tvk_tot[i]); + + if (!m_linbaryoninterp) + { + m_tab_Tk_baryon[i] = log10(m_tab_Tk_baryon[i]); + m_tab_Tvk_baryon[i] = log10(m_tab_Tvk_baryon[i]); + } + } + + ifs.close(); + + csoca::ilog.Print("Read CAMB transfer function table with %d rows", m_nlines); + + if (m_linbaryoninterp) + csoca::ilog.Print("Using log-lin interpolation for baryons\n (TF is not " + "positive definite)"); + +#ifdef WITH_MPI + } + + unsigned n = m_tab_k.size(); + MPI::COMM_WORLD.Bcast(&n, 1, MPI_UNSIGNED, 0); + + if (MPI::COMM_WORLD.Get_rank() > 0) + { + m_tab_k.assign(n, 0); + m_tab_Tk_tot.assign(n, 0); + m_tab_Tk_cdm.assign(n, 0); + m_tab_Tk_baryon.assign(n, 0); + m_tab_Tvk_tot.assign(n, 0); + m_tab_Tvk_cdm.assign(n, 0); + m_tab_Tvk_baryon.assign(n, 0); + } + + MPI::COMM_WORLD.Bcast(&m_tab_k[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tk_tot[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tk_cdm[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tk_baryon[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tvk_tot[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tvk_cdm[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tvk_baryon[0], n, MPI_DOUBLE, 0); + +#endif + } + +public: + transfer_CAMB_file_plugin(ConfigFile &cf) + : TransferFunction_plugin(cf) + { + m_filename_Tk = pcf_->GetValue("cosmology", "transfer_file"); + m_Omega_m = cf.GetValue("cosmology", "Omega_m"); //MvD + m_Omega_b = cf.GetValue("cosmology", "Omega_b"); //MvD + m_zstart = cf.GetValue("setup", "zstart"); //MvD + + read_table(); + + acc_tot = gsl_interp_accel_alloc(); + acc_cdm = gsl_interp_accel_alloc(); + acc_baryon = gsl_interp_accel_alloc(); + acc_vtot = gsl_interp_accel_alloc(); + acc_vcdm = gsl_interp_accel_alloc(); + acc_vbaryon = gsl_interp_accel_alloc(); + + spline_tot = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + spline_cdm = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + spline_baryon = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + spline_vtot = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + spline_vcdm = + gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + spline_vbaryon = + gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + + gsl_spline_init(spline_tot, &m_tab_k[0], &m_tab_Tk_tot[0], m_tab_k.size()); + gsl_spline_init(spline_cdm, &m_tab_k[0], &m_tab_Tk_cdm[0], m_tab_k.size()); + gsl_spline_init(spline_baryon, &m_tab_k[0], 
&m_tab_Tk_baryon[0], + m_tab_k.size()); + gsl_spline_init(spline_vtot, &m_tab_k[0], &m_tab_Tvk_tot[0], + m_tab_k.size()); + gsl_spline_init(spline_vcdm, &m_tab_k[0], &m_tab_Tvk_cdm[0], + m_tab_k.size()); + gsl_spline_init(spline_vbaryon, &m_tab_k[0], &m_tab_Tvk_baryon[0], + m_tab_k.size()); + + tf_distinct_ = true; // different density between CDM v.s. Baryon + tf_withvel_ = true; // using velocity transfer function + } + + ~transfer_CAMB_file_plugin() + { + gsl_spline_free(spline_tot); + gsl_spline_free(spline_cdm); + gsl_spline_free(spline_baryon); + gsl_spline_free(spline_vtot); + gsl_spline_free(spline_vcdm); + gsl_spline_free(spline_vbaryon); + + gsl_interp_accel_free(acc_tot); + gsl_interp_accel_free(acc_cdm); + gsl_interp_accel_free(acc_baryon); + gsl_interp_accel_free(acc_vtot); + gsl_interp_accel_free(acc_vcdm); + gsl_interp_accel_free(acc_vbaryon); + } + + // linear interpolation in log-log + inline double extrap_right(double k, const tf_type &type) const + { + int n = m_tab_k.size() - 1, n1 = n - 1; + + double v1(1.0), v2(1.0); + + double lk = log10(k); + double dk = m_tab_k[n] - m_tab_k[n1]; + double delk = lk - m_tab_k[n]; + + switch (type) + { + case cdm: + v1 = m_tab_Tk_cdm[n1]; + v2 = m_tab_Tk_cdm[n]; + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case baryon: + v1 = m_tab_Tk_baryon[n1]; + v2 = m_tab_Tk_baryon[n]; + if (m_linbaryoninterp) + return std::max((v2 - v1) / dk * (delk) + v2, tiny); + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case vtotal: //>[150609SH: add] + v1 = m_tab_Tvk_tot[n1]; + v2 = m_tab_Tvk_tot[n]; + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case vcdm: //>[150609SH: add] + v1 = m_tab_Tvk_cdm[n1]; + v2 = m_tab_Tvk_cdm[n]; + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case vbaryon: //>[150609SH: add] + v1 = m_tab_Tvk_baryon[n1]; + v2 = m_tab_Tvk_baryon[n]; + if (m_linbaryoninterp) + return std::max((v2 - v1) / dk * (delk) + v2, tiny); + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case total: + v1 = m_tab_Tk_tot[n1]; + v2 = m_tab_Tk_tot[n]; + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + default: + throw std::runtime_error( + "Invalid type requested in transfer function evaluation"); + } + + return 0.0; + } + + inline double compute(double k, tf_type type) const + { + // use constant interpolation on the left side of the tabulated values + if (k < m_kmin) + { + switch (type) + { + case cdm: + return pow(10.0, m_tab_Tk_cdm[0]); + case baryon: + if (m_linbaryoninterp) + return m_tab_Tk_baryon[0]; + return pow(10.0, m_tab_Tk_baryon[0]); + case vtotal: + return pow(10.0, m_tab_Tvk_tot[0]); + case vcdm: + return pow(10.0, m_tab_Tvk_cdm[0]); + case vbaryon: + if (m_linbaryoninterp) + return m_tab_Tvk_baryon[0]; + return pow(10.0, m_tab_Tvk_baryon[0]); + case total: + return pow(10.0, m_tab_Tk_tot[0]); + default: + throw std::runtime_error( + "Invalid type requested in transfer function evaluation"); + } + } + // use linear interpolation on the right side of the tabulated values + else if (k > m_kmax) + return extrap_right(k, type); + + double lk = log10(k); + switch (type) + { + case cdm: + return pow(10.0, gsl_spline_eval(spline_cdm, lk, acc_cdm)); + case baryon: + if (m_linbaryoninterp) + return gsl_spline_eval(spline_baryon, lk, acc_baryon); + return pow(10.0, gsl_spline_eval(spline_baryon, lk, acc_baryon)); + case vtotal: + return pow(10.0, gsl_spline_eval(spline_vtot, lk, acc_vtot)); //MvD + case vcdm: + return pow(10.0, gsl_spline_eval(spline_vcdm, lk, acc_vcdm)); + case vbaryon: + if (m_linbaryoninterp) + return 
gsl_spline_eval(spline_vbaryon, lk, acc_vbaryon); + return pow(10.0, gsl_spline_eval(spline_vbaryon, lk, acc_vbaryon)); + case total: + return pow(10.0, gsl_spline_eval(spline_tot, lk, acc_tot)); + default: + throw std::runtime_error( + "Invalid type requested in transfer function evaluation"); + } + } + + inline double get_kmin(void) const { return pow(10.0, m_tab_k[1]); } + + inline double get_kmax(void) const { return pow(10.0, m_tab_k[m_tab_k.size() - 2]); } +}; + +namespace +{ +TransferFunction_plugin_creator_concrete creator("file_CAMB"); +} diff --git a/src/plugins/transfer_eisenstein.cc b/src/plugins/transfer_eisenstein.cc index 9d4c032..47a7efd 100644 --- a/src/plugins/transfer_eisenstein.cc +++ b/src/plugins/transfer_eisenstein.cc @@ -434,5 +434,5 @@ namespace TransferFunction_plugin_creator_concrete creator("eisenstein"); TransferFunction_plugin_creator_concrete creator2("eisenstein_wdm"); TransferFunction_plugin_creator_concrete creator3("eisenstein_cdmbino"); -TransferFunction_plugin_creator_concrete creator4("eisenstein_cutoff"); +// TransferFunction_plugin_creator_concrete creator4("eisenstein_cutoff"); } // namespace From e7ebed552376eb352460d59781131627208c82aa Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 15:43:42 +0100 Subject: [PATCH 054/130] fix merge conflict in example conf file --- example.conf | 45 +++++++++++++-------------------------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/example.conf b/example.conf index 6c4779d..c8db213 100644 --- a/example.conf +++ b/example.conf @@ -15,19 +15,21 @@ DoFixing = no ParticleLoad = sc [cosmology] -#transfer = CLASS -transfer = eisenstein -Omega_m = 0.302 -Omega_b = 0.045 -Omega_L = 0.698 -H0 = 70.3 -sigma_8 = 0.811 -nspec = 0.961 +transfer = CLASS +# transfer = eisenstein +# transfer = file_CAMB +# transfer_file = wmap5_transfer_out_z0.dat +Omega_m = 0.302 +Omega_b = 0.045 +Omega_L = 0.698 +H0 = 70.3 +sigma_8 = 0.811 +nspec = 0.961 # anisotropic large scale tidal field -#LSS_aniso_lx = 0.1 -#LSS_aniso_ly = 0.1 -#LSS_aniso_lz = -0.2 +#LSS_aniso_lx = +0.1 +#LSS_aniso_ly = +0.1 +#LSS_aniso_lz = -0.2 [random] generator = NGENIC @@ -57,24 +59,3 @@ UseLongids = false #filename = ics_ramses #grafic_use_SPT = yes -[random] -generator = NGENIC -seed = 9001 - -[cosmology] -transfer = CLASS -# transfer = eisenstein -# transfer = file_CAMB -# transfer_file = wmap5_transfer_out_z0.dat -Omega_m = 0.302 -Omega_b = 0.045 -Omega_L = 0.698 -H0 = 70.3 -sigma_8 = 0.811 -nspec = 0.961 - -# anisotropic large scale tidal field -#LSS_aniso_lx = +0.1 -#LSS_aniso_ly = +0.1 -#LSS_aniso_lz = -0.2 - From 89c5f2758ba580bfd384a8a695c98cd813201fc0 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 24 Jan 2020 15:00:32 +0100 Subject: [PATCH 055/130] added support for writing multiple species, staggered grids --- include/output_plugin.hh | 2 +- include/particle_generator.hh | 32 ++++- src/ic_generator.cc | 21 ++- src/plugins/output_gadget2.cc | 122 ++++++++++-------- src/plugins/output_gadget_hdf5.cc | 207 ++++++++++++++++++++++++++++++ 5 files changed, 316 insertions(+), 68 deletions(-) create mode 100644 src/plugins/output_gadget_hdf5.cc diff --git a/include/output_plugin.hh b/include/output_plugin.hh index b7c23e1..5a18407 100644 --- a/include/output_plugin.hh +++ b/include/output_plugin.hh @@ -45,7 +45,7 @@ public: virtual ~output_plugin(){} //! 
routine to write particle data for a species - virtual void write_particle_data(const particle::container &pc, const cosmo_species &s ) {}; + virtual void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species ) {}; //! routine to write gridded fluid component data for a species virtual void write_grid_data(const Grid_FFT &g, const cosmo_species &s, const fluid_component &c ) {}; diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 1c176eb..4dafda8 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -27,6 +27,14 @@ const std::vector< std::vector> > lattice_shifts = /* RSC: */ {{0.0,0.0,0.0},{0.0,0.0,0.5},{0.0,0.5,0.0},{0.0,0.5,0.5},{0.5,0.0,0.0},{0.5,0.0,0.5},{0.5,0.5,0.0},{0.5,0.5,0.5}}, }; +const std::vector> second_lattice_shift = +{ + /* SC : */ {0.5, 0.5, 0.5}, + /* BCC: */ {0.5, 0.5, 0.0}, + /* FCC: */ {0.5, 0.5, 0.5}, + /* RSC: */ {0.25, 0.25, 0.25}, +}; + template void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const field_t& field ){ // number of modes present in the field @@ -53,11 +61,16 @@ void initialize_lattice( container& particles, lattice lattice_type, const bool // invalidates field, phase shifted to unspecified position after return template -void set_positions( container& particles, const lattice lattice_type, int idim, real_t lunit, const bool b64reals, field_t& field ) +void set_positions( container& particles, const lattice lattice_type, bool is_second_lattice, int idim, real_t lunit, const bool b64reals, field_t& field ) { const size_t num_p_in_load = field.local_size(); for( int ishift=0; ishift<(1<0 ){ field.shift_field( lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift-1] ); } @@ -66,7 +79,8 @@ void set_positions( container& particles, const lattice lattice_type, int idim, for( size_t i=0,ipcount=ipcount0; i(i,j,k,lattice_shifts[lattice_type][ishift]); + auto pos = field.template get_unit_r_shifted(i,j,k,lattice_shifts[lattice_type][ishift] + + (is_second_lattice? second_lattice_shift[lattice_type] : vec3{0.,0.,0.}) ); if( b64reals ){ particles.set_pos64( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); }else{ @@ -78,13 +92,17 @@ void set_positions( container& particles, const lattice lattice_type, int idim, } } -template -void set_velocities( container& particles, lattice lattice_type, int idim, const bool b64reals, field_t& field ) +template +void set_velocities(container &particles, lattice lattice_type, bool is_second_lattice, int idim, const bool b64reals, field_t &field) { const size_t num_p_in_load = field.local_size(); for( int ishift=0; ishift<(1<0 ){ + // if we are dealing with the secondary lattice, apply a global shift + if (ishift == 0 && is_second_lattice){ + field.shift_field(second_lattice_shift[lattice_type]); + } + // can omit first shift since zero by convention, unless shifted already above, otherwise apply relative phase shift + if (ishift > 0){ field.shift_field( lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1] ); } // read out values from phase shifted field and set assoc. particle's value diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 48d31dc..ee370ff 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -70,6 +70,18 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------------------------------------------- //! do baryon ICs? 
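    // [editor's note -- illustrative sketch, not part of the original patch] The Omega map set up
    // just below splits the matter density between species: with the example.conf values quoted in
    // this series (Omega_m = 0.302, Omega_b = 0.045), DoBaryons=yes gives Omega[dm] = 0.257 and
    // Omega[baryon] = 0.045. The Gadget writers later assign
    //   mass = Omega_species * rhoc * BoxSize^3 / Npart,  rhoc = 27.7519737 [h^2 1e10 Msol/Mpc^3],
    // so a 250 Mpc/h box with a 128^3 'sc' CDM load gives 0.257 * 27.7519737 * 250^3 / 128^3 ~= 53.1,
    // i.e. roughly 5.3e11 Msol/h per CDM particle.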
const bool bDoBaryons = the_config.GetValueSafe("setup", "DoBaryons", false ); + std::map< cosmo_species, double > Omega; + if( bDoBaryons ){ + double Om = the_config.GetValue("cosmology", "Omega_m"); + double Ob = the_config.GetValue("cosmology", "Omega_b"); + Omega[cosmo_species::dm] = Om-Ob; + Omega[cosmo_species::baryon] = Ob; + }else{ + double Om = the_config.GetValue("cosmology", "Omega_m"); + double Ob = the_config.GetValue("cosmology", "Omega_b"); + Omega[cosmo_species::dm] = Om; + Omega[cosmo_species::baryon] = 0.0; + } //-------------------------------------------------------------------------------------------------------- //! add beyond box tidal field modes following Schmidt et al. (2018) [https://arxiv.org/abs/1803.03274] @@ -440,6 +452,9 @@ int Run( ConfigFile& the_config ) //=================================================================================== particle::container particles; + bool shifted_lattice = (this_species == cosmo_species::baryon && + the_output_plugin->write_species_as(this_species) == output_type::particles) ? true : false; + // if output plugin wants particles, then we need to store them, along with their IDs if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { @@ -472,7 +487,7 @@ int Run( ConfigFile& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_positions( particles, lattice_type, idim, lunit, the_output_plugin->has_64bit_reals(), tmp ); + particle::set_positions( particles, lattice_type, shifted_lattice, idim, lunit, the_output_plugin->has_64bit_reals(), tmp ); } // otherwise write out the grid data directly to the output plugin // else if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_lagrangian ) @@ -518,7 +533,7 @@ int Run( ConfigFile& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_velocities( particles, lattice_type, idim, the_output_plugin->has_64bit_reals(), tmp ); + particle::set_velocities( particles, lattice_type, shifted_lattice, idim, the_output_plugin->has_64bit_reals(), tmp ); } // otherwise write out the grid data directly to the output plugin else if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian ) @@ -530,7 +545,7 @@ int Run( ConfigFile& the_config ) if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - the_output_plugin->write_particle_data( particles, this_species ); + the_output_plugin->write_particle_data( particles, this_species, Omega[this_species] ); } if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian ) diff --git a/src/plugins/output_gadget2.cc b/src/plugins/output_gadget2.cc index 57d9cc1..e7f20e6 100644 --- a/src/plugins/output_gadget2.cc +++ b/src/plugins/output_gadget2.cc @@ -3,7 +3,7 @@ constexpr int empty_fill_bytes{56}; -template +template class gadget2_output_plugin : public output_plugin { public: @@ -38,40 +38,44 @@ protected: public: //! 
constructor - explicit gadget2_output_plugin(ConfigFile &cf ) - : output_plugin(cf, "GADGET-2") + explicit gadget2_output_plugin(ConfigFile &cf) + : output_plugin(cf, "GADGET-2") { num_files_ = 1; #ifdef USE_MPI // use as many output files as we have MPI tasks MPI_Comm_size(MPI_COMM_WORLD, &num_files_); #endif - real_t astart = 1.0/(1.0+cf_.GetValue("setup", "zstart")); + real_t astart = 1.0 / (1.0 + cf_.GetValue("setup", "zstart")); lunit_ = cf_.GetValue("setup", "BoxLength"); vunit_ = lunit_ / std::sqrt(astart); - blongids_ = cf_.GetValueSafe("output","UseLongids",false); + blongids_ = cf_.GetValueSafe("output", "UseLongids", false); } - output_type write_species_as( const cosmo_species & ) const { return output_type::particles; } + output_type write_species_as(const cosmo_species &) const { return output_type::particles; } real_t position_unit() const { return lunit_; } real_t velocity_unit() const { return vunit_; } - bool has_64bit_reals() const{ - if( typeid(write_real_t)==typeid(double) ) return true; - return false; - } - - bool has_64bit_ids() const{ - if( blongids_ ) return true; - return false; - } - - void write_particle_data(const particle::container &pc, const cosmo_species &s ) + bool has_64bit_reals() const { - // fill the Gadget-2 header - memset(reinterpret_cast(&this_header_),0,sizeof(header)); + if (typeid(write_real_t) == typeid(double)) + return true; + return false; + } + + bool has_64bit_ids() const + { + if (blongids_) + return true; + return false; + } + + void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species) + { + // fill the Gadget-2 header + memset(reinterpret_cast(&this_header_), 0, sizeof(header)); for (int i = 0; i < 6; ++i) { @@ -113,15 +117,15 @@ public: //... set masses double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 - double boxmass = this_header_.Omega0 * rhoc * std::pow(this_header_.BoxSize,3); + double boxmass = Omega_species * rhoc * std::pow(this_header_.BoxSize, 3); this_header_.mass[1] = boxmass / pc.get_global_num_particles(); - + std::string fname = fname_; int thisrank = 0; - + #ifdef USE_MPI - MPI_Comm_rank(MPI_COMM_WORLD,&thisrank); - if( num_files_ > 1 ) + MPI_Comm_rank(MPI_COMM_WORLD, &thisrank); + if (num_files_ > 1) fname += "." + std::to_string(thisrank); #endif uint32_t blocksz; @@ -130,52 +134,56 @@ public: csoca::ilog << "Writer \'" << this->interface_name_ << "\' : Writing data for " << pc.get_global_num_particles() << " particles." 
<< std::endl; blocksz = sizeof(header); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(&this_header_), sizeof(header) ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - - // we write double precision - if( this->has_64bit_reals() ){ + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(&this_header_), sizeof(header)); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + + // we write double precision + if (this->has_64bit_reals()) + { blocksz = 3 * sizeof(double) * pc.get_local_num_particles(); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_pos64_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_pos64_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_vel64_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - }else{ + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_vel64_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + } + else + { blocksz = 3 * sizeof(float) * pc.get_local_num_particles(); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_pos32_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_pos32_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_vel32_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_vel32_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); } - + // we write long IDs - if( this->has_64bit_ids() ){ + if (this->has_64bit_ids()) + { blocksz = sizeof(uint64_t) * pc.get_local_num_particles(); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_ids64_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - }else{ - blocksz = sizeof(uint32_t) * pc.get_local_num_particles(); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_ids32_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_ids64_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + } + else + { + blocksz = sizeof(uint32_t) * pc.get_local_num_particles(); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_ids32_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); } - } }; - namespace { - output_plugin_creator_concrete> creator1("gadget2"); +output_plugin_creator_concrete> creator1("gadget2"); #if !defined(USE_SINGLEPRECISION) - output_plugin_creator_concrete> creator3("gadget2_double"); +output_plugin_creator_concrete> creator3("gadget2_double"); #endif } // namespace diff --git a/src/plugins/output_gadget_hdf5.cc b/src/plugins/output_gadget_hdf5.cc new 
file mode 100644 index 0000000..43afbe1 --- /dev/null +++ b/src/plugins/output_gadget_hdf5.cc @@ -0,0 +1,207 @@ + +#ifdef USE_HDF5 +#include // for unlink +#include +#include "HDF_IO.hh" + +template +std::vector from_6array(const T *a) +{ + return std::vector{{a[0], a[1], a[2], a[3], a[4], a[5]}}; +} + +template +std::vector from_value(const T a) +{ + return std::vector{{a}}; +} + +template +class gadget_hdf5_output_plugin : public output_plugin +{ + struct header_t + { + unsigned npart[6]; + double mass[6]; + double time; + double redshift; + int flag_sfr; + int flag_feedback; + unsigned int npartTotal[6]; + int flag_cooling; + int num_files; + double BoxSize; + double Omega0; + double OmegaLambda; + double HubbleParam; + int flag_stellarage; + int flag_metals; + unsigned int npartTotalHighWord[6]; + int flag_entropy_instead_u; + int flag_doubleprecision; + }; + +protected: + int num_files_, num_simultaneous_writers_; + header_t header_; + real_t lunit_, vunit_; + bool blongids_; + std::string this_fname_; + +public: + //! constructor + explicit gadget_hdf5_output_plugin(ConfigFile &cf) + : output_plugin(cf, "GADGET-HDF5") + { + num_files_ = 1; +#ifdef USE_MPI + // use as many output files as we have MPI tasks + MPI_Comm_size(MPI_COMM_WORLD, &num_files_); +#endif + real_t astart = 1.0 / (1.0 + cf_.GetValue("setup", "zstart")); + lunit_ = cf_.GetValue("setup", "BoxLength"); + vunit_ = lunit_ / std::sqrt(astart); + blongids_ = cf_.GetValueSafe("output", "UseLongids", false); + num_simultaneous_writers_ = cf_.GetValueSafe("output", "NumSimWriters", num_files_); + + for (int i = 0; i < 6; ++i) + { + header_.npart[i] = 0; + header_.npartTotal[i] = 0; + header_.npartTotalHighWord[i] = 0; + header_.mass[i] = 0.0; + } + + header_.time = astart; + header_.redshift = 1.0 / astart - 1.0; + header_.flag_sfr = 0; + header_.flag_feedback = 0; + header_.flag_cooling = 0; + header_.num_files = num_files_; + header_.BoxSize = lunit_; + header_.Omega0 = cf_.GetValue("cosmology", "Omega_m"); + header_.OmegaLambda = cf_.GetValue("cosmology", "Omega_L"); + header_.HubbleParam = cf_.GetValue("cosmology", "H0") / 100.0; + header_.flag_stellarage = 0; + header_.flag_metals = 0; + header_.flag_entropy_instead_u = 0; + header_.flag_doubleprecision = (typeid(write_real_t) == typeid(double)) ? true : false; + + this_fname_ = fname_; +#ifdef USE_MPI + int thisrank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &thisrank); + if (num_files_ > 1) + this_fname_ += "." 
+ std::to_string(thisrank); +#endif + + unlink(this_fname_.c_str()); + HDFCreateFile(this_fname_); + } + + // use destructor to write header post factum + ~gadget_hdf5_output_plugin() + { + HDFCreateGroup(this_fname_, "Header"); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_ThisFile", from_6array(header_.npart)); + HDFWriteGroupAttribute(this_fname_, "Header", "MassTable", from_6array(header_.mass)); + HDFWriteGroupAttribute(this_fname_, "Header", "Time", from_value(header_.time)); + HDFWriteGroupAttribute(this_fname_, "Header", "Redshift", from_value(header_.redshift)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Sfr", from_value(header_.flag_sfr)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Feedback", from_value(header_.flag_feedback)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total", from_6array(header_.npartTotal)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Cooling", from_value(header_.flag_cooling)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumFilesPerSnapshot", from_value(header_.num_files)); + HDFWriteGroupAttribute(this_fname_, "Header", "BoxSize", from_value(header_.BoxSize)); + HDFWriteGroupAttribute(this_fname_, "Header", "Omega0", from_value(header_.Omega0)); + HDFWriteGroupAttribute(this_fname_, "Header", "OmegaLambda", from_value(header_.OmegaLambda)); + HDFWriteGroupAttribute(this_fname_, "Header", "HubbleParam", from_value(header_.HubbleParam)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_StellarAge", from_value(header_.flag_stellarage)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Metals", from_value(header_.flag_metals)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array(header_.npartTotalHighWord)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Entropy_ICs", from_value(header_.flag_entropy_instead_u)); + + csoca::ilog << "Wrote" << std::endl; + } + + output_type write_species_as(const cosmo_species &) const { return output_type::particles; } + + real_t position_unit() const { return lunit_; } + + real_t velocity_unit() const { return vunit_; } + + bool has_64bit_reals() const + { + if (typeid(write_real_t) == typeid(double)) + return true; + return false; + } + + bool has_64bit_ids() const + { + if (blongids_) + return true; + return false; + } + + int get_species_idx(const cosmo_species &s) const + { + switch (s) + { + case cosmo_species::dm: + return 1; + case cosmo_species::baryon: + return 2; + case cosmo_species::neutrino: + return 3; + } + return -1; + } + + void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species) + { + int sid = get_species_idx(s); + + assert(sid != -1); + + header_.npart[sid] = (pc.get_local_num_particles()); + header_.npartTotal[sid] = (uint32_t)(pc.get_global_num_particles()); + header_.npartTotalHighWord[sid] = (uint32_t)((pc.get_global_num_particles()) >> 32); + + double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 + double boxmass = Omega_species * rhoc * std::pow(header_.BoxSize, 3); + header_.mass[sid] = boxmass / pc.get_global_num_particles(); + + HDFCreateGroup(this_fname_, std::string("PartType") + std::to_string(sid)); + + //... write positions and velocities..... 
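    // [editor's note -- illustrative sketch, not part of the original patch] Assuming the group and
    // dataset names used below, the file written by this plugin looks roughly like:
    //   /Header                  (attributes NumPart_ThisFile, MassTable, Time, Redshift, ...)
    //   /PartType1/Coordinates   (CDM, since get_species_idx(dm) == 1)
    //   /PartType1/Velocities
    //   /PartType1/ParticleIDs
    //   /PartType2/...           (baryons, written only when DoBaryons is enabled)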
+ if (this->has_64bit_reals()) + { + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions64_); + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities64_); + } + else + { + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions32_); + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities32_); + } + + //... write ids..... + if (this->has_64bit_ids()) + HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids64_); + else + HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids32_); + + // std::cout << ">>>A> " << header_.npart[sid] << std::endl; + } +}; + +namespace +{ +output_plugin_creator_concrete> creator1("gadget_hdf5"); +#if !defined(USE_SINGLEPRECISION) +output_plugin_creator_concrete> creator3("gadget_hdf5_double"); +#endif +} // namespace + +#endif \ No newline at end of file From f7b2519b8208cc39c5c703104311756829bb64b2 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 25 Jan 2020 16:20:29 +0100 Subject: [PATCH 056/130] working commit, changes for multi-species, moved mean matter PT outside of species loop --- include/grid_fft.hh | 21 ++++ include/transfer_function_plugin.hh | 7 +- src/ic_generator.cc | 177 ++++++++++++++-------------- src/plugins/transfer_CLASS.cc | 68 ++++++++--- 4 files changed, 167 insertions(+), 106 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 4848f0e..3f44c37 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -563,6 +563,27 @@ public: } } + template + void assign_function_of_grids_kdep(const functional &f, const grid1_t &g) + { + assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) ); + +#pragma omp parallel for + for (size_t i = 0; i < sizes_[0]; ++i) + { + for (size_t j = 0; j < sizes_[1]; ++j) + { + for (size_t k = 0; k < sizes_[2]; ++k) + { + auto &elem = this->kelem(i, j, k); + const auto &elemg = g.kelem(i, j, k); + + elem = f(this->get_k(i, j, k), elemg); + } + } + } + } + template void assign_function_of_grids_kdep(const functional &f, const grid1_t &g1, const grid2_t &g2) { diff --git a/include/transfer_function_plugin.hh b/include/transfer_function_plugin.hh index cd7c762..6a7fb6f 100644 --- a/include/transfer_function_plugin.hh +++ b/include/transfer_function_plugin.hh @@ -13,7 +13,12 @@ enum tf_type vtotal, vcdm, vbaryon, - total0 + total0, + cdm0, + baryon0, + vtotal0, + vcdm0, + vbaryon0, }; class TransferFunction_plugin diff --git a/src/ic_generator.cc b/src/ic_generator.cc index ee370ff..28f382f 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -78,7 +78,6 @@ int Run( ConfigFile& the_config ) Omega[cosmo_species::baryon] = Ob; }else{ double Om = the_config.GetValue("cosmology", "Omega_m"); - double Ob = the_config.GetValue("cosmology", "Omega_b"); Omega[cosmo_species::dm] = Om; Omega[cosmo_species::baryon] = 0.0; } @@ -166,8 +165,27 @@ int Run( ConfigFile& the_config ) Grid_FFT A3x({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT A3y({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT A3z({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + //... array [.] 
access to components of A3: - std::array< Grid_FFT*,3 > A3({&A3x,&A3y,&A3z}); + std::array *, 3> A3({&A3x, &A3y, &A3z}); + + // white noise field + Grid_FFT wnoise({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + + //-------------------------------------------------------------------- + // Fill the grid with a Gaussian white noise field + //-------------------------------------------------------------------- + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + csoca::ilog << "Generating white noise field...." << std::endl; + + the_random_number_generator->Fill_Grid(wnoise); + + wnoise.FourierTransformForward(); + + + //-------------------------------------------------------------------- + // Compute the LPT terms.... + //-------------------------------------------------------------------- //-------------------------------------------------------------------- // Create convolution class instance for non-linear terms @@ -177,89 +195,63 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- std::vector species_list; - species_list.push_back( cosmo_species::dm ); - if( bDoBaryons ) species_list.push_back( cosmo_species::baryon ); - - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - - for( auto& this_species : species_list ) - { - csoca::ilog << std::endl - << ">>> Computing ICs for species \'" << cosmo_species_name[this_species] << "\' <<<\n" << std::endl; + species_list.push_back(cosmo_species::dm); + if (bDoBaryons) + species_list.push_back(cosmo_species::baryon); //====================================================================== //... compute 1LPT displacement potential .... //====================================================================== // phi = - delta / k^2 + + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + csoca::ilog << "Generating white noise field...." << std::endl; + double wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush; - #if 1 // random ICs - //-------------------------------------------------------------------- - // Fill the grid with a Gaussian white noise field - //-------------------------------------------------------------------- - the_random_number_generator->Fill_Grid( phi ); - - phi.FourierTransformForward(); - - phi.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { + phi.FourierTransformForward(false); + phi.assign_function_of_grids_kdep([&](auto k, auto wn) { real_t kmod = k.norm(); - if( bDoFixing ) x = (std::abs(x)!=0.0)? x / std::abs(x) : x; - ccomplex_t delta = x * the_cosmo_calc->GetAmplitude(kmod, total); + if (bDoFixing) + wn = (std::abs(wn) != 0.0) ? 
wn / std::abs(wn) : wn; + ccomplex_t delta = wn * the_cosmo_calc->GetAmplitude(kmod, total); return -delta / (kmod * kmod) / volfac; - }); + }, + wnoise); phi.zero_DC_mode(); - #else // ICs with a given phi(1) potential function - constexpr real_t twopi{2.0*M_PI}; - constexpr real_t epsilon_q1d{0.25}; - constexpr real_t epsy{0.25}; - constexpr real_t epsz{0.0};//epsz{0.25}; + csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; - phi.FourierTransformBackward(false); - - phi.apply_function_r_dep([&](auto v, auto r) -> real_t { - real_t q1 = r[0]-0.5*boxlen;//r[0]/boxlen * twopi - M_PI; - real_t q2 = r[1]-0.5*boxlen;//r[1]/boxlen * twopi - M_PI; - real_t q3 = r[2]-0.5*boxlen;//r[1]/boxlen * twopi - M_PI; - - // std::cerr << q1 << " " << q2 << std::endl; - - return -2.0*std::cos(q1+std::cos(q2)); - // return (-std::cos(q1) + epsilon_q1d * std::sin(q2)); - // return (-std::cos(q1) + epsy * std::sin(q2) + epsz * std::cos(q1) * std::sin(q3)); - }); - phi.FourierTransformForward(); - - - #endif - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; - //====================================================================== //... compute 2LPT displacement potential .... //====================================================================== - if( LPTorder > 1 ){ + if (LPTorder > 1) + { wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush; phi2.FourierTransformForward(false); - Conv.convolve_SumOfHessians( phi, {0,0}, phi, {1,1}, {2,2}, op::assign_to( phi2 ) ); - Conv.convolve_Hessians( phi, {1,1}, phi, {2,2}, op::add_to(phi2) ); - Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, op::subtract_from(phi2) ); - Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, op::subtract_from(phi2) ); - Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, op::subtract_from(phi2) ); + Conv.convolve_SumOfHessians(phi, {0, 0}, phi, {1, 1}, {2, 2}, op::assign_to(phi2)); + Conv.convolve_Hessians(phi, {1, 1}, phi, {2, 2}, op::add_to(phi2)); + Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, op::subtract_from(phi2)); + Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, op::subtract_from(phi2)); + Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, op::subtract_from(phi2)); - if( bAddExternalTides ){ - phi2.assign_function_of_grids_kdep([&]( vec3 kvec, ccomplex_t pphi, ccomplex_t pphi2 ){ + if (bAddExternalTides) + { + phi2.assign_function_of_grids_kdep([&](vec3 kvec, ccomplex_t pphi, ccomplex_t pphi2) { // sign in front of f_aniso is reversed since phi1 = -phi - return pphi2 + f_aniso * (kvec[0]*kvec[0]*lss_aniso_lambda[0]+kvec[1]*kvec[1]*lss_aniso_lambda[1]+kvec[2]*kvec[2]*lss_aniso_lambda[2])*pphi; - }, phi, phi2 ); + return pphi2 + f_aniso * (kvec[0] * kvec[0] * lss_aniso_lambda[0] + kvec[1] * kvec[1] * lss_aniso_lambda[1] + kvec[2] * kvec[2] * lss_aniso_lambda[2]) * pphi; + }, + phi, phi2); } phi2.apply_InverseLaplacian(); - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; - if( bAddExternalTides ){ + if (bAddExternalTides) + { csoca::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" 
<< std::endl; csoca::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl; } @@ -268,47 +260,49 @@ int Run( ConfigFile& the_config ) //====================================================================== //... compute 3LPT displacement potential //====================================================================== - if( LPTorder > 2 ){ + if (LPTorder > 2) + { //... 3a term ... wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush; phi3a.FourierTransformForward(false); - Conv.convolve_Hessians( phi, {0,0}, phi, {1,1}, phi, {2,2}, op::assign_to(phi3a) ); - Conv.convolve_Hessians( phi, {0,1}, phi, {0,2}, phi, {1,2}, op::add_twice_to(phi3a) ); - Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, phi, {0,0}, op::subtract_from(phi3a) ); - Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, phi, {1,1}, op::subtract_from(phi3a) ); - Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, phi, {2,2}, op::subtract_from(phi3a) ); + Conv.convolve_Hessians(phi, {0, 0}, phi, {1, 1}, phi, {2, 2}, op::assign_to(phi3a)); + Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::add_twice_to(phi3a)); + Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, phi, {0, 0}, op::subtract_from(phi3a)); + Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, phi, {1, 1}, op::subtract_from(phi3a)); + Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, phi, {2, 2}, op::subtract_from(phi3a)); phi3a.apply_InverseLaplacian(); - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; //... 3b term ... wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush; phi3b.FourierTransformForward(false); - Conv.convolve_SumOfHessians( phi, {0,0}, phi2, {1,1}, {2,2}, op::assign_to(phi3b) ); - Conv.convolve_SumOfHessians( phi, {1,1}, phi2, {2,2}, {0,0}, op::add_to(phi3b) ); - Conv.convolve_SumOfHessians( phi, {2,2}, phi2, {0,0}, {1,1}, op::add_to(phi3b) ); - Conv.convolve_Hessians( phi, {0,1}, phi2, {0,1}, op::subtract_twice_from(phi3b) ); - Conv.convolve_Hessians( phi, {0,2}, phi2, {0,2}, op::subtract_twice_from(phi3b) ); - Conv.convolve_Hessians( phi, {1,2}, phi2, {1,2}, op::subtract_twice_from(phi3b) ); + Conv.convolve_SumOfHessians(phi, {0, 0}, phi2, {1, 1}, {2, 2}, op::assign_to(phi3b)); + Conv.convolve_SumOfHessians(phi, {1, 1}, phi2, {2, 2}, {0, 0}, op::add_to(phi3b)); + Conv.convolve_SumOfHessians(phi, {2, 2}, phi2, {0, 0}, {1, 1}, op::add_to(phi3b)); + Conv.convolve_Hessians(phi, {0, 1}, phi2, {0, 1}, op::subtract_twice_from(phi3b)); + Conv.convolve_Hessians(phi, {0, 2}, phi2, {0, 2}, op::subtract_twice_from(phi3b)); + Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::subtract_twice_from(phi3b)); phi3b.apply_InverseLaplacian(); phi3b *= 0.5; // factor 1/2 from definition of phi(3b)! - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; //... transversal term ... 
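        // [editor's note -- illustrative sketch, not part of the original patch] Assuming
        // convolve_Hessians(f,{a,b},g,{c,d},op) accumulates f_,ab * g_,cd and
        // convolve_DifferenceOfHessians(f,{a,b},g,{c,c},{d,d},op) accumulates f_,ab * (g_,cc - g_,dd),
        // the four convolutions below amount to, for cyclic indices (i, j=i+1, k=i+2),
        //   A3_i = InvLaplacian[ phi2_,ij phi_,ik - phi2_,ik phi_,ij
        //                        + phi_,jk (phi2_,jj - phi2_,kk) - phi2_,jk (phi_,jj - phi_,kk) ]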
wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush; - for( int idim=0; idim<3; ++idim ){ + for (int idim = 0; idim < 3; ++idim) + { // cyclic rotations of indices - int idimp = (idim+1)%3, idimpp = (idim+2)%3; + int idimp = (idim + 1) % 3, idimpp = (idim + 2) % 3; A3[idim]->FourierTransformForward(false); - Conv.convolve_Hessians( phi2, {idim,idimp}, phi, {idim,idimpp}, op::assign_to(*A3[idim]) ); - Conv.convolve_Hessians( phi2, {idim,idimpp}, phi, {idim,idimp}, op::subtract_from(*A3[idim]) ); - Conv.convolve_DifferenceOfHessians( phi, {idimp,idimpp}, phi2,{idimp,idimp}, {idimpp,idimpp}, op::add_to(*A3[idim]) ); - Conv.convolve_DifferenceOfHessians( phi2,{idimp,idimpp}, phi, {idimp,idimp}, {idimpp,idimpp}, op::subtract_from(*A3[idim]) ); + Conv.convolve_Hessians(phi2, {idim, idimp}, phi, {idim, idimpp}, op::assign_to(*A3[idim])); + Conv.convolve_Hessians(phi2, {idim, idimpp}, phi, {idim, idimp}, op::subtract_from(*A3[idim])); + Conv.convolve_DifferenceOfHessians(phi, {idimp, idimpp}, phi2, {idimp, idimp}, {idimpp, idimpp}, op::add_to(*A3[idim])); + Conv.convolve_DifferenceOfHessians(phi2, {idimp, idimpp}, phi, {idimp, idimp}, {idimpp, idimpp}, op::subtract_from(*A3[idim])); A3[idim]->apply_InverseLaplacian(); } - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; } // if( bSymplecticPT ){ @@ -344,19 +338,30 @@ int Run( ConfigFile& the_config ) // Testing const std::string testing = the_config.GetValueSafe("testing", "test", "none"); - if(testing != "none") { + if (testing != "none") + { csoca::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" << std::endl; - if(testing == "potentials_and_densities") { + if (testing == "potentials_and_densities"){ testing::output_potentials_and_densities(the_config, ngrid, boxlen, phi, phi2, phi3a, phi3b, A3); - } else if(testing == "velocity_displacement_symmetries") { + } + else if (testing == "velocity_displacement_symmetries"){ testing::output_velocity_displacement_symmetries(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); - } else if(testing == "convergence") { + } + else if (testing == "convergence"){ testing::output_convergence(the_config, the_cosmo_calc.get(), ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); - } else { + } + else{ csoca::flog << "unknown test '" << testing << "'" << std::endl; std::abort(); } - } else { + } + + for( auto& this_species : species_list ) + { + csoca::ilog << std::endl + << ">>> Computing ICs for species \'" << cosmo_species_name[this_species] << "\' <<<\n" << std::endl; + + { // temporary storage of data Grid_FFT tmp({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 3469b7e..da73c6a 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -22,10 +22,16 @@ class transfer_CLASS_plugin : public TransferFunction_plugin { private: + //... target redshift tables std::vector tab_lnk_, tab_dtot_, tab_dc_, tab_db_, tab_ttot_, tab_tc_, tab_tb_; gsl_interp_accel *gsl_ia_dtot_, *gsl_ia_dc_, *gsl_ia_db_, *gsl_ia_ttot_, *gsl_ia_tc_, *gsl_ia_tb_; gsl_spline *gsl_sp_dtot_, *gsl_sp_dc_, *gsl_sp_db_, *gsl_sp_ttot_, *gsl_sp_tc_, *gsl_sp_tb_; + //... 
starting redshift tables + std::vector tab_lnk0_, tab_dtot0_, tab_dc0_, tab_db0_, tab_ttot0_, tab_tc0_, tab_tb0_; + gsl_interp_accel *gsl_ia_dtot0_, *gsl_ia_dc0_, *gsl_ia_db0_, *gsl_ia_ttot0_, *gsl_ia_tc0_, *gsl_ia_tb0_; + gsl_spline *gsl_sp_dtot0_, *gsl_sp_dc0_, *gsl_sp_db0_, *gsl_sp_ttot0_, *gsl_sp_tc0_, *gsl_sp_tb0_; + // single fluid growing/decaying mode decomposition gsl_interp_accel *gsl_ia_Cplus_, *gsl_ia_Cminus_; gsl_spline *gsl_sp_Cplus_, *gsl_sp_Cminus_; @@ -85,8 +91,11 @@ private: std::unique_ptr CE = std::make_unique(pars, false); + CE->getTk(zstart_, tab_lnk0_, tab_dc0_, tab_db0_, d_ncdm, tab_dtot0_, + tab_tc0_, tab_tb0_, t_ncdm, tab_ttot0_, phi, psi ); + CE->getTk(ztarget_, tab_lnk_, tab_dc_, tab_db_, d_ncdm, tab_dtot_, - tab_tc_, tab_tb_, t_ncdm, tab_ttot_, phi, psi ); + tab_tc_, tab_tb_, t_ncdm, tab_ttot_, phi, psi); wtime = get_wtime() - wtime; csoca::ilog << " took " << wtime << " s / " << tab_lnk_.size() << " modes." << std::endl; @@ -110,12 +119,12 @@ public: this->ClassEngine_get_data(); - gsl_ia_dtot_ = gsl_interp_accel_alloc(); - gsl_ia_dc_ = gsl_interp_accel_alloc(); - gsl_ia_db_ = gsl_interp_accel_alloc(); - gsl_ia_ttot_ = gsl_interp_accel_alloc(); - gsl_ia_tc_ = gsl_interp_accel_alloc(); - gsl_ia_tb_ = gsl_interp_accel_alloc(); + gsl_ia_dtot_ = gsl_interp_accel_alloc(); gsl_ia_dtot0_ = gsl_interp_accel_alloc(); + gsl_ia_dc_ = gsl_interp_accel_alloc(); gsl_ia_dc0_ = gsl_interp_accel_alloc(); + gsl_ia_db_ = gsl_interp_accel_alloc(); gsl_ia_db0_ = gsl_interp_accel_alloc(); + gsl_ia_ttot_ = gsl_interp_accel_alloc(); gsl_ia_ttot0_ = gsl_interp_accel_alloc(); + gsl_ia_tc_ = gsl_interp_accel_alloc(); gsl_ia_tc0_ = gsl_interp_accel_alloc(); + gsl_ia_tb_ = gsl_interp_accel_alloc(); gsl_ia_tb0_ = gsl_interp_accel_alloc(); gsl_sp_dtot_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); gsl_sp_dc_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); @@ -124,6 +133,13 @@ public: gsl_sp_tc_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); gsl_sp_tb_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_dtot0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_dc0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_db0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_ttot0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_tc0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_tb0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_spline_init(gsl_sp_dtot_, &tab_lnk_[0], &tab_dtot_[0], tab_lnk_.size()); gsl_spline_init(gsl_sp_dc_, &tab_lnk_[0], &tab_dc_[0], tab_lnk_.size()); gsl_spline_init(gsl_sp_db_, &tab_lnk_[0], &tab_db_[0], tab_lnk_.size()); @@ -131,6 +147,13 @@ public: gsl_spline_init(gsl_sp_tc_, &tab_lnk_[0], &tab_tc_[0], tab_lnk_.size()); gsl_spline_init(gsl_sp_tb_, &tab_lnk_[0], &tab_tb_[0], tab_lnk_.size()); + gsl_spline_init(gsl_sp_dtot0_, &tab_lnk0_[0], &tab_dtot0_[0], tab_lnk0_.size()); + gsl_spline_init(gsl_sp_dc0_, &tab_lnk0_[0], &tab_dc0_[0], tab_lnk0_.size()); + gsl_spline_init(gsl_sp_db0_, &tab_lnk0_[0], &tab_db0_[0], tab_lnk0_.size()); + gsl_spline_init(gsl_sp_ttot0_, &tab_lnk0_[0], &tab_ttot0_[0], tab_lnk0_.size()); + gsl_spline_init(gsl_sp_tc0_, &tab_lnk0_[0], &tab_tc0_[0], tab_lnk0_.size()); + gsl_spline_init(gsl_sp_tb0_, &tab_lnk0_[0], &tab_tb0_[0], tab_lnk0_.size()); + //-------------------------------------------------------------------------- // single fluid growing/decaying mode decomposition 
//-------------------------------------------------------------------------- @@ -165,19 +188,19 @@ public: } ~transfer_CLASS_plugin(){ - gsl_spline_free(gsl_sp_dtot_); - gsl_spline_free(gsl_sp_dc_); - gsl_spline_free(gsl_sp_db_); - gsl_spline_free(gsl_sp_ttot_); - gsl_spline_free(gsl_sp_tc_); - gsl_spline_free(gsl_sp_tb_); + gsl_spline_free(gsl_sp_dtot_); gsl_spline_free(gsl_sp_dtot0_); + gsl_spline_free(gsl_sp_dc_); gsl_spline_free(gsl_sp_dc0_); + gsl_spline_free(gsl_sp_db_); gsl_spline_free(gsl_sp_db0_); + gsl_spline_free(gsl_sp_ttot_); gsl_spline_free(gsl_sp_ttot0_); + gsl_spline_free(gsl_sp_tc_); gsl_spline_free(gsl_sp_tc0_); + gsl_spline_free(gsl_sp_tb_); gsl_spline_free(gsl_sp_tb0_); - gsl_interp_accel_free(gsl_ia_dtot_); - gsl_interp_accel_free(gsl_ia_dc_); - gsl_interp_accel_free(gsl_ia_db_); - gsl_interp_accel_free(gsl_ia_ttot_); - gsl_interp_accel_free(gsl_ia_tc_); - gsl_interp_accel_free(gsl_ia_tb_); + gsl_interp_accel_free(gsl_ia_dtot_); gsl_interp_accel_free(gsl_ia_dtot0_); + gsl_interp_accel_free(gsl_ia_dc_); gsl_interp_accel_free(gsl_ia_dc0_); + gsl_interp_accel_free(gsl_ia_db_); gsl_interp_accel_free(gsl_ia_db0_); + gsl_interp_accel_free(gsl_ia_ttot_); gsl_interp_accel_free(gsl_ia_ttot0_); + gsl_interp_accel_free(gsl_ia_tc_); gsl_interp_accel_free(gsl_ia_tc0_); + gsl_interp_accel_free(gsl_ia_tb_); gsl_interp_accel_free(gsl_ia_tb0_); } inline double compute(double k, tf_type type) const { @@ -190,6 +213,13 @@ public: case vtotal: splineT = gsl_sp_ttot_; accT = gsl_ia_ttot_; break; case vcdm: splineT = gsl_sp_tc_; accT = gsl_ia_tc_; break; case vbaryon: splineT = gsl_sp_tb_; accT = gsl_ia_tb_; break; + + case total0: splineT = gsl_sp_dtot0_;accT = gsl_ia_dtot0_;break; + case cdm0: splineT = gsl_sp_dc0_; accT = gsl_ia_dc0_; break; + case baryon0: splineT = gsl_sp_db0_; accT = gsl_ia_db0_; break; + case vtotal0: splineT = gsl_sp_ttot0_;accT = gsl_ia_ttot0_;break; + case vcdm0: splineT = gsl_sp_tc0_; accT = gsl_ia_tc0_; break; + case vbaryon0:splineT = gsl_sp_tb0_; accT = gsl_ia_tb0_; break; default: throw std::runtime_error("Invalid type requested in transfer function evaluation"); } From cd7f451397d2857863ab22fdc92d7ca8b10290e2 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 25 Jan 2020 17:55:34 +0100 Subject: [PATCH 057/130] fixed bug with getting CLASS TF for two different redshifts --- src/plugins/transfer_CLASS.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index da73c6a..9d415a3 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -44,10 +45,13 @@ private: csoca::ilog << "Computing TF via ClassEngine..." << std::endl << " ztarget = " << ztarget_ << ", zstart = " << zstart_ << " ..." 
<< std::flush; double wtime = get_wtime(); + + std::stringstream zlist; + zlist << zstart_ << ", " << zstart_; ClassParams pars; pars.add("extra metric transfer functions", "yes"); - pars.add("z_pk",ztarget_); + pars.add("z_max_pk",zlist.str()); pars.add("P_k_max_h/Mpc", kmax_); pars.add("h",h_); @@ -213,7 +217,7 @@ public: case vtotal: splineT = gsl_sp_ttot_; accT = gsl_ia_ttot_; break; case vcdm: splineT = gsl_sp_tc_; accT = gsl_ia_tc_; break; case vbaryon: splineT = gsl_sp_tb_; accT = gsl_ia_tb_; break; - + case total0: splineT = gsl_sp_dtot0_;accT = gsl_ia_dtot0_;break; case cdm0: splineT = gsl_sp_dc0_; accT = gsl_ia_dc0_; break; case baryon0: splineT = gsl_sp_db0_; accT = gsl_ia_db0_; break; From 1fc2b2d67718578008b75f2e71eacb737849a53b Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 25 Jan 2020 23:31:03 +0100 Subject: [PATCH 058/130] added distinct amplitudes for cdm and baryons --- CMakeLists.txt | 4 +- example.conf | 20 ++-- include/cosmology_calculator.hh | 26 +++-- include/particle_generator.hh | 2 +- src/ic_generator.cc | 188 ++++++++++++++++++-------------- 5 files changed, 139 insertions(+), 101 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 51a453e..f10eb0a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,8 +5,8 @@ project(monofonIC) # include class submodule include(${CMAKE_CURRENT_SOURCE_DIR}/external/class.cmake) -#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") +#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic -g -fno-omit-frame-pointer") find_package(PkgConfig REQUIRED) set(CMAKE_MODULE_PATH diff --git a/example.conf b/example.conf index 178248d..718e145 100644 --- a/example.conf +++ b/example.conf @@ -4,15 +4,15 @@ GridRes = 128 # length of the box in Mpc/h BoxLength = 250 # starting redshift -zstart = 49.0 +zstart = 129.0 # order of the LPT to be used (1,2 or 3) LPTorder = 3 # also do baryon ICs? -DoBaryons = no +DoBaryons = yes # do mode fixing à la Angulo&Pontzen DoFixing = no # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!) 
-ParticleLoad = sc +ParticleLoad = bcc [cosmology] transfer = CLASS @@ -39,18 +39,22 @@ seed = 9001 [testing] # enables diagnostic output # can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence' -test = none +test = none [execution] -NumThreads = 4 +NumThreads = 16 [output] fname_hdf5 = output_sch.hdf5 fbase_analysis = output -format = gadget2 -filename = ics_gadget.dat -UseLongids = false +#format = gadget2 +#filename = ics_gadget.dat +#UseLongids = false +# +format = gadget_hdf5 +filename = ics_gadget.hdf5 + #format = generic #filename = debug.hdf5 diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index ea51299..ba2d8ff 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -81,23 +81,33 @@ public: // write power spectrum to a file std::ofstream ofs(fname.c_str()); - std::stringstream ss; ss << " (a=" << a <<")"; + std::stringstream ss; ss << " ,a=" << a <<""; ofs << "# " << std::setw(18) << "k [h/Mpc]" - << std::setw(20) << ("P_dtot(k)"+ss.str()) - << std::setw(20) << ("P_dcdm(k)"+ss.str()) - << std::setw(20) << ("P_dbar(k)"+ss.str()) - << std::setw(20) << ("P_dtot(K) (a=1)") - << std::setw(20) << ("P_tcdm(k)"+ss.str()) - << std::setw(20) << ("P_tbar(k)"+ss.str()) + << std::setw(20) << ("P_dtot(k"+ss.str()+"|BS)") + << std::setw(20) << ("P_dcdm(k"+ss.str()+"|BS)") + << std::setw(20) << ("P_dbar(k"+ss.str()+"|BS)") + << std::setw(20) << ("P_tcdm(k"+ss.str()+"|BS)") + << std::setw(20) << ("P_tbar(k"+ss.str()+"|BS)") + << std::setw(20) << ("P_dtot(k"+ss.str()+")") + << std::setw(20) << ("P_dcdm(k"+ss.str()+")") + << std::setw(20) << ("P_dbar(k"+ss.str()+")") + << std::setw(20) << ("P_tcdm(k"+ss.str()+")") + << std::setw(20) << ("P_tbar(k"+ss.str()+")") + << std::setw(20) << ("P_dtot(K,a=1)") << std::endl; for( double k=kmin; kget_kmax(); k*=1.05 ){ ofs << std::setw(20) << std::setprecision(10) << k << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total) * Dplus0, 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, cdm) * Dplus0, 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, baryon) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total), 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vcdm) * Dplus0, 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vbaryon) * Dplus0, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, cdm0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, baryon0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vcdm0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vbaryon0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total), 2.0) << std::endl; } } diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 4dafda8..efac3dd 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -31,7 +31,7 @@ const std::vector> second_lattice_shift = { /* SC : */ {0.5, 0.5, 0.5}, /* BCC: */ {0.5, 0.5, 0.0}, - /* FCC: */ {0.5, 0.5, 0.5}, + /* FCC: */ {0.5, 0.0, 0.0}, /* RSC: */ {0.25, 0.25, 0.25}, }; diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 
8eb83bd..9641112 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -182,6 +182,11 @@ int Run( ConfigFile& the_config ) the_random_number_generator->Fill_Grid(wnoise); wnoise.FourierTransformForward(); + wnoise.apply_function_k( [&](auto wn){ + if (bDoFixing) + wn = (std::abs(wn) != 0.0) ? wn / std::abs(wn) : wn; + return wn / volfac; + }); //-------------------------------------------------------------------- @@ -207,39 +212,36 @@ int Run( ConfigFile& the_config ) if (bDoBaryons) species_list.push_back(cosmo_species::baryon); - //====================================================================== - //... compute 1LPT displacement potential .... - //====================================================================== - // phi = - delta / k^2 + //====================================================================== + //... compute 1LPT displacement potential .... + //====================================================================== + // phi = - delta / k^2 csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; csoca::ilog << "Generating white noise field...." << std::endl; - double wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush; + double wtime = get_wtime(); + csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush; phi.FourierTransformForward(false); phi.assign_function_of_grids_kdep([&](auto k, auto wn) { - real_t kmod = k.norm(); - if (bDoFixing) - wn = (std::abs(wn) != 0.0) ? wn / std::abs(wn) : wn; + real_t kmod = k.norm(); ccomplex_t delta = wn * the_cosmo_calc->GetAmplitude(kmod, total); - return -delta / (kmod * kmod) / volfac; - }, - wnoise); + return -delta / (kmod * kmod); + }, wnoise); - phi.zero_DC_mode(); + phi.zero_DC_mode(); csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; - - //====================================================================== - //... compute 2LPT displacement potential .... - //====================================================================== + + //====================================================================== + //... compute 2LPT displacement potential .... 
+ //====================================================================== if (LPTorder > 1) { - wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush; - phi2.FourierTransformForward(false); + wtime = get_wtime(); + csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush; + phi2.FourierTransformForward(false); Conv.convolve_SumOfHessians(phi, {0, 0}, phi, {1, 1}, {2, 2}, op::assign_to(phi2)); Conv.convolve_Hessians(phi, {1, 1}, phi, {2, 2}, op::add_to(phi2)); Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, op::subtract_from(phi2)); @@ -249,119 +251,119 @@ int Run( ConfigFile& the_config ) if (bAddExternalTides) { phi2.assign_function_of_grids_kdep([&](vec3 kvec, ccomplex_t pphi, ccomplex_t pphi2) { - // sign in front of f_aniso is reversed since phi1 = -phi + // sign in front of f_aniso is reversed since phi1 = -phi return pphi2 + f_aniso * (kvec[0] * kvec[0] * lss_aniso_lambda[0] + kvec[1] * kvec[1] * lss_aniso_lambda[1] + kvec[2] * kvec[2] * lss_aniso_lambda[2]) * pphi; }, phi, phi2); - } + } - phi2.apply_InverseLaplacian(); + phi2.apply_InverseLaplacian(); csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; if (bAddExternalTides) { - csoca::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" << std::endl; - csoca::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl; - } + csoca::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" << std::endl; + csoca::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl; } + } - //====================================================================== - //... compute 3LPT displacement potential - //====================================================================== + //====================================================================== + //... compute 3LPT displacement potential + //====================================================================== if (LPTorder > 2) { - //... 3a term ... - wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush; - phi3a.FourierTransformForward(false); + //... 3a term ... + wtime = get_wtime(); + csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush; + phi3a.FourierTransformForward(false); Conv.convolve_Hessians(phi, {0, 0}, phi, {1, 1}, phi, {2, 2}, op::assign_to(phi3a)); Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::add_twice_to(phi3a)); Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, phi, {0, 0}, op::subtract_from(phi3a)); Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, phi, {1, 1}, op::subtract_from(phi3a)); Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, phi, {2, 2}, op::subtract_from(phi3a)); - phi3a.apply_InverseLaplacian(); + phi3a.apply_InverseLaplacian(); csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; - //... 3b term ... - wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush; - phi3b.FourierTransformForward(false); + //... 3b term ... 
+ wtime = get_wtime(); + csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush; + phi3b.FourierTransformForward(false); Conv.convolve_SumOfHessians(phi, {0, 0}, phi2, {1, 1}, {2, 2}, op::assign_to(phi3b)); Conv.convolve_SumOfHessians(phi, {1, 1}, phi2, {2, 2}, {0, 0}, op::add_to(phi3b)); Conv.convolve_SumOfHessians(phi, {2, 2}, phi2, {0, 0}, {1, 1}, op::add_to(phi3b)); Conv.convolve_Hessians(phi, {0, 1}, phi2, {0, 1}, op::subtract_twice_from(phi3b)); Conv.convolve_Hessians(phi, {0, 2}, phi2, {0, 2}, op::subtract_twice_from(phi3b)); Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::subtract_twice_from(phi3b)); - phi3b.apply_InverseLaplacian(); - phi3b *= 0.5; // factor 1/2 from definition of phi(3b)! + phi3b.apply_InverseLaplacian(); + phi3b *= 0.5; // factor 1/2 from definition of phi(3b)! csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; - //... transversal term ... - wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush; + //... transversal term ... + wtime = get_wtime(); + csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush; for (int idim = 0; idim < 3; ++idim) { - // cyclic rotations of indices + // cyclic rotations of indices int idimp = (idim + 1) % 3, idimpp = (idim + 2) % 3; - A3[idim]->FourierTransformForward(false); + A3[idim]->FourierTransformForward(false); Conv.convolve_Hessians(phi2, {idim, idimp}, phi, {idim, idimpp}, op::assign_to(*A3[idim])); Conv.convolve_Hessians(phi2, {idim, idimpp}, phi, {idim, idimp}, op::subtract_from(*A3[idim])); Conv.convolve_DifferenceOfHessians(phi, {idimp, idimpp}, phi2, {idimp, idimp}, {idimpp, idimpp}, op::add_to(*A3[idim])); Conv.convolve_DifferenceOfHessians(phi2, {idimp, idimpp}, phi, {idimp, idimp}, {idimpp, idimpp}, op::subtract_from(*A3[idim])); - A3[idim]->apply_InverseLaplacian(); - } - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + A3[idim]->apply_InverseLaplacian(); } + csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + } - // if( bSymplecticPT ){ - // //... transversal term ... - // wtime = get_wtime(); - // csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing vNLO(3) term" << std::flush; - // for( int idim=0; idim<3; ++idim ){ - // // cyclic rotations of indices - // A3[idim]->FourierTransformForward(false); - // Conv.convolve_Gradient_and_Hessian( phi, {0}, phi2, {idim,0}, assign_to(*A3[idim]) ); - // Conv.convolve_Gradient_and_Hessian( phi, {1}, phi2, {idim,1}, add_to(*A3[idim]) ); - // Conv.convolve_Gradient_and_Hessian( phi, {2}, phi2, {idim,2}, add_to(*A3[idim]) ); - // } - // csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + // if( bSymplecticPT ){ + // //... transversal term ... 
+ // wtime = get_wtime(); + // csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing vNLO(3) term" << std::flush; + // for( int idim=0; idim<3; ++idim ){ + // // cyclic rotations of indices + // A3[idim]->FourierTransformForward(false); + // Conv.convolve_Gradient_and_Hessian( phi, {0}, phi2, {idim,0}, assign_to(*A3[idim]) ); + // Conv.convolve_Gradient_and_Hessian( phi, {1}, phi2, {idim,1}, add_to(*A3[idim]) ); + // Conv.convolve_Gradient_and_Hessian( phi, {2}, phi2, {idim,2}, add_to(*A3[idim]) ); + // } + // csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; - // } + // } - ///... scale all potentials with respective growth factors - phi *= g1; - phi2 *= g2; - phi3a *= g3a; - phi3b *= g3b; - (*A3[0]) *= g3c; - (*A3[1]) *= g3c; - (*A3[2]) *= g3c; + ///... scale all potentials with respective growth factors + phi *= g1; + phi2 *= g2; + phi3a *= g3a; + phi3b *= g3b; + (*A3[0]) *= g3c; + (*A3[1]) *= g3c; + (*A3[2]) *= g3c; - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - /////////////////////////////////////////////////////////////////////// - // we store the densities here if we compute them - //====================================================================== + /////////////////////////////////////////////////////////////////////// + // we store the densities here if we compute them + //====================================================================== - // Testing - const std::string testing = the_config.GetValueSafe("testing", "test", "none"); + // Testing + const std::string testing = the_config.GetValueSafe("testing", "test", "none"); if (testing != "none") { - csoca::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" << std::endl; + csoca::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" 
<< std::endl; if (testing == "potentials_and_densities"){ - testing::output_potentials_and_densities(the_config, ngrid, boxlen, phi, phi2, phi3a, phi3b, A3); + testing::output_potentials_and_densities(the_config, ngrid, boxlen, phi, phi2, phi3a, phi3b, A3); } else if (testing == "velocity_displacement_symmetries"){ - testing::output_velocity_displacement_symmetries(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); + testing::output_velocity_displacement_symmetries(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); } else if (testing == "convergence"){ - testing::output_convergence(the_config, the_cosmo_calc.get(), ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); + testing::output_convergence(the_config, the_cosmo_calc.get(), ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); } else{ - csoca::flog << "unknown test '" << testing << "'" << std::endl; - std::abort(); - } + csoca::flog << "unknown test '" << testing << "'" << std::endl; + std::abort(); + } } for( auto& this_species : species_list ) @@ -492,9 +494,20 @@ int Run( ConfigFile& the_config ) // divide by Lbox, because displacement is in box units for output plugin tmp.kelem(idx) = lunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot + lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx) ); + + if( bDoBaryons ){ + vec3 kvec = phi.get_k(i,j,k); + real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); + double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, cdm0) : + (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, baryon0) : + the_cosmo_calc->GetAmplitude(kmod, total)*g1) - the_cosmo_calc->GetAmplitude(kmod, total)*g1; + + tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * lunit * ampldiff / k2 / boxlen; + } } } } + tmp.zero_DC_mode(); tmp.FourierTransformBackward(); // if we write particle data, store particle data in particle structure @@ -530,9 +543,19 @@ int Run( ConfigFile& the_config ) tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot_v + vfac3 * (lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx)) ); + if( bDoBaryons ){ + vec3 kvec = phi.get_k(i,j,k); + real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); + double ampldiff = ((this_species == cosmo_species::dm)? -the_cosmo_calc->GetAmplitude(kmod, vcdm0) : + (this_species == cosmo_species::baryon)? 
-the_cosmo_calc->GetAmplitude(kmod, vbaryon0) : + the_cosmo_calc->GetAmplitude(kmod, total)*g1) - the_cosmo_calc->GetAmplitude(kmod, total)*g1; + tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * vfac1 * vunit / boxlen * ampldiff / k2 ; + } + // correct velocity with PLT mode growth rate tmp.kelem(idx) *= lg.vfac_corr(tmp.get_k3(i,j,k)); + if( bAddExternalTides ){ // modify velocities with anisotropic expansion factor**2 tmp.kelem(idx) *= std::pow(lss_aniso_alpha[idim],2.0); @@ -544,6 +567,7 @@ int Run( ConfigFile& the_config ) } } } + tmp.zero_DC_mode(); tmp.FourierTransformBackward(); // if we write particle data, store particle data in particle structure From 91cc71c038e16f86a5841aa025053c01332243cb Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 25 Jan 2020 23:31:50 +0100 Subject: [PATCH 059/130] removed sanitizer options from cmake --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f10eb0a..a4eab8b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,8 +5,8 @@ project(monofonIC) # include class submodule include(${CMAKE_CURRENT_SOURCE_DIR}/external/class.cmake) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") -#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic -g -fno-omit-frame-pointer") +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic") find_package(PkgConfig REQUIRED) set(CMAKE_MODULE_PATH From 401fec0ebd4327165edfcc28cd2a127e66b865a0 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 26 Jan 2020 16:45:26 +0100 Subject: [PATCH 060/130] added place holder massive neutrino options for class --- src/plugins/transfer_CLASS.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 9d415a3..d660d25 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -65,6 +65,18 @@ private: pars.add("Omega_fld",0.0); pars.add("Omega_scf",0.0); + // massive neutrinos +#if 1 + //default off + pars.add("N_ncdm",0); +#else + // change above to enable + pars.add("N_ur",0); + pars.add("N_ncdm",1); + pars.add("m_ncdm","0.4"); + pars.add("T_ncdm",0.71611); +#endif + pars.add("A_s",2.42e-9); pars.add("n_s",.961); // this doesn't matter for TF pars.add("output","dTk,vTk"); From 2b6605861965c1d86153e9ce9b82637e9bd9ccb5 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 26 Jan 2020 21:42:07 +0100 Subject: [PATCH 061/130] fixed dual lattice two-fluid ICs --- include/particle_generator.hh | 2 +- src/ic_generator.cc | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/particle_generator.hh b/include/particle_generator.hh index efac3dd..801c919 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -31,7 +31,7 @@ const std::vector> second_lattice_shift = { /* SC : */ {0.5, 0.5, 0.5}, /* BCC: */ {0.5, 0.5, 0.0}, - /* FCC: */ {0.5, 0.0, 0.0}, + /* FCC: */ {0.25, 0.25, 0.25}, /* RSC: */ {0.25, 0.25, 0.25}, }; diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 9641112..ec71944 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -500,7 +500,8 @@ int Run( ConfigFile& the_config ) real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); double ampldiff = ((this_species == cosmo_species::dm)? 
the_cosmo_calc->GetAmplitude(kmod, cdm0) : (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, baryon0) : - the_cosmo_calc->GetAmplitude(kmod, total)*g1) - the_cosmo_calc->GetAmplitude(kmod, total)*g1; + // the_cosmo_calc->GetAmplitude(kmod, total0)) - the_cosmo_calc->GetAmplitude(kmod, total0); + the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * lunit * ampldiff / k2 / boxlen; } @@ -546,9 +547,10 @@ int Run( ConfigFile& the_config ) if( bDoBaryons ){ vec3 kvec = phi.get_k(i,j,k); real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); - double ampldiff = ((this_species == cosmo_species::dm)? -the_cosmo_calc->GetAmplitude(kmod, vcdm0) : - (this_species == cosmo_species::baryon)? -the_cosmo_calc->GetAmplitude(kmod, vbaryon0) : - the_cosmo_calc->GetAmplitude(kmod, total)*g1) - the_cosmo_calc->GetAmplitude(kmod, total)*g1; + double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, vcdm0) : + (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, vbaryon0) : + // the_cosmo_calc->GetAmplitude(kmod, total0)) - the_cosmo_calc->GetAmplitude(kmod, total0); + the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * vfac1 * vunit / boxlen * ampldiff / k2 ; } From 816a52d4da1a20167877f4193e5468f8eb1abe78 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 24 Feb 2020 18:10:09 +0100 Subject: [PATCH 062/130] added reading routine from HDF5 --- include/grid_fft.hh | 2 + src/grid_fft.cc | 130 ++++++++++++++++++++++++++++++++++++++++++-- src/ic_generator.cc | 18 +++++- 3 files changed, 145 insertions(+), 5 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index dcb3cb4..edbcc69 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -649,6 +649,8 @@ public: void Write_to_HDF5(std::string fname, std::string datasetname) const; + void Read_from_HDF5( std::string fname, std::string datasetname ); + void Write_PowerSpectrum(std::string ofname); void Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count); diff --git a/src/grid_fft.cc b/src/grid_fft.cc index d5f103a..54f8aac 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -200,7 +200,8 @@ void Grid_FFT::FourierTransformForward(bool do_transform) { double wtime = get_wtime(); csoca::dlog.Print("[FFT] Calling Grid_FFT::to_kspace (%lux%lux%lu)", sizes_[0], sizes_[1], sizes_[2]); - FFTW_API(execute)(plan_); + FFTW_API(execute) + (plan_); this->ApplyNorm(); wtime = get_wtime() - wtime; @@ -232,7 +233,8 @@ void Grid_FFT::FourierTransformBackward(bool do_transform) csoca::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]); double wtime = get_wtime(); - FFTW_API(execute)(iplan_); + FFTW_API(execute) + (iplan_); this->ApplyNorm(); wtime = get_wtime() - wtime; @@ -269,6 +271,126 @@ void create_hdf5(std::string Filename) H5Fclose(HDF_FileID); } +template +hid_t hdf5_get_data_type(void) +{ + if (typeid(T) == typeid(int)) + return H5T_NATIVE_INT; + + if (typeid(T) == typeid(unsigned)) + return H5T_NATIVE_UINT; + + if (typeid(T) == typeid(float)) + return H5T_NATIVE_FLOAT; + + if (typeid(T) == typeid(double)) + return H5T_NATIVE_DOUBLE; + + if (typeid(T) == typeid(long long)) + return H5T_NATIVE_LLONG; + + if (typeid(T) == 
typeid(unsigned long long)) + return H5T_NATIVE_ULLONG; + + if (typeid(T) == typeid(size_t)) + return H5T_NATIVE_ULLONG; + + std::cerr << " - Error: [HDF_IO] trying to evaluate unsupported type in GetDataType\n\n"; + return -1; +} + +template +void Grid_FFT::Read_from_HDF5(const std::string Filename, const std::string ObjName) +{ + hid_t HDF_Type = hdf5_get_data_type(); + + hid_t HDF_FileID = H5Fopen(Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + + //... save old error handler + herr_t (*old_func)(void *); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + hid_t HDF_DatasetID = H5Dopen(HDF_FileID, ObjName.c_str()); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if (HDF_DatasetID < 0) + { + csoca::wlog << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + H5Fclose(HDF_FileID); + abort(); + } + + //... get space associated with dataset and its extensions + hid_t HDF_DataspaceID = H5Dget_space(HDF_DatasetID); + + int ndims = H5Sget_simple_extent_ndims(HDF_DataspaceID); + + hsize_t dimsize[3]; + + H5Sget_simple_extent_dims(HDF_DataspaceID, dimsize, NULL); + + hsize_t HDF_StorageSize = 1; + for (int i = 0; i < ndims; ++i) + HDF_StorageSize *= dimsize[i]; + + //... adjust the array size to hold the data + std::vector Data; + Data.reserve(HDF_StorageSize); + Data.assign(HDF_StorageSize, (data_t)0); + + if (Data.capacity() < HDF_StorageSize) + { + csoca::elog << "Not enough memory to store all data in HDFReadDataset!\n"; + abort(); + } + + //... read the dataset + H5Dread(HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, H5P_DEFAULT, &Data[0]); + + if (Data.size() != HDF_StorageSize) + { + csoca::elog << "Something went wrong while reading!\n"; + abort(); + } + + H5Sclose(HDF_DataspaceID); + H5Dclose(HDF_DatasetID); + H5Fclose(HDF_FileID); + + assert( dimsize[0] == dimsize[1] && dimsize[0] == dimsize[2] ); + csoca::ilog << "Read external constraint data of dimensions " << dimsize[0] << "**3." << std::endl; + + for( size_t i=0; i<3; ++i ) this->n_[i] = dimsize[i]; + + if (data_ != nullptr) + { + fftw_free(data_); + } + this->Setup(); + + + //... copy data to internal array ... 
+ for (size_t i = 0; i < size(0); ++i) + { + for (size_t j = 0; j < size(1); ++j) + { + for (size_t k = 0; k < size(2); ++k) + { + this->relem(i,j,k) = Data[ (i*size(1) + j)*size(2)+k ]; + } + } + } +} + template void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) const { @@ -551,7 +673,7 @@ void Grid_FFT::Write_PowerSpectrum(std::string ofname) std::vector bin_k, bin_P, bin_eP; std::vector bin_count; int nbins = 4 * std::max(nhalf_[0], std::max(nhalf_[1], nhalf_[2])); - this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count ); + this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count); #if defined(USE_MPI) if (CONFIG::MPI_task_rank == 0) { @@ -577,7 +699,7 @@ void Grid_FFT::Write_PowerSpectrum(std::string ofname) } template -void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count ) +void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count) { this->FourierTransformForward(); diff --git a/src/ic_generator.cc b/src/ic_generator.cc index d4d160c..8bf1674 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -68,6 +68,10 @@ int Run( ConfigFile& the_config ) //! do baryon ICs? const bool bDoBaryons = the_config.GetValueSafe("setup", "DoBaryons", false ); + //-------------------------------------------------------------------------------------------------------- + //! do constrained ICs? + const bool bAddConstrainedModes = the_config.ContainsKey("setup", "ConstraintField" ); + //-------------------------------------------------------------------------------------------------------- //! add beyond box tidal field modes following Schmidt et al. (2018) [https://arxiv.org/abs/1803.03274] bool bAddExternalTides = the_config.ContainsKey("cosmology", "LSS_aniso_lx") @@ -184,9 +188,21 @@ int Run( ConfigFile& the_config ) // Fill the grid with a Gaussian white noise field //-------------------------------------------------------------------- the_random_number_generator->Fill_Grid( phi ); - phi.FourierTransformForward(); + //-------------------------------------------------------------------- + // with the unconstrained noise in Fourier space, add constrained + // modes for low k + //-------------------------------------------------------------------- + if( bAddConstrainedModes ){ + auto cfield_fname = the_config.GetValue("setup", "ConstraintField" ); + + } + + + //-------------------------------------------------------------------- + // Apply power spectrum + //-------------------------------------------------------------------- phi.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { real_t kmod = k.norm(); if( bDoFixing ) x = (std::abs(x)!=0.0)? x / std::abs(x) : x; From 8c9d2acf7a8dc72cde0fb832c42179308871ed8a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 24 Feb 2020 18:23:51 +0100 Subject: [PATCH 063/130] prepared adding in external large-scale modes --- src/grid_fft.cc | 17 ++++++++++++++--- src/ic_generator.cc | 12 ++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/grid_fft.cc b/src/grid_fft.cc index 07a704d..e1af60f 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -279,6 +279,11 @@ hid_t hdf5_get_data_type(void) template void Grid_FFT::Read_from_HDF5(const std::string Filename, const std::string ObjName) { + if( bdistributed ){ + csoca::elog << "Attempt to read from HDF5 into MPI-distributed array. This is not supported yet!" 
<< std::endl; + abort(); + } + hid_t HDF_Type = hdf5_get_data_type(); hid_t HDF_FileID = H5Fopen(Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); @@ -301,7 +306,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c //... dataset did not exist or was empty if (HDF_DatasetID < 0) { - csoca::wlog << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + csoca::elog << "Dataset \'" << ObjName.c_str() << "\' does not exist or is empty." << std::endl; H5Fclose(HDF_FileID); abort(); } @@ -326,7 +331,10 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c if (Data.capacity() < HDF_StorageSize) { - csoca::elog << "Not enough memory to store all data in HDFReadDataset!\n"; + csoca::elog << "Not enough memory to store all data in HDFReadDataset!" << std::endl; + H5Sclose(HDF_DataspaceID); + H5Dclose(HDF_DatasetID); + H5Fclose(HDF_FileID); abort(); } @@ -335,7 +343,10 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c if (Data.size() != HDF_StorageSize) { - csoca::elog << "Something went wrong while reading!\n"; + csoca::elog << "Something went wrong while reading!" << std::endl; + H5Sclose(HDF_DataspaceID); + H5Dclose(HDF_DatasetID); + H5Fclose(HDF_FileID); abort(); } diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 377b644..79b8ace 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -193,6 +193,18 @@ int Run( ConfigFile& the_config ) }); + //-------------------------------------------------------------------- + // Use externally specified large scale modes from constraints in case + //-------------------------------------------------------------------- + if( bAddConstrainedModes ){ + Grid_FFT cwnoise({8,8,8}, {boxlen,boxlen,boxlen}); + cwnoise.Read_from_HDF5( the_config.GetValue("setup", "ConstraintField"), "wnoise" ); + cwnoise.FourierTransformForward(); + + // TODO: copy over modes + } + + //-------------------------------------------------------------------- // Compute the LPT terms.... //-------------------------------------------------------------------- From 64e13026fb1148a7ac072057d7b255a5ed011ac0 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 24 Feb 2020 21:26:36 +0100 Subject: [PATCH 064/130] can read hdf5 noise files --- example.conf | 9 ++++++--- src/grid_fft.cc | 10 +++++++++- src/ic_generator.cc | 6 ++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/example.conf b/example.conf index 718e145..12d6a5f 100644 --- a/example.conf +++ b/example.conf @@ -8,11 +8,14 @@ zstart = 129.0 # order of the LPT to be used (1,2 or 3) LPTorder = 3 # also do baryon ICs? -DoBaryons = yes +DoBaryons = no # do mode fixing à la Angulo&Pontzen DoFixing = no # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!) -ParticleLoad = bcc +ParticleLoad = s +# Add a possible constraint field here: +ConstraintFieldFile = initial_conditions.h5 +ConstraintFieldName = ic [cosmology] transfer = CLASS @@ -42,7 +45,7 @@ seed = 9001 test = none [execution] -NumThreads = 16 +NumThreads = 8 [output] fname_hdf5 = output_sch.hdf5 diff --git a/src/grid_fft.cc b/src/grid_fft.cc index e1af60f..3a61608 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -358,6 +358,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c csoca::ilog << "Read external constraint data of dimensions " << dimsize[0] << "**3." 
<< std::endl; for( size_t i=0; i<3; ++i ) this->n_[i] = dimsize[i]; + this->space_ = rspace_id; if (data_ != nullptr) { @@ -367,6 +368,8 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c //... copy data to internal array ... + double sum1{0.0}, sum2{0.0}; + #pragma omp parallel for reduction(+:sum1,sum2) for (size_t i = 0; i < size(0); ++i) { for (size_t j = 0; j < size(1); ++j) @@ -374,9 +377,14 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c for (size_t k = 0; k < size(2); ++k) { this->relem(i,j,k) = Data[ (i*size(1) + j)*size(2)+k ]; + sum2 += std::real(this->relem(i,j,k)*this->relem(i,j,k)); + sum1 += std::real(this->relem(i,j,k)); } } - } + } + sum1 /= Data.size(); + sum2 /= Data.size(); + csoca::ilog << "Constraint field has =" << sum1 << ", -^2=" << std::sqrt(sum2-sum1*sum1) << std::endl; } template diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 79b8ace..dd4e6fa 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -85,7 +85,7 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------------------------------------------- //! do constrained ICs? - const bool bAddConstrainedModes = the_config.ContainsKey("setup", "ConstraintField" ); + const bool bAddConstrainedModes = the_config.ContainsKey("setup", "ConstraintFieldFile" ); //-------------------------------------------------------------------------------------------------------- //! add beyond box tidal field modes following Schmidt et al. (2018) [https://arxiv.org/abs/1803.03274] @@ -198,10 +198,12 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- if( bAddConstrainedModes ){ Grid_FFT cwnoise({8,8,8}, {boxlen,boxlen,boxlen}); - cwnoise.Read_from_HDF5( the_config.GetValue("setup", "ConstraintField"), "wnoise" ); + cwnoise.Read_from_HDF5( the_config.GetValue("setup", "ConstraintFieldFile"), + the_config.GetValue("setup", "ConstraintFieldName") ); cwnoise.FourierTransformForward(); // TODO: copy over modes + } From 10682e632e1960b6df9764fdccacb2a0e3decd9e Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 24 Feb 2020 21:27:27 +0100 Subject: [PATCH 065/130] class submodule update --- external/class | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/class b/external/class index 6f3abba..58e0adb 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit 6f3abbab2608712029d740d6c69aad0ba853e507 +Subproject commit 58e0adbb2cf845cd0766a26cecc1a153fa17d8b9 From ccd813a2ad2d434960577c7b2c347a8f62ff2ad3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Tue, 25 Feb 2020 18:36:46 +0100 Subject: [PATCH 066/130] added constraint mode copying. needs testing --- example.conf | 4 +-- src/ic_generator.cc | 65 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 61 insertions(+), 8 deletions(-) diff --git a/example.conf b/example.conf index 12d6a5f..4bd7af4 100644 --- a/example.conf +++ b/example.conf @@ -6,13 +6,13 @@ BoxLength = 250 # starting redshift zstart = 129.0 # order of the LPT to be used (1,2 or 3) -LPTorder = 3 +LPTorder = 1 # also do baryon ICs? DoBaryons = no # do mode fixing à la Angulo&Pontzen DoFixing = no # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!) 
-ParticleLoad = s +ParticleLoad = sc # Add a possible constraint field here: ConstraintFieldFile = initial_conditions.h5 ConstraintFieldName = ic diff --git a/src/ic_generator.cc b/src/ic_generator.cc index dd4e6fa..47f7e75 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -186,12 +186,6 @@ int Run( ConfigFile& the_config ) the_random_number_generator->Fill_Grid(wnoise); wnoise.FourierTransformForward(); - wnoise.apply_function_k( [&](auto wn){ - if (bDoFixing) - wn = (std::abs(wn) != 0.0) ? wn / std::abs(wn) : wn; - return wn / volfac; - }); - //-------------------------------------------------------------------- // Use externally specified large scale modes from constraints in case @@ -202,10 +196,69 @@ int Run( ConfigFile& the_config ) the_config.GetValue("setup", "ConstraintFieldName") ); cwnoise.FourierTransformForward(); + size_t ngrid_c = cwnoise.size(0), ngrid_c_2 = ngrid_c/2; + // TODO: copy over modes + double rs1{0.0},rs2{0.0},is1{0.0},is2{0.0}; + double nrs1{0.0},nrs2{0.0},nis1{0.0},nis2{0.0}; + double renormfac = std::pow(real_t(ngrid)/real_t(ngrid_c),1.5); + size_t count{0}; + + csoca::ilog << "renormfac = " << renormfac << " " << ngrid << " " << ngrid_c << std::endl; + + #pragma omp parallel for reduction(+:rs1,rs2,is1,is2,nrs1,nrs2,nis1,nis2,count) + for( size_t i=0; icwnoise.nhalf_[0] ) il = ngrid-ngrid_c_2+i; + if( il == size_t(-1) ) continue; + if( il=size_t(wnoise.local_1_start_+wnoise.local_1_size_)) continue; + il -= wnoise.local_1_start_; + for( size_t j=0; jcwnoise.nhalf_[1] ) jl = ngrid-ngrid_c_2+j; + for( size_t k=0; kcwnoise.nhalf_[2] ) kl = ngrid-ngrid_c_2+k; + if( kl == size_t(-1) ) continue; + + ++count; + + nrs1 += std::real(cwnoise.kelem(i,j,k) * renormfac); + nrs2 += std::real(cwnoise.kelem(i,j,k))*std::real(cwnoise.kelem(i,j,k)) * renormfac * renormfac; + nis1 += std::imag(cwnoise.kelem(i,j,k) * renormfac); + nis2 += std::imag(cwnoise.kelem(i,j,k))*std::imag(cwnoise.kelem(i,j,k)) * renormfac * renormfac; + + rs1 += std::real(wnoise.kelem(il,jl,kl)); + rs2 += std::real(wnoise.kelem(il,jl,kl))*std::real(wnoise.kelem(il,jl,kl)); + is1 += std::imag(wnoise.kelem(il,jl,kl)); + is2 += std::imag(wnoise.kelem(il,jl,kl))*std::imag(wnoise.kelem(il,jl,kl)); + + wnoise.kelem(il,jl,kl) = cwnoise.kelem(i,j,k) * renormfac; + } + } + } + + csoca::ilog << "old field: real part: =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; + csoca::ilog << "old field: imag part: =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; + csoca::ilog << "new field: real part: =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; + csoca::ilog << "new field: imag part: =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; + } + //-------------------------------------------------------------------- + // Apply Normalisation factor and Angulo&Pontzen fixing or not + //-------------------------------------------------------------------- + + wnoise.apply_function_k( [&](auto wn){ + if (bDoFixing) + wn = (std::abs(wn) != 0.0) ? wn / std::abs(wn) : wn; + return wn / volfac; + }); + //-------------------------------------------------------------------- // Compute the LPT terms.... 
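Note on the constraint-mode copying introduced in the patch above (and refined in the "fixes" commits that follow): the loop transplants the low-|k| Fourier modes of a coarse, externally supplied white-noise field into the matching corner of the larger simulation grid, wrapping negative frequencies around so that a coarse row index i > Nc/2 lands at fine row N - Nc + i. The sketch below is only an editorial illustration of that index mapping, not code from the patch: the names copy_constraint_modes, coarse, fine, N and Nc are invented for the example, it assumes plain std::complex arrays in the half-complex (r2c) layout, and it ignores the MPI slab offsets, the Nyquist-plane guards (the `continue` statements) and any amplitude renormalisation handled in the actual patch.

// Minimal sketch (illustration only), assuming N >= Nc and unit-stride r2c storage.
#include <complex>
#include <cstddef>
#include <vector>

void copy_constraint_modes(const std::vector<std::complex<double>> &coarse, std::size_t Nc,
                           std::vector<std::complex<double>> &fine, std::size_t N)
{
    // index helpers for the half-complex layout: (i*N + j)*(N/2+1) + k
    auto cidx = [&](std::size_t i, std::size_t j, std::size_t k) { return (i * Nc + j) * (Nc / 2 + 1) + k; };
    auto fidx = [&](std::size_t i, std::size_t j, std::size_t k) { return (i * N + j) * (N / 2 + 1) + k; };
    // row i of the coarse grid carries frequency i for i <= Nc/2 and i - Nc otherwise;
    // the same (negative) frequency lives at row N - Nc + i of the fine grid
    auto wrap = [&](std::size_t i) { return (i <= Nc / 2) ? i : N - Nc + i; };

    for (std::size_t i = 0; i < Nc; ++i)
        for (std::size_t j = 0; j < Nc; ++j)
            for (std::size_t k = 0; k <= Nc / 2; ++k) // last dimension stores only k >= 0
                fine[fidx(wrap(i), wrap(j), k)] = coarse[cidx(i, j, k)];
}

Keeping the negative-frequency rows aligned in this way is what makes the large-scale phases of the external field reappear unchanged in the new realisation; only the modes above the coarse grid's Nyquist frequency remain drawn from the code's own random number generator.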
From 2fe35aa2b438ebc291b2bc4600d1409a543590ed Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Tue, 25 Feb 2020 18:36:54 +0100 Subject: [PATCH 067/130] fixes --- src/ic_generator.cc | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 47f7e75..17530eb 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -201,50 +201,45 @@ int Run( ConfigFile& the_config ) // TODO: copy over modes double rs1{0.0},rs2{0.0},is1{0.0},is2{0.0}; double nrs1{0.0},nrs2{0.0},nis1{0.0},nis2{0.0}; - double renormfac = std::pow(real_t(ngrid)/real_t(ngrid_c),1.5); size_t count{0}; - csoca::ilog << "renormfac = " << renormfac << " " << ngrid << " " << ngrid_c << std::endl; - #pragma omp parallel for reduction(+:rs1,rs2,is1,is2,nrs1,nrs2,nis1,nis2,count) for( size_t i=0; icwnoise.nhalf_[0] ) il = ngrid-ngrid_c_2+i; + if( ingrid_c_2 && i+ngrid-ngrid_c_2>ngrid/2) il = ngrid-ngrid_c_2+i; if( il == size_t(-1) ) continue; if( il=size_t(wnoise.local_1_start_+wnoise.local_1_size_)) continue; il -= wnoise.local_1_start_; for( size_t j=0; jcwnoise.nhalf_[1] ) jl = ngrid-ngrid_c_2+j; + if( jngrid_c_2 && j+ngrid-ngrid_c_2>ngrid/2 ) jl = ngrid-ngrid_c_2+j; + if( jl == size_t(-1) ) continue; for( size_t k=0; kcwnoise.nhalf_[2] ) kl = ngrid-ngrid_c_2+k; - if( kl == size_t(-1) ) continue; - + size_t kl = k; + ++count; - nrs1 += std::real(cwnoise.kelem(i,j,k) * renormfac); - nrs2 += std::real(cwnoise.kelem(i,j,k))*std::real(cwnoise.kelem(i,j,k)) * renormfac * renormfac; - nis1 += std::imag(cwnoise.kelem(i,j,k) * renormfac); - nis2 += std::imag(cwnoise.kelem(i,j,k))*std::imag(cwnoise.kelem(i,j,k)) * renormfac * renormfac; + nrs1 += std::real(cwnoise.kelem(i,j,k)); + nrs2 += std::real(cwnoise.kelem(i,j,k))*std::real(cwnoise.kelem(i,j,k)); + nis1 += std::imag(cwnoise.kelem(i,j,k)); + nis2 += std::imag(cwnoise.kelem(i,j,k))*std::imag(cwnoise.kelem(i,j,k)); rs1 += std::real(wnoise.kelem(il,jl,kl)); rs2 += std::real(wnoise.kelem(il,jl,kl))*std::real(wnoise.kelem(il,jl,kl)); is1 += std::imag(wnoise.kelem(il,jl,kl)); is2 += std::imag(wnoise.kelem(il,jl,kl))*std::imag(wnoise.kelem(il,jl,kl)); - wnoise.kelem(il,jl,kl) = cwnoise.kelem(i,j,k) * renormfac; + wnoise.kelem(il,jl,kl) = cwnoise.kelem(i,j,k); } } } - csoca::ilog << "old field: real part: =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; - csoca::ilog << "old field: imag part: =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; - csoca::ilog << "new field: real part: =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; - csoca::ilog << "new field: imag part: =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; + csoca::ilog << " ... old field: re =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; + csoca::ilog << " ... old field: im =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; + csoca::ilog << " ... new field: re =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; + csoca::ilog << " ... 
new field: im =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; } From ed9f30235c391d644dc19dcde62d4cc50f30e374 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 26 Feb 2020 06:03:19 +0100 Subject: [PATCH 068/130] fixes to constraint inclusion --- src/ic_generator.cc | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 17530eb..e1d72cb 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -184,7 +184,7 @@ int Run( ConfigFile& the_config ) csoca::ilog << "Generating white noise field...." << std::endl; the_random_number_generator->Fill_Grid(wnoise); - + wnoise.FourierTransformForward(); //-------------------------------------------------------------------- @@ -206,17 +206,18 @@ int Run( ConfigFile& the_config ) #pragma omp parallel for reduction(+:rs1,rs2,is1,is2,nrs1,nrs2,nis1,nis2,count) for( size_t i=0; ingrid_c_2 && i+ngrid-ngrid_c_2>ngrid/2) il = ngrid-ngrid_c_2+i; + if( ingrid_c_2 && i+ngrid-ngrid_c>ngrid/2) il = ngrid-ngrid_c+i; if( il == size_t(-1) ) continue; if( il=size_t(wnoise.local_1_start_+wnoise.local_1_size_)) continue; il -= wnoise.local_1_start_; for( size_t j=0; jngrid_c_2 && j+ngrid-ngrid_c_2>ngrid/2 ) jl = ngrid-ngrid_c_2+j; + if( jngrid_c_2 && j+ngrid-ngrid_c>ngrid/2 ) jl = ngrid-ngrid_c+j; if( jl == size_t(-1) ) continue; for( size_t k=0; kngrid/2 ) continue; size_t kl = k; ++count; @@ -231,17 +232,20 @@ int Run( ConfigFile& the_config ) is1 += std::imag(wnoise.kelem(il,jl,kl)); is2 += std::imag(wnoise.kelem(il,jl,kl))*std::imag(wnoise.kelem(il,jl,kl)); + #if defined(USE_MPI) + wnoise.kelem(il,jl,kl) = cwnoise.kelem(j,i,k); + #else wnoise.kelem(il,jl,kl) = cwnoise.kelem(i,j,k); + #endif } } } - csoca::ilog << " ... old field: re =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; - csoca::ilog << " ... old field: im =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; - csoca::ilog << " ... new field: re =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; - csoca::ilog << " ... new field: im =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; - - + // csoca::ilog << " ... old field: re =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; + // csoca::ilog << " ... old field: im =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; + // csoca::ilog << " ... new field: re =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; + // csoca::ilog << " ... new field: im =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; + csoca::ilog << "White noise field large-scale modes overwritten with external field." << std::endl; } //-------------------------------------------------------------------- From f2ba17cfcd7006992fdcbe57928bf987b82ed14c Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 27 Feb 2020 22:56:40 +0100 Subject: [PATCH 069/130] added enforced normalisation of read white noise --- example.conf | 8 ++++---- src/grid_fft.cc | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/example.conf b/example.conf index 4bd7af4..33e227a 100644 --- a/example.conf +++ b/example.conf @@ -4,9 +4,9 @@ GridRes = 128 # length of the box in Mpc/h BoxLength = 250 # starting redshift -zstart = 129.0 +zstart = 49.0 # order of the LPT to be used (1,2 or 3) -LPTorder = 1 +LPTorder = 3 # also do baryon ICs? 
DoBaryons = no # do mode fixing à la Angulo&Pontzen @@ -15,11 +15,11 @@ DoFixing = no ParticleLoad = sc # Add a possible constraint field here: ConstraintFieldFile = initial_conditions.h5 -ConstraintFieldName = ic +ConstraintFieldName = ic_white_noise [cosmology] transfer = CLASS -ztarget = 0.0 +ztarget = 2.5 # transfer = eisenstein # transfer = file_CAMB # transfer_file = wmap5_transfer_out_z0.dat diff --git a/src/grid_fft.cc b/src/grid_fft.cc index 3a61608..5ae6b24 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -384,7 +384,20 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c } sum1 /= Data.size(); sum2 /= Data.size(); - csoca::ilog << "Constraint field has =" << sum1 << ", -^2=" << std::sqrt(sum2-sum1*sum1) << std::endl; + auto stdw = std::sqrt(sum2-sum1*sum1); + csoca::ilog << "Constraint field has =" << sum1 << ", -^2=" << stdw << std::endl; + + #pragma omp parallel for reduction(+:sum1,sum2) + for (size_t i = 0; i < size(0); ++i) + { + for (size_t j = 0; j < size(1); ++j) + { + for (size_t k = 0; k < size(2); ++k) + { + this->relem(i,j,k) /= stdw; + } + } + } } template From 2dfab2b2670225d99b0a8dde65a622b07929fcdc Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 28 Feb 2020 16:15:37 +0100 Subject: [PATCH 070/130] working commit --- example.conf | 21 +- external/class | 2 +- include/general.hh | 1 - include/grid_fft.hh | 17 ++ include/particle_generator.hh | 7 +- include/particle_plt.hh | 338 ++++++++---------------------- src/ic_generator.cc | 12 +- src/main.cc | 2 +- src/plugins/output_gadget_hdf5.cc | 2 +- 9 files changed, 133 insertions(+), 269 deletions(-) diff --git a/example.conf b/example.conf index 718e145..c66a520 100644 --- a/example.conf +++ b/example.conf @@ -2,17 +2,18 @@ # number of grid cells per linear dimension for calculations = particles for sc initial load GridRes = 128 # length of the box in Mpc/h -BoxLength = 250 +BoxLength = 125 # starting redshift -zstart = 129.0 +zstart = 49.0 +#zstart = 19.0 # order of the LPT to be used (1,2 or 3) LPTorder = 3 # also do baryon ICs? -DoBaryons = yes +DoBaryons = no # do mode fixing à la Angulo&Pontzen -DoFixing = no +DoFixing = yes # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!) 
-ParticleLoad = bcc +ParticleLoad = sc [cosmology] transfer = CLASS @@ -42,18 +43,18 @@ seed = 9001 test = none [execution] -NumThreads = 16 +NumThreads = 1 [output] fname_hdf5 = output_sch.hdf5 fbase_analysis = output -#format = gadget2 -#filename = ics_gadget.dat +format = gadget2 +filename = ics_gadget.dat #UseLongids = false # -format = gadget_hdf5 -filename = ics_gadget.hdf5 +#format = gadget_hdf5 +#filename = ics_gadget.hdf5 #format = generic diff --git a/external/class b/external/class index 6f3abba..58e0adb 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit 6f3abbab2608712029d740d6c69aad0ba853e507 +Subproject commit 58e0adbb2cf845cd0766a26cecc1a153fa17d8b9 diff --git a/include/general.hh b/include/general.hh index c77be01..b7f7df3 100644 --- a/include/general.hh +++ b/include/general.hh @@ -126,7 +126,6 @@ inline void multitask_sync_barrier( void ) } - namespace CONFIG { extern int MPI_thread_support; diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 66c1a6f..e98d6a7 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -242,6 +242,23 @@ public: return kk; } + template + vec3 get_k(const real_t i, const real_t j, const real_t k) const + { + vec3 kk; + if( bdistributed ){ + auto ip = i + real_t(local_1_start_); + kk[0] = (j - real_t(j > real_t(nhalf_[0])) * n_[0]) * kfac_[0]; + kk[1] = (ip - real_t(ip > real_t(nhalf_[1])) * n_[1]) * kfac_[1]; + }else{ + kk[0] = (real_t(i) - real_t(i > real_t(nhalf_[0])) * n_[0]) * kfac_[0]; + kk[1] = (real_t(j) - real_t(j > real_t(nhalf_[1])) * n_[1]) * kfac_[1]; + } + kk[2] = (real_t(k) - real_t(k > real_t(nhalf_[2])) * n_[2]) * kfac_[2]; + + return kk; + } + std::array get_k3(const size_t i, const size_t j, const size_t k) const { return bdistributed? std::array({j,i+local_1_start_,k}) : std::array({i,j,k}); diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 801c919..56c69f4 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -29,9 +29,10 @@ const std::vector< std::vector> > lattice_shifts = const std::vector> second_lattice_shift = { - /* SC : */ {0.5, 0.5, 0.5}, - /* BCC: */ {0.5, 0.5, 0.0}, - /* FCC: */ {0.25, 0.25, 0.25}, + /* SC : */ {0.5, 0.5, 0.5}, // this corresponds to CsCl lattice + /* BCC: */ {0.5, 0.5, 0.0}, // is there a diatomic lattice with BCC base?!? 
+ /* FCC: */ {0.5, 0.5, 0.5}, // this corresponds to NaCl lattice + // /* FCC: */ {0.25, 0.25, 0.25}, // this corresponds to Zincblende/GaAs lattice /* RSC: */ {0.25, 0.25, 0.25}, }; diff --git a/include/particle_plt.hh b/include/particle_plt.hh index e636dcc..e95308f 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -9,10 +9,18 @@ #include #include +#include + #include #include #include +#include +inline double Hypergeometric2F1( double a, double b, double c, double x ) +{ + return gsl_sf_hyperg_2F1( a, b, c, x); +} + #define PRODUCTION namespace particle{ @@ -20,7 +28,7 @@ namespace particle{ class lattice_gradient{ private: - const real_t boxlen_; + const real_t boxlen_, XmL_, aini_; const size_t ngmapto_, ngrid_, ngrid32_; const real_t mapratio_; Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; @@ -448,7 +456,7 @@ private: vec3 evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}); evec1 /= evec1.norm(); - if(std::abs(ii)+std::abs(jj)+k<8){ + if(false){//std::abs(ii)+std::abs(jj)+k<8){ // small k modes, use usual pseudospectral derivative // -- store in diagonal components of D_ij D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kv.x/mapratio_/boxlen_); @@ -460,15 +468,40 @@ private: }else{ // large k modes, use interpolated PLT results // -- store in diagonal components of D_ij - D_xx_.kelem(i,j,k) = ccomplex_t(0.0,evec1.x * kmod); - D_yy_.kelem(i,j,k) = ccomplex_t(0.0,evec1.y * kmod); - D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod); + // D_xx_.kelem(i,j,k) = ccomplex_t(0.0,evec1.x * kmod); + // D_yy_.kelem(i,j,k) = ccomplex_t(0.0,evec1.y * kmod); + // D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod); - // re-normalise to that longitudinal amplitude is exact + // // re-normalise to that longitudinal amplitude is exact + evec1 = kv; auto norm = (kv.norm()/kv.dot(evec1)); - D_xx_.kelem(i,j,k) *= norm; - D_yy_.kelem(i,j,k) *= norm; - D_zz_.kelem(i,j,k) *= norm; + // D_xx_.kelem(i,j,k) *= norm; + // D_yy_.kelem(i,j,k) *= norm; + // D_zz_.kelem(i,j,k) *= norm; + + /////////////////////////////////// + // project onto spherical coordinate vectors + + real_t kr = kv.norm(), kphi = std::atan2(kv.y,kv.x), ktheta = std::acos( kv.z / kr ); + real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); + vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + + //vec3 e_r( 1.0, 0.0, 0.0 ), e_theta( 0.0, 1.0, 0.0 ), e_phi( 0.0, 0.0, 1.0 ); + + D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod*norm * evec1.dot( e_r ) ); + D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kmod*norm * evec1.dot( e_theta ) ); + D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kmod*norm * evec1.dot( e_phi ) ); + + real_t eve1p1 = kmod*norm * evec1.dot( e_r ); + real_t eve1p2 = kmod*norm * evec1.dot( e_theta ); + real_t eve1p3 = kmod*norm * evec1.dot( e_phi ); + + auto rvec = eve1p1 * e_r + eve1p2 * e_theta + eve1p3 * e_phi; + + std::cerr << D_xx_.kelem(i,j,k) << " " << D_yy_.kelem(i,j,k) << " " << D_zz_.kelem(i,j,k) << std::endl; + + //std::cerr << rvec.x << " " << evec1.x * kmod*norm << std::endl; + // spatially dependent correction to vfact = \dot{D_+}/D_+ D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); @@ -500,244 +533,14 @@ private: #endif } - void init_D__old() - { - constexpr real_t pi = M_PI, twopi = 2.0*M_PI; - - const std::vector> normals_bcc{ - {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, - {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, - {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} - }; - - const 
std::vector> bcc_reciprocal{ - {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} - }; - - const real_t eta = 2.0/ngrid_; // Ewald cutoff shall be 2 cells - const real_t alpha = 1.0/std::sqrt(2)/eta; - const real_t alpha2 = alpha*alpha; - const real_t alpha3 = alpha2*alpha; - const real_t sqrtpi = std::sqrt(M_PI); - const real_t pi32 = std::pow(M_PI,1.5); - - //! just a Kronecker \delta_ij - auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; - - //! short range component of Ewald sum, eq. (A2) of Marcos (2008) - auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { - auto d = vR-vP; - auto r = d.norm(); - if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! - real_t val{0.0}; - val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r); - val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * - (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); - return val; - }; - - //! sums mirrored copies of short-range component of Ewald sum - auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ - real_t sr = 0.0; - constexpr int N = 3; // number of repeated copies ±N per dimension - int count = 0; - for( int i=-N; i<=N; ++i ){ - for( int j=-N; j<=N; ++j ){ - for( int k=-N; k<=N; ++k ){ - if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ - //sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); - sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); - sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} ); - count += 2; - - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; - } - } - } - } - return sr / count; - }; - - //! fill D_ij array with short range evaluated function - #pragma omp parallel for - for( size_t i=0; i p; - p.x = real_t(i)/ngrid_; - for( size_t j=0; j D; - vec3 eval, evec1, evec2, evec3; - - #pragma omp for - for( size_t i=0; i kv = D_xx_.get_k(i,j,k); - auto& b=bcc_reciprocal; - vec3 kvc = { b[0][0]*kvc[0]+b[1][0]*kvc[1]+b[2][0]*kvc[2], - b[0][1]*kvc[0]+b[1][1]*kvc[1]+b[2][1]*kvc[2], - b[0][2]*kvc[0]+b[1][2]*kvc[1]+b[2][2]*kvc[2] }; - // vec3 kv = {kvc.dot(bcc_reciprocal[0]),kvc.dot(bcc_reciprocal[1]),kvc.dot(bcc_reciprocal[2])}; - const real_t kmod2 = kv.norm_squared(); - - // long range component of Ewald sum - //ccomplex_t shift = 1.0;//std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])* D_xx_.get_dx()[0])); - ccomplex_t phi0 = -rho0 * std::exp(-0.5*eta*eta*kmod2) / kmod2; - phi0 = (phi0==phi0)? 
phi0 : 0.0; // catch NaN from division by zero when kmod2=0 - - - // const int nn = 3; - // size_t nsum = 0; - // ccomplex_t ff = 0.0; - // for( int is=-nn;is<=nn;is++){ - // for( int js=-nn;js<=nn;js++){ - // for( int ks=-nn;ks<=nn;ks++){ - // if( std::abs(is)+std::abs(js)+std::abs(ks) <= nn ){ - // ff += std::exp(ccomplex_t(0.0,(((is)*kv[0] + (js)*kv[1] + (ks)*kv[2])))); - // ff += std::exp(ccomplex_t(0.0,(((0.5+is)*kv[0] + (0.5+js)*kv[1] + (0.5+ks)*kv[2])))); - // ++nsum; - // } - // } - // } - // } - // ff /= nsum; - // ccomplex_t ff = 1.0; - ccomplex_t ff = (0.5+0.5*std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])))); - // assemble short-range + long_range of Ewald sum and add DC component to trace - D_xx_.kelem(i,j,k) = ff*((D_xx_.kelem(i,j,k) - kv[0]*kv[0] * phi0)*nfac) + 1.0/3.0; - D_xy_.kelem(i,j,k) = ff*((D_xy_.kelem(i,j,k) - kv[0]*kv[1] * phi0)*nfac); - D_xz_.kelem(i,j,k) = ff*((D_xz_.kelem(i,j,k) - kv[0]*kv[2] * phi0)*nfac); - D_yy_.kelem(i,j,k) = ff*((D_yy_.kelem(i,j,k) - kv[1]*kv[1] * phi0)*nfac) + 1.0/3.0; - D_yz_.kelem(i,j,k) = ff*((D_yz_.kelem(i,j,k) - kv[1]*kv[2] * phi0)*nfac); - D_zz_.kelem(i,j,k) = ff*((D_zz_.kelem(i,j,k) - kv[2]*kv[2] * phi0)*nfac) + 1.0/3.0; - - } - } - } - - D_xx_.kelem(0,0,0) = 1.0/3.0; - D_xy_.kelem(0,0,0) = 0.0; - D_xz_.kelem(0,0,0) = 0.0; - D_yy_.kelem(0,0,0) = 1.0/3.0; - D_yz_.kelem(0,0,0) = 0.0; - D_zz_.kelem(0,0,0) = 1.0/3.0; - - #pragma omp for - for( size_t i=0; i kv = D_xx_.get_k(i,j,k); - const real_t kmod = kv.norm()/mapratio_/boxlen_; - - // store in diagonal components of D_ij - D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.x; - D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.y; - D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.z; - - auto norm = (kv.norm()/kv.dot(evec3)); - if ( std::abs(kv.dot(evec3)) < 1e-10 || kv.norm() < 1e-10 ) norm = 0.0; - - D_xx_.kelem(i,j,k) *= norm; - D_yy_.kelem(i,j,k) *= norm; - D_zz_.kelem(i,j,k) *= norm; - - // spatially dependent correction to vfact = \dot{D_+}/D_+ - D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*eval[2])-1.)); -#else - - D_xx_.kelem(i,j,k) = eval[2]; - D_yy_.kelem(i,j,k) = eval[1]; - D_zz_.kelem(i,j,k) = eval[0]; - - D_xy_.kelem(i,j,k) = evec3[0]; - D_xz_.kelem(i,j,k) = evec3[1]; - D_yz_.kelem(i,j,k) = evec3[2]; -#endif - } - } - } - } -#ifdef PRODUCTION - D_xy_.kelem(0,0,0) = 1.0; -#endif - - ////////////////////////////////////////// - std::string filename("plt_test.hdf5"); - unlink(filename.c_str()); - #if defined(USE_MPI) - MPI_Barrier(MPI_COMM_WORLD); - #endif - // rho.Write_to_HDF5(filename, "rho"); - D_xx_.Write_to_HDF5(filename, "omega1"); - D_yy_.Write_to_HDF5(filename, "omega2"); - D_zz_.Write_to_HDF5(filename, "omega3"); - D_xy_.Write_to_HDF5(filename, "e1_x"); - D_xz_.Write_to_HDF5(filename, "e1_y"); - D_yz_.Write_to_HDF5(filename, "e1_z"); - - } - public: // real_t boxlen, size_t ngridother explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=64 ) : boxlen_( the_config.GetValue("setup", "BoxLength") ), ngmapto_( the_config.GetValue("setup", "GridRes") ), + XmL_ ( the_config.GetValue("cosmology", "Omega_L") / the_config.GetValue("cosmology", "Omega_m") ), + aini_ ( 1.0/(1.0+the_config.GetValue("setup", "zstart")) ), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), @@ -775,15 +578,58 @@ public: inline 
ccomplex_t gradient( const int idim, std::array ijk ) const { real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; - if( idim == 0 ) return D_xx_.get_cic_kspace({ix,iy,iz}); - else if( idim == 1 ) return D_yy_.get_cic_kspace({ix,iy,iz}); - return D_zz_.get_cic_kspace({ix,iy,iz}); + + // if( idim == 0 ) return D_xx_.get_cic_kspace({ix,iy,iz}); + // else if( idim == 1 ) return D_yy_.get_cic_kspace({ix,iy,iz}); + // return D_zz_.get_cic_kspace({ix,iy,iz}); + + /////// + // auto kv = D_xx_.get_k( static_cast(ix), static_cast(iy), static_cast(iz) ); + auto kv = D_xx_.get_k( ix, iy, iz ) / mapratio_; + + // project onto spherical coordinate vectors + //real_t kr = kv.norm(), kphi = kr > 0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; + + // vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + auto D_r = D_xx_.get_cic_kspace({ix,iy,iz}); + auto D_theta = D_yy_.get_cic_kspace({ix,iy,iz}); + auto D_phi = D_zz_.get_cic_kspace({ix,iy,iz}); + real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; + real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); + + //real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); + vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + + vec3 evec3 = D_r.imag() * e_r + D_theta.imag() * e_theta + D_phi.imag() * e_phi; + + assert(!std::isnan(std::imag(D_r * st * cp + D_theta * ct * cp - D_phi * sp))); + assert(!std::isnan(std::imag(D_r * st * sp + D_theta * ct * sp + D_phi * cp))); + assert(!std::isnan(std::imag(D_r * ct - D_theta * st))); + assert(!std::isnan(std::real(D_r * st * cp + D_theta * ct * cp - D_phi * sp))); + assert(!std::isnan(std::real(D_r * st * sp + D_theta * ct * sp + D_phi * cp))); + assert(!std::isnan(std::real(D_r * ct - D_theta * st))); + + // std::cerr << kv.x/boxlen_ << " " << kv.y/boxlen_ << " " << kv.z/boxlen_ << " -- " << D_r * st * cp + D_theta * ct * cp - D_phi * sp << " " << D_r * st * sp + D_theta * ct * sp + D_phi * cp << " " << (D_r * ct - D_theta * st ) << std::endl; + + if( idim == 0 ){ + return ccomplex_t( 0.0, evec3.x );//D_r; //D_r * st * cp + D_theta * ct * cp - D_phi * sp; + } + else if( idim == 1 ){ + return ccomplex_t( 0.0, evec3.y );;//D_theta; //D_r * st * sp + D_theta * ct * sp + D_phi * cp; + } + return ccomplex_t( 0.0, evec3.z );//D_phi; //(D_r * ct - D_theta * st ); } - inline real_t vfac_corr( std::array ijk ) const + inline real_t vfac_corr( std::array ijk ) const { real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; - return std::real(D_xy_.get_cic_kspace({ix,iy,iz})); + const real_t alpha = 1.0/std::real(D_xy_.get_cic_kspace({ix,iy,iz})); + return 1.0/alpha; + // // below is for LCDM: + //! 
X = \Omega_\Lambda / \Omega_m + // return 1.0 / (alpha - (2*std::pow(aini_,3)*alpha*(2 + alpha)*XmL_*Hypergeometric2F1((3 + alpha)/3.,(5 + alpha)/3., + // (13 + 4*alpha)/6.,-(std::pow(aini_,3)*XmL_)))/ + // ((7 + 4*alpha)*Hypergeometric2F1(alpha/3.,(2 + alpha)/3.,(7 + 4*alpha)/6.,-(std::pow(aini_,3)*XmL_)))); } }; diff --git a/src/ic_generator.cc b/src/ic_generator.cc index ec71944..66858ba 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -203,8 +203,8 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- // Create PLT gradient operator //-------------------------------------------------------------------- - // particle::lattice_gradient lg( the_config ); - op::fourier_gradient lg( the_config ); + particle::lattice_gradient lg( the_config ); + // op::fourier_gradient lg( the_config ); //-------------------------------------------------------------------- std::vector species_list; @@ -500,8 +500,8 @@ int Run( ConfigFile& the_config ) real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, cdm0) : (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, baryon0) : - // the_cosmo_calc->GetAmplitude(kmod, total0)) - the_cosmo_calc->GetAmplitude(kmod, total0); - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); + the_cosmo_calc->GetAmplitude(kmod, total0)) - the_cosmo_calc->GetAmplitude(kmod, total0); + //the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * lunit * ampldiff / k2 / boxlen; } @@ -549,8 +549,8 @@ int Run( ConfigFile& the_config ) real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, vcdm0) : (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, vbaryon0) : - // the_cosmo_calc->GetAmplitude(kmod, total0)) - the_cosmo_calc->GetAmplitude(kmod, total0); - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); + the_cosmo_calc->GetAmplitude(kmod, vtotal0)) - the_cosmo_calc->GetAmplitude(kmod, vtotal0); + //the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * vfac1 * vunit / boxlen * ampldiff / k2 ; } diff --git a/src/main.cc b/src/main.cc index c36943c..cbdf209 100644 --- a/src/main.cc +++ b/src/main.cc @@ -193,7 +193,7 @@ int main( int argc, char** argv ) #endif csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Done.\n" << std::endl; + csoca::ilog << "Done. 
Have a nice day!\n" << std::endl; return 0; } diff --git a/src/plugins/output_gadget_hdf5.cc b/src/plugins/output_gadget_hdf5.cc index 43afbe1..f32f9c8 100644 --- a/src/plugins/output_gadget_hdf5.cc +++ b/src/plugins/output_gadget_hdf5.cc @@ -121,7 +121,7 @@ public: HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array(header_.npartTotalHighWord)); HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Entropy_ICs", from_value(header_.flag_entropy_instead_u)); - csoca::ilog << "Wrote" << std::endl; + csoca::ilog << "Wrote Gadget-HDF5 file(s) to " << this_fname_ << std::endl; } output_type write_species_as(const cosmo_species &) const { return output_type::particles; } From 35344f017029284a18f6d1971107cecee1bac1c2 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 28 Feb 2020 17:40:50 +0100 Subject: [PATCH 071/130] added arepo plugin, but this might get removed again since too similar to gadget-hdf5 --- example.conf | 8 +- src/plugins/output_arepo.cc | 241 ++++++++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+), 3 deletions(-) create mode 100644 src/plugins/output_arepo.cc diff --git a/example.conf b/example.conf index 33e227a..2066756 100644 --- a/example.conf +++ b/example.conf @@ -54,10 +54,12 @@ fbase_analysis = output #format = gadget2 #filename = ics_gadget.dat #UseLongids = false -# -format = gadget_hdf5 -filename = ics_gadget.hdf5 +#format = gadget_hdf5 +#filename = ics_gadget.hdf5 + +format = AREPO +filename = ics_arepo.hdf5 #format = generic #filename = debug.hdf5 diff --git a/src/plugins/output_arepo.cc b/src/plugins/output_arepo.cc new file mode 100644 index 0000000..8d8d903 --- /dev/null +++ b/src/plugins/output_arepo.cc @@ -0,0 +1,241 @@ + +#ifdef USE_HDF5 +#include // for unlink +#include +#include "HDF_IO.hh" + +template +std::vector from_6array(const T *a) +{ + return std::vector{{a[0], a[1], a[2], a[3], a[4], a[5]}}; +} + +template +std::vector from_value(const T a) +{ + return std::vector{{a}}; +} + +template +class gadget_hdf5_output_plugin : public output_plugin +{ + struct header_t + { + unsigned npart[6]; + double mass[6]; + double time; + double redshift; + int flag_sfr; + int flag_feedback; + unsigned int npartTotal[6]; + int flag_cooling; + int num_files; + double BoxSize; + double Omega0; + double OmegaLambda; + double HubbleParam; + int flag_stellarage; + int flag_metals; + unsigned int npartTotalHighWord[6]; + int flag_entropy_instead_u; + int flag_doubleprecision; + }; + +protected: + int num_files_, num_simultaneous_writers_; + header_t header_; + real_t lunit_, vunit_; + bool blongids_; + std::string this_fname_; + double Tini_; + unsigned pmgrid_; + unsigned gridboost_; + int doublePrec_; + int doBaryons_; + double softening_; + +public: + //! 
constructor + explicit gadget_hdf5_output_plugin(ConfigFile &cf) + : output_plugin(cf, "GADGET-HDF5") + { + num_files_ = 1; +#ifdef USE_MPI + // use as many output files as we have MPI tasks + MPI_Comm_size(MPI_COMM_WORLD, &num_files_); +#endif + real_t astart = 1.0 / (1.0 + cf_.GetValue("setup", "zstart")); + lunit_ = cf_.GetValue("setup", "BoxLength"); + vunit_ = lunit_ / std::sqrt(astart); + blongids_ = cf_.GetValueSafe("output", "UseLongids", false); + num_simultaneous_writers_ = cf_.GetValueSafe("output", "NumSimWriters", num_files_); + + for (int i = 0; i < 6; ++i) + { + header_.npart[i] = 0; + header_.npartTotal[i] = 0; + header_.npartTotalHighWord[i] = 0; + header_.mass[i] = 0.0; + } + + header_.time = astart; + header_.redshift = 1.0 / astart - 1.0; + header_.flag_sfr = 0; + header_.flag_feedback = 0; + header_.flag_cooling = 0; + header_.num_files = num_files_; + header_.BoxSize = lunit_; + header_.Omega0 = cf_.GetValue("cosmology", "Omega_m"); + header_.OmegaLambda = cf_.GetValue("cosmology", "Omega_L"); + header_.HubbleParam = cf_.GetValue("cosmology", "H0") / 100.0; + header_.flag_stellarage = 0; + header_.flag_metals = 0; + header_.flag_entropy_instead_u = 0; + header_.flag_doubleprecision = (typeid(write_real_t) == typeid(double)) ? true : false; + + // initial gas temperature + double Tcmb0 = 2.726; + double Omegab = cf_.GetValue("cosmology", "Omega_b"); + double h = cf_.GetValue("cosmology", "H0") / 100.0, h2 = h*h; + double adec = 1.0 / (160.0 * pow(Omegab * h2 / 0.022, 2.0 / 5.0)); + Tini_ = astart < adec ? Tcmb0 / astart : Tcmb0 / astart / astart * adec; + + // suggested PM res + pmgrid_ = 2*cf_.GetValue("setup", "GridRes"); + gridboost_ = 1; + softening_ = cf_.GetValue("setup", "BoxLength")/pmgrid_/20; + doBaryons_ = cf_.GetValue("setup", "DoBaryons"); +#if !defined(USE_SINGLEPRECISION) + doublePrec_ = 1; +#else + doublePrec_ = 0; +#endif + + this_fname_ = fname_; +#ifdef USE_MPI + int thisrank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &thisrank); + if (num_files_ > 1) + this_fname_ += "." 
+ std::to_string(thisrank); +#endif + + unlink(this_fname_.c_str()); + HDFCreateFile(this_fname_); + } + + // use destructor to write header post factum + ~gadget_hdf5_output_plugin() + { + HDFCreateGroup(this_fname_, "Header"); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_ThisFile", from_6array(header_.npart)); + HDFWriteGroupAttribute(this_fname_, "Header", "MassTable", from_6array(header_.mass)); + HDFWriteGroupAttribute(this_fname_, "Header", "Time", from_value(header_.time)); + HDFWriteGroupAttribute(this_fname_, "Header", "Redshift", from_value(header_.redshift)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total", from_6array(header_.npartTotal)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array(header_.npartTotalHighWord)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumFilesPerSnapshot", from_value(header_.num_files)); + HDFWriteGroupAttribute(this_fname_, "Header", "BoxSize", from_value(header_.BoxSize)); + HDFWriteGroupAttribute(this_fname_, "Header", "Omega0", from_value(header_.Omega0)); + HDFWriteGroupAttribute(this_fname_, "Header", "OmegaLambda", from_value(header_.OmegaLambda)); + HDFWriteGroupAttribute(this_fname_, "Header", "HubbleParam", from_value(header_.HubbleParam)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Sfr", from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Cooling", from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_StellarAge", from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Metals", from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Feedback", from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_DoublePrecision", (int)doublePrec_); + // HDFWriteGroupAttribute(this_fname_, "Header", "Music_levelmin", levelmin_); + // HDFWriteGroupAttribute(this_fname_, "Header", "Music_levelmax", levelmax_); + // HDFWriteGroupAttribute(this_fname_, "Header", "Music_levelcounts", levelcounts); + HDFWriteGroupAttribute(this_fname_, "Header", "haveBaryons", from_value((int)doBaryons_)); + HDFWriteGroupAttribute(this_fname_, "Header", "longIDs", from_value((int)blongids_)); + HDFWriteGroupAttribute(this_fname_, "Header", "suggested_pmgrid", from_value(pmgrid_)); + HDFWriteGroupAttribute(this_fname_, "Header", "suggested_gridboost", from_value(gridboost_)); + HDFWriteGroupAttribute(this_fname_, "Header", "suggested_highressoft", from_value(softening_)); + HDFWriteGroupAttribute(this_fname_, "Header", "suggested_gas_Tinit", from_value(Tini_)); + + csoca::ilog << "Wrote" << std::endl; + } + + output_type write_species_as(const cosmo_species &) const { return output_type::particles; } + + real_t position_unit() const { return lunit_; } + + real_t velocity_unit() const { return vunit_; } + + bool has_64bit_reals() const + { + if (typeid(write_real_t) == typeid(double)) + return true; + return false; + } + + bool has_64bit_ids() const + { + if (blongids_) + return true; + return false; + } + + int get_species_idx(const cosmo_species &s) const + { + switch (s) + { + case cosmo_species::dm: + return 1; + case cosmo_species::baryon: + return 2; + case cosmo_species::neutrino: + return 3; + } + return -1; + } + + void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species) + { + int sid = get_species_idx(s); + + assert(sid != -1); + + header_.npart[sid] = (pc.get_local_num_particles()); + header_.npartTotal[sid] = (uint32_t)(pc.get_global_num_particles()); + 
header_.npartTotalHighWord[sid] = (uint32_t)((pc.get_global_num_particles()) >> 32); + + double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 + double boxmass = Omega_species * rhoc * std::pow(header_.BoxSize, 3); + header_.mass[sid] = boxmass / pc.get_global_num_particles(); + + HDFCreateGroup(this_fname_, std::string("PartType") + std::to_string(sid)); + + //... write positions and velocities..... + if (this->has_64bit_reals()) + { + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions64_); + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities64_); + } + else + { + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions32_); + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities32_); + } + + //... write ids..... + if (this->has_64bit_ids()) + HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids64_); + else + HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids32_); + + // std::cout << ">>>A> " << header_.npart[sid] << std::endl; + } +}; + +namespace +{ +#if !defined(USE_SINGLEPRECISION) +output_plugin_creator_concrete> creator1("AREPO"); +#else +output_plugin_creator_concrete> creator1("AREPO"); +#endif +} // namespace + +#endif \ No newline at end of file From 77f9f06ebc2838ff499dedd551134bc07ffca317 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 28 Feb 2020 18:03:48 +0100 Subject: [PATCH 072/130] working commit, PLT interpolation --- include/particle_plt.hh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index e95308f..18e6394 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -498,7 +498,7 @@ private: auto rvec = eve1p1 * e_r + eve1p2 * e_theta + eve1p3 * e_phi; - std::cerr << D_xx_.kelem(i,j,k) << " " << D_yy_.kelem(i,j,k) << " " << D_zz_.kelem(i,j,k) << std::endl; + // std::cerr << D_xx_.kelem(i,j,k) << " " << D_yy_.kelem(i,j,k) << " " << D_zz_.kelem(i,j,k) << std::endl; //std::cerr << rvec.x << " " << evec1.x * kmod*norm << std::endl; @@ -612,12 +612,12 @@ public: // std::cerr << kv.x/boxlen_ << " " << kv.y/boxlen_ << " " << kv.z/boxlen_ << " -- " << D_r * st * cp + D_theta * ct * cp - D_phi * sp << " " << D_r * st * sp + D_theta * ct * sp + D_phi * cp << " " << (D_r * ct - D_theta * st ) << std::endl; if( idim == 0 ){ - return ccomplex_t( 0.0, evec3.x );//D_r; //D_r * st * cp + D_theta * ct * cp - D_phi * sp; + return D_r * st * cp + D_theta * ct * cp - D_phi * sp; } else if( idim == 1 ){ - return ccomplex_t( 0.0, evec3.y );;//D_theta; //D_r * st * sp + D_theta * ct * sp + D_phi * cp; + return D_r * st * sp + D_theta * ct * sp + D_phi * cp; } - return ccomplex_t( 0.0, evec3.z );//D_phi; //(D_r * ct - D_theta * st ); + return D_r * ct - D_theta * st; } inline real_t vfac_corr( std::array ijk ) const From 569831530846a873f7d659de34aee4d920aefef5 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 28 Feb 2020 18:35:26 +0100 Subject: [PATCH 073/130] working commit --- example.conf | 4 +- include/particle_plt.hh | 99 ++++++++++++++--------------------------- 2 files changed, 36 insertions(+), 67 deletions(-) diff --git a/example.conf b/example.conf index c66a520..21576b0 100644 --- 
a/example.conf +++ b/example.conf @@ -7,7 +7,7 @@ BoxLength = 125 zstart = 49.0 #zstart = 19.0 # order of the LPT to be used (1,2 or 3) -LPTorder = 3 +LPTorder = 1 # also do baryon ICs? DoBaryons = no # do mode fixing à la Angulo&Pontzen @@ -43,7 +43,7 @@ seed = 9001 test = none [execution] -NumThreads = 1 +NumThreads = 8 [output] fname_hdf5 = output_sch.hdf5 diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 18e6394..b62ba2e 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -468,40 +468,26 @@ private: }else{ // large k modes, use interpolated PLT results // -- store in diagonal components of D_ij - // D_xx_.kelem(i,j,k) = ccomplex_t(0.0,evec1.x * kmod); - // D_yy_.kelem(i,j,k) = ccomplex_t(0.0,evec1.y * kmod); - // D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod); + auto norm = (kv.norm()/kv.dot(evec1)); + D_xx_.kelem(i,j,k) = ccomplex_t(0.0,evec1.x * kmod); + D_yy_.kelem(i,j,k) = ccomplex_t(0.0,evec1.y * kmod); + D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod); // // re-normalise to that longitudinal amplitude is exact - evec1 = kv; - auto norm = (kv.norm()/kv.dot(evec1)); - // D_xx_.kelem(i,j,k) *= norm; - // D_yy_.kelem(i,j,k) *= norm; - // D_zz_.kelem(i,j,k) *= norm; - - /////////////////////////////////// - // project onto spherical coordinate vectors + // //evec1 = kv; + // auto norm = (kv.norm()/kv.dot(evec1)); + // //evec1 = evec1 * (1.0/boxlen_); - real_t kr = kv.norm(), kphi = std::atan2(kv.y,kv.x), ktheta = std::acos( kv.z / kr ); - real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); - vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); - - //vec3 e_r( 1.0, 0.0, 0.0 ), e_theta( 0.0, 1.0, 0.0 ), e_phi( 0.0, 0.0, 1.0 ); - - D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod*norm * evec1.dot( e_r ) ); - D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kmod*norm * evec1.dot( e_theta ) ); - D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kmod*norm * evec1.dot( e_phi ) ); - - real_t eve1p1 = kmod*norm * evec1.dot( e_r ); - real_t eve1p2 = kmod*norm * evec1.dot( e_theta ); - real_t eve1p3 = kmod*norm * evec1.dot( e_phi ); - - auto rvec = eve1p1 * e_r + eve1p2 * e_theta + eve1p3 * e_phi; - - // std::cerr << D_xx_.kelem(i,j,k) << " " << D_yy_.kelem(i,j,k) << " " << D_zz_.kelem(i,j,k) << std::endl; - - //std::cerr << rvec.x << " " << evec1.x * kmod*norm << std::endl; + // /////////////////////////////////// + // // project onto spherical coordinate vectors + + // real_t kr = kv.norm(), kphi = std::atan2(kv.y,kv.x), ktheta = std::acos( kv.z / kr ); + // real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); + // vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + // D_xx_.kelem(i,j,k) = ccomplex_t( 0.0, evec1.dot( e_r )); //kmod*norm + // D_yy_.kelem(i,j,k) = ccomplex_t( 0.0, evec1.dot( e_theta )); //kmod*norm + // D_zz_.kelem(i,j,k) = ccomplex_t( 0.0, evec1.dot( e_phi )); //kmod*norm // spatially dependent correction to vfact = \dot{D_+}/D_+ D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); @@ -579,45 +565,28 @@ public: { real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; - // if( idim == 0 ) return D_xx_.get_cic_kspace({ix,iy,iz}); - // else if( idim == 1 ) return D_yy_.get_cic_kspace({ix,iy,iz}); - // return D_zz_.get_cic_kspace({ix,iy,iz}); + if( idim == 0 ) return D_xx_.get_cic_kspace({ix,iy,iz}); + else if( idim == 1 ) return D_yy_.get_cic_kspace({ix,iy,iz}); + return 
D_zz_.get_cic_kspace({ix,iy,iz}); - /////// - // auto kv = D_xx_.get_k( static_cast(ix), static_cast(iy), static_cast(iz) ); - auto kv = D_xx_.get_k( ix, iy, iz ) / mapratio_; + // auto kv = D_xx_.get_k( ix, iy, iz ) / boxlen_; - // project onto spherical coordinate vectors - //real_t kr = kv.norm(), kphi = kr > 0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; + // // project onto spherical coordinate vectors + // auto D_r = D_xx_.get_cic_kspace({ix,iy,iz}); + // auto D_theta = D_yy_.get_cic_kspace({ix,iy,iz}); + // auto D_phi = D_zz_.get_cic_kspace({ix,iy,iz}); + // real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; + // real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); // vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); - auto D_r = D_xx_.get_cic_kspace({ix,iy,iz}); - auto D_theta = D_yy_.get_cic_kspace({ix,iy,iz}); - auto D_phi = D_zz_.get_cic_kspace({ix,iy,iz}); - real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; - real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); - - //real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); - vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); - - vec3 evec3 = D_r.imag() * e_r + D_theta.imag() * e_theta + D_phi.imag() * e_phi; - - assert(!std::isnan(std::imag(D_r * st * cp + D_theta * ct * cp - D_phi * sp))); - assert(!std::isnan(std::imag(D_r * st * sp + D_theta * ct * sp + D_phi * cp))); - assert(!std::isnan(std::imag(D_r * ct - D_theta * st))); - assert(!std::isnan(std::real(D_r * st * cp + D_theta * ct * cp - D_phi * sp))); - assert(!std::isnan(std::real(D_r * st * sp + D_theta * ct * sp + D_phi * cp))); - assert(!std::isnan(std::real(D_r * ct - D_theta * st))); - - // std::cerr << kv.x/boxlen_ << " " << kv.y/boxlen_ << " " << kv.z/boxlen_ << " -- " << D_r * st * cp + D_theta * ct * cp - D_phi * sp << " " << D_r * st * sp + D_theta * ct * sp + D_phi * cp << " " << (D_r * ct - D_theta * st ) << std::endl; - - if( idim == 0 ){ - return D_r * st * cp + D_theta * ct * cp - D_phi * sp; - } - else if( idim == 1 ){ - return D_r * st * sp + D_theta * ct * sp + D_phi * cp; - } - return D_r * ct - D_theta * st; + + // if( idim == 0 ){ + // return D_r * st * cp + D_theta * ct * cp - D_phi * sp; + // } + // else if( idim == 1 ){ + // return D_r * st * sp + D_theta * ct * sp + D_phi * cp; + // } + // return D_r * ct - D_theta * st; } inline real_t vfac_corr( std::array ijk ) const From 6c027d7094af7ae7eb73d843e639d8f8c59b730c Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Mar 2020 17:21:17 +0100 Subject: [PATCH 074/130] fixed PLT correction interpolation --- include/grid_fft.hh | 8 +-- include/particle_plt.hh | 106 +++++++++++++--------------------------- 2 files changed, 38 insertions(+), 76 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index e98d6a7..f8157da 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -290,11 +290,11 @@ public: return val; } - ccomplex_t get_cic_kspace( const vec3& x ) const{ + ccomplex_t get_cic_kspace( const vec3 x ) const{ // warning! 
this doesn't work with MPI - size_t ix = static_cast(x.x); - size_t iy = static_cast(x.y); - size_t iz = std::min(static_cast(x.z),size(2)-1); //static_cast(x.z); + int ix = std::floor(x.x); + int iy = std::floor(x.y); + int iz = std::floor(x.z); real_t dx = x.x-real_t(ix), tx = 1.0-dx; real_t dy = x.y-real_t(iy), ty = 1.0-dy; real_t dz = x.z-real_t(iz), tz = 1.0-dz; diff --git a/include/particle_plt.hh b/include/particle_plt.hh index b62ba2e..b0d3760 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -28,9 +28,9 @@ namespace particle{ class lattice_gradient{ private: - const real_t boxlen_, XmL_, aini_; + const real_t boxlen_, aini_; const size_t ngmapto_, ngrid_, ngrid32_; - const real_t mapratio_; + const real_t mapratio_, XmL_; Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; Grid_FFT grad_x_, grad_y_, grad_z_; std::vector> vectk_; @@ -443,11 +443,7 @@ private: for( size_t j=0; jsize_t(nlattice/2))? int(i)-nlattice : i; - int jj = (j>size_t(nlattice/2))? int(j)-nlattice : j; - vec3 kv = D_xx_.get_k(i,j,k); - const real_t kmod = kv.norm()/mapratio_/boxlen_; double mu1 = std::real(D_xx_.kelem(i,j,k)); // double mu2 = std::real(D_xy_.kelem(i,j,k)); @@ -456,50 +452,28 @@ private: vec3 evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}); evec1 /= evec1.norm(); - if(false){//std::abs(ii)+std::abs(jj)+k<8){ - // small k modes, use usual pseudospectral derivative - // -- store in diagonal components of D_ij - D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kv.x/mapratio_/boxlen_); - D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kv.y/mapratio_/boxlen_); - D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kv.z/mapratio_/boxlen_); + // /////////////////////////////////// + // // project onto spherical coordinate vectors + + real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? 
std::acos( kv.z / kr ): 0.0; + real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); + vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); - // spatially dependent correction to vfact = \dot{D_+}/D_+ - D_xy_.kelem(i,j,k) = 1.0; - }else{ - // large k modes, use interpolated PLT results - // -- store in diagonal components of D_ij - auto norm = (kv.norm()/kv.dot(evec1)); - D_xx_.kelem(i,j,k) = ccomplex_t(0.0,evec1.x * kmod); - D_yy_.kelem(i,j,k) = ccomplex_t(0.0,evec1.y * kmod); - D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod); + // re-normalise to that longitudinal amplitude is exact + double renorm = evec1.dot( e_r ); if( renorm < 0.01 ) renorm = 1.0; - // // re-normalise to that longitudinal amplitude is exact - // //evec1 = kv; - // auto norm = (kv.norm()/kv.dot(evec1)); - // //evec1 = evec1 * (1.0/boxlen_); - - // /////////////////////////////////// - // // project onto spherical coordinate vectors - - // real_t kr = kv.norm(), kphi = std::atan2(kv.y,kv.x), ktheta = std::acos( kv.z / kr ); - // real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); - // vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + // -- store in diagonal components of D_ij + D_xx_.kelem(i,j,k) = 1.0; + D_yy_.kelem(i,j,k) = evec1.dot( e_theta ) / renorm; + D_zz_.kelem(i,j,k) = evec1.dot( e_phi ) / renorm; - // D_xx_.kelem(i,j,k) = ccomplex_t( 0.0, evec1.dot( e_r )); //kmod*norm - // D_yy_.kelem(i,j,k) = ccomplex_t( 0.0, evec1.dot( e_theta )); //kmod*norm - // D_zz_.kelem(i,j,k) = ccomplex_t( 0.0, evec1.dot( e_phi )); //kmod*norm - - // spatially dependent correction to vfact = \dot{D_+}/D_+ - D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); - } - if( i==size_t(nlattice/2) ) D_xx_.kelem(i,j,k)=0.0; - if( j==size_t(nlattice/2) ) D_yy_.kelem(i,j,k)=0.0; - if( k==size_t(nlattice/2) ) D_zz_.kelem(i,j,k)=0.0; + // spatially dependent correction to vfact = \dot{D_+}/D_+ + D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); } } } D_xy_.kelem(0,0,0) = 1.0; - D_xx_.kelem(0,0,0) = 0.0; + D_xx_.kelem(0,0,0) = 1.0; D_yy_.kelem(0,0,0) = 0.0; D_zz_.kelem(0,0,0) = 0.0; @@ -524,10 +498,10 @@ public: // real_t boxlen, size_t ngridother explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=64 ) : boxlen_( the_config.GetValue("setup", "BoxLength") ), - ngmapto_( the_config.GetValue("setup", "GridRes") ), - XmL_ ( the_config.GetValue("cosmology", "Omega_L") / the_config.GetValue("cosmology", "Omega_m") ), aini_ ( 1.0/(1.0+the_config.GetValue("setup", "zstart")) ), + ngmapto_( the_config.GetValue("setup", "GridRes") ), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), + XmL_ ( the_config.GetValue("cosmology", "Omega_L") / the_config.GetValue("cosmology", "Omega_m") ), D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), @@ -544,14 +518,6 @@ public: csoca::ilog << "PLT corrections for " << lattice_str << " lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; -// #if defined(USE_MPI) -// if( CONFIG::MPI_task_size>1 ) -// { -// csoca::elog << "PLT not implemented for MPI, cannot run with more than 1 task currently!" 
<< std::endl; -// abort(); -// } -// #endif - double wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; @@ -565,28 +531,24 @@ public: { real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; - if( idim == 0 ) return D_xx_.get_cic_kspace({ix,iy,iz}); - else if( idim == 1 ) return D_yy_.get_cic_kspace({ix,iy,iz}); - return D_zz_.get_cic_kspace({ix,iy,iz}); - - // auto kv = D_xx_.get_k( ix, iy, iz ) / boxlen_; + auto kv = D_xx_.get_k( ix, iy, iz ); + auto kmod = kv.norm() / mapratio_ / boxlen_; // // project onto spherical coordinate vectors - // auto D_r = D_xx_.get_cic_kspace({ix,iy,iz}); - // auto D_theta = D_yy_.get_cic_kspace({ix,iy,iz}); - // auto D_phi = D_zz_.get_cic_kspace({ix,iy,iz}); + auto D_r = std::real(D_xx_.get_cic_kspace({ix,iy,iz})); + auto D_theta = std::real(D_yy_.get_cic_kspace({ix,iy,iz})); + auto D_phi = std::real(D_zz_.get_cic_kspace({ix,iy,iz})); - // real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; - // real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); - // vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; + real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); - // if( idim == 0 ){ - // return D_r * st * cp + D_theta * ct * cp - D_phi * sp; - // } - // else if( idim == 1 ){ - // return D_r * st * sp + D_theta * ct * sp + D_phi * cp; - // } - // return D_r * ct - D_theta * st; + if( idim == 0 ){ + return ccomplex_t(0.0, kmod*(D_r * st * cp + D_theta * ct * cp - D_phi * sp)); + } + else if( idim == 1 ){ + return ccomplex_t(0.0, kmod*(D_r * st * sp + D_theta * ct * sp + D_phi * cp)); + } + return ccomplex_t(0.0, kmod*(D_r * ct - D_theta * st)); } inline real_t vfac_corr( std::array ijk ) const @@ -594,7 +556,7 @@ public: real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; const real_t alpha = 1.0/std::real(D_xy_.get_cic_kspace({ix,iy,iz})); return 1.0/alpha; - // // below is for LCDM: + // // below is for LCDM, but it is a tiny correction for typical starting redshifts: //! X = \Omega_\Lambda / \Omega_m // return 1.0 / (alpha - (2*std::pow(aini_,3)*alpha*(2 + alpha)*XmL_*Hypergeometric2F1((3 + alpha)/3.,(5 + alpha)/3., // (13 + 4*alpha)/6.,-(std::pow(aini_,3)*XmL_)))/ From a1a5e614cf94e5a32802170ee28a7dfd47ab43d3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Mar 2020 17:37:57 +0100 Subject: [PATCH 075/130] fixed compilation error --- include/grid_fft.hh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index f8157da..38cf11c 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -292,15 +292,15 @@ public: ccomplex_t get_cic_kspace( const vec3 x ) const{ // warning! 
this doesn't work with MPI - int ix = std::floor(x.x); - int iy = std::floor(x.y); - int iz = std::floor(x.z); + int ix = static_cast(std::floor(x.x)); + int iy = static_cast(std::floor(x.y)); + int iz = static_cast(std::floor(x.z)); real_t dx = x.x-real_t(ix), tx = 1.0-dx; real_t dy = x.y-real_t(iy), ty = 1.0-dy; real_t dz = x.z-real_t(iz), tz = 1.0-dz; size_t ix1 = (ix+1)%size(0); size_t iy1 = (iy+1)%size(1); - size_t iz1 = std::min((iz+1),size(2)-1); + size_t iz1 = std::min((iz+1),int(size(2))-1); ccomplex_t val = 0.0; val += this->kelem(ix ,iy ,iz ) * tx * ty * tz; val += this->kelem(ix ,iy ,iz1) * tx * ty * dz; From c58ccfa6cae4765ff349f73229a383d4ad73b263 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Mar 2020 16:48:03 +0000 Subject: [PATCH 076/130] README.md edited online with Bitbucket --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c7cc745..3d3be7b 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ High order LPT/QPT tool for single resolution simulations ## Build Instructions Clone code including submodules (currently only CLASS is used as a submodule): - git clone --recurse-submodules https://ohahn@bitbucket.org/ohahn/monofonic.git + git clone --recurse-submodules https://@bitbucket.org/ohahn/monofonic.git Create build directory, configure, and build: From b0b67086fdedf6430476d4575184883a12223565 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 6 Mar 2020 16:44:53 +0100 Subject: [PATCH 077/130] fixed particle type for baryons in gadget-hdf5 and arepo, disabled PLT by default --- src/ic_generator.cc | 4 ++-- src/main.cc | 3 +++ src/plugins/output_arepo.cc | 2 +- src/plugins/output_gadget_hdf5.cc | 2 +- src/plugins/transfer_CLASS.cc | 2 +- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 23899c0..a9d9670 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -273,8 +273,8 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- // Create PLT gradient operator //-------------------------------------------------------------------- - particle::lattice_gradient lg( the_config ); - // op::fourier_gradient lg( the_config ); + // particle::lattice_gradient lg( the_config ); + op::fourier_gradient lg( the_config ); //-------------------------------------------------------------------- std::vector species_list; diff --git a/src/main.cc b/src/main.cc index cbdf209..12cde3a 100644 --- a/src/main.cc +++ b/src/main.cc @@ -3,6 +3,7 @@ #include #include #include +#include #if defined(_OPENMP) #include @@ -112,6 +113,8 @@ int main( int argc, char** argv ) omp_set_num_threads(CONFIG::num_threads); #endif + // std::feclearexcept(FE_ALL_EXCEPT); + //------------------------------------------------------------------------------ // Write code configuration to screen //------------------------------------------------------------------------------ diff --git a/src/plugins/output_arepo.cc b/src/plugins/output_arepo.cc index 8d8d903..1af182f 100644 --- a/src/plugins/output_arepo.cc +++ b/src/plugins/output_arepo.cc @@ -184,7 +184,7 @@ public: case cosmo_species::dm: return 1; case cosmo_species::baryon: - return 2; + return 0; case cosmo_species::neutrino: return 3; } diff --git a/src/plugins/output_gadget_hdf5.cc b/src/plugins/output_gadget_hdf5.cc index f32f9c8..c862e41 100644 --- a/src/plugins/output_gadget_hdf5.cc +++ b/src/plugins/output_gadget_hdf5.cc @@ -151,7 +151,7 @@ public: case cosmo_species::dm: 
return 1; case cosmo_species::baryon: - return 2; + return 0; case cosmo_species::neutrino: return 3; } diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index d660d25..e6e2c00 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -47,7 +47,7 @@ private: double wtime = get_wtime(); std::stringstream zlist; - zlist << zstart_ << ", " << zstart_; + zlist << zstart_ << ", " << ztarget_; ClassParams pars; pars.add("extra metric transfer functions", "yes"); From 0abe891f864ad72b44b42f3d8b995710b9ef09ed Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Tue, 10 Mar 2020 19:01:44 +0100 Subject: [PATCH 078/130] added ID offset for baryon particles to avoid duplicate IDs --- include/particle_generator.hh | 6 +++--- src/ic_generator.cc | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 56c69f4..956ed28 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -37,7 +37,7 @@ const std::vector> second_lattice_shift = }; template -void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const field_t& field ){ +void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t& field ){ // number of modes present in the field const size_t num_p_in_load = field.local_size(); // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load): @@ -50,9 +50,9 @@ void initialize_lattice( container& particles, lattice lattice_type, const bool for( size_t k=0; kwrite_species_as(this_species) == output_type::particles) ? true : false; + // somewhat arbitrarily, start baryon particle IDs from 2**31 if we have 32bit and from 2**56 if we have 64 bits + size_t IDoffset = (this_species == cosmo_species::baryon)? ((the_output_plugin->has_64bit_ids())? 
1ul<<56 : 1ul<<31): 0 ; + // if output plugin wants particles, then we need to store them, along with their IDs if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { // allocate particle structure and generate particle IDs - particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), tmp ); + particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp ); } // write out positions From ab2db06990295fe180616ba4759747e4903ce638 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 29 Mar 2020 14:42:55 +0200 Subject: [PATCH 079/130] added useful physical constants --- include/physical_constants.hh | 62 +++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 include/physical_constants.hh diff --git a/include/physical_constants.hh b/include/physical_constants.hh new file mode 100644 index 0000000..594eb0d --- /dev/null +++ b/include/physical_constants.hh @@ -0,0 +1,62 @@ +#pragma once +/*******************************************************************************\ + physical_constants.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 06/2019 - Oliver Hahn - first implementation +\*******************************************************************************/ + +// physical constants for convenience, all values have been taken from +// the 2018 edition of the Particle Data Group Booklet, +// http://pdg.lbl.gov/2019/mobile/reviews/pdf/rpp2018-rev-phys-constants-m.pdf + +namespace phys_const +{ +// helper value of pi so that we don't need to include any other header just for this +static constexpr double pi_ = 3.141592653589793115997963468544185161590576171875; + +//--- unit conversions --------------------------------------------------- + +// 1 Mpc in m +static constexpr double Mpc_SI = 3.0857e22; + +// 1 Gyr in s +static constexpr double Gyr_SI = 3.1536e16; + +// 1 eV in J +static constexpr double eV_SI = 1.602176487e-19; + +// 1 erg in J +static constexpr double erg_SI = 1e-7; + +//--- physical constants ------------------------------------------------ + +// speed of light c in m/s +static constexpr double c_SI = 2.99792458e8; + +// gravitational constant G in m^3/s^2/kg +static constexpr double G_SI = 6.6740800e-11; + +// Boltzmann constant k_B in kg m^2/s^2/K +static constexpr double kB_SI = 1.38064852e-23; + +// reduced Planck's quantum \hbar in kg m^2/s +static constexpr double hbar_SI = 1.054571800e-34; + +// Stefan-Boltzmann constant sigma in J/m^2/s/K^-4 +static constexpr double sigma_SI = (pi_ * pi_) * (kB_SI * kB_SI * kB_SI * kB_SI) / 60. 
/ (hbar_SI * hbar_SI * hbar_SI) / (c_SI * c_SI); + +// electron mass in kg +static constexpr double me_SI = 9.10938356e-31; + +// proton mass in kg +static constexpr double mp_SI = 1.672621898e-27; + +// unified atomic mass unit (u) in kg +static constexpr double u_SI = 1.660539040e-27; + +// critical density of the Universe in h^2 kg/m^3 +static constexpr double rhocrit_h2_SI = 3 * 1e10 / (8 * pi_ * G_SI) / Mpc_SI / Mpc_SI; + +} // namespace phys_const \ No newline at end of file From a587ad6b3ee5174f937c5658f1b93fbfc868282f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 29 Mar 2020 14:45:43 +0200 Subject: [PATCH 080/130] some refactoring (add '_t' to vec3 and mat3) --- include/bounding_box.hh | 4 +- include/grid_fft.hh | 32 +++++------ include/mat3.hh | 59 ++++++------------- include/particle_generator.hh | 6 +- include/particle_plt.hh | 104 +++++++++++++++++----------------- include/vec3.hh | 56 +++++++++--------- src/grid_fft.cc | 2 +- src/ic_generator.cc | 6 +- src/plugins/transfer_CLASS.cc | 3 +- 9 files changed, 124 insertions(+), 148 deletions(-) diff --git a/include/bounding_box.hh b/include/bounding_box.hh index db0f481..3048c79 100644 --- a/include/bounding_box.hh +++ b/include/bounding_box.hh @@ -5,12 +5,12 @@ template struct bounding_box { - vec3 x1_, x2_; + vec3_t x1_, x2_; bounding_box(void) { } - bounding_box( const vec3& x1, const vec3& x2) + bounding_box( const vec3_t& x1, const vec3_t& x2) : x1_(x1), x2_(x2) { } diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 8acc2bd..2cf5557 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -165,9 +165,9 @@ public: } template - vec3 get_r(const size_t i, const size_t j, const size_t k) const + vec3_t get_r(const size_t i, const size_t j, const size_t k) const { - vec3 rr; + vec3_t rr; rr[0] = real_t(i + local_0_start_) * dx_[0]; rr[1] = real_t(j) * dx_[1]; @@ -177,9 +177,9 @@ public: } template - vec3 get_unit_r(const size_t i, const size_t j, const size_t k) const + vec3_t get_unit_r(const size_t i, const size_t j, const size_t k) const { - vec3 rr; + vec3_t rr; rr[0] = real_t(i + local_0_start_) / real_t(n_[0]); rr[1] = real_t(j) / real_t(n_[1]); @@ -189,9 +189,9 @@ public: } template - vec3 get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3 s) const + vec3_t get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3_t s) const { - vec3 rr; + vec3_t rr; rr[0] = (real_t(i + local_0_start_) + s.x) / real_t(n_[0]); rr[1] = (real_t(j) + s.y) / real_t(n_[1]); @@ -200,9 +200,9 @@ public: return rr; } - vec3 get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const + vec3_t get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const { - return vec3({i + local_0_start_, j, k}); + return vec3_t({i + local_0_start_, j, k}); } size_t get_cell_idx_1d(const size_t i, const size_t j, const size_t k) const @@ -226,9 +226,9 @@ public: } template - vec3 get_k(const size_t i, const size_t j, const size_t k) const + vec3_t get_k(const size_t i, const size_t j, const size_t k) const { - vec3 kk; + vec3_t kk; if( bdistributed ){ auto ip = i + local_1_start_; kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0]; @@ -243,9 +243,9 @@ public: } template - vec3 get_k(const real_t i, const real_t j, const real_t k) const + vec3_t get_k(const real_t i, const real_t j, const real_t k) const { - vec3 kk; + vec3_t kk; if( bdistributed ){ auto ip = i + real_t(local_1_start_); kk[0] = (j - real_t(j > real_t(nhalf_[0])) * n_[0]) * kfac_[0]; @@ -264,9 +264,9 @@ 
public: return bdistributed? std::array({j,i+local_1_start_,k}) : std::array({i,j,k}); } - data_t get_cic( const vec3& v ) const{ + data_t get_cic( const vec3_t& v ) const{ // warning! this doesn't work with MPI - vec3 x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], + vec3_t x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], std::fmod(v.y/length_[1]+1.0,1.0)*n_[1], std::fmod(v.z/length_[2]+1.0,1.0)*n_[2] }); size_t ix = static_cast(x.x); @@ -290,7 +290,7 @@ public: return val; } - ccomplex_t get_cic_kspace( const vec3 x ) const{ + ccomplex_t get_cic_kspace( const vec3_t x ) const{ // warning! this doesn't work with MPI int ix = static_cast(std::floor(x.x)); int iy = static_cast(std::floor(x.y)); @@ -746,7 +746,7 @@ public: void Write_PDF(std::string ofname, int nbins = 1000, double scale = 1.0, double rhomin = 1e-3, double rhomax = 1e3); - void shift_field( const vec3& s, bool transform_back=true ) + void shift_field( const vec3_t& s, bool transform_back=true ) { FourierTransformForward(); apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { diff --git a/include/mat3.hh b/include/mat3.hh index ac23069..6cf2689 100644 --- a/include/mat3.hh +++ b/include/mat3.hh @@ -4,7 +4,7 @@ #include template -class mat3{ +class mat3_t{ protected: std::array data_; gsl_matrix_view m_; @@ -37,38 +37,38 @@ protected: public: - mat3() + mat3_t() : bdid_alloc_gsl_(false) {} //! copy constructor - mat3( const mat3 &m) + mat3_t( const mat3_t &m) : data_(m.data_), bdid_alloc_gsl_(false) {} //! move constructor - mat3( mat3 &&m) + mat3_t( mat3_t &&m) : data_(std::move(m.data_)), bdid_alloc_gsl_(false) {} - //! construct mat3 from initializer list + //! construct mat3_t from initializer list template - mat3(E&&...e) + mat3_t(E&&...e) : data_{{std::forward(e)...}}, bdid_alloc_gsl_(false) {} - mat3& operator=(const mat3& m) noexcept{ + mat3_t& operator=(const mat3_t& m) noexcept{ data_ = m.data_; return *this; } - mat3& operator=(const mat3&& m) noexcept{ + mat3_t& operator=(const mat3_t&& m) noexcept{ data_ = std::move(m.data_); return *this; } //! destructor - ~mat3(){ + ~mat3_t(){ this->free_gsl(); } @@ -85,7 +85,7 @@ public: const T &operator()(size_t i, size_t j) const noexcept { return data_[3*i+j]; } //! in-place addition - mat3& operator+=( const mat3& rhs ) noexcept{ + mat3_t& operator+=( const mat3_t& rhs ) noexcept{ for (size_t i = 0; i < 9; ++i) { (*this)[i] += rhs[i]; } @@ -93,7 +93,7 @@ public: } //! 
in-place subtraction - mat3& operator-=( const mat3& rhs ) noexcept{ + mat3_t& operator-=( const mat3_t& rhs ) noexcept{ for (size_t i = 0; i < 9; ++i) { (*this)[i] -= rhs[i]; } @@ -104,20 +104,8 @@ public: for (size_t i = 0; i < 9; ++i) data_[i]=0; } - void eigen( vec3& evals, vec3& evec1, vec3& evec2, vec3& evec3 ) + void eigen( vec3_t& evals, vec3_t& evec1, vec3_t& evec2, vec3_t& evec3_t ) { - // for( auto x : data_ ){ - // std::cerr << x << " " ; - // } - // std::cerr << std::endl; - // resort into symmetrix matrix - // data_[8] = data_[5]; - // data_[7] = data_[4]; - // data_[6] = data_[2]; - // data_[5] = data_[4]; - // data_[4] = data_[3]; - // data_[3] = data_[1]; - this->init_gsl(); gsl_eigen_symmv (&m_.matrix, eval_, evec_, wsp_); @@ -127,17 +115,15 @@ public: evals[i] = gsl_vector_get( eval_, i ); evec1[i] = gsl_matrix_get( evec_, i, 0 ); evec2[i] = gsl_matrix_get( evec_, i, 1 ); - evec3[i] = gsl_matrix_get( evec_, i, 2 ); + evec3_t[i] = gsl_matrix_get( evec_, i, 2 ); } - - // std::cerr << "(" << evals[0] << " " << evals[1] << " " << evals[2] << ")" << std::endl; } }; template -constexpr const mat3 operator+(const mat3 &lhs, const mat3 &rhs) noexcept +constexpr const mat3_t operator+(const mat3_t &lhs, const mat3_t &rhs) noexcept { - mat3 result; + mat3_t result; for (size_t i = 0; i < 9; ++i) { result[i] = lhs[i] + rhs[i]; } @@ -146,9 +132,9 @@ constexpr const mat3 operator+(const mat3 &lhs, const mat3 &rhs) noexce // matrix - vector multiplication template -vec3 operator*( const mat3 &A, const vec3 &v ) noexcept +inline vec3_t operator*( const mat3_t &A, const vec3_t &v ) noexcept { - vec3 result; + vec3_t result; for( int mu=0; mu<3; ++mu ){ result[mu] = 0.0; for( int nu=0; nu<3; ++nu ){ @@ -158,14 +144,3 @@ vec3 operator*( const mat3 &A, const vec3 &v ) noexcept return result; } -// template -// vec3 operator*( const vec3 &v, const mat3 &A ) noexcept -// { -// vec3 result = 0.0; -// for( int mu=0; mu<3; ++mu ){ -// for( int nu=0; nu<3; ++nu ){ -// result[nu] += v[mu]*A(mu,nu); -// } -// } -// return result; -// } diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 956ed28..57e8b0f 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -18,7 +18,7 @@ enum lattice{ lattice_rsc = 3, // RSC: refined simple cubic }; -const std::vector< std::vector> > lattice_shifts = +const std::vector< std::vector> > lattice_shifts = { // first shift must always be zero! (otherwise set_positions and set_velocities break) /* SC : */ {{0.0,0.0,0.0}}, @@ -27,7 +27,7 @@ const std::vector< std::vector> > lattice_shifts = /* RSC: */ {{0.0,0.0,0.0},{0.0,0.0,0.5},{0.0,0.5,0.0},{0.0,0.5,0.5},{0.5,0.0,0.0},{0.5,0.0,0.5},{0.5,0.5,0.0},{0.5,0.5,0.5}}, }; -const std::vector> second_lattice_shift = +const std::vector> second_lattice_shift = { /* SC : */ {0.5, 0.5, 0.5}, // this corresponds to CsCl lattice /* BCC: */ {0.5, 0.5, 0.0}, // is there a diatomic lattice with BCC base?!? @@ -81,7 +81,7 @@ void set_positions( container& particles, const lattice lattice_type, bool is_se for( size_t j=0; j(i,j,k,lattice_shifts[lattice_type][ishift] - + (is_second_lattice? second_lattice_shift[lattice_type] : vec3{0.,0.,0.}) ); + + (is_second_lattice? 
second_lattice_shift[lattice_type] : vec3_t{0.,0.,0.}) ); if( b64reals ){ particles.set_pos64( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); }else{ diff --git a/include/particle_plt.hh b/include/particle_plt.hh index b0d3760..a6fc1ad 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -33,13 +33,13 @@ private: const real_t mapratio_, XmL_; Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; Grid_FFT grad_x_, grad_y_, grad_z_; - std::vector> vectk_; - std::vector> ico_, vecitk_; + std::vector> vectk_; + std::vector> ico_, vecitk_; bool is_even( int i ){ return (i%2)==0; } - bool is_in( int i, int j, int k, const mat3& M ){ - vec3 v({i,j,k}); + bool is_in( int i, int j, int k, const mat3_t& M ){ + vec3_t v({i,j,k}); auto vv = M * v; return is_even(vv.x)&&is_even(vv.y)&&is_even(vv.z); } @@ -54,22 +54,22 @@ private: //! === vectors, reciprocals and normals for the SC lattice === const int charge_fac_sc = 1; - const mat3 mat_bravais_sc{ + const mat3_t mat_bravais_sc{ 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, }; - const mat3 mat_reciprocal_sc{ + const mat3_t mat_reciprocal_sc{ twopi, 0.0, 0.0, 0.0, twopi, 0.0, 0.0, 0.0, twopi, }; - const mat3 mat_invrecip_sc{ + const mat3_t mat_invrecip_sc{ 2, 0, 0, 0, 2, 0, 0, 0, 2, }; - const std::vector> normals_sc{ + const std::vector> normals_sc{ {pi,0.,0.},{-pi,0.,0.}, {0.,pi,0.},{0.,-pi,0.}, {0.,0.,pi},{0.,0.,-pi}, @@ -78,22 +78,22 @@ private: //! === vectors, reciprocals and normals for the BCC lattice === const int charge_fac_bcc = 2; - const mat3 mat_bravais_bcc{ + const mat3_t mat_bravais_bcc{ 1.0, 0.0, 0.5, 0.0, 1.0, 0.5, 0.0, 0.0, 0.5, }; - const mat3 mat_reciprocal_bcc{ + const mat3_t mat_reciprocal_bcc{ twopi, 0.0, 0.0, 0.0, twopi, 0.0, -twopi, -twopi, fourpi, }; - const mat3 mat_invrecip_bcc{ + const mat3_t mat_invrecip_bcc{ 2, 0, 0, 0, 2, 0, 1, 1, 1, }; - const std::vector> normals_bcc{ + const std::vector> normals_bcc{ {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} @@ -102,22 +102,22 @@ private: //! === vectors, reciprocals and normals for the FCC lattice === const int charge_fac_fcc = 4; - const mat3 mat_bravais_fcc{ + const mat3_t mat_bravais_fcc{ 0.0, 0.5, 0.0, 0.5, 0.0, 1.0, 0.5, 0.5, 0.0, }; - const mat3 mat_reciprocal_fcc{ + const mat3_t mat_reciprocal_fcc{ -fourpi, fourpi, twopi, 0.0, 0.0, twopi, fourpi, 0.0, -twopi, }; - const mat3 mat_invrecip_fcc{ + const mat3_t mat_invrecip_fcc{ 0, 1, 1, 1, 0, 1, 0, 2, 0, }; - const std::vector> normals_fcc{ + const std::vector> normals_fcc{ {twopi,0.,0.},{-twopi,0.,0.}, {0.,twopi,0.},{0.,-twopi,0.}, {0.,0.,twopi},{0.,0.,-twopi}, @@ -152,7 +152,7 @@ private: auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; //! Ewald summation: short-range Green's function - auto add_greensftide_sr = [&]( mat3& D, const vec3& d ) -> void { + auto add_greensftide_sr = [&]( mat3_t& D, const vec3_t& d ) -> void { auto r = d.norm(); if( r< 1e-14 ) return; // return zero for r=0 @@ -170,7 +170,7 @@ private: }; //! 
Ewald summation: long-range Green's function - auto add_greensftide_lr = [&]( mat3& D, const vec3& k, const vec3& r ) -> void { + auto add_greensftide_lr = [&]( mat3_t& D, const vec3_t& k, const vec3_t& r ) -> void { real_t kmod2 = k.norm_squared(); real_t term = std::exp(-kmod2/(4*alpha2))*std::cos(k.dot(r)) / kmod2 * fft_norm; for( int mu=0; mu<3; ++mu ){ @@ -195,22 +195,22 @@ private: constexpr ptrdiff_t lnumber = 3, knumber = 3; const int numb = 1; //!< search radius when shifting vectors into FBZ - vectk_.assign(D_xx_.memsize(),vec3()); - ico_.assign(D_xx_.memsize(),vec3()); - vecitk_.assign(D_xx_.memsize(),vec3()); + vectk_.assign(D_xx_.memsize(),vec3_t()); + ico_.assign(D_xx_.memsize(),vec3_t()); + vecitk_.assign(D_xx_.memsize(),vec3_t()); #pragma omp parallel { //... temporary to hold values of the dynamical matrix - mat3 matD(0.0); + mat3_t matD(0.0); #pragma omp for for( ptrdiff_t i=0; i x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)}); - const vec3 ar = (mat_bravais * x_ijk).wrap_abs(); + const vec3_t x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)}); + const vec3_t ar = (mat_bravais * x_ijk).wrap_abs(); //... zero temporary matrix matD.zero(); @@ -219,8 +219,8 @@ private: for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ - const vec3 n_ijk({real_t(ix),real_t(iy),real_t(iz)}); - const vec3 dr(ar - mat_bravais * n_ijk); + const vec3_t n_ijk({real_t(ix),real_t(iy),real_t(iz)}); + const vec3_t dr(ar - mat_bravais * n_ijk); add_greensftide_sr(matD, dr); } } @@ -231,8 +231,8 @@ private: for( ptrdiff_t iy=-knumber; iy<=knumber; iy++ ){ for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){ if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ - const vec3 k_ijk({real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}); - const vec3 ak( mat_reciprocal * k_ijk); + const vec3_t k_ijk({real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}); + const vec3_t ak( mat_reciprocal * k_ijk); add_greensftide_lr(matD, ak, ar ); } @@ -278,7 +278,7 @@ private: std::ofstream ofs2("test_brillouin.txt"); #endif - using map_t = std::map,size_t>; + using map_t = std::map,size_t>; map_t iimap; //!=== Make temporary copies before resorting to std. 
Fourier grid ========!// @@ -312,8 +312,8 @@ private: #pragma omp parallel { // thread private matrix representation - mat3 D; - vec3 eval, evec1, evec2, evec3; + mat3_t D; + vec3_t eval, evec1, evec2, evec3_t; #pragma omp for for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + vec3_t kv = D_xx_.get_k(i,j,k); // put matrix elements into actual matrix D(0,0) = std::real(temp1.kelem(i,j,k)) / fft_norm12; @@ -333,12 +333,12 @@ private: D(2,2) = std::imag(temp3.kelem(i,j,k)) / fft_norm12; // compute eigenstructure of matrix - D.eigen(eval, evec1, evec2, evec3); - evec3 /= (twopi*ngrid_); + D.eigen(eval, evec1, evec2, evec3_t); + evec3_t /= (twopi*ngrid_); // now determine to which modes on the regular lattice this contributes - vec3 ar = kv / (twopi*ngrid_); - vec3 a(mat_reciprocal * ar); + vec3_t ar = kv / (twopi*ngrid_); + vec3_t a(mat_reciprocal * ar); // translate the k-vectors into the "candidate" FBZ for( int l1=-numb; l1<=numb; ++l1 ){ @@ -347,9 +347,9 @@ private: // need both halfs of Fourier space since we use real transforms for( int isign=0; isign<=1; ++isign ){ const real_t sign = 2.0*real_t(isign)-1.0; - const vec3 vshift({real_t(l1),real_t(l2),real_t(l3)}); + const vec3_t vshift({real_t(l1),real_t(l2),real_t(l3)}); - vec3 vectk = sign * a + mat_reciprocal * vshift; + vec3_t vectk = sign * a + mat_reciprocal * vshift; if( check_FBZ( normals, vectk ) ) { @@ -358,11 +358,11 @@ private: int iz = std::round(vectk.z*(ngrid_)/twopi); #pragma omp critical - {iimap.insert( std::pair,size_t>({ix,iy,iz}, D_xx_.get_idx(i,j,k)) );} + {iimap.insert( std::pair,size_t>({ix,iy,iz}, D_xx_.get_idx(i,j,k)) );} temp1.kelem(i,j,k) = ccomplex_t(eval[2],eval[1]); - temp2.kelem(i,j,k) = ccomplex_t(eval[0],evec3.x); - temp3.kelem(i,j,k) = ccomplex_t(evec3.y,evec3.z); + temp2.kelem(i,j,k) = ccomplex_t(eval[0],evec3_t.x); + temp3.kelem(i,j,k) = ccomplex_t(evec3_t.y,evec3_t.z); } }//sign } //l3 @@ -389,24 +389,24 @@ private: int ii = (int(i)>nlattice/2)? int(i)-nlattice : int(i); int jj = (int(j)>nlattice/2)? int(j)-nlattice : int(j); int kk = (int(k)>nlattice/2)? 
int(k)-nlattice : int(k); - vec3 kv({real_t(ii),real_t(jj),real_t(kk)}); + vec3_t kv({real_t(ii),real_t(jj),real_t(kk)}); - auto align_with_k = [&]( const vec3& v ) -> vec3{ + auto align_with_k = [&]( const vec3_t& v ) -> vec3_t{ return v*((v.dot(kv)<0.0)?-1.0:1.0); }; - vec3 v, l; + vec3_t v, l; map_t::iterator it; if( !is_in(i,j,k,mat_invrecip) ){ - auto average_lv = [&]( const auto& t1, const auto& t2, const auto& t3, vec3& v, vec3& l ) { + auto average_lv = [&]( const auto& t1, const auto& t2, const auto& t3, vec3_t& v, vec3_t& l ) { v = 0.0; l = 0.0; int count(0); auto add_lv = [&]( auto it ) -> void { auto q = it->second;++count; - l += vec3({std::real(t1.kelem(q)),std::imag(t1.kelem(q)),std::real(t2.kelem(q))}); - v += align_with_k(vec3({std::imag(t2.kelem(q)),std::real(t3.kelem(q)),std::imag(t3.kelem(q))})); + l += vec3_t({std::real(t1.kelem(q)),std::imag(t1.kelem(q)),std::real(t2.kelem(q))}); + v += align_with_k(vec3_t({std::imag(t2.kelem(q)),std::real(t3.kelem(q)),std::imag(t3.kelem(q))})); }; map_t::iterator it; if( (it = iimap.find({ii-1,jj,kk}))!=iimap.end() ){ add_lv(it); } @@ -423,8 +423,8 @@ private: }else{ if( (it = iimap.find({ii,jj,kk}))!=iimap.end() ){ auto q = it->second; - l = vec3({std::real(temp1.kelem(q)),std::imag(temp1.kelem(q)),std::real(temp2.kelem(q))}); - v = align_with_k(vec3({std::imag(temp2.kelem(q)),std::real(temp3.kelem(q)),std::imag(temp3.kelem(q))})); + l = vec3_t({std::real(temp1.kelem(q)),std::imag(temp1.kelem(q)),std::real(temp2.kelem(q))}); + v = align_with_k(vec3_t({std::imag(temp2.kelem(q)),std::real(temp3.kelem(q)),std::imag(temp3.kelem(q))})); } } D_xx_.kelem(i,j,k) = l[0]; @@ -443,13 +443,13 @@ private: for( size_t j=0; j kv = D_xx_.get_k(i,j,k); + vec3_t kv = D_xx_.get_k(i,j,k); double mu1 = std::real(D_xx_.kelem(i,j,k)); // double mu2 = std::real(D_xy_.kelem(i,j,k)); // double mu3 = std::real(D_xz_.kelem(i,j,k)); - vec3 evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}); + vec3_t evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}); evec1 /= evec1.norm(); // /////////////////////////////////// @@ -457,7 +457,7 @@ private: real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ): 0.0; real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); - vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + vec3_t e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); // re-normalise to that longitudinal amplitude is exact double renorm = evec1.dot( e_r ); if( renorm < 0.01 ) renorm = 1.0; diff --git a/include/vec3.hh b/include/vec3.hh index 4e72d81..3d1fe44 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -1,5 +1,5 @@ /*******************************************************************\ - vec3.hh - This file is part of MUSIC2 - + vec3_t.hh - This file is part of MUSIC2 - a code to generate initial conditions for cosmological simulations CHANGELOG (only majors, for details see repo): @@ -9,7 +9,7 @@ //! implements a simple class of 3-vectors of arbitrary scalar type template< typename T > -class vec3{ +class vec3_t{ private: //! holds the data std::array data_; @@ -19,27 +19,27 @@ public: T &x,&y,&z; //! empty constructor - vec3() + vec3_t() : x(data_[0]),y(data_[1]),z(data_[2]){} //! copy constructor - vec3( const vec3 &v) + vec3_t( const vec3_t &v) : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} //! 
copy constructor for non-const reference, needed to avoid variadic template being called for non-const reference - vec3( vec3& v) + vec3_t( vec3_t& v) : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} //! move constructor - vec3( vec3 &&v) + vec3_t( vec3_t &&v) : data_(std::move(v.data_)), x(data_[0]), y(data_[1]), z(data_[2]){} - //! construct vec3 from initializer list + //! construct vec3_t from initializer list template - vec3(E&&...e) + vec3_t(E&&...e) : data_{{std::forward(e)...}}, x{data_[0]}, y{data_[1]}, z{data_[2]} {} - // vec3( T a, T b, T c ) + // vec3_t( T a, T b, T c ) // : data_{{a,b,c}}, x(data_[0]), y(data_[1]), z(data_[2]){} //! bracket index access to vector components @@ -49,37 +49,37 @@ public: const T &operator[](size_t i) const noexcept { return data_[i]; } // assignment operator - vec3& operator=( const vec3& v ) noexcept { data_=v.data_; return *this; } + vec3_t& operator=( const vec3_t& v ) noexcept { data_=v.data_; return *this; } - //! implementation of summation of vec3 - vec3 operator+( const vec3& v ) const noexcept{ return vec3({x+v.x,y+v.y,z+v.z}); } + //! implementation of summation of vec3_t + vec3_t operator+( const vec3_t& v ) const noexcept{ return vec3_t({x+v.x,y+v.y,z+v.z}); } - //! implementation of difference of vec3 - vec3 operator-( const vec3& v ) const noexcept{ return vec3({x-v.x,y-v.y,z-v.z}); } + //! implementation of difference of vec3_t + vec3_t operator-( const vec3_t& v ) const noexcept{ return vec3_t({x-v.x,y-v.y,z-v.z}); } //! implementation of unary negative - vec3 operator-() const noexcept{ return vec3({-x,-y,-z}); } + vec3_t operator-() const noexcept{ return vec3_t({-x,-y,-z}); } //! implementation of scalar multiplication - vec3 operator*( T s ) const noexcept{ return vec3({x*s,y*s,z*s}); } + vec3_t operator*( T s ) const noexcept{ return vec3_t({x*s,y*s,z*s}); } //! implementation of scalar division - vec3 operator/( T s ) const noexcept{ return vec3({x/s,y/s,z/s}); } + vec3_t operator/( T s ) const noexcept{ return vec3_t({x/s,y/s,z/s}); } //! implementation of += operator - vec3& operator+=( const vec3& v ) noexcept{ x+=v.x; y+=v.y; z+=v.z; return *this; } + vec3_t& operator+=( const vec3_t& v ) noexcept{ x+=v.x; y+=v.y; z+=v.z; return *this; } //! implementation of -= operator - vec3& operator-=( const vec3& v ) noexcept{ x-=v.x; y-=v.y; z-=v.z; return *this; } + vec3_t& operator-=( const vec3_t& v ) noexcept{ x-=v.x; y-=v.y; z-=v.z; return *this; } //! multiply with scalar - vec3& operator*=( T s ) noexcept{ x*=s; y*=s; z*=s; return *this; } + vec3_t& operator*=( T s ) noexcept{ x*=s; y*=s; z*=s; return *this; } //! divide by scalar - vec3& operator/=( T s ) noexcept{ x/=s; y/=s; z/=s; return *this; } + vec3_t& operator/=( T s ) noexcept{ x/=s; y/=s; z/=s; return *this; } //! compute dot product with another vector - T dot(const vec3 &a) const noexcept + T dot(const vec3_t &a) const noexcept { return data_[0] * a.data_[0] + data_[1] * a.data_[1] + data_[2] * a.data_[2]; } @@ -91,19 +91,19 @@ public: T norm(void) const noexcept { return std::sqrt( this->norm_squared() ); } //! wrap absolute vector to box of size p - vec3& wrap_abs( T p = 1.0 ) noexcept{ + vec3_t& wrap_abs( T p = 1.0 ) noexcept{ for( auto& x : data_ ) x = std::fmod( 2*p + x, p ); return *this; } //! wrap relative vector to box of size p - vec3& wrap_rel( T p = 1.0 ) noexcept{ + vec3_t& wrap_rel( T p = 1.0 ) noexcept{ for( auto& x : data_ ) x = (x<-p/2)? x+p : (x>=p/2)? x-p : x; return *this; } - //! 
ordering, allows 3d sorting of vec3s - bool operator<( const vec3& o ) const noexcept{ + //! ordering, allows 3d sorting of vec3_ts + bool operator<( const vec3_t& o ) const noexcept{ if( x!=o.x ) return x -vec3 operator*( T s, const vec3& v ){ - return vec3({v.x*s,v.y*s,v.z*s}); +vec3_t operator*( T s, const vec3_t& v ){ + return vec3_t({v.x*s,v.y*s,v.z*s}); } diff --git a/src/grid_fft.cc b/src/grid_fft.cc index 5ae6b24..a1b1912 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -860,7 +860,7 @@ void Grid_FFT::Compute_PowerSpectrum(std::vector &b for (size_t iy = 0; iy < size(1); iy++) for (size_t iz = 0; iz < size(2); iz++) { - vec3 k3 = get_k(ix, iy, iz); + vec3_t k3 = get_k(ix, iy, iz); double k = k3.norm(); int idx2 = k / dk; //int((1.0f / dklog * std::log10(k / kmin))); auto z = this->kelem(ix, iy, iz); diff --git a/src/ic_generator.cc b/src/ic_generator.cc index ce86444..708e12b 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -320,7 +320,7 @@ int Run( ConfigFile& the_config ) if (bAddExternalTides) { - phi2.assign_function_of_grids_kdep([&](vec3 kvec, ccomplex_t pphi, ccomplex_t pphi2) { + phi2.assign_function_of_grids_kdep([&](vec3_t kvec, ccomplex_t pphi, ccomplex_t pphi2) { // sign in front of f_aniso is reversed since phi1 = -phi return pphi2 + f_aniso * (kvec[0] * kvec[0] * lss_aniso_lambda[0] + kvec[1] * kvec[1] * lss_aniso_lambda[1] + kvec[2] * kvec[2] * lss_aniso_lambda[2]) * pphi; }, @@ -569,7 +569,7 @@ int Run( ConfigFile& the_config ) + lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx) ); if( bDoBaryons ){ - vec3 kvec = phi.get_k(i,j,k); + vec3_t kvec = phi.get_k(i,j,k); real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, cdm0) : (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, baryon0) : @@ -618,7 +618,7 @@ int Run( ConfigFile& the_config ) + vfac3 * (lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx)) ); if( bDoBaryons ){ - vec3 kvec = phi.get_k(i,j,k); + vec3_t kvec = phi.get_k(i,j,k); real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, vcdm0) : (this_species == cosmo_species::baryon)? 
the_cosmo_calc->GetAmplitude(kmod, vbaryon0) : diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index e6e2c00..079b633 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -223,13 +223,14 @@ public: gsl_spline *splineT = nullptr; gsl_interp_accel *accT = nullptr; switch(type){ + // values at ztarget: case total: splineT = gsl_sp_dtot_; accT = gsl_ia_dtot_; break; case cdm: splineT = gsl_sp_dc_; accT = gsl_ia_dc_; break; case baryon: splineT = gsl_sp_db_; accT = gsl_ia_db_; break; case vtotal: splineT = gsl_sp_ttot_; accT = gsl_ia_ttot_; break; case vcdm: splineT = gsl_sp_tc_; accT = gsl_ia_tc_; break; case vbaryon: splineT = gsl_sp_tb_; accT = gsl_ia_tb_; break; - + // values at zstart: case total0: splineT = gsl_sp_dtot0_;accT = gsl_ia_dtot0_;break; case cdm0: splineT = gsl_sp_dc0_; accT = gsl_ia_dc0_; break; case baryon0: splineT = gsl_sp_db0_; accT = gsl_ia_db0_; break; From 8423161d6b1714905642e1b5ac69e5c5ee2f2bca Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 29 Mar 2020 14:46:25 +0200 Subject: [PATCH 081/130] added new vector type for vectorized operations --- include/vec.hh | 144 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 include/vec.hh diff --git a/include/vec.hh b/include/vec.hh new file mode 100644 index 0000000..dd914b0 --- /dev/null +++ b/include/vec.hh @@ -0,0 +1,144 @@ +#pragma once +/*******************************************************************************\ + vec.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 06/2019 - Oliver Hahn - first implementation +\*******************************************************************************/ + +#include + +//! implements general N-dim vectors of arbitrary primtive type with some arithmetic ops +template +struct vec_t +{ + std::array data_; + + vec_t() {} + + vec_t(const vec_t &v) + : data_(v.data_) {} + + vec_t(vec_t &&v) + : data_(std::move(v.data_)) {} + + template + vec_t(E... e) + : data_{{std::forward(e)...}} + { + static_assert(sizeof...(E) == N, "Brace-enclosed initialiser list doesn't match vec_t length!"); + } + + //! bracket index access to vector components + T &operator[](size_t i) noexcept { return data_[i]; } + + //! const bracket index access to vector components + const T &operator[](size_t i) const noexcept { return data_[i]; } + + // assignment operator + vec_t &operator=(const vec_t &v) noexcept + { + data_ = v.data_; + return *this; + } + + //! implementation of summation of vec_t + vec_t operator+(const vec_t &v) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = data_[i] + v[i]; + return res; + } + + //! implementation of difference of vec_t + vec_t operator-(const vec_t &v) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = data_[i] - v[i]; + return res; + } + + //! implementation of unary negative + vec_t operator-() const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = -data_[i]; + return res; + } + + //! implementation of scalar multiplication + template + vec_t operator*(T2 s) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = data_[i] * s; + return res; + } + + //! implementation of scalar division + vec_t operator/(T s) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = data_[i] / s; + return res; + } + + //! 
takes the absolute value of each element + vec_t abs(void) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = std::abs(data_[i]); + return res; + } + + //! implementation of implicit summation of vec_t + vec_t &operator+=(const vec_t &v) noexcept + { + for (int i = 0; i < N; ++i) + data_[i] += v[i]; + return *this; + } + + //! implementation of implicit subtraction of vec_t + vec_t &operator-=(const vec_t &v) noexcept + { + for (int i = 0; i < N; ++i) + data_[i] -= v[i]; + return *this; + } + + //! implementation of implicit scalar multiplication of vec_t + vec_t &operator*=(T s) noexcept + { + for (int i = 0; i < N; ++i) + data_[i] *= s; + return *this; + } + + //! implementation of implicit scalar division of vec_t + vec_t &operator/=(T s) noexcept + { + for (int i = 0; i < N; ++i) + data_[i] /= s; + return *this; + } + + size_t size(void) const noexcept { return N; } +}; + +//! multiplication with scalar +template +inline vec_t operator*(T2 s, const vec_t &v) +{ + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = v[i] * s; + return res; +} From 23edbd63a93ce7e128388bba46557330f064e747 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 29 Mar 2020 14:46:53 +0200 Subject: [PATCH 082/130] added custom ODE integration module --- include/ode_integrate.hh | 103 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 include/ode_integrate.hh diff --git a/include/ode_integrate.hh b/include/ode_integrate.hh new file mode 100644 index 0000000..3858b85 --- /dev/null +++ b/include/ode_integrate.hh @@ -0,0 +1,103 @@ +#pragma once +/*******************************************************************************\ + odetools.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 06/2019 - Oliver Hahn - first implementation +\*******************************************************************************/ + +namespace ode_integrate +{ + +// simple Runge-Kutta 4th order step without error estimate +template +inline void rk4_step(double h, double &t, vector_t &y, function_t f) +{ + vector_t k1(h * f(t, y)); + vector_t k2(h * f(t + h / 2, y + k1 / 2)); + vector_t k3(h * f(t + h / 2, y + k2 / 2)); + vector_t k4(h * f(t + h, y + k3)); + y += (k1 + 2 * k2 + 2 * k3 + k4) / 6; + t += h; +} + +// Cash-Karp modified Runge-Kutta scheme, 5th order with 4th order error estimate +// see Press & Teukolsky (1992): "Adaptive Stepsize Runge-Kutta Integration" +// in Computers in Physics 6, 188 (1992); doi: 10.1063/1.4823060 +template +inline vector_t ckrk5_step(double h, double &t, vector_t &y, function_t f) +{ + static constexpr double + a2 = 0.20, + a3 = 0.30, a4 = 0.60, a5 = 1.0, a6 = 0.8750, + b21 = 0.20, + b31 = 3.0 / 40.0, b32 = 9.0 / 40.0, + b41 = 0.30, b42 = -0.90, b43 = 1.20, + b51 = -11.0 / 54.0, b52 = 2.50, b53 = -70.0 / 27.0, b54 = 35.0 / 27.0, + b61 = 1631.0 / 55296.0, b62 = 175.0 / 512.0, b63 = 575.0 / 13824.0, b64 = 44275.0 / 110592.0, b65 = 253.0 / 4096.0, + c1 = 37.0 / 378.0, c3 = 250.0 / 621.0, c4 = 125.0 / 594.0, c6 = 512.0 / 1771.0, + dc1 = c1 - 2825.0 / 27648.0, dc3 = c3 - 18575.0 / 48384.0, + dc4 = c4 - 13525.0 / 55296.0, dc5 = -277.0 / 14336.0, dc6 = c6 - 0.250; + + vector_t k1(h * f(t, y)); + vector_t k2(h * f(t + a2 * h, y + b21 * k1)); + vector_t k3(h * f(t + a3 * h, y + b31 * k1 + b32 * k2)); + vector_t k4(h * f(t + a4 * h, y + b41 * k1 + b42 * k2 + b43 * k3)); + vector_t k5(h * f(t + a5 * h, y + b51 * k1 + b52 * k2 + b53 * k3 + 
b54 * k4)); + vector_t k6(h * f(t + a6 * h, y + b61 * k1 + b62 * k2 + b63 * k3 + b64 * k4 + b65 * k5)); + + y += c1 * k1 + c3 * k3 + c4 * k4 + c6 * k6; + + return dc1 * k1 + dc3 * k3 + dc4 * k4 + dc5 * k5 + dc6 * k6; +} + +// Adaptive step-size quality-controlled routine for ckrk5_step, see +// Press & Teukolsky (1992): "Adaptive Stepsize Runge-Kutta Integration" +// in Computers in Physics 6, 188 (1992); doi: 10.1063/1.4823060 +template +inline void rk_step_qs(double htry, double &t, vector_t &y, vector_t &yscale, function_t f, double eps, double &hdid, double &hnext) +{ + static constexpr double SAFETY{0.9}; + static constexpr double PSHRNK{-0.25}; + static constexpr double PGROW{-0.2}; + static constexpr double ERRCON{1.89e-4}; + + auto h(htry); + vector_t ytemp(y); + vector_t yerr; + double errmax; + +do_ckrk5trialstep: + yerr = ckrk5_step(h, t, ytemp, f); + errmax = 0.0; + for (size_t i = 0; i < yerr.size(); ++i) + { + errmax = std::max(errmax, std::abs(yerr[i] / yscale[i])); + } + errmax = errmax / eps; + if (errmax > 1.0) + { + h *= std::max(0.1, SAFETY*std::pow(errmax, PSHRNK)); + if (t + h == t) + { + std::cerr << "stepsize underflow in rkqs" << std::endl; + abort(); + } + goto do_ckrk5trialstep; + } + else + { + if( errmax > ERRCON ){ + hnext = h * SAFETY * std::pow(errmax, PGROW); + }else{ + hnext = 5*h; + } + hdid = h; + t += h; + y = ytemp; + } +} + + +} // namespace ode_integrate \ No newline at end of file From 0d3a17b253efdd3c5c3934572f26f3e787cbb385 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 29 Mar 2020 14:49:17 +0200 Subject: [PATCH 083/130] rewrote cosmology parameters and background evolution with full integration of 2nd order ODE --- include/cosmology_calculator.hh | 274 +++++++++++++++++++------------- include/cosmology_parameters.hh | 65 ++++++-- include/ic_generator.hh | 2 +- include/testing.hh | 2 +- src/ic_generator.cc | 12 +- src/testing.cc | 4 +- 6 files changed, 226 insertions(+), 133 deletions(-) diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index ba2d8ff..cb07a04 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -1,25 +1,44 @@ #pragma once #include +#include #include +#include #include +#include #include #include +#include #include +namespace cosmology +{ + /*! - * @class CosmologyCalculator + * @class cosmology::calculator * @brief provides functions to compute cosmological quantities * * This class provides member functions to compute cosmological quantities * related to the Friedmann equations and linear perturbation theory */ -class CosmologyCalculator +class calculator { +public: + //! data structure to store cosmological parameters + cosmology::parameters cosmo_param_; + + //! 
pointer to an instance of a transfer function plugin + //TransferFunction_plugin *ptransfer_fun_; + std::unique_ptr transfer_function_; + private: static constexpr double REL_PRECISION = 1e-5; + std::vector tab_a_, tab_D_, tab_f_; + gsl_interp_accel *gsl_ia_a_, *gsl_ia_D_, *gsl_ia_f_; + gsl_spline *gsl_sp_a_, *gsl_sp_D_, *gsl_sp_f_; + double Dnow_; real_t integrate(double (*func)(double x, void *params), double a, double b, void *params) const { @@ -44,58 +63,136 @@ private: return (real_t)result; } + void compute_growth(void) + { + using v_t = vec_t<3, double>; + + // set ICs + const double a0 = 1e-10; + const double D0 = a0; + const double Dprime0 = 2.0 * D0 * H_of_a(a0) / std::pow(phys_const::c_SI, 2); + const double t0 = 1.0 / (a0 * H_of_a(a0)); + + v_t y0({a0, D0, Dprime0}); + + // set up integration + double dt = 1e-9; + double dtdid, dtnext; + const double amax = 2.0; + + v_t yy(y0); + double t = t0; + const double eps = 1e-10; + + while (yy[0] < amax) + { + // RHS of ODEs + auto rhs = [&](double t, v_t y) -> v_t { + auto a = y[0]; + auto D = y[1]; + auto Dprime = y[2]; + v_t dy; + // da/dtau = a^2 H(a) + dy[0] = a * a * H_of_a(a); + // d D/dtau + dy[1] = Dprime; + // d^2 D / dtau^2 + dy[2] = -a * H_of_a(a) * Dprime + 3.0 / 2.0 * cosmo_param_.Omega_m * std::pow(cosmo_param_.H0, 2) * D / a; + return dy; + }; + + // scale by predicted value to get approx. constant fractional errors + v_t yyscale = yy.abs() + dt * rhs(t, yy).abs(); + + // call integrator + ode_integrate::rk_step_qs(dt, t, yy, yyscale, rhs, eps, dtdid, dtnext); + + tab_a_.push_back(yy[0]); + tab_D_.push_back(yy[1]); + tab_f_.push_back(yy[2]); + + dt = dtnext; + } + + // compute f, before we stored here D' + for (size_t i = 0; i < tab_a_.size(); ++i) + { + tab_f_[i] = std::log(tab_f_[i] / (tab_a_[i] * H_of_a(tab_a_[i]) * tab_D_[i])); + tab_D_[i] = std::log(tab_D_[i]); + tab_a_[i] = std::log(tab_a_[i]); + } + + gsl_ia_D_ = gsl_interp_accel_alloc(); + gsl_ia_f_ = gsl_interp_accel_alloc(); + + gsl_sp_D_ = gsl_spline_alloc(gsl_interp_cspline, tab_a_.size()); + gsl_sp_f_ = gsl_spline_alloc(gsl_interp_cspline, tab_a_.size()); + + gsl_spline_init(gsl_sp_D_, &tab_a_[0], &tab_D_[0], tab_a_.size()); + gsl_spline_init(gsl_sp_f_, &tab_a_[0], &tab_f_[0], tab_a_.size()); + + Dnow_ = std::exp(gsl_spline_eval(gsl_sp_D_, 0.0, gsl_ia_D_)); + } + public: - //! data structure to store cosmological parameters - CosmologyParameters cosmo_param_; - - //! pointer to an instance of a transfer function plugin - //TransferFunction_plugin *ptransfer_fun_; - std::unique_ptr transfer_function_; - - //! constructor for a cosmology calculator object /*! * @param acosmo a cosmological parameters structure * @param pTransferFunction pointer to an instance of a transfer function object */ - explicit CosmologyCalculator(ConfigFile &cf) - : cosmo_param_(cf) - { + explicit calculator(ConfigFile &cf) + : cosmo_param_(cf) + { transfer_function_ = std::move(select_TransferFunction_plugin(cf)); transfer_function_->intialise(); cosmo_param_.pnorm = this->ComputePNorm(); cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm); - csoca::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons" << " : " << (transfer_function_->tf_is_distinct()? "yes" : "no") << std::endl; - csoca::ilog << std::setw(32) << std::left << "TF maximum wave number" << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl; + csoca::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons" + << " : " << (transfer_function_->tf_is_distinct() ? 
"yes" : "no") << std::endl; + csoca::ilog << std::setw(32) << std::left << "TF maximum wave number" + << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl; + + // pre-compute growth factors and store for interpolation + this->compute_growth(); + } + + ~calculator() + { + gsl_spline_free(gsl_sp_D_); + gsl_spline_free(gsl_sp_f_); + gsl_interp_accel_free(gsl_ia_D_); + gsl_interp_accel_free(gsl_ia_f_); } //! Write out a correctly scaled power spectrum at time a - void WritePowerspectrum( real_t a, std::string fname ) const + void write_powerspectrum(real_t a, std::string fname) const { - const real_t Dplus0 = this->CalcGrowthFactor(a) / this->CalcGrowthFactor(1.0); + const real_t Dplus0 = this->get_growth_factor(a); - if( CONFIG::MPI_task_rank==0 ) + if (CONFIG::MPI_task_rank == 0) { - double kmin = std::max(1e-4,transfer_function_->get_kmin()); + double kmin = std::max(1e-4, transfer_function_->get_kmin()); // write power spectrum to a file std::ofstream ofs(fname.c_str()); - std::stringstream ss; ss << " ,a=" << a <<""; + std::stringstream ss; + ss << " ,a=" << a << ""; ofs << "# " << std::setw(18) << "k [h/Mpc]" - << std::setw(20) << ("P_dtot(k"+ss.str()+"|BS)") - << std::setw(20) << ("P_dcdm(k"+ss.str()+"|BS)") - << std::setw(20) << ("P_dbar(k"+ss.str()+"|BS)") - << std::setw(20) << ("P_tcdm(k"+ss.str()+"|BS)") - << std::setw(20) << ("P_tbar(k"+ss.str()+"|BS)") - << std::setw(20) << ("P_dtot(k"+ss.str()+")") - << std::setw(20) << ("P_dcdm(k"+ss.str()+")") - << std::setw(20) << ("P_dbar(k"+ss.str()+")") - << std::setw(20) << ("P_tcdm(k"+ss.str()+")") - << std::setw(20) << ("P_tbar(k"+ss.str()+")") - << std::setw(20) << ("P_dtot(K,a=1)") - << std::endl; - for( double k=kmin; kget_kmax(); k*=1.05 ){ + << std::setw(20) << ("P_dtot(k" + ss.str() + "|BS)") + << std::setw(20) << ("P_dcdm(k" + ss.str() + "|BS)") + << std::setw(20) << ("P_dbar(k" + ss.str() + "|BS)") + << std::setw(20) << ("P_tcdm(k" + ss.str() + "|BS)") + << std::setw(20) << ("P_tbar(k" + ss.str() + "|BS)") + << std::setw(20) << ("P_dtot(k" + ss.str() + ")") + << std::setw(20) << ("P_dcdm(k" + ss.str() + ")") + << std::setw(20) << ("P_dbar(k" + ss.str() + ")") + << std::setw(20) << ("P_tcdm(k" + ss.str() + ")") + << std::setw(20) << ("P_tbar(k" + ss.str() + ")") + << std::setw(20) << ("P_dtot(K,a=1)") + << std::endl; + for (double k = kmin; k < transfer_function_->get_kmax(); k *= 1.05) + { ofs << std::setw(20) << std::setprecision(10) << k << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total) * Dplus0, 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, cdm) * Dplus0, 2.0) @@ -111,11 +208,10 @@ public: << std::endl; } } - csoca::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl; } - const CosmologyParameters &GetParams(void) const + const cosmology::parameters &GetParams(void) const { return cosmo_param_; } @@ -128,90 +224,44 @@ public: */ inline real_t Power(real_t k, real_t a) { - real_t Dplus = CalcGrowthFactor(a); - real_t DplusOne = CalcGrowthFactor(1.0); + real_t Dplus = this->get_growth_factor(a); real_t pNorm = ComputePNorm(); - Dplus /= DplusOne; - DplusOne = 1.0; - real_t scale = Dplus / DplusOne; - return pNorm * scale * scale * TransferSq(k) * pow((double)k, (double)cosmo_param_.nspect); + return pNorm * Dplus * Dplus * TransferSq(k) * pow((double)k, (double)cosmo_param_.nspect); } - inline static double H_of_a(double a, const void *Params) + //! 
return the value of the Hubble function H(a) = dloga/dt + inline double H_of_a(double a) const { - const CosmologyParameters *cosm = (CosmologyParameters *)Params; - double a2 = a * a; - double Ha = sqrt(cosm->Omega_m / (a2 * a) + cosm->Omega_k / a2 + cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a)); - return Ha; + double HH2 = 0.0; + HH2 += cosmo_param_.Omega_r / (a * a * a * a); + HH2 += cosmo_param_.Omega_m / (a * a * a); + HH2 += cosmo_param_.Omega_k / (a * a); + HH2 += cosmo_param_.Omega_DE * std::pow(a, -3. * (1. + cosmo_param_.w_0 + cosmo_param_.w_a)) * exp(-3. * (1.0 - a) * cosmo_param_.w_a); + return cosmo_param_.H0 * std::sqrt(HH2); } - inline double H_of_a( double a ) const + //! Computes the linear theory growth factor D+, normalised to D+(a=1)=1 + real_t get_growth_factor(real_t a) const { - return 100.0 * this->H_of_a(a,reinterpret_cast(&this->cosmo_param_)); + return std::exp(gsl_spline_eval(gsl_sp_D_, std::log(a), gsl_ia_D_)) / Dnow_; } - inline static double Hprime_of_a(double a, void *Params) + //! Computes the linear theory growth rate f + /*! Function computes (by interpolating on precalculated table) + * f = dlog D+ / dlog a + */ + real_t get_f(real_t a) const { - CosmologyParameters *cosm = (CosmologyParameters *)Params; - double a2 = a * a; - double H = H_of_a(a, Params); - double Hprime = 1 / (a * H) * (-1.5 * cosm->Omega_m / (a2 * a) - cosm->Omega_k / a2 - 1.5 * cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a) * (1. + cosm->w_0 + (1. - a) * cosm->w_a)); - return Hprime; - } - - //! Integrand used by function CalcGrowthFactor to determine the linear growth factor D+ - inline static double GrowthIntegrand(double a, void *Params) - { - double Ha = a * H_of_a(a, Params); - return 2.5 / (Ha * Ha * Ha); - } - - //! integrand function for Calc_fPeebles - /*! - * @sa Calc_fPeebles - */ - inline static double fIntegrand( double a, void *Params ) - { - CosmologyParameters *cosm = (CosmologyParameters *)Params; - double y = cosm->Omega_m*(1.0/a-1.0) + cosm->Omega_DE*(a*a-1.0) + 1.0; - return 1.0/pow(y,1.5); - } - - //! calculates d log D+/d log a - /*! this version follows the Peebles (TBD: add citation) - * formula to compute Bertschinger's vfact - */ - inline real_t CalcGrowthRate( real_t a ) - { - return CalcVFact(a) / H_of_a(a) / a; - } - - //! Computes the linear theory growth factor D+ - /*! Function integrates over member function GrowthIntegrand and computes - * /a - * D+(a) = 5/2 H(a) * | [a'^3 * H(a')^3]^(-1) da' - * /0 - */ - real_t CalcGrowthFactor(real_t a) const - { - real_t integral = integrate(&GrowthIntegrand, 0.0, a, (void *)&cosmo_param_); - return H_of_a(a, (void *)&cosmo_param_) * integral; + return std::exp(gsl_spline_eval(gsl_sp_f_, std::log(a), gsl_ia_f_)); } //! Compute the factor relating particle displacement and velocity /*! Function computes - * - * vfac = a^2 * H(a) * dlogD+ / d log a = a^2 * H'(a) + 5/2 * [ a * D+(a) * H(a) ]^(-1) - * - */ - real_t CalcVFact(real_t a) const + * vfac = a * (H(a)/h) * dlogD+ / dlog a + */ + real_t get_vfact(real_t a) const { - real_t Dp = CalcGrowthFactor(a); - real_t H = H_of_a(a, (void *)&cosmo_param_); - real_t Hp = Hprime_of_a(a, (void *)&cosmo_param_); - real_t a2 = a * a; - - return (a2 * Hp + 2.5 / (a * Dp * H)) * 100.0; + return a * H_of_a(a) / cosmo_param_.h * this->get_f(a); } //! 
Integrand for the sigma_8 normalization of the power spectrum @@ -222,8 +272,8 @@ public: if (k <= 0.0) return 0.0f; - CosmologyCalculator *pcc = reinterpret_cast(pParams); - + cosmology::calculator *pcc = reinterpret_cast(pParams); + double x = k * 8.0; double w = 3.0 * (sin(x) - x * cos(x)) / (x * x * x); static double nspect = (double)pcc->cosmo_param_.nspect; @@ -241,8 +291,8 @@ public: if (k <= 0.0) return 0.0f; - CosmologyCalculator *pcc = reinterpret_cast(pParams); - + cosmology::calculator *pcc = reinterpret_cast(pParams); + double x = k * 8.0; double w = 3.0 * (sin(x) - x * cos(x)) / (x * x * x); static double nspect = (double)pcc->cosmo_param_.nspect; @@ -286,9 +336,9 @@ public: kmin = transfer_function_->get_kmin(); if (!transfer_function_->tf_has_total0()) - sigma0 = 4.0 * M_PI * integrate(&dSigma8, (double)kmin, (double)kmax, this ); + sigma0 = 4.0 * M_PI * integrate(&dSigma8, (double)kmin, (double)kmax, this); else - sigma0 = 4.0 * M_PI * integrate(&dSigma8_0, (double)kmin, (double)kmax, this ); + sigma0 = 4.0 * M_PI * integrate(&dSigma8_0, (double)kmin, (double)kmax, this); return cosmo_param_.sigma8 * cosmo_param_.sigma8 / sigma0; } @@ -305,4 +355,6 @@ inline double jeans_sound_speed(double rho, double mass) { const double G = 6.67e-8; return pow(6.0 * mass / M_PI * sqrt(rho) * pow(G, 1.5), 1.0 / 3.0); -} \ No newline at end of file +} + +} // namespace cosmology \ No newline at end of file diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh index 0c4efcd..228b20f 100644 --- a/include/cosmology_parameters.hh +++ b/include/cosmology_parameters.hh @@ -1,10 +1,21 @@ #pragma once +/*******************************************************************************\ + cosmology_parameters.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 06/2019 - Oliver Hahn - first implementation +\*******************************************************************************/ +#include #include -//! structure for cosmological parameters -struct CosmologyParameters +namespace cosmology { +//! structure for cosmological parameters +struct parameters +{ + double Omega_m, //!< baryon+dark matter density Omega_b, //!< baryon matter density @@ -12,38 +23,68 @@ struct CosmologyParameters Omega_r, //!< photon + relativistic particle density Omega_k, //!< curvature density H0, //!< Hubble constant in km/s/Mpc + h, //!< hubble parameter nspect, //!< long-wave spectral index (scale free is nspect=1) sigma8, //!< power spectrum normalization + Tcmb, //!< CMB temperature (used to set Omega_r) + Neff, //!< effective number of neutrino species (used to set Omega_r) w_0, //!< dark energy equation of state parameter 1: w = w0 + a * wa w_a, //!< dark energy equation of state parameter 2: w = w0 + a * wa // below are helpers to store additional information - dplus, //!< linear perturbation growth factor - pnorm, //!< actual power spectrum normalisation factor + dplus, //!< linear perturbation growth factor + f, //!< growth factor logarithmic derivative + pnorm, //!< actual power spectrum normalisation factor sqrtpnorm, //!< sqrt of power spectrum normalisation factor - vfact; //!< velocity<->displacement conversion factor in Zel'dovich approx. + vfact; //!< velocity<->displacement conversion factor in Zel'dovich approx. 
- explicit CosmologyParameters(ConfigFile cf) + explicit parameters(ConfigFile cf) { + H0 = cf.GetValue("cosmology", "H0"); + h = H0 / 100.0; + + nspect = cf.GetValue("cosmology", "nspec"); + Omega_b = cf.GetValue("cosmology", "Omega_b"); + Omega_m = cf.GetValue("cosmology", "Omega_m"); + Omega_DE = cf.GetValue("cosmology", "Omega_L"); + w_0 = cf.GetValueSafe("cosmology", "w0", -1.0); + w_a = cf.GetValueSafe("cosmology", "wa", 0.0); - Omega_r = cf.GetValueSafe("cosmology", "Omega_r", 0.0); // no longer default to nonzero (8.3e-5) - Omega_k = 1.0 - Omega_m - Omega_DE - Omega_r; + Tcmb = cf.GetValueSafe("cosmology", "Tcmb", 2.725); + + Neff = cf.GetValueSafe("cosmology", "Neff", 3.04); - H0 = cf.GetValue("cosmology", "H0"); sigma8 = cf.GetValue("cosmology", "sigma_8"); - nspect = cf.GetValue("cosmology", "nspec"); + + // calculate energy density in ultrarelativistic species from Tcmb and Neff + double Omega_gamma = 4 * phys_const::sigma_SI / std::pow(phys_const::c_SI, 3) * std::pow(Tcmb, 4.0) / phys_const::rhocrit_h2_SI / (h * h); + double Omega_nu = Neff * Omega_gamma * 7. / 8. * std::pow(4. / 11., 4. / 3.); + + Omega_r = Omega_gamma + Omega_nu; + + if (cf.GetValueSafe("cosmology", "NoRadiation", false)) + { + Omega_r = 0.0; + } + else + { + csoca::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally. Make sure your sim code supports this..." << std::endl; + } + + Omega_k = 1.0 - Omega_m - Omega_DE - Omega_r; dplus = 0.0; pnorm = 0.0; vfact = 0.0; } - CosmologyParameters(void) + parameters(void) { } -}; \ No newline at end of file +}; +} // namespace cosmology \ No newline at end of file diff --git a/include/ic_generator.hh b/include/ic_generator.hh index 59471b0..2cf38f4 100644 --- a/include/ic_generator.hh +++ b/include/ic_generator.hh @@ -15,6 +15,6 @@ namespace ic_generator{ extern std::unique_ptr the_random_number_generator; extern std::unique_ptr the_output_plugin; - extern std::unique_ptr the_cosmo_calc; + extern std::unique_ptr the_cosmo_calc; } diff --git a/include/testing.hh b/include/testing.hh index e5d2a99..1683b09 100644 --- a/include/testing.hh +++ b/include/testing.hh @@ -35,7 +35,7 @@ namespace testing{ void output_convergence( ConfigFile &the_config, - CosmologyCalculator* the_cosmo_calc, + cosmology::calculator* the_cosmo_calc, std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 708e12b..49024df 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -22,13 +22,13 @@ namespace ic_generator{ std::unique_ptr the_random_number_generator; std::unique_ptr the_output_plugin; -std::unique_ptr the_cosmo_calc; +std::unique_ptr the_cosmo_calc; int Initialise( ConfigFile& the_config ) { the_random_number_generator = std::move(select_RNG_plugin(the_config)); the_output_plugin = std::move(select_output_plugin(the_config)); - the_cosmo_calc = std::make_unique(the_config); + the_cosmo_calc = std::make_unique(the_config); return 0; } @@ -120,7 +120,7 @@ int Run( ConfigFile& the_config ) const real_t astart = 1.0/(1.0+zstart); const real_t volfac(std::pow(boxlen / ngrid / 2.0 / M_PI, 1.5)); - the_cosmo_calc->WritePowerspectrum(astart, "input_powerspec.txt" ); + the_cosmo_calc->write_powerspectrum(astart, "input_powerspec.txt" ); //csoca::ilog << "-----------------------------------------------------------------------------" << std::endl; @@ -132,8 +132,8 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- // 
Compute LPT time coefficients //-------------------------------------------------------------------- - const real_t Dplus0 = the_cosmo_calc->CalcGrowthFactor(astart) / the_cosmo_calc->CalcGrowthFactor(1.0); - const real_t vfac = the_cosmo_calc->CalcVFact(astart); + const real_t Dplus0 = the_cosmo_calc->get_growth_factor(astart); + const real_t vfac = the_cosmo_calc->get_vfact(astart); const double g1 = -Dplus0; const double g2 = ((LPTorder>1)? -3.0/7.0*Dplus0*Dplus0 : 0.0); @@ -151,7 +151,7 @@ int Run( ConfigFile& the_config ) // coefficients needed for anisotropic external tides const double ai3 = std::pow(astart,-3); const double Omega_m_of_a = the_cosmo_calc->cosmo_param_.Omega_m * ai3 / (the_cosmo_calc->cosmo_param_.Omega_m * ai3 + the_cosmo_calc->cosmo_param_.Omega_DE); - const double f1 = the_cosmo_calc->CalcGrowthRate(astart); + const double f1 = the_cosmo_calc->get_f(astart); const double f_aniso = -4.0/3.0 * f1 * f1 / Omega_m_of_a; const std::array lss_aniso_alpha = { diff --git a/src/testing.cc b/src/testing.cc index e99fbb4..ff990e1 100644 --- a/src/testing.cc +++ b/src/testing.cc @@ -242,7 +242,7 @@ void output_velocity_displacement_symmetries( void output_convergence( ConfigFile &the_config, - CosmologyCalculator* the_cosmo_calc, + cosmology::calculator* the_cosmo_calc, std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, @@ -301,7 +301,7 @@ void output_convergence( } } nabla_vini_mn.FourierTransformBackward(); - nabla_vini_mn *= (3.2144004915 / the_cosmo_calc->CalcGrowthFactor(1.0)); + nabla_vini_mn *= (3.2144004915 / the_cosmo_calc->get_growth_factor(1.0)); // sum of squares #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { From 9932f38e3c24d8b03049de134b5df77e4bd0efdc Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 11:15:59 +0200 Subject: [PATCH 084/130] updated to class 2.8 submodule --- external/class | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/class b/external/class index 58e0adb..083efeb 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit 58e0adbb2cf845cd0766a26cecc1a153fa17d8b9 +Subproject commit 083efeb043fca85418c1ea02f062be111b970b28 From f5a9006299a54567146084de3912741e82096f7d Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 11:18:21 +0200 Subject: [PATCH 085/130] some refactoring, mostly just homogenizing function names --- include/convolution.hh | 2 +- include/general.hh | 136 ++++++++++++++++++++++++----------------- include/grid_fft.hh | 2 +- src/grid_fft.cc | 4 +- src/main.cc | 2 +- 5 files changed, 86 insertions(+), 60 deletions(-) diff --git a/include/convolution.hh b/include/convolution.hh index 2145445..a1fc1e3 100644 --- a/include/convolution.hh +++ b/include/convolution.hh @@ -333,7 +333,7 @@ public: crecvbuf_ = new ccomplex_t[maxslicesz_ / 2]; recvbuf_ = reinterpret_cast(&crecvbuf_[0]); - int ntasks(MPI_Get_size()); + int ntasks(MPI::get_size()); offsets_.assign(ntasks, 0); offsetsp_.assign(ntasks, 0); diff --git a/include/general.hh b/include/general.hh index b7f7df3..7334579 100644 --- a/include/general.hh +++ b/include/general.hh @@ -7,13 +7,15 @@ #if defined(USE_MPI) #include - #include +#include #else - #include +#include #endif #include +#define _unused(x) ((void)(x)) + #ifdef USE_SINGLEPRECISION using real_t = float; using complex_t = fftwf_complex; @@ -24,9 +26,23 @@ using complex_t = fftw_complex; #define FFTW_PREFIX fftw #endif -enum class fluid_component { density, 
vx, vy, vz, dx, dy, dz }; -enum class cosmo_species { dm, baryon, neutrino }; -extern std::map cosmo_species_name; +enum class fluid_component +{ + density, + vx, + vy, + vz, + dx, + dy, + dz +}; +enum class cosmo_species +{ + dm, + baryon, + neutrino +}; +extern std::map cosmo_species_name; using ccomplex_t = std::complex; @@ -47,52 +63,64 @@ using fftw_plan_t = FFTW_GEN_NAME(FFTW_PREFIX, plan); #if defined(USE_MPI) inline double get_wtime() { - return MPI_Wtime(); + return MPI_Wtime(); } -inline int MPI_Get_rank( void ){ - int rank, ret; - ret = MPI_Comm_rank(MPI_COMM_WORLD, &rank); - assert( ret==MPI_SUCCESS ); - return rank; -} - -inline int MPI_Get_size( void ){ - int size, ret; - ret = MPI_Comm_size(MPI_COMM_WORLD, &size); - assert( ret==MPI_SUCCESS ); - return size; -} - -template -MPI_Datatype GetMPIDatatype( void ) +namespace MPI { - if( typeid(T) == typeid(std::complex) ) - return MPI_COMPLEX; - - if( typeid(T) == typeid(std::complex) ) - return MPI_DOUBLE_COMPLEX; - if( typeid(T) == typeid(int) ) +inline int get_rank(void) +{ + int rank, ret; + ret = MPI_Comm_rank(MPI_COMM_WORLD, &rank); + assert(ret == MPI_SUCCESS); + _unused(ret); + return rank; +} + +inline int get_size(void) +{ + int size, ret; + ret = MPI_Comm_size(MPI_COMM_WORLD, &size); + assert(ret == MPI_SUCCESS); + _unused(ret); + return size; +} + +template +inline MPI_Datatype get_datatype(void) +{ + if (typeid(T) == typeid(std::complex)) + return MPI_C_FLOAT_COMPLEX; + + if (typeid(T) == typeid(std::complex)) + return MPI_C_DOUBLE_COMPLEX; + + if (typeid(T) == typeid(std::complex)) + return MPI_C_LONG_DOUBLE_COMPLEX; + + if (typeid(T) == typeid(int)) return MPI_INT; - if( typeid(T) == typeid(unsigned) ) + if (typeid(T) == typeid(unsigned)) return MPI_UNSIGNED; - if( typeid(T) == typeid(float) ) + if (typeid(T) == typeid(float)) return MPI_FLOAT; - if( typeid(T) == typeid(double) ) + if (typeid(T) == typeid(double)) return MPI_DOUBLE; - if( typeid(T) == typeid(char) ) + if (typeid(T) == typeid(long double)) + return MPI_LONG_DOUBLE; + + if (typeid(T) == typeid(char)) return MPI_CHAR; abort(); - } -inline std::string GetMPIversion( void ) +inline std::string get_version(void) { int len; char mpi_lib_ver[MPI_MAX_LIBRARY_VERSION_STRING]; @@ -100,32 +128,31 @@ inline std::string GetMPIversion( void ) MPI_Get_library_version(mpi_lib_ver, &len); return std::string(mpi_lib_ver); } - +} // namespace MPI #else - #if defined(_OPENMP) - #include - inline double get_wtime() - { - return omp_get_wtime(); - } - #else - #include - inline double get_wtime() - { - return std::clock() / double(CLOCKS_PER_SEC); - } - #endif +#if defined(_OPENMP) +#include +inline double get_wtime() +{ + return omp_get_wtime(); +} +#else +#include +inline double get_wtime() +{ + return std::clock() / double(CLOCKS_PER_SEC); +} +#endif #endif -inline void multitask_sync_barrier( void ) +inline void multitask_sync_barrier(void) { #if defined(USE_MPI) - MPI_Barrier( MPI_COMM_WORLD ); + MPI_Barrier(MPI_COMM_WORLD); #endif } - namespace CONFIG { extern int MPI_thread_support; @@ -137,12 +164,11 @@ extern bool FFTW_threads_ok; extern int num_threads; } // namespace CONFIG - // These variables are autogenerated and compiled // into the library by the version.cmake script extern "C" { - extern const char* GIT_TAG; - extern const char* GIT_REV; - extern const char* GIT_BRANCH; + extern const char *GIT_TAG; + extern const char *GIT_REV; + extern const char *GIT_BRANCH; } \ No newline at end of file diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 
2cf5557..f460297 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -788,7 +788,7 @@ public: #if defined(USE_MPI) data_t glob_sum = 0.0; MPI_Allreduce(reinterpret_cast(&sum), reinterpret_cast(&glob_sum), - 1, GetMPIDatatype(), MPI_SUM, MPI_COMM_WORLD); + 1, MPI::get_datatype(), MPI_SUM, MPI_COMM_WORLD); sum = glob_sum; #endif } diff --git a/src/grid_fft.cc b/src/grid_fft.cc index a1b1912..4905cb1 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -539,8 +539,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string int mpi_size, mpi_rank; - mpi_size = MPI_Get_size(); - mpi_rank = MPI_Get_rank(); + mpi_size = MPI::get_size(); + mpi_rank = MPI::get_rank(); if (!file_exists(fname) && mpi_rank == 0) create_hdf5(fname); diff --git a/src/main.cc b/src/main.cc index 12cde3a..d2b97aa 100644 --- a/src/main.cc +++ b/src/main.cc @@ -146,7 +146,7 @@ int main( int argc, char** argv ) // MPI related infos #if defined(USE_MPI) csoca::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "yes (" << CONFIG::MPI_task_size << " tasks)" << std::endl; - csoca::dlog << std::setw(32) << std::left << "MPI version" << " : " << GetMPIversion() << std::endl; + csoca::dlog << std::setw(32) << std::left << "MPI version" << " : " << MPI::get_version() << std::endl; #else csoca::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "no" << std::endl; #endif From 4e013ec0d112c7a69d5ecafd8f0f219eb1f9688c Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 11:18:50 +0200 Subject: [PATCH 086/130] added global exception handler --- src/main.cc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/main.cc b/src/main.cc index d2b97aa..5afc648 100644 --- a/src/main.cc +++ b/src/main.cc @@ -28,6 +28,20 @@ int num_threads = 1; #include "system_stat.hh" +#include +#include + +void handle_eptr(std::exception_ptr eptr) // passing by value is ok +{ + try { + if (eptr) { + std::rethrow_exception(eptr); + } + } catch(const std::exception& e) { + csoca::elog << "This happened: \"" << e.what() << "\"" << std::endl; + } +} + int main( int argc, char** argv ) { csoca::Logger::SetLevel(csoca::LogLevel::Info); @@ -175,6 +189,7 @@ int main( int argc, char** argv ) { ic_generator::Initialise( the_config ); }catch(...){ + handle_eptr( std::current_exception() ); csoca::elog << "Problem during initialisation. See error(s) above. Exiting..." 
<< std::endl; #if defined(USE_MPI) MPI_Finalize(); From 809a03d59e7671182c680dedeb522e541d572d46 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 12:45:24 +0200 Subject: [PATCH 087/130] more parameters and verbosity in cosmo parameters --- include/cosmology_parameters.hh | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh index 228b20f..1af692c 100644 --- a/include/cosmology_parameters.hh +++ b/include/cosmology_parameters.hh @@ -55,19 +55,18 @@ struct parameters w_a = cf.GetValueSafe("cosmology", "wa", 0.0); - Tcmb = cf.GetValueSafe("cosmology", "Tcmb", 2.725); + Tcmb = cf.GetValueSafe("cosmology", "Tcmb", 2.7255); - Neff = cf.GetValueSafe("cosmology", "Neff", 3.04); + Neff = cf.GetValueSafe("cosmology", "Neff", 3.046); sigma8 = cf.GetValue("cosmology", "sigma_8"); // calculate energy density in ultrarelativistic species from Tcmb and Neff double Omega_gamma = 4 * phys_const::sigma_SI / std::pow(phys_const::c_SI, 3) * std::pow(Tcmb, 4.0) / phys_const::rhocrit_h2_SI / (h * h); double Omega_nu = Neff * Omega_gamma * 7. / 8. * std::pow(4. / 11., 4. / 3.); - Omega_r = Omega_gamma + Omega_nu; - if (cf.GetValueSafe("cosmology", "NoRadiation", false)) + if (cf.GetValueSafe("cosmology", "ZeroRadiation", false)) { Omega_r = 0.0; } @@ -75,8 +74,25 @@ struct parameters { csoca::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally. Make sure your sim code supports this..." << std::endl; } - +#if 1 + // assume zero curvature, take difference from dark energy + Omega_DE += 1.0 - Omega_m - Omega_DE - Omega_r; +#else + // allow for curvature Omega_k = 1.0 - Omega_m - Omega_DE - Omega_r; +#endif + + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + csoca::ilog << "Cosmological parameters are: " << std::endl; + csoca::ilog << " H0 = " << std::setw(16) << H0 << "sigma_8 = " << std::setw(16) << sigma8 << std::endl; + csoca::ilog << " Omega_c = " << std::setw(16) << Omega_m-Omega_b << "Omega_b = " << std::setw(16) << Omega_b << std::endl; + if (!cf.GetValueSafe("cosmology", "ZeroRadiation", false)){ + csoca::ilog << " Omega_g = " << std::setw(16) << Omega_gamma << "Omega_nu = " << std::setw(16) << Omega_nu << std::endl; + }else{ + csoca::ilog << " Omega_r = " << std::setw(16) << Omega_r << std::endl; + } + csoca::ilog << " Omega_DE = " << std::setw(16) << Omega_DE << "nspect = " << std::setw(16) << nspect << std::endl; + csoca::ilog << " w0 = " << std::setw(16) << w_0 << "w_a = " << std::setw(16) << w_a << std::endl; dplus = 0.0; pnorm = 0.0; From 4aba654797102c37ae33604860b857c51e30ff75 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 12:48:52 +0200 Subject: [PATCH 088/130] wrapped interpolation into its own class, also transfer functions can come pre-normalised now --- include/cosmology_calculator.hh | 179 +++++++++++++--------------- include/interpolate.hh | 65 ++++++++++ include/transfer_function_plugin.hh | 4 +- src/ic_generator.cc | 25 ++-- 4 files changed, 170 insertions(+), 103 deletions(-) create mode 100644 include/interpolate.hh diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index cb07a04..1708637 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -9,8 +9,10 @@ #include #include +#include + #include -#include +// #include #include namespace cosmology @@ -30,15 +32,12 @@ public: cosmology::parameters cosmo_param_; 
//! pointer to an instance of a transfer function plugin - //TransferFunction_plugin *ptransfer_fun_; std::unique_ptr transfer_function_; private: - static constexpr double REL_PRECISION = 1e-5; - std::vector tab_a_, tab_D_, tab_f_; - gsl_interp_accel *gsl_ia_a_, *gsl_ia_D_, *gsl_ia_f_; - gsl_spline *gsl_sp_a_, *gsl_sp_D_, *gsl_sp_f_; - double Dnow_; + static constexpr double REL_PRECISION = 1e-9; + interpolated_function_1d D_of_a_, f_of_a_, a_of_D_; + double Dnow_, astart_; real_t integrate(double (*func)(double x, void *params), double a, double b, void *params) const { @@ -63,7 +62,7 @@ private: return (real_t)result; } - void compute_growth(void) + void compute_growth( std::vector& tab_a, std::vector& tab_D, std::vector& tab_f ) { using v_t = vec_t<3, double>; @@ -107,31 +106,20 @@ private: // call integrator ode_integrate::rk_step_qs(dt, t, yy, yyscale, rhs, eps, dtdid, dtnext); - tab_a_.push_back(yy[0]); - tab_D_.push_back(yy[1]); - tab_f_.push_back(yy[2]); + tab_a.push_back(yy[0]); + tab_D.push_back(yy[1]); + tab_f.push_back(yy[2]); dt = dtnext; } // compute f, before we stored here D' - for (size_t i = 0; i < tab_a_.size(); ++i) + for (size_t i = 0; i < tab_a.size(); ++i) { - tab_f_[i] = std::log(tab_f_[i] / (tab_a_[i] * H_of_a(tab_a_[i]) * tab_D_[i])); - tab_D_[i] = std::log(tab_D_[i]); - tab_a_[i] = std::log(tab_a_[i]); + tab_f[i] = tab_f[i] / (tab_a[i] * H_of_a(tab_a[i]) * tab_D[i]); + tab_D[i] = tab_D[i]; + tab_a[i] = tab_a[i]; } - - gsl_ia_D_ = gsl_interp_accel_alloc(); - gsl_ia_f_ = gsl_interp_accel_alloc(); - - gsl_sp_D_ = gsl_spline_alloc(gsl_interp_cspline, tab_a_.size()); - gsl_sp_f_ = gsl_spline_alloc(gsl_interp_cspline, tab_a_.size()); - - gsl_spline_init(gsl_sp_D_, &tab_a_[0], &tab_D_[0], tab_a_.size()); - gsl_spline_init(gsl_sp_f_, &tab_a_[0], &tab_f_[0], tab_a_.size()); - - Dnow_ = std::exp(gsl_spline_eval(gsl_sp_D_, 0.0, gsl_ia_D_)); } public: @@ -142,33 +130,44 @@ public: */ explicit calculator(ConfigFile &cf) - : cosmo_param_(cf) + : cosmo_param_(cf), astart_( 1.0/(1.0+cf.GetValue("setup","zstart")) ) { + // pre-compute growth factors and store for interpolation + std::vector tab_a, tab_D, tab_f; + this->compute_growth(tab_a, tab_D, tab_f); + D_of_a_.set_data(tab_a,tab_D); + f_of_a_.set_data(tab_a,tab_f); + a_of_D_.set_data(tab_D,tab_a); + Dnow_ = D_of_a_(1.0); + + // set up transfer functions and compute normalisation transfer_function_ = std::move(select_TransferFunction_plugin(cf)); transfer_function_->intialise(); - cosmo_param_.pnorm = this->ComputePNorm(); + if( !transfer_function_->tf_isnormalised_ ) + cosmo_param_.pnorm = this->compute_pnorm_from_sigma8(); + else{ + cosmo_param_.pnorm = 1.0; + csoca::ilog << "Measured sigma8 for fixed PS normalisation is " << this->compute_sigma8() << std::endl; + } cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm); + csoca::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons" << " : " << (transfer_function_->tf_is_distinct() ? 
"yes" : "no") << std::endl; csoca::ilog << std::setw(32) << std::left << "TF maximum wave number" << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl; - // pre-compute growth factors and store for interpolation - this->compute_growth(); + // csoca::ilog << "D+(MUSIC) = " << this->get_growth_factor( 1.0/(1.0+cf.GetValue("setup","zstart")) ) << std::endl; + // csoca::ilog << "pnrom = " << cosmo_param_.pnorm << std::endl; } ~calculator() { - gsl_spline_free(gsl_sp_D_); - gsl_spline_free(gsl_sp_f_); - gsl_interp_accel_free(gsl_ia_D_); - gsl_interp_accel_free(gsl_ia_f_); } //! Write out a correctly scaled power spectrum at time a void write_powerspectrum(real_t a, std::string fname) const { - const real_t Dplus0 = this->get_growth_factor(a); + // const real_t Dplus0 = this->get_growth_factor(a); if (CONFIG::MPI_task_rank == 0) { @@ -177,60 +176,48 @@ public: // write power spectrum to a file std::ofstream ofs(fname.c_str()); std::stringstream ss; - ss << " ,a=" << a << ""; + ss << " ,ap=" << a << ""; ofs << "# " << std::setw(18) << "k [h/Mpc]" - << std::setw(20) << ("P_dtot(k" + ss.str() + "|BS)") - << std::setw(20) << ("P_dcdm(k" + ss.str() + "|BS)") - << std::setw(20) << ("P_dbar(k" + ss.str() + "|BS)") - << std::setw(20) << ("P_tcdm(k" + ss.str() + "|BS)") - << std::setw(20) << ("P_tbar(k" + ss.str() + "|BS)") - << std::setw(20) << ("P_dtot(k" + ss.str() + ")") - << std::setw(20) << ("P_dcdm(k" + ss.str() + ")") - << std::setw(20) << ("P_dbar(k" + ss.str() + ")") - << std::setw(20) << ("P_tcdm(k" + ss.str() + ")") - << std::setw(20) << ("P_tbar(k" + ss.str() + ")") + << std::setw(20) << ("P_dtot(k,a=ap)") + << std::setw(20) << ("P_dcdm(k,a=ap)") + << std::setw(20) << ("P_dbar(k,a=ap)") + << std::setw(20) << ("P_tcdm(k,a=ap)") + << std::setw(20) << ("P_tbar(k,a=ap)") + << std::setw(20) << ("P_dtot(k,a=1)") + << std::setw(20) << ("P_dcdm(k,a=1)") + << std::setw(20) << ("P_dbar(k,a=1)") + << std::setw(20) << ("P_tcdm(k,a=1)") + << std::setw(20) << ("P_tbar(k,a=1)") << std::setw(20) << ("P_dtot(K,a=1)") << std::endl; for (double k = kmin; k < transfer_function_->get_kmax(); k *= 1.05) { ofs << std::setw(20) << std::setprecision(10) << k - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, cdm) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, baryon) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vcdm) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vbaryon) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total0), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, cdm0), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, baryon0), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vcdm0), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vbaryon0), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon), 2.0) + << std::setw(20) << std::setprecision(10) 
<< std::pow(this->get_amplitude(k, vcdm), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vcdm0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vtotal), 2.0) << std::endl; + #warning Check whether output is at redshift that is indicated! } } csoca::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl; } - const cosmology::parameters &GetParams(void) const + const cosmology::parameters &get_parameters(void) const noexcept { return cosmo_param_; } - //! returns the amplitude of amplitude of the power spectrum - /*! - * @param k the wave number in h/Mpc - * @param a the expansion factor of the universe - * @returns power spectrum amplitude for wave number k at time a - */ - inline real_t Power(real_t k, real_t a) - { - real_t Dplus = this->get_growth_factor(a); - real_t pNorm = ComputePNorm(); - return pNorm * Dplus * Dplus * TransferSq(k) * pow((double)k, (double)cosmo_param_.nspect); - } - //! return the value of the Hubble function H(a) = dloga/dt - inline double H_of_a(double a) const + inline double H_of_a(double a) const noexcept { double HH2 = 0.0; HH2 += cosmo_param_.Omega_r / (a * a * a * a); @@ -241,25 +228,31 @@ public: } //! Computes the linear theory growth factor D+, normalised to D+(a=1)=1 - real_t get_growth_factor(real_t a) const + real_t get_growth_factor(real_t a) const noexcept { - return std::exp(gsl_spline_eval(gsl_sp_D_, std::log(a), gsl_ia_D_)) / Dnow_; + return D_of_a_(a) / Dnow_; + } + + //! Computes the inverse of get_growth_factor + real_t get_a( real_t Dplus ) const noexcept + { + return a_of_D_( Dplus * Dnow_ ); } //! Computes the linear theory growth rate f /*! Function computes (by interpolating on precalculated table) * f = dlog D+ / dlog a */ - real_t get_f(real_t a) const + real_t get_f(real_t a) const noexcept { - return std::exp(gsl_spline_eval(gsl_sp_f_, std::log(a), gsl_ia_f_)); + return f_of_a_(a); } //! Compute the factor relating particle displacement and velocity /*! Function computes * vfac = a * (H(a)/h) * dlogD+ / dlog a */ - real_t get_vfact(real_t a) const + real_t get_vfact(real_t a) const noexcept { return a * H_of_a(a) / cosmo_param_.h * this->get_f(a); } @@ -302,24 +295,12 @@ public: return k * k * w * w * pow((double)k, (double)nspect) * tf * tf; } - //! Computes the square of the transfer function - /*! Function evaluates the supplied transfer function ptransfer_fun_ - * and returns the square of its value at wave number k - * @param k wave number at which to evaluate the transfer function - */ - inline real_t TransferSq(real_t k) const - { - //.. parameter supplied transfer function - real_t tf1 = transfer_function_->compute(k, total); - return tf1 * tf1; - } - //! Computes the amplitude of a mode from the power spectrum /*! 
Function evaluates the supplied transfer function ptransfer_fun_ * and returns the amplitude of fluctuations at wave number k at z=0 * @param k wave number at which to evaluate */ - inline real_t GetAmplitude(real_t k, tf_type type) const + inline real_t get_amplitude(real_t k, tf_type type) const { return std::pow(k, 0.5 * cosmo_param_.nspect) * transfer_function_->compute(k, type) * cosmo_param_.sqrtpnorm; } @@ -329,7 +310,7 @@ public: * integrates the power spectrum to fix the normalization to that given * by the sigma_8 parameter */ - real_t ComputePNorm(void) + real_t compute_sigma8(void) { real_t sigma0, kmin, kmax; kmax = transfer_function_->get_kmax(); @@ -337,10 +318,22 @@ public: if (!transfer_function_->tf_has_total0()) sigma0 = 4.0 * M_PI * integrate(&dSigma8, (double)kmin, (double)kmax, this); - else + else{ sigma0 = 4.0 * M_PI * integrate(&dSigma8_0, (double)kmin, (double)kmax, this); + } - return cosmo_param_.sigma8 * cosmo_param_.sigma8 / sigma0; + return std::sqrt(sigma0); + } + + //! Computes the normalization for the power spectrum + /*! + * integrates the power spectrum to fix the normalization to that given + * by the sigma_8 parameter + */ + real_t compute_pnorm_from_sigma8(void) + { + auto measured_sigma8 = this->compute_sigma8(); + return cosmo_param_.sigma8 * cosmo_param_.sigma8 / (measured_sigma8 * measured_sigma8); } }; diff --git a/include/interpolate.hh b/include/interpolate.hh new file mode 100644 index 0000000..a2a5a88 --- /dev/null +++ b/include/interpolate.hh @@ -0,0 +1,65 @@ +#pragma once + +#include +#include +#include +#include + +template +class interpolated_function_1d +{ + +private: + std::vector data_x_, data_y_; + gsl_interp_accel *gsl_ia_; + gsl_spline *gsl_sp_; + bool isinit_; + + void deallocate() + { + gsl_spline_free(gsl_sp_); + gsl_interp_accel_free(gsl_ia_); + } + +public: + interpolated_function_1d(const interpolated_function_1d &i) = delete; + + interpolated_function_1d(){} + + interpolated_function_1d(const std::vector &data_x, const std::vector &data_y) + { + this->set_data( data_x, data_y ); + } + + ~interpolated_function_1d() + { + if (isinit_) this->deallocate(); + } + + void set_data(const std::vector &data_x, const std::vector &data_y) + { + assert(data_x_.size() == data_y_.size()); + assert(!(logx & periodic)); + + data_x_ = data_x; + data_y_ = data_y; + + if (logx) for (auto &d : data_x_) d = std::log(d); + if (logy) for (auto &d : data_y_) d = std::log(d); + + if (isinit_) this->deallocate(); + + gsl_ia_ = gsl_interp_accel_alloc(); + gsl_sp_ = gsl_spline_alloc(periodic ? gsl_interp_cspline_periodic : gsl_interp_cspline, data_x_.size()); + gsl_spline_init(gsl_sp_, &data_x_[0], &data_y_[0], data_x_.size()); + + isinit_ = true; + } + + double operator()(double x) const noexcept + { + double xa = logx ? std::log(x) : x; + double y(gsl_spline_eval(gsl_sp_, xa, gsl_ia_)); + return logy ? 
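+        // the spline was built on log(y) when logy is set, so transform the interpolated value back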
std::exp(y) : y; + } +}; \ No newline at end of file diff --git a/include/transfer_function_plugin.hh b/include/transfer_function_plugin.hh index 6a7fb6f..fd95250 100644 --- a/include/transfer_function_plugin.hh +++ b/include/transfer_function_plugin.hh @@ -30,10 +30,12 @@ class TransferFunction_plugin bool tf_withvel_; //!< bool if also have velocity transfer functions bool tf_withtotal0_; //!< have the z=0 spectrum for normalisation purposes bool tf_velunits_; //!< velocities are in velocity units (km/s) + bool tf_isnormalised_; //!< assume that transfer functions come already correctly normalised and need be re-normalised to a specified value + public: //! constructor TransferFunction_plugin(ConfigFile &cf) - : pcf_(&cf), tf_distinct_(false), tf_withvel_(false), tf_withtotal0_(false), tf_velunits_(false) + : pcf_(&cf), tf_distinct_(false), tf_withvel_(false), tf_withtotal0_(false), tf_velunits_(false), tf_isnormalised_(false) { } //! destructor diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 49024df..a8f60bd 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -296,7 +296,7 @@ int Run( ConfigFile& the_config ) phi.FourierTransformForward(false); phi.assign_function_of_grids_kdep([&](auto k, auto wn) { real_t kmod = k.norm(); - ccomplex_t delta = wn * the_cosmo_calc->GetAmplitude(kmod, total); + ccomplex_t delta = wn * the_cosmo_calc->get_amplitude(kmod, total); return -delta / (kmod * kmod); }, wnoise); @@ -571,10 +571,14 @@ int Run( ConfigFile& the_config ) if( bDoBaryons ){ vec3_t kvec = phi.get_k(i,j,k); real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); - double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, cdm0) : - (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, baryon0) : - the_cosmo_calc->GetAmplitude(kmod, total0)) - the_cosmo_calc->GetAmplitude(kmod, total0); - //the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); + // double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, cdm) : + // (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, baryon) : + // // the_cosmo_calc->get_amplitude(kmod, total)) - the_cosmo_calc->get_amplitude(kmod, total); + // the_cosmo_calc->get_amplitude(kmod, total)*(-g1)) - the_cosmo_calc->get_amplitude(kmod, total)*(-g1); + + real_t ampldiff = (((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, cdm) + : (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, baryon) : + the_cosmo_calc->get_amplitude(kmod, total)) - the_cosmo_calc->get_amplitude(kmod, total)) * (-g1); tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * lunit * ampldiff / k2 / boxlen; } @@ -620,10 +624,13 @@ int Run( ConfigFile& the_config ) if( bDoBaryons ){ vec3_t kvec = phi.get_k(i,j,k); real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); - double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, vcdm0) : - (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, vbaryon0) : - the_cosmo_calc->GetAmplitude(kmod, vtotal0)) - the_cosmo_calc->GetAmplitude(kmod, vtotal0); - //the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); + // double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, vcdm0) : + // (this_species == cosmo_species::baryon)? 
the_cosmo_calc->get_amplitude(kmod, vbaryon0) : + // the_cosmo_calc->get_amplitude(kmod, vtotal0)) - the_cosmo_calc->get_amplitude(kmod, vtotal0); + // // the_cosmo_calc->get_amplitude(kmod, total)*(-g1)) - the_cosmo_calc->get_amplitude(kmod, total)*(-g1); + real_t ampldiff = (((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, vcdm) + : (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, vbaryon) : + the_cosmo_calc->get_amplitude(kmod, vtotal)) - the_cosmo_calc->get_amplitude(kmod, vtotal)) * (-g1); tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * vfac1 * vunit / boxlen * ampldiff / k2 ; } From 705dcf7cf52c03bf8e4ed3d82f0c03572ff63ef3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 12:49:15 +0200 Subject: [PATCH 089/130] fixed class cmake file --- external/class.cmake | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/external/class.cmake b/external/class.cmake index 0a3f3c2..aead59b 100644 --- a/external/class.cmake +++ b/external/class.cmake @@ -78,6 +78,7 @@ if(ENABLE_CLASS) ${CMAKE_CURRENT_LIST_DIR}/class/tools/parser.c ${CMAKE_CURRENT_LIST_DIR}/class/tools/quadrature.c ${CMAKE_CURRENT_LIST_DIR}/class/tools/hyperspherical.c + ${CMAKE_CURRENT_LIST_DIR}/class/tools/trigonometric_integrals.c ${CMAKE_CURRENT_LIST_DIR}/class/tools/common.c ${CMAKE_CURRENT_LIST_DIR}/class/source/input.c ${CMAKE_CURRENT_LIST_DIR}/class/source/background.c @@ -131,9 +132,9 @@ macro(target_setup_class target_name) endif(ENABLE_CLASS) endmacro(target_setup_class) -if(ENABLE_CLASS) - # test executable - add_executable(testTk - ${CMAKE_CURRENT_LIST_DIR}/class/cpp/testTk.cc) - target_setup_class(testTk) -endif(ENABLE_CLASS) \ No newline at end of file +# if(ENABLE_CLASS) +# # test executable +# add_executable(testTk +# ${CMAKE_CURRENT_LIST_DIR}/class/cpp/testTk.cc) +# target_setup_class(testTk) +# endif(ENABLE_CLASS) \ No newline at end of file From 3a8a22737f235581261a701f4903fb81ee4b0a82 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 19:25:54 +0200 Subject: [PATCH 090/130] major update of class plugin --- include/cosmology_calculator.hh | 20 +- include/interpolate.hh | 7 +- src/plugins/transfer_CLASS.cc | 433 ++++++++++++++++++-------------- 3 files changed, 267 insertions(+), 193 deletions(-) diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index 1708637..e7d92f9 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -37,7 +37,7 @@ public: private: static constexpr double REL_PRECISION = 1e-9; interpolated_function_1d D_of_a_, f_of_a_, a_of_D_; - double Dnow_, astart_; + double Dnow_, Dplus_start_, astart_; real_t integrate(double (*func)(double x, void *params), double a, double b, void *params) const { @@ -140,6 +140,8 @@ public: a_of_D_.set_data(tab_D,tab_a); Dnow_ = D_of_a_(1.0); + Dplus_start_ = D_of_a_( astart_ ) / Dnow_; + // set up transfer functions and compute normalisation transfer_function_ = std::move(select_TransferFunction_plugin(cf)); transfer_function_->intialise(); @@ -147,7 +149,7 @@ public: cosmo_param_.pnorm = this->compute_pnorm_from_sigma8(); else{ cosmo_param_.pnorm = 1.0; - csoca::ilog << "Measured sigma8 for fixed PS normalisation is " << this->compute_sigma8() << std::endl; + csoca::ilog << "Measured sigma_8 for given PS normalisation is " << this->compute_sigma8() << std::endl; } cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm); @@ -193,11 +195,11 @@ public: for (double k = kmin; k 
< transfer_function_->get_kmax(); k *= 1.05) { ofs << std::setw(20) << std::setprecision(10) << k - << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vcdm), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total)*Dplus_start_, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm)*Dplus_start_, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon)*Dplus_start_, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vcdm)*Dplus_start_, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon)*Dplus_start_, 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total0), 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm0), 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon0), 2.0) @@ -302,7 +304,9 @@ public: */ inline real_t get_amplitude(real_t k, tf_type type) const { - return std::pow(k, 0.5 * cosmo_param_.nspect) * transfer_function_->compute(k, type) * cosmo_param_.sqrtpnorm; + // if the transfer function doesn't need backscaling, then divide out growth factor + real_t f = transfer_function_->tf_isnormalised_? 1.0/Dplus_start_ : 1.0; + return f * std::pow(k, 0.5 * cosmo_param_.nspect) * transfer_function_->compute(k, type) * cosmo_param_.sqrtpnorm; } //! Computes the normalization for the power spectrum diff --git a/include/interpolate.hh b/include/interpolate.hh index a2a5a88..cb0ea50 100644 --- a/include/interpolate.hh +++ b/include/interpolate.hh @@ -10,10 +10,10 @@ class interpolated_function_1d { private: + bool isinit_; std::vector data_x_, data_y_; gsl_interp_accel *gsl_ia_; gsl_spline *gsl_sp_; - bool isinit_; void deallocate() { @@ -22,11 +22,12 @@ private: } public: - interpolated_function_1d(const interpolated_function_1d &i) = delete; + interpolated_function_1d(const interpolated_function_1d &) = delete; - interpolated_function_1d(){} + interpolated_function_1d() : isinit_(false){} interpolated_function_1d(const std::vector &data_x, const std::vector &data_y) + : isinit_(false) { this->set_data( data_x, data_y ); } diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 079b633..e358fce 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -16,241 +16,310 @@ #include #include #include +#include #include #include -class transfer_CLASS_plugin : public TransferFunction_plugin { +class transfer_CLASS_plugin : public TransferFunction_plugin +{ private: - //... 
target redshift tables - std::vector tab_lnk_, tab_dtot_, tab_dc_, tab_db_, tab_ttot_, tab_tc_, tab_tb_; - gsl_interp_accel *gsl_ia_dtot_, *gsl_ia_dc_, *gsl_ia_db_, *gsl_ia_ttot_, *gsl_ia_tc_, *gsl_ia_tb_; - gsl_spline *gsl_sp_dtot_, *gsl_sp_dc_, *gsl_sp_db_, *gsl_sp_ttot_, *gsl_sp_tc_, *gsl_sp_tb_; + interpolated_function_1d delta_c_, delta_b_, delta_n_, delta_m_, theta_c_, theta_b_, theta_n_, theta_m_; + interpolated_function_1d delta_c0_, delta_b0_, delta_n0_, delta_m0_, theta_c0_, theta_b0_, theta_n0_, theta_m0_; - //... starting redshift tables - std::vector tab_lnk0_, tab_dtot0_, tab_dc0_, tab_db0_, tab_ttot0_, tab_tc0_, tab_tb0_; - gsl_interp_accel *gsl_ia_dtot0_, *gsl_ia_dc0_, *gsl_ia_db0_, *gsl_ia_ttot0_, *gsl_ia_tc0_, *gsl_ia_tb0_; - gsl_spline *gsl_sp_dtot0_, *gsl_sp_dc0_, *gsl_sp_db0_, *gsl_sp_ttot0_, *gsl_sp_tc0_, *gsl_sp_tb0_; + // single fluid growing/decaying mode decomposition + // gsl_interp_accel *gsl_ia_Cplus_, *gsl_ia_Cminus_; + // gsl_spline *gsl_sp_Cplus_, *gsl_sp_Cminus_; + // std::vector tab_Cplus_, tab_Cminus_; - // single fluid growing/decaying mode decomposition - gsl_interp_accel *gsl_ia_Cplus_, *gsl_ia_Cminus_; - gsl_spline *gsl_sp_Cplus_, *gsl_sp_Cminus_; - std::vector tab_Cplus_, tab_Cminus_; + double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_, astart_, atarget_, A_s_, n_s_, Tcmb_, tnorm_; - double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_, astart_, atarget_; + ClassParams pars_; + std::unique_ptr the_ClassEngine_; + std::ofstream ofs_class_input_; - void ClassEngine_get_data( void ){ - std::vector d_ncdm, t_ncdm, phi, psi; + template + void add_class_parameter(std::string parameter_name, const T parameter_value) + { + pars_.add(parameter_name, parameter_value); + ofs_class_input_ << parameter_name << " = " << parameter_value << std::endl; + } - csoca::ilog << "Computing TF via ClassEngine..." << std::endl << " ztarget = " << ztarget_ << ", zstart = " << zstart_ << " ..." << std::flush; - double wtime = get_wtime(); + //! 
Set up class parameters from MUSIC cosmological parameters + void init_ClassEngine(void) + { + //--- general parameters ------------------------------------------ + add_class_parameter("z_max_pk", std::max(zstart_, ztarget_) * 1.2); // use 1.2 as safety + add_class_parameter("P_k_max_h/Mpc", kmax_); + add_class_parameter("output", "dTk,vTk"); + // add_class_parameter("lensing", "no"); - std::stringstream zlist; - zlist << zstart_ << ", " << ztarget_; - - ClassParams pars; - pars.add("extra metric transfer functions", "yes"); - pars.add("z_max_pk",zlist.str()); - pars.add("P_k_max_h/Mpc", kmax_); - - pars.add("h",h_); - pars.add("Omega_b",Omega_b_); - // pars.add("Omega_k",0.0); - // pars.add("Omega_ur",0.0); - pars.add("N_ur",N_ur_); - pars.add("Omega_cdm",Omega_m_-Omega_b_); - pars.add("Omega_k",0.0); - // pars.add("Omega_Lambda",1.0-Omega_m_); - pars.add("Omega_fld",0.0); - pars.add("Omega_scf",0.0); + //--- choose gauge ------------------------------------------------ + // add_class_parameter("extra metric transfer functions", "yes"); + add_class_parameter("gauge", "synchronous"); - // massive neutrinos + //--- cosmological parameters, densities -------------------------- + add_class_parameter("h", h_); + + add_class_parameter("Omega_b", Omega_b_); + add_class_parameter("Omega_cdm", Omega_m_ - Omega_b_); + add_class_parameter("Omega_k", 0.0); + // add_class_parameter("Omega_Lambda",1.0-Omega_m_); + add_class_parameter("Omega_fld", 0.0); + add_class_parameter("Omega_scf", 0.0); + // add_class_parameter("fluid_equation_of_state","CLP"); + // add_class_parameter("w0_fld", -1 ); + // add_class_parameter("wa_fld", 0. ); + // add_class_parameter("cs2_fld", 1); + + //--- massive neutrinos ------------------------------------------- #if 1 - //default off - pars.add("N_ncdm",0); + //default off + // add_class_parameter("Omega_ur",0.0); + add_class_parameter("N_ur", N_ur_); + add_class_parameter("N_ncdm", 0); + #else - // change above to enable - pars.add("N_ur",0); - pars.add("N_ncdm",1); - pars.add("m_ncdm","0.4"); - pars.add("T_ncdm",0.71611); + // change above to enable + add_class_parameter("N_ur", 0); + add_class_parameter("N_ncdm", 1); + add_class_parameter("m_ncdm", "0.4"); + add_class_parameter("T_ncdm", 0.71611); #endif - pars.add("A_s",2.42e-9); - pars.add("n_s",.961); // this doesn't matter for TF - pars.add("output","dTk,vTk"); - pars.add("YHe",0.248); - pars.add("lensing","no"); - pars.add("alpha_s",0.0); - pars.add("P_k_ini type","analytic_Pk"); - pars.add("gauge","synchronous"); + //--- cosmological parameters, primordial ------------------------- + add_class_parameter("P_k_ini type", "analytic_Pk"); - pars.add("k_per_decade_for_pk",100); - pars.add("k_per_decade_for_bao",100); + add_class_parameter("A_s", A_s_); + add_class_parameter("n_s", n_s_); + add_class_parameter("alpha_s", 0.0); + add_class_parameter("T_cmb", Tcmb_); + add_class_parameter("YHe", 0.248); - pars.add("compute damping scale","yes"); - pars.add("z_reio",-1.0); // make sure reionisation is not included + // precision parameters + add_class_parameter("k_per_decade_for_pk", 100); + add_class_parameter("k_per_decade_for_bao", 100); + add_class_parameter("compute damping scale", "yes"); + add_class_parameter("tol_perturb_integration", 1.e-8); + add_class_parameter("tol_background_integration", 1e-9); - pars.add("tol_perturb_integration",1.e-8); - pars.add("tol_background_integration",1e-9); + // high precision options from cl_permille.pre: + // precision file to be passed as input in order to achieve at least 
percent precision on scalar Cls + add_class_parameter("hyper_flat_approximation_nu", 7000.); + add_class_parameter("transfer_neglect_delta_k_S_t0", 0.17); + add_class_parameter("transfer_neglect_delta_k_S_t1", 0.05); + add_class_parameter("transfer_neglect_delta_k_S_t2", 0.17); + add_class_parameter("transfer_neglect_delta_k_S_e", 0.13); + add_class_parameter("delta_l_max", 1000); - // high precision options from cl_permille.pre: - // precision file to be passed as input in order to achieve at least percent precision on scalar Cls - pars.add("hyper_flat_approximation_nu", 7000. ); - pars.add("transfer_neglect_delta_k_S_t0", 0.17 ); - pars.add("transfer_neglect_delta_k_S_t1", 0.05 ); - pars.add("transfer_neglect_delta_k_S_t2", 0.17 ); - pars.add("transfer_neglect_delta_k_S_e", 0.13 ); - pars.add("delta_l_max", 1000 ); + int class_verbosity = 0; + add_class_parameter("background_verbose", class_verbosity); + add_class_parameter("thermodynamics_verbose", class_verbosity); + add_class_parameter("perturbations_verbose", class_verbosity); + add_class_parameter("transfer_verbose", class_verbosity); + add_class_parameter("primordial_verbose", class_verbosity); + add_class_parameter("spectra_verbose", class_verbosity); + add_class_parameter("nonlinear_verbose", class_verbosity); + add_class_parameter("lensing_verbose", class_verbosity); + add_class_parameter("output_verbose", class_verbosity); - std::unique_ptr CE = std::make_unique(pars, false); + // output parameters, only needed for the control CLASS .ini file that we output + std::stringstream zlist; + if (ztarget_ == zstart_) + zlist << ztarget_ << ", 0.0"; + else + zlist << std::max(ztarget_, zstart_) << ", " << std::min(ztarget_, zstart_) << ", 0.0"; + add_class_parameter("z_pk", zlist.str()); - CE->getTk(zstart_, tab_lnk0_, tab_dc0_, tab_db0_, d_ncdm, tab_dtot0_, - tab_tc0_, tab_tb0_, t_ncdm, tab_ttot0_, phi, psi ); + csoca::ilog << "Computing transfer function via ClassEngine..." << std::endl; + double wtime = get_wtime(); - CE->getTk(ztarget_, tab_lnk_, tab_dc_, tab_db_, d_ncdm, tab_dtot_, - tab_tc_, tab_tb_, t_ncdm, tab_ttot_, phi, psi); + the_ClassEngine_ = std::move(std::make_unique(pars_, false)); - wtime = get_wtime() - wtime; - csoca::ilog << " took " << wtime << " s / " << tab_lnk_.size() << " modes." << std::endl; + wtime = get_wtime() - wtime; + csoca::ilog << "CLASS took " << wtime << " s." << std::endl; + } + + //! run ClassEngine with parameters set up + void run_ClassEngine(double z, std::vector &k, std::vector &dc, std::vector &tc, std::vector &db, std::vector &tb, + std::vector &dn, std::vector &tn, std::vector &dm, std::vector &tm) + { + k.clear(); + dc.clear(); db.clear(); dn.clear(); dm.clear(); + tc.clear(); tb.clear(); tn.clear(); tm.clear(); + + the_ClassEngine_->getTk(z, k, dc, db, dn, dm, tc, tb, tn, tm); + + real_t fc = (Omega_m_ - Omega_b_) / Omega_m_; + real_t fb = Omega_b_ / Omega_m_; + + for (size_t i = 0; i < k.size(); ++i) + { + // convert to 'CAMB' format, since we interpolate loglog and + // don't want negative numbers... 
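+            // dividing by -k^2 (with k in h/Mpc) turns the CLASS density/velocity transfer functions
+            // into positive, slowly varying CAMB-style T(k), which is what the log-log interpolation
+            // of these tables assumes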
+ auto ik2 = 1.0 / (k[i] * k[i]) * h_ * h_; + dc[i] = -dc[i] * ik2; + db[i] = -db[i] * ik2; + dn[i] = -dn[i] * ik2; + dm[i] = fc * dc[i] + fb * db[i]; + tc[i] = -tc[i] * ik2; + tb[i] = -tb[i] * ik2; + tn[i] = -tn[i] * ik2; + tm[i] = fc * tc[i] + fb * tb[i]; } + } public: - explicit transfer_CLASS_plugin( ConfigFile &cf) - : TransferFunction_plugin(cf) - { - h_ = pcf_->GetValue("cosmology","H0") / 100.0; - Omega_m_ = pcf_->GetValue("cosmology","Omega_m"); - Omega_b_ = pcf_->GetValue("cosmology","Omega_b"); - N_ur_ = pcf_->GetValueSafe("cosmology","N_ur", 3.046); - ztarget_ = pcf_->GetValueSafe("cosmology","ztarget",0.0); - atarget_ = 1.0/(1.0+ztarget_); - zstart_ = pcf_->GetValue("setup","zstart"); - astart_ = 1.0/(1.0+zstart_); - double lbox = pcf_->GetValue("setup","BoxLength"); - int nres = pcf_->GetValue("setup","GridRes"); - kmax_ = 2.0*M_PI/lbox * nres/2 * sqrt(3) * 2.0; // 120% of spatial diagonal + explicit transfer_CLASS_plugin(ConfigFile &cf) + : TransferFunction_plugin(cf) + { + ofs_class_input_.open("input_class_parameters.ini", std::ios::trunc); - this->ClassEngine_get_data(); - - gsl_ia_dtot_ = gsl_interp_accel_alloc(); gsl_ia_dtot0_ = gsl_interp_accel_alloc(); - gsl_ia_dc_ = gsl_interp_accel_alloc(); gsl_ia_dc0_ = gsl_interp_accel_alloc(); - gsl_ia_db_ = gsl_interp_accel_alloc(); gsl_ia_db0_ = gsl_interp_accel_alloc(); - gsl_ia_ttot_ = gsl_interp_accel_alloc(); gsl_ia_ttot0_ = gsl_interp_accel_alloc(); - gsl_ia_tc_ = gsl_interp_accel_alloc(); gsl_ia_tc0_ = gsl_interp_accel_alloc(); - gsl_ia_tb_ = gsl_interp_accel_alloc(); gsl_ia_tb0_ = gsl_interp_accel_alloc(); + h_ = pcf_->GetValue("cosmology", "H0") / 100.0; + Omega_m_ = pcf_->GetValue("cosmology", "Omega_m"); + Omega_b_ = pcf_->GetValue("cosmology", "Omega_b"); + N_ur_ = pcf_->GetValueSafe("cosmology", "Neff", 3.046); + ztarget_ = pcf_->GetValueSafe("cosmology", "ztarget", 0.0); + atarget_ = 1.0 / (1.0 + ztarget_); + zstart_ = pcf_->GetValue("setup", "zstart"); + astart_ = 1.0 / (1.0 + zstart_); + double lbox = pcf_->GetValue("setup", "BoxLength"); + int nres = pcf_->GetValue("setup", "GridRes"); + A_s_ = pcf_->GetValueSafe("cosmology", "A_s", -1.0); + double k_p = pcf_->GetValueSafe("cosmology", "k_p", 0.05); + n_s_ = pcf_->GetValue("cosmology", "nspec"); + Tcmb_ = cf.GetValueSafe("cosmology", "Tcmb", 2.7255); - gsl_sp_dtot_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_dc_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_db_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_ttot_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_tc_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_tb_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + tnorm_ = 1.0; - gsl_sp_dtot0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_dc0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_db0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_ttot0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_tc0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_tb0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + if (A_s_ > 0) + { + this->tf_isnormalised_ = true; + tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0)); + csoca::ilog << "Using A_s to normalise the transfer function!" 
<< std::endl; + } - gsl_spline_init(gsl_sp_dtot_, &tab_lnk_[0], &tab_dtot_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_dc_, &tab_lnk_[0], &tab_dc_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_db_, &tab_lnk_[0], &tab_db_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_ttot_, &tab_lnk_[0], &tab_ttot_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_tc_, &tab_lnk_[0], &tab_tc_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_tb_, &tab_lnk_[0], &tab_tb_[0], tab_lnk_.size()); + kmax_ = std::max(20.0, 2.0 * M_PI / lbox * nres / 2 * sqrt(3) * 2.0); // 120% of spatial diagonal, or k=10h Mpc-1 - gsl_spline_init(gsl_sp_dtot0_, &tab_lnk0_[0], &tab_dtot0_[0], tab_lnk0_.size()); - gsl_spline_init(gsl_sp_dc0_, &tab_lnk0_[0], &tab_dc0_[0], tab_lnk0_.size()); - gsl_spline_init(gsl_sp_db0_, &tab_lnk0_[0], &tab_db0_[0], tab_lnk0_.size()); - gsl_spline_init(gsl_sp_ttot0_, &tab_lnk0_[0], &tab_ttot0_[0], tab_lnk0_.size()); - gsl_spline_init(gsl_sp_tc0_, &tab_lnk0_[0], &tab_tc0_[0], tab_lnk0_.size()); - gsl_spline_init(gsl_sp_tb0_, &tab_lnk0_[0], &tab_tb0_[0], tab_lnk0_.size()); + this->init_ClassEngine(); + + std::vector k, dc, tc, db, tb, dn, tn, dm, tm; + + this->run_ClassEngine(0.0, k, dc, tc, db, tb, dn, tn, dm, tm); + + delta_c0_.set_data(k, dc); + theta_c0_.set_data(k, tc); + delta_b0_.set_data(k, db); + theta_b0_.set_data(k, tb); + delta_n0_.set_data(k, dn); + theta_n0_.set_data(k, tn); + delta_m0_.set_data(k, dm); + theta_m0_.set_data(k, tm); + + this->run_ClassEngine(ztarget_, k, dc, tc, db, tb, dn, tn, dm, tm); + + delta_c_.set_data(k, dc); + theta_c_.set_data(k, tc); + delta_b_.set_data(k, db); + theta_b_.set_data(k, tb); + delta_n_.set_data(k, dn); + theta_n_.set_data(k, tn); + delta_m_.set_data(k, dm); + theta_m_.set_data(k, tm); + + kmin_ = k[0]; + kmax_ = k.back(); + + csoca::ilog << "CLASS table contains k = " << this->get_kmin() << " to " << this->get_kmax() << " h Mpc-1." 
<< std::endl; //-------------------------------------------------------------------------- // single fluid growing/decaying mode decomposition //-------------------------------------------------------------------------- - gsl_ia_Cplus_ = gsl_interp_accel_alloc(); + /*gsl_ia_Cplus_ = gsl_interp_accel_alloc(); gsl_ia_Cminus_ = gsl_interp_accel_alloc(); - - gsl_sp_Cplus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + + gsl_sp_Cplus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); gsl_sp_Cminus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - - tab_Cplus_.assign(tab_lnk_.size(),0); - tab_Cminus_.assign(tab_lnk_.size(),0); + + tab_Cplus_.assign(tab_lnk_.size(), 0); + tab_Cminus_.assign(tab_lnk_.size(), 0); std::ofstream ofs("grow_decay.txt"); - - for( size_t i=0; i kmax_) + { + return 0.0; + } + + real_t val(0.0); + switch (type) + { + // values at ztarget: + case total: + val = delta_m_(k); break; + case cdm: + val = delta_c_(k); break; + case baryon: + val = delta_b_(k); break; + case vtotal: + val = theta_m_(k); break; + case vcdm: + val = theta_c_(k); break; + case vbaryon: + val = theta_b_(k); break; + + // values at zstart: + case total0: + val = delta_m0_(k); break; + case cdm0: + val = delta_c0_(k); break; + case baryon0: + val = delta_b0_(k); break; + case vtotal0: + val = theta_m0_(k); break; + case vcdm0: + val = theta_c0_(k); break; + case vbaryon0: + val = theta_b0_(k); break; + default: + throw std::runtime_error("Invalid type requested in transfer function evaluation"); + } + return val * tnorm_; + } + + inline double get_kmin(void) const { return kmin_ / h_; } + inline double get_kmax(void) const { return kmax_ / h_; } }; -namespace { +namespace +{ TransferFunction_plugin_creator_concrete creator("CLASS"); } From b8b9db3b999e6e4bb9d0be1327ed31b31eed3b1e Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 19:57:41 +0200 Subject: [PATCH 091/130] made precision switchable from makefile --- CMakeLists.txt | 96 ++++++++++++++++++++++++---- include/convolution.hh | 16 +++-- include/general.hh | 11 +++- include/grid_fft.hh | 89 +++++++++++++++----------- src/grid_fft.cc | 142 ++++++++++++++++++++++------------------- 5 files changed, 227 insertions(+), 127 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a4eab8b..d381d7d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,16 +1,34 @@ cmake_minimum_required(VERSION 3.9) set(PRGNAME monofonIC) -project(monofonIC) +project(monofonIC C CXX) + +#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -march=native -Wall -pedantic" CACHE STRING "Flags used by the compiler during Release builds." FORCE) +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -march=native -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during RelWithDebInfo builds." FORCE) +set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -march=native -DDEBUG -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during Debug builds." FORCE) +set(CMAKE_CXX_FLAGS_DEBUGSANADD "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address " CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for address." FORCE) +set(CMAKE_CXX_FLAGS_DEBUGSANUNDEF "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for undefineds." 
FORCE) + +set(default_build_type "Release") +if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + message(STATUS "Setting build type to '${default_build_type}' as none was specified.") + set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE + STRING "Choose the type of build." FORCE) + # Set the possible values of build type for cmake-gui + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS + "Debug" "Release" "RelWithDebInfo" "DebugSanAdd" "DebugSanUndef") +endif() +mark_as_advanced(CMAKE_CXX_FLAGS_DEBUGSANADD CMAKE_CXX_FLAGS_DEBUGSANUNDEF CMAKE_EXECUTABLE_FORMAT CMAKE_OSX_ARCHITECTURES CMAKE_OSX_DEPLOYMENT_TARGET CMAKE_OSX_SYSROOT) + + +######################################################################################################################## # include class submodule include(${CMAKE_CURRENT_SOURCE_DIR}/external/class.cmake) -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic") find_package(PkgConfig REQUIRED) -set(CMAKE_MODULE_PATH - "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}") +set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}") ######################################################################################################################## @@ -48,6 +66,16 @@ if(ENABLE_MPI) endif(MPI_CXX_FOUND) endif(ENABLE_MPI) +######################################################################################################################## +# floating point precision +set ( + CODE_PRECISION "DOUBLE" + CACHE STRING "Floating point type used for internal computations and FFTs" +) +set_property ( + CACHE CODE_PRECISION + PROPERTY STRINGS FLOAT DOUBLE LONGDOUBLE +) ######################################################################################################################## # FFTW @@ -55,18 +83,25 @@ if(POLICY CMP0074) cmake_policy(SET CMP0074 NEW) endif() if(ENABLE_MPI) - find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS MPI) + find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS MPI) else() - find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS) + find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS) endif(ENABLE_MPI) +mark_as_advanced(FFTW3_SINGLE_MPI_LIBRARY FFTW3_SINGLE_OPENMP_LIBRARY FFTW3_SINGLE_SERIAL_LIBRARY FFTW3_SINGLE_THREADS_LIBRARY) +mark_as_advanced(FFTW3_DOUBLE_MPI_LIBRARY FFTW3_DOUBLE_OPENMP_LIBRARY FFTW3_DOUBLE_SERIAL_LIBRARY FFTW3_DOUBLE_THREADS_LIBRARY) +mark_as_advanced(FFTW3_LONGDOUBLE_MPI_LIBRARY FFTW3_LONGDOUBLE_OPENMP_LIBRARY FFTW3_LONGDOUBLE_SERIAL_LIBRARY FFTW3_LONGDOUBLE_THREADS_LIBRARY) +mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_MPI_INCLUDE_DIR) +mark_as_advanced(pkgcfg_lib_PC_FFTW_fftw3) ######################################################################################################################## # GSL find_package(GSL REQUIRED) +mark_as_advanced(pkgcfg_lib_GSL_gsl pkgcfg_lib_GSL_gslcblas pkgcfg_lib_GSL_m) ######################################################################################################################## # HDF5 find_package(HDF5 REQUIRED) +mark_as_advanced(HDF5_C_LIBRARY_dl HDF5_C_LIBRARY_hdf5 HDF5_C_LIBRARY_m HDF5_C_LIBRARY_pthread HDF5_C_LIBRARY_z HDF5_C_LIBRARY_sz) ######################################################################################################################## # INCLUDES @@ -86,28 +121,61 @@ file( GLOB PLUGINS ${PROJECT_SOURCE_DIR}/src/plugins/*.cc ) +# project configuration 
header +configure_file( + ${PROJECT_SOURCE_DIR}/include/cmake_config.hh.in + ${PROJECT_SOURCE_DIR}/include/cmake_config.hh +) + add_executable(${PRGNAME} ${SOURCES} ${PLUGINS}) target_setup_class(${PRGNAME}) set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 14) + # mpi flags if(MPI_CXX_FOUND) - if(FFTW3_DOUBLE_MPI_FOUND) - target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_MPI_LIBRARY}) - target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL}) - target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI") - endif(FFTW3_DOUBLE_MPI_FOUND) + if(CODE_PRECISION STREQUAL "FLOAT") + if(FFTW3_SINGLE_MPI_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_MPI_LIBRARY}) + target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL}) + target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI") + else() + message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for single precision!") + endif() + elseif(CODE_PRECISION STREQUAL "DOUBLE") + if(FFTW3_DOUBLE_MPI_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_MPI_LIBRARY}) + target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL}) + target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI") + else() + message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for double precision!") + endif() + elseif(CODE_PRECISION STREQUAL "LONGDOUBLE") + if(FFTW3_LONGDOUBLE_MPI_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_LONGDOUBLE_MPI_LIBRARY}) + target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL}) + target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI") + else() + message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for long double precision!") + endif() + endif() target_include_directories(${PRGNAME} PRIVATE ${MPI_CXX_INCLUDE_PATH}) target_compile_options(${PRGNAME} PRIVATE "-DUSE_MPI") target_link_libraries(${PRGNAME} ${MPI_LIBRARIES}) endif(MPI_CXX_FOUND) -if(FFTW3_DOUBLE_THREADS_FOUND) +if(CODE_PRECISION STREQUAL "FLOAT" AND FFTW3_SINGLE_THREADS_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_THREADS_LIBRARY}) + target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS") +elseif(CODE_PRECISION STREQUAL "DOUBLE" AND FFTW3_DOUBLE_THREADS_FOUND) target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_THREADS_LIBRARY}) target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS") -endif(FFTW3_DOUBLE_THREADS_FOUND) +elseif(CODE_PRECISION STREQUAL "LONGDOUBLE" AND FFTW3_LONGDOUBLE_THREADS_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_LONGDOUBLE_THREADS_LIBRARY}) + target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS") +endif() if(HDF5_FOUND) # target_link_libraries(${PRGNAME} ${HDF5_C_LIBRARY_DIRS}) diff --git a/include/convolution.hh b/include/convolution.hh index a1fc1e3..238717b 100644 --- a/include/convolution.hh +++ b/include/convolution.hh @@ -415,12 +415,12 @@ private: { assert(fp.space_ == kspace_id); - const double rfac = std::pow(1.5, 1.5); + const real_t rfac = std::pow(1.5, 1.5); fp.zero(); #if !defined(USE_MPI) //////////////////////////////////////////////////////////////////////////////////// - size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3}; + const size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3}; #pragma omp parallel for for (size_t i = 0; i < 2 * fp.size(0) / 3; ++i) @@ -460,7 +460,10 @@ private: size_t slicesz = fbuf_->size(1) * fbuf_->size(3); MPI_Datatype datatype = - (typeid(data_t) == typeid(float)) ? 
MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE; + (typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX + : (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX + : (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX + : MPI_BYTE; // fill MPI send buffer with results of kfunc @@ -596,7 +599,7 @@ private: template void unpad(const Grid_FFT &fp, operator_t output_op) { - const double rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(fbuf_->n_[0] * fbuf_->n_[1] * fbuf_->n_[2]); + const real_t rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(fbuf_->n_[0] * fbuf_->n_[1] * fbuf_->n_[2]); // make sure we're in Fourier space... assert(fp.space_ == kspace_id); @@ -645,7 +648,10 @@ private: size_t slicesz = fp.size(1) * fp.size(3); MPI_Datatype datatype = - (typeid(data_t) == typeid(float)) ? MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE; + (typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX + : (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX + : (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX + : MPI_BYTE; MPI_Status status; diff --git a/include/general.hh b/include/general.hh index 7334579..f4395bb 100644 --- a/include/general.hh +++ b/include/general.hh @@ -16,14 +16,21 @@ #define _unused(x) ((void)(x)) -#ifdef USE_SINGLEPRECISION +// include CMake controlled configuration settings +#include + +#if defined(USE_PRECISION_FLOAT) using real_t = float; using complex_t = fftwf_complex; #define FFTW_PREFIX fftwf -#else +#elif defined(USE_PRECISION_DOUBLE) using real_t = double; using complex_t = fftw_complex; #define FFTW_PREFIX fftw +#elif defined(USE_PRECISION_LONGDOUBLE) +using real_t = long double; +using complex_t = fftwl_complex; +#define FFTW_PREFIX fftwl #endif enum class fluid_component diff --git a/include/grid_fft.hh b/include/grid_fft.hh index f460297..2170dc8 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -26,10 +26,12 @@ class Grid_FFT protected: #if defined(USE_MPI) const MPI_Datatype MPI_data_t_type = - (typeid(data_t) == typeid(double)) ? MPI_DOUBLE - : (typeid(data_t) == typeid(float)) ? MPI_FLOAT - : (typeid(data_t) == typeid(std::complex)) ? MPI_COMPLEX - : (typeid(data_t) == typeid(std::complex)) ? MPI_DOUBLE_COMPLEX + (typeid(data_t) == typeid(float)) ? MPI_FLOAT + : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE + : (typeid(data_t) == typeid(long double)) ? MPI_LONG_DOUBLE + : (typeid(data_t) == typeid(std::complex)) ? MPI_C_FLOAT_COMPLEX + : (typeid(data_t) == typeid(std::complex)) ? MPI_C_DOUBLE_COMPLEX + : (typeid(data_t) == typeid(std::complex)) ? MPI_C_LONG_DOUBLE_COMPLEX : MPI_INT; #endif using grid_fft_t = Grid_FFT; @@ -73,30 +75,30 @@ public: const grid_fft_t *get_grid(size_t ilevel) const { return this; } - bool is_distributed( void ) const { return bdistributed; } + bool is_distributed( void ) const noexcept { return bdistributed; } void Setup(); //! return the number of data_t elements that we store in the container - size_t memsize( void ) const { return ntot_; } + size_t memsize( void ) const noexcept { return ntot_; } //! return the (local) size of dimension i - size_t size(size_t i) const { return sizes_[i]; } + size_t size(size_t i) const noexcept { assert(i<4); return sizes_[i]; } //! return the (global) size of dimension i - size_t global_size(size_t i) const { return n_[i]; } + size_t global_size(size_t i) const noexcept { assert(i<3); return n_[i]; } //! 
return locally stored number of elements of field - size_t local_size(void) const { return local_0_size_ * n_[1] * n_[2]; } + size_t local_size(void) const noexcept { return local_0_size_ * n_[1] * n_[2]; } //! return a bounding box of the global extent of the field - const bounding_box &get_global_range(void) const + const bounding_box &get_global_range(void) const noexcept { return global_range_; } //! set all field elements to zero - void zero() + void zero() noexcept { #pragma omp parallel for for (size_t i = 0; i < ntot_; ++i) @@ -125,47 +127,47 @@ public: data_[i] = g.data_[i]; } - data_t &operator[](size_t i) + data_t &operator[](size_t i) noexcept { return data_[i]; } - data_t &relem(size_t i, size_t j, size_t k) + data_t &relem(size_t i, size_t j, size_t k) noexcept { size_t idx = (i * sizes_[1] + j) * sizes_[3] + k; return data_[idx]; } - const data_t &relem(size_t i, size_t j, size_t k) const + const data_t &relem(size_t i, size_t j, size_t k) const noexcept { size_t idx = (i * sizes_[1] + j) * sizes_[3] + k; return data_[idx]; } - ccomplex_t &kelem(size_t i, size_t j, size_t k) + ccomplex_t &kelem(size_t i, size_t j, size_t k) noexcept { size_t idx = (i * sizes_[1] + j) * sizes_[3] + k; return cdata_[idx]; } - const ccomplex_t &kelem(size_t i, size_t j, size_t k) const + const ccomplex_t &kelem(size_t i, size_t j, size_t k) const noexcept { size_t idx = (i * sizes_[1] + j) * sizes_[3] + k; return cdata_[idx]; } - ccomplex_t &kelem(size_t idx) { return cdata_[idx]; } - const ccomplex_t &kelem(size_t idx) const { return cdata_[idx]; } - data_t &relem(size_t idx) { return data_[idx]; } - const data_t &relem(size_t idx) const { return data_[idx]; } + ccomplex_t &kelem(size_t idx) noexcept { return cdata_[idx]; } + const ccomplex_t &kelem(size_t idx) const noexcept { return cdata_[idx]; } + data_t &relem(size_t idx) noexcept { return data_[idx]; } + const data_t &relem(size_t idx) const noexcept { return data_[idx]; } - size_t get_idx(size_t i, size_t j, size_t k) const + size_t get_idx(size_t i, size_t j, size_t k) const noexcept { return (i * sizes_[1] + j) * sizes_[3] + k; } template - vec3_t get_r(const size_t i, const size_t j, const size_t k) const + vec3_t get_r(const size_t i, const size_t j, const size_t k) const noexcept { vec3_t rr; @@ -177,7 +179,7 @@ public: } template - vec3_t get_unit_r(const size_t i, const size_t j, const size_t k) const + vec3_t get_unit_r(const size_t i, const size_t j, const size_t k) const noexcept { vec3_t rr; @@ -189,7 +191,7 @@ public: } template - vec3_t get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3_t s) const + vec3_t get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3_t s) const noexcept { vec3_t rr; @@ -200,33 +202,35 @@ public: return rr; } - vec3_t get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const + vec3_t get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const noexcept { return vec3_t({i + local_0_start_, j, k}); } - size_t get_cell_idx_1d(const size_t i, const size_t j, const size_t k) const + size_t get_cell_idx_1d(const size_t i, const size_t j, const size_t k) const noexcept { return ((i + local_0_start_) * size(1) + j) * size(2) + k; } - size_t count_leaf_cells(int, int) const + //! 
deprecated function, was needed for old output plugin + size_t count_leaf_cells(int, int) const noexcept { return n_[0] * n_[1] * n_[2]; } - real_t get_dx(int idim) const + real_t get_dx(int idim) const noexcept { + assert(idim<3&&idim>=0); return dx_[idim]; } - const std::array &get_dx(void) const + const std::array &get_dx(void) const noexcept { return dx_; } template - vec3_t get_k(const size_t i, const size_t j, const size_t k) const + vec3_t get_k(const size_t i, const size_t j, const size_t k) const noexcept { vec3_t kk; if( bdistributed ){ @@ -243,7 +247,7 @@ public: } template - vec3_t get_k(const real_t i, const real_t j, const real_t k) const + vec3_t get_k(const real_t i, const real_t j, const real_t k) const noexcept { vec3_t kk; if( bdistributed ){ @@ -259,12 +263,13 @@ public: return kk; } - std::array get_k3(const size_t i, const size_t j, const size_t k) const + std::array get_k3(const size_t i, const size_t j, const size_t k) const noexcept { return bdistributed? std::array({j,i+local_1_start_,k}) : std::array({i,j,k}); } - data_t get_cic( const vec3_t& v ) const{ + data_t get_cic( const vec3_t& v ) const noexcept + { // warning! this doesn't work with MPI vec3_t x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], std::fmod(v.y/length_[1]+1.0,1.0)*n_[1], @@ -290,7 +295,8 @@ public: return val; } - ccomplex_t get_cic_kspace( const vec3_t x ) const{ + ccomplex_t get_cic_kspace( const vec3_t x ) const noexcept + { // warning! this doesn't work with MPI int ix = static_cast(std::floor(x.x)); int iy = static_cast(std::floor(x.y)); @@ -328,6 +334,11 @@ public: return ccomplex_t(0.0,rgrad); } + inline real_t laplacian( const std::array& ijk ) const noexcept + { + return -this->get_k(ijk[0],ijk[1],ijk[2]).norm_squared(); + } + grid_fft_t &operator*=(data_t x) { if (space_ == kspace_id) @@ -421,7 +432,7 @@ public: } } - double compute_2norm(void) + real_t compute_2norm(void) const { real_t sum1{0.0}; #pragma omp parallel for reduction(+ : sum1) @@ -443,7 +454,7 @@ public: return sum1; } - double std(void) + real_t std(void) const { double sum1{0.0}, sum2{0.0}; size_t count{0}; @@ -488,10 +499,10 @@ public: sum1 /= count; sum2 /= count; - return std::sqrt(sum2 - sum1 * sum1); + return real_t(std::sqrt(sum2 - sum1 * sum1)); } - double mean(void) + real_t mean(void) const { double sum1{0.0}; size_t count{0}; @@ -530,7 +541,7 @@ public: sum1 /= count; - return sum1; + return real_t(sum1); } template diff --git a/src/grid_fft.cc b/src/grid_fft.cc index 4905cb1..2b595b8 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -2,10 +2,11 @@ #include #include -template -void Grid_FFT::Setup(void) +template +void Grid_FFT::Setup(void) { - if( !bdistributed ){ + if (!bdistributed) + { ntot_ = (n_[2] + 2) * n_[1] * n_[0]; csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); @@ -30,7 +31,7 @@ void Grid_FFT::Setup(void) csoca::elog.Print("invalid data type in Grid_FFT::setup_fft_interface\n"); } - fft_norm_fac_ = 1.0 / std::sqrt((double)((size_t)n_[0] * (double)n_[1] * (double)n_[2])); + fft_norm_fac_ = 1.0 / std::sqrt((real_t)((size_t)n_[0] * (real_t)n_[1] * (real_t)n_[2])); if (typeid(data_t) == typeid(real_t)) { @@ -81,26 +82,26 @@ void Grid_FFT::Setup(void) if (typeid(data_t) == typeid(real_t)) { cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD, - &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); + &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); ntot_ = 2 * cmplxsz; data_ = 
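            // fftw_mpi_local_size_3d_transposed() reports the local allocation size in units of complex
            // elements, so the in-place r2c transform needs 2*cmplxsz reals (ntot_ above), which already
            // includes the padding required by FFTW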
(data_t *)fftw_malloc(ntot_ * sizeof(real_t)); cdata_ = reinterpret_cast(data_); plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); + MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, - MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); + MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); } else if (typeid(data_t) == typeid(ccomplex_t)) { cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD, - &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); + &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); ntot_ = cmplxsz; data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t)); cdata_ = reinterpret_cast(data_); plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); + MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); + MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); } else { @@ -109,7 +110,8 @@ void Grid_FFT::Setup(void) } csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); - fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]); + + fft_norm_fac_ = 1.0 / sqrt((real_t)n_[0] * (real_t)n_[1] * (real_t)n_[2]); if (typeid(data_t) == typeid(real_t)) { @@ -155,16 +157,16 @@ void Grid_FFT::Setup(void) } } -template -void Grid_FFT::ApplyNorm(void) +template +void Grid_FFT::ApplyNorm(void) { #pragma omp parallel for for (size_t i = 0; i < ntot_; ++i) data_[i] *= fft_norm_fac_; } -template -void Grid_FFT::FourierTransformForward(bool do_transform) +template +void Grid_FFT::FourierTransformForward(bool do_transform) { #if defined(USE_MPI) MPI_Barrier(MPI_COMM_WORLD); @@ -195,8 +197,8 @@ void Grid_FFT::FourierTransformForward(bool do_transform) } } -template -void Grid_FFT::FourierTransformBackward(bool do_transform) +template +void Grid_FFT::FourierTransformBackward(bool do_transform) { #if defined(USE_MPI) MPI_Barrier(MPI_COMM_WORLD); @@ -210,8 +212,7 @@ void Grid_FFT::FourierTransformBackward(bool do_transform) csoca::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]); double wtime = get_wtime(); - FFTW_API(execute) - (iplan_); + FFTW_API(execute)(iplan_); this->ApplyNorm(); wtime = get_wtime() - wtime; @@ -262,6 +263,9 @@ hid_t hdf5_get_data_type(void) if (typeid(T) == typeid(double)) return H5T_NATIVE_DOUBLE; + + if (typeid(T) == typeid(long double)) + return H5T_NATIVE_LDOUBLE; if (typeid(T) == typeid(long long)) return H5T_NATIVE_LLONG; @@ -276,10 +280,11 @@ hid_t hdf5_get_data_type(void) return -1; } -template -void Grid_FFT::Read_from_HDF5(const std::string Filename, const std::string ObjName) +template +void Grid_FFT::Read_from_HDF5(const std::string Filename, const std::string ObjName) { - if( bdistributed ){ + if (bdistributed) + { csoca::elog << "Attempt to read from HDF5 into MPI-distributed array. This is not supported yet!" 
<< std::endl; abort(); } @@ -354,10 +359,11 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c H5Dclose(HDF_DatasetID); H5Fclose(HDF_FileID); - assert( dimsize[0] == dimsize[1] && dimsize[0] == dimsize[2] ); + assert(dimsize[0] == dimsize[1] && dimsize[0] == dimsize[2]); csoca::ilog << "Read external constraint data of dimensions " << dimsize[0] << "**3." << std::endl; - for( size_t i=0; i<3; ++i ) this->n_[i] = dimsize[i]; + for (size_t i = 0; i < 3; ++i) + this->n_[i] = dimsize[i]; this->space_ = rspace_id; if (data_ != nullptr) @@ -365,47 +371,47 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c fftw_free(data_); } this->Setup(); - //... copy data to internal array ... - double sum1{0.0}, sum2{0.0}; - #pragma omp parallel for reduction(+:sum1,sum2) + real_t sum1{0.0}, sum2{0.0}; + #pragma omp parallel for reduction(+ : sum1, sum2) for (size_t i = 0; i < size(0); ++i) { for (size_t j = 0; j < size(1); ++j) { for (size_t k = 0; k < size(2); ++k) { - this->relem(i,j,k) = Data[ (i*size(1) + j)*size(2)+k ]; - sum2 += std::real(this->relem(i,j,k)*this->relem(i,j,k)); - sum1 += std::real(this->relem(i,j,k)); + this->relem(i, j, k) = Data[(i * size(1) + j) * size(2) + k]; + sum2 += std::real(this->relem(i, j, k) * this->relem(i, j, k)); + sum1 += std::real(this->relem(i, j, k)); } } } sum1 /= Data.size(); sum2 /= Data.size(); - auto stdw = std::sqrt(sum2-sum1*sum1); + auto stdw = std::sqrt(sum2 - sum1 * sum1); csoca::ilog << "Constraint field has =" << sum1 << ", -^2=" << stdw << std::endl; - #pragma omp parallel for reduction(+:sum1,sum2) + #pragma omp parallel for reduction(+ : sum1, sum2) for (size_t i = 0; i < size(0); ++i) { for (size_t j = 0; j < size(1); ++j) { for (size_t k = 0; k < size(2); ++k) { - this->relem(i,j,k) /= stdw; + this->relem(i, j, k) /= stdw; } } } } -template -void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) const +template +void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) const { // FIXME: cleanup duplicate code in this function! 
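[Editorial note: the sketch below is an illustration, not part of the patch.] The rank-0
branch that follows writes the field one x-slab at a time into the dataset rather than issuing
a single H5Dwrite, which keeps the staging buffer at size(1)*size(2) elements. A minimal
stand-alone version of that hyperslab pattern, with hypothetical names, plain double data and
no error checking:

    #include <hdf5.h>
    #include <algorithm>
    #include <vector>

    // write a contiguous nx*ny*nz array slab-by-slab into a new HDF5 file
    void write_field_slabwise(const char *fname, const char *dsetname,
                              const double *data, hsize_t nx, hsize_t ny, hsize_t nz)
    {
        hid_t file = H5Fcreate(fname, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);

        hsize_t dims[3] = {nx, ny, nz};
        hid_t filespace = H5Screate_simple(3, dims, nullptr);
        hid_t dset = H5Dcreate2(file, dsetname, H5T_NATIVE_DOUBLE, filespace,
                                H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
        H5Sclose(filespace);

        hsize_t count[3] = {1, ny, nz};          // extent of one x-slab
        hid_t memspace = H5Screate_simple(3, count, nullptr);
        std::vector<double> buf(ny * nz);

        for (hsize_t i = 0; i < nx; ++i)
        {
            std::copy(data + i * ny * nz, data + (i + 1) * ny * nz, buf.begin());
            hsize_t offset[3] = {i, 0, 0};
            hid_t fspace = H5Dget_space(dset);
            H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, nullptr, count, nullptr);
            H5Dwrite(dset, H5T_NATIVE_DOUBLE, memspace, fspace, H5P_DEFAULT, buf.data());
            H5Sclose(fspace);
        }
        H5Sclose(memspace);
        H5Dclose(dset);
        H5Fclose(file);
    }

In the real function the same slab buffer is filled from relem() or kelem() depending on
space_, which is how one code path serves both real- and k-space output.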
- if( !bdistributed && CONFIG::MPI_task_rank==0 ){ - + if (!bdistributed && CONFIG::MPI_task_rank == 0) + { + hid_t file_id, dset_id; /* file and dataset identifiers */ hid_t filespace, memspace; /* file and memory dataspace identifiers */ hsize_t offset[3], count[3]; @@ -419,23 +425,23 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string for (int i = 0; i < 3; ++i) count[i] = size(i); - + if (typeid(data_t) == typeid(float)) dtype_id = H5T_NATIVE_FLOAT; else if (typeid(data_t) == typeid(double)) dtype_id = H5T_NATIVE_DOUBLE; + else if (typeid(data_t) == typeid(long double)) + dtype_id = H5T_NATIVE_LDOUBLE; else if (typeid(data_t) == typeid(std::complex)) - { dtype_id = H5T_NATIVE_FLOAT; - } else if (typeid(data_t) == typeid(std::complex)) - { dtype_id = H5T_NATIVE_DOUBLE; - } + else if (typeid(data_t) == typeid(std::complex)) + dtype_id = H5T_NATIVE_LDOUBLE; filespace = H5Screate_simple(3, count, NULL); dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); H5Sclose(filespace); hsize_t slice_sz = size(1) * size(2); @@ -459,7 +465,7 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string { for (size_t k = 0; k < size(2); ++k) { - if( this->space_ == rspace_id ) + if (this->space_ == rspace_id) buf[j * size(2) + k] = std::real(relem(i, j, k)); else buf[j * size(2) + k] = std::real(kelem(i, j, k)); @@ -478,7 +484,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string if (typeid(data_t) == typeid(std::complex) || typeid(data_t) == typeid(std::complex) || - this->space_ == kspace_id ) + typeid(data_t) == typeid(std::complex) || + this->space_ == kspace_id) { datasetname += std::string(".im"); @@ -487,7 +494,7 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string filespace = H5Screate_simple(3, count, NULL); dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); H5Sclose(filespace); count[0] = 1; @@ -499,7 +506,7 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string for (size_t j = 0; j < size(1); ++j) for (size_t k = 0; k < size(2); ++k) { - if( this->space_ == rspace_id ) + if (this->space_ == rspace_id) buf[j * size(2) + k] = std::imag(relem(i, j, k)); else buf[j * size(2) + k] = std::imag(kelem(i, j, k)); @@ -526,7 +533,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string return; } - if( !bdistributed && CONFIG::MPI_task_rank!=0 ) return; + if (!bdistributed && CONFIG::MPI_task_rank != 0) + return; hid_t file_id, dset_id; /* file and dataset identifiers */ hid_t filespace, memspace; /* file and memory dataspace identifiers */ @@ -534,7 +542,6 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string hid_t dtype_id = H5T_NATIVE_FLOAT; hid_t plist_id; - #if defined(USE_MPI) int mpi_size, mpi_rank; @@ -586,14 +593,14 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string dtype_id = H5T_NATIVE_FLOAT; else if (typeid(data_t) == typeid(double)) dtype_id = H5T_NATIVE_DOUBLE; + else if (typeid(data_t) == typeid(long double)) + dtype_id = H5T_NATIVE_LDOUBLE; else if (typeid(data_t) == typeid(std::complex)) - { dtype_id = H5T_NATIVE_FLOAT; - } else if (typeid(data_t) == typeid(std::complex)) - { dtype_id = H5T_NATIVE_DOUBLE; - } + else if (typeid(data_t) == typeid(std::complex)) + dtype_id = H5T_NATIVE_LDOUBLE; #if defined(USE_MPI) && !defined(USE_MPI_IO) if (itask == 0) @@ -648,7 +655,7 @@ void Grid_FFT::Write_to_HDF5(std::string 
fname, std::string { for (size_t k = 0; k < size(2); ++k) { - if( this->space_ == rspace_id ) + if (this->space_ == rspace_id) buf[j * size(2) + k] = std::real(relem(i, j, k)); else buf[j * size(2) + k] = std::real(kelem(i, j, k)); @@ -671,7 +678,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string if (typeid(data_t) == typeid(std::complex) || typeid(data_t) == typeid(std::complex) || - this->space_ == kspace_id ) + typeid(data_t) == typeid(std::complex) || + this->space_ == kspace_id) { datasetname += std::string(".im"); @@ -721,7 +729,7 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string for (size_t j = 0; j < size(1); ++j) for (size_t k = 0; k < size(2); ++k) { - if( this->space_ == rspace_id ) + if (this->space_ == rspace_id) buf[j * size(2) + k] = std::imag(relem(i, j, k)); else buf[j * size(2) + k] = std::imag(kelem(i, j, k)); @@ -757,8 +765,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string #include -template -void Grid_FFT::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax) +template +void Grid_FFT::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax) { double logvmin = std::log10(vmin); double logvmax = std::log10(vmax); @@ -809,12 +817,12 @@ void Grid_FFT::Write_PDF(std::string ofname, int nbins, dou #endif } -template -void Grid_FFT::Write_PowerSpectrum(std::string ofname) +template +void Grid_FFT::Write_PowerSpectrum(std::string ofname) { std::vector bin_k, bin_P, bin_eP; std::vector bin_count; - this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count ); + this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count); #if defined(USE_MPI) if (CONFIG::MPI_task_rank == 0) { @@ -839,8 +847,8 @@ void Grid_FFT::Write_PowerSpectrum(std::string ofname) #endif } -template -void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count ) +template +void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count) { this->FourierTransformForward(); @@ -920,7 +928,7 @@ void Grid_FFT::Compute_PowerSpectrum(std::vector &b /********************************************************************************************/ -template class Grid_FFT; -template class Grid_FFT; -template class Grid_FFT; -template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; From 6dabf65ab20986bfd680b659bea64fde6c5d177a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 19:58:34 +0200 Subject: [PATCH 092/130] added template cmake_config file that was forgotten in previous commit --- include/cmake_config.hh.in | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/cmake_config.hh.in diff --git a/include/cmake_config.hh.in b/include/cmake_config.hh.in new file mode 100644 index 0000000..b280f44 --- /dev/null +++ b/include/cmake_config.hh.in @@ -0,0 +1,12 @@ +#pragma once + +#define USE_PRECISION_${CODE_PRECISION} +constexpr char CMAKE_BUILDTYPE_STR[] = "${CMAKE_BUILD_TYPE}"; + +#if defined(USE_PRECISION_FLOAT) + constexpr char CMAKE_PRECISION_STR[] = "single"; +#elif defined(USE_PRECISION_DOUBLE) + constexpr char CMAKE_PRECISION_STR[] = "double"; +#elif defined(USE_PRECISION_LONGDOUBLE) + constexpr char CMAKE_PRECISION_STR[] = "long double"; +#endif \ No newline at end of file From 0678489386e689afabaafa13ab56c8220dabf938 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 19:58:57 +0200 
Subject: [PATCH 093/130] fixed some nyquist plane errors in convolutions --- include/convolution.hh | 82 ++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/include/convolution.hh b/include/convolution.hh index 238717b..fc0b9b4 100644 --- a/include/convolution.hh +++ b/include/convolution.hh @@ -429,10 +429,9 @@ private: for (size_t j = 0; j < 2 * fp.size(1) / 3; ++j) { size_t jp = (j > nhalf[1]) ? j + nhalf[1] : j; - for (size_t k = 0; k < 2 * fp.size(2) / 3; ++k) + for (size_t k = 0; k < nhalf[2]+1; ++k) { size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k; - // if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue; fp.kelem(ip, jp, kp) = kfunc(i, j, k) * rfac; } } @@ -618,8 +617,11 @@ private: for (size_t k = 0; k < fbuf_->size(2); ++k) { size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k; - // if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue; fbuf_->kelem(i, j, k) = fp.kelem(ip, jp, kp) / rfac; + // zero Nyquist modes since they are not unique after convolution + if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]){ + fbuf_->kelem(i, j, k) = 0.0; + } } } } @@ -691,7 +693,7 @@ private: int recvfrom = 0; if (iglobal <= fny[0]) { - real_t wi = (iglobal == fny[0]) ? 0.5 : 1.0; + real_t wi = (iglobal == fny[0]) ? 0.0 : 1.0; recvfrom = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size); MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, (int)iglobal, @@ -699,7 +701,7 @@ private: for (size_t j = 0; j < nf[1]; ++j) { - real_t wj = (j == fny[1]) ? 0.5 : 1.0; + real_t wj = (j == fny[1]) ? 0.0 : 1.0; if (j <= fny[1]) { size_t jp = j; @@ -707,21 +709,22 @@ private: { if (typeid(data_t) == typeid(real_t)) { - real_t w = wi * wj; + real_t wk = (k == fny[2]) ? 0.0 : 1.0; + real_t w = wi * wj * wk; fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; } else { - real_t wk = (k == fny[2]) ? 0.5 : 1.0; + real_t wk = (k == fny[2]) ? 0.0 : 1.0; real_t w = wi * wj * wk; - if (k <= fny[2]) + if (k < fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; - if (k >= fny[2]) + if (k > fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac; - if (w < 1.0) - { - fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); - } + // if (w < 1.0) + // { + // fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); + // } } } } @@ -732,21 +735,22 @@ private: { if (typeid(data_t) == typeid(real_t)) { - real_t w = wi * wj; + real_t wk = (k == fny[2]) ? 0.0 : 1.0; + real_t w = wi * wj * wk; fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; } else { - real_t wk = (k == fny[2]) ? 0.5 : 1.0; + real_t wk = (k == fny[2]) ? 0.0 : 1.0; real_t w = wi * wj * wk; - if (k <= fny[2]) + if (k < fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; - if (k >= fny[2]) + if (k > fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac; - if (w < 1.0) - { - fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); - } + // if (w < 1.0) + // { + // fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); + // } } } } @@ -754,7 +758,7 @@ private: } if (iglobal >= fny[0]) { - real_t wi = (iglobal == fny[0]) ? 0.5 : 1.0; + real_t wi = (iglobal == fny[0]) ? 0.0 : 1.0; recvfrom = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size); MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, @@ -762,29 +766,26 @@ private: for (size_t j = 0; j < nf[1]; ++j) { - real_t wj = (j == fny[1]) ? 0.5 : 1.0; + real_t wj = (j == fny[1]) ? 
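                            // [Editorial note, not part of the patch.] On an even grid the
                            // +k_Nyquist and -k_Nyquist modes share one storage location, so
                            // after the 3/2-padded (Orszag) convolution these planes cannot be
                            // restored unambiguously; the patch therefore drops them, giving
                            // them weight 0.0 here and zeroing them explicitly when unpadding,
                            // instead of the earlier 0.5 averaging.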
0.0 : 1.0; if (j <= fny[1]) { size_t jp = j; for (size_t k = 0; k < nf[2]; ++k) { + const real_t wk = (k == fny[2]) ? 0.0 : 1.0; + const real_t w = wi * wj * wk; if (typeid(data_t) == typeid(real_t)) { - real_t w = wi * wj; + real_t wk = (k == fny[2]) ? 0.0 : 1.0; + real_t w = wi * wj * wk; fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; } else { - real_t wk = (k == fny[2]) ? 0.5 : 1.0; - real_t w = wi * wj * wk; - if (k <= fny[2]) + if (k < fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; - if (k >= fny[2]) + if (k > fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac; - if (w < 1.0) - { - fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); - } } } } @@ -793,23 +794,18 @@ private: size_t jp = j + fny[1]; for (size_t k = 0; k < nf[2]; ++k) { + const real_t wk = (k == fny[2]) ? 0.0 : 1.0; + const real_t w = wi * wj * wk; if (typeid(data_t) == typeid(real_t)) { - real_t w = wi * wj; fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; } else { - real_t wk = (k == fny[2]) ? 0.5 : 1.0; - real_t w = wi * wj * wk; - if (k <= fny[2]) + if (k < fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; - if (k >= fny[2]) + if (k > fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac; - if (w < 1.0) - { - fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); - } } } } @@ -817,8 +813,8 @@ private: } } -//... copy data back -#pragma omp parallel for + //... copy data back + #pragma omp parallel for for (size_t i = 0; i < fbuf_->ntot_; ++i) { output_op(i, (*fbuf_)[i]); From 7ead43455c3fd5bdc56c97b2c98020fb6213930e Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 21:47:17 +0200 Subject: [PATCH 094/130] removed old debugging logging --- src/plugins/random_music_wnoise_generator.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/plugins/random_music_wnoise_generator.cc b/src/plugins/random_music_wnoise_generator.cc index a6d4c35..68b4649 100644 --- a/src/plugins/random_music_wnoise_generator.cc +++ b/src/plugins/random_music_wnoise_generator.cc @@ -613,7 +613,7 @@ void music_wnoise_generator::register_cube(int i, int j, int k) rnums_.push_back(NULL); cubemap_[icube] = rnums_.size() - 1; #ifdef DEBUG - LOGDEBUG("registering new cube %d,%d,%d . ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]); + csoca::dlog.Print("registering new cube %d,%d,%d . 
ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]); #endif } } @@ -741,8 +741,8 @@ double music_wnoise_generator::fill_subvolume(int *i0, int *n) ncube[2] = (int)(n[2] / cubesize_) + 2; #ifdef DEBUG - LOGDEBUG("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]); - LOGDEBUG("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]); + csoca::dlog.Print("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]); + csoca::dlog.Print("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]); #endif double mean = 0.0; From dc5f87f216a8d00a755bc3d0f6e22eb01d4c73e0 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 3 Apr 2020 00:39:28 +0200 Subject: [PATCH 095/130] added convolver and plt as options to cmake --- .gitignore | 1 + CMakeLists.txt | 16 ++++++++++++++++ src/ic_generator.cc | 10 ++++++++-- src/main.cc | 29 ++++++++++++++++++++--------- src/plugins/transfer_CLASS.cc | 3 --- 5 files changed, 45 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 60035a0..b012d08 100644 --- a/.gitignore +++ b/.gitignore @@ -54,3 +54,4 @@ src/fastLPT src/input_powerspec.txt src/Makefile .DS_Store +include/cmake_config.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index d381d7d..4fedf66 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,6 +77,22 @@ set_property ( PROPERTY STRINGS FLOAT DOUBLE LONGDOUBLE ) +######################################################################################################################## +# convolver type, right now only orszag or naive +set ( + CONVOLVER_TYPE "ORSZAG" + CACHE STRING "Convolution algorithm to be used (Naive=no dealiasing, Orszag=dealiased)" +) +set_property ( + CACHE CONVOLVER_TYPE + PROPERTY STRINGS ORSZAG NAIVE +) + +######################################################################################################################## +# PLT options, right now only on/off +option(ENABLE_PLT "Enable PLT (particle linear theory) corrections" OFF) + + ######################################################################################################################## # FFTW if(POLICY CMP0074) diff --git a/src/ic_generator.cc b/src/ic_generator.cc index a8f60bd..0a88054 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -266,15 +266,21 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- // Create convolution class instance for non-linear terms //-------------------------------------------------------------------- +#if defined(USE_CONVOLVER_ORSZAG) OrszagConvolver Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - // NaiveConvolver Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); +#elif defined(USE_CONVOLVER_NAIVE) + NaiveConvolver Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); +#endif //-------------------------------------------------------------------- //-------------------------------------------------------------------- // Create PLT gradient operator //-------------------------------------------------------------------- - // particle::lattice_gradient lg( the_config ); +#if defined(ENABLE_PLT) + particle::lattice_gradient lg( the_config ); +#else op::fourier_gradient lg( the_config ); +#endif //-------------------------------------------------------------------- std::vector species_list; diff --git a/src/main.cc b/src/main.cc index 5afc648..140e588 100644 
--- a/src/main.cc +++ b/src/main.cc @@ -67,15 +67,26 @@ int main( int argc, char** argv ) // Ascii ART logo. generated via http://patorjk.com/software/taag/#p=display&f=Nancyj&t=monofonIC csoca::ilog << "\n" - << " The unigrid version of MUSIC-2 .8888b dP a88888b. \n" - << " 88 \" 88 d8\' `88 \n" - << " 88d8b.d8b. .d8888b. 88d888b. .d8888b. 88aaa .d8888b. 88d888b. 88 88 \n" - << " 88\'`88\'`88 88\' `88 88\' `88 88\' `88 88 88\' `88 88\' `88 88 88 \n" - << " 88 88 88 88. .88 88 88 88. .88 88 88. .88 88 88 88 Y8. .88 \n" - << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl - << "Build was compiled on " << __DATE__ << " at " << __TIME__ << std::endl - << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl - << "-------------------------------------------------------------------------------\n" << std::endl; + << " The unigrid version of MUSIC-2 .8888b dP a88888b. \n" + << " 88 \" 88 d8\' `88 \n" + << " 88d8b.d8b. .d8888b. 88d888b. .d8888b. 88aaa .d8888b. 88d888b. 88 88 \n" + << " 88\'`88\'`88 88\' `88 88\' `88 88\' `88 88 88\' `88 88\' `88 88 88 \n" + << " 88 88 88 88. .88 88 88 88. .88 88 88. .88 88 88 88 Y8. .88 \n" + << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl; + + // Compilation CMake configuration, time etc info: + csoca::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; + + // git and versioning info: + csoca::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl; + + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + csoca::ilog << "Compile time options : " << std::endl; + csoca::ilog << " Precision : " << CMAKE_PRECISION_STR << std::endl; + csoca::ilog << " Convolutions : " << CMAKE_CONVOLVER_STR << std::endl; + csoca::ilog << " PLT : " << CMAKE_PLT_STR << std::endl; + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + //------------------------------------------------------------------------------ // Parse command line options diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index e358fce..484c3d7 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -18,9 +18,6 @@ #include #include -#include -#include - class transfer_CLASS_plugin : public TransferFunction_plugin { From b99597d300931b7de6b599aa15da388e25662690 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 3 Apr 2020 00:54:10 +0200 Subject: [PATCH 096/130] updates fo cmake template header, forgotten in previous commit --- include/cmake_config.hh.in | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/include/cmake_config.hh.in b/include/cmake_config.hh.in index b280f44..db4b9a8 100644 --- a/include/cmake_config.hh.in +++ b/include/cmake_config.hh.in @@ -1,12 +1,25 @@ #pragma once -#define USE_PRECISION_${CODE_PRECISION} constexpr char CMAKE_BUILDTYPE_STR[] = "${CMAKE_BUILD_TYPE}"; +#define USE_PRECISION_${CODE_PRECISION} #if defined(USE_PRECISION_FLOAT) constexpr char CMAKE_PRECISION_STR[] = "single"; #elif defined(USE_PRECISION_DOUBLE) constexpr char CMAKE_PRECISION_STR[] = "double"; #elif defined(USE_PRECISION_LONGDOUBLE) constexpr char CMAKE_PRECISION_STR[] = "long double"; -#endif \ No newline at end of file +#endif + +#define 
USE_CONVOLVER_${CONVOLVER_TYPE} +#if defined(USE_CONVOLVER_ORSZAG) + constexpr char CMAKE_CONVOLVER_STR[] = "Orszag3/2"; +#elif defined(USE_CONVOLVER_NAIVE) + constexpr char CMAKE_CONVOLVER_STR[] = "Aliased"; +#endif + +#if defined(ENABLE_PLT) + constexpr char CMAKE_PLT_STR[] = "PLT corr. on"; +#else + constexpr char CMAKE_PLT_STR[] = "PLT corr. off"; +#endif \ No newline at end of file From c9fce7f2108dca0b0aa7d9ca395a55afd4a56758 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 3 Apr 2020 07:04:41 +0200 Subject: [PATCH 097/130] fixed bug that Omega_k was not set --- include/cosmology_parameters.hh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh index 1af692c..c8796ad 100644 --- a/include/cosmology_parameters.hh +++ b/include/cosmology_parameters.hh @@ -38,6 +38,9 @@ struct parameters sqrtpnorm, //!< sqrt of power spectrum normalisation factor vfact; //!< velocity<->displacement conversion factor in Zel'dovich approx. + parameters( const parameters& ) = default; + parameters() = delete; + explicit parameters(ConfigFile cf) { H0 = cf.GetValue("cosmology", "H0"); @@ -77,6 +80,7 @@ struct parameters #if 1 // assume zero curvature, take difference from dark energy Omega_DE += 1.0 - Omega_m - Omega_DE - Omega_r; + Omega_k = 0.0; #else // allow for curvature Omega_k = 1.0 - Omega_m - Omega_DE - Omega_r; @@ -99,8 +103,5 @@ struct parameters vfact = 0.0; } - parameters(void) - { - } }; } // namespace cosmology \ No newline at end of file From 734948c2a1bf5fbeb0ad6becf3ed045abc60471f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 01:24:05 +0200 Subject: [PATCH 098/130] minor fixes --- include/cosmology_calculator.hh | 9 ++++++--- include/cosmology_parameters.hh | 21 ++++++++++++--------- include/interpolate.hh | 8 +++++--- src/plugins/transfer_CLASS.cc | 1 + 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index e7d92f9..49ebc62 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -35,7 +35,7 @@ public: std::unique_ptr transfer_function_; private: - static constexpr double REL_PRECISION = 1e-9; + static constexpr double REL_PRECISION = 1e-10; interpolated_function_1d D_of_a_, f_of_a_, a_of_D_; double Dnow_, Dplus_start_, astart_; @@ -123,6 +123,8 @@ private: } public: + calculator() = delete; + calculator(const calculator& c) = delete; //! constructor for a cosmology calculator object /*! * @param acosmo a cosmological parameters structure @@ -149,7 +151,8 @@ public: cosmo_param_.pnorm = this->compute_pnorm_from_sigma8(); else{ cosmo_param_.pnorm = 1.0; - csoca::ilog << "Measured sigma_8 for given PS normalisation is " << this->compute_sigma8() << std::endl; + auto sigma8 = this->compute_sigma8(); + csoca::ilog << "Measured sigma_8 for given PS normalisation is " << sigma8 << std::endl; } cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm); @@ -256,7 +259,7 @@ public: */ real_t get_vfact(real_t a) const noexcept { - return a * H_of_a(a) / cosmo_param_.h * this->get_f(a); + return f_of_a_(a) * a * H_of_a(a) / cosmo_param_.h; } //! 
Integrand for the sigma_8 normalization of the power spectrum diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh index c8796ad..6a19043 100644 --- a/include/cosmology_parameters.hh +++ b/include/cosmology_parameters.hh @@ -38,9 +38,10 @@ struct parameters sqrtpnorm, //!< sqrt of power spectrum normalisation factor vfact; //!< velocity<->displacement conversion factor in Zel'dovich approx. - parameters( const parameters& ) = default; parameters() = delete; - + + parameters( const parameters& ) = default; + explicit parameters(ConfigFile cf) { H0 = cf.GetValue("cosmology", "H0"); @@ -73,10 +74,6 @@ struct parameters { Omega_r = 0.0; } - else - { - csoca::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally. Make sure your sim code supports this..." << std::endl; - } #if 1 // assume zero curvature, take difference from dark energy Omega_DE += 1.0 - Omega_m - Omega_DE - Omega_r; @@ -86,6 +83,10 @@ struct parameters Omega_k = 1.0 - Omega_m - Omega_DE - Omega_r; #endif + dplus = 0.0; + pnorm = 0.0; + vfact = 0.0; + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; csoca::ilog << "Cosmological parameters are: " << std::endl; csoca::ilog << " H0 = " << std::setw(16) << H0 << "sigma_8 = " << std::setw(16) << sigma8 << std::endl; @@ -98,9 +99,11 @@ struct parameters csoca::ilog << " Omega_DE = " << std::setw(16) << Omega_DE << "nspect = " << std::setw(16) << nspect << std::endl; csoca::ilog << " w0 = " << std::setw(16) << w_0 << "w_a = " << std::setw(16) << w_a << std::endl; - dplus = 0.0; - pnorm = 0.0; - vfact = 0.0; + if( Omega_r > 0.0 ) + { + csoca::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally."<< std::endl; + csoca::wlog << "Make sure your sim code supports this..." << std::endl; + } } }; diff --git a/include/interpolate.hh b/include/interpolate.hh index cb0ea50..41fe8d4 100644 --- a/include/interpolate.hh +++ b/include/interpolate.hh @@ -39,11 +39,12 @@ public: void set_data(const std::vector &data_x, const std::vector &data_y) { - assert(data_x_.size() == data_y_.size()); - assert(!(logx & periodic)); - data_x_ = data_x; data_y_ = data_y; + + assert(data_x_.size() == data_y_.size()); + assert(data_x_.size() > 5); + assert(!(logx & periodic)); if (logx) for (auto &d : data_x_) d = std::log(d); if (logy) for (auto &d : data_y_) d = std::log(d); @@ -59,6 +60,7 @@ public: double operator()(double x) const noexcept { + assert( isinit_ && !(logx&&x<=0.0) ); double xa = logx ? std::log(x) : x; double y(gsl_spline_eval(gsl_sp_, xa, gsl_ia_)); return logy ? 
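        // [Editorial note, not part of the patch.] With the logx/logy flags set, set_data()
        // stored log(x) and log(y), so evaluation maps x -> log x, interpolates with the GSL
        // spline there, and exponentiates on the way out: the interpolation is linear in
        // log-log space, which suits growth factors and power spectra spanning many decades.
        // A hypothetical use, assuming the template parameters are <logx, logy, periodic>:
        //   interpolated_function_1d<true, true, false> D_of_a;
        //   D_of_a.set_data(a_table, D_table);   // strictly positive tables required
        //   double D_half = D_of_a(0.5);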
std::exp(y) : y; diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 484c3d7..281d381 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -50,6 +50,7 @@ private: add_class_parameter("z_max_pk", std::max(zstart_, ztarget_) * 1.2); // use 1.2 as safety add_class_parameter("P_k_max_h/Mpc", kmax_); add_class_parameter("output", "dTk,vTk"); + add_class_parameter("extra metric transfer functions","yes"); // add_class_parameter("lensing", "no"); //--- choose gauge ------------------------------------------------ From 4432fbe8fc3d15c07723036dabf53005123594e1 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 06:07:12 +0200 Subject: [PATCH 099/130] added c compiler flags for build types --- CMakeLists.txt | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4fedf66..c8cf314 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,9 +6,15 @@ project(monofonIC C CXX) #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -march=native -Wall -pedantic" CACHE STRING "Flags used by the compiler during Release builds." FORCE) set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -march=native -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during RelWithDebInfo builds." FORCE) -set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -march=native -DDEBUG -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during Debug builds." FORCE) +set(CMAKE_CXX_FLAGS_DEBUG "-g -O1 -march=native -DDEBUG -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during Debug builds." FORCE) set(CMAKE_CXX_FLAGS_DEBUGSANADD "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address " CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for address." FORCE) set(CMAKE_CXX_FLAGS_DEBUGSANUNDEF "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for undefineds." FORCE) +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "Flags used by the compiler during Release builds." FORCE) +set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}" CACHE STRING "Flags used by the compiler during RelWithDebInfo builds." FORCE) +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" CACHE STRING "Flags used by the compiler during Debug builds." FORCE) +set(CMAKE_C_FLAGS_DEBUGSANADD "${CMAKE_CXX_FLAGS_DEBUGSANADD}" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for address." FORCE) +set(CMAKE_C_FLAGS_DEBUGSANUNDEF "${CMAKE_CXX_FLAGS_DEBUGSANUNDEF}" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for undefineds." 
FORCE) + set(default_build_type "Release") if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) @@ -19,7 +25,9 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo" "DebugSanAdd" "DebugSanUndef") endif() -mark_as_advanced(CMAKE_CXX_FLAGS_DEBUGSANADD CMAKE_CXX_FLAGS_DEBUGSANUNDEF CMAKE_EXECUTABLE_FORMAT CMAKE_OSX_ARCHITECTURES CMAKE_OSX_DEPLOYMENT_TARGET CMAKE_OSX_SYSROOT) +mark_as_advanced(CMAKE_CXX_FLAGS_DEBUGSANADD CMAKE_CXX_FLAGS_DEBUGSANUNDEF) +mark_as_advanced(CMAKE_C_FLAGS_DEBUGSANADD CMAKE_C_FLAGS_DEBUGSANUNDEF) +mark_as_advanced(CMAKE_EXECUTABLE_FORMAT CMAKE_OSX_ARCHITECTURES CMAKE_OSX_DEPLOYMENT_TARGET CMAKE_OSX_SYSROOT) ######################################################################################################################## From 8c24becc92c73e5381965c29bd3a4692875f841a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 14:22:59 +0200 Subject: [PATCH 100/130] forgotten file in class.cmake when compiling with Makefile --- external/class.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/external/class.cmake b/external/class.cmake index aead59b..a2e5057 100644 --- a/external/class.cmake +++ b/external/class.cmake @@ -32,6 +32,7 @@ if(ENABLE_CLASS) ${CMAKE_CURRENT_LIST_DIR}/class/build/history.o ${CMAKE_CURRENT_LIST_DIR}/class/build/hydrogen.o ${CMAKE_CURRENT_LIST_DIR}/class/build/hyperspherical.o + ${CMAKE_CURRENT_LIST_DIR}/class/tools/trigonometric_integrals.o ${CMAKE_CURRENT_LIST_DIR}/class/build/hyrectools.o ${CMAKE_CURRENT_LIST_DIR}/class/build/input.o ${CMAKE_CURRENT_LIST_DIR}/class/build/lensing.o From 23155153058e5157204de0c01bdc47307a3caee3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 20:27:51 +0200 Subject: [PATCH 101/130] renaming (csoca->music) --- external/class | 2 +- include/config_file.hh | 20 +++--- include/convolution.hh | 8 +-- include/cosmology_calculator.hh | 14 ++--- include/cosmology_parameters.hh | 20 +++--- include/logger.hh | 4 +- include/particle_plt.hh | 8 +-- src/grid_fft.cc | 30 ++++----- src/ic_generator.cc | 64 +++++++++---------- src/logger.cc | 4 +- src/main.cc | 66 ++++++++++---------- src/old/output_gadget2___original.cc | 62 +++++++++--------- src/output_plugin.cc | 12 ++-- src/plugins/output_arepo.cc | 2 +- src/plugins/output_gadget2.cc | 2 +- src/plugins/output_gadget_hdf5.cc | 43 +++++++------ src/plugins/output_generic.cc | 2 +- src/plugins/output_grafic2.cc | 6 +- src/plugins/random_music.cc | 10 +-- src/plugins/random_music_wnoise_generator.cc | 38 +++++------ src/plugins/random_music_wnoise_generator.hh | 6 +- src/plugins/transfer_CAMB_file.cc | 8 +-- src/plugins/transfer_CLASS.cc | 8 +-- src/random_plugin.cc | 12 ++-- src/testing.cc | 2 +- src/transfer_function_plugin.cc | 12 ++-- 26 files changed, 234 insertions(+), 231 deletions(-) diff --git a/external/class b/external/class index 083efeb..52bc312 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit 083efeb043fca85418c1ea02f062be111b970b28 +Subproject commit 52bc3126fca4415c4f541d47d43ffdb9763e0464 diff --git a/include/config_file.hh b/include/config_file.hh index b0d6401..4b6f1fc 100644 --- a/include/config_file.hh +++ b/include/config_file.hh @@ -66,7 +66,7 @@ public: if (!ss.eof()) { //.. conversion error - csoca::elog << "Error: conversion of \'" << ival << "\' failed." + music::elog << "Error: conversion of \'" << ival << "\' failed." 
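      // [Editorial note, not part of the patch.] The conversion that fails here is the usual
      // stringstream round-trip; a minimal equivalent of the pattern used by GetValue
      // (as far as it is visible in this hunk):
      //   std::stringstream ss(ival);
      //   ss >> out_value;
      //   if (!ss.eof())                      // trailing characters => not a clean conversion
      //       throw ErrInvalidConversion(std::string("invalid conversion to ")
      //                                  + typeid(out_value).name() + '.');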
<< std::endl; throw ErrInvalidConversion(std::string("invalid conversion to ") + typeid(out_value).name() + '.'); @@ -80,7 +80,7 @@ public: std::ifstream file(FileName.c_str()); if (!file.is_open()){ - csoca::elog << "Could not open config file \'" << FileName << "\'." << std::endl; + music::elog << "Could not open config file \'" << FileName << "\'." << std::endl; throw std::runtime_error( std::string("Error: Could not open config file \'") + FileName + std::string("\'")); @@ -117,19 +117,19 @@ public: if ((size_t)posEqual == std::string::npos && (name.size() != 0 || value.size() != 0)) { - csoca::wlog << "Ignoring non-assignment in " << FileName << ":" + music::wlog << "Ignoring non-assignment in " << FileName << ":" << m_iLine << std::endl; continue; } if (name.length() == 0 && value.size() != 0) { - csoca::wlog << "Ignoring assignment missing entry name in " + music::wlog << "Ignoring assignment missing entry name in " << FileName << ":" << m_iLine << std::endl; continue; } if (value.length() == 0 && name.size() != 0) { - csoca::wlog << "Empty entry will be ignored in " << FileName << ":" + music::wlog << "Empty entry will be ignored in " << FileName << ":" << m_iLine << std::endl; continue; } @@ -139,7 +139,7 @@ public: //.. add key/value pair to hash table .. if (m_Items.find(inSection + '/' + name) != m_Items.end()) { - csoca::wlog << "Redeclaration overwrites previous value in " + music::wlog << "Redeclaration overwrites previous value in " << FileName << ":" << m_iLine << std::endl; } @@ -232,7 +232,7 @@ public: } catch (ErrItemNotFound& e) { - csoca::elog << e.what() << std::endl; + music::elog << e.what() << std::endl; throw; } return r; @@ -282,11 +282,11 @@ public: } void LogDump(void) { - csoca::ilog << "List of all configuration options:" << std::endl; + music::ilog << "List of all configuration options:" << std::endl; std::map::const_iterator i = m_Items.begin(); while (i != m_Items.end()) { if (i->second.length() > 0) - csoca::ilog << std::setw(28) << i->first << " = " << i->second + music::ilog << std::setw(28) << i->first << " = " << i->second << std::endl; ++i; } @@ -330,7 +330,7 @@ inline bool ConfigFile::GetValue(std::string const &strSection, return true; if (r1 == "false" || r1 == "no" || r1 == "off" || r1 == "0") return false; - csoca::elog << "Illegal identifier \'" << r1 << "\' in \'" << strEntry << "\'." << std::endl; + music::elog << "Illegal identifier \'" << r1 << "\' in \'" << strEntry << "\'." << std::endl; throw ErrIllegalIdentifier(std::string("Illegal identifier \'") + r1 + std::string("\' in \'") + strEntry + std::string("\'.")); diff --git a/include/convolution.hh b/include/convolution.hh index fc0b9b4..90736b1 100644 --- a/include/convolution.hh +++ b/include/convolution.hh @@ -444,7 +444,7 @@ private: ///////////////////////////////////////////////////////////////////// double tstart = get_wtime(); - csoca::dlog << "[MPI] Started scatter for convolution" << std::endl; + music::dlog << "[MPI] Started scatter for convolution" << std::endl; //... collect offsets @@ -589,7 +589,7 @@ private: // std::cerr << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed! <<<<<" // << std::endl; ofs << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed! 
// <<<<<" << std::endl; - csoca::dlog.Print("[MPI] Completed scatter for convolution, took %fs\n", + music::dlog.Print("[MPI] Completed scatter for convolution, took %fs\n", get_wtime() - tstart); #endif /// end of ifdef/ifndef USE_MPI /////////////////////////////////////////////////////////////// @@ -639,7 +639,7 @@ private: double tstart = get_wtime(); - csoca::dlog << "[MPI] Started gather for convolution"; + music::dlog << "[MPI] Started gather for convolution"; MPI_Barrier(MPI_COMM_WORLD); @@ -833,7 +833,7 @@ private: MPI_Barrier(MPI_COMM_WORLD); - csoca::dlog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart); + music::dlog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart); #endif /// end of ifdef/ifndef USE_MPI ////////////////////////////////////////////////////////////// } diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index 49ebc62..04aa2e9 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -57,7 +57,7 @@ private: gsl_set_error_handler(NULL); if (error / result > REL_PRECISION) - csoca::wlog << "no convergence in function 'integrate', rel. error=" << error / result << std::endl; + music::wlog << "no convergence in function 'integrate', rel. error=" << error / result << std::endl; return (real_t)result; } @@ -152,17 +152,17 @@ public: else{ cosmo_param_.pnorm = 1.0; auto sigma8 = this->compute_sigma8(); - csoca::ilog << "Measured sigma_8 for given PS normalisation is " << sigma8 << std::endl; + music::ilog << "Measured sigma_8 for given PS normalisation is " << sigma8 << std::endl; } cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm); - csoca::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons" + music::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons" << " : " << (transfer_function_->tf_is_distinct() ? "yes" : "no") << std::endl; - csoca::ilog << std::setw(32) << std::left << "TF maximum wave number" + music::ilog << std::setw(32) << std::left << "TF maximum wave number" << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl; - // csoca::ilog << "D+(MUSIC) = " << this->get_growth_factor( 1.0/(1.0+cf.GetValue("setup","zstart")) ) << std::endl; - // csoca::ilog << "pnrom = " << cosmo_param_.pnorm << std::endl; + // music::ilog << "D+(MUSIC) = " << this->get_growth_factor( 1.0/(1.0+cf.GetValue("setup","zstart")) ) << std::endl; + // music::ilog << "pnrom = " << cosmo_param_.pnorm << std::endl; } ~calculator() @@ -213,7 +213,7 @@ public: #warning Check whether output is at redshift that is indicated! 
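            // [Editorial note, not part of the patch.] For reference, the normalisation the
            // calculator works with is the conventional one: the linear spectrum is assembled
            // as P(k,a) = pnorm * k^n_s * T(k)^2 * D_+(a)^2, and pnorm is fixed from sigma_8
            // through sigma_8^2 = 1/(2 pi^2) Int dk k^2 P(k, a=1) [3 j_1(kR)/(kR)]^2 with
            // R = 8 Mpc/h -- this is what compute_pnorm_from_sigma8() and compute_sigma8()
            // evaluate, up to the exact transfer-function convention of the chosen plugin.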
} } - csoca::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl; + music::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl; } const cosmology::parameters &get_parameters(void) const noexcept diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh index 6a19043..7168ec9 100644 --- a/include/cosmology_parameters.hh +++ b/include/cosmology_parameters.hh @@ -87,22 +87,22 @@ struct parameters pnorm = 0.0; vfact = 0.0; - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Cosmological parameters are: " << std::endl; - csoca::ilog << " H0 = " << std::setw(16) << H0 << "sigma_8 = " << std::setw(16) << sigma8 << std::endl; - csoca::ilog << " Omega_c = " << std::setw(16) << Omega_m-Omega_b << "Omega_b = " << std::setw(16) << Omega_b << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "Cosmological parameters are: " << std::endl; + music::ilog << " H0 = " << std::setw(16) << H0 << "sigma_8 = " << std::setw(16) << sigma8 << std::endl; + music::ilog << " Omega_c = " << std::setw(16) << Omega_m-Omega_b << "Omega_b = " << std::setw(16) << Omega_b << std::endl; if (!cf.GetValueSafe("cosmology", "ZeroRadiation", false)){ - csoca::ilog << " Omega_g = " << std::setw(16) << Omega_gamma << "Omega_nu = " << std::setw(16) << Omega_nu << std::endl; + music::ilog << " Omega_g = " << std::setw(16) << Omega_gamma << "Omega_nu = " << std::setw(16) << Omega_nu << std::endl; }else{ - csoca::ilog << " Omega_r = " << std::setw(16) << Omega_r << std::endl; + music::ilog << " Omega_r = " << std::setw(16) << Omega_r << std::endl; } - csoca::ilog << " Omega_DE = " << std::setw(16) << Omega_DE << "nspect = " << std::setw(16) << nspect << std::endl; - csoca::ilog << " w0 = " << std::setw(16) << w_0 << "w_a = " << std::setw(16) << w_a << std::endl; + music::ilog << " Omega_DE = " << std::setw(16) << Omega_DE << "nspect = " << std::setw(16) << nspect << std::endl; + music::ilog << " w0 = " << std::setw(16) << w_0 << "w_a = " << std::setw(16) << w_a << std::endl; if( Omega_r > 0.0 ) { - csoca::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally."<< std::endl; - csoca::wlog << "Make sure your sim code supports this..." << std::endl; + music::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally."<< std::endl; + music::wlog << "Make sure your sim code supports this..." 
<< std::endl; } } diff --git a/include/logger.hh b/include/logger.hh index 41fc287..6c86fd0 100644 --- a/include/logger.hh +++ b/include/logger.hh @@ -6,7 +6,7 @@ #include #include -namespace csoca { +namespace music { enum LogLevel : int { Off = 0, @@ -132,4 +132,4 @@ extern LogStream wlog; extern LogStream ilog; extern LogStream dlog; -} // namespace csoca +} // namespace music diff --git a/include/particle_plt.hh b/include/particle_plt.hh index a6fc1ad..5346955 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -508,7 +508,7 @@ public: grad_x_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_y_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) { - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); const lattice lattice_type = ((lattice_str=="bcc")? lattice_bcc @@ -516,15 +516,15 @@ public: : ((lattice_str=="rsc")? lattice_rsc : lattice_sc))); - csoca::ilog << "PLT corrections for " << lattice_str << " lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; + music::ilog << "PLT corrections for " << lattice_str << " lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; double wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; init_D( lattice_type ); // init_D__old(); - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; } inline ccomplex_t gradient( const int idim, std::array ijk ) const diff --git a/src/grid_fft.cc b/src/grid_fft.cc index 2b595b8..e925dc5 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -9,7 +9,7 @@ void Grid_FFT::Setup(void) { ntot_ = (n_[2] + 2) * n_[1] * n_[0]; - csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); + music::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); if (typeid(data_t) == typeid(real_t)) { data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(real_t))); @@ -28,7 +28,7 @@ void Grid_FFT::Setup(void) } else { - csoca::elog.Print("invalid data type in Grid_FFT::setup_fft_interface\n"); + music::elog.Print("invalid data type in Grid_FFT::setup_fft_interface\n"); } fft_norm_fac_ = 1.0 / std::sqrt((real_t)((size_t)n_[0] * (real_t)n_[1] * (real_t)n_[2])); @@ -105,11 +105,11 @@ void Grid_FFT::Setup(void) } else { - csoca::elog.Print("unknown data type in Grid_FFT::setup_fft_interface\n"); + music::elog.Print("unknown data type in Grid_FFT::setup_fft_interface\n"); abort(); } - csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); + music::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); fft_norm_fac_ = 1.0 / sqrt((real_t)n_[0] * (real_t)n_[1] * (real_t)n_[2]); @@ -151,7 +151,7 @@ void Grid_FFT::Setup(void) sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension } #else - csoca::flog << "MPI is required for distributed FFT arrays!" 
<< std::endl; + music::flog << "MPI is required for distributed FFT arrays!" << std::endl; throw std::runtime_error("MPI is required for distributed FFT arrays!"); #endif //// of #ifdef #else USE_MPI //////////////////////////////////////////////////////////////////////////////////// } @@ -178,13 +178,13 @@ void Grid_FFT::FourierTransformForward(bool do_transform) if (do_transform) { double wtime = get_wtime(); - csoca::dlog.Print("[FFT] Calling Grid_FFT::to_kspace (%lux%lux%lu)", sizes_[0], sizes_[1], sizes_[2]); + music::dlog.Print("[FFT] Calling Grid_FFT::to_kspace (%lux%lux%lu)", sizes_[0], sizes_[1], sizes_[2]); FFTW_API(execute) (plan_); this->ApplyNorm(); wtime = get_wtime() - wtime; - csoca::dlog.Print("[FFT] Completed Grid_FFT::to_kspace (%lux%lux%lu), took %f s", sizes_[0], sizes_[1], sizes_[2], wtime); + music::dlog.Print("[FFT] Completed Grid_FFT::to_kspace (%lux%lux%lu), took %f s", sizes_[0], sizes_[1], sizes_[2], wtime); } sizes_[0] = local_1_size_; @@ -209,14 +209,14 @@ void Grid_FFT::FourierTransformBackward(bool do_transform) //............................. if (do_transform) { - csoca::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]); + music::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]); double wtime = get_wtime(); FFTW_API(execute)(iplan_); this->ApplyNorm(); wtime = get_wtime() - wtime; - csoca::dlog.Print("[FFT] Completed Grid_FFT::to_rspace (%dx%dx%d), took %f s\n", sizes_[0], sizes_[1], sizes_[2], wtime); + music::dlog.Print("[FFT] Completed Grid_FFT::to_rspace (%dx%dx%d), took %f s\n", sizes_[0], sizes_[1], sizes_[2], wtime); } sizes_[0] = local_0_size_; sizes_[1] = n_[1]; @@ -285,7 +285,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, { if (bdistributed) { - csoca::elog << "Attempt to read from HDF5 into MPI-distributed array. This is not supported yet!" << std::endl; + music::elog << "Attempt to read from HDF5 into MPI-distributed array. This is not supported yet!" << std::endl; abort(); } @@ -311,7 +311,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, //... dataset did not exist or was empty if (HDF_DatasetID < 0) { - csoca::elog << "Dataset \'" << ObjName.c_str() << "\' does not exist or is empty." << std::endl; + music::elog << "Dataset \'" << ObjName.c_str() << "\' does not exist or is empty." << std::endl; H5Fclose(HDF_FileID); abort(); } @@ -336,7 +336,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, if (Data.capacity() < HDF_StorageSize) { - csoca::elog << "Not enough memory to store all data in HDFReadDataset!" << std::endl; + music::elog << "Not enough memory to store all data in HDFReadDataset!" << std::endl; H5Sclose(HDF_DataspaceID); H5Dclose(HDF_DatasetID); H5Fclose(HDF_FileID); @@ -348,7 +348,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, if (Data.size() != HDF_StorageSize) { - csoca::elog << "Something went wrong while reading!" << std::endl; + music::elog << "Something went wrong while reading!" << std::endl; H5Sclose(HDF_DataspaceID); H5Dclose(HDF_DatasetID); H5Fclose(HDF_FileID); @@ -360,7 +360,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, H5Fclose(HDF_FileID); assert(dimsize[0] == dimsize[1] && dimsize[0] == dimsize[2]); - csoca::ilog << "Read external constraint data of dimensions " << dimsize[0] << "**3." << std::endl; + music::ilog << "Read external constraint data of dimensions " << dimsize[0] << "**3." 
<< std::endl; for (size_t i = 0; i < 3; ++i) this->n_[i] = dimsize[i]; @@ -390,7 +390,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, sum1 /= Data.size(); sum2 /= Data.size(); auto stdw = std::sqrt(sum2 - sum1 * sum1); - csoca::ilog << "Constraint field has =" << sum1 << ", -^2=" << stdw << std::endl; + music::ilog << "Constraint field has =" << sum1 << ", -^2=" << stdw << std::endl; #pragma omp parallel for reduction(+ : sum1, sum2) for (size_t i = 0; i < size(0); ++i) diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 0a88054..4964a4d 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -97,7 +97,7 @@ int Run( ConfigFile& the_config ) | the_config.ContainsKey("cosmology", "LSS_aniso_ly") | the_config.ContainsKey("cosmology", "LSS_aniso_lz") )) { - csoca::elog << "Not all dimensions of LSS_aniso_l{x,y,z} specified! Will ignore external tidal field!" << std::endl; + music::elog << "Not all dimensions of LSS_aniso_l{x,y,z} specified! Will ignore external tidal field!" << std::endl; bAddExternalTides = false; } // Anisotropy parameters for beyond box tidal field @@ -108,7 +108,7 @@ int Run( ConfigFile& the_config ) }; if( std::abs(lss_aniso_lambda[0]+lss_aniso_lambda[1]+lss_aniso_lambda[2]) > 1e-10 ){ - csoca::elog << "External tidal field is not trace-free! Will subtract trace!" << std::endl; + music::elog << "External tidal field is not trace-free! Will subtract trace!" << std::endl; auto tr_l_3 = (lss_aniso_lambda[0]+lss_aniso_lambda[1]+lss_aniso_lambda[2])/3.0; lss_aniso_lambda[0] -= tr_l_3; lss_aniso_lambda[1] -= tr_l_3; @@ -122,10 +122,10 @@ int Run( ConfigFile& the_config ) the_cosmo_calc->write_powerspectrum(astart, "input_powerspec.txt" ); - //csoca::ilog << "-----------------------------------------------------------------------------" << std::endl; + //music::ilog << "-----------------------------------------------------------------------------" << std::endl; // if( bSymplecticPT && LPTorder!=2 ){ - // csoca::wlog << "SymplecticPT has been selected and will overwrite chosen order of LPT to 2" << std::endl; + // music::wlog << "SymplecticPT has been selected and will overwrite chosen order of LPT to 2" << std::endl; // LPTorder = 2; // } @@ -180,8 +180,8 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- // Fill the grid with a Gaussian white noise field //-------------------------------------------------------------------- - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Generating white noise field...." << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "Generating white noise field...." << std::endl; the_random_number_generator->Fill_Grid(wnoise); @@ -241,11 +241,11 @@ int Run( ConfigFile& the_config ) } } - // csoca::ilog << " ... old field: re =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; - // csoca::ilog << " ... old field: im =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; - // csoca::ilog << " ... new field: re =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; - // csoca::ilog << " ... new field: im =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; - csoca::ilog << "White noise field large-scale modes overwritten with external field." << std::endl; + // music::ilog << " ... 
old field: re =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; + // music::ilog << " ... old field: im =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; + // music::ilog << " ... new field: re =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; + // music::ilog << " ... new field: im =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; + music::ilog << "White noise field large-scale modes overwritten with external field." << std::endl; } //-------------------------------------------------------------------- @@ -293,11 +293,11 @@ int Run( ConfigFile& the_config ) //====================================================================== // phi = - delta / k^2 - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Generating white noise field...." << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "Generating white noise field...." << std::endl; double wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush; + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush; phi.FourierTransformForward(false); phi.assign_function_of_grids_kdep([&](auto k, auto wn) { @@ -308,7 +308,7 @@ int Run( ConfigFile& the_config ) phi.zero_DC_mode(); - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; //====================================================================== //... compute 2LPT displacement potential .... @@ -316,7 +316,7 @@ int Run( ConfigFile& the_config ) if (LPTorder > 1) { wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush; + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush; phi2.FourierTransformForward(false); Conv.convolve_SumOfHessians(phi, {0, 0}, phi, {1, 1}, {2, 2}, op::assign_to(phi2)); Conv.convolve_Hessians(phi, {1, 1}, phi, {2, 2}, op::add_to(phi2)); @@ -334,12 +334,12 @@ int Run( ConfigFile& the_config ) } phi2.apply_InverseLaplacian(); - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; if (bAddExternalTides) { - csoca::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" << std::endl; - csoca::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl; + music::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" << std::endl; + music::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl; } } @@ -350,7 +350,7 @@ int Run( ConfigFile& the_config ) { //... 3a term ... 
wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush; + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush; phi3a.FourierTransformForward(false); Conv.convolve_Hessians(phi, {0, 0}, phi, {1, 1}, phi, {2, 2}, op::assign_to(phi3a)); Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::add_twice_to(phi3a)); @@ -358,11 +358,11 @@ int Run( ConfigFile& the_config ) Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, phi, {1, 1}, op::subtract_from(phi3a)); Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, phi, {2, 2}, op::subtract_from(phi3a)); phi3a.apply_InverseLaplacian(); - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; //... 3b term ... wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush; + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush; phi3b.FourierTransformForward(false); Conv.convolve_SumOfHessians(phi, {0, 0}, phi2, {1, 1}, {2, 2}, op::assign_to(phi3b)); Conv.convolve_SumOfHessians(phi, {1, 1}, phi2, {2, 2}, {0, 0}, op::add_to(phi3b)); @@ -372,11 +372,11 @@ int Run( ConfigFile& the_config ) Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::subtract_twice_from(phi3b)); phi3b.apply_InverseLaplacian(); phi3b *= 0.5; // factor 1/2 from definition of phi(3b)! - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; //... transversal term ... wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush; + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush; for (int idim = 0; idim < 3; ++idim) { // cyclic rotations of indices @@ -388,13 +388,13 @@ int Run( ConfigFile& the_config ) Conv.convolve_DifferenceOfHessians(phi2, {idimp, idimpp}, phi, {idimp, idimp}, {idimpp, idimpp}, op::subtract_from(*A3[idim])); A3[idim]->apply_InverseLaplacian(); } - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; } // if( bSymplecticPT ){ // //... transversal term ... 
// wtime = get_wtime(); - // csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing vNLO(3) term" << std::flush; + // music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing vNLO(3) term" << std::flush; // for( int idim=0; idim<3; ++idim ){ // // cyclic rotations of indices // A3[idim]->FourierTransformForward(false); @@ -402,7 +402,7 @@ int Run( ConfigFile& the_config ) // Conv.convolve_Gradient_and_Hessian( phi, {1}, phi2, {idim,1}, add_to(*A3[idim]) ); // Conv.convolve_Gradient_and_Hessian( phi, {2}, phi2, {idim,2}, add_to(*A3[idim]) ); // } - // csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + // music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; // } @@ -415,7 +415,7 @@ int Run( ConfigFile& the_config ) (*A3[1]) *= g3c; (*A3[2]) *= g3c; - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; /////////////////////////////////////////////////////////////////////// // we store the densities here if we compute them @@ -426,7 +426,7 @@ int Run( ConfigFile& the_config ) if (testing != "none") { - csoca::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" << std::endl; + music::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" << std::endl; if (testing == "potentials_and_densities"){ testing::output_potentials_and_densities(the_config, ngrid, boxlen, phi, phi2, phi3a, phi3b, A3); } @@ -437,14 +437,14 @@ int Run( ConfigFile& the_config ) testing::output_convergence(the_config, the_cosmo_calc.get(), ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); } else{ - csoca::flog << "unknown test '" << testing << "'" << std::endl; + music::flog << "unknown test '" << testing << "'" << std::endl; std::abort(); } } for( auto& this_species : species_list ) { - csoca::ilog << std::endl + music::ilog << std::endl << ">>> Computing ICs for species \'" << cosmo_species_name[this_species] << "\' <<<\n" << std::endl; { @@ -468,7 +468,7 @@ int Run( ConfigFile& the_config ) real_t std_phi1 = phi.std(); const real_t hbar = 2.0 * M_PI/ngrid * (2*std_phi1/Dplus0); //3sigma, but this might rather depend on gradients of phi... 
- csoca::ilog << "Semiclassical PT : hbar = " << hbar << " from sigma(phi1) = " << std_phi1 << std::endl; + music::ilog << "Semiclassical PT : hbar = " << hbar << " from sigma(phi1) = " << std_phi1 << std::endl; if( LPTorder == 1 ){ psi.assign_function_of_grids_r([hbar,Dplus0]( real_t pphi ){ diff --git a/src/logger.cc b/src/logger.cc index 2b93b89..eb07442 100644 --- a/src/logger.cc +++ b/src/logger.cc @@ -1,6 +1,6 @@ #include -namespace csoca { +namespace music { std::ofstream Logger::output_file_; LogLevel Logger::log_level_ = LogLevel::Off; @@ -39,4 +39,4 @@ LogStream wlog(glogger, LogLevel::Warning); LogStream ilog(glogger, LogLevel::Info); LogStream dlog(glogger, LogLevel::Debug); -} // namespace csoca +} // namespace music diff --git a/src/main.cc b/src/main.cc index 140e588..c16690a 100644 --- a/src/main.cc +++ b/src/main.cc @@ -38,14 +38,14 @@ void handle_eptr(std::exception_ptr eptr) // passing by value is ok std::rethrow_exception(eptr); } } catch(const std::exception& e) { - csoca::elog << "This happened: \"" << e.what() << "\"" << std::endl; + music::elog << "This happened: \"" << e.what() << "\"" << std::endl; } } int main( int argc, char** argv ) { - csoca::Logger::SetLevel(csoca::LogLevel::Info); - // csoca::Logger::SetLevel(csoca::LogLevel::Debug); + music::Logger::SetLevel(music::LogLevel::Info); + // music::Logger::SetLevel(music::LogLevel::Debug); //------------------------------------------------------------------------------ // initialise MPI @@ -61,12 +61,12 @@ int main( int argc, char** argv ) // set up lower logging levels for other tasks if( CONFIG::MPI_task_rank!=0 ) { - csoca::Logger::SetLevel(csoca::LogLevel::Error); + music::Logger::SetLevel(music::LogLevel::Error); } #endif // Ascii ART logo. generated via http://patorjk.com/software/taag/#p=display&f=Nancyj&t=monofonIC - csoca::ilog << "\n" + music::ilog << "\n" << " The unigrid version of MUSIC-2 .8888b dP a88888b. \n" << " 88 \" 88 d8\' `88 \n" << " 88d8b.d8b. .d8888b. 88d888b. .d8888b. 88aaa .d8888b. 88d888b. 
88 88 \n" @@ -75,17 +75,17 @@ int main( int argc, char** argv ) << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl; // Compilation CMake configuration, time etc info: - csoca::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; + music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; // git and versioning info: - csoca::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl; + music::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl; - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Compile time options : " << std::endl; - csoca::ilog << " Precision : " << CMAKE_PRECISION_STR << std::endl; - csoca::ilog << " Convolutions : " << CMAKE_CONVOLVER_STR << std::endl; - csoca::ilog << " PLT : " << CMAKE_PLT_STR << std::endl; - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "Compile time options : " << std::endl; + music::ilog << " Precision : " << CMAKE_PRECISION_STR << std::endl; + music::ilog << " Convolutions : " << CMAKE_CONVOLVER_STR << std::endl; + music::ilog << " PLT : " << CMAKE_PLT_STR << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; //------------------------------------------------------------------------------ @@ -99,7 +99,7 @@ int main( int argc, char** argv ) print_RNG_plugins(); print_output_plugins(); - csoca::elog << "In order to run, you need to specify a parameter file!\n" << std::endl; + music::elog << "In order to run, you need to specify a parameter file!\n" << std::endl; exit(0); } @@ -144,10 +144,10 @@ int main( int argc, char** argv ) // Write code configuration to screen //------------------------------------------------------------------------------ // hardware related infos - csoca::ilog << std::setw(32) << std::left << "CPU vendor string" << " : " << SystemStat::Cpu().get_CPUstring() << std::endl; + music::ilog << std::setw(32) << std::left << "CPU vendor string" << " : " << SystemStat::Cpu().get_CPUstring() << std::endl; // multi-threading related infos - csoca::ilog << std::setw(32) << std::left << "Available HW threads / task" << " : " << std::thread::hardware_concurrency() << " (" << CONFIG::num_threads << " used)" << std::endl; + music::ilog << std::setw(32) << std::left << "Available HW threads / task" << " : " << std::thread::hardware_concurrency() << " (" << CONFIG::num_threads << " used)" << std::endl; // memory related infos SystemStat::Memory mem; @@ -164,34 +164,34 @@ int main( int argc, char** argv ) MPI_Allreduce(&minupmem,&temp,1,MPI_UNSIGNED,MPI_MIN,MPI_COMM_WORLD); minupmem = temp; MPI_Allreduce(&maxupmem,&temp,1,MPI_UNSIGNED,MPI_MAX,MPI_COMM_WORLD); maxupmem = temp; #endif - csoca::ilog << std::setw(32) << std::left << "Total system memory (phys)" << " : " << mem.get_TotalMem()/1024/1024 << " Mb" << std::endl; - csoca::ilog << std::setw(32) << std::left << "Used system memory (phys)" << " : " << "Max: " << maxupmem << " Mb, Min: " << minupmem << " Mb" << std::endl; - csoca::ilog << std::setw(32) << std::left << 
"Available system memory (phys)" << " : " << "Max: " << maxpmem << " Mb, Min: " << minpmem << " Mb" << std::endl; + music::ilog << std::setw(32) << std::left << "Total system memory (phys)" << " : " << mem.get_TotalMem()/1024/1024 << " Mb" << std::endl; + music::ilog << std::setw(32) << std::left << "Used system memory (phys)" << " : " << "Max: " << maxupmem << " Mb, Min: " << minupmem << " Mb" << std::endl; + music::ilog << std::setw(32) << std::left << "Available system memory (phys)" << " : " << "Max: " << maxpmem << " Mb, Min: " << minpmem << " Mb" << std::endl; // MPI related infos #if defined(USE_MPI) - csoca::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "yes (" << CONFIG::MPI_task_size << " tasks)" << std::endl; - csoca::dlog << std::setw(32) << std::left << "MPI version" << " : " << MPI::get_version() << std::endl; + music::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "yes (" << CONFIG::MPI_task_size << " tasks)" << std::endl; + music::dlog << std::setw(32) << std::left << "MPI version" << " : " << MPI::get_version() << std::endl; #else - csoca::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "no" << std::endl; + music::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "no" << std::endl; #endif - csoca::ilog << std::setw(32) << std::left << "MPI supports multi-threading" << " : " << (CONFIG::MPI_threads_ok? "yes" : "no") << std::endl; + music::ilog << std::setw(32) << std::left << "MPI supports multi-threading" << " : " << (CONFIG::MPI_threads_ok? "yes" : "no") << std::endl; // Kernel related infos SystemStat::Kernel kern; auto kinfo = kern.get_kernel_info(); - csoca::ilog << std::setw(32) << std::left << "OS/Kernel version" << " : " << kinfo.kernel << " version " << kinfo.major << "." << kinfo.minor << " build " << kinfo.build_number << std::endl; + music::ilog << std::setw(32) << std::left << "OS/Kernel version" << " : " << kinfo.kernel << " version " << kinfo.major << "." << kinfo.minor << " build " << kinfo.build_number << std::endl; // FFTW related infos - csoca::ilog << std::setw(32) << std::left << "FFTW version" << " : " << fftw_version << std::endl; - csoca::ilog << std::setw(32) << std::left << "FFTW supports multi-threading" << " : " << (CONFIG::FFTW_threads_ok? "yes" : "no") << std::endl; - csoca::ilog << std::setw(32) << std::left << "FFTW mode" << " : "; + music::ilog << std::setw(32) << std::left << "FFTW version" << " : " << fftw_version << std::endl; + music::ilog << std::setw(32) << std::left << "FFTW supports multi-threading" << " : " << (CONFIG::FFTW_threads_ok? "yes" : "no") << std::endl; + music::ilog << std::setw(32) << std::left << "FFTW mode" << " : "; #if defined(FFTW_MODE_PATIENT) - csoca::ilog << "FFTW_PATIENT" << std::endl; + music::ilog << "FFTW_PATIENT" << std::endl; #elif defined(FFTW_MODE_MEASURE) - csoca::ilog << "FFTW_MEASURE" << std::endl; + music::ilog << "FFTW_MEASURE" << std::endl; #else - csoca::ilog << "FFTW_ESTIMATE" << std::endl; + music::ilog << "FFTW_ESTIMATE" << std::endl; #endif //-------------------------------------------------------------------- // Initialise plug-ins @@ -201,7 +201,7 @@ int main( int argc, char** argv ) ic_generator::Initialise( the_config ); }catch(...){ handle_eptr( std::current_exception() ); - csoca::elog << "Problem during initialisation. See error(s) above. Exiting..." << std::endl; + music::elog << "Problem during initialisation. See error(s) above. Exiting..." 
<< std::endl; #if defined(USE_MPI) MPI_Finalize(); #endif @@ -221,8 +221,8 @@ int main( int argc, char** argv ) MPI_Finalize(); #endif - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Done. Have a nice day!\n" << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "Done. Have a nice day!\n" << std::endl; return 0; } diff --git a/src/old/output_gadget2___original.cc b/src/old/output_gadget2___original.cc index b5cbf41..b8dfa62 100644 --- a/src/old/output_gadget2___original.cc +++ b/src/old/output_gadget2___original.cc @@ -142,8 +142,8 @@ protected: ifs.read((char *)&blk, sizeof(size_t)); if (blk != npart * (size_t)sizeof(T_store)) { - csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); - csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * (size_t)sizeof(T_store), blk); + music::elog.Print("Internal consistency error in gadget2 output plug-in"); + music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * (size_t)sizeof(T_store), blk); throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); } ifs.seekg(offset, std::ios::cur); @@ -161,7 +161,7 @@ protected: if (!this->good()) { - csoca::elog.Print("Could not open buffer file in gadget2 output plug-in"); + music::elog.Print("Could not open buffer file in gadget2 output plug-in"); throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); } @@ -169,8 +169,8 @@ protected: if (blk != npart * sizeof(T_store)) { - csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); - csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + music::elog.Print("Internal consistency error in gadget2 output plug-in"); + music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); } @@ -188,7 +188,7 @@ protected: if (!this->good()) { - csoca::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); + music::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); } @@ -196,8 +196,8 @@ protected: if (blk != npart * sizeof(T_store)) { - csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); - csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + music::elog.Print("Internal consistency error in gadget2 output plug-in"); + music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); } @@ -215,7 +215,7 @@ protected: if (!this->good()) { - csoca::elog.Print("Could not open buffer file in gadget2 output plug-in"); + music::elog.Print("Could not open buffer file in gadget2 output plug-in"); throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); } @@ -223,8 +223,8 @@ protected: if (blk != npart * sizeof(T_store)) { - csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); - csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + music::elog.Print("Internal consistency error in gadget2 output plug-in"); + 
music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); } @@ -246,7 +246,7 @@ protected: if (!this->good()) { - csoca::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); + music::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); } @@ -254,8 +254,8 @@ protected: if (blk != npart * sizeof(T_store)) { - csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); - csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + music::elog.Print("Internal consistency error in gadget2 output plug-in"); + music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); } @@ -394,7 +394,7 @@ protected: std::cout << " - Gadget2 : writing " << nptot << " particles to file...\n"; for (int i = 0; i < 6; ++i) if (np_per_type_[i] > 0) - csoca::ilog.Print(" type %d : %12llu [m=%g]", i, np_per_type_[i], header_.mass[i]); + music::ilog.Print(" type %d : %12llu [m=%g]", i, np_per_type_[i], header_.mass[i]); bool bbaryons = np_per_type_[0] > 0; @@ -419,10 +419,10 @@ protected: if (nfiles_ > 1) { - csoca::ilog.Print("Gadget2 : distributing particles to %d files", nfiles_); + music::ilog.Print("Gadget2 : distributing particles to %d files", nfiles_); //<< " " << std::setw(12) << "type 0" << "," << std::setw(12) << "type 1" << "," << std::setw(12) << "type " << bndparticletype_ << std::endl; for (unsigned i = 0; i < nfiles_; ++i) - csoca::ilog.Print(" file %i : %12llu", i, np_tot_per_file[i], header_.mass[i]); + music::ilog.Print(" file %i : %12llu", i, np_tot_per_file[i], header_.mass[i]); } size_t curr_block_buf_size = block_buf_size_; @@ -432,7 +432,7 @@ protected: if (nptot >= 1ul << 32 && !bneed_long_ids) { bneed_long_ids = true; - csoca::wlog.Print("Need long particle IDs, will write 64bit, make sure to enable in Gadget!"); + music::wlog.Print("Need long particle IDs, will write 64bit, make sure to enable in Gadget!"); } for (unsigned ifile = 0; ifile < nfiles_; ++ifile) @@ -700,7 +700,7 @@ protected: static bool bdisplayed = false; if (!bdisplayed) { - csoca::ilog.Print("Gadget2 : set initial gas temperature to %.2f K/mu", Tini / mu); + music::ilog.Print("Gadget2 : set initial gas temperature to %.2f K/mu", Tini / mu); bdisplayed = true; } } @@ -827,7 +827,7 @@ public: shift_halfcell_ = cf.GetValueSafe("output", "gadget_cell_centered", false); //if( nfiles_ < (int)ceil((double)npart/(double)npartmax_) ) - // csoca::wlog.Print("Should use more files."); + // music::wlog.Print("Should use more files."); if (nfiles_ > 1) { @@ -838,7 +838,7 @@ public: ofs_.open(ffname, std::ios::binary | std::ios::trunc); if (!ofs_.good()) { - csoca::elog.Print("gadget-2 output plug-in could not open output file \'%s\' for writing!", ffname); + music::elog.Print("gadget-2 output plug-in could not open output file \'%s\' for writing!", ffname); throw std::runtime_error(std::string("gadget-2 output plug-in could not open output file \'") + std::string(ffname) + "\' for writing!\n"); } ofs_.close(); @@ -849,7 +849,7 @@ public: ofs_.open(fname_.c_str(), std::ios::binary | std::ios::trunc); if (!ofs_.good()) { - csoca::elog.Print("gadget-2 output plug-in could not open output file 
\'%s\' for writing!", fname_.c_str()); + music::elog.Print("gadget-2 output plug-in could not open output file \'%s\' for writing!", fname_.c_str()); throw std::runtime_error(std::string("gadget-2 output plug-in could not open output file \'") + fname_ + "\' for writing!\n"); } ofs_.close(); @@ -875,7 +875,7 @@ public: header_.flag_doubleprecision = 1; else { - csoca::elog.Print("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); + music::elog.Print("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); throw std::runtime_error("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); } @@ -896,7 +896,7 @@ public: unit_length_chosen_ = (*mapit).second; else { - csoca::elog.Print("Gadget: length unit \'%s\' unknown in gadget_lunit", lunitstr.c_str()); + music::elog.Print("Gadget: length unit \'%s\' unknown in gadget_lunit", lunitstr.c_str()); throw std::runtime_error("Unknown length unit specified for Gadget output plugin"); } @@ -904,7 +904,7 @@ public: unit_mass_chosen_ = (*mapit).second; else { - csoca::elog.Print("Gadget: mass unit \'%s\' unknown in gadget_munit", munitstr.c_str()); + music::elog.Print("Gadget: mass unit \'%s\' unknown in gadget_munit", munitstr.c_str()); throw std::runtime_error("Unknown mass unit specified for Gadget output plugin"); } @@ -912,7 +912,7 @@ public: unit_vel_chosen_ = (*mapit).second; else { - csoca::elog.Print("Gadget: velocity unit \'%s\' unknown in gadget_vunit", vunitstr.c_str()); + music::elog.Print("Gadget: velocity unit \'%s\' unknown in gadget_vunit", vunitstr.c_str()); throw std::runtime_error("Unknown velocity unit specified for Gadget output plugin"); } @@ -922,14 +922,14 @@ public: kpcunits_ = cf.GetValueSafe("output", "gadget_usekpc", false); if (kpcunits_) unit_length_chosen_ = 1e-3; - csoca::wlog.Print("Deprecated option \'gadget_usekpc\' may override unit selection. Use \'gadget_lunit\' instead."); + music::wlog.Print("Deprecated option \'gadget_usekpc\' may override unit selection. Use \'gadget_lunit\' instead."); } if (cf.ContainsKey("output", "gadget_usemsol")) { msolunits_ = cf.GetValueSafe("output", "gadget_usemsol", false); if (msolunits_) unit_mass_chosen_ = 1e-10; - csoca::wlog.Print("Deprecated option \'gadget_usemsol\' may override unit selection. Use \'gadget_munit\' instead."); + music::wlog.Print("Deprecated option \'gadget_usemsol\' may override unit selection. Use \'gadget_munit\' instead."); } //... coarse particle properties... @@ -944,14 +944,14 @@ public: if (bndparticletype_ == 0 || //bndparticletype_ == 1 || bndparticletype_ == 4 || bndparticletype_ > 5) { - csoca::elog.Print("Coarse particles cannot be of Gadget particle type %d in output plugin.", bndparticletype_); + music::elog.Print("Coarse particles cannot be of Gadget particle type %d in output plugin.", bndparticletype_); throw std::runtime_error("Specified illegal Gadget particle type for coarse particles"); } } else { if (cf.GetValueSafe("output", "gadget_coarsetype", 5) != 5) - csoca::wlog.Print("Gadget: Option \'gadget_spreadcoarse\' forces \'gadget_coarsetype=5\'! Will override."); + music::wlog.Print("Gadget: Option \'gadget_spreadcoarse\' forces \'gadget_coarsetype=5\'! Will override."); } //... set time ...................................................... 
@@ -1056,7 +1056,7 @@ public: if (nwritten != npcoarse) { - csoca::elog.Print("nwritten = %llu != npcoarse = %llu\n", nwritten, npcoarse); + music::elog.Print("nwritten = %llu != npcoarse = %llu\n", nwritten, npcoarse); throw std::runtime_error("Internal consistency error while writing temporary file for masses"); } diff --git a/src/output_plugin.cc b/src/output_plugin.cc index 35664dc..7e287c2 100644 --- a/src/output_plugin.cc +++ b/src/output_plugin.cc @@ -23,14 +23,14 @@ void print_output_plugins() std::map< std::string, output_plugin_creator *>::iterator it; it = m.begin(); - csoca::ilog << "Available output plug-ins:\n"; + music::ilog << "Available output plug-ins:\n"; while( it!=m.end() ) { if( it->second ) - csoca::ilog << "\t\'" << it->first << "\'\n"; + music::ilog << "\t\'" << it->first << "\'\n"; ++it; } - csoca::ilog << std::endl; + music::ilog << std::endl; } std::unique_ptr select_output_plugin( ConfigFile& cf ) @@ -42,13 +42,13 @@ std::unique_ptr select_output_plugin( ConfigFile& cf ) if( !the_output_plugin_creator ) { - csoca::elog << "Error: output plug-in \'" << formatname << "\' not found." << std::endl; + music::elog << "Error: output plug-in \'" << formatname << "\' not found." << std::endl; print_output_plugins(); throw std::runtime_error("Unknown output plug-in"); }else{ - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << std::setw(32) << std::left << "Output plugin" << " : " << formatname << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << std::setw(32) << std::left << "Output plugin" << " : " << formatname << std::endl; } return std::move(the_output_plugin_creator->create( cf )); diff --git a/src/plugins/output_arepo.cc b/src/plugins/output_arepo.cc index 1af182f..8263060 100644 --- a/src/plugins/output_arepo.cc +++ b/src/plugins/output_arepo.cc @@ -154,7 +154,7 @@ public: HDFWriteGroupAttribute(this_fname_, "Header", "suggested_highressoft", from_value(softening_)); HDFWriteGroupAttribute(this_fname_, "Header", "suggested_gas_Tinit", from_value(Tini_)); - csoca::ilog << "Wrote" << std::endl; + music::ilog << "Wrote" << std::endl; } output_type write_species_as(const cosmo_species &) const { return output_type::particles; } diff --git a/src/plugins/output_gadget2.cc b/src/plugins/output_gadget2.cc index e7f20e6..ba3a986 100644 --- a/src/plugins/output_gadget2.cc +++ b/src/plugins/output_gadget2.cc @@ -131,7 +131,7 @@ public: uint32_t blocksz; std::ofstream ofs(fname.c_str(), std::ios::binary); - csoca::ilog << "Writer \'" << this->interface_name_ << "\' : Writing data for " << pc.get_global_num_particles() << " particles." << std::endl; + music::ilog << "Writer \'" << this->interface_name_ << "\' : Writing data for " << pc.get_global_num_particles() << " particles." 
<< std::endl; blocksz = sizeof(header); ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); diff --git a/src/plugins/output_gadget_hdf5.cc b/src/plugins/output_gadget_hdf5.cc index c862e41..e6a821b 100644 --- a/src/plugins/output_gadget_hdf5.cc +++ b/src/plugins/output_gadget_hdf5.cc @@ -102,26 +102,29 @@ public: // use destructor to write header post factum ~gadget_hdf5_output_plugin() { - HDFCreateGroup(this_fname_, "Header"); - HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_ThisFile", from_6array(header_.npart)); - HDFWriteGroupAttribute(this_fname_, "Header", "MassTable", from_6array(header_.mass)); - HDFWriteGroupAttribute(this_fname_, "Header", "Time", from_value(header_.time)); - HDFWriteGroupAttribute(this_fname_, "Header", "Redshift", from_value(header_.redshift)); - HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Sfr", from_value(header_.flag_sfr)); - HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Feedback", from_value(header_.flag_feedback)); - HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total", from_6array(header_.npartTotal)); - HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Cooling", from_value(header_.flag_cooling)); - HDFWriteGroupAttribute(this_fname_, "Header", "NumFilesPerSnapshot", from_value(header_.num_files)); - HDFWriteGroupAttribute(this_fname_, "Header", "BoxSize", from_value(header_.BoxSize)); - HDFWriteGroupAttribute(this_fname_, "Header", "Omega0", from_value(header_.Omega0)); - HDFWriteGroupAttribute(this_fname_, "Header", "OmegaLambda", from_value(header_.OmegaLambda)); - HDFWriteGroupAttribute(this_fname_, "Header", "HubbleParam", from_value(header_.HubbleParam)); - HDFWriteGroupAttribute(this_fname_, "Header", "Flag_StellarAge", from_value(header_.flag_stellarage)); - HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Metals", from_value(header_.flag_metals)); - HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array(header_.npartTotalHighWord)); - HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Entropy_ICs", from_value(header_.flag_entropy_instead_u)); + if (!std::uncaught_exception()) + { + HDFCreateGroup(this_fname_, "Header"); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_ThisFile", from_6array(header_.npart)); + HDFWriteGroupAttribute(this_fname_, "Header", "MassTable", from_6array(header_.mass)); + HDFWriteGroupAttribute(this_fname_, "Header", "Time", from_value(header_.time)); + HDFWriteGroupAttribute(this_fname_, "Header", "Redshift", from_value(header_.redshift)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Sfr", from_value(header_.flag_sfr)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Feedback", from_value(header_.flag_feedback)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total", from_6array(header_.npartTotal)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Cooling", from_value(header_.flag_cooling)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumFilesPerSnapshot", from_value(header_.num_files)); + HDFWriteGroupAttribute(this_fname_, "Header", "BoxSize", from_value(header_.BoxSize)); + HDFWriteGroupAttribute(this_fname_, "Header", "Omega0", from_value(header_.Omega0)); + HDFWriteGroupAttribute(this_fname_, "Header", "OmegaLambda", from_value(header_.OmegaLambda)); + HDFWriteGroupAttribute(this_fname_, "Header", "HubbleParam", from_value(header_.HubbleParam)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_StellarAge", from_value(header_.flag_stellarage)); + HDFWriteGroupAttribute(this_fname_, "Header", 
"Flag_Metals", from_value(header_.flag_metals)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array(header_.npartTotalHighWord)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Entropy_ICs", from_value(header_.flag_entropy_instead_u)); - csoca::ilog << "Wrote Gadget-HDF5 file(s) to " << this_fname_ << std::endl; + music::ilog << "Wrote Gadget-HDF5 file(s) to " << this_fname_ << std::endl; + } } output_type write_species_as(const cosmo_species &) const { return output_type::particles; } @@ -151,7 +154,7 @@ public: case cosmo_species::dm: return 1; case cosmo_species::baryon: - return 0; + return 2; case cosmo_species::neutrino: return 3; } diff --git a/src/plugins/output_generic.cc b/src/plugins/output_generic.cc index 1a53e84..d96358e 100644 --- a/src/plugins/output_generic.cc +++ b/src/plugins/output_generic.cc @@ -99,7 +99,7 @@ void generic_output_plugin::write_grid_data(const Grid_FFT &g, const cos { std::string field_name = this->get_field_name( s, c ); g.Write_to_HDF5(fname_, field_name); - csoca::ilog << interface_name_ << " : Wrote field \'" << field_name << "\' to file \'" << fname_ << "\'" << std::endl; + music::ilog << interface_name_ << " : Wrote field \'" << field_name << "\' to file \'" << fname_ << "\'" << std::endl; } namespace diff --git a/src/plugins/output_grafic2.cc b/src/plugins/output_grafic2.cc index 31e8a04..43eb7cf 100644 --- a/src/plugins/output_grafic2.cc +++ b/src/plugins/output_grafic2.cc @@ -60,7 +60,7 @@ public: if (std::abs(std::pow(2.0, levelmin_) - double(ngrid)) > 1e-4) { - csoca::elog << interface_name_ << " plugin requires setup/GridRes to be power of 2!" << std::endl; + music::elog << interface_name_ << " plugin requires setup/GridRes to be power of 2!" << std::endl; abort(); } @@ -223,7 +223,7 @@ void grafic2_output_plugin::write_grid_data(const Grid_FFT &g, const cos } // end loop over write_rank - csoca::ilog << interface_name_ << " : Wrote field to file \'" << file_name << "\'" << std::endl; + music::ilog << interface_name_ << " : Wrote field to file \'" << file_name << "\'" << std::endl; } void grafic2_output_plugin::write_ramses_namelist(void) const @@ -279,7 +279,7 @@ void grafic2_output_plugin::write_ramses_namelist(void) const << "m_refine=" << 1 + naddref << "*8.,\n" << "/\n"; - csoca::ilog << interface_name_ << " wrote partial RAMSES namelist file \'" << fname_ << "\'" << std::endl; + music::ilog << interface_name_ << " wrote partial RAMSES namelist file \'" << fname_ << "\'" << std::endl; } namespace diff --git a/src/plugins/random_music.cc b/src/plugins/random_music.cc index a13726f..073a6f9 100644 --- a/src/plugins/random_music.cc +++ b/src/plugins/random_music.cc @@ -56,7 +56,7 @@ public: if (restart_ && !disk_cached_) { - csoca::elog.Print("Cannot restart from mem cached random numbers."); + music::elog.Print("Cannot restart from mem cached random numbers."); throw std::runtime_error("Cannot restart from mem cached random numbers."); } @@ -116,7 +116,7 @@ void RNG_music::parse_random_parameters(void) { if (ltemp <= 0) { - csoca::elog.Print("Specified seed [random]/%s needs to be a number >0!", seedstr); + music::elog.Print("Specified seed [random]/%s needs to be a number >0!", seedstr); throw std::runtime_error("Seed values need to be >0"); } rngseeds_.push_back(ltemp); @@ -126,7 +126,7 @@ void RNG_music::parse_random_parameters(void) { rngfnames_.push_back(tempstr); rngseeds_.push_back(-1); - csoca::ilog.Print("Random numbers for level %3d will be read from file.", i); + 
music::ilog.Print("Random numbers for level %3d will be read from file.", i); } } @@ -160,7 +160,7 @@ void RNG_music::compute_random_numbers(void) //#warning add possibility to read noise from file also here! if (rngfnames_[i].size() > 0) - csoca::ilog.Print("Warning: Cannot use filenames for higher levels currently! Ignoring!"); + music::ilog.Print("Warning: Cannot use filenames for higher levels currently! Ignoring!"); randc[i] = new rng(*randc[i - 1], ran_cube_size_, rngseeds_[i], true); delete randc[i - 1]; @@ -180,7 +180,7 @@ void RNG_music::compute_random_numbers(void) for (int ilevel = levelmin_seed_ - 1; ilevel >= (int)levelmin_; --ilevel) { if (rngseeds_[ilevel - levelmin_] > 0) - csoca::ilog.Print("Warning: random seed for level %d will be ignored.\n" + music::ilog.Print("Warning: random seed for level %d will be ignored.\n" " consistency requires that it is obtained by restriction from level %d", ilevel, levelmin_seed_); diff --git a/src/plugins/random_music_wnoise_generator.cc b/src/plugins/random_music_wnoise_generator.cc index 68b4649..18e287f 100644 --- a/src/plugins/random_music_wnoise_generator.cc +++ b/src/plugins/random_music_wnoise_generator.cc @@ -11,7 +11,7 @@ template music_wnoise_generator::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx) : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed) { - csoca::ilog.Print("Generating random numbers (1) with seed %ld", baseseed); + music::ilog.Print("Generating random numbers (1) with seed %ld", baseseed); initialize(); fill_subvolume(x0, lx); @@ -21,7 +21,7 @@ template music_wnoise_generator::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, bool zeromean) : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed) { - csoca::ilog.Print("Generating random numbers (2) with seed %ld", baseseed); + music::ilog.Print("Generating random numbers (2) with seed %ld", baseseed); double mean = 0.0; size_t res_l = res; @@ -31,7 +31,7 @@ music_wnoise_generator::music_wnoise_generator(unsigned res, unsigned cubesiz cubesize_ = res_; if (!musicnoise) - csoca::elog.Print("This currently breaks compatibility. Need to disable by hand! Make sure to not check into repo"); + music::elog.Print("This currently breaks compatibility. Need to disable by hand! Make sure to not check into repo"); initialize(); @@ -90,7 +90,7 @@ music_wnoise_generator::music_wnoise_generator(unsigned res, std::string rand std::ifstream ifs(randfname.c_str(), std::ios::binary); if (!ifs) { - csoca::elog.Print("Could not open random number file \'%s\'!", randfname.c_str()); + music::elog.Print("Could not open random number file \'%s\'!", randfname.c_str()); throw std::runtime_error(std::string("Could not open random number file \'") + randfname + std::string("\'!")); } @@ -186,7 +186,7 @@ music_wnoise_generator::music_wnoise_generator(unsigned res, std::string rand std::vector in_float; std::vector in_double; - csoca::ilog.Print("Random number file \'%s\'\n contains %ld numbers. Reading...", randfname.c_str(), nx * ny * nz); + music::ilog.Print("Random number file \'%s\'\n contains %ld numbers. 
Reading...", randfname.c_str(), nx * ny * nz); long double sum = 0.0, sum2 = 0.0; size_t count = 0; @@ -285,7 +285,7 @@ music_wnoise_generator::music_wnoise_generator(unsigned res, std::string rand mean = sum / count; var = sum2 / count - mean * mean; - csoca::ilog.Print("Random numbers in file have \n mean = %f and var = %f", mean, var); + music::ilog.Print("Random numbers in file have \n mean = %f and var = %f", mean, var); } //... copy construct by averaging down @@ -298,7 +298,7 @@ music_wnoise_generator::music_wnoise_generator(/*const*/ music_wnoise_generat long double sum = 0.0, sum2 = 0.0; size_t count = 0; - csoca::ilog.Print("Generating a coarse white noise field by k-space degrading"); + music::ilog.Print("Generating a coarse white noise field by k-space degrading"); //... initialize properties of container res_ = rc.res_ / 2; cubesize_ = res_; @@ -307,7 +307,7 @@ music_wnoise_generator::music_wnoise_generator(/*const*/ music_wnoise_generat if (sizeof(real_t) != sizeof(T)) { - csoca::elog.Print("type mismatch with real_t in k-space averaging"); + music::elog.Print("type mismatch with real_t in k-space averaging"); throw std::runtime_error("type mismatch with real_t in k-space averaging"); } @@ -405,7 +405,7 @@ music_wnoise_generator::music_wnoise_generator(/*const*/ music_wnoise_generat rmean = sum / count; rvar = sum2 / count - rmean * rmean; - csoca::ilog.Print("Restricted random numbers have\n mean = %f, var = %f", rmean, rvar); + music::ilog.Print("Restricted random numbers have\n mean = %f, var = %f", rmean, rvar); } template @@ -438,7 +438,7 @@ music_wnoise_generator::music_wnoise_generator(music_wnoise_generator &rc, if (kspace) { - csoca::ilog.Print("Generating a constrained random number set with seed %ld\n using coarse mode replacement...", baseseed); + music::ilog.Print("Generating a constrained random number set with seed %ld\n using coarse mode replacement...", baseseed); assert(lx[0] % 2 == 0 && lx[1] % 2 == 0 && lx[2] % 2 == 0); size_t nx = lx[0], ny = lx[1], nz = lx[2], nxc = lx[0] / 2, nyc = lx[1] / 2, nzc = lx[2] / 2; @@ -573,7 +573,7 @@ music_wnoise_generator::music_wnoise_generator(music_wnoise_generator &rc, } else { - csoca::ilog.Print("Generating a constrained random number set with seed %ld\n using Hoffman-Ribak constraints...", baseseed); + music::ilog.Print("Generating a constrained random number set with seed %ld\n using Hoffman-Ribak constraints...", baseseed); double fac = 1.0 / sqrt(8.0); //1./sqrt(8.0); @@ -613,7 +613,7 @@ void music_wnoise_generator::register_cube(int i, int j, int k) rnums_.push_back(NULL); cubemap_[icube] = rnums_.size() - 1; #ifdef DEBUG - csoca::dlog.Print("registering new cube %d,%d,%d . ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]); + music::dlog.Print("registering new cube %d,%d,%d . 
ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]); #endif } } @@ -637,7 +637,7 @@ double music_wnoise_generator::fill_cube(int i, int j, int k) if (it == cubemap_.end()) { - csoca::elog.Print("Attempt to access non-registered random number cube!"); + music::elog.Print("Attempt to access non-registered random number cube!"); throw std::runtime_error("Attempt to access non-registered random number cube!"); } @@ -674,7 +674,7 @@ void music_wnoise_generator::subtract_from_cube(int i, int j, int k, double v if (it == cubemap_.end()) { - csoca::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::subtract_from_cube", i, j, k); + music::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::subtract_from_cube", i, j, k); throw std::runtime_error("Attempt to access unallocated RND cube in music_wnoise_generator::subtract_from_cube"); } @@ -700,7 +700,7 @@ void music_wnoise_generator::free_cube(int i, int j, int k) if (it == cubemap_.end()) { - csoca::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::free_cube", i, j, k); + music::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::free_cube", i, j, k); throw std::runtime_error("Attempt to access unallocated RND cube in music_wnoise_generator::free_cube"); } @@ -724,7 +724,7 @@ void music_wnoise_generator::initialize(void) cubesize_ = res_; } - csoca::ilog.Print("Generating random numbers w/ sample cube size of %d", cubesize_); + music::ilog.Print("Generating random numbers w/ sample cube size of %d", cubesize_); } template @@ -741,8 +741,8 @@ double music_wnoise_generator::fill_subvolume(int *i0, int *n) ncube[2] = (int)(n[2] / cubesize_) + 2; #ifdef DEBUG - csoca::dlog.Print("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]); - csoca::dlog.Print("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]); + music::dlog.Print("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]); + music::dlog.Print("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]); #endif double mean = 0.0; @@ -836,7 +836,7 @@ void music_wnoise_generator::print_allocated(void) if (rnums_[i] != NULL) ncount++; - csoca::ilog.Print(" -> %d of %d random number cubes currently allocated", ncount, ntot); + music::ilog.Print(" -> %d of %d random number cubes currently allocated", ncount, ntot); } template class music_wnoise_generator; diff --git a/src/plugins/random_music_wnoise_generator.hh b/src/plugins/random_music_wnoise_generator.hh index 5b9cb36..4dd1b37 100644 --- a/src/plugins/random_music_wnoise_generator.hh +++ b/src/plugins/random_music_wnoise_generator.hh @@ -80,7 +80,7 @@ protected: if (it == cubemap_.end()) { - csoca::elog.Print("attempting to copy data from non-existing RND cube %d,%d,%d", i, j, k); + music::elog.Print("attempting to copy data from non-existing RND cube %d,%d,%d", i, j, k); throw std::runtime_error("attempting to copy data from non-existing RND cube"); } @@ -186,7 +186,7 @@ public: if (it == cubemap_.end()) { - csoca::elog.Print("Attempting to copy data from non-existing RND cube %d,%d,%d @ %d,%d,%d", ic, jc, kc, i, j, k); + music::elog.Print("Attempting to copy data from non-existing RND cube %d,%d,%d @ %d,%d,%d", ic, jc, kc, i, j, k); throw std::runtime_error("attempting to copy data from non-existing RND cube"); } @@ -194,7 
+194,7 @@ public: if (rnums_[cubeidx] == NULL) { - csoca::elog.Print("Attempting to access data from non-allocated RND cube %d,%d,%d", ic, jc, kc); + music::elog.Print("Attempting to access data from non-allocated RND cube %d,%d,%d", ic, jc, kc); throw std::runtime_error("attempting to access data from non-allocated RND cube"); } diff --git a/src/plugins/transfer_CAMB_file.cc b/src/plugins/transfer_CAMB_file.cc index 0f5d5ef..54ec9cb 100644 --- a/src/plugins/transfer_CAMB_file.cc +++ b/src/plugins/transfer_CAMB_file.cc @@ -39,7 +39,7 @@ private: if (MPI::COMM_WORLD.Get_rank() == 0) { #endif - csoca::ilog.Print("Reading tabulated transfer function data from file \n \'%s\'", m_filename_Tk.c_str()); + music::ilog.Print("Reading tabulated transfer function data from file \n \'%s\'", m_filename_Tk.c_str()); std::string line; std::ifstream ifs(m_filename_Tk.c_str()); @@ -89,7 +89,7 @@ private: if (ss.bad() || ss.fail()) { - csoca::elog.Print("Error reading the transfer function file (corrupt or not in expected format)!"); + music::elog.Print("Error reading the transfer function file (corrupt or not in expected format)!"); throw std::runtime_error("Error reading transfer function file \'" + m_filename_Tk + "\'"); } @@ -134,10 +134,10 @@ private: ifs.close(); - csoca::ilog.Print("Read CAMB transfer function table with %d rows", m_nlines); + music::ilog.Print("Read CAMB transfer function table with %d rows", m_nlines); if (m_linbaryoninterp) - csoca::ilog.Print("Using log-lin interpolation for baryons\n (TF is not " + music::ilog.Print("Using log-lin interpolation for baryons\n (TF is not " "positive definite)"); #ifdef WITH_MPI diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 281d381..6b113a5 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -131,13 +131,13 @@ private: zlist << std::max(ztarget_, zstart_) << ", " << std::min(ztarget_, zstart_) << ", 0.0"; add_class_parameter("z_pk", zlist.str()); - csoca::ilog << "Computing transfer function via ClassEngine..." << std::endl; + music::ilog << "Computing transfer function via ClassEngine..." << std::endl; double wtime = get_wtime(); the_ClassEngine_ = std::move(std::make_unique(pars_, false)); wtime = get_wtime() - wtime; - csoca::ilog << "CLASS took " << wtime << " s." << std::endl; + music::ilog << "CLASS took " << wtime << " s." << std::endl; } //! run ClassEngine with parameters set up @@ -196,7 +196,7 @@ public: { this->tf_isnormalised_ = true; tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0)); - csoca::ilog << "Using A_s to normalise the transfer function!" << std::endl; + music::ilog << "Using A_s to normalise the transfer function!" << std::endl; } kmax_ = std::max(20.0, 2.0 * M_PI / lbox * nres / 2 * sqrt(3) * 2.0); // 120% of spatial diagonal, or k=10h Mpc-1 @@ -230,7 +230,7 @@ public: kmin_ = k[0]; kmax_ = k.back(); - csoca::ilog << "CLASS table contains k = " << this->get_kmin() << " to " << this->get_kmax() << " h Mpc-1." << std::endl; + music::ilog << "CLASS table contains k = " << this->get_kmin() << " to " << this->get_kmax() << " h Mpc-1." 
<< std::endl; //-------------------------------------------------------------------------- // single fluid growing/decaying mode decomposition diff --git a/src/random_plugin.cc b/src/random_plugin.cc index 5cfea9a..87bf08f 100644 --- a/src/random_plugin.cc +++ b/src/random_plugin.cc @@ -13,15 +13,15 @@ void print_RNG_plugins() std::map &m = get_RNG_plugin_map(); std::map::iterator it; it = m.begin(); - csoca::ilog << "Available random number generator plug-ins:" << std::endl; + music::ilog << "Available random number generator plug-ins:" << std::endl; while (it != m.end()) { if ((*it).second){ - csoca::ilog.Print("\t\'%s\'\n", (*it).first.c_str()); + music::ilog.Print("\t\'%s\'\n", (*it).first.c_str()); } ++it; } - csoca::ilog << std::endl; + music::ilog << std::endl; } std::unique_ptr select_RNG_plugin(ConfigFile &cf) @@ -32,14 +32,14 @@ std::unique_ptr select_RNG_plugin(ConfigFile &cf) if (!the_RNG_plugin_creator) { - csoca::ilog.Print("Invalid/Unregistered random number generator plug-in encountered : %s", rngname.c_str()); + music::ilog.Print("Invalid/Unregistered random number generator plug-in encountered : %s", rngname.c_str()); print_RNG_plugins(); throw std::runtime_error("Unknown random number generator plug-in"); } else { - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << std::setw(32) << std::left << "Random number generator plugin" << " : " << rngname << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << std::setw(32) << std::left << "Random number generator plugin" << " : " << rngname << std::endl; } return std::move(the_RNG_plugin_creator->Create(cf)); diff --git a/src/testing.cc b/src/testing.cc index ff990e1..c65eb53 100644 --- a/src/testing.cc +++ b/src/testing.cc @@ -232,7 +232,7 @@ void output_velocity_displacement_symmetries( } - csoca::ilog << "std. deviation of invariant : ( D+ | I_xy | I_yz | I_zx ) \n" + music::ilog << "std. 
deviation of invariant : ( D+ | I_xy | I_yz | I_zx ) \n" << std::setw(16) << dplus << " " << std::setw(16) << Icomp[0] << " " << std::setw(16) << Icomp[1] << " " diff --git a/src/transfer_function_plugin.cc b/src/transfer_function_plugin.cc index 6101ada..424ae82 100644 --- a/src/transfer_function_plugin.cc +++ b/src/transfer_function_plugin.cc @@ -13,14 +13,14 @@ void print_TransferFunction_plugins() std::map &m = get_TransferFunction_plugin_map(); std::map::iterator it; it = m.begin(); - csoca::ilog << "Available transfer function plug-ins:" << std::endl; + music::ilog << "Available transfer function plug-ins:" << std::endl; while (it != m.end()) { if ((*it).second) - csoca::ilog << "\t\'" << (*it).first << "\'" << std::endl; + music::ilog << "\t\'" << (*it).first << "\'" << std::endl; ++it; } - csoca::ilog << std::endl; + music::ilog << std::endl; } std::unique_ptr select_TransferFunction_plugin(ConfigFile &cf) @@ -31,14 +31,14 @@ std::unique_ptr select_TransferFunction_plugin(ConfigFi if (!the_TransferFunction_plugin_creator) { - csoca::elog << "Invalid/Unregistered transfer function plug-in encountered : " << tfname << std::endl; + music::elog << "Invalid/Unregistered transfer function plug-in encountered : " << tfname << std::endl; print_TransferFunction_plugins(); throw std::runtime_error("Unknown transfer function plug-in"); } else { - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << std::setw(32) << std::left << "Transfer function plugin" << " : " << tfname << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << std::setw(32) << std::left << "Transfer function plugin" << " : " << tfname << std::endl; } return std::move(the_TransferFunction_plugin_creator->create(cf)); From 06b3a84bd39c1c4df745078e4c39c04daaf1c1d4 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 20:39:52 +0200 Subject: [PATCH 102/130] naming convention homogenisation --- include/logger.hh | 76 +++++++++++++++---------------- src/grid_fft.cc | 2 +- src/logger.cc | 26 +++++------ src/main.cc | 6 +-- src/output_plugin.cc | 4 +- src/plugins/transfer_CAMB_file.cc | 4 +- 6 files changed, 59 insertions(+), 59 deletions(-) diff --git a/include/logger.hh b/include/logger.hh index 6c86fd0..e13012f 100644 --- a/include/logger.hh +++ b/include/logger.hh @@ -8,33 +8,33 @@ namespace music { -enum LogLevel : int { - Off = 0, - Fatal = 1, - Error = 2, - Warning = 3, - Info = 4, - Debug = 5 +enum log_level : int { + off = 0, + fatal = 1, + error = 2, + warning = 3, + info = 4, + debug = 5 }; -class Logger { +class logger { private: - static LogLevel log_level_; + static log_level log_level_; static std::ofstream output_file_; public: - Logger() = default; - ~Logger() = default; + logger() = default; + ~logger() = default; - static void SetLevel(const LogLevel &level); - static LogLevel GetLevel(); + static void set_level(const log_level &level); + static log_level get_level(); - static void SetOutput(const std::string filename); - static void UnsetOutput(); + static void set_output(const std::string filename); + static void unset_output(); - static std::ofstream &GetOutput(); + static std::ofstream &get_output(); - template Logger &operator<<(const T &item) { + template logger &operator<<(const T &item) { std::cout << item; if (output_file_.is_open()) { output_file_ << item; @@ -42,7 +42,7 @@ public: return *this; } - Logger &operator<<(std::ostream 
&(*fp)(std::ostream &)) { + logger &operator<<(std::ostream &(*fp)(std::ostream &)) { std::cout << fp; if (output_file_.is_open()) { output_file_ << fp; @@ -51,32 +51,32 @@ public: } }; -class LogStream { +class log_stream { private: - Logger &logger_; - LogLevel stream_level_; + logger &logger_; + log_level stream_level_; std::string line_prefix_, line_postfix_; bool newline; public: - LogStream(Logger &logger, const LogLevel &level) + log_stream(logger &logger, const log_level &level) : logger_(logger), stream_level_(level), newline(true) { switch (stream_level_) { - case LogLevel::Fatal: + case log_level::fatal: line_prefix_ = "\033[31mFatal : "; break; - case LogLevel::Error: + case log_level::error: line_prefix_ = "\033[31mError : "; break; - case LogLevel::Warning: + case log_level::warning: line_prefix_ = "\033[33mWarning : "; break; - case LogLevel::Info: + case log_level::info: //line_prefix_ = " | Info | "; line_prefix_ = " \033[0m"; break; - case LogLevel::Debug: + case log_level::debug: line_prefix_ = "Debug : \033[0m"; break; default: @@ -85,14 +85,14 @@ public: } line_postfix_ = "\033[0m"; } - ~LogStream() = default; + ~log_stream() = default; inline std::string GetPrefix() const { return line_prefix_; } - template LogStream &operator<<(const T &item) { - if (Logger::GetLevel() >= stream_level_) { + template log_stream &operator<<(const T &item) { + if (logger::get_level() >= stream_level_) { if (newline) { logger_ << line_prefix_; newline = false; @@ -102,8 +102,8 @@ public: return *this; } - LogStream &operator<<(std::ostream &(*fp)(std::ostream &)) { - if (Logger::GetLevel() >= stream_level_) { + log_stream &operator<<(std::ostream &(*fp)(std::ostream &)) { + if (logger::get_level() >= stream_level_) { logger_ << fp; logger_ << line_postfix_; newline = true; @@ -125,11 +125,11 @@ public: }; // global instantiations for different levels -extern Logger glogger; -extern LogStream flog; -extern LogStream elog; -extern LogStream wlog; -extern LogStream ilog; -extern LogStream dlog; +extern logger glogger; +extern log_stream flog; +extern log_stream elog; +extern log_stream wlog; +extern log_stream ilog; +extern log_stream dlog; } // namespace music diff --git a/src/grid_fft.cc b/src/grid_fft.cc index e925dc5..a51577e 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -276,7 +276,7 @@ hid_t hdf5_get_data_type(void) if (typeid(T) == typeid(size_t)) return H5T_NATIVE_ULLONG; - std::cerr << " - Error: [HDF_IO] trying to evaluate unsupported type in GetDataType\n\n"; + music::elog << "[HDF_IO] trying to evaluate unsupported type in GetDataType"; return -1; } diff --git a/src/logger.cc b/src/logger.cc index eb07442..26c34a5 100644 --- a/src/logger.cc +++ b/src/logger.cc @@ -2,18 +2,18 @@ namespace music { -std::ofstream Logger::output_file_; -LogLevel Logger::log_level_ = LogLevel::Off; +std::ofstream logger::output_file_; +log_level logger::log_level_ = log_level::off; -void Logger::SetLevel(const LogLevel &level) { +void logger::set_level(const log_level &level) { log_level_ = level; } -LogLevel Logger::GetLevel() { +log_level logger::get_level() { return log_level_; } -void Logger::SetOutput(const std::string filename) { +void logger::set_output(const std::string filename) { if (output_file_.is_open()) { output_file_.close(); } @@ -21,22 +21,22 @@ void Logger::SetOutput(const std::string filename) { assert(output_file_.is_open()); } -void Logger::UnsetOutput() { +void logger::unset_output() { if (output_file_.is_open()) { output_file_.close(); } } -std::ofstream 
&Logger::GetOutput() { +std::ofstream &logger::get_output() { return output_file_; } // global instantiations for different levels -Logger glogger; -LogStream flog(glogger, LogLevel::Fatal); -LogStream elog(glogger, LogLevel::Error); -LogStream wlog(glogger, LogLevel::Warning); -LogStream ilog(glogger, LogLevel::Info); -LogStream dlog(glogger, LogLevel::Debug); +logger the_logger; +log_stream flog(the_logger, log_level::fatal); +log_stream elog(the_logger, log_level::error); +log_stream wlog(the_logger, log_level::warning); +log_stream ilog(the_logger, log_level::info); +log_stream dlog(the_logger, log_level::debug); } // namespace music diff --git a/src/main.cc b/src/main.cc index c16690a..5a11c30 100644 --- a/src/main.cc +++ b/src/main.cc @@ -44,8 +44,8 @@ void handle_eptr(std::exception_ptr eptr) // passing by value is ok int main( int argc, char** argv ) { - music::Logger::SetLevel(music::LogLevel::Info); - // music::Logger::SetLevel(music::LogLevel::Debug); + music::logger::set_level(music::log_level::info); + // music::logger::set_level(music::log_level::Debug); //------------------------------------------------------------------------------ // initialise MPI @@ -61,7 +61,7 @@ int main( int argc, char** argv ) // set up lower logging levels for other tasks if( CONFIG::MPI_task_rank!=0 ) { - music::Logger::SetLevel(music::LogLevel::Error); + music::logger::set_level(music::log_level::error); } #endif diff --git a/src/output_plugin.cc b/src/output_plugin.cc index 7e287c2..106c8fe 100644 --- a/src/output_plugin.cc +++ b/src/output_plugin.cc @@ -42,13 +42,13 @@ std::unique_ptr select_output_plugin( ConfigFile& cf ) if( !the_output_plugin_creator ) { - music::elog << "Error: output plug-in \'" << formatname << "\' not found." << std::endl; + music::elog << "Output plug-in \'" << formatname << "\' not found." 
<< std::endl; print_output_plugins(); throw std::runtime_error("Unknown output plug-in"); }else{ music::ilog << "-------------------------------------------------------------------------------" << std::endl; - music::ilog << std::setw(32) << std::left << "Output plugin" << " : " << formatname << std::endl; + music::ilog << std::setw(32) << std::left << "Output plugin" << " : " << formatname << std::endl; } return std::move(the_output_plugin_creator->create( cf )); diff --git a/src/plugins/transfer_CAMB_file.cc b/src/plugins/transfer_CAMB_file.cc index 54ec9cb..9e0a627 100644 --- a/src/plugins/transfer_CAMB_file.cc +++ b/src/plugins/transfer_CAMB_file.cc @@ -89,8 +89,8 @@ private: if (ss.bad() || ss.fail()) { - music::elog.Print("Error reading the transfer function file (corrupt or not in expected format)!"); - throw std::runtime_error("Error reading transfer function file \'" + + music::elog.Print("error reading the transfer function file (corrupt or not in expected format)!"); + throw std::runtime_error("error reading transfer function file \'" + m_filename_Tk + "\'"); } From 3f17e5a796040d271e1ed70f42bc1f183ca3239a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 20:55:24 +0200 Subject: [PATCH 103/130] more naming convention homogenisation --- include/config_file.hh | 151 ++++++++++++++------------- include/cosmology_calculator.hh | 6 +- include/cosmology_parameters.hh | 26 ++--- include/ic_generator.hh | 4 +- include/operators.hh | 6 +- include/output_plugin.hh | 14 +-- include/particle_plt.hh | 12 +-- include/random_plugin.hh | 18 ++-- include/testing.hh | 6 +- include/transfer_function_plugin.hh | 10 +- src/ic_generator.cc | 50 ++++----- src/main.cc | 10 +- src/old/output_gadget2___original.cc | 52 ++++----- src/output_plugin.cc | 4 +- src/plugins/output_arepo.cc | 26 ++--- src/plugins/output_gadget2.cc | 18 ++-- src/plugins/output_gadget_hdf5.cc | 16 +-- src/plugins/output_generic.cc | 8 +- src/plugins/output_grafic2.cc | 22 ++-- src/plugins/random_music.cc | 28 ++--- src/plugins/random_ngenic.cc | 6 +- src/plugins/transfer_CAMB_file.cc | 10 +- src/plugins/transfer_CLASS.cc | 26 ++--- src/plugins/transfer_eisenstein.cc | 64 ++++++------ src/random_plugin.cc | 4 +- src/testing.cc | 14 +-- src/transfer_function_plugin.cc | 4 +- 27 files changed, 310 insertions(+), 305 deletions(-) diff --git a/include/config_file.hh b/include/config_file.hh index 4b6f1fc..ab30ea2 100644 --- a/include/config_file.hh +++ b/include/config_file.hh @@ -12,20 +12,20 @@ #include /*! - * @class ConfigFile + * @class config_file * @brief provides read/write access to configuration options * * This class provides access to the configuration file. The * configuration is stored in hash-pairs and can be queried and * validated by the responsible class/routine */ -class ConfigFile { +class config_file { //! current line number - unsigned m_iLine; + unsigned iline_; //! hash table for key/value pairs, stored as strings - std::map m_Items; + std::map items_; public: //! removes all white space from string source @@ -59,7 +59,7 @@ public: * @param oval the interpreted/converted value */ template - void Convert(const in_value &ival, out_value &oval) const { + void convert(const in_value &ival, out_value &oval) const { std::stringstream ss; ss << ival; //.. insert value into stream ss >> oval; //.. retrieve value from stream @@ -68,33 +68,33 @@ public: //.. conversion error music::elog << "Error: conversion of \'" << ival << "\' failed." 
<< std::endl; - throw ErrInvalidConversion(std::string("invalid conversion to ") + + throw except_invalid_conversion(std::string("invalid conversion to ") + typeid(out_value).name() + '.'); } } //! constructor of class config_file - /*! @param FileName the path/name of the configuration file to be parsed + /*! @param filename the path/name of the configuration file to be parsed */ - explicit ConfigFile(std::string const &FileName) : m_iLine(0), m_Items() { - std::ifstream file(FileName.c_str()); + explicit config_file(std::string const &filename) : iline_(0), items_() { + std::ifstream file(filename.c_str()); if (!file.is_open()){ - music::elog << "Could not open config file \'" << FileName << "\'." << std::endl; + music::elog << "Could not open config file \'" << filename << "\'." << std::endl; throw std::runtime_error( - std::string("Error: Could not open config file \'") + FileName + + std::string("Error: Could not open config file \'") + filename + std::string("\'")); } std::string line; std::string name; std::string value; - std::string inSection; - int posEqual; - m_iLine = 0; + std::string in_section; + int pos_equal; + iline_ = 0; //.. walk through all lines .. while (std::getline(file, line)) { - ++m_iLine; + ++iline_; //.. encounterd EOL ? if (!line.length()) continue; @@ -106,31 +106,31 @@ public: //.. encountered section tag ? if (line[0] == '[') { - inSection = trim(line.substr(1, line.find(']') - 1)); + in_section = trim(line.substr(1, line.find(']') - 1)); continue; } //.. seek end of entry name .. - posEqual = line.find('='); - name = trim(line.substr(0, posEqual)); - value = trim(line.substr(posEqual + 1)); + pos_equal = line.find('='); + name = trim(line.substr(0, pos_equal)); + value = trim(line.substr(pos_equal + 1)); - if ((size_t)posEqual == std::string::npos && + if ((size_t)pos_equal == std::string::npos && (name.size() != 0 || value.size() != 0)) { - music::wlog << "Ignoring non-assignment in " << FileName << ":" - << m_iLine << std::endl; + music::wlog << "Ignoring non-assignment in " << filename << ":" + << iline_ << std::endl; continue; } if (name.length() == 0 && value.size() != 0) { music::wlog << "Ignoring assignment missing entry name in " - << FileName << ":" << m_iLine << std::endl; + << filename << ":" << iline_ << std::endl; continue; } if (value.length() == 0 && name.size() != 0) { - music::wlog << "Empty entry will be ignored in " << FileName << ":" - << m_iLine << std::endl; + music::wlog << "Empty entry will be ignored in " << filename << ":" + << iline_ << std::endl; continue; } @@ -138,12 +138,12 @@ public: continue; //.. add key/value pair to hash table .. - if (m_Items.find(inSection + '/' + name) != m_Items.end()) { + if (items_.find(in_section + '/' + name) != items_.end()) { music::wlog << "Redeclaration overwrites previous value in " - << FileName << ":" << m_iLine << std::endl; + << filename << ":" << iline_ << std::endl; } - m_Items[inSection + '/' + name] = value; + items_[in_section + '/' + name] = value; } } @@ -151,8 +151,8 @@ public: /*! @param key the key value, usually "section/key" * @param value the value of the key, also a string */ - void InsertValue(std::string const &key, std::string const &value) { - m_Items[key] = value; + void insert_value(std::string const &key, std::string const &value) { + items_[key] = value; } //! 
inserts a key/value pair in the hash map @@ -160,9 +160,9 @@ public: * @param key the key value usually "section/key" * @param value the value of the key, also a string */ - void InsertValue(std::string const §ion, std::string const &key, + void insert_value(std::string const §ion, std::string const &key, std::string const &value) { - m_Items[section + '/' + key] = value; + items_[section + '/' + key] = value; } //! checks if a key is part of the hash map @@ -170,10 +170,10 @@ public: * @param key the key name to be checked * @return true if the key is present, false otherwise */ - bool ContainsKey(std::string const §ion, std::string const &key) { + bool contains_key(std::string const §ion, std::string const &key) { std::map::const_iterator i = - m_Items.find(section + '/' + key); - if (i == m_Items.end()) + items_.find(section + '/' + key); + if (i == items_.end()) return false; return true; } @@ -182,55 +182,55 @@ public: /*! @param key the key name to be checked * @return true if the key is present, false otherwise */ - bool ContainsKey(std::string const &key) { - std::map::const_iterator i = m_Items.find(key); - if (i == m_Items.end()) + bool contains_key(std::string const &key) { + std::map::const_iterator i = items_.find(key); + if (i == items_.end()) return false; return true; } //! return value of a key - /*! returns the value of a given key, throws a ErrItemNotFound + /*! returns the value of a given key, throws a except_item_not_found * exception if the key is not available in the hash map. * @param key the key name * @return the value of the key - * @sa ErrItemNotFound + * @sa except_item_not_found */ - template T GetValue(std::string const &key) const { - return GetValue("", key); + template T get_value(std::string const &key) const { + return get_value("", key); } //! return value of a key - /*! returns the value of a given key, throws a ErrItemNotFound + /*! returns the value of a given key, throws a except_item_not_found * exception if the key is not available in the hash map. * @param section the section name for the key * @param key the key name * @return the value of the key - * @sa ErrItemNotFound + * @sa except_item_not_found */ template - T GetValueBasic(std::string const §ion, std::string const &key) const { + T get_value_basic(std::string const §ion, std::string const &key) const { T r; std::map::const_iterator i = - m_Items.find(section + '/' + key); - if (i == m_Items.end()){ - throw ErrItemNotFound('\'' + section + '/' + key + + items_.find(section + '/' + key); + if (i == items_.end()){ + throw except_item_not_found('\'' + section + '/' + key + std::string("\' not found.")); } - Convert(i->second, r); + convert(i->second, r); return r; } template - T GetValue(std::string const §ion, std::string const &key) const + T get_value(std::string const §ion, std::string const &key) const { T r; try { - r = GetValueBasic(section, key); + r = get_value_basic(section, key); } - catch (ErrItemNotFound& e) + catch (except_item_not_found& e) { music::elog << e.what() << std::endl; throw; @@ -240,40 +240,41 @@ public: //! exception safe version of getValue /*! returns the value of a given key, returns a default value rather - * than a ErrItemNotFound exception if the key is not found. + * than a except_item_not_found exception if the key is not found. 
* @param section the section name for the key * @param key the key name * @param default_value the value that is returned if the key is not found * @return the key value (if key found) otherwise default_value */ template - T GetValueSafe(std::string const §ion, std::string const &key, + T get_value_safe(std::string const §ion, std::string const &key, T default_value) const { T r; try { - r = GetValueBasic(section, key); - } catch (ErrItemNotFound&) { + r = get_value_basic(section, key); + } catch (except_item_not_found&) { r = default_value; + music::dlog << "Item \'" << section << "/" << key << " not found in config. Default = \'" << default_value << "\'" << std::endl; } return r; } //! exception safe version of getValue /*! returns the value of a given key, returns a default value rather - * than a ErrItemNotFound exception if the key is not found. + * than a except_item_not_found exception if the key is not found. * @param key the key name * @param default_value the value that is returned if the key is not found * @return the key value (if key found) otherwise default_value */ template - T GetValueSafe(std::string const &key, T default_value) const { - return GetValueSafe("", key, default_value); + T get_value_safe(std::string const &key, T default_value) const { + return get_value_safe("", key, default_value); } //! dumps all key-value pairs to a std::ostream - void Dump(std::ostream &out) { - std::map::const_iterator i = m_Items.begin(); - while (i != m_Items.end()) { + void dump(std::ostream &out) { + std::map::const_iterator i = items_.begin(); + while (i != items_.end()) { if (i->second.length() > 0) out << std::setw(24) << std::left << i->first << " = " << i->second << std::endl; @@ -281,10 +282,10 @@ public: } } - void LogDump(void) { + void dump_to_log(void) { music::ilog << "List of all configuration options:" << std::endl; - std::map::const_iterator i = m_Items.begin(); - while (i != m_Items.end()) { + std::map::const_iterator i = items_.begin(); + while (i != items_.end()) { if (i->second.length() > 0) music::ilog << std::setw(28) << i->first << " = " << i->second << std::endl; @@ -295,16 +296,16 @@ public: //--- EXCEPTIONS --- //! runtime error that is thrown if key is not found in getValue - class ErrItemNotFound : public std::runtime_error { + class except_item_not_found : public std::runtime_error { public: - ErrItemNotFound(std::string itemname) + except_item_not_found(std::string itemname) : std::runtime_error(itemname.c_str()) {} }; //! runtime error that is thrown if type conversion fails - class ErrInvalidConversion : public std::runtime_error { + class except_invalid_conversion : public std::runtime_error { public: - ErrInvalidConversion(std::string errmsg) : std::runtime_error(errmsg) {} + except_invalid_conversion(std::string errmsg) : std::runtime_error(errmsg) {} }; //! runtime error that is thrown if identifier is not found in keys @@ -323,9 +324,9 @@ public: //... like "true" and "false" etc. //... converts the string to type bool, returns type bool ... 
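// ---------------------------------------------------------------------------
// A minimal usage sketch of the renamed config_file interface defined above.
// The file name "example.conf" and the literal values are illustrative only;
// the section/key names mirror those read in src/ic_generator.cc and the
// random number plug-ins.
// ---------------------------------------------------------------------------
#include <cstddef>
#include <iostream>
#include "config_file.hh"

int main()
{
    // example.conf (illustrative):
    //   [setup]
    //   GridRes   = 128
    //   BoxLength = 250.0
    config_file cf("example.conf");

    // mandatory keys: get_value() throws config_file::except_item_not_found
    // (and logs via music::elog) if the entry is missing
    const std::size_t ngrid  = cf.get_value<std::size_t>("setup", "GridRes");
    const double      boxlen = cf.get_value<double>("setup", "BoxLength");

    // optional key: get_value_safe() falls back to the default and records the
    // fallback via music::dlog; the bool specialisation following below
    // accepts true/yes/on/1 and false/no/off/0
    const bool do_baryons = cf.get_value_safe<bool>("setup", "DoBaryons", false);

    if (!cf.contains_key("random", "seed"))
        std::cerr << "no random seed given" << std::endl;

    std::cout << ngrid << " cells, box " << boxlen
              << ", baryons " << do_baryons << std::endl;
    return 0;
}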
template <> -inline bool ConfigFile::GetValue(std::string const &strSection, +inline bool config_file::get_value(std::string const &strSection, std::string const &strEntry) const { - std::string r1 = GetValue(strSection, strEntry); + std::string r1 = get_value(strSection, strEntry); if (r1 == "true" || r1 == "yes" || r1 == "on" || r1 == "1") return true; if (r1 == "false" || r1 == "no" || r1 == "off" || r1 == "0") @@ -338,17 +339,17 @@ inline bool ConfigFile::GetValue(std::string const &strSection, } template <> -inline bool ConfigFile::GetValueSafe(std::string const &strSection, +inline bool config_file::get_value_safe(std::string const &strSection, std::string const &strEntry, bool defaultValue) const { std::string r1; try { - r1 = GetValueBasic(strSection, strEntry); + r1 = get_value_basic(strSection, strEntry); if (r1 == "true" || r1 == "yes" || r1 == "on" || r1 == "1") return true; if (r1 == "false" || r1 == "no" || r1 == "off" || r1 == "0") return false; - } catch (ErrItemNotFound&) { + } catch (except_item_not_found&) { return defaultValue; } return defaultValue; @@ -356,7 +357,7 @@ inline bool ConfigFile::GetValueSafe(std::string const &strSection, template <> inline void -ConfigFile::Convert(const std::string &ival, +config_file::convert(const std::string &ival, std::string &oval) const { oval = ival; } diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index 04aa2e9..1d99209 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -131,8 +131,8 @@ public: * @param pTransferFunction pointer to an instance of a transfer function object */ - explicit calculator(ConfigFile &cf) - : cosmo_param_(cf), astart_( 1.0/(1.0+cf.GetValue("setup","zstart")) ) + explicit calculator(config_file &cf) + : cosmo_param_(cf), astart_( 1.0/(1.0+cf.get_value("setup","zstart")) ) { // pre-compute growth factors and store for interpolation std::vector tab_a, tab_D, tab_f; @@ -161,7 +161,7 @@ public: music::ilog << std::setw(32) << std::left << "TF maximum wave number" << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl; - // music::ilog << "D+(MUSIC) = " << this->get_growth_factor( 1.0/(1.0+cf.GetValue("setup","zstart")) ) << std::endl; + // music::ilog << "D+(MUSIC) = " << this->get_growth_factor( 1.0/(1.0+cf.get_value("setup","zstart")) ) << std::endl; // music::ilog << "pnrom = " << cosmo_param_.pnorm << std::endl; } diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh index 7168ec9..0d3a3ad 100644 --- a/include/cosmology_parameters.hh +++ b/include/cosmology_parameters.hh @@ -42,35 +42,35 @@ struct parameters parameters( const parameters& ) = default; - explicit parameters(ConfigFile cf) + explicit parameters(config_file cf) { - H0 = cf.GetValue("cosmology", "H0"); + H0 = cf.get_value("cosmology", "H0"); h = H0 / 100.0; - nspect = cf.GetValue("cosmology", "nspec"); + nspect = cf.get_value("cosmology", "nspec"); - Omega_b = cf.GetValue("cosmology", "Omega_b"); + Omega_b = cf.get_value("cosmology", "Omega_b"); - Omega_m = cf.GetValue("cosmology", "Omega_m"); + Omega_m = cf.get_value("cosmology", "Omega_m"); - Omega_DE = cf.GetValue("cosmology", "Omega_L"); + Omega_DE = cf.get_value("cosmology", "Omega_L"); - w_0 = cf.GetValueSafe("cosmology", "w0", -1.0); + w_0 = cf.get_value_safe("cosmology", "w0", -1.0); - w_a = cf.GetValueSafe("cosmology", "wa", 0.0); + w_a = cf.get_value_safe("cosmology", "wa", 0.0); - Tcmb = cf.GetValueSafe("cosmology", "Tcmb", 2.7255); + Tcmb = cf.get_value_safe("cosmology", "Tcmb", 
2.7255); - Neff = cf.GetValueSafe("cosmology", "Neff", 3.046); + Neff = cf.get_value_safe("cosmology", "Neff", 3.046); - sigma8 = cf.GetValue("cosmology", "sigma_8"); + sigma8 = cf.get_value("cosmology", "sigma_8"); // calculate energy density in ultrarelativistic species from Tcmb and Neff double Omega_gamma = 4 * phys_const::sigma_SI / std::pow(phys_const::c_SI, 3) * std::pow(Tcmb, 4.0) / phys_const::rhocrit_h2_SI / (h * h); double Omega_nu = Neff * Omega_gamma * 7. / 8. * std::pow(4. / 11., 4. / 3.); Omega_r = Omega_gamma + Omega_nu; - if (cf.GetValueSafe("cosmology", "ZeroRadiation", false)) + if (cf.get_value_safe("cosmology", "ZeroRadiation", false)) { Omega_r = 0.0; } @@ -91,7 +91,7 @@ struct parameters music::ilog << "Cosmological parameters are: " << std::endl; music::ilog << " H0 = " << std::setw(16) << H0 << "sigma_8 = " << std::setw(16) << sigma8 << std::endl; music::ilog << " Omega_c = " << std::setw(16) << Omega_m-Omega_b << "Omega_b = " << std::setw(16) << Omega_b << std::endl; - if (!cf.GetValueSafe("cosmology", "ZeroRadiation", false)){ + if (!cf.get_value_safe("cosmology", "ZeroRadiation", false)){ music::ilog << " Omega_g = " << std::setw(16) << Omega_gamma << "Omega_nu = " << std::setw(16) << Omega_nu << std::endl; }else{ music::ilog << " Omega_r = " << std::setw(16) << Omega_r << std::endl; diff --git a/include/ic_generator.hh b/include/ic_generator.hh index 2cf38f4..3a637e8 100644 --- a/include/ic_generator.hh +++ b/include/ic_generator.hh @@ -9,9 +9,9 @@ namespace ic_generator{ - int Run( ConfigFile& the_config ); + int Run( config_file& the_config ); - int Initialise( ConfigFile& the_config ); + int Initialise( config_file& the_config ); extern std::unique_ptr the_random_number_generator; extern std::unique_ptr the_output_plugin; diff --git a/include/operators.hh b/include/operators.hh index be6d1f7..49ed8d1 100644 --- a/include/operators.hh +++ b/include/operators.hh @@ -30,10 +30,10 @@ private: real_t boxlen_, k0_; size_t n_, nhalf_; public: - explicit fourier_gradient( const ConfigFile& the_config ) - : boxlen_( the_config.GetValue("setup", "BoxLength") ), + explicit fourier_gradient( const config_file& the_config ) + : boxlen_( the_config.get_value("setup", "BoxLength") ), k0_(2.0*M_PI/boxlen_), - n_( the_config.GetValue("setup","GridRes") ), + n_( the_config.get_value("setup","GridRes") ), nhalf_( n_/2 ) {} diff --git a/include/output_plugin.hh b/include/output_plugin.hh index 5a18407..fff657c 100644 --- a/include/output_plugin.hh +++ b/include/output_plugin.hh @@ -25,8 +25,8 @@ enum class output_type {particles,field_lagrangian,field_eulerian}; class output_plugin { protected: - //! reference to the ConfigFile object that holds all configuration options - ConfigFile &cf_; + //! reference to the config_file object that holds all configuration options + config_file &cf_; //! output file or directory name std::string fname_; @@ -35,10 +35,10 @@ protected: std::string interface_name_; public: //! constructor - output_plugin(ConfigFile &cf, std::string interface_name ) + output_plugin(config_file &cf, std::string interface_name ) : cf_(cf), interface_name_(interface_name) { - fname_ = cf_.GetValue("output", "filename"); + fname_ = cf_.get_value("output", "filename"); } //! virtual destructor @@ -78,7 +78,7 @@ public: struct output_plugin_creator { //! create an instance of a plug-in - virtual std::unique_ptr create(ConfigFile &cf) const = 0; + virtual std::unique_ptr create(config_file &cf) const = 0; //! 
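In equation form, the radiation density assembled in the cosmology::parameters constructor above (include/cosmology_parameters.hh) is, with \sigma the Stefan-Boltzmann constant, c the speed of light, T_cmb the CMB temperature and \rho_{\rm crit} the critical density (rhocrit_h2_SI times h^2):

    \Omega_\gamma = \frac{4\sigma\,T_{\rm cmb}^4}{c^3\,\rho_{\rm crit}}\,, \qquad
    \Omega_\nu = \frac{7}{8}\left(\frac{4}{11}\right)^{4/3} N_{\rm eff}\,\Omega_\gamma\,, \qquad
    \Omega_r = \Omega_\gamma + \Omega_\nu\,.

Setting ZeroRadiation = true in the [cosmology] section bypasses this and forces \Omega_r = 0, as read via get_value_safe above.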
destroy an instance of a plug-in virtual ~output_plugin_creator() {} @@ -103,12 +103,12 @@ struct output_plugin_creator_concrete : public output_plugin_creator } //! create an instance of the plug-in - std::unique_ptr create(ConfigFile &cf) const + std::unique_ptr create(config_file &cf) const { return std::make_unique(cf); // Derived( cf ); } }; //! failsafe version to select the output plug-in -std::unique_ptr select_output_plugin(ConfigFile &cf); +std::unique_ptr select_output_plugin(config_file &cf); diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 5346955..9e6df1e 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -496,12 +496,12 @@ private: public: // real_t boxlen, size_t ngridother - explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=64 ) - : boxlen_( the_config.GetValue("setup", "BoxLength") ), - aini_ ( 1.0/(1.0+the_config.GetValue("setup", "zstart")) ), - ngmapto_( the_config.GetValue("setup", "GridRes") ), + explicit lattice_gradient( config_file& the_config, size_t ngridself=64 ) + : boxlen_( the_config.get_value("setup", "BoxLength") ), + aini_ ( 1.0/(1.0+the_config.get_value("setup", "zstart")) ), + ngmapto_( the_config.get_value("setup", "GridRes") ), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), - XmL_ ( the_config.GetValue("cosmology", "Omega_L") / the_config.GetValue("cosmology", "Omega_m") ), + XmL_ ( the_config.get_value("cosmology", "Omega_L") / the_config.get_value("cosmology", "Omega_m") ), D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), @@ -509,7 +509,7 @@ public: grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) { music::ilog << "-------------------------------------------------------------------------------" << std::endl; - std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); + std::string lattice_str = the_config.get_value_safe("setup","ParticleLoad","sc"); const lattice lattice_type = ((lattice_str=="bcc")? lattice_bcc : ((lattice_str=="fcc")? lattice_fcc diff --git a/include/random_plugin.hh b/include/random_plugin.hh index 3e7b77c..1c33fae 100644 --- a/include/random_plugin.hh +++ b/include/random_plugin.hh @@ -10,9 +10,9 @@ class RNG_plugin { protected: - ConfigFile *pcf_; //!< pointer to config_file from which to read parameters + config_file *pcf_; //!< pointer to config_file from which to read parameters public: - explicit RNG_plugin(ConfigFile &cf) + explicit RNG_plugin(config_file &cf) : pcf_(&cf) { } @@ -24,7 +24,7 @@ class RNG_plugin struct RNG_plugin_creator { - virtual std::unique_ptr Create(ConfigFile &cf) const = 0; + virtual std::unique_ptr Create(config_file &cf) const = 0; virtual ~RNG_plugin_creator() {} }; @@ -42,14 +42,14 @@ struct RNG_plugin_creator_concrete : public RNG_plugin_creator } //! create an instance of the plugin - std::unique_ptr Create(ConfigFile &cf) const + std::unique_ptr Create(config_file &cf) const { return std::make_unique(cf); } }; typedef RNG_plugin RNG_instance; -std::unique_ptr select_RNG_plugin( ConfigFile &cf); +std::unique_ptr select_RNG_plugin( config_file &cf); // /*! 
// * @brief encapsulates all things for multi-scale white noise generation @@ -58,18 +58,18 @@ std::unique_ptr select_RNG_plugin( ConfigFile &cf); // class random_number_generator // { // protected: -// ConfigFile *pcf_; +// config_file *pcf_; // //const refinement_hierarchy * prefh_; // RNG_plugin *generator_; // int levelmin_, levelmax_; // public: // //! constructor -// random_number_generator( ConfigFile &cf ) +// random_number_generator( config_file &cf ) // : pcf_(&cf) //, prefh_( &refh ) // { -// levelmin_ = pcf_->GetValue("setup", "levelmin"); -// levelmax_ = pcf_->GetValue("setup", "levelmax"); +// levelmin_ = pcf_->get_value("setup", "levelmin"); +// levelmax_ = pcf_->get_value("setup", "levelmax"); // generator_ = select_RNG_plugin(cf); // } diff --git a/include/testing.hh b/include/testing.hh index 1683b09..aaaae39 100644 --- a/include/testing.hh +++ b/include/testing.hh @@ -15,7 +15,7 @@ namespace testing{ void output_potentials_and_densities( - ConfigFile& the_config, + config_file& the_config, size_t ngrid, real_t boxlen, Grid_FFT& phi, Grid_FFT& phi2, @@ -24,7 +24,7 @@ namespace testing{ std::array< Grid_FFT*,3 >& A3 ); void output_velocity_displacement_symmetries( - ConfigFile &the_config, + config_file &the_config, size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, @@ -34,7 +34,7 @@ namespace testing{ bool bwrite_out_fields=false); void output_convergence( - ConfigFile &the_config, + config_file &the_config, cosmology::calculator* the_cosmo_calc, std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, diff --git a/include/transfer_function_plugin.hh b/include/transfer_function_plugin.hh index fd95250..942a7ea 100644 --- a/include/transfer_function_plugin.hh +++ b/include/transfer_function_plugin.hh @@ -25,7 +25,7 @@ class TransferFunction_plugin { public: // Cosmology cosmo_; //!< cosmological parameter, read from config_file - ConfigFile *pcf_; //!< pointer to config_file from which to read parameters + config_file *pcf_; //!< pointer to config_file from which to read parameters bool tf_distinct_; //!< bool if density transfer function is distinct for baryons and DM bool tf_withvel_; //!< bool if also have velocity transfer functions bool tf_withtotal0_; //!< have the z=0 spectrum for normalisation purposes @@ -34,7 +34,7 @@ class TransferFunction_plugin public: //! constructor - TransferFunction_plugin(ConfigFile &cf) + TransferFunction_plugin(config_file &cf) : pcf_(&cf), tf_distinct_(false), tf_withvel_(false), tf_withtotal0_(false), tf_velunits_(false), tf_isnormalised_(false) { } @@ -82,7 +82,7 @@ class TransferFunction_plugin struct TransferFunction_plugin_creator { //! create an instance of a transfer function plug-in - virtual std::unique_ptr create(ConfigFile &cf) const = 0; + virtual std::unique_ptr create(config_file &cf) const = 0; //! destroy an instance of a plug-in virtual ~TransferFunction_plugin_creator() {} @@ -103,7 +103,7 @@ struct TransferFunction_plugin_creator_concrete : public TransferFunction_plugin } //! 
create an instance of the plug-in - std::unique_ptr create(ConfigFile &cf) const + std::unique_ptr create(config_file &cf) const { return std::make_unique(cf); } @@ -111,4 +111,4 @@ struct TransferFunction_plugin_creator_concrete : public TransferFunction_plugin // typedef TransferFunction_plugin TransferFunction; -std::unique_ptr select_TransferFunction_plugin(ConfigFile &cf); +std::unique_ptr select_TransferFunction_plugin(config_file &cf); diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 4964a4d..72c4482 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -24,7 +24,7 @@ std::unique_ptr the_random_number_generator; std::unique_ptr the_output_plugin; std::unique_ptr the_cosmo_calc; -int Initialise( ConfigFile& the_config ) +int Initialise( config_file& the_config ) { the_random_number_generator = std::move(select_RNG_plugin(the_config)); the_output_plugin = std::move(select_output_plugin(the_config)); @@ -33,7 +33,7 @@ int Initialise( ConfigFile& the_config ) return 0; } -int Run( ConfigFile& the_config ) +int Run( config_file& the_config ) { //-------------------------------------------------------------------------------------------------------- // Read run parameters @@ -41,23 +41,23 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------------------------------------------- //! number of resolution elements per dimension - const size_t ngrid = the_config.GetValue("setup", "GridRes"); + const size_t ngrid = the_config.get_value("setup", "GridRes"); //-------------------------------------------------------------------------------------------------------- //! box side length in h-1 Mpc - const real_t boxlen = the_config.GetValue("setup", "BoxLength"); + const real_t boxlen = the_config.get_value("setup", "BoxLength"); //-------------------------------------------------------------------------------------------------------- //! starting redshift - const real_t zstart = the_config.GetValue("setup", "zstart"); + const real_t zstart = the_config.get_value("setup", "zstart"); //-------------------------------------------------------------------------------------------------------- //! order of the LPT approximation - int LPTorder = the_config.GetValueSafe("setup","LPTorder",100); + int LPTorder = the_config.get_value_safe("setup","LPTorder",100); //-------------------------------------------------------------------------------------------------------- //! initialice particles on a bcc or fcc lattice instead of a standard sc lattice (doubles and quadruples the number of particles) - std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); + std::string lattice_str = the_config.get_value_safe("setup","ParticleLoad","sc"); const particle::lattice lattice_type = ((lattice_str=="bcc")? particle::lattice_bcc : ((lattice_str=="fcc")? particle::lattice_fcc @@ -66,45 +66,45 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------------------------------------------- //! apply fixing of the complex mode amplitude following Angulo & Pontzen (2016) [https://arxiv.org/abs/1603.05253] - const bool bDoFixing = the_config.GetValueSafe("setup", "DoFixing", false); + const bool bDoFixing = the_config.get_value_safe("setup", "DoFixing", false); //-------------------------------------------------------------------------------------------------------- //! do baryon ICs? 
- const bool bDoBaryons = the_config.GetValueSafe("setup", "DoBaryons", false ); + const bool bDoBaryons = the_config.get_value_safe("setup", "DoBaryons", false ); std::map< cosmo_species, double > Omega; if( bDoBaryons ){ - double Om = the_config.GetValue("cosmology", "Omega_m"); - double Ob = the_config.GetValue("cosmology", "Omega_b"); + double Om = the_config.get_value("cosmology", "Omega_m"); + double Ob = the_config.get_value("cosmology", "Omega_b"); Omega[cosmo_species::dm] = Om-Ob; Omega[cosmo_species::baryon] = Ob; }else{ - double Om = the_config.GetValue("cosmology", "Omega_m"); + double Om = the_config.get_value("cosmology", "Omega_m"); Omega[cosmo_species::dm] = Om; Omega[cosmo_species::baryon] = 0.0; } //-------------------------------------------------------------------------------------------------------- //! do constrained ICs? - const bool bAddConstrainedModes = the_config.ContainsKey("setup", "ConstraintFieldFile" ); + const bool bAddConstrainedModes = the_config.contains_key("setup", "ConstraintFieldFile" ); //-------------------------------------------------------------------------------------------------------- //! add beyond box tidal field modes following Schmidt et al. (2018) [https://arxiv.org/abs/1803.03274] - bool bAddExternalTides = the_config.ContainsKey("cosmology", "LSS_aniso_lx") - & the_config.ContainsKey("cosmology", "LSS_aniso_ly") - & the_config.ContainsKey("cosmology", "LSS_aniso_lz"); + bool bAddExternalTides = the_config.contains_key("cosmology", "LSS_aniso_lx") + & the_config.contains_key("cosmology", "LSS_aniso_ly") + & the_config.contains_key("cosmology", "LSS_aniso_lz"); - if( bAddExternalTides && !( the_config.ContainsKey("cosmology", "LSS_aniso_lx") - | the_config.ContainsKey("cosmology", "LSS_aniso_ly") - | the_config.ContainsKey("cosmology", "LSS_aniso_lz") )) + if( bAddExternalTides && !( the_config.contains_key("cosmology", "LSS_aniso_lx") + | the_config.contains_key("cosmology", "LSS_aniso_ly") + | the_config.contains_key("cosmology", "LSS_aniso_lz") )) { music::elog << "Not all dimensions of LSS_aniso_l{x,y,z} specified! Will ignore external tidal field!" 
<< std::endl; bAddExternalTides = false; } // Anisotropy parameters for beyond box tidal field std::array lss_aniso_lambda = { - the_config.GetValueSafe("cosmology", "LSS_aniso_lx", 0.0), - the_config.GetValueSafe("cosmology", "LSS_aniso_ly", 0.0), - the_config.GetValueSafe("cosmology", "LSS_aniso_lz", 0.0), + the_config.get_value_safe("cosmology", "LSS_aniso_lx", 0.0), + the_config.get_value_safe("cosmology", "LSS_aniso_ly", 0.0), + the_config.get_value_safe("cosmology", "LSS_aniso_lz", 0.0), }; if( std::abs(lss_aniso_lambda[0]+lss_aniso_lambda[1]+lss_aniso_lambda[2]) > 1e-10 ){ @@ -192,8 +192,8 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- if( bAddConstrainedModes ){ Grid_FFT cwnoise({8,8,8}, {boxlen,boxlen,boxlen}); - cwnoise.Read_from_HDF5( the_config.GetValue("setup", "ConstraintFieldFile"), - the_config.GetValue("setup", "ConstraintFieldName") ); + cwnoise.Read_from_HDF5( the_config.get_value("setup", "ConstraintFieldFile"), + the_config.get_value("setup", "ConstraintFieldName") ); cwnoise.FourierTransformForward(); size_t ngrid_c = cwnoise.size(0), ngrid_c_2 = ngrid_c/2; @@ -422,7 +422,7 @@ int Run( ConfigFile& the_config ) //====================================================================== // Testing - const std::string testing = the_config.GetValueSafe("testing", "test", "none"); + const std::string testing = the_config.get_value_safe("testing", "test", "none"); if (testing != "none") { diff --git a/src/main.cc b/src/main.cc index 5a11c30..04e2302 100644 --- a/src/main.cc +++ b/src/main.cc @@ -44,8 +44,12 @@ void handle_eptr(std::exception_ptr eptr) // passing by value is ok int main( int argc, char** argv ) { + +#if defined(NDEBUG) music::logger::set_level(music::log_level::info); - // music::logger::set_level(music::log_level::Debug); +#else + music::logger::set_level(music::log_level::debug); +#endif //------------------------------------------------------------------------------ // initialise MPI @@ -104,7 +108,7 @@ int main( int argc, char** argv ) } // open the configuration file - ConfigFile the_config(argv[1]); + config_file the_config(argv[1]); //------------------------------------------------------------------------------ // Set up FFTW @@ -123,7 +127,7 @@ int main( int argc, char** argv ) FFTW_API(mpi_init)(); #endif - CONFIG::num_threads = the_config.GetValueSafe("execution", "NumThreads",std::thread::hardware_concurrency()); + CONFIG::num_threads = the_config.get_value_safe("execution", "NumThreads",std::thread::hardware_concurrency()); #if defined(USE_FFTW_THREADS) if (CONFIG::FFTW_threads_ok) diff --git a/src/old/output_gadget2___original.cc b/src/old/output_gadget2___original.cc index b8dfa62..688ddae 100644 --- a/src/old/output_gadget2___original.cc +++ b/src/old/output_gadget2___original.cc @@ -796,7 +796,7 @@ protected: } public: - gadget2_output_plugin(ConfigFile &cf) + gadget2_output_plugin(config_file &cf) : output_plugin(cf) { @@ -812,19 +812,19 @@ public: units_vel_.insert(std::pair("m/s", 1.0e-3)); // 1 m/s units_vel_.insert(std::pair("cm/s", 1.0e-5)); // 1 cm/s - block_buf_size_ = cf_.GetValueSafe("output", "gadget_blksize", 1048576); + block_buf_size_ = cf_.get_value_safe("output", "gadget_blksize", 1048576); //... 
ensure that everyone knows we want to do SPH - cf.InsertValue("setup", "do_SPH", "yes"); + cf.insert_value("setup", "do_SPH", "yes"); - //bbndparticles_ = !cf_.GetValueSafe("output","gadget_nobndpart",false); + //bbndparticles_ = !cf_.get_value_safe("output","gadget_nobndpart",false); npartmax_ = 1 << 30; - nfiles_ = cf.GetValueSafe("output", "gadget_num_files", 1); + nfiles_ = cf.get_value_safe("output", "gadget_num_files", 1); - blongids_ = cf.GetValueSafe("output", "gadget_longids", false); + blongids_ = cf.get_value_safe("output", "gadget_longids", false); - shift_halfcell_ = cf.GetValueSafe("output", "gadget_cell_centered", false); + shift_halfcell_ = cf.get_value_safe("output", "gadget_cell_centered", false); //if( nfiles_ < (int)ceil((double)npart/(double)npartmax_) ) // music::wlog.Print("Should use more files."); @@ -879,16 +879,16 @@ public: throw std::runtime_error("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); } - YHe_ = cf.GetValueSafe("cosmology", "YHe", 0.248); - gamma_ = cf.GetValueSafe("cosmology", "gamma", 5.0 / 3.0); + YHe_ = cf.get_value_safe("cosmology", "YHe", 0.248); + gamma_ = cf.get_value_safe("cosmology", "gamma", 5.0 / 3.0); - do_baryons_ = cf.GetValueSafe("setup", "baryons", false); - omegab_ = cf.GetValueSafe("cosmology", "Omega_b", 0.045); + do_baryons_ = cf.get_value_safe("setup", "baryons", false); + omegab_ = cf.get_value_safe("cosmology", "Omega_b", 0.045); //... new way - std::string lunitstr = cf.GetValueSafe("output", "gadget_lunit", "Mpc"); - std::string munitstr = cf.GetValueSafe("output", "gadget_munit", "1e10Msol"); - std::string vunitstr = cf.GetValueSafe("output", "gadget_vunit", "km/s"); + std::string lunitstr = cf.get_value_safe("output", "gadget_lunit", "Mpc"); + std::string munitstr = cf.get_value_safe("output", "gadget_munit", "1e10Msol"); + std::string vunitstr = cf.get_value_safe("output", "gadget_vunit", "km/s"); std::map::iterator mapit; @@ -917,16 +917,16 @@ public: } //... maintain compatibility with old way of setting units - if (cf.ContainsKey("output", "gadget_usekpc")) + if (cf.contains_key("output", "gadget_usekpc")) { - kpcunits_ = cf.GetValueSafe("output", "gadget_usekpc", false); + kpcunits_ = cf.get_value_safe("output", "gadget_usekpc", false); if (kpcunits_) unit_length_chosen_ = 1e-3; music::wlog.Print("Deprecated option \'gadget_usekpc\' may override unit selection. Use \'gadget_lunit\' instead."); } - if (cf.ContainsKey("output", "gadget_usemsol")) + if (cf.contains_key("output", "gadget_usemsol")) { - msolunits_ = cf.GetValueSafe("output", "gadget_usemsol", false); + msolunits_ = cf.get_value_safe("output", "gadget_usemsol", false); if (msolunits_) unit_mass_chosen_ = 1e-10; music::wlog.Print("Deprecated option \'gadget_usemsol\' may override unit selection. Use \'gadget_munit\' instead."); @@ -934,12 +934,12 @@ public: //... coarse particle properties... 
- spread_coarse_acrosstypes_ = cf.GetValueSafe("output", "gadget_spreadcoarse", false); + spread_coarse_acrosstypes_ = cf.get_value_safe("output", "gadget_spreadcoarse", false); bndparticletype_ = 5; if (!spread_coarse_acrosstypes_) { - bndparticletype_ = cf.GetValueSafe("output", "gadget_coarsetype", 5); + bndparticletype_ = cf.get_value_safe("output", "gadget_coarsetype", 5); if (bndparticletype_ == 0 || //bndparticletype_ == 1 || bndparticletype_ == 4 || bndparticletype_ > 5) @@ -950,12 +950,12 @@ public: } else { - if (cf.GetValueSafe("output", "gadget_coarsetype", 5) != 5) + if (cf.get_value_safe("output", "gadget_coarsetype", 5) != 5) music::wlog.Print("Gadget: Option \'gadget_spreadcoarse\' forces \'gadget_coarsetype=5\'! Will override."); } //... set time ...................................................... - header_.redshift = cf.GetValue("setup", "zstart"); + header_.redshift = cf.get_value("setup", "zstart"); header_.time = 1.0 / (1.0 + header_.redshift); //... SF flags @@ -965,10 +965,10 @@ public: //... header_.num_files = nfiles_; //1; - header_.BoxSize = cf.GetValue("setup", "BoxLength"); - header_.Omega0 = cf.GetValue("cosmology", "Omega_m"); - header_.OmegaLambda = cf.GetValue("cosmology", "Omega_L"); - header_.HubbleParam = cf.GetValue("cosmology", "H0") / 100.0; + header_.BoxSize = cf.get_value("setup", "BoxLength"); + header_.Omega0 = cf.get_value("cosmology", "Omega_m"); + header_.OmegaLambda = cf.get_value("cosmology", "Omega_L"); + header_.HubbleParam = cf.get_value("cosmology", "H0") / 100.0; header_.flag_stellarage = 0; header_.flag_metals = 0; diff --git a/src/output_plugin.cc b/src/output_plugin.cc index 106c8fe..d0a7c5d 100644 --- a/src/output_plugin.cc +++ b/src/output_plugin.cc @@ -33,9 +33,9 @@ void print_output_plugins() music::ilog << std::endl; } -std::unique_ptr select_output_plugin( ConfigFile& cf ) +std::unique_ptr select_output_plugin( config_file& cf ) { - std::string formatname = cf.GetValue( "output", "format" ); + std::string formatname = cf.get_value( "output", "format" ); output_plugin_creator *the_output_plugin_creator = get_output_plugin_map()[ formatname ]; diff --git a/src/plugins/output_arepo.cc b/src/plugins/output_arepo.cc index 8263060..1674604 100644 --- a/src/plugins/output_arepo.cc +++ b/src/plugins/output_arepo.cc @@ -56,7 +56,7 @@ protected: public: //! 
constructor - explicit gadget_hdf5_output_plugin(ConfigFile &cf) + explicit gadget_hdf5_output_plugin(config_file &cf) : output_plugin(cf, "GADGET-HDF5") { num_files_ = 1; @@ -64,11 +64,11 @@ public: // use as many output files as we have MPI tasks MPI_Comm_size(MPI_COMM_WORLD, &num_files_); #endif - real_t astart = 1.0 / (1.0 + cf_.GetValue("setup", "zstart")); - lunit_ = cf_.GetValue("setup", "BoxLength"); + real_t astart = 1.0 / (1.0 + cf_.get_value("setup", "zstart")); + lunit_ = cf_.get_value("setup", "BoxLength"); vunit_ = lunit_ / std::sqrt(astart); - blongids_ = cf_.GetValueSafe("output", "UseLongids", false); - num_simultaneous_writers_ = cf_.GetValueSafe("output", "NumSimWriters", num_files_); + blongids_ = cf_.get_value_safe("output", "UseLongids", false); + num_simultaneous_writers_ = cf_.get_value_safe("output", "NumSimWriters", num_files_); for (int i = 0; i < 6; ++i) { @@ -85,9 +85,9 @@ public: header_.flag_cooling = 0; header_.num_files = num_files_; header_.BoxSize = lunit_; - header_.Omega0 = cf_.GetValue("cosmology", "Omega_m"); - header_.OmegaLambda = cf_.GetValue("cosmology", "Omega_L"); - header_.HubbleParam = cf_.GetValue("cosmology", "H0") / 100.0; + header_.Omega0 = cf_.get_value("cosmology", "Omega_m"); + header_.OmegaLambda = cf_.get_value("cosmology", "Omega_L"); + header_.HubbleParam = cf_.get_value("cosmology", "H0") / 100.0; header_.flag_stellarage = 0; header_.flag_metals = 0; header_.flag_entropy_instead_u = 0; @@ -95,16 +95,16 @@ public: // initial gas temperature double Tcmb0 = 2.726; - double Omegab = cf_.GetValue("cosmology", "Omega_b"); - double h = cf_.GetValue("cosmology", "H0") / 100.0, h2 = h*h; + double Omegab = cf_.get_value("cosmology", "Omega_b"); + double h = cf_.get_value("cosmology", "H0") / 100.0, h2 = h*h; double adec = 1.0 / (160.0 * pow(Omegab * h2 / 0.022, 2.0 / 5.0)); Tini_ = astart < adec ? Tcmb0 / astart : Tcmb0 / astart / astart * adec; // suggested PM res - pmgrid_ = 2*cf_.GetValue("setup", "GridRes"); + pmgrid_ = 2*cf_.get_value("setup", "GridRes"); gridboost_ = 1; - softening_ = cf_.GetValue("setup", "BoxLength")/pmgrid_/20; - doBaryons_ = cf_.GetValue("setup", "DoBaryons"); + softening_ = cf_.get_value("setup", "BoxLength")/pmgrid_/20; + doBaryons_ = cf_.get_value("setup", "DoBaryons"); #if !defined(USE_SINGLEPRECISION) doublePrec_ = 1; #else diff --git a/src/plugins/output_gadget2.cc b/src/plugins/output_gadget2.cc index ba3a986..0a3afbb 100644 --- a/src/plugins/output_gadget2.cc +++ b/src/plugins/output_gadget2.cc @@ -38,7 +38,7 @@ protected: public: //! constructor - explicit gadget2_output_plugin(ConfigFile &cf) + explicit gadget2_output_plugin(config_file &cf) : output_plugin(cf, "GADGET-2") { num_files_ = 1; @@ -46,10 +46,10 @@ public: // use as many output files as we have MPI tasks MPI_Comm_size(MPI_COMM_WORLD, &num_files_); #endif - real_t astart = 1.0 / (1.0 + cf_.GetValue("setup", "zstart")); - lunit_ = cf_.GetValue("setup", "BoxLength"); + real_t astart = 1.0 / (1.0 + cf_.get_value("setup", "zstart")); + lunit_ = cf_.get_value("setup", "BoxLength"); vunit_ = lunit_ / std::sqrt(astart); - blongids_ = cf_.GetValueSafe("output", "UseLongids", false); + blongids_ = cf_.get_value_safe("output", "UseLongids", false); } output_type write_species_as(const cosmo_species &) const { return output_type::particles; } @@ -90,7 +90,7 @@ public: ///// //... set time ...................................................... 
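The initial gas temperature set in the AREPO output plug-in above (src/plugins/output_arepo.cc) encodes the usual approximation that the gas follows the CMB temperature until a decoupling scale factor a_dec and cools adiabatically afterwards; in the notation of the code (Tcmb0 = 2.726, h2 = h^2):

    a_{\rm dec} = \frac{1}{160\,\left(\Omega_b h^2/0.022\right)^{2/5}}\,, \qquad
    T_{\rm ini} =
    \begin{cases}
      T_{\rm cmb,0}/a_{\rm start} & a_{\rm start} < a_{\rm dec}\,,\\
      T_{\rm cmb,0}\,a_{\rm dec}/a_{\rm start}^{2} & \text{otherwise}\,.
    \end{cases}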
- this_header_.redshift = cf_.GetValue("setup", "zstart"); + this_header_.redshift = cf_.get_value("setup", "zstart"); this_header_.time = 1.0 / (1.0 + this_header_.redshift); //... SF flags @@ -100,10 +100,10 @@ public: //... this_header_.num_files = num_files_; //1; - this_header_.BoxSize = cf_.GetValue("setup", "BoxLength"); - this_header_.Omega0 = cf_.GetValue("cosmology", "Omega_m"); - this_header_.OmegaLambda = cf_.GetValue("cosmology", "Omega_L"); - this_header_.HubbleParam = cf_.GetValue("cosmology", "H0") / 100.0; + this_header_.BoxSize = cf_.get_value("setup", "BoxLength"); + this_header_.Omega0 = cf_.get_value("cosmology", "Omega_m"); + this_header_.OmegaLambda = cf_.get_value("cosmology", "Omega_L"); + this_header_.HubbleParam = cf_.get_value("cosmology", "H0") / 100.0; this_header_.flag_stellarage = 0; this_header_.flag_metals = 0; diff --git a/src/plugins/output_gadget_hdf5.cc b/src/plugins/output_gadget_hdf5.cc index e6a821b..2e41e47 100644 --- a/src/plugins/output_gadget_hdf5.cc +++ b/src/plugins/output_gadget_hdf5.cc @@ -50,7 +50,7 @@ protected: public: //! constructor - explicit gadget_hdf5_output_plugin(ConfigFile &cf) + explicit gadget_hdf5_output_plugin(config_file &cf) : output_plugin(cf, "GADGET-HDF5") { num_files_ = 1; @@ -58,11 +58,11 @@ public: // use as many output files as we have MPI tasks MPI_Comm_size(MPI_COMM_WORLD, &num_files_); #endif - real_t astart = 1.0 / (1.0 + cf_.GetValue("setup", "zstart")); - lunit_ = cf_.GetValue("setup", "BoxLength"); + real_t astart = 1.0 / (1.0 + cf_.get_value("setup", "zstart")); + lunit_ = cf_.get_value("setup", "BoxLength"); vunit_ = lunit_ / std::sqrt(astart); - blongids_ = cf_.GetValueSafe("output", "UseLongids", false); - num_simultaneous_writers_ = cf_.GetValueSafe("output", "NumSimWriters", num_files_); + blongids_ = cf_.get_value_safe("output", "UseLongids", false); + num_simultaneous_writers_ = cf_.get_value_safe("output", "NumSimWriters", num_files_); for (int i = 0; i < 6; ++i) { @@ -79,9 +79,9 @@ public: header_.flag_cooling = 0; header_.num_files = num_files_; header_.BoxSize = lunit_; - header_.Omega0 = cf_.GetValue("cosmology", "Omega_m"); - header_.OmegaLambda = cf_.GetValue("cosmology", "Omega_L"); - header_.HubbleParam = cf_.GetValue("cosmology", "H0") / 100.0; + header_.Omega0 = cf_.get_value("cosmology", "Omega_m"); + header_.OmegaLambda = cf_.get_value("cosmology", "Omega_L"); + header_.HubbleParam = cf_.get_value("cosmology", "H0") / 100.0; header_.flag_stellarage = 0; header_.flag_metals = 0; header_.flag_entropy_instead_u = 0; diff --git a/src/plugins/output_generic.cc b/src/plugins/output_generic.cc index d96358e..79c2139 100644 --- a/src/plugins/output_generic.cc +++ b/src/plugins/output_generic.cc @@ -21,13 +21,13 @@ protected: bool out_eulerian_; public: //! 
constructor - explicit generic_output_plugin(ConfigFile &cf ) + explicit generic_output_plugin(config_file &cf ) : output_plugin(cf, "Generic HDF5") { - real_t astart = 1.0/(1.0+cf_.GetValue("setup", "zstart")); - real_t boxsize = cf_.GetValue("setup", "BoxLength"); + real_t astart = 1.0/(1.0+cf_.get_value("setup", "zstart")); + real_t boxsize = cf_.get_value("setup", "BoxLength"); - out_eulerian_ = cf_.GetValueSafe("output", "generic_out_eulerian",false); + out_eulerian_ = cf_.get_value_safe("output", "generic_out_eulerian",false); if( CONFIG::MPI_task_rank == 0 ) { diff --git a/src/plugins/output_grafic2.cc b/src/plugins/output_grafic2.cc index 43eb7cf..b3f3f04 100644 --- a/src/plugins/output_grafic2.cc +++ b/src/plugins/output_grafic2.cc @@ -40,22 +40,22 @@ protected: public: //! constructor - explicit grafic2_output_plugin(ConfigFile &cf) + explicit grafic2_output_plugin(config_file &cf) : output_plugin(cf, "GRAFIC2/RAMSES") { lunit_ = 1.0; vunit_ = 1.0; double - boxlength = cf_.GetValue("setup", "BoxLength"), - H0 = cf_.GetValue("cosmology", "H0"), - zstart = cf_.GetValue("setup", "zstart"), + boxlength = cf_.get_value("setup", "BoxLength"), + H0 = cf_.get_value("cosmology", "H0"), + zstart = cf_.get_value("setup", "zstart"), astart = 1.0 / (1.0 + zstart), - omegam = cf_.GetValue("cosmology", "Omega_m"), - omegaL = cf_.GetValue("cosmology", "Omega_L"); - uint32_t ngrid = cf_.GetValue("setup", "GridRes"); + omegam = cf_.get_value("cosmology", "Omega_m"), + omegaL = cf_.get_value("cosmology", "Omega_L"); + uint32_t ngrid = cf_.get_value("setup", "GridRes"); - bUseSPT_ = cf_.GetValueSafe("output", "grafic_use_SPT", false); + bUseSPT_ = cf_.get_value_safe("output", "grafic_use_SPT", false); levelmin_ = uint32_t(std::log2(double(ngrid)) + 1e-6); if (std::abs(std::pow(2.0, levelmin_) - double(ngrid)) > 1e-4) @@ -64,7 +64,7 @@ public: abort(); } - bhavebaryons_ = cf_.GetValueSafe("setup", "baryons", false); + bhavebaryons_ = cf_.get_value_safe("setup", "baryons", false); header_.n1 = ngrid; header_.n2 = ngrid; @@ -89,7 +89,7 @@ public: mkdir(dirname_.c_str(), 0777); // write RAMSES namelist file? if so only with one task - if (cf_.GetValueSafe("output", "ramses_nml", true) && CONFIG::MPI_task_rank==0 ) + if (cf_.get_value_safe("output", "ramses_nml", true) && CONFIG::MPI_task_rank==0 ) { write_ramses_namelist(); } @@ -196,7 +196,7 @@ void grafic2_output_plugin::write_grid_data(const Grid_FFT &g, const cos } // check field size against buffer size... 
- uint32_t ngrid = cf_.GetValue("setup", "GridRes"); + uint32_t ngrid = cf_.get_value("setup", "GridRes"); assert( g.global_size(0) == ngrid && g.global_size(1) == ngrid && g.global_size(2) == ngrid); assert( g.size(1) == ngrid && g.size(2) == ngrid); // write actual field slice by slice diff --git a/src/plugins/random_music.cc b/src/plugins/random_music.cc index 073a6f9..ab0f959 100644 --- a/src/plugins/random_music.cc +++ b/src/plugins/random_music.cc @@ -34,7 +34,7 @@ protected: //void store_rnd(int ilevel, rng *prng); public: - explicit RNG_music(ConfigFile &cf) : RNG_plugin(cf), initialized_(false) {} + explicit RNG_music(config_file &cf) : RNG_plugin(cf), initialized_(false) {} ~RNG_music() {} @@ -45,12 +45,12 @@ public: void initialize_for_grid_structure()//const refinement_hierarchy &refh) { //prefh_ = &refh; - levelmin_ = pcf_->GetValue("setup", "levelmin"); - levelmax_ = pcf_->GetValue("setup", "levelmax"); + levelmin_ = pcf_->get_value("setup", "levelmin"); + levelmax_ = pcf_->get_value("setup", "levelmax"); - ran_cube_size_ = pcf_->GetValueSafe("random", "cubesize", DEF_RAN_CUBE_SIZE); - disk_cached_ = pcf_->GetValueSafe("random", "disk_cached", true); - restart_ = pcf_->GetValueSafe("random", "restart", false); + ran_cube_size_ = pcf_->get_value_safe("random", "cubesize", DEF_RAN_CUBE_SIZE); + disk_cached_ = pcf_->get_value_safe("random", "disk_cached", true); + restart_ = pcf_->get_value_safe("random", "restart", false); mem_cache_.assign(levelmax_ - levelmin_ + 1, (std::vector *)NULL); @@ -93,8 +93,8 @@ void RNG_music::parse_random_parameters(void) std::string tempstr; bool noseed = false; sprintf(seedstr, "seed[%d]", i); - if (pcf_->ContainsKey("random", seedstr)) - tempstr = pcf_->GetValue("random", seedstr); + if (pcf_->contains_key("random", seedstr)) + tempstr = pcf_->get_value("random", seedstr); else { // "-2" means that no seed entry was found for that level @@ -105,7 +105,7 @@ void RNG_music::parse_random_parameters(void) if (is_number(tempstr)) { long ltemp; - pcf_->Convert(tempstr, ltemp); + pcf_->convert(tempstr, ltemp); rngfnames_.push_back(""); if (noseed) // ltemp < 0 ) //... 
generate some dummy seed which only depends on the level, negative so we know it's not @@ -141,7 +141,7 @@ void RNG_music::parse_random_parameters(void) void RNG_music::compute_random_numbers(void) { - bool rndsign = pcf_->GetValueSafe("random", "grafic_sign", false); + bool rndsign = pcf_->get_value_safe("random", "grafic_sign", false); std::vector randc(std::max(levelmax_, levelmin_seed_) + 1, (rng *)NULL); @@ -227,11 +227,11 @@ void RNG_music::compute_random_numbers(void) // { // int lx[3], x0[3]; // int shift[3], levelmin_poisson; - // shift[0] = pcf_->GetValue("setup", "shift_x"); - // shift[1] = pcf_->GetValue("setup", "shift_y"); - // shift[2] = pcf_->GetValue("setup", "shift_z"); + // shift[0] = pcf_->get_value("setup", "shift_x"); + // shift[1] = pcf_->get_value("setup", "shift_y"); + // shift[2] = pcf_->get_value("setup", "shift_z"); - // levelmin_poisson = pcf_->GetValue("setup", "levelmin"); + // levelmin_poisson = pcf_->get_value("setup", "levelmin"); // int lfac = 1 << (ilevel - levelmin_poisson); diff --git a/src/plugins/random_ngenic.cc b/src/plugins/random_ngenic.cc index b84221e..f1c6a59 100644 --- a/src/plugins/random_ngenic.cc +++ b/src/plugins/random_ngenic.cc @@ -18,11 +18,11 @@ private: std::vector SeedTable_; public: - explicit RNG_ngenic(ConfigFile &cf) : RNG_plugin(cf) + explicit RNG_ngenic(config_file &cf) : RNG_plugin(cf) { - RandomSeed_ = cf.GetValue("random", "seed"); - nres_ = cf.GetValue("setup", "GridRes"); + RandomSeed_ = cf.get_value("random", "seed"); + nres_ = cf.get_value("setup", "GridRes"); pRandomGenerator_ = gsl_rng_alloc(gsl_rng_ranlxd1); gsl_rng_set(pRandomGenerator_, RandomSeed_); diff --git a/src/plugins/transfer_CAMB_file.cc b/src/plugins/transfer_CAMB_file.cc index 9e0a627..4a2baf3 100644 --- a/src/plugins/transfer_CAMB_file.cc +++ b/src/plugins/transfer_CAMB_file.cc @@ -169,13 +169,13 @@ private: } public: - transfer_CAMB_file_plugin(ConfigFile &cf) + transfer_CAMB_file_plugin(config_file &cf) : TransferFunction_plugin(cf) { - m_filename_Tk = pcf_->GetValue("cosmology", "transfer_file"); - m_Omega_m = cf.GetValue("cosmology", "Omega_m"); //MvD - m_Omega_b = cf.GetValue("cosmology", "Omega_b"); //MvD - m_zstart = cf.GetValue("setup", "zstart"); //MvD + m_filename_Tk = pcf_->get_value("cosmology", "transfer_file"); + m_Omega_m = cf.get_value("cosmology", "Omega_m"); //MvD + m_Omega_b = cf.get_value("cosmology", "Omega_b"); //MvD + m_zstart = cf.get_value("setup", "zstart"); //MvD read_table(); diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 6b113a5..09047c6 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -170,25 +170,25 @@ private: } public: - explicit transfer_CLASS_plugin(ConfigFile &cf) + explicit transfer_CLASS_plugin(config_file &cf) : TransferFunction_plugin(cf) { ofs_class_input_.open("input_class_parameters.ini", std::ios::trunc); - h_ = pcf_->GetValue("cosmology", "H0") / 100.0; - Omega_m_ = pcf_->GetValue("cosmology", "Omega_m"); - Omega_b_ = pcf_->GetValue("cosmology", "Omega_b"); - N_ur_ = pcf_->GetValueSafe("cosmology", "Neff", 3.046); - ztarget_ = pcf_->GetValueSafe("cosmology", "ztarget", 0.0); + h_ = pcf_->get_value("cosmology", "H0") / 100.0; + Omega_m_ = pcf_->get_value("cosmology", "Omega_m"); + Omega_b_ = pcf_->get_value("cosmology", "Omega_b"); + N_ur_ = pcf_->get_value_safe("cosmology", "Neff", 3.046); + ztarget_ = pcf_->get_value_safe("cosmology", "ztarget", 0.0); atarget_ = 1.0 / (1.0 + ztarget_); - zstart_ = pcf_->GetValue("setup", "zstart"); + zstart_ 
= pcf_->get_value("setup", "zstart"); astart_ = 1.0 / (1.0 + zstart_); - double lbox = pcf_->GetValue("setup", "BoxLength"); - int nres = pcf_->GetValue("setup", "GridRes"); - A_s_ = pcf_->GetValueSafe("cosmology", "A_s", -1.0); - double k_p = pcf_->GetValueSafe("cosmology", "k_p", 0.05); - n_s_ = pcf_->GetValue("cosmology", "nspec"); - Tcmb_ = cf.GetValueSafe("cosmology", "Tcmb", 2.7255); + double lbox = pcf_->get_value("setup", "BoxLength"); + int nres = pcf_->get_value("setup", "GridRes"); + A_s_ = pcf_->get_value_safe("cosmology", "A_s", -1.0); + double k_p = pcf_->get_value_safe("cosmology", "k_p", 0.05); + n_s_ = pcf_->get_value("cosmology", "nspec"); + Tcmb_ = cf.get_value_safe("cosmology", "Tcmb", 2.7255); tnorm_ = 1.0; diff --git a/src/plugins/transfer_eisenstein.cc b/src/plugins/transfer_eisenstein.cc index 47a7efd..adc9e06 100644 --- a/src/plugins/transfer_eisenstein.cc +++ b/src/plugins/transfer_eisenstein.cc @@ -207,13 +207,13 @@ public: \param Tcmb mean temperature of the CMB fluctuations (defaults to Tcmb = 2.726 if not specified) */ - transfer_eisenstein_plugin(ConfigFile &cf) + transfer_eisenstein_plugin(config_file &cf) : TransferFunction_plugin(cf) { - double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726); - double H0 = pcf_->GetValue("cosmology", "H0"); - double Omega_m = pcf_->GetValue("cosmology", "Omega_m"); - double Omega_b = pcf_->GetValue("cosmology", "Omega_b"); + double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726); + double H0 = pcf_->get_value("cosmology", "H0"); + double Omega_m = pcf_->get_value("cosmology", "Omega_m"); + double Omega_b = pcf_->get_value("cosmology", "Omega_b"); etf_.set_parameters(H0, Omega_m, Omega_b, Tcmb); @@ -257,15 +257,15 @@ protected: }; public: - transfer_eisenstein_wdm_plugin(ConfigFile &cf) + transfer_eisenstein_wdm_plugin(config_file &cf) : TransferFunction_plugin(cf) { - double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726); - omegam_ = pcf_->GetValue("cosmology", "Omega_m"); - omegab_ = pcf_->GetValue("cosmology", "Omega_b"); - H0_ = pcf_->GetValue("cosmology", "H0"); + double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726); + omegam_ = pcf_->get_value("cosmology", "Omega_m"); + omegab_ = pcf_->get_value("cosmology", "Omega_b"); + H0_ = pcf_->get_value("cosmology", "H0"); m_h0 = H0_ / 100.0; - wdmm_ = pcf_->GetValue("cosmology", "WDMmass"); + wdmm_ = pcf_->get_value("cosmology", "WDMmass"); etf_.set_parameters(H0_, omegam_, omegab_, Tcmb); @@ -273,7 +273,7 @@ public: typemap_.insert(std::pair("VIEL", wdm_viel)); // add the other types typemap_.insert(std::pair("BODE_WRONG", wdm_bode_wrong)); // add the other types - type_ = pcf_->GetValueSafe("cosmology", "WDMtftype", "BODE"); + type_ = pcf_->get_value_safe("cosmology", "WDMtftype", "BODE"); //type_ = std::string( toupper( type_.c_str() ) ); @@ -286,29 +286,29 @@ public: { //... parameterisation from Bode et al. (2001), ApJ, 556, 93 case wdm_bode: - wdmnu_ = pcf_->GetValueSafe("cosmology", "WDMnu", 1.0); - wdmgx_ = pcf_->GetValueSafe("cosmology", "WDMg_x", 1.5); + wdmnu_ = pcf_->get_value_safe("cosmology", "WDMnu", 1.0); + wdmgx_ = pcf_->get_value_safe("cosmology", "WDMg_x", 1.5); m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ * 0.01 / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29); break; //... parameterisation from Viel et al. 
(2005), Phys Rev D, 71 case wdm_viel: - wdmnu_ = pcf_->GetValueSafe("cosmology", "WDMnu", 1.12); + wdmnu_ = pcf_->get_value_safe("cosmology", "WDMnu", 1.12); m_WDMalpha = 0.049 * pow(omegam_ / 0.25, 0.11) * pow(H0_ * 0.01 / 0.7, 1.22) * pow(wdmm_, -1.11); break; //.... below is for historical reasons due to the buggy parameterisation //.... in early versions of MUSIC, but apart from H instead of h, Bode et al. case wdm_bode_wrong: - wdmnu_ = pcf_->GetValueSafe("cosmology", "WDMnu", 1.0); - wdmgx_ = pcf_->GetValueSafe("cosmology", "WDMg_x", 1.5); + wdmnu_ = pcf_->get_value_safe("cosmology", "WDMnu", 1.0); + wdmgx_ = pcf_->get_value_safe("cosmology", "WDMg_x", 1.5); m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29); break; default: - wdmnu_ = pcf_->GetValueSafe("cosmology", "WDMnu", 1.0); - wdmgx_ = pcf_->GetValueSafe("cosmology", "WDMg_x", 1.5); + wdmnu_ = pcf_->get_value_safe("cosmology", "WDMnu", 1.0); + wdmgx_ = pcf_->get_value_safe("cosmology", "WDMg_x", 1.5); m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ * 0.01 / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29); break; } @@ -340,20 +340,20 @@ protected: eisenstein_transfer etf_; public: - transfer_eisenstein_cdmbino_plugin(ConfigFile &cf) + transfer_eisenstein_cdmbino_plugin(config_file &cf) : TransferFunction_plugin(cf) { - double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726); + double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726); - omegam_ = pcf_->GetValue("cosmology", "Omega_m"); - omegab_ = pcf_->GetValue("cosmology", "Omega_b"); - H0_ = pcf_->GetValue("cosmology", "H0"); + omegam_ = pcf_->get_value("cosmology", "Omega_m"); + omegab_ = pcf_->get_value("cosmology", "Omega_b"); + H0_ = pcf_->get_value("cosmology", "H0"); m_h0 = H0_ / 100.0; etf_.set_parameters(H0_, omegam_, omegab_, Tcmb); - mcdm_ = pcf_->GetValueSafe("cosmology", "CDM_mass", 100.0); // bino particle mass in GeV - Tkd_ = pcf_->GetValueSafe("cosmology", "CDM_Tkd", 33.0); // temperature at which CDM particle kinetically decouples (in MeV) + mcdm_ = pcf_->get_value_safe("cosmology", "CDM_mass", 100.0); // bino particle mass in GeV + Tkd_ = pcf_->get_value_safe("cosmology", "CDM_Tkd", 33.0); // temperature at which CDM particle kinetically decouples (in MeV) kfs_ = 1.7e6 / m_h0 * sqrt(mcdm_ / 100. * Tkd_ / 30.) / (1.0 + log(Tkd_ / 30.) / 19.2); kd_ = 3.8e7 / m_h0 * sqrt(mcdm_ / 100. 
* Tkd_ / 30.); @@ -395,19 +395,19 @@ protected: eisenstein_transfer etf_; public: - transfer_eisenstein_cutoff_plugin(ConfigFile &cf) + transfer_eisenstein_cutoff_plugin(config_file &cf) : TransferFunction_plugin(cf) { - double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726); + double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726); - omegam_ = pcf_->GetValue("cosmology", "Omega_m"); - omegab_ = pcf_->GetValue("cosmology", "Omega_b"); - H0_ = pcf_->GetValue("cosmology", "H0"); + omegam_ = pcf_->get_value("cosmology", "Omega_m"); + omegab_ = pcf_->get_value("cosmology", "Omega_b"); + H0_ = pcf_->get_value("cosmology", "H0"); m_h0 = H0_ / 100.0; etf_.set_parameters(H0_, omegam_, omegab_, Tcmb); - Rcut_ = pcf_->GetValueSafe("cosmology", "Rcut", 1.0); + Rcut_ = pcf_->get_value_safe("cosmology", "Rcut", 1.0); } inline double compute(double k, tf_type type) const diff --git a/src/random_plugin.cc b/src/random_plugin.cc index 87bf08f..5121efa 100644 --- a/src/random_plugin.cc +++ b/src/random_plugin.cc @@ -24,9 +24,9 @@ void print_RNG_plugins() music::ilog << std::endl; } -std::unique_ptr select_RNG_plugin(ConfigFile &cf) +std::unique_ptr select_RNG_plugin(config_file &cf) { - std::string rngname = cf.GetValueSafe("random", "generator", "MUSIC"); + std::string rngname = cf.get_value_safe("random", "generator", "MUSIC"); RNG_plugin_creator *the_RNG_plugin_creator = get_RNG_plugin_map()[rngname]; diff --git a/src/testing.cc b/src/testing.cc index c65eb53..8e88e17 100644 --- a/src/testing.cc +++ b/src/testing.cc @@ -9,7 +9,7 @@ namespace testing { void output_potentials_and_densities( - ConfigFile &the_config, + config_file &the_config, size_t ngrid, real_t boxlen, Grid_FFT &phi, Grid_FFT &phi2, @@ -17,8 +17,8 @@ void output_potentials_and_densities( Grid_FFT &phi3b, std::array *, 3> &A3) { - const std::string fname_hdf5 = the_config.GetValueSafe("output", "fname_hdf5", "output.hdf5"); - const std::string fname_analysis = the_config.GetValueSafe("output", "fbase_analysis", "output"); + const std::string fname_hdf5 = the_config.get_value_safe("output", "fname_hdf5", "output.hdf5"); + const std::string fname_analysis = the_config.get_value_safe("output", "fbase_analysis", "output"); Grid_FFT delta({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT delta2({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); @@ -98,7 +98,7 @@ void output_potentials_and_densities( } void output_velocity_displacement_symmetries( - ConfigFile &the_config, + config_file &the_config, size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, @@ -107,8 +107,8 @@ void output_velocity_displacement_symmetries( std::array *, 3> &A3, bool bwrite_out_fields) { - const std::string fname_hdf5 = the_config.GetValueSafe("output", "fname_hdf5", "output.hdf5"); - const std::string fname_analysis = the_config.GetValueSafe("output", "fbase_analysis", "output"); + const std::string fname_hdf5 = the_config.get_value_safe("output", "fname_hdf5", "output.hdf5"); + const std::string fname_analysis = the_config.get_value_safe("output", "fbase_analysis", "output"); real_t vfac1 = vfac; real_t vfac2 = 2 * vfac; @@ -241,7 +241,7 @@ void output_velocity_displacement_symmetries( } void output_convergence( - ConfigFile &the_config, + config_file &the_config, cosmology::calculator* the_cosmo_calc, std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, diff --git a/src/transfer_function_plugin.cc b/src/transfer_function_plugin.cc index 424ae82..5b2ec9e 100644 --- 
a/src/transfer_function_plugin.cc +++ b/src/transfer_function_plugin.cc @@ -23,9 +23,9 @@ void print_TransferFunction_plugins() music::ilog << std::endl; } -std::unique_ptr select_TransferFunction_plugin(ConfigFile &cf) +std::unique_ptr select_TransferFunction_plugin(config_file &cf) { - std::string tfname = cf.GetValue("cosmology", "transfer"); + std::string tfname = cf.get_value("cosmology", "transfer"); TransferFunction_plugin_creator *the_TransferFunction_plugin_creator = get_TransferFunction_plugin_map()[tfname]; From 096513e7e889dc2c36ab76fb20ee9d0fde6ac829 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 20:59:28 +0200 Subject: [PATCH 104/130] moved git version info to cmake_config.hh.in --- include/cmake_config.hh.in | 11 ++++++++++- include/general.hh | 11 +---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/cmake_config.hh.in b/include/cmake_config.hh.in index db4b9a8..03768a3 100644 --- a/include/cmake_config.hh.in +++ b/include/cmake_config.hh.in @@ -22,4 +22,13 @@ constexpr char CMAKE_BUILDTYPE_STR[] = "${CMAKE_BUILD_TYPE}"; constexpr char CMAKE_PLT_STR[] = "PLT corr. on"; #else constexpr char CMAKE_PLT_STR[] = "PLT corr. off"; -#endif \ No newline at end of file +#endif + +// These variables are autogenerated and compiled +// into the library by the version.cmake script. do not touch! +extern "C" +{ + extern const char *GIT_TAG; + extern const char *GIT_REV; + extern const char *GIT_BRANCH; +} \ No newline at end of file diff --git a/include/general.hh b/include/general.hh index f4395bb..88eb2f7 100644 --- a/include/general.hh +++ b/include/general.hh @@ -169,13 +169,4 @@ extern bool MPI_ok; extern bool MPI_threads_ok; extern bool FFTW_threads_ok; extern int num_threads; -} // namespace CONFIG - -// These variables are autogenerated and compiled -// into the library by the version.cmake script -extern "C" -{ - extern const char *GIT_TAG; - extern const char *GIT_REV; - extern const char *GIT_BRANCH; -} \ No newline at end of file +} // namespace CONFIG \ No newline at end of file From 4644840ee6a9678992842587253e1f0070e432ed Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 21:18:18 +0200 Subject: [PATCH 105/130] removed old files --- src/old/output_gadget2___original.cc | 1408 -------------------------- 1 file changed, 1408 deletions(-) delete mode 100644 src/old/output_gadget2___original.cc diff --git a/src/old/output_gadget2___original.cc b/src/old/output_gadget2___original.cc deleted file mode 100644 index 688ddae..0000000 --- a/src/old/output_gadget2___original.cc +++ /dev/null @@ -1,1408 +0,0 @@ -/* - - output_gadget2.cc - This file is part of MUSIC - - a code to generate multi-scale initial conditions - for cosmological simulations - - Copyright (C) 2010 Oliver Hahn - - */ - -#include -#include -#include "logger.hh" -// #include "region_generator.hh" -#include "output_plugin.hh" -// #include "mg_interp.hh" -// #include "mesh.hh" - - -template -class gadget2_output_plugin : public output_plugin -{ - // const int empty_fill_bytes = 56; - -public: - bool do_baryons_; - double omegab_; - double gamma_; - bool shift_halfcell_; - -protected: - std::ofstream ofs_; - bool blongids_; - bool bhave_particlenumbers_; - - std::map units_length_; - std::map units_mass_; - std::map units_vel_; - - double unit_length_chosen_; - double unit_mass_chosen_; - double unit_vel_chosen_; - - typedef struct io_header - { - int npart[6]; - double mass[6]; - double time; - double redshift; - int flag_sfr; - int flag_feedback; - 
unsigned int npartTotal[6]; - int flag_cooling; - int num_files; - double BoxSize; - double Omega0; - double OmegaLambda; - double HubbleParam; - int flag_stellarage; - int flag_metals; - unsigned int npartTotalHighWord[6]; - int flag_entropy_instead_u; - int flag_doubleprecision; - char fill[empty_fill_bytes]; - } header; - - header header_; - - std::string fname; - - enum iofields - { - id_dm_mass, - id_dm_vel, - id_dm_pos, - id_gas_vel, - id_gas_rho, - id_gas_temp, - id_gas_pos - }; - - size_t np_per_type_[6]; - - size_t block_buf_size_; - size_t npartmax_; - unsigned nfiles_; - - unsigned bndparticletype_; - bool bmorethan2bnd_; - bool kpcunits_; - bool msolunits_; - double YHe_; - bool spread_coarse_acrosstypes_; - - // refinement_mask refmask; - - void distribute_particles(unsigned nfiles, std::vector> &np_per_file, std::vector &np_tot_per_file) - { - np_per_file.assign(nfiles, std::vector(6, 0)); - np_tot_per_file.assign(nfiles, 0); - - size_t n2dist[6]; - size_t ntotal = 0; - for (int i = 0; i < 6; ++i) - { - ntotal += np_per_type_[i]; - n2dist[i] = np_per_type_[i]; - } - - size_t nnominal = (size_t)((double)ntotal / (double)nfiles); - size_t nlast = ntotal - nnominal * (nfiles - 1); - - for (unsigned i = 0; i < nfiles; ++i) - { - size_t nthisfile = 0; - - size_t nmax = (i == nfiles - 1) ? nlast : nnominal; - - for (int itype = 0; itype < 6; ++itype) - { - if (n2dist[itype] == 0) - continue; - np_per_file[i][itype] = std::min(n2dist[itype], nmax - nthisfile); - n2dist[itype] -= np_per_file[i][itype]; - nthisfile += np_per_file[i][itype]; - - if (nthisfile >= nmax) - break; - } - - np_tot_per_file[i] = nthisfile; - } - - for (int i = 0; i < 6; ++i) - assert(n2dist[i] == 0); - } - - std::ifstream &open_and_check(std::string ffname, size_t npart, size_t offset = 0) - { - std::ifstream ifs(ffname.c_str(), std::ios::binary); - size_t blk; - ifs.read((char *)&blk, sizeof(size_t)); - if (blk != npart * (size_t)sizeof(T_store)) - { - music::elog.Print("Internal consistency error in gadget2 output plug-in"); - music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * (size_t)sizeof(T_store), blk); - throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); - } - ifs.seekg(offset, std::ios::cur); - - return ifs; - } - - class pistream : public std::ifstream - { - public: - pistream(std::string fname, size_t npart, size_t offset = 0) - : std::ifstream(fname.c_str(), std::ios::binary) - { - size_t blk; - - if (!this->good()) - { - music::elog.Print("Could not open buffer file in gadget2 output plug-in"); - throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); - } - - this->read((char *)&blk, sizeof(size_t)); - - if (blk != npart * sizeof(T_store)) - { - music::elog.Print("Internal consistency error in gadget2 output plug-in"); - music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); - throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); - } - - this->seekg(offset + sizeof(size_t), std::ios::beg); - } - - pistream() - { - } - - void open(std::string fname, size_t npart, size_t offset = 0) - { - std::ifstream::open(fname.c_str(), std::ios::binary); - size_t blk; - - if (!this->good()) - { - music::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); - throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); - } - - this->read((char *)&blk, sizeof(size_t)); - - if (blk != npart * sizeof(T_store)) 
- { - music::elog.Print("Internal consistency error in gadget2 output plug-in"); - music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); - throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); - } - - this->seekg(offset + sizeof(size_t), std::ios::beg); - } - }; - - class postream : public std::fstream - { - public: - postream(std::string fname, size_t npart, size_t offset = 0) - : std::fstream(fname.c_str(), std::ios::binary | std::ios::in | std::ios::out) - { - size_t blk; - - if (!this->good()) - { - music::elog.Print("Could not open buffer file in gadget2 output plug-in"); - throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); - } - - this->read((char *)&blk, sizeof(size_t)); - - if (blk != npart * sizeof(T_store)) - { - music::elog.Print("Internal consistency error in gadget2 output plug-in"); - music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); - throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); - } - - this->seekg(offset, std::ios::cur); - this->seekp(offset + sizeof(size_t), std::ios::beg); - } - - postream() - { - } - - void open(std::string fname, size_t npart, size_t offset = 0) - { - if (is_open()) - this->close(); - - std::fstream::open(fname.c_str(), std::ios::binary | std::ios::in | std::ios::out); - size_t blk; - - if (!this->good()) - { - music::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); - throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); - } - - this->read((char *)&blk, sizeof(size_t)); - - if (blk != npart * sizeof(T_store)) - { - music::elog.Print("Internal consistency error in gadget2 output plug-in"); - music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); - throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); - } - - this->seekg(offset, std::ios::cur); - this->seekp(offset + sizeof(size_t), std::ios::beg); - } - }; - - void combine_components_for_coarse(void) - { - const size_t - nptot = np_per_type_[1] + np_per_type_[2] + np_per_type_[3] + np_per_type_[4] + np_per_type_[5], - npfine = np_per_type_[1], - npcoarse = nptot - npfine; - - std::vector tmp1, tmp2; - - tmp1.assign(block_buf_size_, 0.0); - tmp2.assign(block_buf_size_, 0.0); - - double facb = omegab_ / header_.Omega0, facc = (header_.Omega0 - omegab_) / header_.Omega0; - - for (int icomp = 0; icomp < 3; ++icomp) - { - char fc[256], fb[256]; - postream iffs1, iffs2; - - /*** positions ***/ - - sprintf(fc, "___ic_temp_%05d.bin", 100 * id_dm_pos + icomp); - sprintf(fb, "___ic_temp_%05d.bin", 100 * id_gas_pos + icomp); - - iffs1.open(fc, nptot, npfine * sizeof(T_store)); - iffs2.open(fb, nptot, npfine * sizeof(T_store)); - - size_t npleft = npcoarse; - size_t n2read = std::min((size_t)block_buf_size_, npleft); - while (n2read > 0ul) - { - std::streampos sp = iffs1.tellg(); - iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); - - for (size_t i = 0; i < n2read; ++i) - { - tmp1[i] = facc * tmp1[i] + facb * tmp2[i]; - } - - iffs1.seekp(sp); - iffs1.write(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - - npleft -= n2read; - n2read = std::min((size_t)block_buf_size_, npleft); - } - - iffs1.close(); - iffs2.close(); - - /*** velocities ***/ - - sprintf(fc, "___ic_temp_%05d.bin", 100 * id_dm_vel + icomp); 
- sprintf(fb, "___ic_temp_%05d.bin", 100 * id_gas_vel + icomp); - - iffs1.open(fc, nptot, npfine * sizeof(T_store)); - iffs2.open(fb, nptot, npfine * sizeof(T_store)); - - npleft = npcoarse; - n2read = std::min((size_t)block_buf_size_, npleft); - - while (n2read > 0ul) - { - std::streampos sp = iffs1.tellg(); - iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); - - for (size_t i = 0; i < n2read; ++i) - { - tmp1[i] = facc * tmp1[i] + facb * tmp2[i]; - } - - iffs1.seekp(sp); - iffs1.write(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - - npleft -= n2read; - n2read = std::min((size_t)block_buf_size_, npleft); - } - - iffs1.close(); - iffs2.close(); - } - } - - void assemble_gadget_file(void) - { - - if (do_baryons_) - combine_components_for_coarse(); - - //............................................................................ - //... copy from the temporary files, interleave the data and save ............ - - char fnx[256], fny[256], fnz[256], fnvx[256], fnvy[256], fnvz[256], fnm[256]; - char fnbx[256], fnby[256], fnbz[256], fnbvx[256], fnbvy[256], fnbvz[256]; - - sprintf(fnx, "___ic_temp_%05d.bin", 100 * id_dm_pos + 0); - sprintf(fny, "___ic_temp_%05d.bin", 100 * id_dm_pos + 1); - sprintf(fnz, "___ic_temp_%05d.bin", 100 * id_dm_pos + 2); - sprintf(fnvx, "___ic_temp_%05d.bin", 100 * id_dm_vel + 0); - sprintf(fnvy, "___ic_temp_%05d.bin", 100 * id_dm_vel + 1); - sprintf(fnvz, "___ic_temp_%05d.bin", 100 * id_dm_vel + 2); - sprintf(fnm, "___ic_temp_%05d.bin", 100 * id_dm_mass); - - sprintf(fnbx, "___ic_temp_%05d.bin", 100 * id_gas_pos + 0); - sprintf(fnby, "___ic_temp_%05d.bin", 100 * id_gas_pos + 1); - sprintf(fnbz, "___ic_temp_%05d.bin", 100 * id_gas_pos + 2); - sprintf(fnbvx, "___ic_temp_%05d.bin", 100 * id_gas_vel + 0); - sprintf(fnbvy, "___ic_temp_%05d.bin", 100 * id_gas_vel + 1); - sprintf(fnbvz, "___ic_temp_%05d.bin", 100 * id_gas_vel + 2); - - pistream iffs1, iffs2, iffs3; - - const size_t - nptot = np_per_type_[0] + np_per_type_[1] + np_per_type_[2] + np_per_type_[3] + np_per_type_[4] + np_per_type_[5], - //npgas = np_fine_gas_, - npcdm = nptot - np_per_type_[0]; - - size_t - wrote_coarse = 0, - wrote_gas = 0, - wrote_dm = 0; - - size_t - npleft = nptot, - n2read = std::min((size_t)block_buf_size_, npleft); - - std::cout << " - Gadget2 : writing " << nptot << " particles to file...\n"; - for (int i = 0; i < 6; ++i) - if (np_per_type_[i] > 0) - music::ilog.Print(" type %d : %12llu [m=%g]", i, np_per_type_[i], header_.mass[i]); - - bool bbaryons = np_per_type_[0] > 0; - - std::vector adata3; - adata3.reserve(3 * block_buf_size_); - T_store *tmp1, *tmp2, *tmp3; - - tmp1 = new T_store[block_buf_size_]; - tmp2 = new T_store[block_buf_size_]; - tmp3 = new T_store[block_buf_size_]; - - //... 
for multi-file output - //int fileno = 0; - //size_t npart_left = nptot; - - //std::vector nfdm_per_file, nfgas_per_file, nc_per_file; - - std::vector> np_per_file; - std::vector np_tot_per_file; - - distribute_particles(nfiles_, np_per_file, np_tot_per_file); - - if (nfiles_ > 1) - { - music::ilog.Print("Gadget2 : distributing particles to %d files", nfiles_); - //<< " " << std::setw(12) << "type 0" << "," << std::setw(12) << "type 1" << "," << std::setw(12) << "type " << bndparticletype_ << std::endl; - for (unsigned i = 0; i < nfiles_; ++i) - music::ilog.Print(" file %i : %12llu", i, np_tot_per_file[i], header_.mass[i]); - } - - size_t curr_block_buf_size = block_buf_size_; - - size_t idcount = 0; - bool bneed_long_ids = blongids_; - if (nptot >= 1ul << 32 && !bneed_long_ids) - { - bneed_long_ids = true; - music::wlog.Print("Need long particle IDs, will write 64bit, make sure to enable in Gadget!"); - } - - for (unsigned ifile = 0; ifile < nfiles_; ++ifile) - { - - if (nfiles_ > 1) - { - char ffname[256]; - sprintf(ffname, "%s.%d", fname_.c_str(), ifile); - ofs_.open(ffname, std::ios::binary | std::ios::trunc); - } - else - { - ofs_.open(fname_.c_str(), std::ios::binary | std::ios::trunc); - } - - size_t np_this_file = np_tot_per_file[ifile]; - - int blksize = sizeof(header); - - //... write the header ....................................................... - - header this_header(header_); - for (int i = 0; i < 6; ++i) - { - this_header.npart[i] = np_per_file[ifile][i]; - this_header.npartTotal[i] = (unsigned)np_per_type_[i]; - this_header.npartTotalHighWord[i] = (unsigned)(np_per_type_[i] >> 32); - } - - ofs_.write((char *)&blksize, sizeof(int)); - ofs_.write((char *)&this_header, sizeof(header)); - ofs_.write((char *)&blksize, sizeof(int)); - - //... particle positions .................................................. 
- blksize = 3ul * np_this_file * sizeof(T_store); - ofs_.write((char *)&blksize, sizeof(int)); - - if (bbaryons && np_per_file[ifile][0] > 0ul) - { - - iffs1.open(fnbx, npcdm, wrote_gas * sizeof(T_store)); - iffs2.open(fnby, npcdm, wrote_gas * sizeof(T_store)); - iffs3.open(fnbz, npcdm, wrote_gas * sizeof(T_store)); - - npleft = np_per_file[ifile][0]; - n2read = std::min(curr_block_buf_size, npleft); - while (n2read > 0ul) - { - iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); - iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); - - for (size_t i = 0; i < n2read; ++i) - { - adata3.push_back(fmod(tmp1[i] + header_.BoxSize, header_.BoxSize)); - adata3.push_back(fmod(tmp2[i] + header_.BoxSize, header_.BoxSize)); - adata3.push_back(fmod(tmp3[i] + header_.BoxSize, header_.BoxSize)); - } - ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); - - adata3.clear(); - npleft -= n2read; - n2read = std::min(curr_block_buf_size, npleft); - } - iffs1.close(); - iffs2.close(); - iffs3.close(); - } - - npleft = np_this_file - np_per_file[ifile][0]; - n2read = std::min(curr_block_buf_size, npleft); - - iffs1.open(fnx, npcdm, wrote_dm * sizeof(T_store)); - iffs2.open(fny, npcdm, wrote_dm * sizeof(T_store)); - iffs3.open(fnz, npcdm, wrote_dm * sizeof(T_store)); - - while (n2read > 0ul) - { - iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); - iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); - - for (size_t i = 0; i < n2read; ++i) - { - adata3.push_back(fmod(tmp1[i] + header_.BoxSize, header_.BoxSize)); - adata3.push_back(fmod(tmp2[i] + header_.BoxSize, header_.BoxSize)); - adata3.push_back(fmod(tmp3[i] + header_.BoxSize, header_.BoxSize)); - } - ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); - - adata3.clear(); - npleft -= n2read; - n2read = std::min(curr_block_buf_size, npleft); - } - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - - iffs1.close(); - iffs2.close(); - iffs3.close(); - - //... particle velocities .................................................. 
- blksize = 3ul * np_this_file * sizeof(T_store); - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - - if (bbaryons && np_per_file[ifile][0] > 0ul) - { - iffs1.open(fnbvx, npcdm, wrote_gas * sizeof(T_store)); - iffs2.open(fnbvy, npcdm, wrote_gas * sizeof(T_store)); - iffs3.open(fnbvz, npcdm, wrote_gas * sizeof(T_store)); - - npleft = np_per_file[ifile][0]; - n2read = std::min(curr_block_buf_size, npleft); - while (n2read > 0ul) - { - iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); - iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); - - for (size_t i = 0; i < n2read; ++i) - { - adata3.push_back(tmp1[i]); - adata3.push_back(tmp2[i]); - adata3.push_back(tmp3[i]); - } - - ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); - - adata3.clear(); - npleft -= n2read; - n2read = std::min(curr_block_buf_size, npleft); - } - - iffs1.close(); - iffs2.close(); - iffs3.close(); - } - - iffs1.open(fnvx, npcdm, wrote_dm * sizeof(T_store)); - iffs2.open(fnvy, npcdm, wrote_dm * sizeof(T_store)); - iffs3.open(fnvz, npcdm, wrote_dm * sizeof(T_store)); - - npleft = np_this_file - np_per_file[ifile][0]; - n2read = std::min(curr_block_buf_size, npleft); - while (n2read > 0ul) - { - iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); - iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); - - for (size_t i = 0; i < n2read; ++i) - { - adata3.push_back(tmp1[i]); - adata3.push_back(tmp2[i]); - adata3.push_back(tmp3[i]); - } - - ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); - - adata3.clear(); - npleft -= n2read; - n2read = std::min(curr_block_buf_size, npleft); - } - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - - iffs1.close(); - iffs2.close(); - iffs3.close(); - - //... particle IDs .......................................................... - std::vector short_ids; - std::vector long_ids; - - if (bneed_long_ids) - long_ids.assign(curr_block_buf_size, 0); - else - short_ids.assign(curr_block_buf_size, 0); - - npleft = np_this_file; - n2read = std::min(curr_block_buf_size, npleft); - blksize = sizeof(unsigned) * np_this_file; - - if (bneed_long_ids) - blksize = sizeof(size_t) * np_this_file; - - //... generate contiguous IDs and store in file .. - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - while (n2read > 0ul) - { - if (bneed_long_ids) - { - for (size_t i = 0; i < n2read; ++i) - long_ids[i] = idcount++; - ofs_.write(reinterpret_cast(&long_ids[0]), n2read * sizeof(size_t)); - } - else - { - for (size_t i = 0; i < n2read; ++i) - short_ids[i] = idcount++; - ofs_.write(reinterpret_cast(&short_ids[0]), n2read * sizeof(unsigned)); - } - npleft -= n2read; - n2read = std::min(curr_block_buf_size, npleft); - } - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - - std::vector().swap(short_ids); - std::vector().swap(long_ids); - - //... particle masses ....................................................... 
- if (bmorethan2bnd_) //bmultimass_ && bmorethan2bnd_ && nc_per_file[ifile] > 0ul) - { - unsigned npcoarse = np_per_file[ifile][bndparticletype_]; // nc_per_file[ifile];//header_.npart[5]; - iffs1.open(fnm, np_per_type_[bndparticletype_], wrote_coarse * sizeof(T_store)); - - npleft = npcoarse; - n2read = std::min(curr_block_buf_size, npleft); - blksize = npcoarse * sizeof(T_store); - - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - while (n2read > 0ul) - { - iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - ofs_.write(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - - npleft -= n2read; - n2read = std::min(curr_block_buf_size, npleft); - } - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - - iffs1.close(); - } - - //... initial internal energy for gas particles - if (bbaryons && np_per_file[ifile][0] > 0ul) - { - - std::vector eint(curr_block_buf_size, 0.0); - - const double astart = 1. / (1. + header_.redshift); - const double npol = (fabs(1.0 - gamma_) > 1e-7) ? 1.0 / (gamma_ - 1.) : 1.0; - const double unitv = 1e5; - const double h2 = header_.HubbleParam * header_.HubbleParam; //*0.0001; - const double adec = 1.0 / (160. * pow(omegab_ * h2 / 0.022, 2.0 / 5.0)); - const double Tcmb0 = 2.726; - const double Tini = astart < adec ? Tcmb0 / astart : Tcmb0 / astart / astart * adec; - const double mu = (Tini > 1.e4) ? 4.0 / (8. - 5. * YHe_) : 4.0 / (1. + 3. * (1. - YHe_)); - const double ceint = 1.3806e-16 / 1.6726e-24 * Tini * npol / mu / unitv / unitv; - - npleft = np_per_file[ifile][0]; - n2read = std::min(curr_block_buf_size, npleft); - blksize = sizeof(T_store) * np_per_file[ifile][0]; //*npgas - - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - while (n2read > 0ul) - { - for (size_t i = 0; i < n2read; ++i) - eint[i] = ceint; - ofs_.write(reinterpret_cast(&eint[0]), n2read * sizeof(T_store)); - npleft -= n2read; - n2read = std::min(curr_block_buf_size, npleft); - } - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - - static bool bdisplayed = false; - if (!bdisplayed) - { - music::ilog.Print("Gadget2 : set initial gas temperature to %.2f K/mu", Tini / mu); - bdisplayed = true; - } - } - - ofs_.flush(); - ofs_.close(); - - wrote_gas += np_per_file[ifile][0]; - wrote_dm += np_this_file - np_per_file[ifile][0]; - wrote_coarse += np_per_file[ifile][5]; - } - - delete[] tmp1; - delete[] tmp2; - delete[] tmp3; - - remove(fnbx); - remove(fnby); - remove(fnbz); - remove(fnx); - remove(fny); - remove(fnz); - remove(fnbvx); - remove(fnbvy); - remove(fnbvz); - remove(fnvx); - remove(fnvy); - remove(fnvz); - remove(fnm); - } - - void determine_particle_numbers(const grid_hierarchy &gh) - { - if (!bhave_particlenumbers_) - { - bhave_particlenumbers_ = true; - - double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 - - /*if( kpcunits_ ) - rhoc *= 1e-9; // in h^2 1e10 M_sol / kpc^3 - - if( msolunits_ ) - rhoc *= 1e10; // in h^2 M_sol / kpc^3*/ - - rhoc /= unit_mass_chosen_ / (unit_length_chosen_ * unit_length_chosen_ * unit_length_chosen_); - - // only type 1 are baryons - if (!do_baryons_) - header_.mass[1] = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / gh.count_leaf_cells(0, 0); ///pow(2,3*levelmax_); - else - { - header_.mass[0] = (omegab_)*rhoc * pow(header_.BoxSize, 3.) / gh.count_leaf_cells(0, 0); ///pow(2,3*levelmax_); - header_.mass[1] = (header_.Omega0 - omegab_) * rhoc * pow(header_.BoxSize, 3.) / gh.count_leaf_cells(0, 0); ///pow(2,3*levelmax_); - } - - //... 
- for (int i = 0; i < 6; ++i) - np_per_type_[i] = 0; - - // determine how many particles per type exist, determine their mass - for (int ilevel = (int)gh.levelmax(); ilevel >= (int)gh.levelmin(); --ilevel) - { - int itype = std::min((int)gh.levelmax() - ilevel + 1, 5); - np_per_type_[itype] += gh.count_leaf_cells(ilevel, ilevel); - if (itype > 1) - header_.mass[itype] = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / pow(2, 3 * ilevel); - } - - // if coarse particles should not be spread across types, assign them all to type bndparticletype - if (!spread_coarse_acrosstypes_) - { - if (gh.levelmax() > gh.levelmin() + 1) - bmorethan2bnd_ = true; - else - bmorethan2bnd_ = false; - - for (unsigned itype = 2; itype < 6; ++itype) - { - if (itype == bndparticletype_) - continue; - np_per_type_[bndparticletype_] += np_per_type_[itype]; - if (!bmorethan2bnd_) - header_.mass[bndparticletype_] += header_.mass[itype]; - np_per_type_[itype] = 0; - header_.mass[itype] = 0.; - } - } - - if (do_baryons_) - np_per_type_[0] = np_per_type_[1]; - } - } - -public: - gadget2_output_plugin(config_file &cf) - : output_plugin(cf) - { - - units_mass_.insert(std::pair("1e10Msol", 1.0)); // 1e10 M_o/h (default) - units_mass_.insert(std::pair("Msol", 1.0e-10)); // 1 M_o/h - units_mass_.insert(std::pair("Mearth", 3.002e-16)); // 1 M_earth/h - - units_length_.insert(std::pair("Mpc", 1.0)); // 1 Mpc/h (default) - units_length_.insert(std::pair("kpc", 1.0e-3)); // 1 kpc/h - units_length_.insert(std::pair("pc", 1.0e-6)); // 1 pc/h - - units_vel_.insert(std::pair("km/s", 1.0)); // 1 km/s (default) - units_vel_.insert(std::pair("m/s", 1.0e-3)); // 1 m/s - units_vel_.insert(std::pair("cm/s", 1.0e-5)); // 1 cm/s - - block_buf_size_ = cf_.get_value_safe("output", "gadget_blksize", 1048576); - - //... 
ensure that everyone knows we want to do SPH - cf.insert_value("setup", "do_SPH", "yes"); - - //bbndparticles_ = !cf_.get_value_safe("output","gadget_nobndpart",false); - npartmax_ = 1 << 30; - - nfiles_ = cf.get_value_safe("output", "gadget_num_files", 1); - - blongids_ = cf.get_value_safe("output", "gadget_longids", false); - - shift_halfcell_ = cf.get_value_safe("output", "gadget_cell_centered", false); - - //if( nfiles_ < (int)ceil((double)npart/(double)npartmax_) ) - // music::wlog.Print("Should use more files."); - - if (nfiles_ > 1) - { - for (unsigned ifile = 0; ifile < nfiles_; ++ifile) - { - char ffname[256]; - sprintf(ffname, "%s.%d", fname_.c_str(), ifile); - ofs_.open(ffname, std::ios::binary | std::ios::trunc); - if (!ofs_.good()) - { - music::elog.Print("gadget-2 output plug-in could not open output file \'%s\' for writing!", ffname); - throw std::runtime_error(std::string("gadget-2 output plug-in could not open output file \'") + std::string(ffname) + "\' for writing!\n"); - } - ofs_.close(); - } - } - else - { - ofs_.open(fname_.c_str(), std::ios::binary | std::ios::trunc); - if (!ofs_.good()) - { - music::elog.Print("gadget-2 output plug-in could not open output file \'%s\' for writing!", fname_.c_str()); - throw std::runtime_error(std::string("gadget-2 output plug-in could not open output file \'") + fname_ + "\' for writing!\n"); - } - ofs_.close(); - } - - bhave_particlenumbers_ = false; - - bmorethan2bnd_ = false; - if (false) //levelmax_ > levelmin_ +4) - bmorethan2bnd_ = true; - - for (int i = 0; i < 6; ++i) - { - header_.npart[i] = 0; - header_.npartTotal[i] = 0; - header_.npartTotalHighWord[i] = 0; - header_.mass[i] = 0.0; - } - - if (typeid(T_store) == typeid(float)) - header_.flag_doubleprecision = 0; - else if (typeid(T_store) == typeid(double)) - header_.flag_doubleprecision = 1; - else - { - music::elog.Print("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); - throw std::runtime_error("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); - } - - YHe_ = cf.get_value_safe("cosmology", "YHe", 0.248); - gamma_ = cf.get_value_safe("cosmology", "gamma", 5.0 / 3.0); - - do_baryons_ = cf.get_value_safe("setup", "baryons", false); - omegab_ = cf.get_value_safe("cosmology", "Omega_b", 0.045); - - //... new way - std::string lunitstr = cf.get_value_safe("output", "gadget_lunit", "Mpc"); - std::string munitstr = cf.get_value_safe("output", "gadget_munit", "1e10Msol"); - std::string vunitstr = cf.get_value_safe("output", "gadget_vunit", "km/s"); - - std::map::iterator mapit; - - if ((mapit = units_length_.find(lunitstr)) != units_length_.end()) - unit_length_chosen_ = (*mapit).second; - else - { - music::elog.Print("Gadget: length unit \'%s\' unknown in gadget_lunit", lunitstr.c_str()); - throw std::runtime_error("Unknown length unit specified for Gadget output plugin"); - } - - if ((mapit = units_mass_.find(munitstr)) != units_mass_.end()) - unit_mass_chosen_ = (*mapit).second; - else - { - music::elog.Print("Gadget: mass unit \'%s\' unknown in gadget_munit", munitstr.c_str()); - throw std::runtime_error("Unknown mass unit specified for Gadget output plugin"); - } - - if ((mapit = units_vel_.find(vunitstr)) != units_vel_.end()) - unit_vel_chosen_ = (*mapit).second; - else - { - music::elog.Print("Gadget: velocity unit \'%s\' unknown in gadget_vunit", vunitstr.c_str()); - throw std::runtime_error("Unknown velocity unit specified for Gadget output plugin"); - } - - //... 
maintain compatibility with old way of setting units - if (cf.contains_key("output", "gadget_usekpc")) - { - kpcunits_ = cf.get_value_safe("output", "gadget_usekpc", false); - if (kpcunits_) - unit_length_chosen_ = 1e-3; - music::wlog.Print("Deprecated option \'gadget_usekpc\' may override unit selection. Use \'gadget_lunit\' instead."); - } - if (cf.contains_key("output", "gadget_usemsol")) - { - msolunits_ = cf.get_value_safe("output", "gadget_usemsol", false); - if (msolunits_) - unit_mass_chosen_ = 1e-10; - music::wlog.Print("Deprecated option \'gadget_usemsol\' may override unit selection. Use \'gadget_munit\' instead."); - } - - //... coarse particle properties... - - spread_coarse_acrosstypes_ = cf.get_value_safe("output", "gadget_spreadcoarse", false); - bndparticletype_ = 5; - - if (!spread_coarse_acrosstypes_) - { - bndparticletype_ = cf.get_value_safe("output", "gadget_coarsetype", 5); - - if (bndparticletype_ == 0 || //bndparticletype_ == 1 || bndparticletype_ == 4 || - bndparticletype_ > 5) - { - music::elog.Print("Coarse particles cannot be of Gadget particle type %d in output plugin.", bndparticletype_); - throw std::runtime_error("Specified illegal Gadget particle type for coarse particles"); - } - } - else - { - if (cf.get_value_safe("output", "gadget_coarsetype", 5) != 5) - music::wlog.Print("Gadget: Option \'gadget_spreadcoarse\' forces \'gadget_coarsetype=5\'! Will override."); - } - - //... set time ...................................................... - header_.redshift = cf.get_value("setup", "zstart"); - header_.time = 1.0 / (1.0 + header_.redshift); - - //... SF flags - header_.flag_sfr = 0; - header_.flag_feedback = 0; - header_.flag_cooling = 0; - - //... - header_.num_files = nfiles_; //1; - header_.BoxSize = cf.get_value("setup", "BoxLength"); - header_.Omega0 = cf.get_value("cosmology", "Omega_m"); - header_.OmegaLambda = cf.get_value("cosmology", "Omega_L"); - header_.HubbleParam = cf.get_value("cosmology", "H0") / 100.0; - - header_.flag_stellarage = 0; - header_.flag_metals = 0; - - header_.flag_entropy_instead_u = 0; - - //if( kpcunits_ ) - // header_.BoxSize *= 1000.0; - header_.BoxSize /= unit_length_chosen_; - - for (int i = 0; i < empty_fill_bytes; ++i) - header_.fill[i] = 0; - } - - void write_dm_mass(const grid_hierarchy &gh) - { - determine_particle_numbers(gh); - - double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 - - // adjust units - rhoc /= unit_mass_chosen_ / (unit_length_chosen_ * unit_length_chosen_ * unit_length_chosen_); - - /*if( kpcunits_ ) - rhoc *= 1e-9; // in h^2 1e10 M_sol / kpc^3 - - if( msolunits_ ) - rhoc *= 1e10; // in h^2 M_sol / kpc^3 - */ - - // if there are more than one kind of coarse particle assigned to the same type, - // we have to explicitly store their masses - if (bmorethan2bnd_) - { - header_.mass[bndparticletype_] = 0.; - - size_t npcoarse = np_per_type_[bndparticletype_]; - size_t nwritten = 0; - - std::vector temp_dat; - temp_dat.reserve(block_buf_size_); - - char temp_fname[256]; - sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_dm_mass); - std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); - - size_t blksize = sizeof(T_store) * npcoarse; - - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - // int levelmaxcoarse = gh.levelmax() - 4; - // if (!spread_coarse_acrosstypes_) - // levelmaxcoarse = gh.levelmax() - 1; - - //for( int ilevel=levelmaxcoarse; ilevel>=(int)gh.levelmin(); --ilevel ) - - { - int ilevel = 0; - // baryon particles live only on finest grid - // these particles 
here are total matter particles - double pmass = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / pow(2, 3 * ilevel); - - for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) - for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) - for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) - if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) - { - if (temp_dat.size() < block_buf_size_) - temp_dat.push_back(pmass); - else - { - ofs_temp.write((char *)&temp_dat[0], sizeof(T_store) * block_buf_size_); - nwritten += block_buf_size_; - temp_dat.clear(); - temp_dat.push_back(pmass); - } - } - } - - if (temp_dat.size() > 0) - { - ofs_temp.write((char *)&temp_dat[0], sizeof(T_store) * temp_dat.size()); - nwritten += temp_dat.size(); - } - - if (nwritten != npcoarse) - { - music::elog.Print("nwritten = %llu != npcoarse = %llu\n", nwritten, npcoarse); - throw std::runtime_error("Internal consistency error while writing temporary file for masses"); - } - - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - if (ofs_temp.bad()) - throw std::runtime_error("I/O error while writing temporary file for masses"); - } - } - - void write_dm_position(int coord, const grid_hierarchy &gh) - { - //... count number of leaf cells ...// - determine_particle_numbers(gh); - - size_t npart = 0; - for (int i = 1; i < 6; ++i) - npart += np_per_type_[i]; - - //... determine if we need to shift the coordinates back - double *shift = NULL; - - if (shift_halfcell_) - { - double h = 0.0; //1.0/(1<<(levelmin_+1)); - shift = new double[3]; - shift[0] = shift[1] = shift[2] = -h; - } - - size_t nwritten = 0; - //... collect displacements and convert to absolute coordinates with correct - //... units - std::vector temp_data; - temp_data.reserve(block_buf_size_); - - char temp_fname[256]; - sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_dm_pos + coord); - std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); - - size_t blksize = sizeof(T_store) * npart; - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - double xfac = header_.BoxSize; - - //for( int ilevel=gh.levelmax(); ilevel>=(int)gh.levelmin(); --ilevel ) - unsigned ilevel = 0; - for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) - for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) - for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) - if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) - { - double xx[3]; - gh.cell_pos(ilevel, i, j, k, xx); - if (shift != NULL) - xx[coord] += shift[coord]; - - - // std::cerr << i << " " << j << " " << k << " : " << xx[coord]*xfac << " " << (*gh.get_grid(ilevel)).relem(i, j, k) * xfac << std::endl; - - xx[coord] = (xx[coord] + (*gh.get_grid(ilevel)).relem(i, j, k)) * xfac; - - if (temp_data.size() < block_buf_size_) - temp_data.push_back(xx[coord]); - else - { - ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); - nwritten += block_buf_size_; - temp_data.clear(); - temp_data.push_back(xx[coord]); - } - } - - if (temp_data.size() > 0) - { - ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * temp_data.size()); - nwritten += temp_data.size(); - } - - if (nwritten != npart) - throw std::runtime_error("Internal consistency error while writing temporary file for positions"); - - //... 
dump to temporary file - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - if (ofs_temp.bad()) - throw std::runtime_error("I/O error while writing temporary file for positions"); - - ofs_temp.close(); - - if (shift != NULL) - delete[] shift; - } - - void write_dm_velocity(int coord, const grid_hierarchy &gh) - { - //... count number of leaf cells ...// - determine_particle_numbers(gh); - - size_t npart = 0; - for (int i = 1; i < 6; ++i) - npart += np_per_type_[i]; - - //... collect displacements and convert to absolute coordinates with correct - //... units - std::vector temp_data; - temp_data.reserve(block_buf_size_); - - float isqrta = 1.0f / sqrt(header_.time); - float vfac = isqrta * header_.BoxSize; - - //if( kpcunits_ ) - // vfac /= 1000.0; - vfac *= unit_length_chosen_ / unit_vel_chosen_; - - size_t nwritten = 0; - - char temp_fname[256]; - sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_dm_vel + coord); - std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); - - size_t blksize = sizeof(T_store) * npart; - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - //for( int ilevel=levelmax_; ilevel>=(int)levelmin_; --ilevel ) - int ilevel = 0; - for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) - for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) - for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) - if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) - { - if (temp_data.size() < block_buf_size_) - temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); - else - { - ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); - nwritten += block_buf_size_; - temp_data.clear(); - temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); - } - } - if (temp_data.size() > 0) - { - ofs_temp.write((char *)&temp_data[0], temp_data.size() * sizeof(T_store)); - nwritten += temp_data.size(); - } - - if (nwritten != npart) - throw std::runtime_error("Internal consistency error while writing temporary file for velocities"); - - ofs_temp.write((char *)&blksize, sizeof(int)); - - if (ofs_temp.bad()) - throw std::runtime_error("I/O error while writing temporary file for velocities"); - - ofs_temp.close(); - - - } - - void write_dm_density(const grid_hierarchy &gh) - { - //... we don't care about DM density for Gadget - } - - void write_dm_potential(const grid_hierarchy &gh) - { - //... we don't care about DM potential for Gadget - } - - void write_gas_potential(const grid_hierarchy &gh) - { - //... we don't care about gas potential for Gadget - } - - //... write data for gas -- don't do this - void write_gas_velocity(int coord, const grid_hierarchy &gh) - { - determine_particle_numbers(gh); - size_t npart = 0; - for (int i = 1; i < 6; ++i) - npart += np_per_type_[i]; - - //... collect velocities and convert to absolute coordinates with correct - //... 
units - std::vector temp_data; - temp_data.reserve(block_buf_size_); - - float isqrta = 1.0f / sqrt(header_.time); - float vfac = isqrta * header_.BoxSize; - - //if( kpcunits_ ) - // vfac /= 1000.0; - vfac *= unit_length_chosen_ / unit_vel_chosen_; - - //size_t npart = gh.count_leaf_cells(gh.levelmin(), gh.levelmax());;; - size_t nwritten = 0; - - char temp_fname[256]; - sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_gas_vel + coord); - std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); - - size_t blksize = sizeof(T_store) * npart; - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - //for( int ilevel=levelmax_; ilevel>=(int)levelmin_; --ilevel ) - int ilevel = 0; - for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) - for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) - for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) - if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) - { - if (temp_data.size() < block_buf_size_) - temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); - else - { - ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); - nwritten += block_buf_size_; - temp_data.clear(); - temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); - } - } - - if (temp_data.size() > 0) - { - ofs_temp.write((char *)&temp_data[0], temp_data.size() * sizeof(T_store)); - nwritten += temp_data.size(); - } - - if (nwritten != npart) - throw std::runtime_error("Internal consistency error while writing temporary file for gas velocities"); - - ofs_temp.write((char *)&blksize, sizeof(int)); - - if (ofs_temp.bad()) - throw std::runtime_error("I/O error while writing temporary file for gas velocities"); - - ofs_temp.close(); - } - - //... write only for fine level - void write_gas_position(int coord, const grid_hierarchy &gh) - { - //... count number of leaf cells ...// - determine_particle_numbers(gh); - - size_t npart = 0; - for (int i = 1; i < 6; ++i) - npart += np_per_type_[i]; - - //... determine if we need to shift the coordinates back - double *shift = NULL; - - if (shift_halfcell_) - { - double h = 0.0; //1.0/(1<<(levelmin_+1)); - shift = new double[3]; - shift[0] = shift[1] = shift[2] = -h; - } - - size_t nwritten = 0; - - //... - //... collect displacements and convert to absolute coordinates with correct - //... units - std::vector temp_data; - temp_data.reserve(block_buf_size_); - - char temp_fname[256]; - sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_gas_pos + coord); - std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); - - size_t blksize = sizeof(T_store) * npart; - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - double xfac = header_.BoxSize; - - double h = 1.0 / (1ul << gh.levelmax()); - - //for (int ilevel = gh.levelmax(); ilevel >= (int)gh.levelmin(); --ilevel) - int ilevel = 0; - { - for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) - for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) - for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) - //if( ! gh.is_refined(ilevel,i,j,k) ) - if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) - { - double xx[3]; - gh.cell_pos(ilevel, i, j, k, xx); - if (shift != NULL) - xx[coord] += shift[coord]; - - //... shift particle positions (this has to be done as the same shift - //... 
is used when computing the convolution kernel for SPH baryons) - xx[coord] += 0.5 * h; - - xx[coord] = (xx[coord] + (*gh.get_grid(ilevel)).relem(i, j, k)) * xfac; - - if (temp_data.size() < block_buf_size_) - temp_data.push_back(xx[coord]); - else - { - ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); - nwritten += block_buf_size_; - temp_data.clear(); - temp_data.push_back(xx[coord]); - } - } - } - - if (temp_data.size() > 0) - { - ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * temp_data.size()); - nwritten += temp_data.size(); - } - - if (nwritten != npart) - throw std::runtime_error("Internal consistency error while writing temporary file for gas positions"); - - //... dump to temporary file - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - if (ofs_temp.bad()) - throw std::runtime_error("I/O error while writing temporary file for gas positions"); - - ofs_temp.close(); - - if (shift != NULL) - delete[] shift; - } - - void write_gas_density(const grid_hierarchy &gh) - { - //do nothing as we write out positions - } - - void finalize(void) - { - this->assemble_gadget_file(); - } -}; - -// namespace -// { -// output_plugin_creator_concrete> creator1("gadget2"); -// #ifndef SINGLE_PRECISION -// output_plugin_creator_concrete> creator2("gadget2_double"); -// #endif -// } // namespace From f4d6b9e6695fb4e036f48a39539993ac4478d6d7 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 21:19:11 +0200 Subject: [PATCH 106/130] fixed baryon particle type, was used for testing --- src/plugins/output_gadget_hdf5.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/output_gadget_hdf5.cc b/src/plugins/output_gadget_hdf5.cc index 2e41e47..3908e64 100644 --- a/src/plugins/output_gadget_hdf5.cc +++ b/src/plugins/output_gadget_hdf5.cc @@ -154,7 +154,7 @@ public: case cosmo_species::dm: return 1; case cosmo_species::baryon: - return 2; + return 0; case cosmo_species::neutrino: return 3; } From 5d60b59f6cd5a80e8591623737b86bf4d78c0bc2 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 21:35:16 +0200 Subject: [PATCH 107/130] removed superfluous grid operators --- include/operators.hh | 18 ++++++++++-------- src/ic_generator.cc | 8 ++++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/operators.hh b/include/operators.hh index 49ed8d1..e2f4c8e 100644 --- a/include/operators.hh +++ b/include/operators.hh @@ -1,10 +1,18 @@ #pragma once - +/* + + operators.hh - This file is part of MUSIC2 - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2019 Oliver Hahn + +*/ #include namespace op{ -//!== long list of primitive operators to work on fields ==!// +//!== list of primitive operators to work on fields ==!// template< typename field> inline auto assign_to( field& g ){return [&g](auto i, auto v){ g[i] = v; };} @@ -15,15 +23,9 @@ inline auto multiply_add_to( field& g, val x ){return [&g,x](auto i, auto v){ g[ template< typename field> inline auto add_to( field& g ){return [&g](auto i, auto v){ g[i] += v; };} -template< typename field> -inline auto add_twice_to( field& g ){return [&g](auto i, auto v){ g[i] += 2*v; };} - template< typename field> inline auto subtract_from( field& g ){return [&g](auto i, auto v){ g[i] -= v; };} -template< typename field> -inline auto subtract_twice_from( field& g ){return [&g](auto i, auto v){ g[i] -= 2*v; };} - //! 
vanilla standard gradient class fourier_gradient{ private: diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 72c4482..6185af0 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -353,7 +353,7 @@ int Run( config_file& the_config ) music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush; phi3a.FourierTransformForward(false); Conv.convolve_Hessians(phi, {0, 0}, phi, {1, 1}, phi, {2, 2}, op::assign_to(phi3a)); - Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::add_twice_to(phi3a)); + Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::multiply_add_to(phi3a,2.0)); Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, phi, {0, 0}, op::subtract_from(phi3a)); Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, phi, {1, 1}, op::subtract_from(phi3a)); Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, phi, {2, 2}, op::subtract_from(phi3a)); @@ -367,9 +367,9 @@ int Run( config_file& the_config ) Conv.convolve_SumOfHessians(phi, {0, 0}, phi2, {1, 1}, {2, 2}, op::assign_to(phi3b)); Conv.convolve_SumOfHessians(phi, {1, 1}, phi2, {2, 2}, {0, 0}, op::add_to(phi3b)); Conv.convolve_SumOfHessians(phi, {2, 2}, phi2, {0, 0}, {1, 1}, op::add_to(phi3b)); - Conv.convolve_Hessians(phi, {0, 1}, phi2, {0, 1}, op::subtract_twice_from(phi3b)); - Conv.convolve_Hessians(phi, {0, 2}, phi2, {0, 2}, op::subtract_twice_from(phi3b)); - Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::subtract_twice_from(phi3b)); + Conv.convolve_Hessians(phi, {0, 1}, phi2, {0, 1}, op::multiply_add_to(phi3b,-2.0)); + Conv.convolve_Hessians(phi, {0, 2}, phi2, {0, 2}, op::multiply_add_to(phi3b,-2.0)); + Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::multiply_add_to(phi3b,-2.0)); phi3b.apply_InverseLaplacian(); phi3b *= 0.5; // factor 1/2 from definition of phi(3b)! music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; From 83b8d9bbafcfab7828caee69dc2accc8de16f354 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 23:59:13 +0200 Subject: [PATCH 108/130] moved math headers to subdirectory --- include/bounding_box.hh | 2 +- include/cosmology_calculator.hh | 5 ++--- include/grid_fft.hh | 2 +- include/{ => math}/interpolate.hh | 0 include/{ => math}/mat3.hh | 2 +- include/{ => math}/ode_integrate.hh | 0 include/{ => math}/vec3.hh | 0 include/particle_generator.hh | 2 +- include/particle_plt.hh | 2 +- src/plugins/transfer_CLASS.cc | 2 +- 10 files changed, 8 insertions(+), 9 deletions(-) rename include/{ => math}/interpolate.hh (100%) rename include/{ => math}/mat3.hh (99%) rename include/{ => math}/ode_integrate.hh (100%) rename include/{ => math}/vec3.hh (100%) diff --git a/include/bounding_box.hh b/include/bounding_box.hh index 3048c79..6b70bcf 100644 --- a/include/bounding_box.hh +++ b/include/bounding_box.hh @@ -1,6 +1,6 @@ #pragma once -#include +#include template struct bounding_box diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index 1d99209..bedc653 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -6,10 +6,10 @@ #include #include #include -#include +#include #include -#include +#include #include // #include @@ -210,7 +210,6 @@ public: << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon0), 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vtotal), 2.0) << std::endl; - #warning Check whether output is at redshift that is indicated! 
} } music::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl; diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 2170dc8..b8a76aa 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include #include diff --git a/include/interpolate.hh b/include/math/interpolate.hh similarity index 100% rename from include/interpolate.hh rename to include/math/interpolate.hh diff --git a/include/mat3.hh b/include/math/mat3.hh similarity index 99% rename from include/mat3.hh rename to include/math/mat3.hh index 6cf2689..75458ea 100644 --- a/include/mat3.hh +++ b/include/math/mat3.hh @@ -1,7 +1,7 @@ #include #include -#include +#include template class mat3_t{ diff --git a/include/ode_integrate.hh b/include/math/ode_integrate.hh similarity index 100% rename from include/ode_integrate.hh rename to include/math/ode_integrate.hh diff --git a/include/vec3.hh b/include/math/vec3.hh similarity index 100% rename from include/vec3.hh rename to include/math/vec3.hh diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 57e8b0f..de6c912 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -7,7 +7,7 @@ \*******************************************************************/ #pragma once -#include +#include namespace particle { diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 9e6df1e..a452559 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -13,7 +13,7 @@ #include #include -#include +#include #include inline double Hypergeometric2F1( double a, double b, double c, double x ) diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 09047c6..2ae3ae2 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -16,7 +16,7 @@ #include #include #include -#include +#include class transfer_CLASS_plugin : public TransferFunction_plugin { From 7e196f3a14f779a9767e96e8b40219bf6f343dfd Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 5 Apr 2020 01:13:40 +0200 Subject: [PATCH 109/130] added compiler version string --- src/main.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main.cc b/src/main.cc index 04e2302..1d98276 100644 --- a/src/main.cc +++ b/src/main.cc @@ -79,7 +79,8 @@ int main( int argc, char** argv ) << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl; // Compilation CMake configuration, time etc info: - music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; + music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; + music::ilog << "Compiler used: " << __VERSION__ << std::endl; // git and versioning info: music::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl; From 8d459062892f480e5b650b39bea5809858bc9852 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 5 Apr 2020 01:15:56 +0200 Subject: [PATCH 110/130] added a compiler version string --- src/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.cc b/src/main.cc index 04e2302..82cb800 100644 --- a/src/main.cc +++ b/src/main.cc @@ -80,7 +80,7 @@ int main( int argc, char** argv ) // Compilation CMake configuration, time etc info: music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on 
" << __DATE__ << std::endl; - + music::ilog << "Compiled with " << __VERSION__ << std::endl; // git and versioning info: music::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl; From 3ac20125775a7700a9498b34a110e9fa47e152b0 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 5 Apr 2020 22:26:22 +0200 Subject: [PATCH 111/130] fixed forgotten h factor in A_s CLASS normalisation --- src/main.cc | 14 ++++++++++---- src/plugins/transfer_CLASS.cc | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/main.cc b/src/main.cc index ca62004..c609a4a 100644 --- a/src/main.cc +++ b/src/main.cc @@ -78,13 +78,19 @@ int main( int argc, char** argv ) << " 88 88 88 88. .88 88 88 88. .88 88 88. .88 88 88 88 Y8. .88 \n" << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl; - // Compilation CMake configuration, time etc info: - music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; - music::ilog << "Compiled with " << __VERSION__ << std::endl; - // git and versioning info: music::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl; + // Compilation CMake configuration, time etc info: + music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; + +#ifdef __GNUC__ + music::ilog << "Compiled with GNU C++ version " << __VERSION__ < 0) { this->tf_isnormalised_ = true; - tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0)); + tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p * h_, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0)); music::ilog << "Using A_s to normalise the transfer function!" << std::endl; } From 7ddc22fc0f0efbd4325f5343711e760e4dddc274 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Tue, 21 Apr 2020 17:23:59 +0200 Subject: [PATCH 112/130] updated class submodule branch --- external/class | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/class b/external/class index 52bc312..055d8bc 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit 52bc3126fca4415c4f541d47d43ffdb9763e0464 +Subproject commit 055d8bca371631da0c51ff167ce81905996b4ca2 From 0cafcfea197a2324c2caebd9b59a6f256b409b3d Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 15:01:09 +0200 Subject: [PATCH 113/130] added function to check if Nyquist mode to grd --- include/grid_fft.hh | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index b8a76aa..2d49f7f 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -40,7 +40,7 @@ public: std::array sizes_; size_t npr_, npc_; size_t ntot_; - std::array length_, kfac_, dx_; + std::array length_, kfac_, kny_, dx_; space_t space_; data_t *data_; @@ -97,6 +97,15 @@ public: return global_range_; } + bool is_nyquist_mode( size_t i, size_t j, size_t k ) const + { + assert( this->space_ == kspace_id ); + bool bres = (i+local_1_start_ == n_[1]/2); + bres |= (j == n_[0]/2); + bres |= (k == n_[2]/2); + return bres; + } + //! set all field elements to zero void zero() noexcept { @@ -466,9 +475,9 @@ public: { for (size_t k = 0; k < sizes_[2]; ++k) { - const auto elem = std::real(this->relem(i, j, k)); - sum1 += elem; - sum2 += elem * elem; + const auto elem = (space_==kspace_id)? 
this->kelem(i, j, k) : this->relem(i, j, k); + sum1 += std::real(elem); + sum2 += std::norm(elem);// * elem; } } } From 95502596dd245c2f4add04853e29de575fa81588 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 15:01:36 +0200 Subject: [PATCH 114/130] added forgotten mods for kNy in grids --- src/grid_fft.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/grid_fft.cc b/src/grid_fft.cc index a51577e..eeba708 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -48,6 +48,7 @@ void Grid_FFT::Setup(void) { nhalf_[i] = n_[i] / 2; kfac_[i] = 2.0 * M_PI / length_[i]; + kny_[i] = kfac_[i] * n_[i]/2; dx_[i] = length_[i] / n_[i]; global_range_.x1_[i] = 0; @@ -128,6 +129,7 @@ void Grid_FFT::Setup(void) { nhalf_[i] = n_[i] / 2; kfac_[i] = 2.0 * M_PI / length_[i]; + kny_[i] = kfac_[i] * n_[i]/2; dx_[i] = length_[i] / n_[i]; global_range_.x1_[i] = 0; From 52dfa9a72d81b5a94837bae74a47b2d9a0c19104 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 15:02:16 +0200 Subject: [PATCH 115/130] Fill_Grid member function cannot be const --- include/random_plugin.hh | 2 +- src/plugins/random_music.cc | 2 +- src/plugins/random_ngenic.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/random_plugin.hh b/include/random_plugin.hh index 1c33fae..a91ab7e 100644 --- a/include/random_plugin.hh +++ b/include/random_plugin.hh @@ -18,7 +18,7 @@ class RNG_plugin } virtual ~RNG_plugin() {} virtual bool isMultiscale() const = 0; - virtual void Fill_Grid( Grid_FFT& g ) const = 0; + virtual void Fill_Grid( Grid_FFT& g ) = 0;//const = 0; //virtual void FillGrid(int level, DensityGrid &R) = 0; }; diff --git a/src/plugins/random_music.cc b/src/plugins/random_music.cc index ab0f959..28486b5 100644 --- a/src/plugins/random_music.cc +++ b/src/plugins/random_music.cc @@ -40,7 +40,7 @@ public: bool isMultiscale() const { return true; } - void Fill_Grid( Grid_FFT& g ) const { } + void Fill_Grid( Grid_FFT& g ) {} //const { } void initialize_for_grid_structure()//const refinement_hierarchy &refh) { diff --git a/src/plugins/random_ngenic.cc b/src/plugins/random_ngenic.cc index f1c6a59..1498d4b 100644 --- a/src/plugins/random_ngenic.cc +++ b/src/plugins/random_ngenic.cc @@ -63,7 +63,7 @@ public: bool isMultiscale() const { return false; } - void Fill_Grid(Grid_FFT &g) const + void Fill_Grid(Grid_FFT &g) //const { g.zero(); g.FourierTransformForward(false); From bd78c7468a684c08fa7907d9e73da584bf3bafc3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 15:02:54 +0200 Subject: [PATCH 116/130] added a default z_max_pk to class interface to avoid not having any if zstart==ztarget --- src/plugins/transfer_CLASS.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 1f0f776..bcf85df 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -47,7 +47,7 @@ private: void init_ClassEngine(void) { //--- general parameters ------------------------------------------ - add_class_parameter("z_max_pk", std::max(zstart_, ztarget_) * 1.2); // use 1.2 as safety + add_class_parameter("z_max_pk", std::max(std::max(zstart_, ztarget_),199.0)); // use 1.2 as safety add_class_parameter("P_k_max_h/Mpc", kmax_); add_class_parameter("output", "dTk,vTk"); add_class_parameter("extra metric transfer functions","yes"); @@ -126,7 +126,7 @@ private: // output parameters, only needed for the control CLASS .ini file that we output std::stringstream zlist; if (ztarget_ == zstart_) - zlist << 
ztarget_ << ", 0.0"; + zlist << ztarget_ << ((ztarget_!=0.0)? ", 0.0" : ""); else zlist << std::max(ztarget_, zstart_) << ", " << std::min(ztarget_, zstart_) << ", 0.0"; add_class_parameter("z_pk", zlist.str()); From 95a660f4ffd56f768e20c20f7d773b19badcdd5f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 15:03:43 +0200 Subject: [PATCH 117/130] added interface with PANPHASIA random number generator --- CMakeLists.txt | 22 + external/panphasia/generic_lecuyer.f90 | 683 +++++ external/panphasia/panphasia_routines.f | 3334 +++++++++++++++++++++++ src/plugins/random_panphasia.cc | 532 ++++ 4 files changed, 4571 insertions(+) create mode 100644 external/panphasia/generic_lecuyer.f90 create mode 100644 external/panphasia/panphasia_routines.f create mode 100644 src/plugins/random_panphasia.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index c8cf314..be14271 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,6 +127,17 @@ mark_as_advanced(pkgcfg_lib_GSL_gsl pkgcfg_lib_GSL_gslcblas pkgcfg_lib_GSL_m) find_package(HDF5 REQUIRED) mark_as_advanced(HDF5_C_LIBRARY_dl HDF5_C_LIBRARY_hdf5 HDF5_C_LIBRARY_m HDF5_C_LIBRARY_pthread HDF5_C_LIBRARY_z HDF5_C_LIBRARY_sz) +######################################################################################################################## +# PANPHASIA +option(ENABLE_PANPHASIA "Enable PANPHASIA random number generator" ON) +if(ENABLE_PANPHASIA) +enable_language(Fortran) +if ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "Intel") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -132 -implicit-none") +elseif("${CMAKE_Fortran_COMPILER_ID}" MATCHES "GNU") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -ffixed-line-length-132 -fimplicit-none") +endif() +endif(ENABLE_PANPHASIA) ######################################################################################################################## # INCLUDES include_directories(${PROJECT_SOURCE_DIR}/include) @@ -145,6 +156,13 @@ file( GLOB PLUGINS ${PROJECT_SOURCE_DIR}/src/plugins/*.cc ) +if(ENABLE_PANPHASIA) +list (APPEND SOURCES + ${PROJECT_SOURCE_DIR}/external/panphasia/panphasia_routines.f + ${PROJECT_SOURCE_DIR}/external/panphasia/generic_lecuyer.f90 +) +endif() + # project configuration header configure_file( ${PROJECT_SOURCE_DIR}/include/cmake_config.hh.in @@ -208,6 +226,10 @@ if(HDF5_FOUND) target_compile_options(${PRGNAME} PRIVATE "-DUSE_HDF5") endif(HDF5_FOUND) +if(ENABLE_PANPHASIA) +target_compile_options(${PRGNAME} PRIVATE "-DUSE_PANPHASIA") +endif(ENABLE_PANPHASIA) + target_link_libraries(${PRGNAME} ${FFTW3_LIBRARIES}) target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIRS}) diff --git a/external/panphasia/generic_lecuyer.f90 b/external/panphasia/generic_lecuyer.f90 new file mode 100644 index 0000000..13f53ed --- /dev/null +++ b/external/panphasia/generic_lecuyer.f90 @@ -0,0 +1,683 @@ +!=====================================================================================c +! +! The code below was written by: Stephen Booth +! Edinburgh Parallel Computing Centre +! The University of Edinburgh +! JCMB +! Mayfield Road +! Edinburgh EH9 3JZ +! United Kingdom +! +! This file is part of the software made public in +! Jenkins and Booth 2013 - arXiv:1306.XXXX +! +! The software computes the Panphasia Gaussian white noise field +! realisation described in detail in Jenkins 2013 - arXiv:1306.XXXX +! +! +! +! This software is free, subject to a agreeing licence conditions: +! +! +! (i) you will publish the phase descriptors and reference Jenkins (13) +! 
for any new simulations that use Panphasia phases. You will pass on this +! condition to others for any software or data you make available publically +! or privately that makes use of Panphasia. +! +! (ii) that you will ensure any publications using results derived from Panphasia +! will be submitted as a final version to arXiv prior to or coincident with +! publication in a journal. +! +! +! (iii) that you report any bugs in this software as soon as confirmed to +! A.R.Jenkins@durham.ac.uk +! +! (iv) that you understand that this software comes with no warranty and that is +! your responsibility to ensure that it is suitable for the purpose that +! you intend. +! +!=====================================================================================c +!{{{Rand_base (define kind types) +MODULE Rand_base +! This module just declares the base types +! we may have to edit this to match to the target machine +! we really need a power of 2 selected int kind in fortran-95 we could +! do this with a PURE function I think. + +! +! 10 decimal digits will hold 2^31 +! + + INTEGER, PARAMETER :: Sint = SELECTED_INT_KIND(9) +! INTEGER, PARAMETER :: Sint = SELECTED_INT_KIND(10) +! INTEGER, PARAMETER :: Sint = 4 + +! +! 18-19 decimal digits will hold 2^63 +! but all 19 digit numbers require 2^65 :-( +! + + INTEGER, PARAMETER :: Dint = SELECTED_INT_KIND(17) +! INTEGER, PARAMETER :: Dint = SELECTED_INT_KIND(18) +! INTEGER, PARAMETER :: Dint = 8 + +! type for index counters must hold Nstore + INTEGER, PARAMETER :: Ctype = SELECTED_INT_KIND(3) +END MODULE Rand_base +!}}} + +!{{{Rand_int (random integers mod 2^31-1) + +MODULE Rand_int + USE Rand_base + IMPLICIT NONE +! The general approach of this module is two have +! two types Sint and Dint +! +! Sint should have at least 31 bits +! dint shouldhave at least 63 + +!{{{constants + + INTEGER(KIND=Ctype), PARAMETER :: Nstate=5_Ctype + INTEGER(KIND=Ctype), PRIVATE, PARAMETER :: Nbatch=128_Ctype + INTEGER(KIND=Ctype), PRIVATE, PARAMETER :: Nstore=Nstate+Nbatch + + INTEGER(KIND=Sint), PRIVATE, PARAMETER :: M = 2147483647_Sint + INTEGER(KIND=Dint), PRIVATE, PARAMETER :: Mask = 2147483647_Dint + INTEGER(KIND=Dint), PRIVATE, PARAMETER :: A1 = 107374182_Dint + INTEGER(KIND=Dint), PRIVATE, PARAMETER :: A5 = 104480_Dint + LOGICAL, PARAMETER :: Can_step_int=.TRUE. + LOGICAL, PARAMETER :: Can_reverse_int=.TRUE. + +!}}} + +!{{{Types +! +! This type holds the state of the generator +! +!{{{TYPE RAND_state + +TYPE RAND_state + PRIVATE + INTEGER(KIND=Sint) :: state(Nstore) +! do we need to re-fill state table this is reset when we initialise state. + LOGICAL :: need_fill +! position of the next state variable to output + INTEGER(KIND=Ctype) :: pos +END TYPE RAND_state + +!}}} + +! +! This type defines the offset type used for stepping. +! +!{{{TYPE RAND_offset + +TYPE RAND_offset + PRIVATE + INTEGER(KIND=Sint) :: poly(Nstate) +END TYPE RAND_offset + +!}}} + +!}}} + +!{{{interface and overloads +! +! Allow automatic conversion between integers and offsets +! +INTERFACE ASSIGNMENT(=) + MODULE PROCEDURE Rand_set_offset + MODULE PROCEDURE Rand_load + MODULE PROCEDURE Rand_save + MODULE PROCEDURE Rand_seed +END INTERFACE +INTERFACE OPERATOR(+) + MODULE PROCEDURE Rand_add_offset +END INTERFACE +INTERFACE OPERATOR(*) + MODULE PROCEDURE Rand_mul_offset +END INTERFACE + +! +! overload + as the boost/stepping operator +! 
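+! For illustration only (a usage sketch, not part of the original
+! documentation): the overloads declared below let a caller jump a
+! generator forward without drawing every intermediate value, e.g.
+!
+!    TYPE(Rand_state)  :: s
+!    TYPE(Rand_offset) :: off
+!    CALL Rand_seed(s, 7)   ! seed from a single integer
+!    off = 1000000          ! assignment overload -> Rand_set_offset
+!    s = s + off            ! '+' overload -> Rand_boost by 10^6 states
+!    s = s + 5              ! '+' overload -> Rand_step by 5 states
+!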
+INTERFACE OPERATOR(+) + MODULE PROCEDURE Rand_step + MODULE PROCEDURE Rand_boost +END INTERFACE +!}}} + + +!{{{PUBLIC/PRIVATE + PRIVATE reduce,mod_saxpy,mod_sdot,p_saxpy,p_sdot,poly_mult + PRIVATE poly_square, poly_power + PRIVATE fill_state, repack_state + + PUBLIC Rand_sint, Rand_sint_vec + + PUBLIC Rand_save, Rand_load + PUBLIC Rand_set_offset, Rand_add_offset, Rand_mul_offset + PUBLIC Rand_step, Rand_boost, Rand_seed +!}}} + +CONTAINS + !{{{Internals + !{{{RECURSIVE FUNCTION reduce(A) + RECURSIVE FUNCTION reduce(A) + ! + ! Take A Dint and reduce to Sint MOD M + ! + INTEGER(KIND=Dint), INTENT(IN) :: A + INTEGER(KIND=Sint) reduce + INTEGER(KIND=Dint) tmp + + tmp = A + DO WHILE( ISHFT(tmp, -31) .GT. 0 ) + tmp = IAND(tmp,Mask) + ISHFT(tmp, -31) + END DO + IF( tmp .GE. M ) THEN + reduce = tmp - M + ELSE + reduce = tmp + END IF + END FUNCTION reduce + !}}} + !{{{RECURSIVE SUBROUTINE fill_state(x) + RECURSIVE SUBROUTINE fill_state(x) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Ctype) i + INTRINSIC IAND, ISHFT + INTEGER(KIND=Dint) tmp + DO i=Nstate+1,Nstore + tmp = (x%state(i-5) * A5) + (x%state(i-1)*A1) + ! + ! now reduce down to mod M efficiently + ! really hope the compiler in-lines this + ! + ! x%state(i) = reduce(tmp) + DO WHILE( ISHFT(tmp, -31) .GT. 0 ) + tmp = IAND(tmp,Mask) + ISHFT(tmp, -31) + END DO + IF( tmp .GE. M ) THEN + x%state(i) = tmp - M + ELSE + x%state(i) = tmp + END IF + + END DO + x%need_fill = .FALSE. + END SUBROUTINE fill_state + !}}} + !{{{RECURSIVE SUBROUTINE repack_state(x) + RECURSIVE SUBROUTINE repack_state(x) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Ctype) i + DO i=1,Nstate + x%state(i) = x%state(i+x%pos-(Nstate+1)) + END DO + x%pos = Nstate + 1 + x%need_fill = .TRUE. + END SUBROUTINE repack_state + !}}} + !{{{RECURSIVE SUBROUTINE mod_saxpy(y,a,x) + RECURSIVE SUBROUTINE mod_saxpy(y,a,x) + INTEGER(KIND=Ctype) i + INTEGER(KIND=Sint) y(Nstate) + INTEGER(KIND=Sint) a + INTEGER(KIND=Sint) x(Nstate) + INTEGER(KIND=Dint) tx,ty,ta + + IF( a .EQ. 0_Sint ) RETURN + + ! We use KIND=Dint temporaries here to ensure + ! that we don't overflow in the expression + + ta = a + DO i=1,Nstate + ty=y(i) + tx=x(i) + y(i) = reduce(ty + ta * tx) + END DO + + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE mod_sdot(res,x,y) + RECURSIVE SUBROUTINE mod_sdot(res,x,y) + INTEGER(KIND=Sint), INTENT(OUT) :: res + INTEGER(KIND=Sint), INTENT(IN) :: x(Nstate) , y(Nstate) + INTEGER(KIND=Dint) dx, dy, dtmp + INTEGER(KIND=Sint) tmp + INTEGER(KIND=Ctype) i + + tmp = 0 + DO i=1,Nstate + dx = x(i) + dy = y(i) + dtmp = tmp + tmp = reduce(dtmp + dx * dy) + END DO + res = tmp + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE p_saxpy(y,a) + RECURSIVE SUBROUTINE p_saxpy(y,a) + ! 
Calculates mod_saxpy(y,a,P) + INTEGER(KIND=Sint), INTENT(INOUT) :: y(Nstate) + INTEGER(KIND=Sint), INTENT(IN) :: a + INTEGER(KIND=Dint) tmp, dy, da + dy = y(1) + da = a + tmp = dy + da*A5 + y(1) = reduce(tmp) + dy = y(5) + da = a + tmp = dy + da*A1 + y(5) = reduce(tmp) + + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE p_sdot(res,n,x) + RECURSIVE SUBROUTINE p_sdot(res,x) + INTEGER(KIND=Sint), INTENT(OUT) :: res + INTEGER(KIND=Sint), INTENT(IN) :: x(Nstate) + INTEGER(KIND=Dint) dx1, dx5, dtmp + dx1 = x(1) + dx5 = x(5) + + dtmp = A1*dx5 + A5*dx1 + res = reduce(dtmp) + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE poly_mult(a,b) + RECURSIVE SUBROUTINE poly_mult(a,b) + INTEGER(KIND=Sint), INTENT(INOUT) :: a(Nstate) + INTEGER(KIND=Sint), INTENT(IN) :: b(Nstate) + INTEGER(KIND=Sint) tmp((2*Nstate) - 1) + INTEGER(KIND=Ctype) i + + tmp = 0_Sint + + DO i=1,Nstate + CALL mod_saxpy(tmp(i:Nstate+i-1),a(i), b) + END DO + DO i=(2*Nstate)-1, Nstate+1, -1 + CALL P_SAXPY(tmp(i-Nstate:i-1),tmp(i)) + END DO + a = tmp(1:Nstate) + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE poly_square(a) + RECURSIVE SUBROUTINE poly_square(a) + INTEGER(KIND=Sint), INTENT(INOUT) :: a(Nstate) + INTEGER(KIND=Sint) tmp((2*Nstate) - 1) + INTEGER(KIND=Ctype) i + + tmp = 0_Sint + + DO i=1,Nstate + CALL mod_saxpy(tmp(i:Nstate+i-1),a(i), a) + END DO + DO i=(2*Nstate)-1, Nstate+1, -1 + CALL P_SAXPY(tmp(i-Nstate:i-1),tmp(i)) + END DO + a = tmp(1:Nstate) + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE poly_power(poly,n) + RECURSIVE SUBROUTINE poly_power(poly,n) + INTEGER(KIND=Sint), INTENT(INOUT) :: poly(Nstate) + INTEGER, INTENT(IN) :: n + INTEGER nn + INTEGER(KIND=Sint) x(Nstate), out(Nstate) + + IF( n .EQ. 0 )THEN + poly = 0_Sint + poly(1) = 1_Sint + RETURN + ELSE IF( n .LT. 0 )THEN + poly = 0_Sint + RETURN + END IF + + out = 0_sint + out(1) = 1_Sint + x = poly + nn = n + DO WHILE( nn .GT. 0 ) + IF( MOD(nn,2) .EQ. 1 )THEN + call poly_mult(out,x) + END IF + nn = nn/2 + IF( nn .GT. 0 )THEN + call poly_square(x) + END IF + END DO + poly = out + + END SUBROUTINE poly_power + !}}} + !}}} + + !{{{RECURSIVE SUBROUTINE Rand_seed( state, n ) + RECURSIVE SUBROUTINE Rand_seed( state, n ) + TYPE(Rand_state), INTENT(OUT) :: state + INTEGER, INTENT(IN) :: n + ! initialise the genrator using a single integer + ! fist initialise to an arbitrary state then boost by a multiple + ! of a long distance + ! + ! state is moved forward by P^n steps + ! we want this to be ok for seperating parallel sequences on MPP machines + ! P is taken as a prime number as this should prevent strong correlations + ! when the generators are operated in tight lockstep. + ! equivalent points on different processors will also be related by a + ! primative polynomial + ! P is 2^48-59 + TYPE(Rand_state) tmp + TYPE(Rand_offset), PARAMETER :: P = & + Rand_offset( (/ 1509238949_Sint ,2146167999_Sint ,1539340803_Sint , & + 1041407428_Sint ,666274987_Sint /) ) + + CALL Rand_load( tmp, (/ 5, 4, 3, 2, 1 /) ) + state = Rand_boost( tmp, Rand_mul_offset(P, n )) + + END SUBROUTINE Rand_seed + !}}} + !{{{RECURSIVE SUBROUTINE Rand_load( state, input ) + RECURSIVE SUBROUTINE Rand_load( state, input ) + TYPE(RAND_state), INTENT(OUT) :: state + INTEGER, INTENT(IN) :: input(Nstate) + + INTEGER(KIND=Ctype) i + + state%state = 0_Sint + DO i=1,Nstate + state%state(i) = MOD(INT(input(i),KIND=Sint),M) + END DO + state%need_fill = .TRUE. 
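+    ! the state table is (re)filled lazily: fill_state runs on the next draw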
+ state%pos = Nstate + 1 + END SUBROUTINE Rand_load + !}}} + !{{{RECURSIVE SUBROUTINE Rand_save( save_vec,state ) + RECURSIVE SUBROUTINE Rand_save( save_vec, x ) + INTEGER, INTENT(OUT) :: save_vec(Nstate) + TYPE(RAND_state), INTENT(IN) :: x + + INTEGER(KIND=Ctype) i + DO i=1,Nstate + save_vec(i) = x%state(x%pos-(Nstate+1) + i) + END DO + END SUBROUTINE Rand_save + !}}} + + !{{{RECURSIVE SUBROUTINE Rand_set_offset( offset, n ) + RECURSIVE SUBROUTINE Rand_set_offset( offset, n ) + TYPE(Rand_offset), INTENT(OUT) :: offset + INTEGER, INTENT(IN) :: n + + offset%poly = 0_Sint + IF ( n .GE. 0 ) THEN + offset%poly(2) = 1_Sint + call poly_power(offset%poly,n) + ELSE + ! + ! This is X^-1 + ! + offset%poly(4) = 858869107_Sint + offset%poly(5) = 1840344978_Sint + call poly_power(offset%poly,-n) + END IF + END SUBROUTINE Rand_set_offset + !}}} + !{{{TYPE(Rand_offset) RECURSIVE FUNCTION Rand_add_offset( a, b ) + TYPE(Rand_offset) RECURSIVE FUNCTION Rand_add_offset( a, b ) + TYPE(Rand_offset), INTENT(IN) :: a, b + + Rand_add_offset = a + CALL poly_mult(Rand_add_offset%poly,b%poly) + RETURN + END FUNCTION Rand_add_offset + !}}} + !{{{TYPE(Rand_offset) RECURSIVE FUNCTION Rand_mul_offset( a, n ) + TYPE(Rand_offset) RECURSIVE FUNCTION Rand_mul_offset( a, n ) + TYPE(Rand_offset), INTENT(IN) :: a + INTEGER, INTENT(IN) :: n + Rand_mul_offset = a + CALL poly_power(Rand_mul_offset%poly,n) + RETURN + END FUNCTION Rand_mul_offset + !}}} + !{{{RECURSIVE FUNCTION Rand_boost(x, offset) + RECURSIVE FUNCTION Rand_boost(x, offset) + TYPE(Rand_state) Rand_boost + TYPE(Rand_state), INTENT(IN) :: x + TYPE(Rand_offset), INTENT(IN) :: offset + INTEGER(KIND=Sint) tmp(2*Nstate-1), res(Nstate) + INTEGER(KIND=Ctype) i + + DO i=1,Nstate + tmp(i) = x%state(x%pos-(Nstate+1) + i) + END DO + tmp(Nstate+1:) = 0_Sint + + DO i=1,Nstate-1 + call P_SDOT(tmp(i+Nstate),tmp(i:Nstate+i-1)) + END DO + + DO i=1,Nstate + call mod_sdot(res(i),offset%poly,tmp(i:Nstate+i-1)) + END DO + Rand_boost%state = 0_Sint + DO i=1,Nstate + Rand_boost%state(i) = res(i) + END DO + Rand_boost%need_fill = .TRUE. + Rand_boost%pos = Nstate + 1 + + END FUNCTION Rand_boost + !}}} + !{{{RECURSIVE FUNCTION Rand_step(x, n) + RECURSIVE FUNCTION Rand_step(x, n) + TYPE(Rand_state) Rand_step + TYPE(RAND_state), INTENT(IN) :: x + INTEGER, INTENT(IN) :: n + TYPE(Rand_offset) tmp + + CALL Rand_set_offset(tmp,n) + Rand_step=Rand_boost(x,tmp) + + END FUNCTION + !}}} + + !{{{RECURSIVE FUNCTION Rand_sint(x) + RECURSIVE FUNCTION Rand_sint(x) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Sint) Rand_sint + IF( x%pos .GT. Nstore )THEN + CALL repack_state(x) + END IF + IF( x%need_fill ) CALL fill_state(x) + Rand_sint = x%state(x%pos) + x%pos = x%pos + 1 + RETURN + END FUNCTION Rand_sint + !}}} + !{{{RECURSIVE SUBROUTINE Rand_sint_vec(iv,x) + RECURSIVE SUBROUTINE Rand_sint_vec(iv,x) + INTEGER(KIND=Sint), INTENT(OUT) :: iv(:) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER left,start, chunk, i + + start=1 + left=SIZE(iv) + DO WHILE( left .GT. 0 ) + IF( x%pos .GT. 
Nstore )THEN + CALL repack_state(x) + END IF + IF( x%need_fill ) CALL fill_state(x) + + chunk = MIN(left,Nstore-x%pos+1) + DO i=0,chunk-1 + iv(start+i) = x%state(x%pos+i) + END DO + start = start + chunk + x%pos = x%pos + chunk + left = left - chunk + END DO + + RETURN + END SUBROUTINE Rand_sint_vec + !}}} + + +END MODULE Rand_int + +!}}} + +!{{{Rand (use Rand_int to make random reals) + +MODULE Rand + USE Rand_int + IMPLICIT NONE + +!{{{Parameters + + INTEGER, PARAMETER :: RAND_kind1 = SELECTED_REAL_KIND(10) + INTEGER, PARAMETER :: RAND_kind2 = SELECTED_REAL_KIND(6) + + INTEGER, PARAMETER, PRIVATE :: Max_block=100 + INTEGER(KIND=Sint), PRIVATE, PARAMETER :: M = 2147483647 + REAL(KIND=RAND_kind1), PRIVATE, PARAMETER :: INVMP1_1 = ( 1.0_RAND_kind1 / 2147483647.0_RAND_kind1 ) + REAL(KIND=RAND_kind2), PRIVATE, PARAMETER :: INVMP1_2 = ( 1.0_RAND_kind2 / 2147483647.0_RAND_kind2 ) + + LOGICAL, PARAMETER :: Can_step = Can_step_int + LOGICAL, PARAMETER :: Can_reverse = Can_reverse_int + +!}}} + PUBLIC Rand_real + + +INTERFACE Rand_real + MODULE PROCEDURE Rand_real1 + MODULE PROCEDURE Rand_real2 + MODULE PROCEDURE Rand_real_vec1 + MODULE PROCEDURE Rand_real_vec2 +END INTERFACE + + +CONTAINS + + !{{{RECURSIVE SUBROUTINE Rand_real1(y,x) + RECURSIVE SUBROUTINE Rand_real1(y,x) + REAL(KIND=RAND_kind1), INTENT(OUT) :: y + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Sint) Z + + Z = Rand_sint(x) + IF (Z .EQ. 0) Z = M + + y = ((Z-0.5d0)*INVMP1_1) + RETURN + END SUBROUTINE Rand_real1 + !}}} + !{{{RECURSIVE SUBROUTINE Rand_real2(y,x) + RECURSIVE SUBROUTINE Rand_real2(y,x) + REAL(KIND=RAND_kind2), INTENT(OUT) :: y + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Sint) Z + + Z = Rand_sint(x) + IF (Z .EQ. 0) Z = M + + y = ((Z-0.5d0)*INVMP1_1) ! generate in double and truncate. + RETURN + END SUBROUTINE Rand_real2 + !}}} + + !{{{RECURSIVE SUBROUTINE Rand_real_vec1(rv,x) + RECURSIVE SUBROUTINE Rand_real_vec1(rv,x) + TYPE(RAND_state), INTENT(INOUT) :: x + REAL(KIND=RAND_kind1) rv(:) + INTEGER left,start, chunk, i + INTEGER(KIND=Sint) Z + INTEGER(KIND=Sint) temp(MIN(SIZE(rv),Max_block)) + + start=0 + left=SIZE(rv) + DO WHILE( left .GT. 0 ) + chunk = MIN(left,Max_block) + CALL Rand_sint_vec(temp(1:chunk),x) + DO i=1,chunk + Z = temp(i) + IF (Z .EQ. 0) Z = M + rv(start+i) = (Z-0.5d0)*INVMP1_1 + END DO + start = start + chunk + left = left - chunk + END DO + + RETURN + END SUBROUTINE Rand_real_vec1 + !}}} + !{{{RECURSIVE SUBROUTINE Rand_real_vec2(rv,x) + RECURSIVE SUBROUTINE Rand_real_vec2(rv,x) + TYPE(RAND_state), INTENT(INOUT) :: x + REAL(KIND=RAND_kind2) rv(:) + INTEGER left,start, chunk, i + INTEGER(KIND=Sint) Z + INTEGER(KIND=Sint) temp(MIN(SIZE(rv),Max_block)) + + start=0 + left=SIZE(rv) + DO WHILE( left .GT. 0 ) + chunk = MIN(left,Max_block) + CALL Rand_sint_vec(temp(1:chunk),x) + DO i=1,chunk + Z = temp(i) + IF (Z .EQ. 0) Z = M + rv(start+i) = (Z-0.5d0)*INVMP1_2 + END DO + start = start + chunk + left = left - chunk + END DO + + RETURN + END SUBROUTINE Rand_real_vec2 + !}}} +END MODULE Rand + +!}}} + +!{{{test program +! PROGRAM test_random +! use Rand +! TYPE(RAND_state) x +! REAL y +! CALL Rand_load(x,(/5,4,3,2,1/)) +! DO I=0,10 +! CALL Rand_real(y,x) +! WRITE(*,10) I,y +! END DO +! +!10 FORMAT(I10,E25.16) +! +! END + +! 0 0.5024326127022505E-01 +! 1 0.8260946767404675E-01 +! 2 0.2123264316469431E-01 +! 3 0.6926658791489899E+00 +! 4 0.2076155943796039E+00 +! 5 0.4327449947595596E-01 +! 6 0.2204052871093154E-01 +! 7 0.1288446951657534E+00 +! 8 0.4859915426932275E+00 +! 9 0.5721384193748236E-01 +! 
10 0.7996825082227588E+00 +! + + +!}}} + diff --git a/external/panphasia/panphasia_routines.f b/external/panphasia/panphasia_routines.f new file mode 100644 index 0000000..2e1bfbd --- /dev/null +++ b/external/panphasia/panphasia_routines.f @@ -0,0 +1,3334 @@ +c=====================================================================================c +c +c The code below was written by: Adrian Jenkins, +c Institute for Computational Cosmology +c Department of Physics +c South Road +c Durham, DH1 3LE +c United Kingdom +c +c This file is part of the software made public in +c Jenkins and Booth 2013 - arXiv:1306.XXXX +c +c The software computes the Panphasia Gaussian white noise field +c realisation described in detail in Jenkins 2013 - arXiv:1306.XXXX +c +c +c +c This software is free, subject to a agreeing licence conditions: +c +c +c (i) you will publish the phase descriptors and reference Jenkins (13) +c for any new simulations that use Panphasia phases. You will pass on this +c condition to others for any software or data you make available publically +c or privately that makes use of Panphasia. +c +c (ii) that you will ensure any publications using results derived from Panphasia +c will be submitted as a final version to arXiv prior to or coincident with +c publication in a journal. +c +c (iii) that you report any bugs in this software as soon as confirmed to +c A.R.Jenkins@durham.ac.uk +c +c (iv) that you understand that this software comes with no warranty and that is +c your responsibility to ensure that it is suitable for the purpose that +c you intend. +c +c=====================================================================================c + +c===================================================================================== +c List of subroutines and arguments. Each of these is documented in c +c arXiV/1306.XXXX c +c c +c Adrian Jenkins, 24/6/2013. c +c------------------------------------------------------------------------------------- +c Version 1.000 +c=================================================================================== + + module pan_state + use Rand + implicit none + integer maxdim_, maxlev_, maxpow_ + parameter (maxdim_=60,maxlev_=50, maxpow_ = 3*maxdim_) + integer nmulti_ + parameter (nmulti_=64) + integer range_max + parameter(range_max=10000) + integer indmin,indmax + parameter (indmin=-1, indmax=60) + + + type state_data + integer base_state(5), base_lev_start(5,0:maxdim_) + TYPE(Rand_offset) :: poweroffset(0:maxpow_) + TYPE(Rand_offset) :: superjump + TYPE(Rand_state) :: current_state(-1:maxpow_) + + integer layer_min,layer_max,indep_field + +! This module stores information needed to access the part of Panphasia +! selected by a particular descriptor. 
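+! For orientation (no new format is defined here): a descriptor string
+! encodes an octree level, the (x,y,z) origin and side lengths of the
+! selected region at that level, a check value and a text name; these
+! are exactly the fields extracted by parse_descriptor below
+! (wn_level_base, i_xorigin_base/i_yorigin_base/i_zorigin_base,
+! i_base/i_base_y/i_base_z, check_rand, name).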
+ integer*8 xorigin_store(0:1,0:1,0:1) + integer*8 yorigin_store(0:1,0:1,0:1) + integer*8 zorigin_store(0:1,0:1,0:1) + + integer*4 lev_common + integer*4 layer_min_store,layer_max_store + + integer*8 ix_abs_store,iy_abs_store,iz_abs_store + integer*8 ix_per_store,iy_per_store,iz_per_store + integer*8 ix_rel_store,iy_rel_store,iz_rel_store + + real*8 exp_coeffs(8,0:7,-1:maxdim_) + integer*8 xcursor(0:maxdim_),ycursor(0:maxdim_),zcursor(0:maxdim_) + +c Local box parameters + + integer*4 ixshift(0:1,0:1,0:1) + integer*4 iyshift(0:1,0:1,0:1) + integer*4 izshift(0:1,0:1,0:1) + + +c more state variables + real*8 cell_data(9,0:7) + integer*4 ixh_last,iyh_last,izh_last + integer init + + integer return_cell_props_init + integer reset_lecuyer_state_init + integer*8 p_xcursor(indmin:indmax),p_ycursor(indmin:indmax),p_zcursor(indmin:indmax) + + + + end type state_data + + + +c Switch for enabling custom spherical function +c Set isub_spherical_function = 1 to turn on the spherical function + integer*4 isub_spherical_function + parameter (isub_spherical_function=0) + + end module pan_state + + +c================================================================================ +c Begin white noise routines +c================================================================================ + recursive subroutine start_panphasia(ldata,descriptor,ngrid,VERBOSE) + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + character*100 descriptor + integer ngrid + integer VERBOSE + + + + integer*4 wn_level_base,i_base,i_base_y,i_base_z + integer*8 i_xorigin_base,i_yorigin_base,i_zorigin_base, check_rand + character*20 name + + integer ratio + integer lextra + integer level_p + + + integer*8 ix_abs,iy_abs,iz_abs + integer*8 ix_per,iy_per,iz_per + integer*8 ix_rel,iy_rel,iz_rel + + !integer layer_min,layer_max,indep_field + !common /oct_range/ layer_min,layer_max,indep_field + + call parse_descriptor(descriptor ,wn_level_base,i_xorigin_base,i_yorigin_base, + & i_zorigin_base,i_base,i_base_y,i_base_z,check_rand,name) + + + lextra = (log10(real(ngrid)/real(i_base))+0.001)/log10(2.0) + ratio = 2**lextra + + if (ratio*i_base.ne.ngrid) + &stop 'Value of ngrid inconsistent with dim of region in Panphasia' + + level_p = wn_level_base + lextra + + ix_abs = ishft(i_xorigin_base,lextra) + iy_abs = ishft(i_yorigin_base,lextra) + iz_abs = ishft(i_zorigin_base,lextra) + + ix_per = i_base*ratio + iy_per = i_base*ratio + iz_per = i_base*ratio + +c Set the refinement position at the origin. + + ix_rel = 0 + iy_rel = 0 + iz_rel = 0 + + call set_phases_and_rel_origin(ldata,descriptor,level_p,ix_rel,iy_rel,iz_rel,VERBOSE) + +c Finally set the octree functions required for making cosmological +c initial conditions. These are passed using a common block. 
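+c (In this version these values are carried in the state_data argument
+c ldata rather than in the original /oct_range/ common block, which is
+c left commented out above.)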
+ + ldata%layer_min = 0 + ldata%layer_max = level_p + ldata%indep_field = 1 + + end +c================================================================================= + recursive subroutine set_phases_and_rel_origin(ldata,descriptor,lev,ix_rel,iy_rel,iz_rel,VERBOSE) + use pan_state + !use descriptor_phases + implicit none + type(state_data), intent(inout) :: ldata + character*100 descriptor + integer lev + integer*8 ix_abs,iy_abs,iz_abs + integer*8 ix_per,iy_per,iz_per + integer*8 ix_rel,iy_rel,iz_rel + integer*8 xorigin,yorigin,zorigin + + integer VERBOSE + integer MYID + integer*8 maxco + integer i + integer px,py,pz + + integer lnblnk + integer*8 mconst + parameter(mconst = 2147483647_Dint) + + integer*4 wn_level_base,i_base,i_base_y,i_base_z + integer*8 i_xorigin_base,i_yorigin_base,i_zorigin_base, check_rand + integer lextra,ratio + character*20 phase_name + +c----------------------------------------------------------------------------------------------- + + call initialise_panphasia(ldata) + + call validate_descriptor(ldata, descriptor,-1,check_rand) + + call parse_descriptor(descriptor ,wn_level_base,i_xorigin_base,i_yorigin_base, + & i_zorigin_base,i_base,i_base_y,i_base_z,check_rand,phase_name) + lextra = lev - wn_level_base + ratio = 2**lextra + + ix_abs = ishft(i_xorigin_base,lextra) + iy_abs = ishft(i_yorigin_base,lextra) + iz_abs = ishft(i_zorigin_base,lextra) + + ix_per = i_base*ratio + iy_per = i_base*ratio + iz_per = i_base*ratio + +c------------------------------------------------------------------------- +c Error checking +c------------------------------------------------------------------------- + if ((lev.lt.0).or.(lev.gt.maxlev_)) stop 'Level out of range! (1)' + + + maxco = 2_dint**lev + + if (ix_abs.lt.0) stop 'Error: ix_abs negative (1)' + if (iy_abs.lt.0) stop 'Error: iy_abs negative (1)' + if (iz_abs.lt.0) stop 'Error: iz_abs negative (1)' + + if (ix_rel.lt.0) stop 'Error: ix_rel negative (1)' + if (iy_rel.lt.0) stop 'Error: iy_rel negative (1)' + if (iz_rel.lt.0) stop 'Error: iz_rel negative (1)' + + + if (ix_abs+ix_rel.ge.maxco) + & stop 'Error: ix_abs + ix_rel out of range. (1)' + if (iy_abs+iy_rel.ge.maxco) + & stop 'Error: iy_abs + iy_rel out of range. (1)' + if (iz_abs+iz_rel.ge.maxco) + & stop 'Error: iz_abs + iz_rel out of range. (1)' + +c---------------------------------------------------------------------------------------- +c To allow the local box to wrap around, if needed, define a series of eight +c 'origins'. For many purposes (ix,iy,iz) = (0,0,0) is the only origin needed. 
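+c Schematically, in the unwrapped case (px,py,pz)=(0,0,0) the absolute
+c coordinate is split into a parent-cell coordinate at level lev-1 plus
+c a parity bit,  ix_abs + ix_rel = 2*xorigin + ixshift,  ixshift in {0,1},
+c so that adv_panphasia_cell_properties can later recover the parent
+c cell as ixh = (ixcell+ixshift)/2 and the child index as
+c mod(ixcell+ixshift,2) (and similarly for y and z).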
+ + + do px=0,1 + do py=0,1 + do pz=0,1 + + xorigin = max(0,( ix_abs + ix_rel - px*ix_per )/2) + yorigin = max(0,( iy_abs + iy_rel - py*iy_per )/2) + zorigin = max(0,( iz_abs + iz_rel - pz*iz_per )/2) + + ldata%ixshift(px,py,pz) = max(0, ix_abs + ix_rel -px*ix_per) - 2*xorigin + ldata%iyshift(px,py,pz) = max(0, iy_abs + iy_rel -py*iy_per) - 2*yorigin + ldata%izshift(px,py,pz) = max(0, iz_abs + iz_rel -pz*iz_per) - 2*zorigin + + +c Store box details: store the positions at level lev-1 + + + ldata%xorigin_store(px,py,pz) = xorigin + ldata%yorigin_store(px,py,pz) = yorigin + ldata%zorigin_store(px,py,pz) = zorigin + + enddo + enddo + enddo + + ldata%lev_common = lev + + + ldata%ix_abs_store = ix_abs + ldata%iy_abs_store = iy_abs + ldata%iz_abs_store = iz_abs + + ldata%ix_per_store = ix_per + ldata%iy_per_store = iy_per + ldata%iz_per_store = iz_per + + ldata%ix_rel_store = ix_rel + ldata%iy_rel_store = iy_rel + ldata%iz_rel_store = iz_rel + + +c Reset all cursor values to negative numbers. + + do i=0,maxdim_ + ldata%xcursor(i) = -999 + ldata%ycursor(i) = -999 + ldata%zcursor(i) = -999 + enddo + if (VERBOSE.gt.1) then + if (MYID.lt.1) then + print*,'----------------------------------------------------------' + print*,'Successfully initialised Panphasia box at level ',lev + write (6,105) ix_abs,iy_abs,iz_abs + write (6,106) ix_rel,iy_rel,iz_rel + write (6,107) ix_per,iy_per,iz_per + write (6,*) 'Phases used: ',descriptor(1:lnblnk(descriptor)) + print*,'----------------------------------------------------------' + endif + endif + 105 format(' Abs origin: (',i12,',',i12,',',i12,')') + 106 format(' Rel origin: (',i12,',',i12,',',i12,')') + 107 format(' Periods : (',i12,',',i12,',',i12,')') + end +c================================================================================ + recursive subroutine initialise_panphasia( ldata ) + use Rand + use pan_state + implicit none + + type(state_data), intent(inout) :: ldata + + TYPE(Rand_state) :: state + TYPE(Rand_offset) :: offset + integer ninitialise + parameter (ninitialise=218) + integer i + real*8 rand_num + + + call Rand_seed(state,ninitialise) + + call Rand_save(ldata%base_state,state) + + call Rand_set_offset(offset,1) + +c Calculate offsets of powers of 2 times nmulti +c + + do i=0,maxpow_ + ldata%poweroffset(i) = Rand_mul_offset(offset,nmulti_) + offset = Rand_mul_offset(offset,2) + enddo + + +c Compute the base state for each level. + + call Rand_load(state,ldata%base_state) + state = Rand_step(state,8) + + do i=0,maxdim_ + call Rand_save(ldata%base_lev_start(1,i),state) + state = Rand_boost(state,ldata%poweroffset(3*i)) + enddo + +c Set superjump to value 2**137 - used occasionally in computing Gaussian variables +c when the value of the returned random number is less an 10-6. + + call Rand_set_offset(ldata%superjump,1) + + do i=1,137 + ldata%superjump = Rand_mul_offset(ldata%superjump,2) + enddo + + +c Run time test to see if one particular value can be recovered. + + call Rand_load(state,ldata%base_lev_start(1,34)) + call Rand_real(rand_num,state) + + if (abs(rand_num- 0.828481889948473d0).gt.1.e-14) then + print*,'Error in initialisation!' 
+ print*,'Rand_num = ',rand_num + print*,'Target value = ', 0.828481889948473d0 + stop + endif + return + end +c================================================================================= + recursive subroutine panphasia_cell_properties(ldata,ixcell,iycell,izcell,cell_prop) + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + !integer layer_min,layer_max,indep_field + !common /oct_range/ layer_min,layer_max,indep_field + integer*4 ixcell,iycell,izcell + real*8 cell_prop(9) + + call adv_panphasia_cell_properties(ldata,ixcell,iycell,izcell,ldata%layer_min, + & ldata%layer_max,ldata%indep_field,cell_prop) + return + end +c================================================================================= + recursive subroutine adv_panphasia_cell_properties(ldata,ixcell,iycell,izcell,layer_min, + & layer_max,indep_field,cell_prop) + use pan_state + !use descriptor_phases + implicit none + + type(state_data), intent(inout) :: ldata + + integer*4 lev + integer*4 ixcell,iycell,izcell + integer layer_min,layer_max,indep_field + real*8 cell_prop(9) +c real*8 cell_data(9,0:7) + integer*4 j,l,lx,ly,lz + integer*4 px,py,pz + +c integer*4 ixh_last,iyh_last,izh_last + +c integer init +c data init/0/ +c save init,cell_data,ixh_last,iyh_last,izh_last ! Keep internal state + + integer*4 ixh,iyh,izh + + lev = ldata%lev_common + +c------- Error checking ----------------------------- + + if (layer_min.gt.layer_max) then + + if (layer_min-layer_max.eq.1) then ! Not necessarily bad. No octree basis functions + do j=1,9 ! required at this level and position. + cell_prop(j) = 0.0d0 ! Set returned cell_prop data to zero. + enddo + return + endif + + print*,'Warning: layer_min.gt.layer_max!' + print*,'layer_min = ',layer_min + print*,'layer_max = ',layer_max + print*,'ixcell,iycell,izcell',ixcell,iycell,izcell + + call flush(6) + stop 'Error: layer_min.gt.layer_max' + endif + + if (layer_max.gt.ldata%lev_common) then + print*,'lev_common = ',ldata%lev_common + print*,'layer_min = ',layer_min + print*,'layer_max = ',layer_max + stop 'Error: layer_max.gt.lev_common' + endif + if ((indep_field.lt.-1).or.(indep_field.gt.1)) + & stop 'Error: indep_field out of range' + +c---------------------------------------------------- +c Check which 'origin' to use. + + px = 0 + py = 0 + pz = 0 + + if (ldata%ix_rel_store+ixcell.ge.ldata%ix_per_store) px = 1 ! Crossed x-periodic bndy + if (ldata%iy_rel_store+iycell.ge.ldata%iy_per_store) py = 1 ! Crossed y-periodic bndy + if (ldata%iz_rel_store+izcell.ge.ldata%iz_per_store) pz = 1 ! Crossed z-periodic bndy +c---------------------------------------------------- + + + ixh = (ixcell+ldata%ixshift(px,py,pz) )/2 + iyh = (iycell+ldata%iyshift(px,py,pz) )/2 + izh = (izcell+ldata%izshift(px,py,pz) )/2 + + lx = mod(ixcell+ldata%ixshift(px,py,pz) ,2) + ly = mod(iycell+ldata%iyshift(px,py,pz) ,2) + lz = mod(izcell+ldata%izshift(px,py,pz) ,2) + + + l = 4*lx + 2*ly + lz ! Determine which cell is required + +cc------------------ If no new evalation is needed skip assignment ----- + if ((ldata%init.eq.1).and.(ixh.eq.ldata%ixh_last).and.(iyh.eq.ldata%iyh_last).and. + & (izh.eq.ldata%izh_last).and.(layer_min.eq.ldata%layer_min_store).and. + & (layer_max.eq.ldata%layer_max_store)) goto 24 +cc----------------------------------------------------------------------------- + + + call return_cell_props(ldata,lev,ixh,iyh,izh,px,py,pz,layer_min, + & layer_max,indep_field,ldata%cell_data) + +c Remember previous values. 
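+c (ixh_last/iyh_last/izh_last cache the parent cell evaluated above so
+c that consecutive calls for sibling cells of the same parent can skip
+c return_cell_props via the goto 24 shortcut.)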
+ + ldata%ixh_last = ixh + ldata%iyh_last = iyh + ldata%izh_last = izh + + + 24 continue + + + do j=1,9 + cell_prop(j) = ldata%cell_data(j,l) ! Copy the required data + enddo + + if (ldata%init.eq.0) ldata%init=1 + + return + end +c================================================================================= + recursive subroutine return_cell_props(ldata,lev_input,ix_half,iy_half,iz_half, + & px,py,pz,layer_min,layer_max,indep_field,cell_data) + use Rand + use pan_state + !use descriptor_phases + implicit none + type(state_data), intent(inout) :: ldata + integer lev_input,ix_half,iy_half,iz_half,px,py,pz + integer layer_min,layer_max,indep_field + real*8 cell_data(9,0:7) + + real*8 garray(0:63) + integer lev + integer*8 xarray,yarray,zarray + + integer i,istart,icell_name + + +c integer init +c data init/0/ +c save init + + + +c-------------------------------------------------------- +c--------------------------- Initialise level -1 -------- +c-------------------------------------------------------- + + if (ldata%return_cell_props_init.eq.0) then ! First time called. Set up the Legendre coefficients + ldata%return_cell_props_init = 1 ! for the root cell. This is the first term on the + call Rand_load(ldata%current_state(-1),ldata%base_state) ! right hand side of the equation in appendix C of + call return_gaussian_array(ldata,-1,8,garray) ! Jenkins 2013 that defines PANPHASIA. + ldata%exp_coeffs(1,0,-1) = garray(0) + ldata%exp_coeffs(2,0,-1) = garray(1) + ldata%exp_coeffs(3,0,-1) = garray(2) + ldata%exp_coeffs(4,0,-1) = garray(3) + ldata%exp_coeffs(5,0,-1) = garray(4) + ldata%exp_coeffs(6,0,-1) = garray(5) + ldata%exp_coeffs(7,0,-1) = garray(6) + ldata%exp_coeffs(8,0,-1) = garray(7) + + ldata%layer_min_store = layer_min + ldata%layer_max_store = layer_max + + endif + +c-------------------------------------------------------- +c---------------------------- Error checking ------------ +c-------------------------------------------------------- + + lev = lev_input-1 + + if (lev_input.ne.ldata%lev_common) stop 'Box initialised at a different level !' + if (ix_half.lt.0) then + print*,'ix_half negative',ix_half + stop 'ix_half out of range!' + endif + if (iy_half.lt.0) stop 'iy_half out of range!' + if (iz_half.lt.0) then + print*,'iz_half negative',iz_half + stop 'iz_half out of range!' + endif + + + xarray = ldata%xorigin_store(px,py,pz) + ix_half + yarray = ldata%yorigin_store(px,py,pz) + iy_half + zarray = ldata%zorigin_store(px,py,pz) + iz_half + + +c If layer_max or layer_min have changed, rebuild from the start and reset the +c recorded value of layer_max and layer_min + + if ((layer_max.ne.ldata%layer_max_store).or.(layer_min.ne.ldata%layer_min_store)) then + + if (layer_min.gt.layer_max) stop 'layer_min > layer_max : 2' + + istart = max(1,layer_min-1) + + ldata%layer_max_store = layer_max + ldata%layer_min_store = layer_min + + goto 10 + + endif + + + if ((xarray.eq.ldata%xcursor(lev)).and.(yarray.eq.ldata%ycursor(lev)).and.(zarray.eq.ldata%zcursor(lev))) return ! Nothing to do. + +c=========================================================================================================== +c------------- First determine which levels need to be (re)computed +c=========================================================================================================== + + istart = 0 + do i=lev-1,0,-1 + if ((ishft(xarray,i-lev).eq.ldata%xcursor(i)).and.(ishft(yarray,i-lev).eq.ldata%ycursor(i)).and. 
+ & (ishft(zarray,i-lev).eq.ldata%zcursor(i))) then + istart = i+1 + goto 10 + endif + enddo + + 10 continue + + +c==================================================================================== +c------------- Now compute each level as required and update (x,y,z) cursor variables +c==================================================================================== + + do i=istart,lev + + icell_name = 0 + + ldata%xcursor(i) = ishft(xarray,i-lev) + ldata%ycursor(i) = ishft(yarray,i-lev) + ldata%zcursor(i) = ishft(zarray,i-lev) + + if (btest(ldata%xcursor(i),0)) icell_name = icell_name + 4 + if (btest(ldata%ycursor(i),0)) icell_name = icell_name + 2 + if (btest(ldata%zcursor(i),0)) icell_name = icell_name + 1 + + call reset_lecuyer_state(ldata,i,ldata%xcursor(i),ldata%ycursor(i),ldata%zcursor(i)) + + if (isub_spherical_function.ne.1) then + call return_gaussian_array(ldata,i,64,garray) + else + call return_oct_sf_expansion(ldata,i,lev,ldata%xcursor(i),ldata%ycursor(i),ldata%zcursor(i), + & 64,garray) + endif + + + call evaluate_panphasia(ldata,i,maxdim_,garray,layer_min, + & layer_max, indep_field, icell_name,cell_data,ldata%exp_coeffs) + + enddo + return + end +c================================================================================= + recursive subroutine evaluate_panphasia(ldata,nlev,maxdim,g, + & layer_min,layer_max,indep_field,icell_name,cell_data,leg_coeff) + use pan_state + implicit none +c--------------------------------------------------------------------------------- +c This subroutine calculates the Legendre block coefficients for the eight child +c cells of an octree cell. +c +c----------------- Define subroutine arguments ----------------------------------- + type(state_data), intent(inout) :: ldata + integer nlev,maxdim + integer layer_min,layer_max,indep_field + integer icell_name + real*8 leg_coeff(0:7,0:7,-1:maxdim),cell_data(0:8,0:7) + real*8 g(*) + +c----------------- Define constants using notation from appendix A of Jenkins 2013 + + real*8 a1,a2,b1,b2,b3,c1,c2,c3,c4 + + parameter(a1 = 0.5d0*sqrt(3.0d0), a2 = 0.5d0) + + parameter(b1 = 0.75d0, b2 = 0.25d0*sqrt(3.0d0)) + parameter(b3 = 0.25d0) + + parameter(c1 = sqrt(27.0d0/64.0d0), c2 = 0.375d0) + parameter(c3 = sqrt(3.0d0/64.0d0), c4 = 0.125d0) + +c----------------- Define octree variables -------------------------------- + + real*8 coeff_p000, coeff_p001, coeff_p010, coeff_p011 + real*8 coeff_p100, coeff_p101, coeff_p110, coeff_p111 + + real*8 positive_octant_lc(0:7,0:1,0:1,0:1),temp_value(0:7,0:7) + integer i,j,ix,iy,iz + integer icx,icy,icz + integer iox,ioy,ioz + real*8 parity,isig + real*8 usually_rooteighth_factor +c-------------------------------------------------------------------------- + +c------------- Set the Legendre block coefficients for the parent cell +c itself. These are either inherited from the octree above +c or set to zero depending on which levels of the octree +c have been selected to be populated with the octree +c basis functions. 
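+c
+c Side note: the constants defined above satisfy a1**2+a2**2 = 1,
+c b1**2+2*b2**2+b3**2 = 1 and c1**2+3*c2**2+3*c3**2+c4**2 = 1, i.e. the
+c parity blocks further down mix the parent coefficient and the octree
+c coefficients g(*) in a norm-preserving (orthogonal) way.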
+c--------------------------------------------------------------------------- + if (nlev.ge.layer_min) then + coeff_p000 = leg_coeff(0,icell_name,nlev-1) + coeff_p001 = leg_coeff(1,icell_name,nlev-1) + coeff_p010 = leg_coeff(2,icell_name,nlev-1) + coeff_p011 = leg_coeff(3,icell_name,nlev-1) + coeff_p100 = leg_coeff(4,icell_name,nlev-1) + coeff_p101 = leg_coeff(5,icell_name,nlev-1) + coeff_p110 = leg_coeff(6,icell_name,nlev-1) + coeff_p111 = leg_coeff(7,icell_name,nlev-1) + else + coeff_p000 = 0.0d0 + coeff_p001 = 0.0d0 + coeff_p010 = 0.0d0 + coeff_p011 = 0.0d0 + coeff_p100 = 0.0d0 + coeff_p101 = 0.0d0 + coeff_p110 = 0.0d0 + coeff_p111 = 0.0d0 + endif + +c Apply layer_max and indep_field inputs --------------------------------- + + if (indep_field.ne.-1) then + usually_rooteighth_factor = sqrt(0.125d0) + else + usually_rooteighth_factor = 0.0d0 ! This option returns only the indep field. + endif ! For use in testing only. + + if (nlev.ge.layer_max) then + do i=1,56 + g(i) = 0.0d0 ! Set octree coefficients to zero as not required. + enddo + endif + + if (indep_field.eq.0) then ! Set the independent field to zero as not required. + do i=57,64 + g(i) = 0.0d0 + enddo + endif +c----------------------------------------------------------------------------- +c +c +c The calculations immediately below evalute the eight Legendre block coefficients for the +c child cell that is furthest from the absolute coordiate origin of the octree - we call +c this the positive octant cell. +c +c The coefficients are given by a set of matrix equations which combine the +c coefficients of the Legendre basis functions of the parent cell itself, with +c the coefficients from the octree basis functions that occupy the +c parent cell. +c +c The Legendre basis function coefficients of the parent cell are stored in +c the variables, coeff_p000 - coeff_p111 and are initialise above. +c +c The coefficients of the octree basis functions are determined by the +c first 56 entries of the array g, which is passed down into this +c subroutine. +c +c These two sources of information are combined using a set of linear equations. +c The coefficients of these linear equations are taken from the inverses or +c equivalently transposes of the matrices given in appendix A of Jenkins 2013. +c The matrices in appendix A define the PANPHASIA octree basis functions +c in terms of Legendre blocks. +c +c All of the Legendre block functions of the parent cell, and the octree basis +c functions of the parent cell share one of eight distinct symmetries with respect to +c reflection about the x1=0,x2=0,x3=0 planes (where the origin is taken as the parent +c cell centre and x1,x2,x3 are parallel to the cell edges). +c +c Each function has either purely reflectional symmetry (even parity) or +c reflectional symmetry with a sign change (odd parity) about each of the three principal +c planes through the cell centre. There are therefore 8 parity types. We can label each +c parity type with a binary triplet. So 000 is pure reflectional symmetry about +c all of the principal planes. +c +c In the code below the parent cell Legendre block functions, and octree functions are +c organised into eight groups each with eight members. Each group has a common +c parity type. +c +c We keep the contributions of each parity type to each of the eight Legendre basis +c functions occupying the positive octant cell separate. 
Once they have all been +c computed, we can apply the different symmetry operations and determine the +c Legendre block basis functions for all eight child cells at the same time. +c--------------------------------------------------------------------------------------- +c 000 parity + + positive_octant_lc(0, 0,0,0) = 1.0d0*coeff_p000 + positive_octant_lc(1, 0,0,0) = -1.0d0*g(1) + positive_octant_lc(2, 0,0,0) = -1.0d0*g(2) + positive_octant_lc(3, 0,0,0) = 1.0d0*g(3) + positive_octant_lc(4, 0,0,0) = -1.0d0*g(4) + positive_octant_lc(5, 0,0,0) = 1.0d0*g(5) + positive_octant_lc(6, 0,0,0) = 1.0d0*g(6) + positive_octant_lc(7, 0,0,0) = -1.0d0*g(7) + +c 100 parity + + positive_octant_lc(0, 1,0,0) = a1*coeff_p100 - a2*g(8) + positive_octant_lc(1, 1,0,0) = g(9) + positive_octant_lc(2, 1,0,0) = g(10) + positive_octant_lc(3, 1,0,0) = -g(11) + positive_octant_lc(4, 1,0,0) = a2*coeff_p100 + a1*g(8) + positive_octant_lc(5, 1,0,0) = -g(12) + positive_octant_lc(6, 1,0,0) = -g(13) + positive_octant_lc(7, 1,0,0) = g(14) + +c 010 parity + + positive_octant_lc(0, 0,1,0) = a1*coeff_p010 - a2*g(15) + positive_octant_lc(1, 0,1,0) = g(16) + positive_octant_lc(2, 0,1,0) = a2*coeff_p010 + a1*g(15) + positive_octant_lc(3, 0,1,0) = -g(17) + positive_octant_lc(4, 0,1,0) = g(18) + positive_octant_lc(5, 0,1,0) = -g(19) + positive_octant_lc(6, 0,1,0) = -g(20) + positive_octant_lc(7, 0,1,0) = g(21) + + +c 001 parity + + positive_octant_lc(0, 0,0,1) = a1*coeff_p001 - a2*g(22) + positive_octant_lc(1, 0,0,1) = a2*coeff_p001 + a1*g(22) + positive_octant_lc(2, 0,0,1) = g(23) + positive_octant_lc(3, 0,0,1) = -g(24) + positive_octant_lc(4, 0,0,1) = g(25) + positive_octant_lc(5, 0,0,1) = -g(26) + positive_octant_lc(6, 0,0,1) = -g(27) + positive_octant_lc(7, 0,0,1) = g(28) + +c 110 parity + + positive_octant_lc(0, 1,1,0) = b1*coeff_p110 - b2*g(29) + b3*g(30) - b2*g(31) + positive_octant_lc(1, 1,1,0) = -g(32) + positive_octant_lc(2, 1,1,0) = b2*coeff_p110 - b3*g(29) - b2*g(30) + b1*g(31) + positive_octant_lc(3, 1,1,0) = g(33) + positive_octant_lc(4, 1,1,0) = b2*coeff_p110 + b1*g(29) + b2*g(30) + b3*g(31) + positive_octant_lc(5, 1,1,0) = g(34) + positive_octant_lc(6, 1,1,0) = b3*coeff_p110 + b2*g(29) - b1*g(30) - b2*g(31) + positive_octant_lc(7, 1,1,0) = -g(35) + + +c 011 parity + + positive_octant_lc(0, 0,1,1) = b1*coeff_p011 - b2*g(36) + b3*g(37) - b2*g(38) + positive_octant_lc(1, 0,1,1) = b2*coeff_p011 - b3*g(36) - b2*g(37) + b1*g(38) + positive_octant_lc(2, 0,1,1) = b2*coeff_p011 + b1*g(36) + b2*g(37) + b3*g(38) + positive_octant_lc(3, 0,1,1) = b3*coeff_p011 + b2*g(36) - b1*g(37) - b2*g(38) + positive_octant_lc(4, 0,1,1) = -g(39) + positive_octant_lc(5, 0,1,1) = g(40) + positive_octant_lc(6, 0,1,1) = g(41) + positive_octant_lc(7, 0,1,1) = -g(42) + +c 101 parity + + positive_octant_lc(0, 1,0,1) = b1*coeff_p101 - b2*g(43) + b3*g(44) - b2*g(45) + positive_octant_lc(1, 1,0,1) = b2*coeff_p101 - b3*g(43) - b2*g(44) + b1*g(45) + positive_octant_lc(2, 1,0,1) = -g(46) + positive_octant_lc(3, 1,0,1) = g(47) + positive_octant_lc(4, 1,0,1) = b2*coeff_p101 + b1*g(43) + b2*g(44) + b3*g(45) + positive_octant_lc(5, 1,0,1) = b3*coeff_p101 + b2*g(43) - b1*g(44) - b2*g(45) + positive_octant_lc(6, 1,0,1) = g(48) + positive_octant_lc(7, 1,0,1) = -g(49) + +c 111 parity + + positive_octant_lc(0, 1,1,1) = c1*coeff_p111 - c2*g(50) - c2*g(51) - c2*g(52) + c3*g(53) + c3*g(54) + c3*g(55) - c4*g(56) + positive_octant_lc(1, 1,1,1) = c2*coeff_p111 + c1*g(50) - c2*g(51) + c2*g(52) - c3*g(53) + c3*g(54) + c4*g(55) + c3*g(56) + positive_octant_lc(2, 1,1,1) = 
c2*coeff_p111 + c2*g(50) + c1*g(51) - c2*g(52) - c3*g(53) - c4*g(54) + c3*g(55) - c3*g(56) + positive_octant_lc(3, 1,1,1) = c3*coeff_p111 - c3*g(50) - c3*g(51) + c4*g(52) - c1*g(53) - c2*g(54) - c2*g(55) - c2*g(56) + positive_octant_lc(4, 1,1,1) = c2*coeff_p111 - c2*g(50) + c2*g(51) + c1*g(52) + c4*g(53) - c3*g(54) + c3*g(55) + c3*g(56) + positive_octant_lc(5, 1,1,1) = c3*coeff_p111 + c3*g(50) - c4*g(51) - c3*g(52) + c2*g(53) - c1*g(54) - c2*g(55) + c2*g(56) + positive_octant_lc(6, 1,1,1) = c3*coeff_p111 + c4*g(50) + c3*g(51) + c3*g(52) + c2*g(53) + c2*g(54) - c1*g(55) - c2*g(56) + positive_octant_lc(7, 1,1,1) = c4*coeff_p111 - c3*g(50) + c3*g(51) - c3*g(52) - c2*g(53) + c2*g(54) - c2*g(55) + c1*g(56) +c-------------------------------------------------------------------------------------------- +c +c +c We now calculate the Legendre basis coefficients for all eight child cells +c by applying the appropriate reflectional parities to the coefficients +c calculated above for the positive octant child cell. +c +c See equations A2 and A3 in appendix A of Jenkins 2013. +c +c The reflectional parity is given by (ix,iy,iz) loops below. +c +c The (icx,icy,icz) loops below, loop over the eight child cells. +c +c The positive octant child cell is given below by (icx=icy=icz=0) or i=7. +c +c The combination ix*icx +iy*icy +iz*icz is either even or odd, depending +c on whether the parity change is even or odd. +c +c The variables iox,ioy,ioz are used to loop over the different +c types of Legendre basis function. +c +c The combination iox*icx + ioy*icy + ioz*icz is either even and odd +c and identifies which coefficients keep or change sign respectively +c due to a pure reflection about the principal planes. +c-------------------------------------------------------------------------------------------- + + do iz=0,7 + do iy=0,7 + temp_value(iy,iz) = 0.0d0 ! Zero temporary sums + enddo + enddo +c-------------------------------------------------------------------------------------------- + do iz=0,1 ! Loop over z parity (0=keep sign, 1=change sign) + do iy=0,1 ! Loop over y parity (0=keep sign, 1=change sign) + do ix=0,1 ! Loop over x parity (0=keep sign, 1=change sign) + + + do icx=0,1 ! Loop over x-child cells + do icy=0,1 ! Loop over y-child cells + do icz=0,1 ! Loop over z-child cells + + if (mod(ix*icx+iy*icy+iz*icz,2).eq.0) then + parity = 1.0d0 + else + parity =-1.0d0 + endif + + i = 7 - 4*icx -2*icy - icz ! Calculate which child cell this is. + + + do iox=0,1 ! Loop over Legendre basis function type + do ioy=0,1 ! Loop over Legendre basis function type + do ioz=0,1 ! 
Loop over Legendre basis function type + + j = 4*iox + 2*ioy + ioz + + if (mod(iox*icx + ioy*icy + ioz*icz,2).eq.0) then + isig = parity + else + isig = -parity + endif + + temp_value(j,i) = temp_value(j,i) + isig*positive_octant_lc(j,ix,iy,iz) + + enddo + enddo + enddo + + enddo + enddo + enddo + + enddo + enddo + enddo + + +c Assign values of the output variables + + do i=0,7 + do j=0,7 + leg_coeff(j,i,nlev) = temp_value(j,i)*usually_rooteighth_factor + cell_data(j,i) = leg_coeff(j,i,nlev) + enddo + enddo + +c Finally set the independent field values + + cell_data(8,0) = g(57) + cell_data(8,1) = g(58) + cell_data(8,2) = g(59) + cell_data(8,3) = g(60) + cell_data(8,4) = g(61) + cell_data(8,5) = g(62) + cell_data(8,6) = g(63) + cell_data(8,7) = g(64) + + + return + end +c================================================================================= + recursive subroutine reset_lecuyer_state(ldata,lev,xcursor,ycursor,zcursor) + use pan_state + implicit none + + type(state_data), intent(inout) :: ldata + integer lev + integer*8 xcursor,ycursor,zcursor + +c integer indmin,indmax +c parameter (indmin=-1, indmax=60) +c integer*8 p_xcursor(indmin:indmax),p_ycursor(indmin:indmax),p_zcursor(indmin:indmax) +c save p_xcursor,p_ycursor,p_zcursor + integer i +c integer init +c data init/0/ +c save init + + if (ldata%reset_lecuyer_state_init.eq.0) then ! Initialise p_cursor variables with + ldata%reset_lecuyer_state_init = 1 ! negative values. + do i=indmin,indmax + ldata%p_xcursor(i) = -9999 + ldata%p_ycursor(i) = -9999 + ldata%p_zcursor(i) = -9999 + enddo + endif + + if ( (xcursor.eq.ldata%p_xcursor(lev)).and.(ycursor.eq.ldata%p_ycursor(lev)).and. + & (zcursor.eq.ldata%p_zcursor(lev)+1)) then + ldata%p_xcursor(lev) = xcursor + ldata%p_ycursor(lev) = ycursor + ldata%p_zcursor(lev) = zcursor + return + endif + + call advance_current_state(ldata,lev,xcursor,ycursor,zcursor) + + ldata%p_xcursor(lev) = xcursor + ldata%p_ycursor(lev) = ycursor + ldata%p_zcursor(lev) = zcursor + + + return + end +c================================================================================= + recursive subroutine advance_current_state(ldata,lev,x,y,z) + use Rand + use pan_state + !use descriptor_phases + implicit none + + type(state_data), intent(inout) :: ldata + + integer lev + integer*8 x,y,z + + integer*8 lev_range + + TYPE(Rand_offset) :: offset1,offset2 + TYPE(Rand_offset) :: offset_x,offset_y,offset_z,offset_total + + integer ndiv,nrem + integer*8 ndiv8,nrem8 + integer nfactor + parameter (nfactor=291071) ! Value unimportant except has to be > 262144 + + +c----- First some error checking ------------------------------------------ + if ((lev.lt.0).or.(lev.gt.maxlev_)) stop 'Level out of range! (2)' + + lev_range = 2_dint**lev + + + if ((x.lt.0).or.(x.ge.lev_range)) then + print*,'x,lev,lev_range',x,lev,lev_range + call flush(6) + stop 'x out of range!' + endif + if ((y.lt.0).or.(y.ge.lev_range)) then + print*,'y,lev,lev_range',y,lev,lev_range + stop 'y out of range!' + endif + if ((z.lt.0).or.(z.ge.lev_range)) stop 'z out of range!' +c---------------------------------------------------------------------------- +c +c Note the Rand_set_offset subroutine takes an integer*4 value +c for the offset value. For this reason we need to use integer*4 +c values - ndiv,nrem. As a precaution an explicit check is made +c to be sure that these values are calculated correctly. 
+c--------------------------------------------------------------------------- + + + call Rand_load(ldata%current_state(lev),ldata%base_lev_start(1,lev)) + + if (lev.eq.0) return + +c Calculate z-offset + + ndiv = z/nfactor + nrem = z - ndiv*nfactor + ndiv8 = ndiv + nrem8 = nrem + + if (ndiv8*nfactor+nrem8.ne.z) stop 'Error in z ndiv nrem' + + call Rand_set_offset(offset1,ndiv) + offset1 = Rand_mul_offset(offset1,nfactor) + call Rand_set_offset(offset2,nrem) + offset2 = Rand_add_offset(offset1,offset2) + offset_z = Rand_mul_offset(offset2,nmulti_) + +c Calculate y-offset + + ndiv = y/nfactor + nrem = y - ndiv*nfactor + ndiv8 = ndiv + nrem8 = nrem + + if (ndiv8*nfactor+nrem8.ne.y) stop 'Error in y ndiv nrem' + + offset1 = Rand_mul_offset(ldata%poweroffset(lev),ndiv) + offset1 = Rand_mul_offset(offset1,nfactor) + offset2 = Rand_mul_offset(ldata%poweroffset(lev),nrem) + offset_y = Rand_add_offset(offset1,offset2) + +c Calculate x-offset + + ndiv = x/nfactor + nrem = x - ndiv*nfactor + ndiv8 = ndiv + nrem8 = nrem + + if (ndiv8*nfactor+nrem8.ne.x) then + print*,'ndiv,nfactor,nrem,x',ndiv,nfactor,nrem,x + print*,'ndiv*nfactor+nrem',ndiv*nfactor+nrem + print*,'x-ndiv*nfactor-nrem',x-ndiv*nfactor-nrem + stop 'Error in x ndiv nrem' + endif + + offset1 = Rand_mul_offset(ldata%poweroffset(2*lev),ndiv) + offset1 = Rand_mul_offset(offset1,nfactor) + offset2 = Rand_mul_offset(ldata%poweroffset(2*lev),nrem) + offset_x = Rand_add_offset(offset1,offset2) + + offset1 = Rand_add_offset(offset_x,offset_y) + offset_total = Rand_add_offset(offset1, offset_z) + + ldata%current_state(lev) = Rand_boost(ldata%current_state(lev),offset_total) + + return + end +c================================================================================= + recursive subroutine return_gaussian_array(ldata,lev,ngauss,garray) + use Rand + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + integer lev,ngauss + real*8 garray(0:*) + TYPE(Rand_state) :: state + real*8 PI + parameter (PI=3.1415926535897932384d0) + real*8 branch + parameter (branch=1.d-6) + integer iloop + + real*8 temp,mag,ang + integer i + + if (mod(ngauss,2).ne.0) + & stop 'Error in return_gaussian_array - even pairs only' + +c First obtain a set of uniformly distributed pseudorandom numbers +c between 0 and 1. The method used is described in detail in +c appendix B of Jenkins 2013. + + do i=0,ngauss-1 + call Rand_real(garray(i),ldata%current_state(lev)) + + if (garray(i).lt.branch) then + garray(i) = branch + state = Rand_boost(ldata%current_state(lev),ldata%superjump) + iloop = 0 + 10 continue + call Rand_real(temp,state) + iloop = iloop+1 + if (temp.lt.branch) then + garray(i) = garray(i)*branch + state = Rand_boost(state,ldata%superjump) + if (iloop.gt.100) then + print*,'Too may iterations in return_gaussian_array!' + call flush(6) + stop + endif + goto 10 + else + garray(i) = garray(i)*temp + endif + endif + enddo + +c Apply Box-Muller transformation to create pairs of Gaussian +c pseudorandom numbers. 
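+c The loop below is a standard Box-Muller step, written out here for
+c reference: given a uniform pair (u1,u2) in (0,1],
+c     z1 = sqrt(-2 ln u1) * cos(2*pi*u2)
+c     z2 = sqrt(-2 ln u1) * sin(2*pi*u2)
+c are two independent standard Gaussian deviates. The transformation is
+c applied in place to consecutive pairs of garray.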
+ + do i=0,ngauss/2-1 + + mag = sqrt(-2.0d0*log(garray(2*i))) + ang = 2.0d0*PI*garray(2*i+1) + + garray(2*i) = mag*cos(ang) + garray(2*i+1) = mag*sin(ang) + + enddo + end +c================================================================================= + recursive subroutine parse_descriptor(string,l,ix,iy,iz,side1,side2,side3,check_int,name) + implicit none + integer nchar + parameter(nchar=100) + character*100 string + integer*4 l,side1,side2,side3,ierror + integer*8 ix,iy,iz + integer*8 check_int + character*20 name + + + integer i,ip,iq,ir + + ierror = 0 + + ip = 1 + do while (string(ip:ip).eq.' ') + ip = ip + 1 + enddo + + if (string(ip:ip+7).ne.'[Panph1,') then + ierror = 1 + print*,string(ip:ip+7) + goto 10 + endif + + ip = ip+8 + if (string(ip:ip).ne.'L') then + ierror = 2 + goto 10 + endif + + ip = ip+1 + + iq = ip + scan( string(ip:nchar),',') -1 + + if (ip.eq.iq) then + ierror = 3 + goto 10 + endif + + + read (string(ip:iq),*) l + + ip = iq+1 + + if (string(ip:ip).ne.'(') then + ierror = 4 + goto 10 + endif + + ip = ip+1 + + iq = ip + scan( string(ip:nchar),')') -2 + + read(string(ip:iq),*) ix,iy,iz + + ip = iq+2 + + if (string(ip:ip).ne.',') then + ierror = 5 + goto 10 + endif + + ip = ip+1 + if ((string(ip:ip).ne.'S').and.(string(ip:ip).ne.'D')) then + ierror = 6 + goto 10 + endif + + if (string(ip:ip).eq.'S') then + ip = ip + 1 + iq = ip + scan( string(ip:nchar),',') -2 + read (string(ip:iq),*) side1 + side2 = side1 + side3 = side1 + iq = iq+1 + if (string(iq:iq+2).ne.',CH') then + print*,string(ip:iq),string(iq:iq+2) + ierror = 6 + goto 10 + endif + else + ip = ip + 1 + if (string(ip:ip).ne.'(') then + ierror = 7 + goto 10 + endif + + + ip = ip + 1 + iq = ip + scan( string(ip:nchar),')') -2 + read (string(ip:iq),*) side1,side2,side3 + + iq = iq + 1 + + if (string(iq:iq).ne.')') then + ierror = 8 + goto 10 + endif + + iq = iq + 1 + + if (string(iq:iq+2).ne.',CH') then + ierror = 9 + goto 10 + endif + + endif + + ip = iq + 3 + + iq = ip + scan( string(ip:nchar),',') -2 + + read (string(ip:iq),*) check_int + + ip = iq + 1 + + if (string(ip:ip).ne.',') then + ierror = 10 + goto 10 + endif + + ip = ip+1 + + ir = ip + scan( string(ip:nchar),']') -2 + + iq = min(ir,ip+19) + + do i=1,20 + name(i:i)=' ' + enddo + + do i=ip,iq + name(i-ip+1:i-ip+1) = string(i:i) + enddo + + iq = ir + 1 + + if (string(iq:iq).ne.']') then + ierror = 11 + goto 10 + endif + + + 10 continue + + if (ierror.eq.0) return + + print*,'Error reading panphasian descriptor. 
Error number:',ierror + stop + + return + end +c================================================================================= + recursive subroutine compose_descriptor(l,ix,iy,iz,side,check_int,name,string) + implicit none + integer nchar + parameter(nchar=100) + character*100,intent(out)::string + character*20 name + integer*4 l,ltemp + integer*8 side + integer*8 ix,iy,iz + integer*8 check_int + + character*50 temp1,temp2,temp3,temp4,temp5,temp6 + integer lnblnk + + integer ip1,ip2,ip3,ip4,ip5,ip6 + + ltemp = l + + 5 continue + if ((mod(ix,2).eq.0).and.(mod(iy,2).eq.0).and.(mod(iz,2).eq.0).and.(mod(side,2).eq.0)) then + ix = ix/2 + iy = iy/2 + iz = iz/2 + side = side/2 + ltemp = ltemp-1 + goto 5 + endif + + + write (temp1,*) ltemp + ip1= scan(temp1,'0123456789') + write (temp2,*) ix + ip2= scan(temp2,'0123456789') + write (temp3,*) iy + ip3= scan(temp3,'0123456789') + write (temp4,*) iz + ip4= scan(temp4,'0123456789') + write (temp5,*) side + ip5= scan(temp5,'0123456789') + write (temp6,*) check_int + ip6= scan(temp6,'-0123456789') + + + string='[Panph1,L'//temp1(ip1:lnblnk(temp1))//',('//temp2(ip2:lnblnk(temp2)) + & //','//temp3(ip3:lnblnk(temp3))//','//temp4(ip4:lnblnk(temp4))//'),S' + & // temp5(ip5:lnblnk(temp5))//',CH'//temp6(ip6:lnblnk(temp6))// + & ','//name(1:lnblnk(name))//']' + + return + + end +c================================================================================= + recursive subroutine validate_descriptor(ldata,string,MYID,check_number) + use pan_state + implicit none + + type(state_data), intent(inout) :: ldata + character*100 string + integer*8 check_number + integer MYID + + character*20 phase_name + integer*4 lev + + integer*8 ix_abs,iy_abs,iz_abs + integer*4 ix_base,iy_base,iz_base + + + integer*8 xval,yval,zval + integer val_state(5) + + TYPE(Rand_state) :: state + + real*8 rand_num + integer*8 mconst,check_total,check_rand + parameter(mconst = 2147483647_Dint) + integer ascii_list(0:255) + integer*8 maxco + integer i + integer*8 ii + integer lnblnk + + + + call parse_descriptor(string,lev,ix_abs,iy_abs,iz_abs, + & ix_base,iy_base,iz_base,check_rand,phase_name) + +c------------------------------------------------------------------------- +c Some basic checking +c------------------------------------------------------------------------- + if ((lev.lt.0).or.(lev.gt.maxlev_)) then + print*,'lev,maxlev',lev,maxlev_ + call flush(6) + stop 'Level out of range! (3)' + endif + + if ((mod(ix_abs,2).eq.0).and.(mod(iy_abs,2).eq.0).and.(mod(iz_abs,2).eq.0).and. + & (mod(ix_base,2).eq.0).and.(mod(iy_base,2).eq.0).and.(mod(iz_base,2).eq.0)) + & stop 'Parameters not at lowest level' + + + maxco = 2_dint**lev + + if (ix_abs.lt.0) stop 'Error: ix_abs negative (2)' + if (iy_abs.lt.0) stop 'Error: iy_abs negative (2)' + if (iz_abs.lt.0) stop 'Error: iz_abs negative (2)' + + + if (ix_abs+ix_base.ge.maxco) + & stop 'Error: ix_abs + ix_per out of range.' + if (iy_abs+iy_base.ge.maxco) + & stop 'Error: iy_abs + iy_per out of range.' + if (iz_abs+iz_base.ge.maxco) + & stop 'Error: iz_abs + iz_per out of range.' 
+ + check_total = 0 + + call initialise_panphasia(ldata) +c First corner + xval = ix_abs + ix_base - 1 + yval = iy_abs + zval = iz_abs + call advance_current_state(ldata,lev,xval,yval,zval) + call Rand_real(rand_num,ldata%current_state(lev)) + call Rand_save(val_state,ldata%current_state(lev)) + check_total = check_total + val_state(5) + if (MYID.eq.0) print*,'--------------------------------------' + if (MYID.eq.0) print*,'X-corner rand = ',rand_num + if (MYID.eq.0) print*,'State:',val_state +c Second corner + xval = ix_abs + yval = iy_abs + iy_base - 1 + zval = iz_abs + call advance_current_state(ldata,lev,xval,yval,zval) + call Rand_real(rand_num,ldata%current_state(lev)) + call Rand_save(val_state,ldata%current_state(lev)) + check_total = check_total + val_state(5) + if (MYID.eq.0) print*,'Y-corner rand = ',rand_num + if (MYID.eq.0) print*,'State:',val_state +c Third corner + xval = ix_abs + yval = iy_abs + zval = iz_abs + iz_base - 1 + call advance_current_state(ldata,lev,xval,yval,zval) + call Rand_real(rand_num,ldata%current_state(lev)) + call Rand_save(val_state,ldata%current_state(lev)) + check_total = check_total + val_state(5) + if (MYID.eq.0) print*,'z-corner rand = ',rand_num + if (MYID.eq.0) print*,'State:',val_state + if (MYID.eq.0) print*,'--------------------------------------' + +c Now encode the name. An integer for each ascii character is generated +c starting from the state which gives r0 - the first random number in +c Panphasia. The integer is in the range 0 - m-1. +c After making the list, then loop over non-blank characters +c in the name and take the ascii value, and sum the associated numbers. +c To avoid simple anagrams giving the same score, weight the integer +c by position in the string. Finally take mod m - to give the +c check number. + + call Rand_load(state,ldata%base_state) + + do i=0,255 + call Rand_real(rand_num,state) + call Rand_save(val_state,state) + ascii_list(i) = val_state(5) + enddo + + + + do ii=1,lnblnk(phase_name) + check_total = check_total + ii*ascii_list(iachar(phase_name(ii:ii))) + enddo + + + check_total = mod(check_total,mconst) + if (check_rand.eq.-999) then ! override the safety check number. + check_number = check_total + return + else + if (check_rand.ne.check_total) then + print*,'Inconsistency in the input panphasia descriptor ',MYID + print*,'Check_rand = ',check_rand + print*,'val_state(5) =',val_state(5) + print*,'xval,yval,zval',xval,yval,zval + print*,'lev_val = ',lev + call flush(6) + stop + endif + endif + + + return + end +c================================================================================= + recursive subroutine generate_random_descriptor(ldata,string) + use Rand + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + character*100 string + character*100 instring + character*20 name + integer*4 unix_timestamp + + real*8 lbox + real*8 lpanphasia + parameter (lpanphasia = 25000000.0) ! 
Units of Mpc/h + integer level + integer*8 cell_dim + integer val_state(5) + + TYPE(Rand_state) :: state + TYPE(Rand_offset) :: offset + + real*8 rand_num1,rand_num2 + integer*8 mconst,check_int + parameter(mconst = 2147483647_Dint) + integer*8 mfac,imajor,iminor + parameter(mfac=33554332_Dint) + integer ascii_list(0:255) + integer i,lnblnk + integer*8 ii + integer mult + + integer*8 ixco,iyco,izco,irange + + print*,'___________________________________________________________' + print* + print*,' Generate a random descriptor ' + print* + print*,'The code uses the time (the unix timestamp) plus some extra ' + print*,'information personal to the user to choose a random region ' + print*,'within PANPHASIA. The user must also specify the side length' + print*,'of the cosmological volume. The code assumes that the whole of' + print*,'PANPHASIA is 25000 Gpc/h on a side and selects an appropriate ' + print*,'level in the octree for the descriptor. ' + print*,'Assuming this scaling the small scale power is defined down ' + print*,'to a mass scale of around 10^{-12} solar masses.' + print* + print*,'The user must also specify a human readable label for the ' + print*,'descriptor of less than 21 characters.' + print*,'___________________________________________________________' + print* + print*,'Press return to continue ' + read (*,*) + print* + print*,'___________________________________________________________' + print*,'Enter the box side-length in Mpc/h units' + read (*,*) lbox + print*,'___________________________________________________________' + print* + print* + 5 continue + print*,'Enter up to 20 character name to label the descriptor (no spaces)' + read (*,'(a)') name + if ((len_trim(instring).lt.21).or.(scan(name,' ').le.len_trim(name))) goto 5 + print*,'___________________________________________________________' + print* + print* + print*,'___________________________________________________________' + print*,'The phases for the simulation are described by whole octree ' + print*,'cells. Enter an odd integer that defines the number of cells ' + print*,'you require in one dimension. Choose this number carefully ' + print*,'as it will limit the possible 1-D sizes of the of the Fourier ' + print*,'transforms that can be used to make initial conditions to a product ' + print*,'of this integer times any power of two. In which case the only' + print*,'choice is 1.)' + print*,'(I would recommend 3 unless the initial condition code is' + print*,'incapable of using grid sizes that are not purely powers of two.' + print*,'___________________________________________________________' + print* + 7 continue + print*,'Enter number of octree cells on an edge (positive odd number only) ' + read (*,*) cell_dim + if ((cell_dim.le.0).or.(mod(cell_dim,2).eq.0)) goto 7 + print*,'___________________________________________________________' + call system('date +%s>tempfile_42526037646') + open(16,file='tempfile_42526037646',status='old') + read (16,*) unix_timestamp + close(16) + call system('/bin/rm tempfile_42526037646') + + print*,'Unix_timestamp determined. Value: ',unix_timestamp + print*,'___________________________________________________________' + print* + print* + print* + print*,'___________________________________________________________' + print*,'The code has just read the unix timestamp and will use this' + print*,'to help choose a random region in PANPHASIA. 
Although it is' + print*,'perhaps unlikely that someone else is also running this code at ' + print*,'the same time to the nearest second, to make it more likely' + print*,' still that the desciptor to be generated is unique' + print*,'please enter your name or some other piece of information' + print*,'below that you think is unlikely to be used by anyone else' + print*,'___________________________________________________________' + + print* + + 10 continue + print*,'Please enter your name (a minimum of six characters)' + read (*,'(a)') instring !' + if (len_trim(instring).lt.6) goto 10 + + level = int(log10(dble(cell_dim)*lpanphasia/lbox)/log10(2.0d0)) + + if (level.gt.50) stop 'level >50 ' + + + +c 'd' lines allow the generation of a large set of +c descriptors. Use to check that they are randomly +c positioned over the available volume. + + +c First use the unix timestamp to initialises the +c random generator. + + call Rand_seed(state,unix_timestamp) + + call Rand_save(ldata%base_state,state) + + +c First generate an integer from the user data. + call Rand_load(state,ldata%base_state) + + do i=0,255 + call Rand_real(rand_num1,state) + call Rand_save(val_state,state) + ascii_list(i) = val_state(5) + enddo + + call Rand_set_offset(offset,1) + + do ii=1,lnblnk(instring) + mult = mod(ii*ascii_list(iachar(instring(ii:ii))),mconst) + offset = Rand_mul_offset(offset,mult) + enddo + + call Rand_load(state,ldata%base_state) + state = Rand_boost(state,offset) ! Starting point for choosing location. + + 20 continue + + irange = 2_Dint**level + imajor = irange/mfac + iminor = mod(irange,mfac) + + call Rand_real(rand_num1,state) + call Rand_real(rand_num2,state) + + ixco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor) + + if (ixco+cell_dim.ge.irange) goto 20 ! Invalid descriptor + + call Rand_real(rand_num1,state) + call Rand_real(rand_num2,state) + + iyco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor) + + if (iyco+cell_dim.ge.irange) goto 20 ! Invalid descriptor + + call Rand_real(rand_num1,state) + call Rand_real(rand_num2,state) + + izco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor) + + if (izco+cell_dim.ge.irange) goto 20 ! Invalid descriptor + + +c Value of the check digit is not known. Use validate_descriptor to compute it. + + check_int = -999 ! Special value required to make validate_descriptor + ! return the check digit. 
+ + call compose_descriptor(level,ixco,iyco,izco,cell_dim,check_int,name,string) + + call validate_descriptor(ldata,string,-1,check_int) + + call compose_descriptor(level,ixco,iyco,izco,cell_dim,check_int,name,string) + + + return + end +c================================================================================= + recursive subroutine demo_basis_function_allocator + + implicit none + integer nmax + parameter (nmax=10) + + integer*4 wn_level(nmax) + + integer*8 ix_abs(nmax),iy_abs(nmax),iz_abs(nmax) + integer*8 ix_per(nmax),iy_per(nmax),iz_per(nmax) + integer*8 ix_rel(nmax),iy_rel(nmax),iz_rel(nmax) + integer*8 ix_dim(nmax),iy_dim(nmax),iz_dim(nmax) + + integer ix,iy,iz,nref + integer layer_min,layer_max,indep_field + + + integer*8 itot_int,itot_ib + + integer inv_open + +c Assign some trial values + + nref = 3 + inv_open=9 + + wn_level(1) = 22 + + ix_abs(1) = 2000000 + iy_abs(1) = 1500032 + iz_abs(1) = 2500032 + + ix_per(1) = 768 + iy_per(1) = 768 + iz_per(1) = 768 + + ix_rel(1) = 0 + iy_rel(1) = 0 + iz_rel(1) = 0 + + ix_dim(1) = 768 + iy_dim(1) = 768 + iz_dim(1) = 768 + + + wn_level(2) = 23 + + ix_abs(2) = 4000000 + iy_abs(2) = 3000064 + iz_abs(2) = 5000064 + + ix_per(2) = 1536 + iy_per(2) = 1536 + iz_per(2) = 1536 + + ix_rel(2) = 256 + iy_rel(2) = 16 + iz_rel(2) = 720 + + ix_dim(2) = 768 + iy_dim(2) = 768 + iz_dim(2) = 768 + + + wn_level(3) = 24 + + ix_abs(3) = 8000000 + iy_abs(3) = 6000128 + iz_abs(3) = 10000128 + + ix_per(3) = 3072 + iy_per(3) = 3072 + iz_per(3) = 3072 + + ix_rel(3) = 896 + iy_rel(3) = 432 + iz_rel(3) = 1840 + + ix_dim(3) = 768 + iy_dim(3) = 768 + iz_dim(3) = 768 + + + itot_int = 0 + itot_ib = 0 + + + + + open(10,file='ascii_dump_r1',status='unknown') + + ix=320 + do iy=0,767 + do iz=0,767 + call layer_choice(ix,iy,iz,1,nref,ix_abs,iy_abs,iz_abs, + & ix_per,iy_per,iz_per,ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,inv_open,layer_min,layer_max,indep_field) + write(10,*) iy,iz,layer_min,layer_max,indep_field + enddo + enddo + close(10) + + open(10,file='ascii_dump_r2',status='unknown') + + ix=384 + do iy=0,767 + do iz=0,767 + call layer_choice(ix,iy,iz,2,nref,ix_abs,iy_abs,iz_abs, + & ix_per,iy_per,iz_per,ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,inv_open,layer_min,layer_max,indep_field) + write(10,*) iy,iz,layer_min,layer_max,indep_field + enddo + enddo + close(10) + + open(10,file='ascii_dump_r3',status='unknown') + + ix=384 + do iy=0,767 + do iz=0,767 + call layer_choice(ix,iy,iz,3,nref,ix_abs,iy_abs,iz_abs, + & ix_per,iy_per,iz_per,ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,inv_open,layer_min,layer_max,indep_field) + write(10,*) iy,iz,layer_min,layer_max,indep_field + enddo + enddo + close(10) + end +c================================================================================= + recursive subroutine layer_choice(ix0,iy0,iz0,iref,nref, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,x_fact,layer_min,layer_max,indep_field) + implicit none + + integer ix0,iy0,iz0,iref,nref,isize,ibase + integer ix,iy,iz,irefplus + integer ione + + integer*8 ix_abs(nref),iy_abs(nref),iz_abs(nref) + integer*8 ix_per(nref),iy_per(nref),iz_per(nref) + integer*8 ix_rel(nref),iy_rel(nref),iz_rel(nref) + integer*8 ix_dim(nref),iy_dim(nref),iz_dim(nref) + + integer wn_level(nref) + integer layer_min,layer_max,indep_field,x_fact + integer idebug + + + integer interior,iboundary + + if (iref.eq.9999) then + idebug = 1 + else + idebug = 0 + endif + + ione = 1 + + irefplus = 
min(iref+1,nref) + + if (nref.eq.1) then ! Deal with simplest case + layer_min = 0 + layer_max = wn_level(1) + indep_field = 1 + if (idebug.eq.1) print*,'return 1' + return + endif + +c----------- Case of the top periodic refinement. For this refinement layer_min=0 as +c----------- all the larger basis functions must be included. By default layer_max +c----------- is set to wn_level(1) so all basis functions are included. A check is +c----------- made to determine if the lowest basis function can be included in the +c----------- next refinement. If it can the same process is repeated for the next +c----------- largest basis function and this is repeated until a failure occurs. + + if ((iref.eq.1).and.(nref.gt.1)) then + ibase = 1 + 10 continue + + ix = ishft(ishft(ix_abs(iref)+ix_rel(iref)+ix0,-ibase),ibase)-ix_abs(iref)-ix_rel(iref) + iy = ishft(ishft(iy_abs(iref)+iy_rel(iref)+iy0,-ibase),ibase)-iy_abs(iref)-iy_rel(iref) + iz = ishft(ishft(iz_abs(iref)+iz_rel(iref)+iz0,-ibase),ibase)-iz_abs(iref)-iz_rel(iref) + isize = ishft(ione,ibase) + + call inref(ix,iy,iz,isize,iref,irefplus,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + + if ((interior.eq.1).and.(iboundary.eq.1)) then + ibase = ibase + 1 + goto 10 + endif + + layer_min = 0 + layer_max = wn_level(iref) - ibase + 1 + if (layer_max.ne.wn_level(iref)) then + indep_field = 0 + else + indep_field = 1 + endif + + if (idebug.eq.1) then + print*,'iref,wn_level(iref)',iref,wn_level(iref) + print*,'Return 2',layer_min,layer_max,indep_field + endif + + return + endif +c------------------------------------------------------------------------------------------ +c------------------------------------------------------------------------------------------ + + +c----------- For second or higher refinement determine layer_min by reference +c----------- to itself. In this case the loop continues until a basis function +c------------ is found which fits in a larger refinement + + ibase = 1 + + 20 continue + + + ix = ishft(ishft(ix_abs(iref)+ix_rel(iref)+ix0,-ibase),ibase)-ix_abs(iref)-ix_rel(iref) + iy = ishft(ishft(iy_abs(iref)+iy_rel(iref)+iy0,-ibase),ibase)-iy_abs(iref)-iy_rel(iref) + iz = ishft(ishft(iz_abs(iref)+iz_rel(iref)+iz0,-ibase),ibase)-iz_abs(iref)-iz_rel(iref) + isize = ishft(ione,ibase) + + call inref(ix,iy,iz,isize,iref,iref,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + + if ((interior.eq.1).and.(iboundary.eq.1)) then + ibase = ibase + 1 + goto 20 + endif + + layer_min = wn_level(iref) - max(ibase-2,0) ! 
Take last suitable refinement + + +c----------- For an intermediate refinement define layer_max by reference to +c----------- the next refinement + + if (iref.lt.nref) then + ibase = 1 + + 30 continue + + ix = ishft(ishft(ix_abs(iref)+ix_rel(iref)+ix0,-ibase),ibase)-ix_abs(iref)-ix_rel(iref) + iy = ishft(ishft(iy_abs(iref)+iy_rel(iref)+iy0,-ibase),ibase)-iy_abs(iref)-iy_rel(iref) + iz = ishft(ishft(iz_abs(iref)+iz_rel(iref)+iz0,-ibase),ibase)-iz_abs(iref)-iz_rel(iref) + isize = ishft(ione,ibase) + + call inref(ix,iy,iz,isize,iref,irefplus,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + + if ((interior.eq.1).and.(iboundary.eq.1)) then + ibase = ibase + 1 + goto 30 + endif + + layer_max = wn_level(iref) - ibase + 1 + + if (layer_min.eq.wn_level(iref)) then + indep_field = 1 + else + indep_field = 0 + endif + else + layer_max = wn_level(iref) + indep_field = 1 + endif + + if (idebug.eq.1) then + print*,'Return 3' + print*,'layer_min,layer_max,indep_field',layer_min,layer_max,indep_field + print*,'interior,iboundary',interior,iboundary + print*,'ibase = ',ibase + print*,'iref,nref,wn_level(iref)',iref,nref,wn_level(iref) + endif + + + return + + end + + + + +c The function takes a given basis function specified by a corner ixc,iyc,izc +c and a size isz at level wn_c in the oct-tree and returns two integer values. +c (i) interior: +c Value 1 if the basis function is completely within the given +c refinement. +c +c Value 0 if the basis function is without the refinement, or +c overlaps the edges of the refinement, or the edges of the +c primary white noise patch. +c +c (ii) iboundary: +c Value 1 if the basis function is sufficiently far from the +c refinement boundary. +c +c Value 0 otherwise. +c The given refinement is defined at level wn_r in the oct-tree and by the variables +c (ix_rel,iy_rel,iz_rel) which give the location of the refinement relative to +c corner of the white noise patch, (ix_per,iy_per,iz_per) which define the +c periodicity of the white noise patch, and (ix_dim,iy_dim,iz_dim) which +c define the size of the refinement. +c +c +c +c================================================================================= + recursive subroutine inref(ixc,iyc,izc,isz,ir1,ir2,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + implicit none + + integer nref + integer ixc,iyc,izc,isz,ir1,ir2 + integer wn_level(nref) + integer*8 ix_abs(nref),iy_abs(nref),iz_abs(nref) + integer*8 ix_per(nref),iy_per(nref),iz_per(nref) + integer*8 ix_rel(nref),iy_rel(nref),iz_rel(nref) + integer*8 ix_dim(nref),iy_dim(nref),iz_dim(nref) + integer interior, iboundary + integer x_fact + + integer*8 ixco,iyco,izco,isize + integer*8 ixref0,iyref0,izref0 + integer*8 ixref1,iyref1,izref1 + integer*8 idist + + integer delta_wn + +c Error checking + if (ir2.lt.ir1) stop 'ir2 +#include +#include + +#include +#include +#include + +#ifdef _OPENMP +#include +#endif + +#include + +const int maxdim = 60, maxlev = 50, maxpow = 3 * maxdim; +typedef int rand_offset_[5]; +typedef struct +{ + int state[133]; // Nstore = Nstate (=5) + Nbatch (=128) + int need_fill; + int pos; +} rand_state_; + +/* pan_state_ struct -- corresponds to respective fortran module in panphasia_routines.f + * data structure that contains all panphasia state variables + * it needs to get passed between the fortran routines to enable + * thread-safe execution. 
+ */ +typedef struct +{ + int base_state[5], base_lev_start[5][maxdim + 1]; + rand_offset_ poweroffset[maxpow + 1], superjump; + rand_state_ current_state[maxpow + 2]; + + int layer_min, layer_max, indep_field; + + long long xorigin_store[2][2][2], yorigin_store[2][2][2], zorigin_store[2][2][2]; + int lev_common, layer_min_store, layer_max_store; + long long ix_abs_store, iy_abs_store, iz_abs_store, ix_per_store, iy_per_store, iz_per_store, ix_rel_store, + iy_rel_store, iz_rel_store; + double exp_coeffs[8][8][maxdim + 2]; + long long xcursor[maxdim + 1], ycursor[maxdim + 1], zcursor[maxdim + 1]; + int ixshift[2][2][2], iyshift[2][2][2], izshift[2][2][2]; + + double cell_data[9][8]; + int ixh_last, iyh_last, izh_last; + int init; + + int init_cell_props; + int init_lecuyer_state; + long long p_xcursor[62], p_ycursor[62], p_zcursor[62]; + +} pan_state_; + +extern "C" +{ + void start_panphasia_(pan_state_ *lstate, const char *descriptor, int *ngrid, int *bverbose); + + void parse_descriptor_(const char *descriptor, int16_t *l, int32_t *ix, int32_t *iy, int32_t *iz, int16_t *side1, + int16_t *side2, int16_t *side3, int32_t *check_int, char *name); + + void panphasia_cell_properties_(pan_state_ *lstate, int *ixcell, int *iycell, int *izcell, double *cell_prop); + + void adv_panphasia_cell_properties_(pan_state_ *lstate, int *ixcell, int *iycell, int *izcell, int *layer_min, + int *layer_max, int *indep_field, double *cell_prop); + + void set_phases_and_rel_origin_(pan_state_ *lstate, const char *descriptor, int *lev, long long *ix_rel, + long long *iy_rel, long long *iz_rel, int *VERBOSE); +} + +struct panphasia_descriptor +{ + int16_t wn_level_base; + int32_t i_xorigin_base, i_yorigin_base, i_zorigin_base; + int16_t i_base, i_base_y, i_base_z; + int32_t check_rand; + std::string name; + + explicit panphasia_descriptor(std::string dstring) + { + char tmp[100]; + std::memset(tmp, ' ', 100); + parse_descriptor_(dstring.c_str(), &wn_level_base, &i_xorigin_base, &i_yorigin_base, &i_zorigin_base, &i_base, + &i_base_y, &i_base_z, &check_rand, tmp); + for (int i = 0; i < 100; i++) + if (tmp[i] == ' ') + { + tmp[i] = '\0'; + break; + } + name = tmp; + name.erase(std::remove(name.begin(), name.end(), ' '), name.end()); + } +}; + +// greatest common divisor +int gcd(int a, int b) +{ + if (b == 0) + return a; + return gcd(b, a % b); +} + +// least common multiple +int lcm(int a, int b) { return abs(a * b) / gcd(a, b); } + +// Two or largest power of 2 less than the argument +int largest_power_two_lte(int b) +{ + int a = 1; + if (b <= a) + return a; + while (2 * a < b) + a = 2 * a; + return a; +} + +class RNG_panphasia : public RNG_plugin +{ +private: +protected: + std::string descriptor_string_; + int num_threads_; + int levelmin_, levelmin_final_, levelmax_, ngrid_; + bool incongruent_fields_; + double inter_grid_phase_adjustment_; + // double translation_phase_; + pan_state_ *lstate; + int grid_p_, grid_m_; + double grid_rescale_fac_; + int coordinate_system_shift_[3]; + int ix_abs_[3], ix_per_[3], ix_rel_[3], level_p_, lextra_; + + void clear_panphasia_thread_states(void) + { + for (int i = 0; i < num_threads_; ++i) + { + lstate[i].init = 0; + lstate[i].init_cell_props = 0; + lstate[i].init_lecuyer_state = 0; + } + } + + void initialize_for_grid_structure(void) + { + clear_panphasia_thread_states(); + music::ilog.Print("PANPHASIA: running with %d threads", num_threads_); + + // if ngrid is not a multiple of i_base, then we need to enlarge and then sample down + ngrid_ = pcf_->get_value("setup", 
"GridRes"); + + grid_p_ = pdescriptor_->i_base; + grid_m_ = largest_power_two_lte(grid_p_); + + lextra_ = (log10((double)ngrid_ / (double)pdescriptor_->i_base) + 0.001) / log10(2.0); + int ratio = 1 << lextra_; + grid_rescale_fac_ = 1.0; + + coordinate_system_shift_[0] = -pcf_->get_value_safe("setup", "shift_x", 0); + coordinate_system_shift_[1] = -pcf_->get_value_safe("setup", "shift_y", 0); + coordinate_system_shift_[2] = -pcf_->get_value_safe("setup", "shift_z", 0); + + incongruent_fields_ = false; + if (ngrid_ != ratio * pdescriptor_->i_base) + { + incongruent_fields_ = true; + ngrid_ = 2 * ratio * pdescriptor_->i_base; + grid_rescale_fac_ = (double)ngrid_ / (1 << levelmin_); + music::ilog << "PANPHASIA: will use a higher resolution (using Fourier interpolation)" << std::endl; + music::ilog << " (" << grid_m_ << " -> " << grid_p_ << ") * 2**ref to be compatible with PANPHASIA" << std::endl; + } + } + + std::unique_ptr pdescriptor_; + +public: + explicit RNG_panphasia(config_file &cf) : RNG_plugin(cf) + { + descriptor_string_ = pcf_->get_value("random", "descriptor"); + +#ifdef _OPENMP + num_threads_ = omp_get_max_threads(); +#else + num_threads_ = 1; +#endif + + // create independent state descriptions for each thread + lstate = new pan_state_[num_threads_]; + + // parse the descriptor for its properties + pdescriptor_ = std::make_unique(descriptor_string_); + + music::ilog.Print("PANPHASIA: descriptor \'%s\' is base %d,", pdescriptor_->name.c_str(), pdescriptor_->i_base); + + // write panphasia base size into config file for the grid construction + // as the gridding unit we use the least common multiple of 2 and i_base + std::stringstream ss; + //ARJ ss << lcm(2, pdescriptor_->i_base); + //ss << two_or_largest_power_two_less_than(pdescriptor_->i_base);//ARJ + ss << 2; //ARJ - set gridding unit to two + pcf_->insert_value("setup", "gridding_unit", ss.str()); + ss.str(std::string()); + ss << pdescriptor_->i_base; + pcf_->insert_value("random", "base_unit", ss.str()); + + this->initialize_for_grid_structure(); + } + + ~RNG_panphasia() { delete[] lstate; } + + bool isMultiscale() const { return true; } + + void Fill_Grid(Grid_FFT &g) + { + auto sinc = [](real_t x) { return (std::abs(x) > 1e-16) ? std::sin(x) / x : 1.0; }; + auto dsinc = [](real_t x) { return (std::abs(x) > 1e-16) ? 
(x * std::cos(x) - std::sin(x)) / (x * x) : 0.0; }; + const real_t sqrt3{std::sqrt(3.0)}, sqrt27{std::sqrt(27.0)}; + + // make sure we're in the right space + Grid_FFT &g0 = g; + g0.FourierTransformBackward(false); + + // temporaries + Grid_FFT g1(g.n_, g.length_); + Grid_FFT g2(g.n_, g.length_); + Grid_FFT g3(g.n_, g.length_); + Grid_FFT g4(g.n_, g.length_); + + clear_panphasia_thread_states(); + music::ilog.Print("PANPHASIA: running with %d threads", num_threads_); + + ngrid_ = pcf_->get_value("setup", "GridRes"); + + grid_p_ = pdescriptor_->i_base; + // grid_m_ = largest_power_two_lte(grid_p_); + if (ngrid_ % grid_p_ != 0) + { + music::elog << "Grid resolution " << ngrid_ << " is not divisible by PANPHASIA descriptor length " << grid_p_ << std::endl; + throw std::runtime_error("Chosen [setup] / GridRes is not compatible with PANPHASIA descriptor length!"); + } + + double t1 = get_wtime(); + double tp = t1; + +#pragma omp parallel + { +#ifdef _OPENMP + const int mythread = omp_get_thread_num(); +#else + const int mythread = 0; +#endif + + //int odd_x, odd_y, odd_z; + //int ng_level = ngrid_ * (1 << (level - levelmin_)); // full resolution of current level + + int verbosity = (mythread == 0); + char descriptor[100]; + std::memset(descriptor, 0, 100); + std::memcpy(descriptor, descriptor_string_.c_str(), descriptor_string_.size()); + + start_panphasia_(&lstate[mythread], descriptor, &ngrid_, &verbosity); + + { + panphasia_descriptor d(descriptor_string_); + + int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0); + int level_p = d.wn_level_base + lextra; + int ratio = 1 << lextra; + + lstate[mythread].layer_min = 0; + lstate[mythread].layer_max = level_p; + lstate[mythread].indep_field = 1; + + assert(ngrid_ == ratio * d.i_base); + + long long ix_rel[3]; + ix_rel[0] = 0; //ileft_corner_p[0]; + ix_rel[1] = 0; //ileft_corner_p[1]; + ix_rel[2] = 0; //ileft_corner_p[2]; + + set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2], + &verbosity); + + music::ilog.Print(" called set_phases_and_rel_origin level %d ix_rel iy_rel iz_rel %d %d %d\n", level_p, ix_rel[0], + ix_rel[1], ix_rel[2]); + } + + if (verbosity) + t1 = get_wtime(); + + std::array cell_prop; + pan_state_ *ps = &lstate[mythread]; + +#pragma omp for //nowait + for (size_t i = 0; i < g.size(0); i += 2) + { + for (size_t j = 0; j < g.size(1); j += 2) + { + for (size_t k = 0; k < g.size(2); k += 2) + { + + // ARJ - added inner set of loops to speed up evaluation of Panphasia + + for (int ix = 0; ix < 2; ++ix) + { + for (int iy = 0; iy < 2; ++iy) + { + for (int iz = 0; iz < 2; ++iz) + { + int ilocal = i + ix; + int jlocal = j + iy; + int klocal = k + iz; + + int iglobal = ilocal + g.local_0_start_; + int jglobal = jlocal; + int kglobal = klocal; + + adv_panphasia_cell_properties_(ps, &iglobal, &jglobal, &kglobal, &ps->layer_min, + &ps->layer_max, &ps->indep_field, &cell_prop[0]); + + g0.relem(ilocal, jlocal, klocal) = cell_prop[0]; + g1.relem(ilocal, jlocal, klocal) = cell_prop[4]; + g2.relem(ilocal, jlocal, klocal) = cell_prop[2]; + g3.relem(ilocal, jlocal, klocal) = cell_prop[1]; + g4.relem(ilocal, jlocal, klocal) = cell_prop[8]; + } + } + } + } + } + } + + if (verbosity) + { + music::ilog.Print("time for calculating PANPHASIA field : %f s, %f µs/cell", get_wtime() - t1, + 1e6 * (get_wtime() - t1) / g.global_size(0) / g.global_size(1) / g.global_size(2)); + } + } // end omp parallel region + + g0.FourierTransformForward(); + g1.FourierTransformForward(); + 
g2.FourierTransformForward(); + g3.FourierTransformForward(); + g4.FourierTransformForward(); + +#pragma omp parallel for + for (size_t i = 0; i < g0.size(0); i++) + { + for (size_t j = 0; j < g0.size(1); j++) + { + for (size_t k = 0; k < g0.size(2); k++) + { + if (!g0.is_nyquist_mode(i, j, k)) + { + auto kvec = g0.get_k(i, j, k); + + auto argx = 0.5 * M_PI * kvec[0] / g.kny_[0]; + auto argy = 0.5 * M_PI * kvec[1] / g.kny_[1]; + auto argz = 0.5 * M_PI * kvec[2] / g.kny_[2]; + + auto fx = sinc(argx); + auto gx = ccomplex_t(0.0, dsinc(argx)); + auto fy = sinc(argy); + auto gy = ccomplex_t(0.0, dsinc(argy)); + auto fz = sinc(argz); + auto gz = ccomplex_t(0.0, dsinc(argz)); + + auto temp = (fx + sqrt3 * gx) * (fy + sqrt3 * gy) * (fz + sqrt3 * gz); + auto magnitude = std::sqrt(1.0 - std::abs(temp * temp)); + + auto y0(g0.kelem(i, j, k)), y1(g1.kelem(i, j, k)), y2(g2.kelem(i, j, k)), y3(g3.kelem(i, j, k)), y4(g4.kelem(i, j, k)); + + g0.kelem(i, j, k) = y0 * fx * fy * fz + sqrt3 * (y1 * gx * fy * fz + y2 * fx * gy * fz + y3 * fx * fy * gz) + + y4 * magnitude; + } + else + { + g0.kelem(i, j, k) = 0.0; + } + } + } + } + + music::ilog.Print("\033[31mtiming [build panphasia field]: %f s\033[0m", get_wtime() - tp); + tp = get_wtime(); + + g1.FourierTransformBackward(false); + g2.FourierTransformBackward(false); + g3.FourierTransformBackward(false); + g4.FourierTransformBackward(false); + +#pragma omp parallel + { +#ifdef _OPENMP + const int mythread = omp_get_thread_num(); +#else + const int mythread = 0; +#endif + + // int odd_x, odd_y, odd_z; + int verbosity = (mythread == 0); + char descriptor[100]; + std::memset(descriptor, 0, 100); + std::memcpy(descriptor, descriptor_string_.c_str(), descriptor_string_.size()); + + start_panphasia_(&lstate[mythread], descriptor, &ngrid_, &verbosity); + + { + panphasia_descriptor d(descriptor_string_); + + int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0); + int level_p = d.wn_level_base + lextra; + int ratio = 1 << lextra; + + lstate[mythread].layer_min = 0; + lstate[mythread].layer_max = level_p; + lstate[mythread].indep_field = 1; + + assert(ngrid_ == ratio * d.i_base); + + long long ix_rel[3]; + ix_rel[0] = 0; //ileft_corner_p[0]; + ix_rel[1] = 0; //ileft_corner_p[1]; + ix_rel[2] = 0; //ileft_corner_p[2]; + + set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2], + &verbosity); + + music::ilog.Print(" called set_phases_and_rel_origin level %d ix_rel iy_rel iz_rel %d %d %d\n", level_p, ix_rel[0], + ix_rel[1], ix_rel[2]); + } + + if (verbosity) + t1 = get_wtime(); + + //*************************************************************** + // Process Panphasia values: p110, p011, p101, p111 + //**************************************************************** + std::array cell_prop; + pan_state_ *ps = &lstate[mythread]; + +#pragma omp for //nowait + for (size_t i = 0; i < g1.size(0); i += 2) + { + for (size_t j = 0; j < g1.size(1); j += 2) + { + for (size_t k = 0; k < g1.size(2); k += 2) + { + // ARJ - added inner set of loops to speed up evaluation of Panphasia + for (int ix = 0; ix < 2; ++ix) + { + for (int iy = 0; iy < 2; ++iy) + { + for (int iz = 0; iz < 2; ++iz) + { + int ilocal = i + ix; + int jlocal = j + iy; + int klocal = k + iz; + + int iglobal = ilocal + g.local_0_start_; + int jglobal = jlocal; + int kglobal = klocal; + + adv_panphasia_cell_properties_(ps, &iglobal, &jglobal, &kglobal, &ps->layer_min, + &ps->layer_max, &ps->indep_field, &cell_prop[0]); + + g1.relem(ilocal, jlocal, 
klocal) = cell_prop[6]; + g2.relem(ilocal, jlocal, klocal) = cell_prop[3]; + g3.relem(ilocal, jlocal, klocal) = cell_prop[5]; + g4.relem(ilocal, jlocal, klocal) = cell_prop[7]; + } + } + } + } + } + } + } // end omp parallel region + + music::ilog.Print("\033[31mtiming [adv_panphasia_cell_properties2]: %f s \033[0m", get_wtime() - tp); + tp = get_wtime(); + + ///////////////////////////////////////////////////////////////////////// + // transform and convolve with Legendres + g1.FourierTransformForward(); + g2.FourierTransformForward(); + g3.FourierTransformForward(); + g4.FourierTransformForward(); + + #pragma omp parallel for + for (size_t i = 0; i < g1.size(0); i++) + { + for (size_t j = 0; j < g1.size(1); j++) + { + for (size_t k = 0; k < g1.size(2); k++) + { + if (!g1.is_nyquist_mode(i, j, k)) + { + auto kvec = g1.get_k(i, j, k); + + auto argx = 0.5 * M_PI * kvec[0] / g.kny_[0]; + auto argy = 0.5 * M_PI * kvec[1] / g.kny_[1]; + auto argz = 0.5 * M_PI * kvec[2] / g.kny_[2]; + + auto fx = sinc(argx); + auto gx = ccomplex_t(0.0, dsinc(argx)); + auto fy = sinc(argy); + auto gy = ccomplex_t(0.0, dsinc(argy)); + auto fz = sinc(argz); + auto gz = ccomplex_t(0.0, dsinc(argz)); + + auto y1(g1.kelem(i, j, k)), y2(g2.kelem(i, j, k)), y3(g3.kelem(i, j, k)), y4(g4.kelem(i, j, k)); + + g0.kelem(i, j, k) += 3.0 * (y1 * gx * gy * fz + y2 * fx * gy * gz + y3 * gx * fy * gz) + sqrt27 * y4 * gx * gy * gz; + } + } + } + } + + music::ilog.Print("\033[31mtiming [build panphasia field2]: %f s\033[0m", get_wtime() - tp); + // tp = get_wtime(); + + music::ilog.Print("PANPHASIA k-space statistices: mean Re = %f, std = %f", g0.mean(), g0.std()); + } +}; + +namespace +{ + RNG_plugin_creator_concrete creator("PANPHASIA"); +} +#endif // defined(USE_PANPHASIA) \ No newline at end of file From 997b934f032132d2279b80fe7c7d255b71eaae5f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 15:24:37 +0200 Subject: [PATCH 118/130] less screen output in panphasia plugin --- src/plugins/random_panphasia.cc | 35 ++++++++++++++------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/plugins/random_panphasia.cc b/src/plugins/random_panphasia.cc index d21dd7f..632d7fc 100644 --- a/src/plugins/random_panphasia.cc +++ b/src/plugins/random_panphasia.cc @@ -242,7 +242,7 @@ public: } double t1 = get_wtime(); - double tp = t1; + // double tp = t1; #pragma omp parallel { @@ -267,7 +267,7 @@ public: int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0); int level_p = d.wn_level_base + lextra; - int ratio = 1 << lextra; + // int ratio = 1 << lextra; lstate[mythread].layer_min = 0; lstate[mythread].layer_max = level_p; @@ -282,9 +282,6 @@ public: set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2], &verbosity); - - music::ilog.Print(" called set_phases_and_rel_origin level %d ix_rel iy_rel iz_rel %d %d %d\n", level_p, ix_rel[0], - ix_rel[1], ix_rel[2]); } if (verbosity) @@ -332,11 +329,11 @@ public: } } - if (verbosity) - { - music::ilog.Print("time for calculating PANPHASIA field : %f s, %f µs/cell", get_wtime() - t1, - 1e6 * (get_wtime() - t1) / g.global_size(0) / g.global_size(1) / g.global_size(2)); - } + // if (verbosity) + // { + // music::ilog.Print("time for calculating PANPHASIA field : %f s, %f µs/cell", get_wtime() - t1, + // 1e6 * (get_wtime() - t1) / g.global_size(0) / g.global_size(1) / g.global_size(2)); + // } } // end omp parallel region g0.FourierTransformForward(); @@ -383,8 +380,8 @@ public: } } - 
music::ilog.Print("\033[31mtiming [build panphasia field]: %f s\033[0m", get_wtime() - tp); - tp = get_wtime(); + // music::ilog.Print("\033[31mtiming [build panphasia field]: %f s\033[0m", get_wtime() - tp); + // tp = get_wtime(); g1.FourierTransformBackward(false); g2.FourierTransformBackward(false); @@ -412,7 +409,7 @@ public: int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0); int level_p = d.wn_level_base + lextra; - int ratio = 1 << lextra; + // int ratio = 1 << lextra; lstate[mythread].layer_min = 0; lstate[mythread].layer_max = level_p; @@ -427,9 +424,6 @@ public: set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2], &verbosity); - - music::ilog.Print(" called set_phases_and_rel_origin level %d ix_rel iy_rel iz_rel %d %d %d\n", level_p, ix_rel[0], - ix_rel[1], ix_rel[2]); } if (verbosity) @@ -478,8 +472,8 @@ public: } } // end omp parallel region - music::ilog.Print("\033[31mtiming [adv_panphasia_cell_properties2]: %f s \033[0m", get_wtime() - tp); - tp = get_wtime(); + // music::ilog.Print("\033[31mtiming [adv_panphasia_cell_properties2]: %f s \033[0m", get_wtime() - tp); + // tp = get_wtime(); ///////////////////////////////////////////////////////////////////////// // transform and convolve with Legendres @@ -518,9 +512,10 @@ public: } } - music::ilog.Print("\033[31mtiming [build panphasia field2]: %f s\033[0m", get_wtime() - tp); + // music::ilog.Print("\033[31mtiming [build panphasia field2]: %f s\033[0m", get_wtime() - tp); // tp = get_wtime(); - + music::ilog.Print("time for calculating PANPHASIA field : %f s, %f µs/cell", get_wtime() - t1, + 1e6 * (get_wtime() - t1) / g.global_size(0) / g.global_size(1) / g.global_size(2)); music::ilog.Print("PANPHASIA k-space statistices: mean Re = %f, std = %f", g0.mean(), g0.std()); } }; From e54db0223cc3e23294debcf76329a1adb709d4e1 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 15:28:29 +0200 Subject: [PATCH 119/130] updated gitignore --- .gitignore | 63 +++++++++--------------------------------------------- 1 file changed, 10 insertions(+), 53 deletions(-) diff --git a/.gitignore b/.gitignore index b012d08..bcbdff2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,57 +1,14 @@ -build +.DS_Store .vscode -src/CMakeFiles/3.12.2/CompilerIdC/CMakeCCompilerId.c -src/CMakeFiles/feature_tests.c -src/CMakeFiles/feature_tests.cxx -src/CMakeFiles/progress.marks -src/CMakeFiles/3.12.2/CMakeCCompiler.cmake -src/CMakeFiles/3.12.2/CMakeCXXCompiler.cmake -src/CMakeFiles/3.12.2/CMakeDetermineCompilerABI_C.bin -src/CMakeFiles/3.12.2/CMakeDetermineCompilerABI_CXX.bin -src/CMakeFiles/3.12.2/CMakeSystem.cmake -src/CMakeFiles/fastLPT.dir/build.make -src/CMakeFiles/FindMPI/test_mpi.cpp -src/CMakeFiles/FindMPI/test_mpi_C.bin -src/CMakeFiles/FindMPI/test_mpi_CXX.bin -src/CMakeFiles/FindOpenMP/OpenMPCheckVersion.c -src/CMakeFiles/FindOpenMP/OpenMPCheckVersion.cpp -src/CMakeFiles/FindOpenMP/OpenMPTryFlag.c -src/CMakeFiles/FindOpenMP/OpenMPTryFlag.cpp -src/CMakeFiles/FindOpenMP/ompver_C.bin -src/CMakeFiles/FindOpenMP/ompver_CXX.bin -src/CMakeFiles/fastLPT.dir/CXX.includecache -src/CMakeFiles/fastLPT.dir/DependInfo.cmake -src/CMakeFiles/fastLPT.dir/plugins/transfer_eisenstein.cc.o -src/CMakeFiles/3.12.2/CompilerIdCXX/a.out -src/CMakeFiles/fastLPT.dir/cmake_clean.cmake -src/CMakeFiles/fastLPT.dir/depend.internal -src/CMakeFiles/fastLPT.dir/depend.make -src/CMakeFiles/fastLPT.dir/flags.make -src/CMakeFiles/fastLPT.dir/grid_fft.cc.o -src/CMakeFiles/fastLPT.dir/link.txt 
-src/CMakeFiles/fastLPT.dir/logger.cc.o -src/CMakeFiles/fastLPT.dir/main.cc.o -src/CMakeFiles/fastLPT.dir/progress.make -src/CMakeFiles/fastLPT.dir/random_plugin.cc.o -src/CMakeFiles/fastLPT.dir/transfer_function_plugin.cc.o -src/CMakeFiles/fastLPT.dir/plugins/random_music.cc.o -src/CMakeFiles/fastLPT.dir/plugins/random_music_wnoise_generator.cc.o -src/CMakeFiles/feature_tests.bin -src/CMakeFiles/CMakeDirectoryInformation.cmake -src/CMakeFiles/CMakeOutput.log -src/CMakeFiles/Makefile.cmake -src/CMakeFiles/Makefile2 -src/CMakeFiles/TargetDirectories.txt -src/CMakeFiles/cmake.check_cache -src/CMakeFiles/3.12.2/CompilerIdC/a.out -src/CMakeFiles/3.12.2/CompilerIdCXX/CMakeCXXCompilerId.cpp -src/CMakeFiles/hdf5/cmake_hdf5_test.c -src/fastLPT.dSYM/Contents/Info.plist -src/fastLPT.dSYM/Contents/Resources/DWARF/fastLPT +build +include/cmake_config.hh +src/input_powerspec.txt +CMakeCache.txt +CMakeFiles/cmake.check_cache +src/CMakeFiles src/cmake_install.cmake src/CMakeCache.txt -src/fastLPT -src/input_powerspec.txt src/Makefile -.DS_Store -include/cmake_config.hh +external/panphasia/rand_base.mod +external/panphasia/rand_int.mod +external/panphasia/rand.mod \ No newline at end of file From b534c7ff35f295c9d43cef6b1e4736b2d0bae42b Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 21:02:24 +0200 Subject: [PATCH 120/130] fixed normalisation of CLASS when using ztarget and zstart --- include/cosmology_calculator.hh | 15 +++++------- src/plugins/transfer_CLASS.cc | 41 ++++++++++++++++++++++----------- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index bedc653..6f1fd7f 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -37,7 +37,7 @@ public: private: static constexpr double REL_PRECISION = 1e-10; interpolated_function_1d D_of_a_, f_of_a_, a_of_D_; - double Dnow_, Dplus_start_, astart_; + double Dnow_, Dplus_start_, Dplus_target_, astart_, atarget_; real_t integrate(double (*func)(double x, void *params), double a, double b, void *params) const { @@ -132,7 +132,8 @@ public: */ explicit calculator(config_file &cf) - : cosmo_param_(cf), astart_( 1.0/(1.0+cf.get_value("setup","zstart")) ) + : cosmo_param_(cf), astart_( 1.0/(1.0+cf.get_value("setup","zstart")) ), + atarget_( 1.0/(1.0+cf.get_value_safe("cosmology","ztarget",1./astart_-1.))) { // pre-compute growth factors and store for interpolation std::vector tab_a, tab_D, tab_f; @@ -143,6 +144,7 @@ public: Dnow_ = D_of_a_(1.0); Dplus_start_ = D_of_a_( astart_ ) / Dnow_; + Dplus_target_ = D_of_a_( atarget_ ) / Dnow_; // set up transfer functions and compute normalisation transfer_function_ = std::move(select_TransferFunction_plugin(cf)); @@ -150,7 +152,7 @@ public: if( !transfer_function_->tf_isnormalised_ ) cosmo_param_.pnorm = this->compute_pnorm_from_sigma8(); else{ - cosmo_param_.pnorm = 1.0; + cosmo_param_.pnorm = 1.0/Dplus_target_/Dplus_target_; auto sigma8 = this->compute_sigma8(); music::ilog << "Measured sigma_8 for given PS normalisation is " << sigma8 << std::endl; } @@ -160,9 +162,6 @@ public: << " : " << (transfer_function_->tf_is_distinct() ? 
"yes" : "no") << std::endl; music::ilog << std::setw(32) << std::left << "TF maximum wave number" << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl; - - // music::ilog << "D+(MUSIC) = " << this->get_growth_factor( 1.0/(1.0+cf.get_value("setup","zstart")) ) << std::endl; - // music::ilog << "pnrom = " << cosmo_param_.pnorm << std::endl; } ~calculator() @@ -306,9 +305,7 @@ public: */ inline real_t get_amplitude(real_t k, tf_type type) const { - // if the transfer function doesn't need backscaling, then divide out growth factor - real_t f = transfer_function_->tf_isnormalised_? 1.0/Dplus_start_ : 1.0; - return f * std::pow(k, 0.5 * cosmo_param_.nspect) * transfer_function_->compute(k, type) * cosmo_param_.sqrtpnorm; + return std::pow(k, 0.5 * cosmo_param_.nspect) * transfer_function_->compute(k, type) * cosmo_param_.sqrtpnorm; } //! Computes the normalization for the power spectrum diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index bcf85df..a842736 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -30,7 +30,7 @@ private: // gsl_spline *gsl_sp_Cplus_, *gsl_sp_Cminus_; // std::vector tab_Cplus_, tab_Cminus_; - double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_, astart_, atarget_, A_s_, n_s_, Tcmb_, tnorm_; + double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_, astart_, atarget_, A_s_, n_s_, sigma8_, Tcmb_, tnorm_; ClassParams pars_; std::unique_ptr the_ClassEngine_; @@ -89,7 +89,11 @@ private: //--- cosmological parameters, primordial ------------------------- add_class_parameter("P_k_ini type", "analytic_Pk"); - add_class_parameter("A_s", A_s_); + if( A_s_ > 0.0 ){ + add_class_parameter("A_s", A_s_); + }else{ + add_class_parameter("sigma8", sigma8_); + } add_class_parameter("n_s", n_s_); add_class_parameter("alpha_s", 0.0); add_class_parameter("T_cmb", Tcmb_); @@ -173,6 +177,8 @@ public: explicit transfer_CLASS_plugin(config_file &cf) : TransferFunction_plugin(cf) { + this->tf_isnormalised_ = true; + ofs_class_input_.open("input_class_parameters.ini", std::ios::trunc); h_ = pcf_->get_value("cosmology", "H0") / 100.0; @@ -183,28 +189,35 @@ public: atarget_ = 1.0 / (1.0 + ztarget_); zstart_ = pcf_->get_value("setup", "zstart"); astart_ = 1.0 / (1.0 + zstart_); - double lbox = pcf_->get_value("setup", "BoxLength"); - int nres = pcf_->get_value("setup", "GridRes"); A_s_ = pcf_->get_value_safe("cosmology", "A_s", -1.0); - double k_p = pcf_->get_value_safe("cosmology", "k_p", 0.05); n_s_ = pcf_->get_value("cosmology", "nspec"); Tcmb_ = cf.get_value_safe("cosmology", "Tcmb", 2.7255); - tnorm_ = 1.0; - - if (A_s_ > 0) - { - this->tf_isnormalised_ = true; - tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p * h_, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0)); - music::ilog << "Using A_s to normalise the transfer function!" << std::endl; + if (A_s_ > 0) { + music::ilog << "CLASS: Using A_s=" << A_s_<< " to normalise the transfer function." << std::endl; + }else{ + sigma8_ = pcf_->get_value_safe("cosmology", "sigma_8", -1.0); + if( sigma8_ < 0 ){ + throw std::runtime_error("Need to specify either A_s or sigma_8 for CLASS plugin..."); + } + music::ilog << "CLASS: Using sigma8_ =" << sigma8_<< " to normalise the transfer function." 
<< std::endl; } + // determine highest k we will need for the resolution selected + double lbox = pcf_->get_value("setup", "BoxLength"); + int nres = pcf_->get_value("setup", "GridRes"); kmax_ = std::max(20.0, 2.0 * M_PI / lbox * nres / 2 * sqrt(3) * 2.0); // 120% of spatial diagonal, or k=10h Mpc-1 + // initialise CLASS and get the normalisation this->init_ClassEngine(); + A_s_ = the_ClassEngine_->get_A_s(); // this either the input one, or the one computed from sigma8 + + // compute the normalisation to interface with MUSIC + double k_p = pcf_->get_value_safe("cosmology", "k_p", 0.05); + tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p * h_, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0)); + // compute the transfer function at z=0 using CLASS engine std::vector k, dc, tc, db, tb, dn, tn, dm, tm; - this->run_ClassEngine(0.0, k, dc, tc, db, tb, dn, tn, dm, tm); delta_c0_.set_data(k, dc); @@ -216,8 +229,8 @@ public: delta_m0_.set_data(k, dm); theta_m0_.set_data(k, tm); + // compute the transfer function at z=z_target using CLASS engine this->run_ClassEngine(ztarget_, k, dc, tc, db, tb, dn, tn, dm, tm); - delta_c_.set_data(k, dc); theta_c_.set_data(k, tc); delta_b_.set_data(k, db); From 1313905660d40c873145776b97dacc789ae40217 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 May 2020 00:14:47 +0200 Subject: [PATCH 121/130] working commit --- include/grid_fft.hh | 8 +- include/grid_interpolate.hh | 205 ++++++++++++++++++++++++++++++++++ include/particle_generator.hh | 55 +++++---- src/ic_generator.cc | 2 + 4 files changed, 244 insertions(+), 26 deletions(-) create mode 100644 include/grid_interpolate.hh diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 2d49f7f..c11d1b3 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -17,12 +17,16 @@ enum space_t #ifdef USE_MPI -template +template #else -template +template #endif class Grid_FFT { +public: + using data_t = data_t_; + static constexpr bool is_distributed_trait{bdistributed}; + protected: #if defined(USE_MPI) const MPI_Datatype MPI_data_t_type = diff --git a/include/grid_interpolate.hh b/include/grid_interpolate.hh new file mode 100644 index 0000000..b99c958 --- /dev/null +++ b/include/grid_interpolate.hh @@ -0,0 +1,205 @@ +#pragma once + +#include +#include + +#include + +template +struct grid_interpolate +{ + using data_t = typename grid_t::data_t; + using vec3 = std::array; + + static constexpr bool is_distributed_trait = grid_t::is_distributed_trait; + static constexpr int interpolation_order = interp_order; + + size_t nx_, ny_, nz_; + +#if defined(USE_MPI) + const MPI_Datatype MPI_data_t_type = + (typeid(data_t) == typeid(float)) ? MPI_FLOAT + : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE + : (typeid(data_t) == typeid(long double)) ? MPI_LONG_DOUBLE + : (typeid(data_t) == typeid(std::complex)) ? MPI_C_FLOAT_COMPLEX + : (typeid(data_t) == typeid(std::complex)) ? MPI_C_DOUBLE_COMPLEX + : (typeid(data_t) == typeid(std::complex)) ? 
MPI_C_LONG_DOUBLE_COMPLEX + : MPI_INT; +#endif + + std::vector boundary_; + const grid_t &gridref; + + explicit grid_interpolate(const grid_t &g) + : gridref(g), nx_(g.n_[0]), ny_(g.n_[1]), nz_(g.n_[2]) + { + static_assert(interpolation_order >= 0 && interpolation_order <= 2, "Interpolation order needs to be 0 (NGP), 1 (CIC), or 2 (TSC)."); + + if (is_distributed_trait) + { +#if defined(USE_MPI) + size_t nx = interpolation_order + 1; + size_t ny = g.n_[1]; + size_t nz = g.n_[2]; + + boundary_.assign(nx * ny * nz, data_t{0.0}); + + for (size_t i = 0; i < nx; ++i) + { + for (size_t j = 0; j < ny; ++j) + { + for (size_t k = 0; k < nx; ++k) + { + boundary_[(i * ny + j) * nz + k] = g.relem(i, j, k); + } + } + } + + int sendto = (MPI::get_rank() + MPI::get_size() - 1) % MPI::get_size(); + int recvfrom = (MPI::get_rank() + MPI::get_size() + 1) % MPI::get_size(); + + MPI_Status status; + status.MPI_ERROR = MPI_SUCCESS; + + MPI_Sendrecv_replace(&boundary_[0], nx * ny * nz, MPI::get_datatype(), sendto, + MPI::get_rank() + 1000, recvfrom, recvfrom + 1000, MPI_COMM_WORLD, &status); + + assert(status.MPI_ERROR == MPI_SUCCESS); +#endif + } + } + + data_t get_ngp_at(const std::array &pos, std::vector &val) const noexcept + { + size_t ix = static_cast(pos[0]); + size_t iy = static_cast(pos[1]); + size_t iz = static_cast(pos[2]); + return gridref.relem(ix - gridref.local_0_start_, iy, iz); + } + + data_t get_cic_at(const std::array &pos) const noexcept + { + size_t ix = static_cast(pos[0]); + size_t iy = static_cast(pos[1]); + size_t iz = static_cast(pos[2]); + real_t dx = pos[0] - real_t(ix), tx = 1.0 - dx; + real_t dy = pos[1] - real_t(iy), ty = 1.0 - dy; + real_t dz = pos[2] - real_t(iz), tz = 1.0 - dz; + size_t iy1 = (iy + 1) % ny_; + size_t iz1 = (iz + 1) % nz_; + + data_t val{0.0}; + + if( is_distributed_trait ){ + size_t localix = ix-gridref.local_0_start_; + val += this->relem(localix, iy, iz) * tx * ty * tz; + val += this->relem(localix, iy, iz1) * tx * ty * dz; + val += this->relem(localix, iy1, iz) * tx * dy * tz; + val += this->relem(localix, iy1, iz1) * tx * dy * dz; + + if( localix+1 >= gridref.local_0_size_ ){ + size_t localix1 = localix+1 - gridref.local_0_size_; + val += boundary_[(localix1*ny_+iy)*nz_+iz] * dx * ty * tz; + val += boundary_[(localix1*ny_+iy)*nz_+iz1] * dx * ty * dz; + val += boundary_[(localix1*ny_+iy1)*nz_+iz] * dx * dy * tz; + val += boundary_[(localix1*ny_+iy1)*nz_+iz1] * dx * dy * dz; + }else{ + size_t localix1 = localix+1; + val += this->relem(localix1, iy, iz) * dx * ty * tz; + val += this->relem(localix1, iy, iz1) * dx * ty * dz; + val += this->relem(localix1, iy1, iz) * dx * dy * tz; + val += this->relem(localix1, iy1, iz1) * dx * dy * dz; + } + }else{ + size_t ix1 = (ix + 1) % nx_; + val += this->relem(ix, iy, iz) * tx * ty * tz; + val += this->relem(ix, iy, iz1) * tx * ty * dz; + val += this->relem(ix, iy1, iz) * tx * dy * tz; + val += this->relem(ix, iy1, iz1) * tx * dy * dz; + val += this->relem(ix1, iy, iz) * dx * ty * tz; + val += this->relem(ix1, iy, iz1) * dx * ty * dz; + val += this->relem(ix1, iy1, iz) * dx * dy * tz; + val += this->relem(ix1, iy1, iz1) * dx * dy * dz; + } + + return val; + } + + // data_t get_tsc_at(const std::array &pos, std::vector &val) const + // { + // } + + int get_task(const vec3 &x, const std::vector &local0starts) const noexcept + { + auto it = std::lower_bound(local0starts.begin(), local0starts.end(), int(x[0])); + return std::distance(local0starts.begin(), it) - 1; + } + + void domain_decompose_pos(std::vector &pos) const 
noexcept + { + if (is_distributed_trait) + { +#if defined(USE_MPI) + int local_0_start = int(gridref.local_0_start_); + std::vector local0starts(MPI::get_size(), 0); + MPI_Alltoall(&local_0_start, 1, MPI_INT, &local0starts[0], 1, MPI_INT, MPI_COMM_WORLD); + + std::sort(pos.begin(), pos.end(), [&](auto x1, auto x2) { return get_task(x1) < get_task(x2); }); + std::vector sendcounts(MPI::get_size(), 0), sendoffsets(MPI::get_size(), 0); + std::vector recvcounts(MPI::get_size(), 0), recvoffsets(MPI::get_size(), 0); + for (auto x : pos) + { + sendcounts[get_task(x)] += 3; + } + + // int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) + MPI_Alltoall(&sendcounts[0], 1, MPI_INT, &recvcounts[0], 1, MPI_INT, MPI_COMM_WORLD); + + for (int i = 1; i < MPI::get_size(); ++i) + { + sendoffsets[i] = sendcounts[i - 1] + sendoffsets[i - 1]; + recvoffsets[i] = recvcounts[i - 1] + recvoffsets[i - 1]; + } + + // int MPI_Alltoallv(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, + // const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPI_Comm comm) + + MPI_Alltoallv(&pos[0], &sendcounts[0], &sendoffsets[0], MPI_data_t_type, + &pos[0], &recvcounts[0], &recvoffsets[0], MPI_data_t_type, MPI_COMM_WORLD); +#endif + } + } + + ccomplex_t compensation_kernel( vec3 k ) const noexcept + { + auto sinc = []( real_t x ){ (std::abs(x)>1e-10)? std::sin(x)/x : 1.0; }; + real_t dfx = sinc(0.5*M_PI*k[0]/gridref.kny_[0]); + real_t dfy = sinc(0.5*M_PI*k[1]/gridref.kny_[1]); + real_t dfz = sinc(0.5*M_PI*k[2]/gridref.kny_[2]); + real_t del = std::pow(dfx*dfy*dfz,1+interpolation_order); + return ccomplex_t(1.0) / del; + } + + void get_at(std::vector &pos, std::vector &val) const + { + + val.assign( pos.size(), data_t{0.0} ); + + for( size_t i=0; i +#include namespace particle { enum lattice{ + lattice_glass = -1, lattice_sc = 0, // SC : simple cubic lattice_bcc = 1, // BCC: body-centered cubic lattice_fcc = 2, // FCC: face-centered cubic @@ -37,11 +39,11 @@ const std::vector> second_lattice_shift = }; template -void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t& field ){ +void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t& field, size_t num_p = 0 ){ // number of modes present in the field - const size_t num_p_in_load = field.local_size(); + const size_t num_p_in_load = (lattice_type>=0)? 
field.local_size() : num_p; // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load): - const size_t overload = 1ull<(0,lattice_type); // 1 for sc, 2 for bcc, 4 for fcc, 8 for rsc // allocate memory for all local particles particles.allocate( overload * num_p_in_load, b64reals, b64ids ); // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well @@ -64,32 +66,37 @@ void initialize_lattice( container& particles, lattice lattice_type, const bool template void set_positions( container& particles, const lattice lattice_type, bool is_second_lattice, int idim, real_t lunit, const bool b64reals, field_t& field ) { - const size_t num_p_in_load = field.local_size(); - for( int ishift=0; ishift<(1<= 0 ){ + const size_t num_p_in_load = field.local_size(); + for( int ishift=0; ishift<(1<0 ){ - field.shift_field( lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift-1] ); - } - // read out values from phase shifted field and set assoc. particle's value - const auto ipcount0 = ishift * num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,lattice_shifts[lattice_type][ishift] - + (is_second_lattice? second_lattice_shift[lattice_type] : vec3_t{0.,0.,0.}) ); - if( b64reals ){ - particles.set_pos64( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - }else{ - particles.set_pos32( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + // can omit first shift since zero by convention, unless shifted already above, otherwise apply relative phase shift + if( ishift>0 ){ + field.shift_field( lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift-1] ); + } + // read out values from phase shifted field and set assoc. particle's value + const auto ipcount0 = ishift * num_p_in_load; + for( size_t i=0,ipcount=ipcount0; i(i,j,k,lattice_shifts[lattice_type][ishift] + + (is_second_lattice? second_lattice_shift[lattice_type] : vec3_t{0.,0.,0.}) ); + if( b64reals ){ + particles.set_pos64( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + }else{ + particles.set_pos32( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + } } } } } + }else{ + } } diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 6185af0..9e41b59 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -549,6 +549,8 @@ int Run( config_file& the_config ) // somewhat arbitrarily, start baryon particle IDs from 2**31 if we have 32bit and from 2**56 if we have 64 bits size_t IDoffset = (this_species == cosmo_species::baryon)? ((the_output_plugin->has_64bit_ids())? 
1ul<<56 : 1ul<<31): 0 ; + grid_interpolate<1,Grid_FFT> interp( tmp ); + // if output plugin wants particles, then we need to store them, along with their IDs if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { From dd4688953c78e8ba78a9508f1c9674912d370f9f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 May 2020 04:20:12 +0200 Subject: [PATCH 122/130] working commit, glass --- include/HDF_IO.hh | 1086 +++++++++++++++++++++++++++++++ include/grid_interpolate.hh | 61 +- include/particle_generator.hh | 202 ++++-- src/ic_generator.cc | 17 +- src/plugins/random_panphasia.cc | 15 +- 5 files changed, 1304 insertions(+), 77 deletions(-) create mode 100755 include/HDF_IO.hh diff --git a/include/HDF_IO.hh b/include/HDF_IO.hh new file mode 100755 index 0000000..53b3f92 --- /dev/null +++ b/include/HDF_IO.hh @@ -0,0 +1,1086 @@ +#pragma once +#if defined(USE_HDF5) + +#define H5_USE_16_API + +/* + HDF_IO.hh -- templated C++ HDF5 front-end functions, v1.2b + + Copyright (C) 2006-7 Oliver Hahn -- ojha@gmx.de + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include +#include +#include +#include +#include "hdf5.h" + +template +hid_t GetDataType( void ) +{ + if( typeid(T) == typeid(int) ) + return H5T_NATIVE_INT; + + if( typeid(T) == typeid(unsigned) ) + return H5T_NATIVE_UINT; + + if( typeid(T) == typeid(float) ) + return H5T_NATIVE_FLOAT; + + if( typeid(T) == typeid(double) ) + return H5T_NATIVE_DOUBLE; + + if( typeid(T) == typeid(long long) ) + return H5T_NATIVE_LLONG; + + if( typeid(T) == typeid(unsigned long long) ) + return H5T_NATIVE_ULLONG; + + if( typeid(T) == typeid(size_t) ) + return H5T_NATIVE_ULLONG; + + + std::cerr << " - Error: [HDF_IO] trying to evaluate unsupported type in GetDataType\n\n"; + return -1; +} + +#include + +class HDFException : public std::runtime_error { + public: + HDFException( const std::string &errtxt ) : std::runtime_error(errtxt) { } +}; + + +inline bool DoesFileExist( std::string Filename ){ + bool flag = false; + std::fstream fin(Filename.c_str(),std::ios::in|std::ios::binary); + if( fin.is_open() ) + flag=true; + fin.close(); + return flag; +} + +inline void AssertFileOpen( std::string Filename ) +{ + if( !DoesFileExist( Filename ) ){ + std::fstream fout( Filename.c_str(), std::ios::out|std::ios::binary); + fout.close(); + } +} + +inline void HDFCreateFile( std::string Filename ) +{ + hid_t HDF_FileID; + HDF_FileID = H5Fcreate( Filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT ); + H5Fclose( HDF_FileID ); +} + +template< typename T> +inline void HDFReadVector( const std::string Filename, const std::string ObjName, std::vector &Data ) +{ + HDFReadDataset( Filename, ObjName, Data ); +} + + + + +inline void HDFGetDatasetExtent( const std::string Filename, const std::string ObjName, std::vector &Extent ) +{ + hid_t HDF_FileID, HDF_DatasetID, HDF_DataspaceID; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + //... 
save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( HDF_DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + //... get space associated with dataset and its extensions + HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); + + int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); + + hsize_t *dimsize = new hsize_t[ndims]; + + H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + + Extent.clear(); + for(int i=0; i +inline void HDFReadDataset( const std::string Filename, const std::string ObjName, std::vector &Data ) +{ + + hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID; + hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( HDF_DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + //... get space associated with dataset and its extensions + HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); + + int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); + + hsize_t dimsize[ndims]; + + H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + + HDF_StorageSize = 1; + for(int i=0; i +inline void HDFReadSelect( const std::string Filename, const std::string ObjName, const std::vector& ii, std::vector &Data ){ + + hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID, HDF_MemspaceID; + hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( HDF_DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + } + + //... 
get space associated with dataset and its extensions + HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); + + hsize_t block[2]; + block[0] = ii.size(); + block[1] = 1; + + + Data.clear(); + Data.reserve( block[0]*block[1] ); + Data.assign( block[0]*block[1], (T)1 ); + + HDF_MemspaceID = H5Screate_simple( 2, block, NULL ); + // H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); + H5Sselect_elements( HDF_DataspaceID, H5S_SELECT_SET, ii.size(), (const hsize_t *)&ii[0] ); + + H5Dread( HDF_DatasetID, HDF_Type, HDF_MemspaceID, HDF_DataspaceID, H5P_DEFAULT, &Data[0] ); + + H5Sclose( HDF_DataspaceID ); + H5Sclose( HDF_MemspaceID ); + H5Dclose( HDF_DatasetID ); + H5Fclose( HDF_FileID ); + +} + +template +inline void HDFReadVectorSelect( const std::string Filename, const std::string ObjName, const std::vector& ii, std::vector &Data ){ + + hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID, HDF_MemspaceID; +// hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( HDF_DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + //... get space associated with dataset and its extensions + HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); + int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); + hsize_t dimsize[ndims]; + H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + + hsize_t block[2]; + block[0] = ii.size(); + block[1] = 3; + + std::vector coord; + for( unsigned i=0; i().swap(ii); + + + + + if( ii.size() == 0 ){ + std::cerr << "attempted to read empty block. skipping....\n"; + return; + } + //std::cerr << "starting 2 read...\n"; + H5Sselect_none( HDF_DataspaceID ); + if( H5Sselect_elements( HDF_DataspaceID, H5S_SELECT_SET, coord.size()/2, (const hsize_t *)&coord[0] ) < 0 )//(const hsize_t**)&coord[0] ) < 0 ) + std::cerr << " - could not select elements properly\n"; + + if(H5Sselect_valid( HDF_DataspaceID )<=0 ){ + std::cerr << "\n - sorry, invalid element selection in file \'"<< Filename.c_str() << "\'. 
\n - dumping 10 first indices...\n"; + + /*for( unsigned i=0; i<10; ++i ){ + for( unsigned k=0; k<3; ++k ){ + std::cerr << coord[3*i+k] << " "; + } + std::cerr << "\n"; + }*/ + + return; + } + + std::vector().swap(coord); + Data.assign( block[0]*block[1], (T)0 ); + HDF_MemspaceID = H5Screate_simple( 2, &block[0], NULL ); + + H5Dread( HDF_DatasetID, HDF_Type, HDF_MemspaceID, HDF_DataspaceID, H5P_DEFAULT, &Data[0] ); + + + H5Sclose( HDF_DataspaceID ); + H5Sclose( HDF_MemspaceID ); + H5Dclose( HDF_DatasetID ); + H5Fclose( HDF_FileID ); + +} + +template< typename T > +inline void HDFReadVectorSlab( const std::string Filename, const std::string ObjName, unsigned nStart, unsigned nCount, std::vector &Data ) +{ + hsize_t + offset[2], + stride[2], + count[2], + block[2]; + + hid_t MemspaceID, FilespaceID, DatasetID, FileID; + hid_t Type = GetDataType(); + + FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + DatasetID = H5Dopen( FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( FileID ); + throw HDFException(ss.str()); + return; + } + + FilespaceID = H5Dget_space( DatasetID ); + + offset[0] = nStart; + offset[1] = 0; + + count[0] = 1; + count[1] = 1; + + stride[0] = 1; + stride[1] = 1; + + block[0] = nCount; + block[1] = 3; + + + Data.clear(); + Data.reserve( block[0]*block[1] ); + Data.assign( block[0]*block[1], (T)1 ); + + MemspaceID = H5Screate_simple( 2, block, NULL ); + H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); + + H5Dread( DatasetID, Type, MemspaceID, FilespaceID, H5P_DEFAULT, &Data[0] ); + + H5Sclose( FilespaceID ); + H5Sclose( MemspaceID ); + H5Dclose( DatasetID ); + H5Fclose( FileID ); +} + +template< typename T > +inline void HDFReadDatasetSlab( const std::string Filename, const std::string ObjName, unsigned nStart, unsigned nCount, std::vector &Data ) +{ + hsize_t + offset[2], + stride[2], + count[2], + block[2]; + + hid_t MemspaceID, FilespaceID, DatasetID, FileID; + hid_t Type = GetDataType(); + + FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + DatasetID = H5Dopen( FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... 
dataset did not exist or was empty + if( DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( FileID ); + throw HDFException(ss.str()); + return; + } + + FilespaceID = H5Dget_space( DatasetID ); + + offset[0] = nStart; + offset[1] = 0; + + count[0] = 1; + count[1] = 1; + + stride[0] = 1; + stride[1] = 1; + + block[0] = nCount; + block[1] = 1; + + + Data.clear(); + Data.reserve( block[0]*block[1] ); + Data.assign( block[0]*block[1], (T)1 ); + + MemspaceID = H5Screate_simple( 2, block, NULL ); + H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); + + H5Dread( DatasetID, Type, MemspaceID, FilespaceID, H5P_DEFAULT, &Data[0] ); + + H5Sclose( FilespaceID ); + H5Sclose( MemspaceID ); + H5Dclose( DatasetID ); + H5Fclose( FileID ); +} + +template< typename T> +inline void HDFReadGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, T &Data ) +{ + + hid_t HDF_Type, HDF_FileID, HDF_GroupID, HDF_AttributeID; + // hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... attempt to open attribute + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_AttributeID = H5Aopen_name( HDF_GroupID, ObjName.c_str() ); + + if( HDF_FileID < 0 || HDF_GroupID < 0 || HDF_AttributeID < 0 ){ + std::stringstream ss; + ss << " - Warning: attribute \'" << GroupName.c_str() << "/" << ObjName.c_str() << "\' does not exist or is empty.\n"; + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + + H5Aread( HDF_AttributeID, HDF_Type, &Data ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + + H5Aclose( HDF_AttributeID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); + +} + +template< typename T> +inline void HDFReadGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, std::vector &Data ) +{ + + hid_t HDF_Type, HDF_FileID, HDF_GroupID, HDF_AttributeID, HDF_DataspaceID; + hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... attempt to open attribute + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_AttributeID = H5Aopen_name( HDF_GroupID, ObjName.c_str() ); + + if( HDF_FileID < 0 || HDF_GroupID < 0 || HDF_AttributeID < 0 ){ + std::stringstream ss; + ss << " - Warning: attribute \'" << GroupName.c_str() << "/" << ObjName.c_str() << "\' does not exist or is empty.\n"; + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + //... 
get space associated with dataset and its extensions + HDF_DataspaceID = H5Aget_space( HDF_AttributeID ); + + int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); + + hsize_t dimsize[ndims]; + + H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + + HDF_StorageSize = 1; + for(int i=0; i +inline void HDFWriteDataset( const std::string Filename, const std::string ObjName, const std::vector &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims = Data.size(); + HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); + HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), HDF_Type, + HDF_DataspaceID, H5P_DEFAULT ); + H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, + H5P_DEFAULT, &Data[0] ); + H5Dclose( HDF_DatasetID ); + H5Sclose( HDF_DataspaceID ); + + H5Fclose( HDF_FileID ); +} + +template< typename T > +inline void HDFWriteGroupDataset( const std::string Filename, const std::string GrpName, const std::string ObjName, const std::vector &Data ) +{ + + hid_t + HDF_FileID, + HDF_GroupID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_GroupID = H5Gopen( HDF_FileID, GrpName.c_str() ); + + HDF_Type = GetDataType(); + + HDF_Dims = Data.size(); + HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); + HDF_DatasetID = H5Dcreate( HDF_GroupID, ObjName.c_str(), HDF_Type, + HDF_DataspaceID, H5P_DEFAULT ); + H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, + H5P_DEFAULT, &Data[0] ); + H5Dclose( HDF_DatasetID ); + H5Sclose( HDF_DataspaceID ); + + H5Gclose( HDF_GroupID ); + + H5Fclose( HDF_FileID ); +} + + +template< typename T > +inline void HDFWriteDataset2D( const std::string Filename, const std::string ObjName, const std::vector< std::vector > &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims[2]; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims[0] = Data.size(); + HDF_Dims[1] = Data[0].size(); + HDF_DataspaceID = H5Screate_simple(2, HDF_Dims, NULL); + HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), HDF_Type, + HDF_DataspaceID, H5P_DEFAULT ); + + T *tmp = new T[HDF_Dims[0]*HDF_Dims[1]]; + + unsigned k=0; + for(unsigned i=0; i +inline void HDFWriteDataset3D( const std::string Filename, const std::string ObjName, unsigned nd[3], const std::vector< T > &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims[3]; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims[0] = nd[0]; + HDF_Dims[1] = nd[1]; + HDF_Dims[2] = nd[2]; + + //std::cerr << nd[0]< +struct HDFHyperslabWriter3Ds +{ + hid_t dset_id_, type_id_, file_id_; + + HDFHyperslabWriter3Ds( const std::string Filename, const std::string ObjName, size_t nd[3] ) + { + hid_t filespace; + + hsize_t sizes[4] = { 1, nd[0], nd[1], nd[2] }; + + type_id_ = GetDataType(); + file_id_ = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + //std::cerr << "creating filespace : 1 x " << nd[0] << " x " << nd[1] << " x " << nd[2] << std::endl; + filespace = H5Screate_simple( 4, sizes, NULL ); + dset_id_ = H5Dcreate( file_id_, ObjName.c_str(), type_id_, filespace, H5P_DEFAULT ); + + H5Sclose(filespace); + } + + ~HDFHyperslabWriter3Ds() + { 
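+		// release the HDF5 dataset and file handles acquired in the constructor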
+ H5Dclose( dset_id_ ); + H5Fclose( file_id_ ); + } + + void write_slab( T* data, size_t* count, size_t* offset ) + { + + hsize_t counts[4] = { 1, count[0], count[1], count[2] }; + hsize_t offsets[4] = { 0, offset[0], offset[1], offset[2] }; + + hid_t filespace = H5Dget_space(dset_id_); + + //std::cerr << "creating memspace : 1 x " << count[0] << " x " << count[1] << " x " << count[2] << std::endl; + hid_t memspace = H5Screate_simple(4, counts, NULL); + H5Sselect_hyperslab( filespace, H5S_SELECT_SET, offsets, NULL, counts, NULL ); + + //herr_t status; + //status = + H5Dwrite(dset_id_, type_id_, memspace, filespace, H5P_DEFAULT, reinterpret_cast(data)); + H5Sclose(filespace); + H5Sclose(memspace); + } + +}; + + +template< typename T > +inline void HDFWriteDataset3Ds( const std::string Filename, const std::string ObjName, unsigned nd[3], const std::vector< T > &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims[4]; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims[0] = 1; + HDF_Dims[1] = nd[0]; + HDF_Dims[2] = nd[1]; + HDF_Dims[3] = nd[2]; + + //std::cerr << nd[0]< +inline void HDFWriteDatasetVector( const std::string Filename, const std::string ObjName, const std::vector &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims[2]; + + // hsize_t HDF_Dims; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims[0] = (hsize_t)(Data.size()/3); + HDF_Dims[1] = 3; + + if( Data.size() % 3 != 0 ){ + std::cerr << " - Warning: Trying to write vector data in HDFWriteDatasetVector\n" + << " but array length not divisible by 3!\n\n"; + + } + + HDF_DataspaceID = H5Screate_simple(2, HDF_Dims, NULL); + HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), H5T_NATIVE_FLOAT, + HDF_DataspaceID, H5P_DEFAULT ); + H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, + H5P_DEFAULT, &Data[0] ); + H5Dclose( HDF_DatasetID ); + H5Sclose( HDF_DataspaceID ); + + H5Fclose( HDF_FileID ); +} + +inline void HDFCreateGroup( const std::string Filename, const std::string GroupName ) +{ + hid_t HDF_FileID, HDF_GroupID; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_GroupID = H5Gcreate( HDF_FileID, GroupName.c_str(), 0 ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); + +} + +inline void HDFCreateSubGroup( const std::string Filename, const std::string SuperGroupName, const std::string GroupName ) +{ + hid_t HDF_FileID, HDF_GroupID, HDF_SuperGroupID; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_SuperGroupID = H5Gopen( HDF_FileID, SuperGroupName.c_str() ); + HDF_GroupID = H5Gcreate( HDF_SuperGroupID, GroupName.c_str(), 0 ); + H5Gclose( HDF_GroupID ); + H5Gclose( HDF_SuperGroupID ); + H5Fclose( HDF_FileID ); + +} + +template< typename T > +inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, const std::vector< T > &Data ) +{ + hid_t HDF_FileID, + HDF_GroupID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + hsize_t HDF_Dims; + + HDF_DatatypeID = GetDataType(); + + HDF_Dims = (hsize_t)(Data.size()); + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); + + HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), 
HDF_DatatypeID, HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data[0] ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); +} + +template< typename T > +inline void HDFWriteDatasetAttribute( const std::string Filename, const std::string DatasetName, const std::string ObjName, const std::vector< T > &Data ) +{ + hid_t HDF_FileID, + HDF_DatasetID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + hsize_t HDF_Dims; + + HDF_DatatypeID = GetDataType(); + + HDF_Dims = (hsize_t)(Data.size()); + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_DatasetID = H5Dopen( HDF_FileID, DatasetName.c_str() ); + HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); + + HDF_AttributeID = H5Acreate(HDF_DatasetID, ObjName.c_str(), HDF_DatatypeID, HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data[0] ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Dclose( HDF_DatasetID ); + H5Fclose( HDF_FileID ); +} + + +template< typename T > +inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, T +Data ) +{ + + hid_t + HDF_FileID, + HDF_GroupID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + HDF_DatatypeID = GetDataType(); + + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_DataspaceID = H5Screate(H5S_SCALAR); + HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), HDF_DatatypeID, + HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); +} + +template< typename T > +inline void HDFWriteDatasetAttribute( const std::string Filename, const std::string DatasetName, const std::string ObjName, T Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + HDF_DatatypeID = GetDataType(); + + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_DatasetID = H5Dopen( HDF_FileID, DatasetName.c_str() ); + HDF_DataspaceID = H5Screate(H5S_SCALAR); + HDF_AttributeID = H5Acreate(HDF_DatasetID, ObjName.c_str(), HDF_DatatypeID, + HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Dclose( HDF_DatasetID ); + H5Fclose( HDF_FileID ); +} + +template< typename T > +inline void HDFWriteSubGroupAttribute( const std::string Filename, const std::string GroupName, const std::string SubGroupName, const std::string ObjName, T +Data ) +{ + + hid_t + HDF_FileID, + HDF_GroupID, + HDF_SubGroupID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + HDF_DatatypeID = GetDataType(); + + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + std::cerr << "opening " << GroupName.c_str() << std::endl; + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + std::cerr << "opening " << SubGroupName.c_str() << std::endl; + HDF_SubGroupID = H5Gopen( HDF_GroupID, SubGroupName.c_str() ); + HDF_DataspaceID = H5Screate(H5S_SCALAR); + HDF_AttributeID = H5Acreate(HDF_SubGroupID, ObjName.c_str(), HDF_DatatypeID, + HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Gclose( 
HDF_SubGroupID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); +} + +template<> +inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, std::string Data ) +{ + + hid_t + HDF_FileID, + HDF_GroupID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + HDF_DatatypeID = H5Tcopy( H5T_C_S1 ); + + H5Tset_size( HDF_DatatypeID, Data.size() ); + H5Tset_strpad(HDF_DatatypeID, H5T_STR_NULLPAD); + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_DataspaceID = H5Screate(H5S_SCALAR); + HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), HDF_DatatypeID, + HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, Data.c_str() ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); +} + +#endif // USE_HDF5 diff --git a/include/grid_interpolate.hh b/include/grid_interpolate.hh index b99c958..2facf80 100644 --- a/include/grid_interpolate.hh +++ b/include/grid_interpolate.hh @@ -5,6 +5,8 @@ #include +#include + template struct grid_interpolate { @@ -61,10 +63,15 @@ struct grid_interpolate MPI_Status status; status.MPI_ERROR = MPI_SUCCESS; - MPI_Sendrecv_replace(&boundary_[0], nx * ny * nz, MPI::get_datatype(), sendto, + int err = MPI_Sendrecv_replace(&boundary_[0], nx * ny * nz, MPI::get_datatype(), sendto, MPI::get_rank() + 1000, recvfrom, recvfrom + 1000, MPI_COMM_WORLD, &status); - assert(status.MPI_ERROR == MPI_SUCCESS); + if( err != MPI_SUCCESS ){ + char errstr[256]; int errlen=256; + MPI_Error_string(err, errstr, &errlen ); + music::elog << "MPI_ERROR #" << err << " : " << errstr << std::endl; + } + #endif } } @@ -92,10 +99,10 @@ struct grid_interpolate if( is_distributed_trait ){ size_t localix = ix-gridref.local_0_start_; - val += this->relem(localix, iy, iz) * tx * ty * tz; - val += this->relem(localix, iy, iz1) * tx * ty * dz; - val += this->relem(localix, iy1, iz) * tx * dy * tz; - val += this->relem(localix, iy1, iz1) * tx * dy * dz; + val += gridref.relem(localix, iy, iz) * tx * ty * tz; + val += gridref.relem(localix, iy, iz1) * tx * ty * dz; + val += gridref.relem(localix, iy1, iz) * tx * dy * tz; + val += gridref.relem(localix, iy1, iz1) * tx * dy * dz; if( localix+1 >= gridref.local_0_size_ ){ size_t localix1 = localix+1 - gridref.local_0_size_; @@ -105,23 +112,22 @@ struct grid_interpolate val += boundary_[(localix1*ny_+iy1)*nz_+iz1] * dx * dy * dz; }else{ size_t localix1 = localix+1; - val += this->relem(localix1, iy, iz) * dx * ty * tz; - val += this->relem(localix1, iy, iz1) * dx * ty * dz; - val += this->relem(localix1, iy1, iz) * dx * dy * tz; - val += this->relem(localix1, iy1, iz1) * dx * dy * dz; + val += gridref.relem(localix1, iy, iz) * dx * ty * tz; + val += gridref.relem(localix1, iy, iz1) * dx * ty * dz; + val += gridref.relem(localix1, iy1, iz) * dx * dy * tz; + val += gridref.relem(localix1, iy1, iz1) * dx * dy * dz; } }else{ size_t ix1 = (ix + 1) % nx_; - val += this->relem(ix, iy, iz) * tx * ty * tz; - val += this->relem(ix, iy, iz1) * tx * ty * dz; - val += this->relem(ix, iy1, iz) * tx * dy * tz; - val += this->relem(ix, iy1, iz1) * tx * dy * dz; - val += this->relem(ix1, iy, iz) * dx * ty * tz; - val += this->relem(ix1, iy, iz1) * dx * ty * dz; - val += this->relem(ix1, iy1, iz) * dx * dy * tz; - val += this->relem(ix1, iy1, iz1) * dx * dy * dz; + val += gridref.relem(ix, iy, iz) * tx * ty * tz; + val += gridref.relem(ix, iy, 
iz1) * tx * ty * dz; + val += gridref.relem(ix, iy1, iz) * tx * dy * tz; + val += gridref.relem(ix, iy1, iz1) * tx * dy * dz; + val += gridref.relem(ix1, iy, iz) * dx * ty * tz; + val += gridref.relem(ix1, iy, iz1) * dx * ty * dz; + val += gridref.relem(ix1, iy1, iz) * dx * dy * tz; + val += gridref.relem(ix1, iy1, iz1) * dx * dy * dz; } - return val; } @@ -131,8 +137,8 @@ struct grid_interpolate int get_task(const vec3 &x, const std::vector &local0starts) const noexcept { - auto it = std::lower_bound(local0starts.begin(), local0starts.end(), int(x[0])); - return std::distance(local0starts.begin(), it) - 1; + const auto it = std::upper_bound(local0starts.begin(), local0starts.end(), int(x[0])); + return std::distance(local0starts.begin(), it)-1; } void domain_decompose_pos(std::vector &pos) const noexcept @@ -144,12 +150,12 @@ struct grid_interpolate std::vector local0starts(MPI::get_size(), 0); MPI_Alltoall(&local_0_start, 1, MPI_INT, &local0starts[0], 1, MPI_INT, MPI_COMM_WORLD); - std::sort(pos.begin(), pos.end(), [&](auto x1, auto x2) { return get_task(x1) < get_task(x2); }); + std::sort(pos.begin(), pos.end(), [&](auto x1, auto x2) { return get_task(x1,local0starts) < get_task(x2,local0starts); }); std::vector sendcounts(MPI::get_size(), 0), sendoffsets(MPI::get_size(), 0); std::vector recvcounts(MPI::get_size(), 0), recvoffsets(MPI::get_size(), 0); for (auto x : pos) { - sendcounts[get_task(x)] += 3; + sendcounts[get_task(x,local0starts)] += 3; } // int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) @@ -170,14 +176,17 @@ struct grid_interpolate } } - ccomplex_t compensation_kernel( vec3 k ) const noexcept + ccomplex_t compensation_kernel( const vec3_t& k ) const noexcept { - auto sinc = []( real_t x ){ (std::abs(x)>1e-10)? std::sin(x)/x : 1.0; }; + auto sinc = []( real_t x ){ return (std::abs(x)>1e-10)? std::sin(x)/x : 1.0; }; real_t dfx = sinc(0.5*M_PI*k[0]/gridref.kny_[0]); real_t dfy = sinc(0.5*M_PI*k[1]/gridref.kny_[1]); real_t dfz = sinc(0.5*M_PI*k[2]/gridref.kny_[2]); real_t del = std::pow(dfx*dfy*dfz,1+interpolation_order); - return ccomplex_t(1.0) / del; + + real_t shift = 0.5 * k[0] * gridref.get_dx()[0] + 0.5 * k[1] * gridref.get_dx()[1] + 0.5 * k[2] * gridref.get_dx()[2]; + + return std::exp(ccomplex_t(0.0, shift)) / del; } void get_at(std::vector &pos, std::vector &val) const diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 3eb9e9b..e9a780a 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -10,6 +10,10 @@ #include #include +#if defined(USE_HDF5) +#include "HDF_IO.hh" +#endif + namespace particle { enum lattice{ @@ -39,32 +43,61 @@ const std::vector> second_lattice_shift = }; template -void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t& field, size_t num_p = 0 ){ - // number of modes present in the field - const size_t num_p_in_load = (lattice_type>=0)? 
field.local_size() : num_p; - // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load): - const size_t overload = 1ull<(0,lattice_type); // 1 for sc, 2 for bcc, 4 for fcc, 8 for rsc - // allocate memory for all local particles - particles.allocate( overload * num_p_in_load, b64reals, b64ids ); - // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well - for( size_t i=0,ipcount=0; i(0,lattice_type); // 1 for sc, 2 for bcc, 4 for fcc, 8 for rsc + // allocate memory for all local particles + particles.allocate( overload * num_p_in_load, b64reals, b64ids ); + // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well + for( size_t i=0,ipcount=0; i("setup","GlassFileName"); + + std::vector glass_dims; + HDFGetDatasetExtent( glass_fname, "/PartType1/Coordinates", glass_dims ); + music::ilog << "Glass file contains " << glass_dims[0] << " particles." << std::endl; + + size_t ntiles = cf.get_value("setup","GlassTiles"); + size_t num_p = glass_dims[0] * ntiles*ntiles*ntiles / MPI::get_size(); + size_t off_p = MPI::get_rank() * num_p; + + particles.allocate( num_p, b64reals, b64ids ); + + for( size_t i=0; i -void set_positions( container& particles, const lattice lattice_type, bool is_second_lattice, int idim, real_t lunit, const bool b64reals, field_t& field ) +void set_positions( container& particles, const lattice lattice_type, bool is_second_lattice, int idim, real_t lunit, const bool b64reals, field_t& field, config_file& cf ) { // works only for Bravais types if( lattice_type >= 0 ){ @@ -96,36 +129,131 @@ void set_positions( container& particles, const lattice lattice_type, bool is_se } } }else{ +#if defined(USE_HDF5) + std::string glass_fname = cf.get_value("setup","GlassFileName"); + size_t ntiles = cf.get_value("setup","GlassTiles"); + + real_t lglassbox = 1.0; + HDFReadGroupAttribute( glass_fname, "Header", "BoxSize", lglassbox ); + std::vector glass_pos; + HDFReadDataset( glass_fname, "/PartType1/Coordinates", glass_pos ); + size_t np_in_file = glass_pos.size()/3; + size_t num_p = np_in_file * ntiles*ntiles*ntiles / MPI::get_size(); + size_t off_p = num_p * MPI::get_rank(); + + std::vector< std::array > glass_posr(num_p,{0.0,0.0,0.0}); + + std::array ng({field.n_[0],field.n_[1],field.n_[2]}); + + for( size_t i=0; i interp( field ); + + interp.domain_decompose_pos( glass_posr ); + + for( size_t i=0; i -void set_velocities(container &particles, lattice lattice_type, bool is_second_lattice, int idim, const bool b64reals, field_t &field) +void set_velocities(container &particles, lattice lattice_type, bool is_second_lattice, int idim, const bool b64reals, field_t &field, config_file& cf) { - const size_t num_p_in_load = field.local_size(); - for( int ishift=0; ishift<(1< 0){ - field.shift_field( lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1] ); - } - // read out values from phase shifted field and set assoc. particle's value - const auto ipcount0 = ishift * num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i= 0 ){ + const size_t num_p_in_load = field.local_size(); + for( int ishift=0; ishift<(1< 0){ + field.shift_field( lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1] ); + } + // read out values from phase shifted field and set assoc. 
particle's value + const auto ipcount0 = ishift * num_p_in_load; + for( size_t i=0,ipcount=ipcount0; i("setup","GlassFileName"); + size_t ntiles = cf.get_value("setup","GlassTiles"); + + real_t lglassbox = 1.0; + HDFReadGroupAttribute( glass_fname, "Header", "BoxSize", lglassbox ); + + std::vector glass_pos; + HDFReadDataset( glass_fname, "/PartType1/Coordinates", glass_pos ); + size_t np_in_file = glass_pos.size()/3; + size_t num_p = np_in_file * ntiles*ntiles*ntiles / MPI::get_size(); + size_t off_p = num_p * MPI::get_rank(); + + std::vector< std::array > glass_posr(num_p,{0.0,0.0,0.0}); + + std::array ng({field.n_[0],field.n_[1],field.n_[2]}); + + for( size_t i=0; i interp( field ); + + interp.domain_decompose_pos( glass_posr ); + + for( size_t i=0; iwrite_species_as( this_species ) == output_type::particles ) { // allocate particle structure and generate particle IDs - particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp ); + particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp, the_config ); } // write out positions @@ -576,6 +577,10 @@ int Run( config_file& the_config ) tmp.kelem(idx) = lunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot + lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx) ); + if( the_output_plugin->write_species_as( this_species ) == output_type::particles && lattice_type == particle::lattice_glass){ + tmp.kelem(idx) *= interp.compensation_kernel( tmp.get_k(i,j,k) ); + } + if( bDoBaryons ){ vec3_t kvec = phi.get_k(i,j,k); real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); @@ -599,7 +604,7 @@ int Run( config_file& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_positions( particles, lattice_type, shifted_lattice, idim, lunit, the_output_plugin->has_64bit_reals(), tmp ); + particle::set_positions( particles, lattice_type, shifted_lattice, idim, lunit, the_output_plugin->has_64bit_reals(), tmp, the_config ); } // otherwise write out the grid data directly to the output plugin // else if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_lagrangian ) @@ -629,6 +634,10 @@ int Run( config_file& the_config ) tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot_v + vfac3 * (lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx)) ); + if( the_output_plugin->write_species_as( this_species ) == output_type::particles && lattice_type == particle::lattice_glass){ + tmp.kelem(idx) *= interp.compensation_kernel( tmp.get_k(i,j,k) ); + } + if( bDoBaryons ){ vec3_t kvec = phi.get_k(i,j,k); real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); @@ -663,7 +672,7 @@ int Run( config_file& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_velocities( particles, lattice_type, shifted_lattice, idim, the_output_plugin->has_64bit_reals(), tmp ); + particle::set_velocities( particles, lattice_type, shifted_lattice, idim, the_output_plugin->has_64bit_reals(), tmp, the_config ); } // otherwise write out the grid data directly to the 
output plugin else if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian ) diff --git a/src/plugins/random_panphasia.cc b/src/plugins/random_panphasia.cc index 632d7fc..1489f59 100644 --- a/src/plugins/random_panphasia.cc +++ b/src/plugins/random_panphasia.cc @@ -267,7 +267,7 @@ public: int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0); int level_p = d.wn_level_base + lextra; - // int ratio = 1 << lextra; + int ratio = 1 << lextra; lstate[mythread].layer_min = 0; lstate[mythread].layer_max = level_p; @@ -328,12 +328,6 @@ public: } } } - - // if (verbosity) - // { - // music::ilog.Print("time for calculating PANPHASIA field : %f s, %f µs/cell", get_wtime() - t1, - // 1e6 * (get_wtime() - t1) / g.global_size(0) / g.global_size(1) / g.global_size(2)); - // } } // end omp parallel region g0.FourierTransformForward(); @@ -369,8 +363,9 @@ public: auto y0(g0.kelem(i, j, k)), y1(g1.kelem(i, j, k)), y2(g2.kelem(i, j, k)), y3(g3.kelem(i, j, k)), y4(g4.kelem(i, j, k)); - g0.kelem(i, j, k) = y0 * fx * fy * fz + sqrt3 * (y1 * gx * fy * fz + y2 * fx * gy * fz + y3 * fx * fy * gz) + - y4 * magnitude; + g0.kelem(i, j, k) = y0 * fx * fy * fz + + sqrt3 * (y1 * gx * fy * fz + y2 * fx * gy * fz + y3 * fx * fy * gz) + + y4 * magnitude; } else { @@ -409,7 +404,7 @@ public: int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0); int level_p = d.wn_level_base + lextra; - // int ratio = 1 << lextra; + int ratio = 1 << lextra; lstate[mythread].layer_min = 0; lstate[mythread].layer_max = level_p; From bae1701cb50a49843a03a14fe6770d055a1247f2 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 May 2020 16:15:40 +0200 Subject: [PATCH 123/130] fixed compiler errors when not using MPI --- include/particle_generator.hh | 20 +- src/plugins/HDF_IO.hh | 1085 --------------------------------- 2 files changed, 18 insertions(+), 1087 deletions(-) delete mode 100755 src/plugins/HDF_IO.hh diff --git a/include/particle_generator.hh b/include/particle_generator.hh index e9a780a..91a19d9 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -77,8 +77,13 @@ void initialize_lattice( container& particles, lattice lattice_type, const bool music::ilog << "Glass file contains " << glass_dims[0] << " particles." 
<< std::endl; size_t ntiles = cf.get_value("setup","GlassTiles"); +#if defined(USE_MPI) size_t num_p = glass_dims[0] * ntiles*ntiles*ntiles / MPI::get_size(); size_t off_p = MPI::get_rank() * num_p; +#else + size_t num_p = glass_dims[0] * ntiles*ntiles*ntiles; + size_t off_p = 0; +#endif particles.allocate( num_p, b64reals, b64ids ); @@ -139,8 +144,13 @@ void set_positions( container& particles, const lattice lattice_type, bool is_se std::vector glass_pos; HDFReadDataset( glass_fname, "/PartType1/Coordinates", glass_pos ); size_t np_in_file = glass_pos.size()/3; +#if defined(USE_MPI) size_t num_p = np_in_file * ntiles*ntiles*ntiles / MPI::get_size(); - size_t off_p = num_p * MPI::get_rank(); + size_t off_p = MPI::get_rank() * num_p; +#else + size_t num_p = np_in_file * ntiles*ntiles*ntiles; + size_t off_p = 0; +#endif std::vector< std::array > glass_posr(num_p,{0.0,0.0,0.0}); @@ -218,8 +228,14 @@ void set_velocities(container &particles, lattice lattice_type, bool is_second_l std::vector glass_pos; HDFReadDataset( glass_fname, "/PartType1/Coordinates", glass_pos ); size_t np_in_file = glass_pos.size()/3; +#if defined(USE_MPI) size_t num_p = np_in_file * ntiles*ntiles*ntiles / MPI::get_size(); - size_t off_p = num_p * MPI::get_rank(); + size_t off_p = MPI::get_rank() * num_p; +#else + size_t num_p = np_in_file * ntiles*ntiles*ntiles; + size_t off_p = 0; +#endif + std::vector< std::array > glass_posr(num_p,{0.0,0.0,0.0}); diff --git a/src/plugins/HDF_IO.hh b/src/plugins/HDF_IO.hh deleted file mode 100755 index 965dac9..0000000 --- a/src/plugins/HDF_IO.hh +++ /dev/null @@ -1,1085 +0,0 @@ -#ifndef __HDF_IO_HH -#define __HDF_IO_HH - -#define H5_USE_16_API - -/* - HDF_IO.hh -- templated C++ HDF5 front-end functions, v1.2b - - Copyright (C) 2006-7 Oliver Hahn -- ojha@gmx.de - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include -#include -#include -#include -#include -#include -#include -#include "hdf5.h" - -template -hid_t GetDataType( void ) -{ - if( typeid(T) == typeid(int) ) - return H5T_NATIVE_INT; - - if( typeid(T) == typeid(unsigned) ) - return H5T_NATIVE_UINT; - - if( typeid(T) == typeid(float) ) - return H5T_NATIVE_FLOAT; - - if( typeid(T) == typeid(double) ) - return H5T_NATIVE_DOUBLE; - - if( typeid(T) == typeid(long long) ) - return H5T_NATIVE_LLONG; - - if( typeid(T) == typeid(unsigned long long) ) - return H5T_NATIVE_ULLONG; - - if( typeid(T) == typeid(size_t) ) - return H5T_NATIVE_ULLONG; - - - std::cerr << " - Error: [HDF_IO] trying to evaluate unsupported type in GetDataType\n\n"; - return -1; -} - -#include - -class HDFException : public std::runtime_error { - public: - HDFException( const std::string &errtxt ) : std::runtime_error(errtxt) { } -}; - - -inline bool DoesFileExist( std::string Filename ){ - bool flag = false; - std::fstream fin(Filename.c_str(),std::ios::in|std::ios::binary); - if( fin.is_open() ) - flag=true; - fin.close(); - return flag; -} - -inline void AssertFileOpen( std::string Filename ) -{ - if( !DoesFileExist( Filename ) ){ - std::fstream fout( Filename.c_str(), std::ios::out|std::ios::binary); - fout.close(); - } -} - -inline void HDFCreateFile( std::string Filename ) -{ - hid_t HDF_FileID; - HDF_FileID = H5Fcreate( Filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT ); - H5Fclose( HDF_FileID ); -} - -template< typename T> -inline void HDFReadVector( const std::string Filename, const std::string ObjName, std::vector &Data ) -{ - HDFReadDataset( Filename, ObjName, Data ); -} - - - - -inline void HDFGetDatasetExtent( const std::string Filename, const std::string ObjName, std::vector &Extent ) -{ - hid_t HDF_FileID, HDF_DatasetID, HDF_DataspaceID; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - - //... save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... probe dataset opening - HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); - - //... restore previous error handler - H5Eset_auto(old_func, old_client_data); - - //... dataset did not exist or was empty - if( HDF_DatasetID < 0 ){ - std::stringstream ss; - ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; - H5Fclose( HDF_FileID ); - throw HDFException(ss.str()); - return; - } - - //... get space associated with dataset and its extensions - HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); - - int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); - - hsize_t *dimsize = new hsize_t[ndims]; - - H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); - - Extent.clear(); - for(int i=0; i -inline void HDFReadDataset( const std::string Filename, const std::string ObjName, std::vector &Data ) -{ - - hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID; - hsize_t HDF_StorageSize; - - HDF_Type = GetDataType(); - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - - - //... save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... probe dataset opening - HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); - - //... restore previous error handler - H5Eset_auto(old_func, old_client_data); - - //... 
dataset did not exist or was empty - if( HDF_DatasetID < 0 ){ - std::stringstream ss; - ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; - Data.clear(); - H5Fclose( HDF_FileID ); - throw HDFException(ss.str()); - return; - } - - //... get space associated with dataset and its extensions - HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); - - int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); - - hsize_t dimsize[ndims]; - - H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); - - HDF_StorageSize = 1; - for(int i=0; i -inline void HDFReadSelect( const std::string Filename, const std::string ObjName, const std::vector& ii, std::vector &Data ){ - - hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID, HDF_MemspaceID; - hsize_t HDF_StorageSize; - - HDF_Type = GetDataType(); - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - - - //... save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... probe dataset opening - HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); - - //... restore previous error handler - H5Eset_auto(old_func, old_client_data); - - //... dataset did not exist or was empty - if( HDF_DatasetID < 0 ){ - std::stringstream ss; - ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; - Data.clear(); - H5Fclose( HDF_FileID ); - throw HDFException(ss.str()); - } - - //... get space associated with dataset and its extensions - HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); - - hsize_t block[2]; - block[0] = ii.size(); - block[1] = 1; - - - Data.clear(); - Data.reserve( block[0]*block[1] ); - Data.assign( block[0]*block[1], (T)1 ); - - HDF_MemspaceID = H5Screate_simple( 2, block, NULL ); - // H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); - H5Sselect_elements( HDF_DataspaceID, H5S_SELECT_SET, ii.size(), (const hsize_t *)&ii[0] ); - - H5Dread( HDF_DatasetID, HDF_Type, HDF_MemspaceID, HDF_DataspaceID, H5P_DEFAULT, &Data[0] ); - - H5Sclose( HDF_DataspaceID ); - H5Sclose( HDF_MemspaceID ); - H5Dclose( HDF_DatasetID ); - H5Fclose( HDF_FileID ); - -} - -template -inline void HDFReadVectorSelect( const std::string Filename, const std::string ObjName, const std::vector& ii, std::vector &Data ){ - - hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID, HDF_MemspaceID; -// hsize_t HDF_StorageSize; - - HDF_Type = GetDataType(); - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - - - //... save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... probe dataset opening - HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); - - //... restore previous error handler - H5Eset_auto(old_func, old_client_data); - - //... dataset did not exist or was empty - if( HDF_DatasetID < 0 ){ - std::stringstream ss; - ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; - Data.clear(); - H5Fclose( HDF_FileID ); - throw HDFException(ss.str()); - return; - } - - //... 
get space associated with dataset and its extensions - HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); - int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); - hsize_t dimsize[ndims]; - H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); - - hsize_t block[2]; - block[0] = ii.size(); - block[1] = 3; - - std::vector coord; - for( unsigned i=0; i().swap(ii); - - - - - if( ii.size() == 0 ){ - std::cerr << "attempted to read empty block. skipping....\n"; - return; - } - //std::cerr << "starting 2 read...\n"; - H5Sselect_none( HDF_DataspaceID ); - if( H5Sselect_elements( HDF_DataspaceID, H5S_SELECT_SET, coord.size()/2, (const hsize_t *)&coord[0] ) < 0 )//(const hsize_t**)&coord[0] ) < 0 ) - std::cerr << " - could not select elements properly\n"; - - if(H5Sselect_valid( HDF_DataspaceID )<=0 ){ - std::cerr << "\n - sorry, invalid element selection in file \'"<< Filename.c_str() << "\'. \n - dumping 10 first indices...\n"; - - /*for( unsigned i=0; i<10; ++i ){ - for( unsigned k=0; k<3; ++k ){ - std::cerr << coord[3*i+k] << " "; - } - std::cerr << "\n"; - }*/ - - return; - } - - std::vector().swap(coord); - Data.assign( block[0]*block[1], (T)0 ); - HDF_MemspaceID = H5Screate_simple( 2, &block[0], NULL ); - - H5Dread( HDF_DatasetID, HDF_Type, HDF_MemspaceID, HDF_DataspaceID, H5P_DEFAULT, &Data[0] ); - - - H5Sclose( HDF_DataspaceID ); - H5Sclose( HDF_MemspaceID ); - H5Dclose( HDF_DatasetID ); - H5Fclose( HDF_FileID ); - -} - -template< typename T > -inline void HDFReadVectorSlab( const std::string Filename, const std::string ObjName, unsigned nStart, unsigned nCount, std::vector &Data ) -{ - hsize_t - offset[2], - stride[2], - count[2], - block[2]; - - hid_t MemspaceID, FilespaceID, DatasetID, FileID; - hid_t Type = GetDataType(); - - FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - - - - //... save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... probe dataset opening - DatasetID = H5Dopen( FileID, ObjName.c_str() ); - - //... restore previous error handler - H5Eset_auto(old_func, old_client_data); - - //... dataset did not exist or was empty - if( DatasetID < 0 ){ - std::stringstream ss; - ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; - Data.clear(); - H5Fclose( FileID ); - throw HDFException(ss.str()); - return; - } - - FilespaceID = H5Dget_space( DatasetID ); - - offset[0] = nStart; - offset[1] = 0; - - count[0] = 1; - count[1] = 1; - - stride[0] = 1; - stride[1] = 1; - - block[0] = nCount; - block[1] = 3; - - - Data.clear(); - Data.reserve( block[0]*block[1] ); - Data.assign( block[0]*block[1], (T)1 ); - - MemspaceID = H5Screate_simple( 2, block, NULL ); - H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); - - H5Dread( DatasetID, Type, MemspaceID, FilespaceID, H5P_DEFAULT, &Data[0] ); - - H5Sclose( FilespaceID ); - H5Sclose( MemspaceID ); - H5Dclose( DatasetID ); - H5Fclose( FileID ); -} - -template< typename T > -inline void HDFReadDatasetSlab( const std::string Filename, const std::string ObjName, unsigned nStart, unsigned nCount, std::vector &Data ) -{ - hsize_t - offset[2], - stride[2], - count[2], - block[2]; - - hid_t MemspaceID, FilespaceID, DatasetID, FileID; - hid_t Type = GetDataType(); - - FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - - - - //... 
save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... probe dataset opening - DatasetID = H5Dopen( FileID, ObjName.c_str() ); - - //... restore previous error handler - H5Eset_auto(old_func, old_client_data); - - //... dataset did not exist or was empty - if( DatasetID < 0 ){ - std::stringstream ss; - ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; - Data.clear(); - H5Fclose( FileID ); - throw HDFException(ss.str()); - return; - } - - FilespaceID = H5Dget_space( DatasetID ); - - offset[0] = nStart; - offset[1] = 0; - - count[0] = 1; - count[1] = 1; - - stride[0] = 1; - stride[1] = 1; - - block[0] = nCount; - block[1] = 1; - - - Data.clear(); - Data.reserve( block[0]*block[1] ); - Data.assign( block[0]*block[1], (T)1 ); - - MemspaceID = H5Screate_simple( 2, block, NULL ); - H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); - - H5Dread( DatasetID, Type, MemspaceID, FilespaceID, H5P_DEFAULT, &Data[0] ); - - H5Sclose( FilespaceID ); - H5Sclose( MemspaceID ); - H5Dclose( DatasetID ); - H5Fclose( FileID ); -} - -template< typename T> -inline void HDFReadGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, T &Data ) -{ - - hid_t HDF_Type, HDF_FileID, HDF_GroupID, HDF_AttributeID; - // hsize_t HDF_StorageSize; - - HDF_Type = GetDataType(); - - //... save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... attempt to open attribute - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); - HDF_AttributeID = H5Aopen_name( HDF_GroupID, ObjName.c_str() ); - - if( HDF_FileID < 0 || HDF_GroupID < 0 || HDF_AttributeID < 0 ){ - std::stringstream ss; - ss << " - Warning: attribute \'" << GroupName.c_str() << "/" << ObjName.c_str() << "\' does not exist or is empty.\n"; - H5Fclose( HDF_FileID ); - throw HDFException(ss.str()); - return; - } - - - H5Aread( HDF_AttributeID, HDF_Type, &Data ); - - //... restore previous error handler - H5Eset_auto(old_func, old_client_data); - - - H5Aclose( HDF_AttributeID ); - H5Gclose( HDF_GroupID ); - H5Fclose( HDF_FileID ); - -} - -template< typename T> -inline void HDFReadGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, std::vector &Data ) -{ - - hid_t HDF_Type, HDF_FileID, HDF_GroupID, HDF_AttributeID, HDF_DataspaceID; - hsize_t HDF_StorageSize; - - HDF_Type = GetDataType(); - - //... save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... attempt to open attribute - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); - HDF_AttributeID = H5Aopen_name( HDF_GroupID, ObjName.c_str() ); - - if( HDF_FileID < 0 || HDF_GroupID < 0 || HDF_AttributeID < 0 ){ - std::stringstream ss; - ss << " - Warning: attribute \'" << GroupName.c_str() << "/" << ObjName.c_str() << "\' does not exist or is empty.\n"; - H5Fclose( HDF_FileID ); - throw HDFException(ss.str()); - return; - } - - //... 
get space associated with dataset and its extensions - HDF_DataspaceID = H5Aget_space( HDF_AttributeID ); - - int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); - - hsize_t dimsize[ndims]; - - H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); - - HDF_StorageSize = 1; - for(int i=0; i -inline void HDFWriteDataset( const std::string Filename, const std::string ObjName, const std::vector &Data ) -{ - - hid_t - HDF_FileID, - HDF_DatasetID, - HDF_DataspaceID, - HDF_Type; - - hsize_t HDF_Dims; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - - HDF_Type = GetDataType(); - - HDF_Dims = Data.size(); - HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); - HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), HDF_Type, - HDF_DataspaceID, H5P_DEFAULT ); - H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, - H5P_DEFAULT, &Data[0] ); - H5Dclose( HDF_DatasetID ); - H5Sclose( HDF_DataspaceID ); - - H5Fclose( HDF_FileID ); -} - -template< typename T > -inline void HDFWriteGroupDataset( const std::string Filename, const std::string GrpName, const std::string ObjName, const std::vector &Data ) -{ - - hid_t - HDF_FileID, - HDF_GroupID, - HDF_DatasetID, - HDF_DataspaceID, - HDF_Type; - - hsize_t HDF_Dims; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - - HDF_GroupID = H5Gopen( HDF_FileID, GrpName.c_str() ); - - HDF_Type = GetDataType(); - - HDF_Dims = Data.size(); - HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); - HDF_DatasetID = H5Dcreate( HDF_GroupID, ObjName.c_str(), HDF_Type, - HDF_DataspaceID, H5P_DEFAULT ); - H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, - H5P_DEFAULT, &Data[0] ); - H5Dclose( HDF_DatasetID ); - H5Sclose( HDF_DataspaceID ); - - H5Gclose( HDF_GroupID ); - - H5Fclose( HDF_FileID ); -} - - -template< typename T > -inline void HDFWriteDataset2D( const std::string Filename, const std::string ObjName, const std::vector< std::vector > &Data ) -{ - - hid_t - HDF_FileID, - HDF_DatasetID, - HDF_DataspaceID, - HDF_Type; - - hsize_t HDF_Dims[2]; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - - HDF_Type = GetDataType(); - - HDF_Dims[0] = Data.size(); - HDF_Dims[1] = Data[0].size(); - HDF_DataspaceID = H5Screate_simple(2, HDF_Dims, NULL); - HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), HDF_Type, - HDF_DataspaceID, H5P_DEFAULT ); - - T *tmp = new T[HDF_Dims[0]*HDF_Dims[1]]; - - unsigned k=0; - for(unsigned i=0; i -inline void HDFWriteDataset3D( const std::string Filename, const std::string ObjName, unsigned nd[3], const std::vector< T > &Data ) -{ - - hid_t - HDF_FileID, - HDF_DatasetID, - HDF_DataspaceID, - HDF_Type; - - hsize_t HDF_Dims[3]; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - - HDF_Type = GetDataType(); - - HDF_Dims[0] = nd[0]; - HDF_Dims[1] = nd[1]; - HDF_Dims[2] = nd[2]; - - //std::cerr << nd[0]< -struct HDFHyperslabWriter3Ds -{ - hid_t dset_id_, type_id_, file_id_; - - HDFHyperslabWriter3Ds( const std::string Filename, const std::string ObjName, size_t nd[3] ) - { - hid_t filespace; - - hsize_t sizes[4] = { 1, nd[0], nd[1], nd[2] }; - - type_id_ = GetDataType(); - file_id_ = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - - //std::cerr << "creating filespace : 1 x " << nd[0] << " x " << nd[1] << " x " << nd[2] << std::endl; - filespace = H5Screate_simple( 4, sizes, NULL ); - dset_id_ = H5Dcreate( file_id_, ObjName.c_str(), type_id_, filespace, H5P_DEFAULT ); - - H5Sclose(filespace); - } - - ~HDFHyperslabWriter3Ds() - { 
- H5Dclose( dset_id_ ); - H5Fclose( file_id_ ); - } - - void write_slab( T* data, size_t* count, size_t* offset ) - { - - hsize_t counts[4] = { 1, count[0], count[1], count[2] }; - hsize_t offsets[4] = { 0, offset[0], offset[1], offset[2] }; - - hid_t filespace = H5Dget_space(dset_id_); - - //std::cerr << "creating memspace : 1 x " << count[0] << " x " << count[1] << " x " << count[2] << std::endl; - hid_t memspace = H5Screate_simple(4, counts, NULL); - H5Sselect_hyperslab( filespace, H5S_SELECT_SET, offsets, NULL, counts, NULL ); - - //herr_t status; - //status = - H5Dwrite(dset_id_, type_id_, memspace, filespace, H5P_DEFAULT, reinterpret_cast(data)); - H5Sclose(filespace); - H5Sclose(memspace); - } - -}; - - -template< typename T > -inline void HDFWriteDataset3Ds( const std::string Filename, const std::string ObjName, unsigned nd[3], const std::vector< T > &Data ) -{ - - hid_t - HDF_FileID, - HDF_DatasetID, - HDF_DataspaceID, - HDF_Type; - - hsize_t HDF_Dims[4]; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - - HDF_Type = GetDataType(); - - HDF_Dims[0] = 1; - HDF_Dims[1] = nd[0]; - HDF_Dims[2] = nd[1]; - HDF_Dims[3] = nd[2]; - - //std::cerr << nd[0]< -inline void HDFWriteDatasetVector( const std::string Filename, const std::string ObjName, const std::vector &Data ) -{ - - hid_t - HDF_FileID, - HDF_DatasetID, - HDF_DataspaceID, - HDF_Type; - - hsize_t HDF_Dims[2]; - - // hsize_t HDF_Dims; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - - HDF_Type = GetDataType(); - - HDF_Dims[0] = (hsize_t)(Data.size()/3); - HDF_Dims[1] = 3; - - if( Data.size() % 3 != 0 ){ - std::cerr << " - Warning: Trying to write vector data in HDFWriteDatasetVector\n" - << " but array length not divisible by 3!\n\n"; - - } - - HDF_DataspaceID = H5Screate_simple(2, HDF_Dims, NULL); - HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), H5T_NATIVE_FLOAT, - HDF_DataspaceID, H5P_DEFAULT ); - H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, - H5P_DEFAULT, &Data[0] ); - H5Dclose( HDF_DatasetID ); - H5Sclose( HDF_DataspaceID ); - - H5Fclose( HDF_FileID ); -} - -inline void HDFCreateGroup( const std::string Filename, const std::string GroupName ) -{ - hid_t HDF_FileID, HDF_GroupID; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - HDF_GroupID = H5Gcreate( HDF_FileID, GroupName.c_str(), 0 ); - H5Gclose( HDF_GroupID ); - H5Fclose( HDF_FileID ); - -} - -inline void HDFCreateSubGroup( const std::string Filename, const std::string SuperGroupName, const std::string GroupName ) -{ - hid_t HDF_FileID, HDF_GroupID, HDF_SuperGroupID; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - HDF_SuperGroupID = H5Gopen( HDF_FileID, SuperGroupName.c_str() ); - HDF_GroupID = H5Gcreate( HDF_SuperGroupID, GroupName.c_str(), 0 ); - H5Gclose( HDF_GroupID ); - H5Gclose( HDF_SuperGroupID ); - H5Fclose( HDF_FileID ); - -} - -template< typename T > -inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, const std::vector< T > &Data ) -{ - hid_t HDF_FileID, - HDF_GroupID, - HDF_AttributeID, - HDF_DataspaceID, - HDF_DatatypeID; - - hsize_t HDF_Dims; - - HDF_DatatypeID = GetDataType(); - - HDF_Dims = (hsize_t)(Data.size()); - - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); - HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); - - HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), 
HDF_DatatypeID, HDF_DataspaceID, H5P_DEFAULT); - H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data[0] ); - H5Aclose( HDF_AttributeID ); - H5Sclose( HDF_DataspaceID ); - H5Gclose( HDF_GroupID ); - H5Fclose( HDF_FileID ); -} - -template< typename T > -inline void HDFWriteDatasetAttribute( const std::string Filename, const std::string DatasetName, const std::string ObjName, const std::vector< T > &Data ) -{ - hid_t HDF_FileID, - HDF_DatasetID, - HDF_AttributeID, - HDF_DataspaceID, - HDF_DatatypeID; - - hsize_t HDF_Dims; - - HDF_DatatypeID = GetDataType(); - - HDF_Dims = (hsize_t)(Data.size()); - - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - HDF_DatasetID = H5Dopen( HDF_FileID, DatasetName.c_str() ); - HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); - - HDF_AttributeID = H5Acreate(HDF_DatasetID, ObjName.c_str(), HDF_DatatypeID, HDF_DataspaceID, H5P_DEFAULT); - H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data[0] ); - H5Aclose( HDF_AttributeID ); - H5Sclose( HDF_DataspaceID ); - H5Dclose( HDF_DatasetID ); - H5Fclose( HDF_FileID ); -} - - -template< typename T > -inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, T -Data ) -{ - - hid_t - HDF_FileID, - HDF_GroupID, - HDF_AttributeID, - HDF_DataspaceID, - HDF_DatatypeID; - - HDF_DatatypeID = GetDataType(); - - - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); - HDF_DataspaceID = H5Screate(H5S_SCALAR); - HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), HDF_DatatypeID, - HDF_DataspaceID, H5P_DEFAULT); - H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); - H5Aclose( HDF_AttributeID ); - H5Sclose( HDF_DataspaceID ); - H5Gclose( HDF_GroupID ); - H5Fclose( HDF_FileID ); -} - -template< typename T > -inline void HDFWriteDatasetAttribute( const std::string Filename, const std::string DatasetName, const std::string ObjName, T Data ) -{ - - hid_t - HDF_FileID, - HDF_DatasetID, - HDF_AttributeID, - HDF_DataspaceID, - HDF_DatatypeID; - - HDF_DatatypeID = GetDataType(); - - - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - HDF_DatasetID = H5Dopen( HDF_FileID, DatasetName.c_str() ); - HDF_DataspaceID = H5Screate(H5S_SCALAR); - HDF_AttributeID = H5Acreate(HDF_DatasetID, ObjName.c_str(), HDF_DatatypeID, - HDF_DataspaceID, H5P_DEFAULT); - H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); - H5Aclose( HDF_AttributeID ); - H5Sclose( HDF_DataspaceID ); - H5Dclose( HDF_DatasetID ); - H5Fclose( HDF_FileID ); -} - -template< typename T > -inline void HDFWriteSubGroupAttribute( const std::string Filename, const std::string GroupName, const std::string SubGroupName, const std::string ObjName, T -Data ) -{ - - hid_t - HDF_FileID, - HDF_GroupID, - HDF_SubGroupID, - HDF_AttributeID, - HDF_DataspaceID, - HDF_DatatypeID; - - HDF_DatatypeID = GetDataType(); - - - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - std::cerr << "opening " << GroupName.c_str() << std::endl; - HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); - std::cerr << "opening " << SubGroupName.c_str() << std::endl; - HDF_SubGroupID = H5Gopen( HDF_GroupID, SubGroupName.c_str() ); - HDF_DataspaceID = H5Screate(H5S_SCALAR); - HDF_AttributeID = H5Acreate(HDF_SubGroupID, ObjName.c_str(), HDF_DatatypeID, - HDF_DataspaceID, H5P_DEFAULT); - H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); - H5Aclose( HDF_AttributeID ); - H5Sclose( HDF_DataspaceID ); - H5Gclose( 
HDF_SubGroupID ); - H5Gclose( HDF_GroupID ); - H5Fclose( HDF_FileID ); -} - -template<> -inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, std::string Data ) -{ - - hid_t - HDF_FileID, - HDF_GroupID, - HDF_AttributeID, - HDF_DataspaceID, - HDF_DatatypeID; - - HDF_DatatypeID = H5Tcopy( H5T_C_S1 ); - - H5Tset_size( HDF_DatatypeID, Data.size() ); - H5Tset_strpad(HDF_DatatypeID, H5T_STR_NULLPAD); - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); - HDF_DataspaceID = H5Screate(H5S_SCALAR); - HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), HDF_DatatypeID, - HDF_DataspaceID, H5P_DEFAULT); - H5Awrite( HDF_AttributeID, HDF_DatatypeID, Data.c_str() ); - H5Aclose( HDF_AttributeID ); - H5Sclose( HDF_DataspaceID ); - H5Gclose( HDF_GroupID ); - H5Fclose( HDF_FileID ); -} -#endif From 226a9303db15eae71399a4817297f0a72b8d73e5 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 May 2020 21:20:22 +0200 Subject: [PATCH 124/130] bug fixes to glass with MPI --- include/HDF_IO.hh | 4 ++-- include/grid_interpolate.hh | 20 ++++++++++++-------- include/particle_generator.hh | 4 ++-- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/include/HDF_IO.hh b/include/HDF_IO.hh index 53b3f92..1b15b34 100755 --- a/include/HDF_IO.hh +++ b/include/HDF_IO.hh @@ -193,9 +193,9 @@ inline void HDFReadDataset( const std::string Filename, const std::string ObjNam int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); - hsize_t dimsize[ndims]; + std::vector dimsize(ndims,0); - H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + H5Sget_simple_extent_dims( HDF_DataspaceID, &dimsize[0], NULL ); HDF_StorageSize = 1; for(int i=0; i boundary_; const grid_t &gridref; + size_t nx_, ny_, nz_; explicit grid_interpolate(const grid_t &g) : gridref(g), nx_(g.n_[0]), ny_(g.n_[1]), nz_(g.n_[2]) @@ -50,7 +50,7 @@ struct grid_interpolate { for (size_t j = 0; j < ny; ++j) { - for (size_t k = 0; k < nx; ++k) + for (size_t k = 0; k < nz; ++k) { boundary_[(i * ny + j) * nz + k] = g.relem(i, j, k); } @@ -98,7 +98,7 @@ struct grid_interpolate data_t val{0.0}; if( is_distributed_trait ){ - size_t localix = ix-gridref.local_0_start_; + ptrdiff_t localix = ix-gridref.local_0_start_; val += gridref.relem(localix, iy, iz) * tx * ty * tz; val += gridref.relem(localix, iy, iz1) * tx * ty * dz; val += gridref.relem(localix, iy1, iz) * tx * dy * tz; @@ -158,20 +158,24 @@ struct grid_interpolate sendcounts[get_task(x,local0starts)] += 3; } - // int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) MPI_Alltoall(&sendcounts[0], 1, MPI_INT, &recvcounts[0], 1, MPI_INT, MPI_COMM_WORLD); + size_t tot_receive = recvcounts[0], tot_send = sendcounts[0]; for (int i = 1; i < MPI::get_size(); ++i) { sendoffsets[i] = sendcounts[i - 1] + sendoffsets[i - 1]; recvoffsets[i] = recvcounts[i - 1] + recvoffsets[i - 1]; + tot_receive += recvcounts[i]; + tot_send += sendcounts[i]; } - // int MPI_Alltoallv(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, - // const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPI_Comm comm) + std::vector recvbuf; + recvbuf.assign(tot_receive,{0.,0.,0.}); MPI_Alltoallv(&pos[0], &sendcounts[0], &sendoffsets[0], MPI_data_t_type, - &pos[0], &recvcounts[0], &recvoffsets[0], MPI_data_t_type, MPI_COMM_WORLD); + 
&recvbuf[0], &recvcounts[0], &recvoffsets[0], MPI_data_t_type, MPI_COMM_WORLD); + + std::swap( pos, recvbuf ); #endif } } diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 91a19d9..d632d2d 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -154,7 +154,7 @@ void set_positions( container& particles, const lattice lattice_type, bool is_se std::vector< std::array > glass_posr(num_p,{0.0,0.0,0.0}); - std::array ng({field.n_[0],field.n_[1],field.n_[2]}); + std::array ng({real_t(field.n_[0]),real_t(field.n_[1]),real_t(field.n_[2])}); for( size_t i=0; i > glass_posr(num_p,{0.0,0.0,0.0}); - std::array ng({field.n_[0],field.n_[1],field.n_[2]}); + std::array ng({real_t(field.n_[0]),real_t(field.n_[1]),real_t(field.n_[2])}); for( size_t i=0; i Date: Mon, 4 May 2020 00:49:11 +0200 Subject: [PATCH 125/130] mpi bugfixes, refactoring of particle creation --- include/grid_interpolate.hh | 70 ++--- include/particle_generator.hh | 498 ++++++++++++++++++---------------- src/ic_generator.cc | 34 ++- 3 files changed, 311 insertions(+), 291 deletions(-) diff --git a/include/grid_interpolate.hh b/include/grid_interpolate.hh index ac92cd6..c0ec9aa 100644 --- a/include/grid_interpolate.hh +++ b/include/grid_interpolate.hh @@ -16,19 +16,8 @@ struct grid_interpolate static constexpr bool is_distributed_trait = grid_t::is_distributed_trait; static constexpr int interpolation_order = interp_order; - -#if defined(USE_MPI) - const MPI_Datatype MPI_data_t_type = - (typeid(data_t) == typeid(float)) ? MPI_FLOAT - : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE - : (typeid(data_t) == typeid(long double)) ? MPI_LONG_DOUBLE - : (typeid(data_t) == typeid(std::complex)) ? MPI_C_FLOAT_COMPLEX - : (typeid(data_t) == typeid(std::complex)) ? MPI_C_DOUBLE_COMPLEX - : (typeid(data_t) == typeid(std::complex)) ? MPI_C_LONG_DOUBLE_COMPLEX - : MPI_INT; -#endif - std::vector boundary_; + std::vector local0starts_; const grid_t &gridref; size_t nx_, ny_, nz_; @@ -40,6 +29,13 @@ struct grid_interpolate if (is_distributed_trait) { #if defined(USE_MPI) + + int local_0_start = int(gridref.local_0_start_); + local0starts_.assign(MPI::get_size(), 0); + + MPI_Allgather(&local_0_start, 1, MPI_INT, &local0starts_[0], 1, MPI_INT, MPI_COMM_WORLD); + + //... 
exchange boundary size_t nx = interpolation_order + 1; size_t ny = g.n_[1]; size_t nz = g.n_[2]; @@ -96,6 +92,11 @@ struct grid_interpolate size_t iz1 = (iz + 1) % nz_; data_t val{0.0}; + + if( get_task(pos) != MPI::get_rank() ){ + std::cout << "task : " << MPI::get_rank() << " p@(" << pos[0] << ", " << pos[1] << ", " << pos[2] << ") belongs to task " << get_task(pos) << std::endl; + abort(); + } if( is_distributed_trait ){ ptrdiff_t localix = ix-gridref.local_0_start_; @@ -135,10 +136,10 @@ struct grid_interpolate // { // } - int get_task(const vec3 &x, const std::vector &local0starts) const noexcept + int get_task(const vec3 &x) const noexcept { - const auto it = std::upper_bound(local0starts.begin(), local0starts.end(), int(x[0])); - return std::distance(local0starts.begin(), it)-1; + const auto it = std::upper_bound(local0starts_.begin(), local0starts_.end(), int(x[0])); + return std::distance(local0starts_.begin(), it)-1; } void domain_decompose_pos(std::vector &pos) const noexcept @@ -146,16 +147,12 @@ struct grid_interpolate if (is_distributed_trait) { #if defined(USE_MPI) - int local_0_start = int(gridref.local_0_start_); - std::vector local0starts(MPI::get_size(), 0); - MPI_Alltoall(&local_0_start, 1, MPI_INT, &local0starts[0], 1, MPI_INT, MPI_COMM_WORLD); - - std::sort(pos.begin(), pos.end(), [&](auto x1, auto x2) { return get_task(x1,local0starts) < get_task(x2,local0starts); }); + std::sort(pos.begin(), pos.end(), [&](auto x1, auto x2) { return get_task(x1) < get_task(x2); }); std::vector sendcounts(MPI::get_size(), 0), sendoffsets(MPI::get_size(), 0); std::vector recvcounts(MPI::get_size(), 0), recvoffsets(MPI::get_size(), 0); for (auto x : pos) { - sendcounts[get_task(x,local0starts)] += 3; + sendcounts[get_task(x)] += 3; } MPI_Alltoall(&sendcounts[0], 1, MPI_INT, &recvcounts[0], 1, MPI_INT, MPI_COMM_WORLD); @@ -169,13 +166,12 @@ struct grid_interpolate tot_send += sendcounts[i]; } - std::vector recvbuf; - recvbuf.assign(tot_receive,{0.,0.,0.}); + std::vector recvbuf(tot_receive/3,{0.,0.,0.}); - MPI_Alltoallv(&pos[0], &sendcounts[0], &sendoffsets[0], MPI_data_t_type, - &recvbuf[0], &recvcounts[0], &recvoffsets[0], MPI_data_t_type, MPI_COMM_WORLD); + MPI_Alltoallv(&pos[0], &sendcounts[0], &sendoffsets[0], MPI::get_datatype(), + &recvbuf[0], &recvcounts[0], &recvoffsets[0], MPI::get_datatype(), MPI_COMM_WORLD); - std::swap( pos, recvbuf ); + pos.swap( recvbuf ); #endif } } @@ -193,26 +189,4 @@ struct grid_interpolate return std::exp(ccomplex_t(0.0, shift)) / del; } - void get_at(std::vector &pos, std::vector &val) const - { - - val.assign( pos.size(), data_t{0.0} ); - - for( size_t i=0; i> > lattice_shifts = -{ - // first shift must always be zero! (otherwise set_positions and set_velocities break) - /* SC : */ {{0.0,0.0,0.0}}, - /* BCC: */ {{0.0,0.0,0.0},{0.5,0.5,0.5}}, - /* FCC: */ {{0.0,0.0,0.0},{0.0,0.5,0.5},{0.5,0.0,0.5},{0.5,0.5,0.0}}, - /* RSC: */ {{0.0,0.0,0.0},{0.0,0.0,0.5},{0.0,0.5,0.0},{0.0,0.5,0.5},{0.5,0.0,0.0},{0.5,0.0,0.5},{0.5,0.5,0.0},{0.5,0.5,0.5}}, -}; - -const std::vector> second_lattice_shift = +namespace particle { - /* SC : */ {0.5, 0.5, 0.5}, // this corresponds to CsCl lattice - /* BCC: */ {0.5, 0.5, 0.0}, // is there a diatomic lattice with BCC base?!? 
- /* FCC: */ {0.5, 0.5, 0.5}, // this corresponds to NaCl lattice - // /* FCC: */ {0.25, 0.25, 0.25}, // this corresponds to Zincblende/GaAs lattice - /* RSC: */ {0.25, 0.25, 0.25}, -}; + using vec3 = std::array; -template -void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t& field, config_file& cf ){ - if( lattice_type != lattice_glass ) + enum lattice { - // number of modes present in the field - const size_t num_p_in_load = field.local_size(); - // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load): - const size_t overload = 1ull<(0,lattice_type); // 1 for sc, 2 for bcc, 4 for fcc, 8 for rsc - // allocate memory for all local particles - particles.allocate( overload * num_p_in_load, b64reals, b64ids ); - // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well - for( size_t i=0,ipcount=0; i>> lattice_shifts = + { + // first shift must always be zero! (otherwise set_positions and set_velocities break) + /* SC : */ {{0.0, 0.0, 0.0}}, + /* BCC: */ {{0.0, 0.0, 0.0}, {0.5, 0.5, 0.5}}, + /* FCC: */ {{0.0, 0.0, 0.0}, {0.0, 0.5, 0.5}, {0.5, 0.0, 0.5}, {0.5, 0.5, 0.0}}, + /* RSC: */ {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.5}, {0.0, 0.5, 0.0}, {0.0, 0.5, 0.5}, {0.5, 0.0, 0.0}, {0.5, 0.0, 0.5}, {0.5, 0.5, 0.0}, {0.5, 0.5, 0.5}}, + }; + + const std::vector> second_lattice_shift = + { + /* SC : */ {0.5, 0.5, 0.5}, // this corresponds to CsCl lattice + /* BCC: */ {0.5, 0.5, 0.0}, // is there a diatomic lattice with BCC base?!? + /* FCC: */ {0.5, 0.5, 0.5}, // this corresponds to NaCl lattice + // /* FCC: */ {0.25, 0.25, 0.25}, // this corresponds to Zincblende/GaAs lattice + /* RSC: */ {0.25, 0.25, 0.25}, + }; + + template + class lattice_generator { + protected: + + struct glass + { + using data_t = typename field_t::data_t; + size_t num_p, off_p; + grid_interpolate<1, field_t> interp_; + std::vector glass_posr; + + glass( config_file& cf, const field_t &field ) + : num_p(0), off_p(0), interp_( field ) + { + std::vector glass_pos; + real_t lglassbox = 1.0; + + std::string glass_fname = cf.get_value("setup", "GlassFileName"); + size_t ntiles = cf.get_value("setup", "GlassTiles"); + #if defined(USE_HDF5) - std::string glass_fname = cf.get_value("setup","GlassFileName"); - - std::vector glass_dims; - HDFGetDatasetExtent( glass_fname, "/PartType1/Coordinates", glass_dims ); - music::ilog << "Glass file contains " << glass_dims[0] << " particles." << std::endl; - - size_t ntiles = cf.get_value("setup","GlassTiles"); + HDFReadGroupAttribute(glass_fname, "Header", "BoxSize", lglassbox); + HDFReadDataset(glass_fname, "/PartType1/Coordinates", glass_pos); +#else + throw std::runtime_error("Class lattice requires HDF5 support. 
Enable and recompile."); +#endif + + size_t np_in_file = glass_pos.size() / 3; #if defined(USE_MPI) - size_t num_p = glass_dims[0] * ntiles*ntiles*ntiles / MPI::get_size(); - size_t off_p = MPI::get_rank() * num_p; + num_p = np_in_file * ntiles * ntiles * ntiles / MPI::get_size(); + off_p = MPI::get_rank() * num_p; #else - size_t num_p = glass_dims[0] * ntiles*ntiles*ntiles; - size_t off_p = 0; + num_p = np_in_file * ntiles * ntiles * ntiles; + off_p = 0; #endif - particles.allocate( num_p, b64reals, b64ids ); + glass_posr.assign(num_p, {0.0, 0.0, 0.0}); - for( size_t i=0; i ng({real_t(field.n_[0]), real_t(field.n_[1]), real_t(field.n_[2])}); + + for (size_t i = 0; i < num_p; ++i) + { + size_t idxpart = off_p + i; + size_t idx_in_glass = idxpart % np_in_file; + size_t idxtile = idxpart / np_in_file; + size_t tile_z = idxtile % (ntiles * ntiles); + size_t tile_y = ((idxtile - tile_z) / ntiles) % ntiles; + size_t tile_x = (((idxtile - tile_z) / ntiles) - tile_y) / ntiles; + glass_posr[i][0] = std::fmod((glass_pos[3 * idx_in_glass + 0] / lglassbox + real_t(tile_x)) / ntiles * ng[0] + ng[0], ng[0]); + glass_posr[i][1] = std::fmod((glass_pos[3 * idx_in_glass + 1] / lglassbox + real_t(tile_y)) / ntiles * ng[1] + ng[1], ng[1]); + glass_posr[i][2] = std::fmod((glass_pos[3 * idx_in_glass + 2] / lglassbox + real_t(tile_z)) / ntiles * ng[2] + ng[2], ng[2]); + } + +#if defined(USE_MPI) + interp_.domain_decompose_pos(glass_posr); + + num_p = glass_posr.size(); + std::vector all_num_p( MPI::get_size(), 0 ); + MPI_Allgather( &num_p, 1, MPI_UNSIGNED_LONG_LONG, &all_num_p[0], 1, MPI_UNSIGNED_LONG_LONG, MPI_COMM_WORLD ); + off_p = 0; + for( int itask=0; itask<=MPI::get_rank(); ++itask ){ + off_p += all_num_p[itask]; + } #endif - } -} - -// invalidates field, phase shifted to unspecified position after return -template -void set_positions( container& particles, const lattice lattice_type, bool is_second_lattice, int idim, real_t lunit, const bool b64reals, field_t& field, config_file& cf ) -{ - // works only for Bravais types - if( lattice_type >= 0 ){ - const size_t num_p_in_load = field.local_size(); - for( int ishift=0; ishift<(1<0 ){ - field.shift_field( lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift-1] ); + data_t get_at( const vec3& x ) const noexcept + { + return interp_.get_cic_at( x ); } - // read out values from phase shifted field and set assoc. particle's value - const auto ipcount0 = ishift * num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,lattice_shifts[lattice_type][ishift] - + (is_second_lattice? 
second_lattice_shift[lattice_type] : vec3_t{0.,0.,0.}) ); - if( b64reals ){ - particles.set_pos64( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - }else{ - particles.set_pos32( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + + size_t size() const noexcept + { + return num_p; + } + + size_t offset() const noexcept + { + return off_p; + } + }; + + std::unique_ptr glass_ptr_; + + private: + particle::container particles_; + + public: + lattice_generator(lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t &field, config_file &cf) + { + if (lattice_type != lattice_glass) + { + // number of modes present in the field + const size_t num_p_in_load = field.local_size(); + // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load): + const size_t overload = 1ull << std::max(0, lattice_type); // 1 for sc, 2 for bcc, 4 for fcc, 8 for rsc + // allocate memory for all local particles + particles_.allocate(overload * num_p_in_load, b64reals, b64ids); + // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well + for (size_t i = 0, ipcount = 0; i < field.size(0); ++i) + { + for (size_t j = 0; j < field.size(1); ++j) + { + for (size_t k = 0; k < field.size(2); ++k, ++ipcount) + { + for (size_t iload = 0; iload < overload; ++iload) + { + if (b64ids) + { + particles_.set_id64(ipcount + iload * num_p_in_load, IDoffset + overload * field.get_cell_idx_1d(i, j, k) + iload); + } + else + { + particles_.set_id32(ipcount + iload * num_p_in_load, IDoffset + overload * field.get_cell_idx_1d(i, j, k) + iload); + } + } } } } } - } - }else{ -#if defined(USE_HDF5) - std::string glass_fname = cf.get_value("setup","GlassFileName"); - size_t ntiles = cf.get_value("setup","GlassTiles"); - - real_t lglassbox = 1.0; - HDFReadGroupAttribute( glass_fname, "Header", "BoxSize", lglassbox ); + else + { + glass_ptr_ = std::make_unique( cf, field ); + particles_.allocate(glass_ptr_->size(), b64reals, b64ids); - std::vector glass_pos; - HDFReadDataset( glass_fname, "/PartType1/Coordinates", glass_pos ); - size_t np_in_file = glass_pos.size()/3; -#if defined(USE_MPI) - size_t num_p = np_in_file * ntiles*ntiles*ntiles / MPI::get_size(); - size_t off_p = MPI::get_rank() * num_p; -#else - size_t num_p = np_in_file * ntiles*ntiles*ntiles; - size_t off_p = 0; -#endif - - std::vector< std::array > glass_posr(num_p,{0.0,0.0,0.0}); - - std::array ng({real_t(field.n_[0]),real_t(field.n_[1]),real_t(field.n_[2])}); - - for( size_t i=0; i interp( field ); - - interp.domain_decompose_pos( glass_posr ); - - for( size_t i=0; i -void set_velocities(container &particles, lattice lattice_type, bool is_second_lattice, int idim, const bool b64reals, field_t &field, config_file& cf) -{ - // works only for Bravais types - if( lattice_type >= 0 ){ - const size_t num_p_in_load = field.local_size(); - for( int ishift=0; ishift<(1< 0){ - field.shift_field( lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1] ); - } - // read out values from phase shifted field and set assoc. 
particle's value - const auto ipcount0 = ishift * num_p_in_load; - for( size_t i=0,ipcount=ipcount0; isize(); ++i) + { + if (b64ids) + { + particles_.set_id64(i, IDoffset + i + glass_ptr_->offset()); + } + else + { + particles_.set_id32(i, IDoffset + i + glass_ptr_->offset()); } } } } - }else{ -#if defined(USE_HDF5) - std::string glass_fname = cf.get_value("setup","GlassFileName"); - size_t ntiles = cf.get_value("setup","GlassTiles"); - - real_t lglassbox = 1.0; - HDFReadGroupAttribute( glass_fname, "Header", "BoxSize", lglassbox ); - std::vector glass_pos; - HDFReadDataset( glass_fname, "/PartType1/Coordinates", glass_pos ); - size_t np_in_file = glass_pos.size()/3; -#if defined(USE_MPI) - size_t num_p = np_in_file * ntiles*ntiles*ntiles / MPI::get_size(); - size_t off_p = MPI::get_rank() * num_p; -#else - size_t num_p = np_in_file * ntiles*ntiles*ntiles; - size_t off_p = 0; -#endif + // invalidates field, phase shifted to unspecified position after return + void set_positions(const lattice lattice_type, bool is_second_lattice, int idim, real_t lunit, const bool b64reals, field_t &field, config_file &cf) + { + // works only for Bravais types + if (lattice_type >= 0) + { + const size_t num_p_in_load = field.local_size(); + for (int ishift = 0; ishift < (1 << lattice_type); ++ishift) + { + // if we are dealing with the secondary lattice, apply a global shift + if (ishift == 0 && is_second_lattice) + { + field.shift_field(second_lattice_shift[lattice_type]); + } - - std::vector< std::array > glass_posr(num_p,{0.0,0.0,0.0}); - - std::array ng({real_t(field.n_[0]),real_t(field.n_[1]),real_t(field.n_[2])}); - - for( size_t i=0; i interp( field ); - - interp.domain_decompose_pos( glass_posr ); - - for( size_t i=0; i 0) + { + field.shift_field(lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift - 1]); + } + // read out values from phase shifted field and set assoc. particle's value + const auto ipcount0 = ishift * num_p_in_load; + for (size_t i = 0, ipcount = ipcount0; i < field.size(0); ++i) + { + for (size_t j = 0; j < field.size(1); ++j) + { + for (size_t k = 0; k < field.size(2); ++k) + { + auto pos = field.template get_unit_r_shifted(i, j, k, lattice_shifts[lattice_type][ishift] + (is_second_lattice ? second_lattice_shift[lattice_type] : vec3_t{0., 0., 0.})); + if (b64reals) + { + particles_.set_pos64(ipcount++, idim, pos[idim] * lunit + field.relem(i, j, k)); + } + else + { + particles_.set_pos32(ipcount++, idim, pos[idim] * lunit + field.relem(i, j, k)); + } + } + } + } + } + } + else + { + for (size_t i = 0; i < this->glass_ptr_->size(); ++i) + { + auto pos = this->glass_ptr_->glass_posr[i]; + real_t disp = this->glass_ptr_->get_at(pos); + if (b64reals) + { + particles_.set_pos64(i, idim, pos[idim] / field.n_[idim] * lunit + disp); + } + else + { + particles_.set_pos32(i, idim, pos[idim] / field.n_[idim] * lunit + disp); + } + } } } - -#else - throw std::runtime_error("Class lattice requires HDF5 support. 
Enable and recompile."); -#endif - } -} + void set_velocities(lattice lattice_type, bool is_second_lattice, int idim, const bool b64reals, field_t &field, config_file &cf) + { + // works only for Bravais types + if (lattice_type >= 0) + { + const size_t num_p_in_load = field.local_size(); + for (int ishift = 0; ishift < (1 << lattice_type); ++ishift) + { + // if we are dealing with the secondary lattice, apply a global shift + if (ishift == 0 && is_second_lattice) + { + field.shift_field(second_lattice_shift[lattice_type]); + } + // can omit first shift since zero by convention, unless shifted already above, otherwise apply relative phase shift + if (ishift > 0) + { + field.shift_field(lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift - 1]); + } + // read out values from phase shifted field and set assoc. particle's value + const auto ipcount0 = ishift * num_p_in_load; + for (size_t i = 0, ipcount = ipcount0; i < field.size(0); ++i) + { + for (size_t j = 0; j < field.size(1); ++j) + { + for (size_t k = 0; k < field.size(2); ++k) + { + if (b64reals) + { + particles_.set_vel64(ipcount++, idim, field.relem(i, j, k)); + } + else + { + particles_.set_vel32(ipcount++, idim, field.relem(i, j, k)); + } + } + } + } + } + } + else + { + for (size_t i = 0; i < glass_ptr_->size(); ++i) + { + auto pos = glass_ptr_->glass_posr[i]; + real_t vel = glass_ptr_->get_at(pos); + if (b64reals) + { + particles_.set_vel64(i, idim, vel); + } + else + { + particles_.set_vel32(i, idim, vel); + } + } + } + } -} // end namespace particles + const particle::container& get_particles() const noexcept{ + return particles_; + } + + }; // struct lattice + +} // namespace particle diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 8352007..f677551 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -452,6 +452,19 @@ int Run( config_file& the_config ) // temporary storage of data Grid_FFT tmp({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + std::unique_ptr>> particle_lattice_generator_ptr; + + // if output plugin wants particles, then we need to store them, along with their IDs + if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) + { + // somewhat arbitrarily, start baryon particle IDs from 2**31 if we have 32bit and from 2**56 if we have 64 bits + size_t IDoffset = (this_species == cosmo_species::baryon)? ((the_output_plugin->has_64bit_ids())? 1ul<<56 : 1ul<<31): 0 ; + + // allocate particle structure and generate particle IDs + particle_lattice_generator_ptr = + std::make_unique>>( lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp, the_config ); + } + //if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_eulerian ){ if( the_output_plugin->write_species_as(this_species) == output_type::field_eulerian ) @@ -542,22 +555,21 @@ int Run( config_file& the_config ) //=================================================================================== // we store displacements and velocities here if we compute them //=================================================================================== - particle::container particles; + bool shifted_lattice = (this_species == cosmo_species::baryon && the_output_plugin->write_species_as(this_species) == output_type::particles) ? true : false; - // somewhat arbitrarily, start baryon particle IDs from 2**31 if we have 32bit and from 2**56 if we have 64 bits - size_t IDoffset = (this_species == cosmo_species::baryon)? 
((the_output_plugin->has_64bit_ids())? 1ul<<56 : 1ul<<31): 0 ; + grid_interpolate<1,Grid_FFT> interp( tmp ); // if output plugin wants particles, then we need to store them, along with their IDs - if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) - { - // allocate particle structure and generate particle IDs - particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp, the_config ); - } + // if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) + // { + // // allocate particle structure and generate particle IDs + // particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp, the_config ); + // } // write out positions for( int idim=0; idim<3; ++idim ){ @@ -604,7 +616,7 @@ int Run( config_file& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_positions( particles, lattice_type, shifted_lattice, idim, lunit, the_output_plugin->has_64bit_reals(), tmp, the_config ); + particle_lattice_generator_ptr->set_positions( lattice_type, shifted_lattice, idim, lunit, the_output_plugin->has_64bit_reals(), tmp, the_config ); } // otherwise write out the grid data directly to the output plugin // else if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_lagrangian ) @@ -672,7 +684,7 @@ int Run( config_file& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_velocities( particles, lattice_type, shifted_lattice, idim, the_output_plugin->has_64bit_reals(), tmp, the_config ); + particle_lattice_generator_ptr->set_velocities( lattice_type, shifted_lattice, idim, the_output_plugin->has_64bit_reals(), tmp, the_config ); } // otherwise write out the grid data directly to the output plugin else if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian ) @@ -684,7 +696,7 @@ int Run( config_file& the_config ) if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - the_output_plugin->write_particle_data( particles, this_species, Omega[this_species] ); + the_output_plugin->write_particle_data( particle_lattice_generator_ptr->get_particles(), this_species, Omega[this_species] ); } if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian ) From 68aa31c59a61835eabf08f9a358c54f3ac8c126a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 4 May 2020 01:13:07 +0200 Subject: [PATCH 126/130] fixed forgotten boundary update --- include/grid_interpolate.hh | 68 ++++++++++++++++++----------------- include/particle_generator.hh | 15 ++++++-- 2 files changed, 48 insertions(+), 35 deletions(-) diff --git a/include/grid_interpolate.hh b/include/grid_interpolate.hh index c0ec9aa..c37fafb 100644 --- a/include/grid_interpolate.hh +++ b/include/grid_interpolate.hh @@ -28,48 +28,52 @@ struct grid_interpolate if (is_distributed_trait) { -#if defined(USE_MPI) + update_ghosts( g ); + } + } - int local_0_start = int(gridref.local_0_start_); - local0starts_.assign(MPI::get_size(), 0); + void update_ghosts( const grid_t &g ) + { + #if defined(USE_MPI) - MPI_Allgather(&local_0_start, 1, MPI_INT, &local0starts_[0], 1, 
MPI_INT, MPI_COMM_WORLD); + int local_0_start = int(gridref.local_0_start_); + local0starts_.assign(MPI::get_size(), 0); - //... exchange boundary - size_t nx = interpolation_order + 1; - size_t ny = g.n_[1]; - size_t nz = g.n_[2]; + MPI_Allgather(&local_0_start, 1, MPI_INT, &local0starts_[0], 1, MPI_INT, MPI_COMM_WORLD); - boundary_.assign(nx * ny * nz, data_t{0.0}); + //... exchange boundary + size_t nx = interpolation_order + 1; + size_t ny = g.n_[1]; + size_t nz = g.n_[2]; - for (size_t i = 0; i < nx; ++i) + boundary_.assign(nx * ny * nz, data_t{0.0}); + + for (size_t i = 0; i < nx; ++i) + { + for (size_t j = 0; j < ny; ++j) { - for (size_t j = 0; j < ny; ++j) + for (size_t k = 0; k < nz; ++k) { - for (size_t k = 0; k < nz; ++k) - { - boundary_[(i * ny + j) * nz + k] = g.relem(i, j, k); - } + boundary_[(i * ny + j) * nz + k] = g.relem(i, j, k); } } - - int sendto = (MPI::get_rank() + MPI::get_size() - 1) % MPI::get_size(); - int recvfrom = (MPI::get_rank() + MPI::get_size() + 1) % MPI::get_size(); - - MPI_Status status; - status.MPI_ERROR = MPI_SUCCESS; - - int err = MPI_Sendrecv_replace(&boundary_[0], nx * ny * nz, MPI::get_datatype(), sendto, - MPI::get_rank() + 1000, recvfrom, recvfrom + 1000, MPI_COMM_WORLD, &status); - - if( err != MPI_SUCCESS ){ - char errstr[256]; int errlen=256; - MPI_Error_string(err, errstr, &errlen ); - music::elog << "MPI_ERROR #" << err << " : " << errstr << std::endl; - } - -#endif } + + int sendto = (MPI::get_rank() + MPI::get_size() - 1) % MPI::get_size(); + int recvfrom = (MPI::get_rank() + MPI::get_size() + 1) % MPI::get_size(); + + MPI_Status status; + status.MPI_ERROR = MPI_SUCCESS; + + int err = MPI_Sendrecv_replace(&boundary_[0], nx * ny * nz, MPI::get_datatype(), sendto, + MPI::get_rank() + 1000, recvfrom, recvfrom + 1000, MPI_COMM_WORLD, &status); + + if( err != MPI_SUCCESS ){ + char errstr[256]; int errlen=256; + MPI_Error_string(err, errstr, &errlen ); + music::elog << "MPI_ERROR #" << err << " : " << errstr << std::endl; + } +#endif } data_t get_ngp_at(const std::array &pos, std::vector &val) const noexcept diff --git a/include/particle_generator.hh b/include/particle_generator.hh index ec00903..1dec028 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -82,6 +82,8 @@ namespace particle off_p = 0; #endif + music::ilog << "Glass file contains " << np_in_file << " particles." 
<< std::endl; + glass_posr.assign(num_p, {0.0, 0.0, 0.0}); std::array ng({real_t(field.n_[0]), real_t(field.n_[1]), real_t(field.n_[2])}); @@ -112,6 +114,11 @@ namespace particle #endif } + void update_ghosts( const field_t &field ) + { + interp_.update_ghosts( field ); + } + data_t get_at( const vec3& x ) const noexcept { return interp_.get_cic_at( x ); @@ -229,10 +236,11 @@ namespace particle } else { - for (size_t i = 0; i < this->glass_ptr_->size(); ++i) + glass_ptr_->update_ghosts( field ); + for (size_t i = 0; i < glass_ptr_->size(); ++i) { - auto pos = this->glass_ptr_->glass_posr[i]; - real_t disp = this->glass_ptr_->get_at(pos); + auto pos = glass_ptr_->glass_posr[i]; + real_t disp = glass_ptr_->get_at(pos); if (b64reals) { particles_.set_pos64(i, idim, pos[idim] / field.n_[idim] * lunit + disp); @@ -286,6 +294,7 @@ namespace particle } else { + glass_ptr_->update_ghosts( field ); for (size_t i = 0; i < glass_ptr_->size(); ++i) { auto pos = glass_ptr_->glass_posr[i]; From 0a6f8f51cbaa14c28a88a616093fc7b14d4b3422 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 4 May 2020 02:09:03 +0200 Subject: [PATCH 127/130] fixed bug in grid shifting with MPI (affects bcc and fcc) --- include/grid_fft.hh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index c11d1b3..8097b36 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -774,12 +774,7 @@ public: { FourierTransformForward(); apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { - real_t shift; - if( bdistributed ){ - shift = s.y * k[0] * get_dx()[0] + s.x * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; - }else{ - shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; - } + real_t shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; return x * std::exp(ccomplex_t(0.0, shift)); }); if( transform_back ){ From fc448a21a0c8c0d071542309e270c9f093217623 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 4 May 2020 02:09:42 +0200 Subject: [PATCH 128/130] some cleanup / compilation problem fixes --- include/grid_interpolate.hh | 5 ----- include/particle_generator.hh | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/grid_interpolate.hh b/include/grid_interpolate.hh index c37fafb..5304fab 100644 --- a/include/grid_interpolate.hh +++ b/include/grid_interpolate.hh @@ -96,11 +96,6 @@ struct grid_interpolate size_t iz1 = (iz + 1) % nz_; data_t val{0.0}; - - if( get_task(pos) != MPI::get_rank() ){ - std::cout << "task : " << MPI::get_rank() << " p@(" << pos[0] << ", " << pos[1] << ", " << pos[2] << ") belongs to task " << get_task(pos) << std::endl; - abort(); - } if( is_distributed_trait ){ ptrdiff_t localix = ix-gridref.local_0_start_; diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 1dec028..5fe68d1 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -88,6 +88,7 @@ namespace particle std::array ng({real_t(field.n_[0]), real_t(field.n_[1]), real_t(field.n_[2])}); + #pragma omp parallel for for (size_t i = 0; i < num_p; ++i) { size_t idxpart = off_p + i; @@ -152,6 +153,7 @@ namespace particle // allocate memory for all local particles particles_.allocate(overload * num_p_in_load, b64reals, b64ids); // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well + for (size_t i = 0, ipcount = 0; i < field.size(0); ++i) { for (size_t j = 0; j < field.size(1); ++j) @@ 
-178,6 +180,7 @@ namespace particle
       glass_ptr_ = std::make_unique( cf, field );
       particles_.allocate(glass_ptr_->size(), b64reals, b64ids);
 
+      #pragma omp parallel for
       for (size_t i = 0; i < glass_ptr_->size(); ++i)
       {
         if (b64ids)
@@ -237,6 +240,7 @@ namespace particle
       else
       {
         glass_ptr_->update_ghosts( field );
+        #pragma omp parallel for
         for (size_t i = 0; i < glass_ptr_->size(); ++i)
         {
           auto pos = glass_ptr_->glass_posr[i];
@@ -295,6 +299,7 @@ namespace particle
       else
       {
         glass_ptr_->update_ghosts( field );
+        #pragma omp parallel for
         for (size_t i = 0; i < glass_ptr_->size(); ++i)
         {
           auto pos = glass_ptr_->glass_posr[i];

From 658a8ad41853fe9422a25982a90e73a953ac5eba Mon Sep 17 00:00:00 2001
From: Oliver Hahn
Date: Mon, 4 May 2020 10:34:58 +0200
Subject: [PATCH 129/130] cleanup

---
 include/grid_fft.hh | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/include/grid_fft.hh b/include/grid_fft.hh
index 8097b36..e07e5ed 100644
--- a/include/grid_fft.hh
+++ b/include/grid_fft.hh
@@ -28,17 +28,8 @@ public:
     static constexpr bool is_distributed_trait{bdistributed};
 
 protected:
-#if defined(USE_MPI)
-    const MPI_Datatype MPI_data_t_type =
-        (typeid(data_t) == typeid(float)) ? MPI_FLOAT
-        : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE
-        : (typeid(data_t) == typeid(long double)) ? MPI_LONG_DOUBLE
-        : (typeid(data_t) == typeid(std::complex)) ? MPI_C_FLOAT_COMPLEX
-        : (typeid(data_t) == typeid(std::complex)) ? MPI_C_DOUBLE_COMPLEX
-        : (typeid(data_t) == typeid(std::complex)) ? MPI_C_LONG_DOUBLE_COMPLEX
-        : MPI_INT;
-#endif
     using grid_fft_t = Grid_FFT;
+
 public:
     std::array n_, nhalf_;
     std::array sizes_;

From 0937242a1b9cc17578b9d611c8ed5c916cdd6768 Mon Sep 17 00:00:00 2001
From: Oliver Hahn
Date: Tue, 5 May 2020 18:21:04 +0200
Subject: [PATCH 130/130] class submodule update

---
 external/class | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/external/class b/external/class
index 055d8bc..6adecae 160000
--- a/external/class
+++ b/external/class
@@ -1 +1 @@
-Subproject commit 055d8bca371631da0c51ff167ce81905996b4ca2
+Subproject commit 6adecae2f30172a94e003155090791abf509d995
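
For reference, the slab lookup that patches 124-126 introduce in grid_interpolate.hh (the get_task() helper used to decide which MPI task owns a particle before the MPI_Alltoallv exchange) reduces to a binary search over the tasks' first slab indices. The stand-alone sketch below illustrates only that logic; the slab starts and test positions are made-up values, not taken from the patches, and the member-function form used in the code is condensed to a free function here.

    #include <algorithm>
    #include <iostream>
    #include <vector>

    // Mirror of get_task(): local0starts holds each task's first slab index
    // (gathered via MPI_Allgather in the patches); the owner of position x0 is
    // the last task whose slab start is <= int(x0).
    int get_task(double x0, const std::vector<int> &local0starts)
    {
        auto it = std::upper_bound(local0starts.begin(), local0starts.end(), int(x0));
        return int(std::distance(local0starts.begin(), it)) - 1;
    }

    int main()
    {
        std::vector<int> local0starts{0, 16, 32, 48}; // e.g. 4 tasks, 16 slabs each
        for (double x : {3.5, 16.0, 47.9, 63.0})
            std::cout << "x0=" << x << " -> task " << get_task(x, local0starts) << "\n";
        return 0;
    }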