From 01b22b76a32aecdcabf43c3536573e7c87a2de78 Mon Sep 17 00:00:00 2001 From: Michael Michaux Date: Thu, 24 Oct 2019 14:44:06 +0200 Subject: [PATCH 001/130] Added theoretical convergence test. --- include/testing.hh | 2 + src/ic_generator.cc | 2 +- src/testing.cc | 104 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 99 insertions(+), 9 deletions(-) diff --git a/include/testing.hh b/include/testing.hh index 53bc571..2395db3 100644 --- a/include/testing.hh +++ b/include/testing.hh @@ -4,6 +4,7 @@ #include #include #include +#include namespace testing{ void output_potentials_and_densities( @@ -27,6 +28,7 @@ namespace testing{ void output_convergence( ConfigFile &the_config, + CosmologyCalculator* the_cosmo_calc, std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, diff --git a/src/ic_generator.cc b/src/ic_generator.cc index a915db9..bc2a956 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -335,7 +335,7 @@ int Run( ConfigFile& the_config ) } else if(testing == "velocity_displacement_symmetries") { testing::output_velocity_displacement_symmetries(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); } else if(testing == "convergence") { - testing::output_convergence(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); + testing::output_convergence(the_config, the_cosmo_calc.get(), ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); } else { csoca::flog << "unknown test '" << testing << "'" << std::endl; std::abort(); diff --git a/src/testing.cc b/src/testing.cc index bfd088d..533855a 100644 --- a/src/testing.cc +++ b/src/testing.cc @@ -242,6 +242,7 @@ void output_velocity_displacement_symmetries( void output_convergence( ConfigFile &the_config, + CosmologyCalculator* the_cosmo_calc, std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, @@ -249,7 +250,6 @@ void output_convergence( Grid_FFT &phi3b, std::array *, 3> &A3) { - // scale all potentials to remove dplus0 phi /= dplus; phi2 /= dplus * dplus; @@ -259,6 +259,90 @@ void output_convergence( (*A3[1]) /= dplus * dplus * dplus; (*A3[2]) /= dplus * dplus * dplus; + ////////////////////// theoretical convergence radius ////////////////////// + + // compute phi_code + Grid_FFT phi_code({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + phi_code.FourierTransformForward(false); + #pragma omp parallel for collapse(3) + for (std::size_t i = 0; i < phi_code.size(0); ++i) { + for (std::size_t j = 0; j < phi_code.size(1); ++j) { + for (std::size_t k = 0; k < phi_code.size(2); ++k) { + std::size_t idx = phi_code.get_idx(i, j, k); + phi_code.kelem(idx) = -phi.kelem(idx); + } + } + } + + // initialize norm to 0 + Grid_FFT nabla_vini_norm({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + #pragma omp parallel for collapse(3) + for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { + for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { + for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { + std::size_t idx = nabla_vini_norm.get_idx(i, j, k); + nabla_vini_norm.relem(idx) = 0.0; + } + } + } + + Grid_FFT nabla_vini_mn({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + for(std::size_t m = 0; m < 3; m++) { + for(std::size_t n = m; n < 3; n++) { + nabla_vini_mn.FourierTransformForward(false); + #pragma omp parallel for collapse(3) + for (std::size_t i = 0; i < phi_code.size(0); ++i) { + for (std::size_t j = 0; j < phi_code.size(1); ++j) { + for (std::size_t k = 0; k < phi_code.size(2); ++k) { + 
std::size_t idx = phi_code.get_idx(i, j, k); + auto kk = phi_code.get_k(i, j, k); + nabla_vini_mn.kelem(idx) = phi_code.kelem(idx) * (kk[m] * kk[n]); + } + } + } + nabla_vini_mn.FourierTransformBackward(); + nabla_vini_mn *= (3.2144004915 / the_cosmo_calc->CalcGrowthFactor(1.0)); + // sum of squares + #pragma omp parallel for collapse(3) + for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { + for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { + for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { + std::size_t idx = nabla_vini_norm.get_idx(i, j, k); + if(m != n) { + nabla_vini_norm.relem(idx) += (2.0 * nabla_vini_mn.relem(idx) * nabla_vini_mn.relem(idx)); + } else { + nabla_vini_norm.relem(idx) += (nabla_vini_mn.relem(idx) * nabla_vini_mn.relem(idx)); + } + } + } + } + } + } + // square root + #pragma omp parallel for collapse(3) + for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { + for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { + for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { + std::size_t idx = nabla_vini_norm.get_idx(i, j, k); + nabla_vini_norm.relem(idx) = std::sqrt(nabla_vini_norm.relem(idx)); + } + } + } + + // get t_eds + Grid_FFT t_eds({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + #pragma omp parallel for collapse(3) + for (std::size_t i = 0; i < t_eds.size(0); ++i) { + for (std::size_t j = 0; j < t_eds.size(1); ++j) { + for (std::size_t k = 0; k < t_eds.size(2); ++k) { + std::size_t idx = t_eds.get_idx(i, j, k); + t_eds.relem(idx) = 0.0204 / nabla_vini_norm.relem(idx); + } + } + } + + ////////////////////////// 3lpt convergence test /////////////////////////// + // initialize grids to 0 Grid_FFT psi_1({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT psi_2({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); @@ -351,13 +435,17 @@ void output_convergence( } } - // write results - unlink("convergence_test.hdf5"); - inv_convergence_radius.Write_to_HDF5("convergence_test.hdf5", "inv_convergence_radius"); - psi_1.Write_to_HDF5("convergence_test.hdf5", "psi_1_norm"); - psi_2.Write_to_HDF5("convergence_test.hdf5", "psi_2_norm"); - psi_3.Write_to_HDF5("convergence_test.hdf5", "psi_3_norm"); - + ////////////////////////////// write results /////////////////////////////// + std::string convergence_test_filename("convergence_test.hdf5"); + unlink(convergence_test_filename.c_str()); +#if defined(USE_MPI) + MPI_Barrier(MPI_COMM_WORLD); +#endif + t_eds.Write_to_HDF5(convergence_test_filename, "t_eds"); + inv_convergence_radius.Write_to_HDF5(convergence_test_filename, "inv_convergence_radius"); + // psi_1.Write_to_HDF5(convergence_test_filename, "psi_1_norm"); + // psi_2.Write_to_HDF5(convergence_test_filename, "psi_2_norm"); + // psi_3.Write_to_HDF5(convergence_test_filename, "psi_3_norm"); } } // namespace testing From 4de579ca7831f44897ba54406f97391cb45da2c0 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 24 Oct 2019 17:12:43 +0200 Subject: [PATCH 002/130] revert back to C++14 by replacing inline lambdas with template funcs --- CMakeLists.txt | 2 +- include/operators.hh | 27 ++++++++++++++++++++++----- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 875fc91..fcc57e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,7 +84,7 @@ file( GLOB PLUGINS add_executable(${PRGNAME} ${SOURCES} ${PLUGINS}) target_setup_class(${PRGNAME}) -set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 17) +set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 14) # mpi flags 
if(MPI_CXX_FOUND) diff --git a/include/operators.hh b/include/operators.hh index cc0ed67..c78526e 100644 --- a/include/operators.hh +++ b/include/operators.hh @@ -1,9 +1,26 @@ #pragma once namespace op{ -inline auto assign_to = [](auto &g){return [&](auto i, auto v){ g[i] = v; };}; -inline auto add_to = [](auto &g){return [&](auto i, auto v){ g[i] += v; };}; -inline auto add_twice_to = [](auto &g){return [&](auto i, auto v){ g[i] += 2*v; };}; -inline auto subtract_from = [](auto &g){return [&](auto i, auto v){ g[i] -= v; };}; -inline auto subtract_twice_from = [](auto &g){return [&](auto i, auto v){ g[i] -= 2*v; };}; + +template< typename grid> +inline auto assign_to( grid& g ){return [&](auto i, auto v){ g[i] = v; };} + +template< typename grid> +inline auto add_to( grid& g ){return [&](auto i, auto v){ g[i] += v; };} + +template< typename grid> +inline auto add_twice_to( grid& g ){return [&](auto i, auto v){ g[i] += 2*v; };} + +template< typename grid> +inline auto subtract_from( grid& g ){return [&](auto i, auto v){ g[i] -= v; };} + +template< typename grid> +inline auto subtract_twice_from( grid& g ){return [&](auto i, auto v){ g[i] -= 2*v; };} + +// above template functions can be written as C++17 inline lambdas... but we're using C++14... +// inline auto assign_to = [](auto &g){return [&](auto i, auto v){ g[i] = v; };}; +// inline auto add_to = [](auto &g){return [&](auto i, auto v){ g[i] += v; };}; +// inline auto add_twice_to = [](auto &g){return [&](auto i, auto v){ g[i] += 2*v; };}; +// inline auto subtract_from = [](auto &g){return [&](auto i, auto v){ g[i] -= v; };}; +// inline auto subtract_twice_from = [](auto &g){return [&](auto i, auto v){ g[i] -= 2*v; };}; } From 80da0a4ff2558644b3b83971bce4fce55268de68 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 24 Oct 2019 17:26:44 +0200 Subject: [PATCH 003/130] removed openmp loop collapse, problems with intel c++ --- src/testing.cc | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/testing.cc b/src/testing.cc index 533855a..e99fbb4 100644 --- a/src/testing.cc +++ b/src/testing.cc @@ -264,7 +264,7 @@ void output_convergence( // compute phi_code Grid_FFT phi_code({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); phi_code.FourierTransformForward(false); - #pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < phi_code.size(0); ++i) { for (std::size_t j = 0; j < phi_code.size(1); ++j) { for (std::size_t k = 0; k < phi_code.size(2); ++k) { @@ -276,7 +276,7 @@ void output_convergence( // initialize norm to 0 Grid_FFT nabla_vini_norm({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - #pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { @@ -290,7 +290,7 @@ void output_convergence( for(std::size_t m = 0; m < 3; m++) { for(std::size_t n = m; n < 3; n++) { nabla_vini_mn.FourierTransformForward(false); - #pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < phi_code.size(0); ++i) { for (std::size_t j = 0; j < phi_code.size(1); ++j) { for (std::size_t k = 0; k < phi_code.size(2); ++k) { @@ -303,7 +303,7 @@ void output_convergence( nabla_vini_mn.FourierTransformBackward(); nabla_vini_mn *= (3.2144004915 / the_cosmo_calc->CalcGrowthFactor(1.0)); // sum of squares - #pragma omp parallel for 
collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { @@ -319,7 +319,7 @@ void output_convergence( } } // square root - #pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { @@ -331,7 +331,7 @@ void output_convergence( // get t_eds Grid_FFT t_eds({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - #pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < t_eds.size(0); ++i) { for (std::size_t j = 0; j < t_eds.size(1); ++j) { for (std::size_t k = 0; k < t_eds.size(2); ++k) { @@ -347,7 +347,7 @@ void output_convergence( Grid_FFT psi_1({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT psi_2({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT psi_3({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); -#pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < psi_1.size(0); ++i) { for (std::size_t j = 0; j < psi_1.size(1); ++j) { for (std::size_t k = 0; k < psi_1.size(2); ++k) { @@ -374,7 +374,7 @@ void output_convergence( psi_2_tmp.FourierTransformForward(false); psi_3_tmp.FourierTransformForward(false); -#pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < phi.size(0); ++i) { for (std::size_t j = 0; j < phi.size(1); ++j) { for (std::size_t k = 0; k < phi.size(2); ++k) { @@ -395,7 +395,7 @@ void output_convergence( psi_3_tmp.FourierTransformBackward(); // sum of squares -#pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < psi_1.size(0); ++i) { for (std::size_t j = 0; j < psi_1.size(1); ++j) { for (std::size_t k = 0; k < psi_1.size(2); ++k) { @@ -409,7 +409,7 @@ void output_convergence( } // loop on dimensions // apply square root for the L2 norm -#pragma omp parallel for collapse(3) +#pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < psi_1.size(0); ++i) { for (std::size_t j = 0; j < psi_1.size(1); ++j) { for (std::size_t k = 0; k < psi_1.size(2); ++k) { @@ -423,7 +423,7 @@ void output_convergence( // convergence radius Grid_FFT inv_convergence_radius({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); -#pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < psi_1.size(0); ++i) { for (std::size_t j = 0; j < psi_1.size(1); ++j) { for (std::size_t k = 0; k < psi_1.size(2); ++k) { From fa1abffd0d352c68f978799998646ba5d7f2e995 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 27 Oct 2019 19:49:54 +0100 Subject: [PATCH 004/130] minor cleanup --- include/operators.hh | 29 +++++++++++++---------------- src/ic_generator.cc | 2 +- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/include/operators.hh b/include/operators.hh index c78526e..63d94f4 100644 --- a/include/operators.hh +++ b/include/operators.hh @@ -2,25 +2,22 @@ namespace op{ -template< typename grid> -inline auto assign_to( grid& g ){return [&](auto i, auto v){ g[i] = v; };} +template< typename field> +inline auto assign_to( field& g ){return [&g](auto i, auto v){ g[i] = v; };} -template< typename grid> -inline auto add_to( grid& g ){return [&](auto i, auto v){ g[i] += v; 
};} +template< typename field, typename val > +inline auto multiply_add_to( field& g, val x ){return [&g,x](auto i, auto v){ g[i] += v*x; };} -template< typename grid> -inline auto add_twice_to( grid& g ){return [&](auto i, auto v){ g[i] += 2*v; };} +template< typename field> +inline auto add_to( field& g ){return [&g](auto i, auto v){ g[i] += v; };} -template< typename grid> -inline auto subtract_from( grid& g ){return [&](auto i, auto v){ g[i] -= v; };} +template< typename field> +inline auto add_twice_to( field& g ){return [&g](auto i, auto v){ g[i] += 2*v; };} -template< typename grid> -inline auto subtract_twice_from( grid& g ){return [&](auto i, auto v){ g[i] -= 2*v; };} +template< typename field> +inline auto subtract_from( field& g ){return [&g](auto i, auto v){ g[i] -= v; };} + +template< typename field> +inline auto subtract_twice_from( field& g ){return [&g](auto i, auto v){ g[i] -= 2*v; };} -// above template functions can be written as C++17 inline lambdas... but we're using C++14... -// inline auto assign_to = [](auto &g){return [&](auto i, auto v){ g[i] = v; };}; -// inline auto add_to = [](auto &g){return [&](auto i, auto v){ g[i] += v; };}; -// inline auto add_twice_to = [](auto &g){return [&](auto i, auto v){ g[i] += 2*v; };}; -// inline auto subtract_from = [](auto &g){return [&](auto i, auto v){ g[i] -= v; };}; -// inline auto subtract_twice_from = [](auto &g){return [&](auto i, auto v){ g[i] -= 2*v; };}; } diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 2446b4f..d4d160c 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -340,7 +340,7 @@ int Run( ConfigFile& the_config ) } else { csoca::flog << "unknown test '" << testing << "'" << std::endl; std::abort(); - } + } } else { // temporary storage of data Grid_FFT tmp({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); From 9a3ebc2bec85be141cfefb26ceaa0223afc2b5b5 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 1 Nov 2019 04:45:34 +0100 Subject: [PATCH 005/130] updates to vec3 class, mostly arithmetics --- include/vec3.hh | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/include/vec3.hh b/include/vec3.hh index 9295722..8efb1b2 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -3,24 +3,45 @@ template< typename T > class vec3{ private: + //! holds the data std::array data_; + +public: + //! expose access to elements via references T &x,&y,&z; -public: + + //! empty constructor vec3() : x(data_[0]),y(data_[1]),z(data_[2]){} + //! copy constructor vec3( const vec3 &v) : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} - - vec3( std::array&& d ) - : data_(std::move(d)), x(data_[0]),y(data_[1]),z(data_[2]){} + //! move constructor vec3( vec3 &&v) - : data_(std::move(v.data_)), x(data_[0]),y(data_[1]),z(data_[2]){} + : data_(std::move(v.data_)), x(data_[0]), y(data_[1]), z(data_[2]){} + //! 
construct from initialiser list + template + vec3(E&&...e) + : data_{{std::forward(e)...}}, x(data_[0]), y(data_[1]), z(data_[2]){} + T &operator[](size_t i){ return data_[i];} const T &operator[](size_t i) const { return data_[i]; } + + vec3 operator+( const vec3& v ) const{ return vec3({x+v.x,y+v.y,z+v.z}); } + + vec3 operator-( const vec3& v ) const{ return vec3({x-v.x,y-v.y,z-v.z}); } + + vec3 operator*( T s ) const{ return vec3({x*s,y*s,z*s}); } + + vec3& operator+=( const vec3& v ) const{ x+=v.x; y+=v.y; z+=v.z; return *this; } + + vec3& operator-=( const vec3& v ) const{ x-=v.x; y-=v.y; z-=v.z; return *this; } + + vec3& operator*=( T s ) const{ x*=s; y*=s; z*=s; return *this; } T dot(const vec3 &a) const { From 248c460b716bb204edf7ca2a4a72e2e758fd1f52 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 1 Nov 2019 04:47:02 +0100 Subject: [PATCH 006/130] rewrote particle load generator, added new load (refined sc lattice) --- include/grid_fft.hh | 27 ++------------ include/particle_generator.hh | 69 +++++++++++++++++++++++++++++++++-- src/grid_fft.cc | 1 - src/ic_generator.cc | 7 +++- 4 files changed, 74 insertions(+), 30 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index dcb3cb4..b66be1b 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -177,36 +177,17 @@ public: } template - vec3 get_unit_r_staggered(const size_t i, const size_t j, const size_t k) const + vec3 get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3 s) const { vec3 rr; - rr[0] = (real_t(i + local_0_start_) + 0.5) / real_t(n_[0]); - rr[1] = (real_t(j) + 0.5) / real_t(n_[1]); - rr[2] = (real_t(k) + 0.5) / real_t(n_[2]); + rr[0] = (real_t(i + local_0_start_) + s.x) / real_t(n_[0]); + rr[1] = (real_t(j) + s.y) / real_t(n_[1]); + rr[2] = (real_t(k) + s.z) / real_t(n_[2]); return rr; } - template - vec3 get_unit_r_shifted(const size_t i, const size_t j, const size_t k, double sx, double sy, double sz) const - { - vec3 rr; - - rr[0] = (real_t(i + local_0_start_) + sx) / real_t(n_[0]); - rr[1] = (real_t(j) + sy) / real_t(n_[1]); - rr[2] = (real_t(k) + sz) / real_t(n_[2]); - - return rr; - } - - void cell_pos(int ilevel, size_t i, size_t j, size_t k, double *x) const - { - x[0] = double(i + local_0_start_) / size(0); - x[1] = double(j) / size(1); - x[2] = double(k) / size(2); - } - vec3 get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const { return vec3({i + local_0_start_, j, k}); diff --git a/include/particle_generator.hh b/include/particle_generator.hh index da88813..1aa96b8 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -1,15 +1,29 @@ #pragma once +#include + namespace particle { enum lattice{ - lattice_sc=0, lattice_bcc=1, lattice_fcc=2 + lattice_sc = 0, // SC : simple cubic + lattice_bcc = 1, // BCC: body-centered cubic + lattice_fcc = 2, // FCC: face-centered cubic + lattice_rsc = 3, // RSC: refined simple cubic +}; + +const std::vector< std::vector> > lattice_shifts = +{ + // first shift must always be zero! 
(otherwise set_positions and set_velocities break) + /* SC : */ {{0.0,0.0,0.0}}, + /* BCC: */ {{0.0,0.0,0.0},{0.5,0.5,0.5}}, + /* FCC: */ {{0.0,0.0,0.0},{0.5,0.5,0.0},{0.5,0.0,0.5},{0.0,0.5,0.5}}, + /* RSC: */ {{0.0,0.0,0.0},{0.0,0.0,0.5},{0.0,0.5,0.0},{0.0,0.5,0.5},{0.5,0.0,0.0},{0.5,0.0,0.5},{0.5,0.5,0.0},{0.5,0.5,0.5}}, }; template void initialize_lattice( container& particles, lattice lattice_type, const field_t& field ){ const size_t num_p_in_load = field.local_size(); - const size_t overload = 1< -void set_positions( container& particles, lattice lattice_type, int idim, real_t lunit, field_t& field ) +void set_positions( container& particles, const lattice lattice_type, int idim, real_t lunit, field_t& field ) +{ + const size_t num_p_in_load = field.local_size(); + for( int ishift=0; ishift<(1<0 ){ + vec3 shift = lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1]; + field.shift_field( shift.x, shift.y, shift.z ); + } + auto ipcount0 = ishift * num_p_in_load; + for( size_t i=0,ipcount=ipcount0; i(i,j,k,lattice_shifts[lattice_type][ishift]); + particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + } + } + } + } +} + +template +void set_velocities( container& particles, lattice lattice_type, int idim, field_t& field ) +{ + const size_t num_p_in_load = field.local_size(); + for( int ishift=0; ishift<(1<0 ){ + vec3 shift = lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1]; + field.shift_field( shift.x, shift.y, shift.z ); + } + auto ipcount0 = ishift * num_p_in_load; + for( size_t i=0,ipcount=ipcount0; i +void set_positions_old( container& particles, lattice lattice_type, int idim, real_t lunit, field_t& field ) { const size_t num_p_in_load = field.local_size(); @@ -89,7 +150,7 @@ void set_positions( container& particles, lattice lattice_type, int idim, real_t } template -void set_velocities( container& particles, lattice lattice_type, int idim, field_t& field ) +void set_velocities_old( container& particles, lattice lattice_type, int idim, field_t& field ) { const size_t num_p_in_load = field.local_size(); diff --git a/src/grid_fft.cc b/src/grid_fft.cc index d5f103a..2881010 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -550,7 +550,6 @@ void Grid_FFT::Write_PowerSpectrum(std::string ofname) { std::vector bin_k, bin_P, bin_eP; std::vector bin_count; - int nbins = 4 * std::max(nhalf_[0], std::max(nhalf_[1], nhalf_[2])); this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count ); #if defined(USE_MPI) if (CONFIG::MPI_task_rank == 0) diff --git a/src/ic_generator.cc b/src/ic_generator.cc index d4d160c..d3a7ece 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -57,8 +57,11 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------------------------------------------- //! initialice particles on a bcc or fcc lattice instead of a standard sc lattice (doubles and quadruples the number of particles) std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); - const particle::lattice lattice_type = (lattice_str=="bcc")? particle::lattice_bcc - : ((lattice_str=="fcc")? particle::lattice_fcc : particle::lattice_sc); + const particle::lattice lattice_type = + ((lattice_str=="bcc")? particle::lattice_bcc + : ((lattice_str=="fcc")? particle::lattice_fcc + : ((lattice_str=="rsc")? particle::lattice_rsc + : particle::lattice_sc))); //-------------------------------------------------------------------------------------------------------- //! 
apply fixing of the complex mode amplitude following Angulo & Pontzen (2016) [https://arxiv.org/abs/1603.05253] From c593a7067d72d5b31d7b020c8d465644badeb415 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 1 Nov 2019 04:49:40 +0100 Subject: [PATCH 007/130] cleanup particle generator --- include/particle_generator.hh | 127 +--------------------------------- 1 file changed, 1 insertion(+), 126 deletions(-) diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 1aa96b8..7c69e7d 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -16,7 +16,7 @@ const std::vector< std::vector> > lattice_shifts = // first shift must always be zero! (otherwise set_positions and set_velocities break) /* SC : */ {{0.0,0.0,0.0}}, /* BCC: */ {{0.0,0.0,0.0},{0.5,0.5,0.5}}, - /* FCC: */ {{0.0,0.0,0.0},{0.5,0.5,0.0},{0.5,0.0,0.5},{0.0,0.5,0.5}}, + /* FCC: */ {{0.0,0.0,0.0},{0.0,0.5,0.5},{0.5,0.0,0.5},{0.5,0.5,0.0}}, /* RSC: */ {{0.0,0.0,0.0},{0.0,0.0,0.5},{0.0,0.5,0.0},{0.0,0.5,0.5},{0.5,0.0,0.0},{0.5,0.0,0.5},{0.5,0.5,0.0},{0.5,0.5,0.5}}, }; @@ -83,129 +83,4 @@ void set_velocities( container& particles, lattice lattice_type, int idim, field } -///// deprecated code below //////////////////////////////////////////////////// - -// invalidates field, phase shifted to unspecified position after return -template -void set_positions_old( container& particles, lattice lattice_type, int idim, real_t lunit, field_t& field ) -{ - const size_t num_p_in_load = field.local_size(); - - for( size_t i=0,ipcount=0; i(i,j,k); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } - - if( lattice_type == particle::lattice_bcc ){ - field.shift_field( 0.5, 0.5, 0.5 ); - auto ipcount0 = num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,0.5,0.5,0.5); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } - } - else if( lattice_type == particle::lattice_fcc ){ - // 0.5 0.5 0.0 - field.shift_field( 0.5, 0.5, 0.0 ); - auto ipcount0 = num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,0.5,0.5,0.0); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } - // 0.0 0.5 0.5 - field.shift_field( -0.5, 0.0, 0.5 ); - ipcount0 = 2*num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,0.0,0.5,0.5); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } - // 0.5 0.0 0.5 - field.shift_field( 0.5, -0.5, 0.0 ); - ipcount0 = 3*num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,0.5,0.0,0.5); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } - } -} - -template -void set_velocities_old( container& particles, lattice lattice_type, int idim, field_t& field ) -{ - const size_t num_p_in_load = field.local_size(); - - for( size_t i=0,ipcount=0; i Date: Fri, 1 Nov 2019 04:58:08 +0100 Subject: [PATCH 008/130] removed saved findfftw3 cmake --- new/FindFFTW3.cmake | 232 -------------------------------------------- 1 file changed, 232 deletions(-) delete mode 100644 new/FindFFTW3.cmake diff --git a/new/FindFFTW3.cmake b/new/FindFFTW3.cmake deleted file mode 100644 index 80aa67b..0000000 --- a/new/FindFFTW3.cmake +++ /dev/null @@ -1,232 +0,0 @@ -# - Try to find FFTW -# -# By default, it will look only for the serial libraries with single, double, -# and long double precision. 
Any combination of precision (SINGLE, DOUBLE, -# LONGDOUBLE) and library type (SERIAL, [THREADS|OPENMP], MPI) is possible by -# using the COMPONENTS keyword. For example, -# -# find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP MPI) -# -# Once done this will define -# FFTW3_FOUND - System has FFTW3 -# FFTW3_INCLUDE_DIRS - The FFTW3 include directories -# FFTW3_LIBRARIES - The libraries needed to use FFTW3 -# FFTW3_DEFINITIONS - Compiler switches required for using FFTW3 -# FFTW3_$KIND_$PARALLEL_FOUND- Set if FFTW3 exists in KIND precision format for PARALLEL mode. -# where KIND can be: SINGLE, DOUBLE, LONGDOUBLE -# and PARALLEL: SERIAL, OPENMP, MPI, THREADS. -# FFTW3_$KIND_$PARALLEL_LIBRARY - The libraries needed to use. -# FFTW3_INCLUDE_DIR_PARALLEL - The FFTW3 include directories for parallels mode. - -cmake_policy(SET CMP0054 NEW) - -if(FFTW3_FOUND) - return() -endif() - -if(FFTW3_INCLUDE_DIR AND FFTW3_LIBRARIES) - set(FFTW3_FOUND TRUE) - foreach(component ${FFTW3_FIND_COMPONENTS}) - if("${FFTW3_${component}_LIBRARY}" STREQUAL "") - set(FFTW3_${component}_LIBRARY "${FFTW3_LIBRARIES}") - endif() - endforeach() - return() -endif() - -macro(find_specific_libraries KIND PARALLEL) - list(APPEND FFTW3_FIND_COMPONENTS ${KIND}_${PARALLEL}) - if(NOT (${PARALLEL} STREQUAL "SERIAL") AND NOT ${PARALLEL}_FOUND) - message(FATAL_ERROR "Please, find ${PARALLEL} libraries before FFTW") - endif() - - find_library(FFTW3_${KIND}_${PARALLEL}_LIBRARY NAMES - fftw3${SUFFIX_${KIND}}${SUFFIX_${PARALLEL}}${SUFFIX_FINAL} HINTS ${HINT_DIRS}) - if(FFTW3_${KIND}_${PARALLEL}_LIBRARY MATCHES fftw3) - list(APPEND FFTW3_LIBRARIES ${FFTW3_${KIND}_${PARALLEL}_LIBRARY}) - set(FFTW3_${KIND}_${PARALLEL}_FOUND TRUE) - - STRING(TOLOWER "${KIND}" kind) - STRING(TOLOWER "${PARALLEL}" parallel) - if(FFTW3_${kind}_${parallel}_LIBRARY MATCHES "\\.a$") - add_library(fftw3::${kind}::${parallel} STATIC IMPORTED GLOBAL) - else() - add_library(fftw3::${kind}::${parallel} SHARED IMPORTED GLOBAL) - endif() - - # MPI Has a different included library than the others - # FFTW3_INCLUDE_DIR_PARALLEL will change depending of which on is used. 
- set(FFTW3_INCLUDE_DIR_PARALLEL ${FFTW3_INCLUDE_DIR} ) - if(PARALLEL STREQUAL "MPI") - set(FFTW3_INCLUDE_DIR_PARALLEL ${FFTW3_${PARALLEL}_INCLUDE_DIR}) - endif() - - set_target_properties(fftw3::${kind}::${parallel} PROPERTIES - IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}") - - # adding target properties to the different cases - ## MPI - if(PARALLEL STREQUAL "MPI") - if(MPI_C_LIBRARIES) - set_target_properties(fftw3::${kind}::mpi PROPERTIES - IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}" - IMPORTED_LINK_INTERFACE_LIBRARIES ${MPI_C_LIBRARIES}) - endif() - endif() - ## OpenMP - if(PARALLEL STREQUAL "OPENMP") - if(OPENMP_C_FLAGS) - set_target_properties(fftw3::${kind}::${parallel} PROPERTIES - IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}" - INTERFACE_COMPILE_OPTIONS "${OPENMP_C_FLAGS}") - endif() - endif() - ## THREADS - if(PARALLEL STREQUAL "THREADS") - if(CMAKE_THREAD_LIBS_INIT) # TODO: this is not running - set_target_properties(fftw3::${kind}::${parallel} PROPERTIES - IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}" - INTERFACE_COMPILE_OPTIONS "${CMAKE_THREAD_LIBS_INIT}") - endif() - endif() - endif() -endmacro() - - - - -if(NOT FFTW3_FIND_COMPONENTS) - set(FFTW3_FIND_COMPONENTS SINGLE DOUBLE LONGDOUBLE SERIAL) -endif() - -string(TOUPPER "${FFTW3_FIND_COMPONENTS}" FFTW3_FIND_COMPONENTS) - -list(FIND FFTW3_FIND_COMPONENTS SINGLE LOOK_FOR_SINGLE) -list(FIND FFTW3_FIND_COMPONENTS DOUBLE LOOK_FOR_DOUBLE) -list(FIND FFTW3_FIND_COMPONENTS LONGDOUBLE LOOK_FOR_LONGDOUBLE) -list(FIND FFTW3_FIND_COMPONENTS THREADS LOOK_FOR_THREADS) -list(FIND FFTW3_FIND_COMPONENTS OPENMP LOOK_FOR_OPENMP) -list(FIND FFTW3_FIND_COMPONENTS MPI LOOK_FOR_MPI) -list(FIND FFTW3_FIND_COMPONENTS SERIAL LOOK_FOR_SERIAL) - -# FIXME - This may fail in computers wihtout serial -# Default serial to obtain version number -set(LOOK_FOR_SERIAL 1) - -# set serial as default if none parallel component has been set -if((LOOK_FOR_THREADS LESS 0) AND (LOOK_FOR_MPI LESS 0) AND - (LOOK_FOR_OPENMP LESS 0)) - set(LOOK_FOR_SERIAL 1) -endif() - -if(MPI_C_FOUND) - set(MPI_FOUND ${MPI_C_FOUND}) -endif() -unset(FFTW3_FIND_COMPONENTS) - - - - -if(WIN32) - set(HINT_DIRS ${FFTW3_DIRECTORY} $ENV{FFTW3_DIRECTORY}) -else() - find_package(PkgConfig) - if(PKG_CONFIG_FOUND) - pkg_check_modules(PC_FFTW QUIET fftw3) - set(FFTW3_DEFINITIONS ${PC_FFTW3_CFLAGS_OTHER}) - endif() - set(HINT_DIRS ${PC_FFTW3_INCLUDEDIR} ${PC_FFTW3_INCLUDE_DIRS} - ${FFTW3_INCLUDE_DIR} $ENV{FFTW3_INCLUDE_DIR} ) -endif() - -find_path(FFTW3_INCLUDE_DIR NAMES fftw3.h HINTS ${HINT_DIRS}) -if (LOOK_FOR_MPI) # Probably is going to be the same as fftw3.h - find_path(FFTW3_MPI_INCLUDE_DIR NAMES fftw3-mpi.h HINTS ${HINT_DIRS}) -endif() - -function(find_version OUTVAR LIBRARY SUFFIX) - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/main.c - # TODO: do we need to add include for mpi headers? 
- "#include - #include - int main(int nargs, char const *argv[]) { - printf(\"%s\", fftw${SUFFIX}_version); - return 0; - }" - ) -if(NOT CMAKE_CROSSCOMPILING) - try_run(RUN_RESULT COMPILE_RESULT - "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/" - "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/main.c" - CMAKE_FLAGS - -DLINK_LIBRARIES=${LIBRARY} - -DINCLUDE_DIRECTORIES=${FFTW3_INCLUDE_DIR} - RUN_OUTPUT_VARIABLE OUTPUT - COMPILE_OUTPUT_VARIABLE COUTPUT - ) - endif() - if(RUN_RESULT EQUAL 0) - string(REGEX REPLACE - ".*([0-9]+\\.[0-9]+\\.[0-9]+).*" - "\\1" VERSION_STRING "${OUTPUT}" - ) - set(${OUTVAR} ${VERSION_STRING} PARENT_SCOPE) - endif() -endfunction() - -set(SUFFIX_DOUBLE "") -set(SUFFIX_SINGLE "f") -set(SUFFIX_LONGDOUBLE "l") -set(SUFFIX_SERIAL "") -set(SUFFIX_OPENMP "_omp") -set(SUFFIX_MPI "_mpi") -set(SUFFIX_THREADS "_threads") -set(SUFFIX_FINAL "") - -if(WIN32) - set(SUFFIX_FINAL "-3") -else() - set(HINT_DIRS ${PC_FFTW3_LIBDIR} ${PC_FFTW3_LIBRARY_DIRS} - $ENV{FFTW3_LIBRARY_DIR} ${FFTW3_LIBRARY_DIR} ) -endif(WIN32) - -unset(FFTW3_LIBRARIES) -set(FFTW3_INCLUDE_DIRS ${FFTW3_INCLUDE_DIR} ) # TODO what's for? -set(FFTW3_FLAGS_C "") -foreach(KIND SINGLE DOUBLE LONGDOUBLE) - if(LOOK_FOR_${KIND} LESS 0) - continue() - endif() - foreach(PARALLEL SERIAL MPI OPENMP THREADS) - if(LOOK_FOR_${PARALLEL} LESS 0) - continue() - endif() - find_specific_libraries(${KIND} ${PARALLEL}) - endforeach() -endforeach() - -if(FFTW3_INCLUDE_DIR) - list(GET FFTW3_FIND_COMPONENTS 0 smallerrun) - string(REPLACE "_" ";" RUNLIST ${smallerrun}) - list(GET RUNLIST 0 KIND) - list(GET RUNLIST 1 PARALLEL) - unset(smallerrun) - unset(RUNLIST) - # suffix is quoted so it pass empty in the case of double as it's empty - find_version(FFTW3_VERSION_STRING ${FFTW3_${KIND}_${PARALLEL}_LIBRARY} - "${SUFFIX_${KIND}}") -endif() - -# FIXME: fails if use REQUIRED. 
-include(FindPackageHandleStandardArgs)
-# handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE
-# if all listed variables are TRUE
-find_package_handle_standard_args(FFTW3
- REQUIRED_VARS FFTW3_LIBRARIES FFTW3_INCLUDE_DIR
- VERSION_VAR FFTW3_VERSION_STRING
- HANDLE_COMPONENTS
-)

From 1ebc5f2ff777002fbe3708abbc668163a8b789a8 Mon Sep 17 00:00:00 2001
From: Oliver Hahn
Date: Fri, 1 Nov 2019 10:46:31 +0100
Subject: [PATCH 009/130] removed deprecated member function

---
 include/grid_fft.hh | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/include/grid_fft.hh b/include/grid_fft.hh
index b66be1b..4c8464c 100644
--- a/include/grid_fft.hh
+++ b/include/grid_fft.hh
@@ -636,16 +636,6 @@ public:
 void Write_PDF(std::string ofname, int nbins = 1000, double scale = 1.0, double rhomin = 1e-3, double rhomax = 1e3);
- // void stagger_field(void)
- // {
- // FourierTransformForward();
- // apply_function_k_dep([&](auto x, auto k) -> ccomplex_t {
- // real_t shift = k[0] * get_dx()[0] + k[1] * get_dx()[1] + k[2] * get_dx()[2];
- // return x * std::exp(ccomplex_t(0.0, 0.5 * shift));
- // });
- // FourierTransformBackward();
- // }
-
 void shift_field( double sx, double sy, double sz )

From eb2743c61b5ac880d5c8fa146d7a53e67e6c92c9 Mon Sep 17 00:00:00 2001
From: Oliver Hahn
Date: Fri, 1 Nov 2019 12:01:39 +0100
Subject: [PATCH 010/130] removal of more deprecated member functions

---
 include/grid_fft.hh | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/include/grid_fft.hh b/include/grid_fft.hh
index 4c8464c..4848f0e 100644
--- a/include/grid_fft.hh
+++ b/include/grid_fft.hh
@@ -636,25 +636,20 @@ public:
 void Write_PDF(std::string ofname, int nbins = 1000, double scale = 1.0, double rhomin = 1e-3, double rhomax = 1e3);
- void shift_field( double sx, double sy, double sz )
+ void shift_field( const vec3& s )
 {
 FourierTransformForward();
 apply_function_k_dep([&](auto x, auto k) -> ccomplex_t {
 #ifdef WITH_MPI
- real_t shift = sy * k[0] * get_dx()[0] + sx * k[1] * get_dx()[1] + sz * k[2] * get_dx()[2];
+ real_t shift = s.y * k[0] * get_dx()[0] + s.x * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2];
 #else
- real_t shift = sx * k[0] * get_dx()[0] + sy * k[1] * get_dx()[1] + sz * k[2] * get_dx()[2];
+ real_t shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2];
 #endif
 return x * std::exp(ccomplex_t(0.0, shift));
 });
 FourierTransformBackward();
 }
- void stagger_field(void)
- {
- this->shift_field( 0.5, 0.5, 0.5 );
- }
 void zero_DC_mode(void)
 {
 if (space_ == kspace_id)

From d80bf34c105fda150ccf7b776223bb523a453d1e Mon Sep 17 00:00:00 2001
From: Oliver Hahn
Date: Fri, 1 Nov 2019 12:02:09 +0100
Subject: [PATCH 011/130] added file headers

---
 include/system_stat.hh | 7 +++++++
 include/testing.hh | 7 +++++++
 include/vec3.hh | 7 +++++++
 3 files changed, 21 insertions(+)

diff --git a/include/system_stat.hh b/include/system_stat.hh
index f911a42..fb7f6f3 100644
--- a/include/system_stat.hh
+++ b/include/system_stat.hh
@@ -1,3 +1,10 @@
+/*******************************************************************\
+ system_stat.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations
+
+ CHANGELOG (only majors, for details see repo):
+ 08/2019 - Oliver Hahn - first implementation
+\*******************************************************************/
 #pragma once
 #include
diff --git a/include/testing.hh b/include/testing.hh
index 2395db3..e5d2a99 100644
--- a/include/testing.hh
+++ b/include/testing.hh
@@ -1,3 +1,10 @@
+/*******************************************************************\
+ testing.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations
+
+ CHANGELOG (only majors, for details see repo):
+ 10/2019 - Michael Michaux & Oliver Hahn - first implementation
+\*******************************************************************/
 #pragma once
 #include
diff --git a/include/vec3.hh b/include/vec3.hh
index 8efb1b2..ea7a2f2 100644
--- a/include/vec3.hh
+++ b/include/vec3.hh
@@ -1,3 +1,10 @@
+/*******************************************************************\
+ vec3.hh - This file is part of MUSIC2 -
+ a code to generate initial conditions for cosmological simulations
+
+ CHANGELOG (only majors, for details see repo):
+ 06/2019 - Oliver Hahn - first implementation
+\*******************************************************************/
 #pragma once
 template< typename T >

From 68080d2545a81ba9991fd7ec9c35b7a5bfb8e4eb Mon Sep 17 00:00:00 2001
From: Oliver Hahn
Date: Fri, 1 Nov 2019 12:03:02 +0100
Subject: [PATCH 012/130] added possibility to write 32 or 64bit positions and ids

---
 include/output_plugin.hh | 7 +++
 include/particle_container.hh | 114 ++++++++++++++++++++++++----------
 include/particle_generator.hh | 55 +++++++++++-----
 src/ic_generator.cc | 6 +-
 src/plugins/output_gadget2.cc | 66 +++++++++++++++-----
 src/plugins/output_generic.cc | 4 ++
 src/plugins/output_grafic2.cc | 4 ++
 7 files changed, 185 insertions(+), 71 deletions(-)

diff --git a/include/output_plugin.hh b/include/output_plugin.hh
index cc092d3..b7c23e1 100644
--- a/include/output_plugin.hh
+++ b/include/output_plugin.hh
@@ -21,6 +21,7 @@
 enum class output_type {particles,field_lagrangian,field_eulerian};
+
 class output_plugin
 {
 protected:
@@ -57,6 +58,12 @@ public:
 //! routine to query whether species is written as particle data
 // virtual bool write_species_as_particles( const cosmo_species &s ){ return !write_species_as_grid(s); }
+
+ //! query if output wants 64bit precision for real values
+ virtual bool has_64bit_reals() const = 0;
+
+ //! query if output wants 64bit precision for integer values
+ virtual bool has_64bit_ids() const = 0;
 //!
routine to return a multiplicative factor that contains the desired position units for the output virtual real_t position_unit() const = 0; diff --git a/include/particle_container.hh b/include/particle_container.hh index fb05889..92b683c 100644 --- a/include/particle_container.hh +++ b/include/particle_container.hh @@ -1,3 +1,10 @@ +/*******************************************************************\ + particle_container.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 10/2019 - Oliver Hahn - first implementation +\*******************************************************************/ #pragma once #ifdef USE_MPI @@ -13,57 +20,96 @@ namespace particle{ class container { public: - std::vector positions_, velocities_; - std::vector ids_; + std::vector positions32_, velocities32_; + std::vector positions64_, velocities64_; + + std::vector ids32_; + std::vector ids64_; + - container() - { - } + container(){ } container(const container &) = delete; - const void* get_pos_ptr() const{ - return reinterpret_cast( &positions_[0] ); - } - - const void* get_vel_ptr() const{ - return reinterpret_cast( &velocities_[0] ); - } - - const void* get_ids_ptr() const{ - return reinterpret_cast( &ids_[0] ); - } - - void allocate(size_t nump) + void allocate(size_t nump, bool b64reals, bool b64ids) { - positions_.resize(3 * nump); - velocities_.resize(3 * nump); - ids_.resize(nump); + if( b64reals ){ + positions64_.resize(3 * nump); + velocities64_.resize(3 * nump); + positions32_.clear(); + velocities32_.clear(); + }else{ + positions32_.resize(3 * nump); + velocities32_.resize(3 * nump); + positions64_.clear(); + velocities64_.clear(); + } + + if( b64ids ){ + ids64_.resize(nump); + ids32_.clear(); + }else{ + ids32_.resize(nump); + ids64_.clear(); + } } - void set_pos(size_t ipart, size_t idim, real_t p) - { - positions_[3 * ipart + idim] = p; + const void* get_pos32_ptr() const{ + return reinterpret_cast( &positions32_[0] ); } - void set_vel(size_t ipart, size_t idim, real_t p) - { - velocities_[3 * ipart + idim] = p; + void set_pos32(size_t ipart, size_t idim, float p){ + positions32_[3 * ipart + idim] = p; } - void set_id(size_t ipart, id_t id) - { - ids_[ipart] = id; + const void* get_pos64_ptr() const{ + return reinterpret_cast( &positions64_[0] ); + } + + inline void set_pos64(size_t ipart, size_t idim, double p){ + positions64_[3 * ipart + idim] = p; + } + + inline const void* get_vel32_ptr() const{ + return reinterpret_cast( &velocities32_[0] ); + } + + inline void set_vel32(size_t ipart, size_t idim, float p){ + velocities32_[3 * ipart + idim] = p; + } + + const void* get_vel64_ptr() const{ + return reinterpret_cast( &velocities64_[0] ); + } + + inline void set_vel64(size_t ipart, size_t idim, double p){ + velocities64_[3 * ipart + idim] = p; + } + + const void* get_ids32_ptr() const{ + return reinterpret_cast( &ids32_[0] ); + } + + void set_id32(size_t ipart, uint32_t id){ + ids32_[ipart] = id; + } + + const void* get_ids64_ptr() const{ + return reinterpret_cast( &ids64_[0] ); + } + + void set_id64(size_t ipart, uint64_t id){ + ids64_[ipart] = id; } size_t get_local_num_particles(void) const { - return ids_.size(); + return std::max(ids32_.size(),ids64_.size()); } size_t get_global_num_particles(void) const { - size_t local_nump = ids_.size(), global_nump; + size_t local_nump = this->get_local_num_particles(), global_nump; #ifdef USE_MPI MPI_Allreduce(reinterpret_cast(&local_nump), 
reinterpret_cast(&global_nump), 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); @@ -97,11 +143,11 @@ public: void dump(void) { - for (size_t i = 0; i < ids_.size(); ++i) + /*for (size_t i = 0; i < ids_.size(); ++i) { std::cout << positions_[3 * i + 0] << " " << positions_[3 * i + 1] << " " << positions_[3 * i + 2] << " " << velocities_[3 * i + 0] << " " << velocities_[3 * i + 1] << " " << velocities_[3 * i + 2] << std::endl; - } + }*/ } }; diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 7c69e7d..1c176eb 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -1,3 +1,10 @@ +/*******************************************************************\ + particle_generator.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 10/2019 - Oliver Hahn - first implementation +\*******************************************************************/ #pragma once #include @@ -21,17 +28,23 @@ const std::vector< std::vector> > lattice_shifts = }; template -void initialize_lattice( container& particles, lattice lattice_type, const field_t& field ){ +void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const field_t& field ){ + // number of modes present in the field const size_t num_p_in_load = field.local_size(); + // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load): const size_t overload = 1ull< -void set_positions( container& particles, const lattice lattice_type, int idim, real_t lunit, field_t& field ) +void set_positions( container& particles, const lattice lattice_type, int idim, real_t lunit, const bool b64reals, field_t& field ) { const size_t num_p_in_load = field.local_size(); for( int ishift=0; ishift<(1<0 ){ - vec3 shift = lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1]; - field.shift_field( shift.x, shift.y, shift.z ); + field.shift_field( lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift-1] ); } - auto ipcount0 = ishift * num_p_in_load; + // read out values from phase shifted field and set assoc. particle's value + const auto ipcount0 = ishift * num_p_in_load; for( size_t i=0,ipcount=ipcount0; i(i,j,k,lattice_shifts[lattice_type][ishift]); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + if( b64reals ){ + particles.set_pos64( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + }else{ + particles.set_pos32( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + } } } } @@ -62,20 +79,24 @@ void set_positions( container& particles, const lattice lattice_type, int idim, } template -void set_velocities( container& particles, lattice lattice_type, int idim, field_t& field ) +void set_velocities( container& particles, lattice lattice_type, int idim, const bool b64reals, field_t& field ) { const size_t num_p_in_load = field.local_size(); for( int ishift=0; ishift<(1<0 ){ - vec3 shift = lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1]; - field.shift_field( shift.x, shift.y, shift.z ); + field.shift_field( lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1] ); } - auto ipcount0 = ishift * num_p_in_load; + // read out values from phase shifted field and set assoc. 
particle's value + const auto ipcount0 = ishift * num_p_in_load; for( size_t i=0,ipcount=ipcount0; iwrite_species_as( this_species ) == output_type::particles ) { // allocate particle structure and generate particle IDs - particle::initialize_lattice( particles, lattice_type, tmp ); + particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), tmp ); } // write out positions @@ -472,7 +472,7 @@ int Run( ConfigFile& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_positions( particles, lattice_type, idim, lunit, tmp ); + particle::set_positions( particles, lattice_type, idim, lunit, the_output_plugin->has_64bit_reals(), tmp ); } // otherwise write out the grid data directly to the output plugin // else if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_lagrangian ) @@ -518,7 +518,7 @@ int Run( ConfigFile& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_velocities( particles, lattice_type, idim, tmp ); + particle::set_velocities( particles, lattice_type, idim, the_output_plugin->has_64bit_reals(), tmp ); } // otherwise write out the grid data directly to the output plugin else if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian ) diff --git a/src/plugins/output_gadget2.cc b/src/plugins/output_gadget2.cc index fddb734..57d9cc1 100644 --- a/src/plugins/output_gadget2.cc +++ b/src/plugins/output_gadget2.cc @@ -3,6 +3,7 @@ constexpr int empty_fill_bytes{56}; +template class gadget2_output_plugin : public output_plugin { public: @@ -33,6 +34,7 @@ protected: int num_files_; header this_header_; real_t lunit_, vunit_; + bool blongids_; public: //! 
constructor @@ -47,6 +49,7 @@ public: real_t astart = 1.0/(1.0+cf_.GetValue("setup", "zstart")); lunit_ = cf_.GetValue("setup", "BoxLength"); vunit_ = lunit_ / std::sqrt(astart); + blongids_ = cf_.GetValueSafe("output","UseLongids",false); } output_type write_species_as( const cosmo_species & ) const { return output_type::particles; } @@ -55,6 +58,16 @@ public: real_t velocity_unit() const { return vunit_; } + bool has_64bit_reals() const{ + if( typeid(write_real_t)==typeid(double) ) return true; + return false; + } + + bool has_64bit_ids() const{ + if( blongids_ ) return true; + return false; + } + void write_particle_data(const particle::container &pc, const cosmo_species &s ) { // fill the Gadget-2 header @@ -121,19 +134,39 @@ public: ofs.write( reinterpret_cast(&this_header_), sizeof(header) ); ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - blocksz = 3 * sizeof(float) * pc.get_local_num_particles(); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_pos_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + // we write double precision + if( this->has_64bit_reals() ){ + blocksz = 3 * sizeof(double) * pc.get_local_num_particles(); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write( reinterpret_cast(pc.get_pos64_ptr()), blocksz ); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write( reinterpret_cast(pc.get_vel64_ptr()), blocksz ); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + }else{ + blocksz = 3 * sizeof(float) * pc.get_local_num_particles(); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write( reinterpret_cast(pc.get_pos32_ptr()), blocksz ); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write( reinterpret_cast(pc.get_vel32_ptr()), blocksz ); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + } - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_vel_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - - blocksz = sizeof(float) * pc.get_local_num_particles(); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_ids_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + // we write long IDs + if( this->has_64bit_ids() ){ + blocksz = sizeof(uint64_t) * pc.get_local_num_particles(); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write( reinterpret_cast(pc.get_ids64_ptr()), blocksz ); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + }else{ + blocksz = sizeof(uint32_t) * pc.get_local_num_particles(); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write( reinterpret_cast(pc.get_ids32_ptr()), blocksz ); + ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + } } }; @@ -141,9 +174,8 @@ public: namespace { - output_plugin_creator_concrete creator1("gadget2"); -// output_plugin_creator_concrete> creator1("gadget2"); -// #ifndef SINGLE_PRECISION -// output_plugin_creator_concrete> creator2("gadget2_double"); -// #endif + output_plugin_creator_concrete> creator1("gadget2"); +#if !defined(USE_SINGLEPRECISION) + output_plugin_creator_concrete> creator3("gadget2_double"); +#endif } // namespace diff --git a/src/plugins/output_generic.cc b/src/plugins/output_generic.cc index 
10eacfc..1a53e84 100644 --- a/src/plugins/output_generic.cc +++ b/src/plugins/output_generic.cc @@ -50,6 +50,10 @@ public: return output_type::field_lagrangian; } + bool has_64bit_reals() const{ return true; } + + bool has_64bit_ids() const{ return true; } + real_t position_unit() const { return 1.0; } real_t velocity_unit() const { return 1.0; } diff --git a/src/plugins/output_grafic2.cc b/src/plugins/output_grafic2.cc index b511dd9..31e8a04 100644 --- a/src/plugins/output_grafic2.cc +++ b/src/plugins/output_grafic2.cc @@ -102,6 +102,10 @@ public: return output_type::field_lagrangian; } + bool has_64bit_reals() const{ return false; } + + bool has_64bit_ids() const{ return false; } + real_t position_unit() const { return lunit_; } real_t velocity_unit() const { return vunit_; } From 3ee9dfd6ddd580116ff618d95857a3a76d164211 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 1 Nov 2019 12:19:55 +0100 Subject: [PATCH 013/130] adjusted example conf file --- example.conf | 57 +++++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/example.conf b/example.conf index 3b6d07e..58fc969 100644 --- a/example.conf +++ b/example.conf @@ -14,33 +14,6 @@ DoFixing = no # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!) ParticleLoad = sc -[testing] -# enables diagnostic output -# can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence' -test = convergence - -[execution] -NumThreads = 4 - -[output] -fname_hdf5 = output_sch.hdf5 -fbase_analysis = output - -format = gadget2 -filename = ics_gadget.dat - -#format = generic -#filename = debug.hdf5 -#generic_out_eulerian = yes - -#format = grafic2 -#filename = ics_ramses -#grafic_use_SPT = yes - -[random] -generator = NGENIC -seed = 9001 - [cosmology] #transfer = CLASS transfer = eisenstein @@ -56,3 +29,33 @@ nspec = 0.961 #LSS_aniso_ly = 0.1 #LSS_aniso_lz = -0.2 +[random] +generator = NGENIC +seed = 9001 + +[testing] +# enables diagnostic output +# can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence' +test = convergence + +[execution] +NumThreads = 4 + +[output] +fname_hdf5 = output.hdf5 +fbase_analysis = output + +format = gadget2 +filename = ics_gadget.dat +UseLongids = false + +#format = generic +#filename = debug.hdf5 +#generic_out_eulerian = yes + +#format = grafic2 +#filename = ics_ramses +#grafic_use_SPT = yes + + + From a5253bcace1d074a5d6159b95389e1d9a6b1651a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 1 Nov 2019 13:16:05 +0100 Subject: [PATCH 014/130] cosmetic changes --- include/vec3.hh | 38 +++++++++++++++++++++------------ src/main.cc | 23 ++++++++++---------- src/output_plugin.cc | 1 + src/random_plugin.cc | 3 ++- src/transfer_function_plugin.cc | 1 + 5 files changed, 40 insertions(+), 26 deletions(-) diff --git a/include/vec3.hh b/include/vec3.hh index ea7a2f2..3f48967 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -7,6 +7,7 @@ \*******************************************************************/ #pragma once +//! implements a simple class of 3-vectors of arbitrary scalar type template< typename T > class vec3{ private: @@ -29,41 +30,50 @@ public: vec3( vec3 &&v) : data_(std::move(v.data_)), x(data_[0]), y(data_[1]), z(data_[2]){} - //! construct from initialiser list + //! construct vec3 from initializer list template vec3(E&&...e) : data_{{std::forward(e)...}}, x(data_[0]), y(data_[1]), z(data_[2]){} + //! 
braket index access to vector components T &operator[](size_t i){ return data_[i];} + //! const braket index access to vector components const T &operator[](size_t i) const { return data_[i]; } - vec3 operator+( const vec3& v ) const{ return vec3({x+v.x,y+v.y,z+v.z}); } + //! implementation of summation of vec3 + vec3 operator+( const vec3& v ) const{ return vec3({x+v.x,y+v.y,z+v.z}); } - vec3 operator-( const vec3& v ) const{ return vec3({x-v.x,y-v.y,z-v.z}); } + //! implementation of difference of vec3 + vec3 operator-( const vec3& v ) const{ return vec3({x-v.x,y-v.y,z-v.z}); } - vec3 operator*( T s ) const{ return vec3({x*s,y*s,z*s}); } + //! implementation of scalar multiplication + vec3 operator*( T s ) const{ return vec3({x*s,y*s,z*s}); } + //! implementation of += operator vec3& operator+=( const vec3& v ) const{ x+=v.x; y+=v.y; z+=v.z; return *this; } + //! implementation of -= operator vec3& operator-=( const vec3& v ) const{ x-=v.x; y-=v.y; z-=v.z; return *this; } + //! multiply with scalar vec3& operator*=( T s ) const{ x*=s; y*=s; z*=s; return *this; } + //! compute dot product with another vector T dot(const vec3 &a) const { return data_[0] * a.data_[0] + data_[1] * a.data_[1] + data_[2] * a.data_[2]; } - T norm_squared(void) const - { - return this->dot(*this); - } + //! returns 2-norm squared of vector + T norm_squared(void) const { return this->dot(*this); } - T norm(void) const - { - return std::sqrt( this->norm_squared() ); - } - - + //! returns 2-norm of vector + T norm(void) const { return std::sqrt( this->norm_squared() ); } }; + +//! multiplication with scalar +template +vec3 operator*( T s, const vec3& v ){ + return vec3({v.x*s,v.y*s,v.z*s}); +} diff --git a/src/main.cc b/src/main.cc index 72e9a38..2416a20 100644 --- a/src/main.cc +++ b/src/main.cc @@ -49,16 +49,17 @@ int main( int argc, char** argv ) } #endif + // Ascii ART logo. generated via http://patorjk.com/software/taag/#p=display&f=Nancyj&t=monofonIC csoca::ilog << "\n" - << " unigrid MUSIC .8888b dP a88888b. \n" - << " 88 \" 88 d8\' `88 \n" - << " 88d8b.d8b. .d8888b. 88d888b. .d8888b. 88aaa .d8888b. 88d888b. 88 88 \n" - << " 88\'`88\'`88 88\' `88 88\' `88 88\' `88 88 88\' `88 88\' `88 88 88 \n" - << " 88 88 88 88. .88 88 88 88. .88 88 88. .88 88 88 88 Y8. .88 \n" - << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl - << "version : v0.1a, git rev. : " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl - << "-------------------------------------------------------------------------------" << std::endl; - + << " The unigrid version of MUSIC-2 .8888b dP a88888b. \n" + << " 88 \" 88 d8\' `88 \n" + << " 88d8b.d8b. .d8888b. 88d888b. .d8888b. 88aaa .d8888b. 88d888b. 88 88 \n" + << " 88\'`88\'`88 88\' `88 88\' `88 88\' `88 88 88\' `88 88\' `88 88 88 \n" + << " 88 88 88 88. .88 88 88 88. .88 88 88. .88 88 88 88 Y8. 
.88 \n" + << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl + << "Build was compiled on " << __DATE__ << " at " << __TIME__ << std::endl + << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl + << "-------------------------------------------------------------------------------\n" << std::endl; //------------------------------------------------------------------------------ // Parse command line options @@ -71,7 +72,7 @@ int main( int argc, char** argv ) print_RNG_plugins(); print_output_plugins(); - csoca::elog << "In order to run, you need to specify a parameter file!" << std::endl; + csoca::elog << "In order to run, you need to specify a parameter file!\n" << std::endl; exit(0); } @@ -189,7 +190,7 @@ int main( int argc, char** argv ) #endif csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Done." << std::endl; + csoca::ilog << "Done.\n" << std::endl; return 0; } diff --git a/src/output_plugin.cc b/src/output_plugin.cc index 763336e..35664dc 100644 --- a/src/output_plugin.cc +++ b/src/output_plugin.cc @@ -30,6 +30,7 @@ void print_output_plugins() csoca::ilog << "\t\'" << it->first << "\'\n"; ++it; } + csoca::ilog << std::endl; } std::unique_ptr select_output_plugin( ConfigFile& cf ) diff --git a/src/random_plugin.cc b/src/random_plugin.cc index 045978f..5cfea9a 100644 --- a/src/random_plugin.cc +++ b/src/random_plugin.cc @@ -13,7 +13,7 @@ void print_RNG_plugins() std::map &m = get_RNG_plugin_map(); std::map::iterator it; it = m.begin(); - csoca::ilog << "- Available random number generator plug-ins:" << std::endl; + csoca::ilog << "Available random number generator plug-ins:" << std::endl; while (it != m.end()) { if ((*it).second){ @@ -21,6 +21,7 @@ void print_RNG_plugins() } ++it; } + csoca::ilog << std::endl; } std::unique_ptr select_RNG_plugin(ConfigFile &cf) diff --git a/src/transfer_function_plugin.cc b/src/transfer_function_plugin.cc index e9d3748..6101ada 100644 --- a/src/transfer_function_plugin.cc +++ b/src/transfer_function_plugin.cc @@ -20,6 +20,7 @@ void print_TransferFunction_plugins() csoca::ilog << "\t\'" << (*it).first << "\'" << std::endl; ++it; } + csoca::ilog << std::endl; } std::unique_ptr select_TransferFunction_plugin(ConfigFile &cf) From 40be27c36fb4726cb0075dd6dc0ec0f70e555bfa Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 Nov 2019 15:54:17 +0100 Subject: [PATCH 015/130] started implementation of PLT (working commit) --- include/grid_fft.hh | 47 ++++++++++ include/mat3.hh | 98 +++++++++++++++++++++ include/particle_plt.hh | 184 ++++++++++++++++++++++++++++++++++++++++ include/vec3.hh | 4 +- src/main.cc | 5 +- 5 files changed, 335 insertions(+), 3 deletions(-) create mode 100644 include/mat3.hh create mode 100644 include/particle_plt.hh diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 4848f0e..3b760b2 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -152,6 +152,32 @@ public: return (i * sizes_[1] + j) * sizes_[3] + k; } + data_t get_cic( const vec3& v ) const{ + // warning! 
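
(Aside on the interpolation added in this hunk: get_cic is standard trilinear cloud-in-cell interpolation of the real-space grid. With (i,j,k) the cell containing the point in grid units, (d_x,d_y,d_z) the fractional offsets inside that cell, and the upper neighbours wrapped periodically, the weighting is

    f(\mathbf{x}) \simeq \sum_{a,b,c\in\{0,1\}} w_a(d_x)\, w_b(d_y)\, w_c(d_z)\; f_{i+a,\,j+b,\,k+c}, \qquad w_0(d)=1-d,\quad w_1(d)=d . )
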
this doesn't work with MPI + vec3 x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], + std::fmod(v.y/length_[1]+1.0,1.0)*n_[1], + std::fmod(v.z/length_[2]+1.0,1.0)*n_[2] }); + size_t ix = static_cast(x.x); + size_t iy = static_cast(x.y); + size_t iz = static_cast(x.z); + real_t dx = x.x-real_t(ix), tx = 1.0-dx; + real_t dy = x.y-real_t(iy), ty = 1.0-dy; + real_t dz = x.z-real_t(iz), tz = 1.0-dz; + size_t ix1 = (ix+1)%n_[0]; + size_t iy1 = (iy+1)%n_[1]; + size_t iz1 = (iz+1)%n_[2]; + data_t val = 0.0; + val += this->relem(ix ,iy ,iz ) * tx * ty * tz; + val += this->relem(ix ,iy ,iz1) * tx * ty * dz; + val += this->relem(ix ,iy1,iz ) * tx * dy * tz; + val += this->relem(ix ,iy1,iz1) * tx * dy * dz; + val += this->relem(ix1,iy ,iz ) * dx * ty * tz; + val += this->relem(ix1,iy ,iz1) * dx * ty * dz; + val += this->relem(ix1,iy1,iz ) * dx * dy * tz; + val += this->relem(ix1,iy1,iz1) * dx * dy * dz; + return val; + } + template vec3 get_r(const size_t i, const size_t j, const size_t k) const { @@ -563,6 +589,27 @@ public: } } + template + void assign_function_of_grids_kdep(const functional &f, const grid_t &g) + { + assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) ); + +#pragma omp parallel for + for (size_t i = 0; i < sizes_[0]; ++i) + { + for (size_t j = 0; j < sizes_[1]; ++j) + { + for (size_t k = 0; k < sizes_[2]; ++k) + { + auto &elem = this->kelem(i, j, k); + const auto &elemg = g.kelem(i, j, k); + + elem = f(this->get_k(i, j, k), elemg); + } + } + } + } + template void assign_function_of_grids_kdep(const functional &f, const grid1_t &g1, const grid2_t &g2) { diff --git a/include/mat3.hh b/include/mat3.hh new file mode 100644 index 0000000..9f72305 --- /dev/null +++ b/include/mat3.hh @@ -0,0 +1,98 @@ +#include +#include + +#include + +template +class mat3s{ +protected: + std::array data_; + gsl_matrix_view m_; + gsl_vector *eval_; + gsl_matrix *evec_; + gsl_eigen_symmv_workspace * wsp_; + + void init_gsl(){ + m_ = gsl_matrix_view_array (&data_[0], 3, 3); + eval_ = gsl_vector_alloc (3); + evec_ = gsl_matrix_alloc (3, 3); + wsp_ = gsl_eigen_symmv_alloc (3); + } + + void free_gsl(){ + gsl_eigen_symmv_free (wsp_); + gsl_vector_free (eval_); + gsl_matrix_free (evec_); + } + +public: + + mat3s(){ + this->init_gsl(); + } + + //! copy constructor + mat3s( const mat3s &m) + : data_(m.data_){ + this->init_gsl(); + } + + //! move constructor + mat3s( mat3s &&m) + : data_(std::move(m.data_)){ + this->init_gsl(); + } + + //! construct vec3 from initializer list + template + mat3s(E&&...e) + : data_{{std::forward(e)...}}{ + // resort into symmetrix matrix + data_[8] = data_[5]; + data_[7] = data_[4]; + data_[6] = data_[2]; + data_[5] = data_[4]; + data_[4] = data_[3]; + data_[3] = data_[1]; + this->init_gsl(); + } + + mat3s& operator=(const mat3s& m){ + data_ = m.data_; + return *this; + } + + mat3s& operator=(const mat3s&& m){ + data_ = std::move(m.data_); + return *this; + } + + //! bracket index access to vector components + T &operator[](size_t i){ return data_[i];} + + //! const bracket index access to vector components + const T &operator[](size_t i) const { return data_[i]; } + + //! matrix 2d index access + T &operator()(size_t i, size_t j){ return data_[3*i+j]; } + + //! const matrix 2d index access + const T &operator()(size_t i, size_t j) const { return data_[3*i+j]; } + + //! 
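
A minimal usage sketch of this helper class (the six values d_xx ... d_zz are placeholders for the independent components; the initializer takes the upper triangle xx, xy, xz, yy, yz, zz, which the constructor mirrors into the full symmetric matrix, and eigen(), defined just below, wraps the GSL symmetric eigensolver with ascending eigenvalue order):

    mat3s<real_t> D;
    vec3<real_t>  eval, evec1, evec2, evec3;
    D = { d_xx, d_xy, d_xz, d_yy, d_yz, d_zz };   // upper triangle only
    D.eigen(eval, evec1, evec2, evec3);           // eval[0] <= eval[1] <= eval[2],
                                                  // evec3 belongs to the largest eigenvalue eval[2]
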
destructor + ~mat3s(){ + this->free_gsl(); + } + + void eigen( vec3& evals, vec3& evec1, vec3& evec2, vec3& evec3 ){ + gsl_eigen_symmv (&m_.matrix, eval_, evec_, wsp_); + gsl_eigen_symmv_sort (eval_, evec_, GSL_EIGEN_SORT_VAL_ASC); + + for( int i=0; i<3; ++i ){ + evals[i] = gsl_vector_get( eval_, i ); + evec1[i] = gsl_matrix_get( evec_, 0, i ); + evec2[i] = gsl_matrix_get( evec_, 1, i ); + evec3[i] = gsl_matrix_get( evec_, 2, i ); + } + } +}; \ No newline at end of file diff --git a/include/particle_plt.hh b/include/particle_plt.hh new file mode 100644 index 0000000..1390a42 --- /dev/null +++ b/include/particle_plt.hh @@ -0,0 +1,184 @@ +#pragma once + +#include +#include // for unlink + +#include +#include + +#include + +#include + +namespace particle{ +//! implement Marcos et al. PLT calculation + +inline void test_plt( void ){ + + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + csoca::ilog << "Testing PLT implementation..." << std::endl; + + real_t boxlen = 1.0; + + size_t ngrid = 64; + size_t npgrid = 1; + size_t dpg = ngrid/npgrid; + size_t nump = npgrid*npgrid*npgrid; + + real_t pweight = 1.0/real_t(nump); + real_t eta = 2.0 * boxlen/ngrid; + + const real_t alpha = 1.0/std::sqrt(2)/eta; + const real_t alpha2 = alpha*alpha; + const real_t alpha3 = alpha2*alpha; + const real_t sqrtpi = std::sqrt(M_PI); + const real_t pi3halfs = std::pow(M_PI,1.5); + + const real_t dV( std::pow( boxlen/ngrid, 3 ) ); + Grid_FFT rho({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + std::vector< vec3 > gpos ; + + auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; + + auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { + auto d = vR-vP; + d.x = (d.x>0.5)? d.x-1.0 : (d.x<-0.5)? d.x+1.0 : d.x; + d.y = (d.y>0.5)? d.y-1.0 : (d.y<-0.5)? d.y+1.0 : d.y; + d.z = (d.z>0.5)? d.z-1.0 : (d.z<-0.5)? 
d.z+1.0 : d.z; + auto r = d.norm(); + + if( r< 1e-14 ) return 0.0; + + real_t val = 0.0; + + val -= d[mu]*d[nu]/(r*r) * alpha3/pi3halfs * std::exp(-alpha*alpha*r*r); + val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * + (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); + + return pweight * val; + }; + + gpos.reserve(nump); + + // sc + for( size_t i=0; i ccomplex_t { + real_t kmod = k.norm(); + return -x * std::exp(-0.5*eta*eta*kmod*kmod) / (kmod*kmod); + }); + rho.zero_DC_mode(); + + auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ + real_t sr = 0.0; + for( auto& p : gpos ){ + sr += greensftide_sr( mu, nu, v, p); + } + if( v.norm()<1e-14 ) return 0.0; + + return sr; + }; + + + // std::random_device rd; //Will be used to obtain a seed for the random number engine + // std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd() + // std::uniform_real_distribution<> dis(-0.25,0.25); + + Grid_FFT D_xx({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_xy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_xz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_yy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_yz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_zz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + + #pragma omp parallel for + for( size_t i=0; i p; + p.x = real_t(i)/ngrid; + for( size_t j=0; j D; + vec3 eval, evec1, evec2, evec3; + for( size_t j=0; j kv = D_xx.get_k(i,j,k); + + D = { std::real(D_xx.kelem(i,j,k) - kv[0]*kv[0] * rho.kelem(i,j,k) ), + std::real(D_xy.kelem(i,j,k) - kv[0]*kv[1] * rho.kelem(i,j,k) ), + std::real(D_xz.kelem(i,j,k) - kv[0]*kv[2] * rho.kelem(i,j,k) ), + std::real(D_yy.kelem(i,j,k) - kv[1]*kv[1] * rho.kelem(i,j,k) ), + std::real(D_yz.kelem(i,j,k) - kv[1]*kv[2] * rho.kelem(i,j,k) ), + std::real(D_zz.kelem(i,j,k) - kv[2]*kv[2] * rho.kelem(i,j,k) ) }; + D.eigen(eval, evec1, evec2, evec3); + + + ofs << std::setw(16) << kv.norm() / kNyquist + << std::setw(16) << eval[0] *nfac + 1.0/3.0 + << std::setw(16) << eval[1] *nfac + 1.0/3.0 + << std::setw(16) << eval[2] *nfac + 1.0/3.0 + << std::setw(16) << kv[0] + << std::setw(16) << kv[1] + << std::setw(16) << kv[2] + << std::endl; + } + } + } + +// std::string filename("plt_test.hdf5"); +// unlink(filename.c_str()); +// #if defined(USE_MPI) +// MPI_Barrier(MPI_COMM_WORLD); +// #endif +// rho.Write_to_HDF5(filename, "rho"); + +} + + +} \ No newline at end of file diff --git a/include/vec3.hh b/include/vec3.hh index 3f48967..057fbcd 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -35,10 +35,10 @@ public: vec3(E&&...e) : data_{{std::forward(e)...}}, x(data_[0]), y(data_[1]), z(data_[2]){} - //! braket index access to vector components + //! bracket index access to vector components T &operator[](size_t i){ return data_[i];} - //! const braket index access to vector components + //! const bracket index access to vector components const T &operator[](size_t i) const { return data_[i]; } //! implementation of summation of vec3 diff --git a/src/main.cc b/src/main.cc index 2416a20..01ee5be 100644 --- a/src/main.cc +++ b/src/main.cc @@ -10,6 +10,7 @@ #include #include +#include // initialise with "default" values @@ -181,7 +182,9 @@ int main( int argc, char** argv ) /////////////////////////////////////////////////////////////////////// // do the job... 
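
For reference, the real-space kernel accumulated in greensftide_sr above is the short-range piece of the Ewald-split tidal Green's function, eq. (A2) of Marcos (2008), while the long-range piece is applied in Fourier space through the Gaussian-damped potential stored in rho, \hat\phi_0(\mathbf{k}) = -\hat\rho(\mathbf{k})\, e^{-\eta^2 k^2/2}/k^2 with \alpha = 1/(\sqrt{2}\,\eta):

    D^{\rm sr}_{\mu\nu}(\mathbf{x}) = -\frac{x_\mu x_\nu}{x^2}\,\frac{\alpha^3}{\pi^{3/2}}\, e^{-\alpha^2 x^2}
      + \frac{1}{4\pi}\left(\frac{\delta_{\mu\nu}}{x^3} - \frac{3\,x_\mu x_\nu}{x^5}\right)
        \left[\operatorname{erfc}(\alpha x) + \frac{2\alpha x}{\sqrt{\pi}}\, e^{-\alpha^2 x^2}\right].
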
/////////////////////////////////////////////////////////////////////// - ic_generator::Run( the_config ); + // ic_generator::Run( the_config ); + + particle::test_plt(); /////////////////////////////////////////////////////////////////////// #if defined(USE_MPI) From c71b844e162f9563bd4af91dd144b2e72e0a55cd Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 Nov 2019 17:03:29 +0100 Subject: [PATCH 016/130] enabled output to HDF5 of complex data when in Fourier space --- src/grid_fft.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/grid_fft.cc b/src/grid_fft.cc index 2881010..54ddbbf 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -391,7 +391,10 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) { for (size_t k = 0; k < size(2); ++k) { - buf[j * size(2) + k] = std::real(relem(i, j, k)); + if( this->space_ == rspace_id ) + buf[j * size(2) + k] = std::real(relem(i, j, k)); + else + buf[j * size(2) + k] = std::real(kelem(i, j, k)); } } @@ -410,7 +413,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) H5Dclose(dset_id); if (typeid(data_t) == typeid(std::complex) || - typeid(data_t) == typeid(std::complex)) + typeid(data_t) == typeid(std::complex) || + this->space_ == kspace_id ) { datasetname += std::string(".im"); @@ -460,7 +464,10 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) for (size_t j = 0; j < size(1); ++j) for (size_t k = 0; k < size(2); ++k) { - buf[j * size(2) + k] = std::imag(relem(i, j, k)); + if( this->space_ == rspace_id ) + buf[j * size(2) + k] = std::imag(relem(i, j, k)); + else + buf[j * size(2) + k] = std::imag(kelem(i, j, k)); } memspace = H5Screate_simple(3, count, NULL); From 93568708410fa96c07d42fdedb8d490abe8921cc Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 Nov 2019 17:04:05 +0100 Subject: [PATCH 017/130] fixed bug where eigenvectors and coordinate indices were mixed up --- include/mat3.hh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/mat3.hh b/include/mat3.hh index 9f72305..04ac0ac 100644 --- a/include/mat3.hh +++ b/include/mat3.hh @@ -90,9 +90,9 @@ public: for( int i=0; i<3; ++i ){ evals[i] = gsl_vector_get( eval_, i ); - evec1[i] = gsl_matrix_get( evec_, 0, i ); - evec2[i] = gsl_matrix_get( evec_, 1, i ); - evec3[i] = gsl_matrix_get( evec_, 2, i ); + evec1[i] = gsl_matrix_get( evec_, i, 0 ); + evec2[i] = gsl_matrix_get( evec_, i, 1 ); + evec3[i] = gsl_matrix_get( evec_, i, 2 ); } } }; \ No newline at end of file From 06264bfb510a5a985fd2ac1390a962f2d5553cfd Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 Nov 2019 17:04:49 +0100 Subject: [PATCH 018/130] PLT: output of eigenvalues and vectors to HDF5 --- include/particle_plt.hh | 70 ++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 1390a42..7025362 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -20,7 +20,7 @@ inline void test_plt( void ){ real_t boxlen = 1.0; - size_t ngrid = 64; + size_t ngrid = 128; size_t npgrid = 1; size_t dpg = ngrid/npgrid; size_t nump = npgrid*npgrid*npgrid; @@ -87,11 +87,6 @@ inline void test_plt( void ){ return sr; }; - - // std::random_device rd; //Will be used to obtain a seed for the random number engine - // std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd() - // std::uniform_real_distribution<> dis(-0.25,0.25); - Grid_FFT D_xx({ngrid, ngrid, ngrid}, 
{boxlen, boxlen, boxlen}); Grid_FFT D_xy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT D_xz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); @@ -136,13 +131,13 @@ inline void test_plt( void ){ D_yz.FourierTransformForward(); D_zz.FourierTransformForward(); - std::ofstream ofs("test_ewald.txt"); + // std::ofstream ofs("test_ewald.txt"); real_t nfac = 1.0/std::pow(real_t(ngrid),1.5); real_t kNyquist = M_PI/boxlen * ngrid; - //#pragma omp parallel for + #pragma omp parallel for for( size_t i=0; i D; vec3 eval, evec1, evec2, evec3; @@ -150,33 +145,50 @@ inline void test_plt( void ){ for( size_t k=0; k kv = D_xx.get_k(i,j,k); - D = { std::real(D_xx.kelem(i,j,k) - kv[0]*kv[0] * rho.kelem(i,j,k) ), - std::real(D_xy.kelem(i,j,k) - kv[0]*kv[1] * rho.kelem(i,j,k) ), - std::real(D_xz.kelem(i,j,k) - kv[0]*kv[2] * rho.kelem(i,j,k) ), - std::real(D_yy.kelem(i,j,k) - kv[1]*kv[1] * rho.kelem(i,j,k) ), - std::real(D_yz.kelem(i,j,k) - kv[1]*kv[2] * rho.kelem(i,j,k) ), - std::real(D_zz.kelem(i,j,k) - kv[2]*kv[2] * rho.kelem(i,j,k) ) }; - D.eigen(eval, evec1, evec2, evec3); - + D_xx.kelem(i,j,k) = (D_xx.kelem(i,j,k) - kv[0]*kv[0] * rho.kelem(i,j,k))*nfac + 1.0/3.0; + D_xy.kelem(i,j,k) = (D_xy.kelem(i,j,k) - kv[0]*kv[1] * rho.kelem(i,j,k))*nfac; + D_xz.kelem(i,j,k) = (D_xz.kelem(i,j,k) - kv[0]*kv[2] * rho.kelem(i,j,k))*nfac; + D_yy.kelem(i,j,k) = (D_yy.kelem(i,j,k) - kv[1]*kv[1] * rho.kelem(i,j,k))*nfac + 1.0/3.0;; + D_yz.kelem(i,j,k) = (D_yz.kelem(i,j,k) - kv[1]*kv[2] * rho.kelem(i,j,k))*nfac; + D_zz.kelem(i,j,k) = (D_zz.kelem(i,j,k) - kv[2]*kv[2] * rho.kelem(i,j,k))*nfac + 1.0/3.0;; - ofs << std::setw(16) << kv.norm() / kNyquist - << std::setw(16) << eval[0] *nfac + 1.0/3.0 - << std::setw(16) << eval[1] *nfac + 1.0/3.0 - << std::setw(16) << eval[2] *nfac + 1.0/3.0 - << std::setw(16) << kv[0] - << std::setw(16) << kv[1] - << std::setw(16) << kv[2] - << std::endl; + D = { std::real(D_xx.kelem(i,j,k)), std::real(D_xy.kelem(i,j,k)), std::real(D_xz.kelem(i,j,k)), + std::real(D_yy.kelem(i,j,k)), std::real(D_yz.kelem(i,j,k)), std::real(D_zz.kelem(i,j,k)) }; + + D.eigen(eval, evec1, evec2, evec3); + + D_xx.kelem(i,j,k) = eval[2]; + D_yy.kelem(i,j,k) = eval[1]; + D_zz.kelem(i,j,k) = eval[0]; + + D_xy.kelem(i,j,k) = evec3[0]; + D_xz.kelem(i,j,k) = evec3[1]; + D_yz.kelem(i,j,k) = evec3[2]; + + // ofs << std::setw(16) << kv.norm() / kNyquist + // << std::setw(16) << eval[0] // *nfac + 1.0/3.0 + // << std::setw(16) << eval[1] // *nfac + 1.0/3.0 + // << std::setw(16) << eval[2] // *nfac + 1.0/3.0 + // << std::setw(16) << kv[0] + // << std::setw(16) << kv[1] + // << std::setw(16) << kv[2] + // << std::endl; } } } -// std::string filename("plt_test.hdf5"); -// unlink(filename.c_str()); -// #if defined(USE_MPI) -// MPI_Barrier(MPI_COMM_WORLD); -// #endif + std::string filename("plt_test.hdf5"); + unlink(filename.c_str()); +#if defined(USE_MPI) + MPI_Barrier(MPI_COMM_WORLD); +#endif // rho.Write_to_HDF5(filename, "rho"); + D_xx.Write_to_HDF5(filename, "omega1"); + D_yy.Write_to_HDF5(filename, "omega2"); + D_zz.Write_to_HDF5(filename, "omega3"); + D_xy.Write_to_HDF5(filename, "e1_x"); + D_xz.Write_to_HDF5(filename, "e1_y"); + D_yz.Write_to_HDF5(filename, "e1_z"); } From 8048825e02b9af50efff1454d857f07694fc8936 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 Nov 2019 17:05:28 +0100 Subject: [PATCH 019/130] cosmetics --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index fcc57e9..a3bffae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,7 @@ 
if(ENABLE_MPI) endif(ENABLE_MPI) +######################################################################################################################## # FFTW cmake_policy(SET CMP0074 NEW) if(ENABLE_MPI) @@ -57,9 +58,11 @@ else() find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS) endif(ENABLE_MPI) +######################################################################################################################## # GSL find_package(GSL REQUIRED) +######################################################################################################################## # HDF5 find_package(HDF5 REQUIRED) From 6d1a3bf7cc0b38b1515e3183f0935216a9c605a4 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 4 Nov 2019 00:25:45 +0100 Subject: [PATCH 020/130] minor cleanup, calculation of D works for SC --- include/grid_fft.hh | 16 ++++++----- include/particle_plt.hh | 63 ++++++++++++++++------------------------- 2 files changed, 34 insertions(+), 45 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 3b760b2..ad7920c 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -613,15 +613,15 @@ public: template void assign_function_of_grids_kdep(const functional &f, const grid1_t &g1, const grid2_t &g2) { - assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g.size(2) == size(2) ); - assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g.size(2) == size(2) ); + assert(g1.size(0) == size(0) && g1.size(1) == size(1) && g1.size(2) == size(2) ); + assert(g2.size(0) == size(0) && g2.size(1) == size(1) && g2.size(2) == size(2) ); #pragma omp parallel for - for (size_t i = 0; i < sizes_[0]; ++i) + for (size_t i = 0; i < size(0); ++i) { - for (size_t j = 0; j < sizes_[1]; ++j) + for (size_t j = 0; j < size(1); ++j) { - for (size_t k = 0; k < sizes_[2]; ++k) + for (size_t k = 0; k < size(2); ++k) { auto &elem = this->kelem(i, j, k); const auto &elemg1 = g1.kelem(i, j, k); @@ -683,7 +683,7 @@ public: void Write_PDF(std::string ofname, int nbins = 1000, double scale = 1.0, double rhomin = 1e-3, double rhomax = 1e3); - void shift_field( const vec3& s ) + void shift_field( const vec3& s, bool transform_back=true ) { FourierTransformForward(); apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { @@ -694,7 +694,9 @@ public: #endif return x * std::exp(ccomplex_t(0.0, shift)); }); - FourierTransformBackward(); + if( transform_back ){ + FourierTransformBackward(); + } } void zero_DC_mode(void) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 7025362..2c84051 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -8,6 +8,7 @@ #include +#include #include namespace particle{ @@ -20,7 +21,7 @@ inline void test_plt( void ){ real_t boxlen = 1.0; - size_t ngrid = 128; + size_t ngrid = 64; size_t npgrid = 1; size_t dpg = ngrid/npgrid; size_t nump = npgrid*npgrid*npgrid; @@ -36,40 +37,26 @@ inline void test_plt( void ){ const real_t dV( std::pow( boxlen/ngrid, 3 ) ); Grid_FFT rho({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - std::vector< vec3 > gpos ; auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { auto d = vR-vP; - d.x = (d.x>0.5)? d.x-1.0 : (d.x<-0.5)? d.x+1.0 : d.x; - d.y = (d.y>0.5)? d.y-1.0 : (d.y<-0.5)? d.y+1.0 : d.y; - d.z = (d.z>0.5)? d.z-1.0 : (d.z<-0.5)? 
d.z+1.0 : d.z; auto r = d.norm(); if( r< 1e-14 ) return 0.0; real_t val = 0.0; - val -= d[mu]*d[nu]/(r*r) * alpha3/pi3halfs * std::exp(-alpha*alpha*r*r); val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); - return pweight * val; }; - gpos.reserve(nump); - // sc - for( size_t i=0; i ccomplex_t { real_t kmod = k.norm(); @@ -79,11 +66,16 @@ inline void test_plt( void ){ auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ real_t sr = 0.0; - for( auto& p : gpos ){ - sr += greensftide_sr( mu, nu, v, p); + int N = 3; + for( int i=-N; i<=N; ++i ){ + for( int j=-N; j<=N; ++j ){ + for( int k=-N; k<=N; ++k ){ + if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ + sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); + } + } + } } - if( v.norm()<1e-14 ) return 0.0; - return sr; }; @@ -108,21 +100,16 @@ inline void test_plt( void ){ D_yy.relem(i,j,k) = evaluate_D(1,1,p); D_yz.relem(i,j,k) = evaluate_D(1,2,p); D_zz.relem(i,j,k) = evaluate_D(2,2,p); - - //D = {evaluate_D(0,0,p),evaluate_D(0,1,p),evaluate_D(0,2,p),evaluate_D(1,0,p),evaluate_D(1,1,p),evaluate_D(2,2,p)}; - //D.eigen(eval, evec1, evec2, evec3); - //rho.relem(i,j,k) = eval[2]; } } } + D_xx.relem(0,0,0) = 0.0; D_xy.relem(0,0,0) = 0.0; D_xz.relem(0,0,0) = 0.0; D_yy.relem(0,0,0) = 0.0; D_yz.relem(0,0,0) = 0.0; D_zz.relem(0,0,0) = 0.0; - - D_xx.FourierTransformForward(); D_xy.FourierTransformForward(); @@ -131,13 +118,13 @@ inline void test_plt( void ){ D_yz.FourierTransformForward(); D_zz.FourierTransformForward(); - // std::ofstream ofs("test_ewald.txt"); + std::ofstream ofs("test_ewald.txt"); real_t nfac = 1.0/std::pow(real_t(ngrid),1.5); real_t kNyquist = M_PI/boxlen * ngrid; - #pragma omp parallel for + // #pragma omp parallel for for( size_t i=0; i D; vec3 eval, evec1, evec2, evec3; @@ -165,14 +152,14 @@ inline void test_plt( void ){ D_xz.kelem(i,j,k) = evec3[1]; D_yz.kelem(i,j,k) = evec3[2]; - // ofs << std::setw(16) << kv.norm() / kNyquist - // << std::setw(16) << eval[0] // *nfac + 1.0/3.0 - // << std::setw(16) << eval[1] // *nfac + 1.0/3.0 - // << std::setw(16) << eval[2] // *nfac + 1.0/3.0 - // << std::setw(16) << kv[0] - // << std::setw(16) << kv[1] - // << std::setw(16) << kv[2] - // << std::endl; + ofs << std::setw(16) << kv.norm() / kNyquist + << std::setw(16) << eval[0] + << std::setw(16) << eval[1] + << std::setw(16) << eval[2] + << std::setw(16) << kv[0] + << std::setw(16) << kv[1] + << std::setw(16) << kv[2] + << std::endl; } } } From 747031bee2b36b94632285eef9c8e14122f2b1c1 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Tue, 5 Nov 2019 00:29:33 +0100 Subject: [PATCH 021/130] added calculation of Brillouin zone (from Bruno's Code) --- include/particle_plt.hh | 170 ++++++++++++++++++++++++++++++++++++++-- include/vec3.hh | 9 +++ 2 files changed, 173 insertions(+), 6 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 2c84051..bea2e30 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -19,6 +19,22 @@ inline void test_plt( void ){ csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; csoca::ilog << "Testing PLT implementation..." 
<< std::endl; + constexpr real_t pi = M_PI, twopi = 2.0*M_PI; + + const std::vector> bcc_normals{ + {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, + {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, + {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} + }; + + const std::vector> bcc_reciprocal{ + {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} + }; + + /*const std::vector> fcc_reciprocal{ + {-2.,0.,2.}, {2.,0.,0.}, {1.,1.,-1.} + };*/ + real_t boxlen = 1.0; size_t ngrid = 64; @@ -56,7 +72,9 @@ inline void test_plt( void ){ // sc rho.zero(); rho.relem(0,0,0) = pweight/dV; - + // rho.relem(0,0,0) = pweight/dV/2; + // rho.relem(ngrid/2,ngrid/2,ngrid/2) = pweight/dV/2; + rho.FourierTransformForward(); rho.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { real_t kmod = k.norm(); @@ -72,6 +90,17 @@ inline void test_plt( void ){ for( int k=-N; k<=N; ++k ){ if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); + + // sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} )/2; + + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; } } } @@ -118,13 +147,13 @@ inline void test_plt( void ){ D_yz.FourierTransformForward(); D_zz.FourierTransformForward(); - std::ofstream ofs("test_ewald.txt"); + real_t nfac = 1.0/std::pow(real_t(ngrid),1.5); real_t kNyquist = M_PI/boxlen * ngrid; - // #pragma omp parallel for + #pragma omp parallel for for( size_t i=0; i D; vec3 eval, evec1, evec2, evec3; @@ -151,11 +180,139 @@ inline void test_plt( void ){ D_xy.kelem(i,j,k) = evec3[0]; D_xz.kelem(i,j,k) = evec3[1]; D_yz.kelem(i,j,k) = evec3[2]; + } + } + } +#if 1 + std::vector> vectk; + std::vector> ico, vecitk; + vectk.assign(D_xx.size(0)*D_xx.size(1)*D_xx.size(2),vec3()); + ico.assign(D_xx.size(0)*D_xx.size(1)*D_xx.size(2),vec3()); + vecitk.assign(D_xx.size(0)*D_xx.size(1)*D_xx.size(2),vec3()); + + std::ofstream ofs2("test_brillouin.txt"); + + const int numb = 1; + for( size_t i=0; i D; + vec3 eval, evec1, evec2, evec3; + vec3 a({0.,0.,0.}); + + for( size_t j=0; j ar = D_xx.get_k(i,j,k) / (twopi*ngrid); + vec3 kv = D_xx.get_k(i,j,k); + + for( int l=0; l<3; l++ ){ + a[l] = 0.0; + for( int m=0; m<3; m++){ + // project k on reciprocal basis + a[l] += ar[m]*bcc_reciprocal[m][l]; + } + } + + // translate the k-vectors into the "candidate" FBZ + vec3 anum; + for( int l1=-numb; l1<=numb; ++l1 ){ + anum[0] = real_t(l1); + for( int l2=-numb; l2<=numb; ++l2 ){ + anum[1] = real_t(l2); + for( int l3=-numb; l3<=numb; ++l3 ){ + anum[2] = real_t(l3); + + vectk[idx] = a; + + for( int l=0; l<3; l++ ){ + for( int m=0; m<3; m++){ + // project k on reciprocal basis + vectk[idx][l] += anum[m]*bcc_reciprocal[m][l]; + } + } + // check if in first Brillouin zone + bool btest=true; + for( size_t l=0; l amod*1.0001 ){ btest=false; break; } + if( scalar > 1.01 * amod2 ){ btest=false; break; } + } + if( btest ){ + 
vecitk[idx][0] = std::round(vectk[idx][0]*(ngrid)/twopi); + vecitk[idx][1] = std::round(vectk[idx][1]*(ngrid)/twopi); + vecitk[idx][2] = std::round(vectk[idx][2]*(ngrid)/twopi); + + ico[idx][0] = int((ar[0]+l1) * ngrid+0.5); + ico[idx][1] = int((ar[1]+l2) * ngrid+0.5); + ico[idx][2] = int((ar[2]+l3) * ngrid+0.5); + if( ico[idx][2] < 0 ){ + ico[idx][0] = -ico[idx][0]; + ico[idx][1] = -ico[idx][1]; + ico[idx][2] = -ico[idx][2]; + } + + ico[idx][0] = (ico[idx][0]+ngrid)%ngrid; + ico[idx][1] = (ico[idx][1]+ngrid)%ngrid; + + if( vectk[idx][2] < 0 ){ + vectk[idx][0] = - vectk[idx][0]; + vectk[idx][1] = - vectk[idx][1]; + vectk[idx][2] = - vectk[idx][2]; + } + + if( vecitk[idx][2] < 0 ){ + vecitk[idx][0] = -vecitk[idx][0]; + vecitk[idx][1] = -vecitk[idx][1]; + vecitk[idx][2] = -vecitk[idx][2]; + } + vecitk[idx][0] = (vecitk[idx][0]+ngrid)%ngrid; + vecitk[idx][1] = (vecitk[idx][1]+ngrid)%ngrid; + vecitk[idx][2] = (vecitk[idx][2]+ngrid)%ngrid; + + + + //vecitk[idx][0] = (vecitk[idx][0]<0)? vecitk[idx][0]+ngrid : vecitk[idx][0];; + //vecitk[idx][1] = (vecitk[idx][1]<0)? vecitk[idx][1]+ngrid : vecitk[idx][1]; + + + + //ofs2 << kv.x << ", " << kv.y << ", " << kv.z << ", " << vectk[idx].x*(ngrid)/twopi << ", " << vectk[idx].y*(ngrid)/twopi << ", " << vectk[idx].z*(ngrid)/twopi << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; + ofs2 << kv.x << ", " << kv.y << ", " << kv.z << ", " << vecitk[idx].x << ", " << vecitk[idx].y << ", " << vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; + //std::cout << real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] << " " << vectk[idx][1] << std::endl; + goto endloop; + } + } + } + } + endloop: ; + + D_xx.kelem(i,j,k) = D_xx.kelem(ico[idx][0],ico[idx][1],ico[idx][2]); + // D_xx.kelem(ico[idx][0],ico[idx][1],ico[idx][2]) = D_xx.kelem(i,j,k); + // D_xx.kelem(i,j,k) = D_xx.kelem(i+vecitk[idx][0],j+vecitk[idx][1],k+vecitk[idx][2]); + } + } + + } + +#endif + + std::ofstream ofs("test_ewald.txt"); + for( size_t i=0; i kv = D_xx.get_k(i,j,k); ofs << std::setw(16) << kv.norm() / kNyquist - << std::setw(16) << eval[0] - << std::setw(16) << eval[1] - << std::setw(16) << eval[2] + << std::setw(16) << std::real(D_xx.kelem(i,j,k)) + << std::setw(16) << std::real(D_yy.kelem(i,j,k)) + << std::setw(16) << std::real(D_zz.kelem(i,j,k)) << std::setw(16) << kv[0] << std::setw(16) << kv[1] << std::setw(16) << kv[2] @@ -164,6 +321,7 @@ inline void test_plt( void ){ } } + std::string filename("plt_test.hdf5"); unlink(filename.c_str()); #if defined(USE_MPI) diff --git a/include/vec3.hh b/include/vec3.hh index 057fbcd..b6550ae 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -41,6 +41,12 @@ public: //! const bracket index access to vector components const T &operator[](size_t i) const { return data_[i]; } + // assignment operator + vec3& operator=( const vec3& v ) { data_=v.data_; return *this; } + + // assignment operator + const vec3& operator=( const vec3& v ) const { data_=v.data_; return *this; } + //! implementation of summation of vec3 vec3 operator+( const vec3& v ) const{ return vec3({x+v.x,y+v.y,z+v.z}); } @@ -50,6 +56,9 @@ public: //! implementation of scalar multiplication vec3 operator*( T s ) const{ return vec3({x*s,y*s,z*s}); } + //! implementation of scalar division + vec3 operator/( T s ) const{ return vec3({x/s,y/s,z/s}); } + //! 
implementation of += operator vec3& operator+=( const vec3& v ) const{ x+=v.x; y+=v.y; z+=v.z; return *this; } From 144d0d9e1ace23ce1a8ff9414b0199a37f044ffc Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Tue, 5 Nov 2019 19:14:14 +0100 Subject: [PATCH 022/130] added projection onto plt eigenmodes. (not working) --- include/grid_fft.hh | 79 +++++++---- include/particle_plt.hh | 282 +++++++++++++++++++++++++++++++++++----- src/ic_generator.cc | 48 +++++++ src/main.cc | 4 +- 4 files changed, 356 insertions(+), 57 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index ad7920c..00d2cb0 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -152,32 +152,6 @@ public: return (i * sizes_[1] + j) * sizes_[3] + k; } - data_t get_cic( const vec3& v ) const{ - // warning! this doesn't work with MPI - vec3 x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], - std::fmod(v.y/length_[1]+1.0,1.0)*n_[1], - std::fmod(v.z/length_[2]+1.0,1.0)*n_[2] }); - size_t ix = static_cast(x.x); - size_t iy = static_cast(x.y); - size_t iz = static_cast(x.z); - real_t dx = x.x-real_t(ix), tx = 1.0-dx; - real_t dy = x.y-real_t(iy), ty = 1.0-dy; - real_t dz = x.z-real_t(iz), tz = 1.0-dz; - size_t ix1 = (ix+1)%n_[0]; - size_t iy1 = (iy+1)%n_[1]; - size_t iz1 = (iz+1)%n_[2]; - data_t val = 0.0; - val += this->relem(ix ,iy ,iz ) * tx * ty * tz; - val += this->relem(ix ,iy ,iz1) * tx * ty * dz; - val += this->relem(ix ,iy1,iz ) * tx * dy * tz; - val += this->relem(ix ,iy1,iz1) * tx * dy * dz; - val += this->relem(ix1,iy ,iz ) * dx * ty * tz; - val += this->relem(ix1,iy ,iz1) * dx * ty * dz; - val += this->relem(ix1,iy1,iz ) * dx * dy * tz; - val += this->relem(ix1,iy1,iz1) * dx * dy * dz; - return val; - } - template vec3 get_r(const size_t i, const size_t j, const size_t k) const { @@ -257,6 +231,59 @@ public: return kk; } + data_t get_cic( const vec3& v ) const{ + // warning! this doesn't work with MPI + vec3 x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], + std::fmod(v.y/length_[1]+1.0,1.0)*n_[1], + std::fmod(v.z/length_[2]+1.0,1.0)*n_[2] }); + size_t ix = static_cast(x.x); + size_t iy = static_cast(x.y); + size_t iz = static_cast(x.z); + real_t dx = x.x-real_t(ix), tx = 1.0-dx; + real_t dy = x.y-real_t(iy), ty = 1.0-dy; + real_t dz = x.z-real_t(iz), tz = 1.0-dz; + size_t ix1 = (ix+1)%n_[0]; + size_t iy1 = (iy+1)%n_[1]; + size_t iz1 = (iz+1)%n_[2]; + data_t val = 0.0; + val += this->relem(ix ,iy ,iz ) * tx * ty * tz; + val += this->relem(ix ,iy ,iz1) * tx * ty * dz; + val += this->relem(ix ,iy1,iz ) * tx * dy * tz; + val += this->relem(ix ,iy1,iz1) * tx * dy * dz; + val += this->relem(ix1,iy ,iz ) * dx * ty * tz; + val += this->relem(ix1,iy ,iz1) * dx * ty * dz; + val += this->relem(ix1,iy1,iz ) * dx * dy * tz; + val += this->relem(ix1,iy1,iz1) * dx * dy * dz; + return val; + } + + ccomplex_t get_cic_kspace( const vec3& x ) const{ + // warning! 
this doesn't work with MPI + size_t ix = static_cast(x.x); + size_t iy = static_cast(x.y); + size_t iz = std::min(static_cast(x.z),size(2)-1); //static_cast(x.z); + real_t dx = x.x-real_t(ix), tx = 1.0-dx; + real_t dy = x.y-real_t(iy), ty = 1.0-dy; + real_t dz = x.z-real_t(iz), tz = 1.0-dz; + size_t ix1 = (ix+1)%size(0); + size_t iy1 = (iy+1)%size(1); + size_t iz1 = std::min((iz+1),size(2)-1); + ccomplex_t val = 0.0; + val += this->kelem(ix ,iy ,iz ) * tx * ty * tz; + val += this->kelem(ix ,iy ,iz1) * tx * ty * dz; + val += this->kelem(ix ,iy1,iz ) * tx * dy * tz; + val += this->kelem(ix ,iy1,iz1) * tx * dy * dz; + val += this->kelem(ix1,iy ,iz ) * dx * ty * tz; + val += this->kelem(ix1,iy ,iz1) * dx * ty * dz; + val += this->kelem(ix1,iy1,iz ) * dx * dy * tz; + val += this->kelem(ix1,iy1,iz1) * dx * dy * dz; + // if( val != val ){ + //auto k = this->get_k(ix,iy,iz); + //std::cerr << ix << " " << iy << " " << iz << " " << val << " " << this->gradient(0,{ix,iy,iz}) << " " << this->gradient(1,{ix,iy,iz}) << " " << this->gradient(2,{ix,iy,iz}) << std::endl; + // } + return val; + } + inline ccomplex_t gradient( const int idim, std::array ijk ) const { #if defined(USE_MPI) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index bea2e30..7ba2a55 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -14,11 +14,217 @@ namespace particle{ //! implement Marcos et al. PLT calculation +class lattice_gradient{ +private: + const real_t boxlen_; + const size_t ngmapto_, ngrid_, ngrid32_; + const real_t mapratio_; + Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; + Grid_FFT grad_x_, grad_y_, grad_z_; + + void init_D() + { + const real_t eta = 2.0/ngrid_; // Ewald cutoff shall be 2 cells + const real_t alpha = 1.0/std::sqrt(2)/eta; + const real_t alpha2 = alpha*alpha; + const real_t alpha3 = alpha2*alpha; + const real_t sqrtpi = std::sqrt(M_PI); + const real_t pi32 = std::pow(M_PI,1.5); + + //! just a Kronecker \delta_ij + auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; + + //! just a sign function + auto sign = []( real_t x ) -> real_t { return (x<0.0)? -1.0 : 1.0; }; + + //! short range component of Ewald sum, eq. (A2) of Marcos (2008) + auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { + auto d = vR-vP; + auto r = d.norm(); + // if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! + real_t val{0.0}; + val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r); + val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * + (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); + return val; + }; + + //! sums mirrored copies of short-range component of Ewald sum + auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ + real_t sr = 0.0; + constexpr int N = 3; // number of repeated copies ±N per dimension + for( int i=-N; i<=N; ++i ){ + for( int j=-N; j<=N; ++j ){ + for( int k=-N; k<=N; ++k ){ + if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ + sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); + } + } + } + } + return sr; + }; + + //! 
fill D_ij array with short range evaluated function + #pragma omp parallel for + for( size_t i=0; i p; + p.x = real_t(i)/ngrid_; + for( size_t j=0; j D; + vec3 eval, evec1, evec2, evec3; + + #pragma omp for + for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + const real_t kmod2 = kv.norm_squared(); + + // long range component of Ewald sum + real_t phi0 = -rho0 * std::exp(-0.5*eta*eta*kmod2) / kmod2; + phi0 = (phi0==phi0)? phi0 : 0.0; // catch NaN from division by zero when kmod2=0 + + // assemble short-range + long_range of Ewald sum and add DC component to trace + D_xx_.kelem(i,j,k) = (D_xx_.kelem(i,j,k) - kv[0]*kv[0] * phi0)*nfac + 1.0/3.0; + D_xy_.kelem(i,j,k) = (D_xy_.kelem(i,j,k) - kv[0]*kv[1] * phi0)*nfac; + D_xz_.kelem(i,j,k) = (D_xz_.kelem(i,j,k) - kv[0]*kv[2] * phi0)*nfac; + D_yy_.kelem(i,j,k) = (D_yy_.kelem(i,j,k) - kv[1]*kv[1] * phi0)*nfac + 1.0/3.0; + D_yz_.kelem(i,j,k) = (D_yz_.kelem(i,j,k) - kv[1]*kv[2] * phi0)*nfac; + D_zz_.kelem(i,j,k) = (D_zz_.kelem(i,j,k) - kv[2]*kv[2] * phi0)*nfac + 1.0/3.0; + + } + } + } + + D_xx_.kelem(0,0,0) = 1.0/3.0; + D_xy_.kelem(0,0,0) = 0.0; + D_xz_.kelem(0,0,0) = 0.0; + D_yy_.kelem(0,0,0) = 1.0/3.0; + D_yz_.kelem(0,0,0) = 0.0; + D_zz_.kelem(0,0,0) = 1.0/3.0; + + #pragma omp for + for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + const real_t kmod = kv.norm()/mapratio_/boxlen_; + + // put matrix elements into actual matrix + D = { std::real(D_xx_.kelem(i,j,k)), std::real(D_xy_.kelem(i,j,k)), std::real(D_xz_.kelem(i,j,k)), + std::real(D_yy_.kelem(i,j,k)), std::real(D_yz_.kelem(i,j,k)), std::real(D_zz_.kelem(i,j,k)) }; + + // compute eigenstructure of matrix + D.eigen(eval, evec1, evec2, evec3); + + // store in diagonal components of D_ij + // D_xx_.kelem(i,j,k) = (i!=D_xx_.size(0)/2)? ccomplex_t(0.0,kv.x/mapratio_/boxlen_) : 0.0; + // D_yy_.kelem(i,j,k) = (j!=D_yy_.size(1)/2)? ccomplex_t(0.0,kv.y/mapratio_/boxlen_) : 0.0; + // D_zz_.kelem(i,j,k) = (k!=D_zz_.size(2)-1)? 
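
In summary (reading off the loops above, and up to the FFT normalisation nfac used in the code), init_D() assembles the Fourier-space dynamical matrix from the transformed short-range sum, the damped long-range potential, and a trace term that adds back the DC contribution,

    D_{\mu\nu}(\mathbf{k}) = \Bigl[\widehat{D^{\rm sr}_{\mu\nu}}(\mathbf{k}) - k_\mu k_\nu\, \hat\phi_0(\mathbf{k})\Bigr]\, n_{\rm fac} + \tfrac{1}{3}\,\delta_{\mu\nu},
    \qquad \hat\phi_0(\mathbf{k}) = -\rho_0\, \frac{e^{-\eta^2 k^2/2}}{k^2},

and its eigenvalues and eigenvectors are then computed per k-mode with mat3s::eigen().
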
ccomplex_t(0.0,kv.z/mapratio_/boxlen_) : 0.0; + // D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kv.x/mapratio_/boxlen_); + // D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kv.y/mapratio_/boxlen_); + // D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kv.z/mapratio_/boxlen_); + + D_xx_.kelem(i,j,k) = sign(kv.dot(evec3)) * ccomplex_t(0.0,kmod) * evec3.x; + D_yy_.kelem(i,j,k) = sign(kv.dot(evec3)) * ccomplex_t(0.0,kmod) * evec3.y; + D_zz_.kelem(i,j,k) = sign(kv.dot(evec3)) * ccomplex_t(0.0,kmod) * evec3.z; + + if(std::fabs(kv.dot(evec3))>1e-16){ + D_xx_.kelem(i,j,k) /= (std::fabs(kv.dot(evec3))/kv.norm()); + D_yy_.kelem(i,j,k) /= (std::fabs(kv.dot(evec3))/kv.norm()); + D_zz_.kelem(i,j,k) /= (std::fabs(kv.dot(evec3))/kv.norm()); + } + } + } + } + } + + } + +public: + explicit lattice_gradient( real_t boxlen, size_t ngridother, size_t ngridself=64 ) + : boxlen_(boxlen), ngmapto_(ngridother), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), + D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + grad_x_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_y_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) + { + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + double wtime = get_wtime(); + csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT lattice eigenmodes "<< std::flush; + + init_D(); + + csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + } + + inline ccomplex_t gradient( const int idim, std::array ijk ) const + { + real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; + // std::cerr << ix << " " << ijk[0] << std::endl; + if( idim== 0 ){ + return D_xx_.get_cic_kspace({ix,iy,iz}); + } + else if( idim==1){ + return D_yy_.get_cic_kspace({ix,iy,iz}); + } + return D_zz_.get_cic_kspace({ix,iy,iz}); + } + +}; + +#if 0 inline void test_plt( void ){ csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; csoca::ilog << "Testing PLT implementation..." << std::endl; + lattice_gradient lg( 64 ); + + return; + constexpr real_t pi = M_PI, twopi = 2.0*M_PI; const std::vector> bcc_normals{ @@ -29,7 +235,7 @@ inline void test_plt( void ){ const std::vector> bcc_reciprocal{ {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} - }; + }; /*const std::vector> fcc_reciprocal{ {-2.,0.,2.}, {2.,0.,0.}, {1.,1.,-1.} @@ -78,6 +284,7 @@ inline void test_plt( void ){ rho.FourierTransformForward(); rho.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { real_t kmod = k.norm(); + std::cerr << x << std::endl; return -x * std::exp(-0.5*eta*eta*kmod*kmod) / (kmod*kmod); }); rho.zero_DC_mode(); @@ -246,36 +453,47 @@ inline void test_plt( void ){ if( scalar > 1.01 * amod2 ){ btest=false; break; } } if( btest ){ + // int is = (i>ngrid/2)? i-ngrid : i; + // int js = (j>ngrid/2)? j-ngrid : j; + // int ks = (k>ngrid/2)? 
k-ngrid : k; + vecitk[idx][0] = std::round(vectk[idx][0]*(ngrid)/twopi); vecitk[idx][1] = std::round(vectk[idx][1]*(ngrid)/twopi); vecitk[idx][2] = std::round(vectk[idx][2]*(ngrid)/twopi); - ico[idx][0] = int((ar[0]+l1) * ngrid+0.5); - ico[idx][1] = int((ar[1]+l2) * ngrid+0.5); - ico[idx][2] = int((ar[2]+l3) * ngrid+0.5); - if( ico[idx][2] < 0 ){ - ico[idx][0] = -ico[idx][0]; - ico[idx][1] = -ico[idx][1]; - ico[idx][2] = -ico[idx][2]; - } + ico[idx][0] = std::round((ar[0]+l1) * ngrid); + ico[idx][1] = std::round((ar[1]+l2) * ngrid); + ico[idx][2] = std::round((ar[2]+l3) * ngrid); - ico[idx][0] = (ico[idx][0]+ngrid)%ngrid; - ico[idx][1] = (ico[idx][1]+ngrid)%ngrid; + assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][0]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][0]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][0] - vectk[idx][0] ) < 1e-12 ); + assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] - vectk[idx][1] ) < 1e-12 ); + assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][2]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][2]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][2] - vectk[idx][2] ) < 1e-12 ); + - if( vectk[idx][2] < 0 ){ - vectk[idx][0] = - vectk[idx][0]; - vectk[idx][1] = - vectk[idx][1]; - vectk[idx][2] = - vectk[idx][2]; - } + + // if( ico[idx][2] < 0 ){ + // ico[idx][0] = -ico[idx][0]; + // ico[idx][1] = -ico[idx][1]; + // ico[idx][2] = -ico[idx][2]; + // } - if( vecitk[idx][2] < 0 ){ - vecitk[idx][0] = -vecitk[idx][0]; - vecitk[idx][1] = -vecitk[idx][1]; - vecitk[idx][2] = -vecitk[idx][2]; - } - vecitk[idx][0] = (vecitk[idx][0]+ngrid)%ngrid; - vecitk[idx][1] = (vecitk[idx][1]+ngrid)%ngrid; - vecitk[idx][2] = (vecitk[idx][2]+ngrid)%ngrid; + // ico[idx][0] = (ico[idx][0]+ngrid)%ngrid; + // ico[idx][1] = (ico[idx][1]+ngrid)%ngrid; + + // if( vectk[idx][2] < 0 ){ + // vectk[idx][0] = - vectk[idx][0]; + // vectk[idx][1] = - vectk[idx][1]; + // vectk[idx][2] = - vectk[idx][2]; + // } + + // if( vecitk[idx][2] < 0 ){ + // vecitk[idx][0] = -vecitk[idx][0]; + // vecitk[idx][1] = -vecitk[idx][1]; + // vecitk[idx][2] = -vecitk[idx][2]; + // } + //vecitk[idx][0] = (vecitk[idx][0]+ngrid)%ngrid; + //vecitk[idx][1] = (vecitk[idx][1]+ngrid)%ngrid; + //vecitk[idx][2] = (vecitk[idx][2]+ngrid)%ngrid; @@ -285,8 +503,14 @@ inline void test_plt( void ){ //ofs2 << kv.x << ", " << kv.y << ", " << kv.z << ", " << vectk[idx].x*(ngrid)/twopi << ", " << vectk[idx].y*(ngrid)/twopi << ", " << vectk[idx].z*(ngrid)/twopi << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; - ofs2 << kv.x << ", " << kv.y << ", " << kv.z << ", " << vecitk[idx].x << ", " << vecitk[idx].y << ", " << vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; - //std::cout << real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] << " " << vectk[idx][1] << std::endl; + ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << vecitk[idx].x << ", " << vecitk[idx].y << ", " << vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; + ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << -vecitk[idx].x << ", " << -vecitk[idx].y << ", " << -vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; + + // std::cerr << 
real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][0]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][0]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][0] << " " << vectk[idx][0] << std::endl; + + // std::cerr << real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][0]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][0]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][0] << " " << vectk[idx][0] << std::endl; + //std::cerr << real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] << " " << vectk[idx][1] << std::endl; + // assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] - vectk[idx][1] ) < 1e-12 ); goto endloop; } } @@ -294,9 +518,9 @@ inline void test_plt( void ){ } endloop: ; - D_xx.kelem(i,j,k) = D_xx.kelem(ico[idx][0],ico[idx][1],ico[idx][2]); + //D_xx.kelem(i,j,k) = D_xx.kelem(ico[idx][0],ico[idx][1],ico[idx][2]); // D_xx.kelem(ico[idx][0],ico[idx][1],ico[idx][2]) = D_xx.kelem(i,j,k); - // D_xx.kelem(i,j,k) = D_xx.kelem(i+vecitk[idx][0],j+vecitk[idx][1],k+vecitk[idx][2]); + //D_xx.kelem(i,j,k) = D_xx.kelem(i+vecitk[idx][0],j+vecitk[idx][1],k+vecitk[idx][2]); } } @@ -336,6 +560,6 @@ inline void test_plt( void ){ D_yz.Write_to_HDF5(filename, "e1_z"); } - +#endif } \ No newline at end of file diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 48d31dc..04682ad 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -7,6 +7,7 @@ #include #include +#include #include // for unlink @@ -164,6 +165,12 @@ int Run( ConfigFile& the_config ) // NaiveConvolver Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); //-------------------------------------------------------------------- + //-------------------------------------------------------------------- + // Create PLT gradient operator + //-------------------------------------------------------------------- + particle::lattice_gradient lg( boxlen, ngrid ); + + //-------------------------------------------------------------------- std::vector species_list; species_list.push_back( cosmo_species::dm ); if( bDoBaryons ) species_list.push_back( cosmo_species::baryon ); @@ -455,6 +462,7 @@ int Run( ConfigFile& the_config ) tmp.FourierTransformForward(false); // combine the various LPT potentials into one and take gradient + #if 0 // non PLT corrected version #pragma omp parallel for for (size_t i = 0; i < phi.size(0); ++i) { for (size_t j = 0; j < phi.size(1); ++j) { @@ -467,6 +475,21 @@ int Run( ConfigFile& the_config ) } } } + #else // non PLT corrected version + #pragma omp parallel for + for (size_t i = 0; i < phi.size(0); ++i) { + for (size_t j = 0; j < phi.size(1); ++j) { + for (size_t k = 0; k < phi.size(2); ++k) { + // std::cerr << i << " " << j << " " << k << " " << phi.gradient(idim,{i,j,k}) << " " << lg.gradient(idim,{i,j,k}) << std::endl; + size_t idx = phi.get_idx(i,j,k); + auto phitot = phi.kelem(idx) + phi2.kelem(idx) + phi3a.kelem(idx) + phi3b.kelem(idx); + // divide by Lbox, because displacement is in box units for output plugin + tmp.kelem(idx) = lunit / boxlen * ( lg.gradient(idim,{i,j,k}) * phitot + + lg.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx) ); + } + } + } + #endif tmp.FourierTransformBackward(); // if we write particle data, store particle data in particle structure @@ -491,6 +514,7 @@ int Run( ConfigFile& the_config ) tmp.FourierTransformForward(false); + #if 0 // non 
PLT corrected version #pragma omp parallel for for (size_t i = 0; i < phi.size(0); ++i) { for (size_t j = 0; j < phi.size(1); ++j) { @@ -513,6 +537,30 @@ int Run( ConfigFile& the_config ) } } } + #else // PLT corrected version + #pragma omp parallel for + for (size_t i = 0; i < phi.size(0); ++i) { + for (size_t j = 0; j < phi.size(1); ++j) { + for (size_t k = 0; k < phi.size(2); ++k) { + size_t idx = phi.get_idx(i,j,k); + // divide by Lbox, because displacement is in box units for output plugin + auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx) + vfac3 * (phi3a.kelem(idx) + phi3b.kelem(idx)); + + tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,{i,j,k}) * phitot_v + + vfac3 * (lg.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx)) ); + + if( bAddExternalTides ){ + // modify velocities with anisotropic expansion factor**2 + tmp.kelem(idx) *= std::pow(lss_aniso_alpha[idim],2.0); + } + // if( bSymplecticPT){ + // auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx); + // tmp.kelem(idx) = vunit*ccomplex_t(0.0,1.0) * (kk[idim] * phitot_v) + vfac1 * A3[idim]->kelem(idx); + // } + } + } + } + #endif tmp.FourierTransformBackward(); // if we write particle data, store particle data in particle structure diff --git a/src/main.cc b/src/main.cc index 01ee5be..c36943c 100644 --- a/src/main.cc +++ b/src/main.cc @@ -182,9 +182,9 @@ int main( int argc, char** argv ) /////////////////////////////////////////////////////////////////////// // do the job... /////////////////////////////////////////////////////////////////////// - // ic_generator::Run( the_config ); + ic_generator::Run( the_config ); - particle::test_plt(); + // particle::test_plt(); /////////////////////////////////////////////////////////////////////// #if defined(USE_MPI) From 88ac5ab19a4cbcd8726306f874975dc2ca4c3943 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 6 Nov 2019 11:44:32 +0100 Subject: [PATCH 023/130] fix PS normalisation with PLT proj correction --- include/particle_plt.hh | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 7ba2a55..91cd4d9 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -151,30 +151,24 @@ private: vec3 kv = D_xx_.get_k(i,j,k); const real_t kmod = kv.norm()/mapratio_/boxlen_; - // put matrix elements into actual matrix + // put matrix elements into actual matrix D = { std::real(D_xx_.kelem(i,j,k)), std::real(D_xy_.kelem(i,j,k)), std::real(D_xz_.kelem(i,j,k)), - std::real(D_yy_.kelem(i,j,k)), std::real(D_yz_.kelem(i,j,k)), std::real(D_zz_.kelem(i,j,k)) }; + std::real(D_yy_.kelem(i,j,k)), std::real(D_yz_.kelem(i,j,k)), std::real(D_zz_.kelem(i,j,k)) }; // compute eigenstructure of matrix D.eigen(eval, evec1, evec2, evec3); // store in diagonal components of D_ij - // D_xx_.kelem(i,j,k) = (i!=D_xx_.size(0)/2)? ccomplex_t(0.0,kv.x/mapratio_/boxlen_) : 0.0; - // D_yy_.kelem(i,j,k) = (j!=D_yy_.size(1)/2)? ccomplex_t(0.0,kv.y/mapratio_/boxlen_) : 0.0; - // D_zz_.kelem(i,j,k) = (k!=D_zz_.size(2)-1)? 
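
With this normalisation fix, the operator returned by lattice_gradient::gradient() can be read as replacing the fluid-limit gradient i k_j by the projection onto the unit eigenvector \hat e_3 of D(\mathbf{k}) belonging to the largest eigenvalue,

    \tilde\nabla_j(\mathbf{k}) = \frac{i\,|\mathbf{k}|\;\hat e_{3,j}}{\hat{\mathbf{k}}\cdot\hat e_3} \;\longrightarrow\; i\,k_j \quad \text{for} \quad \hat e_3 \to \hat{\mathbf{k}},

so that the longitudinal component \hat{\mathbf{k}}\cdot\tilde\nabla = i\,|\mathbf{k}| is unchanged, which is what preserves the input power-spectrum normalisation (the point of this commit); modes with \mathbf{k} nearly orthogonal to \hat e_3 are zeroed by the 1e-10 guard.
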
ccomplex_t(0.0,kv.z/mapratio_/boxlen_) : 0.0; - // D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kv.x/mapratio_/boxlen_); - // D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kv.y/mapratio_/boxlen_); - // D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kv.z/mapratio_/boxlen_); + D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.x; + D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.y; + D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.z; - D_xx_.kelem(i,j,k) = sign(kv.dot(evec3)) * ccomplex_t(0.0,kmod) * evec3.x; - D_yy_.kelem(i,j,k) = sign(kv.dot(evec3)) * ccomplex_t(0.0,kmod) * evec3.y; - D_zz_.kelem(i,j,k) = sign(kv.dot(evec3)) * ccomplex_t(0.0,kmod) * evec3.z; + auto norm = (kv.norm()/kv.dot(evec3)); + if ( std::abs(kv.dot(evec3)) < 1e-10 || kv.norm() < 1e-10 ) norm = 0.0; - if(std::fabs(kv.dot(evec3))>1e-16){ - D_xx_.kelem(i,j,k) /= (std::fabs(kv.dot(evec3))/kv.norm()); - D_yy_.kelem(i,j,k) /= (std::fabs(kv.dot(evec3))/kv.norm()); - D_zz_.kelem(i,j,k) /= (std::fabs(kv.dot(evec3))/kv.norm()); - } + D_xx_.kelem(i,j,k) *= norm; + D_yy_.kelem(i,j,k) *= norm; + D_zz_.kelem(i,j,k) *= norm; } } } From d075e496be9cc265c9b7a1aa4b7aa5653181c91d Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 6 Nov 2019 14:06:19 +0100 Subject: [PATCH 024/130] added correction of growth rates --- include/particle_plt.hh | 11 +++++++++++ src/ic_generator.cc | 3 +++ 2 files changed, 14 insertions(+) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 91cd4d9..81c5fcb 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -169,11 +169,16 @@ private: D_xx_.kelem(i,j,k) *= norm; D_yy_.kelem(i,j,k) *= norm; D_zz_.kelem(i,j,k) *= norm; + + // spatially dependent correction to vfact = \dot{D_+}/D_+ + D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*eval[2])-1.)); } } } } + D_xy_.kelem(0,0,0) = 1.0; + } public: @@ -207,6 +212,12 @@ public: return D_zz_.get_cic_kspace({ix,iy,iz}); } + inline ccomplex_t vfac_corr( std::array ijk ) const + { + real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; + return D_xy_.get_cic_kspace({ix,iy,iz}); + } + }; #if 0 diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 04682ad..90fbb0a 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -549,6 +549,9 @@ int Run( ConfigFile& the_config ) tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,{i,j,k}) * phitot_v + vfac3 * (lg.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx)) ); + // correct velocity with PLT mode growth rate + tmp.kelem(idx) *= lg.vfac_corr({i,j,k}); + if( bAddExternalTides ){ // modify velocities with anisotropic expansion factor**2 tmp.kelem(idx) *= std::pow(lss_aniso_alpha[idim],2.0); From dc2564994f9593866b91ba7e575151749e15ee4a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 6 Nov 2019 17:55:09 +0100 Subject: [PATCH 025/130] minor cleanup --- include/particle_plt.hh | 44 +++++++++++++++++++++++++++++------------ src/ic_generator.cc | 43 +--------------------------------------- 2 files changed, 32 insertions(+), 55 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 81c5fcb..abf464a 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -8,6 +8,7 @@ #include +#include #include #include @@ -34,9 +35,6 @@ private: //! just a Kronecker \delta_ij auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; - //! just a sign function - auto sign = []( real_t x ) -> real_t { return (x<0.0)? -1.0 : 1.0; }; - //! short range component of Ewald sum, eq. 
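
The growth-rate correction stored in D_xy_ here is the standard PLT result for an EdS background: a mode whose dynamical matrix has largest eigenvalue \lambda_3(\mathbf{k}) grows as

    D_+(\mathbf{k},a) \propto a^{\,\epsilon(\mathbf{k})}, \qquad \epsilon(\mathbf{k}) = \tfrac{1}{4}\Bigl(\sqrt{1+24\,\lambda_3(\mathbf{k})}-1\Bigr),

which reduces to \epsilon = 1 in the fluid limit \lambda_3 = 1; vfac_corr() returns the factor 1/\epsilon(\mathbf{k}) that the velocity assembly in ic_generator.cc multiplies in.
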
(A2) of Marcos (2008) auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { auto d = vR-vP; @@ -182,8 +180,11 @@ private: } public: - explicit lattice_gradient( real_t boxlen, size_t ngridother, size_t ngridself=64 ) - : boxlen_(boxlen), ngmapto_(ngridother), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), + // real_t boxlen, size_t ngridother + explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=64 ) + : boxlen_( the_config.GetValue("setup", "BoxLength") ), + ngmapto_( the_config.GetValue("setup", "GridRes") ), + ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), @@ -191,8 +192,30 @@ public: grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) { csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); + const lattice lattice_type = + ((lattice_str=="bcc")? lattice_bcc + : ((lattice_str=="fcc")? lattice_fcc + : ((lattice_str=="rsc")? lattice_rsc + : lattice_sc))); + + if( lattice_type != lattice_sc){ + csoca::elog << "PLT not implemented for chosen lattice type! Currently only SC." << std::endl; + abort(); + } + + csoca::ilog << "PLT corrections for SC lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; + +#if defined(USE_MPI) + if( CONFIG::MPI_task_size>1 ) + { + csoca::elog << "PLT not implemented for MPI, cannot run with more than 1 task currently!" 
<< std::endl; + abort(); + } +#endif + double wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT lattice eigenmodes "<< std::flush; + csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; init_D(); @@ -202,13 +225,8 @@ public: inline ccomplex_t gradient( const int idim, std::array ijk ) const { real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; - // std::cerr << ix << " " << ijk[0] << std::endl; - if( idim== 0 ){ - return D_xx_.get_cic_kspace({ix,iy,iz}); - } - else if( idim==1){ - return D_yy_.get_cic_kspace({ix,iy,iz}); - } + if( idim == 0 ) return D_xx_.get_cic_kspace({ix,iy,iz}); + else if( idim == 1 ) return D_yy_.get_cic_kspace({ix,iy,iz}); return D_zz_.get_cic_kspace({ix,iy,iz}); } diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 90fbb0a..4184e86 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -168,7 +168,7 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- // Create PLT gradient operator //-------------------------------------------------------------------- - particle::lattice_gradient lg( boxlen, ngrid ); + particle::lattice_gradient lg( the_config ); //-------------------------------------------------------------------- std::vector species_list; @@ -462,25 +462,10 @@ int Run( ConfigFile& the_config ) tmp.FourierTransformForward(false); // combine the various LPT potentials into one and take gradient - #if 0 // non PLT corrected version #pragma omp parallel for for (size_t i = 0; i < phi.size(0); ++i) { for (size_t j = 0; j < phi.size(1); ++j) { for (size_t k = 0; k < phi.size(2); ++k) { - size_t idx = phi.get_idx(i,j,k); - auto phitot = phi.kelem(idx) + phi2.kelem(idx) + phi3a.kelem(idx) + phi3b.kelem(idx); - // divide by Lbox, because displacement is in box units for output plugin - tmp.kelem(idx) = lunit / boxlen * ( phi.gradient(idim,{i,j,k}) * phitot - + phi.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - phi.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx) ); - } - } - } - #else // non PLT corrected version - #pragma omp parallel for - for (size_t i = 0; i < phi.size(0); ++i) { - for (size_t j = 0; j < phi.size(1); ++j) { - for (size_t k = 0; k < phi.size(2); ++k) { - // std::cerr << i << " " << j << " " << k << " " << phi.gradient(idim,{i,j,k}) << " " << lg.gradient(idim,{i,j,k}) << std::endl; size_t idx = phi.get_idx(i,j,k); auto phitot = phi.kelem(idx) + phi2.kelem(idx) + phi3a.kelem(idx) + phi3b.kelem(idx); // divide by Lbox, because displacement is in box units for output plugin @@ -489,7 +474,6 @@ int Run( ConfigFile& the_config ) } } } - #endif tmp.FourierTransformBackward(); // if we write particle data, store particle data in particle structure @@ -514,30 +498,6 @@ int Run( ConfigFile& the_config ) tmp.FourierTransformForward(false); - #if 0 // non PLT corrected version - #pragma omp parallel for - for (size_t i = 0; i < phi.size(0); ++i) { - for (size_t j = 0; j < phi.size(1); ++j) { - for (size_t k = 0; k < phi.size(2); ++k) { - size_t idx = phi.get_idx(i,j,k); - // divide by Lbox, because displacement is in box units for output plugin - auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx) + vfac3 * (phi3a.kelem(idx) + phi3b.kelem(idx)); - - tmp.kelem(idx) = vunit / boxlen * ( phi.gradient(idim,{i,j,k}) * phitot_v - + vfac3 * (phi.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - phi.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx)) ); - 
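                    // [clarifying note, not part of the original patch] The lines deleted in this hunk
                    // are the old velocity assembly that took gradients with the continuum operator
                    // phi.gradient(); the branch that survives below is identical except that it goes
                    // through the PLT lattice operator lg.gradient(). Schematically (vfac1..vfac3 are
                    // presumably the 1st- to 3rd-order velocity growth factors, defined outside this hunk):
                    //   v_i ~ vunit/Lbox * [ grad_i( vfac1*phi + vfac2*phi2 + vfac3*(phi3a+phi3b) )
                    //                        + vfac3 * ( grad_{i'} A3_{i''} - grad_{i''} A3_{i'} ) ]
                    // with (i, i', i'') presumably a cyclic permutation of the coordinate axes.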
- if( bAddExternalTides ){ - // modify velocities with anisotropic expansion factor**2 - tmp.kelem(idx) *= std::pow(lss_aniso_alpha[idim],2.0); - } - // if( bSymplecticPT){ - // auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx); - // tmp.kelem(idx) = vunit*ccomplex_t(0.0,1.0) * (kk[idim] * phitot_v) + vfac1 * A3[idim]->kelem(idx); - // } - } - } - } - #else // PLT corrected version #pragma omp parallel for for (size_t i = 0; i < phi.size(0); ++i) { for (size_t j = 0; j < phi.size(1); ++j) { @@ -563,7 +523,6 @@ int Run( ConfigFile& the_config ) } } } - #endif tmp.FourierTransformBackward(); // if we write particle data, store particle data in particle structure From 5de14003c00c72619b17b4d080ebcf6b4a14d5e3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 14 Nov 2019 14:09:24 +0100 Subject: [PATCH 026/130] working commit --- include/particle_plt.hh | 191 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 182 insertions(+), 9 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index abf464a..5e926a8 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -25,6 +25,18 @@ private: void init_D() { + constexpr real_t pi = M_PI, twopi = 2.0*M_PI; + + const std::vector> bcc_normals{ + {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, + {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, + {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} + }; + + const std::vector> bcc_reciprocal{ + {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} + }; + const real_t eta = 2.0/ngrid_; // Ewald cutoff shall be 2 cells const real_t alpha = 1.0/std::sqrt(2)/eta; const real_t alpha2 = alpha*alpha; @@ -56,6 +68,17 @@ private: for( int k=-N; k<=N; ++k ){ if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); + //sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ) * 0.5; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} ) * 0.5; + + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; } } } @@ -88,6 +111,7 @@ private: D_yy_.relem(0,0,0) = 0.0; D_yz_.relem(0,0,0) = 0.0; D_zz_.relem(0,0,0) = 0.0; + // Fourier transform all six components D_xx_.FourierTransformForward(); @@ -114,19 +138,42 @@ private: for( size_t k=0; k kv = D_xx_.get_k(i,j,k); + auto& b=bcc_reciprocal; + vec3 kvc = { b[0][0]*kvc[0]+b[1][0]*kvc[1]+b[2][0]*kvc[2], + b[0][1]*kvc[0]+b[1][1]*kvc[1]+b[2][1]*kvc[2], + b[0][2]*kvc[0]+b[1][2]*kvc[1]+b[2][2]*kvc[2] }; + // vec3 kv = {kvc.dot(bcc_reciprocal[0]),kvc.dot(bcc_reciprocal[1]),kvc.dot(bcc_reciprocal[2])}; const real_t kmod2 = kv.norm_squared(); // long range component of Ewald sum - real_t phi0 = -rho0 * std::exp(-0.5*eta*eta*kmod2) / kmod2; + ccomplex_t shift = 1.0;//std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])* D_xx_.get_dx()[0])); + ccomplex_t phi0 = -rho0 * (0.5+0.5*shift) * 
std::exp(-0.5*eta*eta*kmod2) / kmod2; phi0 = (phi0==phi0)? phi0 : 0.0; // catch NaN from division by zero when kmod2=0 + + // const int nn = 3; + // size_t nsum = 0; + // ccomplex_t ff = 0.0; + // for( int is=-nn;is<=nn;is++){ + // for( int js=-nn;js<=nn;js++){ + // for( int ks=-nn;ks<=nn;ks++){ + // if( std::abs(is)+std::abs(js)+std::abs(ks) <= nn ){ + // ff += std::exp(ccomplex_t(0.0,(((is)*kv[0] + (js)*kv[1] + (ks)*kv[2])))); + // ff += std::exp(ccomplex_t(0.0,(((0.5+is)*kv[0] + (0.5+js)*kv[1] + (0.5+ks)*kv[2])))); + // ++nsum; + // } + // } + // } + // } + // ff /= nsum; + ccomplex_t ff = 1.0; //(0.5+0.5*std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])))); // assemble short-range + long_range of Ewald sum and add DC component to trace - D_xx_.kelem(i,j,k) = (D_xx_.kelem(i,j,k) - kv[0]*kv[0] * phi0)*nfac + 1.0/3.0; - D_xy_.kelem(i,j,k) = (D_xy_.kelem(i,j,k) - kv[0]*kv[1] * phi0)*nfac; - D_xz_.kelem(i,j,k) = (D_xz_.kelem(i,j,k) - kv[0]*kv[2] * phi0)*nfac; - D_yy_.kelem(i,j,k) = (D_yy_.kelem(i,j,k) - kv[1]*kv[1] * phi0)*nfac + 1.0/3.0; - D_yz_.kelem(i,j,k) = (D_yz_.kelem(i,j,k) - kv[1]*kv[2] * phi0)*nfac; - D_zz_.kelem(i,j,k) = (D_zz_.kelem(i,j,k) - kv[2]*kv[2] * phi0)*nfac + 1.0/3.0; + D_xx_.kelem(i,j,k) = ff*((D_xx_.kelem(i,j,k) - kv[0]*kv[0] * phi0)*nfac) + 1.0/3.0; + D_xy_.kelem(i,j,k) = ff*((D_xy_.kelem(i,j,k) - kv[0]*kv[1] * phi0)*nfac); + D_xz_.kelem(i,j,k) = ff*((D_xz_.kelem(i,j,k) - kv[0]*kv[2] * phi0)*nfac); + D_yy_.kelem(i,j,k) = ff*((D_yy_.kelem(i,j,k) - kv[1]*kv[1] * phi0)*nfac) + 1.0/3.0; + D_yz_.kelem(i,j,k) = ff*((D_yz_.kelem(i,j,k) - kv[1]*kv[2] * phi0)*nfac); + D_zz_.kelem(i,j,k) = ff*((D_zz_.kelem(i,j,k) - kv[2]*kv[2] * phi0)*nfac) + 1.0/3.0; } } @@ -163,20 +210,145 @@ private: auto norm = (kv.norm()/kv.dot(evec3)); if ( std::abs(kv.dot(evec3)) < 1e-10 || kv.norm() < 1e-10 ) norm = 0.0; - +#ifdef PRODUCTION D_xx_.kelem(i,j,k) *= norm; D_yy_.kelem(i,j,k) *= norm; D_zz_.kelem(i,j,k) *= norm; // spatially dependent correction to vfact = \dot{D_+}/D_+ D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*eval[2])-1.)); +#else + + D_xx_.kelem(i,j,k) = eval[2]; + D_yy_.kelem(i,j,k) = eval[1]; + D_zz_.kelem(i,j,k) = eval[0]; + + D_xy_.kelem(i,j,k) = evec3[0]; + D_xz_.kelem(i,j,k) = evec3[1]; + D_yz_.kelem(i,j,k) = evec3[2]; +#endif } } } } - +#ifdef PRODUCTION D_xy_.kelem(0,0,0) = 1.0; +#endif + ////////////////////////////////////////// + std::string filename("plt_test.hdf5"); + unlink(filename.c_str()); + #if defined(USE_MPI) + MPI_Barrier(MPI_COMM_WORLD); + #endif + // rho.Write_to_HDF5(filename, "rho"); + D_xx_.Write_to_HDF5(filename, "omega1"); + D_yy_.Write_to_HDF5(filename, "omega2"); + D_zz_.Write_to_HDF5(filename, "omega3"); + D_xy_.Write_to_HDF5(filename, "e1_x"); + D_xz_.Write_to_HDF5(filename, "e1_y"); + D_yz_.Write_to_HDF5(filename, "e1_z"); + + } + + + void compute_vectk( ) + { + constexpr real_t pi = M_PI, twopi = 2.0*M_PI; + + const std::vector> bcc_normals{ + {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, + {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, + {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} + }; + + const std::vector> bcc_reciprocal{ + {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} + }; + + std::vector> vectk; + std::vector> ico, vecitk; + vectk.assign(D_xx_.size(0)*D_xx_.size(1)*D_xx_.size(2),vec3()); + ico.assign(D_xx_.size(0)*D_xx_.size(1)*D_xx_.size(2),vec3()); + vecitk.assign(D_xx_.size(0)*D_xx_.size(1)*D_xx_.size(2),vec3()); + + std::ofstream ofs2("test_brillouin.txt"); + + const int numb = 1; + for( size_t i=0; i D; + vec3 eval, 
evec1, evec2, evec3; + vec3 a({0.,0.,0.}); + + for( size_t j=0; j ar = D_xx_.get_k(i,j,k) / (twopi*ngrid_); + vec3 kv = D_xx_.get_k(i,j,k); + + for( int l=0; l<3; l++ ){ + a[l] = 0.0; + for( int m=0; m<3; m++){ + // project k on reciprocal basis + a[l] += ar[m]*bcc_reciprocal[m][l]; + } + } + + // translate the k-vectors into the "candidate" FBZ + vec3 anum; + for( int l1=-numb; l1<=numb; ++l1 ){ + anum[0] = real_t(l1); + for( int l2=-numb; l2<=numb; ++l2 ){ + anum[1] = real_t(l2); + for( int l3=-numb; l3<=numb; ++l3 ){ + anum[2] = real_t(l3); + + vectk[idx] = a; + + for( int l=0; l<3; l++ ){ + for( int m=0; m<3; m++){ + // project k on reciprocal basis + vectk[idx][l] += anum[m]*bcc_reciprocal[m][l]; + } + } + // check if in first Brillouin zone + bool btest=true; + for( size_t l=0; l amod*1.0001 ){ btest=false; break; } + if( scalar > 1.01 * amod2 ){ btest=false; break; } + } + if( btest ){ + + vecitk[idx][0] = std::round(vectk[idx][0]*(ngrid_)/twopi); + vecitk[idx][1] = std::round(vectk[idx][1]*(ngrid_)/twopi); + vecitk[idx][2] = std::round(vectk[idx][2]*(ngrid_)/twopi); + + ico[idx][0] = std::round((ar[0]+l1) * ngrid_); + ico[idx][1] = std::round((ar[1]+l2) * ngrid_); + ico[idx][2] = std::round((ar[2]+l3) * ngrid_); + + ofs2 << vectk[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; + // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << vecitk[idx].x << ", " << vecitk[idx].y << ", " << vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; + // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << -vecitk[idx].x << ", " << -vecitk[idx].y << ", " << -vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; + + goto endloop; + } + } + } + } + endloop: ; + } + } + } } public: @@ -218,6 +390,7 @@ public: csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; init_D(); + compute_vectk(); csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; } From 675ba19a955fca9e6ecbaf7185bbdf777b6a82ad Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 14 Nov 2019 15:36:39 +0100 Subject: [PATCH 027/130] working commit, fft grids can be non-distributed memory even when using MPI --- include/grid_fft.hh | 183 +++++++++++++---------- include/particle_plt.hh | 40 ++--- src/grid_fft.cc | 315 ++++++++++++++++++++-------------------- src/ic_generator.cc | 10 +- 4 files changed, 290 insertions(+), 258 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 00d2cb0..c7bec8f 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -16,16 +16,23 @@ enum space_t }; -template +#ifdef USE_MPI +template +#else +template +#endif class Grid_FFT { protected: #if defined(USE_MPI) - const MPI_Datatype MPI_data_t_type = (typeid(data_t) == typeid(double)) ? MPI_DOUBLE - : (typeid(data_t) == typeid(float)) ? MPI_FLOAT - : (typeid(data_t) == typeid(std::complex)) ? MPI_COMPLEX - : (typeid(data_t) == typeid(std::complex)) ? MPI_DOUBLE_COMPLEX : MPI_INT; + const MPI_Datatype MPI_data_t_type = + (typeid(data_t) == typeid(double)) ? MPI_DOUBLE + : (typeid(data_t) == typeid(float)) ? MPI_FLOAT + : (typeid(data_t) == typeid(std::complex)) ? MPI_COMPLEX + : (typeid(data_t) == typeid(std::complex)) ? 
MPI_DOUBLE_COMPLEX + : MPI_INT; #endif + using grid_fft_t = Grid_FFT; public: std::array n_, nhalf_; std::array sizes_; @@ -54,7 +61,7 @@ public: } // avoid implicit copying of data - Grid_FFT(const Grid_FFT &g) = delete; + Grid_FFT(const grid_fft_t &g) = delete; ~Grid_FFT() { @@ -64,7 +71,7 @@ public: } } - const Grid_FFT *get_grid(size_t ilevel) const { return this; } + const grid_fft_t *get_grid(size_t ilevel) const { return this; } void Setup(); @@ -91,7 +98,7 @@ public: data_[i] = 0.0; } - void copy_from(const Grid_FFT &g) + void copy_from(const grid_fft_t &g) { // make sure the two fields are in the same space if (g.space_ != this->space_) @@ -217,20 +224,34 @@ public: vec3 get_k(const size_t i, const size_t j, const size_t k) const { vec3 kk; - -#if defined(USE_MPI) - auto ip = i + local_1_start_; - kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0]; - kk[1] = (real_t(ip) - real_t(ip > nhalf_[1]) * n_[1]) * kfac_[1]; -#else - kk[0] = (real_t(i) - real_t(i > nhalf_[0]) * n_[0]) * kfac_[0]; - kk[1] = (real_t(j) - real_t(j > nhalf_[1]) * n_[1]) * kfac_[1]; -#endif + if( bdistributed ){ + auto ip = i + local_1_start_; + kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0]; + kk[1] = (real_t(ip) - real_t(ip > nhalf_[1]) * n_[1]) * kfac_[1]; + }else{ + kk[0] = (real_t(i) - real_t(i > nhalf_[0]) * n_[0]) * kfac_[0]; + kk[1] = (real_t(j) - real_t(j > nhalf_[1]) * n_[1]) * kfac_[1]; + } kk[2] = (real_t(k) - real_t(k > nhalf_[2]) * n_[2]) * kfac_[2]; return kk; } + std::array get_k3(const size_t i, const size_t j, const size_t k) const + { + return bdistributed? std::array({j,i+local_1_start_,k}) : std::array({i,j,k}); + // vec3 kk; + // if( bdistributed ){ + // kk[0] = j; + // kk[1] = i + local_1_start_; + // }else{ + // kk[0] = i; + // kk[1] = j; + // } + // kk[2] = k; + // return kk; + } + data_t get_cic( const vec3& v ) const{ // warning! this doesn't work with MPI vec3 x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], @@ -286,16 +307,16 @@ public: inline ccomplex_t gradient( const int idim, std::array ijk ) const { -#if defined(USE_MPI) - ijk[0] += local_1_start_; - std::swap(ijk[0],ijk[1]); -#endif + if( bdistributed ){ + ijk[0] += local_1_start_; + std::swap(ijk[0],ijk[1]); + } real_t rgrad = (ijk[idim]!=nhalf_[idim])? 
(real_t(ijk[idim]) - real_t(ijk[idim] > nhalf_[idim]) * n_[idim]) * kfac_[idim] : 0.0; return ccomplex_t(0.0,rgrad); } - Grid_FFT &operator*=(data_t x) + grid_fft_t &operator*=(data_t x) { if (space_ == kspace_id) { @@ -308,7 +329,7 @@ public: return *this; } - Grid_FFT &operator/=(data_t x) + grid_fft_t &operator/=(data_t x) { if (space_ == kspace_id) { @@ -321,7 +342,7 @@ public: return *this; } - Grid_FFT &apply_Laplacian(void) + grid_fft_t &apply_Laplacian(void) { this->FourierTransformForward(); this->apply_function_k_dep([&](auto x, auto k) { @@ -332,7 +353,7 @@ public: return *this; } - Grid_FFT &apply_negative_Laplacian(void) + grid_fft_t &apply_negative_Laplacian(void) { this->FourierTransformForward(); this->apply_function_k_dep([&](auto x, auto k) { @@ -343,7 +364,7 @@ public: return *this; } - Grid_FFT &apply_InverseLaplacian(void) + grid_fft_t &apply_InverseLaplacian(void) { this->FourierTransformForward(); this->apply_function_k_dep([&](auto x, auto k) { @@ -391,8 +412,7 @@ public: double compute_2norm(void) { real_t sum1{0.0}; -#pragma omp parallel for reduction(+ \ - : sum1) + #pragma omp parallel for reduction(+ : sum1) for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -416,8 +436,7 @@ public: double sum1{0.0}, sum2{0.0}; size_t count{0}; -#pragma omp parallel for reduction(+ \ - : sum1, sum2) + #pragma omp parallel for reduction(+ : sum1, sum2) for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -433,24 +452,26 @@ public: count = sizes_[0] * sizes_[1] * sizes_[2]; #ifdef USE_MPI - double globsum1{0.0}, globsum2{0.0}; - size_t globcount{0}; + if( bdistributed ){ + double globsum1{0.0}, globsum2{0.0}; + size_t globcount{0}; - MPI_Allreduce(reinterpret_cast(&sum1), - reinterpret_cast(&globsum1), - 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&sum1), + reinterpret_cast(&globsum1), + 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(reinterpret_cast(&sum2), - reinterpret_cast(&globsum2), - 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&sum2), + reinterpret_cast(&globsum2), + 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(reinterpret_cast(&count), - reinterpret_cast(&globcount), - 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&count), + reinterpret_cast(&globcount), + 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); - sum1 = globsum1; - sum2 = globsum2; - count = globcount; + sum1 = globsum1; + sum2 = globsum2; + count = globcount; + } #endif sum1 /= count; sum2 /= count; @@ -463,8 +484,7 @@ public: double sum1{0.0}; size_t count{0}; -#pragma omp parallel for reduction(+ \ - : sum1) + #pragma omp parallel for reduction(+ : sum1) for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -479,19 +499,21 @@ public: count = sizes_[0] * sizes_[1] * sizes_[2]; #ifdef USE_MPI - double globsum1{0.0}; - size_t globcount{0}; + if( bdistributed ){ + double globsum1{0.0}; + size_t globcount{0}; - MPI_Allreduce(reinterpret_cast(&sum1), - reinterpret_cast(&globsum1), - 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&sum1), + reinterpret_cast(&globsum1), + 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(reinterpret_cast(&count), - reinterpret_cast(&globcount), - 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&count), + reinterpret_cast(&globcount), + 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); - sum1 = 
globsum1; - count = globcount; + sum1 = globsum1; + count = globcount; + } #endif sum1 /= count; @@ -502,9 +524,9 @@ public: template void assign_function_of_grids_r(const functional &f, const grid_t &g) { - assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) ); + assert(g.size(0) == size(0) && g.size(1) == size(1)); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -523,10 +545,10 @@ public: template void assign_function_of_grids_r(const functional &f, const grid1_t &g1, const grid2_t &g2) { - assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g1.size(2) == size(2)); - assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g2.size(2) == size(2)); + assert(g1.size(0) == size(0) && g1.size(1) == size(1)); + assert(g2.size(0) == size(0) && g2.size(1) == size(1)); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -552,7 +574,7 @@ public: assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g2.size(2) == size(2)); assert(g3.size(0) == size(0) && g3.size(1) == size(1)); // && g3.size(2) == size(2)); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -577,7 +599,7 @@ public: { assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) ); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -599,7 +621,7 @@ public: assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g.size(2) == size(2) ); assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g.size(2) == size(2) ); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -621,7 +643,7 @@ public: { assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) ); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -643,7 +665,7 @@ public: assert(g1.size(0) == size(0) && g1.size(1) == size(1) && g1.size(2) == size(2) ); assert(g2.size(0) == size(0) && g2.size(1) == size(1) && g2.size(2) == size(2) ); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < size(0); ++i) { for (size_t j = 0; j < size(1); ++j) @@ -663,7 +685,7 @@ public: template void apply_function_k_dep(const functional &f) { -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -680,7 +702,7 @@ public: template void apply_function_r_dep(const functional &f) { -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -714,11 +736,12 @@ public: { FourierTransformForward(); apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { -#ifdef WITH_MPI - real_t shift = s.y * k[0] * get_dx()[0] + s.x * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; -#else - real_t shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; -#endif + real_t shift; + if( bdistributed ){ + shift = s.y * k[0] * get_dx()[0] + s.x * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; + }else{ + shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; + } return x * 
std::exp(ccomplex_t(0.0, shift)); }); if( transform_back ){ @@ -730,9 +753,7 @@ public: { if (space_ == kspace_id) { -#ifdef USE_MPI - if (CONFIG::MPI_task_rank == 0) -#endif + if (CONFIG::MPI_task_rank == 0 || !bdistributed ) cdata_[0] = (data_t)0.0; } else @@ -749,12 +770,14 @@ public: } } } + if( bdistributed ){ #if defined(USE_MPI) - data_t glob_sum = 0.0; - MPI_Allreduce(reinterpret_cast(&sum), reinterpret_cast(&glob_sum), - 1, GetMPIDatatype(), MPI_SUM, MPI_COMM_WORLD); - sum = glob_sum; + data_t glob_sum = 0.0; + MPI_Allreduce(reinterpret_cast(&sum), reinterpret_cast(&glob_sum), + 1, GetMPIDatatype(), MPI_SUM, MPI_COMM_WORLD); + sum = glob_sum; #endif + } sum /= sizes_[0] * sizes_[1] * sizes_[2]; #pragma omp parallel for diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 5e926a8..774d3ba 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -12,6 +12,8 @@ #include #include +#define PRODUCTION + namespace particle{ //! implement Marcos et al. PLT calculation @@ -20,8 +22,8 @@ private: const real_t boxlen_; const size_t ngmapto_, ngrid_, ngrid32_; const real_t mapratio_; - Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; - Grid_FFT grad_x_, grad_y_, grad_z_; + Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; + Grid_FFT grad_x_, grad_y_, grad_z_; void init_D() { @@ -146,8 +148,8 @@ private: const real_t kmod2 = kv.norm_squared(); // long range component of Ewald sum - ccomplex_t shift = 1.0;//std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])* D_xx_.get_dx()[0])); - ccomplex_t phi0 = -rho0 * (0.5+0.5*shift) * std::exp(-0.5*eta*eta*kmod2) / kmod2; + //ccomplex_t shift = 1.0;//std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])* D_xx_.get_dx()[0])); + ccomplex_t phi0 = -rho0 * std::exp(-0.5*eta*eta*kmod2) / kmod2; phi0 = (phi0==phi0)? phi0 : 0.0; // catch NaN from division by zero when kmod2=0 @@ -322,7 +324,7 @@ private: amod2 += bcc_normals[l][m]*bcc_normals[l][m]; scalar += bcc_normals[l][m]*vectk[idx][m]; } - real_t amod = std::sqrt(amod2); + //real_t amod = std::sqrt(amod2); //if( scalar/amod > amod*1.0001 ){ btest=false; break; } if( scalar > 1.01 * amod2 ){ btest=false; break; } } @@ -378,13 +380,13 @@ public: csoca::ilog << "PLT corrections for SC lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; -#if defined(USE_MPI) - if( CONFIG::MPI_task_size>1 ) - { - csoca::elog << "PLT not implemented for MPI, cannot run with more than 1 task currently!" << std::endl; - abort(); - } -#endif +// #if defined(USE_MPI) +// if( CONFIG::MPI_task_size>1 ) +// { +// csoca::elog << "PLT not implemented for MPI, cannot run with more than 1 task currently!" << std::endl; +// abort(); +// } +// #endif double wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; @@ -454,7 +456,7 @@ inline void test_plt( void ){ const real_t pi3halfs = std::pow(M_PI,1.5); const real_t dV( std::pow( boxlen/ngrid, 3 ) ); - Grid_FFT rho({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT rho({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 
1.0 : 0.0; }; @@ -511,12 +513,12 @@ inline void test_plt( void ){ return sr; }; - Grid_FFT D_xx({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_xy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_xz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_yy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_yz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_zz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_xx({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_xy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_xz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_yy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_yz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + Grid_FFT D_zz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); #pragma omp parallel for for( size_t i=0; i #include -template -void Grid_FFT::FillRandomReal(unsigned long int seed) +template +void Grid_FFT::FillRandomReal(unsigned long int seed) { gsl_rng *RNG = gsl_rng_alloc(gsl_rng_mt19937); -#if defined(USE_MPI) - seed += 17321 * CONFIG::MPI_task_rank; -#endif + if( bdistributed ){ + seed += 17321 * CONFIG::MPI_task_rank; + } gsl_rng_set(RNG, seed); for (size_t i = 0; i < sizes_[0]; ++i) @@ -28,166 +28,169 @@ void Grid_FFT::FillRandomReal(unsigned long int seed) gsl_rng_free(RNG); } -template -void Grid_FFT::Setup(void) +template +void Grid_FFT::Setup(void) { -#if !defined(USE_MPI) //////////////////////////////////////////////////////////////////////////////////////////// + if( !bdistributed ){ + ntot_ = (n_[2] + 2) * n_[1] * n_[0]; - ntot_ = (n_[2] + 2) * n_[1] * n_[0]; + csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); + if (typeid(data_t) == typeid(real_t)) + { + data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(real_t))); + cdata_ = reinterpret_cast(data_); - csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); - if (typeid(data_t) == typeid(real_t)) - { - data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(real_t))); - cdata_ = reinterpret_cast(data_); + plan_ = FFTW_API(plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, FFTW_RUNMODE); + iplan_ = FFTW_API(plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, FFTW_RUNMODE); + } + else if (typeid(data_t) == typeid(ccomplex_t)) + { + data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(ccomplex_t))); + cdata_ = reinterpret_cast(data_); - plan_ = FFTW_API(plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, FFTW_RUNMODE); - iplan_ = FFTW_API(plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, FFTW_RUNMODE); - } - else if (typeid(data_t) == typeid(ccomplex_t)) - { - data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(ccomplex_t))); - cdata_ = reinterpret_cast(data_); + plan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_FORWARD, FFTW_RUNMODE); + iplan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_BACKWARD, FFTW_RUNMODE); + } + else + { + csoca::elog.Print("invalid data type in Grid_FFT::setup_fft_interface\n"); + } - plan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_FORWARD, FFTW_RUNMODE); - iplan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_BACKWARD, FFTW_RUNMODE); + fft_norm_fac_ = 1.0 
/ std::sqrt((double)((size_t)n_[0] * (double)n_[1] * (double)n_[2])); + + if (typeid(data_t) == typeid(real_t)) + { + npr_ = n_[2] + 2; + npc_ = n_[2] / 2 + 1; + } + else + { + npr_ = n_[2]; + npc_ = n_[2]; + } + + for (int i = 0; i < 3; ++i) + { + nhalf_[i] = n_[i] / 2; + kfac_[i] = 2.0 * M_PI / length_[i]; + dx_[i] = length_[i] / n_[i]; + + global_range_.x1_[i] = 0; + global_range_.x2_[i] = n_[i]; + } + + local_0_size_ = n_[0]; + local_1_size_ = n_[1]; + local_0_start_ = 0; + local_1_start_ = 0; + + if (space_ == rspace_id) + { + sizes_[0] = n_[0]; + sizes_[1] = n_[1]; + sizes_[2] = n_[2]; + sizes_[3] = npr_; + } + else + { + sizes_[0] = n_[1]; + sizes_[1] = n_[0]; + sizes_[2] = npc_; + sizes_[3] = npc_; + } } else { - csoca::elog.Print("invalid data type in Grid_FFT::setup_fft_interface\n"); - } +#ifdef USE_MPI //// i.e. ifdef USE_MPI //////////////////////////////////////////////////////////////////////////////////// + size_t cmplxsz; - fft_norm_fac_ = 1.0 / std::sqrt((double)((size_t)n_[0] * (double)n_[1] * (double)n_[2])); + if (typeid(data_t) == typeid(real_t)) + { + cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD, + &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); + ntot_ = 2 * cmplxsz; + data_ = (data_t *)fftw_malloc(ntot_ * sizeof(real_t)); + cdata_ = reinterpret_cast(data_); + plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, + MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); + iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, + MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); + } + else if (typeid(data_t) == typeid(ccomplex_t)) + { + cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD, + &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); + ntot_ = cmplxsz; + data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t)); + cdata_ = reinterpret_cast(data_); + plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, + MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); + iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, + MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); + } + else + { + csoca::elog.Print("unknown data type in Grid_FFT::setup_fft_interface\n"); + abort(); + } - if (typeid(data_t) == typeid(real_t)) - { - npr_ = n_[2] + 2; - npc_ = n_[2] / 2 + 1; - } - else - { - npr_ = n_[2]; - npc_ = n_[2]; - } + csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); + fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]); - for (int i = 0; i < 3; ++i) - { - nhalf_[i] = n_[i] / 2; - kfac_[i] = 2.0 * M_PI / length_[i]; - dx_[i] = length_[i] / n_[i]; + if (typeid(data_t) == typeid(real_t)) + { + npr_ = n_[2] + 2; + npc_ = n_[2] / 2 + 1; + } + else + { + npr_ = n_[2]; + npc_ = n_[2]; + } - global_range_.x1_[i] = 0; - global_range_.x2_[i] = n_[i]; - } + for (int i = 0; i < 3; ++i) + { + nhalf_[i] = n_[i] / 2; + kfac_[i] = 2.0 * M_PI / length_[i]; + dx_[i] = length_[i] / n_[i]; - local_0_size_ = n_[0]; - local_1_size_ = n_[1]; - local_0_start_ = 0; - local_1_start_ = 0; - - if (space_ == rspace_id) - { - sizes_[0] = n_[0]; - sizes_[1] = n_[1]; - sizes_[2] = n_[2]; - sizes_[3] = npr_; - } - else - { - sizes_[0] = n_[1]; - sizes_[1] = n_[0]; - sizes_[2] = npc_; - sizes_[3] = npc_; - } - 
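        // [clarifying note, not part of the original patch] Setup() now branches on the new bool
        // template parameter: the block above builds plain serial FFTW plans for grids that every
        // rank holds in full, while the block below keeps the previous fftw_mpi transposed-layout
        // setup for slab-distributed grids. A rough usage sketch (template argument names and
        // order are an assumption reconstructed from this patch):
        //   Grid_FFT<real_t, true>  g_dist({N, N, N}, {L, L, L});     // slab-distributed across MPI ranks
        //   Grid_FFT<real_t, false> g_local({n, n, n}, {1., 1., 1.}); // full local copy, e.g. the small PLT mesh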
-#else //// i.e. ifdef USE_MPI //////////////////////////////////////////////////////////////////////////////////// - - size_t cmplxsz; - - if (typeid(data_t) == typeid(real_t)) - { - cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD, - &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); - ntot_ = 2 * cmplxsz; - data_ = (data_t *)fftw_malloc(ntot_ * sizeof(real_t)); - cdata_ = reinterpret_cast(data_); - plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); - iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, - MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); - } - else if (typeid(data_t) == typeid(ccomplex_t)) - { - cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD, - &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); - ntot_ = cmplxsz; - data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t)); - cdata_ = reinterpret_cast(data_); - plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); - iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); - } - else - { - csoca::elog.Print("unknown data type in Grid_FFT::setup_fft_interface\n"); - abort(); - } - - csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); - fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]); - - if (typeid(data_t) == typeid(real_t)) - { - npr_ = n_[2] + 2; - npc_ = n_[2] / 2 + 1; - } - else - { - npr_ = n_[2]; - npc_ = n_[2]; - } - - for (int i = 0; i < 3; ++i) - { - nhalf_[i] = n_[i] / 2; - kfac_[i] = 2.0 * M_PI / length_[i]; - dx_[i] = length_[i] / n_[i]; - - global_range_.x1_[i] = 0; - global_range_.x2_[i] = n_[i]; - } - global_range_.x1_[0] = (int)local_0_start_; - global_range_.x2_[0] = (int)(local_0_start_ + local_0_size_); - - if (space_ == rspace_id) - { - sizes_[0] = (int)local_0_size_; - sizes_[1] = n_[1]; - sizes_[2] = n_[2]; - sizes_[3] = npr_; // holds the physical memory size along the 3rd dimension - } - else - { - sizes_[0] = (int)local_1_size_; - sizes_[1] = n_[0]; - sizes_[2] = npc_; - sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension - } + global_range_.x1_[i] = 0; + global_range_.x2_[i] = n_[i]; + } + global_range_.x1_[0] = (int)local_0_start_; + global_range_.x2_[0] = (int)(local_0_start_ + local_0_size_); + if (space_ == rspace_id) + { + sizes_[0] = (int)local_0_size_; + sizes_[1] = n_[1]; + sizes_[2] = n_[2]; + sizes_[3] = npr_; // holds the physical memory size along the 3rd dimension + } + else + { + sizes_[0] = (int)local_1_size_; + sizes_[1] = n_[0]; + sizes_[2] = npc_; + sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension + } +#else + csoca::flog << "MPI is required for distributed FFT arrays!" 
<< std::endl; + throw std::runtime_error("MPI is required for distributed FFT arrays!"); #endif //// of #ifdef #else USE_MPI //////////////////////////////////////////////////////////////////////////////////// + } } -template -void Grid_FFT::ApplyNorm(void) +template +void Grid_FFT::ApplyNorm(void) { #pragma omp parallel for for (size_t i = 0; i < ntot_; ++i) data_[i] *= fft_norm_fac_; } -template -void Grid_FFT::FourierTransformForward(bool do_transform) +template +void Grid_FFT::FourierTransformForward(bool do_transform) { #if defined(USE_MPI) MPI_Barrier(MPI_COMM_WORLD); @@ -217,8 +220,8 @@ void Grid_FFT::FourierTransformForward(bool do_transform) } } -template -void Grid_FFT::FourierTransformBackward(bool do_transform) +template +void Grid_FFT::FourierTransformBackward(bool do_transform) { #if defined(USE_MPI) MPI_Barrier(MPI_COMM_WORLD); @@ -269,8 +272,8 @@ void create_hdf5(std::string Filename) H5Fclose(HDF_FileID); } -template -void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) const +template +void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) const { hid_t file_id, dset_id; /* file and dataset identifiers */ hid_t filespace, memspace; /* file and memory dataspace identifiers */ @@ -278,6 +281,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) hid_t dtype_id = H5T_NATIVE_FLOAT; hid_t plist_id; + #warning "check if this works for non-distributed fft arrays with MPI" + #if defined(USE_MPI) int mpi_size, mpi_rank; @@ -500,8 +505,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) #include -template -void Grid_FFT::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax) +template +void Grid_FFT::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax) { double logvmin = std::log10(vmin); double logvmax = std::log10(vmax); @@ -552,8 +557,8 @@ void Grid_FFT::Write_PDF(std::string ofname, int nbins, double scale, do #endif } -template -void Grid_FFT::Write_PowerSpectrum(std::string ofname) +template +void Grid_FFT::Write_PowerSpectrum(std::string ofname) { std::vector bin_k, bin_P, bin_eP; std::vector bin_count; @@ -582,8 +587,8 @@ void Grid_FFT::Write_PowerSpectrum(std::string ofname) #endif } -template -void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count ) +template +void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count ) { this->FourierTransformForward(); @@ -663,5 +668,7 @@ void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::ve /********************************************************************************************/ -template class Grid_FFT; -template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 4184e86..4394947 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -469,8 +469,8 @@ int Run( ConfigFile& the_config ) size_t idx = phi.get_idx(i,j,k); auto phitot = phi.kelem(idx) + phi2.kelem(idx) + phi3a.kelem(idx) + phi3b.kelem(idx); // divide by Lbox, because displacement is in box units for output plugin - tmp.kelem(idx) = lunit / boxlen * ( lg.gradient(idim,{i,j,k}) * phitot - + lg.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx) ); + tmp.kelem(idx) = lunit / boxlen * ( 
lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot + + lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx) ); } } } @@ -506,11 +506,11 @@ int Run( ConfigFile& the_config ) // divide by Lbox, because displacement is in box units for output plugin auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx) + vfac3 * (phi3a.kelem(idx) + phi3b.kelem(idx)); - tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,{i,j,k}) * phitot_v - + vfac3 * (lg.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx)) ); + tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot_v + + vfac3 * (lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx)) ); // correct velocity with PLT mode growth rate - tmp.kelem(idx) *= lg.vfac_corr({i,j,k}); + tmp.kelem(idx) *= lg.vfac_corr(tmp.get_k3(i,j,k)); if( bAddExternalTides ){ // modify velocities with anisotropic expansion factor**2 From 07b430f25cf3536df85d49ce4ac586363eceaf21 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 15 Nov 2019 11:54:16 +0100 Subject: [PATCH 028/130] removed unused code to fill grids with random numbers --- include/grid_fft.hh | 10 ---------- src/grid_fft.cc | 26 -------------------------- 2 files changed, 36 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index c7bec8f..49584eb 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -240,16 +240,6 @@ public: std::array get_k3(const size_t i, const size_t j, const size_t k) const { return bdistributed? std::array({j,i+local_1_start_,k}) : std::array({i,j,k}); - // vec3 kk; - // if( bdistributed ){ - // kk[0] = j; - // kk[1] = i + local_1_start_; - // }else{ - // kk[0] = i; - // kk[1] = j; - // } - // kk[2] = k; - // return kk; } data_t get_cic( const vec3& v ) const{ diff --git a/src/grid_fft.cc b/src/grid_fft.cc index d2d2c6b..b7e3f52 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -2,32 +2,6 @@ #include #include -#include -#include - -template -void Grid_FFT::FillRandomReal(unsigned long int seed) -{ - gsl_rng *RNG = gsl_rng_alloc(gsl_rng_mt19937); - if( bdistributed ){ - seed += 17321 * CONFIG::MPI_task_rank; - } - gsl_rng_set(RNG, seed); - - for (size_t i = 0; i < sizes_[0]; ++i) - { - for (size_t j = 0; j < sizes_[1]; ++j) - { - for (size_t k = 0; k < sizes_[2]; ++k) - { - this->relem(i, j, k) = gsl_ran_ugaussian_ratio_method(RNG); - } - } - } - - gsl_rng_free(RNG); -} - template void Grid_FFT::Setup(void) { From 89ec1775f3689b6d8f6c8ddcb21584b7fcd34d23 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 15 Nov 2019 22:32:09 +0100 Subject: [PATCH 029/130] fixed inconsistency between NGENIC random numbers generated with MPI and without --- include/grid_fft.hh | 2 ++ src/plugins/random_ngenic.cc | 35 ++++++++++++++++++++++++++--------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 49584eb..88a938f 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -73,6 +73,8 @@ public: const grid_fft_t *get_grid(size_t ilevel) const { return this; } + bool is_distributed( void ) const { return bdistributed; } + void Setup(); //! 
return the (local) size of dimension i diff --git a/src/plugins/random_ngenic.cc b/src/plugins/random_ngenic.cc index 1aa1942..b84221e 100644 --- a/src/plugins/random_ngenic.cc +++ b/src/plugins/random_ngenic.cc @@ -82,7 +82,11 @@ public: for (size_t j = 0; j < nres_; ++j) { ptrdiff_t jj = (j>0)? nres_ - j : 0; - gsl_rng_set( pRandomGenerator_, SeedTable_[i * nres_ + j]); + if( g.is_distributed() ) + gsl_rng_set( pRandomGenerator_, SeedTable_[j * nres_ + i]); + else + gsl_rng_set( pRandomGenerator_, SeedTable_[i * nres_ + j]); + for (size_t k = 0; k < g.size(2); ++k) { double phase = gsl_rng_uniform(pRandomGenerator_) * 2 * M_PI; @@ -101,15 +105,28 @@ public: if (k > 0) { if (i_in_range) g.kelem(ip,j,k) = zrand; } else{ /* k=0 plane needs special treatment */ - if (i == 0) { - if (j < nres_ / 2 && i_in_range) - { - g.kelem(ip,j,k) = zrand; - g.kelem(ip,jj,k) = std::conj(zrand); + if( g.is_distributed() ){ + if (j == 0) { + if (i < nres_ / 2 && i_in_range) + { + if(i_in_range) g.kelem(ip,jj,k) = zrand; + if(ii_in_range) g.kelem(iip,j,k) = std::conj(zrand); + } + } else if (j < nres_ / 2) { + if(i_in_range) g.kelem(ip,j,k) = zrand; + if(ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand); + } + }else{ + if (i == 0) { + if (j < nres_ / 2 && i_in_range) + { + g.kelem(ip,j,k) = zrand; + g.kelem(ip,jj,k) = std::conj(zrand); + } + } else if (i < nres_ / 2) { + if(i_in_range) g.kelem(ip,j,k) = zrand; + if (ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand); } - } else if (i < nres_ / 2) { - if(i_in_range) g.kelem(ip,j,k) = zrand; - if (ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand); } } } From 9a36cc13fd1f3bf45c7ddb5d7124e6c155f11e14 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 15 Nov 2019 23:19:57 +0100 Subject: [PATCH 030/130] unified calculation of growth factor and growth rate --- include/cosmology_calculator.hh | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index f74039d..ea51299 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -127,14 +127,19 @@ public: return pNorm * scale * scale * TransferSq(k) * pow((double)k, (double)cosmo_param_.nspect); } - inline static double H_of_a(double a, void *Params) + inline static double H_of_a(double a, const void *Params) { - CosmologyParameters *cosm = (CosmologyParameters *)Params; + const CosmologyParameters *cosm = (CosmologyParameters *)Params; double a2 = a * a; double Ha = sqrt(cosm->Omega_m / (a2 * a) + cosm->Omega_k / a2 + cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a)); return Ha; } + inline double H_of_a( double a ) const + { + return 100.0 * this->H_of_a(a,reinterpret_cast(&this->cosmo_param_)); + } + inline static double Hprime_of_a(double a, void *Params) { CosmologyParameters *cosm = (CosmologyParameters *)Params; @@ -168,10 +173,7 @@ public: */ inline real_t CalcGrowthRate( real_t a ) { - #warning CalcGrowthRate is only correct if dark energy is a cosmological constant, need to upgrade calculator... - real_t y = cosmo_param_.Omega_m*(1.0/a-1.0) + cosmo_param_.Omega_DE*(a*a-1.0) + 1.0; - real_t fact = integrate( &fIntegrand, 1e-6, a, (void*)&cosmo_param_ ); - return (cosmo_param_.Omega_DE*a*a-0.5*cosmo_param_.Omega_m/a)/y - 1.0 + a*fIntegrand(a,(void*)&cosmo_param_)/fact; + return CalcVFact(a) / H_of_a(a) / a; } //! 
Computes the linear theory growth factor D+ From bcb301f3381ca4ee9ab5b6443c295f08cdfc1533 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 16 Nov 2019 04:42:38 +0100 Subject: [PATCH 031/130] can write non-distributed grids with MPI, but needs cleanup --- src/grid_fft.cc | 127 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 126 insertions(+), 1 deletion(-) diff --git a/src/grid_fft.cc b/src/grid_fft.cc index b7e3f52..67201ad 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -246,16 +246,141 @@ void create_hdf5(std::string Filename) H5Fclose(HDF_FileID); } + template void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) const { + // FIXME: cleanup duplicate code in this function! + if( !bdistributed && CONFIG::MPI_task_rank==0 ){ + + hid_t file_id, dset_id; /* file and dataset identifiers */ + hid_t filespace, memspace; /* file and memory dataspace identifiers */ + hsize_t offset[3], count[3]; + hid_t dtype_id = H5T_NATIVE_FLOAT; + hid_t plist_id = H5P_DEFAULT; + + if (!file_exists(fname)) + create_hdf5(fname); + + file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id); + + for (int i = 0; i < 3; ++i) + count[i] = size(i); + + if (typeid(data_t) == typeid(float)) + dtype_id = H5T_NATIVE_FLOAT; + else if (typeid(data_t) == typeid(double)) + dtype_id = H5T_NATIVE_DOUBLE; + else if (typeid(data_t) == typeid(std::complex)) + { + dtype_id = H5T_NATIVE_FLOAT; + } + else if (typeid(data_t) == typeid(std::complex)) + { + dtype_id = H5T_NATIVE_DOUBLE; + } + + filespace = H5Screate_simple(3, count, NULL); + dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Sclose(filespace); + + hsize_t slice_sz = size(1) * size(2); + + real_t *buf = new real_t[slice_sz]; + + count[0] = 1; + count[1] = size(1); + count[2] = size(2); + + offset[1] = 0; + offset[2] = 0; + + memspace = H5Screate_simple(3, count, NULL); + filespace = H5Dget_space(dset_id); + + for (size_t i = 0; i < size(0); ++i) + { + offset[0] = i; + for (size_t j = 0; j < size(1); ++j) + { + for (size_t k = 0; k < size(2); ++k) + { + if( this->space_ == rspace_id ) + buf[j * size(2) + k] = std::real(relem(i, j, k)); + else + buf[j * size(2) + k] = std::real(kelem(i, j, k)); + } + } + + H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL); + H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf); + } + + H5Sclose(filespace); + H5Sclose(memspace); + + // H5Sclose(filespace); + H5Dclose(dset_id); + + if (typeid(data_t) == typeid(std::complex) || + typeid(data_t) == typeid(std::complex) || + this->space_ == kspace_id ) + { + datasetname += std::string(".im"); + + for (int i = 0; i < 3; ++i) + count[i] = size(i); + + filespace = H5Screate_simple(3, count, NULL); + dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Sclose(filespace); + + count[0] = 1; + + for (size_t i = 0; i < size(0); ++i) + { + offset[0] = i; + + for (size_t j = 0; j < size(1); ++j) + for (size_t k = 0; k < size(2); ++k) + { + if( this->space_ == rspace_id ) + buf[j * size(2) + k] = std::imag(relem(i, j, k)); + else + buf[j * size(2) + k] = std::imag(kelem(i, j, k)); + } + + memspace = H5Screate_simple(3, count, NULL); + filespace = H5Dget_space(dset_id); + + H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, + NULL); + + H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf); + + H5Sclose(memspace); + H5Sclose(filespace); + } + + H5Dclose(dset_id); 
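            // [clarifying note, not part of the original patch] This early branch handles the
            // non-distributed case: every rank holds the full grid, so only MPI rank 0 writes and
            // the other ranks return a little further below. Note also that `buf` is released only
            // inside this imaginary-part block, so for a purely real-valued real-space grid the
            // slice buffer allocated above does not appear to be freed.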
+ + delete[] buf; + } + + H5Fclose(file_id); + return; + } + + if( !bdistributed && CONFIG::MPI_task_rank!=0 ) return; + hid_t file_id, dset_id; /* file and dataset identifiers */ hid_t filespace, memspace; /* file and memory dataspace identifiers */ hsize_t offset[3], count[3]; hid_t dtype_id = H5T_NATIVE_FLOAT; hid_t plist_id; - #warning "check if this works for non-distributed fft arrays with MPI" #if defined(USE_MPI) From 68d3aa4a4ca5a1af33ec4f409bdfcfa7b0985fca Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 27 Nov 2019 16:23:43 +0100 Subject: [PATCH 032/130] working commit PLT other lattices --- include/particle_plt.hh | 215 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 194 insertions(+), 21 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 774d3ba..96f07ff 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -12,7 +12,7 @@ #include #include -#define PRODUCTION +// #define PRODUCTION namespace particle{ //! implement Marcos et al. PLT calculation @@ -29,6 +29,176 @@ private: { constexpr real_t pi = M_PI, twopi = 2.0*M_PI; + const ptrdiff_t nlattice = 16; + const real_t dx = 1.0/real_t(nlattice); + + const real_t eta = 4.0/nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells + const real_t alpha = 1.0/std::sqrt(2)/eta; + const real_t alpha2 = alpha*alpha; + const real_t alpha3 = alpha2*alpha; + const real_t sqrtpi = std::sqrt(M_PI); + const real_t pi32 = std::pow(M_PI,1.5); + + //! just a Kronecker \delta_ij + auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; + + //! short range component of Ewald sum, eq. (A2) of Marcos (2008) + auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { + auto d = vR-vP; + auto r = d.norm(); + if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! + real_t val{0.0}; + val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r); + val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * + (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); + return val; + }; + + auto greensftide_sr2 = [&]( int mu, int nu, const vec3& d ) -> real_t { + auto r = d.norm(); + if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! 
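                // [clarifying note, not part of the original patch] The two terms accumulated in
                // `val` below are the real-space (short-range) half of the Ewald split of the
                // force-constant matrix, essentially eq. (A2) of Marcos (2008): with d = vR - vP
                // and screening parameter alpha,
                //   D^sr_{mu nu}(d) = - d_mu d_nu / d^2 * alpha^3 / pi^(3/2) * exp(-alpha^2 d^2)
                //                     + ( delta_{mu nu}/d^3 - 3 d_mu d_nu / d^5 ) / (4 pi)
                //                       * ( erfc(alpha*d) + 2*alpha*d/sqrt(pi) * exp(-alpha^2 d^2) ),
                // with the smooth long-range remainder handled in Fourier space via the
                // exp(-eta^2 k^2 / 2) / k^2 kernel used elsewhere in this file.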
+ real_t val{0.0}; + val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r); + val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * + (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); + return val; + }; + + const std::vector> bcc_bravais{ + {1.0,0.0,0.0},{0.0,1.0,0.0},{0.5,0.5,0.5} + }; + + const std::vector> bcc_reciprocal{ + {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} + }; + + const std::vector> bcc_normals{ + {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, + {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, + {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} + }; + + + std::vector> x; + for( ptrdiff_t i=-2*nlattice; i<=2*nlattice; i++ ){ + for( ptrdiff_t j=-2*nlattice; j<=2*nlattice; j++ ){ + for( ptrdiff_t k=-2*nlattice; k<=2*nlattice; k++ ){ + real_t dxp = dx*(real_t(i)*bcc_bravais[0][0]+real_t(j)*bcc_bravais[1][0]+real_t(k)*bcc_bravais[2][0]); + real_t dyp = dx*(real_t(i)*bcc_bravais[0][1]+real_t(j)*bcc_bravais[1][1]+real_t(k)*bcc_bravais[2][1]); + real_t dzp = dx*(real_t(i)*bcc_bravais[0][2]+real_t(j)*bcc_bravais[1][2]+real_t(k)*bcc_bravais[2][2]); + + if( dxp>-1e-10&&dxp<1.0&&dyp>-1e-10&&dyp<1.0&&dzp>-1e-10&&dzp<1.0) + { + x.push_back({dxp,dyp,dzp}); + } + } + } + } + std::vector> a(x.size(),{0.0}); + + + constexpr ptrdiff_t lnumber = 4, knumber = 4; + for( size_t i=0,j=0; j< x.size(); ++j ){ + // r-part + if( i==j ) + { + a[i](0,0) = 1.0/3.0; + a[i](1,1) = 1.0/3.0; + a[i](2,2) = 1.0/3.0; + }else{ + + for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ + for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ + for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ + vec3 ai = {real_t(ix)*nlattice,real_t(iy)*nlattice,real_t(iz)*nlattice}; + auto dr = x[i]-x[j]; + dr[0] -= ai.x*bcc_bravais[0][0]+ai.y*bcc_bravais[1][0]+ai.z*bcc_bravais[2][0]; + dr[1] -= ai.x*bcc_bravais[0][1]+ai.y*bcc_bravais[1][1]+ai.z*bcc_bravais[2][1]; + dr[2] -= ai.x*bcc_bravais[0][2]+ai.y*bcc_bravais[1][2]+ai.z*bcc_bravais[2][2]; + real_t d = dr.norm(); + // std::cerr << dr.x << " " << dr.y << " " << dr.z << " " << greensftide_sr2(0,0,dr) << std::endl; + for( int mu=0; mu<3; ++mu ){ + for( int nu=mu; nu<3; ++nu ){ + a[i](mu,nu) += greensftide_sr2(mu,nu,dr); + } + } + } + } + } + } + + // k-part + if( i!=j ){ + auto dr = x[i]-x[j]; + real_t d = dr.norm(); + for( ptrdiff_t ix=-knumber; ix<=knumber; ix++ ){ + for( ptrdiff_t iy=-knumber; iy<=knumber; iy++ ){ + for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){ + vec3 ak, bk = {real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}; + if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ + ak.x = bk.x*bcc_reciprocal[0][0]+bk.y*bcc_reciprocal[1][0]+bk.z*bcc_reciprocal[2][0]; + ak.y = bk.x*bcc_reciprocal[0][1]+bk.y*bcc_reciprocal[1][1]+bk.z*bcc_reciprocal[2][1]; + ak.z = bk.x*bcc_reciprocal[0][2]+bk.y*bcc_reciprocal[1][2]+bk.z*bcc_reciprocal[2][2]; + real_t amodk2 = ak.norm_squared(); + real_t term = std::exp(-amodk2/(4*alpha*alpha))*std::cos(ak.dot(dr)) / amodk2;// / std::pow(nlattice,3); + for( int mu=0; mu<3; ++mu ){ + for( int nu=mu; nu<3; ++nu ){ + a[i](mu,nu) += ak[mu]*ak[nu]*term; + } + } + } + } + } + } + + } + + + + } + + for( auto& m : a ){ + std::cout << m(0,0) << std::endl; + } + + + //! 
sums mirrored copies of short-range component of Ewald sum + auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ + real_t sr = 0.0; + constexpr int N = 3; // number of repeated copies ±N per dimension + int count = 0; + for( int i=-N; i<=N; ++i ){ + for( int j=-N; j<=N; ++j ){ + for( int k=-N; k<=N; ++k ){ + if( mu!=nu ){ + + } + + if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ + //sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); + sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); + sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} ); + count += 2; + + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; + // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; + } + } + } + } + return sr / count; + }; + } + void init_D__old() + { + constexpr real_t pi = M_PI, twopi = 2.0*M_PI; + const std::vector> bcc_normals{ {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, @@ -53,7 +223,7 @@ private: auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { auto d = vR-vP; auto r = d.norm(); - // if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! + if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! real_t val{0.0}; val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r); val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * @@ -65,13 +235,15 @@ private: auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ real_t sr = 0.0; constexpr int N = 3; // number of repeated copies ±N per dimension + int count = 0; for( int i=-N; i<=N; ++i ){ for( int j=-N; j<=N; ++j ){ for( int k=-N; k<=N; ++k ){ if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ + //sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); - //sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ) * 0.5; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} ) * 0.5; + sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} ); + count += 2; // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; @@ -85,7 +257,7 @@ private: } } } - return sr; + return sr / count; }; //! fill D_ij array with short range evaluated function @@ -153,22 +325,23 @@ private: phi0 = (phi0==phi0)? 
phi0 : 0.0; // catch NaN from division by zero when kmod2=0 - // const int nn = 3; - // size_t nsum = 0; - // ccomplex_t ff = 0.0; - // for( int is=-nn;is<=nn;is++){ - // for( int js=-nn;js<=nn;js++){ - // for( int ks=-nn;ks<=nn;ks++){ - // if( std::abs(is)+std::abs(js)+std::abs(ks) <= nn ){ - // ff += std::exp(ccomplex_t(0.0,(((is)*kv[0] + (js)*kv[1] + (ks)*kv[2])))); - // ff += std::exp(ccomplex_t(0.0,(((0.5+is)*kv[0] + (0.5+js)*kv[1] + (0.5+ks)*kv[2])))); - // ++nsum; - // } - // } - // } - // } - // ff /= nsum; - ccomplex_t ff = 1.0; //(0.5+0.5*std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])))); + const int nn = 3; + size_t nsum = 0; + ccomplex_t ff = 0.0; + for( int is=-nn;is<=nn;is++){ + for( int js=-nn;js<=nn;js++){ + for( int ks=-nn;ks<=nn;ks++){ + if( std::abs(is)+std::abs(js)+std::abs(ks) <= nn ){ + ff += std::exp(ccomplex_t(0.0,(((is)*kv[0] + (js)*kv[1] + (ks)*kv[2])))); + ff += std::exp(ccomplex_t(0.0,(((0.5+is)*kv[0] + (0.5+js)*kv[1] + (0.5+ks)*kv[2])))); + ++nsum; + } + } + } + } + ff /= nsum; + // ccomplex_t ff = 1.0; + // ccomplex_t ff = (0.5+0.5*std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])))); // assemble short-range + long_range of Ewald sum and add DC component to trace D_xx_.kelem(i,j,k) = ff*((D_xx_.kelem(i,j,k) - kv[0]*kv[0] * phi0)*nfac) + 1.0/3.0; D_xy_.kelem(i,j,k) = ff*((D_xy_.kelem(i,j,k) - kv[0]*kv[1] * phi0)*nfac); From 093363791eaf85a2db0daf5f9cb0dc410af8d26f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 27 Nov 2019 22:19:39 +0100 Subject: [PATCH 033/130] working commit, decaying mode integration for single fluid --- example.conf | 7 ++-- src/plugins/transfer_CLASS.cc | 70 +++++++++++++++++++++++++++++++---- 2 files changed, 67 insertions(+), 10 deletions(-) diff --git a/example.conf b/example.conf index 58fc969..2ebaeac 100644 --- a/example.conf +++ b/example.conf @@ -4,7 +4,7 @@ GridRes = 128 # length of the box in Mpc/h BoxLength = 250 # starting redshift -zstart = 49.0 +zstart = 100.0 # order of the LPT to be used (1,2 or 3) LPTorder = 3 # also do baryon ICs? 
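# Note on the hunk below: 'transfer = CLASS' switches to CLASS-computed transfer
# functions, and the new 'ztarget' key is forwarded to CLASS as z_pk in
# src/plugins/transfer_CLASS.cc further down, i.e. the redshift at which the transfer
# functions are tabulated; it defaults to 0 when the key is absent. For example,
#   [cosmology]
#   transfer = CLASS
#   ztarget  = 100.0   # tabulate T(k) at the starting redshift itself
# together with zstart = 100.0 above makes CLASS evaluate T(k) directly at the IC
# epoch, presumably so that the growing/decaying-mode split added in this patch can be
# anchored there instead of being rescaled from z = 0.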
@@ -15,8 +15,9 @@ DoFixing = no ParticleLoad = sc [cosmology] -#transfer = CLASS -transfer = eisenstein +transfer = CLASS +ztarget = 100.0 +#transfer = eisenstein Omega_m = 0.302 Omega_b = 0.045 Omega_L = 0.698 diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 85b65b8..3469b7e 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -25,7 +25,13 @@ private: std::vector tab_lnk_, tab_dtot_, tab_dc_, tab_db_, tab_ttot_, tab_tc_, tab_tb_; gsl_interp_accel *gsl_ia_dtot_, *gsl_ia_dc_, *gsl_ia_db_, *gsl_ia_ttot_, *gsl_ia_tc_, *gsl_ia_tb_; gsl_spline *gsl_sp_dtot_, *gsl_sp_dc_, *gsl_sp_db_, *gsl_sp_ttot_, *gsl_sp_tc_, *gsl_sp_tb_; - double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_; + + // single fluid growing/decaying mode decomposition + gsl_interp_accel *gsl_ia_Cplus_, *gsl_ia_Cminus_; + gsl_spline *gsl_sp_Cplus_, *gsl_sp_Cminus_; + std::vector tab_Cplus_, tab_Cminus_; + + double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_, astart_, atarget_; void ClassEngine_get_data( void ){ std::vector d_ncdm, t_ncdm, phi, psi; @@ -37,25 +43,46 @@ private: pars.add("extra metric transfer functions", "yes"); pars.add("z_pk",ztarget_); pars.add("P_k_max_h/Mpc", kmax_); + pars.add("h",h_); pars.add("Omega_b",Omega_b_); // pars.add("Omega_k",0.0); // pars.add("Omega_ur",0.0); pars.add("N_ur",N_ur_); pars.add("Omega_cdm",Omega_m_-Omega_b_); - pars.add("Omega_Lambda",1.0-Omega_m_); - // pars.add("Omega_fld",0.0); - // pars.add("Omega_scf",0.0); + pars.add("Omega_k",0.0); + // pars.add("Omega_Lambda",1.0-Omega_m_); + pars.add("Omega_fld",0.0); + pars.add("Omega_scf",0.0); + pars.add("A_s",2.42e-9); - pars.add("n_s",.96); // tnis doesn't matter for TF + pars.add("n_s",.961); // this doesn't matter for TF pars.add("output","dTk,vTk"); pars.add("YHe",0.248); + pars.add("lensing","no"); + pars.add("alpha_s",0.0); + pars.add("P_k_ini type","analytic_Pk"); + pars.add("gauge","synchronous"); + + pars.add("k_per_decade_for_pk",100); + pars.add("k_per_decade_for_bao",100); - pars.add("k_per_decade_for_pk",50); - pars.add("k_per_decade_for_bao",50); pars.add("compute damping scale","yes"); pars.add("z_reio",-1.0); // make sure reionisation is not included + pars.add("tol_perturb_integration",1.e-8); + pars.add("tol_background_integration",1e-9); + + // high precision options from cl_permille.pre: + // precision file to be passed as input in order to achieve at least percent precision on scalar Cls + pars.add("hyper_flat_approximation_nu", 7000. 
); + pars.add("transfer_neglect_delta_k_S_t0", 0.17 ); + pars.add("transfer_neglect_delta_k_S_t1", 0.05 ); + pars.add("transfer_neglect_delta_k_S_t2", 0.17 ); + pars.add("transfer_neglect_delta_k_S_e", 0.13 ); + pars.add("delta_l_max", 1000 ); + + std::unique_ptr CE = std::make_unique(pars, false); CE->getTk(ztarget_, tab_lnk_, tab_dc_, tab_db_, d_ncdm, tab_dtot_, @@ -74,7 +101,9 @@ public: Omega_b_ = pcf_->GetValue("cosmology","Omega_b"); N_ur_ = pcf_->GetValueSafe("cosmology","N_ur", 3.046); ztarget_ = pcf_->GetValueSafe("cosmology","ztarget",0.0); + atarget_ = 1.0/(1.0+ztarget_); zstart_ = pcf_->GetValue("setup","zstart"); + astart_ = 1.0/(1.0+zstart_); double lbox = pcf_->GetValue("setup","BoxLength"); int nres = pcf_->GetValue("setup","GridRes"); kmax_ = 2.0*M_PI/lbox * nres/2 * sqrt(3) * 2.0; // 120% of spatial diagonal @@ -102,6 +131,33 @@ public: gsl_spline_init(gsl_sp_tc_, &tab_lnk_[0], &tab_tc_[0], tab_lnk_.size()); gsl_spline_init(gsl_sp_tb_, &tab_lnk_[0], &tab_tb_[0], tab_lnk_.size()); + //-------------------------------------------------------------------------- + // single fluid growing/decaying mode decomposition + //-------------------------------------------------------------------------- + gsl_ia_Cplus_ = gsl_interp_accel_alloc(); + gsl_ia_Cminus_ = gsl_interp_accel_alloc(); + + gsl_sp_Cplus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_Cminus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + + tab_Cplus_.assign(tab_lnk_.size(),0); + tab_Cminus_.assign(tab_lnk_.size(),0); + + std::ofstream ofs("grow_decay.txt"); + + for( size_t i=0; i Date: Wed, 27 Nov 2019 22:55:30 +0100 Subject: [PATCH 034/130] updated class submodule --- external/class | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/class b/external/class index b34d7f6..6f3abba 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit b34d7f6c2b72eab3a347c28e62298d62ca9dd69b +Subproject commit 6f3abbab2608712029d740d6c69aad0ba853e507 From 2cce64977af3f2514adc986825e24edc19575da3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 29 Nov 2019 15:41:22 +0100 Subject: [PATCH 035/130] bcc plt working commit --- include/particle_plt.hh | 71 +++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 96f07ff..0942cef 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -32,7 +32,7 @@ private: const ptrdiff_t nlattice = 16; const real_t dx = 1.0/real_t(nlattice); - const real_t eta = 4.0/nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells + const real_t eta = 4.0;//nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells const real_t alpha = 1.0/std::sqrt(2)/eta; const real_t alpha2 = alpha*alpha; const real_t alpha3 = alpha2*alpha; @@ -80,46 +80,58 @@ private: std::vector> x; - for( ptrdiff_t i=-2*nlattice; i<=2*nlattice; i++ ){ - for( ptrdiff_t j=-2*nlattice; j<=2*nlattice; j++ ){ - for( ptrdiff_t k=-2*nlattice; k<=2*nlattice; k++ ){ + // for( ptrdiff_t i=-2*nlattice; i<=2*nlattice; i++ ){ + // for( ptrdiff_t j=-2*nlattice; j<=2*nlattice; j++ ){ + // for( ptrdiff_t k=-2*nlattice; k<=2*nlattice; k++ ){ + // real_t dxp = dx*(real_t(i)*bcc_bravais[0][0]+real_t(j)*bcc_bravais[1][0]+real_t(k)*bcc_bravais[2][0]); + // real_t dyp = dx*(real_t(i)*bcc_bravais[0][1]+real_t(j)*bcc_bravais[1][1]+real_t(k)*bcc_bravais[2][1]); + // real_t dzp = 
dx*(real_t(i)*bcc_bravais[0][2]+real_t(j)*bcc_bravais[1][2]+real_t(k)*bcc_bravais[2][2]); + + // if( dxp>-1e-10&&dxp<1.0&&dyp>-1e-10&&dyp<1.0&&dzp>-1e-10&&dzp<1.0) + // { + // x.push_back({dxp,dyp,dzp}); + // } + // } + // } + // } + for( size_t i=0; i-1e-10&&dxp<1.0&&dyp>-1e-10&&dyp<1.0&&dzp>-1e-10&&dzp<1.0) - { - x.push_back({dxp,dyp,dzp}); - } + dxp = std::fmod( 2.0+dxp, 1.0 ); + dyp = std::fmod( 2.0+dyp, 1.0 ); + dzp = std::fmod( 2.0+dzp, 1.0 ); + x.push_back( {dxp,dyp,dzp} ); } } } std::vector> a(x.size(),{0.0}); + std::ofstream ofs("debug.txt"); constexpr ptrdiff_t lnumber = 4, knumber = 4; for( size_t i=0,j=0; j< x.size(); ++j ){ // r-part if( i==j ) { - a[i](0,0) = 1.0/3.0; - a[i](1,1) = 1.0/3.0; - a[i](2,2) = 1.0/3.0; + a[j](0,0) = 1.0/3.0; + a[j](1,1) = 1.0/3.0; + a[j](2,2) = 1.0/3.0; }else{ for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ - vec3 ai = {real_t(ix)*nlattice,real_t(iy)*nlattice,real_t(iz)*nlattice}; + vec3 ai = {real_t(ix),real_t(iy),real_t(iz)}; auto dr = x[i]-x[j]; - dr[0] -= ai.x*bcc_bravais[0][0]+ai.y*bcc_bravais[1][0]+ai.z*bcc_bravais[2][0]; - dr[1] -= ai.x*bcc_bravais[0][1]+ai.y*bcc_bravais[1][1]+ai.z*bcc_bravais[2][1]; - dr[2] -= ai.x*bcc_bravais[0][2]+ai.y*bcc_bravais[1][2]+ai.z*bcc_bravais[2][2]; - real_t d = dr.norm(); - // std::cerr << dr.x << " " << dr.y << " " << dr.z << " " << greensftide_sr2(0,0,dr) << std::endl; + dr[0] -= (ai.x*bcc_bravais[0][0]+ai.y*bcc_bravais[1][0]+ai.z*bcc_bravais[2][0]); + dr[1] -= (ai.x*bcc_bravais[0][1]+ai.y*bcc_bravais[1][1]+ai.z*bcc_bravais[2][1]); + dr[2] -= (ai.x*bcc_bravais[0][2]+ai.y*bcc_bravais[1][2]+ai.z*bcc_bravais[2][2]); for( int mu=0; mu<3; ++mu ){ for( int nu=mu; nu<3; ++nu ){ - a[i](mu,nu) += greensftide_sr2(mu,nu,dr); + a[j](mu,nu) += greensftide_sr2(mu,nu,dr); } } } @@ -140,10 +152,10 @@ private: ak.y = bk.x*bcc_reciprocal[0][1]+bk.y*bcc_reciprocal[1][1]+bk.z*bcc_reciprocal[2][1]; ak.z = bk.x*bcc_reciprocal[0][2]+bk.y*bcc_reciprocal[1][2]+bk.z*bcc_reciprocal[2][2]; real_t amodk2 = ak.norm_squared(); - real_t term = std::exp(-amodk2/(4*alpha*alpha))*std::cos(ak.dot(dr)) / amodk2;// / std::pow(nlattice,3); + real_t term = std::exp(-amodk2/(4*alpha*alpha))*std::cos(ak.dot(dr)) / amodk2 / std::pow(nlattice,3); for( int mu=0; mu<3; ++mu ){ for( int nu=mu; nu<3; ++nu ){ - a[i](mu,nu) += ak[mu]*ak[nu]*term; + a[j](mu,nu) += ak[mu]*ak[nu]*term; } } } @@ -153,17 +165,22 @@ private: } - + ofs << x[j].x << " " << x[j].y << " " << x[j].z << " " + << a[j](0,0) << " " << a[j](0,1) << " " << a[j](0,2) << " " + << a[j](1,1) << " " << a[j](1,2) << " " << a[j](2,2) << std::endl; + } + + std::cout << "num grid points : " << x.size() << std::endl; - for( auto& m : a ){ - std::cout << m(0,0) << std::endl; - } + // for( auto& m : a ){ + // std::cout << m(0,1) << " "; + // } //! 
sums mirrored copies of short-range component of Ewald sum - auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ + /*auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ real_t sr = 0.0; constexpr int N = 3; // number of repeated copies ±N per dimension int count = 0; @@ -193,7 +210,7 @@ private: } } return sr / count; - }; + };*/ } void init_D__old() { From dcc59368edcc7e00acf8e5009f6f525a01c2631f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Dec 2019 11:28:17 +0100 Subject: [PATCH 036/130] working commit: dynamical matrix for bcc seems right, interpolation doesn't work yet, also too slow --- include/grid_fft.hh | 3 + include/particle_plt.hh | 744 ++++++++++------------------------------ 2 files changed, 190 insertions(+), 557 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 88a938f..66c1a6f 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -77,6 +77,9 @@ public: void Setup(); + //! return the number of data_t elements that we store in the container + size_t memsize( void ) const { return ntot_; } + //! return the (local) size of dimension i size_t size(size_t i) const { return sizes_[i]; } diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 0942cef..7e7979b 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -24,12 +24,14 @@ private: const real_t mapratio_; Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; Grid_FFT grad_x_, grad_y_, grad_z_; + std::vector> vectk_; + std::vector> ico_, vecitk_; void init_D() { constexpr real_t pi = M_PI, twopi = 2.0*M_PI; - const ptrdiff_t nlattice = 16; + const ptrdiff_t nlattice = ngrid_;//16; const real_t dx = 1.0/real_t(nlattice); const real_t eta = 4.0;//nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells @@ -58,12 +60,14 @@ private: auto r = d.norm(); if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! 
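        // Overview of the rewritten init_D() in this patch, as far as visible here:
        //  (1) tabulate the Ewald-summed dynamical matrix D_ij on the ngrid_^3 BCC lattice
        //      sites (generated from the Bravais basis and folded into the unit cube), using
        //      the short-range lambda here plus the reciprocal-space sum below,
        //  (2) Fourier-transform the six independent components D_xx_ ... D_zz_,
        //  (3) eigen-decompose D(k) mode by mode, storing the eigenvalues as mu1..mu3 and the
        //      leading eigenvector as e1 (written to debug.hdf5),
        //  (4) fold each FFT mode into the first Brillouin zone of the BCC lattice by adding
        //      reciprocal-lattice translations and testing against the 12 zone-face normals:
        //      a vector k lies in the FBZ iff  k . n <= |n|^2  for every normal n, i.e. k is
        //      closer to the origin than to any neighbouring reciprocal-lattice point.
        //      A minimal sketch of that membership test (kvec is a placeholder name; the patch
        //      inlines the loop with a ~1% tolerance):
        //
        //        bool in_fbz = true;
        //        for( const auto& n : bcc_normals )
        //            if( kvec.dot(n) > 1.01 * n.norm_squared() ){ in_fbz = false; break; }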
real_t val{0.0}; - val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r); + val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha2*r*r); val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * - (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); + (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha2*r*r)*r); return val; }; + const int charge_multiplicity = 2; + const std::vector> bcc_bravais{ {1.0,0.0,0.0},{0.0,1.0,0.0},{0.5,0.5,0.5} }; @@ -78,140 +82,213 @@ private: {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} }; + const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_multiplicity; + const real_t fft_norm12 = 1.0/std::pow(real_t(nlattice),1.5); std::vector> x; - // for( ptrdiff_t i=-2*nlattice; i<=2*nlattice; i++ ){ - // for( ptrdiff_t j=-2*nlattice; j<=2*nlattice; j++ ){ - // for( ptrdiff_t k=-2*nlattice; k<=2*nlattice; k++ ){ - // real_t dxp = dx*(real_t(i)*bcc_bravais[0][0]+real_t(j)*bcc_bravais[1][0]+real_t(k)*bcc_bravais[2][0]); - // real_t dyp = dx*(real_t(i)*bcc_bravais[0][1]+real_t(j)*bcc_bravais[1][1]+real_t(k)*bcc_bravais[2][1]); - // real_t dzp = dx*(real_t(i)*bcc_bravais[0][2]+real_t(j)*bcc_bravais[1][2]+real_t(k)*bcc_bravais[2][2]); + std::vector> a(x.size(),{0.0}); + constexpr ptrdiff_t lnumber = 4, knumber = 4; + const int numb = 1; - // if( dxp>-1e-10&&dxp<1.0&&dyp>-1e-10&&dyp<1.0&&dzp>-1e-10&&dzp<1.0) - // { - // x.push_back({dxp,dyp,dzp}); - // } - // } - // } - // } + vectk_.assign(D_xx_.memsize(),vec3()); + ico_.assign(D_xx_.memsize(),vec3()); + vecitk_.assign(D_xx_.memsize(),vec3()); + + D_xx_.zero(); + D_xy_.zero(); + D_xz_.zero(); + D_yy_.zero(); + D_yz_.zero(); + D_zz_.zero(); + + #pragma omp parallel for for( size_t i=0; i> a(x.size(),{0.0}); + const vec3 cdr( {std::fmod( 2.0+dxp, 1.0 ),std::fmod( 2.0+dyp, 1.0 ),std::fmod( 2.0+dzp, 1.0 )} ); + vec3 ak; - std::ofstream ofs("debug.txt"); + for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ + for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ + for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ + vec3 ai = {real_t(ix),real_t(iy),real_t(iz)}; + vec3 dr( cdr ); + dr[0] -= (ai.x*bcc_bravais[0][0]+ai.y*bcc_bravais[1][0]+ai.z*bcc_bravais[2][0]); + dr[1] -= (ai.x*bcc_bravais[0][1]+ai.y*bcc_bravais[1][1]+ai.z*bcc_bravais[2][1]); + dr[2] -= (ai.x*bcc_bravais[0][2]+ai.y*bcc_bravais[1][2]+ai.z*bcc_bravais[2][2]); - constexpr ptrdiff_t lnumber = 4, knumber = 4; - for( size_t i=0,j=0; j< x.size(); ++j ){ - // r-part - if( i==j ) - { - a[j](0,0) = 1.0/3.0; - a[j](1,1) = 1.0/3.0; - a[j](2,2) = 1.0/3.0; - }else{ - - for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ - for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ - for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ - vec3 ai = {real_t(ix),real_t(iy),real_t(iz)}; - auto dr = x[i]-x[j]; - dr[0] -= (ai.x*bcc_bravais[0][0]+ai.y*bcc_bravais[1][0]+ai.z*bcc_bravais[2][0]); - dr[1] -= (ai.x*bcc_bravais[0][1]+ai.y*bcc_bravais[1][1]+ai.z*bcc_bravais[2][1]); - dr[2] -= (ai.x*bcc_bravais[0][2]+ai.y*bcc_bravais[1][2]+ai.z*bcc_bravais[2][2]); - for( int mu=0; mu<3; ++mu ){ - for( int nu=mu; nu<3; ++nu ){ - a[j](mu,nu) += greensftide_sr2(mu,nu,dr); + D_xx_.relem(i,j,k) += greensftide_sr2(0,0,dr) * charge; + D_xy_.relem(i,j,k) += greensftide_sr2(0,1,dr) * charge; + D_xz_.relem(i,j,k) += greensftide_sr2(0,2,dr) * charge; + D_yy_.relem(i,j,k) += greensftide_sr2(1,1,dr) * charge; + D_yz_.relem(i,j,k) += greensftide_sr2(1,2,dr) * charge; + D_zz_.relem(i,j,k) += greensftide_sr2(2,2,dr) * charge; + + vec3 bk = 
{real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}; + if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ + ak.x = bk.x*bcc_reciprocal[0][0]+bk.y*bcc_reciprocal[1][0]+bk.z*bcc_reciprocal[2][0]; + ak.y = bk.x*bcc_reciprocal[0][1]+bk.y*bcc_reciprocal[1][1]+bk.z*bcc_reciprocal[2][1]; + ak.z = bk.x*bcc_reciprocal[0][2]+bk.y*bcc_reciprocal[1][2]+bk.z*bcc_reciprocal[2][2]; + real_t amodk2 = ak.norm_squared(); + real_t term = charge*std::exp(-amodk2/(4*alpha*alpha))*std::cos(ak.dot(cdr)) / amodk2 / std::pow(nlattice,3); + D_xx_.relem(i,j,k) += ak.x*ak.x*term; + D_xy_.relem(i,j,k) += ak.x*ak.y*term; + D_xz_.relem(i,j,k) += ak.x*ak.z*term; + D_yy_.relem(i,j,k) += ak.y*ak.y*term; + D_yz_.relem(i,j,k) += ak.y*ak.z*term; + D_zz_.relem(i,j,k) += ak.z*ak.z*term; } } } - } + } } } + } - // k-part - if( i!=j ){ - auto dr = x[i]-x[j]; - real_t d = dr.norm(); - for( ptrdiff_t ix=-knumber; ix<=knumber; ix++ ){ - for( ptrdiff_t iy=-knumber; iy<=knumber; iy++ ){ - for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){ - vec3 ak, bk = {real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}; - if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ - ak.x = bk.x*bcc_reciprocal[0][0]+bk.y*bcc_reciprocal[1][0]+bk.z*bcc_reciprocal[2][0]; - ak.y = bk.x*bcc_reciprocal[0][1]+bk.y*bcc_reciprocal[1][1]+bk.z*bcc_reciprocal[2][1]; - ak.z = bk.x*bcc_reciprocal[0][2]+bk.y*bcc_reciprocal[1][2]+bk.z*bcc_reciprocal[2][2]; - real_t amodk2 = ak.norm_squared(); - real_t term = std::exp(-amodk2/(4*alpha*alpha))*std::cos(ak.dot(dr)) / amodk2 / std::pow(nlattice,3); - for( int mu=0; mu<3; ++mu ){ - for( int nu=mu; nu<3; ++nu ){ - a[j](mu,nu) += ak[mu]*ak[nu]*term; + // fix r=0 with background density (added later in Fourier space) + D_xx_.relem(0,0,0) = 1.0/3.0; + D_xy_.relem(0,0,0) = 0.0; + D_xz_.relem(0,0,0) = 0.0; + D_yy_.relem(0,0,0) = 1.0/3.0; + D_yz_.relem(0,0,0) = 0.0; + D_zz_.relem(0,0,0) = 1.0/3.0; + + D_xx_.FourierTransformForward(); + D_xy_.FourierTransformForward(); + D_xz_.FourierTransformForward(); + D_yy_.FourierTransformForward(); + D_yz_.FourierTransformForward(); + D_zz_.FourierTransformForward(); + + if (CONFIG::MPI_task_rank == 0) + unlink("debug.hdf5"); + D_xx_.Write_to_HDF5("debug.hdf5","Dxx"); + D_xy_.Write_to_HDF5("debug.hdf5","Dxy"); + D_xz_.Write_to_HDF5("debug.hdf5","Dxz"); + D_yy_.Write_to_HDF5("debug.hdf5","Dyy"); + D_yz_.Write_to_HDF5("debug.hdf5","Dyz"); + D_zz_.Write_to_HDF5("debug.hdf5","Dzz"); + + std::ofstream ofs2("test_brillouin.txt"); + + #pragma omp parallel + { + // thread private matrix representation + mat3s D; + vec3 eval, evec1, evec2, evec3; + + #pragma omp for + for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + const real_t kmod = kv.norm()/mapratio_/boxlen_; + + // put matrix elements into actual matrix + D = { std::real(D_xx_.kelem(i,j,k))/fft_norm12, + std::real(D_xy_.kelem(i,j,k))/fft_norm12, + std::real(D_xz_.kelem(i,j,k))/fft_norm12, + std::real(D_yy_.kelem(i,j,k))/fft_norm12, + std::real(D_yz_.kelem(i,j,k))/fft_norm12, + std::real(D_zz_.kelem(i,j,k))/fft_norm12 }; + + // compute eigenstructure of matrix + D.eigen(eval, evec1, evec2, evec3); + + D_xx_.kelem(i,j,k) = eval[2]; + D_yy_.kelem(i,j,k) = eval[1]; + D_zz_.kelem(i,j,k) = eval[0]; + + D_xy_.kelem(i,j,k) = evec3[0]; + D_xz_.kelem(i,j,k) = evec3[1]; + D_yz_.kelem(i,j,k) = evec3[2]; + + + vec3 a({0.,0.,0.}); + + auto idx = D_xx_.get_idx(i,j,k); + + vec3 ar = D_xx_.get_k(i,j,k) / (twopi*ngrid_); + // vec3 kv = D_xx_.get_k(i,j,k); + + for( int l=0; l<3; l++ ){ + a[l] = 0.0; + for( int m=0; m<3; m++){ + // project k on reciprocal 
basis + a[l] += ar[m]*bcc_reciprocal[m][l]; + } + } + + // translate the k-vectors into the "candidate" FBZ + vec3 anum; + for( int l1=-numb; l1<=numb; ++l1 ){ + anum[0] = real_t(l1); + for( int l2=-numb; l2<=numb; ++l2 ){ + anum[1] = real_t(l2); + for( int l3=-numb; l3<=numb; ++l3 ){ + anum[2] = real_t(l3); + + vectk_[idx] = a; + + for( int l=0; l<3; l++ ){ + for( int m=0; m<3; m++){ + // project k on reciprocal basis + vectk_[idx][l] += anum[m]*bcc_reciprocal[m][l]; + } + } + // check if in first Brillouin zone + bool btest=true; + for( size_t l=0; l amod*1.0001 ){ btest=false; break; } + if( scalar > 1.01 * amod2 ){ btest=false; break; } + } + if( btest ){ + + vecitk_[idx][0] = std::round(vectk_[idx][0]*(ngrid_)/twopi); + vecitk_[idx][1] = std::round(vectk_[idx][1]*(ngrid_)/twopi); + vecitk_[idx][2] = std::round(vectk_[idx][2]*(ngrid_)/twopi); + + ico_[idx][0] = std::round((ar[0]+l1) * ngrid_); + ico_[idx][1] = std::round((ar[1]+l2) * ngrid_); + ico_[idx][2] = std::round((ar[2]+l3) * ngrid_); + + ofs2 << vectk_[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; + // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << vecitk_[idx].x << ", " << vecitk_[idx].y << ", " << vecitk_[idx].z << ", " << ico_[idx][0] << ", " << ico_[idx][1] << ", " << ico_[idx][2] << std::endl; + // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << -vecitk_[idx].x << ", " << -vecitk_[idx].y << ", " << -vecitk_[idx].z << ", " << ico_[idx][0] << ", " << ico_[idx][1] << ", " << ico_[idx][2] << std::endl; + + goto endloop; } } } - } + } endloop: ; } } - } - - ofs << x[j].x << " " << x[j].y << " " << x[j].z << " " - << a[j](0,0) << " " << a[j](0,1) << " " << a[j](0,2) << " " - << a[j](1,1) << " " << a[j](1,2) << " " << a[j](2,2) << std::endl; - - } - std::cout << "num grid points : " << x.size() << std::endl; - - // for( auto& m : a ){ - // std::cout << m(0,1) << " "; - // } - //! 
sums mirrored copies of short-range component of Ewald sum - /*auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ - real_t sr = 0.0; - constexpr int N = 3; // number of repeated copies ±N per dimension - int count = 0; - for( int i=-N; i<=N; ++i ){ - for( int j=-N; j<=N; ++j ){ - for( int k=-N; k<=N; ++k ){ - if( mu!=nu ){ - - } - - if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ - //sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); - sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); - sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} ); - count += 2; - - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; - } - } - } - } - return sr / count; - };*/ + D_xx_.Write_to_HDF5("debug.hdf5","mu1"); + D_xy_.Write_to_HDF5("debug.hdf5","mu2"); + D_xz_.Write_to_HDF5("debug.hdf5","mu3"); + D_yy_.Write_to_HDF5("debug.hdf5","e1x"); + D_yz_.Write_to_HDF5("debug.hdf5","e1y"); + D_zz_.Write_to_HDF5("debug.hdf5","e1z"); } + void init_D__old() { constexpr real_t pi = M_PI, twopi = 2.0*M_PI; @@ -444,108 +521,9 @@ private: } - void compute_vectk( ) - { - constexpr real_t pi = M_PI, twopi = 2.0*M_PI; - - const std::vector> bcc_normals{ - {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, - {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, - {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} - }; - - const std::vector> bcc_reciprocal{ - {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} - }; - - std::vector> vectk; - std::vector> ico, vecitk; - vectk.assign(D_xx_.size(0)*D_xx_.size(1)*D_xx_.size(2),vec3()); - ico.assign(D_xx_.size(0)*D_xx_.size(1)*D_xx_.size(2),vec3()); - vecitk.assign(D_xx_.size(0)*D_xx_.size(1)*D_xx_.size(2),vec3()); - - std::ofstream ofs2("test_brillouin.txt"); - - const int numb = 1; - for( size_t i=0; i D; - vec3 eval, evec1, evec2, evec3; - vec3 a({0.,0.,0.}); - - for( size_t j=0; j ar = D_xx_.get_k(i,j,k) / (twopi*ngrid_); - vec3 kv = D_xx_.get_k(i,j,k); - - for( int l=0; l<3; l++ ){ - a[l] = 0.0; - for( int m=0; m<3; m++){ - // project k on reciprocal basis - a[l] += ar[m]*bcc_reciprocal[m][l]; - } - } - - // translate the k-vectors into the "candidate" FBZ - vec3 anum; - for( int l1=-numb; l1<=numb; ++l1 ){ - anum[0] = real_t(l1); - for( int l2=-numb; l2<=numb; ++l2 ){ - anum[1] = real_t(l2); - for( int l3=-numb; l3<=numb; ++l3 ){ - anum[2] = real_t(l3); - - vectk[idx] = a; - - for( int l=0; l<3; l++ ){ - for( int m=0; m<3; m++){ - // project k on reciprocal basis - vectk[idx][l] += anum[m]*bcc_reciprocal[m][l]; - } - } - // check if in first Brillouin zone - bool btest=true; - for( size_t l=0; l amod*1.0001 ){ btest=false; break; } - if( scalar > 1.01 * amod2 ){ btest=false; break; } - } - if( btest ){ - - vecitk[idx][0] = std::round(vectk[idx][0]*(ngrid_)/twopi); - vecitk[idx][1] = std::round(vectk[idx][1]*(ngrid_)/twopi); - vecitk[idx][2] = 
std::round(vectk[idx][2]*(ngrid_)/twopi); - - ico[idx][0] = std::round((ar[0]+l1) * ngrid_); - ico[idx][1] = std::round((ar[1]+l2) * ngrid_); - ico[idx][2] = std::round((ar[2]+l3) * ngrid_); - - ofs2 << vectk[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; - // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << vecitk[idx].x << ", " << vecitk[idx].y << ", " << vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; - // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << -vecitk[idx].x << ", " << -vecitk[idx].y << ", " << -vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; - - goto endloop; - } - } - } - } - endloop: ; - } - } - } - } - public: // real_t boxlen, size_t ngridother - explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=64 ) + explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=16 ) : boxlen_( the_config.GetValue("setup", "BoxLength") ), ngmapto_( the_config.GetValue("setup", "GridRes") ), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), @@ -582,7 +560,6 @@ public: csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; init_D(); - compute_vectk(); csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; } @@ -603,351 +580,4 @@ public: }; -#if 0 -inline void test_plt( void ){ - - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Testing PLT implementation..." << std::endl; - - lattice_gradient lg( 64 ); - - return; - - constexpr real_t pi = M_PI, twopi = 2.0*M_PI; - - const std::vector> bcc_normals{ - {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, - {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, - {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} - }; - - const std::vector> bcc_reciprocal{ - {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} - }; - - /*const std::vector> fcc_reciprocal{ - {-2.,0.,2.}, {2.,0.,0.}, {1.,1.,-1.} - };*/ - - real_t boxlen = 1.0; - - size_t ngrid = 64; - size_t npgrid = 1; - size_t dpg = ngrid/npgrid; - size_t nump = npgrid*npgrid*npgrid; - - real_t pweight = 1.0/real_t(nump); - real_t eta = 2.0 * boxlen/ngrid; - - const real_t alpha = 1.0/std::sqrt(2)/eta; - const real_t alpha2 = alpha*alpha; - const real_t alpha3 = alpha2*alpha; - const real_t sqrtpi = std::sqrt(M_PI); - const real_t pi3halfs = std::pow(M_PI,1.5); - - const real_t dV( std::pow( boxlen/ngrid, 3 ) ); - Grid_FFT rho({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - - auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 
1.0 : 0.0; }; - - auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { - auto d = vR-vP; - auto r = d.norm(); - - if( r< 1e-14 ) return 0.0; - - real_t val = 0.0; - val -= d[mu]*d[nu]/(r*r) * alpha3/pi3halfs * std::exp(-alpha*alpha*r*r); - val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * - (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); - return pweight * val; - }; - - // sc - rho.zero(); - rho.relem(0,0,0) = pweight/dV; - // rho.relem(0,0,0) = pweight/dV/2; - // rho.relem(ngrid/2,ngrid/2,ngrid/2) = pweight/dV/2; - - rho.FourierTransformForward(); - rho.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { - real_t kmod = k.norm(); - std::cerr << x << std::endl; - return -x * std::exp(-0.5*eta*eta*kmod*kmod) / (kmod*kmod); - }); - rho.zero_DC_mode(); - - auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ - real_t sr = 0.0; - int N = 3; - for( int i=-N; i<=N; ++i ){ - for( int j=-N; j<=N; ++j ){ - for( int k=-N; k<=N; ++k ){ - if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ - sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); - - // sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} )/2; - - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; - } - } - } - } - return sr; - }; - - Grid_FFT D_xx({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_xy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_xz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_yy({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_yz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - Grid_FFT D_zz({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - - #pragma omp parallel for - for( size_t i=0; i p; - p.x = real_t(i)/ngrid; - for( size_t j=0; j D; - vec3 eval, evec1, evec2, evec3; - for( size_t j=0; j kv = D_xx.get_k(i,j,k); - - D_xx.kelem(i,j,k) = (D_xx.kelem(i,j,k) - kv[0]*kv[0] * rho.kelem(i,j,k))*nfac + 1.0/3.0; - D_xy.kelem(i,j,k) = (D_xy.kelem(i,j,k) - kv[0]*kv[1] * rho.kelem(i,j,k))*nfac; - D_xz.kelem(i,j,k) = (D_xz.kelem(i,j,k) - kv[0]*kv[2] * rho.kelem(i,j,k))*nfac; - D_yy.kelem(i,j,k) = (D_yy.kelem(i,j,k) - kv[1]*kv[1] * rho.kelem(i,j,k))*nfac + 1.0/3.0;; - D_yz.kelem(i,j,k) = (D_yz.kelem(i,j,k) - kv[1]*kv[2] * rho.kelem(i,j,k))*nfac; - D_zz.kelem(i,j,k) = (D_zz.kelem(i,j,k) - kv[2]*kv[2] * rho.kelem(i,j,k))*nfac + 1.0/3.0;; - - D = { std::real(D_xx.kelem(i,j,k)), std::real(D_xy.kelem(i,j,k)), std::real(D_xz.kelem(i,j,k)), - std::real(D_yy.kelem(i,j,k)), std::real(D_yz.kelem(i,j,k)), std::real(D_zz.kelem(i,j,k)) }; - - D.eigen(eval, evec1, evec2, evec3); - - D_xx.kelem(i,j,k) = eval[2]; - D_yy.kelem(i,j,k) = eval[1]; - D_zz.kelem(i,j,k) = eval[0]; - - D_xy.kelem(i,j,k) = evec3[0]; - D_xz.kelem(i,j,k) = evec3[1]; - D_yz.kelem(i,j,k) = evec3[2]; - } - } - } - -#if 1 - 
std::vector> vectk; - std::vector> ico, vecitk; - vectk.assign(D_xx.size(0)*D_xx.size(1)*D_xx.size(2),vec3()); - ico.assign(D_xx.size(0)*D_xx.size(1)*D_xx.size(2),vec3()); - vecitk.assign(D_xx.size(0)*D_xx.size(1)*D_xx.size(2),vec3()); - - std::ofstream ofs2("test_brillouin.txt"); - - const int numb = 1; - for( size_t i=0; i D; - vec3 eval, evec1, evec2, evec3; - vec3 a({0.,0.,0.}); - - for( size_t j=0; j ar = D_xx.get_k(i,j,k) / (twopi*ngrid); - vec3 kv = D_xx.get_k(i,j,k); - - for( int l=0; l<3; l++ ){ - a[l] = 0.0; - for( int m=0; m<3; m++){ - // project k on reciprocal basis - a[l] += ar[m]*bcc_reciprocal[m][l]; - } - } - - // translate the k-vectors into the "candidate" FBZ - vec3 anum; - for( int l1=-numb; l1<=numb; ++l1 ){ - anum[0] = real_t(l1); - for( int l2=-numb; l2<=numb; ++l2 ){ - anum[1] = real_t(l2); - for( int l3=-numb; l3<=numb; ++l3 ){ - anum[2] = real_t(l3); - - vectk[idx] = a; - - for( int l=0; l<3; l++ ){ - for( int m=0; m<3; m++){ - // project k on reciprocal basis - vectk[idx][l] += anum[m]*bcc_reciprocal[m][l]; - } - } - // check if in first Brillouin zone - bool btest=true; - for( size_t l=0; l amod*1.0001 ){ btest=false; break; } - if( scalar > 1.01 * amod2 ){ btest=false; break; } - } - if( btest ){ - // int is = (i>ngrid/2)? i-ngrid : i; - // int js = (j>ngrid/2)? j-ngrid : j; - // int ks = (k>ngrid/2)? k-ngrid : k; - - vecitk[idx][0] = std::round(vectk[idx][0]*(ngrid)/twopi); - vecitk[idx][1] = std::round(vectk[idx][1]*(ngrid)/twopi); - vecitk[idx][2] = std::round(vectk[idx][2]*(ngrid)/twopi); - - ico[idx][0] = std::round((ar[0]+l1) * ngrid); - ico[idx][1] = std::round((ar[1]+l2) * ngrid); - ico[idx][2] = std::round((ar[2]+l3) * ngrid); - - assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][0]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][0]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][0] - vectk[idx][0] ) < 1e-12 ); - assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] - vectk[idx][1] ) < 1e-12 ); - assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][2]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][2]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][2] - vectk[idx][2] ) < 1e-12 ); - - - - // if( ico[idx][2] < 0 ){ - // ico[idx][0] = -ico[idx][0]; - // ico[idx][1] = -ico[idx][1]; - // ico[idx][2] = -ico[idx][2]; - // } - - // ico[idx][0] = (ico[idx][0]+ngrid)%ngrid; - // ico[idx][1] = (ico[idx][1]+ngrid)%ngrid; - - // if( vectk[idx][2] < 0 ){ - // vectk[idx][0] = - vectk[idx][0]; - // vectk[idx][1] = - vectk[idx][1]; - // vectk[idx][2] = - vectk[idx][2]; - // } - - // if( vecitk[idx][2] < 0 ){ - // vecitk[idx][0] = -vecitk[idx][0]; - // vecitk[idx][1] = -vecitk[idx][1]; - // vecitk[idx][2] = -vecitk[idx][2]; - // } - //vecitk[idx][0] = (vecitk[idx][0]+ngrid)%ngrid; - //vecitk[idx][1] = (vecitk[idx][1]+ngrid)%ngrid; - //vecitk[idx][2] = (vecitk[idx][2]+ngrid)%ngrid; - - - - //vecitk[idx][0] = (vecitk[idx][0]<0)? vecitk[idx][0]+ngrid : vecitk[idx][0];; - //vecitk[idx][1] = (vecitk[idx][1]<0)? 
vecitk[idx][1]+ngrid : vecitk[idx][1]; - - - - //ofs2 << kv.x << ", " << kv.y << ", " << kv.z << ", " << vectk[idx].x*(ngrid)/twopi << ", " << vectk[idx].y*(ngrid)/twopi << ", " << vectk[idx].z*(ngrid)/twopi << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; - ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << vecitk[idx].x << ", " << vecitk[idx].y << ", " << vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; - ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << -vecitk[idx].x << ", " << -vecitk[idx].y << ", " << -vecitk[idx].z << ", " << ico[idx][0] << ", " << ico[idx][1] << ", " << ico[idx][2] << std::endl; - - // std::cerr << real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][0]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][0]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][0] << " " << vectk[idx][0] << std::endl; - - // std::cerr << real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][0]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][0]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][0] << " " << vectk[idx][0] << std::endl; - //std::cerr << real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] << " " << vectk[idx][1] << std::endl; - // assert( std::fabs(real_t(ico[idx][0])/ngrid * bcc_reciprocal[0][1]+real_t(ico[idx][1])/ngrid * bcc_reciprocal[1][1]+real_t(ico[idx][2])/ngrid * bcc_reciprocal[2][1] - vectk[idx][1] ) < 1e-12 ); - goto endloop; - } - } - } - } - endloop: ; - - //D_xx.kelem(i,j,k) = D_xx.kelem(ico[idx][0],ico[idx][1],ico[idx][2]); - // D_xx.kelem(ico[idx][0],ico[idx][1],ico[idx][2]) = D_xx.kelem(i,j,k); - //D_xx.kelem(i,j,k) = D_xx.kelem(i+vecitk[idx][0],j+vecitk[idx][1],k+vecitk[idx][2]); - } - } - - } - -#endif - - std::ofstream ofs("test_ewald.txt"); - for( size_t i=0; i kv = D_xx.get_k(i,j,k); - ofs << std::setw(16) << kv.norm() / kNyquist - << std::setw(16) << std::real(D_xx.kelem(i,j,k)) - << std::setw(16) << std::real(D_yy.kelem(i,j,k)) - << std::setw(16) << std::real(D_zz.kelem(i,j,k)) - << std::setw(16) << kv[0] - << std::setw(16) << kv[1] - << std::setw(16) << kv[2] - << std::endl; - } - } - } - - - std::string filename("plt_test.hdf5"); - unlink(filename.c_str()); -#if defined(USE_MPI) - MPI_Barrier(MPI_COMM_WORLD); -#endif -// rho.Write_to_HDF5(filename, "rho"); - D_xx.Write_to_HDF5(filename, "omega1"); - D_yy.Write_to_HDF5(filename, "omega2"); - D_zz.Write_to_HDF5(filename, "omega3"); - D_xy.Write_to_HDF5(filename, "e1_x"); - D_xz.Write_to_HDF5(filename, "e1_y"); - D_yz.Write_to_HDF5(filename, "e1_z"); - -} -#endif - } \ No newline at end of file From 0ea91247e2171a9d4fe7f68cf2ab969d85894800 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Dec 2019 14:34:28 +0100 Subject: [PATCH 037/130] working commit: optimizations/cleanup --- include/mat3.hh | 157 ++++++++++++++++++++---------- include/particle_plt.hh | 207 +++++++++++++++++++++------------------- include/vec3.hh | 49 +++++++--- 3 files changed, 254 insertions(+), 159 deletions(-) diff --git a/include/mat3.hh b/include/mat3.hh index 04ac0ac..3c28f13 100644 --- a/include/mat3.hh +++ b/include/mat3.hh @@ -4,87 +4,122 @@ #include template -class mat3s{ +class mat3{ protected: std::array data_; gsl_matrix_view m_; gsl_vector *eval_; gsl_matrix *evec_; gsl_eigen_symmv_workspace * wsp_; + bool bdid_alloc_gsl_; void init_gsl(){ - m_ = gsl_matrix_view_array (&data_[0], 3, 3); - eval_ = gsl_vector_alloc (3); - 
evec_ = gsl_matrix_alloc (3, 3); - wsp_ = gsl_eigen_symmv_alloc (3); + // allocate memory for GSL operations if we haven't done so yet + if( !bdid_alloc_gsl_ ) + { + m_ = gsl_matrix_view_array (&data_[0], 3, 3); + eval_ = gsl_vector_alloc (3); + evec_ = gsl_matrix_alloc (3, 3); + wsp_ = gsl_eigen_symmv_alloc (3); + bdid_alloc_gsl_ = true; + } } void free_gsl(){ - gsl_eigen_symmv_free (wsp_); - gsl_vector_free (eval_); - gsl_matrix_free (evec_); + // free memory for GSL operations if it was allocated + if( bdid_alloc_gsl_ ) + { + gsl_eigen_symmv_free (wsp_); + gsl_vector_free (eval_); + gsl_matrix_free (evec_); + } } public: - mat3s(){ - this->init_gsl(); - } + mat3() + : bdid_alloc_gsl_(false) + {} //! copy constructor - mat3s( const mat3s &m) - : data_(m.data_){ - this->init_gsl(); - } + mat3( const mat3 &m) + : data_(m.data_), bdid_alloc_gsl_(false) + {} //! move constructor - mat3s( mat3s &&m) - : data_(std::move(m.data_)){ - this->init_gsl(); - } + mat3( mat3 &&m) + : data_(std::move(m.data_)), bdid_alloc_gsl_(false) + {} - //! construct vec3 from initializer list + //! construct mat3 from initializer list template - mat3s(E&&...e) - : data_{{std::forward(e)...}}{ - // resort into symmetrix matrix - data_[8] = data_[5]; - data_[7] = data_[4]; - data_[6] = data_[2]; - data_[5] = data_[4]; - data_[4] = data_[3]; - data_[3] = data_[1]; - this->init_gsl(); - } + mat3(E&&...e) + : data_{{std::forward(e)...}}, bdid_alloc_gsl_(false) + {} - mat3s& operator=(const mat3s& m){ + mat3& operator=(const mat3& m) noexcept{ data_ = m.data_; return *this; } - mat3s& operator=(const mat3s&& m){ + mat3& operator=(const mat3&& m) noexcept{ data_ = std::move(m.data_); return *this; } - - //! bracket index access to vector components - T &operator[](size_t i){ return data_[i];} - - //! const bracket index access to vector components - const T &operator[](size_t i) const { return data_[i]; } - - //! matrix 2d index access - T &operator()(size_t i, size_t j){ return data_[3*i+j]; } - - //! const matrix 2d index access - const T &operator()(size_t i, size_t j) const { return data_[3*i+j]; } //! destructor - ~mat3s(){ + ~mat3(){ this->free_gsl(); } + + //! bracket index access to vector components + T &operator[](size_t i) noexcept { return data_[i];} + + //! const bracket index access to vector components + const T &operator[](size_t i) const noexcept { return data_[i]; } + + //! matrix 2d index access + T &operator()(size_t i, size_t j) noexcept { return data_[3*i+j]; } + + //! const matrix 2d index access + const T &operator()(size_t i, size_t j) const noexcept { return data_[3*i+j]; } + + //! in-place addition + mat3& operator+=( const mat3& rhs ) noexcept{ + for (size_t i = 0; i < 9; ++i) { + (*this)[i] += rhs[i]; + } + return *this; + } + + //! 
in-place subtraction + mat3& operator-=( const mat3& rhs ) noexcept{ + for (size_t i = 0; i < 9; ++i) { + (*this)[i] -= rhs[i]; + } + return *this; + } + + void zero() noexcept{ + for (size_t i = 0; i < 9; ++i) data_[i]=0; + } + + void eigen( vec3& evals, vec3& evec1, vec3& evec2, vec3& evec3 ) + { + // for( auto x : data_ ){ + // std::cerr << x << " " ; + // } + // std::cerr << std::endl; + // resort into symmetrix matrix + // data_[8] = data_[5]; + // data_[7] = data_[4]; + // data_[6] = data_[2]; + // data_[5] = data_[4]; + // data_[4] = data_[3]; + // data_[3] = data_[1]; + + this->init_gsl(); - void eigen( vec3& evals, vec3& evec1, vec3& evec2, vec3& evec3 ){ gsl_eigen_symmv (&m_.matrix, eval_, evec_, wsp_); gsl_eigen_symmv_sort (eval_, evec_, GSL_EIGEN_SORT_VAL_ASC); @@ -94,5 +129,31 @@ public: evec2[i] = gsl_matrix_get( evec_, i, 1 ); evec3[i] = gsl_matrix_get( evec_, i, 2 ); } + + // std::cerr << "(" << evals[0] << " " << evals[1] << " " << evals[2] << ")" << std::endl; } -}; \ No newline at end of file +}; + +template +constexpr const mat3 operator+(const mat3 &lhs, const mat3 &rhs) noexcept +{ + mat3 result; + for (size_t i = 0; i < 9; ++i) { + result[i] = lhs[i] + rhs[i]; + } + return result; +} + +// matrix - vector multiplication +template +vec3 operator*( const mat3 &A, const vec3 &v ) noexcept +{ + vec3 result; + for( int mu=0; mu<3; ++mu ){ + result[mu] = 0.0; + for( int nu=0; nu<3; ++nu ){ + result[mu] += A(mu,nu)*v[nu]; + } + } + return result; +} diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 7e7979b..051a6b3 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -31,45 +31,18 @@ private: { constexpr real_t pi = M_PI, twopi = 2.0*M_PI; - const ptrdiff_t nlattice = ngrid_;//16; - const real_t dx = 1.0/real_t(nlattice); - - const real_t eta = 4.0;//nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells - const real_t alpha = 1.0/std::sqrt(2)/eta; - const real_t alpha2 = alpha*alpha; - const real_t alpha3 = alpha2*alpha; - const real_t sqrtpi = std::sqrt(M_PI); - const real_t pi32 = std::pow(M_PI,1.5); - - //! just a Kronecker \delta_ij - auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; - - //! short range component of Ewald sum, eq. (A2) of Marcos (2008) - auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { - auto d = vR-vP; - auto r = d.norm(); - if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! - real_t val{0.0}; - val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r); - val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * - (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); - return val; - }; - - auto greensftide_sr2 = [&]( int mu, int nu, const vec3& d ) -> real_t { - auto r = d.norm(); - if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! 
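        // In this cleanup the per-component lambdas being removed here are superseded by
        // add_greensftide_sr() / add_greensftide_lr(), which accumulate the full symmetric 3x3
        // contribution into a thread-private mat3 in one pass, and the Bravais/reciprocal bases
        // become mat3 objects (mat_bcc_bravais / mat_bcc_reciprocal below) whose columns hold
        // the lattice vectors a_i and b_j. Since mat3 now allocates its GSL eigen workspace
        // lazily (only when eigen() is first called, see the mat3.hh hunk above), the
        // per-thread matD temporaries used further down stay allocation-free. The two basis
        // matrices must satisfy  a_i . b_j = 2*pi*delta_ij, and 1/|det A| is the number of
        // lattice sites per unit cube (the charge multiplicity, 2 for this BCC basis). An
        // illustrative consistency check, not part of the patch, could look like this:

        template< typename T >
        bool lattice_pair_is_consistent( const mat3<T>& A, const mat3<T>& B, T tol = T(1e-10) )
        {
            for( int i=0; i<3; ++i ){       // column i of A : Bravais vector a_i
                for( int j=0; j<3; ++j ){   // column j of B : reciprocal vector b_j
                    T dot = T(0);
                    for( int mu=0; mu<3; ++mu ) dot += A(mu,i) * B(mu,j);
                    const T expect = (i==j)? T(2.0*M_PI) : T(0);
                    if( std::fabs(dot - expect) > tol ) return false;
                }
            }
            return true;
        }
        // e.g. lattice_pair_is_consistent( mat_bcc_bravais, mat_bcc_reciprocal ) should hold.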
- real_t val{0.0}; - val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha2*r*r); - val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * - (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha2*r*r)*r); - return val; - }; - const int charge_multiplicity = 2; - const std::vector> bcc_bravais{ - {1.0,0.0,0.0},{0.0,1.0,0.0},{0.5,0.5,0.5} + const mat3 mat_bcc_bravais{ + 1.0, 0.0, 0.5, + 0.0, 1.0, 0.5, + 0.0, 0.0, 0.5, + }; + + const mat3 mat_bcc_reciprocal{ + twopi, 0.0, 0.0, + 0.0, twopi, 0.0, + -twopi, -twopi, 2.0*twopi, }; const std::vector> bcc_reciprocal{ @@ -82,11 +55,55 @@ private: {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} }; - const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_multiplicity; - const real_t fft_norm12 = 1.0/std::pow(real_t(nlattice),1.5); - std::vector> x; - std::vector> a(x.size(),{0.0}); + + const size_t nlattice = ngrid_;//16; + const real_t dx = 1.0/real_t(nlattice); + + const real_t eta = 4.0;//nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells + const real_t alpha = 1.0/std::sqrt(2)/eta; + const real_t alpha2 = alpha*alpha; + const real_t alpha3 = alpha2*alpha; + const real_t sqrtpi = std::sqrt(M_PI); + const real_t fourpi = 4.0*M_PI; + const real_t pi32 = std::pow(M_PI,1.5); + + const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_multiplicity; + const real_t fft_norm = 1.0/std::pow(real_t(nlattice),3.0); + const real_t fft_norm12 = 1.0/std::pow(real_t(nlattice),1.5); + + //! just a Kronecker \delta_ij + auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; + + auto add_greensftide_sr = [&]( mat3& D, const vec3& d ) -> void { + auto r = d.norm(); + if( r< 1e-14 ) return; // return zero for r=0 + + const real_t r2(r*r), r3(r2*r), r5(r3*r2); + const real_t K1( -alpha3/pi32 * std::exp(-alpha2*r2)/r2 ); + const real_t K2( (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha2*r2)*r)/fourpi ); + + for( int mu=0; mu<3; ++mu ){ + for( int nu=mu; nu<3; ++nu ){ + real_t dd( d[mu]*d[nu] * K1 + (kronecker(mu,nu)/r3 - 3.0 * (d[mu]*d[nu])/r5) * K2 ); + D(mu,nu) += dd; + D(nu,mu) += (mu!=nu)? dd : 0.0; + } + } + }; + + auto add_greensftide_lr = [&]( mat3& D, const vec3& k, const vec3& r ) -> void { + real_t kmod2 = k.norm_squared(); + real_t term = std::exp(-kmod2/(4*alpha2))*std::cos(k.dot(r)) / kmod2 * fft_norm; + for( int mu=0; mu<3; ++mu ){ + for( int nu=mu; nu<3; ++nu ){ + auto dd = k[mu] * k[nu] * term; + D(mu,nu) += dd; + D(nu,mu) += (mu!=nu)? dd : 0.0; + } + } + }; + constexpr ptrdiff_t lnumber = 4, knumber = 4; const int numb = 1; @@ -94,59 +111,57 @@ private: ico_.assign(D_xx_.memsize(),vec3()); vecitk_.assign(D_xx_.memsize(),vec3()); - D_xx_.zero(); - D_xy_.zero(); - D_xz_.zero(); - D_yy_.zero(); - D_yz_.zero(); - D_zz_.zero(); + #pragma omp parallel + { + //... 
temporary to hold values of the dynamical matrix + mat3 matD(0.0); - #pragma omp parallel for - for( size_t i=0; i cdr( {std::fmod( 2.0+dxp, 1.0 ),std::fmod( 2.0+dyp, 1.0 ),std::fmod( 2.0+dzp, 1.0 )} ); - vec3 ak; + #pragma omp for + for( size_t i=0; i x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)}); + const vec3 ar = (mat_bcc_bravais * x_ijk).wrap_abs(); - for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ - for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ - for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ - vec3 ai = {real_t(ix),real_t(iy),real_t(iz)}; - vec3 dr( cdr ); - dr[0] -= (ai.x*bcc_bravais[0][0]+ai.y*bcc_bravais[1][0]+ai.z*bcc_bravais[2][0]); - dr[1] -= (ai.x*bcc_bravais[0][1]+ai.y*bcc_bravais[1][1]+ai.z*bcc_bravais[2][1]); - dr[2] -= (ai.x*bcc_bravais[0][2]+ai.y*bcc_bravais[1][2]+ai.z*bcc_bravais[2][2]); + //... zero temporary matrix + matD.zero(); - D_xx_.relem(i,j,k) += greensftide_sr2(0,0,dr) * charge; - D_xy_.relem(i,j,k) += greensftide_sr2(0,1,dr) * charge; - D_xz_.relem(i,j,k) += greensftide_sr2(0,2,dr) * charge; - D_yy_.relem(i,j,k) += greensftide_sr2(1,1,dr) * charge; - D_yz_.relem(i,j,k) += greensftide_sr2(1,2,dr) * charge; - D_zz_.relem(i,j,k) += greensftide_sr2(2,2,dr) * charge; - - vec3 bk = {real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}; - if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ - ak.x = bk.x*bcc_reciprocal[0][0]+bk.y*bcc_reciprocal[1][0]+bk.z*bcc_reciprocal[2][0]; - ak.y = bk.x*bcc_reciprocal[0][1]+bk.y*bcc_reciprocal[1][1]+bk.z*bcc_reciprocal[2][1]; - ak.z = bk.x*bcc_reciprocal[0][2]+bk.y*bcc_reciprocal[1][2]+bk.z*bcc_reciprocal[2][2]; - real_t amodk2 = ak.norm_squared(); - real_t term = charge*std::exp(-amodk2/(4*alpha*alpha))*std::cos(ak.dot(cdr)) / amodk2 / std::pow(nlattice,3); - D_xx_.relem(i,j,k) += ak.x*ak.x*term; - D_xy_.relem(i,j,k) += ak.x*ak.y*term; - D_xz_.relem(i,j,k) += ak.x*ak.z*term; - D_yy_.relem(i,j,k) += ak.y*ak.y*term; - D_yz_.relem(i,j,k) += ak.y*ak.z*term; - D_zz_.relem(i,j,k) += ak.z*ak.z*term; + // add real-space part of dynamical matrix, periodic copies + for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ + for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ + for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ + const vec3 n_ijk({real_t(ix),real_t(iy),real_t(iz)}); + const vec3 dr(ar - mat_bcc_bravais * n_ijk); + add_greensftide_sr(matD, dr); } } } - } + + // add k-space part of dynamical matrix + for( ptrdiff_t ix=-knumber; ix<=knumber; ix++ ){ + for( ptrdiff_t iy=-knumber; iy<=knumber; iy++ ){ + for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){ + if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ + const vec3 k_ijk({real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}); + const vec3 ak( mat_bcc_reciprocal * k_ijk); + + add_greensftide_lr(matD, ak, ar ); + } + } + } + } + + D_xx_.relem(i,j,k) = matD(0,0) * charge; + D_xy_.relem(i,j,k) = matD(0,1) * charge; + D_xz_.relem(i,j,k) = matD(0,2) * charge; + D_yy_.relem(i,j,k) = matD(1,1) * charge; + D_yz_.relem(i,j,k) = matD(1,2) * charge; + D_zz_.relem(i,j,k) = matD(2,2) * charge; + } } } - } + } // end omp parallel region // fix r=0 with background density (added later in Fourier space) D_xx_.relem(0,0,0) = 1.0/3.0; @@ -177,7 +192,7 @@ private: #pragma omp parallel { // thread private matrix representation - mat3s D; + mat3 D; vec3 eval, evec1, evec2, evec3; #pragma omp for @@ -191,13 +206,13 @@ private: const real_t kmod = kv.norm()/mapratio_/boxlen_; // put matrix elements into actual matrix - D = { std::real(D_xx_.kelem(i,j,k))/fft_norm12, - 
std::real(D_xy_.kelem(i,j,k))/fft_norm12, - std::real(D_xz_.kelem(i,j,k))/fft_norm12, - std::real(D_yy_.kelem(i,j,k))/fft_norm12, - std::real(D_yz_.kelem(i,j,k))/fft_norm12, - std::real(D_zz_.kelem(i,j,k))/fft_norm12 }; - + D(0,0) = std::real(D_xx_.kelem(i,j,k)) / fft_norm12; + D(0,1) = D(1,0) = std::real(D_xy_.kelem(i,j,k)) / fft_norm12; + D(0,2) = D(2,0) = std::real(D_xz_.kelem(i,j,k)) / fft_norm12; + D(1,1) = std::real(D_yy_.kelem(i,j,k)) / fft_norm12; + D(1,2) = D(2,1) = std::real(D_yz_.kelem(i,j,k)) / fft_norm12; + D(2,2) = std::real(D_zz_.kelem(i,j,k)) / fft_norm12; + // compute eigenstructure of matrix D.eigen(eval, evec1, evec2, evec3); @@ -395,7 +410,7 @@ private: #pragma omp parallel { // thread private matrix representation - mat3s D; + mat3 D; vec3 eval, evec1, evec2, evec3; #pragma omp for @@ -523,7 +538,7 @@ private: public: // real_t boxlen, size_t ngridother - explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=16 ) + explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=32 ) : boxlen_( the_config.GetValue("setup", "BoxLength") ), ngmapto_( the_config.GetValue("setup", "GridRes") ), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), diff --git a/include/vec3.hh b/include/vec3.hh index b6550ae..af40bf3 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -25,6 +25,10 @@ public: //! copy constructor vec3( const vec3 &v) : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} + + //! copy constructor for non-const reference, needed to avoid variadic template being called for non-const reference + vec3( vec3& v) + : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} //! move constructor vec3( vec3 &&v) @@ -33,52 +37,67 @@ public: //! construct vec3 from initializer list template vec3(E&&...e) - : data_{{std::forward(e)...}}, x(data_[0]), y(data_[1]), z(data_[2]){} + : data_{{std::forward(e)...}}, x{data_[0]}, y{data_[1]}, z{data_[2]} + {} + // vec3( T a, T b, T c ) + // : data_{{a,b,c}}, x(data_[0]), y(data_[1]), z(data_[2]){} //! bracket index access to vector components - T &operator[](size_t i){ return data_[i];} + T &operator[](size_t i) noexcept{ return data_[i];} //! const bracket index access to vector components - const T &operator[](size_t i) const { return data_[i]; } + const T &operator[](size_t i) const noexcept { return data_[i]; } // assignment operator - vec3& operator=( const vec3& v ) { data_=v.data_; return *this; } + vec3& operator=( const vec3& v ) noexcept { data_=v.data_; return *this; } // assignment operator - const vec3& operator=( const vec3& v ) const { data_=v.data_; return *this; } + const vec3& operator=( const vec3& v ) const noexcept{ data_=v.data_; return *this; } //! implementation of summation of vec3 - vec3 operator+( const vec3& v ) const{ return vec3({x+v.x,y+v.y,z+v.z}); } + vec3 operator+( const vec3& v ) const noexcept{ return vec3({x+v.x,y+v.y,z+v.z}); } //! implementation of difference of vec3 - vec3 operator-( const vec3& v ) const{ return vec3({x-v.x,y-v.y,z-v.z}); } + vec3 operator-( const vec3& v ) const noexcept{ return vec3({x-v.x,y-v.y,z-v.z}); } //! implementation of scalar multiplication - vec3 operator*( T s ) const{ return vec3({x*s,y*s,z*s}); } + vec3 operator*( T s ) const noexcept{ return vec3({x*s,y*s,z*s}); } //! implementation of scalar division - vec3 operator/( T s ) const{ return vec3({x/s,y/s,z/s}); } + vec3 operator/( T s ) const noexcept{ return vec3({x/s,y/s,z/s}); } //! 
implementation of += operator - vec3& operator+=( const vec3& v ) const{ x+=v.x; y+=v.y; z+=v.z; return *this; } + vec3& operator+=( const vec3& v ) const noexcept{ x+=v.x; y+=v.y; z+=v.z; return *this; } //! implementation of -= operator - vec3& operator-=( const vec3& v ) const{ x-=v.x; y-=v.y; z-=v.z; return *this; } + vec3& operator-=( const vec3& v ) const noexcept{ x-=v.x; y-=v.y; z-=v.z; return *this; } //! multiply with scalar - vec3& operator*=( T s ) const{ x*=s; y*=s; z*=s; return *this; } + vec3& operator*=( T s ) const noexcept{ x*=s; y*=s; z*=s; return *this; } //! compute dot product with another vector - T dot(const vec3 &a) const + T dot(const vec3 &a) const noexcept { return data_[0] * a.data_[0] + data_[1] * a.data_[1] + data_[2] * a.data_[2]; } //! returns 2-norm squared of vector - T norm_squared(void) const { return this->dot(*this); } + T norm_squared(void) const noexcept { return this->dot(*this); } //! returns 2-norm of vector - T norm(void) const { return std::sqrt( this->norm_squared() ); } + T norm(void) const noexcept { return std::sqrt( this->norm_squared() ); } + + //! wrap absolute vector to box of size p + vec3& wrap_abs( T p = 1.0 ) noexcept{ + for( auto& x : data_ ) x = std::fmod( 2*p + x, p ); + return *this; + } + + //! wrap relative vector to box of size p + vec3& wrap_rel( T p = 1.0 ) noexcept{ + for( auto& x : data_ ) x = (x<-p/2)? x+p : (x>=p/2)? x-p : x; + return *this; + } }; //! multiplication with scalar From a71795cbb3859f30269180a49ddeceec5e6c855f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Dec 2019 15:04:25 +0100 Subject: [PATCH 038/130] added other lattice types to dynamical matrix calculation --- include/particle_plt.hh | 116 ++++++++++++++++++++++++++-------------- 1 file changed, 77 insertions(+), 39 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 051a6b3..934f544 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -29,34 +29,82 @@ private: void init_D() { - constexpr real_t pi = M_PI, twopi = 2.0*M_PI; + constexpr real_t pi = M_PI; + constexpr real_t twopi = 2.0*M_PI; + constexpr real_t fourpi = 4.0*M_PI; + constexpr real_t sqrtpi = std::sqrt(M_PI); + constexpr real_t pi32 = std::pow(M_PI,1.5); const int charge_multiplicity = 2; - const mat3 mat_bcc_bravais{ + //! === vectors, reciprocals and normals for the SC lattice === + const int charge_fac_sc = 1; + const mat3 mat_bravais_sc{ + 1.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0, + }; + const mat3 mat_reciprocal_sc{ + twopi, 0.0, 0.0, + 0.0, twopi, 0.0, + 0.0, 0.0, twopi, + }; + const std::vector> normals_sc{ + {pi,0.,0.},{-pi,0.,0.}, + {0.,pi,0.},{0.,-pi,0.}, + {0.,0.,pi},{0.,0.,-pi}, + }; + + + //! === vectors, reciprocals and normals for the BCC lattice === + const int charge_fac_bcc = 2; + const mat3 mat_bravais_bcc{ 1.0, 0.0, 0.5, 0.0, 1.0, 0.5, 0.0, 0.0, 0.5, }; - - const mat3 mat_bcc_reciprocal{ + const mat3 mat_reciprocal_bcc{ twopi, 0.0, 0.0, 0.0, twopi, 0.0, - -twopi, -twopi, 2.0*twopi, + -twopi, -twopi, fourpi, }; - - const std::vector> bcc_reciprocal{ - {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} - }; - - const std::vector> bcc_normals{ + const std::vector> normals_bcc{ {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} }; - + //! 
=== vectors, reciprocals and normals for the FCC lattice === + const int charge_fac_fcc = 4; + const mat3 mat_bravais_fcc{ + 0.0, 0.5, 0.0, + 0.5, 0.0, 1.0, + 0.5, 0.5, 0.0, + }; + const mat3 mat_reciprocal_fcc{ + -fourpi, fourpi, twopi, + 0.0, 0.0, twopi, + fourpi, 0.0, -twopi, + }; + const std::vector> normals_fcc{ + {twopi,0.,0.},{-twopi,0.,0.}, + {0.,twopi,0.},{0.,-twopi,0.}, + {0.,0.,twopi},{0.,0.,-twopi}, + {+pi,+pi,+pi},{+pi,+pi,-pi}, + {+pi,-pi,+pi},{+pi,-pi,-pi}, + {-pi,+pi,+pi},{-pi,+pi,-pi}, + {-pi,-pi,+pi},{-pi,-pi,-pi}, + }; + + //! select the properties for the chosen lattice + const int ilat = 2; // 0 = sc, 1 = bcc, 2 = fcc + + const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc; + const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc; + const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc; + const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? charge_fac_bcc : charge_fac_sc; + const size_t nlattice = ngrid_;//16; const real_t dx = 1.0/real_t(nlattice); @@ -64,11 +112,8 @@ private: const real_t alpha = 1.0/std::sqrt(2)/eta; const real_t alpha2 = alpha*alpha; const real_t alpha3 = alpha2*alpha; - const real_t sqrtpi = std::sqrt(M_PI); - const real_t fourpi = 4.0*M_PI; - const real_t pi32 = std::pow(M_PI,1.5); - const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_multiplicity; + const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_fac; const real_t fft_norm = 1.0/std::pow(real_t(nlattice),3.0); const real_t fft_norm12 = 1.0/std::pow(real_t(nlattice),1.5); @@ -122,7 +167,7 @@ private: for( size_t k=0; k x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)}); - const vec3 ar = (mat_bcc_bravais * x_ijk).wrap_abs(); + const vec3 ar = (mat_bravais * x_ijk).wrap_abs(); //... 
zero temporary matrix matD.zero(); @@ -132,7 +177,7 @@ private: for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ const vec3 n_ijk({real_t(ix),real_t(iy),real_t(iz)}); - const vec3 dr(ar - mat_bcc_bravais * n_ijk); + const vec3 dr(ar - mat_bravais * n_ijk); add_greensftide_sr(matD, dr); } } @@ -144,7 +189,7 @@ private: for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){ if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ const vec3 k_ijk({real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}); - const vec3 ak( mat_bcc_reciprocal * k_ijk); + const vec3 ak( mat_reciprocal * k_ijk); add_greensftide_lr(matD, ak, ar ); } @@ -230,15 +275,7 @@ private: auto idx = D_xx_.get_idx(i,j,k); vec3 ar = D_xx_.get_k(i,j,k) / (twopi*ngrid_); - // vec3 kv = D_xx_.get_k(i,j,k); - - for( int l=0; l<3; l++ ){ - a[l] = 0.0; - for( int m=0; m<3; m++){ - // project k on reciprocal basis - a[l] += ar[m]*bcc_reciprocal[m][l]; - } - } + a = mat_reciprocal * ar; // translate the k-vectors into the "candidate" FBZ vec3 anum; @@ -249,22 +286,23 @@ private: for( int l3=-numb; l3<=numb; ++l3 ){ anum[2] = real_t(l3); - vectk_[idx] = a; + // vectk_[idx] = a; + vectk_[idx] = a + mat_reciprocal * anum; - for( int l=0; l<3; l++ ){ - for( int m=0; m<3; m++){ - // project k on reciprocal basis - vectk_[idx][l] += anum[m]*bcc_reciprocal[m][l]; - } - } + // for( int l=0; l<3; l++ ){ + // for( int m=0; m<3; m++){ + // // project k on reciprocal basis + // vectk_[idx][l] += anum[m]*bcc_reciprocal[m][l]; + // } + // } // check if in first Brillouin zone bool btest=true; - for( size_t l=0; l amod*1.0001 ){ btest=false; break; } @@ -308,7 +346,7 @@ private: { constexpr real_t pi = M_PI, twopi = 2.0*M_PI; - const std::vector> bcc_normals{ + const std::vector> normals_bcc{ {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} From 1d10f5194157c7eb7e5cd9fb3baa2d204bbbf674 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Dec 2019 15:40:24 +0100 Subject: [PATCH 039/130] more minor cleanup --- include/particle_plt.hh | 81 ++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 49 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 934f544..0bfe52f 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -29,13 +29,11 @@ private: void init_D() { - constexpr real_t pi = M_PI; - constexpr real_t twopi = 2.0*M_PI; + constexpr real_t pi = M_PI; + constexpr real_t twopi = 2.0*M_PI; constexpr real_t fourpi = 4.0*M_PI; - constexpr real_t sqrtpi = std::sqrt(M_PI); - constexpr real_t pi32 = std::pow(M_PI,1.5); - - const int charge_multiplicity = 2; + const real_t sqrtpi = std::sqrt(M_PI); + const real_t pi32 = std::pow(M_PI,1.5); //! === vectors, reciprocals and normals for the SC lattice === const int charge_fac_sc = 1; @@ -98,14 +96,14 @@ private: }; //! select the properties for the chosen lattice - const int ilat = 2; // 0 = sc, 1 = bcc, 2 = fcc + const int ilat = 1; // 0 = sc, 1 = bcc, 2 = fcc const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc; const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc; const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc; const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? 
charge_fac_bcc : charge_fac_sc; - const size_t nlattice = ngrid_;//16; + const size_t nlattice = ngrid_; const real_t dx = 1.0/real_t(nlattice); const real_t eta = 4.0;//nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells @@ -120,6 +118,7 @@ private: //! just a Kronecker \delta_ij auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; + //! Ewald summation: short-range Green's function auto add_greensftide_sr = [&]( mat3& D, const vec3& d ) -> void { auto r = d.norm(); if( r< 1e-14 ) return; // return zero for r=0 @@ -137,6 +136,7 @@ private: } }; + //! Ewald summation: long-range Green's function auto add_greensftide_lr = [&]( mat3& D, const vec3& k, const vec3& r ) -> void { real_t kmod2 = k.norm_squared(); real_t term = std::exp(-kmod2/(4*alpha2))*std::cos(k.dot(r)) / kmod2 * fft_norm; @@ -148,8 +148,20 @@ private: } } }; + + //! checks if 'vec' is in the FBZ with FBZ normal vectors given in 'normals' + auto check_FBZ = []( const auto& normals, const auto& vec ) -> bool { + bool btest = true; + for( const auto& n : normals ){ + if( n.dot( vec ) > 1.01 * n.dot(n) ){ + btest = false; + break; + } + } + return btest; + }; - constexpr ptrdiff_t lnumber = 4, knumber = 4; + constexpr ptrdiff_t lnumber = 3, knumber = 3; const int numb = 1; vectk_.assign(D_xx_.memsize(),vec3()); @@ -247,8 +259,9 @@ private: { for( size_t k=0; k kv = D_xx_.get_k(i,j,k); - const real_t kmod = kv.norm()/mapratio_/boxlen_; + // const real_t kmod = kv.norm()/mapratio_/boxlen_; // put matrix elements into actual matrix D(0,0) = std::real(D_xx_.kelem(i,j,k)) / fft_norm12; @@ -269,46 +282,18 @@ private: D_xz_.kelem(i,j,k) = evec3[1]; D_yz_.kelem(i,j,k) = evec3[2]; - - vec3 a({0.,0.,0.}); - - auto idx = D_xx_.get_idx(i,j,k); - - vec3 ar = D_xx_.get_k(i,j,k) / (twopi*ngrid_); - a = mat_reciprocal * ar; + + vec3 ar = kv / (twopi*ngrid_); + vec3 a(mat_reciprocal * ar); // translate the k-vectors into the "candidate" FBZ - vec3 anum; for( int l1=-numb; l1<=numb; ++l1 ){ - anum[0] = real_t(l1); for( int l2=-numb; l2<=numb; ++l2 ){ - anum[1] = real_t(l2); for( int l3=-numb; l3<=numb; ++l3 ){ - anum[2] = real_t(l3); - // vectk_[idx] = a; - vectk_[idx] = a + mat_reciprocal * anum; + vectk_[idx] = a + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); - // for( int l=0; l<3; l++ ){ - // for( int m=0; m<3; m++){ - // // project k on reciprocal basis - // vectk_[idx][l] += anum[m]*bcc_reciprocal[m][l]; - // } - // } - // check if in first Brillouin zone - bool btest=true; - for( size_t l=0; l amod*1.0001 ){ btest=false; break; } - if( scalar > 1.01 * amod2 ){ btest=false; break; } - } - if( btest ){ + if( check_FBZ( normals, vectk_[idx]) ){ vecitk_[idx][0] = std::round(vectk_[idx][0]*(ngrid_)/twopi); vecitk_[idx][1] = std::round(vectk_[idx][1]*(ngrid_)/twopi); @@ -318,10 +303,10 @@ private: ico_[idx][1] = std::round((ar[1]+l2) * ngrid_); ico_[idx][2] = std::round((ar[2]+l3) * ngrid_); - ofs2 << vectk_[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; - // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << vecitk_[idx].x << ", " << vecitk_[idx].y << ", " << vecitk_[idx].z << ", " << ico_[idx][0] << ", " << ico_[idx][1] << ", " << ico_[idx][2] << std::endl; - // ofs2 << kv.x/twopi << ", " << kv.y/twopi << ", " << kv.z/twopi << ", " << -vecitk_[idx].x << ", " << -vecitk_[idx].y << ", " << -vecitk_[idx].z << ", " << ico_[idx][0] << ", " << ico_[idx][1] << ", 
" << ico_[idx][2] << std::endl; - + #pragma omp critical + { + ofs2 << vectk_[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; + } goto endloop; } } @@ -332,8 +317,6 @@ private: } } - - D_xx_.Write_to_HDF5("debug.hdf5","mu1"); D_xy_.Write_to_HDF5("debug.hdf5","mu2"); D_xz_.Write_to_HDF5("debug.hdf5","mu3"); From 0de486f5525d3111ec6b1327fe6e0396f93e798d Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Dec 2019 18:52:53 +0100 Subject: [PATCH 040/130] added gridding back of dynamical matrix to ordinary Fourier space --- include/particle_plt.hh | 202 +++++++++++++++++++++++++++++++--------- include/vec3.hh | 5 +- 2 files changed, 162 insertions(+), 45 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 0bfe52f..92dea42 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -23,6 +23,7 @@ private: const size_t ngmapto_, ngrid_, ngrid32_; const real_t mapratio_; Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; + Grid_FFT mu1; Grid_FFT grad_x_, grad_y_, grad_z_; std::vector> vectk_; std::vector> ico_, vecitk_; @@ -103,7 +104,7 @@ private: const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc; const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? charge_fac_bcc : charge_fac_sc; - const size_t nlattice = ngrid_; + const ptrdiff_t nlattice = ngrid_; const real_t dx = 1.0/real_t(nlattice); const real_t eta = 4.0;//nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells @@ -153,7 +154,7 @@ private: auto check_FBZ = []( const auto& normals, const auto& vec ) -> bool { bool btest = true; for( const auto& n : normals ){ - if( n.dot( vec ) > 1.01 * n.dot(n) ){ + if( n.dot( vec ) > 1.0001 * n.dot(n) ){ btest = false; break; } @@ -162,7 +163,7 @@ private: }; constexpr ptrdiff_t lnumber = 3, knumber = 3; - const int numb = 1; + const int numb = 1, numb2 = 2; vectk_.assign(D_xx_.memsize(),vec3()); ico_.assign(D_xx_.memsize(),vec3()); @@ -234,6 +235,7 @@ private: D_yy_.FourierTransformForward(); D_yz_.FourierTransformForward(); D_zz_.FourierTransformForward(); + mu1.FourierTransformForward(false); if (CONFIG::MPI_task_rank == 0) unlink("debug.hdf5"); @@ -274,50 +276,160 @@ private: // compute eigenstructure of matrix D.eigen(eval, evec1, evec2, evec3); - D_xx_.kelem(i,j,k) = eval[2]; - D_yy_.kelem(i,j,k) = eval[1]; - D_zz_.kelem(i,j,k) = eval[0]; - - D_xy_.kelem(i,j,k) = evec3[0]; - D_xz_.kelem(i,j,k) = evec3[1]; - D_yz_.kelem(i,j,k) = evec3[2]; - + // now determine to which modes on the regular lattice this contributes + vec3 ar1 = kv / (twopi*ngrid_); + vec3 ar2 = -kv / (twopi*ngrid_); - vec3 ar = kv / (twopi*ngrid_); - vec3 a(mat_reciprocal * ar); + vec3 a1(mat_reciprocal * ar1); + vec3 a2(mat_reciprocal * ar2); // translate the k-vectors into the "candidate" FBZ for( int l1=-numb; l1<=numb; ++l1 ){ for( int l2=-numb; l2<=numb; ++l2 ){ for( int l3=-numb; l3<=numb; ++l3 ){ - vectk_[idx] = a + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); + vectk_[idx] = a1 + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); if( check_FBZ( normals, vectk_[idx]) ){ - vecitk_[idx][0] = std::round(vectk_[idx][0]*(ngrid_)/twopi); - vecitk_[idx][1] = std::round(vectk_[idx][1]*(ngrid_)/twopi); - vecitk_[idx][2] = std::round(vectk_[idx][2]*(ngrid_)/twopi); + int ix = std::round(vectk_[idx][0]*(ngrid_)/twopi); + int iy = std::round(vectk_[idx][1]*(ngrid_)/twopi); + int iz = 
std::round(vectk_[idx][2]*(ngrid_)/twopi); - ico_[idx][0] = std::round((ar[0]+l1) * ngrid_); - ico_[idx][1] = std::round((ar[1]+l2) * ngrid_); - ico_[idx][2] = std::round((ar[2]+l3) * ngrid_); + // for( int k1=-numb2; k1<=numb2; ++k1 ){ + // for( int k2=-numb2; k2<=numb2; ++k2 ){ + // for( int k3=-numb2; k3<=numb2; ++k3 ){ + {{{ int k1=0,k2=0,k3=0; - #pragma omp critical - { - ofs2 << vectk_[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; + auto d = mat_reciprocal * vec3({real_t(k1),real_t(k2),real_t(k3)}) / twopi * ngrid_; + int iix = ix;// + std::round(d.x); + int iiy = iy;// + std::round(d.y); + int iiz = iz;// + std::round(d.z); + + if( iix >= -nlattice/2 && iiy >= -nlattice/2 && iiz >= 0 && + // if( iix >= 0 && iiy >= 0 && iiz >= 0 && + iix < nlattice/2 && iiy < nlattice/2 && iiz <= nlattice/2){ + iix = (iix<0)? iix+nlattice : iix; + iiy = (iiy<0)? iiy+nlattice : iiy; + iiz = (iiz<0)? iiz+nlattice : iiz; + mu1.kelem(iix,iiy,iiz) = eval[2]; + } + } + } } - goto endloop; + } + + vectk_[idx] = a2 + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); + + if( check_FBZ( normals, vectk_[idx]) ){ + + int ix = std::round(vectk_[idx][0]*(ngrid_)/twopi); + int iy = std::round(vectk_[idx][1]*(ngrid_)/twopi); + int iz = std::round(vectk_[idx][2]*(ngrid_)/twopi); + + // for( int k1=-numb; k1<=numb2; ++k1 ){ + // for( int k2=-numb; k2<=numb2; ++k2 ){ + // for( int k3=-numb; k3<=numb2; ++k3 ){ + {{{ int k1=0,k2=0,k3=0; + + auto d = mat_reciprocal * vec3({real_t(k1),real_t(k2),real_t(k3)}) / twopi * ngrid_; + int iix = ix;// + std::round(d.x); + int iiy = iy;// + std::round(d.y); + int iiz = iz;// + std::round(d.z); + + + if( iix >= -nlattice/2 && iiy >= -nlattice/2 && iiz >= 0 && + // if( iix >= 0 && iiy >= 0 && iiz >= 0 && + iix < nlattice/2 && iiy < nlattice/2 && iiz <= nlattice/2){ + iix = (iix<0)? iix+nlattice : iix; + iiy = (iiy<0)? iiy+nlattice : iiy; + iiz = (iiz<0)? 
iiz+nlattice : iiz; + mu1.kelem(iix,iiy,iiz) = eval[2]; + } + } + } + } + } } } - } endloop: ; + } + + endloop: ; + } } } + + // #pragma omp for + // for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + // // const real_t kmod = kv.norm()/mapratio_/boxlen_; + + // // put matrix elements into actual matrix + // D(0,0) = std::real(D_xx_.kelem(i,j,k)) / fft_norm12; + // D(0,1) = D(1,0) = std::real(D_xy_.kelem(i,j,k)) / fft_norm12; + // D(0,2) = D(2,0) = std::real(D_xz_.kelem(i,j,k)) / fft_norm12; + // D(1,1) = std::real(D_yy_.kelem(i,j,k)) / fft_norm12; + // D(1,2) = D(2,1) = std::real(D_yz_.kelem(i,j,k)) / fft_norm12; + // D(2,2) = std::real(D_zz_.kelem(i,j,k)) / fft_norm12; + + // // compute eigenstructure of matrix + // D.eigen(eval, evec1, evec2, evec3); + + // D_xx_.kelem(i,j,k) = eval[2]; + // D_yy_.kelem(i,j,k) = eval[1]; + // D_zz_.kelem(i,j,k) = eval[0]; + + // D_xy_.kelem(i,j,k) = evec3[0]; + // D_xz_.kelem(i,j,k) = evec3[1]; + // D_yz_.kelem(i,j,k) = evec3[2]; + + + // vec3 ar = kv / (twopi*ngrid_); + // vec3 a(mat_reciprocal * ar); + + // // translate the k-vectors into the "candidate" FBZ + // for( int l1=-numb; l1<=numb; ++l1 ){ + // for( int l2=-numb; l2<=numb; ++l2 ){ + // for( int l3=-numb; l3<=numb; ++l3 ){ + + // vectk_[idx] = a + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); + + // if( check_FBZ( normals, vectk_[idx]) ){ + + // vecitk_[idx][0] = std::round(vectk_[idx][0]*(ngrid_)/twopi); + // vecitk_[idx][1] = std::round(vectk_[idx][1]*(ngrid_)/twopi); + // vecitk_[idx][2] = std::round(vectk_[idx][2]*(ngrid_)/twopi); + + // ico_[idx][0] = std::round((ar[0]+l1) * ngrid_); + // ico_[idx][1] = std::round((ar[1]+l2) * ngrid_); + // ico_[idx][2] = std::round((ar[2]+l3) * ngrid_); + + // #pragma omp critical + // { + // //ofs2 << vectk_[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; + // ofs2 << vecitk_[idx][0] << " " << vecitk_[idx][1] << " " << vecitk_[idx][2] << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; + + // } + // //goto endloop; + // } + // } + // } + // } endloop: ; + // } + // } + // } } - D_xx_.Write_to_HDF5("debug.hdf5","mu1"); + mu1.kelem(0,0,0) = 1.0; + mu1.Write_to_HDF5("debug.hdf5","mu1"); D_xy_.Write_to_HDF5("debug.hdf5","mu2"); D_xz_.Write_to_HDF5("debug.hdf5","mu3"); D_yy_.Write_to_HDF5("debug.hdf5","e1x"); @@ -455,23 +567,23 @@ private: phi0 = (phi0==phi0)? 
phi0 : 0.0; // catch NaN from division by zero when kmod2=0 - const int nn = 3; - size_t nsum = 0; - ccomplex_t ff = 0.0; - for( int is=-nn;is<=nn;is++){ - for( int js=-nn;js<=nn;js++){ - for( int ks=-nn;ks<=nn;ks++){ - if( std::abs(is)+std::abs(js)+std::abs(ks) <= nn ){ - ff += std::exp(ccomplex_t(0.0,(((is)*kv[0] + (js)*kv[1] + (ks)*kv[2])))); - ff += std::exp(ccomplex_t(0.0,(((0.5+is)*kv[0] + (0.5+js)*kv[1] + (0.5+ks)*kv[2])))); - ++nsum; - } - } - } - } - ff /= nsum; + // const int nn = 3; + // size_t nsum = 0; + // ccomplex_t ff = 0.0; + // for( int is=-nn;is<=nn;is++){ + // for( int js=-nn;js<=nn;js++){ + // for( int ks=-nn;ks<=nn;ks++){ + // if( std::abs(is)+std::abs(js)+std::abs(ks) <= nn ){ + // ff += std::exp(ccomplex_t(0.0,(((is)*kv[0] + (js)*kv[1] + (ks)*kv[2])))); + // ff += std::exp(ccomplex_t(0.0,(((0.5+is)*kv[0] + (0.5+js)*kv[1] + (0.5+ks)*kv[2])))); + // ++nsum; + // } + // } + // } + // } + // ff /= nsum; // ccomplex_t ff = 1.0; - // ccomplex_t ff = (0.5+0.5*std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])))); + ccomplex_t ff = (0.5+0.5*std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])))); // assemble short-range + long_range of Ewald sum and add DC component to trace D_xx_.kelem(i,j,k) = ff*((D_xx_.kelem(i,j,k) - kv[0]*kv[0] * phi0)*nfac) + 1.0/3.0; D_xy_.kelem(i,j,k) = ff*((D_xy_.kelem(i,j,k) - kv[0]*kv[1] * phi0)*nfac); @@ -559,7 +671,7 @@ private: public: // real_t boxlen, size_t ngridother - explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=32 ) + explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=64 ) : boxlen_( the_config.GetValue("setup", "BoxLength") ), ngmapto_( the_config.GetValue("setup", "GridRes") ), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), @@ -567,7 +679,8 @@ public: D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_x_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_y_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), - grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) + grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + mu1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) { csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); @@ -596,6 +709,7 @@ public: csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; init_D(); + // init_D__old(); csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; } diff --git a/include/vec3.hh b/include/vec3.hh index af40bf3..af2bb9a 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -29,7 +29,7 @@ public: //! copy constructor for non-const reference, needed to avoid variadic template being called for non-const reference vec3( vec3& v) : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} - + //! move constructor vec3( vec3 &&v) : data_(std::move(v.data_)), x(data_[0]), y(data_[1]), z(data_[2]){} @@ -60,6 +60,9 @@ public: //! implementation of difference of vec3 vec3 operator-( const vec3& v ) const noexcept{ return vec3({x-v.x,y-v.y,z-v.z}); } + //! implementation of unary negative + vec3 operator-() const noexcept{ return vec3({-x,-y,-z}); } + //! 
implementation of scalar multiplication vec3 operator*( T s ) const noexcept{ return vec3({x*s,y*s,z*s}); } From 06fa3c128ec407843ef88b38954e85ab6b916293 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Dec 2019 20:10:58 +0100 Subject: [PATCH 041/130] added interpolation to approximate infinite lattice --- include/particle_plt.hh | 108 ++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 66 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 92dea42..9c97694 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -62,6 +62,7 @@ private: 0.0, 1.0, 0.5, 0.0, 0.0, 0.5, }; + const mat3 mat_reciprocal_bcc{ twopi, 0.0, 0.0, 0.0, twopi, 0.0, @@ -97,7 +98,7 @@ private: }; //! select the properties for the chosen lattice - const int ilat = 1; // 0 = sc, 1 = bcc, 2 = fcc + const int ilat = 2; // 0 = sc, 1 = bcc, 2 = fcc const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc; const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc; @@ -355,80 +356,55 @@ private: } } + endloop: ; } } } - // #pragma omp for - // for( size_t i=0; i kv = D_xx_.get_k(i,j,k); - // // const real_t kmod = kv.norm()/mapratio_/boxlen_; + mu1.kelem(0,0,0) = 1.0; - // // put matrix elements into actual matrix - // D(0,0) = std::real(D_xx_.kelem(i,j,k)) / fft_norm12; - // D(0,1) = D(1,0) = std::real(D_xy_.kelem(i,j,k)) / fft_norm12; - // D(0,2) = D(2,0) = std::real(D_xz_.kelem(i,j,k)) / fft_norm12; - // D(1,1) = std::real(D_yy_.kelem(i,j,k)) / fft_norm12; - // D(1,2) = D(2,1) = std::real(D_yz_.kelem(i,j,k)) / fft_norm12; - // D(2,2) = std::real(D_zz_.kelem(i,j,k)) / fft_norm12; - - // // compute eigenstructure of matrix - // D.eigen(eval, evec1, evec2, evec3); - - // D_xx_.kelem(i,j,k) = eval[2]; - // D_yy_.kelem(i,j,k) = eval[1]; - // D_zz_.kelem(i,j,k) = eval[0]; - - // D_xy_.kelem(i,j,k) = evec3[0]; - // D_xz_.kelem(i,j,k) = evec3[1]; - // D_yz_.kelem(i,j,k) = evec3[2]; - - - // vec3 ar = kv / (twopi*ngrid_); - // vec3 a(mat_reciprocal * ar); - - // // translate the k-vectors into the "candidate" FBZ - // for( int l1=-numb; l1<=numb; ++l1 ){ - // for( int l2=-numb; l2<=numb; ++l2 ){ - // for( int l3=-numb; l3<=numb; ++l3 ){ - - // vectk_[idx] = a + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); - - // if( check_FBZ( normals, vectk_[idx]) ){ - - // vecitk_[idx][0] = std::round(vectk_[idx][0]*(ngrid_)/twopi); - // vecitk_[idx][1] = std::round(vectk_[idx][1]*(ngrid_)/twopi); - // vecitk_[idx][2] = std::round(vectk_[idx][2]*(ngrid_)/twopi); - - // ico_[idx][0] = std::round((ar[0]+l1) * ngrid_); - // ico_[idx][1] = std::round((ar[1]+l2) * ngrid_); - // ico_[idx][2] = std::round((ar[2]+l3) * ngrid_); - - // #pragma omp critical - // { - // //ofs2 << vectk_[idx].norm() << " " << kv.norm() << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; - // ofs2 << vecitk_[idx][0] << " " << vecitk_[idx][1] << " " << vecitk_[idx][2] << " " << std::real(D_xx_.kelem(i,j,k)) << " " << std::real(D_yy_.kelem(i,j,k)) << " " << std::real(D_zz_.kelem(i,j,k)) << std::endl; - - // } - // //goto endloop; - // } - // } - // } - // } endloop: ; - // } - // } - // } + //... approximate infinite lattice by inerpolating to sites not convered by current resolution... 
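        // --- illustrative aside (added for clarity, not part of this patch) ---------
        // Which sites of the cubic ngrid_^3 Fourier mesh actually receive a folded
        // Bravais mode can be read off the reciprocal matrices defined above:
        //   BCC: its reciprocal lattice is FCC, so a mode (i,j,k) is covered iff
        //        i+j+k is even  -- one site in two, consistent with charge_fac_bcc = 2;
        //   FCC: its reciprocal lattice is BCC, so (i,j,k) is covered iff i, j and k
        //        all share the same parity -- one site in four, consistent with
        //        charge_fac_fcc = 4.
        // A hypothetical predicate making the counting explicit (names not used in
        // the code) would be:
        //   bool covered_bcc(int i, int j, int k){ return (i + j + k) % 2 == 0; }
        //   bool covered_fcc(int i, int j, int k){ return (i - j) % 2 == 0 && (j - k) % 2 == 0; }
        // The remaining, uncovered sites hold no eigenvalue yet; the neighbour
        // averaging below fills them in.
        // ---------------------------------------------------------------------------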
+ if( ilat==1 ){ + for( size_t i=0; i Date: Mon, 2 Dec 2019 01:04:03 +0100 Subject: [PATCH 042/130] added interpolation of all fields, plt seems to work for all lattices, projection needs testing still --- include/mat3.hh | 12 ++ include/particle_plt.hh | 383 +++++++++++++++++++++++----------------- 2 files changed, 235 insertions(+), 160 deletions(-) diff --git a/include/mat3.hh b/include/mat3.hh index 3c28f13..ac23069 100644 --- a/include/mat3.hh +++ b/include/mat3.hh @@ -157,3 +157,15 @@ vec3 operator*( const mat3 &A, const vec3 &v ) noexcept } return result; } + +// template +// vec3 operator*( const vec3 &v, const mat3 &A ) noexcept +// { +// vec3 result = 0.0; +// for( int mu=0; mu<3; ++mu ){ +// for( int nu=0; nu<3; ++nu ){ +// result[nu] += v[mu]*A(mu,nu); +// } +// } +// return result; +// } diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 9c97694..e48e356 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -23,12 +23,11 @@ private: const size_t ngmapto_, ngrid_, ngrid32_; const real_t mapratio_; Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; - Grid_FFT mu1; Grid_FFT grad_x_, grad_y_, grad_z_; std::vector> vectk_; std::vector> ico_, vecitk_; - void init_D() + void init_D( lattice lattice_type ) { constexpr real_t pi = M_PI; constexpr real_t twopi = 2.0*M_PI; @@ -62,7 +61,6 @@ private: 0.0, 1.0, 0.5, 0.0, 0.0, 0.5, }; - const mat3 mat_reciprocal_bcc{ twopi, 0.0, 0.0, 0.0, twopi, 0.0, @@ -98,17 +96,17 @@ private: }; //! select the properties for the chosen lattice - const int ilat = 2; // 0 = sc, 1 = bcc, 2 = fcc + const int ilat = lattice_type; // 0 = sc, 1 = bcc, 2 = fcc - const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc; - const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc; - const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc; - const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? charge_fac_bcc : charge_fac_sc; + const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc; + const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc; + const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc; + const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? 
charge_fac_bcc : charge_fac_sc; const ptrdiff_t nlattice = ngrid_; const real_t dx = 1.0/real_t(nlattice); - const real_t eta = 4.0;//nlattice;//4.0; //2.0/ngrid_; // Ewald cutoff shall be 2 cells + const real_t eta = 4.0; // Ewald cutoff shall be 4 cells const real_t alpha = 1.0/std::sqrt(2)/eta; const real_t alpha2 = alpha*alpha; const real_t alpha3 = alpha2*alpha; @@ -164,7 +162,7 @@ private: }; constexpr ptrdiff_t lnumber = 3, knumber = 3; - const int numb = 1, numb2 = 2; + const int numb = 1; //!< search radius when shifting vectors into FBZ vectk_.assign(D_xx_.memsize(),vec3()); ico_.assign(D_xx_.memsize(),vec3()); @@ -176,9 +174,9 @@ private: mat3 matD(0.0); #pragma omp for - for( size_t i=0; i x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)}); const vec3 ar = (mat_bravais * x_ijk).wrap_abs(); @@ -236,8 +234,8 @@ private: D_yy_.FourierTransformForward(); D_yz_.FourierTransformForward(); D_zz_.FourierTransformForward(); - mu1.FourierTransformForward(false); +#ifndef PRODUCTION if (CONFIG::MPI_task_rank == 0) unlink("debug.hdf5"); D_xx_.Write_to_HDF5("debug.hdf5","Dxx"); @@ -248,169 +246,239 @@ private: D_zz_.Write_to_HDF5("debug.hdf5","Dzz"); std::ofstream ofs2("test_brillouin.txt"); - - #pragma omp parallel +#endif { - // thread private matrix representation - mat3 D; - vec3 eval, evec1, evec2, evec3; + //!=== Make temporary copies before resorting to std. Fourier grid ========!// + Grid_FFT + temp1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + temp2({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + temp3({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}); - #pragma omp for + temp1.FourierTransformForward(false); + temp2.FourierTransformForward(false); + temp3.FourierTransformForward(false); + + #pragma omp parallel for for( size_t i=0; i kv = D_xx_.get_k(i,j,k); - // const real_t kmod = kv.norm()/mapratio_/boxlen_; - - // put matrix elements into actual matrix - D(0,0) = std::real(D_xx_.kelem(i,j,k)) / fft_norm12; - D(0,1) = D(1,0) = std::real(D_xy_.kelem(i,j,k)) / fft_norm12; - D(0,2) = D(2,0) = std::real(D_xz_.kelem(i,j,k)) / fft_norm12; - D(1,1) = std::real(D_yy_.kelem(i,j,k)) / fft_norm12; - D(1,2) = D(2,1) = std::real(D_yz_.kelem(i,j,k)) / fft_norm12; - D(2,2) = std::real(D_zz_.kelem(i,j,k)) / fft_norm12; - - // compute eigenstructure of matrix - D.eigen(eval, evec1, evec2, evec3); - - // now determine to which modes on the regular lattice this contributes - vec3 ar1 = kv / (twopi*ngrid_); - vec3 ar2 = -kv / (twopi*ngrid_); - - vec3 a1(mat_reciprocal * ar1); - vec3 a2(mat_reciprocal * ar2); - - // translate the k-vectors into the "candidate" FBZ - for( int l1=-numb; l1<=numb; ++l1 ){ - for( int l2=-numb; l2<=numb; ++l2 ){ - for( int l3=-numb; l3<=numb; ++l3 ){ - - vectk_[idx] = a1 + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); - - if( check_FBZ( normals, vectk_[idx]) ){ - - int ix = std::round(vectk_[idx][0]*(ngrid_)/twopi); - int iy = std::round(vectk_[idx][1]*(ngrid_)/twopi); - int iz = std::round(vectk_[idx][2]*(ngrid_)/twopi); - - // for( int k1=-numb2; k1<=numb2; ++k1 ){ - // for( int k2=-numb2; k2<=numb2; ++k2 ){ - // for( int k3=-numb2; k3<=numb2; ++k3 ){ - {{{ int k1=0,k2=0,k3=0; - - auto d = mat_reciprocal * vec3({real_t(k1),real_t(k2),real_t(k3)}) / twopi * ngrid_; - int iix = ix;// + std::round(d.x); - int iiy = iy;// + std::round(d.y); - int iiz = iz;// + std::round(d.z); - - if( iix >= -nlattice/2 && iiy >= -nlattice/2 && iiz >= 0 && - // if( iix >= 0 && iiy >= 0 && iiz >= 0 && - iix < nlattice/2 && iiy < nlattice/2 && iiz <= nlattice/2){ - iix = (iix<0)? 
iix+nlattice : iix; - iiy = (iiy<0)? iiy+nlattice : iiy; - iiz = (iiz<0)? iiz+nlattice : iiz; - mu1.kelem(iix,iiy,iiz) = eval[2]; - } - } - } - } - } - - vectk_[idx] = a2 + mat_reciprocal * vec3({real_t(l1),real_t(l2),real_t(l3)}); - - if( check_FBZ( normals, vectk_[idx]) ){ - - int ix = std::round(vectk_[idx][0]*(ngrid_)/twopi); - int iy = std::round(vectk_[idx][1]*(ngrid_)/twopi); - int iz = std::round(vectk_[idx][2]*(ngrid_)/twopi); - - // for( int k1=-numb; k1<=numb2; ++k1 ){ - // for( int k2=-numb; k2<=numb2; ++k2 ){ - // for( int k3=-numb; k3<=numb2; ++k3 ){ - {{{ int k1=0,k2=0,k3=0; - - auto d = mat_reciprocal * vec3({real_t(k1),real_t(k2),real_t(k3)}) / twopi * ngrid_; - int iix = ix;// + std::round(d.x); - int iiy = iy;// + std::round(d.y); - int iiz = iz;// + std::round(d.z); - - - if( iix >= -nlattice/2 && iiy >= -nlattice/2 && iiz >= 0 && - // if( iix >= 0 && iiy >= 0 && iiz >= 0 && - iix < nlattice/2 && iiy < nlattice/2 && iiz <= nlattice/2){ - iix = (iix<0)? iix+nlattice : iix; - iiy = (iiy<0)? iiy+nlattice : iiy; - iiz = (iiz<0)? iiz+nlattice : iiz; - mu1.kelem(iix,iiy,iiz) = eval[2]; - } - } - } - } - - } - } - } - } - - - endloop: ; - + temp1.kelem(i,j,k) = ccomplex_t(std::real(D_xx_.kelem(i,j,k)),std::real(D_xy_.kelem(i,j,k))); + temp2.kelem(i,j,k) = ccomplex_t(std::real(D_xz_.kelem(i,j,k)),std::real(D_yy_.kelem(i,j,k))); + temp3.kelem(i,j,k) = ccomplex_t(std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))); } } } + D_xx_.zero(); D_xy_.zero(); D_xz_.zero(); + D_yy_.zero(); D_yz_.zero(); D_zz_.zero(); + + //!=== Diagonalise and resort to std. Fourier grid ========!// + #pragma omp parallel + { + // thread private matrix representation + mat3 D; + vec3 eval, evec1, evec2, evec3; - mu1.kelem(0,0,0) = 1.0; + #pragma omp for + for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + + // put matrix elements into actual matrix + D(0,0) = std::real(temp1.kelem(i,j,k)) / fft_norm12; + D(0,1) = D(1,0) = std::imag(temp1.kelem(i,j,k)) / fft_norm12; + D(0,2) = D(2,0) = std::real(temp2.kelem(i,j,k)) / fft_norm12; + D(1,1) = std::imag(temp2.kelem(i,j,k)) / fft_norm12; + D(1,2) = D(2,1) = std::real(temp3.kelem(i,j,k)) / fft_norm12; + D(2,2) = std::imag(temp3.kelem(i,j,k)) / fft_norm12; - //... approximate infinite lattice by inerpolating to sites not convered by current resolution... - if( ilat==1 ){ - for( size_t i=0; i ar1 = kv / (twopi*ngrid_); + vec3 ar2 = -kv / (twopi*ngrid_); + + vec3 a1(mat_reciprocal * ar1); + vec3 a2(mat_reciprocal * ar2); + + // translate the k-vectors into the "candidate" FBZ + for( int l1=-numb; l1<=numb; ++l1 ){ + for( int l2=-numb; l2<=numb; ++l2 ){ + for( int l3=-numb; l3<=numb; ++l3 ){ + const vec3 vshift({real_t(l1),real_t(l2),real_t(l3)}); + + // first half of Fourier space (due to real trafo we only have half in memory) + vec3 vectk = a1 + mat_reciprocal * vshift; + + if( check_FBZ( normals, vectk ) ) + { + int ix = std::round(vectk.x*(ngrid_)/twopi); + int iy = std::round(vectk.y*(ngrid_)/twopi); + int iz = std::round(vectk.z*(ngrid_)/twopi); + + if( ix >= -nlattice/2 && iy >= -nlattice/2 && iz >= 0 && + ix < nlattice/2 && iy < nlattice/2 && iz <= nlattice/2){ + ix = (ix<0)? ix+nlattice : ix; + iy = (iy<0)? 
iy+nlattice : iy; + D_xx_.kelem(ix,iy,iz) = eval[2]; + D_xy_.kelem(ix,iy,iz) = eval[1]; + D_xz_.kelem(ix,iy,iz) = eval[0]; + D_yy_.kelem(ix,iy,iz) = vvv.x; + D_yz_.kelem(ix,iy,iz) = vvv.y; + D_zz_.kelem(ix,iy,iz) = vvv.z; + } + } + // second half of Fourier space (due to real trafo we only have half in memory) + vectk = a2 + mat_reciprocal * vshift; + + if( check_FBZ( normals, vectk ) ) + { + int ix = std::round(vectk.x*(ngrid_)/twopi); + int iy = std::round(vectk.y*(ngrid_)/twopi); + int iz = std::round(vectk.z*(ngrid_)/twopi); + + if( ix >= -nlattice/2 && iy >= -nlattice/2 && iz >= 0 && + ix < nlattice/2 && iy < nlattice/2 && iz <= nlattice/2){ + ix = (ix<0)? ix+nlattice : ix; + iy = (iy<0)? iy+nlattice : iy; + D_xx_.kelem(ix,iy,iz) = eval[2]; + D_xy_.kelem(ix,iy,iz) = eval[1]; + D_xz_.kelem(ix,iy,iz) = eval[0]; + D_yy_.kelem(ix,iy,iz) = vvv.x; + D_yz_.kelem(ix,iy,iz) = vvv.y; + D_zz_.kelem(ix,iy,iz) = vvv.z; + } + } + } //l3 + } //l2 + } //l1 + } //k + } //j + } //i + } + + D_xx_.kelem(0,0,0) = 1.0; + D_xy_.kelem(0,0,0) = 0.0; + D_xz_.kelem(0,0,0) = 0.0; + } + + //... approximate infinite lattice by inerpolating to sites not convered by current resolution... + if( ilat==1 ){ + #pragma omp parallel for + for( size_t i=0; i ccomplex_t { + return 0.25 * ( + D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) + + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) ); + }; + + D_xx_.kelem(i,j,k) = avg( D_xx_ ); + D_xy_.kelem(i,j,k) = avg( D_xy_ ); + D_xz_.kelem(i,j,k) = avg( D_xz_ ); + D_yy_.kelem(i,j,k) = avg( D_yy_ ); + D_yz_.kelem(i,j,k) = avg( D_yz_ ); + D_zz_.kelem(i,j,k) = avg( D_zz_ ); } } } - }else if( ilat==2 ){ - for( size_t i=0; i ccomplex_t{ + return 0.5 * ( D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) ); + }; + + D_xx_.kelem(i,j,k) = avg( D_xx_ ); + D_xy_.kelem(i,j,k) = avg( D_xy_ ); + D_xz_.kelem(i,j,k) = avg( D_xz_ ); + D_yy_.kelem(i,j,k) = avg( D_yy_ ); + D_yz_.kelem(i,j,k) = avg( D_yz_ ); + D_zz_.kelem(i,j,k) = avg( D_zz_ ); } } } - for( size_t i=0; i ccomplex_t{ + return 0.5 * ( D.kelem((nlattice+i-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) ); + }; + + D_xx_.kelem(i,j,k) = avg( D_xx_ ); + D_xy_.kelem(i,j,k) = avg( D_xy_ ); + D_xz_.kelem(i,j,k) = avg( D_xz_ ); + D_yy_.kelem(i,j,k) = avg( D_yy_ ); + D_yz_.kelem(i,j,k) = avg( D_yz_ ); + D_zz_.kelem(i,j,k) = avg( D_zz_ ); } } } } } - - mu1.Write_to_HDF5("debug.hdf5","mu1"); +#ifdef PRODUCTION + #pragma omp parallel for + for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + const real_t kmod = kv.norm()/mapratio_/boxlen_; + + double mu1 = std::real(D_xx_.kelem(i,j,k)); + double mu2 = std::real(D_xy_.kelem(i,j,k)); + double mu3 = std::real(D_xz_.kelem(i,j,k)); + + vec evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}) + + // store in diagonal components of D_ij + D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec1.x; + D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec1.y; + D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec1.z; + + auto norm = (kv.norm()/kv.dot(evec1)); + if ( std::abs(kv.dot(evec1)) < 1e-10 || kv.norm() < 1e-10 ) norm = 0.0; + + D_xx_.kelem(i,j,k) *= norm; + D_yy_.kelem(i,j,k) *= norm; + D_zz_.kelem(i,j,k) *= norm; + + // spatially dependent correction to vfact = \dot{D_+}/D_+ + D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); + } + } + } + D_xy_.kelem(0,0,0) = 1.0; +#else + D_xx_.Write_to_HDF5("debug.hdf5","mu1"); D_xy_.Write_to_HDF5("debug.hdf5","mu2"); D_xz_.Write_to_HDF5("debug.hdf5","mu3"); 
D_yy_.Write_to_HDF5("debug.hdf5","e1x"); D_yz_.Write_to_HDF5("debug.hdf5","e1y"); D_zz_.Write_to_HDF5("debug.hdf5","e1z"); +#endif + + + } void init_D__old() @@ -586,15 +654,16 @@ private: { for( size_t k=0; k kv = D_xx_.get_k(i,j,k); - const real_t kmod = kv.norm()/mapratio_/boxlen_; - // put matrix elements into actual matrix D = { std::real(D_xx_.kelem(i,j,k)), std::real(D_xy_.kelem(i,j,k)), std::real(D_xz_.kelem(i,j,k)), std::real(D_yy_.kelem(i,j,k)), std::real(D_yz_.kelem(i,j,k)), std::real(D_zz_.kelem(i,j,k)) }; // compute eigenstructure of matrix D.eigen(eval, evec1, evec2, evec3); + +#ifdef PRODUCTION + vec3 kv = D_xx_.get_k(i,j,k); + const real_t kmod = kv.norm()/mapratio_/boxlen_; // store in diagonal components of D_ij D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.x; @@ -603,7 +672,7 @@ private: auto norm = (kv.norm()/kv.dot(evec3)); if ( std::abs(kv.dot(evec3)) < 1e-10 || kv.norm() < 1e-10 ) norm = 0.0; -#ifdef PRODUCTION + D_xx_.kelem(i,j,k) *= norm; D_yy_.kelem(i,j,k) *= norm; D_zz_.kelem(i,j,k) *= norm; @@ -655,8 +724,7 @@ public: D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_x_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_y_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), - grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), - mu1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) + grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) { csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); @@ -666,12 +734,7 @@ public: : ((lattice_str=="rsc")? lattice_rsc : lattice_sc))); - if( lattice_type != lattice_sc){ - csoca::elog << "PLT not implemented for chosen lattice type! Currently only SC." << std::endl; - abort(); - } - - csoca::ilog << "PLT corrections for SC lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; + csoca::ilog << "PLT corrections for " << lattice_str << " lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; // #if defined(USE_MPI) // if( CONFIG::MPI_task_size>1 ) @@ -684,7 +747,7 @@ public: double wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; - init_D(); + init_D( lattice_type ); // init_D__old(); csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; From beb40bfc352ad848430bc897d52af8ad9577ecb4 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 2 Dec 2019 18:47:34 +0100 Subject: [PATCH 043/130] added non-PLT back into the game. has to be switched by hand in ic_generator.cc right now... 
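The operator added below, op::fourier_gradient, is the plain pseudo-spectral
gradient: every mode is multiplied by i*k, grid indices above GridRes/2 are
mapped to negative frequencies, the Nyquist plane is zeroed, and vfac_corr()
simply returns 1, i.e. the fluid value with no particle-lattice correction.
A minimal standalone sketch of the same index-to-wavenumber convention
(illustrative only; signed_wavenumber is not a name used in the code):

#include <cmath>
#include <cstddef>
#include <cstdio>

// Map grid index m on an N^3 mesh of box size L to the signed wavenumber used
// by the i*k gradient kernel; the Nyquist index N/2 is zeroed so the kernel
// stays purely imaginary and antisymmetric under k -> -k.
double signed_wavenumber(std::size_t m, std::size_t N, double L)
{
    if (m == N / 2) return 0.0;
    const double im = double(m) - double(m > N / 2) * double(N); // folds into (-N/2, N/2)
    return im * 2.0 * M_PI / L;
}

int main()
{
    // for N = 8 and L = 2*pi this prints: 0 1 2 3 0 -3 -2 -1
    for (std::size_t m = 0; m < 8; ++m)
        std::printf("%g ", signed_wavenumber(m, 8, 2.0 * M_PI));
    std::printf("\n");
    return 0;
}

By contrast, the PLT operator of the previous patches stores a mode-dependent
velocity correction, 1/((sqrt(1+24*mu1)-1)/4), the inverse of the PLT
growing-mode exponent in an EdS background, which reduces to 1 in the fluid
limit mu1 = 1. As the subject says, switching between the two operators
currently requires editing ic_generator.cc by hand.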
--- include/general.hh | 2 ++ include/operators.hh | 29 +++++++++++++++++++++++++++++ include/particle_plt.hh | 4 ++-- src/ic_generator.cc | 3 ++- 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/include/general.hh b/include/general.hh index 71e521a..c77be01 100644 --- a/include/general.hh +++ b/include/general.hh @@ -12,6 +12,8 @@ #include #endif +#include + #ifdef USE_SINGLEPRECISION using real_t = float; using complex_t = fftwf_complex; diff --git a/include/operators.hh b/include/operators.hh index 63d94f4..83e17dc 100644 --- a/include/operators.hh +++ b/include/operators.hh @@ -1,7 +1,11 @@ #pragma once +#include + namespace op{ +//!== long list of primitive operators to work on fields ==!// + template< typename field> inline auto assign_to( field& g ){return [&g](auto i, auto v){ g[i] = v; };} @@ -20,4 +24,29 @@ inline auto subtract_from( field& g ){return [&g](auto i, auto v){ g[i] -= v; }; template< typename field> inline auto subtract_twice_from( field& g ){return [&g](auto i, auto v){ g[i] -= 2*v; };} +//! vanilla standard gradient +class fourier_gradient{ +private: + real_t boxlen_, k0_; + ptrdiff_t n_, nhalf_; +public: + explicit fourier_gradient( const ConfigFile& the_config ) + : boxlen_( the_config.GetValue("setup", "BoxLength") ), + n_( the_config.GetValue("setup","GridRes") ), + nhalf_( n_/2 ), + k0_(2.0*M_PI/boxlen_) + {} + + inline ccomplex_t gradient( const int idim, std::array ijk ) const + { + real_t rgrad = + (ijk[idim]!=nhalf_)? (real_t(ijk[idim]) - real_t(ijk[idim] > nhalf_) * n_) : 0.0; + return ccomplex_t(0.0,rgrad * k0_); + } + + inline real_t vfac_corr( std::array ijk ) const + { + return 1.0; + } +}; } diff --git a/include/particle_plt.hh b/include/particle_plt.hh index e48e356..9bed249 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -761,10 +761,10 @@ public: return D_zz_.get_cic_kspace({ix,iy,iz}); } - inline ccomplex_t vfac_corr( std::array ijk ) const + inline real_t vfac_corr( std::array ijk ) const { real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; - return D_xy_.get_cic_kspace({ix,iy,iz}); + return std::real(D_xy_.get_cic_kspace({ix,iy,iz})); } }; diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 4394947..56566ff 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -168,7 +168,8 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- // Create PLT gradient operator //-------------------------------------------------------------------- - particle::lattice_gradient lg( the_config ); + // particle::lattice_gradient lg( the_config ); + op::fourier_gradient lg( the_config ); //-------------------------------------------------------------------- std::vector species_list; From d8cbc4fca681395cc50dee85838e9f97e7a48f00 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 4 Dec 2019 14:26:42 +0100 Subject: [PATCH 044/130] improved PLT field interpolation, still has problems for FCC though --- include/operators.hh | 6 +- include/particle_plt.hh | 171 ++++++++++++++++++++++++++-------------- include/vec3.hh | 12 +-- 3 files changed, 119 insertions(+), 70 deletions(-) diff --git a/include/operators.hh b/include/operators.hh index 83e17dc..be6d1f7 100644 --- a/include/operators.hh +++ b/include/operators.hh @@ -28,13 +28,13 @@ inline auto subtract_twice_from( field& g ){return [&g](auto i, auto v){ g[i] -= class fourier_gradient{ private: real_t boxlen_, k0_; - ptrdiff_t n_, nhalf_; + size_t n_, nhalf_; public: explicit 
fourier_gradient( const ConfigFile& the_config ) : boxlen_( the_config.GetValue("setup", "BoxLength") ), + k0_(2.0*M_PI/boxlen_), n_( the_config.GetValue("setup","GridRes") ), - nhalf_( n_/2 ), - k0_(2.0*M_PI/boxlen_) + nhalf_( n_/2 ) {} inline ccomplex_t gradient( const int idim, std::array ijk ) const diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 9bed249..6e8d280 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -12,7 +12,7 @@ #include #include -// #define PRODUCTION +#define PRODUCTION namespace particle{ //! implement Marcos et al. PLT calculation @@ -27,6 +27,14 @@ private: std::vector> vectk_; std::vector> ico_, vecitk_; + bool is_even( int i ){ return (i%2)==0; } + + bool is_in( int i, int j, int k, const mat3& M ){ + vec3 v({i,j,k}); + auto vv = M * v; + return is_even(vv.x)&&is_even(vv.y)&&is_even(vv.z); + } + void init_D( lattice lattice_type ) { constexpr real_t pi = M_PI; @@ -47,6 +55,11 @@ private: 0.0, twopi, 0.0, 0.0, 0.0, twopi, }; + const mat3 mat_invrecip_sc{ + 2, 0, 0, + 0, 2, 0, + 0, 0, 2, + }; const std::vector> normals_sc{ {pi,0.,0.},{-pi,0.,0.}, {0.,pi,0.},{0.,-pi,0.}, @@ -66,6 +79,11 @@ private: 0.0, twopi, 0.0, -twopi, -twopi, fourpi, }; + const mat3 mat_invrecip_bcc{ + 2, 0, 0, + 0, 2, 0, + 1, 1, 1, + }; const std::vector> normals_bcc{ {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, @@ -85,6 +103,11 @@ private: 0.0, 0.0, twopi, fourpi, 0.0, -twopi, }; + const mat3 mat_invrecip_fcc{ + 0, 1, 1, + 1, 0, 1, + 0, 2, 0, + }; const std::vector> normals_fcc{ {twopi,0.,0.},{-twopi,0.,0.}, {0.,twopi,0.},{0.,-twopi,0.}, @@ -100,6 +123,7 @@ private: const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc; const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc; + const auto mat_invrecip = (ilat==2)? mat_invrecip_fcc : (ilat==1)? mat_invrecip_bcc : mat_invrecip_sc; const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc; const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? charge_fac_bcc : charge_fac_sc; @@ -300,9 +324,8 @@ private: // compute eigenstructure of matrix D.eigen(eval, evec1, evec2, evec3); + evec3 /= (twopi*ngrid_); - auto vvv = evec3 / (twopi*ngrid_); - // now determine to which modes on the regular lattice this contributes vec3 ar1 = kv / (twopi*ngrid_); vec3 ar2 = -kv / (twopi*ngrid_); @@ -324,17 +347,17 @@ private: int ix = std::round(vectk.x*(ngrid_)/twopi); int iy = std::round(vectk.y*(ngrid_)/twopi); int iz = std::round(vectk.z*(ngrid_)/twopi); - - if( ix >= -nlattice/2 && iy >= -nlattice/2 && iz >= 0 && - ix < nlattice/2 && iy < nlattice/2 && iz <= nlattice/2){ + if( ix > -nlattice/2 && iy > -nlattice/2 && iz >= 0 && + ix <= nlattice/2 && iy <= nlattice/2 && iz <= nlattice/2){ ix = (ix<0)? ix+nlattice : ix; iy = (iy<0)? 
iy+nlattice : iy; + real_t sign = (evec3.dot(vectk) >= 0.0)?1.0:-1.0; D_xx_.kelem(ix,iy,iz) = eval[2]; D_xy_.kelem(ix,iy,iz) = eval[1]; D_xz_.kelem(ix,iy,iz) = eval[0]; - D_yy_.kelem(ix,iy,iz) = vvv.x; - D_yz_.kelem(ix,iy,iz) = vvv.y; - D_zz_.kelem(ix,iy,iz) = vvv.z; + D_yy_.kelem(ix,iy,iz) = evec3.x*sign; + D_yz_.kelem(ix,iy,iz) = evec3.y*sign; + D_zz_.kelem(ix,iy,iz) = evec3.z*sign; } } // second half of Fourier space (due to real trafo we only have half in memory) @@ -345,17 +368,17 @@ private: int ix = std::round(vectk.x*(ngrid_)/twopi); int iy = std::round(vectk.y*(ngrid_)/twopi); int iz = std::round(vectk.z*(ngrid_)/twopi); - - if( ix >= -nlattice/2 && iy >= -nlattice/2 && iz >= 0 && - ix < nlattice/2 && iy < nlattice/2 && iz <= nlattice/2){ + if( ix > -nlattice/2 && iy > -nlattice/2 && iz >= 0 && + ix <= nlattice/2 && iy <= nlattice/2 && iz <= nlattice/2){ ix = (ix<0)? ix+nlattice : ix; iy = (iy<0)? iy+nlattice : iy; + real_t sign = (evec3.dot(vectk) >= 0.0)?1.0:-1.0; D_xx_.kelem(ix,iy,iz) = eval[2]; D_xy_.kelem(ix,iy,iz) = eval[1]; D_xz_.kelem(ix,iy,iz) = eval[0]; - D_yy_.kelem(ix,iy,iz) = vvv.x; - D_yz_.kelem(ix,iy,iz) = vvv.y; - D_zz_.kelem(ix,iy,iz) = vvv.z; + D_yy_.kelem(ix,iy,iz) = evec3.x*sign; + D_yz_.kelem(ix,iy,iz) = evec3.y*sign; + D_zz_.kelem(ix,iy,iz) = evec3.z*sign; } } } //l3 @@ -369,6 +392,10 @@ private: D_xx_.kelem(0,0,0) = 1.0; D_xy_.kelem(0,0,0) = 0.0; D_xz_.kelem(0,0,0) = 0.0; + + D_yy_.kelem(0,0,0) = 1.0; + D_yz_.kelem(0,0,0) = 0.0; + D_zz_.kelem(0,0,0) = 0.0; } //... approximate infinite lattice by inerpolating to sites not convered by current resolution... @@ -377,11 +404,20 @@ private: for( size_t i=0; i ccomplex_t { - return 0.25 * ( - D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) - + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) ); + if( k>0 && k< size_t(nlattice/2) ) return 1.0/6.0 * ( + D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) + + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) + + D.kelem(i,j,k-1) + D.kelem(i,j,k+1) ); + if( k==0 ) return 1.0/6.0 * ( + D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) + + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) + + D.kelem(i,j,k+1) + D.kelem(i,j,k+1) ); + return 1.0/6.0 * ( + D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) + + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) + + D.kelem(i,j,k-1) + D.kelem(i,j,k-1) ); }; D_xx_.kelem(i,j,k) = avg( D_xx_ ); @@ -399,30 +435,17 @@ private: for( size_t i=0; i ccomplex_t{ - return 0.5 * ( D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) ); - }; - - D_xx_.kelem(i,j,k) = avg( D_xx_ ); - D_xy_.kelem(i,j,k) = avg( D_xy_ ); - D_xz_.kelem(i,j,k) = avg( D_xz_ ); - D_yy_.kelem(i,j,k) = avg( D_yy_ ); - D_yz_.kelem(i,j,k) = avg( D_yz_ ); - D_zz_.kelem(i,j,k) = avg( D_zz_ ); - } - } - } - } - #pragma omp parallel for - for( size_t i=0; i ccomplex_t{ - return 0.5 * ( D.kelem((nlattice+i-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) ); - }; - + if( is_in( (i+1)%nlattice, j, k, mat_invrecip_fcc ) ){ + return 0.5 * ( D.kelem((i+nlattice-1)%nlattice,j,k) + D.kelem((i+1)%nlattice,j,k) ); + }else if( is_in( i, (j+1)%nlattice, k, mat_invrecip_fcc ) ){ + return 0.5 * ( D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) ); + }//else// + if( k>0 && k< size_t(nlattice/2) ) return 0.5 * ( D.kelem(i,j,k-1) + D.kelem(i,j,k+1) ); + if( k==0 ) return D.kelem(i,j,k+1); + return D.kelem(i,j,k-1); + }; 
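                    // Note on the FCC branch above (explanatory aside, not part of the patch):
                    // with mat_invrecip_fcc, is_in(i,j,k,...) holds only when i, j and k all
                    // share the same parity, so only one cubic mode in four receives a folded
                    // FCC mode (consistent with charge_fac_fcc = 4). Every uncovered site then
                    // has a covered nearest neighbour along x, y or z, which is what the three
                    // branches of avg() probe in turn, falling back to the single interior
                    // z-neighbour on the k = 0 and k = nlattice/2 planes.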
D_xx_.kelem(i,j,k) = avg( D_xx_ ); D_xy_.kelem(i,j,k) = avg( D_xy_ ); D_xz_.kelem(i,j,k) = avg( D_xz_ ); @@ -441,33 +464,62 @@ private: for( size_t j=0; jsize_t(nlattice/2))? int(i)-nlattice : i; + int jj = (j>size_t(nlattice/2))? int(j)-nlattice : j; + vec3 kv = D_xx_.get_k(i,j,k); const real_t kmod = kv.norm()/mapratio_/boxlen_; double mu1 = std::real(D_xx_.kelem(i,j,k)); - double mu2 = std::real(D_xy_.kelem(i,j,k)); - double mu3 = std::real(D_xz_.kelem(i,j,k)); + // double mu2 = std::real(D_xy_.kelem(i,j,k)); + // double mu3 = std::real(D_xz_.kelem(i,j,k)); - vec evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}) - - // store in diagonal components of D_ij - D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec1.x; - D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec1.y; - D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec1.z; + vec3 evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}); + evec1 /= evec1.norm(); - auto norm = (kv.norm()/kv.dot(evec1)); - if ( std::abs(kv.dot(evec1)) < 1e-10 || kv.norm() < 1e-10 ) norm = 0.0; + if(std::abs(ii)+std::abs(jj)+k<8){ + // small k modes, use usual pseudospectral derivative + // -- store in diagonal components of D_ij + D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kv.x/mapratio_/boxlen_); + D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kv.y/mapratio_/boxlen_); + D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kv.z/mapratio_/boxlen_); - D_xx_.kelem(i,j,k) *= norm; - D_yy_.kelem(i,j,k) *= norm; - D_zz_.kelem(i,j,k) *= norm; + // spatially dependent correction to vfact = \dot{D_+}/D_+ + D_xy_.kelem(i,j,k) = 1.0; + }else{ + // large k modes, use interpolated PLT results + // -- store in diagonal components of D_ij + D_xx_.kelem(i,j,k) = ccomplex_t(0.0,evec1.x * kmod); + D_yy_.kelem(i,j,k) = ccomplex_t(0.0,evec1.y * kmod); + D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod); - // spatially dependent correction to vfact = \dot{D_+}/D_+ - D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); + // re-normalise to that longitudinal amplitude is exact + auto kv_dot_e1 = (kv.norm()>1e-8)?kv.dot(evec1):kv.norm(); + auto norm = (kv.norm()/kv_dot_e1); + D_xx_.kelem(i,j,k) *= norm; + D_yy_.kelem(i,j,k) *= norm; + D_zz_.kelem(i,j,k) *= norm; + + // spatially dependent correction to vfact = \dot{D_+}/D_+ + D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); + } + if( i==size_t(nlattice/2) ) D_xx_.kelem(i,j,k)=0.0; + if( j==size_t(nlattice/2) ) D_yy_.kelem(i,j,k)=0.0; + if( k==size_t(nlattice/2) ) D_zz_.kelem(i,j,k)=0.0; } } } D_xy_.kelem(0,0,0) = 1.0; + D_xx_.kelem(0,0,0) = 0.0; + D_yy_.kelem(0,0,0) = 0.0; + D_zz_.kelem(0,0,0) = 0.0; + + // unlink("debug.hdf5"); + // D_xy_.Write_to_HDF5("debug.hdf5","mu1"); + // D_xx_.Write_to_HDF5("debug.hdf5","e1x"); + // D_yy_.Write_to_HDF5("debug.hdf5","e1y"); + // D_zz_.Write_to_HDF5("debug.hdf5","e1z"); + #else D_xx_.Write_to_HDF5("debug.hdf5","mu1"); D_xy_.Write_to_HDF5("debug.hdf5","mu2"); @@ -475,10 +527,7 @@ private: D_yy_.Write_to_HDF5("debug.hdf5","e1x"); D_yz_.Write_to_HDF5("debug.hdf5","e1y"); D_zz_.Write_to_HDF5("debug.hdf5","e1z"); -#endif - - - +#endif } void init_D__old() diff --git a/include/vec3.hh b/include/vec3.hh index af2bb9a..3d45c10 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -51,9 +51,6 @@ public: // assignment operator vec3& operator=( const vec3& v ) noexcept { data_=v.data_; return *this; } - // assignment operator - const vec3& operator=( const vec3& v ) const noexcept{ data_=v.data_; return *this; } - //! 
implementation of summation of vec3 vec3 operator+( const vec3& v ) const noexcept{ return vec3({x+v.x,y+v.y,z+v.z}); } @@ -70,14 +67,17 @@ public: vec3 operator/( T s ) const noexcept{ return vec3({x/s,y/s,z/s}); } //! implementation of += operator - vec3& operator+=( const vec3& v ) const noexcept{ x+=v.x; y+=v.y; z+=v.z; return *this; } + vec3& operator+=( const vec3& v ) noexcept{ x+=v.x; y+=v.y; z+=v.z; return *this; } //! implementation of -= operator - vec3& operator-=( const vec3& v ) const noexcept{ x-=v.x; y-=v.y; z-=v.z; return *this; } + vec3& operator-=( const vec3& v ) noexcept{ x-=v.x; y-=v.y; z-=v.z; return *this; } //! multiply with scalar - vec3& operator*=( T s ) const noexcept{ x*=s; y*=s; z*=s; return *this; } + vec3& operator*=( T s ) noexcept{ x*=s; y*=s; z*=s; return *this; } + //! divide by scalar + vec3& operator/=( T s ) noexcept{ x/=s; y/=s; z/=s; return *this; } + //! compute dot product with another vector T dot(const vec3 &a) const noexcept { From 6a998123c7cc179d3261dbe28155e1ad44ce3887 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 5 Dec 2019 05:43:49 +0100 Subject: [PATCH 045/130] improved implementation of plt field interpolation --- include/particle_plt.hh | 300 ++++++++++++++++++---------------------- include/vec3.hh | 8 ++ 2 files changed, 144 insertions(+), 164 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 6e8d280..6671028 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -15,7 +16,7 @@ #define PRODUCTION namespace particle{ -//! implement Marcos et al. PLT calculation +//! implement Joyce, Marcos et al. PLT calculation class lattice_gradient{ private: @@ -271,189 +272,161 @@ private: std::ofstream ofs2("test_brillouin.txt"); #endif - { - //!=== Make temporary copies before resorting to std. Fourier grid ========!// - Grid_FFT - temp1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), - temp2({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), - temp3({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}); - - temp1.FourierTransformForward(false); - temp2.FourierTransformForward(false); - temp3.FourierTransformForward(false); + using map_t = std::map,size_t>; + map_t iimap; - #pragma omp parallel for + //!=== Make temporary copies before resorting to std. 
Fourier grid ========!// + Grid_FFT + temp1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + temp2({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + temp3({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}); + + temp1.FourierTransformForward(false); + temp2.FourierTransformForward(false); + temp3.FourierTransformForward(false); + + #pragma omp parallel for + for( size_t i=0; i D; + vec3 eval, evec1, evec2, evec3; + + #pragma omp for for( size_t i=0; i D; - vec3 eval, evec1, evec2, evec3; + vec3 kv = D_xx_.get_k(i,j,k); + + // put matrix elements into actual matrix + D(0,0) = std::real(temp1.kelem(i,j,k)) / fft_norm12; + D(0,1) = D(1,0) = std::imag(temp1.kelem(i,j,k)) / fft_norm12; + D(0,2) = D(2,0) = std::real(temp2.kelem(i,j,k)) / fft_norm12; + D(1,1) = std::imag(temp2.kelem(i,j,k)) / fft_norm12; + D(1,2) = D(2,1) = std::real(temp3.kelem(i,j,k)) / fft_norm12; + D(2,2) = std::imag(temp3.kelem(i,j,k)) / fft_norm12; - #pragma omp for - for( size_t i=0; i kv = D_xx_.get_k(i,j,k); - - // put matrix elements into actual matrix - D(0,0) = std::real(temp1.kelem(i,j,k)) / fft_norm12; - D(0,1) = D(1,0) = std::imag(temp1.kelem(i,j,k)) / fft_norm12; - D(0,2) = D(2,0) = std::real(temp2.kelem(i,j,k)) / fft_norm12; - D(1,1) = std::imag(temp2.kelem(i,j,k)) / fft_norm12; - D(1,2) = D(2,1) = std::real(temp3.kelem(i,j,k)) / fft_norm12; - D(2,2) = std::imag(temp3.kelem(i,j,k)) / fft_norm12; + // compute eigenstructure of matrix + D.eigen(eval, evec1, evec2, evec3); + evec3 /= (twopi*ngrid_); - // compute eigenstructure of matrix - D.eigen(eval, evec1, evec2, evec3); - evec3 /= (twopi*ngrid_); - - // now determine to which modes on the regular lattice this contributes - vec3 ar1 = kv / (twopi*ngrid_); - vec3 ar2 = -kv / (twopi*ngrid_); - - vec3 a1(mat_reciprocal * ar1); - vec3 a2(mat_reciprocal * ar2); - - // translate the k-vectors into the "candidate" FBZ - for( int l1=-numb; l1<=numb; ++l1 ){ - for( int l2=-numb; l2<=numb; ++l2 ){ - for( int l3=-numb; l3<=numb; ++l3 ){ + // now determine to which modes on the regular lattice this contributes + vec3 ar = kv / (twopi*ngrid_); + vec3 a(mat_reciprocal * ar); + + // translate the k-vectors into the "candidate" FBZ + for( int l1=-numb; l1<=numb; ++l1 ){ + for( int l2=-numb; l2<=numb; ++l2 ){ + for( int l3=-numb; l3<=numb; ++l3 ){ + // need both halfs of Fourier space since we use real transforms + for( int isign=0; isign<=1; ++isign ){ + real_t sign = (isign==0)? +1.0 : -1.0; const vec3 vshift({real_t(l1),real_t(l2),real_t(l3)}); - // first half of Fourier space (due to real trafo we only have half in memory) - vec3 vectk = a1 + mat_reciprocal * vshift; + vec3 vectk = sign * a + mat_reciprocal * vshift; if( check_FBZ( normals, vectk ) ) { int ix = std::round(vectk.x*(ngrid_)/twopi); int iy = std::round(vectk.y*(ngrid_)/twopi); int iz = std::round(vectk.z*(ngrid_)/twopi); - if( ix > -nlattice/2 && iy > -nlattice/2 && iz >= 0 && - ix <= nlattice/2 && iy <= nlattice/2 && iz <= nlattice/2){ - ix = (ix<0)? ix+nlattice : ix; - iy = (iy<0)? 
iy+nlattice : iy; - real_t sign = (evec3.dot(vectk) >= 0.0)?1.0:-1.0; - D_xx_.kelem(ix,iy,iz) = eval[2]; - D_xy_.kelem(ix,iy,iz) = eval[1]; - D_xz_.kelem(ix,iy,iz) = eval[0]; - D_yy_.kelem(ix,iy,iz) = evec3.x*sign; - D_yz_.kelem(ix,iy,iz) = evec3.y*sign; - D_zz_.kelem(ix,iy,iz) = evec3.z*sign; - } + + #pragma omp critical + {iimap.insert( std::pair,size_t>({ix,iy,iz}, D_xx_.get_idx(i,j,k)) );} + + temp1.kelem(i,j,k) = ccomplex_t(eval[2],eval[1]); + temp2.kelem(i,j,k) = ccomplex_t(eval[0],evec3.x); + temp3.kelem(i,j,k) = ccomplex_t(evec3.y,evec3.z); } - // second half of Fourier space (due to real trafo we only have half in memory) - vectk = a2 + mat_reciprocal * vshift; - - if( check_FBZ( normals, vectk ) ) - { - int ix = std::round(vectk.x*(ngrid_)/twopi); - int iy = std::round(vectk.y*(ngrid_)/twopi); - int iz = std::round(vectk.z*(ngrid_)/twopi); - if( ix > -nlattice/2 && iy > -nlattice/2 && iz >= 0 && - ix <= nlattice/2 && iy <= nlattice/2 && iz <= nlattice/2){ - ix = (ix<0)? ix+nlattice : ix; - iy = (iy<0)? iy+nlattice : iy; - real_t sign = (evec3.dot(vectk) >= 0.0)?1.0:-1.0; - D_xx_.kelem(ix,iy,iz) = eval[2]; - D_xy_.kelem(ix,iy,iz) = eval[1]; - D_xz_.kelem(ix,iy,iz) = eval[0]; - D_yy_.kelem(ix,iy,iz) = evec3.x*sign; - D_yz_.kelem(ix,iy,iz) = evec3.y*sign; - D_zz_.kelem(ix,iy,iz) = evec3.z*sign; - } - } - } //l3 - } //l2 - } //l1 - } //k - } //j - } //i - } - - D_xx_.kelem(0,0,0) = 1.0; - D_xy_.kelem(0,0,0) = 0.0; - D_xz_.kelem(0,0,0) = 0.0; - - D_yy_.kelem(0,0,0) = 1.0; - D_yz_.kelem(0,0,0) = 0.0; - D_zz_.kelem(0,0,0) = 0.0; + }//sign + } //l3 + } //l2 + } //l1 + } //k + } //j + } //i } + D_xx_.kelem(0,0,0) = 1.0; + D_xy_.kelem(0,0,0) = 0.0; + D_xz_.kelem(0,0,0) = 0.0; + + D_yy_.kelem(0,0,0) = 1.0; + D_yz_.kelem(0,0,0) = 0.0; + D_zz_.kelem(0,0,0) = 0.0; + //... approximate infinite lattice by inerpolating to sites not convered by current resolution... - if( ilat==1 ){ - #pragma omp parallel for - for( size_t i=0; i ccomplex_t { - if( k>0 && k< size_t(nlattice/2) ) return 1.0/6.0 * ( - D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) - + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) - + D.kelem(i,j,k-1) + D.kelem(i,j,k+1) ); - if( k==0 ) return 1.0/6.0 * ( - D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) - + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) - + D.kelem(i,j,k+1) + D.kelem(i,j,k+1) ); - return 1.0/6.0 * ( - D.kelem((i+nlattice-1)%nlattice,j,k)+ D.kelem((i+1)%nlattice,j,k) - + D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) - + D.kelem(i,j,k-1) + D.kelem(i,j,k-1) ); + #pragma omp parallel for + for( size_t i=0; inlattice/2)? int(i)-nlattice : int(i); + int jj = (int(j)>nlattice/2)? int(j)-nlattice : int(j); + int kk = (int(k)>nlattice/2)? 
int(k)-nlattice : int(k); + vec3 kv({real_t(ii),real_t(jj),real_t(kk)}); + + auto align_with_k = [&]( const vec3& v ) -> vec3{ + return v*((v.dot(kv)<0.0)?-1.0:1.0); + }; + + vec3 v, l; + map_t::iterator it; + + if( !is_in(i,j,k,mat_invrecip) ){ + auto average_lv = [&]( const auto& t1, const auto& t2, const auto& t3, vec3& v, vec3& l ) { + v = 0.0; l = 0.0; + int count(0); + + auto add_lv = [&]( auto it ) -> void { + auto q = it->second;++count; + l += vec3({std::real(t1.kelem(q)),std::imag(t1.kelem(q)),std::real(t2.kelem(q))}); + v += align_with_k(vec3({std::imag(t2.kelem(q)),std::real(t3.kelem(q)),std::imag(t3.kelem(q))})); }; + map_t::iterator it; + if( (it = iimap.find({ii-1,jj,kk}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii+1,jj,kk}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii,jj-1,kk}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii,jj+1,kk}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii,jj,kk-1}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii,jj,kk+1}))!=iimap.end() ){ add_lv(it); } + l/=real_t(count); v/=real_t(count); + }; - D_xx_.kelem(i,j,k) = avg( D_xx_ ); - D_xy_.kelem(i,j,k) = avg( D_xy_ ); - D_xz_.kelem(i,j,k) = avg( D_xz_ ); - D_yy_.kelem(i,j,k) = avg( D_yy_ ); - D_yz_.kelem(i,j,k) = avg( D_yz_ ); - D_zz_.kelem(i,j,k) = avg( D_zz_ ); - } - } - } - } - }else if( ilat==2 ){ - #pragma omp parallel for - for( size_t i=0; i ccomplex_t{ - if( is_in( (i+1)%nlattice, j, k, mat_invrecip_fcc ) ){ - return 0.5 * ( D.kelem((i+nlattice-1)%nlattice,j,k) + D.kelem((i+1)%nlattice,j,k) ); - }else if( is_in( i, (j+1)%nlattice, k, mat_invrecip_fcc ) ){ - return 0.5 * ( D.kelem(i,(j+nlattice-1)%nlattice,k) + D.kelem(i,(j+1)%nlattice,k) ); - }//else// - if( k>0 && k< size_t(nlattice/2) ) return 0.5 * ( D.kelem(i,j,k-1) + D.kelem(i,j,k+1) ); - if( k==0 ) return D.kelem(i,j,k+1); - return D.kelem(i,j,k-1); - }; - D_xx_.kelem(i,j,k) = avg( D_xx_ ); - D_xy_.kelem(i,j,k) = avg( D_xy_ ); - D_xz_.kelem(i,j,k) = avg( D_xz_ ); - D_yy_.kelem(i,j,k) = avg( D_yy_ ); - D_yz_.kelem(i,j,k) = avg( D_yz_ ); - D_zz_.kelem(i,j,k) = avg( D_zz_ ); + average_lv(temp1,temp2,temp3,v,l); + + }else{ + if( (it = iimap.find({ii,jj,kk}))!=iimap.end() ){ + auto q = it->second; + l = vec3({std::real(temp1.kelem(q)),std::imag(temp1.kelem(q)),std::real(temp2.kelem(q))}); + v = align_with_k(vec3({std::imag(temp2.kelem(q)),std::real(temp3.kelem(q)),std::imag(temp3.kelem(q))})); } } + D_xx_.kelem(i,j,k) = l[0]; + D_xy_.kelem(i,j,k) = l[1]; + D_xz_.kelem(i,j,k) = l[2]; + D_yy_.kelem(i,j,k) = v[0]; + D_yz_.kelem(i,j,k) = v[1]; + D_zz_.kelem(i,j,k) = v[2]; } } } @@ -494,8 +467,7 @@ private: D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod); // re-normalise to that longitudinal amplitude is exact - auto kv_dot_e1 = (kv.norm()>1e-8)?kv.dot(evec1):kv.norm(); - auto norm = (kv.norm()/kv_dot_e1); + auto norm = (kv.norm()/kv.dot(evec1)); D_xx_.kelem(i,j,k) *= norm; D_yy_.kelem(i,j,k) *= norm; D_zz_.kelem(i,j,k) *= norm; @@ -503,9 +475,9 @@ private: // spatially dependent correction to vfact = \dot{D_+}/D_+ D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); } - if( i==size_t(nlattice/2) ) D_xx_.kelem(i,j,k)=0.0; - if( j==size_t(nlattice/2) ) D_yy_.kelem(i,j,k)=0.0; - if( k==size_t(nlattice/2) ) D_zz_.kelem(i,j,k)=0.0; + // if( i==size_t(nlattice/2) ) D_xx_.kelem(i,j,k)=0.0; + // if( j==size_t(nlattice/2) ) D_yy_.kelem(i,j,k)=0.0; + // if( k==size_t(nlattice/2) ) D_zz_.kelem(i,j,k)=0.0; } } } diff --git a/include/vec3.hh b/include/vec3.hh index 3d45c10..4e72d81 100644 
--- a/include/vec3.hh +++ b/include/vec3.hh @@ -101,6 +101,14 @@ public: for( auto& x : data_ ) x = (x<-p/2)? x+p : (x>=p/2)? x-p : x; return *this; } + + //! ordering, allows 3d sorting of vec3s + bool operator<( const vec3& o ) const noexcept{ + if( x!=o.x ) return x Date: Thu, 5 Dec 2019 22:26:16 +0100 Subject: [PATCH 046/130] zeroed nyquist modes in plt --- include/particle_plt.hh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 6671028..6948d19 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -475,9 +475,9 @@ private: // spatially dependent correction to vfact = \dot{D_+}/D_+ D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); } - // if( i==size_t(nlattice/2) ) D_xx_.kelem(i,j,k)=0.0; - // if( j==size_t(nlattice/2) ) D_yy_.kelem(i,j,k)=0.0; - // if( k==size_t(nlattice/2) ) D_zz_.kelem(i,j,k)=0.0; + if( i==size_t(nlattice/2) ) D_xx_.kelem(i,j,k)=0.0; + if( j==size_t(nlattice/2) ) D_yy_.kelem(i,j,k)=0.0; + if( k==size_t(nlattice/2) ) D_zz_.kelem(i,j,k)=0.0; } } } From 120cf21577253a1fa5e52c211a72468c42329fae Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 11:52:08 +0000 Subject: [PATCH 047/130] README.md edited online with Bitbucket --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index e34dce2..495a2c6 100644 --- a/README.md +++ b/README.md @@ -18,3 +18,9 @@ Create build directory, configure, and build: this should create an executable in the build directory. There is an example parameter file 'example.conf' in the main directory + +If you run into problems with CMake not being able to find your local FFTW3 or HDF5 installation, it is best to give the path directly as + + FFTW3_ROOT= HDF5_ROOT= ccmake .. + +make sure to delete previous files generated by CMake before reconfiguring like this. \ No newline at end of file From e3017dea955f981aabe187f13a3d0ac5c36f1ddd Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 11:55:56 +0000 Subject: [PATCH 048/130] README.md edited online with Bitbucket --- README.md | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 495a2c6..aac4f55 100644 --- a/README.md +++ b/README.md @@ -17,10 +17,23 @@ Create build directory, configure, and build: make this should create an executable in the build directory. -There is an example parameter file 'example.conf' in the main directory If you run into problems with CMake not being able to find your local FFTW3 or HDF5 installation, it is best to give the path directly as FFTW3_ROOT= HDF5_ROOT= ccmake .. -make sure to delete previous files generated by CMake before reconfiguring like this. \ No newline at end of file +make sure to delete previous files generated by CMake before reconfiguring like this. + +## Running + +There is an example parameter file 'example.conf' in the main directory. Possible options are explained in it, it can be run +as a simple argument, e.g. from within the build directory: + + ./monofonic ../example.conf + +If you want to run with MPI, you need to enable MPI support via ccmake. Then you can launch in hybrid MPI+threads mode by +specifying the desired number of threads per task in the config file, and the number of tasks to be launched via + + mpirun -np 16 ./monofonic + +It will then run with 16 tasks times the number of threads per task specified in the config file. 
\ No newline at end of file From cffea05dcd275e7911496d7d061fafd447083094 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 11:58:35 +0000 Subject: [PATCH 049/130] README.md edited online with Bitbucket --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index aac4f55..c7cc745 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,13 @@ If you run into problems with CMake not being able to find your local FFTW3 or H make sure to delete previous files generated by CMake before reconfiguring like this. +If you want to build on macOS, then it is strongly recommended to use GNU (or Intel) compilers instead of Apple's Clang. Install them e.g. +via homebrew and then configure cmake to use them instead of the macOS default compiler via + + CC=gcc-9 CXX=g++-9 ccmake .. + +This is necessary since Apple's compilers haven't supported OpenMP for years. + ## Running There is an example parameter file 'example.conf' in the main directory. Possible options are explained in it, it can be run From 2ef654f22ad271c73ec6bcd428ab471d74c4844a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 13:54:24 +0100 Subject: [PATCH 050/130] small optimizations --- CMakeLists.txt | 2 +- example.conf | 9 +- example_testing.conf | 33 + external/class | 2 +- external/fftwpp | 1 + ics.conf | 62 ++ include/particle_plt.hh | 8 +- src/old/output_gadget2___original.cc | 1408 ++++++++++++++++++++++++++ 8 files changed, 1514 insertions(+), 11 deletions(-) create mode 100644 example_testing.conf create mode 160000 external/fftwpp create mode 100644 ics.conf create mode 100644 src/old/output_gadget2___original.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index a3bffae..5df55a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ project(monofonIC) # include class submodule include(${CMAKE_CURRENT_SOURCE_DIR}/external/class.cmake) -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") +#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic") find_package(PkgConfig REQUIRED) diff --git a/example.conf b/example.conf index 58fc969..a537d40 100644 --- a/example.conf +++ b/example.conf @@ -4,13 +4,13 @@ GridRes = 128 # length of the box in Mpc/h BoxLength = 250 # starting redshift -zstart = 49.0 +zstart = 24.0 # order of the LPT to be used (1,2 or 3) -LPTorder = 3 +LPTorder = 1 # also do baryon ICs? DoBaryons = no # do mode fixing à la Angulo&Pontzen -DoFixing = no +DoFixing = yes # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!) 
ParticleLoad = sc @@ -36,7 +36,8 @@ seed = 9001 [testing] # enables diagnostic output # can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence' -test = convergence +#test = convergence +test = none [execution] NumThreads = 4 diff --git a/example_testing.conf b/example_testing.conf new file mode 100644 index 0000000..2890286 --- /dev/null +++ b/example_testing.conf @@ -0,0 +1,33 @@ +[setup] +GridRes = 256 +BoxLength = 6.28318530718 +zstart = 0.0 +LPTorder = 1 +SymplecticPT = no +DoFixing = no + +[execution] +NumThreads = 4 + +[output] +fname_hdf5 = output.hdf5 +fbase_analysis = output +#format = gadget2 +#filename = ics_gadget.dat +format = generic +filename = debug.hdf5 +generic_out_eulerian = yes + +[random] +generator = NGENIC +seed = 9001 + +[cosmology] +#transfer = CLASS +transfer = eisenstein +Omega_m = 1.0 +Omega_b = 0.045 +Omega_L = 0.0 +H0 = 70.3 +sigma_8 = 0.811 +nspec = 0.961 diff --git a/external/class b/external/class index b34d7f6..6f3abba 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit b34d7f6c2b72eab3a347c28e62298d62ca9dd69b +Subproject commit 6f3abbab2608712029d740d6c69aad0ba853e507 diff --git a/external/fftwpp b/external/fftwpp new file mode 160000 index 0000000..ec6b82c --- /dev/null +++ b/external/fftwpp @@ -0,0 +1 @@ +Subproject commit ec6b82cc1122ba029a7a7142cf836014e992e68c diff --git a/ics.conf b/ics.conf new file mode 100644 index 0000000..d867cb9 --- /dev/null +++ b/ics.conf @@ -0,0 +1,62 @@ +[setup] +# number of grid cells per linear dimension for calculations = particles for sc initial load +GridRes = 128 +# length of the box in Mpc/h +BoxLength = 200 +# starting redshift +zstart = 24.0 +# order of the LPT to be used (1,2 or 3) +LPTorder = 1 +# also do baryon ICs? +DoBaryons = no +# do mode fixing à la Angulo&Pontzen +DoFixing = yes +# particle load, can be 'sc' (1x), 'bcc' (2x), 'fcc' (4x), or 'rsc' (8x) +ParticleLoad = sc + +[testing] +# enables diagnostic output +# can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence' +#test = potentials_and_densities +#test = convergence +test = none + +[execution] +NumThreads = 1 + +[output] +fname_hdf5 = output.hdf5 +fbase_analysis = output + +#format = gadget2 +#filename = ics_gadget.dat + +format = generic +filename = debug.hdf5 +#generic_out_eulerian = yes + +#format = grafic2 +#filename = ics_ramses +#grafic_use_SPT = yes + +[random] +generator = NGENIC +seed = 9001 + +[cosmology] +transfer = eisenstein +#transfer = CLASS +#transfer = eisenstein_wdm +#WDMmass = 0.1 +Omega_m = 0.302 +Omega_b = 0.045 +Omega_L = 0.698 +H0 = 70.3 +sigma_8 = 0.811 +nspec = 0.961 + +# anisotropic large scale tidal field +#LSS_aniso_lx = 0.1 +#LSS_aniso_ly = 0.1 +#LSS_aniso_lz = -0.2 + diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 6948d19..e636dcc 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -176,14 +176,12 @@ private: //! 
checks if 'vec' is in the FBZ with FBZ normal vectors given in 'normals' auto check_FBZ = []( const auto& normals, const auto& vec ) -> bool { - bool btest = true; for( const auto& n : normals ){ if( n.dot( vec ) > 1.0001 * n.dot(n) ){ - btest = false; - break; + return false; } } - return btest; + return true; }; constexpr ptrdiff_t lnumber = 3, knumber = 3; @@ -340,7 +338,7 @@ private: for( int l3=-numb; l3<=numb; ++l3 ){ // need both halfs of Fourier space since we use real transforms for( int isign=0; isign<=1; ++isign ){ - real_t sign = (isign==0)? +1.0 : -1.0; + const real_t sign = 2.0*real_t(isign)-1.0; const vec3 vshift({real_t(l1),real_t(l2),real_t(l3)}); vec3 vectk = sign * a + mat_reciprocal * vshift; diff --git a/src/old/output_gadget2___original.cc b/src/old/output_gadget2___original.cc new file mode 100644 index 0000000..b5cbf41 --- /dev/null +++ b/src/old/output_gadget2___original.cc @@ -0,0 +1,1408 @@ +/* + + output_gadget2.cc - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + */ + +#include +#include +#include "logger.hh" +// #include "region_generator.hh" +#include "output_plugin.hh" +// #include "mg_interp.hh" +// #include "mesh.hh" + + +template +class gadget2_output_plugin : public output_plugin +{ + // const int empty_fill_bytes = 56; + +public: + bool do_baryons_; + double omegab_; + double gamma_; + bool shift_halfcell_; + +protected: + std::ofstream ofs_; + bool blongids_; + bool bhave_particlenumbers_; + + std::map units_length_; + std::map units_mass_; + std::map units_vel_; + + double unit_length_chosen_; + double unit_mass_chosen_; + double unit_vel_chosen_; + + typedef struct io_header + { + int npart[6]; + double mass[6]; + double time; + double redshift; + int flag_sfr; + int flag_feedback; + unsigned int npartTotal[6]; + int flag_cooling; + int num_files; + double BoxSize; + double Omega0; + double OmegaLambda; + double HubbleParam; + int flag_stellarage; + int flag_metals; + unsigned int npartTotalHighWord[6]; + int flag_entropy_instead_u; + int flag_doubleprecision; + char fill[empty_fill_bytes]; + } header; + + header header_; + + std::string fname; + + enum iofields + { + id_dm_mass, + id_dm_vel, + id_dm_pos, + id_gas_vel, + id_gas_rho, + id_gas_temp, + id_gas_pos + }; + + size_t np_per_type_[6]; + + size_t block_buf_size_; + size_t npartmax_; + unsigned nfiles_; + + unsigned bndparticletype_; + bool bmorethan2bnd_; + bool kpcunits_; + bool msolunits_; + double YHe_; + bool spread_coarse_acrosstypes_; + + // refinement_mask refmask; + + void distribute_particles(unsigned nfiles, std::vector> &np_per_file, std::vector &np_tot_per_file) + { + np_per_file.assign(nfiles, std::vector(6, 0)); + np_tot_per_file.assign(nfiles, 0); + + size_t n2dist[6]; + size_t ntotal = 0; + for (int i = 0; i < 6; ++i) + { + ntotal += np_per_type_[i]; + n2dist[i] = np_per_type_[i]; + } + + size_t nnominal = (size_t)((double)ntotal / (double)nfiles); + size_t nlast = ntotal - nnominal * (nfiles - 1); + + for (unsigned i = 0; i < nfiles; ++i) + { + size_t nthisfile = 0; + + size_t nmax = (i == nfiles - 1) ? 
nlast : nnominal; + + for (int itype = 0; itype < 6; ++itype) + { + if (n2dist[itype] == 0) + continue; + np_per_file[i][itype] = std::min(n2dist[itype], nmax - nthisfile); + n2dist[itype] -= np_per_file[i][itype]; + nthisfile += np_per_file[i][itype]; + + if (nthisfile >= nmax) + break; + } + + np_tot_per_file[i] = nthisfile; + } + + for (int i = 0; i < 6; ++i) + assert(n2dist[i] == 0); + } + + std::ifstream &open_and_check(std::string ffname, size_t npart, size_t offset = 0) + { + std::ifstream ifs(ffname.c_str(), std::ios::binary); + size_t blk; + ifs.read((char *)&blk, sizeof(size_t)); + if (blk != npart * (size_t)sizeof(T_store)) + { + csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); + csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * (size_t)sizeof(T_store), blk); + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + ifs.seekg(offset, std::ios::cur); + + return ifs; + } + + class pistream : public std::ifstream + { + public: + pistream(std::string fname, size_t npart, size_t offset = 0) + : std::ifstream(fname.c_str(), std::ios::binary) + { + size_t blk; + + if (!this->good()) + { + csoca::elog.Print("Could not open buffer file in gadget2 output plug-in"); + throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); + } + + this->read((char *)&blk, sizeof(size_t)); + + if (blk != npart * sizeof(T_store)) + { + csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); + csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + this->seekg(offset + sizeof(size_t), std::ios::beg); + } + + pistream() + { + } + + void open(std::string fname, size_t npart, size_t offset = 0) + { + std::ifstream::open(fname.c_str(), std::ios::binary); + size_t blk; + + if (!this->good()) + { + csoca::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); + throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); + } + + this->read((char *)&blk, sizeof(size_t)); + + if (blk != npart * sizeof(T_store)) + { + csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); + csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + this->seekg(offset + sizeof(size_t), std::ios::beg); + } + }; + + class postream : public std::fstream + { + public: + postream(std::string fname, size_t npart, size_t offset = 0) + : std::fstream(fname.c_str(), std::ios::binary | std::ios::in | std::ios::out) + { + size_t blk; + + if (!this->good()) + { + csoca::elog.Print("Could not open buffer file in gadget2 output plug-in"); + throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); + } + + this->read((char *)&blk, sizeof(size_t)); + + if (blk != npart * sizeof(T_store)) + { + csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); + csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + this->seekg(offset, std::ios::cur); + this->seekp(offset + sizeof(size_t), std::ios::beg); + } + + postream() + { + } + + void open(std::string fname, size_t npart, size_t offset = 0) + { + if (is_open()) + 
this->close(); + + std::fstream::open(fname.c_str(), std::ios::binary | std::ios::in | std::ios::out); + size_t blk; + + if (!this->good()) + { + csoca::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); + throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); + } + + this->read((char *)&blk, sizeof(size_t)); + + if (blk != npart * sizeof(T_store)) + { + csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); + csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + + this->seekg(offset, std::ios::cur); + this->seekp(offset + sizeof(size_t), std::ios::beg); + } + }; + + void combine_components_for_coarse(void) + { + const size_t + nptot = np_per_type_[1] + np_per_type_[2] + np_per_type_[3] + np_per_type_[4] + np_per_type_[5], + npfine = np_per_type_[1], + npcoarse = nptot - npfine; + + std::vector tmp1, tmp2; + + tmp1.assign(block_buf_size_, 0.0); + tmp2.assign(block_buf_size_, 0.0); + + double facb = omegab_ / header_.Omega0, facc = (header_.Omega0 - omegab_) / header_.Omega0; + + for (int icomp = 0; icomp < 3; ++icomp) + { + char fc[256], fb[256]; + postream iffs1, iffs2; + + /*** positions ***/ + + sprintf(fc, "___ic_temp_%05d.bin", 100 * id_dm_pos + icomp); + sprintf(fb, "___ic_temp_%05d.bin", 100 * id_gas_pos + icomp); + + iffs1.open(fc, nptot, npfine * sizeof(T_store)); + iffs2.open(fb, nptot, npfine * sizeof(T_store)); + + size_t npleft = npcoarse; + size_t n2read = std::min((size_t)block_buf_size_, npleft); + while (n2read > 0ul) + { + std::streampos sp = iffs1.tellg(); + iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) + { + tmp1[i] = facc * tmp1[i] + facb * tmp2[i]; + } + + iffs1.seekp(sp); + iffs1.write(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + + npleft -= n2read; + n2read = std::min((size_t)block_buf_size_, npleft); + } + + iffs1.close(); + iffs2.close(); + + /*** velocities ***/ + + sprintf(fc, "___ic_temp_%05d.bin", 100 * id_dm_vel + icomp); + sprintf(fb, "___ic_temp_%05d.bin", 100 * id_gas_vel + icomp); + + iffs1.open(fc, nptot, npfine * sizeof(T_store)); + iffs2.open(fb, nptot, npfine * sizeof(T_store)); + + npleft = npcoarse; + n2read = std::min((size_t)block_buf_size_, npleft); + + while (n2read > 0ul) + { + std::streampos sp = iffs1.tellg(); + iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) + { + tmp1[i] = facc * tmp1[i] + facb * tmp2[i]; + } + + iffs1.seekp(sp); + iffs1.write(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + + npleft -= n2read; + n2read = std::min((size_t)block_buf_size_, npleft); + } + + iffs1.close(); + iffs2.close(); + } + } + + void assemble_gadget_file(void) + { + + if (do_baryons_) + combine_components_for_coarse(); + + //............................................................................ + //... copy from the temporary files, interleave the data and save ............ 
+ + char fnx[256], fny[256], fnz[256], fnvx[256], fnvy[256], fnvz[256], fnm[256]; + char fnbx[256], fnby[256], fnbz[256], fnbvx[256], fnbvy[256], fnbvz[256]; + + sprintf(fnx, "___ic_temp_%05d.bin", 100 * id_dm_pos + 0); + sprintf(fny, "___ic_temp_%05d.bin", 100 * id_dm_pos + 1); + sprintf(fnz, "___ic_temp_%05d.bin", 100 * id_dm_pos + 2); + sprintf(fnvx, "___ic_temp_%05d.bin", 100 * id_dm_vel + 0); + sprintf(fnvy, "___ic_temp_%05d.bin", 100 * id_dm_vel + 1); + sprintf(fnvz, "___ic_temp_%05d.bin", 100 * id_dm_vel + 2); + sprintf(fnm, "___ic_temp_%05d.bin", 100 * id_dm_mass); + + sprintf(fnbx, "___ic_temp_%05d.bin", 100 * id_gas_pos + 0); + sprintf(fnby, "___ic_temp_%05d.bin", 100 * id_gas_pos + 1); + sprintf(fnbz, "___ic_temp_%05d.bin", 100 * id_gas_pos + 2); + sprintf(fnbvx, "___ic_temp_%05d.bin", 100 * id_gas_vel + 0); + sprintf(fnbvy, "___ic_temp_%05d.bin", 100 * id_gas_vel + 1); + sprintf(fnbvz, "___ic_temp_%05d.bin", 100 * id_gas_vel + 2); + + pistream iffs1, iffs2, iffs3; + + const size_t + nptot = np_per_type_[0] + np_per_type_[1] + np_per_type_[2] + np_per_type_[3] + np_per_type_[4] + np_per_type_[5], + //npgas = np_fine_gas_, + npcdm = nptot - np_per_type_[0]; + + size_t + wrote_coarse = 0, + wrote_gas = 0, + wrote_dm = 0; + + size_t + npleft = nptot, + n2read = std::min((size_t)block_buf_size_, npleft); + + std::cout << " - Gadget2 : writing " << nptot << " particles to file...\n"; + for (int i = 0; i < 6; ++i) + if (np_per_type_[i] > 0) + csoca::ilog.Print(" type %d : %12llu [m=%g]", i, np_per_type_[i], header_.mass[i]); + + bool bbaryons = np_per_type_[0] > 0; + + std::vector adata3; + adata3.reserve(3 * block_buf_size_); + T_store *tmp1, *tmp2, *tmp3; + + tmp1 = new T_store[block_buf_size_]; + tmp2 = new T_store[block_buf_size_]; + tmp3 = new T_store[block_buf_size_]; + + //... for multi-file output + //int fileno = 0; + //size_t npart_left = nptot; + + //std::vector nfdm_per_file, nfgas_per_file, nc_per_file; + + std::vector> np_per_file; + std::vector np_tot_per_file; + + distribute_particles(nfiles_, np_per_file, np_tot_per_file); + + if (nfiles_ > 1) + { + csoca::ilog.Print("Gadget2 : distributing particles to %d files", nfiles_); + //<< " " << std::setw(12) << "type 0" << "," << std::setw(12) << "type 1" << "," << std::setw(12) << "type " << bndparticletype_ << std::endl; + for (unsigned i = 0; i < nfiles_; ++i) + csoca::ilog.Print(" file %i : %12llu", i, np_tot_per_file[i], header_.mass[i]); + } + + size_t curr_block_buf_size = block_buf_size_; + + size_t idcount = 0; + bool bneed_long_ids = blongids_; + if (nptot >= 1ul << 32 && !bneed_long_ids) + { + bneed_long_ids = true; + csoca::wlog.Print("Need long particle IDs, will write 64bit, make sure to enable in Gadget!"); + } + + for (unsigned ifile = 0; ifile < nfiles_; ++ifile) + { + + if (nfiles_ > 1) + { + char ffname[256]; + sprintf(ffname, "%s.%d", fname_.c_str(), ifile); + ofs_.open(ffname, std::ios::binary | std::ios::trunc); + } + else + { + ofs_.open(fname_.c_str(), std::ios::binary | std::ios::trunc); + } + + size_t np_this_file = np_tot_per_file[ifile]; + + int blksize = sizeof(header); + + //... write the header ....................................................... 
+ + header this_header(header_); + for (int i = 0; i < 6; ++i) + { + this_header.npart[i] = np_per_file[ifile][i]; + this_header.npartTotal[i] = (unsigned)np_per_type_[i]; + this_header.npartTotalHighWord[i] = (unsigned)(np_per_type_[i] >> 32); + } + + ofs_.write((char *)&blksize, sizeof(int)); + ofs_.write((char *)&this_header, sizeof(header)); + ofs_.write((char *)&blksize, sizeof(int)); + + //... particle positions .................................................. + blksize = 3ul * np_this_file * sizeof(T_store); + ofs_.write((char *)&blksize, sizeof(int)); + + if (bbaryons && np_per_file[ifile][0] > 0ul) + { + + iffs1.open(fnbx, npcdm, wrote_gas * sizeof(T_store)); + iffs2.open(fnby, npcdm, wrote_gas * sizeof(T_store)); + iffs3.open(fnbz, npcdm, wrote_gas * sizeof(T_store)); + + npleft = np_per_file[ifile][0]; + n2read = std::min(curr_block_buf_size, npleft); + while (n2read > 0ul) + { + iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) + { + adata3.push_back(fmod(tmp1[i] + header_.BoxSize, header_.BoxSize)); + adata3.push_back(fmod(tmp2[i] + header_.BoxSize, header_.BoxSize)); + adata3.push_back(fmod(tmp3[i] + header_.BoxSize, header_.BoxSize)); + } + ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); + + adata3.clear(); + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); + } + iffs1.close(); + iffs2.close(); + iffs3.close(); + } + + npleft = np_this_file - np_per_file[ifile][0]; + n2read = std::min(curr_block_buf_size, npleft); + + iffs1.open(fnx, npcdm, wrote_dm * sizeof(T_store)); + iffs2.open(fny, npcdm, wrote_dm * sizeof(T_store)); + iffs3.open(fnz, npcdm, wrote_dm * sizeof(T_store)); + + while (n2read > 0ul) + { + iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) + { + adata3.push_back(fmod(tmp1[i] + header_.BoxSize, header_.BoxSize)); + adata3.push_back(fmod(tmp2[i] + header_.BoxSize, header_.BoxSize)); + adata3.push_back(fmod(tmp3[i] + header_.BoxSize, header_.BoxSize)); + } + ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); + + adata3.clear(); + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); + } + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + + iffs1.close(); + iffs2.close(); + iffs3.close(); + + //... particle velocities .................................................. 
+ blksize = 3ul * np_this_file * sizeof(T_store); + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + + if (bbaryons && np_per_file[ifile][0] > 0ul) + { + iffs1.open(fnbvx, npcdm, wrote_gas * sizeof(T_store)); + iffs2.open(fnbvy, npcdm, wrote_gas * sizeof(T_store)); + iffs3.open(fnbvz, npcdm, wrote_gas * sizeof(T_store)); + + npleft = np_per_file[ifile][0]; + n2read = std::min(curr_block_buf_size, npleft); + while (n2read > 0ul) + { + iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) + { + adata3.push_back(tmp1[i]); + adata3.push_back(tmp2[i]); + adata3.push_back(tmp3[i]); + } + + ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); + + adata3.clear(); + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); + } + + iffs1.close(); + iffs2.close(); + iffs3.close(); + } + + iffs1.open(fnvx, npcdm, wrote_dm * sizeof(T_store)); + iffs2.open(fnvy, npcdm, wrote_dm * sizeof(T_store)); + iffs3.open(fnvz, npcdm, wrote_dm * sizeof(T_store)); + + npleft = np_this_file - np_per_file[ifile][0]; + n2read = std::min(curr_block_buf_size, npleft); + while (n2read > 0ul) + { + iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) + { + adata3.push_back(tmp1[i]); + adata3.push_back(tmp2[i]); + adata3.push_back(tmp3[i]); + } + + ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); + + adata3.clear(); + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); + } + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + + iffs1.close(); + iffs2.close(); + iffs3.close(); + + //... particle IDs .......................................................... + std::vector short_ids; + std::vector long_ids; + + if (bneed_long_ids) + long_ids.assign(curr_block_buf_size, 0); + else + short_ids.assign(curr_block_buf_size, 0); + + npleft = np_this_file; + n2read = std::min(curr_block_buf_size, npleft); + blksize = sizeof(unsigned) * np_this_file; + + if (bneed_long_ids) + blksize = sizeof(size_t) * np_this_file; + + //... generate contiguous IDs and store in file .. + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + while (n2read > 0ul) + { + if (bneed_long_ids) + { + for (size_t i = 0; i < n2read; ++i) + long_ids[i] = idcount++; + ofs_.write(reinterpret_cast(&long_ids[0]), n2read * sizeof(size_t)); + } + else + { + for (size_t i = 0; i < n2read; ++i) + short_ids[i] = idcount++; + ofs_.write(reinterpret_cast(&short_ids[0]), n2read * sizeof(unsigned)); + } + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); + } + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + + std::vector().swap(short_ids); + std::vector().swap(long_ids); + + //... particle masses ....................................................... 
+ if (bmorethan2bnd_) //bmultimass_ && bmorethan2bnd_ && nc_per_file[ifile] > 0ul) + { + unsigned npcoarse = np_per_file[ifile][bndparticletype_]; // nc_per_file[ifile];//header_.npart[5]; + iffs1.open(fnm, np_per_type_[bndparticletype_], wrote_coarse * sizeof(T_store)); + + npleft = npcoarse; + n2read = std::min(curr_block_buf_size, npleft); + blksize = npcoarse * sizeof(T_store); + + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + while (n2read > 0ul) + { + iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + ofs_.write(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); + + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); + } + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + + iffs1.close(); + } + + //... initial internal energy for gas particles + if (bbaryons && np_per_file[ifile][0] > 0ul) + { + + std::vector eint(curr_block_buf_size, 0.0); + + const double astart = 1. / (1. + header_.redshift); + const double npol = (fabs(1.0 - gamma_) > 1e-7) ? 1.0 / (gamma_ - 1.) : 1.0; + const double unitv = 1e5; + const double h2 = header_.HubbleParam * header_.HubbleParam; //*0.0001; + const double adec = 1.0 / (160. * pow(omegab_ * h2 / 0.022, 2.0 / 5.0)); + const double Tcmb0 = 2.726; + const double Tini = astart < adec ? Tcmb0 / astart : Tcmb0 / astart / astart * adec; + const double mu = (Tini > 1.e4) ? 4.0 / (8. - 5. * YHe_) : 4.0 / (1. + 3. * (1. - YHe_)); + const double ceint = 1.3806e-16 / 1.6726e-24 * Tini * npol / mu / unitv / unitv; + + npleft = np_per_file[ifile][0]; + n2read = std::min(curr_block_buf_size, npleft); + blksize = sizeof(T_store) * np_per_file[ifile][0]; //*npgas + + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + while (n2read > 0ul) + { + for (size_t i = 0; i < n2read; ++i) + eint[i] = ceint; + ofs_.write(reinterpret_cast(&eint[0]), n2read * sizeof(T_store)); + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); + } + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + + static bool bdisplayed = false; + if (!bdisplayed) + { + csoca::ilog.Print("Gadget2 : set initial gas temperature to %.2f K/mu", Tini / mu); + bdisplayed = true; + } + } + + ofs_.flush(); + ofs_.close(); + + wrote_gas += np_per_file[ifile][0]; + wrote_dm += np_this_file - np_per_file[ifile][0]; + wrote_coarse += np_per_file[ifile][5]; + } + + delete[] tmp1; + delete[] tmp2; + delete[] tmp3; + + remove(fnbx); + remove(fnby); + remove(fnbz); + remove(fnx); + remove(fny); + remove(fnz); + remove(fnbvx); + remove(fnbvy); + remove(fnbvz); + remove(fnvx); + remove(fnvy); + remove(fnvz); + remove(fnm); + } + + void determine_particle_numbers(const grid_hierarchy &gh) + { + if (!bhave_particlenumbers_) + { + bhave_particlenumbers_ = true; + + double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 + + /*if( kpcunits_ ) + rhoc *= 1e-9; // in h^2 1e10 M_sol / kpc^3 + + if( msolunits_ ) + rhoc *= 1e10; // in h^2 M_sol / kpc^3*/ + + rhoc /= unit_mass_chosen_ / (unit_length_chosen_ * unit_length_chosen_ * unit_length_chosen_); + + // only type 1 are baryons + if (!do_baryons_) + header_.mass[1] = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / gh.count_leaf_cells(0, 0); ///pow(2,3*levelmax_); + else + { + header_.mass[0] = (omegab_)*rhoc * pow(header_.BoxSize, 3.) / gh.count_leaf_cells(0, 0); ///pow(2,3*levelmax_); + header_.mass[1] = (header_.Omega0 - omegab_) * rhoc * pow(header_.BoxSize, 3.) / gh.count_leaf_cells(0, 0); ///pow(2,3*levelmax_); + } + + //... 
+ for (int i = 0; i < 6; ++i) + np_per_type_[i] = 0; + + // determine how many particles per type exist, determine their mass + for (int ilevel = (int)gh.levelmax(); ilevel >= (int)gh.levelmin(); --ilevel) + { + int itype = std::min((int)gh.levelmax() - ilevel + 1, 5); + np_per_type_[itype] += gh.count_leaf_cells(ilevel, ilevel); + if (itype > 1) + header_.mass[itype] = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / pow(2, 3 * ilevel); + } + + // if coarse particles should not be spread across types, assign them all to type bndparticletype + if (!spread_coarse_acrosstypes_) + { + if (gh.levelmax() > gh.levelmin() + 1) + bmorethan2bnd_ = true; + else + bmorethan2bnd_ = false; + + for (unsigned itype = 2; itype < 6; ++itype) + { + if (itype == bndparticletype_) + continue; + np_per_type_[bndparticletype_] += np_per_type_[itype]; + if (!bmorethan2bnd_) + header_.mass[bndparticletype_] += header_.mass[itype]; + np_per_type_[itype] = 0; + header_.mass[itype] = 0.; + } + } + + if (do_baryons_) + np_per_type_[0] = np_per_type_[1]; + } + } + +public: + gadget2_output_plugin(ConfigFile &cf) + : output_plugin(cf) + { + + units_mass_.insert(std::pair("1e10Msol", 1.0)); // 1e10 M_o/h (default) + units_mass_.insert(std::pair("Msol", 1.0e-10)); // 1 M_o/h + units_mass_.insert(std::pair("Mearth", 3.002e-16)); // 1 M_earth/h + + units_length_.insert(std::pair("Mpc", 1.0)); // 1 Mpc/h (default) + units_length_.insert(std::pair("kpc", 1.0e-3)); // 1 kpc/h + units_length_.insert(std::pair("pc", 1.0e-6)); // 1 pc/h + + units_vel_.insert(std::pair("km/s", 1.0)); // 1 km/s (default) + units_vel_.insert(std::pair("m/s", 1.0e-3)); // 1 m/s + units_vel_.insert(std::pair("cm/s", 1.0e-5)); // 1 cm/s + + block_buf_size_ = cf_.GetValueSafe("output", "gadget_blksize", 1048576); + + //... 
ensure that everyone knows we want to do SPH + cf.InsertValue("setup", "do_SPH", "yes"); + + //bbndparticles_ = !cf_.GetValueSafe("output","gadget_nobndpart",false); + npartmax_ = 1 << 30; + + nfiles_ = cf.GetValueSafe("output", "gadget_num_files", 1); + + blongids_ = cf.GetValueSafe("output", "gadget_longids", false); + + shift_halfcell_ = cf.GetValueSafe("output", "gadget_cell_centered", false); + + //if( nfiles_ < (int)ceil((double)npart/(double)npartmax_) ) + // csoca::wlog.Print("Should use more files."); + + if (nfiles_ > 1) + { + for (unsigned ifile = 0; ifile < nfiles_; ++ifile) + { + char ffname[256]; + sprintf(ffname, "%s.%d", fname_.c_str(), ifile); + ofs_.open(ffname, std::ios::binary | std::ios::trunc); + if (!ofs_.good()) + { + csoca::elog.Print("gadget-2 output plug-in could not open output file \'%s\' for writing!", ffname); + throw std::runtime_error(std::string("gadget-2 output plug-in could not open output file \'") + std::string(ffname) + "\' for writing!\n"); + } + ofs_.close(); + } + } + else + { + ofs_.open(fname_.c_str(), std::ios::binary | std::ios::trunc); + if (!ofs_.good()) + { + csoca::elog.Print("gadget-2 output plug-in could not open output file \'%s\' for writing!", fname_.c_str()); + throw std::runtime_error(std::string("gadget-2 output plug-in could not open output file \'") + fname_ + "\' for writing!\n"); + } + ofs_.close(); + } + + bhave_particlenumbers_ = false; + + bmorethan2bnd_ = false; + if (false) //levelmax_ > levelmin_ +4) + bmorethan2bnd_ = true; + + for (int i = 0; i < 6; ++i) + { + header_.npart[i] = 0; + header_.npartTotal[i] = 0; + header_.npartTotalHighWord[i] = 0; + header_.mass[i] = 0.0; + } + + if (typeid(T_store) == typeid(float)) + header_.flag_doubleprecision = 0; + else if (typeid(T_store) == typeid(double)) + header_.flag_doubleprecision = 1; + else + { + csoca::elog.Print("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); + throw std::runtime_error("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); + } + + YHe_ = cf.GetValueSafe("cosmology", "YHe", 0.248); + gamma_ = cf.GetValueSafe("cosmology", "gamma", 5.0 / 3.0); + + do_baryons_ = cf.GetValueSafe("setup", "baryons", false); + omegab_ = cf.GetValueSafe("cosmology", "Omega_b", 0.045); + + //... new way + std::string lunitstr = cf.GetValueSafe("output", "gadget_lunit", "Mpc"); + std::string munitstr = cf.GetValueSafe("output", "gadget_munit", "1e10Msol"); + std::string vunitstr = cf.GetValueSafe("output", "gadget_vunit", "km/s"); + + std::map::iterator mapit; + + if ((mapit = units_length_.find(lunitstr)) != units_length_.end()) + unit_length_chosen_ = (*mapit).second; + else + { + csoca::elog.Print("Gadget: length unit \'%s\' unknown in gadget_lunit", lunitstr.c_str()); + throw std::runtime_error("Unknown length unit specified for Gadget output plugin"); + } + + if ((mapit = units_mass_.find(munitstr)) != units_mass_.end()) + unit_mass_chosen_ = (*mapit).second; + else + { + csoca::elog.Print("Gadget: mass unit \'%s\' unknown in gadget_munit", munitstr.c_str()); + throw std::runtime_error("Unknown mass unit specified for Gadget output plugin"); + } + + if ((mapit = units_vel_.find(vunitstr)) != units_vel_.end()) + unit_vel_chosen_ = (*mapit).second; + else + { + csoca::elog.Print("Gadget: velocity unit \'%s\' unknown in gadget_vunit", vunitstr.c_str()); + throw std::runtime_error("Unknown velocity unit specified for Gadget output plugin"); + } + + //... 
maintain compatibility with old way of setting units + if (cf.ContainsKey("output", "gadget_usekpc")) + { + kpcunits_ = cf.GetValueSafe("output", "gadget_usekpc", false); + if (kpcunits_) + unit_length_chosen_ = 1e-3; + csoca::wlog.Print("Deprecated option \'gadget_usekpc\' may override unit selection. Use \'gadget_lunit\' instead."); + } + if (cf.ContainsKey("output", "gadget_usemsol")) + { + msolunits_ = cf.GetValueSafe("output", "gadget_usemsol", false); + if (msolunits_) + unit_mass_chosen_ = 1e-10; + csoca::wlog.Print("Deprecated option \'gadget_usemsol\' may override unit selection. Use \'gadget_munit\' instead."); + } + + //... coarse particle properties... + + spread_coarse_acrosstypes_ = cf.GetValueSafe("output", "gadget_spreadcoarse", false); + bndparticletype_ = 5; + + if (!spread_coarse_acrosstypes_) + { + bndparticletype_ = cf.GetValueSafe("output", "gadget_coarsetype", 5); + + if (bndparticletype_ == 0 || //bndparticletype_ == 1 || bndparticletype_ == 4 || + bndparticletype_ > 5) + { + csoca::elog.Print("Coarse particles cannot be of Gadget particle type %d in output plugin.", bndparticletype_); + throw std::runtime_error("Specified illegal Gadget particle type for coarse particles"); + } + } + else + { + if (cf.GetValueSafe("output", "gadget_coarsetype", 5) != 5) + csoca::wlog.Print("Gadget: Option \'gadget_spreadcoarse\' forces \'gadget_coarsetype=5\'! Will override."); + } + + //... set time ...................................................... + header_.redshift = cf.GetValue("setup", "zstart"); + header_.time = 1.0 / (1.0 + header_.redshift); + + //... SF flags + header_.flag_sfr = 0; + header_.flag_feedback = 0; + header_.flag_cooling = 0; + + //... + header_.num_files = nfiles_; //1; + header_.BoxSize = cf.GetValue("setup", "BoxLength"); + header_.Omega0 = cf.GetValue("cosmology", "Omega_m"); + header_.OmegaLambda = cf.GetValue("cosmology", "Omega_L"); + header_.HubbleParam = cf.GetValue("cosmology", "H0") / 100.0; + + header_.flag_stellarage = 0; + header_.flag_metals = 0; + + header_.flag_entropy_instead_u = 0; + + //if( kpcunits_ ) + // header_.BoxSize *= 1000.0; + header_.BoxSize /= unit_length_chosen_; + + for (int i = 0; i < empty_fill_bytes; ++i) + header_.fill[i] = 0; + } + + void write_dm_mass(const grid_hierarchy &gh) + { + determine_particle_numbers(gh); + + double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 + + // adjust units + rhoc /= unit_mass_chosen_ / (unit_length_chosen_ * unit_length_chosen_ * unit_length_chosen_); + + /*if( kpcunits_ ) + rhoc *= 1e-9; // in h^2 1e10 M_sol / kpc^3 + + if( msolunits_ ) + rhoc *= 1e10; // in h^2 M_sol / kpc^3 + */ + + // if there are more than one kind of coarse particle assigned to the same type, + // we have to explicitly store their masses + if (bmorethan2bnd_) + { + header_.mass[bndparticletype_] = 0.; + + size_t npcoarse = np_per_type_[bndparticletype_]; + size_t nwritten = 0; + + std::vector temp_dat; + temp_dat.reserve(block_buf_size_); + + char temp_fname[256]; + sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_dm_mass); + std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); + + size_t blksize = sizeof(T_store) * npcoarse; + + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + // int levelmaxcoarse = gh.levelmax() - 4; + // if (!spread_coarse_acrosstypes_) + // levelmaxcoarse = gh.levelmax() - 1; + + //for( int ilevel=levelmaxcoarse; ilevel>=(int)gh.levelmin(); --ilevel ) + + { + int ilevel = 0; + // baryon particles live only on finest grid + // these particles here are total 
matter particles + double pmass = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / pow(2, 3 * ilevel); + + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) + { + if (temp_dat.size() < block_buf_size_) + temp_dat.push_back(pmass); + else + { + ofs_temp.write((char *)&temp_dat[0], sizeof(T_store) * block_buf_size_); + nwritten += block_buf_size_; + temp_dat.clear(); + temp_dat.push_back(pmass); + } + } + } + + if (temp_dat.size() > 0) + { + ofs_temp.write((char *)&temp_dat[0], sizeof(T_store) * temp_dat.size()); + nwritten += temp_dat.size(); + } + + if (nwritten != npcoarse) + { + csoca::elog.Print("nwritten = %llu != npcoarse = %llu\n", nwritten, npcoarse); + throw std::runtime_error("Internal consistency error while writing temporary file for masses"); + } + + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + if (ofs_temp.bad()) + throw std::runtime_error("I/O error while writing temporary file for masses"); + } + } + + void write_dm_position(int coord, const grid_hierarchy &gh) + { + //... count number of leaf cells ...// + determine_particle_numbers(gh); + + size_t npart = 0; + for (int i = 1; i < 6; ++i) + npart += np_per_type_[i]; + + //... determine if we need to shift the coordinates back + double *shift = NULL; + + if (shift_halfcell_) + { + double h = 0.0; //1.0/(1<<(levelmin_+1)); + shift = new double[3]; + shift[0] = shift[1] = shift[2] = -h; + } + + size_t nwritten = 0; + //... collect displacements and convert to absolute coordinates with correct + //... units + std::vector temp_data; + temp_data.reserve(block_buf_size_); + + char temp_fname[256]; + sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_dm_pos + coord); + std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); + + size_t blksize = sizeof(T_store) * npart; + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + double xfac = header_.BoxSize; + + //for( int ilevel=gh.levelmax(); ilevel>=(int)gh.levelmin(); --ilevel ) + unsigned ilevel = 0; + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) + { + double xx[3]; + gh.cell_pos(ilevel, i, j, k, xx); + if (shift != NULL) + xx[coord] += shift[coord]; + + + // std::cerr << i << " " << j << " " << k << " : " << xx[coord]*xfac << " " << (*gh.get_grid(ilevel)).relem(i, j, k) * xfac << std::endl; + + xx[coord] = (xx[coord] + (*gh.get_grid(ilevel)).relem(i, j, k)) * xfac; + + if (temp_data.size() < block_buf_size_) + temp_data.push_back(xx[coord]); + else + { + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); + nwritten += block_buf_size_; + temp_data.clear(); + temp_data.push_back(xx[coord]); + } + } + + if (temp_data.size() > 0) + { + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * temp_data.size()); + nwritten += temp_data.size(); + } + + if (nwritten != npart) + throw std::runtime_error("Internal consistency error while writing temporary file for positions"); + + //... 
dump to temporary file + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + if (ofs_temp.bad()) + throw std::runtime_error("I/O error while writing temporary file for positions"); + + ofs_temp.close(); + + if (shift != NULL) + delete[] shift; + } + + void write_dm_velocity(int coord, const grid_hierarchy &gh) + { + //... count number of leaf cells ...// + determine_particle_numbers(gh); + + size_t npart = 0; + for (int i = 1; i < 6; ++i) + npart += np_per_type_[i]; + + //... collect displacements and convert to absolute coordinates with correct + //... units + std::vector temp_data; + temp_data.reserve(block_buf_size_); + + float isqrta = 1.0f / sqrt(header_.time); + float vfac = isqrta * header_.BoxSize; + + //if( kpcunits_ ) + // vfac /= 1000.0; + vfac *= unit_length_chosen_ / unit_vel_chosen_; + + size_t nwritten = 0; + + char temp_fname[256]; + sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_dm_vel + coord); + std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); + + size_t blksize = sizeof(T_store) * npart; + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + //for( int ilevel=levelmax_; ilevel>=(int)levelmin_; --ilevel ) + int ilevel = 0; + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) + { + if (temp_data.size() < block_buf_size_) + temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); + else + { + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); + nwritten += block_buf_size_; + temp_data.clear(); + temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); + } + } + if (temp_data.size() > 0) + { + ofs_temp.write((char *)&temp_data[0], temp_data.size() * sizeof(T_store)); + nwritten += temp_data.size(); + } + + if (nwritten != npart) + throw std::runtime_error("Internal consistency error while writing temporary file for velocities"); + + ofs_temp.write((char *)&blksize, sizeof(int)); + + if (ofs_temp.bad()) + throw std::runtime_error("I/O error while writing temporary file for velocities"); + + ofs_temp.close(); + + + } + + void write_dm_density(const grid_hierarchy &gh) + { + //... we don't care about DM density for Gadget + } + + void write_dm_potential(const grid_hierarchy &gh) + { + //... we don't care about DM potential for Gadget + } + + void write_gas_potential(const grid_hierarchy &gh) + { + //... we don't care about gas potential for Gadget + } + + //... write data for gas -- don't do this + void write_gas_velocity(int coord, const grid_hierarchy &gh) + { + determine_particle_numbers(gh); + size_t npart = 0; + for (int i = 1; i < 6; ++i) + npart += np_per_type_[i]; + + //... collect velocities and convert to absolute coordinates with correct + //... 
units + std::vector temp_data; + temp_data.reserve(block_buf_size_); + + float isqrta = 1.0f / sqrt(header_.time); + float vfac = isqrta * header_.BoxSize; + + //if( kpcunits_ ) + // vfac /= 1000.0; + vfac *= unit_length_chosen_ / unit_vel_chosen_; + + //size_t npart = gh.count_leaf_cells(gh.levelmin(), gh.levelmax());;; + size_t nwritten = 0; + + char temp_fname[256]; + sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_gas_vel + coord); + std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); + + size_t blksize = sizeof(T_store) * npart; + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + //for( int ilevel=levelmax_; ilevel>=(int)levelmin_; --ilevel ) + int ilevel = 0; + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) + { + if (temp_data.size() < block_buf_size_) + temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); + else + { + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); + nwritten += block_buf_size_; + temp_data.clear(); + temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); + } + } + + if (temp_data.size() > 0) + { + ofs_temp.write((char *)&temp_data[0], temp_data.size() * sizeof(T_store)); + nwritten += temp_data.size(); + } + + if (nwritten != npart) + throw std::runtime_error("Internal consistency error while writing temporary file for gas velocities"); + + ofs_temp.write((char *)&blksize, sizeof(int)); + + if (ofs_temp.bad()) + throw std::runtime_error("I/O error while writing temporary file for gas velocities"); + + ofs_temp.close(); + } + + //... write only for fine level + void write_gas_position(int coord, const grid_hierarchy &gh) + { + //... count number of leaf cells ...// + determine_particle_numbers(gh); + + size_t npart = 0; + for (int i = 1; i < 6; ++i) + npart += np_per_type_[i]; + + //... determine if we need to shift the coordinates back + double *shift = NULL; + + if (shift_halfcell_) + { + double h = 0.0; //1.0/(1<<(levelmin_+1)); + shift = new double[3]; + shift[0] = shift[1] = shift[2] = -h; + } + + size_t nwritten = 0; + + //... + //... collect displacements and convert to absolute coordinates with correct + //... units + std::vector temp_data; + temp_data.reserve(block_buf_size_); + + char temp_fname[256]; + sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_gas_pos + coord); + std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); + + size_t blksize = sizeof(T_store) * npart; + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + double xfac = header_.BoxSize; + + double h = 1.0 / (1ul << gh.levelmax()); + + //for (int ilevel = gh.levelmax(); ilevel >= (int)gh.levelmin(); --ilevel) + int ilevel = 0; + { + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + //if( ! gh.is_refined(ilevel,i,j,k) ) + if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) + { + double xx[3]; + gh.cell_pos(ilevel, i, j, k, xx); + if (shift != NULL) + xx[coord] += shift[coord]; + + //... shift particle positions (this has to be done as the same shift + //... 
is used when computing the convolution kernel for SPH baryons) + xx[coord] += 0.5 * h; + + xx[coord] = (xx[coord] + (*gh.get_grid(ilevel)).relem(i, j, k)) * xfac; + + if (temp_data.size() < block_buf_size_) + temp_data.push_back(xx[coord]); + else + { + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); + nwritten += block_buf_size_; + temp_data.clear(); + temp_data.push_back(xx[coord]); + } + } + } + + if (temp_data.size() > 0) + { + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * temp_data.size()); + nwritten += temp_data.size(); + } + + if (nwritten != npart) + throw std::runtime_error("Internal consistency error while writing temporary file for gas positions"); + + //... dump to temporary file + ofs_temp.write((char *)&blksize, sizeof(size_t)); + + if (ofs_temp.bad()) + throw std::runtime_error("I/O error while writing temporary file for gas positions"); + + ofs_temp.close(); + + if (shift != NULL) + delete[] shift; + } + + void write_gas_density(const grid_hierarchy &gh) + { + //do nothing as we write out positions + } + + void finalize(void) + { + this->assemble_gadget_file(); + } +}; + +// namespace +// { +// output_plugin_creator_concrete> creator1("gadget2"); +// #ifndef SINGLE_PRECISION +// output_plugin_creator_concrete> creator2("gadget2_double"); +// #endif +// } // namespace From f90778ba54131727224c2a07ed9d220a060b69d7 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 14:00:06 +0100 Subject: [PATCH 051/130] submodule update --- external/class | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/class b/external/class index b34d7f6..6f3abba 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit b34d7f6c2b72eab3a347c28e62298d62ca9dd69b +Subproject commit 6f3abbab2608712029d740d6c69aad0ba853e507 From 3797ff0325911bcc3645170e1a93c976f7ba5c3a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 14:08:35 +0100 Subject: [PATCH 052/130] avoid policy error on old versions of cmake --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fcc57e9..26eaa63 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,7 +50,9 @@ endif(ENABLE_MPI) # FFTW -cmake_policy(SET CMP0074 NEW) +if(POLICY CMP0074) + cmake_policy(SET CMP0074 NEW) +endif() if(ENABLE_MPI) find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS MPI) else() From 4020d5b33f7d515036ba2a6b8cc6f703ef03b863 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 14:37:30 +0100 Subject: [PATCH 053/130] added old MUSIC1 plugin for tabulated CAMB transfer function files back in --- example.conf | 54 ++--- src/plugins/transfer_CAMB_file.cc | 344 +++++++++++++++++++++++++++++ src/plugins/transfer_eisenstein.cc | 2 +- 3 files changed, 373 insertions(+), 27 deletions(-) create mode 100644 src/plugins/transfer_CAMB_file.cc diff --git a/example.conf b/example.conf index 3b6d07e..1b5e530 100644 --- a/example.conf +++ b/example.conf @@ -1,33 +1,33 @@ [setup] # number of grid cells per linear dimension for calculations = particles for sc initial load -GridRes = 128 +GridRes = 128 # length of the box in Mpc/h -BoxLength = 250 +BoxLength = 250 # starting redshift -zstart = 49.0 +zstart = 49.0 # order of the LPT to be used (1,2 or 3) -LPTorder = 3 +LPTorder = 3 # also do baryon ICs? 
-DoBaryons = no +DoBaryons = no # do mode fixing à la Angulo&Pontzen -DoFixing = no +DoFixing = no # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!) -ParticleLoad = sc +ParticleLoad = sc [testing] # enables diagnostic output # can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence' -test = convergence +test = none [execution] -NumThreads = 4 +NumThreads = 4 [output] -fname_hdf5 = output_sch.hdf5 -fbase_analysis = output +fname_hdf5 = output_sch.hdf5 +fbase_analysis = output -format = gadget2 -filename = ics_gadget.dat +format = gadget2 +filename = ics_gadget.dat #format = generic #filename = debug.hdf5 @@ -38,21 +38,23 @@ filename = ics_gadget.dat #grafic_use_SPT = yes [random] -generator = NGENIC -seed = 9001 +generator = NGENIC +seed = 9001 [cosmology] -#transfer = CLASS -transfer = eisenstein -Omega_m = 0.302 -Omega_b = 0.045 -Omega_L = 0.698 -H0 = 70.3 -sigma_8 = 0.811 -nspec = 0.961 +transfer = CLASS +# transfer = eisenstein +# transfer = file_CAMB +# transfer_file = wmap5_transfer_out_z0.dat +Omega_m = 0.302 +Omega_b = 0.045 +Omega_L = 0.698 +H0 = 70.3 +sigma_8 = 0.811 +nspec = 0.961 # anisotropic large scale tidal field -#LSS_aniso_lx = 0.1 -#LSS_aniso_ly = 0.1 -#LSS_aniso_lz = -0.2 +#LSS_aniso_lx = +0.1 +#LSS_aniso_ly = +0.1 +#LSS_aniso_lz = -0.2 diff --git a/src/plugins/transfer_CAMB_file.cc b/src/plugins/transfer_CAMB_file.cc new file mode 100644 index 0000000..ddbf35e --- /dev/null +++ b/src/plugins/transfer_CAMB_file.cc @@ -0,0 +1,344 @@ +// transfer_CAMB.cc - This file is part of MUSIC - +// a code to generate multi-scale initial conditions for cosmological simulations + +// Copyright (C) 2019 Oliver Hahn + +#include +#include + +#include + +#include "transfer_function_plugin.hh" + +const double tiny = 1e-30; + +class transfer_CAMB_file_plugin : public TransferFunction_plugin +{ + +private: + std::string m_filename_Pk, m_filename_Tk; + std::vector m_tab_k, m_tab_Tk_tot, m_tab_Tk_cdm, m_tab_Tk_baryon; + std::vector m_tab_Tvk_tot, m_tab_Tvk_cdm, m_tab_Tvk_baryon; + gsl_interp_accel *acc_tot, *acc_cdm, *acc_baryon; + gsl_interp_accel *acc_vtot, *acc_vcdm, *acc_vbaryon; + gsl_spline *spline_tot, *spline_cdm, *spline_baryon; + gsl_spline *spline_vtot, *spline_vcdm, *spline_vbaryon; + + double m_kmin, m_kmax, m_Omega_b, m_Omega_m, m_zstart; + unsigned m_nlines; + + bool m_linbaryoninterp; + + void read_table(void) + { + + m_nlines = 0; + m_linbaryoninterp = false; + +#ifdef WITH_MPI + if (MPI::COMM_WORLD.Get_rank() == 0) + { +#endif + csoca::ilog.Print("Reading tabulated transfer function data from file \n \'%s\'", m_filename_Tk.c_str()); + + std::string line; + std::ifstream ifs(m_filename_Tk.c_str()); + + if (!ifs.good()) + throw std::runtime_error("Could not find transfer function file \'" + m_filename_Tk + "\'"); + + m_tab_k.clear(); + m_tab_Tk_tot.clear(); + m_tab_Tk_cdm.clear(); + m_tab_Tk_baryon.clear(); + m_tab_Tvk_tot.clear(); + m_tab_Tvk_cdm.clear(); //>[150609SH: add] + m_tab_Tvk_baryon.clear(); //>[150609SH: add] + + m_kmin = 1e30; + m_kmax = -1e30; + std::ofstream ofs("dump_transfer.txt"); + + while (!ifs.eof()) + { + getline(ifs, line); + if (ifs.eof()) + break; + + // OH: ignore line if it has a comment: + if (line.find("#") != std::string::npos) + continue; + + std::stringstream ss(line); + + double k, Tkc, Tkb, Tktot, Tkvtot, Tkvc, Tkvb, dummy; + + ss >> k; + ss >> Tkc; // cdm + ss >> Tkb; // baryon + ss >> dummy; // photon + ss >> dummy; // nu + ss >> dummy; // 
mass_nu + ss >> Tktot; // total + ss >> dummy; // no_nu + ss >> dummy; // total_de + ss >> dummy; // Weyl + ss >> Tkvc; // v_cdm + ss >> Tkvb; // v_b + ss >> dummy; // v_b-v_cdm + + if (ss.bad() || ss.fail()) + { + csoca::elog.Print("Error reading the transfer function file (corrupt or not in expected format)!"); + throw std::runtime_error("Error reading transfer function file \'" + + m_filename_Tk + "\'"); + } + + if (m_Omega_b < 1e-6) + Tkvtot = Tktot; + else + Tkvtot = ((m_Omega_m - m_Omega_b) * Tkvc + m_Omega_b * Tkvb) / m_Omega_m; //MvD + + m_linbaryoninterp |= Tkb < 0.0 || Tkvb < 0.0; + + m_tab_k.push_back(log10(k)); + + m_tab_Tk_tot.push_back(Tktot); + m_tab_Tk_baryon.push_back(Tkb); + m_tab_Tk_cdm.push_back(Tkc); + m_tab_Tvk_tot.push_back(Tkvtot); + m_tab_Tvk_baryon.push_back(Tkvb); + m_tab_Tvk_cdm.push_back(Tkvc); + + ++m_nlines; + + if (k < m_kmin) + m_kmin = k; + if (k > m_kmax) + m_kmax = k; + } + + for (size_t i = 0; i < m_tab_k.size(); ++i) + { + m_tab_Tk_tot[i] = log10(m_tab_Tk_tot[i]); + m_tab_Tk_cdm[i] = log10(m_tab_Tk_cdm[i]); + m_tab_Tvk_cdm[i] = log10(m_tab_Tvk_cdm[i]); + m_tab_Tvk_tot[i] = log10(m_tab_Tvk_tot[i]); + + if (!m_linbaryoninterp) + { + m_tab_Tk_baryon[i] = log10(m_tab_Tk_baryon[i]); + m_tab_Tvk_baryon[i] = log10(m_tab_Tvk_baryon[i]); + } + } + + ifs.close(); + + csoca::ilog.Print("Read CAMB transfer function table with %d rows", m_nlines); + + if (m_linbaryoninterp) + csoca::ilog.Print("Using log-lin interpolation for baryons\n (TF is not " + "positive definite)"); + +#ifdef WITH_MPI + } + + unsigned n = m_tab_k.size(); + MPI::COMM_WORLD.Bcast(&n, 1, MPI_UNSIGNED, 0); + + if (MPI::COMM_WORLD.Get_rank() > 0) + { + m_tab_k.assign(n, 0); + m_tab_Tk_tot.assign(n, 0); + m_tab_Tk_cdm.assign(n, 0); + m_tab_Tk_baryon.assign(n, 0); + m_tab_Tvk_tot.assign(n, 0); + m_tab_Tvk_cdm.assign(n, 0); + m_tab_Tvk_baryon.assign(n, 0); + } + + MPI::COMM_WORLD.Bcast(&m_tab_k[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tk_tot[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tk_cdm[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tk_baryon[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tvk_tot[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tvk_cdm[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tvk_baryon[0], n, MPI_DOUBLE, 0); + +#endif + } + +public: + transfer_CAMB_file_plugin(ConfigFile &cf) + : TransferFunction_plugin(cf) + { + m_filename_Tk = pcf_->GetValue("cosmology", "transfer_file"); + m_Omega_m = cf.GetValue("cosmology", "Omega_m"); //MvD + m_Omega_b = cf.GetValue("cosmology", "Omega_b"); //MvD + m_zstart = cf.GetValue("setup", "zstart"); //MvD + + read_table(); + + acc_tot = gsl_interp_accel_alloc(); + acc_cdm = gsl_interp_accel_alloc(); + acc_baryon = gsl_interp_accel_alloc(); + acc_vtot = gsl_interp_accel_alloc(); + acc_vcdm = gsl_interp_accel_alloc(); + acc_vbaryon = gsl_interp_accel_alloc(); + + spline_tot = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + spline_cdm = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + spline_baryon = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + spline_vtot = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + spline_vcdm = + gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + spline_vbaryon = + gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + + gsl_spline_init(spline_tot, &m_tab_k[0], &m_tab_Tk_tot[0], m_tab_k.size()); + gsl_spline_init(spline_cdm, &m_tab_k[0], &m_tab_Tk_cdm[0], m_tab_k.size()); + gsl_spline_init(spline_baryon, &m_tab_k[0], 
&m_tab_Tk_baryon[0], + m_tab_k.size()); + gsl_spline_init(spline_vtot, &m_tab_k[0], &m_tab_Tvk_tot[0], + m_tab_k.size()); + gsl_spline_init(spline_vcdm, &m_tab_k[0], &m_tab_Tvk_cdm[0], + m_tab_k.size()); + gsl_spline_init(spline_vbaryon, &m_tab_k[0], &m_tab_Tvk_baryon[0], + m_tab_k.size()); + + tf_distinct_ = true; // different density between CDM v.s. Baryon + tf_withvel_ = true; // using velocity transfer function + } + + ~transfer_CAMB_file_plugin() + { + gsl_spline_free(spline_tot); + gsl_spline_free(spline_cdm); + gsl_spline_free(spline_baryon); + gsl_spline_free(spline_vtot); + gsl_spline_free(spline_vcdm); + gsl_spline_free(spline_vbaryon); + + gsl_interp_accel_free(acc_tot); + gsl_interp_accel_free(acc_cdm); + gsl_interp_accel_free(acc_baryon); + gsl_interp_accel_free(acc_vtot); + gsl_interp_accel_free(acc_vcdm); + gsl_interp_accel_free(acc_vbaryon); + } + + // linear interpolation in log-log + inline double extrap_right(double k, const tf_type &type) const + { + int n = m_tab_k.size() - 1, n1 = n - 1; + + double v1(1.0), v2(1.0); + + double lk = log10(k); + double dk = m_tab_k[n] - m_tab_k[n1]; + double delk = lk - m_tab_k[n]; + + switch (type) + { + case cdm: + v1 = m_tab_Tk_cdm[n1]; + v2 = m_tab_Tk_cdm[n]; + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case baryon: + v1 = m_tab_Tk_baryon[n1]; + v2 = m_tab_Tk_baryon[n]; + if (m_linbaryoninterp) + return std::max((v2 - v1) / dk * (delk) + v2, tiny); + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case vtotal: //>[150609SH: add] + v1 = m_tab_Tvk_tot[n1]; + v2 = m_tab_Tvk_tot[n]; + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case vcdm: //>[150609SH: add] + v1 = m_tab_Tvk_cdm[n1]; + v2 = m_tab_Tvk_cdm[n]; + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case vbaryon: //>[150609SH: add] + v1 = m_tab_Tvk_baryon[n1]; + v2 = m_tab_Tvk_baryon[n]; + if (m_linbaryoninterp) + return std::max((v2 - v1) / dk * (delk) + v2, tiny); + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case total: + v1 = m_tab_Tk_tot[n1]; + v2 = m_tab_Tk_tot[n]; + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + default: + throw std::runtime_error( + "Invalid type requested in transfer function evaluation"); + } + + return 0.0; + } + + inline double compute(double k, tf_type type) const + { + // use constant interpolation on the left side of the tabulated values + if (k < m_kmin) + { + switch (type) + { + case cdm: + return pow(10.0, m_tab_Tk_cdm[0]); + case baryon: + if (m_linbaryoninterp) + return m_tab_Tk_baryon[0]; + return pow(10.0, m_tab_Tk_baryon[0]); + case vtotal: + return pow(10.0, m_tab_Tvk_tot[0]); + case vcdm: + return pow(10.0, m_tab_Tvk_cdm[0]); + case vbaryon: + if (m_linbaryoninterp) + return m_tab_Tvk_baryon[0]; + return pow(10.0, m_tab_Tvk_baryon[0]); + case total: + return pow(10.0, m_tab_Tk_tot[0]); + default: + throw std::runtime_error( + "Invalid type requested in transfer function evaluation"); + } + } + // use linear interpolation on the right side of the tabulated values + else if (k > m_kmax) + return extrap_right(k, type); + + double lk = log10(k); + switch (type) + { + case cdm: + return pow(10.0, gsl_spline_eval(spline_cdm, lk, acc_cdm)); + case baryon: + if (m_linbaryoninterp) + return gsl_spline_eval(spline_baryon, lk, acc_baryon); + return pow(10.0, gsl_spline_eval(spline_baryon, lk, acc_baryon)); + case vtotal: + return pow(10.0, gsl_spline_eval(spline_vtot, lk, acc_vtot)); //MvD + case vcdm: + return pow(10.0, gsl_spline_eval(spline_vcdm, lk, acc_vcdm)); + case vbaryon: + if (m_linbaryoninterp) + return 
gsl_spline_eval(spline_vbaryon, lk, acc_vbaryon); + return pow(10.0, gsl_spline_eval(spline_vbaryon, lk, acc_vbaryon)); + case total: + return pow(10.0, gsl_spline_eval(spline_tot, lk, acc_tot)); + default: + throw std::runtime_error( + "Invalid type requested in transfer function evaluation"); + } + } + + inline double get_kmin(void) const { return pow(10.0, m_tab_k[1]); } + + inline double get_kmax(void) const { return pow(10.0, m_tab_k[m_tab_k.size() - 2]); } +}; + +namespace +{ +TransferFunction_plugin_creator_concrete creator("file_CAMB"); +} diff --git a/src/plugins/transfer_eisenstein.cc b/src/plugins/transfer_eisenstein.cc index 9d4c032..47a7efd 100644 --- a/src/plugins/transfer_eisenstein.cc +++ b/src/plugins/transfer_eisenstein.cc @@ -434,5 +434,5 @@ namespace TransferFunction_plugin_creator_concrete creator("eisenstein"); TransferFunction_plugin_creator_concrete creator2("eisenstein_wdm"); TransferFunction_plugin_creator_concrete creator3("eisenstein_cdmbino"); -TransferFunction_plugin_creator_concrete creator4("eisenstein_cutoff"); +// TransferFunction_plugin_creator_concrete creator4("eisenstein_cutoff"); } // namespace From e7ebed552376eb352460d59781131627208c82aa Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 19 Dec 2019 15:43:42 +0100 Subject: [PATCH 054/130] fix merge conflict in example conf file --- example.conf | 45 +++++++++++++-------------------------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/example.conf b/example.conf index 6c4779d..c8db213 100644 --- a/example.conf +++ b/example.conf @@ -15,19 +15,21 @@ DoFixing = no ParticleLoad = sc [cosmology] -#transfer = CLASS -transfer = eisenstein -Omega_m = 0.302 -Omega_b = 0.045 -Omega_L = 0.698 -H0 = 70.3 -sigma_8 = 0.811 -nspec = 0.961 +transfer = CLASS +# transfer = eisenstein +# transfer = file_CAMB +# transfer_file = wmap5_transfer_out_z0.dat +Omega_m = 0.302 +Omega_b = 0.045 +Omega_L = 0.698 +H0 = 70.3 +sigma_8 = 0.811 +nspec = 0.961 # anisotropic large scale tidal field -#LSS_aniso_lx = 0.1 -#LSS_aniso_ly = 0.1 -#LSS_aniso_lz = -0.2 +#LSS_aniso_lx = +0.1 +#LSS_aniso_ly = +0.1 +#LSS_aniso_lz = -0.2 [random] generator = NGENIC @@ -57,24 +59,3 @@ UseLongids = false #filename = ics_ramses #grafic_use_SPT = yes -[random] -generator = NGENIC -seed = 9001 - -[cosmology] -transfer = CLASS -# transfer = eisenstein -# transfer = file_CAMB -# transfer_file = wmap5_transfer_out_z0.dat -Omega_m = 0.302 -Omega_b = 0.045 -Omega_L = 0.698 -H0 = 70.3 -sigma_8 = 0.811 -nspec = 0.961 - -# anisotropic large scale tidal field -#LSS_aniso_lx = +0.1 -#LSS_aniso_ly = +0.1 -#LSS_aniso_lz = -0.2 - From 89c5f2758ba580bfd384a8a695c98cd813201fc0 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 24 Jan 2020 15:00:32 +0100 Subject: [PATCH 055/130] added support for writing multiple species, staggered grids --- include/output_plugin.hh | 2 +- include/particle_generator.hh | 32 ++++- src/ic_generator.cc | 21 ++- src/plugins/output_gadget2.cc | 122 ++++++++++-------- src/plugins/output_gadget_hdf5.cc | 207 ++++++++++++++++++++++++++++++ 5 files changed, 316 insertions(+), 68 deletions(-) create mode 100644 src/plugins/output_gadget_hdf5.cc diff --git a/include/output_plugin.hh b/include/output_plugin.hh index b7c23e1..5a18407 100644 --- a/include/output_plugin.hh +++ b/include/output_plugin.hh @@ -45,7 +45,7 @@ public: virtual ~output_plugin(){} //! 
routine to write particle data for a species - virtual void write_particle_data(const particle::container &pc, const cosmo_species &s ) {}; + virtual void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species ) {}; //! routine to write gridded fluid component data for a species virtual void write_grid_data(const Grid_FFT &g, const cosmo_species &s, const fluid_component &c ) {}; diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 1c176eb..4dafda8 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -27,6 +27,14 @@ const std::vector< std::vector> > lattice_shifts = /* RSC: */ {{0.0,0.0,0.0},{0.0,0.0,0.5},{0.0,0.5,0.0},{0.0,0.5,0.5},{0.5,0.0,0.0},{0.5,0.0,0.5},{0.5,0.5,0.0},{0.5,0.5,0.5}}, }; +const std::vector> second_lattice_shift = +{ + /* SC : */ {0.5, 0.5, 0.5}, + /* BCC: */ {0.5, 0.5, 0.0}, + /* FCC: */ {0.5, 0.5, 0.5}, + /* RSC: */ {0.25, 0.25, 0.25}, +}; + template void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const field_t& field ){ // number of modes present in the field @@ -53,11 +61,16 @@ void initialize_lattice( container& particles, lattice lattice_type, const bool // invalidates field, phase shifted to unspecified position after return template -void set_positions( container& particles, const lattice lattice_type, int idim, real_t lunit, const bool b64reals, field_t& field ) +void set_positions( container& particles, const lattice lattice_type, bool is_second_lattice, int idim, real_t lunit, const bool b64reals, field_t& field ) { const size_t num_p_in_load = field.local_size(); for( int ishift=0; ishift<(1<0 ){ field.shift_field( lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift-1] ); } @@ -66,7 +79,8 @@ void set_positions( container& particles, const lattice lattice_type, int idim, for( size_t i=0,ipcount=ipcount0; i(i,j,k,lattice_shifts[lattice_type][ishift]); + auto pos = field.template get_unit_r_shifted(i,j,k,lattice_shifts[lattice_type][ishift] + + (is_second_lattice? second_lattice_shift[lattice_type] : vec3{0.,0.,0.}) ); if( b64reals ){ particles.set_pos64( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); }else{ @@ -78,13 +92,17 @@ void set_positions( container& particles, const lattice lattice_type, int idim, } } -template -void set_velocities( container& particles, lattice lattice_type, int idim, const bool b64reals, field_t& field ) +template +void set_velocities(container &particles, lattice lattice_type, bool is_second_lattice, int idim, const bool b64reals, field_t &field) { const size_t num_p_in_load = field.local_size(); for( int ishift=0; ishift<(1<0 ){ + // if we are dealing with the secondary lattice, apply a global shift + if (ishift == 0 && is_second_lattice){ + field.shift_field(second_lattice_shift[lattice_type]); + } + // can omit first shift since zero by convention, unless shifted already above, otherwise apply relative phase shift + if (ishift > 0){ field.shift_field( lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1] ); } // read out values from phase shifted field and set assoc. particle's value diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 48d31dc..ee370ff 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -70,6 +70,18 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------------------------------------------- //! do baryon ICs? 
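    // [editor's note -- illustrative sketch, not part of the original patch] The Omega map set up
    // just below splits the matter density between species: with the example.conf values quoted in
    // this series (Omega_m = 0.302, Omega_b = 0.045), DoBaryons=yes gives Omega[dm] = 0.257 and
    // Omega[baryon] = 0.045. The Gadget writers later assign
    //   mass = Omega_species * rhoc * BoxSize^3 / Npart,  rhoc = 27.7519737 [h^2 1e10 Msol/Mpc^3],
    // so a 250 Mpc/h box with a 128^3 'sc' CDM load gives 0.257 * 27.7519737 * 250^3 / 128^3 ~= 53.1,
    // i.e. roughly 5.3e11 Msol/h per CDM particle.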
const bool bDoBaryons = the_config.GetValueSafe("setup", "DoBaryons", false ); + std::map< cosmo_species, double > Omega; + if( bDoBaryons ){ + double Om = the_config.GetValue("cosmology", "Omega_m"); + double Ob = the_config.GetValue("cosmology", "Omega_b"); + Omega[cosmo_species::dm] = Om-Ob; + Omega[cosmo_species::baryon] = Ob; + }else{ + double Om = the_config.GetValue("cosmology", "Omega_m"); + double Ob = the_config.GetValue("cosmology", "Omega_b"); + Omega[cosmo_species::dm] = Om; + Omega[cosmo_species::baryon] = 0.0; + } //-------------------------------------------------------------------------------------------------------- //! add beyond box tidal field modes following Schmidt et al. (2018) [https://arxiv.org/abs/1803.03274] @@ -440,6 +452,9 @@ int Run( ConfigFile& the_config ) //=================================================================================== particle::container particles; + bool shifted_lattice = (this_species == cosmo_species::baryon && + the_output_plugin->write_species_as(this_species) == output_type::particles) ? true : false; + // if output plugin wants particles, then we need to store them, along with their IDs if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { @@ -472,7 +487,7 @@ int Run( ConfigFile& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_positions( particles, lattice_type, idim, lunit, the_output_plugin->has_64bit_reals(), tmp ); + particle::set_positions( particles, lattice_type, shifted_lattice, idim, lunit, the_output_plugin->has_64bit_reals(), tmp ); } // otherwise write out the grid data directly to the output plugin // else if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_lagrangian ) @@ -518,7 +533,7 @@ int Run( ConfigFile& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_velocities( particles, lattice_type, idim, the_output_plugin->has_64bit_reals(), tmp ); + particle::set_velocities( particles, lattice_type, shifted_lattice, idim, the_output_plugin->has_64bit_reals(), tmp ); } // otherwise write out the grid data directly to the output plugin else if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian ) @@ -530,7 +545,7 @@ int Run( ConfigFile& the_config ) if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - the_output_plugin->write_particle_data( particles, this_species ); + the_output_plugin->write_particle_data( particles, this_species, Omega[this_species] ); } if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian ) diff --git a/src/plugins/output_gadget2.cc b/src/plugins/output_gadget2.cc index 57d9cc1..e7f20e6 100644 --- a/src/plugins/output_gadget2.cc +++ b/src/plugins/output_gadget2.cc @@ -3,7 +3,7 @@ constexpr int empty_fill_bytes{56}; -template +template class gadget2_output_plugin : public output_plugin { public: @@ -38,40 +38,44 @@ protected: public: //! 
constructor - explicit gadget2_output_plugin(ConfigFile &cf ) - : output_plugin(cf, "GADGET-2") + explicit gadget2_output_plugin(ConfigFile &cf) + : output_plugin(cf, "GADGET-2") { num_files_ = 1; #ifdef USE_MPI // use as many output files as we have MPI tasks MPI_Comm_size(MPI_COMM_WORLD, &num_files_); #endif - real_t astart = 1.0/(1.0+cf_.GetValue("setup", "zstart")); + real_t astart = 1.0 / (1.0 + cf_.GetValue("setup", "zstart")); lunit_ = cf_.GetValue("setup", "BoxLength"); vunit_ = lunit_ / std::sqrt(astart); - blongids_ = cf_.GetValueSafe("output","UseLongids",false); + blongids_ = cf_.GetValueSafe("output", "UseLongids", false); } - output_type write_species_as( const cosmo_species & ) const { return output_type::particles; } + output_type write_species_as(const cosmo_species &) const { return output_type::particles; } real_t position_unit() const { return lunit_; } real_t velocity_unit() const { return vunit_; } - bool has_64bit_reals() const{ - if( typeid(write_real_t)==typeid(double) ) return true; - return false; - } - - bool has_64bit_ids() const{ - if( blongids_ ) return true; - return false; - } - - void write_particle_data(const particle::container &pc, const cosmo_species &s ) + bool has_64bit_reals() const { - // fill the Gadget-2 header - memset(reinterpret_cast(&this_header_),0,sizeof(header)); + if (typeid(write_real_t) == typeid(double)) + return true; + return false; + } + + bool has_64bit_ids() const + { + if (blongids_) + return true; + return false; + } + + void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species) + { + // fill the Gadget-2 header + memset(reinterpret_cast(&this_header_), 0, sizeof(header)); for (int i = 0; i < 6; ++i) { @@ -113,15 +117,15 @@ public: //... set masses double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 - double boxmass = this_header_.Omega0 * rhoc * std::pow(this_header_.BoxSize,3); + double boxmass = Omega_species * rhoc * std::pow(this_header_.BoxSize, 3); this_header_.mass[1] = boxmass / pc.get_global_num_particles(); - + std::string fname = fname_; int thisrank = 0; - + #ifdef USE_MPI - MPI_Comm_rank(MPI_COMM_WORLD,&thisrank); - if( num_files_ > 1 ) + MPI_Comm_rank(MPI_COMM_WORLD, &thisrank); + if (num_files_ > 1) fname += "." + std::to_string(thisrank); #endif uint32_t blocksz; @@ -130,52 +134,56 @@ public: csoca::ilog << "Writer \'" << this->interface_name_ << "\' : Writing data for " << pc.get_global_num_particles() << " particles." 
<< std::endl; blocksz = sizeof(header); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(&this_header_), sizeof(header) ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - - // we write double precision - if( this->has_64bit_reals() ){ + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(&this_header_), sizeof(header)); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + + // we write double precision + if (this->has_64bit_reals()) + { blocksz = 3 * sizeof(double) * pc.get_local_num_particles(); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_pos64_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_pos64_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_vel64_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - }else{ + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_vel64_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + } + else + { blocksz = 3 * sizeof(float) * pc.get_local_num_particles(); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_pos32_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_pos32_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_vel32_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_vel32_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); } - + // we write long IDs - if( this->has_64bit_ids() ){ + if (this->has_64bit_ids()) + { blocksz = sizeof(uint64_t) * pc.get_local_num_particles(); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_ids64_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - }else{ - blocksz = sizeof(uint32_t) * pc.get_local_num_particles(); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_ids32_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_ids64_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + } + else + { + blocksz = sizeof(uint32_t) * pc.get_local_num_particles(); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_ids32_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); } - } }; - namespace { - output_plugin_creator_concrete> creator1("gadget2"); +output_plugin_creator_concrete> creator1("gadget2"); #if !defined(USE_SINGLEPRECISION) - output_plugin_creator_concrete> creator3("gadget2_double"); +output_plugin_creator_concrete> creator3("gadget2_double"); #endif } // namespace diff --git a/src/plugins/output_gadget_hdf5.cc b/src/plugins/output_gadget_hdf5.cc new 
file mode 100644 index 0000000..43afbe1 --- /dev/null +++ b/src/plugins/output_gadget_hdf5.cc @@ -0,0 +1,207 @@ + +#ifdef USE_HDF5 +#include // for unlink +#include +#include "HDF_IO.hh" + +template +std::vector from_6array(const T *a) +{ + return std::vector{{a[0], a[1], a[2], a[3], a[4], a[5]}}; +} + +template +std::vector from_value(const T a) +{ + return std::vector{{a}}; +} + +template +class gadget_hdf5_output_plugin : public output_plugin +{ + struct header_t + { + unsigned npart[6]; + double mass[6]; + double time; + double redshift; + int flag_sfr; + int flag_feedback; + unsigned int npartTotal[6]; + int flag_cooling; + int num_files; + double BoxSize; + double Omega0; + double OmegaLambda; + double HubbleParam; + int flag_stellarage; + int flag_metals; + unsigned int npartTotalHighWord[6]; + int flag_entropy_instead_u; + int flag_doubleprecision; + }; + +protected: + int num_files_, num_simultaneous_writers_; + header_t header_; + real_t lunit_, vunit_; + bool blongids_; + std::string this_fname_; + +public: + //! constructor + explicit gadget_hdf5_output_plugin(ConfigFile &cf) + : output_plugin(cf, "GADGET-HDF5") + { + num_files_ = 1; +#ifdef USE_MPI + // use as many output files as we have MPI tasks + MPI_Comm_size(MPI_COMM_WORLD, &num_files_); +#endif + real_t astart = 1.0 / (1.0 + cf_.GetValue("setup", "zstart")); + lunit_ = cf_.GetValue("setup", "BoxLength"); + vunit_ = lunit_ / std::sqrt(astart); + blongids_ = cf_.GetValueSafe("output", "UseLongids", false); + num_simultaneous_writers_ = cf_.GetValueSafe("output", "NumSimWriters", num_files_); + + for (int i = 0; i < 6; ++i) + { + header_.npart[i] = 0; + header_.npartTotal[i] = 0; + header_.npartTotalHighWord[i] = 0; + header_.mass[i] = 0.0; + } + + header_.time = astart; + header_.redshift = 1.0 / astart - 1.0; + header_.flag_sfr = 0; + header_.flag_feedback = 0; + header_.flag_cooling = 0; + header_.num_files = num_files_; + header_.BoxSize = lunit_; + header_.Omega0 = cf_.GetValue("cosmology", "Omega_m"); + header_.OmegaLambda = cf_.GetValue("cosmology", "Omega_L"); + header_.HubbleParam = cf_.GetValue("cosmology", "H0") / 100.0; + header_.flag_stellarage = 0; + header_.flag_metals = 0; + header_.flag_entropy_instead_u = 0; + header_.flag_doubleprecision = (typeid(write_real_t) == typeid(double)) ? true : false; + + this_fname_ = fname_; +#ifdef USE_MPI + int thisrank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &thisrank); + if (num_files_ > 1) + this_fname_ += "." 
+ std::to_string(thisrank); +#endif + + unlink(this_fname_.c_str()); + HDFCreateFile(this_fname_); + } + + // use destructor to write header post factum + ~gadget_hdf5_output_plugin() + { + HDFCreateGroup(this_fname_, "Header"); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_ThisFile", from_6array(header_.npart)); + HDFWriteGroupAttribute(this_fname_, "Header", "MassTable", from_6array(header_.mass)); + HDFWriteGroupAttribute(this_fname_, "Header", "Time", from_value(header_.time)); + HDFWriteGroupAttribute(this_fname_, "Header", "Redshift", from_value(header_.redshift)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Sfr", from_value(header_.flag_sfr)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Feedback", from_value(header_.flag_feedback)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total", from_6array(header_.npartTotal)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Cooling", from_value(header_.flag_cooling)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumFilesPerSnapshot", from_value(header_.num_files)); + HDFWriteGroupAttribute(this_fname_, "Header", "BoxSize", from_value(header_.BoxSize)); + HDFWriteGroupAttribute(this_fname_, "Header", "Omega0", from_value(header_.Omega0)); + HDFWriteGroupAttribute(this_fname_, "Header", "OmegaLambda", from_value(header_.OmegaLambda)); + HDFWriteGroupAttribute(this_fname_, "Header", "HubbleParam", from_value(header_.HubbleParam)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_StellarAge", from_value(header_.flag_stellarage)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Metals", from_value(header_.flag_metals)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array(header_.npartTotalHighWord)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Entropy_ICs", from_value(header_.flag_entropy_instead_u)); + + csoca::ilog << "Wrote" << std::endl; + } + + output_type write_species_as(const cosmo_species &) const { return output_type::particles; } + + real_t position_unit() const { return lunit_; } + + real_t velocity_unit() const { return vunit_; } + + bool has_64bit_reals() const + { + if (typeid(write_real_t) == typeid(double)) + return true; + return false; + } + + bool has_64bit_ids() const + { + if (blongids_) + return true; + return false; + } + + int get_species_idx(const cosmo_species &s) const + { + switch (s) + { + case cosmo_species::dm: + return 1; + case cosmo_species::baryon: + return 2; + case cosmo_species::neutrino: + return 3; + } + return -1; + } + + void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species) + { + int sid = get_species_idx(s); + + assert(sid != -1); + + header_.npart[sid] = (pc.get_local_num_particles()); + header_.npartTotal[sid] = (uint32_t)(pc.get_global_num_particles()); + header_.npartTotalHighWord[sid] = (uint32_t)((pc.get_global_num_particles()) >> 32); + + double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 + double boxmass = Omega_species * rhoc * std::pow(header_.BoxSize, 3); + header_.mass[sid] = boxmass / pc.get_global_num_particles(); + + HDFCreateGroup(this_fname_, std::string("PartType") + std::to_string(sid)); + + //... write positions and velocities..... 
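    // [editor's note -- illustrative sketch, not part of the original patch] Assuming the group and
    // dataset names used below, the file written by this plugin looks roughly like:
    //   /Header                  (attributes NumPart_ThisFile, MassTable, Time, Redshift, ...)
    //   /PartType1/Coordinates   (CDM, since get_species_idx(dm) == 1)
    //   /PartType1/Velocities
    //   /PartType1/ParticleIDs
    //   /PartType2/...           (baryons, written only when DoBaryons is enabled)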
+ if (this->has_64bit_reals()) + { + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions64_); + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities64_); + } + else + { + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions32_); + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities32_); + } + + //... write ids..... + if (this->has_64bit_ids()) + HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids64_); + else + HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids32_); + + // std::cout << ">>>A> " << header_.npart[sid] << std::endl; + } +}; + +namespace +{ +output_plugin_creator_concrete> creator1("gadget_hdf5"); +#if !defined(USE_SINGLEPRECISION) +output_plugin_creator_concrete> creator3("gadget_hdf5_double"); +#endif +} // namespace + +#endif \ No newline at end of file From f7b2519b8208cc39c5c703104311756829bb64b2 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 25 Jan 2020 16:20:29 +0100 Subject: [PATCH 056/130] working commit, changes for multi-species, moved mean matter PT outside of species loop --- include/grid_fft.hh | 21 ++++ include/transfer_function_plugin.hh | 7 +- src/ic_generator.cc | 177 ++++++++++++++-------------- src/plugins/transfer_CLASS.cc | 68 ++++++++--- 4 files changed, 167 insertions(+), 106 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 4848f0e..3f44c37 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -563,6 +563,27 @@ public: } } + template + void assign_function_of_grids_kdep(const functional &f, const grid1_t &g) + { + assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) ); + +#pragma omp parallel for + for (size_t i = 0; i < sizes_[0]; ++i) + { + for (size_t j = 0; j < sizes_[1]; ++j) + { + for (size_t k = 0; k < sizes_[2]; ++k) + { + auto &elem = this->kelem(i, j, k); + const auto &elemg = g.kelem(i, j, k); + + elem = f(this->get_k(i, j, k), elemg); + } + } + } + } + template void assign_function_of_grids_kdep(const functional &f, const grid1_t &g1, const grid2_t &g2) { diff --git a/include/transfer_function_plugin.hh b/include/transfer_function_plugin.hh index cd7c762..6a7fb6f 100644 --- a/include/transfer_function_plugin.hh +++ b/include/transfer_function_plugin.hh @@ -13,7 +13,12 @@ enum tf_type vtotal, vcdm, vbaryon, - total0 + total0, + cdm0, + baryon0, + vtotal0, + vcdm0, + vbaryon0, }; class TransferFunction_plugin diff --git a/src/ic_generator.cc b/src/ic_generator.cc index ee370ff..28f382f 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -78,7 +78,6 @@ int Run( ConfigFile& the_config ) Omega[cosmo_species::baryon] = Ob; }else{ double Om = the_config.GetValue("cosmology", "Omega_m"); - double Ob = the_config.GetValue("cosmology", "Omega_b"); Omega[cosmo_species::dm] = Om; Omega[cosmo_species::baryon] = 0.0; } @@ -166,8 +165,27 @@ int Run( ConfigFile& the_config ) Grid_FFT A3x({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT A3y({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT A3z({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + //... array [.] 
access to components of A3: - std::array< Grid_FFT*,3 > A3({&A3x,&A3y,&A3z}); + std::array *, 3> A3({&A3x, &A3y, &A3z}); + + // white noise field + Grid_FFT wnoise({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + + //-------------------------------------------------------------------- + // Fill the grid with a Gaussian white noise field + //-------------------------------------------------------------------- + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + csoca::ilog << "Generating white noise field...." << std::endl; + + the_random_number_generator->Fill_Grid(wnoise); + + wnoise.FourierTransformForward(); + + + //-------------------------------------------------------------------- + // Compute the LPT terms.... + //-------------------------------------------------------------------- //-------------------------------------------------------------------- // Create convolution class instance for non-linear terms @@ -177,89 +195,63 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- std::vector species_list; - species_list.push_back( cosmo_species::dm ); - if( bDoBaryons ) species_list.push_back( cosmo_species::baryon ); - - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - - for( auto& this_species : species_list ) - { - csoca::ilog << std::endl - << ">>> Computing ICs for species \'" << cosmo_species_name[this_species] << "\' <<<\n" << std::endl; + species_list.push_back(cosmo_species::dm); + if (bDoBaryons) + species_list.push_back(cosmo_species::baryon); //====================================================================== //... compute 1LPT displacement potential .... //====================================================================== // phi = - delta / k^2 + + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + csoca::ilog << "Generating white noise field...." << std::endl; + double wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush; - #if 1 // random ICs - //-------------------------------------------------------------------- - // Fill the grid with a Gaussian white noise field - //-------------------------------------------------------------------- - the_random_number_generator->Fill_Grid( phi ); - - phi.FourierTransformForward(); - - phi.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { + phi.FourierTransformForward(false); + phi.assign_function_of_grids_kdep([&](auto k, auto wn) { real_t kmod = k.norm(); - if( bDoFixing ) x = (std::abs(x)!=0.0)? x / std::abs(x) : x; - ccomplex_t delta = x * the_cosmo_calc->GetAmplitude(kmod, total); + if (bDoFixing) + wn = (std::abs(wn) != 0.0) ? 
wn / std::abs(wn) : wn; + ccomplex_t delta = wn * the_cosmo_calc->GetAmplitude(kmod, total); return -delta / (kmod * kmod) / volfac; - }); + }, + wnoise); phi.zero_DC_mode(); - #else // ICs with a given phi(1) potential function - constexpr real_t twopi{2.0*M_PI}; - constexpr real_t epsilon_q1d{0.25}; - constexpr real_t epsy{0.25}; - constexpr real_t epsz{0.0};//epsz{0.25}; + csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; - phi.FourierTransformBackward(false); - - phi.apply_function_r_dep([&](auto v, auto r) -> real_t { - real_t q1 = r[0]-0.5*boxlen;//r[0]/boxlen * twopi - M_PI; - real_t q2 = r[1]-0.5*boxlen;//r[1]/boxlen * twopi - M_PI; - real_t q3 = r[2]-0.5*boxlen;//r[1]/boxlen * twopi - M_PI; - - // std::cerr << q1 << " " << q2 << std::endl; - - return -2.0*std::cos(q1+std::cos(q2)); - // return (-std::cos(q1) + epsilon_q1d * std::sin(q2)); - // return (-std::cos(q1) + epsy * std::sin(q2) + epsz * std::cos(q1) * std::sin(q3)); - }); - phi.FourierTransformForward(); - - - #endif - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; - //====================================================================== //... compute 2LPT displacement potential .... //====================================================================== - if( LPTorder > 1 ){ + if (LPTorder > 1) + { wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush; phi2.FourierTransformForward(false); - Conv.convolve_SumOfHessians( phi, {0,0}, phi, {1,1}, {2,2}, op::assign_to( phi2 ) ); - Conv.convolve_Hessians( phi, {1,1}, phi, {2,2}, op::add_to(phi2) ); - Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, op::subtract_from(phi2) ); - Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, op::subtract_from(phi2) ); - Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, op::subtract_from(phi2) ); + Conv.convolve_SumOfHessians(phi, {0, 0}, phi, {1, 1}, {2, 2}, op::assign_to(phi2)); + Conv.convolve_Hessians(phi, {1, 1}, phi, {2, 2}, op::add_to(phi2)); + Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, op::subtract_from(phi2)); + Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, op::subtract_from(phi2)); + Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, op::subtract_from(phi2)); - if( bAddExternalTides ){ - phi2.assign_function_of_grids_kdep([&]( vec3 kvec, ccomplex_t pphi, ccomplex_t pphi2 ){ + if (bAddExternalTides) + { + phi2.assign_function_of_grids_kdep([&](vec3 kvec, ccomplex_t pphi, ccomplex_t pphi2) { // sign in front of f_aniso is reversed since phi1 = -phi - return pphi2 + f_aniso * (kvec[0]*kvec[0]*lss_aniso_lambda[0]+kvec[1]*kvec[1]*lss_aniso_lambda[1]+kvec[2]*kvec[2]*lss_aniso_lambda[2])*pphi; - }, phi, phi2 ); + return pphi2 + f_aniso * (kvec[0] * kvec[0] * lss_aniso_lambda[0] + kvec[1] * kvec[1] * lss_aniso_lambda[1] + kvec[2] * kvec[2] * lss_aniso_lambda[2]) * pphi; + }, + phi, phi2); } phi2.apply_InverseLaplacian(); - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; - if( bAddExternalTides ){ + if (bAddExternalTides) + { csoca::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" 
<< std::endl; csoca::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl; } @@ -268,47 +260,49 @@ int Run( ConfigFile& the_config ) //====================================================================== //... compute 3LPT displacement potential //====================================================================== - if( LPTorder > 2 ){ + if (LPTorder > 2) + { //... 3a term ... wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush; phi3a.FourierTransformForward(false); - Conv.convolve_Hessians( phi, {0,0}, phi, {1,1}, phi, {2,2}, op::assign_to(phi3a) ); - Conv.convolve_Hessians( phi, {0,1}, phi, {0,2}, phi, {1,2}, op::add_twice_to(phi3a) ); - Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, phi, {0,0}, op::subtract_from(phi3a) ); - Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, phi, {1,1}, op::subtract_from(phi3a) ); - Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, phi, {2,2}, op::subtract_from(phi3a) ); + Conv.convolve_Hessians(phi, {0, 0}, phi, {1, 1}, phi, {2, 2}, op::assign_to(phi3a)); + Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::add_twice_to(phi3a)); + Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, phi, {0, 0}, op::subtract_from(phi3a)); + Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, phi, {1, 1}, op::subtract_from(phi3a)); + Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, phi, {2, 2}, op::subtract_from(phi3a)); phi3a.apply_InverseLaplacian(); - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; //... 3b term ... wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush; phi3b.FourierTransformForward(false); - Conv.convolve_SumOfHessians( phi, {0,0}, phi2, {1,1}, {2,2}, op::assign_to(phi3b) ); - Conv.convolve_SumOfHessians( phi, {1,1}, phi2, {2,2}, {0,0}, op::add_to(phi3b) ); - Conv.convolve_SumOfHessians( phi, {2,2}, phi2, {0,0}, {1,1}, op::add_to(phi3b) ); - Conv.convolve_Hessians( phi, {0,1}, phi2, {0,1}, op::subtract_twice_from(phi3b) ); - Conv.convolve_Hessians( phi, {0,2}, phi2, {0,2}, op::subtract_twice_from(phi3b) ); - Conv.convolve_Hessians( phi, {1,2}, phi2, {1,2}, op::subtract_twice_from(phi3b) ); + Conv.convolve_SumOfHessians(phi, {0, 0}, phi2, {1, 1}, {2, 2}, op::assign_to(phi3b)); + Conv.convolve_SumOfHessians(phi, {1, 1}, phi2, {2, 2}, {0, 0}, op::add_to(phi3b)); + Conv.convolve_SumOfHessians(phi, {2, 2}, phi2, {0, 0}, {1, 1}, op::add_to(phi3b)); + Conv.convolve_Hessians(phi, {0, 1}, phi2, {0, 1}, op::subtract_twice_from(phi3b)); + Conv.convolve_Hessians(phi, {0, 2}, phi2, {0, 2}, op::subtract_twice_from(phi3b)); + Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::subtract_twice_from(phi3b)); phi3b.apply_InverseLaplacian(); phi3b *= 0.5; // factor 1/2 from definition of phi(3b)! - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; //... transversal term ... 
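        // [editor's note -- illustrative sketch, not part of the original patch] Assuming
        // convolve_Hessians(f,{a,b},g,{c,d},op) accumulates f_,ab * g_,cd and
        // convolve_DifferenceOfHessians(f,{a,b},g,{c,c},{d,d},op) accumulates f_,ab * (g_,cc - g_,dd),
        // the four convolutions below amount to, for cyclic indices (i, j=i+1, k=i+2),
        //   A3_i = InvLaplacian[ phi2_,ij phi_,ik - phi2_,ik phi_,ij
        //                        + phi_,jk (phi2_,jj - phi2_,kk) - phi2_,jk (phi_,jj - phi_,kk) ]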
wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush; - for( int idim=0; idim<3; ++idim ){ + for (int idim = 0; idim < 3; ++idim) + { // cyclic rotations of indices - int idimp = (idim+1)%3, idimpp = (idim+2)%3; + int idimp = (idim + 1) % 3, idimpp = (idim + 2) % 3; A3[idim]->FourierTransformForward(false); - Conv.convolve_Hessians( phi2, {idim,idimp}, phi, {idim,idimpp}, op::assign_to(*A3[idim]) ); - Conv.convolve_Hessians( phi2, {idim,idimpp}, phi, {idim,idimp}, op::subtract_from(*A3[idim]) ); - Conv.convolve_DifferenceOfHessians( phi, {idimp,idimpp}, phi2,{idimp,idimp}, {idimpp,idimpp}, op::add_to(*A3[idim]) ); - Conv.convolve_DifferenceOfHessians( phi2,{idimp,idimpp}, phi, {idimp,idimp}, {idimpp,idimpp}, op::subtract_from(*A3[idim]) ); + Conv.convolve_Hessians(phi2, {idim, idimp}, phi, {idim, idimpp}, op::assign_to(*A3[idim])); + Conv.convolve_Hessians(phi2, {idim, idimpp}, phi, {idim, idimp}, op::subtract_from(*A3[idim])); + Conv.convolve_DifferenceOfHessians(phi, {idimp, idimpp}, phi2, {idimp, idimp}, {idimpp, idimpp}, op::add_to(*A3[idim])); + Conv.convolve_DifferenceOfHessians(phi2, {idimp, idimpp}, phi, {idimp, idimp}, {idimpp, idimpp}, op::subtract_from(*A3[idim])); A3[idim]->apply_InverseLaplacian(); } - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; } // if( bSymplecticPT ){ @@ -344,19 +338,30 @@ int Run( ConfigFile& the_config ) // Testing const std::string testing = the_config.GetValueSafe("testing", "test", "none"); - if(testing != "none") { + if (testing != "none") + { csoca::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" << std::endl; - if(testing == "potentials_and_densities") { + if (testing == "potentials_and_densities"){ testing::output_potentials_and_densities(the_config, ngrid, boxlen, phi, phi2, phi3a, phi3b, A3); - } else if(testing == "velocity_displacement_symmetries") { + } + else if (testing == "velocity_displacement_symmetries"){ testing::output_velocity_displacement_symmetries(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); - } else if(testing == "convergence") { + } + else if (testing == "convergence"){ testing::output_convergence(the_config, the_cosmo_calc.get(), ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); - } else { + } + else{ csoca::flog << "unknown test '" << testing << "'" << std::endl; std::abort(); } - } else { + } + + for( auto& this_species : species_list ) + { + csoca::ilog << std::endl + << ">>> Computing ICs for species \'" << cosmo_species_name[this_species] << "\' <<<\n" << std::endl; + + { // temporary storage of data Grid_FFT tmp({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 3469b7e..da73c6a 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -22,10 +22,16 @@ class transfer_CLASS_plugin : public TransferFunction_plugin { private: + //... target redshift tables std::vector tab_lnk_, tab_dtot_, tab_dc_, tab_db_, tab_ttot_, tab_tc_, tab_tb_; gsl_interp_accel *gsl_ia_dtot_, *gsl_ia_dc_, *gsl_ia_db_, *gsl_ia_ttot_, *gsl_ia_tc_, *gsl_ia_tb_; gsl_spline *gsl_sp_dtot_, *gsl_sp_dc_, *gsl_sp_db_, *gsl_sp_ttot_, *gsl_sp_tc_, *gsl_sp_tb_; + //... 
starting redshift tables + std::vector tab_lnk0_, tab_dtot0_, tab_dc0_, tab_db0_, tab_ttot0_, tab_tc0_, tab_tb0_; + gsl_interp_accel *gsl_ia_dtot0_, *gsl_ia_dc0_, *gsl_ia_db0_, *gsl_ia_ttot0_, *gsl_ia_tc0_, *gsl_ia_tb0_; + gsl_spline *gsl_sp_dtot0_, *gsl_sp_dc0_, *gsl_sp_db0_, *gsl_sp_ttot0_, *gsl_sp_tc0_, *gsl_sp_tb0_; + // single fluid growing/decaying mode decomposition gsl_interp_accel *gsl_ia_Cplus_, *gsl_ia_Cminus_; gsl_spline *gsl_sp_Cplus_, *gsl_sp_Cminus_; @@ -85,8 +91,11 @@ private: std::unique_ptr CE = std::make_unique(pars, false); + CE->getTk(zstart_, tab_lnk0_, tab_dc0_, tab_db0_, d_ncdm, tab_dtot0_, + tab_tc0_, tab_tb0_, t_ncdm, tab_ttot0_, phi, psi ); + CE->getTk(ztarget_, tab_lnk_, tab_dc_, tab_db_, d_ncdm, tab_dtot_, - tab_tc_, tab_tb_, t_ncdm, tab_ttot_, phi, psi ); + tab_tc_, tab_tb_, t_ncdm, tab_ttot_, phi, psi); wtime = get_wtime() - wtime; csoca::ilog << " took " << wtime << " s / " << tab_lnk_.size() << " modes." << std::endl; @@ -110,12 +119,12 @@ public: this->ClassEngine_get_data(); - gsl_ia_dtot_ = gsl_interp_accel_alloc(); - gsl_ia_dc_ = gsl_interp_accel_alloc(); - gsl_ia_db_ = gsl_interp_accel_alloc(); - gsl_ia_ttot_ = gsl_interp_accel_alloc(); - gsl_ia_tc_ = gsl_interp_accel_alloc(); - gsl_ia_tb_ = gsl_interp_accel_alloc(); + gsl_ia_dtot_ = gsl_interp_accel_alloc(); gsl_ia_dtot0_ = gsl_interp_accel_alloc(); + gsl_ia_dc_ = gsl_interp_accel_alloc(); gsl_ia_dc0_ = gsl_interp_accel_alloc(); + gsl_ia_db_ = gsl_interp_accel_alloc(); gsl_ia_db0_ = gsl_interp_accel_alloc(); + gsl_ia_ttot_ = gsl_interp_accel_alloc(); gsl_ia_ttot0_ = gsl_interp_accel_alloc(); + gsl_ia_tc_ = gsl_interp_accel_alloc(); gsl_ia_tc0_ = gsl_interp_accel_alloc(); + gsl_ia_tb_ = gsl_interp_accel_alloc(); gsl_ia_tb0_ = gsl_interp_accel_alloc(); gsl_sp_dtot_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); gsl_sp_dc_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); @@ -124,6 +133,13 @@ public: gsl_sp_tc_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); gsl_sp_tb_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_dtot0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_dc0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_db0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_ttot0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_tc0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_tb0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_spline_init(gsl_sp_dtot_, &tab_lnk_[0], &tab_dtot_[0], tab_lnk_.size()); gsl_spline_init(gsl_sp_dc_, &tab_lnk_[0], &tab_dc_[0], tab_lnk_.size()); gsl_spline_init(gsl_sp_db_, &tab_lnk_[0], &tab_db_[0], tab_lnk_.size()); @@ -131,6 +147,13 @@ public: gsl_spline_init(gsl_sp_tc_, &tab_lnk_[0], &tab_tc_[0], tab_lnk_.size()); gsl_spline_init(gsl_sp_tb_, &tab_lnk_[0], &tab_tb_[0], tab_lnk_.size()); + gsl_spline_init(gsl_sp_dtot0_, &tab_lnk0_[0], &tab_dtot0_[0], tab_lnk0_.size()); + gsl_spline_init(gsl_sp_dc0_, &tab_lnk0_[0], &tab_dc0_[0], tab_lnk0_.size()); + gsl_spline_init(gsl_sp_db0_, &tab_lnk0_[0], &tab_db0_[0], tab_lnk0_.size()); + gsl_spline_init(gsl_sp_ttot0_, &tab_lnk0_[0], &tab_ttot0_[0], tab_lnk0_.size()); + gsl_spline_init(gsl_sp_tc0_, &tab_lnk0_[0], &tab_tc0_[0], tab_lnk0_.size()); + gsl_spline_init(gsl_sp_tb0_, &tab_lnk0_[0], &tab_tb0_[0], tab_lnk0_.size()); + //-------------------------------------------------------------------------- // single fluid growing/decaying mode decomposition 
//-------------------------------------------------------------------------- @@ -165,19 +188,19 @@ public: } ~transfer_CLASS_plugin(){ - gsl_spline_free(gsl_sp_dtot_); - gsl_spline_free(gsl_sp_dc_); - gsl_spline_free(gsl_sp_db_); - gsl_spline_free(gsl_sp_ttot_); - gsl_spline_free(gsl_sp_tc_); - gsl_spline_free(gsl_sp_tb_); + gsl_spline_free(gsl_sp_dtot_); gsl_spline_free(gsl_sp_dtot0_); + gsl_spline_free(gsl_sp_dc_); gsl_spline_free(gsl_sp_dc0_); + gsl_spline_free(gsl_sp_db_); gsl_spline_free(gsl_sp_db0_); + gsl_spline_free(gsl_sp_ttot_); gsl_spline_free(gsl_sp_ttot0_); + gsl_spline_free(gsl_sp_tc_); gsl_spline_free(gsl_sp_tc0_); + gsl_spline_free(gsl_sp_tb_); gsl_spline_free(gsl_sp_tb0_); - gsl_interp_accel_free(gsl_ia_dtot_); - gsl_interp_accel_free(gsl_ia_dc_); - gsl_interp_accel_free(gsl_ia_db_); - gsl_interp_accel_free(gsl_ia_ttot_); - gsl_interp_accel_free(gsl_ia_tc_); - gsl_interp_accel_free(gsl_ia_tb_); + gsl_interp_accel_free(gsl_ia_dtot_); gsl_interp_accel_free(gsl_ia_dtot0_); + gsl_interp_accel_free(gsl_ia_dc_); gsl_interp_accel_free(gsl_ia_dc0_); + gsl_interp_accel_free(gsl_ia_db_); gsl_interp_accel_free(gsl_ia_db0_); + gsl_interp_accel_free(gsl_ia_ttot_); gsl_interp_accel_free(gsl_ia_ttot0_); + gsl_interp_accel_free(gsl_ia_tc_); gsl_interp_accel_free(gsl_ia_tc0_); + gsl_interp_accel_free(gsl_ia_tb_); gsl_interp_accel_free(gsl_ia_tb0_); } inline double compute(double k, tf_type type) const { @@ -190,6 +213,13 @@ public: case vtotal: splineT = gsl_sp_ttot_; accT = gsl_ia_ttot_; break; case vcdm: splineT = gsl_sp_tc_; accT = gsl_ia_tc_; break; case vbaryon: splineT = gsl_sp_tb_; accT = gsl_ia_tb_; break; + + case total0: splineT = gsl_sp_dtot0_;accT = gsl_ia_dtot0_;break; + case cdm0: splineT = gsl_sp_dc0_; accT = gsl_ia_dc0_; break; + case baryon0: splineT = gsl_sp_db0_; accT = gsl_ia_db0_; break; + case vtotal0: splineT = gsl_sp_ttot0_;accT = gsl_ia_ttot0_;break; + case vcdm0: splineT = gsl_sp_tc0_; accT = gsl_ia_tc0_; break; + case vbaryon0:splineT = gsl_sp_tb0_; accT = gsl_ia_tb0_; break; default: throw std::runtime_error("Invalid type requested in transfer function evaluation"); } From cd7f451397d2857863ab22fdc92d7ca8b10290e2 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 25 Jan 2020 17:55:34 +0100 Subject: [PATCH 057/130] fixed bug with getting CLASS TF for two different redshifts --- src/plugins/transfer_CLASS.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index da73c6a..9d415a3 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -44,10 +45,13 @@ private: csoca::ilog << "Computing TF via ClassEngine..." << std::endl << " ztarget = " << ztarget_ << ", zstart = " << zstart_ << " ..." 
<< std::flush; double wtime = get_wtime(); + + std::stringstream zlist; + zlist << zstart_ << ", " << zstart_; ClassParams pars; pars.add("extra metric transfer functions", "yes"); - pars.add("z_pk",ztarget_); + pars.add("z_max_pk",zlist.str()); pars.add("P_k_max_h/Mpc", kmax_); pars.add("h",h_); @@ -213,7 +217,7 @@ public: case vtotal: splineT = gsl_sp_ttot_; accT = gsl_ia_ttot_; break; case vcdm: splineT = gsl_sp_tc_; accT = gsl_ia_tc_; break; case vbaryon: splineT = gsl_sp_tb_; accT = gsl_ia_tb_; break; - + case total0: splineT = gsl_sp_dtot0_;accT = gsl_ia_dtot0_;break; case cdm0: splineT = gsl_sp_dc0_; accT = gsl_ia_dc0_; break; case baryon0: splineT = gsl_sp_db0_; accT = gsl_ia_db0_; break; From 1fc2b2d67718578008b75f2e71eacb737849a53b Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 25 Jan 2020 23:31:03 +0100 Subject: [PATCH 058/130] added distinct amplitudes for cdm and baryons --- CMakeLists.txt | 4 +- example.conf | 20 ++-- include/cosmology_calculator.hh | 26 +++-- include/particle_generator.hh | 2 +- src/ic_generator.cc | 188 ++++++++++++++++++-------------- 5 files changed, 139 insertions(+), 101 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 51a453e..f10eb0a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,8 +5,8 @@ project(monofonIC) # include class submodule include(${CMAKE_CURRENT_SOURCE_DIR}/external/class.cmake) -#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") +#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic -g -fno-omit-frame-pointer") find_package(PkgConfig REQUIRED) set(CMAKE_MODULE_PATH diff --git a/example.conf b/example.conf index 178248d..718e145 100644 --- a/example.conf +++ b/example.conf @@ -4,15 +4,15 @@ GridRes = 128 # length of the box in Mpc/h BoxLength = 250 # starting redshift -zstart = 49.0 +zstart = 129.0 # order of the LPT to be used (1,2 or 3) LPTorder = 3 # also do baryon ICs? -DoBaryons = no +DoBaryons = yes # do mode fixing à la Angulo&Pontzen DoFixing = no # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!) 
-ParticleLoad = sc +ParticleLoad = bcc [cosmology] transfer = CLASS @@ -39,18 +39,22 @@ seed = 9001 [testing] # enables diagnostic output # can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence' -test = none +test = none [execution] -NumThreads = 4 +NumThreads = 16 [output] fname_hdf5 = output_sch.hdf5 fbase_analysis = output -format = gadget2 -filename = ics_gadget.dat -UseLongids = false +#format = gadget2 +#filename = ics_gadget.dat +#UseLongids = false +# +format = gadget_hdf5 +filename = ics_gadget.hdf5 + #format = generic #filename = debug.hdf5 diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index ea51299..ba2d8ff 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -81,23 +81,33 @@ public: // write power spectrum to a file std::ofstream ofs(fname.c_str()); - std::stringstream ss; ss << " (a=" << a <<")"; + std::stringstream ss; ss << " ,a=" << a <<""; ofs << "# " << std::setw(18) << "k [h/Mpc]" - << std::setw(20) << ("P_dtot(k)"+ss.str()) - << std::setw(20) << ("P_dcdm(k)"+ss.str()) - << std::setw(20) << ("P_dbar(k)"+ss.str()) - << std::setw(20) << ("P_dtot(K) (a=1)") - << std::setw(20) << ("P_tcdm(k)"+ss.str()) - << std::setw(20) << ("P_tbar(k)"+ss.str()) + << std::setw(20) << ("P_dtot(k"+ss.str()+"|BS)") + << std::setw(20) << ("P_dcdm(k"+ss.str()+"|BS)") + << std::setw(20) << ("P_dbar(k"+ss.str()+"|BS)") + << std::setw(20) << ("P_tcdm(k"+ss.str()+"|BS)") + << std::setw(20) << ("P_tbar(k"+ss.str()+"|BS)") + << std::setw(20) << ("P_dtot(k"+ss.str()+")") + << std::setw(20) << ("P_dcdm(k"+ss.str()+")") + << std::setw(20) << ("P_dbar(k"+ss.str()+")") + << std::setw(20) << ("P_tcdm(k"+ss.str()+")") + << std::setw(20) << ("P_tbar(k"+ss.str()+")") + << std::setw(20) << ("P_dtot(K,a=1)") << std::endl; for( double k=kmin; kget_kmax(); k*=1.05 ){ ofs << std::setw(20) << std::setprecision(10) << k << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total) * Dplus0, 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, cdm) * Dplus0, 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, baryon) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total), 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vcdm) * Dplus0, 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vbaryon) * Dplus0, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, cdm0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, baryon0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vcdm0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vbaryon0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total), 2.0) << std::endl; } } diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 4dafda8..efac3dd 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -31,7 +31,7 @@ const std::vector> second_lattice_shift = { /* SC : */ {0.5, 0.5, 0.5}, /* BCC: */ {0.5, 0.5, 0.0}, - /* FCC: */ {0.5, 0.5, 0.5}, + /* FCC: */ {0.5, 0.0, 0.0}, /* RSC: */ {0.25, 0.25, 0.25}, }; diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 
8eb83bd..9641112 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -182,6 +182,11 @@ int Run( ConfigFile& the_config ) the_random_number_generator->Fill_Grid(wnoise); wnoise.FourierTransformForward(); + wnoise.apply_function_k( [&](auto wn){ + if (bDoFixing) + wn = (std::abs(wn) != 0.0) ? wn / std::abs(wn) : wn; + return wn / volfac; + }); //-------------------------------------------------------------------- @@ -207,39 +212,36 @@ int Run( ConfigFile& the_config ) if (bDoBaryons) species_list.push_back(cosmo_species::baryon); - //====================================================================== - //... compute 1LPT displacement potential .... - //====================================================================== - // phi = - delta / k^2 + //====================================================================== + //... compute 1LPT displacement potential .... + //====================================================================== + // phi = - delta / k^2 csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; csoca::ilog << "Generating white noise field...." << std::endl; - double wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush; + double wtime = get_wtime(); + csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush; phi.FourierTransformForward(false); phi.assign_function_of_grids_kdep([&](auto k, auto wn) { - real_t kmod = k.norm(); - if (bDoFixing) - wn = (std::abs(wn) != 0.0) ? wn / std::abs(wn) : wn; + real_t kmod = k.norm(); ccomplex_t delta = wn * the_cosmo_calc->GetAmplitude(kmod, total); - return -delta / (kmod * kmod) / volfac; - }, - wnoise); + return -delta / (kmod * kmod); + }, wnoise); - phi.zero_DC_mode(); + phi.zero_DC_mode(); csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; - - //====================================================================== - //... compute 2LPT displacement potential .... - //====================================================================== + + //====================================================================== + //... compute 2LPT displacement potential .... 
+ //====================================================================== if (LPTorder > 1) { - wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush; - phi2.FourierTransformForward(false); + wtime = get_wtime(); + csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush; + phi2.FourierTransformForward(false); Conv.convolve_SumOfHessians(phi, {0, 0}, phi, {1, 1}, {2, 2}, op::assign_to(phi2)); Conv.convolve_Hessians(phi, {1, 1}, phi, {2, 2}, op::add_to(phi2)); Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, op::subtract_from(phi2)); @@ -249,119 +251,119 @@ int Run( ConfigFile& the_config ) if (bAddExternalTides) { phi2.assign_function_of_grids_kdep([&](vec3 kvec, ccomplex_t pphi, ccomplex_t pphi2) { - // sign in front of f_aniso is reversed since phi1 = -phi + // sign in front of f_aniso is reversed since phi1 = -phi return pphi2 + f_aniso * (kvec[0] * kvec[0] * lss_aniso_lambda[0] + kvec[1] * kvec[1] * lss_aniso_lambda[1] + kvec[2] * kvec[2] * lss_aniso_lambda[2]) * pphi; }, phi, phi2); - } + } - phi2.apply_InverseLaplacian(); + phi2.apply_InverseLaplacian(); csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; if (bAddExternalTides) { - csoca::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" << std::endl; - csoca::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl; - } + csoca::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" << std::endl; + csoca::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl; } + } - //====================================================================== - //... compute 3LPT displacement potential - //====================================================================== + //====================================================================== + //... compute 3LPT displacement potential + //====================================================================== if (LPTorder > 2) { - //... 3a term ... - wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush; - phi3a.FourierTransformForward(false); + //... 3a term ... + wtime = get_wtime(); + csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush; + phi3a.FourierTransformForward(false); Conv.convolve_Hessians(phi, {0, 0}, phi, {1, 1}, phi, {2, 2}, op::assign_to(phi3a)); Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::add_twice_to(phi3a)); Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, phi, {0, 0}, op::subtract_from(phi3a)); Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, phi, {1, 1}, op::subtract_from(phi3a)); Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, phi, {2, 2}, op::subtract_from(phi3a)); - phi3a.apply_InverseLaplacian(); + phi3a.apply_InverseLaplacian(); csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; - //... 3b term ... - wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush; - phi3b.FourierTransformForward(false); + //... 3b term ... 
+ wtime = get_wtime(); + csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush; + phi3b.FourierTransformForward(false); Conv.convolve_SumOfHessians(phi, {0, 0}, phi2, {1, 1}, {2, 2}, op::assign_to(phi3b)); Conv.convolve_SumOfHessians(phi, {1, 1}, phi2, {2, 2}, {0, 0}, op::add_to(phi3b)); Conv.convolve_SumOfHessians(phi, {2, 2}, phi2, {0, 0}, {1, 1}, op::add_to(phi3b)); Conv.convolve_Hessians(phi, {0, 1}, phi2, {0, 1}, op::subtract_twice_from(phi3b)); Conv.convolve_Hessians(phi, {0, 2}, phi2, {0, 2}, op::subtract_twice_from(phi3b)); Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::subtract_twice_from(phi3b)); - phi3b.apply_InverseLaplacian(); - phi3b *= 0.5; // factor 1/2 from definition of phi(3b)! + phi3b.apply_InverseLaplacian(); + phi3b *= 0.5; // factor 1/2 from definition of phi(3b)! csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; - //... transversal term ... - wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush; + //... transversal term ... + wtime = get_wtime(); + csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush; for (int idim = 0; idim < 3; ++idim) { - // cyclic rotations of indices + // cyclic rotations of indices int idimp = (idim + 1) % 3, idimpp = (idim + 2) % 3; - A3[idim]->FourierTransformForward(false); + A3[idim]->FourierTransformForward(false); Conv.convolve_Hessians(phi2, {idim, idimp}, phi, {idim, idimpp}, op::assign_to(*A3[idim])); Conv.convolve_Hessians(phi2, {idim, idimpp}, phi, {idim, idimp}, op::subtract_from(*A3[idim])); Conv.convolve_DifferenceOfHessians(phi, {idimp, idimpp}, phi2, {idimp, idimp}, {idimpp, idimpp}, op::add_to(*A3[idim])); Conv.convolve_DifferenceOfHessians(phi2, {idimp, idimpp}, phi, {idimp, idimp}, {idimpp, idimpp}, op::subtract_from(*A3[idim])); - A3[idim]->apply_InverseLaplacian(); - } - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + A3[idim]->apply_InverseLaplacian(); } + csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + } - // if( bSymplecticPT ){ - // //... transversal term ... - // wtime = get_wtime(); - // csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing vNLO(3) term" << std::flush; - // for( int idim=0; idim<3; ++idim ){ - // // cyclic rotations of indices - // A3[idim]->FourierTransformForward(false); - // Conv.convolve_Gradient_and_Hessian( phi, {0}, phi2, {idim,0}, assign_to(*A3[idim]) ); - // Conv.convolve_Gradient_and_Hessian( phi, {1}, phi2, {idim,1}, add_to(*A3[idim]) ); - // Conv.convolve_Gradient_and_Hessian( phi, {2}, phi2, {idim,2}, add_to(*A3[idim]) ); - // } - // csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + // if( bSymplecticPT ){ + // //... transversal term ... 
+ // wtime = get_wtime(); + // csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing vNLO(3) term" << std::flush; + // for( int idim=0; idim<3; ++idim ){ + // // cyclic rotations of indices + // A3[idim]->FourierTransformForward(false); + // Conv.convolve_Gradient_and_Hessian( phi, {0}, phi2, {idim,0}, assign_to(*A3[idim]) ); + // Conv.convolve_Gradient_and_Hessian( phi, {1}, phi2, {idim,1}, add_to(*A3[idim]) ); + // Conv.convolve_Gradient_and_Hessian( phi, {2}, phi2, {idim,2}, add_to(*A3[idim]) ); + // } + // csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; - // } + // } - ///... scale all potentials with respective growth factors - phi *= g1; - phi2 *= g2; - phi3a *= g3a; - phi3b *= g3b; - (*A3[0]) *= g3c; - (*A3[1]) *= g3c; - (*A3[2]) *= g3c; + ///... scale all potentials with respective growth factors + phi *= g1; + phi2 *= g2; + phi3a *= g3a; + phi3b *= g3b; + (*A3[0]) *= g3c; + (*A3[1]) *= g3c; + (*A3[2]) *= g3c; - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - /////////////////////////////////////////////////////////////////////// - // we store the densities here if we compute them - //====================================================================== + /////////////////////////////////////////////////////////////////////// + // we store the densities here if we compute them + //====================================================================== - // Testing - const std::string testing = the_config.GetValueSafe("testing", "test", "none"); + // Testing + const std::string testing = the_config.GetValueSafe("testing", "test", "none"); if (testing != "none") { - csoca::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" << std::endl; + csoca::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" 
<< std::endl; if (testing == "potentials_and_densities"){ - testing::output_potentials_and_densities(the_config, ngrid, boxlen, phi, phi2, phi3a, phi3b, A3); + testing::output_potentials_and_densities(the_config, ngrid, boxlen, phi, phi2, phi3a, phi3b, A3); } else if (testing == "velocity_displacement_symmetries"){ - testing::output_velocity_displacement_symmetries(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); + testing::output_velocity_displacement_symmetries(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); } else if (testing == "convergence"){ - testing::output_convergence(the_config, the_cosmo_calc.get(), ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); + testing::output_convergence(the_config, the_cosmo_calc.get(), ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); } else{ - csoca::flog << "unknown test '" << testing << "'" << std::endl; - std::abort(); - } + csoca::flog << "unknown test '" << testing << "'" << std::endl; + std::abort(); + } } for( auto& this_species : species_list ) @@ -492,9 +494,20 @@ int Run( ConfigFile& the_config ) // divide by Lbox, because displacement is in box units for output plugin tmp.kelem(idx) = lunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot + lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx) ); + + if( bDoBaryons ){ + vec3 kvec = phi.get_k(i,j,k); + real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); + double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, cdm0) : + (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, baryon0) : + the_cosmo_calc->GetAmplitude(kmod, total)*g1) - the_cosmo_calc->GetAmplitude(kmod, total)*g1; + + tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * lunit * ampldiff / k2 / boxlen; + } } } } + tmp.zero_DC_mode(); tmp.FourierTransformBackward(); // if we write particle data, store particle data in particle structure @@ -530,9 +543,19 @@ int Run( ConfigFile& the_config ) tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot_v + vfac3 * (lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx)) ); + if( bDoBaryons ){ + vec3 kvec = phi.get_k(i,j,k); + real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); + double ampldiff = ((this_species == cosmo_species::dm)? -the_cosmo_calc->GetAmplitude(kmod, vcdm0) : + (this_species == cosmo_species::baryon)? 
-the_cosmo_calc->GetAmplitude(kmod, vbaryon0) : + the_cosmo_calc->GetAmplitude(kmod, total)*g1) - the_cosmo_calc->GetAmplitude(kmod, total)*g1; + tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * vfac1 * vunit / boxlen * ampldiff / k2 ; + } + // correct velocity with PLT mode growth rate tmp.kelem(idx) *= lg.vfac_corr(tmp.get_k3(i,j,k)); + if( bAddExternalTides ){ // modify velocities with anisotropic expansion factor**2 tmp.kelem(idx) *= std::pow(lss_aniso_alpha[idim],2.0); @@ -544,6 +567,7 @@ int Run( ConfigFile& the_config ) } } } + tmp.zero_DC_mode(); tmp.FourierTransformBackward(); // if we write particle data, store particle data in particle structure From 91cc71c038e16f86a5841aa025053c01332243cb Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 25 Jan 2020 23:31:50 +0100 Subject: [PATCH 059/130] removed sanitizer options from cmake --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f10eb0a..a4eab8b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,8 +5,8 @@ project(monofonIC) # include class submodule include(${CMAKE_CURRENT_SOURCE_DIR}/external/class.cmake) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") -#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic -g -fno-omit-frame-pointer") +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic") find_package(PkgConfig REQUIRED) set(CMAKE_MODULE_PATH From 401fec0ebd4327165edfcc28cd2a127e66b865a0 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 26 Jan 2020 16:45:26 +0100 Subject: [PATCH 060/130] added place holder massive neutrino options for class --- src/plugins/transfer_CLASS.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 9d415a3..d660d25 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -65,6 +65,18 @@ private: pars.add("Omega_fld",0.0); pars.add("Omega_scf",0.0); + // massive neutrinos +#if 1 + //default off + pars.add("N_ncdm",0); +#else + // change above to enable + pars.add("N_ur",0); + pars.add("N_ncdm",1); + pars.add("m_ncdm","0.4"); + pars.add("T_ncdm",0.71611); +#endif + pars.add("A_s",2.42e-9); pars.add("n_s",.961); // this doesn't matter for TF pars.add("output","dTk,vTk"); From 2b6605861965c1d86153e9ce9b82637e9bd9ccb5 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 26 Jan 2020 21:42:07 +0100 Subject: [PATCH 061/130] fixed dual lattice two-fluid ICs --- include/particle_generator.hh | 2 +- src/ic_generator.cc | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/particle_generator.hh b/include/particle_generator.hh index efac3dd..801c919 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -31,7 +31,7 @@ const std::vector> second_lattice_shift = { /* SC : */ {0.5, 0.5, 0.5}, /* BCC: */ {0.5, 0.5, 0.0}, - /* FCC: */ {0.5, 0.0, 0.0}, + /* FCC: */ {0.25, 0.25, 0.25}, /* RSC: */ {0.25, 0.25, 0.25}, }; diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 9641112..ec71944 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -500,7 +500,8 @@ int Run( ConfigFile& the_config ) real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); double ampldiff = ((this_species == cosmo_species::dm)? 
the_cosmo_calc->GetAmplitude(kmod, cdm0) : (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, baryon0) : - the_cosmo_calc->GetAmplitude(kmod, total)*g1) - the_cosmo_calc->GetAmplitude(kmod, total)*g1; + // the_cosmo_calc->GetAmplitude(kmod, total0)) - the_cosmo_calc->GetAmplitude(kmod, total0); + the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * lunit * ampldiff / k2 / boxlen; } @@ -546,9 +547,10 @@ int Run( ConfigFile& the_config ) if( bDoBaryons ){ vec3 kvec = phi.get_k(i,j,k); real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); - double ampldiff = ((this_species == cosmo_species::dm)? -the_cosmo_calc->GetAmplitude(kmod, vcdm0) : - (this_species == cosmo_species::baryon)? -the_cosmo_calc->GetAmplitude(kmod, vbaryon0) : - the_cosmo_calc->GetAmplitude(kmod, total)*g1) - the_cosmo_calc->GetAmplitude(kmod, total)*g1; + double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, vcdm0) : + (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, vbaryon0) : + // the_cosmo_calc->GetAmplitude(kmod, total0)) - the_cosmo_calc->GetAmplitude(kmod, total0); + the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * vfac1 * vunit / boxlen * ampldiff / k2 ; } From 816a52d4da1a20167877f4193e5468f8eb1abe78 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 24 Feb 2020 18:10:09 +0100 Subject: [PATCH 062/130] added reading routine from HDF5 --- include/grid_fft.hh | 2 + src/grid_fft.cc | 130 ++++++++++++++++++++++++++++++++++++++++++-- src/ic_generator.cc | 18 +++++- 3 files changed, 145 insertions(+), 5 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index dcb3cb4..edbcc69 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -649,6 +649,8 @@ public: void Write_to_HDF5(std::string fname, std::string datasetname) const; + void Read_from_HDF5( std::string fname, std::string datasetname ); + void Write_PowerSpectrum(std::string ofname); void Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count); diff --git a/src/grid_fft.cc b/src/grid_fft.cc index d5f103a..54f8aac 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -200,7 +200,8 @@ void Grid_FFT::FourierTransformForward(bool do_transform) { double wtime = get_wtime(); csoca::dlog.Print("[FFT] Calling Grid_FFT::to_kspace (%lux%lux%lu)", sizes_[0], sizes_[1], sizes_[2]); - FFTW_API(execute)(plan_); + FFTW_API(execute) + (plan_); this->ApplyNorm(); wtime = get_wtime() - wtime; @@ -232,7 +233,8 @@ void Grid_FFT::FourierTransformBackward(bool do_transform) csoca::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]); double wtime = get_wtime(); - FFTW_API(execute)(iplan_); + FFTW_API(execute) + (iplan_); this->ApplyNorm(); wtime = get_wtime() - wtime; @@ -269,6 +271,126 @@ void create_hdf5(std::string Filename) H5Fclose(HDF_FileID); } +template +hid_t hdf5_get_data_type(void) +{ + if (typeid(T) == typeid(int)) + return H5T_NATIVE_INT; + + if (typeid(T) == typeid(unsigned)) + return H5T_NATIVE_UINT; + + if (typeid(T) == typeid(float)) + return H5T_NATIVE_FLOAT; + + if (typeid(T) == typeid(double)) + return H5T_NATIVE_DOUBLE; + + if (typeid(T) == typeid(long long)) + return H5T_NATIVE_LLONG; + + if (typeid(T) == 
typeid(unsigned long long)) + return H5T_NATIVE_ULLONG; + + if (typeid(T) == typeid(size_t)) + return H5T_NATIVE_ULLONG; + + std::cerr << " - Error: [HDF_IO] trying to evaluate unsupported type in GetDataType\n\n"; + return -1; +} + +template +void Grid_FFT::Read_from_HDF5(const std::string Filename, const std::string ObjName) +{ + hid_t HDF_Type = hdf5_get_data_type(); + + hid_t HDF_FileID = H5Fopen(Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + + //... save old error handler + herr_t (*old_func)(void *); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + hid_t HDF_DatasetID = H5Dopen(HDF_FileID, ObjName.c_str()); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if (HDF_DatasetID < 0) + { + csoca::wlog << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + H5Fclose(HDF_FileID); + abort(); + } + + //... get space associated with dataset and its extensions + hid_t HDF_DataspaceID = H5Dget_space(HDF_DatasetID); + + int ndims = H5Sget_simple_extent_ndims(HDF_DataspaceID); + + hsize_t dimsize[3]; + + H5Sget_simple_extent_dims(HDF_DataspaceID, dimsize, NULL); + + hsize_t HDF_StorageSize = 1; + for (int i = 0; i < ndims; ++i) + HDF_StorageSize *= dimsize[i]; + + //... adjust the array size to hold the data + std::vector Data; + Data.reserve(HDF_StorageSize); + Data.assign(HDF_StorageSize, (data_t)0); + + if (Data.capacity() < HDF_StorageSize) + { + csoca::elog << "Not enough memory to store all data in HDFReadDataset!\n"; + abort(); + } + + //... read the dataset + H5Dread(HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, H5P_DEFAULT, &Data[0]); + + if (Data.size() != HDF_StorageSize) + { + csoca::elog << "Something went wrong while reading!\n"; + abort(); + } + + H5Sclose(HDF_DataspaceID); + H5Dclose(HDF_DatasetID); + H5Fclose(HDF_FileID); + + assert( dimsize[0] == dimsize[1] && dimsize[0] == dimsize[2] ); + csoca::ilog << "Read external constraint data of dimensions " << dimsize[0] << "**3." << std::endl; + + for( size_t i=0; i<3; ++i ) this->n_[i] = dimsize[i]; + + if (data_ != nullptr) + { + fftw_free(data_); + } + this->Setup(); + + + //... copy data to internal array ... 
+ for (size_t i = 0; i < size(0); ++i) + { + for (size_t j = 0; j < size(1); ++j) + { + for (size_t k = 0; k < size(2); ++k) + { + this->relem(i,j,k) = Data[ (i*size(1) + j)*size(2)+k ]; + } + } + } +} + template void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) const { @@ -551,7 +673,7 @@ void Grid_FFT::Write_PowerSpectrum(std::string ofname) std::vector bin_k, bin_P, bin_eP; std::vector bin_count; int nbins = 4 * std::max(nhalf_[0], std::max(nhalf_[1], nhalf_[2])); - this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count ); + this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count); #if defined(USE_MPI) if (CONFIG::MPI_task_rank == 0) { @@ -577,7 +699,7 @@ void Grid_FFT::Write_PowerSpectrum(std::string ofname) } template -void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count ) +void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count) { this->FourierTransformForward(); diff --git a/src/ic_generator.cc b/src/ic_generator.cc index d4d160c..8bf1674 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -68,6 +68,10 @@ int Run( ConfigFile& the_config ) //! do baryon ICs? const bool bDoBaryons = the_config.GetValueSafe("setup", "DoBaryons", false ); + //-------------------------------------------------------------------------------------------------------- + //! do constrained ICs? + const bool bAddConstrainedModes = the_config.ContainsKey("setup", "ConstraintField" ); + //-------------------------------------------------------------------------------------------------------- //! add beyond box tidal field modes following Schmidt et al. (2018) [https://arxiv.org/abs/1803.03274] bool bAddExternalTides = the_config.ContainsKey("cosmology", "LSS_aniso_lx") @@ -184,9 +188,21 @@ int Run( ConfigFile& the_config ) // Fill the grid with a Gaussian white noise field //-------------------------------------------------------------------- the_random_number_generator->Fill_Grid( phi ); - phi.FourierTransformForward(); + //-------------------------------------------------------------------- + // with the unconstrained noise in Fourier space, add constrained + // modes for low k + //-------------------------------------------------------------------- + if( bAddConstrainedModes ){ + auto cfield_fname = the_config.GetValue("setup", "ConstraintField" ); + + } + + + //-------------------------------------------------------------------- + // Apply power spectrum + //-------------------------------------------------------------------- phi.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { real_t kmod = k.norm(); if( bDoFixing ) x = (std::abs(x)!=0.0)? x / std::abs(x) : x; From 8c9d2acf7a8dc72cde0fb832c42179308871ed8a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 24 Feb 2020 18:23:51 +0100 Subject: [PATCH 063/130] prepared adding in external large-scale modes --- src/grid_fft.cc | 17 ++++++++++++++--- src/ic_generator.cc | 12 ++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/grid_fft.cc b/src/grid_fft.cc index 07a704d..e1af60f 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -279,6 +279,11 @@ hid_t hdf5_get_data_type(void) template void Grid_FFT::Read_from_HDF5(const std::string Filename, const std::string ObjName) { + if( bdistributed ){ + csoca::elog << "Attempt to read from HDF5 into MPI-distributed array. This is not supported yet!" 
<< std::endl; + abort(); + } + hid_t HDF_Type = hdf5_get_data_type(); hid_t HDF_FileID = H5Fopen(Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); @@ -301,7 +306,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c //... dataset did not exist or was empty if (HDF_DatasetID < 0) { - csoca::wlog << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + csoca::elog << "Dataset \'" << ObjName.c_str() << "\' does not exist or is empty." << std::endl; H5Fclose(HDF_FileID); abort(); } @@ -326,7 +331,10 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c if (Data.capacity() < HDF_StorageSize) { - csoca::elog << "Not enough memory to store all data in HDFReadDataset!\n"; + csoca::elog << "Not enough memory to store all data in HDFReadDataset!" << std::endl; + H5Sclose(HDF_DataspaceID); + H5Dclose(HDF_DatasetID); + H5Fclose(HDF_FileID); abort(); } @@ -335,7 +343,10 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c if (Data.size() != HDF_StorageSize) { - csoca::elog << "Something went wrong while reading!\n"; + csoca::elog << "Something went wrong while reading!" << std::endl; + H5Sclose(HDF_DataspaceID); + H5Dclose(HDF_DatasetID); + H5Fclose(HDF_FileID); abort(); } diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 377b644..79b8ace 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -193,6 +193,18 @@ int Run( ConfigFile& the_config ) }); + //-------------------------------------------------------------------- + // Use externally specified large scale modes from constraints in case + //-------------------------------------------------------------------- + if( bAddConstrainedModes ){ + Grid_FFT cwnoise({8,8,8}, {boxlen,boxlen,boxlen}); + cwnoise.Read_from_HDF5( the_config.GetValue("setup", "ConstraintField"), "wnoise" ); + cwnoise.FourierTransformForward(); + + // TODO: copy over modes + } + + //-------------------------------------------------------------------- // Compute the LPT terms.... //-------------------------------------------------------------------- From 64e13026fb1148a7ac072057d7b255a5ed011ac0 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 24 Feb 2020 21:26:36 +0100 Subject: [PATCH 064/130] can read hdf5 noise files --- example.conf | 9 ++++++--- src/grid_fft.cc | 10 +++++++++- src/ic_generator.cc | 6 ++++-- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/example.conf b/example.conf index 718e145..12d6a5f 100644 --- a/example.conf +++ b/example.conf @@ -8,11 +8,14 @@ zstart = 129.0 # order of the LPT to be used (1,2 or 3) LPTorder = 3 # also do baryon ICs? -DoBaryons = yes +DoBaryons = no # do mode fixing à la Angulo&Pontzen DoFixing = no # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!) -ParticleLoad = bcc +ParticleLoad = s +# Add a possible constraint field here: +ConstraintFieldFile = initial_conditions.h5 +ConstraintFieldName = ic [cosmology] transfer = CLASS @@ -42,7 +45,7 @@ seed = 9001 test = none [execution] -NumThreads = 16 +NumThreads = 8 [output] fname_hdf5 = output_sch.hdf5 diff --git a/src/grid_fft.cc b/src/grid_fft.cc index e1af60f..3a61608 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -358,6 +358,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c csoca::ilog << "Read external constraint data of dimensions " << dimsize[0] << "**3." 
<< std::endl; for( size_t i=0; i<3; ++i ) this->n_[i] = dimsize[i]; + this->space_ = rspace_id; if (data_ != nullptr) { @@ -367,6 +368,8 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c //... copy data to internal array ... + double sum1{0.0}, sum2{0.0}; + #pragma omp parallel for reduction(+:sum1,sum2) for (size_t i = 0; i < size(0); ++i) { for (size_t j = 0; j < size(1); ++j) @@ -374,9 +377,14 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c for (size_t k = 0; k < size(2); ++k) { this->relem(i,j,k) = Data[ (i*size(1) + j)*size(2)+k ]; + sum2 += std::real(this->relem(i,j,k)*this->relem(i,j,k)); + sum1 += std::real(this->relem(i,j,k)); } } - } + } + sum1 /= Data.size(); + sum2 /= Data.size(); + csoca::ilog << "Constraint field has =" << sum1 << ", -^2=" << std::sqrt(sum2-sum1*sum1) << std::endl; } template diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 79b8ace..dd4e6fa 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -85,7 +85,7 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------------------------------------------- //! do constrained ICs? - const bool bAddConstrainedModes = the_config.ContainsKey("setup", "ConstraintField" ); + const bool bAddConstrainedModes = the_config.ContainsKey("setup", "ConstraintFieldFile" ); //-------------------------------------------------------------------------------------------------------- //! add beyond box tidal field modes following Schmidt et al. (2018) [https://arxiv.org/abs/1803.03274] @@ -198,10 +198,12 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- if( bAddConstrainedModes ){ Grid_FFT cwnoise({8,8,8}, {boxlen,boxlen,boxlen}); - cwnoise.Read_from_HDF5( the_config.GetValue("setup", "ConstraintField"), "wnoise" ); + cwnoise.Read_from_HDF5( the_config.GetValue("setup", "ConstraintFieldFile"), + the_config.GetValue("setup", "ConstraintFieldName") ); cwnoise.FourierTransformForward(); // TODO: copy over modes + } From 10682e632e1960b6df9764fdccacb2a0e3decd9e Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 24 Feb 2020 21:27:27 +0100 Subject: [PATCH 065/130] class submodule update --- external/class | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/class b/external/class index 6f3abba..58e0adb 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit 6f3abbab2608712029d740d6c69aad0ba853e507 +Subproject commit 58e0adbb2cf845cd0766a26cecc1a153fa17d8b9 From ccd813a2ad2d434960577c7b2c347a8f62ff2ad3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Tue, 25 Feb 2020 18:36:46 +0100 Subject: [PATCH 066/130] added constraint mode copying. needs testing --- example.conf | 4 +-- src/ic_generator.cc | 65 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 61 insertions(+), 8 deletions(-) diff --git a/example.conf b/example.conf index 12d6a5f..4bd7af4 100644 --- a/example.conf +++ b/example.conf @@ -6,13 +6,13 @@ BoxLength = 250 # starting redshift zstart = 129.0 # order of the LPT to be used (1,2 or 3) -LPTorder = 3 +LPTorder = 1 # also do baryon ICs? DoBaryons = no # do mode fixing à la Angulo&Pontzen DoFixing = no # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!) 
-ParticleLoad = s +ParticleLoad = sc # Add a possible constraint field here: ConstraintFieldFile = initial_conditions.h5 ConstraintFieldName = ic diff --git a/src/ic_generator.cc b/src/ic_generator.cc index dd4e6fa..47f7e75 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -186,12 +186,6 @@ int Run( ConfigFile& the_config ) the_random_number_generator->Fill_Grid(wnoise); wnoise.FourierTransformForward(); - wnoise.apply_function_k( [&](auto wn){ - if (bDoFixing) - wn = (std::abs(wn) != 0.0) ? wn / std::abs(wn) : wn; - return wn / volfac; - }); - //-------------------------------------------------------------------- // Use externally specified large scale modes from constraints in case @@ -202,10 +196,69 @@ int Run( ConfigFile& the_config ) the_config.GetValue("setup", "ConstraintFieldName") ); cwnoise.FourierTransformForward(); + size_t ngrid_c = cwnoise.size(0), ngrid_c_2 = ngrid_c/2; + // TODO: copy over modes + double rs1{0.0},rs2{0.0},is1{0.0},is2{0.0}; + double nrs1{0.0},nrs2{0.0},nis1{0.0},nis2{0.0}; + double renormfac = std::pow(real_t(ngrid)/real_t(ngrid_c),1.5); + size_t count{0}; + + csoca::ilog << "renormfac = " << renormfac << " " << ngrid << " " << ngrid_c << std::endl; + + #pragma omp parallel for reduction(+:rs1,rs2,is1,is2,nrs1,nrs2,nis1,nis2,count) + for( size_t i=0; icwnoise.nhalf_[0] ) il = ngrid-ngrid_c_2+i; + if( il == size_t(-1) ) continue; + if( il=size_t(wnoise.local_1_start_+wnoise.local_1_size_)) continue; + il -= wnoise.local_1_start_; + for( size_t j=0; jcwnoise.nhalf_[1] ) jl = ngrid-ngrid_c_2+j; + for( size_t k=0; kcwnoise.nhalf_[2] ) kl = ngrid-ngrid_c_2+k; + if( kl == size_t(-1) ) continue; + + ++count; + + nrs1 += std::real(cwnoise.kelem(i,j,k) * renormfac); + nrs2 += std::real(cwnoise.kelem(i,j,k))*std::real(cwnoise.kelem(i,j,k)) * renormfac * renormfac; + nis1 += std::imag(cwnoise.kelem(i,j,k) * renormfac); + nis2 += std::imag(cwnoise.kelem(i,j,k))*std::imag(cwnoise.kelem(i,j,k)) * renormfac * renormfac; + + rs1 += std::real(wnoise.kelem(il,jl,kl)); + rs2 += std::real(wnoise.kelem(il,jl,kl))*std::real(wnoise.kelem(il,jl,kl)); + is1 += std::imag(wnoise.kelem(il,jl,kl)); + is2 += std::imag(wnoise.kelem(il,jl,kl))*std::imag(wnoise.kelem(il,jl,kl)); + + wnoise.kelem(il,jl,kl) = cwnoise.kelem(i,j,k) * renormfac; + } + } + } + + csoca::ilog << "old field: real part: =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; + csoca::ilog << "old field: imag part: =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; + csoca::ilog << "new field: real part: =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; + csoca::ilog << "new field: imag part: =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; + } + //-------------------------------------------------------------------- + // Apply Normalisation factor and Angulo&Pontzen fixing or not + //-------------------------------------------------------------------- + + wnoise.apply_function_k( [&](auto wn){ + if (bDoFixing) + wn = (std::abs(wn) != 0.0) ? wn / std::abs(wn) : wn; + return wn / volfac; + }); + //-------------------------------------------------------------------- // Compute the LPT terms.... 
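Note on the constraint-mode copying introduced in the patch above (and refined in the "fixes" commits that follow): the loop transplants the low-|k| Fourier modes of a coarse, externally supplied white-noise field into the matching corner of the larger simulation grid, wrapping negative frequencies around so that a coarse row index i > Nc/2 lands at fine row N - Nc + i. The sketch below is only an editorial illustration of that index mapping, not code from the patch: the names copy_constraint_modes, coarse, fine, N and Nc are invented for the example, it assumes plain std::complex arrays in the half-complex (r2c) layout, and it ignores the MPI slab offsets, the Nyquist-plane guards (the `continue` statements) and any amplitude renormalisation handled in the actual patch.

// Minimal sketch (illustration only), assuming N >= Nc and unit-stride r2c storage.
#include <complex>
#include <cstddef>
#include <vector>

void copy_constraint_modes(const std::vector<std::complex<double>> &coarse, std::size_t Nc,
                           std::vector<std::complex<double>> &fine, std::size_t N)
{
    // index helpers for the half-complex layout: (i*N + j)*(N/2+1) + k
    auto cidx = [&](std::size_t i, std::size_t j, std::size_t k) { return (i * Nc + j) * (Nc / 2 + 1) + k; };
    auto fidx = [&](std::size_t i, std::size_t j, std::size_t k) { return (i * N + j) * (N / 2 + 1) + k; };
    // row i of the coarse grid carries frequency i for i <= Nc/2 and i - Nc otherwise;
    // the same (negative) frequency lives at row N - Nc + i of the fine grid
    auto wrap = [&](std::size_t i) { return (i <= Nc / 2) ? i : N - Nc + i; };

    for (std::size_t i = 0; i < Nc; ++i)
        for (std::size_t j = 0; j < Nc; ++j)
            for (std::size_t k = 0; k <= Nc / 2; ++k) // last dimension stores only k >= 0
                fine[fidx(wrap(i), wrap(j), k)] = coarse[cidx(i, j, k)];
}

Keeping the negative-frequency rows aligned in this way is what makes the large-scale phases of the external field reappear unchanged in the new realisation; only the modes above the coarse grid's Nyquist frequency remain drawn from the code's own random number generator.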
From 2fe35aa2b438ebc291b2bc4600d1409a543590ed Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Tue, 25 Feb 2020 18:36:54 +0100 Subject: [PATCH 067/130] fixes --- src/ic_generator.cc | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 47f7e75..17530eb 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -201,50 +201,45 @@ int Run( ConfigFile& the_config ) // TODO: copy over modes double rs1{0.0},rs2{0.0},is1{0.0},is2{0.0}; double nrs1{0.0},nrs2{0.0},nis1{0.0},nis2{0.0}; - double renormfac = std::pow(real_t(ngrid)/real_t(ngrid_c),1.5); size_t count{0}; - csoca::ilog << "renormfac = " << renormfac << " " << ngrid << " " << ngrid_c << std::endl; - #pragma omp parallel for reduction(+:rs1,rs2,is1,is2,nrs1,nrs2,nis1,nis2,count) for( size_t i=0; icwnoise.nhalf_[0] ) il = ngrid-ngrid_c_2+i; + if( ingrid_c_2 && i+ngrid-ngrid_c_2>ngrid/2) il = ngrid-ngrid_c_2+i; if( il == size_t(-1) ) continue; if( il=size_t(wnoise.local_1_start_+wnoise.local_1_size_)) continue; il -= wnoise.local_1_start_; for( size_t j=0; jcwnoise.nhalf_[1] ) jl = ngrid-ngrid_c_2+j; + if( jngrid_c_2 && j+ngrid-ngrid_c_2>ngrid/2 ) jl = ngrid-ngrid_c_2+j; + if( jl == size_t(-1) ) continue; for( size_t k=0; kcwnoise.nhalf_[2] ) kl = ngrid-ngrid_c_2+k; - if( kl == size_t(-1) ) continue; - + size_t kl = k; + ++count; - nrs1 += std::real(cwnoise.kelem(i,j,k) * renormfac); - nrs2 += std::real(cwnoise.kelem(i,j,k))*std::real(cwnoise.kelem(i,j,k)) * renormfac * renormfac; - nis1 += std::imag(cwnoise.kelem(i,j,k) * renormfac); - nis2 += std::imag(cwnoise.kelem(i,j,k))*std::imag(cwnoise.kelem(i,j,k)) * renormfac * renormfac; + nrs1 += std::real(cwnoise.kelem(i,j,k)); + nrs2 += std::real(cwnoise.kelem(i,j,k))*std::real(cwnoise.kelem(i,j,k)); + nis1 += std::imag(cwnoise.kelem(i,j,k)); + nis2 += std::imag(cwnoise.kelem(i,j,k))*std::imag(cwnoise.kelem(i,j,k)); rs1 += std::real(wnoise.kelem(il,jl,kl)); rs2 += std::real(wnoise.kelem(il,jl,kl))*std::real(wnoise.kelem(il,jl,kl)); is1 += std::imag(wnoise.kelem(il,jl,kl)); is2 += std::imag(wnoise.kelem(il,jl,kl))*std::imag(wnoise.kelem(il,jl,kl)); - wnoise.kelem(il,jl,kl) = cwnoise.kelem(i,j,k) * renormfac; + wnoise.kelem(il,jl,kl) = cwnoise.kelem(i,j,k); } } } - csoca::ilog << "old field: real part: =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; - csoca::ilog << "old field: imag part: =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; - csoca::ilog << "new field: real part: =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; - csoca::ilog << "new field: imag part: =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; + csoca::ilog << " ... old field: re =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; + csoca::ilog << " ... old field: im =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; + csoca::ilog << " ... new field: re =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; + csoca::ilog << " ... 
new field: im =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; } From ed9f30235c391d644dc19dcde62d4cc50f30e374 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 26 Feb 2020 06:03:19 +0100 Subject: [PATCH 068/130] fixes to constraint inclusion --- src/ic_generator.cc | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 17530eb..e1d72cb 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -184,7 +184,7 @@ int Run( ConfigFile& the_config ) csoca::ilog << "Generating white noise field...." << std::endl; the_random_number_generator->Fill_Grid(wnoise); - + wnoise.FourierTransformForward(); //-------------------------------------------------------------------- @@ -206,17 +206,18 @@ int Run( ConfigFile& the_config ) #pragma omp parallel for reduction(+:rs1,rs2,is1,is2,nrs1,nrs2,nis1,nis2,count) for( size_t i=0; ingrid_c_2 && i+ngrid-ngrid_c_2>ngrid/2) il = ngrid-ngrid_c_2+i; + if( ingrid_c_2 && i+ngrid-ngrid_c>ngrid/2) il = ngrid-ngrid_c+i; if( il == size_t(-1) ) continue; if( il=size_t(wnoise.local_1_start_+wnoise.local_1_size_)) continue; il -= wnoise.local_1_start_; for( size_t j=0; jngrid_c_2 && j+ngrid-ngrid_c_2>ngrid/2 ) jl = ngrid-ngrid_c_2+j; + if( jngrid_c_2 && j+ngrid-ngrid_c>ngrid/2 ) jl = ngrid-ngrid_c+j; if( jl == size_t(-1) ) continue; for( size_t k=0; kngrid/2 ) continue; size_t kl = k; ++count; @@ -231,17 +232,20 @@ int Run( ConfigFile& the_config ) is1 += std::imag(wnoise.kelem(il,jl,kl)); is2 += std::imag(wnoise.kelem(il,jl,kl))*std::imag(wnoise.kelem(il,jl,kl)); + #if defined(USE_MPI) + wnoise.kelem(il,jl,kl) = cwnoise.kelem(j,i,k); + #else wnoise.kelem(il,jl,kl) = cwnoise.kelem(i,j,k); + #endif } } } - csoca::ilog << " ... old field: re =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; - csoca::ilog << " ... old field: im =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; - csoca::ilog << " ... new field: re =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; - csoca::ilog << " ... new field: im =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; - - + // csoca::ilog << " ... old field: re =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; + // csoca::ilog << " ... old field: im =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; + // csoca::ilog << " ... new field: re =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; + // csoca::ilog << " ... new field: im =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; + csoca::ilog << "White noise field large-scale modes overwritten with external field." << std::endl; } //-------------------------------------------------------------------- From f2ba17cfcd7006992fdcbe57928bf987b82ed14c Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 27 Feb 2020 22:56:40 +0100 Subject: [PATCH 069/130] added enforced normalisation of read white noise --- example.conf | 8 ++++---- src/grid_fft.cc | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/example.conf b/example.conf index 4bd7af4..33e227a 100644 --- a/example.conf +++ b/example.conf @@ -4,9 +4,9 @@ GridRes = 128 # length of the box in Mpc/h BoxLength = 250 # starting redshift -zstart = 129.0 +zstart = 49.0 # order of the LPT to be used (1,2 or 3) -LPTorder = 1 +LPTorder = 3 # also do baryon ICs? 
DoBaryons = no # do mode fixing à la Angulo&Pontzen @@ -15,11 +15,11 @@ DoFixing = no ParticleLoad = sc # Add a possible constraint field here: ConstraintFieldFile = initial_conditions.h5 -ConstraintFieldName = ic +ConstraintFieldName = ic_white_noise [cosmology] transfer = CLASS -ztarget = 0.0 +ztarget = 2.5 # transfer = eisenstein # transfer = file_CAMB # transfer_file = wmap5_transfer_out_z0.dat diff --git a/src/grid_fft.cc b/src/grid_fft.cc index 3a61608..5ae6b24 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -384,7 +384,20 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c } sum1 /= Data.size(); sum2 /= Data.size(); - csoca::ilog << "Constraint field has =" << sum1 << ", -^2=" << std::sqrt(sum2-sum1*sum1) << std::endl; + auto stdw = std::sqrt(sum2-sum1*sum1); + csoca::ilog << "Constraint field has =" << sum1 << ", -^2=" << stdw << std::endl; + + #pragma omp parallel for reduction(+:sum1,sum2) + for (size_t i = 0; i < size(0); ++i) + { + for (size_t j = 0; j < size(1); ++j) + { + for (size_t k = 0; k < size(2); ++k) + { + this->relem(i,j,k) /= stdw; + } + } + } } template From 2dfab2b2670225d99b0a8dde65a622b07929fcdc Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 28 Feb 2020 16:15:37 +0100 Subject: [PATCH 070/130] working commit --- example.conf | 21 +- external/class | 2 +- include/general.hh | 1 - include/grid_fft.hh | 17 ++ include/particle_generator.hh | 7 +- include/particle_plt.hh | 338 ++++++++---------------------- src/ic_generator.cc | 12 +- src/main.cc | 2 +- src/plugins/output_gadget_hdf5.cc | 2 +- 9 files changed, 133 insertions(+), 269 deletions(-) diff --git a/example.conf b/example.conf index 718e145..c66a520 100644 --- a/example.conf +++ b/example.conf @@ -2,17 +2,18 @@ # number of grid cells per linear dimension for calculations = particles for sc initial load GridRes = 128 # length of the box in Mpc/h -BoxLength = 250 +BoxLength = 125 # starting redshift -zstart = 129.0 +zstart = 49.0 +#zstart = 19.0 # order of the LPT to be used (1,2 or 3) LPTorder = 3 # also do baryon ICs? -DoBaryons = yes +DoBaryons = no # do mode fixing à la Angulo&Pontzen -DoFixing = no +DoFixing = yes # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!) 
-ParticleLoad = bcc +ParticleLoad = sc [cosmology] transfer = CLASS @@ -42,18 +43,18 @@ seed = 9001 test = none [execution] -NumThreads = 16 +NumThreads = 1 [output] fname_hdf5 = output_sch.hdf5 fbase_analysis = output -#format = gadget2 -#filename = ics_gadget.dat +format = gadget2 +filename = ics_gadget.dat #UseLongids = false # -format = gadget_hdf5 -filename = ics_gadget.hdf5 +#format = gadget_hdf5 +#filename = ics_gadget.hdf5 #format = generic diff --git a/external/class b/external/class index 6f3abba..58e0adb 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit 6f3abbab2608712029d740d6c69aad0ba853e507 +Subproject commit 58e0adbb2cf845cd0766a26cecc1a153fa17d8b9 diff --git a/include/general.hh b/include/general.hh index c77be01..b7f7df3 100644 --- a/include/general.hh +++ b/include/general.hh @@ -126,7 +126,6 @@ inline void multitask_sync_barrier( void ) } - namespace CONFIG { extern int MPI_thread_support; diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 66c1a6f..e98d6a7 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -242,6 +242,23 @@ public: return kk; } + template + vec3 get_k(const real_t i, const real_t j, const real_t k) const + { + vec3 kk; + if( bdistributed ){ + auto ip = i + real_t(local_1_start_); + kk[0] = (j - real_t(j > real_t(nhalf_[0])) * n_[0]) * kfac_[0]; + kk[1] = (ip - real_t(ip > real_t(nhalf_[1])) * n_[1]) * kfac_[1]; + }else{ + kk[0] = (real_t(i) - real_t(i > real_t(nhalf_[0])) * n_[0]) * kfac_[0]; + kk[1] = (real_t(j) - real_t(j > real_t(nhalf_[1])) * n_[1]) * kfac_[1]; + } + kk[2] = (real_t(k) - real_t(k > real_t(nhalf_[2])) * n_[2]) * kfac_[2]; + + return kk; + } + std::array get_k3(const size_t i, const size_t j, const size_t k) const { return bdistributed? std::array({j,i+local_1_start_,k}) : std::array({i,j,k}); diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 801c919..56c69f4 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -29,9 +29,10 @@ const std::vector< std::vector> > lattice_shifts = const std::vector> second_lattice_shift = { - /* SC : */ {0.5, 0.5, 0.5}, - /* BCC: */ {0.5, 0.5, 0.0}, - /* FCC: */ {0.25, 0.25, 0.25}, + /* SC : */ {0.5, 0.5, 0.5}, // this corresponds to CsCl lattice + /* BCC: */ {0.5, 0.5, 0.0}, // is there a diatomic lattice with BCC base?!? 
+ /* FCC: */ {0.5, 0.5, 0.5}, // this corresponds to NaCl lattice + // /* FCC: */ {0.25, 0.25, 0.25}, // this corresponds to Zincblende/GaAs lattice /* RSC: */ {0.25, 0.25, 0.25}, }; diff --git a/include/particle_plt.hh b/include/particle_plt.hh index e636dcc..e95308f 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -9,10 +9,18 @@ #include #include +#include + #include #include #include +#include +inline double Hypergeometric2F1( double a, double b, double c, double x ) +{ + return gsl_sf_hyperg_2F1( a, b, c, x); +} + #define PRODUCTION namespace particle{ @@ -20,7 +28,7 @@ namespace particle{ class lattice_gradient{ private: - const real_t boxlen_; + const real_t boxlen_, XmL_, aini_; const size_t ngmapto_, ngrid_, ngrid32_; const real_t mapratio_; Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; @@ -448,7 +456,7 @@ private: vec3 evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}); evec1 /= evec1.norm(); - if(std::abs(ii)+std::abs(jj)+k<8){ + if(false){//std::abs(ii)+std::abs(jj)+k<8){ // small k modes, use usual pseudospectral derivative // -- store in diagonal components of D_ij D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kv.x/mapratio_/boxlen_); @@ -460,15 +468,40 @@ private: }else{ // large k modes, use interpolated PLT results // -- store in diagonal components of D_ij - D_xx_.kelem(i,j,k) = ccomplex_t(0.0,evec1.x * kmod); - D_yy_.kelem(i,j,k) = ccomplex_t(0.0,evec1.y * kmod); - D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod); + // D_xx_.kelem(i,j,k) = ccomplex_t(0.0,evec1.x * kmod); + // D_yy_.kelem(i,j,k) = ccomplex_t(0.0,evec1.y * kmod); + // D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod); - // re-normalise to that longitudinal amplitude is exact + // // re-normalise to that longitudinal amplitude is exact + evec1 = kv; auto norm = (kv.norm()/kv.dot(evec1)); - D_xx_.kelem(i,j,k) *= norm; - D_yy_.kelem(i,j,k) *= norm; - D_zz_.kelem(i,j,k) *= norm; + // D_xx_.kelem(i,j,k) *= norm; + // D_yy_.kelem(i,j,k) *= norm; + // D_zz_.kelem(i,j,k) *= norm; + + /////////////////////////////////// + // project onto spherical coordinate vectors + + real_t kr = kv.norm(), kphi = std::atan2(kv.y,kv.x), ktheta = std::acos( kv.z / kr ); + real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); + vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + + //vec3 e_r( 1.0, 0.0, 0.0 ), e_theta( 0.0, 1.0, 0.0 ), e_phi( 0.0, 0.0, 1.0 ); + + D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod*norm * evec1.dot( e_r ) ); + D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kmod*norm * evec1.dot( e_theta ) ); + D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kmod*norm * evec1.dot( e_phi ) ); + + real_t eve1p1 = kmod*norm * evec1.dot( e_r ); + real_t eve1p2 = kmod*norm * evec1.dot( e_theta ); + real_t eve1p3 = kmod*norm * evec1.dot( e_phi ); + + auto rvec = eve1p1 * e_r + eve1p2 * e_theta + eve1p3 * e_phi; + + std::cerr << D_xx_.kelem(i,j,k) << " " << D_yy_.kelem(i,j,k) << " " << D_zz_.kelem(i,j,k) << std::endl; + + //std::cerr << rvec.x << " " << evec1.x * kmod*norm << std::endl; + // spatially dependent correction to vfact = \dot{D_+}/D_+ D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); @@ -500,244 +533,14 @@ private: #endif } - void init_D__old() - { - constexpr real_t pi = M_PI, twopi = 2.0*M_PI; - - const std::vector> normals_bcc{ - {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, - {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, - {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} - }; - - const 
std::vector> bcc_reciprocal{ - {twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi} - }; - - const real_t eta = 2.0/ngrid_; // Ewald cutoff shall be 2 cells - const real_t alpha = 1.0/std::sqrt(2)/eta; - const real_t alpha2 = alpha*alpha; - const real_t alpha3 = alpha2*alpha; - const real_t sqrtpi = std::sqrt(M_PI); - const real_t pi32 = std::pow(M_PI,1.5); - - //! just a Kronecker \delta_ij - auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; - - //! short range component of Ewald sum, eq. (A2) of Marcos (2008) - auto greensftide_sr = [&]( int mu, int nu, const vec3& vR, const vec3& vP ) -> real_t { - auto d = vR-vP; - auto r = d.norm(); - if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later! - real_t val{0.0}; - val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r); - val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) * - (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r); - return val; - }; - - //! sums mirrored copies of short-range component of Ewald sum - auto evaluate_D = [&]( int mu, int nu, const vec3& v ) -> real_t{ - real_t sr = 0.0; - constexpr int N = 3; // number of repeated copies ±N per dimension - int count = 0; - for( int i=-N; i<=N; ++i ){ - for( int j=-N; j<=N; ++j ){ - for( int k=-N; k<=N; ++k ){ - if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){ - //sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); - sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} ); - sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} ); - count += 2; - - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)-0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)+0.5} )/16; - // sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)-0.5} )/16; - } - } - } - } - return sr / count; - }; - - //! fill D_ij array with short range evaluated function - #pragma omp parallel for - for( size_t i=0; i p; - p.x = real_t(i)/ngrid_; - for( size_t j=0; j D; - vec3 eval, evec1, evec2, evec3; - - #pragma omp for - for( size_t i=0; i kv = D_xx_.get_k(i,j,k); - auto& b=bcc_reciprocal; - vec3 kvc = { b[0][0]*kvc[0]+b[1][0]*kvc[1]+b[2][0]*kvc[2], - b[0][1]*kvc[0]+b[1][1]*kvc[1]+b[2][1]*kvc[2], - b[0][2]*kvc[0]+b[1][2]*kvc[1]+b[2][2]*kvc[2] }; - // vec3 kv = {kvc.dot(bcc_reciprocal[0]),kvc.dot(bcc_reciprocal[1]),kvc.dot(bcc_reciprocal[2])}; - const real_t kmod2 = kv.norm_squared(); - - // long range component of Ewald sum - //ccomplex_t shift = 1.0;//std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])* D_xx_.get_dx()[0])); - ccomplex_t phi0 = -rho0 * std::exp(-0.5*eta*eta*kmod2) / kmod2; - phi0 = (phi0==phi0)? 
phi0 : 0.0; // catch NaN from division by zero when kmod2=0 - - - // const int nn = 3; - // size_t nsum = 0; - // ccomplex_t ff = 0.0; - // for( int is=-nn;is<=nn;is++){ - // for( int js=-nn;js<=nn;js++){ - // for( int ks=-nn;ks<=nn;ks++){ - // if( std::abs(is)+std::abs(js)+std::abs(ks) <= nn ){ - // ff += std::exp(ccomplex_t(0.0,(((is)*kv[0] + (js)*kv[1] + (ks)*kv[2])))); - // ff += std::exp(ccomplex_t(0.0,(((0.5+is)*kv[0] + (0.5+js)*kv[1] + (0.5+ks)*kv[2])))); - // ++nsum; - // } - // } - // } - // } - // ff /= nsum; - // ccomplex_t ff = 1.0; - ccomplex_t ff = (0.5+0.5*std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])))); - // assemble short-range + long_range of Ewald sum and add DC component to trace - D_xx_.kelem(i,j,k) = ff*((D_xx_.kelem(i,j,k) - kv[0]*kv[0] * phi0)*nfac) + 1.0/3.0; - D_xy_.kelem(i,j,k) = ff*((D_xy_.kelem(i,j,k) - kv[0]*kv[1] * phi0)*nfac); - D_xz_.kelem(i,j,k) = ff*((D_xz_.kelem(i,j,k) - kv[0]*kv[2] * phi0)*nfac); - D_yy_.kelem(i,j,k) = ff*((D_yy_.kelem(i,j,k) - kv[1]*kv[1] * phi0)*nfac) + 1.0/3.0; - D_yz_.kelem(i,j,k) = ff*((D_yz_.kelem(i,j,k) - kv[1]*kv[2] * phi0)*nfac); - D_zz_.kelem(i,j,k) = ff*((D_zz_.kelem(i,j,k) - kv[2]*kv[2] * phi0)*nfac) + 1.0/3.0; - - } - } - } - - D_xx_.kelem(0,0,0) = 1.0/3.0; - D_xy_.kelem(0,0,0) = 0.0; - D_xz_.kelem(0,0,0) = 0.0; - D_yy_.kelem(0,0,0) = 1.0/3.0; - D_yz_.kelem(0,0,0) = 0.0; - D_zz_.kelem(0,0,0) = 1.0/3.0; - - #pragma omp for - for( size_t i=0; i kv = D_xx_.get_k(i,j,k); - const real_t kmod = kv.norm()/mapratio_/boxlen_; - - // store in diagonal components of D_ij - D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.x; - D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.y; - D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.z; - - auto norm = (kv.norm()/kv.dot(evec3)); - if ( std::abs(kv.dot(evec3)) < 1e-10 || kv.norm() < 1e-10 ) norm = 0.0; - - D_xx_.kelem(i,j,k) *= norm; - D_yy_.kelem(i,j,k) *= norm; - D_zz_.kelem(i,j,k) *= norm; - - // spatially dependent correction to vfact = \dot{D_+}/D_+ - D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*eval[2])-1.)); -#else - - D_xx_.kelem(i,j,k) = eval[2]; - D_yy_.kelem(i,j,k) = eval[1]; - D_zz_.kelem(i,j,k) = eval[0]; - - D_xy_.kelem(i,j,k) = evec3[0]; - D_xz_.kelem(i,j,k) = evec3[1]; - D_yz_.kelem(i,j,k) = evec3[2]; -#endif - } - } - } - } -#ifdef PRODUCTION - D_xy_.kelem(0,0,0) = 1.0; -#endif - - ////////////////////////////////////////// - std::string filename("plt_test.hdf5"); - unlink(filename.c_str()); - #if defined(USE_MPI) - MPI_Barrier(MPI_COMM_WORLD); - #endif - // rho.Write_to_HDF5(filename, "rho"); - D_xx_.Write_to_HDF5(filename, "omega1"); - D_yy_.Write_to_HDF5(filename, "omega2"); - D_zz_.Write_to_HDF5(filename, "omega3"); - D_xy_.Write_to_HDF5(filename, "e1_x"); - D_xz_.Write_to_HDF5(filename, "e1_y"); - D_yz_.Write_to_HDF5(filename, "e1_z"); - - } - public: // real_t boxlen, size_t ngridother explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=64 ) : boxlen_( the_config.GetValue("setup", "BoxLength") ), ngmapto_( the_config.GetValue("setup", "GridRes") ), + XmL_ ( the_config.GetValue("cosmology", "Omega_L") / the_config.GetValue("cosmology", "Omega_m") ), + aini_ ( 1.0/(1.0+the_config.GetValue("setup", "zstart")) ), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), @@ -775,15 +578,58 @@ public: inline 
ccomplex_t gradient( const int idim, std::array ijk ) const { real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; - if( idim == 0 ) return D_xx_.get_cic_kspace({ix,iy,iz}); - else if( idim == 1 ) return D_yy_.get_cic_kspace({ix,iy,iz}); - return D_zz_.get_cic_kspace({ix,iy,iz}); + + // if( idim == 0 ) return D_xx_.get_cic_kspace({ix,iy,iz}); + // else if( idim == 1 ) return D_yy_.get_cic_kspace({ix,iy,iz}); + // return D_zz_.get_cic_kspace({ix,iy,iz}); + + /////// + // auto kv = D_xx_.get_k( static_cast(ix), static_cast(iy), static_cast(iz) ); + auto kv = D_xx_.get_k( ix, iy, iz ) / mapratio_; + + // project onto spherical coordinate vectors + //real_t kr = kv.norm(), kphi = kr > 0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; + + // vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + auto D_r = D_xx_.get_cic_kspace({ix,iy,iz}); + auto D_theta = D_yy_.get_cic_kspace({ix,iy,iz}); + auto D_phi = D_zz_.get_cic_kspace({ix,iy,iz}); + real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; + real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); + + //real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); + vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + + vec3 evec3 = D_r.imag() * e_r + D_theta.imag() * e_theta + D_phi.imag() * e_phi; + + assert(!std::isnan(std::imag(D_r * st * cp + D_theta * ct * cp - D_phi * sp))); + assert(!std::isnan(std::imag(D_r * st * sp + D_theta * ct * sp + D_phi * cp))); + assert(!std::isnan(std::imag(D_r * ct - D_theta * st))); + assert(!std::isnan(std::real(D_r * st * cp + D_theta * ct * cp - D_phi * sp))); + assert(!std::isnan(std::real(D_r * st * sp + D_theta * ct * sp + D_phi * cp))); + assert(!std::isnan(std::real(D_r * ct - D_theta * st))); + + // std::cerr << kv.x/boxlen_ << " " << kv.y/boxlen_ << " " << kv.z/boxlen_ << " -- " << D_r * st * cp + D_theta * ct * cp - D_phi * sp << " " << D_r * st * sp + D_theta * ct * sp + D_phi * cp << " " << (D_r * ct - D_theta * st ) << std::endl; + + if( idim == 0 ){ + return ccomplex_t( 0.0, evec3.x );//D_r; //D_r * st * cp + D_theta * ct * cp - D_phi * sp; + } + else if( idim == 1 ){ + return ccomplex_t( 0.0, evec3.y );;//D_theta; //D_r * st * sp + D_theta * ct * sp + D_phi * cp; + } + return ccomplex_t( 0.0, evec3.z );//D_phi; //(D_r * ct - D_theta * st ); } - inline real_t vfac_corr( std::array ijk ) const + inline real_t vfac_corr( std::array ijk ) const { real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; - return std::real(D_xy_.get_cic_kspace({ix,iy,iz})); + const real_t alpha = 1.0/std::real(D_xy_.get_cic_kspace({ix,iy,iz})); + return 1.0/alpha; + // // below is for LCDM: + //! 
X = \Omega_\Lambda / \Omega_m + // return 1.0 / (alpha - (2*std::pow(aini_,3)*alpha*(2 + alpha)*XmL_*Hypergeometric2F1((3 + alpha)/3.,(5 + alpha)/3., + // (13 + 4*alpha)/6.,-(std::pow(aini_,3)*XmL_)))/ + // ((7 + 4*alpha)*Hypergeometric2F1(alpha/3.,(2 + alpha)/3.,(7 + 4*alpha)/6.,-(std::pow(aini_,3)*XmL_)))); } }; diff --git a/src/ic_generator.cc b/src/ic_generator.cc index ec71944..66858ba 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -203,8 +203,8 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- // Create PLT gradient operator //-------------------------------------------------------------------- - // particle::lattice_gradient lg( the_config ); - op::fourier_gradient lg( the_config ); + particle::lattice_gradient lg( the_config ); + // op::fourier_gradient lg( the_config ); //-------------------------------------------------------------------- std::vector species_list; @@ -500,8 +500,8 @@ int Run( ConfigFile& the_config ) real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, cdm0) : (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, baryon0) : - // the_cosmo_calc->GetAmplitude(kmod, total0)) - the_cosmo_calc->GetAmplitude(kmod, total0); - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); + the_cosmo_calc->GetAmplitude(kmod, total0)) - the_cosmo_calc->GetAmplitude(kmod, total0); + //the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * lunit * ampldiff / k2 / boxlen; } @@ -549,8 +549,8 @@ int Run( ConfigFile& the_config ) real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, vcdm0) : (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, vbaryon0) : - // the_cosmo_calc->GetAmplitude(kmod, total0)) - the_cosmo_calc->GetAmplitude(kmod, total0); - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); + the_cosmo_calc->GetAmplitude(kmod, vtotal0)) - the_cosmo_calc->GetAmplitude(kmod, vtotal0); + //the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * vfac1 * vunit / boxlen * ampldiff / k2 ; } diff --git a/src/main.cc b/src/main.cc index c36943c..cbdf209 100644 --- a/src/main.cc +++ b/src/main.cc @@ -193,7 +193,7 @@ int main( int argc, char** argv ) #endif csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Done.\n" << std::endl; + csoca::ilog << "Done. 
Have a nice day!\n" << std::endl; return 0; } diff --git a/src/plugins/output_gadget_hdf5.cc b/src/plugins/output_gadget_hdf5.cc index 43afbe1..f32f9c8 100644 --- a/src/plugins/output_gadget_hdf5.cc +++ b/src/plugins/output_gadget_hdf5.cc @@ -121,7 +121,7 @@ public: HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array(header_.npartTotalHighWord)); HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Entropy_ICs", from_value(header_.flag_entropy_instead_u)); - csoca::ilog << "Wrote" << std::endl; + csoca::ilog << "Wrote Gadget-HDF5 file(s) to " << this_fname_ << std::endl; } output_type write_species_as(const cosmo_species &) const { return output_type::particles; } From 35344f017029284a18f6d1971107cecee1bac1c2 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 28 Feb 2020 17:40:50 +0100 Subject: [PATCH 071/130] added arepo plugin, but this might get removed again since too similar to gadget-hdf5 --- example.conf | 8 +- src/plugins/output_arepo.cc | 241 ++++++++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+), 3 deletions(-) create mode 100644 src/plugins/output_arepo.cc diff --git a/example.conf b/example.conf index 33e227a..2066756 100644 --- a/example.conf +++ b/example.conf @@ -54,10 +54,12 @@ fbase_analysis = output #format = gadget2 #filename = ics_gadget.dat #UseLongids = false -# -format = gadget_hdf5 -filename = ics_gadget.hdf5 +#format = gadget_hdf5 +#filename = ics_gadget.hdf5 + +format = AREPO +filename = ics_arepo.hdf5 #format = generic #filename = debug.hdf5 diff --git a/src/plugins/output_arepo.cc b/src/plugins/output_arepo.cc new file mode 100644 index 0000000..8d8d903 --- /dev/null +++ b/src/plugins/output_arepo.cc @@ -0,0 +1,241 @@ + +#ifdef USE_HDF5 +#include // for unlink +#include +#include "HDF_IO.hh" + +template +std::vector from_6array(const T *a) +{ + return std::vector{{a[0], a[1], a[2], a[3], a[4], a[5]}}; +} + +template +std::vector from_value(const T a) +{ + return std::vector{{a}}; +} + +template +class gadget_hdf5_output_plugin : public output_plugin +{ + struct header_t + { + unsigned npart[6]; + double mass[6]; + double time; + double redshift; + int flag_sfr; + int flag_feedback; + unsigned int npartTotal[6]; + int flag_cooling; + int num_files; + double BoxSize; + double Omega0; + double OmegaLambda; + double HubbleParam; + int flag_stellarage; + int flag_metals; + unsigned int npartTotalHighWord[6]; + int flag_entropy_instead_u; + int flag_doubleprecision; + }; + +protected: + int num_files_, num_simultaneous_writers_; + header_t header_; + real_t lunit_, vunit_; + bool blongids_; + std::string this_fname_; + double Tini_; + unsigned pmgrid_; + unsigned gridboost_; + int doublePrec_; + int doBaryons_; + double softening_; + +public: + //! 
constructor + explicit gadget_hdf5_output_plugin(ConfigFile &cf) + : output_plugin(cf, "GADGET-HDF5") + { + num_files_ = 1; +#ifdef USE_MPI + // use as many output files as we have MPI tasks + MPI_Comm_size(MPI_COMM_WORLD, &num_files_); +#endif + real_t astart = 1.0 / (1.0 + cf_.GetValue("setup", "zstart")); + lunit_ = cf_.GetValue("setup", "BoxLength"); + vunit_ = lunit_ / std::sqrt(astart); + blongids_ = cf_.GetValueSafe("output", "UseLongids", false); + num_simultaneous_writers_ = cf_.GetValueSafe("output", "NumSimWriters", num_files_); + + for (int i = 0; i < 6; ++i) + { + header_.npart[i] = 0; + header_.npartTotal[i] = 0; + header_.npartTotalHighWord[i] = 0; + header_.mass[i] = 0.0; + } + + header_.time = astart; + header_.redshift = 1.0 / astart - 1.0; + header_.flag_sfr = 0; + header_.flag_feedback = 0; + header_.flag_cooling = 0; + header_.num_files = num_files_; + header_.BoxSize = lunit_; + header_.Omega0 = cf_.GetValue("cosmology", "Omega_m"); + header_.OmegaLambda = cf_.GetValue("cosmology", "Omega_L"); + header_.HubbleParam = cf_.GetValue("cosmology", "H0") / 100.0; + header_.flag_stellarage = 0; + header_.flag_metals = 0; + header_.flag_entropy_instead_u = 0; + header_.flag_doubleprecision = (typeid(write_real_t) == typeid(double)) ? true : false; + + // initial gas temperature + double Tcmb0 = 2.726; + double Omegab = cf_.GetValue("cosmology", "Omega_b"); + double h = cf_.GetValue("cosmology", "H0") / 100.0, h2 = h*h; + double adec = 1.0 / (160.0 * pow(Omegab * h2 / 0.022, 2.0 / 5.0)); + Tini_ = astart < adec ? Tcmb0 / astart : Tcmb0 / astart / astart * adec; + + // suggested PM res + pmgrid_ = 2*cf_.GetValue("setup", "GridRes"); + gridboost_ = 1; + softening_ = cf_.GetValue("setup", "BoxLength")/pmgrid_/20; + doBaryons_ = cf_.GetValue("setup", "DoBaryons"); +#if !defined(USE_SINGLEPRECISION) + doublePrec_ = 1; +#else + doublePrec_ = 0; +#endif + + this_fname_ = fname_; +#ifdef USE_MPI + int thisrank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &thisrank); + if (num_files_ > 1) + this_fname_ += "." 
+ std::to_string(thisrank); +#endif + + unlink(this_fname_.c_str()); + HDFCreateFile(this_fname_); + } + + // use destructor to write header post factum + ~gadget_hdf5_output_plugin() + { + HDFCreateGroup(this_fname_, "Header"); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_ThisFile", from_6array(header_.npart)); + HDFWriteGroupAttribute(this_fname_, "Header", "MassTable", from_6array(header_.mass)); + HDFWriteGroupAttribute(this_fname_, "Header", "Time", from_value(header_.time)); + HDFWriteGroupAttribute(this_fname_, "Header", "Redshift", from_value(header_.redshift)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total", from_6array(header_.npartTotal)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array(header_.npartTotalHighWord)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumFilesPerSnapshot", from_value(header_.num_files)); + HDFWriteGroupAttribute(this_fname_, "Header", "BoxSize", from_value(header_.BoxSize)); + HDFWriteGroupAttribute(this_fname_, "Header", "Omega0", from_value(header_.Omega0)); + HDFWriteGroupAttribute(this_fname_, "Header", "OmegaLambda", from_value(header_.OmegaLambda)); + HDFWriteGroupAttribute(this_fname_, "Header", "HubbleParam", from_value(header_.HubbleParam)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Sfr", from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Cooling", from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_StellarAge", from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Metals", from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Feedback", from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_DoublePrecision", (int)doublePrec_); + // HDFWriteGroupAttribute(this_fname_, "Header", "Music_levelmin", levelmin_); + // HDFWriteGroupAttribute(this_fname_, "Header", "Music_levelmax", levelmax_); + // HDFWriteGroupAttribute(this_fname_, "Header", "Music_levelcounts", levelcounts); + HDFWriteGroupAttribute(this_fname_, "Header", "haveBaryons", from_value((int)doBaryons_)); + HDFWriteGroupAttribute(this_fname_, "Header", "longIDs", from_value((int)blongids_)); + HDFWriteGroupAttribute(this_fname_, "Header", "suggested_pmgrid", from_value(pmgrid_)); + HDFWriteGroupAttribute(this_fname_, "Header", "suggested_gridboost", from_value(gridboost_)); + HDFWriteGroupAttribute(this_fname_, "Header", "suggested_highressoft", from_value(softening_)); + HDFWriteGroupAttribute(this_fname_, "Header", "suggested_gas_Tinit", from_value(Tini_)); + + csoca::ilog << "Wrote" << std::endl; + } + + output_type write_species_as(const cosmo_species &) const { return output_type::particles; } + + real_t position_unit() const { return lunit_; } + + real_t velocity_unit() const { return vunit_; } + + bool has_64bit_reals() const + { + if (typeid(write_real_t) == typeid(double)) + return true; + return false; + } + + bool has_64bit_ids() const + { + if (blongids_) + return true; + return false; + } + + int get_species_idx(const cosmo_species &s) const + { + switch (s) + { + case cosmo_species::dm: + return 1; + case cosmo_species::baryon: + return 2; + case cosmo_species::neutrino: + return 3; + } + return -1; + } + + void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species) + { + int sid = get_species_idx(s); + + assert(sid != -1); + + header_.npart[sid] = (pc.get_local_num_particles()); + header_.npartTotal[sid] = (uint32_t)(pc.get_global_num_particles()); + 
header_.npartTotalHighWord[sid] = (uint32_t)((pc.get_global_num_particles()) >> 32); + + double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 + double boxmass = Omega_species * rhoc * std::pow(header_.BoxSize, 3); + header_.mass[sid] = boxmass / pc.get_global_num_particles(); + + HDFCreateGroup(this_fname_, std::string("PartType") + std::to_string(sid)); + + //... write positions and velocities..... + if (this->has_64bit_reals()) + { + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions64_); + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities64_); + } + else + { + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions32_); + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities32_); + } + + //... write ids..... + if (this->has_64bit_ids()) + HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids64_); + else + HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids32_); + + // std::cout << ">>>A> " << header_.npart[sid] << std::endl; + } +}; + +namespace +{ +#if !defined(USE_SINGLEPRECISION) +output_plugin_creator_concrete> creator1("AREPO"); +#else +output_plugin_creator_concrete> creator1("AREPO"); +#endif +} // namespace + +#endif \ No newline at end of file From 77f9f06ebc2838ff499dedd551134bc07ffca317 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 28 Feb 2020 18:03:48 +0100 Subject: [PATCH 072/130] working commit, PLT interpolation --- include/particle_plt.hh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/particle_plt.hh b/include/particle_plt.hh index e95308f..18e6394 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -498,7 +498,7 @@ private: auto rvec = eve1p1 * e_r + eve1p2 * e_theta + eve1p3 * e_phi; - std::cerr << D_xx_.kelem(i,j,k) << " " << D_yy_.kelem(i,j,k) << " " << D_zz_.kelem(i,j,k) << std::endl; + // std::cerr << D_xx_.kelem(i,j,k) << " " << D_yy_.kelem(i,j,k) << " " << D_zz_.kelem(i,j,k) << std::endl; //std::cerr << rvec.x << " " << evec1.x * kmod*norm << std::endl; @@ -612,12 +612,12 @@ public: // std::cerr << kv.x/boxlen_ << " " << kv.y/boxlen_ << " " << kv.z/boxlen_ << " -- " << D_r * st * cp + D_theta * ct * cp - D_phi * sp << " " << D_r * st * sp + D_theta * ct * sp + D_phi * cp << " " << (D_r * ct - D_theta * st ) << std::endl; if( idim == 0 ){ - return ccomplex_t( 0.0, evec3.x );//D_r; //D_r * st * cp + D_theta * ct * cp - D_phi * sp; + return D_r * st * cp + D_theta * ct * cp - D_phi * sp; } else if( idim == 1 ){ - return ccomplex_t( 0.0, evec3.y );;//D_theta; //D_r * st * sp + D_theta * ct * sp + D_phi * cp; + return D_r * st * sp + D_theta * ct * sp + D_phi * cp; } - return ccomplex_t( 0.0, evec3.z );//D_phi; //(D_r * ct - D_theta * st ); + return D_r * ct - D_theta * st; } inline real_t vfac_corr( std::array ijk ) const From 569831530846a873f7d659de34aee4d920aefef5 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 28 Feb 2020 18:35:26 +0100 Subject: [PATCH 073/130] working commit --- example.conf | 4 +- include/particle_plt.hh | 99 ++++++++++++++--------------------------- 2 files changed, 36 insertions(+), 67 deletions(-) diff --git a/example.conf b/example.conf index c66a520..21576b0 100644 --- 
a/example.conf +++ b/example.conf @@ -7,7 +7,7 @@ BoxLength = 125 zstart = 49.0 #zstart = 19.0 # order of the LPT to be used (1,2 or 3) -LPTorder = 3 +LPTorder = 1 # also do baryon ICs? DoBaryons = no # do mode fixing à la Angulo&Pontzen @@ -43,7 +43,7 @@ seed = 9001 test = none [execution] -NumThreads = 1 +NumThreads = 8 [output] fname_hdf5 = output_sch.hdf5 diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 18e6394..b62ba2e 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -468,40 +468,26 @@ private: }else{ // large k modes, use interpolated PLT results // -- store in diagonal components of D_ij - // D_xx_.kelem(i,j,k) = ccomplex_t(0.0,evec1.x * kmod); - // D_yy_.kelem(i,j,k) = ccomplex_t(0.0,evec1.y * kmod); - // D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod); + auto norm = (kv.norm()/kv.dot(evec1)); + D_xx_.kelem(i,j,k) = ccomplex_t(0.0,evec1.x * kmod); + D_yy_.kelem(i,j,k) = ccomplex_t(0.0,evec1.y * kmod); + D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod); // // re-normalise to that longitudinal amplitude is exact - evec1 = kv; - auto norm = (kv.norm()/kv.dot(evec1)); - // D_xx_.kelem(i,j,k) *= norm; - // D_yy_.kelem(i,j,k) *= norm; - // D_zz_.kelem(i,j,k) *= norm; - - /////////////////////////////////// - // project onto spherical coordinate vectors + // //evec1 = kv; + // auto norm = (kv.norm()/kv.dot(evec1)); + // //evec1 = evec1 * (1.0/boxlen_); - real_t kr = kv.norm(), kphi = std::atan2(kv.y,kv.x), ktheta = std::acos( kv.z / kr ); - real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); - vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); - - //vec3 e_r( 1.0, 0.0, 0.0 ), e_theta( 0.0, 1.0, 0.0 ), e_phi( 0.0, 0.0, 1.0 ); - - D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod*norm * evec1.dot( e_r ) ); - D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kmod*norm * evec1.dot( e_theta ) ); - D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kmod*norm * evec1.dot( e_phi ) ); - - real_t eve1p1 = kmod*norm * evec1.dot( e_r ); - real_t eve1p2 = kmod*norm * evec1.dot( e_theta ); - real_t eve1p3 = kmod*norm * evec1.dot( e_phi ); - - auto rvec = eve1p1 * e_r + eve1p2 * e_theta + eve1p3 * e_phi; - - // std::cerr << D_xx_.kelem(i,j,k) << " " << D_yy_.kelem(i,j,k) << " " << D_zz_.kelem(i,j,k) << std::endl; - - //std::cerr << rvec.x << " " << evec1.x * kmod*norm << std::endl; + // /////////////////////////////////// + // // project onto spherical coordinate vectors + + // real_t kr = kv.norm(), kphi = std::atan2(kv.y,kv.x), ktheta = std::acos( kv.z / kr ); + // real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); + // vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + // D_xx_.kelem(i,j,k) = ccomplex_t( 0.0, evec1.dot( e_r )); //kmod*norm + // D_yy_.kelem(i,j,k) = ccomplex_t( 0.0, evec1.dot( e_theta )); //kmod*norm + // D_zz_.kelem(i,j,k) = ccomplex_t( 0.0, evec1.dot( e_phi )); //kmod*norm // spatially dependent correction to vfact = \dot{D_+}/D_+ D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); @@ -579,45 +565,28 @@ public: { real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; - // if( idim == 0 ) return D_xx_.get_cic_kspace({ix,iy,iz}); - // else if( idim == 1 ) return D_yy_.get_cic_kspace({ix,iy,iz}); - // return D_zz_.get_cic_kspace({ix,iy,iz}); + if( idim == 0 ) return D_xx_.get_cic_kspace({ix,iy,iz}); + else if( idim == 1 ) return D_yy_.get_cic_kspace({ix,iy,iz}); + return 
D_zz_.get_cic_kspace({ix,iy,iz}); - /////// - // auto kv = D_xx_.get_k( static_cast(ix), static_cast(iy), static_cast(iz) ); - auto kv = D_xx_.get_k( ix, iy, iz ) / mapratio_; + // auto kv = D_xx_.get_k( ix, iy, iz ) / boxlen_; - // project onto spherical coordinate vectors - //real_t kr = kv.norm(), kphi = kr > 0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; + // // project onto spherical coordinate vectors + // auto D_r = D_xx_.get_cic_kspace({ix,iy,iz}); + // auto D_theta = D_yy_.get_cic_kspace({ix,iy,iz}); + // auto D_phi = D_zz_.get_cic_kspace({ix,iy,iz}); + // real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; + // real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); // vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); - auto D_r = D_xx_.get_cic_kspace({ix,iy,iz}); - auto D_theta = D_yy_.get_cic_kspace({ix,iy,iz}); - auto D_phi = D_zz_.get_cic_kspace({ix,iy,iz}); - real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; - real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); - - //real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); - vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); - - vec3 evec3 = D_r.imag() * e_r + D_theta.imag() * e_theta + D_phi.imag() * e_phi; - - assert(!std::isnan(std::imag(D_r * st * cp + D_theta * ct * cp - D_phi * sp))); - assert(!std::isnan(std::imag(D_r * st * sp + D_theta * ct * sp + D_phi * cp))); - assert(!std::isnan(std::imag(D_r * ct - D_theta * st))); - assert(!std::isnan(std::real(D_r * st * cp + D_theta * ct * cp - D_phi * sp))); - assert(!std::isnan(std::real(D_r * st * sp + D_theta * ct * sp + D_phi * cp))); - assert(!std::isnan(std::real(D_r * ct - D_theta * st))); - - // std::cerr << kv.x/boxlen_ << " " << kv.y/boxlen_ << " " << kv.z/boxlen_ << " -- " << D_r * st * cp + D_theta * ct * cp - D_phi * sp << " " << D_r * st * sp + D_theta * ct * sp + D_phi * cp << " " << (D_r * ct - D_theta * st ) << std::endl; - - if( idim == 0 ){ - return D_r * st * cp + D_theta * ct * cp - D_phi * sp; - } - else if( idim == 1 ){ - return D_r * st * sp + D_theta * ct * sp + D_phi * cp; - } - return D_r * ct - D_theta * st; + + // if( idim == 0 ){ + // return D_r * st * cp + D_theta * ct * cp - D_phi * sp; + // } + // else if( idim == 1 ){ + // return D_r * st * sp + D_theta * ct * sp + D_phi * cp; + // } + // return D_r * ct - D_theta * st; } inline real_t vfac_corr( std::array ijk ) const From 6c027d7094af7ae7eb73d843e639d8f8c59b730c Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Mar 2020 17:21:17 +0100 Subject: [PATCH 074/130] fixed PLT correction interpolation --- include/grid_fft.hh | 8 +-- include/particle_plt.hh | 106 +++++++++++++--------------------------- 2 files changed, 38 insertions(+), 76 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index e98d6a7..f8157da 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -290,11 +290,11 @@ public: return val; } - ccomplex_t get_cic_kspace( const vec3& x ) const{ + ccomplex_t get_cic_kspace( const vec3 x ) const{ // warning! 
this doesn't work with MPI - size_t ix = static_cast(x.x); - size_t iy = static_cast(x.y); - size_t iz = std::min(static_cast(x.z),size(2)-1); //static_cast(x.z); + int ix = std::floor(x.x); + int iy = std::floor(x.y); + int iz = std::floor(x.z); real_t dx = x.x-real_t(ix), tx = 1.0-dx; real_t dy = x.y-real_t(iy), ty = 1.0-dy; real_t dz = x.z-real_t(iz), tz = 1.0-dz; diff --git a/include/particle_plt.hh b/include/particle_plt.hh index b62ba2e..b0d3760 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -28,9 +28,9 @@ namespace particle{ class lattice_gradient{ private: - const real_t boxlen_, XmL_, aini_; + const real_t boxlen_, aini_; const size_t ngmapto_, ngrid_, ngrid32_; - const real_t mapratio_; + const real_t mapratio_, XmL_; Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; Grid_FFT grad_x_, grad_y_, grad_z_; std::vector> vectk_; @@ -443,11 +443,7 @@ private: for( size_t j=0; jsize_t(nlattice/2))? int(i)-nlattice : i; - int jj = (j>size_t(nlattice/2))? int(j)-nlattice : j; - vec3 kv = D_xx_.get_k(i,j,k); - const real_t kmod = kv.norm()/mapratio_/boxlen_; double mu1 = std::real(D_xx_.kelem(i,j,k)); // double mu2 = std::real(D_xy_.kelem(i,j,k)); @@ -456,50 +452,28 @@ private: vec3 evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}); evec1 /= evec1.norm(); - if(false){//std::abs(ii)+std::abs(jj)+k<8){ - // small k modes, use usual pseudospectral derivative - // -- store in diagonal components of D_ij - D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kv.x/mapratio_/boxlen_); - D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kv.y/mapratio_/boxlen_); - D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kv.z/mapratio_/boxlen_); + // /////////////////////////////////// + // // project onto spherical coordinate vectors + + real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? 
std::acos( kv.z / kr ): 0.0; + real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); + vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); - // spatially dependent correction to vfact = \dot{D_+}/D_+ - D_xy_.kelem(i,j,k) = 1.0; - }else{ - // large k modes, use interpolated PLT results - // -- store in diagonal components of D_ij - auto norm = (kv.norm()/kv.dot(evec1)); - D_xx_.kelem(i,j,k) = ccomplex_t(0.0,evec1.x * kmod); - D_yy_.kelem(i,j,k) = ccomplex_t(0.0,evec1.y * kmod); - D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod); + // re-normalise to that longitudinal amplitude is exact + double renorm = evec1.dot( e_r ); if( renorm < 0.01 ) renorm = 1.0; - // // re-normalise to that longitudinal amplitude is exact - // //evec1 = kv; - // auto norm = (kv.norm()/kv.dot(evec1)); - // //evec1 = evec1 * (1.0/boxlen_); - - // /////////////////////////////////// - // // project onto spherical coordinate vectors - - // real_t kr = kv.norm(), kphi = std::atan2(kv.y,kv.x), ktheta = std::acos( kv.z / kr ); - // real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); - // vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + // -- store in diagonal components of D_ij + D_xx_.kelem(i,j,k) = 1.0; + D_yy_.kelem(i,j,k) = evec1.dot( e_theta ) / renorm; + D_zz_.kelem(i,j,k) = evec1.dot( e_phi ) / renorm; - // D_xx_.kelem(i,j,k) = ccomplex_t( 0.0, evec1.dot( e_r )); //kmod*norm - // D_yy_.kelem(i,j,k) = ccomplex_t( 0.0, evec1.dot( e_theta )); //kmod*norm - // D_zz_.kelem(i,j,k) = ccomplex_t( 0.0, evec1.dot( e_phi )); //kmod*norm - - // spatially dependent correction to vfact = \dot{D_+}/D_+ - D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); - } - if( i==size_t(nlattice/2) ) D_xx_.kelem(i,j,k)=0.0; - if( j==size_t(nlattice/2) ) D_yy_.kelem(i,j,k)=0.0; - if( k==size_t(nlattice/2) ) D_zz_.kelem(i,j,k)=0.0; + // spatially dependent correction to vfact = \dot{D_+}/D_+ + D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); } } } D_xy_.kelem(0,0,0) = 1.0; - D_xx_.kelem(0,0,0) = 0.0; + D_xx_.kelem(0,0,0) = 1.0; D_yy_.kelem(0,0,0) = 0.0; D_zz_.kelem(0,0,0) = 0.0; @@ -524,10 +498,10 @@ public: // real_t boxlen, size_t ngridother explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=64 ) : boxlen_( the_config.GetValue("setup", "BoxLength") ), - ngmapto_( the_config.GetValue("setup", "GridRes") ), - XmL_ ( the_config.GetValue("cosmology", "Omega_L") / the_config.GetValue("cosmology", "Omega_m") ), aini_ ( 1.0/(1.0+the_config.GetValue("setup", "zstart")) ), + ngmapto_( the_config.GetValue("setup", "GridRes") ), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), + XmL_ ( the_config.GetValue("cosmology", "Omega_L") / the_config.GetValue("cosmology", "Omega_m") ), D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), @@ -544,14 +518,6 @@ public: csoca::ilog << "PLT corrections for " << lattice_str << " lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; -// #if defined(USE_MPI) -// if( CONFIG::MPI_task_size>1 ) -// { -// csoca::elog << "PLT not implemented for MPI, cannot run with more than 1 task currently!" 
<< std::endl; -// abort(); -// } -// #endif - double wtime = get_wtime(); csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; @@ -565,28 +531,24 @@ public: { real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; - if( idim == 0 ) return D_xx_.get_cic_kspace({ix,iy,iz}); - else if( idim == 1 ) return D_yy_.get_cic_kspace({ix,iy,iz}); - return D_zz_.get_cic_kspace({ix,iy,iz}); - - // auto kv = D_xx_.get_k( ix, iy, iz ) / boxlen_; + auto kv = D_xx_.get_k( ix, iy, iz ); + auto kmod = kv.norm() / mapratio_ / boxlen_; // // project onto spherical coordinate vectors - // auto D_r = D_xx_.get_cic_kspace({ix,iy,iz}); - // auto D_theta = D_yy_.get_cic_kspace({ix,iy,iz}); - // auto D_phi = D_zz_.get_cic_kspace({ix,iy,iz}); + auto D_r = std::real(D_xx_.get_cic_kspace({ix,iy,iz})); + auto D_theta = std::real(D_yy_.get_cic_kspace({ix,iy,iz})); + auto D_phi = std::real(D_zz_.get_cic_kspace({ix,iy,iz})); - // real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; - // real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); - // vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; + real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); - // if( idim == 0 ){ - // return D_r * st * cp + D_theta * ct * cp - D_phi * sp; - // } - // else if( idim == 1 ){ - // return D_r * st * sp + D_theta * ct * sp + D_phi * cp; - // } - // return D_r * ct - D_theta * st; + if( idim == 0 ){ + return ccomplex_t(0.0, kmod*(D_r * st * cp + D_theta * ct * cp - D_phi * sp)); + } + else if( idim == 1 ){ + return ccomplex_t(0.0, kmod*(D_r * st * sp + D_theta * ct * sp + D_phi * cp)); + } + return ccomplex_t(0.0, kmod*(D_r * ct - D_theta * st)); } inline real_t vfac_corr( std::array ijk ) const @@ -594,7 +556,7 @@ public: real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; const real_t alpha = 1.0/std::real(D_xy_.get_cic_kspace({ix,iy,iz})); return 1.0/alpha; - // // below is for LCDM: + // // below is for LCDM, but it is a tiny correction for typical starting redshifts: //! X = \Omega_\Lambda / \Omega_m // return 1.0 / (alpha - (2*std::pow(aini_,3)*alpha*(2 + alpha)*XmL_*Hypergeometric2F1((3 + alpha)/3.,(5 + alpha)/3., // (13 + 4*alpha)/6.,-(std::pow(aini_,3)*XmL_)))/ From a1a5e614cf94e5a32802170ee28a7dfd47ab43d3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Mar 2020 17:37:57 +0100 Subject: [PATCH 075/130] fixed compilation error --- include/grid_fft.hh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index f8157da..38cf11c 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -292,15 +292,15 @@ public: ccomplex_t get_cic_kspace( const vec3 x ) const{ // warning! 
this doesn't work with MPI - int ix = std::floor(x.x); - int iy = std::floor(x.y); - int iz = std::floor(x.z); + int ix = static_cast(std::floor(x.x)); + int iy = static_cast(std::floor(x.y)); + int iz = static_cast(std::floor(x.z)); real_t dx = x.x-real_t(ix), tx = 1.0-dx; real_t dy = x.y-real_t(iy), ty = 1.0-dy; real_t dz = x.z-real_t(iz), tz = 1.0-dz; size_t ix1 = (ix+1)%size(0); size_t iy1 = (iy+1)%size(1); - size_t iz1 = std::min((iz+1),size(2)-1); + size_t iz1 = std::min((iz+1),int(size(2))-1); ccomplex_t val = 0.0; val += this->kelem(ix ,iy ,iz ) * tx * ty * tz; val += this->kelem(ix ,iy ,iz1) * tx * ty * dz; From c58ccfa6cae4765ff349f73229a383d4ad73b263 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 1 Mar 2020 16:48:03 +0000 Subject: [PATCH 076/130] README.md edited online with Bitbucket --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c7cc745..3d3be7b 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ High order LPT/QPT tool for single resolution simulations ## Build Instructions Clone code including submodules (currently only CLASS is used as a submodule): - git clone --recurse-submodules https://ohahn@bitbucket.org/ohahn/monofonic.git + git clone --recurse-submodules https://@bitbucket.org/ohahn/monofonic.git Create build directory, configure, and build: From b0b67086fdedf6430476d4575184883a12223565 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 6 Mar 2020 16:44:53 +0100 Subject: [PATCH 077/130] fixed particle type for baryons in gadget-hdf5 and arepo, disabled PLT by default --- src/ic_generator.cc | 4 ++-- src/main.cc | 3 +++ src/plugins/output_arepo.cc | 2 +- src/plugins/output_gadget_hdf5.cc | 2 +- src/plugins/transfer_CLASS.cc | 2 +- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 23899c0..a9d9670 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -273,8 +273,8 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- // Create PLT gradient operator //-------------------------------------------------------------------- - particle::lattice_gradient lg( the_config ); - // op::fourier_gradient lg( the_config ); + // particle::lattice_gradient lg( the_config ); + op::fourier_gradient lg( the_config ); //-------------------------------------------------------------------- std::vector species_list; diff --git a/src/main.cc b/src/main.cc index cbdf209..12cde3a 100644 --- a/src/main.cc +++ b/src/main.cc @@ -3,6 +3,7 @@ #include #include #include +#include #if defined(_OPENMP) #include @@ -112,6 +113,8 @@ int main( int argc, char** argv ) omp_set_num_threads(CONFIG::num_threads); #endif + // std::feclearexcept(FE_ALL_EXCEPT); + //------------------------------------------------------------------------------ // Write code configuration to screen //------------------------------------------------------------------------------ diff --git a/src/plugins/output_arepo.cc b/src/plugins/output_arepo.cc index 8d8d903..1af182f 100644 --- a/src/plugins/output_arepo.cc +++ b/src/plugins/output_arepo.cc @@ -184,7 +184,7 @@ public: case cosmo_species::dm: return 1; case cosmo_species::baryon: - return 2; + return 0; case cosmo_species::neutrino: return 3; } diff --git a/src/plugins/output_gadget_hdf5.cc b/src/plugins/output_gadget_hdf5.cc index f32f9c8..c862e41 100644 --- a/src/plugins/output_gadget_hdf5.cc +++ b/src/plugins/output_gadget_hdf5.cc @@ -151,7 +151,7 @@ public: case cosmo_species::dm: 
return 1; case cosmo_species::baryon: - return 2; + return 0; case cosmo_species::neutrino: return 3; } diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index d660d25..e6e2c00 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -47,7 +47,7 @@ private: double wtime = get_wtime(); std::stringstream zlist; - zlist << zstart_ << ", " << zstart_; + zlist << zstart_ << ", " << ztarget_; ClassParams pars; pars.add("extra metric transfer functions", "yes"); From 0abe891f864ad72b44b42f3d8b995710b9ef09ed Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Tue, 10 Mar 2020 19:01:44 +0100 Subject: [PATCH 078/130] added ID offset for baryon particles to avoid duplicate IDs --- include/particle_generator.hh | 6 +++--- src/ic_generator.cc | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 56c69f4..956ed28 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -37,7 +37,7 @@ const std::vector> second_lattice_shift = }; template -void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const field_t& field ){ +void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t& field ){ // number of modes present in the field const size_t num_p_in_load = field.local_size(); // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load): @@ -50,9 +50,9 @@ void initialize_lattice( container& particles, lattice lattice_type, const bool for( size_t k=0; kwrite_species_as(this_species) == output_type::particles) ? true : false; + // somewhat arbitrarily, start baryon particle IDs from 2**31 if we have 32bit and from 2**56 if we have 64 bits + size_t IDoffset = (this_species == cosmo_species::baryon)? ((the_output_plugin->has_64bit_ids())? 
1ul<<56 : 1ul<<31): 0 ; + // if output plugin wants particles, then we need to store them, along with their IDs if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { // allocate particle structure and generate particle IDs - particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), tmp ); + particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp ); } // write out positions From ab2db06990295fe180616ba4759747e4903ce638 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 29 Mar 2020 14:42:55 +0200 Subject: [PATCH 079/130] added useful physical constants --- include/physical_constants.hh | 62 +++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 include/physical_constants.hh diff --git a/include/physical_constants.hh b/include/physical_constants.hh new file mode 100644 index 0000000..594eb0d --- /dev/null +++ b/include/physical_constants.hh @@ -0,0 +1,62 @@ +#pragma once +/*******************************************************************************\ + physical_constants.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 06/2019 - Oliver Hahn - first implementation +\*******************************************************************************/ + +// physical constants for convenience, all values have been taken from +// the 2018 edition of the Particle Data Group Booklet, +// http://pdg.lbl.gov/2019/mobile/reviews/pdf/rpp2018-rev-phys-constants-m.pdf + +namespace phys_const +{ +// helper value of pi so that we don't need to include any other header just for this +static constexpr double pi_ = 3.141592653589793115997963468544185161590576171875; + +//--- unit conversions --------------------------------------------------- + +// 1 Mpc in m +static constexpr double Mpc_SI = 3.0857e22; + +// 1 Gyr in s +static constexpr double Gyr_SI = 3.1536e16; + +// 1 eV in J +static constexpr double eV_SI = 1.602176487e-19; + +// 1 erg in J +static constexpr double erg_SI = 1e-7; + +//--- physical constants ------------------------------------------------ + +// speed of light c in m/s +static constexpr double c_SI = 2.99792458e8; + +// gravitational constant G in m^3/s^2/kg +static constexpr double G_SI = 6.6740800e-11; + +// Boltzmann constant k_B in kg m^2/s^2/K +static constexpr double kB_SI = 1.38064852e-23; + +// reduced Planck's quantum \hbar in kg m^2/s +static constexpr double hbar_SI = 1.054571800e-34; + +// Stefan-Boltzmann constant sigma in J/m^2/s/K^-4 +static constexpr double sigma_SI = (pi_ * pi_) * (kB_SI * kB_SI * kB_SI * kB_SI) / 60. 
/ (hbar_SI * hbar_SI * hbar_SI) / (c_SI * c_SI); + +// electron mass in kg +static constexpr double me_SI = 9.10938356e-31; + +// proton mass in kg +static constexpr double mp_SI = 1.672621898e-27; + +// unified atomic mass unit (u) in kg +static constexpr double u_SI = 1.660539040e-27; + +// critical density of the Universe in h^2 kg/m^3 +static constexpr double rhocrit_h2_SI = 3 * 1e10 / (8 * pi_ * G_SI) / Mpc_SI / Mpc_SI; + +} // namespace phys_const \ No newline at end of file From a587ad6b3ee5174f937c5658f1b93fbfc868282f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 29 Mar 2020 14:45:43 +0200 Subject: [PATCH 080/130] some refactoring (add '_t' to vec3 and mat3) --- include/bounding_box.hh | 4 +- include/grid_fft.hh | 32 +++++------ include/mat3.hh | 59 ++++++------------- include/particle_generator.hh | 6 +- include/particle_plt.hh | 104 +++++++++++++++++----------------- include/vec3.hh | 56 +++++++++--------- src/grid_fft.cc | 2 +- src/ic_generator.cc | 6 +- src/plugins/transfer_CLASS.cc | 3 +- 9 files changed, 124 insertions(+), 148 deletions(-) diff --git a/include/bounding_box.hh b/include/bounding_box.hh index db0f481..3048c79 100644 --- a/include/bounding_box.hh +++ b/include/bounding_box.hh @@ -5,12 +5,12 @@ template struct bounding_box { - vec3 x1_, x2_; + vec3_t x1_, x2_; bounding_box(void) { } - bounding_box( const vec3& x1, const vec3& x2) + bounding_box( const vec3_t& x1, const vec3_t& x2) : x1_(x1), x2_(x2) { } diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 8acc2bd..2cf5557 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -165,9 +165,9 @@ public: } template - vec3 get_r(const size_t i, const size_t j, const size_t k) const + vec3_t get_r(const size_t i, const size_t j, const size_t k) const { - vec3 rr; + vec3_t rr; rr[0] = real_t(i + local_0_start_) * dx_[0]; rr[1] = real_t(j) * dx_[1]; @@ -177,9 +177,9 @@ public: } template - vec3 get_unit_r(const size_t i, const size_t j, const size_t k) const + vec3_t get_unit_r(const size_t i, const size_t j, const size_t k) const { - vec3 rr; + vec3_t rr; rr[0] = real_t(i + local_0_start_) / real_t(n_[0]); rr[1] = real_t(j) / real_t(n_[1]); @@ -189,9 +189,9 @@ public: } template - vec3 get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3 s) const + vec3_t get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3_t s) const { - vec3 rr; + vec3_t rr; rr[0] = (real_t(i + local_0_start_) + s.x) / real_t(n_[0]); rr[1] = (real_t(j) + s.y) / real_t(n_[1]); @@ -200,9 +200,9 @@ public: return rr; } - vec3 get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const + vec3_t get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const { - return vec3({i + local_0_start_, j, k}); + return vec3_t({i + local_0_start_, j, k}); } size_t get_cell_idx_1d(const size_t i, const size_t j, const size_t k) const @@ -226,9 +226,9 @@ public: } template - vec3 get_k(const size_t i, const size_t j, const size_t k) const + vec3_t get_k(const size_t i, const size_t j, const size_t k) const { - vec3 kk; + vec3_t kk; if( bdistributed ){ auto ip = i + local_1_start_; kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0]; @@ -243,9 +243,9 @@ public: } template - vec3 get_k(const real_t i, const real_t j, const real_t k) const + vec3_t get_k(const real_t i, const real_t j, const real_t k) const { - vec3 kk; + vec3_t kk; if( bdistributed ){ auto ip = i + real_t(local_1_start_); kk[0] = (j - real_t(j > real_t(nhalf_[0])) * n_[0]) * kfac_[0]; @@ -264,9 +264,9 @@ 
public: return bdistributed? std::array({j,i+local_1_start_,k}) : std::array({i,j,k}); } - data_t get_cic( const vec3& v ) const{ + data_t get_cic( const vec3_t& v ) const{ // warning! this doesn't work with MPI - vec3 x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], + vec3_t x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], std::fmod(v.y/length_[1]+1.0,1.0)*n_[1], std::fmod(v.z/length_[2]+1.0,1.0)*n_[2] }); size_t ix = static_cast(x.x); @@ -290,7 +290,7 @@ public: return val; } - ccomplex_t get_cic_kspace( const vec3 x ) const{ + ccomplex_t get_cic_kspace( const vec3_t x ) const{ // warning! this doesn't work with MPI int ix = static_cast(std::floor(x.x)); int iy = static_cast(std::floor(x.y)); @@ -746,7 +746,7 @@ public: void Write_PDF(std::string ofname, int nbins = 1000, double scale = 1.0, double rhomin = 1e-3, double rhomax = 1e3); - void shift_field( const vec3& s, bool transform_back=true ) + void shift_field( const vec3_t& s, bool transform_back=true ) { FourierTransformForward(); apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { diff --git a/include/mat3.hh b/include/mat3.hh index ac23069..6cf2689 100644 --- a/include/mat3.hh +++ b/include/mat3.hh @@ -4,7 +4,7 @@ #include template -class mat3{ +class mat3_t{ protected: std::array data_; gsl_matrix_view m_; @@ -37,38 +37,38 @@ protected: public: - mat3() + mat3_t() : bdid_alloc_gsl_(false) {} //! copy constructor - mat3( const mat3 &m) + mat3_t( const mat3_t &m) : data_(m.data_), bdid_alloc_gsl_(false) {} //! move constructor - mat3( mat3 &&m) + mat3_t( mat3_t &&m) : data_(std::move(m.data_)), bdid_alloc_gsl_(false) {} - //! construct mat3 from initializer list + //! construct mat3_t from initializer list template - mat3(E&&...e) + mat3_t(E&&...e) : data_{{std::forward(e)...}}, bdid_alloc_gsl_(false) {} - mat3& operator=(const mat3& m) noexcept{ + mat3_t& operator=(const mat3_t& m) noexcept{ data_ = m.data_; return *this; } - mat3& operator=(const mat3&& m) noexcept{ + mat3_t& operator=(const mat3_t&& m) noexcept{ data_ = std::move(m.data_); return *this; } //! destructor - ~mat3(){ + ~mat3_t(){ this->free_gsl(); } @@ -85,7 +85,7 @@ public: const T &operator()(size_t i, size_t j) const noexcept { return data_[3*i+j]; } //! in-place addition - mat3& operator+=( const mat3& rhs ) noexcept{ + mat3_t& operator+=( const mat3_t& rhs ) noexcept{ for (size_t i = 0; i < 9; ++i) { (*this)[i] += rhs[i]; } @@ -93,7 +93,7 @@ public: } //! 
in-place subtraction - mat3& operator-=( const mat3& rhs ) noexcept{ + mat3_t& operator-=( const mat3_t& rhs ) noexcept{ for (size_t i = 0; i < 9; ++i) { (*this)[i] -= rhs[i]; } @@ -104,20 +104,8 @@ public: for (size_t i = 0; i < 9; ++i) data_[i]=0; } - void eigen( vec3& evals, vec3& evec1, vec3& evec2, vec3& evec3 ) + void eigen( vec3_t& evals, vec3_t& evec1, vec3_t& evec2, vec3_t& evec3_t ) { - // for( auto x : data_ ){ - // std::cerr << x << " " ; - // } - // std::cerr << std::endl; - // resort into symmetrix matrix - // data_[8] = data_[5]; - // data_[7] = data_[4]; - // data_[6] = data_[2]; - // data_[5] = data_[4]; - // data_[4] = data_[3]; - // data_[3] = data_[1]; - this->init_gsl(); gsl_eigen_symmv (&m_.matrix, eval_, evec_, wsp_); @@ -127,17 +115,15 @@ public: evals[i] = gsl_vector_get( eval_, i ); evec1[i] = gsl_matrix_get( evec_, i, 0 ); evec2[i] = gsl_matrix_get( evec_, i, 1 ); - evec3[i] = gsl_matrix_get( evec_, i, 2 ); + evec3_t[i] = gsl_matrix_get( evec_, i, 2 ); } - - // std::cerr << "(" << evals[0] << " " << evals[1] << " " << evals[2] << ")" << std::endl; } }; template -constexpr const mat3 operator+(const mat3 &lhs, const mat3 &rhs) noexcept +constexpr const mat3_t operator+(const mat3_t &lhs, const mat3_t &rhs) noexcept { - mat3 result; + mat3_t result; for (size_t i = 0; i < 9; ++i) { result[i] = lhs[i] + rhs[i]; } @@ -146,9 +132,9 @@ constexpr const mat3 operator+(const mat3 &lhs, const mat3 &rhs) noexce // matrix - vector multiplication template -vec3 operator*( const mat3 &A, const vec3 &v ) noexcept +inline vec3_t operator*( const mat3_t &A, const vec3_t &v ) noexcept { - vec3 result; + vec3_t result; for( int mu=0; mu<3; ++mu ){ result[mu] = 0.0; for( int nu=0; nu<3; ++nu ){ @@ -158,14 +144,3 @@ vec3 operator*( const mat3 &A, const vec3 &v ) noexcept return result; } -// template -// vec3 operator*( const vec3 &v, const mat3 &A ) noexcept -// { -// vec3 result = 0.0; -// for( int mu=0; mu<3; ++mu ){ -// for( int nu=0; nu<3; ++nu ){ -// result[nu] += v[mu]*A(mu,nu); -// } -// } -// return result; -// } diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 956ed28..57e8b0f 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -18,7 +18,7 @@ enum lattice{ lattice_rsc = 3, // RSC: refined simple cubic }; -const std::vector< std::vector> > lattice_shifts = +const std::vector< std::vector> > lattice_shifts = { // first shift must always be zero! (otherwise set_positions and set_velocities break) /* SC : */ {{0.0,0.0,0.0}}, @@ -27,7 +27,7 @@ const std::vector< std::vector> > lattice_shifts = /* RSC: */ {{0.0,0.0,0.0},{0.0,0.0,0.5},{0.0,0.5,0.0},{0.0,0.5,0.5},{0.5,0.0,0.0},{0.5,0.0,0.5},{0.5,0.5,0.0},{0.5,0.5,0.5}}, }; -const std::vector> second_lattice_shift = +const std::vector> second_lattice_shift = { /* SC : */ {0.5, 0.5, 0.5}, // this corresponds to CsCl lattice /* BCC: */ {0.5, 0.5, 0.0}, // is there a diatomic lattice with BCC base?!? @@ -81,7 +81,7 @@ void set_positions( container& particles, const lattice lattice_type, bool is_se for( size_t j=0; j(i,j,k,lattice_shifts[lattice_type][ishift] - + (is_second_lattice? second_lattice_shift[lattice_type] : vec3{0.,0.,0.}) ); + + (is_second_lattice? 
second_lattice_shift[lattice_type] : vec3_t{0.,0.,0.}) ); if( b64reals ){ particles.set_pos64( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); }else{ diff --git a/include/particle_plt.hh b/include/particle_plt.hh index b0d3760..a6fc1ad 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -33,13 +33,13 @@ private: const real_t mapratio_, XmL_; Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; Grid_FFT grad_x_, grad_y_, grad_z_; - std::vector> vectk_; - std::vector> ico_, vecitk_; + std::vector> vectk_; + std::vector> ico_, vecitk_; bool is_even( int i ){ return (i%2)==0; } - bool is_in( int i, int j, int k, const mat3& M ){ - vec3 v({i,j,k}); + bool is_in( int i, int j, int k, const mat3_t& M ){ + vec3_t v({i,j,k}); auto vv = M * v; return is_even(vv.x)&&is_even(vv.y)&&is_even(vv.z); } @@ -54,22 +54,22 @@ private: //! === vectors, reciprocals and normals for the SC lattice === const int charge_fac_sc = 1; - const mat3 mat_bravais_sc{ + const mat3_t mat_bravais_sc{ 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, }; - const mat3 mat_reciprocal_sc{ + const mat3_t mat_reciprocal_sc{ twopi, 0.0, 0.0, 0.0, twopi, 0.0, 0.0, 0.0, twopi, }; - const mat3 mat_invrecip_sc{ + const mat3_t mat_invrecip_sc{ 2, 0, 0, 0, 2, 0, 0, 0, 2, }; - const std::vector> normals_sc{ + const std::vector> normals_sc{ {pi,0.,0.},{-pi,0.,0.}, {0.,pi,0.},{0.,-pi,0.}, {0.,0.,pi},{0.,0.,-pi}, @@ -78,22 +78,22 @@ private: //! === vectors, reciprocals and normals for the BCC lattice === const int charge_fac_bcc = 2; - const mat3 mat_bravais_bcc{ + const mat3_t mat_bravais_bcc{ 1.0, 0.0, 0.5, 0.0, 1.0, 0.5, 0.0, 0.0, 0.5, }; - const mat3 mat_reciprocal_bcc{ + const mat3_t mat_reciprocal_bcc{ twopi, 0.0, 0.0, 0.0, twopi, 0.0, -twopi, -twopi, fourpi, }; - const mat3 mat_invrecip_bcc{ + const mat3_t mat_invrecip_bcc{ 2, 0, 0, 0, 2, 0, 1, 1, 1, }; - const std::vector> normals_bcc{ + const std::vector> normals_bcc{ {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} @@ -102,22 +102,22 @@ private: //! === vectors, reciprocals and normals for the FCC lattice === const int charge_fac_fcc = 4; - const mat3 mat_bravais_fcc{ + const mat3_t mat_bravais_fcc{ 0.0, 0.5, 0.0, 0.5, 0.0, 1.0, 0.5, 0.5, 0.0, }; - const mat3 mat_reciprocal_fcc{ + const mat3_t mat_reciprocal_fcc{ -fourpi, fourpi, twopi, 0.0, 0.0, twopi, fourpi, 0.0, -twopi, }; - const mat3 mat_invrecip_fcc{ + const mat3_t mat_invrecip_fcc{ 0, 1, 1, 1, 0, 1, 0, 2, 0, }; - const std::vector> normals_fcc{ + const std::vector> normals_fcc{ {twopi,0.,0.},{-twopi,0.,0.}, {0.,twopi,0.},{0.,-twopi,0.}, {0.,0.,twopi},{0.,0.,-twopi}, @@ -152,7 +152,7 @@ private: auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; //! Ewald summation: short-range Green's function - auto add_greensftide_sr = [&]( mat3& D, const vec3& d ) -> void { + auto add_greensftide_sr = [&]( mat3_t& D, const vec3_t& d ) -> void { auto r = d.norm(); if( r< 1e-14 ) return; // return zero for r=0 @@ -170,7 +170,7 @@ private: }; //! 
Ewald summation: long-range Green's function - auto add_greensftide_lr = [&]( mat3& D, const vec3& k, const vec3& r ) -> void { + auto add_greensftide_lr = [&]( mat3_t& D, const vec3_t& k, const vec3_t& r ) -> void { real_t kmod2 = k.norm_squared(); real_t term = std::exp(-kmod2/(4*alpha2))*std::cos(k.dot(r)) / kmod2 * fft_norm; for( int mu=0; mu<3; ++mu ){ @@ -195,22 +195,22 @@ private: constexpr ptrdiff_t lnumber = 3, knumber = 3; const int numb = 1; //!< search radius when shifting vectors into FBZ - vectk_.assign(D_xx_.memsize(),vec3()); - ico_.assign(D_xx_.memsize(),vec3()); - vecitk_.assign(D_xx_.memsize(),vec3()); + vectk_.assign(D_xx_.memsize(),vec3_t()); + ico_.assign(D_xx_.memsize(),vec3_t()); + vecitk_.assign(D_xx_.memsize(),vec3_t()); #pragma omp parallel { //... temporary to hold values of the dynamical matrix - mat3 matD(0.0); + mat3_t matD(0.0); #pragma omp for for( ptrdiff_t i=0; i x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)}); - const vec3 ar = (mat_bravais * x_ijk).wrap_abs(); + const vec3_t x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)}); + const vec3_t ar = (mat_bravais * x_ijk).wrap_abs(); //... zero temporary matrix matD.zero(); @@ -219,8 +219,8 @@ private: for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ - const vec3 n_ijk({real_t(ix),real_t(iy),real_t(iz)}); - const vec3 dr(ar - mat_bravais * n_ijk); + const vec3_t n_ijk({real_t(ix),real_t(iy),real_t(iz)}); + const vec3_t dr(ar - mat_bravais * n_ijk); add_greensftide_sr(matD, dr); } } @@ -231,8 +231,8 @@ private: for( ptrdiff_t iy=-knumber; iy<=knumber; iy++ ){ for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){ if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ - const vec3 k_ijk({real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}); - const vec3 ak( mat_reciprocal * k_ijk); + const vec3_t k_ijk({real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}); + const vec3_t ak( mat_reciprocal * k_ijk); add_greensftide_lr(matD, ak, ar ); } @@ -278,7 +278,7 @@ private: std::ofstream ofs2("test_brillouin.txt"); #endif - using map_t = std::map,size_t>; + using map_t = std::map,size_t>; map_t iimap; //!=== Make temporary copies before resorting to std. 
Fourier grid ========!// @@ -312,8 +312,8 @@ private: #pragma omp parallel { // thread private matrix representation - mat3 D; - vec3 eval, evec1, evec2, evec3; + mat3_t D; + vec3_t eval, evec1, evec2, evec3_t; #pragma omp for for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + vec3_t kv = D_xx_.get_k(i,j,k); // put matrix elements into actual matrix D(0,0) = std::real(temp1.kelem(i,j,k)) / fft_norm12; @@ -333,12 +333,12 @@ private: D(2,2) = std::imag(temp3.kelem(i,j,k)) / fft_norm12; // compute eigenstructure of matrix - D.eigen(eval, evec1, evec2, evec3); - evec3 /= (twopi*ngrid_); + D.eigen(eval, evec1, evec2, evec3_t); + evec3_t /= (twopi*ngrid_); // now determine to which modes on the regular lattice this contributes - vec3 ar = kv / (twopi*ngrid_); - vec3 a(mat_reciprocal * ar); + vec3_t ar = kv / (twopi*ngrid_); + vec3_t a(mat_reciprocal * ar); // translate the k-vectors into the "candidate" FBZ for( int l1=-numb; l1<=numb; ++l1 ){ @@ -347,9 +347,9 @@ private: // need both halfs of Fourier space since we use real transforms for( int isign=0; isign<=1; ++isign ){ const real_t sign = 2.0*real_t(isign)-1.0; - const vec3 vshift({real_t(l1),real_t(l2),real_t(l3)}); + const vec3_t vshift({real_t(l1),real_t(l2),real_t(l3)}); - vec3 vectk = sign * a + mat_reciprocal * vshift; + vec3_t vectk = sign * a + mat_reciprocal * vshift; if( check_FBZ( normals, vectk ) ) { @@ -358,11 +358,11 @@ private: int iz = std::round(vectk.z*(ngrid_)/twopi); #pragma omp critical - {iimap.insert( std::pair,size_t>({ix,iy,iz}, D_xx_.get_idx(i,j,k)) );} + {iimap.insert( std::pair,size_t>({ix,iy,iz}, D_xx_.get_idx(i,j,k)) );} temp1.kelem(i,j,k) = ccomplex_t(eval[2],eval[1]); - temp2.kelem(i,j,k) = ccomplex_t(eval[0],evec3.x); - temp3.kelem(i,j,k) = ccomplex_t(evec3.y,evec3.z); + temp2.kelem(i,j,k) = ccomplex_t(eval[0],evec3_t.x); + temp3.kelem(i,j,k) = ccomplex_t(evec3_t.y,evec3_t.z); } }//sign } //l3 @@ -389,24 +389,24 @@ private: int ii = (int(i)>nlattice/2)? int(i)-nlattice : int(i); int jj = (int(j)>nlattice/2)? int(j)-nlattice : int(j); int kk = (int(k)>nlattice/2)? 
int(k)-nlattice : int(k); - vec3 kv({real_t(ii),real_t(jj),real_t(kk)}); + vec3_t kv({real_t(ii),real_t(jj),real_t(kk)}); - auto align_with_k = [&]( const vec3& v ) -> vec3{ + auto align_with_k = [&]( const vec3_t& v ) -> vec3_t{ return v*((v.dot(kv)<0.0)?-1.0:1.0); }; - vec3 v, l; + vec3_t v, l; map_t::iterator it; if( !is_in(i,j,k,mat_invrecip) ){ - auto average_lv = [&]( const auto& t1, const auto& t2, const auto& t3, vec3& v, vec3& l ) { + auto average_lv = [&]( const auto& t1, const auto& t2, const auto& t3, vec3_t& v, vec3_t& l ) { v = 0.0; l = 0.0; int count(0); auto add_lv = [&]( auto it ) -> void { auto q = it->second;++count; - l += vec3({std::real(t1.kelem(q)),std::imag(t1.kelem(q)),std::real(t2.kelem(q))}); - v += align_with_k(vec3({std::imag(t2.kelem(q)),std::real(t3.kelem(q)),std::imag(t3.kelem(q))})); + l += vec3_t({std::real(t1.kelem(q)),std::imag(t1.kelem(q)),std::real(t2.kelem(q))}); + v += align_with_k(vec3_t({std::imag(t2.kelem(q)),std::real(t3.kelem(q)),std::imag(t3.kelem(q))})); }; map_t::iterator it; if( (it = iimap.find({ii-1,jj,kk}))!=iimap.end() ){ add_lv(it); } @@ -423,8 +423,8 @@ private: }else{ if( (it = iimap.find({ii,jj,kk}))!=iimap.end() ){ auto q = it->second; - l = vec3({std::real(temp1.kelem(q)),std::imag(temp1.kelem(q)),std::real(temp2.kelem(q))}); - v = align_with_k(vec3({std::imag(temp2.kelem(q)),std::real(temp3.kelem(q)),std::imag(temp3.kelem(q))})); + l = vec3_t({std::real(temp1.kelem(q)),std::imag(temp1.kelem(q)),std::real(temp2.kelem(q))}); + v = align_with_k(vec3_t({std::imag(temp2.kelem(q)),std::real(temp3.kelem(q)),std::imag(temp3.kelem(q))})); } } D_xx_.kelem(i,j,k) = l[0]; @@ -443,13 +443,13 @@ private: for( size_t j=0; j kv = D_xx_.get_k(i,j,k); + vec3_t kv = D_xx_.get_k(i,j,k); double mu1 = std::real(D_xx_.kelem(i,j,k)); // double mu2 = std::real(D_xy_.kelem(i,j,k)); // double mu3 = std::real(D_xz_.kelem(i,j,k)); - vec3 evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}); + vec3_t evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}); evec1 /= evec1.norm(); // /////////////////////////////////// @@ -457,7 +457,7 @@ private: real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ): 0.0; real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); - vec3 e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + vec3_t e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); // re-normalise to that longitudinal amplitude is exact double renorm = evec1.dot( e_r ); if( renorm < 0.01 ) renorm = 1.0; diff --git a/include/vec3.hh b/include/vec3.hh index 4e72d81..3d1fe44 100644 --- a/include/vec3.hh +++ b/include/vec3.hh @@ -1,5 +1,5 @@ /*******************************************************************\ - vec3.hh - This file is part of MUSIC2 - + vec3_t.hh - This file is part of MUSIC2 - a code to generate initial conditions for cosmological simulations CHANGELOG (only majors, for details see repo): @@ -9,7 +9,7 @@ //! implements a simple class of 3-vectors of arbitrary scalar type template< typename T > -class vec3{ +class vec3_t{ private: //! holds the data std::array data_; @@ -19,27 +19,27 @@ public: T &x,&y,&z; //! empty constructor - vec3() + vec3_t() : x(data_[0]),y(data_[1]),z(data_[2]){} //! copy constructor - vec3( const vec3 &v) + vec3_t( const vec3_t &v) : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} //! 
copy constructor for non-const reference, needed to avoid variadic template being called for non-const reference - vec3( vec3& v) + vec3_t( vec3_t& v) : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} //! move constructor - vec3( vec3 &&v) + vec3_t( vec3_t &&v) : data_(std::move(v.data_)), x(data_[0]), y(data_[1]), z(data_[2]){} - //! construct vec3 from initializer list + //! construct vec3_t from initializer list template - vec3(E&&...e) + vec3_t(E&&...e) : data_{{std::forward(e)...}}, x{data_[0]}, y{data_[1]}, z{data_[2]} {} - // vec3( T a, T b, T c ) + // vec3_t( T a, T b, T c ) // : data_{{a,b,c}}, x(data_[0]), y(data_[1]), z(data_[2]){} //! bracket index access to vector components @@ -49,37 +49,37 @@ public: const T &operator[](size_t i) const noexcept { return data_[i]; } // assignment operator - vec3& operator=( const vec3& v ) noexcept { data_=v.data_; return *this; } + vec3_t& operator=( const vec3_t& v ) noexcept { data_=v.data_; return *this; } - //! implementation of summation of vec3 - vec3 operator+( const vec3& v ) const noexcept{ return vec3({x+v.x,y+v.y,z+v.z}); } + //! implementation of summation of vec3_t + vec3_t operator+( const vec3_t& v ) const noexcept{ return vec3_t({x+v.x,y+v.y,z+v.z}); } - //! implementation of difference of vec3 - vec3 operator-( const vec3& v ) const noexcept{ return vec3({x-v.x,y-v.y,z-v.z}); } + //! implementation of difference of vec3_t + vec3_t operator-( const vec3_t& v ) const noexcept{ return vec3_t({x-v.x,y-v.y,z-v.z}); } //! implementation of unary negative - vec3 operator-() const noexcept{ return vec3({-x,-y,-z}); } + vec3_t operator-() const noexcept{ return vec3_t({-x,-y,-z}); } //! implementation of scalar multiplication - vec3 operator*( T s ) const noexcept{ return vec3({x*s,y*s,z*s}); } + vec3_t operator*( T s ) const noexcept{ return vec3_t({x*s,y*s,z*s}); } //! implementation of scalar division - vec3 operator/( T s ) const noexcept{ return vec3({x/s,y/s,z/s}); } + vec3_t operator/( T s ) const noexcept{ return vec3_t({x/s,y/s,z/s}); } //! implementation of += operator - vec3& operator+=( const vec3& v ) noexcept{ x+=v.x; y+=v.y; z+=v.z; return *this; } + vec3_t& operator+=( const vec3_t& v ) noexcept{ x+=v.x; y+=v.y; z+=v.z; return *this; } //! implementation of -= operator - vec3& operator-=( const vec3& v ) noexcept{ x-=v.x; y-=v.y; z-=v.z; return *this; } + vec3_t& operator-=( const vec3_t& v ) noexcept{ x-=v.x; y-=v.y; z-=v.z; return *this; } //! multiply with scalar - vec3& operator*=( T s ) noexcept{ x*=s; y*=s; z*=s; return *this; } + vec3_t& operator*=( T s ) noexcept{ x*=s; y*=s; z*=s; return *this; } //! divide by scalar - vec3& operator/=( T s ) noexcept{ x/=s; y/=s; z/=s; return *this; } + vec3_t& operator/=( T s ) noexcept{ x/=s; y/=s; z/=s; return *this; } //! compute dot product with another vector - T dot(const vec3 &a) const noexcept + T dot(const vec3_t &a) const noexcept { return data_[0] * a.data_[0] + data_[1] * a.data_[1] + data_[2] * a.data_[2]; } @@ -91,19 +91,19 @@ public: T norm(void) const noexcept { return std::sqrt( this->norm_squared() ); } //! wrap absolute vector to box of size p - vec3& wrap_abs( T p = 1.0 ) noexcept{ + vec3_t& wrap_abs( T p = 1.0 ) noexcept{ for( auto& x : data_ ) x = std::fmod( 2*p + x, p ); return *this; } //! wrap relative vector to box of size p - vec3& wrap_rel( T p = 1.0 ) noexcept{ + vec3_t& wrap_rel( T p = 1.0 ) noexcept{ for( auto& x : data_ ) x = (x<-p/2)? x+p : (x>=p/2)? x-p : x; return *this; } - //! 
ordering, allows 3d sorting of vec3s - bool operator<( const vec3& o ) const noexcept{ + //! ordering, allows 3d sorting of vec3_ts + bool operator<( const vec3_t& o ) const noexcept{ if( x!=o.x ) return x -vec3 operator*( T s, const vec3& v ){ - return vec3({v.x*s,v.y*s,v.z*s}); +vec3_t operator*( T s, const vec3_t& v ){ + return vec3_t({v.x*s,v.y*s,v.z*s}); } diff --git a/src/grid_fft.cc b/src/grid_fft.cc index 5ae6b24..a1b1912 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -860,7 +860,7 @@ void Grid_FFT::Compute_PowerSpectrum(std::vector &b for (size_t iy = 0; iy < size(1); iy++) for (size_t iz = 0; iz < size(2); iz++) { - vec3 k3 = get_k(ix, iy, iz); + vec3_t k3 = get_k(ix, iy, iz); double k = k3.norm(); int idx2 = k / dk; //int((1.0f / dklog * std::log10(k / kmin))); auto z = this->kelem(ix, iy, iz); diff --git a/src/ic_generator.cc b/src/ic_generator.cc index ce86444..708e12b 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -320,7 +320,7 @@ int Run( ConfigFile& the_config ) if (bAddExternalTides) { - phi2.assign_function_of_grids_kdep([&](vec3 kvec, ccomplex_t pphi, ccomplex_t pphi2) { + phi2.assign_function_of_grids_kdep([&](vec3_t kvec, ccomplex_t pphi, ccomplex_t pphi2) { // sign in front of f_aniso is reversed since phi1 = -phi return pphi2 + f_aniso * (kvec[0] * kvec[0] * lss_aniso_lambda[0] + kvec[1] * kvec[1] * lss_aniso_lambda[1] + kvec[2] * kvec[2] * lss_aniso_lambda[2]) * pphi; }, @@ -569,7 +569,7 @@ int Run( ConfigFile& the_config ) + lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx) ); if( bDoBaryons ){ - vec3 kvec = phi.get_k(i,j,k); + vec3_t kvec = phi.get_k(i,j,k); real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, cdm0) : (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, baryon0) : @@ -618,7 +618,7 @@ int Run( ConfigFile& the_config ) + vfac3 * (lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx)) ); if( bDoBaryons ){ - vec3 kvec = phi.get_k(i,j,k); + vec3_t kvec = phi.get_k(i,j,k); real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, vcdm0) : (this_species == cosmo_species::baryon)? 
the_cosmo_calc->GetAmplitude(kmod, vbaryon0) : diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index e6e2c00..079b633 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -223,13 +223,14 @@ public: gsl_spline *splineT = nullptr; gsl_interp_accel *accT = nullptr; switch(type){ + // values at ztarget: case total: splineT = gsl_sp_dtot_; accT = gsl_ia_dtot_; break; case cdm: splineT = gsl_sp_dc_; accT = gsl_ia_dc_; break; case baryon: splineT = gsl_sp_db_; accT = gsl_ia_db_; break; case vtotal: splineT = gsl_sp_ttot_; accT = gsl_ia_ttot_; break; case vcdm: splineT = gsl_sp_tc_; accT = gsl_ia_tc_; break; case vbaryon: splineT = gsl_sp_tb_; accT = gsl_ia_tb_; break; - + // values at zstart: case total0: splineT = gsl_sp_dtot0_;accT = gsl_ia_dtot0_;break; case cdm0: splineT = gsl_sp_dc0_; accT = gsl_ia_dc0_; break; case baryon0: splineT = gsl_sp_db0_; accT = gsl_ia_db0_; break; From 8423161d6b1714905642e1b5ac69e5c5ee2f2bca Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 29 Mar 2020 14:46:25 +0200 Subject: [PATCH 081/130] added new vector type for vectorized operations --- include/vec.hh | 144 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 include/vec.hh diff --git a/include/vec.hh b/include/vec.hh new file mode 100644 index 0000000..dd914b0 --- /dev/null +++ b/include/vec.hh @@ -0,0 +1,144 @@ +#pragma once +/*******************************************************************************\ + vec.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 06/2019 - Oliver Hahn - first implementation +\*******************************************************************************/ + +#include + +//! implements general N-dim vectors of arbitrary primtive type with some arithmetic ops +template +struct vec_t +{ + std::array data_; + + vec_t() {} + + vec_t(const vec_t &v) + : data_(v.data_) {} + + vec_t(vec_t &&v) + : data_(std::move(v.data_)) {} + + template + vec_t(E... e) + : data_{{std::forward(e)...}} + { + static_assert(sizeof...(E) == N, "Brace-enclosed initialiser list doesn't match vec_t length!"); + } + + //! bracket index access to vector components + T &operator[](size_t i) noexcept { return data_[i]; } + + //! const bracket index access to vector components + const T &operator[](size_t i) const noexcept { return data_[i]; } + + // assignment operator + vec_t &operator=(const vec_t &v) noexcept + { + data_ = v.data_; + return *this; + } + + //! implementation of summation of vec_t + vec_t operator+(const vec_t &v) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = data_[i] + v[i]; + return res; + } + + //! implementation of difference of vec_t + vec_t operator-(const vec_t &v) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = data_[i] - v[i]; + return res; + } + + //! implementation of unary negative + vec_t operator-() const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = -data_[i]; + return res; + } + + //! implementation of scalar multiplication + template + vec_t operator*(T2 s) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = data_[i] * s; + return res; + } + + //! implementation of scalar division + vec_t operator/(T s) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = data_[i] / s; + return res; + } + + //! 
takes the absolute value of each element + vec_t abs(void) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = std::abs(data_[i]); + return res; + } + + //! implementation of implicit summation of vec_t + vec_t &operator+=(const vec_t &v) noexcept + { + for (int i = 0; i < N; ++i) + data_[i] += v[i]; + return *this; + } + + //! implementation of implicit subtraction of vec_t + vec_t &operator-=(const vec_t &v) noexcept + { + for (int i = 0; i < N; ++i) + data_[i] -= v[i]; + return *this; + } + + //! implementation of implicit scalar multiplication of vec_t + vec_t &operator*=(T s) noexcept + { + for (int i = 0; i < N; ++i) + data_[i] *= s; + return *this; + } + + //! implementation of implicit scalar division of vec_t + vec_t &operator/=(T s) noexcept + { + for (int i = 0; i < N; ++i) + data_[i] /= s; + return *this; + } + + size_t size(void) const noexcept { return N; } +}; + +//! multiplication with scalar +template +inline vec_t operator*(T2 s, const vec_t &v) +{ + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = v[i] * s; + return res; +} From 23edbd63a93ce7e128388bba46557330f064e747 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 29 Mar 2020 14:46:53 +0200 Subject: [PATCH 082/130] added custom ODE integration module --- include/ode_integrate.hh | 103 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 include/ode_integrate.hh diff --git a/include/ode_integrate.hh b/include/ode_integrate.hh new file mode 100644 index 0000000..3858b85 --- /dev/null +++ b/include/ode_integrate.hh @@ -0,0 +1,103 @@ +#pragma once +/*******************************************************************************\ + odetools.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 06/2019 - Oliver Hahn - first implementation +\*******************************************************************************/ + +namespace ode_integrate +{ + +// simple Runge-Kutta 4th order step without error estimate +template +inline void rk4_step(double h, double &t, vector_t &y, function_t f) +{ + vector_t k1(h * f(t, y)); + vector_t k2(h * f(t + h / 2, y + k1 / 2)); + vector_t k3(h * f(t + h / 2, y + k2 / 2)); + vector_t k4(h * f(t + h, y + k3)); + y += (k1 + 2 * k2 + 2 * k3 + k4) / 6; + t += h; +} + +// Cash-Karp modified Runge-Kutta scheme, 5th order with 4th order error estimate +// see Press & Teukolsky (1992): "Adaptive Stepsize Runge-Kutta Integration" +// in Computers in Physics 6, 188 (1992); doi: 10.1063/1.4823060 +template +inline vector_t ckrk5_step(double h, double &t, vector_t &y, function_t f) +{ + static constexpr double + a2 = 0.20, + a3 = 0.30, a4 = 0.60, a5 = 1.0, a6 = 0.8750, + b21 = 0.20, + b31 = 3.0 / 40.0, b32 = 9.0 / 40.0, + b41 = 0.30, b42 = -0.90, b43 = 1.20, + b51 = -11.0 / 54.0, b52 = 2.50, b53 = -70.0 / 27.0, b54 = 35.0 / 27.0, + b61 = 1631.0 / 55296.0, b62 = 175.0 / 512.0, b63 = 575.0 / 13824.0, b64 = 44275.0 / 110592.0, b65 = 253.0 / 4096.0, + c1 = 37.0 / 378.0, c3 = 250.0 / 621.0, c4 = 125.0 / 594.0, c6 = 512.0 / 1771.0, + dc1 = c1 - 2825.0 / 27648.0, dc3 = c3 - 18575.0 / 48384.0, + dc4 = c4 - 13525.0 / 55296.0, dc5 = -277.0 / 14336.0, dc6 = c6 - 0.250; + + vector_t k1(h * f(t, y)); + vector_t k2(h * f(t + a2 * h, y + b21 * k1)); + vector_t k3(h * f(t + a3 * h, y + b31 * k1 + b32 * k2)); + vector_t k4(h * f(t + a4 * h, y + b41 * k1 + b42 * k2 + b43 * k3)); + vector_t k5(h * f(t + a5 * h, y + b51 * k1 + b52 * k2 + b53 * k3 + 
b54 * k4)); + vector_t k6(h * f(t + a6 * h, y + b61 * k1 + b62 * k2 + b63 * k3 + b64 * k4 + b65 * k5)); + + y += c1 * k1 + c3 * k3 + c4 * k4 + c6 * k6; + + return dc1 * k1 + dc3 * k3 + dc4 * k4 + dc5 * k5 + dc6 * k6; +} + +// Adaptive step-size quality-controlled routine for ckrk5_step, see +// Press & Teukolsky (1992): "Adaptive Stepsize Runge-Kutta Integration" +// in Computers in Physics 6, 188 (1992); doi: 10.1063/1.4823060 +template +inline void rk_step_qs(double htry, double &t, vector_t &y, vector_t &yscale, function_t f, double eps, double &hdid, double &hnext) +{ + static constexpr double SAFETY{0.9}; + static constexpr double PSHRNK{-0.25}; + static constexpr double PGROW{-0.2}; + static constexpr double ERRCON{1.89e-4}; + + auto h(htry); + vector_t ytemp(y); + vector_t yerr; + double errmax; + +do_ckrk5trialstep: + yerr = ckrk5_step(h, t, ytemp, f); + errmax = 0.0; + for (size_t i = 0; i < yerr.size(); ++i) + { + errmax = std::max(errmax, std::abs(yerr[i] / yscale[i])); + } + errmax = errmax / eps; + if (errmax > 1.0) + { + h *= std::max(0.1, SAFETY*std::pow(errmax, PSHRNK)); + if (t + h == t) + { + std::cerr << "stepsize underflow in rkqs" << std::endl; + abort(); + } + goto do_ckrk5trialstep; + } + else + { + if( errmax > ERRCON ){ + hnext = h * SAFETY * std::pow(errmax, PGROW); + }else{ + hnext = 5*h; + } + hdid = h; + t += h; + y = ytemp; + } +} + + +} // namespace ode_integrate \ No newline at end of file From 0d3a17b253efdd3c5c3934572f26f3e787cbb385 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 29 Mar 2020 14:49:17 +0200 Subject: [PATCH 083/130] rewrote cosmology parameters and background evolution with full integration of 2nd order ODE --- include/cosmology_calculator.hh | 274 +++++++++++++++++++------------- include/cosmology_parameters.hh | 65 ++++++-- include/ic_generator.hh | 2 +- include/testing.hh | 2 +- src/ic_generator.cc | 12 +- src/testing.cc | 4 +- 6 files changed, 226 insertions(+), 133 deletions(-) diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index ba2d8ff..cb07a04 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -1,25 +1,44 @@ #pragma once #include +#include #include +#include #include +#include #include #include +#include #include +namespace cosmology +{ + /*! - * @class CosmologyCalculator + * @class cosmology::calculator * @brief provides functions to compute cosmological quantities * * This class provides member functions to compute cosmological quantities * related to the Friedmann equations and linear perturbation theory */ -class CosmologyCalculator +class calculator { +public: + //! data structure to store cosmological parameters + cosmology::parameters cosmo_param_; + + //! 
pointer to an instance of a transfer function plugin + //TransferFunction_plugin *ptransfer_fun_; + std::unique_ptr transfer_function_; + private: static constexpr double REL_PRECISION = 1e-5; + std::vector tab_a_, tab_D_, tab_f_; + gsl_interp_accel *gsl_ia_a_, *gsl_ia_D_, *gsl_ia_f_; + gsl_spline *gsl_sp_a_, *gsl_sp_D_, *gsl_sp_f_; + double Dnow_; real_t integrate(double (*func)(double x, void *params), double a, double b, void *params) const { @@ -44,58 +63,136 @@ private: return (real_t)result; } + void compute_growth(void) + { + using v_t = vec_t<3, double>; + + // set ICs + const double a0 = 1e-10; + const double D0 = a0; + const double Dprime0 = 2.0 * D0 * H_of_a(a0) / std::pow(phys_const::c_SI, 2); + const double t0 = 1.0 / (a0 * H_of_a(a0)); + + v_t y0({a0, D0, Dprime0}); + + // set up integration + double dt = 1e-9; + double dtdid, dtnext; + const double amax = 2.0; + + v_t yy(y0); + double t = t0; + const double eps = 1e-10; + + while (yy[0] < amax) + { + // RHS of ODEs + auto rhs = [&](double t, v_t y) -> v_t { + auto a = y[0]; + auto D = y[1]; + auto Dprime = y[2]; + v_t dy; + // da/dtau = a^2 H(a) + dy[0] = a * a * H_of_a(a); + // d D/dtau + dy[1] = Dprime; + // d^2 D / dtau^2 + dy[2] = -a * H_of_a(a) * Dprime + 3.0 / 2.0 * cosmo_param_.Omega_m * std::pow(cosmo_param_.H0, 2) * D / a; + return dy; + }; + + // scale by predicted value to get approx. constant fractional errors + v_t yyscale = yy.abs() + dt * rhs(t, yy).abs(); + + // call integrator + ode_integrate::rk_step_qs(dt, t, yy, yyscale, rhs, eps, dtdid, dtnext); + + tab_a_.push_back(yy[0]); + tab_D_.push_back(yy[1]); + tab_f_.push_back(yy[2]); + + dt = dtnext; + } + + // compute f, before we stored here D' + for (size_t i = 0; i < tab_a_.size(); ++i) + { + tab_f_[i] = std::log(tab_f_[i] / (tab_a_[i] * H_of_a(tab_a_[i]) * tab_D_[i])); + tab_D_[i] = std::log(tab_D_[i]); + tab_a_[i] = std::log(tab_a_[i]); + } + + gsl_ia_D_ = gsl_interp_accel_alloc(); + gsl_ia_f_ = gsl_interp_accel_alloc(); + + gsl_sp_D_ = gsl_spline_alloc(gsl_interp_cspline, tab_a_.size()); + gsl_sp_f_ = gsl_spline_alloc(gsl_interp_cspline, tab_a_.size()); + + gsl_spline_init(gsl_sp_D_, &tab_a_[0], &tab_D_[0], tab_a_.size()); + gsl_spline_init(gsl_sp_f_, &tab_a_[0], &tab_f_[0], tab_a_.size()); + + Dnow_ = std::exp(gsl_spline_eval(gsl_sp_D_, 0.0, gsl_ia_D_)); + } + public: - //! data structure to store cosmological parameters - CosmologyParameters cosmo_param_; - - //! pointer to an instance of a transfer function plugin - //TransferFunction_plugin *ptransfer_fun_; - std::unique_ptr transfer_function_; - - //! constructor for a cosmology calculator object /*! * @param acosmo a cosmological parameters structure * @param pTransferFunction pointer to an instance of a transfer function object */ - explicit CosmologyCalculator(ConfigFile &cf) - : cosmo_param_(cf) - { + explicit calculator(ConfigFile &cf) + : cosmo_param_(cf) + { transfer_function_ = std::move(select_TransferFunction_plugin(cf)); transfer_function_->intialise(); cosmo_param_.pnorm = this->ComputePNorm(); cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm); - csoca::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons" << " : " << (transfer_function_->tf_is_distinct()? "yes" : "no") << std::endl; - csoca::ilog << std::setw(32) << std::left << "TF maximum wave number" << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl; + csoca::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons" + << " : " << (transfer_function_->tf_is_distinct() ? 
"yes" : "no") << std::endl; + csoca::ilog << std::setw(32) << std::left << "TF maximum wave number" + << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl; + + // pre-compute growth factors and store for interpolation + this->compute_growth(); + } + + ~calculator() + { + gsl_spline_free(gsl_sp_D_); + gsl_spline_free(gsl_sp_f_); + gsl_interp_accel_free(gsl_ia_D_); + gsl_interp_accel_free(gsl_ia_f_); } //! Write out a correctly scaled power spectrum at time a - void WritePowerspectrum( real_t a, std::string fname ) const + void write_powerspectrum(real_t a, std::string fname) const { - const real_t Dplus0 = this->CalcGrowthFactor(a) / this->CalcGrowthFactor(1.0); + const real_t Dplus0 = this->get_growth_factor(a); - if( CONFIG::MPI_task_rank==0 ) + if (CONFIG::MPI_task_rank == 0) { - double kmin = std::max(1e-4,transfer_function_->get_kmin()); + double kmin = std::max(1e-4, transfer_function_->get_kmin()); // write power spectrum to a file std::ofstream ofs(fname.c_str()); - std::stringstream ss; ss << " ,a=" << a <<""; + std::stringstream ss; + ss << " ,a=" << a << ""; ofs << "# " << std::setw(18) << "k [h/Mpc]" - << std::setw(20) << ("P_dtot(k"+ss.str()+"|BS)") - << std::setw(20) << ("P_dcdm(k"+ss.str()+"|BS)") - << std::setw(20) << ("P_dbar(k"+ss.str()+"|BS)") - << std::setw(20) << ("P_tcdm(k"+ss.str()+"|BS)") - << std::setw(20) << ("P_tbar(k"+ss.str()+"|BS)") - << std::setw(20) << ("P_dtot(k"+ss.str()+")") - << std::setw(20) << ("P_dcdm(k"+ss.str()+")") - << std::setw(20) << ("P_dbar(k"+ss.str()+")") - << std::setw(20) << ("P_tcdm(k"+ss.str()+")") - << std::setw(20) << ("P_tbar(k"+ss.str()+")") - << std::setw(20) << ("P_dtot(K,a=1)") - << std::endl; - for( double k=kmin; kget_kmax(); k*=1.05 ){ + << std::setw(20) << ("P_dtot(k" + ss.str() + "|BS)") + << std::setw(20) << ("P_dcdm(k" + ss.str() + "|BS)") + << std::setw(20) << ("P_dbar(k" + ss.str() + "|BS)") + << std::setw(20) << ("P_tcdm(k" + ss.str() + "|BS)") + << std::setw(20) << ("P_tbar(k" + ss.str() + "|BS)") + << std::setw(20) << ("P_dtot(k" + ss.str() + ")") + << std::setw(20) << ("P_dcdm(k" + ss.str() + ")") + << std::setw(20) << ("P_dbar(k" + ss.str() + ")") + << std::setw(20) << ("P_tcdm(k" + ss.str() + ")") + << std::setw(20) << ("P_tbar(k" + ss.str() + ")") + << std::setw(20) << ("P_dtot(K,a=1)") + << std::endl; + for (double k = kmin; k < transfer_function_->get_kmax(); k *= 1.05) + { ofs << std::setw(20) << std::setprecision(10) << k << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total) * Dplus0, 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, cdm) * Dplus0, 2.0) @@ -111,11 +208,10 @@ public: << std::endl; } } - csoca::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl; } - const CosmologyParameters &GetParams(void) const + const cosmology::parameters &GetParams(void) const { return cosmo_param_; } @@ -128,90 +224,44 @@ public: */ inline real_t Power(real_t k, real_t a) { - real_t Dplus = CalcGrowthFactor(a); - real_t DplusOne = CalcGrowthFactor(1.0); + real_t Dplus = this->get_growth_factor(a); real_t pNorm = ComputePNorm(); - Dplus /= DplusOne; - DplusOne = 1.0; - real_t scale = Dplus / DplusOne; - return pNorm * scale * scale * TransferSq(k) * pow((double)k, (double)cosmo_param_.nspect); + return pNorm * Dplus * Dplus * TransferSq(k) * pow((double)k, (double)cosmo_param_.nspect); } - inline static double H_of_a(double a, const void *Params) + //! 
return the value of the Hubble function H(a) = dloga/dt + inline double H_of_a(double a) const { - const CosmologyParameters *cosm = (CosmologyParameters *)Params; - double a2 = a * a; - double Ha = sqrt(cosm->Omega_m / (a2 * a) + cosm->Omega_k / a2 + cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a)); - return Ha; + double HH2 = 0.0; + HH2 += cosmo_param_.Omega_r / (a * a * a * a); + HH2 += cosmo_param_.Omega_m / (a * a * a); + HH2 += cosmo_param_.Omega_k / (a * a); + HH2 += cosmo_param_.Omega_DE * std::pow(a, -3. * (1. + cosmo_param_.w_0 + cosmo_param_.w_a)) * exp(-3. * (1.0 - a) * cosmo_param_.w_a); + return cosmo_param_.H0 * std::sqrt(HH2); } - inline double H_of_a( double a ) const + //! Computes the linear theory growth factor D+, normalised to D+(a=1)=1 + real_t get_growth_factor(real_t a) const { - return 100.0 * this->H_of_a(a,reinterpret_cast(&this->cosmo_param_)); + return std::exp(gsl_spline_eval(gsl_sp_D_, std::log(a), gsl_ia_D_)) / Dnow_; } - inline static double Hprime_of_a(double a, void *Params) + //! Computes the linear theory growth rate f + /*! Function computes (by interpolating on precalculated table) + * f = dlog D+ / dlog a + */ + real_t get_f(real_t a) const { - CosmologyParameters *cosm = (CosmologyParameters *)Params; - double a2 = a * a; - double H = H_of_a(a, Params); - double Hprime = 1 / (a * H) * (-1.5 * cosm->Omega_m / (a2 * a) - cosm->Omega_k / a2 - 1.5 * cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a) * (1. + cosm->w_0 + (1. - a) * cosm->w_a)); - return Hprime; - } - - //! Integrand used by function CalcGrowthFactor to determine the linear growth factor D+ - inline static double GrowthIntegrand(double a, void *Params) - { - double Ha = a * H_of_a(a, Params); - return 2.5 / (Ha * Ha * Ha); - } - - //! integrand function for Calc_fPeebles - /*! - * @sa Calc_fPeebles - */ - inline static double fIntegrand( double a, void *Params ) - { - CosmologyParameters *cosm = (CosmologyParameters *)Params; - double y = cosm->Omega_m*(1.0/a-1.0) + cosm->Omega_DE*(a*a-1.0) + 1.0; - return 1.0/pow(y,1.5); - } - - //! calculates d log D+/d log a - /*! this version follows the Peebles (TBD: add citation) - * formula to compute Bertschinger's vfact - */ - inline real_t CalcGrowthRate( real_t a ) - { - return CalcVFact(a) / H_of_a(a) / a; - } - - //! Computes the linear theory growth factor D+ - /*! Function integrates over member function GrowthIntegrand and computes - * /a - * D+(a) = 5/2 H(a) * | [a'^3 * H(a')^3]^(-1) da' - * /0 - */ - real_t CalcGrowthFactor(real_t a) const - { - real_t integral = integrate(&GrowthIntegrand, 0.0, a, (void *)&cosmo_param_); - return H_of_a(a, (void *)&cosmo_param_) * integral; + return std::exp(gsl_spline_eval(gsl_sp_f_, std::log(a), gsl_ia_f_)); } //! Compute the factor relating particle displacement and velocity /*! Function computes - * - * vfac = a^2 * H(a) * dlogD+ / d log a = a^2 * H'(a) + 5/2 * [ a * D+(a) * H(a) ]^(-1) - * - */ - real_t CalcVFact(real_t a) const + * vfac = a * (H(a)/h) * dlogD+ / dlog a + */ + real_t get_vfact(real_t a) const { - real_t Dp = CalcGrowthFactor(a); - real_t H = H_of_a(a, (void *)&cosmo_param_); - real_t Hp = Hprime_of_a(a, (void *)&cosmo_param_); - real_t a2 = a * a; - - return (a2 * Hp + 2.5 / (a * Dp * H)) * 100.0; + return a * H_of_a(a) / cosmo_param_.h * this->get_f(a); } //! 
Integrand for the sigma_8 normalization of the power spectrum @@ -222,8 +272,8 @@ public: if (k <= 0.0) return 0.0f; - CosmologyCalculator *pcc = reinterpret_cast(pParams); - + cosmology::calculator *pcc = reinterpret_cast(pParams); + double x = k * 8.0; double w = 3.0 * (sin(x) - x * cos(x)) / (x * x * x); static double nspect = (double)pcc->cosmo_param_.nspect; @@ -241,8 +291,8 @@ public: if (k <= 0.0) return 0.0f; - CosmologyCalculator *pcc = reinterpret_cast(pParams); - + cosmology::calculator *pcc = reinterpret_cast(pParams); + double x = k * 8.0; double w = 3.0 * (sin(x) - x * cos(x)) / (x * x * x); static double nspect = (double)pcc->cosmo_param_.nspect; @@ -286,9 +336,9 @@ public: kmin = transfer_function_->get_kmin(); if (!transfer_function_->tf_has_total0()) - sigma0 = 4.0 * M_PI * integrate(&dSigma8, (double)kmin, (double)kmax, this ); + sigma0 = 4.0 * M_PI * integrate(&dSigma8, (double)kmin, (double)kmax, this); else - sigma0 = 4.0 * M_PI * integrate(&dSigma8_0, (double)kmin, (double)kmax, this ); + sigma0 = 4.0 * M_PI * integrate(&dSigma8_0, (double)kmin, (double)kmax, this); return cosmo_param_.sigma8 * cosmo_param_.sigma8 / sigma0; } @@ -305,4 +355,6 @@ inline double jeans_sound_speed(double rho, double mass) { const double G = 6.67e-8; return pow(6.0 * mass / M_PI * sqrt(rho) * pow(G, 1.5), 1.0 / 3.0); -} \ No newline at end of file +} + +} // namespace cosmology \ No newline at end of file diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh index 0c4efcd..228b20f 100644 --- a/include/cosmology_parameters.hh +++ b/include/cosmology_parameters.hh @@ -1,10 +1,21 @@ #pragma once +/*******************************************************************************\ + cosmology_parameters.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 06/2019 - Oliver Hahn - first implementation +\*******************************************************************************/ +#include #include -//! structure for cosmological parameters -struct CosmologyParameters +namespace cosmology { +//! structure for cosmological parameters +struct parameters +{ + double Omega_m, //!< baryon+dark matter density Omega_b, //!< baryon matter density @@ -12,38 +23,68 @@ struct CosmologyParameters Omega_r, //!< photon + relativistic particle density Omega_k, //!< curvature density H0, //!< Hubble constant in km/s/Mpc + h, //!< hubble parameter nspect, //!< long-wave spectral index (scale free is nspect=1) sigma8, //!< power spectrum normalization + Tcmb, //!< CMB temperature (used to set Omega_r) + Neff, //!< effective number of neutrino species (used to set Omega_r) w_0, //!< dark energy equation of state parameter 1: w = w0 + a * wa w_a, //!< dark energy equation of state parameter 2: w = w0 + a * wa // below are helpers to store additional information - dplus, //!< linear perturbation growth factor - pnorm, //!< actual power spectrum normalisation factor + dplus, //!< linear perturbation growth factor + f, //!< growth factor logarithmic derivative + pnorm, //!< actual power spectrum normalisation factor sqrtpnorm, //!< sqrt of power spectrum normalisation factor - vfact; //!< velocity<->displacement conversion factor in Zel'dovich approx. + vfact; //!< velocity<->displacement conversion factor in Zel'dovich approx. 
- explicit CosmologyParameters(ConfigFile cf) + explicit parameters(ConfigFile cf) { + H0 = cf.GetValue("cosmology", "H0"); + h = H0 / 100.0; + + nspect = cf.GetValue("cosmology", "nspec"); + Omega_b = cf.GetValue("cosmology", "Omega_b"); + Omega_m = cf.GetValue("cosmology", "Omega_m"); + Omega_DE = cf.GetValue("cosmology", "Omega_L"); + w_0 = cf.GetValueSafe("cosmology", "w0", -1.0); + w_a = cf.GetValueSafe("cosmology", "wa", 0.0); - Omega_r = cf.GetValueSafe("cosmology", "Omega_r", 0.0); // no longer default to nonzero (8.3e-5) - Omega_k = 1.0 - Omega_m - Omega_DE - Omega_r; + Tcmb = cf.GetValueSafe("cosmology", "Tcmb", 2.725); + + Neff = cf.GetValueSafe("cosmology", "Neff", 3.04); - H0 = cf.GetValue("cosmology", "H0"); sigma8 = cf.GetValue("cosmology", "sigma_8"); - nspect = cf.GetValue("cosmology", "nspec"); + + // calculate energy density in ultrarelativistic species from Tcmb and Neff + double Omega_gamma = 4 * phys_const::sigma_SI / std::pow(phys_const::c_SI, 3) * std::pow(Tcmb, 4.0) / phys_const::rhocrit_h2_SI / (h * h); + double Omega_nu = Neff * Omega_gamma * 7. / 8. * std::pow(4. / 11., 4. / 3.); + + Omega_r = Omega_gamma + Omega_nu; + + if (cf.GetValueSafe("cosmology", "NoRadiation", false)) + { + Omega_r = 0.0; + } + else + { + csoca::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally. Make sure your sim code supports this..." << std::endl; + } + + Omega_k = 1.0 - Omega_m - Omega_DE - Omega_r; dplus = 0.0; pnorm = 0.0; vfact = 0.0; } - CosmologyParameters(void) + parameters(void) { } -}; \ No newline at end of file +}; +} // namespace cosmology \ No newline at end of file diff --git a/include/ic_generator.hh b/include/ic_generator.hh index 59471b0..2cf38f4 100644 --- a/include/ic_generator.hh +++ b/include/ic_generator.hh @@ -15,6 +15,6 @@ namespace ic_generator{ extern std::unique_ptr the_random_number_generator; extern std::unique_ptr the_output_plugin; - extern std::unique_ptr the_cosmo_calc; + extern std::unique_ptr the_cosmo_calc; } diff --git a/include/testing.hh b/include/testing.hh index e5d2a99..1683b09 100644 --- a/include/testing.hh +++ b/include/testing.hh @@ -35,7 +35,7 @@ namespace testing{ void output_convergence( ConfigFile &the_config, - CosmologyCalculator* the_cosmo_calc, + cosmology::calculator* the_cosmo_calc, std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 708e12b..49024df 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -22,13 +22,13 @@ namespace ic_generator{ std::unique_ptr the_random_number_generator; std::unique_ptr the_output_plugin; -std::unique_ptr the_cosmo_calc; +std::unique_ptr the_cosmo_calc; int Initialise( ConfigFile& the_config ) { the_random_number_generator = std::move(select_RNG_plugin(the_config)); the_output_plugin = std::move(select_output_plugin(the_config)); - the_cosmo_calc = std::make_unique(the_config); + the_cosmo_calc = std::make_unique(the_config); return 0; } @@ -120,7 +120,7 @@ int Run( ConfigFile& the_config ) const real_t astart = 1.0/(1.0+zstart); const real_t volfac(std::pow(boxlen / ngrid / 2.0 / M_PI, 1.5)); - the_cosmo_calc->WritePowerspectrum(astart, "input_powerspec.txt" ); + the_cosmo_calc->write_powerspectrum(astart, "input_powerspec.txt" ); //csoca::ilog << "-----------------------------------------------------------------------------" << std::endl; @@ -132,8 +132,8 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- // 
Compute LPT time coefficients //-------------------------------------------------------------------- - const real_t Dplus0 = the_cosmo_calc->CalcGrowthFactor(astart) / the_cosmo_calc->CalcGrowthFactor(1.0); - const real_t vfac = the_cosmo_calc->CalcVFact(astart); + const real_t Dplus0 = the_cosmo_calc->get_growth_factor(astart); + const real_t vfac = the_cosmo_calc->get_vfact(astart); const double g1 = -Dplus0; const double g2 = ((LPTorder>1)? -3.0/7.0*Dplus0*Dplus0 : 0.0); @@ -151,7 +151,7 @@ int Run( ConfigFile& the_config ) // coefficients needed for anisotropic external tides const double ai3 = std::pow(astart,-3); const double Omega_m_of_a = the_cosmo_calc->cosmo_param_.Omega_m * ai3 / (the_cosmo_calc->cosmo_param_.Omega_m * ai3 + the_cosmo_calc->cosmo_param_.Omega_DE); - const double f1 = the_cosmo_calc->CalcGrowthRate(astart); + const double f1 = the_cosmo_calc->get_f(astart); const double f_aniso = -4.0/3.0 * f1 * f1 / Omega_m_of_a; const std::array lss_aniso_alpha = { diff --git a/src/testing.cc b/src/testing.cc index e99fbb4..ff990e1 100644 --- a/src/testing.cc +++ b/src/testing.cc @@ -242,7 +242,7 @@ void output_velocity_displacement_symmetries( void output_convergence( ConfigFile &the_config, - CosmologyCalculator* the_cosmo_calc, + cosmology::calculator* the_cosmo_calc, std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, @@ -301,7 +301,7 @@ void output_convergence( } } nabla_vini_mn.FourierTransformBackward(); - nabla_vini_mn *= (3.2144004915 / the_cosmo_calc->CalcGrowthFactor(1.0)); + nabla_vini_mn *= (3.2144004915 / the_cosmo_calc->get_growth_factor(1.0)); // sum of squares #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { From 9932f38e3c24d8b03049de134b5df77e4bd0efdc Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 11:15:59 +0200 Subject: [PATCH 084/130] updated to class 2.8 submodule --- external/class | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/class b/external/class index 58e0adb..083efeb 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit 58e0adbb2cf845cd0766a26cecc1a153fa17d8b9 +Subproject commit 083efeb043fca85418c1ea02f062be111b970b28 From f5a9006299a54567146084de3912741e82096f7d Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 11:18:21 +0200 Subject: [PATCH 085/130] some refactoring, mostly just homogenizing function names --- include/convolution.hh | 2 +- include/general.hh | 136 ++++++++++++++++++++++++----------------- include/grid_fft.hh | 2 +- src/grid_fft.cc | 4 +- src/main.cc | 2 +- 5 files changed, 86 insertions(+), 60 deletions(-) diff --git a/include/convolution.hh b/include/convolution.hh index 2145445..a1fc1e3 100644 --- a/include/convolution.hh +++ b/include/convolution.hh @@ -333,7 +333,7 @@ public: crecvbuf_ = new ccomplex_t[maxslicesz_ / 2]; recvbuf_ = reinterpret_cast(&crecvbuf_[0]); - int ntasks(MPI_Get_size()); + int ntasks(MPI::get_size()); offsets_.assign(ntasks, 0); offsetsp_.assign(ntasks, 0); diff --git a/include/general.hh b/include/general.hh index b7f7df3..7334579 100644 --- a/include/general.hh +++ b/include/general.hh @@ -7,13 +7,15 @@ #if defined(USE_MPI) #include - #include +#include #else - #include +#include #endif #include +#define _unused(x) ((void)(x)) + #ifdef USE_SINGLEPRECISION using real_t = float; using complex_t = fftwf_complex; @@ -24,9 +26,23 @@ using complex_t = fftw_complex; #define FFTW_PREFIX fftw #endif -enum class fluid_component { density, 
vx, vy, vz, dx, dy, dz }; -enum class cosmo_species { dm, baryon, neutrino }; -extern std::map cosmo_species_name; +enum class fluid_component +{ + density, + vx, + vy, + vz, + dx, + dy, + dz +}; +enum class cosmo_species +{ + dm, + baryon, + neutrino +}; +extern std::map cosmo_species_name; using ccomplex_t = std::complex; @@ -47,52 +63,64 @@ using fftw_plan_t = FFTW_GEN_NAME(FFTW_PREFIX, plan); #if defined(USE_MPI) inline double get_wtime() { - return MPI_Wtime(); + return MPI_Wtime(); } -inline int MPI_Get_rank( void ){ - int rank, ret; - ret = MPI_Comm_rank(MPI_COMM_WORLD, &rank); - assert( ret==MPI_SUCCESS ); - return rank; -} - -inline int MPI_Get_size( void ){ - int size, ret; - ret = MPI_Comm_size(MPI_COMM_WORLD, &size); - assert( ret==MPI_SUCCESS ); - return size; -} - -template -MPI_Datatype GetMPIDatatype( void ) +namespace MPI { - if( typeid(T) == typeid(std::complex) ) - return MPI_COMPLEX; - - if( typeid(T) == typeid(std::complex) ) - return MPI_DOUBLE_COMPLEX; - if( typeid(T) == typeid(int) ) +inline int get_rank(void) +{ + int rank, ret; + ret = MPI_Comm_rank(MPI_COMM_WORLD, &rank); + assert(ret == MPI_SUCCESS); + _unused(ret); + return rank; +} + +inline int get_size(void) +{ + int size, ret; + ret = MPI_Comm_size(MPI_COMM_WORLD, &size); + assert(ret == MPI_SUCCESS); + _unused(ret); + return size; +} + +template +inline MPI_Datatype get_datatype(void) +{ + if (typeid(T) == typeid(std::complex)) + return MPI_C_FLOAT_COMPLEX; + + if (typeid(T) == typeid(std::complex)) + return MPI_C_DOUBLE_COMPLEX; + + if (typeid(T) == typeid(std::complex)) + return MPI_C_LONG_DOUBLE_COMPLEX; + + if (typeid(T) == typeid(int)) return MPI_INT; - if( typeid(T) == typeid(unsigned) ) + if (typeid(T) == typeid(unsigned)) return MPI_UNSIGNED; - if( typeid(T) == typeid(float) ) + if (typeid(T) == typeid(float)) return MPI_FLOAT; - if( typeid(T) == typeid(double) ) + if (typeid(T) == typeid(double)) return MPI_DOUBLE; - if( typeid(T) == typeid(char) ) + if (typeid(T) == typeid(long double)) + return MPI_LONG_DOUBLE; + + if (typeid(T) == typeid(char)) return MPI_CHAR; abort(); - } -inline std::string GetMPIversion( void ) +inline std::string get_version(void) { int len; char mpi_lib_ver[MPI_MAX_LIBRARY_VERSION_STRING]; @@ -100,32 +128,31 @@ inline std::string GetMPIversion( void ) MPI_Get_library_version(mpi_lib_ver, &len); return std::string(mpi_lib_ver); } - +} // namespace MPI #else - #if defined(_OPENMP) - #include - inline double get_wtime() - { - return omp_get_wtime(); - } - #else - #include - inline double get_wtime() - { - return std::clock() / double(CLOCKS_PER_SEC); - } - #endif +#if defined(_OPENMP) +#include +inline double get_wtime() +{ + return omp_get_wtime(); +} +#else +#include +inline double get_wtime() +{ + return std::clock() / double(CLOCKS_PER_SEC); +} +#endif #endif -inline void multitask_sync_barrier( void ) +inline void multitask_sync_barrier(void) { #if defined(USE_MPI) - MPI_Barrier( MPI_COMM_WORLD ); + MPI_Barrier(MPI_COMM_WORLD); #endif } - namespace CONFIG { extern int MPI_thread_support; @@ -137,12 +164,11 @@ extern bool FFTW_threads_ok; extern int num_threads; } // namespace CONFIG - // These variables are autogenerated and compiled // into the library by the version.cmake script extern "C" { - extern const char* GIT_TAG; - extern const char* GIT_REV; - extern const char* GIT_BRANCH; + extern const char *GIT_TAG; + extern const char *GIT_REV; + extern const char *GIT_BRANCH; } \ No newline at end of file diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 
2cf5557..f460297 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -788,7 +788,7 @@ public: #if defined(USE_MPI) data_t glob_sum = 0.0; MPI_Allreduce(reinterpret_cast(&sum), reinterpret_cast(&glob_sum), - 1, GetMPIDatatype(), MPI_SUM, MPI_COMM_WORLD); + 1, MPI::get_datatype(), MPI_SUM, MPI_COMM_WORLD); sum = glob_sum; #endif } diff --git a/src/grid_fft.cc b/src/grid_fft.cc index a1b1912..4905cb1 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -539,8 +539,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string int mpi_size, mpi_rank; - mpi_size = MPI_Get_size(); - mpi_rank = MPI_Get_rank(); + mpi_size = MPI::get_size(); + mpi_rank = MPI::get_rank(); if (!file_exists(fname) && mpi_rank == 0) create_hdf5(fname); diff --git a/src/main.cc b/src/main.cc index 12cde3a..d2b97aa 100644 --- a/src/main.cc +++ b/src/main.cc @@ -146,7 +146,7 @@ int main( int argc, char** argv ) // MPI related infos #if defined(USE_MPI) csoca::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "yes (" << CONFIG::MPI_task_size << " tasks)" << std::endl; - csoca::dlog << std::setw(32) << std::left << "MPI version" << " : " << GetMPIversion() << std::endl; + csoca::dlog << std::setw(32) << std::left << "MPI version" << " : " << MPI::get_version() << std::endl; #else csoca::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "no" << std::endl; #endif From 4e013ec0d112c7a69d5ecafd8f0f219eb1f9688c Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 11:18:50 +0200 Subject: [PATCH 086/130] added global exception handler --- src/main.cc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/main.cc b/src/main.cc index d2b97aa..5afc648 100644 --- a/src/main.cc +++ b/src/main.cc @@ -28,6 +28,20 @@ int num_threads = 1; #include "system_stat.hh" +#include +#include + +void handle_eptr(std::exception_ptr eptr) // passing by value is ok +{ + try { + if (eptr) { + std::rethrow_exception(eptr); + } + } catch(const std::exception& e) { + csoca::elog << "This happened: \"" << e.what() << "\"" << std::endl; + } +} + int main( int argc, char** argv ) { csoca::Logger::SetLevel(csoca::LogLevel::Info); @@ -175,6 +189,7 @@ int main( int argc, char** argv ) { ic_generator::Initialise( the_config ); }catch(...){ + handle_eptr( std::current_exception() ); csoca::elog << "Problem during initialisation. See error(s) above. Exiting..." 
<< std::endl; #if defined(USE_MPI) MPI_Finalize(); From 809a03d59e7671182c680dedeb522e541d572d46 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 12:45:24 +0200 Subject: [PATCH 087/130] more parameters and verbosity in cosmo parameters --- include/cosmology_parameters.hh | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh index 228b20f..1af692c 100644 --- a/include/cosmology_parameters.hh +++ b/include/cosmology_parameters.hh @@ -55,19 +55,18 @@ struct parameters w_a = cf.GetValueSafe("cosmology", "wa", 0.0); - Tcmb = cf.GetValueSafe("cosmology", "Tcmb", 2.725); + Tcmb = cf.GetValueSafe("cosmology", "Tcmb", 2.7255); - Neff = cf.GetValueSafe("cosmology", "Neff", 3.04); + Neff = cf.GetValueSafe("cosmology", "Neff", 3.046); sigma8 = cf.GetValue("cosmology", "sigma_8"); // calculate energy density in ultrarelativistic species from Tcmb and Neff double Omega_gamma = 4 * phys_const::sigma_SI / std::pow(phys_const::c_SI, 3) * std::pow(Tcmb, 4.0) / phys_const::rhocrit_h2_SI / (h * h); double Omega_nu = Neff * Omega_gamma * 7. / 8. * std::pow(4. / 11., 4. / 3.); - Omega_r = Omega_gamma + Omega_nu; - if (cf.GetValueSafe("cosmology", "NoRadiation", false)) + if (cf.GetValueSafe("cosmology", "ZeroRadiation", false)) { Omega_r = 0.0; } @@ -75,8 +74,25 @@ struct parameters { csoca::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally. Make sure your sim code supports this..." << std::endl; } - +#if 1 + // assume zero curvature, take difference from dark energy + Omega_DE += 1.0 - Omega_m - Omega_DE - Omega_r; +#else + // allow for curvature Omega_k = 1.0 - Omega_m - Omega_DE - Omega_r; +#endif + + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + csoca::ilog << "Cosmological parameters are: " << std::endl; + csoca::ilog << " H0 = " << std::setw(16) << H0 << "sigma_8 = " << std::setw(16) << sigma8 << std::endl; + csoca::ilog << " Omega_c = " << std::setw(16) << Omega_m-Omega_b << "Omega_b = " << std::setw(16) << Omega_b << std::endl; + if (!cf.GetValueSafe("cosmology", "ZeroRadiation", false)){ + csoca::ilog << " Omega_g = " << std::setw(16) << Omega_gamma << "Omega_nu = " << std::setw(16) << Omega_nu << std::endl; + }else{ + csoca::ilog << " Omega_r = " << std::setw(16) << Omega_r << std::endl; + } + csoca::ilog << " Omega_DE = " << std::setw(16) << Omega_DE << "nspect = " << std::setw(16) << nspect << std::endl; + csoca::ilog << " w0 = " << std::setw(16) << w_0 << "w_a = " << std::setw(16) << w_a << std::endl; dplus = 0.0; pnorm = 0.0; From 4aba654797102c37ae33604860b857c51e30ff75 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 12:48:52 +0200 Subject: [PATCH 088/130] wrapped interpolation into its own class, also transfer functions can come pre-normalised now --- include/cosmology_calculator.hh | 179 +++++++++++++--------------- include/interpolate.hh | 65 ++++++++++ include/transfer_function_plugin.hh | 4 +- src/ic_generator.cc | 25 ++-- 4 files changed, 170 insertions(+), 103 deletions(-) create mode 100644 include/interpolate.hh diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index cb07a04..1708637 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -9,8 +9,10 @@ #include #include +#include + #include -#include +// #include #include namespace cosmology @@ -30,15 +32,12 @@ public: cosmology::parameters cosmo_param_; 
//! pointer to an instance of a transfer function plugin - //TransferFunction_plugin *ptransfer_fun_; std::unique_ptr transfer_function_; private: - static constexpr double REL_PRECISION = 1e-5; - std::vector tab_a_, tab_D_, tab_f_; - gsl_interp_accel *gsl_ia_a_, *gsl_ia_D_, *gsl_ia_f_; - gsl_spline *gsl_sp_a_, *gsl_sp_D_, *gsl_sp_f_; - double Dnow_; + static constexpr double REL_PRECISION = 1e-9; + interpolated_function_1d D_of_a_, f_of_a_, a_of_D_; + double Dnow_, astart_; real_t integrate(double (*func)(double x, void *params), double a, double b, void *params) const { @@ -63,7 +62,7 @@ private: return (real_t)result; } - void compute_growth(void) + void compute_growth( std::vector& tab_a, std::vector& tab_D, std::vector& tab_f ) { using v_t = vec_t<3, double>; @@ -107,31 +106,20 @@ private: // call integrator ode_integrate::rk_step_qs(dt, t, yy, yyscale, rhs, eps, dtdid, dtnext); - tab_a_.push_back(yy[0]); - tab_D_.push_back(yy[1]); - tab_f_.push_back(yy[2]); + tab_a.push_back(yy[0]); + tab_D.push_back(yy[1]); + tab_f.push_back(yy[2]); dt = dtnext; } // compute f, before we stored here D' - for (size_t i = 0; i < tab_a_.size(); ++i) + for (size_t i = 0; i < tab_a.size(); ++i) { - tab_f_[i] = std::log(tab_f_[i] / (tab_a_[i] * H_of_a(tab_a_[i]) * tab_D_[i])); - tab_D_[i] = std::log(tab_D_[i]); - tab_a_[i] = std::log(tab_a_[i]); + tab_f[i] = tab_f[i] / (tab_a[i] * H_of_a(tab_a[i]) * tab_D[i]); + tab_D[i] = tab_D[i]; + tab_a[i] = tab_a[i]; } - - gsl_ia_D_ = gsl_interp_accel_alloc(); - gsl_ia_f_ = gsl_interp_accel_alloc(); - - gsl_sp_D_ = gsl_spline_alloc(gsl_interp_cspline, tab_a_.size()); - gsl_sp_f_ = gsl_spline_alloc(gsl_interp_cspline, tab_a_.size()); - - gsl_spline_init(gsl_sp_D_, &tab_a_[0], &tab_D_[0], tab_a_.size()); - gsl_spline_init(gsl_sp_f_, &tab_a_[0], &tab_f_[0], tab_a_.size()); - - Dnow_ = std::exp(gsl_spline_eval(gsl_sp_D_, 0.0, gsl_ia_D_)); } public: @@ -142,33 +130,44 @@ public: */ explicit calculator(ConfigFile &cf) - : cosmo_param_(cf) + : cosmo_param_(cf), astart_( 1.0/(1.0+cf.GetValue("setup","zstart")) ) { + // pre-compute growth factors and store for interpolation + std::vector tab_a, tab_D, tab_f; + this->compute_growth(tab_a, tab_D, tab_f); + D_of_a_.set_data(tab_a,tab_D); + f_of_a_.set_data(tab_a,tab_f); + a_of_D_.set_data(tab_D,tab_a); + Dnow_ = D_of_a_(1.0); + + // set up transfer functions and compute normalisation transfer_function_ = std::move(select_TransferFunction_plugin(cf)); transfer_function_->intialise(); - cosmo_param_.pnorm = this->ComputePNorm(); + if( !transfer_function_->tf_isnormalised_ ) + cosmo_param_.pnorm = this->compute_pnorm_from_sigma8(); + else{ + cosmo_param_.pnorm = 1.0; + csoca::ilog << "Measured sigma8 for fixed PS normalisation is " << this->compute_sigma8() << std::endl; + } cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm); + csoca::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons" << " : " << (transfer_function_->tf_is_distinct() ? 
"yes" : "no") << std::endl; csoca::ilog << std::setw(32) << std::left << "TF maximum wave number" << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl; - // pre-compute growth factors and store for interpolation - this->compute_growth(); + // csoca::ilog << "D+(MUSIC) = " << this->get_growth_factor( 1.0/(1.0+cf.GetValue("setup","zstart")) ) << std::endl; + // csoca::ilog << "pnrom = " << cosmo_param_.pnorm << std::endl; } ~calculator() { - gsl_spline_free(gsl_sp_D_); - gsl_spline_free(gsl_sp_f_); - gsl_interp_accel_free(gsl_ia_D_); - gsl_interp_accel_free(gsl_ia_f_); } //! Write out a correctly scaled power spectrum at time a void write_powerspectrum(real_t a, std::string fname) const { - const real_t Dplus0 = this->get_growth_factor(a); + // const real_t Dplus0 = this->get_growth_factor(a); if (CONFIG::MPI_task_rank == 0) { @@ -177,60 +176,48 @@ public: // write power spectrum to a file std::ofstream ofs(fname.c_str()); std::stringstream ss; - ss << " ,a=" << a << ""; + ss << " ,ap=" << a << ""; ofs << "# " << std::setw(18) << "k [h/Mpc]" - << std::setw(20) << ("P_dtot(k" + ss.str() + "|BS)") - << std::setw(20) << ("P_dcdm(k" + ss.str() + "|BS)") - << std::setw(20) << ("P_dbar(k" + ss.str() + "|BS)") - << std::setw(20) << ("P_tcdm(k" + ss.str() + "|BS)") - << std::setw(20) << ("P_tbar(k" + ss.str() + "|BS)") - << std::setw(20) << ("P_dtot(k" + ss.str() + ")") - << std::setw(20) << ("P_dcdm(k" + ss.str() + ")") - << std::setw(20) << ("P_dbar(k" + ss.str() + ")") - << std::setw(20) << ("P_tcdm(k" + ss.str() + ")") - << std::setw(20) << ("P_tbar(k" + ss.str() + ")") + << std::setw(20) << ("P_dtot(k,a=ap)") + << std::setw(20) << ("P_dcdm(k,a=ap)") + << std::setw(20) << ("P_dbar(k,a=ap)") + << std::setw(20) << ("P_tcdm(k,a=ap)") + << std::setw(20) << ("P_tbar(k,a=ap)") + << std::setw(20) << ("P_dtot(k,a=1)") + << std::setw(20) << ("P_dcdm(k,a=1)") + << std::setw(20) << ("P_dbar(k,a=1)") + << std::setw(20) << ("P_tcdm(k,a=1)") + << std::setw(20) << ("P_tbar(k,a=1)") << std::setw(20) << ("P_dtot(K,a=1)") << std::endl; for (double k = kmin; k < transfer_function_->get_kmax(); k *= 1.05) { ofs << std::setw(20) << std::setprecision(10) << k - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, cdm) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, baryon) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vcdm) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vbaryon) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total0), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, cdm0), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, baryon0), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vcdm0), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vbaryon0), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon), 2.0) + << std::setw(20) << std::setprecision(10) 
<< std::pow(this->get_amplitude(k, vcdm), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vcdm0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vtotal), 2.0) << std::endl; + #warning Check whether output is at redshift that is indicated! } } csoca::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl; } - const cosmology::parameters &GetParams(void) const + const cosmology::parameters &get_parameters(void) const noexcept { return cosmo_param_; } - //! returns the amplitude of amplitude of the power spectrum - /*! - * @param k the wave number in h/Mpc - * @param a the expansion factor of the universe - * @returns power spectrum amplitude for wave number k at time a - */ - inline real_t Power(real_t k, real_t a) - { - real_t Dplus = this->get_growth_factor(a); - real_t pNorm = ComputePNorm(); - return pNorm * Dplus * Dplus * TransferSq(k) * pow((double)k, (double)cosmo_param_.nspect); - } - //! return the value of the Hubble function H(a) = dloga/dt - inline double H_of_a(double a) const + inline double H_of_a(double a) const noexcept { double HH2 = 0.0; HH2 += cosmo_param_.Omega_r / (a * a * a * a); @@ -241,25 +228,31 @@ public: } //! Computes the linear theory growth factor D+, normalised to D+(a=1)=1 - real_t get_growth_factor(real_t a) const + real_t get_growth_factor(real_t a) const noexcept { - return std::exp(gsl_spline_eval(gsl_sp_D_, std::log(a), gsl_ia_D_)) / Dnow_; + return D_of_a_(a) / Dnow_; + } + + //! Computes the inverse of get_growth_factor + real_t get_a( real_t Dplus ) const noexcept + { + return a_of_D_( Dplus * Dnow_ ); } //! Computes the linear theory growth rate f /*! Function computes (by interpolating on precalculated table) * f = dlog D+ / dlog a */ - real_t get_f(real_t a) const + real_t get_f(real_t a) const noexcept { - return std::exp(gsl_spline_eval(gsl_sp_f_, std::log(a), gsl_ia_f_)); + return f_of_a_(a); } //! Compute the factor relating particle displacement and velocity /*! Function computes * vfac = a * (H(a)/h) * dlogD+ / dlog a */ - real_t get_vfact(real_t a) const + real_t get_vfact(real_t a) const noexcept { return a * H_of_a(a) / cosmo_param_.h * this->get_f(a); } @@ -302,24 +295,12 @@ public: return k * k * w * w * pow((double)k, (double)nspect) * tf * tf; } - //! Computes the square of the transfer function - /*! Function evaluates the supplied transfer function ptransfer_fun_ - * and returns the square of its value at wave number k - * @param k wave number at which to evaluate the transfer function - */ - inline real_t TransferSq(real_t k) const - { - //.. parameter supplied transfer function - real_t tf1 = transfer_function_->compute(k, total); - return tf1 * tf1; - } - //! Computes the amplitude of a mode from the power spectrum /*! 
Function evaluates the supplied transfer function ptransfer_fun_ * and returns the amplitude of fluctuations at wave number k at z=0 * @param k wave number at which to evaluate */ - inline real_t GetAmplitude(real_t k, tf_type type) const + inline real_t get_amplitude(real_t k, tf_type type) const { return std::pow(k, 0.5 * cosmo_param_.nspect) * transfer_function_->compute(k, type) * cosmo_param_.sqrtpnorm; } @@ -329,7 +310,7 @@ public: * integrates the power spectrum to fix the normalization to that given * by the sigma_8 parameter */ - real_t ComputePNorm(void) + real_t compute_sigma8(void) { real_t sigma0, kmin, kmax; kmax = transfer_function_->get_kmax(); @@ -337,10 +318,22 @@ public: if (!transfer_function_->tf_has_total0()) sigma0 = 4.0 * M_PI * integrate(&dSigma8, (double)kmin, (double)kmax, this); - else + else{ sigma0 = 4.0 * M_PI * integrate(&dSigma8_0, (double)kmin, (double)kmax, this); + } - return cosmo_param_.sigma8 * cosmo_param_.sigma8 / sigma0; + return std::sqrt(sigma0); + } + + //! Computes the normalization for the power spectrum + /*! + * integrates the power spectrum to fix the normalization to that given + * by the sigma_8 parameter + */ + real_t compute_pnorm_from_sigma8(void) + { + auto measured_sigma8 = this->compute_sigma8(); + return cosmo_param_.sigma8 * cosmo_param_.sigma8 / (measured_sigma8 * measured_sigma8); } }; diff --git a/include/interpolate.hh b/include/interpolate.hh new file mode 100644 index 0000000..a2a5a88 --- /dev/null +++ b/include/interpolate.hh @@ -0,0 +1,65 @@ +#pragma once + +#include +#include +#include +#include + +template +class interpolated_function_1d +{ + +private: + std::vector data_x_, data_y_; + gsl_interp_accel *gsl_ia_; + gsl_spline *gsl_sp_; + bool isinit_; + + void deallocate() + { + gsl_spline_free(gsl_sp_); + gsl_interp_accel_free(gsl_ia_); + } + +public: + interpolated_function_1d(const interpolated_function_1d &i) = delete; + + interpolated_function_1d(){} + + interpolated_function_1d(const std::vector &data_x, const std::vector &data_y) + { + this->set_data( data_x, data_y ); + } + + ~interpolated_function_1d() + { + if (isinit_) this->deallocate(); + } + + void set_data(const std::vector &data_x, const std::vector &data_y) + { + assert(data_x_.size() == data_y_.size()); + assert(!(logx & periodic)); + + data_x_ = data_x; + data_y_ = data_y; + + if (logx) for (auto &d : data_x_) d = std::log(d); + if (logy) for (auto &d : data_y_) d = std::log(d); + + if (isinit_) this->deallocate(); + + gsl_ia_ = gsl_interp_accel_alloc(); + gsl_sp_ = gsl_spline_alloc(periodic ? gsl_interp_cspline_periodic : gsl_interp_cspline, data_x_.size()); + gsl_spline_init(gsl_sp_, &data_x_[0], &data_y_[0], data_x_.size()); + + isinit_ = true; + } + + double operator()(double x) const noexcept + { + double xa = logx ? std::log(x) : x; + double y(gsl_spline_eval(gsl_sp_, xa, gsl_ia_)); + return logy ? 
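+        // the spline was built on log(y) when logy is set, so transform the interpolated value back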
std::exp(y) : y; + } +}; \ No newline at end of file diff --git a/include/transfer_function_plugin.hh b/include/transfer_function_plugin.hh index 6a7fb6f..fd95250 100644 --- a/include/transfer_function_plugin.hh +++ b/include/transfer_function_plugin.hh @@ -30,10 +30,12 @@ class TransferFunction_plugin bool tf_withvel_; //!< bool if also have velocity transfer functions bool tf_withtotal0_; //!< have the z=0 spectrum for normalisation purposes bool tf_velunits_; //!< velocities are in velocity units (km/s) + bool tf_isnormalised_; //!< assume that transfer functions come already correctly normalised and need be re-normalised to a specified value + public: //! constructor TransferFunction_plugin(ConfigFile &cf) - : pcf_(&cf), tf_distinct_(false), tf_withvel_(false), tf_withtotal0_(false), tf_velunits_(false) + : pcf_(&cf), tf_distinct_(false), tf_withvel_(false), tf_withtotal0_(false), tf_velunits_(false), tf_isnormalised_(false) { } //! destructor diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 49024df..a8f60bd 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -296,7 +296,7 @@ int Run( ConfigFile& the_config ) phi.FourierTransformForward(false); phi.assign_function_of_grids_kdep([&](auto k, auto wn) { real_t kmod = k.norm(); - ccomplex_t delta = wn * the_cosmo_calc->GetAmplitude(kmod, total); + ccomplex_t delta = wn * the_cosmo_calc->get_amplitude(kmod, total); return -delta / (kmod * kmod); }, wnoise); @@ -571,10 +571,14 @@ int Run( ConfigFile& the_config ) if( bDoBaryons ){ vec3_t kvec = phi.get_k(i,j,k); real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); - double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, cdm0) : - (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, baryon0) : - the_cosmo_calc->GetAmplitude(kmod, total0)) - the_cosmo_calc->GetAmplitude(kmod, total0); - //the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); + // double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, cdm) : + // (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, baryon) : + // // the_cosmo_calc->get_amplitude(kmod, total)) - the_cosmo_calc->get_amplitude(kmod, total); + // the_cosmo_calc->get_amplitude(kmod, total)*(-g1)) - the_cosmo_calc->get_amplitude(kmod, total)*(-g1); + + real_t ampldiff = (((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, cdm) + : (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, baryon) : + the_cosmo_calc->get_amplitude(kmod, total)) - the_cosmo_calc->get_amplitude(kmod, total)) * (-g1); tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * lunit * ampldiff / k2 / boxlen; } @@ -620,10 +624,13 @@ int Run( ConfigFile& the_config ) if( bDoBaryons ){ vec3_t kvec = phi.get_k(i,j,k); real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); - double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->GetAmplitude(kmod, vcdm0) : - (this_species == cosmo_species::baryon)? the_cosmo_calc->GetAmplitude(kmod, vbaryon0) : - the_cosmo_calc->GetAmplitude(kmod, vtotal0)) - the_cosmo_calc->GetAmplitude(kmod, vtotal0); - //the_cosmo_calc->GetAmplitude(kmod, total)*(-g1)) - the_cosmo_calc->GetAmplitude(kmod, total)*(-g1); + // double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, vcdm0) : + // (this_species == cosmo_species::baryon)? 
the_cosmo_calc->get_amplitude(kmod, vbaryon0) : + // the_cosmo_calc->get_amplitude(kmod, vtotal0)) - the_cosmo_calc->get_amplitude(kmod, vtotal0); + // // the_cosmo_calc->get_amplitude(kmod, total)*(-g1)) - the_cosmo_calc->get_amplitude(kmod, total)*(-g1); + real_t ampldiff = (((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, vcdm) + : (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, vbaryon) : + the_cosmo_calc->get_amplitude(kmod, vtotal)) - the_cosmo_calc->get_amplitude(kmod, vtotal)) * (-g1); tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * vfac1 * vunit / boxlen * ampldiff / k2 ; } From 705dcf7cf52c03bf8e4ed3d82f0c03572ff63ef3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 12:49:15 +0200 Subject: [PATCH 089/130] fixed class cmake file --- external/class.cmake | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/external/class.cmake b/external/class.cmake index 0a3f3c2..aead59b 100644 --- a/external/class.cmake +++ b/external/class.cmake @@ -78,6 +78,7 @@ if(ENABLE_CLASS) ${CMAKE_CURRENT_LIST_DIR}/class/tools/parser.c ${CMAKE_CURRENT_LIST_DIR}/class/tools/quadrature.c ${CMAKE_CURRENT_LIST_DIR}/class/tools/hyperspherical.c + ${CMAKE_CURRENT_LIST_DIR}/class/tools/trigonometric_integrals.c ${CMAKE_CURRENT_LIST_DIR}/class/tools/common.c ${CMAKE_CURRENT_LIST_DIR}/class/source/input.c ${CMAKE_CURRENT_LIST_DIR}/class/source/background.c @@ -131,9 +132,9 @@ macro(target_setup_class target_name) endif(ENABLE_CLASS) endmacro(target_setup_class) -if(ENABLE_CLASS) - # test executable - add_executable(testTk - ${CMAKE_CURRENT_LIST_DIR}/class/cpp/testTk.cc) - target_setup_class(testTk) -endif(ENABLE_CLASS) \ No newline at end of file +# if(ENABLE_CLASS) +# # test executable +# add_executable(testTk +# ${CMAKE_CURRENT_LIST_DIR}/class/cpp/testTk.cc) +# target_setup_class(testTk) +# endif(ENABLE_CLASS) \ No newline at end of file From 3a8a22737f235581261a701f4903fb81ee4b0a82 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 19:25:54 +0200 Subject: [PATCH 090/130] major update of class plugin --- include/cosmology_calculator.hh | 20 +- include/interpolate.hh | 7 +- src/plugins/transfer_CLASS.cc | 433 ++++++++++++++++++-------------- 3 files changed, 267 insertions(+), 193 deletions(-) diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index 1708637..e7d92f9 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -37,7 +37,7 @@ public: private: static constexpr double REL_PRECISION = 1e-9; interpolated_function_1d D_of_a_, f_of_a_, a_of_D_; - double Dnow_, astart_; + double Dnow_, Dplus_start_, astart_; real_t integrate(double (*func)(double x, void *params), double a, double b, void *params) const { @@ -140,6 +140,8 @@ public: a_of_D_.set_data(tab_D,tab_a); Dnow_ = D_of_a_(1.0); + Dplus_start_ = D_of_a_( astart_ ) / Dnow_; + // set up transfer functions and compute normalisation transfer_function_ = std::move(select_TransferFunction_plugin(cf)); transfer_function_->intialise(); @@ -147,7 +149,7 @@ public: cosmo_param_.pnorm = this->compute_pnorm_from_sigma8(); else{ cosmo_param_.pnorm = 1.0; - csoca::ilog << "Measured sigma8 for fixed PS normalisation is " << this->compute_sigma8() << std::endl; + csoca::ilog << "Measured sigma_8 for given PS normalisation is " << this->compute_sigma8() << std::endl; } cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm); @@ -193,11 +195,11 @@ public: for (double k = kmin; k 
< transfer_function_->get_kmax(); k *= 1.05) { ofs << std::setw(20) << std::setprecision(10) << k - << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vcdm), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total)*Dplus_start_, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm)*Dplus_start_, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon)*Dplus_start_, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vcdm)*Dplus_start_, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon)*Dplus_start_, 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total0), 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm0), 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon0), 2.0) @@ -302,7 +304,9 @@ public: */ inline real_t get_amplitude(real_t k, tf_type type) const { - return std::pow(k, 0.5 * cosmo_param_.nspect) * transfer_function_->compute(k, type) * cosmo_param_.sqrtpnorm; + // if the transfer function doesn't need backscaling, then divide out growth factor + real_t f = transfer_function_->tf_isnormalised_? 1.0/Dplus_start_ : 1.0; + return f * std::pow(k, 0.5 * cosmo_param_.nspect) * transfer_function_->compute(k, type) * cosmo_param_.sqrtpnorm; } //! Computes the normalization for the power spectrum diff --git a/include/interpolate.hh b/include/interpolate.hh index a2a5a88..cb0ea50 100644 --- a/include/interpolate.hh +++ b/include/interpolate.hh @@ -10,10 +10,10 @@ class interpolated_function_1d { private: + bool isinit_; std::vector data_x_, data_y_; gsl_interp_accel *gsl_ia_; gsl_spline *gsl_sp_; - bool isinit_; void deallocate() { @@ -22,11 +22,12 @@ private: } public: - interpolated_function_1d(const interpolated_function_1d &i) = delete; + interpolated_function_1d(const interpolated_function_1d &) = delete; - interpolated_function_1d(){} + interpolated_function_1d() : isinit_(false){} interpolated_function_1d(const std::vector &data_x, const std::vector &data_y) + : isinit_(false) { this->set_data( data_x, data_y ); } diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 079b633..e358fce 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -16,241 +16,310 @@ #include #include #include +#include #include #include -class transfer_CLASS_plugin : public TransferFunction_plugin { +class transfer_CLASS_plugin : public TransferFunction_plugin +{ private: - //... 
target redshift tables - std::vector tab_lnk_, tab_dtot_, tab_dc_, tab_db_, tab_ttot_, tab_tc_, tab_tb_; - gsl_interp_accel *gsl_ia_dtot_, *gsl_ia_dc_, *gsl_ia_db_, *gsl_ia_ttot_, *gsl_ia_tc_, *gsl_ia_tb_; - gsl_spline *gsl_sp_dtot_, *gsl_sp_dc_, *gsl_sp_db_, *gsl_sp_ttot_, *gsl_sp_tc_, *gsl_sp_tb_; + interpolated_function_1d delta_c_, delta_b_, delta_n_, delta_m_, theta_c_, theta_b_, theta_n_, theta_m_; + interpolated_function_1d delta_c0_, delta_b0_, delta_n0_, delta_m0_, theta_c0_, theta_b0_, theta_n0_, theta_m0_; - //... starting redshift tables - std::vector tab_lnk0_, tab_dtot0_, tab_dc0_, tab_db0_, tab_ttot0_, tab_tc0_, tab_tb0_; - gsl_interp_accel *gsl_ia_dtot0_, *gsl_ia_dc0_, *gsl_ia_db0_, *gsl_ia_ttot0_, *gsl_ia_tc0_, *gsl_ia_tb0_; - gsl_spline *gsl_sp_dtot0_, *gsl_sp_dc0_, *gsl_sp_db0_, *gsl_sp_ttot0_, *gsl_sp_tc0_, *gsl_sp_tb0_; + // single fluid growing/decaying mode decomposition + // gsl_interp_accel *gsl_ia_Cplus_, *gsl_ia_Cminus_; + // gsl_spline *gsl_sp_Cplus_, *gsl_sp_Cminus_; + // std::vector tab_Cplus_, tab_Cminus_; - // single fluid growing/decaying mode decomposition - gsl_interp_accel *gsl_ia_Cplus_, *gsl_ia_Cminus_; - gsl_spline *gsl_sp_Cplus_, *gsl_sp_Cminus_; - std::vector tab_Cplus_, tab_Cminus_; + double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_, astart_, atarget_, A_s_, n_s_, Tcmb_, tnorm_; - double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_, astart_, atarget_; + ClassParams pars_; + std::unique_ptr the_ClassEngine_; + std::ofstream ofs_class_input_; - void ClassEngine_get_data( void ){ - std::vector d_ncdm, t_ncdm, phi, psi; + template + void add_class_parameter(std::string parameter_name, const T parameter_value) + { + pars_.add(parameter_name, parameter_value); + ofs_class_input_ << parameter_name << " = " << parameter_value << std::endl; + } - csoca::ilog << "Computing TF via ClassEngine..." << std::endl << " ztarget = " << ztarget_ << ", zstart = " << zstart_ << " ..." << std::flush; - double wtime = get_wtime(); + //! 
Set up class parameters from MUSIC cosmological parameters + void init_ClassEngine(void) + { + //--- general parameters ------------------------------------------ + add_class_parameter("z_max_pk", std::max(zstart_, ztarget_) * 1.2); // use 1.2 as safety + add_class_parameter("P_k_max_h/Mpc", kmax_); + add_class_parameter("output", "dTk,vTk"); + // add_class_parameter("lensing", "no"); - std::stringstream zlist; - zlist << zstart_ << ", " << ztarget_; - - ClassParams pars; - pars.add("extra metric transfer functions", "yes"); - pars.add("z_max_pk",zlist.str()); - pars.add("P_k_max_h/Mpc", kmax_); - - pars.add("h",h_); - pars.add("Omega_b",Omega_b_); - // pars.add("Omega_k",0.0); - // pars.add("Omega_ur",0.0); - pars.add("N_ur",N_ur_); - pars.add("Omega_cdm",Omega_m_-Omega_b_); - pars.add("Omega_k",0.0); - // pars.add("Omega_Lambda",1.0-Omega_m_); - pars.add("Omega_fld",0.0); - pars.add("Omega_scf",0.0); + //--- choose gauge ------------------------------------------------ + // add_class_parameter("extra metric transfer functions", "yes"); + add_class_parameter("gauge", "synchronous"); - // massive neutrinos + //--- cosmological parameters, densities -------------------------- + add_class_parameter("h", h_); + + add_class_parameter("Omega_b", Omega_b_); + add_class_parameter("Omega_cdm", Omega_m_ - Omega_b_); + add_class_parameter("Omega_k", 0.0); + // add_class_parameter("Omega_Lambda",1.0-Omega_m_); + add_class_parameter("Omega_fld", 0.0); + add_class_parameter("Omega_scf", 0.0); + // add_class_parameter("fluid_equation_of_state","CLP"); + // add_class_parameter("w0_fld", -1 ); + // add_class_parameter("wa_fld", 0. ); + // add_class_parameter("cs2_fld", 1); + + //--- massive neutrinos ------------------------------------------- #if 1 - //default off - pars.add("N_ncdm",0); + //default off + // add_class_parameter("Omega_ur",0.0); + add_class_parameter("N_ur", N_ur_); + add_class_parameter("N_ncdm", 0); + #else - // change above to enable - pars.add("N_ur",0); - pars.add("N_ncdm",1); - pars.add("m_ncdm","0.4"); - pars.add("T_ncdm",0.71611); + // change above to enable + add_class_parameter("N_ur", 0); + add_class_parameter("N_ncdm", 1); + add_class_parameter("m_ncdm", "0.4"); + add_class_parameter("T_ncdm", 0.71611); #endif - pars.add("A_s",2.42e-9); - pars.add("n_s",.961); // this doesn't matter for TF - pars.add("output","dTk,vTk"); - pars.add("YHe",0.248); - pars.add("lensing","no"); - pars.add("alpha_s",0.0); - pars.add("P_k_ini type","analytic_Pk"); - pars.add("gauge","synchronous"); + //--- cosmological parameters, primordial ------------------------- + add_class_parameter("P_k_ini type", "analytic_Pk"); - pars.add("k_per_decade_for_pk",100); - pars.add("k_per_decade_for_bao",100); + add_class_parameter("A_s", A_s_); + add_class_parameter("n_s", n_s_); + add_class_parameter("alpha_s", 0.0); + add_class_parameter("T_cmb", Tcmb_); + add_class_parameter("YHe", 0.248); - pars.add("compute damping scale","yes"); - pars.add("z_reio",-1.0); // make sure reionisation is not included + // precision parameters + add_class_parameter("k_per_decade_for_pk", 100); + add_class_parameter("k_per_decade_for_bao", 100); + add_class_parameter("compute damping scale", "yes"); + add_class_parameter("tol_perturb_integration", 1.e-8); + add_class_parameter("tol_background_integration", 1e-9); - pars.add("tol_perturb_integration",1.e-8); - pars.add("tol_background_integration",1e-9); + // high precision options from cl_permille.pre: + // precision file to be passed as input in order to achieve at least 
percent precision on scalar Cls + add_class_parameter("hyper_flat_approximation_nu", 7000.); + add_class_parameter("transfer_neglect_delta_k_S_t0", 0.17); + add_class_parameter("transfer_neglect_delta_k_S_t1", 0.05); + add_class_parameter("transfer_neglect_delta_k_S_t2", 0.17); + add_class_parameter("transfer_neglect_delta_k_S_e", 0.13); + add_class_parameter("delta_l_max", 1000); - // high precision options from cl_permille.pre: - // precision file to be passed as input in order to achieve at least percent precision on scalar Cls - pars.add("hyper_flat_approximation_nu", 7000. ); - pars.add("transfer_neglect_delta_k_S_t0", 0.17 ); - pars.add("transfer_neglect_delta_k_S_t1", 0.05 ); - pars.add("transfer_neglect_delta_k_S_t2", 0.17 ); - pars.add("transfer_neglect_delta_k_S_e", 0.13 ); - pars.add("delta_l_max", 1000 ); + int class_verbosity = 0; + add_class_parameter("background_verbose", class_verbosity); + add_class_parameter("thermodynamics_verbose", class_verbosity); + add_class_parameter("perturbations_verbose", class_verbosity); + add_class_parameter("transfer_verbose", class_verbosity); + add_class_parameter("primordial_verbose", class_verbosity); + add_class_parameter("spectra_verbose", class_verbosity); + add_class_parameter("nonlinear_verbose", class_verbosity); + add_class_parameter("lensing_verbose", class_verbosity); + add_class_parameter("output_verbose", class_verbosity); - std::unique_ptr CE = std::make_unique(pars, false); + // output parameters, only needed for the control CLASS .ini file that we output + std::stringstream zlist; + if (ztarget_ == zstart_) + zlist << ztarget_ << ", 0.0"; + else + zlist << std::max(ztarget_, zstart_) << ", " << std::min(ztarget_, zstart_) << ", 0.0"; + add_class_parameter("z_pk", zlist.str()); - CE->getTk(zstart_, tab_lnk0_, tab_dc0_, tab_db0_, d_ncdm, tab_dtot0_, - tab_tc0_, tab_tb0_, t_ncdm, tab_ttot0_, phi, psi ); + csoca::ilog << "Computing transfer function via ClassEngine..." << std::endl; + double wtime = get_wtime(); - CE->getTk(ztarget_, tab_lnk_, tab_dc_, tab_db_, d_ncdm, tab_dtot_, - tab_tc_, tab_tb_, t_ncdm, tab_ttot_, phi, psi); + the_ClassEngine_ = std::move(std::make_unique(pars_, false)); - wtime = get_wtime() - wtime; - csoca::ilog << " took " << wtime << " s / " << tab_lnk_.size() << " modes." << std::endl; + wtime = get_wtime() - wtime; + csoca::ilog << "CLASS took " << wtime << " s." << std::endl; + } + + //! run ClassEngine with parameters set up + void run_ClassEngine(double z, std::vector &k, std::vector &dc, std::vector &tc, std::vector &db, std::vector &tb, + std::vector &dn, std::vector &tn, std::vector &dm, std::vector &tm) + { + k.clear(); + dc.clear(); db.clear(); dn.clear(); dm.clear(); + tc.clear(); tb.clear(); tn.clear(); tm.clear(); + + the_ClassEngine_->getTk(z, k, dc, db, dn, dm, tc, tb, tn, tm); + + real_t fc = (Omega_m_ - Omega_b_) / Omega_m_; + real_t fb = Omega_b_ / Omega_m_; + + for (size_t i = 0; i < k.size(); ++i) + { + // convert to 'CAMB' format, since we interpolate loglog and + // don't want negative numbers... 
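+            // dividing by -k^2 (with k in h/Mpc) turns the CLASS density/velocity transfer functions
+            // into positive, slowly varying CAMB-style T(k), which is what the log-log interpolation
+            // of these tables assumes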
+ auto ik2 = 1.0 / (k[i] * k[i]) * h_ * h_; + dc[i] = -dc[i] * ik2; + db[i] = -db[i] * ik2; + dn[i] = -dn[i] * ik2; + dm[i] = fc * dc[i] + fb * db[i]; + tc[i] = -tc[i] * ik2; + tb[i] = -tb[i] * ik2; + tn[i] = -tn[i] * ik2; + tm[i] = fc * tc[i] + fb * tb[i]; } + } public: - explicit transfer_CLASS_plugin( ConfigFile &cf) - : TransferFunction_plugin(cf) - { - h_ = pcf_->GetValue("cosmology","H0") / 100.0; - Omega_m_ = pcf_->GetValue("cosmology","Omega_m"); - Omega_b_ = pcf_->GetValue("cosmology","Omega_b"); - N_ur_ = pcf_->GetValueSafe("cosmology","N_ur", 3.046); - ztarget_ = pcf_->GetValueSafe("cosmology","ztarget",0.0); - atarget_ = 1.0/(1.0+ztarget_); - zstart_ = pcf_->GetValue("setup","zstart"); - astart_ = 1.0/(1.0+zstart_); - double lbox = pcf_->GetValue("setup","BoxLength"); - int nres = pcf_->GetValue("setup","GridRes"); - kmax_ = 2.0*M_PI/lbox * nres/2 * sqrt(3) * 2.0; // 120% of spatial diagonal + explicit transfer_CLASS_plugin(ConfigFile &cf) + : TransferFunction_plugin(cf) + { + ofs_class_input_.open("input_class_parameters.ini", std::ios::trunc); - this->ClassEngine_get_data(); - - gsl_ia_dtot_ = gsl_interp_accel_alloc(); gsl_ia_dtot0_ = gsl_interp_accel_alloc(); - gsl_ia_dc_ = gsl_interp_accel_alloc(); gsl_ia_dc0_ = gsl_interp_accel_alloc(); - gsl_ia_db_ = gsl_interp_accel_alloc(); gsl_ia_db0_ = gsl_interp_accel_alloc(); - gsl_ia_ttot_ = gsl_interp_accel_alloc(); gsl_ia_ttot0_ = gsl_interp_accel_alloc(); - gsl_ia_tc_ = gsl_interp_accel_alloc(); gsl_ia_tc0_ = gsl_interp_accel_alloc(); - gsl_ia_tb_ = gsl_interp_accel_alloc(); gsl_ia_tb0_ = gsl_interp_accel_alloc(); + h_ = pcf_->GetValue("cosmology", "H0") / 100.0; + Omega_m_ = pcf_->GetValue("cosmology", "Omega_m"); + Omega_b_ = pcf_->GetValue("cosmology", "Omega_b"); + N_ur_ = pcf_->GetValueSafe("cosmology", "Neff", 3.046); + ztarget_ = pcf_->GetValueSafe("cosmology", "ztarget", 0.0); + atarget_ = 1.0 / (1.0 + ztarget_); + zstart_ = pcf_->GetValue("setup", "zstart"); + astart_ = 1.0 / (1.0 + zstart_); + double lbox = pcf_->GetValue("setup", "BoxLength"); + int nres = pcf_->GetValue("setup", "GridRes"); + A_s_ = pcf_->GetValueSafe("cosmology", "A_s", -1.0); + double k_p = pcf_->GetValueSafe("cosmology", "k_p", 0.05); + n_s_ = pcf_->GetValue("cosmology", "nspec"); + Tcmb_ = cf.GetValueSafe("cosmology", "Tcmb", 2.7255); - gsl_sp_dtot_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_dc_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_db_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_ttot_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_tc_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_tb_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + tnorm_ = 1.0; - gsl_sp_dtot0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_dc0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_db0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_ttot0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_tc0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_tb0_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + if (A_s_ > 0) + { + this->tf_isnormalised_ = true; + tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0)); + csoca::ilog << "Using A_s to normalise the transfer function!" 
<< std::endl; + } - gsl_spline_init(gsl_sp_dtot_, &tab_lnk_[0], &tab_dtot_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_dc_, &tab_lnk_[0], &tab_dc_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_db_, &tab_lnk_[0], &tab_db_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_ttot_, &tab_lnk_[0], &tab_ttot_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_tc_, &tab_lnk_[0], &tab_tc_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_tb_, &tab_lnk_[0], &tab_tb_[0], tab_lnk_.size()); + kmax_ = std::max(20.0, 2.0 * M_PI / lbox * nres / 2 * sqrt(3) * 2.0); // 120% of spatial diagonal, or k=10h Mpc-1 - gsl_spline_init(gsl_sp_dtot0_, &tab_lnk0_[0], &tab_dtot0_[0], tab_lnk0_.size()); - gsl_spline_init(gsl_sp_dc0_, &tab_lnk0_[0], &tab_dc0_[0], tab_lnk0_.size()); - gsl_spline_init(gsl_sp_db0_, &tab_lnk0_[0], &tab_db0_[0], tab_lnk0_.size()); - gsl_spline_init(gsl_sp_ttot0_, &tab_lnk0_[0], &tab_ttot0_[0], tab_lnk0_.size()); - gsl_spline_init(gsl_sp_tc0_, &tab_lnk0_[0], &tab_tc0_[0], tab_lnk0_.size()); - gsl_spline_init(gsl_sp_tb0_, &tab_lnk0_[0], &tab_tb0_[0], tab_lnk0_.size()); + this->init_ClassEngine(); + + std::vector k, dc, tc, db, tb, dn, tn, dm, tm; + + this->run_ClassEngine(0.0, k, dc, tc, db, tb, dn, tn, dm, tm); + + delta_c0_.set_data(k, dc); + theta_c0_.set_data(k, tc); + delta_b0_.set_data(k, db); + theta_b0_.set_data(k, tb); + delta_n0_.set_data(k, dn); + theta_n0_.set_data(k, tn); + delta_m0_.set_data(k, dm); + theta_m0_.set_data(k, tm); + + this->run_ClassEngine(ztarget_, k, dc, tc, db, tb, dn, tn, dm, tm); + + delta_c_.set_data(k, dc); + theta_c_.set_data(k, tc); + delta_b_.set_data(k, db); + theta_b_.set_data(k, tb); + delta_n_.set_data(k, dn); + theta_n_.set_data(k, tn); + delta_m_.set_data(k, dm); + theta_m_.set_data(k, tm); + + kmin_ = k[0]; + kmax_ = k.back(); + + csoca::ilog << "CLASS table contains k = " << this->get_kmin() << " to " << this->get_kmax() << " h Mpc-1." 
<< std::endl; //-------------------------------------------------------------------------- // single fluid growing/decaying mode decomposition //-------------------------------------------------------------------------- - gsl_ia_Cplus_ = gsl_interp_accel_alloc(); + /*gsl_ia_Cplus_ = gsl_interp_accel_alloc(); gsl_ia_Cminus_ = gsl_interp_accel_alloc(); - - gsl_sp_Cplus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + + gsl_sp_Cplus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); gsl_sp_Cminus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - - tab_Cplus_.assign(tab_lnk_.size(),0); - tab_Cminus_.assign(tab_lnk_.size(),0); + + tab_Cplus_.assign(tab_lnk_.size(), 0); + tab_Cminus_.assign(tab_lnk_.size(), 0); std::ofstream ofs("grow_decay.txt"); - - for( size_t i=0; i kmax_) + { + return 0.0; + } + + real_t val(0.0); + switch (type) + { + // values at ztarget: + case total: + val = delta_m_(k); break; + case cdm: + val = delta_c_(k); break; + case baryon: + val = delta_b_(k); break; + case vtotal: + val = theta_m_(k); break; + case vcdm: + val = theta_c_(k); break; + case vbaryon: + val = theta_b_(k); break; + + // values at zstart: + case total0: + val = delta_m0_(k); break; + case cdm0: + val = delta_c0_(k); break; + case baryon0: + val = delta_b0_(k); break; + case vtotal0: + val = theta_m0_(k); break; + case vcdm0: + val = theta_c0_(k); break; + case vbaryon0: + val = theta_b0_(k); break; + default: + throw std::runtime_error("Invalid type requested in transfer function evaluation"); + } + return val * tnorm_; + } + + inline double get_kmin(void) const { return kmin_ / h_; } + inline double get_kmax(void) const { return kmax_ / h_; } }; -namespace { +namespace +{ TransferFunction_plugin_creator_concrete creator("CLASS"); } From b8b9db3b999e6e4bb9d0be1327ed31b31eed3b1e Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 19:57:41 +0200 Subject: [PATCH 091/130] made precision switchable from makefile --- CMakeLists.txt | 96 ++++++++++++++++++++++++---- include/convolution.hh | 16 +++-- include/general.hh | 11 +++- include/grid_fft.hh | 89 +++++++++++++++----------- src/grid_fft.cc | 142 ++++++++++++++++++++++------------------- 5 files changed, 227 insertions(+), 127 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a4eab8b..d381d7d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,16 +1,34 @@ cmake_minimum_required(VERSION 3.9) set(PRGNAME monofonIC) -project(monofonIC) +project(monofonIC C CXX) + +#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -march=native -Wall -pedantic" CACHE STRING "Flags used by the compiler during Release builds." FORCE) +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -march=native -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during RelWithDebInfo builds." FORCE) +set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -march=native -DDEBUG -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during Debug builds." FORCE) +set(CMAKE_CXX_FLAGS_DEBUGSANADD "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address " CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for address." FORCE) +set(CMAKE_CXX_FLAGS_DEBUGSANUNDEF "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for undefineds." 
FORCE) + +set(default_build_type "Release") +if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + message(STATUS "Setting build type to '${default_build_type}' as none was specified.") + set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE + STRING "Choose the type of build." FORCE) + # Set the possible values of build type for cmake-gui + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS + "Debug" "Release" "RelWithDebInfo" "DebugSanAdd" "DebugSanUndef") +endif() +mark_as_advanced(CMAKE_CXX_FLAGS_DEBUGSANADD CMAKE_CXX_FLAGS_DEBUGSANUNDEF CMAKE_EXECUTABLE_FORMAT CMAKE_OSX_ARCHITECTURES CMAKE_OSX_DEPLOYMENT_TARGET CMAKE_OSX_SYSROOT) + + +######################################################################################################################## # include class submodule include(${CMAKE_CURRENT_SOURCE_DIR}/external/class.cmake) -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic") find_package(PkgConfig REQUIRED) -set(CMAKE_MODULE_PATH - "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}") +set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}") ######################################################################################################################## @@ -48,6 +66,16 @@ if(ENABLE_MPI) endif(MPI_CXX_FOUND) endif(ENABLE_MPI) +######################################################################################################################## +# floating point precision +set ( + CODE_PRECISION "DOUBLE" + CACHE STRING "Floating point type used for internal computations and FFTs" +) +set_property ( + CACHE CODE_PRECISION + PROPERTY STRINGS FLOAT DOUBLE LONGDOUBLE +) ######################################################################################################################## # FFTW @@ -55,18 +83,25 @@ if(POLICY CMP0074) cmake_policy(SET CMP0074 NEW) endif() if(ENABLE_MPI) - find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS MPI) + find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS MPI) else() - find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS) + find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS) endif(ENABLE_MPI) +mark_as_advanced(FFTW3_SINGLE_MPI_LIBRARY FFTW3_SINGLE_OPENMP_LIBRARY FFTW3_SINGLE_SERIAL_LIBRARY FFTW3_SINGLE_THREADS_LIBRARY) +mark_as_advanced(FFTW3_DOUBLE_MPI_LIBRARY FFTW3_DOUBLE_OPENMP_LIBRARY FFTW3_DOUBLE_SERIAL_LIBRARY FFTW3_DOUBLE_THREADS_LIBRARY) +mark_as_advanced(FFTW3_LONGDOUBLE_MPI_LIBRARY FFTW3_LONGDOUBLE_OPENMP_LIBRARY FFTW3_LONGDOUBLE_SERIAL_LIBRARY FFTW3_LONGDOUBLE_THREADS_LIBRARY) +mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_MPI_INCLUDE_DIR) +mark_as_advanced(pkgcfg_lib_PC_FFTW_fftw3) ######################################################################################################################## # GSL find_package(GSL REQUIRED) +mark_as_advanced(pkgcfg_lib_GSL_gsl pkgcfg_lib_GSL_gslcblas pkgcfg_lib_GSL_m) ######################################################################################################################## # HDF5 find_package(HDF5 REQUIRED) +mark_as_advanced(HDF5_C_LIBRARY_dl HDF5_C_LIBRARY_hdf5 HDF5_C_LIBRARY_m HDF5_C_LIBRARY_pthread HDF5_C_LIBRARY_z HDF5_C_LIBRARY_sz) ######################################################################################################################## # INCLUDES @@ -86,28 +121,61 @@ file( GLOB PLUGINS ${PROJECT_SOURCE_DIR}/src/plugins/*.cc ) +# project configuration 
header +configure_file( + ${PROJECT_SOURCE_DIR}/include/cmake_config.hh.in + ${PROJECT_SOURCE_DIR}/include/cmake_config.hh +) + add_executable(${PRGNAME} ${SOURCES} ${PLUGINS}) target_setup_class(${PRGNAME}) set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 14) + # mpi flags if(MPI_CXX_FOUND) - if(FFTW3_DOUBLE_MPI_FOUND) - target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_MPI_LIBRARY}) - target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL}) - target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI") - endif(FFTW3_DOUBLE_MPI_FOUND) + if(CODE_PRECISION STREQUAL "FLOAT") + if(FFTW3_SINGLE_MPI_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_MPI_LIBRARY}) + target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL}) + target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI") + else() + message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for single precision!") + endif() + elseif(CODE_PRECISION STREQUAL "DOUBLE") + if(FFTW3_DOUBLE_MPI_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_MPI_LIBRARY}) + target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL}) + target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI") + else() + message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for double precision!") + endif() + elseif(CODE_PRECISION STREQUAL "LONGDOUBLE") + if(FFTW3_LONGDOUBLE_MPI_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_LONGDOUBLE_MPI_LIBRARY}) + target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL}) + target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI") + else() + message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for long double precision!") + endif() + endif() target_include_directories(${PRGNAME} PRIVATE ${MPI_CXX_INCLUDE_PATH}) target_compile_options(${PRGNAME} PRIVATE "-DUSE_MPI") target_link_libraries(${PRGNAME} ${MPI_LIBRARIES}) endif(MPI_CXX_FOUND) -if(FFTW3_DOUBLE_THREADS_FOUND) +if(CODE_PRECISION STREQUAL "FLOAT" AND FFTW3_SINGLE_THREADS_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_THREADS_LIBRARY}) + target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS") +elseif(CODE_PRECISION STREQUAL "DOUBLE" AND FFTW3_DOUBLE_THREADS_FOUND) target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_THREADS_LIBRARY}) target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS") -endif(FFTW3_DOUBLE_THREADS_FOUND) +elseif(CODE_PRECISION STREQUAL "LONGDOUBLE" AND FFTW3_LONGDOUBLE_THREADS_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_LONGDOUBLE_THREADS_LIBRARY}) + target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS") +endif() if(HDF5_FOUND) # target_link_libraries(${PRGNAME} ${HDF5_C_LIBRARY_DIRS}) diff --git a/include/convolution.hh b/include/convolution.hh index a1fc1e3..238717b 100644 --- a/include/convolution.hh +++ b/include/convolution.hh @@ -415,12 +415,12 @@ private: { assert(fp.space_ == kspace_id); - const double rfac = std::pow(1.5, 1.5); + const real_t rfac = std::pow(1.5, 1.5); fp.zero(); #if !defined(USE_MPI) //////////////////////////////////////////////////////////////////////////////////// - size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3}; + const size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3}; #pragma omp parallel for for (size_t i = 0; i < 2 * fp.size(0) / 3; ++i) @@ -460,7 +460,10 @@ private: size_t slicesz = fbuf_->size(1) * fbuf_->size(3); MPI_Datatype datatype = - (typeid(data_t) == typeid(float)) ? 
MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE; + (typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX + : (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX + : (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX + : MPI_BYTE; // fill MPI send buffer with results of kfunc @@ -596,7 +599,7 @@ private: template void unpad(const Grid_FFT &fp, operator_t output_op) { - const double rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(fbuf_->n_[0] * fbuf_->n_[1] * fbuf_->n_[2]); + const real_t rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(fbuf_->n_[0] * fbuf_->n_[1] * fbuf_->n_[2]); // make sure we're in Fourier space... assert(fp.space_ == kspace_id); @@ -645,7 +648,10 @@ private: size_t slicesz = fp.size(1) * fp.size(3); MPI_Datatype datatype = - (typeid(data_t) == typeid(float)) ? MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE; + (typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX + : (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX + : (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX + : MPI_BYTE; MPI_Status status; diff --git a/include/general.hh b/include/general.hh index 7334579..f4395bb 100644 --- a/include/general.hh +++ b/include/general.hh @@ -16,14 +16,21 @@ #define _unused(x) ((void)(x)) -#ifdef USE_SINGLEPRECISION +// include CMake controlled configuration settings +#include + +#if defined(USE_PRECISION_FLOAT) using real_t = float; using complex_t = fftwf_complex; #define FFTW_PREFIX fftwf -#else +#elif defined(USE_PRECISION_DOUBLE) using real_t = double; using complex_t = fftw_complex; #define FFTW_PREFIX fftw +#elif defined(USE_PRECISION_LONGDOUBLE) +using real_t = long double; +using complex_t = fftwl_complex; +#define FFTW_PREFIX fftwl #endif enum class fluid_component diff --git a/include/grid_fft.hh b/include/grid_fft.hh index f460297..2170dc8 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -26,10 +26,12 @@ class Grid_FFT protected: #if defined(USE_MPI) const MPI_Datatype MPI_data_t_type = - (typeid(data_t) == typeid(double)) ? MPI_DOUBLE - : (typeid(data_t) == typeid(float)) ? MPI_FLOAT - : (typeid(data_t) == typeid(std::complex)) ? MPI_COMPLEX - : (typeid(data_t) == typeid(std::complex)) ? MPI_DOUBLE_COMPLEX + (typeid(data_t) == typeid(float)) ? MPI_FLOAT + : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE + : (typeid(data_t) == typeid(long double)) ? MPI_LONG_DOUBLE + : (typeid(data_t) == typeid(std::complex)) ? MPI_C_FLOAT_COMPLEX + : (typeid(data_t) == typeid(std::complex)) ? MPI_C_DOUBLE_COMPLEX + : (typeid(data_t) == typeid(std::complex)) ? MPI_C_LONG_DOUBLE_COMPLEX : MPI_INT; #endif using grid_fft_t = Grid_FFT; @@ -73,30 +75,30 @@ public: const grid_fft_t *get_grid(size_t ilevel) const { return this; } - bool is_distributed( void ) const { return bdistributed; } + bool is_distributed( void ) const noexcept { return bdistributed; } void Setup(); //! return the number of data_t elements that we store in the container - size_t memsize( void ) const { return ntot_; } + size_t memsize( void ) const noexcept { return ntot_; } //! return the (local) size of dimension i - size_t size(size_t i) const { return sizes_[i]; } + size_t size(size_t i) const noexcept { assert(i<4); return sizes_[i]; } //! return the (global) size of dimension i - size_t global_size(size_t i) const { return n_[i]; } + size_t global_size(size_t i) const noexcept { assert(i<3); return n_[i]; } //! 
return locally stored number of elements of field - size_t local_size(void) const { return local_0_size_ * n_[1] * n_[2]; } + size_t local_size(void) const noexcept { return local_0_size_ * n_[1] * n_[2]; } //! return a bounding box of the global extent of the field - const bounding_box &get_global_range(void) const + const bounding_box &get_global_range(void) const noexcept { return global_range_; } //! set all field elements to zero - void zero() + void zero() noexcept { #pragma omp parallel for for (size_t i = 0; i < ntot_; ++i) @@ -125,47 +127,47 @@ public: data_[i] = g.data_[i]; } - data_t &operator[](size_t i) + data_t &operator[](size_t i) noexcept { return data_[i]; } - data_t &relem(size_t i, size_t j, size_t k) + data_t &relem(size_t i, size_t j, size_t k) noexcept { size_t idx = (i * sizes_[1] + j) * sizes_[3] + k; return data_[idx]; } - const data_t &relem(size_t i, size_t j, size_t k) const + const data_t &relem(size_t i, size_t j, size_t k) const noexcept { size_t idx = (i * sizes_[1] + j) * sizes_[3] + k; return data_[idx]; } - ccomplex_t &kelem(size_t i, size_t j, size_t k) + ccomplex_t &kelem(size_t i, size_t j, size_t k) noexcept { size_t idx = (i * sizes_[1] + j) * sizes_[3] + k; return cdata_[idx]; } - const ccomplex_t &kelem(size_t i, size_t j, size_t k) const + const ccomplex_t &kelem(size_t i, size_t j, size_t k) const noexcept { size_t idx = (i * sizes_[1] + j) * sizes_[3] + k; return cdata_[idx]; } - ccomplex_t &kelem(size_t idx) { return cdata_[idx]; } - const ccomplex_t &kelem(size_t idx) const { return cdata_[idx]; } - data_t &relem(size_t idx) { return data_[idx]; } - const data_t &relem(size_t idx) const { return data_[idx]; } + ccomplex_t &kelem(size_t idx) noexcept { return cdata_[idx]; } + const ccomplex_t &kelem(size_t idx) const noexcept { return cdata_[idx]; } + data_t &relem(size_t idx) noexcept { return data_[idx]; } + const data_t &relem(size_t idx) const noexcept { return data_[idx]; } - size_t get_idx(size_t i, size_t j, size_t k) const + size_t get_idx(size_t i, size_t j, size_t k) const noexcept { return (i * sizes_[1] + j) * sizes_[3] + k; } template - vec3_t get_r(const size_t i, const size_t j, const size_t k) const + vec3_t get_r(const size_t i, const size_t j, const size_t k) const noexcept { vec3_t rr; @@ -177,7 +179,7 @@ public: } template - vec3_t get_unit_r(const size_t i, const size_t j, const size_t k) const + vec3_t get_unit_r(const size_t i, const size_t j, const size_t k) const noexcept { vec3_t rr; @@ -189,7 +191,7 @@ public: } template - vec3_t get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3_t s) const + vec3_t get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3_t s) const noexcept { vec3_t rr; @@ -200,33 +202,35 @@ public: return rr; } - vec3_t get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const + vec3_t get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const noexcept { return vec3_t({i + local_0_start_, j, k}); } - size_t get_cell_idx_1d(const size_t i, const size_t j, const size_t k) const + size_t get_cell_idx_1d(const size_t i, const size_t j, const size_t k) const noexcept { return ((i + local_0_start_) * size(1) + j) * size(2) + k; } - size_t count_leaf_cells(int, int) const + //! 
deprecated function, was needed for old output plugin + size_t count_leaf_cells(int, int) const noexcept { return n_[0] * n_[1] * n_[2]; } - real_t get_dx(int idim) const + real_t get_dx(int idim) const noexcept { + assert(idim<3&&idim>=0); return dx_[idim]; } - const std::array &get_dx(void) const + const std::array &get_dx(void) const noexcept { return dx_; } template - vec3_t get_k(const size_t i, const size_t j, const size_t k) const + vec3_t get_k(const size_t i, const size_t j, const size_t k) const noexcept { vec3_t kk; if( bdistributed ){ @@ -243,7 +247,7 @@ public: } template - vec3_t get_k(const real_t i, const real_t j, const real_t k) const + vec3_t get_k(const real_t i, const real_t j, const real_t k) const noexcept { vec3_t kk; if( bdistributed ){ @@ -259,12 +263,13 @@ public: return kk; } - std::array get_k3(const size_t i, const size_t j, const size_t k) const + std::array get_k3(const size_t i, const size_t j, const size_t k) const noexcept { return bdistributed? std::array({j,i+local_1_start_,k}) : std::array({i,j,k}); } - data_t get_cic( const vec3_t& v ) const{ + data_t get_cic( const vec3_t& v ) const noexcept + { // warning! this doesn't work with MPI vec3_t x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], std::fmod(v.y/length_[1]+1.0,1.0)*n_[1], @@ -290,7 +295,8 @@ public: return val; } - ccomplex_t get_cic_kspace( const vec3_t x ) const{ + ccomplex_t get_cic_kspace( const vec3_t x ) const noexcept + { // warning! this doesn't work with MPI int ix = static_cast(std::floor(x.x)); int iy = static_cast(std::floor(x.y)); @@ -328,6 +334,11 @@ public: return ccomplex_t(0.0,rgrad); } + inline real_t laplacian( const std::array& ijk ) const noexcept + { + return -this->get_k(ijk[0],ijk[1],ijk[2]).norm_squared(); + } + grid_fft_t &operator*=(data_t x) { if (space_ == kspace_id) @@ -421,7 +432,7 @@ public: } } - double compute_2norm(void) + real_t compute_2norm(void) const { real_t sum1{0.0}; #pragma omp parallel for reduction(+ : sum1) @@ -443,7 +454,7 @@ public: return sum1; } - double std(void) + real_t std(void) const { double sum1{0.0}, sum2{0.0}; size_t count{0}; @@ -488,10 +499,10 @@ public: sum1 /= count; sum2 /= count; - return std::sqrt(sum2 - sum1 * sum1); + return real_t(std::sqrt(sum2 - sum1 * sum1)); } - double mean(void) + real_t mean(void) const { double sum1{0.0}; size_t count{0}; @@ -530,7 +541,7 @@ public: sum1 /= count; - return sum1; + return real_t(sum1); } template diff --git a/src/grid_fft.cc b/src/grid_fft.cc index 4905cb1..2b595b8 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -2,10 +2,11 @@ #include #include -template -void Grid_FFT::Setup(void) +template +void Grid_FFT::Setup(void) { - if( !bdistributed ){ + if (!bdistributed) + { ntot_ = (n_[2] + 2) * n_[1] * n_[0]; csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); @@ -30,7 +31,7 @@ void Grid_FFT::Setup(void) csoca::elog.Print("invalid data type in Grid_FFT::setup_fft_interface\n"); } - fft_norm_fac_ = 1.0 / std::sqrt((double)((size_t)n_[0] * (double)n_[1] * (double)n_[2])); + fft_norm_fac_ = 1.0 / std::sqrt((real_t)((size_t)n_[0] * (real_t)n_[1] * (real_t)n_[2])); if (typeid(data_t) == typeid(real_t)) { @@ -81,26 +82,26 @@ void Grid_FFT::Setup(void) if (typeid(data_t) == typeid(real_t)) { cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD, - &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); + &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); ntot_ = 2 * cmplxsz; data_ = 
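            // fftw_mpi_local_size_3d_transposed() reports the local allocation size in units of complex
            // elements, so the in-place r2c transform needs 2*cmplxsz reals (ntot_ above), which already
            // includes the padding required by FFTW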
(data_t *)fftw_malloc(ntot_ * sizeof(real_t)); cdata_ = reinterpret_cast(data_); plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); + MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, - MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); + MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); } else if (typeid(data_t) == typeid(ccomplex_t)) { cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD, - &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); + &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); ntot_ = cmplxsz; data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t)); cdata_ = reinterpret_cast(data_); plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); + MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); + MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); } else { @@ -109,7 +110,8 @@ void Grid_FFT::Setup(void) } csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); - fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]); + + fft_norm_fac_ = 1.0 / sqrt((real_t)n_[0] * (real_t)n_[1] * (real_t)n_[2]); if (typeid(data_t) == typeid(real_t)) { @@ -155,16 +157,16 @@ void Grid_FFT::Setup(void) } } -template -void Grid_FFT::ApplyNorm(void) +template +void Grid_FFT::ApplyNorm(void) { #pragma omp parallel for for (size_t i = 0; i < ntot_; ++i) data_[i] *= fft_norm_fac_; } -template -void Grid_FFT::FourierTransformForward(bool do_transform) +template +void Grid_FFT::FourierTransformForward(bool do_transform) { #if defined(USE_MPI) MPI_Barrier(MPI_COMM_WORLD); @@ -195,8 +197,8 @@ void Grid_FFT::FourierTransformForward(bool do_transform) } } -template -void Grid_FFT::FourierTransformBackward(bool do_transform) +template +void Grid_FFT::FourierTransformBackward(bool do_transform) { #if defined(USE_MPI) MPI_Barrier(MPI_COMM_WORLD); @@ -210,8 +212,7 @@ void Grid_FFT::FourierTransformBackward(bool do_transform) csoca::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]); double wtime = get_wtime(); - FFTW_API(execute) - (iplan_); + FFTW_API(execute)(iplan_); this->ApplyNorm(); wtime = get_wtime() - wtime; @@ -262,6 +263,9 @@ hid_t hdf5_get_data_type(void) if (typeid(T) == typeid(double)) return H5T_NATIVE_DOUBLE; + + if (typeid(T) == typeid(long double)) + return H5T_NATIVE_LDOUBLE; if (typeid(T) == typeid(long long)) return H5T_NATIVE_LLONG; @@ -276,10 +280,11 @@ hid_t hdf5_get_data_type(void) return -1; } -template -void Grid_FFT::Read_from_HDF5(const std::string Filename, const std::string ObjName) +template +void Grid_FFT::Read_from_HDF5(const std::string Filename, const std::string ObjName) { - if( bdistributed ){ + if (bdistributed) + { csoca::elog << "Attempt to read from HDF5 into MPI-distributed array. This is not supported yet!" 
<< std::endl; abort(); } @@ -354,10 +359,11 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c H5Dclose(HDF_DatasetID); H5Fclose(HDF_FileID); - assert( dimsize[0] == dimsize[1] && dimsize[0] == dimsize[2] ); + assert(dimsize[0] == dimsize[1] && dimsize[0] == dimsize[2]); csoca::ilog << "Read external constraint data of dimensions " << dimsize[0] << "**3." << std::endl; - for( size_t i=0; i<3; ++i ) this->n_[i] = dimsize[i]; + for (size_t i = 0; i < 3; ++i) + this->n_[i] = dimsize[i]; this->space_ = rspace_id; if (data_ != nullptr) @@ -365,47 +371,47 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, c fftw_free(data_); } this->Setup(); - //... copy data to internal array ... - double sum1{0.0}, sum2{0.0}; - #pragma omp parallel for reduction(+:sum1,sum2) + real_t sum1{0.0}, sum2{0.0}; + #pragma omp parallel for reduction(+ : sum1, sum2) for (size_t i = 0; i < size(0); ++i) { for (size_t j = 0; j < size(1); ++j) { for (size_t k = 0; k < size(2); ++k) { - this->relem(i,j,k) = Data[ (i*size(1) + j)*size(2)+k ]; - sum2 += std::real(this->relem(i,j,k)*this->relem(i,j,k)); - sum1 += std::real(this->relem(i,j,k)); + this->relem(i, j, k) = Data[(i * size(1) + j) * size(2) + k]; + sum2 += std::real(this->relem(i, j, k) * this->relem(i, j, k)); + sum1 += std::real(this->relem(i, j, k)); } } } sum1 /= Data.size(); sum2 /= Data.size(); - auto stdw = std::sqrt(sum2-sum1*sum1); + auto stdw = std::sqrt(sum2 - sum1 * sum1); csoca::ilog << "Constraint field has =" << sum1 << ", -^2=" << stdw << std::endl; - #pragma omp parallel for reduction(+:sum1,sum2) + #pragma omp parallel for reduction(+ : sum1, sum2) for (size_t i = 0; i < size(0); ++i) { for (size_t j = 0; j < size(1); ++j) { for (size_t k = 0; k < size(2); ++k) { - this->relem(i,j,k) /= stdw; + this->relem(i, j, k) /= stdw; } } } } -template -void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) const +template +void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) const { // FIXME: cleanup duplicate code in this function! 
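[Editorial note: the sketch below is an illustration, not part of the patch.] The rank-0
branch that follows writes the field one x-slab at a time into the dataset rather than issuing
a single H5Dwrite, which keeps the staging buffer at size(1)*size(2) elements. A minimal
stand-alone version of that hyperslab pattern, with hypothetical names, plain double data and
no error checking:

    #include <hdf5.h>
    #include <algorithm>
    #include <vector>

    // write a contiguous nx*ny*nz array slab-by-slab into a new HDF5 file
    void write_field_slabwise(const char *fname, const char *dsetname,
                              const double *data, hsize_t nx, hsize_t ny, hsize_t nz)
    {
        hid_t file = H5Fcreate(fname, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);

        hsize_t dims[3] = {nx, ny, nz};
        hid_t filespace = H5Screate_simple(3, dims, nullptr);
        hid_t dset = H5Dcreate2(file, dsetname, H5T_NATIVE_DOUBLE, filespace,
                                H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
        H5Sclose(filespace);

        hsize_t count[3] = {1, ny, nz};          // extent of one x-slab
        hid_t memspace = H5Screate_simple(3, count, nullptr);
        std::vector<double> buf(ny * nz);

        for (hsize_t i = 0; i < nx; ++i)
        {
            std::copy(data + i * ny * nz, data + (i + 1) * ny * nz, buf.begin());
            hsize_t offset[3] = {i, 0, 0};
            hid_t fspace = H5Dget_space(dset);
            H5Sselect_hyperslab(fspace, H5S_SELECT_SET, offset, nullptr, count, nullptr);
            H5Dwrite(dset, H5T_NATIVE_DOUBLE, memspace, fspace, H5P_DEFAULT, buf.data());
            H5Sclose(fspace);
        }
        H5Sclose(memspace);
        H5Dclose(dset);
        H5Fclose(file);
    }

In the real function the same slab buffer is filled from relem() or kelem() depending on
space_, which is how one code path serves both real- and k-space output.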
- if( !bdistributed && CONFIG::MPI_task_rank==0 ){ - + if (!bdistributed && CONFIG::MPI_task_rank == 0) + { + hid_t file_id, dset_id; /* file and dataset identifiers */ hid_t filespace, memspace; /* file and memory dataspace identifiers */ hsize_t offset[3], count[3]; @@ -419,23 +425,23 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string for (int i = 0; i < 3; ++i) count[i] = size(i); - + if (typeid(data_t) == typeid(float)) dtype_id = H5T_NATIVE_FLOAT; else if (typeid(data_t) == typeid(double)) dtype_id = H5T_NATIVE_DOUBLE; + else if (typeid(data_t) == typeid(long double)) + dtype_id = H5T_NATIVE_LDOUBLE; else if (typeid(data_t) == typeid(std::complex)) - { dtype_id = H5T_NATIVE_FLOAT; - } else if (typeid(data_t) == typeid(std::complex)) - { dtype_id = H5T_NATIVE_DOUBLE; - } + else if (typeid(data_t) == typeid(std::complex)) + dtype_id = H5T_NATIVE_LDOUBLE; filespace = H5Screate_simple(3, count, NULL); dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); H5Sclose(filespace); hsize_t slice_sz = size(1) * size(2); @@ -459,7 +465,7 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string { for (size_t k = 0; k < size(2); ++k) { - if( this->space_ == rspace_id ) + if (this->space_ == rspace_id) buf[j * size(2) + k] = std::real(relem(i, j, k)); else buf[j * size(2) + k] = std::real(kelem(i, j, k)); @@ -478,7 +484,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string if (typeid(data_t) == typeid(std::complex) || typeid(data_t) == typeid(std::complex) || - this->space_ == kspace_id ) + typeid(data_t) == typeid(std::complex) || + this->space_ == kspace_id) { datasetname += std::string(".im"); @@ -487,7 +494,7 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string filespace = H5Screate_simple(3, count, NULL); dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); H5Sclose(filespace); count[0] = 1; @@ -499,7 +506,7 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string for (size_t j = 0; j < size(1); ++j) for (size_t k = 0; k < size(2); ++k) { - if( this->space_ == rspace_id ) + if (this->space_ == rspace_id) buf[j * size(2) + k] = std::imag(relem(i, j, k)); else buf[j * size(2) + k] = std::imag(kelem(i, j, k)); @@ -526,7 +533,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string return; } - if( !bdistributed && CONFIG::MPI_task_rank!=0 ) return; + if (!bdistributed && CONFIG::MPI_task_rank != 0) + return; hid_t file_id, dset_id; /* file and dataset identifiers */ hid_t filespace, memspace; /* file and memory dataspace identifiers */ @@ -534,7 +542,6 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string hid_t dtype_id = H5T_NATIVE_FLOAT; hid_t plist_id; - #if defined(USE_MPI) int mpi_size, mpi_rank; @@ -586,14 +593,14 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string dtype_id = H5T_NATIVE_FLOAT; else if (typeid(data_t) == typeid(double)) dtype_id = H5T_NATIVE_DOUBLE; + else if (typeid(data_t) == typeid(long double)) + dtype_id = H5T_NATIVE_LDOUBLE; else if (typeid(data_t) == typeid(std::complex)) - { dtype_id = H5T_NATIVE_FLOAT; - } else if (typeid(data_t) == typeid(std::complex)) - { dtype_id = H5T_NATIVE_DOUBLE; - } + else if (typeid(data_t) == typeid(std::complex)) + dtype_id = H5T_NATIVE_LDOUBLE; #if defined(USE_MPI) && !defined(USE_MPI_IO) if (itask == 0) @@ -648,7 +655,7 @@ void Grid_FFT::Write_to_HDF5(std::string 
fname, std::string { for (size_t k = 0; k < size(2); ++k) { - if( this->space_ == rspace_id ) + if (this->space_ == rspace_id) buf[j * size(2) + k] = std::real(relem(i, j, k)); else buf[j * size(2) + k] = std::real(kelem(i, j, k)); @@ -671,7 +678,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string if (typeid(data_t) == typeid(std::complex) || typeid(data_t) == typeid(std::complex) || - this->space_ == kspace_id ) + typeid(data_t) == typeid(std::complex) || + this->space_ == kspace_id) { datasetname += std::string(".im"); @@ -721,7 +729,7 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string for (size_t j = 0; j < size(1); ++j) for (size_t k = 0; k < size(2); ++k) { - if( this->space_ == rspace_id ) + if (this->space_ == rspace_id) buf[j * size(2) + k] = std::imag(relem(i, j, k)); else buf[j * size(2) + k] = std::imag(kelem(i, j, k)); @@ -757,8 +765,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string #include -template -void Grid_FFT::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax) +template +void Grid_FFT::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax) { double logvmin = std::log10(vmin); double logvmax = std::log10(vmax); @@ -809,12 +817,12 @@ void Grid_FFT::Write_PDF(std::string ofname, int nbins, dou #endif } -template -void Grid_FFT::Write_PowerSpectrum(std::string ofname) +template +void Grid_FFT::Write_PowerSpectrum(std::string ofname) { std::vector bin_k, bin_P, bin_eP; std::vector bin_count; - this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count ); + this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count); #if defined(USE_MPI) if (CONFIG::MPI_task_rank == 0) { @@ -839,8 +847,8 @@ void Grid_FFT::Write_PowerSpectrum(std::string ofname) #endif } -template -void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count ) +template +void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count) { this->FourierTransformForward(); @@ -920,7 +928,7 @@ void Grid_FFT::Compute_PowerSpectrum(std::vector &b /********************************************************************************************/ -template class Grid_FFT; -template class Grid_FFT; -template class Grid_FFT; -template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; From 6dabf65ab20986bfd680b659bea64fde6c5d177a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 19:58:34 +0200 Subject: [PATCH 092/130] added template cmake_config file that was forgotten in previous commit --- include/cmake_config.hh.in | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/cmake_config.hh.in diff --git a/include/cmake_config.hh.in b/include/cmake_config.hh.in new file mode 100644 index 0000000..b280f44 --- /dev/null +++ b/include/cmake_config.hh.in @@ -0,0 +1,12 @@ +#pragma once + +#define USE_PRECISION_${CODE_PRECISION} +constexpr char CMAKE_BUILDTYPE_STR[] = "${CMAKE_BUILD_TYPE}"; + +#if defined(USE_PRECISION_FLOAT) + constexpr char CMAKE_PRECISION_STR[] = "single"; +#elif defined(USE_PRECISION_DOUBLE) + constexpr char CMAKE_PRECISION_STR[] = "double"; +#elif defined(USE_PRECISION_LONGDOUBLE) + constexpr char CMAKE_PRECISION_STR[] = "long double"; +#endif \ No newline at end of file From 0678489386e689afabaafa13ab56c8220dabf938 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 19:58:57 +0200 
Subject: [PATCH 093/130] fixed some nyquist plane errors in convolutions --- include/convolution.hh | 82 ++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/include/convolution.hh b/include/convolution.hh index 238717b..fc0b9b4 100644 --- a/include/convolution.hh +++ b/include/convolution.hh @@ -429,10 +429,9 @@ private: for (size_t j = 0; j < 2 * fp.size(1) / 3; ++j) { size_t jp = (j > nhalf[1]) ? j + nhalf[1] : j; - for (size_t k = 0; k < 2 * fp.size(2) / 3; ++k) + for (size_t k = 0; k < nhalf[2]+1; ++k) { size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k; - // if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue; fp.kelem(ip, jp, kp) = kfunc(i, j, k) * rfac; } } @@ -618,8 +617,11 @@ private: for (size_t k = 0; k < fbuf_->size(2); ++k) { size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k; - // if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue; fbuf_->kelem(i, j, k) = fp.kelem(ip, jp, kp) / rfac; + // zero Nyquist modes since they are not unique after convolution + if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]){ + fbuf_->kelem(i, j, k) = 0.0; + } } } } @@ -691,7 +693,7 @@ private: int recvfrom = 0; if (iglobal <= fny[0]) { - real_t wi = (iglobal == fny[0]) ? 0.5 : 1.0; + real_t wi = (iglobal == fny[0]) ? 0.0 : 1.0; recvfrom = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size); MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, (int)iglobal, @@ -699,7 +701,7 @@ private: for (size_t j = 0; j < nf[1]; ++j) { - real_t wj = (j == fny[1]) ? 0.5 : 1.0; + real_t wj = (j == fny[1]) ? 0.0 : 1.0; if (j <= fny[1]) { size_t jp = j; @@ -707,21 +709,22 @@ private: { if (typeid(data_t) == typeid(real_t)) { - real_t w = wi * wj; + real_t wk = (k == fny[2]) ? 0.0 : 1.0; + real_t w = wi * wj * wk; fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; } else { - real_t wk = (k == fny[2]) ? 0.5 : 1.0; + real_t wk = (k == fny[2]) ? 0.0 : 1.0; real_t w = wi * wj * wk; - if (k <= fny[2]) + if (k < fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; - if (k >= fny[2]) + if (k > fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac; - if (w < 1.0) - { - fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); - } + // if (w < 1.0) + // { + // fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); + // } } } } @@ -732,21 +735,22 @@ private: { if (typeid(data_t) == typeid(real_t)) { - real_t w = wi * wj; + real_t wk = (k == fny[2]) ? 0.0 : 1.0; + real_t w = wi * wj * wk; fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; } else { - real_t wk = (k == fny[2]) ? 0.5 : 1.0; + real_t wk = (k == fny[2]) ? 0.0 : 1.0; real_t w = wi * wj * wk; - if (k <= fny[2]) + if (k < fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; - if (k >= fny[2]) + if (k > fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac; - if (w < 1.0) - { - fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); - } + // if (w < 1.0) + // { + // fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); + // } } } } @@ -754,7 +758,7 @@ private: } if (iglobal >= fny[0]) { - real_t wi = (iglobal == fny[0]) ? 0.5 : 1.0; + real_t wi = (iglobal == fny[0]) ? 0.0 : 1.0; recvfrom = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size); MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, @@ -762,29 +766,26 @@ private: for (size_t j = 0; j < nf[1]; ++j) { - real_t wj = (j == fny[1]) ? 0.5 : 1.0; + real_t wj = (j == fny[1]) ? 
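                            // [Editorial note, not part of the patch.] On an even grid the
                            // +k_Nyquist and -k_Nyquist modes share one storage location, so
                            // after the 3/2-padded (Orszag) convolution these planes cannot be
                            // restored unambiguously; the patch therefore drops them, giving
                            // them weight 0.0 here and zeroing them explicitly when unpadding,
                            // instead of the earlier 0.5 averaging.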
0.0 : 1.0; if (j <= fny[1]) { size_t jp = j; for (size_t k = 0; k < nf[2]; ++k) { + const real_t wk = (k == fny[2]) ? 0.0 : 1.0; + const real_t w = wi * wj * wk; if (typeid(data_t) == typeid(real_t)) { - real_t w = wi * wj; + real_t wk = (k == fny[2]) ? 0.0 : 1.0; + real_t w = wi * wj * wk; fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; } else { - real_t wk = (k == fny[2]) ? 0.5 : 1.0; - real_t w = wi * wj * wk; - if (k <= fny[2]) + if (k < fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; - if (k >= fny[2]) + if (k > fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac; - if (w < 1.0) - { - fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); - } } } } @@ -793,23 +794,18 @@ private: size_t jp = j + fny[1]; for (size_t k = 0; k < nf[2]; ++k) { + const real_t wk = (k == fny[2]) ? 0.0 : 1.0; + const real_t w = wi * wj * wk; if (typeid(data_t) == typeid(real_t)) { - real_t w = wi * wj; fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; } else { - real_t wk = (k == fny[2]) ? 0.5 : 1.0; - real_t w = wi * wj * wk; - if (k <= fny[2]) + if (k < fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; - if (k >= fny[2]) + if (k > fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac; - if (w < 1.0) - { - fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); - } } } } @@ -817,8 +813,8 @@ private: } } -//... copy data back -#pragma omp parallel for + //... copy data back + #pragma omp parallel for for (size_t i = 0; i < fbuf_->ntot_; ++i) { output_op(i, (*fbuf_)[i]); From 7ead43455c3fd5bdc56c97b2c98020fb6213930e Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Thu, 2 Apr 2020 21:47:17 +0200 Subject: [PATCH 094/130] removed old debugging logging --- src/plugins/random_music_wnoise_generator.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/plugins/random_music_wnoise_generator.cc b/src/plugins/random_music_wnoise_generator.cc index a6d4c35..68b4649 100644 --- a/src/plugins/random_music_wnoise_generator.cc +++ b/src/plugins/random_music_wnoise_generator.cc @@ -613,7 +613,7 @@ void music_wnoise_generator::register_cube(int i, int j, int k) rnums_.push_back(NULL); cubemap_[icube] = rnums_.size() - 1; #ifdef DEBUG - LOGDEBUG("registering new cube %d,%d,%d . ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]); + csoca::dlog.Print("registering new cube %d,%d,%d . 
ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]); #endif } } @@ -741,8 +741,8 @@ double music_wnoise_generator::fill_subvolume(int *i0, int *n) ncube[2] = (int)(n[2] / cubesize_) + 2; #ifdef DEBUG - LOGDEBUG("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]); - LOGDEBUG("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]); + csoca::dlog.Print("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]); + csoca::dlog.Print("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]); #endif double mean = 0.0; From dc5f87f216a8d00a755bc3d0f6e22eb01d4c73e0 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 3 Apr 2020 00:39:28 +0200 Subject: [PATCH 095/130] added convolver and plt as options to cmake --- .gitignore | 1 + CMakeLists.txt | 16 ++++++++++++++++ src/ic_generator.cc | 10 ++++++++-- src/main.cc | 29 ++++++++++++++++++++--------- src/plugins/transfer_CLASS.cc | 3 --- 5 files changed, 45 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 60035a0..b012d08 100644 --- a/.gitignore +++ b/.gitignore @@ -54,3 +54,4 @@ src/fastLPT src/input_powerspec.txt src/Makefile .DS_Store +include/cmake_config.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index d381d7d..4fedf66 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,6 +77,22 @@ set_property ( PROPERTY STRINGS FLOAT DOUBLE LONGDOUBLE ) +######################################################################################################################## +# convolver type, right now only orszag or naive +set ( + CONVOLVER_TYPE "ORSZAG" + CACHE STRING "Convolution algorithm to be used (Naive=no dealiasing, Orszag=dealiased)" +) +set_property ( + CACHE CONVOLVER_TYPE + PROPERTY STRINGS ORSZAG NAIVE +) + +######################################################################################################################## +# PLT options, right now only on/off +option(ENABLE_PLT "Enable PLT (particle linear theory) corrections" OFF) + + ######################################################################################################################## # FFTW if(POLICY CMP0074) diff --git a/src/ic_generator.cc b/src/ic_generator.cc index a8f60bd..0a88054 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -266,15 +266,21 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- // Create convolution class instance for non-linear terms //-------------------------------------------------------------------- +#if defined(USE_CONVOLVER_ORSZAG) OrszagConvolver Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - // NaiveConvolver Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); +#elif defined(USE_CONVOLVER_NAIVE) + NaiveConvolver Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); +#endif //-------------------------------------------------------------------- //-------------------------------------------------------------------- // Create PLT gradient operator //-------------------------------------------------------------------- - // particle::lattice_gradient lg( the_config ); +#if defined(ENABLE_PLT) + particle::lattice_gradient lg( the_config ); +#else op::fourier_gradient lg( the_config ); +#endif //-------------------------------------------------------------------- std::vector species_list; diff --git a/src/main.cc b/src/main.cc index 5afc648..140e588 100644 
--- a/src/main.cc +++ b/src/main.cc @@ -67,15 +67,26 @@ int main( int argc, char** argv ) // Ascii ART logo. generated via http://patorjk.com/software/taag/#p=display&f=Nancyj&t=monofonIC csoca::ilog << "\n" - << " The unigrid version of MUSIC-2 .8888b dP a88888b. \n" - << " 88 \" 88 d8\' `88 \n" - << " 88d8b.d8b. .d8888b. 88d888b. .d8888b. 88aaa .d8888b. 88d888b. 88 88 \n" - << " 88\'`88\'`88 88\' `88 88\' `88 88\' `88 88 88\' `88 88\' `88 88 88 \n" - << " 88 88 88 88. .88 88 88 88. .88 88 88. .88 88 88 88 Y8. .88 \n" - << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl - << "Build was compiled on " << __DATE__ << " at " << __TIME__ << std::endl - << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl - << "-------------------------------------------------------------------------------\n" << std::endl; + << " The unigrid version of MUSIC-2 .8888b dP a88888b. \n" + << " 88 \" 88 d8\' `88 \n" + << " 88d8b.d8b. .d8888b. 88d888b. .d8888b. 88aaa .d8888b. 88d888b. 88 88 \n" + << " 88\'`88\'`88 88\' `88 88\' `88 88\' `88 88 88\' `88 88\' `88 88 88 \n" + << " 88 88 88 88. .88 88 88 88. .88 88 88. .88 88 88 88 Y8. .88 \n" + << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl; + + // Compilation CMake configuration, time etc info: + csoca::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; + + // git and versioning info: + csoca::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl; + + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + csoca::ilog << "Compile time options : " << std::endl; + csoca::ilog << " Precision : " << CMAKE_PRECISION_STR << std::endl; + csoca::ilog << " Convolutions : " << CMAKE_CONVOLVER_STR << std::endl; + csoca::ilog << " PLT : " << CMAKE_PLT_STR << std::endl; + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + //------------------------------------------------------------------------------ // Parse command line options diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index e358fce..484c3d7 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -18,9 +18,6 @@ #include #include -#include -#include - class transfer_CLASS_plugin : public TransferFunction_plugin { From b99597d300931b7de6b599aa15da388e25662690 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 3 Apr 2020 00:54:10 +0200 Subject: [PATCH 096/130] updates fo cmake template header, forgotten in previous commit --- include/cmake_config.hh.in | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/include/cmake_config.hh.in b/include/cmake_config.hh.in index b280f44..db4b9a8 100644 --- a/include/cmake_config.hh.in +++ b/include/cmake_config.hh.in @@ -1,12 +1,25 @@ #pragma once -#define USE_PRECISION_${CODE_PRECISION} constexpr char CMAKE_BUILDTYPE_STR[] = "${CMAKE_BUILD_TYPE}"; +#define USE_PRECISION_${CODE_PRECISION} #if defined(USE_PRECISION_FLOAT) constexpr char CMAKE_PRECISION_STR[] = "single"; #elif defined(USE_PRECISION_DOUBLE) constexpr char CMAKE_PRECISION_STR[] = "double"; #elif defined(USE_PRECISION_LONGDOUBLE) constexpr char CMAKE_PRECISION_STR[] = "long double"; -#endif \ No newline at end of file +#endif + +#define 
USE_CONVOLVER_${CONVOLVER_TYPE} +#if defined(USE_CONVOLVER_ORSZAG) + constexpr char CMAKE_CONVOLVER_STR[] = "Orszag3/2"; +#elif defined(USE_CONVOLVER_NAIVE) + constexpr char CMAKE_CONVOLVER_STR[] = "Aliased"; +#endif + +#if defined(ENABLE_PLT) + constexpr char CMAKE_PLT_STR[] = "PLT corr. on"; +#else + constexpr char CMAKE_PLT_STR[] = "PLT corr. off"; +#endif \ No newline at end of file From c9fce7f2108dca0b0aa7d9ca395a55afd4a56758 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Fri, 3 Apr 2020 07:04:41 +0200 Subject: [PATCH 097/130] fixed bug that Omega_k was not set --- include/cosmology_parameters.hh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh index 1af692c..c8796ad 100644 --- a/include/cosmology_parameters.hh +++ b/include/cosmology_parameters.hh @@ -38,6 +38,9 @@ struct parameters sqrtpnorm, //!< sqrt of power spectrum normalisation factor vfact; //!< velocity<->displacement conversion factor in Zel'dovich approx. + parameters( const parameters& ) = default; + parameters() = delete; + explicit parameters(ConfigFile cf) { H0 = cf.GetValue("cosmology", "H0"); @@ -77,6 +80,7 @@ struct parameters #if 1 // assume zero curvature, take difference from dark energy Omega_DE += 1.0 - Omega_m - Omega_DE - Omega_r; + Omega_k = 0.0; #else // allow for curvature Omega_k = 1.0 - Omega_m - Omega_DE - Omega_r; @@ -99,8 +103,5 @@ struct parameters vfact = 0.0; } - parameters(void) - { - } }; } // namespace cosmology \ No newline at end of file From 734948c2a1bf5fbeb0ad6becf3ed045abc60471f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 01:24:05 +0200 Subject: [PATCH 098/130] minor fixes --- include/cosmology_calculator.hh | 9 ++++++--- include/cosmology_parameters.hh | 21 ++++++++++++--------- include/interpolate.hh | 8 +++++--- src/plugins/transfer_CLASS.cc | 1 + 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index e7d92f9..49ebc62 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -35,7 +35,7 @@ public: std::unique_ptr transfer_function_; private: - static constexpr double REL_PRECISION = 1e-9; + static constexpr double REL_PRECISION = 1e-10; interpolated_function_1d D_of_a_, f_of_a_, a_of_D_; double Dnow_, Dplus_start_, astart_; @@ -123,6 +123,8 @@ private: } public: + calculator() = delete; + calculator(const calculator& c) = delete; //! constructor for a cosmology calculator object /*! * @param acosmo a cosmological parameters structure @@ -149,7 +151,8 @@ public: cosmo_param_.pnorm = this->compute_pnorm_from_sigma8(); else{ cosmo_param_.pnorm = 1.0; - csoca::ilog << "Measured sigma_8 for given PS normalisation is " << this->compute_sigma8() << std::endl; + auto sigma8 = this->compute_sigma8(); + csoca::ilog << "Measured sigma_8 for given PS normalisation is " << sigma8 << std::endl; } cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm); @@ -256,7 +259,7 @@ public: */ real_t get_vfact(real_t a) const noexcept { - return a * H_of_a(a) / cosmo_param_.h * this->get_f(a); + return f_of_a_(a) * a * H_of_a(a) / cosmo_param_.h; } //! 
Integrand for the sigma_8 normalization of the power spectrum diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh index c8796ad..6a19043 100644 --- a/include/cosmology_parameters.hh +++ b/include/cosmology_parameters.hh @@ -38,9 +38,10 @@ struct parameters sqrtpnorm, //!< sqrt of power spectrum normalisation factor vfact; //!< velocity<->displacement conversion factor in Zel'dovich approx. - parameters( const parameters& ) = default; parameters() = delete; - + + parameters( const parameters& ) = default; + explicit parameters(ConfigFile cf) { H0 = cf.GetValue("cosmology", "H0"); @@ -73,10 +74,6 @@ struct parameters { Omega_r = 0.0; } - else - { - csoca::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally. Make sure your sim code supports this..." << std::endl; - } #if 1 // assume zero curvature, take difference from dark energy Omega_DE += 1.0 - Omega_m - Omega_DE - Omega_r; @@ -86,6 +83,10 @@ struct parameters Omega_k = 1.0 - Omega_m - Omega_DE - Omega_r; #endif + dplus = 0.0; + pnorm = 0.0; + vfact = 0.0; + csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; csoca::ilog << "Cosmological parameters are: " << std::endl; csoca::ilog << " H0 = " << std::setw(16) << H0 << "sigma_8 = " << std::setw(16) << sigma8 << std::endl; @@ -98,9 +99,11 @@ struct parameters csoca::ilog << " Omega_DE = " << std::setw(16) << Omega_DE << "nspect = " << std::setw(16) << nspect << std::endl; csoca::ilog << " w0 = " << std::setw(16) << w_0 << "w_a = " << std::setw(16) << w_a << std::endl; - dplus = 0.0; - pnorm = 0.0; - vfact = 0.0; + if( Omega_r > 0.0 ) + { + csoca::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally."<< std::endl; + csoca::wlog << "Make sure your sim code supports this..." << std::endl; + } } }; diff --git a/include/interpolate.hh b/include/interpolate.hh index cb0ea50..41fe8d4 100644 --- a/include/interpolate.hh +++ b/include/interpolate.hh @@ -39,11 +39,12 @@ public: void set_data(const std::vector &data_x, const std::vector &data_y) { - assert(data_x_.size() == data_y_.size()); - assert(!(logx & periodic)); - data_x_ = data_x; data_y_ = data_y; + + assert(data_x_.size() == data_y_.size()); + assert(data_x_.size() > 5); + assert(!(logx & periodic)); if (logx) for (auto &d : data_x_) d = std::log(d); if (logy) for (auto &d : data_y_) d = std::log(d); @@ -59,6 +60,7 @@ public: double operator()(double x) const noexcept { + assert( isinit_ && !(logx&&x<=0.0) ); double xa = logx ? std::log(x) : x; double y(gsl_spline_eval(gsl_sp_, xa, gsl_ia_)); return logy ? 
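        // [Editorial note, not part of the patch.] With the logx/logy flags set, set_data()
        // stored log(x) and log(y), so evaluation maps x -> log x, interpolates with the GSL
        // spline there, and exponentiates on the way out: the interpolation is linear in
        // log-log space, which suits growth factors and power spectra spanning many decades.
        // A hypothetical use, assuming the template parameters are <logx, logy, periodic>:
        //   interpolated_function_1d<true, true, false> D_of_a;
        //   D_of_a.set_data(a_table, D_table);   // strictly positive tables required
        //   double D_half = D_of_a(0.5);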
std::exp(y) : y; diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 484c3d7..281d381 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -50,6 +50,7 @@ private: add_class_parameter("z_max_pk", std::max(zstart_, ztarget_) * 1.2); // use 1.2 as safety add_class_parameter("P_k_max_h/Mpc", kmax_); add_class_parameter("output", "dTk,vTk"); + add_class_parameter("extra metric transfer functions","yes"); // add_class_parameter("lensing", "no"); //--- choose gauge ------------------------------------------------ From 4432fbe8fc3d15c07723036dabf53005123594e1 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 06:07:12 +0200 Subject: [PATCH 099/130] added c compiler flags for build types --- CMakeLists.txt | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4fedf66..c8cf314 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,9 +6,15 @@ project(monofonIC C CXX) #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -march=native -Wall -pedantic" CACHE STRING "Flags used by the compiler during Release builds." FORCE) set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -march=native -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during RelWithDebInfo builds." FORCE) -set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -march=native -DDEBUG -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during Debug builds." FORCE) +set(CMAKE_CXX_FLAGS_DEBUG "-g -O1 -march=native -DDEBUG -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during Debug builds." FORCE) set(CMAKE_CXX_FLAGS_DEBUGSANADD "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address " CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for address." FORCE) set(CMAKE_CXX_FLAGS_DEBUGSANUNDEF "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for undefineds." FORCE) +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "Flags used by the compiler during Release builds." FORCE) +set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}" CACHE STRING "Flags used by the compiler during RelWithDebInfo builds." FORCE) +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" CACHE STRING "Flags used by the compiler during Debug builds." FORCE) +set(CMAKE_C_FLAGS_DEBUGSANADD "${CMAKE_CXX_FLAGS_DEBUGSANADD}" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for address." FORCE) +set(CMAKE_C_FLAGS_DEBUGSANUNDEF "${CMAKE_CXX_FLAGS_DEBUGSANUNDEF}" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for undefineds." 
FORCE) + set(default_build_type "Release") if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) @@ -19,7 +25,9 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo" "DebugSanAdd" "DebugSanUndef") endif() -mark_as_advanced(CMAKE_CXX_FLAGS_DEBUGSANADD CMAKE_CXX_FLAGS_DEBUGSANUNDEF CMAKE_EXECUTABLE_FORMAT CMAKE_OSX_ARCHITECTURES CMAKE_OSX_DEPLOYMENT_TARGET CMAKE_OSX_SYSROOT) +mark_as_advanced(CMAKE_CXX_FLAGS_DEBUGSANADD CMAKE_CXX_FLAGS_DEBUGSANUNDEF) +mark_as_advanced(CMAKE_C_FLAGS_DEBUGSANADD CMAKE_C_FLAGS_DEBUGSANUNDEF) +mark_as_advanced(CMAKE_EXECUTABLE_FORMAT CMAKE_OSX_ARCHITECTURES CMAKE_OSX_DEPLOYMENT_TARGET CMAKE_OSX_SYSROOT) ######################################################################################################################## From 8c24becc92c73e5381965c29bd3a4692875f841a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 14:22:59 +0200 Subject: [PATCH 100/130] forgotten file in class.cmake when compiling with Makefile --- external/class.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/external/class.cmake b/external/class.cmake index aead59b..a2e5057 100644 --- a/external/class.cmake +++ b/external/class.cmake @@ -32,6 +32,7 @@ if(ENABLE_CLASS) ${CMAKE_CURRENT_LIST_DIR}/class/build/history.o ${CMAKE_CURRENT_LIST_DIR}/class/build/hydrogen.o ${CMAKE_CURRENT_LIST_DIR}/class/build/hyperspherical.o + ${CMAKE_CURRENT_LIST_DIR}/class/tools/trigonometric_integrals.o ${CMAKE_CURRENT_LIST_DIR}/class/build/hyrectools.o ${CMAKE_CURRENT_LIST_DIR}/class/build/input.o ${CMAKE_CURRENT_LIST_DIR}/class/build/lensing.o From 23155153058e5157204de0c01bdc47307a3caee3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 20:27:51 +0200 Subject: [PATCH 101/130] renaming (csoca->music) --- external/class | 2 +- include/config_file.hh | 20 +++--- include/convolution.hh | 8 +-- include/cosmology_calculator.hh | 14 ++--- include/cosmology_parameters.hh | 20 +++--- include/logger.hh | 4 +- include/particle_plt.hh | 8 +-- src/grid_fft.cc | 30 ++++----- src/ic_generator.cc | 64 +++++++++---------- src/logger.cc | 4 +- src/main.cc | 66 ++++++++++---------- src/old/output_gadget2___original.cc | 62 +++++++++--------- src/output_plugin.cc | 12 ++-- src/plugins/output_arepo.cc | 2 +- src/plugins/output_gadget2.cc | 2 +- src/plugins/output_gadget_hdf5.cc | 43 +++++++------ src/plugins/output_generic.cc | 2 +- src/plugins/output_grafic2.cc | 6 +- src/plugins/random_music.cc | 10 +-- src/plugins/random_music_wnoise_generator.cc | 38 +++++------ src/plugins/random_music_wnoise_generator.hh | 6 +- src/plugins/transfer_CAMB_file.cc | 8 +-- src/plugins/transfer_CLASS.cc | 8 +-- src/random_plugin.cc | 12 ++-- src/testing.cc | 2 +- src/transfer_function_plugin.cc | 12 ++-- 26 files changed, 234 insertions(+), 231 deletions(-) diff --git a/external/class b/external/class index 083efeb..52bc312 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit 083efeb043fca85418c1ea02f062be111b970b28 +Subproject commit 52bc3126fca4415c4f541d47d43ffdb9763e0464 diff --git a/include/config_file.hh b/include/config_file.hh index b0d6401..4b6f1fc 100644 --- a/include/config_file.hh +++ b/include/config_file.hh @@ -66,7 +66,7 @@ public: if (!ss.eof()) { //.. conversion error - csoca::elog << "Error: conversion of \'" << ival << "\' failed." + music::elog << "Error: conversion of \'" << ival << "\' failed." 
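      // [Editorial note, not part of the patch.] The conversion that fails here is the usual
      // stringstream round-trip; a minimal equivalent of the pattern used by GetValue
      // (as far as it is visible in this hunk):
      //   std::stringstream ss(ival);
      //   ss >> out_value;
      //   if (!ss.eof())                      // trailing characters => not a clean conversion
      //       throw ErrInvalidConversion(std::string("invalid conversion to ")
      //                                  + typeid(out_value).name() + '.');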
<< std::endl; throw ErrInvalidConversion(std::string("invalid conversion to ") + typeid(out_value).name() + '.'); @@ -80,7 +80,7 @@ public: std::ifstream file(FileName.c_str()); if (!file.is_open()){ - csoca::elog << "Could not open config file \'" << FileName << "\'." << std::endl; + music::elog << "Could not open config file \'" << FileName << "\'." << std::endl; throw std::runtime_error( std::string("Error: Could not open config file \'") + FileName + std::string("\'")); @@ -117,19 +117,19 @@ public: if ((size_t)posEqual == std::string::npos && (name.size() != 0 || value.size() != 0)) { - csoca::wlog << "Ignoring non-assignment in " << FileName << ":" + music::wlog << "Ignoring non-assignment in " << FileName << ":" << m_iLine << std::endl; continue; } if (name.length() == 0 && value.size() != 0) { - csoca::wlog << "Ignoring assignment missing entry name in " + music::wlog << "Ignoring assignment missing entry name in " << FileName << ":" << m_iLine << std::endl; continue; } if (value.length() == 0 && name.size() != 0) { - csoca::wlog << "Empty entry will be ignored in " << FileName << ":" + music::wlog << "Empty entry will be ignored in " << FileName << ":" << m_iLine << std::endl; continue; } @@ -139,7 +139,7 @@ public: //.. add key/value pair to hash table .. if (m_Items.find(inSection + '/' + name) != m_Items.end()) { - csoca::wlog << "Redeclaration overwrites previous value in " + music::wlog << "Redeclaration overwrites previous value in " << FileName << ":" << m_iLine << std::endl; } @@ -232,7 +232,7 @@ public: } catch (ErrItemNotFound& e) { - csoca::elog << e.what() << std::endl; + music::elog << e.what() << std::endl; throw; } return r; @@ -282,11 +282,11 @@ public: } void LogDump(void) { - csoca::ilog << "List of all configuration options:" << std::endl; + music::ilog << "List of all configuration options:" << std::endl; std::map::const_iterator i = m_Items.begin(); while (i != m_Items.end()) { if (i->second.length() > 0) - csoca::ilog << std::setw(28) << i->first << " = " << i->second + music::ilog << std::setw(28) << i->first << " = " << i->second << std::endl; ++i; } @@ -330,7 +330,7 @@ inline bool ConfigFile::GetValue(std::string const &strSection, return true; if (r1 == "false" || r1 == "no" || r1 == "off" || r1 == "0") return false; - csoca::elog << "Illegal identifier \'" << r1 << "\' in \'" << strEntry << "\'." << std::endl; + music::elog << "Illegal identifier \'" << r1 << "\' in \'" << strEntry << "\'." << std::endl; throw ErrIllegalIdentifier(std::string("Illegal identifier \'") + r1 + std::string("\' in \'") + strEntry + std::string("\'.")); diff --git a/include/convolution.hh b/include/convolution.hh index fc0b9b4..90736b1 100644 --- a/include/convolution.hh +++ b/include/convolution.hh @@ -444,7 +444,7 @@ private: ///////////////////////////////////////////////////////////////////// double tstart = get_wtime(); - csoca::dlog << "[MPI] Started scatter for convolution" << std::endl; + music::dlog << "[MPI] Started scatter for convolution" << std::endl; //... collect offsets @@ -589,7 +589,7 @@ private: // std::cerr << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed! <<<<<" // << std::endl; ofs << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed! 
// <<<<<" << std::endl; - csoca::dlog.Print("[MPI] Completed scatter for convolution, took %fs\n", + music::dlog.Print("[MPI] Completed scatter for convolution, took %fs\n", get_wtime() - tstart); #endif /// end of ifdef/ifndef USE_MPI /////////////////////////////////////////////////////////////// @@ -639,7 +639,7 @@ private: double tstart = get_wtime(); - csoca::dlog << "[MPI] Started gather for convolution"; + music::dlog << "[MPI] Started gather for convolution"; MPI_Barrier(MPI_COMM_WORLD); @@ -833,7 +833,7 @@ private: MPI_Barrier(MPI_COMM_WORLD); - csoca::dlog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart); + music::dlog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart); #endif /// end of ifdef/ifndef USE_MPI ////////////////////////////////////////////////////////////// } diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index 49ebc62..04aa2e9 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -57,7 +57,7 @@ private: gsl_set_error_handler(NULL); if (error / result > REL_PRECISION) - csoca::wlog << "no convergence in function 'integrate', rel. error=" << error / result << std::endl; + music::wlog << "no convergence in function 'integrate', rel. error=" << error / result << std::endl; return (real_t)result; } @@ -152,17 +152,17 @@ public: else{ cosmo_param_.pnorm = 1.0; auto sigma8 = this->compute_sigma8(); - csoca::ilog << "Measured sigma_8 for given PS normalisation is " << sigma8 << std::endl; + music::ilog << "Measured sigma_8 for given PS normalisation is " << sigma8 << std::endl; } cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm); - csoca::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons" + music::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons" << " : " << (transfer_function_->tf_is_distinct() ? "yes" : "no") << std::endl; - csoca::ilog << std::setw(32) << std::left << "TF maximum wave number" + music::ilog << std::setw(32) << std::left << "TF maximum wave number" << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl; - // csoca::ilog << "D+(MUSIC) = " << this->get_growth_factor( 1.0/(1.0+cf.GetValue("setup","zstart")) ) << std::endl; - // csoca::ilog << "pnrom = " << cosmo_param_.pnorm << std::endl; + // music::ilog << "D+(MUSIC) = " << this->get_growth_factor( 1.0/(1.0+cf.GetValue("setup","zstart")) ) << std::endl; + // music::ilog << "pnrom = " << cosmo_param_.pnorm << std::endl; } ~calculator() @@ -213,7 +213,7 @@ public: #warning Check whether output is at redshift that is indicated! 
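            // [Editorial note, not part of the patch.] For reference, the normalisation the
            // calculator works with is the conventional one: the linear spectrum is assembled
            // as P(k,a) = pnorm * k^n_s * T(k)^2 * D_+(a)^2, and pnorm is fixed from sigma_8
            // through sigma_8^2 = 1/(2 pi^2) Int dk k^2 P(k, a=1) [3 j_1(kR)/(kR)]^2 with
            // R = 8 Mpc/h -- this is what compute_pnorm_from_sigma8() and compute_sigma8()
            // evaluate, up to the exact transfer-function convention of the chosen plugin.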
} } - csoca::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl; + music::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl; } const cosmology::parameters &get_parameters(void) const noexcept diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh index 6a19043..7168ec9 100644 --- a/include/cosmology_parameters.hh +++ b/include/cosmology_parameters.hh @@ -87,22 +87,22 @@ struct parameters pnorm = 0.0; vfact = 0.0; - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Cosmological parameters are: " << std::endl; - csoca::ilog << " H0 = " << std::setw(16) << H0 << "sigma_8 = " << std::setw(16) << sigma8 << std::endl; - csoca::ilog << " Omega_c = " << std::setw(16) << Omega_m-Omega_b << "Omega_b = " << std::setw(16) << Omega_b << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "Cosmological parameters are: " << std::endl; + music::ilog << " H0 = " << std::setw(16) << H0 << "sigma_8 = " << std::setw(16) << sigma8 << std::endl; + music::ilog << " Omega_c = " << std::setw(16) << Omega_m-Omega_b << "Omega_b = " << std::setw(16) << Omega_b << std::endl; if (!cf.GetValueSafe("cosmology", "ZeroRadiation", false)){ - csoca::ilog << " Omega_g = " << std::setw(16) << Omega_gamma << "Omega_nu = " << std::setw(16) << Omega_nu << std::endl; + music::ilog << " Omega_g = " << std::setw(16) << Omega_gamma << "Omega_nu = " << std::setw(16) << Omega_nu << std::endl; }else{ - csoca::ilog << " Omega_r = " << std::setw(16) << Omega_r << std::endl; + music::ilog << " Omega_r = " << std::setw(16) << Omega_r << std::endl; } - csoca::ilog << " Omega_DE = " << std::setw(16) << Omega_DE << "nspect = " << std::setw(16) << nspect << std::endl; - csoca::ilog << " w0 = " << std::setw(16) << w_0 << "w_a = " << std::setw(16) << w_a << std::endl; + music::ilog << " Omega_DE = " << std::setw(16) << Omega_DE << "nspect = " << std::setw(16) << nspect << std::endl; + music::ilog << " w0 = " << std::setw(16) << w_0 << "w_a = " << std::setw(16) << w_a << std::endl; if( Omega_r > 0.0 ) { - csoca::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally."<< std::endl; - csoca::wlog << "Make sure your sim code supports this..." << std::endl; + music::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally."<< std::endl; + music::wlog << "Make sure your sim code supports this..." 
<< std::endl; } } diff --git a/include/logger.hh b/include/logger.hh index 41fc287..6c86fd0 100644 --- a/include/logger.hh +++ b/include/logger.hh @@ -6,7 +6,7 @@ #include #include -namespace csoca { +namespace music { enum LogLevel : int { Off = 0, @@ -132,4 +132,4 @@ extern LogStream wlog; extern LogStream ilog; extern LogStream dlog; -} // namespace csoca +} // namespace music diff --git a/include/particle_plt.hh b/include/particle_plt.hh index a6fc1ad..5346955 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -508,7 +508,7 @@ public: grad_x_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_y_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) { - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); const lattice lattice_type = ((lattice_str=="bcc")? lattice_bcc @@ -516,15 +516,15 @@ public: : ((lattice_str=="rsc")? lattice_rsc : lattice_sc))); - csoca::ilog << "PLT corrections for " << lattice_str << " lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; + music::ilog << "PLT corrections for " << lattice_str << " lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; double wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; init_D( lattice_type ); // init_D__old(); - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; } inline ccomplex_t gradient( const int idim, std::array ijk ) const diff --git a/src/grid_fft.cc b/src/grid_fft.cc index 2b595b8..e925dc5 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -9,7 +9,7 @@ void Grid_FFT::Setup(void) { ntot_ = (n_[2] + 2) * n_[1] * n_[0]; - csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); + music::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); if (typeid(data_t) == typeid(real_t)) { data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(real_t))); @@ -28,7 +28,7 @@ void Grid_FFT::Setup(void) } else { - csoca::elog.Print("invalid data type in Grid_FFT::setup_fft_interface\n"); + music::elog.Print("invalid data type in Grid_FFT::setup_fft_interface\n"); } fft_norm_fac_ = 1.0 / std::sqrt((real_t)((size_t)n_[0] * (real_t)n_[1] * (real_t)n_[2])); @@ -105,11 +105,11 @@ void Grid_FFT::Setup(void) } else { - csoca::elog.Print("unknown data type in Grid_FFT::setup_fft_interface\n"); + music::elog.Print("unknown data type in Grid_FFT::setup_fft_interface\n"); abort(); } - csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); + music::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); fft_norm_fac_ = 1.0 / sqrt((real_t)n_[0] * (real_t)n_[1] * (real_t)n_[2]); @@ -151,7 +151,7 @@ void Grid_FFT::Setup(void) sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension } #else - csoca::flog << "MPI is required for distributed FFT arrays!" 
<< std::endl; + music::flog << "MPI is required for distributed FFT arrays!" << std::endl; throw std::runtime_error("MPI is required for distributed FFT arrays!"); #endif //// of #ifdef #else USE_MPI //////////////////////////////////////////////////////////////////////////////////// } @@ -178,13 +178,13 @@ void Grid_FFT::FourierTransformForward(bool do_transform) if (do_transform) { double wtime = get_wtime(); - csoca::dlog.Print("[FFT] Calling Grid_FFT::to_kspace (%lux%lux%lu)", sizes_[0], sizes_[1], sizes_[2]); + music::dlog.Print("[FFT] Calling Grid_FFT::to_kspace (%lux%lux%lu)", sizes_[0], sizes_[1], sizes_[2]); FFTW_API(execute) (plan_); this->ApplyNorm(); wtime = get_wtime() - wtime; - csoca::dlog.Print("[FFT] Completed Grid_FFT::to_kspace (%lux%lux%lu), took %f s", sizes_[0], sizes_[1], sizes_[2], wtime); + music::dlog.Print("[FFT] Completed Grid_FFT::to_kspace (%lux%lux%lu), took %f s", sizes_[0], sizes_[1], sizes_[2], wtime); } sizes_[0] = local_1_size_; @@ -209,14 +209,14 @@ void Grid_FFT::FourierTransformBackward(bool do_transform) //............................. if (do_transform) { - csoca::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]); + music::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]); double wtime = get_wtime(); FFTW_API(execute)(iplan_); this->ApplyNorm(); wtime = get_wtime() - wtime; - csoca::dlog.Print("[FFT] Completed Grid_FFT::to_rspace (%dx%dx%d), took %f s\n", sizes_[0], sizes_[1], sizes_[2], wtime); + music::dlog.Print("[FFT] Completed Grid_FFT::to_rspace (%dx%dx%d), took %f s\n", sizes_[0], sizes_[1], sizes_[2], wtime); } sizes_[0] = local_0_size_; sizes_[1] = n_[1]; @@ -285,7 +285,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, { if (bdistributed) { - csoca::elog << "Attempt to read from HDF5 into MPI-distributed array. This is not supported yet!" << std::endl; + music::elog << "Attempt to read from HDF5 into MPI-distributed array. This is not supported yet!" << std::endl; abort(); } @@ -311,7 +311,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, //... dataset did not exist or was empty if (HDF_DatasetID < 0) { - csoca::elog << "Dataset \'" << ObjName.c_str() << "\' does not exist or is empty." << std::endl; + music::elog << "Dataset \'" << ObjName.c_str() << "\' does not exist or is empty." << std::endl; H5Fclose(HDF_FileID); abort(); } @@ -336,7 +336,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, if (Data.capacity() < HDF_StorageSize) { - csoca::elog << "Not enough memory to store all data in HDFReadDataset!" << std::endl; + music::elog << "Not enough memory to store all data in HDFReadDataset!" << std::endl; H5Sclose(HDF_DataspaceID); H5Dclose(HDF_DatasetID); H5Fclose(HDF_FileID); @@ -348,7 +348,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, if (Data.size() != HDF_StorageSize) { - csoca::elog << "Something went wrong while reading!" << std::endl; + music::elog << "Something went wrong while reading!" << std::endl; H5Sclose(HDF_DataspaceID); H5Dclose(HDF_DatasetID); H5Fclose(HDF_FileID); @@ -360,7 +360,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, H5Fclose(HDF_FileID); assert(dimsize[0] == dimsize[1] && dimsize[0] == dimsize[2]); - csoca::ilog << "Read external constraint data of dimensions " << dimsize[0] << "**3." << std::endl; + music::ilog << "Read external constraint data of dimensions " << dimsize[0] << "**3." 
<< std::endl; for (size_t i = 0; i < 3; ++i) this->n_[i] = dimsize[i]; @@ -390,7 +390,7 @@ void Grid_FFT::Read_from_HDF5(const std::string Filename, sum1 /= Data.size(); sum2 /= Data.size(); auto stdw = std::sqrt(sum2 - sum1 * sum1); - csoca::ilog << "Constraint field has =" << sum1 << ", -^2=" << stdw << std::endl; + music::ilog << "Constraint field has =" << sum1 << ", -^2=" << stdw << std::endl; #pragma omp parallel for reduction(+ : sum1, sum2) for (size_t i = 0; i < size(0); ++i) diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 0a88054..4964a4d 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -97,7 +97,7 @@ int Run( ConfigFile& the_config ) | the_config.ContainsKey("cosmology", "LSS_aniso_ly") | the_config.ContainsKey("cosmology", "LSS_aniso_lz") )) { - csoca::elog << "Not all dimensions of LSS_aniso_l{x,y,z} specified! Will ignore external tidal field!" << std::endl; + music::elog << "Not all dimensions of LSS_aniso_l{x,y,z} specified! Will ignore external tidal field!" << std::endl; bAddExternalTides = false; } // Anisotropy parameters for beyond box tidal field @@ -108,7 +108,7 @@ int Run( ConfigFile& the_config ) }; if( std::abs(lss_aniso_lambda[0]+lss_aniso_lambda[1]+lss_aniso_lambda[2]) > 1e-10 ){ - csoca::elog << "External tidal field is not trace-free! Will subtract trace!" << std::endl; + music::elog << "External tidal field is not trace-free! Will subtract trace!" << std::endl; auto tr_l_3 = (lss_aniso_lambda[0]+lss_aniso_lambda[1]+lss_aniso_lambda[2])/3.0; lss_aniso_lambda[0] -= tr_l_3; lss_aniso_lambda[1] -= tr_l_3; @@ -122,10 +122,10 @@ int Run( ConfigFile& the_config ) the_cosmo_calc->write_powerspectrum(astart, "input_powerspec.txt" ); - //csoca::ilog << "-----------------------------------------------------------------------------" << std::endl; + //music::ilog << "-----------------------------------------------------------------------------" << std::endl; // if( bSymplecticPT && LPTorder!=2 ){ - // csoca::wlog << "SymplecticPT has been selected and will overwrite chosen order of LPT to 2" << std::endl; + // music::wlog << "SymplecticPT has been selected and will overwrite chosen order of LPT to 2" << std::endl; // LPTorder = 2; // } @@ -180,8 +180,8 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- // Fill the grid with a Gaussian white noise field //-------------------------------------------------------------------- - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Generating white noise field...." << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "Generating white noise field...." << std::endl; the_random_number_generator->Fill_Grid(wnoise); @@ -241,11 +241,11 @@ int Run( ConfigFile& the_config ) } } - // csoca::ilog << " ... old field: re =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; - // csoca::ilog << " ... old field: im =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; - // csoca::ilog << " ... new field: re =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; - // csoca::ilog << " ... new field: im =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; - csoca::ilog << "White noise field large-scale modes overwritten with external field." << std::endl; + // music::ilog << " ... 
old field: re =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; + // music::ilog << " ... old field: im =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; + // music::ilog << " ... new field: re =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; + // music::ilog << " ... new field: im =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; + music::ilog << "White noise field large-scale modes overwritten with external field." << std::endl; } //-------------------------------------------------------------------- @@ -293,11 +293,11 @@ int Run( ConfigFile& the_config ) //====================================================================== // phi = - delta / k^2 - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Generating white noise field...." << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "Generating white noise field...." << std::endl; double wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush; + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush; phi.FourierTransformForward(false); phi.assign_function_of_grids_kdep([&](auto k, auto wn) { @@ -308,7 +308,7 @@ int Run( ConfigFile& the_config ) phi.zero_DC_mode(); - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; //====================================================================== //... compute 2LPT displacement potential .... @@ -316,7 +316,7 @@ int Run( ConfigFile& the_config ) if (LPTorder > 1) { wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush; + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush; phi2.FourierTransformForward(false); Conv.convolve_SumOfHessians(phi, {0, 0}, phi, {1, 1}, {2, 2}, op::assign_to(phi2)); Conv.convolve_Hessians(phi, {1, 1}, phi, {2, 2}, op::add_to(phi2)); @@ -334,12 +334,12 @@ int Run( ConfigFile& the_config ) } phi2.apply_InverseLaplacian(); - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; if (bAddExternalTides) { - csoca::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" << std::endl; - csoca::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl; + music::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" << std::endl; + music::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl; } } @@ -350,7 +350,7 @@ int Run( ConfigFile& the_config ) { //... 3a term ... 
wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush; + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush; phi3a.FourierTransformForward(false); Conv.convolve_Hessians(phi, {0, 0}, phi, {1, 1}, phi, {2, 2}, op::assign_to(phi3a)); Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::add_twice_to(phi3a)); @@ -358,11 +358,11 @@ int Run( ConfigFile& the_config ) Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, phi, {1, 1}, op::subtract_from(phi3a)); Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, phi, {2, 2}, op::subtract_from(phi3a)); phi3a.apply_InverseLaplacian(); - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; //... 3b term ... wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush; + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush; phi3b.FourierTransformForward(false); Conv.convolve_SumOfHessians(phi, {0, 0}, phi2, {1, 1}, {2, 2}, op::assign_to(phi3b)); Conv.convolve_SumOfHessians(phi, {1, 1}, phi2, {2, 2}, {0, 0}, op::add_to(phi3b)); @@ -372,11 +372,11 @@ int Run( ConfigFile& the_config ) Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::subtract_twice_from(phi3b)); phi3b.apply_InverseLaplacian(); phi3b *= 0.5; // factor 1/2 from definition of phi(3b)! - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; //... transversal term ... wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush; + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush; for (int idim = 0; idim < 3; ++idim) { // cyclic rotations of indices @@ -388,13 +388,13 @@ int Run( ConfigFile& the_config ) Conv.convolve_DifferenceOfHessians(phi2, {idimp, idimpp}, phi, {idimp, idimp}, {idimpp, idimpp}, op::subtract_from(*A3[idim])); A3[idim]->apply_InverseLaplacian(); } - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; } // if( bSymplecticPT ){ // //... transversal term ... 
// wtime = get_wtime(); - // csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing vNLO(3) term" << std::flush; + // music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing vNLO(3) term" << std::flush; // for( int idim=0; idim<3; ++idim ){ // // cyclic rotations of indices // A3[idim]->FourierTransformForward(false); @@ -402,7 +402,7 @@ int Run( ConfigFile& the_config ) // Conv.convolve_Gradient_and_Hessian( phi, {1}, phi2, {idim,1}, add_to(*A3[idim]) ); // Conv.convolve_Gradient_and_Hessian( phi, {2}, phi2, {idim,2}, add_to(*A3[idim]) ); // } - // csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + // music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; // } @@ -415,7 +415,7 @@ int Run( ConfigFile& the_config ) (*A3[1]) *= g3c; (*A3[2]) *= g3c; - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; /////////////////////////////////////////////////////////////////////// // we store the densities here if we compute them @@ -426,7 +426,7 @@ int Run( ConfigFile& the_config ) if (testing != "none") { - csoca::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" << std::endl; + music::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" << std::endl; if (testing == "potentials_and_densities"){ testing::output_potentials_and_densities(the_config, ngrid, boxlen, phi, phi2, phi3a, phi3b, A3); } @@ -437,14 +437,14 @@ int Run( ConfigFile& the_config ) testing::output_convergence(the_config, the_cosmo_calc.get(), ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); } else{ - csoca::flog << "unknown test '" << testing << "'" << std::endl; + music::flog << "unknown test '" << testing << "'" << std::endl; std::abort(); } } for( auto& this_species : species_list ) { - csoca::ilog << std::endl + music::ilog << std::endl << ">>> Computing ICs for species \'" << cosmo_species_name[this_species] << "\' <<<\n" << std::endl; { @@ -468,7 +468,7 @@ int Run( ConfigFile& the_config ) real_t std_phi1 = phi.std(); const real_t hbar = 2.0 * M_PI/ngrid * (2*std_phi1/Dplus0); //3sigma, but this might rather depend on gradients of phi... 
- csoca::ilog << "Semiclassical PT : hbar = " << hbar << " from sigma(phi1) = " << std_phi1 << std::endl; + music::ilog << "Semiclassical PT : hbar = " << hbar << " from sigma(phi1) = " << std_phi1 << std::endl; if( LPTorder == 1 ){ psi.assign_function_of_grids_r([hbar,Dplus0]( real_t pphi ){ diff --git a/src/logger.cc b/src/logger.cc index 2b93b89..eb07442 100644 --- a/src/logger.cc +++ b/src/logger.cc @@ -1,6 +1,6 @@ #include -namespace csoca { +namespace music { std::ofstream Logger::output_file_; LogLevel Logger::log_level_ = LogLevel::Off; @@ -39,4 +39,4 @@ LogStream wlog(glogger, LogLevel::Warning); LogStream ilog(glogger, LogLevel::Info); LogStream dlog(glogger, LogLevel::Debug); -} // namespace csoca +} // namespace music diff --git a/src/main.cc b/src/main.cc index 140e588..c16690a 100644 --- a/src/main.cc +++ b/src/main.cc @@ -38,14 +38,14 @@ void handle_eptr(std::exception_ptr eptr) // passing by value is ok std::rethrow_exception(eptr); } } catch(const std::exception& e) { - csoca::elog << "This happened: \"" << e.what() << "\"" << std::endl; + music::elog << "This happened: \"" << e.what() << "\"" << std::endl; } } int main( int argc, char** argv ) { - csoca::Logger::SetLevel(csoca::LogLevel::Info); - // csoca::Logger::SetLevel(csoca::LogLevel::Debug); + music::Logger::SetLevel(music::LogLevel::Info); + // music::Logger::SetLevel(music::LogLevel::Debug); //------------------------------------------------------------------------------ // initialise MPI @@ -61,12 +61,12 @@ int main( int argc, char** argv ) // set up lower logging levels for other tasks if( CONFIG::MPI_task_rank!=0 ) { - csoca::Logger::SetLevel(csoca::LogLevel::Error); + music::Logger::SetLevel(music::LogLevel::Error); } #endif // Ascii ART logo. generated via http://patorjk.com/software/taag/#p=display&f=Nancyj&t=monofonIC - csoca::ilog << "\n" + music::ilog << "\n" << " The unigrid version of MUSIC-2 .8888b dP a88888b. \n" << " 88 \" 88 d8\' `88 \n" << " 88d8b.d8b. .d8888b. 88d888b. .d8888b. 88aaa .d8888b. 88d888b. 
88 88 \n" @@ -75,17 +75,17 @@ int main( int argc, char** argv ) << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl; // Compilation CMake configuration, time etc info: - csoca::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; + music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; // git and versioning info: - csoca::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl; + music::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl; - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Compile time options : " << std::endl; - csoca::ilog << " Precision : " << CMAKE_PRECISION_STR << std::endl; - csoca::ilog << " Convolutions : " << CMAKE_CONVOLVER_STR << std::endl; - csoca::ilog << " PLT : " << CMAKE_PLT_STR << std::endl; - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "Compile time options : " << std::endl; + music::ilog << " Precision : " << CMAKE_PRECISION_STR << std::endl; + music::ilog << " Convolutions : " << CMAKE_CONVOLVER_STR << std::endl; + music::ilog << " PLT : " << CMAKE_PLT_STR << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; //------------------------------------------------------------------------------ @@ -99,7 +99,7 @@ int main( int argc, char** argv ) print_RNG_plugins(); print_output_plugins(); - csoca::elog << "In order to run, you need to specify a parameter file!\n" << std::endl; + music::elog << "In order to run, you need to specify a parameter file!\n" << std::endl; exit(0); } @@ -144,10 +144,10 @@ int main( int argc, char** argv ) // Write code configuration to screen //------------------------------------------------------------------------------ // hardware related infos - csoca::ilog << std::setw(32) << std::left << "CPU vendor string" << " : " << SystemStat::Cpu().get_CPUstring() << std::endl; + music::ilog << std::setw(32) << std::left << "CPU vendor string" << " : " << SystemStat::Cpu().get_CPUstring() << std::endl; // multi-threading related infos - csoca::ilog << std::setw(32) << std::left << "Available HW threads / task" << " : " << std::thread::hardware_concurrency() << " (" << CONFIG::num_threads << " used)" << std::endl; + music::ilog << std::setw(32) << std::left << "Available HW threads / task" << " : " << std::thread::hardware_concurrency() << " (" << CONFIG::num_threads << " used)" << std::endl; // memory related infos SystemStat::Memory mem; @@ -164,34 +164,34 @@ int main( int argc, char** argv ) MPI_Allreduce(&minupmem,&temp,1,MPI_UNSIGNED,MPI_MIN,MPI_COMM_WORLD); minupmem = temp; MPI_Allreduce(&maxupmem,&temp,1,MPI_UNSIGNED,MPI_MAX,MPI_COMM_WORLD); maxupmem = temp; #endif - csoca::ilog << std::setw(32) << std::left << "Total system memory (phys)" << " : " << mem.get_TotalMem()/1024/1024 << " Mb" << std::endl; - csoca::ilog << std::setw(32) << std::left << "Used system memory (phys)" << " : " << "Max: " << maxupmem << " Mb, Min: " << minupmem << " Mb" << std::endl; - csoca::ilog << std::setw(32) << std::left << 
"Available system memory (phys)" << " : " << "Max: " << maxpmem << " Mb, Min: " << minpmem << " Mb" << std::endl; + music::ilog << std::setw(32) << std::left << "Total system memory (phys)" << " : " << mem.get_TotalMem()/1024/1024 << " Mb" << std::endl; + music::ilog << std::setw(32) << std::left << "Used system memory (phys)" << " : " << "Max: " << maxupmem << " Mb, Min: " << minupmem << " Mb" << std::endl; + music::ilog << std::setw(32) << std::left << "Available system memory (phys)" << " : " << "Max: " << maxpmem << " Mb, Min: " << minpmem << " Mb" << std::endl; // MPI related infos #if defined(USE_MPI) - csoca::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "yes (" << CONFIG::MPI_task_size << " tasks)" << std::endl; - csoca::dlog << std::setw(32) << std::left << "MPI version" << " : " << MPI::get_version() << std::endl; + music::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "yes (" << CONFIG::MPI_task_size << " tasks)" << std::endl; + music::dlog << std::setw(32) << std::left << "MPI version" << " : " << MPI::get_version() << std::endl; #else - csoca::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "no" << std::endl; + music::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "no" << std::endl; #endif - csoca::ilog << std::setw(32) << std::left << "MPI supports multi-threading" << " : " << (CONFIG::MPI_threads_ok? "yes" : "no") << std::endl; + music::ilog << std::setw(32) << std::left << "MPI supports multi-threading" << " : " << (CONFIG::MPI_threads_ok? "yes" : "no") << std::endl; // Kernel related infos SystemStat::Kernel kern; auto kinfo = kern.get_kernel_info(); - csoca::ilog << std::setw(32) << std::left << "OS/Kernel version" << " : " << kinfo.kernel << " version " << kinfo.major << "." << kinfo.minor << " build " << kinfo.build_number << std::endl; + music::ilog << std::setw(32) << std::left << "OS/Kernel version" << " : " << kinfo.kernel << " version " << kinfo.major << "." << kinfo.minor << " build " << kinfo.build_number << std::endl; // FFTW related infos - csoca::ilog << std::setw(32) << std::left << "FFTW version" << " : " << fftw_version << std::endl; - csoca::ilog << std::setw(32) << std::left << "FFTW supports multi-threading" << " : " << (CONFIG::FFTW_threads_ok? "yes" : "no") << std::endl; - csoca::ilog << std::setw(32) << std::left << "FFTW mode" << " : "; + music::ilog << std::setw(32) << std::left << "FFTW version" << " : " << fftw_version << std::endl; + music::ilog << std::setw(32) << std::left << "FFTW supports multi-threading" << " : " << (CONFIG::FFTW_threads_ok? "yes" : "no") << std::endl; + music::ilog << std::setw(32) << std::left << "FFTW mode" << " : "; #if defined(FFTW_MODE_PATIENT) - csoca::ilog << "FFTW_PATIENT" << std::endl; + music::ilog << "FFTW_PATIENT" << std::endl; #elif defined(FFTW_MODE_MEASURE) - csoca::ilog << "FFTW_MEASURE" << std::endl; + music::ilog << "FFTW_MEASURE" << std::endl; #else - csoca::ilog << "FFTW_ESTIMATE" << std::endl; + music::ilog << "FFTW_ESTIMATE" << std::endl; #endif //-------------------------------------------------------------------- // Initialise plug-ins @@ -201,7 +201,7 @@ int main( int argc, char** argv ) ic_generator::Initialise( the_config ); }catch(...){ handle_eptr( std::current_exception() ); - csoca::elog << "Problem during initialisation. See error(s) above. Exiting..." << std::endl; + music::elog << "Problem during initialisation. See error(s) above. Exiting..." 
<< std::endl; #if defined(USE_MPI) MPI_Finalize(); #endif @@ -221,8 +221,8 @@ int main( int argc, char** argv ) MPI_Finalize(); #endif - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Done. Have a nice day!\n" << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "Done. Have a nice day!\n" << std::endl; return 0; } diff --git a/src/old/output_gadget2___original.cc b/src/old/output_gadget2___original.cc index b5cbf41..b8dfa62 100644 --- a/src/old/output_gadget2___original.cc +++ b/src/old/output_gadget2___original.cc @@ -142,8 +142,8 @@ protected: ifs.read((char *)&blk, sizeof(size_t)); if (blk != npart * (size_t)sizeof(T_store)) { - csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); - csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * (size_t)sizeof(T_store), blk); + music::elog.Print("Internal consistency error in gadget2 output plug-in"); + music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * (size_t)sizeof(T_store), blk); throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); } ifs.seekg(offset, std::ios::cur); @@ -161,7 +161,7 @@ protected: if (!this->good()) { - csoca::elog.Print("Could not open buffer file in gadget2 output plug-in"); + music::elog.Print("Could not open buffer file in gadget2 output plug-in"); throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); } @@ -169,8 +169,8 @@ protected: if (blk != npart * sizeof(T_store)) { - csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); - csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + music::elog.Print("Internal consistency error in gadget2 output plug-in"); + music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); } @@ -188,7 +188,7 @@ protected: if (!this->good()) { - csoca::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); + music::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); } @@ -196,8 +196,8 @@ protected: if (blk != npart * sizeof(T_store)) { - csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); - csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + music::elog.Print("Internal consistency error in gadget2 output plug-in"); + music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); } @@ -215,7 +215,7 @@ protected: if (!this->good()) { - csoca::elog.Print("Could not open buffer file in gadget2 output plug-in"); + music::elog.Print("Could not open buffer file in gadget2 output plug-in"); throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); } @@ -223,8 +223,8 @@ protected: if (blk != npart * sizeof(T_store)) { - csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); - csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + music::elog.Print("Internal consistency error in gadget2 output plug-in"); + 
music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); } @@ -246,7 +246,7 @@ protected: if (!this->good()) { - csoca::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); + music::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); } @@ -254,8 +254,8 @@ protected: if (blk != npart * sizeof(T_store)) { - csoca::elog.Print("Internal consistency error in gadget2 output plug-in"); - csoca::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); + music::elog.Print("Internal consistency error in gadget2 output plug-in"); + music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); } @@ -394,7 +394,7 @@ protected: std::cout << " - Gadget2 : writing " << nptot << " particles to file...\n"; for (int i = 0; i < 6; ++i) if (np_per_type_[i] > 0) - csoca::ilog.Print(" type %d : %12llu [m=%g]", i, np_per_type_[i], header_.mass[i]); + music::ilog.Print(" type %d : %12llu [m=%g]", i, np_per_type_[i], header_.mass[i]); bool bbaryons = np_per_type_[0] > 0; @@ -419,10 +419,10 @@ protected: if (nfiles_ > 1) { - csoca::ilog.Print("Gadget2 : distributing particles to %d files", nfiles_); + music::ilog.Print("Gadget2 : distributing particles to %d files", nfiles_); //<< " " << std::setw(12) << "type 0" << "," << std::setw(12) << "type 1" << "," << std::setw(12) << "type " << bndparticletype_ << std::endl; for (unsigned i = 0; i < nfiles_; ++i) - csoca::ilog.Print(" file %i : %12llu", i, np_tot_per_file[i], header_.mass[i]); + music::ilog.Print(" file %i : %12llu", i, np_tot_per_file[i], header_.mass[i]); } size_t curr_block_buf_size = block_buf_size_; @@ -432,7 +432,7 @@ protected: if (nptot >= 1ul << 32 && !bneed_long_ids) { bneed_long_ids = true; - csoca::wlog.Print("Need long particle IDs, will write 64bit, make sure to enable in Gadget!"); + music::wlog.Print("Need long particle IDs, will write 64bit, make sure to enable in Gadget!"); } for (unsigned ifile = 0; ifile < nfiles_; ++ifile) @@ -700,7 +700,7 @@ protected: static bool bdisplayed = false; if (!bdisplayed) { - csoca::ilog.Print("Gadget2 : set initial gas temperature to %.2f K/mu", Tini / mu); + music::ilog.Print("Gadget2 : set initial gas temperature to %.2f K/mu", Tini / mu); bdisplayed = true; } } @@ -827,7 +827,7 @@ public: shift_halfcell_ = cf.GetValueSafe("output", "gadget_cell_centered", false); //if( nfiles_ < (int)ceil((double)npart/(double)npartmax_) ) - // csoca::wlog.Print("Should use more files."); + // music::wlog.Print("Should use more files."); if (nfiles_ > 1) { @@ -838,7 +838,7 @@ public: ofs_.open(ffname, std::ios::binary | std::ios::trunc); if (!ofs_.good()) { - csoca::elog.Print("gadget-2 output plug-in could not open output file \'%s\' for writing!", ffname); + music::elog.Print("gadget-2 output plug-in could not open output file \'%s\' for writing!", ffname); throw std::runtime_error(std::string("gadget-2 output plug-in could not open output file \'") + std::string(ffname) + "\' for writing!\n"); } ofs_.close(); @@ -849,7 +849,7 @@ public: ofs_.open(fname_.c_str(), std::ios::binary | std::ios::trunc); if (!ofs_.good()) { - csoca::elog.Print("gadget-2 output plug-in could not open output file 
\'%s\' for writing!", fname_.c_str()); + music::elog.Print("gadget-2 output plug-in could not open output file \'%s\' for writing!", fname_.c_str()); throw std::runtime_error(std::string("gadget-2 output plug-in could not open output file \'") + fname_ + "\' for writing!\n"); } ofs_.close(); @@ -875,7 +875,7 @@ public: header_.flag_doubleprecision = 1; else { - csoca::elog.Print("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); + music::elog.Print("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); throw std::runtime_error("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); } @@ -896,7 +896,7 @@ public: unit_length_chosen_ = (*mapit).second; else { - csoca::elog.Print("Gadget: length unit \'%s\' unknown in gadget_lunit", lunitstr.c_str()); + music::elog.Print("Gadget: length unit \'%s\' unknown in gadget_lunit", lunitstr.c_str()); throw std::runtime_error("Unknown length unit specified for Gadget output plugin"); } @@ -904,7 +904,7 @@ public: unit_mass_chosen_ = (*mapit).second; else { - csoca::elog.Print("Gadget: mass unit \'%s\' unknown in gadget_munit", munitstr.c_str()); + music::elog.Print("Gadget: mass unit \'%s\' unknown in gadget_munit", munitstr.c_str()); throw std::runtime_error("Unknown mass unit specified for Gadget output plugin"); } @@ -912,7 +912,7 @@ public: unit_vel_chosen_ = (*mapit).second; else { - csoca::elog.Print("Gadget: velocity unit \'%s\' unknown in gadget_vunit", vunitstr.c_str()); + music::elog.Print("Gadget: velocity unit \'%s\' unknown in gadget_vunit", vunitstr.c_str()); throw std::runtime_error("Unknown velocity unit specified for Gadget output plugin"); } @@ -922,14 +922,14 @@ public: kpcunits_ = cf.GetValueSafe("output", "gadget_usekpc", false); if (kpcunits_) unit_length_chosen_ = 1e-3; - csoca::wlog.Print("Deprecated option \'gadget_usekpc\' may override unit selection. Use \'gadget_lunit\' instead."); + music::wlog.Print("Deprecated option \'gadget_usekpc\' may override unit selection. Use \'gadget_lunit\' instead."); } if (cf.ContainsKey("output", "gadget_usemsol")) { msolunits_ = cf.GetValueSafe("output", "gadget_usemsol", false); if (msolunits_) unit_mass_chosen_ = 1e-10; - csoca::wlog.Print("Deprecated option \'gadget_usemsol\' may override unit selection. Use \'gadget_munit\' instead."); + music::wlog.Print("Deprecated option \'gadget_usemsol\' may override unit selection. Use \'gadget_munit\' instead."); } //... coarse particle properties... @@ -944,14 +944,14 @@ public: if (bndparticletype_ == 0 || //bndparticletype_ == 1 || bndparticletype_ == 4 || bndparticletype_ > 5) { - csoca::elog.Print("Coarse particles cannot be of Gadget particle type %d in output plugin.", bndparticletype_); + music::elog.Print("Coarse particles cannot be of Gadget particle type %d in output plugin.", bndparticletype_); throw std::runtime_error("Specified illegal Gadget particle type for coarse particles"); } } else { if (cf.GetValueSafe("output", "gadget_coarsetype", 5) != 5) - csoca::wlog.Print("Gadget: Option \'gadget_spreadcoarse\' forces \'gadget_coarsetype=5\'! Will override."); + music::wlog.Print("Gadget: Option \'gadget_spreadcoarse\' forces \'gadget_coarsetype=5\'! Will override."); } //... set time ...................................................... 
@@ -1056,7 +1056,7 @@ public: if (nwritten != npcoarse) { - csoca::elog.Print("nwritten = %llu != npcoarse = %llu\n", nwritten, npcoarse); + music::elog.Print("nwritten = %llu != npcoarse = %llu\n", nwritten, npcoarse); throw std::runtime_error("Internal consistency error while writing temporary file for masses"); } diff --git a/src/output_plugin.cc b/src/output_plugin.cc index 35664dc..7e287c2 100644 --- a/src/output_plugin.cc +++ b/src/output_plugin.cc @@ -23,14 +23,14 @@ void print_output_plugins() std::map< std::string, output_plugin_creator *>::iterator it; it = m.begin(); - csoca::ilog << "Available output plug-ins:\n"; + music::ilog << "Available output plug-ins:\n"; while( it!=m.end() ) { if( it->second ) - csoca::ilog << "\t\'" << it->first << "\'\n"; + music::ilog << "\t\'" << it->first << "\'\n"; ++it; } - csoca::ilog << std::endl; + music::ilog << std::endl; } std::unique_ptr select_output_plugin( ConfigFile& cf ) @@ -42,13 +42,13 @@ std::unique_ptr select_output_plugin( ConfigFile& cf ) if( !the_output_plugin_creator ) { - csoca::elog << "Error: output plug-in \'" << formatname << "\' not found." << std::endl; + music::elog << "Error: output plug-in \'" << formatname << "\' not found." << std::endl; print_output_plugins(); throw std::runtime_error("Unknown output plug-in"); }else{ - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << std::setw(32) << std::left << "Output plugin" << " : " << formatname << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << std::setw(32) << std::left << "Output plugin" << " : " << formatname << std::endl; } return std::move(the_output_plugin_creator->create( cf )); diff --git a/src/plugins/output_arepo.cc b/src/plugins/output_arepo.cc index 1af182f..8263060 100644 --- a/src/plugins/output_arepo.cc +++ b/src/plugins/output_arepo.cc @@ -154,7 +154,7 @@ public: HDFWriteGroupAttribute(this_fname_, "Header", "suggested_highressoft", from_value(softening_)); HDFWriteGroupAttribute(this_fname_, "Header", "suggested_gas_Tinit", from_value(Tini_)); - csoca::ilog << "Wrote" << std::endl; + music::ilog << "Wrote" << std::endl; } output_type write_species_as(const cosmo_species &) const { return output_type::particles; } diff --git a/src/plugins/output_gadget2.cc b/src/plugins/output_gadget2.cc index e7f20e6..ba3a986 100644 --- a/src/plugins/output_gadget2.cc +++ b/src/plugins/output_gadget2.cc @@ -131,7 +131,7 @@ public: uint32_t blocksz; std::ofstream ofs(fname.c_str(), std::ios::binary); - csoca::ilog << "Writer \'" << this->interface_name_ << "\' : Writing data for " << pc.get_global_num_particles() << " particles." << std::endl; + music::ilog << "Writer \'" << this->interface_name_ << "\' : Writing data for " << pc.get_global_num_particles() << " particles." 
<< std::endl; blocksz = sizeof(header); ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); diff --git a/src/plugins/output_gadget_hdf5.cc b/src/plugins/output_gadget_hdf5.cc index c862e41..e6a821b 100644 --- a/src/plugins/output_gadget_hdf5.cc +++ b/src/plugins/output_gadget_hdf5.cc @@ -102,26 +102,29 @@ public: // use destructor to write header post factum ~gadget_hdf5_output_plugin() { - HDFCreateGroup(this_fname_, "Header"); - HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_ThisFile", from_6array(header_.npart)); - HDFWriteGroupAttribute(this_fname_, "Header", "MassTable", from_6array(header_.mass)); - HDFWriteGroupAttribute(this_fname_, "Header", "Time", from_value(header_.time)); - HDFWriteGroupAttribute(this_fname_, "Header", "Redshift", from_value(header_.redshift)); - HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Sfr", from_value(header_.flag_sfr)); - HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Feedback", from_value(header_.flag_feedback)); - HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total", from_6array(header_.npartTotal)); - HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Cooling", from_value(header_.flag_cooling)); - HDFWriteGroupAttribute(this_fname_, "Header", "NumFilesPerSnapshot", from_value(header_.num_files)); - HDFWriteGroupAttribute(this_fname_, "Header", "BoxSize", from_value(header_.BoxSize)); - HDFWriteGroupAttribute(this_fname_, "Header", "Omega0", from_value(header_.Omega0)); - HDFWriteGroupAttribute(this_fname_, "Header", "OmegaLambda", from_value(header_.OmegaLambda)); - HDFWriteGroupAttribute(this_fname_, "Header", "HubbleParam", from_value(header_.HubbleParam)); - HDFWriteGroupAttribute(this_fname_, "Header", "Flag_StellarAge", from_value(header_.flag_stellarage)); - HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Metals", from_value(header_.flag_metals)); - HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array(header_.npartTotalHighWord)); - HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Entropy_ICs", from_value(header_.flag_entropy_instead_u)); + if (!std::uncaught_exception()) + { + HDFCreateGroup(this_fname_, "Header"); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_ThisFile", from_6array(header_.npart)); + HDFWriteGroupAttribute(this_fname_, "Header", "MassTable", from_6array(header_.mass)); + HDFWriteGroupAttribute(this_fname_, "Header", "Time", from_value(header_.time)); + HDFWriteGroupAttribute(this_fname_, "Header", "Redshift", from_value(header_.redshift)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Sfr", from_value(header_.flag_sfr)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Feedback", from_value(header_.flag_feedback)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total", from_6array(header_.npartTotal)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Cooling", from_value(header_.flag_cooling)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumFilesPerSnapshot", from_value(header_.num_files)); + HDFWriteGroupAttribute(this_fname_, "Header", "BoxSize", from_value(header_.BoxSize)); + HDFWriteGroupAttribute(this_fname_, "Header", "Omega0", from_value(header_.Omega0)); + HDFWriteGroupAttribute(this_fname_, "Header", "OmegaLambda", from_value(header_.OmegaLambda)); + HDFWriteGroupAttribute(this_fname_, "Header", "HubbleParam", from_value(header_.HubbleParam)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_StellarAge", from_value(header_.flag_stellarage)); + HDFWriteGroupAttribute(this_fname_, "Header", 
"Flag_Metals", from_value(header_.flag_metals)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array(header_.npartTotalHighWord)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Entropy_ICs", from_value(header_.flag_entropy_instead_u)); - csoca::ilog << "Wrote Gadget-HDF5 file(s) to " << this_fname_ << std::endl; + music::ilog << "Wrote Gadget-HDF5 file(s) to " << this_fname_ << std::endl; + } } output_type write_species_as(const cosmo_species &) const { return output_type::particles; } @@ -151,7 +154,7 @@ public: case cosmo_species::dm: return 1; case cosmo_species::baryon: - return 0; + return 2; case cosmo_species::neutrino: return 3; } diff --git a/src/plugins/output_generic.cc b/src/plugins/output_generic.cc index 1a53e84..d96358e 100644 --- a/src/plugins/output_generic.cc +++ b/src/plugins/output_generic.cc @@ -99,7 +99,7 @@ void generic_output_plugin::write_grid_data(const Grid_FFT &g, const cos { std::string field_name = this->get_field_name( s, c ); g.Write_to_HDF5(fname_, field_name); - csoca::ilog << interface_name_ << " : Wrote field \'" << field_name << "\' to file \'" << fname_ << "\'" << std::endl; + music::ilog << interface_name_ << " : Wrote field \'" << field_name << "\' to file \'" << fname_ << "\'" << std::endl; } namespace diff --git a/src/plugins/output_grafic2.cc b/src/plugins/output_grafic2.cc index 31e8a04..43eb7cf 100644 --- a/src/plugins/output_grafic2.cc +++ b/src/plugins/output_grafic2.cc @@ -60,7 +60,7 @@ public: if (std::abs(std::pow(2.0, levelmin_) - double(ngrid)) > 1e-4) { - csoca::elog << interface_name_ << " plugin requires setup/GridRes to be power of 2!" << std::endl; + music::elog << interface_name_ << " plugin requires setup/GridRes to be power of 2!" << std::endl; abort(); } @@ -223,7 +223,7 @@ void grafic2_output_plugin::write_grid_data(const Grid_FFT &g, const cos } // end loop over write_rank - csoca::ilog << interface_name_ << " : Wrote field to file \'" << file_name << "\'" << std::endl; + music::ilog << interface_name_ << " : Wrote field to file \'" << file_name << "\'" << std::endl; } void grafic2_output_plugin::write_ramses_namelist(void) const @@ -279,7 +279,7 @@ void grafic2_output_plugin::write_ramses_namelist(void) const << "m_refine=" << 1 + naddref << "*8.,\n" << "/\n"; - csoca::ilog << interface_name_ << " wrote partial RAMSES namelist file \'" << fname_ << "\'" << std::endl; + music::ilog << interface_name_ << " wrote partial RAMSES namelist file \'" << fname_ << "\'" << std::endl; } namespace diff --git a/src/plugins/random_music.cc b/src/plugins/random_music.cc index a13726f..073a6f9 100644 --- a/src/plugins/random_music.cc +++ b/src/plugins/random_music.cc @@ -56,7 +56,7 @@ public: if (restart_ && !disk_cached_) { - csoca::elog.Print("Cannot restart from mem cached random numbers."); + music::elog.Print("Cannot restart from mem cached random numbers."); throw std::runtime_error("Cannot restart from mem cached random numbers."); } @@ -116,7 +116,7 @@ void RNG_music::parse_random_parameters(void) { if (ltemp <= 0) { - csoca::elog.Print("Specified seed [random]/%s needs to be a number >0!", seedstr); + music::elog.Print("Specified seed [random]/%s needs to be a number >0!", seedstr); throw std::runtime_error("Seed values need to be >0"); } rngseeds_.push_back(ltemp); @@ -126,7 +126,7 @@ void RNG_music::parse_random_parameters(void) { rngfnames_.push_back(tempstr); rngseeds_.push_back(-1); - csoca::ilog.Print("Random numbers for level %3d will be read from file.", i); + 
music::ilog.Print("Random numbers for level %3d will be read from file.", i); } } @@ -160,7 +160,7 @@ void RNG_music::compute_random_numbers(void) //#warning add possibility to read noise from file also here! if (rngfnames_[i].size() > 0) - csoca::ilog.Print("Warning: Cannot use filenames for higher levels currently! Ignoring!"); + music::ilog.Print("Warning: Cannot use filenames for higher levels currently! Ignoring!"); randc[i] = new rng(*randc[i - 1], ran_cube_size_, rngseeds_[i], true); delete randc[i - 1]; @@ -180,7 +180,7 @@ void RNG_music::compute_random_numbers(void) for (int ilevel = levelmin_seed_ - 1; ilevel >= (int)levelmin_; --ilevel) { if (rngseeds_[ilevel - levelmin_] > 0) - csoca::ilog.Print("Warning: random seed for level %d will be ignored.\n" + music::ilog.Print("Warning: random seed for level %d will be ignored.\n" " consistency requires that it is obtained by restriction from level %d", ilevel, levelmin_seed_); diff --git a/src/plugins/random_music_wnoise_generator.cc b/src/plugins/random_music_wnoise_generator.cc index 68b4649..18e287f 100644 --- a/src/plugins/random_music_wnoise_generator.cc +++ b/src/plugins/random_music_wnoise_generator.cc @@ -11,7 +11,7 @@ template music_wnoise_generator::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx) : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed) { - csoca::ilog.Print("Generating random numbers (1) with seed %ld", baseseed); + music::ilog.Print("Generating random numbers (1) with seed %ld", baseseed); initialize(); fill_subvolume(x0, lx); @@ -21,7 +21,7 @@ template music_wnoise_generator::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, bool zeromean) : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed) { - csoca::ilog.Print("Generating random numbers (2) with seed %ld", baseseed); + music::ilog.Print("Generating random numbers (2) with seed %ld", baseseed); double mean = 0.0; size_t res_l = res; @@ -31,7 +31,7 @@ music_wnoise_generator::music_wnoise_generator(unsigned res, unsigned cubesiz cubesize_ = res_; if (!musicnoise) - csoca::elog.Print("This currently breaks compatibility. Need to disable by hand! Make sure to not check into repo"); + music::elog.Print("This currently breaks compatibility. Need to disable by hand! Make sure to not check into repo"); initialize(); @@ -90,7 +90,7 @@ music_wnoise_generator::music_wnoise_generator(unsigned res, std::string rand std::ifstream ifs(randfname.c_str(), std::ios::binary); if (!ifs) { - csoca::elog.Print("Could not open random number file \'%s\'!", randfname.c_str()); + music::elog.Print("Could not open random number file \'%s\'!", randfname.c_str()); throw std::runtime_error(std::string("Could not open random number file \'") + randfname + std::string("\'!")); } @@ -186,7 +186,7 @@ music_wnoise_generator::music_wnoise_generator(unsigned res, std::string rand std::vector in_float; std::vector in_double; - csoca::ilog.Print("Random number file \'%s\'\n contains %ld numbers. Reading...", randfname.c_str(), nx * ny * nz); + music::ilog.Print("Random number file \'%s\'\n contains %ld numbers. 
Reading...", randfname.c_str(), nx * ny * nz); long double sum = 0.0, sum2 = 0.0; size_t count = 0; @@ -285,7 +285,7 @@ music_wnoise_generator::music_wnoise_generator(unsigned res, std::string rand mean = sum / count; var = sum2 / count - mean * mean; - csoca::ilog.Print("Random numbers in file have \n mean = %f and var = %f", mean, var); + music::ilog.Print("Random numbers in file have \n mean = %f and var = %f", mean, var); } //... copy construct by averaging down @@ -298,7 +298,7 @@ music_wnoise_generator::music_wnoise_generator(/*const*/ music_wnoise_generat long double sum = 0.0, sum2 = 0.0; size_t count = 0; - csoca::ilog.Print("Generating a coarse white noise field by k-space degrading"); + music::ilog.Print("Generating a coarse white noise field by k-space degrading"); //... initialize properties of container res_ = rc.res_ / 2; cubesize_ = res_; @@ -307,7 +307,7 @@ music_wnoise_generator::music_wnoise_generator(/*const*/ music_wnoise_generat if (sizeof(real_t) != sizeof(T)) { - csoca::elog.Print("type mismatch with real_t in k-space averaging"); + music::elog.Print("type mismatch with real_t in k-space averaging"); throw std::runtime_error("type mismatch with real_t in k-space averaging"); } @@ -405,7 +405,7 @@ music_wnoise_generator::music_wnoise_generator(/*const*/ music_wnoise_generat rmean = sum / count; rvar = sum2 / count - rmean * rmean; - csoca::ilog.Print("Restricted random numbers have\n mean = %f, var = %f", rmean, rvar); + music::ilog.Print("Restricted random numbers have\n mean = %f, var = %f", rmean, rvar); } template @@ -438,7 +438,7 @@ music_wnoise_generator::music_wnoise_generator(music_wnoise_generator &rc, if (kspace) { - csoca::ilog.Print("Generating a constrained random number set with seed %ld\n using coarse mode replacement...", baseseed); + music::ilog.Print("Generating a constrained random number set with seed %ld\n using coarse mode replacement...", baseseed); assert(lx[0] % 2 == 0 && lx[1] % 2 == 0 && lx[2] % 2 == 0); size_t nx = lx[0], ny = lx[1], nz = lx[2], nxc = lx[0] / 2, nyc = lx[1] / 2, nzc = lx[2] / 2; @@ -573,7 +573,7 @@ music_wnoise_generator::music_wnoise_generator(music_wnoise_generator &rc, } else { - csoca::ilog.Print("Generating a constrained random number set with seed %ld\n using Hoffman-Ribak constraints...", baseseed); + music::ilog.Print("Generating a constrained random number set with seed %ld\n using Hoffman-Ribak constraints...", baseseed); double fac = 1.0 / sqrt(8.0); //1./sqrt(8.0); @@ -613,7 +613,7 @@ void music_wnoise_generator::register_cube(int i, int j, int k) rnums_.push_back(NULL); cubemap_[icube] = rnums_.size() - 1; #ifdef DEBUG - csoca::dlog.Print("registering new cube %d,%d,%d . ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]); + music::dlog.Print("registering new cube %d,%d,%d . 
ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]); #endif } } @@ -637,7 +637,7 @@ double music_wnoise_generator::fill_cube(int i, int j, int k) if (it == cubemap_.end()) { - csoca::elog.Print("Attempt to access non-registered random number cube!"); + music::elog.Print("Attempt to access non-registered random number cube!"); throw std::runtime_error("Attempt to access non-registered random number cube!"); } @@ -674,7 +674,7 @@ void music_wnoise_generator::subtract_from_cube(int i, int j, int k, double v if (it == cubemap_.end()) { - csoca::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::subtract_from_cube", i, j, k); + music::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::subtract_from_cube", i, j, k); throw std::runtime_error("Attempt to access unallocated RND cube in music_wnoise_generator::subtract_from_cube"); } @@ -700,7 +700,7 @@ void music_wnoise_generator::free_cube(int i, int j, int k) if (it == cubemap_.end()) { - csoca::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::free_cube", i, j, k); + music::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::free_cube", i, j, k); throw std::runtime_error("Attempt to access unallocated RND cube in music_wnoise_generator::free_cube"); } @@ -724,7 +724,7 @@ void music_wnoise_generator::initialize(void) cubesize_ = res_; } - csoca::ilog.Print("Generating random numbers w/ sample cube size of %d", cubesize_); + music::ilog.Print("Generating random numbers w/ sample cube size of %d", cubesize_); } template @@ -741,8 +741,8 @@ double music_wnoise_generator::fill_subvolume(int *i0, int *n) ncube[2] = (int)(n[2] / cubesize_) + 2; #ifdef DEBUG - csoca::dlog.Print("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]); - csoca::dlog.Print("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]); + music::dlog.Print("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]); + music::dlog.Print("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]); #endif double mean = 0.0; @@ -836,7 +836,7 @@ void music_wnoise_generator::print_allocated(void) if (rnums_[i] != NULL) ncount++; - csoca::ilog.Print(" -> %d of %d random number cubes currently allocated", ncount, ntot); + music::ilog.Print(" -> %d of %d random number cubes currently allocated", ncount, ntot); } template class music_wnoise_generator; diff --git a/src/plugins/random_music_wnoise_generator.hh b/src/plugins/random_music_wnoise_generator.hh index 5b9cb36..4dd1b37 100644 --- a/src/plugins/random_music_wnoise_generator.hh +++ b/src/plugins/random_music_wnoise_generator.hh @@ -80,7 +80,7 @@ protected: if (it == cubemap_.end()) { - csoca::elog.Print("attempting to copy data from non-existing RND cube %d,%d,%d", i, j, k); + music::elog.Print("attempting to copy data from non-existing RND cube %d,%d,%d", i, j, k); throw std::runtime_error("attempting to copy data from non-existing RND cube"); } @@ -186,7 +186,7 @@ public: if (it == cubemap_.end()) { - csoca::elog.Print("Attempting to copy data from non-existing RND cube %d,%d,%d @ %d,%d,%d", ic, jc, kc, i, j, k); + music::elog.Print("Attempting to copy data from non-existing RND cube %d,%d,%d @ %d,%d,%d", ic, jc, kc, i, j, k); throw std::runtime_error("attempting to copy data from non-existing RND cube"); } @@ -194,7 
+194,7 @@ public: if (rnums_[cubeidx] == NULL) { - csoca::elog.Print("Attempting to access data from non-allocated RND cube %d,%d,%d", ic, jc, kc); + music::elog.Print("Attempting to access data from non-allocated RND cube %d,%d,%d", ic, jc, kc); throw std::runtime_error("attempting to access data from non-allocated RND cube"); } diff --git a/src/plugins/transfer_CAMB_file.cc b/src/plugins/transfer_CAMB_file.cc index 0f5d5ef..54ec9cb 100644 --- a/src/plugins/transfer_CAMB_file.cc +++ b/src/plugins/transfer_CAMB_file.cc @@ -39,7 +39,7 @@ private: if (MPI::COMM_WORLD.Get_rank() == 0) { #endif - csoca::ilog.Print("Reading tabulated transfer function data from file \n \'%s\'", m_filename_Tk.c_str()); + music::ilog.Print("Reading tabulated transfer function data from file \n \'%s\'", m_filename_Tk.c_str()); std::string line; std::ifstream ifs(m_filename_Tk.c_str()); @@ -89,7 +89,7 @@ private: if (ss.bad() || ss.fail()) { - csoca::elog.Print("Error reading the transfer function file (corrupt or not in expected format)!"); + music::elog.Print("Error reading the transfer function file (corrupt or not in expected format)!"); throw std::runtime_error("Error reading transfer function file \'" + m_filename_Tk + "\'"); } @@ -134,10 +134,10 @@ private: ifs.close(); - csoca::ilog.Print("Read CAMB transfer function table with %d rows", m_nlines); + music::ilog.Print("Read CAMB transfer function table with %d rows", m_nlines); if (m_linbaryoninterp) - csoca::ilog.Print("Using log-lin interpolation for baryons\n (TF is not " + music::ilog.Print("Using log-lin interpolation for baryons\n (TF is not " "positive definite)"); #ifdef WITH_MPI diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 281d381..6b113a5 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -131,13 +131,13 @@ private: zlist << std::max(ztarget_, zstart_) << ", " << std::min(ztarget_, zstart_) << ", 0.0"; add_class_parameter("z_pk", zlist.str()); - csoca::ilog << "Computing transfer function via ClassEngine..." << std::endl; + music::ilog << "Computing transfer function via ClassEngine..." << std::endl; double wtime = get_wtime(); the_ClassEngine_ = std::move(std::make_unique(pars_, false)); wtime = get_wtime() - wtime; - csoca::ilog << "CLASS took " << wtime << " s." << std::endl; + music::ilog << "CLASS took " << wtime << " s." << std::endl; } //! run ClassEngine with parameters set up @@ -196,7 +196,7 @@ public: { this->tf_isnormalised_ = true; tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0)); - csoca::ilog << "Using A_s to normalise the transfer function!" << std::endl; + music::ilog << "Using A_s to normalise the transfer function!" << std::endl; } kmax_ = std::max(20.0, 2.0 * M_PI / lbox * nres / 2 * sqrt(3) * 2.0); // 120% of spatial diagonal, or k=10h Mpc-1 @@ -230,7 +230,7 @@ public: kmin_ = k[0]; kmax_ = k.back(); - csoca::ilog << "CLASS table contains k = " << this->get_kmin() << " to " << this->get_kmax() << " h Mpc-1." << std::endl; + music::ilog << "CLASS table contains k = " << this->get_kmin() << " to " << this->get_kmax() << " h Mpc-1." 
<< std::endl; //-------------------------------------------------------------------------- // single fluid growing/decaying mode decomposition diff --git a/src/random_plugin.cc b/src/random_plugin.cc index 5cfea9a..87bf08f 100644 --- a/src/random_plugin.cc +++ b/src/random_plugin.cc @@ -13,15 +13,15 @@ void print_RNG_plugins() std::map &m = get_RNG_plugin_map(); std::map::iterator it; it = m.begin(); - csoca::ilog << "Available random number generator plug-ins:" << std::endl; + music::ilog << "Available random number generator plug-ins:" << std::endl; while (it != m.end()) { if ((*it).second){ - csoca::ilog.Print("\t\'%s\'\n", (*it).first.c_str()); + music::ilog.Print("\t\'%s\'\n", (*it).first.c_str()); } ++it; } - csoca::ilog << std::endl; + music::ilog << std::endl; } std::unique_ptr select_RNG_plugin(ConfigFile &cf) @@ -32,14 +32,14 @@ std::unique_ptr select_RNG_plugin(ConfigFile &cf) if (!the_RNG_plugin_creator) { - csoca::ilog.Print("Invalid/Unregistered random number generator plug-in encountered : %s", rngname.c_str()); + music::ilog.Print("Invalid/Unregistered random number generator plug-in encountered : %s", rngname.c_str()); print_RNG_plugins(); throw std::runtime_error("Unknown random number generator plug-in"); } else { - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << std::setw(32) << std::left << "Random number generator plugin" << " : " << rngname << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << std::setw(32) << std::left << "Random number generator plugin" << " : " << rngname << std::endl; } return std::move(the_RNG_plugin_creator->Create(cf)); diff --git a/src/testing.cc b/src/testing.cc index ff990e1..c65eb53 100644 --- a/src/testing.cc +++ b/src/testing.cc @@ -232,7 +232,7 @@ void output_velocity_displacement_symmetries( } - csoca::ilog << "std. deviation of invariant : ( D+ | I_xy | I_yz | I_zx ) \n" + music::ilog << "std. 
deviation of invariant : ( D+ | I_xy | I_yz | I_zx ) \n" << std::setw(16) << dplus << " " << std::setw(16) << Icomp[0] << " " << std::setw(16) << Icomp[1] << " " diff --git a/src/transfer_function_plugin.cc b/src/transfer_function_plugin.cc index 6101ada..424ae82 100644 --- a/src/transfer_function_plugin.cc +++ b/src/transfer_function_plugin.cc @@ -13,14 +13,14 @@ void print_TransferFunction_plugins() std::map &m = get_TransferFunction_plugin_map(); std::map::iterator it; it = m.begin(); - csoca::ilog << "Available transfer function plug-ins:" << std::endl; + music::ilog << "Available transfer function plug-ins:" << std::endl; while (it != m.end()) { if ((*it).second) - csoca::ilog << "\t\'" << (*it).first << "\'" << std::endl; + music::ilog << "\t\'" << (*it).first << "\'" << std::endl; ++it; } - csoca::ilog << std::endl; + music::ilog << std::endl; } std::unique_ptr select_TransferFunction_plugin(ConfigFile &cf) @@ -31,14 +31,14 @@ std::unique_ptr select_TransferFunction_plugin(ConfigFi if (!the_TransferFunction_plugin_creator) { - csoca::elog << "Invalid/Unregistered transfer function plug-in encountered : " << tfname << std::endl; + music::elog << "Invalid/Unregistered transfer function plug-in encountered : " << tfname << std::endl; print_TransferFunction_plugins(); throw std::runtime_error("Unknown transfer function plug-in"); } else { - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << std::setw(32) << std::left << "Transfer function plugin" << " : " << tfname << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << std::setw(32) << std::left << "Transfer function plugin" << " : " << tfname << std::endl; } return std::move(the_TransferFunction_plugin_creator->create(cf)); From 06b3a84bd39c1c4df745078e4c39c04daaf1c1d4 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 20:39:52 +0200 Subject: [PATCH 102/130] naming convention homogenisation --- include/logger.hh | 76 +++++++++++++++---------------- src/grid_fft.cc | 2 +- src/logger.cc | 26 +++++------ src/main.cc | 6 +-- src/output_plugin.cc | 4 +- src/plugins/transfer_CAMB_file.cc | 4 +- 6 files changed, 59 insertions(+), 59 deletions(-) diff --git a/include/logger.hh b/include/logger.hh index 6c86fd0..e13012f 100644 --- a/include/logger.hh +++ b/include/logger.hh @@ -8,33 +8,33 @@ namespace music { -enum LogLevel : int { - Off = 0, - Fatal = 1, - Error = 2, - Warning = 3, - Info = 4, - Debug = 5 +enum log_level : int { + off = 0, + fatal = 1, + error = 2, + warning = 3, + info = 4, + debug = 5 }; -class Logger { +class logger { private: - static LogLevel log_level_; + static log_level log_level_; static std::ofstream output_file_; public: - Logger() = default; - ~Logger() = default; + logger() = default; + ~logger() = default; - static void SetLevel(const LogLevel &level); - static LogLevel GetLevel(); + static void set_level(const log_level &level); + static log_level get_level(); - static void SetOutput(const std::string filename); - static void UnsetOutput(); + static void set_output(const std::string filename); + static void unset_output(); - static std::ofstream &GetOutput(); + static std::ofstream &get_output(); - template Logger &operator<<(const T &item) { + template logger &operator<<(const T &item) { std::cout << item; if (output_file_.is_open()) { output_file_ << item; @@ -42,7 +42,7 @@ public: return *this; } - Logger &operator<<(std::ostream 
&(*fp)(std::ostream &)) { + logger &operator<<(std::ostream &(*fp)(std::ostream &)) { std::cout << fp; if (output_file_.is_open()) { output_file_ << fp; @@ -51,32 +51,32 @@ public: } }; -class LogStream { +class log_stream { private: - Logger &logger_; - LogLevel stream_level_; + logger &logger_; + log_level stream_level_; std::string line_prefix_, line_postfix_; bool newline; public: - LogStream(Logger &logger, const LogLevel &level) + log_stream(logger &logger, const log_level &level) : logger_(logger), stream_level_(level), newline(true) { switch (stream_level_) { - case LogLevel::Fatal: + case log_level::fatal: line_prefix_ = "\033[31mFatal : "; break; - case LogLevel::Error: + case log_level::error: line_prefix_ = "\033[31mError : "; break; - case LogLevel::Warning: + case log_level::warning: line_prefix_ = "\033[33mWarning : "; break; - case LogLevel::Info: + case log_level::info: //line_prefix_ = " | Info | "; line_prefix_ = " \033[0m"; break; - case LogLevel::Debug: + case log_level::debug: line_prefix_ = "Debug : \033[0m"; break; default: @@ -85,14 +85,14 @@ public: } line_postfix_ = "\033[0m"; } - ~LogStream() = default; + ~log_stream() = default; inline std::string GetPrefix() const { return line_prefix_; } - template LogStream &operator<<(const T &item) { - if (Logger::GetLevel() >= stream_level_) { + template log_stream &operator<<(const T &item) { + if (logger::get_level() >= stream_level_) { if (newline) { logger_ << line_prefix_; newline = false; @@ -102,8 +102,8 @@ public: return *this; } - LogStream &operator<<(std::ostream &(*fp)(std::ostream &)) { - if (Logger::GetLevel() >= stream_level_) { + log_stream &operator<<(std::ostream &(*fp)(std::ostream &)) { + if (logger::get_level() >= stream_level_) { logger_ << fp; logger_ << line_postfix_; newline = true; @@ -125,11 +125,11 @@ public: }; // global instantiations for different levels -extern Logger glogger; -extern LogStream flog; -extern LogStream elog; -extern LogStream wlog; -extern LogStream ilog; -extern LogStream dlog; +extern logger glogger; +extern log_stream flog; +extern log_stream elog; +extern log_stream wlog; +extern log_stream ilog; +extern log_stream dlog; } // namespace music diff --git a/src/grid_fft.cc b/src/grid_fft.cc index e925dc5..a51577e 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -276,7 +276,7 @@ hid_t hdf5_get_data_type(void) if (typeid(T) == typeid(size_t)) return H5T_NATIVE_ULLONG; - std::cerr << " - Error: [HDF_IO] trying to evaluate unsupported type in GetDataType\n\n"; + music::elog << "[HDF_IO] trying to evaluate unsupported type in GetDataType"; return -1; } diff --git a/src/logger.cc b/src/logger.cc index eb07442..26c34a5 100644 --- a/src/logger.cc +++ b/src/logger.cc @@ -2,18 +2,18 @@ namespace music { -std::ofstream Logger::output_file_; -LogLevel Logger::log_level_ = LogLevel::Off; +std::ofstream logger::output_file_; +log_level logger::log_level_ = log_level::off; -void Logger::SetLevel(const LogLevel &level) { +void logger::set_level(const log_level &level) { log_level_ = level; } -LogLevel Logger::GetLevel() { +log_level logger::get_level() { return log_level_; } -void Logger::SetOutput(const std::string filename) { +void logger::set_output(const std::string filename) { if (output_file_.is_open()) { output_file_.close(); } @@ -21,22 +21,22 @@ void Logger::SetOutput(const std::string filename) { assert(output_file_.is_open()); } -void Logger::UnsetOutput() { +void logger::unset_output() { if (output_file_.is_open()) { output_file_.close(); } } -std::ofstream 
&Logger::GetOutput() { +std::ofstream &logger::get_output() { return output_file_; } // global instantiations for different levels -Logger glogger; -LogStream flog(glogger, LogLevel::Fatal); -LogStream elog(glogger, LogLevel::Error); -LogStream wlog(glogger, LogLevel::Warning); -LogStream ilog(glogger, LogLevel::Info); -LogStream dlog(glogger, LogLevel::Debug); +logger the_logger; +log_stream flog(the_logger, log_level::fatal); +log_stream elog(the_logger, log_level::error); +log_stream wlog(the_logger, log_level::warning); +log_stream ilog(the_logger, log_level::info); +log_stream dlog(the_logger, log_level::debug); } // namespace music diff --git a/src/main.cc b/src/main.cc index c16690a..5a11c30 100644 --- a/src/main.cc +++ b/src/main.cc @@ -44,8 +44,8 @@ void handle_eptr(std::exception_ptr eptr) // passing by value is ok int main( int argc, char** argv ) { - music::Logger::SetLevel(music::LogLevel::Info); - // music::Logger::SetLevel(music::LogLevel::Debug); + music::logger::set_level(music::log_level::info); + // music::logger::set_level(music::log_level::Debug); //------------------------------------------------------------------------------ // initialise MPI @@ -61,7 +61,7 @@ int main( int argc, char** argv ) // set up lower logging levels for other tasks if( CONFIG::MPI_task_rank!=0 ) { - music::Logger::SetLevel(music::LogLevel::Error); + music::logger::set_level(music::log_level::error); } #endif diff --git a/src/output_plugin.cc b/src/output_plugin.cc index 7e287c2..106c8fe 100644 --- a/src/output_plugin.cc +++ b/src/output_plugin.cc @@ -42,13 +42,13 @@ std::unique_ptr select_output_plugin( ConfigFile& cf ) if( !the_output_plugin_creator ) { - music::elog << "Error: output plug-in \'" << formatname << "\' not found." << std::endl; + music::elog << "Output plug-in \'" << formatname << "\' not found." 
<< std::endl; print_output_plugins(); throw std::runtime_error("Unknown output plug-in"); }else{ music::ilog << "-------------------------------------------------------------------------------" << std::endl; - music::ilog << std::setw(32) << std::left << "Output plugin" << " : " << formatname << std::endl; + music::ilog << std::setw(32) << std::left << "Output plugin" << " : " << formatname << std::endl; } return std::move(the_output_plugin_creator->create( cf )); diff --git a/src/plugins/transfer_CAMB_file.cc b/src/plugins/transfer_CAMB_file.cc index 54ec9cb..9e0a627 100644 --- a/src/plugins/transfer_CAMB_file.cc +++ b/src/plugins/transfer_CAMB_file.cc @@ -89,8 +89,8 @@ private: if (ss.bad() || ss.fail()) { - music::elog.Print("Error reading the transfer function file (corrupt or not in expected format)!"); - throw std::runtime_error("Error reading transfer function file \'" + + music::elog.Print("error reading the transfer function file (corrupt or not in expected format)!"); + throw std::runtime_error("error reading transfer function file \'" + m_filename_Tk + "\'"); } From 3f17e5a796040d271e1ed70f42bc1f183ca3239a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 20:55:24 +0200 Subject: [PATCH 103/130] more naming convention homogenisation --- include/config_file.hh | 151 ++++++++++++++------------- include/cosmology_calculator.hh | 6 +- include/cosmology_parameters.hh | 26 ++--- include/ic_generator.hh | 4 +- include/operators.hh | 6 +- include/output_plugin.hh | 14 +-- include/particle_plt.hh | 12 +-- include/random_plugin.hh | 18 ++-- include/testing.hh | 6 +- include/transfer_function_plugin.hh | 10 +- src/ic_generator.cc | 50 ++++----- src/main.cc | 10 +- src/old/output_gadget2___original.cc | 52 ++++----- src/output_plugin.cc | 4 +- src/plugins/output_arepo.cc | 26 ++--- src/plugins/output_gadget2.cc | 18 ++-- src/plugins/output_gadget_hdf5.cc | 16 +-- src/plugins/output_generic.cc | 8 +- src/plugins/output_grafic2.cc | 22 ++-- src/plugins/random_music.cc | 28 ++--- src/plugins/random_ngenic.cc | 6 +- src/plugins/transfer_CAMB_file.cc | 10 +- src/plugins/transfer_CLASS.cc | 26 ++--- src/plugins/transfer_eisenstein.cc | 64 ++++++------ src/random_plugin.cc | 4 +- src/testing.cc | 14 +-- src/transfer_function_plugin.cc | 4 +- 27 files changed, 310 insertions(+), 305 deletions(-) diff --git a/include/config_file.hh b/include/config_file.hh index 4b6f1fc..ab30ea2 100644 --- a/include/config_file.hh +++ b/include/config_file.hh @@ -12,20 +12,20 @@ #include /*! - * @class ConfigFile + * @class config_file * @brief provides read/write access to configuration options * * This class provides access to the configuration file. The * configuration is stored in hash-pairs and can be queried and * validated by the responsible class/routine */ -class ConfigFile { +class config_file { //! current line number - unsigned m_iLine; + unsigned iline_; //! hash table for key/value pairs, stored as strings - std::map m_Items; + std::map items_; public: //! removes all white space from string source @@ -59,7 +59,7 @@ public: * @param oval the interpreted/converted value */ template - void Convert(const in_value &ival, out_value &oval) const { + void convert(const in_value &ival, out_value &oval) const { std::stringstream ss; ss << ival; //.. insert value into stream ss >> oval; //.. retrieve value from stream @@ -68,33 +68,33 @@ public: //.. conversion error music::elog << "Error: conversion of \'" << ival << "\' failed." 
<< std::endl; - throw ErrInvalidConversion(std::string("invalid conversion to ") + + throw except_invalid_conversion(std::string("invalid conversion to ") + typeid(out_value).name() + '.'); } } //! constructor of class config_file - /*! @param FileName the path/name of the configuration file to be parsed + /*! @param filename the path/name of the configuration file to be parsed */ - explicit ConfigFile(std::string const &FileName) : m_iLine(0), m_Items() { - std::ifstream file(FileName.c_str()); + explicit config_file(std::string const &filename) : iline_(0), items_() { + std::ifstream file(filename.c_str()); if (!file.is_open()){ - music::elog << "Could not open config file \'" << FileName << "\'." << std::endl; + music::elog << "Could not open config file \'" << filename << "\'." << std::endl; throw std::runtime_error( - std::string("Error: Could not open config file \'") + FileName + + std::string("Error: Could not open config file \'") + filename + std::string("\'")); } std::string line; std::string name; std::string value; - std::string inSection; - int posEqual; - m_iLine = 0; + std::string in_section; + int pos_equal; + iline_ = 0; //.. walk through all lines .. while (std::getline(file, line)) { - ++m_iLine; + ++iline_; //.. encounterd EOL ? if (!line.length()) continue; @@ -106,31 +106,31 @@ public: //.. encountered section tag ? if (line[0] == '[') { - inSection = trim(line.substr(1, line.find(']') - 1)); + in_section = trim(line.substr(1, line.find(']') - 1)); continue; } //.. seek end of entry name .. - posEqual = line.find('='); - name = trim(line.substr(0, posEqual)); - value = trim(line.substr(posEqual + 1)); + pos_equal = line.find('='); + name = trim(line.substr(0, pos_equal)); + value = trim(line.substr(pos_equal + 1)); - if ((size_t)posEqual == std::string::npos && + if ((size_t)pos_equal == std::string::npos && (name.size() != 0 || value.size() != 0)) { - music::wlog << "Ignoring non-assignment in " << FileName << ":" - << m_iLine << std::endl; + music::wlog << "Ignoring non-assignment in " << filename << ":" + << iline_ << std::endl; continue; } if (name.length() == 0 && value.size() != 0) { music::wlog << "Ignoring assignment missing entry name in " - << FileName << ":" << m_iLine << std::endl; + << filename << ":" << iline_ << std::endl; continue; } if (value.length() == 0 && name.size() != 0) { - music::wlog << "Empty entry will be ignored in " << FileName << ":" - << m_iLine << std::endl; + music::wlog << "Empty entry will be ignored in " << filename << ":" + << iline_ << std::endl; continue; } @@ -138,12 +138,12 @@ public: continue; //.. add key/value pair to hash table .. - if (m_Items.find(inSection + '/' + name) != m_Items.end()) { + if (items_.find(in_section + '/' + name) != items_.end()) { music::wlog << "Redeclaration overwrites previous value in " - << FileName << ":" << m_iLine << std::endl; + << filename << ":" << iline_ << std::endl; } - m_Items[inSection + '/' + name] = value; + items_[in_section + '/' + name] = value; } } @@ -151,8 +151,8 @@ public: /*! @param key the key value, usually "section/key" * @param value the value of the key, also a string */ - void InsertValue(std::string const &key, std::string const &value) { - m_Items[key] = value; + void insert_value(std::string const &key, std::string const &value) { + items_[key] = value; } //! 
inserts a key/value pair in the hash map @@ -160,9 +160,9 @@ public: * @param key the key value usually "section/key" * @param value the value of the key, also a string */ - void InsertValue(std::string const §ion, std::string const &key, + void insert_value(std::string const §ion, std::string const &key, std::string const &value) { - m_Items[section + '/' + key] = value; + items_[section + '/' + key] = value; } //! checks if a key is part of the hash map @@ -170,10 +170,10 @@ public: * @param key the key name to be checked * @return true if the key is present, false otherwise */ - bool ContainsKey(std::string const §ion, std::string const &key) { + bool contains_key(std::string const §ion, std::string const &key) { std::map::const_iterator i = - m_Items.find(section + '/' + key); - if (i == m_Items.end()) + items_.find(section + '/' + key); + if (i == items_.end()) return false; return true; } @@ -182,55 +182,55 @@ public: /*! @param key the key name to be checked * @return true if the key is present, false otherwise */ - bool ContainsKey(std::string const &key) { - std::map::const_iterator i = m_Items.find(key); - if (i == m_Items.end()) + bool contains_key(std::string const &key) { + std::map::const_iterator i = items_.find(key); + if (i == items_.end()) return false; return true; } //! return value of a key - /*! returns the value of a given key, throws a ErrItemNotFound + /*! returns the value of a given key, throws a except_item_not_found * exception if the key is not available in the hash map. * @param key the key name * @return the value of the key - * @sa ErrItemNotFound + * @sa except_item_not_found */ - template T GetValue(std::string const &key) const { - return GetValue("", key); + template T get_value(std::string const &key) const { + return get_value("", key); } //! return value of a key - /*! returns the value of a given key, throws a ErrItemNotFound + /*! returns the value of a given key, throws a except_item_not_found * exception if the key is not available in the hash map. * @param section the section name for the key * @param key the key name * @return the value of the key - * @sa ErrItemNotFound + * @sa except_item_not_found */ template - T GetValueBasic(std::string const §ion, std::string const &key) const { + T get_value_basic(std::string const §ion, std::string const &key) const { T r; std::map::const_iterator i = - m_Items.find(section + '/' + key); - if (i == m_Items.end()){ - throw ErrItemNotFound('\'' + section + '/' + key + + items_.find(section + '/' + key); + if (i == items_.end()){ + throw except_item_not_found('\'' + section + '/' + key + std::string("\' not found.")); } - Convert(i->second, r); + convert(i->second, r); return r; } template - T GetValue(std::string const §ion, std::string const &key) const + T get_value(std::string const §ion, std::string const &key) const { T r; try { - r = GetValueBasic(section, key); + r = get_value_basic(section, key); } - catch (ErrItemNotFound& e) + catch (except_item_not_found& e) { music::elog << e.what() << std::endl; throw; @@ -240,40 +240,41 @@ public: //! exception safe version of getValue /*! returns the value of a given key, returns a default value rather - * than a ErrItemNotFound exception if the key is not found. + * than a except_item_not_found exception if the key is not found. 
* @param section the section name for the key * @param key the key name * @param default_value the value that is returned if the key is not found * @return the key value (if key found) otherwise default_value */ template - T GetValueSafe(std::string const §ion, std::string const &key, + T get_value_safe(std::string const §ion, std::string const &key, T default_value) const { T r; try { - r = GetValueBasic(section, key); - } catch (ErrItemNotFound&) { + r = get_value_basic(section, key); + } catch (except_item_not_found&) { r = default_value; + music::dlog << "Item \'" << section << "/" << key << " not found in config. Default = \'" << default_value << "\'" << std::endl; } return r; } //! exception safe version of getValue /*! returns the value of a given key, returns a default value rather - * than a ErrItemNotFound exception if the key is not found. + * than a except_item_not_found exception if the key is not found. * @param key the key name * @param default_value the value that is returned if the key is not found * @return the key value (if key found) otherwise default_value */ template - T GetValueSafe(std::string const &key, T default_value) const { - return GetValueSafe("", key, default_value); + T get_value_safe(std::string const &key, T default_value) const { + return get_value_safe("", key, default_value); } //! dumps all key-value pairs to a std::ostream - void Dump(std::ostream &out) { - std::map::const_iterator i = m_Items.begin(); - while (i != m_Items.end()) { + void dump(std::ostream &out) { + std::map::const_iterator i = items_.begin(); + while (i != items_.end()) { if (i->second.length() > 0) out << std::setw(24) << std::left << i->first << " = " << i->second << std::endl; @@ -281,10 +282,10 @@ public: } } - void LogDump(void) { + void dump_to_log(void) { music::ilog << "List of all configuration options:" << std::endl; - std::map::const_iterator i = m_Items.begin(); - while (i != m_Items.end()) { + std::map::const_iterator i = items_.begin(); + while (i != items_.end()) { if (i->second.length() > 0) music::ilog << std::setw(28) << i->first << " = " << i->second << std::endl; @@ -295,16 +296,16 @@ public: //--- EXCEPTIONS --- //! runtime error that is thrown if key is not found in getValue - class ErrItemNotFound : public std::runtime_error { + class except_item_not_found : public std::runtime_error { public: - ErrItemNotFound(std::string itemname) + except_item_not_found(std::string itemname) : std::runtime_error(itemname.c_str()) {} }; //! runtime error that is thrown if type conversion fails - class ErrInvalidConversion : public std::runtime_error { + class except_invalid_conversion : public std::runtime_error { public: - ErrInvalidConversion(std::string errmsg) : std::runtime_error(errmsg) {} + except_invalid_conversion(std::string errmsg) : std::runtime_error(errmsg) {} }; //! runtime error that is thrown if identifier is not found in keys @@ -323,9 +324,9 @@ public: //... like "true" and "false" etc. //... converts the string to type bool, returns type bool ... 
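// ---------------------------------------------------------------------------
// A minimal usage sketch of the renamed config_file interface defined above.
// The file name "example.conf" and the literal values are illustrative only;
// the section/key names mirror those read in src/ic_generator.cc and the
// random number plug-ins.
// ---------------------------------------------------------------------------
#include <cstddef>
#include <iostream>
#include "config_file.hh"

int main()
{
    // example.conf (illustrative):
    //   [setup]
    //   GridRes   = 128
    //   BoxLength = 250.0
    config_file cf("example.conf");

    // mandatory keys: get_value() throws config_file::except_item_not_found
    // (and logs via music::elog) if the entry is missing
    const std::size_t ngrid  = cf.get_value<std::size_t>("setup", "GridRes");
    const double      boxlen = cf.get_value<double>("setup", "BoxLength");

    // optional key: get_value_safe() falls back to the default and records the
    // fallback via music::dlog; the bool specialisation following below
    // accepts true/yes/on/1 and false/no/off/0
    const bool do_baryons = cf.get_value_safe<bool>("setup", "DoBaryons", false);

    if (!cf.contains_key("random", "seed"))
        std::cerr << "no random seed given" << std::endl;

    std::cout << ngrid << " cells, box " << boxlen
              << ", baryons " << do_baryons << std::endl;
    return 0;
}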
template <> -inline bool ConfigFile::GetValue(std::string const &strSection, +inline bool config_file::get_value(std::string const &strSection, std::string const &strEntry) const { - std::string r1 = GetValue(strSection, strEntry); + std::string r1 = get_value(strSection, strEntry); if (r1 == "true" || r1 == "yes" || r1 == "on" || r1 == "1") return true; if (r1 == "false" || r1 == "no" || r1 == "off" || r1 == "0") @@ -338,17 +339,17 @@ inline bool ConfigFile::GetValue(std::string const &strSection, } template <> -inline bool ConfigFile::GetValueSafe(std::string const &strSection, +inline bool config_file::get_value_safe(std::string const &strSection, std::string const &strEntry, bool defaultValue) const { std::string r1; try { - r1 = GetValueBasic(strSection, strEntry); + r1 = get_value_basic(strSection, strEntry); if (r1 == "true" || r1 == "yes" || r1 == "on" || r1 == "1") return true; if (r1 == "false" || r1 == "no" || r1 == "off" || r1 == "0") return false; - } catch (ErrItemNotFound&) { + } catch (except_item_not_found&) { return defaultValue; } return defaultValue; @@ -356,7 +357,7 @@ inline bool ConfigFile::GetValueSafe(std::string const &strSection, template <> inline void -ConfigFile::Convert(const std::string &ival, +config_file::convert(const std::string &ival, std::string &oval) const { oval = ival; } diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index 04aa2e9..1d99209 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -131,8 +131,8 @@ public: * @param pTransferFunction pointer to an instance of a transfer function object */ - explicit calculator(ConfigFile &cf) - : cosmo_param_(cf), astart_( 1.0/(1.0+cf.GetValue("setup","zstart")) ) + explicit calculator(config_file &cf) + : cosmo_param_(cf), astart_( 1.0/(1.0+cf.get_value("setup","zstart")) ) { // pre-compute growth factors and store for interpolation std::vector tab_a, tab_D, tab_f; @@ -161,7 +161,7 @@ public: music::ilog << std::setw(32) << std::left << "TF maximum wave number" << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl; - // music::ilog << "D+(MUSIC) = " << this->get_growth_factor( 1.0/(1.0+cf.GetValue("setup","zstart")) ) << std::endl; + // music::ilog << "D+(MUSIC) = " << this->get_growth_factor( 1.0/(1.0+cf.get_value("setup","zstart")) ) << std::endl; // music::ilog << "pnrom = " << cosmo_param_.pnorm << std::endl; } diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh index 7168ec9..0d3a3ad 100644 --- a/include/cosmology_parameters.hh +++ b/include/cosmology_parameters.hh @@ -42,35 +42,35 @@ struct parameters parameters( const parameters& ) = default; - explicit parameters(ConfigFile cf) + explicit parameters(config_file cf) { - H0 = cf.GetValue("cosmology", "H0"); + H0 = cf.get_value("cosmology", "H0"); h = H0 / 100.0; - nspect = cf.GetValue("cosmology", "nspec"); + nspect = cf.get_value("cosmology", "nspec"); - Omega_b = cf.GetValue("cosmology", "Omega_b"); + Omega_b = cf.get_value("cosmology", "Omega_b"); - Omega_m = cf.GetValue("cosmology", "Omega_m"); + Omega_m = cf.get_value("cosmology", "Omega_m"); - Omega_DE = cf.GetValue("cosmology", "Omega_L"); + Omega_DE = cf.get_value("cosmology", "Omega_L"); - w_0 = cf.GetValueSafe("cosmology", "w0", -1.0); + w_0 = cf.get_value_safe("cosmology", "w0", -1.0); - w_a = cf.GetValueSafe("cosmology", "wa", 0.0); + w_a = cf.get_value_safe("cosmology", "wa", 0.0); - Tcmb = cf.GetValueSafe("cosmology", "Tcmb", 2.7255); + Tcmb = cf.get_value_safe("cosmology", "Tcmb", 
2.7255); - Neff = cf.GetValueSafe("cosmology", "Neff", 3.046); + Neff = cf.get_value_safe("cosmology", "Neff", 3.046); - sigma8 = cf.GetValue("cosmology", "sigma_8"); + sigma8 = cf.get_value("cosmology", "sigma_8"); // calculate energy density in ultrarelativistic species from Tcmb and Neff double Omega_gamma = 4 * phys_const::sigma_SI / std::pow(phys_const::c_SI, 3) * std::pow(Tcmb, 4.0) / phys_const::rhocrit_h2_SI / (h * h); double Omega_nu = Neff * Omega_gamma * 7. / 8. * std::pow(4. / 11., 4. / 3.); Omega_r = Omega_gamma + Omega_nu; - if (cf.GetValueSafe("cosmology", "ZeroRadiation", false)) + if (cf.get_value_safe("cosmology", "ZeroRadiation", false)) { Omega_r = 0.0; } @@ -91,7 +91,7 @@ struct parameters music::ilog << "Cosmological parameters are: " << std::endl; music::ilog << " H0 = " << std::setw(16) << H0 << "sigma_8 = " << std::setw(16) << sigma8 << std::endl; music::ilog << " Omega_c = " << std::setw(16) << Omega_m-Omega_b << "Omega_b = " << std::setw(16) << Omega_b << std::endl; - if (!cf.GetValueSafe("cosmology", "ZeroRadiation", false)){ + if (!cf.get_value_safe("cosmology", "ZeroRadiation", false)){ music::ilog << " Omega_g = " << std::setw(16) << Omega_gamma << "Omega_nu = " << std::setw(16) << Omega_nu << std::endl; }else{ music::ilog << " Omega_r = " << std::setw(16) << Omega_r << std::endl; diff --git a/include/ic_generator.hh b/include/ic_generator.hh index 2cf38f4..3a637e8 100644 --- a/include/ic_generator.hh +++ b/include/ic_generator.hh @@ -9,9 +9,9 @@ namespace ic_generator{ - int Run( ConfigFile& the_config ); + int Run( config_file& the_config ); - int Initialise( ConfigFile& the_config ); + int Initialise( config_file& the_config ); extern std::unique_ptr the_random_number_generator; extern std::unique_ptr the_output_plugin; diff --git a/include/operators.hh b/include/operators.hh index be6d1f7..49ed8d1 100644 --- a/include/operators.hh +++ b/include/operators.hh @@ -30,10 +30,10 @@ private: real_t boxlen_, k0_; size_t n_, nhalf_; public: - explicit fourier_gradient( const ConfigFile& the_config ) - : boxlen_( the_config.GetValue("setup", "BoxLength") ), + explicit fourier_gradient( const config_file& the_config ) + : boxlen_( the_config.get_value("setup", "BoxLength") ), k0_(2.0*M_PI/boxlen_), - n_( the_config.GetValue("setup","GridRes") ), + n_( the_config.get_value("setup","GridRes") ), nhalf_( n_/2 ) {} diff --git a/include/output_plugin.hh b/include/output_plugin.hh index 5a18407..fff657c 100644 --- a/include/output_plugin.hh +++ b/include/output_plugin.hh @@ -25,8 +25,8 @@ enum class output_type {particles,field_lagrangian,field_eulerian}; class output_plugin { protected: - //! reference to the ConfigFile object that holds all configuration options - ConfigFile &cf_; + //! reference to the config_file object that holds all configuration options + config_file &cf_; //! output file or directory name std::string fname_; @@ -35,10 +35,10 @@ protected: std::string interface_name_; public: //! constructor - output_plugin(ConfigFile &cf, std::string interface_name ) + output_plugin(config_file &cf, std::string interface_name ) : cf_(cf), interface_name_(interface_name) { - fname_ = cf_.GetValue("output", "filename"); + fname_ = cf_.get_value("output", "filename"); } //! virtual destructor @@ -78,7 +78,7 @@ public: struct output_plugin_creator { //! create an instance of a plug-in - virtual std::unique_ptr create(ConfigFile &cf) const = 0; + virtual std::unique_ptr create(config_file &cf) const = 0; //! 
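In equation form, the radiation density assembled in the cosmology::parameters constructor above (include/cosmology_parameters.hh) is, with \sigma the Stefan-Boltzmann constant, c the speed of light, T_cmb the CMB temperature and \rho_{\rm crit} the critical density (rhocrit_h2_SI times h^2):

    \Omega_\gamma = \frac{4\sigma\,T_{\rm cmb}^4}{c^3\,\rho_{\rm crit}}\,, \qquad
    \Omega_\nu = \frac{7}{8}\left(\frac{4}{11}\right)^{4/3} N_{\rm eff}\,\Omega_\gamma\,, \qquad
    \Omega_r = \Omega_\gamma + \Omega_\nu\,.

Setting ZeroRadiation = true in the [cosmology] section bypasses this and forces \Omega_r = 0, as read via get_value_safe above.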
destroy an instance of a plug-in virtual ~output_plugin_creator() {} @@ -103,12 +103,12 @@ struct output_plugin_creator_concrete : public output_plugin_creator } //! create an instance of the plug-in - std::unique_ptr create(ConfigFile &cf) const + std::unique_ptr create(config_file &cf) const { return std::make_unique(cf); // Derived( cf ); } }; //! failsafe version to select the output plug-in -std::unique_ptr select_output_plugin(ConfigFile &cf); +std::unique_ptr select_output_plugin(config_file &cf); diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 5346955..9e6df1e 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -496,12 +496,12 @@ private: public: // real_t boxlen, size_t ngridother - explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=64 ) - : boxlen_( the_config.GetValue("setup", "BoxLength") ), - aini_ ( 1.0/(1.0+the_config.GetValue("setup", "zstart")) ), - ngmapto_( the_config.GetValue("setup", "GridRes") ), + explicit lattice_gradient( config_file& the_config, size_t ngridself=64 ) + : boxlen_( the_config.get_value("setup", "BoxLength") ), + aini_ ( 1.0/(1.0+the_config.get_value("setup", "zstart")) ), + ngmapto_( the_config.get_value("setup", "GridRes") ), ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), - XmL_ ( the_config.GetValue("cosmology", "Omega_L") / the_config.GetValue("cosmology", "Omega_m") ), + XmL_ ( the_config.get_value("cosmology", "Omega_L") / the_config.get_value("cosmology", "Omega_m") ), D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), @@ -509,7 +509,7 @@ public: grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) { music::ilog << "-------------------------------------------------------------------------------" << std::endl; - std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); + std::string lattice_str = the_config.get_value_safe("setup","ParticleLoad","sc"); const lattice lattice_type = ((lattice_str=="bcc")? lattice_bcc : ((lattice_str=="fcc")? lattice_fcc diff --git a/include/random_plugin.hh b/include/random_plugin.hh index 3e7b77c..1c33fae 100644 --- a/include/random_plugin.hh +++ b/include/random_plugin.hh @@ -10,9 +10,9 @@ class RNG_plugin { protected: - ConfigFile *pcf_; //!< pointer to config_file from which to read parameters + config_file *pcf_; //!< pointer to config_file from which to read parameters public: - explicit RNG_plugin(ConfigFile &cf) + explicit RNG_plugin(config_file &cf) : pcf_(&cf) { } @@ -24,7 +24,7 @@ class RNG_plugin struct RNG_plugin_creator { - virtual std::unique_ptr Create(ConfigFile &cf) const = 0; + virtual std::unique_ptr Create(config_file &cf) const = 0; virtual ~RNG_plugin_creator() {} }; @@ -42,14 +42,14 @@ struct RNG_plugin_creator_concrete : public RNG_plugin_creator } //! create an instance of the plugin - std::unique_ptr Create(ConfigFile &cf) const + std::unique_ptr Create(config_file &cf) const { return std::make_unique(cf); } }; typedef RNG_plugin RNG_instance; -std::unique_ptr select_RNG_plugin( ConfigFile &cf); +std::unique_ptr select_RNG_plugin( config_file &cf); // /*! 
// * @brief encapsulates all things for multi-scale white noise generation @@ -58,18 +58,18 @@ std::unique_ptr select_RNG_plugin( ConfigFile &cf); // class random_number_generator // { // protected: -// ConfigFile *pcf_; +// config_file *pcf_; // //const refinement_hierarchy * prefh_; // RNG_plugin *generator_; // int levelmin_, levelmax_; // public: // //! constructor -// random_number_generator( ConfigFile &cf ) +// random_number_generator( config_file &cf ) // : pcf_(&cf) //, prefh_( &refh ) // { -// levelmin_ = pcf_->GetValue("setup", "levelmin"); -// levelmax_ = pcf_->GetValue("setup", "levelmax"); +// levelmin_ = pcf_->get_value("setup", "levelmin"); +// levelmax_ = pcf_->get_value("setup", "levelmax"); // generator_ = select_RNG_plugin(cf); // } diff --git a/include/testing.hh b/include/testing.hh index 1683b09..aaaae39 100644 --- a/include/testing.hh +++ b/include/testing.hh @@ -15,7 +15,7 @@ namespace testing{ void output_potentials_and_densities( - ConfigFile& the_config, + config_file& the_config, size_t ngrid, real_t boxlen, Grid_FFT& phi, Grid_FFT& phi2, @@ -24,7 +24,7 @@ namespace testing{ std::array< Grid_FFT*,3 >& A3 ); void output_velocity_displacement_symmetries( - ConfigFile &the_config, + config_file &the_config, size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, @@ -34,7 +34,7 @@ namespace testing{ bool bwrite_out_fields=false); void output_convergence( - ConfigFile &the_config, + config_file &the_config, cosmology::calculator* the_cosmo_calc, std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, diff --git a/include/transfer_function_plugin.hh b/include/transfer_function_plugin.hh index fd95250..942a7ea 100644 --- a/include/transfer_function_plugin.hh +++ b/include/transfer_function_plugin.hh @@ -25,7 +25,7 @@ class TransferFunction_plugin { public: // Cosmology cosmo_; //!< cosmological parameter, read from config_file - ConfigFile *pcf_; //!< pointer to config_file from which to read parameters + config_file *pcf_; //!< pointer to config_file from which to read parameters bool tf_distinct_; //!< bool if density transfer function is distinct for baryons and DM bool tf_withvel_; //!< bool if also have velocity transfer functions bool tf_withtotal0_; //!< have the z=0 spectrum for normalisation purposes @@ -34,7 +34,7 @@ class TransferFunction_plugin public: //! constructor - TransferFunction_plugin(ConfigFile &cf) + TransferFunction_plugin(config_file &cf) : pcf_(&cf), tf_distinct_(false), tf_withvel_(false), tf_withtotal0_(false), tf_velunits_(false), tf_isnormalised_(false) { } @@ -82,7 +82,7 @@ class TransferFunction_plugin struct TransferFunction_plugin_creator { //! create an instance of a transfer function plug-in - virtual std::unique_ptr create(ConfigFile &cf) const = 0; + virtual std::unique_ptr create(config_file &cf) const = 0; //! destroy an instance of a plug-in virtual ~TransferFunction_plugin_creator() {} @@ -103,7 +103,7 @@ struct TransferFunction_plugin_creator_concrete : public TransferFunction_plugin } //! 
create an instance of the plug-in - std::unique_ptr create(ConfigFile &cf) const + std::unique_ptr create(config_file &cf) const { return std::make_unique(cf); } @@ -111,4 +111,4 @@ struct TransferFunction_plugin_creator_concrete : public TransferFunction_plugin // typedef TransferFunction_plugin TransferFunction; -std::unique_ptr select_TransferFunction_plugin(ConfigFile &cf); +std::unique_ptr select_TransferFunction_plugin(config_file &cf); diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 4964a4d..72c4482 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -24,7 +24,7 @@ std::unique_ptr the_random_number_generator; std::unique_ptr the_output_plugin; std::unique_ptr the_cosmo_calc; -int Initialise( ConfigFile& the_config ) +int Initialise( config_file& the_config ) { the_random_number_generator = std::move(select_RNG_plugin(the_config)); the_output_plugin = std::move(select_output_plugin(the_config)); @@ -33,7 +33,7 @@ int Initialise( ConfigFile& the_config ) return 0; } -int Run( ConfigFile& the_config ) +int Run( config_file& the_config ) { //-------------------------------------------------------------------------------------------------------- // Read run parameters @@ -41,23 +41,23 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------------------------------------------- //! number of resolution elements per dimension - const size_t ngrid = the_config.GetValue("setup", "GridRes"); + const size_t ngrid = the_config.get_value("setup", "GridRes"); //-------------------------------------------------------------------------------------------------------- //! box side length in h-1 Mpc - const real_t boxlen = the_config.GetValue("setup", "BoxLength"); + const real_t boxlen = the_config.get_value("setup", "BoxLength"); //-------------------------------------------------------------------------------------------------------- //! starting redshift - const real_t zstart = the_config.GetValue("setup", "zstart"); + const real_t zstart = the_config.get_value("setup", "zstart"); //-------------------------------------------------------------------------------------------------------- //! order of the LPT approximation - int LPTorder = the_config.GetValueSafe("setup","LPTorder",100); + int LPTorder = the_config.get_value_safe("setup","LPTorder",100); //-------------------------------------------------------------------------------------------------------- //! initialice particles on a bcc or fcc lattice instead of a standard sc lattice (doubles and quadruples the number of particles) - std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); + std::string lattice_str = the_config.get_value_safe("setup","ParticleLoad","sc"); const particle::lattice lattice_type = ((lattice_str=="bcc")? particle::lattice_bcc : ((lattice_str=="fcc")? particle::lattice_fcc @@ -66,45 +66,45 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------------------------------------------- //! apply fixing of the complex mode amplitude following Angulo & Pontzen (2016) [https://arxiv.org/abs/1603.05253] - const bool bDoFixing = the_config.GetValueSafe("setup", "DoFixing", false); + const bool bDoFixing = the_config.get_value_safe("setup", "DoFixing", false); //-------------------------------------------------------------------------------------------------------- //! do baryon ICs? 
- const bool bDoBaryons = the_config.GetValueSafe("setup", "DoBaryons", false ); + const bool bDoBaryons = the_config.get_value_safe("setup", "DoBaryons", false ); std::map< cosmo_species, double > Omega; if( bDoBaryons ){ - double Om = the_config.GetValue("cosmology", "Omega_m"); - double Ob = the_config.GetValue("cosmology", "Omega_b"); + double Om = the_config.get_value("cosmology", "Omega_m"); + double Ob = the_config.get_value("cosmology", "Omega_b"); Omega[cosmo_species::dm] = Om-Ob; Omega[cosmo_species::baryon] = Ob; }else{ - double Om = the_config.GetValue("cosmology", "Omega_m"); + double Om = the_config.get_value("cosmology", "Omega_m"); Omega[cosmo_species::dm] = Om; Omega[cosmo_species::baryon] = 0.0; } //-------------------------------------------------------------------------------------------------------- //! do constrained ICs? - const bool bAddConstrainedModes = the_config.ContainsKey("setup", "ConstraintFieldFile" ); + const bool bAddConstrainedModes = the_config.contains_key("setup", "ConstraintFieldFile" ); //-------------------------------------------------------------------------------------------------------- //! add beyond box tidal field modes following Schmidt et al. (2018) [https://arxiv.org/abs/1803.03274] - bool bAddExternalTides = the_config.ContainsKey("cosmology", "LSS_aniso_lx") - & the_config.ContainsKey("cosmology", "LSS_aniso_ly") - & the_config.ContainsKey("cosmology", "LSS_aniso_lz"); + bool bAddExternalTides = the_config.contains_key("cosmology", "LSS_aniso_lx") + & the_config.contains_key("cosmology", "LSS_aniso_ly") + & the_config.contains_key("cosmology", "LSS_aniso_lz"); - if( bAddExternalTides && !( the_config.ContainsKey("cosmology", "LSS_aniso_lx") - | the_config.ContainsKey("cosmology", "LSS_aniso_ly") - | the_config.ContainsKey("cosmology", "LSS_aniso_lz") )) + if( bAddExternalTides && !( the_config.contains_key("cosmology", "LSS_aniso_lx") + | the_config.contains_key("cosmology", "LSS_aniso_ly") + | the_config.contains_key("cosmology", "LSS_aniso_lz") )) { music::elog << "Not all dimensions of LSS_aniso_l{x,y,z} specified! Will ignore external tidal field!" 
<< std::endl; bAddExternalTides = false; } // Anisotropy parameters for beyond box tidal field std::array lss_aniso_lambda = { - the_config.GetValueSafe("cosmology", "LSS_aniso_lx", 0.0), - the_config.GetValueSafe("cosmology", "LSS_aniso_ly", 0.0), - the_config.GetValueSafe("cosmology", "LSS_aniso_lz", 0.0), + the_config.get_value_safe("cosmology", "LSS_aniso_lx", 0.0), + the_config.get_value_safe("cosmology", "LSS_aniso_ly", 0.0), + the_config.get_value_safe("cosmology", "LSS_aniso_lz", 0.0), }; if( std::abs(lss_aniso_lambda[0]+lss_aniso_lambda[1]+lss_aniso_lambda[2]) > 1e-10 ){ @@ -192,8 +192,8 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------- if( bAddConstrainedModes ){ Grid_FFT cwnoise({8,8,8}, {boxlen,boxlen,boxlen}); - cwnoise.Read_from_HDF5( the_config.GetValue("setup", "ConstraintFieldFile"), - the_config.GetValue("setup", "ConstraintFieldName") ); + cwnoise.Read_from_HDF5( the_config.get_value("setup", "ConstraintFieldFile"), + the_config.get_value("setup", "ConstraintFieldName") ); cwnoise.FourierTransformForward(); size_t ngrid_c = cwnoise.size(0), ngrid_c_2 = ngrid_c/2; @@ -422,7 +422,7 @@ int Run( ConfigFile& the_config ) //====================================================================== // Testing - const std::string testing = the_config.GetValueSafe("testing", "test", "none"); + const std::string testing = the_config.get_value_safe("testing", "test", "none"); if (testing != "none") { diff --git a/src/main.cc b/src/main.cc index 5a11c30..04e2302 100644 --- a/src/main.cc +++ b/src/main.cc @@ -44,8 +44,12 @@ void handle_eptr(std::exception_ptr eptr) // passing by value is ok int main( int argc, char** argv ) { + +#if defined(NDEBUG) music::logger::set_level(music::log_level::info); - // music::logger::set_level(music::log_level::Debug); +#else + music::logger::set_level(music::log_level::debug); +#endif //------------------------------------------------------------------------------ // initialise MPI @@ -104,7 +108,7 @@ int main( int argc, char** argv ) } // open the configuration file - ConfigFile the_config(argv[1]); + config_file the_config(argv[1]); //------------------------------------------------------------------------------ // Set up FFTW @@ -123,7 +127,7 @@ int main( int argc, char** argv ) FFTW_API(mpi_init)(); #endif - CONFIG::num_threads = the_config.GetValueSafe("execution", "NumThreads",std::thread::hardware_concurrency()); + CONFIG::num_threads = the_config.get_value_safe("execution", "NumThreads",std::thread::hardware_concurrency()); #if defined(USE_FFTW_THREADS) if (CONFIG::FFTW_threads_ok) diff --git a/src/old/output_gadget2___original.cc b/src/old/output_gadget2___original.cc index b8dfa62..688ddae 100644 --- a/src/old/output_gadget2___original.cc +++ b/src/old/output_gadget2___original.cc @@ -796,7 +796,7 @@ protected: } public: - gadget2_output_plugin(ConfigFile &cf) + gadget2_output_plugin(config_file &cf) : output_plugin(cf) { @@ -812,19 +812,19 @@ public: units_vel_.insert(std::pair("m/s", 1.0e-3)); // 1 m/s units_vel_.insert(std::pair("cm/s", 1.0e-5)); // 1 cm/s - block_buf_size_ = cf_.GetValueSafe("output", "gadget_blksize", 1048576); + block_buf_size_ = cf_.get_value_safe("output", "gadget_blksize", 1048576); //... 
ensure that everyone knows we want to do SPH - cf.InsertValue("setup", "do_SPH", "yes"); + cf.insert_value("setup", "do_SPH", "yes"); - //bbndparticles_ = !cf_.GetValueSafe("output","gadget_nobndpart",false); + //bbndparticles_ = !cf_.get_value_safe("output","gadget_nobndpart",false); npartmax_ = 1 << 30; - nfiles_ = cf.GetValueSafe("output", "gadget_num_files", 1); + nfiles_ = cf.get_value_safe("output", "gadget_num_files", 1); - blongids_ = cf.GetValueSafe("output", "gadget_longids", false); + blongids_ = cf.get_value_safe("output", "gadget_longids", false); - shift_halfcell_ = cf.GetValueSafe("output", "gadget_cell_centered", false); + shift_halfcell_ = cf.get_value_safe("output", "gadget_cell_centered", false); //if( nfiles_ < (int)ceil((double)npart/(double)npartmax_) ) // music::wlog.Print("Should use more files."); @@ -879,16 +879,16 @@ public: throw std::runtime_error("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); } - YHe_ = cf.GetValueSafe("cosmology", "YHe", 0.248); - gamma_ = cf.GetValueSafe("cosmology", "gamma", 5.0 / 3.0); + YHe_ = cf.get_value_safe("cosmology", "YHe", 0.248); + gamma_ = cf.get_value_safe("cosmology", "gamma", 5.0 / 3.0); - do_baryons_ = cf.GetValueSafe("setup", "baryons", false); - omegab_ = cf.GetValueSafe("cosmology", "Omega_b", 0.045); + do_baryons_ = cf.get_value_safe("setup", "baryons", false); + omegab_ = cf.get_value_safe("cosmology", "Omega_b", 0.045); //... new way - std::string lunitstr = cf.GetValueSafe("output", "gadget_lunit", "Mpc"); - std::string munitstr = cf.GetValueSafe("output", "gadget_munit", "1e10Msol"); - std::string vunitstr = cf.GetValueSafe("output", "gadget_vunit", "km/s"); + std::string lunitstr = cf.get_value_safe("output", "gadget_lunit", "Mpc"); + std::string munitstr = cf.get_value_safe("output", "gadget_munit", "1e10Msol"); + std::string vunitstr = cf.get_value_safe("output", "gadget_vunit", "km/s"); std::map::iterator mapit; @@ -917,16 +917,16 @@ public: } //... maintain compatibility with old way of setting units - if (cf.ContainsKey("output", "gadget_usekpc")) + if (cf.contains_key("output", "gadget_usekpc")) { - kpcunits_ = cf.GetValueSafe("output", "gadget_usekpc", false); + kpcunits_ = cf.get_value_safe("output", "gadget_usekpc", false); if (kpcunits_) unit_length_chosen_ = 1e-3; music::wlog.Print("Deprecated option \'gadget_usekpc\' may override unit selection. Use \'gadget_lunit\' instead."); } - if (cf.ContainsKey("output", "gadget_usemsol")) + if (cf.contains_key("output", "gadget_usemsol")) { - msolunits_ = cf.GetValueSafe("output", "gadget_usemsol", false); + msolunits_ = cf.get_value_safe("output", "gadget_usemsol", false); if (msolunits_) unit_mass_chosen_ = 1e-10; music::wlog.Print("Deprecated option \'gadget_usemsol\' may override unit selection. Use \'gadget_munit\' instead."); @@ -934,12 +934,12 @@ public: //... coarse particle properties... 
- spread_coarse_acrosstypes_ = cf.GetValueSafe("output", "gadget_spreadcoarse", false); + spread_coarse_acrosstypes_ = cf.get_value_safe("output", "gadget_spreadcoarse", false); bndparticletype_ = 5; if (!spread_coarse_acrosstypes_) { - bndparticletype_ = cf.GetValueSafe("output", "gadget_coarsetype", 5); + bndparticletype_ = cf.get_value_safe("output", "gadget_coarsetype", 5); if (bndparticletype_ == 0 || //bndparticletype_ == 1 || bndparticletype_ == 4 || bndparticletype_ > 5) @@ -950,12 +950,12 @@ public: } else { - if (cf.GetValueSafe("output", "gadget_coarsetype", 5) != 5) + if (cf.get_value_safe("output", "gadget_coarsetype", 5) != 5) music::wlog.Print("Gadget: Option \'gadget_spreadcoarse\' forces \'gadget_coarsetype=5\'! Will override."); } //... set time ...................................................... - header_.redshift = cf.GetValue("setup", "zstart"); + header_.redshift = cf.get_value("setup", "zstart"); header_.time = 1.0 / (1.0 + header_.redshift); //... SF flags @@ -965,10 +965,10 @@ public: //... header_.num_files = nfiles_; //1; - header_.BoxSize = cf.GetValue("setup", "BoxLength"); - header_.Omega0 = cf.GetValue("cosmology", "Omega_m"); - header_.OmegaLambda = cf.GetValue("cosmology", "Omega_L"); - header_.HubbleParam = cf.GetValue("cosmology", "H0") / 100.0; + header_.BoxSize = cf.get_value("setup", "BoxLength"); + header_.Omega0 = cf.get_value("cosmology", "Omega_m"); + header_.OmegaLambda = cf.get_value("cosmology", "Omega_L"); + header_.HubbleParam = cf.get_value("cosmology", "H0") / 100.0; header_.flag_stellarage = 0; header_.flag_metals = 0; diff --git a/src/output_plugin.cc b/src/output_plugin.cc index 106c8fe..d0a7c5d 100644 --- a/src/output_plugin.cc +++ b/src/output_plugin.cc @@ -33,9 +33,9 @@ void print_output_plugins() music::ilog << std::endl; } -std::unique_ptr select_output_plugin( ConfigFile& cf ) +std::unique_ptr select_output_plugin( config_file& cf ) { - std::string formatname = cf.GetValue( "output", "format" ); + std::string formatname = cf.get_value( "output", "format" ); output_plugin_creator *the_output_plugin_creator = get_output_plugin_map()[ formatname ]; diff --git a/src/plugins/output_arepo.cc b/src/plugins/output_arepo.cc index 8263060..1674604 100644 --- a/src/plugins/output_arepo.cc +++ b/src/plugins/output_arepo.cc @@ -56,7 +56,7 @@ protected: public: //! 
constructor - explicit gadget_hdf5_output_plugin(ConfigFile &cf) + explicit gadget_hdf5_output_plugin(config_file &cf) : output_plugin(cf, "GADGET-HDF5") { num_files_ = 1; @@ -64,11 +64,11 @@ public: // use as many output files as we have MPI tasks MPI_Comm_size(MPI_COMM_WORLD, &num_files_); #endif - real_t astart = 1.0 / (1.0 + cf_.GetValue("setup", "zstart")); - lunit_ = cf_.GetValue("setup", "BoxLength"); + real_t astart = 1.0 / (1.0 + cf_.get_value("setup", "zstart")); + lunit_ = cf_.get_value("setup", "BoxLength"); vunit_ = lunit_ / std::sqrt(astart); - blongids_ = cf_.GetValueSafe("output", "UseLongids", false); - num_simultaneous_writers_ = cf_.GetValueSafe("output", "NumSimWriters", num_files_); + blongids_ = cf_.get_value_safe("output", "UseLongids", false); + num_simultaneous_writers_ = cf_.get_value_safe("output", "NumSimWriters", num_files_); for (int i = 0; i < 6; ++i) { @@ -85,9 +85,9 @@ public: header_.flag_cooling = 0; header_.num_files = num_files_; header_.BoxSize = lunit_; - header_.Omega0 = cf_.GetValue("cosmology", "Omega_m"); - header_.OmegaLambda = cf_.GetValue("cosmology", "Omega_L"); - header_.HubbleParam = cf_.GetValue("cosmology", "H0") / 100.0; + header_.Omega0 = cf_.get_value("cosmology", "Omega_m"); + header_.OmegaLambda = cf_.get_value("cosmology", "Omega_L"); + header_.HubbleParam = cf_.get_value("cosmology", "H0") / 100.0; header_.flag_stellarage = 0; header_.flag_metals = 0; header_.flag_entropy_instead_u = 0; @@ -95,16 +95,16 @@ public: // initial gas temperature double Tcmb0 = 2.726; - double Omegab = cf_.GetValue("cosmology", "Omega_b"); - double h = cf_.GetValue("cosmology", "H0") / 100.0, h2 = h*h; + double Omegab = cf_.get_value("cosmology", "Omega_b"); + double h = cf_.get_value("cosmology", "H0") / 100.0, h2 = h*h; double adec = 1.0 / (160.0 * pow(Omegab * h2 / 0.022, 2.0 / 5.0)); Tini_ = astart < adec ? Tcmb0 / astart : Tcmb0 / astart / astart * adec; // suggested PM res - pmgrid_ = 2*cf_.GetValue("setup", "GridRes"); + pmgrid_ = 2*cf_.get_value("setup", "GridRes"); gridboost_ = 1; - softening_ = cf_.GetValue("setup", "BoxLength")/pmgrid_/20; - doBaryons_ = cf_.GetValue("setup", "DoBaryons"); + softening_ = cf_.get_value("setup", "BoxLength")/pmgrid_/20; + doBaryons_ = cf_.get_value("setup", "DoBaryons"); #if !defined(USE_SINGLEPRECISION) doublePrec_ = 1; #else diff --git a/src/plugins/output_gadget2.cc b/src/plugins/output_gadget2.cc index ba3a986..0a3afbb 100644 --- a/src/plugins/output_gadget2.cc +++ b/src/plugins/output_gadget2.cc @@ -38,7 +38,7 @@ protected: public: //! constructor - explicit gadget2_output_plugin(ConfigFile &cf) + explicit gadget2_output_plugin(config_file &cf) : output_plugin(cf, "GADGET-2") { num_files_ = 1; @@ -46,10 +46,10 @@ public: // use as many output files as we have MPI tasks MPI_Comm_size(MPI_COMM_WORLD, &num_files_); #endif - real_t astart = 1.0 / (1.0 + cf_.GetValue("setup", "zstart")); - lunit_ = cf_.GetValue("setup", "BoxLength"); + real_t astart = 1.0 / (1.0 + cf_.get_value("setup", "zstart")); + lunit_ = cf_.get_value("setup", "BoxLength"); vunit_ = lunit_ / std::sqrt(astart); - blongids_ = cf_.GetValueSafe("output", "UseLongids", false); + blongids_ = cf_.get_value_safe("output", "UseLongids", false); } output_type write_species_as(const cosmo_species &) const { return output_type::particles; } @@ -90,7 +90,7 @@ public: ///// //... set time ...................................................... 
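The initial gas temperature set in the AREPO output plug-in above (src/plugins/output_arepo.cc) encodes the usual approximation that the gas follows the CMB temperature until a decoupling scale factor a_dec and cools adiabatically afterwards; in the notation of the code (Tcmb0 = 2.726, h2 = h^2):

    a_{\rm dec} = \frac{1}{160\,\left(\Omega_b h^2/0.022\right)^{2/5}}\,, \qquad
    T_{\rm ini} =
    \begin{cases}
      T_{\rm cmb,0}/a_{\rm start} & a_{\rm start} < a_{\rm dec}\,,\\
      T_{\rm cmb,0}\,a_{\rm dec}/a_{\rm start}^{2} & \text{otherwise}\,.
    \end{cases}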
- this_header_.redshift = cf_.GetValue("setup", "zstart"); + this_header_.redshift = cf_.get_value("setup", "zstart"); this_header_.time = 1.0 / (1.0 + this_header_.redshift); //... SF flags @@ -100,10 +100,10 @@ public: //... this_header_.num_files = num_files_; //1; - this_header_.BoxSize = cf_.GetValue("setup", "BoxLength"); - this_header_.Omega0 = cf_.GetValue("cosmology", "Omega_m"); - this_header_.OmegaLambda = cf_.GetValue("cosmology", "Omega_L"); - this_header_.HubbleParam = cf_.GetValue("cosmology", "H0") / 100.0; + this_header_.BoxSize = cf_.get_value("setup", "BoxLength"); + this_header_.Omega0 = cf_.get_value("cosmology", "Omega_m"); + this_header_.OmegaLambda = cf_.get_value("cosmology", "Omega_L"); + this_header_.HubbleParam = cf_.get_value("cosmology", "H0") / 100.0; this_header_.flag_stellarage = 0; this_header_.flag_metals = 0; diff --git a/src/plugins/output_gadget_hdf5.cc b/src/plugins/output_gadget_hdf5.cc index e6a821b..2e41e47 100644 --- a/src/plugins/output_gadget_hdf5.cc +++ b/src/plugins/output_gadget_hdf5.cc @@ -50,7 +50,7 @@ protected: public: //! constructor - explicit gadget_hdf5_output_plugin(ConfigFile &cf) + explicit gadget_hdf5_output_plugin(config_file &cf) : output_plugin(cf, "GADGET-HDF5") { num_files_ = 1; @@ -58,11 +58,11 @@ public: // use as many output files as we have MPI tasks MPI_Comm_size(MPI_COMM_WORLD, &num_files_); #endif - real_t astart = 1.0 / (1.0 + cf_.GetValue("setup", "zstart")); - lunit_ = cf_.GetValue("setup", "BoxLength"); + real_t astart = 1.0 / (1.0 + cf_.get_value("setup", "zstart")); + lunit_ = cf_.get_value("setup", "BoxLength"); vunit_ = lunit_ / std::sqrt(astart); - blongids_ = cf_.GetValueSafe("output", "UseLongids", false); - num_simultaneous_writers_ = cf_.GetValueSafe("output", "NumSimWriters", num_files_); + blongids_ = cf_.get_value_safe("output", "UseLongids", false); + num_simultaneous_writers_ = cf_.get_value_safe("output", "NumSimWriters", num_files_); for (int i = 0; i < 6; ++i) { @@ -79,9 +79,9 @@ public: header_.flag_cooling = 0; header_.num_files = num_files_; header_.BoxSize = lunit_; - header_.Omega0 = cf_.GetValue("cosmology", "Omega_m"); - header_.OmegaLambda = cf_.GetValue("cosmology", "Omega_L"); - header_.HubbleParam = cf_.GetValue("cosmology", "H0") / 100.0; + header_.Omega0 = cf_.get_value("cosmology", "Omega_m"); + header_.OmegaLambda = cf_.get_value("cosmology", "Omega_L"); + header_.HubbleParam = cf_.get_value("cosmology", "H0") / 100.0; header_.flag_stellarage = 0; header_.flag_metals = 0; header_.flag_entropy_instead_u = 0; diff --git a/src/plugins/output_generic.cc b/src/plugins/output_generic.cc index d96358e..79c2139 100644 --- a/src/plugins/output_generic.cc +++ b/src/plugins/output_generic.cc @@ -21,13 +21,13 @@ protected: bool out_eulerian_; public: //! 
constructor - explicit generic_output_plugin(ConfigFile &cf ) + explicit generic_output_plugin(config_file &cf ) : output_plugin(cf, "Generic HDF5") { - real_t astart = 1.0/(1.0+cf_.GetValue("setup", "zstart")); - real_t boxsize = cf_.GetValue("setup", "BoxLength"); + real_t astart = 1.0/(1.0+cf_.get_value("setup", "zstart")); + real_t boxsize = cf_.get_value("setup", "BoxLength"); - out_eulerian_ = cf_.GetValueSafe("output", "generic_out_eulerian",false); + out_eulerian_ = cf_.get_value_safe("output", "generic_out_eulerian",false); if( CONFIG::MPI_task_rank == 0 ) { diff --git a/src/plugins/output_grafic2.cc b/src/plugins/output_grafic2.cc index 43eb7cf..b3f3f04 100644 --- a/src/plugins/output_grafic2.cc +++ b/src/plugins/output_grafic2.cc @@ -40,22 +40,22 @@ protected: public: //! constructor - explicit grafic2_output_plugin(ConfigFile &cf) + explicit grafic2_output_plugin(config_file &cf) : output_plugin(cf, "GRAFIC2/RAMSES") { lunit_ = 1.0; vunit_ = 1.0; double - boxlength = cf_.GetValue("setup", "BoxLength"), - H0 = cf_.GetValue("cosmology", "H0"), - zstart = cf_.GetValue("setup", "zstart"), + boxlength = cf_.get_value("setup", "BoxLength"), + H0 = cf_.get_value("cosmology", "H0"), + zstart = cf_.get_value("setup", "zstart"), astart = 1.0 / (1.0 + zstart), - omegam = cf_.GetValue("cosmology", "Omega_m"), - omegaL = cf_.GetValue("cosmology", "Omega_L"); - uint32_t ngrid = cf_.GetValue("setup", "GridRes"); + omegam = cf_.get_value("cosmology", "Omega_m"), + omegaL = cf_.get_value("cosmology", "Omega_L"); + uint32_t ngrid = cf_.get_value("setup", "GridRes"); - bUseSPT_ = cf_.GetValueSafe("output", "grafic_use_SPT", false); + bUseSPT_ = cf_.get_value_safe("output", "grafic_use_SPT", false); levelmin_ = uint32_t(std::log2(double(ngrid)) + 1e-6); if (std::abs(std::pow(2.0, levelmin_) - double(ngrid)) > 1e-4) @@ -64,7 +64,7 @@ public: abort(); } - bhavebaryons_ = cf_.GetValueSafe("setup", "baryons", false); + bhavebaryons_ = cf_.get_value_safe("setup", "baryons", false); header_.n1 = ngrid; header_.n2 = ngrid; @@ -89,7 +89,7 @@ public: mkdir(dirname_.c_str(), 0777); // write RAMSES namelist file? if so only with one task - if (cf_.GetValueSafe("output", "ramses_nml", true) && CONFIG::MPI_task_rank==0 ) + if (cf_.get_value_safe("output", "ramses_nml", true) && CONFIG::MPI_task_rank==0 ) { write_ramses_namelist(); } @@ -196,7 +196,7 @@ void grafic2_output_plugin::write_grid_data(const Grid_FFT &g, const cos } // check field size against buffer size... 
- uint32_t ngrid = cf_.GetValue("setup", "GridRes"); + uint32_t ngrid = cf_.get_value("setup", "GridRes"); assert( g.global_size(0) == ngrid && g.global_size(1) == ngrid && g.global_size(2) == ngrid); assert( g.size(1) == ngrid && g.size(2) == ngrid); // write actual field slice by slice diff --git a/src/plugins/random_music.cc b/src/plugins/random_music.cc index 073a6f9..ab0f959 100644 --- a/src/plugins/random_music.cc +++ b/src/plugins/random_music.cc @@ -34,7 +34,7 @@ protected: //void store_rnd(int ilevel, rng *prng); public: - explicit RNG_music(ConfigFile &cf) : RNG_plugin(cf), initialized_(false) {} + explicit RNG_music(config_file &cf) : RNG_plugin(cf), initialized_(false) {} ~RNG_music() {} @@ -45,12 +45,12 @@ public: void initialize_for_grid_structure()//const refinement_hierarchy &refh) { //prefh_ = &refh; - levelmin_ = pcf_->GetValue("setup", "levelmin"); - levelmax_ = pcf_->GetValue("setup", "levelmax"); + levelmin_ = pcf_->get_value("setup", "levelmin"); + levelmax_ = pcf_->get_value("setup", "levelmax"); - ran_cube_size_ = pcf_->GetValueSafe("random", "cubesize", DEF_RAN_CUBE_SIZE); - disk_cached_ = pcf_->GetValueSafe("random", "disk_cached", true); - restart_ = pcf_->GetValueSafe("random", "restart", false); + ran_cube_size_ = pcf_->get_value_safe("random", "cubesize", DEF_RAN_CUBE_SIZE); + disk_cached_ = pcf_->get_value_safe("random", "disk_cached", true); + restart_ = pcf_->get_value_safe("random", "restart", false); mem_cache_.assign(levelmax_ - levelmin_ + 1, (std::vector *)NULL); @@ -93,8 +93,8 @@ void RNG_music::parse_random_parameters(void) std::string tempstr; bool noseed = false; sprintf(seedstr, "seed[%d]", i); - if (pcf_->ContainsKey("random", seedstr)) - tempstr = pcf_->GetValue("random", seedstr); + if (pcf_->contains_key("random", seedstr)) + tempstr = pcf_->get_value("random", seedstr); else { // "-2" means that no seed entry was found for that level @@ -105,7 +105,7 @@ void RNG_music::parse_random_parameters(void) if (is_number(tempstr)) { long ltemp; - pcf_->Convert(tempstr, ltemp); + pcf_->convert(tempstr, ltemp); rngfnames_.push_back(""); if (noseed) // ltemp < 0 ) //... 
generate some dummy seed which only depends on the level, negative so we know it's not @@ -141,7 +141,7 @@ void RNG_music::parse_random_parameters(void) void RNG_music::compute_random_numbers(void) { - bool rndsign = pcf_->GetValueSafe("random", "grafic_sign", false); + bool rndsign = pcf_->get_value_safe("random", "grafic_sign", false); std::vector randc(std::max(levelmax_, levelmin_seed_) + 1, (rng *)NULL); @@ -227,11 +227,11 @@ void RNG_music::compute_random_numbers(void) // { // int lx[3], x0[3]; // int shift[3], levelmin_poisson; - // shift[0] = pcf_->GetValue("setup", "shift_x"); - // shift[1] = pcf_->GetValue("setup", "shift_y"); - // shift[2] = pcf_->GetValue("setup", "shift_z"); + // shift[0] = pcf_->get_value("setup", "shift_x"); + // shift[1] = pcf_->get_value("setup", "shift_y"); + // shift[2] = pcf_->get_value("setup", "shift_z"); - // levelmin_poisson = pcf_->GetValue("setup", "levelmin"); + // levelmin_poisson = pcf_->get_value("setup", "levelmin"); // int lfac = 1 << (ilevel - levelmin_poisson); diff --git a/src/plugins/random_ngenic.cc b/src/plugins/random_ngenic.cc index b84221e..f1c6a59 100644 --- a/src/plugins/random_ngenic.cc +++ b/src/plugins/random_ngenic.cc @@ -18,11 +18,11 @@ private: std::vector SeedTable_; public: - explicit RNG_ngenic(ConfigFile &cf) : RNG_plugin(cf) + explicit RNG_ngenic(config_file &cf) : RNG_plugin(cf) { - RandomSeed_ = cf.GetValue("random", "seed"); - nres_ = cf.GetValue("setup", "GridRes"); + RandomSeed_ = cf.get_value("random", "seed"); + nres_ = cf.get_value("setup", "GridRes"); pRandomGenerator_ = gsl_rng_alloc(gsl_rng_ranlxd1); gsl_rng_set(pRandomGenerator_, RandomSeed_); diff --git a/src/plugins/transfer_CAMB_file.cc b/src/plugins/transfer_CAMB_file.cc index 9e0a627..4a2baf3 100644 --- a/src/plugins/transfer_CAMB_file.cc +++ b/src/plugins/transfer_CAMB_file.cc @@ -169,13 +169,13 @@ private: } public: - transfer_CAMB_file_plugin(ConfigFile &cf) + transfer_CAMB_file_plugin(config_file &cf) : TransferFunction_plugin(cf) { - m_filename_Tk = pcf_->GetValue("cosmology", "transfer_file"); - m_Omega_m = cf.GetValue("cosmology", "Omega_m"); //MvD - m_Omega_b = cf.GetValue("cosmology", "Omega_b"); //MvD - m_zstart = cf.GetValue("setup", "zstart"); //MvD + m_filename_Tk = pcf_->get_value("cosmology", "transfer_file"); + m_Omega_m = cf.get_value("cosmology", "Omega_m"); //MvD + m_Omega_b = cf.get_value("cosmology", "Omega_b"); //MvD + m_zstart = cf.get_value("setup", "zstart"); //MvD read_table(); diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 6b113a5..09047c6 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -170,25 +170,25 @@ private: } public: - explicit transfer_CLASS_plugin(ConfigFile &cf) + explicit transfer_CLASS_plugin(config_file &cf) : TransferFunction_plugin(cf) { ofs_class_input_.open("input_class_parameters.ini", std::ios::trunc); - h_ = pcf_->GetValue("cosmology", "H0") / 100.0; - Omega_m_ = pcf_->GetValue("cosmology", "Omega_m"); - Omega_b_ = pcf_->GetValue("cosmology", "Omega_b"); - N_ur_ = pcf_->GetValueSafe("cosmology", "Neff", 3.046); - ztarget_ = pcf_->GetValueSafe("cosmology", "ztarget", 0.0); + h_ = pcf_->get_value("cosmology", "H0") / 100.0; + Omega_m_ = pcf_->get_value("cosmology", "Omega_m"); + Omega_b_ = pcf_->get_value("cosmology", "Omega_b"); + N_ur_ = pcf_->get_value_safe("cosmology", "Neff", 3.046); + ztarget_ = pcf_->get_value_safe("cosmology", "ztarget", 0.0); atarget_ = 1.0 / (1.0 + ztarget_); - zstart_ = pcf_->GetValue("setup", "zstart"); + zstart_ 
= pcf_->get_value("setup", "zstart"); astart_ = 1.0 / (1.0 + zstart_); - double lbox = pcf_->GetValue("setup", "BoxLength"); - int nres = pcf_->GetValue("setup", "GridRes"); - A_s_ = pcf_->GetValueSafe("cosmology", "A_s", -1.0); - double k_p = pcf_->GetValueSafe("cosmology", "k_p", 0.05); - n_s_ = pcf_->GetValue("cosmology", "nspec"); - Tcmb_ = cf.GetValueSafe("cosmology", "Tcmb", 2.7255); + double lbox = pcf_->get_value("setup", "BoxLength"); + int nres = pcf_->get_value("setup", "GridRes"); + A_s_ = pcf_->get_value_safe("cosmology", "A_s", -1.0); + double k_p = pcf_->get_value_safe("cosmology", "k_p", 0.05); + n_s_ = pcf_->get_value("cosmology", "nspec"); + Tcmb_ = cf.get_value_safe("cosmology", "Tcmb", 2.7255); tnorm_ = 1.0; diff --git a/src/plugins/transfer_eisenstein.cc b/src/plugins/transfer_eisenstein.cc index 47a7efd..adc9e06 100644 --- a/src/plugins/transfer_eisenstein.cc +++ b/src/plugins/transfer_eisenstein.cc @@ -207,13 +207,13 @@ public: \param Tcmb mean temperature of the CMB fluctuations (defaults to Tcmb = 2.726 if not specified) */ - transfer_eisenstein_plugin(ConfigFile &cf) + transfer_eisenstein_plugin(config_file &cf) : TransferFunction_plugin(cf) { - double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726); - double H0 = pcf_->GetValue("cosmology", "H0"); - double Omega_m = pcf_->GetValue("cosmology", "Omega_m"); - double Omega_b = pcf_->GetValue("cosmology", "Omega_b"); + double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726); + double H0 = pcf_->get_value("cosmology", "H0"); + double Omega_m = pcf_->get_value("cosmology", "Omega_m"); + double Omega_b = pcf_->get_value("cosmology", "Omega_b"); etf_.set_parameters(H0, Omega_m, Omega_b, Tcmb); @@ -257,15 +257,15 @@ protected: }; public: - transfer_eisenstein_wdm_plugin(ConfigFile &cf) + transfer_eisenstein_wdm_plugin(config_file &cf) : TransferFunction_plugin(cf) { - double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726); - omegam_ = pcf_->GetValue("cosmology", "Omega_m"); - omegab_ = pcf_->GetValue("cosmology", "Omega_b"); - H0_ = pcf_->GetValue("cosmology", "H0"); + double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726); + omegam_ = pcf_->get_value("cosmology", "Omega_m"); + omegab_ = pcf_->get_value("cosmology", "Omega_b"); + H0_ = pcf_->get_value("cosmology", "H0"); m_h0 = H0_ / 100.0; - wdmm_ = pcf_->GetValue("cosmology", "WDMmass"); + wdmm_ = pcf_->get_value("cosmology", "WDMmass"); etf_.set_parameters(H0_, omegam_, omegab_, Tcmb); @@ -273,7 +273,7 @@ public: typemap_.insert(std::pair("VIEL", wdm_viel)); // add the other types typemap_.insert(std::pair("BODE_WRONG", wdm_bode_wrong)); // add the other types - type_ = pcf_->GetValueSafe("cosmology", "WDMtftype", "BODE"); + type_ = pcf_->get_value_safe("cosmology", "WDMtftype", "BODE"); //type_ = std::string( toupper( type_.c_str() ) ); @@ -286,29 +286,29 @@ public: { //... parameterisation from Bode et al. (2001), ApJ, 556, 93 case wdm_bode: - wdmnu_ = pcf_->GetValueSafe("cosmology", "WDMnu", 1.0); - wdmgx_ = pcf_->GetValueSafe("cosmology", "WDMg_x", 1.5); + wdmnu_ = pcf_->get_value_safe("cosmology", "WDMnu", 1.0); + wdmgx_ = pcf_->get_value_safe("cosmology", "WDMg_x", 1.5); m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ * 0.01 / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29); break; //... parameterisation from Viel et al. 
(2005), Phys Rev D, 71 case wdm_viel: - wdmnu_ = pcf_->GetValueSafe("cosmology", "WDMnu", 1.12); + wdmnu_ = pcf_->get_value_safe("cosmology", "WDMnu", 1.12); m_WDMalpha = 0.049 * pow(omegam_ / 0.25, 0.11) * pow(H0_ * 0.01 / 0.7, 1.22) * pow(wdmm_, -1.11); break; //.... below is for historical reasons due to the buggy parameterisation //.... in early versions of MUSIC, but apart from H instead of h, Bode et al. case wdm_bode_wrong: - wdmnu_ = pcf_->GetValueSafe("cosmology", "WDMnu", 1.0); - wdmgx_ = pcf_->GetValueSafe("cosmology", "WDMg_x", 1.5); + wdmnu_ = pcf_->get_value_safe("cosmology", "WDMnu", 1.0); + wdmgx_ = pcf_->get_value_safe("cosmology", "WDMg_x", 1.5); m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29); break; default: - wdmnu_ = pcf_->GetValueSafe("cosmology", "WDMnu", 1.0); - wdmgx_ = pcf_->GetValueSafe("cosmology", "WDMg_x", 1.5); + wdmnu_ = pcf_->get_value_safe("cosmology", "WDMnu", 1.0); + wdmgx_ = pcf_->get_value_safe("cosmology", "WDMg_x", 1.5); m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ * 0.01 / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29); break; } @@ -340,20 +340,20 @@ protected: eisenstein_transfer etf_; public: - transfer_eisenstein_cdmbino_plugin(ConfigFile &cf) + transfer_eisenstein_cdmbino_plugin(config_file &cf) : TransferFunction_plugin(cf) { - double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726); + double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726); - omegam_ = pcf_->GetValue("cosmology", "Omega_m"); - omegab_ = pcf_->GetValue("cosmology", "Omega_b"); - H0_ = pcf_->GetValue("cosmology", "H0"); + omegam_ = pcf_->get_value("cosmology", "Omega_m"); + omegab_ = pcf_->get_value("cosmology", "Omega_b"); + H0_ = pcf_->get_value("cosmology", "H0"); m_h0 = H0_ / 100.0; etf_.set_parameters(H0_, omegam_, omegab_, Tcmb); - mcdm_ = pcf_->GetValueSafe("cosmology", "CDM_mass", 100.0); // bino particle mass in GeV - Tkd_ = pcf_->GetValueSafe("cosmology", "CDM_Tkd", 33.0); // temperature at which CDM particle kinetically decouples (in MeV) + mcdm_ = pcf_->get_value_safe("cosmology", "CDM_mass", 100.0); // bino particle mass in GeV + Tkd_ = pcf_->get_value_safe("cosmology", "CDM_Tkd", 33.0); // temperature at which CDM particle kinetically decouples (in MeV) kfs_ = 1.7e6 / m_h0 * sqrt(mcdm_ / 100. * Tkd_ / 30.) / (1.0 + log(Tkd_ / 30.) / 19.2); kd_ = 3.8e7 / m_h0 * sqrt(mcdm_ / 100. 
* Tkd_ / 30.); @@ -395,19 +395,19 @@ protected: eisenstein_transfer etf_; public: - transfer_eisenstein_cutoff_plugin(ConfigFile &cf) + transfer_eisenstein_cutoff_plugin(config_file &cf) : TransferFunction_plugin(cf) { - double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726); + double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726); - omegam_ = pcf_->GetValue("cosmology", "Omega_m"); - omegab_ = pcf_->GetValue("cosmology", "Omega_b"); - H0_ = pcf_->GetValue("cosmology", "H0"); + omegam_ = pcf_->get_value("cosmology", "Omega_m"); + omegab_ = pcf_->get_value("cosmology", "Omega_b"); + H0_ = pcf_->get_value("cosmology", "H0"); m_h0 = H0_ / 100.0; etf_.set_parameters(H0_, omegam_, omegab_, Tcmb); - Rcut_ = pcf_->GetValueSafe("cosmology", "Rcut", 1.0); + Rcut_ = pcf_->get_value_safe("cosmology", "Rcut", 1.0); } inline double compute(double k, tf_type type) const diff --git a/src/random_plugin.cc b/src/random_plugin.cc index 87bf08f..5121efa 100644 --- a/src/random_plugin.cc +++ b/src/random_plugin.cc @@ -24,9 +24,9 @@ void print_RNG_plugins() music::ilog << std::endl; } -std::unique_ptr select_RNG_plugin(ConfigFile &cf) +std::unique_ptr select_RNG_plugin(config_file &cf) { - std::string rngname = cf.GetValueSafe("random", "generator", "MUSIC"); + std::string rngname = cf.get_value_safe("random", "generator", "MUSIC"); RNG_plugin_creator *the_RNG_plugin_creator = get_RNG_plugin_map()[rngname]; diff --git a/src/testing.cc b/src/testing.cc index c65eb53..8e88e17 100644 --- a/src/testing.cc +++ b/src/testing.cc @@ -9,7 +9,7 @@ namespace testing { void output_potentials_and_densities( - ConfigFile &the_config, + config_file &the_config, size_t ngrid, real_t boxlen, Grid_FFT &phi, Grid_FFT &phi2, @@ -17,8 +17,8 @@ void output_potentials_and_densities( Grid_FFT &phi3b, std::array *, 3> &A3) { - const std::string fname_hdf5 = the_config.GetValueSafe("output", "fname_hdf5", "output.hdf5"); - const std::string fname_analysis = the_config.GetValueSafe("output", "fbase_analysis", "output"); + const std::string fname_hdf5 = the_config.get_value_safe("output", "fname_hdf5", "output.hdf5"); + const std::string fname_analysis = the_config.get_value_safe("output", "fbase_analysis", "output"); Grid_FFT delta({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT delta2({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); @@ -98,7 +98,7 @@ void output_potentials_and_densities( } void output_velocity_displacement_symmetries( - ConfigFile &the_config, + config_file &the_config, size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, @@ -107,8 +107,8 @@ void output_velocity_displacement_symmetries( std::array *, 3> &A3, bool bwrite_out_fields) { - const std::string fname_hdf5 = the_config.GetValueSafe("output", "fname_hdf5", "output.hdf5"); - const std::string fname_analysis = the_config.GetValueSafe("output", "fbase_analysis", "output"); + const std::string fname_hdf5 = the_config.get_value_safe("output", "fname_hdf5", "output.hdf5"); + const std::string fname_analysis = the_config.get_value_safe("output", "fbase_analysis", "output"); real_t vfac1 = vfac; real_t vfac2 = 2 * vfac; @@ -241,7 +241,7 @@ void output_velocity_displacement_symmetries( } void output_convergence( - ConfigFile &the_config, + config_file &the_config, cosmology::calculator* the_cosmo_calc, std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, diff --git a/src/transfer_function_plugin.cc b/src/transfer_function_plugin.cc index 424ae82..5b2ec9e 100644 --- 
a/src/transfer_function_plugin.cc +++ b/src/transfer_function_plugin.cc @@ -23,9 +23,9 @@ void print_TransferFunction_plugins() music::ilog << std::endl; } -std::unique_ptr select_TransferFunction_plugin(ConfigFile &cf) +std::unique_ptr select_TransferFunction_plugin(config_file &cf) { - std::string tfname = cf.GetValue("cosmology", "transfer"); + std::string tfname = cf.get_value("cosmology", "transfer"); TransferFunction_plugin_creator *the_TransferFunction_plugin_creator = get_TransferFunction_plugin_map()[tfname]; From 096513e7e889dc2c36ab76fb20ee9d0fde6ac829 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 20:59:28 +0200 Subject: [PATCH 104/130] moved git version info to cmake_config.hh.in --- include/cmake_config.hh.in | 11 ++++++++++- include/general.hh | 11 +---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/cmake_config.hh.in b/include/cmake_config.hh.in index db4b9a8..03768a3 100644 --- a/include/cmake_config.hh.in +++ b/include/cmake_config.hh.in @@ -22,4 +22,13 @@ constexpr char CMAKE_BUILDTYPE_STR[] = "${CMAKE_BUILD_TYPE}"; constexpr char CMAKE_PLT_STR[] = "PLT corr. on"; #else constexpr char CMAKE_PLT_STR[] = "PLT corr. off"; -#endif \ No newline at end of file +#endif + +// These variables are autogenerated and compiled +// into the library by the version.cmake script. do not touch! +extern "C" +{ + extern const char *GIT_TAG; + extern const char *GIT_REV; + extern const char *GIT_BRANCH; +} \ No newline at end of file diff --git a/include/general.hh b/include/general.hh index f4395bb..88eb2f7 100644 --- a/include/general.hh +++ b/include/general.hh @@ -169,13 +169,4 @@ extern bool MPI_ok; extern bool MPI_threads_ok; extern bool FFTW_threads_ok; extern int num_threads; -} // namespace CONFIG - -// These variables are autogenerated and compiled -// into the library by the version.cmake script -extern "C" -{ - extern const char *GIT_TAG; - extern const char *GIT_REV; - extern const char *GIT_BRANCH; -} \ No newline at end of file +} // namespace CONFIG \ No newline at end of file From 4644840ee6a9678992842587253e1f0070e432ed Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 21:18:18 +0200 Subject: [PATCH 105/130] removed old files --- src/old/output_gadget2___original.cc | 1408 -------------------------- 1 file changed, 1408 deletions(-) delete mode 100644 src/old/output_gadget2___original.cc diff --git a/src/old/output_gadget2___original.cc b/src/old/output_gadget2___original.cc deleted file mode 100644 index 688ddae..0000000 --- a/src/old/output_gadget2___original.cc +++ /dev/null @@ -1,1408 +0,0 @@ -/* - - output_gadget2.cc - This file is part of MUSIC - - a code to generate multi-scale initial conditions - for cosmological simulations - - Copyright (C) 2010 Oliver Hahn - - */ - -#include -#include -#include "logger.hh" -// #include "region_generator.hh" -#include "output_plugin.hh" -// #include "mg_interp.hh" -// #include "mesh.hh" - - -template -class gadget2_output_plugin : public output_plugin -{ - // const int empty_fill_bytes = 56; - -public: - bool do_baryons_; - double omegab_; - double gamma_; - bool shift_halfcell_; - -protected: - std::ofstream ofs_; - bool blongids_; - bool bhave_particlenumbers_; - - std::map units_length_; - std::map units_mass_; - std::map units_vel_; - - double unit_length_chosen_; - double unit_mass_chosen_; - double unit_vel_chosen_; - - typedef struct io_header - { - int npart[6]; - double mass[6]; - double time; - double redshift; - int flag_sfr; - int flag_feedback; - 
unsigned int npartTotal[6]; - int flag_cooling; - int num_files; - double BoxSize; - double Omega0; - double OmegaLambda; - double HubbleParam; - int flag_stellarage; - int flag_metals; - unsigned int npartTotalHighWord[6]; - int flag_entropy_instead_u; - int flag_doubleprecision; - char fill[empty_fill_bytes]; - } header; - - header header_; - - std::string fname; - - enum iofields - { - id_dm_mass, - id_dm_vel, - id_dm_pos, - id_gas_vel, - id_gas_rho, - id_gas_temp, - id_gas_pos - }; - - size_t np_per_type_[6]; - - size_t block_buf_size_; - size_t npartmax_; - unsigned nfiles_; - - unsigned bndparticletype_; - bool bmorethan2bnd_; - bool kpcunits_; - bool msolunits_; - double YHe_; - bool spread_coarse_acrosstypes_; - - // refinement_mask refmask; - - void distribute_particles(unsigned nfiles, std::vector> &np_per_file, std::vector &np_tot_per_file) - { - np_per_file.assign(nfiles, std::vector(6, 0)); - np_tot_per_file.assign(nfiles, 0); - - size_t n2dist[6]; - size_t ntotal = 0; - for (int i = 0; i < 6; ++i) - { - ntotal += np_per_type_[i]; - n2dist[i] = np_per_type_[i]; - } - - size_t nnominal = (size_t)((double)ntotal / (double)nfiles); - size_t nlast = ntotal - nnominal * (nfiles - 1); - - for (unsigned i = 0; i < nfiles; ++i) - { - size_t nthisfile = 0; - - size_t nmax = (i == nfiles - 1) ? nlast : nnominal; - - for (int itype = 0; itype < 6; ++itype) - { - if (n2dist[itype] == 0) - continue; - np_per_file[i][itype] = std::min(n2dist[itype], nmax - nthisfile); - n2dist[itype] -= np_per_file[i][itype]; - nthisfile += np_per_file[i][itype]; - - if (nthisfile >= nmax) - break; - } - - np_tot_per_file[i] = nthisfile; - } - - for (int i = 0; i < 6; ++i) - assert(n2dist[i] == 0); - } - - std::ifstream &open_and_check(std::string ffname, size_t npart, size_t offset = 0) - { - std::ifstream ifs(ffname.c_str(), std::ios::binary); - size_t blk; - ifs.read((char *)&blk, sizeof(size_t)); - if (blk != npart * (size_t)sizeof(T_store)) - { - music::elog.Print("Internal consistency error in gadget2 output plug-in"); - music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * (size_t)sizeof(T_store), blk); - throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); - } - ifs.seekg(offset, std::ios::cur); - - return ifs; - } - - class pistream : public std::ifstream - { - public: - pistream(std::string fname, size_t npart, size_t offset = 0) - : std::ifstream(fname.c_str(), std::ios::binary) - { - size_t blk; - - if (!this->good()) - { - music::elog.Print("Could not open buffer file in gadget2 output plug-in"); - throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); - } - - this->read((char *)&blk, sizeof(size_t)); - - if (blk != npart * sizeof(T_store)) - { - music::elog.Print("Internal consistency error in gadget2 output plug-in"); - music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); - throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); - } - - this->seekg(offset + sizeof(size_t), std::ios::beg); - } - - pistream() - { - } - - void open(std::string fname, size_t npart, size_t offset = 0) - { - std::ifstream::open(fname.c_str(), std::ios::binary); - size_t blk; - - if (!this->good()) - { - music::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); - throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); - } - - this->read((char *)&blk, sizeof(size_t)); - - if (blk != npart * sizeof(T_store)) 
- { - music::elog.Print("Internal consistency error in gadget2 output plug-in"); - music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); - throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); - } - - this->seekg(offset + sizeof(size_t), std::ios::beg); - } - }; - - class postream : public std::fstream - { - public: - postream(std::string fname, size_t npart, size_t offset = 0) - : std::fstream(fname.c_str(), std::ios::binary | std::ios::in | std::ios::out) - { - size_t blk; - - if (!this->good()) - { - music::elog.Print("Could not open buffer file in gadget2 output plug-in"); - throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); - } - - this->read((char *)&blk, sizeof(size_t)); - - if (blk != npart * sizeof(T_store)) - { - music::elog.Print("Internal consistency error in gadget2 output plug-in"); - music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); - throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); - } - - this->seekg(offset, std::ios::cur); - this->seekp(offset + sizeof(size_t), std::ios::beg); - } - - postream() - { - } - - void open(std::string fname, size_t npart, size_t offset = 0) - { - if (is_open()) - this->close(); - - std::fstream::open(fname.c_str(), std::ios::binary | std::ios::in | std::ios::out); - size_t blk; - - if (!this->good()) - { - music::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); - throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); - } - - this->read((char *)&blk, sizeof(size_t)); - - if (blk != npart * sizeof(T_store)) - { - music::elog.Print("Internal consistency error in gadget2 output plug-in"); - music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); - throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); - } - - this->seekg(offset, std::ios::cur); - this->seekp(offset + sizeof(size_t), std::ios::beg); - } - }; - - void combine_components_for_coarse(void) - { - const size_t - nptot = np_per_type_[1] + np_per_type_[2] + np_per_type_[3] + np_per_type_[4] + np_per_type_[5], - npfine = np_per_type_[1], - npcoarse = nptot - npfine; - - std::vector tmp1, tmp2; - - tmp1.assign(block_buf_size_, 0.0); - tmp2.assign(block_buf_size_, 0.0); - - double facb = omegab_ / header_.Omega0, facc = (header_.Omega0 - omegab_) / header_.Omega0; - - for (int icomp = 0; icomp < 3; ++icomp) - { - char fc[256], fb[256]; - postream iffs1, iffs2; - - /*** positions ***/ - - sprintf(fc, "___ic_temp_%05d.bin", 100 * id_dm_pos + icomp); - sprintf(fb, "___ic_temp_%05d.bin", 100 * id_gas_pos + icomp); - - iffs1.open(fc, nptot, npfine * sizeof(T_store)); - iffs2.open(fb, nptot, npfine * sizeof(T_store)); - - size_t npleft = npcoarse; - size_t n2read = std::min((size_t)block_buf_size_, npleft); - while (n2read > 0ul) - { - std::streampos sp = iffs1.tellg(); - iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); - - for (size_t i = 0; i < n2read; ++i) - { - tmp1[i] = facc * tmp1[i] + facb * tmp2[i]; - } - - iffs1.seekp(sp); - iffs1.write(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - - npleft -= n2read; - n2read = std::min((size_t)block_buf_size_, npleft); - } - - iffs1.close(); - iffs2.close(); - - /*** velocities ***/ - - sprintf(fc, "___ic_temp_%05d.bin", 100 * id_dm_vel + icomp); 
- sprintf(fb, "___ic_temp_%05d.bin", 100 * id_gas_vel + icomp); - - iffs1.open(fc, nptot, npfine * sizeof(T_store)); - iffs2.open(fb, nptot, npfine * sizeof(T_store)); - - npleft = npcoarse; - n2read = std::min((size_t)block_buf_size_, npleft); - - while (n2read > 0ul) - { - std::streampos sp = iffs1.tellg(); - iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); - - for (size_t i = 0; i < n2read; ++i) - { - tmp1[i] = facc * tmp1[i] + facb * tmp2[i]; - } - - iffs1.seekp(sp); - iffs1.write(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - - npleft -= n2read; - n2read = std::min((size_t)block_buf_size_, npleft); - } - - iffs1.close(); - iffs2.close(); - } - } - - void assemble_gadget_file(void) - { - - if (do_baryons_) - combine_components_for_coarse(); - - //............................................................................ - //... copy from the temporary files, interleave the data and save ............ - - char fnx[256], fny[256], fnz[256], fnvx[256], fnvy[256], fnvz[256], fnm[256]; - char fnbx[256], fnby[256], fnbz[256], fnbvx[256], fnbvy[256], fnbvz[256]; - - sprintf(fnx, "___ic_temp_%05d.bin", 100 * id_dm_pos + 0); - sprintf(fny, "___ic_temp_%05d.bin", 100 * id_dm_pos + 1); - sprintf(fnz, "___ic_temp_%05d.bin", 100 * id_dm_pos + 2); - sprintf(fnvx, "___ic_temp_%05d.bin", 100 * id_dm_vel + 0); - sprintf(fnvy, "___ic_temp_%05d.bin", 100 * id_dm_vel + 1); - sprintf(fnvz, "___ic_temp_%05d.bin", 100 * id_dm_vel + 2); - sprintf(fnm, "___ic_temp_%05d.bin", 100 * id_dm_mass); - - sprintf(fnbx, "___ic_temp_%05d.bin", 100 * id_gas_pos + 0); - sprintf(fnby, "___ic_temp_%05d.bin", 100 * id_gas_pos + 1); - sprintf(fnbz, "___ic_temp_%05d.bin", 100 * id_gas_pos + 2); - sprintf(fnbvx, "___ic_temp_%05d.bin", 100 * id_gas_vel + 0); - sprintf(fnbvy, "___ic_temp_%05d.bin", 100 * id_gas_vel + 1); - sprintf(fnbvz, "___ic_temp_%05d.bin", 100 * id_gas_vel + 2); - - pistream iffs1, iffs2, iffs3; - - const size_t - nptot = np_per_type_[0] + np_per_type_[1] + np_per_type_[2] + np_per_type_[3] + np_per_type_[4] + np_per_type_[5], - //npgas = np_fine_gas_, - npcdm = nptot - np_per_type_[0]; - - size_t - wrote_coarse = 0, - wrote_gas = 0, - wrote_dm = 0; - - size_t - npleft = nptot, - n2read = std::min((size_t)block_buf_size_, npleft); - - std::cout << " - Gadget2 : writing " << nptot << " particles to file...\n"; - for (int i = 0; i < 6; ++i) - if (np_per_type_[i] > 0) - music::ilog.Print(" type %d : %12llu [m=%g]", i, np_per_type_[i], header_.mass[i]); - - bool bbaryons = np_per_type_[0] > 0; - - std::vector adata3; - adata3.reserve(3 * block_buf_size_); - T_store *tmp1, *tmp2, *tmp3; - - tmp1 = new T_store[block_buf_size_]; - tmp2 = new T_store[block_buf_size_]; - tmp3 = new T_store[block_buf_size_]; - - //... 
for multi-file output - //int fileno = 0; - //size_t npart_left = nptot; - - //std::vector nfdm_per_file, nfgas_per_file, nc_per_file; - - std::vector> np_per_file; - std::vector np_tot_per_file; - - distribute_particles(nfiles_, np_per_file, np_tot_per_file); - - if (nfiles_ > 1) - { - music::ilog.Print("Gadget2 : distributing particles to %d files", nfiles_); - //<< " " << std::setw(12) << "type 0" << "," << std::setw(12) << "type 1" << "," << std::setw(12) << "type " << bndparticletype_ << std::endl; - for (unsigned i = 0; i < nfiles_; ++i) - music::ilog.Print(" file %i : %12llu", i, np_tot_per_file[i], header_.mass[i]); - } - - size_t curr_block_buf_size = block_buf_size_; - - size_t idcount = 0; - bool bneed_long_ids = blongids_; - if (nptot >= 1ul << 32 && !bneed_long_ids) - { - bneed_long_ids = true; - music::wlog.Print("Need long particle IDs, will write 64bit, make sure to enable in Gadget!"); - } - - for (unsigned ifile = 0; ifile < nfiles_; ++ifile) - { - - if (nfiles_ > 1) - { - char ffname[256]; - sprintf(ffname, "%s.%d", fname_.c_str(), ifile); - ofs_.open(ffname, std::ios::binary | std::ios::trunc); - } - else - { - ofs_.open(fname_.c_str(), std::ios::binary | std::ios::trunc); - } - - size_t np_this_file = np_tot_per_file[ifile]; - - int blksize = sizeof(header); - - //... write the header ....................................................... - - header this_header(header_); - for (int i = 0; i < 6; ++i) - { - this_header.npart[i] = np_per_file[ifile][i]; - this_header.npartTotal[i] = (unsigned)np_per_type_[i]; - this_header.npartTotalHighWord[i] = (unsigned)(np_per_type_[i] >> 32); - } - - ofs_.write((char *)&blksize, sizeof(int)); - ofs_.write((char *)&this_header, sizeof(header)); - ofs_.write((char *)&blksize, sizeof(int)); - - //... particle positions .................................................. 
- blksize = 3ul * np_this_file * sizeof(T_store); - ofs_.write((char *)&blksize, sizeof(int)); - - if (bbaryons && np_per_file[ifile][0] > 0ul) - { - - iffs1.open(fnbx, npcdm, wrote_gas * sizeof(T_store)); - iffs2.open(fnby, npcdm, wrote_gas * sizeof(T_store)); - iffs3.open(fnbz, npcdm, wrote_gas * sizeof(T_store)); - - npleft = np_per_file[ifile][0]; - n2read = std::min(curr_block_buf_size, npleft); - while (n2read > 0ul) - { - iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); - iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); - - for (size_t i = 0; i < n2read; ++i) - { - adata3.push_back(fmod(tmp1[i] + header_.BoxSize, header_.BoxSize)); - adata3.push_back(fmod(tmp2[i] + header_.BoxSize, header_.BoxSize)); - adata3.push_back(fmod(tmp3[i] + header_.BoxSize, header_.BoxSize)); - } - ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); - - adata3.clear(); - npleft -= n2read; - n2read = std::min(curr_block_buf_size, npleft); - } - iffs1.close(); - iffs2.close(); - iffs3.close(); - } - - npleft = np_this_file - np_per_file[ifile][0]; - n2read = std::min(curr_block_buf_size, npleft); - - iffs1.open(fnx, npcdm, wrote_dm * sizeof(T_store)); - iffs2.open(fny, npcdm, wrote_dm * sizeof(T_store)); - iffs3.open(fnz, npcdm, wrote_dm * sizeof(T_store)); - - while (n2read > 0ul) - { - iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); - iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); - - for (size_t i = 0; i < n2read; ++i) - { - adata3.push_back(fmod(tmp1[i] + header_.BoxSize, header_.BoxSize)); - adata3.push_back(fmod(tmp2[i] + header_.BoxSize, header_.BoxSize)); - adata3.push_back(fmod(tmp3[i] + header_.BoxSize, header_.BoxSize)); - } - ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); - - adata3.clear(); - npleft -= n2read; - n2read = std::min(curr_block_buf_size, npleft); - } - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - - iffs1.close(); - iffs2.close(); - iffs3.close(); - - //... particle velocities .................................................. 
- blksize = 3ul * np_this_file * sizeof(T_store); - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - - if (bbaryons && np_per_file[ifile][0] > 0ul) - { - iffs1.open(fnbvx, npcdm, wrote_gas * sizeof(T_store)); - iffs2.open(fnbvy, npcdm, wrote_gas * sizeof(T_store)); - iffs3.open(fnbvz, npcdm, wrote_gas * sizeof(T_store)); - - npleft = np_per_file[ifile][0]; - n2read = std::min(curr_block_buf_size, npleft); - while (n2read > 0ul) - { - iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); - iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); - - for (size_t i = 0; i < n2read; ++i) - { - adata3.push_back(tmp1[i]); - adata3.push_back(tmp2[i]); - adata3.push_back(tmp3[i]); - } - - ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); - - adata3.clear(); - npleft -= n2read; - n2read = std::min(curr_block_buf_size, npleft); - } - - iffs1.close(); - iffs2.close(); - iffs3.close(); - } - - iffs1.open(fnvx, npcdm, wrote_dm * sizeof(T_store)); - iffs2.open(fnvy, npcdm, wrote_dm * sizeof(T_store)); - iffs3.open(fnvz, npcdm, wrote_dm * sizeof(T_store)); - - npleft = np_this_file - np_per_file[ifile][0]; - n2read = std::min(curr_block_buf_size, npleft); - while (n2read > 0ul) - { - iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); - iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); - - for (size_t i = 0; i < n2read; ++i) - { - adata3.push_back(tmp1[i]); - adata3.push_back(tmp2[i]); - adata3.push_back(tmp3[i]); - } - - ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); - - adata3.clear(); - npleft -= n2read; - n2read = std::min(curr_block_buf_size, npleft); - } - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - - iffs1.close(); - iffs2.close(); - iffs3.close(); - - //... particle IDs .......................................................... - std::vector short_ids; - std::vector long_ids; - - if (bneed_long_ids) - long_ids.assign(curr_block_buf_size, 0); - else - short_ids.assign(curr_block_buf_size, 0); - - npleft = np_this_file; - n2read = std::min(curr_block_buf_size, npleft); - blksize = sizeof(unsigned) * np_this_file; - - if (bneed_long_ids) - blksize = sizeof(size_t) * np_this_file; - - //... generate contiguous IDs and store in file .. - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - while (n2read > 0ul) - { - if (bneed_long_ids) - { - for (size_t i = 0; i < n2read; ++i) - long_ids[i] = idcount++; - ofs_.write(reinterpret_cast(&long_ids[0]), n2read * sizeof(size_t)); - } - else - { - for (size_t i = 0; i < n2read; ++i) - short_ids[i] = idcount++; - ofs_.write(reinterpret_cast(&short_ids[0]), n2read * sizeof(unsigned)); - } - npleft -= n2read; - n2read = std::min(curr_block_buf_size, npleft); - } - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - - std::vector().swap(short_ids); - std::vector().swap(long_ids); - - //... particle masses ....................................................... 
- if (bmorethan2bnd_) //bmultimass_ && bmorethan2bnd_ && nc_per_file[ifile] > 0ul) - { - unsigned npcoarse = np_per_file[ifile][bndparticletype_]; // nc_per_file[ifile];//header_.npart[5]; - iffs1.open(fnm, np_per_type_[bndparticletype_], wrote_coarse * sizeof(T_store)); - - npleft = npcoarse; - n2read = std::min(curr_block_buf_size, npleft); - blksize = npcoarse * sizeof(T_store); - - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - while (n2read > 0ul) - { - iffs1.read(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - ofs_.write(reinterpret_cast(&tmp1[0]), n2read * sizeof(T_store)); - - npleft -= n2read; - n2read = std::min(curr_block_buf_size, npleft); - } - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - - iffs1.close(); - } - - //... initial internal energy for gas particles - if (bbaryons && np_per_file[ifile][0] > 0ul) - { - - std::vector eint(curr_block_buf_size, 0.0); - - const double astart = 1. / (1. + header_.redshift); - const double npol = (fabs(1.0 - gamma_) > 1e-7) ? 1.0 / (gamma_ - 1.) : 1.0; - const double unitv = 1e5; - const double h2 = header_.HubbleParam * header_.HubbleParam; //*0.0001; - const double adec = 1.0 / (160. * pow(omegab_ * h2 / 0.022, 2.0 / 5.0)); - const double Tcmb0 = 2.726; - const double Tini = astart < adec ? Tcmb0 / astart : Tcmb0 / astart / astart * adec; - const double mu = (Tini > 1.e4) ? 4.0 / (8. - 5. * YHe_) : 4.0 / (1. + 3. * (1. - YHe_)); - const double ceint = 1.3806e-16 / 1.6726e-24 * Tini * npol / mu / unitv / unitv; - - npleft = np_per_file[ifile][0]; - n2read = std::min(curr_block_buf_size, npleft); - blksize = sizeof(T_store) * np_per_file[ifile][0]; //*npgas - - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - while (n2read > 0ul) - { - for (size_t i = 0; i < n2read; ++i) - eint[i] = ceint; - ofs_.write(reinterpret_cast(&eint[0]), n2read * sizeof(T_store)); - npleft -= n2read; - n2read = std::min(curr_block_buf_size, npleft); - } - ofs_.write(reinterpret_cast(&blksize), sizeof(int)); - - static bool bdisplayed = false; - if (!bdisplayed) - { - music::ilog.Print("Gadget2 : set initial gas temperature to %.2f K/mu", Tini / mu); - bdisplayed = true; - } - } - - ofs_.flush(); - ofs_.close(); - - wrote_gas += np_per_file[ifile][0]; - wrote_dm += np_this_file - np_per_file[ifile][0]; - wrote_coarse += np_per_file[ifile][5]; - } - - delete[] tmp1; - delete[] tmp2; - delete[] tmp3; - - remove(fnbx); - remove(fnby); - remove(fnbz); - remove(fnx); - remove(fny); - remove(fnz); - remove(fnbvx); - remove(fnbvy); - remove(fnbvz); - remove(fnvx); - remove(fnvy); - remove(fnvz); - remove(fnm); - } - - void determine_particle_numbers(const grid_hierarchy &gh) - { - if (!bhave_particlenumbers_) - { - bhave_particlenumbers_ = true; - - double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 - - /*if( kpcunits_ ) - rhoc *= 1e-9; // in h^2 1e10 M_sol / kpc^3 - - if( msolunits_ ) - rhoc *= 1e10; // in h^2 M_sol / kpc^3*/ - - rhoc /= unit_mass_chosen_ / (unit_length_chosen_ * unit_length_chosen_ * unit_length_chosen_); - - // only type 1 are baryons - if (!do_baryons_) - header_.mass[1] = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / gh.count_leaf_cells(0, 0); ///pow(2,3*levelmax_); - else - { - header_.mass[0] = (omegab_)*rhoc * pow(header_.BoxSize, 3.) / gh.count_leaf_cells(0, 0); ///pow(2,3*levelmax_); - header_.mass[1] = (header_.Omega0 - omegab_) * rhoc * pow(header_.BoxSize, 3.) / gh.count_leaf_cells(0, 0); ///pow(2,3*levelmax_); - } - - //... 
- for (int i = 0; i < 6; ++i) - np_per_type_[i] = 0; - - // determine how many particles per type exist, determine their mass - for (int ilevel = (int)gh.levelmax(); ilevel >= (int)gh.levelmin(); --ilevel) - { - int itype = std::min((int)gh.levelmax() - ilevel + 1, 5); - np_per_type_[itype] += gh.count_leaf_cells(ilevel, ilevel); - if (itype > 1) - header_.mass[itype] = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / pow(2, 3 * ilevel); - } - - // if coarse particles should not be spread across types, assign them all to type bndparticletype - if (!spread_coarse_acrosstypes_) - { - if (gh.levelmax() > gh.levelmin() + 1) - bmorethan2bnd_ = true; - else - bmorethan2bnd_ = false; - - for (unsigned itype = 2; itype < 6; ++itype) - { - if (itype == bndparticletype_) - continue; - np_per_type_[bndparticletype_] += np_per_type_[itype]; - if (!bmorethan2bnd_) - header_.mass[bndparticletype_] += header_.mass[itype]; - np_per_type_[itype] = 0; - header_.mass[itype] = 0.; - } - } - - if (do_baryons_) - np_per_type_[0] = np_per_type_[1]; - } - } - -public: - gadget2_output_plugin(config_file &cf) - : output_plugin(cf) - { - - units_mass_.insert(std::pair("1e10Msol", 1.0)); // 1e10 M_o/h (default) - units_mass_.insert(std::pair("Msol", 1.0e-10)); // 1 M_o/h - units_mass_.insert(std::pair("Mearth", 3.002e-16)); // 1 M_earth/h - - units_length_.insert(std::pair("Mpc", 1.0)); // 1 Mpc/h (default) - units_length_.insert(std::pair("kpc", 1.0e-3)); // 1 kpc/h - units_length_.insert(std::pair("pc", 1.0e-6)); // 1 pc/h - - units_vel_.insert(std::pair("km/s", 1.0)); // 1 km/s (default) - units_vel_.insert(std::pair("m/s", 1.0e-3)); // 1 m/s - units_vel_.insert(std::pair("cm/s", 1.0e-5)); // 1 cm/s - - block_buf_size_ = cf_.get_value_safe("output", "gadget_blksize", 1048576); - - //... 
ensure that everyone knows we want to do SPH - cf.insert_value("setup", "do_SPH", "yes"); - - //bbndparticles_ = !cf_.get_value_safe("output","gadget_nobndpart",false); - npartmax_ = 1 << 30; - - nfiles_ = cf.get_value_safe("output", "gadget_num_files", 1); - - blongids_ = cf.get_value_safe("output", "gadget_longids", false); - - shift_halfcell_ = cf.get_value_safe("output", "gadget_cell_centered", false); - - //if( nfiles_ < (int)ceil((double)npart/(double)npartmax_) ) - // music::wlog.Print("Should use more files."); - - if (nfiles_ > 1) - { - for (unsigned ifile = 0; ifile < nfiles_; ++ifile) - { - char ffname[256]; - sprintf(ffname, "%s.%d", fname_.c_str(), ifile); - ofs_.open(ffname, std::ios::binary | std::ios::trunc); - if (!ofs_.good()) - { - music::elog.Print("gadget-2 output plug-in could not open output file \'%s\' for writing!", ffname); - throw std::runtime_error(std::string("gadget-2 output plug-in could not open output file \'") + std::string(ffname) + "\' for writing!\n"); - } - ofs_.close(); - } - } - else - { - ofs_.open(fname_.c_str(), std::ios::binary | std::ios::trunc); - if (!ofs_.good()) - { - music::elog.Print("gadget-2 output plug-in could not open output file \'%s\' for writing!", fname_.c_str()); - throw std::runtime_error(std::string("gadget-2 output plug-in could not open output file \'") + fname_ + "\' for writing!\n"); - } - ofs_.close(); - } - - bhave_particlenumbers_ = false; - - bmorethan2bnd_ = false; - if (false) //levelmax_ > levelmin_ +4) - bmorethan2bnd_ = true; - - for (int i = 0; i < 6; ++i) - { - header_.npart[i] = 0; - header_.npartTotal[i] = 0; - header_.npartTotalHighWord[i] = 0; - header_.mass[i] = 0.0; - } - - if (typeid(T_store) == typeid(float)) - header_.flag_doubleprecision = 0; - else if (typeid(T_store) == typeid(double)) - header_.flag_doubleprecision = 1; - else - { - music::elog.Print("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); - throw std::runtime_error("Internal error: gadget-2 output plug-in called for neither \'float\' nor \'double\'"); - } - - YHe_ = cf.get_value_safe("cosmology", "YHe", 0.248); - gamma_ = cf.get_value_safe("cosmology", "gamma", 5.0 / 3.0); - - do_baryons_ = cf.get_value_safe("setup", "baryons", false); - omegab_ = cf.get_value_safe("cosmology", "Omega_b", 0.045); - - //... new way - std::string lunitstr = cf.get_value_safe("output", "gadget_lunit", "Mpc"); - std::string munitstr = cf.get_value_safe("output", "gadget_munit", "1e10Msol"); - std::string vunitstr = cf.get_value_safe("output", "gadget_vunit", "km/s"); - - std::map::iterator mapit; - - if ((mapit = units_length_.find(lunitstr)) != units_length_.end()) - unit_length_chosen_ = (*mapit).second; - else - { - music::elog.Print("Gadget: length unit \'%s\' unknown in gadget_lunit", lunitstr.c_str()); - throw std::runtime_error("Unknown length unit specified for Gadget output plugin"); - } - - if ((mapit = units_mass_.find(munitstr)) != units_mass_.end()) - unit_mass_chosen_ = (*mapit).second; - else - { - music::elog.Print("Gadget: mass unit \'%s\' unknown in gadget_munit", munitstr.c_str()); - throw std::runtime_error("Unknown mass unit specified for Gadget output plugin"); - } - - if ((mapit = units_vel_.find(vunitstr)) != units_vel_.end()) - unit_vel_chosen_ = (*mapit).second; - else - { - music::elog.Print("Gadget: velocity unit \'%s\' unknown in gadget_vunit", vunitstr.c_str()); - throw std::runtime_error("Unknown velocity unit specified for Gadget output plugin"); - } - - //... 
maintain compatibility with old way of setting units - if (cf.contains_key("output", "gadget_usekpc")) - { - kpcunits_ = cf.get_value_safe("output", "gadget_usekpc", false); - if (kpcunits_) - unit_length_chosen_ = 1e-3; - music::wlog.Print("Deprecated option \'gadget_usekpc\' may override unit selection. Use \'gadget_lunit\' instead."); - } - if (cf.contains_key("output", "gadget_usemsol")) - { - msolunits_ = cf.get_value_safe("output", "gadget_usemsol", false); - if (msolunits_) - unit_mass_chosen_ = 1e-10; - music::wlog.Print("Deprecated option \'gadget_usemsol\' may override unit selection. Use \'gadget_munit\' instead."); - } - - //... coarse particle properties... - - spread_coarse_acrosstypes_ = cf.get_value_safe("output", "gadget_spreadcoarse", false); - bndparticletype_ = 5; - - if (!spread_coarse_acrosstypes_) - { - bndparticletype_ = cf.get_value_safe("output", "gadget_coarsetype", 5); - - if (bndparticletype_ == 0 || //bndparticletype_ == 1 || bndparticletype_ == 4 || - bndparticletype_ > 5) - { - music::elog.Print("Coarse particles cannot be of Gadget particle type %d in output plugin.", bndparticletype_); - throw std::runtime_error("Specified illegal Gadget particle type for coarse particles"); - } - } - else - { - if (cf.get_value_safe("output", "gadget_coarsetype", 5) != 5) - music::wlog.Print("Gadget: Option \'gadget_spreadcoarse\' forces \'gadget_coarsetype=5\'! Will override."); - } - - //... set time ...................................................... - header_.redshift = cf.get_value("setup", "zstart"); - header_.time = 1.0 / (1.0 + header_.redshift); - - //... SF flags - header_.flag_sfr = 0; - header_.flag_feedback = 0; - header_.flag_cooling = 0; - - //... - header_.num_files = nfiles_; //1; - header_.BoxSize = cf.get_value("setup", "BoxLength"); - header_.Omega0 = cf.get_value("cosmology", "Omega_m"); - header_.OmegaLambda = cf.get_value("cosmology", "Omega_L"); - header_.HubbleParam = cf.get_value("cosmology", "H0") / 100.0; - - header_.flag_stellarage = 0; - header_.flag_metals = 0; - - header_.flag_entropy_instead_u = 0; - - //if( kpcunits_ ) - // header_.BoxSize *= 1000.0; - header_.BoxSize /= unit_length_chosen_; - - for (int i = 0; i < empty_fill_bytes; ++i) - header_.fill[i] = 0; - } - - void write_dm_mass(const grid_hierarchy &gh) - { - determine_particle_numbers(gh); - - double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 - - // adjust units - rhoc /= unit_mass_chosen_ / (unit_length_chosen_ * unit_length_chosen_ * unit_length_chosen_); - - /*if( kpcunits_ ) - rhoc *= 1e-9; // in h^2 1e10 M_sol / kpc^3 - - if( msolunits_ ) - rhoc *= 1e10; // in h^2 M_sol / kpc^3 - */ - - // if there are more than one kind of coarse particle assigned to the same type, - // we have to explicitly store their masses - if (bmorethan2bnd_) - { - header_.mass[bndparticletype_] = 0.; - - size_t npcoarse = np_per_type_[bndparticletype_]; - size_t nwritten = 0; - - std::vector temp_dat; - temp_dat.reserve(block_buf_size_); - - char temp_fname[256]; - sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_dm_mass); - std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); - - size_t blksize = sizeof(T_store) * npcoarse; - - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - // int levelmaxcoarse = gh.levelmax() - 4; - // if (!spread_coarse_acrosstypes_) - // levelmaxcoarse = gh.levelmax() - 1; - - //for( int ilevel=levelmaxcoarse; ilevel>=(int)gh.levelmin(); --ilevel ) - - { - int ilevel = 0; - // baryon particles live only on finest grid - // these particles 
here are total matter particles - double pmass = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / pow(2, 3 * ilevel); - - for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) - for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) - for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) - if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) - { - if (temp_dat.size() < block_buf_size_) - temp_dat.push_back(pmass); - else - { - ofs_temp.write((char *)&temp_dat[0], sizeof(T_store) * block_buf_size_); - nwritten += block_buf_size_; - temp_dat.clear(); - temp_dat.push_back(pmass); - } - } - } - - if (temp_dat.size() > 0) - { - ofs_temp.write((char *)&temp_dat[0], sizeof(T_store) * temp_dat.size()); - nwritten += temp_dat.size(); - } - - if (nwritten != npcoarse) - { - music::elog.Print("nwritten = %llu != npcoarse = %llu\n", nwritten, npcoarse); - throw std::runtime_error("Internal consistency error while writing temporary file for masses"); - } - - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - if (ofs_temp.bad()) - throw std::runtime_error("I/O error while writing temporary file for masses"); - } - } - - void write_dm_position(int coord, const grid_hierarchy &gh) - { - //... count number of leaf cells ...// - determine_particle_numbers(gh); - - size_t npart = 0; - for (int i = 1; i < 6; ++i) - npart += np_per_type_[i]; - - //... determine if we need to shift the coordinates back - double *shift = NULL; - - if (shift_halfcell_) - { - double h = 0.0; //1.0/(1<<(levelmin_+1)); - shift = new double[3]; - shift[0] = shift[1] = shift[2] = -h; - } - - size_t nwritten = 0; - //... collect displacements and convert to absolute coordinates with correct - //... units - std::vector temp_data; - temp_data.reserve(block_buf_size_); - - char temp_fname[256]; - sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_dm_pos + coord); - std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); - - size_t blksize = sizeof(T_store) * npart; - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - double xfac = header_.BoxSize; - - //for( int ilevel=gh.levelmax(); ilevel>=(int)gh.levelmin(); --ilevel ) - unsigned ilevel = 0; - for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) - for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) - for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) - if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) - { - double xx[3]; - gh.cell_pos(ilevel, i, j, k, xx); - if (shift != NULL) - xx[coord] += shift[coord]; - - - // std::cerr << i << " " << j << " " << k << " : " << xx[coord]*xfac << " " << (*gh.get_grid(ilevel)).relem(i, j, k) * xfac << std::endl; - - xx[coord] = (xx[coord] + (*gh.get_grid(ilevel)).relem(i, j, k)) * xfac; - - if (temp_data.size() < block_buf_size_) - temp_data.push_back(xx[coord]); - else - { - ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); - nwritten += block_buf_size_; - temp_data.clear(); - temp_data.push_back(xx[coord]); - } - } - - if (temp_data.size() > 0) - { - ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * temp_data.size()); - nwritten += temp_data.size(); - } - - if (nwritten != npart) - throw std::runtime_error("Internal consistency error while writing temporary file for positions"); - - //... 
dump to temporary file - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - if (ofs_temp.bad()) - throw std::runtime_error("I/O error while writing temporary file for positions"); - - ofs_temp.close(); - - if (shift != NULL) - delete[] shift; - } - - void write_dm_velocity(int coord, const grid_hierarchy &gh) - { - //... count number of leaf cells ...// - determine_particle_numbers(gh); - - size_t npart = 0; - for (int i = 1; i < 6; ++i) - npart += np_per_type_[i]; - - //... collect displacements and convert to absolute coordinates with correct - //... units - std::vector temp_data; - temp_data.reserve(block_buf_size_); - - float isqrta = 1.0f / sqrt(header_.time); - float vfac = isqrta * header_.BoxSize; - - //if( kpcunits_ ) - // vfac /= 1000.0; - vfac *= unit_length_chosen_ / unit_vel_chosen_; - - size_t nwritten = 0; - - char temp_fname[256]; - sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_dm_vel + coord); - std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); - - size_t blksize = sizeof(T_store) * npart; - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - //for( int ilevel=levelmax_; ilevel>=(int)levelmin_; --ilevel ) - int ilevel = 0; - for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) - for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) - for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) - if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) - { - if (temp_data.size() < block_buf_size_) - temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); - else - { - ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); - nwritten += block_buf_size_; - temp_data.clear(); - temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); - } - } - if (temp_data.size() > 0) - { - ofs_temp.write((char *)&temp_data[0], temp_data.size() * sizeof(T_store)); - nwritten += temp_data.size(); - } - - if (nwritten != npart) - throw std::runtime_error("Internal consistency error while writing temporary file for velocities"); - - ofs_temp.write((char *)&blksize, sizeof(int)); - - if (ofs_temp.bad()) - throw std::runtime_error("I/O error while writing temporary file for velocities"); - - ofs_temp.close(); - - - } - - void write_dm_density(const grid_hierarchy &gh) - { - //... we don't care about DM density for Gadget - } - - void write_dm_potential(const grid_hierarchy &gh) - { - //... we don't care about DM potential for Gadget - } - - void write_gas_potential(const grid_hierarchy &gh) - { - //... we don't care about gas potential for Gadget - } - - //... write data for gas -- don't do this - void write_gas_velocity(int coord, const grid_hierarchy &gh) - { - determine_particle_numbers(gh); - size_t npart = 0; - for (int i = 1; i < 6; ++i) - npart += np_per_type_[i]; - - //... collect velocities and convert to absolute coordinates with correct - //... 
units - std::vector temp_data; - temp_data.reserve(block_buf_size_); - - float isqrta = 1.0f / sqrt(header_.time); - float vfac = isqrta * header_.BoxSize; - - //if( kpcunits_ ) - // vfac /= 1000.0; - vfac *= unit_length_chosen_ / unit_vel_chosen_; - - //size_t npart = gh.count_leaf_cells(gh.levelmin(), gh.levelmax());;; - size_t nwritten = 0; - - char temp_fname[256]; - sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_gas_vel + coord); - std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); - - size_t blksize = sizeof(T_store) * npart; - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - //for( int ilevel=levelmax_; ilevel>=(int)levelmin_; --ilevel ) - int ilevel = 0; - for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) - for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) - for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) - if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) - { - if (temp_data.size() < block_buf_size_) - temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); - else - { - ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); - nwritten += block_buf_size_; - temp_data.clear(); - temp_data.push_back((*gh.get_grid(ilevel)).relem(i, j, k) * vfac); - } - } - - if (temp_data.size() > 0) - { - ofs_temp.write((char *)&temp_data[0], temp_data.size() * sizeof(T_store)); - nwritten += temp_data.size(); - } - - if (nwritten != npart) - throw std::runtime_error("Internal consistency error while writing temporary file for gas velocities"); - - ofs_temp.write((char *)&blksize, sizeof(int)); - - if (ofs_temp.bad()) - throw std::runtime_error("I/O error while writing temporary file for gas velocities"); - - ofs_temp.close(); - } - - //... write only for fine level - void write_gas_position(int coord, const grid_hierarchy &gh) - { - //... count number of leaf cells ...// - determine_particle_numbers(gh); - - size_t npart = 0; - for (int i = 1; i < 6; ++i) - npart += np_per_type_[i]; - - //... determine if we need to shift the coordinates back - double *shift = NULL; - - if (shift_halfcell_) - { - double h = 0.0; //1.0/(1<<(levelmin_+1)); - shift = new double[3]; - shift[0] = shift[1] = shift[2] = -h; - } - - size_t nwritten = 0; - - //... - //... collect displacements and convert to absolute coordinates with correct - //... units - std::vector temp_data; - temp_data.reserve(block_buf_size_); - - char temp_fname[256]; - sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_gas_pos + coord); - std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); - - size_t blksize = sizeof(T_store) * npart; - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - double xfac = header_.BoxSize; - - double h = 1.0 / (1ul << gh.levelmax()); - - //for (int ilevel = gh.levelmax(); ilevel >= (int)gh.levelmin(); --ilevel) - int ilevel = 0; - { - for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) - for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) - for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) - //if( ! gh.is_refined(ilevel,i,j,k) ) - if (gh.is_in_mask(ilevel, i, j, k) && !gh.is_refined(ilevel, i, j, k)) - { - double xx[3]; - gh.cell_pos(ilevel, i, j, k, xx); - if (shift != NULL) - xx[coord] += shift[coord]; - - //... shift particle positions (this has to be done as the same shift - //... 
is used when computing the convolution kernel for SPH baryons) - xx[coord] += 0.5 * h; - - xx[coord] = (xx[coord] + (*gh.get_grid(ilevel)).relem(i, j, k)) * xfac; - - if (temp_data.size() < block_buf_size_) - temp_data.push_back(xx[coord]); - else - { - ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); - nwritten += block_buf_size_; - temp_data.clear(); - temp_data.push_back(xx[coord]); - } - } - } - - if (temp_data.size() > 0) - { - ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * temp_data.size()); - nwritten += temp_data.size(); - } - - if (nwritten != npart) - throw std::runtime_error("Internal consistency error while writing temporary file for gas positions"); - - //... dump to temporary file - ofs_temp.write((char *)&blksize, sizeof(size_t)); - - if (ofs_temp.bad()) - throw std::runtime_error("I/O error while writing temporary file for gas positions"); - - ofs_temp.close(); - - if (shift != NULL) - delete[] shift; - } - - void write_gas_density(const grid_hierarchy &gh) - { - //do nothing as we write out positions - } - - void finalize(void) - { - this->assemble_gadget_file(); - } -}; - -// namespace -// { -// output_plugin_creator_concrete> creator1("gadget2"); -// #ifndef SINGLE_PRECISION -// output_plugin_creator_concrete> creator2("gadget2_double"); -// #endif -// } // namespace From f4d6b9e6695fb4e036f48a39539993ac4478d6d7 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 21:19:11 +0200 Subject: [PATCH 106/130] fixed baryon particle type, was used for testing --- src/plugins/output_gadget_hdf5.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/output_gadget_hdf5.cc b/src/plugins/output_gadget_hdf5.cc index 2e41e47..3908e64 100644 --- a/src/plugins/output_gadget_hdf5.cc +++ b/src/plugins/output_gadget_hdf5.cc @@ -154,7 +154,7 @@ public: case cosmo_species::dm: return 1; case cosmo_species::baryon: - return 2; + return 0; case cosmo_species::neutrino: return 3; } From 5d60b59f6cd5a80e8591623737b86bf4d78c0bc2 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 21:35:16 +0200 Subject: [PATCH 107/130] removed superfluous grid operators --- include/operators.hh | 18 ++++++++++-------- src/ic_generator.cc | 8 ++++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/operators.hh b/include/operators.hh index 49ed8d1..e2f4c8e 100644 --- a/include/operators.hh +++ b/include/operators.hh @@ -1,10 +1,18 @@ #pragma once - +/* + + operators.hh - This file is part of MUSIC2 - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2019 Oliver Hahn + +*/ #include namespace op{ -//!== long list of primitive operators to work on fields ==!// +//!== list of primitive operators to work on fields ==!// template< typename field> inline auto assign_to( field& g ){return [&g](auto i, auto v){ g[i] = v; };} @@ -15,15 +23,9 @@ inline auto multiply_add_to( field& g, val x ){return [&g,x](auto i, auto v){ g[ template< typename field> inline auto add_to( field& g ){return [&g](auto i, auto v){ g[i] += v; };} -template< typename field> -inline auto add_twice_to( field& g ){return [&g](auto i, auto v){ g[i] += 2*v; };} - template< typename field> inline auto subtract_from( field& g ){return [&g](auto i, auto v){ g[i] -= v; };} -template< typename field> -inline auto subtract_twice_from( field& g ){return [&g](auto i, auto v){ g[i] -= 2*v; };} - //! 
vanilla standard gradient class fourier_gradient{ private: diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 72c4482..6185af0 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -353,7 +353,7 @@ int Run( config_file& the_config ) music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush; phi3a.FourierTransformForward(false); Conv.convolve_Hessians(phi, {0, 0}, phi, {1, 1}, phi, {2, 2}, op::assign_to(phi3a)); - Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::add_twice_to(phi3a)); + Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::multiply_add_to(phi3a,2.0)); Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, phi, {0, 0}, op::subtract_from(phi3a)); Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, phi, {1, 1}, op::subtract_from(phi3a)); Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, phi, {2, 2}, op::subtract_from(phi3a)); @@ -367,9 +367,9 @@ int Run( config_file& the_config ) Conv.convolve_SumOfHessians(phi, {0, 0}, phi2, {1, 1}, {2, 2}, op::assign_to(phi3b)); Conv.convolve_SumOfHessians(phi, {1, 1}, phi2, {2, 2}, {0, 0}, op::add_to(phi3b)); Conv.convolve_SumOfHessians(phi, {2, 2}, phi2, {0, 0}, {1, 1}, op::add_to(phi3b)); - Conv.convolve_Hessians(phi, {0, 1}, phi2, {0, 1}, op::subtract_twice_from(phi3b)); - Conv.convolve_Hessians(phi, {0, 2}, phi2, {0, 2}, op::subtract_twice_from(phi3b)); - Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::subtract_twice_from(phi3b)); + Conv.convolve_Hessians(phi, {0, 1}, phi2, {0, 1}, op::multiply_add_to(phi3b,-2.0)); + Conv.convolve_Hessians(phi, {0, 2}, phi2, {0, 2}, op::multiply_add_to(phi3b,-2.0)); + Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::multiply_add_to(phi3b,-2.0)); phi3b.apply_InverseLaplacian(); phi3b *= 0.5; // factor 1/2 from definition of phi(3b)! music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; From 83b8d9bbafcfab7828caee69dc2accc8de16f354 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 4 Apr 2020 23:59:13 +0200 Subject: [PATCH 108/130] moved math headers to subdirectory --- include/bounding_box.hh | 2 +- include/cosmology_calculator.hh | 5 ++--- include/grid_fft.hh | 2 +- include/{ => math}/interpolate.hh | 0 include/{ => math}/mat3.hh | 2 +- include/{ => math}/ode_integrate.hh | 0 include/{ => math}/vec3.hh | 0 include/particle_generator.hh | 2 +- include/particle_plt.hh | 2 +- src/plugins/transfer_CLASS.cc | 2 +- 10 files changed, 8 insertions(+), 9 deletions(-) rename include/{ => math}/interpolate.hh (100%) rename include/{ => math}/mat3.hh (99%) rename include/{ => math}/ode_integrate.hh (100%) rename include/{ => math}/vec3.hh (100%) diff --git a/include/bounding_box.hh b/include/bounding_box.hh index 3048c79..6b70bcf 100644 --- a/include/bounding_box.hh +++ b/include/bounding_box.hh @@ -1,6 +1,6 @@ #pragma once -#include +#include template struct bounding_box diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index 1d99209..bedc653 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -6,10 +6,10 @@ #include #include #include -#include +#include #include -#include +#include #include // #include @@ -210,7 +210,6 @@ public: << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon0), 2.0) << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vtotal), 2.0) << std::endl; - #warning Check whether output is at redshift that is indicated! 
} } music::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl; diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 2170dc8..b8a76aa 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include #include diff --git a/include/interpolate.hh b/include/math/interpolate.hh similarity index 100% rename from include/interpolate.hh rename to include/math/interpolate.hh diff --git a/include/mat3.hh b/include/math/mat3.hh similarity index 99% rename from include/mat3.hh rename to include/math/mat3.hh index 6cf2689..75458ea 100644 --- a/include/mat3.hh +++ b/include/math/mat3.hh @@ -1,7 +1,7 @@ #include #include -#include +#include template class mat3_t{ diff --git a/include/ode_integrate.hh b/include/math/ode_integrate.hh similarity index 100% rename from include/ode_integrate.hh rename to include/math/ode_integrate.hh diff --git a/include/vec3.hh b/include/math/vec3.hh similarity index 100% rename from include/vec3.hh rename to include/math/vec3.hh diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 57e8b0f..de6c912 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -7,7 +7,7 @@ \*******************************************************************/ #pragma once -#include +#include namespace particle { diff --git a/include/particle_plt.hh b/include/particle_plt.hh index 9e6df1e..a452559 100644 --- a/include/particle_plt.hh +++ b/include/particle_plt.hh @@ -13,7 +13,7 @@ #include #include -#include +#include #include inline double Hypergeometric2F1( double a, double b, double c, double x ) diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 09047c6..2ae3ae2 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -16,7 +16,7 @@ #include #include #include -#include +#include class transfer_CLASS_plugin : public TransferFunction_plugin { From 7e196f3a14f779a9767e96e8b40219bf6f343dfd Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 5 Apr 2020 01:13:40 +0200 Subject: [PATCH 109/130] added compiler version string --- src/main.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main.cc b/src/main.cc index 04e2302..1d98276 100644 --- a/src/main.cc +++ b/src/main.cc @@ -79,7 +79,8 @@ int main( int argc, char** argv ) << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl; // Compilation CMake configuration, time etc info: - music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; + music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; + music::ilog << "Compiler used: " << __VERSION__ << std::endl; // git and versioning info: music::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl; From 8d459062892f480e5b650b39bea5809858bc9852 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 5 Apr 2020 01:15:56 +0200 Subject: [PATCH 110/130] added a compiler version string --- src/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.cc b/src/main.cc index 04e2302..82cb800 100644 --- a/src/main.cc +++ b/src/main.cc @@ -80,7 +80,7 @@ int main( int argc, char** argv ) // Compilation CMake configuration, time etc info: music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on 
" << __DATE__ << std::endl; - + music::ilog << "Compiled with " << __VERSION__ << std::endl; // git and versioning info: music::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl; From 3ac20125775a7700a9498b34a110e9fa47e152b0 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 5 Apr 2020 22:26:22 +0200 Subject: [PATCH 111/130] fixed forgotten h factor in A_s CLASS normalisation --- src/main.cc | 14 ++++++++++---- src/plugins/transfer_CLASS.cc | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/main.cc b/src/main.cc index ca62004..c609a4a 100644 --- a/src/main.cc +++ b/src/main.cc @@ -78,13 +78,19 @@ int main( int argc, char** argv ) << " 88 88 88 88. .88 88 88 88. .88 88 88. .88 88 88 88 Y8. .88 \n" << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl; - // Compilation CMake configuration, time etc info: - music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; - music::ilog << "Compiled with " << __VERSION__ << std::endl; - // git and versioning info: music::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl; + // Compilation CMake configuration, time etc info: + music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; + +#ifdef __GNUC__ + music::ilog << "Compiled with GNU C++ version " << __VERSION__ < 0) { this->tf_isnormalised_ = true; - tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0)); + tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p * h_, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0)); music::ilog << "Using A_s to normalise the transfer function!" << std::endl; } From 7ddc22fc0f0efbd4325f5343711e760e4dddc274 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Tue, 21 Apr 2020 17:23:59 +0200 Subject: [PATCH 112/130] updated class submodule branch --- external/class | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/class b/external/class index 52bc312..055d8bc 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit 52bc3126fca4415c4f541d47d43ffdb9763e0464 +Subproject commit 055d8bca371631da0c51ff167ce81905996b4ca2 From 0cafcfea197a2324c2caebd9b59a6f256b409b3d Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 15:01:09 +0200 Subject: [PATCH 113/130] added function to check if Nyquist mode to grd --- include/grid_fft.hh | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index b8a76aa..2d49f7f 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -40,7 +40,7 @@ public: std::array sizes_; size_t npr_, npc_; size_t ntot_; - std::array length_, kfac_, dx_; + std::array length_, kfac_, kny_, dx_; space_t space_; data_t *data_; @@ -97,6 +97,15 @@ public: return global_range_; } + bool is_nyquist_mode( size_t i, size_t j, size_t k ) const + { + assert( this->space_ == kspace_id ); + bool bres = (i+local_1_start_ == n_[1]/2); + bres |= (j == n_[0]/2); + bres |= (k == n_[2]/2); + return bres; + } + //! set all field elements to zero void zero() noexcept { @@ -466,9 +475,9 @@ public: { for (size_t k = 0; k < sizes_[2]; ++k) { - const auto elem = std::real(this->relem(i, j, k)); - sum1 += elem; - sum2 += elem * elem; + const auto elem = (space_==kspace_id)? 
this->kelem(i, j, k) : this->relem(i, j, k); + sum1 += std::real(elem); + sum2 += std::norm(elem);// * elem; } } } From 95502596dd245c2f4add04853e29de575fa81588 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 15:01:36 +0200 Subject: [PATCH 114/130] added forgotten mods for kNy in grids --- src/grid_fft.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/grid_fft.cc b/src/grid_fft.cc index a51577e..eeba708 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -48,6 +48,7 @@ void Grid_FFT::Setup(void) { nhalf_[i] = n_[i] / 2; kfac_[i] = 2.0 * M_PI / length_[i]; + kny_[i] = kfac_[i] * n_[i]/2; dx_[i] = length_[i] / n_[i]; global_range_.x1_[i] = 0; @@ -128,6 +129,7 @@ void Grid_FFT::Setup(void) { nhalf_[i] = n_[i] / 2; kfac_[i] = 2.0 * M_PI / length_[i]; + kny_[i] = kfac_[i] * n_[i]/2; dx_[i] = length_[i] / n_[i]; global_range_.x1_[i] = 0; From 52dfa9a72d81b5a94837bae74a47b2d9a0c19104 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 15:02:16 +0200 Subject: [PATCH 115/130] Fill_Grid member function cannot be const --- include/random_plugin.hh | 2 +- src/plugins/random_music.cc | 2 +- src/plugins/random_ngenic.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/random_plugin.hh b/include/random_plugin.hh index 1c33fae..a91ab7e 100644 --- a/include/random_plugin.hh +++ b/include/random_plugin.hh @@ -18,7 +18,7 @@ class RNG_plugin } virtual ~RNG_plugin() {} virtual bool isMultiscale() const = 0; - virtual void Fill_Grid( Grid_FFT& g ) const = 0; + virtual void Fill_Grid( Grid_FFT& g ) = 0;//const = 0; //virtual void FillGrid(int level, DensityGrid &R) = 0; }; diff --git a/src/plugins/random_music.cc b/src/plugins/random_music.cc index ab0f959..28486b5 100644 --- a/src/plugins/random_music.cc +++ b/src/plugins/random_music.cc @@ -40,7 +40,7 @@ public: bool isMultiscale() const { return true; } - void Fill_Grid( Grid_FFT& g ) const { } + void Fill_Grid( Grid_FFT& g ) {} //const { } void initialize_for_grid_structure()//const refinement_hierarchy &refh) { diff --git a/src/plugins/random_ngenic.cc b/src/plugins/random_ngenic.cc index f1c6a59..1498d4b 100644 --- a/src/plugins/random_ngenic.cc +++ b/src/plugins/random_ngenic.cc @@ -63,7 +63,7 @@ public: bool isMultiscale() const { return false; } - void Fill_Grid(Grid_FFT &g) const + void Fill_Grid(Grid_FFT &g) //const { g.zero(); g.FourierTransformForward(false); From bd78c7468a684c08fa7907d9e73da584bf3bafc3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 15:02:54 +0200 Subject: [PATCH 116/130] added a default z_max_pk to class interface to avoid not having any if zstart==ztarget --- src/plugins/transfer_CLASS.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 1f0f776..bcf85df 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -47,7 +47,7 @@ private: void init_ClassEngine(void) { //--- general parameters ------------------------------------------ - add_class_parameter("z_max_pk", std::max(zstart_, ztarget_) * 1.2); // use 1.2 as safety + add_class_parameter("z_max_pk", std::max(std::max(zstart_, ztarget_),199.0)); // use 1.2 as safety add_class_parameter("P_k_max_h/Mpc", kmax_); add_class_parameter("output", "dTk,vTk"); add_class_parameter("extra metric transfer functions","yes"); @@ -126,7 +126,7 @@ private: // output parameters, only needed for the control CLASS .ini file that we output std::stringstream zlist; if (ztarget_ == zstart_) - zlist << 
ztarget_ << ", 0.0"; + zlist << ztarget_ << ((ztarget_!=0.0)? ", 0.0" : ""); else zlist << std::max(ztarget_, zstart_) << ", " << std::min(ztarget_, zstart_) << ", 0.0"; add_class_parameter("z_pk", zlist.str()); From 95a660f4ffd56f768e20c20f7d773b19badcdd5f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 15:03:43 +0200 Subject: [PATCH 117/130] added interface with PANPHASIA random number generator --- CMakeLists.txt | 22 + external/panphasia/generic_lecuyer.f90 | 683 +++++ external/panphasia/panphasia_routines.f | 3334 +++++++++++++++++++++++ src/plugins/random_panphasia.cc | 532 ++++ 4 files changed, 4571 insertions(+) create mode 100644 external/panphasia/generic_lecuyer.f90 create mode 100644 external/panphasia/panphasia_routines.f create mode 100644 src/plugins/random_panphasia.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index c8cf314..be14271 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,6 +127,17 @@ mark_as_advanced(pkgcfg_lib_GSL_gsl pkgcfg_lib_GSL_gslcblas pkgcfg_lib_GSL_m) find_package(HDF5 REQUIRED) mark_as_advanced(HDF5_C_LIBRARY_dl HDF5_C_LIBRARY_hdf5 HDF5_C_LIBRARY_m HDF5_C_LIBRARY_pthread HDF5_C_LIBRARY_z HDF5_C_LIBRARY_sz) +######################################################################################################################## +# PANPHASIA +option(ENABLE_PANPHASIA "Enable PANPHASIA random number generator" ON) +if(ENABLE_PANPHASIA) +enable_language(Fortran) +if ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "Intel") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -132 -implicit-none") +elseif("${CMAKE_Fortran_COMPILER_ID}" MATCHES "GNU") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -ffixed-line-length-132 -fimplicit-none") +endif() +endif(ENABLE_PANPHASIA) ######################################################################################################################## # INCLUDES include_directories(${PROJECT_SOURCE_DIR}/include) @@ -145,6 +156,13 @@ file( GLOB PLUGINS ${PROJECT_SOURCE_DIR}/src/plugins/*.cc ) +if(ENABLE_PANPHASIA) +list (APPEND SOURCES + ${PROJECT_SOURCE_DIR}/external/panphasia/panphasia_routines.f + ${PROJECT_SOURCE_DIR}/external/panphasia/generic_lecuyer.f90 +) +endif() + # project configuration header configure_file( ${PROJECT_SOURCE_DIR}/include/cmake_config.hh.in @@ -208,6 +226,10 @@ if(HDF5_FOUND) target_compile_options(${PRGNAME} PRIVATE "-DUSE_HDF5") endif(HDF5_FOUND) +if(ENABLE_PANPHASIA) +target_compile_options(${PRGNAME} PRIVATE "-DUSE_PANPHASIA") +endif(ENABLE_PANPHASIA) + target_link_libraries(${PRGNAME} ${FFTW3_LIBRARIES}) target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIRS}) diff --git a/external/panphasia/generic_lecuyer.f90 b/external/panphasia/generic_lecuyer.f90 new file mode 100644 index 0000000..13f53ed --- /dev/null +++ b/external/panphasia/generic_lecuyer.f90 @@ -0,0 +1,683 @@ +!=====================================================================================c +! +! The code below was written by: Stephen Booth +! Edinburgh Parallel Computing Centre +! The University of Edinburgh +! JCMB +! Mayfield Road +! Edinburgh EH9 3JZ +! United Kingdom +! +! This file is part of the software made public in +! Jenkins and Booth 2013 - arXiv:1306.XXXX +! +! The software computes the Panphasia Gaussian white noise field +! realisation described in detail in Jenkins 2013 - arXiv:1306.XXXX +! +! +! +! This software is free, subject to a agreeing licence conditions: +! +! +! (i) you will publish the phase descriptors and reference Jenkins (13) +! 
for any new simulations that use Panphasia phases. You will pass on this +! condition to others for any software or data you make available publically +! or privately that makes use of Panphasia. +! +! (ii) that you will ensure any publications using results derived from Panphasia +! will be submitted as a final version to arXiv prior to or coincident with +! publication in a journal. +! +! +! (iii) that you report any bugs in this software as soon as confirmed to +! A.R.Jenkins@durham.ac.uk +! +! (iv) that you understand that this software comes with no warranty and that is +! your responsibility to ensure that it is suitable for the purpose that +! you intend. +! +!=====================================================================================c +!{{{Rand_base (define kind types) +MODULE Rand_base +! This module just declares the base types +! we may have to edit this to match to the target machine +! we really need a power of 2 selected int kind in fortran-95 we could +! do this with a PURE function I think. + +! +! 10 decimal digits will hold 2^31 +! + + INTEGER, PARAMETER :: Sint = SELECTED_INT_KIND(9) +! INTEGER, PARAMETER :: Sint = SELECTED_INT_KIND(10) +! INTEGER, PARAMETER :: Sint = 4 + +! +! 18-19 decimal digits will hold 2^63 +! but all 19 digit numbers require 2^65 :-( +! + + INTEGER, PARAMETER :: Dint = SELECTED_INT_KIND(17) +! INTEGER, PARAMETER :: Dint = SELECTED_INT_KIND(18) +! INTEGER, PARAMETER :: Dint = 8 + +! type for index counters must hold Nstore + INTEGER, PARAMETER :: Ctype = SELECTED_INT_KIND(3) +END MODULE Rand_base +!}}} + +!{{{Rand_int (random integers mod 2^31-1) + +MODULE Rand_int + USE Rand_base + IMPLICIT NONE +! The general approach of this module is two have +! two types Sint and Dint +! +! Sint should have at least 31 bits +! dint shouldhave at least 63 + +!{{{constants + + INTEGER(KIND=Ctype), PARAMETER :: Nstate=5_Ctype + INTEGER(KIND=Ctype), PRIVATE, PARAMETER :: Nbatch=128_Ctype + INTEGER(KIND=Ctype), PRIVATE, PARAMETER :: Nstore=Nstate+Nbatch + + INTEGER(KIND=Sint), PRIVATE, PARAMETER :: M = 2147483647_Sint + INTEGER(KIND=Dint), PRIVATE, PARAMETER :: Mask = 2147483647_Dint + INTEGER(KIND=Dint), PRIVATE, PARAMETER :: A1 = 107374182_Dint + INTEGER(KIND=Dint), PRIVATE, PARAMETER :: A5 = 104480_Dint + LOGICAL, PARAMETER :: Can_step_int=.TRUE. + LOGICAL, PARAMETER :: Can_reverse_int=.TRUE. + +!}}} + +!{{{Types +! +! This type holds the state of the generator +! +!{{{TYPE RAND_state + +TYPE RAND_state + PRIVATE + INTEGER(KIND=Sint) :: state(Nstore) +! do we need to re-fill state table this is reset when we initialise state. + LOGICAL :: need_fill +! position of the next state variable to output + INTEGER(KIND=Ctype) :: pos +END TYPE RAND_state + +!}}} + +! +! This type defines the offset type used for stepping. +! +!{{{TYPE RAND_offset + +TYPE RAND_offset + PRIVATE + INTEGER(KIND=Sint) :: poly(Nstate) +END TYPE RAND_offset + +!}}} + +!}}} + +!{{{interface and overloads +! +! Allow automatic conversion between integers and offsets +! +INTERFACE ASSIGNMENT(=) + MODULE PROCEDURE Rand_set_offset + MODULE PROCEDURE Rand_load + MODULE PROCEDURE Rand_save + MODULE PROCEDURE Rand_seed +END INTERFACE +INTERFACE OPERATOR(+) + MODULE PROCEDURE Rand_add_offset +END INTERFACE +INTERFACE OPERATOR(*) + MODULE PROCEDURE Rand_mul_offset +END INTERFACE + +! +! overload + as the boost/stepping operator +! 
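+! For illustration only (a usage sketch, not part of the original
+! documentation): the overloads declared below let a caller jump a
+! generator forward without drawing every intermediate value, e.g.
+!
+!    TYPE(Rand_state)  :: s
+!    TYPE(Rand_offset) :: off
+!    CALL Rand_seed(s, 7)   ! seed from a single integer
+!    off = 1000000          ! assignment overload -> Rand_set_offset
+!    s = s + off            ! '+' overload -> Rand_boost by 10^6 states
+!    s = s + 5              ! '+' overload -> Rand_step by 5 states
+!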
+INTERFACE OPERATOR(+) + MODULE PROCEDURE Rand_step + MODULE PROCEDURE Rand_boost +END INTERFACE +!}}} + + +!{{{PUBLIC/PRIVATE + PRIVATE reduce,mod_saxpy,mod_sdot,p_saxpy,p_sdot,poly_mult + PRIVATE poly_square, poly_power + PRIVATE fill_state, repack_state + + PUBLIC Rand_sint, Rand_sint_vec + + PUBLIC Rand_save, Rand_load + PUBLIC Rand_set_offset, Rand_add_offset, Rand_mul_offset + PUBLIC Rand_step, Rand_boost, Rand_seed +!}}} + +CONTAINS + !{{{Internals + !{{{RECURSIVE FUNCTION reduce(A) + RECURSIVE FUNCTION reduce(A) + ! + ! Take A Dint and reduce to Sint MOD M + ! + INTEGER(KIND=Dint), INTENT(IN) :: A + INTEGER(KIND=Sint) reduce + INTEGER(KIND=Dint) tmp + + tmp = A + DO WHILE( ISHFT(tmp, -31) .GT. 0 ) + tmp = IAND(tmp,Mask) + ISHFT(tmp, -31) + END DO + IF( tmp .GE. M ) THEN + reduce = tmp - M + ELSE + reduce = tmp + END IF + END FUNCTION reduce + !}}} + !{{{RECURSIVE SUBROUTINE fill_state(x) + RECURSIVE SUBROUTINE fill_state(x) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Ctype) i + INTRINSIC IAND, ISHFT + INTEGER(KIND=Dint) tmp + DO i=Nstate+1,Nstore + tmp = (x%state(i-5) * A5) + (x%state(i-1)*A1) + ! + ! now reduce down to mod M efficiently + ! really hope the compiler in-lines this + ! + ! x%state(i) = reduce(tmp) + DO WHILE( ISHFT(tmp, -31) .GT. 0 ) + tmp = IAND(tmp,Mask) + ISHFT(tmp, -31) + END DO + IF( tmp .GE. M ) THEN + x%state(i) = tmp - M + ELSE + x%state(i) = tmp + END IF + + END DO + x%need_fill = .FALSE. + END SUBROUTINE fill_state + !}}} + !{{{RECURSIVE SUBROUTINE repack_state(x) + RECURSIVE SUBROUTINE repack_state(x) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Ctype) i + DO i=1,Nstate + x%state(i) = x%state(i+x%pos-(Nstate+1)) + END DO + x%pos = Nstate + 1 + x%need_fill = .TRUE. + END SUBROUTINE repack_state + !}}} + !{{{RECURSIVE SUBROUTINE mod_saxpy(y,a,x) + RECURSIVE SUBROUTINE mod_saxpy(y,a,x) + INTEGER(KIND=Ctype) i + INTEGER(KIND=Sint) y(Nstate) + INTEGER(KIND=Sint) a + INTEGER(KIND=Sint) x(Nstate) + INTEGER(KIND=Dint) tx,ty,ta + + IF( a .EQ. 0_Sint ) RETURN + + ! We use KIND=Dint temporaries here to ensure + ! that we don't overflow in the expression + + ta = a + DO i=1,Nstate + ty=y(i) + tx=x(i) + y(i) = reduce(ty + ta * tx) + END DO + + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE mod_sdot(res,x,y) + RECURSIVE SUBROUTINE mod_sdot(res,x,y) + INTEGER(KIND=Sint), INTENT(OUT) :: res + INTEGER(KIND=Sint), INTENT(IN) :: x(Nstate) , y(Nstate) + INTEGER(KIND=Dint) dx, dy, dtmp + INTEGER(KIND=Sint) tmp + INTEGER(KIND=Ctype) i + + tmp = 0 + DO i=1,Nstate + dx = x(i) + dy = y(i) + dtmp = tmp + tmp = reduce(dtmp + dx * dy) + END DO + res = tmp + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE p_saxpy(y,a) + RECURSIVE SUBROUTINE p_saxpy(y,a) + ! 
Calculates mod_saxpy(y,a,P) + INTEGER(KIND=Sint), INTENT(INOUT) :: y(Nstate) + INTEGER(KIND=Sint), INTENT(IN) :: a + INTEGER(KIND=Dint) tmp, dy, da + dy = y(1) + da = a + tmp = dy + da*A5 + y(1) = reduce(tmp) + dy = y(5) + da = a + tmp = dy + da*A1 + y(5) = reduce(tmp) + + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE p_sdot(res,n,x) + RECURSIVE SUBROUTINE p_sdot(res,x) + INTEGER(KIND=Sint), INTENT(OUT) :: res + INTEGER(KIND=Sint), INTENT(IN) :: x(Nstate) + INTEGER(KIND=Dint) dx1, dx5, dtmp + dx1 = x(1) + dx5 = x(5) + + dtmp = A1*dx5 + A5*dx1 + res = reduce(dtmp) + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE poly_mult(a,b) + RECURSIVE SUBROUTINE poly_mult(a,b) + INTEGER(KIND=Sint), INTENT(INOUT) :: a(Nstate) + INTEGER(KIND=Sint), INTENT(IN) :: b(Nstate) + INTEGER(KIND=Sint) tmp((2*Nstate) - 1) + INTEGER(KIND=Ctype) i + + tmp = 0_Sint + + DO i=1,Nstate + CALL mod_saxpy(tmp(i:Nstate+i-1),a(i), b) + END DO + DO i=(2*Nstate)-1, Nstate+1, -1 + CALL P_SAXPY(tmp(i-Nstate:i-1),tmp(i)) + END DO + a = tmp(1:Nstate) + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE poly_square(a) + RECURSIVE SUBROUTINE poly_square(a) + INTEGER(KIND=Sint), INTENT(INOUT) :: a(Nstate) + INTEGER(KIND=Sint) tmp((2*Nstate) - 1) + INTEGER(KIND=Ctype) i + + tmp = 0_Sint + + DO i=1,Nstate + CALL mod_saxpy(tmp(i:Nstate+i-1),a(i), a) + END DO + DO i=(2*Nstate)-1, Nstate+1, -1 + CALL P_SAXPY(tmp(i-Nstate:i-1),tmp(i)) + END DO + a = tmp(1:Nstate) + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE poly_power(poly,n) + RECURSIVE SUBROUTINE poly_power(poly,n) + INTEGER(KIND=Sint), INTENT(INOUT) :: poly(Nstate) + INTEGER, INTENT(IN) :: n + INTEGER nn + INTEGER(KIND=Sint) x(Nstate), out(Nstate) + + IF( n .EQ. 0 )THEN + poly = 0_Sint + poly(1) = 1_Sint + RETURN + ELSE IF( n .LT. 0 )THEN + poly = 0_Sint + RETURN + END IF + + out = 0_sint + out(1) = 1_Sint + x = poly + nn = n + DO WHILE( nn .GT. 0 ) + IF( MOD(nn,2) .EQ. 1 )THEN + call poly_mult(out,x) + END IF + nn = nn/2 + IF( nn .GT. 0 )THEN + call poly_square(x) + END IF + END DO + poly = out + + END SUBROUTINE poly_power + !}}} + !}}} + + !{{{RECURSIVE SUBROUTINE Rand_seed( state, n ) + RECURSIVE SUBROUTINE Rand_seed( state, n ) + TYPE(Rand_state), INTENT(OUT) :: state + INTEGER, INTENT(IN) :: n + ! initialise the genrator using a single integer + ! fist initialise to an arbitrary state then boost by a multiple + ! of a long distance + ! + ! state is moved forward by P^n steps + ! we want this to be ok for seperating parallel sequences on MPP machines + ! P is taken as a prime number as this should prevent strong correlations + ! when the generators are operated in tight lockstep. + ! equivalent points on different processors will also be related by a + ! primative polynomial + ! P is 2^48-59 + TYPE(Rand_state) tmp + TYPE(Rand_offset), PARAMETER :: P = & + Rand_offset( (/ 1509238949_Sint ,2146167999_Sint ,1539340803_Sint , & + 1041407428_Sint ,666274987_Sint /) ) + + CALL Rand_load( tmp, (/ 5, 4, 3, 2, 1 /) ) + state = Rand_boost( tmp, Rand_mul_offset(P, n )) + + END SUBROUTINE Rand_seed + !}}} + !{{{RECURSIVE SUBROUTINE Rand_load( state, input ) + RECURSIVE SUBROUTINE Rand_load( state, input ) + TYPE(RAND_state), INTENT(OUT) :: state + INTEGER, INTENT(IN) :: input(Nstate) + + INTEGER(KIND=Ctype) i + + state%state = 0_Sint + DO i=1,Nstate + state%state(i) = MOD(INT(input(i),KIND=Sint),M) + END DO + state%need_fill = .TRUE. 
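+    ! the state table is (re)filled lazily: fill_state runs on the next draw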
+ state%pos = Nstate + 1 + END SUBROUTINE Rand_load + !}}} + !{{{RECURSIVE SUBROUTINE Rand_save( save_vec,state ) + RECURSIVE SUBROUTINE Rand_save( save_vec, x ) + INTEGER, INTENT(OUT) :: save_vec(Nstate) + TYPE(RAND_state), INTENT(IN) :: x + + INTEGER(KIND=Ctype) i + DO i=1,Nstate + save_vec(i) = x%state(x%pos-(Nstate+1) + i) + END DO + END SUBROUTINE Rand_save + !}}} + + !{{{RECURSIVE SUBROUTINE Rand_set_offset( offset, n ) + RECURSIVE SUBROUTINE Rand_set_offset( offset, n ) + TYPE(Rand_offset), INTENT(OUT) :: offset + INTEGER, INTENT(IN) :: n + + offset%poly = 0_Sint + IF ( n .GE. 0 ) THEN + offset%poly(2) = 1_Sint + call poly_power(offset%poly,n) + ELSE + ! + ! This is X^-1 + ! + offset%poly(4) = 858869107_Sint + offset%poly(5) = 1840344978_Sint + call poly_power(offset%poly,-n) + END IF + END SUBROUTINE Rand_set_offset + !}}} + !{{{TYPE(Rand_offset) RECURSIVE FUNCTION Rand_add_offset( a, b ) + TYPE(Rand_offset) RECURSIVE FUNCTION Rand_add_offset( a, b ) + TYPE(Rand_offset), INTENT(IN) :: a, b + + Rand_add_offset = a + CALL poly_mult(Rand_add_offset%poly,b%poly) + RETURN + END FUNCTION Rand_add_offset + !}}} + !{{{TYPE(Rand_offset) RECURSIVE FUNCTION Rand_mul_offset( a, n ) + TYPE(Rand_offset) RECURSIVE FUNCTION Rand_mul_offset( a, n ) + TYPE(Rand_offset), INTENT(IN) :: a + INTEGER, INTENT(IN) :: n + Rand_mul_offset = a + CALL poly_power(Rand_mul_offset%poly,n) + RETURN + END FUNCTION Rand_mul_offset + !}}} + !{{{RECURSIVE FUNCTION Rand_boost(x, offset) + RECURSIVE FUNCTION Rand_boost(x, offset) + TYPE(Rand_state) Rand_boost + TYPE(Rand_state), INTENT(IN) :: x + TYPE(Rand_offset), INTENT(IN) :: offset + INTEGER(KIND=Sint) tmp(2*Nstate-1), res(Nstate) + INTEGER(KIND=Ctype) i + + DO i=1,Nstate + tmp(i) = x%state(x%pos-(Nstate+1) + i) + END DO + tmp(Nstate+1:) = 0_Sint + + DO i=1,Nstate-1 + call P_SDOT(tmp(i+Nstate),tmp(i:Nstate+i-1)) + END DO + + DO i=1,Nstate + call mod_sdot(res(i),offset%poly,tmp(i:Nstate+i-1)) + END DO + Rand_boost%state = 0_Sint + DO i=1,Nstate + Rand_boost%state(i) = res(i) + END DO + Rand_boost%need_fill = .TRUE. + Rand_boost%pos = Nstate + 1 + + END FUNCTION Rand_boost + !}}} + !{{{RECURSIVE FUNCTION Rand_step(x, n) + RECURSIVE FUNCTION Rand_step(x, n) + TYPE(Rand_state) Rand_step + TYPE(RAND_state), INTENT(IN) :: x + INTEGER, INTENT(IN) :: n + TYPE(Rand_offset) tmp + + CALL Rand_set_offset(tmp,n) + Rand_step=Rand_boost(x,tmp) + + END FUNCTION + !}}} + + !{{{RECURSIVE FUNCTION Rand_sint(x) + RECURSIVE FUNCTION Rand_sint(x) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Sint) Rand_sint + IF( x%pos .GT. Nstore )THEN + CALL repack_state(x) + END IF + IF( x%need_fill ) CALL fill_state(x) + Rand_sint = x%state(x%pos) + x%pos = x%pos + 1 + RETURN + END FUNCTION Rand_sint + !}}} + !{{{RECURSIVE SUBROUTINE Rand_sint_vec(iv,x) + RECURSIVE SUBROUTINE Rand_sint_vec(iv,x) + INTEGER(KIND=Sint), INTENT(OUT) :: iv(:) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER left,start, chunk, i + + start=1 + left=SIZE(iv) + DO WHILE( left .GT. 0 ) + IF( x%pos .GT. 
Nstore )THEN + CALL repack_state(x) + END IF + IF( x%need_fill ) CALL fill_state(x) + + chunk = MIN(left,Nstore-x%pos+1) + DO i=0,chunk-1 + iv(start+i) = x%state(x%pos+i) + END DO + start = start + chunk + x%pos = x%pos + chunk + left = left - chunk + END DO + + RETURN + END SUBROUTINE Rand_sint_vec + !}}} + + +END MODULE Rand_int + +!}}} + +!{{{Rand (use Rand_int to make random reals) + +MODULE Rand + USE Rand_int + IMPLICIT NONE + +!{{{Parameters + + INTEGER, PARAMETER :: RAND_kind1 = SELECTED_REAL_KIND(10) + INTEGER, PARAMETER :: RAND_kind2 = SELECTED_REAL_KIND(6) + + INTEGER, PARAMETER, PRIVATE :: Max_block=100 + INTEGER(KIND=Sint), PRIVATE, PARAMETER :: M = 2147483647 + REAL(KIND=RAND_kind1), PRIVATE, PARAMETER :: INVMP1_1 = ( 1.0_RAND_kind1 / 2147483647.0_RAND_kind1 ) + REAL(KIND=RAND_kind2), PRIVATE, PARAMETER :: INVMP1_2 = ( 1.0_RAND_kind2 / 2147483647.0_RAND_kind2 ) + + LOGICAL, PARAMETER :: Can_step = Can_step_int + LOGICAL, PARAMETER :: Can_reverse = Can_reverse_int + +!}}} + PUBLIC Rand_real + + +INTERFACE Rand_real + MODULE PROCEDURE Rand_real1 + MODULE PROCEDURE Rand_real2 + MODULE PROCEDURE Rand_real_vec1 + MODULE PROCEDURE Rand_real_vec2 +END INTERFACE + + +CONTAINS + + !{{{RECURSIVE SUBROUTINE Rand_real1(y,x) + RECURSIVE SUBROUTINE Rand_real1(y,x) + REAL(KIND=RAND_kind1), INTENT(OUT) :: y + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Sint) Z + + Z = Rand_sint(x) + IF (Z .EQ. 0) Z = M + + y = ((Z-0.5d0)*INVMP1_1) + RETURN + END SUBROUTINE Rand_real1 + !}}} + !{{{RECURSIVE SUBROUTINE Rand_real2(y,x) + RECURSIVE SUBROUTINE Rand_real2(y,x) + REAL(KIND=RAND_kind2), INTENT(OUT) :: y + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Sint) Z + + Z = Rand_sint(x) + IF (Z .EQ. 0) Z = M + + y = ((Z-0.5d0)*INVMP1_1) ! generate in double and truncate. + RETURN + END SUBROUTINE Rand_real2 + !}}} + + !{{{RECURSIVE SUBROUTINE Rand_real_vec1(rv,x) + RECURSIVE SUBROUTINE Rand_real_vec1(rv,x) + TYPE(RAND_state), INTENT(INOUT) :: x + REAL(KIND=RAND_kind1) rv(:) + INTEGER left,start, chunk, i + INTEGER(KIND=Sint) Z + INTEGER(KIND=Sint) temp(MIN(SIZE(rv),Max_block)) + + start=0 + left=SIZE(rv) + DO WHILE( left .GT. 0 ) + chunk = MIN(left,Max_block) + CALL Rand_sint_vec(temp(1:chunk),x) + DO i=1,chunk + Z = temp(i) + IF (Z .EQ. 0) Z = M + rv(start+i) = (Z-0.5d0)*INVMP1_1 + END DO + start = start + chunk + left = left - chunk + END DO + + RETURN + END SUBROUTINE Rand_real_vec1 + !}}} + !{{{RECURSIVE SUBROUTINE Rand_real_vec2(rv,x) + RECURSIVE SUBROUTINE Rand_real_vec2(rv,x) + TYPE(RAND_state), INTENT(INOUT) :: x + REAL(KIND=RAND_kind2) rv(:) + INTEGER left,start, chunk, i + INTEGER(KIND=Sint) Z + INTEGER(KIND=Sint) temp(MIN(SIZE(rv),Max_block)) + + start=0 + left=SIZE(rv) + DO WHILE( left .GT. 0 ) + chunk = MIN(left,Max_block) + CALL Rand_sint_vec(temp(1:chunk),x) + DO i=1,chunk + Z = temp(i) + IF (Z .EQ. 0) Z = M + rv(start+i) = (Z-0.5d0)*INVMP1_2 + END DO + start = start + chunk + left = left - chunk + END DO + + RETURN + END SUBROUTINE Rand_real_vec2 + !}}} +END MODULE Rand + +!}}} + +!{{{test program +! PROGRAM test_random +! use Rand +! TYPE(RAND_state) x +! REAL y +! CALL Rand_load(x,(/5,4,3,2,1/)) +! DO I=0,10 +! CALL Rand_real(y,x) +! WRITE(*,10) I,y +! END DO +! +!10 FORMAT(I10,E25.16) +! +! END + +! 0 0.5024326127022505E-01 +! 1 0.8260946767404675E-01 +! 2 0.2123264316469431E-01 +! 3 0.6926658791489899E+00 +! 4 0.2076155943796039E+00 +! 5 0.4327449947595596E-01 +! 6 0.2204052871093154E-01 +! 7 0.1288446951657534E+00 +! 8 0.4859915426932275E+00 +! 9 0.5721384193748236E-01 +! 
10 0.7996825082227588E+00 +! + + +!}}} + diff --git a/external/panphasia/panphasia_routines.f b/external/panphasia/panphasia_routines.f new file mode 100644 index 0000000..2e1bfbd --- /dev/null +++ b/external/panphasia/panphasia_routines.f @@ -0,0 +1,3334 @@ +c=====================================================================================c +c +c The code below was written by: Adrian Jenkins, +c Institute for Computational Cosmology +c Department of Physics +c South Road +c Durham, DH1 3LE +c United Kingdom +c +c This file is part of the software made public in +c Jenkins and Booth 2013 - arXiv:1306.XXXX +c +c The software computes the Panphasia Gaussian white noise field +c realisation described in detail in Jenkins 2013 - arXiv:1306.XXXX +c +c +c +c This software is free, subject to a agreeing licence conditions: +c +c +c (i) you will publish the phase descriptors and reference Jenkins (13) +c for any new simulations that use Panphasia phases. You will pass on this +c condition to others for any software or data you make available publically +c or privately that makes use of Panphasia. +c +c (ii) that you will ensure any publications using results derived from Panphasia +c will be submitted as a final version to arXiv prior to or coincident with +c publication in a journal. +c +c (iii) that you report any bugs in this software as soon as confirmed to +c A.R.Jenkins@durham.ac.uk +c +c (iv) that you understand that this software comes with no warranty and that is +c your responsibility to ensure that it is suitable for the purpose that +c you intend. +c +c=====================================================================================c + +c===================================================================================== +c List of subroutines and arguments. Each of these is documented in c +c arXiV/1306.XXXX c +c c +c Adrian Jenkins, 24/6/2013. c +c------------------------------------------------------------------------------------- +c Version 1.000 +c=================================================================================== + + module pan_state + use Rand + implicit none + integer maxdim_, maxlev_, maxpow_ + parameter (maxdim_=60,maxlev_=50, maxpow_ = 3*maxdim_) + integer nmulti_ + parameter (nmulti_=64) + integer range_max + parameter(range_max=10000) + integer indmin,indmax + parameter (indmin=-1, indmax=60) + + + type state_data + integer base_state(5), base_lev_start(5,0:maxdim_) + TYPE(Rand_offset) :: poweroffset(0:maxpow_) + TYPE(Rand_offset) :: superjump + TYPE(Rand_state) :: current_state(-1:maxpow_) + + integer layer_min,layer_max,indep_field + +! This module stores information needed to access the part of Panphasia +! selected by a particular descriptor. 
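+! For orientation (no new format is defined here): a descriptor string
+! encodes an octree level, the (x,y,z) origin and side lengths of the
+! selected region at that level, a check value and a text name; these
+! are exactly the fields extracted by parse_descriptor below
+! (wn_level_base, i_xorigin_base/i_yorigin_base/i_zorigin_base,
+! i_base/i_base_y/i_base_z, check_rand, name).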
+ integer*8 xorigin_store(0:1,0:1,0:1) + integer*8 yorigin_store(0:1,0:1,0:1) + integer*8 zorigin_store(0:1,0:1,0:1) + + integer*4 lev_common + integer*4 layer_min_store,layer_max_store + + integer*8 ix_abs_store,iy_abs_store,iz_abs_store + integer*8 ix_per_store,iy_per_store,iz_per_store + integer*8 ix_rel_store,iy_rel_store,iz_rel_store + + real*8 exp_coeffs(8,0:7,-1:maxdim_) + integer*8 xcursor(0:maxdim_),ycursor(0:maxdim_),zcursor(0:maxdim_) + +c Local box parameters + + integer*4 ixshift(0:1,0:1,0:1) + integer*4 iyshift(0:1,0:1,0:1) + integer*4 izshift(0:1,0:1,0:1) + + +c more state variables + real*8 cell_data(9,0:7) + integer*4 ixh_last,iyh_last,izh_last + integer init + + integer return_cell_props_init + integer reset_lecuyer_state_init + integer*8 p_xcursor(indmin:indmax),p_ycursor(indmin:indmax),p_zcursor(indmin:indmax) + + + + end type state_data + + + +c Switch for enabling custom spherical function +c Set isub_spherical_function = 1 to turn on the spherical function + integer*4 isub_spherical_function + parameter (isub_spherical_function=0) + + end module pan_state + + +c================================================================================ +c Begin white noise routines +c================================================================================ + recursive subroutine start_panphasia(ldata,descriptor,ngrid,VERBOSE) + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + character*100 descriptor + integer ngrid + integer VERBOSE + + + + integer*4 wn_level_base,i_base,i_base_y,i_base_z + integer*8 i_xorigin_base,i_yorigin_base,i_zorigin_base, check_rand + character*20 name + + integer ratio + integer lextra + integer level_p + + + integer*8 ix_abs,iy_abs,iz_abs + integer*8 ix_per,iy_per,iz_per + integer*8 ix_rel,iy_rel,iz_rel + + !integer layer_min,layer_max,indep_field + !common /oct_range/ layer_min,layer_max,indep_field + + call parse_descriptor(descriptor ,wn_level_base,i_xorigin_base,i_yorigin_base, + & i_zorigin_base,i_base,i_base_y,i_base_z,check_rand,name) + + + lextra = (log10(real(ngrid)/real(i_base))+0.001)/log10(2.0) + ratio = 2**lextra + + if (ratio*i_base.ne.ngrid) + &stop 'Value of ngrid inconsistent with dim of region in Panphasia' + + level_p = wn_level_base + lextra + + ix_abs = ishft(i_xorigin_base,lextra) + iy_abs = ishft(i_yorigin_base,lextra) + iz_abs = ishft(i_zorigin_base,lextra) + + ix_per = i_base*ratio + iy_per = i_base*ratio + iz_per = i_base*ratio + +c Set the refinement position at the origin. + + ix_rel = 0 + iy_rel = 0 + iz_rel = 0 + + call set_phases_and_rel_origin(ldata,descriptor,level_p,ix_rel,iy_rel,iz_rel,VERBOSE) + +c Finally set the octree functions required for making cosmological +c initial conditions. These are passed using a common block. 
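+c (In this version these values are carried in the state_data argument
+c ldata rather than in the original /oct_range/ common block, which is
+c left commented out above.)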
+ + ldata%layer_min = 0 + ldata%layer_max = level_p + ldata%indep_field = 1 + + end +c================================================================================= + recursive subroutine set_phases_and_rel_origin(ldata,descriptor,lev,ix_rel,iy_rel,iz_rel,VERBOSE) + use pan_state + !use descriptor_phases + implicit none + type(state_data), intent(inout) :: ldata + character*100 descriptor + integer lev + integer*8 ix_abs,iy_abs,iz_abs + integer*8 ix_per,iy_per,iz_per + integer*8 ix_rel,iy_rel,iz_rel + integer*8 xorigin,yorigin,zorigin + + integer VERBOSE + integer MYID + integer*8 maxco + integer i + integer px,py,pz + + integer lnblnk + integer*8 mconst + parameter(mconst = 2147483647_Dint) + + integer*4 wn_level_base,i_base,i_base_y,i_base_z + integer*8 i_xorigin_base,i_yorigin_base,i_zorigin_base, check_rand + integer lextra,ratio + character*20 phase_name + +c----------------------------------------------------------------------------------------------- + + call initialise_panphasia(ldata) + + call validate_descriptor(ldata, descriptor,-1,check_rand) + + call parse_descriptor(descriptor ,wn_level_base,i_xorigin_base,i_yorigin_base, + & i_zorigin_base,i_base,i_base_y,i_base_z,check_rand,phase_name) + lextra = lev - wn_level_base + ratio = 2**lextra + + ix_abs = ishft(i_xorigin_base,lextra) + iy_abs = ishft(i_yorigin_base,lextra) + iz_abs = ishft(i_zorigin_base,lextra) + + ix_per = i_base*ratio + iy_per = i_base*ratio + iz_per = i_base*ratio + +c------------------------------------------------------------------------- +c Error checking +c------------------------------------------------------------------------- + if ((lev.lt.0).or.(lev.gt.maxlev_)) stop 'Level out of range! (1)' + + + maxco = 2_dint**lev + + if (ix_abs.lt.0) stop 'Error: ix_abs negative (1)' + if (iy_abs.lt.0) stop 'Error: iy_abs negative (1)' + if (iz_abs.lt.0) stop 'Error: iz_abs negative (1)' + + if (ix_rel.lt.0) stop 'Error: ix_rel negative (1)' + if (iy_rel.lt.0) stop 'Error: iy_rel negative (1)' + if (iz_rel.lt.0) stop 'Error: iz_rel negative (1)' + + + if (ix_abs+ix_rel.ge.maxco) + & stop 'Error: ix_abs + ix_rel out of range. (1)' + if (iy_abs+iy_rel.ge.maxco) + & stop 'Error: iy_abs + iy_rel out of range. (1)' + if (iz_abs+iz_rel.ge.maxco) + & stop 'Error: iz_abs + iz_rel out of range. (1)' + +c---------------------------------------------------------------------------------------- +c To allow the local box to wrap around, if needed, define a series of eight +c 'origins'. For many purposes (ix,iy,iz) = (0,0,0) is the only origin needed. 
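+c Schematically, in the unwrapped case (px,py,pz)=(0,0,0) the absolute
+c coordinate is split into a parent-cell coordinate at level lev-1 plus
+c a parity bit,  ix_abs + ix_rel = 2*xorigin + ixshift,  ixshift in {0,1},
+c so that adv_panphasia_cell_properties can later recover the parent
+c cell as ixh = (ixcell+ixshift)/2 and the child index as
+c mod(ixcell+ixshift,2) (and similarly for y and z).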
+ + + do px=0,1 + do py=0,1 + do pz=0,1 + + xorigin = max(0,( ix_abs + ix_rel - px*ix_per )/2) + yorigin = max(0,( iy_abs + iy_rel - py*iy_per )/2) + zorigin = max(0,( iz_abs + iz_rel - pz*iz_per )/2) + + ldata%ixshift(px,py,pz) = max(0, ix_abs + ix_rel -px*ix_per) - 2*xorigin + ldata%iyshift(px,py,pz) = max(0, iy_abs + iy_rel -py*iy_per) - 2*yorigin + ldata%izshift(px,py,pz) = max(0, iz_abs + iz_rel -pz*iz_per) - 2*zorigin + + +c Store box details: store the positions at level lev-1 + + + ldata%xorigin_store(px,py,pz) = xorigin + ldata%yorigin_store(px,py,pz) = yorigin + ldata%zorigin_store(px,py,pz) = zorigin + + enddo + enddo + enddo + + ldata%lev_common = lev + + + ldata%ix_abs_store = ix_abs + ldata%iy_abs_store = iy_abs + ldata%iz_abs_store = iz_abs + + ldata%ix_per_store = ix_per + ldata%iy_per_store = iy_per + ldata%iz_per_store = iz_per + + ldata%ix_rel_store = ix_rel + ldata%iy_rel_store = iy_rel + ldata%iz_rel_store = iz_rel + + +c Reset all cursor values to negative numbers. + + do i=0,maxdim_ + ldata%xcursor(i) = -999 + ldata%ycursor(i) = -999 + ldata%zcursor(i) = -999 + enddo + if (VERBOSE.gt.1) then + if (MYID.lt.1) then + print*,'----------------------------------------------------------' + print*,'Successfully initialised Panphasia box at level ',lev + write (6,105) ix_abs,iy_abs,iz_abs + write (6,106) ix_rel,iy_rel,iz_rel + write (6,107) ix_per,iy_per,iz_per + write (6,*) 'Phases used: ',descriptor(1:lnblnk(descriptor)) + print*,'----------------------------------------------------------' + endif + endif + 105 format(' Abs origin: (',i12,',',i12,',',i12,')') + 106 format(' Rel origin: (',i12,',',i12,',',i12,')') + 107 format(' Periods : (',i12,',',i12,',',i12,')') + end +c================================================================================ + recursive subroutine initialise_panphasia( ldata ) + use Rand + use pan_state + implicit none + + type(state_data), intent(inout) :: ldata + + TYPE(Rand_state) :: state + TYPE(Rand_offset) :: offset + integer ninitialise + parameter (ninitialise=218) + integer i + real*8 rand_num + + + call Rand_seed(state,ninitialise) + + call Rand_save(ldata%base_state,state) + + call Rand_set_offset(offset,1) + +c Calculate offsets of powers of 2 times nmulti +c + + do i=0,maxpow_ + ldata%poweroffset(i) = Rand_mul_offset(offset,nmulti_) + offset = Rand_mul_offset(offset,2) + enddo + + +c Compute the base state for each level. + + call Rand_load(state,ldata%base_state) + state = Rand_step(state,8) + + do i=0,maxdim_ + call Rand_save(ldata%base_lev_start(1,i),state) + state = Rand_boost(state,ldata%poweroffset(3*i)) + enddo + +c Set superjump to value 2**137 - used occasionally in computing Gaussian variables +c when the value of the returned random number is less an 10-6. + + call Rand_set_offset(ldata%superjump,1) + + do i=1,137 + ldata%superjump = Rand_mul_offset(ldata%superjump,2) + enddo + + +c Run time test to see if one particular value can be recovered. + + call Rand_load(state,ldata%base_lev_start(1,34)) + call Rand_real(rand_num,state) + + if (abs(rand_num- 0.828481889948473d0).gt.1.e-14) then + print*,'Error in initialisation!' 
+ print*,'Rand_num = ',rand_num + print*,'Target value = ', 0.828481889948473d0 + stop + endif + return + end +c================================================================================= + recursive subroutine panphasia_cell_properties(ldata,ixcell,iycell,izcell,cell_prop) + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + !integer layer_min,layer_max,indep_field + !common /oct_range/ layer_min,layer_max,indep_field + integer*4 ixcell,iycell,izcell + real*8 cell_prop(9) + + call adv_panphasia_cell_properties(ldata,ixcell,iycell,izcell,ldata%layer_min, + & ldata%layer_max,ldata%indep_field,cell_prop) + return + end +c================================================================================= + recursive subroutine adv_panphasia_cell_properties(ldata,ixcell,iycell,izcell,layer_min, + & layer_max,indep_field,cell_prop) + use pan_state + !use descriptor_phases + implicit none + + type(state_data), intent(inout) :: ldata + + integer*4 lev + integer*4 ixcell,iycell,izcell + integer layer_min,layer_max,indep_field + real*8 cell_prop(9) +c real*8 cell_data(9,0:7) + integer*4 j,l,lx,ly,lz + integer*4 px,py,pz + +c integer*4 ixh_last,iyh_last,izh_last + +c integer init +c data init/0/ +c save init,cell_data,ixh_last,iyh_last,izh_last ! Keep internal state + + integer*4 ixh,iyh,izh + + lev = ldata%lev_common + +c------- Error checking ----------------------------- + + if (layer_min.gt.layer_max) then + + if (layer_min-layer_max.eq.1) then ! Not necessarily bad. No octree basis functions + do j=1,9 ! required at this level and position. + cell_prop(j) = 0.0d0 ! Set returned cell_prop data to zero. + enddo + return + endif + + print*,'Warning: layer_min.gt.layer_max!' + print*,'layer_min = ',layer_min + print*,'layer_max = ',layer_max + print*,'ixcell,iycell,izcell',ixcell,iycell,izcell + + call flush(6) + stop 'Error: layer_min.gt.layer_max' + endif + + if (layer_max.gt.ldata%lev_common) then + print*,'lev_common = ',ldata%lev_common + print*,'layer_min = ',layer_min + print*,'layer_max = ',layer_max + stop 'Error: layer_max.gt.lev_common' + endif + if ((indep_field.lt.-1).or.(indep_field.gt.1)) + & stop 'Error: indep_field out of range' + +c---------------------------------------------------- +c Check which 'origin' to use. + + px = 0 + py = 0 + pz = 0 + + if (ldata%ix_rel_store+ixcell.ge.ldata%ix_per_store) px = 1 ! Crossed x-periodic bndy + if (ldata%iy_rel_store+iycell.ge.ldata%iy_per_store) py = 1 ! Crossed y-periodic bndy + if (ldata%iz_rel_store+izcell.ge.ldata%iz_per_store) pz = 1 ! Crossed z-periodic bndy +c---------------------------------------------------- + + + ixh = (ixcell+ldata%ixshift(px,py,pz) )/2 + iyh = (iycell+ldata%iyshift(px,py,pz) )/2 + izh = (izcell+ldata%izshift(px,py,pz) )/2 + + lx = mod(ixcell+ldata%ixshift(px,py,pz) ,2) + ly = mod(iycell+ldata%iyshift(px,py,pz) ,2) + lz = mod(izcell+ldata%izshift(px,py,pz) ,2) + + + l = 4*lx + 2*ly + lz ! Determine which cell is required + +cc------------------ If no new evalation is needed skip assignment ----- + if ((ldata%init.eq.1).and.(ixh.eq.ldata%ixh_last).and.(iyh.eq.ldata%iyh_last).and. + & (izh.eq.ldata%izh_last).and.(layer_min.eq.ldata%layer_min_store).and. + & (layer_max.eq.ldata%layer_max_store)) goto 24 +cc----------------------------------------------------------------------------- + + + call return_cell_props(ldata,lev,ixh,iyh,izh,px,py,pz,layer_min, + & layer_max,indep_field,ldata%cell_data) + +c Remember previous values. 
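+c (ixh_last/iyh_last/izh_last cache the parent cell evaluated above so
+c that consecutive calls for sibling cells of the same parent can skip
+c return_cell_props via the goto 24 shortcut.)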
+ + ldata%ixh_last = ixh + ldata%iyh_last = iyh + ldata%izh_last = izh + + + 24 continue + + + do j=1,9 + cell_prop(j) = ldata%cell_data(j,l) ! Copy the required data + enddo + + if (ldata%init.eq.0) ldata%init=1 + + return + end +c================================================================================= + recursive subroutine return_cell_props(ldata,lev_input,ix_half,iy_half,iz_half, + & px,py,pz,layer_min,layer_max,indep_field,cell_data) + use Rand + use pan_state + !use descriptor_phases + implicit none + type(state_data), intent(inout) :: ldata + integer lev_input,ix_half,iy_half,iz_half,px,py,pz + integer layer_min,layer_max,indep_field + real*8 cell_data(9,0:7) + + real*8 garray(0:63) + integer lev + integer*8 xarray,yarray,zarray + + integer i,istart,icell_name + + +c integer init +c data init/0/ +c save init + + + +c-------------------------------------------------------- +c--------------------------- Initialise level -1 -------- +c-------------------------------------------------------- + + if (ldata%return_cell_props_init.eq.0) then ! First time called. Set up the Legendre coefficients + ldata%return_cell_props_init = 1 ! for the root cell. This is the first term on the + call Rand_load(ldata%current_state(-1),ldata%base_state) ! right hand side of the equation in appendix C of + call return_gaussian_array(ldata,-1,8,garray) ! Jenkins 2013 that defines PANPHASIA. + ldata%exp_coeffs(1,0,-1) = garray(0) + ldata%exp_coeffs(2,0,-1) = garray(1) + ldata%exp_coeffs(3,0,-1) = garray(2) + ldata%exp_coeffs(4,0,-1) = garray(3) + ldata%exp_coeffs(5,0,-1) = garray(4) + ldata%exp_coeffs(6,0,-1) = garray(5) + ldata%exp_coeffs(7,0,-1) = garray(6) + ldata%exp_coeffs(8,0,-1) = garray(7) + + ldata%layer_min_store = layer_min + ldata%layer_max_store = layer_max + + endif + +c-------------------------------------------------------- +c---------------------------- Error checking ------------ +c-------------------------------------------------------- + + lev = lev_input-1 + + if (lev_input.ne.ldata%lev_common) stop 'Box initialised at a different level !' + if (ix_half.lt.0) then + print*,'ix_half negative',ix_half + stop 'ix_half out of range!' + endif + if (iy_half.lt.0) stop 'iy_half out of range!' + if (iz_half.lt.0) then + print*,'iz_half negative',iz_half + stop 'iz_half out of range!' + endif + + + xarray = ldata%xorigin_store(px,py,pz) + ix_half + yarray = ldata%yorigin_store(px,py,pz) + iy_half + zarray = ldata%zorigin_store(px,py,pz) + iz_half + + +c If layer_max or layer_min have changed, rebuild from the start and reset the +c recorded value of layer_max and layer_min + + if ((layer_max.ne.ldata%layer_max_store).or.(layer_min.ne.ldata%layer_min_store)) then + + if (layer_min.gt.layer_max) stop 'layer_min > layer_max : 2' + + istart = max(1,layer_min-1) + + ldata%layer_max_store = layer_max + ldata%layer_min_store = layer_min + + goto 10 + + endif + + + if ((xarray.eq.ldata%xcursor(lev)).and.(yarray.eq.ldata%ycursor(lev)).and.(zarray.eq.ldata%zcursor(lev))) return ! Nothing to do. + +c=========================================================================================================== +c------------- First determine which levels need to be (re)computed +c=========================================================================================================== + + istart = 0 + do i=lev-1,0,-1 + if ((ishft(xarray,i-lev).eq.ldata%xcursor(i)).and.(ishft(yarray,i-lev).eq.ldata%ycursor(i)).and. 
+ & (ishft(zarray,i-lev).eq.ldata%zcursor(i))) then + istart = i+1 + goto 10 + endif + enddo + + 10 continue + + +c==================================================================================== +c------------- Now compute each level as required and update (x,y,z) cursor variables +c==================================================================================== + + do i=istart,lev + + icell_name = 0 + + ldata%xcursor(i) = ishft(xarray,i-lev) + ldata%ycursor(i) = ishft(yarray,i-lev) + ldata%zcursor(i) = ishft(zarray,i-lev) + + if (btest(ldata%xcursor(i),0)) icell_name = icell_name + 4 + if (btest(ldata%ycursor(i),0)) icell_name = icell_name + 2 + if (btest(ldata%zcursor(i),0)) icell_name = icell_name + 1 + + call reset_lecuyer_state(ldata,i,ldata%xcursor(i),ldata%ycursor(i),ldata%zcursor(i)) + + if (isub_spherical_function.ne.1) then + call return_gaussian_array(ldata,i,64,garray) + else + call return_oct_sf_expansion(ldata,i,lev,ldata%xcursor(i),ldata%ycursor(i),ldata%zcursor(i), + & 64,garray) + endif + + + call evaluate_panphasia(ldata,i,maxdim_,garray,layer_min, + & layer_max, indep_field, icell_name,cell_data,ldata%exp_coeffs) + + enddo + return + end +c================================================================================= + recursive subroutine evaluate_panphasia(ldata,nlev,maxdim,g, + & layer_min,layer_max,indep_field,icell_name,cell_data,leg_coeff) + use pan_state + implicit none +c--------------------------------------------------------------------------------- +c This subroutine calculates the Legendre block coefficients for the eight child +c cells of an octree cell. +c +c----------------- Define subroutine arguments ----------------------------------- + type(state_data), intent(inout) :: ldata + integer nlev,maxdim + integer layer_min,layer_max,indep_field + integer icell_name + real*8 leg_coeff(0:7,0:7,-1:maxdim),cell_data(0:8,0:7) + real*8 g(*) + +c----------------- Define constants using notation from appendix A of Jenkins 2013 + + real*8 a1,a2,b1,b2,b3,c1,c2,c3,c4 + + parameter(a1 = 0.5d0*sqrt(3.0d0), a2 = 0.5d0) + + parameter(b1 = 0.75d0, b2 = 0.25d0*sqrt(3.0d0)) + parameter(b3 = 0.25d0) + + parameter(c1 = sqrt(27.0d0/64.0d0), c2 = 0.375d0) + parameter(c3 = sqrt(3.0d0/64.0d0), c4 = 0.125d0) + +c----------------- Define octree variables -------------------------------- + + real*8 coeff_p000, coeff_p001, coeff_p010, coeff_p011 + real*8 coeff_p100, coeff_p101, coeff_p110, coeff_p111 + + real*8 positive_octant_lc(0:7,0:1,0:1,0:1),temp_value(0:7,0:7) + integer i,j,ix,iy,iz + integer icx,icy,icz + integer iox,ioy,ioz + real*8 parity,isig + real*8 usually_rooteighth_factor +c-------------------------------------------------------------------------- + +c------------- Set the Legendre block coefficients for the parent cell +c itself. These are either inherited from the octree above +c or set to zero depending on which levels of the octree +c have been selected to be populated with the octree +c basis functions. 
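+c
+c Side note: the constants defined above satisfy a1**2+a2**2 = 1,
+c b1**2+2*b2**2+b3**2 = 1 and c1**2+3*c2**2+3*c3**2+c4**2 = 1, i.e. the
+c parity blocks further down mix the parent coefficient and the octree
+c coefficients g(*) in a norm-preserving (orthogonal) way.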
+c--------------------------------------------------------------------------- + if (nlev.ge.layer_min) then + coeff_p000 = leg_coeff(0,icell_name,nlev-1) + coeff_p001 = leg_coeff(1,icell_name,nlev-1) + coeff_p010 = leg_coeff(2,icell_name,nlev-1) + coeff_p011 = leg_coeff(3,icell_name,nlev-1) + coeff_p100 = leg_coeff(4,icell_name,nlev-1) + coeff_p101 = leg_coeff(5,icell_name,nlev-1) + coeff_p110 = leg_coeff(6,icell_name,nlev-1) + coeff_p111 = leg_coeff(7,icell_name,nlev-1) + else + coeff_p000 = 0.0d0 + coeff_p001 = 0.0d0 + coeff_p010 = 0.0d0 + coeff_p011 = 0.0d0 + coeff_p100 = 0.0d0 + coeff_p101 = 0.0d0 + coeff_p110 = 0.0d0 + coeff_p111 = 0.0d0 + endif + +c Apply layer_max and indep_field inputs --------------------------------- + + if (indep_field.ne.-1) then + usually_rooteighth_factor = sqrt(0.125d0) + else + usually_rooteighth_factor = 0.0d0 ! This option returns only the indep field. + endif ! For use in testing only. + + if (nlev.ge.layer_max) then + do i=1,56 + g(i) = 0.0d0 ! Set octree coefficients to zero as not required. + enddo + endif + + if (indep_field.eq.0) then ! Set the independent field to zero as not required. + do i=57,64 + g(i) = 0.0d0 + enddo + endif +c----------------------------------------------------------------------------- +c +c +c The calculations immediately below evalute the eight Legendre block coefficients for the +c child cell that is furthest from the absolute coordiate origin of the octree - we call +c this the positive octant cell. +c +c The coefficients are given by a set of matrix equations which combine the +c coefficients of the Legendre basis functions of the parent cell itself, with +c the coefficients from the octree basis functions that occupy the +c parent cell. +c +c The Legendre basis function coefficients of the parent cell are stored in +c the variables, coeff_p000 - coeff_p111 and are initialise above. +c +c The coefficients of the octree basis functions are determined by the +c first 56 entries of the array g, which is passed down into this +c subroutine. +c +c These two sources of information are combined using a set of linear equations. +c The coefficients of these linear equations are taken from the inverses or +c equivalently transposes of the matrices given in appendix A of Jenkins 2013. +c The matrices in appendix A define the PANPHASIA octree basis functions +c in terms of Legendre blocks. +c +c All of the Legendre block functions of the parent cell, and the octree basis +c functions of the parent cell share one of eight distinct symmetries with respect to +c reflection about the x1=0,x2=0,x3=0 planes (where the origin is taken as the parent +c cell centre and x1,x2,x3 are parallel to the cell edges). +c +c Each function has either purely reflectional symmetry (even parity) or +c reflectional symmetry with a sign change (odd parity) about each of the three principal +c planes through the cell centre. There are therefore 8 parity types. We can label each +c parity type with a binary triplet. So 000 is pure reflectional symmetry about +c all of the principal planes. +c +c In the code below the parent cell Legendre block functions, and octree functions are +c organised into eight groups each with eight members. Each group has a common +c parity type. +c +c We keep the contributions of each parity type to each of the eight Legendre basis +c functions occupying the positive octant cell separate. 
Once they have all been +c computed, we can apply the different symmetry operations and determine the +c Legendre block basis functions for all eight child cells at the same time. +c--------------------------------------------------------------------------------------- +c 000 parity + + positive_octant_lc(0, 0,0,0) = 1.0d0*coeff_p000 + positive_octant_lc(1, 0,0,0) = -1.0d0*g(1) + positive_octant_lc(2, 0,0,0) = -1.0d0*g(2) + positive_octant_lc(3, 0,0,0) = 1.0d0*g(3) + positive_octant_lc(4, 0,0,0) = -1.0d0*g(4) + positive_octant_lc(5, 0,0,0) = 1.0d0*g(5) + positive_octant_lc(6, 0,0,0) = 1.0d0*g(6) + positive_octant_lc(7, 0,0,0) = -1.0d0*g(7) + +c 100 parity + + positive_octant_lc(0, 1,0,0) = a1*coeff_p100 - a2*g(8) + positive_octant_lc(1, 1,0,0) = g(9) + positive_octant_lc(2, 1,0,0) = g(10) + positive_octant_lc(3, 1,0,0) = -g(11) + positive_octant_lc(4, 1,0,0) = a2*coeff_p100 + a1*g(8) + positive_octant_lc(5, 1,0,0) = -g(12) + positive_octant_lc(6, 1,0,0) = -g(13) + positive_octant_lc(7, 1,0,0) = g(14) + +c 010 parity + + positive_octant_lc(0, 0,1,0) = a1*coeff_p010 - a2*g(15) + positive_octant_lc(1, 0,1,0) = g(16) + positive_octant_lc(2, 0,1,0) = a2*coeff_p010 + a1*g(15) + positive_octant_lc(3, 0,1,0) = -g(17) + positive_octant_lc(4, 0,1,0) = g(18) + positive_octant_lc(5, 0,1,0) = -g(19) + positive_octant_lc(6, 0,1,0) = -g(20) + positive_octant_lc(7, 0,1,0) = g(21) + + +c 001 parity + + positive_octant_lc(0, 0,0,1) = a1*coeff_p001 - a2*g(22) + positive_octant_lc(1, 0,0,1) = a2*coeff_p001 + a1*g(22) + positive_octant_lc(2, 0,0,1) = g(23) + positive_octant_lc(3, 0,0,1) = -g(24) + positive_octant_lc(4, 0,0,1) = g(25) + positive_octant_lc(5, 0,0,1) = -g(26) + positive_octant_lc(6, 0,0,1) = -g(27) + positive_octant_lc(7, 0,0,1) = g(28) + +c 110 parity + + positive_octant_lc(0, 1,1,0) = b1*coeff_p110 - b2*g(29) + b3*g(30) - b2*g(31) + positive_octant_lc(1, 1,1,0) = -g(32) + positive_octant_lc(2, 1,1,0) = b2*coeff_p110 - b3*g(29) - b2*g(30) + b1*g(31) + positive_octant_lc(3, 1,1,0) = g(33) + positive_octant_lc(4, 1,1,0) = b2*coeff_p110 + b1*g(29) + b2*g(30) + b3*g(31) + positive_octant_lc(5, 1,1,0) = g(34) + positive_octant_lc(6, 1,1,0) = b3*coeff_p110 + b2*g(29) - b1*g(30) - b2*g(31) + positive_octant_lc(7, 1,1,0) = -g(35) + + +c 011 parity + + positive_octant_lc(0, 0,1,1) = b1*coeff_p011 - b2*g(36) + b3*g(37) - b2*g(38) + positive_octant_lc(1, 0,1,1) = b2*coeff_p011 - b3*g(36) - b2*g(37) + b1*g(38) + positive_octant_lc(2, 0,1,1) = b2*coeff_p011 + b1*g(36) + b2*g(37) + b3*g(38) + positive_octant_lc(3, 0,1,1) = b3*coeff_p011 + b2*g(36) - b1*g(37) - b2*g(38) + positive_octant_lc(4, 0,1,1) = -g(39) + positive_octant_lc(5, 0,1,1) = g(40) + positive_octant_lc(6, 0,1,1) = g(41) + positive_octant_lc(7, 0,1,1) = -g(42) + +c 101 parity + + positive_octant_lc(0, 1,0,1) = b1*coeff_p101 - b2*g(43) + b3*g(44) - b2*g(45) + positive_octant_lc(1, 1,0,1) = b2*coeff_p101 - b3*g(43) - b2*g(44) + b1*g(45) + positive_octant_lc(2, 1,0,1) = -g(46) + positive_octant_lc(3, 1,0,1) = g(47) + positive_octant_lc(4, 1,0,1) = b2*coeff_p101 + b1*g(43) + b2*g(44) + b3*g(45) + positive_octant_lc(5, 1,0,1) = b3*coeff_p101 + b2*g(43) - b1*g(44) - b2*g(45) + positive_octant_lc(6, 1,0,1) = g(48) + positive_octant_lc(7, 1,0,1) = -g(49) + +c 111 parity + + positive_octant_lc(0, 1,1,1) = c1*coeff_p111 - c2*g(50) - c2*g(51) - c2*g(52) + c3*g(53) + c3*g(54) + c3*g(55) - c4*g(56) + positive_octant_lc(1, 1,1,1) = c2*coeff_p111 + c1*g(50) - c2*g(51) + c2*g(52) - c3*g(53) + c3*g(54) + c4*g(55) + c3*g(56) + positive_octant_lc(2, 1,1,1) = 
c2*coeff_p111 + c2*g(50) + c1*g(51) - c2*g(52) - c3*g(53) - c4*g(54) + c3*g(55) - c3*g(56) + positive_octant_lc(3, 1,1,1) = c3*coeff_p111 - c3*g(50) - c3*g(51) + c4*g(52) - c1*g(53) - c2*g(54) - c2*g(55) - c2*g(56) + positive_octant_lc(4, 1,1,1) = c2*coeff_p111 - c2*g(50) + c2*g(51) + c1*g(52) + c4*g(53) - c3*g(54) + c3*g(55) + c3*g(56) + positive_octant_lc(5, 1,1,1) = c3*coeff_p111 + c3*g(50) - c4*g(51) - c3*g(52) + c2*g(53) - c1*g(54) - c2*g(55) + c2*g(56) + positive_octant_lc(6, 1,1,1) = c3*coeff_p111 + c4*g(50) + c3*g(51) + c3*g(52) + c2*g(53) + c2*g(54) - c1*g(55) - c2*g(56) + positive_octant_lc(7, 1,1,1) = c4*coeff_p111 - c3*g(50) + c3*g(51) - c3*g(52) - c2*g(53) + c2*g(54) - c2*g(55) + c1*g(56) +c-------------------------------------------------------------------------------------------- +c +c +c We now calculate the Legendre basis coefficients for all eight child cells +c by applying the appropriate reflectional parities to the coefficients +c calculated above for the positive octant child cell. +c +c See equations A2 and A3 in appendix A of Jenkins 2013. +c +c The reflectional parity is given by (ix,iy,iz) loops below. +c +c The (icx,icy,icz) loops below, loop over the eight child cells. +c +c The positive octant child cell is given below by (icx=icy=icz=0) or i=7. +c +c The combination ix*icx +iy*icy +iz*icz is either even or odd, depending +c on whether the parity change is even or odd. +c +c The variables iox,ioy,ioz are used to loop over the different +c types of Legendre basis function. +c +c The combination iox*icx + ioy*icy + ioz*icz is either even and odd +c and identifies which coefficients keep or change sign respectively +c due to a pure reflection about the principal planes. +c-------------------------------------------------------------------------------------------- + + do iz=0,7 + do iy=0,7 + temp_value(iy,iz) = 0.0d0 ! Zero temporary sums + enddo + enddo +c-------------------------------------------------------------------------------------------- + do iz=0,1 ! Loop over z parity (0=keep sign, 1=change sign) + do iy=0,1 ! Loop over y parity (0=keep sign, 1=change sign) + do ix=0,1 ! Loop over x parity (0=keep sign, 1=change sign) + + + do icx=0,1 ! Loop over x-child cells + do icy=0,1 ! Loop over y-child cells + do icz=0,1 ! Loop over z-child cells + + if (mod(ix*icx+iy*icy+iz*icz,2).eq.0) then + parity = 1.0d0 + else + parity =-1.0d0 + endif + + i = 7 - 4*icx -2*icy - icz ! Calculate which child cell this is. + + + do iox=0,1 ! Loop over Legendre basis function type + do ioy=0,1 ! Loop over Legendre basis function type + do ioz=0,1 ! 
Loop over Legendre basis function type + + j = 4*iox + 2*ioy + ioz + + if (mod(iox*icx + ioy*icy + ioz*icz,2).eq.0) then + isig = parity + else + isig = -parity + endif + + temp_value(j,i) = temp_value(j,i) + isig*positive_octant_lc(j,ix,iy,iz) + + enddo + enddo + enddo + + enddo + enddo + enddo + + enddo + enddo + enddo + + +c Assign values of the output variables + + do i=0,7 + do j=0,7 + leg_coeff(j,i,nlev) = temp_value(j,i)*usually_rooteighth_factor + cell_data(j,i) = leg_coeff(j,i,nlev) + enddo + enddo + +c Finally set the independent field values + + cell_data(8,0) = g(57) + cell_data(8,1) = g(58) + cell_data(8,2) = g(59) + cell_data(8,3) = g(60) + cell_data(8,4) = g(61) + cell_data(8,5) = g(62) + cell_data(8,6) = g(63) + cell_data(8,7) = g(64) + + + return + end +c================================================================================= + recursive subroutine reset_lecuyer_state(ldata,lev,xcursor,ycursor,zcursor) + use pan_state + implicit none + + type(state_data), intent(inout) :: ldata + integer lev + integer*8 xcursor,ycursor,zcursor + +c integer indmin,indmax +c parameter (indmin=-1, indmax=60) +c integer*8 p_xcursor(indmin:indmax),p_ycursor(indmin:indmax),p_zcursor(indmin:indmax) +c save p_xcursor,p_ycursor,p_zcursor + integer i +c integer init +c data init/0/ +c save init + + if (ldata%reset_lecuyer_state_init.eq.0) then ! Initialise p_cursor variables with + ldata%reset_lecuyer_state_init = 1 ! negative values. + do i=indmin,indmax + ldata%p_xcursor(i) = -9999 + ldata%p_ycursor(i) = -9999 + ldata%p_zcursor(i) = -9999 + enddo + endif + + if ( (xcursor.eq.ldata%p_xcursor(lev)).and.(ycursor.eq.ldata%p_ycursor(lev)).and. + & (zcursor.eq.ldata%p_zcursor(lev)+1)) then + ldata%p_xcursor(lev) = xcursor + ldata%p_ycursor(lev) = ycursor + ldata%p_zcursor(lev) = zcursor + return + endif + + call advance_current_state(ldata,lev,xcursor,ycursor,zcursor) + + ldata%p_xcursor(lev) = xcursor + ldata%p_ycursor(lev) = ycursor + ldata%p_zcursor(lev) = zcursor + + + return + end +c================================================================================= + recursive subroutine advance_current_state(ldata,lev,x,y,z) + use Rand + use pan_state + !use descriptor_phases + implicit none + + type(state_data), intent(inout) :: ldata + + integer lev + integer*8 x,y,z + + integer*8 lev_range + + TYPE(Rand_offset) :: offset1,offset2 + TYPE(Rand_offset) :: offset_x,offset_y,offset_z,offset_total + + integer ndiv,nrem + integer*8 ndiv8,nrem8 + integer nfactor + parameter (nfactor=291071) ! Value unimportant except has to be > 262144 + + +c----- First some error checking ------------------------------------------ + if ((lev.lt.0).or.(lev.gt.maxlev_)) stop 'Level out of range! (2)' + + lev_range = 2_dint**lev + + + if ((x.lt.0).or.(x.ge.lev_range)) then + print*,'x,lev,lev_range',x,lev,lev_range + call flush(6) + stop 'x out of range!' + endif + if ((y.lt.0).or.(y.ge.lev_range)) then + print*,'y,lev,lev_range',y,lev,lev_range + stop 'y out of range!' + endif + if ((z.lt.0).or.(z.ge.lev_range)) stop 'z out of range!' +c---------------------------------------------------------------------------- +c +c Note the Rand_set_offset subroutine takes an integer*4 value +c for the offset value. For this reason we need to use integer*4 +c values - ndiv,nrem. As a precaution an explicit check is made +c to be sure that these values are calculated correctly. 
+c--------------------------------------------------------------------------- + + + call Rand_load(ldata%current_state(lev),ldata%base_lev_start(1,lev)) + + if (lev.eq.0) return + +c Calculate z-offset + + ndiv = z/nfactor + nrem = z - ndiv*nfactor + ndiv8 = ndiv + nrem8 = nrem + + if (ndiv8*nfactor+nrem8.ne.z) stop 'Error in z ndiv nrem' + + call Rand_set_offset(offset1,ndiv) + offset1 = Rand_mul_offset(offset1,nfactor) + call Rand_set_offset(offset2,nrem) + offset2 = Rand_add_offset(offset1,offset2) + offset_z = Rand_mul_offset(offset2,nmulti_) + +c Calculate y-offset + + ndiv = y/nfactor + nrem = y - ndiv*nfactor + ndiv8 = ndiv + nrem8 = nrem + + if (ndiv8*nfactor+nrem8.ne.y) stop 'Error in y ndiv nrem' + + offset1 = Rand_mul_offset(ldata%poweroffset(lev),ndiv) + offset1 = Rand_mul_offset(offset1,nfactor) + offset2 = Rand_mul_offset(ldata%poweroffset(lev),nrem) + offset_y = Rand_add_offset(offset1,offset2) + +c Calculate x-offset + + ndiv = x/nfactor + nrem = x - ndiv*nfactor + ndiv8 = ndiv + nrem8 = nrem + + if (ndiv8*nfactor+nrem8.ne.x) then + print*,'ndiv,nfactor,nrem,x',ndiv,nfactor,nrem,x + print*,'ndiv*nfactor+nrem',ndiv*nfactor+nrem + print*,'x-ndiv*nfactor-nrem',x-ndiv*nfactor-nrem + stop 'Error in x ndiv nrem' + endif + + offset1 = Rand_mul_offset(ldata%poweroffset(2*lev),ndiv) + offset1 = Rand_mul_offset(offset1,nfactor) + offset2 = Rand_mul_offset(ldata%poweroffset(2*lev),nrem) + offset_x = Rand_add_offset(offset1,offset2) + + offset1 = Rand_add_offset(offset_x,offset_y) + offset_total = Rand_add_offset(offset1, offset_z) + + ldata%current_state(lev) = Rand_boost(ldata%current_state(lev),offset_total) + + return + end +c================================================================================= + recursive subroutine return_gaussian_array(ldata,lev,ngauss,garray) + use Rand + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + integer lev,ngauss + real*8 garray(0:*) + TYPE(Rand_state) :: state + real*8 PI + parameter (PI=3.1415926535897932384d0) + real*8 branch + parameter (branch=1.d-6) + integer iloop + + real*8 temp,mag,ang + integer i + + if (mod(ngauss,2).ne.0) + & stop 'Error in return_gaussian_array - even pairs only' + +c First obtain a set of uniformly distributed pseudorandom numbers +c between 0 and 1. The method used is described in detail in +c appendix B of Jenkins 2013. + + do i=0,ngauss-1 + call Rand_real(garray(i),ldata%current_state(lev)) + + if (garray(i).lt.branch) then + garray(i) = branch + state = Rand_boost(ldata%current_state(lev),ldata%superjump) + iloop = 0 + 10 continue + call Rand_real(temp,state) + iloop = iloop+1 + if (temp.lt.branch) then + garray(i) = garray(i)*branch + state = Rand_boost(state,ldata%superjump) + if (iloop.gt.100) then + print*,'Too may iterations in return_gaussian_array!' + call flush(6) + stop + endif + goto 10 + else + garray(i) = garray(i)*temp + endif + endif + enddo + +c Apply Box-Muller transformation to create pairs of Gaussian +c pseudorandom numbers. 
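+c The loop below is a standard Box-Muller step, written out here for
+c reference: given a uniform pair (u1,u2) in (0,1],
+c     z1 = sqrt(-2 ln u1) * cos(2*pi*u2)
+c     z2 = sqrt(-2 ln u1) * sin(2*pi*u2)
+c are two independent standard Gaussian deviates. The transformation is
+c applied in place to consecutive pairs of garray.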
+ + do i=0,ngauss/2-1 + + mag = sqrt(-2.0d0*log(garray(2*i))) + ang = 2.0d0*PI*garray(2*i+1) + + garray(2*i) = mag*cos(ang) + garray(2*i+1) = mag*sin(ang) + + enddo + end +c================================================================================= + recursive subroutine parse_descriptor(string,l,ix,iy,iz,side1,side2,side3,check_int,name) + implicit none + integer nchar + parameter(nchar=100) + character*100 string + integer*4 l,side1,side2,side3,ierror + integer*8 ix,iy,iz + integer*8 check_int + character*20 name + + + integer i,ip,iq,ir + + ierror = 0 + + ip = 1 + do while (string(ip:ip).eq.' ') + ip = ip + 1 + enddo + + if (string(ip:ip+7).ne.'[Panph1,') then + ierror = 1 + print*,string(ip:ip+7) + goto 10 + endif + + ip = ip+8 + if (string(ip:ip).ne.'L') then + ierror = 2 + goto 10 + endif + + ip = ip+1 + + iq = ip + scan( string(ip:nchar),',') -1 + + if (ip.eq.iq) then + ierror = 3 + goto 10 + endif + + + read (string(ip:iq),*) l + + ip = iq+1 + + if (string(ip:ip).ne.'(') then + ierror = 4 + goto 10 + endif + + ip = ip+1 + + iq = ip + scan( string(ip:nchar),')') -2 + + read(string(ip:iq),*) ix,iy,iz + + ip = iq+2 + + if (string(ip:ip).ne.',') then + ierror = 5 + goto 10 + endif + + ip = ip+1 + if ((string(ip:ip).ne.'S').and.(string(ip:ip).ne.'D')) then + ierror = 6 + goto 10 + endif + + if (string(ip:ip).eq.'S') then + ip = ip + 1 + iq = ip + scan( string(ip:nchar),',') -2 + read (string(ip:iq),*) side1 + side2 = side1 + side3 = side1 + iq = iq+1 + if (string(iq:iq+2).ne.',CH') then + print*,string(ip:iq),string(iq:iq+2) + ierror = 6 + goto 10 + endif + else + ip = ip + 1 + if (string(ip:ip).ne.'(') then + ierror = 7 + goto 10 + endif + + + ip = ip + 1 + iq = ip + scan( string(ip:nchar),')') -2 + read (string(ip:iq),*) side1,side2,side3 + + iq = iq + 1 + + if (string(iq:iq).ne.')') then + ierror = 8 + goto 10 + endif + + iq = iq + 1 + + if (string(iq:iq+2).ne.',CH') then + ierror = 9 + goto 10 + endif + + endif + + ip = iq + 3 + + iq = ip + scan( string(ip:nchar),',') -2 + + read (string(ip:iq),*) check_int + + ip = iq + 1 + + if (string(ip:ip).ne.',') then + ierror = 10 + goto 10 + endif + + ip = ip+1 + + ir = ip + scan( string(ip:nchar),']') -2 + + iq = min(ir,ip+19) + + do i=1,20 + name(i:i)=' ' + enddo + + do i=ip,iq + name(i-ip+1:i-ip+1) = string(i:i) + enddo + + iq = ir + 1 + + if (string(iq:iq).ne.']') then + ierror = 11 + goto 10 + endif + + + 10 continue + + if (ierror.eq.0) return + + print*,'Error reading panphasian descriptor. 
Error number:',ierror + stop + + return + end +c================================================================================= + recursive subroutine compose_descriptor(l,ix,iy,iz,side,check_int,name,string) + implicit none + integer nchar + parameter(nchar=100) + character*100,intent(out)::string + character*20 name + integer*4 l,ltemp + integer*8 side + integer*8 ix,iy,iz + integer*8 check_int + + character*50 temp1,temp2,temp3,temp4,temp5,temp6 + integer lnblnk + + integer ip1,ip2,ip3,ip4,ip5,ip6 + + ltemp = l + + 5 continue + if ((mod(ix,2).eq.0).and.(mod(iy,2).eq.0).and.(mod(iz,2).eq.0).and.(mod(side,2).eq.0)) then + ix = ix/2 + iy = iy/2 + iz = iz/2 + side = side/2 + ltemp = ltemp-1 + goto 5 + endif + + + write (temp1,*) ltemp + ip1= scan(temp1,'0123456789') + write (temp2,*) ix + ip2= scan(temp2,'0123456789') + write (temp3,*) iy + ip3= scan(temp3,'0123456789') + write (temp4,*) iz + ip4= scan(temp4,'0123456789') + write (temp5,*) side + ip5= scan(temp5,'0123456789') + write (temp6,*) check_int + ip6= scan(temp6,'-0123456789') + + + string='[Panph1,L'//temp1(ip1:lnblnk(temp1))//',('//temp2(ip2:lnblnk(temp2)) + & //','//temp3(ip3:lnblnk(temp3))//','//temp4(ip4:lnblnk(temp4))//'),S' + & // temp5(ip5:lnblnk(temp5))//',CH'//temp6(ip6:lnblnk(temp6))// + & ','//name(1:lnblnk(name))//']' + + return + + end +c================================================================================= + recursive subroutine validate_descriptor(ldata,string,MYID,check_number) + use pan_state + implicit none + + type(state_data), intent(inout) :: ldata + character*100 string + integer*8 check_number + integer MYID + + character*20 phase_name + integer*4 lev + + integer*8 ix_abs,iy_abs,iz_abs + integer*4 ix_base,iy_base,iz_base + + + integer*8 xval,yval,zval + integer val_state(5) + + TYPE(Rand_state) :: state + + real*8 rand_num + integer*8 mconst,check_total,check_rand + parameter(mconst = 2147483647_Dint) + integer ascii_list(0:255) + integer*8 maxco + integer i + integer*8 ii + integer lnblnk + + + + call parse_descriptor(string,lev,ix_abs,iy_abs,iz_abs, + & ix_base,iy_base,iz_base,check_rand,phase_name) + +c------------------------------------------------------------------------- +c Some basic checking +c------------------------------------------------------------------------- + if ((lev.lt.0).or.(lev.gt.maxlev_)) then + print*,'lev,maxlev',lev,maxlev_ + call flush(6) + stop 'Level out of range! (3)' + endif + + if ((mod(ix_abs,2).eq.0).and.(mod(iy_abs,2).eq.0).and.(mod(iz_abs,2).eq.0).and. + & (mod(ix_base,2).eq.0).and.(mod(iy_base,2).eq.0).and.(mod(iz_base,2).eq.0)) + & stop 'Parameters not at lowest level' + + + maxco = 2_dint**lev + + if (ix_abs.lt.0) stop 'Error: ix_abs negative (2)' + if (iy_abs.lt.0) stop 'Error: iy_abs negative (2)' + if (iz_abs.lt.0) stop 'Error: iz_abs negative (2)' + + + if (ix_abs+ix_base.ge.maxco) + & stop 'Error: ix_abs + ix_per out of range.' + if (iy_abs+iy_base.ge.maxco) + & stop 'Error: iy_abs + iy_per out of range.' + if (iz_abs+iz_base.ge.maxco) + & stop 'Error: iz_abs + iz_per out of range.' 
+ + check_total = 0 + + call initialise_panphasia(ldata) +c First corner + xval = ix_abs + ix_base - 1 + yval = iy_abs + zval = iz_abs + call advance_current_state(ldata,lev,xval,yval,zval) + call Rand_real(rand_num,ldata%current_state(lev)) + call Rand_save(val_state,ldata%current_state(lev)) + check_total = check_total + val_state(5) + if (MYID.eq.0) print*,'--------------------------------------' + if (MYID.eq.0) print*,'X-corner rand = ',rand_num + if (MYID.eq.0) print*,'State:',val_state +c Second corner + xval = ix_abs + yval = iy_abs + iy_base - 1 + zval = iz_abs + call advance_current_state(ldata,lev,xval,yval,zval) + call Rand_real(rand_num,ldata%current_state(lev)) + call Rand_save(val_state,ldata%current_state(lev)) + check_total = check_total + val_state(5) + if (MYID.eq.0) print*,'Y-corner rand = ',rand_num + if (MYID.eq.0) print*,'State:',val_state +c Third corner + xval = ix_abs + yval = iy_abs + zval = iz_abs + iz_base - 1 + call advance_current_state(ldata,lev,xval,yval,zval) + call Rand_real(rand_num,ldata%current_state(lev)) + call Rand_save(val_state,ldata%current_state(lev)) + check_total = check_total + val_state(5) + if (MYID.eq.0) print*,'z-corner rand = ',rand_num + if (MYID.eq.0) print*,'State:',val_state + if (MYID.eq.0) print*,'--------------------------------------' + +c Now encode the name. An integer for each ascii character is generated +c starting from the state which gives r0 - the first random number in +c Panphasia. The integer is in the range 0 - m-1. +c After making the list, then loop over non-blank characters +c in the name and take the ascii value, and sum the associated numbers. +c To avoid simple anagrams giving the same score, weight the integer +c by position in the string. Finally take mod m - to give the +c check number. + + call Rand_load(state,ldata%base_state) + + do i=0,255 + call Rand_real(rand_num,state) + call Rand_save(val_state,state) + ascii_list(i) = val_state(5) + enddo + + + + do ii=1,lnblnk(phase_name) + check_total = check_total + ii*ascii_list(iachar(phase_name(ii:ii))) + enddo + + + check_total = mod(check_total,mconst) + if (check_rand.eq.-999) then ! override the safety check number. + check_number = check_total + return + else + if (check_rand.ne.check_total) then + print*,'Inconsistency in the input panphasia descriptor ',MYID + print*,'Check_rand = ',check_rand + print*,'val_state(5) =',val_state(5) + print*,'xval,yval,zval',xval,yval,zval + print*,'lev_val = ',lev + call flush(6) + stop + endif + endif + + + return + end +c================================================================================= + recursive subroutine generate_random_descriptor(ldata,string) + use Rand + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + character*100 string + character*100 instring + character*20 name + integer*4 unix_timestamp + + real*8 lbox + real*8 lpanphasia + parameter (lpanphasia = 25000000.0) ! 
Units of Mpc/h + integer level + integer*8 cell_dim + integer val_state(5) + + TYPE(Rand_state) :: state + TYPE(Rand_offset) :: offset + + real*8 rand_num1,rand_num2 + integer*8 mconst,check_int + parameter(mconst = 2147483647_Dint) + integer*8 mfac,imajor,iminor + parameter(mfac=33554332_Dint) + integer ascii_list(0:255) + integer i,lnblnk + integer*8 ii + integer mult + + integer*8 ixco,iyco,izco,irange + + print*,'___________________________________________________________' + print* + print*,' Generate a random descriptor ' + print* + print*,'The code uses the time (the unix timestamp) plus some extra ' + print*,'information personal to the user to choose a random region ' + print*,'within PANPHASIA. The user must also specify the side length' + print*,'of the cosmological volume. The code assumes that the whole of' + print*,'PANPHASIA is 25000 Gpc/h on a side and selects an appropriate ' + print*,'level in the octree for the descriptor. ' + print*,'Assuming this scaling the small scale power is defined down ' + print*,'to a mass scale of around 10^{-12} solar masses.' + print* + print*,'The user must also specify a human readable label for the ' + print*,'descriptor of less than 21 characters.' + print*,'___________________________________________________________' + print* + print*,'Press return to continue ' + read (*,*) + print* + print*,'___________________________________________________________' + print*,'Enter the box side-length in Mpc/h units' + read (*,*) lbox + print*,'___________________________________________________________' + print* + print* + 5 continue + print*,'Enter up to 20 character name to label the descriptor (no spaces)' + read (*,'(a)') name + if ((len_trim(instring).lt.21).or.(scan(name,' ').le.len_trim(name))) goto 5 + print*,'___________________________________________________________' + print* + print* + print*,'___________________________________________________________' + print*,'The phases for the simulation are described by whole octree ' + print*,'cells. Enter an odd integer that defines the number of cells ' + print*,'you require in one dimension. Choose this number carefully ' + print*,'as it will limit the possible 1-D sizes of the of the Fourier ' + print*,'transforms that can be used to make initial conditions to a product ' + print*,'of this integer times any power of two. In which case the only' + print*,'choice is 1.)' + print*,'(I would recommend 3 unless the initial condition code is' + print*,'incapable of using grid sizes that are not purely powers of two.' + print*,'___________________________________________________________' + print* + 7 continue + print*,'Enter number of octree cells on an edge (positive odd number only) ' + read (*,*) cell_dim + if ((cell_dim.le.0).or.(mod(cell_dim,2).eq.0)) goto 7 + print*,'___________________________________________________________' + call system('date +%s>tempfile_42526037646') + open(16,file='tempfile_42526037646',status='old') + read (16,*) unix_timestamp + close(16) + call system('/bin/rm tempfile_42526037646') + + print*,'Unix_timestamp determined. Value: ',unix_timestamp + print*,'___________________________________________________________' + print* + print* + print* + print*,'___________________________________________________________' + print*,'The code has just read the unix timestamp and will use this' + print*,'to help choose a random region in PANPHASIA. 
Although it is' + print*,'perhaps unlikely that someone else is also running this code at ' + print*,'the same time to the nearest second, to make it more likely' + print*,' still that the desciptor to be generated is unique' + print*,'please enter your name or some other piece of information' + print*,'below that you think is unlikely to be used by anyone else' + print*,'___________________________________________________________' + + print* + + 10 continue + print*,'Please enter your name (a minimum of six characters)' + read (*,'(a)') instring !' + if (len_trim(instring).lt.6) goto 10 + + level = int(log10(dble(cell_dim)*lpanphasia/lbox)/log10(2.0d0)) + + if (level.gt.50) stop 'level >50 ' + + + +c 'd' lines allow the generation of a large set of +c descriptors. Use to check that they are randomly +c positioned over the available volume. + + +c First use the unix timestamp to initialises the +c random generator. + + call Rand_seed(state,unix_timestamp) + + call Rand_save(ldata%base_state,state) + + +c First generate an integer from the user data. + call Rand_load(state,ldata%base_state) + + do i=0,255 + call Rand_real(rand_num1,state) + call Rand_save(val_state,state) + ascii_list(i) = val_state(5) + enddo + + call Rand_set_offset(offset,1) + + do ii=1,lnblnk(instring) + mult = mod(ii*ascii_list(iachar(instring(ii:ii))),mconst) + offset = Rand_mul_offset(offset,mult) + enddo + + call Rand_load(state,ldata%base_state) + state = Rand_boost(state,offset) ! Starting point for choosing location. + + 20 continue + + irange = 2_Dint**level + imajor = irange/mfac + iminor = mod(irange,mfac) + + call Rand_real(rand_num1,state) + call Rand_real(rand_num2,state) + + ixco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor) + + if (ixco+cell_dim.ge.irange) goto 20 ! Invalid descriptor + + call Rand_real(rand_num1,state) + call Rand_real(rand_num2,state) + + iyco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor) + + if (iyco+cell_dim.ge.irange) goto 20 ! Invalid descriptor + + call Rand_real(rand_num1,state) + call Rand_real(rand_num2,state) + + izco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor) + + if (izco+cell_dim.ge.irange) goto 20 ! Invalid descriptor + + +c Value of the check digit is not known. Use validate_descriptor to compute it. + + check_int = -999 ! Special value required to make validate_descriptor + ! return the check digit. 
+ + call compose_descriptor(level,ixco,iyco,izco,cell_dim,check_int,name,string) + + call validate_descriptor(ldata,string,-1,check_int) + + call compose_descriptor(level,ixco,iyco,izco,cell_dim,check_int,name,string) + + + return + end +c================================================================================= + recursive subroutine demo_basis_function_allocator + + implicit none + integer nmax + parameter (nmax=10) + + integer*4 wn_level(nmax) + + integer*8 ix_abs(nmax),iy_abs(nmax),iz_abs(nmax) + integer*8 ix_per(nmax),iy_per(nmax),iz_per(nmax) + integer*8 ix_rel(nmax),iy_rel(nmax),iz_rel(nmax) + integer*8 ix_dim(nmax),iy_dim(nmax),iz_dim(nmax) + + integer ix,iy,iz,nref + integer layer_min,layer_max,indep_field + + + integer*8 itot_int,itot_ib + + integer inv_open + +c Assign some trial values + + nref = 3 + inv_open=9 + + wn_level(1) = 22 + + ix_abs(1) = 2000000 + iy_abs(1) = 1500032 + iz_abs(1) = 2500032 + + ix_per(1) = 768 + iy_per(1) = 768 + iz_per(1) = 768 + + ix_rel(1) = 0 + iy_rel(1) = 0 + iz_rel(1) = 0 + + ix_dim(1) = 768 + iy_dim(1) = 768 + iz_dim(1) = 768 + + + wn_level(2) = 23 + + ix_abs(2) = 4000000 + iy_abs(2) = 3000064 + iz_abs(2) = 5000064 + + ix_per(2) = 1536 + iy_per(2) = 1536 + iz_per(2) = 1536 + + ix_rel(2) = 256 + iy_rel(2) = 16 + iz_rel(2) = 720 + + ix_dim(2) = 768 + iy_dim(2) = 768 + iz_dim(2) = 768 + + + wn_level(3) = 24 + + ix_abs(3) = 8000000 + iy_abs(3) = 6000128 + iz_abs(3) = 10000128 + + ix_per(3) = 3072 + iy_per(3) = 3072 + iz_per(3) = 3072 + + ix_rel(3) = 896 + iy_rel(3) = 432 + iz_rel(3) = 1840 + + ix_dim(3) = 768 + iy_dim(3) = 768 + iz_dim(3) = 768 + + + itot_int = 0 + itot_ib = 0 + + + + + open(10,file='ascii_dump_r1',status='unknown') + + ix=320 + do iy=0,767 + do iz=0,767 + call layer_choice(ix,iy,iz,1,nref,ix_abs,iy_abs,iz_abs, + & ix_per,iy_per,iz_per,ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,inv_open,layer_min,layer_max,indep_field) + write(10,*) iy,iz,layer_min,layer_max,indep_field + enddo + enddo + close(10) + + open(10,file='ascii_dump_r2',status='unknown') + + ix=384 + do iy=0,767 + do iz=0,767 + call layer_choice(ix,iy,iz,2,nref,ix_abs,iy_abs,iz_abs, + & ix_per,iy_per,iz_per,ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,inv_open,layer_min,layer_max,indep_field) + write(10,*) iy,iz,layer_min,layer_max,indep_field + enddo + enddo + close(10) + + open(10,file='ascii_dump_r3',status='unknown') + + ix=384 + do iy=0,767 + do iz=0,767 + call layer_choice(ix,iy,iz,3,nref,ix_abs,iy_abs,iz_abs, + & ix_per,iy_per,iz_per,ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,inv_open,layer_min,layer_max,indep_field) + write(10,*) iy,iz,layer_min,layer_max,indep_field + enddo + enddo + close(10) + end +c================================================================================= + recursive subroutine layer_choice(ix0,iy0,iz0,iref,nref, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,x_fact,layer_min,layer_max,indep_field) + implicit none + + integer ix0,iy0,iz0,iref,nref,isize,ibase + integer ix,iy,iz,irefplus + integer ione + + integer*8 ix_abs(nref),iy_abs(nref),iz_abs(nref) + integer*8 ix_per(nref),iy_per(nref),iz_per(nref) + integer*8 ix_rel(nref),iy_rel(nref),iz_rel(nref) + integer*8 ix_dim(nref),iy_dim(nref),iz_dim(nref) + + integer wn_level(nref) + integer layer_min,layer_max,indep_field,x_fact + integer idebug + + + integer interior,iboundary + + if (iref.eq.9999) then + idebug = 1 + else + idebug = 0 + endif + + ione = 1 + + irefplus = 
min(iref+1,nref) + + if (nref.eq.1) then ! Deal with simplest case + layer_min = 0 + layer_max = wn_level(1) + indep_field = 1 + if (idebug.eq.1) print*,'return 1' + return + endif + +c----------- Case of the top periodic refinement. For this refinement layer_min=0 as +c----------- all the larger basis functions must be included. By default layer_max +c----------- is set to wn_level(1) so all basis functions are included. A check is +c----------- made to determine if the lowest basis function can be included in the +c----------- next refinement. If it can the same process is repeated for the next +c----------- largest basis function and this is repeated until a failure occurs. + + if ((iref.eq.1).and.(nref.gt.1)) then + ibase = 1 + 10 continue + + ix = ishft(ishft(ix_abs(iref)+ix_rel(iref)+ix0,-ibase),ibase)-ix_abs(iref)-ix_rel(iref) + iy = ishft(ishft(iy_abs(iref)+iy_rel(iref)+iy0,-ibase),ibase)-iy_abs(iref)-iy_rel(iref) + iz = ishft(ishft(iz_abs(iref)+iz_rel(iref)+iz0,-ibase),ibase)-iz_abs(iref)-iz_rel(iref) + isize = ishft(ione,ibase) + + call inref(ix,iy,iz,isize,iref,irefplus,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + + if ((interior.eq.1).and.(iboundary.eq.1)) then + ibase = ibase + 1 + goto 10 + endif + + layer_min = 0 + layer_max = wn_level(iref) - ibase + 1 + if (layer_max.ne.wn_level(iref)) then + indep_field = 0 + else + indep_field = 1 + endif + + if (idebug.eq.1) then + print*,'iref,wn_level(iref)',iref,wn_level(iref) + print*,'Return 2',layer_min,layer_max,indep_field + endif + + return + endif +c------------------------------------------------------------------------------------------ +c------------------------------------------------------------------------------------------ + + +c----------- For second or higher refinement determine layer_min by reference +c----------- to itself. In this case the loop continues until a basis function +c------------ is found which fits in a larger refinement + + ibase = 1 + + 20 continue + + + ix = ishft(ishft(ix_abs(iref)+ix_rel(iref)+ix0,-ibase),ibase)-ix_abs(iref)-ix_rel(iref) + iy = ishft(ishft(iy_abs(iref)+iy_rel(iref)+iy0,-ibase),ibase)-iy_abs(iref)-iy_rel(iref) + iz = ishft(ishft(iz_abs(iref)+iz_rel(iref)+iz0,-ibase),ibase)-iz_abs(iref)-iz_rel(iref) + isize = ishft(ione,ibase) + + call inref(ix,iy,iz,isize,iref,iref,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + + if ((interior.eq.1).and.(iboundary.eq.1)) then + ibase = ibase + 1 + goto 20 + endif + + layer_min = wn_level(iref) - max(ibase-2,0) ! 
Take last suitable refinement + + +c----------- For an intermediate refinement define layer_max by reference to +c----------- the next refinement + + if (iref.lt.nref) then + ibase = 1 + + 30 continue + + ix = ishft(ishft(ix_abs(iref)+ix_rel(iref)+ix0,-ibase),ibase)-ix_abs(iref)-ix_rel(iref) + iy = ishft(ishft(iy_abs(iref)+iy_rel(iref)+iy0,-ibase),ibase)-iy_abs(iref)-iy_rel(iref) + iz = ishft(ishft(iz_abs(iref)+iz_rel(iref)+iz0,-ibase),ibase)-iz_abs(iref)-iz_rel(iref) + isize = ishft(ione,ibase) + + call inref(ix,iy,iz,isize,iref,irefplus,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + + if ((interior.eq.1).and.(iboundary.eq.1)) then + ibase = ibase + 1 + goto 30 + endif + + layer_max = wn_level(iref) - ibase + 1 + + if (layer_min.eq.wn_level(iref)) then + indep_field = 1 + else + indep_field = 0 + endif + else + layer_max = wn_level(iref) + indep_field = 1 + endif + + if (idebug.eq.1) then + print*,'Return 3' + print*,'layer_min,layer_max,indep_field',layer_min,layer_max,indep_field + print*,'interior,iboundary',interior,iboundary + print*,'ibase = ',ibase + print*,'iref,nref,wn_level(iref)',iref,nref,wn_level(iref) + endif + + + return + + end + + + + +c The function takes a given basis function specified by a corner ixc,iyc,izc +c and a size isz at level wn_c in the oct-tree and returns two integer values. +c (i) interior: +c Value 1 if the basis function is completely within the given +c refinement. +c +c Value 0 if the basis function is without the refinement, or +c overlaps the edges of the refinement, or the edges of the +c primary white noise patch. +c +c (ii) iboundary: +c Value 1 if the basis function is sufficiently far from the +c refinement boundary. +c +c Value 0 otherwise. +c The given refinement is defined at level wn_r in the oct-tree and by the variables +c (ix_rel,iy_rel,iz_rel) which give the location of the refinement relative to +c corner of the white noise patch, (ix_per,iy_per,iz_per) which define the +c periodicity of the white noise patch, and (ix_dim,iy_dim,iz_dim) which +c define the size of the refinement. +c +c +c +c================================================================================= + recursive subroutine inref(ixc,iyc,izc,isz,ir1,ir2,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + implicit none + + integer nref + integer ixc,iyc,izc,isz,ir1,ir2 + integer wn_level(nref) + integer*8 ix_abs(nref),iy_abs(nref),iz_abs(nref) + integer*8 ix_per(nref),iy_per(nref),iz_per(nref) + integer*8 ix_rel(nref),iy_rel(nref),iz_rel(nref) + integer*8 ix_dim(nref),iy_dim(nref),iz_dim(nref) + integer interior, iboundary + integer x_fact + + integer*8 ixco,iyco,izco,isize + integer*8 ixref0,iyref0,izref0 + integer*8 ixref1,iyref1,izref1 + integer*8 idist + + integer delta_wn + +c Error checking + if (ir2.lt.ir1) stop 'ir2 +#include +#include + +#include +#include +#include + +#ifdef _OPENMP +#include +#endif + +#include + +const int maxdim = 60, maxlev = 50, maxpow = 3 * maxdim; +typedef int rand_offset_[5]; +typedef struct +{ + int state[133]; // Nstore = Nstate (=5) + Nbatch (=128) + int need_fill; + int pos; +} rand_state_; + +/* pan_state_ struct -- corresponds to respective fortran module in panphasia_routines.f + * data structure that contains all panphasia state variables + * it needs to get passed between the fortran routines to enable + * thread-safe execution. 
+ */ +typedef struct +{ + int base_state[5], base_lev_start[5][maxdim + 1]; + rand_offset_ poweroffset[maxpow + 1], superjump; + rand_state_ current_state[maxpow + 2]; + + int layer_min, layer_max, indep_field; + + long long xorigin_store[2][2][2], yorigin_store[2][2][2], zorigin_store[2][2][2]; + int lev_common, layer_min_store, layer_max_store; + long long ix_abs_store, iy_abs_store, iz_abs_store, ix_per_store, iy_per_store, iz_per_store, ix_rel_store, + iy_rel_store, iz_rel_store; + double exp_coeffs[8][8][maxdim + 2]; + long long xcursor[maxdim + 1], ycursor[maxdim + 1], zcursor[maxdim + 1]; + int ixshift[2][2][2], iyshift[2][2][2], izshift[2][2][2]; + + double cell_data[9][8]; + int ixh_last, iyh_last, izh_last; + int init; + + int init_cell_props; + int init_lecuyer_state; + long long p_xcursor[62], p_ycursor[62], p_zcursor[62]; + +} pan_state_; + +extern "C" +{ + void start_panphasia_(pan_state_ *lstate, const char *descriptor, int *ngrid, int *bverbose); + + void parse_descriptor_(const char *descriptor, int16_t *l, int32_t *ix, int32_t *iy, int32_t *iz, int16_t *side1, + int16_t *side2, int16_t *side3, int32_t *check_int, char *name); + + void panphasia_cell_properties_(pan_state_ *lstate, int *ixcell, int *iycell, int *izcell, double *cell_prop); + + void adv_panphasia_cell_properties_(pan_state_ *lstate, int *ixcell, int *iycell, int *izcell, int *layer_min, + int *layer_max, int *indep_field, double *cell_prop); + + void set_phases_and_rel_origin_(pan_state_ *lstate, const char *descriptor, int *lev, long long *ix_rel, + long long *iy_rel, long long *iz_rel, int *VERBOSE); +} + +struct panphasia_descriptor +{ + int16_t wn_level_base; + int32_t i_xorigin_base, i_yorigin_base, i_zorigin_base; + int16_t i_base, i_base_y, i_base_z; + int32_t check_rand; + std::string name; + + explicit panphasia_descriptor(std::string dstring) + { + char tmp[100]; + std::memset(tmp, ' ', 100); + parse_descriptor_(dstring.c_str(), &wn_level_base, &i_xorigin_base, &i_yorigin_base, &i_zorigin_base, &i_base, + &i_base_y, &i_base_z, &check_rand, tmp); + for (int i = 0; i < 100; i++) + if (tmp[i] == ' ') + { + tmp[i] = '\0'; + break; + } + name = tmp; + name.erase(std::remove(name.begin(), name.end(), ' '), name.end()); + } +}; + +// greatest common divisor +int gcd(int a, int b) +{ + if (b == 0) + return a; + return gcd(b, a % b); +} + +// least common multiple +int lcm(int a, int b) { return abs(a * b) / gcd(a, b); } + +// Two or largest power of 2 less than the argument +int largest_power_two_lte(int b) +{ + int a = 1; + if (b <= a) + return a; + while (2 * a < b) + a = 2 * a; + return a; +} + +class RNG_panphasia : public RNG_plugin +{ +private: +protected: + std::string descriptor_string_; + int num_threads_; + int levelmin_, levelmin_final_, levelmax_, ngrid_; + bool incongruent_fields_; + double inter_grid_phase_adjustment_; + // double translation_phase_; + pan_state_ *lstate; + int grid_p_, grid_m_; + double grid_rescale_fac_; + int coordinate_system_shift_[3]; + int ix_abs_[3], ix_per_[3], ix_rel_[3], level_p_, lextra_; + + void clear_panphasia_thread_states(void) + { + for (int i = 0; i < num_threads_; ++i) + { + lstate[i].init = 0; + lstate[i].init_cell_props = 0; + lstate[i].init_lecuyer_state = 0; + } + } + + void initialize_for_grid_structure(void) + { + clear_panphasia_thread_states(); + music::ilog.Print("PANPHASIA: running with %d threads", num_threads_); + + // if ngrid is not a multiple of i_base, then we need to enlarge and then sample down + ngrid_ = pcf_->get_value("setup", 
"GridRes"); + + grid_p_ = pdescriptor_->i_base; + grid_m_ = largest_power_two_lte(grid_p_); + + lextra_ = (log10((double)ngrid_ / (double)pdescriptor_->i_base) + 0.001) / log10(2.0); + int ratio = 1 << lextra_; + grid_rescale_fac_ = 1.0; + + coordinate_system_shift_[0] = -pcf_->get_value_safe("setup", "shift_x", 0); + coordinate_system_shift_[1] = -pcf_->get_value_safe("setup", "shift_y", 0); + coordinate_system_shift_[2] = -pcf_->get_value_safe("setup", "shift_z", 0); + + incongruent_fields_ = false; + if (ngrid_ != ratio * pdescriptor_->i_base) + { + incongruent_fields_ = true; + ngrid_ = 2 * ratio * pdescriptor_->i_base; + grid_rescale_fac_ = (double)ngrid_ / (1 << levelmin_); + music::ilog << "PANPHASIA: will use a higher resolution (using Fourier interpolation)" << std::endl; + music::ilog << " (" << grid_m_ << " -> " << grid_p_ << ") * 2**ref to be compatible with PANPHASIA" << std::endl; + } + } + + std::unique_ptr pdescriptor_; + +public: + explicit RNG_panphasia(config_file &cf) : RNG_plugin(cf) + { + descriptor_string_ = pcf_->get_value("random", "descriptor"); + +#ifdef _OPENMP + num_threads_ = omp_get_max_threads(); +#else + num_threads_ = 1; +#endif + + // create independent state descriptions for each thread + lstate = new pan_state_[num_threads_]; + + // parse the descriptor for its properties + pdescriptor_ = std::make_unique(descriptor_string_); + + music::ilog.Print("PANPHASIA: descriptor \'%s\' is base %d,", pdescriptor_->name.c_str(), pdescriptor_->i_base); + + // write panphasia base size into config file for the grid construction + // as the gridding unit we use the least common multiple of 2 and i_base + std::stringstream ss; + //ARJ ss << lcm(2, pdescriptor_->i_base); + //ss << two_or_largest_power_two_less_than(pdescriptor_->i_base);//ARJ + ss << 2; //ARJ - set gridding unit to two + pcf_->insert_value("setup", "gridding_unit", ss.str()); + ss.str(std::string()); + ss << pdescriptor_->i_base; + pcf_->insert_value("random", "base_unit", ss.str()); + + this->initialize_for_grid_structure(); + } + + ~RNG_panphasia() { delete[] lstate; } + + bool isMultiscale() const { return true; } + + void Fill_Grid(Grid_FFT &g) + { + auto sinc = [](real_t x) { return (std::abs(x) > 1e-16) ? std::sin(x) / x : 1.0; }; + auto dsinc = [](real_t x) { return (std::abs(x) > 1e-16) ? 
(x * std::cos(x) - std::sin(x)) / (x * x) : 0.0; }; + const real_t sqrt3{std::sqrt(3.0)}, sqrt27{std::sqrt(27.0)}; + + // make sure we're in the right space + Grid_FFT &g0 = g; + g0.FourierTransformBackward(false); + + // temporaries + Grid_FFT g1(g.n_, g.length_); + Grid_FFT g2(g.n_, g.length_); + Grid_FFT g3(g.n_, g.length_); + Grid_FFT g4(g.n_, g.length_); + + clear_panphasia_thread_states(); + music::ilog.Print("PANPHASIA: running with %d threads", num_threads_); + + ngrid_ = pcf_->get_value("setup", "GridRes"); + + grid_p_ = pdescriptor_->i_base; + // grid_m_ = largest_power_two_lte(grid_p_); + if (ngrid_ % grid_p_ != 0) + { + music::elog << "Grid resolution " << ngrid_ << " is not divisible by PANPHASIA descriptor length " << grid_p_ << std::endl; + throw std::runtime_error("Chosen [setup] / GridRes is not compatible with PANPHASIA descriptor length!"); + } + + double t1 = get_wtime(); + double tp = t1; + +#pragma omp parallel + { +#ifdef _OPENMP + const int mythread = omp_get_thread_num(); +#else + const int mythread = 0; +#endif + + //int odd_x, odd_y, odd_z; + //int ng_level = ngrid_ * (1 << (level - levelmin_)); // full resolution of current level + + int verbosity = (mythread == 0); + char descriptor[100]; + std::memset(descriptor, 0, 100); + std::memcpy(descriptor, descriptor_string_.c_str(), descriptor_string_.size()); + + start_panphasia_(&lstate[mythread], descriptor, &ngrid_, &verbosity); + + { + panphasia_descriptor d(descriptor_string_); + + int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0); + int level_p = d.wn_level_base + lextra; + int ratio = 1 << lextra; + + lstate[mythread].layer_min = 0; + lstate[mythread].layer_max = level_p; + lstate[mythread].indep_field = 1; + + assert(ngrid_ == ratio * d.i_base); + + long long ix_rel[3]; + ix_rel[0] = 0; //ileft_corner_p[0]; + ix_rel[1] = 0; //ileft_corner_p[1]; + ix_rel[2] = 0; //ileft_corner_p[2]; + + set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2], + &verbosity); + + music::ilog.Print(" called set_phases_and_rel_origin level %d ix_rel iy_rel iz_rel %d %d %d\n", level_p, ix_rel[0], + ix_rel[1], ix_rel[2]); + } + + if (verbosity) + t1 = get_wtime(); + + std::array cell_prop; + pan_state_ *ps = &lstate[mythread]; + +#pragma omp for //nowait + for (size_t i = 0; i < g.size(0); i += 2) + { + for (size_t j = 0; j < g.size(1); j += 2) + { + for (size_t k = 0; k < g.size(2); k += 2) + { + + // ARJ - added inner set of loops to speed up evaluation of Panphasia + + for (int ix = 0; ix < 2; ++ix) + { + for (int iy = 0; iy < 2; ++iy) + { + for (int iz = 0; iz < 2; ++iz) + { + int ilocal = i + ix; + int jlocal = j + iy; + int klocal = k + iz; + + int iglobal = ilocal + g.local_0_start_; + int jglobal = jlocal; + int kglobal = klocal; + + adv_panphasia_cell_properties_(ps, &iglobal, &jglobal, &kglobal, &ps->layer_min, + &ps->layer_max, &ps->indep_field, &cell_prop[0]); + + g0.relem(ilocal, jlocal, klocal) = cell_prop[0]; + g1.relem(ilocal, jlocal, klocal) = cell_prop[4]; + g2.relem(ilocal, jlocal, klocal) = cell_prop[2]; + g3.relem(ilocal, jlocal, klocal) = cell_prop[1]; + g4.relem(ilocal, jlocal, klocal) = cell_prop[8]; + } + } + } + } + } + } + + if (verbosity) + { + music::ilog.Print("time for calculating PANPHASIA field : %f s, %f µs/cell", get_wtime() - t1, + 1e6 * (get_wtime() - t1) / g.global_size(0) / g.global_size(1) / g.global_size(2)); + } + } // end omp parallel region + + g0.FourierTransformForward(); + g1.FourierTransformForward(); + 
g2.FourierTransformForward(); + g3.FourierTransformForward(); + g4.FourierTransformForward(); + +#pragma omp parallel for + for (size_t i = 0; i < g0.size(0); i++) + { + for (size_t j = 0; j < g0.size(1); j++) + { + for (size_t k = 0; k < g0.size(2); k++) + { + if (!g0.is_nyquist_mode(i, j, k)) + { + auto kvec = g0.get_k(i, j, k); + + auto argx = 0.5 * M_PI * kvec[0] / g.kny_[0]; + auto argy = 0.5 * M_PI * kvec[1] / g.kny_[1]; + auto argz = 0.5 * M_PI * kvec[2] / g.kny_[2]; + + auto fx = sinc(argx); + auto gx = ccomplex_t(0.0, dsinc(argx)); + auto fy = sinc(argy); + auto gy = ccomplex_t(0.0, dsinc(argy)); + auto fz = sinc(argz); + auto gz = ccomplex_t(0.0, dsinc(argz)); + + auto temp = (fx + sqrt3 * gx) * (fy + sqrt3 * gy) * (fz + sqrt3 * gz); + auto magnitude = std::sqrt(1.0 - std::abs(temp * temp)); + + auto y0(g0.kelem(i, j, k)), y1(g1.kelem(i, j, k)), y2(g2.kelem(i, j, k)), y3(g3.kelem(i, j, k)), y4(g4.kelem(i, j, k)); + + g0.kelem(i, j, k) = y0 * fx * fy * fz + sqrt3 * (y1 * gx * fy * fz + y2 * fx * gy * fz + y3 * fx * fy * gz) + + y4 * magnitude; + } + else + { + g0.kelem(i, j, k) = 0.0; + } + } + } + } + + music::ilog.Print("\033[31mtiming [build panphasia field]: %f s\033[0m", get_wtime() - tp); + tp = get_wtime(); + + g1.FourierTransformBackward(false); + g2.FourierTransformBackward(false); + g3.FourierTransformBackward(false); + g4.FourierTransformBackward(false); + +#pragma omp parallel + { +#ifdef _OPENMP + const int mythread = omp_get_thread_num(); +#else + const int mythread = 0; +#endif + + // int odd_x, odd_y, odd_z; + int verbosity = (mythread == 0); + char descriptor[100]; + std::memset(descriptor, 0, 100); + std::memcpy(descriptor, descriptor_string_.c_str(), descriptor_string_.size()); + + start_panphasia_(&lstate[mythread], descriptor, &ngrid_, &verbosity); + + { + panphasia_descriptor d(descriptor_string_); + + int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0); + int level_p = d.wn_level_base + lextra; + int ratio = 1 << lextra; + + lstate[mythread].layer_min = 0; + lstate[mythread].layer_max = level_p; + lstate[mythread].indep_field = 1; + + assert(ngrid_ == ratio * d.i_base); + + long long ix_rel[3]; + ix_rel[0] = 0; //ileft_corner_p[0]; + ix_rel[1] = 0; //ileft_corner_p[1]; + ix_rel[2] = 0; //ileft_corner_p[2]; + + set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2], + &verbosity); + + music::ilog.Print(" called set_phases_and_rel_origin level %d ix_rel iy_rel iz_rel %d %d %d\n", level_p, ix_rel[0], + ix_rel[1], ix_rel[2]); + } + + if (verbosity) + t1 = get_wtime(); + + //*************************************************************** + // Process Panphasia values: p110, p011, p101, p111 + //**************************************************************** + std::array cell_prop; + pan_state_ *ps = &lstate[mythread]; + +#pragma omp for //nowait + for (size_t i = 0; i < g1.size(0); i += 2) + { + for (size_t j = 0; j < g1.size(1); j += 2) + { + for (size_t k = 0; k < g1.size(2); k += 2) + { + // ARJ - added inner set of loops to speed up evaluation of Panphasia + for (int ix = 0; ix < 2; ++ix) + { + for (int iy = 0; iy < 2; ++iy) + { + for (int iz = 0; iz < 2; ++iz) + { + int ilocal = i + ix; + int jlocal = j + iy; + int klocal = k + iz; + + int iglobal = ilocal + g.local_0_start_; + int jglobal = jlocal; + int kglobal = klocal; + + adv_panphasia_cell_properties_(ps, &iglobal, &jglobal, &kglobal, &ps->layer_min, + &ps->layer_max, &ps->indep_field, &cell_prop[0]); + + g1.relem(ilocal, jlocal, 
klocal) = cell_prop[6]; + g2.relem(ilocal, jlocal, klocal) = cell_prop[3]; + g3.relem(ilocal, jlocal, klocal) = cell_prop[5]; + g4.relem(ilocal, jlocal, klocal) = cell_prop[7]; + } + } + } + } + } + } + } // end omp parallel region + + music::ilog.Print("\033[31mtiming [adv_panphasia_cell_properties2]: %f s \033[0m", get_wtime() - tp); + tp = get_wtime(); + + ///////////////////////////////////////////////////////////////////////// + // transform and convolve with Legendres + g1.FourierTransformForward(); + g2.FourierTransformForward(); + g3.FourierTransformForward(); + g4.FourierTransformForward(); + + #pragma omp parallel for + for (size_t i = 0; i < g1.size(0); i++) + { + for (size_t j = 0; j < g1.size(1); j++) + { + for (size_t k = 0; k < g1.size(2); k++) + { + if (!g1.is_nyquist_mode(i, j, k)) + { + auto kvec = g1.get_k(i, j, k); + + auto argx = 0.5 * M_PI * kvec[0] / g.kny_[0]; + auto argy = 0.5 * M_PI * kvec[1] / g.kny_[1]; + auto argz = 0.5 * M_PI * kvec[2] / g.kny_[2]; + + auto fx = sinc(argx); + auto gx = ccomplex_t(0.0, dsinc(argx)); + auto fy = sinc(argy); + auto gy = ccomplex_t(0.0, dsinc(argy)); + auto fz = sinc(argz); + auto gz = ccomplex_t(0.0, dsinc(argz)); + + auto y1(g1.kelem(i, j, k)), y2(g2.kelem(i, j, k)), y3(g3.kelem(i, j, k)), y4(g4.kelem(i, j, k)); + + g0.kelem(i, j, k) += 3.0 * (y1 * gx * gy * fz + y2 * fx * gy * gz + y3 * gx * fy * gz) + sqrt27 * y4 * gx * gy * gz; + } + } + } + } + + music::ilog.Print("\033[31mtiming [build panphasia field2]: %f s\033[0m", get_wtime() - tp); + // tp = get_wtime(); + + music::ilog.Print("PANPHASIA k-space statistices: mean Re = %f, std = %f", g0.mean(), g0.std()); + } +}; + +namespace +{ + RNG_plugin_creator_concrete creator("PANPHASIA"); +} +#endif // defined(USE_PANPHASIA) \ No newline at end of file From 997b934f032132d2279b80fe7c7d255b71eaae5f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 15:24:37 +0200 Subject: [PATCH 118/130] less screen output in panphasia plugin --- src/plugins/random_panphasia.cc | 35 ++++++++++++++------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/plugins/random_panphasia.cc b/src/plugins/random_panphasia.cc index d21dd7f..632d7fc 100644 --- a/src/plugins/random_panphasia.cc +++ b/src/plugins/random_panphasia.cc @@ -242,7 +242,7 @@ public: } double t1 = get_wtime(); - double tp = t1; + // double tp = t1; #pragma omp parallel { @@ -267,7 +267,7 @@ public: int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0); int level_p = d.wn_level_base + lextra; - int ratio = 1 << lextra; + // int ratio = 1 << lextra; lstate[mythread].layer_min = 0; lstate[mythread].layer_max = level_p; @@ -282,9 +282,6 @@ public: set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2], &verbosity); - - music::ilog.Print(" called set_phases_and_rel_origin level %d ix_rel iy_rel iz_rel %d %d %d\n", level_p, ix_rel[0], - ix_rel[1], ix_rel[2]); } if (verbosity) @@ -332,11 +329,11 @@ public: } } - if (verbosity) - { - music::ilog.Print("time for calculating PANPHASIA field : %f s, %f µs/cell", get_wtime() - t1, - 1e6 * (get_wtime() - t1) / g.global_size(0) / g.global_size(1) / g.global_size(2)); - } + // if (verbosity) + // { + // music::ilog.Print("time for calculating PANPHASIA field : %f s, %f µs/cell", get_wtime() - t1, + // 1e6 * (get_wtime() - t1) / g.global_size(0) / g.global_size(1) / g.global_size(2)); + // } } // end omp parallel region g0.FourierTransformForward(); @@ -383,8 +380,8 @@ public: } } - 
music::ilog.Print("\033[31mtiming [build panphasia field]: %f s\033[0m", get_wtime() - tp); - tp = get_wtime(); + // music::ilog.Print("\033[31mtiming [build panphasia field]: %f s\033[0m", get_wtime() - tp); + // tp = get_wtime(); g1.FourierTransformBackward(false); g2.FourierTransformBackward(false); @@ -412,7 +409,7 @@ public: int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0); int level_p = d.wn_level_base + lextra; - int ratio = 1 << lextra; + // int ratio = 1 << lextra; lstate[mythread].layer_min = 0; lstate[mythread].layer_max = level_p; @@ -427,9 +424,6 @@ public: set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2], &verbosity); - - music::ilog.Print(" called set_phases_and_rel_origin level %d ix_rel iy_rel iz_rel %d %d %d\n", level_p, ix_rel[0], - ix_rel[1], ix_rel[2]); } if (verbosity) @@ -478,8 +472,8 @@ public: } } // end omp parallel region - music::ilog.Print("\033[31mtiming [adv_panphasia_cell_properties2]: %f s \033[0m", get_wtime() - tp); - tp = get_wtime(); + // music::ilog.Print("\033[31mtiming [adv_panphasia_cell_properties2]: %f s \033[0m", get_wtime() - tp); + // tp = get_wtime(); ///////////////////////////////////////////////////////////////////////// // transform and convolve with Legendres @@ -518,9 +512,10 @@ public: } } - music::ilog.Print("\033[31mtiming [build panphasia field2]: %f s\033[0m", get_wtime() - tp); + // music::ilog.Print("\033[31mtiming [build panphasia field2]: %f s\033[0m", get_wtime() - tp); // tp = get_wtime(); - + music::ilog.Print("time for calculating PANPHASIA field : %f s, %f µs/cell", get_wtime() - t1, + 1e6 * (get_wtime() - t1) / g.global_size(0) / g.global_size(1) / g.global_size(2)); music::ilog.Print("PANPHASIA k-space statistices: mean Re = %f, std = %f", g0.mean(), g0.std()); } }; From e54db0223cc3e23294debcf76329a1adb709d4e1 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 15:28:29 +0200 Subject: [PATCH 119/130] updated gitignore --- .gitignore | 63 +++++++++--------------------------------------------- 1 file changed, 10 insertions(+), 53 deletions(-) diff --git a/.gitignore b/.gitignore index b012d08..bcbdff2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,57 +1,14 @@ -build +.DS_Store .vscode -src/CMakeFiles/3.12.2/CompilerIdC/CMakeCCompilerId.c -src/CMakeFiles/feature_tests.c -src/CMakeFiles/feature_tests.cxx -src/CMakeFiles/progress.marks -src/CMakeFiles/3.12.2/CMakeCCompiler.cmake -src/CMakeFiles/3.12.2/CMakeCXXCompiler.cmake -src/CMakeFiles/3.12.2/CMakeDetermineCompilerABI_C.bin -src/CMakeFiles/3.12.2/CMakeDetermineCompilerABI_CXX.bin -src/CMakeFiles/3.12.2/CMakeSystem.cmake -src/CMakeFiles/fastLPT.dir/build.make -src/CMakeFiles/FindMPI/test_mpi.cpp -src/CMakeFiles/FindMPI/test_mpi_C.bin -src/CMakeFiles/FindMPI/test_mpi_CXX.bin -src/CMakeFiles/FindOpenMP/OpenMPCheckVersion.c -src/CMakeFiles/FindOpenMP/OpenMPCheckVersion.cpp -src/CMakeFiles/FindOpenMP/OpenMPTryFlag.c -src/CMakeFiles/FindOpenMP/OpenMPTryFlag.cpp -src/CMakeFiles/FindOpenMP/ompver_C.bin -src/CMakeFiles/FindOpenMP/ompver_CXX.bin -src/CMakeFiles/fastLPT.dir/CXX.includecache -src/CMakeFiles/fastLPT.dir/DependInfo.cmake -src/CMakeFiles/fastLPT.dir/plugins/transfer_eisenstein.cc.o -src/CMakeFiles/3.12.2/CompilerIdCXX/a.out -src/CMakeFiles/fastLPT.dir/cmake_clean.cmake -src/CMakeFiles/fastLPT.dir/depend.internal -src/CMakeFiles/fastLPT.dir/depend.make -src/CMakeFiles/fastLPT.dir/flags.make -src/CMakeFiles/fastLPT.dir/grid_fft.cc.o -src/CMakeFiles/fastLPT.dir/link.txt 
-src/CMakeFiles/fastLPT.dir/logger.cc.o -src/CMakeFiles/fastLPT.dir/main.cc.o -src/CMakeFiles/fastLPT.dir/progress.make -src/CMakeFiles/fastLPT.dir/random_plugin.cc.o -src/CMakeFiles/fastLPT.dir/transfer_function_plugin.cc.o -src/CMakeFiles/fastLPT.dir/plugins/random_music.cc.o -src/CMakeFiles/fastLPT.dir/plugins/random_music_wnoise_generator.cc.o -src/CMakeFiles/feature_tests.bin -src/CMakeFiles/CMakeDirectoryInformation.cmake -src/CMakeFiles/CMakeOutput.log -src/CMakeFiles/Makefile.cmake -src/CMakeFiles/Makefile2 -src/CMakeFiles/TargetDirectories.txt -src/CMakeFiles/cmake.check_cache -src/CMakeFiles/3.12.2/CompilerIdC/a.out -src/CMakeFiles/3.12.2/CompilerIdCXX/CMakeCXXCompilerId.cpp -src/CMakeFiles/hdf5/cmake_hdf5_test.c -src/fastLPT.dSYM/Contents/Info.plist -src/fastLPT.dSYM/Contents/Resources/DWARF/fastLPT +build +include/cmake_config.hh +src/input_powerspec.txt +CMakeCache.txt +CMakeFiles/cmake.check_cache +src/CMakeFiles src/cmake_install.cmake src/CMakeCache.txt -src/fastLPT -src/input_powerspec.txt src/Makefile -.DS_Store -include/cmake_config.hh +external/panphasia/rand_base.mod +external/panphasia/rand_int.mod +external/panphasia/rand.mod \ No newline at end of file From b534c7ff35f295c9d43cef6b1e4736b2d0bae42b Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 2 May 2020 21:02:24 +0200 Subject: [PATCH 120/130] fixed normalisation of CLASS when using ztarget and zstart --- include/cosmology_calculator.hh | 15 +++++------- src/plugins/transfer_CLASS.cc | 41 ++++++++++++++++++++++----------- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index bedc653..6f1fd7f 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -37,7 +37,7 @@ public: private: static constexpr double REL_PRECISION = 1e-10; interpolated_function_1d D_of_a_, f_of_a_, a_of_D_; - double Dnow_, Dplus_start_, astart_; + double Dnow_, Dplus_start_, Dplus_target_, astart_, atarget_; real_t integrate(double (*func)(double x, void *params), double a, double b, void *params) const { @@ -132,7 +132,8 @@ public: */ explicit calculator(config_file &cf) - : cosmo_param_(cf), astart_( 1.0/(1.0+cf.get_value("setup","zstart")) ) + : cosmo_param_(cf), astart_( 1.0/(1.0+cf.get_value("setup","zstart")) ), + atarget_( 1.0/(1.0+cf.get_value_safe("cosmology","ztarget",1./astart_-1.))) { // pre-compute growth factors and store for interpolation std::vector tab_a, tab_D, tab_f; @@ -143,6 +144,7 @@ public: Dnow_ = D_of_a_(1.0); Dplus_start_ = D_of_a_( astart_ ) / Dnow_; + Dplus_target_ = D_of_a_( atarget_ ) / Dnow_; // set up transfer functions and compute normalisation transfer_function_ = std::move(select_TransferFunction_plugin(cf)); @@ -150,7 +152,7 @@ public: if( !transfer_function_->tf_isnormalised_ ) cosmo_param_.pnorm = this->compute_pnorm_from_sigma8(); else{ - cosmo_param_.pnorm = 1.0; + cosmo_param_.pnorm = 1.0/Dplus_target_/Dplus_target_; auto sigma8 = this->compute_sigma8(); music::ilog << "Measured sigma_8 for given PS normalisation is " << sigma8 << std::endl; } @@ -160,9 +162,6 @@ public: << " : " << (transfer_function_->tf_is_distinct() ? 
"yes" : "no") << std::endl; music::ilog << std::setw(32) << std::left << "TF maximum wave number" << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl; - - // music::ilog << "D+(MUSIC) = " << this->get_growth_factor( 1.0/(1.0+cf.get_value("setup","zstart")) ) << std::endl; - // music::ilog << "pnrom = " << cosmo_param_.pnorm << std::endl; } ~calculator() @@ -306,9 +305,7 @@ public: */ inline real_t get_amplitude(real_t k, tf_type type) const { - // if the transfer function doesn't need backscaling, then divide out growth factor - real_t f = transfer_function_->tf_isnormalised_? 1.0/Dplus_start_ : 1.0; - return f * std::pow(k, 0.5 * cosmo_param_.nspect) * transfer_function_->compute(k, type) * cosmo_param_.sqrtpnorm; + return std::pow(k, 0.5 * cosmo_param_.nspect) * transfer_function_->compute(k, type) * cosmo_param_.sqrtpnorm; } //! Computes the normalization for the power spectrum diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index bcf85df..a842736 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -30,7 +30,7 @@ private: // gsl_spline *gsl_sp_Cplus_, *gsl_sp_Cminus_; // std::vector tab_Cplus_, tab_Cminus_; - double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_, astart_, atarget_, A_s_, n_s_, Tcmb_, tnorm_; + double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_, astart_, atarget_, A_s_, n_s_, sigma8_, Tcmb_, tnorm_; ClassParams pars_; std::unique_ptr the_ClassEngine_; @@ -89,7 +89,11 @@ private: //--- cosmological parameters, primordial ------------------------- add_class_parameter("P_k_ini type", "analytic_Pk"); - add_class_parameter("A_s", A_s_); + if( A_s_ > 0.0 ){ + add_class_parameter("A_s", A_s_); + }else{ + add_class_parameter("sigma8", sigma8_); + } add_class_parameter("n_s", n_s_); add_class_parameter("alpha_s", 0.0); add_class_parameter("T_cmb", Tcmb_); @@ -173,6 +177,8 @@ public: explicit transfer_CLASS_plugin(config_file &cf) : TransferFunction_plugin(cf) { + this->tf_isnormalised_ = true; + ofs_class_input_.open("input_class_parameters.ini", std::ios::trunc); h_ = pcf_->get_value("cosmology", "H0") / 100.0; @@ -183,28 +189,35 @@ public: atarget_ = 1.0 / (1.0 + ztarget_); zstart_ = pcf_->get_value("setup", "zstart"); astart_ = 1.0 / (1.0 + zstart_); - double lbox = pcf_->get_value("setup", "BoxLength"); - int nres = pcf_->get_value("setup", "GridRes"); A_s_ = pcf_->get_value_safe("cosmology", "A_s", -1.0); - double k_p = pcf_->get_value_safe("cosmology", "k_p", 0.05); n_s_ = pcf_->get_value("cosmology", "nspec"); Tcmb_ = cf.get_value_safe("cosmology", "Tcmb", 2.7255); - tnorm_ = 1.0; - - if (A_s_ > 0) - { - this->tf_isnormalised_ = true; - tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p * h_, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0)); - music::ilog << "Using A_s to normalise the transfer function!" << std::endl; + if (A_s_ > 0) { + music::ilog << "CLASS: Using A_s=" << A_s_<< " to normalise the transfer function." << std::endl; + }else{ + sigma8_ = pcf_->get_value_safe("cosmology", "sigma_8", -1.0); + if( sigma8_ < 0 ){ + throw std::runtime_error("Need to specify either A_s or sigma_8 for CLASS plugin..."); + } + music::ilog << "CLASS: Using sigma8_ =" << sigma8_<< " to normalise the transfer function." 
<< std::endl; } + // determine highest k we will need for the resolution selected + double lbox = pcf_->get_value("setup", "BoxLength"); + int nres = pcf_->get_value("setup", "GridRes"); kmax_ = std::max(20.0, 2.0 * M_PI / lbox * nres / 2 * sqrt(3) * 2.0); // 120% of spatial diagonal, or k=10h Mpc-1 + // initialise CLASS and get the normalisation this->init_ClassEngine(); + A_s_ = the_ClassEngine_->get_A_s(); // this either the input one, or the one computed from sigma8 + + // compute the normalisation to interface with MUSIC + double k_p = pcf_->get_value_safe("cosmology", "k_p", 0.05); + tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p * h_, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0)); + // compute the transfer function at z=0 using CLASS engine std::vector k, dc, tc, db, tb, dn, tn, dm, tm; - this->run_ClassEngine(0.0, k, dc, tc, db, tb, dn, tn, dm, tm); delta_c0_.set_data(k, dc); @@ -216,8 +229,8 @@ public: delta_m0_.set_data(k, dm); theta_m0_.set_data(k, tm); + // compute the transfer function at z=z_target using CLASS engine this->run_ClassEngine(ztarget_, k, dc, tc, db, tb, dn, tn, dm, tm); - delta_c_.set_data(k, dc); theta_c_.set_data(k, tc); delta_b_.set_data(k, db); From 1313905660d40c873145776b97dacc789ae40217 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 May 2020 00:14:47 +0200 Subject: [PATCH 121/130] working commit --- include/grid_fft.hh | 8 +- include/grid_interpolate.hh | 205 ++++++++++++++++++++++++++++++++++ include/particle_generator.hh | 55 +++++---- src/ic_generator.cc | 2 + 4 files changed, 244 insertions(+), 26 deletions(-) create mode 100644 include/grid_interpolate.hh diff --git a/include/grid_fft.hh b/include/grid_fft.hh index 2d49f7f..c11d1b3 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -17,12 +17,16 @@ enum space_t #ifdef USE_MPI -template +template #else -template +template #endif class Grid_FFT { +public: + using data_t = data_t_; + static constexpr bool is_distributed_trait{bdistributed}; + protected: #if defined(USE_MPI) const MPI_Datatype MPI_data_t_type = diff --git a/include/grid_interpolate.hh b/include/grid_interpolate.hh new file mode 100644 index 0000000..b99c958 --- /dev/null +++ b/include/grid_interpolate.hh @@ -0,0 +1,205 @@ +#pragma once + +#include +#include + +#include + +template +struct grid_interpolate +{ + using data_t = typename grid_t::data_t; + using vec3 = std::array; + + static constexpr bool is_distributed_trait = grid_t::is_distributed_trait; + static constexpr int interpolation_order = interp_order; + + size_t nx_, ny_, nz_; + +#if defined(USE_MPI) + const MPI_Datatype MPI_data_t_type = + (typeid(data_t) == typeid(float)) ? MPI_FLOAT + : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE + : (typeid(data_t) == typeid(long double)) ? MPI_LONG_DOUBLE + : (typeid(data_t) == typeid(std::complex)) ? MPI_C_FLOAT_COMPLEX + : (typeid(data_t) == typeid(std::complex)) ? MPI_C_DOUBLE_COMPLEX + : (typeid(data_t) == typeid(std::complex)) ? 
MPI_C_LONG_DOUBLE_COMPLEX + : MPI_INT; +#endif + + std::vector boundary_; + const grid_t &gridref; + + explicit grid_interpolate(const grid_t &g) + : gridref(g), nx_(g.n_[0]), ny_(g.n_[1]), nz_(g.n_[2]) + { + static_assert(interpolation_order >= 0 && interpolation_order <= 2, "Interpolation order needs to be 0 (NGP), 1 (CIC), or 2 (TSC)."); + + if (is_distributed_trait) + { +#if defined(USE_MPI) + size_t nx = interpolation_order + 1; + size_t ny = g.n_[1]; + size_t nz = g.n_[2]; + + boundary_.assign(nx * ny * nz, data_t{0.0}); + + for (size_t i = 0; i < nx; ++i) + { + for (size_t j = 0; j < ny; ++j) + { + for (size_t k = 0; k < nx; ++k) + { + boundary_[(i * ny + j) * nz + k] = g.relem(i, j, k); + } + } + } + + int sendto = (MPI::get_rank() + MPI::get_size() - 1) % MPI::get_size(); + int recvfrom = (MPI::get_rank() + MPI::get_size() + 1) % MPI::get_size(); + + MPI_Status status; + status.MPI_ERROR = MPI_SUCCESS; + + MPI_Sendrecv_replace(&boundary_[0], nx * ny * nz, MPI::get_datatype(), sendto, + MPI::get_rank() + 1000, recvfrom, recvfrom + 1000, MPI_COMM_WORLD, &status); + + assert(status.MPI_ERROR == MPI_SUCCESS); +#endif + } + } + + data_t get_ngp_at(const std::array &pos, std::vector &val) const noexcept + { + size_t ix = static_cast(pos[0]); + size_t iy = static_cast(pos[1]); + size_t iz = static_cast(pos[2]); + return gridref.relem(ix - gridref.local_0_start_, iy, iz); + } + + data_t get_cic_at(const std::array &pos) const noexcept + { + size_t ix = static_cast(pos[0]); + size_t iy = static_cast(pos[1]); + size_t iz = static_cast(pos[2]); + real_t dx = pos[0] - real_t(ix), tx = 1.0 - dx; + real_t dy = pos[1] - real_t(iy), ty = 1.0 - dy; + real_t dz = pos[2] - real_t(iz), tz = 1.0 - dz; + size_t iy1 = (iy + 1) % ny_; + size_t iz1 = (iz + 1) % nz_; + + data_t val{0.0}; + + if( is_distributed_trait ){ + size_t localix = ix-gridref.local_0_start_; + val += this->relem(localix, iy, iz) * tx * ty * tz; + val += this->relem(localix, iy, iz1) * tx * ty * dz; + val += this->relem(localix, iy1, iz) * tx * dy * tz; + val += this->relem(localix, iy1, iz1) * tx * dy * dz; + + if( localix+1 >= gridref.local_0_size_ ){ + size_t localix1 = localix+1 - gridref.local_0_size_; + val += boundary_[(localix1*ny_+iy)*nz_+iz] * dx * ty * tz; + val += boundary_[(localix1*ny_+iy)*nz_+iz1] * dx * ty * dz; + val += boundary_[(localix1*ny_+iy1)*nz_+iz] * dx * dy * tz; + val += boundary_[(localix1*ny_+iy1)*nz_+iz1] * dx * dy * dz; + }else{ + size_t localix1 = localix+1; + val += this->relem(localix1, iy, iz) * dx * ty * tz; + val += this->relem(localix1, iy, iz1) * dx * ty * dz; + val += this->relem(localix1, iy1, iz) * dx * dy * tz; + val += this->relem(localix1, iy1, iz1) * dx * dy * dz; + } + }else{ + size_t ix1 = (ix + 1) % nx_; + val += this->relem(ix, iy, iz) * tx * ty * tz; + val += this->relem(ix, iy, iz1) * tx * ty * dz; + val += this->relem(ix, iy1, iz) * tx * dy * tz; + val += this->relem(ix, iy1, iz1) * tx * dy * dz; + val += this->relem(ix1, iy, iz) * dx * ty * tz; + val += this->relem(ix1, iy, iz1) * dx * ty * dz; + val += this->relem(ix1, iy1, iz) * dx * dy * tz; + val += this->relem(ix1, iy1, iz1) * dx * dy * dz; + } + + return val; + } + + // data_t get_tsc_at(const std::array &pos, std::vector &val) const + // { + // } + + int get_task(const vec3 &x, const std::vector &local0starts) const noexcept + { + auto it = std::lower_bound(local0starts.begin(), local0starts.end(), int(x[0])); + return std::distance(local0starts.begin(), it) - 1; + } + + void domain_decompose_pos(std::vector &pos) const 
noexcept + { + if (is_distributed_trait) + { +#if defined(USE_MPI) + int local_0_start = int(gridref.local_0_start_); + std::vector local0starts(MPI::get_size(), 0); + MPI_Alltoall(&local_0_start, 1, MPI_INT, &local0starts[0], 1, MPI_INT, MPI_COMM_WORLD); + + std::sort(pos.begin(), pos.end(), [&](auto x1, auto x2) { return get_task(x1) < get_task(x2); }); + std::vector sendcounts(MPI::get_size(), 0), sendoffsets(MPI::get_size(), 0); + std::vector recvcounts(MPI::get_size(), 0), recvoffsets(MPI::get_size(), 0); + for (auto x : pos) + { + sendcounts[get_task(x)] += 3; + } + + // int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) + MPI_Alltoall(&sendcounts[0], 1, MPI_INT, &recvcounts[0], 1, MPI_INT, MPI_COMM_WORLD); + + for (int i = 1; i < MPI::get_size(); ++i) + { + sendoffsets[i] = sendcounts[i - 1] + sendoffsets[i - 1]; + recvoffsets[i] = recvcounts[i - 1] + recvoffsets[i - 1]; + } + + // int MPI_Alltoallv(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, + // const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPI_Comm comm) + + MPI_Alltoallv(&pos[0], &sendcounts[0], &sendoffsets[0], MPI_data_t_type, + &pos[0], &recvcounts[0], &recvoffsets[0], MPI_data_t_type, MPI_COMM_WORLD); +#endif + } + } + + ccomplex_t compensation_kernel( vec3 k ) const noexcept + { + auto sinc = []( real_t x ){ (std::abs(x)>1e-10)? std::sin(x)/x : 1.0; }; + real_t dfx = sinc(0.5*M_PI*k[0]/gridref.kny_[0]); + real_t dfy = sinc(0.5*M_PI*k[1]/gridref.kny_[1]); + real_t dfz = sinc(0.5*M_PI*k[2]/gridref.kny_[2]); + real_t del = std::pow(dfx*dfy*dfz,1+interpolation_order); + return ccomplex_t(1.0) / del; + } + + void get_at(std::vector &pos, std::vector &val) const + { + + val.assign( pos.size(), data_t{0.0} ); + + for( size_t i=0; i +#include namespace particle { enum lattice{ + lattice_glass = -1, lattice_sc = 0, // SC : simple cubic lattice_bcc = 1, // BCC: body-centered cubic lattice_fcc = 2, // FCC: face-centered cubic @@ -37,11 +39,11 @@ const std::vector> second_lattice_shift = }; template -void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t& field ){ +void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t& field, size_t num_p = 0 ){ // number of modes present in the field - const size_t num_p_in_load = field.local_size(); + const size_t num_p_in_load = (lattice_type>=0)? 
field.local_size() : num_p; // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load): - const size_t overload = 1ull<(0,lattice_type); // 1 for sc, 2 for bcc, 4 for fcc, 8 for rsc // allocate memory for all local particles particles.allocate( overload * num_p_in_load, b64reals, b64ids ); // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well @@ -64,32 +66,37 @@ void initialize_lattice( container& particles, lattice lattice_type, const bool template void set_positions( container& particles, const lattice lattice_type, bool is_second_lattice, int idim, real_t lunit, const bool b64reals, field_t& field ) { - const size_t num_p_in_load = field.local_size(); - for( int ishift=0; ishift<(1<= 0 ){ + const size_t num_p_in_load = field.local_size(); + for( int ishift=0; ishift<(1<0 ){ - field.shift_field( lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift-1] ); - } - // read out values from phase shifted field and set assoc. particle's value - const auto ipcount0 = ishift * num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,lattice_shifts[lattice_type][ishift] - + (is_second_lattice? second_lattice_shift[lattice_type] : vec3_t{0.,0.,0.}) ); - if( b64reals ){ - particles.set_pos64( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - }else{ - particles.set_pos32( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + // can omit first shift since zero by convention, unless shifted already above, otherwise apply relative phase shift + if( ishift>0 ){ + field.shift_field( lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift-1] ); + } + // read out values from phase shifted field and set assoc. particle's value + const auto ipcount0 = ishift * num_p_in_load; + for( size_t i=0,ipcount=ipcount0; i(i,j,k,lattice_shifts[lattice_type][ishift] + + (is_second_lattice? second_lattice_shift[lattice_type] : vec3_t{0.,0.,0.}) ); + if( b64reals ){ + particles.set_pos64( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + }else{ + particles.set_pos32( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + } } } } } + }else{ + } } diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 6185af0..9e41b59 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -549,6 +549,8 @@ int Run( config_file& the_config ) // somewhat arbitrarily, start baryon particle IDs from 2**31 if we have 32bit and from 2**56 if we have 64 bits size_t IDoffset = (this_species == cosmo_species::baryon)? ((the_output_plugin->has_64bit_ids())? 
1ul<<56 : 1ul<<31): 0 ; + grid_interpolate<1,Grid_FFT> interp( tmp ); + // if output plugin wants particles, then we need to store them, along with their IDs if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { From dd4688953c78e8ba78a9508f1c9674912d370f9f Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 May 2020 04:20:12 +0200 Subject: [PATCH 122/130] working commit, glass --- include/HDF_IO.hh | 1086 +++++++++++++++++++++++++++++++ include/grid_interpolate.hh | 61 +- include/particle_generator.hh | 202 ++++-- src/ic_generator.cc | 17 +- src/plugins/random_panphasia.cc | 15 +- 5 files changed, 1304 insertions(+), 77 deletions(-) create mode 100755 include/HDF_IO.hh diff --git a/include/HDF_IO.hh b/include/HDF_IO.hh new file mode 100755 index 0000000..53b3f92 --- /dev/null +++ b/include/HDF_IO.hh @@ -0,0 +1,1086 @@ +#pragma once +#if defined(USE_HDF5) + +#define H5_USE_16_API + +/* + HDF_IO.hh -- templated C++ HDF5 front-end functions, v1.2b + + Copyright (C) 2006-7 Oliver Hahn -- ojha@gmx.de + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include +#include +#include +#include +#include "hdf5.h" + +template +hid_t GetDataType( void ) +{ + if( typeid(T) == typeid(int) ) + return H5T_NATIVE_INT; + + if( typeid(T) == typeid(unsigned) ) + return H5T_NATIVE_UINT; + + if( typeid(T) == typeid(float) ) + return H5T_NATIVE_FLOAT; + + if( typeid(T) == typeid(double) ) + return H5T_NATIVE_DOUBLE; + + if( typeid(T) == typeid(long long) ) + return H5T_NATIVE_LLONG; + + if( typeid(T) == typeid(unsigned long long) ) + return H5T_NATIVE_ULLONG; + + if( typeid(T) == typeid(size_t) ) + return H5T_NATIVE_ULLONG; + + + std::cerr << " - Error: [HDF_IO] trying to evaluate unsupported type in GetDataType\n\n"; + return -1; +} + +#include + +class HDFException : public std::runtime_error { + public: + HDFException( const std::string &errtxt ) : std::runtime_error(errtxt) { } +}; + + +inline bool DoesFileExist( std::string Filename ){ + bool flag = false; + std::fstream fin(Filename.c_str(),std::ios::in|std::ios::binary); + if( fin.is_open() ) + flag=true; + fin.close(); + return flag; +} + +inline void AssertFileOpen( std::string Filename ) +{ + if( !DoesFileExist( Filename ) ){ + std::fstream fout( Filename.c_str(), std::ios::out|std::ios::binary); + fout.close(); + } +} + +inline void HDFCreateFile( std::string Filename ) +{ + hid_t HDF_FileID; + HDF_FileID = H5Fcreate( Filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT ); + H5Fclose( HDF_FileID ); +} + +template< typename T> +inline void HDFReadVector( const std::string Filename, const std::string ObjName, std::vector &Data ) +{ + HDFReadDataset( Filename, ObjName, Data ); +} + + + + +inline void HDFGetDatasetExtent( const std::string Filename, const std::string ObjName, std::vector &Extent ) +{ + hid_t HDF_FileID, HDF_DatasetID, HDF_DataspaceID; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + //... 
save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( HDF_DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + //... get space associated with dataset and its extensions + HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); + + int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); + + hsize_t *dimsize = new hsize_t[ndims]; + + H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + + Extent.clear(); + for(int i=0; i +inline void HDFReadDataset( const std::string Filename, const std::string ObjName, std::vector &Data ) +{ + + hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID; + hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( HDF_DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + //... get space associated with dataset and its extensions + HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); + + int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); + + hsize_t dimsize[ndims]; + + H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + + HDF_StorageSize = 1; + for(int i=0; i +inline void HDFReadSelect( const std::string Filename, const std::string ObjName, const std::vector& ii, std::vector &Data ){ + + hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID, HDF_MemspaceID; + hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( HDF_DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + } + + //... 
get space associated with dataset and its extensions + HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); + + hsize_t block[2]; + block[0] = ii.size(); + block[1] = 1; + + + Data.clear(); + Data.reserve( block[0]*block[1] ); + Data.assign( block[0]*block[1], (T)1 ); + + HDF_MemspaceID = H5Screate_simple( 2, block, NULL ); + // H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); + H5Sselect_elements( HDF_DataspaceID, H5S_SELECT_SET, ii.size(), (const hsize_t *)&ii[0] ); + + H5Dread( HDF_DatasetID, HDF_Type, HDF_MemspaceID, HDF_DataspaceID, H5P_DEFAULT, &Data[0] ); + + H5Sclose( HDF_DataspaceID ); + H5Sclose( HDF_MemspaceID ); + H5Dclose( HDF_DatasetID ); + H5Fclose( HDF_FileID ); + +} + +template +inline void HDFReadVectorSelect( const std::string Filename, const std::string ObjName, const std::vector& ii, std::vector &Data ){ + + hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID, HDF_MemspaceID; +// hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( HDF_DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + //... get space associated with dataset and its extensions + HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); + int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); + hsize_t dimsize[ndims]; + H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + + hsize_t block[2]; + block[0] = ii.size(); + block[1] = 3; + + std::vector coord; + for( unsigned i=0; i().swap(ii); + + + + + if( ii.size() == 0 ){ + std::cerr << "attempted to read empty block. skipping....\n"; + return; + } + //std::cerr << "starting 2 read...\n"; + H5Sselect_none( HDF_DataspaceID ); + if( H5Sselect_elements( HDF_DataspaceID, H5S_SELECT_SET, coord.size()/2, (const hsize_t *)&coord[0] ) < 0 )//(const hsize_t**)&coord[0] ) < 0 ) + std::cerr << " - could not select elements properly\n"; + + if(H5Sselect_valid( HDF_DataspaceID )<=0 ){ + std::cerr << "\n - sorry, invalid element selection in file \'"<< Filename.c_str() << "\'. 
\n - dumping 10 first indices...\n"; + + /*for( unsigned i=0; i<10; ++i ){ + for( unsigned k=0; k<3; ++k ){ + std::cerr << coord[3*i+k] << " "; + } + std::cerr << "\n"; + }*/ + + return; + } + + std::vector().swap(coord); + Data.assign( block[0]*block[1], (T)0 ); + HDF_MemspaceID = H5Screate_simple( 2, &block[0], NULL ); + + H5Dread( HDF_DatasetID, HDF_Type, HDF_MemspaceID, HDF_DataspaceID, H5P_DEFAULT, &Data[0] ); + + + H5Sclose( HDF_DataspaceID ); + H5Sclose( HDF_MemspaceID ); + H5Dclose( HDF_DatasetID ); + H5Fclose( HDF_FileID ); + +} + +template< typename T > +inline void HDFReadVectorSlab( const std::string Filename, const std::string ObjName, unsigned nStart, unsigned nCount, std::vector &Data ) +{ + hsize_t + offset[2], + stride[2], + count[2], + block[2]; + + hid_t MemspaceID, FilespaceID, DatasetID, FileID; + hid_t Type = GetDataType(); + + FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + DatasetID = H5Dopen( FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if( DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( FileID ); + throw HDFException(ss.str()); + return; + } + + FilespaceID = H5Dget_space( DatasetID ); + + offset[0] = nStart; + offset[1] = 0; + + count[0] = 1; + count[1] = 1; + + stride[0] = 1; + stride[1] = 1; + + block[0] = nCount; + block[1] = 3; + + + Data.clear(); + Data.reserve( block[0]*block[1] ); + Data.assign( block[0]*block[1], (T)1 ); + + MemspaceID = H5Screate_simple( 2, block, NULL ); + H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); + + H5Dread( DatasetID, Type, MemspaceID, FilespaceID, H5P_DEFAULT, &Data[0] ); + + H5Sclose( FilespaceID ); + H5Sclose( MemspaceID ); + H5Dclose( DatasetID ); + H5Fclose( FileID ); +} + +template< typename T > +inline void HDFReadDatasetSlab( const std::string Filename, const std::string ObjName, unsigned nStart, unsigned nCount, std::vector &Data ) +{ + hsize_t + offset[2], + stride[2], + count[2], + block[2]; + + hid_t MemspaceID, FilespaceID, DatasetID, FileID; + hid_t Type = GetDataType(); + + FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + + + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + DatasetID = H5Dopen( FileID, ObjName.c_str() ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... 
dataset did not exist or was empty + if( DatasetID < 0 ){ + std::stringstream ss; + ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; + Data.clear(); + H5Fclose( FileID ); + throw HDFException(ss.str()); + return; + } + + FilespaceID = H5Dget_space( DatasetID ); + + offset[0] = nStart; + offset[1] = 0; + + count[0] = 1; + count[1] = 1; + + stride[0] = 1; + stride[1] = 1; + + block[0] = nCount; + block[1] = 1; + + + Data.clear(); + Data.reserve( block[0]*block[1] ); + Data.assign( block[0]*block[1], (T)1 ); + + MemspaceID = H5Screate_simple( 2, block, NULL ); + H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); + + H5Dread( DatasetID, Type, MemspaceID, FilespaceID, H5P_DEFAULT, &Data[0] ); + + H5Sclose( FilespaceID ); + H5Sclose( MemspaceID ); + H5Dclose( DatasetID ); + H5Fclose( FileID ); +} + +template< typename T> +inline void HDFReadGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, T &Data ) +{ + + hid_t HDF_Type, HDF_FileID, HDF_GroupID, HDF_AttributeID; + // hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... attempt to open attribute + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_AttributeID = H5Aopen_name( HDF_GroupID, ObjName.c_str() ); + + if( HDF_FileID < 0 || HDF_GroupID < 0 || HDF_AttributeID < 0 ){ + std::stringstream ss; + ss << " - Warning: attribute \'" << GroupName.c_str() << "/" << ObjName.c_str() << "\' does not exist or is empty.\n"; + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + + H5Aread( HDF_AttributeID, HDF_Type, &Data ); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + + H5Aclose( HDF_AttributeID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); + +} + +template< typename T> +inline void HDFReadGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, std::vector &Data ) +{ + + hid_t HDF_Type, HDF_FileID, HDF_GroupID, HDF_AttributeID, HDF_DataspaceID; + hsize_t HDF_StorageSize; + + HDF_Type = GetDataType(); + + //... save old error handler + herr_t (*old_func)(void*); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... attempt to open attribute + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_AttributeID = H5Aopen_name( HDF_GroupID, ObjName.c_str() ); + + if( HDF_FileID < 0 || HDF_GroupID < 0 || HDF_AttributeID < 0 ){ + std::stringstream ss; + ss << " - Warning: attribute \'" << GroupName.c_str() << "/" << ObjName.c_str() << "\' does not exist or is empty.\n"; + H5Fclose( HDF_FileID ); + throw HDFException(ss.str()); + return; + } + + //... 
get space associated with dataset and its extensions + HDF_DataspaceID = H5Aget_space( HDF_AttributeID ); + + int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); + + hsize_t dimsize[ndims]; + + H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + + HDF_StorageSize = 1; + for(int i=0; i +inline void HDFWriteDataset( const std::string Filename, const std::string ObjName, const std::vector &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims = Data.size(); + HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); + HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), HDF_Type, + HDF_DataspaceID, H5P_DEFAULT ); + H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, + H5P_DEFAULT, &Data[0] ); + H5Dclose( HDF_DatasetID ); + H5Sclose( HDF_DataspaceID ); + + H5Fclose( HDF_FileID ); +} + +template< typename T > +inline void HDFWriteGroupDataset( const std::string Filename, const std::string GrpName, const std::string ObjName, const std::vector &Data ) +{ + + hid_t + HDF_FileID, + HDF_GroupID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_GroupID = H5Gopen( HDF_FileID, GrpName.c_str() ); + + HDF_Type = GetDataType(); + + HDF_Dims = Data.size(); + HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); + HDF_DatasetID = H5Dcreate( HDF_GroupID, ObjName.c_str(), HDF_Type, + HDF_DataspaceID, H5P_DEFAULT ); + H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, + H5P_DEFAULT, &Data[0] ); + H5Dclose( HDF_DatasetID ); + H5Sclose( HDF_DataspaceID ); + + H5Gclose( HDF_GroupID ); + + H5Fclose( HDF_FileID ); +} + + +template< typename T > +inline void HDFWriteDataset2D( const std::string Filename, const std::string ObjName, const std::vector< std::vector > &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims[2]; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims[0] = Data.size(); + HDF_Dims[1] = Data[0].size(); + HDF_DataspaceID = H5Screate_simple(2, HDF_Dims, NULL); + HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), HDF_Type, + HDF_DataspaceID, H5P_DEFAULT ); + + T *tmp = new T[HDF_Dims[0]*HDF_Dims[1]]; + + unsigned k=0; + for(unsigned i=0; i +inline void HDFWriteDataset3D( const std::string Filename, const std::string ObjName, unsigned nd[3], const std::vector< T > &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims[3]; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims[0] = nd[0]; + HDF_Dims[1] = nd[1]; + HDF_Dims[2] = nd[2]; + + //std::cerr << nd[0]< +struct HDFHyperslabWriter3Ds +{ + hid_t dset_id_, type_id_, file_id_; + + HDFHyperslabWriter3Ds( const std::string Filename, const std::string ObjName, size_t nd[3] ) + { + hid_t filespace; + + hsize_t sizes[4] = { 1, nd[0], nd[1], nd[2] }; + + type_id_ = GetDataType(); + file_id_ = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + //std::cerr << "creating filespace : 1 x " << nd[0] << " x " << nd[1] << " x " << nd[2] << std::endl; + filespace = H5Screate_simple( 4, sizes, NULL ); + dset_id_ = H5Dcreate( file_id_, ObjName.c_str(), type_id_, filespace, H5P_DEFAULT ); + + H5Sclose(filespace); + } + + ~HDFHyperslabWriter3Ds() + { 
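+		// release the HDF5 dataset and file handles acquired in the constructor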
+ H5Dclose( dset_id_ ); + H5Fclose( file_id_ ); + } + + void write_slab( T* data, size_t* count, size_t* offset ) + { + + hsize_t counts[4] = { 1, count[0], count[1], count[2] }; + hsize_t offsets[4] = { 0, offset[0], offset[1], offset[2] }; + + hid_t filespace = H5Dget_space(dset_id_); + + //std::cerr << "creating memspace : 1 x " << count[0] << " x " << count[1] << " x " << count[2] << std::endl; + hid_t memspace = H5Screate_simple(4, counts, NULL); + H5Sselect_hyperslab( filespace, H5S_SELECT_SET, offsets, NULL, counts, NULL ); + + //herr_t status; + //status = + H5Dwrite(dset_id_, type_id_, memspace, filespace, H5P_DEFAULT, reinterpret_cast(data)); + H5Sclose(filespace); + H5Sclose(memspace); + } + +}; + + +template< typename T > +inline void HDFWriteDataset3Ds( const std::string Filename, const std::string ObjName, unsigned nd[3], const std::vector< T > &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims[4]; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims[0] = 1; + HDF_Dims[1] = nd[0]; + HDF_Dims[2] = nd[1]; + HDF_Dims[3] = nd[2]; + + //std::cerr << nd[0]< +inline void HDFWriteDatasetVector( const std::string Filename, const std::string ObjName, const std::vector &Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_DataspaceID, + HDF_Type; + + hsize_t HDF_Dims[2]; + + // hsize_t HDF_Dims; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + + HDF_Type = GetDataType(); + + HDF_Dims[0] = (hsize_t)(Data.size()/3); + HDF_Dims[1] = 3; + + if( Data.size() % 3 != 0 ){ + std::cerr << " - Warning: Trying to write vector data in HDFWriteDatasetVector\n" + << " but array length not divisible by 3!\n\n"; + + } + + HDF_DataspaceID = H5Screate_simple(2, HDF_Dims, NULL); + HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), H5T_NATIVE_FLOAT, + HDF_DataspaceID, H5P_DEFAULT ); + H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, + H5P_DEFAULT, &Data[0] ); + H5Dclose( HDF_DatasetID ); + H5Sclose( HDF_DataspaceID ); + + H5Fclose( HDF_FileID ); +} + +inline void HDFCreateGroup( const std::string Filename, const std::string GroupName ) +{ + hid_t HDF_FileID, HDF_GroupID; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_GroupID = H5Gcreate( HDF_FileID, GroupName.c_str(), 0 ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); + +} + +inline void HDFCreateSubGroup( const std::string Filename, const std::string SuperGroupName, const std::string GroupName ) +{ + hid_t HDF_FileID, HDF_GroupID, HDF_SuperGroupID; + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_SuperGroupID = H5Gopen( HDF_FileID, SuperGroupName.c_str() ); + HDF_GroupID = H5Gcreate( HDF_SuperGroupID, GroupName.c_str(), 0 ); + H5Gclose( HDF_GroupID ); + H5Gclose( HDF_SuperGroupID ); + H5Fclose( HDF_FileID ); + +} + +template< typename T > +inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, const std::vector< T > &Data ) +{ + hid_t HDF_FileID, + HDF_GroupID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + hsize_t HDF_Dims; + + HDF_DatatypeID = GetDataType(); + + HDF_Dims = (hsize_t)(Data.size()); + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); + + HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), 
HDF_DatatypeID, HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data[0] ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); +} + +template< typename T > +inline void HDFWriteDatasetAttribute( const std::string Filename, const std::string DatasetName, const std::string ObjName, const std::vector< T > &Data ) +{ + hid_t HDF_FileID, + HDF_DatasetID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + hsize_t HDF_Dims; + + HDF_DatatypeID = GetDataType(); + + HDF_Dims = (hsize_t)(Data.size()); + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_DatasetID = H5Dopen( HDF_FileID, DatasetName.c_str() ); + HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); + + HDF_AttributeID = H5Acreate(HDF_DatasetID, ObjName.c_str(), HDF_DatatypeID, HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data[0] ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Dclose( HDF_DatasetID ); + H5Fclose( HDF_FileID ); +} + + +template< typename T > +inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, T +Data ) +{ + + hid_t + HDF_FileID, + HDF_GroupID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + HDF_DatatypeID = GetDataType(); + + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_DataspaceID = H5Screate(H5S_SCALAR); + HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), HDF_DatatypeID, + HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); +} + +template< typename T > +inline void HDFWriteDatasetAttribute( const std::string Filename, const std::string DatasetName, const std::string ObjName, T Data ) +{ + + hid_t + HDF_FileID, + HDF_DatasetID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + HDF_DatatypeID = GetDataType(); + + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_DatasetID = H5Dopen( HDF_FileID, DatasetName.c_str() ); + HDF_DataspaceID = H5Screate(H5S_SCALAR); + HDF_AttributeID = H5Acreate(HDF_DatasetID, ObjName.c_str(), HDF_DatatypeID, + HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Dclose( HDF_DatasetID ); + H5Fclose( HDF_FileID ); +} + +template< typename T > +inline void HDFWriteSubGroupAttribute( const std::string Filename, const std::string GroupName, const std::string SubGroupName, const std::string ObjName, T +Data ) +{ + + hid_t + HDF_FileID, + HDF_GroupID, + HDF_SubGroupID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + HDF_DatatypeID = GetDataType(); + + + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + std::cerr << "opening " << GroupName.c_str() << std::endl; + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + std::cerr << "opening " << SubGroupName.c_str() << std::endl; + HDF_SubGroupID = H5Gopen( HDF_GroupID, SubGroupName.c_str() ); + HDF_DataspaceID = H5Screate(H5S_SCALAR); + HDF_AttributeID = H5Acreate(HDF_SubGroupID, ObjName.c_str(), HDF_DatatypeID, + HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Gclose( 
HDF_SubGroupID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); +} + +template<> +inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, std::string Data ) +{ + + hid_t + HDF_FileID, + HDF_GroupID, + HDF_AttributeID, + HDF_DataspaceID, + HDF_DatatypeID; + + HDF_DatatypeID = H5Tcopy( H5T_C_S1 ); + + H5Tset_size( HDF_DatatypeID, Data.size() ); + H5Tset_strpad(HDF_DatatypeID, H5T_STR_NULLPAD); + + HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); + HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); + HDF_DataspaceID = H5Screate(H5S_SCALAR); + HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), HDF_DatatypeID, + HDF_DataspaceID, H5P_DEFAULT); + H5Awrite( HDF_AttributeID, HDF_DatatypeID, Data.c_str() ); + H5Aclose( HDF_AttributeID ); + H5Sclose( HDF_DataspaceID ); + H5Gclose( HDF_GroupID ); + H5Fclose( HDF_FileID ); +} + +#endif // USE_HDF5 diff --git a/include/grid_interpolate.hh b/include/grid_interpolate.hh index b99c958..2facf80 100644 --- a/include/grid_interpolate.hh +++ b/include/grid_interpolate.hh @@ -5,6 +5,8 @@ #include +#include + template struct grid_interpolate { @@ -61,10 +63,15 @@ struct grid_interpolate MPI_Status status; status.MPI_ERROR = MPI_SUCCESS; - MPI_Sendrecv_replace(&boundary_[0], nx * ny * nz, MPI::get_datatype(), sendto, + int err = MPI_Sendrecv_replace(&boundary_[0], nx * ny * nz, MPI::get_datatype(), sendto, MPI::get_rank() + 1000, recvfrom, recvfrom + 1000, MPI_COMM_WORLD, &status); - assert(status.MPI_ERROR == MPI_SUCCESS); + if( err != MPI_SUCCESS ){ + char errstr[256]; int errlen=256; + MPI_Error_string(err, errstr, &errlen ); + music::elog << "MPI_ERROR #" << err << " : " << errstr << std::endl; + } + #endif } } @@ -92,10 +99,10 @@ struct grid_interpolate if( is_distributed_trait ){ size_t localix = ix-gridref.local_0_start_; - val += this->relem(localix, iy, iz) * tx * ty * tz; - val += this->relem(localix, iy, iz1) * tx * ty * dz; - val += this->relem(localix, iy1, iz) * tx * dy * tz; - val += this->relem(localix, iy1, iz1) * tx * dy * dz; + val += gridref.relem(localix, iy, iz) * tx * ty * tz; + val += gridref.relem(localix, iy, iz1) * tx * ty * dz; + val += gridref.relem(localix, iy1, iz) * tx * dy * tz; + val += gridref.relem(localix, iy1, iz1) * tx * dy * dz; if( localix+1 >= gridref.local_0_size_ ){ size_t localix1 = localix+1 - gridref.local_0_size_; @@ -105,23 +112,22 @@ struct grid_interpolate val += boundary_[(localix1*ny_+iy1)*nz_+iz1] * dx * dy * dz; }else{ size_t localix1 = localix+1; - val += this->relem(localix1, iy, iz) * dx * ty * tz; - val += this->relem(localix1, iy, iz1) * dx * ty * dz; - val += this->relem(localix1, iy1, iz) * dx * dy * tz; - val += this->relem(localix1, iy1, iz1) * dx * dy * dz; + val += gridref.relem(localix1, iy, iz) * dx * ty * tz; + val += gridref.relem(localix1, iy, iz1) * dx * ty * dz; + val += gridref.relem(localix1, iy1, iz) * dx * dy * tz; + val += gridref.relem(localix1, iy1, iz1) * dx * dy * dz; } }else{ size_t ix1 = (ix + 1) % nx_; - val += this->relem(ix, iy, iz) * tx * ty * tz; - val += this->relem(ix, iy, iz1) * tx * ty * dz; - val += this->relem(ix, iy1, iz) * tx * dy * tz; - val += this->relem(ix, iy1, iz1) * tx * dy * dz; - val += this->relem(ix1, iy, iz) * dx * ty * tz; - val += this->relem(ix1, iy, iz1) * dx * ty * dz; - val += this->relem(ix1, iy1, iz) * dx * dy * tz; - val += this->relem(ix1, iy1, iz1) * dx * dy * dz; + val += gridref.relem(ix, iy, iz) * tx * ty * tz; + val += gridref.relem(ix, iy, 
iz1) * tx * ty * dz; + val += gridref.relem(ix, iy1, iz) * tx * dy * tz; + val += gridref.relem(ix, iy1, iz1) * tx * dy * dz; + val += gridref.relem(ix1, iy, iz) * dx * ty * tz; + val += gridref.relem(ix1, iy, iz1) * dx * ty * dz; + val += gridref.relem(ix1, iy1, iz) * dx * dy * tz; + val += gridref.relem(ix1, iy1, iz1) * dx * dy * dz; } - return val; } @@ -131,8 +137,8 @@ struct grid_interpolate int get_task(const vec3 &x, const std::vector &local0starts) const noexcept { - auto it = std::lower_bound(local0starts.begin(), local0starts.end(), int(x[0])); - return std::distance(local0starts.begin(), it) - 1; + const auto it = std::upper_bound(local0starts.begin(), local0starts.end(), int(x[0])); + return std::distance(local0starts.begin(), it)-1; } void domain_decompose_pos(std::vector &pos) const noexcept @@ -144,12 +150,12 @@ struct grid_interpolate std::vector local0starts(MPI::get_size(), 0); MPI_Alltoall(&local_0_start, 1, MPI_INT, &local0starts[0], 1, MPI_INT, MPI_COMM_WORLD); - std::sort(pos.begin(), pos.end(), [&](auto x1, auto x2) { return get_task(x1) < get_task(x2); }); + std::sort(pos.begin(), pos.end(), [&](auto x1, auto x2) { return get_task(x1,local0starts) < get_task(x2,local0starts); }); std::vector sendcounts(MPI::get_size(), 0), sendoffsets(MPI::get_size(), 0); std::vector recvcounts(MPI::get_size(), 0), recvoffsets(MPI::get_size(), 0); for (auto x : pos) { - sendcounts[get_task(x)] += 3; + sendcounts[get_task(x,local0starts)] += 3; } // int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) @@ -170,14 +176,17 @@ struct grid_interpolate } } - ccomplex_t compensation_kernel( vec3 k ) const noexcept + ccomplex_t compensation_kernel( const vec3_t& k ) const noexcept { - auto sinc = []( real_t x ){ (std::abs(x)>1e-10)? std::sin(x)/x : 1.0; }; + auto sinc = []( real_t x ){ return (std::abs(x)>1e-10)? std::sin(x)/x : 1.0; }; real_t dfx = sinc(0.5*M_PI*k[0]/gridref.kny_[0]); real_t dfy = sinc(0.5*M_PI*k[1]/gridref.kny_[1]); real_t dfz = sinc(0.5*M_PI*k[2]/gridref.kny_[2]); real_t del = std::pow(dfx*dfy*dfz,1+interpolation_order); - return ccomplex_t(1.0) / del; + + real_t shift = 0.5 * k[0] * gridref.get_dx()[0] + 0.5 * k[1] * gridref.get_dx()[1] + 0.5 * k[2] * gridref.get_dx()[2]; + + return std::exp(ccomplex_t(0.0, shift)) / del; } void get_at(std::vector &pos, std::vector &val) const diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 3eb9e9b..e9a780a 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -10,6 +10,10 @@ #include #include +#if defined(USE_HDF5) +#include "HDF_IO.hh" +#endif + namespace particle { enum lattice{ @@ -39,32 +43,61 @@ const std::vector> second_lattice_shift = }; template -void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t& field, size_t num_p = 0 ){ - // number of modes present in the field - const size_t num_p_in_load = (lattice_type>=0)? 
field.local_size() : num_p; - // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load): - const size_t overload = 1ull<(0,lattice_type); // 1 for sc, 2 for bcc, 4 for fcc, 8 for rsc - // allocate memory for all local particles - particles.allocate( overload * num_p_in_load, b64reals, b64ids ); - // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well - for( size_t i=0,ipcount=0; i(0,lattice_type); // 1 for sc, 2 for bcc, 4 for fcc, 8 for rsc + // allocate memory for all local particles + particles.allocate( overload * num_p_in_load, b64reals, b64ids ); + // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well + for( size_t i=0,ipcount=0; i("setup","GlassFileName"); + + std::vector glass_dims; + HDFGetDatasetExtent( glass_fname, "/PartType1/Coordinates", glass_dims ); + music::ilog << "Glass file contains " << glass_dims[0] << " particles." << std::endl; + + size_t ntiles = cf.get_value("setup","GlassTiles"); + size_t num_p = glass_dims[0] * ntiles*ntiles*ntiles / MPI::get_size(); + size_t off_p = MPI::get_rank() * num_p; + + particles.allocate( num_p, b64reals, b64ids ); + + for( size_t i=0; i -void set_positions( container& particles, const lattice lattice_type, bool is_second_lattice, int idim, real_t lunit, const bool b64reals, field_t& field ) +void set_positions( container& particles, const lattice lattice_type, bool is_second_lattice, int idim, real_t lunit, const bool b64reals, field_t& field, config_file& cf ) { // works only for Bravais types if( lattice_type >= 0 ){ @@ -96,36 +129,131 @@ void set_positions( container& particles, const lattice lattice_type, bool is_se } } }else{ +#if defined(USE_HDF5) + std::string glass_fname = cf.get_value("setup","GlassFileName"); + size_t ntiles = cf.get_value("setup","GlassTiles"); + + real_t lglassbox = 1.0; + HDFReadGroupAttribute( glass_fname, "Header", "BoxSize", lglassbox ); + std::vector glass_pos; + HDFReadDataset( glass_fname, "/PartType1/Coordinates", glass_pos ); + size_t np_in_file = glass_pos.size()/3; + size_t num_p = np_in_file * ntiles*ntiles*ntiles / MPI::get_size(); + size_t off_p = num_p * MPI::get_rank(); + + std::vector< std::array > glass_posr(num_p,{0.0,0.0,0.0}); + + std::array ng({field.n_[0],field.n_[1],field.n_[2]}); + + for( size_t i=0; i interp( field ); + + interp.domain_decompose_pos( glass_posr ); + + for( size_t i=0; i -void set_velocities(container &particles, lattice lattice_type, bool is_second_lattice, int idim, const bool b64reals, field_t &field) +void set_velocities(container &particles, lattice lattice_type, bool is_second_lattice, int idim, const bool b64reals, field_t &field, config_file& cf) { - const size_t num_p_in_load = field.local_size(); - for( int ishift=0; ishift<(1< 0){ - field.shift_field( lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1] ); - } - // read out values from phase shifted field and set assoc. particle's value - const auto ipcount0 = ishift * num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i= 0 ){ + const size_t num_p_in_load = field.local_size(); + for( int ishift=0; ishift<(1< 0){ + field.shift_field( lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1] ); + } + // read out values from phase shifted field and set assoc. 
particle's value + const auto ipcount0 = ishift * num_p_in_load; + for( size_t i=0,ipcount=ipcount0; i("setup","GlassFileName"); + size_t ntiles = cf.get_value("setup","GlassTiles"); + + real_t lglassbox = 1.0; + HDFReadGroupAttribute( glass_fname, "Header", "BoxSize", lglassbox ); + + std::vector glass_pos; + HDFReadDataset( glass_fname, "/PartType1/Coordinates", glass_pos ); + size_t np_in_file = glass_pos.size()/3; + size_t num_p = np_in_file * ntiles*ntiles*ntiles / MPI::get_size(); + size_t off_p = num_p * MPI::get_rank(); + + std::vector< std::array > glass_posr(num_p,{0.0,0.0,0.0}); + + std::array ng({field.n_[0],field.n_[1],field.n_[2]}); + + for( size_t i=0; i interp( field ); + + interp.domain_decompose_pos( glass_posr ); + + for( size_t i=0; iwrite_species_as( this_species ) == output_type::particles ) { // allocate particle structure and generate particle IDs - particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp ); + particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp, the_config ); } // write out positions @@ -576,6 +577,10 @@ int Run( config_file& the_config ) tmp.kelem(idx) = lunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot + lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx) ); + if( the_output_plugin->write_species_as( this_species ) == output_type::particles && lattice_type == particle::lattice_glass){ + tmp.kelem(idx) *= interp.compensation_kernel( tmp.get_k(i,j,k) ); + } + if( bDoBaryons ){ vec3_t kvec = phi.get_k(i,j,k); real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); @@ -599,7 +604,7 @@ int Run( config_file& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_positions( particles, lattice_type, shifted_lattice, idim, lunit, the_output_plugin->has_64bit_reals(), tmp ); + particle::set_positions( particles, lattice_type, shifted_lattice, idim, lunit, the_output_plugin->has_64bit_reals(), tmp, the_config ); } // otherwise write out the grid data directly to the output plugin // else if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_lagrangian ) @@ -629,6 +634,10 @@ int Run( config_file& the_config ) tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot_v + vfac3 * (lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx)) ); + if( the_output_plugin->write_species_as( this_species ) == output_type::particles && lattice_type == particle::lattice_glass){ + tmp.kelem(idx) *= interp.compensation_kernel( tmp.get_k(i,j,k) ); + } + if( bDoBaryons ){ vec3_t kvec = phi.get_k(i,j,k); real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); @@ -663,7 +672,7 @@ int Run( config_file& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_velocities( particles, lattice_type, shifted_lattice, idim, the_output_plugin->has_64bit_reals(), tmp ); + particle::set_velocities( particles, lattice_type, shifted_lattice, idim, the_output_plugin->has_64bit_reals(), tmp, the_config ); } // otherwise write out the grid data directly to the 
output plugin else if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian ) diff --git a/src/plugins/random_panphasia.cc b/src/plugins/random_panphasia.cc index 632d7fc..1489f59 100644 --- a/src/plugins/random_panphasia.cc +++ b/src/plugins/random_panphasia.cc @@ -267,7 +267,7 @@ public: int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0); int level_p = d.wn_level_base + lextra; - // int ratio = 1 << lextra; + int ratio = 1 << lextra; lstate[mythread].layer_min = 0; lstate[mythread].layer_max = level_p; @@ -328,12 +328,6 @@ public: } } } - - // if (verbosity) - // { - // music::ilog.Print("time for calculating PANPHASIA field : %f s, %f µs/cell", get_wtime() - t1, - // 1e6 * (get_wtime() - t1) / g.global_size(0) / g.global_size(1) / g.global_size(2)); - // } } // end omp parallel region g0.FourierTransformForward(); @@ -369,8 +363,9 @@ public: auto y0(g0.kelem(i, j, k)), y1(g1.kelem(i, j, k)), y2(g2.kelem(i, j, k)), y3(g3.kelem(i, j, k)), y4(g4.kelem(i, j, k)); - g0.kelem(i, j, k) = y0 * fx * fy * fz + sqrt3 * (y1 * gx * fy * fz + y2 * fx * gy * fz + y3 * fx * fy * gz) + - y4 * magnitude; + g0.kelem(i, j, k) = y0 * fx * fy * fz + + sqrt3 * (y1 * gx * fy * fz + y2 * fx * gy * fz + y3 * fx * fy * gz) + + y4 * magnitude; } else { @@ -409,7 +404,7 @@ public: int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0); int level_p = d.wn_level_base + lextra; - // int ratio = 1 << lextra; + int ratio = 1 << lextra; lstate[mythread].layer_min = 0; lstate[mythread].layer_max = level_p; From bae1701cb50a49843a03a14fe6770d055a1247f2 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 May 2020 16:15:40 +0200 Subject: [PATCH 123/130] fixed compiler errors when not using MPI --- include/particle_generator.hh | 20 +- src/plugins/HDF_IO.hh | 1085 --------------------------------- 2 files changed, 18 insertions(+), 1087 deletions(-) delete mode 100755 src/plugins/HDF_IO.hh diff --git a/include/particle_generator.hh b/include/particle_generator.hh index e9a780a..91a19d9 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -77,8 +77,13 @@ void initialize_lattice( container& particles, lattice lattice_type, const bool music::ilog << "Glass file contains " << glass_dims[0] << " particles." 
<< std::endl; size_t ntiles = cf.get_value("setup","GlassTiles"); +#if defined(USE_MPI) size_t num_p = glass_dims[0] * ntiles*ntiles*ntiles / MPI::get_size(); size_t off_p = MPI::get_rank() * num_p; +#else + size_t num_p = glass_dims[0] * ntiles*ntiles*ntiles; + size_t off_p = 0; +#endif particles.allocate( num_p, b64reals, b64ids ); @@ -139,8 +144,13 @@ void set_positions( container& particles, const lattice lattice_type, bool is_se std::vector glass_pos; HDFReadDataset( glass_fname, "/PartType1/Coordinates", glass_pos ); size_t np_in_file = glass_pos.size()/3; +#if defined(USE_MPI) size_t num_p = np_in_file * ntiles*ntiles*ntiles / MPI::get_size(); - size_t off_p = num_p * MPI::get_rank(); + size_t off_p = MPI::get_rank() * num_p; +#else + size_t num_p = np_in_file * ntiles*ntiles*ntiles; + size_t off_p = 0; +#endif std::vector< std::array > glass_posr(num_p,{0.0,0.0,0.0}); @@ -218,8 +228,14 @@ void set_velocities(container &particles, lattice lattice_type, bool is_second_l std::vector glass_pos; HDFReadDataset( glass_fname, "/PartType1/Coordinates", glass_pos ); size_t np_in_file = glass_pos.size()/3; +#if defined(USE_MPI) size_t num_p = np_in_file * ntiles*ntiles*ntiles / MPI::get_size(); - size_t off_p = num_p * MPI::get_rank(); + size_t off_p = MPI::get_rank() * num_p; +#else + size_t num_p = np_in_file * ntiles*ntiles*ntiles; + size_t off_p = 0; +#endif + std::vector< std::array > glass_posr(num_p,{0.0,0.0,0.0}); diff --git a/src/plugins/HDF_IO.hh b/src/plugins/HDF_IO.hh deleted file mode 100755 index 965dac9..0000000 --- a/src/plugins/HDF_IO.hh +++ /dev/null @@ -1,1085 +0,0 @@ -#ifndef __HDF_IO_HH -#define __HDF_IO_HH - -#define H5_USE_16_API - -/* - HDF_IO.hh -- templated C++ HDF5 front-end functions, v1.2b - - Copyright (C) 2006-7 Oliver Hahn -- ojha@gmx.de - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . 
-*/ - -#include -#include -#include -#include -#include -#include -#include -#include "hdf5.h" - -template -hid_t GetDataType( void ) -{ - if( typeid(T) == typeid(int) ) - return H5T_NATIVE_INT; - - if( typeid(T) == typeid(unsigned) ) - return H5T_NATIVE_UINT; - - if( typeid(T) == typeid(float) ) - return H5T_NATIVE_FLOAT; - - if( typeid(T) == typeid(double) ) - return H5T_NATIVE_DOUBLE; - - if( typeid(T) == typeid(long long) ) - return H5T_NATIVE_LLONG; - - if( typeid(T) == typeid(unsigned long long) ) - return H5T_NATIVE_ULLONG; - - if( typeid(T) == typeid(size_t) ) - return H5T_NATIVE_ULLONG; - - - std::cerr << " - Error: [HDF_IO] trying to evaluate unsupported type in GetDataType\n\n"; - return -1; -} - -#include - -class HDFException : public std::runtime_error { - public: - HDFException( const std::string &errtxt ) : std::runtime_error(errtxt) { } -}; - - -inline bool DoesFileExist( std::string Filename ){ - bool flag = false; - std::fstream fin(Filename.c_str(),std::ios::in|std::ios::binary); - if( fin.is_open() ) - flag=true; - fin.close(); - return flag; -} - -inline void AssertFileOpen( std::string Filename ) -{ - if( !DoesFileExist( Filename ) ){ - std::fstream fout( Filename.c_str(), std::ios::out|std::ios::binary); - fout.close(); - } -} - -inline void HDFCreateFile( std::string Filename ) -{ - hid_t HDF_FileID; - HDF_FileID = H5Fcreate( Filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT ); - H5Fclose( HDF_FileID ); -} - -template< typename T> -inline void HDFReadVector( const std::string Filename, const std::string ObjName, std::vector &Data ) -{ - HDFReadDataset( Filename, ObjName, Data ); -} - - - - -inline void HDFGetDatasetExtent( const std::string Filename, const std::string ObjName, std::vector &Extent ) -{ - hid_t HDF_FileID, HDF_DatasetID, HDF_DataspaceID; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - - //... save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... probe dataset opening - HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); - - //... restore previous error handler - H5Eset_auto(old_func, old_client_data); - - //... dataset did not exist or was empty - if( HDF_DatasetID < 0 ){ - std::stringstream ss; - ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; - H5Fclose( HDF_FileID ); - throw HDFException(ss.str()); - return; - } - - //... get space associated with dataset and its extensions - HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); - - int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); - - hsize_t *dimsize = new hsize_t[ndims]; - - H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); - - Extent.clear(); - for(int i=0; i -inline void HDFReadDataset( const std::string Filename, const std::string ObjName, std::vector &Data ) -{ - - hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID; - hsize_t HDF_StorageSize; - - HDF_Type = GetDataType(); - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - - - //... save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... probe dataset opening - HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); - - //... restore previous error handler - H5Eset_auto(old_func, old_client_data); - - //... 
dataset did not exist or was empty - if( HDF_DatasetID < 0 ){ - std::stringstream ss; - ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; - Data.clear(); - H5Fclose( HDF_FileID ); - throw HDFException(ss.str()); - return; - } - - //... get space associated with dataset and its extensions - HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); - - int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); - - hsize_t dimsize[ndims]; - - H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); - - HDF_StorageSize = 1; - for(int i=0; i -inline void HDFReadSelect( const std::string Filename, const std::string ObjName, const std::vector& ii, std::vector &Data ){ - - hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID, HDF_MemspaceID; - hsize_t HDF_StorageSize; - - HDF_Type = GetDataType(); - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - - - //... save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... probe dataset opening - HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); - - //... restore previous error handler - H5Eset_auto(old_func, old_client_data); - - //... dataset did not exist or was empty - if( HDF_DatasetID < 0 ){ - std::stringstream ss; - ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; - Data.clear(); - H5Fclose( HDF_FileID ); - throw HDFException(ss.str()); - } - - //... get space associated with dataset and its extensions - HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); - - hsize_t block[2]; - block[0] = ii.size(); - block[1] = 1; - - - Data.clear(); - Data.reserve( block[0]*block[1] ); - Data.assign( block[0]*block[1], (T)1 ); - - HDF_MemspaceID = H5Screate_simple( 2, block, NULL ); - // H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); - H5Sselect_elements( HDF_DataspaceID, H5S_SELECT_SET, ii.size(), (const hsize_t *)&ii[0] ); - - H5Dread( HDF_DatasetID, HDF_Type, HDF_MemspaceID, HDF_DataspaceID, H5P_DEFAULT, &Data[0] ); - - H5Sclose( HDF_DataspaceID ); - H5Sclose( HDF_MemspaceID ); - H5Dclose( HDF_DatasetID ); - H5Fclose( HDF_FileID ); - -} - -template -inline void HDFReadVectorSelect( const std::string Filename, const std::string ObjName, const std::vector& ii, std::vector &Data ){ - - hid_t HDF_Type, HDF_FileID, HDF_DatasetID, HDF_DataspaceID, HDF_MemspaceID; -// hsize_t HDF_StorageSize; - - HDF_Type = GetDataType(); - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - - - //... save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... probe dataset opening - HDF_DatasetID = H5Dopen( HDF_FileID, ObjName.c_str() ); - - //... restore previous error handler - H5Eset_auto(old_func, old_client_data); - - //... dataset did not exist or was empty - if( HDF_DatasetID < 0 ){ - std::stringstream ss; - ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; - Data.clear(); - H5Fclose( HDF_FileID ); - throw HDFException(ss.str()); - return; - } - - //... 
get space associated with dataset and its extensions - HDF_DataspaceID = H5Dget_space( HDF_DatasetID ); - int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); - hsize_t dimsize[ndims]; - H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); - - hsize_t block[2]; - block[0] = ii.size(); - block[1] = 3; - - std::vector coord; - for( unsigned i=0; i().swap(ii); - - - - - if( ii.size() == 0 ){ - std::cerr << "attempted to read empty block. skipping....\n"; - return; - } - //std::cerr << "starting 2 read...\n"; - H5Sselect_none( HDF_DataspaceID ); - if( H5Sselect_elements( HDF_DataspaceID, H5S_SELECT_SET, coord.size()/2, (const hsize_t *)&coord[0] ) < 0 )//(const hsize_t**)&coord[0] ) < 0 ) - std::cerr << " - could not select elements properly\n"; - - if(H5Sselect_valid( HDF_DataspaceID )<=0 ){ - std::cerr << "\n - sorry, invalid element selection in file \'"<< Filename.c_str() << "\'. \n - dumping 10 first indices...\n"; - - /*for( unsigned i=0; i<10; ++i ){ - for( unsigned k=0; k<3; ++k ){ - std::cerr << coord[3*i+k] << " "; - } - std::cerr << "\n"; - }*/ - - return; - } - - std::vector().swap(coord); - Data.assign( block[0]*block[1], (T)0 ); - HDF_MemspaceID = H5Screate_simple( 2, &block[0], NULL ); - - H5Dread( HDF_DatasetID, HDF_Type, HDF_MemspaceID, HDF_DataspaceID, H5P_DEFAULT, &Data[0] ); - - - H5Sclose( HDF_DataspaceID ); - H5Sclose( HDF_MemspaceID ); - H5Dclose( HDF_DatasetID ); - H5Fclose( HDF_FileID ); - -} - -template< typename T > -inline void HDFReadVectorSlab( const std::string Filename, const std::string ObjName, unsigned nStart, unsigned nCount, std::vector &Data ) -{ - hsize_t - offset[2], - stride[2], - count[2], - block[2]; - - hid_t MemspaceID, FilespaceID, DatasetID, FileID; - hid_t Type = GetDataType(); - - FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - - - - //... save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... probe dataset opening - DatasetID = H5Dopen( FileID, ObjName.c_str() ); - - //... restore previous error handler - H5Eset_auto(old_func, old_client_data); - - //... dataset did not exist or was empty - if( DatasetID < 0 ){ - std::stringstream ss; - ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; - Data.clear(); - H5Fclose( FileID ); - throw HDFException(ss.str()); - return; - } - - FilespaceID = H5Dget_space( DatasetID ); - - offset[0] = nStart; - offset[1] = 0; - - count[0] = 1; - count[1] = 1; - - stride[0] = 1; - stride[1] = 1; - - block[0] = nCount; - block[1] = 3; - - - Data.clear(); - Data.reserve( block[0]*block[1] ); - Data.assign( block[0]*block[1], (T)1 ); - - MemspaceID = H5Screate_simple( 2, block, NULL ); - H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); - - H5Dread( DatasetID, Type, MemspaceID, FilespaceID, H5P_DEFAULT, &Data[0] ); - - H5Sclose( FilespaceID ); - H5Sclose( MemspaceID ); - H5Dclose( DatasetID ); - H5Fclose( FileID ); -} - -template< typename T > -inline void HDFReadDatasetSlab( const std::string Filename, const std::string ObjName, unsigned nStart, unsigned nCount, std::vector &Data ) -{ - hsize_t - offset[2], - stride[2], - count[2], - block[2]; - - hid_t MemspaceID, FilespaceID, DatasetID, FileID; - hid_t Type = GetDataType(); - - FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - - - - //... 
save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... probe dataset opening - DatasetID = H5Dopen( FileID, ObjName.c_str() ); - - //... restore previous error handler - H5Eset_auto(old_func, old_client_data); - - //... dataset did not exist or was empty - if( DatasetID < 0 ){ - std::stringstream ss; - ss << " - Warning: dataset \'" << ObjName.c_str() << "\' does not exist or is empty.\n"; - Data.clear(); - H5Fclose( FileID ); - throw HDFException(ss.str()); - return; - } - - FilespaceID = H5Dget_space( DatasetID ); - - offset[0] = nStart; - offset[1] = 0; - - count[0] = 1; - count[1] = 1; - - stride[0] = 1; - stride[1] = 1; - - block[0] = nCount; - block[1] = 1; - - - Data.clear(); - Data.reserve( block[0]*block[1] ); - Data.assign( block[0]*block[1], (T)1 ); - - MemspaceID = H5Screate_simple( 2, block, NULL ); - H5Sselect_hyperslab( FilespaceID, H5S_SELECT_SET, offset, stride, count, block ); - - H5Dread( DatasetID, Type, MemspaceID, FilespaceID, H5P_DEFAULT, &Data[0] ); - - H5Sclose( FilespaceID ); - H5Sclose( MemspaceID ); - H5Dclose( DatasetID ); - H5Fclose( FileID ); -} - -template< typename T> -inline void HDFReadGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, T &Data ) -{ - - hid_t HDF_Type, HDF_FileID, HDF_GroupID, HDF_AttributeID; - // hsize_t HDF_StorageSize; - - HDF_Type = GetDataType(); - - //... save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... attempt to open attribute - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); - HDF_AttributeID = H5Aopen_name( HDF_GroupID, ObjName.c_str() ); - - if( HDF_FileID < 0 || HDF_GroupID < 0 || HDF_AttributeID < 0 ){ - std::stringstream ss; - ss << " - Warning: attribute \'" << GroupName.c_str() << "/" << ObjName.c_str() << "\' does not exist or is empty.\n"; - H5Fclose( HDF_FileID ); - throw HDFException(ss.str()); - return; - } - - - H5Aread( HDF_AttributeID, HDF_Type, &Data ); - - //... restore previous error handler - H5Eset_auto(old_func, old_client_data); - - - H5Aclose( HDF_AttributeID ); - H5Gclose( HDF_GroupID ); - H5Fclose( HDF_FileID ); - -} - -template< typename T> -inline void HDFReadGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, std::vector &Data ) -{ - - hid_t HDF_Type, HDF_FileID, HDF_GroupID, HDF_AttributeID, HDF_DataspaceID; - hsize_t HDF_StorageSize; - - HDF_Type = GetDataType(); - - //... save old error handler - herr_t (*old_func)(void*); - void *old_client_data; - - H5Eget_auto(&old_func, &old_client_data); - - //... turn off error handling by hdf5 library - H5Eset_auto(NULL, NULL); - - //... attempt to open attribute - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); - HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); - HDF_AttributeID = H5Aopen_name( HDF_GroupID, ObjName.c_str() ); - - if( HDF_FileID < 0 || HDF_GroupID < 0 || HDF_AttributeID < 0 ){ - std::stringstream ss; - ss << " - Warning: attribute \'" << GroupName.c_str() << "/" << ObjName.c_str() << "\' does not exist or is empty.\n"; - H5Fclose( HDF_FileID ); - throw HDFException(ss.str()); - return; - } - - //... 
get space associated with dataset and its extensions - HDF_DataspaceID = H5Aget_space( HDF_AttributeID ); - - int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); - - hsize_t dimsize[ndims]; - - H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); - - HDF_StorageSize = 1; - for(int i=0; i -inline void HDFWriteDataset( const std::string Filename, const std::string ObjName, const std::vector &Data ) -{ - - hid_t - HDF_FileID, - HDF_DatasetID, - HDF_DataspaceID, - HDF_Type; - - hsize_t HDF_Dims; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - - HDF_Type = GetDataType(); - - HDF_Dims = Data.size(); - HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); - HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), HDF_Type, - HDF_DataspaceID, H5P_DEFAULT ); - H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, - H5P_DEFAULT, &Data[0] ); - H5Dclose( HDF_DatasetID ); - H5Sclose( HDF_DataspaceID ); - - H5Fclose( HDF_FileID ); -} - -template< typename T > -inline void HDFWriteGroupDataset( const std::string Filename, const std::string GrpName, const std::string ObjName, const std::vector &Data ) -{ - - hid_t - HDF_FileID, - HDF_GroupID, - HDF_DatasetID, - HDF_DataspaceID, - HDF_Type; - - hsize_t HDF_Dims; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - - HDF_GroupID = H5Gopen( HDF_FileID, GrpName.c_str() ); - - HDF_Type = GetDataType(); - - HDF_Dims = Data.size(); - HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); - HDF_DatasetID = H5Dcreate( HDF_GroupID, ObjName.c_str(), HDF_Type, - HDF_DataspaceID, H5P_DEFAULT ); - H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, - H5P_DEFAULT, &Data[0] ); - H5Dclose( HDF_DatasetID ); - H5Sclose( HDF_DataspaceID ); - - H5Gclose( HDF_GroupID ); - - H5Fclose( HDF_FileID ); -} - - -template< typename T > -inline void HDFWriteDataset2D( const std::string Filename, const std::string ObjName, const std::vector< std::vector > &Data ) -{ - - hid_t - HDF_FileID, - HDF_DatasetID, - HDF_DataspaceID, - HDF_Type; - - hsize_t HDF_Dims[2]; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - - HDF_Type = GetDataType(); - - HDF_Dims[0] = Data.size(); - HDF_Dims[1] = Data[0].size(); - HDF_DataspaceID = H5Screate_simple(2, HDF_Dims, NULL); - HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), HDF_Type, - HDF_DataspaceID, H5P_DEFAULT ); - - T *tmp = new T[HDF_Dims[0]*HDF_Dims[1]]; - - unsigned k=0; - for(unsigned i=0; i -inline void HDFWriteDataset3D( const std::string Filename, const std::string ObjName, unsigned nd[3], const std::vector< T > &Data ) -{ - - hid_t - HDF_FileID, - HDF_DatasetID, - HDF_DataspaceID, - HDF_Type; - - hsize_t HDF_Dims[3]; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - - HDF_Type = GetDataType(); - - HDF_Dims[0] = nd[0]; - HDF_Dims[1] = nd[1]; - HDF_Dims[2] = nd[2]; - - //std::cerr << nd[0]< -struct HDFHyperslabWriter3Ds -{ - hid_t dset_id_, type_id_, file_id_; - - HDFHyperslabWriter3Ds( const std::string Filename, const std::string ObjName, size_t nd[3] ) - { - hid_t filespace; - - hsize_t sizes[4] = { 1, nd[0], nd[1], nd[2] }; - - type_id_ = GetDataType(); - file_id_ = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - - //std::cerr << "creating filespace : 1 x " << nd[0] << " x " << nd[1] << " x " << nd[2] << std::endl; - filespace = H5Screate_simple( 4, sizes, NULL ); - dset_id_ = H5Dcreate( file_id_, ObjName.c_str(), type_id_, filespace, H5P_DEFAULT ); - - H5Sclose(filespace); - } - - ~HDFHyperslabWriter3Ds() - { 
- H5Dclose( dset_id_ ); - H5Fclose( file_id_ ); - } - - void write_slab( T* data, size_t* count, size_t* offset ) - { - - hsize_t counts[4] = { 1, count[0], count[1], count[2] }; - hsize_t offsets[4] = { 0, offset[0], offset[1], offset[2] }; - - hid_t filespace = H5Dget_space(dset_id_); - - //std::cerr << "creating memspace : 1 x " << count[0] << " x " << count[1] << " x " << count[2] << std::endl; - hid_t memspace = H5Screate_simple(4, counts, NULL); - H5Sselect_hyperslab( filespace, H5S_SELECT_SET, offsets, NULL, counts, NULL ); - - //herr_t status; - //status = - H5Dwrite(dset_id_, type_id_, memspace, filespace, H5P_DEFAULT, reinterpret_cast(data)); - H5Sclose(filespace); - H5Sclose(memspace); - } - -}; - - -template< typename T > -inline void HDFWriteDataset3Ds( const std::string Filename, const std::string ObjName, unsigned nd[3], const std::vector< T > &Data ) -{ - - hid_t - HDF_FileID, - HDF_DatasetID, - HDF_DataspaceID, - HDF_Type; - - hsize_t HDF_Dims[4]; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - - HDF_Type = GetDataType(); - - HDF_Dims[0] = 1; - HDF_Dims[1] = nd[0]; - HDF_Dims[2] = nd[1]; - HDF_Dims[3] = nd[2]; - - //std::cerr << nd[0]< -inline void HDFWriteDatasetVector( const std::string Filename, const std::string ObjName, const std::vector &Data ) -{ - - hid_t - HDF_FileID, - HDF_DatasetID, - HDF_DataspaceID, - HDF_Type; - - hsize_t HDF_Dims[2]; - - // hsize_t HDF_Dims; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - - HDF_Type = GetDataType(); - - HDF_Dims[0] = (hsize_t)(Data.size()/3); - HDF_Dims[1] = 3; - - if( Data.size() % 3 != 0 ){ - std::cerr << " - Warning: Trying to write vector data in HDFWriteDatasetVector\n" - << " but array length not divisible by 3!\n\n"; - - } - - HDF_DataspaceID = H5Screate_simple(2, HDF_Dims, NULL); - HDF_DatasetID = H5Dcreate( HDF_FileID, ObjName.c_str(), H5T_NATIVE_FLOAT, - HDF_DataspaceID, H5P_DEFAULT ); - H5Dwrite( HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, - H5P_DEFAULT, &Data[0] ); - H5Dclose( HDF_DatasetID ); - H5Sclose( HDF_DataspaceID ); - - H5Fclose( HDF_FileID ); -} - -inline void HDFCreateGroup( const std::string Filename, const std::string GroupName ) -{ - hid_t HDF_FileID, HDF_GroupID; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - HDF_GroupID = H5Gcreate( HDF_FileID, GroupName.c_str(), 0 ); - H5Gclose( HDF_GroupID ); - H5Fclose( HDF_FileID ); - -} - -inline void HDFCreateSubGroup( const std::string Filename, const std::string SuperGroupName, const std::string GroupName ) -{ - hid_t HDF_FileID, HDF_GroupID, HDF_SuperGroupID; - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - HDF_SuperGroupID = H5Gopen( HDF_FileID, SuperGroupName.c_str() ); - HDF_GroupID = H5Gcreate( HDF_SuperGroupID, GroupName.c_str(), 0 ); - H5Gclose( HDF_GroupID ); - H5Gclose( HDF_SuperGroupID ); - H5Fclose( HDF_FileID ); - -} - -template< typename T > -inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, const std::vector< T > &Data ) -{ - hid_t HDF_FileID, - HDF_GroupID, - HDF_AttributeID, - HDF_DataspaceID, - HDF_DatatypeID; - - hsize_t HDF_Dims; - - HDF_DatatypeID = GetDataType(); - - HDF_Dims = (hsize_t)(Data.size()); - - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); - HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); - - HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), 
HDF_DatatypeID, HDF_DataspaceID, H5P_DEFAULT); - H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data[0] ); - H5Aclose( HDF_AttributeID ); - H5Sclose( HDF_DataspaceID ); - H5Gclose( HDF_GroupID ); - H5Fclose( HDF_FileID ); -} - -template< typename T > -inline void HDFWriteDatasetAttribute( const std::string Filename, const std::string DatasetName, const std::string ObjName, const std::vector< T > &Data ) -{ - hid_t HDF_FileID, - HDF_DatasetID, - HDF_AttributeID, - HDF_DataspaceID, - HDF_DatatypeID; - - hsize_t HDF_Dims; - - HDF_DatatypeID = GetDataType(); - - HDF_Dims = (hsize_t)(Data.size()); - - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - HDF_DatasetID = H5Dopen( HDF_FileID, DatasetName.c_str() ); - HDF_DataspaceID = H5Screate_simple(1, &HDF_Dims, NULL); - - HDF_AttributeID = H5Acreate(HDF_DatasetID, ObjName.c_str(), HDF_DatatypeID, HDF_DataspaceID, H5P_DEFAULT); - H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data[0] ); - H5Aclose( HDF_AttributeID ); - H5Sclose( HDF_DataspaceID ); - H5Dclose( HDF_DatasetID ); - H5Fclose( HDF_FileID ); -} - - -template< typename T > -inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, T -Data ) -{ - - hid_t - HDF_FileID, - HDF_GroupID, - HDF_AttributeID, - HDF_DataspaceID, - HDF_DatatypeID; - - HDF_DatatypeID = GetDataType(); - - - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); - HDF_DataspaceID = H5Screate(H5S_SCALAR); - HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), HDF_DatatypeID, - HDF_DataspaceID, H5P_DEFAULT); - H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); - H5Aclose( HDF_AttributeID ); - H5Sclose( HDF_DataspaceID ); - H5Gclose( HDF_GroupID ); - H5Fclose( HDF_FileID ); -} - -template< typename T > -inline void HDFWriteDatasetAttribute( const std::string Filename, const std::string DatasetName, const std::string ObjName, T Data ) -{ - - hid_t - HDF_FileID, - HDF_DatasetID, - HDF_AttributeID, - HDF_DataspaceID, - HDF_DatatypeID; - - HDF_DatatypeID = GetDataType(); - - - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - HDF_DatasetID = H5Dopen( HDF_FileID, DatasetName.c_str() ); - HDF_DataspaceID = H5Screate(H5S_SCALAR); - HDF_AttributeID = H5Acreate(HDF_DatasetID, ObjName.c_str(), HDF_DatatypeID, - HDF_DataspaceID, H5P_DEFAULT); - H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); - H5Aclose( HDF_AttributeID ); - H5Sclose( HDF_DataspaceID ); - H5Dclose( HDF_DatasetID ); - H5Fclose( HDF_FileID ); -} - -template< typename T > -inline void HDFWriteSubGroupAttribute( const std::string Filename, const std::string GroupName, const std::string SubGroupName, const std::string ObjName, T -Data ) -{ - - hid_t - HDF_FileID, - HDF_GroupID, - HDF_SubGroupID, - HDF_AttributeID, - HDF_DataspaceID, - HDF_DatatypeID; - - HDF_DatatypeID = GetDataType(); - - - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - std::cerr << "opening " << GroupName.c_str() << std::endl; - HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); - std::cerr << "opening " << SubGroupName.c_str() << std::endl; - HDF_SubGroupID = H5Gopen( HDF_GroupID, SubGroupName.c_str() ); - HDF_DataspaceID = H5Screate(H5S_SCALAR); - HDF_AttributeID = H5Acreate(HDF_SubGroupID, ObjName.c_str(), HDF_DatatypeID, - HDF_DataspaceID, H5P_DEFAULT); - H5Awrite( HDF_AttributeID, HDF_DatatypeID, &Data ); - H5Aclose( HDF_AttributeID ); - H5Sclose( HDF_DataspaceID ); - H5Gclose( 
HDF_SubGroupID ); - H5Gclose( HDF_GroupID ); - H5Fclose( HDF_FileID ); -} - -template<> -inline void HDFWriteGroupAttribute( const std::string Filename, const std::string GroupName, const std::string ObjName, std::string Data ) -{ - - hid_t - HDF_FileID, - HDF_GroupID, - HDF_AttributeID, - HDF_DataspaceID, - HDF_DatatypeID; - - HDF_DatatypeID = H5Tcopy( H5T_C_S1 ); - - H5Tset_size( HDF_DatatypeID, Data.size() ); - H5Tset_strpad(HDF_DatatypeID, H5T_STR_NULLPAD); - - HDF_FileID = H5Fopen( Filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); - HDF_GroupID = H5Gopen( HDF_FileID, GroupName.c_str() ); - HDF_DataspaceID = H5Screate(H5S_SCALAR); - HDF_AttributeID = H5Acreate(HDF_GroupID, ObjName.c_str(), HDF_DatatypeID, - HDF_DataspaceID, H5P_DEFAULT); - H5Awrite( HDF_AttributeID, HDF_DatatypeID, Data.c_str() ); - H5Aclose( HDF_AttributeID ); - H5Sclose( HDF_DataspaceID ); - H5Gclose( HDF_GroupID ); - H5Fclose( HDF_FileID ); -} -#endif From 226a9303db15eae71399a4817297f0a72b8d73e5 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 3 May 2020 21:20:22 +0200 Subject: [PATCH 124/130] bug fixes to glass with MPI --- include/HDF_IO.hh | 4 ++-- include/grid_interpolate.hh | 20 ++++++++++++-------- include/particle_generator.hh | 4 ++-- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/include/HDF_IO.hh b/include/HDF_IO.hh index 53b3f92..1b15b34 100755 --- a/include/HDF_IO.hh +++ b/include/HDF_IO.hh @@ -193,9 +193,9 @@ inline void HDFReadDataset( const std::string Filename, const std::string ObjNam int ndims = H5Sget_simple_extent_ndims( HDF_DataspaceID ); - hsize_t dimsize[ndims]; + std::vector dimsize(ndims,0); - H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + H5Sget_simple_extent_dims( HDF_DataspaceID, &dimsize[0], NULL ); HDF_StorageSize = 1; for(int i=0; i boundary_; const grid_t &gridref; + size_t nx_, ny_, nz_; explicit grid_interpolate(const grid_t &g) : gridref(g), nx_(g.n_[0]), ny_(g.n_[1]), nz_(g.n_[2]) @@ -50,7 +50,7 @@ struct grid_interpolate { for (size_t j = 0; j < ny; ++j) { - for (size_t k = 0; k < nx; ++k) + for (size_t k = 0; k < nz; ++k) { boundary_[(i * ny + j) * nz + k] = g.relem(i, j, k); } @@ -98,7 +98,7 @@ struct grid_interpolate data_t val{0.0}; if( is_distributed_trait ){ - size_t localix = ix-gridref.local_0_start_; + ptrdiff_t localix = ix-gridref.local_0_start_; val += gridref.relem(localix, iy, iz) * tx * ty * tz; val += gridref.relem(localix, iy, iz1) * tx * ty * dz; val += gridref.relem(localix, iy1, iz) * tx * dy * tz; @@ -158,20 +158,24 @@ struct grid_interpolate sendcounts[get_task(x,local0starts)] += 3; } - // int MPI_Alltoall(const void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, MPI_Comm comm) MPI_Alltoall(&sendcounts[0], 1, MPI_INT, &recvcounts[0], 1, MPI_INT, MPI_COMM_WORLD); + size_t tot_receive = recvcounts[0], tot_send = sendcounts[0]; for (int i = 1; i < MPI::get_size(); ++i) { sendoffsets[i] = sendcounts[i - 1] + sendoffsets[i - 1]; recvoffsets[i] = recvcounts[i - 1] + recvoffsets[i - 1]; + tot_receive += recvcounts[i]; + tot_send += sendcounts[i]; } - // int MPI_Alltoallv(const void *sendbuf, const int *sendcounts, const int *sdispls, MPI_Datatype sendtype, void *recvbuf, - // const int *recvcounts, const int *rdispls, MPI_Datatype recvtype, MPI_Comm comm) + std::vector recvbuf; + recvbuf.assign(tot_receive,{0.,0.,0.}); MPI_Alltoallv(&pos[0], &sendcounts[0], &sendoffsets[0], MPI_data_t_type, - &pos[0], &recvcounts[0], &recvoffsets[0], MPI_data_t_type, MPI_COMM_WORLD); + 
&recvbuf[0], &recvcounts[0], &recvoffsets[0], MPI_data_t_type, MPI_COMM_WORLD); + + std::swap( pos, recvbuf ); #endif } } diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 91a19d9..d632d2d 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -154,7 +154,7 @@ void set_positions( container& particles, const lattice lattice_type, bool is_se std::vector< std::array > glass_posr(num_p,{0.0,0.0,0.0}); - std::array ng({field.n_[0],field.n_[1],field.n_[2]}); + std::array ng({real_t(field.n_[0]),real_t(field.n_[1]),real_t(field.n_[2])}); for( size_t i=0; i > glass_posr(num_p,{0.0,0.0,0.0}); - std::array ng({field.n_[0],field.n_[1],field.n_[2]}); + std::array ng({real_t(field.n_[0]),real_t(field.n_[1]),real_t(field.n_[2])}); for( size_t i=0; i Date: Mon, 4 May 2020 00:49:11 +0200 Subject: [PATCH 125/130] mpi bugfixes, refactoring of particle creation --- include/grid_interpolate.hh | 70 ++--- include/particle_generator.hh | 498 ++++++++++++++++++---------------- src/ic_generator.cc | 34 ++- 3 files changed, 311 insertions(+), 291 deletions(-) diff --git a/include/grid_interpolate.hh b/include/grid_interpolate.hh index ac92cd6..c0ec9aa 100644 --- a/include/grid_interpolate.hh +++ b/include/grid_interpolate.hh @@ -16,19 +16,8 @@ struct grid_interpolate static constexpr bool is_distributed_trait = grid_t::is_distributed_trait; static constexpr int interpolation_order = interp_order; - -#if defined(USE_MPI) - const MPI_Datatype MPI_data_t_type = - (typeid(data_t) == typeid(float)) ? MPI_FLOAT - : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE - : (typeid(data_t) == typeid(long double)) ? MPI_LONG_DOUBLE - : (typeid(data_t) == typeid(std::complex)) ? MPI_C_FLOAT_COMPLEX - : (typeid(data_t) == typeid(std::complex)) ? MPI_C_DOUBLE_COMPLEX - : (typeid(data_t) == typeid(std::complex)) ? MPI_C_LONG_DOUBLE_COMPLEX - : MPI_INT; -#endif - std::vector boundary_; + std::vector local0starts_; const grid_t &gridref; size_t nx_, ny_, nz_; @@ -40,6 +29,13 @@ struct grid_interpolate if (is_distributed_trait) { #if defined(USE_MPI) + + int local_0_start = int(gridref.local_0_start_); + local0starts_.assign(MPI::get_size(), 0); + + MPI_Allgather(&local_0_start, 1, MPI_INT, &local0starts_[0], 1, MPI_INT, MPI_COMM_WORLD); + + //... 
exchange boundary size_t nx = interpolation_order + 1; size_t ny = g.n_[1]; size_t nz = g.n_[2]; @@ -96,6 +92,11 @@ struct grid_interpolate size_t iz1 = (iz + 1) % nz_; data_t val{0.0}; + + if( get_task(pos) != MPI::get_rank() ){ + std::cout << "task : " << MPI::get_rank() << " p@(" << pos[0] << ", " << pos[1] << ", " << pos[2] << ") belongs to task " << get_task(pos) << std::endl; + abort(); + } if( is_distributed_trait ){ ptrdiff_t localix = ix-gridref.local_0_start_; @@ -135,10 +136,10 @@ struct grid_interpolate // { // } - int get_task(const vec3 &x, const std::vector &local0starts) const noexcept + int get_task(const vec3 &x) const noexcept { - const auto it = std::upper_bound(local0starts.begin(), local0starts.end(), int(x[0])); - return std::distance(local0starts.begin(), it)-1; + const auto it = std::upper_bound(local0starts_.begin(), local0starts_.end(), int(x[0])); + return std::distance(local0starts_.begin(), it)-1; } void domain_decompose_pos(std::vector &pos) const noexcept @@ -146,16 +147,12 @@ struct grid_interpolate if (is_distributed_trait) { #if defined(USE_MPI) - int local_0_start = int(gridref.local_0_start_); - std::vector local0starts(MPI::get_size(), 0); - MPI_Alltoall(&local_0_start, 1, MPI_INT, &local0starts[0], 1, MPI_INT, MPI_COMM_WORLD); - - std::sort(pos.begin(), pos.end(), [&](auto x1, auto x2) { return get_task(x1,local0starts) < get_task(x2,local0starts); }); + std::sort(pos.begin(), pos.end(), [&](auto x1, auto x2) { return get_task(x1) < get_task(x2); }); std::vector sendcounts(MPI::get_size(), 0), sendoffsets(MPI::get_size(), 0); std::vector recvcounts(MPI::get_size(), 0), recvoffsets(MPI::get_size(), 0); for (auto x : pos) { - sendcounts[get_task(x,local0starts)] += 3; + sendcounts[get_task(x)] += 3; } MPI_Alltoall(&sendcounts[0], 1, MPI_INT, &recvcounts[0], 1, MPI_INT, MPI_COMM_WORLD); @@ -169,13 +166,12 @@ struct grid_interpolate tot_send += sendcounts[i]; } - std::vector recvbuf; - recvbuf.assign(tot_receive,{0.,0.,0.}); + std::vector recvbuf(tot_receive/3,{0.,0.,0.}); - MPI_Alltoallv(&pos[0], &sendcounts[0], &sendoffsets[0], MPI_data_t_type, - &recvbuf[0], &recvcounts[0], &recvoffsets[0], MPI_data_t_type, MPI_COMM_WORLD); + MPI_Alltoallv(&pos[0], &sendcounts[0], &sendoffsets[0], MPI::get_datatype(), + &recvbuf[0], &recvcounts[0], &recvoffsets[0], MPI::get_datatype(), MPI_COMM_WORLD); - std::swap( pos, recvbuf ); + pos.swap( recvbuf ); #endif } } @@ -193,26 +189,4 @@ struct grid_interpolate return std::exp(ccomplex_t(0.0, shift)) / del; } - void get_at(std::vector &pos, std::vector &val) const - { - - val.assign( pos.size(), data_t{0.0} ); - - for( size_t i=0; i> > lattice_shifts = -{ - // first shift must always be zero! (otherwise set_positions and set_velocities break) - /* SC : */ {{0.0,0.0,0.0}}, - /* BCC: */ {{0.0,0.0,0.0},{0.5,0.5,0.5}}, - /* FCC: */ {{0.0,0.0,0.0},{0.0,0.5,0.5},{0.5,0.0,0.5},{0.5,0.5,0.0}}, - /* RSC: */ {{0.0,0.0,0.0},{0.0,0.0,0.5},{0.0,0.5,0.0},{0.0,0.5,0.5},{0.5,0.0,0.0},{0.5,0.0,0.5},{0.5,0.5,0.0},{0.5,0.5,0.5}}, -}; - -const std::vector> second_lattice_shift = +namespace particle { - /* SC : */ {0.5, 0.5, 0.5}, // this corresponds to CsCl lattice - /* BCC: */ {0.5, 0.5, 0.0}, // is there a diatomic lattice with BCC base?!? 
- /* FCC: */ {0.5, 0.5, 0.5}, // this corresponds to NaCl lattice - // /* FCC: */ {0.25, 0.25, 0.25}, // this corresponds to Zincblende/GaAs lattice - /* RSC: */ {0.25, 0.25, 0.25}, -}; + using vec3 = std::array; -template -void initialize_lattice( container& particles, lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t& field, config_file& cf ){ - if( lattice_type != lattice_glass ) + enum lattice { - // number of modes present in the field - const size_t num_p_in_load = field.local_size(); - // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load): - const size_t overload = 1ull<(0,lattice_type); // 1 for sc, 2 for bcc, 4 for fcc, 8 for rsc - // allocate memory for all local particles - particles.allocate( overload * num_p_in_load, b64reals, b64ids ); - // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well - for( size_t i=0,ipcount=0; i>> lattice_shifts = + { + // first shift must always be zero! (otherwise set_positions and set_velocities break) + /* SC : */ {{0.0, 0.0, 0.0}}, + /* BCC: */ {{0.0, 0.0, 0.0}, {0.5, 0.5, 0.5}}, + /* FCC: */ {{0.0, 0.0, 0.0}, {0.0, 0.5, 0.5}, {0.5, 0.0, 0.5}, {0.5, 0.5, 0.0}}, + /* RSC: */ {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.5}, {0.0, 0.5, 0.0}, {0.0, 0.5, 0.5}, {0.5, 0.0, 0.0}, {0.5, 0.0, 0.5}, {0.5, 0.5, 0.0}, {0.5, 0.5, 0.5}}, + }; + + const std::vector> second_lattice_shift = + { + /* SC : */ {0.5, 0.5, 0.5}, // this corresponds to CsCl lattice + /* BCC: */ {0.5, 0.5, 0.0}, // is there a diatomic lattice with BCC base?!? + /* FCC: */ {0.5, 0.5, 0.5}, // this corresponds to NaCl lattice + // /* FCC: */ {0.25, 0.25, 0.25}, // this corresponds to Zincblende/GaAs lattice + /* RSC: */ {0.25, 0.25, 0.25}, + }; + + template + class lattice_generator { + protected: + + struct glass + { + using data_t = typename field_t::data_t; + size_t num_p, off_p; + grid_interpolate<1, field_t> interp_; + std::vector glass_posr; + + glass( config_file& cf, const field_t &field ) + : num_p(0), off_p(0), interp_( field ) + { + std::vector glass_pos; + real_t lglassbox = 1.0; + + std::string glass_fname = cf.get_value("setup", "GlassFileName"); + size_t ntiles = cf.get_value("setup", "GlassTiles"); + #if defined(USE_HDF5) - std::string glass_fname = cf.get_value("setup","GlassFileName"); - - std::vector glass_dims; - HDFGetDatasetExtent( glass_fname, "/PartType1/Coordinates", glass_dims ); - music::ilog << "Glass file contains " << glass_dims[0] << " particles." << std::endl; - - size_t ntiles = cf.get_value("setup","GlassTiles"); + HDFReadGroupAttribute(glass_fname, "Header", "BoxSize", lglassbox); + HDFReadDataset(glass_fname, "/PartType1/Coordinates", glass_pos); +#else + throw std::runtime_error("Class lattice requires HDF5 support. 
Enable and recompile."); +#endif + + size_t np_in_file = glass_pos.size() / 3; #if defined(USE_MPI) - size_t num_p = glass_dims[0] * ntiles*ntiles*ntiles / MPI::get_size(); - size_t off_p = MPI::get_rank() * num_p; + num_p = np_in_file * ntiles * ntiles * ntiles / MPI::get_size(); + off_p = MPI::get_rank() * num_p; #else - size_t num_p = glass_dims[0] * ntiles*ntiles*ntiles; - size_t off_p = 0; + num_p = np_in_file * ntiles * ntiles * ntiles; + off_p = 0; #endif - particles.allocate( num_p, b64reals, b64ids ); + glass_posr.assign(num_p, {0.0, 0.0, 0.0}); - for( size_t i=0; i ng({real_t(field.n_[0]), real_t(field.n_[1]), real_t(field.n_[2])}); + + for (size_t i = 0; i < num_p; ++i) + { + size_t idxpart = off_p + i; + size_t idx_in_glass = idxpart % np_in_file; + size_t idxtile = idxpart / np_in_file; + size_t tile_z = idxtile % (ntiles * ntiles); + size_t tile_y = ((idxtile - tile_z) / ntiles) % ntiles; + size_t tile_x = (((idxtile - tile_z) / ntiles) - tile_y) / ntiles; + glass_posr[i][0] = std::fmod((glass_pos[3 * idx_in_glass + 0] / lglassbox + real_t(tile_x)) / ntiles * ng[0] + ng[0], ng[0]); + glass_posr[i][1] = std::fmod((glass_pos[3 * idx_in_glass + 1] / lglassbox + real_t(tile_y)) / ntiles * ng[1] + ng[1], ng[1]); + glass_posr[i][2] = std::fmod((glass_pos[3 * idx_in_glass + 2] / lglassbox + real_t(tile_z)) / ntiles * ng[2] + ng[2], ng[2]); + } + +#if defined(USE_MPI) + interp_.domain_decompose_pos(glass_posr); + + num_p = glass_posr.size(); + std::vector all_num_p( MPI::get_size(), 0 ); + MPI_Allgather( &num_p, 1, MPI_UNSIGNED_LONG_LONG, &all_num_p[0], 1, MPI_UNSIGNED_LONG_LONG, MPI_COMM_WORLD ); + off_p = 0; + for( int itask=0; itask<=MPI::get_rank(); ++itask ){ + off_p += all_num_p[itask]; + } #endif - } -} - -// invalidates field, phase shifted to unspecified position after return -template -void set_positions( container& particles, const lattice lattice_type, bool is_second_lattice, int idim, real_t lunit, const bool b64reals, field_t& field, config_file& cf ) -{ - // works only for Bravais types - if( lattice_type >= 0 ){ - const size_t num_p_in_load = field.local_size(); - for( int ishift=0; ishift<(1<0 ){ - field.shift_field( lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift-1] ); + data_t get_at( const vec3& x ) const noexcept + { + return interp_.get_cic_at( x ); } - // read out values from phase shifted field and set assoc. particle's value - const auto ipcount0 = ishift * num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,lattice_shifts[lattice_type][ishift] - + (is_second_lattice? 
second_lattice_shift[lattice_type] : vec3_t{0.,0.,0.}) ); - if( b64reals ){ - particles.set_pos64( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - }else{ - particles.set_pos32( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); + + size_t size() const noexcept + { + return num_p; + } + + size_t offset() const noexcept + { + return off_p; + } + }; + + std::unique_ptr glass_ptr_; + + private: + particle::container particles_; + + public: + lattice_generator(lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t &field, config_file &cf) + { + if (lattice_type != lattice_glass) + { + // number of modes present in the field + const size_t num_p_in_load = field.local_size(); + // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load): + const size_t overload = 1ull << std::max(0, lattice_type); // 1 for sc, 2 for bcc, 4 for fcc, 8 for rsc + // allocate memory for all local particles + particles_.allocate(overload * num_p_in_load, b64reals, b64ids); + // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well + for (size_t i = 0, ipcount = 0; i < field.size(0); ++i) + { + for (size_t j = 0; j < field.size(1); ++j) + { + for (size_t k = 0; k < field.size(2); ++k, ++ipcount) + { + for (size_t iload = 0; iload < overload; ++iload) + { + if (b64ids) + { + particles_.set_id64(ipcount + iload * num_p_in_load, IDoffset + overload * field.get_cell_idx_1d(i, j, k) + iload); + } + else + { + particles_.set_id32(ipcount + iload * num_p_in_load, IDoffset + overload * field.get_cell_idx_1d(i, j, k) + iload); + } + } } } } } - } - }else{ -#if defined(USE_HDF5) - std::string glass_fname = cf.get_value("setup","GlassFileName"); - size_t ntiles = cf.get_value("setup","GlassTiles"); - - real_t lglassbox = 1.0; - HDFReadGroupAttribute( glass_fname, "Header", "BoxSize", lglassbox ); + else + { + glass_ptr_ = std::make_unique( cf, field ); + particles_.allocate(glass_ptr_->size(), b64reals, b64ids); - std::vector glass_pos; - HDFReadDataset( glass_fname, "/PartType1/Coordinates", glass_pos ); - size_t np_in_file = glass_pos.size()/3; -#if defined(USE_MPI) - size_t num_p = np_in_file * ntiles*ntiles*ntiles / MPI::get_size(); - size_t off_p = MPI::get_rank() * num_p; -#else - size_t num_p = np_in_file * ntiles*ntiles*ntiles; - size_t off_p = 0; -#endif - - std::vector< std::array > glass_posr(num_p,{0.0,0.0,0.0}); - - std::array ng({real_t(field.n_[0]),real_t(field.n_[1]),real_t(field.n_[2])}); - - for( size_t i=0; i interp( field ); - - interp.domain_decompose_pos( glass_posr ); - - for( size_t i=0; i -void set_velocities(container &particles, lattice lattice_type, bool is_second_lattice, int idim, const bool b64reals, field_t &field, config_file& cf) -{ - // works only for Bravais types - if( lattice_type >= 0 ){ - const size_t num_p_in_load = field.local_size(); - for( int ishift=0; ishift<(1< 0){ - field.shift_field( lattice_shifts[lattice_type][ishift]-lattice_shifts[lattice_type][ishift-1] ); - } - // read out values from phase shifted field and set assoc. 
particle's value - const auto ipcount0 = ishift * num_p_in_load; - for( size_t i=0,ipcount=ipcount0; isize(); ++i) + { + if (b64ids) + { + particles_.set_id64(i, IDoffset + i + glass_ptr_->offset()); + } + else + { + particles_.set_id32(i, IDoffset + i + glass_ptr_->offset()); } } } } - }else{ -#if defined(USE_HDF5) - std::string glass_fname = cf.get_value("setup","GlassFileName"); - size_t ntiles = cf.get_value("setup","GlassTiles"); - - real_t lglassbox = 1.0; - HDFReadGroupAttribute( glass_fname, "Header", "BoxSize", lglassbox ); - std::vector glass_pos; - HDFReadDataset( glass_fname, "/PartType1/Coordinates", glass_pos ); - size_t np_in_file = glass_pos.size()/3; -#if defined(USE_MPI) - size_t num_p = np_in_file * ntiles*ntiles*ntiles / MPI::get_size(); - size_t off_p = MPI::get_rank() * num_p; -#else - size_t num_p = np_in_file * ntiles*ntiles*ntiles; - size_t off_p = 0; -#endif + // invalidates field, phase shifted to unspecified position after return + void set_positions(const lattice lattice_type, bool is_second_lattice, int idim, real_t lunit, const bool b64reals, field_t &field, config_file &cf) + { + // works only for Bravais types + if (lattice_type >= 0) + { + const size_t num_p_in_load = field.local_size(); + for (int ishift = 0; ishift < (1 << lattice_type); ++ishift) + { + // if we are dealing with the secondary lattice, apply a global shift + if (ishift == 0 && is_second_lattice) + { + field.shift_field(second_lattice_shift[lattice_type]); + } - - std::vector< std::array > glass_posr(num_p,{0.0,0.0,0.0}); - - std::array ng({real_t(field.n_[0]),real_t(field.n_[1]),real_t(field.n_[2])}); - - for( size_t i=0; i interp( field ); - - interp.domain_decompose_pos( glass_posr ); - - for( size_t i=0; i 0) + { + field.shift_field(lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift - 1]); + } + // read out values from phase shifted field and set assoc. particle's value + const auto ipcount0 = ishift * num_p_in_load; + for (size_t i = 0, ipcount = ipcount0; i < field.size(0); ++i) + { + for (size_t j = 0; j < field.size(1); ++j) + { + for (size_t k = 0; k < field.size(2); ++k) + { + auto pos = field.template get_unit_r_shifted(i, j, k, lattice_shifts[lattice_type][ishift] + (is_second_lattice ? second_lattice_shift[lattice_type] : vec3_t{0., 0., 0.})); + if (b64reals) + { + particles_.set_pos64(ipcount++, idim, pos[idim] * lunit + field.relem(i, j, k)); + } + else + { + particles_.set_pos32(ipcount++, idim, pos[idim] * lunit + field.relem(i, j, k)); + } + } + } + } + } + } + else + { + for (size_t i = 0; i < this->glass_ptr_->size(); ++i) + { + auto pos = this->glass_ptr_->glass_posr[i]; + real_t disp = this->glass_ptr_->get_at(pos); + if (b64reals) + { + particles_.set_pos64(i, idim, pos[idim] / field.n_[idim] * lunit + disp); + } + else + { + particles_.set_pos32(i, idim, pos[idim] / field.n_[idim] * lunit + disp); + } + } } } - -#else - throw std::runtime_error("Class lattice requires HDF5 support. 
Enable and recompile."); -#endif - } -} + void set_velocities(lattice lattice_type, bool is_second_lattice, int idim, const bool b64reals, field_t &field, config_file &cf) + { + // works only for Bravais types + if (lattice_type >= 0) + { + const size_t num_p_in_load = field.local_size(); + for (int ishift = 0; ishift < (1 << lattice_type); ++ishift) + { + // if we are dealing with the secondary lattice, apply a global shift + if (ishift == 0 && is_second_lattice) + { + field.shift_field(second_lattice_shift[lattice_type]); + } + // can omit first shift since zero by convention, unless shifted already above, otherwise apply relative phase shift + if (ishift > 0) + { + field.shift_field(lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift - 1]); + } + // read out values from phase shifted field and set assoc. particle's value + const auto ipcount0 = ishift * num_p_in_load; + for (size_t i = 0, ipcount = ipcount0; i < field.size(0); ++i) + { + for (size_t j = 0; j < field.size(1); ++j) + { + for (size_t k = 0; k < field.size(2); ++k) + { + if (b64reals) + { + particles_.set_vel64(ipcount++, idim, field.relem(i, j, k)); + } + else + { + particles_.set_vel32(ipcount++, idim, field.relem(i, j, k)); + } + } + } + } + } + } + else + { + for (size_t i = 0; i < glass_ptr_->size(); ++i) + { + auto pos = glass_ptr_->glass_posr[i]; + real_t vel = glass_ptr_->get_at(pos); + if (b64reals) + { + particles_.set_vel64(i, idim, vel); + } + else + { + particles_.set_vel32(i, idim, vel); + } + } + } + } -} // end namespace particles + const particle::container& get_particles() const noexcept{ + return particles_; + } + + }; // struct lattice + +} // namespace particle diff --git a/src/ic_generator.cc b/src/ic_generator.cc index 8352007..f677551 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -452,6 +452,19 @@ int Run( config_file& the_config ) // temporary storage of data Grid_FFT tmp({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + std::unique_ptr>> particle_lattice_generator_ptr; + + // if output plugin wants particles, then we need to store them, along with their IDs + if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) + { + // somewhat arbitrarily, start baryon particle IDs from 2**31 if we have 32bit and from 2**56 if we have 64 bits + size_t IDoffset = (this_species == cosmo_species::baryon)? ((the_output_plugin->has_64bit_ids())? 1ul<<56 : 1ul<<31): 0 ; + + // allocate particle structure and generate particle IDs + particle_lattice_generator_ptr = + std::make_unique>>( lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp, the_config ); + } + //if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_eulerian ){ if( the_output_plugin->write_species_as(this_species) == output_type::field_eulerian ) @@ -542,22 +555,21 @@ int Run( config_file& the_config ) //=================================================================================== // we store displacements and velocities here if we compute them //=================================================================================== - particle::container particles; + bool shifted_lattice = (this_species == cosmo_species::baryon && the_output_plugin->write_species_as(this_species) == output_type::particles) ? true : false; - // somewhat arbitrarily, start baryon particle IDs from 2**31 if we have 32bit and from 2**56 if we have 64 bits - size_t IDoffset = (this_species == cosmo_species::baryon)? 
((the_output_plugin->has_64bit_ids())? 1ul<<56 : 1ul<<31): 0 ; + grid_interpolate<1,Grid_FFT> interp( tmp ); // if output plugin wants particles, then we need to store them, along with their IDs - if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) - { - // allocate particle structure and generate particle IDs - particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp, the_config ); - } + // if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) + // { + // // allocate particle structure and generate particle IDs + // particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp, the_config ); + // } // write out positions for( int idim=0; idim<3; ++idim ){ @@ -604,7 +616,7 @@ int Run( config_file& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_positions( particles, lattice_type, shifted_lattice, idim, lunit, the_output_plugin->has_64bit_reals(), tmp, the_config ); + particle_lattice_generator_ptr->set_positions( lattice_type, shifted_lattice, idim, lunit, the_output_plugin->has_64bit_reals(), tmp, the_config ); } // otherwise write out the grid data directly to the output plugin // else if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_lagrangian ) @@ -672,7 +684,7 @@ int Run( config_file& the_config ) // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_velocities( particles, lattice_type, shifted_lattice, idim, the_output_plugin->has_64bit_reals(), tmp, the_config ); + particle_lattice_generator_ptr->set_velocities( lattice_type, shifted_lattice, idim, the_output_plugin->has_64bit_reals(), tmp, the_config ); } // otherwise write out the grid data directly to the output plugin else if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian ) @@ -684,7 +696,7 @@ int Run( config_file& the_config ) if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - the_output_plugin->write_particle_data( particles, this_species, Omega[this_species] ); + the_output_plugin->write_particle_data( particle_lattice_generator_ptr->get_particles(), this_species, Omega[this_species] ); } if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian ) From 68aa31c59a61835eabf08f9a358c54f3ac8c126a Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 4 May 2020 01:13:07 +0200 Subject: [PATCH 126/130] fixed forgotten boundary update --- include/grid_interpolate.hh | 68 ++++++++++++++++++----------------- include/particle_generator.hh | 15 ++++++-- 2 files changed, 48 insertions(+), 35 deletions(-) diff --git a/include/grid_interpolate.hh b/include/grid_interpolate.hh index c0ec9aa..c37fafb 100644 --- a/include/grid_interpolate.hh +++ b/include/grid_interpolate.hh @@ -28,48 +28,52 @@ struct grid_interpolate if (is_distributed_trait) { -#if defined(USE_MPI) + update_ghosts( g ); + } + } - int local_0_start = int(gridref.local_0_start_); - local0starts_.assign(MPI::get_size(), 0); + void update_ghosts( const grid_t &g ) + { + #if defined(USE_MPI) - MPI_Allgather(&local_0_start, 1, MPI_INT, &local0starts_[0], 1, 
MPI_INT, MPI_COMM_WORLD); + int local_0_start = int(gridref.local_0_start_); + local0starts_.assign(MPI::get_size(), 0); - //... exchange boundary - size_t nx = interpolation_order + 1; - size_t ny = g.n_[1]; - size_t nz = g.n_[2]; + MPI_Allgather(&local_0_start, 1, MPI_INT, &local0starts_[0], 1, MPI_INT, MPI_COMM_WORLD); - boundary_.assign(nx * ny * nz, data_t{0.0}); + //... exchange boundary + size_t nx = interpolation_order + 1; + size_t ny = g.n_[1]; + size_t nz = g.n_[2]; - for (size_t i = 0; i < nx; ++i) + boundary_.assign(nx * ny * nz, data_t{0.0}); + + for (size_t i = 0; i < nx; ++i) + { + for (size_t j = 0; j < ny; ++j) { - for (size_t j = 0; j < ny; ++j) + for (size_t k = 0; k < nz; ++k) { - for (size_t k = 0; k < nz; ++k) - { - boundary_[(i * ny + j) * nz + k] = g.relem(i, j, k); - } + boundary_[(i * ny + j) * nz + k] = g.relem(i, j, k); } } - - int sendto = (MPI::get_rank() + MPI::get_size() - 1) % MPI::get_size(); - int recvfrom = (MPI::get_rank() + MPI::get_size() + 1) % MPI::get_size(); - - MPI_Status status; - status.MPI_ERROR = MPI_SUCCESS; - - int err = MPI_Sendrecv_replace(&boundary_[0], nx * ny * nz, MPI::get_datatype(), sendto, - MPI::get_rank() + 1000, recvfrom, recvfrom + 1000, MPI_COMM_WORLD, &status); - - if( err != MPI_SUCCESS ){ - char errstr[256]; int errlen=256; - MPI_Error_string(err, errstr, &errlen ); - music::elog << "MPI_ERROR #" << err << " : " << errstr << std::endl; - } - -#endif } + + int sendto = (MPI::get_rank() + MPI::get_size() - 1) % MPI::get_size(); + int recvfrom = (MPI::get_rank() + MPI::get_size() + 1) % MPI::get_size(); + + MPI_Status status; + status.MPI_ERROR = MPI_SUCCESS; + + int err = MPI_Sendrecv_replace(&boundary_[0], nx * ny * nz, MPI::get_datatype(), sendto, + MPI::get_rank() + 1000, recvfrom, recvfrom + 1000, MPI_COMM_WORLD, &status); + + if( err != MPI_SUCCESS ){ + char errstr[256]; int errlen=256; + MPI_Error_string(err, errstr, &errlen ); + music::elog << "MPI_ERROR #" << err << " : " << errstr << std::endl; + } +#endif } data_t get_ngp_at(const std::array &pos, std::vector &val) const noexcept diff --git a/include/particle_generator.hh b/include/particle_generator.hh index ec00903..1dec028 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -82,6 +82,8 @@ namespace particle off_p = 0; #endif + music::ilog << "Glass file contains " << np_in_file << " particles." 
<< std::endl; + glass_posr.assign(num_p, {0.0, 0.0, 0.0}); std::array ng({real_t(field.n_[0]), real_t(field.n_[1]), real_t(field.n_[2])}); @@ -112,6 +114,11 @@ namespace particle #endif } + void update_ghosts( const field_t &field ) + { + interp_.update_ghosts( field ); + } + data_t get_at( const vec3& x ) const noexcept { return interp_.get_cic_at( x ); @@ -229,10 +236,11 @@ namespace particle } else { - for (size_t i = 0; i < this->glass_ptr_->size(); ++i) + glass_ptr_->update_ghosts( field ); + for (size_t i = 0; i < glass_ptr_->size(); ++i) { - auto pos = this->glass_ptr_->glass_posr[i]; - real_t disp = this->glass_ptr_->get_at(pos); + auto pos = glass_ptr_->glass_posr[i]; + real_t disp = glass_ptr_->get_at(pos); if (b64reals) { particles_.set_pos64(i, idim, pos[idim] / field.n_[idim] * lunit + disp); @@ -286,6 +294,7 @@ namespace particle } else { + glass_ptr_->update_ghosts( field ); for (size_t i = 0; i < glass_ptr_->size(); ++i) { auto pos = glass_ptr_->glass_posr[i]; From 0a6f8f51cbaa14c28a88a616093fc7b14d4b3422 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 4 May 2020 02:09:03 +0200 Subject: [PATCH 127/130] fixed bug in grid shifting with MPI (affects bcc and fcc) --- include/grid_fft.hh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/grid_fft.hh b/include/grid_fft.hh index c11d1b3..8097b36 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -774,12 +774,7 @@ public: { FourierTransformForward(); apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { - real_t shift; - if( bdistributed ){ - shift = s.y * k[0] * get_dx()[0] + s.x * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; - }else{ - shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; - } + real_t shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; return x * std::exp(ccomplex_t(0.0, shift)); }); if( transform_back ){ From fc448a21a0c8c0d071542309e270c9f093217623 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Mon, 4 May 2020 02:09:42 +0200 Subject: [PATCH 128/130] some cleanup / compilation problem fixes --- include/grid_interpolate.hh | 5 ----- include/particle_generator.hh | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/grid_interpolate.hh b/include/grid_interpolate.hh index c37fafb..5304fab 100644 --- a/include/grid_interpolate.hh +++ b/include/grid_interpolate.hh @@ -96,11 +96,6 @@ struct grid_interpolate size_t iz1 = (iz + 1) % nz_; data_t val{0.0}; - - if( get_task(pos) != MPI::get_rank() ){ - std::cout << "task : " << MPI::get_rank() << " p@(" << pos[0] << ", " << pos[1] << ", " << pos[2] << ") belongs to task " << get_task(pos) << std::endl; - abort(); - } if( is_distributed_trait ){ ptrdiff_t localix = ix-gridref.local_0_start_; diff --git a/include/particle_generator.hh b/include/particle_generator.hh index 1dec028..5fe68d1 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -88,6 +88,7 @@ namespace particle std::array ng({real_t(field.n_[0]), real_t(field.n_[1]), real_t(field.n_[2])}); + #pragma omp parallel for for (size_t i = 0; i < num_p; ++i) { size_t idxpart = off_p + i; @@ -152,6 +153,7 @@ namespace particle // allocate memory for all local particles particles_.allocate(overload * num_p_in_load, b64reals, b64ids); // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well + for (size_t i = 0, ipcount = 0; i < field.size(0); ++i) { for (size_t j = 0; j < field.size(1); ++j) @@ 
-178,6 +180,7 @@ namespace particle
       glass_ptr_ = std::make_unique( cf, field );
       particles_.allocate(glass_ptr_->size(), b64reals, b64ids);
 
+      #pragma omp parallel for
       for (size_t i = 0; i < glass_ptr_->size(); ++i)
       {
         if (b64ids)
@@ -237,6 +240,7 @@ namespace particle
       else
       {
         glass_ptr_->update_ghosts( field );
+        #pragma omp parallel for
         for (size_t i = 0; i < glass_ptr_->size(); ++i)
         {
           auto pos = glass_ptr_->glass_posr[i];
@@ -295,6 +299,7 @@ namespace particle
       else
       {
         glass_ptr_->update_ghosts( field );
+        #pragma omp parallel for
         for (size_t i = 0; i < glass_ptr_->size(); ++i)
         {
           auto pos = glass_ptr_->glass_posr[i];

From 658a8ad41853fe9422a25982a90e73a953ac5eba Mon Sep 17 00:00:00 2001
From: Oliver Hahn
Date: Mon, 4 May 2020 10:34:58 +0200
Subject: [PATCH 129/130] cleanup

---
 include/grid_fft.hh | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/include/grid_fft.hh b/include/grid_fft.hh
index 8097b36..e07e5ed 100644
--- a/include/grid_fft.hh
+++ b/include/grid_fft.hh
@@ -28,17 +28,8 @@ public:
     static constexpr bool is_distributed_trait{bdistributed};
 
 protected:
-#if defined(USE_MPI)
-    const MPI_Datatype MPI_data_t_type =
-        (typeid(data_t) == typeid(float)) ? MPI_FLOAT
-        : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE
-        : (typeid(data_t) == typeid(long double)) ? MPI_LONG_DOUBLE
-        : (typeid(data_t) == typeid(std::complex)) ? MPI_C_FLOAT_COMPLEX
-        : (typeid(data_t) == typeid(std::complex)) ? MPI_C_DOUBLE_COMPLEX
-        : (typeid(data_t) == typeid(std::complex)) ? MPI_C_LONG_DOUBLE_COMPLEX
-        : MPI_INT;
-#endif
     using grid_fft_t = Grid_FFT;
+
 public:
     std::array n_, nhalf_;
     std::array sizes_;

From 0937242a1b9cc17578b9d611c8ed5c916cdd6768 Mon Sep 17 00:00:00 2001
From: Oliver Hahn
Date: Tue, 5 May 2020 18:21:04 +0200
Subject: [PATCH 130/130] class submodule update

---
 external/class | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/external/class b/external/class
index 055d8bc..6adecae 160000
--- a/external/class
+++ b/external/class
@@ -1 +1 @@
-Subproject commit 055d8bca371631da0c51ff167ce81905996b4ca2
+Subproject commit 6adecae2f30172a94e003155090791abf509d995
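
For reference, the slab lookup that patches 124-126 introduce in grid_interpolate.hh (the get_task() helper used to decide which MPI task owns a particle before the MPI_Alltoallv exchange) reduces to a binary search over the tasks' first slab indices. The stand-alone sketch below illustrates only that logic; the slab starts and test positions are made-up values, not taken from the patches, and the member-function form used in the code is condensed to a free function here.

    #include <algorithm>
    #include <iostream>
    #include <vector>

    // Mirror of get_task(): local0starts holds each task's first slab index
    // (gathered via MPI_Allgather in the patches); the owner of position x0 is
    // the last task whose slab start is <= int(x0).
    int get_task(double x0, const std::vector<int> &local0starts)
    {
        auto it = std::upper_bound(local0starts.begin(), local0starts.end(), int(x0));
        return int(std::distance(local0starts.begin(), it)) - 1;
    }

    int main()
    {
        std::vector<int> local0starts{0, 16, 32, 48}; // e.g. 4 tasks, 16 slabs each
        for (double x : {3.5, 16.0, 47.9, 63.0})
            std::cout << "x0=" << x << " -> task " << get_task(x, local0starts) << "\n";
        return 0;
    }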