mirror of
https://github.com/cosmo-sims/monofonIC.git
synced 2024-09-19 17:03:45 +02:00
Merge branch 'dev-plt' of https://bitbucket.org/ohahn/monofonic into develop
This commit is contained in:
commit
a2f4cbb434
20 changed files with 3459 additions and 329 deletions
|
@ -5,7 +5,7 @@ project(monofonIC)
|
|||
# include class submodule
|
||||
include(${CMAKE_CURRENT_SOURCE_DIR}/external/class.cmake)
|
||||
|
||||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address")
|
||||
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic")
|
||||
find_package(PkgConfig REQUIRED)
|
||||
|
||||
|
@ -49,17 +49,22 @@ if(ENABLE_MPI)
|
|||
endif(ENABLE_MPI)
|
||||
|
||||
|
||||
########################################################################################################################
|
||||
# FFTW
|
||||
cmake_policy(SET CMP0074 NEW)
|
||||
if(POLICY CMP0074)
|
||||
cmake_policy(SET CMP0074 NEW)
|
||||
endif()
|
||||
if(ENABLE_MPI)
|
||||
find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS MPI)
|
||||
else()
|
||||
find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS)
|
||||
endif(ENABLE_MPI)
|
||||
|
||||
########################################################################################################################
|
||||
# GSL
|
||||
find_package(GSL REQUIRED)
|
||||
|
||||
########################################################################################################################
|
||||
# HDF5
|
||||
find_package(HDF5 REQUIRED)
|
||||
|
||||
|
|
28
README.md
28
README.md
|
@ -17,4 +17,30 @@ Create build directory, configure, and build:
|
|||
make
|
||||
|
||||
this should create an executable in the build directory.
|
||||
There is an example parameter file 'example.conf' in the main directory
|
||||
|
||||
If you run into problems with CMake not being able to find your local FFTW3 or HDF5 installation, it is best to give the path directly as
|
||||
|
||||
FFTW3_ROOT=<path> HDF5_ROOT=<path> ccmake ..
|
||||
|
||||
make sure to delete previous files generated by CMake before reconfiguring like this.
|
||||
|
||||
If you want to build on macOS, then it is strongly recommended to use GNU (or Intel) compilers instead of Apple's Clang. Install them e.g.
|
||||
via homebrew and then configure cmake to use them instead of the macOS default compiler via
|
||||
|
||||
CC=gcc-9 CXX=g++-9 ccmake ..
|
||||
|
||||
This is necessary since Apple's compilers haven't supported OpenMP for years.
|
||||
|
||||
## Running
|
||||
|
||||
There is an example parameter file 'example.conf' in the main directory. Possible options are explained in it, it can be run
|
||||
as a simple argument, e.g. from within the build directory:
|
||||
|
||||
./monofonic ../example.conf
|
||||
|
||||
If you want to run with MPI, you need to enable MPI support via ccmake. Then you can launch in hybrid MPI+threads mode by
|
||||
specifying the desired number of threads per task in the config file, and the number of tasks to be launched via
|
||||
|
||||
mpirun -np 16 ./monofonic <path to config file>
|
||||
|
||||
It will then run with 16 tasks times the number of threads per task specified in the config file.
|
42
example.conf
42
example.conf
|
@ -1,23 +1,25 @@
|
|||
[setup]
|
||||
# number of grid cells per linear dimension for calculations = particles for sc initial load
|
||||
GridRes = 128
|
||||
GridRes = 128
|
||||
# length of the box in Mpc/h
|
||||
BoxLength = 250
|
||||
BoxLength = 250
|
||||
# starting redshift
|
||||
zstart = 100.0
|
||||
zstart = 49.0
|
||||
# order of the LPT to be used (1,2 or 3)
|
||||
LPTorder = 3
|
||||
LPTorder = 3
|
||||
# also do baryon ICs?
|
||||
DoBaryons = no
|
||||
DoBaryons = no
|
||||
# do mode fixing à la Angulo&Pontzen
|
||||
DoFixing = no
|
||||
DoFixing = no
|
||||
# particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!)
|
||||
ParticleLoad = sc
|
||||
ParticleLoad = sc
|
||||
|
||||
[cosmology]
|
||||
transfer = CLASS
|
||||
ztarget = 100.0
|
||||
#transfer = eisenstein
|
||||
ztarget = 0.0
|
||||
# transfer = eisenstein
|
||||
# transfer = file_CAMB
|
||||
# transfer_file = wmap5_transfer_out_z0.dat
|
||||
Omega_m = 0.302
|
||||
Omega_b = 0.045
|
||||
Omega_L = 0.698
|
||||
|
@ -26,9 +28,9 @@ sigma_8 = 0.811
|
|||
nspec = 0.961
|
||||
|
||||
# anisotropic large scale tidal field
|
||||
#LSS_aniso_lx = 0.1
|
||||
#LSS_aniso_ly = 0.1
|
||||
#LSS_aniso_lz = -0.2
|
||||
#LSS_aniso_lx = +0.1
|
||||
#LSS_aniso_ly = +0.1
|
||||
#LSS_aniso_lz = -0.2
|
||||
|
||||
[random]
|
||||
generator = NGENIC
|
||||
|
@ -37,18 +39,18 @@ seed = 9001
|
|||
[testing]
|
||||
# enables diagnostic output
|
||||
# can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence'
|
||||
test = convergence
|
||||
test = none
|
||||
|
||||
[execution]
|
||||
NumThreads = 4
|
||||
NumThreads = 4
|
||||
|
||||
[output]
|
||||
fname_hdf5 = output.hdf5
|
||||
fbase_analysis = output
|
||||
fname_hdf5 = output_sch.hdf5
|
||||
fbase_analysis = output
|
||||
|
||||
format = gadget2
|
||||
filename = ics_gadget.dat
|
||||
UseLongids = false
|
||||
format = gadget2
|
||||
filename = ics_gadget.dat
|
||||
UseLongids = false
|
||||
|
||||
#format = generic
|
||||
#filename = debug.hdf5
|
||||
|
@ -58,5 +60,3 @@ UseLongids = false
|
|||
#filename = ics_ramses
|
||||
#grafic_use_SPT = yes
|
||||
|
||||
|
||||
|
||||
|
|
33
example_testing.conf
Normal file
33
example_testing.conf
Normal file
|
@ -0,0 +1,33 @@
|
|||
[setup]
|
||||
GridRes = 256
|
||||
BoxLength = 6.28318530718
|
||||
zstart = 0.0
|
||||
LPTorder = 1
|
||||
SymplecticPT = no
|
||||
DoFixing = no
|
||||
|
||||
[execution]
|
||||
NumThreads = 4
|
||||
|
||||
[output]
|
||||
fname_hdf5 = output.hdf5
|
||||
fbase_analysis = output
|
||||
#format = gadget2
|
||||
#filename = ics_gadget.dat
|
||||
format = generic
|
||||
filename = debug.hdf5
|
||||
generic_out_eulerian = yes
|
||||
|
||||
[random]
|
||||
generator = NGENIC
|
||||
seed = 9001
|
||||
|
||||
[cosmology]
|
||||
#transfer = CLASS
|
||||
transfer = eisenstein
|
||||
Omega_m = 1.0
|
||||
Omega_b = 0.045
|
||||
Omega_L = 0.0
|
||||
H0 = 70.3
|
||||
sigma_8 = 0.811
|
||||
nspec = 0.961
|
1
external/fftwpp
vendored
Submodule
1
external/fftwpp
vendored
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit ec6b82cc1122ba029a7a7142cf836014e992e68c
|
62
ics.conf
Normal file
62
ics.conf
Normal file
|
@ -0,0 +1,62 @@
|
|||
[setup]
|
||||
# number of grid cells per linear dimension for calculations = particles for sc initial load
|
||||
GridRes = 128
|
||||
# length of the box in Mpc/h
|
||||
BoxLength = 200
|
||||
# starting redshift
|
||||
zstart = 24.0
|
||||
# order of the LPT to be used (1,2 or 3)
|
||||
LPTorder = 1
|
||||
# also do baryon ICs?
|
||||
DoBaryons = no
|
||||
# do mode fixing à la Angulo&Pontzen
|
||||
DoFixing = yes
|
||||
# particle load, can be 'sc' (1x), 'bcc' (2x), 'fcc' (4x), or 'rsc' (8x)
|
||||
ParticleLoad = sc
|
||||
|
||||
[testing]
|
||||
# enables diagnostic output
|
||||
# can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence'
|
||||
#test = potentials_and_densities
|
||||
#test = convergence
|
||||
test = none
|
||||
|
||||
[execution]
|
||||
NumThreads = 1
|
||||
|
||||
[output]
|
||||
fname_hdf5 = output.hdf5
|
||||
fbase_analysis = output
|
||||
|
||||
#format = gadget2
|
||||
#filename = ics_gadget.dat
|
||||
|
||||
format = generic
|
||||
filename = debug.hdf5
|
||||
#generic_out_eulerian = yes
|
||||
|
||||
#format = grafic2
|
||||
#filename = ics_ramses
|
||||
#grafic_use_SPT = yes
|
||||
|
||||
[random]
|
||||
generator = NGENIC
|
||||
seed = 9001
|
||||
|
||||
[cosmology]
|
||||
transfer = eisenstein
|
||||
#transfer = CLASS
|
||||
#transfer = eisenstein_wdm
|
||||
#WDMmass = 0.1
|
||||
Omega_m = 0.302
|
||||
Omega_b = 0.045
|
||||
Omega_L = 0.698
|
||||
H0 = 70.3
|
||||
sigma_8 = 0.811
|
||||
nspec = 0.961
|
||||
|
||||
# anisotropic large scale tidal field
|
||||
#LSS_aniso_lx = 0.1
|
||||
#LSS_aniso_ly = 0.1
|
||||
#LSS_aniso_lz = -0.2
|
||||
|
|
@ -127,14 +127,19 @@ public:
|
|||
return pNorm * scale * scale * TransferSq(k) * pow((double)k, (double)cosmo_param_.nspect);
|
||||
}
|
||||
|
||||
inline static double H_of_a(double a, void *Params)
|
||||
inline static double H_of_a(double a, const void *Params)
|
||||
{
|
||||
CosmologyParameters *cosm = (CosmologyParameters *)Params;
|
||||
const CosmologyParameters *cosm = (CosmologyParameters *)Params;
|
||||
double a2 = a * a;
|
||||
double Ha = sqrt(cosm->Omega_m / (a2 * a) + cosm->Omega_k / a2 + cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a));
|
||||
return Ha;
|
||||
}
|
||||
|
||||
inline double H_of_a( double a ) const
|
||||
{
|
||||
return 100.0 * this->H_of_a(a,reinterpret_cast<const void*>(&this->cosmo_param_));
|
||||
}
|
||||
|
||||
inline static double Hprime_of_a(double a, void *Params)
|
||||
{
|
||||
CosmologyParameters *cosm = (CosmologyParameters *)Params;
|
||||
|
@ -168,10 +173,7 @@ public:
|
|||
*/
|
||||
inline real_t CalcGrowthRate( real_t a )
|
||||
{
|
||||
#warning CalcGrowthRate is only correct if dark energy is a cosmological constant, need to upgrade calculator...
|
||||
real_t y = cosmo_param_.Omega_m*(1.0/a-1.0) + cosmo_param_.Omega_DE*(a*a-1.0) + 1.0;
|
||||
real_t fact = integrate( &fIntegrand, 1e-6, a, (void*)&cosmo_param_ );
|
||||
return (cosmo_param_.Omega_DE*a*a-0.5*cosmo_param_.Omega_m/a)/y - 1.0 + a*fIntegrand(a,(void*)&cosmo_param_)/fact;
|
||||
return CalcVFact(a) / H_of_a(a) / a;
|
||||
}
|
||||
|
||||
//! Computes the linear theory growth factor D+
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
#include <fftw3.h>
|
||||
#endif
|
||||
|
||||
#include <config_file.hh>
|
||||
|
||||
#ifdef USE_SINGLEPRECISION
|
||||
using real_t = float;
|
||||
using complex_t = fftwf_complex;
|
||||
|
|
|
@ -16,16 +16,23 @@ enum space_t
|
|||
};
|
||||
|
||||
|
||||
template <typename data_t>
|
||||
#ifdef USE_MPI
|
||||
template <typename data_t, bool bdistributed=true>
|
||||
#else
|
||||
template <typename data_t, bool bdistributed=false>
|
||||
#endif
|
||||
class Grid_FFT
|
||||
{
|
||||
protected:
|
||||
#if defined(USE_MPI)
|
||||
const MPI_Datatype MPI_data_t_type = (typeid(data_t) == typeid(double)) ? MPI_DOUBLE
|
||||
: (typeid(data_t) == typeid(float)) ? MPI_FLOAT
|
||||
: (typeid(data_t) == typeid(std::complex<float>)) ? MPI_COMPLEX
|
||||
: (typeid(data_t) == typeid(std::complex<double>)) ? MPI_DOUBLE_COMPLEX : MPI_INT;
|
||||
const MPI_Datatype MPI_data_t_type =
|
||||
(typeid(data_t) == typeid(double)) ? MPI_DOUBLE
|
||||
: (typeid(data_t) == typeid(float)) ? MPI_FLOAT
|
||||
: (typeid(data_t) == typeid(std::complex<float>)) ? MPI_COMPLEX
|
||||
: (typeid(data_t) == typeid(std::complex<double>)) ? MPI_DOUBLE_COMPLEX
|
||||
: MPI_INT;
|
||||
#endif
|
||||
using grid_fft_t = Grid_FFT<data_t,bdistributed>;
|
||||
public:
|
||||
std::array<size_t, 3> n_, nhalf_;
|
||||
std::array<size_t, 4> sizes_;
|
||||
|
@ -54,7 +61,7 @@ public:
|
|||
}
|
||||
|
||||
// avoid implicit copying of data
|
||||
Grid_FFT(const Grid_FFT<data_t> &g) = delete;
|
||||
Grid_FFT(const grid_fft_t &g) = delete;
|
||||
|
||||
~Grid_FFT()
|
||||
{
|
||||
|
@ -64,10 +71,15 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
const Grid_FFT<data_t> *get_grid(size_t ilevel) const { return this; }
|
||||
const grid_fft_t *get_grid(size_t ilevel) const { return this; }
|
||||
|
||||
bool is_distributed( void ) const { return bdistributed; }
|
||||
|
||||
void Setup();
|
||||
|
||||
//! return the number of data_t elements that we store in the container
|
||||
size_t memsize( void ) const { return ntot_; }
|
||||
|
||||
//! return the (local) size of dimension i
|
||||
size_t size(size_t i) const { return sizes_[i]; }
|
||||
|
||||
|
@ -91,7 +103,7 @@ public:
|
|||
data_[i] = 0.0;
|
||||
}
|
||||
|
||||
void copy_from(const Grid_FFT<data_t> &g)
|
||||
void copy_from(const grid_fft_t &g)
|
||||
{
|
||||
// make sure the two fields are in the same space
|
||||
if (g.space_ != this->space_)
|
||||
|
@ -217,32 +229,89 @@ public:
|
|||
vec3<ft> get_k(const size_t i, const size_t j, const size_t k) const
|
||||
{
|
||||
vec3<ft> kk;
|
||||
|
||||
#if defined(USE_MPI)
|
||||
auto ip = i + local_1_start_;
|
||||
kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0];
|
||||
kk[1] = (real_t(ip) - real_t(ip > nhalf_[1]) * n_[1]) * kfac_[1];
|
||||
#else
|
||||
kk[0] = (real_t(i) - real_t(i > nhalf_[0]) * n_[0]) * kfac_[0];
|
||||
kk[1] = (real_t(j) - real_t(j > nhalf_[1]) * n_[1]) * kfac_[1];
|
||||
#endif
|
||||
if( bdistributed ){
|
||||
auto ip = i + local_1_start_;
|
||||
kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0];
|
||||
kk[1] = (real_t(ip) - real_t(ip > nhalf_[1]) * n_[1]) * kfac_[1];
|
||||
}else{
|
||||
kk[0] = (real_t(i) - real_t(i > nhalf_[0]) * n_[0]) * kfac_[0];
|
||||
kk[1] = (real_t(j) - real_t(j > nhalf_[1]) * n_[1]) * kfac_[1];
|
||||
}
|
||||
kk[2] = (real_t(k) - real_t(k > nhalf_[2]) * n_[2]) * kfac_[2];
|
||||
|
||||
return kk;
|
||||
}
|
||||
|
||||
std::array<size_t,3> get_k3(const size_t i, const size_t j, const size_t k) const
|
||||
{
|
||||
return bdistributed? std::array<size_t,3>({j,i+local_1_start_,k}) : std::array<size_t,3>({i,j,k});
|
||||
}
|
||||
|
||||
data_t get_cic( const vec3<real_t>& v ) const{
|
||||
// warning! this doesn't work with MPI
|
||||
vec3<real_t> x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0],
|
||||
std::fmod(v.y/length_[1]+1.0,1.0)*n_[1],
|
||||
std::fmod(v.z/length_[2]+1.0,1.0)*n_[2] });
|
||||
size_t ix = static_cast<size_t>(x.x);
|
||||
size_t iy = static_cast<size_t>(x.y);
|
||||
size_t iz = static_cast<size_t>(x.z);
|
||||
real_t dx = x.x-real_t(ix), tx = 1.0-dx;
|
||||
real_t dy = x.y-real_t(iy), ty = 1.0-dy;
|
||||
real_t dz = x.z-real_t(iz), tz = 1.0-dz;
|
||||
size_t ix1 = (ix+1)%n_[0];
|
||||
size_t iy1 = (iy+1)%n_[1];
|
||||
size_t iz1 = (iz+1)%n_[2];
|
||||
data_t val = 0.0;
|
||||
val += this->relem(ix ,iy ,iz ) * tx * ty * tz;
|
||||
val += this->relem(ix ,iy ,iz1) * tx * ty * dz;
|
||||
val += this->relem(ix ,iy1,iz ) * tx * dy * tz;
|
||||
val += this->relem(ix ,iy1,iz1) * tx * dy * dz;
|
||||
val += this->relem(ix1,iy ,iz ) * dx * ty * tz;
|
||||
val += this->relem(ix1,iy ,iz1) * dx * ty * dz;
|
||||
val += this->relem(ix1,iy1,iz ) * dx * dy * tz;
|
||||
val += this->relem(ix1,iy1,iz1) * dx * dy * dz;
|
||||
return val;
|
||||
}
|
||||
|
||||
ccomplex_t get_cic_kspace( const vec3<real_t>& x ) const{
|
||||
// warning! this doesn't work with MPI
|
||||
size_t ix = static_cast<size_t>(x.x);
|
||||
size_t iy = static_cast<size_t>(x.y);
|
||||
size_t iz = std::min(static_cast<size_t>(x.z),size(2)-1); //static_cast<size_t>(x.z);
|
||||
real_t dx = x.x-real_t(ix), tx = 1.0-dx;
|
||||
real_t dy = x.y-real_t(iy), ty = 1.0-dy;
|
||||
real_t dz = x.z-real_t(iz), tz = 1.0-dz;
|
||||
size_t ix1 = (ix+1)%size(0);
|
||||
size_t iy1 = (iy+1)%size(1);
|
||||
size_t iz1 = std::min((iz+1),size(2)-1);
|
||||
ccomplex_t val = 0.0;
|
||||
val += this->kelem(ix ,iy ,iz ) * tx * ty * tz;
|
||||
val += this->kelem(ix ,iy ,iz1) * tx * ty * dz;
|
||||
val += this->kelem(ix ,iy1,iz ) * tx * dy * tz;
|
||||
val += this->kelem(ix ,iy1,iz1) * tx * dy * dz;
|
||||
val += this->kelem(ix1,iy ,iz ) * dx * ty * tz;
|
||||
val += this->kelem(ix1,iy ,iz1) * dx * ty * dz;
|
||||
val += this->kelem(ix1,iy1,iz ) * dx * dy * tz;
|
||||
val += this->kelem(ix1,iy1,iz1) * dx * dy * dz;
|
||||
// if( val != val ){
|
||||
//auto k = this->get_k<real_t>(ix,iy,iz);
|
||||
//std::cerr << ix << " " << iy << " " << iz << " " << val << " " << this->gradient(0,{ix,iy,iz}) << " " << this->gradient(1,{ix,iy,iz}) << " " << this->gradient(2,{ix,iy,iz}) << std::endl;
|
||||
// }
|
||||
return val;
|
||||
}
|
||||
|
||||
inline ccomplex_t gradient( const int idim, std::array<size_t,3> ijk ) const
|
||||
{
|
||||
#if defined(USE_MPI)
|
||||
ijk[0] += local_1_start_;
|
||||
std::swap(ijk[0],ijk[1]);
|
||||
#endif
|
||||
if( bdistributed ){
|
||||
ijk[0] += local_1_start_;
|
||||
std::swap(ijk[0],ijk[1]);
|
||||
}
|
||||
real_t rgrad =
|
||||
(ijk[idim]!=nhalf_[idim])? (real_t(ijk[idim]) - real_t(ijk[idim] > nhalf_[idim]) * n_[idim]) * kfac_[idim] : 0.0;
|
||||
return ccomplex_t(0.0,rgrad);
|
||||
}
|
||||
|
||||
Grid_FFT<data_t> &operator*=(data_t x)
|
||||
grid_fft_t &operator*=(data_t x)
|
||||
{
|
||||
if (space_ == kspace_id)
|
||||
{
|
||||
|
@ -255,7 +324,7 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
Grid_FFT<data_t> &operator/=(data_t x)
|
||||
grid_fft_t &operator/=(data_t x)
|
||||
{
|
||||
if (space_ == kspace_id)
|
||||
{
|
||||
|
@ -268,7 +337,7 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
Grid_FFT<data_t> &apply_Laplacian(void)
|
||||
grid_fft_t &apply_Laplacian(void)
|
||||
{
|
||||
this->FourierTransformForward();
|
||||
this->apply_function_k_dep([&](auto x, auto k) {
|
||||
|
@ -279,7 +348,7 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
Grid_FFT<data_t> &apply_negative_Laplacian(void)
|
||||
grid_fft_t &apply_negative_Laplacian(void)
|
||||
{
|
||||
this->FourierTransformForward();
|
||||
this->apply_function_k_dep([&](auto x, auto k) {
|
||||
|
@ -290,7 +359,7 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
Grid_FFT<data_t> &apply_InverseLaplacian(void)
|
||||
grid_fft_t &apply_InverseLaplacian(void)
|
||||
{
|
||||
this->FourierTransformForward();
|
||||
this->apply_function_k_dep([&](auto x, auto k) {
|
||||
|
@ -338,8 +407,7 @@ public:
|
|||
double compute_2norm(void)
|
||||
{
|
||||
real_t sum1{0.0};
|
||||
#pragma omp parallel for reduction(+ \
|
||||
: sum1)
|
||||
#pragma omp parallel for reduction(+ : sum1)
|
||||
for (size_t i = 0; i < sizes_[0]; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sizes_[1]; ++j)
|
||||
|
@ -363,8 +431,7 @@ public:
|
|||
double sum1{0.0}, sum2{0.0};
|
||||
size_t count{0};
|
||||
|
||||
#pragma omp parallel for reduction(+ \
|
||||
: sum1, sum2)
|
||||
#pragma omp parallel for reduction(+ : sum1, sum2)
|
||||
for (size_t i = 0; i < sizes_[0]; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sizes_[1]; ++j)
|
||||
|
@ -380,24 +447,26 @@ public:
|
|||
count = sizes_[0] * sizes_[1] * sizes_[2];
|
||||
|
||||
#ifdef USE_MPI
|
||||
double globsum1{0.0}, globsum2{0.0};
|
||||
size_t globcount{0};
|
||||
if( bdistributed ){
|
||||
double globsum1{0.0}, globsum2{0.0};
|
||||
size_t globcount{0};
|
||||
|
||||
MPI_Allreduce(reinterpret_cast<const void *>(&sum1),
|
||||
reinterpret_cast<void *>(&globsum1),
|
||||
1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
MPI_Allreduce(reinterpret_cast<const void *>(&sum1),
|
||||
reinterpret_cast<void *>(&globsum1),
|
||||
1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
MPI_Allreduce(reinterpret_cast<const void *>(&sum2),
|
||||
reinterpret_cast<void *>(&globsum2),
|
||||
1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
MPI_Allreduce(reinterpret_cast<const void *>(&sum2),
|
||||
reinterpret_cast<void *>(&globsum2),
|
||||
1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
MPI_Allreduce(reinterpret_cast<const void *>(&count),
|
||||
reinterpret_cast<void *>(&globcount),
|
||||
1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
|
||||
MPI_Allreduce(reinterpret_cast<const void *>(&count),
|
||||
reinterpret_cast<void *>(&globcount),
|
||||
1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
sum1 = globsum1;
|
||||
sum2 = globsum2;
|
||||
count = globcount;
|
||||
sum1 = globsum1;
|
||||
sum2 = globsum2;
|
||||
count = globcount;
|
||||
}
|
||||
#endif
|
||||
sum1 /= count;
|
||||
sum2 /= count;
|
||||
|
@ -410,8 +479,7 @@ public:
|
|||
double sum1{0.0};
|
||||
size_t count{0};
|
||||
|
||||
#pragma omp parallel for reduction(+ \
|
||||
: sum1)
|
||||
#pragma omp parallel for reduction(+ : sum1)
|
||||
for (size_t i = 0; i < sizes_[0]; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sizes_[1]; ++j)
|
||||
|
@ -426,19 +494,21 @@ public:
|
|||
count = sizes_[0] * sizes_[1] * sizes_[2];
|
||||
|
||||
#ifdef USE_MPI
|
||||
double globsum1{0.0};
|
||||
size_t globcount{0};
|
||||
if( bdistributed ){
|
||||
double globsum1{0.0};
|
||||
size_t globcount{0};
|
||||
|
||||
MPI_Allreduce(reinterpret_cast<const void *>(&sum1),
|
||||
reinterpret_cast<void *>(&globsum1),
|
||||
1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
MPI_Allreduce(reinterpret_cast<const void *>(&sum1),
|
||||
reinterpret_cast<void *>(&globsum1),
|
||||
1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
MPI_Allreduce(reinterpret_cast<const void *>(&count),
|
||||
reinterpret_cast<void *>(&globcount),
|
||||
1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
|
||||
MPI_Allreduce(reinterpret_cast<const void *>(&count),
|
||||
reinterpret_cast<void *>(&globcount),
|
||||
1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
sum1 = globsum1;
|
||||
count = globcount;
|
||||
sum1 = globsum1;
|
||||
count = globcount;
|
||||
}
|
||||
#endif
|
||||
|
||||
sum1 /= count;
|
||||
|
@ -449,9 +519,9 @@ public:
|
|||
template <typename functional, typename grid_t>
|
||||
void assign_function_of_grids_r(const functional &f, const grid_t &g)
|
||||
{
|
||||
assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) );
|
||||
assert(g.size(0) == size(0) && g.size(1) == size(1));
|
||||
|
||||
#pragma omp parallel for
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < sizes_[0]; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sizes_[1]; ++j)
|
||||
|
@ -470,10 +540,10 @@ public:
|
|||
template <typename functional, typename grid1_t, typename grid2_t>
|
||||
void assign_function_of_grids_r(const functional &f, const grid1_t &g1, const grid2_t &g2)
|
||||
{
|
||||
assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g1.size(2) == size(2));
|
||||
assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g2.size(2) == size(2));
|
||||
assert(g1.size(0) == size(0) && g1.size(1) == size(1));
|
||||
assert(g2.size(0) == size(0) && g2.size(1) == size(1));
|
||||
|
||||
#pragma omp parallel for
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < sizes_[0]; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sizes_[1]; ++j)
|
||||
|
@ -499,7 +569,7 @@ public:
|
|||
assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g2.size(2) == size(2));
|
||||
assert(g3.size(0) == size(0) && g3.size(1) == size(1)); // && g3.size(2) == size(2));
|
||||
|
||||
#pragma omp parallel for
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < sizes_[0]; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sizes_[1]; ++j)
|
||||
|
@ -524,7 +594,7 @@ public:
|
|||
{
|
||||
assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) );
|
||||
|
||||
#pragma omp parallel for
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < sizes_[0]; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sizes_[1]; ++j)
|
||||
|
@ -546,7 +616,7 @@ public:
|
|||
assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g.size(2) == size(2) );
|
||||
assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g.size(2) == size(2) );
|
||||
|
||||
#pragma omp parallel for
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < sizes_[0]; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sizes_[1]; ++j)
|
||||
|
@ -563,12 +633,12 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
template <typename functional, typename grid1_t>
|
||||
void assign_function_of_grids_kdep(const functional &f, const grid1_t &g)
|
||||
template <typename functional, typename grid_t>
|
||||
void assign_function_of_grids_kdep(const functional &f, const grid_t &g)
|
||||
{
|
||||
assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) );
|
||||
|
||||
#pragma omp parallel for
|
||||
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < sizes_[0]; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sizes_[1]; ++j)
|
||||
|
@ -577,7 +647,7 @@ public:
|
|||
{
|
||||
auto &elem = this->kelem(i, j, k);
|
||||
const auto &elemg = g.kelem(i, j, k);
|
||||
|
||||
|
||||
elem = f(this->get_k<real_t>(i, j, k), elemg);
|
||||
}
|
||||
}
|
||||
|
@ -587,15 +657,15 @@ public:
|
|||
template <typename functional, typename grid1_t, typename grid2_t>
|
||||
void assign_function_of_grids_kdep(const functional &f, const grid1_t &g1, const grid2_t &g2)
|
||||
{
|
||||
assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g.size(2) == size(2) );
|
||||
assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g.size(2) == size(2) );
|
||||
assert(g1.size(0) == size(0) && g1.size(1) == size(1) && g1.size(2) == size(2) );
|
||||
assert(g2.size(0) == size(0) && g2.size(1) == size(1) && g2.size(2) == size(2) );
|
||||
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < sizes_[0]; ++i)
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < size(0); ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sizes_[1]; ++j)
|
||||
for (size_t j = 0; j < size(1); ++j)
|
||||
{
|
||||
for (size_t k = 0; k < sizes_[2]; ++k)
|
||||
for (size_t k = 0; k < size(2); ++k)
|
||||
{
|
||||
auto &elem = this->kelem(i, j, k);
|
||||
const auto &elemg1 = g1.kelem(i, j, k);
|
||||
|
@ -610,7 +680,7 @@ public:
|
|||
template <typename functional>
|
||||
void apply_function_k_dep(const functional &f)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < sizes_[0]; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sizes_[1]; ++j)
|
||||
|
@ -627,7 +697,7 @@ public:
|
|||
template <typename functional>
|
||||
void apply_function_r_dep(const functional &f)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < sizes_[0]; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sizes_[1]; ++j)
|
||||
|
@ -657,27 +727,28 @@ public:
|
|||
|
||||
void Write_PDF(std::string ofname, int nbins = 1000, double scale = 1.0, double rhomin = 1e-3, double rhomax = 1e3);
|
||||
|
||||
void shift_field( const vec3<real_t>& s )
|
||||
void shift_field( const vec3<real_t>& s, bool transform_back=true )
|
||||
{
|
||||
FourierTransformForward();
|
||||
apply_function_k_dep([&](auto x, auto k) -> ccomplex_t {
|
||||
#ifdef WITH_MPI
|
||||
real_t shift = s.y * k[0] * get_dx()[0] + s.x * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2];
|
||||
#else
|
||||
real_t shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2];
|
||||
#endif
|
||||
real_t shift;
|
||||
if( bdistributed ){
|
||||
shift = s.y * k[0] * get_dx()[0] + s.x * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2];
|
||||
}else{
|
||||
shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2];
|
||||
}
|
||||
return x * std::exp(ccomplex_t(0.0, shift));
|
||||
});
|
||||
FourierTransformBackward();
|
||||
if( transform_back ){
|
||||
FourierTransformBackward();
|
||||
}
|
||||
}
|
||||
|
||||
void zero_DC_mode(void)
|
||||
{
|
||||
if (space_ == kspace_id)
|
||||
{
|
||||
#ifdef USE_MPI
|
||||
if (CONFIG::MPI_task_rank == 0)
|
||||
#endif
|
||||
if (CONFIG::MPI_task_rank == 0 || !bdistributed )
|
||||
cdata_[0] = (data_t)0.0;
|
||||
}
|
||||
else
|
||||
|
@ -694,12 +765,14 @@ public:
|
|||
}
|
||||
}
|
||||
}
|
||||
if( bdistributed ){
|
||||
#if defined(USE_MPI)
|
||||
data_t glob_sum = 0.0;
|
||||
MPI_Allreduce(reinterpret_cast<void *>(&sum), reinterpret_cast<void *>(&glob_sum),
|
||||
1, GetMPIDatatype<data_t>(), MPI_SUM, MPI_COMM_WORLD);
|
||||
sum = glob_sum;
|
||||
data_t glob_sum = 0.0;
|
||||
MPI_Allreduce(reinterpret_cast<void *>(&sum), reinterpret_cast<void *>(&glob_sum),
|
||||
1, GetMPIDatatype<data_t>(), MPI_SUM, MPI_COMM_WORLD);
|
||||
sum = glob_sum;
|
||||
#endif
|
||||
}
|
||||
sum /= sizes_[0] * sizes_[1] * sizes_[2];
|
||||
|
||||
#pragma omp parallel for
|
||||
|
|
171
include/mat3.hh
Normal file
171
include/mat3.hh
Normal file
|
@ -0,0 +1,171 @@
|
|||
#include <gsl/gsl_math.h>
|
||||
#include <gsl/gsl_eigen.h>
|
||||
|
||||
#include <vec3.hh>
|
||||
|
||||
template<typename T>
|
||||
class mat3{
|
||||
protected:
|
||||
std::array<T,9> data_;
|
||||
gsl_matrix_view m_;
|
||||
gsl_vector *eval_;
|
||||
gsl_matrix *evec_;
|
||||
gsl_eigen_symmv_workspace * wsp_;
|
||||
bool bdid_alloc_gsl_;
|
||||
|
||||
void init_gsl(){
|
||||
// allocate memory for GSL operations if we haven't done so yet
|
||||
if( !bdid_alloc_gsl_ )
|
||||
{
|
||||
m_ = gsl_matrix_view_array (&data_[0], 3, 3);
|
||||
eval_ = gsl_vector_alloc (3);
|
||||
evec_ = gsl_matrix_alloc (3, 3);
|
||||
wsp_ = gsl_eigen_symmv_alloc (3);
|
||||
bdid_alloc_gsl_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
void free_gsl(){
|
||||
// free memory for GSL operations if it was allocated
|
||||
if( bdid_alloc_gsl_ )
|
||||
{
|
||||
gsl_eigen_symmv_free (wsp_);
|
||||
gsl_vector_free (eval_);
|
||||
gsl_matrix_free (evec_);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
mat3()
|
||||
: bdid_alloc_gsl_(false)
|
||||
{}
|
||||
|
||||
//! copy constructor
|
||||
mat3( const mat3<T> &m)
|
||||
: data_(m.data_), bdid_alloc_gsl_(false)
|
||||
{}
|
||||
|
||||
//! move constructor
|
||||
mat3( mat3<T> &&m)
|
||||
: data_(std::move(m.data_)), bdid_alloc_gsl_(false)
|
||||
{}
|
||||
|
||||
//! construct mat3 from initializer list
|
||||
template<typename ...E>
|
||||
mat3(E&&...e)
|
||||
: data_{{std::forward<E>(e)...}}, bdid_alloc_gsl_(false)
|
||||
{}
|
||||
|
||||
mat3<T>& operator=(const mat3<T>& m) noexcept{
|
||||
data_ = m.data_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
mat3<T>& operator=(const mat3<T>&& m) noexcept{
|
||||
data_ = std::move(m.data_);
|
||||
return *this;
|
||||
}
|
||||
|
||||
//! destructor
|
||||
~mat3(){
|
||||
this->free_gsl();
|
||||
}
|
||||
|
||||
//! bracket index access to vector components
|
||||
T &operator[](size_t i) noexcept { return data_[i];}
|
||||
|
||||
//! const bracket index access to vector components
|
||||
const T &operator[](size_t i) const noexcept { return data_[i]; }
|
||||
|
||||
//! matrix 2d index access
|
||||
T &operator()(size_t i, size_t j) noexcept { return data_[3*i+j]; }
|
||||
|
||||
//! const matrix 2d index access
|
||||
const T &operator()(size_t i, size_t j) const noexcept { return data_[3*i+j]; }
|
||||
|
||||
//! in-place addition
|
||||
mat3<T>& operator+=( const mat3<T>& rhs ) noexcept{
|
||||
for (size_t i = 0; i < 9; ++i) {
|
||||
(*this)[i] += rhs[i];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
//! in-place subtraction
|
||||
mat3<T>& operator-=( const mat3<T>& rhs ) noexcept{
|
||||
for (size_t i = 0; i < 9; ++i) {
|
||||
(*this)[i] -= rhs[i];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
void zero() noexcept{
|
||||
for (size_t i = 0; i < 9; ++i) data_[i]=0;
|
||||
}
|
||||
|
||||
void eigen( vec3<T>& evals, vec3<T>& evec1, vec3<T>& evec2, vec3<T>& evec3 )
|
||||
{
|
||||
// for( auto x : data_ ){
|
||||
// std::cerr << x << " " ;
|
||||
// }
|
||||
// std::cerr << std::endl;
|
||||
// resort into symmetrix matrix
|
||||
// data_[8] = data_[5];
|
||||
// data_[7] = data_[4];
|
||||
// data_[6] = data_[2];
|
||||
// data_[5] = data_[4];
|
||||
// data_[4] = data_[3];
|
||||
// data_[3] = data_[1];
|
||||
|
||||
this->init_gsl();
|
||||
|
||||
gsl_eigen_symmv (&m_.matrix, eval_, evec_, wsp_);
|
||||
gsl_eigen_symmv_sort (eval_, evec_, GSL_EIGEN_SORT_VAL_ASC);
|
||||
|
||||
for( int i=0; i<3; ++i ){
|
||||
evals[i] = gsl_vector_get( eval_, i );
|
||||
evec1[i] = gsl_matrix_get( evec_, i, 0 );
|
||||
evec2[i] = gsl_matrix_get( evec_, i, 1 );
|
||||
evec3[i] = gsl_matrix_get( evec_, i, 2 );
|
||||
}
|
||||
|
||||
// std::cerr << "(" << evals[0] << " " << evals[1] << " " << evals[2] << ")" << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
constexpr const mat3<T> operator+(const mat3<T> &lhs, const mat3<T> &rhs) noexcept
|
||||
{
|
||||
mat3<T> result;
|
||||
for (size_t i = 0; i < 9; ++i) {
|
||||
result[i] = lhs[i] + rhs[i];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// matrix - vector multiplication
|
||||
template<typename T>
|
||||
vec3<T> operator*( const mat3<T> &A, const vec3<T> &v ) noexcept
|
||||
{
|
||||
vec3<T> result;
|
||||
for( int mu=0; mu<3; ++mu ){
|
||||
result[mu] = 0.0;
|
||||
for( int nu=0; nu<3; ++nu ){
|
||||
result[mu] += A(mu,nu)*v[nu];
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// template<typename T>
|
||||
// vec3<T> operator*( const vec3<T> &v, const mat3<T> &A ) noexcept
|
||||
// {
|
||||
// vec3<T> result = 0.0;
|
||||
// for( int mu=0; mu<3; ++mu ){
|
||||
// for( int nu=0; nu<3; ++nu ){
|
||||
// result[nu] += v[mu]*A(mu,nu);
|
||||
// }
|
||||
// }
|
||||
// return result;
|
||||
// }
|
|
@ -1,7 +1,11 @@
|
|||
#pragma once
|
||||
|
||||
#include <general.hh>
|
||||
|
||||
namespace op{
|
||||
|
||||
//!== long list of primitive operators to work on fields ==!//
|
||||
|
||||
//! factory returning a writer functor: calling it with (i,v) overwrites g[i] with v.
//! The field g is captured by reference and must outlive the returned lambda.
template< typename field>
inline auto assign_to( field& g )
{
    return [&g]( auto i, auto v ){ g[i] = v; };
}
|
||||
|
||||
|
@ -20,4 +24,29 @@ inline auto subtract_from( field& g ){return [&g](auto i, auto v){ g[i] -= v; };
|
|||
//! factory returning an accumulator functor: calling it with (i,v) performs g[i] -= 2*v.
//! The field g is captured by reference and must outlive the returned lambda.
template< typename field>
inline auto subtract_twice_from( field& g )
{
    return [&g]( auto i, auto v ){ g[i] -= 2*v; };
}
|
||||
|
||||
//! vanilla standard gradient
|
||||
class fourier_gradient{
|
||||
private:
|
||||
real_t boxlen_, k0_;
|
||||
size_t n_, nhalf_;
|
||||
public:
|
||||
explicit fourier_gradient( const ConfigFile& the_config )
|
||||
: boxlen_( the_config.GetValue<double>("setup", "BoxLength") ),
|
||||
k0_(2.0*M_PI/boxlen_),
|
||||
n_( the_config.GetValue<size_t>("setup","GridRes") ),
|
||||
nhalf_( n_/2 )
|
||||
{}
|
||||
|
||||
inline ccomplex_t gradient( const int idim, std::array<size_t,3> ijk ) const
|
||||
{
|
||||
real_t rgrad =
|
||||
(ijk[idim]!=nhalf_)? (real_t(ijk[idim]) - real_t(ijk[idim] > nhalf_) * n_) : 0.0;
|
||||
return ccomplex_t(0.0,rgrad * k0_);
|
||||
}
|
||||
|
||||
inline real_t vfac_corr( std::array<size_t,3> ijk ) const
|
||||
{
|
||||
return 1.0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
791
include/particle_plt.hh
Normal file
791
include/particle_plt.hh
Normal file
|
@ -0,0 +1,791 @@
|
|||
#pragma once
|
||||
|
||||
#include <general.hh>
|
||||
#include <unistd.h> // for unlink
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
#include <random>
|
||||
#include <map>
|
||||
|
||||
#include <particle_generator.hh>
|
||||
#include <grid_fft.hh>
|
||||
#include <mat3.hh>
|
||||
|
||||
#define PRODUCTION
|
||||
|
||||
namespace particle{
|
||||
//! implement Joyce, Marcos et al. PLT calculation
|
||||
|
||||
class lattice_gradient{
|
||||
private:
|
||||
const real_t boxlen_;
|
||||
const size_t ngmapto_, ngrid_, ngrid32_;
|
||||
const real_t mapratio_;
|
||||
Grid_FFT<real_t,false> D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_;
|
||||
Grid_FFT<real_t,false> grad_x_, grad_y_, grad_z_;
|
||||
std::vector<vec3<real_t>> vectk_;
|
||||
std::vector<vec3<int>> ico_, vecitk_;
|
||||
|
||||
// true iff i is an even integer (low bit clear; matches i%2==0 for negatives too)
bool is_even( int i ){ return (i & 1) == 0; }
|
||||
|
||||
bool is_in( int i, int j, int k, const mat3<int>& M ){
|
||||
vec3<int> v({i,j,k});
|
||||
auto vv = M * v;
|
||||
return is_even(vv.x)&&is_even(vv.y)&&is_even(vv.z);
|
||||
}
|
||||
|
||||
//! Compute the particle-lattice (PLT) dynamical matrix D_ij on the ngrid_^3 mesh
//! by Ewald summation (short-range real-space sum + long-range k-space sum),
//! diagonalise it mode by mode, map the eigenmodes into the first Brillouin zone
//! of the chosen lattice, and finally store the PLT-corrected gradient kernels
//! (imaginary parts in D_xx_/D_yy_/D_zz_) and the growth-rate correction
//! (in D_xy_) for later CIC interpolation by gradient()/vfac_corr().
//! \param lattice_type  0 = sc, 1 = bcc, 2 = fcc (see lattice enum)
void init_D( lattice lattice_type )
{
    constexpr real_t pi = M_PI;
    constexpr real_t twopi = 2.0*M_PI;
    constexpr real_t fourpi = 4.0*M_PI;
    const real_t sqrtpi = std::sqrt(M_PI);
    const real_t pi32 = std::pow(M_PI,1.5);

    //! === Bravais basis, reciprocal basis and FBZ face normals for the SC lattice ===
    const int charge_fac_sc = 1;
    const mat3<real_t> mat_bravais_sc{
        1.0, 0.0, 0.0,
        0.0, 1.0, 0.0,
        0.0, 0.0, 1.0,
    };
    const mat3<real_t> mat_reciprocal_sc{
        twopi, 0.0, 0.0,
        0.0, twopi, 0.0,
        0.0, 0.0, twopi,
    };
    const mat3<int> mat_invrecip_sc{
        2, 0, 0,
        0, 2, 0,
        0, 0, 2,
    };
    const std::vector<vec3<real_t>> normals_sc{
        {pi,0.,0.},{-pi,0.,0.},
        {0.,pi,0.},{0.,-pi,0.},
        {0.,0.,pi},{0.,0.,-pi},
    };

    //! === Bravais basis, reciprocal basis and FBZ face normals for the BCC lattice ===
    const int charge_fac_bcc = 2;
    const mat3<real_t> mat_bravais_bcc{
        1.0, 0.0, 0.5,
        0.0, 1.0, 0.5,
        0.0, 0.0, 0.5,
    };
    const mat3<real_t> mat_reciprocal_bcc{
        twopi, 0.0, 0.0,
        0.0, twopi, 0.0,
        -twopi, -twopi, fourpi,
    };
    const mat3<int> mat_invrecip_bcc{
        2, 0, 0,
        0, 2, 0,
        1, 1, 1,
    };
    const std::vector<vec3<real_t>> normals_bcc{
        {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi},
        {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi},
        {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.}
    };

    //! === Bravais basis, reciprocal basis and FBZ face normals for the FCC lattice ===
    const int charge_fac_fcc = 4;
    const mat3<real_t> mat_bravais_fcc{
        0.0, 0.5, 0.0,
        0.5, 0.0, 1.0,
        0.5, 0.5, 0.0,
    };
    const mat3<real_t> mat_reciprocal_fcc{
        -fourpi, fourpi, twopi,
        0.0, 0.0, twopi,
        fourpi, 0.0, -twopi,
    };
    const mat3<int> mat_invrecip_fcc{
        0, 1, 1,
        1, 0, 1,
        0, 2, 0,
    };
    const std::vector<vec3<real_t>> normals_fcc{
        {twopi,0.,0.},{-twopi,0.,0.},
        {0.,twopi,0.},{0.,-twopi,0.},
        {0.,0.,twopi},{0.,0.,-twopi},
        {+pi,+pi,+pi},{+pi,+pi,-pi},
        {+pi,-pi,+pi},{+pi,-pi,-pi},
        {-pi,+pi,+pi},{-pi,+pi,-pi},
        {-pi,-pi,+pi},{-pi,-pi,-pi},
    };

    //! select the properties for the chosen lattice
    const int ilat = lattice_type; // 0 = sc, 1 = bcc, 2 = fcc
    const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc;
    const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc;
    const auto mat_invrecip = (ilat==2)? mat_invrecip_fcc : (ilat==1)? mat_invrecip_bcc : mat_invrecip_sc;
    const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc;
    const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? charge_fac_bcc : charge_fac_sc;

    const ptrdiff_t nlattice = ngrid_;
    const real_t dx = 1.0/real_t(nlattice);

    // Ewald split parameter: cutoff scale of 4 cells
    const real_t eta = 4.0;
    const real_t alpha = 1.0/std::sqrt(2)/eta;
    const real_t alpha2 = alpha*alpha;
    const real_t alpha3 = alpha2*alpha;

    // per-particle "charge" (mass) and FFT normalisation factors
    const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_fac;
    const real_t fft_norm = 1.0/std::pow(real_t(nlattice),3.0);
    const real_t fft_norm12 = 1.0/std::pow(real_t(nlattice),1.5);

    //! just a Kronecker \delta_ij
    auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; };

    //! Ewald summation: accumulate the short-range (real-space) part of the
    //! tidal Green's function for separation d into D (symmetric update)
    auto add_greensftide_sr = [&]( mat3<real_t>& D, const vec3<real_t>& d ) -> void {
        auto r = d.norm();
        if( r< 1e-14 ) return; // contribution is zero at r=0 (self-term handled separately)

        const real_t r2(r*r), r3(r2*r), r5(r3*r2);
        const real_t K1( -alpha3/pi32 * std::exp(-alpha2*r2)/r2 );
        const real_t K2( (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha2*r2)*r)/fourpi );

        for( int mu=0; mu<3; ++mu ){
            for( int nu=mu; nu<3; ++nu ){
                real_t dd( d[mu]*d[nu] * K1 + (kronecker(mu,nu)/r3 - 3.0 * (d[mu]*d[nu])/r5) * K2 );
                D(mu,nu) += dd;
                D(nu,mu) += (mu!=nu)? dd : 0.0; // mirror into lower triangle
            }
        }
    };

    //! Ewald summation: accumulate the long-range (k-space) part of the
    //! tidal Green's function for wave vector k at position r into D
    auto add_greensftide_lr = [&]( mat3<real_t>& D, const vec3<real_t>& k, const vec3<real_t>& r ) -> void {
        real_t kmod2 = k.norm_squared();
        real_t term = std::exp(-kmod2/(4*alpha2))*std::cos(k.dot(r)) / kmod2 * fft_norm;
        for( int mu=0; mu<3; ++mu ){
            for( int nu=mu; nu<3; ++nu ){
                auto dd = k[mu] * k[nu] * term;
                D(mu,nu) += dd;
                D(nu,mu) += (mu!=nu)? dd : 0.0; // mirror into lower triangle
            }
        }
    };

    //! checks if 'vec' is inside the first Brillouin zone whose face normals
    //! are given in 'normals' (1.0001 tolerance for round-off on the faces)
    auto check_FBZ = []( const auto& normals, const auto& vec ) -> bool {
        for( const auto& n : normals ){
            if( n.dot( vec ) > 1.0001 * n.dot(n) ){
                return false;
            }
        }
        return true;
    };

    // ±lnumber periodic copies in real space, ±knumber modes in k-space
    constexpr ptrdiff_t lnumber = 3, knumber = 3;
    const int numb = 1; //!< search radius when shifting vectors into FBZ

    vectk_.assign(D_xx_.memsize(),vec3<real_t>());
    ico_.assign(D_xx_.memsize(),vec3<int>());
    vecitk_.assign(D_xx_.memsize(),vec3<int>());

    //--- step 1: tabulate the real-space dynamical matrix on all lattice sites ---
    #pragma omp parallel
    {
        //... thread-private temporary to hold values of the dynamical matrix
        mat3<real_t> matD(0.0);

        #pragma omp for
        for( ptrdiff_t i=0; i<nlattice; ++i ){
            for( ptrdiff_t j=0; j<nlattice; ++j ){
                for( ptrdiff_t k=0; k<nlattice; ++k ){
                    // lattice site vector from (i,j,k) via the Bravais basis, wrapped back to the box
                    const vec3<real_t> x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)});
                    const vec3<real_t> ar = (mat_bravais * x_ijk).wrap_abs();

                    //... zero temporary matrix
                    matD.zero();

                    // add real-space part of dynamical matrix over periodic copies
                    for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){
                        for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){
                            for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){
                                const vec3<real_t> n_ijk({real_t(ix),real_t(iy),real_t(iz)});
                                const vec3<real_t> dr(ar - mat_bravais * n_ijk);
                                add_greensftide_sr(matD, dr);
                            }
                        }
                    }

                    // add k-space part of dynamical matrix (k=0 term excluded)
                    for( ptrdiff_t ix=-knumber; ix<=knumber; ix++ ){
                        for( ptrdiff_t iy=-knumber; iy<=knumber; iy++ ){
                            for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){
                                if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){
                                    const vec3<real_t> k_ijk({real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice});
                                    const vec3<real_t> ak( mat_reciprocal * k_ijk);

                                    add_greensftide_lr(matD, ak, ar );
                                }
                            }
                        }
                    }

                    // store the six independent components of the symmetric matrix
                    D_xx_.relem(i,j,k) = matD(0,0) * charge;
                    D_xy_.relem(i,j,k) = matD(0,1) * charge;
                    D_xz_.relem(i,j,k) = matD(0,2) * charge;
                    D_yy_.relem(i,j,k) = matD(1,1) * charge;
                    D_yz_.relem(i,j,k) = matD(1,2) * charge;
                    D_zz_.relem(i,j,k) = matD(2,2) * charge;
                }
            }
        }
    } // end omp parallel region

    // fix r=0 with background density (trace adds up to unity; added later in Fourier space)
    D_xx_.relem(0,0,0) = 1.0/3.0;
    D_xy_.relem(0,0,0) = 0.0;
    D_xz_.relem(0,0,0) = 0.0;
    D_yy_.relem(0,0,0) = 1.0/3.0;
    D_yz_.relem(0,0,0) = 0.0;
    D_zz_.relem(0,0,0) = 1.0/3.0;

    //--- step 2: transform the six components to Fourier space ---
    D_xx_.FourierTransformForward();
    D_xy_.FourierTransformForward();
    D_xz_.FourierTransformForward();
    D_yy_.FourierTransformForward();
    D_yz_.FourierTransformForward();
    D_zz_.FourierTransformForward();

#ifndef PRODUCTION
    // debug output of the raw Fourier-space dynamical matrix
    if (CONFIG::MPI_task_rank == 0)
        unlink("debug.hdf5");
    D_xx_.Write_to_HDF5("debug.hdf5","Dxx");
    D_xy_.Write_to_HDF5("debug.hdf5","Dxy");
    D_xz_.Write_to_HDF5("debug.hdf5","Dxz");
    D_yy_.Write_to_HDF5("debug.hdf5","Dyy");
    D_yz_.Write_to_HDF5("debug.hdf5","Dyz");
    D_zz_.Write_to_HDF5("debug.hdf5","Dzz");

    std::ofstream ofs2("test_brillouin.txt");
#endif
    // maps integer FBZ mode indices -> flat index into the D grids
    using map_t = std::map<vec3<int>,size_t>;
    map_t iimap;

    //!=== Make temporary copies before resorting to std. Fourier grid ========!//
    // pack the six real fields pairwise into the re/im parts of three complex grids
    Grid_FFT<real_t,false>
        temp1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
        temp2({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
        temp3({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0});

    temp1.FourierTransformForward(false);
    temp2.FourierTransformForward(false);
    temp3.FourierTransformForward(false);

    #pragma omp parallel for
    for( size_t i=0; i<D_xx_.size(0); i++ )
    {
        for( size_t j=0; j<D_xx_.size(1); j++ )
        {
            for( size_t k=0; k<D_xx_.size(2); k++ )
            {
                temp1.kelem(i,j,k) = ccomplex_t(std::real(D_xx_.kelem(i,j,k)),std::real(D_xy_.kelem(i,j,k)));
                temp2.kelem(i,j,k) = ccomplex_t(std::real(D_xz_.kelem(i,j,k)),std::real(D_yy_.kelem(i,j,k)));
                temp3.kelem(i,j,k) = ccomplex_t(std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k)));
            }
        }
    }
    D_xx_.zero(); D_xy_.zero(); D_xz_.zero();
    D_yy_.zero(); D_yz_.zero(); D_zz_.zero();

    //!=== Diagonalise and resort to std. Fourier grid ========!//
    #pragma omp parallel
    {
        // thread-private matrix representation and eigen-structure buffers
        mat3<real_t> D;
        vec3<real_t> eval, evec1, evec2, evec3;

        #pragma omp for
        for( size_t i=0; i<D_xx_.size(0); i++ )
        {
            for( size_t j=0; j<D_xx_.size(1); j++ )
            {
                for( size_t k=0; k<D_xx_.size(2); k++ )
                {
                    vec3<real_t> kv = D_xx_.get_k<real_t>(i,j,k);

                    // unpack matrix elements from the packed temporaries
                    D(0,0) = std::real(temp1.kelem(i,j,k)) / fft_norm12;
                    D(0,1) = D(1,0) = std::imag(temp1.kelem(i,j,k)) / fft_norm12;
                    D(0,2) = D(2,0) = std::real(temp2.kelem(i,j,k)) / fft_norm12;
                    D(1,1) = std::imag(temp2.kelem(i,j,k)) / fft_norm12;
                    D(1,2) = D(2,1) = std::real(temp3.kelem(i,j,k)) / fft_norm12;
                    D(2,2) = std::imag(temp3.kelem(i,j,k)) / fft_norm12;

                    // compute eigenstructure of matrix (eval ascending; evec3 = largest eigenvalue)
                    D.eigen(eval, evec1, evec2, evec3);
                    evec3 /= (twopi*ngrid_);

                    // now determine to which modes on the regular lattice this contributes
                    vec3<real_t> ar = kv / (twopi*ngrid_);
                    vec3<real_t> a(mat_reciprocal * ar);

                    // translate the k-vectors into the "candidate" FBZ
                    for( int l1=-numb; l1<=numb; ++l1 ){
                        for( int l2=-numb; l2<=numb; ++l2 ){
                            for( int l3=-numb; l3<=numb; ++l3 ){
                                // need both halves of Fourier space since we use real transforms
                                for( int isign=0; isign<=1; ++isign ){
                                    const real_t sign = 2.0*real_t(isign)-1.0;
                                    const vec3<real_t> vshift({real_t(l1),real_t(l2),real_t(l3)});

                                    vec3<real_t> vectk = sign * a + mat_reciprocal * vshift;

                                    if( check_FBZ( normals, vectk ) )
                                    {
                                        int ix = std::round(vectk.x*(ngrid_)/twopi);
                                        int iy = std::round(vectk.y*(ngrid_)/twopi);
                                        int iz = std::round(vectk.z*(ngrid_)/twopi);

                                        // record which grid mode this FBZ mode maps to
                                        #pragma omp critical
                                        {iimap.insert( std::pair<vec3<int>,size_t>({ix,iy,iz}, D_xx_.get_idx(i,j,k)) );}

                                        // pack eigenvalues and growth eigenvector for later lookup
                                        temp1.kelem(i,j,k) = ccomplex_t(eval[2],eval[1]);
                                        temp2.kelem(i,j,k) = ccomplex_t(eval[0],evec3.x);
                                        temp3.kelem(i,j,k) = ccomplex_t(evec3.y,evec3.z);
                                    }
                                }//sign
                            } //l3
                        } //l2
                    } //l1
                } //k
            } //j
        } //i
    }

    // DC mode defaults
    // NOTE(review): D_zz_ gets 0.0 here while D_xx_/D_yy_ get 1.0 — confirm this
    // asymmetry is intended (these are overwritten again in the PRODUCTION path below).
    D_xx_.kelem(0,0,0) = 1.0;
    D_xy_.kelem(0,0,0) = 0.0;
    D_xz_.kelem(0,0,0) = 0.0;

    D_yy_.kelem(0,0,0) = 1.0;
    D_yz_.kelem(0,0,0) = 0.0;
    D_zz_.kelem(0,0,0) = 0.0;

    //... approximate infinite lattice by interpolating to sites not covered by current resolution...
    #pragma omp parallel for
    for( size_t i=0; i<D_xx_.size(0); i++ ){
        for( size_t j=0; j<D_xx_.size(1); j++ ){
            for( size_t k=0; k<D_xx_.size(2); k++ ){
                // signed mode indices (wrap upper half to negative frequencies)
                int ii = (int(i)>nlattice/2)? int(i)-nlattice : int(i);
                int jj = (int(j)>nlattice/2)? int(j)-nlattice : int(j);
                int kk = (int(k)>nlattice/2)? int(k)-nlattice : int(k);
                vec3<real_t> kv({real_t(ii),real_t(jj),real_t(kk)});

                // flip an eigenvector so that it points along +k (sign is arbitrary from eigen)
                auto align_with_k = [&]( const vec3<real_t>& v ) -> vec3<real_t>{
                    return v*((v.dot(kv)<0.0)?-1.0:1.0);
                };

                vec3<real_t> v, l;
                map_t::iterator it;

                if( !is_in(i,j,k,mat_invrecip) ){
                    // mode not represented on this lattice: average eigendata of the
                    // six nearest neighbours that are present in iimap
                    auto average_lv = [&]( const auto& t1, const auto& t2, const auto& t3, vec3<real_t>& v, vec3<real_t>& l ) {
                        v = 0.0; l = 0.0;
                        int count(0);

                        auto add_lv = [&]( auto it ) -> void {
                            auto q = it->second;++count;
                            l += vec3<real_t>({std::real(t1.kelem(q)),std::imag(t1.kelem(q)),std::real(t2.kelem(q))});
                            v += align_with_k(vec3<real_t>({std::imag(t2.kelem(q)),std::real(t3.kelem(q)),std::imag(t3.kelem(q))}));
                        };
                        map_t::iterator it;
                        if( (it = iimap.find({ii-1,jj,kk}))!=iimap.end() ){ add_lv(it); }
                        if( (it = iimap.find({ii+1,jj,kk}))!=iimap.end() ){ add_lv(it); }
                        if( (it = iimap.find({ii,jj-1,kk}))!=iimap.end() ){ add_lv(it); }
                        if( (it = iimap.find({ii,jj+1,kk}))!=iimap.end() ){ add_lv(it); }
                        if( (it = iimap.find({ii,jj,kk-1}))!=iimap.end() ){ add_lv(it); }
                        if( (it = iimap.find({ii,jj,kk+1}))!=iimap.end() ){ add_lv(it); }
                        // NOTE(review): if no neighbour is found, count==0 and this divides
                        // by zero — presumably cannot happen for numb>=1; verify.
                        l/=real_t(count); v/=real_t(count);
                    };

                    average_lv(temp1,temp2,temp3,v,l);

                }else{
                    // mode is represented: take its eigendata directly
                    if( (it = iimap.find({ii,jj,kk}))!=iimap.end() ){
                        auto q = it->second;
                        l = vec3<real_t>({std::real(temp1.kelem(q)),std::imag(temp1.kelem(q)),std::real(temp2.kelem(q))});
                        v = align_with_k(vec3<real_t>({std::imag(temp2.kelem(q)),std::real(temp3.kelem(q)),std::imag(temp3.kelem(q))}));
                    }
                }
                // repurpose the D grids: l = eigenvalues, v = growth eigenvector
                D_xx_.kelem(i,j,k) = l[0];
                D_xy_.kelem(i,j,k) = l[1];
                D_xz_.kelem(i,j,k) = l[2];
                D_yy_.kelem(i,j,k) = v[0];
                D_yz_.kelem(i,j,k) = v[1];
                D_zz_.kelem(i,j,k) = v[2];
            }
        }
    }

#ifdef PRODUCTION
    //--- step 3: turn eigendata into the final gradient kernels ---
    #pragma omp parallel for
    for( size_t i=0; i<D_xx_.size(0); i++ ){
        for( size_t j=0; j<D_xx_.size(1); j++ ){
            for( size_t k=0; k<D_xx_.size(2); k++ )
            {
                int ii = (i>size_t(nlattice/2))? int(i)-nlattice : i;
                int jj = (j>size_t(nlattice/2))? int(j)-nlattice : j;

                vec3<real_t> kv = D_xx_.get_k<real_t>(i,j,k);
                const real_t kmod = kv.norm()/mapratio_/boxlen_;

                // largest eigenvalue (growing mode) of the dynamical matrix
                double mu1 = std::real(D_xx_.kelem(i,j,k));

                vec3<real_t> evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))});
                evec1 /= evec1.norm();

                // note: k is already non-negative in the r2c half-space, so it enters without abs()
                if(std::abs(ii)+std::abs(jj)+k<8){
                    // small k modes, use usual pseudospectral derivative
                    // -- store in diagonal components of D_ij
                    D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kv.x/mapratio_/boxlen_);
                    D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kv.y/mapratio_/boxlen_);
                    D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kv.z/mapratio_/boxlen_);

                    // spatially dependent correction to vfact = \dot{D_+}/D_+
                    D_xy_.kelem(i,j,k) = 1.0;
                }else{
                    // large k modes, use interpolated PLT results
                    // -- store in diagonal components of D_ij
                    D_xx_.kelem(i,j,k) = ccomplex_t(0.0,evec1.x * kmod);
                    D_yy_.kelem(i,j,k) = ccomplex_t(0.0,evec1.y * kmod);
                    D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod);

                    // re-normalise so that the longitudinal amplitude is exact
                    auto norm = (kv.norm()/kv.dot(evec1));
                    D_xx_.kelem(i,j,k) *= norm;
                    D_yy_.kelem(i,j,k) *= norm;
                    D_zz_.kelem(i,j,k) *= norm;

                    // spatially dependent correction to vfact = \dot{D_+}/D_+
                    D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.));
                }
                // zero the Nyquist planes of the respective derivative directions
                if( i==size_t(nlattice/2) ) D_xx_.kelem(i,j,k)=0.0;
                if( j==size_t(nlattice/2) ) D_yy_.kelem(i,j,k)=0.0;
                if( k==size_t(nlattice/2) ) D_zz_.kelem(i,j,k)=0.0;
            }
        }
    }
    // DC mode: zero gradient, unit velocity-factor correction
    D_xy_.kelem(0,0,0) = 1.0;
    D_xx_.kelem(0,0,0) = 0.0;
    D_yy_.kelem(0,0,0) = 0.0;
    D_zz_.kelem(0,0,0) = 0.0;

#else
    D_xx_.Write_to_HDF5("debug.hdf5","mu1");
    D_xy_.Write_to_HDF5("debug.hdf5","mu2");
    D_xz_.Write_to_HDF5("debug.hdf5","mu3");
    D_yy_.Write_to_HDF5("debug.hdf5","e1x");
    D_yz_.Write_to_HDF5("debug.hdf5","e1y");
    D_zz_.Write_to_HDF5("debug.hdf5","e1z");
#endif
}
|
||||
|
||||
void init_D__old()
|
||||
{
|
||||
constexpr real_t pi = M_PI, twopi = 2.0*M_PI;
|
||||
|
||||
const std::vector<vec3<real_t>> normals_bcc{
|
||||
{0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi},
|
||||
{pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi},
|
||||
{pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.}
|
||||
};
|
||||
|
||||
const std::vector<vec3<real_t>> bcc_reciprocal{
|
||||
{twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi}
|
||||
};
|
||||
|
||||
const real_t eta = 2.0/ngrid_; // Ewald cutoff shall be 2 cells
|
||||
const real_t alpha = 1.0/std::sqrt(2)/eta;
|
||||
const real_t alpha2 = alpha*alpha;
|
||||
const real_t alpha3 = alpha2*alpha;
|
||||
const real_t sqrtpi = std::sqrt(M_PI);
|
||||
const real_t pi32 = std::pow(M_PI,1.5);
|
||||
|
||||
//! just a Kronecker \delta_ij
|
||||
auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; };
|
||||
|
||||
//! short range component of Ewald sum, eq. (A2) of Marcos (2008)
|
||||
auto greensftide_sr = [&]( int mu, int nu, const vec3<real_t>& vR, const vec3<real_t>& vP ) -> real_t {
|
||||
auto d = vR-vP;
|
||||
auto r = d.norm();
|
||||
if( r< 1e-14 ) return 0.0; // let's return nonsense for r=0, and fix it later!
|
||||
real_t val{0.0};
|
||||
val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r);
|
||||
val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) *
|
||||
(std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r);
|
||||
return val;
|
||||
};
|
||||
|
||||
//! sums mirrored copies of short-range component of Ewald sum
|
||||
auto evaluate_D = [&]( int mu, int nu, const vec3<real_t>& v ) -> real_t{
|
||||
real_t sr = 0.0;
|
||||
constexpr int N = 3; // number of repeated copies ±N per dimension
|
||||
int count = 0;
|
||||
for( int i=-N; i<=N; ++i ){
|
||||
for( int j=-N; j<=N; ++j ){
|
||||
for( int k=-N; k<=N; ++k ){
|
||||
if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){
|
||||
//sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} );
|
||||
sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} );
|
||||
sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} );
|
||||
count += 2;
|
||||
|
||||
// sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16;
|
||||
// sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16;
|
||||
// sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)+0.5} )/16;
|
||||
// sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)-0.5} )/16;
|
||||
// sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)+0.5} )/16;
|
||||
// sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)-0.5} )/16;
|
||||
// sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)+0.5} )/16;
|
||||
// sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)-0.5} )/16;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return sr / count;
|
||||
};
|
||||
|
||||
//! fill D_ij array with short range evaluated function
|
||||
#pragma omp parallel for
|
||||
for( size_t i=0; i<ngrid_; i++ ){
|
||||
vec3<real_t> p;
|
||||
p.x = real_t(i)/ngrid_;
|
||||
for( size_t j=0; j<ngrid_; j++ ){
|
||||
p.y = real_t(j)/ngrid_;
|
||||
for( size_t k=0; k<ngrid_; k++ ){
|
||||
p.z = real_t(k)/ngrid_;
|
||||
D_xx_.relem(i,j,k) = evaluate_D(0,0,p);
|
||||
D_xy_.relem(i,j,k) = evaluate_D(0,1,p);
|
||||
D_xz_.relem(i,j,k) = evaluate_D(0,2,p);
|
||||
D_yy_.relem(i,j,k) = evaluate_D(1,1,p);
|
||||
D_yz_.relem(i,j,k) = evaluate_D(1,2,p);
|
||||
D_zz_.relem(i,j,k) = evaluate_D(2,2,p);
|
||||
}
|
||||
}
|
||||
}
|
||||
// fix r=0 with background density (added later in Fourier space)
|
||||
D_xx_.relem(0,0,0) = 0.0;
|
||||
D_xy_.relem(0,0,0) = 0.0;
|
||||
D_xz_.relem(0,0,0) = 0.0;
|
||||
D_yy_.relem(0,0,0) = 0.0;
|
||||
D_yz_.relem(0,0,0) = 0.0;
|
||||
D_zz_.relem(0,0,0) = 0.0;
|
||||
|
||||
|
||||
// Fourier transform all six components
|
||||
D_xx_.FourierTransformForward();
|
||||
D_xy_.FourierTransformForward();
|
||||
D_xz_.FourierTransformForward();
|
||||
D_yy_.FourierTransformForward();
|
||||
D_yz_.FourierTransformForward();
|
||||
D_zz_.FourierTransformForward();
|
||||
|
||||
const real_t rho0 = std::pow(real_t(ngrid_),1.5); //mass of one particle in Fourier space
|
||||
const real_t nfac = 1.0/std::pow(real_t(ngrid_),1.5);
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
// thread private matrix representation
|
||||
mat3<real_t> D;
|
||||
vec3<real_t> eval, evec1, evec2, evec3;
|
||||
|
||||
#pragma omp for
|
||||
for( size_t i=0; i<D_xx_.size(0); i++ )
|
||||
{
|
||||
for( size_t j=0; j<D_xx_.size(1); j++ )
|
||||
{
|
||||
for( size_t k=0; k<D_xx_.size(2); k++ )
|
||||
{
|
||||
vec3<real_t> kv = D_xx_.get_k<real_t>(i,j,k);
|
||||
auto& b=bcc_reciprocal;
|
||||
vec3<real_t> kvc = { b[0][0]*kvc[0]+b[1][0]*kvc[1]+b[2][0]*kvc[2],
|
||||
b[0][1]*kvc[0]+b[1][1]*kvc[1]+b[2][1]*kvc[2],
|
||||
b[0][2]*kvc[0]+b[1][2]*kvc[1]+b[2][2]*kvc[2] };
|
||||
// vec3<real_t> kv = {kvc.dot(bcc_reciprocal[0]),kvc.dot(bcc_reciprocal[1]),kvc.dot(bcc_reciprocal[2])};
|
||||
const real_t kmod2 = kv.norm_squared();
|
||||
|
||||
// long range component of Ewald sum
|
||||
//ccomplex_t shift = 1.0;//std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])* D_xx_.get_dx()[0]));
|
||||
ccomplex_t phi0 = -rho0 * std::exp(-0.5*eta*eta*kmod2) / kmod2;
|
||||
phi0 = (phi0==phi0)? phi0 : 0.0; // catch NaN from division by zero when kmod2=0
|
||||
|
||||
|
||||
// const int nn = 3;
|
||||
// size_t nsum = 0;
|
||||
// ccomplex_t ff = 0.0;
|
||||
// for( int is=-nn;is<=nn;is++){
|
||||
// for( int js=-nn;js<=nn;js++){
|
||||
// for( int ks=-nn;ks<=nn;ks++){
|
||||
// if( std::abs(is)+std::abs(js)+std::abs(ks) <= nn ){
|
||||
// ff += std::exp(ccomplex_t(0.0,(((is)*kv[0] + (js)*kv[1] + (ks)*kv[2]))));
|
||||
// ff += std::exp(ccomplex_t(0.0,(((0.5+is)*kv[0] + (0.5+js)*kv[1] + (0.5+ks)*kv[2]))));
|
||||
// ++nsum;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// ff /= nsum;
|
||||
// ccomplex_t ff = 1.0;
|
||||
ccomplex_t ff = (0.5+0.5*std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2]))));
|
||||
// assemble short-range + long_range of Ewald sum and add DC component to trace
|
||||
D_xx_.kelem(i,j,k) = ff*((D_xx_.kelem(i,j,k) - kv[0]*kv[0] * phi0)*nfac) + 1.0/3.0;
|
||||
D_xy_.kelem(i,j,k) = ff*((D_xy_.kelem(i,j,k) - kv[0]*kv[1] * phi0)*nfac);
|
||||
D_xz_.kelem(i,j,k) = ff*((D_xz_.kelem(i,j,k) - kv[0]*kv[2] * phi0)*nfac);
|
||||
D_yy_.kelem(i,j,k) = ff*((D_yy_.kelem(i,j,k) - kv[1]*kv[1] * phi0)*nfac) + 1.0/3.0;
|
||||
D_yz_.kelem(i,j,k) = ff*((D_yz_.kelem(i,j,k) - kv[1]*kv[2] * phi0)*nfac);
|
||||
D_zz_.kelem(i,j,k) = ff*((D_zz_.kelem(i,j,k) - kv[2]*kv[2] * phi0)*nfac) + 1.0/3.0;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
D_xx_.kelem(0,0,0) = 1.0/3.0;
|
||||
D_xy_.kelem(0,0,0) = 0.0;
|
||||
D_xz_.kelem(0,0,0) = 0.0;
|
||||
D_yy_.kelem(0,0,0) = 1.0/3.0;
|
||||
D_yz_.kelem(0,0,0) = 0.0;
|
||||
D_zz_.kelem(0,0,0) = 1.0/3.0;
|
||||
|
||||
#pragma omp for
|
||||
for( size_t i=0; i<D_xx_.size(0); i++ )
|
||||
{
|
||||
for( size_t j=0; j<D_xx_.size(1); j++ )
|
||||
{
|
||||
for( size_t k=0; k<D_xx_.size(2); k++ )
|
||||
{
|
||||
// put matrix elements into actual matrix
|
||||
D = { std::real(D_xx_.kelem(i,j,k)), std::real(D_xy_.kelem(i,j,k)), std::real(D_xz_.kelem(i,j,k)),
|
||||
std::real(D_yy_.kelem(i,j,k)), std::real(D_yz_.kelem(i,j,k)), std::real(D_zz_.kelem(i,j,k)) };
|
||||
|
||||
// compute eigenstructure of matrix
|
||||
D.eigen(eval, evec1, evec2, evec3);
|
||||
|
||||
#ifdef PRODUCTION
|
||||
vec3<real_t> kv = D_xx_.get_k<real_t>(i,j,k);
|
||||
const real_t kmod = kv.norm()/mapratio_/boxlen_;
|
||||
|
||||
// store in diagonal components of D_ij
|
||||
D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.x;
|
||||
D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.y;
|
||||
D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.z;
|
||||
|
||||
auto norm = (kv.norm()/kv.dot(evec3));
|
||||
if ( std::abs(kv.dot(evec3)) < 1e-10 || kv.norm() < 1e-10 ) norm = 0.0;
|
||||
|
||||
D_xx_.kelem(i,j,k) *= norm;
|
||||
D_yy_.kelem(i,j,k) *= norm;
|
||||
D_zz_.kelem(i,j,k) *= norm;
|
||||
|
||||
// spatially dependent correction to vfact = \dot{D_+}/D_+
|
||||
D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*eval[2])-1.));
|
||||
#else
|
||||
|
||||
D_xx_.kelem(i,j,k) = eval[2];
|
||||
D_yy_.kelem(i,j,k) = eval[1];
|
||||
D_zz_.kelem(i,j,k) = eval[0];
|
||||
|
||||
D_xy_.kelem(i,j,k) = evec3[0];
|
||||
D_xz_.kelem(i,j,k) = evec3[1];
|
||||
D_yz_.kelem(i,j,k) = evec3[2];
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef PRODUCTION
|
||||
D_xy_.kelem(0,0,0) = 1.0;
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////
|
||||
std::string filename("plt_test.hdf5");
|
||||
unlink(filename.c_str());
|
||||
#if defined(USE_MPI)
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
#endif
|
||||
// rho.Write_to_HDF5(filename, "rho");
|
||||
D_xx_.Write_to_HDF5(filename, "omega1");
|
||||
D_yy_.Write_to_HDF5(filename, "omega2");
|
||||
D_zz_.Write_to_HDF5(filename, "omega3");
|
||||
D_xy_.Write_to_HDF5(filename, "e1_x");
|
||||
D_xz_.Write_to_HDF5(filename, "e1_y");
|
||||
D_yz_.Write_to_HDF5(filename, "e1_z");
|
||||
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
//! Construct the PLT lattice-gradient operator.
//! \param the_config  configuration; reads setup/BoxLength, setup/GridRes and
//!                    setup/ParticleLoad (sc|bcc|fcc|rsc, default "sc")
//! \param ngridself   resolution of the internal PLT mesh (default 64); the
//!                    target grid (GridRes) is mapped onto it via mapratio_
//! Computes the PLT eigenmodes once in init_D(); gradient()/vfac_corr() then
//! only interpolate the precomputed kernels.
explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=64 )
: boxlen_( the_config.GetValue<double>("setup", "BoxLength") ),
ngmapto_( the_config.GetValue<size_t>("setup", "GridRes") ),
ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)),
D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
grad_x_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_y_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0})
{
    csoca::ilog << "-------------------------------------------------------------------------------" << std::endl;
    std::string lattice_str = the_config.GetValueSafe<std::string>("setup","ParticleLoad","sc");
    // map the config string to the lattice enum; anything unrecognised falls back to sc
    const lattice lattice_type =
        ((lattice_str=="bcc")? lattice_bcc
        : ((lattice_str=="fcc")? lattice_fcc
        : ((lattice_str=="rsc")? lattice_rsc
        : lattice_sc)));

    csoca::ilog << "PLT corrections for " << lattice_str << " lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl;

    // NOTE(review): an earlier (commented-out) guard aborted here for MPI runs with
    // more than one task ("PLT not implemented for MPI") — confirm multi-task
    // support before relying on this with USE_MPI.

    double wtime = get_wtime();
    csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush;

    init_D( lattice_type );
    // init_D__old(); // legacy BCC-only implementation, kept for reference

    csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
}
|
||||
|
||||
inline ccomplex_t gradient( const int idim, std::array<size_t,3> ijk ) const
|
||||
{
|
||||
real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_;
|
||||
if( idim == 0 ) return D_xx_.get_cic_kspace({ix,iy,iz});
|
||||
else if( idim == 1 ) return D_yy_.get_cic_kspace({ix,iy,iz});
|
||||
return D_zz_.get_cic_kspace({ix,iy,iz});
|
||||
}
|
||||
|
||||
inline real_t vfac_corr( std::array<size_t,3> ijk ) const
|
||||
{
|
||||
real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_;
|
||||
return std::real(D_xy_.get_cic_kspace({ix,iy,iz}));
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
|
@ -25,7 +25,11 @@ public:
|
|||
//! copy constructor
|
||||
vec3( const vec3<T> &v)
|
||||
: data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){}
|
||||
|
||||
|
||||
//! copy constructor for non-const reference, needed to avoid variadic template being called for non-const reference
|
||||
vec3( vec3<T>& v)
|
||||
: data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){}
|
||||
|
||||
//! move constructor
|
||||
vec3( vec3<T> &&v)
|
||||
: data_(std::move(v.data_)), x(data_[0]), y(data_[1]), z(data_[2]){}
|
||||
|
@ -33,43 +37,78 @@ public:
|
|||
//! construct vec3 from initializer list
|
||||
template<typename ...E>
|
||||
vec3(E&&...e)
|
||||
: data_{{std::forward<E>(e)...}}, x(data_[0]), y(data_[1]), z(data_[2]){}
|
||||
: data_{{std::forward<E>(e)...}}, x{data_[0]}, y{data_[1]}, z{data_[2]}
|
||||
{}
|
||||
// vec3( T a, T b, T c )
|
||||
// : data_{{a,b,c}}, x(data_[0]), y(data_[1]), z(data_[2]){}
|
||||
|
||||
//! braket index access to vector components
|
||||
T &operator[](size_t i){ return data_[i];}
|
||||
//! bracket index access to vector components
|
||||
T &operator[](size_t i) noexcept{ return data_[i];}
|
||||
|
||||
//! const braket index access to vector components
|
||||
const T &operator[](size_t i) const { return data_[i]; }
|
||||
//! const bracket index access to vector components
|
||||
const T &operator[](size_t i) const noexcept { return data_[i]; }
|
||||
|
||||
// assignment operator
|
||||
vec3<T>& operator=( const vec3<T>& v ) noexcept { data_=v.data_; return *this; }
|
||||
|
||||
//! implementation of summation of vec3
|
||||
vec3<T> operator+( const vec3<T>& v ) const{ return vec3<T>({x+v.x,y+v.y,z+v.z}); }
|
||||
vec3<T> operator+( const vec3<T>& v ) const noexcept{ return vec3<T>({x+v.x,y+v.y,z+v.z}); }
|
||||
|
||||
//! implementation of difference of vec3
|
||||
vec3<T> operator-( const vec3<T>& v ) const{ return vec3<T>({x-v.x,y-v.y,z-v.z}); }
|
||||
vec3<T> operator-( const vec3<T>& v ) const noexcept{ return vec3<T>({x-v.x,y-v.y,z-v.z}); }
|
||||
|
||||
//! implementation of unary negative
|
||||
vec3<T> operator-() const noexcept{ return vec3<T>({-x,-y,-z}); }
|
||||
|
||||
//! implementation of scalar multiplication
|
||||
vec3<T> operator*( T s ) const{ return vec3<T>({x*s,y*s,z*s}); }
|
||||
vec3<T> operator*( T s ) const noexcept{ return vec3<T>({x*s,y*s,z*s}); }
|
||||
|
||||
//! implementation of scalar division
|
||||
vec3<T> operator/( T s ) const noexcept{ return vec3<T>({x/s,y/s,z/s}); }
|
||||
|
||||
//! implementation of += operator
|
||||
vec3<T>& operator+=( const vec3<T>& v ) const{ x+=v.x; y+=v.y; z+=v.z; return *this; }
|
||||
vec3<T>& operator+=( const vec3<T>& v ) noexcept{ x+=v.x; y+=v.y; z+=v.z; return *this; }
|
||||
|
||||
//! implementation of -= operator
|
||||
vec3<T>& operator-=( const vec3<T>& v ) const{ x-=v.x; y-=v.y; z-=v.z; return *this; }
|
||||
vec3<T>& operator-=( const vec3<T>& v ) noexcept{ x-=v.x; y-=v.y; z-=v.z; return *this; }
|
||||
|
||||
//! multiply with scalar
|
||||
vec3<T>& operator*=( T s ) const{ x*=s; y*=s; z*=s; return *this; }
|
||||
vec3<T>& operator*=( T s ) noexcept{ x*=s; y*=s; z*=s; return *this; }
|
||||
|
||||
//! divide by scalar
|
||||
vec3<T>& operator/=( T s ) noexcept{ x/=s; y/=s; z/=s; return *this; }
|
||||
|
||||
//! compute dot product with another vector
|
||||
T dot(const vec3<T> &a) const
|
||||
T dot(const vec3<T> &a) const noexcept
|
||||
{
|
||||
return data_[0] * a.data_[0] + data_[1] * a.data_[1] + data_[2] * a.data_[2];
|
||||
}
|
||||
|
||||
//! returns 2-norm squared of vector
|
||||
T norm_squared(void) const { return this->dot(*this); }
|
||||
T norm_squared(void) const noexcept { return this->dot(*this); }
|
||||
|
||||
//! returns 2-norm of vector
|
||||
T norm(void) const { return std::sqrt( this->norm_squared() ); }
|
||||
T norm(void) const noexcept { return std::sqrt( this->norm_squared() ); }
|
||||
|
||||
//! wrap absolute vector to box of size p
|
||||
vec3<T>& wrap_abs( T p = 1.0 ) noexcept{
|
||||
for( auto& x : data_ ) x = std::fmod( 2*p + x, p );
|
||||
return *this;
|
||||
}
|
||||
|
||||
//! wrap relative vector to box of size p
|
||||
vec3<T>& wrap_rel( T p = 1.0 ) noexcept{
|
||||
for( auto& x : data_ ) x = (x<-p/2)? x+p : (x>=p/2)? x-p : x;
|
||||
return *this;
|
||||
}
|
||||
|
||||
//! ordering, allows 3d sorting of vec3s
|
||||
bool operator<( const vec3<T>& o ) const noexcept{
|
||||
if( x!=o.x ) return x<o.x?true:false;
|
||||
if( y!=o.y ) return y<o.y?true:false;
|
||||
if( z!=o.z ) return z<o.z?true:false;
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
//! multiplication with scalar
|
||||
|
|
471
src/grid_fft.cc
471
src/grid_fft.cc
|
@ -2,192 +2,169 @@
|
|||
#include <grid_fft.hh>
|
||||
#include <thread>
|
||||
|
||||
#include <gsl/gsl_rng.h>
|
||||
#include <gsl/gsl_randist.h>
|
||||
|
||||
template <typename data_t>
|
||||
void Grid_FFT<data_t>::FillRandomReal(unsigned long int seed)
|
||||
template <typename data_t,bool bdistributed>
|
||||
void Grid_FFT<data_t,bdistributed>::Setup(void)
|
||||
{
|
||||
gsl_rng *RNG = gsl_rng_alloc(gsl_rng_mt19937);
|
||||
#if defined(USE_MPI)
|
||||
seed += 17321 * CONFIG::MPI_task_rank;
|
||||
#endif
|
||||
gsl_rng_set(RNG, seed);
|
||||
if( !bdistributed ){
|
||||
ntot_ = (n_[2] + 2) * n_[1] * n_[0];
|
||||
|
||||
for (size_t i = 0; i < sizes_[0]; ++i)
|
||||
{
|
||||
for (size_t j = 0; j < sizes_[1]; ++j)
|
||||
csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
{
|
||||
for (size_t k = 0; k < sizes_[2]; ++k)
|
||||
{
|
||||
this->relem(i, j, k) = gsl_ran_ugaussian_ratio_method(RNG);
|
||||
}
|
||||
data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(real_t)));
|
||||
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
|
||||
|
||||
plan_ = FFTW_API(plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, FFTW_RUNMODE);
|
||||
iplan_ = FFTW_API(plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, FFTW_RUNMODE);
|
||||
}
|
||||
else if (typeid(data_t) == typeid(ccomplex_t))
|
||||
{
|
||||
data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(ccomplex_t)));
|
||||
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
|
||||
|
||||
plan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_FORWARD, FFTW_RUNMODE);
|
||||
iplan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_BACKWARD, FFTW_RUNMODE);
|
||||
}
|
||||
else
|
||||
{
|
||||
csoca::elog.Print("invalid data type in Grid_FFT<data_t>::setup_fft_interface\n");
|
||||
}
|
||||
|
||||
fft_norm_fac_ = 1.0 / std::sqrt((double)((size_t)n_[0] * (double)n_[1] * (double)n_[2]));
|
||||
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
{
|
||||
npr_ = n_[2] + 2;
|
||||
npc_ = n_[2] / 2 + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
npr_ = n_[2];
|
||||
npc_ = n_[2];
|
||||
}
|
||||
|
||||
for (int i = 0; i < 3; ++i)
|
||||
{
|
||||
nhalf_[i] = n_[i] / 2;
|
||||
kfac_[i] = 2.0 * M_PI / length_[i];
|
||||
dx_[i] = length_[i] / n_[i];
|
||||
|
||||
global_range_.x1_[i] = 0;
|
||||
global_range_.x2_[i] = n_[i];
|
||||
}
|
||||
|
||||
local_0_size_ = n_[0];
|
||||
local_1_size_ = n_[1];
|
||||
local_0_start_ = 0;
|
||||
local_1_start_ = 0;
|
||||
|
||||
if (space_ == rspace_id)
|
||||
{
|
||||
sizes_[0] = n_[0];
|
||||
sizes_[1] = n_[1];
|
||||
sizes_[2] = n_[2];
|
||||
sizes_[3] = npr_;
|
||||
}
|
||||
else
|
||||
{
|
||||
sizes_[0] = n_[1];
|
||||
sizes_[1] = n_[0];
|
||||
sizes_[2] = npc_;
|
||||
sizes_[3] = npc_;
|
||||
}
|
||||
}
|
||||
|
||||
gsl_rng_free(RNG);
|
||||
}
|
||||
|
||||
template <typename data_t>
|
||||
void Grid_FFT<data_t>::Setup(void)
|
||||
{
|
||||
#if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
ntot_ = (n_[2] + 2) * n_[1] * n_[0];
|
||||
|
||||
csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
{
|
||||
data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(real_t)));
|
||||
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
|
||||
|
||||
plan_ = FFTW_API(plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, FFTW_RUNMODE);
|
||||
iplan_ = FFTW_API(plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, FFTW_RUNMODE);
|
||||
}
|
||||
else if (typeid(data_t) == typeid(ccomplex_t))
|
||||
{
|
||||
data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(ccomplex_t)));
|
||||
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
|
||||
|
||||
plan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_FORWARD, FFTW_RUNMODE);
|
||||
iplan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_BACKWARD, FFTW_RUNMODE);
|
||||
}
|
||||
else
|
||||
{
|
||||
csoca::elog.Print("invalid data type in Grid_FFT<data_t>::setup_fft_interface\n");
|
||||
}
|
||||
#ifdef USE_MPI //// i.e. ifdef USE_MPI ////////////////////////////////////////////////////////////////////////////////////
|
||||
size_t cmplxsz;
|
||||
|
||||
fft_norm_fac_ = 1.0 / std::sqrt((double)((size_t)n_[0] * (double)n_[1] * (double)n_[2]));
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
{
|
||||
cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD,
|
||||
&local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
|
||||
ntot_ = 2 * cmplxsz;
|
||||
data_ = (data_t *)fftw_malloc(ntot_ * sizeof(real_t));
|
||||
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
|
||||
plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_,
|
||||
MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
|
||||
iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_,
|
||||
MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
|
||||
}
|
||||
else if (typeid(data_t) == typeid(ccomplex_t))
|
||||
{
|
||||
cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD,
|
||||
&local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
|
||||
ntot_ = cmplxsz;
|
||||
data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t));
|
||||
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
|
||||
plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
|
||||
MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
|
||||
iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
|
||||
MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
|
||||
}
|
||||
else
|
||||
{
|
||||
csoca::elog.Print("unknown data type in Grid_FFT<data_t>::setup_fft_interface\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
{
|
||||
npr_ = n_[2] + 2;
|
||||
npc_ = n_[2] / 2 + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
npr_ = n_[2];
|
||||
npc_ = n_[2];
|
||||
}
|
||||
csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
|
||||
fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]);
|
||||
|
||||
for (int i = 0; i < 3; ++i)
|
||||
{
|
||||
nhalf_[i] = n_[i] / 2;
|
||||
kfac_[i] = 2.0 * M_PI / length_[i];
|
||||
dx_[i] = length_[i] / n_[i];
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
{
|
||||
npr_ = n_[2] + 2;
|
||||
npc_ = n_[2] / 2 + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
npr_ = n_[2];
|
||||
npc_ = n_[2];
|
||||
}
|
||||
|
||||
global_range_.x1_[i] = 0;
|
||||
global_range_.x2_[i] = n_[i];
|
||||
}
|
||||
for (int i = 0; i < 3; ++i)
|
||||
{
|
||||
nhalf_[i] = n_[i] / 2;
|
||||
kfac_[i] = 2.0 * M_PI / length_[i];
|
||||
dx_[i] = length_[i] / n_[i];
|
||||
|
||||
local_0_size_ = n_[0];
|
||||
local_1_size_ = n_[1];
|
||||
local_0_start_ = 0;
|
||||
local_1_start_ = 0;
|
||||
|
||||
if (space_ == rspace_id)
|
||||
{
|
||||
sizes_[0] = n_[0];
|
||||
sizes_[1] = n_[1];
|
||||
sizes_[2] = n_[2];
|
||||
sizes_[3] = npr_;
|
||||
}
|
||||
else
|
||||
{
|
||||
sizes_[0] = n_[1];
|
||||
sizes_[1] = n_[0];
|
||||
sizes_[2] = npc_;
|
||||
sizes_[3] = npc_;
|
||||
}
|
||||
|
||||
#else //// i.e. ifdef USE_MPI ////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
size_t cmplxsz;
|
||||
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
{
|
||||
cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD,
|
||||
&local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
|
||||
ntot_ = 2 * cmplxsz;
|
||||
data_ = (data_t *)fftw_malloc(ntot_ * sizeof(real_t));
|
||||
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
|
||||
plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_,
|
||||
MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
|
||||
iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_,
|
||||
MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
|
||||
}
|
||||
else if (typeid(data_t) == typeid(ccomplex_t))
|
||||
{
|
||||
cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD,
|
||||
&local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
|
||||
ntot_ = cmplxsz;
|
||||
data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t));
|
||||
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
|
||||
plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
|
||||
MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
|
||||
iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
|
||||
MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
|
||||
}
|
||||
else
|
||||
{
|
||||
csoca::elog.Print("unknown data type in Grid_FFT<data_t>::setup_fft_interface\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
|
||||
fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]);
|
||||
|
||||
if (typeid(data_t) == typeid(real_t))
|
||||
{
|
||||
npr_ = n_[2] + 2;
|
||||
npc_ = n_[2] / 2 + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
npr_ = n_[2];
|
||||
npc_ = n_[2];
|
||||
}
|
||||
|
||||
for (int i = 0; i < 3; ++i)
|
||||
{
|
||||
nhalf_[i] = n_[i] / 2;
|
||||
kfac_[i] = 2.0 * M_PI / length_[i];
|
||||
dx_[i] = length_[i] / n_[i];
|
||||
|
||||
global_range_.x1_[i] = 0;
|
||||
global_range_.x2_[i] = n_[i];
|
||||
}
|
||||
global_range_.x1_[0] = (int)local_0_start_;
|
||||
global_range_.x2_[0] = (int)(local_0_start_ + local_0_size_);
|
||||
|
||||
if (space_ == rspace_id)
|
||||
{
|
||||
sizes_[0] = (int)local_0_size_;
|
||||
sizes_[1] = n_[1];
|
||||
sizes_[2] = n_[2];
|
||||
sizes_[3] = npr_; // holds the physical memory size along the 3rd dimension
|
||||
}
|
||||
else
|
||||
{
|
||||
sizes_[0] = (int)local_1_size_;
|
||||
sizes_[1] = n_[0];
|
||||
sizes_[2] = npc_;
|
||||
sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension
|
||||
}
|
||||
global_range_.x1_[i] = 0;
|
||||
global_range_.x2_[i] = n_[i];
|
||||
}
|
||||
global_range_.x1_[0] = (int)local_0_start_;
|
||||
global_range_.x2_[0] = (int)(local_0_start_ + local_0_size_);
|
||||
|
||||
if (space_ == rspace_id)
|
||||
{
|
||||
sizes_[0] = (int)local_0_size_;
|
||||
sizes_[1] = n_[1];
|
||||
sizes_[2] = n_[2];
|
||||
sizes_[3] = npr_; // holds the physical memory size along the 3rd dimension
|
||||
}
|
||||
else
|
||||
{
|
||||
sizes_[0] = (int)local_1_size_;
|
||||
sizes_[1] = n_[0];
|
||||
sizes_[2] = npc_;
|
||||
sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension
|
||||
}
|
||||
#else
|
||||
csoca::flog << "MPI is required for distributed FFT arrays!" << std::endl;
|
||||
throw std::runtime_error("MPI is required for distributed FFT arrays!");
|
||||
#endif //// of #ifdef #else USE_MPI ////////////////////////////////////////////////////////////////////////////////////
|
||||
}
|
||||
}
|
||||
|
||||
template <typename data_t>
|
||||
void Grid_FFT<data_t>::ApplyNorm(void)
|
||||
template <typename data_t,bool bdistributed>
|
||||
void Grid_FFT<data_t,bdistributed>::ApplyNorm(void)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for (size_t i = 0; i < ntot_; ++i)
|
||||
data_[i] *= fft_norm_fac_;
|
||||
}
|
||||
|
||||
template <typename data_t>
|
||||
void Grid_FFT<data_t>::FourierTransformForward(bool do_transform)
|
||||
template <typename data_t,bool bdistributed>
|
||||
void Grid_FFT<data_t,bdistributed>::FourierTransformForward(bool do_transform)
|
||||
{
|
||||
#if defined(USE_MPI)
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
@ -217,8 +194,8 @@ void Grid_FFT<data_t>::FourierTransformForward(bool do_transform)
|
|||
}
|
||||
}
|
||||
|
||||
template <typename data_t>
|
||||
void Grid_FFT<data_t>::FourierTransformBackward(bool do_transform)
|
||||
template <typename data_t,bool bdistributed>
|
||||
void Grid_FFT<data_t,bdistributed>::FourierTransformBackward(bool do_transform)
|
||||
{
|
||||
#if defined(USE_MPI)
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
@ -269,15 +246,142 @@ void create_hdf5(std::string Filename)
|
|||
H5Fclose(HDF_FileID);
|
||||
}
|
||||
|
||||
template <typename data_t>
|
||||
void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname) const
|
||||
|
||||
template <typename data_t,bool bdistributed>
|
||||
void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string datasetname) const
|
||||
{
|
||||
// FIXME: cleanup duplicate code in this function!
|
||||
if( !bdistributed && CONFIG::MPI_task_rank==0 ){
|
||||
|
||||
hid_t file_id, dset_id; /* file and dataset identifiers */
|
||||
hid_t filespace, memspace; /* file and memory dataspace identifiers */
|
||||
hsize_t offset[3], count[3];
|
||||
hid_t dtype_id = H5T_NATIVE_FLOAT;
|
||||
hid_t plist_id = H5P_DEFAULT;
|
||||
|
||||
if (!file_exists(fname))
|
||||
create_hdf5(fname);
|
||||
|
||||
file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id);
|
||||
|
||||
for (int i = 0; i < 3; ++i)
|
||||
count[i] = size(i);
|
||||
|
||||
if (typeid(data_t) == typeid(float))
|
||||
dtype_id = H5T_NATIVE_FLOAT;
|
||||
else if (typeid(data_t) == typeid(double))
|
||||
dtype_id = H5T_NATIVE_DOUBLE;
|
||||
else if (typeid(data_t) == typeid(std::complex<float>))
|
||||
{
|
||||
dtype_id = H5T_NATIVE_FLOAT;
|
||||
}
|
||||
else if (typeid(data_t) == typeid(std::complex<double>))
|
||||
{
|
||||
dtype_id = H5T_NATIVE_DOUBLE;
|
||||
}
|
||||
|
||||
filespace = H5Screate_simple(3, count, NULL);
|
||||
dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace,
|
||||
H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
|
||||
H5Sclose(filespace);
|
||||
|
||||
hsize_t slice_sz = size(1) * size(2);
|
||||
|
||||
real_t *buf = new real_t[slice_sz];
|
||||
|
||||
count[0] = 1;
|
||||
count[1] = size(1);
|
||||
count[2] = size(2);
|
||||
|
||||
offset[1] = 0;
|
||||
offset[2] = 0;
|
||||
|
||||
memspace = H5Screate_simple(3, count, NULL);
|
||||
filespace = H5Dget_space(dset_id);
|
||||
|
||||
for (size_t i = 0; i < size(0); ++i)
|
||||
{
|
||||
offset[0] = i;
|
||||
for (size_t j = 0; j < size(1); ++j)
|
||||
{
|
||||
for (size_t k = 0; k < size(2); ++k)
|
||||
{
|
||||
if( this->space_ == rspace_id )
|
||||
buf[j * size(2) + k] = std::real(relem(i, j, k));
|
||||
else
|
||||
buf[j * size(2) + k] = std::real(kelem(i, j, k));
|
||||
}
|
||||
}
|
||||
|
||||
H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL);
|
||||
H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf);
|
||||
}
|
||||
|
||||
H5Sclose(filespace);
|
||||
H5Sclose(memspace);
|
||||
|
||||
// H5Sclose(filespace);
|
||||
H5Dclose(dset_id);
|
||||
|
||||
if (typeid(data_t) == typeid(std::complex<float>) ||
|
||||
typeid(data_t) == typeid(std::complex<double>) ||
|
||||
this->space_ == kspace_id )
|
||||
{
|
||||
datasetname += std::string(".im");
|
||||
|
||||
for (int i = 0; i < 3; ++i)
|
||||
count[i] = size(i);
|
||||
|
||||
filespace = H5Screate_simple(3, count, NULL);
|
||||
dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace,
|
||||
H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
|
||||
H5Sclose(filespace);
|
||||
|
||||
count[0] = 1;
|
||||
|
||||
for (size_t i = 0; i < size(0); ++i)
|
||||
{
|
||||
offset[0] = i;
|
||||
|
||||
for (size_t j = 0; j < size(1); ++j)
|
||||
for (size_t k = 0; k < size(2); ++k)
|
||||
{
|
||||
if( this->space_ == rspace_id )
|
||||
buf[j * size(2) + k] = std::imag(relem(i, j, k));
|
||||
else
|
||||
buf[j * size(2) + k] = std::imag(kelem(i, j, k));
|
||||
}
|
||||
|
||||
memspace = H5Screate_simple(3, count, NULL);
|
||||
filespace = H5Dget_space(dset_id);
|
||||
|
||||
H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count,
|
||||
NULL);
|
||||
|
||||
H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf);
|
||||
|
||||
H5Sclose(memspace);
|
||||
H5Sclose(filespace);
|
||||
}
|
||||
|
||||
H5Dclose(dset_id);
|
||||
|
||||
delete[] buf;
|
||||
}
|
||||
|
||||
H5Fclose(file_id);
|
||||
return;
|
||||
}
|
||||
|
||||
if( !bdistributed && CONFIG::MPI_task_rank!=0 ) return;
|
||||
|
||||
hid_t file_id, dset_id; /* file and dataset identifiers */
|
||||
hid_t filespace, memspace; /* file and memory dataspace identifiers */
|
||||
hsize_t offset[3], count[3];
|
||||
hid_t dtype_id = H5T_NATIVE_FLOAT;
|
||||
hid_t plist_id;
|
||||
|
||||
|
||||
#if defined(USE_MPI)
|
||||
|
||||
int mpi_size, mpi_rank;
|
||||
|
@ -391,7 +495,10 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
|
|||
{
|
||||
for (size_t k = 0; k < size(2); ++k)
|
||||
{
|
||||
buf[j * size(2) + k] = std::real(relem(i, j, k));
|
||||
if( this->space_ == rspace_id )
|
||||
buf[j * size(2) + k] = std::real(relem(i, j, k));
|
||||
else
|
||||
buf[j * size(2) + k] = std::real(kelem(i, j, k));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -410,7 +517,8 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
|
|||
H5Dclose(dset_id);
|
||||
|
||||
if (typeid(data_t) == typeid(std::complex<float>) ||
|
||||
typeid(data_t) == typeid(std::complex<double>))
|
||||
typeid(data_t) == typeid(std::complex<double>) ||
|
||||
this->space_ == kspace_id )
|
||||
{
|
||||
datasetname += std::string(".im");
|
||||
|
||||
|
@ -460,7 +568,10 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
|
|||
for (size_t j = 0; j < size(1); ++j)
|
||||
for (size_t k = 0; k < size(2); ++k)
|
||||
{
|
||||
buf[j * size(2) + k] = std::imag(relem(i, j, k));
|
||||
if( this->space_ == rspace_id )
|
||||
buf[j * size(2) + k] = std::imag(relem(i, j, k));
|
||||
else
|
||||
buf[j * size(2) + k] = std::imag(kelem(i, j, k));
|
||||
}
|
||||
|
||||
memspace = H5Screate_simple(3, count, NULL);
|
||||
|
@ -493,8 +604,8 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
|
|||
|
||||
#include <iomanip>
|
||||
|
||||
template <typename data_t>
|
||||
void Grid_FFT<data_t>::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax)
|
||||
template <typename data_t,bool bdistributed>
|
||||
void Grid_FFT<data_t,bdistributed>::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax)
|
||||
{
|
||||
double logvmin = std::log10(vmin);
|
||||
double logvmax = std::log10(vmax);
|
||||
|
@ -545,8 +656,8 @@ void Grid_FFT<data_t>::Write_PDF(std::string ofname, int nbins, double scale, do
|
|||
#endif
|
||||
}
|
||||
|
||||
template <typename data_t>
|
||||
void Grid_FFT<data_t>::Write_PowerSpectrum(std::string ofname)
|
||||
template <typename data_t,bool bdistributed>
|
||||
void Grid_FFT<data_t,bdistributed>::Write_PowerSpectrum(std::string ofname)
|
||||
{
|
||||
std::vector<double> bin_k, bin_P, bin_eP;
|
||||
std::vector<size_t> bin_count;
|
||||
|
@ -575,8 +686,8 @@ void Grid_FFT<data_t>::Write_PowerSpectrum(std::string ofname)
|
|||
#endif
|
||||
}
|
||||
|
||||
template <typename data_t>
|
||||
void Grid_FFT<data_t>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::vector<double> &bin_P, std::vector<double> &bin_eP, std::vector<size_t> &bin_count )
|
||||
template <typename data_t,bool bdistributed>
|
||||
void Grid_FFT<data_t,bdistributed>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::vector<double> &bin_P, std::vector<double> &bin_eP, std::vector<size_t> &bin_count )
|
||||
{
|
||||
this->FourierTransformForward();
|
||||
|
||||
|
@ -656,5 +767,7 @@ void Grid_FFT<data_t>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::ve
|
|||
|
||||
/********************************************************************************************/
|
||||
|
||||
template class Grid_FFT<real_t>;
|
||||
template class Grid_FFT<ccomplex_t>;
|
||||
template class Grid_FFT<real_t,true>;
|
||||
template class Grid_FFT<real_t,false>;
|
||||
template class Grid_FFT<ccomplex_t,true>;
|
||||
template class Grid_FFT<ccomplex_t,false>;
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include <ic_generator.hh>
|
||||
#include <particle_generator.hh>
|
||||
#include <particle_plt.hh>
|
||||
|
||||
#include <unistd.h> // for unlink
|
||||
|
||||
|
@ -194,6 +195,13 @@ int Run( ConfigFile& the_config )
|
|||
// NaiveConvolver<real_t> Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
// Create PLT gradient operator
|
||||
//--------------------------------------------------------------------
|
||||
// particle::lattice_gradient lg( the_config );
|
||||
op::fourier_gradient lg( the_config );
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
std::vector<cosmo_species> species_list;
|
||||
species_list.push_back(cosmo_species::dm);
|
||||
if (bDoBaryons)
|
||||
|
@ -482,8 +490,8 @@ int Run( ConfigFile& the_config )
|
|||
size_t idx = phi.get_idx(i,j,k);
|
||||
auto phitot = phi.kelem(idx) + phi2.kelem(idx) + phi3a.kelem(idx) + phi3b.kelem(idx);
|
||||
// divide by Lbox, because displacement is in box units for output plugin
|
||||
tmp.kelem(idx) = lunit / boxlen * ( phi.gradient(idim,{i,j,k}) * phitot
|
||||
+ phi.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - phi.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx) );
|
||||
tmp.kelem(idx) = lunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot
|
||||
+ lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -519,8 +527,11 @@ int Run( ConfigFile& the_config )
|
|||
// divide by Lbox, because displacement is in box units for output plugin
|
||||
auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx) + vfac3 * (phi3a.kelem(idx) + phi3b.kelem(idx));
|
||||
|
||||
tmp.kelem(idx) = vunit / boxlen * ( phi.gradient(idim,{i,j,k}) * phitot_v
|
||||
+ vfac3 * (phi.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - phi.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx)) );
|
||||
tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot_v
|
||||
+ vfac3 * (lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx)) );
|
||||
|
||||
// correct velocity with PLT mode growth rate
|
||||
tmp.kelem(idx) *= lg.vfac_corr(tmp.get_k3(i,j,k));
|
||||
|
||||
if( bAddExternalTides ){
|
||||
// modify velocities with anisotropic expansion factor**2
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
#include <general.hh>
|
||||
#include <ic_generator.hh>
|
||||
#include <particle_plt.hh>
|
||||
|
||||
|
||||
// initialise with "default" values
|
||||
|
@ -182,6 +183,8 @@ int main( int argc, char** argv )
|
|||
// do the job...
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
ic_generator::Run( the_config );
|
||||
|
||||
// particle::test_plt();
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined(USE_MPI)
|
||||
|
|
1408
src/old/output_gadget2___original.cc
Normal file
1408
src/old/output_gadget2___original.cc
Normal file
File diff suppressed because it is too large
Load diff
|
@ -82,7 +82,11 @@ public:
|
|||
for (size_t j = 0; j < nres_; ++j)
|
||||
{
|
||||
ptrdiff_t jj = (j>0)? nres_ - j : 0;
|
||||
gsl_rng_set( pRandomGenerator_, SeedTable_[i * nres_ + j]);
|
||||
if( g.is_distributed() )
|
||||
gsl_rng_set( pRandomGenerator_, SeedTable_[j * nres_ + i]);
|
||||
else
|
||||
gsl_rng_set( pRandomGenerator_, SeedTable_[i * nres_ + j]);
|
||||
|
||||
for (size_t k = 0; k < g.size(2); ++k)
|
||||
{
|
||||
double phase = gsl_rng_uniform(pRandomGenerator_) * 2 * M_PI;
|
||||
|
@ -101,15 +105,28 @@ public:
|
|||
if (k > 0) {
|
||||
if (i_in_range) g.kelem(ip,j,k) = zrand;
|
||||
} else{ /* k=0 plane needs special treatment */
|
||||
if (i == 0) {
|
||||
if (j < nres_ / 2 && i_in_range)
|
||||
{
|
||||
g.kelem(ip,j,k) = zrand;
|
||||
g.kelem(ip,jj,k) = std::conj(zrand);
|
||||
if( g.is_distributed() ){
|
||||
if (j == 0) {
|
||||
if (i < nres_ / 2 && i_in_range)
|
||||
{
|
||||
if(i_in_range) g.kelem(ip,jj,k) = zrand;
|
||||
if(ii_in_range) g.kelem(iip,j,k) = std::conj(zrand);
|
||||
}
|
||||
} else if (j < nres_ / 2) {
|
||||
if(i_in_range) g.kelem(ip,j,k) = zrand;
|
||||
if(ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand);
|
||||
}
|
||||
}else{
|
||||
if (i == 0) {
|
||||
if (j < nres_ / 2 && i_in_range)
|
||||
{
|
||||
g.kelem(ip,j,k) = zrand;
|
||||
g.kelem(ip,jj,k) = std::conj(zrand);
|
||||
}
|
||||
} else if (i < nres_ / 2) {
|
||||
if(i_in_range) g.kelem(ip,j,k) = zrand;
|
||||
if (ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand);
|
||||
}
|
||||
} else if (i < nres_ / 2) {
|
||||
if(i_in_range) g.kelem(ip,j,k) = zrand;
|
||||
if (ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
344
src/plugins/transfer_CAMB_file.cc
Normal file
344
src/plugins/transfer_CAMB_file.cc
Normal file
|
@ -0,0 +1,344 @@
|
|||
// transfer_CAMB.cc - This file is part of MUSIC -
|
||||
// a code to generate multi-scale initial conditions for cosmological simulations
|
||||
|
||||
// Copyright (C) 2019 Oliver Hahn
|
||||
|
||||
#include <gsl/gsl_errno.h>
|
||||
#include <gsl/gsl_spline.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "transfer_function_plugin.hh"
|
||||
|
||||
const double tiny = 1e-30;
|
||||
|
||||
class transfer_CAMB_file_plugin : public TransferFunction_plugin
|
||||
{
|
||||
|
||||
private:
|
||||
std::string m_filename_Pk, m_filename_Tk;
|
||||
std::vector<double> m_tab_k, m_tab_Tk_tot, m_tab_Tk_cdm, m_tab_Tk_baryon;
|
||||
std::vector<double> m_tab_Tvk_tot, m_tab_Tvk_cdm, m_tab_Tvk_baryon;
|
||||
gsl_interp_accel *acc_tot, *acc_cdm, *acc_baryon;
|
||||
gsl_interp_accel *acc_vtot, *acc_vcdm, *acc_vbaryon;
|
||||
gsl_spline *spline_tot, *spline_cdm, *spline_baryon;
|
||||
gsl_spline *spline_vtot, *spline_vcdm, *spline_vbaryon;
|
||||
|
||||
double m_kmin, m_kmax, m_Omega_b, m_Omega_m, m_zstart;
|
||||
unsigned m_nlines;
|
||||
|
||||
bool m_linbaryoninterp;
|
||||
|
||||
void read_table(void)
|
||||
{
|
||||
|
||||
m_nlines = 0;
|
||||
m_linbaryoninterp = false;
|
||||
|
||||
#ifdef WITH_MPI
|
||||
if (MPI::COMM_WORLD.Get_rank() == 0)
|
||||
{
|
||||
#endif
|
||||
csoca::ilog.Print("Reading tabulated transfer function data from file \n \'%s\'", m_filename_Tk.c_str());
|
||||
|
||||
std::string line;
|
||||
std::ifstream ifs(m_filename_Tk.c_str());
|
||||
|
||||
if (!ifs.good())
|
||||
throw std::runtime_error("Could not find transfer function file \'" + m_filename_Tk + "\'");
|
||||
|
||||
m_tab_k.clear();
|
||||
m_tab_Tk_tot.clear();
|
||||
m_tab_Tk_cdm.clear();
|
||||
m_tab_Tk_baryon.clear();
|
||||
m_tab_Tvk_tot.clear();
|
||||
m_tab_Tvk_cdm.clear(); //>[150609SH: add]
|
||||
m_tab_Tvk_baryon.clear(); //>[150609SH: add]
|
||||
|
||||
m_kmin = 1e30;
|
||||
m_kmax = -1e30;
|
||||
std::ofstream ofs("dump_transfer.txt");
|
||||
|
||||
while (!ifs.eof())
|
||||
{
|
||||
getline(ifs, line);
|
||||
if (ifs.eof())
|
||||
break;
|
||||
|
||||
// OH: ignore line if it has a comment:
|
||||
if (line.find("#") != std::string::npos)
|
||||
continue;
|
||||
|
||||
std::stringstream ss(line);
|
||||
|
||||
double k, Tkc, Tkb, Tktot, Tkvtot, Tkvc, Tkvb, dummy;
|
||||
|
||||
ss >> k;
|
||||
ss >> Tkc; // cdm
|
||||
ss >> Tkb; // baryon
|
||||
ss >> dummy; // photon
|
||||
ss >> dummy; // nu
|
||||
ss >> dummy; // mass_nu
|
||||
ss >> Tktot; // total
|
||||
ss >> dummy; // no_nu
|
||||
ss >> dummy; // total_de
|
||||
ss >> dummy; // Weyl
|
||||
ss >> Tkvc; // v_cdm
|
||||
ss >> Tkvb; // v_b
|
||||
ss >> dummy; // v_b-v_cdm
|
||||
|
||||
if (ss.bad() || ss.fail())
|
||||
{
|
||||
csoca::elog.Print("Error reading the transfer function file (corrupt or not in expected format)!");
|
||||
throw std::runtime_error("Error reading transfer function file \'" +
|
||||
m_filename_Tk + "\'");
|
||||
}
|
||||
|
||||
if (m_Omega_b < 1e-6)
|
||||
Tkvtot = Tktot;
|
||||
else
|
||||
Tkvtot = ((m_Omega_m - m_Omega_b) * Tkvc + m_Omega_b * Tkvb) / m_Omega_m; //MvD
|
||||
|
||||
m_linbaryoninterp |= Tkb < 0.0 || Tkvb < 0.0;
|
||||
|
||||
m_tab_k.push_back(log10(k));
|
||||
|
||||
m_tab_Tk_tot.push_back(Tktot);
|
||||
m_tab_Tk_baryon.push_back(Tkb);
|
||||
m_tab_Tk_cdm.push_back(Tkc);
|
||||
m_tab_Tvk_tot.push_back(Tkvtot);
|
||||
m_tab_Tvk_baryon.push_back(Tkvb);
|
||||
m_tab_Tvk_cdm.push_back(Tkvc);
|
||||
|
||||
++m_nlines;
|
||||
|
||||
if (k < m_kmin)
|
||||
m_kmin = k;
|
||||
if (k > m_kmax)
|
||||
m_kmax = k;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < m_tab_k.size(); ++i)
|
||||
{
|
||||
m_tab_Tk_tot[i] = log10(m_tab_Tk_tot[i]);
|
||||
m_tab_Tk_cdm[i] = log10(m_tab_Tk_cdm[i]);
|
||||
m_tab_Tvk_cdm[i] = log10(m_tab_Tvk_cdm[i]);
|
||||
m_tab_Tvk_tot[i] = log10(m_tab_Tvk_tot[i]);
|
||||
|
||||
if (!m_linbaryoninterp)
|
||||
{
|
||||
m_tab_Tk_baryon[i] = log10(m_tab_Tk_baryon[i]);
|
||||
m_tab_Tvk_baryon[i] = log10(m_tab_Tvk_baryon[i]);
|
||||
}
|
||||
}
|
||||
|
||||
ifs.close();
|
||||
|
||||
csoca::ilog.Print("Read CAMB transfer function table with %d rows", m_nlines);
|
||||
|
||||
if (m_linbaryoninterp)
|
||||
csoca::ilog.Print("Using log-lin interpolation for baryons\n (TF is not "
|
||||
"positive definite)");
|
||||
|
||||
#ifdef WITH_MPI
|
||||
}
|
||||
|
||||
unsigned n = m_tab_k.size();
|
||||
MPI::COMM_WORLD.Bcast(&n, 1, MPI_UNSIGNED, 0);
|
||||
|
||||
if (MPI::COMM_WORLD.Get_rank() > 0)
|
||||
{
|
||||
m_tab_k.assign(n, 0);
|
||||
m_tab_Tk_tot.assign(n, 0);
|
||||
m_tab_Tk_cdm.assign(n, 0);
|
||||
m_tab_Tk_baryon.assign(n, 0);
|
||||
m_tab_Tvk_tot.assign(n, 0);
|
||||
m_tab_Tvk_cdm.assign(n, 0);
|
||||
m_tab_Tvk_baryon.assign(n, 0);
|
||||
}
|
||||
|
||||
MPI::COMM_WORLD.Bcast(&m_tab_k[0], n, MPI_DOUBLE, 0);
|
||||
MPI::COMM_WORLD.Bcast(&m_tab_Tk_tot[0], n, MPI_DOUBLE, 0);
|
||||
MPI::COMM_WORLD.Bcast(&m_tab_Tk_cdm[0], n, MPI_DOUBLE, 0);
|
||||
MPI::COMM_WORLD.Bcast(&m_tab_Tk_baryon[0], n, MPI_DOUBLE, 0);
|
||||
MPI::COMM_WORLD.Bcast(&m_tab_Tvk_tot[0], n, MPI_DOUBLE, 0);
|
||||
MPI::COMM_WORLD.Bcast(&m_tab_Tvk_cdm[0], n, MPI_DOUBLE, 0);
|
||||
MPI::COMM_WORLD.Bcast(&m_tab_Tvk_baryon[0], n, MPI_DOUBLE, 0);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
public:
|
||||
transfer_CAMB_file_plugin(ConfigFile &cf)
|
||||
: TransferFunction_plugin(cf)
|
||||
{
|
||||
m_filename_Tk = pcf_->GetValue<std::string>("cosmology", "transfer_file");
|
||||
m_Omega_m = cf.GetValue<double>("cosmology", "Omega_m"); //MvD
|
||||
m_Omega_b = cf.GetValue<double>("cosmology", "Omega_b"); //MvD
|
||||
m_zstart = cf.GetValue<double>("setup", "zstart"); //MvD
|
||||
|
||||
read_table();
|
||||
|
||||
acc_tot = gsl_interp_accel_alloc();
|
||||
acc_cdm = gsl_interp_accel_alloc();
|
||||
acc_baryon = gsl_interp_accel_alloc();
|
||||
acc_vtot = gsl_interp_accel_alloc();
|
||||
acc_vcdm = gsl_interp_accel_alloc();
|
||||
acc_vbaryon = gsl_interp_accel_alloc();
|
||||
|
||||
spline_tot = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size());
|
||||
spline_cdm = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size());
|
||||
spline_baryon = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size());
|
||||
spline_vtot = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size());
|
||||
spline_vcdm =
|
||||
gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size());
|
||||
spline_vbaryon =
|
||||
gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size());
|
||||
|
||||
gsl_spline_init(spline_tot, &m_tab_k[0], &m_tab_Tk_tot[0], m_tab_k.size());
|
||||
gsl_spline_init(spline_cdm, &m_tab_k[0], &m_tab_Tk_cdm[0], m_tab_k.size());
|
||||
gsl_spline_init(spline_baryon, &m_tab_k[0], &m_tab_Tk_baryon[0],
|
||||
m_tab_k.size());
|
||||
gsl_spline_init(spline_vtot, &m_tab_k[0], &m_tab_Tvk_tot[0],
|
||||
m_tab_k.size());
|
||||
gsl_spline_init(spline_vcdm, &m_tab_k[0], &m_tab_Tvk_cdm[0],
|
||||
m_tab_k.size());
|
||||
gsl_spline_init(spline_vbaryon, &m_tab_k[0], &m_tab_Tvk_baryon[0],
|
||||
m_tab_k.size());
|
||||
|
||||
tf_distinct_ = true; // different density between CDM v.s. Baryon
|
||||
tf_withvel_ = true; // using velocity transfer function
|
||||
}
|
||||
|
||||
~transfer_CAMB_file_plugin()
|
||||
{
|
||||
gsl_spline_free(spline_tot);
|
||||
gsl_spline_free(spline_cdm);
|
||||
gsl_spline_free(spline_baryon);
|
||||
gsl_spline_free(spline_vtot);
|
||||
gsl_spline_free(spline_vcdm);
|
||||
gsl_spline_free(spline_vbaryon);
|
||||
|
||||
gsl_interp_accel_free(acc_tot);
|
||||
gsl_interp_accel_free(acc_cdm);
|
||||
gsl_interp_accel_free(acc_baryon);
|
||||
gsl_interp_accel_free(acc_vtot);
|
||||
gsl_interp_accel_free(acc_vcdm);
|
||||
gsl_interp_accel_free(acc_vbaryon);
|
||||
}
|
||||
|
||||
// linear interpolation in log-log
|
||||
inline double extrap_right(double k, const tf_type &type) const
|
||||
{
|
||||
int n = m_tab_k.size() - 1, n1 = n - 1;
|
||||
|
||||
double v1(1.0), v2(1.0);
|
||||
|
||||
double lk = log10(k);
|
||||
double dk = m_tab_k[n] - m_tab_k[n1];
|
||||
double delk = lk - m_tab_k[n];
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case cdm:
|
||||
v1 = m_tab_Tk_cdm[n1];
|
||||
v2 = m_tab_Tk_cdm[n];
|
||||
return pow(10.0, (v2 - v1) / dk * (delk) + v2);
|
||||
case baryon:
|
||||
v1 = m_tab_Tk_baryon[n1];
|
||||
v2 = m_tab_Tk_baryon[n];
|
||||
if (m_linbaryoninterp)
|
||||
return std::max((v2 - v1) / dk * (delk) + v2, tiny);
|
||||
return pow(10.0, (v2 - v1) / dk * (delk) + v2);
|
||||
case vtotal: //>[150609SH: add]
|
||||
v1 = m_tab_Tvk_tot[n1];
|
||||
v2 = m_tab_Tvk_tot[n];
|
||||
return pow(10.0, (v2 - v1) / dk * (delk) + v2);
|
||||
case vcdm: //>[150609SH: add]
|
||||
v1 = m_tab_Tvk_cdm[n1];
|
||||
v2 = m_tab_Tvk_cdm[n];
|
||||
return pow(10.0, (v2 - v1) / dk * (delk) + v2);
|
||||
case vbaryon: //>[150609SH: add]
|
||||
v1 = m_tab_Tvk_baryon[n1];
|
||||
v2 = m_tab_Tvk_baryon[n];
|
||||
if (m_linbaryoninterp)
|
||||
return std::max((v2 - v1) / dk * (delk) + v2, tiny);
|
||||
return pow(10.0, (v2 - v1) / dk * (delk) + v2);
|
||||
case total:
|
||||
v1 = m_tab_Tk_tot[n1];
|
||||
v2 = m_tab_Tk_tot[n];
|
||||
return pow(10.0, (v2 - v1) / dk * (delk) + v2);
|
||||
default:
|
||||
throw std::runtime_error(
|
||||
"Invalid type requested in transfer function evaluation");
|
||||
}
|
||||
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
inline double compute(double k, tf_type type) const
|
||||
{
|
||||
// use constant interpolation on the left side of the tabulated values
|
||||
if (k < m_kmin)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case cdm:
|
||||
return pow(10.0, m_tab_Tk_cdm[0]);
|
||||
case baryon:
|
||||
if (m_linbaryoninterp)
|
||||
return m_tab_Tk_baryon[0];
|
||||
return pow(10.0, m_tab_Tk_baryon[0]);
|
||||
case vtotal:
|
||||
return pow(10.0, m_tab_Tvk_tot[0]);
|
||||
case vcdm:
|
||||
return pow(10.0, m_tab_Tvk_cdm[0]);
|
||||
case vbaryon:
|
||||
if (m_linbaryoninterp)
|
||||
return m_tab_Tvk_baryon[0];
|
||||
return pow(10.0, m_tab_Tvk_baryon[0]);
|
||||
case total:
|
||||
return pow(10.0, m_tab_Tk_tot[0]);
|
||||
default:
|
||||
throw std::runtime_error(
|
||||
"Invalid type requested in transfer function evaluation");
|
||||
}
|
||||
}
|
||||
// use linear interpolation on the right side of the tabulated values
|
||||
else if (k > m_kmax)
|
||||
return extrap_right(k, type);
|
||||
|
||||
double lk = log10(k);
|
||||
switch (type)
|
||||
{
|
||||
case cdm:
|
||||
return pow(10.0, gsl_spline_eval(spline_cdm, lk, acc_cdm));
|
||||
case baryon:
|
||||
if (m_linbaryoninterp)
|
||||
return gsl_spline_eval(spline_baryon, lk, acc_baryon);
|
||||
return pow(10.0, gsl_spline_eval(spline_baryon, lk, acc_baryon));
|
||||
case vtotal:
|
||||
return pow(10.0, gsl_spline_eval(spline_vtot, lk, acc_vtot)); //MvD
|
||||
case vcdm:
|
||||
return pow(10.0, gsl_spline_eval(spline_vcdm, lk, acc_vcdm));
|
||||
case vbaryon:
|
||||
if (m_linbaryoninterp)
|
||||
return gsl_spline_eval(spline_vbaryon, lk, acc_vbaryon);
|
||||
return pow(10.0, gsl_spline_eval(spline_vbaryon, lk, acc_vbaryon));
|
||||
case total:
|
||||
return pow(10.0, gsl_spline_eval(spline_tot, lk, acc_tot));
|
||||
default:
|
||||
throw std::runtime_error(
|
||||
"Invalid type requested in transfer function evaluation");
|
||||
}
|
||||
}
|
||||
|
||||
inline double get_kmin(void) const { return pow(10.0, m_tab_k[1]); }
|
||||
|
||||
inline double get_kmax(void) const { return pow(10.0, m_tab_k[m_tab_k.size() - 2]); }
|
||||
};
|
||||
|
||||
namespace
{
// Static self-registration: constructing this object at load time makes the
// plugin selectable via the name "CAMB_file" in the configuration.
TransferFunction_plugin_creator_concrete<transfer_CAMB_file_plugin> creator("CAMB_file");
} // namespace
|
|
@ -434,5 +434,5 @@ namespace
|
|||
TransferFunction_plugin_creator_concrete<transfer_eisenstein_plugin> creator("eisenstein");
|
||||
TransferFunction_plugin_creator_concrete<transfer_eisenstein_wdm_plugin> creator2("eisenstein_wdm");
|
||||
TransferFunction_plugin_creator_concrete<transfer_eisenstein_cdmbino_plugin> creator3("eisenstein_cdmbino");
|
||||
TransferFunction_plugin_creator_concrete<transfer_eisenstein_cutoff_plugin> creator4("eisenstein_cutoff");
|
||||
// TransferFunction_plugin_creator_concrete<transfer_eisenstein_cutoff_plugin> creator4("eisenstein_cutoff");
|
||||
} // namespace
|
||||
|
|
Loading…
Reference in a new issue