
Merge branch 'dev-plt' of https://bitbucket.org/ohahn/monofonic into develop

Oliver Hahn 2020-01-25 16:36:08 +01:00
commit a2f4cbb434
20 changed files with 3459 additions and 329 deletions


@@ -5,7 +5,7 @@ project(monofonIC)
# include class submodule
include(${CMAKE_CURRENT_SOURCE_DIR}/external/class.cmake)
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic")
find_package(PkgConfig REQUIRED)
@@ -49,17 +49,22 @@ if(ENABLE_MPI)
endif(ENABLE_MPI)
########################################################################################################################
# FFTW
cmake_policy(SET CMP0074 NEW)
if(POLICY CMP0074)
cmake_policy(SET CMP0074 NEW)
endif()
if(ENABLE_MPI)
find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS MPI)
else()
find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS)
endif(ENABLE_MPI)
########################################################################################################################
# GSL
find_package(GSL REQUIRED)
########################################################################################################################
# HDF5
find_package(HDF5 REQUIRED)


@@ -17,4 +17,30 @@ Create build directory, configure, and build:
make
This should create an executable in the build directory.
There is an example parameter file 'example.conf' in the main directory
If you run into problems with CMake not being able to find your local FFTW3 or HDF5 installation, it is best to give the path directly as
FFTW3_ROOT=<path> HDF5_ROOT=<path> ccmake ..
Make sure to delete any files previously generated by CMake (e.g. CMakeCache.txt and the CMakeFiles directory) before reconfiguring like this.
If you want to build on macOS, it is strongly recommended to use GNU (or Intel) compilers instead of Apple's Clang. Install them e.g.
via Homebrew, and then configure CMake to use them instead of the macOS default compiler via
CC=gcc-9 CXX=g++-9 ccmake ..
This is necessary since Apple's compilers haven't supported OpenMP for years.
## Running
There is an example parameter file 'example.conf' in the main directory. Possible options are explained in it. The parameter file is
passed as a simple command-line argument, e.g. from within the build directory:
./monofonic ../example.conf
If you want to run with MPI, you need to enable MPI support via ccmake. You can then launch in hybrid MPI+threads mode by
specifying the desired number of threads per task in the config file, and the number of tasks via
mpirun -np 16 ./monofonic <path to config file>
It will then run with 16 tasks times the number of threads per task specified in the config file.
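For example, with NumThreads = 4 in the config file, the mpirun command above would use 16 x 4 = 64 cores in total.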


@@ -1,23 +1,25 @@
[setup]
# number of grid cells per linear dimension for calculations = particles for sc initial load
GridRes = 128
GridRes = 128
# length of the box in Mpc/h
BoxLength = 250
BoxLength = 250
# starting redshift
zstart = 100.0
zstart = 49.0
# order of the LPT to be used (1,2 or 3)
LPTorder = 3
LPTorder = 3
# also do baryon ICs?
DoBaryons = no
DoBaryons = no
# do mode fixing à la Angulo&Pontzen
DoFixing = no
DoFixing = no
# particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!)
ParticleLoad = sc
ParticleLoad = sc
[cosmology]
transfer = CLASS
ztarget = 100.0
#transfer = eisenstein
ztarget = 0.0
# transfer = eisenstein
# transfer = file_CAMB
# transfer_file = wmap5_transfer_out_z0.dat
Omega_m = 0.302
Omega_b = 0.045
Omega_L = 0.698
@@ -26,9 +28,9 @@ sigma_8 = 0.811
nspec = 0.961
# anisotropic large scale tidal field
#LSS_aniso_lx = 0.1
#LSS_aniso_ly = 0.1
#LSS_aniso_lz = -0.2
#LSS_aniso_lx = +0.1
#LSS_aniso_ly = +0.1
#LSS_aniso_lz = -0.2
[random]
generator = NGENIC
@@ -37,18 +39,18 @@ seed = 9001
[testing]
# enables diagnostic output
# can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence'
test = convergence
test = none
[execution]
NumThreads = 4
NumThreads = 4
[output]
fname_hdf5 = output.hdf5
fbase_analysis = output
fname_hdf5 = output_sch.hdf5
fbase_analysis = output
format = gadget2
filename = ics_gadget.dat
UseLongids = false
format = gadget2
filename = ics_gadget.dat
UseLongids = false
#format = generic
#filename = debug.hdf5
@@ -58,5 +60,3 @@ UseLongids = false
#filename = ics_ramses
#grafic_use_SPT = yes

example_testing.conf (new file, +33)

@@ -0,0 +1,33 @@
[setup]
GridRes = 256
BoxLength = 6.28318530718
zstart = 0.0
LPTorder = 1
SymplecticPT = no
DoFixing = no
[execution]
NumThreads = 4
[output]
fname_hdf5 = output.hdf5
fbase_analysis = output
#format = gadget2
#filename = ics_gadget.dat
format = generic
filename = debug.hdf5
generic_out_eulerian = yes
[random]
generator = NGENIC
seed = 9001
[cosmology]
#transfer = CLASS
transfer = eisenstein
Omega_m = 1.0
Omega_b = 0.045
Omega_L = 0.0
H0 = 70.3
sigma_8 = 0.811
nspec = 0.961

external/fftwpp (vendored submodule, +1)

@@ -0,0 +1 @@
Subproject commit ec6b82cc1122ba029a7a7142cf836014e992e68c

ics.conf (new file, +62)

@@ -0,0 +1,62 @@
[setup]
# number of grid cells per linear dimension for calculations = particles for sc initial load
GridRes = 128
# length of the box in Mpc/h
BoxLength = 200
# starting redshift
zstart = 24.0
# order of the LPT to be used (1,2 or 3)
LPTorder = 1
# also do baryon ICs?
DoBaryons = no
# do mode fixing à la Angulo&Pontzen
DoFixing = yes
# particle load, can be 'sc' (1x), 'bcc' (2x), 'fcc' (4x), or 'rsc' (8x)
ParticleLoad = sc
[testing]
# enables diagnostic output
# can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence'
#test = potentials_and_densities
#test = convergence
test = none
[execution]
NumThreads = 1
[output]
fname_hdf5 = output.hdf5
fbase_analysis = output
#format = gadget2
#filename = ics_gadget.dat
format = generic
filename = debug.hdf5
#generic_out_eulerian = yes
#format = grafic2
#filename = ics_ramses
#grafic_use_SPT = yes
[random]
generator = NGENIC
seed = 9001
[cosmology]
transfer = eisenstein
#transfer = CLASS
#transfer = eisenstein_wdm
#WDMmass = 0.1
Omega_m = 0.302
Omega_b = 0.045
Omega_L = 0.698
H0 = 70.3
sigma_8 = 0.811
nspec = 0.961
# anisotropic large scale tidal field
#LSS_aniso_lx = 0.1
#LSS_aniso_ly = 0.1
#LSS_aniso_lz = -0.2


@@ -127,14 +127,19 @@ public:
return pNorm * scale * scale * TransferSq(k) * pow((double)k, (double)cosmo_param_.nspect);
}
inline static double H_of_a(double a, void *Params)
inline static double H_of_a(double a, const void *Params)
{
CosmologyParameters *cosm = (CosmologyParameters *)Params;
const CosmologyParameters *cosm = (CosmologyParameters *)Params;
double a2 = a * a;
double Ha = sqrt(cosm->Omega_m / (a2 * a) + cosm->Omega_k / a2 + cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a));
return Ha;
}
inline double H_of_a( double a ) const
{
return 100.0 * this->H_of_a(a,reinterpret_cast<const void*>(&this->cosmo_param_));
}
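// For reference, the static H_of_a above implements the Friedmann equation with
// CPL dark energy, w(a) = w_0 + w_a (1 - a), and the non-static wrapper returns H
// in units of h km/s/Mpc (hence the factor 100):
//   E(a) = sqrt( Omega_m a^-3 + Omega_k a^-2
//                + Omega_DE a^{-3(1+w_0+w_a)} exp( -3 (1-a) w_a ) ),   H(a) = 100 E(a)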
inline static double Hprime_of_a(double a, void *Params)
{
CosmologyParameters *cosm = (CosmologyParameters *)Params;
@@ -168,10 +173,7 @@ public:
*/
inline real_t CalcGrowthRate( real_t a )
{
#warning CalcGrowthRate is only correct if dark energy is a cosmological constant, need to upgrade calculator...
real_t y = cosmo_param_.Omega_m*(1.0/a-1.0) + cosmo_param_.Omega_DE*(a*a-1.0) + 1.0;
real_t fact = integrate( &fIntegrand, 1e-6, a, (void*)&cosmo_param_ );
return (cosmo_param_.Omega_DE*a*a-0.5*cosmo_param_.Omega_m/a)/y - 1.0 + a*fIntegrand(a,(void*)&cosmo_param_)/fact;
return CalcVFact(a) / H_of_a(a) / a;
}
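// (a sketch, assuming CalcVFact returns the velocity factor a H(a) f(a) used to
// scale displacements into velocities: the growth rate then follows as
//   f(a) = d ln D_+ / d ln a = vfact / ( a H(a) ),
// which is what the expression above evaluates.)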
//! Computes the linear theory growth factor D+


@@ -12,6 +12,8 @@
#include <fftw3.h>
#endif
#include <config_file.hh>
#ifdef USE_SINGLEPRECISION
using real_t = float;
using complex_t = fftwf_complex;


@@ -16,16 +16,23 @@ enum space_t
};
template <typename data_t>
#ifdef USE_MPI
template <typename data_t, bool bdistributed=true>
#else
template <typename data_t, bool bdistributed=false>
#endif
class Grid_FFT
{
protected:
#if defined(USE_MPI)
const MPI_Datatype MPI_data_t_type = (typeid(data_t) == typeid(double)) ? MPI_DOUBLE
: (typeid(data_t) == typeid(float)) ? MPI_FLOAT
: (typeid(data_t) == typeid(std::complex<float>)) ? MPI_COMPLEX
: (typeid(data_t) == typeid(std::complex<double>)) ? MPI_DOUBLE_COMPLEX : MPI_INT;
const MPI_Datatype MPI_data_t_type =
(typeid(data_t) == typeid(double)) ? MPI_DOUBLE
: (typeid(data_t) == typeid(float)) ? MPI_FLOAT
: (typeid(data_t) == typeid(std::complex<float>)) ? MPI_COMPLEX
: (typeid(data_t) == typeid(std::complex<double>)) ? MPI_DOUBLE_COMPLEX
: MPI_INT;
#endif
using grid_fft_t = Grid_FFT<data_t,bdistributed>;
public:
std::array<size_t, 3> n_, nhalf_;
std::array<size_t, 4> sizes_;
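// A minimal usage sketch of the new compile-time flag (boxlen is a placeholder;
// constructor arguments follow the existing Grid_FFT interface): the second
// template parameter selects whether a field is MPI-distributed or node-local,
// so helper grids can stay local even in an MPI build, e.g.
//   Grid_FFT<real_t> delta({256, 256, 256}, {boxlen, boxlen, boxlen}); // distributed iff USE_MPI
//   Grid_FFT<real_t, false> D_loc({64, 64, 64}, {1.0, 1.0, 1.0});      // always node-local (cf. particle_plt.hh)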
@@ -54,7 +61,7 @@ public:
}
// avoid implicit copying of data
Grid_FFT(const Grid_FFT<data_t> &g) = delete;
Grid_FFT(const grid_fft_t &g) = delete;
~Grid_FFT()
{
@@ -64,10 +71,15 @@ public:
}
}
const Grid_FFT<data_t> *get_grid(size_t ilevel) const { return this; }
const grid_fft_t *get_grid(size_t ilevel) const { return this; }
bool is_distributed( void ) const { return bdistributed; }
void Setup();
//! return the number of data_t elements that we store in the container
size_t memsize( void ) const { return ntot_; }
//! return the (local) size of dimension i
size_t size(size_t i) const { return sizes_[i]; }
@@ -91,7 +103,7 @@ public:
data_[i] = 0.0;
}
void copy_from(const Grid_FFT<data_t> &g)
void copy_from(const grid_fft_t &g)
{
// make sure the two fields are in the same space
if (g.space_ != this->space_)
@@ -217,32 +229,89 @@ public:
vec3<ft> get_k(const size_t i, const size_t j, const size_t k) const
{
vec3<ft> kk;
#if defined(USE_MPI)
auto ip = i + local_1_start_;
kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0];
kk[1] = (real_t(ip) - real_t(ip > nhalf_[1]) * n_[1]) * kfac_[1];
#else
kk[0] = (real_t(i) - real_t(i > nhalf_[0]) * n_[0]) * kfac_[0];
kk[1] = (real_t(j) - real_t(j > nhalf_[1]) * n_[1]) * kfac_[1];
#endif
if( bdistributed ){
auto ip = i + local_1_start_;
kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0];
kk[1] = (real_t(ip) - real_t(ip > nhalf_[1]) * n_[1]) * kfac_[1];
}else{
kk[0] = (real_t(i) - real_t(i > nhalf_[0]) * n_[0]) * kfac_[0];
kk[1] = (real_t(j) - real_t(j > nhalf_[1]) * n_[1]) * kfac_[1];
}
kk[2] = (real_t(k) - real_t(k > nhalf_[2]) * n_[2]) * kfac_[2];
return kk;
}
std::array<size_t,3> get_k3(const size_t i, const size_t j, const size_t k) const
{
return bdistributed? std::array<size_t,3>({j,i+local_1_start_,k}) : std::array<size_t,3>({i,j,k});
}
data_t get_cic( const vec3<real_t>& v ) const{
// warning! this doesn't work with MPI
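// trilinear (cloud-in-cell) interpolation of the real-space field at physical
// position v, wrapped periodically into the box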
vec3<real_t> x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0],
std::fmod(v.y/length_[1]+1.0,1.0)*n_[1],
std::fmod(v.z/length_[2]+1.0,1.0)*n_[2] });
size_t ix = static_cast<size_t>(x.x);
size_t iy = static_cast<size_t>(x.y);
size_t iz = static_cast<size_t>(x.z);
real_t dx = x.x-real_t(ix), tx = 1.0-dx;
real_t dy = x.y-real_t(iy), ty = 1.0-dy;
real_t dz = x.z-real_t(iz), tz = 1.0-dz;
size_t ix1 = (ix+1)%n_[0];
size_t iy1 = (iy+1)%n_[1];
size_t iz1 = (iz+1)%n_[2];
data_t val = 0.0;
val += this->relem(ix ,iy ,iz ) * tx * ty * tz;
val += this->relem(ix ,iy ,iz1) * tx * ty * dz;
val += this->relem(ix ,iy1,iz ) * tx * dy * tz;
val += this->relem(ix ,iy1,iz1) * tx * dy * dz;
val += this->relem(ix1,iy ,iz ) * dx * ty * tz;
val += this->relem(ix1,iy ,iz1) * dx * ty * dz;
val += this->relem(ix1,iy1,iz ) * dx * dy * tz;
val += this->relem(ix1,iy1,iz1) * dx * dy * dz;
return val;
}
ccomplex_t get_cic_kspace( const vec3<real_t>& x ) const{
// warning! this doesn't work with MPI
size_t ix = static_cast<size_t>(x.x);
size_t iy = static_cast<size_t>(x.y);
size_t iz = std::min(static_cast<size_t>(x.z),size(2)-1); //static_cast<size_t>(x.z);
real_t dx = x.x-real_t(ix), tx = 1.0-dx;
real_t dy = x.y-real_t(iy), ty = 1.0-dy;
real_t dz = x.z-real_t(iz), tz = 1.0-dz;
size_t ix1 = (ix+1)%size(0);
size_t iy1 = (iy+1)%size(1);
size_t iz1 = std::min((iz+1),size(2)-1);
ccomplex_t val = 0.0;
val += this->kelem(ix ,iy ,iz ) * tx * ty * tz;
val += this->kelem(ix ,iy ,iz1) * tx * ty * dz;
val += this->kelem(ix ,iy1,iz ) * tx * dy * tz;
val += this->kelem(ix ,iy1,iz1) * tx * dy * dz;
val += this->kelem(ix1,iy ,iz ) * dx * ty * tz;
val += this->kelem(ix1,iy ,iz1) * dx * ty * dz;
val += this->kelem(ix1,iy1,iz ) * dx * dy * tz;
val += this->kelem(ix1,iy1,iz1) * dx * dy * dz;
// if( val != val ){
//auto k = this->get_k<real_t>(ix,iy,iz);
//std::cerr << ix << " " << iy << " " << iz << " " << val << " " << this->gradient(0,{ix,iy,iz}) << " " << this->gradient(1,{ix,iy,iz}) << " " << this->gradient(2,{ix,iy,iz}) << std::endl;
// }
return val;
}
inline ccomplex_t gradient( const int idim, std::array<size_t,3> ijk ) const
{
#if defined(USE_MPI)
ijk[0] += local_1_start_;
std::swap(ijk[0],ijk[1]);
#endif
if( bdistributed ){
ijk[0] += local_1_start_;
std::swap(ijk[0],ijk[1]);
}
real_t rgrad =
(ijk[idim]!=nhalf_[idim])? (real_t(ijk[idim]) - real_t(ijk[idim] > nhalf_[idim]) * n_[idim]) * kfac_[idim] : 0.0;
return ccomplex_t(0.0,rgrad);
}
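// i.e. gradient() is the pseudospectral derivative d/dx_i -> i k_i, with
// k_i = ( m_i - n_i [m_i > n_i/2] ) * 2 pi / L_i and the Nyquist plane
// (m_i = n_i/2) set to zero.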
Grid_FFT<data_t> &operator*=(data_t x)
grid_fft_t &operator*=(data_t x)
{
if (space_ == kspace_id)
{
@@ -255,7 +324,7 @@ public:
return *this;
}
Grid_FFT<data_t> &operator/=(data_t x)
grid_fft_t &operator/=(data_t x)
{
if (space_ == kspace_id)
{
@@ -268,7 +337,7 @@ public:
return *this;
}
Grid_FFT<data_t> &apply_Laplacian(void)
grid_fft_t &apply_Laplacian(void)
{
this->FourierTransformForward();
this->apply_function_k_dep([&](auto x, auto k) {
@@ -279,7 +348,7 @@ public:
return *this;
}
Grid_FFT<data_t> &apply_negative_Laplacian(void)
grid_fft_t &apply_negative_Laplacian(void)
{
this->FourierTransformForward();
this->apply_function_k_dep([&](auto x, auto k) {
@@ -290,7 +359,7 @@ public:
return *this;
}
Grid_FFT<data_t> &apply_InverseLaplacian(void)
grid_fft_t &apply_InverseLaplacian(void)
{
this->FourierTransformForward();
this->apply_function_k_dep([&](auto x, auto k) {
@@ -338,8 +407,7 @@ public:
double compute_2norm(void)
{
real_t sum1{0.0};
#pragma omp parallel for reduction(+ \
: sum1)
#pragma omp parallel for reduction(+ : sum1)
for (size_t i = 0; i < sizes_[0]; ++i)
{
for (size_t j = 0; j < sizes_[1]; ++j)
@@ -363,8 +431,7 @@ public:
double sum1{0.0}, sum2{0.0};
size_t count{0};
#pragma omp parallel for reduction(+ \
: sum1, sum2)
#pragma omp parallel for reduction(+ : sum1, sum2)
for (size_t i = 0; i < sizes_[0]; ++i)
{
for (size_t j = 0; j < sizes_[1]; ++j)
@@ -380,24 +447,26 @@ public:
count = sizes_[0] * sizes_[1] * sizes_[2];
#ifdef USE_MPI
double globsum1{0.0}, globsum2{0.0};
size_t globcount{0};
if( bdistributed ){
double globsum1{0.0}, globsum2{0.0};
size_t globcount{0};
MPI_Allreduce(reinterpret_cast<const void *>(&sum1),
reinterpret_cast<void *>(&globsum1),
1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(reinterpret_cast<const void *>(&sum1),
reinterpret_cast<void *>(&globsum1),
1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(reinterpret_cast<const void *>(&sum2),
reinterpret_cast<void *>(&globsum2),
1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(reinterpret_cast<const void *>(&sum2),
reinterpret_cast<void *>(&globsum2),
1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(reinterpret_cast<const void *>(&count),
reinterpret_cast<void *>(&globcount),
1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(reinterpret_cast<const void *>(&count),
reinterpret_cast<void *>(&globcount),
1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
sum1 = globsum1;
sum2 = globsum2;
count = globcount;
sum1 = globsum1;
sum2 = globsum2;
count = globcount;
}
#endif
sum1 /= count;
sum2 /= count;
@@ -410,8 +479,7 @@ public:
double sum1{0.0};
size_t count{0};
#pragma omp parallel for reduction(+ \
: sum1)
#pragma omp parallel for reduction(+ : sum1)
for (size_t i = 0; i < sizes_[0]; ++i)
{
for (size_t j = 0; j < sizes_[1]; ++j)
@@ -426,19 +494,21 @@ public:
count = sizes_[0] * sizes_[1] * sizes_[2];
#ifdef USE_MPI
double globsum1{0.0};
size_t globcount{0};
if( bdistributed ){
double globsum1{0.0};
size_t globcount{0};
MPI_Allreduce(reinterpret_cast<const void *>(&sum1),
reinterpret_cast<void *>(&globsum1),
1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(reinterpret_cast<const void *>(&sum1),
reinterpret_cast<void *>(&globsum1),
1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(reinterpret_cast<const void *>(&count),
reinterpret_cast<void *>(&globcount),
1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(reinterpret_cast<const void *>(&count),
reinterpret_cast<void *>(&globcount),
1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
sum1 = globsum1;
count = globcount;
sum1 = globsum1;
count = globcount;
}
#endif
sum1 /= count;
@@ -449,9 +519,9 @@ public:
template <typename functional, typename grid_t>
void assign_function_of_grids_r(const functional &f, const grid_t &g)
{
assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) );
assert(g.size(0) == size(0) && g.size(1) == size(1));
#pragma omp parallel for
#pragma omp parallel for
for (size_t i = 0; i < sizes_[0]; ++i)
{
for (size_t j = 0; j < sizes_[1]; ++j)
@@ -470,10 +540,10 @@ public:
template <typename functional, typename grid1_t, typename grid2_t>
void assign_function_of_grids_r(const functional &f, const grid1_t &g1, const grid2_t &g2)
{
assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g1.size(2) == size(2));
assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g2.size(2) == size(2));
assert(g1.size(0) == size(0) && g1.size(1) == size(1));
assert(g2.size(0) == size(0) && g2.size(1) == size(1));
#pragma omp parallel for
#pragma omp parallel for
for (size_t i = 0; i < sizes_[0]; ++i)
{
for (size_t j = 0; j < sizes_[1]; ++j)
@@ -499,7 +569,7 @@ public:
assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g2.size(2) == size(2));
assert(g3.size(0) == size(0) && g3.size(1) == size(1)); // && g3.size(2) == size(2));
#pragma omp parallel for
#pragma omp parallel for
for (size_t i = 0; i < sizes_[0]; ++i)
{
for (size_t j = 0; j < sizes_[1]; ++j)
@@ -524,7 +594,7 @@ public:
{
assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) );
#pragma omp parallel for
#pragma omp parallel for
for (size_t i = 0; i < sizes_[0]; ++i)
{
for (size_t j = 0; j < sizes_[1]; ++j)
@@ -546,7 +616,7 @@ public:
assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g.size(2) == size(2) );
assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g.size(2) == size(2) );
#pragma omp parallel for
#pragma omp parallel for
for (size_t i = 0; i < sizes_[0]; ++i)
{
for (size_t j = 0; j < sizes_[1]; ++j)
@@ -563,12 +633,12 @@ public:
}
}
template <typename functional, typename grid1_t>
void assign_function_of_grids_kdep(const functional &f, const grid1_t &g)
template <typename functional, typename grid_t>
void assign_function_of_grids_kdep(const functional &f, const grid_t &g)
{
assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) );
#pragma omp parallel for
#pragma omp parallel for
for (size_t i = 0; i < sizes_[0]; ++i)
{
for (size_t j = 0; j < sizes_[1]; ++j)
@@ -577,7 +647,7 @@ public:
{
auto &elem = this->kelem(i, j, k);
const auto &elemg = g.kelem(i, j, k);
elem = f(this->get_k<real_t>(i, j, k), elemg);
}
}
@@ -587,15 +657,15 @@ public:
template <typename functional, typename grid1_t, typename grid2_t>
void assign_function_of_grids_kdep(const functional &f, const grid1_t &g1, const grid2_t &g2)
{
assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g.size(2) == size(2) );
assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g.size(2) == size(2) );
assert(g1.size(0) == size(0) && g1.size(1) == size(1) && g1.size(2) == size(2) );
assert(g2.size(0) == size(0) && g2.size(1) == size(1) && g2.size(2) == size(2) );
#pragma omp parallel for
for (size_t i = 0; i < sizes_[0]; ++i)
#pragma omp parallel for
for (size_t i = 0; i < size(0); ++i)
{
for (size_t j = 0; j < sizes_[1]; ++j)
for (size_t j = 0; j < size(1); ++j)
{
for (size_t k = 0; k < sizes_[2]; ++k)
for (size_t k = 0; k < size(2); ++k)
{
auto &elem = this->kelem(i, j, k);
const auto &elemg1 = g1.kelem(i, j, k);
@@ -610,7 +680,7 @@ public:
template <typename functional>
void apply_function_k_dep(const functional &f)
{
#pragma omp parallel for
#pragma omp parallel for
for (size_t i = 0; i < sizes_[0]; ++i)
{
for (size_t j = 0; j < sizes_[1]; ++j)
@@ -627,7 +697,7 @@ public:
template <typename functional>
void apply_function_r_dep(const functional &f)
{
#pragma omp parallel for
#pragma omp parallel for
for (size_t i = 0; i < sizes_[0]; ++i)
{
for (size_t j = 0; j < sizes_[1]; ++j)
@@ -657,27 +727,28 @@ public:
void Write_PDF(std::string ofname, int nbins = 1000, double scale = 1.0, double rhomin = 1e-3, double rhomax = 1e3);
void shift_field( const vec3<real_t>& s )
void shift_field( const vec3<real_t>& s, bool transform_back=true )
{
FourierTransformForward();
apply_function_k_dep([&](auto x, auto k) -> ccomplex_t {
#ifdef WITH_MPI
real_t shift = s.y * k[0] * get_dx()[0] + s.x * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2];
#else
real_t shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2];
#endif
real_t shift;
if( bdistributed ){
shift = s.y * k[0] * get_dx()[0] + s.x * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2];
}else{
shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2];
}
return x * std::exp(ccomplex_t(0.0, shift));
});
FourierTransformBackward();
if( transform_back ){
FourierTransformBackward();
}
}
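// (Fourier shift theorem: multiplying each mode by exp( i k.(s*dx) ) shifts the
// field by s grid cells, f(x) -> f(x + s*dx); the distributed branch swaps the x
// and y components because the MPI transforms store k-space transposed, and
// transform_back=false lets callers continue working in k-space.)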
void zero_DC_mode(void)
{
if (space_ == kspace_id)
{
#ifdef USE_MPI
if (CONFIG::MPI_task_rank == 0)
#endif
if (CONFIG::MPI_task_rank == 0 || !bdistributed )
cdata_[0] = (data_t)0.0;
}
else
@@ -694,12 +765,14 @@ public:
}
}
}
if( bdistributed ){
#if defined(USE_MPI)
data_t glob_sum = 0.0;
MPI_Allreduce(reinterpret_cast<void *>(&sum), reinterpret_cast<void *>(&glob_sum),
1, GetMPIDatatype<data_t>(), MPI_SUM, MPI_COMM_WORLD);
sum = glob_sum;
data_t glob_sum = 0.0;
MPI_Allreduce(reinterpret_cast<void *>(&sum), reinterpret_cast<void *>(&glob_sum),
1, GetMPIDatatype<data_t>(), MPI_SUM, MPI_COMM_WORLD);
sum = glob_sum;
#endif
}
sum /= sizes_[0] * sizes_[1] * sizes_[2];
#pragma omp parallel for

include/mat3.hh (new file, +171)

@@ -0,0 +1,171 @@
#include <gsl/gsl_math.h>
#include <gsl/gsl_eigen.h>
#include <vec3.hh>
template<typename T>
class mat3{
protected:
std::array<T,9> data_;
gsl_matrix_view m_;
gsl_vector *eval_;
gsl_matrix *evec_;
gsl_eigen_symmv_workspace * wsp_;
bool bdid_alloc_gsl_;
void init_gsl(){
// allocate memory for GSL operations if we haven't done so yet
if( !bdid_alloc_gsl_ )
{
m_ = gsl_matrix_view_array (&data_[0], 3, 3);
eval_ = gsl_vector_alloc (3);
evec_ = gsl_matrix_alloc (3, 3);
wsp_ = gsl_eigen_symmv_alloc (3);
bdid_alloc_gsl_ = true;
}
}
void free_gsl(){
// free memory for GSL operations if it was allocated
if( bdid_alloc_gsl_ )
{
gsl_eigen_symmv_free (wsp_);
gsl_vector_free (eval_);
gsl_matrix_free (evec_);
}
}
public:
mat3()
: bdid_alloc_gsl_(false)
{}
//! copy constructor
mat3( const mat3<T> &m)
: data_(m.data_), bdid_alloc_gsl_(false)
{}
//! move constructor
mat3( mat3<T> &&m)
: data_(std::move(m.data_)), bdid_alloc_gsl_(false)
{}
//! construct mat3 from initializer list
template<typename ...E>
mat3(E&&...e)
: data_{{std::forward<E>(e)...}}, bdid_alloc_gsl_(false)
{}
mat3<T>& operator=(const mat3<T>& m) noexcept{
data_ = m.data_;
return *this;
}
mat3<T>& operator=(const mat3<T>&& m) noexcept{
data_ = std::move(m.data_);
return *this;
}
//! destructor
~mat3(){
this->free_gsl();
}
//! bracket index access to vector components
T &operator[](size_t i) noexcept { return data_[i];}
//! const bracket index access to vector components
const T &operator[](size_t i) const noexcept { return data_[i]; }
//! matrix 2d index access
T &operator()(size_t i, size_t j) noexcept { return data_[3*i+j]; }
//! const matrix 2d index access
const T &operator()(size_t i, size_t j) const noexcept { return data_[3*i+j]; }
//! in-place addition
mat3<T>& operator+=( const mat3<T>& rhs ) noexcept{
for (size_t i = 0; i < 9; ++i) {
(*this)[i] += rhs[i];
}
return *this;
}
//! in-place subtraction
mat3<T>& operator-=( const mat3<T>& rhs ) noexcept{
for (size_t i = 0; i < 9; ++i) {
(*this)[i] -= rhs[i];
}
return *this;
}
void zero() noexcept{
for (size_t i = 0; i < 9; ++i) data_[i]=0;
}
void eigen( vec3<T>& evals, vec3<T>& evec1, vec3<T>& evec2, vec3<T>& evec3 )
{
// for( auto x : data_ ){
// std::cerr << x << " " ;
// }
// std::cerr << std::endl;
// re-sort into symmetric matrix
// data_[8] = data_[5];
// data_[7] = data_[4];
// data_[6] = data_[2];
// data_[5] = data_[4];
// data_[4] = data_[3];
// data_[3] = data_[1];
this->init_gsl();
gsl_eigen_symmv (&m_.matrix, eval_, evec_, wsp_);
gsl_eigen_symmv_sort (eval_, evec_, GSL_EIGEN_SORT_VAL_ASC);
for( int i=0; i<3; ++i ){
evals[i] = gsl_vector_get( eval_, i );
evec1[i] = gsl_matrix_get( evec_, i, 0 );
evec2[i] = gsl_matrix_get( evec_, i, 1 );
evec3[i] = gsl_matrix_get( evec_, i, 2 );
}
// std::cerr << "(" << evals[0] << " " << evals[1] << " " << evals[2] << ")" << std::endl;
}
};
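// A minimal usage sketch of the eigen() interface (values illustrative; the
// input must be symmetric, and eigenvalues come back sorted ascending):
//   mat3<double> M{ 2.0, 1.0, 0.0,
//                   1.0, 2.0, 0.0,
//                   0.0, 0.0, 1.0 };   // row-major symmetric input
//   vec3<double> evals, e1, e2, e3;
//   M.eigen(evals, e1, e2, e3);        // evals[0] <= evals[1] <= evals[2]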
template<typename T>
constexpr const mat3<T> operator+(const mat3<T> &lhs, const mat3<T> &rhs) noexcept
{
mat3<T> result;
for (size_t i = 0; i < 9; ++i) {
result[i] = lhs[i] + rhs[i];
}
return result;
}
// matrix - vector multiplication
template<typename T>
vec3<T> operator*( const mat3<T> &A, const vec3<T> &v ) noexcept
{
vec3<T> result;
for( int mu=0; mu<3; ++mu ){
result[mu] = 0.0;
for( int nu=0; nu<3; ++nu ){
result[mu] += A(mu,nu)*v[nu];
}
}
return result;
}
// template<typename T>
// vec3<T> operator*( const vec3<T> &v, const mat3<T> &A ) noexcept
// {
// vec3<T> result = 0.0;
// for( int mu=0; mu<3; ++mu ){
// for( int nu=0; nu<3; ++nu ){
// result[nu] += v[mu]*A(mu,nu);
// }
// }
// return result;
// }


@@ -1,7 +1,11 @@
#pragma once
#include <general.hh>
namespace op{
//!== long list of primitive operators to work on fields ==!//
template< typename field>
inline auto assign_to( field& g ){return [&g](auto i, auto v){ g[i] = v; };}
@@ -20,4 +24,29 @@ inline auto subtract_from( field& g ){return [&g](auto i, auto v){ g[i] -= v; };
template< typename field>
inline auto subtract_twice_from( field& g ){return [&g](auto i, auto v){ g[i] -= 2*v; };}
//! vanilla standard gradient
class fourier_gradient{
private:
real_t boxlen_, k0_;
size_t n_, nhalf_;
public:
explicit fourier_gradient( const ConfigFile& the_config )
: boxlen_( the_config.GetValue<double>("setup", "BoxLength") ),
k0_(2.0*M_PI/boxlen_),
n_( the_config.GetValue<size_t>("setup","GridRes") ),
nhalf_( n_/2 )
{}
inline ccomplex_t gradient( const int idim, std::array<size_t,3> ijk ) const
{
real_t rgrad =
(ijk[idim]!=nhalf_)? (real_t(ijk[idim]) - real_t(ijk[idim] > nhalf_) * n_) : 0.0;
return ccomplex_t(0.0,rgrad * k0_);
}
inline real_t vfac_corr( std::array<size_t,3> ijk ) const
{
return 1.0;
}
};
}

include/particle_plt.hh (new file, +791)

@@ -0,0 +1,791 @@
#pragma once
#include <general.hh>
#include <unistd.h> // for unlink
#include <iostream>
#include <fstream>
#include <random>
#include <map>
#include <particle_generator.hh>
#include <grid_fft.hh>
#include <mat3.hh>
#define PRODUCTION
namespace particle{
//! implement Joyce, Marcos et al. PLT calculation
class lattice_gradient{
private:
const real_t boxlen_;
const size_t ngmapto_, ngrid_, ngrid32_;
const real_t mapratio_;
Grid_FFT<real_t,false> D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_;
Grid_FFT<real_t,false> grad_x_, grad_y_, grad_z_;
std::vector<vec3<real_t>> vectk_;
std::vector<vec3<int>> ico_, vecitk_;
bool is_even( int i ){ return (i%2)==0; }
bool is_in( int i, int j, int k, const mat3<int>& M ){
vec3<int> v({i,j,k});
auto vv = M * v;
return is_even(vv.x)&&is_even(vv.y)&&is_even(vv.z);
}
void init_D( lattice lattice_type )
{
constexpr real_t pi = M_PI;
constexpr real_t twopi = 2.0*M_PI;
constexpr real_t fourpi = 4.0*M_PI;
const real_t sqrtpi = std::sqrt(M_PI);
const real_t pi32 = std::pow(M_PI,1.5);
//! === vectors, reciprocals and normals for the SC lattice ===
const int charge_fac_sc = 1;
const mat3<real_t> mat_bravais_sc{
1.0, 0.0, 0.0,
0.0, 1.0, 0.0,
0.0, 0.0, 1.0,
};
const mat3<real_t> mat_reciprocal_sc{
twopi, 0.0, 0.0,
0.0, twopi, 0.0,
0.0, 0.0, twopi,
};
const mat3<int> mat_invrecip_sc{
2, 0, 0,
0, 2, 0,
0, 0, 2,
};
const std::vector<vec3<real_t>> normals_sc{
{pi,0.,0.},{-pi,0.,0.},
{0.,pi,0.},{0.,-pi,0.},
{0.,0.,pi},{0.,0.,-pi},
};
//! === vectors, reciprocals and normals for the BCC lattice ===
const int charge_fac_bcc = 2;
const mat3<real_t> mat_bravais_bcc{
1.0, 0.0, 0.5,
0.0, 1.0, 0.5,
0.0, 0.0, 0.5,
};
const mat3<real_t> mat_reciprocal_bcc{
twopi, 0.0, 0.0,
0.0, twopi, 0.0,
-twopi, -twopi, fourpi,
};
const mat3<int> mat_invrecip_bcc{
2, 0, 0,
0, 2, 0,
1, 1, 1,
};
const std::vector<vec3<real_t>> normals_bcc{
{0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi},
{pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi},
{pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.}
};
//! === vectors, reciprocals and normals for the FCC lattice ===
const int charge_fac_fcc = 4;
const mat3<real_t> mat_bravais_fcc{
0.0, 0.5, 0.0,
0.5, 0.0, 1.0,
0.5, 0.5, 0.0,
};
const mat3<real_t> mat_reciprocal_fcc{
-fourpi, fourpi, twopi,
0.0, 0.0, twopi,
fourpi, 0.0, -twopi,
};
const mat3<int> mat_invrecip_fcc{
0, 1, 1,
1, 0, 1,
0, 2, 0,
};
const std::vector<vec3<real_t>> normals_fcc{
{twopi,0.,0.},{-twopi,0.,0.},
{0.,twopi,0.},{0.,-twopi,0.},
{0.,0.,twopi},{0.,0.,-twopi},
{+pi,+pi,+pi},{+pi,+pi,-pi},
{+pi,-pi,+pi},{+pi,-pi,-pi},
{-pi,+pi,+pi},{-pi,+pi,-pi},
{-pi,-pi,+pi},{-pi,-pi,-pi},
};
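// Note: in all three cases the Bravais and reciprocal matrices store the basis
// vectors as columns, normalised such that a_i . b_j = 2 pi delta_ij, and
// mat_invrecip equals 2 (mat_reciprocal / 2 pi)^{-1}, so is_in() tests via
// evenness whether 2 pi times an integer triple lies on the reciprocal lattice.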
//! select the properties for the chosen lattice
const int ilat = lattice_type; // 0 = sc, 1 = bcc, 2 = fcc
const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc;
const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc;
const auto mat_invrecip = (ilat==2)? mat_invrecip_fcc : (ilat==1)? mat_invrecip_bcc : mat_invrecip_sc;
const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc;
const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? charge_fac_bcc : charge_fac_sc;
const ptrdiff_t nlattice = ngrid_;
const real_t dx = 1.0/real_t(nlattice);
const real_t eta = 4.0; // Ewald cutoff shall be 4 cells
const real_t alpha = 1.0/std::sqrt(2)/eta;
const real_t alpha2 = alpha*alpha;
const real_t alpha3 = alpha2*alpha;
const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_fac;
const real_t fft_norm = 1.0/std::pow(real_t(nlattice),3.0);
const real_t fft_norm12 = 1.0/std::pow(real_t(nlattice),1.5);
//! just a Kronecker \delta_ij
auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; };
//! Ewald summation: short-range Green's function
auto add_greensftide_sr = [&]( mat3<real_t>& D, const vec3<real_t>& d ) -> void {
auto r = d.norm();
if( r< 1e-14 ) return; // return zero for r=0
const real_t r2(r*r), r3(r2*r), r5(r3*r2);
const real_t K1( -alpha3/pi32 * std::exp(-alpha2*r2)/r2 );
const real_t K2( (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha2*r2)*r)/fourpi );
for( int mu=0; mu<3; ++mu ){
for( int nu=mu; nu<3; ++nu ){
real_t dd( d[mu]*d[nu] * K1 + (kronecker(mu,nu)/r3 - 3.0 * (d[mu]*d[nu])/r5) * K2 );
D(mu,nu) += dd;
D(nu,mu) += (mu!=nu)? dd : 0.0;
}
}
};
//! Ewald summation: long-range Green's function
auto add_greensftide_lr = [&]( mat3<real_t>& D, const vec3<real_t>& k, const vec3<real_t>& r ) -> void {
real_t kmod2 = k.norm_squared();
real_t term = std::exp(-kmod2/(4*alpha2))*std::cos(k.dot(r)) / kmod2 * fft_norm;
for( int mu=0; mu<3; ++mu ){
for( int nu=mu; nu<3; ++nu ){
auto dd = k[mu] * k[nu] * term;
D(mu,nu) += dd;
D(nu,mu) += (mu!=nu)? dd : 0.0;
}
}
};
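// For reference, the two lambdas above implement the standard Ewald split of the
// 1/r Green's function into short- and long-range parts (a sketch; d = separation,
// r = |d|, k = reciprocal-lattice vector, alpha = splitting parameter):
//   short range:  D_{mu nu} += d_mu d_nu K1 + ( delta_{mu nu}/r^3 - 3 d_mu d_nu/r^5 ) K2
//     with K1 = -alpha^3/pi^{3/2} exp(-alpha^2 r^2) / r^2
//     and  K2 = ( erfc(alpha r) + 2 alpha/sqrt(pi) exp(-alpha^2 r^2) r ) / (4 pi)
//   long range:   D_{mu nu} += k_mu k_nu exp(-k^2/(4 alpha^2)) cos(k.d) / k^2  (FFT-normalised)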
//! checks if 'vec' is in the FBZ with FBZ normal vectors given in 'normals'
auto check_FBZ = []( const auto& normals, const auto& vec ) -> bool {
for( const auto& n : normals ){
if( n.dot( vec ) > 1.0001 * n.dot(n) ){
return false;
}
}
return true;
};
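// i.e. 'vec' lies inside the FBZ iff vec.n <= |n|^2 (to a 1e-4 tolerance)
// for every facet normal n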
constexpr ptrdiff_t lnumber = 3, knumber = 3;
const int numb = 1; //!< search radius when shifting vectors into FBZ
vectk_.assign(D_xx_.memsize(),vec3<real_t>());
ico_.assign(D_xx_.memsize(),vec3<int>());
vecitk_.assign(D_xx_.memsize(),vec3<int>());
#pragma omp parallel
{
//... temporary to hold values of the dynamical matrix
mat3<real_t> matD(0.0);
#pragma omp for
for( ptrdiff_t i=0; i<nlattice; ++i ){
for( ptrdiff_t j=0; j<nlattice; ++j ){
for( ptrdiff_t k=0; k<nlattice; ++k ){
// compute lattice site vector from (i,j,k) multiplying Bravais base matrix, and wrap back to box
const vec3<real_t> x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)});
const vec3<real_t> ar = (mat_bravais * x_ijk).wrap_abs();
//... zero temporary matrix
matD.zero();
// add real-space part of dynamical matrix, periodic copies
for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){
for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){
for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){
const vec3<real_t> n_ijk({real_t(ix),real_t(iy),real_t(iz)});
const vec3<real_t> dr(ar - mat_bravais * n_ijk);
add_greensftide_sr(matD, dr);
}
}
}
// add k-space part of dynamical matrix
for( ptrdiff_t ix=-knumber; ix<=knumber; ix++ ){
for( ptrdiff_t iy=-knumber; iy<=knumber; iy++ ){
for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){
if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){
const vec3<real_t> k_ijk({real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice});
const vec3<real_t> ak( mat_reciprocal * k_ijk);
add_greensftide_lr(matD, ak, ar );
}
}
}
}
D_xx_.relem(i,j,k) = matD(0,0) * charge;
D_xy_.relem(i,j,k) = matD(0,1) * charge;
D_xz_.relem(i,j,k) = matD(0,2) * charge;
D_yy_.relem(i,j,k) = matD(1,1) * charge;
D_yz_.relem(i,j,k) = matD(1,2) * charge;
D_zz_.relem(i,j,k) = matD(2,2) * charge;
}
}
}
} // end omp parallel region
// fix r=0 with background density (added later in Fourier space)
D_xx_.relem(0,0,0) = 1.0/3.0;
D_xy_.relem(0,0,0) = 0.0;
D_xz_.relem(0,0,0) = 0.0;
D_yy_.relem(0,0,0) = 1.0/3.0;
D_yz_.relem(0,0,0) = 0.0;
D_zz_.relem(0,0,0) = 1.0/3.0;
D_xx_.FourierTransformForward();
D_xy_.FourierTransformForward();
D_xz_.FourierTransformForward();
D_yy_.FourierTransformForward();
D_yz_.FourierTransformForward();
D_zz_.FourierTransformForward();
#ifndef PRODUCTION
if (CONFIG::MPI_task_rank == 0)
unlink("debug.hdf5");
D_xx_.Write_to_HDF5("debug.hdf5","Dxx");
D_xy_.Write_to_HDF5("debug.hdf5","Dxy");
D_xz_.Write_to_HDF5("debug.hdf5","Dxz");
D_yy_.Write_to_HDF5("debug.hdf5","Dyy");
D_yz_.Write_to_HDF5("debug.hdf5","Dyz");
D_zz_.Write_to_HDF5("debug.hdf5","Dzz");
std::ofstream ofs2("test_brillouin.txt");
#endif
using map_t = std::map<vec3<int>,size_t>;
map_t iimap;
//!=== Make temporary copies before re-sorting to std. Fourier grid ========!//
Grid_FFT<real_t,false>
temp1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
temp2({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
temp3({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0});
temp1.FourierTransformForward(false);
temp2.FourierTransformForward(false);
temp3.FourierTransformForward(false);
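// the six independent components of the symmetric D_ij are packed pairwise into
// the real/imaginary parts of three complex fields (unpacked again below)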
#pragma omp parallel for
for( size_t i=0; i<D_xx_.size(0); i++ )
{
for( size_t j=0; j<D_xx_.size(1); j++ )
{
for( size_t k=0; k<D_xx_.size(2); k++ )
{
temp1.kelem(i,j,k) = ccomplex_t(std::real(D_xx_.kelem(i,j,k)),std::real(D_xy_.kelem(i,j,k)));
temp2.kelem(i,j,k) = ccomplex_t(std::real(D_xz_.kelem(i,j,k)),std::real(D_yy_.kelem(i,j,k)));
temp3.kelem(i,j,k) = ccomplex_t(std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k)));
}
}
}
D_xx_.zero(); D_xy_.zero(); D_xz_.zero();
D_yy_.zero(); D_yz_.zero(); D_zz_.zero();
//!=== Diagonalise and re-sort to std. Fourier grid ========!//
#pragma omp parallel
{
// thread private matrix representation
mat3<real_t> D;
vec3<real_t> eval, evec1, evec2, evec3;
#pragma omp for
for( size_t i=0; i<D_xx_.size(0); i++ )
{
for( size_t j=0; j<D_xx_.size(1); j++ )
{
for( size_t k=0; k<D_xx_.size(2); k++ )
{
vec3<real_t> kv = D_xx_.get_k<real_t>(i,j,k);
// put matrix elements into actual matrix
D(0,0) = std::real(temp1.kelem(i,j,k)) / fft_norm12;
D(0,1) = D(1,0) = std::imag(temp1.kelem(i,j,k)) / fft_norm12;
D(0,2) = D(2,0) = std::real(temp2.kelem(i,j,k)) / fft_norm12;
D(1,1) = std::imag(temp2.kelem(i,j,k)) / fft_norm12;
D(1,2) = D(2,1) = std::real(temp3.kelem(i,j,k)) / fft_norm12;
D(2,2) = std::imag(temp3.kelem(i,j,k)) / fft_norm12;
// compute eigenstructure of matrix
D.eigen(eval, evec1, evec2, evec3);
evec3 /= (twopi*ngrid_);
// now determine to which modes on the regular lattice this contributes
vec3<real_t> ar = kv / (twopi*ngrid_);
vec3<real_t> a(mat_reciprocal * ar);
// translate the k-vectors into the "candidate" FBZ
for( int l1=-numb; l1<=numb; ++l1 ){
for( int l2=-numb; l2<=numb; ++l2 ){
for( int l3=-numb; l3<=numb; ++l3 ){
// need both halves of Fourier space since we use real transforms
for( int isign=0; isign<=1; ++isign ){
const real_t sign = 2.0*real_t(isign)-1.0;
const vec3<real_t> vshift({real_t(l1),real_t(l2),real_t(l3)});
vec3<real_t> vectk = sign * a + mat_reciprocal * vshift;
if( check_FBZ( normals, vectk ) )
{
int ix = std::round(vectk.x*(ngrid_)/twopi);
int iy = std::round(vectk.y*(ngrid_)/twopi);
int iz = std::round(vectk.z*(ngrid_)/twopi);
#pragma omp critical
{iimap.insert( std::pair<vec3<int>,size_t>({ix,iy,iz}, D_xx_.get_idx(i,j,k)) );}
temp1.kelem(i,j,k) = ccomplex_t(eval[2],eval[1]);
temp2.kelem(i,j,k) = ccomplex_t(eval[0],evec3.x);
temp3.kelem(i,j,k) = ccomplex_t(evec3.y,evec3.z);
}
}//sign
} //l3
} //l2
} //l1
} //k
} //j
} //i
}
D_xx_.kelem(0,0,0) = 1.0;
D_xy_.kelem(0,0,0) = 0.0;
D_xz_.kelem(0,0,0) = 0.0;
D_yy_.kelem(0,0,0) = 1.0;
D_yz_.kelem(0,0,0) = 0.0;
D_zz_.kelem(0,0,0) = 0.0;
//... approximate infinite lattice by interpolating to sites not covered by current resolution...
#pragma omp parallel for
for( size_t i=0; i<D_xx_.size(0); i++ ){
for( size_t j=0; j<D_xx_.size(1); j++ ){
for( size_t k=0; k<D_xx_.size(2); k++ ){
int ii = (int(i)>nlattice/2)? int(i)-nlattice : int(i);
int jj = (int(j)>nlattice/2)? int(j)-nlattice : int(j);
int kk = (int(k)>nlattice/2)? int(k)-nlattice : int(k);
vec3<real_t> kv({real_t(ii),real_t(jj),real_t(kk)});
auto align_with_k = [&]( const vec3<real_t>& v ) -> vec3<real_t>{
return v*((v.dot(kv)<0.0)?-1.0:1.0);
};
vec3<real_t> v, l;
map_t::iterator it;
if( !is_in(i,j,k,mat_invrecip) ){
auto average_lv = [&]( const auto& t1, const auto& t2, const auto& t3, vec3<real_t>& v, vec3<real_t>& l ) {
v = 0.0; l = 0.0;
int count(0);
auto add_lv = [&]( auto it ) -> void {
auto q = it->second;++count;
l += vec3<real_t>({std::real(t1.kelem(q)),std::imag(t1.kelem(q)),std::real(t2.kelem(q))});
v += align_with_k(vec3<real_t>({std::imag(t2.kelem(q)),std::real(t3.kelem(q)),std::imag(t3.kelem(q))}));
};
map_t::iterator it;
if( (it = iimap.find({ii-1,jj,kk}))!=iimap.end() ){ add_lv(it); }
if( (it = iimap.find({ii+1,jj,kk}))!=iimap.end() ){ add_lv(it); }
if( (it = iimap.find({ii,jj-1,kk}))!=iimap.end() ){ add_lv(it); }
if( (it = iimap.find({ii,jj+1,kk}))!=iimap.end() ){ add_lv(it); }
if( (it = iimap.find({ii,jj,kk-1}))!=iimap.end() ){ add_lv(it); }
if( (it = iimap.find({ii,jj,kk+1}))!=iimap.end() ){ add_lv(it); }
l/=real_t(count); v/=real_t(count);
};
average_lv(temp1,temp2,temp3,v,l);
}else{
if( (it = iimap.find({ii,jj,kk}))!=iimap.end() ){
auto q = it->second;
l = vec3<real_t>({std::real(temp1.kelem(q)),std::imag(temp1.kelem(q)),std::real(temp2.kelem(q))});
v = align_with_k(vec3<real_t>({std::imag(temp2.kelem(q)),std::real(temp3.kelem(q)),std::imag(temp3.kelem(q))}));
}
}
D_xx_.kelem(i,j,k) = l[0];
D_xy_.kelem(i,j,k) = l[1];
D_xz_.kelem(i,j,k) = l[2];
D_yy_.kelem(i,j,k) = v[0];
D_yz_.kelem(i,j,k) = v[1];
D_zz_.kelem(i,j,k) = v[2];
}
}
}
#ifdef PRODUCTION
#pragma omp parallel for
for( size_t i=0; i<D_xx_.size(0); i++ ){
for( size_t j=0; j<D_xx_.size(1); j++ ){
for( size_t k=0; k<D_xx_.size(2); k++ )
{
int ii = (i>size_t(nlattice/2))? int(i)-nlattice : i;
int jj = (j>size_t(nlattice/2))? int(j)-nlattice : j;
vec3<real_t> kv = D_xx_.get_k<real_t>(i,j,k);
const real_t kmod = kv.norm()/mapratio_/boxlen_;
double mu1 = std::real(D_xx_.kelem(i,j,k));
// double mu2 = std::real(D_xy_.kelem(i,j,k));
// double mu3 = std::real(D_xz_.kelem(i,j,k));
vec3<real_t> evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))});
evec1 /= evec1.norm();
if(std::abs(ii)+std::abs(jj)+k<8){
// small k modes, use usual pseudospectral derivative
// -- store in diagonal components of D_ij
D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kv.x/mapratio_/boxlen_);
D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kv.y/mapratio_/boxlen_);
D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kv.z/mapratio_/boxlen_);
// spatially dependent correction to vfact = \dot{D_+}/D_+
D_xy_.kelem(i,j,k) = 1.0;
}else{
// large k modes, use interpolated PLT results
// -- store in diagonal components of D_ij
D_xx_.kelem(i,j,k) = ccomplex_t(0.0,evec1.x * kmod);
D_yy_.kelem(i,j,k) = ccomplex_t(0.0,evec1.y * kmod);
D_zz_.kelem(i,j,k) = ccomplex_t(0.0,evec1.z * kmod);
// re-normalise so that the longitudinal amplitude is exact
auto norm = (kv.norm()/kv.dot(evec1));
D_xx_.kelem(i,j,k) *= norm;
D_yy_.kelem(i,j,k) *= norm;
D_zz_.kelem(i,j,k) *= norm;
// spatially dependent correction to vfact = \dot{D_+}/D_+
D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.));
}
if( i==size_t(nlattice/2) ) D_xx_.kelem(i,j,k)=0.0;
if( j==size_t(nlattice/2) ) D_yy_.kelem(i,j,k)=0.0;
if( k==size_t(nlattice/2) ) D_zz_.kelem(i,j,k)=0.0;
}
}
}
D_xy_.kelem(0,0,0) = 1.0;
D_xx_.kelem(0,0,0) = 0.0;
D_yy_.kelem(0,0,0) = 0.0;
D_zz_.kelem(0,0,0) = 0.0;
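// For reference (cf. Marcos et al. 2006): in an EdS background, a plane wave
// along an eigenvector with eigenvalue mu of the dynamical matrix grows as
// D_+ ~ a^s with
//   s = ( sqrt(1 + 24 mu) - 1 ) / 4,
// which reduces to the fluid result s = 1 for mu = 1; the factor stored in
// D_xy_ above is 1/s, the spatially dependent correction to vfact = \dot{D_+}/D_+.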
// unlink("debug.hdf5");
// D_xy_.Write_to_HDF5("debug.hdf5","mu1");
// D_xx_.Write_to_HDF5("debug.hdf5","e1x");
// D_yy_.Write_to_HDF5("debug.hdf5","e1y");
// D_zz_.Write_to_HDF5("debug.hdf5","e1z");
#else
D_xx_.Write_to_HDF5("debug.hdf5","mu1");
D_xy_.Write_to_HDF5("debug.hdf5","mu2");
D_xz_.Write_to_HDF5("debug.hdf5","mu3");
D_yy_.Write_to_HDF5("debug.hdf5","e1x");
D_yz_.Write_to_HDF5("debug.hdf5","e1y");
D_zz_.Write_to_HDF5("debug.hdf5","e1z");
#endif
}
void init_D__old()
{
constexpr real_t pi = M_PI, twopi = 2.0*M_PI;
const std::vector<vec3<real_t>> normals_bcc{
{0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi},
{pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi},
{pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.}
};
const std::vector<vec3<real_t>> bcc_reciprocal{
{twopi,0.,-twopi}, {0.,twopi,-twopi}, {0.,0.,2*twopi}
};
const real_t eta = 2.0/ngrid_; // Ewald cutoff shall be 2 cells
const real_t alpha = 1.0/std::sqrt(2)/eta;
const real_t alpha2 = alpha*alpha;
const real_t alpha3 = alpha2*alpha;
const real_t sqrtpi = std::sqrt(M_PI);
const real_t pi32 = std::pow(M_PI,1.5);
//! just a Kronecker \delta_ij
auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; };
//! short range component of Ewald sum, eq. (A2) of Marcos (2008)
auto greensftide_sr = [&]( int mu, int nu, const vec3<real_t>& vR, const vec3<real_t>& vP ) -> real_t {
auto d = vR-vP;
auto r = d.norm();
if( r< 1e-14 ) return 0.0; // return zero for r=0; the site is fixed later
real_t val{0.0};
val -= d[mu]*d[nu]/(r*r) * alpha3/pi32 * std::exp(-alpha*alpha*r*r);
val += 1.0/(4.0*M_PI)*(kronecker(mu,nu)/std::pow(r,3) - 3.0 * (d[mu]*d[nu])/std::pow(r,5)) *
(std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha*alpha*r*r)*r);
return val;
};
//! sums mirrored copies of short-range component of Ewald sum
auto evaluate_D = [&]( int mu, int nu, const vec3<real_t>& v ) -> real_t{
real_t sr = 0.0;
constexpr int N = 3; // number of repeated copies ±N per dimension
int count = 0;
for( int i=-N; i<=N; ++i ){
for( int j=-N; j<=N; ++j ){
for( int k=-N; k<=N; ++k ){
if( std::abs(i)+std::abs(j)+std::abs(k) <= N ){
//sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} );
sr += greensftide_sr( mu, nu, v, {real_t(i),real_t(j),real_t(k)} );
sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} );
count += 2;
// sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)+0.5} )/16;
// sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)+0.5,real_t(k)-0.5} )/16;
// sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)+0.5} )/16;
// sr += greensftide_sr( mu, nu, v, {real_t(i)+0.5,real_t(j)-0.5,real_t(k)-0.5} )/16;
// sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)+0.5} )/16;
// sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)+0.5,real_t(k)-0.5} )/16;
// sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)+0.5} )/16;
// sr += greensftide_sr( mu, nu, v, {real_t(i)-0.5,real_t(j)-0.5,real_t(k)-0.5} )/16;
}
}
}
}
return sr / count;
};
//! fill D_ij array with short range evaluated function
#pragma omp parallel for
for( size_t i=0; i<ngrid_; i++ ){
vec3<real_t> p;
p.x = real_t(i)/ngrid_;
for( size_t j=0; j<ngrid_; j++ ){
p.y = real_t(j)/ngrid_;
for( size_t k=0; k<ngrid_; k++ ){
p.z = real_t(k)/ngrid_;
D_xx_.relem(i,j,k) = evaluate_D(0,0,p);
D_xy_.relem(i,j,k) = evaluate_D(0,1,p);
D_xz_.relem(i,j,k) = evaluate_D(0,2,p);
D_yy_.relem(i,j,k) = evaluate_D(1,1,p);
D_yz_.relem(i,j,k) = evaluate_D(1,2,p);
D_zz_.relem(i,j,k) = evaluate_D(2,2,p);
}
}
}
// fix r=0 with background density (added later in Fourier space)
D_xx_.relem(0,0,0) = 0.0;
D_xy_.relem(0,0,0) = 0.0;
D_xz_.relem(0,0,0) = 0.0;
D_yy_.relem(0,0,0) = 0.0;
D_yz_.relem(0,0,0) = 0.0;
D_zz_.relem(0,0,0) = 0.0;
// Fourier transform all six components
D_xx_.FourierTransformForward();
D_xy_.FourierTransformForward();
D_xz_.FourierTransformForward();
D_yy_.FourierTransformForward();
D_yz_.FourierTransformForward();
D_zz_.FourierTransformForward();
const real_t rho0 = std::pow(real_t(ngrid_),1.5); //mass of one particle in Fourier space
const real_t nfac = 1.0/std::pow(real_t(ngrid_),1.5);
#pragma omp parallel
{
// thread private matrix representation
mat3<real_t> D;
vec3<real_t> eval, evec1, evec2, evec3;
#pragma omp for
for( size_t i=0; i<D_xx_.size(0); i++ )
{
for( size_t j=0; j<D_xx_.size(1); j++ )
{
for( size_t k=0; k<D_xx_.size(2); k++ )
{
vec3<real_t> kv = D_xx_.get_k<real_t>(i,j,k);
auto& b=bcc_reciprocal;
vec3<real_t> kvc = { b[0][0]*kv[0]+b[1][0]*kv[1]+b[2][0]*kv[2],
                     b[0][1]*kv[0]+b[1][1]*kv[1]+b[2][1]*kv[2],
                     b[0][2]*kv[0]+b[1][2]*kv[1]+b[2][2]*kv[2] };
// vec3<real_t> kv = {kvc.dot(bcc_reciprocal[0]),kvc.dot(bcc_reciprocal[1]),kvc.dot(bcc_reciprocal[2])};
const real_t kmod2 = kv.norm_squared();
// long range component of Ewald sum
//ccomplex_t shift = 1.0;//std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2])* D_xx_.get_dx()[0]));
ccomplex_t phi0 = -rho0 * std::exp(-0.5*eta*eta*kmod2) / kmod2;
phi0 = (phi0==phi0)? phi0 : 0.0; // catch NaN from division by zero when kmod2=0
// const int nn = 3;
// size_t nsum = 0;
// ccomplex_t ff = 0.0;
// for( int is=-nn;is<=nn;is++){
// for( int js=-nn;js<=nn;js++){
// for( int ks=-nn;ks<=nn;ks++){
// if( std::abs(is)+std::abs(js)+std::abs(ks) <= nn ){
// ff += std::exp(ccomplex_t(0.0,(((is)*kv[0] + (js)*kv[1] + (ks)*kv[2]))));
// ff += std::exp(ccomplex_t(0.0,(((0.5+is)*kv[0] + (0.5+js)*kv[1] + (0.5+ks)*kv[2]))));
// ++nsum;
// }
// }
// }
// }
// ff /= nsum;
// ccomplex_t ff = 1.0;
ccomplex_t ff = (0.5+0.5*std::exp(ccomplex_t(0.0,0.5*(kv[0] + kv[1] + kv[2]))));
// assemble short-range + long_range of Ewald sum and add DC component to trace
D_xx_.kelem(i,j,k) = ff*((D_xx_.kelem(i,j,k) - kv[0]*kv[0] * phi0)*nfac) + 1.0/3.0;
D_xy_.kelem(i,j,k) = ff*((D_xy_.kelem(i,j,k) - kv[0]*kv[1] * phi0)*nfac);
D_xz_.kelem(i,j,k) = ff*((D_xz_.kelem(i,j,k) - kv[0]*kv[2] * phi0)*nfac);
D_yy_.kelem(i,j,k) = ff*((D_yy_.kelem(i,j,k) - kv[1]*kv[1] * phi0)*nfac) + 1.0/3.0;
D_yz_.kelem(i,j,k) = ff*((D_yz_.kelem(i,j,k) - kv[1]*kv[2] * phi0)*nfac);
D_zz_.kelem(i,j,k) = ff*((D_zz_.kelem(i,j,k) - kv[2]*kv[2] * phi0)*nfac) + 1.0/3.0;
}
}
}
D_xx_.kelem(0,0,0) = 1.0/3.0;
D_xy_.kelem(0,0,0) = 0.0;
D_xz_.kelem(0,0,0) = 0.0;
D_yy_.kelem(0,0,0) = 1.0/3.0;
D_yz_.kelem(0,0,0) = 0.0;
D_zz_.kelem(0,0,0) = 1.0/3.0;
#pragma omp for
for( size_t i=0; i<D_xx_.size(0); i++ )
{
for( size_t j=0; j<D_xx_.size(1); j++ )
{
for( size_t k=0; k<D_xx_.size(2); k++ )
{
// put matrix elements into actual matrix
D = { std::real(D_xx_.kelem(i,j,k)), std::real(D_xy_.kelem(i,j,k)), std::real(D_xz_.kelem(i,j,k)),
std::real(D_yy_.kelem(i,j,k)), std::real(D_yz_.kelem(i,j,k)), std::real(D_zz_.kelem(i,j,k)) };
// compute eigenstructure of matrix
D.eigen(eval, evec1, evec2, evec3);
#ifdef PRODUCTION
vec3<real_t> kv = D_xx_.get_k<real_t>(i,j,k);
const real_t kmod = kv.norm()/mapratio_/boxlen_;
// store in diagonal components of D_ij
D_xx_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.x;
D_yy_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.y;
D_zz_.kelem(i,j,k) = ccomplex_t(0.0,kmod) * evec3.z;
auto norm = (kv.norm()/kv.dot(evec3));
if ( std::abs(kv.dot(evec3)) < 1e-10 || kv.norm() < 1e-10 ) norm = 0.0;
D_xx_.kelem(i,j,k) *= norm;
D_yy_.kelem(i,j,k) *= norm;
D_zz_.kelem(i,j,k) *= norm;
// spatially dependent correction to vfact = \dot{D_+}/D_+
D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*eval[2])-1.));
#else
D_xx_.kelem(i,j,k) = eval[2];
D_yy_.kelem(i,j,k) = eval[1];
D_zz_.kelem(i,j,k) = eval[0];
D_xy_.kelem(i,j,k) = evec3[0];
D_xz_.kelem(i,j,k) = evec3[1];
D_yz_.kelem(i,j,k) = evec3[2];
#endif
}
}
}
}
#ifdef PRODUCTION
D_xy_.kelem(0,0,0) = 1.0;
#endif
//////////////////////////////////////////
std::string filename("plt_test.hdf5");
unlink(filename.c_str());
#if defined(USE_MPI)
MPI_Barrier(MPI_COMM_WORLD);
#endif
// rho.Write_to_HDF5(filename, "rho");
D_xx_.Write_to_HDF5(filename, "omega1");
D_yy_.Write_to_HDF5(filename, "omega2");
D_zz_.Write_to_HDF5(filename, "omega3");
D_xy_.Write_to_HDF5(filename, "e1_x");
D_xz_.Write_to_HDF5(filename, "e1_y");
D_yz_.Write_to_HDF5(filename, "e1_z");
}
public:
// real_t boxlen, size_t ngridother
explicit lattice_gradient( ConfigFile& the_config, size_t ngridself=64 )
: boxlen_( the_config.GetValue<double>("setup", "BoxLength") ),
ngmapto_( the_config.GetValue<size_t>("setup", "GridRes") ),
ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)),
D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
grad_x_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_y_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}),
grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0})
{
csoca::ilog << "-------------------------------------------------------------------------------" << std::endl;
std::string lattice_str = the_config.GetValueSafe<std::string>("setup","ParticleLoad","sc");
const lattice lattice_type =
((lattice_str=="bcc")? lattice_bcc
: ((lattice_str=="fcc")? lattice_fcc
: ((lattice_str=="rsc")? lattice_rsc
: lattice_sc)));
csoca::ilog << "PLT corrections for " << lattice_str << " lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl;
// #if defined(USE_MPI)
// if( CONFIG::MPI_task_size>1 )
// {
// csoca::elog << "PLT not implemented for MPI, cannot run with more than 1 task currently!" << std::endl;
// abort();
// }
// #endif
double wtime = get_wtime();
csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush;
init_D( lattice_type );
// init_D__old();
csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl;
}
inline ccomplex_t gradient( const int idim, std::array<size_t,3> ijk ) const
{
real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_;
if( idim == 0 ) return D_xx_.get_cic_kspace({ix,iy,iz});
else if( idim == 1 ) return D_yy_.get_cic_kspace({ix,iy,iz});
return D_zz_.get_cic_kspace({ix,iy,iz});
}
inline real_t vfac_corr( std::array<size_t,3> ijk ) const
{
real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_;
return std::real(D_xy_.get_cic_kspace({ix,iy,iz}));
}
};
}
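// Note: lattice_gradient mirrors the gradient()/vfac_corr() interface of
// op::fourier_gradient (added to include/operators.hh in this commit), so the
// PLT-corrected operator is a drop-in replacement. A hypothetical call site:
//   particle::lattice_gradient plt_op(the_config); // PLT-corrected k-space derivative
//   op::fourier_gradient std_op(the_config);       // vanilla i*k derivative
//   auto g = plt_op.gradient(0, {i, j, k});        // identical signature for both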


@@ -25,7 +25,11 @@ public:
//! copy constructor
vec3( const vec3<T> &v)
: data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){}
//! copy constructor for non-const reference, needed to avoid variadic template being called for non-const reference
vec3( vec3<T>& v)
: data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){}
//! move constructor
vec3( vec3<T> &&v)
: data_(std::move(v.data_)), x(data_[0]), y(data_[1]), z(data_[2]){}
@@ -33,43 +37,78 @@ public:
//! construct vec3 from initializer list
template<typename ...E>
vec3(E&&...e)
: data_{{std::forward<E>(e)...}}, x(data_[0]), y(data_[1]), z(data_[2]){}
: data_{{std::forward<E>(e)...}}, x{data_[0]}, y{data_[1]}, z{data_[2]}
{}
// vec3( T a, T b, T c )
// : data_{{a,b,c}}, x(data_[0]), y(data_[1]), z(data_[2]){}
//! braket index access to vector components
T &operator[](size_t i){ return data_[i];}
//! bracket index access to vector components
T &operator[](size_t i) noexcept{ return data_[i];}
//! const braket index access to vector components
const T &operator[](size_t i) const { return data_[i]; }
//! const bracket index access to vector components
const T &operator[](size_t i) const noexcept { return data_[i]; }
// assignment operator
vec3<T>& operator=( const vec3<T>& v ) noexcept { data_=v.data_; return *this; }
//! implementation of summation of vec3
vec3<T> operator+( const vec3<T>& v ) const{ return vec3<T>({x+v.x,y+v.y,z+v.z}); }
vec3<T> operator+( const vec3<T>& v ) const noexcept{ return vec3<T>({x+v.x,y+v.y,z+v.z}); }
//! implementation of difference of vec3
vec3<T> operator-( const vec3<T>& v ) const{ return vec3<T>({x-v.x,y-v.y,z-v.z}); }
vec3<T> operator-( const vec3<T>& v ) const noexcept{ return vec3<T>({x-v.x,y-v.y,z-v.z}); }
//! implementation of unary negative
vec3<T> operator-() const noexcept{ return vec3<T>({-x,-y,-z}); }
//! implementation of scalar multiplication
vec3<T> operator*( T s ) const{ return vec3<T>({x*s,y*s,z*s}); }
vec3<T> operator*( T s ) const noexcept{ return vec3<T>({x*s,y*s,z*s}); }
//! implementation of scalar division
vec3<T> operator/( T s ) const noexcept{ return vec3<T>({x/s,y/s,z/s}); }
//! implementation of += operator
vec3<T>& operator+=( const vec3<T>& v ) const{ x+=v.x; y+=v.y; z+=v.z; return *this; }
vec3<T>& operator+=( const vec3<T>& v ) noexcept{ x+=v.x; y+=v.y; z+=v.z; return *this; }
//! implementation of -= operator
vec3<T>& operator-=( const vec3<T>& v ) const{ x-=v.x; y-=v.y; z-=v.z; return *this; }
vec3<T>& operator-=( const vec3<T>& v ) noexcept{ x-=v.x; y-=v.y; z-=v.z; return *this; }
//! multiply with scalar
vec3<T>& operator*=( T s ) const{ x*=s; y*=s; z*=s; return *this; }
vec3<T>& operator*=( T s ) noexcept{ x*=s; y*=s; z*=s; return *this; }
//! divide by scalar
vec3<T>& operator/=( T s ) noexcept{ x/=s; y/=s; z/=s; return *this; }
//! compute dot product with another vector
T dot(const vec3<T> &a) const
T dot(const vec3<T> &a) const noexcept
{
return data_[0] * a.data_[0] + data_[1] * a.data_[1] + data_[2] * a.data_[2];
}
//! returns 2-norm squared of vector
T norm_squared(void) const { return this->dot(*this); }
T norm_squared(void) const noexcept { return this->dot(*this); }
//! returns 2-norm of vector
T norm(void) const { return std::sqrt( this->norm_squared() ); }
T norm(void) const noexcept { return std::sqrt( this->norm_squared() ); }
//! wrap absolute vector to box of size p
vec3<T>& wrap_abs( T p = 1.0 ) noexcept{
for( auto& x : data_ ) x = std::fmod( 2*p + x, p );
return *this;
}
//! wrap relative vector to box of size p
vec3<T>& wrap_rel( T p = 1.0 ) noexcept{
for( auto& x : data_ ) x = (x<-p/2)? x+p : (x>=p/2)? x-p : x;
return *this;
}
//! ordering, allows 3d sorting of vec3s
bool operator<( const vec3<T>& o ) const noexcept{
if( x!=o.x ) return x<o.x?true:false;
if( y!=o.y ) return y<o.y?true:false;
if( z!=o.z ) return z<o.z?true:false;
return false;
}
};
//! multiplication with scalar


@@ -2,192 +2,169 @@
#include <grid_fft.hh>
#include <thread>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_randist.h>
template <typename data_t>
void Grid_FFT<data_t>::FillRandomReal(unsigned long int seed)
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::Setup(void)
{
gsl_rng *RNG = gsl_rng_alloc(gsl_rng_mt19937);
#if defined(USE_MPI)
seed += 17321 * CONFIG::MPI_task_rank;
#endif
gsl_rng_set(RNG, seed);
if( !bdistributed ){
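// in-place real-to-complex FFT: the last dimension is padded to
// 2*(n_[2]/2 + 1) = n_[2] + 2 reals, hence the total size below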
ntot_ = (n_[2] + 2) * n_[1] * n_[0];
for (size_t i = 0; i < sizes_[0]; ++i)
{
for (size_t j = 0; j < sizes_[1]; ++j)
csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
if (typeid(data_t) == typeid(real_t))
{
for (size_t k = 0; k < sizes_[2]; ++k)
{
this->relem(i, j, k) = gsl_ran_ugaussian_ratio_method(RNG);
}
data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(real_t)));
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
plan_ = FFTW_API(plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, FFTW_RUNMODE);
iplan_ = FFTW_API(plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, FFTW_RUNMODE);
}
else if (typeid(data_t) == typeid(ccomplex_t))
{
data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(ccomplex_t)));
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
plan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_FORWARD, FFTW_RUNMODE);
iplan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_BACKWARD, FFTW_RUNMODE);
}
else
{
csoca::elog.Print("invalid data type in Grid_FFT<data_t>::setup_fft_interface\n");
}
fft_norm_fac_ = 1.0 / std::sqrt((double)((size_t)n_[0] * (double)n_[1] * (double)n_[2]));
if (typeid(data_t) == typeid(real_t))
{
npr_ = n_[2] + 2;
npc_ = n_[2] / 2 + 1;
}
else
{
npr_ = n_[2];
npc_ = n_[2];
}
for (int i = 0; i < 3; ++i)
{
nhalf_[i] = n_[i] / 2;
kfac_[i] = 2.0 * M_PI / length_[i];
dx_[i] = length_[i] / n_[i];
global_range_.x1_[i] = 0;
global_range_.x2_[i] = n_[i];
}
local_0_size_ = n_[0];
local_1_size_ = n_[1];
local_0_start_ = 0;
local_1_start_ = 0;
if (space_ == rspace_id)
{
sizes_[0] = n_[0];
sizes_[1] = n_[1];
sizes_[2] = n_[2];
sizes_[3] = npr_;
}
else
{
sizes_[0] = n_[1];
sizes_[1] = n_[0];
sizes_[2] = npc_;
sizes_[3] = npc_;
}
}
gsl_rng_free(RNG);
}
template <typename data_t>
void Grid_FFT<data_t>::Setup(void)
{
#if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////////////
ntot_ = (n_[2] + 2) * n_[1] * n_[0];
csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
if (typeid(data_t) == typeid(real_t))
{
data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(real_t)));
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
plan_ = FFTW_API(plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, FFTW_RUNMODE);
iplan_ = FFTW_API(plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, FFTW_RUNMODE);
}
else if (typeid(data_t) == typeid(ccomplex_t))
{
data_ = reinterpret_cast<data_t *>(fftw_malloc(ntot_ * sizeof(ccomplex_t)));
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
plan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_FORWARD, FFTW_RUNMODE);
iplan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_BACKWARD, FFTW_RUNMODE);
}
else
{
csoca::elog.Print("invalid data type in Grid_FFT<data_t>::setup_fft_interface\n");
}
#ifdef USE_MPI //// i.e. ifdef USE_MPI ////////////////////////////////////////////////////////////////////////////////////
size_t cmplxsz;
fft_norm_fac_ = 1.0 / std::sqrt((double)((size_t)n_[0] * (double)n_[1] * (double)n_[2]));
if (typeid(data_t) == typeid(real_t))
{
cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD,
&local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
ntot_ = 2 * cmplxsz;
data_ = (data_t *)fftw_malloc(ntot_ * sizeof(real_t));
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_,
MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_,
MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
}
else if (typeid(data_t) == typeid(ccomplex_t))
{
cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD,
&local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
ntot_ = cmplxsz;
data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t));
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
}
else
{
csoca::elog.Print("unknown data type in Grid_FFT<data_t>::setup_fft_interface\n");
abort();
}
if (typeid(data_t) == typeid(real_t))
{
npr_ = n_[2] + 2;
npc_ = n_[2] / 2 + 1;
}
else
{
npr_ = n_[2];
npc_ = n_[2];
}
csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]);
for (int i = 0; i < 3; ++i)
{
nhalf_[i] = n_[i] / 2;
kfac_[i] = 2.0 * M_PI / length_[i];
dx_[i] = length_[i] / n_[i];
if (typeid(data_t) == typeid(real_t))
{
npr_ = n_[2] + 2;
npc_ = n_[2] / 2 + 1;
}
else
{
npr_ = n_[2];
npc_ = n_[2];
}
global_range_.x1_[i] = 0;
global_range_.x2_[i] = n_[i];
}
for (int i = 0; i < 3; ++i)
{
nhalf_[i] = n_[i] / 2;
kfac_[i] = 2.0 * M_PI / length_[i];
dx_[i] = length_[i] / n_[i];
local_0_size_ = n_[0];
local_1_size_ = n_[1];
local_0_start_ = 0;
local_1_start_ = 0;
if (space_ == rspace_id)
{
sizes_[0] = n_[0];
sizes_[1] = n_[1];
sizes_[2] = n_[2];
sizes_[3] = npr_;
}
else
{
sizes_[0] = n_[1];
sizes_[1] = n_[0];
sizes_[2] = npc_;
sizes_[3] = npc_;
}
#else //// i.e. ifdef USE_MPI ////////////////////////////////////////////////////////////////////////////////////
size_t cmplxsz;
if (typeid(data_t) == typeid(real_t))
{
cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD,
&local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
ntot_ = 2 * cmplxsz;
data_ = (data_t *)fftw_malloc(ntot_ * sizeof(real_t));
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_,
MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_,
MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
}
else if (typeid(data_t) == typeid(ccomplex_t))
{
cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD,
&local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
ntot_ = cmplxsz;
data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t));
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
}
else
{
csoca::elog.Print("unknown data type in Grid_FFT<data_t>::setup_fft_interface\n");
abort();
}
csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]);
if (typeid(data_t) == typeid(real_t))
{
npr_ = n_[2] + 2;
npc_ = n_[2] / 2 + 1;
}
else
{
npr_ = n_[2];
npc_ = n_[2];
}
for (int i = 0; i < 3; ++i)
{
nhalf_[i] = n_[i] / 2;
kfac_[i] = 2.0 * M_PI / length_[i];
dx_[i] = length_[i] / n_[i];
global_range_.x1_[i] = 0;
global_range_.x2_[i] = n_[i];
}
global_range_.x1_[0] = (int)local_0_start_;
global_range_.x2_[0] = (int)(local_0_start_ + local_0_size_);
if (space_ == rspace_id)
{
sizes_[0] = (int)local_0_size_;
sizes_[1] = n_[1];
sizes_[2] = n_[2];
sizes_[3] = npr_; // holds the physical memory size along the 3rd dimension
}
else
{
sizes_[0] = (int)local_1_size_;
sizes_[1] = n_[0];
sizes_[2] = npc_;
sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension
}
global_range_.x1_[i] = 0;
global_range_.x2_[i] = n_[i];
}
global_range_.x1_[0] = (int)local_0_start_;
global_range_.x2_[0] = (int)(local_0_start_ + local_0_size_);
if (space_ == rspace_id)
{
sizes_[0] = (int)local_0_size_;
sizes_[1] = n_[1];
sizes_[2] = n_[2];
sizes_[3] = npr_; // holds the physical memory size along the 3rd dimension
}
else
{
sizes_[0] = (int)local_1_size_;
sizes_[1] = n_[0];
sizes_[2] = npc_;
sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension
}
#else
csoca::flog << "MPI is required for distributed FFT arrays!" << std::endl;
throw std::runtime_error("MPI is required for distributed FFT arrays!");
#endif //// of #ifdef #else USE_MPI ////////////////////////////////////////////////////////////////////////////////////
}
}
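A remark on the layout set up above: for real-valued fields the last axis is
padded to npr_ = n_[2] + 2 reals precisely so that the npc_ = n_[2]/2 + 1
complex values of the r2c transform fit into the same buffer, which is what
allows the in-place transform. The arithmetic, as a one-line check (any even
last dimension):

    constexpr unsigned n2 = 64;
    static_assert((n2 / 2 + 1) * 2 == n2 + 2, "r2c output fits the padded row");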
template <typename data_t>
void Grid_FFT<data_t>::ApplyNorm(void)
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::ApplyNorm(void)
{
#pragma omp parallel for
for (size_t i = 0; i < ntot_; ++i)
data_[i] *= fft_norm_fac_;
}
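Since fft_norm_fac_ = 1/sqrt(n_[0]*n_[1]*n_[2]) and FFTW's unnormalized
forward+backward pair scales the data by n_[0]*n_[1]*n_[2], applying the factor
once per direction makes the transform pair unitary. A sketch of the round trip
(assuming the FourierTransform wrappers apply the norm factor, as the symmetric
1/sqrt(N) choice suggests; the constructor arguments are placeholders):

    Grid_FFT<real_t, false> g({64, 64, 64}, {1.0, 1.0, 1.0});
    g.FourierTransformForward();   // FFT, scaled by 1/sqrt(N)
    g.FourierTransformBackward();  // inverse FFT, scaled by 1/sqrt(N)
    // net factor N * (1/sqrt(N)) * (1/sqrt(N)) = 1: g is recovered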
template <typename data_t>
void Grid_FFT<data_t>::FourierTransformForward(bool do_transform)
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::FourierTransformForward(bool do_transform)
{
#if defined(USE_MPI)
MPI_Barrier(MPI_COMM_WORLD);
@ -217,8 +194,8 @@ void Grid_FFT<data_t>::FourierTransformForward(bool do_transform)
}
}
template <typename data_t>
void Grid_FFT<data_t>::FourierTransformBackward(bool do_transform)
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::FourierTransformBackward(bool do_transform)
{
#if defined(USE_MPI)
MPI_Barrier(MPI_COMM_WORLD);
@ -269,15 +246,142 @@ void create_hdf5(std::string Filename)
H5Fclose(HDF_FileID);
}
template <typename data_t>
void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname) const
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string datasetname) const
{
// FIXME: cleanup duplicate code in this function!
if( !bdistributed && CONFIG::MPI_task_rank==0 ){
hid_t file_id, dset_id; /* file and dataset identifiers */
hid_t filespace, memspace; /* file and memory dataspace identifiers */
hsize_t offset[3], count[3];
hid_t dtype_id = H5T_NATIVE_FLOAT;
hid_t plist_id = H5P_DEFAULT;
if (!file_exists(fname))
create_hdf5(fname);
file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id);
for (int i = 0; i < 3; ++i)
count[i] = size(i);
if (typeid(data_t) == typeid(float))
dtype_id = H5T_NATIVE_FLOAT;
else if (typeid(data_t) == typeid(double))
dtype_id = H5T_NATIVE_DOUBLE;
else if (typeid(data_t) == typeid(std::complex<float>))
{
dtype_id = H5T_NATIVE_FLOAT;
}
else if (typeid(data_t) == typeid(std::complex<double>))
{
dtype_id = H5T_NATIVE_DOUBLE;
}
filespace = H5Screate_simple(3, count, NULL);
dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace,
H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
H5Sclose(filespace);
hsize_t slice_sz = size(1) * size(2);
real_t *buf = new real_t[slice_sz];
count[0] = 1;
count[1] = size(1);
count[2] = size(2);
offset[1] = 0;
offset[2] = 0;
memspace = H5Screate_simple(3, count, NULL);
filespace = H5Dget_space(dset_id);
for (size_t i = 0; i < size(0); ++i)
{
offset[0] = i;
for (size_t j = 0; j < size(1); ++j)
{
for (size_t k = 0; k < size(2); ++k)
{
if( this->space_ == rspace_id )
buf[j * size(2) + k] = std::real(relem(i, j, k));
else
buf[j * size(2) + k] = std::real(kelem(i, j, k));
}
}
H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL);
H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf);
}
H5Sclose(filespace);
H5Sclose(memspace);
// H5Sclose(filespace);
H5Dclose(dset_id);
if (typeid(data_t) == typeid(std::complex<float>) ||
typeid(data_t) == typeid(std::complex<double>) ||
this->space_ == kspace_id )
{
datasetname += std::string(".im");
for (int i = 0; i < 3; ++i)
count[i] = size(i);
filespace = H5Screate_simple(3, count, NULL);
dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace,
H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
H5Sclose(filespace);
count[0] = 1;
for (size_t i = 0; i < size(0); ++i)
{
offset[0] = i;
for (size_t j = 0; j < size(1); ++j)
for (size_t k = 0; k < size(2); ++k)
{
if( this->space_ == rspace_id )
buf[j * size(2) + k] = std::imag(relem(i, j, k));
else
buf[j * size(2) + k] = std::imag(kelem(i, j, k));
}
memspace = H5Screate_simple(3, count, NULL);
filespace = H5Dget_space(dset_id);
H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count,
NULL);
H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf);
H5Sclose(memspace);
H5Sclose(filespace);
}
H5Dclose(dset_id);
        }
        // free the slice buffer unconditionally; it is also used for the real
        // part above and must not leak when no imaginary part is written
        delete[] buf;
        H5Fclose(file_id);
return;
}
if( !bdistributed && CONFIG::MPI_task_rank!=0 ) return;
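    // from this point on, only the distributed case remains to be handled:
    // the shared-memory field was written in full by task 0 above, and all
    // other tasks have already returned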
hid_t file_id, dset_id; /* file and dataset identifiers */
hid_t filespace, memspace; /* file and memory dataspace identifiers */
hsize_t offset[3], count[3];
hid_t dtype_id = H5T_NATIVE_FLOAT;
hid_t plist_id;
#if defined(USE_MPI)
int mpi_size, mpi_rank;
@ -391,7 +495,10 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
{
for (size_t k = 0; k < size(2); ++k)
{
buf[j * size(2) + k] = std::real(relem(i, j, k));
if( this->space_ == rspace_id )
buf[j * size(2) + k] = std::real(relem(i, j, k));
else
buf[j * size(2) + k] = std::real(kelem(i, j, k));
}
}
@ -410,7 +517,8 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
H5Dclose(dset_id);
if (typeid(data_t) == typeid(std::complex<float>) ||
typeid(data_t) == typeid(std::complex<double>))
typeid(data_t) == typeid(std::complex<double>) ||
this->space_ == kspace_id )
{
datasetname += std::string(".im");
@ -460,7 +568,10 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
for (size_t j = 0; j < size(1); ++j)
for (size_t k = 0; k < size(2); ++k)
{
buf[j * size(2) + k] = std::imag(relem(i, j, k));
if( this->space_ == rspace_id )
buf[j * size(2) + k] = std::imag(relem(i, j, k));
else
buf[j * size(2) + k] = std::imag(kelem(i, j, k));
}
memspace = H5Screate_simple(3, count, NULL);
@ -493,8 +604,8 @@ void Grid_FFT<data_t>::Write_to_HDF5(std::string fname, std::string datasetname)
#include <iomanip>
template <typename data_t>
void Grid_FFT<data_t>::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax)
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax)
{
double logvmin = std::log10(vmin);
double logvmax = std::log10(vmax);
@ -545,8 +656,8 @@ void Grid_FFT<data_t>::Write_PDF(std::string ofname, int nbins, double scale, do
#endif
}
template <typename data_t>
void Grid_FFT<data_t>::Write_PowerSpectrum(std::string ofname)
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::Write_PowerSpectrum(std::string ofname)
{
std::vector<double> bin_k, bin_P, bin_eP;
std::vector<size_t> bin_count;
@ -575,8 +686,8 @@ void Grid_FFT<data_t>::Write_PowerSpectrum(std::string ofname)
#endif
}
template <typename data_t>
void Grid_FFT<data_t>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::vector<double> &bin_P, std::vector<double> &bin_eP, std::vector<size_t> &bin_count )
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::vector<double> &bin_P, std::vector<double> &bin_eP, std::vector<size_t> &bin_count )
{
this->FourierTransformForward();
@ -656,5 +767,7 @@ void Grid_FFT<data_t>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::ve
/********************************************************************************************/
template class Grid_FFT<real_t>;
template class Grid_FFT<ccomplex_t>;
template class Grid_FFT<real_t,true>;
template class Grid_FFT<real_t,false>;
template class Grid_FFT<ccomplex_t,true>;
template class Grid_FFT<ccomplex_t,false>;

View file

@ -7,6 +7,7 @@
#include <ic_generator.hh>
#include <particle_generator.hh>
#include <particle_plt.hh>
#include <unistd.h> // for unlink
@ -194,6 +195,13 @@ int Run( ConfigFile& the_config )
// NaiveConvolver<real_t> Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen});
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Create PLT gradient operator
//--------------------------------------------------------------------
// particle::lattice_gradient lg( the_config );
op::fourier_gradient lg( the_config );
//--------------------------------------------------------------------
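    // note on the changes further below: lg.gradient(idim, k) replaces the
    // earlier per-cell phi.gradient(idim, {i,j,k}) calls, so displacement and
    // velocity fields share a single gradient operator; op::fourier_gradient
    // is presumably the plain spectral derivative i*k_idim, while the
    // commented-out particle::lattice_gradient would supply the PLT-corrected
    // operator instead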
std::vector<cosmo_species> species_list;
species_list.push_back(cosmo_species::dm);
if (bDoBaryons)
@ -482,8 +490,8 @@ int Run( ConfigFile& the_config )
size_t idx = phi.get_idx(i,j,k);
auto phitot = phi.kelem(idx) + phi2.kelem(idx) + phi3a.kelem(idx) + phi3b.kelem(idx);
// divide by Lbox, because displacement is in box units for output plugin
tmp.kelem(idx) = lunit / boxlen * ( phi.gradient(idim,{i,j,k}) * phitot
+ phi.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - phi.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx) );
tmp.kelem(idx) = lunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot
+ lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx) );
}
}
}
@ -519,8 +527,11 @@ int Run( ConfigFile& the_config )
// divide by Lbox, because displacement is in box units for output plugin
auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx) + vfac3 * (phi3a.kelem(idx) + phi3b.kelem(idx));
tmp.kelem(idx) = vunit / boxlen * ( phi.gradient(idim,{i,j,k}) * phitot_v
+ vfac3 * (phi.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - phi.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx)) );
tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot_v
+ vfac3 * (lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx)) );
// correct velocity with PLT mode growth rate
tmp.kelem(idx) *= lg.vfac_corr(tmp.get_k3(i,j,k));
if( bAddExternalTides ){
// modify velocities with anisotropic expansion factor**2

View file

@ -10,6 +10,7 @@
#include <general.hh>
#include <ic_generator.hh>
#include <particle_plt.hh>
// initialise with "default" values
@ -182,6 +183,8 @@ int main( int argc, char** argv )
// do the job...
///////////////////////////////////////////////////////////////////////
ic_generator::Run( the_config );
// particle::test_plt();
///////////////////////////////////////////////////////////////////////
#if defined(USE_MPI)

File diff suppressed because it is too large

View file

@ -82,7 +82,11 @@ public:
for (size_t j = 0; j < nres_; ++j)
{
ptrdiff_t jj = (j>0)? nres_ - j : 0;
gsl_rng_set( pRandomGenerator_, SeedTable_[i * nres_ + j]);
if( g.is_distributed() )
gsl_rng_set( pRandomGenerator_, SeedTable_[j * nres_ + i]);
else
gsl_rng_set( pRandomGenerator_, SeedTable_[i * nres_ + j]);
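                // note: the distributed case consumes seeds in transposed
                // (j,i) order, matching the FFTW_MPI_TRANSPOSED k-space layout
                // set up in Grid_FFT; presumably this keeps the realization
                // independent of the number of MPI tasks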
for (size_t k = 0; k < g.size(2); ++k)
{
double phase = gsl_rng_uniform(pRandomGenerator_) * 2 * M_PI;
@ -101,15 +105,28 @@ public:
if (k > 0) {
if (i_in_range) g.kelem(ip,j,k) = zrand;
} else{ /* k=0 plane needs special treatment */
if (i == 0) {
if (j < nres_ / 2 && i_in_range)
{
g.kelem(ip,j,k) = zrand;
g.kelem(ip,jj,k) = std::conj(zrand);
if( g.is_distributed() ){
if (j == 0) {
if (i < nres_ / 2 && i_in_range)
{
if(i_in_range) g.kelem(ip,jj,k) = zrand;
if(ii_in_range) g.kelem(iip,j,k) = std::conj(zrand);
}
} else if (j < nres_ / 2) {
if(i_in_range) g.kelem(ip,j,k) = zrand;
if(ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand);
}
}else{
if (i == 0) {
if (j < nres_ / 2 && i_in_range)
{
g.kelem(ip,j,k) = zrand;
g.kelem(ip,jj,k) = std::conj(zrand);
}
} else if (i < nres_ / 2) {
if(i_in_range) g.kelem(ip,j,k) = zrand;
if (ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand);
}
} else if (i < nres_ / 2) {
if(i_in_range) g.kelem(ip,j,k) = zrand;
if (ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand);
}
}
}
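                    // background on the k=0 plane treatment above: a real
                    // field requires the Hermitian symmetry f(-k) = conj(f(k));
                    // on this plane the modes (i,j) and (nres_-i, nres_-j)
                    // form such a pair, so one member receives zrand, its
                    // partner std::conj(zrand), and only half the plane is
                    // drawn independently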

View file

@ -0,0 +1,344 @@
// transfer_CAMB.cc - This file is part of MUSIC -
// a code to generate multi-scale initial conditions for cosmological simulations
// Copyright (C) 2019 Oliver Hahn
#include <gsl/gsl_errno.h>
#include <gsl/gsl_spline.h>
#include <vector>
#include "transfer_function_plugin.hh"
const double tiny = 1e-30;
class transfer_CAMB_file_plugin : public TransferFunction_plugin
{
private:
std::string m_filename_Pk, m_filename_Tk;
std::vector<double> m_tab_k, m_tab_Tk_tot, m_tab_Tk_cdm, m_tab_Tk_baryon;
std::vector<double> m_tab_Tvk_tot, m_tab_Tvk_cdm, m_tab_Tvk_baryon;
gsl_interp_accel *acc_tot, *acc_cdm, *acc_baryon;
gsl_interp_accel *acc_vtot, *acc_vcdm, *acc_vbaryon;
gsl_spline *spline_tot, *spline_cdm, *spline_baryon;
gsl_spline *spline_vtot, *spline_vcdm, *spline_vbaryon;
double m_kmin, m_kmax, m_Omega_b, m_Omega_m, m_zstart;
unsigned m_nlines;
bool m_linbaryoninterp;
void read_table(void)
{
m_nlines = 0;
m_linbaryoninterp = false;
#ifdef WITH_MPI
if (MPI::COMM_WORLD.Get_rank() == 0)
{
#endif
csoca::ilog.Print("Reading tabulated transfer function data from file \n \'%s\'", m_filename_Tk.c_str());
std::string line;
std::ifstream ifs(m_filename_Tk.c_str());
if (!ifs.good())
throw std::runtime_error("Could not find transfer function file \'" + m_filename_Tk + "\'");
m_tab_k.clear();
m_tab_Tk_tot.clear();
m_tab_Tk_cdm.clear();
m_tab_Tk_baryon.clear();
m_tab_Tvk_tot.clear();
m_tab_Tvk_cdm.clear(); //>[150609SH: add]
m_tab_Tvk_baryon.clear(); //>[150609SH: add]
m_kmin = 1e30;
m_kmax = -1e30;
std::ofstream ofs("dump_transfer.txt");
while (!ifs.eof())
{
getline(ifs, line);
if (ifs.eof())
break;
// OH: ignore line if it has a comment:
if (line.find("#") != std::string::npos)
continue;
std::stringstream ss(line);
double k, Tkc, Tkb, Tktot, Tkvtot, Tkvc, Tkvb, dummy;
ss >> k;
ss >> Tkc; // cdm
ss >> Tkb; // baryon
ss >> dummy; // photon
ss >> dummy; // nu
ss >> dummy; // mass_nu
ss >> Tktot; // total
ss >> dummy; // no_nu
ss >> dummy; // total_de
ss >> dummy; // Weyl
ss >> Tkvc; // v_cdm
ss >> Tkvb; // v_b
ss >> dummy; // v_b-v_cdm
if (ss.bad() || ss.fail())
{
csoca::elog.Print("Error reading the transfer function file (corrupt or not in expected format)!");
throw std::runtime_error("Error reading transfer function file \'" +
m_filename_Tk + "\'");
}
if (m_Omega_b < 1e-6)
Tkvtot = Tktot;
else
Tkvtot = ((m_Omega_m - m_Omega_b) * Tkvc + m_Omega_b * Tkvb) / m_Omega_m; //MvD
m_linbaryoninterp |= Tkb < 0.0 || Tkvb < 0.0;
m_tab_k.push_back(log10(k));
m_tab_Tk_tot.push_back(Tktot);
m_tab_Tk_baryon.push_back(Tkb);
m_tab_Tk_cdm.push_back(Tkc);
m_tab_Tvk_tot.push_back(Tkvtot);
m_tab_Tvk_baryon.push_back(Tkvb);
m_tab_Tvk_cdm.push_back(Tkvc);
++m_nlines;
if (k < m_kmin)
m_kmin = k;
if (k > m_kmax)
m_kmax = k;
}
for (size_t i = 0; i < m_tab_k.size(); ++i)
{
m_tab_Tk_tot[i] = log10(m_tab_Tk_tot[i]);
m_tab_Tk_cdm[i] = log10(m_tab_Tk_cdm[i]);
m_tab_Tvk_cdm[i] = log10(m_tab_Tvk_cdm[i]);
m_tab_Tvk_tot[i] = log10(m_tab_Tvk_tot[i]);
if (!m_linbaryoninterp)
{
m_tab_Tk_baryon[i] = log10(m_tab_Tk_baryon[i]);
m_tab_Tvk_baryon[i] = log10(m_tab_Tvk_baryon[i]);
}
}
ifs.close();
csoca::ilog.Print("Read CAMB transfer function table with %d rows", m_nlines);
if (m_linbaryoninterp)
csoca::ilog.Print("Using log-lin interpolation for baryons\n (TF is not "
"positive definite)");
#ifdef WITH_MPI
}
unsigned n = m_tab_k.size();
MPI::COMM_WORLD.Bcast(&n, 1, MPI_UNSIGNED, 0);
if (MPI::COMM_WORLD.Get_rank() > 0)
{
m_tab_k.assign(n, 0);
m_tab_Tk_tot.assign(n, 0);
m_tab_Tk_cdm.assign(n, 0);
m_tab_Tk_baryon.assign(n, 0);
m_tab_Tvk_tot.assign(n, 0);
m_tab_Tvk_cdm.assign(n, 0);
m_tab_Tvk_baryon.assign(n, 0);
}
MPI::COMM_WORLD.Bcast(&m_tab_k[0], n, MPI_DOUBLE, 0);
MPI::COMM_WORLD.Bcast(&m_tab_Tk_tot[0], n, MPI_DOUBLE, 0);
MPI::COMM_WORLD.Bcast(&m_tab_Tk_cdm[0], n, MPI_DOUBLE, 0);
MPI::COMM_WORLD.Bcast(&m_tab_Tk_baryon[0], n, MPI_DOUBLE, 0);
MPI::COMM_WORLD.Bcast(&m_tab_Tvk_tot[0], n, MPI_DOUBLE, 0);
MPI::COMM_WORLD.Bcast(&m_tab_Tvk_cdm[0], n, MPI_DOUBLE, 0);
MPI::COMM_WORLD.Bcast(&m_tab_Tvk_baryon[0], n, MPI_DOUBLE, 0);
#endif
}
public:
transfer_CAMB_file_plugin(ConfigFile &cf)
: TransferFunction_plugin(cf)
{
m_filename_Tk = pcf_->GetValue<std::string>("cosmology", "transfer_file");
m_Omega_m = cf.GetValue<double>("cosmology", "Omega_m"); //MvD
m_Omega_b = cf.GetValue<double>("cosmology", "Omega_b"); //MvD
m_zstart = cf.GetValue<double>("setup", "zstart"); //MvD
read_table();
acc_tot = gsl_interp_accel_alloc();
acc_cdm = gsl_interp_accel_alloc();
acc_baryon = gsl_interp_accel_alloc();
acc_vtot = gsl_interp_accel_alloc();
acc_vcdm = gsl_interp_accel_alloc();
acc_vbaryon = gsl_interp_accel_alloc();
spline_tot = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size());
spline_cdm = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size());
spline_baryon = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size());
spline_vtot = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size());
    spline_vcdm = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size());
    spline_vbaryon = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size());
gsl_spline_init(spline_tot, &m_tab_k[0], &m_tab_Tk_tot[0], m_tab_k.size());
gsl_spline_init(spline_cdm, &m_tab_k[0], &m_tab_Tk_cdm[0], m_tab_k.size());
gsl_spline_init(spline_baryon, &m_tab_k[0], &m_tab_Tk_baryon[0],
m_tab_k.size());
gsl_spline_init(spline_vtot, &m_tab_k[0], &m_tab_Tvk_tot[0],
m_tab_k.size());
gsl_spline_init(spline_vcdm, &m_tab_k[0], &m_tab_Tvk_cdm[0],
m_tab_k.size());
gsl_spline_init(spline_vbaryon, &m_tab_k[0], &m_tab_Tvk_baryon[0],
m_tab_k.size());
    tf_distinct_ = true; // CDM and baryon density transfer functions differ
tf_withvel_ = true; // using velocity transfer function
}
~transfer_CAMB_file_plugin()
{
gsl_spline_free(spline_tot);
gsl_spline_free(spline_cdm);
gsl_spline_free(spline_baryon);
gsl_spline_free(spline_vtot);
gsl_spline_free(spline_vcdm);
gsl_spline_free(spline_vbaryon);
gsl_interp_accel_free(acc_tot);
gsl_interp_accel_free(acc_cdm);
gsl_interp_accel_free(acc_baryon);
gsl_interp_accel_free(acc_vtot);
gsl_interp_accel_free(acc_vcdm);
gsl_interp_accel_free(acc_vbaryon);
}
// linear interpolation in log-log
inline double extrap_right(double k, const tf_type &type) const
{
int n = m_tab_k.size() - 1, n1 = n - 1;
double v1(1.0), v2(1.0);
double lk = log10(k);
double dk = m_tab_k[n] - m_tab_k[n1];
double delk = lk - m_tab_k[n];
switch (type)
{
case cdm:
v1 = m_tab_Tk_cdm[n1];
v2 = m_tab_Tk_cdm[n];
return pow(10.0, (v2 - v1) / dk * (delk) + v2);
case baryon:
v1 = m_tab_Tk_baryon[n1];
v2 = m_tab_Tk_baryon[n];
if (m_linbaryoninterp)
return std::max((v2 - v1) / dk * (delk) + v2, tiny);
return pow(10.0, (v2 - v1) / dk * (delk) + v2);
case vtotal: //>[150609SH: add]
v1 = m_tab_Tvk_tot[n1];
v2 = m_tab_Tvk_tot[n];
return pow(10.0, (v2 - v1) / dk * (delk) + v2);
case vcdm: //>[150609SH: add]
v1 = m_tab_Tvk_cdm[n1];
v2 = m_tab_Tvk_cdm[n];
return pow(10.0, (v2 - v1) / dk * (delk) + v2);
case vbaryon: //>[150609SH: add]
v1 = m_tab_Tvk_baryon[n1];
v2 = m_tab_Tvk_baryon[n];
if (m_linbaryoninterp)
return std::max((v2 - v1) / dk * (delk) + v2, tiny);
return pow(10.0, (v2 - v1) / dk * (delk) + v2);
case total:
v1 = m_tab_Tk_tot[n1];
v2 = m_tab_Tk_tot[n];
return pow(10.0, (v2 - v1) / dk * (delk) + v2);
default:
throw std::runtime_error(
"Invalid type requested in transfer function evaluation");
}
return 0.0;
}
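  // in other words, extrap_right extends the last two table points (lk1, v1)
  // and (lk2, v2) as a straight line in log-log space,
  //   T(k) = 10^( (v2 - v1) / (lk2 - lk1) * (log10(k) - lk2) + v2 ),
  // with the same expression used without the 10^ exponentiation when the
  // baryon tables are interpolated linearly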
inline double compute(double k, tf_type type) const
{
// use constant interpolation on the left side of the tabulated values
if (k < m_kmin)
{
switch (type)
{
case cdm:
return pow(10.0, m_tab_Tk_cdm[0]);
case baryon:
if (m_linbaryoninterp)
return m_tab_Tk_baryon[0];
return pow(10.0, m_tab_Tk_baryon[0]);
case vtotal:
return pow(10.0, m_tab_Tvk_tot[0]);
case vcdm:
return pow(10.0, m_tab_Tvk_cdm[0]);
case vbaryon:
if (m_linbaryoninterp)
return m_tab_Tvk_baryon[0];
return pow(10.0, m_tab_Tvk_baryon[0]);
case total:
return pow(10.0, m_tab_Tk_tot[0]);
default:
throw std::runtime_error(
"Invalid type requested in transfer function evaluation");
}
}
// use linear interpolation on the right side of the tabulated values
else if (k > m_kmax)
return extrap_right(k, type);
double lk = log10(k);
switch (type)
{
case cdm:
return pow(10.0, gsl_spline_eval(spline_cdm, lk, acc_cdm));
case baryon:
if (m_linbaryoninterp)
return gsl_spline_eval(spline_baryon, lk, acc_baryon);
return pow(10.0, gsl_spline_eval(spline_baryon, lk, acc_baryon));
case vtotal:
return pow(10.0, gsl_spline_eval(spline_vtot, lk, acc_vtot)); //MvD
case vcdm:
return pow(10.0, gsl_spline_eval(spline_vcdm, lk, acc_vcdm));
case vbaryon:
if (m_linbaryoninterp)
return gsl_spline_eval(spline_vbaryon, lk, acc_vbaryon);
return pow(10.0, gsl_spline_eval(spline_vbaryon, lk, acc_vbaryon));
case total:
return pow(10.0, gsl_spline_eval(spline_tot, lk, acc_tot));
default:
throw std::runtime_error(
"Invalid type requested in transfer function evaluation");
}
}
inline double get_kmin(void) const { return pow(10.0, m_tab_k[1]); }
inline double get_kmax(void) const { return pow(10.0, m_tab_k[m_tab_k.size() - 2]); }
};
namespace
{
TransferFunction_plugin_creator_concrete<transfer_CAMB_file_plugin> creator("CAMB_file");
}
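With the creator registered above, the plugin is selected by name from the
parameter file. A minimal sketch (the path is a placeholder, not a file shipped
with the code):

    [cosmology]
    transfer      = CAMB_file
    transfer_file = /path/to/camb_transfer_out.dat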

View file

@ -434,5 +434,5 @@ namespace
TransferFunction_plugin_creator_concrete<transfer_eisenstein_plugin> creator("eisenstein");
TransferFunction_plugin_creator_concrete<transfer_eisenstein_wdm_plugin> creator2("eisenstein_wdm");
TransferFunction_plugin_creator_concrete<transfer_eisenstein_cdmbino_plugin> creator3("eisenstein_cdmbino");
TransferFunction_plugin_creator_concrete<transfer_eisenstein_cutoff_plugin> creator4("eisenstein_cutoff");
// TransferFunction_plugin_creator_concrete<transfer_eisenstein_cutoff_plugin> creator4("eisenstein_cutoff");
} // namespace