mirror of https://github.com/cosmo-sims/monofonIC.git

made precision switchable from makefile

This commit is contained in:
Oliver Hahn 2020-04-02 19:57:41 +02:00
parent 3a8a22737f
commit b8b9db3b99
5 changed files with 227 additions and 127 deletions


@ -1,16 +1,34 @@
cmake_minimum_required(VERSION 3.9)
set(PRGNAME monofonIC)
project(monofonIC)
project(monofonIC C CXX)
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -march=native -Wall -pedantic" CACHE STRING "Flags used by the compiler during Release builds." FORCE)
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -march=native -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during RelWithDebInfo builds." FORCE)
set(CMAKE_CXX_FLAGS_DEBUG "-g -O0 -march=native -DDEBUG -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during Debug builds." FORCE)
set(CMAKE_CXX_FLAGS_DEBUGSANADD "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address " CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for address." FORCE)
set(CMAKE_CXX_FLAGS_DEBUGSANUNDEF "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for undefineds." FORCE)
set(default_build_type "Release")
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE
STRING "Choose the type of build." FORCE)
# Set the possible values of build type for cmake-gui
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
"Debug" "Release" "RelWithDebInfo" "DebugSanAdd" "DebugSanUndef")
endif()
mark_as_advanced(CMAKE_CXX_FLAGS_DEBUGSANADD CMAKE_CXX_FLAGS_DEBUGSANUNDEF CMAKE_EXECUTABLE_FORMAT CMAKE_OSX_ARCHITECTURES CMAKE_OSX_DEPLOYMENT_TARGET CMAKE_OSX_SYSROOT)
########################################################################################################################
# include class submodule
include(${CMAKE_CURRENT_SOURCE_DIR}/external/class.cmake)
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic")
find_package(PkgConfig REQUIRED)
set(CMAKE_MODULE_PATH
"${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}")
set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}")
########################################################################################################################
@ -48,6 +66,16 @@ if(ENABLE_MPI)
endif(MPI_CXX_FOUND)
endif(ENABLE_MPI)
########################################################################################################################
# floating point precision
set (
CODE_PRECISION "DOUBLE"
CACHE STRING "Floating point type used for internal computations and FFTs"
)
set_property (
CACHE CODE_PRECISION
PROPERTY STRINGS FLOAT DOUBLE LONGDOUBLE
)
########################################################################################################################
# FFTW
@ -55,18 +83,25 @@ if(POLICY CMP0074)
cmake_policy(SET CMP0074 NEW)
endif()
if(ENABLE_MPI)
find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS MPI)
find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS MPI)
else()
find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS)
find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS)
endif(ENABLE_MPI)
mark_as_advanced(FFTW3_SINGLE_MPI_LIBRARY FFTW3_SINGLE_OPENMP_LIBRARY FFTW3_SINGLE_SERIAL_LIBRARY FFTW3_SINGLE_THREADS_LIBRARY)
mark_as_advanced(FFTW3_DOUBLE_MPI_LIBRARY FFTW3_DOUBLE_OPENMP_LIBRARY FFTW3_DOUBLE_SERIAL_LIBRARY FFTW3_DOUBLE_THREADS_LIBRARY)
mark_as_advanced(FFTW3_LONGDOUBLE_MPI_LIBRARY FFTW3_LONGDOUBLE_OPENMP_LIBRARY FFTW3_LONGDOUBLE_SERIAL_LIBRARY FFTW3_LONGDOUBLE_THREADS_LIBRARY)
mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_MPI_INCLUDE_DIR)
mark_as_advanced(pkgcfg_lib_PC_FFTW_fftw3)
########################################################################################################################
# GSL
find_package(GSL REQUIRED)
mark_as_advanced(pkgcfg_lib_GSL_gsl pkgcfg_lib_GSL_gslcblas pkgcfg_lib_GSL_m)
########################################################################################################################
# HDF5
find_package(HDF5 REQUIRED)
mark_as_advanced(HDF5_C_LIBRARY_dl HDF5_C_LIBRARY_hdf5 HDF5_C_LIBRARY_m HDF5_C_LIBRARY_pthread HDF5_C_LIBRARY_z HDF5_C_LIBRARY_sz)
########################################################################################################################
# INCLUDES
@ -86,28 +121,61 @@ file( GLOB PLUGINS
${PROJECT_SOURCE_DIR}/src/plugins/*.cc
)
# project configuration header
configure_file(
${PROJECT_SOURCE_DIR}/include/cmake_config.hh.in
${PROJECT_SOURCE_DIR}/include/cmake_config.hh
)
add_executable(${PRGNAME} ${SOURCES} ${PLUGINS})
target_setup_class(${PRGNAME})
set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 14)
# mpi flags
if(MPI_CXX_FOUND)
if(FFTW3_DOUBLE_MPI_FOUND)
target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_MPI_LIBRARY})
target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL})
target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI")
endif(FFTW3_DOUBLE_MPI_FOUND)
if(CODE_PRECISION STREQUAL "FLOAT")
if(FFTW3_SINGLE_MPI_FOUND)
target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_MPI_LIBRARY})
target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL})
target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI")
else()
message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for single precision!")
endif()
elseif(CODE_PRECISION STREQUAL "DOUBLE")
if(FFTW3_DOUBLE_MPI_FOUND)
target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_MPI_LIBRARY})
target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL})
target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI")
else()
message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for double precision!")
endif()
elseif(CODE_PRECISION STREQUAL "LONGDOUBLE")
if(FFTW3_LONGDOUBLE_MPI_FOUND)
target_link_libraries(${PRGNAME} ${FFTW3_LONGDOUBLE_MPI_LIBRARY})
target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL})
target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI")
else()
message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for long double precision!")
endif()
endif()
target_include_directories(${PRGNAME} PRIVATE ${MPI_CXX_INCLUDE_PATH})
target_compile_options(${PRGNAME} PRIVATE "-DUSE_MPI")
target_link_libraries(${PRGNAME} ${MPI_LIBRARIES})
endif(MPI_CXX_FOUND)
if(FFTW3_DOUBLE_THREADS_FOUND)
if(CODE_PRECISION STREQUAL "FLOAT" AND FFTW3_SINGLE_THREADS_FOUND)
target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_THREADS_LIBRARY})
target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS")
elseif(CODE_PRECISION STREQUAL "DOUBLE" AND FFTW3_DOUBLE_THREADS_FOUND)
target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_THREADS_LIBRARY})
target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS")
endif(FFTW3_DOUBLE_THREADS_FOUND)
elseif(CODE_PRECISION STREQUAL "LONGDOUBLE" AND FFTW3_LONGDOUBLE_THREADS_FOUND)
target_link_libraries(${PRGNAME} ${FFTW3_LONGDOUBLE_THREADS_LIBRARY})
target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS")
endif()
if(HDF5_FOUND)
# target_link_libraries(${PRGNAME} ${HDF5_C_LIBRARY_DIRS})
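The new CODE_PRECISION cache variable is selected at configure time, e.g. cmake -DCODE_PRECISION=LONGDOUBLE .., and is dispatched above to the matching FFTW3 component. It presumably also reaches the sources through the configure_file() call that generates include/cmake_config.hh, as one of the USE_PRECISION_* macros tested in the precision header further down. A minimal sketch (an assumption, the cmake_config.hh.in template itself is not part of this diff) of what the generated header would contain for the default setting:

    /* hypothetical content of the generated include/cmake_config.hh;
       exactly one USE_PRECISION_* macro is expected to be defined */
    #define USE_PRECISION_DOUBLE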


@ -415,12 +415,12 @@ private:
{
assert(fp.space_ == kspace_id);
const double rfac = std::pow(1.5, 1.5);
const real_t rfac = std::pow(1.5, 1.5);
fp.zero();
#if !defined(USE_MPI) ////////////////////////////////////////////////////////////////////////////////////
size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3};
const size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3};
#pragma omp parallel for
for (size_t i = 0; i < 2 * fp.size(0) / 3; ++i)
@ -460,7 +460,10 @@ private:
size_t slicesz = fbuf_->size(1) * fbuf_->size(3);
MPI_Datatype datatype =
(typeid(data_t) == typeid(float)) ? MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE;
(typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX
: (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX
: (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX
: MPI_BYTE;
// fill MPI send buffer with results of kfunc
@ -596,7 +599,7 @@ private:
template <typename operator_t>
void unpad(const Grid_FFT<data_t> &fp, operator_t output_op)
{
const double rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(fbuf_->n_[0] * fbuf_->n_[1] * fbuf_->n_[2]);
const real_t rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(fbuf_->n_[0] * fbuf_->n_[1] * fbuf_->n_[2]);
// make sure we're in Fourier space...
assert(fp.space_ == kspace_id);
@ -645,7 +648,10 @@ private:
size_t slicesz = fp.size(1) * fp.size(3);
MPI_Datatype datatype =
(typeid(data_t) == typeid(float)) ? MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE;
(typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX
: (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX
: (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX
: MPI_BYTE;
MPI_Status status;
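The MPI datatype selection above (and the similar chain in the Grid_FFT header below) now covers long double via the C complex MPI datatypes. A small helper could avoid repeating the typeid ternary chain; the following is an alternative sketch, not code from this commit, and it stays within the project's C++14 setting by testing std::is_same at run time:

    #include <complex>
    #include <type_traits>
    #include <mpi.h>

    // Map a real or complex scalar type onto the corresponding MPI datatype;
    // fall back to MPI_BYTE for anything unknown, mirroring the chains above.
    template <typename T>
    MPI_Datatype mpi_datatype()
    {
        if (std::is_same<T, float>::value)       return MPI_FLOAT;
        if (std::is_same<T, double>::value)      return MPI_DOUBLE;
        if (std::is_same<T, long double>::value) return MPI_LONG_DOUBLE;
        if (std::is_same<T, std::complex<float>>::value)       return MPI_C_FLOAT_COMPLEX;
        if (std::is_same<T, std::complex<double>>::value)      return MPI_C_DOUBLE_COMPLEX;
        if (std::is_same<T, std::complex<long double>>::value) return MPI_C_LONG_DOUBLE_COMPLEX;
        return MPI_BYTE;
    }

    // usage, e.g.: MPI_Sendrecv(..., mpi_datatype<data_t>(), ...)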


@ -16,14 +16,21 @@
#define _unused(x) ((void)(x))
#ifdef USE_SINGLEPRECISION
// include CMake controlled configuration settings
#include <cmake_config.hh>
#if defined(USE_PRECISION_FLOAT)
using real_t = float;
using complex_t = fftwf_complex;
#define FFTW_PREFIX fftwf
#else
#elif defined(USE_PRECISION_DOUBLE)
using real_t = double;
using complex_t = fftw_complex;
#define FFTW_PREFIX fftw
#elif defined(USE_PRECISION_LONGDOUBLE)
using real_t = long double;
using complex_t = fftwl_complex;
#define FFTW_PREFIX fftwl
#endif
enum class fluid_component
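The FFTW_PREFIX chosen here feeds the FFTW_API(...) calls used in the FFT implementation further down (FFTW_API(execute), FFTW_API(mpi_plan_dft_r2c_3d), ...). Its definition is not visible in this hunk; the standard two-level token-pasting idiom presumably behind it looks like this sketch:

    // not part of the diff: glue that turns FFTW_PREFIX into the concrete
    // fftwf_/fftw_/fftwl_ function names (two levels so the prefix macro is
    // expanded before pasting)
    #define FFTW_GEN_NAME_PRIM(a, b) a##_##b
    #define FFTW_GEN_NAME(a, b) FFTW_GEN_NAME_PRIM(a, b)
    #define FFTW_API(x) FFTW_GEN_NAME(FFTW_PREFIX, x)

    // with USE_PRECISION_LONGDOUBLE defined, FFTW_API(execute)(plan)
    // expands to fftwl_execute(plan)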


@ -26,10 +26,12 @@ class Grid_FFT
protected:
#if defined(USE_MPI)
const MPI_Datatype MPI_data_t_type =
(typeid(data_t) == typeid(double)) ? MPI_DOUBLE
: (typeid(data_t) == typeid(float)) ? MPI_FLOAT
: (typeid(data_t) == typeid(std::complex<float>)) ? MPI_COMPLEX
: (typeid(data_t) == typeid(std::complex<double>)) ? MPI_DOUBLE_COMPLEX
(typeid(data_t) == typeid(float)) ? MPI_FLOAT
: (typeid(data_t) == typeid(double)) ? MPI_DOUBLE
: (typeid(data_t) == typeid(long double)) ? MPI_LONG_DOUBLE
: (typeid(data_t) == typeid(std::complex<float>)) ? MPI_C_FLOAT_COMPLEX
: (typeid(data_t) == typeid(std::complex<double>)) ? MPI_C_DOUBLE_COMPLEX
: (typeid(data_t) == typeid(std::complex<long double>)) ? MPI_C_LONG_DOUBLE_COMPLEX
: MPI_INT;
#endif
using grid_fft_t = Grid_FFT<data_t,bdistributed>;
@ -73,30 +75,30 @@ public:
const grid_fft_t *get_grid(size_t ilevel) const { return this; }
bool is_distributed( void ) const { return bdistributed; }
bool is_distributed( void ) const noexcept { return bdistributed; }
void Setup();
//! return the number of data_t elements that we store in the container
size_t memsize( void ) const { return ntot_; }
size_t memsize( void ) const noexcept { return ntot_; }
//! return the (local) size of dimension i
size_t size(size_t i) const { return sizes_[i]; }
size_t size(size_t i) const noexcept { assert(i<4); return sizes_[i]; }
//! return the (global) size of dimension i
size_t global_size(size_t i) const { return n_[i]; }
size_t global_size(size_t i) const noexcept { assert(i<3); return n_[i]; }
//! return locally stored number of elements of field
size_t local_size(void) const { return local_0_size_ * n_[1] * n_[2]; }
size_t local_size(void) const noexcept { return local_0_size_ * n_[1] * n_[2]; }
//! return a bounding box of the global extent of the field
const bounding_box<size_t> &get_global_range(void) const
const bounding_box<size_t> &get_global_range(void) const noexcept
{
return global_range_;
}
//! set all field elements to zero
void zero()
void zero() noexcept
{
#pragma omp parallel for
for (size_t i = 0; i < ntot_; ++i)
@ -125,47 +127,47 @@ public:
data_[i] = g.data_[i];
}
data_t &operator[](size_t i)
data_t &operator[](size_t i) noexcept
{
return data_[i];
}
data_t &relem(size_t i, size_t j, size_t k)
data_t &relem(size_t i, size_t j, size_t k) noexcept
{
size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
return data_[idx];
}
const data_t &relem(size_t i, size_t j, size_t k) const
const data_t &relem(size_t i, size_t j, size_t k) const noexcept
{
size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
return data_[idx];
}
ccomplex_t &kelem(size_t i, size_t j, size_t k)
ccomplex_t &kelem(size_t i, size_t j, size_t k) noexcept
{
size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
return cdata_[idx];
}
const ccomplex_t &kelem(size_t i, size_t j, size_t k) const
const ccomplex_t &kelem(size_t i, size_t j, size_t k) const noexcept
{
size_t idx = (i * sizes_[1] + j) * sizes_[3] + k;
return cdata_[idx];
}
ccomplex_t &kelem(size_t idx) { return cdata_[idx]; }
const ccomplex_t &kelem(size_t idx) const { return cdata_[idx]; }
data_t &relem(size_t idx) { return data_[idx]; }
const data_t &relem(size_t idx) const { return data_[idx]; }
ccomplex_t &kelem(size_t idx) noexcept { return cdata_[idx]; }
const ccomplex_t &kelem(size_t idx) const noexcept { return cdata_[idx]; }
data_t &relem(size_t idx) noexcept { return data_[idx]; }
const data_t &relem(size_t idx) const noexcept { return data_[idx]; }
size_t get_idx(size_t i, size_t j, size_t k) const
size_t get_idx(size_t i, size_t j, size_t k) const noexcept
{
return (i * sizes_[1] + j) * sizes_[3] + k;
}
template <typename ft>
vec3_t<ft> get_r(const size_t i, const size_t j, const size_t k) const
vec3_t<ft> get_r(const size_t i, const size_t j, const size_t k) const noexcept
{
vec3_t<ft> rr;
@ -177,7 +179,7 @@ public:
}
template <typename ft>
vec3_t<ft> get_unit_r(const size_t i, const size_t j, const size_t k) const
vec3_t<ft> get_unit_r(const size_t i, const size_t j, const size_t k) const noexcept
{
vec3_t<ft> rr;
@ -189,7 +191,7 @@ public:
}
template <typename ft>
vec3_t<ft> get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3_t<real_t> s) const
vec3_t<ft> get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3_t<real_t> s) const noexcept
{
vec3_t<ft> rr;
@ -200,33 +202,35 @@ public:
return rr;
}
vec3_t<size_t> get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const
vec3_t<size_t> get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const noexcept
{
return vec3_t<size_t>({i + local_0_start_, j, k});
}
size_t get_cell_idx_1d(const size_t i, const size_t j, const size_t k) const
size_t get_cell_idx_1d(const size_t i, const size_t j, const size_t k) const noexcept
{
return ((i + local_0_start_) * size(1) + j) * size(2) + k;
}
size_t count_leaf_cells(int, int) const
//! deprecated function, was needed for old output plugin
size_t count_leaf_cells(int, int) const noexcept
{
return n_[0] * n_[1] * n_[2];
}
real_t get_dx(int idim) const
real_t get_dx(int idim) const noexcept
{
assert(idim<3&&idim>=0);
return dx_[idim];
}
const std::array<real_t, 3> &get_dx(void) const
const std::array<real_t, 3> &get_dx(void) const noexcept
{
return dx_;
}
template <typename ft>
vec3_t<ft> get_k(const size_t i, const size_t j, const size_t k) const
vec3_t<ft> get_k(const size_t i, const size_t j, const size_t k) const noexcept
{
vec3_t<ft> kk;
if( bdistributed ){
@ -243,7 +247,7 @@ public:
}
template <typename ft>
vec3_t<ft> get_k(const real_t i, const real_t j, const real_t k) const
vec3_t<ft> get_k(const real_t i, const real_t j, const real_t k) const noexcept
{
vec3_t<ft> kk;
if( bdistributed ){
@ -259,12 +263,13 @@ public:
return kk;
}
std::array<size_t,3> get_k3(const size_t i, const size_t j, const size_t k) const
std::array<size_t,3> get_k3(const size_t i, const size_t j, const size_t k) const noexcept
{
return bdistributed? std::array<size_t,3>({j,i+local_1_start_,k}) : std::array<size_t,3>({i,j,k});
}
data_t get_cic( const vec3_t<real_t>& v ) const{
data_t get_cic( const vec3_t<real_t>& v ) const noexcept
{
// warning! this doesn't work with MPI
vec3_t<real_t> x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0],
std::fmod(v.y/length_[1]+1.0,1.0)*n_[1],
@ -290,7 +295,8 @@ public:
return val;
}
ccomplex_t get_cic_kspace( const vec3_t<real_t> x ) const{
ccomplex_t get_cic_kspace( const vec3_t<real_t> x ) const noexcept
{
// warning! this doesn't work with MPI
int ix = static_cast<int>(std::floor(x.x));
int iy = static_cast<int>(std::floor(x.y));
@ -328,6 +334,11 @@ public:
return ccomplex_t(0.0,rgrad);
}
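// note on the new helper below (comment not part of the commit): the Laplacian
// is diagonal in Fourier space -- the transform of laplace(f) equals -|k|^2
// times the transform of f -- so for cell (i,j,k) it simply returns minus the
// squared norm of the local wave vector.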
inline real_t laplacian( const std::array<size_t,3>& ijk ) const noexcept
{
return -this->get_k<real_t>(ijk[0],ijk[1],ijk[2]).norm_squared();
}
grid_fft_t &operator*=(data_t x)
{
if (space_ == kspace_id)
@ -421,7 +432,7 @@ public:
}
}
double compute_2norm(void)
real_t compute_2norm(void) const
{
real_t sum1{0.0};
#pragma omp parallel for reduction(+ : sum1)
@ -443,7 +454,7 @@ public:
return sum1;
}
double std(void)
real_t std(void) const
{
double sum1{0.0}, sum2{0.0};
size_t count{0};
@ -488,10 +499,10 @@ public:
sum1 /= count;
sum2 /= count;
return std::sqrt(sum2 - sum1 * sum1);
return real_t(std::sqrt(sum2 - sum1 * sum1));
}
double mean(void)
real_t mean(void) const
{
double sum1{0.0};
size_t count{0};
@ -530,7 +541,7 @@ public:
sum1 /= count;
return sum1;
return real_t(sum1);
}
template <typename functional, typename grid_t>
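The std() and mean() methods above keep their accumulators in double and only convert to real_t on return. A standalone illustration (plain standard library, nothing from the project) of why that matters once real_t is float:

    #include <cstdio>
    #include <vector>

    int main()
    {
        // ten million copies of 0.1f; the exact sum is about 1.0000000149e6
        std::vector<float> v(10000000, 0.1f);
        float  acc_f = 0.0f;  // single-precision accumulator: drifts visibly
        double acc_d = 0.0;   // double accumulator: stays at ~1e6
        for (float x : v) { acc_f += x; acc_d += x; }
        std::printf("float accumulator:  %.2f\ndouble accumulator: %.2f\n",
                    acc_f, acc_d);
        return 0;
    }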


@ -2,10 +2,11 @@
#include <grid_fft.hh>
#include <thread>
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::Setup(void)
template <typename data_t, bool bdistributed>
void Grid_FFT<data_t, bdistributed>::Setup(void)
{
if( !bdistributed ){
if (!bdistributed)
{
ntot_ = (n_[2] + 2) * n_[1] * n_[0];
csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
@ -30,7 +31,7 @@ void Grid_FFT<data_t,bdistributed>::Setup(void)
csoca::elog.Print("invalid data type in Grid_FFT<data_t>::setup_fft_interface\n");
}
fft_norm_fac_ = 1.0 / std::sqrt((double)((size_t)n_[0] * (double)n_[1] * (double)n_[2]));
fft_norm_fac_ = 1.0 / std::sqrt((real_t)((size_t)n_[0] * (real_t)n_[1] * (real_t)n_[2]));
if (typeid(data_t) == typeid(real_t))
{
@ -81,26 +82,26 @@ void Grid_FFT<data_t,bdistributed>::Setup(void)
if (typeid(data_t) == typeid(real_t))
{
cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD,
&local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
&local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
ntot_ = 2 * cmplxsz;
data_ = (data_t *)fftw_malloc(ntot_ * sizeof(real_t));
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_,
MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_,
MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
}
else if (typeid(data_t) == typeid(ccomplex_t))
{
cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD,
&local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
&local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_);
ntot_ = cmplxsz;
data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t));
cdata_ = reinterpret_cast<ccomplex_t *>(data_);
plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT);
iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_,
MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN);
}
else
{
@ -109,7 +110,8 @@ void Grid_FFT<data_t,bdistributed>::Setup(void)
}
csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]);
fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]);
fft_norm_fac_ = 1.0 / sqrt((real_t)n_[0] * (real_t)n_[1] * (real_t)n_[2]);
if (typeid(data_t) == typeid(real_t))
{
@ -155,16 +157,16 @@ void Grid_FFT<data_t,bdistributed>::Setup(void)
}
}
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::ApplyNorm(void)
template <typename data_t, bool bdistributed>
void Grid_FFT<data_t, bdistributed>::ApplyNorm(void)
{
#pragma omp parallel for
for (size_t i = 0; i < ntot_; ++i)
data_[i] *= fft_norm_fac_;
}
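ApplyNorm() rescales by fft_norm_fac_ = 1/sqrt(n_[0]*n_[1]*n_[2]) because FFTW transforms are unnormalized: a forward followed by a backward pass multiplies the data by the total number of points. A standalone reminder of that convention (plain double-precision FFTW, independent of the project code; link with -lfftw3):

    #include <cstdio>
    #include <fftw3.h>

    int main()
    {
        const int N = 8;
        fftw_complex *a = fftw_alloc_complex(N);
        for (int i = 0; i < N; ++i) { a[i][0] = (i == 0) ? 1.0 : 0.0; a[i][1] = 0.0; }

        // FFTW_ESTIMATE planning does not touch the array contents
        fftw_plan fwd = fftw_plan_dft_1d(N, a, a, FFTW_FORWARD,  FFTW_ESTIMATE);
        fftw_plan bwd = fftw_plan_dft_1d(N, a, a, FFTW_BACKWARD, FFTW_ESTIMATE);
        fftw_execute(fwd);
        fftw_execute(bwd);

        // prints 8: the round trip scaled the delta peak by N, which is what
        // the 1/sqrt(N) factor applied after each direction compensates for
        std::printf("a[0] after forward+backward: %g (N = %d)\n", a[0][0], N);

        fftw_destroy_plan(fwd); fftw_destroy_plan(bwd); fftw_free(a);
        return 0;
    }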
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::FourierTransformForward(bool do_transform)
template <typename data_t, bool bdistributed>
void Grid_FFT<data_t, bdistributed>::FourierTransformForward(bool do_transform)
{
#if defined(USE_MPI)
MPI_Barrier(MPI_COMM_WORLD);
@ -195,8 +197,8 @@ void Grid_FFT<data_t,bdistributed>::FourierTransformForward(bool do_transform)
}
}
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::FourierTransformBackward(bool do_transform)
template <typename data_t, bool bdistributed>
void Grid_FFT<data_t, bdistributed>::FourierTransformBackward(bool do_transform)
{
#if defined(USE_MPI)
MPI_Barrier(MPI_COMM_WORLD);
@ -210,8 +212,7 @@ void Grid_FFT<data_t,bdistributed>::FourierTransformBackward(bool do_transform)
csoca::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]);
double wtime = get_wtime();
FFTW_API(execute)
(iplan_);
FFTW_API(execute)(iplan_);
this->ApplyNorm();
wtime = get_wtime() - wtime;
@ -262,6 +263,9 @@ hid_t hdf5_get_data_type(void)
if (typeid(T) == typeid(double))
return H5T_NATIVE_DOUBLE;
if (typeid(T) == typeid(long double))
return H5T_NATIVE_LDOUBLE;
if (typeid(T) == typeid(long long))
return H5T_NATIVE_LLONG;
@ -276,10 +280,11 @@ hid_t hdf5_get_data_type(void)
return -1;
}
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::Read_from_HDF5(const std::string Filename, const std::string ObjName)
template <typename data_t, bool bdistributed>
void Grid_FFT<data_t, bdistributed>::Read_from_HDF5(const std::string Filename, const std::string ObjName)
{
if( bdistributed ){
if (bdistributed)
{
csoca::elog << "Attempt to read from HDF5 into MPI-distributed array. This is not supported yet!" << std::endl;
abort();
}
@ -354,10 +359,11 @@ void Grid_FFT<data_t,bdistributed>::Read_from_HDF5(const std::string Filename, c
H5Dclose(HDF_DatasetID);
H5Fclose(HDF_FileID);
assert( dimsize[0] == dimsize[1] && dimsize[0] == dimsize[2] );
assert(dimsize[0] == dimsize[1] && dimsize[0] == dimsize[2]);
csoca::ilog << "Read external constraint data of dimensions " << dimsize[0] << "**3." << std::endl;
for( size_t i=0; i<3; ++i ) this->n_[i] = dimsize[i];
for (size_t i = 0; i < 3; ++i)
this->n_[i] = dimsize[i];
this->space_ = rspace_id;
if (data_ != nullptr)
@ -365,47 +371,47 @@ void Grid_FFT<data_t,bdistributed>::Read_from_HDF5(const std::string Filename, c
fftw_free(data_);
}
this->Setup();
//... copy data to internal array ...
double sum1{0.0}, sum2{0.0};
#pragma omp parallel for reduction(+:sum1,sum2)
real_t sum1{0.0}, sum2{0.0};
#pragma omp parallel for reduction(+ : sum1, sum2)
for (size_t i = 0; i < size(0); ++i)
{
for (size_t j = 0; j < size(1); ++j)
{
for (size_t k = 0; k < size(2); ++k)
{
this->relem(i,j,k) = Data[ (i*size(1) + j)*size(2)+k ];
sum2 += std::real(this->relem(i,j,k)*this->relem(i,j,k));
sum1 += std::real(this->relem(i,j,k));
this->relem(i, j, k) = Data[(i * size(1) + j) * size(2) + k];
sum2 += std::real(this->relem(i, j, k) * this->relem(i, j, k));
sum1 += std::real(this->relem(i, j, k));
}
}
}
sum1 /= Data.size();
sum2 /= Data.size();
auto stdw = std::sqrt(sum2-sum1*sum1);
auto stdw = std::sqrt(sum2 - sum1 * sum1);
csoca::ilog << "Constraint field has <W>=" << sum1 << ", <W^2>-<W>^2=" << stdw << std::endl;
#pragma omp parallel for reduction(+:sum1,sum2)
#pragma omp parallel for reduction(+ : sum1, sum2)
for (size_t i = 0; i < size(0); ++i)
{
for (size_t j = 0; j < size(1); ++j)
{
for (size_t k = 0; k < size(2); ++k)
{
this->relem(i,j,k) /= stdw;
this->relem(i, j, k) /= stdw;
}
}
}
}
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string datasetname) const
template <typename data_t, bool bdistributed>
void Grid_FFT<data_t, bdistributed>::Write_to_HDF5(std::string fname, std::string datasetname) const
{
// FIXME: cleanup duplicate code in this function!
if( !bdistributed && CONFIG::MPI_task_rank==0 ){
if (!bdistributed && CONFIG::MPI_task_rank == 0)
{
hid_t file_id, dset_id; /* file and dataset identifiers */
hid_t filespace, memspace; /* file and memory dataspace identifiers */
hsize_t offset[3], count[3];
@ -419,23 +425,23 @@ void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string
for (int i = 0; i < 3; ++i)
count[i] = size(i);
if (typeid(data_t) == typeid(float))
dtype_id = H5T_NATIVE_FLOAT;
else if (typeid(data_t) == typeid(double))
dtype_id = H5T_NATIVE_DOUBLE;
else if (typeid(data_t) == typeid(long double))
dtype_id = H5T_NATIVE_LDOUBLE;
else if (typeid(data_t) == typeid(std::complex<float>))
{
dtype_id = H5T_NATIVE_FLOAT;
}
else if (typeid(data_t) == typeid(std::complex<double>))
{
dtype_id = H5T_NATIVE_DOUBLE;
}
else if (typeid(data_t) == typeid(std::complex<long double>))
dtype_id = H5T_NATIVE_LDOUBLE;
filespace = H5Screate_simple(3, count, NULL);
dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace,
H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
H5Sclose(filespace);
hsize_t slice_sz = size(1) * size(2);
@ -459,7 +465,7 @@ void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string
{
for (size_t k = 0; k < size(2); ++k)
{
if( this->space_ == rspace_id )
if (this->space_ == rspace_id)
buf[j * size(2) + k] = std::real(relem(i, j, k));
else
buf[j * size(2) + k] = std::real(kelem(i, j, k));
@ -478,7 +484,8 @@ void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string
if (typeid(data_t) == typeid(std::complex<float>) ||
typeid(data_t) == typeid(std::complex<double>) ||
this->space_ == kspace_id )
typeid(data_t) == typeid(std::complex<long double>) ||
this->space_ == kspace_id)
{
datasetname += std::string(".im");
@ -487,7 +494,7 @@ void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string
filespace = H5Screate_simple(3, count, NULL);
dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace,
H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
H5Sclose(filespace);
count[0] = 1;
@ -499,7 +506,7 @@ void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string
for (size_t j = 0; j < size(1); ++j)
for (size_t k = 0; k < size(2); ++k)
{
if( this->space_ == rspace_id )
if (this->space_ == rspace_id)
buf[j * size(2) + k] = std::imag(relem(i, j, k));
else
buf[j * size(2) + k] = std::imag(kelem(i, j, k));
@ -526,7 +533,8 @@ void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string
return;
}
if( !bdistributed && CONFIG::MPI_task_rank!=0 ) return;
if (!bdistributed && CONFIG::MPI_task_rank != 0)
return;
hid_t file_id, dset_id; /* file and dataset identifiers */
hid_t filespace, memspace; /* file and memory dataspace identifiers */
@ -534,7 +542,6 @@ void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string
hid_t dtype_id = H5T_NATIVE_FLOAT;
hid_t plist_id;
#if defined(USE_MPI)
int mpi_size, mpi_rank;
@ -586,14 +593,14 @@ void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string
dtype_id = H5T_NATIVE_FLOAT;
else if (typeid(data_t) == typeid(double))
dtype_id = H5T_NATIVE_DOUBLE;
else if (typeid(data_t) == typeid(long double))
dtype_id = H5T_NATIVE_LDOUBLE;
else if (typeid(data_t) == typeid(std::complex<float>))
{
dtype_id = H5T_NATIVE_FLOAT;
}
else if (typeid(data_t) == typeid(std::complex<double>))
{
dtype_id = H5T_NATIVE_DOUBLE;
}
else if (typeid(data_t) == typeid(std::complex<long double>))
dtype_id = H5T_NATIVE_LDOUBLE;
#if defined(USE_MPI) && !defined(USE_MPI_IO)
if (itask == 0)
@ -648,7 +655,7 @@ void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string
{
for (size_t k = 0; k < size(2); ++k)
{
if( this->space_ == rspace_id )
if (this->space_ == rspace_id)
buf[j * size(2) + k] = std::real(relem(i, j, k));
else
buf[j * size(2) + k] = std::real(kelem(i, j, k));
@ -671,7 +678,8 @@ void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string
if (typeid(data_t) == typeid(std::complex<float>) ||
typeid(data_t) == typeid(std::complex<double>) ||
this->space_ == kspace_id )
typeid(data_t) == typeid(std::complex<long double>) ||
this->space_ == kspace_id)
{
datasetname += std::string(".im");
@ -721,7 +729,7 @@ void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string
for (size_t j = 0; j < size(1); ++j)
for (size_t k = 0; k < size(2); ++k)
{
if( this->space_ == rspace_id )
if (this->space_ == rspace_id)
buf[j * size(2) + k] = std::imag(relem(i, j, k));
else
buf[j * size(2) + k] = std::imag(kelem(i, j, k));
@ -757,8 +765,8 @@ void Grid_FFT<data_t,bdistributed>::Write_to_HDF5(std::string fname, std::string
#include <iomanip>
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax)
template <typename data_t, bool bdistributed>
void Grid_FFT<data_t, bdistributed>::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax)
{
double logvmin = std::log10(vmin);
double logvmax = std::log10(vmax);
@ -809,12 +817,12 @@ void Grid_FFT<data_t,bdistributed>::Write_PDF(std::string ofname, int nbins, dou
#endif
}
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::Write_PowerSpectrum(std::string ofname)
template <typename data_t, bool bdistributed>
void Grid_FFT<data_t, bdistributed>::Write_PowerSpectrum(std::string ofname)
{
std::vector<double> bin_k, bin_P, bin_eP;
std::vector<size_t> bin_count;
this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count );
this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count);
#if defined(USE_MPI)
if (CONFIG::MPI_task_rank == 0)
{
@ -839,8 +847,8 @@ void Grid_FFT<data_t,bdistributed>::Write_PowerSpectrum(std::string ofname)
#endif
}
template <typename data_t,bool bdistributed>
void Grid_FFT<data_t,bdistributed>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::vector<double> &bin_P, std::vector<double> &bin_eP, std::vector<size_t> &bin_count )
template <typename data_t, bool bdistributed>
void Grid_FFT<data_t, bdistributed>::Compute_PowerSpectrum(std::vector<double> &bin_k, std::vector<double> &bin_P, std::vector<double> &bin_eP, std::vector<size_t> &bin_count)
{
this->FourierTransformForward();
@ -920,7 +928,7 @@ void Grid_FFT<data_t,bdistributed>::Compute_PowerSpectrum(std::vector<double> &b
/********************************************************************************************/
template class Grid_FFT<real_t,true>;
template class Grid_FFT<real_t,false>;
template class Grid_FFT<ccomplex_t,true>;
template class Grid_FFT<ccomplex_t,false>;
template class Grid_FFT<real_t, true>;
template class Grid_FFT<real_t, false>;
template class Grid_FFT<ccomplex_t, true>;
template class Grid_FFT<ccomplex_t, false>;