diff --git a/.gitignore b/.gitignore index 60035a0..bcbdff2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,56 +1,14 @@ -build +.DS_Store .vscode -src/CMakeFiles/3.12.2/CompilerIdC/CMakeCCompilerId.c -src/CMakeFiles/feature_tests.c -src/CMakeFiles/feature_tests.cxx -src/CMakeFiles/progress.marks -src/CMakeFiles/3.12.2/CMakeCCompiler.cmake -src/CMakeFiles/3.12.2/CMakeCXXCompiler.cmake -src/CMakeFiles/3.12.2/CMakeDetermineCompilerABI_C.bin -src/CMakeFiles/3.12.2/CMakeDetermineCompilerABI_CXX.bin -src/CMakeFiles/3.12.2/CMakeSystem.cmake -src/CMakeFiles/fastLPT.dir/build.make -src/CMakeFiles/FindMPI/test_mpi.cpp -src/CMakeFiles/FindMPI/test_mpi_C.bin -src/CMakeFiles/FindMPI/test_mpi_CXX.bin -src/CMakeFiles/FindOpenMP/OpenMPCheckVersion.c -src/CMakeFiles/FindOpenMP/OpenMPCheckVersion.cpp -src/CMakeFiles/FindOpenMP/OpenMPTryFlag.c -src/CMakeFiles/FindOpenMP/OpenMPTryFlag.cpp -src/CMakeFiles/FindOpenMP/ompver_C.bin -src/CMakeFiles/FindOpenMP/ompver_CXX.bin -src/CMakeFiles/fastLPT.dir/CXX.includecache -src/CMakeFiles/fastLPT.dir/DependInfo.cmake -src/CMakeFiles/fastLPT.dir/plugins/transfer_eisenstein.cc.o -src/CMakeFiles/3.12.2/CompilerIdCXX/a.out -src/CMakeFiles/fastLPT.dir/cmake_clean.cmake -src/CMakeFiles/fastLPT.dir/depend.internal -src/CMakeFiles/fastLPT.dir/depend.make -src/CMakeFiles/fastLPT.dir/flags.make -src/CMakeFiles/fastLPT.dir/grid_fft.cc.o -src/CMakeFiles/fastLPT.dir/link.txt -src/CMakeFiles/fastLPT.dir/logger.cc.o -src/CMakeFiles/fastLPT.dir/main.cc.o -src/CMakeFiles/fastLPT.dir/progress.make -src/CMakeFiles/fastLPT.dir/random_plugin.cc.o -src/CMakeFiles/fastLPT.dir/transfer_function_plugin.cc.o -src/CMakeFiles/fastLPT.dir/plugins/random_music.cc.o -src/CMakeFiles/fastLPT.dir/plugins/random_music_wnoise_generator.cc.o -src/CMakeFiles/feature_tests.bin -src/CMakeFiles/CMakeDirectoryInformation.cmake -src/CMakeFiles/CMakeOutput.log -src/CMakeFiles/Makefile.cmake -src/CMakeFiles/Makefile2 -src/CMakeFiles/TargetDirectories.txt -src/CMakeFiles/cmake.check_cache 
-src/CMakeFiles/3.12.2/CompilerIdC/a.out -src/CMakeFiles/3.12.2/CompilerIdCXX/CMakeCXXCompilerId.cpp -src/CMakeFiles/hdf5/cmake_hdf5_test.c -src/fastLPT.dSYM/Contents/Info.plist -src/fastLPT.dSYM/Contents/Resources/DWARF/fastLPT +build +include/cmake_config.hh +src/input_powerspec.txt +CMakeCache.txt +CMakeFiles/cmake.check_cache +src/CMakeFiles src/cmake_install.cmake src/CMakeCache.txt -src/fastLPT -src/input_powerspec.txt src/Makefile -.DS_Store +external/panphasia/rand_base.mod +external/panphasia/rand_int.mod +external/panphasia/rand.mod \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 875fc91..be14271 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,16 +1,42 @@ cmake_minimum_required(VERSION 3.9) set(PRGNAME monofonIC) -project(monofonIC) +project(monofonIC C CXX) + +#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -march=native -Wall -pedantic" CACHE STRING "Flags used by the compiler during Release builds." FORCE) +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -march=native -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during RelWithDebInfo builds." FORCE) +set(CMAKE_CXX_FLAGS_DEBUG "-g -O1 -march=native -DDEBUG -fno-omit-frame-pointer -Wall -pedantic" CACHE STRING "Flags used by the compiler during Debug builds." FORCE) +set(CMAKE_CXX_FLAGS_DEBUGSANADD "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address " CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for address." FORCE) +set(CMAKE_CXX_FLAGS_DEBUGSANUNDEF "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for undefineds." FORCE) +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}" CACHE STRING "Flags used by the compiler during Release builds." 
FORCE) +set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO}" CACHE STRING "Flags used by the compiler during RelWithDebInfo builds." FORCE) +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}" CACHE STRING "Flags used by the compiler during Debug builds." FORCE) +set(CMAKE_C_FLAGS_DEBUGSANADD "${CMAKE_CXX_FLAGS_DEBUGSANADD}" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for address." FORCE) +set(CMAKE_C_FLAGS_DEBUGSANUNDEF "${CMAKE_CXX_FLAGS_DEBUGSANUNDEF}" CACHE STRING "Flags used by the compiler during Debug builds with Sanitizer for undefineds." FORCE) + + +set(default_build_type "Release") +if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + message(STATUS "Setting build type to '${default_build_type}' as none was specified.") + set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE + STRING "Choose the type of build." FORCE) + # Set the possible values of build type for cmake-gui + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS + "Debug" "Release" "RelWithDebInfo" "DebugSanAdd" "DebugSanUndef") +endif() +mark_as_advanced(CMAKE_CXX_FLAGS_DEBUGSANADD CMAKE_CXX_FLAGS_DEBUGSANUNDEF) +mark_as_advanced(CMAKE_C_FLAGS_DEBUGSANADD CMAKE_C_FLAGS_DEBUGSANUNDEF) +mark_as_advanced(CMAKE_EXECUTABLE_FORMAT CMAKE_OSX_ARCHITECTURES CMAKE_OSX_DEPLOYMENT_TARGET CMAKE_OSX_SYSROOT) + + +######################################################################################################################## # include class submodule include(${CMAKE_CURRENT_SOURCE_DIR}/external/class.cmake) -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -march=native -Wall -fno-omit-frame-pointer -g -fsanitize=address") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic") find_package(PkgConfig REQUIRED) -set(CMAKE_MODULE_PATH - "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}") +set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}") 
######################################################################################################################## @@ -48,21 +74,70 @@ if(ENABLE_MPI) endif(MPI_CXX_FOUND) endif(ENABLE_MPI) +######################################################################################################################## +# floating point precision +set ( + CODE_PRECISION "DOUBLE" + CACHE STRING "Floating point type used for internal computations and FFTs" +) +set_property ( + CACHE CODE_PRECISION + PROPERTY STRINGS FLOAT DOUBLE LONGDOUBLE +) +######################################################################################################################## +# convolver type, right now only orszag or naive +set ( + CONVOLVER_TYPE "ORSZAG" + CACHE STRING "Convolution algorithm to be used (Naive=no dealiasing, Orszag=dealiased)" +) +set_property ( + CACHE CONVOLVER_TYPE + PROPERTY STRINGS ORSZAG NAIVE +) + +######################################################################################################################## +# PLT options, right now only on/off +option(ENABLE_PLT "Enable PLT (particle linear theory) corrections" OFF) + + +######################################################################################################################## # FFTW -cmake_policy(SET CMP0074 NEW) +if(POLICY CMP0074) + cmake_policy(SET CMP0074 NEW) +endif() if(ENABLE_MPI) - find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS MPI) + find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS MPI) else() - find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP THREADS) + find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS) endif(ENABLE_MPI) +mark_as_advanced(FFTW3_SINGLE_MPI_LIBRARY FFTW3_SINGLE_OPENMP_LIBRARY FFTW3_SINGLE_SERIAL_LIBRARY FFTW3_SINGLE_THREADS_LIBRARY) +mark_as_advanced(FFTW3_DOUBLE_MPI_LIBRARY FFTW3_DOUBLE_OPENMP_LIBRARY FFTW3_DOUBLE_SERIAL_LIBRARY FFTW3_DOUBLE_THREADS_LIBRARY) +mark_as_advanced(FFTW3_LONGDOUBLE_MPI_LIBRARY 
FFTW3_LONGDOUBLE_OPENMP_LIBRARY FFTW3_LONGDOUBLE_SERIAL_LIBRARY FFTW3_LONGDOUBLE_THREADS_LIBRARY) +mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_MPI_INCLUDE_DIR) +mark_as_advanced(pkgcfg_lib_PC_FFTW_fftw3) +######################################################################################################################## # GSL find_package(GSL REQUIRED) +mark_as_advanced(pkgcfg_lib_GSL_gsl pkgcfg_lib_GSL_gslcblas pkgcfg_lib_GSL_m) +######################################################################################################################## # HDF5 find_package(HDF5 REQUIRED) +mark_as_advanced(HDF5_C_LIBRARY_dl HDF5_C_LIBRARY_hdf5 HDF5_C_LIBRARY_m HDF5_C_LIBRARY_pthread HDF5_C_LIBRARY_z HDF5_C_LIBRARY_sz) +######################################################################################################################## +# PANPHASIA +option(ENABLE_PANPHASIA "Enable PANPHASIA random number generator" ON) +if(ENABLE_PANPHASIA) +enable_language(Fortran) +if ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "Intel") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -132 -implicit-none") +elseif("${CMAKE_Fortran_COMPILER_ID}" MATCHES "GNU") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -ffixed-line-length-132 -fimplicit-none") +endif() +endif(ENABLE_PANPHASIA) ######################################################################################################################## # INCLUDES include_directories(${PROJECT_SOURCE_DIR}/include) @@ -81,28 +156,68 @@ file( GLOB PLUGINS ${PROJECT_SOURCE_DIR}/src/plugins/*.cc ) +if(ENABLE_PANPHASIA) +list (APPEND SOURCES + ${PROJECT_SOURCE_DIR}/external/panphasia/panphasia_routines.f + ${PROJECT_SOURCE_DIR}/external/panphasia/generic_lecuyer.f90 +) +endif() + +# project configuration header +configure_file( + ${PROJECT_SOURCE_DIR}/include/cmake_config.hh.in + ${PROJECT_SOURCE_DIR}/include/cmake_config.hh +) + add_executable(${PRGNAME} ${SOURCES} ${PLUGINS}) target_setup_class(${PRGNAME}) -set_target_properties(${PRGNAME} 
PROPERTIES CXX_STANDARD 17) +set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 14) + # mpi flags if(MPI_CXX_FOUND) - if(FFTW3_DOUBLE_MPI_FOUND) - target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_MPI_LIBRARY}) - target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL}) - target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI") - endif(FFTW3_DOUBLE_MPI_FOUND) + if(CODE_PRECISION STREQUAL "FLOAT") + if(FFTW3_SINGLE_MPI_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_MPI_LIBRARY}) + target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL}) + target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI") + else() + message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for single precision!") + endif() + elseif(CODE_PRECISION STREQUAL "DOUBLE") + if(FFTW3_DOUBLE_MPI_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_MPI_LIBRARY}) + target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL}) + target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI") + else() + message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for double precision!") + endif() + elseif(CODE_PRECISION STREQUAL "LONGDOUBLE") + if(FFTW3_LONGDOUBLE_MPI_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_LONGDOUBLE_MPI_LIBRARY}) + target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIR_PARALLEL}) + target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_MPI") + else() + message(SEND_ERROR "MPI enabled but FFTW3 library not found with MPI support for long double precision!") + endif() + endif() target_include_directories(${PRGNAME} PRIVATE ${MPI_CXX_INCLUDE_PATH}) target_compile_options(${PRGNAME} PRIVATE "-DUSE_MPI") target_link_libraries(${PRGNAME} ${MPI_LIBRARIES}) endif(MPI_CXX_FOUND) -if(FFTW3_DOUBLE_THREADS_FOUND) +if(CODE_PRECISION STREQUAL "FLOAT" AND FFTW3_SINGLE_THREADS_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_THREADS_LIBRARY}) + 
target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS") +elseif(CODE_PRECISION STREQUAL "DOUBLE" AND FFTW3_DOUBLE_THREADS_FOUND) target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_THREADS_LIBRARY}) target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS") -endif(FFTW3_DOUBLE_THREADS_FOUND) +elseif(CODE_PRECISION STREQUAL "LONGDOUBLE" AND FFTW3_LONGDOUBLE_THREADS_FOUND) + target_link_libraries(${PRGNAME} ${FFTW3_LONGDOUBLE_THREADS_LIBRARY}) + target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS") +endif() if(HDF5_FOUND) # target_link_libraries(${PRGNAME} ${HDF5_C_LIBRARY_DIRS}) @@ -111,6 +226,10 @@ if(HDF5_FOUND) target_compile_options(${PRGNAME} PRIVATE "-DUSE_HDF5") endif(HDF5_FOUND) +if(ENABLE_PANPHASIA) +target_compile_options(${PRGNAME} PRIVATE "-DUSE_PANPHASIA") +endif(ENABLE_PANPHASIA) + target_link_libraries(${PRGNAME} ${FFTW3_LIBRARIES}) target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIRS}) diff --git a/README.md b/README.md index e34dce2..3d3be7b 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ High order LPT/QPT tool for single resolution simulations ## Build Instructions Clone code including submodules (currently only CLASS is used as a submodule): - git clone --recurse-submodules https://ohahn@bitbucket.org/ohahn/monofonic.git + git clone --recurse-submodules https://<username>@bitbucket.org/ohahn/monofonic.git Create build directory, configure, and build: @@ -17,4 +17,30 @@ Create build directory, configure, and build: make this should create an executable in the build directory. -There is an example parameter file 'example.conf' in the main directory + +If you run into problems with CMake not being able to find your local FFTW3 or HDF5 installation, it is best to give the path directly as + + FFTW3_ROOT=<path> HDF5_ROOT=<path> ccmake .. + +Make sure to delete previous files generated by CMake before reconfiguring like this.
+ +If you want to build on macOS, then it is strongly recommended to use GNU (or Intel) compilers instead of Apple's Clang. Install them e.g. +via homebrew and then configure cmake to use them instead of the macOS default compiler via + + CC=gcc-9 CXX=g++-9 ccmake .. + +This is necessary since Apple's compilers haven't supported OpenMP for years. + +## Running + +There is an example parameter file 'example.conf' in the main directory. Possible options are explained in it; it can be passed +as a simple argument, e.g. from within the build directory: + + ./monofonIC ../example.conf + +If you want to run with MPI, you need to enable MPI support via ccmake. Then you can launch in hybrid MPI+threads mode by +specifying the desired number of threads per task in the config file, and the number of tasks to be launched via + + mpirun -np 16 ./monofonIC <path to config file> + +It will then run with 16 tasks times the number of threads per task specified in the config file. \ No newline at end of file diff --git a/example.conf b/example.conf index 3b6d07e..073b887 100644 --- a/example.conf +++ b/example.conf @@ -1,58 +1,71 @@ [setup] # number of grid cells per linear dimension for calculations = particles for sc initial load -GridRes = 128 +GridRes = 128 # length of the box in Mpc/h -BoxLength = 250 +BoxLength = 125 # starting redshift -zstart = 49.0 +zstart = 49.0 # order of the LPT to be used (1,2 or 3) -LPTorder = 3 +LPTorder = 1 # also do baryon ICs? -DoBaryons = no +DoBaryons = no # do mode fixing à la Angulo&Pontzen -DoFixing = no +DoFixing = yes # particle load, can be 'sc' (1x), 'bcc' (2x) or 'fcc' (4x) (increases number of particles by factor!)
-ParticleLoad = sc +ParticleLoad = sc +# Add a possible constraint field here: +#ConstraintFieldFile = initial_conditions.h5 +#ConstraintFieldName = ic_white_noise + +[cosmology] +transfer = CLASS +ztarget = 2.5 +# transfer = eisenstein +# transfer = file_CAMB +# transfer_file = wmap5_transfer_out_z0.dat +Omega_m = 0.302 +Omega_b = 0.045 +Omega_L = 0.698 +H0 = 70.3 +sigma_8 = 0.811 +nspec = 0.961 + +# anisotropic large scale tidal field +# LSS_aniso_lx = +0.1 +# LSS_aniso_ly = +0.1 +# LSS_aniso_lz = -0.2 + +[random] +generator = NGENIC +seed = 9001 [testing] # enables diagnostic output # can be 'none' (default), 'potentials_and_densities', 'velocity_displacement_symmetries', or 'convergence' -test = convergence +test = none [execution] -NumThreads = 4 +NumThreads = 8 [output] -fname_hdf5 = output_sch.hdf5 -fbase_analysis = output +fname_hdf5 = output_sch.hdf5 +fbase_analysis = output -format = gadget2 -filename = ics_gadget.dat +# format = gadget2 +# filename = ics_gadget.dat +# UseLongids = false -#format = generic -#filename = debug.hdf5 -#generic_out_eulerian = yes +format = gadget_hdf5 +filename = ics_gadget.hdf5 -#format = grafic2 -#filename = ics_ramses -#grafic_use_SPT = yes +# format = AREPO +# filename = ics_arepo.hdf5 -[random] -generator = NGENIC -seed = 9001 +# format = generic +# filename = debug.hdf5 +# generic_out_eulerian = yes -[cosmology] -#transfer = CLASS -transfer = eisenstein -Omega_m = 0.302 -Omega_b = 0.045 -Omega_L = 0.698 -H0 = 70.3 -sigma_8 = 0.811 -nspec = 0.961 - -# anisotropic large scale tidal field -#LSS_aniso_lx = 0.1 -#LSS_aniso_ly = 0.1 -#LSS_aniso_lz = -0.2 +# format = grafic2 +# filename = ics_ramses +# grafic_use_SPT = yes diff --git a/example_testing.conf b/example_testing.conf new file mode 100644 index 0000000..2890286 --- /dev/null +++ b/example_testing.conf @@ -0,0 +1,33 @@ +[setup] +GridRes = 256 +BoxLength = 6.28318530718 +zstart = 0.0 +LPTorder = 1 +SymplecticPT = no +DoFixing = no + +[execution] +NumThreads = 4 + 
+[output] +fname_hdf5 = output.hdf5 +fbase_analysis = output +#format = gadget2 +#filename = ics_gadget.dat +format = generic +filename = debug.hdf5 +generic_out_eulerian = yes + +[random] +generator = NGENIC +seed = 9001 + +[cosmology] +#transfer = CLASS +transfer = eisenstein +Omega_m = 1.0 +Omega_b = 0.045 +Omega_L = 0.0 +H0 = 70.3 +sigma_8 = 0.811 +nspec = 0.961 diff --git a/external/class b/external/class index b34d7f6..6adecae 160000 --- a/external/class +++ b/external/class @@ -1 +1 @@ -Subproject commit b34d7f6c2b72eab3a347c28e62298d62ca9dd69b +Subproject commit 6adecae2f30172a94e003155090791abf509d995 diff --git a/external/class.cmake b/external/class.cmake index 0a3f3c2..a2e5057 100644 --- a/external/class.cmake +++ b/external/class.cmake @@ -32,6 +32,7 @@ if(ENABLE_CLASS) ${CMAKE_CURRENT_LIST_DIR}/class/build/history.o ${CMAKE_CURRENT_LIST_DIR}/class/build/hydrogen.o ${CMAKE_CURRENT_LIST_DIR}/class/build/hyperspherical.o + ${CMAKE_CURRENT_LIST_DIR}/class/tools/trigonometric_integrals.o ${CMAKE_CURRENT_LIST_DIR}/class/build/hyrectools.o ${CMAKE_CURRENT_LIST_DIR}/class/build/input.o ${CMAKE_CURRENT_LIST_DIR}/class/build/lensing.o @@ -78,6 +79,7 @@ if(ENABLE_CLASS) ${CMAKE_CURRENT_LIST_DIR}/class/tools/parser.c ${CMAKE_CURRENT_LIST_DIR}/class/tools/quadrature.c ${CMAKE_CURRENT_LIST_DIR}/class/tools/hyperspherical.c + ${CMAKE_CURRENT_LIST_DIR}/class/tools/trigonometric_integrals.c ${CMAKE_CURRENT_LIST_DIR}/class/tools/common.c ${CMAKE_CURRENT_LIST_DIR}/class/source/input.c ${CMAKE_CURRENT_LIST_DIR}/class/source/background.c @@ -131,9 +133,9 @@ macro(target_setup_class target_name) endif(ENABLE_CLASS) endmacro(target_setup_class) -if(ENABLE_CLASS) - # test executable - add_executable(testTk - ${CMAKE_CURRENT_LIST_DIR}/class/cpp/testTk.cc) - target_setup_class(testTk) -endif(ENABLE_CLASS) \ No newline at end of file +# if(ENABLE_CLASS) +# # test executable +# add_executable(testTk +# ${CMAKE_CURRENT_LIST_DIR}/class/cpp/testTk.cc) +# target_setup_class(testTk) 
+# endif(ENABLE_CLASS) \ No newline at end of file diff --git a/external/fftwpp b/external/fftwpp new file mode 160000 index 0000000..ec6b82c --- /dev/null +++ b/external/fftwpp @@ -0,0 +1 @@ +Subproject commit ec6b82cc1122ba029a7a7142cf836014e992e68c diff --git a/external/panphasia/generic_lecuyer.f90 b/external/panphasia/generic_lecuyer.f90 new file mode 100644 index 0000000..13f53ed --- /dev/null +++ b/external/panphasia/generic_lecuyer.f90 @@ -0,0 +1,683 @@ +!=====================================================================================c +! +! The code below was written by: Stephen Booth +! Edinburgh Parallel Computing Centre +! The University of Edinburgh +! JCMB +! Mayfield Road +! Edinburgh EH9 3JZ +! United Kingdom +! +! This file is part of the software made public in +! Jenkins and Booth 2013 - arXiv:1306.XXXX +! +! The software computes the Panphasia Gaussian white noise field +! realisation described in detail in Jenkins 2013 - arXiv:1306.XXXX +! +! +! +! This software is free, subject to a agreeing licence conditions: +! +! +! (i) you will publish the phase descriptors and reference Jenkins (13) +! for any new simulations that use Panphasia phases. You will pass on this +! condition to others for any software or data you make available publically +! or privately that makes use of Panphasia. +! +! (ii) that you will ensure any publications using results derived from Panphasia +! will be submitted as a final version to arXiv prior to or coincident with +! publication in a journal. +! +! +! (iii) that you report any bugs in this software as soon as confirmed to +! A.R.Jenkins@durham.ac.uk +! +! (iv) that you understand that this software comes with no warranty and that is +! your responsibility to ensure that it is suitable for the purpose that +! you intend. +! +!=====================================================================================c +!{{{Rand_base (define kind types) +MODULE Rand_base +! This module just declares the base types +! 
we may have to edit this to match to the target machine +! we really need a power of 2 selected int kind in fortran-95 we could +! do this with a PURE function I think. + +! +! 10 decimal digits will hold 2^31 +! + + INTEGER, PARAMETER :: Sint = SELECTED_INT_KIND(9) +! INTEGER, PARAMETER :: Sint = SELECTED_INT_KIND(10) +! INTEGER, PARAMETER :: Sint = 4 + +! +! 18-19 decimal digits will hold 2^63 +! but all 19 digit numbers require 2^65 :-( +! + + INTEGER, PARAMETER :: Dint = SELECTED_INT_KIND(17) +! INTEGER, PARAMETER :: Dint = SELECTED_INT_KIND(18) +! INTEGER, PARAMETER :: Dint = 8 + +! type for index counters must hold Nstore + INTEGER, PARAMETER :: Ctype = SELECTED_INT_KIND(3) +END MODULE Rand_base +!}}} + +!{{{Rand_int (random integers mod 2^31-1) + +MODULE Rand_int + USE Rand_base + IMPLICIT NONE +! The general approach of this module is two have +! two types Sint and Dint +! +! Sint should have at least 31 bits +! dint shouldhave at least 63 + +!{{{constants + + INTEGER(KIND=Ctype), PARAMETER :: Nstate=5_Ctype + INTEGER(KIND=Ctype), PRIVATE, PARAMETER :: Nbatch=128_Ctype + INTEGER(KIND=Ctype), PRIVATE, PARAMETER :: Nstore=Nstate+Nbatch + + INTEGER(KIND=Sint), PRIVATE, PARAMETER :: M = 2147483647_Sint + INTEGER(KIND=Dint), PRIVATE, PARAMETER :: Mask = 2147483647_Dint + INTEGER(KIND=Dint), PRIVATE, PARAMETER :: A1 = 107374182_Dint + INTEGER(KIND=Dint), PRIVATE, PARAMETER :: A5 = 104480_Dint + LOGICAL, PARAMETER :: Can_step_int=.TRUE. + LOGICAL, PARAMETER :: Can_reverse_int=.TRUE. + +!}}} + +!{{{Types +! +! This type holds the state of the generator +! +!{{{TYPE RAND_state + +TYPE RAND_state + PRIVATE + INTEGER(KIND=Sint) :: state(Nstore) +! do we need to re-fill state table this is reset when we initialise state. + LOGICAL :: need_fill +! position of the next state variable to output + INTEGER(KIND=Ctype) :: pos +END TYPE RAND_state + +!}}} + +! +! This type defines the offset type used for stepping. +! 
+!{{{TYPE RAND_offset + +TYPE RAND_offset + PRIVATE + INTEGER(KIND=Sint) :: poly(Nstate) +END TYPE RAND_offset + +!}}} + +!}}} + +!{{{interface and overloads +! +! Allow automatic conversion between integers and offsets +! +INTERFACE ASSIGNMENT(=) + MODULE PROCEDURE Rand_set_offset + MODULE PROCEDURE Rand_load + MODULE PROCEDURE Rand_save + MODULE PROCEDURE Rand_seed +END INTERFACE +INTERFACE OPERATOR(+) + MODULE PROCEDURE Rand_add_offset +END INTERFACE +INTERFACE OPERATOR(*) + MODULE PROCEDURE Rand_mul_offset +END INTERFACE + +! +! overload + as the boost/stepping operator +! +INTERFACE OPERATOR(+) + MODULE PROCEDURE Rand_step + MODULE PROCEDURE Rand_boost +END INTERFACE +!}}} + + +!{{{PUBLIC/PRIVATE + PRIVATE reduce,mod_saxpy,mod_sdot,p_saxpy,p_sdot,poly_mult + PRIVATE poly_square, poly_power + PRIVATE fill_state, repack_state + + PUBLIC Rand_sint, Rand_sint_vec + + PUBLIC Rand_save, Rand_load + PUBLIC Rand_set_offset, Rand_add_offset, Rand_mul_offset + PUBLIC Rand_step, Rand_boost, Rand_seed +!}}} + +CONTAINS + !{{{Internals + !{{{RECURSIVE FUNCTION reduce(A) + RECURSIVE FUNCTION reduce(A) + ! + ! Take A Dint and reduce to Sint MOD M + ! + INTEGER(KIND=Dint), INTENT(IN) :: A + INTEGER(KIND=Sint) reduce + INTEGER(KIND=Dint) tmp + + tmp = A + DO WHILE( ISHFT(tmp, -31) .GT. 0 ) + tmp = IAND(tmp,Mask) + ISHFT(tmp, -31) + END DO + IF( tmp .GE. M ) THEN + reduce = tmp - M + ELSE + reduce = tmp + END IF + END FUNCTION reduce + !}}} + !{{{RECURSIVE SUBROUTINE fill_state(x) + RECURSIVE SUBROUTINE fill_state(x) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Ctype) i + INTRINSIC IAND, ISHFT + INTEGER(KIND=Dint) tmp + DO i=Nstate+1,Nstore + tmp = (x%state(i-5) * A5) + (x%state(i-1)*A1) + ! + ! now reduce down to mod M efficiently + ! really hope the compiler in-lines this + ! + ! x%state(i) = reduce(tmp) + DO WHILE( ISHFT(tmp, -31) .GT. 0 ) + tmp = IAND(tmp,Mask) + ISHFT(tmp, -31) + END DO + IF( tmp .GE. 
M ) THEN + x%state(i) = tmp - M + ELSE + x%state(i) = tmp + END IF + + END DO + x%need_fill = .FALSE. + END SUBROUTINE fill_state + !}}} + !{{{RECURSIVE SUBROUTINE repack_state(x) + RECURSIVE SUBROUTINE repack_state(x) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Ctype) i + DO i=1,Nstate + x%state(i) = x%state(i+x%pos-(Nstate+1)) + END DO + x%pos = Nstate + 1 + x%need_fill = .TRUE. + END SUBROUTINE repack_state + !}}} + !{{{RECURSIVE SUBROUTINE mod_saxpy(y,a,x) + RECURSIVE SUBROUTINE mod_saxpy(y,a,x) + INTEGER(KIND=Ctype) i + INTEGER(KIND=Sint) y(Nstate) + INTEGER(KIND=Sint) a + INTEGER(KIND=Sint) x(Nstate) + INTEGER(KIND=Dint) tx,ty,ta + + IF( a .EQ. 0_Sint ) RETURN + + ! We use KIND=Dint temporaries here to ensure + ! that we don't overflow in the expression + + ta = a + DO i=1,Nstate + ty=y(i) + tx=x(i) + y(i) = reduce(ty + ta * tx) + END DO + + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE mod_sdot(res,x,y) + RECURSIVE SUBROUTINE mod_sdot(res,x,y) + INTEGER(KIND=Sint), INTENT(OUT) :: res + INTEGER(KIND=Sint), INTENT(IN) :: x(Nstate) , y(Nstate) + INTEGER(KIND=Dint) dx, dy, dtmp + INTEGER(KIND=Sint) tmp + INTEGER(KIND=Ctype) i + + tmp = 0 + DO i=1,Nstate + dx = x(i) + dy = y(i) + dtmp = tmp + tmp = reduce(dtmp + dx * dy) + END DO + res = tmp + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE p_saxpy(y,a) + RECURSIVE SUBROUTINE p_saxpy(y,a) + ! 
Calculates mod_saxpy(y,a,P) + INTEGER(KIND=Sint), INTENT(INOUT) :: y(Nstate) + INTEGER(KIND=Sint), INTENT(IN) :: a + INTEGER(KIND=Dint) tmp, dy, da + dy = y(1) + da = a + tmp = dy + da*A5 + y(1) = reduce(tmp) + dy = y(5) + da = a + tmp = dy + da*A1 + y(5) = reduce(tmp) + + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE p_sdot(res,n,x) + RECURSIVE SUBROUTINE p_sdot(res,x) + INTEGER(KIND=Sint), INTENT(OUT) :: res + INTEGER(KIND=Sint), INTENT(IN) :: x(Nstate) + INTEGER(KIND=Dint) dx1, dx5, dtmp + dx1 = x(1) + dx5 = x(5) + + dtmp = A1*dx5 + A5*dx1 + res = reduce(dtmp) + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE poly_mult(a,b) + RECURSIVE SUBROUTINE poly_mult(a,b) + INTEGER(KIND=Sint), INTENT(INOUT) :: a(Nstate) + INTEGER(KIND=Sint), INTENT(IN) :: b(Nstate) + INTEGER(KIND=Sint) tmp((2*Nstate) - 1) + INTEGER(KIND=Ctype) i + + tmp = 0_Sint + + DO i=1,Nstate + CALL mod_saxpy(tmp(i:Nstate+i-1),a(i), b) + END DO + DO i=(2*Nstate)-1, Nstate+1, -1 + CALL P_SAXPY(tmp(i-Nstate:i-1),tmp(i)) + END DO + a = tmp(1:Nstate) + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE poly_square(a) + RECURSIVE SUBROUTINE poly_square(a) + INTEGER(KIND=Sint), INTENT(INOUT) :: a(Nstate) + INTEGER(KIND=Sint) tmp((2*Nstate) - 1) + INTEGER(KIND=Ctype) i + + tmp = 0_Sint + + DO i=1,Nstate + CALL mod_saxpy(tmp(i:Nstate+i-1),a(i), a) + END DO + DO i=(2*Nstate)-1, Nstate+1, -1 + CALL P_SAXPY(tmp(i-Nstate:i-1),tmp(i)) + END DO + a = tmp(1:Nstate) + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE poly_power(poly,n) + RECURSIVE SUBROUTINE poly_power(poly,n) + INTEGER(KIND=Sint), INTENT(INOUT) :: poly(Nstate) + INTEGER, INTENT(IN) :: n + INTEGER nn + INTEGER(KIND=Sint) x(Nstate), out(Nstate) + + IF( n .EQ. 0 )THEN + poly = 0_Sint + poly(1) = 1_Sint + RETURN + ELSE IF( n .LT. 0 )THEN + poly = 0_Sint + RETURN + END IF + + out = 0_sint + out(1) = 1_Sint + x = poly + nn = n + DO WHILE( nn .GT. 0 ) + IF( MOD(nn,2) .EQ. 1 )THEN + call poly_mult(out,x) + END IF + nn = nn/2 + IF( nn .GT. 
0 )THEN + call poly_square(x) + END IF + END DO + poly = out + + END SUBROUTINE poly_power + !}}} + !}}} + + !{{{RECURSIVE SUBROUTINE Rand_seed( state, n ) + RECURSIVE SUBROUTINE Rand_seed( state, n ) + TYPE(Rand_state), INTENT(OUT) :: state + INTEGER, INTENT(IN) :: n + ! initialise the genrator using a single integer + ! fist initialise to an arbitrary state then boost by a multiple + ! of a long distance + ! + ! state is moved forward by P^n steps + ! we want this to be ok for seperating parallel sequences on MPP machines + ! P is taken as a prime number as this should prevent strong correlations + ! when the generators are operated in tight lockstep. + ! equivalent points on different processors will also be related by a + ! primative polynomial + ! P is 2^48-59 + TYPE(Rand_state) tmp + TYPE(Rand_offset), PARAMETER :: P = & + Rand_offset( (/ 1509238949_Sint ,2146167999_Sint ,1539340803_Sint , & + 1041407428_Sint ,666274987_Sint /) ) + + CALL Rand_load( tmp, (/ 5, 4, 3, 2, 1 /) ) + state = Rand_boost( tmp, Rand_mul_offset(P, n )) + + END SUBROUTINE Rand_seed + !}}} + !{{{RECURSIVE SUBROUTINE Rand_load( state, input ) + RECURSIVE SUBROUTINE Rand_load( state, input ) + TYPE(RAND_state), INTENT(OUT) :: state + INTEGER, INTENT(IN) :: input(Nstate) + + INTEGER(KIND=Ctype) i + + state%state = 0_Sint + DO i=1,Nstate + state%state(i) = MOD(INT(input(i),KIND=Sint),M) + END DO + state%need_fill = .TRUE. 
+ state%pos = Nstate + 1 + END SUBROUTINE Rand_load + !}}} + !{{{RECURSIVE SUBROUTINE Rand_save( save_vec,state ) + RECURSIVE SUBROUTINE Rand_save( save_vec, x ) + INTEGER, INTENT(OUT) :: save_vec(Nstate) + TYPE(RAND_state), INTENT(IN) :: x + + INTEGER(KIND=Ctype) i + DO i=1,Nstate + save_vec(i) = x%state(x%pos-(Nstate+1) + i) + END DO + END SUBROUTINE Rand_save + !}}} + + !{{{RECURSIVE SUBROUTINE Rand_set_offset( offset, n ) + RECURSIVE SUBROUTINE Rand_set_offset( offset, n ) + TYPE(Rand_offset), INTENT(OUT) :: offset + INTEGER, INTENT(IN) :: n + + offset%poly = 0_Sint + IF ( n .GE. 0 ) THEN + offset%poly(2) = 1_Sint + call poly_power(offset%poly,n) + ELSE + ! + ! This is X^-1 + ! + offset%poly(4) = 858869107_Sint + offset%poly(5) = 1840344978_Sint + call poly_power(offset%poly,-n) + END IF + END SUBROUTINE Rand_set_offset + !}}} + !{{{TYPE(Rand_offset) RECURSIVE FUNCTION Rand_add_offset( a, b ) + TYPE(Rand_offset) RECURSIVE FUNCTION Rand_add_offset( a, b ) + TYPE(Rand_offset), INTENT(IN) :: a, b + + Rand_add_offset = a + CALL poly_mult(Rand_add_offset%poly,b%poly) + RETURN + END FUNCTION Rand_add_offset + !}}} + !{{{TYPE(Rand_offset) RECURSIVE FUNCTION Rand_mul_offset( a, n ) + TYPE(Rand_offset) RECURSIVE FUNCTION Rand_mul_offset( a, n ) + TYPE(Rand_offset), INTENT(IN) :: a + INTEGER, INTENT(IN) :: n + Rand_mul_offset = a + CALL poly_power(Rand_mul_offset%poly,n) + RETURN + END FUNCTION Rand_mul_offset + !}}} + !{{{RECURSIVE FUNCTION Rand_boost(x, offset) + RECURSIVE FUNCTION Rand_boost(x, offset) + TYPE(Rand_state) Rand_boost + TYPE(Rand_state), INTENT(IN) :: x + TYPE(Rand_offset), INTENT(IN) :: offset + INTEGER(KIND=Sint) tmp(2*Nstate-1), res(Nstate) + INTEGER(KIND=Ctype) i + + DO i=1,Nstate + tmp(i) = x%state(x%pos-(Nstate+1) + i) + END DO + tmp(Nstate+1:) = 0_Sint + + DO i=1,Nstate-1 + call P_SDOT(tmp(i+Nstate),tmp(i:Nstate+i-1)) + END DO + + DO i=1,Nstate + call mod_sdot(res(i),offset%poly,tmp(i:Nstate+i-1)) + END DO + Rand_boost%state = 0_Sint + DO 
i=1,Nstate + Rand_boost%state(i) = res(i) + END DO + Rand_boost%need_fill = .TRUE. + Rand_boost%pos = Nstate + 1 + + END FUNCTION Rand_boost + !}}} + !{{{RECURSIVE FUNCTION Rand_step(x, n) + RECURSIVE FUNCTION Rand_step(x, n) + TYPE(Rand_state) Rand_step + TYPE(RAND_state), INTENT(IN) :: x + INTEGER, INTENT(IN) :: n + TYPE(Rand_offset) tmp + + CALL Rand_set_offset(tmp,n) + Rand_step=Rand_boost(x,tmp) + + END FUNCTION + !}}} + + !{{{RECURSIVE FUNCTION Rand_sint(x) + RECURSIVE FUNCTION Rand_sint(x) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Sint) Rand_sint + IF( x%pos .GT. Nstore )THEN + CALL repack_state(x) + END IF + IF( x%need_fill ) CALL fill_state(x) + Rand_sint = x%state(x%pos) + x%pos = x%pos + 1 + RETURN + END FUNCTION Rand_sint + !}}} + !{{{RECURSIVE SUBROUTINE Rand_sint_vec(iv,x) + RECURSIVE SUBROUTINE Rand_sint_vec(iv,x) + INTEGER(KIND=Sint), INTENT(OUT) :: iv(:) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER left,start, chunk, i + + start=1 + left=SIZE(iv) + DO WHILE( left .GT. 0 ) + IF( x%pos .GT. 
Nstore )THEN + CALL repack_state(x) + END IF + IF( x%need_fill ) CALL fill_state(x) + + chunk = MIN(left,Nstore-x%pos+1) + DO i=0,chunk-1 + iv(start+i) = x%state(x%pos+i) + END DO + start = start + chunk + x%pos = x%pos + chunk + left = left - chunk + END DO + + RETURN + END SUBROUTINE Rand_sint_vec + !}}} + + +END MODULE Rand_int + +!}}} + +!{{{Rand (use Rand_int to make random reals) + +MODULE Rand + USE Rand_int + IMPLICIT NONE + +!{{{Parameters + + INTEGER, PARAMETER :: RAND_kind1 = SELECTED_REAL_KIND(10) + INTEGER, PARAMETER :: RAND_kind2 = SELECTED_REAL_KIND(6) + + INTEGER, PARAMETER, PRIVATE :: Max_block=100 + INTEGER(KIND=Sint), PRIVATE, PARAMETER :: M = 2147483647 + REAL(KIND=RAND_kind1), PRIVATE, PARAMETER :: INVMP1_1 = ( 1.0_RAND_kind1 / 2147483647.0_RAND_kind1 ) + REAL(KIND=RAND_kind2), PRIVATE, PARAMETER :: INVMP1_2 = ( 1.0_RAND_kind2 / 2147483647.0_RAND_kind2 ) + + LOGICAL, PARAMETER :: Can_step = Can_step_int + LOGICAL, PARAMETER :: Can_reverse = Can_reverse_int + +!}}} + PUBLIC Rand_real + + +INTERFACE Rand_real + MODULE PROCEDURE Rand_real1 + MODULE PROCEDURE Rand_real2 + MODULE PROCEDURE Rand_real_vec1 + MODULE PROCEDURE Rand_real_vec2 +END INTERFACE + + +CONTAINS + + !{{{RECURSIVE SUBROUTINE Rand_real1(y,x) + RECURSIVE SUBROUTINE Rand_real1(y,x) + REAL(KIND=RAND_kind1), INTENT(OUT) :: y + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Sint) Z + + Z = Rand_sint(x) + IF (Z .EQ. 0) Z = M + + y = ((Z-0.5d0)*INVMP1_1) + RETURN + END SUBROUTINE Rand_real1 + !}}} + !{{{RECURSIVE SUBROUTINE Rand_real2(y,x) + RECURSIVE SUBROUTINE Rand_real2(y,x) + REAL(KIND=RAND_kind2), INTENT(OUT) :: y + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Sint) Z + + Z = Rand_sint(x) + IF (Z .EQ. 0) Z = M + + y = ((Z-0.5d0)*INVMP1_1) ! generate in double and truncate. 
+ RETURN + END SUBROUTINE Rand_real2 + !}}} + + !{{{RECURSIVE SUBROUTINE Rand_real_vec1(rv,x) + RECURSIVE SUBROUTINE Rand_real_vec1(rv,x) + TYPE(RAND_state), INTENT(INOUT) :: x + REAL(KIND=RAND_kind1) rv(:) + INTEGER left,start, chunk, i + INTEGER(KIND=Sint) Z + INTEGER(KIND=Sint) temp(MIN(SIZE(rv),Max_block)) + + start=0 + left=SIZE(rv) + DO WHILE( left .GT. 0 ) + chunk = MIN(left,Max_block) + CALL Rand_sint_vec(temp(1:chunk),x) + DO i=1,chunk + Z = temp(i) + IF (Z .EQ. 0) Z = M + rv(start+i) = (Z-0.5d0)*INVMP1_1 + END DO + start = start + chunk + left = left - chunk + END DO + + RETURN + END SUBROUTINE Rand_real_vec1 + !}}} + !{{{RECURSIVE SUBROUTINE Rand_real_vec2(rv,x) + RECURSIVE SUBROUTINE Rand_real_vec2(rv,x) + TYPE(RAND_state), INTENT(INOUT) :: x + REAL(KIND=RAND_kind2) rv(:) + INTEGER left,start, chunk, i + INTEGER(KIND=Sint) Z + INTEGER(KIND=Sint) temp(MIN(SIZE(rv),Max_block)) + + start=0 + left=SIZE(rv) + DO WHILE( left .GT. 0 ) + chunk = MIN(left,Max_block) + CALL Rand_sint_vec(temp(1:chunk),x) + DO i=1,chunk + Z = temp(i) + IF (Z .EQ. 0) Z = M + rv(start+i) = (Z-0.5d0)*INVMP1_2 + END DO + start = start + chunk + left = left - chunk + END DO + + RETURN + END SUBROUTINE Rand_real_vec2 + !}}} +END MODULE Rand + +!}}} + +!{{{test program +! PROGRAM test_random +! use Rand +! TYPE(RAND_state) x +! REAL y +! CALL Rand_load(x,(/5,4,3,2,1/)) +! DO I=0,10 +! CALL Rand_real(y,x) +! WRITE(*,10) I,y +! END DO +! +!10 FORMAT(I10,E25.16) +! +! END + +! 0 0.5024326127022505E-01 +! 1 0.8260946767404675E-01 +! 2 0.2123264316469431E-01 +! 3 0.6926658791489899E+00 +! 4 0.2076155943796039E+00 +! 5 0.4327449947595596E-01 +! 6 0.2204052871093154E-01 +! 7 0.1288446951657534E+00 +! 8 0.4859915426932275E+00 +! 9 0.5721384193748236E-01 +! 10 0.7996825082227588E+00 +! 
+ + +!}}} + diff --git a/external/panphasia/panphasia_routines.f b/external/panphasia/panphasia_routines.f new file mode 100644 index 0000000..2e1bfbd --- /dev/null +++ b/external/panphasia/panphasia_routines.f @@ -0,0 +1,3334 @@ +c=====================================================================================c +c +c The code below was written by: Adrian Jenkins, +c Institute for Computational Cosmology +c Department of Physics +c South Road +c Durham, DH1 3LE +c United Kingdom +c +c This file is part of the software made public in +c Jenkins and Booth 2013 - arXiv:1306.XXXX +c +c The software computes the Panphasia Gaussian white noise field +c realisation described in detail in Jenkins 2013 - arXiv:1306.XXXX +c +c +c +c This software is free, subject to a agreeing licence conditions: +c +c +c (i) you will publish the phase descriptors and reference Jenkins (13) +c for any new simulations that use Panphasia phases. You will pass on this +c condition to others for any software or data you make available publically +c or privately that makes use of Panphasia. +c +c (ii) that you will ensure any publications using results derived from Panphasia +c will be submitted as a final version to arXiv prior to or coincident with +c publication in a journal. +c +c (iii) that you report any bugs in this software as soon as confirmed to +c A.R.Jenkins@durham.ac.uk +c +c (iv) that you understand that this software comes with no warranty and that is +c your responsibility to ensure that it is suitable for the purpose that +c you intend. +c +c=====================================================================================c + +c===================================================================================== +c List of subroutines and arguments. Each of these is documented in c +c arXiV/1306.XXXX c +c c +c Adrian Jenkins, 24/6/2013. 
c +c------------------------------------------------------------------------------------- +c Version 1.000 +c=================================================================================== + +c Module pan_state: compile-time limits and the state_data type that holds +c the per-instance Panphasia state. The routines in this file receive that +c state as the argument ldata. + module pan_state + use Rand + implicit none +c maxdim_ : upper index of the per-level arrays in state_data. +c maxlev_ : largest level accepted by set_phases_and_rel_origin. +c maxpow_ : upper index of poweroffset and current_state. + integer maxdim_, maxlev_, maxpow_ + parameter (maxdim_=60,maxlev_=50, maxpow_ = 3*maxdim_) +c nmulti_ : multiplier applied to every offset stored in poweroffset +c (see initialise_panphasia). + integer nmulti_ + parameter (nmulti_=64) + integer range_max + parameter(range_max=10000) +c indmin,indmax : index bounds of the p_xcursor/p_ycursor/p_zcursor arrays. + integer indmin,indmax + parameter (indmin=-1, indmax=60) + + + type state_data +c base_state and base_lev_start(:,i) hold generator states written by +c Rand_save in initialise_panphasia; poweroffset and superjump are the +c jump offsets computed in that routine. + integer base_state(5), base_lev_start(5,0:maxdim_) + TYPE(Rand_offset) :: poweroffset(0:maxpow_) + TYPE(Rand_offset) :: superjump + TYPE(Rand_state) :: current_state(-1:maxpow_) + +c Octree selection handed on by panphasia_cell_properties; assigned in +c start_panphasia. + integer layer_min,layer_max,indep_field + +! This module stores information needed to access the part of Panphasia +! selected by a particular descriptor. +c The *origin_store arrays hold the eight wrap-around origins indexed by +c (px,py,pz); they are filled in by set_phases_and_rel_origin. + integer*8 xorigin_store(0:1,0:1,0:1) + integer*8 yorigin_store(0:1,0:1,0:1) + integer*8 zorigin_store(0:1,0:1,0:1) + +c lev_common is the level the box was initialised at; the *_store layer +c values record the layer range of the last evaluation. + integer*4 lev_common + integer*4 layer_min_store,layer_max_store + + integer*8 ix_abs_store,iy_abs_store,iz_abs_store + integer*8 ix_per_store,iy_per_store,iz_per_store + integer*8 ix_rel_store,iy_rel_store,iz_rel_store + +c exp_coeffs is passed to evaluate_panphasia as leg_coeff and written +c there for each level; the cursors record the octree position last +c computed at each level (reset to -999 by set_phases_and_rel_origin). + real*8 exp_coeffs(8,0:7,-1:maxdim_) + integer*8 xcursor(0:maxdim_),ycursor(0:maxdim_),zcursor(0:maxdim_) + +c Local box parameters + + integer*4 ixshift(0:1,0:1,0:1) + integer*4 iyshift(0:1,0:1,0:1) + integer*4 izshift(0:1,0:1,0:1) + + +c more state variables +c cell_data caches the last result of return_cell_props for the parent +c cell (ixh_last,iyh_last,izh_last). +c init, return_cell_props_init and reset_lecuyer_state_init are first-call +c flags that the routines compare with 0. +c NOTE(review): the type gives them no default value, so whoever creates a +c state_data object presumably has to zero them - confirm. + real*8 cell_data(9,0:7) + integer*4 ixh_last,iyh_last,izh_last + integer init + + integer return_cell_props_init + integer reset_lecuyer_state_init + integer*8 p_xcursor(indmin:indmax),p_ycursor(indmin:indmax),p_zcursor(indmin:indmax) + + + + end type state_data + + + +c Switch for enabling custom spherical function +c Set isub_spherical_function = 1 to turn on the spherical function + integer*4 isub_spherical_function + parameter (isub_spherical_function=0) + + end module pan_state + + 
+c================================================================================ +c Begin white noise routines +c================================================================================ + recursive subroutine start_panphasia(ldata,descriptor,ngrid,VERBOSE) + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + character*100 descriptor + integer ngrid + integer VERBOSE + + + + integer*4 wn_level_base,i_base,i_base_y,i_base_z + integer*8 i_xorigin_base,i_yorigin_base,i_zorigin_base, check_rand + character*20 name + + integer ratio + integer lextra + integer level_p + + + integer*8 ix_abs,iy_abs,iz_abs + integer*8 ix_per,iy_per,iz_per + integer*8 ix_rel,iy_rel,iz_rel + + !integer layer_min,layer_max,indep_field + !common /oct_range/ layer_min,layer_max,indep_field + + call parse_descriptor(descriptor ,wn_level_base,i_xorigin_base,i_yorigin_base, + & i_zorigin_base,i_base,i_base_y,i_base_z,check_rand,name) + + + lextra = (log10(real(ngrid)/real(i_base))+0.001)/log10(2.0) + ratio = 2**lextra + + if (ratio*i_base.ne.ngrid) + &stop 'Value of ngrid inconsistent with dim of region in Panphasia' + + level_p = wn_level_base + lextra + + ix_abs = ishft(i_xorigin_base,lextra) + iy_abs = ishft(i_yorigin_base,lextra) + iz_abs = ishft(i_zorigin_base,lextra) + + ix_per = i_base*ratio + iy_per = i_base*ratio + iz_per = i_base*ratio + +c Set the refinement position at the origin. + + ix_rel = 0 + iy_rel = 0 + iz_rel = 0 + + call set_phases_and_rel_origin(ldata,descriptor,level_p,ix_rel,iy_rel,iz_rel,VERBOSE) + +c Finally set the octree functions required for making cosmological +c initial conditions. These are passed using a common block. 
+ + ldata%layer_min = 0 + ldata%layer_max = level_p + ldata%indep_field = 1 + + end +c================================================================================= + recursive subroutine set_phases_and_rel_origin(ldata,descriptor,lev,ix_rel,iy_rel,iz_rel,VERBOSE) + use pan_state + !use descriptor_phases + implicit none + type(state_data), intent(inout) :: ldata + character*100 descriptor + integer lev + integer*8 ix_abs,iy_abs,iz_abs + integer*8 ix_per,iy_per,iz_per + integer*8 ix_rel,iy_rel,iz_rel + integer*8 xorigin,yorigin,zorigin + + integer VERBOSE + integer MYID + integer*8 maxco + integer i + integer px,py,pz + + integer lnblnk + integer*8 mconst + parameter(mconst = 2147483647_Dint) + + integer*4 wn_level_base,i_base,i_base_y,i_base_z + integer*8 i_xorigin_base,i_yorigin_base,i_zorigin_base, check_rand + integer lextra,ratio + character*20 phase_name + +c----------------------------------------------------------------------------------------------- + + call initialise_panphasia(ldata) + + call validate_descriptor(ldata, descriptor,-1,check_rand) + + call parse_descriptor(descriptor ,wn_level_base,i_xorigin_base,i_yorigin_base, + & i_zorigin_base,i_base,i_base_y,i_base_z,check_rand,phase_name) + lextra = lev - wn_level_base + ratio = 2**lextra + + ix_abs = ishft(i_xorigin_base,lextra) + iy_abs = ishft(i_yorigin_base,lextra) + iz_abs = ishft(i_zorigin_base,lextra) + + ix_per = i_base*ratio + iy_per = i_base*ratio + iz_per = i_base*ratio + +c------------------------------------------------------------------------- +c Error checking +c------------------------------------------------------------------------- + if ((lev.lt.0).or.(lev.gt.maxlev_)) stop 'Level out of range! 
(1)' + + + maxco = 2_dint**lev + + if (ix_abs.lt.0) stop 'Error: ix_abs negative (1)' + if (iy_abs.lt.0) stop 'Error: iy_abs negative (1)' + if (iz_abs.lt.0) stop 'Error: iz_abs negative (1)' + + if (ix_rel.lt.0) stop 'Error: ix_rel negative (1)' + if (iy_rel.lt.0) stop 'Error: iy_rel negative (1)' + if (iz_rel.lt.0) stop 'Error: iz_rel negative (1)' + + + if (ix_abs+ix_rel.ge.maxco) + & stop 'Error: ix_abs + ix_rel out of range. (1)' + if (iy_abs+iy_rel.ge.maxco) + & stop 'Error: iy_abs + iy_rel out of range. (1)' + if (iz_abs+iz_rel.ge.maxco) + & stop 'Error: iz_abs + iz_rel out of range. (1)' + +c---------------------------------------------------------------------------------------- +c To allow the local box to wrap around, if needed, define a series of eight +c 'origins'. For many purposes (ix,iy,iz) = (0,0,0) is the only origin needed. + + + do px=0,1 + do py=0,1 + do pz=0,1 + + xorigin = max(0,( ix_abs + ix_rel - px*ix_per )/2) + yorigin = max(0,( iy_abs + iy_rel - py*iy_per )/2) + zorigin = max(0,( iz_abs + iz_rel - pz*iz_per )/2) + + ldata%ixshift(px,py,pz) = max(0, ix_abs + ix_rel -px*ix_per) - 2*xorigin + ldata%iyshift(px,py,pz) = max(0, iy_abs + iy_rel -py*iy_per) - 2*yorigin + ldata%izshift(px,py,pz) = max(0, iz_abs + iz_rel -pz*iz_per) - 2*zorigin + + +c Store box details: store the positions at level lev-1 + + + ldata%xorigin_store(px,py,pz) = xorigin + ldata%yorigin_store(px,py,pz) = yorigin + ldata%zorigin_store(px,py,pz) = zorigin + + enddo + enddo + enddo + + ldata%lev_common = lev + + + ldata%ix_abs_store = ix_abs + ldata%iy_abs_store = iy_abs + ldata%iz_abs_store = iz_abs + + ldata%ix_per_store = ix_per + ldata%iy_per_store = iy_per + ldata%iz_per_store = iz_per + + ldata%ix_rel_store = ix_rel + ldata%iy_rel_store = iy_rel + ldata%iz_rel_store = iz_rel + + +c Reset all cursor values to negative numbers. 
+ + do i=0,maxdim_ + ldata%xcursor(i) = -999 + ldata%ycursor(i) = -999 + ldata%zcursor(i) = -999 + enddo + if (VERBOSE.gt.1) then + if (MYID.lt.1) then + print*,'----------------------------------------------------------' + print*,'Successfully initialised Panphasia box at level ',lev + write (6,105) ix_abs,iy_abs,iz_abs + write (6,106) ix_rel,iy_rel,iz_rel + write (6,107) ix_per,iy_per,iz_per + write (6,*) 'Phases used: ',descriptor(1:lnblnk(descriptor)) + print*,'----------------------------------------------------------' + endif + endif + 105 format(' Abs origin: (',i12,',',i12,',',i12,')') + 106 format(' Rel origin: (',i12,',',i12,',',i12,')') + 107 format(' Periods : (',i12,',',i12,',',i12,')') + end +c================================================================================ + recursive subroutine initialise_panphasia( ldata ) + use Rand + use pan_state + implicit none + + type(state_data), intent(inout) :: ldata + + TYPE(Rand_state) :: state + TYPE(Rand_offset) :: offset + integer ninitialise + parameter (ninitialise=218) + integer i + real*8 rand_num + + + call Rand_seed(state,ninitialise) + + call Rand_save(ldata%base_state,state) + + call Rand_set_offset(offset,1) + +c Calculate offsets of powers of 2 times nmulti +c + + do i=0,maxpow_ + ldata%poweroffset(i) = Rand_mul_offset(offset,nmulti_) + offset = Rand_mul_offset(offset,2) + enddo + + +c Compute the base state for each level. + + call Rand_load(state,ldata%base_state) + state = Rand_step(state,8) + + do i=0,maxdim_ + call Rand_save(ldata%base_lev_start(1,i),state) + state = Rand_boost(state,ldata%poweroffset(3*i)) + enddo + +c Set superjump to value 2**137 - used occasionally in computing Gaussian variables +c when the value of the returned random number is less an 10-6. + + call Rand_set_offset(ldata%superjump,1) + + do i=1,137 + ldata%superjump = Rand_mul_offset(ldata%superjump,2) + enddo + + +c Run time test to see if one particular value can be recovered. 
+ + call Rand_load(state,ldata%base_lev_start(1,34)) + call Rand_real(rand_num,state) + + if (abs(rand_num- 0.828481889948473d0).gt.1.e-14) then + print*,'Error in initialisation!' + print*,'Rand_num = ',rand_num + print*,'Target value = ', 0.828481889948473d0 + stop + endif + return + end +c================================================================================= + recursive subroutine panphasia_cell_properties(ldata,ixcell,iycell,izcell,cell_prop) + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + !integer layer_min,layer_max,indep_field + !common /oct_range/ layer_min,layer_max,indep_field + integer*4 ixcell,iycell,izcell + real*8 cell_prop(9) + + call adv_panphasia_cell_properties(ldata,ixcell,iycell,izcell,ldata%layer_min, + & ldata%layer_max,ldata%indep_field,cell_prop) + return + end +c================================================================================= + recursive subroutine adv_panphasia_cell_properties(ldata,ixcell,iycell,izcell,layer_min, + & layer_max,indep_field,cell_prop) + use pan_state + !use descriptor_phases + implicit none + + type(state_data), intent(inout) :: ldata + + integer*4 lev + integer*4 ixcell,iycell,izcell + integer layer_min,layer_max,indep_field + real*8 cell_prop(9) +c real*8 cell_data(9,0:7) + integer*4 j,l,lx,ly,lz + integer*4 px,py,pz + +c integer*4 ixh_last,iyh_last,izh_last + +c integer init +c data init/0/ +c save init,cell_data,ixh_last,iyh_last,izh_last ! Keep internal state + + integer*4 ixh,iyh,izh + + lev = ldata%lev_common + +c------- Error checking ----------------------------- + + if (layer_min.gt.layer_max) then + + if (layer_min-layer_max.eq.1) then ! Not necessarily bad. No octree basis functions + do j=1,9 ! required at this level and position. + cell_prop(j) = 0.0d0 ! Set returned cell_prop data to zero. + enddo + return + endif + + print*,'Warning: layer_min.gt.layer_max!' 
+ print*,'layer_min = ',layer_min + print*,'layer_max = ',layer_max + print*,'ixcell,iycell,izcell',ixcell,iycell,izcell + + call flush(6) + stop 'Error: layer_min.gt.layer_max' + endif + + if (layer_max.gt.ldata%lev_common) then + print*,'lev_common = ',ldata%lev_common + print*,'layer_min = ',layer_min + print*,'layer_max = ',layer_max + stop 'Error: layer_max.gt.lev_common' + endif + if ((indep_field.lt.-1).or.(indep_field.gt.1)) + & stop 'Error: indep_field out of range' + +c---------------------------------------------------- +c Check which 'origin' to use. + + px = 0 + py = 0 + pz = 0 + + if (ldata%ix_rel_store+ixcell.ge.ldata%ix_per_store) px = 1 ! Crossed x-periodic bndy + if (ldata%iy_rel_store+iycell.ge.ldata%iy_per_store) py = 1 ! Crossed y-periodic bndy + if (ldata%iz_rel_store+izcell.ge.ldata%iz_per_store) pz = 1 ! Crossed z-periodic bndy +c---------------------------------------------------- + + + ixh = (ixcell+ldata%ixshift(px,py,pz) )/2 + iyh = (iycell+ldata%iyshift(px,py,pz) )/2 + izh = (izcell+ldata%izshift(px,py,pz) )/2 + + lx = mod(ixcell+ldata%ixshift(px,py,pz) ,2) + ly = mod(iycell+ldata%iyshift(px,py,pz) ,2) + lz = mod(izcell+ldata%izshift(px,py,pz) ,2) + + + l = 4*lx + 2*ly + lz ! Determine which cell is required + +cc------------------ If no new evalation is needed skip assignment ----- + if ((ldata%init.eq.1).and.(ixh.eq.ldata%ixh_last).and.(iyh.eq.ldata%iyh_last).and. + & (izh.eq.ldata%izh_last).and.(layer_min.eq.ldata%layer_min_store).and. + & (layer_max.eq.ldata%layer_max_store)) goto 24 +cc----------------------------------------------------------------------------- + + + call return_cell_props(ldata,lev,ixh,iyh,izh,px,py,pz,layer_min, + & layer_max,indep_field,ldata%cell_data) + +c Remember previous values. + + ldata%ixh_last = ixh + ldata%iyh_last = iyh + ldata%izh_last = izh + + + 24 continue + + + do j=1,9 + cell_prop(j) = ldata%cell_data(j,l) ! 
Copy the required data + enddo + + if (ldata%init.eq.0) ldata%init=1 + + return + end +c================================================================================= + recursive subroutine return_cell_props(ldata,lev_input,ix_half,iy_half,iz_half, + & px,py,pz,layer_min,layer_max,indep_field,cell_data) + use Rand + use pan_state + !use descriptor_phases + implicit none + type(state_data), intent(inout) :: ldata + integer lev_input,ix_half,iy_half,iz_half,px,py,pz + integer layer_min,layer_max,indep_field + real*8 cell_data(9,0:7) + + real*8 garray(0:63) + integer lev + integer*8 xarray,yarray,zarray + + integer i,istart,icell_name + + +c integer init +c data init/0/ +c save init + + + +c-------------------------------------------------------- +c--------------------------- Initialise level -1 -------- +c-------------------------------------------------------- + + if (ldata%return_cell_props_init.eq.0) then ! First time called. Set up the Legendre coefficients + ldata%return_cell_props_init = 1 ! for the root cell. This is the first term on the + call Rand_load(ldata%current_state(-1),ldata%base_state) ! right hand side of the equation in appendix C of + call return_gaussian_array(ldata,-1,8,garray) ! Jenkins 2013 that defines PANPHASIA. + ldata%exp_coeffs(1,0,-1) = garray(0) + ldata%exp_coeffs(2,0,-1) = garray(1) + ldata%exp_coeffs(3,0,-1) = garray(2) + ldata%exp_coeffs(4,0,-1) = garray(3) + ldata%exp_coeffs(5,0,-1) = garray(4) + ldata%exp_coeffs(6,0,-1) = garray(5) + ldata%exp_coeffs(7,0,-1) = garray(6) + ldata%exp_coeffs(8,0,-1) = garray(7) + + ldata%layer_min_store = layer_min + ldata%layer_max_store = layer_max + + endif + +c-------------------------------------------------------- +c---------------------------- Error checking ------------ +c-------------------------------------------------------- + + lev = lev_input-1 + + if (lev_input.ne.ldata%lev_common) stop 'Box initialised at a different level !' 
+ if (ix_half.lt.0) then + print*,'ix_half negative',ix_half + stop 'ix_half out of range!' + endif + if (iy_half.lt.0) stop 'iy_half out of range!' + if (iz_half.lt.0) then + print*,'iz_half negative',iz_half + stop 'iz_half out of range!' + endif + + + xarray = ldata%xorigin_store(px,py,pz) + ix_half + yarray = ldata%yorigin_store(px,py,pz) + iy_half + zarray = ldata%zorigin_store(px,py,pz) + iz_half + + +c If layer_max or layer_min have changed, rebuild from the start and reset the +c recorded value of layer_max and layer_min + + if ((layer_max.ne.ldata%layer_max_store).or.(layer_min.ne.ldata%layer_min_store)) then + + if (layer_min.gt.layer_max) stop 'layer_min > layer_max : 2' + + istart = max(1,layer_min-1) + + ldata%layer_max_store = layer_max + ldata%layer_min_store = layer_min + + goto 10 + + endif + + + if ((xarray.eq.ldata%xcursor(lev)).and.(yarray.eq.ldata%ycursor(lev)).and.(zarray.eq.ldata%zcursor(lev))) return ! Nothing to do. + +c=========================================================================================================== +c------------- First determine which levels need to be (re)computed +c=========================================================================================================== + + istart = 0 + do i=lev-1,0,-1 + if ((ishft(xarray,i-lev).eq.ldata%xcursor(i)).and.(ishft(yarray,i-lev).eq.ldata%ycursor(i)).and. 
+ & (ishft(zarray,i-lev).eq.ldata%zcursor(i))) then + istart = i+1 + goto 10 + endif + enddo + + 10 continue + + +c==================================================================================== +c------------- Now compute each level as required and update (x,y,z) cursor variables +c==================================================================================== + + do i=istart,lev + + icell_name = 0 + + ldata%xcursor(i) = ishft(xarray,i-lev) + ldata%ycursor(i) = ishft(yarray,i-lev) + ldata%zcursor(i) = ishft(zarray,i-lev) + + if (btest(ldata%xcursor(i),0)) icell_name = icell_name + 4 + if (btest(ldata%ycursor(i),0)) icell_name = icell_name + 2 + if (btest(ldata%zcursor(i),0)) icell_name = icell_name + 1 + + call reset_lecuyer_state(ldata,i,ldata%xcursor(i),ldata%ycursor(i),ldata%zcursor(i)) + + if (isub_spherical_function.ne.1) then + call return_gaussian_array(ldata,i,64,garray) + else + call return_oct_sf_expansion(ldata,i,lev,ldata%xcursor(i),ldata%ycursor(i),ldata%zcursor(i), + & 64,garray) + endif + + + call evaluate_panphasia(ldata,i,maxdim_,garray,layer_min, + & layer_max, indep_field, icell_name,cell_data,ldata%exp_coeffs) + + enddo + return + end +c================================================================================= + recursive subroutine evaluate_panphasia(ldata,nlev,maxdim,g, + & layer_min,layer_max,indep_field,icell_name,cell_data,leg_coeff) + use pan_state + implicit none +c--------------------------------------------------------------------------------- +c This subroutine calculates the Legendre block coefficients for the eight child +c cells of an octree cell. 
+c +c----------------- Define subroutine arguments ----------------------------------- + type(state_data), intent(inout) :: ldata + integer nlev,maxdim + integer layer_min,layer_max,indep_field + integer icell_name + real*8 leg_coeff(0:7,0:7,-1:maxdim),cell_data(0:8,0:7) + real*8 g(*) + +c----------------- Define constants using notation from appendix A of Jenkins 2013 + + real*8 a1,a2,b1,b2,b3,c1,c2,c3,c4 + + parameter(a1 = 0.5d0*sqrt(3.0d0), a2 = 0.5d0) + + parameter(b1 = 0.75d0, b2 = 0.25d0*sqrt(3.0d0)) + parameter(b3 = 0.25d0) + + parameter(c1 = sqrt(27.0d0/64.0d0), c2 = 0.375d0) + parameter(c3 = sqrt(3.0d0/64.0d0), c4 = 0.125d0) + +c----------------- Define octree variables -------------------------------- + + real*8 coeff_p000, coeff_p001, coeff_p010, coeff_p011 + real*8 coeff_p100, coeff_p101, coeff_p110, coeff_p111 + + real*8 positive_octant_lc(0:7,0:1,0:1,0:1),temp_value(0:7,0:7) + integer i,j,ix,iy,iz + integer icx,icy,icz + integer iox,ioy,ioz + real*8 parity,isig + real*8 usually_rooteighth_factor +c-------------------------------------------------------------------------- + +c------------- Set the Legendre block coefficients for the parent cell +c itself. These are either inherited from the octree above +c or set to zero depending on which levels of the octree +c have been selected to be populated with the octree +c basis functions. 
+c--------------------------------------------------------------------------- + if (nlev.ge.layer_min) then + coeff_p000 = leg_coeff(0,icell_name,nlev-1) + coeff_p001 = leg_coeff(1,icell_name,nlev-1) + coeff_p010 = leg_coeff(2,icell_name,nlev-1) + coeff_p011 = leg_coeff(3,icell_name,nlev-1) + coeff_p100 = leg_coeff(4,icell_name,nlev-1) + coeff_p101 = leg_coeff(5,icell_name,nlev-1) + coeff_p110 = leg_coeff(6,icell_name,nlev-1) + coeff_p111 = leg_coeff(7,icell_name,nlev-1) + else + coeff_p000 = 0.0d0 + coeff_p001 = 0.0d0 + coeff_p010 = 0.0d0 + coeff_p011 = 0.0d0 + coeff_p100 = 0.0d0 + coeff_p101 = 0.0d0 + coeff_p110 = 0.0d0 + coeff_p111 = 0.0d0 + endif + +c Apply layer_max and indep_field inputs --------------------------------- + + if (indep_field.ne.-1) then + usually_rooteighth_factor = sqrt(0.125d0) + else + usually_rooteighth_factor = 0.0d0 ! This option returns only the indep field. + endif ! For use in testing only. + + if (nlev.ge.layer_max) then + do i=1,56 + g(i) = 0.0d0 ! Set octree coefficients to zero as not required. + enddo + endif + + if (indep_field.eq.0) then ! Set the independent field to zero as not required. + do i=57,64 + g(i) = 0.0d0 + enddo + endif +c----------------------------------------------------------------------------- +c +c +c The calculations immediately below evalute the eight Legendre block coefficients for the +c child cell that is furthest from the absolute coordiate origin of the octree - we call +c this the positive octant cell. +c +c The coefficients are given by a set of matrix equations which combine the +c coefficients of the Legendre basis functions of the parent cell itself, with +c the coefficients from the octree basis functions that occupy the +c parent cell. +c +c The Legendre basis function coefficients of the parent cell are stored in +c the variables, coeff_p000 - coeff_p111 and are initialise above. 
+c +c The coefficients of the octree basis functions are determined by the +c first 56 entries of the array g, which is passed down into this +c subroutine. +c +c These two sources of information are combined using a set of linear equations. +c The coefficients of these linear equations are taken from the inverses or +c equivalently transposes of the matrices given in appendix A of Jenkins 2013. +c The matrices in appendix A define the PANPHASIA octree basis functions +c in terms of Legendre blocks. +c +c All of the Legendre block functions of the parent cell, and the octree basis +c functions of the parent cell share one of eight distinct symmetries with respect to +c reflection about the x1=0,x2=0,x3=0 planes (where the origin is taken as the parent +c cell centre and x1,x2,x3 are parallel to the cell edges). +c +c Each function has either purely reflectional symmetry (even parity) or +c reflectional symmetry with a sign change (odd parity) about each of the three principal +c planes through the cell centre. There are therefore 8 parity types. We can label each +c parity type with a binary triplet. So 000 is pure reflectional symmetry about +c all of the principal planes. +c +c In the code below the parent cell Legendre block functions, and octree functions are +c organised into eight groups each with eight members. Each group has a common +c parity type. +c +c We keep the contributions of each parity type to each of the eight Legendre basis +c functions occupying the positive octant cell separate. Once they have all been +c computed, we can apply the different symmetry operations and determine the +c Legendre block basis functions for all eight child cells at the same time. 
+c--------------------------------------------------------------------------------------- +c 000 parity + + positive_octant_lc(0, 0,0,0) = 1.0d0*coeff_p000 + positive_octant_lc(1, 0,0,0) = -1.0d0*g(1) + positive_octant_lc(2, 0,0,0) = -1.0d0*g(2) + positive_octant_lc(3, 0,0,0) = 1.0d0*g(3) + positive_octant_lc(4, 0,0,0) = -1.0d0*g(4) + positive_octant_lc(5, 0,0,0) = 1.0d0*g(5) + positive_octant_lc(6, 0,0,0) = 1.0d0*g(6) + positive_octant_lc(7, 0,0,0) = -1.0d0*g(7) + +c 100 parity + + positive_octant_lc(0, 1,0,0) = a1*coeff_p100 - a2*g(8) + positive_octant_lc(1, 1,0,0) = g(9) + positive_octant_lc(2, 1,0,0) = g(10) + positive_octant_lc(3, 1,0,0) = -g(11) + positive_octant_lc(4, 1,0,0) = a2*coeff_p100 + a1*g(8) + positive_octant_lc(5, 1,0,0) = -g(12) + positive_octant_lc(6, 1,0,0) = -g(13) + positive_octant_lc(7, 1,0,0) = g(14) + +c 010 parity + + positive_octant_lc(0, 0,1,0) = a1*coeff_p010 - a2*g(15) + positive_octant_lc(1, 0,1,0) = g(16) + positive_octant_lc(2, 0,1,0) = a2*coeff_p010 + a1*g(15) + positive_octant_lc(3, 0,1,0) = -g(17) + positive_octant_lc(4, 0,1,0) = g(18) + positive_octant_lc(5, 0,1,0) = -g(19) + positive_octant_lc(6, 0,1,0) = -g(20) + positive_octant_lc(7, 0,1,0) = g(21) + + +c 001 parity + + positive_octant_lc(0, 0,0,1) = a1*coeff_p001 - a2*g(22) + positive_octant_lc(1, 0,0,1) = a2*coeff_p001 + a1*g(22) + positive_octant_lc(2, 0,0,1) = g(23) + positive_octant_lc(3, 0,0,1) = -g(24) + positive_octant_lc(4, 0,0,1) = g(25) + positive_octant_lc(5, 0,0,1) = -g(26) + positive_octant_lc(6, 0,0,1) = -g(27) + positive_octant_lc(7, 0,0,1) = g(28) + +c 110 parity + + positive_octant_lc(0, 1,1,0) = b1*coeff_p110 - b2*g(29) + b3*g(30) - b2*g(31) + positive_octant_lc(1, 1,1,0) = -g(32) + positive_octant_lc(2, 1,1,0) = b2*coeff_p110 - b3*g(29) - b2*g(30) + b1*g(31) + positive_octant_lc(3, 1,1,0) = g(33) + positive_octant_lc(4, 1,1,0) = b2*coeff_p110 + b1*g(29) + b2*g(30) + b3*g(31) + positive_octant_lc(5, 1,1,0) = g(34) + positive_octant_lc(6, 1,1,0) = 
b3*coeff_p110 + b2*g(29) - b1*g(30) - b2*g(31) + positive_octant_lc(7, 1,1,0) = -g(35) + + +c 011 parity + + positive_octant_lc(0, 0,1,1) = b1*coeff_p011 - b2*g(36) + b3*g(37) - b2*g(38) + positive_octant_lc(1, 0,1,1) = b2*coeff_p011 - b3*g(36) - b2*g(37) + b1*g(38) + positive_octant_lc(2, 0,1,1) = b2*coeff_p011 + b1*g(36) + b2*g(37) + b3*g(38) + positive_octant_lc(3, 0,1,1) = b3*coeff_p011 + b2*g(36) - b1*g(37) - b2*g(38) + positive_octant_lc(4, 0,1,1) = -g(39) + positive_octant_lc(5, 0,1,1) = g(40) + positive_octant_lc(6, 0,1,1) = g(41) + positive_octant_lc(7, 0,1,1) = -g(42) + +c 101 parity + + positive_octant_lc(0, 1,0,1) = b1*coeff_p101 - b2*g(43) + b3*g(44) - b2*g(45) + positive_octant_lc(1, 1,0,1) = b2*coeff_p101 - b3*g(43) - b2*g(44) + b1*g(45) + positive_octant_lc(2, 1,0,1) = -g(46) + positive_octant_lc(3, 1,0,1) = g(47) + positive_octant_lc(4, 1,0,1) = b2*coeff_p101 + b1*g(43) + b2*g(44) + b3*g(45) + positive_octant_lc(5, 1,0,1) = b3*coeff_p101 + b2*g(43) - b1*g(44) - b2*g(45) + positive_octant_lc(6, 1,0,1) = g(48) + positive_octant_lc(7, 1,0,1) = -g(49) + +c 111 parity + + positive_octant_lc(0, 1,1,1) = c1*coeff_p111 - c2*g(50) - c2*g(51) - c2*g(52) + c3*g(53) + c3*g(54) + c3*g(55) - c4*g(56) + positive_octant_lc(1, 1,1,1) = c2*coeff_p111 + c1*g(50) - c2*g(51) + c2*g(52) - c3*g(53) + c3*g(54) + c4*g(55) + c3*g(56) + positive_octant_lc(2, 1,1,1) = c2*coeff_p111 + c2*g(50) + c1*g(51) - c2*g(52) - c3*g(53) - c4*g(54) + c3*g(55) - c3*g(56) + positive_octant_lc(3, 1,1,1) = c3*coeff_p111 - c3*g(50) - c3*g(51) + c4*g(52) - c1*g(53) - c2*g(54) - c2*g(55) - c2*g(56) + positive_octant_lc(4, 1,1,1) = c2*coeff_p111 - c2*g(50) + c2*g(51) + c1*g(52) + c4*g(53) - c3*g(54) + c3*g(55) + c3*g(56) + positive_octant_lc(5, 1,1,1) = c3*coeff_p111 + c3*g(50) - c4*g(51) - c3*g(52) + c2*g(53) - c1*g(54) - c2*g(55) + c2*g(56) + positive_octant_lc(6, 1,1,1) = c3*coeff_p111 + c4*g(50) + c3*g(51) + c3*g(52) + c2*g(53) + c2*g(54) - c1*g(55) - c2*g(56) + positive_octant_lc(7, 1,1,1) = 
c4*coeff_p111 - c3*g(50) + c3*g(51) - c3*g(52) - c2*g(53) + c2*g(54) - c2*g(55) + c1*g(56) +c-------------------------------------------------------------------------------------------- +c +c +c We now calculate the Legendre basis coefficients for all eight child cells +c by applying the appropriate reflectional parities to the coefficients +c calculated above for the positive octant child cell. +c +c See equations A2 and A3 in appendix A of Jenkins 2013. +c +c The reflectional parity is given by (ix,iy,iz) loops below. +c +c The (icx,icy,icz) loops below, loop over the eight child cells. +c +c The positive octant child cell is given below by (icx=icy=icz=0) or i=7. +c +c The combination ix*icx +iy*icy +iz*icz is either even or odd, depending +c on whether the parity change is even or odd. +c +c The variables iox,ioy,ioz are used to loop over the different +c types of Legendre basis function. +c +c The combination iox*icx + ioy*icy + ioz*icz is either even and odd +c and identifies which coefficients keep or change sign respectively +c due to a pure reflection about the principal planes. +c-------------------------------------------------------------------------------------------- + + do iz=0,7 + do iy=0,7 + temp_value(iy,iz) = 0.0d0 ! Zero temporary sums + enddo + enddo +c-------------------------------------------------------------------------------------------- + do iz=0,1 ! Loop over z parity (0=keep sign, 1=change sign) + do iy=0,1 ! Loop over y parity (0=keep sign, 1=change sign) + do ix=0,1 ! Loop over x parity (0=keep sign, 1=change sign) + + + do icx=0,1 ! Loop over x-child cells + do icy=0,1 ! Loop over y-child cells + do icz=0,1 ! Loop over z-child cells + + if (mod(ix*icx+iy*icy+iz*icz,2).eq.0) then + parity = 1.0d0 + else + parity =-1.0d0 + endif + + i = 7 - 4*icx -2*icy - icz ! Calculate which child cell this is. + + + do iox=0,1 ! Loop over Legendre basis function type + do ioy=0,1 ! Loop over Legendre basis function type + do ioz=0,1 ! 
Loop over Legendre basis function type + + j = 4*iox + 2*ioy + ioz + + if (mod(iox*icx + ioy*icy + ioz*icz,2).eq.0) then + isig = parity + else + isig = -parity + endif + + temp_value(j,i) = temp_value(j,i) + isig*positive_octant_lc(j,ix,iy,iz) + + enddo + enddo + enddo + + enddo + enddo + enddo + + enddo + enddo + enddo + + +c Assign values of the output variables + + do i=0,7 + do j=0,7 + leg_coeff(j,i,nlev) = temp_value(j,i)*usually_rooteighth_factor + cell_data(j,i) = leg_coeff(j,i,nlev) + enddo + enddo + +c Finally set the independent field values + + cell_data(8,0) = g(57) + cell_data(8,1) = g(58) + cell_data(8,2) = g(59) + cell_data(8,3) = g(60) + cell_data(8,4) = g(61) + cell_data(8,5) = g(62) + cell_data(8,6) = g(63) + cell_data(8,7) = g(64) + + + return + end +c================================================================================= + recursive subroutine reset_lecuyer_state(ldata,lev,xcursor,ycursor,zcursor) + use pan_state + implicit none + + type(state_data), intent(inout) :: ldata + integer lev + integer*8 xcursor,ycursor,zcursor + +c integer indmin,indmax +c parameter (indmin=-1, indmax=60) +c integer*8 p_xcursor(indmin:indmax),p_ycursor(indmin:indmax),p_zcursor(indmin:indmax) +c save p_xcursor,p_ycursor,p_zcursor + integer i +c integer init +c data init/0/ +c save init + + if (ldata%reset_lecuyer_state_init.eq.0) then ! Initialise p_cursor variables with + ldata%reset_lecuyer_state_init = 1 ! negative values. + do i=indmin,indmax + ldata%p_xcursor(i) = -9999 + ldata%p_ycursor(i) = -9999 + ldata%p_zcursor(i) = -9999 + enddo + endif + + if ( (xcursor.eq.ldata%p_xcursor(lev)).and.(ycursor.eq.ldata%p_ycursor(lev)).and. 
+ & (zcursor.eq.ldata%p_zcursor(lev)+1)) then + ldata%p_xcursor(lev) = xcursor + ldata%p_ycursor(lev) = ycursor + ldata%p_zcursor(lev) = zcursor + return + endif + + call advance_current_state(ldata,lev,xcursor,ycursor,zcursor) + + ldata%p_xcursor(lev) = xcursor + ldata%p_ycursor(lev) = ycursor + ldata%p_zcursor(lev) = zcursor + + + return + end +c================================================================================= + recursive subroutine advance_current_state(ldata,lev,x,y,z) + use Rand + use pan_state + !use descriptor_phases + implicit none + + type(state_data), intent(inout) :: ldata + + integer lev + integer*8 x,y,z + + integer*8 lev_range + + TYPE(Rand_offset) :: offset1,offset2 + TYPE(Rand_offset) :: offset_x,offset_y,offset_z,offset_total + + integer ndiv,nrem + integer*8 ndiv8,nrem8 + integer nfactor + parameter (nfactor=291071) ! Value unimportant except has to be > 262144 + + +c----- First some error checking ------------------------------------------ + if ((lev.lt.0).or.(lev.gt.maxlev_)) stop 'Level out of range! (2)' + + lev_range = 2_dint**lev + + + if ((x.lt.0).or.(x.ge.lev_range)) then + print*,'x,lev,lev_range',x,lev,lev_range + call flush(6) + stop 'x out of range!' + endif + if ((y.lt.0).or.(y.ge.lev_range)) then + print*,'y,lev,lev_range',y,lev,lev_range + stop 'y out of range!' + endif + if ((z.lt.0).or.(z.ge.lev_range)) stop 'z out of range!' +c---------------------------------------------------------------------------- +c +c Note the Rand_set_offset subroutine takes an integer*4 value +c for the offset value. For this reason we need to use integer*4 +c values - ndiv,nrem. As a precaution an explicit check is made +c to be sure that these values are calculated correctly. 
+c--------------------------------------------------------------------------- + + + call Rand_load(ldata%current_state(lev),ldata%base_lev_start(1,lev)) + + if (lev.eq.0) return + +c Calculate z-offset + + ndiv = z/nfactor + nrem = z - ndiv*nfactor + ndiv8 = ndiv + nrem8 = nrem + + if (ndiv8*nfactor+nrem8.ne.z) stop 'Error in z ndiv nrem' + + call Rand_set_offset(offset1,ndiv) + offset1 = Rand_mul_offset(offset1,nfactor) + call Rand_set_offset(offset2,nrem) + offset2 = Rand_add_offset(offset1,offset2) + offset_z = Rand_mul_offset(offset2,nmulti_) + +c Calculate y-offset + + ndiv = y/nfactor + nrem = y - ndiv*nfactor + ndiv8 = ndiv + nrem8 = nrem + + if (ndiv8*nfactor+nrem8.ne.y) stop 'Error in y ndiv nrem' + + offset1 = Rand_mul_offset(ldata%poweroffset(lev),ndiv) + offset1 = Rand_mul_offset(offset1,nfactor) + offset2 = Rand_mul_offset(ldata%poweroffset(lev),nrem) + offset_y = Rand_add_offset(offset1,offset2) + +c Calculate x-offset + + ndiv = x/nfactor + nrem = x - ndiv*nfactor + ndiv8 = ndiv + nrem8 = nrem + + if (ndiv8*nfactor+nrem8.ne.x) then + print*,'ndiv,nfactor,nrem,x',ndiv,nfactor,nrem,x + print*,'ndiv*nfactor+nrem',ndiv*nfactor+nrem + print*,'x-ndiv*nfactor-nrem',x-ndiv*nfactor-nrem + stop 'Error in x ndiv nrem' + endif + + offset1 = Rand_mul_offset(ldata%poweroffset(2*lev),ndiv) + offset1 = Rand_mul_offset(offset1,nfactor) + offset2 = Rand_mul_offset(ldata%poweroffset(2*lev),nrem) + offset_x = Rand_add_offset(offset1,offset2) + + offset1 = Rand_add_offset(offset_x,offset_y) + offset_total = Rand_add_offset(offset1, offset_z) + + ldata%current_state(lev) = Rand_boost(ldata%current_state(lev),offset_total) + + return + end +c================================================================================= + recursive subroutine return_gaussian_array(ldata,lev,ngauss,garray) + use Rand + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + integer lev,ngauss + real*8 garray(0:*) + TYPE(Rand_state) :: state + real*8 PI + parameter 
(PI=3.1415926535897932384d0) + real*8 branch + parameter (branch=1.d-6) + integer iloop + + real*8 temp,mag,ang + integer i + + if (mod(ngauss,2).ne.0) + & stop 'Error in return_gaussian_array - even pairs only' + +c First obtain a set of uniformly distributed pseudorandom numbers +c between 0 and 1. The method used is described in detail in +c appendix B of Jenkins 2013. + + do i=0,ngauss-1 + call Rand_real(garray(i),ldata%current_state(lev)) + + if (garray(i).lt.branch) then + garray(i) = branch + state = Rand_boost(ldata%current_state(lev),ldata%superjump) + iloop = 0 + 10 continue + call Rand_real(temp,state) + iloop = iloop+1 + if (temp.lt.branch) then + garray(i) = garray(i)*branch + state = Rand_boost(state,ldata%superjump) + if (iloop.gt.100) then + print*,'Too may iterations in return_gaussian_array!' + call flush(6) + stop + endif + goto 10 + else + garray(i) = garray(i)*temp + endif + endif + enddo + +c Apply Box-Muller transformation to create pairs of Gaussian +c pseudorandom numbers. + + do i=0,ngauss/2-1 + + mag = sqrt(-2.0d0*log(garray(2*i))) + ang = 2.0d0*PI*garray(2*i+1) + + garray(2*i) = mag*cos(ang) + garray(2*i+1) = mag*sin(ang) + + enddo + end +c================================================================================= + recursive subroutine parse_descriptor(string,l,ix,iy,iz,side1,side2,side3,check_int,name) + implicit none + integer nchar + parameter(nchar=100) + character*100 string + integer*4 l,side1,side2,side3,ierror + integer*8 ix,iy,iz + integer*8 check_int + character*20 name + + + integer i,ip,iq,ir + + ierror = 0 + + ip = 1 + do while (string(ip:ip).eq.' 
') + ip = ip + 1 + enddo + + if (string(ip:ip+7).ne.'[Panph1,') then + ierror = 1 + print*,string(ip:ip+7) + goto 10 + endif + + ip = ip+8 + if (string(ip:ip).ne.'L') then + ierror = 2 + goto 10 + endif + + ip = ip+1 + + iq = ip + scan( string(ip:nchar),',') -1 + + if (ip.eq.iq) then + ierror = 3 + goto 10 + endif + + + read (string(ip:iq),*) l + + ip = iq+1 + + if (string(ip:ip).ne.'(') then + ierror = 4 + goto 10 + endif + + ip = ip+1 + + iq = ip + scan( string(ip:nchar),')') -2 + + read(string(ip:iq),*) ix,iy,iz + + ip = iq+2 + + if (string(ip:ip).ne.',') then + ierror = 5 + goto 10 + endif + + ip = ip+1 + if ((string(ip:ip).ne.'S').and.(string(ip:ip).ne.'D')) then + ierror = 6 + goto 10 + endif + + if (string(ip:ip).eq.'S') then + ip = ip + 1 + iq = ip + scan( string(ip:nchar),',') -2 + read (string(ip:iq),*) side1 + side2 = side1 + side3 = side1 + iq = iq+1 + if (string(iq:iq+2).ne.',CH') then + print*,string(ip:iq),string(iq:iq+2) + ierror = 6 + goto 10 + endif + else + ip = ip + 1 + if (string(ip:ip).ne.'(') then + ierror = 7 + goto 10 + endif + + + ip = ip + 1 + iq = ip + scan( string(ip:nchar),')') -2 + read (string(ip:iq),*) side1,side2,side3 + + iq = iq + 1 + + if (string(iq:iq).ne.')') then + ierror = 8 + goto 10 + endif + + iq = iq + 1 + + if (string(iq:iq+2).ne.',CH') then + ierror = 9 + goto 10 + endif + + endif + + ip = iq + 3 + + iq = ip + scan( string(ip:nchar),',') -2 + + read (string(ip:iq),*) check_int + + ip = iq + 1 + + if (string(ip:ip).ne.',') then + ierror = 10 + goto 10 + endif + + ip = ip+1 + + ir = ip + scan( string(ip:nchar),']') -2 + + iq = min(ir,ip+19) + + do i=1,20 + name(i:i)=' ' + enddo + + do i=ip,iq + name(i-ip+1:i-ip+1) = string(i:i) + enddo + + iq = ir + 1 + + if (string(iq:iq).ne.']') then + ierror = 11 + goto 10 + endif + + + 10 continue + + if (ierror.eq.0) return + + print*,'Error reading panphasian descriptor. 
Error number:',ierror + stop + + return + end +c================================================================================= + recursive subroutine compose_descriptor(l,ix,iy,iz,side,check_int,name,string) + implicit none + integer nchar + parameter(nchar=100) + character*100,intent(out)::string + character*20 name + integer*4 l,ltemp + integer*8 side + integer*8 ix,iy,iz + integer*8 check_int + + character*50 temp1,temp2,temp3,temp4,temp5,temp6 + integer lnblnk + + integer ip1,ip2,ip3,ip4,ip5,ip6 + + ltemp = l + + 5 continue + if ((mod(ix,2).eq.0).and.(mod(iy,2).eq.0).and.(mod(iz,2).eq.0).and.(mod(side,2).eq.0)) then + ix = ix/2 + iy = iy/2 + iz = iz/2 + side = side/2 + ltemp = ltemp-1 + goto 5 + endif + + + write (temp1,*) ltemp + ip1= scan(temp1,'0123456789') + write (temp2,*) ix + ip2= scan(temp2,'0123456789') + write (temp3,*) iy + ip3= scan(temp3,'0123456789') + write (temp4,*) iz + ip4= scan(temp4,'0123456789') + write (temp5,*) side + ip5= scan(temp5,'0123456789') + write (temp6,*) check_int + ip6= scan(temp6,'-0123456789') + + + string='[Panph1,L'//temp1(ip1:lnblnk(temp1))//',('//temp2(ip2:lnblnk(temp2)) + & //','//temp3(ip3:lnblnk(temp3))//','//temp4(ip4:lnblnk(temp4))//'),S' + & // temp5(ip5:lnblnk(temp5))//',CH'//temp6(ip6:lnblnk(temp6))// + & ','//name(1:lnblnk(name))//']' + + return + + end +c================================================================================= + recursive subroutine validate_descriptor(ldata,string,MYID,check_number) + use pan_state + implicit none + + type(state_data), intent(inout) :: ldata + character*100 string + integer*8 check_number + integer MYID + + character*20 phase_name + integer*4 lev + + integer*8 ix_abs,iy_abs,iz_abs + integer*4 ix_base,iy_base,iz_base + + + integer*8 xval,yval,zval + integer val_state(5) + + TYPE(Rand_state) :: state + + real*8 rand_num + integer*8 mconst,check_total,check_rand + parameter(mconst = 2147483647_Dint) + integer ascii_list(0:255) + integer*8 maxco + integer i + integer*8 
ii + integer lnblnk + + + + call parse_descriptor(string,lev,ix_abs,iy_abs,iz_abs, + & ix_base,iy_base,iz_base,check_rand,phase_name) + +c------------------------------------------------------------------------- +c Some basic checking +c------------------------------------------------------------------------- + if ((lev.lt.0).or.(lev.gt.maxlev_)) then + print*,'lev,maxlev',lev,maxlev_ + call flush(6) + stop 'Level out of range! (3)' + endif + + if ((mod(ix_abs,2).eq.0).and.(mod(iy_abs,2).eq.0).and.(mod(iz_abs,2).eq.0).and. + & (mod(ix_base,2).eq.0).and.(mod(iy_base,2).eq.0).and.(mod(iz_base,2).eq.0)) + & stop 'Parameters not at lowest level' + + + maxco = 2_dint**lev + + if (ix_abs.lt.0) stop 'Error: ix_abs negative (2)' + if (iy_abs.lt.0) stop 'Error: iy_abs negative (2)' + if (iz_abs.lt.0) stop 'Error: iz_abs negative (2)' + + + if (ix_abs+ix_base.ge.maxco) + & stop 'Error: ix_abs + ix_per out of range.' + if (iy_abs+iy_base.ge.maxco) + & stop 'Error: iy_abs + iy_per out of range.' + if (iz_abs+iz_base.ge.maxco) + & stop 'Error: iz_abs + iz_per out of range.' 
+ + check_total = 0 + + call initialise_panphasia(ldata) +c First corner + xval = ix_abs + ix_base - 1 + yval = iy_abs + zval = iz_abs + call advance_current_state(ldata,lev,xval,yval,zval) + call Rand_real(rand_num,ldata%current_state(lev)) + call Rand_save(val_state,ldata%current_state(lev)) + check_total = check_total + val_state(5) + if (MYID.eq.0) print*,'--------------------------------------' + if (MYID.eq.0) print*,'X-corner rand = ',rand_num + if (MYID.eq.0) print*,'State:',val_state +c Second corner + xval = ix_abs + yval = iy_abs + iy_base - 1 + zval = iz_abs + call advance_current_state(ldata,lev,xval,yval,zval) + call Rand_real(rand_num,ldata%current_state(lev)) + call Rand_save(val_state,ldata%current_state(lev)) + check_total = check_total + val_state(5) + if (MYID.eq.0) print*,'Y-corner rand = ',rand_num + if (MYID.eq.0) print*,'State:',val_state +c Third corner + xval = ix_abs + yval = iy_abs + zval = iz_abs + iz_base - 1 + call advance_current_state(ldata,lev,xval,yval,zval) + call Rand_real(rand_num,ldata%current_state(lev)) + call Rand_save(val_state,ldata%current_state(lev)) + check_total = check_total + val_state(5) + if (MYID.eq.0) print*,'z-corner rand = ',rand_num + if (MYID.eq.0) print*,'State:',val_state + if (MYID.eq.0) print*,'--------------------------------------' + +c Now encode the name. An integer for each ascii character is generated +c starting from the state which gives r0 - the first random number in +c Panphasia. The integer is in the range 0 - m-1. +c After making the list, then loop over non-blank characters +c in the name and take the ascii value, and sum the associated numbers. +c To avoid simple anagrams giving the same score, weight the integer +c by position in the string. Finally take mod m - to give the +c check number. 
+ + call Rand_load(state,ldata%base_state) + + do i=0,255 + call Rand_real(rand_num,state) + call Rand_save(val_state,state) + ascii_list(i) = val_state(5) + enddo + + + + do ii=1,lnblnk(phase_name) + check_total = check_total + ii*ascii_list(iachar(phase_name(ii:ii))) + enddo + + + check_total = mod(check_total,mconst) + if (check_rand.eq.-999) then ! override the safety check number. + check_number = check_total + return + else + if (check_rand.ne.check_total) then + print*,'Inconsistency in the input panphasia descriptor ',MYID + print*,'Check_rand = ',check_rand + print*,'val_state(5) =',val_state(5) + print*,'xval,yval,zval',xval,yval,zval + print*,'lev_val = ',lev + call flush(6) + stop + endif + endif + + + return + end +c================================================================================= + recursive subroutine generate_random_descriptor(ldata,string) + use Rand + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + character*100 string + character*100 instring + character*20 name + integer*4 unix_timestamp + + real*8 lbox + real*8 lpanphasia + parameter (lpanphasia = 25000000.0) ! Units of Mpc/h + integer level + integer*8 cell_dim + integer val_state(5) + + TYPE(Rand_state) :: state + TYPE(Rand_offset) :: offset + + real*8 rand_num1,rand_num2 + integer*8 mconst,check_int + parameter(mconst = 2147483647_Dint) + integer*8 mfac,imajor,iminor + parameter(mfac=33554332_Dint) + integer ascii_list(0:255) + integer i,lnblnk + integer*8 ii + integer mult + + integer*8 ixco,iyco,izco,irange + + print*,'___________________________________________________________' + print* + print*,' Generate a random descriptor ' + print* + print*,'The code uses the time (the unix timestamp) plus some extra ' + print*,'information personal to the user to choose a random region ' + print*,'within PANPHASIA. The user must also specify the side length' + print*,'of the cosmological volume. 
The code assumes that the whole of' + print*,'PANPHASIA is 25000 Gpc/h on a side and selects an appropriate ' + print*,'level in the octree for the descriptor. ' + print*,'Assuming this scaling the small scale power is defined down ' + print*,'to a mass scale of around 10^{-12} solar masses.' + print* + print*,'The user must also specify a human readable label for the ' + print*,'descriptor of less than 21 characters.' + print*,'___________________________________________________________' + print* + print*,'Press return to continue ' + read (*,*) + print* + print*,'___________________________________________________________' + print*,'Enter the box side-length in Mpc/h units' + read (*,*) lbox + print*,'___________________________________________________________' + print* + print* + 5 continue + print*,'Enter up to 20 character name to label the descriptor (no spaces)' + read (*,'(a)') name + if ((len_trim(instring).lt.21).or.(scan(name,' ').le.len_trim(name))) goto 5 + print*,'___________________________________________________________' + print* + print* + print*,'___________________________________________________________' + print*,'The phases for the simulation are described by whole octree ' + print*,'cells. Enter an odd integer that defines the number of cells ' + print*,'you require in one dimension. Choose this number carefully ' + print*,'as it will limit the possible 1-D sizes of the of the Fourier ' + print*,'transforms that can be used to make initial conditions to a product ' + print*,'of this integer times any power of two. In which case the only' + print*,'choice is 1.)' + print*,'(I would recommend 3 unless the initial condition code is' + print*,'incapable of using grid sizes that are not purely powers of two.' 
+ print*,'___________________________________________________________' + print* + 7 continue + print*,'Enter number of octree cells on an edge (positive odd number only) ' + read (*,*) cell_dim + if ((cell_dim.le.0).or.(mod(cell_dim,2).eq.0)) goto 7 + print*,'___________________________________________________________' + call system('date +%s>tempfile_42526037646') + open(16,file='tempfile_42526037646',status='old') + read (16,*) unix_timestamp + close(16) + call system('/bin/rm tempfile_42526037646') + + print*,'Unix_timestamp determined. Value: ',unix_timestamp + print*,'___________________________________________________________' + print* + print* + print* + print*,'___________________________________________________________' + print*,'The code has just read the unix timestamp and will use this' + print*,'to help choose a random region in PANPHASIA. Although it is' + print*,'perhaps unlikely that someone else is also running this code at ' + print*,'the same time to the nearest second, to make it more likely' + print*,' still that the desciptor to be generated is unique' + print*,'please enter your name or some other piece of information' + print*,'below that you think is unlikely to be used by anyone else' + print*,'___________________________________________________________' + + print* + + 10 continue + print*,'Please enter your name (a minimum of six characters)' + read (*,'(a)') instring !' + if (len_trim(instring).lt.6) goto 10 + + level = int(log10(dble(cell_dim)*lpanphasia/lbox)/log10(2.0d0)) + + if (level.gt.50) stop 'level >50 ' + + + +c 'd' lines allow the generation of a large set of +c descriptors. Use to check that they are randomly +c positioned over the available volume. + + +c First use the unix timestamp to initialises the +c random generator. + + call Rand_seed(state,unix_timestamp) + + call Rand_save(ldata%base_state,state) + + +c First generate an integer from the user data. 
+ call Rand_load(state,ldata%base_state) + + do i=0,255 + call Rand_real(rand_num1,state) + call Rand_save(val_state,state) + ascii_list(i) = val_state(5) + enddo + + call Rand_set_offset(offset,1) + + do ii=1,lnblnk(instring) + mult = mod(ii*ascii_list(iachar(instring(ii:ii))),mconst) + offset = Rand_mul_offset(offset,mult) + enddo + + call Rand_load(state,ldata%base_state) + state = Rand_boost(state,offset) ! Starting point for choosing location. + + 20 continue + + irange = 2_Dint**level + imajor = irange/mfac + iminor = mod(irange,mfac) + + call Rand_real(rand_num1,state) + call Rand_real(rand_num2,state) + + ixco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor) + + if (ixco+cell_dim.ge.irange) goto 20 ! Invalid descriptor + + call Rand_real(rand_num1,state) + call Rand_real(rand_num2,state) + + iyco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor) + + if (iyco+cell_dim.ge.irange) goto 20 ! Invalid descriptor + + call Rand_real(rand_num1,state) + call Rand_real(rand_num2,state) + + izco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor) + + if (izco+cell_dim.ge.irange) goto 20 ! Invalid descriptor + + +c Value of the check digit is not known. Use validate_descriptor to compute it. + + check_int = -999 ! Special value required to make validate_descriptor + ! return the check digit. 
+ + call compose_descriptor(level,ixco,iyco,izco,cell_dim,check_int,name,string) + + call validate_descriptor(ldata,string,-1,check_int) + + call compose_descriptor(level,ixco,iyco,izco,cell_dim,check_int,name,string) + + + return + end +c================================================================================= + recursive subroutine demo_basis_function_allocator + + implicit none + integer nmax + parameter (nmax=10) + + integer*4 wn_level(nmax) + + integer*8 ix_abs(nmax),iy_abs(nmax),iz_abs(nmax) + integer*8 ix_per(nmax),iy_per(nmax),iz_per(nmax) + integer*8 ix_rel(nmax),iy_rel(nmax),iz_rel(nmax) + integer*8 ix_dim(nmax),iy_dim(nmax),iz_dim(nmax) + + integer ix,iy,iz,nref + integer layer_min,layer_max,indep_field + + + integer*8 itot_int,itot_ib + + integer inv_open + +c Assign some trial values + + nref = 3 + inv_open=9 + + wn_level(1) = 22 + + ix_abs(1) = 2000000 + iy_abs(1) = 1500032 + iz_abs(1) = 2500032 + + ix_per(1) = 768 + iy_per(1) = 768 + iz_per(1) = 768 + + ix_rel(1) = 0 + iy_rel(1) = 0 + iz_rel(1) = 0 + + ix_dim(1) = 768 + iy_dim(1) = 768 + iz_dim(1) = 768 + + + wn_level(2) = 23 + + ix_abs(2) = 4000000 + iy_abs(2) = 3000064 + iz_abs(2) = 5000064 + + ix_per(2) = 1536 + iy_per(2) = 1536 + iz_per(2) = 1536 + + ix_rel(2) = 256 + iy_rel(2) = 16 + iz_rel(2) = 720 + + ix_dim(2) = 768 + iy_dim(2) = 768 + iz_dim(2) = 768 + + + wn_level(3) = 24 + + ix_abs(3) = 8000000 + iy_abs(3) = 6000128 + iz_abs(3) = 10000128 + + ix_per(3) = 3072 + iy_per(3) = 3072 + iz_per(3) = 3072 + + ix_rel(3) = 896 + iy_rel(3) = 432 + iz_rel(3) = 1840 + + ix_dim(3) = 768 + iy_dim(3) = 768 + iz_dim(3) = 768 + + + itot_int = 0 + itot_ib = 0 + + + + + open(10,file='ascii_dump_r1',status='unknown') + + ix=320 + do iy=0,767 + do iz=0,767 + call layer_choice(ix,iy,iz,1,nref,ix_abs,iy_abs,iz_abs, + & ix_per,iy_per,iz_per,ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,inv_open,layer_min,layer_max,indep_field) + write(10,*) iy,iz,layer_min,layer_max,indep_field + enddo + enddo + 
close(10) + + open(10,file='ascii_dump_r2',status='unknown') + + ix=384 + do iy=0,767 + do iz=0,767 + call layer_choice(ix,iy,iz,2,nref,ix_abs,iy_abs,iz_abs, + & ix_per,iy_per,iz_per,ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,inv_open,layer_min,layer_max,indep_field) + write(10,*) iy,iz,layer_min,layer_max,indep_field + enddo + enddo + close(10) + + open(10,file='ascii_dump_r3',status='unknown') + + ix=384 + do iy=0,767 + do iz=0,767 + call layer_choice(ix,iy,iz,3,nref,ix_abs,iy_abs,iz_abs, + & ix_per,iy_per,iz_per,ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,inv_open,layer_min,layer_max,indep_field) + write(10,*) iy,iz,layer_min,layer_max,indep_field + enddo + enddo + close(10) + end +c================================================================================= + recursive subroutine layer_choice(ix0,iy0,iz0,iref,nref, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,x_fact,layer_min,layer_max,indep_field) + implicit none + + integer ix0,iy0,iz0,iref,nref,isize,ibase + integer ix,iy,iz,irefplus + integer ione + + integer*8 ix_abs(nref),iy_abs(nref),iz_abs(nref) + integer*8 ix_per(nref),iy_per(nref),iz_per(nref) + integer*8 ix_rel(nref),iy_rel(nref),iz_rel(nref) + integer*8 ix_dim(nref),iy_dim(nref),iz_dim(nref) + + integer wn_level(nref) + integer layer_min,layer_max,indep_field,x_fact + integer idebug + + + integer interior,iboundary + + if (iref.eq.9999) then + idebug = 1 + else + idebug = 0 + endif + + ione = 1 + + irefplus = min(iref+1,nref) + + if (nref.eq.1) then ! Deal with simplest case + layer_min = 0 + layer_max = wn_level(1) + indep_field = 1 + if (idebug.eq.1) print*,'return 1' + return + endif + +c----------- Case of the top periodic refinement. For this refinement layer_min=0 as +c----------- all the larger basis functions must be included. By default layer_max +c----------- is set to wn_level(1) so all basis functions are included. 
A check is +c----------- made to determine if the lowest basis function can be included in the +c----------- next refinement. If it can the same process is repeated for the next +c----------- largest basis function and this is repeated until a failure occurs. + + if ((iref.eq.1).and.(nref.gt.1)) then + ibase = 1 + 10 continue + + ix = ishft(ishft(ix_abs(iref)+ix_rel(iref)+ix0,-ibase),ibase)-ix_abs(iref)-ix_rel(iref) + iy = ishft(ishft(iy_abs(iref)+iy_rel(iref)+iy0,-ibase),ibase)-iy_abs(iref)-iy_rel(iref) + iz = ishft(ishft(iz_abs(iref)+iz_rel(iref)+iz0,-ibase),ibase)-iz_abs(iref)-iz_rel(iref) + isize = ishft(ione,ibase) + + call inref(ix,iy,iz,isize,iref,irefplus,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + + if ((interior.eq.1).and.(iboundary.eq.1)) then + ibase = ibase + 1 + goto 10 + endif + + layer_min = 0 + layer_max = wn_level(iref) - ibase + 1 + if (layer_max.ne.wn_level(iref)) then + indep_field = 0 + else + indep_field = 1 + endif + + if (idebug.eq.1) then + print*,'iref,wn_level(iref)',iref,wn_level(iref) + print*,'Return 2',layer_min,layer_max,indep_field + endif + + return + endif +c------------------------------------------------------------------------------------------ +c------------------------------------------------------------------------------------------ + + +c----------- For second or higher refinement determine layer_min by reference +c----------- to itself. 
In this case the loop continues until a basis function +c------------ is found which fits in a larger refinement + + ibase = 1 + + 20 continue + + + ix = ishft(ishft(ix_abs(iref)+ix_rel(iref)+ix0,-ibase),ibase)-ix_abs(iref)-ix_rel(iref) + iy = ishft(ishft(iy_abs(iref)+iy_rel(iref)+iy0,-ibase),ibase)-iy_abs(iref)-iy_rel(iref) + iz = ishft(ishft(iz_abs(iref)+iz_rel(iref)+iz0,-ibase),ibase)-iz_abs(iref)-iz_rel(iref) + isize = ishft(ione,ibase) + + call inref(ix,iy,iz,isize,iref,iref,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + + if ((interior.eq.1).and.(iboundary.eq.1)) then + ibase = ibase + 1 + goto 20 + endif + + layer_min = wn_level(iref) - max(ibase-2,0) ! Take last suitable refinement + + +c----------- For an intermediate refinement define layer_max by reference to +c----------- the next refinement + + if (iref.lt.nref) then + ibase = 1 + + 30 continue + + ix = ishft(ishft(ix_abs(iref)+ix_rel(iref)+ix0,-ibase),ibase)-ix_abs(iref)-ix_rel(iref) + iy = ishft(ishft(iy_abs(iref)+iy_rel(iref)+iy0,-ibase),ibase)-iy_abs(iref)-iy_rel(iref) + iz = ishft(ishft(iz_abs(iref)+iz_rel(iref)+iz0,-ibase),ibase)-iz_abs(iref)-iz_rel(iref) + isize = ishft(ione,ibase) + + call inref(ix,iy,iz,isize,iref,irefplus,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + + if ((interior.eq.1).and.(iboundary.eq.1)) then + ibase = ibase + 1 + goto 30 + endif + + layer_max = wn_level(iref) - ibase + 1 + + if (layer_min.eq.wn_level(iref)) then + indep_field = 1 + else + indep_field = 0 + endif + else + layer_max = wn_level(iref) + indep_field = 1 + endif + + if (idebug.eq.1) then + print*,'Return 3' + print*,'layer_min,layer_max,indep_field',layer_min,layer_max,indep_field + print*,'interior,iboundary',interior,iboundary + print*,'ibase = ',ibase + print*,'iref,nref,wn_level(iref)',iref,nref,wn_level(iref) + 
endif + + + return + + end + + + + +c The function takes a given basis function specified by a corner ixc,iyc,izc +c and a size isz at level wn_c in the oct-tree and returns two integer values. +c (i) interior: +c Value 1 if the basis function is completely within the given +c refinement. +c +c Value 0 if the basis function is without the refinement, or +c overlaps the edges of the refinement, or the edges of the +c primary white noise patch. +c +c (ii) iboundary: +c Value 1 if the basis function is sufficiently far from the +c refinement boundary. +c +c Value 0 otherwise. +c The given refinement is defined at level wn_r in the oct-tree and by the variables +c (ix_rel,iy_rel,iz_rel) which give the location of the refinement relative to +c corner of the white noise patch, (ix_per,iy_per,iz_per) which define the +c periodicity of the white noise patch, and (ix_dim,iy_dim,iz_dim) which +c define the size of the refinement. +c +c +c +c================================================================================= + recursive subroutine inref(ixc,iyc,izc,isz,ir1,ir2,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + implicit none + + integer nref + integer ixc,iyc,izc,isz,ir1,ir2 + integer wn_level(nref) + integer*8 ix_abs(nref),iy_abs(nref),iz_abs(nref) + integer*8 ix_per(nref),iy_per(nref),iz_per(nref) + integer*8 ix_rel(nref),iy_rel(nref),iz_rel(nref) + integer*8 ix_dim(nref),iy_dim(nref),iz_dim(nref) + integer interior, iboundary + integer x_fact + + integer*8 ixco,iyco,izco,isize + integer*8 ixref0,iyref0,izref0 + integer*8 ixref1,iyref1,izref1 + integer*8 idist + + integer delta_wn + +c Error checking + if (ir2.lt.ir1) stop 'ir2 dimsize(ndims,0); - H5Sget_simple_extent_dims( HDF_DataspaceID, dimsize, NULL ); + H5Sget_simple_extent_dims( HDF_DataspaceID, &dimsize[0], NULL ); HDF_StorageSize = 1; for(int i=0; i( const std::string Filename, con H5Gclose( HDF_GroupID ); 
H5Fclose( HDF_FileID ); } -#endif + +#endif // USE_HDF5 diff --git a/include/bounding_box.hh b/include/bounding_box.hh index db0f481..6b70bcf 100644 --- a/include/bounding_box.hh +++ b/include/bounding_box.hh @@ -1,16 +1,16 @@ #pragma once -#include +#include template struct bounding_box { - vec3 x1_, x2_; + vec3_t x1_, x2_; bounding_box(void) { } - bounding_box( const vec3& x1, const vec3& x2) + bounding_box( const vec3_t& x1, const vec3_t& x2) : x1_(x1), x2_(x2) { } diff --git a/include/cmake_config.hh.in b/include/cmake_config.hh.in new file mode 100644 index 0000000..03768a3 --- /dev/null +++ b/include/cmake_config.hh.in @@ -0,0 +1,34 @@ +#pragma once + +constexpr char CMAKE_BUILDTYPE_STR[] = "${CMAKE_BUILD_TYPE}"; + +#define USE_PRECISION_${CODE_PRECISION} +#if defined(USE_PRECISION_FLOAT) + constexpr char CMAKE_PRECISION_STR[] = "single"; +#elif defined(USE_PRECISION_DOUBLE) + constexpr char CMAKE_PRECISION_STR[] = "double"; +#elif defined(USE_PRECISION_LONGDOUBLE) + constexpr char CMAKE_PRECISION_STR[] = "long double"; +#endif + +#define USE_CONVOLVER_${CONVOLVER_TYPE} +#if defined(USE_CONVOLVER_ORSZAG) + constexpr char CMAKE_CONVOLVER_STR[] = "Orszag3/2"; +#elif defined(USE_CONVOLVER_NAIVE) + constexpr char CMAKE_CONVOLVER_STR[] = "Aliased"; +#endif + +#if defined(ENABLE_PLT) + constexpr char CMAKE_PLT_STR[] = "PLT corr. on"; +#else + constexpr char CMAKE_PLT_STR[] = "PLT corr. off"; +#endif + +// These variables are autogenerated and compiled +// into the library by the version.cmake script. do not touch! +extern "C" +{ + extern const char *GIT_TAG; + extern const char *GIT_REV; + extern const char *GIT_BRANCH; +} \ No newline at end of file diff --git a/include/config_file.hh b/include/config_file.hh index b0d6401..ab30ea2 100644 --- a/include/config_file.hh +++ b/include/config_file.hh @@ -12,20 +12,20 @@ #include /*! 
- * @class ConfigFile + * @class config_file * @brief provides read/write access to configuration options * * This class provides access to the configuration file. The * configuration is stored in hash-pairs and can be queried and * validated by the responsible class/routine */ -class ConfigFile { +class config_file { //! current line number - unsigned m_iLine; + unsigned iline_; //! hash table for key/value pairs, stored as strings - std::map m_Items; + std::map items_; public: //! removes all white space from string source @@ -59,42 +59,42 @@ public: * @param oval the interpreted/converted value */ template - void Convert(const in_value &ival, out_value &oval) const { + void convert(const in_value &ival, out_value &oval) const { std::stringstream ss; ss << ival; //.. insert value into stream ss >> oval; //.. retrieve value from stream if (!ss.eof()) { //.. conversion error - csoca::elog << "Error: conversion of \'" << ival << "\' failed." + music::elog << "Error: conversion of \'" << ival << "\' failed." << std::endl; - throw ErrInvalidConversion(std::string("invalid conversion to ") + + throw except_invalid_conversion(std::string("invalid conversion to ") + typeid(out_value).name() + '.'); } } //! constructor of class config_file - /*! @param FileName the path/name of the configuration file to be parsed + /*! @param filename the path/name of the configuration file to be parsed */ - explicit ConfigFile(std::string const &FileName) : m_iLine(0), m_Items() { - std::ifstream file(FileName.c_str()); + explicit config_file(std::string const &filename) : iline_(0), items_() { + std::ifstream file(filename.c_str()); if (!file.is_open()){ - csoca::elog << "Could not open config file \'" << FileName << "\'." << std::endl; + music::elog << "Could not open config file \'" << filename << "\'." 
<< std::endl; throw std::runtime_error( - std::string("Error: Could not open config file \'") + FileName + + std::string("Error: Could not open config file \'") + filename + std::string("\'")); } std::string line; std::string name; std::string value; - std::string inSection; - int posEqual; - m_iLine = 0; + std::string in_section; + int pos_equal; + iline_ = 0; //.. walk through all lines .. while (std::getline(file, line)) { - ++m_iLine; + ++iline_; //.. encounterd EOL ? if (!line.length()) continue; @@ -106,31 +106,31 @@ public: //.. encountered section tag ? if (line[0] == '[') { - inSection = trim(line.substr(1, line.find(']') - 1)); + in_section = trim(line.substr(1, line.find(']') - 1)); continue; } //.. seek end of entry name .. - posEqual = line.find('='); - name = trim(line.substr(0, posEqual)); - value = trim(line.substr(posEqual + 1)); + pos_equal = line.find('='); + name = trim(line.substr(0, pos_equal)); + value = trim(line.substr(pos_equal + 1)); - if ((size_t)posEqual == std::string::npos && + if ((size_t)pos_equal == std::string::npos && (name.size() != 0 || value.size() != 0)) { - csoca::wlog << "Ignoring non-assignment in " << FileName << ":" - << m_iLine << std::endl; + music::wlog << "Ignoring non-assignment in " << filename << ":" + << iline_ << std::endl; continue; } if (name.length() == 0 && value.size() != 0) { - csoca::wlog << "Ignoring assignment missing entry name in " - << FileName << ":" << m_iLine << std::endl; + music::wlog << "Ignoring assignment missing entry name in " + << filename << ":" << iline_ << std::endl; continue; } if (value.length() == 0 && name.size() != 0) { - csoca::wlog << "Empty entry will be ignored in " << FileName << ":" - << m_iLine << std::endl; + music::wlog << "Empty entry will be ignored in " << filename << ":" + << iline_ << std::endl; continue; } @@ -138,12 +138,12 @@ public: continue; //.. add key/value pair to hash table .. 
- if (m_Items.find(inSection + '/' + name) != m_Items.end()) { - csoca::wlog << "Redeclaration overwrites previous value in " - << FileName << ":" << m_iLine << std::endl; + if (items_.find(in_section + '/' + name) != items_.end()) { + music::wlog << "Redeclaration overwrites previous value in " + << filename << ":" << iline_ << std::endl; } - m_Items[inSection + '/' + name] = value; + items_[in_section + '/' + name] = value; } } @@ -151,8 +151,8 @@ public: /*! @param key the key value, usually "section/key" * @param value the value of the key, also a string */ - void InsertValue(std::string const &key, std::string const &value) { - m_Items[key] = value; + void insert_value(std::string const &key, std::string const &value) { + items_[key] = value; } //! inserts a key/value pair in the hash map @@ -160,9 +160,9 @@ public: * @param key the key value usually "section/key" * @param value the value of the key, also a string */ - void InsertValue(std::string const §ion, std::string const &key, + void insert_value(std::string const §ion, std::string const &key, std::string const &value) { - m_Items[section + '/' + key] = value; + items_[section + '/' + key] = value; } //! checks if a key is part of the hash map @@ -170,10 +170,10 @@ public: * @param key the key name to be checked * @return true if the key is present, false otherwise */ - bool ContainsKey(std::string const §ion, std::string const &key) { + bool contains_key(std::string const §ion, std::string const &key) { std::map::const_iterator i = - m_Items.find(section + '/' + key); - if (i == m_Items.end()) + items_.find(section + '/' + key); + if (i == items_.end()) return false; return true; } @@ -182,57 +182,57 @@ public: /*! 
@param key the key name to be checked * @return true if the key is present, false otherwise */ - bool ContainsKey(std::string const &key) { - std::map::const_iterator i = m_Items.find(key); - if (i == m_Items.end()) + bool contains_key(std::string const &key) { + std::map::const_iterator i = items_.find(key); + if (i == items_.end()) return false; return true; } //! return value of a key - /*! returns the value of a given key, throws a ErrItemNotFound + /*! returns the value of a given key, throws a except_item_not_found * exception if the key is not available in the hash map. * @param key the key name * @return the value of the key - * @sa ErrItemNotFound + * @sa except_item_not_found */ - template T GetValue(std::string const &key) const { - return GetValue("", key); + template T get_value(std::string const &key) const { + return get_value("", key); } //! return value of a key - /*! returns the value of a given key, throws a ErrItemNotFound + /*! returns the value of a given key, throws a except_item_not_found * exception if the key is not available in the hash map. 
* @param section the section name for the key * @param key the key name * @return the value of the key - * @sa ErrItemNotFound + * @sa except_item_not_found */ template - T GetValueBasic(std::string const §ion, std::string const &key) const { + T get_value_basic(std::string const §ion, std::string const &key) const { T r; std::map::const_iterator i = - m_Items.find(section + '/' + key); - if (i == m_Items.end()){ - throw ErrItemNotFound('\'' + section + '/' + key + + items_.find(section + '/' + key); + if (i == items_.end()){ + throw except_item_not_found('\'' + section + '/' + key + std::string("\' not found.")); } - Convert(i->second, r); + convert(i->second, r); return r; } template - T GetValue(std::string const §ion, std::string const &key) const + T get_value(std::string const §ion, std::string const &key) const { T r; try { - r = GetValueBasic(section, key); + r = get_value_basic(section, key); } - catch (ErrItemNotFound& e) + catch (except_item_not_found& e) { - csoca::elog << e.what() << std::endl; + music::elog << e.what() << std::endl; throw; } return r; @@ -240,40 +240,41 @@ public: //! exception safe version of getValue /*! returns the value of a given key, returns a default value rather - * than a ErrItemNotFound exception if the key is not found. + * than a except_item_not_found exception if the key is not found. * @param section the section name for the key * @param key the key name * @param default_value the value that is returned if the key is not found * @return the key value (if key found) otherwise default_value */ template - T GetValueSafe(std::string const §ion, std::string const &key, + T get_value_safe(std::string const §ion, std::string const &key, T default_value) const { T r; try { - r = GetValueBasic(section, key); - } catch (ErrItemNotFound&) { + r = get_value_basic(section, key); + } catch (except_item_not_found&) { r = default_value; + music::dlog << "Item \'" << section << "/" << key << " not found in config. 
Default = \'" << default_value << "\'" << std::endl; } return r; } //! exception safe version of getValue /*! returns the value of a given key, returns a default value rather - * than a ErrItemNotFound exception if the key is not found. + * than a except_item_not_found exception if the key is not found. * @param key the key name * @param default_value the value that is returned if the key is not found * @return the key value (if key found) otherwise default_value */ template - T GetValueSafe(std::string const &key, T default_value) const { - return GetValueSafe("", key, default_value); + T get_value_safe(std::string const &key, T default_value) const { + return get_value_safe("", key, default_value); } //! dumps all key-value pairs to a std::ostream - void Dump(std::ostream &out) { - std::map::const_iterator i = m_Items.begin(); - while (i != m_Items.end()) { + void dump(std::ostream &out) { + std::map::const_iterator i = items_.begin(); + while (i != items_.end()) { if (i->second.length() > 0) out << std::setw(24) << std::left << i->first << " = " << i->second << std::endl; @@ -281,12 +282,12 @@ public: } } - void LogDump(void) { - csoca::ilog << "List of all configuration options:" << std::endl; - std::map::const_iterator i = m_Items.begin(); - while (i != m_Items.end()) { + void dump_to_log(void) { + music::ilog << "List of all configuration options:" << std::endl; + std::map::const_iterator i = items_.begin(); + while (i != items_.end()) { if (i->second.length() > 0) - csoca::ilog << std::setw(28) << i->first << " = " << i->second + music::ilog << std::setw(28) << i->first << " = " << i->second << std::endl; ++i; } @@ -295,16 +296,16 @@ public: //--- EXCEPTIONS --- //! 
runtime error that is thrown if key is not found in getValue - class ErrItemNotFound : public std::runtime_error { + class except_item_not_found : public std::runtime_error { public: - ErrItemNotFound(std::string itemname) + except_item_not_found(std::string itemname) : std::runtime_error(itemname.c_str()) {} }; //! runtime error that is thrown if type conversion fails - class ErrInvalidConversion : public std::runtime_error { + class except_invalid_conversion : public std::runtime_error { public: - ErrInvalidConversion(std::string errmsg) : std::runtime_error(errmsg) {} + except_invalid_conversion(std::string errmsg) : std::runtime_error(errmsg) {} }; //! runtime error that is thrown if identifier is not found in keys @@ -323,14 +324,14 @@ public: //... like "true" and "false" etc. //... converts the string to type bool, returns type bool ... template <> -inline bool ConfigFile::GetValue(std::string const &strSection, +inline bool config_file::get_value(std::string const &strSection, std::string const &strEntry) const { - std::string r1 = GetValue(strSection, strEntry); + std::string r1 = get_value(strSection, strEntry); if (r1 == "true" || r1 == "yes" || r1 == "on" || r1 == "1") return true; if (r1 == "false" || r1 == "no" || r1 == "off" || r1 == "0") return false; - csoca::elog << "Illegal identifier \'" << r1 << "\' in \'" << strEntry << "\'." << std::endl; + music::elog << "Illegal identifier \'" << r1 << "\' in \'" << strEntry << "\'." 
<< std::endl; throw ErrIllegalIdentifier(std::string("Illegal identifier \'") + r1 + std::string("\' in \'") + strEntry + std::string("\'.")); @@ -338,17 +339,17 @@ inline bool ConfigFile::GetValue(std::string const &strSection, } template <> -inline bool ConfigFile::GetValueSafe(std::string const &strSection, +inline bool config_file::get_value_safe(std::string const &strSection, std::string const &strEntry, bool defaultValue) const { std::string r1; try { - r1 = GetValueBasic(strSection, strEntry); + r1 = get_value_basic(strSection, strEntry); if (r1 == "true" || r1 == "yes" || r1 == "on" || r1 == "1") return true; if (r1 == "false" || r1 == "no" || r1 == "off" || r1 == "0") return false; - } catch (ErrItemNotFound&) { + } catch (except_item_not_found&) { return defaultValue; } return defaultValue; @@ -356,7 +357,7 @@ inline bool ConfigFile::GetValueSafe(std::string const &strSection, template <> inline void -ConfigFile::Convert(const std::string &ival, +config_file::convert(const std::string &ival, std::string &oval) const { oval = ival; } diff --git a/include/convolution.hh b/include/convolution.hh index 2145445..90736b1 100644 --- a/include/convolution.hh +++ b/include/convolution.hh @@ -333,7 +333,7 @@ public: crecvbuf_ = new ccomplex_t[maxslicesz_ / 2]; recvbuf_ = reinterpret_cast(&crecvbuf_[0]); - int ntasks(MPI_Get_size()); + int ntasks(MPI::get_size()); offsets_.assign(ntasks, 0); offsetsp_.assign(ntasks, 0); @@ -415,12 +415,12 @@ private: { assert(fp.space_ == kspace_id); - const double rfac = std::pow(1.5, 1.5); + const real_t rfac = std::pow(1.5, 1.5); fp.zero(); #if !defined(USE_MPI) //////////////////////////////////////////////////////////////////////////////////// - size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3}; + const size_t nhalf[3] = {fp.n_[0] / 3, fp.n_[1] / 3, fp.n_[2] / 3}; #pragma omp parallel for for (size_t i = 0; i < 2 * fp.size(0) / 3; ++i) @@ -429,10 +429,9 @@ private: for (size_t j = 0; j < 2 * fp.size(1) / 3; ++j) { 
size_t jp = (j > nhalf[1]) ? j + nhalf[1] : j; - for (size_t k = 0; k < 2 * fp.size(2) / 3; ++k) + for (size_t k = 0; k < nhalf[2]+1; ++k) { size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k; - // if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue; fp.kelem(ip, jp, kp) = kfunc(i, j, k) * rfac; } } @@ -445,7 +444,7 @@ private: ///////////////////////////////////////////////////////////////////// double tstart = get_wtime(); - csoca::dlog << "[MPI] Started scatter for convolution" << std::endl; + music::dlog << "[MPI] Started scatter for convolution" << std::endl; //... collect offsets @@ -460,7 +459,10 @@ private: size_t slicesz = fbuf_->size(1) * fbuf_->size(3); MPI_Datatype datatype = - (typeid(data_t) == typeid(float)) ? MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE; + (typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX + : (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX + : (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX + : MPI_BYTE; // fill MPI send buffer with results of kfunc @@ -587,7 +589,7 @@ private: // std::cerr << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed! <<<<<" // << std::endl; ofs << ">>>>> task " << CONFIG::MPI_task_rank << " all transfers completed! // <<<<<" << std::endl; - csoca::dlog.Print("[MPI] Completed scatter for convolution, took %fs\n", + music::dlog.Print("[MPI] Completed scatter for convolution, took %fs\n", get_wtime() - tstart); #endif /// end of ifdef/ifndef USE_MPI /////////////////////////////////////////////////////////////// @@ -596,7 +598,7 @@ private: template void unpad(const Grid_FFT &fp, operator_t output_op) { - const double rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(fbuf_->n_[0] * fbuf_->n_[1] * fbuf_->n_[2]); + const real_t rfac = std::sqrt(fp.n_[0] * fp.n_[1] * fp.n_[2]) / std::sqrt(fbuf_->n_[0] * fbuf_->n_[1] * fbuf_->n_[2]); // make sure we're in Fourier space... 
assert(fp.space_ == kspace_id); @@ -615,8 +617,11 @@ private: for (size_t k = 0; k < fbuf_->size(2); ++k) { size_t kp = (k > nhalf[2]) ? k + nhalf[2] : k; - // if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]) continue; fbuf_->kelem(i, j, k) = fp.kelem(ip, jp, kp) / rfac; + // zero Nyquist modes since they are not unique after convolution + if( i==nhalf[0]||j==nhalf[1]||k==nhalf[2]){ + fbuf_->kelem(i, j, k) = 0.0; + } } } } @@ -634,7 +639,7 @@ private: double tstart = get_wtime(); - csoca::dlog << "[MPI] Started gather for convolution"; + music::dlog << "[MPI] Started gather for convolution"; MPI_Barrier(MPI_COMM_WORLD); @@ -645,7 +650,10 @@ private: size_t slicesz = fp.size(1) * fp.size(3); MPI_Datatype datatype = - (typeid(data_t) == typeid(float)) ? MPI_COMPLEX : (typeid(data_t) == typeid(double)) ? MPI_DOUBLE_COMPLEX : MPI_BYTE; + (typeid(data_t) == typeid(float)) ? MPI_C_FLOAT_COMPLEX + : (typeid(data_t) == typeid(double)) ? MPI_C_DOUBLE_COMPLEX + : (typeid(data_t) == typeid(long double)) ? MPI_C_LONG_DOUBLE_COMPLEX + : MPI_BYTE; MPI_Status status; @@ -685,7 +693,7 @@ private: int recvfrom = 0; if (iglobal <= fny[0]) { - real_t wi = (iglobal == fny[0]) ? 0.5 : 1.0; + real_t wi = (iglobal == fny[0]) ? 0.0 : 1.0; recvfrom = get_task(iglobal, offsetsp_, sizesp_, CONFIG::MPI_task_size); MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, (int)iglobal, @@ -693,7 +701,7 @@ private: for (size_t j = 0; j < nf[1]; ++j) { - real_t wj = (j == fny[1]) ? 0.5 : 1.0; + real_t wj = (j == fny[1]) ? 0.0 : 1.0; if (j <= fny[1]) { size_t jp = j; @@ -701,21 +709,22 @@ private: { if (typeid(data_t) == typeid(real_t)) { - real_t w = wi * wj; + real_t wk = (k == fny[2]) ? 0.0 : 1.0; + real_t w = wi * wj * wk; fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; } else { - real_t wk = (k == fny[2]) ? 0.5 : 1.0; + real_t wk = (k == fny[2]) ? 
0.0 : 1.0; real_t w = wi * wj * wk; - if (k <= fny[2]) + if (k < fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; - if (k >= fny[2]) + if (k > fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac; - if (w < 1.0) - { - fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); - } + // if (w < 1.0) + // { + // fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); + // } } } } @@ -726,21 +735,22 @@ private: { if (typeid(data_t) == typeid(real_t)) { - real_t w = wi * wj; + real_t wk = (k == fny[2]) ? 0.0 : 1.0; + real_t w = wi * wj * wk; fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; } else { - real_t wk = (k == fny[2]) ? 0.5 : 1.0; + real_t wk = (k == fny[2]) ? 0.0 : 1.0; real_t w = wi * wj * wk; - if (k <= fny[2]) + if (k < fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; - if (k >= fny[2]) + if (k > fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac; - if (w < 1.0) - { - fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); - } + // if (w < 1.0) + // { + // fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); + // } } } } @@ -748,7 +758,7 @@ private: } if (iglobal >= fny[0]) { - real_t wi = (iglobal == fny[0]) ? 0.5 : 1.0; + real_t wi = (iglobal == fny[0]) ? 0.0 : 1.0; recvfrom = get_task(iglobal + fny[0], offsetsp_, sizesp_, CONFIG::MPI_task_size); MPI_Recv(&recvbuf_[0], (int)slicesz, datatype, recvfrom, @@ -756,29 +766,26 @@ private: for (size_t j = 0; j < nf[1]; ++j) { - real_t wj = (j == fny[1]) ? 0.5 : 1.0; + real_t wj = (j == fny[1]) ? 0.0 : 1.0; if (j <= fny[1]) { size_t jp = j; for (size_t k = 0; k < nf[2]; ++k) { + const real_t wk = (k == fny[2]) ? 0.0 : 1.0; + const real_t w = wi * wj * wk; if (typeid(data_t) == typeid(real_t)) { - real_t w = wi * wj; + real_t wk = (k == fny[2]) ? 0.0 : 1.0; + real_t w = wi * wj * wk; fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; } else { - real_t wk = (k == fny[2]) ? 
0.5 : 1.0; - real_t w = wi * wj * wk; - if (k <= fny[2]) + if (k < fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; - if (k >= fny[2]) + if (k > fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac; - if (w < 1.0) - { - fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); - } } } } @@ -787,23 +794,18 @@ private: size_t jp = j + fny[1]; for (size_t k = 0; k < nf[2]; ++k) { + const real_t wk = (k == fny[2]) ? 0.0 : 1.0; + const real_t w = wi * wj * wk; if (typeid(data_t) == typeid(real_t)) { - real_t w = wi * wj; fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; } else { - real_t wk = (k == fny[2]) ? 0.5 : 1.0; - real_t w = wi * wj * wk; - if (k <= fny[2]) + if (k < fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k] / rfac; - if (k >= fny[2]) + if (k > fny[2]) fbuf_->kelem(i, j, k) += w * crecvbuf_[jp * nfp[3] + k + fny[2]] / rfac; - if (w < 1.0) - { - fbuf_->kelem(i, j, k) = std::real(fbuf_->kelem(i, j, k)); - } } } } @@ -811,8 +813,8 @@ private: } } -//... copy data back -#pragma omp parallel for + //... copy data back + #pragma omp parallel for for (size_t i = 0; i < fbuf_->ntot_; ++i) { output_op(i, (*fbuf_)[i]); @@ -831,7 +833,7 @@ private: MPI_Barrier(MPI_COMM_WORLD); - csoca::dlog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart); + music::dlog.Print("[MPI] Completed gather for convolution, took %fs", get_wtime() - tstart); #endif /// end of ifdef/ifndef USE_MPI ////////////////////////////////////////////////////////////// } diff --git a/include/cosmology_calculator.hh b/include/cosmology_calculator.hh index f74039d..6f1fd7f 100644 --- a/include/cosmology_calculator.hh +++ b/include/cosmology_calculator.hh @@ -1,25 +1,43 @@ #pragma once #include +#include #include +#include #include +#include #include +#include + #include +// #include #include +namespace cosmology +{ + /*! 
- * @class CosmologyCalculator + * @class cosmology::calculator * @brief provides functions to compute cosmological quantities * * This class provides member functions to compute cosmological quantities * related to the Friedmann equations and linear perturbation theory */ -class CosmologyCalculator +class calculator { +public: + //! data structure to store cosmological parameters + cosmology::parameters cosmo_param_; + + //! pointer to an instance of a transfer function plugin + std::unique_ptr transfer_function_; + private: - static constexpr double REL_PRECISION = 1e-5; + static constexpr double REL_PRECISION = 1e-10; + interpolated_function_1d D_of_a_, f_of_a_, a_of_D_; + double Dnow_, Dplus_start_, Dplus_target_, astart_, atarget_; real_t integrate(double (*func)(double x, void *params), double a, double b, void *params) const { @@ -39,167 +57,207 @@ private: gsl_set_error_handler(NULL); if (error / result > REL_PRECISION) - csoca::wlog << "no convergence in function 'integrate', rel. error=" << error / result << std::endl; + music::wlog << "no convergence in function 'integrate', rel. 
error=" << error / result << std::endl; return (real_t)result; } + void compute_growth( std::vector& tab_a, std::vector& tab_D, std::vector& tab_f ) + { + using v_t = vec_t<3, double>; + + // set ICs + const double a0 = 1e-10; + const double D0 = a0; + const double Dprime0 = 2.0 * D0 * H_of_a(a0) / std::pow(phys_const::c_SI, 2); + const double t0 = 1.0 / (a0 * H_of_a(a0)); + + v_t y0({a0, D0, Dprime0}); + + // set up integration + double dt = 1e-9; + double dtdid, dtnext; + const double amax = 2.0; + + v_t yy(y0); + double t = t0; + const double eps = 1e-10; + + while (yy[0] < amax) + { + // RHS of ODEs + auto rhs = [&](double t, v_t y) -> v_t { + auto a = y[0]; + auto D = y[1]; + auto Dprime = y[2]; + v_t dy; + // da/dtau = a^2 H(a) + dy[0] = a * a * H_of_a(a); + // d D/dtau + dy[1] = Dprime; + // d^2 D / dtau^2 + dy[2] = -a * H_of_a(a) * Dprime + 3.0 / 2.0 * cosmo_param_.Omega_m * std::pow(cosmo_param_.H0, 2) * D / a; + return dy; + }; + + // scale by predicted value to get approx. constant fractional errors + v_t yyscale = yy.abs() + dt * rhs(t, yy).abs(); + + // call integrator + ode_integrate::rk_step_qs(dt, t, yy, yyscale, rhs, eps, dtdid, dtnext); + + tab_a.push_back(yy[0]); + tab_D.push_back(yy[1]); + tab_f.push_back(yy[2]); + + dt = dtnext; + } + + // compute f, before we stored here D' + for (size_t i = 0; i < tab_a.size(); ++i) + { + tab_f[i] = tab_f[i] / (tab_a[i] * H_of_a(tab_a[i]) * tab_D[i]); + tab_D[i] = tab_D[i]; + tab_a[i] = tab_a[i]; + } + } + public: - //! data structure to store cosmological parameters - CosmologyParameters cosmo_param_; - - //! pointer to an instance of a transfer function plugin - //TransferFunction_plugin *ptransfer_fun_; - std::unique_ptr transfer_function_; - - + calculator() = delete; + calculator(const calculator& c) = delete; //! constructor for a cosmology calculator object /*! 
* @param acosmo a cosmological parameters structure * @param pTransferFunction pointer to an instance of a transfer function object */ - explicit CosmologyCalculator(ConfigFile &cf) - : cosmo_param_(cf) - { + explicit calculator(config_file &cf) + : cosmo_param_(cf), astart_( 1.0/(1.0+cf.get_value("setup","zstart")) ), + atarget_( 1.0/(1.0+cf.get_value_safe("cosmology","ztarget",1./astart_-1.))) + { + // pre-compute growth factors and store for interpolation + std::vector tab_a, tab_D, tab_f; + this->compute_growth(tab_a, tab_D, tab_f); + D_of_a_.set_data(tab_a,tab_D); + f_of_a_.set_data(tab_a,tab_f); + a_of_D_.set_data(tab_D,tab_a); + Dnow_ = D_of_a_(1.0); + + Dplus_start_ = D_of_a_( astart_ ) / Dnow_; + Dplus_target_ = D_of_a_( atarget_ ) / Dnow_; + + // set up transfer functions and compute normalisation transfer_function_ = std::move(select_TransferFunction_plugin(cf)); transfer_function_->intialise(); - cosmo_param_.pnorm = this->ComputePNorm(); + if( !transfer_function_->tf_isnormalised_ ) + cosmo_param_.pnorm = this->compute_pnorm_from_sigma8(); + else{ + cosmo_param_.pnorm = 1.0/Dplus_target_/Dplus_target_; + auto sigma8 = this->compute_sigma8(); + music::ilog << "Measured sigma_8 for given PS normalisation is " << sigma8 << std::endl; + } cosmo_param_.sqrtpnorm = std::sqrt(cosmo_param_.pnorm); - csoca::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons" << " : " << (transfer_function_->tf_is_distinct()? "yes" : "no") << std::endl; - csoca::ilog << std::setw(32) << std::left << "TF maximum wave number" << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl; + + music::ilog << std::setw(32) << std::left << "TF supports distinct CDM+baryons" + << " : " << (transfer_function_->tf_is_distinct() ? "yes" : "no") << std::endl; + music::ilog << std::setw(32) << std::left << "TF maximum wave number" + << " : " << transfer_function_->get_kmax() << " h/Mpc" << std::endl; + } + + ~calculator() + { } //! 
Write out a correctly scaled power spectrum at time a - void WritePowerspectrum( real_t a, std::string fname ) const + void write_powerspectrum(real_t a, std::string fname) const { - const real_t Dplus0 = this->CalcGrowthFactor(a) / this->CalcGrowthFactor(1.0); + // const real_t Dplus0 = this->get_growth_factor(a); - if( CONFIG::MPI_task_rank==0 ) + if (CONFIG::MPI_task_rank == 0) { - double kmin = std::max(1e-4,transfer_function_->get_kmin()); + double kmin = std::max(1e-4, transfer_function_->get_kmin()); // write power spectrum to a file std::ofstream ofs(fname.c_str()); - std::stringstream ss; ss << " (a=" << a <<")"; + std::stringstream ss; + ss << " ,ap=" << a << ""; ofs << "# " << std::setw(18) << "k [h/Mpc]" - << std::setw(20) << ("P_dtot(k)"+ss.str()) - << std::setw(20) << ("P_dcdm(k)"+ss.str()) - << std::setw(20) << ("P_dbar(k)"+ss.str()) - << std::setw(20) << ("P_dtot(K) (a=1)") - << std::setw(20) << ("P_tcdm(k)"+ss.str()) - << std::setw(20) << ("P_tbar(k)"+ss.str()) - << std::endl; - for( double k=kmin; kget_kmax(); k*=1.05 ){ + << std::setw(20) << ("P_dtot(k,a=ap)") + << std::setw(20) << ("P_dcdm(k,a=ap)") + << std::setw(20) << ("P_dbar(k,a=ap)") + << std::setw(20) << ("P_tcdm(k,a=ap)") + << std::setw(20) << ("P_tbar(k,a=ap)") + << std::setw(20) << ("P_dtot(k,a=1)") + << std::setw(20) << ("P_dcdm(k,a=1)") + << std::setw(20) << ("P_dbar(k,a=1)") + << std::setw(20) << ("P_tcdm(k,a=1)") + << std::setw(20) << ("P_tbar(k,a=1)") + << std::setw(20) << ("P_dtot(K,a=1)") + << std::endl; + for (double k = kmin; k < transfer_function_->get_kmax(); k *= 1.05) + { ofs << std::setw(20) << std::setprecision(10) << k - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, total) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, cdm) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, baryon) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << 
std::pow(this->GetAmplitude(k, total), 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vcdm) * Dplus0, 2.0) - << std::setw(20) << std::setprecision(10) << std::pow(this->GetAmplitude(k, vbaryon) * Dplus0, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total)*Dplus_start_, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm)*Dplus_start_, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon)*Dplus_start_, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vcdm)*Dplus_start_, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon)*Dplus_start_, 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, total0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, cdm0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, baryon0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vcdm0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vbaryon0), 2.0) + << std::setw(20) << std::setprecision(10) << std::pow(this->get_amplitude(k, vtotal), 2.0) << std::endl; } } - - csoca::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl; + music::ilog << "Wrote power spectrum at a=" << a << " to file \'" << fname << "\'" << std::endl; } - const CosmologyParameters &GetParams(void) const + const cosmology::parameters &get_parameters(void) const noexcept { return cosmo_param_; } - //! returns the amplitude of amplitude of the power spectrum - /*! - * @param k the wave number in h/Mpc - * @param a the expansion factor of the universe - * @returns power spectrum amplitude for wave number k at time a - */ - inline real_t Power(real_t k, real_t a) + //! 
return the value of the Hubble function H(a) = dloga/dt + inline double H_of_a(double a) const noexcept { - real_t Dplus = CalcGrowthFactor(a); - real_t DplusOne = CalcGrowthFactor(1.0); - real_t pNorm = ComputePNorm(); - Dplus /= DplusOne; - DplusOne = 1.0; - real_t scale = Dplus / DplusOne; - return pNorm * scale * scale * TransferSq(k) * pow((double)k, (double)cosmo_param_.nspect); + double HH2 = 0.0; + HH2 += cosmo_param_.Omega_r / (a * a * a * a); + HH2 += cosmo_param_.Omega_m / (a * a * a); + HH2 += cosmo_param_.Omega_k / (a * a); + HH2 += cosmo_param_.Omega_DE * std::pow(a, -3. * (1. + cosmo_param_.w_0 + cosmo_param_.w_a)) * exp(-3. * (1.0 - a) * cosmo_param_.w_a); + return cosmo_param_.H0 * std::sqrt(HH2); } - inline static double H_of_a(double a, void *Params) + //! Computes the linear theory growth factor D+, normalised to D+(a=1)=1 + real_t get_growth_factor(real_t a) const noexcept { - CosmologyParameters *cosm = (CosmologyParameters *)Params; - double a2 = a * a; - double Ha = sqrt(cosm->Omega_m / (a2 * a) + cosm->Omega_k / a2 + cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a)); - return Ha; + return D_of_a_(a) / Dnow_; } - inline static double Hprime_of_a(double a, void *Params) + //! Computes the inverse of get_growth_factor + real_t get_a( real_t Dplus ) const noexcept { - CosmologyParameters *cosm = (CosmologyParameters *)Params; - double a2 = a * a; - double H = H_of_a(a, Params); - double Hprime = 1 / (a * H) * (-1.5 * cosm->Omega_m / (a2 * a) - cosm->Omega_k / a2 - 1.5 * cosm->Omega_DE * pow(a, -3. * (1. + cosm->w_0 + cosm->w_a)) * exp(-3. * (1.0 - a) * cosm->w_a) * (1. + cosm->w_0 + (1. - a) * cosm->w_a)); - return Hprime; + return a_of_D_( Dplus * Dnow_ ); } - //! Integrand used by function CalcGrowthFactor to determine the linear growth factor D+ - inline static double GrowthIntegrand(double a, void *Params) + //! Computes the linear theory growth rate f + /*! 
Function computes (by interpolating on precalculated table) + * f = dlog D+ / dlog a + */ + real_t get_f(real_t a) const noexcept { - double Ha = a * H_of_a(a, Params); - return 2.5 / (Ha * Ha * Ha); - } - - //! integrand function for Calc_fPeebles - /*! - * @sa Calc_fPeebles - */ - inline static double fIntegrand( double a, void *Params ) - { - CosmologyParameters *cosm = (CosmologyParameters *)Params; - double y = cosm->Omega_m*(1.0/a-1.0) + cosm->Omega_DE*(a*a-1.0) + 1.0; - return 1.0/pow(y,1.5); - } - - //! calculates d log D+/d log a - /*! this version follows the Peebles (TBD: add citation) - * formula to compute Bertschinger's vfact - */ - inline real_t CalcGrowthRate( real_t a ) - { - #warning CalcGrowthRate is only correct if dark energy is a cosmological constant, need to upgrade calculator... - real_t y = cosmo_param_.Omega_m*(1.0/a-1.0) + cosmo_param_.Omega_DE*(a*a-1.0) + 1.0; - real_t fact = integrate( &fIntegrand, 1e-6, a, (void*)&cosmo_param_ ); - return (cosmo_param_.Omega_DE*a*a-0.5*cosmo_param_.Omega_m/a)/y - 1.0 + a*fIntegrand(a,(void*)&cosmo_param_)/fact; - } - - //! Computes the linear theory growth factor D+ - /*! Function integrates over member function GrowthIntegrand and computes - * /a - * D+(a) = 5/2 H(a) * | [a'^3 * H(a')^3]^(-1) da' - * /0 - */ - real_t CalcGrowthFactor(real_t a) const - { - real_t integral = integrate(&GrowthIntegrand, 0.0, a, (void *)&cosmo_param_); - return H_of_a(a, (void *)&cosmo_param_) * integral; + return f_of_a_(a); } //! Compute the factor relating particle displacement and velocity /*! 
Function computes - * - * vfac = a^2 * H(a) * dlogD+ / d log a = a^2 * H'(a) + 5/2 * [ a * D+(a) * H(a) ]^(-1) - * - */ - real_t CalcVFact(real_t a) const + * vfac = a * (H(a)/h) * dlogD+ / dlog a + */ + real_t get_vfact(real_t a) const noexcept { - real_t Dp = CalcGrowthFactor(a); - real_t H = H_of_a(a, (void *)&cosmo_param_); - real_t Hp = Hprime_of_a(a, (void *)&cosmo_param_); - real_t a2 = a * a; - - return (a2 * Hp + 2.5 / (a * Dp * H)) * 100.0; + return f_of_a_(a) * a * H_of_a(a) / cosmo_param_.h; } //! Integrand for the sigma_8 normalization of the power spectrum @@ -210,8 +268,8 @@ public: if (k <= 0.0) return 0.0f; - CosmologyCalculator *pcc = reinterpret_cast(pParams); - + cosmology::calculator *pcc = reinterpret_cast(pParams); + double x = k * 8.0; double w = 3.0 * (sin(x) - x * cos(x)) / (x * x * x); static double nspect = (double)pcc->cosmo_param_.nspect; @@ -229,8 +287,8 @@ public: if (k <= 0.0) return 0.0f; - CosmologyCalculator *pcc = reinterpret_cast(pParams); - + cosmology::calculator *pcc = reinterpret_cast(pParams); + double x = k * 8.0; double w = 3.0 * (sin(x) - x * cos(x)) / (x * x * x); static double nspect = (double)pcc->cosmo_param_.nspect; @@ -240,24 +298,12 @@ public: return k * k * w * w * pow((double)k, (double)nspect) * tf * tf; } - //! Computes the square of the transfer function - /*! Function evaluates the supplied transfer function ptransfer_fun_ - * and returns the square of its value at wave number k - * @param k wave number at which to evaluate the transfer function - */ - inline real_t TransferSq(real_t k) const - { - //.. parameter supplied transfer function - real_t tf1 = transfer_function_->compute(k, total); - return tf1 * tf1; - } - //! Computes the amplitude of a mode from the power spectrum /*! 
Function evaluates the supplied transfer function ptransfer_fun_ * and returns the amplitude of fluctuations at wave number k at z=0 * @param k wave number at which to evaluate */ - inline real_t GetAmplitude(real_t k, tf_type type) const + inline real_t get_amplitude(real_t k, tf_type type) const { return std::pow(k, 0.5 * cosmo_param_.nspect) * transfer_function_->compute(k, type) * cosmo_param_.sqrtpnorm; } @@ -267,18 +313,30 @@ public: * integrates the power spectrum to fix the normalization to that given * by the sigma_8 parameter */ - real_t ComputePNorm(void) + real_t compute_sigma8(void) { real_t sigma0, kmin, kmax; kmax = transfer_function_->get_kmax(); kmin = transfer_function_->get_kmin(); if (!transfer_function_->tf_has_total0()) - sigma0 = 4.0 * M_PI * integrate(&dSigma8, (double)kmin, (double)kmax, this ); - else - sigma0 = 4.0 * M_PI * integrate(&dSigma8_0, (double)kmin, (double)kmax, this ); + sigma0 = 4.0 * M_PI * integrate(&dSigma8, (double)kmin, (double)kmax, this); + else{ + sigma0 = 4.0 * M_PI * integrate(&dSigma8_0, (double)kmin, (double)kmax, this); + } - return cosmo_param_.sigma8 * cosmo_param_.sigma8 / sigma0; + return std::sqrt(sigma0); + } + + //! Computes the normalization for the power spectrum + /*! 
+ * integrates the power spectrum to fix the normalization to that given + * by the sigma_8 parameter + */ + real_t compute_pnorm_from_sigma8(void) + { + auto measured_sigma8 = this->compute_sigma8(); + return cosmo_param_.sigma8 * cosmo_param_.sigma8 / (measured_sigma8 * measured_sigma8); } }; @@ -293,4 +351,6 @@ inline double jeans_sound_speed(double rho, double mass) { const double G = 6.67e-8; return pow(6.0 * mass / M_PI * sqrt(rho) * pow(G, 1.5), 1.0 / 3.0); -} \ No newline at end of file +} + +} // namespace cosmology \ No newline at end of file diff --git a/include/cosmology_parameters.hh b/include/cosmology_parameters.hh index 0c4efcd..0d3a3ad 100644 --- a/include/cosmology_parameters.hh +++ b/include/cosmology_parameters.hh @@ -1,10 +1,21 @@ #pragma once +/*******************************************************************************\ + cosmology_parameters.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 06/2019 - Oliver Hahn - first implementation +\*******************************************************************************/ +#include #include -//! structure for cosmological parameters -struct CosmologyParameters +namespace cosmology { +//! 
structure for cosmological parameters +struct parameters +{ + double Omega_m, //!< baryon+dark matter density Omega_b, //!< baryon matter density @@ -12,38 +23,88 @@ struct CosmologyParameters Omega_r, //!< photon + relativistic particle density Omega_k, //!< curvature density H0, //!< Hubble constant in km/s/Mpc + h, //!< hubble parameter nspect, //!< long-wave spectral index (scale free is nspect=1) sigma8, //!< power spectrum normalization + Tcmb, //!< CMB temperature (used to set Omega_r) + Neff, //!< effective number of neutrino species (used to set Omega_r) w_0, //!< dark energy equation of state parameter 1: w = w0 + a * wa w_a, //!< dark energy equation of state parameter 2: w = w0 + a * wa // below are helpers to store additional information - dplus, //!< linear perturbation growth factor - pnorm, //!< actual power spectrum normalisation factor + dplus, //!< linear perturbation growth factor + f, //!< growth factor logarithmic derivative + pnorm, //!< actual power spectrum normalisation factor sqrtpnorm, //!< sqrt of power spectrum normalisation factor - vfact; //!< velocity<->displacement conversion factor in Zel'dovich approx. + vfact; //!< velocity<->displacement conversion factor in Zel'dovich approx. 
- explicit CosmologyParameters(ConfigFile cf) + parameters() = delete; + + parameters( const parameters& ) = default; + + explicit parameters(config_file cf) { - Omega_b = cf.GetValue("cosmology", "Omega_b"); - Omega_m = cf.GetValue("cosmology", "Omega_m"); - Omega_DE = cf.GetValue("cosmology", "Omega_L"); - w_0 = cf.GetValueSafe("cosmology", "w0", -1.0); - w_a = cf.GetValueSafe("cosmology", "wa", 0.0); + H0 = cf.get_value("cosmology", "H0"); + h = H0 / 100.0; - Omega_r = cf.GetValueSafe("cosmology", "Omega_r", 0.0); // no longer default to nonzero (8.3e-5) + nspect = cf.get_value("cosmology", "nspec"); + + Omega_b = cf.get_value("cosmology", "Omega_b"); + + Omega_m = cf.get_value("cosmology", "Omega_m"); + + Omega_DE = cf.get_value("cosmology", "Omega_L"); + + w_0 = cf.get_value_safe("cosmology", "w0", -1.0); + + w_a = cf.get_value_safe("cosmology", "wa", 0.0); + + Tcmb = cf.get_value_safe("cosmology", "Tcmb", 2.7255); + + Neff = cf.get_value_safe("cosmology", "Neff", 3.046); + + sigma8 = cf.get_value("cosmology", "sigma_8"); + + // calculate energy density in ultrarelativistic species from Tcmb and Neff + double Omega_gamma = 4 * phys_const::sigma_SI / std::pow(phys_const::c_SI, 3) * std::pow(Tcmb, 4.0) / phys_const::rhocrit_h2_SI / (h * h); + double Omega_nu = Neff * Omega_gamma * 7. / 8. * std::pow(4. / 11., 4. 
/ 3.); + Omega_r = Omega_gamma + Omega_nu; + + if (cf.get_value_safe("cosmology", "ZeroRadiation", false)) + { + Omega_r = 0.0; + } +#if 1 + // assume zero curvature, take difference from dark energy + Omega_DE += 1.0 - Omega_m - Omega_DE - Omega_r; + Omega_k = 0.0; +#else + // allow for curvature Omega_k = 1.0 - Omega_m - Omega_DE - Omega_r; - - H0 = cf.GetValue("cosmology", "H0"); - sigma8 = cf.GetValue("cosmology", "sigma_8"); - nspect = cf.GetValue("cosmology", "nspec"); +#endif dplus = 0.0; pnorm = 0.0; vfact = 0.0; + + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "Cosmological parameters are: " << std::endl; + music::ilog << " H0 = " << std::setw(16) << H0 << "sigma_8 = " << std::setw(16) << sigma8 << std::endl; + music::ilog << " Omega_c = " << std::setw(16) << Omega_m-Omega_b << "Omega_b = " << std::setw(16) << Omega_b << std::endl; + if (!cf.get_value_safe("cosmology", "ZeroRadiation", false)){ + music::ilog << " Omega_g = " << std::setw(16) << Omega_gamma << "Omega_nu = " << std::setw(16) << Omega_nu << std::endl; + }else{ + music::ilog << " Omega_r = " << std::setw(16) << Omega_r << std::endl; + } + music::ilog << " Omega_DE = " << std::setw(16) << Omega_DE << "nspect = " << std::setw(16) << nspect << std::endl; + music::ilog << " w0 = " << std::setw(16) << w_0 << "w_a = " << std::setw(16) << w_a << std::endl; + + if( Omega_r > 0.0 ) + { + music::wlog << "Radiation enabled, using Omega_r=" << Omega_r << " internally."<< std::endl; + music::wlog << "Make sure your sim code supports this..." 
<< std::endl; + } } - CosmologyParameters(void) - { - } -}; \ No newline at end of file +}; +} // namespace cosmology \ No newline at end of file diff --git a/include/general.hh b/include/general.hh index 71e521a..88eb2f7 100644 --- a/include/general.hh +++ b/include/general.hh @@ -7,24 +7,49 @@ #if defined(USE_MPI) #include - #include +#include #else - #include +#include #endif -#ifdef USE_SINGLEPRECISION +#include + +#define _unused(x) ((void)(x)) + +// include CMake controlled configuration settings +#include + +#if defined(USE_PRECISION_FLOAT) using real_t = float; using complex_t = fftwf_complex; #define FFTW_PREFIX fftwf -#else +#elif defined(USE_PRECISION_DOUBLE) using real_t = double; using complex_t = fftw_complex; #define FFTW_PREFIX fftw +#elif defined(USE_PRECISION_LONGDOUBLE) +using real_t = long double; +using complex_t = fftwl_complex; +#define FFTW_PREFIX fftwl #endif -enum class fluid_component { density, vx, vy, vz, dx, dy, dz }; -enum class cosmo_species { dm, baryon, neutrino }; -extern std::map cosmo_species_name; +enum class fluid_component +{ + density, + vx, + vy, + vz, + dx, + dy, + dz +}; +enum class cosmo_species +{ + dm, + baryon, + neutrino +}; +extern std::map cosmo_species_name; using ccomplex_t = std::complex; @@ -45,52 +70,64 @@ using fftw_plan_t = FFTW_GEN_NAME(FFTW_PREFIX, plan); #if defined(USE_MPI) inline double get_wtime() { - return MPI_Wtime(); + return MPI_Wtime(); } -inline int MPI_Get_rank( void ){ - int rank, ret; - ret = MPI_Comm_rank(MPI_COMM_WORLD, &rank); - assert( ret==MPI_SUCCESS ); - return rank; -} - -inline int MPI_Get_size( void ){ - int size, ret; - ret = MPI_Comm_size(MPI_COMM_WORLD, &size); - assert( ret==MPI_SUCCESS ); - return size; -} - -template -MPI_Datatype GetMPIDatatype( void ) +namespace MPI { - if( typeid(T) == typeid(std::complex) ) - return MPI_COMPLEX; - - if( typeid(T) == typeid(std::complex) ) - return MPI_DOUBLE_COMPLEX; - if( typeid(T) == typeid(int) ) +inline int get_rank(void) +{ + int 
rank, ret; + ret = MPI_Comm_rank(MPI_COMM_WORLD, &rank); + assert(ret == MPI_SUCCESS); + _unused(ret); + return rank; +} + +inline int get_size(void) +{ + int size, ret; + ret = MPI_Comm_size(MPI_COMM_WORLD, &size); + assert(ret == MPI_SUCCESS); + _unused(ret); + return size; +} + +template +inline MPI_Datatype get_datatype(void) +{ + if (typeid(T) == typeid(std::complex)) + return MPI_C_FLOAT_COMPLEX; + + if (typeid(T) == typeid(std::complex)) + return MPI_C_DOUBLE_COMPLEX; + + if (typeid(T) == typeid(std::complex)) + return MPI_C_LONG_DOUBLE_COMPLEX; + + if (typeid(T) == typeid(int)) return MPI_INT; - if( typeid(T) == typeid(unsigned) ) + if (typeid(T) == typeid(unsigned)) return MPI_UNSIGNED; - if( typeid(T) == typeid(float) ) + if (typeid(T) == typeid(float)) return MPI_FLOAT; - if( typeid(T) == typeid(double) ) + if (typeid(T) == typeid(double)) return MPI_DOUBLE; - if( typeid(T) == typeid(char) ) + if (typeid(T) == typeid(long double)) + return MPI_LONG_DOUBLE; + + if (typeid(T) == typeid(char)) return MPI_CHAR; abort(); - } -inline std::string GetMPIversion( void ) +inline std::string get_version(void) { int len; char mpi_lib_ver[MPI_MAX_LIBRARY_VERSION_STRING]; @@ -98,33 +135,31 @@ inline std::string GetMPIversion( void ) MPI_Get_library_version(mpi_lib_ver, &len); return std::string(mpi_lib_ver); } - +} // namespace MPI #else - #if defined(_OPENMP) - #include - inline double get_wtime() - { - return omp_get_wtime(); - } - #else - #include - inline double get_wtime() - { - return std::clock() / double(CLOCKS_PER_SEC); - } - #endif +#if defined(_OPENMP) +#include +inline double get_wtime() +{ + return omp_get_wtime(); +} +#else +#include +inline double get_wtime() +{ + return std::clock() / double(CLOCKS_PER_SEC); +} +#endif #endif -inline void multitask_sync_barrier( void ) +inline void multitask_sync_barrier(void) { #if defined(USE_MPI) - MPI_Barrier( MPI_COMM_WORLD ); + MPI_Barrier(MPI_COMM_WORLD); #endif } - - namespace CONFIG { extern int 
MPI_thread_support; @@ -134,14 +169,4 @@ extern bool MPI_ok; extern bool MPI_threads_ok; extern bool FFTW_threads_ok; extern int num_threads; -} // namespace CONFIG - - -// These variables are autogenerated and compiled -// into the library by the version.cmake script -extern "C" -{ - extern const char* GIT_TAG; - extern const char* GIT_REV; - extern const char* GIT_BRANCH; -} \ No newline at end of file +} // namespace CONFIG \ No newline at end of file diff --git a/include/grid_fft.hh b/include/grid_fft.hh index dcb3cb4..e07e5ed 100644 --- a/include/grid_fft.hh +++ b/include/grid_fft.hh @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include #include @@ -16,22 +16,26 @@ enum space_t }; -template +#ifdef USE_MPI +template +#else +template +#endif class Grid_FFT { +public: + using data_t = data_t_; + static constexpr bool is_distributed_trait{bdistributed}; + protected: -#if defined(USE_MPI) - const MPI_Datatype MPI_data_t_type = (typeid(data_t) == typeid(double)) ? MPI_DOUBLE - : (typeid(data_t) == typeid(float)) ? MPI_FLOAT - : (typeid(data_t) == typeid(std::complex)) ? MPI_COMPLEX - : (typeid(data_t) == typeid(std::complex)) ? MPI_DOUBLE_COMPLEX : MPI_INT; -#endif + using grid_fft_t = Grid_FFT; + public: std::array n_, nhalf_; std::array sizes_; size_t npr_, npc_; size_t ntot_; - std::array length_, kfac_, dx_; + std::array length_, kfac_, kny_, dx_; space_t space_; data_t *data_; @@ -54,7 +58,7 @@ public: } // avoid implicit copying of data - Grid_FFT(const Grid_FFT &g) = delete; + Grid_FFT(const grid_fft_t &g) = delete; ~Grid_FFT() { @@ -64,34 +68,48 @@ public: } } - const Grid_FFT *get_grid(size_t ilevel) const { return this; } + const grid_fft_t *get_grid(size_t ilevel) const { return this; } + + bool is_distributed( void ) const noexcept { return bdistributed; } void Setup(); + //! return the number of data_t elements that we store in the container + size_t memsize( void ) const noexcept { return ntot_; } + //! 
return the (local) size of dimension i - size_t size(size_t i) const { return sizes_[i]; } + size_t size(size_t i) const noexcept { assert(i<4); return sizes_[i]; } //! return the (global) size of dimension i - size_t global_size(size_t i) const { return n_[i]; } + size_t global_size(size_t i) const noexcept { assert(i<3); return n_[i]; } //! return locally stored number of elements of field - size_t local_size(void) const { return local_0_size_ * n_[1] * n_[2]; } + size_t local_size(void) const noexcept { return local_0_size_ * n_[1] * n_[2]; } //! return a bounding box of the global extent of the field - const bounding_box &get_global_range(void) const + const bounding_box &get_global_range(void) const noexcept { return global_range_; } + bool is_nyquist_mode( size_t i, size_t j, size_t k ) const + { + assert( this->space_ == kspace_id ); + bool bres = (i+local_1_start_ == n_[1]/2); + bres |= (j == n_[0]/2); + bres |= (k == n_[2]/2); + return bres; + } + //! set all field elements to zero - void zero() + void zero() noexcept { #pragma omp parallel for for (size_t i = 0; i < ntot_; ++i) data_[i] = 0.0; } - void copy_from(const Grid_FFT &g) + void copy_from(const grid_fft_t &g) { // make sure the two fields are in the same space if (g.space_ != this->space_) @@ -113,49 +131,49 @@ public: data_[i] = g.data_[i]; } - data_t &operator[](size_t i) + data_t &operator[](size_t i) noexcept { return data_[i]; } - data_t &relem(size_t i, size_t j, size_t k) + data_t &relem(size_t i, size_t j, size_t k) noexcept { size_t idx = (i * sizes_[1] + j) * sizes_[3] + k; return data_[idx]; } - const data_t &relem(size_t i, size_t j, size_t k) const + const data_t &relem(size_t i, size_t j, size_t k) const noexcept { size_t idx = (i * sizes_[1] + j) * sizes_[3] + k; return data_[idx]; } - ccomplex_t &kelem(size_t i, size_t j, size_t k) + ccomplex_t &kelem(size_t i, size_t j, size_t k) noexcept { size_t idx = (i * sizes_[1] + j) * sizes_[3] + k; return cdata_[idx]; } - const ccomplex_t 
&kelem(size_t i, size_t j, size_t k) const + const ccomplex_t &kelem(size_t i, size_t j, size_t k) const noexcept { size_t idx = (i * sizes_[1] + j) * sizes_[3] + k; return cdata_[idx]; } - ccomplex_t &kelem(size_t idx) { return cdata_[idx]; } - const ccomplex_t &kelem(size_t idx) const { return cdata_[idx]; } - data_t &relem(size_t idx) { return data_[idx]; } - const data_t &relem(size_t idx) const { return data_[idx]; } + ccomplex_t &kelem(size_t idx) noexcept { return cdata_[idx]; } + const ccomplex_t &kelem(size_t idx) const noexcept { return cdata_[idx]; } + data_t &relem(size_t idx) noexcept { return data_[idx]; } + const data_t &relem(size_t idx) const noexcept { return data_[idx]; } - size_t get_idx(size_t i, size_t j, size_t k) const + size_t get_idx(size_t i, size_t j, size_t k) const noexcept { return (i * sizes_[1] + j) * sizes_[3] + k; } template - vec3 get_r(const size_t i, const size_t j, const size_t k) const + vec3_t get_r(const size_t i, const size_t j, const size_t k) const noexcept { - vec3 rr; + vec3_t rr; rr[0] = real_t(i + local_0_start_) * dx_[0]; rr[1] = real_t(j) * dx_[1]; @@ -165,9 +183,9 @@ public: } template - vec3 get_unit_r(const size_t i, const size_t j, const size_t k) const + vec3_t get_unit_r(const size_t i, const size_t j, const size_t k) const noexcept { - vec3 rr; + vec3_t rr; rr[0] = real_t(i + local_0_start_) / real_t(n_[0]); rr[1] = real_t(j) / real_t(n_[1]); @@ -177,91 +195,155 @@ public: } template - vec3 get_unit_r_staggered(const size_t i, const size_t j, const size_t k) const + vec3_t get_unit_r_shifted(const size_t i, const size_t j, const size_t k, const vec3_t s) const noexcept { - vec3 rr; + vec3_t rr; - rr[0] = (real_t(i + local_0_start_) + 0.5) / real_t(n_[0]); - rr[1] = (real_t(j) + 0.5) / real_t(n_[1]); - rr[2] = (real_t(k) + 0.5) / real_t(n_[2]); + rr[0] = (real_t(i + local_0_start_) + s.x) / real_t(n_[0]); + rr[1] = (real_t(j) + s.y) / real_t(n_[1]); + rr[2] = (real_t(k) + s.z) / real_t(n_[2]); return rr; } - 
template - vec3 get_unit_r_shifted(const size_t i, const size_t j, const size_t k, double sx, double sy, double sz) const + vec3_t get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const noexcept { - vec3 rr; - - rr[0] = (real_t(i + local_0_start_) + sx) / real_t(n_[0]); - rr[1] = (real_t(j) + sy) / real_t(n_[1]); - rr[2] = (real_t(k) + sz) / real_t(n_[2]); - - return rr; + return vec3_t({i + local_0_start_, j, k}); } - void cell_pos(int ilevel, size_t i, size_t j, size_t k, double *x) const - { - x[0] = double(i + local_0_start_) / size(0); - x[1] = double(j) / size(1); - x[2] = double(k) / size(2); - } - - vec3 get_cell_idx_3d(const size_t i, const size_t j, const size_t k) const - { - return vec3({i + local_0_start_, j, k}); - } - - size_t get_cell_idx_1d(const size_t i, const size_t j, const size_t k) const + size_t get_cell_idx_1d(const size_t i, const size_t j, const size_t k) const noexcept { return ((i + local_0_start_) * size(1) + j) * size(2) + k; } - size_t count_leaf_cells(int, int) const + //! 
deprecated function, was needed for old output plugin + size_t count_leaf_cells(int, int) const noexcept { return n_[0] * n_[1] * n_[2]; } - real_t get_dx(int idim) const + real_t get_dx(int idim) const noexcept { + assert(idim<3&&idim>=0); return dx_[idim]; } - const std::array &get_dx(void) const + const std::array &get_dx(void) const noexcept { return dx_; } template - vec3 get_k(const size_t i, const size_t j, const size_t k) const + vec3_t get_k(const size_t i, const size_t j, const size_t k) const noexcept { - vec3 kk; - -#if defined(USE_MPI) - auto ip = i + local_1_start_; - kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0]; - kk[1] = (real_t(ip) - real_t(ip > nhalf_[1]) * n_[1]) * kfac_[1]; -#else - kk[0] = (real_t(i) - real_t(i > nhalf_[0]) * n_[0]) * kfac_[0]; - kk[1] = (real_t(j) - real_t(j > nhalf_[1]) * n_[1]) * kfac_[1]; -#endif + vec3_t kk; + if( bdistributed ){ + auto ip = i + local_1_start_; + kk[0] = (real_t(j) - real_t(j > nhalf_[0]) * n_[0]) * kfac_[0]; + kk[1] = (real_t(ip) - real_t(ip > nhalf_[1]) * n_[1]) * kfac_[1]; + }else{ + kk[0] = (real_t(i) - real_t(i > nhalf_[0]) * n_[0]) * kfac_[0]; + kk[1] = (real_t(j) - real_t(j > nhalf_[1]) * n_[1]) * kfac_[1]; + } kk[2] = (real_t(k) - real_t(k > nhalf_[2]) * n_[2]) * kfac_[2]; return kk; } + template + vec3_t get_k(const real_t i, const real_t j, const real_t k) const noexcept + { + vec3_t kk; + if( bdistributed ){ + auto ip = i + real_t(local_1_start_); + kk[0] = (j - real_t(j > real_t(nhalf_[0])) * n_[0]) * kfac_[0]; + kk[1] = (ip - real_t(ip > real_t(nhalf_[1])) * n_[1]) * kfac_[1]; + }else{ + kk[0] = (real_t(i) - real_t(i > real_t(nhalf_[0])) * n_[0]) * kfac_[0]; + kk[1] = (real_t(j) - real_t(j > real_t(nhalf_[1])) * n_[1]) * kfac_[1]; + } + kk[2] = (real_t(k) - real_t(k > real_t(nhalf_[2])) * n_[2]) * kfac_[2]; + + return kk; + } + + std::array get_k3(const size_t i, const size_t j, const size_t k) const noexcept + { + return bdistributed? 
std::array({j,i+local_1_start_,k}) : std::array({i,j,k}); + } + + data_t get_cic( const vec3_t& v ) const noexcept + { + // warning! this doesn't work with MPI + vec3_t x({std::fmod(v.x/length_[0]+1.0,1.0)*n_[0], + std::fmod(v.y/length_[1]+1.0,1.0)*n_[1], + std::fmod(v.z/length_[2]+1.0,1.0)*n_[2] }); + size_t ix = static_cast(x.x); + size_t iy = static_cast(x.y); + size_t iz = static_cast(x.z); + real_t dx = x.x-real_t(ix), tx = 1.0-dx; + real_t dy = x.y-real_t(iy), ty = 1.0-dy; + real_t dz = x.z-real_t(iz), tz = 1.0-dz; + size_t ix1 = (ix+1)%n_[0]; + size_t iy1 = (iy+1)%n_[1]; + size_t iz1 = (iz+1)%n_[2]; + data_t val = 0.0; + val += this->relem(ix ,iy ,iz ) * tx * ty * tz; + val += this->relem(ix ,iy ,iz1) * tx * ty * dz; + val += this->relem(ix ,iy1,iz ) * tx * dy * tz; + val += this->relem(ix ,iy1,iz1) * tx * dy * dz; + val += this->relem(ix1,iy ,iz ) * dx * ty * tz; + val += this->relem(ix1,iy ,iz1) * dx * ty * dz; + val += this->relem(ix1,iy1,iz ) * dx * dy * tz; + val += this->relem(ix1,iy1,iz1) * dx * dy * dz; + return val; + } + + ccomplex_t get_cic_kspace( const vec3_t x ) const noexcept + { + // warning! 
this doesn't work with MPI + int ix = static_cast(std::floor(x.x)); + int iy = static_cast(std::floor(x.y)); + int iz = static_cast(std::floor(x.z)); + real_t dx = x.x-real_t(ix), tx = 1.0-dx; + real_t dy = x.y-real_t(iy), ty = 1.0-dy; + real_t dz = x.z-real_t(iz), tz = 1.0-dz; + size_t ix1 = (ix+1)%size(0); + size_t iy1 = (iy+1)%size(1); + size_t iz1 = std::min((iz+1),int(size(2))-1); + ccomplex_t val = 0.0; + val += this->kelem(ix ,iy ,iz ) * tx * ty * tz; + val += this->kelem(ix ,iy ,iz1) * tx * ty * dz; + val += this->kelem(ix ,iy1,iz ) * tx * dy * tz; + val += this->kelem(ix ,iy1,iz1) * tx * dy * dz; + val += this->kelem(ix1,iy ,iz ) * dx * ty * tz; + val += this->kelem(ix1,iy ,iz1) * dx * ty * dz; + val += this->kelem(ix1,iy1,iz ) * dx * dy * tz; + val += this->kelem(ix1,iy1,iz1) * dx * dy * dz; + // if( val != val ){ + //auto k = this->get_k(ix,iy,iz); + //std::cerr << ix << " " << iy << " " << iz << " " << val << " " << this->gradient(0,{ix,iy,iz}) << " " << this->gradient(1,{ix,iy,iz}) << " " << this->gradient(2,{ix,iy,iz}) << std::endl; + // } + return val; + } + inline ccomplex_t gradient( const int idim, std::array ijk ) const { -#if defined(USE_MPI) - ijk[0] += local_1_start_; - std::swap(ijk[0],ijk[1]); -#endif + if( bdistributed ){ + ijk[0] += local_1_start_; + std::swap(ijk[0],ijk[1]); + } real_t rgrad = (ijk[idim]!=nhalf_[idim])? 
(real_t(ijk[idim]) - real_t(ijk[idim] > nhalf_[idim]) * n_[idim]) * kfac_[idim] : 0.0; return ccomplex_t(0.0,rgrad); } - Grid_FFT &operator*=(data_t x) + inline real_t laplacian( const std::array& ijk ) const noexcept + { + return -this->get_k(ijk[0],ijk[1],ijk[2]).norm_squared(); + } + + grid_fft_t &operator*=(data_t x) { if (space_ == kspace_id) { @@ -274,7 +356,7 @@ public: return *this; } - Grid_FFT &operator/=(data_t x) + grid_fft_t &operator/=(data_t x) { if (space_ == kspace_id) { @@ -287,7 +369,7 @@ public: return *this; } - Grid_FFT &apply_Laplacian(void) + grid_fft_t &apply_Laplacian(void) { this->FourierTransformForward(); this->apply_function_k_dep([&](auto x, auto k) { @@ -298,7 +380,7 @@ public: return *this; } - Grid_FFT &apply_negative_Laplacian(void) + grid_fft_t &apply_negative_Laplacian(void) { this->FourierTransformForward(); this->apply_function_k_dep([&](auto x, auto k) { @@ -309,7 +391,7 @@ public: return *this; } - Grid_FFT &apply_InverseLaplacian(void) + grid_fft_t &apply_InverseLaplacian(void) { this->FourierTransformForward(); this->apply_function_k_dep([&](auto x, auto k) { @@ -354,11 +436,10 @@ public: } } - double compute_2norm(void) + real_t compute_2norm(void) const { real_t sum1{0.0}; -#pragma omp parallel for reduction(+ \ - : sum1) + #pragma omp parallel for reduction(+ : sum1) for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -377,60 +458,60 @@ public: return sum1; } - double std(void) + real_t std(void) const { double sum1{0.0}, sum2{0.0}; size_t count{0}; -#pragma omp parallel for reduction(+ \ - : sum1, sum2) + #pragma omp parallel for reduction(+ : sum1, sum2) for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) { for (size_t k = 0; k < sizes_[2]; ++k) { - const auto elem = std::real(this->relem(i, j, k)); - sum1 += elem; - sum2 += elem * elem; + const auto elem = (space_==kspace_id)? 
this->kelem(i, j, k) : this->relem(i, j, k); + sum1 += std::real(elem); + sum2 += std::norm(elem);// * elem; } } } count = sizes_[0] * sizes_[1] * sizes_[2]; #ifdef USE_MPI - double globsum1{0.0}, globsum2{0.0}; - size_t globcount{0}; + if( bdistributed ){ + double globsum1{0.0}, globsum2{0.0}; + size_t globcount{0}; - MPI_Allreduce(reinterpret_cast(&sum1), - reinterpret_cast(&globsum1), - 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&sum1), + reinterpret_cast(&globsum1), + 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(reinterpret_cast(&sum2), - reinterpret_cast(&globsum2), - 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&sum2), + reinterpret_cast(&globsum2), + 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(reinterpret_cast(&count), - reinterpret_cast(&globcount), - 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&count), + reinterpret_cast(&globcount), + 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); - sum1 = globsum1; - sum2 = globsum2; - count = globcount; + sum1 = globsum1; + sum2 = globsum2; + count = globcount; + } #endif sum1 /= count; sum2 /= count; - return std::sqrt(sum2 - sum1 * sum1); + return real_t(std::sqrt(sum2 - sum1 * sum1)); } - double mean(void) + real_t mean(void) const { double sum1{0.0}; size_t count{0}; -#pragma omp parallel for reduction(+ \ - : sum1) + #pragma omp parallel for reduction(+ : sum1) for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -445,32 +526,34 @@ public: count = sizes_[0] * sizes_[1] * sizes_[2]; #ifdef USE_MPI - double globsum1{0.0}; - size_t globcount{0}; + if( bdistributed ){ + double globsum1{0.0}; + size_t globcount{0}; - MPI_Allreduce(reinterpret_cast(&sum1), - reinterpret_cast(&globsum1), - 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&sum1), + reinterpret_cast(&globsum1), + 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - 
MPI_Allreduce(reinterpret_cast(&count), - reinterpret_cast(&globcount), - 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(reinterpret_cast(&count), + reinterpret_cast(&globcount), + 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); - sum1 = globsum1; - count = globcount; + sum1 = globsum1; + count = globcount; + } #endif sum1 /= count; - return sum1; + return real_t(sum1); } template void assign_function_of_grids_r(const functional &f, const grid_t &g) { - assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) ); + assert(g.size(0) == size(0) && g.size(1) == size(1)); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -489,10 +572,10 @@ public: template void assign_function_of_grids_r(const functional &f, const grid1_t &g1, const grid2_t &g2) { - assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g1.size(2) == size(2)); - assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g2.size(2) == size(2)); + assert(g1.size(0) == size(0) && g1.size(1) == size(1)); + assert(g2.size(0) == size(0) && g2.size(1) == size(1)); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -518,7 +601,7 @@ public: assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g2.size(2) == size(2)); assert(g3.size(0) == size(0) && g3.size(1) == size(1)); // && g3.size(2) == size(2)); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -543,7 +626,7 @@ public: { assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) ); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -565,7 +648,7 @@ public: assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g.size(2) == 
size(2) ); assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g.size(2) == size(2) ); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -582,18 +665,39 @@ public: } } - template - void assign_function_of_grids_kdep(const functional &f, const grid1_t &g1, const grid2_t &g2) + template + void assign_function_of_grids_kdep(const functional &f, const grid_t &g) { - assert(g1.size(0) == size(0) && g1.size(1) == size(1)); // && g.size(2) == size(2) ); - assert(g2.size(0) == size(0) && g2.size(1) == size(1)); // && g.size(2) == size(2) ); + assert(g.size(0) == size(0) && g.size(1) == size(1)); // && g.size(2) == size(2) ); -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) { for (size_t k = 0; k < sizes_[2]; ++k) + { + auto &elem = this->kelem(i, j, k); + const auto &elemg = g.kelem(i, j, k); + + elem = f(this->get_k(i, j, k), elemg); + } + } + } + } + + template + void assign_function_of_grids_kdep(const functional &f, const grid1_t &g1, const grid2_t &g2) + { + assert(g1.size(0) == size(0) && g1.size(1) == size(1) && g1.size(2) == size(2) ); + assert(g2.size(0) == size(0) && g2.size(1) == size(1) && g2.size(2) == size(2) ); + + #pragma omp parallel for + for (size_t i = 0; i < size(0); ++i) + { + for (size_t j = 0; j < size(1); ++j) + { + for (size_t k = 0; k < size(2); ++k) { auto &elem = this->kelem(i, j, k); const auto &elemg1 = g1.kelem(i, j, k); @@ -608,7 +712,7 @@ public: template void apply_function_k_dep(const functional &f) { -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -625,7 +729,7 @@ public: template void apply_function_r_dep(const functional &f) { -#pragma omp parallel for + #pragma omp parallel for for (size_t i = 0; i < sizes_[0]; ++i) { for (size_t j = 0; j < sizes_[1]; ++j) @@ -649,48 
+753,31 @@ public: void Write_to_HDF5(std::string fname, std::string datasetname) const; + void Read_from_HDF5( std::string fname, std::string datasetname ); + void Write_PowerSpectrum(std::string ofname); void Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count); void Write_PDF(std::string ofname, int nbins = 1000, double scale = 1.0, double rhomin = 1e-3, double rhomax = 1e3); - // void stagger_field(void) - // { - // FourierTransformForward(); - // apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { - // real_t shift = k[0] * get_dx()[0] + k[1] * get_dx()[1] + k[2] * get_dx()[2]; - // return x * std::exp(ccomplex_t(0.0, 0.5 * shift)); - // }); - // FourierTransformBackward(); - // } - - void shift_field( double sx, double sy, double sz ) + void shift_field( const vec3_t& s, bool transform_back=true ) { FourierTransformForward(); apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { -#ifdef WITH_MPI - real_t shift = sy * k[0] * get_dx()[0] + sx * k[1] * get_dx()[1] + sz * k[2] * get_dx()[2]; -#else - real_t shift = sx * k[0] * get_dx()[0] + sy * k[1] * get_dx()[1] + sz * k[2] * get_dx()[2]; -#endif + real_t shift = s.x * k[0] * get_dx()[0] + s.y * k[1] * get_dx()[1] + s.z * k[2] * get_dx()[2]; return x * std::exp(ccomplex_t(0.0, shift)); }); - FourierTransformBackward(); - } - - void stagger_field(void) - { - this->shift_field( 0.5, 0.5, 0.5 ); + if( transform_back ){ + FourierTransformBackward(); + } } void zero_DC_mode(void) { if (space_ == kspace_id) { -#ifdef USE_MPI - if (CONFIG::MPI_task_rank == 0) -#endif + if (CONFIG::MPI_task_rank == 0 || !bdistributed ) cdata_[0] = (data_t)0.0; } else @@ -707,12 +794,14 @@ public: } } } + if( bdistributed ){ #if defined(USE_MPI) - data_t glob_sum = 0.0; - MPI_Allreduce(reinterpret_cast(&sum), reinterpret_cast(&glob_sum), - 1, GetMPIDatatype(), MPI_SUM, MPI_COMM_WORLD); - sum = glob_sum; + data_t glob_sum = 0.0; + MPI_Allreduce(reinterpret_cast(&sum), 
reinterpret_cast(&glob_sum), + 1, MPI::get_datatype(), MPI_SUM, MPI_COMM_WORLD); + sum = glob_sum; #endif + } sum /= sizes_[0] * sizes_[1] * sizes_[2]; #pragma omp parallel for diff --git a/include/grid_interpolate.hh b/include/grid_interpolate.hh new file mode 100644 index 0000000..5304fab --- /dev/null +++ b/include/grid_interpolate.hh @@ -0,0 +1,191 @@ +#pragma once + +#include +#include + +#include + +#include + +template +struct grid_interpolate +{ + using data_t = typename grid_t::data_t; + using vec3 = std::array; + + static constexpr bool is_distributed_trait = grid_t::is_distributed_trait; + static constexpr int interpolation_order = interp_order; + + std::vector boundary_; + std::vector local0starts_; + const grid_t &gridref; + size_t nx_, ny_, nz_; + + explicit grid_interpolate(const grid_t &g) + : gridref(g), nx_(g.n_[0]), ny_(g.n_[1]), nz_(g.n_[2]) + { + static_assert(interpolation_order >= 0 && interpolation_order <= 2, "Interpolation order needs to be 0 (NGP), 1 (CIC), or 2 (TSC)."); + + if (is_distributed_trait) + { + update_ghosts( g ); + } + } + + void update_ghosts( const grid_t &g ) + { + #if defined(USE_MPI) + + int local_0_start = int(gridref.local_0_start_); + local0starts_.assign(MPI::get_size(), 0); + + MPI_Allgather(&local_0_start, 1, MPI_INT, &local0starts_[0], 1, MPI_INT, MPI_COMM_WORLD); + + //... 
exchange boundary + size_t nx = interpolation_order + 1; + size_t ny = g.n_[1]; + size_t nz = g.n_[2]; + + boundary_.assign(nx * ny * nz, data_t{0.0}); + + for (size_t i = 0; i < nx; ++i) + { + for (size_t j = 0; j < ny; ++j) + { + for (size_t k = 0; k < nz; ++k) + { + boundary_[(i * ny + j) * nz + k] = g.relem(i, j, k); + } + } + } + + int sendto = (MPI::get_rank() + MPI::get_size() - 1) % MPI::get_size(); + int recvfrom = (MPI::get_rank() + MPI::get_size() + 1) % MPI::get_size(); + + MPI_Status status; + status.MPI_ERROR = MPI_SUCCESS; + + int err = MPI_Sendrecv_replace(&boundary_[0], nx * ny * nz, MPI::get_datatype(), sendto, + MPI::get_rank() + 1000, recvfrom, recvfrom + 1000, MPI_COMM_WORLD, &status); + + if( err != MPI_SUCCESS ){ + char errstr[256]; int errlen=256; + MPI_Error_string(err, errstr, &errlen ); + music::elog << "MPI_ERROR #" << err << " : " << errstr << std::endl; + } +#endif + } + + data_t get_ngp_at(const std::array &pos, std::vector &val) const noexcept + { + size_t ix = static_cast(pos[0]); + size_t iy = static_cast(pos[1]); + size_t iz = static_cast(pos[2]); + return gridref.relem(ix - gridref.local_0_start_, iy, iz); + } + + data_t get_cic_at(const std::array &pos) const noexcept + { + size_t ix = static_cast(pos[0]); + size_t iy = static_cast(pos[1]); + size_t iz = static_cast(pos[2]); + real_t dx = pos[0] - real_t(ix), tx = 1.0 - dx; + real_t dy = pos[1] - real_t(iy), ty = 1.0 - dy; + real_t dz = pos[2] - real_t(iz), tz = 1.0 - dz; + size_t iy1 = (iy + 1) % ny_; + size_t iz1 = (iz + 1) % nz_; + + data_t val{0.0}; + + if( is_distributed_trait ){ + ptrdiff_t localix = ix-gridref.local_0_start_; + val += gridref.relem(localix, iy, iz) * tx * ty * tz; + val += gridref.relem(localix, iy, iz1) * tx * ty * dz; + val += gridref.relem(localix, iy1, iz) * tx * dy * tz; + val += gridref.relem(localix, iy1, iz1) * tx * dy * dz; + + if( localix+1 >= gridref.local_0_size_ ){ + size_t localix1 = localix+1 - gridref.local_0_size_; + val += 
boundary_[(localix1*ny_+iy)*nz_+iz] * dx * ty * tz; + val += boundary_[(localix1*ny_+iy)*nz_+iz1] * dx * ty * dz; + val += boundary_[(localix1*ny_+iy1)*nz_+iz] * dx * dy * tz; + val += boundary_[(localix1*ny_+iy1)*nz_+iz1] * dx * dy * dz; + }else{ + size_t localix1 = localix+1; + val += gridref.relem(localix1, iy, iz) * dx * ty * tz; + val += gridref.relem(localix1, iy, iz1) * dx * ty * dz; + val += gridref.relem(localix1, iy1, iz) * dx * dy * tz; + val += gridref.relem(localix1, iy1, iz1) * dx * dy * dz; + } + }else{ + size_t ix1 = (ix + 1) % nx_; + val += gridref.relem(ix, iy, iz) * tx * ty * tz; + val += gridref.relem(ix, iy, iz1) * tx * ty * dz; + val += gridref.relem(ix, iy1, iz) * tx * dy * tz; + val += gridref.relem(ix, iy1, iz1) * tx * dy * dz; + val += gridref.relem(ix1, iy, iz) * dx * ty * tz; + val += gridref.relem(ix1, iy, iz1) * dx * ty * dz; + val += gridref.relem(ix1, iy1, iz) * dx * dy * tz; + val += gridref.relem(ix1, iy1, iz1) * dx * dy * dz; + } + return val; + } + + // data_t get_tsc_at(const std::array &pos, std::vector &val) const + // { + // } + + int get_task(const vec3 &x) const noexcept + { + const auto it = std::upper_bound(local0starts_.begin(), local0starts_.end(), int(x[0])); + return std::distance(local0starts_.begin(), it)-1; + } + + void domain_decompose_pos(std::vector &pos) const noexcept + { + if (is_distributed_trait) + { +#if defined(USE_MPI) + std::sort(pos.begin(), pos.end(), [&](auto x1, auto x2) { return get_task(x1) < get_task(x2); }); + std::vector sendcounts(MPI::get_size(), 0), sendoffsets(MPI::get_size(), 0); + std::vector recvcounts(MPI::get_size(), 0), recvoffsets(MPI::get_size(), 0); + for (auto x : pos) + { + sendcounts[get_task(x)] += 3; + } + + MPI_Alltoall(&sendcounts[0], 1, MPI_INT, &recvcounts[0], 1, MPI_INT, MPI_COMM_WORLD); + + size_t tot_receive = recvcounts[0], tot_send = sendcounts[0]; + for (int i = 1; i < MPI::get_size(); ++i) + { + sendoffsets[i] = sendcounts[i - 1] + sendoffsets[i - 1]; + 
recvoffsets[i] = recvcounts[i - 1] + recvoffsets[i - 1]; + tot_receive += recvcounts[i]; + tot_send += sendcounts[i]; + } + + std::vector recvbuf(tot_receive/3,{0.,0.,0.}); + + MPI_Alltoallv(&pos[0], &sendcounts[0], &sendoffsets[0], MPI::get_datatype(), + &recvbuf[0], &recvcounts[0], &recvoffsets[0], MPI::get_datatype(), MPI_COMM_WORLD); + + pos.swap( recvbuf ); +#endif + } + } + + ccomplex_t compensation_kernel( const vec3_t& k ) const noexcept + { + auto sinc = []( real_t x ){ return (std::abs(x)>1e-10)? std::sin(x)/x : 1.0; }; + real_t dfx = sinc(0.5*M_PI*k[0]/gridref.kny_[0]); + real_t dfy = sinc(0.5*M_PI*k[1]/gridref.kny_[1]); + real_t dfz = sinc(0.5*M_PI*k[2]/gridref.kny_[2]); + real_t del = std::pow(dfx*dfy*dfz,1+interpolation_order); + + real_t shift = 0.5 * k[0] * gridref.get_dx()[0] + 0.5 * k[1] * gridref.get_dx()[1] + 0.5 * k[2] * gridref.get_dx()[2]; + + return std::exp(ccomplex_t(0.0, shift)) / del; + } + +}; \ No newline at end of file diff --git a/include/ic_generator.hh b/include/ic_generator.hh index 59471b0..3a637e8 100644 --- a/include/ic_generator.hh +++ b/include/ic_generator.hh @@ -9,12 +9,12 @@ namespace ic_generator{ - int Run( ConfigFile& the_config ); + int Run( config_file& the_config ); - int Initialise( ConfigFile& the_config ); + int Initialise( config_file& the_config ); extern std::unique_ptr the_random_number_generator; extern std::unique_ptr the_output_plugin; - extern std::unique_ptr the_cosmo_calc; + extern std::unique_ptr the_cosmo_calc; } diff --git a/include/logger.hh b/include/logger.hh index 41fc287..e13012f 100644 --- a/include/logger.hh +++ b/include/logger.hh @@ -6,35 +6,35 @@ #include #include -namespace csoca { +namespace music { -enum LogLevel : int { - Off = 0, - Fatal = 1, - Error = 2, - Warning = 3, - Info = 4, - Debug = 5 +enum log_level : int { + off = 0, + fatal = 1, + error = 2, + warning = 3, + info = 4, + debug = 5 }; -class Logger { +class logger { private: - static LogLevel log_level_; + static log_level 
log_level_; static std::ofstream output_file_; public: - Logger() = default; - ~Logger() = default; + logger() = default; + ~logger() = default; - static void SetLevel(const LogLevel &level); - static LogLevel GetLevel(); + static void set_level(const log_level &level); + static log_level get_level(); - static void SetOutput(const std::string filename); - static void UnsetOutput(); + static void set_output(const std::string filename); + static void unset_output(); - static std::ofstream &GetOutput(); + static std::ofstream &get_output(); - template Logger &operator<<(const T &item) { + template logger &operator<<(const T &item) { std::cout << item; if (output_file_.is_open()) { output_file_ << item; @@ -42,7 +42,7 @@ public: return *this; } - Logger &operator<<(std::ostream &(*fp)(std::ostream &)) { + logger &operator<<(std::ostream &(*fp)(std::ostream &)) { std::cout << fp; if (output_file_.is_open()) { output_file_ << fp; @@ -51,32 +51,32 @@ public: } }; -class LogStream { +class log_stream { private: - Logger &logger_; - LogLevel stream_level_; + logger &logger_; + log_level stream_level_; std::string line_prefix_, line_postfix_; bool newline; public: - LogStream(Logger &logger, const LogLevel &level) + log_stream(logger &logger, const log_level &level) : logger_(logger), stream_level_(level), newline(true) { switch (stream_level_) { - case LogLevel::Fatal: + case log_level::fatal: line_prefix_ = "\033[31mFatal : "; break; - case LogLevel::Error: + case log_level::error: line_prefix_ = "\033[31mError : "; break; - case LogLevel::Warning: + case log_level::warning: line_prefix_ = "\033[33mWarning : "; break; - case LogLevel::Info: + case log_level::info: //line_prefix_ = " | Info | "; line_prefix_ = " \033[0m"; break; - case LogLevel::Debug: + case log_level::debug: line_prefix_ = "Debug : \033[0m"; break; default: @@ -85,14 +85,14 @@ public: } line_postfix_ = "\033[0m"; } - ~LogStream() = default; + ~log_stream() = default; inline std::string GetPrefix() const { 
return line_prefix_; } - template LogStream &operator<<(const T &item) { - if (Logger::GetLevel() >= stream_level_) { + template log_stream &operator<<(const T &item) { + if (logger::get_level() >= stream_level_) { if (newline) { logger_ << line_prefix_; newline = false; @@ -102,8 +102,8 @@ public: return *this; } - LogStream &operator<<(std::ostream &(*fp)(std::ostream &)) { - if (Logger::GetLevel() >= stream_level_) { + log_stream &operator<<(std::ostream &(*fp)(std::ostream &)) { + if (logger::get_level() >= stream_level_) { logger_ << fp; logger_ << line_postfix_; newline = true; @@ -125,11 +125,11 @@ public: }; // global instantiations for different levels -extern Logger glogger; -extern LogStream flog; -extern LogStream elog; -extern LogStream wlog; -extern LogStream ilog; -extern LogStream dlog; +extern logger glogger; +extern log_stream flog; +extern log_stream elog; +extern log_stream wlog; +extern log_stream ilog; +extern log_stream dlog; -} // namespace csoca +} // namespace music diff --git a/include/math/interpolate.hh b/include/math/interpolate.hh new file mode 100644 index 0000000..41fe8d4 --- /dev/null +++ b/include/math/interpolate.hh @@ -0,0 +1,68 @@ +#pragma once + +#include +#include +#include +#include + +template +class interpolated_function_1d +{ + +private: + bool isinit_; + std::vector data_x_, data_y_; + gsl_interp_accel *gsl_ia_; + gsl_spline *gsl_sp_; + + void deallocate() + { + gsl_spline_free(gsl_sp_); + gsl_interp_accel_free(gsl_ia_); + } + +public: + interpolated_function_1d(const interpolated_function_1d &) = delete; + + interpolated_function_1d() : isinit_(false){} + + interpolated_function_1d(const std::vector &data_x, const std::vector &data_y) + : isinit_(false) + { + this->set_data( data_x, data_y ); + } + + ~interpolated_function_1d() + { + if (isinit_) this->deallocate(); + } + + void set_data(const std::vector &data_x, const std::vector &data_y) + { + data_x_ = data_x; + data_y_ = data_y; + + assert(data_x_.size() == 
data_y_.size()); + assert(data_x_.size() > 5); + assert(!(logx & periodic)); + + if (logx) for (auto &d : data_x_) d = std::log(d); + if (logy) for (auto &d : data_y_) d = std::log(d); + + if (isinit_) this->deallocate(); + + gsl_ia_ = gsl_interp_accel_alloc(); + gsl_sp_ = gsl_spline_alloc(periodic ? gsl_interp_cspline_periodic : gsl_interp_cspline, data_x_.size()); + gsl_spline_init(gsl_sp_, &data_x_[0], &data_y_[0], data_x_.size()); + + isinit_ = true; + } + + double operator()(double x) const noexcept + { + assert( isinit_ && !(logx&&x<=0.0) ); + double xa = logx ? std::log(x) : x; + double y(gsl_spline_eval(gsl_sp_, xa, gsl_ia_)); + return logy ? std::exp(y) : y; + } +}; \ No newline at end of file diff --git a/include/math/mat3.hh b/include/math/mat3.hh new file mode 100644 index 0000000..75458ea --- /dev/null +++ b/include/math/mat3.hh @@ -0,0 +1,146 @@ +#include +#include + +#include + +template +class mat3_t{ +protected: + std::array data_; + gsl_matrix_view m_; + gsl_vector *eval_; + gsl_matrix *evec_; + gsl_eigen_symmv_workspace * wsp_; + bool bdid_alloc_gsl_; + + void init_gsl(){ + // allocate memory for GSL operations if we haven't done so yet + if( !bdid_alloc_gsl_ ) + { + m_ = gsl_matrix_view_array (&data_[0], 3, 3); + eval_ = gsl_vector_alloc (3); + evec_ = gsl_matrix_alloc (3, 3); + wsp_ = gsl_eigen_symmv_alloc (3); + bdid_alloc_gsl_ = true; + } + } + + void free_gsl(){ + // free memory for GSL operations if it was allocated + if( bdid_alloc_gsl_ ) + { + gsl_eigen_symmv_free (wsp_); + gsl_vector_free (eval_); + gsl_matrix_free (evec_); + } + } + +public: + + mat3_t() + : bdid_alloc_gsl_(false) + {} + + //! copy constructor + mat3_t( const mat3_t &m) + : data_(m.data_), bdid_alloc_gsl_(false) + {} + + //! move constructor + mat3_t( mat3_t &&m) + : data_(std::move(m.data_)), bdid_alloc_gsl_(false) + {} + + //! 
construct mat3_t from initializer list + template + mat3_t(E&&...e) + : data_{{std::forward(e)...}}, bdid_alloc_gsl_(false) + {} + + mat3_t& operator=(const mat3_t& m) noexcept{ + data_ = m.data_; + return *this; + } + + mat3_t& operator=(const mat3_t&& m) noexcept{ + data_ = std::move(m.data_); + return *this; + } + + //! destructor + ~mat3_t(){ + this->free_gsl(); + } + + //! bracket index access to vector components + T &operator[](size_t i) noexcept { return data_[i];} + + //! const bracket index access to vector components + const T &operator[](size_t i) const noexcept { return data_[i]; } + + //! matrix 2d index access + T &operator()(size_t i, size_t j) noexcept { return data_[3*i+j]; } + + //! const matrix 2d index access + const T &operator()(size_t i, size_t j) const noexcept { return data_[3*i+j]; } + + //! in-place addition + mat3_t& operator+=( const mat3_t& rhs ) noexcept{ + for (size_t i = 0; i < 9; ++i) { + (*this)[i] += rhs[i]; + } + return *this; + } + + //! in-place subtraction + mat3_t& operator-=( const mat3_t& rhs ) noexcept{ + for (size_t i = 0; i < 9; ++i) { + (*this)[i] -= rhs[i]; + } + return *this; + } + + void zero() noexcept{ + for (size_t i = 0; i < 9; ++i) data_[i]=0; + } + + void eigen( vec3_t& evals, vec3_t& evec1, vec3_t& evec2, vec3_t& evec3_t ) + { + this->init_gsl(); + + gsl_eigen_symmv (&m_.matrix, eval_, evec_, wsp_); + gsl_eigen_symmv_sort (eval_, evec_, GSL_EIGEN_SORT_VAL_ASC); + + for( int i=0; i<3; ++i ){ + evals[i] = gsl_vector_get( eval_, i ); + evec1[i] = gsl_matrix_get( evec_, i, 0 ); + evec2[i] = gsl_matrix_get( evec_, i, 1 ); + evec3_t[i] = gsl_matrix_get( evec_, i, 2 ); + } + } +}; + +template +constexpr const mat3_t operator+(const mat3_t &lhs, const mat3_t &rhs) noexcept +{ + mat3_t result; + for (size_t i = 0; i < 9; ++i) { + result[i] = lhs[i] + rhs[i]; + } + return result; +} + +// matrix - vector multiplication +template +inline vec3_t operator*( const mat3_t &A, const vec3_t &v ) noexcept +{ + vec3_t result; 
+ for( int mu=0; mu<3; ++mu ){ + result[mu] = 0.0; + for( int nu=0; nu<3; ++nu ){ + result[mu] += A(mu,nu)*v[nu]; + } + } + return result; +} + diff --git a/include/math/ode_integrate.hh b/include/math/ode_integrate.hh new file mode 100644 index 0000000..3858b85 --- /dev/null +++ b/include/math/ode_integrate.hh @@ -0,0 +1,103 @@ +#pragma once +/*******************************************************************************\ + odetools.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 06/2019 - Oliver Hahn - first implementation +\*******************************************************************************/ + +namespace ode_integrate +{ + +// simple Runge-Kutta 4th order step without error estimate +template +inline void rk4_step(double h, double &t, vector_t &y, function_t f) +{ + vector_t k1(h * f(t, y)); + vector_t k2(h * f(t + h / 2, y + k1 / 2)); + vector_t k3(h * f(t + h / 2, y + k2 / 2)); + vector_t k4(h * f(t + h, y + k3)); + y += (k1 + 2 * k2 + 2 * k3 + k4) / 6; + t += h; +} + +// Cash-Karp modified Runge-Kutta scheme, 5th order with 4th order error estimate +// see Press & Teukolsky (1992): "Adaptive Stepsize Runge-Kutta Integration" +// in Computers in Physics 6, 188 (1992); doi: 10.1063/1.4823060 +template +inline vector_t ckrk5_step(double h, double &t, vector_t &y, function_t f) +{ + static constexpr double + a2 = 0.20, + a3 = 0.30, a4 = 0.60, a5 = 1.0, a6 = 0.8750, + b21 = 0.20, + b31 = 3.0 / 40.0, b32 = 9.0 / 40.0, + b41 = 0.30, b42 = -0.90, b43 = 1.20, + b51 = -11.0 / 54.0, b52 = 2.50, b53 = -70.0 / 27.0, b54 = 35.0 / 27.0, + b61 = 1631.0 / 55296.0, b62 = 175.0 / 512.0, b63 = 575.0 / 13824.0, b64 = 44275.0 / 110592.0, b65 = 253.0 / 4096.0, + c1 = 37.0 / 378.0, c3 = 250.0 / 621.0, c4 = 125.0 / 594.0, c6 = 512.0 / 1771.0, + dc1 = c1 - 2825.0 / 27648.0, dc3 = c3 - 18575.0 / 48384.0, + dc4 = c4 - 13525.0 / 55296.0, dc5 = -277.0 / 14336.0, dc6 = 
c6 - 0.250; + + vector_t k1(h * f(t, y)); + vector_t k2(h * f(t + a2 * h, y + b21 * k1)); + vector_t k3(h * f(t + a3 * h, y + b31 * k1 + b32 * k2)); + vector_t k4(h * f(t + a4 * h, y + b41 * k1 + b42 * k2 + b43 * k3)); + vector_t k5(h * f(t + a5 * h, y + b51 * k1 + b52 * k2 + b53 * k3 + b54 * k4)); + vector_t k6(h * f(t + a6 * h, y + b61 * k1 + b62 * k2 + b63 * k3 + b64 * k4 + b65 * k5)); + + y += c1 * k1 + c3 * k3 + c4 * k4 + c6 * k6; + + return dc1 * k1 + dc3 * k3 + dc4 * k4 + dc5 * k5 + dc6 * k6; +} + +// Adaptive step-size quality-controlled routine for ckrk5_step, see +// Press & Teukolsky (1992): "Adaptive Stepsize Runge-Kutta Integration" +// in Computers in Physics 6, 188 (1992); doi: 10.1063/1.4823060 +template +inline void rk_step_qs(double htry, double &t, vector_t &y, vector_t &yscale, function_t f, double eps, double &hdid, double &hnext) +{ + static constexpr double SAFETY{0.9}; + static constexpr double PSHRNK{-0.25}; + static constexpr double PGROW{-0.2}; + static constexpr double ERRCON{1.89e-4}; + + auto h(htry); + vector_t ytemp(y); + vector_t yerr; + double errmax; + +do_ckrk5trialstep: + yerr = ckrk5_step(h, t, ytemp, f); + errmax = 0.0; + for (size_t i = 0; i < yerr.size(); ++i) + { + errmax = std::max(errmax, std::abs(yerr[i] / yscale[i])); + } + errmax = errmax / eps; + if (errmax > 1.0) + { + h *= std::max(0.1, SAFETY*std::pow(errmax, PSHRNK)); + if (t + h == t) + { + std::cerr << "stepsize underflow in rkqs" << std::endl; + abort(); + } + goto do_ckrk5trialstep; + } + else + { + if( errmax > ERRCON ){ + hnext = h * SAFETY * std::pow(errmax, PGROW); + }else{ + hnext = 5*h; + } + hdid = h; + t += h; + y = ytemp; + } +} + + +} // namespace ode_integrate \ No newline at end of file diff --git a/include/math/vec3.hh b/include/math/vec3.hh new file mode 100644 index 0000000..3d1fe44 --- /dev/null +++ b/include/math/vec3.hh @@ -0,0 +1,118 @@ +/*******************************************************************\ + vec3_t.hh - This file is part of 
MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 06/2019 - Oliver Hahn - first implementation +\*******************************************************************/ +#pragma once + +//! implements a simple class of 3-vectors of arbitrary scalar type +template< typename T > +class vec3_t{ +private: + //! holds the data + std::array data_; + +public: + //! expose access to elements via references + T &x,&y,&z; + + //! empty constructor + vec3_t() + : x(data_[0]),y(data_[1]),z(data_[2]){} + + //! copy constructor + vec3_t( const vec3_t &v) + : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} + + //! copy constructor for non-const reference, needed to avoid variadic template being called for non-const reference + vec3_t( vec3_t& v) + : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} + + //! move constructor + vec3_t( vec3_t &&v) + : data_(std::move(v.data_)), x(data_[0]), y(data_[1]), z(data_[2]){} + + //! construct vec3_t from initializer list + template + vec3_t(E&&...e) + : data_{{std::forward(e)...}}, x{data_[0]}, y{data_[1]}, z{data_[2]} + {} + // vec3_t( T a, T b, T c ) + // : data_{{a,b,c}}, x(data_[0]), y(data_[1]), z(data_[2]){} + + //! bracket index access to vector components + T &operator[](size_t i) noexcept{ return data_[i];} + + //! const bracket index access to vector components + const T &operator[](size_t i) const noexcept { return data_[i]; } + + // assignment operator + vec3_t& operator=( const vec3_t& v ) noexcept { data_=v.data_; return *this; } + + //! implementation of summation of vec3_t + vec3_t operator+( const vec3_t& v ) const noexcept{ return vec3_t({x+v.x,y+v.y,z+v.z}); } + + //! implementation of difference of vec3_t + vec3_t operator-( const vec3_t& v ) const noexcept{ return vec3_t({x-v.x,y-v.y,z-v.z}); } + + //! implementation of unary negative + vec3_t operator-() const noexcept{ return vec3_t({-x,-y,-z}); } + + //! 
implementation of scalar multiplication + vec3_t operator*( T s ) const noexcept{ return vec3_t({x*s,y*s,z*s}); } + + //! implementation of scalar division + vec3_t operator/( T s ) const noexcept{ return vec3_t({x/s,y/s,z/s}); } + + //! implementation of += operator + vec3_t& operator+=( const vec3_t& v ) noexcept{ x+=v.x; y+=v.y; z+=v.z; return *this; } + + //! implementation of -= operator + vec3_t& operator-=( const vec3_t& v ) noexcept{ x-=v.x; y-=v.y; z-=v.z; return *this; } + + //! multiply with scalar + vec3_t& operator*=( T s ) noexcept{ x*=s; y*=s; z*=s; return *this; } + + //! divide by scalar + vec3_t& operator/=( T s ) noexcept{ x/=s; y/=s; z/=s; return *this; } + + //! compute dot product with another vector + T dot(const vec3_t &a) const noexcept + { + return data_[0] * a.data_[0] + data_[1] * a.data_[1] + data_[2] * a.data_[2]; + } + + //! returns 2-norm squared of vector + T norm_squared(void) const noexcept { return this->dot(*this); } + + //! returns 2-norm of vector + T norm(void) const noexcept { return std::sqrt( this->norm_squared() ); } + + //! wrap absolute vector to box of size p + vec3_t& wrap_abs( T p = 1.0 ) noexcept{ + for( auto& x : data_ ) x = std::fmod( 2*p + x, p ); + return *this; + } + + //! wrap relative vector to box of size p + vec3_t& wrap_rel( T p = 1.0 ) noexcept{ + for( auto& x : data_ ) x = (x<-p/2)? x+p : (x>=p/2)? x-p : x; + return *this; + } + + //! 
ordering, allows 3d sorting of vec3_ts + bool operator<( const vec3_t& o ) const noexcept{ + if( x!=o.x ) return x +vec3_t operator*( T s, const vec3_t& v ){ + return vec3_t({v.x*s,v.y*s,v.z*s}); +} diff --git a/include/operators.hh b/include/operators.hh index cc0ed67..e2f4c8e 100644 --- a/include/operators.hh +++ b/include/operators.hh @@ -1,9 +1,54 @@ #pragma once +/* + + operators.hh - This file is part of MUSIC2 - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2019 Oliver Hahn + +*/ +#include namespace op{ -inline auto assign_to = [](auto &g){return [&](auto i, auto v){ g[i] = v; };}; -inline auto add_to = [](auto &g){return [&](auto i, auto v){ g[i] += v; };}; -inline auto add_twice_to = [](auto &g){return [&](auto i, auto v){ g[i] += 2*v; };}; -inline auto subtract_from = [](auto &g){return [&](auto i, auto v){ g[i] -= v; };}; -inline auto subtract_twice_from = [](auto &g){return [&](auto i, auto v){ g[i] -= 2*v; };}; + +//!== list of primitive operators to work on fields ==!// + +template< typename field> +inline auto assign_to( field& g ){return [&g](auto i, auto v){ g[i] = v; };} + +template< typename field, typename val > +inline auto multiply_add_to( field& g, val x ){return [&g,x](auto i, auto v){ g[i] += v*x; };} + +template< typename field> +inline auto add_to( field& g ){return [&g](auto i, auto v){ g[i] += v; };} + +template< typename field> +inline auto subtract_from( field& g ){return [&g](auto i, auto v){ g[i] -= v; };} + +//! vanilla standard gradient +class fourier_gradient{ +private: + real_t boxlen_, k0_; + size_t n_, nhalf_; +public: + explicit fourier_gradient( const config_file& the_config ) + : boxlen_( the_config.get_value("setup", "BoxLength") ), + k0_(2.0*M_PI/boxlen_), + n_( the_config.get_value("setup","GridRes") ), + nhalf_( n_/2 ) + {} + + inline ccomplex_t gradient( const int idim, std::array ijk ) const + { + real_t rgrad = + (ijk[idim]!=nhalf_)? 
(real_t(ijk[idim]) - real_t(ijk[idim] > nhalf_) * n_) : 0.0; + return ccomplex_t(0.0,rgrad * k0_); + } + + inline real_t vfac_corr( std::array ijk ) const + { + return 1.0; + } +}; } diff --git a/include/output_plugin.hh b/include/output_plugin.hh index cc092d3..fff657c 100644 --- a/include/output_plugin.hh +++ b/include/output_plugin.hh @@ -21,11 +21,12 @@ enum class output_type {particles,field_lagrangian,field_eulerian}; + class output_plugin { protected: - //! reference to the ConfigFile object that holds all configuration options - ConfigFile &cf_; + //! reference to the config_file object that holds all configuration options + config_file &cf_; //! output file or directory name std::string fname_; @@ -34,17 +35,17 @@ protected: std::string interface_name_; public: //! constructor - output_plugin(ConfigFile &cf, std::string interface_name ) + output_plugin(config_file &cf, std::string interface_name ) : cf_(cf), interface_name_(interface_name) { - fname_ = cf_.GetValue("output", "filename"); + fname_ = cf_.get_value("output", "filename"); } //! virtual destructor virtual ~output_plugin(){} //! routine to write particle data for a species - virtual void write_particle_data(const particle::container &pc, const cosmo_species &s ) {}; + virtual void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species ) {}; //! routine to write gridded fluid component data for a species virtual void write_grid_data(const Grid_FFT &g, const cosmo_species &s, const fluid_component &c ) {}; @@ -57,6 +58,12 @@ public: //! routine to query whether species is written as particle data // virtual bool write_species_as_particles( const cosmo_species &s ){ return !write_species_as_grid(s); } + + //! query if output wants 64bit precision for real values + virtual bool has_64bit_reals() const = 0; + + //! query if output wants 64bit precision for integer values + virtual bool has_64bit_ids() const = 0; //! 
routine to return a multiplicative factor that contains the desired position units for the output virtual real_t position_unit() const = 0; @@ -71,7 +78,7 @@ public: struct output_plugin_creator { //! create an instance of a plug-in - virtual std::unique_ptr create(ConfigFile &cf) const = 0; + virtual std::unique_ptr create(config_file &cf) const = 0; //! destroy an instance of a plug-in virtual ~output_plugin_creator() {} @@ -96,12 +103,12 @@ struct output_plugin_creator_concrete : public output_plugin_creator } //! create an instance of the plug-in - std::unique_ptr create(ConfigFile &cf) const + std::unique_ptr create(config_file &cf) const { return std::make_unique(cf); // Derived( cf ); } }; //! failsafe version to select the output plug-in -std::unique_ptr select_output_plugin(ConfigFile &cf); +std::unique_ptr select_output_plugin(config_file &cf); diff --git a/include/particle_container.hh b/include/particle_container.hh index fb05889..92b683c 100644 --- a/include/particle_container.hh +++ b/include/particle_container.hh @@ -1,3 +1,10 @@ +/*******************************************************************\ + particle_container.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 10/2019 - Oliver Hahn - first implementation +\*******************************************************************/ #pragma once #ifdef USE_MPI @@ -13,57 +20,96 @@ namespace particle{ class container { public: - std::vector positions_, velocities_; - std::vector ids_; + std::vector positions32_, velocities32_; + std::vector positions64_, velocities64_; + + std::vector ids32_; + std::vector ids64_; + - container() - { - } + container(){ } container(const container &) = delete; - const void* get_pos_ptr() const{ - return reinterpret_cast( &positions_[0] ); - } - - const void* get_vel_ptr() const{ - return reinterpret_cast( &velocities_[0] ); - } - - const void* get_ids_ptr() const{ 
- return reinterpret_cast( &ids_[0] ); - } - - void allocate(size_t nump) + void allocate(size_t nump, bool b64reals, bool b64ids) { - positions_.resize(3 * nump); - velocities_.resize(3 * nump); - ids_.resize(nump); + if( b64reals ){ + positions64_.resize(3 * nump); + velocities64_.resize(3 * nump); + positions32_.clear(); + velocities32_.clear(); + }else{ + positions32_.resize(3 * nump); + velocities32_.resize(3 * nump); + positions64_.clear(); + velocities64_.clear(); + } + + if( b64ids ){ + ids64_.resize(nump); + ids32_.clear(); + }else{ + ids32_.resize(nump); + ids64_.clear(); + } } - void set_pos(size_t ipart, size_t idim, real_t p) - { - positions_[3 * ipart + idim] = p; + const void* get_pos32_ptr() const{ + return reinterpret_cast( &positions32_[0] ); } - void set_vel(size_t ipart, size_t idim, real_t p) - { - velocities_[3 * ipart + idim] = p; + void set_pos32(size_t ipart, size_t idim, float p){ + positions32_[3 * ipart + idim] = p; } - void set_id(size_t ipart, id_t id) - { - ids_[ipart] = id; + const void* get_pos64_ptr() const{ + return reinterpret_cast( &positions64_[0] ); + } + + inline void set_pos64(size_t ipart, size_t idim, double p){ + positions64_[3 * ipart + idim] = p; + } + + inline const void* get_vel32_ptr() const{ + return reinterpret_cast( &velocities32_[0] ); + } + + inline void set_vel32(size_t ipart, size_t idim, float p){ + velocities32_[3 * ipart + idim] = p; + } + + const void* get_vel64_ptr() const{ + return reinterpret_cast( &velocities64_[0] ); + } + + inline void set_vel64(size_t ipart, size_t idim, double p){ + velocities64_[3 * ipart + idim] = p; + } + + const void* get_ids32_ptr() const{ + return reinterpret_cast( &ids32_[0] ); + } + + void set_id32(size_t ipart, uint32_t id){ + ids32_[ipart] = id; + } + + const void* get_ids64_ptr() const{ + return reinterpret_cast( &ids64_[0] ); + } + + void set_id64(size_t ipart, uint64_t id){ + ids64_[ipart] = id; } size_t get_local_num_particles(void) const { - return ids_.size(); + 
return std::max(ids32_.size(),ids64_.size()); } size_t get_global_num_particles(void) const { - size_t local_nump = ids_.size(), global_nump; + size_t local_nump = this->get_local_num_particles(), global_nump; #ifdef USE_MPI MPI_Allreduce(reinterpret_cast(&local_nump), reinterpret_cast(&global_nump), 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); @@ -97,11 +143,11 @@ public: void dump(void) { - for (size_t i = 0; i < ids_.size(); ++i) + /*for (size_t i = 0; i < ids_.size(); ++i) { std::cout << positions_[3 * i + 0] << " " << positions_[3 * i + 1] << " " << positions_[3 * i + 2] << " " << velocities_[3 * i + 0] << " " << velocities_[3 * i + 1] << " " << velocities_[3 * i + 2] << std::endl; - } + }*/ } }; diff --git a/include/particle_generator.hh b/include/particle_generator.hh index da88813..5fe68d1 100644 --- a/include/particle_generator.hh +++ b/include/particle_generator.hh @@ -1,150 +1,325 @@ +/*******************************************************************\ + particle_generator.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 10/2019 - Oliver Hahn - first implementation +\*******************************************************************/ #pragma once -namespace particle { +#include +#include -enum lattice{ - lattice_sc=0, lattice_bcc=1, lattice_fcc=2 -}; +#if defined(USE_HDF5) +#include "HDF_IO.hh" +#endif -template -void initialize_lattice( container& particles, lattice lattice_type, const field_t& field ){ - const size_t num_p_in_load = field.local_size(); - const size_t overload = 1< -void set_positions( container& particles, lattice lattice_type, int idim, real_t lunit, field_t& field ) +namespace particle { - const size_t num_p_in_load = field.local_size(); + using vec3 = std::array; - for( size_t i=0,ipcount=0; i(i,j,k); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } + enum lattice + { + 
lattice_glass = -1, + lattice_sc = 0, // SC : simple cubic + lattice_bcc = 1, // BCC: body-centered cubic + lattice_fcc = 2, // FCC: face-centered cubic + lattice_rsc = 3, // RSC: refined simple cubic + }; - if( lattice_type == particle::lattice_bcc ){ - field.shift_field( 0.5, 0.5, 0.5 ); - auto ipcount0 = num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,0.5,0.5,0.5); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } - } - else if( lattice_type == particle::lattice_fcc ){ - // 0.5 0.5 0.0 - field.shift_field( 0.5, 0.5, 0.0 ); - auto ipcount0 = num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,0.5,0.5,0.0); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } - // 0.0 0.5 0.5 - field.shift_field( -0.5, 0.0, 0.5 ); - ipcount0 = 2*num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,0.0,0.5,0.5); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } - // 0.5 0.0 0.5 - field.shift_field( 0.5, -0.5, 0.0 ); - ipcount0 = 3*num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i(i,j,k,0.5,0.0,0.5); - particles.set_pos( ipcount++, idim, pos[idim]*lunit + field.relem(i,j,k) ); - } - } - } - } -} + const std::vector>> lattice_shifts = + { + // first shift must always be zero! 
(otherwise set_positions and set_velocities break) + /* SC : */ {{0.0, 0.0, 0.0}}, + /* BCC: */ {{0.0, 0.0, 0.0}, {0.5, 0.5, 0.5}}, + /* FCC: */ {{0.0, 0.0, 0.0}, {0.0, 0.5, 0.5}, {0.5, 0.0, 0.5}, {0.5, 0.5, 0.0}}, + /* RSC: */ {{0.0, 0.0, 0.0}, {0.0, 0.0, 0.5}, {0.0, 0.5, 0.0}, {0.0, 0.5, 0.5}, {0.5, 0.0, 0.0}, {0.5, 0.0, 0.5}, {0.5, 0.5, 0.0}, {0.5, 0.5, 0.5}}, + }; -template -void set_velocities( container& particles, lattice lattice_type, int idim, field_t& field ) -{ - const size_t num_p_in_load = field.local_size(); + const std::vector> second_lattice_shift = + { + /* SC : */ {0.5, 0.5, 0.5}, // this corresponds to CsCl lattice + /* BCC: */ {0.5, 0.5, 0.0}, // is there a diatomic lattice with BCC base?!? + /* FCC: */ {0.5, 0.5, 0.5}, // this corresponds to NaCl lattice + // /* FCC: */ {0.25, 0.25, 0.25}, // this corresponds to Zincblende/GaAs lattice + /* RSC: */ {0.25, 0.25, 0.25}, + }; - for( size_t i=0,ipcount=0; i + class lattice_generator + { + protected: + + struct glass + { + using data_t = typename field_t::data_t; + size_t num_p, off_p; + grid_interpolate<1, field_t> interp_; + std::vector glass_posr; + + glass( config_file& cf, const field_t &field ) + : num_p(0), off_p(0), interp_( field ) + { + std::vector glass_pos; + real_t lglassbox = 1.0; + + std::string glass_fname = cf.get_value("setup", "GlassFileName"); + size_t ntiles = cf.get_value("setup", "GlassTiles"); + +#if defined(USE_HDF5) + HDFReadGroupAttribute(glass_fname, "Header", "BoxSize", lglassbox); + HDFReadDataset(glass_fname, "/PartType1/Coordinates", glass_pos); +#else + throw std::runtime_error("Class lattice requires HDF5 support. Enable and recompile."); +#endif + + size_t np_in_file = glass_pos.size() / 3; +#if defined(USE_MPI) + num_p = np_in_file * ntiles * ntiles * ntiles / MPI::get_size(); + off_p = MPI::get_rank() * num_p; +#else + num_p = np_in_file * ntiles * ntiles * ntiles; + off_p = 0; +#endif + + music::ilog << "Glass file contains " << np_in_file << " particles." 
<< std::endl; + + glass_posr.assign(num_p, {0.0, 0.0, 0.0}); + + std::array ng({real_t(field.n_[0]), real_t(field.n_[1]), real_t(field.n_[2])}); + + #pragma omp parallel for + for (size_t i = 0; i < num_p; ++i) + { + size_t idxpart = off_p + i; + size_t idx_in_glass = idxpart % np_in_file; + size_t idxtile = idxpart / np_in_file; + size_t tile_z = idxtile % (ntiles * ntiles); + size_t tile_y = ((idxtile - tile_z) / ntiles) % ntiles; + size_t tile_x = (((idxtile - tile_z) / ntiles) - tile_y) / ntiles; + glass_posr[i][0] = std::fmod((glass_pos[3 * idx_in_glass + 0] / lglassbox + real_t(tile_x)) / ntiles * ng[0] + ng[0], ng[0]); + glass_posr[i][1] = std::fmod((glass_pos[3 * idx_in_glass + 1] / lglassbox + real_t(tile_y)) / ntiles * ng[1] + ng[1], ng[1]); + glass_posr[i][2] = std::fmod((glass_pos[3 * idx_in_glass + 2] / lglassbox + real_t(tile_z)) / ntiles * ng[2] + ng[2], ng[2]); + } + +#if defined(USE_MPI) + interp_.domain_decompose_pos(glass_posr); + + num_p = glass_posr.size(); + std::vector all_num_p( MPI::get_size(), 0 ); + MPI_Allgather( &num_p, 1, MPI_UNSIGNED_LONG_LONG, &all_num_p[0], 1, MPI_UNSIGNED_LONG_LONG, MPI_COMM_WORLD ); + off_p = 0; + for( int itask=0; itask<=MPI::get_rank(); ++itask ){ + off_p += all_num_p[itask]; + } +#endif } - } - } - if( lattice_type == particle::lattice_bcc ){ - field.shift_field( 0.5, 0.5, 0.5 ); - auto ipcount0 = num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i glass_ptr_; + + private: + particle::container particles_; + + public: + lattice_generator(lattice lattice_type, const bool b64reals, const bool b64ids, const size_t IDoffset, const field_t &field, config_file &cf) + { + if (lattice_type != lattice_glass) + { + // number of modes present in the field + const size_t num_p_in_load = field.local_size(); + // unless SC lattice is used, particle number is a multiple of the number of modes (=num_p_in_load): + const size_t overload = 1ull << std::max(0, lattice_type); // 1 for sc, 2 for bcc, 4 for fcc, 8 for rsc + // 
allocate memory for all local particles + particles_.allocate(overload * num_p_in_load, b64reals, b64ids); + // set particle IDs to the Lagrangian coordinate (1D encoded) with additionally the field shift encoded as well + + for (size_t i = 0, ipcount = 0; i < field.size(0); ++i) + { + for (size_t j = 0; j < field.size(1); ++j) + { + for (size_t k = 0; k < field.size(2); ++k, ++ipcount) + { + for (size_t iload = 0; iload < overload; ++iload) + { + if (b64ids) + { + particles_.set_id64(ipcount + iload * num_p_in_load, IDoffset + overload * field.get_cell_idx_1d(i, j, k) + iload); + } + else + { + particles_.set_id32(ipcount + iload * num_p_in_load, IDoffset + overload * field.get_cell_idx_1d(i, j, k) + iload); + } + } + } + } + } + } + else + { + glass_ptr_ = std::make_unique( cf, field ); + particles_.allocate(glass_ptr_->size(), b64reals, b64ids); + + #pragma omp parallel for + for (size_t i = 0; i < glass_ptr_->size(); ++i) + { + if (b64ids) + { + particles_.set_id64(i, IDoffset + i + glass_ptr_->offset()); + } + else + { + particles_.set_id32(i, IDoffset + i + glass_ptr_->offset()); + } } } } - } - else if( lattice_type == particle::lattice_fcc ){ - // 0.5 0.5 0.0 - field.shift_field( 0.5, 0.5, 0.0 ); - auto ipcount0 = num_p_in_load; - for( size_t i=0,ipcount=ipcount0; i= 0) + { + const size_t num_p_in_load = field.local_size(); + for (int ishift = 0; ishift < (1 << lattice_type); ++ishift) + { + // if we are dealing with the secondary lattice, apply a global shift + if (ishift == 0 && is_second_lattice) + { + field.shift_field(second_lattice_shift[lattice_type]); + } -} // end namespace particles + // can omit first shift since zero by convention, unless shifted already above, otherwise apply relative phase shift + if (ishift > 0) + { + field.shift_field(lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift - 1]); + } + // read out values from phase shifted field and set assoc. 
particle's value + const auto ipcount0 = ishift * num_p_in_load; + for (size_t i = 0, ipcount = ipcount0; i < field.size(0); ++i) + { + for (size_t j = 0; j < field.size(1); ++j) + { + for (size_t k = 0; k < field.size(2); ++k) + { + auto pos = field.template get_unit_r_shifted(i, j, k, lattice_shifts[lattice_type][ishift] + (is_second_lattice ? second_lattice_shift[lattice_type] : vec3_t{0., 0., 0.})); + if (b64reals) + { + particles_.set_pos64(ipcount++, idim, pos[idim] * lunit + field.relem(i, j, k)); + } + else + { + particles_.set_pos32(ipcount++, idim, pos[idim] * lunit + field.relem(i, j, k)); + } + } + } + } + } + } + else + { + glass_ptr_->update_ghosts( field ); + #pragma omp parallel for + for (size_t i = 0; i < glass_ptr_->size(); ++i) + { + auto pos = glass_ptr_->glass_posr[i]; + real_t disp = glass_ptr_->get_at(pos); + if (b64reals) + { + particles_.set_pos64(i, idim, pos[idim] / field.n_[idim] * lunit + disp); + } + else + { + particles_.set_pos32(i, idim, pos[idim] / field.n_[idim] * lunit + disp); + } + } + } + } + + void set_velocities(lattice lattice_type, bool is_second_lattice, int idim, const bool b64reals, field_t &field, config_file &cf) + { + // works only for Bravais types + if (lattice_type >= 0) + { + const size_t num_p_in_load = field.local_size(); + for (int ishift = 0; ishift < (1 << lattice_type); ++ishift) + { + // if we are dealing with the secondary lattice, apply a global shift + if (ishift == 0 && is_second_lattice) + { + field.shift_field(second_lattice_shift[lattice_type]); + } + // can omit first shift since zero by convention, unless shifted already above, otherwise apply relative phase shift + if (ishift > 0) + { + field.shift_field(lattice_shifts[lattice_type][ishift] - lattice_shifts[lattice_type][ishift - 1]); + } + // read out values from phase shifted field and set assoc. 
particle's value + const auto ipcount0 = ishift * num_p_in_load; + for (size_t i = 0, ipcount = ipcount0; i < field.size(0); ++i) + { + for (size_t j = 0; j < field.size(1); ++j) + { + for (size_t k = 0; k < field.size(2); ++k) + { + if (b64reals) + { + particles_.set_vel64(ipcount++, idim, field.relem(i, j, k)); + } + else + { + particles_.set_vel32(ipcount++, idim, field.relem(i, j, k)); + } + } + } + } + } + } + else + { + glass_ptr_->update_ghosts( field ); + #pragma omp parallel for + for (size_t i = 0; i < glass_ptr_->size(); ++i) + { + auto pos = glass_ptr_->glass_posr[i]; + real_t vel = glass_ptr_->get_at(pos); + if (b64reals) + { + particles_.set_vel64(i, idim, vel); + } + else + { + particles_.set_vel32(i, idim, vel); + } + } + } + } + + const particle::container& get_particles() const noexcept{ + return particles_; + } + + }; // struct lattice + +} // namespace particle diff --git a/include/particle_plt.hh b/include/particle_plt.hh new file mode 100644 index 0000000..a452559 --- /dev/null +++ b/include/particle_plt.hh @@ -0,0 +1,568 @@ +#pragma once + +#include +#include // for unlink + +#include +#include + +#include +#include + +#include + +#include +#include +#include + +#include +inline double Hypergeometric2F1( double a, double b, double c, double x ) +{ + return gsl_sf_hyperg_2F1( a, b, c, x); +} + +#define PRODUCTION + +namespace particle{ +//! implement Joyce, Marcos et al. 
PLT calculation + +class lattice_gradient{ +private: + const real_t boxlen_, aini_; + const size_t ngmapto_, ngrid_, ngrid32_; + const real_t mapratio_, XmL_; + Grid_FFT D_xx_, D_xy_, D_xz_, D_yy_, D_yz_, D_zz_; + Grid_FFT grad_x_, grad_y_, grad_z_; + std::vector> vectk_; + std::vector> ico_, vecitk_; + + bool is_even( int i ){ return (i%2)==0; } + + bool is_in( int i, int j, int k, const mat3_t& M ){ + vec3_t v({i,j,k}); + auto vv = M * v; + return is_even(vv.x)&&is_even(vv.y)&&is_even(vv.z); + } + + void init_D( lattice lattice_type ) + { + constexpr real_t pi = M_PI; + constexpr real_t twopi = 2.0*M_PI; + constexpr real_t fourpi = 4.0*M_PI; + const real_t sqrtpi = std::sqrt(M_PI); + const real_t pi32 = std::pow(M_PI,1.5); + + //! === vectors, reciprocals and normals for the SC lattice === + const int charge_fac_sc = 1; + const mat3_t mat_bravais_sc{ + 1.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0, + }; + const mat3_t mat_reciprocal_sc{ + twopi, 0.0, 0.0, + 0.0, twopi, 0.0, + 0.0, 0.0, twopi, + }; + const mat3_t mat_invrecip_sc{ + 2, 0, 0, + 0, 2, 0, + 0, 0, 2, + }; + const std::vector> normals_sc{ + {pi,0.,0.},{-pi,0.,0.}, + {0.,pi,0.},{0.,-pi,0.}, + {0.,0.,pi},{0.,0.,-pi}, + }; + + + //! === vectors, reciprocals and normals for the BCC lattice === + const int charge_fac_bcc = 2; + const mat3_t mat_bravais_bcc{ + 1.0, 0.0, 0.5, + 0.0, 1.0, 0.5, + 0.0, 0.0, 0.5, + }; + const mat3_t mat_reciprocal_bcc{ + twopi, 0.0, 0.0, + 0.0, twopi, 0.0, + -twopi, -twopi, fourpi, + }; + const mat3_t mat_invrecip_bcc{ + 2, 0, 0, + 0, 2, 0, + 1, 1, 1, + }; + const std::vector> normals_bcc{ + {0.,pi,pi},{0.,-pi,pi},{0.,pi,-pi},{0.,-pi,-pi}, + {pi,0.,pi},{-pi,0.,pi},{pi,0.,-pi},{-pi,0.,-pi}, + {pi,pi,0.},{-pi,pi,0.},{pi,-pi,0.},{-pi,-pi,0.} + }; + + + //! 
=== vectors, reciprocals and normals for the FCC lattice === + const int charge_fac_fcc = 4; + const mat3_t mat_bravais_fcc{ + 0.0, 0.5, 0.0, + 0.5, 0.0, 1.0, + 0.5, 0.5, 0.0, + }; + const mat3_t mat_reciprocal_fcc{ + -fourpi, fourpi, twopi, + 0.0, 0.0, twopi, + fourpi, 0.0, -twopi, + }; + const mat3_t mat_invrecip_fcc{ + 0, 1, 1, + 1, 0, 1, + 0, 2, 0, + }; + const std::vector> normals_fcc{ + {twopi,0.,0.},{-twopi,0.,0.}, + {0.,twopi,0.},{0.,-twopi,0.}, + {0.,0.,twopi},{0.,0.,-twopi}, + {+pi,+pi,+pi},{+pi,+pi,-pi}, + {+pi,-pi,+pi},{+pi,-pi,-pi}, + {-pi,+pi,+pi},{-pi,+pi,-pi}, + {-pi,-pi,+pi},{-pi,-pi,-pi}, + }; + + //! select the properties for the chosen lattice + const int ilat = lattice_type; // 0 = sc, 1 = bcc, 2 = fcc + + const auto mat_bravais = (ilat==2)? mat_bravais_fcc : (ilat==1)? mat_bravais_bcc : mat_bravais_sc; + const auto mat_reciprocal = (ilat==2)? mat_reciprocal_fcc : (ilat==1)? mat_reciprocal_bcc : mat_reciprocal_sc; + const auto mat_invrecip = (ilat==2)? mat_invrecip_fcc : (ilat==1)? mat_invrecip_bcc : mat_invrecip_sc; + const auto normals = (ilat==2)? normals_fcc : (ilat==1)? normals_bcc : normals_sc; + const auto charge_fac = (ilat==2)? charge_fac_fcc : (ilat==1)? charge_fac_bcc : charge_fac_sc; + + const ptrdiff_t nlattice = ngrid_; + const real_t dx = 1.0/real_t(nlattice); + + const real_t eta = 4.0; // Ewald cutoff shall be 4 cells + const real_t alpha = 1.0/std::sqrt(2)/eta; + const real_t alpha2 = alpha*alpha; + const real_t alpha3 = alpha2*alpha; + + const real_t charge = 1.0/std::pow(real_t(nlattice),3)/charge_fac; + const real_t fft_norm = 1.0/std::pow(real_t(nlattice),3.0); + const real_t fft_norm12 = 1.0/std::pow(real_t(nlattice),1.5); + + //! just a Kronecker \delta_ij + auto kronecker = []( int i, int j ) -> real_t { return (i==j)? 1.0 : 0.0; }; + + //! 
Ewald summation: short-range Green's function + auto add_greensftide_sr = [&]( mat3_t& D, const vec3_t& d ) -> void { + auto r = d.norm(); + if( r< 1e-14 ) return; // return zero for r=0 + + const real_t r2(r*r), r3(r2*r), r5(r3*r2); + const real_t K1( -alpha3/pi32 * std::exp(-alpha2*r2)/r2 ); + const real_t K2( (std::erfc(alpha*r) + 2.0*alpha/sqrtpi*std::exp(-alpha2*r2)*r)/fourpi ); + + for( int mu=0; mu<3; ++mu ){ + for( int nu=mu; nu<3; ++nu ){ + real_t dd( d[mu]*d[nu] * K1 + (kronecker(mu,nu)/r3 - 3.0 * (d[mu]*d[nu])/r5) * K2 ); + D(mu,nu) += dd; + D(nu,mu) += (mu!=nu)? dd : 0.0; + } + } + }; + + //! Ewald summation: long-range Green's function + auto add_greensftide_lr = [&]( mat3_t& D, const vec3_t& k, const vec3_t& r ) -> void { + real_t kmod2 = k.norm_squared(); + real_t term = std::exp(-kmod2/(4*alpha2))*std::cos(k.dot(r)) / kmod2 * fft_norm; + for( int mu=0; mu<3; ++mu ){ + for( int nu=mu; nu<3; ++nu ){ + auto dd = k[mu] * k[nu] * term; + D(mu,nu) += dd; + D(nu,mu) += (mu!=nu)? dd : 0.0; + } + } + }; + + //! checks if 'vec' is in the FBZ with FBZ normal vectors given in 'normals' + auto check_FBZ = []( const auto& normals, const auto& vec ) -> bool { + for( const auto& n : normals ){ + if( n.dot( vec ) > 1.0001 * n.dot(n) ){ + return false; + } + } + return true; + }; + + constexpr ptrdiff_t lnumber = 3, knumber = 3; + const int numb = 1; //!< search radius when shifting vectors into FBZ + + vectk_.assign(D_xx_.memsize(),vec3_t()); + ico_.assign(D_xx_.memsize(),vec3_t()); + vecitk_.assign(D_xx_.memsize(),vec3_t()); + + #pragma omp parallel + { + //... temporary to hold values of the dynamical matrix + mat3_t matD(0.0); + + #pragma omp for + for( ptrdiff_t i=0; i x_ijk({dx*real_t(i),dx*real_t(j),dx*real_t(k)}); + const vec3_t ar = (mat_bravais * x_ijk).wrap_abs(); + + //... 
zero temporary matrix + matD.zero(); + + // add real-space part of dynamical matrix, periodic copies + for( ptrdiff_t ix=-lnumber; ix<=lnumber; ix++ ){ + for( ptrdiff_t iy=-lnumber; iy<=lnumber; iy++ ){ + for( ptrdiff_t iz=-lnumber; iz<=lnumber; iz++ ){ + const vec3_t n_ijk({real_t(ix),real_t(iy),real_t(iz)}); + const vec3_t dr(ar - mat_bravais * n_ijk); + add_greensftide_sr(matD, dr); + } + } + } + + // add k-space part of dynamical matrix + for( ptrdiff_t ix=-knumber; ix<=knumber; ix++ ){ + for( ptrdiff_t iy=-knumber; iy<=knumber; iy++ ){ + for( ptrdiff_t iz=-knumber; iz<=knumber; iz++ ){ + if(std::abs(ix)+std::abs(iy)+std::abs(iz) != 0){ + const vec3_t k_ijk({real_t(ix)/nlattice,real_t(iy)/nlattice,real_t(iz)/nlattice}); + const vec3_t ak( mat_reciprocal * k_ijk); + + add_greensftide_lr(matD, ak, ar ); + } + } + } + } + + D_xx_.relem(i,j,k) = matD(0,0) * charge; + D_xy_.relem(i,j,k) = matD(0,1) * charge; + D_xz_.relem(i,j,k) = matD(0,2) * charge; + D_yy_.relem(i,j,k) = matD(1,1) * charge; + D_yz_.relem(i,j,k) = matD(1,2) * charge; + D_zz_.relem(i,j,k) = matD(2,2) * charge; + } + } + } + } // end omp parallel region + + // fix r=0 with background density (added later in Fourier space) + D_xx_.relem(0,0,0) = 1.0/3.0; + D_xy_.relem(0,0,0) = 0.0; + D_xz_.relem(0,0,0) = 0.0; + D_yy_.relem(0,0,0) = 1.0/3.0; + D_yz_.relem(0,0,0) = 0.0; + D_zz_.relem(0,0,0) = 1.0/3.0; + + D_xx_.FourierTransformForward(); + D_xy_.FourierTransformForward(); + D_xz_.FourierTransformForward(); + D_yy_.FourierTransformForward(); + D_yz_.FourierTransformForward(); + D_zz_.FourierTransformForward(); + +#ifndef PRODUCTION + if (CONFIG::MPI_task_rank == 0) + unlink("debug.hdf5"); + D_xx_.Write_to_HDF5("debug.hdf5","Dxx"); + D_xy_.Write_to_HDF5("debug.hdf5","Dxy"); + D_xz_.Write_to_HDF5("debug.hdf5","Dxz"); + D_yy_.Write_to_HDF5("debug.hdf5","Dyy"); + D_yz_.Write_to_HDF5("debug.hdf5","Dyz"); + D_zz_.Write_to_HDF5("debug.hdf5","Dzz"); + + std::ofstream ofs2("test_brillouin.txt"); +#endif + using 
map_t = std::map,size_t>; + map_t iimap; + + //!=== Make temporary copies before resorting to std. Fourier grid ========!// + Grid_FFT + temp1({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + temp2({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + temp3({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}); + + temp1.FourierTransformForward(false); + temp2.FourierTransformForward(false); + temp3.FourierTransformForward(false); + + #pragma omp parallel for + for( size_t i=0; i D; + vec3_t eval, evec1, evec2, evec3_t; + + #pragma omp for + for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + + // put matrix elements into actual matrix + D(0,0) = std::real(temp1.kelem(i,j,k)) / fft_norm12; + D(0,1) = D(1,0) = std::imag(temp1.kelem(i,j,k)) / fft_norm12; + D(0,2) = D(2,0) = std::real(temp2.kelem(i,j,k)) / fft_norm12; + D(1,1) = std::imag(temp2.kelem(i,j,k)) / fft_norm12; + D(1,2) = D(2,1) = std::real(temp3.kelem(i,j,k)) / fft_norm12; + D(2,2) = std::imag(temp3.kelem(i,j,k)) / fft_norm12; + + // compute eigenstructure of matrix + D.eigen(eval, evec1, evec2, evec3_t); + evec3_t /= (twopi*ngrid_); + + // now determine to which modes on the regular lattice this contributes + vec3_t ar = kv / (twopi*ngrid_); + vec3_t a(mat_reciprocal * ar); + + // translate the k-vectors into the "candidate" FBZ + for( int l1=-numb; l1<=numb; ++l1 ){ + for( int l2=-numb; l2<=numb; ++l2 ){ + for( int l3=-numb; l3<=numb; ++l3 ){ + // need both halfs of Fourier space since we use real transforms + for( int isign=0; isign<=1; ++isign ){ + const real_t sign = 2.0*real_t(isign)-1.0; + const vec3_t vshift({real_t(l1),real_t(l2),real_t(l3)}); + + vec3_t vectk = sign * a + mat_reciprocal * vshift; + + if( check_FBZ( normals, vectk ) ) + { + int ix = std::round(vectk.x*(ngrid_)/twopi); + int iy = std::round(vectk.y*(ngrid_)/twopi); + int iz = std::round(vectk.z*(ngrid_)/twopi); + + #pragma omp critical + {iimap.insert( std::pair,size_t>({ix,iy,iz}, D_xx_.get_idx(i,j,k)) );} + + temp1.kelem(i,j,k) = ccomplex_t(eval[2],eval[1]); + 
temp2.kelem(i,j,k) = ccomplex_t(eval[0],evec3_t.x); + temp3.kelem(i,j,k) = ccomplex_t(evec3_t.y,evec3_t.z); + } + }//sign + } //l3 + } //l2 + } //l1 + } //k + } //j + } //i + } + + D_xx_.kelem(0,0,0) = 1.0; + D_xy_.kelem(0,0,0) = 0.0; + D_xz_.kelem(0,0,0) = 0.0; + + D_yy_.kelem(0,0,0) = 1.0; + D_yz_.kelem(0,0,0) = 0.0; + D_zz_.kelem(0,0,0) = 0.0; + + //... approximate infinite lattice by interpolating to sites not covered by current resolution... + #pragma omp parallel for + for( size_t i=0; inlattice/2)? int(i)-nlattice : int(i); + int jj = (int(j)>nlattice/2)? int(j)-nlattice : int(j); + int kk = (int(k)>nlattice/2)? int(k)-nlattice : int(k); + vec3_t kv({real_t(ii),real_t(jj),real_t(kk)}); + + auto align_with_k = [&]( const vec3_t& v ) -> vec3_t{ + return v*((v.dot(kv)<0.0)?-1.0:1.0); + }; + + vec3_t v, l; + map_t::iterator it; + + if( !is_in(i,j,k,mat_invrecip) ){ + auto average_lv = [&]( const auto& t1, const auto& t2, const auto& t3, vec3_t& v, vec3_t& l ) { + v = 0.0; l = 0.0; + int count(0); + + auto add_lv = [&]( auto it ) -> void { + auto q = it->second;++count; + l += vec3_t({std::real(t1.kelem(q)),std::imag(t1.kelem(q)),std::real(t2.kelem(q))}); + v += align_with_k(vec3_t({std::imag(t2.kelem(q)),std::real(t3.kelem(q)),std::imag(t3.kelem(q))})); + }; + map_t::iterator it; + if( (it = iimap.find({ii-1,jj,kk}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii+1,jj,kk}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii,jj-1,kk}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii,jj+1,kk}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii,jj,kk-1}))!=iimap.end() ){ add_lv(it); } + if( (it = iimap.find({ii,jj,kk+1}))!=iimap.end() ){ add_lv(it); } + l/=real_t(count); v/=real_t(count); + }; + + average_lv(temp1,temp2,temp3,v,l); + + }else{ + if( (it = iimap.find({ii,jj,kk}))!=iimap.end() ){ + auto q = it->second; + l = vec3_t({std::real(temp1.kelem(q)),std::imag(temp1.kelem(q)),std::real(temp2.kelem(q))}); + v = 
align_with_k(vec3_t({std::imag(temp2.kelem(q)),std::real(temp3.kelem(q)),std::imag(temp3.kelem(q))})); + } + } + D_xx_.kelem(i,j,k) = l[0]; + D_xy_.kelem(i,j,k) = l[1]; + D_xz_.kelem(i,j,k) = l[2]; + D_yy_.kelem(i,j,k) = v[0]; + D_yz_.kelem(i,j,k) = v[1]; + D_zz_.kelem(i,j,k) = v[2]; + } + } + } + +#ifdef PRODUCTION + #pragma omp parallel for + for( size_t i=0; i kv = D_xx_.get_k(i,j,k); + + double mu1 = std::real(D_xx_.kelem(i,j,k)); + // double mu2 = std::real(D_xy_.kelem(i,j,k)); + // double mu3 = std::real(D_xz_.kelem(i,j,k)); + + vec3_t evec1({std::real(D_yy_.kelem(i,j,k)),std::real(D_yz_.kelem(i,j,k)),std::real(D_zz_.kelem(i,j,k))}); + evec1 /= evec1.norm(); + + // /////////////////////////////////// + // // project onto spherical coordinate vectors + + real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ): 0.0; + real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); + vec3_t e_r( st*cp, st*sp, ct), e_theta( ct*cp, ct*sp, -st), e_phi( -sp, cp, 0.0 ); + + // re-normalise so that longitudinal amplitude is exact + double renorm = evec1.dot( e_r ); if( renorm < 0.01 ) renorm = 1.0; + + // -- store in diagonal components of D_ij + D_xx_.kelem(i,j,k) = 1.0; + D_yy_.kelem(i,j,k) = evec1.dot( e_theta ) / renorm; + D_zz_.kelem(i,j,k) = evec1.dot( e_phi ) / renorm; + + // spatially dependent correction to vfact = \dot{D_+}/D_+ + D_xy_.kelem(i,j,k) = 1.0/(0.25*(std::sqrt(1.+24*mu1)-1.)); + } + } + } + D_xy_.kelem(0,0,0) = 1.0; + D_xx_.kelem(0,0,0) = 1.0; + D_yy_.kelem(0,0,0) = 0.0; + D_zz_.kelem(0,0,0) = 0.0; + + // unlink("debug.hdf5"); + // D_xy_.Write_to_HDF5("debug.hdf5","mu1"); + // D_xx_.Write_to_HDF5("debug.hdf5","e1x"); + // D_yy_.Write_to_HDF5("debug.hdf5","e1y"); + // D_zz_.Write_to_HDF5("debug.hdf5","e1z"); + +#else + D_xx_.Write_to_HDF5("debug.hdf5","mu1"); + D_xy_.Write_to_HDF5("debug.hdf5","mu2"); + D_xz_.Write_to_HDF5("debug.hdf5","mu3"); + 
D_yy_.Write_to_HDF5("debug.hdf5","e1x"); + D_yz_.Write_to_HDF5("debug.hdf5","e1y"); + D_zz_.Write_to_HDF5("debug.hdf5","e1z"); +#endif + } + + +public: + // real_t boxlen, size_t ngridother + explicit lattice_gradient( config_file& the_config, size_t ngridself=64 ) + : boxlen_( the_config.get_value("setup", "BoxLength") ), + aini_ ( 1.0/(1.0+the_config.get_value("setup", "zstart")) ), + ngmapto_( the_config.get_value("setup", "GridRes") ), + ngrid_( ngridself ), ngrid32_( std::pow(ngrid_, 1.5) ), mapratio_(real_t(ngrid_)/real_t(ngmapto_)), + XmL_ ( the_config.get_value("cosmology", "Omega_L") / the_config.get_value("cosmology", "Omega_m") ), + D_xx_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_xy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + D_xz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_yy_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + D_yz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), D_zz_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + grad_x_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), grad_y_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}), + grad_z_({ngrid_, ngrid_, ngrid_}, {1.0,1.0,1.0}) + { + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + std::string lattice_str = the_config.get_value_safe("setup","ParticleLoad","sc"); + const lattice lattice_type = + ((lattice_str=="bcc")? lattice_bcc + : ((lattice_str=="fcc")? lattice_fcc + : ((lattice_str=="rsc")? 
lattice_rsc + : lattice_sc))); + + music::ilog << "PLT corrections for " << lattice_str << " lattice will be computed on " << ngrid_ << "**3 mesh" << std::endl; + + double wtime = get_wtime(); + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing PLT eigenmodes "<< std::flush; + + init_D( lattice_type ); + // init_D__old(); + + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + } + + inline ccomplex_t gradient( const int idim, std::array ijk ) const + { + real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; + + auto kv = D_xx_.get_k( ix, iy, iz ); + auto kmod = kv.norm() / mapratio_ / boxlen_; + + // // project onto spherical coordinate vectors + auto D_r = std::real(D_xx_.get_cic_kspace({ix,iy,iz})); + auto D_theta = std::real(D_yy_.get_cic_kspace({ix,iy,iz})); + auto D_phi = std::real(D_zz_.get_cic_kspace({ix,iy,iz})); + + real_t kr = kv.norm(), kphi = kr>0.0? std::atan2(kv.y,kv.x) : 0.0, ktheta = kr>0.0? std::acos( kv.z / kr ) : 0.0; + real_t st = std::sin(ktheta), ct = std::cos(ktheta), sp = std::sin(kphi), cp = std::cos(kphi); + + if( idim == 0 ){ + return ccomplex_t(0.0, kmod*(D_r * st * cp + D_theta * ct * cp - D_phi * sp)); + } + else if( idim == 1 ){ + return ccomplex_t(0.0, kmod*(D_r * st * sp + D_theta * ct * sp + D_phi * cp)); + } + return ccomplex_t(0.0, kmod*(D_r * ct - D_theta * st)); + } + + inline real_t vfac_corr( std::array ijk ) const + { + real_t ix = ijk[0]*mapratio_, iy = ijk[1]*mapratio_, iz = ijk[2]*mapratio_; + const real_t alpha = 1.0/std::real(D_xy_.get_cic_kspace({ix,iy,iz})); + return 1.0/alpha; + // // below is for LCDM, but it is a tiny correction for typical starting redshifts: + //! 
X = \Omega_\Lambda / \Omega_m + // return 1.0 / (alpha - (2*std::pow(aini_,3)*alpha*(2 + alpha)*XmL_*Hypergeometric2F1((3 + alpha)/3.,(5 + alpha)/3., + // (13 + 4*alpha)/6.,-(std::pow(aini_,3)*XmL_)))/ + // ((7 + 4*alpha)*Hypergeometric2F1(alpha/3.,(2 + alpha)/3.,(7 + 4*alpha)/6.,-(std::pow(aini_,3)*XmL_)))); + } + +}; + +} \ No newline at end of file diff --git a/include/physical_constants.hh b/include/physical_constants.hh new file mode 100644 index 0000000..594eb0d --- /dev/null +++ b/include/physical_constants.hh @@ -0,0 +1,62 @@ +#pragma once +/*******************************************************************************\ + physical_constants.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 06/2019 - Oliver Hahn - first implementation +\*******************************************************************************/ + +// physical constants for convenience, all values have been taken from +// the 2018 edition of the Particle Data Group Booklet, +// http://pdg.lbl.gov/2019/mobile/reviews/pdf/rpp2018-rev-phys-constants-m.pdf + +namespace phys_const +{ +// helper value of pi so that we don't need to include any other header just for this +static constexpr double pi_ = 3.141592653589793115997963468544185161590576171875; + +//--- unit conversions --------------------------------------------------- + +// 1 Mpc in m +static constexpr double Mpc_SI = 3.0857e22; + +// 1 Gyr in s +static constexpr double Gyr_SI = 3.1536e16; + +// 1 eV in J +static constexpr double eV_SI = 1.602176487e-19; + +// 1 erg in J +static constexpr double erg_SI = 1e-7; + +//--- physical constants ------------------------------------------------ + +// speed of light c in m/s +static constexpr double c_SI = 2.99792458e8; + +// gravitational constant G in m^3/s^2/kg +static constexpr double G_SI = 6.6740800e-11; + +// Boltzmann constant k_B in kg m^2/s^2/K +static constexpr double kB_SI = 
1.38064852e-23; + +// reduced Planck's quantum \hbar in kg m^2/s +static constexpr double hbar_SI = 1.054571800e-34; + +// Stefan-Boltzmann constant sigma in J/m^2/s/K^4 +static constexpr double sigma_SI = (pi_ * pi_) * (kB_SI * kB_SI * kB_SI * kB_SI) / 60. / (hbar_SI * hbar_SI * hbar_SI) / (c_SI * c_SI); + +// electron mass in kg +static constexpr double me_SI = 9.10938356e-31; + +// proton mass in kg +static constexpr double mp_SI = 1.672621898e-27; + +// unified atomic mass unit (u) in kg +static constexpr double u_SI = 1.660539040e-27; + +// critical density of the Universe in h^2 kg/m^3 +static constexpr double rhocrit_h2_SI = 3 * 1e10 / (8 * pi_ * G_SI) / Mpc_SI / Mpc_SI; + +} // namespace phys_const \ No newline at end of file diff --git a/include/random_plugin.hh b/include/random_plugin.hh index 3e7b77c..a91ab7e 100644 --- a/include/random_plugin.hh +++ b/include/random_plugin.hh @@ -10,21 +10,21 @@ class RNG_plugin { protected: - ConfigFile *pcf_; //!< pointer to config_file from which to read parameters + config_file *pcf_; //!< pointer to config_file from which to read parameters public: - explicit RNG_plugin(ConfigFile &cf) + explicit RNG_plugin(config_file &cf) : pcf_(&cf) { } virtual ~RNG_plugin() {} virtual bool isMultiscale() const = 0; - virtual void Fill_Grid( Grid_FFT& g ) const = 0; + virtual void Fill_Grid( Grid_FFT& g ) = 0;//const = 0; //virtual void FillGrid(int level, DensityGrid &R) = 0; }; struct RNG_plugin_creator { - virtual std::unique_ptr Create(ConfigFile &cf) const = 0; + virtual std::unique_ptr Create(config_file &cf) const = 0; virtual ~RNG_plugin_creator() {} }; @@ -42,14 +42,14 @@ struct RNG_plugin_creator_concrete : public RNG_plugin_creator } //! 
create an instance of the plugin - std::unique_ptr Create(ConfigFile &cf) const + std::unique_ptr Create(config_file &cf) const { return std::make_unique(cf); } }; typedef RNG_plugin RNG_instance; -std::unique_ptr select_RNG_plugin( ConfigFile &cf); +std::unique_ptr select_RNG_plugin( config_file &cf); // /*! // * @brief encapsulates all things for multi-scale white noise generation @@ -58,18 +58,18 @@ std::unique_ptr select_RNG_plugin( ConfigFile &cf); // class random_number_generator // { // protected: -// ConfigFile *pcf_; +// config_file *pcf_; // //const refinement_hierarchy * prefh_; // RNG_plugin *generator_; // int levelmin_, levelmax_; // public: // //! constructor -// random_number_generator( ConfigFile &cf ) +// random_number_generator( config_file &cf ) // : pcf_(&cf) //, prefh_( &refh ) // { -// levelmin_ = pcf_->GetValue("setup", "levelmin"); -// levelmax_ = pcf_->GetValue("setup", "levelmax"); +// levelmin_ = pcf_->get_value("setup", "levelmin"); +// levelmax_ = pcf_->get_value("setup", "levelmax"); // generator_ = select_RNG_plugin(cf); // } diff --git a/include/system_stat.hh b/include/system_stat.hh index f911a42..fb7f6f3 100644 --- a/include/system_stat.hh +++ b/include/system_stat.hh @@ -1,3 +1,10 @@ +/*******************************************************************\ + system_stat.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 08/2019 - Oliver Hahn - first implementation +\*******************************************************************/ #pragma once #include diff --git a/include/testing.hh b/include/testing.hh index 53bc571..aaaae39 100644 --- a/include/testing.hh +++ b/include/testing.hh @@ -1,13 +1,21 @@ +/*******************************************************************\ + testing.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see 
repo): + 10/2019 - Michael Michaux & Oliver Hahn - first implementation +\*******************************************************************/ #pragma once #include #include #include #include +#include namespace testing{ void output_potentials_and_densities( - ConfigFile& the_config, + config_file& the_config, size_t ngrid, real_t boxlen, Grid_FFT& phi, Grid_FFT& phi2, @@ -16,7 +24,7 @@ namespace testing{ std::array< Grid_FFT*,3 >& A3 ); void output_velocity_displacement_symmetries( - ConfigFile &the_config, + config_file &the_config, size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, @@ -26,7 +34,8 @@ namespace testing{ bool bwrite_out_fields=false); void output_convergence( - ConfigFile &the_config, + config_file &the_config, + cosmology::calculator* the_cosmo_calc, std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, diff --git a/include/transfer_function_plugin.hh b/include/transfer_function_plugin.hh index cd7c762..942a7ea 100644 --- a/include/transfer_function_plugin.hh +++ b/include/transfer_function_plugin.hh @@ -13,22 +13,29 @@ enum tf_type vtotal, vcdm, vbaryon, - total0 + total0, + cdm0, + baryon0, + vtotal0, + vcdm0, + vbaryon0, }; class TransferFunction_plugin { public: // Cosmology cosmo_; //!< cosmological parameter, read from config_file - ConfigFile *pcf_; //!< pointer to config_file from which to read parameters + config_file *pcf_; //!< pointer to config_file from which to read parameters bool tf_distinct_; //!< bool if density transfer function is distinct for baryons and DM bool tf_withvel_; //!< bool if also have velocity transfer functions bool tf_withtotal0_; //!< have the z=0 spectrum for normalisation purposes bool tf_velunits_; //!< velocities are in velocity units (km/s) + bool tf_isnormalised_; //!< assume that transfer functions come already correctly normalised and need be re-normalised to a specified value + public: //! 
constructor - TransferFunction_plugin(ConfigFile &cf) - : pcf_(&cf), tf_distinct_(false), tf_withvel_(false), tf_withtotal0_(false), tf_velunits_(false) + TransferFunction_plugin(config_file &cf) + : pcf_(&cf), tf_distinct_(false), tf_withvel_(false), tf_withtotal0_(false), tf_velunits_(false), tf_isnormalised_(false) { } //! destructor @@ -75,7 +82,7 @@ class TransferFunction_plugin struct TransferFunction_plugin_creator { //! create an instance of a transfer function plug-in - virtual std::unique_ptr create(ConfigFile &cf) const = 0; + virtual std::unique_ptr create(config_file &cf) const = 0; //! destroy an instance of a plug-in virtual ~TransferFunction_plugin_creator() {} @@ -96,7 +103,7 @@ struct TransferFunction_plugin_creator_concrete : public TransferFunction_plugin } //! create an instance of the plug-in - std::unique_ptr create(ConfigFile &cf) const + std::unique_ptr create(config_file &cf) const { return std::make_unique(cf); } @@ -104,4 +111,4 @@ struct TransferFunction_plugin_creator_concrete : public TransferFunction_plugin // typedef TransferFunction_plugin TransferFunction; -std::unique_ptr select_TransferFunction_plugin(ConfigFile &cf); +std::unique_ptr select_TransferFunction_plugin(config_file &cf); diff --git a/include/vec.hh b/include/vec.hh new file mode 100644 index 0000000..dd914b0 --- /dev/null +++ b/include/vec.hh @@ -0,0 +1,144 @@ +#pragma once +/*******************************************************************************\ + vec.hh - This file is part of MUSIC2 - + a code to generate initial conditions for cosmological simulations + + CHANGELOG (only majors, for details see repo): + 06/2019 - Oliver Hahn - first implementation +\*******************************************************************************/ + +#include + +//! 
implements general N-dim vectors of arbitrary primitive type with some arithmetic ops +template +struct vec_t +{ + std::array data_; + + vec_t() {} + + vec_t(const vec_t &v) + : data_(v.data_) {} + + vec_t(vec_t &&v) + : data_(std::move(v.data_)) {} + + template + vec_t(E... e) + : data_{{std::forward(e)...}} + { + static_assert(sizeof...(E) == N, "Brace-enclosed initialiser list doesn't match vec_t length!"); + } + + //! bracket index access to vector components + T &operator[](size_t i) noexcept { return data_[i]; } + + //! const bracket index access to vector components + const T &operator[](size_t i) const noexcept { return data_[i]; } + + // assignment operator + vec_t &operator=(const vec_t &v) noexcept + { + data_ = v.data_; + return *this; + } + + //! implementation of summation of vec_t + vec_t operator+(const vec_t &v) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = data_[i] + v[i]; + return res; + } + + //! implementation of difference of vec_t + vec_t operator-(const vec_t &v) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = data_[i] - v[i]; + return res; + } + + //! implementation of unary negative + vec_t operator-() const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = -data_[i]; + return res; + } + + //! implementation of scalar multiplication + template + vec_t operator*(T2 s) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = data_[i] * s; + return res; + } + + //! implementation of scalar division + vec_t operator/(T s) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = data_[i] / s; + return res; + } + + //! takes the absolute value of each element + vec_t abs(void) const noexcept + { + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = std::abs(data_[i]); + return res; + } + + //! 
implementation of implicit summation of vec_t + vec_t &operator+=(const vec_t &v) noexcept + { + for (int i = 0; i < N; ++i) + data_[i] += v[i]; + return *this; + } + + //! implementation of implicit subtraction of vec_t + vec_t &operator-=(const vec_t &v) noexcept + { + for (int i = 0; i < N; ++i) + data_[i] -= v[i]; + return *this; + } + + //! implementation of implicit scalar multiplication of vec_t + vec_t &operator*=(T s) noexcept + { + for (int i = 0; i < N; ++i) + data_[i] *= s; + return *this; + } + + //! implementation of implicit scalar division of vec_t + vec_t &operator/=(T s) noexcept + { + for (int i = 0; i < N; ++i) + data_[i] /= s; + return *this; + } + + size_t size(void) const noexcept { return N; } +}; + +//! multiplication with scalar +template +inline vec_t operator*(T2 s, const vec_t &v) +{ + vec_t res; + for (int i = 0; i < N; ++i) + res[i] = v[i] * s; + return res; +} diff --git a/include/vec3.hh b/include/vec3.hh deleted file mode 100644 index 9295722..0000000 --- a/include/vec3.hh +++ /dev/null @@ -1,41 +0,0 @@ -#pragma once - -template< typename T > -class vec3{ -private: - std::array data_; - T &x,&y,&z; -public: - vec3() - : x(data_[0]),y(data_[1]),z(data_[2]){} - - vec3( const vec3 &v) - : data_(v.data_), x(data_[0]),y(data_[1]),z(data_[2]){} - - vec3( std::array&& d ) - : data_(std::move(d)), x(data_[0]),y(data_[1]),z(data_[2]){} - - vec3( vec3 &&v) - : data_(std::move(v.data_)), x(data_[0]),y(data_[1]),z(data_[2]){} - - T &operator[](size_t i){ return data_[i];} - - const T &operator[](size_t i) const { return data_[i]; } - - T dot(const vec3 &a) const - { - return data_[0] * a.data_[0] + data_[1] * a.data_[1] + data_[2] * a.data_[2]; - } - - T norm_squared(void) const - { - return this->dot(*this); - } - - T norm(void) const - { - return std::sqrt( this->norm_squared() ); - } - - -}; diff --git a/new/FindFFTW3.cmake b/new/FindFFTW3.cmake deleted file mode 100644 index 80aa67b..0000000 --- a/new/FindFFTW3.cmake +++ /dev/null @@ 
-1,232 +0,0 @@ -# - Try to find FFTW -# -# By default, it will look only for the serial libraries with single, double, -# and long double precision. Any combination of precision (SINGLE, DOUBLE, -# LONGDOUBLE) and library type (SERIAL, [THREADS|OPENMP], MPI) is possible by -# using the COMPONENTS keyword. For example, -# -# find_package(FFTW3 COMPONENTS SINGLE DOUBLE OPENMP MPI) -# -# Once done this will define -# FFTW3_FOUND - System has FFTW3 -# FFTW3_INCLUDE_DIRS - The FFTW3 include directories -# FFTW3_LIBRARIES - The libraries needed to use FFTW3 -# FFTW3_DEFINITIONS - Compiler switches required for using FFTW3 -# FFTW3_$KIND_$PARALLEL_FOUND- Set if FFTW3 exists in KIND precision format for PARALLEL mode. -# where KIND can be: SINGLE, DOUBLE, LONGDOUBLE -# and PARALLEL: SERIAL, OPENMP, MPI, THREADS. -# FFTW3_$KIND_$PARALLEL_LIBRARY - The libraries needed to use. -# FFTW3_INCLUDE_DIR_PARALLEL - The FFTW3 include directories for parallels mode. - -cmake_policy(SET CMP0054 NEW) - -if(FFTW3_FOUND) - return() -endif() - -if(FFTW3_INCLUDE_DIR AND FFTW3_LIBRARIES) - set(FFTW3_FOUND TRUE) - foreach(component ${FFTW3_FIND_COMPONENTS}) - if("${FFTW3_${component}_LIBRARY}" STREQUAL "") - set(FFTW3_${component}_LIBRARY "${FFTW3_LIBRARIES}") - endif() - endforeach() - return() -endif() - -macro(find_specific_libraries KIND PARALLEL) - list(APPEND FFTW3_FIND_COMPONENTS ${KIND}_${PARALLEL}) - if(NOT (${PARALLEL} STREQUAL "SERIAL") AND NOT ${PARALLEL}_FOUND) - message(FATAL_ERROR "Please, find ${PARALLEL} libraries before FFTW") - endif() - - find_library(FFTW3_${KIND}_${PARALLEL}_LIBRARY NAMES - fftw3${SUFFIX_${KIND}}${SUFFIX_${PARALLEL}}${SUFFIX_FINAL} HINTS ${HINT_DIRS}) - if(FFTW3_${KIND}_${PARALLEL}_LIBRARY MATCHES fftw3) - list(APPEND FFTW3_LIBRARIES ${FFTW3_${KIND}_${PARALLEL}_LIBRARY}) - set(FFTW3_${KIND}_${PARALLEL}_FOUND TRUE) - - STRING(TOLOWER "${KIND}" kind) - STRING(TOLOWER "${PARALLEL}" parallel) - if(FFTW3_${kind}_${parallel}_LIBRARY MATCHES "\\.a$") - 
add_library(fftw3::${kind}::${parallel} STATIC IMPORTED GLOBAL) - else() - add_library(fftw3::${kind}::${parallel} SHARED IMPORTED GLOBAL) - endif() - - # MPI Has a different included library than the others - # FFTW3_INCLUDE_DIR_PARALLEL will change depending of which on is used. - set(FFTW3_INCLUDE_DIR_PARALLEL ${FFTW3_INCLUDE_DIR} ) - if(PARALLEL STREQUAL "MPI") - set(FFTW3_INCLUDE_DIR_PARALLEL ${FFTW3_${PARALLEL}_INCLUDE_DIR}) - endif() - - set_target_properties(fftw3::${kind}::${parallel} PROPERTIES - IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}") - - # adding target properties to the different cases - ## MPI - if(PARALLEL STREQUAL "MPI") - if(MPI_C_LIBRARIES) - set_target_properties(fftw3::${kind}::mpi PROPERTIES - IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}" - IMPORTED_LINK_INTERFACE_LIBRARIES ${MPI_C_LIBRARIES}) - endif() - endif() - ## OpenMP - if(PARALLEL STREQUAL "OPENMP") - if(OPENMP_C_FLAGS) - set_target_properties(fftw3::${kind}::${parallel} PROPERTIES - IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}" - INTERFACE_COMPILE_OPTIONS "${OPENMP_C_FLAGS}") - endif() - endif() - ## THREADS - if(PARALLEL STREQUAL "THREADS") - if(CMAKE_THREAD_LIBS_INIT) # TODO: this is not running - set_target_properties(fftw3::${kind}::${parallel} PROPERTIES - IMPORTED_LOCATION "${FFTW3_${KIND}_${PARALLEL}_LIBRARY}" - INTERFACE_INCLUDE_DIRECTORIES "${FFTW3_INCLUDE_DIR_PARALLEL}" - INTERFACE_COMPILE_OPTIONS "${CMAKE_THREAD_LIBS_INIT}") - endif() - endif() - endif() -endmacro() - - - - -if(NOT FFTW3_FIND_COMPONENTS) - set(FFTW3_FIND_COMPONENTS SINGLE DOUBLE LONGDOUBLE SERIAL) -endif() - -string(TOUPPER "${FFTW3_FIND_COMPONENTS}" FFTW3_FIND_COMPONENTS) - -list(FIND FFTW3_FIND_COMPONENTS SINGLE LOOK_FOR_SINGLE) -list(FIND FFTW3_FIND_COMPONENTS DOUBLE 
LOOK_FOR_DOUBLE) -list(FIND FFTW3_FIND_COMPONENTS LONGDOUBLE LOOK_FOR_LONGDOUBLE) -list(FIND FFTW3_FIND_COMPONENTS THREADS LOOK_FOR_THREADS) -list(FIND FFTW3_FIND_COMPONENTS OPENMP LOOK_FOR_OPENMP) -list(FIND FFTW3_FIND_COMPONENTS MPI LOOK_FOR_MPI) -list(FIND FFTW3_FIND_COMPONENTS SERIAL LOOK_FOR_SERIAL) - -# FIXME - This may fail in computers wihtout serial -# Default serial to obtain version number -set(LOOK_FOR_SERIAL 1) - -# set serial as default if none parallel component has been set -if((LOOK_FOR_THREADS LESS 0) AND (LOOK_FOR_MPI LESS 0) AND - (LOOK_FOR_OPENMP LESS 0)) - set(LOOK_FOR_SERIAL 1) -endif() - -if(MPI_C_FOUND) - set(MPI_FOUND ${MPI_C_FOUND}) -endif() -unset(FFTW3_FIND_COMPONENTS) - - - - -if(WIN32) - set(HINT_DIRS ${FFTW3_DIRECTORY} $ENV{FFTW3_DIRECTORY}) -else() - find_package(PkgConfig) - if(PKG_CONFIG_FOUND) - pkg_check_modules(PC_FFTW QUIET fftw3) - set(FFTW3_DEFINITIONS ${PC_FFTW3_CFLAGS_OTHER}) - endif() - set(HINT_DIRS ${PC_FFTW3_INCLUDEDIR} ${PC_FFTW3_INCLUDE_DIRS} - ${FFTW3_INCLUDE_DIR} $ENV{FFTW3_INCLUDE_DIR} ) -endif() - -find_path(FFTW3_INCLUDE_DIR NAMES fftw3.h HINTS ${HINT_DIRS}) -if (LOOK_FOR_MPI) # Probably is going to be the same as fftw3.h - find_path(FFTW3_MPI_INCLUDE_DIR NAMES fftw3-mpi.h HINTS ${HINT_DIRS}) -endif() - -function(find_version OUTVAR LIBRARY SUFFIX) - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/main.c - # TODO: do we need to add include for mpi headers? 
- "#include - #include - int main(int nargs, char const *argv[]) { - printf(\"%s\", fftw${SUFFIX}_version); - return 0; - }" - ) -if(NOT CMAKE_CROSSCOMPILING) - try_run(RUN_RESULT COMPILE_RESULT - "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/" - "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/fftw${SUFFIX}/main.c" - CMAKE_FLAGS - -DLINK_LIBRARIES=${LIBRARY} - -DINCLUDE_DIRECTORIES=${FFTW3_INCLUDE_DIR} - RUN_OUTPUT_VARIABLE OUTPUT - COMPILE_OUTPUT_VARIABLE COUTPUT - ) - endif() - if(RUN_RESULT EQUAL 0) - string(REGEX REPLACE - ".*([0-9]+\\.[0-9]+\\.[0-9]+).*" - "\\1" VERSION_STRING "${OUTPUT}" - ) - set(${OUTVAR} ${VERSION_STRING} PARENT_SCOPE) - endif() -endfunction() - -set(SUFFIX_DOUBLE "") -set(SUFFIX_SINGLE "f") -set(SUFFIX_LONGDOUBLE "l") -set(SUFFIX_SERIAL "") -set(SUFFIX_OPENMP "_omp") -set(SUFFIX_MPI "_mpi") -set(SUFFIX_THREADS "_threads") -set(SUFFIX_FINAL "") - -if(WIN32) - set(SUFFIX_FINAL "-3") -else() - set(HINT_DIRS ${PC_FFTW3_LIBDIR} ${PC_FFTW3_LIBRARY_DIRS} - $ENV{FFTW3_LIBRARY_DIR} ${FFTW3_LIBRARY_DIR} ) -endif(WIN32) - -unset(FFTW3_LIBRARIES) -set(FFTW3_INCLUDE_DIRS ${FFTW3_INCLUDE_DIR} ) # TODO what's for? -set(FFTW3_FLAGS_C "") -foreach(KIND SINGLE DOUBLE LONGDOUBLE) - if(LOOK_FOR_${KIND} LESS 0) - continue() - endif() - foreach(PARALLEL SERIAL MPI OPENMP THREADS) - if(LOOK_FOR_${PARALLEL} LESS 0) - continue() - endif() - find_specific_libraries(${KIND} ${PARALLEL}) - endforeach() -endforeach() - -if(FFTW3_INCLUDE_DIR) - list(GET FFTW3_FIND_COMPONENTS 0 smallerrun) - string(REPLACE "_" ";" RUNLIST ${smallerrun}) - list(GET RUNLIST 0 KIND) - list(GET RUNLIST 1 PARALLEL) - unset(smallerrun) - unset(RUNLIST) - # suffix is quoted so it pass empty in the case of double as it's empty - find_version(FFTW3_VERSION_STRING ${FFTW3_${KIND}_${PARALLEL}_LIBRARY} - "${SUFFIX_${KIND}}") -endif() - -# FIXME: fails if use REQUIRED. 
-include(FindPackageHandleStandardArgs) -# handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE -# if all listed variables are TRUE -find_package_handle_standard_args(FFTW3 - REQUIRED_VARS FFTW3_LIBRARIES FFTW3_INCLUDE_DIR - VERSION_VAR FFTW3_VERSION_STRING - HANDLE_COMPONENTS -) diff --git a/src/grid_fft.cc b/src/grid_fft.cc index d5f103a..eeba708 100644 --- a/src/grid_fft.cc +++ b/src/grid_fft.cc @@ -2,192 +2,173 @@ #include #include -#include -#include - -template -void Grid_FFT::FillRandomReal(unsigned long int seed) +template +void Grid_FFT::Setup(void) { - gsl_rng *RNG = gsl_rng_alloc(gsl_rng_mt19937); -#if defined(USE_MPI) - seed += 17321 * CONFIG::MPI_task_rank; -#endif - gsl_rng_set(RNG, seed); - - for (size_t i = 0; i < sizes_[0]; ++i) + if (!bdistributed) { - for (size_t j = 0; j < sizes_[1]; ++j) + ntot_ = (n_[2] + 2) * n_[1] * n_[0]; + + music::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); + if (typeid(data_t) == typeid(real_t)) { - for (size_t k = 0; k < sizes_[2]; ++k) - { - this->relem(i, j, k) = gsl_ran_ugaussian_ratio_method(RNG); - } + data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(real_t))); + cdata_ = reinterpret_cast(data_); + + plan_ = FFTW_API(plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, FFTW_RUNMODE); + iplan_ = FFTW_API(plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, FFTW_RUNMODE); + } + else if (typeid(data_t) == typeid(ccomplex_t)) + { + data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(ccomplex_t))); + cdata_ = reinterpret_cast(data_); + + plan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_FORWARD, FFTW_RUNMODE); + iplan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_BACKWARD, FFTW_RUNMODE); + } + else + { + music::elog.Print("invalid data type in Grid_FFT::setup_fft_interface\n"); + } + + fft_norm_fac_ = 1.0 / 
std::sqrt((real_t)((size_t)n_[0] * (real_t)n_[1] * (real_t)n_[2])); + + if (typeid(data_t) == typeid(real_t)) + { + npr_ = n_[2] + 2; + npc_ = n_[2] / 2 + 1; + } + else + { + npr_ = n_[2]; + npc_ = n_[2]; + } + + for (int i = 0; i < 3; ++i) + { + nhalf_[i] = n_[i] / 2; + kfac_[i] = 2.0 * M_PI / length_[i]; + kny_[i] = kfac_[i] * n_[i]/2; + dx_[i] = length_[i] / n_[i]; + + global_range_.x1_[i] = 0; + global_range_.x2_[i] = n_[i]; + } + + local_0_size_ = n_[0]; + local_1_size_ = n_[1]; + local_0_start_ = 0; + local_1_start_ = 0; + + if (space_ == rspace_id) + { + sizes_[0] = n_[0]; + sizes_[1] = n_[1]; + sizes_[2] = n_[2]; + sizes_[3] = npr_; + } + else + { + sizes_[0] = n_[1]; + sizes_[1] = n_[0]; + sizes_[2] = npc_; + sizes_[3] = npc_; } } - - gsl_rng_free(RNG); -} - -template -void Grid_FFT::Setup(void) -{ -#if !defined(USE_MPI) //////////////////////////////////////////////////////////////////////////////////////////// - - ntot_ = (n_[2] + 2) * n_[1] * n_[0]; - - csoca::dlog.Print("[FFT] Setting up a shared memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); - if (typeid(data_t) == typeid(real_t)) - { - data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(real_t))); - cdata_ = reinterpret_cast(data_); - - plan_ = FFTW_API(plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, FFTW_RUNMODE); - iplan_ = FFTW_API(plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, FFTW_RUNMODE); - } - else if (typeid(data_t) == typeid(ccomplex_t)) - { - data_ = reinterpret_cast(fftw_malloc(ntot_ * sizeof(ccomplex_t))); - cdata_ = reinterpret_cast(data_); - - plan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_FORWARD, FFTW_RUNMODE); - iplan_ = FFTW_API(plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, FFTW_BACKWARD, FFTW_RUNMODE); - } else { - csoca::elog.Print("invalid data type in Grid_FFT::setup_fft_interface\n"); - } +#ifdef USE_MPI //// i.e. 
ifdef USE_MPI //////////////////////////////////////////////////////////////////////////////////// + size_t cmplxsz; - fft_norm_fac_ = 1.0 / std::sqrt((double)((size_t)n_[0] * (double)n_[1] * (double)n_[2])); + if (typeid(data_t) == typeid(real_t)) + { + cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD, + &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); + ntot_ = 2 * cmplxsz; + data_ = (data_t *)fftw_malloc(ntot_ * sizeof(real_t)); + cdata_ = reinterpret_cast(data_); + plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, + MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); + iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, + MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); + } + else if (typeid(data_t) == typeid(ccomplex_t)) + { + cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD, + &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); + ntot_ = cmplxsz; + data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t)); + cdata_ = reinterpret_cast(data_); + plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, + MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); + iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, + MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); + } + else + { + music::elog.Print("unknown data type in Grid_FFT::setup_fft_interface\n"); + abort(); + } - if (typeid(data_t) == typeid(real_t)) - { - npr_ = n_[2] + 2; - npc_ = n_[2] / 2 + 1; - } - else - { - npr_ = n_[2]; - npc_ = n_[2]; - } + music::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); - for (int i = 0; i < 3; ++i) - { - nhalf_[i] = n_[i] / 2; - kfac_[i] = 2.0 * M_PI / length_[i]; - dx_[i] = length_[i] / n_[i]; + 
fft_norm_fac_ = 1.0 / sqrt((real_t)n_[0] * (real_t)n_[1] * (real_t)n_[2]); - global_range_.x1_[i] = 0; - global_range_.x2_[i] = n_[i]; - } + if (typeid(data_t) == typeid(real_t)) + { + npr_ = n_[2] + 2; + npc_ = n_[2] / 2 + 1; + } + else + { + npr_ = n_[2]; + npc_ = n_[2]; + } - local_0_size_ = n_[0]; - local_1_size_ = n_[1]; - local_0_start_ = 0; - local_1_start_ = 0; + for (int i = 0; i < 3; ++i) + { + nhalf_[i] = n_[i] / 2; + kfac_[i] = 2.0 * M_PI / length_[i]; + kny_[i] = kfac_[i] * n_[i]/2; + dx_[i] = length_[i] / n_[i]; - if (space_ == rspace_id) - { - sizes_[0] = n_[0]; - sizes_[1] = n_[1]; - sizes_[2] = n_[2]; - sizes_[3] = npr_; - } - else - { - sizes_[0] = n_[1]; - sizes_[1] = n_[0]; - sizes_[2] = npc_; - sizes_[3] = npc_; - } - -#else //// i.e. ifdef USE_MPI //////////////////////////////////////////////////////////////////////////////////// - - size_t cmplxsz; - - if (typeid(data_t) == typeid(real_t)) - { - cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2] / 2 + 1, MPI_COMM_WORLD, - &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); - ntot_ = 2 * cmplxsz; - data_ = (data_t *)fftw_malloc(ntot_ * sizeof(real_t)); - cdata_ = reinterpret_cast(data_); - plan_ = FFTW_API(mpi_plan_dft_r2c_3d)(n_[0], n_[1], n_[2], (real_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_OUT); - iplan_ = FFTW_API(mpi_plan_dft_c2r_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (real_t *)data_, - MPI_COMM_WORLD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); - } - else if (typeid(data_t) == typeid(ccomplex_t)) - { - cmplxsz = FFTW_API(mpi_local_size_3d_transposed)(n_[0], n_[1], n_[2], MPI_COMM_WORLD, - &local_0_size_, &local_0_start_, &local_1_size_, &local_1_start_); - ntot_ = cmplxsz; - data_ = (data_t *)fftw_malloc(ntot_ * sizeof(ccomplex_t)); - cdata_ = reinterpret_cast(data_); - plan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_FORWARD, FFTW_RUNMODE | 
FFTW_MPI_TRANSPOSED_OUT); - iplan_ = FFTW_API(mpi_plan_dft_3d)(n_[0], n_[1], n_[2], (complex_t *)data_, (complex_t *)data_, - MPI_COMM_WORLD, FFTW_BACKWARD, FFTW_RUNMODE | FFTW_MPI_TRANSPOSED_IN); - } - else - { - csoca::elog.Print("unknown data type in Grid_FFT::setup_fft_interface\n"); - abort(); - } - - csoca::dlog.Print("[FFT] Setting up a distributed memory field %lux%lux%lu\n", n_[0], n_[1], n_[2]); - fft_norm_fac_ = 1.0 / sqrt((double)n_[0] * (double)n_[1] * (double)n_[2]); - - if (typeid(data_t) == typeid(real_t)) - { - npr_ = n_[2] + 2; - npc_ = n_[2] / 2 + 1; - } - else - { - npr_ = n_[2]; - npc_ = n_[2]; - } - - for (int i = 0; i < 3; ++i) - { - nhalf_[i] = n_[i] / 2; - kfac_[i] = 2.0 * M_PI / length_[i]; - dx_[i] = length_[i] / n_[i]; - - global_range_.x1_[i] = 0; - global_range_.x2_[i] = n_[i]; - } - global_range_.x1_[0] = (int)local_0_start_; - global_range_.x2_[0] = (int)(local_0_start_ + local_0_size_); - - if (space_ == rspace_id) - { - sizes_[0] = (int)local_0_size_; - sizes_[1] = n_[1]; - sizes_[2] = n_[2]; - sizes_[3] = npr_; // holds the physical memory size along the 3rd dimension - } - else - { - sizes_[0] = (int)local_1_size_; - sizes_[1] = n_[0]; - sizes_[2] = npc_; - sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension - } + global_range_.x1_[i] = 0; + global_range_.x2_[i] = n_[i]; + } + global_range_.x1_[0] = (int)local_0_start_; + global_range_.x2_[0] = (int)(local_0_start_ + local_0_size_); + if (space_ == rspace_id) + { + sizes_[0] = (int)local_0_size_; + sizes_[1] = n_[1]; + sizes_[2] = n_[2]; + sizes_[3] = npr_; // holds the physical memory size along the 3rd dimension + } + else + { + sizes_[0] = (int)local_1_size_; + sizes_[1] = n_[0]; + sizes_[2] = npc_; + sizes_[3] = npc_; // holds the physical memory size along the 3rd dimension + } +#else + music::flog << "MPI is required for distributed FFT arrays!" 
<< std::endl; + throw std::runtime_error("MPI is required for distributed FFT arrays!"); #endif //// of #ifdef #else USE_MPI //////////////////////////////////////////////////////////////////////////////////// + } } -template -void Grid_FFT::ApplyNorm(void) +template +void Grid_FFT::ApplyNorm(void) { #pragma omp parallel for for (size_t i = 0; i < ntot_; ++i) data_[i] *= fft_norm_fac_; } -template -void Grid_FFT::FourierTransformForward(bool do_transform) +template +void Grid_FFT::FourierTransformForward(bool do_transform) { #if defined(USE_MPI) MPI_Barrier(MPI_COMM_WORLD); @@ -199,12 +180,13 @@ void Grid_FFT::FourierTransformForward(bool do_transform) if (do_transform) { double wtime = get_wtime(); - csoca::dlog.Print("[FFT] Calling Grid_FFT::to_kspace (%lux%lux%lu)", sizes_[0], sizes_[1], sizes_[2]); - FFTW_API(execute)(plan_); + music::dlog.Print("[FFT] Calling Grid_FFT::to_kspace (%lux%lux%lu)", sizes_[0], sizes_[1], sizes_[2]); + FFTW_API(execute) + (plan_); this->ApplyNorm(); wtime = get_wtime() - wtime; - csoca::dlog.Print("[FFT] Completed Grid_FFT::to_kspace (%lux%lux%lu), took %f s", sizes_[0], sizes_[1], sizes_[2], wtime); + music::dlog.Print("[FFT] Completed Grid_FFT::to_kspace (%lux%lux%lu), took %f s", sizes_[0], sizes_[1], sizes_[2], wtime); } sizes_[0] = local_1_size_; @@ -217,8 +199,8 @@ void Grid_FFT::FourierTransformForward(bool do_transform) } } -template -void Grid_FFT::FourierTransformBackward(bool do_transform) +template +void Grid_FFT::FourierTransformBackward(bool do_transform) { #if defined(USE_MPI) MPI_Barrier(MPI_COMM_WORLD); @@ -229,14 +211,14 @@ void Grid_FFT::FourierTransformBackward(bool do_transform) //............................. 
if (do_transform) { - csoca::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]); + music::dlog.Print("[FFT] Calling Grid_FFT::to_rspace (%dx%dx%d)\n", sizes_[0], sizes_[1], sizes_[2]); double wtime = get_wtime(); FFTW_API(execute)(iplan_); this->ApplyNorm(); wtime = get_wtime() - wtime; - csoca::dlog.Print("[FFT] Completed Grid_FFT::to_rspace (%dx%dx%d), took %f s\n", sizes_[0], sizes_[1], sizes_[2], wtime); + music::dlog.Print("[FFT] Completed Grid_FFT::to_rspace (%dx%dx%d), took %f s\n", sizes_[0], sizes_[1], sizes_[2], wtime); } sizes_[0] = local_0_size_; sizes_[1] = n_[1]; @@ -269,9 +251,293 @@ void create_hdf5(std::string Filename) H5Fclose(HDF_FileID); } -template -void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) const +template +hid_t hdf5_get_data_type(void) { + if (typeid(T) == typeid(int)) + return H5T_NATIVE_INT; + + if (typeid(T) == typeid(unsigned)) + return H5T_NATIVE_UINT; + + if (typeid(T) == typeid(float)) + return H5T_NATIVE_FLOAT; + + if (typeid(T) == typeid(double)) + return H5T_NATIVE_DOUBLE; + + if (typeid(T) == typeid(long double)) + return H5T_NATIVE_LDOUBLE; + + if (typeid(T) == typeid(long long)) + return H5T_NATIVE_LLONG; + + if (typeid(T) == typeid(unsigned long long)) + return H5T_NATIVE_ULLONG; + + if (typeid(T) == typeid(size_t)) + return H5T_NATIVE_ULLONG; + + music::elog << "[HDF_IO] trying to evaluate unsupported type in GetDataType"; + return -1; +} + +template +void Grid_FFT::Read_from_HDF5(const std::string Filename, const std::string ObjName) +{ + if (bdistributed) + { + music::elog << "Attempt to read from HDF5 into MPI-distributed array. This is not supported yet!" << std::endl; + abort(); + } + + hid_t HDF_Type = hdf5_get_data_type(); + + hid_t HDF_FileID = H5Fopen(Filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + + //... save old error handler + herr_t (*old_func)(void *); + void *old_client_data; + + H5Eget_auto(&old_func, &old_client_data); + + //... 
turn off error handling by hdf5 library + H5Eset_auto(NULL, NULL); + + //... probe dataset opening + hid_t HDF_DatasetID = H5Dopen(HDF_FileID, ObjName.c_str()); + + //... restore previous error handler + H5Eset_auto(old_func, old_client_data); + + //... dataset did not exist or was empty + if (HDF_DatasetID < 0) + { + music::elog << "Dataset \'" << ObjName.c_str() << "\' does not exist or is empty." << std::endl; + H5Fclose(HDF_FileID); + abort(); + } + + //... get space associated with dataset and its extensions + hid_t HDF_DataspaceID = H5Dget_space(HDF_DatasetID); + + int ndims = H5Sget_simple_extent_ndims(HDF_DataspaceID); + + hsize_t dimsize[3]; + + H5Sget_simple_extent_dims(HDF_DataspaceID, dimsize, NULL); + + hsize_t HDF_StorageSize = 1; + for (int i = 0; i < ndims; ++i) + HDF_StorageSize *= dimsize[i]; + + //... adjust the array size to hold the data + std::vector Data; + Data.reserve(HDF_StorageSize); + Data.assign(HDF_StorageSize, (data_t)0); + + if (Data.capacity() < HDF_StorageSize) + { + music::elog << "Not enough memory to store all data in HDFReadDataset!" << std::endl; + H5Sclose(HDF_DataspaceID); + H5Dclose(HDF_DatasetID); + H5Fclose(HDF_FileID); + abort(); + } + + //... read the dataset + H5Dread(HDF_DatasetID, HDF_Type, H5S_ALL, H5S_ALL, H5P_DEFAULT, &Data[0]); + + if (Data.size() != HDF_StorageSize) + { + music::elog << "Something went wrong while reading!" << std::endl; + H5Sclose(HDF_DataspaceID); + H5Dclose(HDF_DatasetID); + H5Fclose(HDF_FileID); + abort(); + } + + H5Sclose(HDF_DataspaceID); + H5Dclose(HDF_DatasetID); + H5Fclose(HDF_FileID); + + assert(dimsize[0] == dimsize[1] && dimsize[0] == dimsize[2]); + music::ilog << "Read external constraint data of dimensions " << dimsize[0] << "**3." << std::endl; + + for (size_t i = 0; i < 3; ++i) + this->n_[i] = dimsize[i]; + this->space_ = rspace_id; + + if (data_ != nullptr) + { + fftw_free(data_); + } + this->Setup(); + + //... copy data to internal array ... 
+ real_t sum1{0.0}, sum2{0.0}; + #pragma omp parallel for reduction(+ : sum1, sum2) + for (size_t i = 0; i < size(0); ++i) + { + for (size_t j = 0; j < size(1); ++j) + { + for (size_t k = 0; k < size(2); ++k) + { + this->relem(i, j, k) = Data[(i * size(1) + j) * size(2) + k]; + sum2 += std::real(this->relem(i, j, k) * this->relem(i, j, k)); + sum1 += std::real(this->relem(i, j, k)); + } + } + } + sum1 /= Data.size(); + sum2 /= Data.size(); + auto stdw = std::sqrt(sum2 - sum1 * sum1); + music::ilog << "Constraint field has =" << sum1 << ", -^2=" << stdw << std::endl; + + #pragma omp parallel for reduction(+ : sum1, sum2) + for (size_t i = 0; i < size(0); ++i) + { + for (size_t j = 0; j < size(1); ++j) + { + for (size_t k = 0; k < size(2); ++k) + { + this->relem(i, j, k) /= stdw; + } + } + } +} + +template +void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) const +{ + // FIXME: cleanup duplicate code in this function! + if (!bdistributed && CONFIG::MPI_task_rank == 0) + { + + hid_t file_id, dset_id; /* file and dataset identifiers */ + hid_t filespace, memspace; /* file and memory dataspace identifiers */ + hsize_t offset[3], count[3]; + hid_t dtype_id = H5T_NATIVE_FLOAT; + hid_t plist_id = H5P_DEFAULT; + + if (!file_exists(fname)) + create_hdf5(fname); + + file_id = H5Fopen(fname.c_str(), H5F_ACC_RDWR, plist_id); + + for (int i = 0; i < 3; ++i) + count[i] = size(i); + + if (typeid(data_t) == typeid(float)) + dtype_id = H5T_NATIVE_FLOAT; + else if (typeid(data_t) == typeid(double)) + dtype_id = H5T_NATIVE_DOUBLE; + else if (typeid(data_t) == typeid(long double)) + dtype_id = H5T_NATIVE_LDOUBLE; + else if (typeid(data_t) == typeid(std::complex)) + dtype_id = H5T_NATIVE_FLOAT; + else if (typeid(data_t) == typeid(std::complex)) + dtype_id = H5T_NATIVE_DOUBLE; + else if (typeid(data_t) == typeid(std::complex)) + dtype_id = H5T_NATIVE_LDOUBLE; + + filespace = H5Screate_simple(3, count, NULL); + dset_id = H5Dcreate2(file_id, datasetname.c_str(), 
dtype_id, filespace, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Sclose(filespace); + + hsize_t slice_sz = size(1) * size(2); + + real_t *buf = new real_t[slice_sz]; + + count[0] = 1; + count[1] = size(1); + count[2] = size(2); + + offset[1] = 0; + offset[2] = 0; + + memspace = H5Screate_simple(3, count, NULL); + filespace = H5Dget_space(dset_id); + + for (size_t i = 0; i < size(0); ++i) + { + offset[0] = i; + for (size_t j = 0; j < size(1); ++j) + { + for (size_t k = 0; k < size(2); ++k) + { + if (this->space_ == rspace_id) + buf[j * size(2) + k] = std::real(relem(i, j, k)); + else + buf[j * size(2) + k] = std::real(kelem(i, j, k)); + } + } + + H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, NULL); + H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf); + } + + H5Sclose(filespace); + H5Sclose(memspace); + + // H5Sclose(filespace); + H5Dclose(dset_id); + + if (typeid(data_t) == typeid(std::complex) || + typeid(data_t) == typeid(std::complex) || + typeid(data_t) == typeid(std::complex) || + this->space_ == kspace_id) + { + datasetname += std::string(".im"); + + for (int i = 0; i < 3; ++i) + count[i] = size(i); + + filespace = H5Screate_simple(3, count, NULL); + dset_id = H5Dcreate2(file_id, datasetname.c_str(), dtype_id, filespace, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + H5Sclose(filespace); + + count[0] = 1; + + for (size_t i = 0; i < size(0); ++i) + { + offset[0] = i; + + for (size_t j = 0; j < size(1); ++j) + for (size_t k = 0; k < size(2); ++k) + { + if (this->space_ == rspace_id) + buf[j * size(2) + k] = std::imag(relem(i, j, k)); + else + buf[j * size(2) + k] = std::imag(kelem(i, j, k)); + } + + memspace = H5Screate_simple(3, count, NULL); + filespace = H5Dget_space(dset_id); + + H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, NULL, count, + NULL); + + H5Dwrite(dset_id, dtype_id, memspace, filespace, H5P_DEFAULT, buf); + + H5Sclose(memspace); + H5Sclose(filespace); + } + + H5Dclose(dset_id); + + delete[] buf; + } 
+ + H5Fclose(file_id); + return; + } + + if (!bdistributed && CONFIG::MPI_task_rank != 0) + return; + hid_t file_id, dset_id; /* file and dataset identifiers */ hid_t filespace, memspace; /* file and memory dataspace identifiers */ hsize_t offset[3], count[3]; @@ -282,8 +548,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) int mpi_size, mpi_rank; - mpi_size = MPI_Get_size(); - mpi_rank = MPI_Get_rank(); + mpi_size = MPI::get_size(); + mpi_rank = MPI::get_rank(); if (!file_exists(fname) && mpi_rank == 0) create_hdf5(fname); @@ -329,14 +595,14 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) dtype_id = H5T_NATIVE_FLOAT; else if (typeid(data_t) == typeid(double)) dtype_id = H5T_NATIVE_DOUBLE; + else if (typeid(data_t) == typeid(long double)) + dtype_id = H5T_NATIVE_LDOUBLE; else if (typeid(data_t) == typeid(std::complex)) - { dtype_id = H5T_NATIVE_FLOAT; - } else if (typeid(data_t) == typeid(std::complex)) - { dtype_id = H5T_NATIVE_DOUBLE; - } + else if (typeid(data_t) == typeid(std::complex)) + dtype_id = H5T_NATIVE_LDOUBLE; #if defined(USE_MPI) && !defined(USE_MPI_IO) if (itask == 0) @@ -391,7 +657,10 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) { for (size_t k = 0; k < size(2); ++k) { - buf[j * size(2) + k] = std::real(relem(i, j, k)); + if (this->space_ == rspace_id) + buf[j * size(2) + k] = std::real(relem(i, j, k)); + else + buf[j * size(2) + k] = std::real(kelem(i, j, k)); } } @@ -410,7 +679,9 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) H5Dclose(dset_id); if (typeid(data_t) == typeid(std::complex) || - typeid(data_t) == typeid(std::complex)) + typeid(data_t) == typeid(std::complex) || + typeid(data_t) == typeid(std::complex) || + this->space_ == kspace_id) { datasetname += std::string(".im"); @@ -460,7 +731,10 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) for (size_t j = 0; j < size(1); ++j) for (size_t k = 0; k < 
size(2); ++k) { - buf[j * size(2) + k] = std::imag(relem(i, j, k)); + if (this->space_ == rspace_id) + buf[j * size(2) + k] = std::imag(relem(i, j, k)); + else + buf[j * size(2) + k] = std::imag(kelem(i, j, k)); } memspace = H5Screate_simple(3, count, NULL); @@ -493,8 +767,8 @@ void Grid_FFT::Write_to_HDF5(std::string fname, std::string datasetname) #include -template -void Grid_FFT::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax) +template +void Grid_FFT::Write_PDF(std::string ofname, int nbins, double scale, double vmin, double vmax) { double logvmin = std::log10(vmin); double logvmax = std::log10(vmax); @@ -545,13 +819,12 @@ void Grid_FFT::Write_PDF(std::string ofname, int nbins, double scale, do #endif } -template -void Grid_FFT::Write_PowerSpectrum(std::string ofname) +template +void Grid_FFT::Write_PowerSpectrum(std::string ofname) { std::vector bin_k, bin_P, bin_eP; std::vector bin_count; - int nbins = 4 * std::max(nhalf_[0], std::max(nhalf_[1], nhalf_[2])); - this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count ); + this->Compute_PowerSpectrum(bin_k, bin_P, bin_eP, bin_count); #if defined(USE_MPI) if (CONFIG::MPI_task_rank == 0) { @@ -576,8 +849,8 @@ void Grid_FFT::Write_PowerSpectrum(std::string ofname) #endif } -template -void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count ) +template +void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::vector &bin_P, std::vector &bin_eP, std::vector &bin_count) { this->FourierTransformForward(); @@ -597,7 +870,7 @@ void Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::ve for (size_t iy = 0; iy < size(1); iy++) for (size_t iz = 0; iz < size(2); iz++) { - vec3 k3 = get_k(ix, iy, iz); + vec3_t k3 = get_k(ix, iy, iz); double k = k3.norm(); int idx2 = k / dk; //int((1.0f / dklog * std::log10(k / kmin))); auto z = this->kelem(ix, iy, iz); @@ -657,5 +930,7 @@ void 
Grid_FFT::Compute_PowerSpectrum(std::vector &bin_k, std::ve /********************************************************************************************/ -template class Grid_FFT; -template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; +template class Grid_FFT; diff --git a/src/ic_generator.cc b/src/ic_generator.cc index ba0e209..f677551 100644 --- a/src/ic_generator.cc +++ b/src/ic_generator.cc @@ -7,6 +7,7 @@ #include #include +#include #include // for unlink @@ -21,18 +22,18 @@ namespace ic_generator{ std::unique_ptr the_random_number_generator; std::unique_ptr the_output_plugin; -std::unique_ptr the_cosmo_calc; +std::unique_ptr the_cosmo_calc; -int Initialise( ConfigFile& the_config ) +int Initialise( config_file& the_config ) { the_random_number_generator = std::move(select_RNG_plugin(the_config)); the_output_plugin = std::move(select_output_plugin(the_config)); - the_cosmo_calc = std::make_unique(the_config); + the_cosmo_calc = std::make_unique(the_config); return 0; } -int Run( ConfigFile& the_config ) +int Run( config_file& the_config ) { //-------------------------------------------------------------------------------------------------------- // Read run parameters @@ -40,56 +41,75 @@ int Run( ConfigFile& the_config ) //-------------------------------------------------------------------------------------------------------- //! number of resolution elements per dimension - const size_t ngrid = the_config.GetValue("setup", "GridRes"); + const size_t ngrid = the_config.get_value("setup", "GridRes"); //-------------------------------------------------------------------------------------------------------- //! box side length in h-1 Mpc - const real_t boxlen = the_config.GetValue("setup", "BoxLength"); + const real_t boxlen = the_config.get_value("setup", "BoxLength"); //-------------------------------------------------------------------------------------------------------- //! 
starting redshift - const real_t zstart = the_config.GetValue("setup", "zstart"); + const real_t zstart = the_config.get_value("setup", "zstart"); //-------------------------------------------------------------------------------------------------------- //! order of the LPT approximation - int LPTorder = the_config.GetValueSafe("setup","LPTorder",100); + int LPTorder = the_config.get_value_safe("setup","LPTorder",100); //-------------------------------------------------------------------------------------------------------- //! initialice particles on a bcc or fcc lattice instead of a standard sc lattice (doubles and quadruples the number of particles) - std::string lattice_str = the_config.GetValueSafe("setup","ParticleLoad","sc"); - const particle::lattice lattice_type = (lattice_str=="bcc")? particle::lattice_bcc - : ((lattice_str=="fcc")? particle::lattice_fcc : particle::lattice_sc); + std::string lattice_str = the_config.get_value_safe("setup","ParticleLoad","sc"); + const particle::lattice lattice_type = + ((lattice_str=="bcc")? particle::lattice_bcc + : ((lattice_str=="fcc")? particle::lattice_fcc + : ((lattice_str=="rsc")? particle::lattice_rsc + : ((lattice_str=="glass")? particle::lattice_glass + : particle::lattice_sc)))); //-------------------------------------------------------------------------------------------------------- //! apply fixing of the complex mode amplitude following Angulo & Pontzen (2016) [https://arxiv.org/abs/1603.05253] - const bool bDoFixing = the_config.GetValueSafe("setup", "DoFixing", false); + const bool bDoFixing = the_config.get_value_safe("setup", "DoFixing", false); //-------------------------------------------------------------------------------------------------------- //! do baryon ICs? 
- const bool bDoBaryons = the_config.GetValueSafe("setup", "DoBaryons", false ); + const bool bDoBaryons = the_config.get_value_safe("setup", "DoBaryons", false ); + std::map< cosmo_species, double > Omega; + if( bDoBaryons ){ + double Om = the_config.get_value("cosmology", "Omega_m"); + double Ob = the_config.get_value("cosmology", "Omega_b"); + Omega[cosmo_species::dm] = Om-Ob; + Omega[cosmo_species::baryon] = Ob; + }else{ + double Om = the_config.get_value("cosmology", "Omega_m"); + Omega[cosmo_species::dm] = Om; + Omega[cosmo_species::baryon] = 0.0; + } + + //-------------------------------------------------------------------------------------------------------- + //! do constrained ICs? + const bool bAddConstrainedModes = the_config.contains_key("setup", "ConstraintFieldFile" ); //-------------------------------------------------------------------------------------------------------- //! add beyond box tidal field modes following Schmidt et al. (2018) [https://arxiv.org/abs/1803.03274] - bool bAddExternalTides = the_config.ContainsKey("cosmology", "LSS_aniso_lx") - & the_config.ContainsKey("cosmology", "LSS_aniso_ly") - & the_config.ContainsKey("cosmology", "LSS_aniso_lz"); + bool bAddExternalTides = the_config.contains_key("cosmology", "LSS_aniso_lx") + & the_config.contains_key("cosmology", "LSS_aniso_ly") + & the_config.contains_key("cosmology", "LSS_aniso_lz"); - if( bAddExternalTides && !( the_config.ContainsKey("cosmology", "LSS_aniso_lx") - | the_config.ContainsKey("cosmology", "LSS_aniso_ly") - | the_config.ContainsKey("cosmology", "LSS_aniso_lz") )) + if( bAddExternalTides && !( the_config.contains_key("cosmology", "LSS_aniso_lx") + | the_config.contains_key("cosmology", "LSS_aniso_ly") + | the_config.contains_key("cosmology", "LSS_aniso_lz") )) { - csoca::elog << "Not all dimensions of LSS_aniso_l{x,y,z} specified! Will ignore external tidal field!" << std::endl; + music::elog << "Not all dimensions of LSS_aniso_l{x,y,z} specified! 
Will ignore external tidal field!" << std::endl; bAddExternalTides = false; } // Anisotropy parameters for beyond box tidal field std::array lss_aniso_lambda = { - the_config.GetValueSafe("cosmology", "LSS_aniso_lx", 0.0), - the_config.GetValueSafe("cosmology", "LSS_aniso_ly", 0.0), - the_config.GetValueSafe("cosmology", "LSS_aniso_lz", 0.0), + the_config.get_value_safe("cosmology", "LSS_aniso_lx", 0.0), + the_config.get_value_safe("cosmology", "LSS_aniso_ly", 0.0), + the_config.get_value_safe("cosmology", "LSS_aniso_lz", 0.0), }; if( std::abs(lss_aniso_lambda[0]+lss_aniso_lambda[1]+lss_aniso_lambda[2]) > 1e-10 ){ - csoca::elog << "External tidal field is not trace-free! Will subtract trace!" << std::endl; + music::elog << "External tidal field is not trace-free! Will subtract trace!" << std::endl; auto tr_l_3 = (lss_aniso_lambda[0]+lss_aniso_lambda[1]+lss_aniso_lambda[2])/3.0; lss_aniso_lambda[0] -= tr_l_3; lss_aniso_lambda[1] -= tr_l_3; @@ -101,20 +121,20 @@ int Run( ConfigFile& the_config ) const real_t astart = 1.0/(1.0+zstart); const real_t volfac(std::pow(boxlen / ngrid / 2.0 / M_PI, 1.5)); - the_cosmo_calc->WritePowerspectrum(astart, "input_powerspec.txt" ); + the_cosmo_calc->write_powerspectrum(astart, "input_powerspec.txt" ); - //csoca::ilog << "-----------------------------------------------------------------------------" << std::endl; + //music::ilog << "-----------------------------------------------------------------------------" << std::endl; // if( bSymplecticPT && LPTorder!=2 ){ - // csoca::wlog << "SymplecticPT has been selected and will overwrite chosen order of LPT to 2" << std::endl; + // music::wlog << "SymplecticPT has been selected and will overwrite chosen order of LPT to 2" << std::endl; // LPTorder = 2; // } //-------------------------------------------------------------------- // Compute LPT time coefficients //-------------------------------------------------------------------- - const real_t Dplus0 = 
the_cosmo_calc->CalcGrowthFactor(astart) / the_cosmo_calc->CalcGrowthFactor(1.0); - const real_t vfac = the_cosmo_calc->CalcVFact(astart); + const real_t Dplus0 = the_cosmo_calc->get_growth_factor(astart); + const real_t vfac = the_cosmo_calc->get_vfact(astart); const double g1 = -Dplus0; const double g2 = ((LPTorder>1)? -3.0/7.0*Dplus0*Dplus0 : 0.0); @@ -132,7 +152,7 @@ int Run( ConfigFile& the_config ) // coefficients needed for anisotropic external tides const double ai3 = std::pow(astart,-3); const double Omega_m_of_a = the_cosmo_calc->cosmo_param_.Omega_m * ai3 / (the_cosmo_calc->cosmo_param_.Omega_m * ai3 + the_cosmo_calc->cosmo_param_.Omega_DE); - const double f1 = the_cosmo_calc->CalcGrowthRate(astart); + const double f1 = the_cosmo_calc->get_f(astart); const double f_aniso = -4.0/3.0 * f1 * f1 / Omega_m_of_a; const std::array lss_aniso_alpha = { @@ -151,200 +171,300 @@ int Run( ConfigFile& the_config ) Grid_FFT A3x({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT A3y({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT A3z({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + //... array [.] access to components of A3: - std::array< Grid_FFT*,3 > A3({&A3x,&A3y,&A3z}); + std::array *, 3> A3({&A3x, &A3y, &A3z}); + + // white noise field + Grid_FFT wnoise({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + + //-------------------------------------------------------------------- + // Fill the grid with a Gaussian white noise field + //-------------------------------------------------------------------- + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "Generating white noise field...." 
<< std::endl; + + the_random_number_generator->Fill_Grid(wnoise); + + wnoise.FourierTransformForward(); + + //-------------------------------------------------------------------- + // Use externally specified large scale modes from constraints in case + //-------------------------------------------------------------------- + if( bAddConstrainedModes ){ + Grid_FFT cwnoise({8,8,8}, {boxlen,boxlen,boxlen}); + cwnoise.Read_from_HDF5( the_config.get_value("setup", "ConstraintFieldFile"), + the_config.get_value("setup", "ConstraintFieldName") ); + cwnoise.FourierTransformForward(); + + size_t ngrid_c = cwnoise.size(0), ngrid_c_2 = ngrid_c/2; + + // TODO: copy over modes + double rs1{0.0},rs2{0.0},is1{0.0},is2{0.0}; + double nrs1{0.0},nrs2{0.0},nis1{0.0},nis2{0.0}; + size_t count{0}; + + #pragma omp parallel for reduction(+:rs1,rs2,is1,is2,nrs1,nrs2,nis1,nis2,count) + for( size_t i=0; ingrid_c_2 && i+ngrid-ngrid_c>ngrid/2) il = ngrid-ngrid_c+i; + if( il == size_t(-1) ) continue; + if( il=size_t(wnoise.local_1_start_+wnoise.local_1_size_)) continue; + il -= wnoise.local_1_start_; + for( size_t j=0; jngrid_c_2 && j+ngrid-ngrid_c>ngrid/2 ) jl = ngrid-ngrid_c+j; + if( jl == size_t(-1) ) continue; + for( size_t k=0; kngrid/2 ) continue; + size_t kl = k; + + ++count; + + nrs1 += std::real(cwnoise.kelem(i,j,k)); + nrs2 += std::real(cwnoise.kelem(i,j,k))*std::real(cwnoise.kelem(i,j,k)); + nis1 += std::imag(cwnoise.kelem(i,j,k)); + nis2 += std::imag(cwnoise.kelem(i,j,k))*std::imag(cwnoise.kelem(i,j,k)); + + rs1 += std::real(wnoise.kelem(il,jl,kl)); + rs2 += std::real(wnoise.kelem(il,jl,kl))*std::real(wnoise.kelem(il,jl,kl)); + is1 += std::imag(wnoise.kelem(il,jl,kl)); + is2 += std::imag(wnoise.kelem(il,jl,kl))*std::imag(wnoise.kelem(il,jl,kl)); + + #if defined(USE_MPI) + wnoise.kelem(il,jl,kl) = cwnoise.kelem(j,i,k); + #else + wnoise.kelem(il,jl,kl) = cwnoise.kelem(i,j,k); + #endif + } + } + } + + // music::ilog << " ... 
old field: re =" << rs1/count << " -^2=" << rs2/count-rs1*rs1/count/count << std::endl; + // music::ilog << " ... old field: im =" << is1/count << " -^2=" << is2/count-is1*is1/count/count << std::endl; + // music::ilog << " ... new field: re =" << nrs1/count << " -^2=" << nrs2/count-nrs1*nrs1/count/count << std::endl; + // music::ilog << " ... new field: im =" << nis1/count << " -^2=" << nis2/count-nis1*nis1/count/count << std::endl; + music::ilog << "White noise field large-scale modes overwritten with external field." << std::endl; + } + + //-------------------------------------------------------------------- + // Apply Normalisation factor and Angulo&Pontzen fixing or not + //-------------------------------------------------------------------- + + wnoise.apply_function_k( [&](auto wn){ + if (bDoFixing) + wn = (std::abs(wn) != 0.0) ? wn / std::abs(wn) : wn; + return wn / volfac; + }); + + + //-------------------------------------------------------------------- + // Compute the LPT terms.... 
+ //-------------------------------------------------------------------- //-------------------------------------------------------------------- // Create convolution class instance for non-linear terms //-------------------------------------------------------------------- +#if defined(USE_CONVOLVER_ORSZAG) OrszagConvolver Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); - // NaiveConvolver Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); +#elif defined(USE_CONVOLVER_NAIVE) + NaiveConvolver Conv({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); +#endif //-------------------------------------------------------------------- - std::vector species_list; - species_list.push_back( cosmo_species::dm ); - if( bDoBaryons ) species_list.push_back( cosmo_species::baryon ); + //-------------------------------------------------------------------- + // Create PLT gradient operator + //-------------------------------------------------------------------- +#if defined(ENABLE_PLT) + particle::lattice_gradient lg( the_config ); +#else + op::fourier_gradient lg( the_config ); +#endif + + //-------------------------------------------------------------------- + std::vector species_list; + species_list.push_back(cosmo_species::dm); + if (bDoBaryons) + species_list.push_back(cosmo_species::baryon); + + //====================================================================== + //... compute 1LPT displacement potential .... + //====================================================================== + // phi = - delta / k^2 + + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "Generating white noise field...." 
<< std::endl; + + double wtime = get_wtime(); + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush; + + phi.FourierTransformForward(false); + phi.assign_function_of_grids_kdep([&](auto k, auto wn) { + real_t kmod = k.norm(); + ccomplex_t delta = wn * the_cosmo_calc->get_amplitude(kmod, total); + return -delta / (kmod * kmod); + }, wnoise); + + phi.zero_DC_mode(); + + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + + //====================================================================== + //... compute 2LPT displacement potential .... + //====================================================================== + if (LPTorder > 1) + { + wtime = get_wtime(); + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush; + phi2.FourierTransformForward(false); + Conv.convolve_SumOfHessians(phi, {0, 0}, phi, {1, 1}, {2, 2}, op::assign_to(phi2)); + Conv.convolve_Hessians(phi, {1, 1}, phi, {2, 2}, op::add_to(phi2)); + Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, op::subtract_from(phi2)); + Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, op::subtract_from(phi2)); + Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, op::subtract_from(phi2)); + + if (bAddExternalTides) + { + phi2.assign_function_of_grids_kdep([&](vec3_t kvec, ccomplex_t pphi, ccomplex_t pphi2) { + // sign in front of f_aniso is reversed since phi1 = -phi + return pphi2 + f_aniso * (kvec[0] * kvec[0] * lss_aniso_lambda[0] + kvec[1] * kvec[1] * lss_aniso_lambda[1] + kvec[2] * kvec[2] * lss_aniso_lambda[2]) * pphi; + }, + phi, phi2); + } + + phi2.apply_InverseLaplacian(); + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + + if (bAddExternalTides) + { + music::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" 
<< std::endl; + music::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl; + } + } + + //====================================================================== + //... compute 3LPT displacement potential + //====================================================================== + if (LPTorder > 2) + { + //... 3a term ... + wtime = get_wtime(); + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush; + phi3a.FourierTransformForward(false); + Conv.convolve_Hessians(phi, {0, 0}, phi, {1, 1}, phi, {2, 2}, op::assign_to(phi3a)); + Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::multiply_add_to(phi3a,2.0)); + Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, phi, {0, 0}, op::subtract_from(phi3a)); + Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, phi, {1, 1}, op::subtract_from(phi3a)); + Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, phi, {2, 2}, op::subtract_from(phi3a)); + phi3a.apply_InverseLaplacian(); + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + + //... 3b term ... + wtime = get_wtime(); + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush; + phi3b.FourierTransformForward(false); + Conv.convolve_SumOfHessians(phi, {0, 0}, phi2, {1, 1}, {2, 2}, op::assign_to(phi3b)); + Conv.convolve_SumOfHessians(phi, {1, 1}, phi2, {2, 2}, {0, 0}, op::add_to(phi3b)); + Conv.convolve_SumOfHessians(phi, {2, 2}, phi2, {0, 0}, {1, 1}, op::add_to(phi3b)); + Conv.convolve_Hessians(phi, {0, 1}, phi2, {0, 1}, op::multiply_add_to(phi3b,-2.0)); + Conv.convolve_Hessians(phi, {0, 2}, phi2, {0, 2}, op::multiply_add_to(phi3b,-2.0)); + Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::multiply_add_to(phi3b,-2.0)); + phi3b.apply_InverseLaplacian(); + phi3b *= 0.5; // factor 1/2 from definition of phi(3b)! 
+ music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + + //... transversal term ... + wtime = get_wtime(); + music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush; + for (int idim = 0; idim < 3; ++idim) + { + // cyclic rotations of indices + int idimp = (idim + 1) % 3, idimpp = (idim + 2) % 3; + A3[idim]->FourierTransformForward(false); + Conv.convolve_Hessians(phi2, {idim, idimp}, phi, {idim, idimpp}, op::assign_to(*A3[idim])); + Conv.convolve_Hessians(phi2, {idim, idimpp}, phi, {idim, idimp}, op::subtract_from(*A3[idim])); + Conv.convolve_DifferenceOfHessians(phi, {idimp, idimpp}, phi2, {idimp, idimp}, {idimpp, idimpp}, op::add_to(*A3[idim])); + Conv.convolve_DifferenceOfHessians(phi2, {idimp, idimpp}, phi, {idimp, idimp}, {idimpp, idimpp}, op::subtract_from(*A3[idim])); + A3[idim]->apply_InverseLaplacian(); + } + music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl; + } + + // if( bSymplecticPT ){ + // //... transversal term ... + // wtime = get_wtime(); + // music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing vNLO(3) term" << std::flush; + // for( int idim=0; idim<3; ++idim ){ + // // cyclic rotations of indices + // A3[idim]->FourierTransformForward(false); + // Conv.convolve_Gradient_and_Hessian( phi, {0}, phi2, {idim,0}, assign_to(*A3[idim]) ); + // Conv.convolve_Gradient_and_Hessian( phi, {1}, phi2, {idim,1}, add_to(*A3[idim]) ); + // Conv.convolve_Gradient_and_Hessian( phi, {2}, phi2, {idim,2}, add_to(*A3[idim]) ); + // } + // music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; + + // } + + ///... 
scale all potentials with respective growth factors + phi *= g1; + phi2 *= g2; + phi3a *= g3a; + phi3b *= g3b; + (*A3[0]) *= g3c; + (*A3[1]) *= g3c; + (*A3[2]) *= g3c; + + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + + /////////////////////////////////////////////////////////////////////// + // we store the densities here if we compute them + //====================================================================== + + // Testing + const std::string testing = the_config.get_value_safe("testing", "test", "none"); + + if (testing != "none") + { + music::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" << std::endl; + if (testing == "potentials_and_densities"){ + testing::output_potentials_and_densities(the_config, ngrid, boxlen, phi, phi2, phi3a, phi3b, A3); + } + else if (testing == "velocity_displacement_symmetries"){ + testing::output_velocity_displacement_symmetries(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); + } + else if (testing == "convergence"){ + testing::output_convergence(the_config, the_cosmo_calc.get(), ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); + } + else{ + music::flog << "unknown test '" << testing << "'" << std::endl; + std::abort(); + } + } - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - for( auto& this_species : species_list ) { - csoca::ilog << std::endl + music::ilog << std::endl << ">>> Computing ICs for species \'" << cosmo_species_name[this_species] << "\' <<<\n" << std::endl; - //====================================================================== - //... compute 1LPT displacement potential .... 
- //====================================================================== - // phi = - delta / k^2 - double wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush; - - #if 1 // random ICs - //-------------------------------------------------------------------- - // Fill the grid with a Gaussian white noise field - //-------------------------------------------------------------------- - the_random_number_generator->Fill_Grid( phi ); - - phi.FourierTransformForward(); - - phi.apply_function_k_dep([&](auto x, auto k) -> ccomplex_t { - real_t kmod = k.norm(); - if( bDoFixing ) x = (std::abs(x)!=0.0)? x / std::abs(x) : x; - ccomplex_t delta = x * the_cosmo_calc->GetAmplitude(kmod, total); - return -delta / (kmod * kmod) / volfac; - }); - - phi.zero_DC_mode(); - #else // ICs with a given phi(1) potential function - constexpr real_t twopi{2.0*M_PI}; - constexpr real_t epsilon_q1d{0.25}; - - constexpr real_t epsy{0.25}; - constexpr real_t epsz{0.0};//epsz{0.25}; - - phi.FourierTransformBackward(false); - - phi.apply_function_r_dep([&](auto v, auto r) -> real_t { - real_t q1 = r[0]-0.5*boxlen;//r[0]/boxlen * twopi - M_PI; - real_t q2 = r[1]-0.5*boxlen;//r[1]/boxlen * twopi - M_PI; - real_t q3 = r[2]-0.5*boxlen;//r[1]/boxlen * twopi - M_PI; - - // std::cerr << q1 << " " << q2 << std::endl; - - return -2.0*std::cos(q1+std::cos(q2)); - // return (-std::cos(q1) + epsilon_q1d * std::sin(q2)); - // return (-std::cos(q1) + epsy * std::sin(q2) + epsz * std::cos(q1) * std::sin(q3)); - }); - phi.FourierTransformForward(); - - - #endif - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; - - //====================================================================== - //... compute 2LPT displacement potential .... 
- //====================================================================== - if( LPTorder > 1 ){ - wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush; - phi2.FourierTransformForward(false); - Conv.convolve_SumOfHessians( phi, {0,0}, phi, {1,1}, {2,2}, op::assign_to( phi2 ) ); - Conv.convolve_Hessians( phi, {1,1}, phi, {2,2}, op::add_to(phi2) ); - Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, op::subtract_from(phi2) ); - Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, op::subtract_from(phi2) ); - Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, op::subtract_from(phi2) ); - - if( bAddExternalTides ){ - phi2.assign_function_of_grids_kdep([&]( vec3 kvec, ccomplex_t pphi, ccomplex_t pphi2 ){ - // sign in front of f_aniso is reversed since phi1 = -phi - return pphi2 + f_aniso * (kvec[0]*kvec[0]*lss_aniso_lambda[0]+kvec[1]*kvec[1]*lss_aniso_lambda[1]+kvec[2]*kvec[2]*lss_aniso_lambda[2])*pphi; - }, phi, phi2 ); - } - - phi2.apply_InverseLaplacian(); - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; - - if( bAddExternalTides ){ - csoca::wlog << "Added external tide contribution to phi(2)... Make sure your N-body code supports this!" << std::endl; - csoca::wlog << " lss_aniso = (" << lss_aniso_lambda[0] << ", " << lss_aniso_lambda[1] << ", " << lss_aniso_lambda[2] << ")" << std::endl; - } - } - - //====================================================================== - //... compute 3LPT displacement potential - //====================================================================== - if( LPTorder > 2 ){ - //... 3a term ... 
- wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush; - phi3a.FourierTransformForward(false); - Conv.convolve_Hessians( phi, {0,0}, phi, {1,1}, phi, {2,2}, op::assign_to(phi3a) ); - Conv.convolve_Hessians( phi, {0,1}, phi, {0,2}, phi, {1,2}, op::add_twice_to(phi3a) ); - Conv.convolve_Hessians( phi, {1,2}, phi, {1,2}, phi, {0,0}, op::subtract_from(phi3a) ); - Conv.convolve_Hessians( phi, {0,2}, phi, {0,2}, phi, {1,1}, op::subtract_from(phi3a) ); - Conv.convolve_Hessians( phi, {0,1}, phi, {0,1}, phi, {2,2}, op::subtract_from(phi3a) ); - phi3a.apply_InverseLaplacian(); - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; - - //... 3b term ... - wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush; - phi3b.FourierTransformForward(false); - Conv.convolve_SumOfHessians( phi, {0,0}, phi2, {1,1}, {2,2}, op::assign_to(phi3b) ); - Conv.convolve_SumOfHessians( phi, {1,1}, phi2, {2,2}, {0,0}, op::add_to(phi3b) ); - Conv.convolve_SumOfHessians( phi, {2,2}, phi2, {0,0}, {1,1}, op::add_to(phi3b) ); - Conv.convolve_Hessians( phi, {0,1}, phi2, {0,1}, op::subtract_twice_from(phi3b) ); - Conv.convolve_Hessians( phi, {0,2}, phi2, {0,2}, op::subtract_twice_from(phi3b) ); - Conv.convolve_Hessians( phi, {1,2}, phi2, {1,2}, op::subtract_twice_from(phi3b) ); - phi3b.apply_InverseLaplacian(); - phi3b *= 0.5; // factor 1/2 from definition of phi(3b)! - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; - - //... transversal term ... 
- wtime = get_wtime(); - csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush; - for( int idim=0; idim<3; ++idim ){ - // cyclic rotations of indices - int idimp = (idim+1)%3, idimpp = (idim+2)%3; - A3[idim]->FourierTransformForward(false); - Conv.convolve_Hessians( phi2, {idim,idimp}, phi, {idim,idimpp}, op::assign_to(*A3[idim]) ); - Conv.convolve_Hessians( phi2, {idim,idimpp}, phi, {idim,idimp}, op::subtract_from(*A3[idim]) ); - Conv.convolve_DifferenceOfHessians( phi, {idimp,idimpp}, phi2,{idimp,idimp}, {idimpp,idimpp}, op::add_to(*A3[idim]) ); - Conv.convolve_DifferenceOfHessians( phi2,{idimp,idimpp}, phi, {idimp,idimp}, {idimpp,idimpp}, op::subtract_from(*A3[idim]) ); - A3[idim]->apply_InverseLaplacian(); - } - csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; - } - - // if( bSymplecticPT ){ - // //... transversal term ... - // wtime = get_wtime(); - // csoca::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing vNLO(3) term" << std::flush; - // for( int idim=0; idim<3; ++idim ){ - // // cyclic rotations of indices - // A3[idim]->FourierTransformForward(false); - // Conv.convolve_Gradient_and_Hessian( phi, {0}, phi2, {idim,0}, assign_to(*A3[idim]) ); - // Conv.convolve_Gradient_and_Hessian( phi, {1}, phi2, {idim,1}, add_to(*A3[idim]) ); - // Conv.convolve_Gradient_and_Hessian( phi, {2}, phi2, {idim,2}, add_to(*A3[idim]) ); - // } - // csoca::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime()-wtime << "s" << std::endl; - - // } - - ///... 
scale all potentials with respective growth factors - phi *= g1; - phi2 *= g2; - phi3a *= g3a; - phi3b *= g3b; - (*A3[0]) *= g3c; - (*A3[1]) *= g3c; - (*A3[2]) *= g3c; - - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - - /////////////////////////////////////////////////////////////////////// - // we store the densities here if we compute them - //====================================================================== - - // Testing - const std::string testing = the_config.GetValueSafe("testing", "test", "none"); - - if(testing != "none") { - csoca::wlog << "you are running in testing mode. No ICs, only diagnostic output will be written out!" << std::endl; - if(testing == "potentials_and_densities") { - testing::output_potentials_and_densities(the_config, ngrid, boxlen, phi, phi2, phi3a, phi3b, A3); - } else if(testing == "velocity_displacement_symmetries") { - testing::output_velocity_displacement_symmetries(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); - } else if(testing == "convergence") { - testing::output_convergence(the_config, ngrid, boxlen, vfac, Dplus0, phi, phi2, phi3a, phi3b, A3); - } else { - csoca::flog << "unknown test '" << testing << "'" << std::endl; - std::abort(); - } - } else { + { // temporary storage of data Grid_FFT tmp({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + std::unique_ptr>> particle_lattice_generator_ptr; + + // if output plugin wants particles, then we need to store them, along with their IDs + if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) + { + // somewhat arbitrarily, start baryon particle IDs from 2**31 if we have 32bit and from 2**56 if we have 64 bits + size_t IDoffset = (this_species == cosmo_species::baryon)? ((the_output_plugin->has_64bit_ids())? 
1ul<<56 : 1ul<<31): 0 ; + + // allocate particle structure and generate particle IDs + particle_lattice_generator_ptr = + std::make_unique>>( lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp, the_config ); + } + //if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_eulerian ){ if( the_output_plugin->write_species_as(this_species) == output_type::field_eulerian ) @@ -362,7 +482,7 @@ int Run( ConfigFile& the_config ) real_t std_phi1 = phi.std(); const real_t hbar = 2.0 * M_PI/ngrid * (2*std_phi1/Dplus0); //3sigma, but this might rather depend on gradients of phi... - csoca::ilog << "Semiclassical PT : hbar = " << hbar << " from sigma(phi1) = " << std_phi1 << std::endl; + music::ilog << "Semiclassical PT : hbar = " << hbar << " from sigma(phi1) = " << std_phi1 << std::endl; if( LPTorder == 1 ){ psi.assign_function_of_grids_r([hbar,Dplus0]( real_t pphi ){ @@ -435,14 +555,21 @@ int Run( ConfigFile& the_config ) //=================================================================================== // we store displacements and velocities here if we compute them //=================================================================================== - particle::container particles; + + + bool shifted_lattice = (this_species == cosmo_species::baryon && + the_output_plugin->write_species_as(this_species) == output_type::particles) ? 
true : false; + + + + grid_interpolate<1,Grid_FFT> interp( tmp ); // if output plugin wants particles, then we need to store them, along with their IDs - if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) - { - // allocate particle structure and generate particle IDs - particle::initialize_lattice( particles, lattice_type, tmp ); - } + // if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) + // { + // // allocate particle structure and generate particle IDs + // particle::initialize_lattice( particles, lattice_type, the_output_plugin->has_64bit_reals(), the_output_plugin->has_64bit_ids(), IDoffset, tmp, the_config ); + // } // write out positions for( int idim=0; idim<3; ++idim ){ @@ -459,17 +586,37 @@ int Run( ConfigFile& the_config ) size_t idx = phi.get_idx(i,j,k); auto phitot = phi.kelem(idx) + phi2.kelem(idx) + phi3a.kelem(idx) + phi3b.kelem(idx); // divide by Lbox, because displacement is in box units for output plugin - tmp.kelem(idx) = lunit / boxlen * ( phi.gradient(idim,{i,j,k}) * phitot - + phi.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - phi.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx) ); + tmp.kelem(idx) = lunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot + + lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx) ); + + if( the_output_plugin->write_species_as( this_species ) == output_type::particles && lattice_type == particle::lattice_glass){ + tmp.kelem(idx) *= interp.compensation_kernel( tmp.get_k(i,j,k) ); + } + + if( bDoBaryons ){ + vec3_t kvec = phi.get_k(i,j,k); + real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); + // double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, cdm) : + // (this_species == cosmo_species::baryon)? 
the_cosmo_calc->get_amplitude(kmod, baryon) : + // // the_cosmo_calc->get_amplitude(kmod, total)) - the_cosmo_calc->get_amplitude(kmod, total); + // the_cosmo_calc->get_amplitude(kmod, total)*(-g1)) - the_cosmo_calc->get_amplitude(kmod, total)*(-g1); + + real_t ampldiff = (((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, cdm) + : (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, baryon) : + the_cosmo_calc->get_amplitude(kmod, total)) - the_cosmo_calc->get_amplitude(kmod, total)) * (-g1); + + tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * lunit * ampldiff / k2 / boxlen; + } } } } + tmp.zero_DC_mode(); tmp.FourierTransformBackward(); // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_positions( particles, lattice_type, idim, lunit, tmp ); + particle_lattice_generator_ptr->set_positions( lattice_type, shifted_lattice, idim, lunit, the_output_plugin->has_64bit_reals(), tmp, the_config ); } // otherwise write out the grid data directly to the output plugin // else if( the_output_plugin->write_species_as( cosmo_species::dm ) == output_type::field_lagrangian ) @@ -496,8 +643,29 @@ int Run( ConfigFile& the_config ) // divide by Lbox, because displacement is in box units for output plugin auto phitot_v = vfac1 * phi.kelem(idx) + vfac2 * phi2.kelem(idx) + vfac3 * (phi3a.kelem(idx) + phi3b.kelem(idx)); - tmp.kelem(idx) = vunit / boxlen * ( phi.gradient(idim,{i,j,k}) * phitot_v - + vfac3 * (phi.gradient(idimp,{i,j,k}) * A3[idimpp]->kelem(idx) - phi.gradient(idimpp,{i,j,k}) * A3[idimp]->kelem(idx)) ); + tmp.kelem(idx) = vunit / boxlen * ( lg.gradient(idim,tmp.get_k3(i,j,k)) * phitot_v + + vfac3 * (lg.gradient(idimp,tmp.get_k3(i,j,k)) * A3[idimpp]->kelem(idx) - lg.gradient(idimpp,tmp.get_k3(i,j,k)) * A3[idimp]->kelem(idx)) ); + + if( the_output_plugin->write_species_as( 
this_species ) == output_type::particles && lattice_type == particle::lattice_glass){ + tmp.kelem(idx) *= interp.compensation_kernel( tmp.get_k(i,j,k) ); + } + + if( bDoBaryons ){ + vec3_t kvec = phi.get_k(i,j,k); + real_t k2 = kvec.norm_squared(), kmod = std::sqrt(k2); + // double ampldiff = ((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, vcdm0) : + // (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, vbaryon0) : + // the_cosmo_calc->get_amplitude(kmod, vtotal0)) - the_cosmo_calc->get_amplitude(kmod, vtotal0); + // // the_cosmo_calc->get_amplitude(kmod, total)*(-g1)) - the_cosmo_calc->get_amplitude(kmod, total)*(-g1); + real_t ampldiff = (((this_species == cosmo_species::dm)? the_cosmo_calc->get_amplitude(kmod, vcdm) + : (this_species == cosmo_species::baryon)? the_cosmo_calc->get_amplitude(kmod, vbaryon) : + the_cosmo_calc->get_amplitude(kmod, vtotal)) - the_cosmo_calc->get_amplitude(kmod, vtotal)) * (-g1); + tmp.kelem(idx) += lg.gradient(idim, tmp.get_k3(i,j,k)) * wnoise.kelem(idx) * vfac1 * vunit / boxlen * ampldiff / k2 ; + } + + // correct velocity with PLT mode growth rate + tmp.kelem(idx) *= lg.vfac_corr(tmp.get_k3(i,j,k)); + if( bAddExternalTides ){ // modify velocities with anisotropic expansion factor**2 @@ -510,12 +678,13 @@ int Run( ConfigFile& the_config ) } } } + tmp.zero_DC_mode(); tmp.FourierTransformBackward(); // if we write particle data, store particle data in particle structure if( the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - particle::set_velocities( particles, lattice_type, idim, tmp ); + particle_lattice_generator_ptr->set_velocities( lattice_type, shifted_lattice, idim, the_output_plugin->has_64bit_reals(), tmp, the_config ); } // otherwise write out the grid data directly to the output plugin else if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian ) @@ -527,7 +696,7 @@ int Run( ConfigFile& the_config ) if( 
the_output_plugin->write_species_as( this_species ) == output_type::particles ) { - the_output_plugin->write_particle_data( particles, this_species ); + the_output_plugin->write_particle_data( particle_lattice_generator_ptr->get_particles(), this_species, Omega[this_species] ); } if( the_output_plugin->write_species_as( this_species ) == output_type::field_lagrangian ) diff --git a/src/logger.cc b/src/logger.cc index 2b93b89..26c34a5 100644 --- a/src/logger.cc +++ b/src/logger.cc @@ -1,19 +1,19 @@ #include -namespace csoca { +namespace music { -std::ofstream Logger::output_file_; -LogLevel Logger::log_level_ = LogLevel::Off; +std::ofstream logger::output_file_; +log_level logger::log_level_ = log_level::off; -void Logger::SetLevel(const LogLevel &level) { +void logger::set_level(const log_level &level) { log_level_ = level; } -LogLevel Logger::GetLevel() { +log_level logger::get_level() { return log_level_; } -void Logger::SetOutput(const std::string filename) { +void logger::set_output(const std::string filename) { if (output_file_.is_open()) { output_file_.close(); } @@ -21,22 +21,22 @@ void Logger::SetOutput(const std::string filename) { assert(output_file_.is_open()); } -void Logger::UnsetOutput() { +void logger::unset_output() { if (output_file_.is_open()) { output_file_.close(); } } -std::ofstream &Logger::GetOutput() { +std::ofstream &logger::get_output() { return output_file_; } // global instantiations for different levels -Logger glogger; -LogStream flog(glogger, LogLevel::Fatal); -LogStream elog(glogger, LogLevel::Error); -LogStream wlog(glogger, LogLevel::Warning); -LogStream ilog(glogger, LogLevel::Info); -LogStream dlog(glogger, LogLevel::Debug); +logger the_logger; +log_stream flog(the_logger, log_level::fatal); +log_stream elog(the_logger, log_level::error); +log_stream wlog(the_logger, log_level::warning); +log_stream ilog(the_logger, log_level::info); +log_stream dlog(the_logger, log_level::debug); -} // namespace csoca +} // namespace music diff 
--git a/src/main.cc b/src/main.cc index 72e9a38..c609a4a 100644 --- a/src/main.cc +++ b/src/main.cc @@ -3,6 +3,7 @@ #include #include #include +#include #if defined(_OPENMP) #include @@ -10,6 +11,7 @@ #include #include +#include // initialise with "default" values @@ -26,10 +28,28 @@ int num_threads = 1; #include "system_stat.hh" +#include +#include + +void handle_eptr(std::exception_ptr eptr) // passing by value is ok +{ + try { + if (eptr) { + std::rethrow_exception(eptr); + } + } catch(const std::exception& e) { + music::elog << "This happened: \"" << e.what() << "\"" << std::endl; + } +} + int main( int argc, char** argv ) { - csoca::Logger::SetLevel(csoca::LogLevel::Info); - // csoca::Logger::SetLevel(csoca::LogLevel::Debug); + +#if defined(NDEBUG) + music::logger::set_level(music::log_level::info); +#else + music::logger::set_level(music::log_level::debug); +#endif //------------------------------------------------------------------------------ // initialise MPI @@ -45,20 +65,39 @@ int main( int argc, char** argv ) // set up lower logging levels for other tasks if( CONFIG::MPI_task_rank!=0 ) { - csoca::Logger::SetLevel(csoca::LogLevel::Error); + music::logger::set_level(music::log_level::error); } #endif - csoca::ilog << "\n" - << " unigrid MUSIC .8888b dP a88888b. \n" + // Ascii ART logo. generated via http://patorjk.com/software/taag/#p=display&f=Nancyj&t=monofonIC + music::ilog << "\n" + << " The unigrid version of MUSIC-2 .8888b dP a88888b. \n" << " 88 \" 88 d8\' `88 \n" << " 88d8b.d8b. .d8888b. 88d888b. .d8888b. 88aaa .d8888b. 88d888b. 88 88 \n" << " 88\'`88\'`88 88\' `88 88\' `88 88\' `88 88 88\' `88 88\' `88 88 88 \n" << " 88 88 88 88. .88 88 88 88. .88 88 88. .88 88 88 88 Y8. .88 \n" - << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl - << "version : v0.1a, git rev. 
: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl - << "-------------------------------------------------------------------------------" << std::endl; + << " dP dP dP `88888P\' dP dP `88888P\' dP `88888P\' dP dP dP Y88888P\' \n" << std::endl; + + // git and versioning info: + music::ilog << "Version: v0.1a, git rev.: " << GIT_REV << ", tag: " << GIT_TAG << ", branch: " << GIT_BRANCH << std::endl; + // Compilation CMake configuration, time etc info: + music::ilog << "This " << CMAKE_BUILDTYPE_STR << " build was compiled at " << __TIME__ << " on " << __DATE__ << std::endl; + +#ifdef __GNUC__ + music::ilog << "Compiled with GNU C++ version " << __VERSION__ <("execution", "NumThreads",std::thread::hardware_concurrency()); + CONFIG::num_threads = the_config.get_value_safe("execution", "NumThreads",std::thread::hardware_concurrency()); #if defined(USE_FFTW_THREADS) if (CONFIG::FFTW_threads_ok) @@ -110,14 +149,16 @@ int main( int argc, char** argv ) omp_set_num_threads(CONFIG::num_threads); #endif + // std::feclearexcept(FE_ALL_EXCEPT); + //------------------------------------------------------------------------------ // Write code configuration to screen //------------------------------------------------------------------------------ // hardware related infos - csoca::ilog << std::setw(32) << std::left << "CPU vendor string" << " : " << SystemStat::Cpu().get_CPUstring() << std::endl; + music::ilog << std::setw(32) << std::left << "CPU vendor string" << " : " << SystemStat::Cpu().get_CPUstring() << std::endl; // multi-threading related infos - csoca::ilog << std::setw(32) << std::left << "Available HW threads / task" << " : " << std::thread::hardware_concurrency() << " (" << CONFIG::num_threads << " used)" << std::endl; + music::ilog << std::setw(32) << std::left << "Available HW threads / task" << " : " << std::thread::hardware_concurrency() << " (" << CONFIG::num_threads << " used)" << std::endl; // memory related infos SystemStat::Memory 
mem; @@ -134,34 +175,34 @@ int main( int argc, char** argv ) MPI_Allreduce(&minupmem,&temp,1,MPI_UNSIGNED,MPI_MIN,MPI_COMM_WORLD); minupmem = temp; MPI_Allreduce(&maxupmem,&temp,1,MPI_UNSIGNED,MPI_MAX,MPI_COMM_WORLD); maxupmem = temp; #endif - csoca::ilog << std::setw(32) << std::left << "Total system memory (phys)" << " : " << mem.get_TotalMem()/1024/1024 << " Mb" << std::endl; - csoca::ilog << std::setw(32) << std::left << "Used system memory (phys)" << " : " << "Max: " << maxupmem << " Mb, Min: " << minupmem << " Mb" << std::endl; - csoca::ilog << std::setw(32) << std::left << "Available system memory (phys)" << " : " << "Max: " << maxpmem << " Mb, Min: " << minpmem << " Mb" << std::endl; + music::ilog << std::setw(32) << std::left << "Total system memory (phys)" << " : " << mem.get_TotalMem()/1024/1024 << " Mb" << std::endl; + music::ilog << std::setw(32) << std::left << "Used system memory (phys)" << " : " << "Max: " << maxupmem << " Mb, Min: " << minupmem << " Mb" << std::endl; + music::ilog << std::setw(32) << std::left << "Available system memory (phys)" << " : " << "Max: " << maxpmem << " Mb, Min: " << minpmem << " Mb" << std::endl; // MPI related infos #if defined(USE_MPI) - csoca::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "yes (" << CONFIG::MPI_task_size << " tasks)" << std::endl; - csoca::dlog << std::setw(32) << std::left << "MPI version" << " : " << GetMPIversion() << std::endl; + music::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "yes (" << CONFIG::MPI_task_size << " tasks)" << std::endl; + music::dlog << std::setw(32) << std::left << "MPI version" << " : " << MPI::get_version() << std::endl; #else - csoca::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "no" << std::endl; + music::ilog << std::setw(32) << std::left << "MPI is enabled" << " : " << "no" << std::endl; #endif - csoca::ilog << std::setw(32) << std::left << "MPI supports multi-threading" << " : " << 
(CONFIG::MPI_threads_ok? "yes" : "no") << std::endl; + music::ilog << std::setw(32) << std::left << "MPI supports multi-threading" << " : " << (CONFIG::MPI_threads_ok? "yes" : "no") << std::endl; // Kernel related infos SystemStat::Kernel kern; auto kinfo = kern.get_kernel_info(); - csoca::ilog << std::setw(32) << std::left << "OS/Kernel version" << " : " << kinfo.kernel << " version " << kinfo.major << "." << kinfo.minor << " build " << kinfo.build_number << std::endl; + music::ilog << std::setw(32) << std::left << "OS/Kernel version" << " : " << kinfo.kernel << " version " << kinfo.major << "." << kinfo.minor << " build " << kinfo.build_number << std::endl; // FFTW related infos - csoca::ilog << std::setw(32) << std::left << "FFTW version" << " : " << fftw_version << std::endl; - csoca::ilog << std::setw(32) << std::left << "FFTW supports multi-threading" << " : " << (CONFIG::FFTW_threads_ok? "yes" : "no") << std::endl; - csoca::ilog << std::setw(32) << std::left << "FFTW mode" << " : "; + music::ilog << std::setw(32) << std::left << "FFTW version" << " : " << fftw_version << std::endl; + music::ilog << std::setw(32) << std::left << "FFTW supports multi-threading" << " : " << (CONFIG::FFTW_threads_ok? "yes" : "no") << std::endl; + music::ilog << std::setw(32) << std::left << "FFTW mode" << " : "; #if defined(FFTW_MODE_PATIENT) - csoca::ilog << "FFTW_PATIENT" << std::endl; + music::ilog << "FFTW_PATIENT" << std::endl; #elif defined(FFTW_MODE_MEASURE) - csoca::ilog << "FFTW_MEASURE" << std::endl; + music::ilog << "FFTW_MEASURE" << std::endl; #else - csoca::ilog << "FFTW_ESTIMATE" << std::endl; + music::ilog << "FFTW_ESTIMATE" << std::endl; #endif //-------------------------------------------------------------------- // Initialise plug-ins @@ -170,7 +211,8 @@ int main( int argc, char** argv ) { ic_generator::Initialise( the_config ); }catch(...){ - csoca::elog << "Problem during initialisation. See error(s) above. Exiting..." 
<< std::endl; + handle_eptr( std::current_exception() ); + music::elog << "Problem during initialisation. See error(s) above. Exiting..." << std::endl; #if defined(USE_MPI) MPI_Finalize(); #endif @@ -181,6 +223,8 @@ int main( int argc, char** argv ) // do the job... /////////////////////////////////////////////////////////////////////// ic_generator::Run( the_config ); + + // particle::test_plt(); /////////////////////////////////////////////////////////////////////// #if defined(USE_MPI) @@ -188,8 +232,8 @@ int main( int argc, char** argv ) MPI_Finalize(); #endif - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << "Done." << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << "Done. Have a nice day!\n" << std::endl; return 0; } diff --git a/src/output_plugin.cc b/src/output_plugin.cc index 763336e..d0a7c5d 100644 --- a/src/output_plugin.cc +++ b/src/output_plugin.cc @@ -23,31 +23,32 @@ void print_output_plugins() std::map< std::string, output_plugin_creator *>::iterator it; it = m.begin(); - csoca::ilog << "Available output plug-ins:\n"; + music::ilog << "Available output plug-ins:\n"; while( it!=m.end() ) { if( it->second ) - csoca::ilog << "\t\'" << it->first << "\'\n"; + music::ilog << "\t\'" << it->first << "\'\n"; ++it; } + music::ilog << std::endl; } -std::unique_ptr select_output_plugin( ConfigFile& cf ) +std::unique_ptr select_output_plugin( config_file& cf ) { - std::string formatname = cf.GetValue( "output", "format" ); + std::string formatname = cf.get_value( "output", "format" ); output_plugin_creator *the_output_plugin_creator = get_output_plugin_map()[ formatname ]; if( !the_output_plugin_creator ) { - csoca::elog << "Error: output plug-in \'" << formatname << "\' not found." << std::endl; + music::elog << "Output plug-in \'" << formatname << "\' not found." 
<< std::endl; print_output_plugins(); throw std::runtime_error("Unknown output plug-in"); }else{ - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << std::setw(32) << std::left << "Output plugin" << " : " << formatname << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << std::setw(32) << std::left << "Output plugin" << " : " << formatname << std::endl; } return std::move(the_output_plugin_creator->create( cf )); diff --git a/src/plugins/output_arepo.cc b/src/plugins/output_arepo.cc new file mode 100644 index 0000000..1674604 --- /dev/null +++ b/src/plugins/output_arepo.cc @@ -0,0 +1,241 @@ + +#ifdef USE_HDF5 +#include // for unlink +#include +#include "HDF_IO.hh" + +template +std::vector from_6array(const T *a) +{ + return std::vector{{a[0], a[1], a[2], a[3], a[4], a[5]}}; +} + +template +std::vector from_value(const T a) +{ + return std::vector{{a}}; +} + +template +class gadget_hdf5_output_plugin : public output_plugin +{ + struct header_t + { + unsigned npart[6]; + double mass[6]; + double time; + double redshift; + int flag_sfr; + int flag_feedback; + unsigned int npartTotal[6]; + int flag_cooling; + int num_files; + double BoxSize; + double Omega0; + double OmegaLambda; + double HubbleParam; + int flag_stellarage; + int flag_metals; + unsigned int npartTotalHighWord[6]; + int flag_entropy_instead_u; + int flag_doubleprecision; + }; + +protected: + int num_files_, num_simultaneous_writers_; + header_t header_; + real_t lunit_, vunit_; + bool blongids_; + std::string this_fname_; + double Tini_; + unsigned pmgrid_; + unsigned gridboost_; + int doublePrec_; + int doBaryons_; + double softening_; + +public: + //! 
constructor + explicit gadget_hdf5_output_plugin(config_file &cf) + : output_plugin(cf, "GADGET-HDF5") + { + num_files_ = 1; +#ifdef USE_MPI + // use as many output files as we have MPI tasks + MPI_Comm_size(MPI_COMM_WORLD, &num_files_); +#endif + real_t astart = 1.0 / (1.0 + cf_.get_value("setup", "zstart")); + lunit_ = cf_.get_value("setup", "BoxLength"); + vunit_ = lunit_ / std::sqrt(astart); + blongids_ = cf_.get_value_safe("output", "UseLongids", false); + num_simultaneous_writers_ = cf_.get_value_safe("output", "NumSimWriters", num_files_); + + for (int i = 0; i < 6; ++i) + { + header_.npart[i] = 0; + header_.npartTotal[i] = 0; + header_.npartTotalHighWord[i] = 0; + header_.mass[i] = 0.0; + } + + header_.time = astart; + header_.redshift = 1.0 / astart - 1.0; + header_.flag_sfr = 0; + header_.flag_feedback = 0; + header_.flag_cooling = 0; + header_.num_files = num_files_; + header_.BoxSize = lunit_; + header_.Omega0 = cf_.get_value("cosmology", "Omega_m"); + header_.OmegaLambda = cf_.get_value("cosmology", "Omega_L"); + header_.HubbleParam = cf_.get_value("cosmology", "H0") / 100.0; + header_.flag_stellarage = 0; + header_.flag_metals = 0; + header_.flag_entropy_instead_u = 0; + header_.flag_doubleprecision = (typeid(write_real_t) == typeid(double)) ? true : false; + + // initial gas temperature + double Tcmb0 = 2.726; + double Omegab = cf_.get_value("cosmology", "Omega_b"); + double h = cf_.get_value("cosmology", "H0") / 100.0, h2 = h*h; + double adec = 1.0 / (160.0 * pow(Omegab * h2 / 0.022, 2.0 / 5.0)); + Tini_ = astart < adec ? 
Tcmb0 / astart : Tcmb0 / astart / astart * adec; + + // suggested PM res + pmgrid_ = 2*cf_.get_value("setup", "GridRes"); + gridboost_ = 1; + softening_ = cf_.get_value("setup", "BoxLength")/pmgrid_/20; + doBaryons_ = cf_.get_value("setup", "DoBaryons"); +#if !defined(USE_SINGLEPRECISION) + doublePrec_ = 1; +#else + doublePrec_ = 0; +#endif + + this_fname_ = fname_; +#ifdef USE_MPI + int thisrank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &thisrank); + if (num_files_ > 1) + this_fname_ += "." + std::to_string(thisrank); +#endif + + unlink(this_fname_.c_str()); + HDFCreateFile(this_fname_); + } + + // use destructor to write header post factum + ~gadget_hdf5_output_plugin() + { + HDFCreateGroup(this_fname_, "Header"); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_ThisFile", from_6array(header_.npart)); + HDFWriteGroupAttribute(this_fname_, "Header", "MassTable", from_6array(header_.mass)); + HDFWriteGroupAttribute(this_fname_, "Header", "Time", from_value(header_.time)); + HDFWriteGroupAttribute(this_fname_, "Header", "Redshift", from_value(header_.redshift)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total", from_6array(header_.npartTotal)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array(header_.npartTotalHighWord)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumFilesPerSnapshot", from_value(header_.num_files)); + HDFWriteGroupAttribute(this_fname_, "Header", "BoxSize", from_value(header_.BoxSize)); + HDFWriteGroupAttribute(this_fname_, "Header", "Omega0", from_value(header_.Omega0)); + HDFWriteGroupAttribute(this_fname_, "Header", "OmegaLambda", from_value(header_.OmegaLambda)); + HDFWriteGroupAttribute(this_fname_, "Header", "HubbleParam", from_value(header_.HubbleParam)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Sfr", from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Cooling", from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_StellarAge", 
from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Metals", from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Feedback", from_value(0)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_DoublePrecision", (int)doublePrec_); + // HDFWriteGroupAttribute(this_fname_, "Header", "Music_levelmin", levelmin_); + // HDFWriteGroupAttribute(this_fname_, "Header", "Music_levelmax", levelmax_); + // HDFWriteGroupAttribute(this_fname_, "Header", "Music_levelcounts", levelcounts); + HDFWriteGroupAttribute(this_fname_, "Header", "haveBaryons", from_value((int)doBaryons_)); + HDFWriteGroupAttribute(this_fname_, "Header", "longIDs", from_value((int)blongids_)); + HDFWriteGroupAttribute(this_fname_, "Header", "suggested_pmgrid", from_value(pmgrid_)); + HDFWriteGroupAttribute(this_fname_, "Header", "suggested_gridboost", from_value(gridboost_)); + HDFWriteGroupAttribute(this_fname_, "Header", "suggested_highressoft", from_value(softening_)); + HDFWriteGroupAttribute(this_fname_, "Header", "suggested_gas_Tinit", from_value(Tini_)); + + music::ilog << "Wrote" << std::endl; + } + + output_type write_species_as(const cosmo_species &) const { return output_type::particles; } + + real_t position_unit() const { return lunit_; } + + real_t velocity_unit() const { return vunit_; } + + bool has_64bit_reals() const + { + if (typeid(write_real_t) == typeid(double)) + return true; + return false; + } + + bool has_64bit_ids() const + { + if (blongids_) + return true; + return false; + } + + int get_species_idx(const cosmo_species &s) const + { + switch (s) + { + case cosmo_species::dm: + return 1; + case cosmo_species::baryon: + return 0; + case cosmo_species::neutrino: + return 3; + } + return -1; + } + + void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species) + { + int sid = get_species_idx(s); + + assert(sid != -1); + + header_.npart[sid] = (pc.get_local_num_particles()); + header_.npartTotal[sid] = 
(uint32_t)(pc.get_global_num_particles()); + header_.npartTotalHighWord[sid] = (uint32_t)((pc.get_global_num_particles()) >> 32); + + double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 + double boxmass = Omega_species * rhoc * std::pow(header_.BoxSize, 3); + header_.mass[sid] = boxmass / pc.get_global_num_particles(); + + HDFCreateGroup(this_fname_, std::string("PartType") + std::to_string(sid)); + + //... write positions and velocities..... + if (this->has_64bit_reals()) + { + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions64_); + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities64_); + } + else + { + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions32_); + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities32_); + } + + //... write ids..... 
+ if (this->has_64bit_ids()) + HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids64_); + else + HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids32_); + + // std::cout << ">>>A> " << header_.npart[sid] << std::endl; + } +}; + +namespace +{ +#if !defined(USE_SINGLEPRECISION) +output_plugin_creator_concrete> creator1("AREPO"); +#else +output_plugin_creator_concrete> creator1("AREPO"); +#endif +} // namespace + +#endif \ No newline at end of file diff --git a/src/plugins/output_gadget2.cc b/src/plugins/output_gadget2.cc index fddb734..0a3afbb 100644 --- a/src/plugins/output_gadget2.cc +++ b/src/plugins/output_gadget2.cc @@ -3,6 +3,7 @@ constexpr int empty_fill_bytes{56}; +template class gadget2_output_plugin : public output_plugin { public: @@ -33,32 +34,48 @@ protected: int num_files_; header this_header_; real_t lunit_, vunit_; + bool blongids_; public: //! constructor - explicit gadget2_output_plugin(ConfigFile &cf ) - : output_plugin(cf, "GADGET-2") + explicit gadget2_output_plugin(config_file &cf) + : output_plugin(cf, "GADGET-2") { num_files_ = 1; #ifdef USE_MPI // use as many output files as we have MPI tasks MPI_Comm_size(MPI_COMM_WORLD, &num_files_); #endif - real_t astart = 1.0/(1.0+cf_.GetValue("setup", "zstart")); - lunit_ = cf_.GetValue("setup", "BoxLength"); + real_t astart = 1.0 / (1.0 + cf_.get_value("setup", "zstart")); + lunit_ = cf_.get_value("setup", "BoxLength"); vunit_ = lunit_ / std::sqrt(astart); + blongids_ = cf_.get_value_safe("output", "UseLongids", false); } - output_type write_species_as( const cosmo_species & ) const { return output_type::particles; } + output_type write_species_as(const cosmo_species &) const { return output_type::particles; } real_t position_unit() const { return lunit_; } real_t velocity_unit() const { return vunit_; } - void write_particle_data(const particle::container &pc, const cosmo_species 
&s ) + bool has_64bit_reals() const { - // fill the Gadget-2 header - memset(reinterpret_cast(&this_header_),0,sizeof(header)); + if (typeid(write_real_t) == typeid(double)) + return true; + return false; + } + + bool has_64bit_ids() const + { + if (blongids_) + return true; + return false; + } + + void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species) + { + // fill the Gadget-2 header + memset(reinterpret_cast(&this_header_), 0, sizeof(header)); for (int i = 0; i < 6; ++i) { @@ -73,7 +90,7 @@ public: ///// //... set time ...................................................... - this_header_.redshift = cf_.GetValue("setup", "zstart"); + this_header_.redshift = cf_.get_value("setup", "zstart"); this_header_.time = 1.0 / (1.0 + this_header_.redshift); //... SF flags @@ -83,10 +100,10 @@ public: //... this_header_.num_files = num_files_; //1; - this_header_.BoxSize = cf_.GetValue("setup", "BoxLength"); - this_header_.Omega0 = cf_.GetValue("cosmology", "Omega_m"); - this_header_.OmegaLambda = cf_.GetValue("cosmology", "Omega_L"); - this_header_.HubbleParam = cf_.GetValue("cosmology", "H0") / 100.0; + this_header_.BoxSize = cf_.get_value("setup", "BoxLength"); + this_header_.Omega0 = cf_.get_value("cosmology", "Omega_m"); + this_header_.OmegaLambda = cf_.get_value("cosmology", "Omega_L"); + this_header_.HubbleParam = cf_.get_value("cosmology", "H0") / 100.0; this_header_.flag_stellarage = 0; this_header_.flag_metals = 0; @@ -100,50 +117,73 @@ public: //... 
set masses double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 - double boxmass = this_header_.Omega0 * rhoc * std::pow(this_header_.BoxSize,3); + double boxmass = Omega_species * rhoc * std::pow(this_header_.BoxSize, 3); this_header_.mass[1] = boxmass / pc.get_global_num_particles(); - + std::string fname = fname_; int thisrank = 0; - + #ifdef USE_MPI - MPI_Comm_rank(MPI_COMM_WORLD,&thisrank); - if( num_files_ > 1 ) + MPI_Comm_rank(MPI_COMM_WORLD, &thisrank); + if (num_files_ > 1) fname += "." + std::to_string(thisrank); #endif uint32_t blocksz; std::ofstream ofs(fname.c_str(), std::ios::binary); - csoca::ilog << "Writer \'" << this->interface_name_ << "\' : Writing data for " << pc.get_global_num_particles() << " particles." << std::endl; + music::ilog << "Writer \'" << this->interface_name_ << "\' : Writing data for " << pc.get_global_num_particles() << " particles." << std::endl; blocksz = sizeof(header); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(&this_header_), sizeof(header) ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - - blocksz = 3 * sizeof(float) * pc.get_local_num_particles(); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_pos_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_vel_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - - blocksz = sizeof(float) * pc.get_local_num_particles(); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - ofs.write( reinterpret_cast(pc.get_ids_ptr()), blocksz ); - ofs.write( reinterpret_cast(&blocksz), sizeof(uint32_t) ); - + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(&this_header_), sizeof(header)); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + + // we write double precision 
+ if (this->has_64bit_reals()) + { + blocksz = 3 * sizeof(double) * pc.get_local_num_particles(); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_pos64_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_vel64_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + } + else + { + blocksz = 3 * sizeof(float) * pc.get_local_num_particles(); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_pos32_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_vel32_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + } + + // we write long IDs + if (this->has_64bit_ids()) + { + blocksz = sizeof(uint64_t) * pc.get_local_num_particles(); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_ids64_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + } + else + { + blocksz = sizeof(uint32_t) * pc.get_local_num_particles(); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + ofs.write(reinterpret_cast(pc.get_ids32_ptr()), blocksz); + ofs.write(reinterpret_cast(&blocksz), sizeof(uint32_t)); + } } }; - namespace { - output_plugin_creator_concrete creator1("gadget2"); -// output_plugin_creator_concrete> creator1("gadget2"); -// #ifndef SINGLE_PRECISION -// output_plugin_creator_concrete> creator2("gadget2_double"); -// #endif +output_plugin_creator_concrete> creator1("gadget2"); +#if !defined(USE_SINGLEPRECISION) +output_plugin_creator_concrete> creator3("gadget2_double"); +#endif } // namespace diff --git a/src/plugins/output_gadget_hdf5.cc b/src/plugins/output_gadget_hdf5.cc new file mode 100644 index 0000000..3908e64 --- /dev/null 
+++ b/src/plugins/output_gadget_hdf5.cc @@ -0,0 +1,210 @@ + +#ifdef USE_HDF5 +#include // for unlink +#include +#include "HDF_IO.hh" + +template +std::vector from_6array(const T *a) +{ + return std::vector{{a[0], a[1], a[2], a[3], a[4], a[5]}}; +} + +template +std::vector from_value(const T a) +{ + return std::vector{{a}}; +} + +template +class gadget_hdf5_output_plugin : public output_plugin +{ + struct header_t + { + unsigned npart[6]; + double mass[6]; + double time; + double redshift; + int flag_sfr; + int flag_feedback; + unsigned int npartTotal[6]; + int flag_cooling; + int num_files; + double BoxSize; + double Omega0; + double OmegaLambda; + double HubbleParam; + int flag_stellarage; + int flag_metals; + unsigned int npartTotalHighWord[6]; + int flag_entropy_instead_u; + int flag_doubleprecision; + }; + +protected: + int num_files_, num_simultaneous_writers_; + header_t header_; + real_t lunit_, vunit_; + bool blongids_; + std::string this_fname_; + +public: + //! constructor + explicit gadget_hdf5_output_plugin(config_file &cf) + : output_plugin(cf, "GADGET-HDF5") + { + num_files_ = 1; +#ifdef USE_MPI + // use as many output files as we have MPI tasks + MPI_Comm_size(MPI_COMM_WORLD, &num_files_); +#endif + real_t astart = 1.0 / (1.0 + cf_.get_value("setup", "zstart")); + lunit_ = cf_.get_value("setup", "BoxLength"); + vunit_ = lunit_ / std::sqrt(astart); + blongids_ = cf_.get_value_safe("output", "UseLongids", false); + num_simultaneous_writers_ = cf_.get_value_safe("output", "NumSimWriters", num_files_); + + for (int i = 0; i < 6; ++i) + { + header_.npart[i] = 0; + header_.npartTotal[i] = 0; + header_.npartTotalHighWord[i] = 0; + header_.mass[i] = 0.0; + } + + header_.time = astart; + header_.redshift = 1.0 / astart - 1.0; + header_.flag_sfr = 0; + header_.flag_feedback = 0; + header_.flag_cooling = 0; + header_.num_files = num_files_; + header_.BoxSize = lunit_; + header_.Omega0 = cf_.get_value("cosmology", "Omega_m"); + header_.OmegaLambda = 
cf_.get_value("cosmology", "Omega_L"); + header_.HubbleParam = cf_.get_value("cosmology", "H0") / 100.0; + header_.flag_stellarage = 0; + header_.flag_metals = 0; + header_.flag_entropy_instead_u = 0; + header_.flag_doubleprecision = (typeid(write_real_t) == typeid(double)) ? true : false; + + this_fname_ = fname_; +#ifdef USE_MPI + int thisrank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &thisrank); + if (num_files_ > 1) + this_fname_ += "." + std::to_string(thisrank); +#endif + + unlink(this_fname_.c_str()); + HDFCreateFile(this_fname_); + } + + // use destructor to write header post factum + ~gadget_hdf5_output_plugin() + { + if (!std::uncaught_exception()) + { + HDFCreateGroup(this_fname_, "Header"); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_ThisFile", from_6array(header_.npart)); + HDFWriteGroupAttribute(this_fname_, "Header", "MassTable", from_6array(header_.mass)); + HDFWriteGroupAttribute(this_fname_, "Header", "Time", from_value(header_.time)); + HDFWriteGroupAttribute(this_fname_, "Header", "Redshift", from_value(header_.redshift)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Sfr", from_value(header_.flag_sfr)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Feedback", from_value(header_.flag_feedback)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total", from_6array(header_.npartTotal)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Cooling", from_value(header_.flag_cooling)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumFilesPerSnapshot", from_value(header_.num_files)); + HDFWriteGroupAttribute(this_fname_, "Header", "BoxSize", from_value(header_.BoxSize)); + HDFWriteGroupAttribute(this_fname_, "Header", "Omega0", from_value(header_.Omega0)); + HDFWriteGroupAttribute(this_fname_, "Header", "OmegaLambda", from_value(header_.OmegaLambda)); + HDFWriteGroupAttribute(this_fname_, "Header", "HubbleParam", from_value(header_.HubbleParam)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_StellarAge", 
from_value(header_.flag_stellarage)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Metals", from_value(header_.flag_metals)); + HDFWriteGroupAttribute(this_fname_, "Header", "NumPart_Total_HighWord", from_6array(header_.npartTotalHighWord)); + HDFWriteGroupAttribute(this_fname_, "Header", "Flag_Entropy_ICs", from_value(header_.flag_entropy_instead_u)); + + music::ilog << "Wrote Gadget-HDF5 file(s) to " << this_fname_ << std::endl; + } + } + + output_type write_species_as(const cosmo_species &) const { return output_type::particles; } + + real_t position_unit() const { return lunit_; } + + real_t velocity_unit() const { return vunit_; } + + bool has_64bit_reals() const + { + if (typeid(write_real_t) == typeid(double)) + return true; + return false; + } + + bool has_64bit_ids() const + { + if (blongids_) + return true; + return false; + } + + int get_species_idx(const cosmo_species &s) const + { + switch (s) + { + case cosmo_species::dm: + return 1; + case cosmo_species::baryon: + return 0; + case cosmo_species::neutrino: + return 3; + } + return -1; + } + + void write_particle_data(const particle::container &pc, const cosmo_species &s, double Omega_species) + { + int sid = get_species_idx(s); + + assert(sid != -1); + + header_.npart[sid] = (pc.get_local_num_particles()); + header_.npartTotal[sid] = (uint32_t)(pc.get_global_num_particles()); + header_.npartTotalHighWord[sid] = (uint32_t)((pc.get_global_num_particles()) >> 32); + + double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 + double boxmass = Omega_species * rhoc * std::pow(header_.BoxSize, 3); + header_.mass[sid] = boxmass / pc.get_global_num_particles(); + + HDFCreateGroup(this_fname_, std::string("PartType") + std::to_string(sid)); + + //... write positions and velocities..... 
+ if (this->has_64bit_reals()) + { + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions64_); + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities64_); + } + else + { + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Coordinates"), pc.positions32_); + HDFWriteDatasetVector(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/Velocities"), pc.velocities32_); + } + + //... write ids..... + if (this->has_64bit_ids()) + HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids64_); + else + HDFWriteDataset(this_fname_, std::string("PartType") + std::to_string(sid) + std::string("/ParticleIDs"), pc.ids32_); + + // std::cout << ">>>A> " << header_.npart[sid] << std::endl; + } +}; + +namespace +{ +output_plugin_creator_concrete> creator1("gadget_hdf5"); +#if !defined(USE_SINGLEPRECISION) +output_plugin_creator_concrete> creator3("gadget_hdf5_double"); +#endif +} // namespace + +#endif \ No newline at end of file diff --git a/src/plugins/output_generic.cc b/src/plugins/output_generic.cc index 10eacfc..79c2139 100644 --- a/src/plugins/output_generic.cc +++ b/src/plugins/output_generic.cc @@ -21,13 +21,13 @@ protected: bool out_eulerian_; public: //! 
constructor - explicit generic_output_plugin(ConfigFile &cf ) + explicit generic_output_plugin(config_file &cf ) : output_plugin(cf, "Generic HDF5") { - real_t astart = 1.0/(1.0+cf_.GetValue("setup", "zstart")); - real_t boxsize = cf_.GetValue("setup", "BoxLength"); + real_t astart = 1.0/(1.0+cf_.get_value("setup", "zstart")); + real_t boxsize = cf_.get_value("setup", "BoxLength"); - out_eulerian_ = cf_.GetValueSafe("output", "generic_out_eulerian",false); + out_eulerian_ = cf_.get_value_safe("output", "generic_out_eulerian",false); if( CONFIG::MPI_task_rank == 0 ) { @@ -50,6 +50,10 @@ public: return output_type::field_lagrangian; } + bool has_64bit_reals() const{ return true; } + + bool has_64bit_ids() const{ return true; } + real_t position_unit() const { return 1.0; } real_t velocity_unit() const { return 1.0; } @@ -95,7 +99,7 @@ void generic_output_plugin::write_grid_data(const Grid_FFT &g, const cos { std::string field_name = this->get_field_name( s, c ); g.Write_to_HDF5(fname_, field_name); - csoca::ilog << interface_name_ << " : Wrote field \'" << field_name << "\' to file \'" << fname_ << "\'" << std::endl; + music::ilog << interface_name_ << " : Wrote field \'" << field_name << "\' to file \'" << fname_ << "\'" << std::endl; } namespace diff --git a/src/plugins/output_grafic2.cc b/src/plugins/output_grafic2.cc index b511dd9..b3f3f04 100644 --- a/src/plugins/output_grafic2.cc +++ b/src/plugins/output_grafic2.cc @@ -40,31 +40,31 @@ protected: public: //! 
constructor - explicit grafic2_output_plugin(ConfigFile &cf) + explicit grafic2_output_plugin(config_file &cf) : output_plugin(cf, "GRAFIC2/RAMSES") { lunit_ = 1.0; vunit_ = 1.0; double - boxlength = cf_.GetValue("setup", "BoxLength"), - H0 = cf_.GetValue("cosmology", "H0"), - zstart = cf_.GetValue("setup", "zstart"), + boxlength = cf_.get_value("setup", "BoxLength"), + H0 = cf_.get_value("cosmology", "H0"), + zstart = cf_.get_value("setup", "zstart"), astart = 1.0 / (1.0 + zstart), - omegam = cf_.GetValue("cosmology", "Omega_m"), - omegaL = cf_.GetValue("cosmology", "Omega_L"); - uint32_t ngrid = cf_.GetValue("setup", "GridRes"); + omegam = cf_.get_value("cosmology", "Omega_m"), + omegaL = cf_.get_value("cosmology", "Omega_L"); + uint32_t ngrid = cf_.get_value("setup", "GridRes"); - bUseSPT_ = cf_.GetValueSafe("output", "grafic_use_SPT", false); + bUseSPT_ = cf_.get_value_safe("output", "grafic_use_SPT", false); levelmin_ = uint32_t(std::log2(double(ngrid)) + 1e-6); if (std::abs(std::pow(2.0, levelmin_) - double(ngrid)) > 1e-4) { - csoca::elog << interface_name_ << " plugin requires setup/GridRes to be power of 2!" << std::endl; + music::elog << interface_name_ << " plugin requires setup/GridRes to be power of 2!" << std::endl; abort(); } - bhavebaryons_ = cf_.GetValueSafe("setup", "baryons", false); + bhavebaryons_ = cf_.get_value_safe("setup", "baryons", false); header_.n1 = ngrid; header_.n2 = ngrid; @@ -89,7 +89,7 @@ public: mkdir(dirname_.c_str(), 0777); // write RAMSES namelist file? 
if so only with one task - if (cf_.GetValueSafe("output", "ramses_nml", true) && CONFIG::MPI_task_rank==0 ) + if (cf_.get_value_safe("output", "ramses_nml", true) && CONFIG::MPI_task_rank==0 ) { write_ramses_namelist(); } @@ -102,6 +102,10 @@ public: return output_type::field_lagrangian; } + bool has_64bit_reals() const{ return false; } + + bool has_64bit_ids() const{ return false; } + real_t position_unit() const { return lunit_; } real_t velocity_unit() const { return vunit_; } @@ -192,7 +196,7 @@ void grafic2_output_plugin::write_grid_data(const Grid_FFT &g, const cos } // check field size against buffer size... - uint32_t ngrid = cf_.GetValue("setup", "GridRes"); + uint32_t ngrid = cf_.get_value("setup", "GridRes"); assert( g.global_size(0) == ngrid && g.global_size(1) == ngrid && g.global_size(2) == ngrid); assert( g.size(1) == ngrid && g.size(2) == ngrid); // write actual field slice by slice @@ -219,7 +223,7 @@ void grafic2_output_plugin::write_grid_data(const Grid_FFT &g, const cos } // end loop over write_rank - csoca::ilog << interface_name_ << " : Wrote field to file \'" << file_name << "\'" << std::endl; + music::ilog << interface_name_ << " : Wrote field to file \'" << file_name << "\'" << std::endl; } void grafic2_output_plugin::write_ramses_namelist(void) const @@ -275,7 +279,7 @@ void grafic2_output_plugin::write_ramses_namelist(void) const << "m_refine=" << 1 + naddref << "*8.,\n" << "/\n"; - csoca::ilog << interface_name_ << " wrote partial RAMSES namelist file \'" << fname_ << "\'" << std::endl; + music::ilog << interface_name_ << " wrote partial RAMSES namelist file \'" << fname_ << "\'" << std::endl; } namespace diff --git a/src/plugins/random_music.cc b/src/plugins/random_music.cc index a13726f..28486b5 100644 --- a/src/plugins/random_music.cc +++ b/src/plugins/random_music.cc @@ -34,29 +34,29 @@ protected: //void store_rnd(int ilevel, rng *prng); public: - explicit RNG_music(ConfigFile &cf) : RNG_plugin(cf), initialized_(false) {} + explicit 
RNG_music(config_file &cf) : RNG_plugin(cf), initialized_(false) {} ~RNG_music() {} bool isMultiscale() const { return true; } - void Fill_Grid( Grid_FFT& g ) const { } + void Fill_Grid( Grid_FFT& g ) {} //const { } void initialize_for_grid_structure()//const refinement_hierarchy &refh) { //prefh_ = &refh; - levelmin_ = pcf_->GetValue("setup", "levelmin"); - levelmax_ = pcf_->GetValue("setup", "levelmax"); + levelmin_ = pcf_->get_value("setup", "levelmin"); + levelmax_ = pcf_->get_value("setup", "levelmax"); - ran_cube_size_ = pcf_->GetValueSafe("random", "cubesize", DEF_RAN_CUBE_SIZE); - disk_cached_ = pcf_->GetValueSafe("random", "disk_cached", true); - restart_ = pcf_->GetValueSafe("random", "restart", false); + ran_cube_size_ = pcf_->get_value_safe("random", "cubesize", DEF_RAN_CUBE_SIZE); + disk_cached_ = pcf_->get_value_safe("random", "disk_cached", true); + restart_ = pcf_->get_value_safe("random", "restart", false); mem_cache_.assign(levelmax_ - levelmin_ + 1, (std::vector *)NULL); if (restart_ && !disk_cached_) { - csoca::elog.Print("Cannot restart from mem cached random numbers."); + music::elog.Print("Cannot restart from mem cached random numbers."); throw std::runtime_error("Cannot restart from mem cached random numbers."); } @@ -93,8 +93,8 @@ void RNG_music::parse_random_parameters(void) std::string tempstr; bool noseed = false; sprintf(seedstr, "seed[%d]", i); - if (pcf_->ContainsKey("random", seedstr)) - tempstr = pcf_->GetValue("random", seedstr); + if (pcf_->contains_key("random", seedstr)) + tempstr = pcf_->get_value("random", seedstr); else { // "-2" means that no seed entry was found for that level @@ -105,7 +105,7 @@ void RNG_music::parse_random_parameters(void) if (is_number(tempstr)) { long ltemp; - pcf_->Convert(tempstr, ltemp); + pcf_->convert(tempstr, ltemp); rngfnames_.push_back(""); if (noseed) // ltemp < 0 ) //... 
generate some dummy seed which only depends on the level, negative so we know it's not @@ -116,7 +116,7 @@ void RNG_music::parse_random_parameters(void) { if (ltemp <= 0) { - csoca::elog.Print("Specified seed [random]/%s needs to be a number >0!", seedstr); + music::elog.Print("Specified seed [random]/%s needs to be a number >0!", seedstr); throw std::runtime_error("Seed values need to be >0"); } rngseeds_.push_back(ltemp); @@ -126,7 +126,7 @@ void RNG_music::parse_random_parameters(void) { rngfnames_.push_back(tempstr); rngseeds_.push_back(-1); - csoca::ilog.Print("Random numbers for level %3d will be read from file.", i); + music::ilog.Print("Random numbers for level %3d will be read from file.", i); } } @@ -141,7 +141,7 @@ void RNG_music::parse_random_parameters(void) void RNG_music::compute_random_numbers(void) { - bool rndsign = pcf_->GetValueSafe("random", "grafic_sign", false); + bool rndsign = pcf_->get_value_safe("random", "grafic_sign", false); std::vector randc(std::max(levelmax_, levelmin_seed_) + 1, (rng *)NULL); @@ -160,7 +160,7 @@ void RNG_music::compute_random_numbers(void) //#warning add possibility to read noise from file also here! if (rngfnames_[i].size() > 0) - csoca::ilog.Print("Warning: Cannot use filenames for higher levels currently! Ignoring!"); + music::ilog.Print("Warning: Cannot use filenames for higher levels currently! 
Ignoring!"); randc[i] = new rng(*randc[i - 1], ran_cube_size_, rngseeds_[i], true); delete randc[i - 1]; @@ -180,7 +180,7 @@ void RNG_music::compute_random_numbers(void) for (int ilevel = levelmin_seed_ - 1; ilevel >= (int)levelmin_; --ilevel) { if (rngseeds_[ilevel - levelmin_] > 0) - csoca::ilog.Print("Warning: random seed for level %d will be ignored.\n" + music::ilog.Print("Warning: random seed for level %d will be ignored.\n" " consistency requires that it is obtained by restriction from level %d", ilevel, levelmin_seed_); @@ -227,11 +227,11 @@ void RNG_music::compute_random_numbers(void) // { // int lx[3], x0[3]; // int shift[3], levelmin_poisson; - // shift[0] = pcf_->GetValue("setup", "shift_x"); - // shift[1] = pcf_->GetValue("setup", "shift_y"); - // shift[2] = pcf_->GetValue("setup", "shift_z"); + // shift[0] = pcf_->get_value("setup", "shift_x"); + // shift[1] = pcf_->get_value("setup", "shift_y"); + // shift[2] = pcf_->get_value("setup", "shift_z"); - // levelmin_poisson = pcf_->GetValue("setup", "levelmin"); + // levelmin_poisson = pcf_->get_value("setup", "levelmin"); // int lfac = 1 << (ilevel - levelmin_poisson); diff --git a/src/plugins/random_music_wnoise_generator.cc b/src/plugins/random_music_wnoise_generator.cc index a6d4c35..18e287f 100644 --- a/src/plugins/random_music_wnoise_generator.cc +++ b/src/plugins/random_music_wnoise_generator.cc @@ -11,7 +11,7 @@ template music_wnoise_generator::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx) : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed) { - csoca::ilog.Print("Generating random numbers (1) with seed %ld", baseseed); + music::ilog.Print("Generating random numbers (1) with seed %ld", baseseed); initialize(); fill_subvolume(x0, lx); @@ -21,7 +21,7 @@ template music_wnoise_generator::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, bool zeromean) : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed) { - 
csoca::ilog.Print("Generating random numbers (2) with seed %ld", baseseed); + music::ilog.Print("Generating random numbers (2) with seed %ld", baseseed); double mean = 0.0; size_t res_l = res; @@ -31,7 +31,7 @@ music_wnoise_generator::music_wnoise_generator(unsigned res, unsigned cubesiz cubesize_ = res_; if (!musicnoise) - csoca::elog.Print("This currently breaks compatibility. Need to disable by hand! Make sure to not check into repo"); + music::elog.Print("This currently breaks compatibility. Need to disable by hand! Make sure to not check into repo"); initialize(); @@ -90,7 +90,7 @@ music_wnoise_generator::music_wnoise_generator(unsigned res, std::string rand std::ifstream ifs(randfname.c_str(), std::ios::binary); if (!ifs) { - csoca::elog.Print("Could not open random number file \'%s\'!", randfname.c_str()); + music::elog.Print("Could not open random number file \'%s\'!", randfname.c_str()); throw std::runtime_error(std::string("Could not open random number file \'") + randfname + std::string("\'!")); } @@ -186,7 +186,7 @@ music_wnoise_generator::music_wnoise_generator(unsigned res, std::string rand std::vector in_float; std::vector in_double; - csoca::ilog.Print("Random number file \'%s\'\n contains %ld numbers. Reading...", randfname.c_str(), nx * ny * nz); + music::ilog.Print("Random number file \'%s\'\n contains %ld numbers. Reading...", randfname.c_str(), nx * ny * nz); long double sum = 0.0, sum2 = 0.0; size_t count = 0; @@ -285,7 +285,7 @@ music_wnoise_generator::music_wnoise_generator(unsigned res, std::string rand mean = sum / count; var = sum2 / count - mean * mean; - csoca::ilog.Print("Random numbers in file have \n mean = %f and var = %f", mean, var); + music::ilog.Print("Random numbers in file have \n mean = %f and var = %f", mean, var); } //... 
copy construct by averaging down @@ -298,7 +298,7 @@ music_wnoise_generator::music_wnoise_generator(/*const*/ music_wnoise_generat long double sum = 0.0, sum2 = 0.0; size_t count = 0; - csoca::ilog.Print("Generating a coarse white noise field by k-space degrading"); + music::ilog.Print("Generating a coarse white noise field by k-space degrading"); //... initialize properties of container res_ = rc.res_ / 2; cubesize_ = res_; @@ -307,7 +307,7 @@ music_wnoise_generator::music_wnoise_generator(/*const*/ music_wnoise_generat if (sizeof(real_t) != sizeof(T)) { - csoca::elog.Print("type mismatch with real_t in k-space averaging"); + music::elog.Print("type mismatch with real_t in k-space averaging"); throw std::runtime_error("type mismatch with real_t in k-space averaging"); } @@ -405,7 +405,7 @@ music_wnoise_generator::music_wnoise_generator(/*const*/ music_wnoise_generat rmean = sum / count; rvar = sum2 / count - rmean * rmean; - csoca::ilog.Print("Restricted random numbers have\n mean = %f, var = %f", rmean, rvar); + music::ilog.Print("Restricted random numbers have\n mean = %f, var = %f", rmean, rvar); } template @@ -438,7 +438,7 @@ music_wnoise_generator::music_wnoise_generator(music_wnoise_generator &rc, if (kspace) { - csoca::ilog.Print("Generating a constrained random number set with seed %ld\n using coarse mode replacement...", baseseed); + music::ilog.Print("Generating a constrained random number set with seed %ld\n using coarse mode replacement...", baseseed); assert(lx[0] % 2 == 0 && lx[1] % 2 == 0 && lx[2] % 2 == 0); size_t nx = lx[0], ny = lx[1], nz = lx[2], nxc = lx[0] / 2, nyc = lx[1] / 2, nzc = lx[2] / 2; @@ -573,7 +573,7 @@ music_wnoise_generator::music_wnoise_generator(music_wnoise_generator &rc, } else { - csoca::ilog.Print("Generating a constrained random number set with seed %ld\n using Hoffman-Ribak constraints...", baseseed); + music::ilog.Print("Generating a constrained random number set with seed %ld\n using Hoffman-Ribak constraints...", 
baseseed); double fac = 1.0 / sqrt(8.0); //1./sqrt(8.0); @@ -613,7 +613,7 @@ void music_wnoise_generator::register_cube(int i, int j, int k) rnums_.push_back(NULL); cubemap_[icube] = rnums_.size() - 1; #ifdef DEBUG - LOGDEBUG("registering new cube %d,%d,%d . ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]); + music::dlog.Print("registering new cube %d,%d,%d . ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]); #endif } } @@ -637,7 +637,7 @@ double music_wnoise_generator::fill_cube(int i, int j, int k) if (it == cubemap_.end()) { - csoca::elog.Print("Attempt to access non-registered random number cube!"); + music::elog.Print("Attempt to access non-registered random number cube!"); throw std::runtime_error("Attempt to access non-registered random number cube!"); } @@ -674,7 +674,7 @@ void music_wnoise_generator::subtract_from_cube(int i, int j, int k, double v if (it == cubemap_.end()) { - csoca::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::subtract_from_cube", i, j, k); + music::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::subtract_from_cube", i, j, k); throw std::runtime_error("Attempt to access unallocated RND cube in music_wnoise_generator::subtract_from_cube"); } @@ -700,7 +700,7 @@ void music_wnoise_generator::free_cube(int i, int j, int k) if (it == cubemap_.end()) { - csoca::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::free_cube", i, j, k); + music::elog.Print("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::free_cube", i, j, k); throw std::runtime_error("Attempt to access unallocated RND cube in music_wnoise_generator::free_cube"); } @@ -724,7 +724,7 @@ void music_wnoise_generator::initialize(void) cubesize_ = res_; } - csoca::ilog.Print("Generating random numbers w/ sample cube size of %d", cubesize_); + music::ilog.Print("Generating random numbers w/ sample cube size of %d", cubesize_); } 
template @@ -741,8 +741,8 @@ double music_wnoise_generator::fill_subvolume(int *i0, int *n) ncube[2] = (int)(n[2] / cubesize_) + 2; #ifdef DEBUG - LOGDEBUG("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]); - LOGDEBUG("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]); + music::dlog.Print("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]); + music::dlog.Print("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]); #endif double mean = 0.0; @@ -836,7 +836,7 @@ void music_wnoise_generator::print_allocated(void) if (rnums_[i] != NULL) ncount++; - csoca::ilog.Print(" -> %d of %d random number cubes currently allocated", ncount, ntot); + music::ilog.Print(" -> %d of %d random number cubes currently allocated", ncount, ntot); } template class music_wnoise_generator; diff --git a/src/plugins/random_music_wnoise_generator.hh b/src/plugins/random_music_wnoise_generator.hh index 5b9cb36..4dd1b37 100644 --- a/src/plugins/random_music_wnoise_generator.hh +++ b/src/plugins/random_music_wnoise_generator.hh @@ -80,7 +80,7 @@ protected: if (it == cubemap_.end()) { - csoca::elog.Print("attempting to copy data from non-existing RND cube %d,%d,%d", i, j, k); + music::elog.Print("attempting to copy data from non-existing RND cube %d,%d,%d", i, j, k); throw std::runtime_error("attempting to copy data from non-existing RND cube"); } @@ -186,7 +186,7 @@ public: if (it == cubemap_.end()) { - csoca::elog.Print("Attempting to copy data from non-existing RND cube %d,%d,%d @ %d,%d,%d", ic, jc, kc, i, j, k); + music::elog.Print("Attempting to copy data from non-existing RND cube %d,%d,%d @ %d,%d,%d", ic, jc, kc, i, j, k); throw std::runtime_error("attempting to copy data from non-existing RND cube"); } @@ -194,7 +194,7 @@ public: if (rnums_[cubeidx] == NULL) { - csoca::elog.Print("Attempting to access data 
from non-allocated RND cube %d,%d,%d", ic, jc, kc); + music::elog.Print("Attempting to access data from non-allocated RND cube %d,%d,%d", ic, jc, kc); throw std::runtime_error("attempting to access data from non-allocated RND cube"); } diff --git a/src/plugins/random_ngenic.cc b/src/plugins/random_ngenic.cc index 1aa1942..1498d4b 100644 --- a/src/plugins/random_ngenic.cc +++ b/src/plugins/random_ngenic.cc @@ -18,11 +18,11 @@ private: std::vector SeedTable_; public: - explicit RNG_ngenic(ConfigFile &cf) : RNG_plugin(cf) + explicit RNG_ngenic(config_file &cf) : RNG_plugin(cf) { - RandomSeed_ = cf.GetValue("random", "seed"); - nres_ = cf.GetValue("setup", "GridRes"); + RandomSeed_ = cf.get_value("random", "seed"); + nres_ = cf.get_value("setup", "GridRes"); pRandomGenerator_ = gsl_rng_alloc(gsl_rng_ranlxd1); gsl_rng_set(pRandomGenerator_, RandomSeed_); @@ -63,7 +63,7 @@ public: bool isMultiscale() const { return false; } - void Fill_Grid(Grid_FFT &g) const + void Fill_Grid(Grid_FFT &g) //const { g.zero(); g.FourierTransformForward(false); @@ -82,7 +82,11 @@ public: for (size_t j = 0; j < nres_; ++j) { ptrdiff_t jj = (j>0)? 
nres_ - j : 0; - gsl_rng_set( pRandomGenerator_, SeedTable_[i * nres_ + j]); + if( g.is_distributed() ) + gsl_rng_set( pRandomGenerator_, SeedTable_[j * nres_ + i]); + else + gsl_rng_set( pRandomGenerator_, SeedTable_[i * nres_ + j]); + for (size_t k = 0; k < g.size(2); ++k) { double phase = gsl_rng_uniform(pRandomGenerator_) * 2 * M_PI; @@ -101,15 +105,28 @@ public: if (k > 0) { if (i_in_range) g.kelem(ip,j,k) = zrand; } else{ /* k=0 plane needs special treatment */ - if (i == 0) { - if (j < nres_ / 2 && i_in_range) - { - g.kelem(ip,j,k) = zrand; - g.kelem(ip,jj,k) = std::conj(zrand); + if( g.is_distributed() ){ + if (j == 0) { + if (i < nres_ / 2 && i_in_range) + { + if(i_in_range) g.kelem(ip,jj,k) = zrand; + if(ii_in_range) g.kelem(iip,j,k) = std::conj(zrand); + } + } else if (j < nres_ / 2) { + if(i_in_range) g.kelem(ip,j,k) = zrand; + if(ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand); + } + }else{ + if (i == 0) { + if (j < nres_ / 2 && i_in_range) + { + g.kelem(ip,j,k) = zrand; + g.kelem(ip,jj,k) = std::conj(zrand); + } + } else if (i < nres_ / 2) { + if(i_in_range) g.kelem(ip,j,k) = zrand; + if (ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand); } - } else if (i < nres_ / 2) { - if(i_in_range) g.kelem(ip,j,k) = zrand; - if (ii_in_range) g.kelem(iip,jj,k) = std::conj(zrand); } } } diff --git a/src/plugins/random_panphasia.cc b/src/plugins/random_panphasia.cc new file mode 100644 index 0000000..1489f59 --- /dev/null +++ b/src/plugins/random_panphasia.cc @@ -0,0 +1,522 @@ +#if defined(USE_PANPHASIA) + +#include +#include +#include + +#include +#include +#include + +#ifdef _OPENMP +#include +#endif + +#include + +const int maxdim = 60, maxlev = 50, maxpow = 3 * maxdim; +typedef int rand_offset_[5]; +typedef struct +{ + int state[133]; // Nstore = Nstate (=5) + Nbatch (=128) + int need_fill; + int pos; +} rand_state_; + +/* pan_state_ struct -- corresponds to respective fortran module in panphasia_routines.f + * data structure that contains all panphasia state 
variables + * it needs to get passed between the fortran routines to enable + * thread-safe execution. + */ +typedef struct +{ + int base_state[5], base_lev_start[5][maxdim + 1]; + rand_offset_ poweroffset[maxpow + 1], superjump; + rand_state_ current_state[maxpow + 2]; + + int layer_min, layer_max, indep_field; + + long long xorigin_store[2][2][2], yorigin_store[2][2][2], zorigin_store[2][2][2]; + int lev_common, layer_min_store, layer_max_store; + long long ix_abs_store, iy_abs_store, iz_abs_store, ix_per_store, iy_per_store, iz_per_store, ix_rel_store, + iy_rel_store, iz_rel_store; + double exp_coeffs[8][8][maxdim + 2]; + long long xcursor[maxdim + 1], ycursor[maxdim + 1], zcursor[maxdim + 1]; + int ixshift[2][2][2], iyshift[2][2][2], izshift[2][2][2]; + + double cell_data[9][8]; + int ixh_last, iyh_last, izh_last; + int init; + + int init_cell_props; + int init_lecuyer_state; + long long p_xcursor[62], p_ycursor[62], p_zcursor[62]; + +} pan_state_; + +extern "C" +{ + void start_panphasia_(pan_state_ *lstate, const char *descriptor, int *ngrid, int *bverbose); + + void parse_descriptor_(const char *descriptor, int16_t *l, int32_t *ix, int32_t *iy, int32_t *iz, int16_t *side1, + int16_t *side2, int16_t *side3, int32_t *check_int, char *name); + + void panphasia_cell_properties_(pan_state_ *lstate, int *ixcell, int *iycell, int *izcell, double *cell_prop); + + void adv_panphasia_cell_properties_(pan_state_ *lstate, int *ixcell, int *iycell, int *izcell, int *layer_min, + int *layer_max, int *indep_field, double *cell_prop); + + void set_phases_and_rel_origin_(pan_state_ *lstate, const char *descriptor, int *lev, long long *ix_rel, + long long *iy_rel, long long *iz_rel, int *VERBOSE); +} + +struct panphasia_descriptor +{ + int16_t wn_level_base; + int32_t i_xorigin_base, i_yorigin_base, i_zorigin_base; + int16_t i_base, i_base_y, i_base_z; + int32_t check_rand; + std::string name; + + explicit panphasia_descriptor(std::string dstring) + { + char tmp[100]; + 
std::memset(tmp, ' ', 100); + parse_descriptor_(dstring.c_str(), &wn_level_base, &i_xorigin_base, &i_yorigin_base, &i_zorigin_base, &i_base, + &i_base_y, &i_base_z, &check_rand, tmp); + for (int i = 0; i < 100; i++) + if (tmp[i] == ' ') + { + tmp[i] = '\0'; + break; + } + name = tmp; + name.erase(std::remove(name.begin(), name.end(), ' '), name.end()); + } +}; + +// greatest common divisor +int gcd(int a, int b) +{ + if (b == 0) + return a; + return gcd(b, a % b); +} + +// least common multiple +int lcm(int a, int b) { return abs(a * b) / gcd(a, b); } + +// Two or largest power of 2 less than the argument +int largest_power_two_lte(int b) +{ + int a = 1; + if (b <= a) + return a; + while (2 * a < b) + a = 2 * a; + return a; +} + +class RNG_panphasia : public RNG_plugin +{ +private: +protected: + std::string descriptor_string_; + int num_threads_; + int levelmin_, levelmin_final_, levelmax_, ngrid_; + bool incongruent_fields_; + double inter_grid_phase_adjustment_; + // double translation_phase_; + pan_state_ *lstate; + int grid_p_, grid_m_; + double grid_rescale_fac_; + int coordinate_system_shift_[3]; + int ix_abs_[3], ix_per_[3], ix_rel_[3], level_p_, lextra_; + + void clear_panphasia_thread_states(void) + { + for (int i = 0; i < num_threads_; ++i) + { + lstate[i].init = 0; + lstate[i].init_cell_props = 0; + lstate[i].init_lecuyer_state = 0; + } + } + + void initialize_for_grid_structure(void) + { + clear_panphasia_thread_states(); + music::ilog.Print("PANPHASIA: running with %d threads", num_threads_); + + // if ngrid is not a multiple of i_base, then we need to enlarge and then sample down + ngrid_ = pcf_->get_value("setup", "GridRes"); + + grid_p_ = pdescriptor_->i_base; + grid_m_ = largest_power_two_lte(grid_p_); + + lextra_ = (log10((double)ngrid_ / (double)pdescriptor_->i_base) + 0.001) / log10(2.0); + int ratio = 1 << lextra_; + grid_rescale_fac_ = 1.0; + + coordinate_system_shift_[0] = -pcf_->get_value_safe("setup", "shift_x", 0); + 
coordinate_system_shift_[1] = -pcf_->get_value_safe("setup", "shift_y", 0); + coordinate_system_shift_[2] = -pcf_->get_value_safe("setup", "shift_z", 0); + + incongruent_fields_ = false; + if (ngrid_ != ratio * pdescriptor_->i_base) + { + incongruent_fields_ = true; + ngrid_ = 2 * ratio * pdescriptor_->i_base; + grid_rescale_fac_ = (double)ngrid_ / (1 << levelmin_); + music::ilog << "PANPHASIA: will use a higher resolution (using Fourier interpolation)" << std::endl; + music::ilog << " (" << grid_m_ << " -> " << grid_p_ << ") * 2**ref to be compatible with PANPHASIA" << std::endl; + } + } + + std::unique_ptr pdescriptor_; + +public: + explicit RNG_panphasia(config_file &cf) : RNG_plugin(cf) + { + descriptor_string_ = pcf_->get_value("random", "descriptor"); + +#ifdef _OPENMP + num_threads_ = omp_get_max_threads(); +#else + num_threads_ = 1; +#endif + + // create independent state descriptions for each thread + lstate = new pan_state_[num_threads_]; + + // parse the descriptor for its properties + pdescriptor_ = std::make_unique(descriptor_string_); + + music::ilog.Print("PANPHASIA: descriptor \'%s\' is base %d,", pdescriptor_->name.c_str(), pdescriptor_->i_base); + + // write panphasia base size into config file for the grid construction + // as the gridding unit we use the least common multiple of 2 and i_base + std::stringstream ss; + //ARJ ss << lcm(2, pdescriptor_->i_base); + //ss << two_or_largest_power_two_less_than(pdescriptor_->i_base);//ARJ + ss << 2; //ARJ - set gridding unit to two + pcf_->insert_value("setup", "gridding_unit", ss.str()); + ss.str(std::string()); + ss << pdescriptor_->i_base; + pcf_->insert_value("random", "base_unit", ss.str()); + + this->initialize_for_grid_structure(); + } + + ~RNG_panphasia() { delete[] lstate; } + + bool isMultiscale() const { return true; } + + void Fill_Grid(Grid_FFT &g) + { + auto sinc = [](real_t x) { return (std::abs(x) > 1e-16) ? 
std::sin(x) / x : 1.0; }; + auto dsinc = [](real_t x) { return (std::abs(x) > 1e-16) ? (x * std::cos(x) - std::sin(x)) / (x * x) : 0.0; }; + const real_t sqrt3{std::sqrt(3.0)}, sqrt27{std::sqrt(27.0)}; + + // make sure we're in the right space + Grid_FFT &g0 = g; + g0.FourierTransformBackward(false); + + // temporaries + Grid_FFT g1(g.n_, g.length_); + Grid_FFT g2(g.n_, g.length_); + Grid_FFT g3(g.n_, g.length_); + Grid_FFT g4(g.n_, g.length_); + + clear_panphasia_thread_states(); + music::ilog.Print("PANPHASIA: running with %d threads", num_threads_); + + ngrid_ = pcf_->get_value("setup", "GridRes"); + + grid_p_ = pdescriptor_->i_base; + // grid_m_ = largest_power_two_lte(grid_p_); + if (ngrid_ % grid_p_ != 0) + { + music::elog << "Grid resolution " << ngrid_ << " is not divisible by PANPHASIA descriptor length " << grid_p_ << std::endl; + throw std::runtime_error("Chosen [setup] / GridRes is not compatible with PANPHASIA descriptor length!"); + } + + double t1 = get_wtime(); + // double tp = t1; + +#pragma omp parallel + { +#ifdef _OPENMP + const int mythread = omp_get_thread_num(); +#else + const int mythread = 0; +#endif + + //int odd_x, odd_y, odd_z; + //int ng_level = ngrid_ * (1 << (level - levelmin_)); // full resolution of current level + + int verbosity = (mythread == 0); + char descriptor[100]; + std::memset(descriptor, 0, 100); + std::memcpy(descriptor, descriptor_string_.c_str(), descriptor_string_.size()); + + start_panphasia_(&lstate[mythread], descriptor, &ngrid_, &verbosity); + + { + panphasia_descriptor d(descriptor_string_); + + int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0); + int level_p = d.wn_level_base + lextra; + int ratio = 1 << lextra; + + lstate[mythread].layer_min = 0; + lstate[mythread].layer_max = level_p; + lstate[mythread].indep_field = 1; + + assert(ngrid_ == ratio * d.i_base); + + long long ix_rel[3]; + ix_rel[0] = 0; //ileft_corner_p[0]; + ix_rel[1] = 0; //ileft_corner_p[1]; + ix_rel[2] = 0; 
//ileft_corner_p[2]; + + set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2], + &verbosity); + } + + if (verbosity) + t1 = get_wtime(); + + std::array cell_prop; + pan_state_ *ps = &lstate[mythread]; + +#pragma omp for //nowait + for (size_t i = 0; i < g.size(0); i += 2) + { + for (size_t j = 0; j < g.size(1); j += 2) + { + for (size_t k = 0; k < g.size(2); k += 2) + { + + // ARJ - added inner set of loops to speed up evaluation of Panphasia + + for (int ix = 0; ix < 2; ++ix) + { + for (int iy = 0; iy < 2; ++iy) + { + for (int iz = 0; iz < 2; ++iz) + { + int ilocal = i + ix; + int jlocal = j + iy; + int klocal = k + iz; + + int iglobal = ilocal + g.local_0_start_; + int jglobal = jlocal; + int kglobal = klocal; + + adv_panphasia_cell_properties_(ps, &iglobal, &jglobal, &kglobal, &ps->layer_min, + &ps->layer_max, &ps->indep_field, &cell_prop[0]); + + g0.relem(ilocal, jlocal, klocal) = cell_prop[0]; + g1.relem(ilocal, jlocal, klocal) = cell_prop[4]; + g2.relem(ilocal, jlocal, klocal) = cell_prop[2]; + g3.relem(ilocal, jlocal, klocal) = cell_prop[1]; + g4.relem(ilocal, jlocal, klocal) = cell_prop[8]; + } + } + } + } + } + } + } // end omp parallel region + + g0.FourierTransformForward(); + g1.FourierTransformForward(); + g2.FourierTransformForward(); + g3.FourierTransformForward(); + g4.FourierTransformForward(); + +#pragma omp parallel for + for (size_t i = 0; i < g0.size(0); i++) + { + for (size_t j = 0; j < g0.size(1); j++) + { + for (size_t k = 0; k < g0.size(2); k++) + { + if (!g0.is_nyquist_mode(i, j, k)) + { + auto kvec = g0.get_k(i, j, k); + + auto argx = 0.5 * M_PI * kvec[0] / g.kny_[0]; + auto argy = 0.5 * M_PI * kvec[1] / g.kny_[1]; + auto argz = 0.5 * M_PI * kvec[2] / g.kny_[2]; + + auto fx = sinc(argx); + auto gx = ccomplex_t(0.0, dsinc(argx)); + auto fy = sinc(argy); + auto gy = ccomplex_t(0.0, dsinc(argy)); + auto fz = sinc(argz); + auto gz = ccomplex_t(0.0, dsinc(argz)); + + auto temp = (fx + sqrt3 * 
gx) * (fy + sqrt3 * gy) * (fz + sqrt3 * gz); + auto magnitude = std::sqrt(1.0 - std::abs(temp * temp)); + + auto y0(g0.kelem(i, j, k)), y1(g1.kelem(i, j, k)), y2(g2.kelem(i, j, k)), y3(g3.kelem(i, j, k)), y4(g4.kelem(i, j, k)); + + g0.kelem(i, j, k) = y0 * fx * fy * fz + + sqrt3 * (y1 * gx * fy * fz + y2 * fx * gy * fz + y3 * fx * fy * gz) + + y4 * magnitude; + } + else + { + g0.kelem(i, j, k) = 0.0; + } + } + } + } + + // music::ilog.Print("\033[31mtiming [build panphasia field]: %f s\033[0m", get_wtime() - tp); + // tp = get_wtime(); + + g1.FourierTransformBackward(false); + g2.FourierTransformBackward(false); + g3.FourierTransformBackward(false); + g4.FourierTransformBackward(false); + +#pragma omp parallel + { +#ifdef _OPENMP + const int mythread = omp_get_thread_num(); +#else + const int mythread = 0; +#endif + + // int odd_x, odd_y, odd_z; + int verbosity = (mythread == 0); + char descriptor[100]; + std::memset(descriptor, 0, 100); + std::memcpy(descriptor, descriptor_string_.c_str(), descriptor_string_.size()); + + start_panphasia_(&lstate[mythread], descriptor, &ngrid_, &verbosity); + + { + panphasia_descriptor d(descriptor_string_); + + int lextra = (log10((double)ngrid_ / (double)d.i_base) + 0.001) / log10(2.0); + int level_p = d.wn_level_base + lextra; + int ratio = 1 << lextra; + + lstate[mythread].layer_min = 0; + lstate[mythread].layer_max = level_p; + lstate[mythread].indep_field = 1; + + assert(ngrid_ == ratio * d.i_base); + + long long ix_rel[3]; + ix_rel[0] = 0; //ileft_corner_p[0]; + ix_rel[1] = 0; //ileft_corner_p[1]; + ix_rel[2] = 0; //ileft_corner_p[2]; + + set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2], + &verbosity); + } + + if (verbosity) + t1 = get_wtime(); + + //*************************************************************** + // Process Panphasia values: p110, p011, p101, p111 + //**************************************************************** + std::array cell_prop; + pan_state_ 
*ps = &lstate[mythread]; + +#pragma omp for //nowait + for (size_t i = 0; i < g1.size(0); i += 2) + { + for (size_t j = 0; j < g1.size(1); j += 2) + { + for (size_t k = 0; k < g1.size(2); k += 2) + { + // ARJ - added inner set of loops to speed up evaluation of Panphasia + for (int ix = 0; ix < 2; ++ix) + { + for (int iy = 0; iy < 2; ++iy) + { + for (int iz = 0; iz < 2; ++iz) + { + int ilocal = i + ix; + int jlocal = j + iy; + int klocal = k + iz; + + int iglobal = ilocal + g.local_0_start_; + int jglobal = jlocal; + int kglobal = klocal; + + adv_panphasia_cell_properties_(ps, &iglobal, &jglobal, &kglobal, &ps->layer_min, + &ps->layer_max, &ps->indep_field, &cell_prop[0]); + + g1.relem(ilocal, jlocal, klocal) = cell_prop[6]; + g2.relem(ilocal, jlocal, klocal) = cell_prop[3]; + g3.relem(ilocal, jlocal, klocal) = cell_prop[5]; + g4.relem(ilocal, jlocal, klocal) = cell_prop[7]; + } + } + } + } + } + } + } // end omp parallel region + + // music::ilog.Print("\033[31mtiming [adv_panphasia_cell_properties2]: %f s \033[0m", get_wtime() - tp); + // tp = get_wtime(); + + ///////////////////////////////////////////////////////////////////////// + // transform and convolve with Legendres + g1.FourierTransformForward(); + g2.FourierTransformForward(); + g3.FourierTransformForward(); + g4.FourierTransformForward(); + + #pragma omp parallel for + for (size_t i = 0; i < g1.size(0); i++) + { + for (size_t j = 0; j < g1.size(1); j++) + { + for (size_t k = 0; k < g1.size(2); k++) + { + if (!g1.is_nyquist_mode(i, j, k)) + { + auto kvec = g1.get_k(i, j, k); + + auto argx = 0.5 * M_PI * kvec[0] / g.kny_[0]; + auto argy = 0.5 * M_PI * kvec[1] / g.kny_[1]; + auto argz = 0.5 * M_PI * kvec[2] / g.kny_[2]; + + auto fx = sinc(argx); + auto gx = ccomplex_t(0.0, dsinc(argx)); + auto fy = sinc(argy); + auto gy = ccomplex_t(0.0, dsinc(argy)); + auto fz = sinc(argz); + auto gz = ccomplex_t(0.0, dsinc(argz)); + + auto y1(g1.kelem(i, j, k)), y2(g2.kelem(i, j, k)), y3(g3.kelem(i, j, k)), 
y4(g4.kelem(i, j, k)); + + g0.kelem(i, j, k) += 3.0 * (y1 * gx * gy * fz + y2 * fx * gy * gz + y3 * gx * fy * gz) + sqrt27 * y4 * gx * gy * gz; + } + } + } + } + + // music::ilog.Print("\033[31mtiming [build panphasia field2]: %f s\033[0m", get_wtime() - tp); + // tp = get_wtime(); + music::ilog.Print("time for calculating PANPHASIA field : %f s, %f µs/cell", get_wtime() - t1, + 1e6 * (get_wtime() - t1) / g.global_size(0) / g.global_size(1) / g.global_size(2)); + music::ilog.Print("PANPHASIA k-space statistices: mean Re = %f, std = %f", g0.mean(), g0.std()); + } +}; + +namespace +{ + RNG_plugin_creator_concrete creator("PANPHASIA"); +} +#endif // defined(USE_PANPHASIA) \ No newline at end of file diff --git a/src/plugins/transfer_CAMB_file.cc b/src/plugins/transfer_CAMB_file.cc new file mode 100644 index 0000000..4a2baf3 --- /dev/null +++ b/src/plugins/transfer_CAMB_file.cc @@ -0,0 +1,344 @@ +// transfer_CAMB.cc - This file is part of MUSIC - +// a code to generate multi-scale initial conditions for cosmological simulations + +// Copyright (C) 2019 Oliver Hahn + +#include +#include + +#include + +#include "transfer_function_plugin.hh" + +const double tiny = 1e-30; + +class transfer_CAMB_file_plugin : public TransferFunction_plugin +{ + +private: + std::string m_filename_Pk, m_filename_Tk; + std::vector m_tab_k, m_tab_Tk_tot, m_tab_Tk_cdm, m_tab_Tk_baryon; + std::vector m_tab_Tvk_tot, m_tab_Tvk_cdm, m_tab_Tvk_baryon; + gsl_interp_accel *acc_tot, *acc_cdm, *acc_baryon; + gsl_interp_accel *acc_vtot, *acc_vcdm, *acc_vbaryon; + gsl_spline *spline_tot, *spline_cdm, *spline_baryon; + gsl_spline *spline_vtot, *spline_vcdm, *spline_vbaryon; + + double m_kmin, m_kmax, m_Omega_b, m_Omega_m, m_zstart; + unsigned m_nlines; + + bool m_linbaryoninterp; + + void read_table(void) + { + + m_nlines = 0; + m_linbaryoninterp = false; + +#ifdef WITH_MPI + if (MPI::COMM_WORLD.Get_rank() == 0) + { +#endif + music::ilog.Print("Reading tabulated transfer function data from file \n \'%s\'", 
m_filename_Tk.c_str()); + + std::string line; + std::ifstream ifs(m_filename_Tk.c_str()); + + if (!ifs.good()) + throw std::runtime_error("Could not find transfer function file \'" + m_filename_Tk + "\'"); + + m_tab_k.clear(); + m_tab_Tk_tot.clear(); + m_tab_Tk_cdm.clear(); + m_tab_Tk_baryon.clear(); + m_tab_Tvk_tot.clear(); + m_tab_Tvk_cdm.clear(); //>[150609SH: add] + m_tab_Tvk_baryon.clear(); //>[150609SH: add] + + m_kmin = 1e30; + m_kmax = -1e30; + std::ofstream ofs("dump_transfer.txt"); + + while (!ifs.eof()) + { + getline(ifs, line); + if (ifs.eof()) + break; + + // OH: ignore line if it has a comment: + if (line.find("#") != std::string::npos) + continue; + + std::stringstream ss(line); + + double k, Tkc, Tkb, Tktot, Tkvtot, Tkvc, Tkvb, dummy; + + ss >> k; + ss >> Tkc; // cdm + ss >> Tkb; // baryon + ss >> dummy; // photon + ss >> dummy; // nu + ss >> dummy; // mass_nu + ss >> Tktot; // total + ss >> dummy; // no_nu + ss >> dummy; // total_de + ss >> dummy; // Weyl + ss >> Tkvc; // v_cdm + ss >> Tkvb; // v_b + ss >> dummy; // v_b-v_cdm + + if (ss.bad() || ss.fail()) + { + music::elog.Print("error reading the transfer function file (corrupt or not in expected format)!"); + throw std::runtime_error("error reading transfer function file \'" + + m_filename_Tk + "\'"); + } + + if (m_Omega_b < 1e-6) + Tkvtot = Tktot; + else + Tkvtot = ((m_Omega_m - m_Omega_b) * Tkvc + m_Omega_b * Tkvb) / m_Omega_m; //MvD + + m_linbaryoninterp |= Tkb < 0.0 || Tkvb < 0.0; + + m_tab_k.push_back(log10(k)); + + m_tab_Tk_tot.push_back(Tktot); + m_tab_Tk_baryon.push_back(Tkb); + m_tab_Tk_cdm.push_back(Tkc); + m_tab_Tvk_tot.push_back(Tkvtot); + m_tab_Tvk_baryon.push_back(Tkvb); + m_tab_Tvk_cdm.push_back(Tkvc); + + ++m_nlines; + + if (k < m_kmin) + m_kmin = k; + if (k > m_kmax) + m_kmax = k; + } + + for (size_t i = 0; i < m_tab_k.size(); ++i) + { + m_tab_Tk_tot[i] = log10(m_tab_Tk_tot[i]); + m_tab_Tk_cdm[i] = log10(m_tab_Tk_cdm[i]); + m_tab_Tvk_cdm[i] = log10(m_tab_Tvk_cdm[i]); + 
m_tab_Tvk_tot[i] = log10(m_tab_Tvk_tot[i]); + + if (!m_linbaryoninterp) + { + m_tab_Tk_baryon[i] = log10(m_tab_Tk_baryon[i]); + m_tab_Tvk_baryon[i] = log10(m_tab_Tvk_baryon[i]); + } + } + + ifs.close(); + + music::ilog.Print("Read CAMB transfer function table with %d rows", m_nlines); + + if (m_linbaryoninterp) + music::ilog.Print("Using log-lin interpolation for baryons\n (TF is not " + "positive definite)"); + +#ifdef WITH_MPI + } + + unsigned n = m_tab_k.size(); + MPI::COMM_WORLD.Bcast(&n, 1, MPI_UNSIGNED, 0); + + if (MPI::COMM_WORLD.Get_rank() > 0) + { + m_tab_k.assign(n, 0); + m_tab_Tk_tot.assign(n, 0); + m_tab_Tk_cdm.assign(n, 0); + m_tab_Tk_baryon.assign(n, 0); + m_tab_Tvk_tot.assign(n, 0); + m_tab_Tvk_cdm.assign(n, 0); + m_tab_Tvk_baryon.assign(n, 0); + } + + MPI::COMM_WORLD.Bcast(&m_tab_k[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tk_tot[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tk_cdm[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tk_baryon[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tvk_tot[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tvk_cdm[0], n, MPI_DOUBLE, 0); + MPI::COMM_WORLD.Bcast(&m_tab_Tvk_baryon[0], n, MPI_DOUBLE, 0); + +#endif + } + +public: + transfer_CAMB_file_plugin(config_file &cf) + : TransferFunction_plugin(cf) + { + m_filename_Tk = pcf_->get_value("cosmology", "transfer_file"); + m_Omega_m = cf.get_value("cosmology", "Omega_m"); //MvD + m_Omega_b = cf.get_value("cosmology", "Omega_b"); //MvD + m_zstart = cf.get_value("setup", "zstart"); //MvD + + read_table(); + + acc_tot = gsl_interp_accel_alloc(); + acc_cdm = gsl_interp_accel_alloc(); + acc_baryon = gsl_interp_accel_alloc(); + acc_vtot = gsl_interp_accel_alloc(); + acc_vcdm = gsl_interp_accel_alloc(); + acc_vbaryon = gsl_interp_accel_alloc(); + + spline_tot = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + spline_cdm = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + spline_baryon = gsl_spline_alloc(gsl_interp_cspline, 
m_tab_k.size()); + spline_vtot = gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + spline_vcdm = + gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + spline_vbaryon = + gsl_spline_alloc(gsl_interp_cspline, m_tab_k.size()); + + gsl_spline_init(spline_tot, &m_tab_k[0], &m_tab_Tk_tot[0], m_tab_k.size()); + gsl_spline_init(spline_cdm, &m_tab_k[0], &m_tab_Tk_cdm[0], m_tab_k.size()); + gsl_spline_init(spline_baryon, &m_tab_k[0], &m_tab_Tk_baryon[0], + m_tab_k.size()); + gsl_spline_init(spline_vtot, &m_tab_k[0], &m_tab_Tvk_tot[0], + m_tab_k.size()); + gsl_spline_init(spline_vcdm, &m_tab_k[0], &m_tab_Tvk_cdm[0], + m_tab_k.size()); + gsl_spline_init(spline_vbaryon, &m_tab_k[0], &m_tab_Tvk_baryon[0], + m_tab_k.size()); + + tf_distinct_ = true; // different density between CDM v.s. Baryon + tf_withvel_ = true; // using velocity transfer function + } + + ~transfer_CAMB_file_plugin() + { + gsl_spline_free(spline_tot); + gsl_spline_free(spline_cdm); + gsl_spline_free(spline_baryon); + gsl_spline_free(spline_vtot); + gsl_spline_free(spline_vcdm); + gsl_spline_free(spline_vbaryon); + + gsl_interp_accel_free(acc_tot); + gsl_interp_accel_free(acc_cdm); + gsl_interp_accel_free(acc_baryon); + gsl_interp_accel_free(acc_vtot); + gsl_interp_accel_free(acc_vcdm); + gsl_interp_accel_free(acc_vbaryon); + } + + // linear interpolation in log-log + inline double extrap_right(double k, const tf_type &type) const + { + int n = m_tab_k.size() - 1, n1 = n - 1; + + double v1(1.0), v2(1.0); + + double lk = log10(k); + double dk = m_tab_k[n] - m_tab_k[n1]; + double delk = lk - m_tab_k[n]; + + switch (type) + { + case cdm: + v1 = m_tab_Tk_cdm[n1]; + v2 = m_tab_Tk_cdm[n]; + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case baryon: + v1 = m_tab_Tk_baryon[n1]; + v2 = m_tab_Tk_baryon[n]; + if (m_linbaryoninterp) + return std::max((v2 - v1) / dk * (delk) + v2, tiny); + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case vtotal: //>[150609SH: add] + v1 = m_tab_Tvk_tot[n1]; + v2 = 
m_tab_Tvk_tot[n]; + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case vcdm: //>[150609SH: add] + v1 = m_tab_Tvk_cdm[n1]; + v2 = m_tab_Tvk_cdm[n]; + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case vbaryon: //>[150609SH: add] + v1 = m_tab_Tvk_baryon[n1]; + v2 = m_tab_Tvk_baryon[n]; + if (m_linbaryoninterp) + return std::max((v2 - v1) / dk * (delk) + v2, tiny); + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + case total: + v1 = m_tab_Tk_tot[n1]; + v2 = m_tab_Tk_tot[n]; + return pow(10.0, (v2 - v1) / dk * (delk) + v2); + default: + throw std::runtime_error( + "Invalid type requested in transfer function evaluation"); + } + + return 0.0; + } + + inline double compute(double k, tf_type type) const + { + // use constant interpolation on the left side of the tabulated values + if (k < m_kmin) + { + switch (type) + { + case cdm: + return pow(10.0, m_tab_Tk_cdm[0]); + case baryon: + if (m_linbaryoninterp) + return m_tab_Tk_baryon[0]; + return pow(10.0, m_tab_Tk_baryon[0]); + case vtotal: + return pow(10.0, m_tab_Tvk_tot[0]); + case vcdm: + return pow(10.0, m_tab_Tvk_cdm[0]); + case vbaryon: + if (m_linbaryoninterp) + return m_tab_Tvk_baryon[0]; + return pow(10.0, m_tab_Tvk_baryon[0]); + case total: + return pow(10.0, m_tab_Tk_tot[0]); + default: + throw std::runtime_error( + "Invalid type requested in transfer function evaluation"); + } + } + // use linear interpolation on the right side of the tabulated values + else if (k > m_kmax) + return extrap_right(k, type); + + double lk = log10(k); + switch (type) + { + case cdm: + return pow(10.0, gsl_spline_eval(spline_cdm, lk, acc_cdm)); + case baryon: + if (m_linbaryoninterp) + return gsl_spline_eval(spline_baryon, lk, acc_baryon); + return pow(10.0, gsl_spline_eval(spline_baryon, lk, acc_baryon)); + case vtotal: + return pow(10.0, gsl_spline_eval(spline_vtot, lk, acc_vtot)); //MvD + case vcdm: + return pow(10.0, gsl_spline_eval(spline_vcdm, lk, acc_vcdm)); + case vbaryon: + if (m_linbaryoninterp) + return 
gsl_spline_eval(spline_vbaryon, lk, acc_vbaryon); + return pow(10.0, gsl_spline_eval(spline_vbaryon, lk, acc_vbaryon)); + case total: + return pow(10.0, gsl_spline_eval(spline_tot, lk, acc_tot)); + default: + throw std::runtime_error( + "Invalid type requested in transfer function evaluation"); + } + } + + inline double get_kmin(void) const { return pow(10.0, m_tab_k[1]); } + + inline double get_kmax(void) const { return pow(10.0, m_tab_k[m_tab_k.size() - 2]); } +}; + +namespace +{ +TransferFunction_plugin_creator_concrete creator("CAMB_file"); +} diff --git a/src/plugins/transfer_CLASS.cc b/src/plugins/transfer_CLASS.cc index 85b65b8..a842736 100644 --- a/src/plugins/transfer_CLASS.cc +++ b/src/plugins/transfer_CLASS.cc @@ -9,145 +9,328 @@ #include #include #include +#include #include #include #include #include +#include -#include -#include - -class transfer_CLASS_plugin : public TransferFunction_plugin { +class transfer_CLASS_plugin : public TransferFunction_plugin +{ private: - std::vector tab_lnk_, tab_dtot_, tab_dc_, tab_db_, tab_ttot_, tab_tc_, tab_tb_; - gsl_interp_accel *gsl_ia_dtot_, *gsl_ia_dc_, *gsl_ia_db_, *gsl_ia_ttot_, *gsl_ia_tc_, *gsl_ia_tb_; - gsl_spline *gsl_sp_dtot_, *gsl_sp_dc_, *gsl_sp_db_, *gsl_sp_ttot_, *gsl_sp_tc_, *gsl_sp_tb_; - double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_; + interpolated_function_1d delta_c_, delta_b_, delta_n_, delta_m_, theta_c_, theta_b_, theta_n_, theta_m_; + interpolated_function_1d delta_c0_, delta_b0_, delta_n0_, delta_m0_, theta_c0_, theta_b0_, theta_n0_, theta_m0_; - void ClassEngine_get_data( void ){ - std::vector d_ncdm, t_ncdm, phi, psi; + // single fluid growing/decaying mode decomposition + // gsl_interp_accel *gsl_ia_Cplus_, *gsl_ia_Cminus_; + // gsl_spline *gsl_sp_Cplus_, *gsl_sp_Cminus_; + // std::vector tab_Cplus_, tab_Cminus_; - csoca::ilog << "Computing TF via ClassEngine..." << std::endl << " ztarget = " << ztarget_ << ", zstart = " << zstart_ << " ..." 
<< std::flush; - double wtime = get_wtime(); - - ClassParams pars; - pars.add("extra metric transfer functions", "yes"); - pars.add("z_pk",ztarget_); - pars.add("P_k_max_h/Mpc", kmax_); - pars.add("h",h_); - pars.add("Omega_b",Omega_b_); - // pars.add("Omega_k",0.0); - // pars.add("Omega_ur",0.0); - pars.add("N_ur",N_ur_); - pars.add("Omega_cdm",Omega_m_-Omega_b_); - pars.add("Omega_Lambda",1.0-Omega_m_); - // pars.add("Omega_fld",0.0); - // pars.add("Omega_scf",0.0); - pars.add("A_s",2.42e-9); - pars.add("n_s",.96); // tnis doesn't matter for TF - pars.add("output","dTk,vTk"); - pars.add("YHe",0.248); + double Omega_m_, Omega_b_, N_ur_, zstart_, ztarget_, kmax_, kmin_, h_, astart_, atarget_, A_s_, n_s_, sigma8_, Tcmb_, tnorm_; - pars.add("k_per_decade_for_pk",50); - pars.add("k_per_decade_for_bao",50); - pars.add("compute damping scale","yes"); - pars.add("z_reio",-1.0); // make sure reionisation is not included + ClassParams pars_; + std::unique_ptr the_ClassEngine_; + std::ofstream ofs_class_input_; - std::unique_ptr CE = std::make_unique(pars, false); + template + void add_class_parameter(std::string parameter_name, const T parameter_value) + { + pars_.add(parameter_name, parameter_value); + ofs_class_input_ << parameter_name << " = " << parameter_value << std::endl; + } - CE->getTk(ztarget_, tab_lnk_, tab_dc_, tab_db_, d_ncdm, tab_dtot_, - tab_tc_, tab_tb_, t_ncdm, tab_ttot_, phi, psi ); + //! Set up class parameters from MUSIC cosmological parameters + void init_ClassEngine(void) + { + //--- general parameters ------------------------------------------ + add_class_parameter("z_max_pk", std::max(std::max(zstart_, ztarget_),199.0)); // use 1.2 as safety + add_class_parameter("P_k_max_h/Mpc", kmax_); + add_class_parameter("output", "dTk,vTk"); + add_class_parameter("extra metric transfer functions","yes"); + // add_class_parameter("lensing", "no"); - wtime = get_wtime() - wtime; - csoca::ilog << " took " << wtime << " s / " << tab_lnk_.size() << " modes." 
<< std::endl; + //--- choose gauge ------------------------------------------------ + // add_class_parameter("extra metric transfer functions", "yes"); + add_class_parameter("gauge", "synchronous"); + + //--- cosmological parameters, densities -------------------------- + add_class_parameter("h", h_); + + add_class_parameter("Omega_b", Omega_b_); + add_class_parameter("Omega_cdm", Omega_m_ - Omega_b_); + add_class_parameter("Omega_k", 0.0); + // add_class_parameter("Omega_Lambda",1.0-Omega_m_); + add_class_parameter("Omega_fld", 0.0); + add_class_parameter("Omega_scf", 0.0); + // add_class_parameter("fluid_equation_of_state","CLP"); + // add_class_parameter("w0_fld", -1 ); + // add_class_parameter("wa_fld", 0. ); + // add_class_parameter("cs2_fld", 1); + + //--- massive neutrinos ------------------------------------------- +#if 1 + //default off + // add_class_parameter("Omega_ur",0.0); + add_class_parameter("N_ur", N_ur_); + add_class_parameter("N_ncdm", 0); + +#else + // change above to enable + add_class_parameter("N_ur", 0); + add_class_parameter("N_ncdm", 1); + add_class_parameter("m_ncdm", "0.4"); + add_class_parameter("T_ncdm", 0.71611); +#endif + + //--- cosmological parameters, primordial ------------------------- + add_class_parameter("P_k_ini type", "analytic_Pk"); + + if( A_s_ > 0.0 ){ + add_class_parameter("A_s", A_s_); + }else{ + add_class_parameter("sigma8", sigma8_); } + add_class_parameter("n_s", n_s_); + add_class_parameter("alpha_s", 0.0); + add_class_parameter("T_cmb", Tcmb_); + add_class_parameter("YHe", 0.248); + + // precision parameters + add_class_parameter("k_per_decade_for_pk", 100); + add_class_parameter("k_per_decade_for_bao", 100); + add_class_parameter("compute damping scale", "yes"); + add_class_parameter("tol_perturb_integration", 1.e-8); + add_class_parameter("tol_background_integration", 1e-9); + + // high precision options from cl_permille.pre: + // precision file to be passed as input in order to achieve at least percent 
precision on scalar Cls + add_class_parameter("hyper_flat_approximation_nu", 7000.); + add_class_parameter("transfer_neglect_delta_k_S_t0", 0.17); + add_class_parameter("transfer_neglect_delta_k_S_t1", 0.05); + add_class_parameter("transfer_neglect_delta_k_S_t2", 0.17); + add_class_parameter("transfer_neglect_delta_k_S_e", 0.13); + add_class_parameter("delta_l_max", 1000); + + int class_verbosity = 0; + + add_class_parameter("background_verbose", class_verbosity); + add_class_parameter("thermodynamics_verbose", class_verbosity); + add_class_parameter("perturbations_verbose", class_verbosity); + add_class_parameter("transfer_verbose", class_verbosity); + add_class_parameter("primordial_verbose", class_verbosity); + add_class_parameter("spectra_verbose", class_verbosity); + add_class_parameter("nonlinear_verbose", class_verbosity); + add_class_parameter("lensing_verbose", class_verbosity); + add_class_parameter("output_verbose", class_verbosity); + + // output parameters, only needed for the control CLASS .ini file that we output + std::stringstream zlist; + if (ztarget_ == zstart_) + zlist << ztarget_ << ((ztarget_!=0.0)? ", 0.0" : ""); + else + zlist << std::max(ztarget_, zstart_) << ", " << std::min(ztarget_, zstart_) << ", 0.0"; + add_class_parameter("z_pk", zlist.str()); + + music::ilog << "Computing transfer function via ClassEngine..." << std::endl; + double wtime = get_wtime(); + + the_ClassEngine_ = std::move(std::make_unique(pars_, false)); + + wtime = get_wtime() - wtime; + music::ilog << "CLASS took " << wtime << " s." << std::endl; + } + + //! 
run ClassEngine with parameters set up + void run_ClassEngine(double z, std::vector &k, std::vector &dc, std::vector &tc, std::vector &db, std::vector &tb, + std::vector &dn, std::vector &tn, std::vector &dm, std::vector &tm) + { + k.clear(); + dc.clear(); db.clear(); dn.clear(); dm.clear(); + tc.clear(); tb.clear(); tn.clear(); tm.clear(); + + the_ClassEngine_->getTk(z, k, dc, db, dn, dm, tc, tb, tn, tm); + + real_t fc = (Omega_m_ - Omega_b_) / Omega_m_; + real_t fb = Omega_b_ / Omega_m_; + + for (size_t i = 0; i < k.size(); ++i) + { + // convert to 'CAMB' format, since we interpolate loglog and + // don't want negative numbers... + auto ik2 = 1.0 / (k[i] * k[i]) * h_ * h_; + dc[i] = -dc[i] * ik2; + db[i] = -db[i] * ik2; + dn[i] = -dn[i] * ik2; + dm[i] = fc * dc[i] + fb * db[i]; + tc[i] = -tc[i] * ik2; + tb[i] = -tb[i] * ik2; + tn[i] = -tn[i] * ik2; + tm[i] = fc * tc[i] + fb * tb[i]; + } + } public: - explicit transfer_CLASS_plugin( ConfigFile &cf) - : TransferFunction_plugin(cf) - { - h_ = pcf_->GetValue("cosmology","H0") / 100.0; - Omega_m_ = pcf_->GetValue("cosmology","Omega_m"); - Omega_b_ = pcf_->GetValue("cosmology","Omega_b"); - N_ur_ = pcf_->GetValueSafe("cosmology","N_ur", 3.046); - ztarget_ = pcf_->GetValueSafe("cosmology","ztarget",0.0); - zstart_ = pcf_->GetValue("setup","zstart"); - double lbox = pcf_->GetValue("setup","BoxLength"); - int nres = pcf_->GetValue("setup","GridRes"); - kmax_ = 2.0*M_PI/lbox * nres/2 * sqrt(3) * 2.0; // 120% of spatial diagonal + explicit transfer_CLASS_plugin(config_file &cf) + : TransferFunction_plugin(cf) + { + this->tf_isnormalised_ = true; - this->ClassEngine_get_data(); - - gsl_ia_dtot_ = gsl_interp_accel_alloc(); - gsl_ia_dc_ = gsl_interp_accel_alloc(); - gsl_ia_db_ = gsl_interp_accel_alloc(); - gsl_ia_ttot_ = gsl_interp_accel_alloc(); - gsl_ia_tc_ = gsl_interp_accel_alloc(); - gsl_ia_tb_ = gsl_interp_accel_alloc(); + ofs_class_input_.open("input_class_parameters.ini", std::ios::trunc); - gsl_sp_dtot_ = 
gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_dc_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_db_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_ttot_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_tc_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); - gsl_sp_tb_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + h_ = pcf_->get_value("cosmology", "H0") / 100.0; + Omega_m_ = pcf_->get_value("cosmology", "Omega_m"); + Omega_b_ = pcf_->get_value("cosmology", "Omega_b"); + N_ur_ = pcf_->get_value_safe("cosmology", "Neff", 3.046); + ztarget_ = pcf_->get_value_safe("cosmology", "ztarget", 0.0); + atarget_ = 1.0 / (1.0 + ztarget_); + zstart_ = pcf_->get_value("setup", "zstart"); + astart_ = 1.0 / (1.0 + zstart_); + A_s_ = pcf_->get_value_safe("cosmology", "A_s", -1.0); + n_s_ = pcf_->get_value("cosmology", "nspec"); + Tcmb_ = cf.get_value_safe("cosmology", "Tcmb", 2.7255); - gsl_spline_init(gsl_sp_dtot_, &tab_lnk_[0], &tab_dtot_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_dc_, &tab_lnk_[0], &tab_dc_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_db_, &tab_lnk_[0], &tab_db_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_ttot_, &tab_lnk_[0], &tab_ttot_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_tc_, &tab_lnk_[0], &tab_tc_[0], tab_lnk_.size()); - gsl_spline_init(gsl_sp_tb_, &tab_lnk_[0], &tab_tb_[0], tab_lnk_.size()); - - kmin_ = std::exp(tab_lnk_[0]); - - tf_distinct_ = true; - tf_withvel_ = true; - } - - ~transfer_CLASS_plugin(){ - gsl_spline_free(gsl_sp_dtot_); - gsl_spline_free(gsl_sp_dc_); - gsl_spline_free(gsl_sp_db_); - gsl_spline_free(gsl_sp_ttot_); - gsl_spline_free(gsl_sp_tc_); - gsl_spline_free(gsl_sp_tb_); - - gsl_interp_accel_free(gsl_ia_dtot_); - gsl_interp_accel_free(gsl_ia_dc_); - gsl_interp_accel_free(gsl_ia_db_); - gsl_interp_accel_free(gsl_ia_ttot_); - gsl_interp_accel_free(gsl_ia_tc_); - gsl_interp_accel_free(gsl_ia_tb_); - } - - inline double 
compute(double k, tf_type type) const { - gsl_spline *splineT = nullptr; - gsl_interp_accel *accT = nullptr; - switch(type){ - case total: splineT = gsl_sp_dtot_; accT = gsl_ia_dtot_; break; - case cdm: splineT = gsl_sp_dc_; accT = gsl_ia_dc_; break; - case baryon: splineT = gsl_sp_db_; accT = gsl_ia_db_; break; - case vtotal: splineT = gsl_sp_ttot_; accT = gsl_ia_ttot_; break; - case vcdm: splineT = gsl_sp_tc_; accT = gsl_ia_tc_; break; - case vbaryon: splineT = gsl_sp_tb_; accT = gsl_ia_tb_; break; - default: - throw std::runtime_error("Invalid type requested in transfer function evaluation"); + if (A_s_ > 0) { + music::ilog << "CLASS: Using A_s=" << A_s_<< " to normalise the transfer function." << std::endl; + }else{ + sigma8_ = pcf_->get_value_safe("cosmology", "sigma_8", -1.0); + if( sigma8_ < 0 ){ + throw std::runtime_error("Need to specify either A_s or sigma_8 for CLASS plugin..."); } + music::ilog << "CLASS: Using sigma8_ =" << sigma8_<< " to normalise the transfer function." << std::endl; + } - double d = (k<=kmin_)? 
gsl_spline_eval(splineT, std::log(kmin_), accT) - : gsl_spline_eval(splineT, std::log(k*h_), accT); - return -d/(k*k); + // determine highest k we will need for the resolution selected + double lbox = pcf_->get_value("setup", "BoxLength"); + int nres = pcf_->get_value("setup", "GridRes"); + kmax_ = std::max(20.0, 2.0 * M_PI / lbox * nres / 2 * sqrt(3) * 2.0); // 120% of spatial diagonal, or k=10h Mpc-1 + + // initialise CLASS and get the normalisation + this->init_ClassEngine(); + A_s_ = the_ClassEngine_->get_A_s(); // this either the input one, or the one computed from sigma8 + + // compute the normalisation to interface with MUSIC + double k_p = pcf_->get_value_safe("cosmology", "k_p", 0.05); + tnorm_ = std::sqrt(2.0 * M_PI * M_PI * A_s_ * std::pow(1.0 / k_p * h_, n_s_ - 1) / std::pow(2.0 * M_PI, 3.0)); + + // compute the transfer function at z=0 using CLASS engine + std::vector k, dc, tc, db, tb, dn, tn, dm, tm; + this->run_ClassEngine(0.0, k, dc, tc, db, tb, dn, tn, dm, tm); + + delta_c0_.set_data(k, dc); + theta_c0_.set_data(k, tc); + delta_b0_.set_data(k, db); + theta_b0_.set_data(k, tb); + delta_n0_.set_data(k, dn); + theta_n0_.set_data(k, tn); + delta_m0_.set_data(k, dm); + theta_m0_.set_data(k, tm); + + // compute the transfer function at z=z_target using CLASS engine + this->run_ClassEngine(ztarget_, k, dc, tc, db, tb, dn, tn, dm, tm); + delta_c_.set_data(k, dc); + theta_c_.set_data(k, tc); + delta_b_.set_data(k, db); + theta_b_.set_data(k, tb); + delta_n_.set_data(k, dn); + theta_n_.set_data(k, tn); + delta_m_.set_data(k, dm); + theta_m_.set_data(k, tm); + + kmin_ = k[0]; + kmax_ = k.back(); + + music::ilog << "CLASS table contains k = " << this->get_kmin() << " to " << this->get_kmax() << " h Mpc-1." 
<< std::endl; + + //-------------------------------------------------------------------------- + // single fluid growing/decaying mode decomposition + //-------------------------------------------------------------------------- + /*gsl_ia_Cplus_ = gsl_interp_accel_alloc(); + gsl_ia_Cminus_ = gsl_interp_accel_alloc(); + + gsl_sp_Cplus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + gsl_sp_Cminus_ = gsl_spline_alloc(gsl_interp_cspline, tab_lnk_.size()); + + tab_Cplus_.assign(tab_lnk_.size(), 0); + tab_Cminus_.assign(tab_lnk_.size(), 0); + + std::ofstream ofs("grow_decay.txt"); + + for (size_t i = 0; i < tab_lnk_.size(); ++i) + { + tab_Cplus_[i] = (3.0 / 5.0 * tab_dtot_[i] / atarget_ - 2.0 / 5.0 * tab_ttot_[i] / atarget_); + tab_Cminus_[i] = (2.0 / 5.0 * std::pow(atarget_, 1.5) * (tab_dtot_[i] + tab_ttot_[i])); + + ofs << std::exp(tab_lnk_[i]) << " " << tab_Cplus_[i] << " " << tab_Cminus_[i] << " " << tab_dtot_[i] << " " << tab_ttot_[i] << std::endl; + } + + gsl_spline_init(gsl_sp_Cplus_, &tab_lnk_[0], &tab_Cplus_[0], tab_lnk_.size()); + gsl_spline_init(gsl_sp_Cminus_, &tab_lnk_[0], &tab_Cminus_[0], tab_lnk_.size());*/ + //-------------------------------------------------------------------------- + + tf_distinct_ = true; + tf_withvel_ = true; + tf_withtotal0_ = true; } - inline double get_kmin(void) const { return std::exp(tab_lnk_[0])/h_; } - inline double get_kmax(void) const { return std::exp(tab_lnk_[tab_lnk_.size()-1])/h_; } + ~transfer_CLASS_plugin() + { + } + + inline double compute(double k, tf_type type) const + { + k *= h_; + + if (k < kmin_ || k > kmax_) + { + return 0.0; + } + + real_t val(0.0); + switch (type) + { + // values at ztarget: + case total: + val = delta_m_(k); break; + case cdm: + val = delta_c_(k); break; + case baryon: + val = delta_b_(k); break; + case vtotal: + val = theta_m_(k); break; + case vcdm: + val = theta_c_(k); break; + case vbaryon: + val = theta_b_(k); break; + + // values at zstart: + case total0: + val = 
delta_m0_(k); break; + case cdm0: + val = delta_c0_(k); break; + case baryon0: + val = delta_b0_(k); break; + case vtotal0: + val = theta_m0_(k); break; + case vcdm0: + val = theta_c0_(k); break; + case vbaryon0: + val = theta_b0_(k); break; + default: + throw std::runtime_error("Invalid type requested in transfer function evaluation"); + } + return val * tnorm_; + } + + inline double get_kmin(void) const { return kmin_ / h_; } + inline double get_kmax(void) const { return kmax_ / h_; } }; -namespace { +namespace +{ TransferFunction_plugin_creator_concrete creator("CLASS"); } diff --git a/src/plugins/transfer_eisenstein.cc b/src/plugins/transfer_eisenstein.cc index 9d4c032..adc9e06 100644 --- a/src/plugins/transfer_eisenstein.cc +++ b/src/plugins/transfer_eisenstein.cc @@ -207,13 +207,13 @@ public: \param Tcmb mean temperature of the CMB fluctuations (defaults to Tcmb = 2.726 if not specified) */ - transfer_eisenstein_plugin(ConfigFile &cf) + transfer_eisenstein_plugin(config_file &cf) : TransferFunction_plugin(cf) { - double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726); - double H0 = pcf_->GetValue("cosmology", "H0"); - double Omega_m = pcf_->GetValue("cosmology", "Omega_m"); - double Omega_b = pcf_->GetValue("cosmology", "Omega_b"); + double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726); + double H0 = pcf_->get_value("cosmology", "H0"); + double Omega_m = pcf_->get_value("cosmology", "Omega_m"); + double Omega_b = pcf_->get_value("cosmology", "Omega_b"); etf_.set_parameters(H0, Omega_m, Omega_b, Tcmb); @@ -257,15 +257,15 @@ protected: }; public: - transfer_eisenstein_wdm_plugin(ConfigFile &cf) + transfer_eisenstein_wdm_plugin(config_file &cf) : TransferFunction_plugin(cf) { - double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726); - omegam_ = pcf_->GetValue("cosmology", "Omega_m"); - omegab_ = pcf_->GetValue("cosmology", "Omega_b"); - H0_ = pcf_->GetValue("cosmology", "H0"); + double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 
2.726); + omegam_ = pcf_->get_value("cosmology", "Omega_m"); + omegab_ = pcf_->get_value("cosmology", "Omega_b"); + H0_ = pcf_->get_value("cosmology", "H0"); m_h0 = H0_ / 100.0; - wdmm_ = pcf_->GetValue("cosmology", "WDMmass"); + wdmm_ = pcf_->get_value("cosmology", "WDMmass"); etf_.set_parameters(H0_, omegam_, omegab_, Tcmb); @@ -273,7 +273,7 @@ public: typemap_.insert(std::pair("VIEL", wdm_viel)); // add the other types typemap_.insert(std::pair("BODE_WRONG", wdm_bode_wrong)); // add the other types - type_ = pcf_->GetValueSafe("cosmology", "WDMtftype", "BODE"); + type_ = pcf_->get_value_safe("cosmology", "WDMtftype", "BODE"); //type_ = std::string( toupper( type_.c_str() ) ); @@ -286,29 +286,29 @@ public: { //... parameterisation from Bode et al. (2001), ApJ, 556, 93 case wdm_bode: - wdmnu_ = pcf_->GetValueSafe("cosmology", "WDMnu", 1.0); - wdmgx_ = pcf_->GetValueSafe("cosmology", "WDMg_x", 1.5); + wdmnu_ = pcf_->get_value_safe("cosmology", "WDMnu", 1.0); + wdmgx_ = pcf_->get_value_safe("cosmology", "WDMg_x", 1.5); m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ * 0.01 / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29); break; //... parameterisation from Viel et al. (2005), Phys Rev D, 71 case wdm_viel: - wdmnu_ = pcf_->GetValueSafe("cosmology", "WDMnu", 1.12); + wdmnu_ = pcf_->get_value_safe("cosmology", "WDMnu", 1.12); m_WDMalpha = 0.049 * pow(omegam_ / 0.25, 0.11) * pow(H0_ * 0.01 / 0.7, 1.22) * pow(wdmm_, -1.11); break; //.... below is for historical reasons due to the buggy parameterisation //.... in early versions of MUSIC, but apart from H instead of h, Bode et al. 
case wdm_bode_wrong: - wdmnu_ = pcf_->GetValueSafe("cosmology", "WDMnu", 1.0); - wdmgx_ = pcf_->GetValueSafe("cosmology", "WDMg_x", 1.5); + wdmnu_ = pcf_->get_value_safe("cosmology", "WDMnu", 1.0); + wdmgx_ = pcf_->get_value_safe("cosmology", "WDMg_x", 1.5); m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29); break; default: - wdmnu_ = pcf_->GetValueSafe("cosmology", "WDMnu", 1.0); - wdmgx_ = pcf_->GetValueSafe("cosmology", "WDMg_x", 1.5); + wdmnu_ = pcf_->get_value_safe("cosmology", "WDMnu", 1.0); + wdmgx_ = pcf_->get_value_safe("cosmology", "WDMg_x", 1.5); m_WDMalpha = 0.05 * pow(omegam_ / 0.4, 0.15) * pow(H0_ * 0.01 / 0.65, 1.3) * pow(wdmm_, -1.15) * pow(1.5 / wdmgx_, 0.29); break; } @@ -340,20 +340,20 @@ protected: eisenstein_transfer etf_; public: - transfer_eisenstein_cdmbino_plugin(ConfigFile &cf) + transfer_eisenstein_cdmbino_plugin(config_file &cf) : TransferFunction_plugin(cf) { - double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726); + double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726); - omegam_ = pcf_->GetValue("cosmology", "Omega_m"); - omegab_ = pcf_->GetValue("cosmology", "Omega_b"); - H0_ = pcf_->GetValue("cosmology", "H0"); + omegam_ = pcf_->get_value("cosmology", "Omega_m"); + omegab_ = pcf_->get_value("cosmology", "Omega_b"); + H0_ = pcf_->get_value("cosmology", "H0"); m_h0 = H0_ / 100.0; etf_.set_parameters(H0_, omegam_, omegab_, Tcmb); - mcdm_ = pcf_->GetValueSafe("cosmology", "CDM_mass", 100.0); // bino particle mass in GeV - Tkd_ = pcf_->GetValueSafe("cosmology", "CDM_Tkd", 33.0); // temperature at which CDM particle kinetically decouples (in MeV) + mcdm_ = pcf_->get_value_safe("cosmology", "CDM_mass", 100.0); // bino particle mass in GeV + Tkd_ = pcf_->get_value_safe("cosmology", "CDM_Tkd", 33.0); // temperature at which CDM particle kinetically decouples (in MeV) kfs_ = 1.7e6 / m_h0 * sqrt(mcdm_ / 100. * Tkd_ / 30.) / (1.0 + log(Tkd_ / 30.) 
/ 19.2); kd_ = 3.8e7 / m_h0 * sqrt(mcdm_ / 100. * Tkd_ / 30.); @@ -395,19 +395,19 @@ protected: eisenstein_transfer etf_; public: - transfer_eisenstein_cutoff_plugin(ConfigFile &cf) + transfer_eisenstein_cutoff_plugin(config_file &cf) : TransferFunction_plugin(cf) { - double Tcmb = pcf_->GetValueSafe("cosmology", "Tcmb", 2.726); + double Tcmb = pcf_->get_value_safe("cosmology", "Tcmb", 2.726); - omegam_ = pcf_->GetValue("cosmology", "Omega_m"); - omegab_ = pcf_->GetValue("cosmology", "Omega_b"); - H0_ = pcf_->GetValue("cosmology", "H0"); + omegam_ = pcf_->get_value("cosmology", "Omega_m"); + omegab_ = pcf_->get_value("cosmology", "Omega_b"); + H0_ = pcf_->get_value("cosmology", "H0"); m_h0 = H0_ / 100.0; etf_.set_parameters(H0_, omegam_, omegab_, Tcmb); - Rcut_ = pcf_->GetValueSafe("cosmology", "Rcut", 1.0); + Rcut_ = pcf_->get_value_safe("cosmology", "Rcut", 1.0); } inline double compute(double k, tf_type type) const @@ -434,5 +434,5 @@ namespace TransferFunction_plugin_creator_concrete creator("eisenstein"); TransferFunction_plugin_creator_concrete creator2("eisenstein_wdm"); TransferFunction_plugin_creator_concrete creator3("eisenstein_cdmbino"); -TransferFunction_plugin_creator_concrete creator4("eisenstein_cutoff"); +// TransferFunction_plugin_creator_concrete creator4("eisenstein_cutoff"); } // namespace diff --git a/src/random_plugin.cc b/src/random_plugin.cc index 045978f..5121efa 100644 --- a/src/random_plugin.cc +++ b/src/random_plugin.cc @@ -13,32 +13,33 @@ void print_RNG_plugins() std::map &m = get_RNG_plugin_map(); std::map::iterator it; it = m.begin(); - csoca::ilog << "- Available random number generator plug-ins:" << std::endl; + music::ilog << "Available random number generator plug-ins:" << std::endl; while (it != m.end()) { if ((*it).second){ - csoca::ilog.Print("\t\'%s\'\n", (*it).first.c_str()); + music::ilog.Print("\t\'%s\'\n", (*it).first.c_str()); } ++it; } + music::ilog << std::endl; } -std::unique_ptr select_RNG_plugin(ConfigFile &cf) 
+std::unique_ptr select_RNG_plugin(config_file &cf) { - std::string rngname = cf.GetValueSafe("random", "generator", "MUSIC"); + std::string rngname = cf.get_value_safe("random", "generator", "MUSIC"); RNG_plugin_creator *the_RNG_plugin_creator = get_RNG_plugin_map()[rngname]; if (!the_RNG_plugin_creator) { - csoca::ilog.Print("Invalid/Unregistered random number generator plug-in encountered : %s", rngname.c_str()); + music::ilog.Print("Invalid/Unregistered random number generator plug-in encountered : %s", rngname.c_str()); print_RNG_plugins(); throw std::runtime_error("Unknown random number generator plug-in"); } else { - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << std::setw(32) << std::left << "Random number generator plugin" << " : " << rngname << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << std::setw(32) << std::left << "Random number generator plugin" << " : " << rngname << std::endl; } return std::move(the_RNG_plugin_creator->Create(cf)); diff --git a/src/testing.cc b/src/testing.cc index bfd088d..8e88e17 100644 --- a/src/testing.cc +++ b/src/testing.cc @@ -9,7 +9,7 @@ namespace testing { void output_potentials_and_densities( - ConfigFile &the_config, + config_file &the_config, size_t ngrid, real_t boxlen, Grid_FFT &phi, Grid_FFT &phi2, @@ -17,8 +17,8 @@ void output_potentials_and_densities( Grid_FFT &phi3b, std::array *, 3> &A3) { - const std::string fname_hdf5 = the_config.GetValueSafe("output", "fname_hdf5", "output.hdf5"); - const std::string fname_analysis = the_config.GetValueSafe("output", "fbase_analysis", "output"); + const std::string fname_hdf5 = the_config.get_value_safe("output", "fname_hdf5", "output.hdf5"); + const std::string fname_analysis = the_config.get_value_safe("output", "fbase_analysis", "output"); Grid_FFT delta({ngrid, ngrid, ngrid}, {boxlen, boxlen, 
boxlen}); Grid_FFT delta2({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); @@ -98,7 +98,7 @@ void output_potentials_and_densities( } void output_velocity_displacement_symmetries( - ConfigFile &the_config, + config_file &the_config, size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, @@ -107,8 +107,8 @@ void output_velocity_displacement_symmetries( std::array *, 3> &A3, bool bwrite_out_fields) { - const std::string fname_hdf5 = the_config.GetValueSafe("output", "fname_hdf5", "output.hdf5"); - const std::string fname_analysis = the_config.GetValueSafe("output", "fbase_analysis", "output"); + const std::string fname_hdf5 = the_config.get_value_safe("output", "fname_hdf5", "output.hdf5"); + const std::string fname_analysis = the_config.get_value_safe("output", "fbase_analysis", "output"); real_t vfac1 = vfac; real_t vfac2 = 2 * vfac; @@ -232,7 +232,7 @@ void output_velocity_displacement_symmetries( } - csoca::ilog << "std. deviation of invariant : ( D+ | I_xy | I_yz | I_zx ) \n" + music::ilog << "std. 
deviation of invariant : ( D+ | I_xy | I_yz | I_zx ) \n" << std::setw(16) << dplus << " " << std::setw(16) << Icomp[0] << " " << std::setw(16) << Icomp[1] << " " @@ -241,7 +241,8 @@ void output_velocity_displacement_symmetries( } void output_convergence( - ConfigFile &the_config, + config_file &the_config, + cosmology::calculator* the_cosmo_calc, std::size_t ngrid, real_t boxlen, real_t vfac, real_t dplus, Grid_FFT &phi, Grid_FFT &phi2, @@ -249,7 +250,6 @@ void output_convergence( Grid_FFT &phi3b, std::array *, 3> &A3) { - // scale all potentials to remove dplus0 phi /= dplus; phi2 /= dplus * dplus; @@ -259,11 +259,95 @@ void output_convergence( (*A3[1]) /= dplus * dplus * dplus; (*A3[2]) /= dplus * dplus * dplus; + ////////////////////// theoretical convergence radius ////////////////////// + + // compute phi_code + Grid_FFT phi_code({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + phi_code.FourierTransformForward(false); + #pragma omp parallel for //collapse(3) + for (std::size_t i = 0; i < phi_code.size(0); ++i) { + for (std::size_t j = 0; j < phi_code.size(1); ++j) { + for (std::size_t k = 0; k < phi_code.size(2); ++k) { + std::size_t idx = phi_code.get_idx(i, j, k); + phi_code.kelem(idx) = -phi.kelem(idx); + } + } + } + + // initialize norm to 0 + Grid_FFT nabla_vini_norm({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + #pragma omp parallel for //collapse(3) + for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { + for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { + for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { + std::size_t idx = nabla_vini_norm.get_idx(i, j, k); + nabla_vini_norm.relem(idx) = 0.0; + } + } + } + + Grid_FFT nabla_vini_mn({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + for(std::size_t m = 0; m < 3; m++) { + for(std::size_t n = m; n < 3; n++) { + nabla_vini_mn.FourierTransformForward(false); + #pragma omp parallel for //collapse(3) + for (std::size_t i = 0; i < phi_code.size(0); ++i) { + for (std::size_t j 
= 0; j < phi_code.size(1); ++j) { + for (std::size_t k = 0; k < phi_code.size(2); ++k) { + std::size_t idx = phi_code.get_idx(i, j, k); + auto kk = phi_code.get_k(i, j, k); + nabla_vini_mn.kelem(idx) = phi_code.kelem(idx) * (kk[m] * kk[n]); + } + } + } + nabla_vini_mn.FourierTransformBackward(); + nabla_vini_mn *= (3.2144004915 / the_cosmo_calc->get_growth_factor(1.0)); + // sum of squares + #pragma omp parallel for //collapse(3) + for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { + for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { + for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { + std::size_t idx = nabla_vini_norm.get_idx(i, j, k); + if(m != n) { + nabla_vini_norm.relem(idx) += (2.0 * nabla_vini_mn.relem(idx) * nabla_vini_mn.relem(idx)); + } else { + nabla_vini_norm.relem(idx) += (nabla_vini_mn.relem(idx) * nabla_vini_mn.relem(idx)); + } + } + } + } + } + } + // square root + #pragma omp parallel for //collapse(3) + for (std::size_t i = 0; i < nabla_vini_norm.size(0); ++i) { + for (std::size_t j = 0; j < nabla_vini_norm.size(1); ++j) { + for (std::size_t k = 0; k < nabla_vini_norm.size(2); ++k) { + std::size_t idx = nabla_vini_norm.get_idx(i, j, k); + nabla_vini_norm.relem(idx) = std::sqrt(nabla_vini_norm.relem(idx)); + } + } + } + + // get t_eds + Grid_FFT t_eds({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); + #pragma omp parallel for //collapse(3) + for (std::size_t i = 0; i < t_eds.size(0); ++i) { + for (std::size_t j = 0; j < t_eds.size(1); ++j) { + for (std::size_t k = 0; k < t_eds.size(2); ++k) { + std::size_t idx = t_eds.get_idx(i, j, k); + t_eds.relem(idx) = 0.0204 / nabla_vini_norm.relem(idx); + } + } + } + + ////////////////////////// 3lpt convergence test /////////////////////////// + // initialize grids to 0 Grid_FFT psi_1({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT psi_2({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); Grid_FFT psi_3({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); -#pragma omp 
parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < psi_1.size(0); ++i) { for (std::size_t j = 0; j < psi_1.size(1); ++j) { for (std::size_t k = 0; k < psi_1.size(2); ++k) { @@ -290,7 +374,7 @@ void output_convergence( psi_2_tmp.FourierTransformForward(false); psi_3_tmp.FourierTransformForward(false); -#pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < phi.size(0); ++i) { for (std::size_t j = 0; j < phi.size(1); ++j) { for (std::size_t k = 0; k < phi.size(2); ++k) { @@ -311,7 +395,7 @@ void output_convergence( psi_3_tmp.FourierTransformBackward(); // sum of squares -#pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < psi_1.size(0); ++i) { for (std::size_t j = 0; j < psi_1.size(1); ++j) { for (std::size_t k = 0; k < psi_1.size(2); ++k) { @@ -325,7 +409,7 @@ void output_convergence( } // loop on dimensions // apply square root for the L2 norm -#pragma omp parallel for collapse(3) +#pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < psi_1.size(0); ++i) { for (std::size_t j = 0; j < psi_1.size(1); ++j) { for (std::size_t k = 0; k < psi_1.size(2); ++k) { @@ -339,7 +423,7 @@ void output_convergence( // convergence radius Grid_FFT inv_convergence_radius({ngrid, ngrid, ngrid}, {boxlen, boxlen, boxlen}); -#pragma omp parallel for collapse(3) + #pragma omp parallel for //collapse(3) for (std::size_t i = 0; i < psi_1.size(0); ++i) { for (std::size_t j = 0; j < psi_1.size(1); ++j) { for (std::size_t k = 0; k < psi_1.size(2); ++k) { @@ -351,13 +435,17 @@ void output_convergence( } } - // write results - unlink("convergence_test.hdf5"); - inv_convergence_radius.Write_to_HDF5("convergence_test.hdf5", "inv_convergence_radius"); - psi_1.Write_to_HDF5("convergence_test.hdf5", "psi_1_norm"); - psi_2.Write_to_HDF5("convergence_test.hdf5", "psi_2_norm"); - psi_3.Write_to_HDF5("convergence_test.hdf5", "psi_3_norm"); - + 
////////////////////////////// write results /////////////////////////////// + std::string convergence_test_filename("convergence_test.hdf5"); + unlink(convergence_test_filename.c_str()); +#if defined(USE_MPI) + MPI_Barrier(MPI_COMM_WORLD); +#endif + t_eds.Write_to_HDF5(convergence_test_filename, "t_eds"); + inv_convergence_radius.Write_to_HDF5(convergence_test_filename, "inv_convergence_radius"); + // psi_1.Write_to_HDF5(convergence_test_filename, "psi_1_norm"); + // psi_2.Write_to_HDF5(convergence_test_filename, "psi_2_norm"); + // psi_3.Write_to_HDF5(convergence_test_filename, "psi_3_norm"); } } // namespace testing diff --git a/src/transfer_function_plugin.cc b/src/transfer_function_plugin.cc index e9d3748..5b2ec9e 100644 --- a/src/transfer_function_plugin.cc +++ b/src/transfer_function_plugin.cc @@ -13,31 +13,32 @@ void print_TransferFunction_plugins() std::map &m = get_TransferFunction_plugin_map(); std::map::iterator it; it = m.begin(); - csoca::ilog << "Available transfer function plug-ins:" << std::endl; + music::ilog << "Available transfer function plug-ins:" << std::endl; while (it != m.end()) { if ((*it).second) - csoca::ilog << "\t\'" << (*it).first << "\'" << std::endl; + music::ilog << "\t\'" << (*it).first << "\'" << std::endl; ++it; } + music::ilog << std::endl; } -std::unique_ptr select_TransferFunction_plugin(ConfigFile &cf) +std::unique_ptr select_TransferFunction_plugin(config_file &cf) { - std::string tfname = cf.GetValue("cosmology", "transfer"); + std::string tfname = cf.get_value("cosmology", "transfer"); TransferFunction_plugin_creator *the_TransferFunction_plugin_creator = get_TransferFunction_plugin_map()[tfname]; if (!the_TransferFunction_plugin_creator) { - csoca::elog << "Invalid/Unregistered transfer function plug-in encountered : " << tfname << std::endl; + music::elog << "Invalid/Unregistered transfer function plug-in encountered : " << tfname << std::endl; print_TransferFunction_plugins(); throw std::runtime_error("Unknown 
transfer function plug-in"); } else { - csoca::ilog << "-------------------------------------------------------------------------------" << std::endl; - csoca::ilog << std::setw(32) << std::left << "Transfer function plugin" << " : " << tfname << std::endl; + music::ilog << "-------------------------------------------------------------------------------" << std::endl; + music::ilog << std::setw(32) << std::left << "Transfer function plugin" << " : " << tfname << std::endl; } return std::move(the_TransferFunction_plugin_creator->create(cf));