diff --git a/CMakeLists.txt b/CMakeLists.txt
index 10485a1..bd95567 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,21 @@
-cmake_minimum_required(VERSION 3.9)
+# This file is part of MUSIC2
+# A software package to generate ICs for cosmological simulations
+# Copyright (C) 2023 by Oliver Hahn
+#
+# monofonIC is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# monofonIC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+cmake_minimum_required(VERSION 3.11)
set(PRGNAME MUSIC)
project(MUSIC)
@@ -27,12 +44,10 @@ mark_as_advanced(CMAKE_CXX_FLAGS_DEBUGSANADD CMAKE_CXX_FLAGS_DEBUGSANUNDEF)
mark_as_advanced(CMAKE_C_FLAGS_DEBUGSANADD CMAKE_C_FLAGS_DEBUGSANUNDEF)
mark_as_advanced(CMAKE_EXECUTABLE_FORMAT CMAKE_OSX_ARCHITECTURES CMAKE_OSX_DEPLOYMENT_TARGET CMAKE_OSX_SYSROOT)
-# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic -DCMAKE_BUILD")
find_package(PkgConfig REQUIRED)
set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}")
-option(MUSIC_ENABLE_SINGLE_PRECISION "Enable Single Precision Mode" OFF)
########################################################################################################################
# OpenMP
@@ -52,12 +67,44 @@ find_package(Threads REQUIRED)
if(POLICY CMP0074)
cmake_policy(SET CMP0074 NEW)
endif()
-find_package(FFTW3 COMPONENTS SINGLE DOUBLE THREADS)
+if(ENABLE_MPI)
+ find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS MPI)
+else()
+ find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS)
+endif(ENABLE_MPI)
+mark_as_advanced(FFTW3_SINGLE_MPI_LIBRARY FFTW3_SINGLE_OPENMP_LIBRARY FFTW3_SINGLE_SERIAL_LIBRARY FFTW3_SINGLE_THREADS_LIBRARY)
+mark_as_advanced(FFTW3_DOUBLE_MPI_LIBRARY FFTW3_DOUBLE_OPENMP_LIBRARY FFTW3_DOUBLE_SERIAL_LIBRARY FFTW3_DOUBLE_THREADS_LIBRARY)
+mark_as_advanced(FFTW3_LONGDOUBLE_MPI_LIBRARY FFTW3_LONGDOUBLE_OPENMP_LIBRARY FFTW3_LONGDOUBLE_SERIAL_LIBRARY FFTW3_LONGDOUBLE_THREADS_LIBRARY)
+mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_MPI_INCLUDE_DIR)
+mark_as_advanced(pkgcfg_lib_PC_FFTW_fftw3)
########################################################################################################################
# TIRPC, needed only for Tipsy format
find_package(TIRPC)
+########################################################################################################################
+# GSL
+find_package(GSL REQUIRED)
+mark_as_advanced(pkgcfg_lib_GSL_gsl pkgcfg_lib_GSL_gslcblas pkgcfg_lib_GSL_m)
+
+########################################################################################################################
+# HDF5
+find_package(HDF5)
+if( HDF5_FOUND )
+ mark_as_advanced(HDF5_C_LIBRARY_dl HDF5_C_LIBRARY_hdf5 HDF5_C_LIBRARY_m HDF5_C_LIBRARY_pthread HDF5_C_LIBRARY_z HDF5_C_LIBRARY_sz)
+endif()
+
+########################################################################################################################
+# floating point precision
+set (
+ CODE_PRECISION "DOUBLE"
+ CACHE STRING "Floating point type used for internal computations and FFTs"
+)
+set_property (
+ CACHE CODE_PRECISION
+ PROPERTY STRINGS FLOAT DOUBLE LONGDOUBLE
+)
+
########################################################################################################################
# Add a custom command that produces version.cc, plus
# a dummy output that's not actually produced, in order
@@ -68,16 +115,6 @@ ADD_CUSTOM_COMMAND(
COMMAND ${CMAKE_COMMAND} -P
${CMAKE_CURRENT_SOURCE_DIR}/version.cmake)
-
-
-########################################################################################################################
-# GSL
-find_package(GSL REQUIRED)
-
-########################################################################################################################
-# HDF5
-find_package(HDF5)
-
########################################################################################################################
# INCLUDES
include_directories(${PROJECT_SOURCE_DIR}/src)
@@ -112,40 +149,38 @@ list (APPEND SOURCES
# target_include_directories(${PRGNAME} PRIVATE ${PROJECT_SOURCE_DIR}/external/panphasia_ho)
endif(ENABLE_PANPHASIA)
+# project configuration header
+configure_file(
+ ${PROJECT_SOURCE_DIR}/src/cmake_config.hh.in
+ ${PROJECT_SOURCE_DIR}/src/cmake_config.hh
+)
+
add_executable(${PRGNAME} ${SOURCES} ${PLUGINS})
-set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 11)
+set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 14)
-if(FFTW3_FOUND)
- target_compile_options(${PRGNAME} PRIVATE "-DFFTW3")
-
- if( MUSIC_ENABLE_SINGLE_PRECISION )
- target_compile_options(${PRGNAME} PRIVATE "-DSINGLE_PRECISION")
- if (FFTW3_SINGLE_THREADS_FOUND)
- target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_THREADS_LIBRARY})
- target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS")
- elseif(FFTW3_SINGLE_SERIAL_FOUND)
- target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_SERIAL_LIBRARY})
- message( WARNING "using serial version of FFTW3 -- this will most likely cause a very slow version of MUSIC. Rec: install FFTW3 with thread support")
- else()
- message( FATAL "chose compilation in single precision, but FFTW3 not found for single precision")
- endif()
- else(MUSIC_ENABLE_SINGLE_PRECISION)
- if (FFTW3_DOUBLE_THREADS_FOUND)
- target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_THREADS_LIBRARY})
- target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS")
- elseif(FFTW3_DOUBLE_SERIAL_FOUND)
- target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_SERIAL_LIBRARY})
- message( WARNING "using serial version of FFTW3 -- this will most likely cause a very slow version of MUSIC. Rec: install FFTW3 with thread support")
- else()
- message( FATAL "chose compilation in double precision, but FFTW3 not found for double precision")
- endif()
- endif(MUSIC_ENABLE_SINGLE_PRECISION)
-endif(FFTW3_FOUND)
+if(CODE_PRECISION STREQUAL "FLOAT")
+ if(FFTW3_SINGLE_THREADS_FOUND)
+ target_link_libraries(${PRGNAME} PRIVATE FFTW3::FFTW3_SINGLE_THREADS)
+ target_compile_definitions(${PRGNAME} PRIVATE "USE_FFTW_THREADS")
+ endif()
+ target_link_libraries(${PRGNAME} PRIVATE FFTW3::FFTW3_SINGLE_SERIAL)
+elseif(CODE_PRECISION STREQUAL "DOUBLE")
+ if(FFTW3_DOUBLE_THREADS_FOUND)
+ target_link_libraries(${PRGNAME} PRIVATE FFTW3::FFTW3_DOUBLE_THREADS)
+ target_compile_definitions(${PRGNAME} PRIVATE "USE_FFTW_THREADS")
+ endif()
+ target_link_libraries(${PRGNAME} PRIVATE FFTW3::FFTW3_DOUBLE_SERIAL)
+elseif(CODE_PRECISION STREQUAL "LONGDOUBLE")
+ if(FFTW3_LONGDOUBLE_THREADS_FOUND)
+ target_link_libraries(${PRGNAME} PRIVATE FFTW3::FFTW3_LONGDOUBLE_THREADS)
+ target_compile_definitions(${PRGNAME} PRIVATE "USE_FFTW_THREADS")
+ endif()
+ target_link_libraries(${PRGNAME} PRIVATE FFTW3::FFTW3_LONGDOUBLE_SERIAL)
+endif()
if(HDF5_FOUND)
- # target_link_libraries(${PRGNAME} ${HDF5_C_LIBRARY_DIRS})
- target_link_libraries(${PRGNAME} ${HDF5_LIBRARIES})
+ target_link_libraries(${PRGNAME} PRIVATE ${HDF5_LIBRARIES})
target_include_directories(${PRGNAME} PRIVATE ${HDF5_INCLUDE_DIRS})
target_compile_options(${PRGNAME} PRIVATE "-DHAVE_HDF5")
target_compile_options(${PRGNAME} PRIVATE "-DH5_USE_16_API")
@@ -161,11 +196,5 @@ if(ENABLE_PANPHASIA)
target_compile_options(${PRGNAME} PRIVATE "-DHAVE_PANPHASIA")
endif(ENABLE_PANPHASIA)
-target_link_libraries(${PRGNAME} ${FFTW3_LIBRARIES})
-target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIRS})
+target_link_libraries(${PRGNAME} PRIVATE GSL::gsl)
-target_link_libraries(${PRGNAME} ${GSL_LIBRARIES})
-target_include_directories(${PRGNAME} PRIVATE ${GSL_INCLUDE_DIR})
-
-target_link_libraries(${PRGNAME} ${HDF5_LIBRARIES})
-target_include_directories(${PRGNAME} PRIVATE ${HDF5_INCLUDE_DIR})
diff --git a/FindFFTW3.cmake b/FindFFTW3.cmake
index 0c65570..b69d975 100644
--- a/FindFFTW3.cmake
+++ b/FindFFTW3.cmake
@@ -230,3 +230,14 @@ find_package_handle_standard_args(FFTW3
VERSION_VAR FFTW3_VERSION_STRING
HANDLE_COMPONENTS
)
+
+if(FFTW3_FOUND)
+ foreach(component ${FFTW3_FIND_COMPONENTS})
+ if(NOT TARGET FFTW3::FFTW3_${component})
+ add_library(FFTW3::FFTW3_${component} UNKNOWN IMPORTED)
+ set_target_properties(FFTW3::FFTW3_${component} PROPERTIES
+ IMPORTED_LOCATION "${FFTW3_${component}_LIBRARY}"
+ INTERFACE_INCLUDE_DIRECTORIES ${FFTW3_INCLUDE_DIR})
+ endif()
+ endforeach()
+endif()
diff --git a/src/TransferFunction.hh b/src/TransferFunction.hh
index 1cce3fb..919ee86 100644
--- a/src/TransferFunction.hh
+++ b/src/TransferFunction.hh
@@ -130,7 +130,7 @@ protected:
double fftnorm = 1.0/N;
- fftw_complex in[N], out[N];
+ complex_t in[N], out[N];
fftw_plan p,ip;
//... perform anti-ringing correction from Hamilton (2000)
diff --git a/src/cmake_config.hh.in b/src/cmake_config.hh.in
new file mode 100644
index 0000000..5162967
--- /dev/null
+++ b/src/cmake_config.hh.in
@@ -0,0 +1,25 @@
+#pragma once
+
+
+#define USE_PRECISION_${CODE_PRECISION}
+
+#ifdef __cplusplus
+constexpr char CMAKE_BUILDTYPE_STR[] = "${CMAKE_BUILD_TYPE}";
+#if defined(USE_PRECISION_FLOAT)
+ constexpr char CMAKE_PRECISION_STR[] = "single";
+#elif defined(USE_PRECISION_DOUBLE)
+ constexpr char CMAKE_PRECISION_STR[] = "double";
+#elif defined(USE_PRECISION_LONGDOUBLE)
+ constexpr char CMAKE_PRECISION_STR[] = "long double";
+#endif
+
+// These variables are autogenerated and compiled
+// into the library by the version.cmake script. do not touch!
+extern "C"
+{
+ extern const char *GIT_TAG;
+ extern const char *GIT_REV;
+ extern const char *GIT_BRANCH;
+
+}
+#endif // __cplusplus
\ No newline at end of file
diff --git a/src/constraints.cc b/src/constraints.cc
index 479e282..a07d4d3 100644
--- a/src/constraints.cc
+++ b/src/constraints.cc
@@ -267,7 +267,7 @@ constraint_set::constraint_set( config_file& cf, transfer_function *ptf )
}
-void constraint_set::wnoise_constr_corr( double dx, size_t nx, size_t ny, size_t nz, std::vector& g0, matrix& cinv, fftw_complex* cw )
+void constraint_set::wnoise_constr_corr( double dx, size_t nx, size_t ny, size_t nz, std::vector& g0, matrix& cinv, complex_t* cw )
{
double lsub = nx*dx;
double dk = 2.0*M_PI/lsub, d3k=dk*dk*dk;
@@ -374,7 +374,7 @@ void constraint_set::wnoise_constr_corr( double dx, size_t nx, size_t ny, size_t
-void constraint_set::wnoise_constr_corr( double dx, fftw_complex* cw, size_t nx, size_t ny, size_t nz, std::vector& g0 )
+void constraint_set::wnoise_constr_corr( double dx, complex_t* cw, size_t nx, size_t ny, size_t nz, std::vector& g0 )
{
size_t nconstr = cset_.size();
size_t nzp=nz/2+1;
diff --git a/src/constraints.hh b/src/constraints.hh
index a5029f3..66f2871 100644
--- a/src/constraints.hh
+++ b/src/constraints.hh
@@ -1,118 +1,121 @@
/*
-
- constraints.hh - This file is part of MUSIC -
- a code to generate multi-scale initial conditions
- for cosmological simulations
-
- Copyright (C) 2010 Oliver Hahn
-
- */
-#ifndef __CONSTRAINTS_HH
-#define __CONSTRAINTS_HH
+ constraints.hh - This file is part of MUSIC -
+ a code to generate multi-scale initial conditions
+ for cosmological simulations
+
+ Copyright (C) 2010 Oliver Hahn
+
+ */
+#pragma once
#include
#include
#include
-#include "general.hh"
-#include "config_file.hh"
-#include "transfer_function.hh"
-#include "cosmology.hh"
-
+#include
+#include
+#include
+#include
//! matrix class serving as a gsl wrapper
class matrix
{
protected:
- gsl_matrix * m_;
- //double *data_;
+ gsl_matrix *m_;
+ // double *data_;
size_t M_, N_;
-
+
public:
- matrix( size_t M, size_t N )
- : M_(M), N_(N)
+ matrix(size_t M, size_t N)
+ : M_(M), N_(N)
{
- m_ = gsl_matrix_alloc(M_,N_);
+ m_ = gsl_matrix_alloc(M_, N_);
}
-
- matrix( size_t N )
- : M_(N), N_(N)
+
+ matrix(size_t N)
+ : M_(N), N_(N)
{
- m_ = gsl_matrix_alloc(M_,N_);
+ m_ = gsl_matrix_alloc(M_, N_);
}
-
- matrix( const matrix& o )
+
+ matrix(const matrix &o)
{
M_ = o.M_;
N_ = o.N_;
- m_ = gsl_matrix_alloc(M_,N_);
- gsl_matrix_memcpy(m_, o.m_ );
+ m_ = gsl_matrix_alloc(M_, N_);
+ gsl_matrix_memcpy(m_, o.m_);
}
-
+
~matrix()
{
- gsl_matrix_free( m_ );
+ gsl_matrix_free(m_);
}
-
- double& operator()( size_t i, size_t j )
- { return *gsl_matrix_ptr( m_, i, j ); }
-
- const double& operator()( size_t i, size_t j ) const
- { return *gsl_matrix_const_ptr( m_, i, j ); }
-
- matrix& operator=( const matrix& o )
+
+ double &operator()(size_t i, size_t j)
{
- gsl_matrix_free( m_ );
-
+ return *gsl_matrix_ptr(m_, i, j);
+ }
+
+ const double &operator()(size_t i, size_t j) const
+ {
+ return *gsl_matrix_const_ptr(m_, i, j);
+ }
+
+ matrix &operator=(const matrix &o)
+ {
+ gsl_matrix_free(m_);
+
M_ = o.M_;
N_ = o.N_;
- m_ = gsl_matrix_alloc(M_,N_);
- gsl_matrix_memcpy(m_, o.m_ );
+ m_ = gsl_matrix_alloc(M_, N_);
+ gsl_matrix_memcpy(m_, o.m_);
return *this;
}
-
-
- matrix& invert()
+
+ matrix &invert()
{
- if( M_!=N_ )
+ if (M_ != N_)
throw std::runtime_error("Attempt to invert a non-square matrix!");
-
+
int s;
- gsl_matrix* im = gsl_matrix_alloc(M_,N_);
-
- gsl_permutation * p = gsl_permutation_alloc (M_);
- gsl_linalg_LU_decomp( m_, p, &s );
- gsl_linalg_LU_invert( m_, p, im );
-
+ gsl_matrix *im = gsl_matrix_alloc(M_, N_);
+
+ gsl_permutation *p = gsl_permutation_alloc(M_);
+ gsl_linalg_LU_decomp(m_, p, &s);
+ gsl_linalg_LU_invert(m_, p, im);
+
gsl_matrix_memcpy(m_, im);
-
+
gsl_permutation_free(p);
gsl_matrix_free(im);
return *this;
}
};
-
//! class to impose constraints on the white noise field (van de Weygaert & Bertschinger 1996)
class constraint_set
{
-
+
public:
- enum constr_type{ halo, peak };
-
+ enum constr_type
+ {
+ halo,
+ peak
+ };
+
protected:
-
- struct constraint{
+ struct constraint
+ {
constr_type type;
- double x,y,z;
- double gx,gy,gz;
+ double x, y, z;
+ double gx, gy, gz;
double Rg, Rg2;
double gRg, gRg2;
double sigma;
};
-
+
config_file *pcf_;
std::vector cset_;
transfer_function *ptf_;
@@ -120,303 +123,185 @@ protected:
Cosmology *pcosmo_;
double dplus0_;
unsigned constr_level_;
-
-
- inline std::complex eval_constr( size_t icon, double kx, double ky, double kz )
+
+ inline std::complex eval_constr(size_t icon, double kx, double ky, double kz)
{
double re, im, kdotx, k2;
-
- kdotx = cset_[icon].gx*kx+cset_[icon].gy*ky+cset_[icon].gz*kz;
- k2 = kx*kx+ky*ky+kz*kz;
-
- re = im = exp(-k2*cset_[icon].gRg2/2.0);
- re *= cos( kdotx );
- im *= sin( kdotx );
-
- return std::complex(re,im);
+
+ kdotx = cset_[icon].gx * kx + cset_[icon].gy * ky + cset_[icon].gz * kz;
+ k2 = kx * kx + ky * ky + kz * kz;
+
+ re = im = exp(-k2 * cset_[icon].gRg2 / 2.0);
+ re *= cos(kdotx);
+ im *= sin(kdotx);
+
+ return std::complex(re, im);
}
-
-
-#if defined(FFTW3) && defined(SINGLE_PRECISION)
-
//! apply constraints to the white noise
- void wnoise_constr_corr( double dx, size_t nx, size_t ny, size_t nz, std::vector& g0, matrix& cinv, fftwf_complex* cw );
-
+ void wnoise_constr_corr(double dx, size_t nx, size_t ny, size_t nz, std::vector &g0, matrix &cinv, complex_t *cw);
+
//! measure sigma for each constraint in the unconstrained noise
- void wnoise_constr_corr( double dx, fftwf_complex* cw, size_t nx, size_t ny, size_t nz, std::vector& g0 );
-
-#else
- //! apply constraints to the white noise
- void wnoise_constr_corr( double dx, size_t nx, size_t ny, size_t nz, std::vector& g0, matrix& cinv, fftw_complex* cw );
-
- //! measure sigma for each constraint in the unconstrained noise
- void wnoise_constr_corr( double dx, fftw_complex* cw, size_t nx, size_t ny, size_t nz, std::vector& g0 );
-
-#endif
-
+ void wnoise_constr_corr(double dx, complex_t *cw, size_t nx, size_t ny, size_t nz, std::vector &g0);
+
//! compute the covariance between the constraints
- void icov_constr( double dx, size_t nx, size_t ny, size_t nz, matrix& cij );
-
-
+ void icov_constr(double dx, size_t nx, size_t ny, size_t nz, matrix &cij);
+
public:
-
-
- //! constructor
- constraint_set( config_file& cf, transfer_function *ptf );
-
+ //! constructor
+ constraint_set(config_file &cf, transfer_function *ptf);
+
//! destructor
~constraint_set()
{
delete pccalc_;
delete pcosmo_;
}
-
-
- template< typename rng >
- void apply( unsigned ilevel, int x0[], int lx[], rng* wnoise )
+
+ template
+ void apply(unsigned ilevel, int x0[], int lx[], rng *wnoise)
{
- if( cset_.size() == 0 || constr_level_ != ilevel )
+ if (cset_.size() == 0 || constr_level_ != ilevel)
return;
-
- unsigned nlvl = 1<get_value("setup","boxlength");
-
+
+ unsigned nlvl = 1 << ilevel;
+ double boxlength = pcf_->get_value("setup", "boxlength");
+
//... compute constraint coordinates for grid
- for( size_t i=0; i 0.5*lx[0])
- music::wlog.Print("Constraint %d appears to be too large scale",i);
- }
-
-
- std::vector g0;
-
-// unsigned levelmax = pcf_->get_value("setup","levelmax");
- unsigned levelmin = pcf_->get_value("setup","levelmin_TF");
-
- bool bperiodic = ilevel==levelmin;
- double dx = pcf_->get_value("setup","boxlength")/(1< 0.5 * lx[0])
+ music::wlog.Print("Constraint %d appears to be too large scale", i);
+ }
+
+ std::vector g0;
+
+ // unsigned levelmax = pcf_->get_value("setup","levelmax");
+ unsigned levelmin = pcf_->get_value("setup", "levelmin_TF");
+
+ bool bperiodic = ilevel == levelmin;
+ double dx = pcf_->get_value("setup", "boxlength") / (1 << ilevel);
+
+ music::ilog.Print("Computing constrained realization...");
+
+ if (bperiodic)
{
//... we are operating on the periodic coarse grid
- size_t nx = lx[0], ny = lx[1], nz = lx[2], nzp = nz+2;
- fftw_real * w = new fftw_real[nx*ny*nzp];
-
-
-#ifdef FFTW3
- #ifdef SINGLE_PRECISION
- fftwf_complex * cw = reinterpret_cast (w);
- fftwf_plan p = fftwf_plan_dft_r2c_3d( nx, ny, nz, w, cw, FFTW_ESTIMATE),
- ip = fftwf_plan_dft_c2r_3d( nx, ny, nz, cw, w, FFTW_ESTIMATE);
- #else
- fftw_complex * cw = reinterpret_cast (w);
- fftw_plan p = fftw_plan_dft_r2c_3d( nx, ny, nz, w, cw, FFTW_ESTIMATE),
- ip = fftw_plan_dft_c2r_3d( nx, ny, nz, cw, w, FFTW_ESTIMATE);
- #endif
-#else
- fftw_complex * cw = reinterpret_cast (w);
- rfftwnd_plan p = rfftw3d_create_plan( nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
- ip = rfftw3d_create_plan( nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
-#endif
-
- double fftnorm = 1.0/sqrt(nx*ny*nz);
-
- #pragma omp parallel for
- for( int i=0; i<(int)nx; i++ )
- for( int j=0; j<(int)ny; j++ )
- for( int k=0; k<(int)nz; k++ )
+ size_t nx = lx[0], ny = lx[1], nz = lx[2], nzp = nz + 2;
+ real_t *w = new real_t[nx * ny * nzp];
+
+ complex_t *cw = reinterpret_cast(w);
+ fftw_plan_t p = FFTW_API(plan_dft_r2c_3d)(nx, ny, nz, w, cw, FFTW_ESTIMATE),
+ ip = FFTW_API(plan_dft_c2r_3d)(nx, ny, nz, cw, w, FFTW_ESTIMATE);
+
+ double fftnorm = 1.0 / sqrt(nx * ny * nz);
+
+#pragma omp parallel for
+ for (int i = 0; i < (int)nx; i++)
+ for (int j = 0; j < (int)ny; j++)
+ for (int k = 0; k < (int)nz; k++)
{
- size_t q = ((size_t)i*ny+(size_t)j)*nzp+(size_t)k;
- w[q] = (*wnoise)((x0[0]+i)%nx,(x0[1]+j)%ny,(x0[2]+k)%nz)*fftnorm;
+ size_t q = ((size_t)i * ny + (size_t)j) * nzp + (size_t)k;
+ w[q] = (*wnoise)((x0[0] + i) % nx, (x0[1] + j) % ny, (x0[2] + k) % nz) * fftnorm;
}
-
-#ifdef FFTW3
- #ifdef SINGLE_PRECISION
- fftwf_execute( p );
- #else
- fftw_execute( p );
- #endif
-#else
-#ifndef SINGLETHREAD_FFTW
- rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), p, w, NULL );
-#else
- rfftwnd_one_real_to_complex( p, w, NULL );
-#endif
-#endif
- wnoise_constr_corr( dx, cw, nx, ny, nz, g0 );
-
- matrix c(2,2);
- icov_constr( dx, nx, ny, nz, c );
-
-
- wnoise_constr_corr( dx, nx, ny, nz, g0, c, cw );
-
-#ifdef FFTW3
- #ifdef SINGLE_PRECISION
- fftwf_execute( ip );
- #else
- fftw_execute( ip );
- #endif
-#else
-#ifndef SINGLETHREAD_FFTW
- rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), ip, cw, NULL );
-#else
- rfftwnd_one_complex_to_real( ip, cw, NULL );
-#endif
-#endif
-
- #pragma omp parallel for
- for( int i=0; i<(int)nx; i++ )
- for( int j=0; j<(int)ny; j++ )
- for( int k=0; k<(int)nz; k++ )
+
+ FFTW_API(execute)(p);
+ wnoise_constr_corr(dx, cw, nx, ny, nz, g0);
+
+ matrix c(2, 2);
+ icov_constr(dx, nx, ny, nz, c);
+
+ wnoise_constr_corr(dx, nx, ny, nz, g0, c, cw);
+
+ FFTW_API(execute)(ip);
+
+#pragma omp parallel for
+ for (int i = 0; i < (int)nx; i++)
+ for (int j = 0; j < (int)ny; j++)
+ for (int k = 0; k < (int)nz; k++)
{
- size_t q = ((size_t)i*ny+(size_t)j)*nzp+(size_t)k;
- (*wnoise)((x0[0]+i),(x0[1]+j),(x0[2]+k)) = w[q]*fftnorm;
+ size_t q = ((size_t)i * ny + (size_t)j) * nzp + (size_t)k;
+ (*wnoise)((x0[0] + i), (x0[1] + j), (x0[2] + k)) = w[q] * fftnorm;
}
-
- music::ilog.Print("Applied constraints to level %d.",ilevel);
-
-
+
+ music::ilog.Print("Applied constraints to level %d.", ilevel);
+
delete[] w;
-
-
-#ifdef FFTW3
- #ifdef SINGLE_PRECISION
- fftwf_destroy_plan(p);
- #else
- fftw_destroy_plan(p);
- #endif
-#else
- fftwnd_destroy_plan(p);
-#endif
- }else{
-
+
+ FFTW_API(destroy_plan)(p);
+ FFTW_API(destroy_plan)(ip);
+ }
+ else
+ {
+
//... we are operating on a refinement grid, not necessarily the finest
-
- size_t nx = lx[0], ny = lx[1], nz = lx[2], nzp = nz+2;
- fftw_real * w = new fftw_real[nx*ny*nzp];
-
-
-#ifdef FFTW3
- #ifdef SINGLE_PRECISION
- fftwf_complex * cw = reinterpret_cast (w);
- fftwf_plan p = fftwf_plan_dft_r2c_3d( nx, ny, nz, w, cw, FFTW_ESTIMATE),
- ip = fftwf_plan_dft_c2r_3d( nx, ny, nz, cw, w, FFTW_ESTIMATE);
- #else
- fftw_complex * cw = reinterpret_cast (w);
- fftw_plan p = fftw_plan_dft_r2c_3d( nx, ny, nz, w, cw, FFTW_ESTIMATE),
- ip = fftw_plan_dft_c2r_3d( nx, ny, nz, cw, w, FFTW_ESTIMATE);
- #endif
-#else
- fftw_complex * cw = reinterpret_cast (w);
- rfftwnd_plan p = rfftw3d_create_plan( nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
- ip = rfftw3d_create_plan( nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
-#endif
-
- double fftnorm = 1.0/sqrt(nx*ny*nz);
-
- int il = nx/4, ir = 3*nx/4, jl=ny/4, jr = 3*ny/4, kl = nz/4, kr = 3*nz/4;
-
- #pragma omp parallel for
- for( int i=0; i<(int)nx; i++ )
- for( int j=0; j<(int)ny; j++ )
- for( int k=0; k<(int)nz; k++ )
+
+ size_t nx = lx[0], ny = lx[1], nz = lx[2], nzp = nz + 2;
+ real_t *w = new real_t[nx * ny * nzp];
+
+ complex_t *cw = reinterpret_cast(w);
+ fftw_plan_t p = FFTW_API(plan_dft_r2c_3d)(nx, ny, nz, w, cw, FFTW_ESTIMATE),
+ ip = FFTW_API(plan_dft_c2r_3d)(nx, ny, nz, cw, w, FFTW_ESTIMATE);
+
+ double fftnorm = 1.0 / sqrt(nx * ny * nz);
+
+ int il = nx / 4, ir = 3 * nx / 4, jl = ny / 4, jr = 3 * ny / 4, kl = nz / 4, kr = 3 * nz / 4;
+
+#pragma omp parallel for
+ for (int i = 0; i < (int)nx; i++)
+ for (int j = 0; j < (int)ny; j++)
+ for (int k = 0; k < (int)nz; k++)
{
- size_t q = ((size_t)i*ny+(size_t)j)*nzp+(size_t)k;
-
- if( i>=il && i=jl && j=kl && k= il && i < ir && j >= jl && j < jr && k >= kl && k < kr)
+ w[q] = (*wnoise)((x0[0] + i), (x0[1] + j), (x0[2] + k)) * fftnorm;
else
w[q] = 0.0;
-
}
-
- int nlvl05 = 1<<(ilevel-1);
- int xs = nlvl05-x0[0], ys = nlvl05-x0[1], zs = nlvl05-x0[2];
-
- for( size_t i=0; i=il && i=jl && j=kl && k= il && i < ir && j >= jl && j < jr && k >= kl && k < kr)
+ (*wnoise)((x0[0] + i), (x0[1] + j), (x0[2] + k)) = w[q] * fftnorm;
}
-
- music::ilog.Print("Applied constraints to level %d.",ilevel);
-
+ music::ilog.Print("Applied constraints to level %d.", ilevel);
+
delete[] w;
-
-
-#ifdef FFTW3
- #ifdef SINGLE_PRECISION
- fftwf_destroy_plan(p);
- #else
- fftw_destroy_plan(p);
- #endif
-#else
- fftwnd_destroy_plan(p);
-#endif
-
+
+ FFTW_API(destroy_plan)(p);
+ FFTW_API(destroy_plan)(ip);
}
-
}
-
};
-
-
-#endif // __CONSTRAINTS_HH
diff --git a/src/convolution_kernel.cc b/src/convolution_kernel.cc
index 8bec532..9b4acb1 100644
--- a/src/convolution_kernel.cc
+++ b/src/convolution_kernel.cc
@@ -7,13 +7,9 @@
*/
-#include "general.hh"
-#include "densities.hh"
-#include "convolution_kernel.hh"
-
-#if defined(FFTW3) && defined(SINGLE_PRECISION)
-typedef fftw_complex fftwf_complex;
-#endif
+#include
+#include
+#include
namespace convolution
{
@@ -25,7 +21,6 @@ get_kernel_map()
return kernel_map;
}
-template
void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip)
{
//return;
@@ -34,49 +29,26 @@ void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip)
double fftnormp = 1.0/sqrt((double)cparam_.nx * (double)cparam_.ny * (double)cparam_.nz);
double fftnorm = pow(2.0 * M_PI, 1.5) / sqrt(cparam_.lx * cparam_.ly * cparam_.lz) * fftnormp;
- fftw_complex *cdata;
- [[maybe_unused]] fftw_complex *ckernel;
- fftw_real *data;
+ complex_t *cdata;
+ [[maybe_unused]] complex_t *ckernel;
+ real_t *data;
- data = reinterpret_cast(pd);
- cdata = reinterpret_cast(data);
- ckernel = reinterpret_cast(pk->get_ptr());
+ data = reinterpret_cast(pd);
+ cdata = reinterpret_cast(data);
+ ckernel = reinterpret_cast(pk->get_ptr());
std::cout << " - Performing density convolution... ("
<< cparam_.nx << ", " << cparam_.ny << ", " << cparam_.nz << ")\n";
music::ulog.Print("Performing kernel convolution on (%5d,%5d,%5d) grid", cparam_.nx, cparam_.ny, cparam_.nz);
music::ulog.Print("Performing forward FFT...");
-#ifdef FFTW3
-#ifdef SINGLE_PRECISION
- fftwf_plan plan, iplan;
- plan = fftwf_plan_dft_r2c_3d(cparam_.nx, cparam_.ny, cparam_.nz, data, cdata, FFTW_ESTIMATE);
- iplan = fftwf_plan_dft_c2r_3d(cparam_.nx, cparam_.ny, cparam_.nz, cdata, data, FFTW_ESTIMATE);
- fftwf_execute(plan);
-#else
- fftw_plan plan, iplan;
- plan = fftw_plan_dft_r2c_3d(cparam_.nx, cparam_.ny, cparam_.nz, data, cdata, FFTW_ESTIMATE);
- iplan = fftw_plan_dft_c2r_3d(cparam_.nx, cparam_.ny, cparam_.nz, cdata, data, FFTW_ESTIMATE);
+ fftw_plan_t plan, iplan;
+ plan = FFTW_API(plan_dft_r2c_3d)(cparam_.nx, cparam_.ny, cparam_.nz, data, cdata, FFTW_ESTIMATE);
+ iplan = FFTW_API(plan_dft_c2r_3d)(cparam_.nx, cparam_.ny, cparam_.nz, cdata, data, FFTW_ESTIMATE);
- fftw_execute(plan);
-#endif
-#else
- rfftwnd_plan iplan, plan;
+ FFTW_API(execute)(plan);
- plan = rfftw3d_create_plan(cparam_.nx, cparam_.ny, cparam_.nz,
- FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE);
-
- iplan = rfftw3d_create_plan(cparam_.nx, cparam_.ny, cparam_.nz,
- FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
-
-#ifndef SINGLETHREAD_FFTW
- rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), plan, data, NULL);
-#else
- rfftwnd_one_real_to_complex(plan, data, NULL);
-#endif
-
-#endif
//..... need a phase shift for baryons for SPH
double dstag = 0.0;
@@ -163,27 +135,9 @@ void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip)
music::ulog.Print("Performing backward FFT...");
-#ifdef FFTW3
-#ifdef SINGLE_PRECISION
- fftwf_execute(iplan);
- fftwf_destroy_plan(plan);
- fftwf_destroy_plan(iplan);
-#else
- fftw_execute(iplan);
- fftw_destroy_plan(plan);
- fftw_destroy_plan(iplan);
-
-#endif
-#else
-#ifndef SINGLETHREAD_FFTW
- rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), iplan, cdata, NULL);
-#else
- rfftwnd_one_complex_to_real(iplan, cdata, NULL);
-#endif
-
- rfftwnd_destroy_plan(plan);
- rfftwnd_destroy_plan(iplan);
-#endif
+ FFTW_API(execute)(iplan);
+ FFTW_API(destroy_plan)(plan);
+ FFTW_API(destroy_plan)(iplan);
// set the DC mode here to avoid a possible truncation error in single precision
{
@@ -196,14 +150,12 @@ void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip)
}
}
-template void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip);
-template void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip);
+void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip);
/*****************************************************************************************/
/*** SPECIFIC KERNEL IMPLEMENTATIONS *********************************************/
/*****************************************************************************************/
-template
class kernel_k : public kernel
{
protected:
@@ -298,8 +250,4 @@ public:
/**************************************************************************************/
/**************************************************************************************/
-namespace
-{
-convolution::kernel_creator_concrete> creator_kd("tf_kernel_k_double");
-convolution::kernel_creator_concrete> creator_kf("tf_kernel_k_float");
-} // namespace
+convolution::kernel_creator_concrete creator_kd("tf_kernel_k");
diff --git a/src/convolution_kernel.hh b/src/convolution_kernel.hh
index 4fe13cd..30bb851 100644
--- a/src/convolution_kernel.hh
+++ b/src/convolution_kernel.hh
@@ -112,7 +112,6 @@ struct kernel_creator_concrete : public kernel_creator
};
//! actual implementation of the FFT convolution (independent of the actual kernel)
-template
void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip);
} //namespace convolution
diff --git a/src/cosmology.cc b/src/cosmology.cc
index 7286733..73e6a5f 100644
--- a/src/cosmology.cc
+++ b/src/cosmology.cc
@@ -1,11 +1,11 @@
/*
-
+
cosmology.cc - This file is part of MUSIC -
- a code to generate multi-scale initial conditions
- for cosmological simulations
-
+ a code to generate multi-scale initial conditions
+ for cosmological simulations
+
Copyright (C) 2010 Oliver Hahn
-
+
*/
#include "cosmology.hh"
@@ -13,301 +13,252 @@
#include "mg_operators.hh"
#include "general.hh"
-#define ACC(i,j,k) ((*u.get_grid((ilevel)))((i),(j),(k)))
-#define SQR(x) ((x)*(x))
+#define ACC(i, j, k) ((*u.get_grid((ilevel)))((i), (j), (k)))
+#define SQR(x) ((x) * (x))
-#if defined(FFTW3) && defined(SINGLE_PRECISION)
-#define fftw_complex fftwf_complex
-#endif
-
-
-void compute_LLA_density( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order )
+void compute_LLA_density(const grid_hierarchy &u, grid_hierarchy &fnew, unsigned order)
{
fnew = u;
-
- for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel )
- {
- double h = pow(2.0,ilevel), h2 = h*h, h2_4 = 0.25*h2;
- meshvar_bnd *pvar = fnew.get_grid(ilevel);
-
-
- if( order == 2 )
- {
- #pragma omp parallel for //reduction(+:sum_corr,sum,sum2)
- for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
- for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
- for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
- {
- double D[3][3];
-
- D[0][0] = (ACC(ix-1,iy,iz)-2.0*ACC(ix,iy,iz)+ACC(ix+1,iy,iz)) * h2;
- D[1][1] = (ACC(ix,iy-1,iz)-2.0*ACC(ix,iy,iz)+ACC(ix,iy+1,iz)) * h2;
- D[2][2] = (ACC(ix,iy,iz-1)-2.0*ACC(ix,iy,iz)+ACC(ix,iy,iz+1)) * h2;
-
- D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4;
- D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4;
- D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4;
-
- D[0][0] += 1.0;
- D[1][1] += 1.0;
- D[2][2] += 1.0;
-
- double det = D[0][0]*D[1][1]*D[2][2]
- - D[0][0]*D[1][2]*D[2][1]
- - D[1][0]*D[0][1]*D[2][2]
- + D[1][0]*D[0][2]*D[1][2]
- + D[2][0]*D[0][1]*D[1][2]
- - D[2][0]*D[0][2]*D[1][1];
-
- (*pvar)(ix,iy,iz) = 1.0/det-1.0;
-
- }
- }
- else if ( order == 4 )
- {
- #pragma omp parallel for
- for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
- for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
- for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
- {
- double D[3][3];
-
- D[0][0] = (-ACC(ix-2,iy,iz)+16.*ACC(ix-1,iy,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix+1,iy,iz)-ACC(ix+2,iy,iz)) * h2/12.0;
- D[1][1] = (-ACC(ix,iy-2,iz)+16.*ACC(ix,iy-1,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy+1,iz)-ACC(ix,iy+2,iz)) * h2/12.0;
- D[2][2] = (-ACC(ix,iy,iz-2)+16.*ACC(ix,iy,iz-1)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy,iz+1)-ACC(ix,iy,iz+2)) * h2/12.0;
-
- D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4;
- D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4;
- D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4;
-
-
- D[0][0] += 1.0;
- D[1][1] += 1.0;
- D[2][2] += 1.0;
-
- double det = D[0][0]*D[1][1]*D[2][2]
- - D[0][0]*D[1][2]*D[2][1]
- - D[1][0]*D[0][1]*D[2][2]
- + D[1][0]*D[0][2]*D[1][2]
- + D[2][0]*D[0][1]*D[1][2]
- - D[2][0]*D[0][2]*D[1][1];
-
- (*pvar)(ix,iy,iz) = 1.0/det-1.0;
-
- }
- }
- else if ( order == 6 )
- {
- h2_4/=36.;
- h2/=180.;
- #pragma omp parallel for
- for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
- for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
- for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
- {
- double D[3][3];
-
- D[0][0] = (2.*ACC(ix-3,iy,iz)-27.*ACC(ix-2,iy,iz)+270.*ACC(ix-1,iy,iz)-490.0*ACC(ix,iy,iz)+270.*ACC(ix+1,iy,iz)-27.*ACC(ix+2,iy,iz)+2.*ACC(ix+3,iy,iz)) * h2;
- D[1][1] = (2.*ACC(ix,iy-3,iz)-27.*ACC(ix,iy-2,iz)+270.*ACC(ix,iy-1,iz)-490.0*ACC(ix,iy,iz)+270.*ACC(ix,iy+1,iz)-27.*ACC(ix,iy+2,iz)+2.*ACC(ix,iy+3,iz)) * h2;
- D[2][2] = (2.*ACC(ix,iy,iz-3)-27.*ACC(ix,iy,iz-2)+270.*ACC(ix,iy,iz-1)-490.0*ACC(ix,iy,iz)+270.*ACC(ix,iy,iz+1)-27.*ACC(ix,iy,iz+2)+2.*ACC(ix,iy,iz+3)) * h2;
-
- //.. this is actually 8th order accurate
- D[0][1] = D[1][0] = (64.*(ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))
- -8.*(ACC(ix-2,iy-1,iz)-ACC(ix+2,iy-1,iz)-ACC(ix-2,iy+1,iz)+ACC(ix+2,iy+1,iz)
- + ACC(ix-1,iy-2,iz)-ACC(ix-1,iy+2,iz)-ACC(ix+1,iy-2,iz)+ACC(ix+1,iy+2,iz))
- +1.*(ACC(ix-2,iy-2,iz)-ACC(ix-2,iy+2,iz)-ACC(ix+2,iy-2,iz)+ACC(ix+2,iy+2,iz)))*h2_4;
- D[0][2] = D[2][0] = (64.*(ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))
- -8.*(ACC(ix-2,iy,iz-1)-ACC(ix+2,iy,iz-1)-ACC(ix-2,iy,iz+1)+ACC(ix+2,iy,iz+1)
- + ACC(ix-1,iy,iz-2)-ACC(ix-1,iy,iz+2)-ACC(ix+1,iy,iz-2)+ACC(ix+1,iy,iz+2))
- +1.*(ACC(ix-2,iy,iz-2)-ACC(ix-2,iy,iz+2)-ACC(ix+2,iy,iz-2)+ACC(ix+2,iy,iz+2)))*h2_4;
- D[1][2] = D[2][1] = (64.*(ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))
- -8.*(ACC(ix,iy-2,iz-1)-ACC(ix,iy+2,iz-1)-ACC(ix,iy-2,iz+1)+ACC(ix,iy+2,iz+1)
- + ACC(ix,iy-1,iz-2)-ACC(ix,iy-1,iz+2)-ACC(ix,iy+1,iz-2)+ACC(ix,iy+1,iz+2))
- +1.*(ACC(ix,iy-2,iz-2)-ACC(ix,iy-2,iz+2)-ACC(ix,iy+2,iz-2)+ACC(ix,iy+2,iz+2)))*h2_4;
-
- D[0][0] += 1.0;
- D[1][1] += 1.0;
- D[2][2] += 1.0;
-
- double det = D[0][0]*D[1][1]*D[2][2]
- - D[0][0]*D[1][2]*D[2][1]
- - D[1][0]*D[0][1]*D[2][2]
- + D[1][0]*D[0][2]*D[1][2]
- + D[2][0]*D[0][1]*D[1][2]
- - D[2][0]*D[0][2]*D[1][1];
-
- (*pvar)(ix,iy,iz) = 1.0/det-1.0;
-
- }
-
- }else
- throw std::runtime_error("compute_LLA_density : invalid operator order specified");
+ for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel)
+ {
+ double h = pow(2.0, ilevel), h2 = h * h, h2_4 = 0.25 * h2;
+ meshvar_bnd *pvar = fnew.get_grid(ilevel);
+
+ if (order == 2)
+ {
+#pragma omp parallel for // reduction(+:sum_corr,sum,sum2)
+ for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix)
+ for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy)
+ for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz)
+ {
+ double D[3][3];
+
+ D[0][0] = (ACC(ix - 1, iy, iz) - 2.0 * ACC(ix, iy, iz) + ACC(ix + 1, iy, iz)) * h2;
+ D[1][1] = (ACC(ix, iy - 1, iz) - 2.0 * ACC(ix, iy, iz) + ACC(ix, iy + 1, iz)) * h2;
+ D[2][2] = (ACC(ix, iy, iz - 1) - 2.0 * ACC(ix, iy, iz) + ACC(ix, iy, iz + 1)) * h2;
+
+ D[0][1] = D[1][0] = (ACC(ix - 1, iy - 1, iz) - ACC(ix - 1, iy + 1, iz) - ACC(ix + 1, iy - 1, iz) + ACC(ix + 1, iy + 1, iz)) * h2_4;
+ D[0][2] = D[2][0] = (ACC(ix - 1, iy, iz - 1) - ACC(ix - 1, iy, iz + 1) - ACC(ix + 1, iy, iz - 1) + ACC(ix + 1, iy, iz + 1)) * h2_4;
+ D[1][2] = D[2][1] = (ACC(ix, iy - 1, iz - 1) - ACC(ix, iy - 1, iz + 1) - ACC(ix, iy + 1, iz - 1) + ACC(ix, iy + 1, iz + 1)) * h2_4;
+
+ D[0][0] += 1.0;
+ D[1][1] += 1.0;
+ D[2][2] += 1.0;
+
+ double det = D[0][0] * D[1][1] * D[2][2] - D[0][0] * D[1][2] * D[2][1] - D[1][0] * D[0][1] * D[2][2] + D[1][0] * D[0][2] * D[1][2] + D[2][0] * D[0][1] * D[1][2] - D[2][0] * D[0][2] * D[1][1];
+
+ (*pvar)(ix, iy, iz) = 1.0 / det - 1.0;
+ }
+ }
+ else if (order == 4)
+ {
+#pragma omp parallel for
+ for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix)
+ for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy)
+ for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz)
+ {
+ double D[3][3];
+
+ D[0][0] = (-ACC(ix - 2, iy, iz) + 16. * ACC(ix - 1, iy, iz) - 30.0 * ACC(ix, iy, iz) + 16. * ACC(ix + 1, iy, iz) - ACC(ix + 2, iy, iz)) * h2 / 12.0;
+ D[1][1] = (-ACC(ix, iy - 2, iz) + 16. * ACC(ix, iy - 1, iz) - 30.0 * ACC(ix, iy, iz) + 16. * ACC(ix, iy + 1, iz) - ACC(ix, iy + 2, iz)) * h2 / 12.0;
+ D[2][2] = (-ACC(ix, iy, iz - 2) + 16. * ACC(ix, iy, iz - 1) - 30.0 * ACC(ix, iy, iz) + 16. * ACC(ix, iy, iz + 1) - ACC(ix, iy, iz + 2)) * h2 / 12.0;
+
+ D[0][1] = D[1][0] = (ACC(ix - 1, iy - 1, iz) - ACC(ix - 1, iy + 1, iz) - ACC(ix + 1, iy - 1, iz) + ACC(ix + 1, iy + 1, iz)) * h2_4;
+ D[0][2] = D[2][0] = (ACC(ix - 1, iy, iz - 1) - ACC(ix - 1, iy, iz + 1) - ACC(ix + 1, iy, iz - 1) + ACC(ix + 1, iy, iz + 1)) * h2_4;
+ D[1][2] = D[2][1] = (ACC(ix, iy - 1, iz - 1) - ACC(ix, iy - 1, iz + 1) - ACC(ix, iy + 1, iz - 1) + ACC(ix, iy + 1, iz + 1)) * h2_4;
+
+ D[0][0] += 1.0;
+ D[1][1] += 1.0;
+ D[2][2] += 1.0;
+
+ double det = D[0][0] * D[1][1] * D[2][2] - D[0][0] * D[1][2] * D[2][1] - D[1][0] * D[0][1] * D[2][2] + D[1][0] * D[0][2] * D[1][2] + D[2][0] * D[0][1] * D[1][2] - D[2][0] * D[0][2] * D[1][1];
+
+ (*pvar)(ix, iy, iz) = 1.0 / det - 1.0;
+ }
+ }
+ else if (order == 6)
+ {
+ h2_4 /= 36.;
+ h2 /= 180.;
+#pragma omp parallel for
+ for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix)
+ for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy)
+ for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz)
+ {
+ double D[3][3];
+
+ D[0][0] = (2. * ACC(ix - 3, iy, iz) - 27. * ACC(ix - 2, iy, iz) + 270. * ACC(ix - 1, iy, iz) - 490.0 * ACC(ix, iy, iz) + 270. * ACC(ix + 1, iy, iz) - 27. * ACC(ix + 2, iy, iz) + 2. * ACC(ix + 3, iy, iz)) * h2;
+ D[1][1] = (2. * ACC(ix, iy - 3, iz) - 27. * ACC(ix, iy - 2, iz) + 270. * ACC(ix, iy - 1, iz) - 490.0 * ACC(ix, iy, iz) + 270. * ACC(ix, iy + 1, iz) - 27. * ACC(ix, iy + 2, iz) + 2. * ACC(ix, iy + 3, iz)) * h2;
+ D[2][2] = (2. * ACC(ix, iy, iz - 3) - 27. * ACC(ix, iy, iz - 2) + 270. * ACC(ix, iy, iz - 1) - 490.0 * ACC(ix, iy, iz) + 270. * ACC(ix, iy, iz + 1) - 27. * ACC(ix, iy, iz + 2) + 2. * ACC(ix, iy, iz + 3)) * h2;
+
+ //.. this is actually 8th order accurate
+ D[0][1] = D[1][0] = (64. * (ACC(ix - 1, iy - 1, iz) - ACC(ix - 1, iy + 1, iz) - ACC(ix + 1, iy - 1, iz) + ACC(ix + 1, iy + 1, iz)) - 8. * (ACC(ix - 2, iy - 1, iz) - ACC(ix + 2, iy - 1, iz) - ACC(ix - 2, iy + 1, iz) + ACC(ix + 2, iy + 1, iz) + ACC(ix - 1, iy - 2, iz) - ACC(ix - 1, iy + 2, iz) - ACC(ix + 1, iy - 2, iz) + ACC(ix + 1, iy + 2, iz)) + 1. * (ACC(ix - 2, iy - 2, iz) - ACC(ix - 2, iy + 2, iz) - ACC(ix + 2, iy - 2, iz) + ACC(ix + 2, iy + 2, iz))) * h2_4;
+ D[0][2] = D[2][0] = (64. * (ACC(ix - 1, iy, iz - 1) - ACC(ix - 1, iy, iz + 1) - ACC(ix + 1, iy, iz - 1) + ACC(ix + 1, iy, iz + 1)) - 8. * (ACC(ix - 2, iy, iz - 1) - ACC(ix + 2, iy, iz - 1) - ACC(ix - 2, iy, iz + 1) + ACC(ix + 2, iy, iz + 1) + ACC(ix - 1, iy, iz - 2) - ACC(ix - 1, iy, iz + 2) - ACC(ix + 1, iy, iz - 2) + ACC(ix + 1, iy, iz + 2)) + 1. * (ACC(ix - 2, iy, iz - 2) - ACC(ix - 2, iy, iz + 2) - ACC(ix + 2, iy, iz - 2) + ACC(ix + 2, iy, iz + 2))) * h2_4;
+ D[1][2] = D[2][1] = (64. * (ACC(ix, iy - 1, iz - 1) - ACC(ix, iy - 1, iz + 1) - ACC(ix, iy + 1, iz - 1) + ACC(ix, iy + 1, iz + 1)) - 8. * (ACC(ix, iy - 2, iz - 1) - ACC(ix, iy + 2, iz - 1) - ACC(ix, iy - 2, iz + 1) + ACC(ix, iy + 2, iz + 1) + ACC(ix, iy - 1, iz - 2) - ACC(ix, iy - 1, iz + 2) - ACC(ix, iy + 1, iz - 2) + ACC(ix, iy + 1, iz + 2)) + 1. * (ACC(ix, iy - 2, iz - 2) - ACC(ix, iy - 2, iz + 2) - ACC(ix, iy + 2, iz - 2) + ACC(ix, iy + 2, iz + 2))) * h2_4;
+
+ D[0][0] += 1.0;
+ D[1][1] += 1.0;
+ D[2][2] += 1.0;
+
+ double det = D[0][0] * D[1][1] * D[2][2] - D[0][0] * D[1][2] * D[2][1] - D[1][0] * D[0][1] * D[2][2] + D[1][0] * D[0][2] * D[1][2] + D[2][0] * D[0][1] * D[1][2] - D[2][0] * D[0][2] * D[1][1];
+
+ (*pvar)(ix, iy, iz) = 1.0 / det - 1.0;
+ }
+ }
+ else
+ throw std::runtime_error("compute_LLA_density : invalid operator order specified");
}
-
}
-
-void compute_Lu_density( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order )
+void compute_Lu_density(const grid_hierarchy &u, grid_hierarchy &fnew, unsigned order)
{
fnew = u;
-
- for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel )
+
+ for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel)
{
- double h = pow(2.0,ilevel), h2 = h*h;
+ double h = pow(2.0, ilevel), h2 = h * h;
meshvar_bnd *pvar = fnew.get_grid(ilevel);
-
- #pragma omp parallel for
- for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix )
- for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy )
- for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz )
+
+#pragma omp parallel for
+ for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix)
+ for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy)
+ for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz)
{
double D[3][3];
-
- D[0][0] = 1.0 + (ACC(ix-1,iy,iz)-2.0*ACC(ix,iy,iz)+ACC(ix+1,iy,iz)) * h2;
- D[1][1] = 1.0 + (ACC(ix,iy-1,iz)-2.0*ACC(ix,iy,iz)+ACC(ix,iy+1,iz)) * h2;
- D[2][2] = 1.0 + (ACC(ix,iy,iz-1)-2.0*ACC(ix,iy,iz)+ACC(ix,iy,iz+1)) * h2;
-
- (*pvar)(ix,iy,iz) = -(D[0][0]+D[1][1]+D[2][2] - 3.0);
-
+
+ D[0][0] = 1.0 + (ACC(ix - 1, iy, iz) - 2.0 * ACC(ix, iy, iz) + ACC(ix + 1, iy, iz)) * h2;
+ D[1][1] = 1.0 + (ACC(ix, iy - 1, iz) - 2.0 * ACC(ix, iy, iz) + ACC(ix, iy + 1, iz)) * h2;
+ D[2][2] = 1.0 + (ACC(ix, iy, iz - 1) - 2.0 * ACC(ix, iy, iz) + ACC(ix, iy, iz + 1)) * h2;
+
+ (*pvar)(ix, iy, iz) = -(D[0][0] + D[1][1] + D[2][2] - 3.0);
}
}
-
}
-
-void compute_2LPT_source_FFT( config_file& cf_, const grid_hierarchy& u, grid_hierarchy& fnew )
+void compute_2LPT_source_FFT(config_file &cf_, const grid_hierarchy &u, grid_hierarchy &fnew)
{
- if( u.levelmin() != u.levelmax() )
+ if (u.levelmin() != u.levelmax())
throw std::runtime_error("FFT 2LPT can only be run in Unigrid mode!");
-
+
fnew = u;
- size_t nx,ny,nz,nzp;
+ size_t nx, ny, nz, nzp;
nx = u.get_grid(u.levelmax())->size(0);
ny = u.get_grid(u.levelmax())->size(1);
nz = u.get_grid(u.levelmax())->size(2);
- nzp = 2*(nz/2+1);
-
+ nzp = 2 * (nz / 2 + 1);
+
//... copy data ..................................................
- fftw_real *data = new fftw_real[nx*ny*nzp];
- fftw_complex *cdata = reinterpret_cast (data);
-
- fftw_complex *cdata_11, *cdata_12, *cdata_13, *cdata_22, *cdata_23, *cdata_33;
- fftw_real *data_11, *data_12, *data_13, *data_22, *data_23, *data_33;
-
- data_11 = new fftw_real[nx*ny*nzp]; cdata_11 = reinterpret_cast (data_11);
- data_12 = new fftw_real[nx*ny*nzp]; cdata_12 = reinterpret_cast (data_12);
- data_13 = new fftw_real[nx*ny*nzp]; cdata_13 = reinterpret_cast (data_13);
- data_22 = new fftw_real[nx*ny*nzp]; cdata_22 = reinterpret_cast (data_22);
- data_23 = new fftw_real[nx*ny*nzp]; cdata_23 = reinterpret_cast (data_23);
- data_33 = new fftw_real[nx*ny*nzp]; cdata_33 = reinterpret_cast (data_33);
-
- #pragma omp parallel for
- for( int i=0; i<(int)nx; ++i )
- for( size_t j=0; j(data);
+
+ complex_t *cdata_11, *cdata_12, *cdata_13, *cdata_22, *cdata_23, *cdata_33;
+ real_t *data_11, *data_12, *data_13, *data_22, *data_23, *data_33;
+
+ data_11 = new real_t[nx * ny * nzp];
+ cdata_11 = reinterpret_cast(data_11);
+ data_12 = new real_t[nx * ny * nzp];
+ cdata_12 = reinterpret_cast(data_12);
+ data_13 = new real_t[nx * ny * nzp];
+ cdata_13 = reinterpret_cast(data_13);
+ data_22 = new real_t[nx * ny * nzp];
+ cdata_22 = reinterpret_cast(data_22);
+ data_23 = new real_t[nx * ny * nzp];
+ cdata_23 = reinterpret_cast(data_23);
+ data_33 = new real_t[nx * ny * nzp];
+ cdata_33 = reinterpret_cast(data_33);
+
+#pragma omp parallel for
+ for (int i = 0; i < (int)nx; ++i)
+ for (size_t j = 0; j < ny; ++j)
+ for (size_t k = 0; k < nz; ++k)
{
- size_t idx = ((size_t)i*ny+j)*nzp+k;
- data[idx] = (*u.get_grid(u.levelmax()))(i,j,k);
+ size_t idx = ((size_t)i * ny + j) * nzp + k;
+ data[idx] = (*u.get_grid(u.levelmax()))(i, j, k);
}
-
+
//... perform FFT and Poisson solve................................
-#ifdef FFTW3
-
- #ifdef SINGLE_PRECISION
- fftwf_plan
- plan = fftwf_plan_dft_r2c_3d(nx,ny,nz, data, cdata, FFTW_ESTIMATE),
- iplan = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata, data, FFTW_ESTIMATE),
- ip11 = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_11, data_11, FFTW_ESTIMATE),
- ip12 = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_12, data_12, FFTW_ESTIMATE),
- ip13 = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_13, data_13, FFTW_ESTIMATE),
- ip22 = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_22, data_22, FFTW_ESTIMATE),
- ip23 = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_23, data_23, FFTW_ESTIMATE),
- ip33 = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_33, data_33, FFTW_ESTIMATE);
-
- fftwf_execute(plan);
-
- #else
-
- fftw_plan
- plan = fftw_plan_dft_r2c_3d(nx,ny,nz, data, cdata, FFTW_ESTIMATE),
- iplan = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata, data, FFTW_ESTIMATE),
- ip11 = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_11, data_11, FFTW_ESTIMATE),
- ip12 = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_12, data_12, FFTW_ESTIMATE),
- ip13 = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_13, data_13, FFTW_ESTIMATE),
- ip22 = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_22, data_22, FFTW_ESTIMATE),
- ip23 = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_23, data_23, FFTW_ESTIMATE),
- ip33 = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_33, data_33, FFTW_ESTIMATE);
-
- fftw_execute(plan);
-
- #endif
-
- double kfac = 2.0*M_PI;
- double norm = 1.0/((double)(nx*ny*nz));
-
- #pragma omp parallel for
- for( int i=0; i<(int)nx; ++i )
- for( size_t j=0; j(int)nx/2) ii-=nx;
- int jj = (int)j; if(jj>(int)ny/2) jj-=ny;
+ int ii = i;
+ if (ii > (int)nx / 2)
+ ii -= nx;
+ int jj = (int)j;
+ if (jj > (int)ny / 2)
+ jj -= ny;
double ki = (double)ii;
double kj = (double)jj;
double kk = (double)l;
-
+
double k[3];
k[0] = (double)ki * kfac;
k[1] = (double)kj * kfac;
k[2] = (double)kk * kfac;
-
- size_t idx = ((size_t)i*ny+j)*nzp/2+l;
- //double re = cdata[idx][0];
- //double im = cdata[idx][1];
-
- cdata_11[idx][0] = -k[0]*k[0] * cdata[idx][0] * norm;
- cdata_11[idx][1] = -k[0]*k[0] * cdata[idx][1] * norm;
-
- cdata_12[idx][0] = -k[0]*k[1] * cdata[idx][0] * norm;
- cdata_12[idx][1] = -k[0]*k[1] * cdata[idx][1] * norm;
-
- cdata_13[idx][0] = -k[0]*k[2] * cdata[idx][0] * norm;
- cdata_13[idx][1] = -k[0]*k[2] * cdata[idx][1] * norm;
-
- cdata_22[idx][0] = -k[1]*k[1] * cdata[idx][0] * norm;
- cdata_22[idx][1] = -k[1]*k[1] * cdata[idx][1] * norm;
-
- cdata_23[idx][0] = -k[1]*k[2] * cdata[idx][0] * norm;
- cdata_23[idx][1] = -k[1]*k[2] * cdata[idx][1] * norm;
-
- cdata_33[idx][0] = -k[2]*k[2] * cdata[idx][0] * norm;
- cdata_33[idx][1] = -k[2]*k[2] * cdata[idx][1] * norm;
-
-
- if( i==(int)nx/2||j==ny/2||l==nz/2)
+
+ size_t idx = ((size_t)i * ny + j) * nzp / 2 + l;
+ // double re = cdata[idx][0];
+ // double im = cdata[idx][1];
+
+ cdata_11[idx][0] = -k[0] * k[0] * cdata[idx][0] * norm;
+ cdata_11[idx][1] = -k[0] * k[0] * cdata[idx][1] * norm;
+
+ cdata_12[idx][0] = -k[0] * k[1] * cdata[idx][0] * norm;
+ cdata_12[idx][1] = -k[0] * k[1] * cdata[idx][1] * norm;
+
+ cdata_13[idx][0] = -k[0] * k[2] * cdata[idx][0] * norm;
+ cdata_13[idx][1] = -k[0] * k[2] * cdata[idx][1] * norm;
+
+ cdata_22[idx][0] = -k[1] * k[1] * cdata[idx][0] * norm;
+ cdata_22[idx][1] = -k[1] * k[1] * cdata[idx][1] * norm;
+
+ cdata_23[idx][0] = -k[1] * k[2] * cdata[idx][0] * norm;
+ cdata_23[idx][1] = -k[1] * k[2] * cdata[idx][1] * norm;
+
+ cdata_33[idx][0] = -k[2] * k[2] * cdata[idx][0] * norm;
+ cdata_33[idx][1] = -k[2] * k[2] * cdata[idx][1] * norm;
+
+ if (i == (int)nx / 2 || j == ny / 2 || l == nz / 2)
{
cdata_11[idx][0] = 0.0;
cdata_11[idx][1] = 0.0;
-
+
cdata_12[idx][0] = 0.0;
cdata_12[idx][1] = 0.0;
-
+
cdata_13[idx][0] = 0.0;
cdata_13[idx][1] = 0.0;
-
+
cdata_22[idx][0] = 0.0;
cdata_22[idx][1] = 0.0;
-
+
cdata_23[idx][0] = 0.0;
cdata_23[idx][1] = 0.0;
-
+
cdata_33[idx][0] = 0.0;
cdata_33[idx][1] = 0.0;
}
-
}
-
+
delete[] data;
/*cdata_11[0][0] = 0.0; cdata_11[0][1] = 0.0;
cdata_12[0][0] = 0.0; cdata_12[0][1] = 0.0;
@@ -315,175 +266,38 @@ void compute_2LPT_source_FFT( config_file& cf_, const grid_hierarchy& u, grid_hi
cdata_22[0][0] = 0.0; cdata_22[0][1] = 0.0;
cdata_23[0][0] = 0.0; cdata_23[0][1] = 0.0;
cdata_33[0][0] = 0.0; cdata_33[0][1] = 0.0;*/
-
-
-#ifdef SINGLE_PRECISION
- fftwf_execute(ip11);
- fftwf_execute(ip12);
- fftwf_execute(ip13);
- fftwf_execute(ip22);
- fftwf_execute(ip23);
- fftwf_execute(ip33);
-
- fftwf_destroy_plan(plan);
- fftwf_destroy_plan(iplan);
- fftwf_destroy_plan(ip11);
- fftwf_destroy_plan(ip12);
- fftwf_destroy_plan(ip13);
- fftwf_destroy_plan(ip22);
- fftwf_destroy_plan(ip23);
- fftwf_destroy_plan(ip33);
-#else
- fftw_execute(ip11);
- fftw_execute(ip12);
- fftw_execute(ip13);
- fftw_execute(ip22);
- fftw_execute(ip23);
- fftw_execute(ip33);
-
- fftw_destroy_plan(plan);
- fftw_destroy_plan(iplan);
- fftw_destroy_plan(ip11);
- fftw_destroy_plan(ip12);
- fftw_destroy_plan(ip13);
- fftw_destroy_plan(ip22);
- fftw_destroy_plan(ip23);
- fftw_destroy_plan(ip33);
-#endif
-//#endif
-
-
-#else
- rfftwnd_plan
- plan = rfftw3d_create_plan( nx,ny,nz,
- FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE),
- iplan = rfftw3d_create_plan( nx,ny,nz,
- FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE);
-
-
- #ifndef SINGLETHREAD_FFTW
- rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, data, NULL );
- #else
- rfftwnd_one_real_to_complex( plan, data, NULL );
- #endif
-//#endif
- //double fac = -1.0/(nx*ny*nz);
- double kfac = 2.0*M_PI;
- double norm = 1.0/((double)(nx*ny*nz));
-
- #pragma omp parallel for
- for( int i=0; i<(int)nx; ++i )
- for( size_t j=0; j(int)(nx/2)) ii-=(int)nx;
- int jj = (int)j; if(jj>(int)(ny/2)) jj-=(int)ny;
- double ki = (double)ii;
- double kj = (double)jj;
- double kk = (double)l;
-
- double k[3];
- k[0] = (double)ki * kfac;
- k[1] = (double)kj * kfac;
- k[2] = (double)kk * kfac;
-
- size_t idx = ((size_t)i*ny+j)*nzp/2+l;
- //double re = cdata[idx].re;
- //double im = cdata[idx].im;
-
- cdata_11[idx].re = -k[0]*k[0] * cdata[idx].re * norm;
- cdata_11[idx].im = -k[0]*k[0] * cdata[idx].im * norm;
-
- cdata_12[idx].re = -k[0]*k[1] * cdata[idx].re * norm;
- cdata_12[idx].im = -k[0]*k[1] * cdata[idx].im * norm;
-
- cdata_13[idx].re = -k[0]*k[2] * cdata[idx].re * norm;
- cdata_13[idx].im = -k[0]*k[2] * cdata[idx].im * norm;
-
- cdata_22[idx].re = -k[1]*k[1] * cdata[idx].re * norm;
- cdata_22[idx].im = -k[1]*k[1] * cdata[idx].im * norm;
-
- cdata_23[idx].re = -k[1]*k[2] * cdata[idx].re * norm;
- cdata_23[idx].im = -k[1]*k[2] * cdata[idx].im * norm;
-
- cdata_33[idx].re = -k[2]*k[2] * cdata[idx].re * norm;
- cdata_33[idx].im = -k[2]*k[2] * cdata[idx].im * norm;
-
-
- if( i==(int)(nx/2)||j==ny/2||l==nz/2)
- {
- cdata_11[idx].re = 0.0;
- cdata_11[idx].im = 0.0;
-
- cdata_12[idx].re = 0.0;
- cdata_12[idx].im = 0.0;
-
- cdata_13[idx].re = 0.0;
- cdata_13[idx].im = 0.0;
-
- cdata_22[idx].re = 0.0;
- cdata_22[idx].im = 0.0;
-
- cdata_23[idx].re = 0.0;
- cdata_23[idx].im = 0.0;
-
- cdata_33[idx].re = 0.0;
- cdata_33[idx].im = 0.0;
- }
-
- }
-
- delete[] data;
- /*cdata_11[0].re = 0.0; cdata_11[0].im = 0.0;
- cdata_12[0].re = 0.0; cdata_12[0].im = 0.0;
- cdata_13[0].re = 0.0; cdata_13[0].im = 0.0;
- cdata_22[0].re = 0.0; cdata_22[0].im = 0.0;
- cdata_23[0].re = 0.0; cdata_23[0].im = 0.0;
- cdata_33[0].re = 0.0; cdata_33[0].im = 0.0;*/
-
-
-#ifndef SINGLETHREAD_FFTW
- //rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL );
- rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_11, NULL );
- rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_12, NULL );
- rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_13, NULL );
- rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_22, NULL );
- rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_23, NULL );
- rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_33, NULL );
-#else
- //rfftwnd_one_complex_to_real( iplan, cdata, NULL );
- rfftwnd_one_complex_to_real(iplan, cdata_11, NULL );
- rfftwnd_one_complex_to_real(iplan, cdata_12, NULL );
- rfftwnd_one_complex_to_real(iplan, cdata_13, NULL );
- rfftwnd_one_complex_to_real(iplan, cdata_22, NULL );
- rfftwnd_one_complex_to_real(iplan, cdata_23, NULL );
- rfftwnd_one_complex_to_real(iplan, cdata_33, NULL );
-#endif
-
-
-
- rfftwnd_destroy_plan(plan);
- rfftwnd_destroy_plan(iplan);
-#endif
+ size_t ii = ((size_t)i * ny + j) * nzp + k;
+ (*fnew.get_grid(u.levelmax()))(i, j, k) = ((data_11[ii] * data_22[ii] - data_12[ii] * data_12[ii]) +
+ (data_11[ii] * data_33[ii] - data_13[ii] * data_13[ii]) +
+ (data_22[ii] * data_33[ii] - data_23[ii] * data_23[ii]));
-
- //... copy data ..........................................
- #pragma omp parallel for
- for( int i=0; i<(int)nx; ++i )
- for( size_t j=0; j(int)fnew.levelmin(); --i )
- mg_straight().restrict( (*fnew.get_grid(i)), (*fnew.get_grid(i-1)) );
-
+
+ //.. subtract global mean so the multi-grid poisson solver behaves well
+
+ for (int i = fnew.levelmax(); i > (int)fnew.levelmin(); --i)
+ mg_straight().restrict((*fnew.get_grid(i)), (*fnew.get_grid(i - 1)));
+
long double sum = 0.0;
- int nx,ny,nz;
-
+ int nx, ny, nz;
+
nx = fnew.get_grid(fnew.levelmin())->size(0);
ny = fnew.get_grid(fnew.levelmin())->size(1);
nz = fnew.get_grid(fnew.levelmin())->size(2);
-
- for( int ix=0; ixsize(0);
ny = fnew.get_grid(i)->size(1);
nz = fnew.get_grid(i)->size(2);
-
- for( int ix=0; ix(rcoarse);
+ real_t *rcoarse = new real_t[nxF * nyF * nzFp];
+ complex_t *ccoarse = reinterpret_cast(rcoarse);
- fftw_real *rfine = new fftw_real[nxf * nyf * nzfp];
- fftw_complex *cfine = reinterpret_cast(rfine);
+ real_t *rfine = new real_t[nxf * nyf * nzfp];
+ complex_t *cfine = reinterpret_cast(rfine);
-#ifdef FFTW3
-#ifdef SINGLE_PRECISION
- fftwf_plan
- pf = fftwf_plan_dft_r2c_3d(nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE),
- ipc = fftwf_plan_dft_c2r_3d(nxF, nyF, nzF, ccoarse, rcoarse, FFTW_ESTIMATE);
-#else
- fftw_plan
- pf = fftw_plan_dft_r2c_3d(nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE),
- ipc = fftw_plan_dft_c2r_3d(nxF, nyF, nzF, ccoarse, rcoarse, FFTW_ESTIMATE);
-#endif
-
-#else
- rfftwnd_plan
- pf = rfftw3d_create_plan(nxf, nyf, nzf, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE),
- ipc = rfftw3d_create_plan(nxF, nyF, nzF, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
-#endif
+ fftw_plan_t
+ pf = FFTW_API(plan_dft_r2c_3d)(nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE),
+ ipc = FFTW_API(plan_dft_c2r_3d)(nxF, nyF, nzF, ccoarse, rcoarse, FFTW_ESTIMATE);
#pragma omp parallel for
for (int i = 0; i < (int)nxf; i++)
@@ -70,19 +57,7 @@ void fft_coarsen(m1 &v, m2 &V)
rfine[q] = v(i, j, k);
}
-#ifdef FFTW3
-#ifdef SINGLE_PRECISION
- fftwf_execute(pf);
-#else
- fftw_execute(pf);
-#endif
-#else
-#ifndef SINGLETHREAD_FFTW
- rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pf, rfine, NULL);
-#else
- rfftwnd_one_real_to_complex(pf, rfine, NULL);
-#endif
-#endif
+ FFTW_API(execute)(pf);
double fftnorm = 1.0 / ((double)nxF * (double)nyF * (double)nzF);
@@ -125,19 +100,7 @@ void fft_coarsen(m1 &v, m2 &V)
delete[] rfine;
-#ifdef FFTW3
-#ifdef SINGLE_PRECISION
- fftwf_execute(ipc);
-#else
- fftw_execute(ipc);
-#endif
-#else
-#ifndef SINGLETHREAD_FFTW
- rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), ipc, ccoarse, NULL);
-#else
- rfftwnd_one_complex_to_real(ipc, ccoarse, NULL);
-#endif
-#endif
+ FFTW_API(execute)(ipc);
#pragma omp parallel for
for (int i = 0; i < (int)nxF; i++)
@@ -150,18 +113,8 @@ void fft_coarsen(m1 &v, m2 &V)
delete[] rcoarse;
-#ifdef FFTW3
-#ifdef SINGLE_PRECISION
- fftwf_destroy_plan(pf);
- fftwf_destroy_plan(ipc);
-#else
- fftw_destroy_plan(pf);
- fftw_destroy_plan(ipc);
-#endif
-#else
- rfftwnd_destroy_plan(pf);
- rfftwnd_destroy_plan(ipc);
-#endif
+ FFTW_API(destroy_plan)(pf);
+ FFTW_API(destroy_plan)(ipc);
}
template
@@ -191,14 +144,14 @@ void fft_interpolate(m1 &V, m2 &v, bool from_basegrid = false)
size_t nxc = nxf / 2, nyc = nyf / 2, nzc = nzf / 2, nzcp = nzf / 2 + 2;
- fftw_real *rcoarse = new fftw_real[nxc * nyc * nzcp];
- fftw_complex *ccoarse = reinterpret_cast(rcoarse);
+ real_t *rcoarse = new real_t[nxc * nyc * nzcp];
+ complex_t *ccoarse = reinterpret_cast(rcoarse);
- fftw_real *rfine = new fftw_real[nxf * nyf * nzfp];
- fftw_complex *cfine = reinterpret_cast(rfine);
+ real_t *rfine = new real_t[nxf * nyf * nzfp];
+ complex_t *cfine = reinterpret_cast(rfine);
// copy coarse data to rcoarse[.]
- memset(rcoarse, 0, sizeof(fftw_real) * nxc * nyc * nzcp);
+ memset(rcoarse, 0, sizeof(real_t) * nxc * nyc * nzcp);
#pragma omp parallel for
for (int i = 0; i < (int)nxc; ++i)
@@ -221,36 +174,13 @@ void fft_interpolate(m1 &V, m2 &v, bool from_basegrid = false)
rfine[q] = v(i, j, k);
}
-#ifdef FFTW3
-#ifdef SINGLE_PRECISION
- fftwf_plan
- pc = fftwf_plan_dft_r2c_3d(nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE),
- pf = fftwf_plan_dft_r2c_3d(nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE),
- ipf = fftwf_plan_dft_c2r_3d(nxf, nyf, nzf, cfine, rfine, FFTW_ESTIMATE);
- fftwf_execute(pc);
- fftwf_execute(pf);
-#else
- fftw_plan
- pc = fftw_plan_dft_r2c_3d(nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE),
- pf = fftw_plan_dft_r2c_3d(nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE),
- ipf = fftw_plan_dft_c2r_3d(nxf, nyf, nzf, cfine, rfine, FFTW_ESTIMATE);
- fftw_execute(pc);
- fftw_execute(pf);
-#endif
-#else
- rfftwnd_plan
- pc = rfftw3d_create_plan(nxc, nyc, nzc, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE),
- pf = rfftw3d_create_plan(nxf, nyf, nzf, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE),
- ipf = rfftw3d_create_plan(nxf, nyf, nzf, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE);
-#ifndef SINGLETHREAD_FFTW
- rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pc, rcoarse, NULL);
- rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pf, rfine, NULL);
-#else
- rfftwnd_one_real_to_complex(pc, rcoarse, NULL);
- rfftwnd_one_real_to_complex(pf, rfine, NULL);
-#endif
-#endif
+ fftw_plan_t
+ pc = FFTW_API(plan_dft_r2c_3d)(nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE),
+ pf = FFTW_API(plan_dft_r2c_3d)(nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE),
+ ipf = FFTW_API(plan_dft_c2r_3d)(nxf, nyf, nzf, cfine, rfine, FFTW_ESTIMATE);
+ FFTW_API(execute)(pc);
+ FFTW_API(execute)(pf);
/*************************************************/
//.. perform actual interpolation
@@ -300,28 +230,11 @@ void fft_interpolate(m1 &V, m2 &v, bool from_basegrid = false)
/*************************************************/
-#ifdef FFTW3
-#ifdef SINGLE_PRECISION
- fftwf_execute(ipf);
- fftwf_destroy_plan(pf);
- fftwf_destroy_plan(pc);
- fftwf_destroy_plan(ipf);
-#else
- fftw_execute(ipf);
- fftw_destroy_plan(pf);
- fftw_destroy_plan(pc);
- fftw_destroy_plan(ipf);
-#endif
-#else
-#ifndef SINGLETHREAD_FFTW
- rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), ipf, cfine, NULL);
-#else
- rfftwnd_one_complex_to_real(ipf, cfine, NULL);
-#endif
- fftwnd_destroy_plan(pf);
- fftwnd_destroy_plan(pc);
- fftwnd_destroy_plan(ipf);
-#endif
+ FFTW_API(execute)(ipf);
+
+ FFTW_API(destroy_plan)(pf);
+ FFTW_API(destroy_plan)(pc);
+ FFTW_API(destroy_plan)(ipf);
// copy back and normalize
#pragma omp parallel for
@@ -349,8 +262,6 @@ void GenerateDensityUnigrid(config_file &cf, transfer_function *ptf, tf_type typ
levelmin = cf.get_value_safe("setup", "levelmin_TF", levelminPoisson);
levelmax = cf.get_value("setup", "levelmax");
- bool kspace = cf.get_value("setup", "kspace_TF");
-
bool fix = cf.get_value_safe("setup","fix_mode_amplitude",false);
bool flip = cf.get_value_safe("setup","flip_mode_amplitude",false);
@@ -360,30 +271,10 @@ void GenerateDensityUnigrid(config_file &cf, transfer_function *ptf, tf_type typ
music::ulog.Print("Running unigrid density convolution...");
//... select the transfer function to be used
- convolution::kernel_creator *the_kernel_creator;
+ convolution::kernel_creator *the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k"];
- if (kspace)
- {
- std::cout << " - Using k-space transfer function kernel.\n";
- music::ulog.Print("Using k-space transfer function kernel.");
-
-#ifdef SINGLE_PRECISION
- the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k_float"];
-#else
- the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k_double"];
-#endif
- }
- else
- {
- std::cout << " - Using real-space transfer function kernel.\n";
- music::ulog.Print("Using real-space transfer function kernel.");
-
-#ifdef SINGLE_PRECISION
- the_kernel_creator = convolution::get_kernel_map()["tf_kernel_real_float"];
-#else
- the_kernel_creator = convolution::get_kernel_map()["tf_kernel_real_double"];
-#endif
- }
+ std::cout << " - Using k-space transfer function kernel.\n";
+ music::ulog.Print("Using k-space transfer function kernel.");
//... initialize convolution kernel
convolution::kernel *the_tf_kernel = the_kernel_creator->create(cf, ptf, refh, type);
@@ -402,7 +293,7 @@ void GenerateDensityUnigrid(config_file &cf, transfer_function *ptf, tf_type typ
the_tf_kernel->fetch_kernel(levelmin, false);
//... perform convolution
- convolution::perform(the_tf_kernel, reinterpret_cast(top->get_data_ptr()), shift, fix, flip);
+ convolution::perform(the_tf_kernel, reinterpret_cast(top->get_data_ptr()), shift, fix, flip);
//... clean up kernel
delete the_tf_kernel;
@@ -451,17 +342,11 @@ void GenerateDensityHierarchy(config_file &cf, transfer_function *ptf, tf_type t
unsigned nbase = 1 << levelmin;
- convolution::kernel_creator *the_kernel_creator;
+ convolution::kernel_creator *the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k"];
std::cout << " - Using k-space transfer function kernel.\n";
music::ulog.Print("Using k-space transfer function kernel.");
-#ifdef SINGLE_PRECISION
- the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k_float"];
-#else
- the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k_double"];
-#endif
-
convolution::kernel *the_tf_kernel = the_kernel_creator->create(cf, ptf, refh, type);
/***** PERFORM CONVOLUTIONS *****/
@@ -475,7 +360,7 @@ void GenerateDensityHierarchy(config_file &cf, transfer_function *ptf, tf_type t
top = new DensityGrid(nbase, nbase, nbase);
music::ilog.Print("Performing noise convolution on level %3d", levelmin);
rand.load(*top, levelmin);
- convolution::perform(the_tf_kernel->fetch_kernel(levelmin, false), reinterpret_cast(top->get_data_ptr()), shift, fix, flip);
+ convolution::perform(the_tf_kernel->fetch_kernel(levelmin, false), reinterpret_cast(top->get_data_ptr()), shift, fix, flip);
delta.create_base_hierarchy(levelmin);
top->copy(*delta.get_grid(levelmin));
@@ -506,7 +391,7 @@ void GenerateDensityHierarchy(config_file &cf, transfer_function *ptf, tf_type t
// load white noise for patch
rand.load(*fine, levelmin + i);
- convolution::perform(the_tf_kernel->fetch_kernel(levelmin + i, true),
+ convolution::perform(the_tf_kernel->fetch_kernel(levelmin + i, true),
reinterpret_cast(fine->get_data_ptr()), shift, fix, flip);
if( fourier_splicing ){
diff --git a/src/fd_schemes.hh b/src/fd_schemes.hh
index fa4be40..1ad2dd9 100644
--- a/src/fd_schemes.hh
+++ b/src/fd_schemes.hh
@@ -11,12 +11,13 @@
#ifndef __FD_SCHEMES_HH
#define __FD_SCHEMES_HH
+#include
#include
#include
//! abstract implementation of the Poisson/Force scheme
-template< class L, class G, typename real_t=double >
+template< class L, class G>
class scheme
{
public:
@@ -57,10 +58,9 @@ public:
};
//! base class for finite difference gradients
-template< int nextent, typename T >
+template< int nextent>
class gradient
{
- typedef T real_t;
std::vector m_stencil;
const unsigned nl;
public:
@@ -110,20 +110,21 @@ public:
};
//! base class for finite difference stencils
-template< int nextent, typename real_t >
+template< int nextent>
class base_stencil
{
protected:
- std::vector m_stencil;
- const unsigned nl;
+ static constexpr size_t nl{2*nextent+1};
+ std::array m_stencil;
+
public:
bool m_modsource;
public:
- base_stencil( bool amodsource = false )
- : nl( 2*nextent+1 ), m_modsource( amodsource )
+ explicit base_stencil( bool amodsource = false )
+ : m_modsource( amodsource )
{
- m_stencil.assign(nl*nl*nl,(real_t)0.0);
+ m_stencil.fill( (real_t)0.0 );
}
real_t& operator()(int i, int j, int k)
@@ -176,8 +177,7 @@ public:
//... Implementation of the Gradient schemes............................................
-template< typename real_t >
-class deriv_2P : public gradient<1,real_t>
+class deriv_2P : public gradient<1>
{
public:
@@ -194,8 +194,7 @@ public:
//... Implementation of the Laplacian schemes..........................................
//! 7-point, 2nd order finite difference Laplacian
-template< typename real_t >
-class stencil_7P : public base_stencil<1,real_t>
+class stencil_7P : public base_stencil<1>
{
public:
@@ -214,7 +213,7 @@ public:
inline real_t apply( const C& c, const int i, const int j, const int k ) const
{
//return c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)-6.0*c(i,j,k);
- return (double)c(i-1,j,k)+(double)c(i+1,j,k)+(double)c(i,j-1,k)+(double)c(i,j+1,k)+(double)c(i,j,k-1)+(double)c(i,j,k+1)-6.0*(double)c(i,j,k);
+ return (real_t)c(i-1,j,k)+(real_t)c(i+1,j,k)+(real_t)c(i,j-1,k)+(real_t)c(i,j+1,k)+(real_t)c(i,j,k-1)+(real_t)c(i,j,k+1)-6.0*(real_t)c(i,j,k);
}
template< class C >
@@ -230,8 +229,7 @@ public:
};
//! 13-point, 4th order finite difference Laplacian
-template< typename real_t >
-class stencil_13P : public base_stencil<2,real_t>
+class stencil_13P : public base_stencil<2>
{
public:
@@ -279,8 +277,7 @@ public:
//! 19-point, 6th order finite difference Laplacian
-template< typename real_t >
-class stencil_19P : public base_stencil<3,real_t>
+class stencil_19P : public base_stencil<3>
{
public:
@@ -339,7 +336,6 @@ public:
//! flux operator for the 4th order FD Laplacian
-template< typename real_t >
class Laplace_flux_O4
{
public:
@@ -354,7 +350,7 @@ public:
template< class C >
inline double apply_x( int idir, const C& c, const int i, const int j, const int k )
{
- double fac = -((double)idir)/12.0;
+ double fac = -((real_t)idir)/12.0;
return fac*(-c(i-2,j,k)+15.0*c(i-1,j,k)-15.0*c(i,j,k)+c(i+1,j,k));
}
@@ -369,7 +365,7 @@ public:
template< class C >
inline double apply_y( int idir, const C& c, const int i, const int j, const int k )
{
- double fac = -((double)idir)/12.0;
+ double fac = -((real_t)idir)/12.0;
return fac*(-c(i,j-2,k)+15.0*c(i,j-1,k)-15.0*c(i,j,k)+c(i,j+1,k));
}
@@ -384,7 +380,7 @@ public:
template< class C >
inline double apply_z( int idir, const C& c, const int i, const int j, const int k )
{
- double fac = -((double)idir)/12.0;
+ double fac = -((real_t)idir)/12.0;
return fac*(-c(i,j,k-2)+15.0*c(i,j,k-1)-15.0*c(i,j,k)+c(i,j,k+1));
}
@@ -392,7 +388,6 @@ public:
//! flux operator for the 6th order FD Laplacian
-template< typename real_t >
class Laplace_flux_O6
{
public:
@@ -408,7 +403,7 @@ public:
template< class C >
inline double apply_x( int idir, const C& c, const int i, const int j, const int k )
{
- double fac = -((double)idir)/180.0;
+ real_t fac = -((real_t)idir)/180.0;
return fac*(2.*c(i-3,j,k)-25.*c(i-2,j,k)+245.*c(i-1,j,k)-245.0*c(i,j,k)+25.*c(i+1,j,k)-2.*c(i+2,j,k));
}
@@ -423,7 +418,7 @@ public:
template< class C >
inline double apply_y( int idir, const C& c, const int i, const int j, const int k )
{
- double fac = -((double)idir)/180.0;
+ real_t fac = -((real_t)idir)/180.0;
return fac*(2.*c(i,j-3,k)-25.*c(i,j-2,k)+245.*c(i,j-1,k)-245.0*c(i,j,k)+25.*c(i,j+1,k)-2.*c(i,j+2,k));
}
@@ -438,7 +433,7 @@ public:
template< class C >
inline double apply_z( int idir, const C& c, const int i, const int j, const int k )
{
- double fac = -((double)idir)/180.0;
+ real_t fac = -((real_t)idir)/180.0;
return fac*(2.*c(i,j,k-3)-25.*c(i,j,k-2)+245.*c(i,j,k-1)-245.0*c(i,j,k)+25.*c(i,j,k+1)-2.*c(i,j,k+2));
}
diff --git a/src/general.hh b/src/general.hh
index 789c35c..104cae6 100644
--- a/src/general.hh
+++ b/src/general.hh
@@ -8,75 +8,56 @@
*/
-#ifndef __GENERAL_HH
-#define __GENERAL_HH
+#pragma once
-#include "logger.hh"
+#include
+#include
#include
-#include "omp.h"
+#include
-#ifdef WITH_MPI
- #ifdef MANNO
- #include
- #else
- #include
- #endif
-#else
-#include
+#include
+
+// include CMake controlled configuration settings
+#include "cmake_config.hh"
+
+#if defined(USE_PRECISION_FLOAT)
+ using real_t = float;
+ using complex_t = fftwf_complex;
+ #define FFTW_PREFIX fftwf
+#elif defined(USE_PRECISION_DOUBLE)
+ using real_t = double;
+ using complex_t = fftw_complex;
+ #define FFTW_PREFIX fftw
+#elif defined(USE_PRECISION_LONGDOUBLE)
+ using real_t = long double;
+ using complex_t = fftwl_complex;
+ #define FFTW_PREFIX fftwl
#endif
-#ifdef FFTW3
- #include
- #if defined(SINGLE_PRECISION)
- typedef float fftw_real;
- #else
- typedef double fftw_real;
- #endif
+#define FFTW_GEN_NAME_PRIM(a, b) a##_##b
+#define FFTW_GEN_NAME(a, b) FFTW_GEN_NAME_PRIM(a, b)
+#define FFTW_API(x) FFTW_GEN_NAME(FFTW_PREFIX, x)
-#else
- #if defined(SINGLE_PRECISION) and not defined(SINGLETHREAD_FFTW)
- #include
- #include
- #elif defined(SINGLE_PRECISION) and defined(SINGLETHREAD_FFTW)
- #include
- #elif not defined(SINGLE_PRECISION) and not defined(SINGLETHREAD_FFTW)
- #include
- #include
- #elif not defined(SINGLE_PRECISION) and defined(SINGLETHREAD_FFTW)
- #include
- #endif
-#endif
+using fftw_plan_t = FFTW_GEN_NAME(FFTW_PREFIX, plan);
-#ifdef SINGLE_PRECISION
- typedef float real_t;
-#else
- typedef double real_t;
-#endif
+#define RE(x) ((x)[0])
+#define IM(x) ((x)[1])
+#include
#include
using vec3_t = std::array;
-#ifdef FFTW3
- #define RE(x) ((x)[0])
- #define IM(x) ((x)[1])
-#else
- #define RE(x) ((x).re)
- #define IM(x) ((x).im)
-#endif
-
-#if defined(FFTW3) && defined(SINGLE_PRECISION)
-#define fftw_complex fftwf_complex
-#endif
-
-
-
-#include
-
-#include "config_file.hh"
-//#include "mesh.hh"
-
-
+namespace CONFIG
+{
+// extern int MPI_thread_support;
+// extern int MPI_task_rank;
+// extern int MPI_task_size;
+// extern bool MPI_ok;
+// extern bool MPI_threads_ok;
+extern bool FFTW_threads_ok;
+extern int num_threads;
+} // namespace CONFIG
//! compute square of argument
template< typename T >
@@ -180,6 +161,3 @@ inline bool is_number(const std::string& s)
return true;
}
-
-
-#endif
diff --git a/src/main.cc b/src/main.cc
index ca316a8..b18fced 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -13,6 +13,8 @@
#include
#include
+#include
+
#include
#include
#include
@@ -26,25 +28,40 @@ extern "C"
}
#endif
-#include "general.hh"
-#include "defaults.hh"
-#include "output.hh"
+#include
+#include
-#include "config_file.hh"
+#include
+#include
+#include
-#include "poisson.hh"
-#include "mg_solver.hh"
-#include "fd_schemes.hh"
-#include "random.hh"
-#include "densities.hh"
+#include
-#include "convolution_kernel.hh"
-#include "cosmology.hh"
-#include "transfer_function.hh"
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
#define THE_CODE_NAME "music!"
#define THE_CODE_VERSION "2.0a"
+// initialise with "default" values
+namespace CONFIG{
+// int MPI_thread_support = -1;
+// int MPI_task_rank = 0;
+// int MPI_task_size = 1;
+// bool MPI_ok = false;
+// bool MPI_threads_ok = false;
+bool FFTW_threads_ok = false;
+int num_threads = 1;
+}
+
+
namespace music
{
@@ -87,11 +104,6 @@ void splash(void)
#if defined(CMAKE_BUILD)
music::ilog.Print("Version built from git rev.: %s, tag: %s, branch: %s", GIT_REV, GIT_TAG, GIT_BRANCH);
-#endif
-#if defined(SINGLE_PRECISION)
- music::ilog.Print("Version was compiled for single precision.");
-#else
- music::ilog.Print("Version was compiled for double precision.");
#endif
std::cout << "\n\n";
}
@@ -294,6 +306,50 @@ void add_constant_value( grid_hierarchy &u, const double val )
}
}
+#include
+void output_system_info()
+{
+ std::feclearexcept(FE_ALL_EXCEPT);
+
+ //------------------------------------------------------------------------------
+ // Write code configuration to screen
+ //------------------------------------------------------------------------------
+ // hardware related infos
+ music::ilog << std::setw(32) << std::left << "CPU vendor string" << " : " << SystemStat::Cpu().get_CPUstring() << std::endl;
+
+ // multi-threading related infos
+ music::ilog << std::setw(32) << std::left << "Available HW threads / task" << " : " << std::thread::hardware_concurrency() << " (" << CONFIG::num_threads << " used)" << std::endl;
+
+ // memory related infos
+ SystemStat::Memory mem;
+
+ unsigned availpmem = mem.get_AvailMem()/1024/1024;
+ unsigned usedpmem = mem.get_UsedMem()/1024/1024;
+ unsigned maxpmem = availpmem, minpmem = availpmem;
+ unsigned maxupmem = usedpmem, minupmem = usedpmem;
+
+ music::ilog << std::setw(32) << std::left << "Total system memory (phys)" << " : " << mem.get_TotalMem()/1024/1024 << " Mb" << std::endl;
+ music::ilog << std::setw(32) << std::left << "Used system memory (phys)" << " : " << "Max: " << maxupmem << " Mb, Min: " << minupmem << " Mb" << std::endl;
+ music::ilog << std::setw(32) << std::left << "Available system memory (phys)" << " : " << "Max: " << maxpmem << " Mb, Min: " << minpmem << " Mb" << std::endl;
+
+ // Kernel related infos
+ SystemStat::Kernel kern;
+ auto kinfo = kern.get_kernel_info();
+ music::ilog << std::setw(32) << std::left << "OS/Kernel version" << " : " << kinfo.kernel << " version " << kinfo.major << "." << kinfo.minor << " build " << kinfo.build_number << std::endl;
+
+ // FFTW related infos
+ music::ilog << std::setw(32) << std::left << "FFTW version" << " : " << FFTW_API(version) << std::endl;
+ music::ilog << std::setw(32) << std::left << "FFTW supports multi-threading" << " : " << (CONFIG::FFTW_threads_ok? "yes" : "no") << std::endl;
+ music::ilog << std::setw(32) << std::left << "FFTW mode" << " : ";
+#if defined(FFTW_MODE_PATIENT)
+ music::ilog << "FFTW_PATIENT" << std::endl;
+#elif defined(FFTW_MODE_MEASURE)
+ music::ilog << "FFTW_MEASURE" << std::endl;
+#else
+ music::ilog << "FFTW_ESTIMATE" << std::endl;
+#endif
+}
+
/*****************************************************************************************************/
/*****************************************************************************************************/
/*****************************************************************************************************/
@@ -342,25 +398,6 @@ int main(int argc, const char *argv[])
music::ulog.Print("Running %s, version %s", THE_CODE_NAME, THE_CODE_VERSION);
music::ulog.Print("Log is for run started %s", asctime(localtime(<ime)));
-#ifdef FFTW3
- music::ulog.Print("Code was compiled using FFTW version 3.x");
-#else
- music::ulog.Print("Code was compiled using FFTW version 2.x");
-#endif
-
-#ifdef SINGLETHREAD_FFTW
- music::ulog.Print("Code was compiled for single-threaded FFTW");
-#else
- music::ulog.Print("Code was compiled for multi-threaded FFTW");
- music::ulog.Print("Running with a maximum of %d OpenMP threads", omp_get_max_threads());
-#endif
-
-#ifdef SINGLE_PRECISION
- music::ulog.Print("Code was compiled for single precision.");
-#else
- music::ulog.Print("Code was compiled for double precision.");
-#endif
-
//------------------------------------------------------------------------------
//... read and interpret config file
//------------------------------------------------------------------------------
@@ -369,6 +406,13 @@ int main(int argc, const char *argv[])
bool force_shift(false);
double boxlength;
+
+ //------------------------------------------------------------------------------
+ //... init multi-threading
+ //------------------------------------------------------------------------------
+ CONFIG::FFTW_threads_ok = FFTW_API(init_threads)();
+ CONFIG::num_threads = cf.get_value_safe("execution", "NumThreads",std::thread::hardware_concurrency());
+
//------------------------------------------------------------------------------
//... initialize some parameters about grid set-up
//------------------------------------------------------------------------------
@@ -403,24 +447,6 @@ int main(int argc, const char *argv[])
else
music::ilog.Print("Using real space sampled transfer functions...");
- //------------------------------------------------------------------------------
- //... initialize multithread FFTW
- //------------------------------------------------------------------------------
-
-#if not defined(SINGLETHREAD_FFTW)
-#ifdef FFTW3
-#ifdef SINGLE_PRECISION
- fftwf_init_threads();
- fftwf_plan_with_nthreads(omp_get_max_threads());
-#else
- fftw_init_threads();
- fftw_plan_with_nthreads(omp_get_max_threads());
-#endif
-#else
- fftw_threads_init();
-#endif
-#endif
-
//------------------------------------------------------------------------------
//... initialize cosmology
//------------------------------------------------------------------------------
@@ -1373,13 +1399,8 @@ int main(int argc, const char *argv[])
delete the_transfer_function_plugin;
delete the_poisson_solver;
-#if defined(FFTW3) and not defined(SINGLETHREAD_FFTW)
-#ifdef SINGLE_PRECISION
- fftwf_cleanup_threads();
-#else
- fftw_cleanup_threads();
-#endif
-#endif
+ if( CONFIG::FFTW_threads_ok )
+ FFTW_API(cleanup_threads)();
//------------------------------------------------------------------------------
//... we are done !
diff --git a/src/mg_interp.hh b/src/mg_interp.hh
index c90115c..7a601da 100644
--- a/src/mg_interp.hh
+++ b/src/mg_interp.hh
@@ -290,12 +290,12 @@ struct cubic_interp
{
fine_flux = 0.0;
- fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz);
- fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz);
- fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz+1);
- fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz+1);
+ fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz);
+ fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz);
+ fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz+1);
+ fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz+1);
- coarse_flux = Laplace_flux_O4().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
+ coarse_flux = Laplace_flux_O4().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -312,12 +312,12 @@ struct cubic_interp
{
fine_flux = 0.0;
- fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz);
- fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz);
- fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz+1);
- fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz+1);
+ fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz);
+ fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz);
+ fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz+1);
+ fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz+1);
- coarse_flux = Laplace_flux_O4().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
+ coarse_flux = Laplace_flux_O4().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -338,12 +338,12 @@ struct cubic_interp
{
fine_flux = 0.0;
- fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz);
- fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz);
- fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz+1);
- fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz+1);
+ fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz);
+ fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz);
+ fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz+1);
+ fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz+1);
- coarse_flux = Laplace_flux_O4().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
+ coarse_flux = Laplace_flux_O4().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -359,12 +359,12 @@ struct cubic_interp
{
fine_flux = 0.0;
- fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz);
- fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz);
- fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz+1);
- fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz+1);
+ fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz);
+ fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz);
+ fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz+1);
+ fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz+1);
- coarse_flux = Laplace_flux_O4().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
+ coarse_flux = Laplace_flux_O4().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -384,12 +384,12 @@ struct cubic_interp
{
fine_flux = 0.0;
- fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy,iz+1);
- fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy,iz+1);
- fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy+1,iz+1);
- fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy+1,iz+1);
+ fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy,iz+1);
+ fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy,iz+1);
+ fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy+1,iz+1);
+ fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy+1,iz+1);
- coarse_flux = Laplace_flux_O4().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
+ coarse_flux = Laplace_flux_O4().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -405,12 +405,12 @@ struct cubic_interp
{
fine_flux = 0.0;
- fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy,iz);
- fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy,iz);
- fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy+1,iz);
- fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy+1,iz);
+ fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy,iz);
+ fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy,iz);
+ fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy+1,iz);
+ fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy+1,iz);
- coarse_flux = Laplace_flux_O4().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
+ coarse_flux = Laplace_flux_O4().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -717,13 +717,13 @@ struct interp_O5_fluxcorr
}
fine_flux = 0.0;
- fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz);
- fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz);
- fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz+1);
- fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz+1);
+ fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz);
+ fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz);
+ fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz+1);
+ fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz+1);
fine_flux /= 4.0;
- coarse_flux = Laplace_flux_O4().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
+ coarse_flux = Laplace_flux_O4().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
dflux = coarse_flux - fine_flux;
@@ -758,12 +758,12 @@ struct interp_O5_fluxcorr
}
fine_flux = 0.0;
- fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz);
- fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz);
- fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz+1);
- fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz+1);
+ fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz);
+ fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz);
+ fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz+1);
+ fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz+1);
- coarse_flux = Laplace_flux_O4().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
+ coarse_flux = Laplace_flux_O4().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -798,12 +798,12 @@ struct interp_O5_fluxcorr
}
fine_flux = 0.0;
- fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz);
- fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz);
- fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz+1);
- fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz+1);
+ fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz);
+ fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz);
+ fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz+1);
+ fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz+1);
- coarse_flux = Laplace_flux_O4().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
+ coarse_flux = Laplace_flux_O4().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -838,12 +838,12 @@ struct interp_O5_fluxcorr
}
fine_flux = 0.0;
- fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz);
- fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz);
- fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz+1);
- fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz+1);
+ fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz);
+ fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz);
+ fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz+1);
+ fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz+1);
- coarse_flux = Laplace_flux_O4().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
+ coarse_flux = Laplace_flux_O4().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -880,12 +880,12 @@ struct interp_O5_fluxcorr
fine_flux = 0.0;
- fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy,iz+1);
- fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy,iz+1);
- fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy+1,iz+1);
- fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy+1,iz+1);
+ fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy,iz+1);
+ fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy,iz+1);
+ fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy+1,iz+1);
+ fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy+1,iz+1);
- coarse_flux = Laplace_flux_O4().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
+ coarse_flux = Laplace_flux_O4().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -920,12 +920,12 @@ struct interp_O5_fluxcorr
}
fine_flux = 0.0;
- fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy,iz);
- fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy,iz);
- fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy+1,iz);
- fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy+1,iz);
+ fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy,iz);
+ fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy,iz);
+ fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy+1,iz);
+ fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy+1,iz);
- coarse_flux = Laplace_flux_O4().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
+ coarse_flux = Laplace_flux_O4().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -1027,13 +1027,13 @@ struct interp_O7_fluxcorr
}
fine_flux = 0.0;
- fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy,iz);
- fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy+1,iz);
- fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy,iz+1);
- fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy+1,iz+1);
+ fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy,iz);
+ fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy+1,iz);
+ fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy,iz+1);
+ fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy+1,iz+1);
fine_flux /= 4.0;
- coarse_flux = Laplace_flux_O6().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
+ coarse_flux = Laplace_flux_O6().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0;
dflux = coarse_flux - fine_flux;
@@ -1074,12 +1074,12 @@ struct interp_O7_fluxcorr
}
fine_flux = 0.0;
- fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy,iz);
- fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy+1,iz);
- fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy,iz+1);
- fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy+1,iz+1);
+ fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy,iz);
+ fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy+1,iz);
+ fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy,iz+1);
+ fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy+1,iz+1);
- coarse_flux = Laplace_flux_O6().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
+ coarse_flux = Laplace_flux_O6().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -1119,12 +1119,12 @@ struct interp_O7_fluxcorr
}
fine_flux = 0.0;
- fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix,iy+1,iz);
- fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix+1,iy+1,iz);
- fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix,iy+1,iz+1);
- fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix+1,iy+1,iz+1);
+ fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix,iy+1,iz);
+ fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix+1,iy+1,iz);
+ fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix,iy+1,iz+1);
+ fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix+1,iy+1,iz+1);
- coarse_flux = Laplace_flux_O6().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
+ coarse_flux = Laplace_flux_O6().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -1164,12 +1164,12 @@ struct interp_O7_fluxcorr
}
fine_flux = 0.0;
- fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix,iy,iz);
- fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix+1,iy,iz);
- fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix,iy,iz+1);
- fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix+1,iy,iz+1);
+ fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix,iy,iz);
+ fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix+1,iy,iz);
+ fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix,iy,iz+1);
+ fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix+1,iy,iz+1);
- coarse_flux = Laplace_flux_O6().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
+ coarse_flux = Laplace_flux_O6().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -1210,12 +1210,12 @@ struct interp_O7_fluxcorr
fine_flux = 0.0;
- fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix,iy,iz+1);
- fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix+1,iy,iz+1);
- fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix,iy+1,iz+1);
- fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix+1,iy+1,iz+1);
+ fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix,iy,iz+1);
+ fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix+1,iy,iz+1);
+ fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix,iy+1,iz+1);
+ fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix+1,iy+1,iz+1);
- coarse_flux = Laplace_flux_O6().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
+ coarse_flux = Laplace_flux_O6().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
@@ -1255,12 +1255,12 @@ struct interp_O7_fluxcorr
}
fine_flux = 0.0;
- fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix,iy,iz);
- fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix+1,iy,iz);
- fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix,iy+1,iz);
- fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix+1,iy+1,iz);
+ fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix,iy,iz);
+ fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix+1,iy,iz);
+ fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix,iy+1,iz);
+ fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix+1,iy+1,iz);
- coarse_flux = Laplace_flux_O6().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
+ coarse_flux = Laplace_flux_O6().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0;
fine_flux /= 4.0;
dflux = coarse_flux - fine_flux;
diff --git a/src/mg_solver.hh b/src/mg_solver.hh
index b2d53d2..c39cd23 100644
--- a/src/mg_solver.hh
+++ b/src/mg_solver.hh
@@ -1,37 +1,43 @@
/*
-
+
mg_solver.hh - This file is part of MUSIC -
- a code to generate multi-scale initial conditions
- for cosmological simulations
-
+ a code to generate multi-scale initial conditions
+ for cosmological simulations
+
Copyright (C) 2010 Oliver Hahn
-
+
*/
-#ifndef __MG_SOLVER_HH
-#define __MG_SOLVER_HH
+#pragma once
#include
#include
-#include "mg_operators.hh"
-#include "mg_interp.hh"
+#include
+#include
-#include "mesh.hh"
+#include
-#define BEGIN_MULTIGRID_NAMESPACE namespace multigrid {
+#define BEGIN_MULTIGRID_NAMESPACE \
+ namespace multigrid \
+ {
#define END_MULTIGRID_NAMESPACE }
BEGIN_MULTIGRID_NAMESPACE
-
+
//! options for multigrid smoothing operation
-namespace opt {
- enum smtype { sm_jacobi, sm_gauss_seidel, sm_sor };
+namespace opt
+{
+ enum smtype
+ {
+ sm_jacobi,
+ sm_gauss_seidel,
+ sm_sor
+ };
}
-
//! actual implementation of FAS adaptive multigrid solver
-template< class S, class I, class O, typename T=double >
+template
class solver
{
public:
@@ -40,229 +46,214 @@ public:
typedef I interp;
protected:
- scheme m_scheme; //!< finite difference scheme
- mgop m_gridop; //!< grid prolongation and restriction operator
- unsigned m_npresmooth, //!< number of pre sweeps
- m_npostsmooth; //!< number of post sweeps
- opt::smtype m_smoother; //!< smoothing method to be applied
- unsigned m_ilevelmin; //!< index of the top grid level
-
- const static bool m_bperiodic = true; //!< flag whether top grid is periodic
-
- std::vector m_residu_ini; //!< vector of initial residuals for each level
- bool m_is_ini; //!< bool that is true for first iteration
+ scheme m_scheme; //!< finite difference scheme
+ mgop m_gridop; //!< grid prolongation and restriction operator
+ unsigned m_npresmooth, //!< number of pre sweeps
+ m_npostsmooth; //!< number of post sweeps
+ opt::smtype m_smoother; //!< smoothing method to be applied
+ unsigned m_ilevelmin; //!< index of the top grid level
+
+ const static bool m_bperiodic = true; //!< flag whether top grid is periodic
+
+ std::vector m_residu_ini; //!< vector of initial residuals for each level
+ bool m_is_ini; //!< bool that is true for first iteration
+
+ GridHierarchy