From 6b32a7d6e46e839199f4d846a08c5325d22d7471 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Tue, 14 Feb 2023 17:12:19 -0800 Subject: [PATCH] WIP refactoring. Got rid of FFTW2, some of the old single/double precision templating,... --- CMakeLists.txt | 129 +- FindFFTW3.cmake | 11 + src/TransferFunction.hh | 2 +- src/cmake_config.hh.in | 25 + src/constraints.cc | 4 +- src/constraints.hh | 523 ++-- src/convolution_kernel.cc | 88 +- src/convolution_kernel.hh | 1 - src/cosmology.cc | 815 +++---- src/densities.cc | 183 +- src/fd_schemes.hh | 47 +- src/general.hh | 98 +- src/main.cc | 143 +- src/mg_interp.hh | 180 +- src/mg_solver.hh | 957 ++++---- src/plugins/output_enzo.cc | 12 +- src/plugins/output_gadget2.cc | 2 - src/plugins/output_gadget2_2comp.cc | 2272 +++++++++--------- src/plugins/output_gadget_tetmesh.cc | 2 - src/plugins/output_tipsy.cc | 2 - src/plugins/output_tipsy_resample.cc | 2 - src/plugins/random_music_wnoise_generator.cc | 196 +- src/plugins/random_panphasia.cc | 88 +- src/poisson.cc | 425 +--- src/solver.hh | 1602 ++++++------ src/system_stat.hh | 194 ++ src/transfer_function.hh | 51 +- 27 files changed, 3654 insertions(+), 4400 deletions(-) create mode 100644 src/cmake_config.hh.in create mode 100644 src/system_stat.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index 10485a1..bd95567 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,21 @@ -cmake_minimum_required(VERSION 3.9) +# This file is part of MUSIC2 +# A software package to generate ICs for cosmological simulations +# Copyright (C) 2023 by Oliver Hahn +# +# monofonIC is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# monofonIC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +cmake_minimum_required(VERSION 3.11) set(PRGNAME MUSIC) project(MUSIC) @@ -27,12 +44,10 @@ mark_as_advanced(CMAKE_CXX_FLAGS_DEBUGSANADD CMAKE_CXX_FLAGS_DEBUGSANUNDEF) mark_as_advanced(CMAKE_C_FLAGS_DEBUGSANADD CMAKE_C_FLAGS_DEBUGSANUNDEF) mark_as_advanced(CMAKE_EXECUTABLE_FORMAT CMAKE_OSX_ARCHITECTURES CMAKE_OSX_DEPLOYMENT_TARGET CMAKE_OSX_SYSROOT) -# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -Wall -pedantic -DCMAKE_BUILD") find_package(PkgConfig REQUIRED) set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}") -option(MUSIC_ENABLE_SINGLE_PRECISION "Enable Single Precision Mode" OFF) ######################################################################################################################## # OpenMP @@ -52,12 +67,44 @@ find_package(Threads REQUIRED) if(POLICY CMP0074) cmake_policy(SET CMP0074 NEW) endif() -find_package(FFTW3 COMPONENTS SINGLE DOUBLE THREADS) +if(ENABLE_MPI) + find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS MPI) +else() + find_package(FFTW3 COMPONENTS SINGLE DOUBLE LONGDOUBLE OPENMP THREADS) +endif(ENABLE_MPI) +mark_as_advanced(FFTW3_SINGLE_MPI_LIBRARY FFTW3_SINGLE_OPENMP_LIBRARY FFTW3_SINGLE_SERIAL_LIBRARY FFTW3_SINGLE_THREADS_LIBRARY) +mark_as_advanced(FFTW3_DOUBLE_MPI_LIBRARY FFTW3_DOUBLE_OPENMP_LIBRARY FFTW3_DOUBLE_SERIAL_LIBRARY FFTW3_DOUBLE_THREADS_LIBRARY) +mark_as_advanced(FFTW3_LONGDOUBLE_MPI_LIBRARY FFTW3_LONGDOUBLE_OPENMP_LIBRARY FFTW3_LONGDOUBLE_SERIAL_LIBRARY FFTW3_LONGDOUBLE_THREADS_LIBRARY) +mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_MPI_INCLUDE_DIR) +mark_as_advanced(pkgcfg_lib_PC_FFTW_fftw3) 
######################################################################################################################## # TIRPC, needed only for Tipsy format find_package(TIRPC) +######################################################################################################################## +# GSL +find_package(GSL REQUIRED) +mark_as_advanced(pkgcfg_lib_GSL_gsl pkgcfg_lib_GSL_gslcblas pkgcfg_lib_GSL_m) + +######################################################################################################################## +# HDF5 +find_package(HDF5) +if( HDF5_FOUND ) + mark_as_advanced(HDF5_C_LIBRARY_dl HDF5_C_LIBRARY_hdf5 HDF5_C_LIBRARY_m HDF5_C_LIBRARY_pthread HDF5_C_LIBRARY_z HDF5_C_LIBRARY_sz) +endif() + +######################################################################################################################## +# floating point precision +set ( + CODE_PRECISION "DOUBLE" + CACHE STRING "Floating point type used for internal computations and FFTs" +) +set_property ( + CACHE CODE_PRECISION + PROPERTY STRINGS FLOAT DOUBLE LONGDOUBLE +) + ######################################################################################################################## # Add a custom command that produces version.cc, plus # a dummy output that's not actually produced, in order @@ -68,16 +115,6 @@ ADD_CUSTOM_COMMAND( COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/version.cmake) - - -######################################################################################################################## -# GSL -find_package(GSL REQUIRED) - -######################################################################################################################## -# HDF5 -find_package(HDF5) - ######################################################################################################################## # INCLUDES include_directories(${PROJECT_SOURCE_DIR}/src) @@ -112,40 +149,38 @@ list (APPEND SOURCES # target_include_directories(${PRGNAME} 
PRIVATE ${PROJECT_SOURCE_DIR}/external/panphasia_ho) endif(ENABLE_PANPHASIA) +# project configuration header +configure_file( + ${PROJECT_SOURCE_DIR}/src/cmake_config.hh.in + ${PROJECT_SOURCE_DIR}/src/cmake_config.hh +) + add_executable(${PRGNAME} ${SOURCES} ${PLUGINS}) -set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 11) +set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 14) -if(FFTW3_FOUND) - target_compile_options(${PRGNAME} PRIVATE "-DFFTW3") - - if( MUSIC_ENABLE_SINGLE_PRECISION ) - target_compile_options(${PRGNAME} PRIVATE "-DSINGLE_PRECISION") - if (FFTW3_SINGLE_THREADS_FOUND) - target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_THREADS_LIBRARY}) - target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS") - elseif(FFTW3_SINGLE_SERIAL_FOUND) - target_link_libraries(${PRGNAME} ${FFTW3_SINGLE_SERIAL_LIBRARY}) - message( WARNING "using serial version of FFTW3 -- this will most likely cause a very slow version of MUSIC. Rec: install FFTW3 with thread support") - else() - message( FATAL "chose compilation in single precision, but FFTW3 not found for single precision") - endif() - else(MUSIC_ENABLE_SINGLE_PRECISION) - if (FFTW3_DOUBLE_THREADS_FOUND) - target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_THREADS_LIBRARY}) - target_compile_options(${PRGNAME} PRIVATE "-DUSE_FFTW_THREADS") - elseif(FFTW3_DOUBLE_SERIAL_FOUND) - target_link_libraries(${PRGNAME} ${FFTW3_DOUBLE_SERIAL_LIBRARY}) - message( WARNING "using serial version of FFTW3 -- this will most likely cause a very slow version of MUSIC. 
Rec: install FFTW3 with thread support") - else() - message( FATAL "chose compilation in double precision, but FFTW3 not found for double precision") - endif() - endif(MUSIC_ENABLE_SINGLE_PRECISION) -endif(FFTW3_FOUND) +if(CODE_PRECISION STREQUAL "FLOAT") + if(FFTW3_SINGLE_THREADS_FOUND) + target_link_libraries(${PRGNAME} PRIVATE FFTW3::FFTW3_SINGLE_THREADS) + target_compile_definitions(${PRGNAME} PRIVATE "USE_FFTW_THREADS") + endif() + target_link_libraries(${PRGNAME} PRIVATE FFTW3::FFTW3_SINGLE_SERIAL) +elseif(CODE_PRECISION STREQUAL "DOUBLE") + if(FFTW3_DOUBLE_THREADS_FOUND) + target_link_libraries(${PRGNAME} PRIVATE FFTW3::FFTW3_DOUBLE_THREADS) + target_compile_definitions(${PRGNAME} PRIVATE "USE_FFTW_THREADS") + endif() + target_link_libraries(${PRGNAME} PRIVATE FFTW3::FFTW3_DOUBLE_SERIAL) +elseif(CODE_PRECISION STREQUAL "LONGDOUBLE") + if(FFTW3_LONGDOUBLE_THREADS_FOUND) + target_link_libraries(${PRGNAME} PRIVATE FFTW3::FFTW3_LONGDOUBLE_THREADS) + target_compile_definitions(${PRGNAME} PRIVATE "USE_FFTW_THREADS") + endif() + target_link_libraries(${PRGNAME} PRIVATE FFTW3::FFTW3_LONGDOUBLE_SERIAL) +endif() if(HDF5_FOUND) - # target_link_libraries(${PRGNAME} ${HDF5_C_LIBRARY_DIRS}) - target_link_libraries(${PRGNAME} ${HDF5_LIBRARIES}) + target_link_libraries(${PRGNAME} PRIVATE ${HDF5_LIBRARIES}) target_include_directories(${PRGNAME} PRIVATE ${HDF5_INCLUDE_DIRS}) target_compile_options(${PRGNAME} PRIVATE "-DHAVE_HDF5") target_compile_options(${PRGNAME} PRIVATE "-DH5_USE_16_API") @@ -161,11 +196,5 @@ if(ENABLE_PANPHASIA) target_compile_options(${PRGNAME} PRIVATE "-DHAVE_PANPHASIA") endif(ENABLE_PANPHASIA) -target_link_libraries(${PRGNAME} ${FFTW3_LIBRARIES}) -target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIRS}) +target_link_libraries(${PRGNAME} PRIVATE GSL::gsl) -target_link_libraries(${PRGNAME} ${GSL_LIBRARIES}) -target_include_directories(${PRGNAME} PRIVATE ${GSL_INCLUDE_DIR}) - -target_link_libraries(${PRGNAME} ${HDF5_LIBRARIES}) 
-target_include_directories(${PRGNAME} PRIVATE ${HDF5_INCLUDE_DIR}) diff --git a/FindFFTW3.cmake b/FindFFTW3.cmake index 0c65570..b69d975 100644 --- a/FindFFTW3.cmake +++ b/FindFFTW3.cmake @@ -230,3 +230,14 @@ find_package_handle_standard_args(FFTW3 VERSION_VAR FFTW3_VERSION_STRING HANDLE_COMPONENTS ) + +if(FFTW3_FOUND) + foreach(component ${FFTW3_FIND_COMPONENTS}) + if(NOT TARGET FFTW3::FFTW3_${component}) + add_library(FFTW3::FFTW3_${component} UNKNOWN IMPORTED) + set_target_properties(FFTW3::FFTW3_${component} PROPERTIES + IMPORTED_LOCATION "${FFTW3_${component}_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES ${FFTW3_INCLUDE_DIR}) + endif() + endforeach() +endif() diff --git a/src/TransferFunction.hh b/src/TransferFunction.hh index 1cce3fb..919ee86 100644 --- a/src/TransferFunction.hh +++ b/src/TransferFunction.hh @@ -130,7 +130,7 @@ protected: double fftnorm = 1.0/N; - fftw_complex in[N], out[N]; + complex_t in[N], out[N]; fftw_plan p,ip; //... perform anti-ringing correction from Hamilton (2000) diff --git a/src/cmake_config.hh.in b/src/cmake_config.hh.in new file mode 100644 index 0000000..5162967 --- /dev/null +++ b/src/cmake_config.hh.in @@ -0,0 +1,25 @@ +#pragma once + + +#define USE_PRECISION_${CODE_PRECISION} + +#ifdef __cplusplus +constexpr char CMAKE_BUILDTYPE_STR[] = "${CMAKE_BUILD_TYPE}"; +#if defined(USE_PRECISION_FLOAT) + constexpr char CMAKE_PRECISION_STR[] = "single"; +#elif defined(USE_PRECISION_DOUBLE) + constexpr char CMAKE_PRECISION_STR[] = "double"; +#elif defined(USE_PRECISION_LONGDOUBLE) + constexpr char CMAKE_PRECISION_STR[] = "long double"; +#endif + +// These variables are autogenerated and compiled +// into the library by the version.cmake script. do not touch! 
+extern "C" +{ + extern const char *GIT_TAG; + extern const char *GIT_REV; + extern const char *GIT_BRANCH; + +} +#endif // __cplusplus \ No newline at end of file diff --git a/src/constraints.cc b/src/constraints.cc index 479e282..a07d4d3 100644 --- a/src/constraints.cc +++ b/src/constraints.cc @@ -267,7 +267,7 @@ constraint_set::constraint_set( config_file& cf, transfer_function *ptf ) } -void constraint_set::wnoise_constr_corr( double dx, size_t nx, size_t ny, size_t nz, std::vector& g0, matrix& cinv, fftw_complex* cw ) +void constraint_set::wnoise_constr_corr( double dx, size_t nx, size_t ny, size_t nz, std::vector& g0, matrix& cinv, complex_t* cw ) { double lsub = nx*dx; double dk = 2.0*M_PI/lsub, d3k=dk*dk*dk; @@ -374,7 +374,7 @@ void constraint_set::wnoise_constr_corr( double dx, size_t nx, size_t ny, size_t -void constraint_set::wnoise_constr_corr( double dx, fftw_complex* cw, size_t nx, size_t ny, size_t nz, std::vector& g0 ) +void constraint_set::wnoise_constr_corr( double dx, complex_t* cw, size_t nx, size_t ny, size_t nz, std::vector& g0 ) { size_t nconstr = cset_.size(); size_t nzp=nz/2+1; diff --git a/src/constraints.hh b/src/constraints.hh index a5029f3..66f2871 100644 --- a/src/constraints.hh +++ b/src/constraints.hh @@ -1,118 +1,121 @@ /* - - constraints.hh - This file is part of MUSIC - - a code to generate multi-scale initial conditions - for cosmological simulations - - Copyright (C) 2010 Oliver Hahn - - */ -#ifndef __CONSTRAINTS_HH -#define __CONSTRAINTS_HH + constraints.hh - This file is part of MUSIC - + a code to generate multi-scale initial conditions + for cosmological simulations + + Copyright (C) 2010 Oliver Hahn + + */ +#pragma once #include #include #include -#include "general.hh" -#include "config_file.hh" -#include "transfer_function.hh" -#include "cosmology.hh" - +#include +#include +#include +#include //! 
matrix class serving as a gsl wrapper class matrix { protected: - gsl_matrix * m_; - //double *data_; + gsl_matrix *m_; + // double *data_; size_t M_, N_; - + public: - matrix( size_t M, size_t N ) - : M_(M), N_(N) + matrix(size_t M, size_t N) + : M_(M), N_(N) { - m_ = gsl_matrix_alloc(M_,N_); + m_ = gsl_matrix_alloc(M_, N_); } - - matrix( size_t N ) - : M_(N), N_(N) + + matrix(size_t N) + : M_(N), N_(N) { - m_ = gsl_matrix_alloc(M_,N_); + m_ = gsl_matrix_alloc(M_, N_); } - - matrix( const matrix& o ) + + matrix(const matrix &o) { M_ = o.M_; N_ = o.N_; - m_ = gsl_matrix_alloc(M_,N_); - gsl_matrix_memcpy(m_, o.m_ ); + m_ = gsl_matrix_alloc(M_, N_); + gsl_matrix_memcpy(m_, o.m_); } - + ~matrix() { - gsl_matrix_free( m_ ); + gsl_matrix_free(m_); } - - double& operator()( size_t i, size_t j ) - { return *gsl_matrix_ptr( m_, i, j ); } - - const double& operator()( size_t i, size_t j ) const - { return *gsl_matrix_const_ptr( m_, i, j ); } - - matrix& operator=( const matrix& o ) + + double &operator()(size_t i, size_t j) { - gsl_matrix_free( m_ ); - + return *gsl_matrix_ptr(m_, i, j); + } + + const double &operator()(size_t i, size_t j) const + { + return *gsl_matrix_const_ptr(m_, i, j); + } + + matrix &operator=(const matrix &o) + { + gsl_matrix_free(m_); + M_ = o.M_; N_ = o.N_; - m_ = gsl_matrix_alloc(M_,N_); - gsl_matrix_memcpy(m_, o.m_ ); + m_ = gsl_matrix_alloc(M_, N_); + gsl_matrix_memcpy(m_, o.m_); return *this; } - - - matrix& invert() + + matrix &invert() { - if( M_!=N_ ) + if (M_ != N_) throw std::runtime_error("Attempt to invert a non-square matrix!"); - + int s; - gsl_matrix* im = gsl_matrix_alloc(M_,N_); - - gsl_permutation * p = gsl_permutation_alloc (M_); - gsl_linalg_LU_decomp( m_, p, &s ); - gsl_linalg_LU_invert( m_, p, im ); - + gsl_matrix *im = gsl_matrix_alloc(M_, N_); + + gsl_permutation *p = gsl_permutation_alloc(M_); + gsl_linalg_LU_decomp(m_, p, &s); + gsl_linalg_LU_invert(m_, p, im); + gsl_matrix_memcpy(m_, im); - + gsl_permutation_free(p); 
gsl_matrix_free(im); return *this; } }; - //! class to impose constraints on the white noise field (van de Weygaert & Bertschinger 1996) class constraint_set { - + public: - enum constr_type{ halo, peak }; - + enum constr_type + { + halo, + peak + }; + protected: - - struct constraint{ + struct constraint + { constr_type type; - double x,y,z; - double gx,gy,gz; + double x, y, z; + double gx, gy, gz; double Rg, Rg2; double gRg, gRg2; double sigma; }; - + config_file *pcf_; std::vector cset_; transfer_function *ptf_; @@ -120,303 +123,185 @@ protected: Cosmology *pcosmo_; double dplus0_; unsigned constr_level_; - - - inline std::complex eval_constr( size_t icon, double kx, double ky, double kz ) + + inline std::complex eval_constr(size_t icon, double kx, double ky, double kz) { double re, im, kdotx, k2; - - kdotx = cset_[icon].gx*kx+cset_[icon].gy*ky+cset_[icon].gz*kz; - k2 = kx*kx+ky*ky+kz*kz; - - re = im = exp(-k2*cset_[icon].gRg2/2.0); - re *= cos( kdotx ); - im *= sin( kdotx ); - - return std::complex(re,im); + + kdotx = cset_[icon].gx * kx + cset_[icon].gy * ky + cset_[icon].gz * kz; + k2 = kx * kx + ky * ky + kz * kz; + + re = im = exp(-k2 * cset_[icon].gRg2 / 2.0); + re *= cos(kdotx); + im *= sin(kdotx); + + return std::complex(re, im); } - - -#if defined(FFTW3) && defined(SINGLE_PRECISION) - //! apply constraints to the white noise - void wnoise_constr_corr( double dx, size_t nx, size_t ny, size_t nz, std::vector& g0, matrix& cinv, fftwf_complex* cw ); - + void wnoise_constr_corr(double dx, size_t nx, size_t ny, size_t nz, std::vector &g0, matrix &cinv, complex_t *cw); + //! measure sigma for each constraint in the unconstrained noise - void wnoise_constr_corr( double dx, fftwf_complex* cw, size_t nx, size_t ny, size_t nz, std::vector& g0 ); - -#else - //! apply constraints to the white noise - void wnoise_constr_corr( double dx, size_t nx, size_t ny, size_t nz, std::vector& g0, matrix& cinv, fftw_complex* cw ); - - //! 
measure sigma for each constraint in the unconstrained noise - void wnoise_constr_corr( double dx, fftw_complex* cw, size_t nx, size_t ny, size_t nz, std::vector& g0 ); - -#endif - + void wnoise_constr_corr(double dx, complex_t *cw, size_t nx, size_t ny, size_t nz, std::vector &g0); + //! compute the covariance between the constraints - void icov_constr( double dx, size_t nx, size_t ny, size_t nz, matrix& cij ); - - + void icov_constr(double dx, size_t nx, size_t ny, size_t nz, matrix &cij); + public: - - - //! constructor - constraint_set( config_file& cf, transfer_function *ptf ); - + //! constructor + constraint_set(config_file &cf, transfer_function *ptf); + //! destructor ~constraint_set() { delete pccalc_; delete pcosmo_; } - - - template< typename rng > - void apply( unsigned ilevel, int x0[], int lx[], rng* wnoise ) + + template + void apply(unsigned ilevel, int x0[], int lx[], rng *wnoise) { - if( cset_.size() == 0 || constr_level_ != ilevel ) + if (cset_.size() == 0 || constr_level_ != ilevel) return; - - unsigned nlvl = 1<get_value("setup","boxlength"); - + + unsigned nlvl = 1 << ilevel; + double boxlength = pcf_->get_value("setup", "boxlength"); + //... 
compute constraint coordinates for grid - for( size_t i=0; i 0.5*lx[0]) - music::wlog.Print("Constraint %d appears to be too large scale",i); - } - - - std::vector g0; - -// unsigned levelmax = pcf_->get_value("setup","levelmax"); - unsigned levelmin = pcf_->get_value("setup","levelmin_TF"); - - bool bperiodic = ilevel==levelmin; - double dx = pcf_->get_value("setup","boxlength")/(1< 0.5 * lx[0]) + music::wlog.Print("Constraint %d appears to be too large scale", i); + } + + std::vector g0; + + // unsigned levelmax = pcf_->get_value("setup","levelmax"); + unsigned levelmin = pcf_->get_value("setup", "levelmin_TF"); + + bool bperiodic = ilevel == levelmin; + double dx = pcf_->get_value("setup", "boxlength") / (1 << ilevel); + + music::ilog.Print("Computing constrained realization..."); + + if (bperiodic) { //... we are operating on the periodic coarse grid - size_t nx = lx[0], ny = lx[1], nz = lx[2], nzp = nz+2; - fftw_real * w = new fftw_real[nx*ny*nzp]; - - -#ifdef FFTW3 - #ifdef SINGLE_PRECISION - fftwf_complex * cw = reinterpret_cast (w); - fftwf_plan p = fftwf_plan_dft_r2c_3d( nx, ny, nz, w, cw, FFTW_ESTIMATE), - ip = fftwf_plan_dft_c2r_3d( nx, ny, nz, cw, w, FFTW_ESTIMATE); - #else - fftw_complex * cw = reinterpret_cast (w); - fftw_plan p = fftw_plan_dft_r2c_3d( nx, ny, nz, w, cw, FFTW_ESTIMATE), - ip = fftw_plan_dft_c2r_3d( nx, ny, nz, cw, w, FFTW_ESTIMATE); - #endif -#else - fftw_complex * cw = reinterpret_cast (w); - rfftwnd_plan p = rfftw3d_create_plan( nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE), - ip = rfftw3d_create_plan( nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE); -#endif - - double fftnorm = 1.0/sqrt(nx*ny*nz); - - #pragma omp parallel for - for( int i=0; i<(int)nx; i++ ) - for( int j=0; j<(int)ny; j++ ) - for( int k=0; k<(int)nz; k++ ) + size_t nx = lx[0], ny = lx[1], nz = lx[2], nzp = nz + 2; + real_t *w = new real_t[nx * ny * nzp]; + + complex_t *cw = reinterpret_cast(w); + fftw_plan_t p = 
FFTW_API(plan_dft_r2c_3d)(nx, ny, nz, w, cw, FFTW_ESTIMATE), + ip = FFTW_API(plan_dft_c2r_3d)(nx, ny, nz, cw, w, FFTW_ESTIMATE); + + double fftnorm = 1.0 / sqrt(nx * ny * nz); + +#pragma omp parallel for + for (int i = 0; i < (int)nx; i++) + for (int j = 0; j < (int)ny; j++) + for (int k = 0; k < (int)nz; k++) { - size_t q = ((size_t)i*ny+(size_t)j)*nzp+(size_t)k; - w[q] = (*wnoise)((x0[0]+i)%nx,(x0[1]+j)%ny,(x0[2]+k)%nz)*fftnorm; + size_t q = ((size_t)i * ny + (size_t)j) * nzp + (size_t)k; + w[q] = (*wnoise)((x0[0] + i) % nx, (x0[1] + j) % ny, (x0[2] + k) % nz) * fftnorm; } - -#ifdef FFTW3 - #ifdef SINGLE_PRECISION - fftwf_execute( p ); - #else - fftw_execute( p ); - #endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), p, w, NULL ); -#else - rfftwnd_one_real_to_complex( p, w, NULL ); -#endif -#endif - wnoise_constr_corr( dx, cw, nx, ny, nz, g0 ); - - matrix c(2,2); - icov_constr( dx, nx, ny, nz, c ); - - - wnoise_constr_corr( dx, nx, ny, nz, g0, c, cw ); - -#ifdef FFTW3 - #ifdef SINGLE_PRECISION - fftwf_execute( ip ); - #else - fftw_execute( ip ); - #endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), ip, cw, NULL ); -#else - rfftwnd_one_complex_to_real( ip, cw, NULL ); -#endif -#endif - - #pragma omp parallel for - for( int i=0; i<(int)nx; i++ ) - for( int j=0; j<(int)ny; j++ ) - for( int k=0; k<(int)nz; k++ ) + + FFTW_API(execute)(p); + wnoise_constr_corr(dx, cw, nx, ny, nz, g0); + + matrix c(2, 2); + icov_constr(dx, nx, ny, nz, c); + + wnoise_constr_corr(dx, nx, ny, nz, g0, c, cw); + + FFTW_API(execute)(ip); + +#pragma omp parallel for + for (int i = 0; i < (int)nx; i++) + for (int j = 0; j < (int)ny; j++) + for (int k = 0; k < (int)nz; k++) { - size_t q = ((size_t)i*ny+(size_t)j)*nzp+(size_t)k; - (*wnoise)((x0[0]+i),(x0[1]+j),(x0[2]+k)) = w[q]*fftnorm; + size_t q = ((size_t)i * ny + (size_t)j) * nzp + (size_t)k; + (*wnoise)((x0[0] + i), (x0[1] + j), (x0[2] + k)) 
= w[q] * fftnorm; } - - music::ilog.Print("Applied constraints to level %d.",ilevel); - - + + music::ilog.Print("Applied constraints to level %d.", ilevel); + delete[] w; - - -#ifdef FFTW3 - #ifdef SINGLE_PRECISION - fftwf_destroy_plan(p); - #else - fftw_destroy_plan(p); - #endif -#else - fftwnd_destroy_plan(p); -#endif - }else{ - + + FFTW_API(destroy_plan)(p); + FFTW_API(destroy_plan)(ip); + } + else + { + //... we are operating on a refinement grid, not necessarily the finest - - size_t nx = lx[0], ny = lx[1], nz = lx[2], nzp = nz+2; - fftw_real * w = new fftw_real[nx*ny*nzp]; - - -#ifdef FFTW3 - #ifdef SINGLE_PRECISION - fftwf_complex * cw = reinterpret_cast (w); - fftwf_plan p = fftwf_plan_dft_r2c_3d( nx, ny, nz, w, cw, FFTW_ESTIMATE), - ip = fftwf_plan_dft_c2r_3d( nx, ny, nz, cw, w, FFTW_ESTIMATE); - #else - fftw_complex * cw = reinterpret_cast (w); - fftw_plan p = fftw_plan_dft_r2c_3d( nx, ny, nz, w, cw, FFTW_ESTIMATE), - ip = fftw_plan_dft_c2r_3d( nx, ny, nz, cw, w, FFTW_ESTIMATE); - #endif -#else - fftw_complex * cw = reinterpret_cast (w); - rfftwnd_plan p = rfftw3d_create_plan( nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE), - ip = rfftw3d_create_plan( nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE); -#endif - - double fftnorm = 1.0/sqrt(nx*ny*nz); - - int il = nx/4, ir = 3*nx/4, jl=ny/4, jr = 3*ny/4, kl = nz/4, kr = 3*nz/4; - - #pragma omp parallel for - for( int i=0; i<(int)nx; i++ ) - for( int j=0; j<(int)ny; j++ ) - for( int k=0; k<(int)nz; k++ ) + + size_t nx = lx[0], ny = lx[1], nz = lx[2], nzp = nz + 2; + real_t *w = new real_t[nx * ny * nzp]; + + complex_t *cw = reinterpret_cast(w); + fftw_plan_t p = FFTW_API(plan_dft_r2c_3d)(nx, ny, nz, w, cw, FFTW_ESTIMATE), + ip = FFTW_API(plan_dft_c2r_3d)(nx, ny, nz, cw, w, FFTW_ESTIMATE); + + double fftnorm = 1.0 / sqrt(nx * ny * nz); + + int il = nx / 4, ir = 3 * nx / 4, jl = ny / 4, jr = 3 * ny / 4, kl = nz / 4, kr = 3 * nz / 4; + +#pragma omp parallel for + for (int i = 0; i < 
(int)nx; i++) + for (int j = 0; j < (int)ny; j++) + for (int k = 0; k < (int)nz; k++) { - size_t q = ((size_t)i*ny+(size_t)j)*nzp+(size_t)k; - - if( i>=il && i=jl && j=kl && k= il && i < ir && j >= jl && j < jr && k >= kl && k < kr) + w[q] = (*wnoise)((x0[0] + i), (x0[1] + j), (x0[2] + k)) * fftnorm; else w[q] = 0.0; - } - - int nlvl05 = 1<<(ilevel-1); - int xs = nlvl05-x0[0], ys = nlvl05-x0[1], zs = nlvl05-x0[2]; - - for( size_t i=0; i=il && i=jl && j=kl && k= il && i < ir && j >= jl && j < jr && k >= kl && k < kr) + (*wnoise)((x0[0] + i), (x0[1] + j), (x0[2] + k)) = w[q] * fftnorm; } - - music::ilog.Print("Applied constraints to level %d.",ilevel); - + music::ilog.Print("Applied constraints to level %d.", ilevel); + delete[] w; - - -#ifdef FFTW3 - #ifdef SINGLE_PRECISION - fftwf_destroy_plan(p); - #else - fftw_destroy_plan(p); - #endif -#else - fftwnd_destroy_plan(p); -#endif - + + FFTW_API(destroy_plan)(p); + FFTW_API(destroy_plan)(ip); } - } - }; - - -#endif // __CONSTRAINTS_HH diff --git a/src/convolution_kernel.cc b/src/convolution_kernel.cc index 8bec532..9b4acb1 100644 --- a/src/convolution_kernel.cc +++ b/src/convolution_kernel.cc @@ -7,13 +7,9 @@ */ -#include "general.hh" -#include "densities.hh" -#include "convolution_kernel.hh" - -#if defined(FFTW3) && defined(SINGLE_PRECISION) -typedef fftw_complex fftwf_complex; -#endif +#include +#include +#include namespace convolution { @@ -25,7 +21,6 @@ get_kernel_map() return kernel_map; } -template void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip) { //return; @@ -34,49 +29,26 @@ void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip) double fftnormp = 1.0/sqrt((double)cparam_.nx * (double)cparam_.ny * (double)cparam_.nz); double fftnorm = pow(2.0 * M_PI, 1.5) / sqrt(cparam_.lx * cparam_.ly * cparam_.lz) * fftnormp; - fftw_complex *cdata; - [[maybe_unused]] fftw_complex *ckernel; - fftw_real *data; + complex_t *cdata; + [[maybe_unused]] complex_t *ckernel; + real_t *data; - data = 
reinterpret_cast(pd); - cdata = reinterpret_cast(data); - ckernel = reinterpret_cast(pk->get_ptr()); + data = reinterpret_cast(pd); + cdata = reinterpret_cast(data); + ckernel = reinterpret_cast(pk->get_ptr()); std::cout << " - Performing density convolution... (" << cparam_.nx << ", " << cparam_.ny << ", " << cparam_.nz << ")\n"; music::ulog.Print("Performing kernel convolution on (%5d,%5d,%5d) grid", cparam_.nx, cparam_.ny, cparam_.nz); music::ulog.Print("Performing forward FFT..."); -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan plan, iplan; - plan = fftwf_plan_dft_r2c_3d(cparam_.nx, cparam_.ny, cparam_.nz, data, cdata, FFTW_ESTIMATE); - iplan = fftwf_plan_dft_c2r_3d(cparam_.nx, cparam_.ny, cparam_.nz, cdata, data, FFTW_ESTIMATE); - fftwf_execute(plan); -#else - fftw_plan plan, iplan; - plan = fftw_plan_dft_r2c_3d(cparam_.nx, cparam_.ny, cparam_.nz, data, cdata, FFTW_ESTIMATE); - iplan = fftw_plan_dft_c2r_3d(cparam_.nx, cparam_.ny, cparam_.nz, cdata, data, FFTW_ESTIMATE); + fftw_plan_t plan, iplan; + plan = FFTW_API(plan_dft_r2c_3d)(cparam_.nx, cparam_.ny, cparam_.nz, data, cdata, FFTW_ESTIMATE); + iplan = FFTW_API(plan_dft_c2r_3d)(cparam_.nx, cparam_.ny, cparam_.nz, cdata, data, FFTW_ESTIMATE); - fftw_execute(plan); -#endif -#else - rfftwnd_plan iplan, plan; + FFTW_API(execute)(plan); - plan = rfftw3d_create_plan(cparam_.nx, cparam_.ny, cparam_.nz, - FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE); - - iplan = rfftw3d_create_plan(cparam_.nx, cparam_.ny, cparam_.nz, - FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); - -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), plan, data, NULL); -#else - rfftwnd_one_real_to_complex(plan, data, NULL); -#endif - -#endif //..... 
need a phase shift for baryons for SPH double dstag = 0.0; @@ -163,27 +135,9 @@ void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip) music::ulog.Print("Performing backward FFT..."); -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(iplan); - fftwf_destroy_plan(plan); - fftwf_destroy_plan(iplan); -#else - fftw_execute(iplan); - fftw_destroy_plan(plan); - fftw_destroy_plan(iplan); - -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), iplan, cdata, NULL); -#else - rfftwnd_one_complex_to_real(iplan, cdata, NULL); -#endif - - rfftwnd_destroy_plan(plan); - rfftwnd_destroy_plan(iplan); -#endif + FFTW_API(execute)(iplan); + FFTW_API(destroy_plan)(plan); + FFTW_API(destroy_plan)(iplan); // set the DC mode here to avoid a possible truncation error in single precision { @@ -196,14 +150,12 @@ void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip) } } -template void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip); -template void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip); +void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip); /*****************************************************************************************/ /*** SPECIFIC KERNEL IMPLEMENTATIONS *********************************************/ /*****************************************************************************************/ -template class kernel_k : public kernel { protected: @@ -298,8 +250,4 @@ public: /**************************************************************************************/ /**************************************************************************************/ -namespace -{ -convolution::kernel_creator_concrete> creator_kd("tf_kernel_k_double"); -convolution::kernel_creator_concrete> creator_kf("tf_kernel_k_float"); -} // namespace +convolution::kernel_creator_concrete creator_kd("tf_kernel_k"); diff --git a/src/convolution_kernel.hh b/src/convolution_kernel.hh 
index 4fe13cd..30bb851 100644 --- a/src/convolution_kernel.hh +++ b/src/convolution_kernel.hh @@ -112,7 +112,6 @@ struct kernel_creator_concrete : public kernel_creator }; //! actual implementation of the FFT convolution (independent of the actual kernel) -template void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip); } //namespace convolution diff --git a/src/cosmology.cc b/src/cosmology.cc index 7286733..73e6a5f 100644 --- a/src/cosmology.cc +++ b/src/cosmology.cc @@ -1,11 +1,11 @@ /* - + cosmology.cc - This file is part of MUSIC - - a code to generate multi-scale initial conditions - for cosmological simulations - + a code to generate multi-scale initial conditions + for cosmological simulations + Copyright (C) 2010 Oliver Hahn - + */ #include "cosmology.hh" @@ -13,301 +13,252 @@ #include "mg_operators.hh" #include "general.hh" -#define ACC(i,j,k) ((*u.get_grid((ilevel)))((i),(j),(k))) -#define SQR(x) ((x)*(x)) +#define ACC(i, j, k) ((*u.get_grid((ilevel)))((i), (j), (k))) +#define SQR(x) ((x) * (x)) -#if defined(FFTW3) && defined(SINGLE_PRECISION) -#define fftw_complex fftwf_complex -#endif - - -void compute_LLA_density( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order ) +void compute_LLA_density(const grid_hierarchy &u, grid_hierarchy &fnew, unsigned order) { fnew = u; - - for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) - { - double h = pow(2.0,ilevel), h2 = h*h, h2_4 = 0.25*h2; - meshvar_bnd *pvar = fnew.get_grid(ilevel); - - - if( order == 2 ) - { - #pragma omp parallel for //reduction(+:sum_corr,sum,sum2) - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - { - double D[3][3]; - - D[0][0] = (ACC(ix-1,iy,iz)-2.0*ACC(ix,iy,iz)+ACC(ix+1,iy,iz)) * h2; - D[1][1] = (ACC(ix,iy-1,iz)-2.0*ACC(ix,iy,iz)+ACC(ix,iy+1,iz)) * h2; - D[2][2] = 
(ACC(ix,iy,iz-1)-2.0*ACC(ix,iy,iz)+ACC(ix,iy,iz+1)) * h2; - - D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4; - D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4; - D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4; - - D[0][0] += 1.0; - D[1][1] += 1.0; - D[2][2] += 1.0; - - double det = D[0][0]*D[1][1]*D[2][2] - - D[0][0]*D[1][2]*D[2][1] - - D[1][0]*D[0][1]*D[2][2] - + D[1][0]*D[0][2]*D[1][2] - + D[2][0]*D[0][1]*D[1][2] - - D[2][0]*D[0][2]*D[1][1]; - - (*pvar)(ix,iy,iz) = 1.0/det-1.0; - - } - } - else if ( order == 4 ) - { - #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - { - double D[3][3]; - - D[0][0] = (-ACC(ix-2,iy,iz)+16.*ACC(ix-1,iy,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix+1,iy,iz)-ACC(ix+2,iy,iz)) * h2/12.0; - D[1][1] = (-ACC(ix,iy-2,iz)+16.*ACC(ix,iy-1,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy+1,iz)-ACC(ix,iy+2,iz)) * h2/12.0; - D[2][2] = (-ACC(ix,iy,iz-2)+16.*ACC(ix,iy,iz-1)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy,iz+1)-ACC(ix,iy,iz+2)) * h2/12.0; - - D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4; - D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4; - D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4; - - - D[0][0] += 1.0; - D[1][1] += 1.0; - D[2][2] += 1.0; - - double det = D[0][0]*D[1][1]*D[2][2] - - D[0][0]*D[1][2]*D[2][1] - - D[1][0]*D[0][1]*D[2][2] - + D[1][0]*D[0][2]*D[1][2] - + D[2][0]*D[0][1]*D[1][2] - - D[2][0]*D[0][2]*D[1][1]; - - (*pvar)(ix,iy,iz) = 1.0/det-1.0; - - } - } - else if ( order == 6 ) - { - h2_4/=36.; - h2/=180.; - #pragma omp parallel for - for( int ix = 0; ix < 
(int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - { - double D[3][3]; - - D[0][0] = (2.*ACC(ix-3,iy,iz)-27.*ACC(ix-2,iy,iz)+270.*ACC(ix-1,iy,iz)-490.0*ACC(ix,iy,iz)+270.*ACC(ix+1,iy,iz)-27.*ACC(ix+2,iy,iz)+2.*ACC(ix+3,iy,iz)) * h2; - D[1][1] = (2.*ACC(ix,iy-3,iz)-27.*ACC(ix,iy-2,iz)+270.*ACC(ix,iy-1,iz)-490.0*ACC(ix,iy,iz)+270.*ACC(ix,iy+1,iz)-27.*ACC(ix,iy+2,iz)+2.*ACC(ix,iy+3,iz)) * h2; - D[2][2] = (2.*ACC(ix,iy,iz-3)-27.*ACC(ix,iy,iz-2)+270.*ACC(ix,iy,iz-1)-490.0*ACC(ix,iy,iz)+270.*ACC(ix,iy,iz+1)-27.*ACC(ix,iy,iz+2)+2.*ACC(ix,iy,iz+3)) * h2; - - //.. this is actually 8th order accurate - D[0][1] = D[1][0] = (64.*(ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz)) - -8.*(ACC(ix-2,iy-1,iz)-ACC(ix+2,iy-1,iz)-ACC(ix-2,iy+1,iz)+ACC(ix+2,iy+1,iz) - + ACC(ix-1,iy-2,iz)-ACC(ix-1,iy+2,iz)-ACC(ix+1,iy-2,iz)+ACC(ix+1,iy+2,iz)) - +1.*(ACC(ix-2,iy-2,iz)-ACC(ix-2,iy+2,iz)-ACC(ix+2,iy-2,iz)+ACC(ix+2,iy+2,iz)))*h2_4; - D[0][2] = D[2][0] = (64.*(ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1)) - -8.*(ACC(ix-2,iy,iz-1)-ACC(ix+2,iy,iz-1)-ACC(ix-2,iy,iz+1)+ACC(ix+2,iy,iz+1) - + ACC(ix-1,iy,iz-2)-ACC(ix-1,iy,iz+2)-ACC(ix+1,iy,iz-2)+ACC(ix+1,iy,iz+2)) - +1.*(ACC(ix-2,iy,iz-2)-ACC(ix-2,iy,iz+2)-ACC(ix+2,iy,iz-2)+ACC(ix+2,iy,iz+2)))*h2_4; - D[1][2] = D[2][1] = (64.*(ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1)) - -8.*(ACC(ix,iy-2,iz-1)-ACC(ix,iy+2,iz-1)-ACC(ix,iy-2,iz+1)+ACC(ix,iy+2,iz+1) - + ACC(ix,iy-1,iz-2)-ACC(ix,iy-1,iz+2)-ACC(ix,iy+1,iz-2)+ACC(ix,iy+1,iz+2)) - +1.*(ACC(ix,iy-2,iz-2)-ACC(ix,iy-2,iz+2)-ACC(ix,iy+2,iz-2)+ACC(ix,iy+2,iz+2)))*h2_4; - - D[0][0] += 1.0; - D[1][1] += 1.0; - D[2][2] += 1.0; - - double det = D[0][0]*D[1][1]*D[2][2] - - D[0][0]*D[1][2]*D[2][1] - - D[1][0]*D[0][1]*D[2][2] - + D[1][0]*D[0][2]*D[1][2] - + D[2][0]*D[0][1]*D[1][2] - - D[2][0]*D[0][2]*D[1][1]; - - 
(*pvar)(ix,iy,iz) = 1.0/det-1.0; - - } - - }else - throw std::runtime_error("compute_LLA_density : invalid operator order specified"); + for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel) + { + double h = pow(2.0, ilevel), h2 = h * h, h2_4 = 0.25 * h2; + meshvar_bnd *pvar = fnew.get_grid(ilevel); + + if (order == 2) + { +#pragma omp parallel for // reduction(+:sum_corr,sum,sum2) + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + { + double D[3][3]; + + D[0][0] = (ACC(ix - 1, iy, iz) - 2.0 * ACC(ix, iy, iz) + ACC(ix + 1, iy, iz)) * h2; + D[1][1] = (ACC(ix, iy - 1, iz) - 2.0 * ACC(ix, iy, iz) + ACC(ix, iy + 1, iz)) * h2; + D[2][2] = (ACC(ix, iy, iz - 1) - 2.0 * ACC(ix, iy, iz) + ACC(ix, iy, iz + 1)) * h2; + + D[0][1] = D[1][0] = (ACC(ix - 1, iy - 1, iz) - ACC(ix - 1, iy + 1, iz) - ACC(ix + 1, iy - 1, iz) + ACC(ix + 1, iy + 1, iz)) * h2_4; + D[0][2] = D[2][0] = (ACC(ix - 1, iy, iz - 1) - ACC(ix - 1, iy, iz + 1) - ACC(ix + 1, iy, iz - 1) + ACC(ix + 1, iy, iz + 1)) * h2_4; + D[1][2] = D[2][1] = (ACC(ix, iy - 1, iz - 1) - ACC(ix, iy - 1, iz + 1) - ACC(ix, iy + 1, iz - 1) + ACC(ix, iy + 1, iz + 1)) * h2_4; + + D[0][0] += 1.0; + D[1][1] += 1.0; + D[2][2] += 1.0; + + double det = D[0][0] * D[1][1] * D[2][2] - D[0][0] * D[1][2] * D[2][1] - D[1][0] * D[0][1] * D[2][2] + D[1][0] * D[0][2] * D[1][2] + D[2][0] * D[0][1] * D[1][2] - D[2][0] * D[0][2] * D[1][1]; + + (*pvar)(ix, iy, iz) = 1.0 / det - 1.0; + } + } + else if (order == 4) + { +#pragma omp parallel for + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + { + double D[3][3]; + + D[0][0] = (-ACC(ix - 2, iy, iz) + 16. * ACC(ix - 1, iy, iz) - 30.0 * ACC(ix, iy, iz) + 16. 
* ACC(ix + 1, iy, iz) - ACC(ix + 2, iy, iz)) * h2 / 12.0; + D[1][1] = (-ACC(ix, iy - 2, iz) + 16. * ACC(ix, iy - 1, iz) - 30.0 * ACC(ix, iy, iz) + 16. * ACC(ix, iy + 1, iz) - ACC(ix, iy + 2, iz)) * h2 / 12.0; + D[2][2] = (-ACC(ix, iy, iz - 2) + 16. * ACC(ix, iy, iz - 1) - 30.0 * ACC(ix, iy, iz) + 16. * ACC(ix, iy, iz + 1) - ACC(ix, iy, iz + 2)) * h2 / 12.0; + + D[0][1] = D[1][0] = (ACC(ix - 1, iy - 1, iz) - ACC(ix - 1, iy + 1, iz) - ACC(ix + 1, iy - 1, iz) + ACC(ix + 1, iy + 1, iz)) * h2_4; + D[0][2] = D[2][0] = (ACC(ix - 1, iy, iz - 1) - ACC(ix - 1, iy, iz + 1) - ACC(ix + 1, iy, iz - 1) + ACC(ix + 1, iy, iz + 1)) * h2_4; + D[1][2] = D[2][1] = (ACC(ix, iy - 1, iz - 1) - ACC(ix, iy - 1, iz + 1) - ACC(ix, iy + 1, iz - 1) + ACC(ix, iy + 1, iz + 1)) * h2_4; + + D[0][0] += 1.0; + D[1][1] += 1.0; + D[2][2] += 1.0; + + double det = D[0][0] * D[1][1] * D[2][2] - D[0][0] * D[1][2] * D[2][1] - D[1][0] * D[0][1] * D[2][2] + D[1][0] * D[0][2] * D[1][2] + D[2][0] * D[0][1] * D[1][2] - D[2][0] * D[0][2] * D[1][1]; + + (*pvar)(ix, iy, iz) = 1.0 / det - 1.0; + } + } + else if (order == 6) + { + h2_4 /= 36.; + h2 /= 180.; +#pragma omp parallel for + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + { + double D[3][3]; + + D[0][0] = (2. * ACC(ix - 3, iy, iz) - 27. * ACC(ix - 2, iy, iz) + 270. * ACC(ix - 1, iy, iz) - 490.0 * ACC(ix, iy, iz) + 270. * ACC(ix + 1, iy, iz) - 27. * ACC(ix + 2, iy, iz) + 2. * ACC(ix + 3, iy, iz)) * h2; + D[1][1] = (2. * ACC(ix, iy - 3, iz) - 27. * ACC(ix, iy - 2, iz) + 270. * ACC(ix, iy - 1, iz) - 490.0 * ACC(ix, iy, iz) + 270. * ACC(ix, iy + 1, iz) - 27. * ACC(ix, iy + 2, iz) + 2. * ACC(ix, iy + 3, iz)) * h2; + D[2][2] = (2. * ACC(ix, iy, iz - 3) - 27. * ACC(ix, iy, iz - 2) + 270. * ACC(ix, iy, iz - 1) - 490.0 * ACC(ix, iy, iz) + 270. * ACC(ix, iy, iz + 1) - 27. * ACC(ix, iy, iz + 2) + 2. 
* ACC(ix, iy, iz + 3)) * h2; + + //.. this is actually 8th order accurate + D[0][1] = D[1][0] = (64. * (ACC(ix - 1, iy - 1, iz) - ACC(ix - 1, iy + 1, iz) - ACC(ix + 1, iy - 1, iz) + ACC(ix + 1, iy + 1, iz)) - 8. * (ACC(ix - 2, iy - 1, iz) - ACC(ix + 2, iy - 1, iz) - ACC(ix - 2, iy + 1, iz) + ACC(ix + 2, iy + 1, iz) + ACC(ix - 1, iy - 2, iz) - ACC(ix - 1, iy + 2, iz) - ACC(ix + 1, iy - 2, iz) + ACC(ix + 1, iy + 2, iz)) + 1. * (ACC(ix - 2, iy - 2, iz) - ACC(ix - 2, iy + 2, iz) - ACC(ix + 2, iy - 2, iz) + ACC(ix + 2, iy + 2, iz))) * h2_4; + D[0][2] = D[2][0] = (64. * (ACC(ix - 1, iy, iz - 1) - ACC(ix - 1, iy, iz + 1) - ACC(ix + 1, iy, iz - 1) + ACC(ix + 1, iy, iz + 1)) - 8. * (ACC(ix - 2, iy, iz - 1) - ACC(ix + 2, iy, iz - 1) - ACC(ix - 2, iy, iz + 1) + ACC(ix + 2, iy, iz + 1) + ACC(ix - 1, iy, iz - 2) - ACC(ix - 1, iy, iz + 2) - ACC(ix + 1, iy, iz - 2) + ACC(ix + 1, iy, iz + 2)) + 1. * (ACC(ix - 2, iy, iz - 2) - ACC(ix - 2, iy, iz + 2) - ACC(ix + 2, iy, iz - 2) + ACC(ix + 2, iy, iz + 2))) * h2_4; + D[1][2] = D[2][1] = (64. * (ACC(ix, iy - 1, iz - 1) - ACC(ix, iy - 1, iz + 1) - ACC(ix, iy + 1, iz - 1) + ACC(ix, iy + 1, iz + 1)) - 8. * (ACC(ix, iy - 2, iz - 1) - ACC(ix, iy + 2, iz - 1) - ACC(ix, iy - 2, iz + 1) + ACC(ix, iy + 2, iz + 1) + ACC(ix, iy - 1, iz - 2) - ACC(ix, iy - 1, iz + 2) - ACC(ix, iy + 1, iz - 2) + ACC(ix, iy + 1, iz + 2)) + 1. 
* (ACC(ix, iy - 2, iz - 2) - ACC(ix, iy - 2, iz + 2) - ACC(ix, iy + 2, iz - 2) + ACC(ix, iy + 2, iz + 2))) * h2_4; + + D[0][0] += 1.0; + D[1][1] += 1.0; + D[2][2] += 1.0; + + double det = D[0][0] * D[1][1] * D[2][2] - D[0][0] * D[1][2] * D[2][1] - D[1][0] * D[0][1] * D[2][2] + D[1][0] * D[0][2] * D[1][2] + D[2][0] * D[0][1] * D[1][2] - D[2][0] * D[0][2] * D[1][1]; + + (*pvar)(ix, iy, iz) = 1.0 / det - 1.0; + } + } + else + throw std::runtime_error("compute_LLA_density : invalid operator order specified"); } - } - -void compute_Lu_density( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order ) +void compute_Lu_density(const grid_hierarchy &u, grid_hierarchy &fnew, unsigned order) { fnew = u; - - for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) + + for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel) { - double h = pow(2.0,ilevel), h2 = h*h; + double h = pow(2.0, ilevel), h2 = h * h; meshvar_bnd *pvar = fnew.get_grid(ilevel); - - #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + +#pragma omp parallel for + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) { double D[3][3]; - - D[0][0] = 1.0 + (ACC(ix-1,iy,iz)-2.0*ACC(ix,iy,iz)+ACC(ix+1,iy,iz)) * h2; - D[1][1] = 1.0 + (ACC(ix,iy-1,iz)-2.0*ACC(ix,iy,iz)+ACC(ix,iy+1,iz)) * h2; - D[2][2] = 1.0 + (ACC(ix,iy,iz-1)-2.0*ACC(ix,iy,iz)+ACC(ix,iy,iz+1)) * h2; - - (*pvar)(ix,iy,iz) = -(D[0][0]+D[1][1]+D[2][2] - 3.0); - + + D[0][0] = 1.0 + (ACC(ix - 1, iy, iz) - 2.0 * ACC(ix, iy, iz) + ACC(ix + 1, iy, iz)) * h2; + D[1][1] = 1.0 + (ACC(ix, iy - 1, iz) - 2.0 * ACC(ix, iy, iz) + ACC(ix, iy + 1, iz)) * h2; + D[2][2] = 1.0 + (ACC(ix, iy, iz - 1) - 2.0 * ACC(ix, iy, iz) + 
ACC(ix, iy, iz + 1)) * h2; + + (*pvar)(ix, iy, iz) = -(D[0][0] + D[1][1] + D[2][2] - 3.0); } } - } - -void compute_2LPT_source_FFT( config_file& cf_, const grid_hierarchy& u, grid_hierarchy& fnew ) +void compute_2LPT_source_FFT(config_file &cf_, const grid_hierarchy &u, grid_hierarchy &fnew) { - if( u.levelmin() != u.levelmax() ) + if (u.levelmin() != u.levelmax()) throw std::runtime_error("FFT 2LPT can only be run in Unigrid mode!"); - + fnew = u; - size_t nx,ny,nz,nzp; + size_t nx, ny, nz, nzp; nx = u.get_grid(u.levelmax())->size(0); ny = u.get_grid(u.levelmax())->size(1); nz = u.get_grid(u.levelmax())->size(2); - nzp = 2*(nz/2+1); - + nzp = 2 * (nz / 2 + 1); + //... copy data .................................................. - fftw_real *data = new fftw_real[nx*ny*nzp]; - fftw_complex *cdata = reinterpret_cast (data); - - fftw_complex *cdata_11, *cdata_12, *cdata_13, *cdata_22, *cdata_23, *cdata_33; - fftw_real *data_11, *data_12, *data_13, *data_22, *data_23, *data_33; - - data_11 = new fftw_real[nx*ny*nzp]; cdata_11 = reinterpret_cast (data_11); - data_12 = new fftw_real[nx*ny*nzp]; cdata_12 = reinterpret_cast (data_12); - data_13 = new fftw_real[nx*ny*nzp]; cdata_13 = reinterpret_cast (data_13); - data_22 = new fftw_real[nx*ny*nzp]; cdata_22 = reinterpret_cast (data_22); - data_23 = new fftw_real[nx*ny*nzp]; cdata_23 = reinterpret_cast (data_23); - data_33 = new fftw_real[nx*ny*nzp]; cdata_33 = reinterpret_cast (data_33); - - #pragma omp parallel for - for( int i=0; i<(int)nx; ++i ) - for( size_t j=0; j(data); + + complex_t *cdata_11, *cdata_12, *cdata_13, *cdata_22, *cdata_23, *cdata_33; + real_t *data_11, *data_12, *data_13, *data_22, *data_23, *data_33; + + data_11 = new real_t[nx * ny * nzp]; + cdata_11 = reinterpret_cast(data_11); + data_12 = new real_t[nx * ny * nzp]; + cdata_12 = reinterpret_cast(data_12); + data_13 = new real_t[nx * ny * nzp]; + cdata_13 = reinterpret_cast(data_13); + data_22 = new real_t[nx * ny * nzp]; + cdata_22 = 
reinterpret_cast(data_22); + data_23 = new real_t[nx * ny * nzp]; + cdata_23 = reinterpret_cast(data_23); + data_33 = new real_t[nx * ny * nzp]; + cdata_33 = reinterpret_cast(data_33); + +#pragma omp parallel for + for (int i = 0; i < (int)nx; ++i) + for (size_t j = 0; j < ny; ++j) + for (size_t k = 0; k < nz; ++k) { - size_t idx = ((size_t)i*ny+j)*nzp+k; - data[idx] = (*u.get_grid(u.levelmax()))(i,j,k); + size_t idx = ((size_t)i * ny + j) * nzp + k; + data[idx] = (*u.get_grid(u.levelmax()))(i, j, k); } - + //... perform FFT and Poisson solve................................ -#ifdef FFTW3 - - #ifdef SINGLE_PRECISION - fftwf_plan - plan = fftwf_plan_dft_r2c_3d(nx,ny,nz, data, cdata, FFTW_ESTIMATE), - iplan = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata, data, FFTW_ESTIMATE), - ip11 = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_11, data_11, FFTW_ESTIMATE), - ip12 = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_12, data_12, FFTW_ESTIMATE), - ip13 = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_13, data_13, FFTW_ESTIMATE), - ip22 = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_22, data_22, FFTW_ESTIMATE), - ip23 = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_23, data_23, FFTW_ESTIMATE), - ip33 = fftwf_plan_dft_c2r_3d(nx,ny,nz, cdata_33, data_33, FFTW_ESTIMATE); - - fftwf_execute(plan); - - #else - - fftw_plan - plan = fftw_plan_dft_r2c_3d(nx,ny,nz, data, cdata, FFTW_ESTIMATE), - iplan = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata, data, FFTW_ESTIMATE), - ip11 = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_11, data_11, FFTW_ESTIMATE), - ip12 = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_12, data_12, FFTW_ESTIMATE), - ip13 = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_13, data_13, FFTW_ESTIMATE), - ip22 = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_22, data_22, FFTW_ESTIMATE), - ip23 = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_23, data_23, FFTW_ESTIMATE), - ip33 = fftw_plan_dft_c2r_3d(nx,ny,nz, cdata_33, data_33, FFTW_ESTIMATE); - - fftw_execute(plan); - - #endif - - double kfac = 2.0*M_PI; - double norm = 1.0/((double)(nx*ny*nz)); - - #pragma omp 
parallel for - for( int i=0; i<(int)nx; ++i ) - for( size_t j=0; j(int)nx/2) ii-=nx; - int jj = (int)j; if(jj>(int)ny/2) jj-=ny; + int ii = i; + if (ii > (int)nx / 2) + ii -= nx; + int jj = (int)j; + if (jj > (int)ny / 2) + jj -= ny; double ki = (double)ii; double kj = (double)jj; double kk = (double)l; - + double k[3]; k[0] = (double)ki * kfac; k[1] = (double)kj * kfac; k[2] = (double)kk * kfac; - - size_t idx = ((size_t)i*ny+j)*nzp/2+l; - //double re = cdata[idx][0]; - //double im = cdata[idx][1]; - - cdata_11[idx][0] = -k[0]*k[0] * cdata[idx][0] * norm; - cdata_11[idx][1] = -k[0]*k[0] * cdata[idx][1] * norm; - - cdata_12[idx][0] = -k[0]*k[1] * cdata[idx][0] * norm; - cdata_12[idx][1] = -k[0]*k[1] * cdata[idx][1] * norm; - - cdata_13[idx][0] = -k[0]*k[2] * cdata[idx][0] * norm; - cdata_13[idx][1] = -k[0]*k[2] * cdata[idx][1] * norm; - - cdata_22[idx][0] = -k[1]*k[1] * cdata[idx][0] * norm; - cdata_22[idx][1] = -k[1]*k[1] * cdata[idx][1] * norm; - - cdata_23[idx][0] = -k[1]*k[2] * cdata[idx][0] * norm; - cdata_23[idx][1] = -k[1]*k[2] * cdata[idx][1] * norm; - - cdata_33[idx][0] = -k[2]*k[2] * cdata[idx][0] * norm; - cdata_33[idx][1] = -k[2]*k[2] * cdata[idx][1] * norm; - - - if( i==(int)nx/2||j==ny/2||l==nz/2) + + size_t idx = ((size_t)i * ny + j) * nzp / 2 + l; + // double re = cdata[idx][0]; + // double im = cdata[idx][1]; + + cdata_11[idx][0] = -k[0] * k[0] * cdata[idx][0] * norm; + cdata_11[idx][1] = -k[0] * k[0] * cdata[idx][1] * norm; + + cdata_12[idx][0] = -k[0] * k[1] * cdata[idx][0] * norm; + cdata_12[idx][1] = -k[0] * k[1] * cdata[idx][1] * norm; + + cdata_13[idx][0] = -k[0] * k[2] * cdata[idx][0] * norm; + cdata_13[idx][1] = -k[0] * k[2] * cdata[idx][1] * norm; + + cdata_22[idx][0] = -k[1] * k[1] * cdata[idx][0] * norm; + cdata_22[idx][1] = -k[1] * k[1] * cdata[idx][1] * norm; + + cdata_23[idx][0] = -k[1] * k[2] * cdata[idx][0] * norm; + cdata_23[idx][1] = -k[1] * k[2] * cdata[idx][1] * norm; + + cdata_33[idx][0] = -k[2] * k[2] * cdata[idx][0] * norm; + 
cdata_33[idx][1] = -k[2] * k[2] * cdata[idx][1] * norm; + + if (i == (int)nx / 2 || j == ny / 2 || l == nz / 2) { cdata_11[idx][0] = 0.0; cdata_11[idx][1] = 0.0; - + cdata_12[idx][0] = 0.0; cdata_12[idx][1] = 0.0; - + cdata_13[idx][0] = 0.0; cdata_13[idx][1] = 0.0; - + cdata_22[idx][0] = 0.0; cdata_22[idx][1] = 0.0; - + cdata_23[idx][0] = 0.0; cdata_23[idx][1] = 0.0; - + cdata_33[idx][0] = 0.0; cdata_33[idx][1] = 0.0; } - } - + delete[] data; /*cdata_11[0][0] = 0.0; cdata_11[0][1] = 0.0; cdata_12[0][0] = 0.0; cdata_12[0][1] = 0.0; @@ -315,175 +266,38 @@ void compute_2LPT_source_FFT( config_file& cf_, const grid_hierarchy& u, grid_hi cdata_22[0][0] = 0.0; cdata_22[0][1] = 0.0; cdata_23[0][0] = 0.0; cdata_23[0][1] = 0.0; cdata_33[0][0] = 0.0; cdata_33[0][1] = 0.0;*/ - - -#ifdef SINGLE_PRECISION - fftwf_execute(ip11); - fftwf_execute(ip12); - fftwf_execute(ip13); - fftwf_execute(ip22); - fftwf_execute(ip23); - fftwf_execute(ip33); - - fftwf_destroy_plan(plan); - fftwf_destroy_plan(iplan); - fftwf_destroy_plan(ip11); - fftwf_destroy_plan(ip12); - fftwf_destroy_plan(ip13); - fftwf_destroy_plan(ip22); - fftwf_destroy_plan(ip23); - fftwf_destroy_plan(ip33); -#else - fftw_execute(ip11); - fftw_execute(ip12); - fftw_execute(ip13); - fftw_execute(ip22); - fftw_execute(ip23); - fftw_execute(ip33); - - fftw_destroy_plan(plan); - fftw_destroy_plan(iplan); - fftw_destroy_plan(ip11); - fftw_destroy_plan(ip12); - fftw_destroy_plan(ip13); - fftw_destroy_plan(ip22); - fftw_destroy_plan(ip23); - fftw_destroy_plan(ip33); -#endif -//#endif - - -#else - rfftwnd_plan - plan = rfftw3d_create_plan( nx,ny,nz, - FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE), - iplan = rfftw3d_create_plan( nx,ny,nz, - FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE); - - - #ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, data, NULL ); - #else - rfftwnd_one_real_to_complex( plan, data, NULL ); - #endif -//#endif - //double fac = -1.0/(nx*ny*nz); - double kfac 
= 2.0*M_PI; - double norm = 1.0/((double)(nx*ny*nz)); - - #pragma omp parallel for - for( int i=0; i<(int)nx; ++i ) - for( size_t j=0; j(int)(nx/2)) ii-=(int)nx; - int jj = (int)j; if(jj>(int)(ny/2)) jj-=(int)ny; - double ki = (double)ii; - double kj = (double)jj; - double kk = (double)l; - - double k[3]; - k[0] = (double)ki * kfac; - k[1] = (double)kj * kfac; - k[2] = (double)kk * kfac; - - size_t idx = ((size_t)i*ny+j)*nzp/2+l; - //double re = cdata[idx].re; - //double im = cdata[idx].im; - - cdata_11[idx].re = -k[0]*k[0] * cdata[idx].re * norm; - cdata_11[idx].im = -k[0]*k[0] * cdata[idx].im * norm; - - cdata_12[idx].re = -k[0]*k[1] * cdata[idx].re * norm; - cdata_12[idx].im = -k[0]*k[1] * cdata[idx].im * norm; - - cdata_13[idx].re = -k[0]*k[2] * cdata[idx].re * norm; - cdata_13[idx].im = -k[0]*k[2] * cdata[idx].im * norm; - - cdata_22[idx].re = -k[1]*k[1] * cdata[idx].re * norm; - cdata_22[idx].im = -k[1]*k[1] * cdata[idx].im * norm; - - cdata_23[idx].re = -k[1]*k[2] * cdata[idx].re * norm; - cdata_23[idx].im = -k[1]*k[2] * cdata[idx].im * norm; - - cdata_33[idx].re = -k[2]*k[2] * cdata[idx].re * norm; - cdata_33[idx].im = -k[2]*k[2] * cdata[idx].im * norm; - - - if( i==(int)(nx/2)||j==ny/2||l==nz/2) - { - cdata_11[idx].re = 0.0; - cdata_11[idx].im = 0.0; - - cdata_12[idx].re = 0.0; - cdata_12[idx].im = 0.0; - - cdata_13[idx].re = 0.0; - cdata_13[idx].im = 0.0; - - cdata_22[idx].re = 0.0; - cdata_22[idx].im = 0.0; - - cdata_23[idx].re = 0.0; - cdata_23[idx].im = 0.0; - - cdata_33[idx].re = 0.0; - cdata_33[idx].im = 0.0; - } - - } - - delete[] data; - /*cdata_11[0].re = 0.0; cdata_11[0].im = 0.0; - cdata_12[0].re = 0.0; cdata_12[0].im = 0.0; - cdata_13[0].re = 0.0; cdata_13[0].im = 0.0; - cdata_22[0].re = 0.0; cdata_22[0].im = 0.0; - cdata_23[0].re = 0.0; cdata_23[0].im = 0.0; - cdata_33[0].re = 0.0; cdata_33[0].im = 0.0;*/ - - -#ifndef SINGLETHREAD_FFTW - //rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL ); - 
rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_11, NULL ); - rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_12, NULL ); - rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_13, NULL ); - rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_22, NULL ); - rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_23, NULL ); - rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata_33, NULL ); -#else - //rfftwnd_one_complex_to_real( iplan, cdata, NULL ); - rfftwnd_one_complex_to_real(iplan, cdata_11, NULL ); - rfftwnd_one_complex_to_real(iplan, cdata_12, NULL ); - rfftwnd_one_complex_to_real(iplan, cdata_13, NULL ); - rfftwnd_one_complex_to_real(iplan, cdata_22, NULL ); - rfftwnd_one_complex_to_real(iplan, cdata_23, NULL ); - rfftwnd_one_complex_to_real(iplan, cdata_33, NULL ); -#endif - - - - rfftwnd_destroy_plan(plan); - rfftwnd_destroy_plan(iplan); -#endif + size_t ii = ((size_t)i * ny + j) * nzp + k; + (*fnew.get_grid(u.levelmax()))(i, j, k) = ((data_11[ii] * data_22[ii] - data_12[ii] * data_12[ii]) + + (data_11[ii] * data_33[ii] - data_13[ii] * data_13[ii]) + + (data_22[ii] * data_33[ii] - data_23[ii] * data_23[ii])); - - //... copy data .......................................... - #pragma omp parallel for - for( int i=0; i<(int)nx; ++i ) - for( size_t j=0; j(int)fnew.levelmin(); --i ) - mg_straight().restrict( (*fnew.get_grid(i)), (*fnew.get_grid(i-1)) ); - + + //.. 
subtract global mean so the multi-grid poisson solver behaves well + + for (int i = fnew.levelmax(); i > (int)fnew.levelmin(); --i) + mg_straight().restrict((*fnew.get_grid(i)), (*fnew.get_grid(i - 1))); + long double sum = 0.0; - int nx,ny,nz; - + int nx, ny, nz; + nx = fnew.get_grid(fnew.levelmin())->size(0); ny = fnew.get_grid(fnew.levelmin())->size(1); nz = fnew.get_grid(fnew.levelmin())->size(2); - - for( int ix=0; ixsize(0); ny = fnew.get_grid(i)->size(1); nz = fnew.get_grid(i)->size(2); - - for( int ix=0; ix(rcoarse); + real_t *rcoarse = new real_t[nxF * nyF * nzFp]; + complex_t *ccoarse = reinterpret_cast(rcoarse); - fftw_real *rfine = new fftw_real[nxf * nyf * nzfp]; - fftw_complex *cfine = reinterpret_cast(rfine); + real_t *rfine = new real_t[nxf * nyf * nzfp]; + complex_t *cfine = reinterpret_cast(rfine); -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan - pf = fftwf_plan_dft_r2c_3d(nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE), - ipc = fftwf_plan_dft_c2r_3d(nxF, nyF, nzF, ccoarse, rcoarse, FFTW_ESTIMATE); -#else - fftw_plan - pf = fftw_plan_dft_r2c_3d(nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE), - ipc = fftw_plan_dft_c2r_3d(nxF, nyF, nzF, ccoarse, rcoarse, FFTW_ESTIMATE); -#endif - -#else - rfftwnd_plan - pf = rfftw3d_create_plan(nxf, nyf, nzf, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE), - ipc = rfftw3d_create_plan(nxF, nyF, nzF, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); -#endif + fftw_plan_t + pf = FFTW_API(plan_dft_r2c_3d)(nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE), + ipc = FFTW_API(plan_dft_c2r_3d)(nxF, nyF, nzF, ccoarse, rcoarse, FFTW_ESTIMATE); #pragma omp parallel for for (int i = 0; i < (int)nxf; i++) @@ -70,19 +57,7 @@ void fft_coarsen(m1 &v, m2 &V) rfine[q] = v(i, j, k); } -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(pf); -#else - fftw_execute(pf); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pf, rfine, NULL); -#else - rfftwnd_one_real_to_complex(pf, 
rfine, NULL); -#endif -#endif + FFTW_API(execute)(pf); double fftnorm = 1.0 / ((double)nxF * (double)nyF * (double)nzF); @@ -125,19 +100,7 @@ void fft_coarsen(m1 &v, m2 &V) delete[] rfine; -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(ipc); -#else - fftw_execute(ipc); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), ipc, ccoarse, NULL); -#else - rfftwnd_one_complex_to_real(ipc, ccoarse, NULL); -#endif -#endif + FFTW_API(execute)(ipc); #pragma omp parallel for for (int i = 0; i < (int)nxF; i++) @@ -150,18 +113,8 @@ void fft_coarsen(m1 &v, m2 &V) delete[] rcoarse; -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_destroy_plan(pf); - fftwf_destroy_plan(ipc); -#else - fftw_destroy_plan(pf); - fftw_destroy_plan(ipc); -#endif -#else - rfftwnd_destroy_plan(pf); - rfftwnd_destroy_plan(ipc); -#endif + FFTW_API(destroy_plan)(pf); + FFTW_API(destroy_plan)(ipc); } template @@ -191,14 +144,14 @@ void fft_interpolate(m1 &V, m2 &v, bool from_basegrid = false) size_t nxc = nxf / 2, nyc = nyf / 2, nzc = nzf / 2, nzcp = nzf / 2 + 2; - fftw_real *rcoarse = new fftw_real[nxc * nyc * nzcp]; - fftw_complex *ccoarse = reinterpret_cast(rcoarse); + real_t *rcoarse = new real_t[nxc * nyc * nzcp]; + complex_t *ccoarse = reinterpret_cast(rcoarse); - fftw_real *rfine = new fftw_real[nxf * nyf * nzfp]; - fftw_complex *cfine = reinterpret_cast(rfine); + real_t *rfine = new real_t[nxf * nyf * nzfp]; + complex_t *cfine = reinterpret_cast(rfine); // copy coarse data to rcoarse[.] 
- memset(rcoarse, 0, sizeof(fftw_real) * nxc * nyc * nzcp); + memset(rcoarse, 0, sizeof(real_t) * nxc * nyc * nzcp); #pragma omp parallel for for (int i = 0; i < (int)nxc; ++i) @@ -221,36 +174,13 @@ void fft_interpolate(m1 &V, m2 &v, bool from_basegrid = false) rfine[q] = v(i, j, k); } -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan - pc = fftwf_plan_dft_r2c_3d(nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE), - pf = fftwf_plan_dft_r2c_3d(nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE), - ipf = fftwf_plan_dft_c2r_3d(nxf, nyf, nzf, cfine, rfine, FFTW_ESTIMATE); - fftwf_execute(pc); - fftwf_execute(pf); -#else - fftw_plan - pc = fftw_plan_dft_r2c_3d(nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE), - pf = fftw_plan_dft_r2c_3d(nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE), - ipf = fftw_plan_dft_c2r_3d(nxf, nyf, nzf, cfine, rfine, FFTW_ESTIMATE); - fftw_execute(pc); - fftw_execute(pf); -#endif -#else - rfftwnd_plan - pc = rfftw3d_create_plan(nxc, nyc, nzc, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE), - pf = rfftw3d_create_plan(nxf, nyf, nzf, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE), - ipf = rfftw3d_create_plan(nxf, nyf, nzf, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pc, rcoarse, NULL); - rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pf, rfine, NULL); -#else - rfftwnd_one_real_to_complex(pc, rcoarse, NULL); - rfftwnd_one_real_to_complex(pf, rfine, NULL); -#endif -#endif + fftw_plan_t + pc = FFTW_API(plan_dft_r2c_3d)(nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE), + pf = FFTW_API(plan_dft_r2c_3d)(nxf, nyf, nzf, rfine, cfine, FFTW_ESTIMATE), + ipf = FFTW_API(plan_dft_c2r_3d)(nxf, nyf, nzf, cfine, rfine, FFTW_ESTIMATE); + FFTW_API(execute)(pc); + FFTW_API(execute)(pf); /*************************************************/ //.. 
perform actual interpolation @@ -300,28 +230,11 @@ void fft_interpolate(m1 &V, m2 &v, bool from_basegrid = false) /*************************************************/ -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(ipf); - fftwf_destroy_plan(pf); - fftwf_destroy_plan(pc); - fftwf_destroy_plan(ipf); -#else - fftw_execute(ipf); - fftw_destroy_plan(pf); - fftw_destroy_plan(pc); - fftw_destroy_plan(ipf); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), ipf, cfine, NULL); -#else - rfftwnd_one_complex_to_real(ipf, cfine, NULL); -#endif - fftwnd_destroy_plan(pf); - fftwnd_destroy_plan(pc); - fftwnd_destroy_plan(ipf); -#endif + FFTW_API(execute)(ipf); + + FFTW_API(destroy_plan)(pf); + FFTW_API(destroy_plan)(pc); + FFTW_API(destroy_plan)(ipf); // copy back and normalize #pragma omp parallel for @@ -349,8 +262,6 @@ void GenerateDensityUnigrid(config_file &cf, transfer_function *ptf, tf_type typ levelmin = cf.get_value_safe("setup", "levelmin_TF", levelminPoisson); levelmax = cf.get_value("setup", "levelmax"); - bool kspace = cf.get_value("setup", "kspace_TF"); - bool fix = cf.get_value_safe("setup","fix_mode_amplitude",false); bool flip = cf.get_value_safe("setup","flip_mode_amplitude",false); @@ -360,30 +271,10 @@ void GenerateDensityUnigrid(config_file &cf, transfer_function *ptf, tf_type typ music::ulog.Print("Running unigrid density convolution..."); //... 
select the transfer function to be used - convolution::kernel_creator *the_kernel_creator; + convolution::kernel_creator *the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k"]; - if (kspace) - { - std::cout << " - Using k-space transfer function kernel.\n"; - music::ulog.Print("Using k-space transfer function kernel."); - -#ifdef SINGLE_PRECISION - the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k_float"]; -#else - the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k_double"]; -#endif - } - else - { - std::cout << " - Using real-space transfer function kernel.\n"; - music::ulog.Print("Using real-space transfer function kernel."); - -#ifdef SINGLE_PRECISION - the_kernel_creator = convolution::get_kernel_map()["tf_kernel_real_float"]; -#else - the_kernel_creator = convolution::get_kernel_map()["tf_kernel_real_double"]; -#endif - } + std::cout << " - Using k-space transfer function kernel.\n"; + music::ulog.Print("Using k-space transfer function kernel."); //... initialize convolution kernel convolution::kernel *the_tf_kernel = the_kernel_creator->create(cf, ptf, refh, type); @@ -402,7 +293,7 @@ void GenerateDensityUnigrid(config_file &cf, transfer_function *ptf, tf_type typ the_tf_kernel->fetch_kernel(levelmin, false); //... perform convolution - convolution::perform(the_tf_kernel, reinterpret_cast(top->get_data_ptr()), shift, fix, flip); + convolution::perform(the_tf_kernel, reinterpret_cast(top->get_data_ptr()), shift, fix, flip); //... 
clean up kernel delete the_tf_kernel; @@ -451,17 +342,11 @@ void GenerateDensityHierarchy(config_file &cf, transfer_function *ptf, tf_type t unsigned nbase = 1 << levelmin; - convolution::kernel_creator *the_kernel_creator; + convolution::kernel_creator *the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k"]; std::cout << " - Using k-space transfer function kernel.\n"; music::ulog.Print("Using k-space transfer function kernel."); -#ifdef SINGLE_PRECISION - the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k_float"]; -#else - the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k_double"]; -#endif - convolution::kernel *the_tf_kernel = the_kernel_creator->create(cf, ptf, refh, type); /***** PERFORM CONVOLUTIONS *****/ @@ -475,7 +360,7 @@ void GenerateDensityHierarchy(config_file &cf, transfer_function *ptf, tf_type t top = new DensityGrid(nbase, nbase, nbase); music::ilog.Print("Performing noise convolution on level %3d", levelmin); rand.load(*top, levelmin); - convolution::perform(the_tf_kernel->fetch_kernel(levelmin, false), reinterpret_cast(top->get_data_ptr()), shift, fix, flip); + convolution::perform(the_tf_kernel->fetch_kernel(levelmin, false), reinterpret_cast(top->get_data_ptr()), shift, fix, flip); delta.create_base_hierarchy(levelmin); top->copy(*delta.get_grid(levelmin)); @@ -506,7 +391,7 @@ void GenerateDensityHierarchy(config_file &cf, transfer_function *ptf, tf_type t // load white noise for patch rand.load(*fine, levelmin + i); - convolution::perform(the_tf_kernel->fetch_kernel(levelmin + i, true), + convolution::perform(the_tf_kernel->fetch_kernel(levelmin + i, true), reinterpret_cast(fine->get_data_ptr()), shift, fix, flip); if( fourier_splicing ){ diff --git a/src/fd_schemes.hh b/src/fd_schemes.hh index fa4be40..1ad2dd9 100644 --- a/src/fd_schemes.hh +++ b/src/fd_schemes.hh @@ -11,12 +11,13 @@ #ifndef __FD_SCHEMES_HH #define __FD_SCHEMES_HH +#include #include #include //! 
abstract implementation of the Poisson/Force scheme -template< class L, class G, typename real_t=double > +template< class L, class G> class scheme { public: @@ -57,10 +58,9 @@ public: }; //! base class for finite difference gradients -template< int nextent, typename T > +template< int nextent> class gradient { - typedef T real_t; std::vector m_stencil; const unsigned nl; public: @@ -110,20 +110,21 @@ public: }; //! base class for finite difference stencils -template< int nextent, typename real_t > +template< int nextent> class base_stencil { protected: - std::vector m_stencil; - const unsigned nl; + static constexpr size_t nl{2*nextent+1}; + std::array m_stencil; + public: bool m_modsource; public: - base_stencil( bool amodsource = false ) - : nl( 2*nextent+1 ), m_modsource( amodsource ) + explicit base_stencil( bool amodsource = false ) + : m_modsource( amodsource ) { - m_stencil.assign(nl*nl*nl,(real_t)0.0); + m_stencil.fill( (real_t)0.0 ); } real_t& operator()(int i, int j, int k) @@ -176,8 +177,7 @@ public: //... Implementation of the Gradient schemes............................................ -template< typename real_t > -class deriv_2P : public gradient<1,real_t> +class deriv_2P : public gradient<1> { public: @@ -194,8 +194,7 @@ public: //... Implementation of the Laplacian schemes.......................................... //! 
7-point, 2nd order finite difference Laplacian -template< typename real_t > -class stencil_7P : public base_stencil<1,real_t> +class stencil_7P : public base_stencil<1> { public: @@ -214,7 +213,7 @@ public: inline real_t apply( const C& c, const int i, const int j, const int k ) const { //return c(i-1,j,k)+c(i+1,j,k)+c(i,j-1,k)+c(i,j+1,k)+c(i,j,k-1)+c(i,j,k+1)-6.0*c(i,j,k); - return (double)c(i-1,j,k)+(double)c(i+1,j,k)+(double)c(i,j-1,k)+(double)c(i,j+1,k)+(double)c(i,j,k-1)+(double)c(i,j,k+1)-6.0*(double)c(i,j,k); + return (real_t)c(i-1,j,k)+(real_t)c(i+1,j,k)+(real_t)c(i,j-1,k)+(real_t)c(i,j+1,k)+(real_t)c(i,j,k-1)+(real_t)c(i,j,k+1)-6.0*(real_t)c(i,j,k); } template< class C > @@ -230,8 +229,7 @@ public: }; //! 13-point, 4th order finite difference Laplacian -template< typename real_t > -class stencil_13P : public base_stencil<2,real_t> +class stencil_13P : public base_stencil<2> { public: @@ -279,8 +277,7 @@ public: //! 19-point, 6th order finite difference Laplacian -template< typename real_t > -class stencil_19P : public base_stencil<3,real_t> +class stencil_19P : public base_stencil<3> { public: @@ -339,7 +336,6 @@ public: //! 
flux operator for the 4th order FD Laplacian -template< typename real_t > class Laplace_flux_O4 { public: @@ -354,7 +350,7 @@ public: template< class C > inline double apply_x( int idir, const C& c, const int i, const int j, const int k ) { - double fac = -((double)idir)/12.0; + double fac = -((real_t)idir)/12.0; return fac*(-c(i-2,j,k)+15.0*c(i-1,j,k)-15.0*c(i,j,k)+c(i+1,j,k)); } @@ -369,7 +365,7 @@ public: template< class C > inline double apply_y( int idir, const C& c, const int i, const int j, const int k ) { - double fac = -((double)idir)/12.0; + double fac = -((real_t)idir)/12.0; return fac*(-c(i,j-2,k)+15.0*c(i,j-1,k)-15.0*c(i,j,k)+c(i,j+1,k)); } @@ -384,7 +380,7 @@ public: template< class C > inline double apply_z( int idir, const C& c, const int i, const int j, const int k ) { - double fac = -((double)idir)/12.0; + double fac = -((real_t)idir)/12.0; return fac*(-c(i,j,k-2)+15.0*c(i,j,k-1)-15.0*c(i,j,k)+c(i,j,k+1)); } @@ -392,7 +388,6 @@ public: //! flux operator for the 6th order FD Laplacian -template< typename real_t > class Laplace_flux_O6 { public: @@ -408,7 +403,7 @@ public: template< class C > inline double apply_x( int idir, const C& c, const int i, const int j, const int k ) { - double fac = -((double)idir)/180.0; + real_t fac = -((real_t)idir)/180.0; return fac*(2.*c(i-3,j,k)-25.*c(i-2,j,k)+245.*c(i-1,j,k)-245.0*c(i,j,k)+25.*c(i+1,j,k)-2.*c(i+2,j,k)); } @@ -423,7 +418,7 @@ public: template< class C > inline double apply_y( int idir, const C& c, const int i, const int j, const int k ) { - double fac = -((double)idir)/180.0; + real_t fac = -((real_t)idir)/180.0; return fac*(2.*c(i,j-3,k)-25.*c(i,j-2,k)+245.*c(i,j-1,k)-245.0*c(i,j,k)+25.*c(i,j+1,k)-2.*c(i,j+2,k)); } @@ -438,7 +433,7 @@ public: template< class C > inline double apply_z( int idir, const C& c, const int i, const int j, const int k ) { - double fac = -((double)idir)/180.0; + real_t fac = -((real_t)idir)/180.0; return 
fac*(2.*c(i,j,k-3)-25.*c(i,j,k-2)+245.*c(i,j,k-1)-245.0*c(i,j,k)+25.*c(i,j,k+1)-2.*c(i,j,k+2)); } diff --git a/src/general.hh b/src/general.hh index 789c35c..104cae6 100644 --- a/src/general.hh +++ b/src/general.hh @@ -8,75 +8,56 @@ */ -#ifndef __GENERAL_HH -#define __GENERAL_HH +#pragma once -#include "logger.hh" +#include +#include #include -#include "omp.h" +#include -#ifdef WITH_MPI - #ifdef MANNO - #include - #else - #include - #endif -#else -#include +#include + +// include CMake controlled configuration settings +#include "cmake_config.hh" + +#if defined(USE_PRECISION_FLOAT) + using real_t = float; + using complex_t = fftwf_complex; + #define FFTW_PREFIX fftwf +#elif defined(USE_PRECISION_DOUBLE) + using real_t = double; + using complex_t = fftw_complex; + #define FFTW_PREFIX fftw +#elif defined(USE_PRECISION_LONGDOUBLE) + using real_t = long double; + using complex_t = fftwl_complex; + #define FFTW_PREFIX fftwl #endif -#ifdef FFTW3 - #include - #if defined(SINGLE_PRECISION) - typedef float fftw_real; - #else - typedef double fftw_real; - #endif +#define FFTW_GEN_NAME_PRIM(a, b) a##_##b +#define FFTW_GEN_NAME(a, b) FFTW_GEN_NAME_PRIM(a, b) +#define FFTW_API(x) FFTW_GEN_NAME(FFTW_PREFIX, x) -#else - #if defined(SINGLE_PRECISION) and not defined(SINGLETHREAD_FFTW) - #include - #include - #elif defined(SINGLE_PRECISION) and defined(SINGLETHREAD_FFTW) - #include - #elif not defined(SINGLE_PRECISION) and not defined(SINGLETHREAD_FFTW) - #include - #include - #elif not defined(SINGLE_PRECISION) and defined(SINGLETHREAD_FFTW) - #include - #endif -#endif +using fftw_plan_t = FFTW_GEN_NAME(FFTW_PREFIX, plan); -#ifdef SINGLE_PRECISION - typedef float real_t; -#else - typedef double real_t; -#endif +#define RE(x) ((x)[0]) +#define IM(x) ((x)[1]) +#include #include using vec3_t = std::array; -#ifdef FFTW3 - #define RE(x) ((x)[0]) - #define IM(x) ((x)[1]) -#else - #define RE(x) ((x).re) - #define IM(x) ((x).im) -#endif - -#if defined(FFTW3) && defined(SINGLE_PRECISION) 
-#define fftw_complex fftwf_complex -#endif - - - -#include - -#include "config_file.hh" -//#include "mesh.hh" - - +namespace CONFIG +{ +// extern int MPI_thread_support; +// extern int MPI_task_rank; +// extern int MPI_task_size; +// extern bool MPI_ok; +// extern bool MPI_threads_ok; +extern bool FFTW_threads_ok; +extern int num_threads; +} // namespace CONFIG //! compute square of argument template< typename T > @@ -180,6 +161,3 @@ inline bool is_number(const std::string& s) return true; } - - -#endif diff --git a/src/main.cc b/src/main.cc index ca316a8..b18fced 100644 --- a/src/main.cc +++ b/src/main.cc @@ -13,6 +13,8 @@ #include #include +#include + #include #include #include @@ -26,25 +28,40 @@ extern "C" } #endif -#include "general.hh" -#include "defaults.hh" -#include "output.hh" +#include +#include -#include "config_file.hh" +#include +#include +#include -#include "poisson.hh" -#include "mg_solver.hh" -#include "fd_schemes.hh" -#include "random.hh" -#include "densities.hh" +#include -#include "convolution_kernel.hh" -#include "cosmology.hh" -#include "transfer_function.hh" +#include +#include +#include +#include +#include + +#include +#include +#include #define THE_CODE_NAME "music!" 
#define THE_CODE_VERSION "2.0a" +// initialise with "default" values +namespace CONFIG{ +// int MPI_thread_support = -1; +// int MPI_task_rank = 0; +// int MPI_task_size = 1; +// bool MPI_ok = false; +// bool MPI_threads_ok = false; +bool FFTW_threads_ok = false; +int num_threads = 1; +} + + namespace music { @@ -87,11 +104,6 @@ void splash(void) #if defined(CMAKE_BUILD) music::ilog.Print("Version built from git rev.: %s, tag: %s, branch: %s", GIT_REV, GIT_TAG, GIT_BRANCH); -#endif -#if defined(SINGLE_PRECISION) - music::ilog.Print("Version was compiled for single precision."); -#else - music::ilog.Print("Version was compiled for double precision."); #endif std::cout << "\n\n"; } @@ -294,6 +306,50 @@ void add_constant_value( grid_hierarchy &u, const double val ) } } +#include +void output_system_info() +{ + std::feclearexcept(FE_ALL_EXCEPT); + + //------------------------------------------------------------------------------ + // Write code configuration to screen + //------------------------------------------------------------------------------ + // hardware related infos + music::ilog << std::setw(32) << std::left << "CPU vendor string" << " : " << SystemStat::Cpu().get_CPUstring() << std::endl; + + // multi-threading related infos + music::ilog << std::setw(32) << std::left << "Available HW threads / task" << " : " << std::thread::hardware_concurrency() << " (" << CONFIG::num_threads << " used)" << std::endl; + + // memory related infos + SystemStat::Memory mem; + + unsigned availpmem = mem.get_AvailMem()/1024/1024; + unsigned usedpmem = mem.get_UsedMem()/1024/1024; + unsigned maxpmem = availpmem, minpmem = availpmem; + unsigned maxupmem = usedpmem, minupmem = usedpmem; + + music::ilog << std::setw(32) << std::left << "Total system memory (phys)" << " : " << mem.get_TotalMem()/1024/1024 << " Mb" << std::endl; + music::ilog << std::setw(32) << std::left << "Used system memory (phys)" << " : " << "Max: " << maxupmem << " Mb, Min: " << minupmem << " Mb" << 
std::endl; + music::ilog << std::setw(32) << std::left << "Available system memory (phys)" << " : " << "Max: " << maxpmem << " Mb, Min: " << minpmem << " Mb" << std::endl; + + // Kernel related infos + SystemStat::Kernel kern; + auto kinfo = kern.get_kernel_info(); + music::ilog << std::setw(32) << std::left << "OS/Kernel version" << " : " << kinfo.kernel << " version " << kinfo.major << "." << kinfo.minor << " build " << kinfo.build_number << std::endl; + + // FFTW related infos + music::ilog << std::setw(32) << std::left << "FFTW version" << " : " << FFTW_API(version) << std::endl; + music::ilog << std::setw(32) << std::left << "FFTW supports multi-threading" << " : " << (CONFIG::FFTW_threads_ok? "yes" : "no") << std::endl; + music::ilog << std::setw(32) << std::left << "FFTW mode" << " : "; +#if defined(FFTW_MODE_PATIENT) + music::ilog << "FFTW_PATIENT" << std::endl; +#elif defined(FFTW_MODE_MEASURE) + music::ilog << "FFTW_MEASURE" << std::endl; +#else + music::ilog << "FFTW_ESTIMATE" << std::endl; +#endif +} + /*****************************************************************************************************/ /*****************************************************************************************************/ /*****************************************************************************************************/ @@ -342,25 +398,6 @@ int main(int argc, const char *argv[]) music::ulog.Print("Running %s, version %s", THE_CODE_NAME, THE_CODE_VERSION); music::ulog.Print("Log is for run started %s", asctime(localtime(<ime))); -#ifdef FFTW3 - music::ulog.Print("Code was compiled using FFTW version 3.x"); -#else - music::ulog.Print("Code was compiled using FFTW version 2.x"); -#endif - -#ifdef SINGLETHREAD_FFTW - music::ulog.Print("Code was compiled for single-threaded FFTW"); -#else - music::ulog.Print("Code was compiled for multi-threaded FFTW"); - music::ulog.Print("Running with a maximum of %d OpenMP threads", omp_get_max_threads()); -#endif - -#ifdef 
SINGLE_PRECISION - music::ulog.Print("Code was compiled for single precision."); -#else - music::ulog.Print("Code was compiled for double precision."); -#endif - //------------------------------------------------------------------------------ //... read and interpret config file //------------------------------------------------------------------------------ @@ -369,6 +406,13 @@ int main(int argc, const char *argv[]) bool force_shift(false); double boxlength; + + //------------------------------------------------------------------------------ + //... init multi-threading + //------------------------------------------------------------------------------ + CONFIG::FFTW_threads_ok = FFTW_API(init_threads)(); + CONFIG::num_threads = cf.get_value_safe("execution", "NumThreads",std::thread::hardware_concurrency()); + //------------------------------------------------------------------------------ //... initialize some parameters about grid set-up //------------------------------------------------------------------------------ @@ -403,24 +447,6 @@ int main(int argc, const char *argv[]) else music::ilog.Print("Using real space sampled transfer functions..."); - //------------------------------------------------------------------------------ - //... initialize multithread FFTW - //------------------------------------------------------------------------------ - -#if not defined(SINGLETHREAD_FFTW) -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_init_threads(); - fftwf_plan_with_nthreads(omp_get_max_threads()); -#else - fftw_init_threads(); - fftw_plan_with_nthreads(omp_get_max_threads()); -#endif -#else - fftw_threads_init(); -#endif -#endif - //------------------------------------------------------------------------------ //... 
initialize cosmology //------------------------------------------------------------------------------ @@ -1373,13 +1399,8 @@ int main(int argc, const char *argv[]) delete the_transfer_function_plugin; delete the_poisson_solver; -#if defined(FFTW3) and not defined(SINGLETHREAD_FFTW) -#ifdef SINGLE_PRECISION - fftwf_cleanup_threads(); -#else - fftw_cleanup_threads(); -#endif -#endif + if( CONFIG::FFTW_threads_ok ) + FFTW_API(cleanup_threads)(); //------------------------------------------------------------------------------ //... we are done ! diff --git a/src/mg_interp.hh b/src/mg_interp.hh index c90115c..7a601da 100644 --- a/src/mg_interp.hh +++ b/src/mg_interp.hh @@ -290,12 +290,12 @@ struct cubic_interp { fine_flux = 0.0; - fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz); - fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz); - fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz+1); - fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz+1); - coarse_flux = Laplace_flux_O4().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0; + coarse_flux = Laplace_flux_O4().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -312,12 +312,12 @@ struct cubic_interp { fine_flux = 0.0; - fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz); - fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz); - fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz+1); - fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz+1); - coarse_flux = 
Laplace_flux_O4().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0; + coarse_flux = Laplace_flux_O4().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -338,12 +338,12 @@ struct cubic_interp { fine_flux = 0.0; - fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz); - fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz); - fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz+1); - fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz+1); - coarse_flux = Laplace_flux_O4().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0; + coarse_flux = Laplace_flux_O4().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -359,12 +359,12 @@ struct cubic_interp { fine_flux = 0.0; - fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz); - fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz); - fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz+1); - fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz+1); - coarse_flux = Laplace_flux_O4().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0; + coarse_flux = Laplace_flux_O4().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -384,12 +384,12 @@ struct cubic_interp { fine_flux = 0.0; - fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy,iz+1); - fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy,iz+1); - fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy+1,iz+1); - fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy+1,iz+1); + 
fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy+1,iz+1); - coarse_flux = Laplace_flux_O4().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0; + coarse_flux = Laplace_flux_O4().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -405,12 +405,12 @@ struct cubic_interp { fine_flux = 0.0; - fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy,iz); - fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy,iz); - fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy+1,iz); - fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy+1,iz); - coarse_flux = Laplace_flux_O4().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0; + coarse_flux = Laplace_flux_O4().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -717,13 +717,13 @@ struct interp_O5_fluxcorr } fine_flux = 0.0; - fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz); - fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz); - fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz+1); - fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_x(-1,*u,ix+1,iy+1,iz+1); fine_flux /= 4.0; - coarse_flux = Laplace_flux_O4().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0; + coarse_flux = Laplace_flux_O4().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0; dflux = coarse_flux - fine_flux; @@ -758,12 +758,12 @@ struct 
interp_O5_fluxcorr } fine_flux = 0.0; - fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz); - fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz); - fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz+1); - fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_x(+1,*u,ix,iy+1,iz+1); - coarse_flux = Laplace_flux_O4().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0; + coarse_flux = Laplace_flux_O4().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -798,12 +798,12 @@ struct interp_O5_fluxcorr } fine_flux = 0.0; - fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz); - fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz); - fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz+1); - fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_y(-1,*u,ix+1,iy+1,iz+1); - coarse_flux = Laplace_flux_O4().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0; + coarse_flux = Laplace_flux_O4().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -838,12 +838,12 @@ struct interp_O5_fluxcorr } fine_flux = 0.0; - fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz); - fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz); - fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz+1); - fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O4().apply_y(+1,*u,ix,iy,iz+1); + fine_flux += 
Laplace_flux_O4().apply_y(+1,*u,ix+1,iy,iz+1); - coarse_flux = Laplace_flux_O4().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0; + coarse_flux = Laplace_flux_O4().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -880,12 +880,12 @@ struct interp_O5_fluxcorr fine_flux = 0.0; - fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy,iz+1); - fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy,iz+1); - fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy+1,iz+1); - fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O4().apply_z(-1,*u,ix+1,iy+1,iz+1); - coarse_flux = Laplace_flux_O4().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0; + coarse_flux = Laplace_flux_O4().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -920,12 +920,12 @@ struct interp_O5_fluxcorr } fine_flux = 0.0; - fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy,iz); - fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy,iz); - fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy+1,iz); - fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O4().apply_z(+1,*u,ix+1,iy+1,iz); - coarse_flux = Laplace_flux_O4().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0; + coarse_flux = Laplace_flux_O4().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -1027,13 +1027,13 @@ struct interp_O7_fluxcorr } fine_flux = 0.0; - fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy,iz); - fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy+1,iz); - fine_flux += 
Laplace_flux_O6().apply_x(-1,*u,ix+1,iy,iz+1); - fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy+1,iz+1); + fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O6().apply_x(-1,*u,ix+1,iy+1,iz+1); fine_flux /= 4.0; - coarse_flux = Laplace_flux_O6().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0; + coarse_flux = Laplace_flux_O6().apply_x(-1,*utop,ixtop+1,iytop,iztop)/2.0; dflux = coarse_flux - fine_flux; @@ -1074,12 +1074,12 @@ struct interp_O7_fluxcorr } fine_flux = 0.0; - fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy,iz); - fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy+1,iz); - fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy,iz+1); - fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O6().apply_x(+1,*u,ix,iy+1,iz+1); - coarse_flux = Laplace_flux_O6().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0; + coarse_flux = Laplace_flux_O6().apply_x(+1,*utop,ixtop,iytop,iztop)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -1119,12 +1119,12 @@ struct interp_O7_fluxcorr } fine_flux = 0.0; - fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix,iy+1,iz); - fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix+1,iy+1,iz); - fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix,iy+1,iz+1); - fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix+1,iy+1,iz+1); + fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O6().apply_y(-1,*u,ix+1,iy+1,iz+1); - coarse_flux = Laplace_flux_O6().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0; + coarse_flux = 
Laplace_flux_O6().apply_y(-1,*utop,ixtop,iytop+1,iztop)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -1164,12 +1164,12 @@ struct interp_O7_fluxcorr } fine_flux = 0.0; - fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix,iy,iz); - fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix+1,iy,iz); - fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix,iy,iz+1); - fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix,iy,iz); + fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O6().apply_y(+1,*u,ix+1,iy,iz+1); - coarse_flux = Laplace_flux_O6().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0; + coarse_flux = Laplace_flux_O6().apply_y(+1,*utop,ixtop,iytop,iztop)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -1210,12 +1210,12 @@ struct interp_O7_fluxcorr fine_flux = 0.0; - fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix,iy,iz+1); - fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix+1,iy,iz+1); - fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix,iy+1,iz+1); - fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix+1,iy+1,iz+1); + fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix,iy,iz+1); + fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix+1,iy,iz+1); + fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix,iy+1,iz+1); + fine_flux += Laplace_flux_O6().apply_z(-1,*u,ix+1,iy+1,iz+1); - coarse_flux = Laplace_flux_O6().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0; + coarse_flux = Laplace_flux_O6().apply_z(-1,*utop,ixtop,iytop,iztop+1)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; @@ -1255,12 +1255,12 @@ struct interp_O7_fluxcorr } fine_flux = 0.0; - fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix,iy,iz); - fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix+1,iy,iz); - fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix,iy+1,iz); - fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix+1,iy+1,iz); + fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix,iy,iz); + 
fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix+1,iy,iz); + fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix,iy+1,iz); + fine_flux += Laplace_flux_O6().apply_z(+1,*u,ix+1,iy+1,iz); - coarse_flux = Laplace_flux_O6().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0; + coarse_flux = Laplace_flux_O6().apply_z(+1,*utop,ixtop,iytop,iztop)/2.0; fine_flux /= 4.0; dflux = coarse_flux - fine_flux; diff --git a/src/mg_solver.hh b/src/mg_solver.hh index b2d53d2..c39cd23 100644 --- a/src/mg_solver.hh +++ b/src/mg_solver.hh @@ -1,37 +1,43 @@ /* - + mg_solver.hh - This file is part of MUSIC - - a code to generate multi-scale initial conditions - for cosmological simulations - + a code to generate multi-scale initial conditions + for cosmological simulations + Copyright (C) 2010 Oliver Hahn - + */ -#ifndef __MG_SOLVER_HH -#define __MG_SOLVER_HH +#pragma once #include #include -#include "mg_operators.hh" -#include "mg_interp.hh" +#include +#include -#include "mesh.hh" +#include -#define BEGIN_MULTIGRID_NAMESPACE namespace multigrid { +#define BEGIN_MULTIGRID_NAMESPACE \ + namespace multigrid \ + { #define END_MULTIGRID_NAMESPACE } BEGIN_MULTIGRID_NAMESPACE - + //! options for multigrid smoothing operation -namespace opt { - enum smtype { sm_jacobi, sm_gauss_seidel, sm_sor }; +namespace opt +{ + enum smtype + { + sm_jacobi, + sm_gauss_seidel, + sm_sor + }; } - //! 
actual implementation of FAS adaptive multigrid solver -template< class S, class I, class O, typename T=double > +template class solver { public: @@ -40,229 +46,214 @@ public: typedef I interp; protected: - scheme m_scheme; //!< finite difference scheme - mgop m_gridop; //!< grid prolongation and restriction operator - unsigned m_npresmooth, //!< number of pre sweeps - m_npostsmooth; //!< number of post sweeps - opt::smtype m_smoother; //!< smoothing method to be applied - unsigned m_ilevelmin; //!< index of the top grid level - - const static bool m_bperiodic = true; //!< flag whether top grid is periodic - - std::vector m_residu_ini; //!< vector of initial residuals for each level - bool m_is_ini; //!< bool that is true for first iteration + scheme m_scheme; //!< finite difference scheme + mgop m_gridop; //!< grid prolongation and restriction operator + unsigned m_npresmooth, //!< number of pre sweeps + m_npostsmooth; //!< number of post sweeps + opt::smtype m_smoother; //!< smoothing method to be applied + unsigned m_ilevelmin; //!< index of the top grid level + + const static bool m_bperiodic = true; //!< flag whether top grid is periodic + + std::vector m_residu_ini; //!< vector of initial residuals for each level + bool m_is_ini; //!< bool that is true for first iteration + + GridHierarchy + *m_pu, //!< pointer to GridHierarchy for solution u + *m_pf, //!< pointer to GridHierarchy for right-hand-side + *m_pfsave; //!< pointer to saved state of right-hand-side (unused) + + const MeshvarBnd *m_pubnd; - GridHierarchy *m_pu, //!< pointer to GridHierarchy for solution u - *m_pf, //!< pointer to GridHierarchy for right-hand-side - *m_pfsave; //!< pointer to saved state of right-hand-side (unused) - - const MeshvarBnd *m_pubnd; - //! compute residual for a level - double compute_error( const MeshvarBnd& u, const MeshvarBnd& unew, int ilevel ); - + double compute_error(const MeshvarBnd &u, const MeshvarBnd &unew, int ilevel); + //! 
compute residuals for entire grid hierarchy - double compute_error( const GridHierarchy& uh, const GridHierarchy& uhnew, bool verbose ); - + double compute_error(const GridHierarchy &uh, const GridHierarchy &uhnew, bool verbose); + //! compute residuals for entire grid hierarchy - double compute_RMS_resid( const GridHierarchy& uh, const GridHierarchy& fh, bool verbose ); + double compute_RMS_resid(const GridHierarchy &uh, const GridHierarchy &fh, bool verbose); protected: - - //! Jacobi smoothing - void Jacobi( T h, MeshvarBnd* u, const MeshvarBnd* f ); - + //! Jacobi smoothing + void Jacobi(real_t h, MeshvarBnd *u, const MeshvarBnd *f); + //! Gauss-Seidel smoothing - void GaussSeidel( T h, MeshvarBnd* u, const MeshvarBnd* f ); - + void GaussSeidel(real_t h, MeshvarBnd *u, const MeshvarBnd *f); + //! Successive-Overrelaxation smoothing - void SOR( T h, MeshvarBnd* u, const MeshvarBnd* f ); - + void SOR(real_t h, MeshvarBnd *u, const MeshvarBnd *f); + //! main two-grid (V-cycle) for multi-grid iterations - void twoGrid( unsigned ilevel ); - + void twoGrid(unsigned ilevel); + //! apply boundary conditions - void setBC( unsigned ilevel ); - + void setBC(unsigned ilevel); + //! make top grid periodic boundary conditions - void make_periodic( MeshvarBnd *u ); - - //void interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd& coarse, MeshvarBnd& fine ); - + void make_periodic(MeshvarBnd *u); + + // void interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd& coarse, MeshvarBnd& fine ); + public: - //! constructor - solver( GridHierarchy& f, opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth ); - + solver(GridHierarchy &f, opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth); + //! destructor ~solver() - { } - - //! solve Poisson's equation - double solve( GridHierarchy& u, double accuracy, double h=-1.0, bool verbose=false ); - - //! 
solve Poisson's equation - double solve( GridHierarchy& u, double accuracy, bool verbose=false ) { - return this->solve ( u, accuracy, -1.0, verbose ); } - - - + + //! solve Poisson's equation + double solve(GridHierarchy &u, double accuracy, double h = -1.0, bool verbose = false); + + //! solve Poisson's equation + double solve(GridHierarchy &u, double accuracy, bool verbose = false) + { + return this->solve(u, accuracy, -1.0, verbose); + } }; - -template< class S, class I, class O, typename T > -solver::solver( GridHierarchy& f, opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth ) -: m_scheme(), m_gridop(), m_npresmooth( npresmooth ), m_npostsmooth( npostsmooth ), -m_smoother( smoother ), m_ilevelmin( f.levelmin() ), m_is_ini( true ), m_pf( &f ) -{ +template +solver::solver(GridHierarchy &f, opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth) + : m_scheme(), m_gridop(), m_npresmooth(npresmooth), m_npostsmooth(npostsmooth), + m_smoother(smoother), m_ilevelmin(f.levelmin()), m_is_ini(true), m_pf(&f) +{ m_is_ini = true; } - -template< class S, class I, class O, typename T > -void solver::Jacobi( T h, MeshvarBnd *u, const MeshvarBnd* f ) +template +void solver::Jacobi(real_t h, MeshvarBnd *u, const MeshvarBnd *f) { int - nx = u->size(0), - ny = u->size(1), - nz = u->size(2); - - double - c0 = -1.0/m_scheme.ccoeff(), - h2 = h*h; - - MeshvarBnd uold(*u); - - double alpha = 0.95, ialpha = 1.0-alpha; - - #pragma omp parallel for - for( int ix=0; ixsize(0), + ny = u->size(1), + nz = u->size(2); + + double + c0 = -1.0 / m_scheme.ccoeff(), + h2 = h * h; + + MeshvarBnd uold(*u); + + double alpha = 0.95, ialpha = 1.0 - alpha; + +#pragma omp parallel for + for (int ix = 0; ix < nx; ++ix) + for (int iy = 0; iy < ny; ++iy) + for (int iz = 0; iz < nz; ++iz) + (*u)(ix, iy, iz) = ialpha * uold(ix, iy, iz) + alpha * (m_scheme.rhs(uold, ix, iy, iz) + h2 * (*f)(ix, iy, iz)) * c0; } -template< class S, class I, class O, typename T > -void solver::SOR( T h, 
MeshvarBnd *u, const MeshvarBnd* f ) +template +void solver::SOR(real_t h, MeshvarBnd *u, const MeshvarBnd *f) { int - nx = u->size(0), - ny = u->size(1), - nz = u->size(2); + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); - double - c0 = -1.0/m_scheme.ccoeff(), - h2 = h*h; - - MeshvarBnd uold(*u); - - double - alpha = 1.2, - //alpha = 2 / (1 + 4 * atan(1.0) / double(u->size(0)))-1.0, //.. ideal alpha - ialpha = 1.0-alpha; - - #pragma omp parallel for - for( int ix=0; ix uold(*u); + + double + alpha = 1.2, + // alpha = 2 / (1 + 4 * atan(1.0) / double(u->size(0)))-1.0, //.. ideal alpha + ialpha = 1.0 - alpha; + +#pragma omp parallel for + for (int ix = 0; ix < nx; ++ix) + for (int iy = 0; iy < ny; ++iy) + for (int iz = 0; iz < nz; ++iz) + if ((ix + iy + iz) % 2 == 0) + (*u)(ix, iy, iz) = ialpha * uold(ix, iy, iz) + alpha * (m_scheme.rhs(uold, ix, iy, iz) + h2 * (*f)(ix, iy, iz)) * c0; + +#pragma omp parallel for + for (int ix = 0; ix < nx; ++ix) + for (int iy = 0; iy < ny; ++iy) + for (int iz = 0; iz < nz; ++iz) + if ((ix + iy + iz) % 2 != 0) + (*u)(ix, iy, iz) = ialpha * uold(ix, iy, iz) + alpha * (m_scheme.rhs(*u, ix, iy, iz) + h2 * (*f)(ix, iy, iz)) * c0; } -template< class S, class I, class O, typename T > -void solver::GaussSeidel( T h, MeshvarBnd* u, const MeshvarBnd* f ) +template +void solver::GaussSeidel(real_t h, MeshvarBnd *u, const MeshvarBnd *f) { - int - nx = u->size(0), - ny = u->size(1), - nz = u->size(2); - - T - c0 = -1.0/m_scheme.ccoeff(), - h2 = h*h; - - for( int color=0; color < 2; ++color ) - #pragma omp parallel for - for( int ix=0; ixsize(0), + ny = u->size(1), + nz = u->size(2); + + real_t + c0 = -1.0 / m_scheme.ccoeff(), + h2 = h * h; + + for (int color = 0; color < 2; ++color) +#pragma omp parallel for + for (int ix = 0; ix < nx; ++ix) + for (int iy = 0; iy < ny; ++iy) + for (int iz = 0; iz < nz; ++iz) + if ((ix + iy + iz) % 2 == color) + (*u)(ix, iy, iz) = (m_scheme.rhs(*u, ix, iy, iz) + h2 * (*f)(ix, iy, iz)) * c0; } - -template< 
class S, class I, class O, typename T > -void solver::twoGrid( unsigned ilevel ) +template +void solver::twoGrid(unsigned ilevel) { - MeshvarBnd *uf, *uc, *ff, *fc; - - - double - h = 1.0/(1< *uf, *uc, *ff, *fc; + + double + h = 1.0 / (1 << ilevel), + c0 = -1.0 / m_scheme.ccoeff(), + h2 = h * h; + uf = m_pu->get_grid(ilevel); - ff = m_pf->get_grid(ilevel); - - uc = m_pu->get_grid(ilevel-1); - fc = m_pf->get_grid(ilevel-1); - - - int - nx = uf->size(0), - ny = uf->size(1), - nz = uf->size(2); - - if( m_bperiodic && ilevel <= m_ilevelmin) - make_periodic( uf ); - else if(!m_bperiodic) - setBC( ilevel ); - + ff = m_pf->get_grid(ilevel); + + uc = m_pu->get_grid(ilevel - 1); + fc = m_pf->get_grid(ilevel - 1); + + int + nx = uf->size(0), + ny = uf->size(1), + nz = uf->size(2); + + if (m_bperiodic && ilevel <= m_ilevelmin) + make_periodic(uf); + else if (!m_bperiodic) + setBC(ilevel); + //... do smoothing sweeps with specified solver - for( unsigned i=0; i m_ilevelmin ) - interp().interp_coarse_fine(ilevel,*uc,*uf); - - if( m_smoother == opt::sm_gauss_seidel ) - GaussSeidel( h, uf, ff ); - - else if( m_smoother == opt::sm_jacobi ) - Jacobi( h, uf, ff); - - else if( m_smoother == opt::sm_sor ) - SOR( h, uf, ff ); - - if( m_bperiodic && ilevel <= m_ilevelmin ) - make_periodic( uf ); + for (unsigned i = 0; i < m_npresmooth; ++i) + { + + if (ilevel > m_ilevelmin) + interp().interp_coarse_fine(ilevel, *uc, *uf); + + if (m_smoother == opt::sm_gauss_seidel) + GaussSeidel(h, uf, ff); + + else if (m_smoother == opt::sm_jacobi) + Jacobi(h, uf, ff); + + else if (m_smoother == opt::sm_sor) + SOR(h, uf, ff); + + if (m_bperiodic && ilevel <= m_ilevelmin) + make_periodic(uf); } - - - m_gridop.restrict( *uf, *uc ); - + + m_gridop.restrict(*uf, *uc); + //... essential!! 
- if( m_bperiodic && ilevel <= m_ilevelmin ) - make_periodic( uc ); - else if( ilevel > m_ilevelmin ) - interp().interp_coarse_fine(ilevel,*uc,*uf); - - + if (m_bperiodic && ilevel <= m_ilevelmin) + make_periodic(uc); + else if (ilevel > m_ilevelmin) + interp().interp_coarse_fine(ilevel, *uc, *uf); + //.................................................................... //... we now use hard-coded restriction+operatore app, see below /*meshvar_bnd Lu(*uf,false); @@ -273,407 +264,383 @@ void solver::twoGrid( unsigned ilevel ) for( int iy=0; iyoffset(0), - oyp = uf->offset(1), - ozp = uf->offset(2); - - meshvar_bnd tLu(*uc,false); - #pragma omp parallel for - for( int ix=0; ixoffset(0), + oyp = uf->offset(1), + ozp = uf->offset(2); + + meshvar_bnd tLu(*uc, false); +#pragma omp parallel for + for (int ix = 0; ix < nx / 2; ++ix) + { + int iix = 2 * ix; + for (int iy = 0, iiy = 0; iy < ny / 2; ++iy, iiy += 2) + + for (int iz = 0, iiz = 0; iz < nz / 2; ++iz, iiz += 2) + tLu(ix + oxp, iy + oyp, iz + ozp) = 0.125 * (m_scheme.apply((*uf), iix, iiy, iiz) + m_scheme.apply((*uf), iix, iiy, iiz + 1) + m_scheme.apply((*uf), iix, iiy + 1, iiz) + m_scheme.apply((*uf), iix, iiy + 1, iiz + 1) + m_scheme.apply((*uf), iix + 1, iiy, iiz) + m_scheme.apply((*uf), iix + 1, iiy, iiz + 1) + m_scheme.apply((*uf), iix + 1, iiy + 1, iiz) + m_scheme.apply((*uf), iix + 1, iiy + 1, iiz + 1)) / h2; } - + //... 
restrict source term - m_gridop.restrict( *ff, *fc ); - + m_gridop.restrict(*ff, *fc); + int oi, oj, ok; oi = ff->offset(0); oj = ff->offset(1); ok = ff->offset(2); - - #pragma omp parallel for - for( int ix=oi; ixsize(0)/2; ++ix ) - for( int iy=oj; iysize(1)/2; ++iy ) - for( int iz=ok; izsize(2)/2; ++iz ) - (*fc)(ix,iy,iz) += ((tLu( ix, iy, iz ) - (m_scheme.apply( *uc, ix, iy, iz )/(4.0*h2)))); - + +#pragma omp parallel for + for (int ix = oi; ix < oi + (int)ff->size(0) / 2; ++ix) + for (int iy = oj; iy < oj + (int)ff->size(1) / 2; ++iy) + for (int iz = ok; iz < ok + (int)ff->size(2) / 2; ++iz) + (*fc)(ix, iy, iz) += ((tLu(ix, iy, iz) - (m_scheme.apply(*uc, ix, iy, iz) / (4.0 * h2)))); + tLu.deallocate(); - - meshvar_bnd ucsave(*uc,true); - + + meshvar_bnd ucsave(*uc, true); + //... have we reached the end of the recursion or do we need to go up one level? - if( ilevel == 1 ) - if( m_bperiodic ) - (*uc)(0,0,0) = 0.0; - else - (*uc)(0,0,0) = (m_scheme.rhs( (*uc), 0, 0, 0 ) + 4.0 * h2 * (*fc)(0,0,0))*c0; + if (ilevel == 1) + if (m_bperiodic) + (*uc)(0, 0, 0) = 0.0; + else + (*uc)(0, 0, 0) = (m_scheme.rhs((*uc), 0, 0, 0) + 4.0 * h2 * (*fc)(0, 0, 0)) * c0; else - twoGrid( ilevel-1 ); - - meshvar_bnd cc(*uc,false); - - - //... compute correction on coarse grid - #pragma omp parallel for - for( int ix=0; ix<(int)cc.size(0); ++ix ) - for( int iy=0; iy<(int)cc.size(1); ++iy ) - for( int iz=0; iz<(int)cc.size(2); ++iz ) - cc(ix,iy,iz) = (*uc)(ix,iy,iz) - ucsave(ix,iy,iz); - + twoGrid(ilevel - 1); + + meshvar_bnd cc(*uc, false); + +//... 
compute correction on coarse grid +#pragma omp parallel for + for (int ix = 0; ix < (int)cc.size(0); ++ix) + for (int iy = 0; iy < (int)cc.size(1); ++iy) + for (int iz = 0; iz < (int)cc.size(2); ++iz) + cc(ix, iy, iz) = (*uc)(ix, iy, iz) - ucsave(ix, iy, iz); + ucsave.deallocate(); - if( m_bperiodic && ilevel <= m_ilevelmin ) - make_periodic( &cc ); + if (m_bperiodic && ilevel <= m_ilevelmin) + make_periodic(&cc); + + m_gridop.prolong_add(cc, *uf); - m_gridop.prolong_add( cc, *uf ); - //... interpolate and apply coarse-fine boundary conditions on fine level - if( m_bperiodic && ilevel <= m_ilevelmin ) - make_periodic( uf ); - else if(!m_bperiodic) - setBC( ilevel ); - + if (m_bperiodic && ilevel <= m_ilevelmin) + make_periodic(uf); + else if (!m_bperiodic) + setBC(ilevel); + //... do smoothing sweeps with specified solver - for( unsigned i=0; i m_ilevelmin ) - interp().interp_coarse_fine(ilevel,*uc,*uf); + for (unsigned i = 0; i < m_npostsmooth; ++i) + { - if( m_smoother == opt::sm_gauss_seidel ) - GaussSeidel( h, uf, ff ); - - else if( m_smoother == opt::sm_jacobi ) - Jacobi( h, uf, ff); - - else if( m_smoother == opt::sm_sor ) - SOR( h, uf, ff ); - - if( m_bperiodic && ilevel <= m_ilevelmin ) - make_periodic( uf ); + if (ilevel > m_ilevelmin) + interp().interp_coarse_fine(ilevel, *uc, *uf); + if (m_smoother == opt::sm_gauss_seidel) + GaussSeidel(h, uf, ff); + + else if (m_smoother == opt::sm_jacobi) + Jacobi(h, uf, ff); + + else if (m_smoother == opt::sm_sor) + SOR(h, uf, ff); + + if (m_bperiodic && ilevel <= m_ilevelmin) + make_periodic(uf); } - } -template< class S, class I, class O, typename T > -double solver::compute_error( const MeshvarBnd& u, const MeshvarBnd& f, int ilevel ) +template +double solver::compute_error(const MeshvarBnd &u, const MeshvarBnd &f, int ilevel) { - int - nx = u.size(0), - ny = u.size(1), - nz = u.size(2); - + int + nx = u.size(0), + ny = u.size(1), + nz = u.size(2); + double err = 0.0, err2 = 0.0; size_t count = 0; - double h = 
1.0/(1ul< 0.0 )//&& u(ix,iy,iz) != unew(ix,iy,iz) ) - { - //err += fabs(1.0 - (double)u(ix,iy,iz)/(double)unew(ix,iy,iz)); - /*err += fabs(((double)m_scheme.apply( u, ix, iy, iz )/h2 + (double)(f(ix,iy,iz)) )); - err2 += fabs((double)f(ix,iy,iz));*/ + double h = 1.0 / (1ul << ilevel), h2 = h * h; - err += fabs( (double)m_scheme.apply( u, ix, iy, iz )/h2/(double)(f(ix,iy,iz)) + 1.0 ); +#pragma omp parallel for reduction(+ \ + : err, count) + for (int ix = 0; ix < nx; ++ix) + for (int iy = 0; iy < ny; ++iy) + for (int iz = 0; iz < nz; ++iz) + if (true) // fabs(unew(ix,iy,iz)) > 0.0 )//&& u(ix,iy,iz) != unew(ix,iy,iz) ) + { + // err += fabs(1.0 - (double)u(ix,iy,iz)/(double)unew(ix,iy,iz)); + /*err += fabs(((double)m_scheme.apply( u, ix, iy, iz )/h2 + (double)(f(ix,iy,iz)) )); + err2 += fabs((double)f(ix,iy,iz));*/ + + err += fabs((double)m_scheme.apply(u, ix, iy, iz) / h2 / (double)(f(ix, iy, iz)) + 1.0); ++count; } - - if( count != 0 ) - err /= count; - + + if (count != 0) + err /= count; + return err; } -template< class S, class I, class O, typename T > -double solver::compute_error( const GridHierarchy& uh, const GridHierarchy& fh, bool verbose ) +template +double solver::compute_error(const GridHierarchy &uh, const GridHierarchy &fh, bool verbose) { double maxerr = 0.0; - for( unsigned ilevel=uh.levelmin(); ilevel <= uh.levelmax(); ++ilevel ) + for (unsigned ilevel = uh.levelmin(); ilevel <= uh.levelmax(); ++ilevel) { - int - nx = uh.get_grid(ilevel)->size(0), - ny = uh.get_grid(ilevel)->size(1), - nz = uh.get_grid(ilevel)->size(2); - + int + nx = uh.get_grid(ilevel)->size(0), + ny = uh.get_grid(ilevel)->size(1), + nz = uh.get_grid(ilevel)->size(2); + double err = 0.0, mean_res = 0.0; size_t count = 0; - double h = 1.0/(1ul< 0.0 ) - { - err += fabs( res/val ); - mean_res += fabs(res); - ++count; - } - } - - if( count != 0 ) - { - err /= count; - mean_res /= count; - } - if( verbose ) +#pragma omp parallel for reduction(+ \ + : err, count) + for (int ix = 0; ix < 
nx; ++ix) + for (int iy = 0; iy < ny; ++iy) + for (int iz = 0; iz < nz; ++iz) + { + double res = (double)m_scheme.apply(*uh.get_grid(ilevel), ix, iy, iz) + h2 * (double)((*fh.get_grid(ilevel))(ix, iy, iz)); + double val = (*uh.get_grid(ilevel))(ix, iy, iz); + + if (fabs(val) > 0.0) + { + err += fabs(res / val); + mean_res += fabs(res); + ++count; + } + } + + if (count != 0) + { + err /= count; + mean_res /= count; + } + if (verbose) std::cout << " Level " << std::setw(6) << ilevel << ", Error = " << err << std::endl; - music::dlog.Print("[mg] level %3d, residual %g, rel. error %g",ilevel, mean_res, err); - - maxerr = std::max(maxerr,err); - + music::dlog.Print("[mg] level %3d, residual %g, rel. error %g", ilevel, mean_res, err); + + maxerr = std::max(maxerr, err); } return maxerr; } -template< class S, class I, class O, typename T > -double solver::compute_RMS_resid( const GridHierarchy& uh, const GridHierarchy& fh, bool verbose ) +template +double solver::compute_RMS_resid(const GridHierarchy &uh, const GridHierarchy &fh, bool verbose) { - if( m_is_ini ) - m_residu_ini.assign( uh.levelmax()+1, 0.0 ); - - double maxerr=0.0; - - for( unsigned ilevel=uh.levelmin(); ilevel <= uh.levelmax(); ++ilevel ) + if (m_is_ini) + m_residu_ini.assign(uh.levelmax() + 1, 0.0); + + double maxerr = 0.0; + + for (unsigned ilevel = uh.levelmin(); ilevel <= uh.levelmax(); ++ilevel) { - int - nx = uh.get_grid(ilevel)->size(0), - ny = uh.get_grid(ilevel)->size(1), - nz = uh.get_grid(ilevel)->size(2); - - double h = 1.0/(1<size(0), + ny = uh.get_grid(ilevel)->size(1), + nz = uh.get_grid(ilevel)->size(2); + + double h = 1.0 / (1 << ilevel), h2 = h * h; double sum = 0.0, sumd2 = 0.0; size_t count = 0; - - #pragma omp parallel for reduction(+:sum,sumd2,count) - for( int ix=0; ix maxerr ) + + if (m_is_ini) + m_residu_ini[ilevel] = sqrt(sum) / count; + + double err_abs = sqrt(sum / count); + double err_rel = err_abs / sqrt(sumd2 / count); + + if (verbose && !m_is_ini) + std::cout << " Level " 
<< std::setw(6) << ilevel << ", Error = " << err_rel << std::endl; + + music::dlog.Print("[mg] level %3d, rms residual %g, rel. error %g", ilevel, err_abs, err_rel); + + if (err_rel > maxerr) maxerr = err_rel; - } - - if( m_is_ini ) + + if (m_is_ini) m_is_ini = false; - + return maxerr; } - -template< class S, class I, class O, typename T > -double solver::solve( GridHierarchy& uh, double acc, double h, bool verbose ) +template +double solver::solve(GridHierarchy &uh, double acc, double h, bool verbose) { double err, maxerr = 1e30; unsigned niter = 0; - + bool fullverbose = false; - + m_pu = &uh; - - //err = compute_RMS_resid( *m_pu, *m_pf, fullverbose ); - + + // err = compute_RMS_resid( *m_pu, *m_pf, fullverbose ); + //... iterate ...// while (true) { - + music::ulog.Print("Performing multi-grid V-cycle..."); - twoGrid( uh.levelmax() ); - - //err = compute_RMS_resid( *m_pu, *m_pf, fullverbose ); - err = compute_error( *m_pu, *m_pf, fullverbose ); + twoGrid(uh.levelmax()); + + // err = compute_RMS_resid( *m_pu, *m_pf, fullverbose ); + err = compute_error(*m_pu, *m_pf, fullverbose); ++niter; - - if( fullverbose ){ - music::ulog.Print(" multigrid iteration %3d, maximum RMS residual = %g", niter, err ); + + if (fullverbose) + { + music::ulog.Print(" multigrid iteration %3d, maximum RMS residual = %g", niter, err); std::cout << " - Step No. 
" << std::setw(3) << niter << ", Max Err = " << err << std::endl; std::cout << " ---------------------------------------------------\n"; } - - if( err < maxerr ) + + if (err < maxerr) maxerr = err; - - if( (niter > 1) && ((err < acc) || (niter > 20)) ) + + if ((niter > 1) && ((err < acc) || (niter > 20))) break; - } - - if( err > acc ) - { - std::cout << "Error : no convergence in Poisson solver" << std::endl; - music::elog.Print("No convergence in Poisson solver, final error: %g.",err); - } - else if( verbose ) - { - std::cout << " - Converged in " << niter << " steps to " << maxerr << std::endl; - music::ulog.Print("Poisson solver converged to max. error of %g in %d steps.",err,niter); } - + if (err > acc) + { + std::cout << "Error : no convergence in Poisson solver" << std::endl; + music::elog.Print("No convergence in Poisson solver, final error: %g.", err); + } + else if (verbose) + { + std::cout << " - Converged in " << niter << " steps to " << maxerr << std::endl; + music::ulog.Print("Poisson solver converged to max. error of %g in %d steps.", err, niter); + } + //.. make sure that the RHS does not contain the FAS corrections any more - for( int i=m_pf->levelmax(); i>0; --i ) - m_gridop.restrict( *m_pf->get_grid(i), *m_pf->get_grid(i-1) ); - - + for (int i = m_pf->levelmax(); i > 0; --i) + m_gridop.restrict(*m_pf->get_grid(i), *m_pf->get_grid(i - 1)); + return err; } - - -//TODO: this only works for 2nd order! (but actually not needed) -template< class S, class I, class O, typename T > -void solver::setBC( unsigned ilevel ) +// TODO: this only works for 2nd order! (but actually not needed) +template +void solver::setBC(unsigned ilevel) { //... 
set only on level before additional refinement starts - if( ilevel == m_ilevelmin ) + if (ilevel == m_ilevelmin) { - MeshvarBnd *u = m_pu->get_grid(ilevel); + MeshvarBnd *u = m_pu->get_grid(ilevel); int - nx = u->size(0), - ny = u->size(1), - nz = u->size(2); - - for( int iy=0; iysize(0), + ny = u->size(1), + nz = u->size(2); + + for (int iy = 0; iy < ny; ++iy) + for (int iz = 0; iz < nz; ++iz) { - (*u)(-1,iy,iz) = 2.0*(*m_pubnd)(-1,iy,iz) - (*u)(0,iy,iz); - (*u)(nx,iy,iz) = 2.0*(*m_pubnd)(nx,iy,iz) - (*u)(nx-1,iy,iz);; + (*u)(-1, iy, iz) = 2.0 * (*m_pubnd)(-1, iy, iz) - (*u)(0, iy, iz); + (*u)(nx, iy, iz) = 2.0 * (*m_pubnd)(nx, iy, iz) - (*u)(nx - 1, iy, iz); + ; } - - for( int ix=0; ix -void solver::make_periodic( MeshvarBnd *u ) +template +void solver::make_periodic(MeshvarBnd *u) { - int - nx = u->size(0), - ny = u->size(1), - nz = u->size(2); + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); int nb = u->m_nbnd; - - - //if( u->offset(0) == 0 ) - for( int iy=-nb; iyoffset(0) == 0 ) + for (int iy = -nb; iy < ny + nb; ++iy) + for (int iz = -nb; iz < nz + nb; ++iz) + { + int iiy((iy + ny) % ny), iiz((iz + nz) % nz); + + for (int i = -nb; i < 0; ++i) { - int iiy( (iy+ny)%ny ), iiz( (iz+nz)%nz ); - - for( int i=-nb; i<0; ++i ) - { - (*u)(i,iy,iz) = (*u)(nx+i,iiy,iiz); - (*u)(nx-1-i,iy,iz) = (*u)(-1-i,iiy,iiz); - } - + (*u)(i, iy, iz) = (*u)(nx + i, iiy, iiz); + (*u)(nx - 1 - i, iy, iz) = (*u)(-1 - i, iiy, iiz); } - - //if( u->offset(1) == 0 ) - for( int ix=-nb; ixoffset(1) == 0 ) + for (int ix = -nb; ix < nx + nb; ++ix) + for (int iz = -nb; iz < nz + nb; ++iz) + { + int iix((ix + nx) % nx), iiz((iz + nz) % nz); + + for (int i = -nb; i < 0; ++i) { - int iix( (ix+nx)%nx ), iiz( (iz+nz)%nz ); - - for( int i=-nb; i<0; ++i ) - { - (*u)(ix,i,iz) = (*u)(iix,ny+i,iiz); - (*u)(ix,ny-1-i,iz) = (*u)(iix,-1-i,iiz); - } + (*u)(ix, i, iz) = (*u)(iix, ny + i, iiz); + (*u)(ix, ny - 1 - i, iz) = (*u)(iix, -1 - i, iiz); } - - //if( u->offset(2) == 0 ) - for( int ix=-nb; 
ixoffset(2) == 0 ) + for (int ix = -nb; ix < nx + nb; ++ix) + for (int iy = -nb; iy < ny + nb; ++iy) + { + int iix((ix + nx) % nx), iiy((iy + ny) % ny); + + for (int i = -nb; i < 0; ++i) { - int iix( (ix+nx)%nx ), iiy( (iy+ny)%ny ); - - for( int i=-nb; i<0; ++i ) - { - (*u)(ix,iy,i) = (*u)(iix,iiy,nz+i); - (*u)(ix,iy,nz-1-i) = (*u)(iix,iiy,-1-i); - } + (*u)(ix, iy, i) = (*u)(iix, iiy, nz + i); + (*u)(ix, iy, nz - 1 - i) = (*u)(iix, iiy, -1 - i); } - + } } - END_MULTIGRID_NAMESPACE - -#endif + + diff --git a/src/plugins/output_enzo.cc b/src/plugins/output_enzo.cc index 2e0ae5d..9a7e0dd 100644 --- a/src/plugins/output_enzo.cc +++ b/src/plugins/output_enzo.cc @@ -230,19 +230,11 @@ protected: HDFCreateFile(filename); write_sim_header(filename, the_sim_header); -#ifdef SINGLE_PRECISION //... create full array in file - HDFHyperslabWriter3Ds *slab_writer = new HDFHyperslabWriter3Ds(filename, enzoname, nsz); + HDFHyperslabWriter3Ds *slab_writer = new HDFHyperslabWriter3Ds(filename, enzoname, nsz); //... create buffer - float *data_buf = new float[slices_in_slab * (size_t)ng[0] * (size_t)ng[1]]; -#else - //... create full array in file - HDFHyperslabWriter3Ds *slab_writer = new HDFHyperslabWriter3Ds(filename, enzoname, nsz); - - //... create buffer - double *data_buf = new double[slices_in_slab * (size_t)ng[0] * (size_t)ng[1]]; -#endif + real_t *data_buf = new real_t[slices_in_slab * (size_t)ng[0] * (size_t)ng[1]]; //... 
write slice by slice size_t slices_written = 0; diff --git a/src/plugins/output_gadget2.cc b/src/plugins/output_gadget2.cc index 32da217..866548b 100644 --- a/src/plugins/output_gadget2.cc +++ b/src/plugins/output_gadget2.cc @@ -1390,7 +1390,5 @@ public: namespace { output_plugin_creator_concrete> creator1("gadget2"); -#ifndef SINGLE_PRECISION output_plugin_creator_concrete> creator2("gadget2_double"); -#endif } diff --git a/src/plugins/output_gadget2_2comp.cc b/src/plugins/output_gadget2_2comp.cc index 1a7ccc4..327f81e 100644 --- a/src/plugins/output_gadget2_2comp.cc +++ b/src/plugins/output_gadget2_2comp.cc @@ -1,11 +1,11 @@ /* - + output_gadget2.cc - This file is part of MUSIC - - a code to generate multi-scale initial conditions - for cosmological simulations - + a code to generate multi-scale initial conditions + for cosmological simulations + Copyright (C) 2010 Oliver Hahn - + */ #include @@ -14,1676 +14,1622 @@ #include "mg_interp.hh" #include "mesh.hh" -template< typename T_store=float > +template class gadget2_2comp_output_plugin : public output_plugin { protected: - std::ofstream ofs_; bool bmultimass_; - - + typedef struct io_header { - unsigned int npart[6]; - double mass[6]; - double time; - double redshift; - int flag_sfr; - int flag_feedback; - unsigned int npartTotal[6]; - int flag_cooling; - int num_files; - double BoxSize; - double Omega0; - double OmegaLambda; - double HubbleParam; - int flag_stellarage; - int flag_metals; - unsigned int npartTotalHighWord[6]; - int flag_entropy_instead_u; - int flag_doubleprecision; - int flag_ic_info; - char fill[52]; - }header; - - + unsigned int npart[6]; + double mass[6]; + double time; + double redshift; + int flag_sfr; + int flag_feedback; + unsigned int npartTotal[6]; + int flag_cooling; + int num_files; + double BoxSize; + double Omega0; + double OmegaLambda; + double HubbleParam; + int flag_stellarage; + int flag_metals; + unsigned int npartTotalHighWord[6]; + int flag_entropy_instead_u; + int 
flag_doubleprecision; + int flag_ic_info; + char fill[52]; + } header; + header header_; - + std::string fname; - + bool do_glass_; std::string fname_glass_baryon_, fname_glass_cdm_; - - enum iofields { - id_dm_mass, id_dm_vel, id_dm_pos, id_gas_vel, id_gas_rho, id_gas_temp, id_gas_pos + + enum iofields + { + id_dm_mass, + id_dm_vel, + id_dm_pos, + id_gas_vel, + id_gas_rho, + id_gas_temp, + id_gas_pos }; - + size_t np_fine_gas_, np_fine_dm_, np_coarse_dm_; - + size_t block_buf_size_; unsigned long long npartmax_; unsigned nfiles_; - - //bool bbndparticles_; + + // bool bbndparticles_; bool bmorethan2bnd_; bool kpcunits_; double YHe_; - - void distribute_particles( unsigned nfiles, size_t nfine_dm, size_t nfine_gas, size_t ncoarse, - std::vector& nfdm_pf, std::vector& nfgas_pf, std::vector& nc_pf ) - { - nfdm_pf.assign( nfiles, 0 ); - nfgas_pf.assign( nfiles, 0 ); - nc_pf.assign( nfiles, 0 ); - - size_t ntotal = nfine_dm + nfine_gas + ncoarse; - size_t nnominal = (size_t)((double)ntotal/(double)nfiles); - - size_t nf_dm_assigned = 0, nf_gas_assigned = 0, nc_assigned = 0; - - for( unsigned i=0; i 0 ) - { - nfdm_pf[i] = std::min( nnominal/2ul, nfine_dm-nf_dm_assigned ); - nf_dm_assigned += nfdm_pf[i]; - nfgas_pf[i] = std::min( nnominal/2ul, nfine_gas-nf_gas_assigned ); - nf_gas_assigned += nfgas_pf[i]; - - }else{ - nfdm_pf[i] = std::min( nnominal, nfine_dm-nf_dm_assigned ); - nf_dm_assigned += nfdm_pf[i]; - } - - // once all fine particles are assigned, start with the coarse - if( nf_dm_assigned+nf_gas_assigned == nfine_dm+nfine_gas ) - { - nc_pf[i] = std::min( nnominal-(size_t)(nfdm_pf[i]+nfgas_pf[i]), ncoarse-nc_assigned ); - nc_assigned += nc_pf[i]; - } - - } - - // make sure all particles are assigned - nfdm_pf[ nfiles-1 ] += nfine_dm-nf_dm_assigned; - nfgas_pf[ nfiles-1 ] += nfine_gas-nf_gas_assigned; - nc_pf[ nfiles-1 ] += ncoarse-nc_assigned; - - } - - std::ifstream& open_and_check( std::string ffname, size_t npart ) + + void distribute_particles(unsigned 
nfiles, size_t nfine_dm, size_t nfine_gas, size_t ncoarse, + std::vector &nfdm_pf, std::vector &nfgas_pf, std::vector &nc_pf) { - std::ifstream ifs( ffname.c_str(), std::ios::binary ); - unsigned long long blk, expected; - ifs.read( (char*)&blk, sizeof(unsigned long long) ); - expected = ((unsigned long long) npart*(unsigned long long)sizeof(T_store)); - if( blk != expected ) - { - music::elog.Print("Internal consistency error in gadget2 output plug-in, open_and_check"); - music::elog.Print("Expected %d particles (%lld bytes) in temp file %s but found %lld",npart, expected ,ffname.c_str(), blk); - //throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + nfdm_pf.assign(nfiles, 0); + nfgas_pf.assign(nfiles, 0); + nc_pf.assign(nfiles, 0); + + size_t ntotal = nfine_dm + nfine_gas + ncoarse; + size_t nnominal = (size_t)((double)ntotal / (double)nfiles); + + size_t nf_dm_assigned = 0, nf_gas_assigned = 0, nc_assigned = 0; + + for (unsigned i = 0; i < nfiles; ++i) + { + if (nfine_gas > 0) + { + nfdm_pf[i] = std::min(nnominal / 2ul, nfine_dm - nf_dm_assigned); + nf_dm_assigned += nfdm_pf[i]; + nfgas_pf[i] = std::min(nnominal / 2ul, nfine_gas - nf_gas_assigned); + nf_gas_assigned += nfgas_pf[i]; + } + else + { + nfdm_pf[i] = std::min(nnominal, nfine_dm - nf_dm_assigned); + nf_dm_assigned += nfdm_pf[i]; + } + + // once all fine particles are assigned, start with the coarse + if (nf_dm_assigned + nf_gas_assigned == nfine_dm + nfine_gas) + { + nc_pf[i] = std::min(nnominal - (size_t)(nfdm_pf[i] + nfgas_pf[i]), ncoarse - nc_assigned); + nc_assigned += nc_pf[i]; + } } - + + // make sure all particles are assigned + nfdm_pf[nfiles - 1] += nfine_dm - nf_dm_assigned; + nfgas_pf[nfiles - 1] += nfine_gas - nf_gas_assigned; + nc_pf[nfiles - 1] += ncoarse - nc_assigned; + } + + std::ifstream &open_and_check(std::string ffname, size_t npart) + { + std::ifstream ifs(ffname.c_str(), std::ios::binary); + unsigned long long blk, expected; + ifs.read((char *)&blk, 
sizeof(unsigned long long)); + expected = ((unsigned long long)npart * (unsigned long long)sizeof(T_store)); + if (blk != expected) + { + music::elog.Print("Internal consistency error in gadget2 output plug-in, open_and_check"); + music::elog.Print("Expected %d particles (%lld bytes) in temp file %s but found %lld", npart, expected, ffname.c_str(), blk); + // throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); + } + return ifs; } - + class pistream : public std::ifstream { public: - pistream (std::string fname, size_t npart, size_t offset=0 ) - : std::ifstream( fname.c_str(), std::ios::binary ) + pistream(std::string fname, size_t npart, size_t offset = 0) + : std::ifstream(fname.c_str(), std::ios::binary) { size_t blk; - - if( !this->good() ) - { + + if (!this->good()) + { music::elog.Print("Could not open buffer file in gadget2 output plug-in"); throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); } - - this->read( (char*)&blk, sizeof(size_t) ); - - if( blk != npart*sizeof(T_store) ) - { + + this->read((char *)&blk, sizeof(size_t)); + + if (blk != npart * sizeof(T_store)) + { music::elog.Print("Internal consistency error in gadget2 output plug-in"); - music::elog.Print("Expected %ld bytes in temp file but found %ld",npart*sizeof(T_store),blk); + music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); } - - this->seekg( offset+sizeof(size_t), std::ios::beg ); + + this->seekg(offset + sizeof(size_t), std::ios::beg); } - - pistream () + + pistream() { - } - - void open(std::string fname, size_t npart, size_t offset=0 ) + + void open(std::string fname, size_t npart, size_t offset = 0) { - std::ifstream::open( fname.c_str(), std::ios::binary ); + std::ifstream::open(fname.c_str(), std::ios::binary); size_t blk; - - if( !this->good() ) - { - music::elog.Print("Could not open buffer file 
\'%s\' in gadget2 output plug-in",fname.c_str()); + + if (!this->good()) + { + music::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); } - - this->read( (char*)&blk, sizeof(size_t) ); - - if( blk != npart*sizeof(T_store) ) - { + + this->read((char *)&blk, sizeof(size_t)); + + if (blk != npart * sizeof(T_store)) + { music::elog.Print("Internal consistency error in gadget2 output plug-in"); - music::elog.Print("Expected %ld bytes in temp file but found %ld",npart*sizeof(T_store),blk); + music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); } - - this->seekg( offset+sizeof(size_t), std::ios::beg ); + + this->seekg(offset + sizeof(size_t), std::ios::beg); } }; - - class postream : public std::fstream + + class postream : public std::fstream { public: - postream (std::string fname, size_t npart, size_t offset=0 ) - : std::fstream( fname.c_str(), std::ios::binary|std::ios::in|std::ios::out ) + postream(std::string fname, size_t npart, size_t offset = 0) + : std::fstream(fname.c_str(), std::ios::binary | std::ios::in | std::ios::out) { size_t blk; - - if( !this->good() ) - { + + if (!this->good()) + { music::elog.Print("Could not open buffer file in gadget2 output plug-in"); throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); } - - this->read( (char*)&blk, sizeof(size_t) ); - - if( blk != npart*sizeof(T_store) ) - { + + this->read((char *)&blk, sizeof(size_t)); + + if (blk != npart * sizeof(T_store)) + { music::elog.Print("Internal consistency error in gadget2 output plug-in"); - music::elog.Print("Expected %ld bytes in temp file but found %ld",npart*sizeof(T_store),blk); + music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); throw 
std::runtime_error("Internal consistency error in gadget2 output plug-in"); } - - this->seekg( offset, std::ios::cur ); - this->seekp( offset+sizeof(size_t), std::ios::beg ); + + this->seekg(offset, std::ios::cur); + this->seekp(offset + sizeof(size_t), std::ios::beg); } - - postream () + + postream() { - } - - void open(std::string fname, size_t npart, size_t offset=0 ) + + void open(std::string fname, size_t npart, size_t offset = 0) { - if( is_open() ) - this->close(); - - std::fstream::open( fname.c_str(), std::ios::binary|std::ios::in|std::ios::out ); + if (is_open()) + this->close(); + + std::fstream::open(fname.c_str(), std::ios::binary | std::ios::in | std::ios::out); size_t blk; - - if( !this->good() ) - { - music::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in",fname.c_str()); + + if (!this->good()) + { + music::elog.Print("Could not open buffer file \'%s\' in gadget2 output plug-in", fname.c_str()); throw std::runtime_error("Could not open buffer file in gadget2 output plug-in"); } - - this->read( (char*)&blk, sizeof(size_t) ); - - if( blk != npart*sizeof(T_store) ) - { + + this->read((char *)&blk, sizeof(size_t)); + + if (blk != npart * sizeof(T_store)) + { music::elog.Print("Internal consistency error in gadget2 output plug-in"); - music::elog.Print("Expected %ld bytes in temp file but found %ld",npart*sizeof(T_store),blk); + music::elog.Print("Expected %ld bytes in temp file but found %ld", npart * sizeof(T_store), blk); throw std::runtime_error("Internal consistency error in gadget2 output plug-in"); } - - this->seekg( offset, std::ios::cur ); - this->seekp( offset+sizeof(size_t), std::ios::beg ); + + this->seekg(offset, std::ios::cur); + this->seekp(offset + sizeof(size_t), std::ios::beg); } }; - - void assemble_gadget_file( void ) + + void assemble_gadget_file(void) { - - + //............................................................................ //... 
copy from the temporary files, interleave the data and save ............ - - char fnx[256],fny[256],fnz[256],fnvx[256],fnvy[256],fnvz[256],fnm[256]; - char fnbx[256], fnby[256], fnbz[256], fnbvx[256], fnbvy[256], fnbvz[256]; - - sprintf( fnx, "___ic_temp_%05d.bin", 100*id_dm_pos+0 ); - sprintf( fny, "___ic_temp_%05d.bin", 100*id_dm_pos+1 ); - sprintf( fnz, "___ic_temp_%05d.bin", 100*id_dm_pos+2 ); - sprintf( fnvx, "___ic_temp_%05d.bin", 100*id_dm_vel+0 ); - sprintf( fnvy, "___ic_temp_%05d.bin", 100*id_dm_vel+1 ); - sprintf( fnvz, "___ic_temp_%05d.bin", 100*id_dm_vel+2 ); - sprintf( fnm, "___ic_temp_%05d.bin", 100*id_dm_mass ); - sprintf( fnbx, "___ic_temp_%05d.bin", 100*id_gas_pos+0 ); - sprintf( fnby, "___ic_temp_%05d.bin", 100*id_gas_pos+1 ); - sprintf( fnbz, "___ic_temp_%05d.bin", 100*id_gas_pos+2 ); - sprintf( fnbvx, "___ic_temp_%05d.bin", 100*id_gas_vel+0 ); - sprintf( fnbvy, "___ic_temp_%05d.bin", 100*id_gas_vel+1 ); - sprintf( fnbvz, "___ic_temp_%05d.bin", 100*id_gas_vel+2 ); + char fnx[256], fny[256], fnz[256], fnvx[256], fnvy[256], fnvz[256], fnm[256]; + char fnbx[256], fnby[256], fnbz[256], fnbvx[256], fnbvy[256], fnbvz[256]; + + sprintf(fnx, "___ic_temp_%05d.bin", 100 * id_dm_pos + 0); + sprintf(fny, "___ic_temp_%05d.bin", 100 * id_dm_pos + 1); + sprintf(fnz, "___ic_temp_%05d.bin", 100 * id_dm_pos + 2); + sprintf(fnvx, "___ic_temp_%05d.bin", 100 * id_dm_vel + 0); + sprintf(fnvy, "___ic_temp_%05d.bin", 100 * id_dm_vel + 1); + sprintf(fnvz, "___ic_temp_%05d.bin", 100 * id_dm_vel + 2); + sprintf(fnm, "___ic_temp_%05d.bin", 100 * id_dm_mass); + + sprintf(fnbx, "___ic_temp_%05d.bin", 100 * id_gas_pos + 0); + sprintf(fnby, "___ic_temp_%05d.bin", 100 * id_gas_pos + 1); + sprintf(fnbz, "___ic_temp_%05d.bin", 100 * id_gas_pos + 2); + sprintf(fnbvx, "___ic_temp_%05d.bin", 100 * id_gas_vel + 0); + sprintf(fnbvy, "___ic_temp_%05d.bin", 100 * id_gas_vel + 1); + sprintf(fnbvz, "___ic_temp_%05d.bin", 100 * id_gas_vel + 2); pistream iffs1, iffs2, iffs3; - - /*const 
size_t + + /*const size_t nptot = header_.npart[1]+header_.npart[2]+header_.npart[5], npgas = header_.npart[2], npcdm = nptot-npgas;*/ - - const size_t - nptot = np_fine_gas_+np_fine_dm_+np_coarse_dm_, - //npgas = np_fine_gas_, - npcdm = np_fine_dm_+np_coarse_dm_; - + + const size_t + nptot = np_fine_gas_ + np_fine_dm_ + np_coarse_dm_, + // npgas = np_fine_gas_, + npcdm = np_fine_dm_ + np_coarse_dm_; + size_t - wrote_coarse = 0, - wrote_gas = 0, - wrote_dm = 0; - + wrote_coarse = 0, + wrote_gas = 0, + wrote_dm = 0; + size_t - npleft = nptot, - n2read = std::min(block_buf_size_,npleft); - - if( header_.npart[5] > 0 ) + npleft = nptot, + n2read = std::min(block_buf_size_, npleft); + + if (header_.npart[5] > 0) music::elog.Print("Multi-resolution setup not supported for 2comp hack"); - + std::cout << " - Writing " << nptot << " particles to Gadget file...\n" - << " type 1 : " << header_.npart[1] << "\n" - << " type 2 : " << header_.npart[2] << "\n" - << " type 5 : " << header_.npart[5] << "\n"; - + << " type 1 : " << header_.npart[1] << "\n" + << " type 2 : " << header_.npart[2] << "\n" + << " type 5 : " << header_.npart[5] << "\n"; + bool bbaryons = np_fine_gas_ > 0; - + std::vector adata3; - adata3.reserve( 3*block_buf_size_ ); + adata3.reserve(3 * block_buf_size_); T_store *tmp1, *tmp2, *tmp3; - + tmp1 = new T_store[block_buf_size_]; tmp2 = new T_store[block_buf_size_]; tmp3 = new T_store[block_buf_size_]; - + std::vector nfdm_per_file, nfgas_per_file, nc_per_file; - distribute_particles( nfiles_, np_fine_dm_, np_fine_gas_, np_coarse_dm_, - nfdm_per_file, nfgas_per_file, nc_per_file ); - - - if( nfiles_ > 1 ) + distribute_particles(nfiles_, np_fine_dm_, np_fine_gas_, np_coarse_dm_, + nfdm_per_file, nfgas_per_file, nc_per_file); + + if (nfiles_ > 1) { std::cout << " - Gadget2 : distributing particles to " << nfiles_ << " files\n" - << " " << std::setw(12) << "type 1" << "," << std::setw(12) << "type 2" << "," << std::setw(12) << "type 5" << std::endl; - for( 
unsigned i=0; i= 1ul<<32 ) - { - bneed_long_ids = true; - music::wlog.Print("Need long particle IDs, make sure to enable in Gadget!"); - } - - - for( unsigned ifile=0; ifile 1 ) + + size_t idcount = 0; + bool bneed_long_ids = false; + if (nptot >= 1ul << 32) + { + bneed_long_ids = true; + music::wlog.Print("Need long particle IDs, make sure to enable in Gadget!"); + } + + for (unsigned ifile = 0; ifile < nfiles_; ++ifile) + { + + if (nfiles_ > 1) { char ffname[256]; - sprintf(ffname,"%s.%d",fname_.c_str(), ifile); - ofs_.open(ffname, std::ios::binary|std::ios::trunc ); - }else{ - ofs_.open(fname_.c_str(), std::ios::binary|std::ios::trunc ); + sprintf(ffname, "%s.%d", fname_.c_str(), ifile); + ofs_.open(ffname, std::ios::binary | std::ios::trunc); } - - + else + { + ofs_.open(fname_.c_str(), std::ios::binary | std::ios::trunc); + } + size_t np_this_file = nfgas_per_file[ifile] + nfdm_per_file[ifile] + nc_per_file[ifile]; - + int blksize = sizeof(header); - + //... write the header ....................................................... - - header this_header( header_ ); - this_header.npart[1] = nfdm_per_file[ifile]; - this_header.npart[2] = nfgas_per_file[ifile]; - this_header.npart[5] = nc_per_file[ifile]; - - - ofs_.write( (char *)&blksize, sizeof(int) ); - ofs_.write( (char *)&this_header, sizeof(header) ); - ofs_.write( (char *)&blksize, sizeof(int) ); - - + + header this_header(header_); + this_header.npart[1] = nfdm_per_file[ifile]; + this_header.npart[2] = nfgas_per_file[ifile]; + this_header.npart[5] = nc_per_file[ifile]; + + ofs_.write((char *)&blksize, sizeof(int)); + ofs_.write((char *)&this_header, sizeof(header)); + ofs_.write((char *)&blksize, sizeof(int)); + //... particle positions .................................................. 
- blksize = 3ul*np_this_file*sizeof(T_store); - ofs_.write( (char *)&blksize, sizeof(int) ); - - npleft = nfdm_per_file[ifile];//+nc_per_file[ifile]; - n2read = std::min(curr_block_buf_size,npleft); - - iffs1.open( fnx, npcdm, wrote_dm*sizeof(T_store) ); - iffs2.open( fny, npcdm, wrote_dm*sizeof(T_store) ); - iffs3.open( fnz, npcdm, wrote_dm*sizeof(T_store) ); - - while( n2read > 0ul ) + blksize = 3ul * np_this_file * sizeof(T_store); + ofs_.write((char *)&blksize, sizeof(int)); + + npleft = nfdm_per_file[ifile]; //+nc_per_file[ifile]; + n2read = std::min(curr_block_buf_size, npleft); + + iffs1.open(fnx, npcdm, wrote_dm * sizeof(T_store)); + iffs2.open(fny, npcdm, wrote_dm * sizeof(T_store)); + iffs3.open(fnz, npcdm, wrote_dm * sizeof(T_store)); + + while (n2read > 0ul) { - iffs1.read( reinterpret_cast(&tmp1[0]), n2read*sizeof(T_store) ); - iffs2.read( reinterpret_cast(&tmp2[0]), n2read*sizeof(T_store) ); - iffs3.read( reinterpret_cast(&tmp3[0]), n2read*sizeof(T_store) ); - - for( size_t i=0; i(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) { - adata3.push_back( fmod(tmp1[i]+header_.BoxSize,header_.BoxSize) ); - adata3.push_back( fmod(tmp2[i]+header_.BoxSize,header_.BoxSize) ); - adata3.push_back( fmod(tmp3[i]+header_.BoxSize,header_.BoxSize) ); + adata3.push_back(fmod(tmp1[i] + header_.BoxSize, header_.BoxSize)); + adata3.push_back(fmod(tmp2[i] + header_.BoxSize, header_.BoxSize)); + adata3.push_back(fmod(tmp3[i] + header_.BoxSize, header_.BoxSize)); } - ofs_.write( reinterpret_cast(&adata3[0]), 3*n2read*sizeof(T_store) ); - + ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); + adata3.clear(); npleft -= n2read; - n2read = std::min( curr_block_buf_size,npleft ); + n2read = std::min(curr_block_buf_size, npleft); } - + iffs1.close(); iffs2.close(); iffs3.close(); - - if( bbaryons 
&& nfgas_per_file[ifile] > 0ul ) + + if (bbaryons && nfgas_per_file[ifile] > 0ul) { - - iffs1.open( fnbx, npcdm, wrote_gas*sizeof(T_store) ); - iffs2.open( fnby, npcdm, wrote_gas*sizeof(T_store) ); - iffs3.open( fnbz, npcdm, wrote_gas*sizeof(T_store) ); - + + iffs1.open(fnbx, npcdm, wrote_gas * sizeof(T_store)); + iffs2.open(fnby, npcdm, wrote_gas * sizeof(T_store)); + iffs3.open(fnbz, npcdm, wrote_gas * sizeof(T_store)); + npleft = nfgas_per_file[ifile]; - n2read = std::min(curr_block_buf_size,npleft); - while( n2read > 0ul ) + n2read = std::min(curr_block_buf_size, npleft); + while (n2read > 0ul) { - iffs1.read( reinterpret_cast(&tmp1[0]), n2read*sizeof(T_store) ); - iffs2.read( reinterpret_cast(&tmp2[0]), n2read*sizeof(T_store) ); - iffs3.read( reinterpret_cast(&tmp3[0]), n2read*sizeof(T_store) ); - - for( size_t i=0; i(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) { - adata3.push_back( fmod(tmp1[i]+header_.BoxSize,header_.BoxSize) ); - adata3.push_back( fmod(tmp2[i]+header_.BoxSize,header_.BoxSize) ); - adata3.push_back( fmod(tmp3[i]+header_.BoxSize,header_.BoxSize) ); + adata3.push_back(fmod(tmp1[i] + header_.BoxSize, header_.BoxSize)); + adata3.push_back(fmod(tmp2[i] + header_.BoxSize, header_.BoxSize)); + adata3.push_back(fmod(tmp3[i] + header_.BoxSize, header_.BoxSize)); } - ofs_.write( reinterpret_cast(&adata3[0]), 3*n2read*sizeof(T_store) ); - + ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); + adata3.clear(); npleft -= n2read; - n2read = std::min( curr_block_buf_size,npleft ); + n2read = std::min(curr_block_buf_size, npleft); } iffs1.close(); iffs2.close(); iffs3.close(); - - } - - ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); - - - - + + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + //... 
particle velocities .................................................. - blksize = 3ul*np_this_file*sizeof(T_store); - ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); - - iffs1.open( fnvx, npcdm, wrote_dm*sizeof(T_store) ); - iffs2.open( fnvy, npcdm, wrote_dm*sizeof(T_store) ); - iffs3.open( fnvz, npcdm, wrote_dm*sizeof(T_store) ); - - npleft = nfdm_per_file[ifile];//+nc_per_file[ifile]; - n2read = std::min(curr_block_buf_size,npleft); - while( n2read > 0ul ) + blksize = 3ul * np_this_file * sizeof(T_store); + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + + iffs1.open(fnvx, npcdm, wrote_dm * sizeof(T_store)); + iffs2.open(fnvy, npcdm, wrote_dm * sizeof(T_store)); + iffs3.open(fnvz, npcdm, wrote_dm * sizeof(T_store)); + + npleft = nfdm_per_file[ifile]; //+nc_per_file[ifile]; + n2read = std::min(curr_block_buf_size, npleft); + while (n2read > 0ul) { - iffs1.read( reinterpret_cast(&tmp1[0]), n2read*sizeof(T_store) ); - iffs2.read( reinterpret_cast(&tmp2[0]), n2read*sizeof(T_store) ); - iffs3.read( reinterpret_cast(&tmp3[0]), n2read*sizeof(T_store) ); - - for( size_t i=0; i(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) { - adata3.push_back( tmp1[i] ); - adata3.push_back( tmp2[i] ); - adata3.push_back( tmp3[i] ); + adata3.push_back(tmp1[i]); + adata3.push_back(tmp2[i]); + adata3.push_back(tmp3[i]); } - - ofs_.write( reinterpret_cast(&adata3[0]), 3*n2read*sizeof(T_store) ); - + + ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); + adata3.clear(); npleft -= n2read; - n2read = std::min( curr_block_buf_size,npleft ); + n2read = std::min(curr_block_buf_size, npleft); } - + iffs1.close(); iffs2.close(); iffs3.close(); - - if( bbaryons && nfgas_per_file[ifile] > 0ul ) + + if (bbaryons && nfgas_per_file[ifile] > 0ul) { - iffs1.open( fnbvx, npcdm, wrote_gas*sizeof(T_store) ); 
- iffs2.open( fnbvy, npcdm, wrote_gas*sizeof(T_store) ); - iffs3.open( fnbvz, npcdm, wrote_gas*sizeof(T_store) ); - + iffs1.open(fnbvx, npcdm, wrote_gas * sizeof(T_store)); + iffs2.open(fnbvy, npcdm, wrote_gas * sizeof(T_store)); + iffs3.open(fnbvz, npcdm, wrote_gas * sizeof(T_store)); + npleft = nfgas_per_file[ifile]; - n2read = std::min(curr_block_buf_size,npleft); - while( n2read > 0ul ) + n2read = std::min(curr_block_buf_size, npleft); + while (n2read > 0ul) { - iffs1.read( reinterpret_cast(&tmp1[0]), n2read*sizeof(T_store) ); - iffs2.read( reinterpret_cast(&tmp2[0]), n2read*sizeof(T_store) ); - iffs3.read( reinterpret_cast(&tmp3[0]), n2read*sizeof(T_store) ); - - for( size_t i=0; i(&tmp1[0]), n2read * sizeof(T_store)); + iffs2.read(reinterpret_cast(&tmp2[0]), n2read * sizeof(T_store)); + iffs3.read(reinterpret_cast(&tmp3[0]), n2read * sizeof(T_store)); + + for (size_t i = 0; i < n2read; ++i) { - adata3.push_back( tmp1[i] ); - adata3.push_back( tmp2[i] ); - adata3.push_back( tmp3[i] ); + adata3.push_back(tmp1[i]); + adata3.push_back(tmp2[i]); + adata3.push_back(tmp3[i]); } - - ofs_.write( reinterpret_cast(&adata3[0]), 3*n2read*sizeof(T_store) ); - + + ofs_.write(reinterpret_cast(&adata3[0]), 3 * n2read * sizeof(T_store)); + adata3.clear(); npleft -= n2read; - n2read = std::min( curr_block_buf_size,npleft ); + n2read = std::min(curr_block_buf_size, npleft); } - + iffs1.close(); iffs2.close(); iffs3.close(); - - } - - ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); - + + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + //... particle IDs .......................................................... 
std::vector short_ids; - std::vector long_ids; - - if( bneed_long_ids ) - long_ids.assign(curr_block_buf_size,0); + std::vector long_ids; + + if (bneed_long_ids) + long_ids.assign(curr_block_buf_size, 0); else - short_ids.assign(curr_block_buf_size,0); - - npleft = np_this_file; - n2read = std::min(curr_block_buf_size,npleft); - blksize = sizeof(unsigned)*np_this_file; - - if( bneed_long_ids ) - blksize = sizeof(size_t)*np_this_file; - - + short_ids.assign(curr_block_buf_size, 0); + + npleft = np_this_file; + n2read = std::min(curr_block_buf_size, npleft); + blksize = sizeof(unsigned) * np_this_file; + + if (bneed_long_ids) + blksize = sizeof(size_t) * np_this_file; + //... generate contiguous IDs and store in file .. - ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); - while( n2read > 0ul ) + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + while (n2read > 0ul) { - if( bneed_long_ids ) - { - for( size_t i=0; i(&long_ids[0]), n2read*sizeof(size_t) ); - }else{ - for( size_t i=0; i(&long_ids[0]), n2read * sizeof(size_t)); + } + else + { + for (size_t i = 0; i < n2read; ++i) short_ids[i] = idcount++; - ofs_.write( reinterpret_cast(&short_ids[0]), n2read*sizeof(unsigned) ); - } - npleft -= n2read; - n2read = std::min( curr_block_buf_size,npleft ); + ofs_.write(reinterpret_cast(&short_ids[0]), n2read * sizeof(unsigned)); + } + npleft -= n2read; + n2read = std::min(curr_block_buf_size, npleft); } - ofs_.write( reinterpret_cast(&blksize), sizeof(int) ); - - std::vector().swap( short_ids ); - std::vector().swap( long_ids ); - - + ofs_.write(reinterpret_cast(&blksize), sizeof(int)); + + std::vector().swap(short_ids); + std::vector().swap(long_ids); + //... particle masses ....................................................... 
// multi-mass not supported here - - + ofs_.flush(); ofs_.close(); - - wrote_gas += nfgas_per_file[ifile]; - wrote_dm += nfdm_per_file[ifile] + nc_per_file[ifile]; - wrote_coarse += nc_per_file[ifile]; - - + + wrote_gas += nfgas_per_file[ifile]; + wrote_dm += nfdm_per_file[ifile] + nc_per_file[ifile]; + wrote_coarse += nc_per_file[ifile]; } - - delete[] tmp1; + + delete[] tmp1; delete[] tmp2; - delete[] tmp3; - - remove( fnbx ); - remove( fnby ); - remove( fnbz ); - remove( fnx ); - remove( fny ); - remove( fnz ); - remove( fnbvx ); - remove( fnbvy ); - remove( fnbvz ); - remove( fnvx ); - remove( fnvy ); - remove( fnvz ); - remove( fnm ); + delete[] tmp3; + + remove(fnbx); + remove(fnby); + remove(fnbz); + remove(fnx); + remove(fny); + remove(fnz); + remove(fnbvx); + remove(fnbvy); + remove(fnbvz); + remove(fnvx); + remove(fnvy); + remove(fnvz); + remove(fnm); } - - void get_cic_displacement( size_t icoord, const float* ppos, size_t np, float l, const grid_hierarchy& gh, T_store* valp ) + + void get_cic_displacement(size_t icoord, const float *ppos, size_t np, float l, const grid_hierarchy &gh, T_store *valp) { size_t N = gh.size(gh.levelmax(), 0); - - float facconv = 1.f / l * (float)N/(float)(1ul<("output","gadget_blksize",2*1048576); - + block_buf_size_ = cf_.get_value_safe("output", "gadget_blksize", 2 * 1048576); + //... 
ensure that everyone knows we want to do SPH - cf.insert_value("setup","do_SPH","yes"); - - //bbndparticles_ = !cf_.get_value_safe("output","gadget_nobndpart",false); - npartmax_ = 1<<30; - - nfiles_ = cf.get_value_safe("output","gadget_num_files",1); - - - + cf.insert_value("setup", "do_SPH", "yes"); + + // bbndparticles_ = !cf_.get_value_safe("output","gadget_nobndpart",false); + npartmax_ = 1 << 30; + + nfiles_ = cf.get_value_safe("output", "gadget_num_files", 1); + /****************************************/ - if (nfiles_ > 1 ) + if (nfiles_ > 1) { - for( unsigned ifile=0; ifile levelmin_ +1) + if (levelmax_ > levelmin_ + 1) bmorethan2bnd_ = true; bmultimass_ = true; - if( levelmax_ == levelmin_ ) + if (levelmax_ == levelmin_) bmultimass_ = false; - - - for( int i=0; i<6; ++i ) + + for (int i = 0; i < 6; ++i) { header_.npart[i] = 0; header_.npartTotal[i] = 0; header_.npartTotalHighWord[i] = 0; header_.mass[i] = 0.0; } - - YHe_ = cf.get_value_safe("cosmology","YHe",0.248); - gamma_ = cf.get_value_safe("cosmology","gamma",5.0/3.0); - - do_baryons_ = cf.get_value_safe("setup","baryons",false); - omegab_ = cf.get_value_safe("cosmology","Omega_b",0.045); - + + YHe_ = cf.get_value_safe("cosmology", "YHe", 0.248); + gamma_ = cf.get_value_safe("cosmology", "gamma", 5.0 / 3.0); + + do_baryons_ = cf.get_value_safe("setup", "baryons", false); + omegab_ = cf.get_value_safe("cosmology", "Omega_b", 0.045); + //... write displacements in kpc/h rather than Mpc/h? 
- kpcunits_ = cf.get_value_safe("output","gadget_usekpc",false); - - do_glass_ = cf.get_value_safe("output","glass", false); - if( do_glass_ ) + kpcunits_ = cf.get_value_safe("output", "gadget_usekpc", false); + + do_glass_ = cf.get_value_safe("output", "glass", false); + if (do_glass_) { music::ilog.Print("Will use provided glass rather than Cartesian mesh for particle placement."); - - fname_glass_cdm_ = cf.get_value("output","glass_file_cdm"); - - if( do_baryons_ ) - fname_glass_baryon_ = fname_glass_cdm_;//cf.get_value("output","glass_file_baryon"); + + fname_glass_cdm_ = cf.get_value("output", "glass_file_cdm"); + + if (do_baryons_) + fname_glass_baryon_ = fname_glass_cdm_; // cf.get_value("output","glass_file_baryon"); } - - + //... set time ...................................................... - header_.redshift = cf.get_value("setup","zstart"); - header_.time = 1.0/(1.0+header_.redshift); - + header_.redshift = cf.get_value("setup", "zstart"); + header_.time = 1.0 / (1.0 + header_.redshift); + //... SF flags header_.flag_sfr = 0; header_.flag_feedback = 0; header_.flag_cooling = 0; - - //... + + //... 
header_.num_files = nfiles_; - header_.BoxSize = cf.get_value("setup","boxlength"); - header_.Omega0 = cf.get_value("cosmology","Omega_m"); - omegam_ = header_.Omega0; - omegac_ = omegam_ - omegab_; - - header_.OmegaLambda = cf.get_value("cosmology","Omega_L"); - header_.HubbleParam = cf.get_value("cosmology","H0"); - + header_.BoxSize = cf.get_value("setup", "boxlength"); + header_.Omega0 = cf.get_value("cosmology", "Omega_m"); + omegam_ = header_.Omega0; + omegac_ = omegam_ - omegab_; + + header_.OmegaLambda = cf.get_value("cosmology", "Omega_L"); + header_.HubbleParam = cf.get_value("cosmology", "H0"); + header_.flag_stellarage = 0; header_.flag_metals = 0; - - + header_.flag_entropy_instead_u = 0; -#ifdef SINGLE_PRECISION - header_.flag_doubleprecision = 0; -#else - header_.flag_doubleprecision = 1; -#endif - std::cout << "header_.flag_doubleprecision " << header_.flag_doubleprecision << "\n"; - header_.flag_ic_info = 0; - - if( kpcunits_ ) + + header_.flag_doubleprecision = typeid(real_t) == typeid(double) ? 1 : 0; + std::cout << "header_.flag_doubleprecision " << header_.flag_doubleprecision << "\n"; + header_.flag_ic_info = 0; + + if (kpcunits_) header_.BoxSize *= 1000.0; } - - - void write_dm_mass( const grid_hierarchy& gh ) + + void write_dm_mass(const grid_hierarchy &gh) { double rhoc = 27.7519737; // in h^2 1e10 M_sol / Mpc^3 - - if( kpcunits_ ) + + if (kpcunits_) rhoc *= 10.0; // in h^2 M_sol / kpc^3 - - if(! do_glass_ ) + + if (!do_glass_) { - if( !do_baryons_ ) - header_.mass[1] = header_.Omega0 * rhoc * pow(header_.BoxSize,3.)/pow(2,3*levelmax_); + if (!do_baryons_) + header_.mass[1] = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / pow(2, 3 * levelmax_); else - header_.mass[1] = (header_.Omega0-omegab_) * rhoc * pow(header_.BoxSize,3.)/pow(2,3*levelmax_); + header_.mass[1] = (header_.Omega0 - omegab_) * rhoc * pow(header_.BoxSize, 3.) 
/ pow(2, 3 * levelmax_); } - - if( bmorethan2bnd_ ) + + if (bmorethan2bnd_) { - unsigned long long npcoarse = gh.count_leaf_cells(gh.levelmin(), gh.levelmax()-1); + unsigned long long npcoarse = gh.count_leaf_cells(gh.levelmin(), gh.levelmax() - 1); unsigned long long nwritten = 0; - + std::vector temp_dat; temp_dat.reserve(block_buf_size_); - + char temp_fname[256]; - sprintf( temp_fname, "___ic_temp_%05d.bin", 100*id_dm_mass ); - std::ofstream ofs_temp( temp_fname, std::ios::binary|std::ios::trunc ); - - unsigned long long blksize = sizeof(T_store)*npcoarse; - - ofs_temp.write( (char *)&blksize, sizeof(unsigned long long) ); - - for( int ilevel=gh.levelmax()-1; ilevel>=(int)gh.levelmin(); --ilevel ) + sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_dm_mass); + std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); + + unsigned long long blksize = sizeof(T_store) * npcoarse; + + ofs_temp.write((char *)&blksize, sizeof(unsigned long long)); + + for (int ilevel = gh.levelmax() - 1; ilevel >= (int)gh.levelmin(); --ilevel) { double pmass = 0.0; - - if( !do_baryons_ ) - pmass = header_.Omega0 * rhoc * pow(header_.BoxSize,3.)/pow(2,3*ilevel); + + if (!do_baryons_) + pmass = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / pow(2, 3 * ilevel); else - pmass = (header_.Omega0-omegab_) * rhoc * pow(header_.BoxSize,3.)/pow(2,3*ilevel); - - for( unsigned i=0; isize(0); ++i ) - for( unsigned j=0; jsize(1); ++j ) - for( unsigned k=0; ksize(2); ++k ) - if( ! gh.is_refined(ilevel,i,j,k) ) + pmass = (header_.Omega0 - omegab_) * rhoc * pow(header_.BoxSize, 3.) 
/ pow(2, 3 * ilevel); + + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + if (!gh.is_refined(ilevel, i, j, k)) { - if( temp_dat.size() < block_buf_size_ ) - temp_dat.push_back( pmass ); + if (temp_dat.size() < block_buf_size_) + temp_dat.push_back(pmass); else { - ofs_temp.write( (char*)&temp_dat[0], sizeof(T_store)*block_buf_size_ ); + ofs_temp.write((char *)&temp_dat[0], sizeof(T_store) * block_buf_size_); nwritten += block_buf_size_; temp_dat.clear(); - temp_dat.push_back( pmass ); + temp_dat.push_back(pmass); } } } - - if( temp_dat.size() > 0 ) - { - ofs_temp.write( (char*)&temp_dat[0], sizeof(T_store)*temp_dat.size() ); - nwritten+=temp_dat.size(); + + if (temp_dat.size() > 0) + { + ofs_temp.write((char *)&temp_dat[0], sizeof(T_store) * temp_dat.size()); + nwritten += temp_dat.size(); } - - if( nwritten != npcoarse ) + + if (nwritten != npcoarse) throw std::runtime_error("Internal consistency error while writing temporary file for masses"); - - ofs_temp.write( (char *)&blksize, sizeof(unsigned long long) ); - - if( ofs_temp.bad() ) + + ofs_temp.write((char *)&blksize, sizeof(unsigned long long)); + + if (ofs_temp.bad()) throw std::runtime_error("I/O error while writing temporary file for masses"); - } - else if( gh.levelmax() != gh.levelmin() ) + else if (gh.levelmax() != gh.levelmin()) { - header_.mass[5] = header_.Omega0 * rhoc * pow(header_.BoxSize,3.)/pow(2,3*levelmin_); + header_.mass[5] = header_.Omega0 * rhoc * pow(header_.BoxSize, 3.) / pow(2, 3 * levelmin_); } } - - - void write_dm_position( int coord, const grid_hierarchy& gh ) + + void write_dm_position(int coord, const grid_hierarchy &gh) { //... 
count number of leaf cells ...// unsigned long long npcoarse = 0, npfine = 0; - - npfine = gh.count_leaf_cells(gh.levelmax(), gh.levelmax()); - if( bmultimass_ ) - npcoarse = gh.count_leaf_cells(gh.levelmin(), gh.levelmax()-1); - - - np_fine_dm_ = npfine; - np_fine_gas_ = do_baryons_? npfine : 0ul; - np_coarse_dm_ = npcoarse; - + + npfine = gh.count_leaf_cells(gh.levelmax(), gh.levelmax()); + if (bmultimass_) + npcoarse = gh.count_leaf_cells(gh.levelmin(), gh.levelmax() - 1); + + np_fine_dm_ = npfine; + np_fine_gas_ = do_baryons_ ? npfine : 0ul; + np_coarse_dm_ = npcoarse; + //... determine if we need to shift the coordinates back double *shift = NULL; - - if( cf_.get_value_safe("output","shift_back",false ) ) + + if (cf_.get_value_safe("output", "shift_back", false)) { - if( coord == 0 ) + if (coord == 0) std::cout << " - gadget2 output plug-in will shift particle positions back...\n"; - - double h = 1.0/(1<( "setup", "shift_x" )*h; - shift[1] = -(double)cf_.get_value( "setup", "shift_y" )*h; - shift[2] = -(double)cf_.get_value( "setup", "shift_z" )*h; + shift[0] = -(double)cf_.get_value("setup", "shift_x") * h; + shift[1] = -(double)cf_.get_value("setup", "shift_y") * h; + shift[2] = -(double)cf_.get_value("setup", "shift_z") * h; } - - size_t npart = npfine+npcoarse; + + size_t npart = npfine + npcoarse; size_t nwritten = 0; size_t blksize; - + //... header_.npart[1] = npfine; header_.npart[5] = npcoarse; header_.npartTotal[1] = (unsigned)npfine; header_.npartTotal[5] = (unsigned)npcoarse; - header_.npartTotalHighWord[1] = (unsigned)(npfine>>32); - header_.npartTotalHighWord[5] = (unsigned)(npfine>>32); - - //header_.num_files = (int)ceil((double)npart/(double)npartmax_); - + header_.npartTotalHighWord[1] = (unsigned)(npfine >> 32); + header_.npartTotalHighWord[5] = (unsigned)(npfine >> 32); + + // header_.num_files = (int)ceil((double)npart/(double)npartmax_); + //... collect displacements and convert to absolute coordinates with correct //... 
units std::vector temp_data; - temp_data.reserve( block_buf_size_ ); - + temp_data.reserve(block_buf_size_); + double xfac = header_.BoxSize; - + char temp_fname[256]; - sprintf( temp_fname, "___ic_temp_%05d.bin", 100*id_dm_pos+coord ); - std::ofstream ofs_temp( temp_fname, std::ios::binary|std::ios::trunc ); - - //... if baryons are present, then stagger the two fields - if( do_baryons_ && !do_glass_ ) - { - - - double h = 1. / (1<=(int)gh.levelmin(); --ilevel ) - for( unsigned i=0; isize(0); ++i ) - for( unsigned j=0; jsize(1); ++j ) - for( unsigned k=0; ksize(2); ++k ) - if( ! gh.is_refined(ilevel,i,j,k) ) + + double h = 1. / (1 << gh.levelmax()); + + if (shift == NULL) + { + shift = new double[3]; + shift[0] = 0.0; + shift[1] = 0.0; + shift[2] = 0.0; + } + shift[0] -= 0.5 * h * omegab_ / omegam_; + shift[1] -= 0.5 * h * omegab_ / omegam_; + shift[2] -= 0.5 * h * omegab_ / omegam_; + } + + if (!do_glass_) + { + + blksize = sizeof(T_store) * npart; + ofs_temp.write((char *)&blksize, sizeof(unsigned long long)); + + for (int ilevel = gh.levelmax(); ilevel >= (int)gh.levelmin(); --ilevel) + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + if (!gh.is_refined(ilevel, i, j, k)) { double xx[3]; gh.cell_pos(ilevel, i, j, k, xx); - if( shift != NULL ) + if (shift != NULL) xx[coord] += shift[coord]; - - xx[coord] = fmod( (xx[coord]+(*gh.get_grid(ilevel))(i,j,k))*xfac + header_.BoxSize, header_.BoxSize ); - - if( temp_data.size() < block_buf_size_ ) - temp_data.push_back( xx[coord] ); + + xx[coord] = fmod((xx[coord] + (*gh.get_grid(ilevel))(i, j, k)) * xfac + header_.BoxSize, header_.BoxSize); + + if (temp_data.size() < block_buf_size_) + temp_data.push_back(xx[coord]); else { - ofs_temp.write( (char*)&temp_data[0], sizeof(T_store)*block_buf_size_ ); + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); nwritten += 
block_buf_size_; temp_data.clear(); - temp_data.push_back( xx[coord] ); + temp_data.push_back(xx[coord]); } } } else { - - std::ifstream ofg( fname_glass_cdm_.c_str(), std::ios::binary ); - - if( !ofg.good() ) - music::elog.Print("could not open glass input file \'%s\'",fname_glass_cdm_.c_str()); - + + std::ifstream ofg(fname_glass_cdm_.c_str(), std::ios::binary); + + if (!ofg.good()) + music::elog.Print("could not open glass input file \'%s\'", fname_glass_cdm_.c_str()); + io_header glasshead; unsigned blksz; - - ofg.read( reinterpret_cast(&blksz), sizeof(unsigned) ); - assert( blksz == sizeof(io_header) ); - - ofg.read( reinterpret_cast(&glasshead), sizeof( io_header ) ); - - //size_t nreq = gh.size(gh.levelmax(), 0)*gh.size(gh.levelmax(), 1)*gh.size(gh.levelmax(), 2); + + ofg.read(reinterpret_cast(&blksz), sizeof(unsigned)); + assert(blksz == sizeof(io_header)); + + ofg.read(reinterpret_cast(&glasshead), sizeof(io_header)); + + // size_t nreq = gh.size(gh.levelmax(), 0)*gh.size(gh.levelmax(), 1)*gh.size(gh.levelmax(), 2); /*if( nreq != (size_t)glasshead.npart[1] ) { music::elog.Print("glass file contains %d particles, but should contain %ld",glasshead.npart[1],nreq); throw std::runtime_error("glass file does not contain the right amount of particles"); }*/ - - ofg.read( reinterpret_cast(&blksz), sizeof(unsigned) ); - ofg.read( reinterpret_cast(&blksz), sizeof(unsigned) ); - //assert( blksz == glasshead.npart[1]*sizeof(float)*3 ); - - + + ofg.read(reinterpret_cast(&blksz), sizeof(unsigned)); + ofg.read(reinterpret_cast(&blksz), sizeof(unsigned)); + // assert( blksz == glasshead.npart[1]*sizeof(float)*3 ); + float lglass = glasshead.BoxSize; - - - blksize = sizeof(T_store)*glasshead.npart[1]; - //ofs_temp.write( (char *)&blksize, sizeof(int) ); - ofs_temp.write( (char *)&blksize, sizeof(unsigned long long) ); - + + blksize = sizeof(T_store) * glasshead.npart[1]; + // ofs_temp.write( (char *)&blksize, sizeof(int) ); + ofs_temp.write((char *)&blksize, 
sizeof(unsigned long long)); + header_.npart[1] = glasshead.npart[1]; header_.npartTotal[1] = glasshead.npartTotal[1]; header_.npartTotalHighWord[1] = 0; double rhoc = 27.7519737; - if( kpcunits_ ) + if (kpcunits_) rhoc *= 10.0; // in h^2 M_sol / kpc^3 - - if( do_baryons_ ) - header_.mass[1] = omegac_ * rhoc * pow(header_.BoxSize,3.)/(glasshead.npart[1]); - else - header_.mass[1] = omegam_ * rhoc * pow(header_.BoxSize,3.)/(glasshead.npart[1]); - - // read glass, do interpolation and write - size_t npartdone=0; - size_t npinter = glasshead.npart[1]; - - blksize = sizeof(T_store)*npinter; - ofs_temp.write( (char *)&blksize, sizeof(unsigned long long) ); - - float *pos_tmp = new float[3*block_buf_size_]; - temp_data.assign( block_buf_size_, 0.0 ); - - while( npartdone < npinter ) - { - size_t npart2read = std::min(npinter-npartdone,block_buf_size_); - - ofg.read( reinterpret_cast(&pos_tmp[0]), npart2read*sizeof(float)*3 ); - get_cic_displacement( coord, pos_tmp, npart2read, lglass, gh, &temp_data[0] ); - ofs_temp.write( (char*)&temp_data[0], sizeof(T_store)*npart2read ); - // std::cout << "npart2read " << npart2read << "\n"; - // std::cout << "pos temp " << temp_data[0] << " " << temp_data[1] << " " << temp_data[2] << "\n"; - // std::cout << "pos temp " << temp_data[npart2read-1] << " " << temp_data[npart2read-2] << " " << temp_data[npart2read-3] <<"\n"; - + if (do_baryons_) + header_.mass[1] = omegac_ * rhoc * pow(header_.BoxSize, 3.) / (glasshead.npart[1]); + else + header_.mass[1] = omegam_ * rhoc * pow(header_.BoxSize, 3.) 
/ (glasshead.npart[1]); + + // read glass, do interpolation and write + size_t npartdone = 0; + size_t npinter = glasshead.npart[1]; + + blksize = sizeof(T_store) * npinter; + ofs_temp.write((char *)&blksize, sizeof(unsigned long long)); + + float *pos_tmp = new float[3 * block_buf_size_]; + temp_data.assign(block_buf_size_, 0.0); + + while (npartdone < npinter) + { + size_t npart2read = std::min(npinter - npartdone, block_buf_size_); + + ofg.read(reinterpret_cast(&pos_tmp[0]), npart2read * sizeof(float) * 3); + get_cic_displacement(coord, pos_tmp, npart2read, lglass, gh, &temp_data[0]); + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * npart2read); + + // std::cout << "npart2read " << npart2read << "\n"; + // std::cout << "pos temp " << temp_data[0] << " " << temp_data[1] << " " << temp_data[2] << "\n"; + // std::cout << "pos temp " << temp_data[npart2read-1] << " " << temp_data[npart2read-2] << " " << temp_data[npart2read-3] <<"\n"; + npartdone += npart2read; nwritten += npart2read; - } delete[] pos_tmp; temp_data.clear(); - + // do all lower levels with standard cartesian grid - for( int ilevel=gh.levelmax()-1; ilevel>=(int)gh.levelmin(); --ilevel ) - for( unsigned i=0; isize(0); ++i ) - for( unsigned j=0; jsize(1); ++j ) - for( unsigned k=0; ksize(2); ++k ) - if( ! 
gh.is_refined(ilevel,i,j,k) ) + for (int ilevel = gh.levelmax() - 1; ilevel >= (int)gh.levelmin(); --ilevel) + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + if (!gh.is_refined(ilevel, i, j, k)) { double xx[3]; gh.cell_pos(ilevel, i, j, k, xx); - if( shift != NULL ) + if (shift != NULL) xx[coord] += shift[coord]; - - xx[coord] = fmod( (xx[coord]+(*gh.get_grid(ilevel))(i,j,k))*xfac + header_.BoxSize, header_.BoxSize ); - - if( temp_data.size() < block_buf_size_ ) - temp_data.push_back( xx[coord] ); + + xx[coord] = fmod((xx[coord] + (*gh.get_grid(ilevel))(i, j, k)) * xfac + header_.BoxSize, header_.BoxSize); + + if (temp_data.size() < block_buf_size_) + temp_data.push_back(xx[coord]); else { - ofs_temp.write( (char*)&temp_data[0], sizeof(T_store)*block_buf_size_ ); + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); nwritten += block_buf_size_; temp_data.clear(); - temp_data.push_back( xx[coord] ); + temp_data.push_back(xx[coord]); } } - } - - - - if( temp_data.size() > 0 ) - { - ofs_temp.write( (char*)&temp_data[0], sizeof(T_store)*temp_data.size() ); + + if (temp_data.size() > 0) + { + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * temp_data.size()); nwritten += temp_data.size(); } - - if( nwritten != npart ) + + if (nwritten != npart) throw std::runtime_error("Internal consistency error while writing temporary file for positions"); //... 
dump to temporary file - ofs_temp.write( (char *)&blksize, sizeof(unsigned long long) ); - - if( ofs_temp.bad() ) + ofs_temp.write((char *)&blksize, sizeof(unsigned long long)); + + if (ofs_temp.bad()) throw std::runtime_error("I/O error while writing temporary file for positions"); - + ofs_temp.close(); - - if( shift != NULL ) + + if (shift != NULL) delete[] shift; - } - - void write_dm_velocity( int coord, const grid_hierarchy& gh ) + + void write_dm_velocity(int coord, const grid_hierarchy &gh) { //... count number of leaf cells ...// size_t npcoarse = 0, npfine = 0; - - npfine = gh.count_leaf_cells(gh.levelmax(), gh.levelmax()); - if( bmultimass_ ) - npcoarse = gh.count_leaf_cells(gh.levelmin(), gh.levelmax()-1); - + + npfine = gh.count_leaf_cells(gh.levelmax(), gh.levelmax()); + if (bmultimass_) + npcoarse = gh.count_leaf_cells(gh.levelmin(), gh.levelmax() - 1); + header_.npart[1] = npfine; header_.npart[5] = npcoarse; header_.npartTotal[1] = npfine; header_.npartTotal[5] = npcoarse; header_.npartTotalHighWord[1] = 0; header_.npartTotalHighWord[5] = 0; - + //... collect displacements and convert to absolute coordinates with correct //... 
units std::vector temp_data; - temp_data.reserve( block_buf_size_ ); - - float isqrta = 1.0f/sqrt(header_.time); - float vfac = isqrta*header_.BoxSize; - - if( kpcunits_ ) + temp_data.reserve(block_buf_size_); + + float isqrta = 1.0f / sqrt(header_.time); + float vfac = isqrta * header_.BoxSize; + + if (kpcunits_) vfac /= 1000.0; - - unsigned npart = npfine+npcoarse; + + unsigned npart = npfine + npcoarse; unsigned nwritten = 0; unsigned long long blksize; - + char temp_fname[256]; - sprintf( temp_fname, "___ic_temp_%05d.bin", 100*id_dm_vel+coord ); - std::ofstream ofs_temp( temp_fname, std::ios::binary|std::ios::trunc ); - - - if( !do_glass_ ) + sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_dm_vel + coord); + std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); + + if (!do_glass_) { - - blksize = sizeof(T_store)*npart; - ofs_temp.write( (char *)&blksize, sizeof(unsigned long long) ); - - for( int ilevel=levelmax_; ilevel>=(int)levelmin_; --ilevel ) - for( unsigned i=0; isize(0); ++i ) - for( unsigned j=0; jsize(1); ++j ) - for( unsigned k=0; ksize(2); ++k ) - if( ! 
gh.is_refined(ilevel,i,j,k) ) - { - if( temp_data.size() < block_buf_size_ ) - temp_data.push_back( (*gh.get_grid(ilevel))(i,j,k) * vfac ); - else + + blksize = sizeof(T_store) * npart; + ofs_temp.write((char *)&blksize, sizeof(unsigned long long)); + + for (int ilevel = levelmax_; ilevel >= (int)levelmin_; --ilevel) + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + if (!gh.is_refined(ilevel, i, j, k)) + { + if (temp_data.size() < block_buf_size_) + temp_data.push_back((*gh.get_grid(ilevel))(i, j, k) * vfac); + else { - ofs_temp.write( (char*)&temp_data[0], sizeof(T_store)*block_buf_size_ ); + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); nwritten += block_buf_size_; temp_data.clear(); - temp_data.push_back( (*gh.get_grid(ilevel))(i,j,k) * vfac ); + temp_data.push_back((*gh.get_grid(ilevel))(i, j, k) * vfac); } - } } else { - - std::ifstream ofg( fname_glass_cdm_.c_str(), std::ios::binary ); - - if( !ofg.good() ) - music::elog.Print("could not open glass input file \'%s\'",fname_glass_cdm_.c_str()); - + + std::ifstream ofg(fname_glass_cdm_.c_str(), std::ios::binary); + + if (!ofg.good()) + music::elog.Print("could not open glass input file \'%s\'", fname_glass_cdm_.c_str()); + io_header glasshead; unsigned blksz; - - ofg.read( reinterpret_cast(&blksz), sizeof(unsigned) ); - assert( blksz == sizeof(io_header) ); - - ofg.read( reinterpret_cast(&glasshead), sizeof( io_header ) ); - - ofg.read( reinterpret_cast(&blksz), sizeof(unsigned) ); - ofg.read( reinterpret_cast(&blksz), sizeof(unsigned) ); - //assert( blksz == glasshead.npart[1]*sizeof(float)*3 ); - + + ofg.read(reinterpret_cast(&blksz), sizeof(unsigned)); + assert(blksz == sizeof(io_header)); + + ofg.read(reinterpret_cast(&glasshead), sizeof(io_header)); + + ofg.read(reinterpret_cast(&blksz), sizeof(unsigned)); + ofg.read(reinterpret_cast(&blksz), 
sizeof(unsigned)); + // assert( blksz == glasshead.npart[1]*sizeof(float)*3 ); + header_.npart[1] = glasshead.npart[1]; header_.npartTotal[1] = glasshead.npartTotal[1]; header_.npartTotalHighWord[1] = glasshead.npartTotalHighWord[1]; float lglass = glasshead.BoxSize; - + // read glass, do interpolation and write - size_t npartdone=0; + size_t npartdone = 0; size_t npinter = glasshead.npart[1]; - - blksize = sizeof(T_store)*npinter; - ofs_temp.write( (char *)&blksize, sizeof(unsigned long long) ); - - float *pos_tmp = new float[3*block_buf_size_]; - temp_data.assign( block_buf_size_, 0.0 ); - - while( npartdone < npinter ) + + blksize = sizeof(T_store) * npinter; + ofs_temp.write((char *)&blksize, sizeof(unsigned long long)); + + float *pos_tmp = new float[3 * block_buf_size_]; + temp_data.assign(block_buf_size_, 0.0); + + while (npartdone < npinter) { - size_t npart2read = std::min(npinter-npartdone,block_buf_size_); - - ofg.read( reinterpret_cast(&pos_tmp[0]), npart2read*sizeof(float)*3 ); - get_cic_velocity( pos_tmp, npart2read, lglass, gh, &temp_data[0] ); - ofs_temp.write( (char*)&temp_data[0], sizeof(T_store)*npart2read ); - + size_t npart2read = std::min(npinter - npartdone, block_buf_size_); + + ofg.read(reinterpret_cast(&pos_tmp[0]), npart2read * sizeof(float) * 3); + get_cic_velocity(pos_tmp, npart2read, lglass, gh, &temp_data[0]); + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * npart2read); + npartdone += npart2read; nwritten += npart2read; - } - + delete[] pos_tmp; temp_data.clear(); - - for( int ilevel=levelmax_-1; ilevel>=(int)levelmin_; --ilevel ) - for( unsigned i=0; isize(0); ++i ) - for( unsigned j=0; jsize(1); ++j ) - for( unsigned k=0; ksize(2); ++k ) - if( ! 
gh.is_refined(ilevel,i,j,k) ) - { - if( temp_data.size() < block_buf_size_ ) - temp_data.push_back( (*gh.get_grid(ilevel))(i,j,k) * vfac ); - else + + for (int ilevel = levelmax_ - 1; ilevel >= (int)levelmin_; --ilevel) + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + if (!gh.is_refined(ilevel, i, j, k)) + { + if (temp_data.size() < block_buf_size_) + temp_data.push_back((*gh.get_grid(ilevel))(i, j, k) * vfac); + else { - ofs_temp.write( (char*)&temp_data[0], sizeof(T_store)*block_buf_size_ ); + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); nwritten += block_buf_size_; temp_data.clear(); - temp_data.push_back( (*gh.get_grid(ilevel))(i,j,k) * vfac ); + temp_data.push_back((*gh.get_grid(ilevel))(i, j, k) * vfac); } - } - - } - - if( temp_data.size() > 0 ) - { - ofs_temp.write( (char*)&temp_data[0], temp_data.size()*sizeof(T_store) ); + + if (temp_data.size() > 0) + { + ofs_temp.write((char *)&temp_data[0], temp_data.size() * sizeof(T_store)); nwritten += temp_data.size(); } - - if( nwritten != npart ) + + if (nwritten != npart) throw std::runtime_error("Internal consistency error while writing temporary file for velocities"); - - ofs_temp.write( (char *)&blksize, sizeof(int) ); - - if( ofs_temp.bad() ) + + ofs_temp.write((char *)&blksize, sizeof(int)); + + if (ofs_temp.bad()) throw std::runtime_error("I/O error while writing temporary file for velocities"); - + ofs_temp.close(); } - - void write_dm_density( const grid_hierarchy& gh ) + + void write_dm_density(const grid_hierarchy &gh) { //... we don't care about DM density for Gadget } - - void write_dm_potential( const grid_hierarchy& gh ) - { } - - void write_gas_potential( const grid_hierarchy& gh ) - { } - - - + + void write_dm_potential(const grid_hierarchy &gh) + { + } + + void write_gas_potential(const grid_hierarchy &gh) + { + } + //... 
write data for gas -- don't do this - void write_gas_velocity( int coord, const grid_hierarchy& gh ) - { + void write_gas_velocity(int coord, const grid_hierarchy &gh) + { //... count number of leaf cells ...// size_t npcoarse = 0, npfine = 0; - - npfine = gh.count_leaf_cells(gh.levelmax(), gh.levelmax()); - + + npfine = gh.count_leaf_cells(gh.levelmax(), gh.levelmax()); + header_.npart[2] = npfine; header_.npartTotal[2] = (unsigned)npfine; - header_.npartTotalHighWord[2] = (unsigned)(npfine>>32); - + header_.npartTotalHighWord[2] = (unsigned)(npfine >> 32); + //... collect displacements and convert to absolute coordinates with correct //... units std::vector temp_data; - temp_data.reserve( block_buf_size_ ); - - float isqrta = 1.0f/sqrt(header_.time); - float vfac = isqrta*header_.BoxSize; - - if( kpcunits_ ) + temp_data.reserve(block_buf_size_); + + float isqrta = 1.0f / sqrt(header_.time); + float vfac = isqrta * header_.BoxSize; + + if (kpcunits_) vfac /= 1000.0; - - unsigned npart = npfine+npcoarse; + + unsigned npart = npfine + npcoarse; unsigned nwritten = 0; - + char temp_fname[256]; - sprintf( temp_fname, "___ic_temp_%05d.bin", 100*id_gas_vel+coord ); - std::ofstream ofs_temp( temp_fname, std::ios::binary|std::ios::trunc ); + sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_gas_vel + coord); + std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); unsigned long long blksize; - - - if(!do_glass_) + + if (!do_glass_) { - blksize = sizeof(T_store)*npart; - ofs_temp.write( (char *)&blksize, sizeof(unsigned long long) ); - - + blksize = sizeof(T_store) * npart; + ofs_temp.write((char *)&blksize, sizeof(unsigned long long)); + const unsigned ilevel = gh.levelmax(); - const unsigned - nx = gh.get_grid(ilevel)->size(0), - ny = gh.get_grid(ilevel)->size(1), - nz = gh.get_grid(ilevel)->size(2); - - for( unsigned i=0; isize(0), + ny = gh.get_grid(ilevel)->size(1), + nz = gh.get_grid(ilevel)->size(2); + + for (unsigned i = 0; i < nx; ++i) + for 
(unsigned j = 0; j < ny; ++j) + for (unsigned k = 0; k < nz; ++k) + { + double v = (*gh.get_grid(ilevel))(i, j, k); + + if (temp_data.size() < block_buf_size_) + temp_data.push_back(v * vfac); + else { - ofs_temp.write( (char*)&temp_data[0], sizeof(T_store)*block_buf_size_ ); + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); nwritten += block_buf_size_; temp_data.clear(); - temp_data.push_back( v * vfac ); + temp_data.push_back(v * vfac); } - } - }else{ - - std::ifstream ofg( fname_glass_baryon_.c_str(), std::ios::binary ); - - if( !ofg.good() ) - music::elog.Print("could not open glass input file \'%s\'",fname_glass_cdm_.c_str()); - + } + else + { + + std::ifstream ofg(fname_glass_baryon_.c_str(), std::ios::binary); + + if (!ofg.good()) + music::elog.Print("could not open glass input file \'%s\'", fname_glass_cdm_.c_str()); + io_header glasshead; unsigned blksz; - - ofg.read( reinterpret_cast(&blksz), sizeof(unsigned) ); - assert( blksz == sizeof(io_header) ); - - ofg.read( reinterpret_cast(&glasshead), sizeof( io_header ) ); - - //size_t nreq = gh.size(gh.levelmax(), 0)*gh.size(gh.levelmax(), 1)*gh.size(gh.levelmax(), 2); + + ofg.read(reinterpret_cast(&blksz), sizeof(unsigned)); + assert(blksz == sizeof(io_header)); + + ofg.read(reinterpret_cast(&glasshead), sizeof(io_header)); + + // size_t nreq = gh.size(gh.levelmax(), 0)*gh.size(gh.levelmax(), 1)*gh.size(gh.levelmax(), 2); /*if( nreq != (size_t)glasshead.npart[1] ) { music::elog.Print("glass file contains %d particles, but should contain %ld",glasshead.npart[1],nreq); throw std::runtime_error("glass file does not contain the right amount of particles"); }*/ - - ofg.read( reinterpret_cast(&blksz), sizeof(unsigned) ); - ofg.read( reinterpret_cast(&blksz), sizeof(unsigned) ); - //assert( blksz == (glasshead.npart[1]+glasshead.npart[2])*sizeof(float)*3 ); - //ofg.seekg( sizeof(float)*3*glasshead.npart[1], std::ios_base::cur ); - + + ofg.read(reinterpret_cast(&blksz), sizeof(unsigned)); + 
ofg.read(reinterpret_cast(&blksz), sizeof(unsigned)); + // assert( blksz == (glasshead.npart[1]+glasshead.npart[2])*sizeof(float)*3 ); + // ofg.seekg( sizeof(float)*3*glasshead.npart[1], std::ios_base::cur ); + // do the highest level with the glass float lglass = glasshead.BoxSize; - + header_.npart[2] = glasshead.npart[2]; header_.npartTotal[2] = glasshead.npartTotal[2]; header_.npartTotalHighWord[2] = glasshead.npartTotalHighWord[2]; - + // read glass, do interpolation and write - size_t npartdone=0; + size_t npartdone = 0; size_t npinter = (size_t)glasshead.npart[2]; - - - blksize = sizeof(T_store)*npinter; - ofs_temp.write( (char *)&blksize, sizeof(unsigned long long) ); - - float *pos_tmp = new float[3*block_buf_size_]; - temp_data.assign( block_buf_size_, 0.0 ); - - while( npartdone < npinter ) + + blksize = sizeof(T_store) * npinter; + ofs_temp.write((char *)&blksize, sizeof(unsigned long long)); + + float *pos_tmp = new float[3 * block_buf_size_]; + temp_data.assign(block_buf_size_, 0.0); + + while (npartdone < npinter) { - size_t npart2read = std::min(npinter-npartdone,block_buf_size_); - - ofg.read( reinterpret_cast(&pos_tmp[0]), npart2read*sizeof(float)*3 ); - get_cic_velocity( pos_tmp, npart2read, lglass, gh, &temp_data[0] ); - ofs_temp.write( (char*)&temp_data[0], sizeof(T_store)*npart2read ); - + size_t npart2read = std::min(npinter - npartdone, block_buf_size_); + + ofg.read(reinterpret_cast(&pos_tmp[0]), npart2read * sizeof(float) * 3); + get_cic_velocity(pos_tmp, npart2read, lglass, gh, &temp_data[0]); + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * npart2read); + npartdone += npart2read; nwritten += npart2read; } - + delete[] pos_tmp; temp_data.clear(); - - for( int ilevel=levelmax_-1; ilevel>=(int)levelmin_; --ilevel ) - for( unsigned i=0; isize(0); ++i ) - for( unsigned j=0; jsize(1); ++j ) - for( unsigned k=0; ksize(2); ++k ) - if( ! 
gh.is_refined(ilevel,i,j,k) ) - { - if( temp_data.size() < block_buf_size_ ) - temp_data.push_back( (*gh.get_grid(ilevel))(i,j,k) * vfac ); - else + + for (int ilevel = levelmax_ - 1; ilevel >= (int)levelmin_; --ilevel) + for (unsigned i = 0; i < gh.get_grid(ilevel)->size(0); ++i) + for (unsigned j = 0; j < gh.get_grid(ilevel)->size(1); ++j) + for (unsigned k = 0; k < gh.get_grid(ilevel)->size(2); ++k) + if (!gh.is_refined(ilevel, i, j, k)) + { + if (temp_data.size() < block_buf_size_) + temp_data.push_back((*gh.get_grid(ilevel))(i, j, k) * vfac); + else { - ofs_temp.write( (char*)&temp_data[0], sizeof(T_store)*block_buf_size_ ); + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); nwritten += block_buf_size_; temp_data.clear(); - temp_data.push_back( (*gh.get_grid(ilevel))(i,j,k) * vfac ); + temp_data.push_back((*gh.get_grid(ilevel))(i, j, k) * vfac); } - } } - - if( temp_data.size() > 0 ) - { - ofs_temp.write( (char*)&temp_data[0], temp_data.size()*sizeof(T_store) ); + + if (temp_data.size() > 0) + { + ofs_temp.write((char *)&temp_data[0], temp_data.size() * sizeof(T_store)); nwritten += temp_data.size(); } - - if( nwritten != npart ) + + if (nwritten != npart) throw std::runtime_error("Internal consistency error while writing temporary file for gas velocities"); - - ofs_temp.write( (char *)&blksize, sizeof(int) ); - - if( ofs_temp.bad() ) + + ofs_temp.write((char *)&blksize, sizeof(int)); + + if (ofs_temp.bad()) throw std::runtime_error("I/O error while writing temporary file for gas velocities"); - + ofs_temp.close(); } - - + //... write only for fine level - void write_gas_position( int coord, const grid_hierarchy& gh ) - { + void write_gas_position(int coord, const grid_hierarchy &gh) + { //... count number of leaf cells ...// unsigned long long npfine = 0; - - npfine = gh.count_leaf_cells(gh.levelmax(), gh.levelmax()); - + + npfine = gh.count_leaf_cells(gh.levelmax(), gh.levelmax()); + //... 
determine if we need to shift the coordinates back double *shift = NULL; - - if( cf_.get_value_safe("output","shift_back",false ) ) + + if (cf_.get_value_safe("output", "shift_back", false)) { - if( coord == 0 ) + if (coord == 0) std::cout << " - gadget2 output plug-in will shift particle positions back...\n"; - - double h = 1.0/(1<( "setup", "shift_x" )*h; - shift[1] = -(double)cf_.get_value( "setup", "shift_y" )*h; - shift[2] = -(double)cf_.get_value( "setup", "shift_z" )*h; + shift[0] = -(double)cf_.get_value("setup", "shift_x") * h; + shift[1] = -(double)cf_.get_value("setup", "shift_y") * h; + shift[2] = -(double)cf_.get_value("setup", "shift_z") * h; } - + unsigned long long npart = npfine; unsigned long long nwritten = 0; - + //... header_.npart[2] = npfine; header_.npartTotal[2] = (unsigned)npfine; - header_.npartTotalHighWord[2] = (unsigned)(npfine>>32); - - //header_.num_files = (int)ceil((double)npart/(double)npartmax_); - + header_.npartTotalHighWord[2] = (unsigned)(npfine >> 32); + + // header_.num_files = (int)ceil((double)npart/(double)npartmax_); + //... collect displacements and convert to absolute coordinates with correct //... units std::vector temp_data; - temp_data.reserve( block_buf_size_ ); - - + temp_data.reserve(block_buf_size_); + char temp_fname[256]; - sprintf( temp_fname, "___ic_temp_%05d.bin", 100*id_gas_pos+coord ); - std::ofstream ofs_temp( temp_fname, std::ios::binary|std::ios::trunc ); - + sprintf(temp_fname, "___ic_temp_%05d.bin", 100 * id_gas_pos + coord); + std::ofstream ofs_temp(temp_fname, std::ios::binary | std::ios::trunc); + unsigned long long blksize; - + double xfac = header_.BoxSize; - - //... shift particle positions (this has to be done as the same shift - //... is used when computing the convolution kernel for SPH baryons) - if( do_baryons_ ) - { - double h = 1. 
/ (1<size(0), - ny = gh.get_grid(ilevel)->size(1), - nz = gh.get_grid(ilevel)->size(2); - - for( unsigned i=0; isize(0), + ny = gh.get_grid(ilevel)->size(1), + nz = gh.get_grid(ilevel)->size(2); + + for (unsigned i = 0; i < nx; ++i) + for (unsigned j = 0; j < ny; ++j) + for (unsigned k = 0; k < nz; ++k) + { double xx[3]; gh.cell_pos(ilevel, i, j, k, xx); - if( shift != NULL ) + if (shift != NULL) xx[coord] += shift[coord]; - - double v = (*gh.get_grid(ilevel))(i,j,k); - - xx[coord] = fmod( (xx[coord]+v)*xfac + header_.BoxSize, header_.BoxSize ); - - if( temp_data.size() < block_buf_size_ ) - temp_data.push_back( xx[coord] ); + + double v = (*gh.get_grid(ilevel))(i, j, k); + + xx[coord] = fmod((xx[coord] + v) * xfac + header_.BoxSize, header_.BoxSize); + + if (temp_data.size() < block_buf_size_) + temp_data.push_back(xx[coord]); else { - ofs_temp.write( (char*)&temp_data[0], sizeof(T_store)*block_buf_size_ ); + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * block_buf_size_); nwritten += block_buf_size_; temp_data.clear(); - temp_data.push_back( xx[coord] ); + temp_data.push_back(xx[coord]); } - } - }else{ - - std::ifstream ofg( fname_glass_baryon_.c_str(), std::ios::binary ); - - if( !ofg.good() ) - music::elog.Print("could not open glass input file \'%s\'",fname_glass_cdm_.c_str()); - + } + else + { + + std::ifstream ofg(fname_glass_baryon_.c_str(), std::ios::binary); + + if (!ofg.good()) + music::elog.Print("could not open glass input file \'%s\'", fname_glass_cdm_.c_str()); + io_header glasshead; unsigned blksz; - - ofg.read( reinterpret_cast(&blksz), sizeof(unsigned) ); - assert( blksz == sizeof(io_header) ); - - ofg.read( reinterpret_cast(&glasshead), sizeof( io_header ) ); - - //size_t nreq = gh.size(gh.levelmax(), 0)*gh.size(gh.levelmax(), 1)*gh.size(gh.levelmax(), 2); + + ofg.read(reinterpret_cast(&blksz), sizeof(unsigned)); + assert(blksz == sizeof(io_header)); + + ofg.read(reinterpret_cast(&glasshead), sizeof(io_header)); + + // size_t nreq = 
gh.size(gh.levelmax(), 0)*gh.size(gh.levelmax(), 1)*gh.size(gh.levelmax(), 2); /*if( nreq != (size_t)glasshead.npart[1] ) { music::elog.Print("glass file contains %d particles, but should contain %ld",glasshead.npart[1],nreq); throw std::runtime_error("glass file does not contain the right amount of particles"); }*/ - - ofg.read( reinterpret_cast(&blksz), sizeof(unsigned) ); - ofg.read( reinterpret_cast(&blksz), sizeof(unsigned) ); - //assert( blksz == (glasshead.npart[1]+glasshead.npart[2])*sizeof(float)*3 ); - //ofg.seekg( sizeof(float)*3*glasshead.npart[1], std::ios_base::cur ); - + + ofg.read(reinterpret_cast(&blksz), sizeof(unsigned)); + ofg.read(reinterpret_cast(&blksz), sizeof(unsigned)); + // assert( blksz == (glasshead.npart[1]+glasshead.npart[2])*sizeof(float)*3 ); + // ofg.seekg( sizeof(float)*3*glasshead.npart[1], std::ios_base::cur ); + float lglass = glasshead.BoxSize; - - - blksize = sizeof(T_store)*glasshead.npart[1]; - ofs_temp.write( (char *)&blksize, sizeof(unsigned long long) ); - + + blksize = sizeof(T_store) * glasshead.npart[1]; + ofs_temp.write((char *)&blksize, sizeof(unsigned long long)); + header_.npart[2] = glasshead.npart[2]; header_.npartTotal[2] = glasshead.npartTotal[2]; header_.npartTotalHighWord[2] = glasshead.npartTotalHighWord[2]; - + double rhoc = 27.7519737; - if( kpcunits_ ) + if (kpcunits_) rhoc *= 10.0; // in h^2 M_sol / kpc^3 - - header_.mass[2] = omegab_ * rhoc * pow(header_.BoxSize,3.)/(glasshead.npart[2]); - + + header_.mass[2] = omegab_ * rhoc * pow(header_.BoxSize, 3.) 
/ (glasshead.npart[2]); + // read glass, do interpolation and write - size_t npartdone=0; + size_t npartdone = 0; size_t npinter = glasshead.npart[2]; - - blksize = sizeof(T_store)*npinter; - ofs_temp.write( (char *)&blksize, sizeof(unsigned long long) ); - - float *pos_tmp = new float[3*block_buf_size_]; - temp_data.assign( block_buf_size_, 0.0 ); - - while( npartdone < npinter ) + + blksize = sizeof(T_store) * npinter; + ofs_temp.write((char *)&blksize, sizeof(unsigned long long)); + + float *pos_tmp = new float[3 * block_buf_size_]; + temp_data.assign(block_buf_size_, 0.0); + + while (npartdone < npinter) { - size_t npart2read = std::min(npinter-npartdone,block_buf_size_); - - ofg.read( reinterpret_cast(&pos_tmp[0]), npart2read*sizeof(float)*3 ); - get_cic_displacement( coord, pos_tmp, npart2read, lglass, gh, &temp_data[0] ); - ofs_temp.write( (char*)&temp_data[0], sizeof(T_store)*npart2read ); - + size_t npart2read = std::min(npinter - npartdone, block_buf_size_); + + ofg.read(reinterpret_cast(&pos_tmp[0]), npart2read * sizeof(float) * 3); + get_cic_displacement(coord, pos_tmp, npart2read, lglass, gh, &temp_data[0]); + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * npart2read); + npartdone += npart2read; nwritten += npart2read; - } - + delete[] pos_tmp; - temp_data.clear(); + temp_data.clear(); } - - if( temp_data.size() > 0 ) - { - ofs_temp.write( (char*)&temp_data[0], sizeof(T_store)*temp_data.size() ); + + if (temp_data.size() > 0) + { + ofs_temp.write((char *)&temp_data[0], sizeof(T_store) * temp_data.size()); nwritten += temp_data.size(); } - - if( nwritten != npart ) + + if (nwritten != npart) throw std::runtime_error("Internal consistency error while writing temporary file for gas positions"); - + //... 
dump to temporary file - ofs_temp.write( (char *)&blksize, sizeof(unsigned long long) ); - - if( ofs_temp.bad() ) + ofs_temp.write((char *)&blksize, sizeof(unsigned long long)); + + if (ofs_temp.bad()) throw std::runtime_error("I/O error while writing temporary file for gas positions"); - + ofs_temp.close(); - - if( shift != NULL ) + + if (shift != NULL) delete[] shift; } - - void write_gas_density( const grid_hierarchy& gh ) - { + + void write_gas_density(const grid_hierarchy &gh) + { double rhoc = 27.7519737; // h^2 1e10 M_sol / Mpc^3 - - if( kpcunits_ ) + + if (kpcunits_) rhoc *= 10.0; // in h^2 M_sol / kpc^3 - - if( do_baryons_ && !do_glass_ ) - header_.mass[2] = omegab_ * rhoc * pow(header_.BoxSize,3.)/pow(2,3*levelmax_); + + if (do_baryons_ && !do_glass_) + header_.mass[2] = omegab_ * rhoc * pow(header_.BoxSize, 3.) / pow(2, 3 * levelmax_); } - - void finalize( void ) - { + + void finalize(void) + { this->assemble_gadget_file(); } }; - - -namespace{ - output_plugin_creator_concrete< gadget2_2comp_output_plugin > creator1("gadget2_2c"); -#ifndef SINGLE_PRECISION - output_plugin_creator_concrete< gadget2_2comp_output_plugin > creator2("gadget2_2c_double"); -#endif +namespace +{ + output_plugin_creator_concrete> creator1("gadget2_2c"); + output_plugin_creator_concrete> creator2("gadget2_2c_double"); } - diff --git a/src/plugins/output_gadget_tetmesh.cc b/src/plugins/output_gadget_tetmesh.cc index 24e9b39..bd8d957 100644 --- a/src/plugins/output_gadget_tetmesh.cc +++ b/src/plugins/output_gadget_tetmesh.cc @@ -1573,8 +1573,6 @@ public: namespace{ output_plugin_creator_concrete< gadget_tetmesh_output_plugin > creator1("gadget_tetmesh"); -#ifndef SINGLE_PRECISION output_plugin_creator_concrete< gadget_tetmesh_output_plugin > creator2("gadget_tetmesh_double"); -#endif } diff --git a/src/plugins/output_tipsy.cc b/src/plugins/output_tipsy.cc index beee4fc..2b81efd 100644 --- a/src/plugins/output_tipsy.cc +++ b/src/plugins/output_tipsy.cc @@ -1107,9 +1107,7 @@ int 
tipsy_output_plugin::xdr_dump( XDR *xdrs, double*p ) namespace{ output_plugin_creator_concrete< tipsy_output_plugin > creator1("tipsy"); - //#ifndef SINGLE_PRECISION output_plugin_creator_concrete< tipsy_output_plugin > creator2("tipsy_double"); - //#endif } diff --git a/src/plugins/output_tipsy_resample.cc b/src/plugins/output_tipsy_resample.cc index 41e68ad..41ef8dc 100644 --- a/src/plugins/output_tipsy_resample.cc +++ b/src/plugins/output_tipsy_resample.cc @@ -1396,9 +1396,7 @@ int tipsy_output_plugin_res < double >::xdr_dump (XDR * xdrs, double *p) namespace { output_plugin_creator_concrete< tipsy_output_plugin_res >creator1 ("tipsy_resample"); -#ifndef SINGLE_PRECISION output_plugin_creator_concrete< tipsy_output_plugin_res >creator2 ("tipsy_double_resample"); -#endif } #endif // defined(HAVE_TIRPC) \ No newline at end of file diff --git a/src/plugins/random_music_wnoise_generator.cc b/src/plugins/random_music_wnoise_generator.cc index f3e7439..61d4dd2 100644 --- a/src/plugins/random_music_wnoise_generator.cc +++ b/src/plugins/random_music_wnoise_generator.cc @@ -40,8 +40,8 @@ void rapid_proto_ngenic_rng(size_t res, long baseseed, music_wnoise_generator seedtable[(res - 1 - j) * res + (res - 1 - i)] = 0x7fffffff * gsl_rng_uniform(random_generator); } - fftw_real *rnoise = new fftw_real[res * res * (res + 2)]; - fftw_complex *knoise = reinterpret_cast(rnoise); + real_t *rnoise = new real_t[res * res * (res + 2)]; + complex_t *knoise = reinterpret_cast(rnoise); double fnorm = 1. / sqrt(res * res * res); @@ -126,26 +126,9 @@ void rapid_proto_ngenic_rng(size_t res, long baseseed, music_wnoise_generator delete[] seedtable; //... 
perform FT to real space - -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan plan = fftwf_plan_dft_c2r_3d(res, res, res, knoise, rnoise, FFTW_ESTIMATE); - fftwf_execute(plan); - fftwf_destroy_plan(plan); -#else - fftw_plan plan = fftw_plan_dft_c2r_3d(res, res, res, knoise, rnoise, FFTW_ESTIMATE); - fftw_execute(plan); - fftw_destroy_plan(plan); -#endif -#else - rfftwnd_plan plan = rfftw3d_create_plan(res, res, res, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), plan, knoise, NULL); -#else - rfftwnd_one_complex_to_real(plan, knoise, NULL); -#endif - rfftwnd_destroy_plan(plan); -#endif + fftw_plan_t plan = FFTW_API(plan_dft_c2r_3d)(res, res, res, knoise, rnoise, FFTW_ESTIMATE); + FFTW_API(execute)(plan); + FFTW_API(destroy_plan)(plan); // copy to array that holds the random numbers @@ -443,37 +426,25 @@ music_wnoise_generator::music_wnoise_generator(/*const*/ music_wnoise_generat ncubes_ = 1; baseseed_ = -2; - if (sizeof(fftw_real) != sizeof(T)) + if (sizeof(real_t) != sizeof(T)) { - music::elog.Print("type mismatch with fftw_real in k-space averaging"); - throw std::runtime_error("type mismatch with fftw_real in k-space averaging"); + music::elog.Print("type mismatch with real_t in k-space averaging"); + throw std::runtime_error("type mismatch with real_t in k-space averaging"); } - fftw_real - *rfine = new fftw_real[(size_t)rc.res_ * (size_t)rc.res_ * 2 * ((size_t)rc.res_ / 2 + 1)], - *rcoarse = new fftw_real[(size_t)res_ * (size_t)res_ * 2 * ((size_t)res_ / 2 + 1)]; + real_t + *rfine = new real_t[(size_t)rc.res_ * (size_t)rc.res_ * 2 * ((size_t)rc.res_ / 2 + 1)], + *rcoarse = new real_t[(size_t)res_ * (size_t)res_ * 2 * ((size_t)res_ / 2 + 1)]; - fftw_complex - *ccoarse = reinterpret_cast(rcoarse), - *cfine = reinterpret_cast(rfine); + complex_t + *ccoarse = reinterpret_cast(rcoarse), + *cfine = reinterpret_cast(rfine); int nx(rc.res_), ny(rc.res_), nz(rc.res_), 
nxc(res_), nyc(res_), nzc(res_); -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan - pf = fftwf_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE), - ipc = fftwf_plan_dft_c2r_3d(nxc, nyc, nzc, ccoarse, rcoarse, FFTW_ESTIMATE); -#else - fftw_plan - pf = fftw_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE), - ipc = fftw_plan_dft_c2r_3d(nxc, nyc, nzc, ccoarse, rcoarse, FFTW_ESTIMATE); -#endif -#else - rfftwnd_plan - pf = rfftw3d_create_plan(nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE), - ipc = rfftw3d_create_plan(nxc, nyc, nzc, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); -#endif + fftw_plan_t + pf = FFTW_API(plan_dft_r2c_3d)(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE), + ipc = FFTW_API(plan_dft_c2r_3d)(nxc, nyc, nzc, ccoarse, rcoarse, FFTW_ESTIMATE); #pragma omp parallel for for (int i = 0; i < nx; i++) @@ -484,19 +455,7 @@ music_wnoise_generator::music_wnoise_generator(/*const*/ music_wnoise_generat rfine[q] = rc(i, j, k); } -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(pf); -#else - fftw_execute(pf); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pf, rfine, NULL); -#else - rfftwnd_one_real_to_complex(pf, rfine, NULL); -#endif -#endif + FFTW_API(execute)(pf); double fftnorm = 1.0 / ((double)nxc * (double)nyc * (double)nzc); @@ -532,19 +491,9 @@ music_wnoise_generator::music_wnoise_generator(/*const*/ music_wnoise_generat } delete[] rfine; -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(ipc); -#else - fftw_execute(ipc); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), ipc, ccoarse, NULL); -#else - rfftwnd_one_complex_to_real(ipc, ccoarse, NULL); -#endif -#endif + + FFTW_API(execute)(ipc); + rnums_.push_back(new Meshvar(res_, 0, 0, 0)); cubemap_[0] = 0; // map all to single array @@ -563,18 +512,8 @@ music_wnoise_generator::music_wnoise_generator(/*const*/ music_wnoise_generat delete[] rcoarse; -#ifdef 
FFTW3 -#ifdef SINGLE_PRECISION - fftwf_destroy_plan(pf); - fftwf_destroy_plan(ipc); -#else - fftw_destroy_plan(pf); - fftw_destroy_plan(ipc); -#endif -#else - rfftwnd_destroy_plan(pf); - rfftwnd_destroy_plan(ipc); -#endif + FFTW_API(destroy_plan)(pf); + FFTW_API(destroy_plan)(ipc); double rmean, rvar; rmean = sum / count; @@ -617,24 +556,12 @@ music_wnoise_generator::music_wnoise_generator(music_wnoise_generator &rc, size_t nx = lx[0], ny = lx[1], nz = lx[2], nxc = lx[0] / 2, nyc = lx[1] / 2, nzc = lx[2] / 2; - fftw_real *rfine = new fftw_real[nx * ny * (nz + 2l)]; - fftw_complex *cfine = reinterpret_cast(rfine); + real_t *rfine = new real_t[nx * ny * (nz + 2l)]; + complex_t *cfine = reinterpret_cast(rfine); -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan - pf = fftwf_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE), - ipf = fftwf_plan_dft_c2r_3d(nx, ny, nz, cfine, rfine, FFTW_ESTIMATE); -#else - fftw_plan - pf = fftw_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE), - ipf = fftw_plan_dft_c2r_3d(nx, ny, nz, cfine, rfine, FFTW_ESTIMATE); -#endif -#else - rfftwnd_plan - pf = rfftw3d_create_plan(nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE), - ipf = rfftw3d_create_plan(nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); -#endif + fftw_plan_t + pf = FFTW_API(plan_dft_r2c_3d)(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE), + ipf = FFTW_API(plan_dft_c2r_3d)(nx, ny, nz, cfine, rfine, FFTW_ESTIMATE); #pragma omp parallel for for (int i = 0; i < (int)nx; i++) @@ -646,18 +573,10 @@ music_wnoise_generator::music_wnoise_generator(music_wnoise_generator &rc, } // this->free_all_mem(); // temporarily free memory, allocate again later - fftw_real *rcoarse = new fftw_real[nxc * nyc * (nzc + 2)]; - fftw_complex *ccoarse = reinterpret_cast(rcoarse); + real_t *rcoarse = new real_t[nxc * nyc * (nzc + 2)]; + complex_t *ccoarse = reinterpret_cast(rcoarse); -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan pc = fftwf_plan_dft_r2c_3d(nxc, 
nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE); -#else - fftw_plan pc = fftw_plan_dft_r2c_3d(nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE); -#endif -#else - rfftwnd_plan pc = rfftw3d_create_plan(nxc, nyc, nzc, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE); -#endif + fftw_plan pc = FFTW_API(plan_dft_r2c_3d)(nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE); #pragma omp parallel for for (int i = 0; i < (int)nxc; i++) @@ -667,23 +586,9 @@ music_wnoise_generator::music_wnoise_generator(music_wnoise_generator &rc, size_t q = ((size_t)i * (size_t)nyc + (size_t)j) * (size_t)(nzc + 2) + (size_t)k; rcoarse[q] = rc(x0[0] / 2 + i, x0[1] / 2 + j, x0[2] / 2 + k); } -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(pc); - fftwf_execute(pf); -#else - fftw_execute(pc); - fftw_execute(pf); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pc, rcoarse, NULL); - rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pf, rfine, NULL); -#else - rfftwnd_one_real_to_complex(pc, rcoarse, NULL); - rfftwnd_one_real_to_complex(pf, rfine, NULL); -#endif -#endif + + FFTW_API(execute)(pc); + FFTW_API(execute)(pf); double fftnorm = 1.0 / ((double)nx * (double)ny * (double)nz); double sqrt8 = sqrt(8.0); @@ -747,19 +652,8 @@ music_wnoise_generator::music_wnoise_generator(music_wnoise_generator &rc, IM(cfine[q]) *= fftnorm; } -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(ipf); -#else - fftw_execute(ipf); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), ipf, cfine, NULL); -#else - rfftwnd_one_complex_to_real(ipf, cfine, NULL); -#endif -#endif + + FFTW_API(execute)(ipf); #pragma omp parallel for for (int i = 0; i < (int)nx; i++) @@ -772,21 +666,9 @@ music_wnoise_generator::music_wnoise_generator(music_wnoise_generator &rc, delete[] rfine; -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_destroy_plan(pf); - fftwf_destroy_plan(pc); - fftwf_destroy_plan(ipf); -#else - 
fftw_destroy_plan(pf); - fftw_destroy_plan(pc); - fftw_destroy_plan(ipf); -#endif -#else - fftwnd_destroy_plan(pf); - fftwnd_destroy_plan(pc); - fftwnd_destroy_plan(ipf); -#endif + FFTW_API(destroy_plan)(pf); + FFTW_API(destroy_plan)(pc); + FFTW_API(destroy_plan)(ipf); } diff --git a/src/plugins/random_panphasia.cc b/src/plugins/random_panphasia.cc index 1eb7e51..b0aa2d2 100644 --- a/src/plugins/random_panphasia.cc +++ b/src/plugins/random_panphasia.cc @@ -238,63 +238,25 @@ public: void RNG_panphasia::forward_transform_field(real_t *field, int nx, int ny, int nz) { - fftw_real *rfield = reinterpret_cast(field); - fftw_complex *cfield = reinterpret_cast(field); + real_t *rfield = reinterpret_cast(field); + complex_t *cfield = reinterpret_cast(field); -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan pf = fftwf_plan_dft_r2c_3d(nx, ny, nz, rfield, cfield, FFTW_ESTIMATE); -#else - fftw_plan pf = fftw_plan_dft_r2c_3d(nx, ny, nz, rfield, cfield, FFTW_ESTIMATE); -#endif -#else - rfftwnd_plan pf = rfftw3d_create_plan(nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE); -#endif + fftw_plan_t pf = FFTW_API(plan_dft_r2c_3d)(nx, ny, nz, rfield, cfield, FFTW_ESTIMATE); -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(pf); -#else - fftw_execute(pf); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex(num_threads_, pf, rfield, NULL); -#else - rfftwnd_one_real_to_complex(pf, rfield, NULL); -#endif -#endif + FFTW_API(execute)(pf); + + FFTW_API(destroy_plan)(pf); } void RNG_panphasia::backward_transform_field(real_t *field, int nx, int ny, int nz) { - fftw_real *rfield = reinterpret_cast(field); - fftw_complex *cfield = reinterpret_cast(field); + real_t *rfield = reinterpret_cast(field); + complex_t *cfield = reinterpret_cast(field); -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan ipf = fftwf_plan_dft_c2r_3d(nx, ny, nz, cfield, rfield, FFTW_ESTIMATE); -#else - fftw_plan ipf = fftw_plan_dft_c2r_3d(nx, ny, nz, cfield, rfield, 
FFTW_ESTIMATE); -#endif -#else - rfftwnd_plan ipf = rfftw3d_create_plan(nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); -#endif - -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(ipf); -#else - fftw_execute(ipf); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real(num_threads_, ipf, cfield, NULL); -#else - rfftwnd_one_complex_to_real(ipf, cfield, NULL); -#endif -#endif + fftw_plan_t ipf = FFTW_API(plan_dft_c2r_3d)(nx, ny, nz, cfield, rfield, FFTW_ESTIMATE); + FFTW_API(execute)(ipf); + FFTW_API(destroy_plan(ipf)); } #include @@ -309,8 +271,8 @@ inline double get_wtime(void) void RNG_panphasia::fill_grid(int level, DensityGrid &R) { - fftw_real *pr0, *pr1, *pr2, *pr3, *pr4; - fftw_complex *pc0, *pc1, *pc2, *pc3, *pc4; + real_t *pr0, *pr1, *pr2, *pr3, *pr4; + complex_t *pc0, *pc1, *pc2, *pc3, *pc4; // determine resolution and offset so that we can do proper resampling int ileft[3], ileft_corner[3], nx[3], nxremap[3]; @@ -379,17 +341,17 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) size_t ngp = nxremap[0] * nxremap[1] * (nxremap[2] + 2); - pr0 = new fftw_real[ngp]; - pr1 = new fftw_real[ngp]; - pr2 = new fftw_real[ngp]; - pr3 = new fftw_real[ngp]; - pr4 = new fftw_real[ngp]; + pr0 = new real_t[ngp]; + pr1 = new real_t[ngp]; + pr2 = new real_t[ngp]; + pr3 = new real_t[ngp]; + pr4 = new real_t[ngp]; - pc0 = reinterpret_cast(pr0); - pc1 = reinterpret_cast(pr1); - pc2 = reinterpret_cast(pr2); - pc3 = reinterpret_cast(pr3); - pc4 = reinterpret_cast(pr4); + pc0 = reinterpret_cast(pr0); + pc1 = reinterpret_cast(pr1); + pc2 = reinterpret_cast(pr2); + pc3 = reinterpret_cast(pr3); + pc4 = reinterpret_cast(pr4); music::ilog.Print("calculating PANPHASIA random numbers for level %d...", level); clear_panphasia_thread_states(); @@ -782,7 +744,7 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { music::ulog.Print("Remapping fields from dimension %d -> %d", nxremap[0], nx_m[0]); - memset(pr1, 0, ngp * 
sizeof(fftw_real)); + memset(pr1, 0, ngp * sizeof(real_t)); #pragma omp parallel for for (int i = 0; i < nxremap[0]; i++) @@ -812,7 +774,7 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) } } - memcpy(pr0, pr1, ngp * sizeof(fftw_real)); + memcpy(pr0, pr1, ngp * sizeof(real_t)); } // if (level == 9) diff --git a/src/poisson.cc b/src/poisson.cc index adab786..038b1b7 100644 --- a/src/poisson.cc +++ b/src/poisson.cc @@ -10,8 +10,8 @@ /****** ABSTRACT FACTORY PATTERN IMPLEMENTATION *******/ -#include "poisson.hh" -#include "Numerics.hh" +#include +#include std::map & get_poisson_plugin_map() @@ -40,23 +40,18 @@ void print_poisson_plugins() /****** CALL IMPLEMENTATIONS OF POISSON SOLVER CLASSES ******/ -#include "mg_solver.hh" -#include "fd_schemes.hh" +#include +#include -#ifdef SINGLE_PRECISION -typedef multigrid::solver, interp_O3_fluxcorr, mg_straight, float> poisson_solver_O2; -typedef multigrid::solver, interp_O5_fluxcorr, mg_straight, float> poisson_solver_O4; -typedef multigrid::solver, interp_O7_fluxcorr, mg_straight, float> poisson_solver_O6; -#else -typedef multigrid::solver, interp_O3_fluxcorr, mg_straight, double> poisson_solver_O2; -typedef multigrid::solver, interp_O5_fluxcorr, mg_straight, double> poisson_solver_O4; -typedef multigrid::solver, interp_O7_fluxcorr, mg_straight, double> poisson_solver_O6; -#endif + +typedef multigrid::solver poisson_solver_O2; +typedef multigrid::solver poisson_solver_O4; +typedef multigrid::solver poisson_solver_O6; /**************************************************************************************/ /**************************************************************************************/ -double multigrid_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) +real_t multigrid_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) { music::ulog.Print("Initializing multi-grid Poisson solver..."); @@ -68,11 +63,11 @@ double multigrid_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) 
std::cout << " - Invoking multi-grid Poisson solver..." << std::endl; } - double acc = 1e-5, err; + real_t acc = 1e-5, err; std::string ps_smoother_name; unsigned ps_presmooth, ps_postsmooth, order; - acc = cf_.get_value_safe("poisson", "accuracy", acc); + acc = cf_.get_value_safe("poisson", "accuracy", acc); ps_presmooth = cf_.get_value_safe("poisson", "pre_smooth", 3); ps_postsmooth = cf_.get_value_safe("poisson", "post_smooth", 3); ps_smoother_name = cf_.get_value_safe("poisson", "smoother", "gs"); @@ -102,12 +97,12 @@ double multigrid_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) << " reverting to \'gs\' (Gauss-Seidel)" << std::endl; } - double tstart, tend; + real_t tstart, tend; #ifndef SINGLETHREAD_FFTW tstart = omp_get_wtime(); #else - tstart = (double)clock() / CLOCKS_PER_SEC; + tstart = (real_t)clock() / CLOCKS_PER_SEC; #endif //----- run Poisson solver -----// @@ -142,7 +137,7 @@ double multigrid_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) if (verbosity > 1) std::cout << " - Poisson solver took " << tend - tstart << "s with " << omp_get_max_threads() << " threads." << std::endl; #else - tend = (double)clock() / CLOCKS_PER_SEC; + tend = (real_t)clock() / CLOCKS_PER_SEC; if (verbosity > 1) std::cout << " - Poisson solver took " << tend - tstart << "s." 
<< std::endl; @@ -151,7 +146,7 @@ double multigrid_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) return err; } -double multigrid_poisson_plugin::gradient(int dir, grid_hierarchy &u, grid_hierarchy &Du) +real_t multigrid_poisson_plugin::gradient(int dir, grid_hierarchy &u, grid_hierarchy &Du) { Du = u; @@ -176,7 +171,7 @@ double multigrid_poisson_plugin::gradient(int dir, grid_hierarchy &u, grid_hiera return 0.0; } -double multigrid_poisson_plugin::gradient_add(int dir, grid_hierarchy &u, grid_hierarchy &Du) +real_t multigrid_poisson_plugin::gradient_add(int dir, grid_hierarchy &u, grid_hierarchy &Du) { // Du = u; @@ -207,7 +202,7 @@ void multigrid_poisson_plugin::implementation::gradient_O2(int dir, grid_hierarc for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel) { - double h = pow(2.0, ilevel); + real_t h = pow(2.0, ilevel); meshvar_bnd *pvar = Du.get_grid(ilevel); if (dir == 0) @@ -241,7 +236,7 @@ void multigrid_poisson_plugin::implementation::gradient_add_O2(int dir, grid_hie for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel) { - double h = pow(2.0, ilevel); + real_t h = pow(2.0, ilevel); meshvar_bnd *pvar = Du.get_grid(ilevel); if (dir == 0) @@ -275,7 +270,7 @@ void multigrid_poisson_plugin::implementation::gradient_O4(int dir, grid_hierarc for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel) { - double h = pow(2.0, ilevel); + real_t h = pow(2.0, ilevel); meshvar_bnd *pvar = Du.get_grid(ilevel); h /= 12.0; @@ -311,7 +306,7 @@ void multigrid_poisson_plugin::implementation::gradient_add_O4(int dir, grid_hie for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel) { - double h = pow(2.0, ilevel); + real_t h = pow(2.0, ilevel); meshvar_bnd *pvar = Du.get_grid(ilevel); h /= 12.0; @@ -347,7 +342,7 @@ void multigrid_poisson_plugin::implementation::gradient_O6(int dir, grid_hierarc for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel) { - double h = pow(2.0, ilevel); + 
real_t h = pow(2.0, ilevel); meshvar_bnd *pvar = Du.get_grid(ilevel); h /= 60.; @@ -385,7 +380,7 @@ void multigrid_poisson_plugin::implementation::gradient_add_O6(int dir, grid_hie for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel) { - double h = pow(2.0, ilevel); + real_t h = pow(2.0, ilevel); meshvar_bnd *pvar = Du.get_grid(ilevel); h /= 60.; @@ -421,7 +416,7 @@ void multigrid_poisson_plugin::implementation::gradient_add_O6(int dir, grid_hie /**************************************************************************************/ #include "general.hh" -double fft_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) +real_t fft_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) { music::ulog.Print("Entering k-space Poisson solver..."); @@ -446,8 +441,8 @@ double fft_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) nzp = 2 * (nz / 2 + 1); //... copy data .................................................. - fftw_real *data = new fftw_real[(size_t)nx * (size_t)ny * (size_t)nzp]; - fftw_complex *cdata = reinterpret_cast(data); + real_t *data = new real_t[(size_t)nx * (size_t)ny * (size_t)nzp]; + complex_t *cdata = reinterpret_cast(data); #pragma omp parallel for for (int i = 0; i < nx; ++i) @@ -461,37 +456,14 @@ double fft_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) //... perform FFT and Poisson solve................................ 
music::ulog.Print("Performing forward transform."); -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan - plan = fftwf_plan_dft_r2c_3d(nx, ny, nz, data, cdata, FFTW_ESTIMATE), - iplan = fftwf_plan_dft_c2r_3d(nx, ny, nz, cdata, data, FFTW_ESTIMATE); + fftw_plan_t + plan = FFTW_API(plan_dft_r2c_3d)(nx, ny, nz, data, cdata, FFTW_ESTIMATE), + iplan = FFTW_API(plan_dft_c2r_3d)(nx, ny, nz, cdata, data, FFTW_ESTIMATE); - fftwf_execute(plan); -#else - fftw_plan - plan = fftw_plan_dft_r2c_3d(nx, ny, nz, data, cdata, FFTW_ESTIMATE), - iplan = fftw_plan_dft_c2r_3d(nx, ny, nz, cdata, data, FFTW_ESTIMATE); + FFTW_API(execute)(plan); - fftw_execute(plan); -#endif - -#else - rfftwnd_plan - plan = rfftw3d_create_plan(nx, ny, nz, - FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE), - iplan = rfftw3d_create_plan(nx, ny, nz, - FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); - -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), plan, data, NULL); -#else - rfftwnd_one_real_to_complex(plan, data, NULL); -#endif - -#endif - double kfac = 2.0 * M_PI; - double fac = -1.0 / (double)((size_t)nx * (size_t)ny * (size_t)nz); + real_t kfac = 2.0 * M_PI; + real_t fac = -1.0 / (real_t)((size_t)nx * (size_t)ny * (size_t)nz); #pragma omp parallel for for (int i = 0; i < nx; ++i) @@ -504,11 +476,11 @@ double fft_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) int jj = j; if (jj > ny / 2) jj -= ny; - double ki = (double)ii; - double kj = (double)jj; - double kk = (double)k; + real_t ki = (real_t)ii; + real_t kj = (real_t)jj; + real_t kk = (real_t)k; - double kk2 = kfac * kfac * (ki * ki + kj * kj + kk * kk); + real_t kk2 = kfac * kfac * (ki * ki + kj * kj + kk * kk); size_t idx = (size_t)(i * ny + j) * (size_t)(nzp / 2) + (size_t)k; @@ -521,26 +493,9 @@ double fft_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) music::ulog.Print("Performing backward transform."); -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(iplan); - 
fftwf_destroy_plan(plan); - fftwf_destroy_plan(iplan); -#else - fftw_execute(iplan); - fftw_destroy_plan(plan); - fftw_destroy_plan(iplan); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), iplan, cdata, NULL); -#else - rfftwnd_one_complex_to_real(iplan, cdata, NULL); -#endif - - rfftwnd_destroy_plan(plan); - rfftwnd_destroy_plan(iplan); -#endif + FFTW_API(execute)(iplan); + FFTW_API(destroy_plan)(plan); + FFTW_API(destroy_plan)(iplan); //... copy data .......................................... #pragma omp parallel for @@ -596,7 +551,7 @@ double fft_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) return 0.0; } -double fft_poisson_plugin::gradient(int dir, grid_hierarchy &u, grid_hierarchy &Du) +real_t fft_poisson_plugin::gradient(int dir, grid_hierarchy &u, grid_hierarchy &Du) { music::ulog.Print("Computing a gradient in k-space...\n"); @@ -612,8 +567,8 @@ double fft_poisson_plugin::gradient(int dir, grid_hierarchy &u, grid_hierarchy & nzp = 2 * (nz / 2 + 1); //... copy data .................................................. - fftw_real *data = new fftw_real[(size_t)nx * (size_t)ny * (size_t)nzp]; - fftw_complex *cdata = reinterpret_cast(data); + real_t *data = new real_t[(size_t)nx * (size_t)ny * (size_t)nzp]; + complex_t *cdata = reinterpret_cast(data); #pragma omp parallel for for (int i = 0; i < nx; ++i) @@ -625,38 +580,14 @@ double fft_poisson_plugin::gradient(int dir, grid_hierarchy &u, grid_hierarchy & } //... perform FFT and Poisson solve................................ 
+ fftw_plan_t + plan = FFTW_API(plan_dft_r2c_3d)(nx, ny, nz, data, cdata, FFTW_ESTIMATE), + iplan = FFTW_API(plan_dft_c2r_3d)(nx, ny, nz, cdata, data, FFTW_ESTIMATE); -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan - plan = fftwf_plan_dft_r2c_3d(nx, ny, nz, data, cdata, FFTW_ESTIMATE), - iplan = fftwf_plan_dft_c2r_3d(nx, ny, nz, cdata, data, FFTW_ESTIMATE); + FFTW_API(execute)(plan); - fftwf_execute(plan); -#else - fftw_plan - plan = fftw_plan_dft_r2c_3d(nx, ny, nz, data, cdata, FFTW_ESTIMATE), - iplan = fftw_plan_dft_c2r_3d(nx, ny, nz, cdata, data, FFTW_ESTIMATE); - - fftw_execute(plan); -#endif -#else - rfftwnd_plan - plan = rfftw3d_create_plan(nx, ny, nz, - FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE), - iplan = rfftw3d_create_plan(nx, ny, nz, - FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); - -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), plan, data, NULL); -#else - rfftwnd_one_real_to_complex(plan, data, NULL); -#endif - -#endif - - double fac = -1.0 / (double)((size_t)nx * (size_t)ny * (size_t)nz); - double kfac = 2.0 * M_PI; + real_t fac = -1.0 / (real_t)((size_t)nx * (size_t)ny * (size_t)nz); + real_t kfac = 2.0 * M_PI; bool do_glass = cf_.get_value_safe("output", "glass", false); bool deconvolve_cic = do_glass | cf_.get_value_safe("output", "glass_cicdeconvolve", false); @@ -671,98 +602,54 @@ double fft_poisson_plugin::gradient(int dir, grid_hierarchy &u, grid_hierarchy & { size_t idx = (size_t)(i * ny + j) * (size_t)(nzp / 2) + (size_t)k; int ii = i; - if (ii > nx / 2) - ii -= nx; + if (ii > nx / 2) ii -= nx; int jj = j; - if (jj > ny / 2) - jj -= ny; - const double ki = (double)ii; - const double kj = (double)jj; - const double kk = (double)k; + if (jj > ny / 2) jj -= ny; - const double kkdir[3] = {kfac * ki, kfac * kj, kfac * kk}; - const double kdir = kkdir[dir]; + const real_t ki{(real_t)ii}; + const real_t kj{(real_t)jj}; + const real_t kk{(real_t)k}; + const real_t kkdir[3] = {kfac * ki, 
kfac * kj, kfac * kk}; + const real_t kdir = kkdir[dir]; - double re = RE(cdata[idx]); - double im = IM(cdata[idx]); + real_t re = RE(cdata[idx]); + real_t im = IM(cdata[idx]); RE(cdata[idx]) = fac * im * kdir; IM(cdata[idx]) = -fac * re * kdir; -#ifdef FFTW3 if (deconvolve_cic) { - double dfx, dfy, dfz; - dfx = M_PI * ki / (double)nx; - dfx = (i != 0) ? sin(dfx) / dfx : 1.0; - dfy = M_PI * kj / (double)ny; - dfy = (j != 0) ? sin(dfy) / dfy : 1.0; - dfz = M_PI * kk / (double)nz; - dfz = (k != 0) ? sin(dfz) / dfz : 1.0; + real_t dfx, dfy, dfz; + dfx = M_PI * ki / (real_t)nx; + dfx = (i != 0) ? std::sin(dfx) / dfx : 1.0; + dfy = M_PI * kj / (real_t)ny; + dfy = (j != 0) ? std::sin(dfy) / dfy : 1.0; + dfz = M_PI * kk / (real_t)nz; + dfz = (k != 0) ? std::sin(dfz) / dfz : 1.0; dfx = 1.0 / (dfx * dfy * dfz); dfx = dfx * dfx; - cdata[idx][0] *= dfx; - cdata[idx][1] *= dfx; + RE(cdata[idx]) *= dfx; + IM(cdata[idx]) *= dfx; } -#else - if (deconvolve_cic) - { - double dfx, dfy, dfz; - dfx = M_PI * ki / (double)nx; - dfx = (i != 0) ? sin(dfx) / dfx : 1.0; - dfy = M_PI * kj / (double)ny; - dfy = (j != 0) ? sin(dfy) / dfy : 1.0; - dfz = M_PI * kk / (double)nz; - dfz = (k != 0) ? 
sin(dfz) / dfz : 1.0; - - dfx = 1.0 / (dfx * dfy * dfz); - dfx = dfx * dfx; - - cdata[idx].re *= dfx; - cdata[idx].im *= dfx; - } -#endif if( (dir == 0 && i==nx/2) || (dir == 1 && j==ny/2) || (dir == 2 && k==nz/2) ) { -#ifdef FFTW3 - cdata[idx][0] = 0.0; - cdata[idx][1] = 0.0; -#else - cdata[idx].re = 0.0; - cdata[idx].im = 0.0; -#endif + RE(cdata[idx]) = 0.0; + IM(cdata[idx]) = 0.0; } } RE(cdata[0]) = 0.0; IM(cdata[0]) = 0.0; -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(iplan); - fftwf_destroy_plan(plan); - fftwf_destroy_plan(iplan); -#else - fftw_execute(iplan); - fftw_destroy_plan(plan); - fftw_destroy_plan(iplan); -#endif - -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), iplan, cdata, NULL); -#else - rfftwnd_one_complex_to_real(iplan, cdata, NULL); -#endif - - rfftwnd_destroy_plan(plan); - rfftwnd_destroy_plan(iplan); -#endif + FFTW_API(execute)(iplan); + FFTW_API(destroy_plan)(plan); + FFTW_API(destroy_plan)(iplan); //... copy data .......................................... 
- double dmax = 0.0; + real_t dmax = 0.0; for (int i = 0; i < nx; ++i) for (int j = 0; j < ny; ++j) for (int k = 0; k < nz; ++k) @@ -784,35 +671,35 @@ double fft_poisson_plugin::gradient(int dir, grid_hierarchy &u, grid_hierarchy & /**************************************************************************************/ template -double poisson_hybrid_kernel(int idir, int i, int j, int k, int n) +real_t poisson_hybrid_kernel(int idir, int i, int j, int k, int n) { return 1.0; } template <> -inline double poisson_hybrid_kernel<2>(int idir, int i, int j, int k, int n) +inline real_t poisson_hybrid_kernel<2>(int idir, int i, int j, int k, int n) { if (i == 0 && j == 0 && k == 0) return 0.0; - double - ki(M_PI * (double)i / (double)n), - kj(M_PI * (double)j / (double)n), - kk(M_PI * (double)k / (double)n), + real_t + ki(M_PI * (real_t)i / (real_t)n), + kj(M_PI * (real_t)j / (real_t)n), + kk(M_PI * (real_t)k / (real_t)n), kr(sqrt(ki * ki + kj * kj + kk * kk)); - double grad = 1.0, laplace = 1.0; + real_t grad = 1.0, laplace = 1.0; if (idir == 0) - grad = sin(ki); + grad = std::sin(ki); else if (idir == 1) - grad = sin(kj); + grad = std::sin(kj); else - grad = sin(kk); + grad = std::sin(kk); - laplace = 2.0 * ((-cos(ki) + 1.0) + (-cos(kj) + 1.0) + (-cos(kk) + 1.0)); + laplace = 2.0 * ((-std::cos(ki) + 1.0) + (-std::cos(kj) + 1.0) + (-std::cos(kk) + 1.0)); - double kgrad = 1.0; + real_t kgrad = 1.0; if (idir == 0) kgrad = ki; else if (idir == 1) @@ -824,30 +711,30 @@ inline double poisson_hybrid_kernel<2>(int idir, int i, int j, int k, int n) } template <> -inline double poisson_hybrid_kernel<4>(int idir, int i, int j, int k, int n) +inline real_t poisson_hybrid_kernel<4>(int idir, int i, int j, int k, int n) { if (i == 0 && j == 0 && k == 0) return 0.0; - double - ki(M_PI * (double)i / (double)n), - kj(M_PI * (double)j / (double)n), - kk(M_PI * (double)k / (double)n), + real_t + ki(M_PI * (real_t)i / (real_t)n), + kj(M_PI * (real_t)j / (real_t)n), + kk(M_PI * (real_t)k / 
(real_t)n), kr(sqrt(ki * ki + kj * kj + kk * kk)); - double grad = 1.0, laplace = 1.0; + real_t grad = 1.0, laplace = 1.0; if (idir == 0) - grad = 0.166666666667 * (-sin(2. * ki) + 8. * sin(ki)); + grad = 0.166666666667 * (-std::sin(2. * ki) + 8. * std::sin(ki)); else if (idir == 1) - grad = 0.166666666667 * (-sin(2. * kj) + 8. * sin(kj)); + grad = 0.166666666667 * (-std::sin(2. * kj) + 8. * std::sin(kj)); else if (idir == 2) - grad = 0.166666666667 * (-sin(2. * kk) + 8. * sin(kk)); + grad = 0.166666666667 * (-std::sin(2. * kk) + 8. * std::sin(kk)); - laplace = 0.1666666667 * ((cos(2 * ki) - 16. * cos(ki) + 15.) + (cos(2 * kj) - 16. * cos(kj) + 15.) + (cos(2 * kk) - 16. * cos(kk) + 15.)); + laplace = 0.1666666667 * ((std::cos(2 * ki) - 16. * std::cos(ki) + 15.) + (std::cos(2 * kj) - 16. * std::cos(kj) + 15.) + (std::cos(2 * kk) - 16. * std::cos(kk) + 15.)); - double kgrad = 1.0; + real_t kgrad = 1.0; if (idir == 0) kgrad = ki; else if (idir == 1) @@ -859,29 +746,29 @@ inline double poisson_hybrid_kernel<4>(int idir, int i, int j, int k, int n) } template <> -inline double poisson_hybrid_kernel<6>(int idir, int i, int j, int k, int n) +inline real_t poisson_hybrid_kernel<6>(int idir, int i, int j, int k, int n) { - double - ki(M_PI * (double)i / (double)n), - kj(M_PI * (double)j / (double)n), - kk(M_PI * (double)k / (double)n), + real_t + ki(M_PI * (real_t)i / (real_t)n), + kj(M_PI * (real_t)j / (real_t)n), + kk(M_PI * (real_t)k / (real_t)n), kr(sqrt(ki * ki + kj * kj + kk * kk)); if (i == 0 && j == 0 && k == 0) return 0.0; - double grad = 1.0, laplace = 1.0; + real_t grad = 1.0, laplace = 1.0; if (idir == 0) - grad = 0.0333333333333 * (sin(3. * ki) - 9. * sin(2. * ki) + 45. * sin(ki)); + grad = 0.0333333333333 * (std::sin(3. * ki) - 9. * std::sin(2. * ki) + 45. * std::sin(ki)); else if (idir == 1) - grad = 0.0333333333333 * (sin(3. * kj) - 9. * sin(2. * kj) + 45. * sin(kj)); + grad = 0.0333333333333 * (std::sin(3. * kj) - 9. * std::sin(2. * kj) + 45. 
* std::sin(kj)); else if (idir == 2) - grad = 0.0333333333333 * (sin(3. * kk) - 9. * sin(2. * kk) + 45. * sin(kk)); + grad = 0.0333333333333 * (std::sin(3. * kk) - 9. * std::sin(2. * kk) + 45. * std::sin(kk)); - laplace = 0.01111111111111 * ((-2. * cos(3.0 * ki) + 27. * cos(2. * ki) - 270. * cos(ki) + 245.) + (-2. * cos(3.0 * kj) + 27. * cos(2. * kj) - 270. * cos(kj) + 245.) + (-2. * cos(3.0 * kk) + 27. * cos(2. * kk) - 270. * cos(kk) + 245.)); + laplace = 0.01111111111111 * ((-2. * std::cos(3.0 * ki) + 27. * std::cos(2. * ki) - 270. * std::cos(ki) + 245.) + (-2. * std::cos(3.0 * kj) + 27. * std::cos(2. * kj) - 270. * std::cos(kj) + 245.) + (-2. * std::cos(3.0 * kk) + 27. * std::cos(2. * kk) - 270. * std::cos(kk) + 245.)); - double kgrad = 1.0; + real_t kgrad = 1.0; if (idir == 0) kgrad = ki; else if (idir == 1) @@ -896,42 +783,19 @@ inline double poisson_hybrid_kernel<6>(int idir, int i, int j, int k, int n) } template -void do_poisson_hybrid(fftw_real *data, int idir, int nxp, int nyp, int nzp, bool periodic, bool deconvolve_cic) +void do_poisson_hybrid(real_t *data, int idir, int nxp, int nyp, int nzp, bool periodic, bool deconvolve_cic) { - double fftnorm = 1.0 / ((double)nxp * (double)nyp * (double)nzp); + real_t fftnorm = 1.0 / ((real_t)nxp * (real_t)nyp * (real_t)nzp); - fftw_complex *cdata = reinterpret_cast(data); + complex_t *cdata = reinterpret_cast(data); if (deconvolve_cic) music::ilog.Print("CIC deconvolution step is enabled."); -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan iplan, plan; - plan = fftwf_plan_dft_r2c_3d(nxp, nyp, nzp, data, cdata, FFTW_ESTIMATE); - iplan = fftwf_plan_dft_c2r_3d(nxp, nyp, nzp, cdata, data, FFTW_ESTIMATE); - fftwf_execute(plan); -#else - fftw_plan iplan, plan; - plan = fftw_plan_dft_r2c_3d(nxp, nyp, nzp, data, cdata, FFTW_ESTIMATE); - iplan = fftw_plan_dft_c2r_3d(nxp, nyp, nzp, cdata, data, FFTW_ESTIMATE); - fftw_execute(plan); -#endif -#else - rfftwnd_plan iplan, plan; - - plan = rfftw3d_create_plan(nxp, nyp, nzp, - 
FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE); - - iplan = rfftw3d_create_plan(nxp, nyp, nzp, - FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); - -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), plan, data, NULL); -#else - rfftwnd_one_real_to_complex(plan, data, NULL); -#endif -#endif + fftw_plan_t iplan, plan; + plan = FFTW_API(plan_dft_r2c_3d)(nxp, nyp, nzp, data, cdata, FFTW_ESTIMATE); + iplan = FFTW_API(plan_dft_c2r_3d)(nxp, nyp, nzp, cdata, data, FFTW_ESTIMATE); + FFTW_API(execute)(plan); #pragma omp parallel for for (int i = 0; i < nxp; ++i) @@ -948,22 +812,22 @@ void do_poisson_hybrid(fftw_real *data, int idir, int nxp, int nyp, int nzp, boo kj -= nyp; //... apply hybrid correction - double dk = poisson_hybrid_kernel(idir, ki, kj, k, nxp / 2); + real_t dk = poisson_hybrid_kernel(idir, ki, kj, k, nxp / 2); - fftw_real re = RE(cdata[ii]), im = IM(cdata[ii]); + real_t re = RE(cdata[ii]), im = IM(cdata[ii]); RE(cdata[ii]) = -im * dk * fftnorm; IM(cdata[ii]) = re * dk * fftnorm; if (deconvolve_cic) { - double dfx, dfy, dfz; - dfx = M_PI * ki / (double)nxp; - dfx = (i != 0) ? sin(dfx) / dfx : 1.0; - dfy = M_PI * kj / (double)nyp; - dfy = (j != 0) ? sin(dfy) / dfy : 1.0; - dfz = M_PI * kk / (double)nzp; - dfz = (k != 0) ? sin(dfz) / dfz : 1.0; + real_t dfx, dfy, dfz; + dfx = M_PI * ki / (real_t)nxp; + dfx = (i != 0) ? std::sin(dfx) / dfx : 1.0; + dfy = M_PI * kj / (real_t)nyp; + dfy = (j != 0) ? std::sin(dfy) / dfy : 1.0; + dfz = M_PI * kk / (real_t)nzp; + dfz = (k != 0) ? 
std::sin(dfz) / dfz : 1.0; dfx = 1.0 / (dfx * dfy * dfz); dfx = dfx * dfx; @@ -981,26 +845,9 @@ void do_poisson_hybrid(fftw_real *data, int idir, int nxp, int nyp, int nzp, boo RE(cdata[0]) = 0.0; IM(cdata[0]) = 0.0; -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(iplan); - fftwf_destroy_plan(plan); - fftwf_destroy_plan(iplan); -#else - fftw_execute(iplan); - fftw_destroy_plan(plan); - fftw_destroy_plan(iplan); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), iplan, cdata, NULL); -#else - rfftwnd_one_complex_to_real(iplan, cdata, NULL); -#endif - - rfftwnd_destroy_plan(plan); - rfftwnd_destroy_plan(iplan); -#endif + FFTW_API(execute)(iplan); + FFTW_API(destroy_plan)(plan); + FFTW_API(destroy_plan)(iplan); } template @@ -1008,7 +855,7 @@ void poisson_hybrid(T &f, int idir, int order, bool periodic, bool deconvolve_ci { int nx = f.size(0), ny = f.size(1), nz = f.size(2), nxp, nyp, nzp; - fftw_real *data; + real_t *data; int xo = 0, yo = 0, zo = 0; int nmax = std::max(nx, std::max(ny, nz)); @@ -1018,12 +865,12 @@ void poisson_hybrid(T &f, int idir, int order, bool periodic, bool deconvolve_ci if (!periodic) { - nxp = nmax + 2 * boundary; // 2*nmax; - nyp = nmax + 2 * boundary; // 2*nmax; - nzp = nmax + 2 * boundary; // 2*nmax; - xo = boundary; // nmax/2; - yo = boundary; // nmax/2; - zo = boundary; // nmax/2; + nxp = nmax + 2 * boundary; + nyp = nmax + 2 * boundary; + nzp = nmax + 2 * boundary; + xo = boundary; + yo = boundary; + zo = boundary; } else { @@ -1032,17 +879,11 @@ void poisson_hybrid(T &f, int idir, int order, bool periodic, bool deconvolve_ci nzp = nmax; } - data = new fftw_real[(size_t)nxp * (size_t)nyp * (size_t)(nzp + 2)]; + data = new real_t[(size_t)nxp * (size_t)nyp * (size_t)(nzp + 2)]; if (idir == 0) std::cout << " - Performing hybrid Poisson step... 
(" << nxp << ", " << nyp << ", " << nzp << ")\n"; - // size_t N = (size_t)nxp*(size_t)nyp*2*((size_t)nzp/2+1); - - // #pragma omp parallel for - // for( size_t i=0; i>(MeshvarBnd &f, int idir, int order, bool periodic, bool deconvolve_cic); +template void poisson_hybrid>(MeshvarBnd &f, int idir, int order, bool periodic, bool deconvolve_cic); template void poisson_hybrid>(MeshvarBnd &f, int idir, int order, bool periodic, bool deconvolve_cic); namespace diff --git a/src/solver.hh b/src/solver.hh index 148eb55..aec5f37 100644 --- a/src/solver.hh +++ b/src/solver.hh @@ -7,23 +7,30 @@ * */ -#ifndef __SOLVER_HH -#define __SOLVER_HH +#pragma once #include #include -#include "mesh.hh" +#include -#define BEGIN_MULTIGRID_NAMESPACE namespace multigrid { +#define BEGIN_MULTIGRID_NAMESPACE \ + namespace multigrid \ + { #define END_MULTIGRID_NAMESPACE } BEGIN_MULTIGRID_NAMESPACE - -namespace opt { - enum smtype { sm_jacobi, sm_gauss_seidel, sm_sor }; + +namespace opt +{ + enum smtype + { + sm_jacobi, + sm_gauss_seidel, + sm_sor + }; } -template< class S, class O, typename T=double > +template class solver { public: @@ -31,1080 +38,1023 @@ public: typedef O mgop; protected: - scheme m_scheme; - mgop m_gridop; - unsigned m_npresmooth, m_npostsmooth; - opt::smtype m_smoother; - unsigned m_ilevelmin; - - const static bool m_bperiodic = true; + scheme m_scheme; + mgop m_gridop; + unsigned m_npresmooth, m_npostsmooth; + opt::smtype m_smoother; + unsigned m_ilevelmin; - GridHierarchy *m_pu, *m_pf, *m_pfsave; + const static bool m_bperiodic = true; + + GridHierarchy *m_pu, *m_pf, *m_pfsave; GridHierarchy *m_pmask; - const MeshvarBnd *m_pubnd; - - double compute_error( const MeshvarBnd& u, const MeshvarBnd& unew ); - - double compute_error( const GridHierarchy& uh, const GridHierarchy& uhnew, bool verbose ); + const MeshvarBnd *m_pubnd; + + double compute_error(const MeshvarBnd &u, const MeshvarBnd &unew); + + double compute_error(const GridHierarchy &uh, const GridHierarchy &uhnew, 
bool verbose); protected: - - void Jacobi( T h, MeshvarBnd* u, const MeshvarBnd* f ); - - void GaussSeidel( T h, MeshvarBnd* u, const MeshvarBnd* f ); - - void SOR( T h, MeshvarBnd* u, const MeshvarBnd* f ); - - void twoGrid( unsigned ilevel ); - - void interp_coarse_fine( unsigned ilevel, MeshvarBnd& coarse, MeshvarBnd& fine, bool bcf=true ); - - void setBC( unsigned ilevel ); - - void make_periodic( MeshvarBnd *u ); - - void interp_cubic( MeshvarBnd& coarse, MeshvarBnd& fine, int itop, int jtop, int ktop, int i, int j, int k ); - void interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd& coarse, MeshvarBnd& fine, bool bcf ); - + void Jacobi(real_t h, MeshvarBnd *u, const MeshvarBnd *f); + + void GaussSeidel(real_t h, MeshvarBnd *u, const MeshvarBnd *f); + + void SOR(real_t h, MeshvarBnd *u, const MeshvarBnd *f); + + void twoGrid(unsigned ilevel); + + void interp_coarse_fine(unsigned ilevel, MeshvarBnd &coarse, MeshvarBnd &fine, bool bcf = true); + + void setBC(unsigned ilevel); + + void make_periodic(MeshvarBnd *u); + + void interp_cubic(MeshvarBnd &coarse, MeshvarBnd &fine, int itop, int jtop, int ktop, int i, int j, int k); + void interp_coarse_fine_cubic(unsigned ilevel, MeshvarBnd &coarse, MeshvarBnd &fine, bool bcf); + public: - solver( GridHierarchy& f, //const MeshvarBnd& uBC_top, - opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth ); - + solver(GridHierarchy &f, // const MeshvarBnd& uBC_top, + opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth); + ~solver() - { delete m_pmask; } - - double solve( GridHierarchy& u, double accuracy, double h=-1.0, bool verbose=false ); - - double solve( GridHierarchy& u, double accuracy, bool verbose=false ) { - return this->solve ( u, accuracy, -1.0, verbose ); + delete m_pmask; + } + + double solve(GridHierarchy &u, double accuracy, double h = -1.0, bool verbose = false); + + double solve(GridHierarchy &u, double accuracy, bool verbose = false) + { + return this->solve(u, accuracy, -1.0, 
verbose); } - - - }; - -template< class S, class O, typename T > -solver::solver( GridHierarchy& f, //const MeshvarBnd& ubnd, - opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth ) -: m_scheme(), m_gridop(), m_npresmooth( npresmooth ), m_npostsmooth( npostsmooth ), -m_smoother( smoother ), m_ilevelmin( f.levelmin() ), m_pf( &f )//, m_pubnd( &ubnd ) -{ +template +solver::solver(GridHierarchy &f, // const MeshvarBnd& ubnd, + opt::smtype smoother, unsigned npresmooth, unsigned npostsmooth) + : m_scheme(), m_gridop(), m_npresmooth(npresmooth), m_npostsmooth(npostsmooth), + m_smoother(smoother), m_ilevelmin(f.levelmin()), m_pf(&f) //, m_pubnd( &ubnd ) +{ //... initialize the refinement mask - m_pmask = new GridHierarchy( f.m_nbnd ); + m_pmask = new GridHierarchy(f.m_nbnd); m_pmask->create_base_hierarchy(f.levelmin()); - - for( unsigned ilevel=f.levelmin()+1; ilevel<=f.levelmax(); ++ilevel ) + + for (unsigned ilevel = f.levelmin() + 1; ilevel <= f.levelmax(); ++ilevel) { - meshvar_bnd* pf = f.get_grid(ilevel); - m_pmask->add_patch( pf->offset(0), pf->offset(1), pf->offset(2), pf->size(0), pf->size(1), pf->size(2) ); + meshvar_bnd *pf = f.get_grid(ilevel); + m_pmask->add_patch(pf->offset(0), pf->offset(1), pf->offset(2), pf->size(0), pf->size(1), pf->size(2)); } - + m_pmask->zero(); - - for( unsigned ilevel=0; ilevel *pf = f.get_grid(ilevel); - for( int ix=0; ix < (int)pf->size(0); ++ix ) - for( int iy=0; iy < (int)pf->size(1); ++iy ) - for( int iz=0; iz < (int)pf->size(2); ++iz ) - (*m_pmask->get_grid(ilevel))(ix,iy,iz) = true; + MeshvarBnd *pf = f.get_grid(ilevel); + for (int ix = 0; ix < (int)pf->size(0); ++ix) + for (int iy = 0; iy < (int)pf->size(1); ++iy) + for (int iz = 0; iz < (int)pf->size(2); ++iz) + (*m_pmask->get_grid(ilevel))(ix, iy, iz) = true; } - - for( unsigned ilevel=m_ilevelmin; ilevel* pf = f.get_grid(ilevel+1);//, *pfc = f.get_grid(ilevel); - - for( int ix=pf->offset(0); ix < (int)(pf->offset(0)+pf->size(0)/2); ++ix ) - for( int 
iy=pf->offset(1); iy < (int)(pf->offset(1)+pf->size(1)/2); ++iy ) - for( int iz=pf->offset(2); iz < (int)(pf->offset(2)+pf->size(2)/2); ++iz ) - (*m_pmask->get_grid(ilevel))(ix,iy,iz) = true; + MeshvarBnd *pf = f.get_grid(ilevel + 1); //, *pfc = f.get_grid(ilevel); + + for (int ix = pf->offset(0); ix < (int)(pf->offset(0) + pf->size(0) / 2); ++ix) + for (int iy = pf->offset(1); iy < (int)(pf->offset(1) + pf->size(1) / 2); ++iy) + for (int iz = pf->offset(2); iz < (int)(pf->offset(2) + pf->size(2) / 2); ++iz) + (*m_pmask->get_grid(ilevel))(ix, iy, iz) = true; } - } - -template< class S, class O, typename T > -void solver::Jacobi( T h, MeshvarBnd *u, const MeshvarBnd* f ) +template +void solver::Jacobi(real_t h, MeshvarBnd *u, const MeshvarBnd *f) { int - nx = u->size(0), - ny = u->size(1), - nz = u->size(2); - - double - c0 = -1.0/m_scheme.ccoeff(), - h2 = h*h; - - MeshvarBnd uold(*u); - - double alpha = 0.95, ialpha = 1.0-alpha; - - #pragma omp parallel for - for( int ix=0; ixsize(0), + ny = u->size(1), + nz = u->size(2); + + real_t + c0 = -1.0 / m_scheme.ccoeff(), + h2 = h * h; + + MeshvarBnd uold(*u); + + real_t alpha = 0.95, ialpha = 1.0 - alpha; + +#pragma omp parallel for + for (int ix = 0; ix < nx; ++ix) + for (int iy = 0; iy < ny; ++iy) + for (int iz = 0; iz < nz; ++iz) + (*u)(ix, iy, iz) = ialpha * uold(ix, iy, iz) + alpha * (m_scheme.rhs(uold, ix, iy, iz) + h2 * (*f)(ix, iy, iz)) * c0; } -template< class S, class O, typename T > -void solver::SOR( T h, MeshvarBnd *u, const MeshvarBnd* f ) +template +void solver::SOR(real_t h, MeshvarBnd *u, const MeshvarBnd *f) { int - nx = u->size(0), - ny = u->size(1), - nz = u->size(2); + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); - double - c0 = -1.0/m_scheme.ccoeff(), - h2 = h*h; - - MeshvarBnd uold(*u); - - double - alpha = 1.2, - //alpha = 2 / (1 + 4 * atan(1.0) / double(u->size(0)))-1.0, - ialpha = 1.0-alpha; - - //std::cerr << "omega_opt = " << alpha << std::endl; - - #pragma omp parallel for - for( 
int ix=0; ix uold(*u); + + real_t + alpha = 1.2, + ialpha = 1.0 - alpha; + +#pragma omp parallel for + for (int ix = 0; ix < nx; ++ix) + for (int iy = 0; iy < ny; ++iy) + for (int iz = 0; iz < nz; ++iz) + if ((ix + iy + iz) % 2 == 0) + (*u)(ix, iy, iz) = ialpha * uold(ix, iy, iz) + alpha * (m_scheme.rhs(uold, ix, iy, iz) + h2 * (*f)(ix, iy, iz)) * c0; + +#pragma omp parallel for + for (int ix = 0; ix < nx; ++ix) + for (int iy = 0; iy < ny; ++iy) + for (int iz = 0; iz < nz; ++iz) + if ((ix + iy + iz) % 2 != 0) + (*u)(ix, iy, iz) = ialpha * uold(ix, iy, iz) + alpha * (m_scheme.rhs(*u, ix, iy, iz) + h2 * (*f)(ix, iy, iz)) * c0; } -template< class S, class O, typename T > -void solver::GaussSeidel( T h, MeshvarBnd* u, const MeshvarBnd* f ) +template +void solver::GaussSeidel(real_t h, MeshvarBnd *u, const MeshvarBnd *f) { - int - nx = u->size(0), - ny = u->size(1), - nz = u->size(2); - + int + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + T - c0 = -1.0/m_scheme.ccoeff(), - h2 = h*h; - - for( int color=0; color < 2; ++color ) - #pragma omp parallel for - for( int ix=0; ix -void solver::twoGrid( unsigned ilevel ) +template +void solver::twoGrid(unsigned ilevel) { - MeshvarBnd *uf, *uc, *ff, *fc; - - T - h = 1.0/(pow(2.0,ilevel)), - c0 = -1.0/m_scheme.ccoeff(), - h2 = h*h; - + MeshvarBnd *uf, *uc, *ff, *fc; + + real_t + h = 1.0 / (pow(2.0, ilevel)), + c0 = -1.0 / m_scheme.ccoeff(), + h2 = h * h; + uf = m_pu->get_grid(ilevel); - ff = m_pf->get_grid(ilevel); - - uc = m_pu->get_grid(ilevel-1); - fc = m_pf->get_grid(ilevel-1); - - int - nx = uf->size(0), - ny = uf->size(1), - nz = uf->size(2); - - if( m_bperiodic && ilevel <= m_ilevelmin) - make_periodic( uf ); - else if(!m_bperiodic) - setBC( ilevel ); - + ff = m_pf->get_grid(ilevel); + + uc = m_pu->get_grid(ilevel - 1); + fc = m_pf->get_grid(ilevel - 1); + + int + nx = uf->size(0), + ny = uf->size(1), + nz = uf->size(2); + + if (m_bperiodic && ilevel <= m_ilevelmin) + make_periodic(uf); + else if (!m_bperiodic) + 
setBC(ilevel); + //... do smoothing sweeps with specified solver - for( unsigned i=0; i m_ilevelmin ) - interp_coarse_fine(ilevel, *uc, *uf ); - - if( m_smoother == opt::sm_gauss_seidel ) - GaussSeidel( h, uf, ff ); - - else if( m_smoother == opt::sm_jacobi ) - Jacobi( h, uf, ff); - - else if( m_smoother == opt::sm_sor ) - SOR( h, uf, ff ); - - if( m_bperiodic && ilevel <= m_ilevelmin ) - make_periodic( uf ); + for (unsigned i = 0; i < m_npresmooth; ++i) + { + + if (ilevel > m_ilevelmin) + interp_coarse_fine(ilevel, *uc, *uf); + + if (m_smoother == opt::sm_gauss_seidel) + GaussSeidel(h, uf, ff); + + else if (m_smoother == opt::sm_jacobi) + Jacobi(h, uf, ff); + + else if (m_smoother == opt::sm_sor) + SOR(h, uf, ff); + + if (m_bperiodic && ilevel <= m_ilevelmin) + make_periodic(uf); } - - - m_gridop.restrict( *uf, *uc ); - + + m_gridop.restrict(*uf, *uc); + //... essential!! - if( m_bperiodic && ilevel <= m_ilevelmin ) - make_periodic( uc ); - else if( m_bperiodic ) - interp_coarse_fine(ilevel,*uc,*uf); - - meshvar_bnd Lu(*uf,false); + if (m_bperiodic && ilevel <= m_ilevelmin) + make_periodic(uc); + else if (m_bperiodic) + interp_coarse_fine(ilevel, *uc, *uf); + + meshvar_bnd Lu(*uf, false); Lu.zero(); - #pragma omp parallel for - for( int ix=0; ixsize(0); ++ix ) - for( int iy=0; iy<(int)uc->size(1); ++iy ) - for( int iz=0; iz<(int)uc->size(2); ++iz ) - if( (*m_pmask->get_grid(ilevel-1))(ix,iy,iz) == true ) - (*fc)(ix,iy,iz) += ((tLu( ix, iy, iz ) - (m_scheme.apply( *uc, ix, iy, iz )/(4.0*h2)))); - - + m_gridop.restrict(*ff, *fc); + +//... 
compute RHS tau-correction +#pragma omp parallel for schedule(dynamic) + for (int ix = 0; ix < (int)uc->size(0); ++ix) + for (int iy = 0; iy < (int)uc->size(1); ++iy) + for (int iz = 0; iz < (int)uc->size(2); ++iz) + if ((*m_pmask->get_grid(ilevel - 1))(ix, iy, iz) == true) + (*fc)(ix, iy, iz) += ((tLu(ix, iy, iz) - (m_scheme.apply(*uc, ix, iy, iz) / (4.0 * h2)))); + tLu.deallocate(); - - meshvar_bnd ucsave(*uc,true); - + + meshvar_bnd ucsave(*uc, true); + //... have we reached the end of the recursion or do we need to go up one level? - if( ilevel == 1 ) - if( m_bperiodic ) - (*uc)(0,0,0) = 0.0; - else - (*uc)(0,0,0) = (m_scheme.rhs( (*uc), 0, 0, 0 ) + 4.0 * h2 * (*fc)(0,0,0))*c0; + if (ilevel == 1) + if (m_bperiodic) + (*uc)(0, 0, 0) = 0.0; + else + (*uc)(0, 0, 0) = (m_scheme.rhs((*uc), 0, 0, 0) + 4.0 * h2 * (*fc)(0, 0, 0)) * c0; else - twoGrid( ilevel-1 ); - - meshvar_bnd cc(*uc,false); - - //... compute correction on coarse grid - #pragma omp parallel for - for( int ix=0; ix<(int)cc.size(0); ++ix ) - for( int iy=0; iy<(int)cc.size(1); ++iy ) - for( int iz=0; iz<(int)cc.size(2); ++iz ) - cc(ix,iy,iz) = (*uc)(ix,iy,iz) - ucsave(ix,iy,iz); - + twoGrid(ilevel - 1); + + meshvar_bnd cc(*uc, false); + +//... compute correction on coarse grid +#pragma omp parallel for + for (int ix = 0; ix < (int)cc.size(0); ++ix) + for (int iy = 0; iy < (int)cc.size(1); ++iy) + for (int iz = 0; iz < (int)cc.size(2); ++iz) + cc(ix, iy, iz) = (*uc)(ix, iy, iz) - ucsave(ix, iy, iz); + ucsave.deallocate(); + //... prolongate correction to fine grid + meshvar_bnd cf(*uf, false); + m_gridop.prolong(cc, cf); - //... prolongate correction to fine grid - meshvar_bnd cf(*uf,false); - m_gridop.prolong( cc, cf ); - cc.deallocate(); - - - #pragma omp parallel for - for( int ix=0; ix m_ilevelmin ) + if (m_bperiodic && ilevel <= m_ilevelmin) + make_periodic(uf); + else if (!m_bperiodic) + setBC(ilevel); + + // if( ilevel > m_ilevelmin ) // interp_coarse_fine(ilevel, *uc, *uf ); //... 
do smoothing sweeps with specified solver - for( unsigned i=0; i m_ilevelmin ) - interp_coarse_fine(ilevel, *uc, *uf ); + for (unsigned i = 0; i < m_npostsmooth; ++i) + { - if( m_smoother == opt::sm_gauss_seidel ) - GaussSeidel( h, uf, ff ); - - else if( m_smoother == opt::sm_jacobi ) - Jacobi( h, uf, ff); - - else if( m_smoother == opt::sm_sor ) - SOR( h, uf, ff ); - - if( m_bperiodic && ilevel <= m_ilevelmin ) - make_periodic( uf ); + if (ilevel > m_ilevelmin) + interp_coarse_fine(ilevel, *uc, *uf); + if (m_smoother == opt::sm_gauss_seidel) + GaussSeidel(h, uf, ff); + + else if (m_smoother == opt::sm_jacobi) + Jacobi(h, uf, ff); + + else if (m_smoother == opt::sm_sor) + SOR(h, uf, ff); + + if (m_bperiodic && ilevel <= m_ilevelmin) + make_periodic(uf); } } -template< class S, class O, typename T > -double solver::compute_error( const MeshvarBnd& u, const MeshvarBnd& unew ) +template +double solver::compute_error(const MeshvarBnd &u, const MeshvarBnd &unew) { - int - nx = u.size(0), - ny = u.size(1), - nz = u.size(2); - + int + nx = u.size(0), + ny = u.size(1), + nz = u.size(2); + double err = 0.0; unsigned count = 0; - -#pragma omp parallel for reduction(+:err,count) - for( int ix=0; ix 0.0 )//&& u(ix,iy,iz) != unew(ix,iy,iz) ) + +#pragma omp parallel for reduction(+ \ + : err, count) + for (int ix = 0; ix < nx; ++ix) + for (int iy = 0; iy < ny; ++iy) + for (int iz = 0; iz < nz; ++iz) + if (fabs(unew(ix, iy, iz)) > 0.0) //&& u(ix,iy,iz) != unew(ix,iy,iz) ) { - err += fabs(1.0 - u(ix,iy,iz)/unew(ix,iy,iz)); + err += fabs(1.0 - u(ix, iy, iz) / unew(ix, iy, iz)); ++count; } - - if( count != 0 ) + + if (count != 0) err /= count; - + return err; } -template< class S, class O, typename T > -double solver::compute_error( const GridHierarchy& uh, const GridHierarchy& uhnew, bool verbose ) +template +double solver::compute_error(const GridHierarchy &uh, const GridHierarchy &uhnew, bool verbose) { double maxerr = 0.0; - - for( unsigned ilevel=uh.levelmin(); ilevel <= 
uh.levelmax(); ++ilevel ) + + for (unsigned ilevel = uh.levelmin(); ilevel <= uh.levelmax(); ++ilevel) { double err = 0.0; - err = compute_error( *uh.get_grid(ilevel), *uhnew.get_grid(ilevel) ); - - if( verbose ) + err = compute_error(*uh.get_grid(ilevel), *uhnew.get_grid(ilevel)); + + if (verbose) std::cout << " Level " << std::setw(6) << ilevel << ", Error = " << err << std::endl; - maxerr = std::max(maxerr,err); - + maxerr = std::max(maxerr, err); } return maxerr; } -template< class S, class O, typename T > -double solver::solve( GridHierarchy& uh, double acc, double h, bool verbose ) +template +double solver::solve(GridHierarchy &uh, double acc, double h, bool verbose) { double err; - - GridHierarchy uhnew(uh);//, fsave(*m_pf); + + GridHierarchy uhnew(uh); //, fsave(*m_pf); m_pu = &uh; - - unsigned niter = 0; - + + unsigned niter = 0; + //... iterate ...// while (true) { - - - twoGrid( uh.levelmax() ); - err = compute_error( *m_pu, uhnew, verbose ); + + twoGrid(uh.levelmax()); + err = compute_error(*m_pu, uhnew, verbose); ++niter; - - if( verbose ){ + + if (verbose) + { std::cout << "--> Step No. " << std::setw(3) << niter << ", Max Err = " << err << std::endl; std::cout << "-------------------------------------------------------------\n"; } - - if( (niter > 1) && ((err < acc) || (niter > 20)) ) + + if ((niter > 1) && ((err < acc) || (niter > 20))) break; - + uhnew = *m_pu; //*m_pf = fsave; - } - - if( err > acc ) + } + + if (err > acc) std::cout << "Error : no convergence in Poisson solver" << std::endl; - else if( verbose ) + else if (verbose) std::cout << " - Converged in " << niter << " steps to req. acc. 
of " << acc << std::endl; - - //uh = uhnew; + // uh = uhnew; //*m_pf = fsave; return err; } -inline double interp2( double x1, double x2, double x3, double f1, double f2, double f3, double x ) +inline double interp2(double x1, double x2, double x3, double f1, double f2, double f3, double x) { - double a,b,c; + double a, b, c; a = (x1 * f3 - x3 * f1 - x2 * f3 - x1 * f2 + x2 * f1 + x3 * f2) / (x1 * x3 * x3 - x2 * x3 * x3 + x2 * x1 * x1 - x3 * x1 * x1 + x3 * x2 * x2 - x1 * x2 * x2); b = -(x1 * x1 * f3 - x1 * x1 * f2 - f1 * x3 * x3 + f2 * x3 * x3 - x2 * x2 * f3 + f1 * x2 * x2) / (x1 - x2) / (x1 * x2 - x1 * x3 + x3 * x3 - x2 * x3); c = (x1 * x1 * x2 * f3 - x1 * x1 * x3 * f2 - x2 * x2 * x1 * f3 + f2 * x1 * x3 * x3 + x2 * x2 * x3 * f1 - f1 * x2 * x3 * x3) / (x1 - x2) / (x1 * x2 - x1 * x3 + x3 * x3 - x2 * x3); - - return a*x*x+b*x+c; + + return a * x * x + b * x + c; } -inline double interp2( double fleft, double fcenter, double fright, double x ) +inline double interp2(double fleft, double fcenter, double fright, double x) { - double a,b,c; - a = 0.5*(fleft+fright)-fcenter; - b = 0.5*(fright-fleft); + double a, b, c; + a = 0.5 * (fleft + fright) - fcenter; + b = 0.5 * (fright - fleft); c = fcenter; - - return a*x*x+b*x+c; + + return a * x * x + b * x + c; } - -inline double interp2left( double fleft, double fcenter, double fright ) +inline double interp2left(double fleft, double fcenter, double fright) { - double a,b,c; - a = (6.0*fright-10.0*fcenter+4.0*fleft)/15.0; - b = (-4.0*fleft+9.0*fright-5.0*fcenter)/15.0; + double a, b, c; + a = (6.0 * fright - 10.0 * fcenter + 4.0 * fleft) / 15.0; + b = (-4.0 * fleft + 9.0 * fright - 5.0 * fcenter) / 15.0; c = fcenter; - - return a-b+c; + + return a - b + c; } -inline double interp2right( double fleft, double fcenter, double fright ) +inline double interp2right(double fleft, double fcenter, double fright) { - double a,b,c; - a = (6.0*fleft-10.0*fcenter+4.0*fright)/15.0; - b = (4.0*fright-9.0*fleft+5.0*fcenter)/15.0; + double a, 
b, c; + a = (6.0 * fleft - 10.0 * fcenter + 4.0 * fright) / 15.0; + b = (4.0 * fright - 9.0 * fleft + 5.0 * fcenter) / 15.0; c = fcenter; - - return a+b+c; + + return a + b + c; } -template< class S, class O, typename T > -void solver::interp_cubic( MeshvarBnd& coarse, MeshvarBnd& fine, int i, int j, int k, int itop, int jtop, int ktop ) +template +void solver::interp_cubic(MeshvarBnd &coarse, MeshvarBnd &fine, int i, int j, int k, int itop, int jtop, int ktop) { - MeshvarBnd &u = fine; - MeshvarBnd &utop = coarse; - + MeshvarBnd &u = fine; + MeshvarBnd &utop = coarse; + /* - u(i+0,j+0,k+0) = ( -125.*utop(itop-2,jtop-2,ktop-2) +875.*utop(itop-2,jtop-2,ktop-1) +2625.*utop(itop-2,jtop-2,ktop) - -175.*utop(itop-2,jtop-2,ktop+1) +875.*utop(itop-2,jtop-1,ktop-2) -6125.*utop(itop-2,jtop-1,ktop-1) - -18375.*utop(itop-2,jtop-1,ktop) +1225.*utop(itop-2,jtop-1,ktop+1) +2625.*utop(itop-2,jtop,ktop-2) - -18375.*utop(itop-2,jtop,ktop-1) -55125.*utop(itop-2,jtop,ktop) +3675.*utop(itop-2,jtop,ktop+1) - -175.*utop(itop-2,jtop+1,ktop-2) +1225.*utop(itop-2,jtop+1,ktop-1) +3675.*utop(itop-2,jtop+1,ktop) - -245.*utop(itop-2,jtop+1,ktop+1) +875.*utop(itop-1,jtop-2,ktop-2) -6125.*utop(itop-1,jtop-2,ktop-1) - -18375.*utop(itop-1,jtop-2,ktop) +1225.*utop(itop-1,jtop-2,ktop+1) -6125.*utop(itop-1,jtop-1,ktop-2) - +42875.*utop(itop-1,jtop-1,ktop-1) +128625.*utop(itop-1,jtop-1,ktop) -8575.*utop(itop-1,jtop-1,ktop+1) - -18375.*utop(itop-1,jtop,ktop-2) +128625.*utop(itop-1,jtop,ktop-1) +385875.*utop(itop-1,jtop,ktop) - -25725.*utop(itop-1,jtop,ktop+1) +1225.*utop(itop-1,jtop+1,ktop-2) -8575.*utop(itop-1,jtop+1,ktop-1) - -25725.*utop(itop-1,jtop+1,ktop) +1715.*utop(itop-1,jtop+1,ktop+1) +2625.*utop(itop,jtop-2,ktop-2) - -18375.*utop(itop,jtop-2,ktop-1) -55125.*utop(itop,jtop-2,ktop) +3675.*utop(itop,jtop-2,ktop+1) - -18375.*utop(itop,jtop-1,ktop-2) +128625.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) - -25725.*utop(itop,jtop-1,ktop+1) -55125.*utop(itop,jtop,ktop-2) 
+385875.*utop(itop,jtop,ktop-1) - +1157625.*utop(itop,jtop,ktop) -77175.*utop(itop,jtop,ktop+1) +3675.*utop(itop,jtop+1,ktop-2) - -25725.*utop(itop,jtop+1,ktop-1) -77175.*utop(itop,jtop+1,ktop) +5145.*utop(itop,jtop+1,ktop+1) - -175.*utop(itop+1,jtop-2,ktop-2) +1225.*utop(itop+1,jtop-2,ktop-1) +3675.*utop(itop+1,jtop-2,ktop) - -245.*utop(itop+1,jtop-2,ktop+1) +1225.*utop(itop+1,jtop-1,ktop-2) -8575.*utop(itop+1,jtop-1,ktop-1) - -25725.*utop(itop+1,jtop-1,ktop) +1715.*utop(itop+1,jtop-1,ktop+1) +3675.*utop(itop+1,jtop,ktop-2) - -25725.*utop(itop+1,jtop,ktop-1) -77175.*utop(itop+1,jtop,ktop) +5145.*utop(itop+1,jtop,ktop+1) - -245.*utop(itop+1,jtop+1,ktop-2) +1715.*utop(itop+1,jtop+1,ktop-1) +5145.*utop(itop+1,jtop+1,ktop) - -343.*utop(itop+1,jtop+1,ktop+1) )/2097152.; - u(i+0,j+0,k+1) = ( -175.*utop(itop-2,jtop-2,ktop-1) +2625.*utop(itop-2,jtop-2,ktop) +875.*utop(itop-2,jtop-2,ktop+1) - -125.*utop(itop-2,jtop-2,ktop+2) +1225.*utop(itop-2,jtop-1,ktop-1) -18375.*utop(itop-2,jtop-1,ktop) - -6125.*utop(itop-2,jtop-1,ktop+1) +875.*utop(itop-2,jtop-1,ktop+2) +3675.*utop(itop-2,jtop,ktop-1) - -55125.*utop(itop-2,jtop,ktop) -18375.*utop(itop-2,jtop,ktop+1) +2625.*utop(itop-2,jtop,ktop+2) - -245.*utop(itop-2,jtop+1,ktop-1) +3675.*utop(itop-2,jtop+1,ktop) +1225.*utop(itop-2,jtop+1,ktop+1) - -175.*utop(itop-2,jtop+1,ktop+2) +1225.*utop(itop-1,jtop-2,ktop-1) -18375.*utop(itop-1,jtop-2,ktop) - -6125.*utop(itop-1,jtop-2,ktop+1) +875.*utop(itop-1,jtop-2,ktop+2) -8575.*utop(itop-1,jtop-1,ktop-1) - +128625.*utop(itop-1,jtop-1,ktop) +42875.*utop(itop-1,jtop-1,ktop+1) -6125.*utop(itop-1,jtop-1,ktop+2) - -25725.*utop(itop-1,jtop,ktop-1) +385875.*utop(itop-1,jtop,ktop) +128625.*utop(itop-1,jtop,ktop+1) - -18375.*utop(itop-1,jtop,ktop+2) +1715.*utop(itop-1,jtop+1,ktop-1) -25725.*utop(itop-1,jtop+1,ktop) - -8575.*utop(itop-1,jtop+1,ktop+1) +1225.*utop(itop-1,jtop+1,ktop+2) +3675.*utop(itop,jtop-2,ktop-1) - -55125.*utop(itop,jtop-2,ktop) -18375.*utop(itop,jtop-2,ktop+1) 
+2625.*utop(itop,jtop-2,ktop+2) - -25725.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) +128625.*utop(itop,jtop-1,ktop+1) - -18375.*utop(itop,jtop-1,ktop+2) -77175.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) - +385875.*utop(itop,jtop,ktop+1) -55125.*utop(itop,jtop,ktop+2) +5145.*utop(itop,jtop+1,ktop-1) - -77175.*utop(itop,jtop+1,ktop) -25725.*utop(itop,jtop+1,ktop+1) +3675.*utop(itop,jtop+1,ktop+2) - -245.*utop(itop+1,jtop-2,ktop-1) +3675.*utop(itop+1,jtop-2,ktop) +1225.*utop(itop+1,jtop-2,ktop+1) - -175.*utop(itop+1,jtop-2,ktop+2) +1715.*utop(itop+1,jtop-1,ktop-1) -25725.*utop(itop+1,jtop-1,ktop) - -8575.*utop(itop+1,jtop-1,ktop+1) +1225.*utop(itop+1,jtop-1,ktop+2) +5145.*utop(itop+1,jtop,ktop-1) - -77175.*utop(itop+1,jtop,ktop) -25725.*utop(itop+1,jtop,ktop+1) +3675.*utop(itop+1,jtop,ktop+2) - -343.*utop(itop+1,jtop+1,ktop-1) +5145.*utop(itop+1,jtop+1,ktop) +1715.*utop(itop+1,jtop+1,ktop+1) - -245.*utop(itop+1,jtop+1,ktop+2) )/2097152.; - u(i+0,j+1,k+0) = ( -175.*utop(itop-2,jtop-1,ktop-2) +1225.*utop(itop-2,jtop-1,ktop-1) +3675.*utop(itop-2,jtop-1,ktop) - -245.*utop(itop-2,jtop-1,ktop+1) +2625.*utop(itop-2,jtop,ktop-2) -18375.*utop(itop-2,jtop,ktop-1) - -55125.*utop(itop-2,jtop,ktop) +3675.*utop(itop-2,jtop,ktop+1) +875.*utop(itop-2,jtop+1,ktop-2) - -6125.*utop(itop-2,jtop+1,ktop-1) -18375.*utop(itop-2,jtop+1,ktop) +1225.*utop(itop-2,jtop+1,ktop+1) - -125.*utop(itop-2,jtop+2,ktop-2) +875.*utop(itop-2,jtop+2,ktop-1) +2625.*utop(itop-2,jtop+2,ktop) - -175.*utop(itop-2,jtop+2,ktop+1) +1225.*utop(itop-1,jtop-1,ktop-2) -8575.*utop(itop-1,jtop-1,ktop-1) - -25725.*utop(itop-1,jtop-1,ktop) +1715.*utop(itop-1,jtop-1,ktop+1) -18375.*utop(itop-1,jtop,ktop-2) - +128625.*utop(itop-1,jtop,ktop-1) +385875.*utop(itop-1,jtop,ktop) -25725.*utop(itop-1,jtop,ktop+1) - -6125.*utop(itop-1,jtop+1,ktop-2) +42875.*utop(itop-1,jtop+1,ktop-1) +128625.*utop(itop-1,jtop+1,ktop) - -8575.*utop(itop-1,jtop+1,ktop+1) +875.*utop(itop-1,jtop+2,ktop-2) 
-6125.*utop(itop-1,jtop+2,ktop-1) - -18375.*utop(itop-1,jtop+2,ktop) +1225.*utop(itop-1,jtop+2,ktop+1) +3675.*utop(itop,jtop-1,ktop-2) - -25725.*utop(itop,jtop-1,ktop-1) -77175.*utop(itop,jtop-1,ktop) +5145.*utop(itop,jtop-1,ktop+1) - -55125.*utop(itop,jtop,ktop-2) +385875.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) - -77175.*utop(itop,jtop,ktop+1) -18375.*utop(itop,jtop+1,ktop-2) +128625.*utop(itop,jtop+1,ktop-1) - +385875.*utop(itop,jtop+1,ktop) -25725.*utop(itop,jtop+1,ktop+1) +2625.*utop(itop,jtop+2,ktop-2) - -18375.*utop(itop,jtop+2,ktop-1) -55125.*utop(itop,jtop+2,ktop) +3675.*utop(itop,jtop+2,ktop+1) - -245.*utop(itop+1,jtop-1,ktop-2) +1715.*utop(itop+1,jtop-1,ktop-1) +5145.*utop(itop+1,jtop-1,ktop) - -343.*utop(itop+1,jtop-1,ktop+1) +3675.*utop(itop+1,jtop,ktop-2) -25725.*utop(itop+1,jtop,ktop-1) - -77175.*utop(itop+1,jtop,ktop) +5145.*utop(itop+1,jtop,ktop+1) +1225.*utop(itop+1,jtop+1,ktop-2) - -8575.*utop(itop+1,jtop+1,ktop-1) -25725.*utop(itop+1,jtop+1,ktop) +1715.*utop(itop+1,jtop+1,ktop+1) - -175.*utop(itop+1,jtop+2,ktop-2) +1225.*utop(itop+1,jtop+2,ktop-1) +3675.*utop(itop+1,jtop+2,ktop) - -245.*utop(itop+1,jtop+2,ktop+1) )/2097152.; - u(i+0,j+1,k+1) = ( -245.*utop(itop-2,jtop-1,ktop-1) +3675.*utop(itop-2,jtop-1,ktop) +1225.*utop(itop-2,jtop-1,ktop+1) - -175.*utop(itop-2,jtop-1,ktop+2) +3675.*utop(itop-2,jtop,ktop-1) -55125.*utop(itop-2,jtop,ktop) - -18375.*utop(itop-2,jtop,ktop+1) +2625.*utop(itop-2,jtop,ktop+2) +1225.*utop(itop-2,jtop+1,ktop-1) - -18375.*utop(itop-2,jtop+1,ktop) -6125.*utop(itop-2,jtop+1,ktop+1) +875.*utop(itop-2,jtop+1,ktop+2) - -175.*utop(itop-2,jtop+2,ktop-1) +2625.*utop(itop-2,jtop+2,ktop) +875.*utop(itop-2,jtop+2,ktop+1) - -125.*utop(itop-2,jtop+2,ktop+2) +1715.*utop(itop-1,jtop-1,ktop-1) -25725.*utop(itop-1,jtop-1,ktop) - -8575.*utop(itop-1,jtop-1,ktop+1) +1225.*utop(itop-1,jtop-1,ktop+2) -25725.*utop(itop-1,jtop,ktop-1) - +385875.*utop(itop-1,jtop,ktop) +128625.*utop(itop-1,jtop,ktop+1) 
-18375.*utop(itop-1,jtop,ktop+2) - -8575.*utop(itop-1,jtop+1,ktop-1) +128625.*utop(itop-1,jtop+1,ktop) +42875.*utop(itop-1,jtop+1,ktop+1) - -6125.*utop(itop-1,jtop+1,ktop+2) +1225.*utop(itop-1,jtop+2,ktop-1) -18375.*utop(itop-1,jtop+2,ktop) - -6125.*utop(itop-1,jtop+2,ktop+1) +875.*utop(itop-1,jtop+2,ktop+2) +5145.*utop(itop,jtop-1,ktop-1) - -77175.*utop(itop,jtop-1,ktop) -25725.*utop(itop,jtop-1,ktop+1) +3675.*utop(itop,jtop-1,ktop+2) - -77175.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) +385875.*utop(itop,jtop,ktop+1) - -55125.*utop(itop,jtop,ktop+2) -25725.*utop(itop,jtop+1,ktop-1) +385875.*utop(itop,jtop+1,ktop) - +128625.*utop(itop,jtop+1,ktop+1) -18375.*utop(itop,jtop+1,ktop+2) +3675.*utop(itop,jtop+2,ktop-1) - -55125.*utop(itop,jtop+2,ktop) -18375.*utop(itop,jtop+2,ktop+1) +2625.*utop(itop,jtop+2,ktop+2) - -343.*utop(itop+1,jtop-1,ktop-1) +5145.*utop(itop+1,jtop-1,ktop) +1715.*utop(itop+1,jtop-1,ktop+1) - -245.*utop(itop+1,jtop-1,ktop+2) +5145.*utop(itop+1,jtop,ktop-1) -77175.*utop(itop+1,jtop,ktop) - -25725.*utop(itop+1,jtop,ktop+1) +3675.*utop(itop+1,jtop,ktop+2) +1715.*utop(itop+1,jtop+1,ktop-1) - -25725.*utop(itop+1,jtop+1,ktop) -8575.*utop(itop+1,jtop+1,ktop+1) +1225.*utop(itop+1,jtop+1,ktop+2) - -245.*utop(itop+1,jtop+2,ktop-1) +3675.*utop(itop+1,jtop+2,ktop) +1225.*utop(itop+1,jtop+2,ktop+1) - -175.*utop(itop+1,jtop+2,ktop+2) )/2097152.; - u(i+1,j+0,k+0) = ( -175.*utop(itop-1,jtop-2,ktop-2) +1225.*utop(itop-1,jtop-2,ktop-1) +3675.*utop(itop-1,jtop-2,ktop) - -245.*utop(itop-1,jtop-2,ktop+1) +1225.*utop(itop-1,jtop-1,ktop-2) -8575.*utop(itop-1,jtop-1,ktop-1) - -25725.*utop(itop-1,jtop-1,ktop) +1715.*utop(itop-1,jtop-1,ktop+1) +3675.*utop(itop-1,jtop,ktop-2) - -25725.*utop(itop-1,jtop,ktop-1) -77175.*utop(itop-1,jtop,ktop) +5145.*utop(itop-1,jtop,ktop+1) - -245.*utop(itop-1,jtop+1,ktop-2) +1715.*utop(itop-1,jtop+1,ktop-1) +5145.*utop(itop-1,jtop+1,ktop) - -343.*utop(itop-1,jtop+1,ktop+1) +2625.*utop(itop,jtop-2,ktop-2) 
-18375.*utop(itop,jtop-2,ktop-1) - -55125.*utop(itop,jtop-2,ktop) +3675.*utop(itop,jtop-2,ktop+1) -18375.*utop(itop,jtop-1,ktop-2) - +128625.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) -25725.*utop(itop,jtop-1,ktop+1) - -55125.*utop(itop,jtop,ktop-2) +385875.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) - -77175.*utop(itop,jtop,ktop+1) +3675.*utop(itop,jtop+1,ktop-2) -25725.*utop(itop,jtop+1,ktop-1) - -77175.*utop(itop,jtop+1,ktop) +5145.*utop(itop,jtop+1,ktop+1) +875.*utop(itop+1,jtop-2,ktop-2) - -6125.*utop(itop+1,jtop-2,ktop-1) -18375.*utop(itop+1,jtop-2,ktop) +1225.*utop(itop+1,jtop-2,ktop+1) - -6125.*utop(itop+1,jtop-1,ktop-2) +42875.*utop(itop+1,jtop-1,ktop-1) +128625.*utop(itop+1,jtop-1,ktop) - -8575.*utop(itop+1,jtop-1,ktop+1) -18375.*utop(itop+1,jtop,ktop-2) +128625.*utop(itop+1,jtop,ktop-1) - +385875.*utop(itop+1,jtop,ktop) -25725.*utop(itop+1,jtop,ktop+1) +1225.*utop(itop+1,jtop+1,ktop-2) - -8575.*utop(itop+1,jtop+1,ktop-1) -25725.*utop(itop+1,jtop+1,ktop) +1715.*utop(itop+1,jtop+1,ktop+1) - -125.*utop(itop+2,jtop-2,ktop-2) +875.*utop(itop+2,jtop-2,ktop-1) +2625.*utop(itop+2,jtop-2,ktop) - -175.*utop(itop+2,jtop-2,ktop+1) +875.*utop(itop+2,jtop-1,ktop-2) -6125.*utop(itop+2,jtop-1,ktop-1) - -18375.*utop(itop+2,jtop-1,ktop) +1225.*utop(itop+2,jtop-1,ktop+1) +2625.*utop(itop+2,jtop,ktop-2) - -18375.*utop(itop+2,jtop,ktop-1) -55125.*utop(itop+2,jtop,ktop) +3675.*utop(itop+2,jtop,ktop+1) - -175.*utop(itop+2,jtop+1,ktop-2) +1225.*utop(itop+2,jtop+1,ktop-1) +3675.*utop(itop+2,jtop+1,ktop) - -245.*utop(itop+2,jtop+1,ktop+1) )/2097152.; + u(i+0,j+0,k+0) = ( -125.*utop(itop-2,jtop-2,ktop-2) +875.*utop(itop-2,jtop-2,ktop-1) +2625.*utop(itop-2,jtop-2,ktop) + -175.*utop(itop-2,jtop-2,ktop+1) +875.*utop(itop-2,jtop-1,ktop-2) -6125.*utop(itop-2,jtop-1,ktop-1) + -18375.*utop(itop-2,jtop-1,ktop) +1225.*utop(itop-2,jtop-1,ktop+1) +2625.*utop(itop-2,jtop,ktop-2) + -18375.*utop(itop-2,jtop,ktop-1) -55125.*utop(itop-2,jtop,ktop) 
+3675.*utop(itop-2,jtop,ktop+1) + -175.*utop(itop-2,jtop+1,ktop-2) +1225.*utop(itop-2,jtop+1,ktop-1) +3675.*utop(itop-2,jtop+1,ktop) + -245.*utop(itop-2,jtop+1,ktop+1) +875.*utop(itop-1,jtop-2,ktop-2) -6125.*utop(itop-1,jtop-2,ktop-1) + -18375.*utop(itop-1,jtop-2,ktop) +1225.*utop(itop-1,jtop-2,ktop+1) -6125.*utop(itop-1,jtop-1,ktop-2) + +42875.*utop(itop-1,jtop-1,ktop-1) +128625.*utop(itop-1,jtop-1,ktop) -8575.*utop(itop-1,jtop-1,ktop+1) + -18375.*utop(itop-1,jtop,ktop-2) +128625.*utop(itop-1,jtop,ktop-1) +385875.*utop(itop-1,jtop,ktop) + -25725.*utop(itop-1,jtop,ktop+1) +1225.*utop(itop-1,jtop+1,ktop-2) -8575.*utop(itop-1,jtop+1,ktop-1) + -25725.*utop(itop-1,jtop+1,ktop) +1715.*utop(itop-1,jtop+1,ktop+1) +2625.*utop(itop,jtop-2,ktop-2) + -18375.*utop(itop,jtop-2,ktop-1) -55125.*utop(itop,jtop-2,ktop) +3675.*utop(itop,jtop-2,ktop+1) + -18375.*utop(itop,jtop-1,ktop-2) +128625.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) + -25725.*utop(itop,jtop-1,ktop+1) -55125.*utop(itop,jtop,ktop-2) +385875.*utop(itop,jtop,ktop-1) + +1157625.*utop(itop,jtop,ktop) -77175.*utop(itop,jtop,ktop+1) +3675.*utop(itop,jtop+1,ktop-2) + -25725.*utop(itop,jtop+1,ktop-1) -77175.*utop(itop,jtop+1,ktop) +5145.*utop(itop,jtop+1,ktop+1) + -175.*utop(itop+1,jtop-2,ktop-2) +1225.*utop(itop+1,jtop-2,ktop-1) +3675.*utop(itop+1,jtop-2,ktop) + -245.*utop(itop+1,jtop-2,ktop+1) +1225.*utop(itop+1,jtop-1,ktop-2) -8575.*utop(itop+1,jtop-1,ktop-1) + -25725.*utop(itop+1,jtop-1,ktop) +1715.*utop(itop+1,jtop-1,ktop+1) +3675.*utop(itop+1,jtop,ktop-2) + -25725.*utop(itop+1,jtop,ktop-1) -77175.*utop(itop+1,jtop,ktop) +5145.*utop(itop+1,jtop,ktop+1) + -245.*utop(itop+1,jtop+1,ktop-2) +1715.*utop(itop+1,jtop+1,ktop-1) +5145.*utop(itop+1,jtop+1,ktop) + -343.*utop(itop+1,jtop+1,ktop+1) )/2097152.; + u(i+0,j+0,k+1) = ( -175.*utop(itop-2,jtop-2,ktop-1) +2625.*utop(itop-2,jtop-2,ktop) +875.*utop(itop-2,jtop-2,ktop+1) + -125.*utop(itop-2,jtop-2,ktop+2) +1225.*utop(itop-2,jtop-1,ktop-1) 
-18375.*utop(itop-2,jtop-1,ktop) + -6125.*utop(itop-2,jtop-1,ktop+1) +875.*utop(itop-2,jtop-1,ktop+2) +3675.*utop(itop-2,jtop,ktop-1) + -55125.*utop(itop-2,jtop,ktop) -18375.*utop(itop-2,jtop,ktop+1) +2625.*utop(itop-2,jtop,ktop+2) + -245.*utop(itop-2,jtop+1,ktop-1) +3675.*utop(itop-2,jtop+1,ktop) +1225.*utop(itop-2,jtop+1,ktop+1) + -175.*utop(itop-2,jtop+1,ktop+2) +1225.*utop(itop-1,jtop-2,ktop-1) -18375.*utop(itop-1,jtop-2,ktop) + -6125.*utop(itop-1,jtop-2,ktop+1) +875.*utop(itop-1,jtop-2,ktop+2) -8575.*utop(itop-1,jtop-1,ktop-1) + +128625.*utop(itop-1,jtop-1,ktop) +42875.*utop(itop-1,jtop-1,ktop+1) -6125.*utop(itop-1,jtop-1,ktop+2) + -25725.*utop(itop-1,jtop,ktop-1) +385875.*utop(itop-1,jtop,ktop) +128625.*utop(itop-1,jtop,ktop+1) + -18375.*utop(itop-1,jtop,ktop+2) +1715.*utop(itop-1,jtop+1,ktop-1) -25725.*utop(itop-1,jtop+1,ktop) + -8575.*utop(itop-1,jtop+1,ktop+1) +1225.*utop(itop-1,jtop+1,ktop+2) +3675.*utop(itop,jtop-2,ktop-1) + -55125.*utop(itop,jtop-2,ktop) -18375.*utop(itop,jtop-2,ktop+1) +2625.*utop(itop,jtop-2,ktop+2) + -25725.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) +128625.*utop(itop,jtop-1,ktop+1) + -18375.*utop(itop,jtop-1,ktop+2) -77175.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) + +385875.*utop(itop,jtop,ktop+1) -55125.*utop(itop,jtop,ktop+2) +5145.*utop(itop,jtop+1,ktop-1) + -77175.*utop(itop,jtop+1,ktop) -25725.*utop(itop,jtop+1,ktop+1) +3675.*utop(itop,jtop+1,ktop+2) + -245.*utop(itop+1,jtop-2,ktop-1) +3675.*utop(itop+1,jtop-2,ktop) +1225.*utop(itop+1,jtop-2,ktop+1) + -175.*utop(itop+1,jtop-2,ktop+2) +1715.*utop(itop+1,jtop-1,ktop-1) -25725.*utop(itop+1,jtop-1,ktop) + -8575.*utop(itop+1,jtop-1,ktop+1) +1225.*utop(itop+1,jtop-1,ktop+2) +5145.*utop(itop+1,jtop,ktop-1) + -77175.*utop(itop+1,jtop,ktop) -25725.*utop(itop+1,jtop,ktop+1) +3675.*utop(itop+1,jtop,ktop+2) + -343.*utop(itop+1,jtop+1,ktop-1) +5145.*utop(itop+1,jtop+1,ktop) +1715.*utop(itop+1,jtop+1,ktop+1) + -245.*utop(itop+1,jtop+1,ktop+2) )/2097152.; + 
u(i+0,j+1,k+0) = ( -175.*utop(itop-2,jtop-1,ktop-2) +1225.*utop(itop-2,jtop-1,ktop-1) +3675.*utop(itop-2,jtop-1,ktop) + -245.*utop(itop-2,jtop-1,ktop+1) +2625.*utop(itop-2,jtop,ktop-2) -18375.*utop(itop-2,jtop,ktop-1) + -55125.*utop(itop-2,jtop,ktop) +3675.*utop(itop-2,jtop,ktop+1) +875.*utop(itop-2,jtop+1,ktop-2) + -6125.*utop(itop-2,jtop+1,ktop-1) -18375.*utop(itop-2,jtop+1,ktop) +1225.*utop(itop-2,jtop+1,ktop+1) + -125.*utop(itop-2,jtop+2,ktop-2) +875.*utop(itop-2,jtop+2,ktop-1) +2625.*utop(itop-2,jtop+2,ktop) + -175.*utop(itop-2,jtop+2,ktop+1) +1225.*utop(itop-1,jtop-1,ktop-2) -8575.*utop(itop-1,jtop-1,ktop-1) + -25725.*utop(itop-1,jtop-1,ktop) +1715.*utop(itop-1,jtop-1,ktop+1) -18375.*utop(itop-1,jtop,ktop-2) + +128625.*utop(itop-1,jtop,ktop-1) +385875.*utop(itop-1,jtop,ktop) -25725.*utop(itop-1,jtop,ktop+1) + -6125.*utop(itop-1,jtop+1,ktop-2) +42875.*utop(itop-1,jtop+1,ktop-1) +128625.*utop(itop-1,jtop+1,ktop) + -8575.*utop(itop-1,jtop+1,ktop+1) +875.*utop(itop-1,jtop+2,ktop-2) -6125.*utop(itop-1,jtop+2,ktop-1) + -18375.*utop(itop-1,jtop+2,ktop) +1225.*utop(itop-1,jtop+2,ktop+1) +3675.*utop(itop,jtop-1,ktop-2) + -25725.*utop(itop,jtop-1,ktop-1) -77175.*utop(itop,jtop-1,ktop) +5145.*utop(itop,jtop-1,ktop+1) + -55125.*utop(itop,jtop,ktop-2) +385875.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) + -77175.*utop(itop,jtop,ktop+1) -18375.*utop(itop,jtop+1,ktop-2) +128625.*utop(itop,jtop+1,ktop-1) + +385875.*utop(itop,jtop+1,ktop) -25725.*utop(itop,jtop+1,ktop+1) +2625.*utop(itop,jtop+2,ktop-2) + -18375.*utop(itop,jtop+2,ktop-1) -55125.*utop(itop,jtop+2,ktop) +3675.*utop(itop,jtop+2,ktop+1) + -245.*utop(itop+1,jtop-1,ktop-2) +1715.*utop(itop+1,jtop-1,ktop-1) +5145.*utop(itop+1,jtop-1,ktop) + -343.*utop(itop+1,jtop-1,ktop+1) +3675.*utop(itop+1,jtop,ktop-2) -25725.*utop(itop+1,jtop,ktop-1) + -77175.*utop(itop+1,jtop,ktop) +5145.*utop(itop+1,jtop,ktop+1) +1225.*utop(itop+1,jtop+1,ktop-2) + -8575.*utop(itop+1,jtop+1,ktop-1) -25725.*utop(itop+1,jtop+1,ktop) 
+1715.*utop(itop+1,jtop+1,ktop+1) + -175.*utop(itop+1,jtop+2,ktop-2) +1225.*utop(itop+1,jtop+2,ktop-1) +3675.*utop(itop+1,jtop+2,ktop) + -245.*utop(itop+1,jtop+2,ktop+1) )/2097152.; + u(i+0,j+1,k+1) = ( -245.*utop(itop-2,jtop-1,ktop-1) +3675.*utop(itop-2,jtop-1,ktop) +1225.*utop(itop-2,jtop-1,ktop+1) + -175.*utop(itop-2,jtop-1,ktop+2) +3675.*utop(itop-2,jtop,ktop-1) -55125.*utop(itop-2,jtop,ktop) + -18375.*utop(itop-2,jtop,ktop+1) +2625.*utop(itop-2,jtop,ktop+2) +1225.*utop(itop-2,jtop+1,ktop-1) + -18375.*utop(itop-2,jtop+1,ktop) -6125.*utop(itop-2,jtop+1,ktop+1) +875.*utop(itop-2,jtop+1,ktop+2) + -175.*utop(itop-2,jtop+2,ktop-1) +2625.*utop(itop-2,jtop+2,ktop) +875.*utop(itop-2,jtop+2,ktop+1) + -125.*utop(itop-2,jtop+2,ktop+2) +1715.*utop(itop-1,jtop-1,ktop-1) -25725.*utop(itop-1,jtop-1,ktop) + -8575.*utop(itop-1,jtop-1,ktop+1) +1225.*utop(itop-1,jtop-1,ktop+2) -25725.*utop(itop-1,jtop,ktop-1) + +385875.*utop(itop-1,jtop,ktop) +128625.*utop(itop-1,jtop,ktop+1) -18375.*utop(itop-1,jtop,ktop+2) + -8575.*utop(itop-1,jtop+1,ktop-1) +128625.*utop(itop-1,jtop+1,ktop) +42875.*utop(itop-1,jtop+1,ktop+1) + -6125.*utop(itop-1,jtop+1,ktop+2) +1225.*utop(itop-1,jtop+2,ktop-1) -18375.*utop(itop-1,jtop+2,ktop) + -6125.*utop(itop-1,jtop+2,ktop+1) +875.*utop(itop-1,jtop+2,ktop+2) +5145.*utop(itop,jtop-1,ktop-1) + -77175.*utop(itop,jtop-1,ktop) -25725.*utop(itop,jtop-1,ktop+1) +3675.*utop(itop,jtop-1,ktop+2) + -77175.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) +385875.*utop(itop,jtop,ktop+1) + -55125.*utop(itop,jtop,ktop+2) -25725.*utop(itop,jtop+1,ktop-1) +385875.*utop(itop,jtop+1,ktop) + +128625.*utop(itop,jtop+1,ktop+1) -18375.*utop(itop,jtop+1,ktop+2) +3675.*utop(itop,jtop+2,ktop-1) + -55125.*utop(itop,jtop+2,ktop) -18375.*utop(itop,jtop+2,ktop+1) +2625.*utop(itop,jtop+2,ktop+2) + -343.*utop(itop+1,jtop-1,ktop-1) +5145.*utop(itop+1,jtop-1,ktop) +1715.*utop(itop+1,jtop-1,ktop+1) + -245.*utop(itop+1,jtop-1,ktop+2) +5145.*utop(itop+1,jtop,ktop-1) 
-77175.*utop(itop+1,jtop,ktop) + -25725.*utop(itop+1,jtop,ktop+1) +3675.*utop(itop+1,jtop,ktop+2) +1715.*utop(itop+1,jtop+1,ktop-1) + -25725.*utop(itop+1,jtop+1,ktop) -8575.*utop(itop+1,jtop+1,ktop+1) +1225.*utop(itop+1,jtop+1,ktop+2) + -245.*utop(itop+1,jtop+2,ktop-1) +3675.*utop(itop+1,jtop+2,ktop) +1225.*utop(itop+1,jtop+2,ktop+1) + -175.*utop(itop+1,jtop+2,ktop+2) )/2097152.; + u(i+1,j+0,k+0) = ( -175.*utop(itop-1,jtop-2,ktop-2) +1225.*utop(itop-1,jtop-2,ktop-1) +3675.*utop(itop-1,jtop-2,ktop) + -245.*utop(itop-1,jtop-2,ktop+1) +1225.*utop(itop-1,jtop-1,ktop-2) -8575.*utop(itop-1,jtop-1,ktop-1) + -25725.*utop(itop-1,jtop-1,ktop) +1715.*utop(itop-1,jtop-1,ktop+1) +3675.*utop(itop-1,jtop,ktop-2) + -25725.*utop(itop-1,jtop,ktop-1) -77175.*utop(itop-1,jtop,ktop) +5145.*utop(itop-1,jtop,ktop+1) + -245.*utop(itop-1,jtop+1,ktop-2) +1715.*utop(itop-1,jtop+1,ktop-1) +5145.*utop(itop-1,jtop+1,ktop) + -343.*utop(itop-1,jtop+1,ktop+1) +2625.*utop(itop,jtop-2,ktop-2) -18375.*utop(itop,jtop-2,ktop-1) + -55125.*utop(itop,jtop-2,ktop) +3675.*utop(itop,jtop-2,ktop+1) -18375.*utop(itop,jtop-1,ktop-2) + +128625.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) -25725.*utop(itop,jtop-1,ktop+1) + -55125.*utop(itop,jtop,ktop-2) +385875.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) + -77175.*utop(itop,jtop,ktop+1) +3675.*utop(itop,jtop+1,ktop-2) -25725.*utop(itop,jtop+1,ktop-1) + -77175.*utop(itop,jtop+1,ktop) +5145.*utop(itop,jtop+1,ktop+1) +875.*utop(itop+1,jtop-2,ktop-2) + -6125.*utop(itop+1,jtop-2,ktop-1) -18375.*utop(itop+1,jtop-2,ktop) +1225.*utop(itop+1,jtop-2,ktop+1) + -6125.*utop(itop+1,jtop-1,ktop-2) +42875.*utop(itop+1,jtop-1,ktop-1) +128625.*utop(itop+1,jtop-1,ktop) + -8575.*utop(itop+1,jtop-1,ktop+1) -18375.*utop(itop+1,jtop,ktop-2) +128625.*utop(itop+1,jtop,ktop-1) + +385875.*utop(itop+1,jtop,ktop) -25725.*utop(itop+1,jtop,ktop+1) +1225.*utop(itop+1,jtop+1,ktop-2) + -8575.*utop(itop+1,jtop+1,ktop-1) -25725.*utop(itop+1,jtop+1,ktop) 
+1715.*utop(itop+1,jtop+1,ktop+1) + -125.*utop(itop+2,jtop-2,ktop-2) +875.*utop(itop+2,jtop-2,ktop-1) +2625.*utop(itop+2,jtop-2,ktop) + -175.*utop(itop+2,jtop-2,ktop+1) +875.*utop(itop+2,jtop-1,ktop-2) -6125.*utop(itop+2,jtop-1,ktop-1) + -18375.*utop(itop+2,jtop-1,ktop) +1225.*utop(itop+2,jtop-1,ktop+1) +2625.*utop(itop+2,jtop,ktop-2) + -18375.*utop(itop+2,jtop,ktop-1) -55125.*utop(itop+2,jtop,ktop) +3675.*utop(itop+2,jtop,ktop+1) + -175.*utop(itop+2,jtop+1,ktop-2) +1225.*utop(itop+2,jtop+1,ktop-1) +3675.*utop(itop+2,jtop+1,ktop) + -245.*utop(itop+2,jtop+1,ktop+1) )/2097152.; u(i+1,j+0,k+1) = ( -245.*utop(itop-1,jtop-2,ktop-1) +3675.*utop(itop-1,jtop-2,ktop) +1225.*utop(itop-1,jtop-2,ktop+1) -175.*utop(itop-1,jtop-2,ktop+2) +1715.*utop(itop-1,jtop-1,ktop-1) -25725.*utop(itop-1,jtop-1,ktop) -8575.*utop(itop-1,jtop-1,ktop+1) +1225.*utop(itop-1,jtop-1,ktop+2) +5145.*utop(itop-1,jtop,ktop-1) -77175.*utop(itop-1,jtop,ktop) -25725.*utop(itop-1,jtop,ktop+1) +3675.*utop(itop-1,jtop,ktop+2) -343.*utop(itop-1,jtop+1,ktop-1) +5145.*utop(itop-1,jtop+1,ktop) +1715.*utop(itop-1,jtop+1,ktop+1) -245.*utop(itop-1,jtop+1,ktop+2) +3675.*utop(itop,jtop-2,ktop-1) -55125.*utop(itop,jtop-2,ktop) -18375.*utop(itop,jtop-2,ktop+1) +2625.*utop(itop,jtop-2,ktop+2) -25725.*utop(itop,jtop-1,ktop-1) +385875.*utop(itop,jtop-1,ktop) +128625.*utop(itop,jtop-1,ktop+1) -18375.*utop(itop,jtop-1,ktop+2) -77175.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) +385875.*utop(itop,jtop,ktop+1) -55125.*utop(itop,jtop,ktop+2) +5145.*utop(itop,jtop+1,ktop-1) -77175.*utop(itop,jtop+1,ktop) -25725.*utop(itop,jtop+1,ktop+1) +3675.*utop(itop,jtop+1,ktop+2) +1225.*utop(itop+1,jtop-2,ktop-1) -18375.*utop(itop+1,jtop-2,ktop) -6125.*utop(itop+1,jtop-2,ktop+1) +875.*utop(itop+1,jtop-2,ktop+2) -8575.*utop(itop+1,jtop-1,ktop-1) +128625.*utop(itop+1,jtop-1,ktop) +42875.*utop(itop+1,jtop-1,ktop+1) -6125.*utop(itop+1,jtop-1,ktop+2) -25725.*utop(itop+1,jtop,ktop-1) +385875.*utop(itop+1,jtop,ktop) 
+128625.*utop(itop+1,jtop,ktop+1) -18375.*utop(itop+1,jtop,ktop+2) +1715.*utop(itop+1,jtop+1,ktop-1) -25725.*utop(itop+1,jtop+1,ktop) -8575.*utop(itop+1,jtop+1,ktop+1) +1225.*utop(itop+1,jtop+1,ktop+2) -175.*utop(itop+2,jtop-2,ktop-1) +2625.*utop(itop+2,jtop-2,ktop) +875.*utop(itop+2,jtop-2,ktop+1) -125.*utop(itop+2,jtop-2,ktop+2) +1225.*utop(itop+2,jtop-1,ktop-1) -18375.*utop(itop+2,jtop-1,ktop) -6125.*utop(itop+2,jtop-1,ktop+1) +875.*utop(itop+2,jtop-1,ktop+2) +3675.*utop(itop+2,jtop,ktop-1) -55125.*utop(itop+2,jtop,ktop) -18375.*utop(itop+2,jtop,ktop+1) +2625.*utop(itop+2,jtop,ktop+2) -245.*utop(itop+2,jtop+1,ktop-1) +3675.*utop(itop+2,jtop+1,ktop) +1225.*utop(itop+2,jtop+1,ktop+1) -175.*utop(itop+2,jtop+1,ktop+2) )/2097152.; u(i+1,j+1,k+0) = ( -245.*utop(itop-1,jtop-1,ktop-2) +1715.*utop(itop-1,jtop-1,ktop-1) +5145.*utop(itop-1,jtop-1,ktop) -343.*utop(itop-1,jtop-1,ktop+1) +3675.*utop(itop-1,jtop,ktop-2) -25725.*utop(itop-1,jtop,ktop-1) -77175.*utop(itop-1,jtop,ktop) +5145.*utop(itop-1,jtop,ktop+1) +1225.*utop(itop-1,jtop+1,ktop-2) -8575.*utop(itop-1,jtop+1,ktop-1) -25725.*utop(itop-1,jtop+1,ktop) +1715.*utop(itop-1,jtop+1,ktop+1) -175.*utop(itop-1,jtop+2,ktop-2) +1225.*utop(itop-1,jtop+2,ktop-1) +3675.*utop(itop-1,jtop+2,ktop) -245.*utop(itop-1,jtop+2,ktop+1) +3675.*utop(itop,jtop-1,ktop-2) -25725.*utop(itop,jtop-1,ktop-1) -77175.*utop(itop,jtop-1,ktop) +5145.*utop(itop,jtop-1,ktop+1) -55125.*utop(itop,jtop,ktop-2) +385875.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) -77175.*utop(itop,jtop,ktop+1) -18375.*utop(itop,jtop+1,ktop-2) +128625.*utop(itop,jtop+1,ktop-1) +385875.*utop(itop,jtop+1,ktop) -25725.*utop(itop,jtop+1,ktop+1) +2625.*utop(itop,jtop+2,ktop-2) -18375.*utop(itop,jtop+2,ktop-1) -55125.*utop(itop,jtop+2,ktop) +3675.*utop(itop,jtop+2,ktop+1) +1225.*utop(itop+1,jtop-1,ktop-2) -8575.*utop(itop+1,jtop-1,ktop-1) -25725.*utop(itop+1,jtop-1,ktop) +1715.*utop(itop+1,jtop-1,ktop+1) -18375.*utop(itop+1,jtop,ktop-2) +128625.*utop(itop+1,jtop,ktop-1) 
+385875.*utop(itop+1,jtop,ktop) -25725.*utop(itop+1,jtop,ktop+1) -6125.*utop(itop+1,jtop+1,ktop-2) +42875.*utop(itop+1,jtop+1,ktop-1) +128625.*utop(itop+1,jtop+1,ktop) -8575.*utop(itop+1,jtop+1,ktop+1) +875.*utop(itop+1,jtop+2,ktop-2) -6125.*utop(itop+1,jtop+2,ktop-1) -18375.*utop(itop+1,jtop+2,ktop) +1225.*utop(itop+1,jtop+2,ktop+1) -175.*utop(itop+2,jtop-1,ktop-2) +1225.*utop(itop+2,jtop-1,ktop-1) +3675.*utop(itop+2,jtop-1,ktop) -245.*utop(itop+2,jtop-1,ktop+1) +2625.*utop(itop+2,jtop,ktop-2) -18375.*utop(itop+2,jtop,ktop-1) -55125.*utop(itop+2,jtop,ktop) +3675.*utop(itop+2,jtop,ktop+1) +875.*utop(itop+2,jtop+1,ktop-2) -6125.*utop(itop+2,jtop+1,ktop-1) -18375.*utop(itop+2,jtop+1,ktop) +1225.*utop(itop+2,jtop+1,ktop+1) -125.*utop(itop+2,jtop+2,ktop-2) +875.*utop(itop+2,jtop+2,ktop-1) +2625.*utop(itop+2,jtop+2,ktop) -175.*utop(itop+2,jtop+2,ktop+1) )/2097152.; u(i+1,j+1,k+1) = ( -343.*utop(itop-1,jtop-1,ktop-1) +5145.*utop(itop-1,jtop-1,ktop) +1715.*utop(itop-1,jtop-1,ktop+1) -245.*utop(itop-1,jtop-1,ktop+2) +5145.*utop(itop-1,jtop,ktop-1) -77175.*utop(itop-1,jtop,ktop) -25725.*utop(itop-1,jtop,ktop+1) +3675.*utop(itop-1,jtop,ktop+2) +1715.*utop(itop-1,jtop+1,ktop-1) -25725.*utop(itop-1,jtop+1,ktop) -8575.*utop(itop-1,jtop+1,ktop+1) +1225.*utop(itop-1,jtop+1,ktop+2) -245.*utop(itop-1,jtop+2,ktop-1) +3675.*utop(itop-1,jtop+2,ktop) +1225.*utop(itop-1,jtop+2,ktop+1) -175.*utop(itop-1,jtop+2,ktop+2) +5145.*utop(itop,jtop-1,ktop-1) -77175.*utop(itop,jtop-1,ktop) -25725.*utop(itop,jtop-1,ktop+1) +3675.*utop(itop,jtop-1,ktop+2) -77175.*utop(itop,jtop,ktop-1) +1157625.*utop(itop,jtop,ktop) +385875.*utop(itop,jtop,ktop+1) -55125.*utop(itop,jtop,ktop+2) -25725.*utop(itop,jtop+1,ktop-1) +385875.*utop(itop,jtop+1,ktop) +128625.*utop(itop,jtop+1,ktop+1) -18375.*utop(itop,jtop+1,ktop+2) +3675.*utop(itop,jtop+2,ktop-1) -55125.*utop(itop,jtop+2,ktop) -18375.*utop(itop,jtop+2,ktop+1) +2625.*utop(itop,jtop+2,ktop+2) +1715.*utop(itop+1,jtop-1,ktop-1) -25725.*utop(itop+1,jtop-1,ktop) 
-8575.*utop(itop+1,jtop-1,ktop+1) +1225.*utop(itop+1,jtop-1,ktop+2) -25725.*utop(itop+1,jtop,ktop-1) +385875.*utop(itop+1,jtop,ktop) +128625.*utop(itop+1,jtop,ktop+1) -18375.*utop(itop+1,jtop,ktop+2) -8575.*utop(itop+1,jtop+1,ktop-1) +128625.*utop(itop+1,jtop+1,ktop) +42875.*utop(itop+1,jtop+1,ktop+1) -6125.*utop(itop+1,jtop+1,ktop+2) +1225.*utop(itop+1,jtop+2,ktop-1) -18375.*utop(itop+1,jtop+2,ktop) -6125.*utop(itop+1,jtop+2,ktop+1) +875.*utop(itop+1,jtop+2,ktop+2) -245.*utop(itop+2,jtop-1,ktop-1) +3675.*utop(itop+2,jtop-1,ktop) +1225.*utop(itop+2,jtop-1,ktop+1) -175.*utop(itop+2,jtop-1,ktop+2) +3675.*utop(itop+2,jtop,ktop-1) -55125.*utop(itop+2,jtop,ktop) -18375.*utop(itop+2,jtop,ktop+1) +2625.*utop(itop+2,jtop,ktop+2) +1225.*utop(itop+2,jtop+1,ktop-1) -18375.*utop(itop+2,jtop+1,ktop) -6125.*utop(itop+2,jtop+1,ktop+1) +875.*utop(itop+2,jtop+1,ktop+2) -175.*utop(itop+2,jtop+2,ktop-1) +2625.*utop(itop+2,jtop+2,ktop) +875.*utop(itop+2,jtop+2,ktop+1) -125.*utop(itop+2,jtop+2,ktop+2) )/2097152.; */ - - u(i+0,j+0,k+0) = ( -1.060835e-05*utop(itop-2,jtop-2,ktop-2) +9.901123e-05*utop(itop-2,jtop-2,ktop-1) +4.455505e-04*utop(itop-2,jtop-2,ktop) -5.940674e-05*utop(itop-2,jtop-2,ktop+1) +8.250936e-06*utop(itop-2,jtop-2,ktop+2) +9.901123e-05*utop(itop-2,jtop-1,ktop-2) -9.241048e-04*utop(itop-2,jtop-1,ktop-1) -4.158472e-03*utop(itop-2,jtop-1,ktop) +5.544629e-04*utop(itop-2,jtop-1,ktop+1) -7.700874e-05*utop(itop-2,jtop-1,ktop+2) +4.455505e-04*utop(itop-2,jtop,ktop-2) -4.158472e-03*utop(itop-2,jtop,ktop-1) -1.871312e-02*utop(itop-2,jtop,ktop) +2.495083e-03*utop(itop-2,jtop,ktop+1) -3.465393e-04*utop(itop-2,jtop,ktop+2) -5.940674e-05*utop(itop-2,jtop+1,ktop-2) +5.544629e-04*utop(itop-2,jtop+1,ktop-1) +2.495083e-03*utop(itop-2,jtop+1,ktop) -3.326777e-04*utop(itop-2,jtop+1,ktop+1) +4.620524e-05*utop(itop-2,jtop+1,ktop+2) +8.250936e-06*utop(itop-2,jtop+2,ktop-2) -7.700874e-05*utop(itop-2,jtop+2,ktop-1) -3.465393e-04*utop(itop-2,jtop+2,ktop) +4.620524e-05*utop(itop-2,jtop+2,ktop+1) 
-6.417395e-06*utop(itop-2,jtop+2,ktop+2) +9.901123e-05*utop(itop-1,jtop-2,ktop-2) -9.241048e-04*utop(itop-1,jtop-2,ktop-1) -4.158472e-03*utop(itop-1,jtop-2,ktop) +5.544629e-04*utop(itop-1,jtop-2,ktop+1) -7.700874e-05*utop(itop-1,jtop-2,ktop+2) -9.241048e-04*utop(itop-1,jtop-1,ktop-2) +8.624978e-03*utop(itop-1,jtop-1,ktop-1) +3.881240e-02*utop(itop-1,jtop-1,ktop) -5.174987e-03*utop(itop-1,jtop-1,ktop+1) +7.187482e-04*utop(itop-1,jtop-1,ktop+2) -4.158472e-03*utop(itop-1,jtop,ktop-2) +3.881240e-02*utop(itop-1,jtop,ktop-1) +1.746558e-01*utop(itop-1,jtop,ktop) -2.328744e-02*utop(itop-1,jtop,ktop+1) +3.234367e-03*utop(itop-1,jtop,ktop+2) +5.544629e-04*utop(itop-1,jtop+1,ktop-2) -5.174987e-03*utop(itop-1,jtop+1,ktop-1) -2.328744e-02*utop(itop-1,jtop+1,ktop) +3.104992e-03*utop(itop-1,jtop+1,ktop+1) -4.312489e-04*utop(itop-1,jtop+1,ktop+2) -7.700874e-05*utop(itop-1,jtop+2,ktop-2) +7.187482e-04*utop(itop-1,jtop+2,ktop-1) +3.234367e-03*utop(itop-1,jtop+2,ktop) -4.312489e-04*utop(itop-1,jtop+2,ktop+1) +5.989568e-05*utop(itop-1,jtop+2,ktop+2) +4.455505e-04*utop(itop,jtop-2,ktop-2) -4.158472e-03*utop(itop,jtop-2,ktop-1) -1.871312e-02*utop(itop,jtop-2,ktop) +2.495083e-03*utop(itop,jtop-2,ktop+1) -3.465393e-04*utop(itop,jtop-2,ktop+2) -4.158472e-03*utop(itop,jtop-1,ktop-2) +3.881240e-02*utop(itop,jtop-1,ktop-1) +1.746558e-01*utop(itop,jtop-1,ktop) -2.328744e-02*utop(itop,jtop-1,ktop+1) +3.234367e-03*utop(itop,jtop-1,ktop+2) -1.871312e-02*utop(itop,jtop,ktop-2) +1.746558e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) -1.047935e-01*utop(itop,jtop,ktop+1) +1.455465e-02*utop(itop,jtop,ktop+2) +2.495083e-03*utop(itop,jtop+1,ktop-2) -2.328744e-02*utop(itop,jtop+1,ktop-1) -1.047935e-01*utop(itop,jtop+1,ktop) +1.397246e-02*utop(itop,jtop+1,ktop+1) -1.940620e-03*utop(itop,jtop+1,ktop+2) -3.465393e-04*utop(itop,jtop+2,ktop-2) +3.234367e-03*utop(itop,jtop+2,ktop-1) +1.455465e-02*utop(itop,jtop+2,ktop) -1.940620e-03*utop(itop,jtop+2,ktop+1) 
+2.695306e-04*utop(itop,jtop+2,ktop+2) -5.940674e-05*utop(itop+1,jtop-2,ktop-2) +5.544629e-04*utop(itop+1,jtop-2,ktop-1) +2.495083e-03*utop(itop+1,jtop-2,ktop) -3.326777e-04*utop(itop+1,jtop-2,ktop+1) +4.620524e-05*utop(itop+1,jtop-2,ktop+2) +5.544629e-04*utop(itop+1,jtop-1,ktop-2) -5.174987e-03*utop(itop+1,jtop-1,ktop-1) -2.328744e-02*utop(itop+1,jtop-1,ktop) +3.104992e-03*utop(itop+1,jtop-1,ktop+1) -4.312489e-04*utop(itop+1,jtop-1,ktop+2) +2.495083e-03*utop(itop+1,jtop,ktop-2) -2.328744e-02*utop(itop+1,jtop,ktop-1) -1.047935e-01*utop(itop+1,jtop,ktop) +1.397246e-02*utop(itop+1,jtop,ktop+1) -1.940620e-03*utop(itop+1,jtop,ktop+2) -3.326777e-04*utop(itop+1,jtop+1,ktop-2) +3.104992e-03*utop(itop+1,jtop+1,ktop-1) +1.397246e-02*utop(itop+1,jtop+1,ktop) -1.862995e-03*utop(itop+1,jtop+1,ktop+1) +2.587494e-04*utop(itop+1,jtop+1,ktop+2) +4.620524e-05*utop(itop+1,jtop+2,ktop-2) -4.312489e-04*utop(itop+1,jtop+2,ktop-1) -1.940620e-03*utop(itop+1,jtop+2,ktop) +2.587494e-04*utop(itop+1,jtop+2,ktop+1) -3.593741e-05*utop(itop+1,jtop+2,ktop+2) +8.250936e-06*utop(itop+2,jtop-2,ktop-2) -7.700874e-05*utop(itop+2,jtop-2,ktop-1) -3.465393e-04*utop(itop+2,jtop-2,ktop) +4.620524e-05*utop(itop+2,jtop-2,ktop+1) -6.417395e-06*utop(itop+2,jtop-2,ktop+2) -7.700874e-05*utop(itop+2,jtop-1,ktop-2) +7.187482e-04*utop(itop+2,jtop-1,ktop-1) +3.234367e-03*utop(itop+2,jtop-1,ktop) -4.312489e-04*utop(itop+2,jtop-1,ktop+1) +5.989568e-05*utop(itop+2,jtop-1,ktop+2) -3.465393e-04*utop(itop+2,jtop,ktop-2) +3.234367e-03*utop(itop+2,jtop,ktop-1) +1.455465e-02*utop(itop+2,jtop,ktop) -1.940620e-03*utop(itop+2,jtop,ktop+1) +2.695306e-04*utop(itop+2,jtop,ktop+2) +4.620524e-05*utop(itop+2,jtop+1,ktop-2) -4.312489e-04*utop(itop+2,jtop+1,ktop-1) -1.940620e-03*utop(itop+2,jtop+1,ktop) +2.587494e-04*utop(itop+2,jtop+1,ktop+1) -3.593741e-05*utop(itop+2,jtop+1,ktop+2) -6.417395e-06*utop(itop+2,jtop+2,ktop-2) +5.989568e-05*utop(itop+2,jtop+2,ktop-1) +2.695306e-04*utop(itop+2,jtop+2,ktop) 
-3.593741e-05*utop(itop+2,jtop+2,ktop+1) +4.991307e-06*utop(itop+2,jtop+2,ktop+2)); - u(i+0,j+0,k+1) = ( +8.250936e-06*utop(itop-2,jtop-2,ktop-2) -5.940674e-05*utop(itop-2,jtop-2,ktop-1) +4.455505e-04*utop(itop-2,jtop-2,ktop) +9.901123e-05*utop(itop-2,jtop-2,ktop+1) -1.060835e-05*utop(itop-2,jtop-2,ktop+2) -7.700874e-05*utop(itop-2,jtop-1,ktop-2) +5.544629e-04*utop(itop-2,jtop-1,ktop-1) -4.158472e-03*utop(itop-2,jtop-1,ktop) -9.241048e-04*utop(itop-2,jtop-1,ktop+1) +9.901123e-05*utop(itop-2,jtop-1,ktop+2) -3.465393e-04*utop(itop-2,jtop,ktop-2) +2.495083e-03*utop(itop-2,jtop,ktop-1) -1.871312e-02*utop(itop-2,jtop,ktop) -4.158472e-03*utop(itop-2,jtop,ktop+1) +4.455505e-04*utop(itop-2,jtop,ktop+2) +4.620524e-05*utop(itop-2,jtop+1,ktop-2) -3.326777e-04*utop(itop-2,jtop+1,ktop-1) +2.495083e-03*utop(itop-2,jtop+1,ktop) +5.544629e-04*utop(itop-2,jtop+1,ktop+1) -5.940674e-05*utop(itop-2,jtop+1,ktop+2) -6.417395e-06*utop(itop-2,jtop+2,ktop-2) +4.620524e-05*utop(itop-2,jtop+2,ktop-1) -3.465393e-04*utop(itop-2,jtop+2,ktop) -7.700874e-05*utop(itop-2,jtop+2,ktop+1) +8.250936e-06*utop(itop-2,jtop+2,ktop+2) -7.700874e-05*utop(itop-1,jtop-2,ktop-2) +5.544629e-04*utop(itop-1,jtop-2,ktop-1) -4.158472e-03*utop(itop-1,jtop-2,ktop) -9.241048e-04*utop(itop-1,jtop-2,ktop+1) +9.901123e-05*utop(itop-1,jtop-2,ktop+2) +7.187482e-04*utop(itop-1,jtop-1,ktop-2) -5.174987e-03*utop(itop-1,jtop-1,ktop-1) +3.881240e-02*utop(itop-1,jtop-1,ktop) +8.624978e-03*utop(itop-1,jtop-1,ktop+1) -9.241048e-04*utop(itop-1,jtop-1,ktop+2) +3.234367e-03*utop(itop-1,jtop,ktop-2) -2.328744e-02*utop(itop-1,jtop,ktop-1) +1.746558e-01*utop(itop-1,jtop,ktop) +3.881240e-02*utop(itop-1,jtop,ktop+1) -4.158472e-03*utop(itop-1,jtop,ktop+2) -4.312489e-04*utop(itop-1,jtop+1,ktop-2) +3.104992e-03*utop(itop-1,jtop+1,ktop-1) -2.328744e-02*utop(itop-1,jtop+1,ktop) -5.174987e-03*utop(itop-1,jtop+1,ktop+1) +5.544629e-04*utop(itop-1,jtop+1,ktop+2) +5.989568e-05*utop(itop-1,jtop+2,ktop-2) -4.312489e-04*utop(itop-1,jtop+2,ktop-1) 
+3.234367e-03*utop(itop-1,jtop+2,ktop) +7.187482e-04*utop(itop-1,jtop+2,ktop+1) -7.700874e-05*utop(itop-1,jtop+2,ktop+2) -3.465393e-04*utop(itop,jtop-2,ktop-2) +2.495083e-03*utop(itop,jtop-2,ktop-1) -1.871312e-02*utop(itop,jtop-2,ktop) -4.158472e-03*utop(itop,jtop-2,ktop+1) +4.455505e-04*utop(itop,jtop-2,ktop+2) +3.234367e-03*utop(itop,jtop-1,ktop-2) -2.328744e-02*utop(itop,jtop-1,ktop-1) +1.746558e-01*utop(itop,jtop-1,ktop) +3.881240e-02*utop(itop,jtop-1,ktop+1) -4.158472e-03*utop(itop,jtop-1,ktop+2) +1.455465e-02*utop(itop,jtop,ktop-2) -1.047935e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) +1.746558e-01*utop(itop,jtop,ktop+1) -1.871312e-02*utop(itop,jtop,ktop+2) -1.940620e-03*utop(itop,jtop+1,ktop-2) +1.397246e-02*utop(itop,jtop+1,ktop-1) -1.047935e-01*utop(itop,jtop+1,ktop) -2.328744e-02*utop(itop,jtop+1,ktop+1) +2.495083e-03*utop(itop,jtop+1,ktop+2) +2.695306e-04*utop(itop,jtop+2,ktop-2) -1.940620e-03*utop(itop,jtop+2,ktop-1) +1.455465e-02*utop(itop,jtop+2,ktop) +3.234367e-03*utop(itop,jtop+2,ktop+1) -3.465393e-04*utop(itop,jtop+2,ktop+2) +4.620524e-05*utop(itop+1,jtop-2,ktop-2) -3.326777e-04*utop(itop+1,jtop-2,ktop-1) +2.495083e-03*utop(itop+1,jtop-2,ktop) +5.544629e-04*utop(itop+1,jtop-2,ktop+1) -5.940674e-05*utop(itop+1,jtop-2,ktop+2) -4.312489e-04*utop(itop+1,jtop-1,ktop-2) +3.104992e-03*utop(itop+1,jtop-1,ktop-1) -2.328744e-02*utop(itop+1,jtop-1,ktop) -5.174987e-03*utop(itop+1,jtop-1,ktop+1) +5.544629e-04*utop(itop+1,jtop-1,ktop+2) -1.940620e-03*utop(itop+1,jtop,ktop-2) +1.397246e-02*utop(itop+1,jtop,ktop-1) -1.047935e-01*utop(itop+1,jtop,ktop) -2.328744e-02*utop(itop+1,jtop,ktop+1) +2.495083e-03*utop(itop+1,jtop,ktop+2) +2.587494e-04*utop(itop+1,jtop+1,ktop-2) -1.862995e-03*utop(itop+1,jtop+1,ktop-1) +1.397246e-02*utop(itop+1,jtop+1,ktop) +3.104992e-03*utop(itop+1,jtop+1,ktop+1) -3.326777e-04*utop(itop+1,jtop+1,ktop+2) -3.593741e-05*utop(itop+1,jtop+2,ktop-2) +2.587494e-04*utop(itop+1,jtop+2,ktop-1) -1.940620e-03*utop(itop+1,jtop+2,ktop) 
-4.312489e-04*utop(itop+1,jtop+2,ktop+1) +4.620524e-05*utop(itop+1,jtop+2,ktop+2) -6.417395e-06*utop(itop+2,jtop-2,ktop-2) +4.620524e-05*utop(itop+2,jtop-2,ktop-1) -3.465393e-04*utop(itop+2,jtop-2,ktop) -7.700874e-05*utop(itop+2,jtop-2,ktop+1) +8.250936e-06*utop(itop+2,jtop-2,ktop+2) +5.989568e-05*utop(itop+2,jtop-1,ktop-2) -4.312489e-04*utop(itop+2,jtop-1,ktop-1) +3.234367e-03*utop(itop+2,jtop-1,ktop) +7.187482e-04*utop(itop+2,jtop-1,ktop+1) -7.700874e-05*utop(itop+2,jtop-1,ktop+2) +2.695306e-04*utop(itop+2,jtop,ktop-2) -1.940620e-03*utop(itop+2,jtop,ktop-1) +1.455465e-02*utop(itop+2,jtop,ktop) +3.234367e-03*utop(itop+2,jtop,ktop+1) -3.465393e-04*utop(itop+2,jtop,ktop+2) -3.593741e-05*utop(itop+2,jtop+1,ktop-2) +2.587494e-04*utop(itop+2,jtop+1,ktop-1) -1.940620e-03*utop(itop+2,jtop+1,ktop) -4.312489e-04*utop(itop+2,jtop+1,ktop+1) +4.620524e-05*utop(itop+2,jtop+1,ktop+2) +4.991307e-06*utop(itop+2,jtop+2,ktop-2) -3.593741e-05*utop(itop+2,jtop+2,ktop-1) +2.695306e-04*utop(itop+2,jtop+2,ktop) +5.989568e-05*utop(itop+2,jtop+2,ktop+1) -6.417395e-06*utop(itop+2,jtop+2,ktop+2)); - u(i+0,j+1,k+0) = ( +8.250936e-06*utop(itop-2,jtop-2,ktop-2) -7.700874e-05*utop(itop-2,jtop-2,ktop-1) -3.465393e-04*utop(itop-2,jtop-2,ktop) +4.620524e-05*utop(itop-2,jtop-2,ktop+1) -6.417395e-06*utop(itop-2,jtop-2,ktop+2) -5.940674e-05*utop(itop-2,jtop-1,ktop-2) +5.544629e-04*utop(itop-2,jtop-1,ktop-1) +2.495083e-03*utop(itop-2,jtop-1,ktop) -3.326777e-04*utop(itop-2,jtop-1,ktop+1) +4.620524e-05*utop(itop-2,jtop-1,ktop+2) +4.455505e-04*utop(itop-2,jtop,ktop-2) -4.158472e-03*utop(itop-2,jtop,ktop-1) -1.871312e-02*utop(itop-2,jtop,ktop) +2.495083e-03*utop(itop-2,jtop,ktop+1) -3.465393e-04*utop(itop-2,jtop,ktop+2) +9.901123e-05*utop(itop-2,jtop+1,ktop-2) -9.241048e-04*utop(itop-2,jtop+1,ktop-1) -4.158472e-03*utop(itop-2,jtop+1,ktop) +5.544629e-04*utop(itop-2,jtop+1,ktop+1) -7.700874e-05*utop(itop-2,jtop+1,ktop+2) -1.060835e-05*utop(itop-2,jtop+2,ktop-2) +9.901123e-05*utop(itop-2,jtop+2,ktop-1) 
+4.455505e-04*utop(itop-2,jtop+2,ktop) -5.940674e-05*utop(itop-2,jtop+2,ktop+1) +8.250936e-06*utop(itop-2,jtop+2,ktop+2) -7.700874e-05*utop(itop-1,jtop-2,ktop-2) +7.187482e-04*utop(itop-1,jtop-2,ktop-1) +3.234367e-03*utop(itop-1,jtop-2,ktop) -4.312489e-04*utop(itop-1,jtop-2,ktop+1) +5.989568e-05*utop(itop-1,jtop-2,ktop+2) +5.544629e-04*utop(itop-1,jtop-1,ktop-2) -5.174987e-03*utop(itop-1,jtop-1,ktop-1) -2.328744e-02*utop(itop-1,jtop-1,ktop) +3.104992e-03*utop(itop-1,jtop-1,ktop+1) -4.312489e-04*utop(itop-1,jtop-1,ktop+2) -4.158472e-03*utop(itop-1,jtop,ktop-2) +3.881240e-02*utop(itop-1,jtop,ktop-1) +1.746558e-01*utop(itop-1,jtop,ktop) -2.328744e-02*utop(itop-1,jtop,ktop+1) +3.234367e-03*utop(itop-1,jtop,ktop+2) -9.241048e-04*utop(itop-1,jtop+1,ktop-2) +8.624978e-03*utop(itop-1,jtop+1,ktop-1) +3.881240e-02*utop(itop-1,jtop+1,ktop) -5.174987e-03*utop(itop-1,jtop+1,ktop+1) +7.187482e-04*utop(itop-1,jtop+1,ktop+2) +9.901123e-05*utop(itop-1,jtop+2,ktop-2) -9.241048e-04*utop(itop-1,jtop+2,ktop-1) -4.158472e-03*utop(itop-1,jtop+2,ktop) +5.544629e-04*utop(itop-1,jtop+2,ktop+1) -7.700874e-05*utop(itop-1,jtop+2,ktop+2) -3.465393e-04*utop(itop,jtop-2,ktop-2) +3.234367e-03*utop(itop,jtop-2,ktop-1) +1.455465e-02*utop(itop,jtop-2,ktop) -1.940620e-03*utop(itop,jtop-2,ktop+1) +2.695306e-04*utop(itop,jtop-2,ktop+2) +2.495083e-03*utop(itop,jtop-1,ktop-2) -2.328744e-02*utop(itop,jtop-1,ktop-1) -1.047935e-01*utop(itop,jtop-1,ktop) +1.397246e-02*utop(itop,jtop-1,ktop+1) -1.940620e-03*utop(itop,jtop-1,ktop+2) -1.871312e-02*utop(itop,jtop,ktop-2) +1.746558e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) -1.047935e-01*utop(itop,jtop,ktop+1) +1.455465e-02*utop(itop,jtop,ktop+2) -4.158472e-03*utop(itop,jtop+1,ktop-2) +3.881240e-02*utop(itop,jtop+1,ktop-1) +1.746558e-01*utop(itop,jtop+1,ktop) -2.328744e-02*utop(itop,jtop+1,ktop+1) +3.234367e-03*utop(itop,jtop+1,ktop+2) +4.455505e-04*utop(itop,jtop+2,ktop-2) -4.158472e-03*utop(itop,jtop+2,ktop-1) 
-1.871312e-02*utop(itop,jtop+2,ktop) +2.495083e-03*utop(itop,jtop+2,ktop+1) -3.465393e-04*utop(itop,jtop+2,ktop+2) +4.620524e-05*utop(itop+1,jtop-2,ktop-2) -4.312489e-04*utop(itop+1,jtop-2,ktop-1) -1.940620e-03*utop(itop+1,jtop-2,ktop) +2.587494e-04*utop(itop+1,jtop-2,ktop+1) -3.593741e-05*utop(itop+1,jtop-2,ktop+2) -3.326777e-04*utop(itop+1,jtop-1,ktop-2) +3.104992e-03*utop(itop+1,jtop-1,ktop-1) +1.397246e-02*utop(itop+1,jtop-1,ktop) -1.862995e-03*utop(itop+1,jtop-1,ktop+1) +2.587494e-04*utop(itop+1,jtop-1,ktop+2) +2.495083e-03*utop(itop+1,jtop,ktop-2) -2.328744e-02*utop(itop+1,jtop,ktop-1) -1.047935e-01*utop(itop+1,jtop,ktop) +1.397246e-02*utop(itop+1,jtop,ktop+1) -1.940620e-03*utop(itop+1,jtop,ktop+2) +5.544629e-04*utop(itop+1,jtop+1,ktop-2) -5.174987e-03*utop(itop+1,jtop+1,ktop-1) -2.328744e-02*utop(itop+1,jtop+1,ktop) +3.104992e-03*utop(itop+1,jtop+1,ktop+1) -4.312489e-04*utop(itop+1,jtop+1,ktop+2) -5.940674e-05*utop(itop+1,jtop+2,ktop-2) +5.544629e-04*utop(itop+1,jtop+2,ktop-1) +2.495083e-03*utop(itop+1,jtop+2,ktop) -3.326777e-04*utop(itop+1,jtop+2,ktop+1) +4.620524e-05*utop(itop+1,jtop+2,ktop+2) -6.417395e-06*utop(itop+2,jtop-2,ktop-2) +5.989568e-05*utop(itop+2,jtop-2,ktop-1) +2.695306e-04*utop(itop+2,jtop-2,ktop) -3.593741e-05*utop(itop+2,jtop-2,ktop+1) +4.991307e-06*utop(itop+2,jtop-2,ktop+2) +4.620524e-05*utop(itop+2,jtop-1,ktop-2) -4.312489e-04*utop(itop+2,jtop-1,ktop-1) -1.940620e-03*utop(itop+2,jtop-1,ktop) +2.587494e-04*utop(itop+2,jtop-1,ktop+1) -3.593741e-05*utop(itop+2,jtop-1,ktop+2) -3.465393e-04*utop(itop+2,jtop,ktop-2) +3.234367e-03*utop(itop+2,jtop,ktop-1) +1.455465e-02*utop(itop+2,jtop,ktop) -1.940620e-03*utop(itop+2,jtop,ktop+1) +2.695306e-04*utop(itop+2,jtop,ktop+2) -7.700874e-05*utop(itop+2,jtop+1,ktop-2) +7.187482e-04*utop(itop+2,jtop+1,ktop-1) +3.234367e-03*utop(itop+2,jtop+1,ktop) -4.312489e-04*utop(itop+2,jtop+1,ktop+1) +5.989568e-05*utop(itop+2,jtop+1,ktop+2) +8.250936e-06*utop(itop+2,jtop+2,ktop-2) 
-7.700874e-05*utop(itop+2,jtop+2,ktop-1) -3.465393e-04*utop(itop+2,jtop+2,ktop) +4.620524e-05*utop(itop+2,jtop+2,ktop+1) -6.417395e-06*utop(itop+2,jtop+2,ktop+2)); - u(i+0,j+1,k+1) = ( -6.417395e-06*utop(itop-2,jtop-2,ktop-2) +4.620524e-05*utop(itop-2,jtop-2,ktop-1) -3.465393e-04*utop(itop-2,jtop-2,ktop) -7.700874e-05*utop(itop-2,jtop-2,ktop+1) +8.250936e-06*utop(itop-2,jtop-2,ktop+2) +4.620524e-05*utop(itop-2,jtop-1,ktop-2) -3.326777e-04*utop(itop-2,jtop-1,ktop-1) +2.495083e-03*utop(itop-2,jtop-1,ktop) +5.544629e-04*utop(itop-2,jtop-1,ktop+1) -5.940674e-05*utop(itop-2,jtop-1,ktop+2) -3.465393e-04*utop(itop-2,jtop,ktop-2) +2.495083e-03*utop(itop-2,jtop,ktop-1) -1.871312e-02*utop(itop-2,jtop,ktop) -4.158472e-03*utop(itop-2,jtop,ktop+1) +4.455505e-04*utop(itop-2,jtop,ktop+2) -7.700874e-05*utop(itop-2,jtop+1,ktop-2) +5.544629e-04*utop(itop-2,jtop+1,ktop-1) -4.158472e-03*utop(itop-2,jtop+1,ktop) -9.241048e-04*utop(itop-2,jtop+1,ktop+1) +9.901123e-05*utop(itop-2,jtop+1,ktop+2) +8.250936e-06*utop(itop-2,jtop+2,ktop-2) -5.940674e-05*utop(itop-2,jtop+2,ktop-1) +4.455505e-04*utop(itop-2,jtop+2,ktop) +9.901123e-05*utop(itop-2,jtop+2,ktop+1) -1.060835e-05*utop(itop-2,jtop+2,ktop+2) +5.989568e-05*utop(itop-1,jtop-2,ktop-2) -4.312489e-04*utop(itop-1,jtop-2,ktop-1) +3.234367e-03*utop(itop-1,jtop-2,ktop) +7.187482e-04*utop(itop-1,jtop-2,ktop+1) -7.700874e-05*utop(itop-1,jtop-2,ktop+2) -4.312489e-04*utop(itop-1,jtop-1,ktop-2) +3.104992e-03*utop(itop-1,jtop-1,ktop-1) -2.328744e-02*utop(itop-1,jtop-1,ktop) -5.174987e-03*utop(itop-1,jtop-1,ktop+1) +5.544629e-04*utop(itop-1,jtop-1,ktop+2) +3.234367e-03*utop(itop-1,jtop,ktop-2) -2.328744e-02*utop(itop-1,jtop,ktop-1) +1.746558e-01*utop(itop-1,jtop,ktop) +3.881240e-02*utop(itop-1,jtop,ktop+1) -4.158472e-03*utop(itop-1,jtop,ktop+2) +7.187482e-04*utop(itop-1,jtop+1,ktop-2) -5.174987e-03*utop(itop-1,jtop+1,ktop-1) +3.881240e-02*utop(itop-1,jtop+1,ktop) +8.624978e-03*utop(itop-1,jtop+1,ktop+1) -9.241048e-04*utop(itop-1,jtop+1,ktop+2) 
-7.700874e-05*utop(itop-1,jtop+2,ktop-2) +5.544629e-04*utop(itop-1,jtop+2,ktop-1) -4.158472e-03*utop(itop-1,jtop+2,ktop) -9.241048e-04*utop(itop-1,jtop+2,ktop+1) +9.901123e-05*utop(itop-1,jtop+2,ktop+2) +2.695306e-04*utop(itop,jtop-2,ktop-2) -1.940620e-03*utop(itop,jtop-2,ktop-1) +1.455465e-02*utop(itop,jtop-2,ktop) +3.234367e-03*utop(itop,jtop-2,ktop+1) -3.465393e-04*utop(itop,jtop-2,ktop+2) -1.940620e-03*utop(itop,jtop-1,ktop-2) +1.397246e-02*utop(itop,jtop-1,ktop-1) -1.047935e-01*utop(itop,jtop-1,ktop) -2.328744e-02*utop(itop,jtop-1,ktop+1) +2.495083e-03*utop(itop,jtop-1,ktop+2) +1.455465e-02*utop(itop,jtop,ktop-2) -1.047935e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) +1.746558e-01*utop(itop,jtop,ktop+1) -1.871312e-02*utop(itop,jtop,ktop+2) +3.234367e-03*utop(itop,jtop+1,ktop-2) -2.328744e-02*utop(itop,jtop+1,ktop-1) +1.746558e-01*utop(itop,jtop+1,ktop) +3.881240e-02*utop(itop,jtop+1,ktop+1) -4.158472e-03*utop(itop,jtop+1,ktop+2) -3.465393e-04*utop(itop,jtop+2,ktop-2) +2.495083e-03*utop(itop,jtop+2,ktop-1) -1.871312e-02*utop(itop,jtop+2,ktop) -4.158472e-03*utop(itop,jtop+2,ktop+1) +4.455505e-04*utop(itop,jtop+2,ktop+2) -3.593741e-05*utop(itop+1,jtop-2,ktop-2) +2.587494e-04*utop(itop+1,jtop-2,ktop-1) -1.940620e-03*utop(itop+1,jtop-2,ktop) -4.312489e-04*utop(itop+1,jtop-2,ktop+1) +4.620524e-05*utop(itop+1,jtop-2,ktop+2) +2.587494e-04*utop(itop+1,jtop-1,ktop-2) -1.862995e-03*utop(itop+1,jtop-1,ktop-1) +1.397246e-02*utop(itop+1,jtop-1,ktop) +3.104992e-03*utop(itop+1,jtop-1,ktop+1) -3.326777e-04*utop(itop+1,jtop-1,ktop+2) -1.940620e-03*utop(itop+1,jtop,ktop-2) +1.397246e-02*utop(itop+1,jtop,ktop-1) -1.047935e-01*utop(itop+1,jtop,ktop) -2.328744e-02*utop(itop+1,jtop,ktop+1) +2.495083e-03*utop(itop+1,jtop,ktop+2) -4.312489e-04*utop(itop+1,jtop+1,ktop-2) +3.104992e-03*utop(itop+1,jtop+1,ktop-1) -2.328744e-02*utop(itop+1,jtop+1,ktop) -5.174987e-03*utop(itop+1,jtop+1,ktop+1) +5.544629e-04*utop(itop+1,jtop+1,ktop+2) 
+4.620524e-05*utop(itop+1,jtop+2,ktop-2) -3.326777e-04*utop(itop+1,jtop+2,ktop-1) +2.495083e-03*utop(itop+1,jtop+2,ktop) +5.544629e-04*utop(itop+1,jtop+2,ktop+1) -5.940674e-05*utop(itop+1,jtop+2,ktop+2) +4.991307e-06*utop(itop+2,jtop-2,ktop-2) -3.593741e-05*utop(itop+2,jtop-2,ktop-1) +2.695306e-04*utop(itop+2,jtop-2,ktop) +5.989568e-05*utop(itop+2,jtop-2,ktop+1) -6.417395e-06*utop(itop+2,jtop-2,ktop+2) -3.593741e-05*utop(itop+2,jtop-1,ktop-2) +2.587494e-04*utop(itop+2,jtop-1,ktop-1) -1.940620e-03*utop(itop+2,jtop-1,ktop) -4.312489e-04*utop(itop+2,jtop-1,ktop+1) +4.620524e-05*utop(itop+2,jtop-1,ktop+2) +2.695306e-04*utop(itop+2,jtop,ktop-2) -1.940620e-03*utop(itop+2,jtop,ktop-1) +1.455465e-02*utop(itop+2,jtop,ktop) +3.234367e-03*utop(itop+2,jtop,ktop+1) -3.465393e-04*utop(itop+2,jtop,ktop+2) +5.989568e-05*utop(itop+2,jtop+1,ktop-2) -4.312489e-04*utop(itop+2,jtop+1,ktop-1) +3.234367e-03*utop(itop+2,jtop+1,ktop) +7.187482e-04*utop(itop+2,jtop+1,ktop+1) -7.700874e-05*utop(itop+2,jtop+1,ktop+2) -6.417395e-06*utop(itop+2,jtop+2,ktop-2) +4.620524e-05*utop(itop+2,jtop+2,ktop-1) -3.465393e-04*utop(itop+2,jtop+2,ktop) -7.700874e-05*utop(itop+2,jtop+2,ktop+1) +8.250936e-06*utop(itop+2,jtop+2,ktop+2)); - u(i+1,j+0,k+0) = ( +8.250936e-06*utop(itop-2,jtop-2,ktop-2) -7.700874e-05*utop(itop-2,jtop-2,ktop-1) -3.465393e-04*utop(itop-2,jtop-2,ktop) +4.620524e-05*utop(itop-2,jtop-2,ktop+1) -6.417395e-06*utop(itop-2,jtop-2,ktop+2) -7.700874e-05*utop(itop-2,jtop-1,ktop-2) +7.187482e-04*utop(itop-2,jtop-1,ktop-1) +3.234367e-03*utop(itop-2,jtop-1,ktop) -4.312489e-04*utop(itop-2,jtop-1,ktop+1) +5.989568e-05*utop(itop-2,jtop-1,ktop+2) -3.465393e-04*utop(itop-2,jtop,ktop-2) +3.234367e-03*utop(itop-2,jtop,ktop-1) +1.455465e-02*utop(itop-2,jtop,ktop) -1.940620e-03*utop(itop-2,jtop,ktop+1) +2.695306e-04*utop(itop-2,jtop,ktop+2) +4.620524e-05*utop(itop-2,jtop+1,ktop-2) -4.312489e-04*utop(itop-2,jtop+1,ktop-1) -1.940620e-03*utop(itop-2,jtop+1,ktop) +2.587494e-04*utop(itop-2,jtop+1,ktop+1) 
-3.593741e-05*utop(itop-2,jtop+1,ktop+2) -6.417395e-06*utop(itop-2,jtop+2,ktop-2) +5.989568e-05*utop(itop-2,jtop+2,ktop-1) +2.695306e-04*utop(itop-2,jtop+2,ktop) -3.593741e-05*utop(itop-2,jtop+2,ktop+1) +4.991307e-06*utop(itop-2,jtop+2,ktop+2) -5.940674e-05*utop(itop-1,jtop-2,ktop-2) +5.544629e-04*utop(itop-1,jtop-2,ktop-1) +2.495083e-03*utop(itop-1,jtop-2,ktop) -3.326777e-04*utop(itop-1,jtop-2,ktop+1) +4.620524e-05*utop(itop-1,jtop-2,ktop+2) +5.544629e-04*utop(itop-1,jtop-1,ktop-2) -5.174987e-03*utop(itop-1,jtop-1,ktop-1) -2.328744e-02*utop(itop-1,jtop-1,ktop) +3.104992e-03*utop(itop-1,jtop-1,ktop+1) -4.312489e-04*utop(itop-1,jtop-1,ktop+2) +2.495083e-03*utop(itop-1,jtop,ktop-2) -2.328744e-02*utop(itop-1,jtop,ktop-1) -1.047935e-01*utop(itop-1,jtop,ktop) +1.397246e-02*utop(itop-1,jtop,ktop+1) -1.940620e-03*utop(itop-1,jtop,ktop+2) -3.326777e-04*utop(itop-1,jtop+1,ktop-2) +3.104992e-03*utop(itop-1,jtop+1,ktop-1) +1.397246e-02*utop(itop-1,jtop+1,ktop) -1.862995e-03*utop(itop-1,jtop+1,ktop+1) +2.587494e-04*utop(itop-1,jtop+1,ktop+2) +4.620524e-05*utop(itop-1,jtop+2,ktop-2) -4.312489e-04*utop(itop-1,jtop+2,ktop-1) -1.940620e-03*utop(itop-1,jtop+2,ktop) +2.587494e-04*utop(itop-1,jtop+2,ktop+1) -3.593741e-05*utop(itop-1,jtop+2,ktop+2) +4.455505e-04*utop(itop,jtop-2,ktop-2) -4.158472e-03*utop(itop,jtop-2,ktop-1) -1.871312e-02*utop(itop,jtop-2,ktop) +2.495083e-03*utop(itop,jtop-2,ktop+1) -3.465393e-04*utop(itop,jtop-2,ktop+2) -4.158472e-03*utop(itop,jtop-1,ktop-2) +3.881240e-02*utop(itop,jtop-1,ktop-1) +1.746558e-01*utop(itop,jtop-1,ktop) -2.328744e-02*utop(itop,jtop-1,ktop+1) +3.234367e-03*utop(itop,jtop-1,ktop+2) -1.871312e-02*utop(itop,jtop,ktop-2) +1.746558e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) -1.047935e-01*utop(itop,jtop,ktop+1) +1.455465e-02*utop(itop,jtop,ktop+2) +2.495083e-03*utop(itop,jtop+1,ktop-2) -2.328744e-02*utop(itop,jtop+1,ktop-1) -1.047935e-01*utop(itop,jtop+1,ktop) +1.397246e-02*utop(itop,jtop+1,ktop+1) 
-1.940620e-03*utop(itop,jtop+1,ktop+2) -3.465393e-04*utop(itop,jtop+2,ktop-2) +3.234367e-03*utop(itop,jtop+2,ktop-1) +1.455465e-02*utop(itop,jtop+2,ktop) -1.940620e-03*utop(itop,jtop+2,ktop+1) +2.695306e-04*utop(itop,jtop+2,ktop+2) +9.901123e-05*utop(itop+1,jtop-2,ktop-2) -9.241048e-04*utop(itop+1,jtop-2,ktop-1) -4.158472e-03*utop(itop+1,jtop-2,ktop) +5.544629e-04*utop(itop+1,jtop-2,ktop+1) -7.700874e-05*utop(itop+1,jtop-2,ktop+2) -9.241048e-04*utop(itop+1,jtop-1,ktop-2) +8.624978e-03*utop(itop+1,jtop-1,ktop-1) +3.881240e-02*utop(itop+1,jtop-1,ktop) -5.174987e-03*utop(itop+1,jtop-1,ktop+1) +7.187482e-04*utop(itop+1,jtop-1,ktop+2) -4.158472e-03*utop(itop+1,jtop,ktop-2) +3.881240e-02*utop(itop+1,jtop,ktop-1) +1.746558e-01*utop(itop+1,jtop,ktop) -2.328744e-02*utop(itop+1,jtop,ktop+1) +3.234367e-03*utop(itop+1,jtop,ktop+2) +5.544629e-04*utop(itop+1,jtop+1,ktop-2) -5.174987e-03*utop(itop+1,jtop+1,ktop-1) -2.328744e-02*utop(itop+1,jtop+1,ktop) +3.104992e-03*utop(itop+1,jtop+1,ktop+1) -4.312489e-04*utop(itop+1,jtop+1,ktop+2) -7.700874e-05*utop(itop+1,jtop+2,ktop-2) +7.187482e-04*utop(itop+1,jtop+2,ktop-1) +3.234367e-03*utop(itop+1,jtop+2,ktop) -4.312489e-04*utop(itop+1,jtop+2,ktop+1) +5.989568e-05*utop(itop+1,jtop+2,ktop+2) -1.060835e-05*utop(itop+2,jtop-2,ktop-2) +9.901123e-05*utop(itop+2,jtop-2,ktop-1) +4.455505e-04*utop(itop+2,jtop-2,ktop) -5.940674e-05*utop(itop+2,jtop-2,ktop+1) +8.250936e-06*utop(itop+2,jtop-2,ktop+2) +9.901123e-05*utop(itop+2,jtop-1,ktop-2) -9.241048e-04*utop(itop+2,jtop-1,ktop-1) -4.158472e-03*utop(itop+2,jtop-1,ktop) +5.544629e-04*utop(itop+2,jtop-1,ktop+1) -7.700874e-05*utop(itop+2,jtop-1,ktop+2) +4.455505e-04*utop(itop+2,jtop,ktop-2) -4.158472e-03*utop(itop+2,jtop,ktop-1) -1.871312e-02*utop(itop+2,jtop,ktop) +2.495083e-03*utop(itop+2,jtop,ktop+1) -3.465393e-04*utop(itop+2,jtop,ktop+2) -5.940674e-05*utop(itop+2,jtop+1,ktop-2) +5.544629e-04*utop(itop+2,jtop+1,ktop-1) +2.495083e-03*utop(itop+2,jtop+1,ktop) -3.326777e-04*utop(itop+2,jtop+1,ktop+1) 
+4.620524e-05*utop(itop+2,jtop+1,ktop+2) +8.250936e-06*utop(itop+2,jtop+2,ktop-2) -7.700874e-05*utop(itop+2,jtop+2,ktop-1) -3.465393e-04*utop(itop+2,jtop+2,ktop) +4.620524e-05*utop(itop+2,jtop+2,ktop+1) -6.417395e-06*utop(itop+2,jtop+2,ktop+2)); - u(i+1,j+0,k+1) = ( -6.417395e-06*utop(itop-2,jtop-2,ktop-2) +4.620524e-05*utop(itop-2,jtop-2,ktop-1) -3.465393e-04*utop(itop-2,jtop-2,ktop) -7.700874e-05*utop(itop-2,jtop-2,ktop+1) +8.250936e-06*utop(itop-2,jtop-2,ktop+2) +5.989568e-05*utop(itop-2,jtop-1,ktop-2) -4.312489e-04*utop(itop-2,jtop-1,ktop-1) +3.234367e-03*utop(itop-2,jtop-1,ktop) +7.187482e-04*utop(itop-2,jtop-1,ktop+1) -7.700874e-05*utop(itop-2,jtop-1,ktop+2) +2.695306e-04*utop(itop-2,jtop,ktop-2) -1.940620e-03*utop(itop-2,jtop,ktop-1) +1.455465e-02*utop(itop-2,jtop,ktop) +3.234367e-03*utop(itop-2,jtop,ktop+1) -3.465393e-04*utop(itop-2,jtop,ktop+2) -3.593741e-05*utop(itop-2,jtop+1,ktop-2) +2.587494e-04*utop(itop-2,jtop+1,ktop-1) -1.940620e-03*utop(itop-2,jtop+1,ktop) -4.312489e-04*utop(itop-2,jtop+1,ktop+1) +4.620524e-05*utop(itop-2,jtop+1,ktop+2) +4.991307e-06*utop(itop-2,jtop+2,ktop-2) -3.593741e-05*utop(itop-2,jtop+2,ktop-1) +2.695306e-04*utop(itop-2,jtop+2,ktop) +5.989568e-05*utop(itop-2,jtop+2,ktop+1) -6.417395e-06*utop(itop-2,jtop+2,ktop+2) +4.620524e-05*utop(itop-1,jtop-2,ktop-2) -3.326777e-04*utop(itop-1,jtop-2,ktop-1) +2.495083e-03*utop(itop-1,jtop-2,ktop) +5.544629e-04*utop(itop-1,jtop-2,ktop+1) -5.940674e-05*utop(itop-1,jtop-2,ktop+2) -4.312489e-04*utop(itop-1,jtop-1,ktop-2) +3.104992e-03*utop(itop-1,jtop-1,ktop-1) -2.328744e-02*utop(itop-1,jtop-1,ktop) -5.174987e-03*utop(itop-1,jtop-1,ktop+1) +5.544629e-04*utop(itop-1,jtop-1,ktop+2) -1.940620e-03*utop(itop-1,jtop,ktop-2) +1.397246e-02*utop(itop-1,jtop,ktop-1) -1.047935e-01*utop(itop-1,jtop,ktop) -2.328744e-02*utop(itop-1,jtop,ktop+1) +2.495083e-03*utop(itop-1,jtop,ktop+2) +2.587494e-04*utop(itop-1,jtop+1,ktop-2) -1.862995e-03*utop(itop-1,jtop+1,ktop-1) +1.397246e-02*utop(itop-1,jtop+1,ktop) 
+3.104992e-03*utop(itop-1,jtop+1,ktop+1) -3.326777e-04*utop(itop-1,jtop+1,ktop+2) -3.593741e-05*utop(itop-1,jtop+2,ktop-2) +2.587494e-04*utop(itop-1,jtop+2,ktop-1) -1.940620e-03*utop(itop-1,jtop+2,ktop) -4.312489e-04*utop(itop-1,jtop+2,ktop+1) +4.620524e-05*utop(itop-1,jtop+2,ktop+2) -3.465393e-04*utop(itop,jtop-2,ktop-2) +2.495083e-03*utop(itop,jtop-2,ktop-1) -1.871312e-02*utop(itop,jtop-2,ktop) -4.158472e-03*utop(itop,jtop-2,ktop+1) +4.455505e-04*utop(itop,jtop-2,ktop+2) +3.234367e-03*utop(itop,jtop-1,ktop-2) -2.328744e-02*utop(itop,jtop-1,ktop-1) +1.746558e-01*utop(itop,jtop-1,ktop) +3.881240e-02*utop(itop,jtop-1,ktop+1) -4.158472e-03*utop(itop,jtop-1,ktop+2) +1.455465e-02*utop(itop,jtop,ktop-2) -1.047935e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) +1.746558e-01*utop(itop,jtop,ktop+1) -1.871312e-02*utop(itop,jtop,ktop+2) -1.940620e-03*utop(itop,jtop+1,ktop-2) +1.397246e-02*utop(itop,jtop+1,ktop-1) -1.047935e-01*utop(itop,jtop+1,ktop) -2.328744e-02*utop(itop,jtop+1,ktop+1) +2.495083e-03*utop(itop,jtop+1,ktop+2) +2.695306e-04*utop(itop,jtop+2,ktop-2) -1.940620e-03*utop(itop,jtop+2,ktop-1) +1.455465e-02*utop(itop,jtop+2,ktop) +3.234367e-03*utop(itop,jtop+2,ktop+1) -3.465393e-04*utop(itop,jtop+2,ktop+2) -7.700874e-05*utop(itop+1,jtop-2,ktop-2) +5.544629e-04*utop(itop+1,jtop-2,ktop-1) -4.158472e-03*utop(itop+1,jtop-2,ktop) -9.241048e-04*utop(itop+1,jtop-2,ktop+1) +9.901123e-05*utop(itop+1,jtop-2,ktop+2) +7.187482e-04*utop(itop+1,jtop-1,ktop-2) -5.174987e-03*utop(itop+1,jtop-1,ktop-1) +3.881240e-02*utop(itop+1,jtop-1,ktop) +8.624978e-03*utop(itop+1,jtop-1,ktop+1) -9.241048e-04*utop(itop+1,jtop-1,ktop+2) +3.234367e-03*utop(itop+1,jtop,ktop-2) -2.328744e-02*utop(itop+1,jtop,ktop-1) +1.746558e-01*utop(itop+1,jtop,ktop) +3.881240e-02*utop(itop+1,jtop,ktop+1) -4.158472e-03*utop(itop+1,jtop,ktop+2) -4.312489e-04*utop(itop+1,jtop+1,ktop-2) +3.104992e-03*utop(itop+1,jtop+1,ktop-1) -2.328744e-02*utop(itop+1,jtop+1,ktop) 
-5.174987e-03*utop(itop+1,jtop+1,ktop+1) +5.544629e-04*utop(itop+1,jtop+1,ktop+2) +5.989568e-05*utop(itop+1,jtop+2,ktop-2) -4.312489e-04*utop(itop+1,jtop+2,ktop-1) +3.234367e-03*utop(itop+1,jtop+2,ktop) +7.187482e-04*utop(itop+1,jtop+2,ktop+1) -7.700874e-05*utop(itop+1,jtop+2,ktop+2) +8.250936e-06*utop(itop+2,jtop-2,ktop-2) -5.940674e-05*utop(itop+2,jtop-2,ktop-1) +4.455505e-04*utop(itop+2,jtop-2,ktop) +9.901123e-05*utop(itop+2,jtop-2,ktop+1) -1.060835e-05*utop(itop+2,jtop-2,ktop+2) -7.700874e-05*utop(itop+2,jtop-1,ktop-2) +5.544629e-04*utop(itop+2,jtop-1,ktop-1) -4.158472e-03*utop(itop+2,jtop-1,ktop) -9.241048e-04*utop(itop+2,jtop-1,ktop+1) +9.901123e-05*utop(itop+2,jtop-1,ktop+2) -3.465393e-04*utop(itop+2,jtop,ktop-2) +2.495083e-03*utop(itop+2,jtop,ktop-1) -1.871312e-02*utop(itop+2,jtop,ktop) -4.158472e-03*utop(itop+2,jtop,ktop+1) +4.455505e-04*utop(itop+2,jtop,ktop+2) +4.620524e-05*utop(itop+2,jtop+1,ktop-2) -3.326777e-04*utop(itop+2,jtop+1,ktop-1) +2.495083e-03*utop(itop+2,jtop+1,ktop) +5.544629e-04*utop(itop+2,jtop+1,ktop+1) -5.940674e-05*utop(itop+2,jtop+1,ktop+2) -6.417395e-06*utop(itop+2,jtop+2,ktop-2) +4.620524e-05*utop(itop+2,jtop+2,ktop-1) -3.465393e-04*utop(itop+2,jtop+2,ktop) -7.700874e-05*utop(itop+2,jtop+2,ktop+1) +8.250936e-06*utop(itop+2,jtop+2,ktop+2)); - u(i+1,j+1,k+0) = ( -6.417395e-06*utop(itop-2,jtop-2,ktop-2) +5.989568e-05*utop(itop-2,jtop-2,ktop-1) +2.695306e-04*utop(itop-2,jtop-2,ktop) -3.593741e-05*utop(itop-2,jtop-2,ktop+1) +4.991307e-06*utop(itop-2,jtop-2,ktop+2) +4.620524e-05*utop(itop-2,jtop-1,ktop-2) -4.312489e-04*utop(itop-2,jtop-1,ktop-1) -1.940620e-03*utop(itop-2,jtop-1,ktop) +2.587494e-04*utop(itop-2,jtop-1,ktop+1) -3.593741e-05*utop(itop-2,jtop-1,ktop+2) -3.465393e-04*utop(itop-2,jtop,ktop-2) +3.234367e-03*utop(itop-2,jtop,ktop-1) +1.455465e-02*utop(itop-2,jtop,ktop) -1.940620e-03*utop(itop-2,jtop,ktop+1) +2.695306e-04*utop(itop-2,jtop,ktop+2) -7.700874e-05*utop(itop-2,jtop+1,ktop-2) +7.187482e-04*utop(itop-2,jtop+1,ktop-1) 
+3.234367e-03*utop(itop-2,jtop+1,ktop) -4.312489e-04*utop(itop-2,jtop+1,ktop+1) +5.989568e-05*utop(itop-2,jtop+1,ktop+2) +8.250936e-06*utop(itop-2,jtop+2,ktop-2) -7.700874e-05*utop(itop-2,jtop+2,ktop-1) -3.465393e-04*utop(itop-2,jtop+2,ktop) +4.620524e-05*utop(itop-2,jtop+2,ktop+1) -6.417395e-06*utop(itop-2,jtop+2,ktop+2) +4.620524e-05*utop(itop-1,jtop-2,ktop-2) -4.312489e-04*utop(itop-1,jtop-2,ktop-1) -1.940620e-03*utop(itop-1,jtop-2,ktop) +2.587494e-04*utop(itop-1,jtop-2,ktop+1) -3.593741e-05*utop(itop-1,jtop-2,ktop+2) -3.326777e-04*utop(itop-1,jtop-1,ktop-2) +3.104992e-03*utop(itop-1,jtop-1,ktop-1) +1.397246e-02*utop(itop-1,jtop-1,ktop) -1.862995e-03*utop(itop-1,jtop-1,ktop+1) +2.587494e-04*utop(itop-1,jtop-1,ktop+2) +2.495083e-03*utop(itop-1,jtop,ktop-2) -2.328744e-02*utop(itop-1,jtop,ktop-1) -1.047935e-01*utop(itop-1,jtop,ktop) +1.397246e-02*utop(itop-1,jtop,ktop+1) -1.940620e-03*utop(itop-1,jtop,ktop+2) +5.544629e-04*utop(itop-1,jtop+1,ktop-2) -5.174987e-03*utop(itop-1,jtop+1,ktop-1) -2.328744e-02*utop(itop-1,jtop+1,ktop) +3.104992e-03*utop(itop-1,jtop+1,ktop+1) -4.312489e-04*utop(itop-1,jtop+1,ktop+2) -5.940674e-05*utop(itop-1,jtop+2,ktop-2) +5.544629e-04*utop(itop-1,jtop+2,ktop-1) +2.495083e-03*utop(itop-1,jtop+2,ktop) -3.326777e-04*utop(itop-1,jtop+2,ktop+1) +4.620524e-05*utop(itop-1,jtop+2,ktop+2) -3.465393e-04*utop(itop,jtop-2,ktop-2) +3.234367e-03*utop(itop,jtop-2,ktop-1) +1.455465e-02*utop(itop,jtop-2,ktop) -1.940620e-03*utop(itop,jtop-2,ktop+1) +2.695306e-04*utop(itop,jtop-2,ktop+2) +2.495083e-03*utop(itop,jtop-1,ktop-2) -2.328744e-02*utop(itop,jtop-1,ktop-1) -1.047935e-01*utop(itop,jtop-1,ktop) +1.397246e-02*utop(itop,jtop-1,ktop+1) -1.940620e-03*utop(itop,jtop-1,ktop+2) -1.871312e-02*utop(itop,jtop,ktop-2) +1.746558e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) -1.047935e-01*utop(itop,jtop,ktop+1) +1.455465e-02*utop(itop,jtop,ktop+2) -4.158472e-03*utop(itop,jtop+1,ktop-2) +3.881240e-02*utop(itop,jtop+1,ktop-1) 
+1.746558e-01*utop(itop,jtop+1,ktop) -2.328744e-02*utop(itop,jtop+1,ktop+1) +3.234367e-03*utop(itop,jtop+1,ktop+2) +4.455505e-04*utop(itop,jtop+2,ktop-2) -4.158472e-03*utop(itop,jtop+2,ktop-1) -1.871312e-02*utop(itop,jtop+2,ktop) +2.495083e-03*utop(itop,jtop+2,ktop+1) -3.465393e-04*utop(itop,jtop+2,ktop+2) -7.700874e-05*utop(itop+1,jtop-2,ktop-2) +7.187482e-04*utop(itop+1,jtop-2,ktop-1) +3.234367e-03*utop(itop+1,jtop-2,ktop) -4.312489e-04*utop(itop+1,jtop-2,ktop+1) +5.989568e-05*utop(itop+1,jtop-2,ktop+2) +5.544629e-04*utop(itop+1,jtop-1,ktop-2) -5.174987e-03*utop(itop+1,jtop-1,ktop-1) -2.328744e-02*utop(itop+1,jtop-1,ktop) +3.104992e-03*utop(itop+1,jtop-1,ktop+1) -4.312489e-04*utop(itop+1,jtop-1,ktop+2) -4.158472e-03*utop(itop+1,jtop,ktop-2) +3.881240e-02*utop(itop+1,jtop,ktop-1) +1.746558e-01*utop(itop+1,jtop,ktop) -2.328744e-02*utop(itop+1,jtop,ktop+1) +3.234367e-03*utop(itop+1,jtop,ktop+2) -9.241048e-04*utop(itop+1,jtop+1,ktop-2) +8.624978e-03*utop(itop+1,jtop+1,ktop-1) +3.881240e-02*utop(itop+1,jtop+1,ktop) -5.174987e-03*utop(itop+1,jtop+1,ktop+1) +7.187482e-04*utop(itop+1,jtop+1,ktop+2) +9.901123e-05*utop(itop+1,jtop+2,ktop-2) -9.241048e-04*utop(itop+1,jtop+2,ktop-1) -4.158472e-03*utop(itop+1,jtop+2,ktop) +5.544629e-04*utop(itop+1,jtop+2,ktop+1) -7.700874e-05*utop(itop+1,jtop+2,ktop+2) +8.250936e-06*utop(itop+2,jtop-2,ktop-2) -7.700874e-05*utop(itop+2,jtop-2,ktop-1) -3.465393e-04*utop(itop+2,jtop-2,ktop) +4.620524e-05*utop(itop+2,jtop-2,ktop+1) -6.417395e-06*utop(itop+2,jtop-2,ktop+2) -5.940674e-05*utop(itop+2,jtop-1,ktop-2) +5.544629e-04*utop(itop+2,jtop-1,ktop-1) +2.495083e-03*utop(itop+2,jtop-1,ktop) -3.326777e-04*utop(itop+2,jtop-1,ktop+1) +4.620524e-05*utop(itop+2,jtop-1,ktop+2) +4.455505e-04*utop(itop+2,jtop,ktop-2) -4.158472e-03*utop(itop+2,jtop,ktop-1) -1.871312e-02*utop(itop+2,jtop,ktop) +2.495083e-03*utop(itop+2,jtop,ktop+1) -3.465393e-04*utop(itop+2,jtop,ktop+2) +9.901123e-05*utop(itop+2,jtop+1,ktop-2) -9.241048e-04*utop(itop+2,jtop+1,ktop-1) 
-4.158472e-03*utop(itop+2,jtop+1,ktop) +5.544629e-04*utop(itop+2,jtop+1,ktop+1) -7.700874e-05*utop(itop+2,jtop+1,ktop+2) -1.060835e-05*utop(itop+2,jtop+2,ktop-2) +9.901123e-05*utop(itop+2,jtop+2,ktop-1) +4.455505e-04*utop(itop+2,jtop+2,ktop) -5.940674e-05*utop(itop+2,jtop+2,ktop+1) +8.250936e-06*utop(itop+2,jtop+2,ktop+2)); - u(i+1,j+1,k+1) = ( +4.991307e-06*utop(itop-2,jtop-2,ktop-2) -3.593741e-05*utop(itop-2,jtop-2,ktop-1) +2.695306e-04*utop(itop-2,jtop-2,ktop) +5.989568e-05*utop(itop-2,jtop-2,ktop+1) -6.417395e-06*utop(itop-2,jtop-2,ktop+2) -3.593741e-05*utop(itop-2,jtop-1,ktop-2) +2.587494e-04*utop(itop-2,jtop-1,ktop-1) -1.940620e-03*utop(itop-2,jtop-1,ktop) -4.312489e-04*utop(itop-2,jtop-1,ktop+1) +4.620524e-05*utop(itop-2,jtop-1,ktop+2) +2.695306e-04*utop(itop-2,jtop,ktop-2) -1.940620e-03*utop(itop-2,jtop,ktop-1) +1.455465e-02*utop(itop-2,jtop,ktop) +3.234367e-03*utop(itop-2,jtop,ktop+1) -3.465393e-04*utop(itop-2,jtop,ktop+2) +5.989568e-05*utop(itop-2,jtop+1,ktop-2) -4.312489e-04*utop(itop-2,jtop+1,ktop-1) +3.234367e-03*utop(itop-2,jtop+1,ktop) +7.187482e-04*utop(itop-2,jtop+1,ktop+1) -7.700874e-05*utop(itop-2,jtop+1,ktop+2) -6.417395e-06*utop(itop-2,jtop+2,ktop-2) +4.620524e-05*utop(itop-2,jtop+2,ktop-1) -3.465393e-04*utop(itop-2,jtop+2,ktop) -7.700874e-05*utop(itop-2,jtop+2,ktop+1) +8.250936e-06*utop(itop-2,jtop+2,ktop+2) -3.593741e-05*utop(itop-1,jtop-2,ktop-2) +2.587494e-04*utop(itop-1,jtop-2,ktop-1) -1.940620e-03*utop(itop-1,jtop-2,ktop) -4.312489e-04*utop(itop-1,jtop-2,ktop+1) +4.620524e-05*utop(itop-1,jtop-2,ktop+2) +2.587494e-04*utop(itop-1,jtop-1,ktop-2) -1.862995e-03*utop(itop-1,jtop-1,ktop-1) +1.397246e-02*utop(itop-1,jtop-1,ktop) +3.104992e-03*utop(itop-1,jtop-1,ktop+1) -3.326777e-04*utop(itop-1,jtop-1,ktop+2) -1.940620e-03*utop(itop-1,jtop,ktop-2) +1.397246e-02*utop(itop-1,jtop,ktop-1) -1.047935e-01*utop(itop-1,jtop,ktop) -2.328744e-02*utop(itop-1,jtop,ktop+1) +2.495083e-03*utop(itop-1,jtop,ktop+2) -4.312489e-04*utop(itop-1,jtop+1,ktop-2) 
+3.104992e-03*utop(itop-1,jtop+1,ktop-1) -2.328744e-02*utop(itop-1,jtop+1,ktop) -5.174987e-03*utop(itop-1,jtop+1,ktop+1) +5.544629e-04*utop(itop-1,jtop+1,ktop+2) +4.620524e-05*utop(itop-1,jtop+2,ktop-2) -3.326777e-04*utop(itop-1,jtop+2,ktop-1) +2.495083e-03*utop(itop-1,jtop+2,ktop) +5.544629e-04*utop(itop-1,jtop+2,ktop+1) -5.940674e-05*utop(itop-1,jtop+2,ktop+2) +2.695306e-04*utop(itop,jtop-2,ktop-2) -1.940620e-03*utop(itop,jtop-2,ktop-1) +1.455465e-02*utop(itop,jtop-2,ktop) +3.234367e-03*utop(itop,jtop-2,ktop+1) -3.465393e-04*utop(itop,jtop-2,ktop+2) -1.940620e-03*utop(itop,jtop-1,ktop-2) +1.397246e-02*utop(itop,jtop-1,ktop-1) -1.047935e-01*utop(itop,jtop-1,ktop) -2.328744e-02*utop(itop,jtop-1,ktop+1) +2.495083e-03*utop(itop,jtop-1,ktop+2) +1.455465e-02*utop(itop,jtop,ktop-2) -1.047935e-01*utop(itop,jtop,ktop-1) +7.859512e-01*utop(itop,jtop,ktop) +1.746558e-01*utop(itop,jtop,ktop+1) -1.871312e-02*utop(itop,jtop,ktop+2) +3.234367e-03*utop(itop,jtop+1,ktop-2) -2.328744e-02*utop(itop,jtop+1,ktop-1) +1.746558e-01*utop(itop,jtop+1,ktop) +3.881240e-02*utop(itop,jtop+1,ktop+1) -4.158472e-03*utop(itop,jtop+1,ktop+2) -3.465393e-04*utop(itop,jtop+2,ktop-2) +2.495083e-03*utop(itop,jtop+2,ktop-1) -1.871312e-02*utop(itop,jtop+2,ktop) -4.158472e-03*utop(itop,jtop+2,ktop+1) +4.455505e-04*utop(itop,jtop+2,ktop+2) +5.989568e-05*utop(itop+1,jtop-2,ktop-2) -4.312489e-04*utop(itop+1,jtop-2,ktop-1) +3.234367e-03*utop(itop+1,jtop-2,ktop) +7.187482e-04*utop(itop+1,jtop-2,ktop+1) -7.700874e-05*utop(itop+1,jtop-2,ktop+2) -4.312489e-04*utop(itop+1,jtop-1,ktop-2) +3.104992e-03*utop(itop+1,jtop-1,ktop-1) -2.328744e-02*utop(itop+1,jtop-1,ktop) -5.174987e-03*utop(itop+1,jtop-1,ktop+1) +5.544629e-04*utop(itop+1,jtop-1,ktop+2) +3.234367e-03*utop(itop+1,jtop,ktop-2) -2.328744e-02*utop(itop+1,jtop,ktop-1) +1.746558e-01*utop(itop+1,jtop,ktop) +3.881240e-02*utop(itop+1,jtop,ktop+1) -4.158472e-03*utop(itop+1,jtop,ktop+2) +7.187482e-04*utop(itop+1,jtop+1,ktop-2) 
-5.174987e-03*utop(itop+1,jtop+1,ktop-1) +3.881240e-02*utop(itop+1,jtop+1,ktop) +8.624978e-03*utop(itop+1,jtop+1,ktop+1) -9.241048e-04*utop(itop+1,jtop+1,ktop+2) -7.700874e-05*utop(itop+1,jtop+2,ktop-2) +5.544629e-04*utop(itop+1,jtop+2,ktop-1) -4.158472e-03*utop(itop+1,jtop+2,ktop) -9.241048e-04*utop(itop+1,jtop+2,ktop+1) +9.901123e-05*utop(itop+1,jtop+2,ktop+2) -6.417395e-06*utop(itop+2,jtop-2,ktop-2) +4.620524e-05*utop(itop+2,jtop-2,ktop-1) -3.465393e-04*utop(itop+2,jtop-2,ktop) -7.700874e-05*utop(itop+2,jtop-2,ktop+1) +8.250936e-06*utop(itop+2,jtop-2,ktop+2) +4.620524e-05*utop(itop+2,jtop-1,ktop-2) -3.326777e-04*utop(itop+2,jtop-1,ktop-1) +2.495083e-03*utop(itop+2,jtop-1,ktop) +5.544629e-04*utop(itop+2,jtop-1,ktop+1) -5.940674e-05*utop(itop+2,jtop-1,ktop+2) -3.465393e-04*utop(itop+2,jtop,ktop-2) +2.495083e-03*utop(itop+2,jtop,ktop-1) -1.871312e-02*utop(itop+2,jtop,ktop) -4.158472e-03*utop(itop+2,jtop,ktop+1) +4.455505e-04*utop(itop+2,jtop,ktop+2) -7.700874e-05*utop(itop+2,jtop+1,ktop-2) +5.544629e-04*utop(itop+2,jtop+1,ktop-1) -4.158472e-03*utop(itop+2,jtop+1,ktop) -9.241048e-04*utop(itop+2,jtop+1,ktop+1) +9.901123e-05*utop(itop+2,jtop+1,ktop+2) +8.250936e-06*utop(itop+2,jtop+2,ktop-2) -5.940674e-05*utop(itop+2,jtop+2,ktop-1) +4.455505e-04*utop(itop+2,jtop+2,ktop) +9.901123e-05*utop(itop+2,jtop+2,ktop+1) -1.060835e-05*utop(itop+2,jtop+2,ktop+2)); - + u(i + 0, j + 0, k + 0) = (-1.060835e-05 * utop(itop - 2, jtop - 2, ktop - 2) + 9.901123e-05 * utop(itop - 2, jtop - 2, ktop - 1) + 4.455505e-04 * utop(itop - 2, jtop - 2, ktop) - 5.940674e-05 * utop(itop - 2, jtop - 2, ktop + 1) + 8.250936e-06 * utop(itop - 2, jtop - 2, ktop + 2) + 9.901123e-05 * utop(itop - 2, jtop - 1, ktop - 2) - 9.241048e-04 * utop(itop - 2, jtop - 1, ktop - 1) - 4.158472e-03 * utop(itop - 2, jtop - 1, ktop) + 5.544629e-04 * utop(itop - 2, jtop - 1, ktop + 1) - 7.700874e-05 * utop(itop - 2, jtop - 1, ktop + 2) + 4.455505e-04 * utop(itop - 2, jtop, ktop - 2) - 4.158472e-03 * utop(itop - 2, jtop, 
ktop - 1) - 1.871312e-02 * utop(itop - 2, jtop, ktop) + 2.495083e-03 * utop(itop - 2, jtop, ktop + 1) - 3.465393e-04 * utop(itop - 2, jtop, ktop + 2) - 5.940674e-05 * utop(itop - 2, jtop + 1, ktop - 2) + 5.544629e-04 * utop(itop - 2, jtop + 1, ktop - 1) + 2.495083e-03 * utop(itop - 2, jtop + 1, ktop) - 3.326777e-04 * utop(itop - 2, jtop + 1, ktop + 1) + 4.620524e-05 * utop(itop - 2, jtop + 1, ktop + 2) + 8.250936e-06 * utop(itop - 2, jtop + 2, ktop - 2) - 7.700874e-05 * utop(itop - 2, jtop + 2, ktop - 1) - 3.465393e-04 * utop(itop - 2, jtop + 2, ktop) + 4.620524e-05 * utop(itop - 2, jtop + 2, ktop + 1) - 6.417395e-06 * utop(itop - 2, jtop + 2, ktop + 2) + 9.901123e-05 * utop(itop - 1, jtop - 2, ktop - 2) - 9.241048e-04 * utop(itop - 1, jtop - 2, ktop - 1) - 4.158472e-03 * utop(itop - 1, jtop - 2, ktop) + 5.544629e-04 * utop(itop - 1, jtop - 2, ktop + 1) - 7.700874e-05 * utop(itop - 1, jtop - 2, ktop + 2) - 9.241048e-04 * utop(itop - 1, jtop - 1, ktop - 2) + 8.624978e-03 * utop(itop - 1, jtop - 1, ktop - 1) + 3.881240e-02 * utop(itop - 1, jtop - 1, ktop) - 5.174987e-03 * utop(itop - 1, jtop - 1, ktop + 1) + 7.187482e-04 * utop(itop - 1, jtop - 1, ktop + 2) - 4.158472e-03 * utop(itop - 1, jtop, ktop - 2) + 3.881240e-02 * utop(itop - 1, jtop, ktop - 1) + 1.746558e-01 * utop(itop - 1, jtop, ktop) - 2.328744e-02 * utop(itop - 1, jtop, ktop + 1) + 3.234367e-03 * utop(itop - 1, jtop, ktop + 2) + 5.544629e-04 * utop(itop - 1, jtop + 1, ktop - 2) - 5.174987e-03 * utop(itop - 1, jtop + 1, ktop - 1) - 2.328744e-02 * utop(itop - 1, jtop + 1, ktop) + 3.104992e-03 * utop(itop - 1, jtop + 1, ktop + 1) - 4.312489e-04 * utop(itop - 1, jtop + 1, ktop + 2) - 7.700874e-05 * utop(itop - 1, jtop + 2, ktop - 2) + 7.187482e-04 * utop(itop - 1, jtop + 2, ktop - 1) + 3.234367e-03 * utop(itop - 1, jtop + 2, ktop) - 4.312489e-04 * utop(itop - 1, jtop + 2, ktop + 1) + 5.989568e-05 * utop(itop - 1, jtop + 2, ktop + 2) + 4.455505e-04 * utop(itop, jtop - 2, ktop - 2) - 4.158472e-03 * utop(itop, 
jtop - 2, ktop - 1) - 1.871312e-02 * utop(itop, jtop - 2, ktop) + 2.495083e-03 * utop(itop, jtop - 2, ktop + 1) - 3.465393e-04 * utop(itop, jtop - 2, ktop + 2) - 4.158472e-03 * utop(itop, jtop - 1, ktop - 2) + 3.881240e-02 * utop(itop, jtop - 1, ktop - 1) + 1.746558e-01 * utop(itop, jtop - 1, ktop) - 2.328744e-02 * utop(itop, jtop - 1, ktop + 1) + 3.234367e-03 * utop(itop, jtop - 1, ktop + 2) - 1.871312e-02 * utop(itop, jtop, ktop - 2) + 1.746558e-01 * utop(itop, jtop, ktop - 1) + 7.859512e-01 * utop(itop, jtop, ktop) - 1.047935e-01 * utop(itop, jtop, ktop + 1) + 1.455465e-02 * utop(itop, jtop, ktop + 2) + 2.495083e-03 * utop(itop, jtop + 1, ktop - 2) - 2.328744e-02 * utop(itop, jtop + 1, ktop - 1) - 1.047935e-01 * utop(itop, jtop + 1, ktop) + 1.397246e-02 * utop(itop, jtop + 1, ktop + 1) - 1.940620e-03 * utop(itop, jtop + 1, ktop + 2) - 3.465393e-04 * utop(itop, jtop + 2, ktop - 2) + 3.234367e-03 * utop(itop, jtop + 2, ktop - 1) + 1.455465e-02 * utop(itop, jtop + 2, ktop) - 1.940620e-03 * utop(itop, jtop + 2, ktop + 1) + 2.695306e-04 * utop(itop, jtop + 2, ktop + 2) - 5.940674e-05 * utop(itop + 1, jtop - 2, ktop - 2) + 5.544629e-04 * utop(itop + 1, jtop - 2, ktop - 1) + 2.495083e-03 * utop(itop + 1, jtop - 2, ktop) - 3.326777e-04 * utop(itop + 1, jtop - 2, ktop + 1) + 4.620524e-05 * utop(itop + 1, jtop - 2, ktop + 2) + 5.544629e-04 * utop(itop + 1, jtop - 1, ktop - 2) - 5.174987e-03 * utop(itop + 1, jtop - 1, ktop - 1) - 2.328744e-02 * utop(itop + 1, jtop - 1, ktop) + 3.104992e-03 * utop(itop + 1, jtop - 1, ktop + 1) - 4.312489e-04 * utop(itop + 1, jtop - 1, ktop + 2) + 2.495083e-03 * utop(itop + 1, jtop, ktop - 2) - 2.328744e-02 * utop(itop + 1, jtop, ktop - 1) - 1.047935e-01 * utop(itop + 1, jtop, ktop) + 1.397246e-02 * utop(itop + 1, jtop, ktop + 1) - 1.940620e-03 * utop(itop + 1, jtop, ktop + 2) - 3.326777e-04 * utop(itop + 1, jtop + 1, ktop - 2) + 3.104992e-03 * utop(itop + 1, jtop + 1, ktop - 1) + 1.397246e-02 * utop(itop + 1, jtop + 1, ktop) - 1.862995e-03 
* utop(itop + 1, jtop + 1, ktop + 1) + 2.587494e-04 * utop(itop + 1, jtop + 1, ktop + 2) + 4.620524e-05 * utop(itop + 1, jtop + 2, ktop - 2) - 4.312489e-04 * utop(itop + 1, jtop + 2, ktop - 1) - 1.940620e-03 * utop(itop + 1, jtop + 2, ktop) + 2.587494e-04 * utop(itop + 1, jtop + 2, ktop + 1) - 3.593741e-05 * utop(itop + 1, jtop + 2, ktop + 2) + 8.250936e-06 * utop(itop + 2, jtop - 2, ktop - 2) - 7.700874e-05 * utop(itop + 2, jtop - 2, ktop - 1) - 3.465393e-04 * utop(itop + 2, jtop - 2, ktop) + 4.620524e-05 * utop(itop + 2, jtop - 2, ktop + 1) - 6.417395e-06 * utop(itop + 2, jtop - 2, ktop + 2) - 7.700874e-05 * utop(itop + 2, jtop - 1, ktop - 2) + 7.187482e-04 * utop(itop + 2, jtop - 1, ktop - 1) + 3.234367e-03 * utop(itop + 2, jtop - 1, ktop) - 4.312489e-04 * utop(itop + 2, jtop - 1, ktop + 1) + 5.989568e-05 * utop(itop + 2, jtop - 1, ktop + 2) - 3.465393e-04 * utop(itop + 2, jtop, ktop - 2) + 3.234367e-03 * utop(itop + 2, jtop, ktop - 1) + 1.455465e-02 * utop(itop + 2, jtop, ktop) - 1.940620e-03 * utop(itop + 2, jtop, ktop + 1) + 2.695306e-04 * utop(itop + 2, jtop, ktop + 2) + 4.620524e-05 * utop(itop + 2, jtop + 1, ktop - 2) - 4.312489e-04 * utop(itop + 2, jtop + 1, ktop - 1) - 1.940620e-03 * utop(itop + 2, jtop + 1, ktop) + 2.587494e-04 * utop(itop + 2, jtop + 1, ktop + 1) - 3.593741e-05 * utop(itop + 2, jtop + 1, ktop + 2) - 6.417395e-06 * utop(itop + 2, jtop + 2, ktop - 2) + 5.989568e-05 * utop(itop + 2, jtop + 2, ktop - 1) + 2.695306e-04 * utop(itop + 2, jtop + 2, ktop) - 3.593741e-05 * utop(itop + 2, jtop + 2, ktop + 1) + 4.991307e-06 * utop(itop + 2, jtop + 2, ktop + 2)); + u(i + 0, j + 0, k + 1) = (+8.250936e-06 * utop(itop - 2, jtop - 2, ktop - 2) - 5.940674e-05 * utop(itop - 2, jtop - 2, ktop - 1) + 4.455505e-04 * utop(itop - 2, jtop - 2, ktop) + 9.901123e-05 * utop(itop - 2, jtop - 2, ktop + 1) - 1.060835e-05 * utop(itop - 2, jtop - 2, ktop + 2) - 7.700874e-05 * utop(itop - 2, jtop - 1, ktop - 2) + 5.544629e-04 * utop(itop - 2, jtop - 1, ktop - 1) - 
4.158472e-03 * utop(itop - 2, jtop - 1, ktop) - 9.241048e-04 * utop(itop - 2, jtop - 1, ktop + 1) + 9.901123e-05 * utop(itop - 2, jtop - 1, ktop + 2) - 3.465393e-04 * utop(itop - 2, jtop, ktop - 2) + 2.495083e-03 * utop(itop - 2, jtop, ktop - 1) - 1.871312e-02 * utop(itop - 2, jtop, ktop) - 4.158472e-03 * utop(itop - 2, jtop, ktop + 1) + 4.455505e-04 * utop(itop - 2, jtop, ktop + 2) + 4.620524e-05 * utop(itop - 2, jtop + 1, ktop - 2) - 3.326777e-04 * utop(itop - 2, jtop + 1, ktop - 1) + 2.495083e-03 * utop(itop - 2, jtop + 1, ktop) + 5.544629e-04 * utop(itop - 2, jtop + 1, ktop + 1) - 5.940674e-05 * utop(itop - 2, jtop + 1, ktop + 2) - 6.417395e-06 * utop(itop - 2, jtop + 2, ktop - 2) + 4.620524e-05 * utop(itop - 2, jtop + 2, ktop - 1) - 3.465393e-04 * utop(itop - 2, jtop + 2, ktop) - 7.700874e-05 * utop(itop - 2, jtop + 2, ktop + 1) + 8.250936e-06 * utop(itop - 2, jtop + 2, ktop + 2) - 7.700874e-05 * utop(itop - 1, jtop - 2, ktop - 2) + 5.544629e-04 * utop(itop - 1, jtop - 2, ktop - 1) - 4.158472e-03 * utop(itop - 1, jtop - 2, ktop) - 9.241048e-04 * utop(itop - 1, jtop - 2, ktop + 1) + 9.901123e-05 * utop(itop - 1, jtop - 2, ktop + 2) + 7.187482e-04 * utop(itop - 1, jtop - 1, ktop - 2) - 5.174987e-03 * utop(itop - 1, jtop - 1, ktop - 1) + 3.881240e-02 * utop(itop - 1, jtop - 1, ktop) + 8.624978e-03 * utop(itop - 1, jtop - 1, ktop + 1) - 9.241048e-04 * utop(itop - 1, jtop - 1, ktop + 2) + 3.234367e-03 * utop(itop - 1, jtop, ktop - 2) - 2.328744e-02 * utop(itop - 1, jtop, ktop - 1) + 1.746558e-01 * utop(itop - 1, jtop, ktop) + 3.881240e-02 * utop(itop - 1, jtop, ktop + 1) - 4.158472e-03 * utop(itop - 1, jtop, ktop + 2) - 4.312489e-04 * utop(itop - 1, jtop + 1, ktop - 2) + 3.104992e-03 * utop(itop - 1, jtop + 1, ktop - 1) - 2.328744e-02 * utop(itop - 1, jtop + 1, ktop) - 5.174987e-03 * utop(itop - 1, jtop + 1, ktop + 1) + 5.544629e-04 * utop(itop - 1, jtop + 1, ktop + 2) + 5.989568e-05 * utop(itop - 1, jtop + 2, ktop - 2) - 4.312489e-04 * utop(itop - 1, jtop + 2, 
ktop - 1) + 3.234367e-03 * utop(itop - 1, jtop + 2, ktop) + 7.187482e-04 * utop(itop - 1, jtop + 2, ktop + 1) - 7.700874e-05 * utop(itop - 1, jtop + 2, ktop + 2) - 3.465393e-04 * utop(itop, jtop - 2, ktop - 2) + 2.495083e-03 * utop(itop, jtop - 2, ktop - 1) - 1.871312e-02 * utop(itop, jtop - 2, ktop) - 4.158472e-03 * utop(itop, jtop - 2, ktop + 1) + 4.455505e-04 * utop(itop, jtop - 2, ktop + 2) + 3.234367e-03 * utop(itop, jtop - 1, ktop - 2) - 2.328744e-02 * utop(itop, jtop - 1, ktop - 1) + 1.746558e-01 * utop(itop, jtop - 1, ktop) + 3.881240e-02 * utop(itop, jtop - 1, ktop + 1) - 4.158472e-03 * utop(itop, jtop - 1, ktop + 2) + 1.455465e-02 * utop(itop, jtop, ktop - 2) - 1.047935e-01 * utop(itop, jtop, ktop - 1) + 7.859512e-01 * utop(itop, jtop, ktop) + 1.746558e-01 * utop(itop, jtop, ktop + 1) - 1.871312e-02 * utop(itop, jtop, ktop + 2) - 1.940620e-03 * utop(itop, jtop + 1, ktop - 2) + 1.397246e-02 * utop(itop, jtop + 1, ktop - 1) - 1.047935e-01 * utop(itop, jtop + 1, ktop) - 2.328744e-02 * utop(itop, jtop + 1, ktop + 1) + 2.495083e-03 * utop(itop, jtop + 1, ktop + 2) + 2.695306e-04 * utop(itop, jtop + 2, ktop - 2) - 1.940620e-03 * utop(itop, jtop + 2, ktop - 1) + 1.455465e-02 * utop(itop, jtop + 2, ktop) + 3.234367e-03 * utop(itop, jtop + 2, ktop + 1) - 3.465393e-04 * utop(itop, jtop + 2, ktop + 2) + 4.620524e-05 * utop(itop + 1, jtop - 2, ktop - 2) - 3.326777e-04 * utop(itop + 1, jtop - 2, ktop - 1) + 2.495083e-03 * utop(itop + 1, jtop - 2, ktop) + 5.544629e-04 * utop(itop + 1, jtop - 2, ktop + 1) - 5.940674e-05 * utop(itop + 1, jtop - 2, ktop + 2) - 4.312489e-04 * utop(itop + 1, jtop - 1, ktop - 2) + 3.104992e-03 * utop(itop + 1, jtop - 1, ktop - 1) - 2.328744e-02 * utop(itop + 1, jtop - 1, ktop) - 5.174987e-03 * utop(itop + 1, jtop - 1, ktop + 1) + 5.544629e-04 * utop(itop + 1, jtop - 1, ktop + 2) - 1.940620e-03 * utop(itop + 1, jtop, ktop - 2) + 1.397246e-02 * utop(itop + 1, jtop, ktop - 1) - 1.047935e-01 * utop(itop + 1, jtop, ktop) - 2.328744e-02 * 
utop(itop + 1, jtop, ktop + 1) + 2.495083e-03 * utop(itop + 1, jtop, ktop + 2) + 2.587494e-04 * utop(itop + 1, jtop + 1, ktop - 2) - 1.862995e-03 * utop(itop + 1, jtop + 1, ktop - 1) + 1.397246e-02 * utop(itop + 1, jtop + 1, ktop) + 3.104992e-03 * utop(itop + 1, jtop + 1, ktop + 1) - 3.326777e-04 * utop(itop + 1, jtop + 1, ktop + 2) - 3.593741e-05 * utop(itop + 1, jtop + 2, ktop - 2) + 2.587494e-04 * utop(itop + 1, jtop + 2, ktop - 1) - 1.940620e-03 * utop(itop + 1, jtop + 2, ktop) - 4.312489e-04 * utop(itop + 1, jtop + 2, ktop + 1) + 4.620524e-05 * utop(itop + 1, jtop + 2, ktop + 2) - 6.417395e-06 * utop(itop + 2, jtop - 2, ktop - 2) + 4.620524e-05 * utop(itop + 2, jtop - 2, ktop - 1) - 3.465393e-04 * utop(itop + 2, jtop - 2, ktop) - 7.700874e-05 * utop(itop + 2, jtop - 2, ktop + 1) + 8.250936e-06 * utop(itop + 2, jtop - 2, ktop + 2) + 5.989568e-05 * utop(itop + 2, jtop - 1, ktop - 2) - 4.312489e-04 * utop(itop + 2, jtop - 1, ktop - 1) + 3.234367e-03 * utop(itop + 2, jtop - 1, ktop) + 7.187482e-04 * utop(itop + 2, jtop - 1, ktop + 1) - 7.700874e-05 * utop(itop + 2, jtop - 1, ktop + 2) + 2.695306e-04 * utop(itop + 2, jtop, ktop - 2) - 1.940620e-03 * utop(itop + 2, jtop, ktop - 1) + 1.455465e-02 * utop(itop + 2, jtop, ktop) + 3.234367e-03 * utop(itop + 2, jtop, ktop + 1) - 3.465393e-04 * utop(itop + 2, jtop, ktop + 2) - 3.593741e-05 * utop(itop + 2, jtop + 1, ktop - 2) + 2.587494e-04 * utop(itop + 2, jtop + 1, ktop - 1) - 1.940620e-03 * utop(itop + 2, jtop + 1, ktop) - 4.312489e-04 * utop(itop + 2, jtop + 1, ktop + 1) + 4.620524e-05 * utop(itop + 2, jtop + 1, ktop + 2) + 4.991307e-06 * utop(itop + 2, jtop + 2, ktop - 2) - 3.593741e-05 * utop(itop + 2, jtop + 2, ktop - 1) + 2.695306e-04 * utop(itop + 2, jtop + 2, ktop) + 5.989568e-05 * utop(itop + 2, jtop + 2, ktop + 1) - 6.417395e-06 * utop(itop + 2, jtop + 2, ktop + 2)); + u(i + 0, j + 1, k + 0) = (+8.250936e-06 * utop(itop - 2, jtop - 2, ktop - 2) - 7.700874e-05 * utop(itop - 2, jtop - 2, ktop - 1) - 3.465393e-04 
* utop(itop - 2, jtop - 2, ktop) + 4.620524e-05 * utop(itop - 2, jtop - 2, ktop + 1) - 6.417395e-06 * utop(itop - 2, jtop - 2, ktop + 2) - 5.940674e-05 * utop(itop - 2, jtop - 1, ktop - 2) + 5.544629e-04 * utop(itop - 2, jtop - 1, ktop - 1) + 2.495083e-03 * utop(itop - 2, jtop - 1, ktop) - 3.326777e-04 * utop(itop - 2, jtop - 1, ktop + 1) + 4.620524e-05 * utop(itop - 2, jtop - 1, ktop + 2) + 4.455505e-04 * utop(itop - 2, jtop, ktop - 2) - 4.158472e-03 * utop(itop - 2, jtop, ktop - 1) - 1.871312e-02 * utop(itop - 2, jtop, ktop) + 2.495083e-03 * utop(itop - 2, jtop, ktop + 1) - 3.465393e-04 * utop(itop - 2, jtop, ktop + 2) + 9.901123e-05 * utop(itop - 2, jtop + 1, ktop - 2) - 9.241048e-04 * utop(itop - 2, jtop + 1, ktop - 1) - 4.158472e-03 * utop(itop - 2, jtop + 1, ktop) + 5.544629e-04 * utop(itop - 2, jtop + 1, ktop + 1) - 7.700874e-05 * utop(itop - 2, jtop + 1, ktop + 2) - 1.060835e-05 * utop(itop - 2, jtop + 2, ktop - 2) + 9.901123e-05 * utop(itop - 2, jtop + 2, ktop - 1) + 4.455505e-04 * utop(itop - 2, jtop + 2, ktop) - 5.940674e-05 * utop(itop - 2, jtop + 2, ktop + 1) + 8.250936e-06 * utop(itop - 2, jtop + 2, ktop + 2) - 7.700874e-05 * utop(itop - 1, jtop - 2, ktop - 2) + 7.187482e-04 * utop(itop - 1, jtop - 2, ktop - 1) + 3.234367e-03 * utop(itop - 1, jtop - 2, ktop) - 4.312489e-04 * utop(itop - 1, jtop - 2, ktop + 1) + 5.989568e-05 * utop(itop - 1, jtop - 2, ktop + 2) + 5.544629e-04 * utop(itop - 1, jtop - 1, ktop - 2) - 5.174987e-03 * utop(itop - 1, jtop - 1, ktop - 1) - 2.328744e-02 * utop(itop - 1, jtop - 1, ktop) + 3.104992e-03 * utop(itop - 1, jtop - 1, ktop + 1) - 4.312489e-04 * utop(itop - 1, jtop - 1, ktop + 2) - 4.158472e-03 * utop(itop - 1, jtop, ktop - 2) + 3.881240e-02 * utop(itop - 1, jtop, ktop - 1) + 1.746558e-01 * utop(itop - 1, jtop, ktop) - 2.328744e-02 * utop(itop - 1, jtop, ktop + 1) + 3.234367e-03 * utop(itop - 1, jtop, ktop + 2) - 9.241048e-04 * utop(itop - 1, jtop + 1, ktop - 2) + 8.624978e-03 * utop(itop - 1, jtop + 1, ktop - 1) + 
3.881240e-02 * utop(itop - 1, jtop + 1, ktop) - 5.174987e-03 * utop(itop - 1, jtop + 1, ktop + 1) + 7.187482e-04 * utop(itop - 1, jtop + 1, ktop + 2) + 9.901123e-05 * utop(itop - 1, jtop + 2, ktop - 2) - 9.241048e-04 * utop(itop - 1, jtop + 2, ktop - 1) - 4.158472e-03 * utop(itop - 1, jtop + 2, ktop) + 5.544629e-04 * utop(itop - 1, jtop + 2, ktop + 1) - 7.700874e-05 * utop(itop - 1, jtop + 2, ktop + 2) - 3.465393e-04 * utop(itop, jtop - 2, ktop - 2) + 3.234367e-03 * utop(itop, jtop - 2, ktop - 1) + 1.455465e-02 * utop(itop, jtop - 2, ktop) - 1.940620e-03 * utop(itop, jtop - 2, ktop + 1) + 2.695306e-04 * utop(itop, jtop - 2, ktop + 2) + 2.495083e-03 * utop(itop, jtop - 1, ktop - 2) - 2.328744e-02 * utop(itop, jtop - 1, ktop - 1) - 1.047935e-01 * utop(itop, jtop - 1, ktop) + 1.397246e-02 * utop(itop, jtop - 1, ktop + 1) - 1.940620e-03 * utop(itop, jtop - 1, ktop + 2) - 1.871312e-02 * utop(itop, jtop, ktop - 2) + 1.746558e-01 * utop(itop, jtop, ktop - 1) + 7.859512e-01 * utop(itop, jtop, ktop) - 1.047935e-01 * utop(itop, jtop, ktop + 1) + 1.455465e-02 * utop(itop, jtop, ktop + 2) - 4.158472e-03 * utop(itop, jtop + 1, ktop - 2) + 3.881240e-02 * utop(itop, jtop + 1, ktop - 1) + 1.746558e-01 * utop(itop, jtop + 1, ktop) - 2.328744e-02 * utop(itop, jtop + 1, ktop + 1) + 3.234367e-03 * utop(itop, jtop + 1, ktop + 2) + 4.455505e-04 * utop(itop, jtop + 2, ktop - 2) - 4.158472e-03 * utop(itop, jtop + 2, ktop - 1) - 1.871312e-02 * utop(itop, jtop + 2, ktop) + 2.495083e-03 * utop(itop, jtop + 2, ktop + 1) - 3.465393e-04 * utop(itop, jtop + 2, ktop + 2) + 4.620524e-05 * utop(itop + 1, jtop - 2, ktop - 2) - 4.312489e-04 * utop(itop + 1, jtop - 2, ktop - 1) - 1.940620e-03 * utop(itop + 1, jtop - 2, ktop) + 2.587494e-04 * utop(itop + 1, jtop - 2, ktop + 1) - 3.593741e-05 * utop(itop + 1, jtop - 2, ktop + 2) - 3.326777e-04 * utop(itop + 1, jtop - 1, ktop - 2) + 3.104992e-03 * utop(itop + 1, jtop - 1, ktop - 1) + 1.397246e-02 * utop(itop + 1, jtop - 1, ktop) - 1.862995e-03 * 
utop(itop + 1, jtop - 1, ktop + 1) + 2.587494e-04 * utop(itop + 1, jtop - 1, ktop + 2) + 2.495083e-03 * utop(itop + 1, jtop, ktop - 2) - 2.328744e-02 * utop(itop + 1, jtop, ktop - 1) - 1.047935e-01 * utop(itop + 1, jtop, ktop) + 1.397246e-02 * utop(itop + 1, jtop, ktop + 1) - 1.940620e-03 * utop(itop + 1, jtop, ktop + 2) + 5.544629e-04 * utop(itop + 1, jtop + 1, ktop - 2) - 5.174987e-03 * utop(itop + 1, jtop + 1, ktop - 1) - 2.328744e-02 * utop(itop + 1, jtop + 1, ktop) + 3.104992e-03 * utop(itop + 1, jtop + 1, ktop + 1) - 4.312489e-04 * utop(itop + 1, jtop + 1, ktop + 2) - 5.940674e-05 * utop(itop + 1, jtop + 2, ktop - 2) + 5.544629e-04 * utop(itop + 1, jtop + 2, ktop - 1) + 2.495083e-03 * utop(itop + 1, jtop + 2, ktop) - 3.326777e-04 * utop(itop + 1, jtop + 2, ktop + 1) + 4.620524e-05 * utop(itop + 1, jtop + 2, ktop + 2) - 6.417395e-06 * utop(itop + 2, jtop - 2, ktop - 2) + 5.989568e-05 * utop(itop + 2, jtop - 2, ktop - 1) + 2.695306e-04 * utop(itop + 2, jtop - 2, ktop) - 3.593741e-05 * utop(itop + 2, jtop - 2, ktop + 1) + 4.991307e-06 * utop(itop + 2, jtop - 2, ktop + 2) + 4.620524e-05 * utop(itop + 2, jtop - 1, ktop - 2) - 4.312489e-04 * utop(itop + 2, jtop - 1, ktop - 1) - 1.940620e-03 * utop(itop + 2, jtop - 1, ktop) + 2.587494e-04 * utop(itop + 2, jtop - 1, ktop + 1) - 3.593741e-05 * utop(itop + 2, jtop - 1, ktop + 2) - 3.465393e-04 * utop(itop + 2, jtop, ktop - 2) + 3.234367e-03 * utop(itop + 2, jtop, ktop - 1) + 1.455465e-02 * utop(itop + 2, jtop, ktop) - 1.940620e-03 * utop(itop + 2, jtop, ktop + 1) + 2.695306e-04 * utop(itop + 2, jtop, ktop + 2) - 7.700874e-05 * utop(itop + 2, jtop + 1, ktop - 2) + 7.187482e-04 * utop(itop + 2, jtop + 1, ktop - 1) + 3.234367e-03 * utop(itop + 2, jtop + 1, ktop) - 4.312489e-04 * utop(itop + 2, jtop + 1, ktop + 1) + 5.989568e-05 * utop(itop + 2, jtop + 1, ktop + 2) + 8.250936e-06 * utop(itop + 2, jtop + 2, ktop - 2) - 7.700874e-05 * utop(itop + 2, jtop + 2, ktop - 1) - 3.465393e-04 * utop(itop + 2, jtop + 2, ktop) + 
4.620524e-05 * utop(itop + 2, jtop + 2, ktop + 1) - 6.417395e-06 * utop(itop + 2, jtop + 2, ktop + 2)); + u(i + 0, j + 1, k + 1) = (-6.417395e-06 * utop(itop - 2, jtop - 2, ktop - 2) + 4.620524e-05 * utop(itop - 2, jtop - 2, ktop - 1) - 3.465393e-04 * utop(itop - 2, jtop - 2, ktop) - 7.700874e-05 * utop(itop - 2, jtop - 2, ktop + 1) + 8.250936e-06 * utop(itop - 2, jtop - 2, ktop + 2) + 4.620524e-05 * utop(itop - 2, jtop - 1, ktop - 2) - 3.326777e-04 * utop(itop - 2, jtop - 1, ktop - 1) + 2.495083e-03 * utop(itop - 2, jtop - 1, ktop) + 5.544629e-04 * utop(itop - 2, jtop - 1, ktop + 1) - 5.940674e-05 * utop(itop - 2, jtop - 1, ktop + 2) - 3.465393e-04 * utop(itop - 2, jtop, ktop - 2) + 2.495083e-03 * utop(itop - 2, jtop, ktop - 1) - 1.871312e-02 * utop(itop - 2, jtop, ktop) - 4.158472e-03 * utop(itop - 2, jtop, ktop + 1) + 4.455505e-04 * utop(itop - 2, jtop, ktop + 2) - 7.700874e-05 * utop(itop - 2, jtop + 1, ktop - 2) + 5.544629e-04 * utop(itop - 2, jtop + 1, ktop - 1) - 4.158472e-03 * utop(itop - 2, jtop + 1, ktop) - 9.241048e-04 * utop(itop - 2, jtop + 1, ktop + 1) + 9.901123e-05 * utop(itop - 2, jtop + 1, ktop + 2) + 8.250936e-06 * utop(itop - 2, jtop + 2, ktop - 2) - 5.940674e-05 * utop(itop - 2, jtop + 2, ktop - 1) + 4.455505e-04 * utop(itop - 2, jtop + 2, ktop) + 9.901123e-05 * utop(itop - 2, jtop + 2, ktop + 1) - 1.060835e-05 * utop(itop - 2, jtop + 2, ktop + 2) + 5.989568e-05 * utop(itop - 1, jtop - 2, ktop - 2) - 4.312489e-04 * utop(itop - 1, jtop - 2, ktop - 1) + 3.234367e-03 * utop(itop - 1, jtop - 2, ktop) + 7.187482e-04 * utop(itop - 1, jtop - 2, ktop + 1) - 7.700874e-05 * utop(itop - 1, jtop - 2, ktop + 2) - 4.312489e-04 * utop(itop - 1, jtop - 1, ktop - 2) + 3.104992e-03 * utop(itop - 1, jtop - 1, ktop - 1) - 2.328744e-02 * utop(itop - 1, jtop - 1, ktop) - 5.174987e-03 * utop(itop - 1, jtop - 1, ktop + 1) + 5.544629e-04 * utop(itop - 1, jtop - 1, ktop + 2) + 3.234367e-03 * utop(itop - 1, jtop, ktop - 2) - 2.328744e-02 * utop(itop - 1, jtop, ktop - 1) 
+ 1.746558e-01 * utop(itop - 1, jtop, ktop) + 3.881240e-02 * utop(itop - 1, jtop, ktop + 1) - 4.158472e-03 * utop(itop - 1, jtop, ktop + 2) + 7.187482e-04 * utop(itop - 1, jtop + 1, ktop - 2) - 5.174987e-03 * utop(itop - 1, jtop + 1, ktop - 1) + 3.881240e-02 * utop(itop - 1, jtop + 1, ktop) + 8.624978e-03 * utop(itop - 1, jtop + 1, ktop + 1) - 9.241048e-04 * utop(itop - 1, jtop + 1, ktop + 2) - 7.700874e-05 * utop(itop - 1, jtop + 2, ktop - 2) + 5.544629e-04 * utop(itop - 1, jtop + 2, ktop - 1) - 4.158472e-03 * utop(itop - 1, jtop + 2, ktop) - 9.241048e-04 * utop(itop - 1, jtop + 2, ktop + 1) + 9.901123e-05 * utop(itop - 1, jtop + 2, ktop + 2) + 2.695306e-04 * utop(itop, jtop - 2, ktop - 2) - 1.940620e-03 * utop(itop, jtop - 2, ktop - 1) + 1.455465e-02 * utop(itop, jtop - 2, ktop) + 3.234367e-03 * utop(itop, jtop - 2, ktop + 1) - 3.465393e-04 * utop(itop, jtop - 2, ktop + 2) - 1.940620e-03 * utop(itop, jtop - 1, ktop - 2) + 1.397246e-02 * utop(itop, jtop - 1, ktop - 1) - 1.047935e-01 * utop(itop, jtop - 1, ktop) - 2.328744e-02 * utop(itop, jtop - 1, ktop + 1) + 2.495083e-03 * utop(itop, jtop - 1, ktop + 2) + 1.455465e-02 * utop(itop, jtop, ktop - 2) - 1.047935e-01 * utop(itop, jtop, ktop - 1) + 7.859512e-01 * utop(itop, jtop, ktop) + 1.746558e-01 * utop(itop, jtop, ktop + 1) - 1.871312e-02 * utop(itop, jtop, ktop + 2) + 3.234367e-03 * utop(itop, jtop + 1, ktop - 2) - 2.328744e-02 * utop(itop, jtop + 1, ktop - 1) + 1.746558e-01 * utop(itop, jtop + 1, ktop) + 3.881240e-02 * utop(itop, jtop + 1, ktop + 1) - 4.158472e-03 * utop(itop, jtop + 1, ktop + 2) - 3.465393e-04 * utop(itop, jtop + 2, ktop - 2) + 2.495083e-03 * utop(itop, jtop + 2, ktop - 1) - 1.871312e-02 * utop(itop, jtop + 2, ktop) - 4.158472e-03 * utop(itop, jtop + 2, ktop + 1) + 4.455505e-04 * utop(itop, jtop + 2, ktop + 2) - 3.593741e-05 * utop(itop + 1, jtop - 2, ktop - 2) + 2.587494e-04 * utop(itop + 1, jtop - 2, ktop - 1) - 1.940620e-03 * utop(itop + 1, jtop - 2, ktop) - 4.312489e-04 * utop(itop + 1, 
jtop - 2, ktop + 1) + 4.620524e-05 * utop(itop + 1, jtop - 2, ktop + 2) + 2.587494e-04 * utop(itop + 1, jtop - 1, ktop - 2) - 1.862995e-03 * utop(itop + 1, jtop - 1, ktop - 1) + 1.397246e-02 * utop(itop + 1, jtop - 1, ktop) + 3.104992e-03 * utop(itop + 1, jtop - 1, ktop + 1) - 3.326777e-04 * utop(itop + 1, jtop - 1, ktop + 2) - 1.940620e-03 * utop(itop + 1, jtop, ktop - 2) + 1.397246e-02 * utop(itop + 1, jtop, ktop - 1) - 1.047935e-01 * utop(itop + 1, jtop, ktop) - 2.328744e-02 * utop(itop + 1, jtop, ktop + 1) + 2.495083e-03 * utop(itop + 1, jtop, ktop + 2) - 4.312489e-04 * utop(itop + 1, jtop + 1, ktop - 2) + 3.104992e-03 * utop(itop + 1, jtop + 1, ktop - 1) - 2.328744e-02 * utop(itop + 1, jtop + 1, ktop) - 5.174987e-03 * utop(itop + 1, jtop + 1, ktop + 1) + 5.544629e-04 * utop(itop + 1, jtop + 1, ktop + 2) + 4.620524e-05 * utop(itop + 1, jtop + 2, ktop - 2) - 3.326777e-04 * utop(itop + 1, jtop + 2, ktop - 1) + 2.495083e-03 * utop(itop + 1, jtop + 2, ktop) + 5.544629e-04 * utop(itop + 1, jtop + 2, ktop + 1) - 5.940674e-05 * utop(itop + 1, jtop + 2, ktop + 2) + 4.991307e-06 * utop(itop + 2, jtop - 2, ktop - 2) - 3.593741e-05 * utop(itop + 2, jtop - 2, ktop - 1) + 2.695306e-04 * utop(itop + 2, jtop - 2, ktop) + 5.989568e-05 * utop(itop + 2, jtop - 2, ktop + 1) - 6.417395e-06 * utop(itop + 2, jtop - 2, ktop + 2) - 3.593741e-05 * utop(itop + 2, jtop - 1, ktop - 2) + 2.587494e-04 * utop(itop + 2, jtop - 1, ktop - 1) - 1.940620e-03 * utop(itop + 2, jtop - 1, ktop) - 4.312489e-04 * utop(itop + 2, jtop - 1, ktop + 1) + 4.620524e-05 * utop(itop + 2, jtop - 1, ktop + 2) + 2.695306e-04 * utop(itop + 2, jtop, ktop - 2) - 1.940620e-03 * utop(itop + 2, jtop, ktop - 1) + 1.455465e-02 * utop(itop + 2, jtop, ktop) + 3.234367e-03 * utop(itop + 2, jtop, ktop + 1) - 3.465393e-04 * utop(itop + 2, jtop, ktop + 2) + 5.989568e-05 * utop(itop + 2, jtop + 1, ktop - 2) - 4.312489e-04 * utop(itop + 2, jtop + 1, ktop - 1) + 3.234367e-03 * utop(itop + 2, jtop + 1, ktop) + 7.187482e-04 * 
utop(itop + 2, jtop + 1, ktop + 1) - 7.700874e-05 * utop(itop + 2, jtop + 1, ktop + 2) - 6.417395e-06 * utop(itop + 2, jtop + 2, ktop - 2) + 4.620524e-05 * utop(itop + 2, jtop + 2, ktop - 1) - 3.465393e-04 * utop(itop + 2, jtop + 2, ktop) - 7.700874e-05 * utop(itop + 2, jtop + 2, ktop + 1) + 8.250936e-06 * utop(itop + 2, jtop + 2, ktop + 2)); + u(i + 1, j + 0, k + 0) = (+8.250936e-06 * utop(itop - 2, jtop - 2, ktop - 2) - 7.700874e-05 * utop(itop - 2, jtop - 2, ktop - 1) - 3.465393e-04 * utop(itop - 2, jtop - 2, ktop) + 4.620524e-05 * utop(itop - 2, jtop - 2, ktop + 1) - 6.417395e-06 * utop(itop - 2, jtop - 2, ktop + 2) - 7.700874e-05 * utop(itop - 2, jtop - 1, ktop - 2) + 7.187482e-04 * utop(itop - 2, jtop - 1, ktop - 1) + 3.234367e-03 * utop(itop - 2, jtop - 1, ktop) - 4.312489e-04 * utop(itop - 2, jtop - 1, ktop + 1) + 5.989568e-05 * utop(itop - 2, jtop - 1, ktop + 2) - 3.465393e-04 * utop(itop - 2, jtop, ktop - 2) + 3.234367e-03 * utop(itop - 2, jtop, ktop - 1) + 1.455465e-02 * utop(itop - 2, jtop, ktop) - 1.940620e-03 * utop(itop - 2, jtop, ktop + 1) + 2.695306e-04 * utop(itop - 2, jtop, ktop + 2) + 4.620524e-05 * utop(itop - 2, jtop + 1, ktop - 2) - 4.312489e-04 * utop(itop - 2, jtop + 1, ktop - 1) - 1.940620e-03 * utop(itop - 2, jtop + 1, ktop) + 2.587494e-04 * utop(itop - 2, jtop + 1, ktop + 1) - 3.593741e-05 * utop(itop - 2, jtop + 1, ktop + 2) - 6.417395e-06 * utop(itop - 2, jtop + 2, ktop - 2) + 5.989568e-05 * utop(itop - 2, jtop + 2, ktop - 1) + 2.695306e-04 * utop(itop - 2, jtop + 2, ktop) - 3.593741e-05 * utop(itop - 2, jtop + 2, ktop + 1) + 4.991307e-06 * utop(itop - 2, jtop + 2, ktop + 2) - 5.940674e-05 * utop(itop - 1, jtop - 2, ktop - 2) + 5.544629e-04 * utop(itop - 1, jtop - 2, ktop - 1) + 2.495083e-03 * utop(itop - 1, jtop - 2, ktop) - 3.326777e-04 * utop(itop - 1, jtop - 2, ktop + 1) + 4.620524e-05 * utop(itop - 1, jtop - 2, ktop + 2) + 5.544629e-04 * utop(itop - 1, jtop - 1, ktop - 2) - 5.174987e-03 * utop(itop - 1, jtop - 1, ktop - 1) - 
2.328744e-02 * utop(itop - 1, jtop - 1, ktop) + 3.104992e-03 * utop(itop - 1, jtop - 1, ktop + 1) - 4.312489e-04 * utop(itop - 1, jtop - 1, ktop + 2) + 2.495083e-03 * utop(itop - 1, jtop, ktop - 2) - 2.328744e-02 * utop(itop - 1, jtop, ktop - 1) - 1.047935e-01 * utop(itop - 1, jtop, ktop) + 1.397246e-02 * utop(itop - 1, jtop, ktop + 1) - 1.940620e-03 * utop(itop - 1, jtop, ktop + 2) - 3.326777e-04 * utop(itop - 1, jtop + 1, ktop - 2) + 3.104992e-03 * utop(itop - 1, jtop + 1, ktop - 1) + 1.397246e-02 * utop(itop - 1, jtop + 1, ktop) - 1.862995e-03 * utop(itop - 1, jtop + 1, ktop + 1) + 2.587494e-04 * utop(itop - 1, jtop + 1, ktop + 2) + 4.620524e-05 * utop(itop - 1, jtop + 2, ktop - 2) - 4.312489e-04 * utop(itop - 1, jtop + 2, ktop - 1) - 1.940620e-03 * utop(itop - 1, jtop + 2, ktop) + 2.587494e-04 * utop(itop - 1, jtop + 2, ktop + 1) - 3.593741e-05 * utop(itop - 1, jtop + 2, ktop + 2) + 4.455505e-04 * utop(itop, jtop - 2, ktop - 2) - 4.158472e-03 * utop(itop, jtop - 2, ktop - 1) - 1.871312e-02 * utop(itop, jtop - 2, ktop) + 2.495083e-03 * utop(itop, jtop - 2, ktop + 1) - 3.465393e-04 * utop(itop, jtop - 2, ktop + 2) - 4.158472e-03 * utop(itop, jtop - 1, ktop - 2) + 3.881240e-02 * utop(itop, jtop - 1, ktop - 1) + 1.746558e-01 * utop(itop, jtop - 1, ktop) - 2.328744e-02 * utop(itop, jtop - 1, ktop + 1) + 3.234367e-03 * utop(itop, jtop - 1, ktop + 2) - 1.871312e-02 * utop(itop, jtop, ktop - 2) + 1.746558e-01 * utop(itop, jtop, ktop - 1) + 7.859512e-01 * utop(itop, jtop, ktop) - 1.047935e-01 * utop(itop, jtop, ktop + 1) + 1.455465e-02 * utop(itop, jtop, ktop + 2) + 2.495083e-03 * utop(itop, jtop + 1, ktop - 2) - 2.328744e-02 * utop(itop, jtop + 1, ktop - 1) - 1.047935e-01 * utop(itop, jtop + 1, ktop) + 1.397246e-02 * utop(itop, jtop + 1, ktop + 1) - 1.940620e-03 * utop(itop, jtop + 1, ktop + 2) - 3.465393e-04 * utop(itop, jtop + 2, ktop - 2) + 3.234367e-03 * utop(itop, jtop + 2, ktop - 1) + 1.455465e-02 * utop(itop, jtop + 2, ktop) - 1.940620e-03 * utop(itop, jtop + 2, 
ktop + 1) + 2.695306e-04 * utop(itop, jtop + 2, ktop + 2) + 9.901123e-05 * utop(itop + 1, jtop - 2, ktop - 2) - 9.241048e-04 * utop(itop + 1, jtop - 2, ktop - 1) - 4.158472e-03 * utop(itop + 1, jtop - 2, ktop) + 5.544629e-04 * utop(itop + 1, jtop - 2, ktop + 1) - 7.700874e-05 * utop(itop + 1, jtop - 2, ktop + 2) - 9.241048e-04 * utop(itop + 1, jtop - 1, ktop - 2) + 8.624978e-03 * utop(itop + 1, jtop - 1, ktop - 1) + 3.881240e-02 * utop(itop + 1, jtop - 1, ktop) - 5.174987e-03 * utop(itop + 1, jtop - 1, ktop + 1) + 7.187482e-04 * utop(itop + 1, jtop - 1, ktop + 2) - 4.158472e-03 * utop(itop + 1, jtop, ktop - 2) + 3.881240e-02 * utop(itop + 1, jtop, ktop - 1) + 1.746558e-01 * utop(itop + 1, jtop, ktop) - 2.328744e-02 * utop(itop + 1, jtop, ktop + 1) + 3.234367e-03 * utop(itop + 1, jtop, ktop + 2) + 5.544629e-04 * utop(itop + 1, jtop + 1, ktop - 2) - 5.174987e-03 * utop(itop + 1, jtop + 1, ktop - 1) - 2.328744e-02 * utop(itop + 1, jtop + 1, ktop) + 3.104992e-03 * utop(itop + 1, jtop + 1, ktop + 1) - 4.312489e-04 * utop(itop + 1, jtop + 1, ktop + 2) - 7.700874e-05 * utop(itop + 1, jtop + 2, ktop - 2) + 7.187482e-04 * utop(itop + 1, jtop + 2, ktop - 1) + 3.234367e-03 * utop(itop + 1, jtop + 2, ktop) - 4.312489e-04 * utop(itop + 1, jtop + 2, ktop + 1) + 5.989568e-05 * utop(itop + 1, jtop + 2, ktop + 2) - 1.060835e-05 * utop(itop + 2, jtop - 2, ktop - 2) + 9.901123e-05 * utop(itop + 2, jtop - 2, ktop - 1) + 4.455505e-04 * utop(itop + 2, jtop - 2, ktop) - 5.940674e-05 * utop(itop + 2, jtop - 2, ktop + 1) + 8.250936e-06 * utop(itop + 2, jtop - 2, ktop + 2) + 9.901123e-05 * utop(itop + 2, jtop - 1, ktop - 2) - 9.241048e-04 * utop(itop + 2, jtop - 1, ktop - 1) - 4.158472e-03 * utop(itop + 2, jtop - 1, ktop) + 5.544629e-04 * utop(itop + 2, jtop - 1, ktop + 1) - 7.700874e-05 * utop(itop + 2, jtop - 1, ktop + 2) + 4.455505e-04 * utop(itop + 2, jtop, ktop - 2) - 4.158472e-03 * utop(itop + 2, jtop, ktop - 1) - 1.871312e-02 * utop(itop + 2, jtop, ktop) + 2.495083e-03 * utop(itop + 
2, jtop, ktop + 1) - 3.465393e-04 * utop(itop + 2, jtop, ktop + 2) - 5.940674e-05 * utop(itop + 2, jtop + 1, ktop - 2) + 5.544629e-04 * utop(itop + 2, jtop + 1, ktop - 1) + 2.495083e-03 * utop(itop + 2, jtop + 1, ktop) - 3.326777e-04 * utop(itop + 2, jtop + 1, ktop + 1) + 4.620524e-05 * utop(itop + 2, jtop + 1, ktop + 2) + 8.250936e-06 * utop(itop + 2, jtop + 2, ktop - 2) - 7.700874e-05 * utop(itop + 2, jtop + 2, ktop - 1) - 3.465393e-04 * utop(itop + 2, jtop + 2, ktop) + 4.620524e-05 * utop(itop + 2, jtop + 2, ktop + 1) - 6.417395e-06 * utop(itop + 2, jtop + 2, ktop + 2)); + u(i + 1, j + 0, k + 1) = (-6.417395e-06 * utop(itop - 2, jtop - 2, ktop - 2) + 4.620524e-05 * utop(itop - 2, jtop - 2, ktop - 1) - 3.465393e-04 * utop(itop - 2, jtop - 2, ktop) - 7.700874e-05 * utop(itop - 2, jtop - 2, ktop + 1) + 8.250936e-06 * utop(itop - 2, jtop - 2, ktop + 2) + 5.989568e-05 * utop(itop - 2, jtop - 1, ktop - 2) - 4.312489e-04 * utop(itop - 2, jtop - 1, ktop - 1) + 3.234367e-03 * utop(itop - 2, jtop - 1, ktop) + 7.187482e-04 * utop(itop - 2, jtop - 1, ktop + 1) - 7.700874e-05 * utop(itop - 2, jtop - 1, ktop + 2) + 2.695306e-04 * utop(itop - 2, jtop, ktop - 2) - 1.940620e-03 * utop(itop - 2, jtop, ktop - 1) + 1.455465e-02 * utop(itop - 2, jtop, ktop) + 3.234367e-03 * utop(itop - 2, jtop, ktop + 1) - 3.465393e-04 * utop(itop - 2, jtop, ktop + 2) - 3.593741e-05 * utop(itop - 2, jtop + 1, ktop - 2) + 2.587494e-04 * utop(itop - 2, jtop + 1, ktop - 1) - 1.940620e-03 * utop(itop - 2, jtop + 1, ktop) - 4.312489e-04 * utop(itop - 2, jtop + 1, ktop + 1) + 4.620524e-05 * utop(itop - 2, jtop + 1, ktop + 2) + 4.991307e-06 * utop(itop - 2, jtop + 2, ktop - 2) - 3.593741e-05 * utop(itop - 2, jtop + 2, ktop - 1) + 2.695306e-04 * utop(itop - 2, jtop + 2, ktop) + 5.989568e-05 * utop(itop - 2, jtop + 2, ktop + 1) - 6.417395e-06 * utop(itop - 2, jtop + 2, ktop + 2) + 4.620524e-05 * utop(itop - 1, jtop - 2, ktop - 2) - 3.326777e-04 * utop(itop - 1, jtop - 2, ktop - 1) + 2.495083e-03 * utop(itop 
- 1, jtop - 2, ktop) + 5.544629e-04 * utop(itop - 1, jtop - 2, ktop + 1) - 5.940674e-05 * utop(itop - 1, jtop - 2, ktop + 2) - 4.312489e-04 * utop(itop - 1, jtop - 1, ktop - 2) + 3.104992e-03 * utop(itop - 1, jtop - 1, ktop - 1) - 2.328744e-02 * utop(itop - 1, jtop - 1, ktop) - 5.174987e-03 * utop(itop - 1, jtop - 1, ktop + 1) + 5.544629e-04 * utop(itop - 1, jtop - 1, ktop + 2) - 1.940620e-03 * utop(itop - 1, jtop, ktop - 2) + 1.397246e-02 * utop(itop - 1, jtop, ktop - 1) - 1.047935e-01 * utop(itop - 1, jtop, ktop) - 2.328744e-02 * utop(itop - 1, jtop, ktop + 1) + 2.495083e-03 * utop(itop - 1, jtop, ktop + 2) + 2.587494e-04 * utop(itop - 1, jtop + 1, ktop - 2) - 1.862995e-03 * utop(itop - 1, jtop + 1, ktop - 1) + 1.397246e-02 * utop(itop - 1, jtop + 1, ktop) + 3.104992e-03 * utop(itop - 1, jtop + 1, ktop + 1) - 3.326777e-04 * utop(itop - 1, jtop + 1, ktop + 2) - 3.593741e-05 * utop(itop - 1, jtop + 2, ktop - 2) + 2.587494e-04 * utop(itop - 1, jtop + 2, ktop - 1) - 1.940620e-03 * utop(itop - 1, jtop + 2, ktop) - 4.312489e-04 * utop(itop - 1, jtop + 2, ktop + 1) + 4.620524e-05 * utop(itop - 1, jtop + 2, ktop + 2) - 3.465393e-04 * utop(itop, jtop - 2, ktop - 2) + 2.495083e-03 * utop(itop, jtop - 2, ktop - 1) - 1.871312e-02 * utop(itop, jtop - 2, ktop) - 4.158472e-03 * utop(itop, jtop - 2, ktop + 1) + 4.455505e-04 * utop(itop, jtop - 2, ktop + 2) + 3.234367e-03 * utop(itop, jtop - 1, ktop - 2) - 2.328744e-02 * utop(itop, jtop - 1, ktop - 1) + 1.746558e-01 * utop(itop, jtop - 1, ktop) + 3.881240e-02 * utop(itop, jtop - 1, ktop + 1) - 4.158472e-03 * utop(itop, jtop - 1, ktop + 2) + 1.455465e-02 * utop(itop, jtop, ktop - 2) - 1.047935e-01 * utop(itop, jtop, ktop - 1) + 7.859512e-01 * utop(itop, jtop, ktop) + 1.746558e-01 * utop(itop, jtop, ktop + 1) - 1.871312e-02 * utop(itop, jtop, ktop + 2) - 1.940620e-03 * utop(itop, jtop + 1, ktop - 2) + 1.397246e-02 * utop(itop, jtop + 1, ktop - 1) - 1.047935e-01 * utop(itop, jtop + 1, ktop) - 2.328744e-02 * utop(itop, jtop + 1, ktop 
+ 1) + 2.495083e-03 * utop(itop, jtop + 1, ktop + 2) + 2.695306e-04 * utop(itop, jtop + 2, ktop - 2) - 1.940620e-03 * utop(itop, jtop + 2, ktop - 1) + 1.455465e-02 * utop(itop, jtop + 2, ktop) + 3.234367e-03 * utop(itop, jtop + 2, ktop + 1) - 3.465393e-04 * utop(itop, jtop + 2, ktop + 2) - 7.700874e-05 * utop(itop + 1, jtop - 2, ktop - 2) + 5.544629e-04 * utop(itop + 1, jtop - 2, ktop - 1) - 4.158472e-03 * utop(itop + 1, jtop - 2, ktop) - 9.241048e-04 * utop(itop + 1, jtop - 2, ktop + 1) + 9.901123e-05 * utop(itop + 1, jtop - 2, ktop + 2) + 7.187482e-04 * utop(itop + 1, jtop - 1, ktop - 2) - 5.174987e-03 * utop(itop + 1, jtop - 1, ktop - 1) + 3.881240e-02 * utop(itop + 1, jtop - 1, ktop) + 8.624978e-03 * utop(itop + 1, jtop - 1, ktop + 1) - 9.241048e-04 * utop(itop + 1, jtop - 1, ktop + 2) + 3.234367e-03 * utop(itop + 1, jtop, ktop - 2) - 2.328744e-02 * utop(itop + 1, jtop, ktop - 1) + 1.746558e-01 * utop(itop + 1, jtop, ktop) + 3.881240e-02 * utop(itop + 1, jtop, ktop + 1) - 4.158472e-03 * utop(itop + 1, jtop, ktop + 2) - 4.312489e-04 * utop(itop + 1, jtop + 1, ktop - 2) + 3.104992e-03 * utop(itop + 1, jtop + 1, ktop - 1) - 2.328744e-02 * utop(itop + 1, jtop + 1, ktop) - 5.174987e-03 * utop(itop + 1, jtop + 1, ktop + 1) + 5.544629e-04 * utop(itop + 1, jtop + 1, ktop + 2) + 5.989568e-05 * utop(itop + 1, jtop + 2, ktop - 2) - 4.312489e-04 * utop(itop + 1, jtop + 2, ktop - 1) + 3.234367e-03 * utop(itop + 1, jtop + 2, ktop) + 7.187482e-04 * utop(itop + 1, jtop + 2, ktop + 1) - 7.700874e-05 * utop(itop + 1, jtop + 2, ktop + 2) + 8.250936e-06 * utop(itop + 2, jtop - 2, ktop - 2) - 5.940674e-05 * utop(itop + 2, jtop - 2, ktop - 1) + 4.455505e-04 * utop(itop + 2, jtop - 2, ktop) + 9.901123e-05 * utop(itop + 2, jtop - 2, ktop + 1) - 1.060835e-05 * utop(itop + 2, jtop - 2, ktop + 2) - 7.700874e-05 * utop(itop + 2, jtop - 1, ktop - 2) + 5.544629e-04 * utop(itop + 2, jtop - 1, ktop - 1) - 4.158472e-03 * utop(itop + 2, jtop - 1, ktop) - 9.241048e-04 * utop(itop + 2, jtop - 1, 
ktop + 1) + 9.901123e-05 * utop(itop + 2, jtop - 1, ktop + 2) - 3.465393e-04 * utop(itop + 2, jtop, ktop - 2) + 2.495083e-03 * utop(itop + 2, jtop, ktop - 1) - 1.871312e-02 * utop(itop + 2, jtop, ktop) - 4.158472e-03 * utop(itop + 2, jtop, ktop + 1) + 4.455505e-04 * utop(itop + 2, jtop, ktop + 2) + 4.620524e-05 * utop(itop + 2, jtop + 1, ktop - 2) - 3.326777e-04 * utop(itop + 2, jtop + 1, ktop - 1) + 2.495083e-03 * utop(itop + 2, jtop + 1, ktop) + 5.544629e-04 * utop(itop + 2, jtop + 1, ktop + 1) - 5.940674e-05 * utop(itop + 2, jtop + 1, ktop + 2) - 6.417395e-06 * utop(itop + 2, jtop + 2, ktop - 2) + 4.620524e-05 * utop(itop + 2, jtop + 2, ktop - 1) - 3.465393e-04 * utop(itop + 2, jtop + 2, ktop) - 7.700874e-05 * utop(itop + 2, jtop + 2, ktop + 1) + 8.250936e-06 * utop(itop + 2, jtop + 2, ktop + 2)); + u(i + 1, j + 1, k + 0) = (-6.417395e-06 * utop(itop - 2, jtop - 2, ktop - 2) + 5.989568e-05 * utop(itop - 2, jtop - 2, ktop - 1) + 2.695306e-04 * utop(itop - 2, jtop - 2, ktop) - 3.593741e-05 * utop(itop - 2, jtop - 2, ktop + 1) + 4.991307e-06 * utop(itop - 2, jtop - 2, ktop + 2) + 4.620524e-05 * utop(itop - 2, jtop - 1, ktop - 2) - 4.312489e-04 * utop(itop - 2, jtop - 1, ktop - 1) - 1.940620e-03 * utop(itop - 2, jtop - 1, ktop) + 2.587494e-04 * utop(itop - 2, jtop - 1, ktop + 1) - 3.593741e-05 * utop(itop - 2, jtop - 1, ktop + 2) - 3.465393e-04 * utop(itop - 2, jtop, ktop - 2) + 3.234367e-03 * utop(itop - 2, jtop, ktop - 1) + 1.455465e-02 * utop(itop - 2, jtop, ktop) - 1.940620e-03 * utop(itop - 2, jtop, ktop + 1) + 2.695306e-04 * utop(itop - 2, jtop, ktop + 2) - 7.700874e-05 * utop(itop - 2, jtop + 1, ktop - 2) + 7.187482e-04 * utop(itop - 2, jtop + 1, ktop - 1) + 3.234367e-03 * utop(itop - 2, jtop + 1, ktop) - 4.312489e-04 * utop(itop - 2, jtop + 1, ktop + 1) + 5.989568e-05 * utop(itop - 2, jtop + 1, ktop + 2) + 8.250936e-06 * utop(itop - 2, jtop + 2, ktop - 2) - 7.700874e-05 * utop(itop - 2, jtop + 2, ktop - 1) - 3.465393e-04 * utop(itop - 2, jtop + 2, ktop) + 
4.620524e-05 * utop(itop - 2, jtop + 2, ktop + 1) - 6.417395e-06 * utop(itop - 2, jtop + 2, ktop + 2) + 4.620524e-05 * utop(itop - 1, jtop - 2, ktop - 2) - 4.312489e-04 * utop(itop - 1, jtop - 2, ktop - 1) - 1.940620e-03 * utop(itop - 1, jtop - 2, ktop) + 2.587494e-04 * utop(itop - 1, jtop - 2, ktop + 1) - 3.593741e-05 * utop(itop - 1, jtop - 2, ktop + 2) - 3.326777e-04 * utop(itop - 1, jtop - 1, ktop - 2) + 3.104992e-03 * utop(itop - 1, jtop - 1, ktop - 1) + 1.397246e-02 * utop(itop - 1, jtop - 1, ktop) - 1.862995e-03 * utop(itop - 1, jtop - 1, ktop + 1) + 2.587494e-04 * utop(itop - 1, jtop - 1, ktop + 2) + 2.495083e-03 * utop(itop - 1, jtop, ktop - 2) - 2.328744e-02 * utop(itop - 1, jtop, ktop - 1) - 1.047935e-01 * utop(itop - 1, jtop, ktop) + 1.397246e-02 * utop(itop - 1, jtop, ktop + 1) - 1.940620e-03 * utop(itop - 1, jtop, ktop + 2) + 5.544629e-04 * utop(itop - 1, jtop + 1, ktop - 2) - 5.174987e-03 * utop(itop - 1, jtop + 1, ktop - 1) - 2.328744e-02 * utop(itop - 1, jtop + 1, ktop) + 3.104992e-03 * utop(itop - 1, jtop + 1, ktop + 1) - 4.312489e-04 * utop(itop - 1, jtop + 1, ktop + 2) - 5.940674e-05 * utop(itop - 1, jtop + 2, ktop - 2) + 5.544629e-04 * utop(itop - 1, jtop + 2, ktop - 1) + 2.495083e-03 * utop(itop - 1, jtop + 2, ktop) - 3.326777e-04 * utop(itop - 1, jtop + 2, ktop + 1) + 4.620524e-05 * utop(itop - 1, jtop + 2, ktop + 2) - 3.465393e-04 * utop(itop, jtop - 2, ktop - 2) + 3.234367e-03 * utop(itop, jtop - 2, ktop - 1) + 1.455465e-02 * utop(itop, jtop - 2, ktop) - 1.940620e-03 * utop(itop, jtop - 2, ktop + 1) + 2.695306e-04 * utop(itop, jtop - 2, ktop + 2) + 2.495083e-03 * utop(itop, jtop - 1, ktop - 2) - 2.328744e-02 * utop(itop, jtop - 1, ktop - 1) - 1.047935e-01 * utop(itop, jtop - 1, ktop) + 1.397246e-02 * utop(itop, jtop - 1, ktop + 1) - 1.940620e-03 * utop(itop, jtop - 1, ktop + 2) - 1.871312e-02 * utop(itop, jtop, ktop - 2) + 1.746558e-01 * utop(itop, jtop, ktop - 1) + 7.859512e-01 * utop(itop, jtop, ktop) - 1.047935e-01 * utop(itop, jtop, 
ktop + 1) + 1.455465e-02 * utop(itop, jtop, ktop + 2) - 4.158472e-03 * utop(itop, jtop + 1, ktop - 2) + 3.881240e-02 * utop(itop, jtop + 1, ktop - 1) + 1.746558e-01 * utop(itop, jtop + 1, ktop) - 2.328744e-02 * utop(itop, jtop + 1, ktop + 1) + 3.234367e-03 * utop(itop, jtop + 1, ktop + 2) + 4.455505e-04 * utop(itop, jtop + 2, ktop - 2) - 4.158472e-03 * utop(itop, jtop + 2, ktop - 1) - 1.871312e-02 * utop(itop, jtop + 2, ktop) + 2.495083e-03 * utop(itop, jtop + 2, ktop + 1) - 3.465393e-04 * utop(itop, jtop + 2, ktop + 2) - 7.700874e-05 * utop(itop + 1, jtop - 2, ktop - 2) + 7.187482e-04 * utop(itop + 1, jtop - 2, ktop - 1) + 3.234367e-03 * utop(itop + 1, jtop - 2, ktop) - 4.312489e-04 * utop(itop + 1, jtop - 2, ktop + 1) + 5.989568e-05 * utop(itop + 1, jtop - 2, ktop + 2) + 5.544629e-04 * utop(itop + 1, jtop - 1, ktop - 2) - 5.174987e-03 * utop(itop + 1, jtop - 1, ktop - 1) - 2.328744e-02 * utop(itop + 1, jtop - 1, ktop) + 3.104992e-03 * utop(itop + 1, jtop - 1, ktop + 1) - 4.312489e-04 * utop(itop + 1, jtop - 1, ktop + 2) - 4.158472e-03 * utop(itop + 1, jtop, ktop - 2) + 3.881240e-02 * utop(itop + 1, jtop, ktop - 1) + 1.746558e-01 * utop(itop + 1, jtop, ktop) - 2.328744e-02 * utop(itop + 1, jtop, ktop + 1) + 3.234367e-03 * utop(itop + 1, jtop, ktop + 2) - 9.241048e-04 * utop(itop + 1, jtop + 1, ktop - 2) + 8.624978e-03 * utop(itop + 1, jtop + 1, ktop - 1) + 3.881240e-02 * utop(itop + 1, jtop + 1, ktop) - 5.174987e-03 * utop(itop + 1, jtop + 1, ktop + 1) + 7.187482e-04 * utop(itop + 1, jtop + 1, ktop + 2) + 9.901123e-05 * utop(itop + 1, jtop + 2, ktop - 2) - 9.241048e-04 * utop(itop + 1, jtop + 2, ktop - 1) - 4.158472e-03 * utop(itop + 1, jtop + 2, ktop) + 5.544629e-04 * utop(itop + 1, jtop + 2, ktop + 1) - 7.700874e-05 * utop(itop + 1, jtop + 2, ktop + 2) + 8.250936e-06 * utop(itop + 2, jtop - 2, ktop - 2) - 7.700874e-05 * utop(itop + 2, jtop - 2, ktop - 1) - 3.465393e-04 * utop(itop + 2, jtop - 2, ktop) + 4.620524e-05 * utop(itop + 2, jtop - 2, ktop + 1) - 
6.417395e-06 * utop(itop + 2, jtop - 2, ktop + 2) - 5.940674e-05 * utop(itop + 2, jtop - 1, ktop - 2) + 5.544629e-04 * utop(itop + 2, jtop - 1, ktop - 1) + 2.495083e-03 * utop(itop + 2, jtop - 1, ktop) - 3.326777e-04 * utop(itop + 2, jtop - 1, ktop + 1) + 4.620524e-05 * utop(itop + 2, jtop - 1, ktop + 2) + 4.455505e-04 * utop(itop + 2, jtop, ktop - 2) - 4.158472e-03 * utop(itop + 2, jtop, ktop - 1) - 1.871312e-02 * utop(itop + 2, jtop, ktop) + 2.495083e-03 * utop(itop + 2, jtop, ktop + 1) - 3.465393e-04 * utop(itop + 2, jtop, ktop + 2) + 9.901123e-05 * utop(itop + 2, jtop + 1, ktop - 2) - 9.241048e-04 * utop(itop + 2, jtop + 1, ktop - 1) - 4.158472e-03 * utop(itop + 2, jtop + 1, ktop) + 5.544629e-04 * utop(itop + 2, jtop + 1, ktop + 1) - 7.700874e-05 * utop(itop + 2, jtop + 1, ktop + 2) - 1.060835e-05 * utop(itop + 2, jtop + 2, ktop - 2) + 9.901123e-05 * utop(itop + 2, jtop + 2, ktop - 1) + 4.455505e-04 * utop(itop + 2, jtop + 2, ktop) - 5.940674e-05 * utop(itop + 2, jtop + 2, ktop + 1) + 8.250936e-06 * utop(itop + 2, jtop + 2, ktop + 2)); + u(i + 1, j + 1, k + 1) = (+4.991307e-06 * utop(itop - 2, jtop - 2, ktop - 2) - 3.593741e-05 * utop(itop - 2, jtop - 2, ktop - 1) + 2.695306e-04 * utop(itop - 2, jtop - 2, ktop) + 5.989568e-05 * utop(itop - 2, jtop - 2, ktop + 1) - 6.417395e-06 * utop(itop - 2, jtop - 2, ktop + 2) - 3.593741e-05 * utop(itop - 2, jtop - 1, ktop - 2) + 2.587494e-04 * utop(itop - 2, jtop - 1, ktop - 1) - 1.940620e-03 * utop(itop - 2, jtop - 1, ktop) - 4.312489e-04 * utop(itop - 2, jtop - 1, ktop + 1) + 4.620524e-05 * utop(itop - 2, jtop - 1, ktop + 2) + 2.695306e-04 * utop(itop - 2, jtop, ktop - 2) - 1.940620e-03 * utop(itop - 2, jtop, ktop - 1) + 1.455465e-02 * utop(itop - 2, jtop, ktop) + 3.234367e-03 * utop(itop - 2, jtop, ktop + 1) - 3.465393e-04 * utop(itop - 2, jtop, ktop + 2) + 5.989568e-05 * utop(itop - 2, jtop + 1, ktop - 2) - 4.312489e-04 * utop(itop - 2, jtop + 1, ktop - 1) + 3.234367e-03 * utop(itop - 2, jtop + 1, ktop) + 7.187482e-04 * 
utop(itop - 2, jtop + 1, ktop + 1) - 7.700874e-05 * utop(itop - 2, jtop + 1, ktop + 2) - 6.417395e-06 * utop(itop - 2, jtop + 2, ktop - 2) + 4.620524e-05 * utop(itop - 2, jtop + 2, ktop - 1) - 3.465393e-04 * utop(itop - 2, jtop + 2, ktop) - 7.700874e-05 * utop(itop - 2, jtop + 2, ktop + 1) + 8.250936e-06 * utop(itop - 2, jtop + 2, ktop + 2) - 3.593741e-05 * utop(itop - 1, jtop - 2, ktop - 2) + 2.587494e-04 * utop(itop - 1, jtop - 2, ktop - 1) - 1.940620e-03 * utop(itop - 1, jtop - 2, ktop) - 4.312489e-04 * utop(itop - 1, jtop - 2, ktop + 1) + 4.620524e-05 * utop(itop - 1, jtop - 2, ktop + 2) + 2.587494e-04 * utop(itop - 1, jtop - 1, ktop - 2) - 1.862995e-03 * utop(itop - 1, jtop - 1, ktop - 1) + 1.397246e-02 * utop(itop - 1, jtop - 1, ktop) + 3.104992e-03 * utop(itop - 1, jtop - 1, ktop + 1) - 3.326777e-04 * utop(itop - 1, jtop - 1, ktop + 2) - 1.940620e-03 * utop(itop - 1, jtop, ktop - 2) + 1.397246e-02 * utop(itop - 1, jtop, ktop - 1) - 1.047935e-01 * utop(itop - 1, jtop, ktop) - 2.328744e-02 * utop(itop - 1, jtop, ktop + 1) + 2.495083e-03 * utop(itop - 1, jtop, ktop + 2) - 4.312489e-04 * utop(itop - 1, jtop + 1, ktop - 2) + 3.104992e-03 * utop(itop - 1, jtop + 1, ktop - 1) - 2.328744e-02 * utop(itop - 1, jtop + 1, ktop) - 5.174987e-03 * utop(itop - 1, jtop + 1, ktop + 1) + 5.544629e-04 * utop(itop - 1, jtop + 1, ktop + 2) + 4.620524e-05 * utop(itop - 1, jtop + 2, ktop - 2) - 3.326777e-04 * utop(itop - 1, jtop + 2, ktop - 1) + 2.495083e-03 * utop(itop - 1, jtop + 2, ktop) + 5.544629e-04 * utop(itop - 1, jtop + 2, ktop + 1) - 5.940674e-05 * utop(itop - 1, jtop + 2, ktop + 2) + 2.695306e-04 * utop(itop, jtop - 2, ktop - 2) - 1.940620e-03 * utop(itop, jtop - 2, ktop - 1) + 1.455465e-02 * utop(itop, jtop - 2, ktop) + 3.234367e-03 * utop(itop, jtop - 2, ktop + 1) - 3.465393e-04 * utop(itop, jtop - 2, ktop + 2) - 1.940620e-03 * utop(itop, jtop - 1, ktop - 2) + 1.397246e-02 * utop(itop, jtop - 1, ktop - 1) - 1.047935e-01 * utop(itop, jtop - 1, ktop) - 2.328744e-02 * 
utop(itop, jtop - 1, ktop + 1) + 2.495083e-03 * utop(itop, jtop - 1, ktop + 2) + 1.455465e-02 * utop(itop, jtop, ktop - 2) - 1.047935e-01 * utop(itop, jtop, ktop - 1) + 7.859512e-01 * utop(itop, jtop, ktop) + 1.746558e-01 * utop(itop, jtop, ktop + 1) - 1.871312e-02 * utop(itop, jtop, ktop + 2) + 3.234367e-03 * utop(itop, jtop + 1, ktop - 2) - 2.328744e-02 * utop(itop, jtop + 1, ktop - 1) + 1.746558e-01 * utop(itop, jtop + 1, ktop) + 3.881240e-02 * utop(itop, jtop + 1, ktop + 1) - 4.158472e-03 * utop(itop, jtop + 1, ktop + 2) - 3.465393e-04 * utop(itop, jtop + 2, ktop - 2) + 2.495083e-03 * utop(itop, jtop + 2, ktop - 1) - 1.871312e-02 * utop(itop, jtop + 2, ktop) - 4.158472e-03 * utop(itop, jtop + 2, ktop + 1) + 4.455505e-04 * utop(itop, jtop + 2, ktop + 2) + 5.989568e-05 * utop(itop + 1, jtop - 2, ktop - 2) - 4.312489e-04 * utop(itop + 1, jtop - 2, ktop - 1) + 3.234367e-03 * utop(itop + 1, jtop - 2, ktop) + 7.187482e-04 * utop(itop + 1, jtop - 2, ktop + 1) - 7.700874e-05 * utop(itop + 1, jtop - 2, ktop + 2) - 4.312489e-04 * utop(itop + 1, jtop - 1, ktop - 2) + 3.104992e-03 * utop(itop + 1, jtop - 1, ktop - 1) - 2.328744e-02 * utop(itop + 1, jtop - 1, ktop) - 5.174987e-03 * utop(itop + 1, jtop - 1, ktop + 1) + 5.544629e-04 * utop(itop + 1, jtop - 1, ktop + 2) + 3.234367e-03 * utop(itop + 1, jtop, ktop - 2) - 2.328744e-02 * utop(itop + 1, jtop, ktop - 1) + 1.746558e-01 * utop(itop + 1, jtop, ktop) + 3.881240e-02 * utop(itop + 1, jtop, ktop + 1) - 4.158472e-03 * utop(itop + 1, jtop, ktop + 2) + 7.187482e-04 * utop(itop + 1, jtop + 1, ktop - 2) - 5.174987e-03 * utop(itop + 1, jtop + 1, ktop - 1) + 3.881240e-02 * utop(itop + 1, jtop + 1, ktop) + 8.624978e-03 * utop(itop + 1, jtop + 1, ktop + 1) - 9.241048e-04 * utop(itop + 1, jtop + 1, ktop + 2) - 7.700874e-05 * utop(itop + 1, jtop + 2, ktop - 2) + 5.544629e-04 * utop(itop + 1, jtop + 2, ktop - 1) - 4.158472e-03 * utop(itop + 1, jtop + 2, ktop) - 9.241048e-04 * utop(itop + 1, jtop + 2, ktop + 1) + 9.901123e-05 * 
utop(itop + 1, jtop + 2, ktop + 2) - 6.417395e-06 * utop(itop + 2, jtop - 2, ktop - 2) + 4.620524e-05 * utop(itop + 2, jtop - 2, ktop - 1) - 3.465393e-04 * utop(itop + 2, jtop - 2, ktop) - 7.700874e-05 * utop(itop + 2, jtop - 2, ktop + 1) + 8.250936e-06 * utop(itop + 2, jtop - 2, ktop + 2) + 4.620524e-05 * utop(itop + 2, jtop - 1, ktop - 2) - 3.326777e-04 * utop(itop + 2, jtop - 1, ktop - 1) + 2.495083e-03 * utop(itop + 2, jtop - 1, ktop) + 5.544629e-04 * utop(itop + 2, jtop - 1, ktop + 1) - 5.940674e-05 * utop(itop + 2, jtop - 1, ktop + 2) - 3.465393e-04 * utop(itop + 2, jtop, ktop - 2) + 2.495083e-03 * utop(itop + 2, jtop, ktop - 1) - 1.871312e-02 * utop(itop + 2, jtop, ktop) - 4.158472e-03 * utop(itop + 2, jtop, ktop + 1) + 4.455505e-04 * utop(itop + 2, jtop, ktop + 2) - 7.700874e-05 * utop(itop + 2, jtop + 1, ktop - 2) + 5.544629e-04 * utop(itop + 2, jtop + 1, ktop - 1) - 4.158472e-03 * utop(itop + 2, jtop + 1, ktop) - 9.241048e-04 * utop(itop + 2, jtop + 1, ktop + 1) + 9.901123e-05 * utop(itop + 2, jtop + 1, ktop + 2) + 8.250936e-06 * utop(itop + 2, jtop + 2, ktop - 2) - 5.940674e-05 * utop(itop + 2, jtop + 2, ktop - 1) + 4.455505e-04 * utop(itop + 2, jtop + 2, ktop) + 9.901123e-05 * utop(itop + 2, jtop + 2, ktop + 1) - 1.060835e-05 * utop(itop + 2, jtop + 2, ktop + 2)); } - -template< class S, class O, typename T > -void solver::interp_coarse_fine_cubic( unsigned ilevel, MeshvarBnd& coarse, MeshvarBnd& fine, bool bcf=false ) +template +void solver::interp_coarse_fine_cubic(unsigned ilevel, MeshvarBnd &coarse, MeshvarBnd &fine, bool bcf = false) { - - MeshvarBnd *u = &fine; - MeshvarBnd *utop = &coarse; - - + + MeshvarBnd *u = &fine; + MeshvarBnd *utop = &coarse; + bcf = true; - + int - xoff = u->offset(0), - yoff = u->offset(1), - zoff = u->offset(2); - + xoff = u->offset(0), + yoff = u->offset(1), + zoff = u->offset(2); + //... 
don't do anything if we are not an additional refinement region - if( ilevel <= m_ilevelmin ) + if (ilevel <= m_ilevelmin) return; - + int - nx = u->size(0), - ny = u->size(1), - nz = u->size(2); - - for( int j=0; jsize(0), + ny = u->size(1), + nz = u->size(2); + + for (int j = 0; j < ny; ++j) + for (int k = 0; k < nz; ++k) { - int jtop = (int)(0.5*(double)(j))+yoff; - int ktop = (int)(0.5*(double)(k))+zoff; - - interp_cubic( coarse, fine, -2, j, k, xoff-1, jtop, ktop ); - interp_cubic( coarse, fine, nz, j, k, xoff+nz/2, jtop, ktop ); - + int jtop = (int)(0.5 * (double)(j)) + yoff; + int ktop = (int)(0.5 * (double)(k)) + zoff; + + interp_cubic(coarse, fine, -2, j, k, xoff - 1, jtop, ktop); + interp_cubic(coarse, fine, nz, j, k, xoff + nz / 2, jtop, ktop); } - - for( int i=0; i -void solver::interp_coarse_fine( unsigned ilevel, MeshvarBnd& coarse, MeshvarBnd& fine, bool bcf ) +template +void solver::interp_coarse_fine(unsigned ilevel, MeshvarBnd &coarse, MeshvarBnd &fine, bool bcf) { - MeshvarBnd *u = &fine; - MeshvarBnd *utop = &coarse; - - - bcf = true;; - //bcf = false; - + MeshvarBnd *u = &fine; + MeshvarBnd *utop = &coarse; + + bcf = true; + ; + // bcf = false; + int - xoff = u->offset(0), - yoff = u->offset(1), - zoff = u->offset(2); + xoff = u->offset(0), + yoff = u->offset(1), + zoff = u->offset(2); //... don't do anything if we are not an additional refinement region - if( xoff == 0 && yoff == 0 && zoff == 0 ) + if (xoff == 0 && yoff == 0 && zoff == 0) return; - + int - nx = u->size(0), - ny = u->size(1), - nz = u->size(2); - + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + //... 
set boundary condition for fine grid - - #pragma omp parallel for schedule(dynamic) - for( int ix=-1; ix<=nx; ++ix ) - for( int iy=-1; iy<=ny; ++iy ) - for( int iz=-1; iz<=nz; ++iz ) + +#pragma omp parallel for schedule(dynamic) + for (int ix = -1; ix <= nx; ++ix) + for (int iy = -1; iy <= ny; ++iy) + for (int iz = -1; iz <= nz; ++iz) { - bool xbnd=(ix==-1||ix==nx),ybnd=(iy==-1||iy==ny),zbnd=(iz==-1||iz==nz); - - //if(ix==-1||ix==nx||iy==-1||iy==ny||iz==-1||iz==nz) - if( xbnd || ybnd || zbnd ) - //if( xbnd ^ ybnd ^ zbnd ) + bool xbnd = (ix == -1 || ix == nx), ybnd = (iy == -1 || iy == ny), zbnd = (iz == -1 || iz == nz); + + // if(ix==-1||ix==nx||iy==-1||iy==ny||iz==-1||iz==nz) + if (xbnd || ybnd || zbnd) + // if( xbnd ^ ybnd ^ zbnd ) { - + //... only deal with proper ghostzones - if( (xbnd&&ybnd) || (xbnd&&zbnd) || (ybnd&&zbnd) || (xbnd&&ybnd&&zbnd)) + if ((xbnd && ybnd) || (xbnd && zbnd) || (ybnd && zbnd) || (xbnd && ybnd && zbnd)) continue; - + /*int ixtop = (int)(0.5*(double)(ix+2*xoff)+1e-3); int iytop = (int)(0.5*(double)(iy+2*yoff)+1e-3); int iztop = (int)(0.5*(double)(iz+2*zoff)+1e-3);*/ - - int ixtop = (int)(0.5*(double)(ix))+xoff; - int iytop = (int)(0.5*(double)(iy))+yoff; - int iztop = (int)(0.5*(double)(iz))+zoff; - - if( ix==-1 ) ixtop=xoff-1; - if( iy==-1 ) iytop=yoff-1; - if( iz==-1 ) iztop=zoff-1; - - double ustar1, ustar2, ustar3, uhat; - double fac = 0.5;//0.25; - double flux;; - if( ix == -1 && iy%2==0 && iz%2==0 ) + + int ixtop = (int)(0.5 * (double)(ix)) + xoff; + int iytop = (int)(0.5 * (double)(iy)) + yoff; + int iztop = (int)(0.5 * (double)(iz)) + zoff; + + if (ix == -1) + ixtop = xoff - 1; + if (iy == -1) + iytop = yoff - 1; + if (iz == -1) + iztop = zoff - 1; + + double ustar1, ustar2, ustar3, uhat; + double fac = 0.5; // 0.25; + double flux; + ; + if (ix == -1 && iy % 2 == 0 && iz % 2 == 0) { flux = 0.0; - for( int j=0;j<=1;j++) - for( int k=0;k<=1;k++) - { - ustar1 = interp2( 
(*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) ); - ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) ); - ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) ); - - uhat = interp2( /*-1.0, 0.0, 1.0, */ustar1, ustar2, ustar3, fac*((double)k-0.5) ); - + for (int j = 0; j <= 1; j++) + for (int k = 0; k <= 1; k++) + { + ustar1 = interp2((*utop)(ixtop, iytop - 1, iztop - 1), (*utop)(ixtop, iytop, iztop - 1), (*utop)(ixtop, iytop + 1, iztop - 1), fac * ((double)j - 0.5)); + ustar2 = interp2((*utop)(ixtop, iytop - 1, iztop), (*utop)(ixtop, iytop, iztop), (*utop)(ixtop, iytop + 1, iztop), fac * ((double)j - 0.5)); + ustar3 = interp2((*utop)(ixtop, iytop - 1, iztop + 1), (*utop)(ixtop, iytop, iztop + 1), (*utop)(ixtop, iytop + 1, iztop + 1), fac * ((double)j - 0.5)); + + uhat = interp2(/*-1.0, 0.0, 1.0, */ ustar1, ustar2, ustar3, fac * ((double)k - 0.5)); + //(*u)(ix,iy+j,iz+k) = 0.0;//(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k), -1.0 ); - - (*u)(ix,iy+j,iz+k) = interp2left( uhat, (*u)(ix+1,iy+j,iz+k), (*u)(ix+2,iy+j,iz+k) ); - - flux += ((*u)(ix+1,iy+j,iz+k)-(*u)(ix,iy+j,iz+k)); + + (*u)(ix, iy + j, iz + k) = interp2left(uhat, (*u)(ix + 1, iy + j, iz + k), (*u)(ix + 2, iy + j, iz + k)); + + flux += ((*u)(ix + 1, iy + j, iz + k) - (*u)(ix, iy + j, iz + k)); } - - - + flux /= 4.0; - - double dflux = ((*utop)(ixtop+1,iytop,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux; - - //dflux *= 2.0; - - if( bcf ) - for( int j=0;j<=1;j++) - for( int k=0;k<=1;k++) - (*u)(ix,iy+j,iz+k) -= dflux; + + double dflux = ((*utop)(ixtop + 1, iytop, iztop) - (*utop)(ixtop, iytop, iztop)) / 2.0 - flux; + + // dflux *= 2.0; + + if (bcf) + for (int j = 0; j <= 1; j++) + for (int k = 0; k <= 1; k++) + (*u)(ix, iy + j, iz + k) -= 
dflux; else - (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop+1,iytop,iztop) - 2.0*flux; - - + (*utop)(ixtop, iytop, iztop) = (*utop)(ixtop + 1, iytop, iztop) - 2.0 * flux; } // right boundary - if( ix == nx && iy%2==0 && iz%2==0 ) + if (ix == nx && iy % 2 == 0 && iz % 2 == 0) { flux = 0.0; - for( int j=0;j<=1;j++) - for( int k=0;k<=1;k++) - { - ustar1 = interp2( (*utop)(ixtop,iytop-1,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop,iytop+1,iztop-1), fac*((double)j-0.5) ); - ustar2 = interp2( (*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop,iytop+1,iztop), fac*((double)j-0.5) ); - ustar3 = interp2( (*utop)(ixtop,iytop-1,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop,iytop+1,iztop+1), fac*((double)j-0.5) ); - - uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); - + for (int j = 0; j <= 1; j++) + for (int k = 0; k <= 1; k++) + { + ustar1 = interp2((*utop)(ixtop, iytop - 1, iztop - 1), (*utop)(ixtop, iytop, iztop - 1), (*utop)(ixtop, iytop + 1, iztop - 1), fac * ((double)j - 0.5)); + ustar2 = interp2((*utop)(ixtop, iytop - 1, iztop), (*utop)(ixtop, iytop, iztop), (*utop)(ixtop, iytop + 1, iztop), fac * ((double)j - 0.5)); + ustar3 = interp2((*utop)(ixtop, iytop - 1, iztop + 1), (*utop)(ixtop, iytop, iztop + 1), (*utop)(ixtop, iytop + 1, iztop + 1), fac * ((double)j - 0.5)); + + uhat = interp2(-1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac * ((double)k - 0.5)); + //(*u)(ix,iy+j,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix-1,iy+j,iz+k), (*u)(ix-2,iy+j,iz+k), 1.0 ); - (*u)(ix,iy+j,iz+k) = interp2right( (*u)(ix-2,iy+j,iz+k), (*u)(ix-1,iy+j,iz+k), uhat ); - flux += ((*u)(ix,iy+j,iz+k)-(*u)(ix-1,iy+j,iz+k)); + (*u)(ix, iy + j, iz + k) = interp2right((*u)(ix - 2, iy + j, iz + k), (*u)(ix - 1, iy + j, iz + k), uhat); + flux += ((*u)(ix, iy + j, iz + k) - (*u)(ix - 1, iy + j, iz + k)); } flux /= 4.0; - - - double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop-1,iytop,iztop))/2.0 - flux; - 
//dflux *= 2.0; - - if( bcf ) - for( int j=0;j<=1;j++) - for( int k=0;k<=1;k++) - (*u)(ix,iy+j,iz+k) += dflux; + + double dflux = ((*utop)(ixtop, iytop, iztop) - (*utop)(ixtop - 1, iytop, iztop)) / 2.0 - flux; + // dflux *= 2.0; + + if (bcf) + for (int j = 0; j <= 1; j++) + for (int k = 0; k <= 1; k++) + (*u)(ix, iy + j, iz + k) += dflux; else - (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop-1,iytop,iztop) + 2.0*flux; - + (*utop)(ixtop, iytop, iztop) = (*utop)(ixtop - 1, iytop, iztop) + 2.0 * flux; } // bottom boundary - if( iy == -1 && ix%2==0 && iz%2==0 ) + if (iy == -1 && ix % 2 == 0 && iz % 2 == 0) { flux = 0.0; - for( int j=0;j<=1;j++) - for( int k=0;k<=1;k++) + for (int j = 0; j <= 1; j++) + for (int k = 0; k <= 1; k++) { - ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) ); - ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); - ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) ); - - uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); - + ustar1 = interp2((*utop)(ixtop - 1, iytop, iztop - 1), (*utop)(ixtop, iytop, iztop - 1), (*utop)(ixtop + 1, iytop, iztop - 1), fac * (j - 0.5)); + ustar2 = interp2((*utop)(ixtop - 1, iytop, iztop), (*utop)(ixtop, iytop, iztop), (*utop)(ixtop + 1, iytop, iztop), fac * (j - 0.5)); + ustar3 = interp2((*utop)(ixtop - 1, iytop, iztop + 1), (*utop)(ixtop, iytop, iztop + 1), (*utop)(ixtop + 1, iytop, iztop + 1), fac * (j - 0.5)); + + uhat = interp2(-1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac * ((double)k - 0.5)); + //(*u)(ix+j,iy,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k), -1.0 ); - (*u)(ix+j,iy,iz+k) = interp2left( uhat, (*u)(ix+j,iy+1,iz+k), (*u)(ix+j,iy+2,iz+k) ); - - flux += ((*u)(ix+j,iy+1,iz+k)-(*u)(ix+j,iy,iz+k)); + 
(*u)(ix + j, iy, iz + k) = interp2left(uhat, (*u)(ix + j, iy + 1, iz + k), (*u)(ix + j, iy + 2, iz + k)); + + flux += ((*u)(ix + j, iy + 1, iz + k) - (*u)(ix + j, iy, iz + k)); } flux /= 4.0; //(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop+1,iztop) - flux; - double dflux = ((*utop)(ixtop,iytop+1,iztop)-(*utop)(ixtop,iytop,iztop))/2.0 - flux; - //dflux *= 2.0; - if( bcf ) - for( int j=0;j<=1;j++) - for( int k=0;k<=1;k++) - (*u)(ix+j,iy,iz+k) -= dflux; + double dflux = ((*utop)(ixtop, iytop + 1, iztop) - (*utop)(ixtop, iytop, iztop)) / 2.0 - flux; + // dflux *= 2.0; + if (bcf) + for (int j = 0; j <= 1; j++) + for (int k = 0; k <= 1; k++) + (*u)(ix + j, iy, iz + k) -= dflux; else - (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop+1,iztop) - 2.0*flux; - + (*utop)(ixtop, iytop, iztop) = (*utop)(ixtop, iytop + 1, iztop) - 2.0 * flux; } // top boundary - if( iy == ny && ix%2==0 && iz%2==0 ) + if (iy == ny && ix % 2 == 0 && iz % 2 == 0) { flux = 0.0; - for( int j=0;j<=1;j++) - for( int k=0;k<=1;k++) - { - ustar1 = interp2( (*utop)(ixtop-1,iytop,iztop-1),(*utop)(ixtop,iytop,iztop-1),(*utop)(ixtop+1,iytop,iztop-1), fac*(j-0.5) ); - ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); - ustar3 = interp2( (*utop)(ixtop-1,iytop,iztop+1),(*utop)(ixtop,iytop,iztop+1),(*utop)(ixtop+1,iytop,iztop+1), fac*(j-0.5) ); - - uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); - + for (int j = 0; j <= 1; j++) + for (int k = 0; k <= 1; k++) + { + ustar1 = interp2((*utop)(ixtop - 1, iytop, iztop - 1), (*utop)(ixtop, iytop, iztop - 1), (*utop)(ixtop + 1, iytop, iztop - 1), fac * (j - 0.5)); + ustar2 = interp2((*utop)(ixtop - 1, iytop, iztop), (*utop)(ixtop, iytop, iztop), (*utop)(ixtop + 1, iytop, iztop), fac * (j - 0.5)); + ustar3 = interp2((*utop)(ixtop - 1, iytop, iztop + 1), (*utop)(ixtop, iytop, iztop + 1), (*utop)(ixtop + 1, iytop, iztop + 1), fac * (j - 0.5)); + + uhat = interp2(-1.0, 0.0, 
1.0, ustar1, ustar2, ustar3, fac * ((double)k - 0.5)); + //(*u)(ix+j,iy,iz+k) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix+j,iy-1,iz+k), (*u)(ix+j,iy-2,iz+k), 1.0 ); - (*u)(ix+j,iy,iz+k) = interp2right( (*u)(ix+j,iy-2,iz+k), (*u)(ix+j,iy-1,iz+k), uhat ); - - flux += ((*u)(ix+j,iy,iz+k)-(*u)(ix+j,iy-1,iz+k)); + (*u)(ix + j, iy, iz + k) = interp2right((*u)(ix + j, iy - 2, iz + k), (*u)(ix + j, iy - 1, iz + k), uhat); + + flux += ((*u)(ix + j, iy, iz + k) - (*u)(ix + j, iy - 1, iz + k)); } flux /= 4.0; //(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop-1,iztop) + flux; - double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop-1,iztop))/2.0 - flux; - //dflux *= 2.0; - if( bcf ) - for( int j=0;j<=1;j++) - for( int k=0;k<=1;k++) - (*u)(ix+j,iy,iz+k) += dflux; + double dflux = ((*utop)(ixtop, iytop, iztop) - (*utop)(ixtop, iytop - 1, iztop)) / 2.0 - flux; + // dflux *= 2.0; + if (bcf) + for (int j = 0; j <= 1; j++) + for (int k = 0; k <= 1; k++) + (*u)(ix + j, iy, iz + k) += dflux; else - (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop-1,iztop) + 2.0*flux; - + (*utop)(ixtop, iytop, iztop) = (*utop)(ixtop, iytop - 1, iztop) + 2.0 * flux; } // front boundary - if( iz == -1 && ix%2==0 && iy%2==0 ) + if (iz == -1 && ix % 2 == 0 && iy % 2 == 0) { flux = 0.0; - for( int j=0;j<=1;j++) - for( int k=0;k<=1;k++) - { - ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) ); - ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); - ustar3 = interp2( (*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) ); - - uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); - + for (int j = 0; j <= 1; j++) + for (int k = 0; k <= 1; k++) + { + ustar1 = interp2((*utop)(ixtop - 1, iytop - 1, iztop), (*utop)(ixtop, iytop - 1, iztop), (*utop)(ixtop + 1, iytop 
- 1, iztop), fac * (j - 0.5)); + ustar2 = interp2((*utop)(ixtop - 1, iytop, iztop), (*utop)(ixtop, iytop, iztop), (*utop)(ixtop + 1, iytop, iztop), fac * (j - 0.5)); + ustar3 = interp2((*utop)(ixtop - 1, iytop + 1, iztop), (*utop)(ixtop, iytop + 1, iztop), (*utop)(ixtop + 1, iytop + 1, iztop), fac * (j - 0.5)); + + uhat = interp2(-1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac * ((double)k - 0.5)); + //(*u)(ix+j,iy+k,iz) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( -1.5, 0.0, 1.0, uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2), -1.0 ); - (*u)(ix+j,iy+k,iz) = interp2left( uhat, (*u)(ix+j,iy+k,iz+1), (*u)(ix+j,iy+k,iz+2) ); - - flux += ((*u)(ix+j,iy+k,iz+1)-(*u)(ix+j,iy+k,iz)); + (*u)(ix + j, iy + k, iz) = interp2left(uhat, (*u)(ix + j, iy + k, iz + 1), (*u)(ix + j, iy + k, iz + 2)); + + flux += ((*u)(ix + j, iy + k, iz + 1) - (*u)(ix + j, iy + k, iz)); } flux /= 4.0; //(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop+1) - flux; - double dflux = ((*utop)(ixtop,iytop,iztop+1)-(*utop)(ixtop,iytop,iztop))/2.0 - flux; - //dflux *= 2.0; - if( bcf ) - for( int j=0;j<=1;j++) - for( int k=0;k<=1;k++) - (*u)(ix+j,iy+k,iz) -= dflux; + double dflux = ((*utop)(ixtop, iytop, iztop + 1) - (*utop)(ixtop, iytop, iztop)) / 2.0 - flux; + // dflux *= 2.0; + if (bcf) + for (int j = 0; j <= 1; j++) + for (int k = 0; k <= 1; k++) + (*u)(ix + j, iy + k, iz) -= dflux; else - (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop+1) - 2.0*flux; - + (*utop)(ixtop, iytop, iztop) = (*utop)(ixtop, iytop, iztop + 1) - 2.0 * flux; } // back boundary - if( iz == nz && ix%2==0 && iy%2==0 ) + if (iz == nz && ix % 2 == 0 && iy % 2 == 0) { flux = 0.0; - for( int j=0;j<=1;j++) - for( int k=0;k<=1;k++) - { - ustar1 = interp2( (*utop)(ixtop-1,iytop-1,iztop),(*utop)(ixtop,iytop-1,iztop),(*utop)(ixtop+1,iytop-1,iztop), fac*(j-0.5) ); - ustar2 = interp2( (*utop)(ixtop-1,iytop,iztop),(*utop)(ixtop,iytop,iztop),(*utop)(ixtop+1,iytop,iztop), fac*(j-0.5) ); - ustar3 = interp2( 
(*utop)(ixtop-1,iytop+1,iztop),(*utop)(ixtop,iytop+1,iztop),(*utop)(ixtop+1,iytop+1,iztop), fac*(j-0.5) ); - - uhat = interp2( -1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac*((double)k-0.5) ); - + for (int j = 0; j <= 1; j++) + for (int k = 0; k <= 1; k++) + { + ustar1 = interp2((*utop)(ixtop - 1, iytop - 1, iztop), (*utop)(ixtop, iytop - 1, iztop), (*utop)(ixtop + 1, iytop - 1, iztop), fac * (j - 0.5)); + ustar2 = interp2((*utop)(ixtop - 1, iytop, iztop), (*utop)(ixtop, iytop, iztop), (*utop)(ixtop + 1, iytop, iztop), fac * (j - 0.5)); + ustar3 = interp2((*utop)(ixtop - 1, iytop + 1, iztop), (*utop)(ixtop, iytop + 1, iztop), (*utop)(ixtop + 1, iytop + 1, iztop), fac * (j - 0.5)); + + uhat = interp2(-1.0, 0.0, 1.0, ustar1, ustar2, ustar3, fac * ((double)k - 0.5)); + //(*u)(ix+j,iy+k,iz) = 0.0;(*utop)(ixtop,iytop,iztop);//interp2( 1.5, 0.0, -1.0, uhat, (*u)(ix+j,iy+k,iz-1), (*u)(ix+j,iy+k,iz-2), 1.0 ); - (*u)(ix+j,iy+k,iz) = interp2right( (*u)(ix+j,iy+k,iz-2), (*u)(ix+j,iy+k,iz-1), uhat ); - - flux += ((*u)(ix+j,iy+k,iz)-(*u)(ix+j,iy+k,iz-1)); + (*u)(ix + j, iy + k, iz) = interp2right((*u)(ix + j, iy + k, iz - 2), (*u)(ix + j, iy + k, iz - 1), uhat); + + flux += ((*u)(ix + j, iy + k, iz) - (*u)(ix + j, iy + k, iz - 1)); } flux /= 4.0; //(*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop-1) + flux; - double dflux = ((*utop)(ixtop,iytop,iztop)-(*utop)(ixtop,iytop,iztop-1))/2.0 - flux; - //dflux *= 2.0; - if( bcf ) - for( int j=0;j<=1;j++) - for( int k=0;k<=1;k++) - (*u)(ix+j,iy+k,iz) += dflux; + double dflux = ((*utop)(ixtop, iytop, iztop) - (*utop)(ixtop, iytop, iztop - 1)) / 2.0 - flux; + // dflux *= 2.0; + if (bcf) + for (int j = 0; j <= 1; j++) + for (int k = 0; k <= 1; k++) + (*u)(ix + j, iy + k, iz) += dflux; else - (*utop)(ixtop,iytop,iztop) = (*utop)(ixtop,iytop,iztop-1) + 2.0*flux; + (*utop)(ixtop, iytop, iztop) = (*utop)(ixtop, iytop, iztop - 1) + 2.0 * flux; } - } } - } - #if 1 -template< class S, class O, typename T > -void solver::setBC( unsigned ilevel 
) +template +void solver::setBC(unsigned ilevel) { //... set only on level before additional refinement starts - //if( ilevel == m_ilevelmin ) - if( ilevel == m_ilevelmin ) + // if( ilevel == m_ilevelmin ) + if (ilevel == m_ilevelmin) { - MeshvarBnd *u = m_pu->get_grid(ilevel); - //int nbnd = u->m_nbnd, + MeshvarBnd *u = m_pu->get_grid(ilevel); + // int nbnd = u->m_nbnd, int - nx = u->size(0), - ny = u->size(1), - nz = u->size(2); - + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + /*for( int ix=-nbnd; ix=nx||iy<0||iy>=ny||iz<0||iz>=nz ) (*u)(ix,iy,iz) = (*m_pubnd)(ix,iy,iz);*/ - - - for( int iy=0; iy *u = m_pu->get_grid(ilevel); - int nbnd = u->m_nbnd, - nx = u->size(0), - ny = u->size(1), - nz = u->size(2); - - for( int ix=-nbnd; ix=nx||iy<0||iy>=ny||iz<0||iz>=nz ) - (*u)(ix,iy,iz) = 0.0; - }*/ + for (int iy = 0; iy < ny; ++iy) + for (int iz = 0; iz < nz; ++iz) + { + (*u)(-1, iy, iz) = 2.0 * (*m_pubnd)(-1, iy, iz) - (*u)(0, iy, iz); + (*u)(nx, iy, iz) = 2.0 * (*m_pubnd)(nx, iy, iz) - (*u)(nx - 1, iy, iz); + ; + } + for (int ix = 0; ix < nx; ++ix) + for (int iz = 0; iz < nz; ++iz) + { + (*u)(ix, -1, iz) = 2.0 * (*m_pubnd)(ix, -1, iz) - (*u)(ix, 0, iz); + (*u)(ix, ny, iz) = 2.0 * (*m_pubnd)(ix, ny, iz) - (*u)(ix, ny - 1, iz); + } + + for (int ix = 0; ix < nx; ++ix) + for (int iy = 0; iy < ny; ++iy) + { + (*u)(ix, iy, -1) = 2.0 * (*m_pubnd)(ix, iy, -1) - (*u)(ix, iy, 0); + (*u)(ix, iy, nz) = 2.0 * (*m_pubnd)(ix, iy, nz) - (*u)(ix, iy, nz - 1); + } + + } /*else if( ilevel < m_ilevelmin ) { + MeshvarBnd *u = m_pu->get_grid(ilevel); + int nbnd = u->m_nbnd, + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + + for( int ix=-nbnd; ix=nx||iy<0||iy>=ny||iz<0||iz>=nz ) + (*u)(ix,iy,iz) = 0.0; + }*/ } #else //... 
enforce periodic boundary conditions -template< class S, class O, typename T > -void solver::setBC( unsigned ilevel ) +template +void solver::setBC(unsigned ilevel) { - MeshvarBnd *u = m_pu->get_grid(ilevel); - + MeshvarBnd *u = m_pu->get_grid(ilevel); + //... set only on level before additional refinement starts - if( ilevel <= m_ilevelmin ) + if (ilevel <= m_ilevelmin) { - + int nbnd = u->m_nbnd, - nx = u->size(0), - ny = u->size(1), - nz = u->size(2); - + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); + //(*u)(0,0,0) = 0.0; - - + double sum = 0.0; - for( int ix=0; ix -void solver::make_periodic( MeshvarBnd *u ) +template +void solver::make_periodic(MeshvarBnd *u) { int - nx = u->size(0), - ny = u->size(1), - nz = u->size(2); - + nx = u->size(0), + ny = u->size(1), + nz = u->size(2); - #pragma omp parallel +#pragma omp parallel { - - if( u->offset(0) == 0 ) - for( int iy=0; iyoffset(0) == 0) + for (int iy = 0; iy < ny; ++iy) + for (int iz = 0; iz < nz; ++iz) { - (*u)(-1,iy,iz) = (*u)(nx-1,iy,iz); - (*u)(nx,iy,iz) = (*u)(0,iy,iz); + (*u)(-1, iy, iz) = (*u)(nx - 1, iy, iz); + (*u)(nx, iy, iz) = (*u)(0, iy, iz); } - - if( u->offset(1) == 0 ) - for( int ix=0; ixoffset(1) == 0) + for (int ix = 0; ix < nx; ++ix) + for (int iz = 0; iz < nz; ++iz) { - (*u)(ix,-1,iz) = (*u)(ix,ny-1,iz); - (*u)(ix,ny,iz) = (*u)(ix,0,iz); + (*u)(ix, -1, iz) = (*u)(ix, ny - 1, iz); + (*u)(ix, ny, iz) = (*u)(ix, 0, iz); } - - if( u->offset(2) == 0 ) - for( int ix=0; ixoffset(2) == 0) + for (int ix = 0; ix < nx; ++ix) + for (int iy = 0; iy < ny; ++iy) { - (*u)(ix,iy,-1) = (*u)(ix,iy,nz-1); - (*u)(ix,iy,nz) = (*u)(ix,iy,0); - } - + (*u)(ix, iy, -1) = (*u)(ix, iy, nz - 1); + (*u)(ix, iy, nz) = (*u)(ix, iy, 0); + } } } END_MULTIGRID_NAMESPACE - -#endif diff --git a/src/system_stat.hh b/src/system_stat.hh new file mode 100644 index 0000000..1a30566 --- /dev/null +++ b/src/system_stat.hh @@ -0,0 +1,194 @@ +// This file is part of MUSIC2 +// A software package to generate ICs for 
cosmological simulations +// Copyright (C) 2020-23 by Oliver Hahn +// +// MUSIC2 is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// MUSIC2 is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +#pragma once + +#ifdef __APPLE__ +#include +#include +#include +#include +#include +#include +#include +#include +#elif __linux__ +#include +#include +#include +#endif + +#include + +namespace SystemStat +{ + + + + +class Cpu +{ +public: + Cpu() {} + + std::string get_CPUstring() const + { +#ifdef __APPLE__ + char buffer[1024]; + size_t size = sizeof(buffer); + if (sysctlbyname("machdep.cpu.brand_string", &buffer, &size, NULL, 0) < 0) + { + return ""; + } + return std::string(buffer); +#elif __linux__ + std::string str = ""; + FILE *cpuinfo = fopen("/proc/cpuinfo", "rb"); + char *arg = 0; + size_t size = 0; + while (getdelim(&arg, &size, '\n', cpuinfo) != -1) + { + if (strncmp(arg, "model name", 10) == 0) + { + str = std::string(arg + 13); + break; + } + } + free(arg); + fclose(cpuinfo); + //remove newline characters from string + str.erase(std::remove(str.begin(), str.end(), '\n'), str.end()); + return str; +#endif + } +}; + +class Memory +{ +private: + size_t total; + size_t avail; + size_t used; + +public: + Memory() + : total(0), avail(0), used(0) + { + this->get_statistics(); + } + + size_t get_TotalMem() const { return this->total; } + size_t get_AvailMem() const { return this->avail; } + size_t get_UsedMem() const { return this->used; } + void update() { this->get_statistics(); } + 
+protected: + int get_statistics(void) + { +#ifdef __APPLE__ + int64_t pagesize = int64_t(getpagesize()); + int mib[2] = {CTL_HW, HW_MEMSIZE}; + size_t length = sizeof(size_t); + sysctl(mib, 2, &this->total, &length, nullptr, 0); + + vm_statistics64 vmstat; + natural_t mcount = HOST_VM_INFO64_COUNT; + if (host_statistics64(mach_host_self(), HOST_VM_INFO64, reinterpret_cast(&vmstat), &mcount) == KERN_SUCCESS) + { +#if 1 // count inactive as available + this->avail = (int64_t(vmstat.free_count) + + int64_t(vmstat.inactive_count)) * + pagesize; + this->used = (int64_t(vmstat.active_count) + + int64_t(vmstat.wire_count)) * + pagesize; +#else // count inactive as unavailable + this->avail = int64_t(vmstat.free_count) * pagesize; + this->used = (int64_t(vmstat.active_count) + + int64_t(vmstat.inactive_count) + + int64_t(vmstat.wire_count)) * + pagesize; +#endif + } + +#elif __linux__ + FILE *fd; + char buf[1024]; + if ((fd = fopen("/proc/meminfo", "r"))) + { + while (1) + { + if (fgets(buf, 500, fd) != buf) + break; + if (bcmp(buf, "MemTotal", 8) == 0) + { + this->total = atoll(buf + 10) * 1024; // in Mb + } + if (strncmp(buf, "Committed_AS", 12) == 0) + { + this->used = atoll(buf + 14) * 1024; // in Mb + } + // if(strncmp(buf, "SwapTotal", 9) == 0) + // { + // *SwapTotal = atoll(buf + 11); + // } + // if(strncmp(buf, "SwapFree", 8) == 0) + // { + // *SwapFree = atoll(buf + 10); + // } + } + fclose(fd); + } + this->avail = this->total - this->used; + +#endif + return 0; + } +}; + +#include +#include +#include + +class Kernel +{ +public: + struct info_t + { + std::string kernel; + std::uint32_t major; + std::uint32_t minor; + std::uint32_t patch; + std::uint32_t build_number; + }; + + Kernel() {} + + info_t get_kernel_info() + { + utsname uts; + uname(&uts); + char *marker = uts.release; + const std::uint32_t major = std::strtoul(marker, &marker, 10); + const std::uint32_t minor = std::strtoul(marker + 1, &marker, 10); + const std::uint32_t patch = std::strtoul(marker + 
1, &marker, 10); + const std::uint32_t build_number = std::strtoul(marker + 1, nullptr, 10); + std::string kernel = uts.sysname; + return {kernel, major, minor, patch, build_number}; + } +}; + +} /* namespace SystemStat */ diff --git a/src/transfer_function.hh b/src/transfer_function.hh index 35fa92b..cf368b3 100644 --- a/src/transfer_function.hh +++ b/src/transfer_function.hh @@ -294,10 +294,10 @@ protected: double fftnorm = 1.0/N; - fftw_complex *in, *out; + complex_t *in, *out; - in = new fftw_complex[N]; - out = new fftw_complex[N]; + in = new complex_t[N]; + out = new complex_t[N]; //... perform anti-ringing correction from Hamilton (2000) k0r0 = krgood( mu, q, dlnr, k0r0 ); @@ -341,24 +341,10 @@ protected: } ofsk.close(); -#ifdef FFTW3 - #ifdef SINGLE_PRECISION - fftwf_plan p,ip; - p = fftwf_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE); - ip = fftwf_plan_dft_1d(N, out, in, FFTW_BACKWARD, FFTW_ESTIMATE); - fftwf_execute(p); - #else - fftw_plan p,ip; - p = fftw_plan_dft_1d(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE); - ip = fftw_plan_dft_1d(N, out, in, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_execute(p); - #endif -#else - fftw_plan p,ip; - p = fftw_create_plan(N, FFTW_FORWARD, FFTW_ESTIMATE); - ip = fftw_create_plan(N, FFTW_BACKWARD, FFTW_ESTIMATE); - fftw_one(p, in, out); -#endif + fftw_plan_t p,ip; + p = FFTW_API(plan_dft_1d)(N, in, out, FFTW_FORWARD, FFTW_ESTIMATE); + ip = FFTW_API(plan_dft_1d)(N, out, in, FFTW_BACKWARD, FFTW_ESTIMATE); + FFTW_API(execute)(p); //... compute the Hankel transform by convolution with the Bessel function for( unsigned i=0; i m_xtable,m_ytable,m_dytable; double m_xmin, m_xmax, m_dx, m_rdx;