From 41d0dfb917583ad6e1a891cc9c08687af91ca8c0 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Wed, 8 Feb 2023 11:48:51 -0800 Subject: [PATCH] memory footprint significantly reduced. basic tests work fine with only very minor loss of accuracy --- src/densities.cc | 181 +----- src/poisson.cc | 1538 ++++++++++++++++++++++------------------------ src/random.cc | 128 +--- 3 files changed, 754 insertions(+), 1093 deletions(-) diff --git a/src/densities.cc b/src/densities.cc index 261b446..6f68737 100644 --- a/src/densities.cc +++ b/src/densities.cc @@ -16,25 +16,19 @@ //TODO: this should be a larger number by default, just to maintain consistency with old default #define DEF_RAN_CUBE_SIZE 32 -double blend_sharpness = 0.5; - -double Blend_Function(double k, double kmax) +double Meyer_scaling_function( double k, double kmax ) { -#if 0 - const static double pihalf = 0.5*M_PI; - if( fabs(k)>kmax ) return 0.0; - return cos(k/kmax * pihalf ); -#else - float kabs = fabs(k); - double const eps = blend_sharpness; - float kp = (1.0f - 2.0f * eps) * kmax; + constexpr double twopithirds{2.0*M_PI/3.0}; + constexpr double fourpithirds{4.0*M_PI/3.0}; + auto nu = []( double x ){ return x<0.0?0.0:(x<1.0?x:1.0); }; - if (kabs >= kmax) - return 0.; - if (kabs > kp) - return 1.0f / (expf((kp - kmax) / (k - kp) + (kp - kmax) / (k - kmax)) + 1.0f); - return 1.0f; -#endif + k = k/kmax * fourpithirds; + + if( k < twopithirds ) return 1.0; + else if( k< fourpithirds ){ + return std::cos( 0.5*M_PI * nu(3*k/(2*M_PI)-1.0) ); + } + return 0.0; } template @@ -117,7 +111,7 @@ void fft_coarsen(m1 &v, m2 &V) std::complex val_phas(cos(phase), sin(phase)); - val_fine *= val_phas * fftnorm / 8.0; //sqrt(8.0); + val_fine *= val_phas * fftnorm / 8.0; RE(ccoarse[qc]) = val_fine.real(); IM(ccoarse[qc]) = val_fine.imag(); @@ -164,38 +158,27 @@ void fft_coarsen(m1 &v, m2 &V) #endif } -//#define NO_COARSE_OVERLAP - template void fft_interpolate(m1 &V, m2 &v, bool from_basegrid = false) { int oxf = v.offset(0), oyf = v.offset(1), ozf = v.offset(2); size_t nxf = v.size(0), nyf = v.size(1), nzf = v.size(2), nzfp = nzf + 2; - size_t nxF = V.size(0), nyF = V.size(1), nzF = V.size(2); - size_t mxf = v.margin(0), myf = v.margin(1), mzf = v.margin(2); if (!from_basegrid) { -#ifdef NO_COARSE_OVERLAP - oxf += nxF / 4; - oyf += nyF / 4; - ozf += nzF / 4; -#else - oxf += nxF / 4 - nxf / 8; //mxf / 2; - oyf += nyF / 4 - nyf / 8; //myf / 2; - ozf += nzF / 4 - nzf / 8; //mzf / 2; + oxf += mxf - mxf/2; + oyf += myf - myf/2; + ozf += mzf - mzf/2; } else { oxf -= mxf/2; //nxf / 8; oyf -= myf/2; //nyf / 8; ozf -= mzf/2; //nzf / 8; -#endif } - // LOGUSER("FFT interpolate: offset=%d,%d,%d size=%d,%d,%d", oxf, oyf, ozf, nxf, nyf, nzf); - LOGINFO("FFT interpolate: offset=%d,%d,%d size=%d,%d,%d", oxf, oyf, ozf, nxf, nyf, nzf); + LOGUSER("FFT interpolate: offset=%d,%d,%d size=%d,%d,%d", oxf, oyf, ozf, nxf, nyf, nzf); // cut out piece of coarse grid that overlaps the fine: assert(nxf % 2 == 0 && nyf % 2 == 0 && nzf % 2 == 0); @@ -211,19 +194,6 @@ void fft_interpolate(m1 &V, m2 &v, bool from_basegrid = false) // copy coarse data to rcoarse[.] memset(rcoarse, 0, sizeof(fftw_real) * nxc * nyc * nzcp); -#ifdef NO_COARSE_OVERLAP -#pragma omp parallel for - for (int i = 0; i < (int)nxc / 2; ++i) - for (int j = 0; j < (int)nyc / 2; ++j) - for (int k = 0; k < (int)nzc / 2; ++k) - { - int ii(i + nxc / 4); - int jj(j + nyc / 4); - int kk(k + nzc / 4); - size_t q = ((size_t)ii * nyc + (size_t)jj) * nzcp + (size_t)kk; - rcoarse[q] = V(oxf + i, oyf + j, ozf + k); - } -#else #pragma omp parallel for for (int i = 0; i < (int)nxc; ++i) for (int j = 0; j < (int)nyc; ++j) @@ -235,7 +205,6 @@ void fft_interpolate(m1 &V, m2 &v, bool from_basegrid = false) size_t q = ((size_t)ii * nyc + (size_t)jj) * nzcp + (size_t)kk; rcoarse[q] = V(oxf + i, oyf + j, ozf + k); } -#endif #pragma omp parallel for for (int i = 0; i < (int)nxf; ++i) @@ -284,7 +253,6 @@ void fft_interpolate(m1 &V, m2 &v, bool from_basegrid = false) double phasefac = -0.5; // this enables filtered splicing of coarse and fine modes -#if 1 for (int i = 0; i < (int)nxc; i++) for (int j = 0; j < (int)nyc; j++) for (int k = 0; k < (int)nzc / 2 + 1; k++) @@ -313,116 +281,15 @@ void fft_interpolate(m1 &V, m2 &v, bool from_basegrid = false) std::complex val(RE(ccoarse[qc]), IM(ccoarse[qc])); val *= sqrt8 * val_phas; - double blend_coarse = Blend_Function(sqrt(kx * kx + ky * ky + kz * kz), nxc / 2); - double blend_fine = 1.0 - blend_coarse; - - RE(cfine[qf]) = blend_fine * RE(cfine[qf]) + blend_coarse * val.real(); - IM(cfine[qf]) = blend_fine * IM(cfine[qf]) + blend_coarse * val.imag(); + if (i != (int)nxc / 2 && j != (int)nyc / 2 && k != (int)nzc / 2){ + double blend_coarse = Meyer_scaling_function(kx, nxc / 2) * Meyer_scaling_function(ky, nyc / 2) * Meyer_scaling_function(kz, nzc / 2); + double blend_fine = 1.0 - blend_coarse; + + RE(cfine[qf]) = blend_fine * RE(cfine[qf]) + blend_coarse * val.real(); + IM(cfine[qf]) = blend_fine * IM(cfine[qf]) + blend_coarse * val.imag(); + } } -#else - - // 0 0 -#pragma omp parallel for - for (int i = 0; i < (int)nxc / 2 + 1; i++) - for (int j = 0; j < (int)nyc / 2 + 1; j++) - for (int k = 0; k < (int)nzc / 2 + 1; k++) - { - int ii(i), jj(j), kk(k); - size_t qc, qf; - qc = ((size_t)i * (size_t)nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k; - qf = ((size_t)ii * (size_t)nyf + (size_t)jj) * (nzf / 2 + 1) + (size_t)kk; - - double kx = (i <= (int)nxc / 2) ? (double)i : (double)(i - (int)nxc); - double ky = (j <= (int)nyc / 2) ? (double)j : (double)(j - (int)nyc); - double kz = (k <= (int)nzc / 2) ? (double)k : (double)(k - (int)nzc); - - double phase = phasefac * (kx / nxc + ky / nyc + kz / nzc) * M_PI; - std::complex val_phas(cos(phase), sin(phase)); - - std::complex val(RE(ccoarse[qc]), IM(ccoarse[qc])); - val *= sqrt8 * val_phas; - - RE(cfine[qf]) = val.real(); - IM(cfine[qf]) = val.imag(); - } - -// 1 0 -#pragma omp parallel for - for (int i = nxc / 2; i < (int)nxc; i++) - for (int j = 0; j < (int)nyc / 2 + 1; j++) - for (int k = 0; k < (int)nzc / 2 + 1; k++) - { - int ii(i + nxf / 2), jj(j), kk(k); - size_t qc, qf; - qc = ((size_t)i * (size_t)nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k; - qf = ((size_t)ii * (size_t)nyf + (size_t)jj) * (nzf / 2 + 1) + (size_t)kk; - - double kx = (i <= (int)nxc / 2) ? (double)i : (double)(i - (int)nxc); - double ky = (j <= (int)nyc / 2) ? (double)j : (double)(j - (int)nyc); - double kz = (k <= (int)nzc / 2) ? (double)k : (double)(k - (int)nzc); - - double phase = phasefac * (kx / nxc + ky / nyc + kz / nzc) * M_PI; - std::complex val_phas(cos(phase), sin(phase)); - - std::complex val(RE(ccoarse[qc]), IM(ccoarse[qc])); - val *= sqrt8 * val_phas; - - RE(cfine[qf]) = val.real(); - IM(cfine[qf]) = val.imag(); - } - -// 0 1 -#pragma omp parallel for - for (int i = 0; i < (int)nxc / 2 + 1; i++) - for (int j = nyc / 2; j < (int)nyc; j++) - for (int k = 0; k < (int)nzc / 2 + 1; k++) - { - int ii(i), jj(j + nyf / 2), kk(k); - size_t qc, qf; - qc = ((size_t)i * (size_t)nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k; - qf = ((size_t)ii * (size_t)nyf + (size_t)jj) * (nzf / 2 + 1) + (size_t)kk; - - double kx = (i <= (int)nxc / 2) ? (double)i : (double)(i - (int)nxc); - double ky = (j <= (int)nyc / 2) ? (double)j : (double)(j - (int)nyc); - double kz = (k <= (int)nzc / 2) ? (double)k : (double)(k - (int)nzc); - - double phase = phasefac * (kx / nxc + ky / nyc + kz / nzc) * M_PI; - std::complex val_phas(cos(phase), sin(phase)); - - std::complex val(RE(ccoarse[qc]), IM(ccoarse[qc])); - val *= sqrt8 * val_phas; - - RE(cfine[qf]) = val.real(); - IM(cfine[qf]) = val.imag(); - } - -// 1 1 -#pragma omp parallel for - for (int i = nxc / 2; i < (int)nxc; i++) - for (int j = nyc / 2; j < (int)nyc; j++) - for (int k = 0; k < (int)nzc / 2 + 1; k++) - { - int ii(i + nxf / 2), jj(j + nyf / 2), kk(k); - size_t qc, qf; - qc = ((size_t)i * (size_t)nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k; - qf = ((size_t)ii * (size_t)nyf + (size_t)jj) * (nzf / 2 + 1) + (size_t)kk; - - double kx = (i <= (int)nxc / 2) ? (double)i : (double)(i - (int)nxc); - double ky = (j <= (int)nyc / 2) ? (double)j : (double)(j - (int)nyc); - double kz = (k <= (int)nzc / 2) ? (double)k : (double)(k - (int)nzc); - - double phase = phasefac * (kx / nxc + ky / nyc + kz / nzc) * M_PI; - std::complex val_phas(cos(phase), sin(phase)); - - std::complex val(RE(ccoarse[qc]), IM(ccoarse[qc])); - val *= sqrt8 * val_phas; - - RE(cfine[qf]) = val.real(); - IM(cfine[qf]) = val.imag(); - } -#endif - delete[] rcoarse; /*************************************************/ @@ -577,8 +444,6 @@ void GenerateDensityHierarchy(config_file &cf, transfer_function *ptf, tf_type t LOGWARN("You have chosen mode fixing for a zoom. This is not well tested,\n please proceed at your own risk..."); } - blend_sharpness = cf.getValueSafe("setup", "kspace_filter", blend_sharpness); - unsigned nbase = 1 << levelmin; convolution::kernel_creator *the_kernel_creator; diff --git a/src/poisson.cc b/src/poisson.cc index 5fbd787..c405f72 100644 --- a/src/poisson.cc +++ b/src/poisson.cc @@ -1,11 +1,11 @@ /* - + poisson.cc - This file is part of MUSIC - - a code to generate multi-scale initial conditions - for cosmological simulations - + a code to generate multi-scale initial conditions + for cosmological simulations + Copyright (C) 2010 Oliver Hahn - + */ /****** ABSTRACT FACTORY PATTERN IMPLEMENTATION *******/ @@ -13,777 +13,717 @@ #include "poisson.hh" #include "Numerics.hh" -std::map< std::string, poisson_plugin_creator *>& +std::map & get_poisson_plugin_map() { - static std::map< std::string, poisson_plugin_creator* > poisson_plugin_map; + static std::map poisson_plugin_map; return poisson_plugin_map; } void print_poisson_plugins() { - std::map< std::string, poisson_plugin_creator *>& m = get_poisson_plugin_map(); - - std::map< std::string, poisson_plugin_creator *>::iterator it; + std::map &m = get_poisson_plugin_map(); + + std::map::iterator it; it = m.begin(); std::cout << " - Available poisson solver plug-ins:\n"; - while( it!=m.end() ) + while (it != m.end()) { - if( (*it).second ) + if ((*it).second) std::cout << "\t\'" << (*it).first << "\'\n"; - - LOGINFO("Poisson plug-in :: %s",std::string((*it).first).c_str()); - + + LOGINFO("Poisson plug-in :: %s", std::string((*it).first).c_str()); + ++it; } } - /****** CALL IMPLEMENTATIONS OF POISSON SOLVER CLASSES ******/ #include "mg_solver.hh" #include "fd_schemes.hh" #ifdef SINGLE_PRECISION -typedef multigrid::solver< stencil_7P, interp_O3_fluxcorr, mg_straight, float > poisson_solver_O2; -typedef multigrid::solver< stencil_13P, interp_O5_fluxcorr, mg_straight, float > poisson_solver_O4; -typedef multigrid::solver< stencil_19P, interp_O7_fluxcorr, mg_straight, float > poisson_solver_O6; +typedef multigrid::solver, interp_O3_fluxcorr, mg_straight, float> poisson_solver_O2; +typedef multigrid::solver, interp_O5_fluxcorr, mg_straight, float> poisson_solver_O4; +typedef multigrid::solver, interp_O7_fluxcorr, mg_straight, float> poisson_solver_O6; #else -typedef multigrid::solver< stencil_7P, interp_O3_fluxcorr, mg_straight, double > poisson_solver_O2; -typedef multigrid::solver< stencil_13P, interp_O5_fluxcorr, mg_straight, double > poisson_solver_O4; -typedef multigrid::solver< stencil_19P, interp_O7_fluxcorr, mg_straight, double > poisson_solver_O6; +typedef multigrid::solver, interp_O3_fluxcorr, mg_straight, double> poisson_solver_O2; +typedef multigrid::solver, interp_O5_fluxcorr, mg_straight, double> poisson_solver_O4; +typedef multigrid::solver, interp_O7_fluxcorr, mg_straight, double> poisson_solver_O6; #endif - /**************************************************************************************/ /**************************************************************************************/ - -double multigrid_poisson_plugin::solve( grid_hierarchy& f, grid_hierarchy& u ) +double multigrid_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) { LOGUSER("Initializing multi-grid Poisson solver..."); - - unsigned verbosity = cf_.getValueSafe("setup","verbosity",2); - - if( verbosity > 0 ) - { + + unsigned verbosity = cf_.getValueSafe("setup", "verbosity", 2); + + if (verbosity > 0) + { std::cout << "-------------------------------------------------------------\n"; std::cout << " - Invoking multi-grid Poisson solver..." << std::endl; } - + double acc = 1e-5, err; std::string ps_smoother_name; unsigned ps_presmooth, ps_postsmooth, order; - - acc = cf_.getValueSafe("poisson","accuracy",acc); - ps_presmooth = cf_.getValueSafe("poisson","pre_smooth",3); - ps_postsmooth = cf_.getValueSafe("poisson","post_smooth",3); - ps_smoother_name = cf_.getValueSafe("poisson","smoother","gs"); - order = cf_.getValueSafe( "poisson", "laplace_order", 4 ); - + + acc = cf_.getValueSafe("poisson", "accuracy", acc); + ps_presmooth = cf_.getValueSafe("poisson", "pre_smooth", 3); + ps_postsmooth = cf_.getValueSafe("poisson", "post_smooth", 3); + ps_smoother_name = cf_.getValueSafe("poisson", "smoother", "gs"); + order = cf_.getValueSafe("poisson", "laplace_order", 4); + multigrid::opt::smtype ps_smtype = multigrid::opt::sm_gauss_seidel; - - if ( ps_smoother_name == std::string("gs") ) - { + + if (ps_smoother_name == std::string("gs")) + { ps_smtype = multigrid::opt::sm_gauss_seidel; LOGUSER("Selected Gauss-Seidel multigrid smoother"); } - else if ( ps_smoother_name == std::string("jacobi") ) - { + else if (ps_smoother_name == std::string("jacobi")) + { ps_smtype = multigrid::opt::sm_jacobi; - LOGUSER("Selected Jacobi multigrid smoother"); + LOGUSER("Selected Jacobi multigrid smoother"); } - else if ( ps_smoother_name == std::string("sor") ) - { + else if (ps_smoother_name == std::string("sor")) + { ps_smtype = multigrid::opt::sm_sor; LOGUSER("Selected SOR multigrid smoother"); } else - { - LOGWARN("Unknown multigrid smoother \'%s\' specified. Reverting to Gauss-Seidel.",ps_smoother_name.c_str()); + { + LOGWARN("Unknown multigrid smoother \'%s\' specified. Reverting to Gauss-Seidel.", ps_smoother_name.c_str()); std::cerr << " - Warning: unknown smoother \'" << ps_smoother_name << "\' for multigrid solver!\n" - << " reverting to \'gs\' (Gauss-Seidel)" << std::endl; + << " reverting to \'gs\' (Gauss-Seidel)" << std::endl; } - - - + double tstart, tend; - + #ifndef SINGLETHREAD_FFTW tstart = omp_get_wtime(); #else tstart = (double)clock() / CLOCKS_PER_SEC; #endif - - + //----- run Poisson solver -----// - if( order == 2 ) + if (order == 2) { LOGUSER("Running multigrid solver with 2nd order Laplacian..."); - poisson_solver_O2 ps( f, ps_smtype, ps_presmooth, ps_postsmooth ); - err = ps.solve( u, acc, true ); + poisson_solver_O2 ps(f, ps_smtype, ps_presmooth, ps_postsmooth); + err = ps.solve(u, acc, true); } - else if( order == 4 ) + else if (order == 4) { LOGUSER("Running multigrid solver with 4th order Laplacian..."); - poisson_solver_O4 ps( f, ps_smtype, ps_presmooth, ps_postsmooth ); - err = ps.solve( u, acc, true ); + poisson_solver_O4 ps(f, ps_smtype, ps_presmooth, ps_postsmooth); + err = ps.solve(u, acc, true); } - else if( order == 6 ) + else if (order == 6) { LOGUSER("Running multigrid solver with 6th order Laplacian.."); - poisson_solver_O6 ps( f, ps_smtype, ps_presmooth, ps_postsmooth ); - err = ps.solve( u, acc, true ); - } + poisson_solver_O6 ps(f, ps_smtype, ps_presmooth, ps_postsmooth); + err = ps.solve(u, acc, true); + } else - { + { LOGERR("Invalid order specified for Laplace operator"); throw std::runtime_error("Invalid order specified for Laplace operator"); } - + //------------------------------// - + #ifndef SINGLETHREAD_FFTW tend = omp_get_wtime(); - if( verbosity > 1 ) - std::cout << " - Poisson solver took " << tend-tstart << "s with " << omp_get_max_threads() << " threads." << std::endl; + if (verbosity > 1) + std::cout << " - Poisson solver took " << tend - tstart << "s with " << omp_get_max_threads() << " threads." << std::endl; #else tend = (double)clock() / CLOCKS_PER_SEC; - if( verbosity > 1 ) - std::cout << " - Poisson solver took " << tend-tstart << "s." << std::endl; - + if (verbosity > 1) + std::cout << " - Poisson solver took " << tend - tstart << "s." << std::endl; + #endif - - + return err; } -double multigrid_poisson_plugin::gradient( int dir, grid_hierarchy& u, grid_hierarchy& Du ) +double multigrid_poisson_plugin::gradient(int dir, grid_hierarchy &u, grid_hierarchy &Du) { Du = u; - - unsigned order = cf_.getValueSafe( "poisson", "grad_order", 4 ); - - switch( order ) + + unsigned order = cf_.getValueSafe("poisson", "grad_order", 4); + + switch (order) { - case 2: - implementation().gradient_O2( dir, u, Du ); - break; - case 4: - implementation().gradient_O4( dir, u, Du ); - break; - case 6: - implementation().gradient_O6( dir, u, Du ); - break; - default: - LOGERR("Invalid order %d specified for gradient operator", order); - throw std::runtime_error("Invalid order specified for gradient operator!"); + case 2: + implementation().gradient_O2(dir, u, Du); + break; + case 4: + implementation().gradient_O4(dir, u, Du); + break; + case 6: + implementation().gradient_O6(dir, u, Du); + break; + default: + LOGERR("Invalid order %d specified for gradient operator", order); + throw std::runtime_error("Invalid order specified for gradient operator!"); } - + return 0.0; } -double multigrid_poisson_plugin::gradient_add( int dir, grid_hierarchy& u, grid_hierarchy& Du ) +double multigrid_poisson_plugin::gradient_add(int dir, grid_hierarchy &u, grid_hierarchy &Du) { - //Du = u; - - unsigned order = cf_.getValueSafe( "poisson", "grad_order", 4 ); - - switch( order ) + // Du = u; + + unsigned order = cf_.getValueSafe("poisson", "grad_order", 4); + + switch (order) { - case 2: - implementation().gradient_add_O2( dir, u, Du ); - break; - case 4: - implementation().gradient_add_O4( dir, u, Du ); - break; - case 6: - implementation().gradient_add_O6( dir, u, Du ); - break; - default: - LOGERR("Invalid order %d specified for gradient operator!",order); - throw std::runtime_error("Invalid order specified for gradient operator!"); + case 2: + implementation().gradient_add_O2(dir, u, Du); + break; + case 4: + implementation().gradient_add_O4(dir, u, Du); + break; + case 6: + implementation().gradient_add_O6(dir, u, Du); + break; + default: + LOGERR("Invalid order %d specified for gradient operator!", order); + throw std::runtime_error("Invalid order specified for gradient operator!"); } - + return 0.0; } -void multigrid_poisson_plugin::implementation::gradient_O2( int dir, grid_hierarchy& u, grid_hierarchy& Du ) +void multigrid_poisson_plugin::implementation::gradient_O2(int dir, grid_hierarchy &u, grid_hierarchy &Du) { LOGUSER("Computing a 2nd order finite difference gradient..."); - - for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) + + for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel) { - double h = pow(2.0,ilevel); + double h = pow(2.0, ilevel); meshvar_bnd *pvar = Du.get_grid(ilevel); - - if( dir == 0 ) + + if (dir == 0) #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) = 0.5*((*u.get_grid(ilevel))(ix+1,iy,iz)-(*u.get_grid(ilevel))(ix-1,iy,iz))*h; - - else if( dir == 1 ) + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) = 0.5 * ((*u.get_grid(ilevel))(ix + 1, iy, iz) - (*u.get_grid(ilevel))(ix - 1, iy, iz)) * h; + + else if (dir == 1) #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) = 0.5*((*u.get_grid(ilevel))(ix,iy+1,iz)-(*u.get_grid(ilevel))(ix,iy-1,iz))*h; - - else if( dir == 2 ) + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) = 0.5 * ((*u.get_grid(ilevel))(ix, iy + 1, iz) - (*u.get_grid(ilevel))(ix, iy - 1, iz)) * h; + + else if (dir == 2) #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) = 0.5*((*u.get_grid(ilevel))(ix,iy,iz+1)-(*u.get_grid(ilevel))(ix,iy,iz-1))*h; + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) = 0.5 * ((*u.get_grid(ilevel))(ix, iy, iz + 1) - (*u.get_grid(ilevel))(ix, iy, iz - 1)) * h; } - + LOGUSER("Done computing a 2nd order finite difference gradient."); } -void multigrid_poisson_plugin::implementation::gradient_add_O2( int dir, grid_hierarchy& u, grid_hierarchy& Du ) +void multigrid_poisson_plugin::implementation::gradient_add_O2(int dir, grid_hierarchy &u, grid_hierarchy &Du) { LOGUSER("Computing a 2nd order finite difference gradient..."); - - for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) + + for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel) { - double h = pow(2.0,ilevel); + double h = pow(2.0, ilevel); meshvar_bnd *pvar = Du.get_grid(ilevel); - - if( dir == 0 ) - #pragma omp parallel for - for( int ix = 0; ix < (int)(*Du.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*Du.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*Du.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) += 0.5*((*u.get_grid(ilevel))(ix+1,iy,iz)-(*u.get_grid(ilevel))(ix-1,iy,iz))*h; - - else if( dir == 1 ) - #pragma omp parallel for - for( int ix = 0; ix < (int)(*Du.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*Du.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*Du.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) += 0.5*((*u.get_grid(ilevel))(ix,iy+1,iz)-(*u.get_grid(ilevel))(ix,iy-1,iz))*h; - - else if( dir == 2 ) - #pragma omp parallel for - for( int ix = 0; ix < (int)(*Du.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*Du.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*Du.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) += 0.5*((*u.get_grid(ilevel))(ix,iy,iz+1)-(*u.get_grid(ilevel))(ix,iy,iz-1))*h; + + if (dir == 0) +#pragma omp parallel for + for (int ix = 0; ix < (int)(*Du.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*Du.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*Du.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) += 0.5 * ((*u.get_grid(ilevel))(ix + 1, iy, iz) - (*u.get_grid(ilevel))(ix - 1, iy, iz)) * h; + + else if (dir == 1) +#pragma omp parallel for + for (int ix = 0; ix < (int)(*Du.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*Du.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*Du.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) += 0.5 * ((*u.get_grid(ilevel))(ix, iy + 1, iz) - (*u.get_grid(ilevel))(ix, iy - 1, iz)) * h; + + else if (dir == 2) +#pragma omp parallel for + for (int ix = 0; ix < (int)(*Du.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*Du.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*Du.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) += 0.5 * ((*u.get_grid(ilevel))(ix, iy, iz + 1) - (*u.get_grid(ilevel))(ix, iy, iz - 1)) * h; } - + LOGUSER("Done computing a 4th order finite difference gradient."); } -void multigrid_poisson_plugin::implementation::gradient_O4( int dir, grid_hierarchy& u, grid_hierarchy& Du ) +void multigrid_poisson_plugin::implementation::gradient_O4(int dir, grid_hierarchy &u, grid_hierarchy &Du) { LOGUSER("Computing a 4th order finite difference gradient..."); - - for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) + + for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel) { - double h = pow(2.0,ilevel); + double h = pow(2.0, ilevel); meshvar_bnd *pvar = Du.get_grid(ilevel); - + h /= 12.0; - - if( dir == 0 ) + + if (dir == 0) #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) = ((*u.get_grid(ilevel))(ix-2,iy,iz) - -8.0*(*u.get_grid(ilevel))(ix-1,iy,iz) - +8.0*(*u.get_grid(ilevel))(ix+1,iy,iz) - -(*u.get_grid(ilevel))(ix+2,iy,iz))*h; - - else if( dir == 1 ) + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) = ((*u.get_grid(ilevel))(ix - 2, iy, iz) - 8.0 * (*u.get_grid(ilevel))(ix - 1, iy, iz) + 8.0 * (*u.get_grid(ilevel))(ix + 1, iy, iz) - (*u.get_grid(ilevel))(ix + 2, iy, iz)) * h; + + else if (dir == 1) #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) = ((*u.get_grid(ilevel))(ix,iy-2,iz) - -8.0*(*u.get_grid(ilevel))(ix,iy-1,iz) - +8.0*(*u.get_grid(ilevel))(ix,iy+1,iz) - -(*u.get_grid(ilevel))(ix,iy+2,iz))*h; - - else if( dir == 2 ) + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) = ((*u.get_grid(ilevel))(ix, iy - 2, iz) - 8.0 * (*u.get_grid(ilevel))(ix, iy - 1, iz) + 8.0 * (*u.get_grid(ilevel))(ix, iy + 1, iz) - (*u.get_grid(ilevel))(ix, iy + 2, iz)) * h; + + else if (dir == 2) #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) = ((*u.get_grid(ilevel))(ix,iy,iz-2) - -8.0*(*u.get_grid(ilevel))(ix,iy,iz-1) - +8.0*(*u.get_grid(ilevel))(ix,iy,iz+1) - -(*u.get_grid(ilevel))(ix,iy,iz+2))*h; - } - + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) = ((*u.get_grid(ilevel))(ix, iy, iz - 2) - 8.0 * (*u.get_grid(ilevel))(ix, iy, iz - 1) + 8.0 * (*u.get_grid(ilevel))(ix, iy, iz + 1) - (*u.get_grid(ilevel))(ix, iy, iz + 2)) * h; + } + LOGUSER("Done computing a 4th order finite difference gradient."); } -void multigrid_poisson_plugin::implementation::gradient_add_O4( int dir, grid_hierarchy& u, grid_hierarchy& Du ) +void multigrid_poisson_plugin::implementation::gradient_add_O4(int dir, grid_hierarchy &u, grid_hierarchy &Du) { LOGUSER("Computing a 4th order finite difference gradient..."); - - for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) + + for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel) { - double h = pow(2.0,ilevel); + double h = pow(2.0, ilevel); meshvar_bnd *pvar = Du.get_grid(ilevel); - + h /= 12.0; - - if( dir == 0 ) + + if (dir == 0) #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) += ((*u.get_grid(ilevel))(ix-2,iy,iz) - -8.0*(*u.get_grid(ilevel))(ix-1,iy,iz) - +8.0*(*u.get_grid(ilevel))(ix+1,iy,iz) - -(*u.get_grid(ilevel))(ix+2,iy,iz))*h; - - else if( dir == 1 ) + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) += ((*u.get_grid(ilevel))(ix - 2, iy, iz) - 8.0 * (*u.get_grid(ilevel))(ix - 1, iy, iz) + 8.0 * (*u.get_grid(ilevel))(ix + 1, iy, iz) - (*u.get_grid(ilevel))(ix + 2, iy, iz)) * h; + + else if (dir == 1) #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) += ((*u.get_grid(ilevel))(ix,iy-2,iz) - -8.0*(*u.get_grid(ilevel))(ix,iy-1,iz) - +8.0*(*u.get_grid(ilevel))(ix,iy+1,iz) - -(*u.get_grid(ilevel))(ix,iy+2,iz))*h; - - else if( dir == 2 ) + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) += ((*u.get_grid(ilevel))(ix, iy - 2, iz) - 8.0 * (*u.get_grid(ilevel))(ix, iy - 1, iz) + 8.0 * (*u.get_grid(ilevel))(ix, iy + 1, iz) - (*u.get_grid(ilevel))(ix, iy + 2, iz)) * h; + + else if (dir == 2) #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) += ((*u.get_grid(ilevel))(ix,iy,iz-2) - -8.0*(*u.get_grid(ilevel))(ix,iy,iz-1) - +8.0*(*u.get_grid(ilevel))(ix,iy,iz+1) - -(*u.get_grid(ilevel))(ix,iy,iz+2))*h; - } - - + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) += ((*u.get_grid(ilevel))(ix, iy, iz - 2) - 8.0 * (*u.get_grid(ilevel))(ix, iy, iz - 1) + 8.0 * (*u.get_grid(ilevel))(ix, iy, iz + 1) - (*u.get_grid(ilevel))(ix, iy, iz + 2)) * h; + } + LOGUSER("Done computing a 4th order finite difference gradient."); } - -void multigrid_poisson_plugin::implementation::gradient_O6( int dir, grid_hierarchy& u, grid_hierarchy& Du ) +void multigrid_poisson_plugin::implementation::gradient_O6(int dir, grid_hierarchy &u, grid_hierarchy &Du) { LOGUSER("Computing a 6th order finite difference gradient..."); - - for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) - { - double h = pow(2.0,ilevel); - meshvar_bnd *pvar = Du.get_grid(ilevel); - - h /= 60.; - if( dir == 0 ) - #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) = - (-(*u.get_grid(ilevel))(ix-3,iy,iz) - +9.0*(*u.get_grid(ilevel))(ix-2,iy,iz) - -45.0*(*u.get_grid(ilevel))(ix-1,iy,iz) - +45.0*(*u.get_grid(ilevel))(ix+1,iy,iz) - -9.0*(*u.get_grid(ilevel))(ix+2,iy,iz) - +(*u.get_grid(ilevel))(ix+3,iy,iz))*h; - - else if( dir == 1 ) - #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) = - (-(*u.get_grid(ilevel))(ix,iy-3,iz) - +9.0*(*u.get_grid(ilevel))(ix,iy-2,iz) - -45.0*(*u.get_grid(ilevel))(ix,iy-1,iz) - +45.0*(*u.get_grid(ilevel))(ix,iy+1,iz) - -9.0*(*u.get_grid(ilevel))(ix,iy+2,iz) - +(*u.get_grid(ilevel))(ix,iy+3,iz))*h; - - else if( dir == 2 ) - #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) = - (-(*u.get_grid(ilevel))(ix,iy,iz-3) - +9.0*(*u.get_grid(ilevel))(ix,iy,iz-2) - -45.0*(*u.get_grid(ilevel))(ix,iy,iz-1) - +45.0*(*u.get_grid(ilevel))(ix,iy,iz+1) - -9.0*(*u.get_grid(ilevel))(ix,iy,iz+2) - +(*u.get_grid(ilevel))(ix,iy,iz+3))*h; - } - - LOGUSER("Done computing a 6th order finite difference gradient."); -} - -void multigrid_poisson_plugin::implementation::gradient_add_O6( int dir, grid_hierarchy& u, grid_hierarchy& Du ) -{ - LOGUSER("Computing a 6th order finite difference gradient..."); - - for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) + for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel) { - double h = pow(2.0,ilevel); + double h = pow(2.0, ilevel); meshvar_bnd *pvar = Du.get_grid(ilevel); - + h /= 60.; - if( dir == 0 ) - #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) += - (-(*u.get_grid(ilevel))(ix-3,iy,iz) - +9.0*(*u.get_grid(ilevel))(ix-2,iy,iz) - -45.0*(*u.get_grid(ilevel))(ix-1,iy,iz) - +45.0*(*u.get_grid(ilevel))(ix+1,iy,iz) - -9.0*(*u.get_grid(ilevel))(ix+2,iy,iz) - +(*u.get_grid(ilevel))(ix+3,iy,iz))*h; - - else if( dir == 1 ) - #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) += - (-(*u.get_grid(ilevel))(ix,iy-3,iz) - +9.0*(*u.get_grid(ilevel))(ix,iy-2,iz) - -45.0*(*u.get_grid(ilevel))(ix,iy-1,iz) - +45.0*(*u.get_grid(ilevel))(ix,iy+1,iz) - -9.0*(*u.get_grid(ilevel))(ix,iy+2,iz) - +(*u.get_grid(ilevel))(ix,iy+3,iz))*h; - - else if( dir == 2 ) - #pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - (*pvar)(ix,iy,iz) += - (-(*u.get_grid(ilevel))(ix,iy,iz-3) - +9.0*(*u.get_grid(ilevel))(ix,iy,iz-2) - -45.0*(*u.get_grid(ilevel))(ix,iy,iz-1) - +45.0*(*u.get_grid(ilevel))(ix,iy,iz+1) - -9.0*(*u.get_grid(ilevel))(ix,iy,iz+2) - +(*u.get_grid(ilevel))(ix,iy,iz+3))*h; + if (dir == 0) +#pragma omp parallel for + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) = + (-(*u.get_grid(ilevel))(ix - 3, iy, iz) + 9.0 * (*u.get_grid(ilevel))(ix - 2, iy, iz) - 45.0 * (*u.get_grid(ilevel))(ix - 1, iy, iz) + 45.0 * (*u.get_grid(ilevel))(ix + 1, iy, iz) - 9.0 * (*u.get_grid(ilevel))(ix + 2, iy, iz) + (*u.get_grid(ilevel))(ix + 3, iy, iz)) * h; + + else if (dir == 1) +#pragma omp parallel for + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) = + (-(*u.get_grid(ilevel))(ix, iy - 3, iz) + 9.0 * (*u.get_grid(ilevel))(ix, iy - 2, iz) - 45.0 * (*u.get_grid(ilevel))(ix, iy - 1, iz) + 45.0 * (*u.get_grid(ilevel))(ix, iy + 1, iz) - 9.0 * (*u.get_grid(ilevel))(ix, iy + 2, iz) + (*u.get_grid(ilevel))(ix, iy + 3, iz)) * h; + + else if (dir == 2) +#pragma omp parallel for + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) = + (-(*u.get_grid(ilevel))(ix, iy, iz - 3) + 9.0 * (*u.get_grid(ilevel))(ix, iy, iz - 2) - 45.0 * (*u.get_grid(ilevel))(ix, iy, iz - 1) + 45.0 * (*u.get_grid(ilevel))(ix, iy, iz + 1) - 9.0 * (*u.get_grid(ilevel))(ix, iy, iz + 2) + (*u.get_grid(ilevel))(ix, iy, iz + 3)) * h; } - + LOGUSER("Done computing a 6th order finite difference gradient."); } +void multigrid_poisson_plugin::implementation::gradient_add_O6(int dir, grid_hierarchy &u, grid_hierarchy &Du) +{ + LOGUSER("Computing a 6th order finite difference gradient..."); + + for (unsigned ilevel = u.levelmin(); ilevel <= u.levelmax(); ++ilevel) + { + double h = pow(2.0, ilevel); + meshvar_bnd *pvar = Du.get_grid(ilevel); + + h /= 60.; + if (dir == 0) +#pragma omp parallel for + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) += + (-(*u.get_grid(ilevel))(ix - 3, iy, iz) + 9.0 * (*u.get_grid(ilevel))(ix - 2, iy, iz) - 45.0 * (*u.get_grid(ilevel))(ix - 1, iy, iz) + 45.0 * (*u.get_grid(ilevel))(ix + 1, iy, iz) - 9.0 * (*u.get_grid(ilevel))(ix + 2, iy, iz) + (*u.get_grid(ilevel))(ix + 3, iy, iz)) * h; + + else if (dir == 1) +#pragma omp parallel for + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) += + (-(*u.get_grid(ilevel))(ix, iy - 3, iz) + 9.0 * (*u.get_grid(ilevel))(ix, iy - 2, iz) - 45.0 * (*u.get_grid(ilevel))(ix, iy - 1, iz) + 45.0 * (*u.get_grid(ilevel))(ix, iy + 1, iz) - 9.0 * (*u.get_grid(ilevel))(ix, iy + 2, iz) + (*u.get_grid(ilevel))(ix, iy + 3, iz)) * h; + + else if (dir == 2) +#pragma omp parallel for + for (int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix) + for (int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy) + for (int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz) + (*pvar)(ix, iy, iz) += + (-(*u.get_grid(ilevel))(ix, iy, iz - 3) + 9.0 * (*u.get_grid(ilevel))(ix, iy, iz - 2) - 45.0 * (*u.get_grid(ilevel))(ix, iy, iz - 1) + 45.0 * (*u.get_grid(ilevel))(ix, iy, iz + 1) - 9.0 * (*u.get_grid(ilevel))(ix, iy, iz + 2) + (*u.get_grid(ilevel))(ix, iy, iz + 3)) * h; + } + + LOGUSER("Done computing a 6th order finite difference gradient."); +} /**************************************************************************************/ /**************************************************************************************/ #include "general.hh" -double fft_poisson_plugin::solve( grid_hierarchy& f, grid_hierarchy& u ) +double fft_poisson_plugin::solve(grid_hierarchy &f, grid_hierarchy &u) { LOGUSER("Entering k-space Poisson solver..."); - - unsigned verbosity = cf_.getValueSafe("setup","verbosity",2); - - if( f.levelmin() != f.levelmax() ) - { + + unsigned verbosity = cf_.getValueSafe("setup", "verbosity", 2); + + if (f.levelmin() != f.levelmax()) + { LOGERR("Attempt to run k-space Poisson solver on non unigrid mesh."); throw std::runtime_error("fft_poisson_plugin::solve : k-space method can only be used in unigrid mode (levelmin=levelmax)"); } - - if( verbosity > 0 ) - { + + if (verbosity > 0) + { std::cout << "-------------------------------------------------------------\n"; std::cout << " - Invoking unigrid FFT Poisson solver..." << std::endl; } - - int nx,ny,nz,nzp; + + int nx, ny, nz, nzp; nx = f.get_grid(f.levelmax())->size(0); ny = f.get_grid(f.levelmax())->size(1); nz = f.get_grid(f.levelmax())->size(2); - nzp = 2*(nz/2+1); - - + nzp = 2 * (nz / 2 + 1); + //... copy data .................................................. - fftw_real *data = new fftw_real[(size_t)nx*(size_t)ny*(size_t)nzp]; - fftw_complex *cdata = reinterpret_cast (data); - - #pragma omp parallel for - for( int i=0; i(data); + +#pragma omp parallel for + for (int i = 0; i < nx; ++i) + for (int j = 0; j < ny; ++j) + for (int k = 0; k < nz; ++k) { - size_t idx = (size_t)(i*ny+j)*(size_t)nzp+(size_t)k; - data[idx] = (*f.get_grid(f.levelmax()))(i,j,k); + size_t idx = (size_t)(i * ny + j) * (size_t)nzp + (size_t)k; + data[idx] = (*f.get_grid(f.levelmax()))(i, j, k); } - + //... perform FFT and Poisson solve................................ LOGUSER("Performing forward transform."); #ifdef FFTW3 - #ifdef SINGLE_PRECISION - fftwf_plan - plan = fftwf_plan_dft_r2c_3d( nx, ny, nz, data, cdata, FFTW_ESTIMATE ), - iplan = fftwf_plan_dft_c2r_3d( nx, ny, nz, cdata, data, FFTW_ESTIMATE ); - +#ifdef SINGLE_PRECISION + fftwf_plan + plan = fftwf_plan_dft_r2c_3d(nx, ny, nz, data, cdata, FFTW_ESTIMATE), + iplan = fftwf_plan_dft_c2r_3d(nx, ny, nz, cdata, data, FFTW_ESTIMATE); + fftwf_execute(plan); - #else - fftw_plan - plan = fftw_plan_dft_r2c_3d( nx, ny, nz, data, cdata, FFTW_ESTIMATE ), - iplan = fftw_plan_dft_c2r_3d( nx, ny, nz, cdata, data, FFTW_ESTIMATE ); - - fftw_execute(plan); - #endif - #else - rfftwnd_plan - plan = rfftw3d_create_plan( nx,ny,nz, - FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE), - iplan = rfftw3d_create_plan( nx,ny,nz, - FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE); - - - #ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, data, NULL ); - #else - rfftwnd_one_real_to_complex( plan, data, NULL ); - #endif - + fftw_plan + plan = fftw_plan_dft_r2c_3d(nx, ny, nz, data, cdata, FFTW_ESTIMATE), + iplan = fftw_plan_dft_c2r_3d(nx, ny, nz, cdata, data, FFTW_ESTIMATE); + + fftw_execute(plan); #endif - double kfac = 2.0*M_PI; - double fac = -1.0/(double)((size_t)nx*(size_t)ny*(size_t)nz); - - #pragma omp parallel for - for( int i=0; inx/2) ii-=nx; - int jj = j; if(jj>ny/2) jj-=ny; + int ii = i; + if (ii > nx / 2) + ii -= nx; + int jj = j; + if (jj > ny / 2) + jj -= ny; double ki = (double)ii; double kj = (double)jj; double kk = (double)k; - - double kk2 = kfac*kfac*(ki*ki+kj*kj+kk*kk); - - size_t idx = (size_t)(i*ny+j)*(size_t)(nzp/2)+(size_t)k; - - RE(cdata[idx]) *= -1.0/kk2*fac; - IM(cdata[idx]) *= -1.0/kk2*fac; + + double kk2 = kfac * kfac * (ki * ki + kj * kj + kk * kk); + + size_t idx = (size_t)(i * ny + j) * (size_t)(nzp / 2) + (size_t)k; + + RE(cdata[idx]) *= -1.0 / kk2 * fac; + IM(cdata[idx]) *= -1.0 / kk2 * fac; } RE(cdata[0]) = 0.0; IM(cdata[0]) = 0.0; - + LOGUSER("Performing backward transform."); - + #ifdef FFTW3 - #ifdef SINGLE_PRECISION +#ifdef SINGLE_PRECISION fftwf_execute(iplan); fftwf_destroy_plan(plan); fftwf_destroy_plan(iplan); - #else +#else fftw_execute(iplan); fftw_destroy_plan(plan); fftw_destroy_plan(iplan); - #endif +#endif #else - #ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL ); - #else - rfftwnd_one_complex_to_real( iplan, cdata, NULL ); - #endif - +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), iplan, cdata, NULL); +#else + rfftwnd_one_complex_to_real(iplan, cdata, NULL); +#endif + rfftwnd_destroy_plan(plan); rfftwnd_destroy_plan(iplan); #endif - - - - - //... copy data .......................................... - #pragma omp parallel for - for( int i=0; im_nbnd; - for( int iy=-nb; iysize(0); ny = u.get_grid(u.levelmax())->size(1); nz = u.get_grid(u.levelmax())->size(2); - nzp = 2*(nz/2+1); - + nzp = 2 * (nz / 2 + 1); + //... copy data .................................................. - fftw_real *data = new fftw_real[(size_t)nx*(size_t)ny*(size_t)nzp]; - fftw_complex *cdata = reinterpret_cast (data); - - #pragma omp parallel for - for( int i=0; i(data); + +#pragma omp parallel for + for (int i = 0; i < nx; ++i) + for (int j = 0; j < ny; ++j) + for (int k = 0; k < nz; ++k) { - size_t idx = (size_t)(i*ny+j)*(size_t)nzp+(size_t)k; - data[idx] = (*u.get_grid(u.levelmax()))(i,j,k); + size_t idx = (size_t)(i * ny + j) * (size_t)nzp + (size_t)k; + data[idx] = (*u.get_grid(u.levelmax()))(i, j, k); } - - //... perform FFT and Poisson solve................................ - + + //... perform FFT and Poisson solve................................ + #ifdef FFTW3 - #ifdef SINGLE_PRECISION - fftwf_plan - plan = fftwf_plan_dft_r2c_3d(nx, ny, nz, data, cdata, FFTW_ESTIMATE), - iplan = fftwf_plan_dft_c2r_3d(nx, ny, nz, cdata, data, FFTW_ESTIMATE); - +#ifdef SINGLE_PRECISION + fftwf_plan + plan = fftwf_plan_dft_r2c_3d(nx, ny, nz, data, cdata, FFTW_ESTIMATE), + iplan = fftwf_plan_dft_c2r_3d(nx, ny, nz, cdata, data, FFTW_ESTIMATE); + fftwf_execute(plan); - #else - fftw_plan - plan = fftw_plan_dft_r2c_3d(nx, ny, nz, data, cdata, FFTW_ESTIMATE), - iplan = fftw_plan_dft_c2r_3d(nx, ny, nz, cdata, data, FFTW_ESTIMATE); - - fftw_execute(plan); - #endif #else - rfftwnd_plan - plan = rfftw3d_create_plan( nx,ny,nz, - FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE), - iplan = rfftw3d_create_plan( nx,ny,nz, - FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE); - - - #ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, data, NULL ); - #else - rfftwnd_one_real_to_complex( plan, data, NULL ); - #endif - + fftw_plan + plan = fftw_plan_dft_r2c_3d(nx, ny, nz, data, cdata, FFTW_ESTIMATE), + iplan = fftw_plan_dft_c2r_3d(nx, ny, nz, cdata, data, FFTW_ESTIMATE); + + fftw_execute(plan); #endif - - double fac = -1.0/(double)((size_t)nx*(size_t)ny*(size_t)nz); - double kfac = 2.0*M_PI; - - - - bool do_glass = cf_.getValueSafe("output","glass",false); - bool deconvolve_cic = do_glass | cf_.getValueSafe("output","glass_cicdeconvolve",false); - - if( deconvolve_cic ) - LOGINFO("CIC deconvolution is enabled for kernel!"); - - #pragma omp parallel for - for( int i=0; i("output", "glass", false); + bool deconvolve_cic = do_glass | cf_.getValueSafe("output", "glass_cicdeconvolve", false); + + if (deconvolve_cic) + LOGINFO("CIC deconvolution is enabled for kernel!"); + +#pragma omp parallel for + for (int i = 0; i < nx; ++i) + for (int j = 0; j < ny; ++j) + for (int k = 0; k < nz / 2 + 1; ++k) { - size_t idx = (size_t)(i*ny+j)*(size_t)(nzp/2)+(size_t)k; - int ii = i; if(ii>nx/2) ii-=nx; - int jj = j; if(jj>ny/2) jj-=ny; + size_t idx = (size_t)(i * ny + j) * (size_t)(nzp / 2) + (size_t)k; + int ii = i; + if (ii > nx / 2) + ii -= nx; + int jj = j; + if (jj > ny / 2) + jj -= ny; const double ki = (double)ii; const double kj = (double)jj; const double kk = (double)k; - - const double kkdir[3] = {kfac*ki,kfac*kj,kfac*kk}; + + const double kkdir[3] = {kfac * ki, kfac * kj, kfac * kk}; const double kdir = kkdir[dir]; - + double re = RE(cdata[idx]); double im = IM(cdata[idx]); - - RE(cdata[idx]) = fac*im*kdir; - IM(cdata[idx]) = -fac*re*kdir; - - + + RE(cdata[idx]) = fac * im * kdir; + IM(cdata[idx]) = -fac * re * kdir; + #ifdef FFTW3 - if( deconvolve_cic ) + if (deconvolve_cic) { double dfx, dfy, dfz; - dfx = M_PI*ki/(double)nx; dfx = (i!=0)? sin(dfx)/dfx : 1.0; - dfy = M_PI*kj/(double)ny; dfy = (j!=0)? sin(dfy)/dfy : 1.0; - dfz = M_PI*kk/(double)nz; dfz = (k!=0)? sin(dfz)/dfz : 1.0; - - dfx = 1.0/(dfx*dfy*dfz); dfx = dfx*dfx; + dfx = M_PI * ki / (double)nx; + dfx = (i != 0) ? sin(dfx) / dfx : 1.0; + dfy = M_PI * kj / (double)ny; + dfy = (j != 0) ? sin(dfy) / dfy : 1.0; + dfz = M_PI * kk / (double)nz; + dfz = (k != 0) ? sin(dfz) / dfz : 1.0; + + dfx = 1.0 / (dfx * dfy * dfz); + dfx = dfx * dfx; cdata[idx][0] *= dfx; cdata[idx][1] *= dfx; - } #else - if( deconvolve_cic ) + if (deconvolve_cic) { double dfx, dfy, dfz; - dfx = M_PI*ki/(double)nx; dfx = (i!=0)? sin(dfx)/dfx : 1.0; - dfy = M_PI*kj/(double)ny; dfy = (j!=0)? sin(dfy)/dfy : 1.0; - dfz = M_PI*kk/(double)nz; dfz = (k!=0)? sin(dfz)/dfz : 1.0; - - dfx = 1.0/(dfx*dfy*dfz); dfx = dfx*dfx; + dfx = M_PI * ki / (double)nx; + dfx = (i != 0) ? sin(dfx) / dfx : 1.0; + dfy = M_PI * kj / (double)ny; + dfy = (j != 0) ? sin(dfy) / dfy : 1.0; + dfz = M_PI * kk / (double)nz; + dfz = (k != 0) ? sin(dfz) / dfz : 1.0; + + dfx = 1.0 / (dfx * dfy * dfz); + dfx = dfx * dfx; cdata[idx].re *= dfx; cdata[idx].im *= dfx; } -#endif - +#endif + /*double ktot = sqrt(ii*ii+jj*jj+k*k); if( ktot >= nx/2 )//dir == 0 && i==nx/2 || dir == 1 && j==ny/2 || dir == 2 && k==nz/2 ) { @@ -791,325 +731,295 @@ double fft_poisson_plugin::gradient( int dir, grid_hierarchy& u, grid_hierarchy& cdata[idx].im = 0.0; }*/ } - + RE(cdata[0]) = 0.0; IM(cdata[0]) = 0.0; - + #ifdef FFTW3 - #ifdef SINGLE_PRECISION +#ifdef SINGLE_PRECISION fftwf_execute(iplan); fftwf_destroy_plan(plan); fftwf_destroy_plan(iplan); - #else +#else fftw_execute(iplan); fftw_destroy_plan(plan); fftw_destroy_plan(iplan); - #endif +#endif #else - #ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL ); - #else - rfftwnd_one_complex_to_real( iplan, cdata, NULL ); - #endif - +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), iplan, cdata, NULL); +#else + rfftwnd_one_complex_to_real(iplan, cdata, NULL); +#endif + rfftwnd_destroy_plan(plan); rfftwnd_destroy_plan(iplan); #endif - + //... copy data .......................................... double dmax = 0.0; - for( int i=0; idmax) + size_t idx = ((size_t)i * ny + (size_t)j) * nzp + (size_t)k; + (*Du.get_grid(u.levelmax()))(i, j, k) = data[idx]; + if (fabs(data[idx]) > dmax) dmax = fabs(data[idx]); } delete[] data; - + LOGUSER("Done with k-space gradient.\n"); - + return 0.0; } /**************************************************************************************/ /**************************************************************************************/ - -template -double poisson_hybrid_kernel( int idir, int i, int j, int k, int n ) +template +double poisson_hybrid_kernel(int idir, int i, int j, int k, int n) { return 1.0; } -template<> -inline double poisson_hybrid_kernel<2>(int idir, int i, int j, int k, int n ) +template <> +inline double poisson_hybrid_kernel<2>(int idir, int i, int j, int k, int n) { - if(i==0&&j==0&&k==0) + if (i == 0 && j == 0 && k == 0) return 0.0; - - double - ki(M_PI*(double)i/(double)n), - kj(M_PI*(double)j/(double)n), - kk(M_PI*(double)k/(double)n), - kr(sqrt(ki*ki+kj*kj+kk*kk)); - + + double + ki(M_PI * (double)i / (double)n), + kj(M_PI * (double)j / (double)n), + kk(M_PI * (double)k / (double)n), + kr(sqrt(ki * ki + kj * kj + kk * kk)); + double grad = 1.0, laplace = 1.0; - - if( idir==0 ) + + if (idir == 0) grad = sin(ki); - else if( idir==1 ) + else if (idir == 1) grad = sin(kj); - else + else grad = sin(kk); - - laplace = 2.0*((-cos(ki)+1.0)+(-cos(kj)+1.0)+(-cos(kk)+1.0)); - + + laplace = 2.0 * ((-cos(ki) + 1.0) + (-cos(kj) + 1.0) + (-cos(kk) + 1.0)); + double kgrad = 1.0; - if( idir==0 ) + if (idir == 0) kgrad = ki; - else if( idir ==1) + else if (idir == 1) kgrad = kj; - else if( idir ==2) + else if (idir == 2) kgrad = kk; - - return kgrad/kr/kr-grad/laplace; + + return kgrad / kr / kr - grad / laplace; } -template<> -inline double poisson_hybrid_kernel<4>(int idir, int i, int j, int k, int n ) +template <> +inline double poisson_hybrid_kernel<4>(int idir, int i, int j, int k, int n) { - - if(i==0&&j==0&&k==0) - return 0.0; - double - ki(M_PI*(double)i/(double)n), - kj(M_PI*(double)j/(double)n), - kk(M_PI*(double)k/(double)n), - kr(sqrt(ki*ki+kj*kj+kk*kk)); + if (i == 0 && j == 0 && k == 0) + return 0.0; + + double + ki(M_PI * (double)i / (double)n), + kj(M_PI * (double)j / (double)n), + kk(M_PI * (double)k / (double)n), + kr(sqrt(ki * ki + kj * kj + kk * kk)); double grad = 1.0, laplace = 1.0; - if( idir==0 ) - grad = 0.166666666667*(-sin(2.*ki)+8.*sin(ki)); - else if( idir==1 ) - grad = 0.166666666667*(-sin(2.*kj)+8.*sin(kj)); - else if( idir==2 ) - grad = 0.166666666667*(-sin(2.*kk)+8.*sin(kk)); + if (idir == 0) + grad = 0.166666666667 * (-sin(2. * ki) + 8. * sin(ki)); + else if (idir == 1) + grad = 0.166666666667 * (-sin(2. * kj) + 8. * sin(kj)); + else if (idir == 2) + grad = 0.166666666667 * (-sin(2. * kk) + 8. * sin(kk)); - laplace = 0.1666666667*((cos(2*ki)-16.*cos(ki)+15.) - +(cos(2*kj)-16.*cos(kj)+15.) - +(cos(2*kk)-16.*cos(kk)+15.)); + laplace = 0.1666666667 * ((cos(2 * ki) - 16. * cos(ki) + 15.) + (cos(2 * kj) - 16. * cos(kj) + 15.) + (cos(2 * kk) - 16. * cos(kk) + 15.)); double kgrad = 1.0; - if( idir==0 ) - kgrad = ki; - else if( idir ==1) - kgrad = kj; - else if( idir ==2) - kgrad = kk; + if (idir == 0) + kgrad = ki; + else if (idir == 1) + kgrad = kj; + else if (idir == 2) + kgrad = kk; - return kgrad/kr/kr-grad/laplace; + return kgrad / kr / kr - grad / laplace; } - -template<> -inline double poisson_hybrid_kernel<6>(int idir, int i, int j, int k, int n ) -{ - double - ki(M_PI*(double)i/(double)n), - kj(M_PI*(double)j/(double)n), - kk(M_PI*(double)k/(double)n), - kr(sqrt(ki*ki+kj*kj+kk*kk)); - if(i==0&&j==0&&k==0) +template <> +inline double poisson_hybrid_kernel<6>(int idir, int i, int j, int k, int n) +{ + double + ki(M_PI * (double)i / (double)n), + kj(M_PI * (double)j / (double)n), + kk(M_PI * (double)k / (double)n), + kr(sqrt(ki * ki + kj * kj + kk * kk)); + + if (i == 0 && j == 0 && k == 0) return 0.0; double grad = 1.0, laplace = 1.0; - if( idir==0 ) - grad = 0.0333333333333*(sin(3.*ki)-9.*sin(2.*ki)+45.*sin(ki)); - else if( idir==1 ) - grad = 0.0333333333333*(sin(3.*kj)-9.*sin(2.*kj)+45.*sin(kj)); - else if( idir==2 ) - grad = 0.0333333333333*(sin(3.*kk)-9.*sin(2.*kk)+45.*sin(kk)); + if (idir == 0) + grad = 0.0333333333333 * (sin(3. * ki) - 9. * sin(2. * ki) + 45. * sin(ki)); + else if (idir == 1) + grad = 0.0333333333333 * (sin(3. * kj) - 9. * sin(2. * kj) + 45. * sin(kj)); + else if (idir == 2) + grad = 0.0333333333333 * (sin(3. * kk) - 9. * sin(2. * kk) + 45. * sin(kk)); - laplace = 0.01111111111111*( - (-2.*cos(3.0*ki)+27.*cos(2.*ki)-270.*cos(ki)+245.) - +(-2.*cos(3.0*kj)+27.*cos(2.*kj)-270.*cos(kj)+245.) - +(-2.*cos(3.0*kk)+27.*cos(2.*kk)-270.*cos(kk)+245.)); + laplace = 0.01111111111111 * ((-2. * cos(3.0 * ki) + 27. * cos(2. * ki) - 270. * cos(ki) + 245.) + (-2. * cos(3.0 * kj) + 27. * cos(2. * kj) - 270. * cos(kj) + 245.) + (-2. * cos(3.0 * kk) + 27. * cos(2. * kk) - 270. * cos(kk) + 245.)); double kgrad = 1.0; - if( idir==0 ) + if (idir == 0) kgrad = ki; - else if( idir ==1) + else if (idir == 1) kgrad = kj; - else if( idir ==2) + else if (idir == 2) kgrad = kk; -// if( i*i+j*j+k*k >= n*n ) -// kgrad = 0.0; - - return kgrad/kr/kr-grad/laplace; + // if( i*i+j*j+k*k >= n*n ) + // kgrad = 0.0; + + return kgrad / kr / kr - grad / laplace; } - - -template -void do_poisson_hybrid( fftw_real* data, int idir, int nxp, int nyp, int nzp, bool periodic, bool deconvolve_cic ) + +template +void do_poisson_hybrid(fftw_real *data, int idir, int nxp, int nyp, int nzp, bool periodic, bool deconvolve_cic) { - double fftnorm = 1.0/((double)nxp*(double)nyp*(double)nzp); - - fftw_complex *cdata = reinterpret_cast(data); - - if( deconvolve_cic ) - LOGINFO("CIC deconvolution step is enabled."); + double fftnorm = 1.0 / ((double)nxp * (double)nyp * (double)nzp); + + fftw_complex *cdata = reinterpret_cast(data); + + if (deconvolve_cic) + LOGINFO("CIC deconvolution step is enabled."); #ifdef FFTW3 - #ifdef SINGLE_PRECISION +#ifdef SINGLE_PRECISION fftwf_plan iplan, plan; - plan = fftwf_plan_dft_r2c_3d(nxp, nyp, nzp, data, cdata, FFTW_ESTIMATE); + plan = fftwf_plan_dft_r2c_3d(nxp, nyp, nzp, data, cdata, FFTW_ESTIMATE); iplan = fftwf_plan_dft_c2r_3d(nxp, nyp, nzp, cdata, data, FFTW_ESTIMATE); fftwf_execute(plan); - #else +#else fftw_plan iplan, plan; - plan = fftw_plan_dft_r2c_3d(nxp, nyp, nzp, data, cdata, FFTW_ESTIMATE); + plan = fftw_plan_dft_r2c_3d(nxp, nyp, nzp, data, cdata, FFTW_ESTIMATE); iplan = fftw_plan_dft_c2r_3d(nxp, nyp, nzp, cdata, data, FFTW_ESTIMATE); fftw_execute(plan); - #endif -#else - rfftwnd_plan iplan, plan; - - plan = rfftw3d_create_plan( nxp, nyp, nzp, - FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE); - - iplan = rfftw3d_create_plan( nxp, nyp, nzp, - FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE); - - #ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), plan, data, NULL ); - #else - rfftwnd_one_real_to_complex( plan, data, NULL ); - #endif #endif - - long double ksum = 0.0; - size_t kcount = 0; - - #pragma omp parallel for reduction(+:ksum,kcount) - for( int i=0; i nxp/2 ) ki-=nxp; - if( kj > nyp/2 ) kj-=nyp; - - //... apply hybrid correction - double dk = poisson_hybrid_kernel(idir, ki, kj, k, nxp/2 ); - //RE(cdata[ii]) -= ksum; - - fftw_real re = RE(cdata[ii]), im = IM(cdata[ii]); - - RE(cdata[ii]) = -im*dk*fftnorm; - IM(cdata[ii]) = re*dk*fftnorm; +#else + rfftwnd_plan iplan, plan; - if( deconvolve_cic ) + plan = rfftw3d_create_plan(nxp, nyp, nzp, + FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE); + + iplan = rfftw3d_create_plan(nxp, nyp, nzp, + FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); + +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), plan, data, NULL); +#else + rfftwnd_one_real_to_complex(plan, data, NULL); +#endif +#endif + +#pragma omp parallel for + for (int i = 0; i < nxp; ++i) + for (int j = 0; j < nyp; ++j) + for (int k = 0; k < nzp / 2 + 1; ++k) + { + + size_t ii = (size_t)(i * nyp + j) * (size_t)(nzp / 2 + 1) + (size_t)k; + + int ki(i), kj(j), kk(k); + if (ki > nxp / 2) + ki -= nxp; + if (kj > nyp / 2) + kj -= nyp; + + //... apply hybrid correction + double dk = poisson_hybrid_kernel(idir, ki, kj, k, nxp / 2); + + fftw_real re = RE(cdata[ii]), im = IM(cdata[ii]); + + RE(cdata[ii]) = -im * dk * fftnorm; + IM(cdata[ii]) = re * dk * fftnorm; + + if (deconvolve_cic) { double dfx, dfy, dfz; - dfx = M_PI*ki/(double)nxp; dfx = (i!=0)? sin(dfx)/dfx : 1.0; - dfy = M_PI*kj/(double)nyp; dfy = (j!=0)? sin(dfy)/dfy : 1.0; - dfz = M_PI*kk/(double)nzp; dfz = (k!=0)? sin(dfz)/dfz : 1.0; - - dfx = 1.0/(dfx*dfy*dfz); dfx = dfx*dfx; + dfx = M_PI * ki / (double)nxp; + dfx = (i != 0) ? sin(dfx) / dfx : 1.0; + dfy = M_PI * kj / (double)nyp; + dfy = (j != 0) ? sin(dfy) / dfy : 1.0; + dfz = M_PI * kk / (double)nzp; + dfz = (k != 0) ? sin(dfz) / dfz : 1.0; + + dfx = 1.0 / (dfx * dfy * dfz); + dfx = dfx * dfx; RE(cdata[ii]) *= dfx; IM(cdata[ii]) *= dfx; - } - - - //RE(cdata[ii]) += ksum*fftnorm; - - //if( i==nxp/2||j==nyp/2||k==nzp/2 ) - //{ - // RE(cdata[ii]) = 0.0; - // IM(cdata[ii]) = 0.0; - //} + if ((idir == 0 && i == nxp / 2) || (idir == 1 && j == nyp / 2) || (idir == 2 && k == nzp / 2)) + { + RE(cdata[ii]) = 0.0; + IM(cdata[ii]) = 0.0; + } } - + RE(cdata[0]) = 0.0; IM(cdata[0]) = 0.0; - + #ifdef FFTW3 - #ifdef SINGLE_PRECISION +#ifdef SINGLE_PRECISION fftwf_execute(iplan); fftwf_destroy_plan(plan); fftwf_destroy_plan(iplan); - #else +#else fftw_execute(iplan); fftw_destroy_plan(plan); fftw_destroy_plan(iplan); - #endif +#endif +#else +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), iplan, cdata, NULL); #else - #ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), iplan, cdata, NULL); - #else rfftwnd_one_complex_to_real(iplan, cdata, NULL); - #endif - +#endif + rfftwnd_destroy_plan(plan); rfftwnd_destroy_plan(iplan); #endif - } - -template< typename T > -void poisson_hybrid( T& f, int idir, int order, bool periodic, bool deconvolve_cic ) + +template +void poisson_hybrid(T &f, int idir, int order, bool periodic, bool deconvolve_cic) { - int nx=f.size(0), ny=f.size(1), nz=f.size(2), nxp, nyp, nzp; - fftw_real *data; - int xo=0,yo=0,zo=0; - int nmax = std::max(nx,std::max(ny,nz)); - + int nx = f.size(0), ny = f.size(1), nz = f.size(2), nxp, nyp, nzp; + fftw_real *data; + int xo = 0, yo = 0, zo = 0; + int nmax = std::max(nx, std::max(ny, nz)); + LOGUSER("Entering hybrid Poisson solver..."); - - if(!periodic) + + const int boundary = 32; + + if (!periodic) { - nxp = 2*nmax; - nyp = 2*nmax; - nzp = 2*nmax; - xo = nmax/2; - yo = nmax/2; - zo = nmax/2; + nxp = nmax + 2 * boundary; // 2*nmax; + nyp = nmax + 2 * boundary; // 2*nmax; + nzp = nmax + 2 * boundary; // 2*nmax; + xo = boundary; // nmax/2; + yo = boundary; // nmax/2; + zo = boundary; // nmax/2; } else { @@ -1117,77 +1027,77 @@ void poisson_hybrid( T& f, int idir, int order, bool periodic, bool deconvolve_c nyp = nmax; nzp = nmax; } - - - data = new fftw_real[(size_t)nxp*(size_t)nyp*(size_t)(nzp+2)]; - - if(idir==0) - std::cout << " - Performing hybrid Poisson step... (" << nxp << ", " << nyp << ", " << nzp << ")\n"; - - //size_t N = (size_t)nxp*(size_t)nyp*2*((size_t)nzp/2+1); - - //#pragma omp parallel for - //for( size_t i=0; i(data, idir, nxp, nyp, nzp, periodic, deconvolve_cic ); - break; - case 4: - do_poisson_hybrid<4>(data, idir, nxp, nyp, nzp, periodic, deconvolve_cic ); - break; - case 6: - do_poisson_hybrid<6>(data, idir, nxp, nyp, nzp, periodic, deconvolve_cic ); - break; - default: - std::cerr << " - ERROR: invalid operator order specified in deconvolution."; - LOGERR("Invalid operator order specified in deconvolution."); - break; + + switch (order) + { + case 2: + do_poisson_hybrid<2>(data, idir, nxp, nyp, nzp, periodic, deconvolve_cic); + break; + case 4: + do_poisson_hybrid<4>(data, idir, nxp, nyp, nzp, periodic, deconvolve_cic); + break; + case 6: + do_poisson_hybrid<6>(data, idir, nxp, nyp, nzp, periodic, deconvolve_cic); + break; + default: + std::cerr << " - ERROR: invalid operator order specified in deconvolution."; + LOGERR("Invalid operator order specified in deconvolution."); + break; } - + LOGUSER("Copying hybrid correction factor..."); - - #pragma omp parallel for - for( int i=0; i >( MeshvarBnd& f, int idir, int order, bool periodic, bool deconvolve_cic ); -template void poisson_hybrid< MeshvarBnd >( MeshvarBnd& f, int idir, int order, bool periodic, bool deconvolve_cic ); +template void poisson_hybrid>(MeshvarBnd &f, int idir, int order, bool periodic, bool deconvolve_cic); +template void poisson_hybrid>(MeshvarBnd &f, int idir, int order, bool periodic, bool deconvolve_cic); -namespace{ +namespace +{ poisson_plugin_creator_concrete multigrid_poisson_creator("mg_poisson"); poisson_plugin_creator_concrete fft_poisson_creator("fft_poisson"); } diff --git a/src/random.cc b/src/random.cc index d614683..f8564b6 100644 --- a/src/random.cc +++ b/src/random.cc @@ -103,7 +103,7 @@ void rapid_proto_ngenic_rng(size_t res, long baseseed, random_numbers &R) double fnorm = 1. / sqrt(res * res * res); -#warning need to check for race conditions below +// #warning need to check for race conditions below //#pragma omp parallel for for (size_t i = 0; i < res; i++) { @@ -819,9 +819,7 @@ random_numbers::random_numbers(random_numbers &rc, unsigned cubesize, long //if( isolated ) phasefac *= 1.5; // embedding of coarse white noise by fourier interpolation - -#if 1 -#pragma omp parallel for + #pragma omp parallel for for (int i = 0; i < (int)nxc; i++) for (int j = 0; j < (int)nyc; j++) for (int k = 0; k < (int)nzc / 2 + 1; k++) @@ -859,120 +857,11 @@ random_numbers::random_numbers(random_numbers &rc, unsigned cubesize, long } else { - //RE(cfine[qf]) = val.real(); - //IM(cfine[qf]) = 0.0; + // RE(cfine[qf]) = val.real(); + // IM(cfine[qf]) = 0.0; } } -#else - - // 0 0 -#pragma omp parallel for - for (int i = 0; i < (int)nxc / 2 + 1; i++) - for (int j = 0; j < (int)nyc / 2 + 1; j++) - for (int k = 0; k < (int)nzc / 2 + 1; k++) - { - int ii(i), jj(j), kk(k); - size_t qc, qf; - qc = ((size_t)i * (size_t)nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k; - qf = ((size_t)ii * (size_t)ny + (size_t)jj) * (nz / 2 + 1) + (size_t)kk; - - double kx = (i <= (int)nxc / 2) ? (double)i : (double)(i - (int)nxc); - double ky = (j <= (int)nyc / 2) ? (double)j : (double)(j - (int)nyc); - double kz = (k <= (int)nzc / 2) ? (double)k : (double)(k - (int)nzc); - - double phase = phasefac * (kx / nxc + ky / nyc + kz / nzc) * M_PI; - std::complex val_phas(cos(phase), sin(phase)); - - std::complex val(RE(ccoarse[qc]), IM(ccoarse[qc])); - val *= sqrt8 * val_phas; - - RE(cfine[qf]) = val.real(); - IM(cfine[qf]) = val.imag(); - - //if( k==0 & (i==(int)nxc/2 || j==(int)nyc/2) ) - // IM(cfine[qf]) *= -1.0; - } - // 1 0 -#pragma omp parallel for - for (int i = nxc / 2; i < (int)nxc; i++) - for (int j = 0; j < (int)nyc / 2 + 1; j++) - for (int k = 0; k < (int)nzc / 2 + 1; k++) - { - int ii(i + nx / 2), jj(j), kk(k); - size_t qc, qf; - qc = ((size_t)i * (size_t)nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k; - qf = ((size_t)ii * (size_t)ny + (size_t)jj) * (nz / 2 + 1) + (size_t)kk; - - double kx = (i <= (int)nxc / 2) ? (double)i : (double)(i - (int)nxc); - double ky = (j <= (int)nyc / 2) ? (double)j : (double)(j - (int)nyc); - double kz = (k <= (int)nzc / 2) ? (double)k : (double)(k - (int)nzc); - - double phase = phasefac * (kx / nxc + ky / nyc + kz / nzc) * M_PI; - std::complex val_phas(cos(phase), sin(phase)); - - std::complex val(RE(ccoarse[qc]), IM(ccoarse[qc])); - val *= sqrt8 * val_phas; - - RE(cfine[qf]) = val.real(); - IM(cfine[qf]) = val.imag(); - - //if( k==0 & (i==(int)nxc/2 || j==(int)nyc/2) ) - //IM(cfine[qf]) *= -1.0; - } - // 0 1 -#pragma omp parallel for - for (int i = 0; i < (int)nxc / 2 + 1; i++) - for (int j = nyc / 2; j < (int)nyc; j++) - for (int k = 0; k < (int)nzc / 2 + 1; k++) - { - int ii(i), jj(j + ny / 2), kk(k); - size_t qc, qf; - qc = ((size_t)i * (size_t)nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k; - qf = ((size_t)ii * (size_t)ny + (size_t)jj) * (nz / 2 + 1) + (size_t)kk; - - double kx = (i <= (int)nxc / 2) ? (double)i : (double)(i - (int)nxc); - double ky = (j <= (int)nyc / 2) ? (double)j : (double)(j - (int)nyc); - double kz = (k <= (int)nzc / 2) ? (double)k : (double)(k - (int)nzc); - - double phase = phasefac * (kx / nxc + ky / nyc + kz / nzc) * M_PI; - std::complex val_phas(cos(phase), sin(phase)); - - std::complex val(RE(ccoarse[qc]), IM(ccoarse[qc])); - val *= sqrt8 * val_phas; - - RE(cfine[qf]) = val.real(); - IM(cfine[qf]) = val.imag(); - - //if( k==0 && (i==(int)nxc/2 || j==(int)nyc/2) ) - // IM(cfine[qf]) *= -1.0; - } - - // 1 1 -#pragma omp parallel for - for (int i = nxc / 2; i < (int)nxc; i++) - for (int j = nyc / 2; j < (int)nyc; j++) - for (int k = 0; k < (int)nzc / 2 + 1; k++) - { - int ii(i + nx / 2), jj(j + ny / 2), kk(k); - size_t qc, qf; - qc = ((size_t)i * (size_t)nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k; - qf = ((size_t)ii * (size_t)ny + (size_t)jj) * (nz / 2 + 1) + (size_t)kk; - - double kx = (i <= (int)nxc / 2) ? (double)i : (double)(i - (int)nxc); - double ky = (j <= (int)nyc / 2) ? (double)j : (double)(j - (int)nyc); - double kz = (k <= (int)nzc / 2) ? (double)k : (double)(k - (int)nzc); - - double phase = phasefac * (kx / nxc + ky / nyc + kz / nzc) * M_PI; - std::complex val_phas(cos(phase), sin(phase)); - - std::complex val(RE(ccoarse[qc]), IM(ccoarse[qc])); - val *= sqrt8 * val_phas; - - RE(cfine[qf]) = val.real(); - IM(cfine[qf]) = val.imag(); - } -#endif delete[] rcoarse; @@ -1711,12 +1600,9 @@ void random_number_generator::compute_random_numbers(void) lx[0] = prefh_->size(ilevel, 0) + 2*margin[0]; lx[1] = prefh_->size(ilevel, 1) + 2*margin[1]; lx[2] = prefh_->size(ilevel, 2) + 2*margin[2]; - x0[0] = prefh_->offset_abs(ilevel, 0) - lfac * shift[0] - margin[0];//lx[0] / 4; - x0[1] = prefh_->offset_abs(ilevel, 1) - lfac * shift[1] - margin[1];//lx[1] / 4; - x0[2] = prefh_->offset_abs(ilevel, 2) - lfac * shift[2] - margin[2];//lx[2] / 4; - - LOGINFO("margin=%ld",prefh_->get_margin()); - LOGINFO("x0=(%ld,%ld,%ld) lx=(%ld,%ld,%ld)",x0[0],x0[1],x0[2],lx[0],lx[1],lx[2]); + x0[0] = prefh_->offset_abs(ilevel, 0) - lfac * shift[0] - margin[0]; + x0[1] = prefh_->offset_abs(ilevel, 1) - lfac * shift[1] - margin[1]; + x0[2] = prefh_->offset_abs(ilevel, 2) - lfac * shift[2] - margin[2]; if (randc[ilevel] == NULL) randc[ilevel] = new rng(*randc[ilevel - 1], ran_cube_size_, rngseeds_[ilevel], kavg, ilevel == levelmin_ + 1, x0, lx);