diff --git a/cosmology.cc b/cosmology.cc index 3da9ed7..ca0735c 100644 --- a/cosmology.cc +++ b/cosmology.cc @@ -494,97 +494,90 @@ void compute_2LPT_source_FFT( config_file& cf_, const grid_hierarchy& u, grid_hi delete[] data_33; } - void compute_2LPT_source( const grid_hierarchy& u, grid_hierarchy& fnew, unsigned order ) { fnew = u; + fnew.zero(); for( unsigned ilevel=u.levelmin(); ilevel<=u.levelmax(); ++ilevel ) { double h = pow(2.0,ilevel), h2 = h*h, h2_4 = 0.25*h2; meshvar_bnd *pvar = fnew.get_grid(ilevel); - + if ( order == 2 ) { #pragma omp parallel for for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - { - double D[3][3]; + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + { + double D[3][3]; + + D[0][0] = (ACC(ix-2,iy,iz)-2.0*ACC(ix,iy,iz)+ACC(ix+2,iy,iz)) * h2_4; + D[1][1] = (ACC(ix,iy-2,iz)-2.0*ACC(ix,iy,iz)+ACC(ix,iy+2,iz)) * h2_4; + D[2][2] = (ACC(ix,iy,iz-2)-2.0*ACC(ix,iy,iz)+ACC(ix,iy,iz+2)) * h2_4; + - D[0][0] = (ACC(ix-1,iy,iz)-2.0*ACC(ix,iy,iz)+ACC(ix+1,iy,iz)) * h2; - D[1][1] = (ACC(ix,iy-1,iz)-2.0*ACC(ix,iy,iz)+ACC(ix,iy+1,iz)) * h2; - D[2][2] = (ACC(ix,iy,iz-1)-2.0*ACC(ix,iy,iz)+ACC(ix,iy,iz+1)) * h2; - D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4; - D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4; - D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4; - - - (*pvar)(ix,iy,iz) = ( D[0][0]*D[1][1] - SQR( D[0][1] ) - + D[0][0]*D[2][2] - SQR( D[0][2] ) - + D[1][1]*D[2][2] - SQR( D[1][2] )); + D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4; + D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4; + D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4; + + (*pvar)(ix,iy,iz) = ( D[0][0]*D[1][1] - D[0][1]*D[0][1] + + D[0][0]*D[2][2] - D[0][2]*D[0][2] + + D[1][1]*D[2][2] - D[1][2]*D[1][2] ); } } - else if ( order == 4 ) + else if ( order == 4 || order == 6 ) { - #pragma omp parallel for + double h2_144 = h2 / 144.; + #pragma omp parallel for for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - { - double D[3][3]; + for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) + for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) + { + //.. this is actually 8th order accurate + + double D[3][3]; + + D[0][0] = ((ACC(ix-4,iy,iz)+ACC(ix+4,iy,iz)) + - 16. * (ACC(ix-3,iy,iz)+ACC(ix+3,iy,iz)) + + 64. * (ACC(ix-2,iy,iz)+ACC(ix+2,iy,iz)) + + 16. * (ACC(ix-1,iy,iz)+ACC(ix+1,iy,iz)) + - 130.* ACC(ix,iy,iz) ) * h2_144; + + D[1][1] = ((ACC(ix,iy-4,iz)+ACC(ix,iy+4,iz)) + - 16. * (ACC(ix,iy-3,iz)+ACC(ix,iy+3,iz)) + + 64. * (ACC(ix,iy-2,iz)+ACC(ix,iy+2,iz)) + + 16. * (ACC(ix,iy-1,iz)+ACC(ix,iy+1,iz)) + - 130.* ACC(ix,iy,iz) ) * h2_144; + + D[2][2] = ((ACC(ix,iy,iz-4)+ACC(ix,iy,iz+4)) + - 16. * (ACC(ix,iy,iz-3)+ACC(ix,iy,iz+3)) + + 64. * (ACC(ix,iy,iz-2)+ACC(ix,iy,iz+2)) + + 16. * (ACC(ix,iy,iz-1)+ACC(ix,iy,iz+1)) + - 130.* ACC(ix,iy,iz) ) * h2_144; + + + D[0][1] = D[1][0] = (64.*(ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz)) + -8.*(ACC(ix-2,iy-1,iz)-ACC(ix+2,iy-1,iz)-ACC(ix-2,iy+1,iz)+ACC(ix+2,iy+1,iz) + + ACC(ix-1,iy-2,iz)-ACC(ix-1,iy+2,iz)-ACC(ix+1,iy-2,iz)+ACC(ix+1,iy+2,iz)) + +1.*(ACC(ix-2,iy-2,iz)-ACC(ix-2,iy+2,iz)-ACC(ix+2,iy-2,iz)+ACC(ix+2,iy+2,iz)))*h2_144; + D[0][2] = D[2][0] = (64.*(ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1)) + -8.*(ACC(ix-2,iy,iz-1)-ACC(ix+2,iy,iz-1)-ACC(ix-2,iy,iz+1)+ACC(ix+2,iy,iz+1) + + ACC(ix-1,iy,iz-2)-ACC(ix-1,iy,iz+2)-ACC(ix+1,iy,iz-2)+ACC(ix+1,iy,iz+2)) + +1.*(ACC(ix-2,iy,iz-2)-ACC(ix-2,iy,iz+2)-ACC(ix+2,iy,iz-2)+ACC(ix+2,iy,iz+2)))*h2_144; + D[1][2] = D[2][1] = (64.*(ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1)) + -8.*(ACC(ix,iy-2,iz-1)-ACC(ix,iy+2,iz-1)-ACC(ix,iy-2,iz+1)+ACC(ix,iy+2,iz+1) + + ACC(ix,iy-1,iz-2)-ACC(ix,iy-1,iz+2)-ACC(ix,iy+1,iz-2)+ACC(ix,iy+1,iz+2)) + +1.*(ACC(ix,iy-2,iz-2)-ACC(ix,iy-2,iz+2)-ACC(ix,iy+2,iz-2)+ACC(ix,iy+2,iz+2)))*h2_144; + + (*pvar)(ix,iy,iz) = ( D[0][0]*D[1][1] - SQR( D[0][1] ) + + D[0][0]*D[2][2] - SQR( D[0][2] ) + + D[1][1]*D[2][2] - SQR( D[1][2] ) ); - D[0][0] = (-ACC(ix-2,iy,iz)+16.*ACC(ix-1,iy,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix+1,iy,iz)-ACC(ix+2,iy,iz)) * h2/12.0; - D[1][1] = (-ACC(ix,iy-2,iz)+16.*ACC(ix,iy-1,iz)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy+1,iz)-ACC(ix,iy+2,iz)) * h2/12.0; - D[2][2] = (-ACC(ix,iy,iz-2)+16.*ACC(ix,iy,iz-1)-30.0*ACC(ix,iy,iz)+16.*ACC(ix,iy,iz+1)-ACC(ix,iy,iz+2)) * h2/12.0; - D[0][1] = D[1][0] = (ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz))*h2_4; - D[0][2] = D[2][0] = (ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1))*h2_4; - D[1][2] = D[2][1] = (ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1))*h2_4; - - - (*pvar)(ix,iy,iz) = ( D[0][0]*D[1][1] - SQR( D[0][1] ) - + D[0][0]*D[2][2] - SQR( D[0][2] ) - + D[1][1]*D[2][2] - SQR( D[1][2] )); - - } - } - else if ( order == 6 ) - { - h2_4/=36.; - h2/=180.; -#pragma omp parallel for - for( int ix = 0; ix < (int)(*u.get_grid(ilevel)).size(0); ++ix ) - for( int iy = 0; iy < (int)(*u.get_grid(ilevel)).size(1); ++iy ) - for( int iz = 0; iz < (int)(*u.get_grid(ilevel)).size(2); ++iz ) - { - double D[3][3]; - - D[0][0] = (2.*ACC(ix-3,iy,iz)-27.*ACC(ix-2,iy,iz)+270.*ACC(ix-1,iy,iz)-490.0*ACC(ix,iy,iz)+270.*ACC(ix+1,iy,iz)-27.*ACC(ix+2,iy,iz)+2.*ACC(ix+3,iy,iz)) * h2; - D[1][1] = (2.*ACC(ix,iy-3,iz)-27.*ACC(ix,iy-2,iz)+270.*ACC(ix,iy-1,iz)-490.0*ACC(ix,iy,iz)+270.*ACC(ix,iy+1,iz)-27.*ACC(ix,iy+2,iz)+2.*ACC(ix,iy+3,iz)) * h2; - D[2][2] = (2.*ACC(ix,iy,iz-3)-27.*ACC(ix,iy,iz-2)+270.*ACC(ix,iy,iz-1)-490.0*ACC(ix,iy,iz)+270.*ACC(ix,iy,iz+1)-27.*ACC(ix,iy,iz+2)+2.*ACC(ix,iy,iz+3)) * h2; - - //.. this is actually 8th order accurate - D[0][1] = D[1][0] = (64.*(ACC(ix-1,iy-1,iz)-ACC(ix-1,iy+1,iz)-ACC(ix+1,iy-1,iz)+ACC(ix+1,iy+1,iz)) - -8.*(ACC(ix-2,iy-1,iz)-ACC(ix+2,iy-1,iz)-ACC(ix-2,iy+1,iz)+ACC(ix+2,iy+1,iz) - + ACC(ix-1,iy-2,iz)-ACC(ix-1,iy+2,iz)-ACC(ix+1,iy-2,iz)+ACC(ix+1,iy+2,iz)) - +1.*(ACC(ix-2,iy-2,iz)-ACC(ix-2,iy+2,iz)-ACC(ix+2,iy-2,iz)+ACC(ix+2,iy+2,iz)))*h2_4; - D[0][2] = D[2][0] = (64.*(ACC(ix-1,iy,iz-1)-ACC(ix-1,iy,iz+1)-ACC(ix+1,iy,iz-1)+ACC(ix+1,iy,iz+1)) - -8.*(ACC(ix-2,iy,iz-1)-ACC(ix+2,iy,iz-1)-ACC(ix-2,iy,iz+1)+ACC(ix+2,iy,iz+1) - + ACC(ix-1,iy,iz-2)-ACC(ix-1,iy,iz+2)-ACC(ix+1,iy,iz-2)+ACC(ix+1,iy,iz+2)) - +1.*(ACC(ix-2,iy,iz-2)-ACC(ix-2,iy,iz+2)-ACC(ix+2,iy,iz-2)+ACC(ix+2,iy,iz+2)))*h2_4; - D[1][2] = D[2][1] = (64.*(ACC(ix,iy-1,iz-1)-ACC(ix,iy-1,iz+1)-ACC(ix,iy+1,iz-1)+ACC(ix,iy+1,iz+1)) - -8.*(ACC(ix,iy-2,iz-1)-ACC(ix,iy+2,iz-1)-ACC(ix,iy-2,iz+1)+ACC(ix,iy+2,iz+1) - + ACC(ix,iy-1,iz-2)-ACC(ix,iy-1,iz+2)-ACC(ix,iy+1,iz-2)+ACC(ix,iy+1,iz+2)) - +1.*(ACC(ix,iy-2,iz-2)-ACC(ix,iy-2,iz+2)-ACC(ix,iy+2,iz-2)+ACC(ix,iy+2,iz+2)))*h2_4; - - (*pvar)(ix,iy,iz) = ( D[0][0]*D[1][1] - SQR( D[0][1] ) - + D[0][0]*D[2][2] - SQR( D[0][2] ) - + D[1][1]*D[2][2] - SQR( D[1][2] ) ); - - } + } } @@ -594,11 +587,10 @@ void compute_2LPT_source( const grid_hierarchy& u, grid_hierarchy& fnew, unsigne } - - //.. subtract global mean so the multi-grid poisson solver behaves well + //.. subtract global mean so the multi-grid poisson solver behaves well for( int i=fnew.levelmax(); i>(int)fnew.levelmin(); --i ) - mg_straight().restrict( (*fnew.get_grid(i)), (*fnew.get_grid(i-1)) ); + mg_straight().restrict( (*fnew.get_grid(i)), (*fnew.get_grid(i-1)) ); long double sum = 0.0; int nx,ny,nz; @@ -608,9 +600,9 @@ void compute_2LPT_source( const grid_hierarchy& u, grid_hierarchy& fnew, unsigne nz = fnew.get_grid(fnew.levelmin())->size(2); for( int ix=0; ixsize(2); for( int ix=0; ix( "poisson", "kspace", false ); - bool kspace2LPT = kspace; + bool kspace2LPT = kspace; bool decic_DM = cf.getValueSafe( "output", "glass_cicdeconvolve", false ); bool decic_baryons = cf.getValueSafe( "output", "glass_cicdeconvolve", false ) & bsph; @@ -998,9 +998,11 @@ int main (int argc, const char * argv[]) LOGUSER("computing term using FFT"); compute_2LPT_source_FFT(cf, u1, f2LPT); } - LOGINFO("Solving 2LPT Poisson equation"); + + LOGINFO("Solving 2LPT Poisson equation"); u2LPT = u1; u2LPT.zero(); err = the_poisson_solver->solve(f2LPT, u2LPT); + //... if doing the hybrid step, we need a combined source term if( bdefd )