mirror of
https://github.com/cosmo-sims/monofonIC.git
synced 2024-09-19 17:03:45 +02:00
Added a transpose step at the bottom of pan_mpi_routines.c.
Fixed an error when fdim !=1 - not currently relevant to this version, but will allow the code to use - say 1/8th as much memory for storing the Panphasia coefficients - at the cost of less accurate reproduction of the phases close to the Nyquist frequency of the Fourier grid.
This commit is contained in:
parent
316b187166
commit
c13fdc0572
2 changed files with 95 additions and 49 deletions
9
external/panphasia_ho/PAN_FFTW3.h
vendored
9
external/panphasia_ho/PAN_FFTW3.h
vendored
|
@ -8,9 +8,6 @@
|
|||
#endif
|
||||
|
||||
|
||||
|
||||
#define FOURIER_DOUBLE
|
||||
|
||||
#ifdef FOURIER_DOUBLE
|
||||
#define FFTW_REAL double
|
||||
#define FFTW_PLAN fftw_plan
|
||||
|
@ -24,6 +21,9 @@
|
|||
#define FFTW_FREE fftw_free
|
||||
#define FFTW_ALLOC_COMPLEX fftw_alloc_complex
|
||||
#define FFTW_MPI_LOCAL_SIZE_MANY fftw_mpi_local_size_many
|
||||
#define FFTW_MPI_LOCAL_SIZE_MANY_TRANSPOSED fftw_mpi_local_size_many_transposed
|
||||
#define FFTW_MPI_PLAN_MANY_TRANSPOSE fftw_mpi_plan_many_transpose
|
||||
#define FFTW_MPI_EXECUTE_R2R fftw_mpi_execute_r2r
|
||||
#define FFTW_PLAN_MANY_DFT fftw_plan_many_dft
|
||||
#define FFTW_MPI_LOCAL_SIZE_3D fftw_mpi_local_size_3d
|
||||
#define FFTW_MPI_PLAN_MANY_DTF fftw_mpi_plan_many_dft
|
||||
|
@ -43,6 +43,9 @@
|
|||
#define FFTW_FREE fftwf_free
|
||||
#define FFTW_ALLOC_COMPLEX fftwf_alloc_complex
|
||||
#define FFTW_MPI_LOCAL_SIZE_MANY fftwf_mpi_local_size_many
|
||||
#define FFTW_MPI_LOCAL_SIZE_MANY_TRANSPOSED fftwf_mpi_local_size_many_transposed
|
||||
#define FFTW_MPI_PLAN_MANY_TRANSPOSE fftwf_mpi_plan_many_transpose
|
||||
#define FFTW_MPI_EXECUTE_R2R fftwf_mpi_execute_r2r
|
||||
#define FFTW_PLAN_MANY_DFT fftwf_plan_many_dft
|
||||
#define FFTW_MPI_LOCAL_SIZE_3D fftwf_mpi_local_size_3d
|
||||
#define FFTW_MPI_PLAN_MANY_DTF fftwf_mpi_plan_many_dft
|
||||
|
|
135
external/panphasia_ho/pan_mpi_routines.c
vendored
135
external/panphasia_ho/pan_mpi_routines.c
vendored
|
@ -45,7 +45,7 @@ int flag_output_mode=2;
|
|||
int error;
|
||||
ptrdiff_t size_to_alloc_fourier;
|
||||
ptrdiff_t size_to_alloc_pan;
|
||||
ptrdiff_t local_n0_fourier_xoffset;
|
||||
ptrdiff_t local_fourier_x_start, local_fourier_x_end;
|
||||
FFTW_PLAN output_coeff_forward_plan;
|
||||
|
||||
ptrdiff_t N0_pan_grid = descriptor_base_size<<relative_level;
|
||||
|
@ -66,9 +66,9 @@ if (pmax>descriptor_order) return(100020);
|
|||
|
||||
for (size_t i=0; i<Nbasis; i++) copy_list[i]=i;
|
||||
|
||||
printf("Dimensions of FT (%td,%td,%td)\n",N0_fourier_grid,N0_fourier_grid,N0_fourier_grid);
|
||||
printf("Dimensions of PG (%td,%td,%td)\n",N0_pan_grid,N0_pan_grid,N0_pan_grid);
|
||||
printf("local_no %td local_0_start_fourier %td\n",local_n0_fourier_return, local_0_start_fourier_return);
|
||||
//printf("Dimensions of FT (%td,%td,%td)\n",N0_fourier_grid,N0_fourier_grid,N0_fourier_grid);
|
||||
//printf("Dimensions of PG (%td,%td,%td)\n",N0_pan_grid,N0_pan_grid,N0_pan_grid);
|
||||
//printf("local_no %td local_0_start_fourier %td\n",local_n0_fourier_return, local_0_start_fourier_return);
|
||||
|
||||
// Compute 1-D Spherical Bessel coefficients for each order //////////////////
|
||||
// These are needed for the convolutions below //////////////////
|
||||
|
@ -84,7 +84,7 @@ if (sph_bessel_coeff==NULL) return(100030);
|
|||
|
||||
compute_sph_bessel_coeffs(nfft_dim, pmax, n4dimen, fdim, sph_bessel_coeff);
|
||||
|
||||
printf("Reached here! ndimen4 %ld\n",n4dimen);
|
||||
//printf("Reached here! ndimen4 %ld\n",n4dimen);
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
@ -119,35 +119,39 @@ if (local_n0_fourier!=local_n0_fourier_return){
|
|||
return(100033);
|
||||
};
|
||||
|
||||
local_0_start_pan = local_0_start_fourier/fdim;
|
||||
local_n0_pan = (local_0_start_fourier + local_n0_fourier)/fdim - local_0_start_pan;
|
||||
|
||||
ptrdiff_t abs_fourier_x_start, abs_fourier_x_end;
|
||||
|
||||
const ptrdiff_t ndimens_alloc_pan[] = {N0_pan_grid, N0_pan_grid, N0_pan_grid+2}; // Allocated for r2c
|
||||
howmany = ncopy;
|
||||
|
||||
ptrdiff_t local_n0_pan_check;
|
||||
ptrdiff_t local_0_start_pan_check;
|
||||
size_to_alloc_pan = howmany * local_n0_pan * N0_pan_grid * ( N0_pan_grid+2);
|
||||
|
||||
ptrdiff_t size_to_alloc_pan_check = FFTW_MPI_LOCAL_SIZE_MANY(rank, ndimens_alloc_pan, howmany,
|
||||
FFTW_MPI_DEFAULT_BLOCK,MPI_COMM_WORLD,
|
||||
&local_n0_pan_check,&local_0_start_pan_check);
|
||||
if (size_to_alloc_pan!=size_to_alloc_pan_check){
|
||||
printf("size_to_alloc_pan!=size_to_alloc_pan_check\n");
|
||||
return(100034);
|
||||
|
||||
if (local_0_start_fourier_return%fdim==0){
|
||||
abs_fourier_x_start = local_0_start_fourier_return;
|
||||
}else{
|
||||
abs_fourier_x_start = local_0_start_fourier_return + fdim - (local_0_start_fourier_return%fdim);
|
||||
};
|
||||
|
||||
local_n0_fourier_xoffset = local_0_start_fourier_return%fdim;
|
||||
|
||||
printf("size_to_alloc_fourier = %td\n",size_to_alloc_fourier);
|
||||
printf("size_to_alloc_pan = %td\n",size_to_alloc_pan);
|
||||
printf("local_n0_fourier %td local_0_start_fourier %td\n",local_n0_fourier,local_0_start_fourier);
|
||||
printf("local_n0_pan %td local_0_start_pan %td\n",local_n0_pan,local_0_start_pan);
|
||||
printf("local_n0_fourier_xoffset %td\n",local_n0_fourier_xoffset);
|
||||
if ((local_0_start_fourier_return + local_n0_fourier_return - 1)%fdim==0){
|
||||
abs_fourier_x_end = local_0_start_fourier_return + local_n0_fourier_return - 1;
|
||||
}else{
|
||||
abs_fourier_x_end = (local_0_start_fourier_return + local_n0_fourier_return - 1)-
|
||||
((local_0_start_fourier_return + local_n0_fourier_return - 1)%fdim);
|
||||
};
|
||||
|
||||
if ((abs_fourier_x_end-abs_fourier_x_start)%fdim!=0) return (100036);
|
||||
|
||||
local_0_start_pan = abs_fourier_x_start/fdim;
|
||||
local_n0_pan = 1 + (abs_fourier_x_end-abs_fourier_x_start)/fdim;
|
||||
|
||||
local_fourier_x_start = abs_fourier_x_start - local_0_start_fourier_return;
|
||||
local_fourier_x_end = abs_fourier_x_end - local_0_start_fourier_return;
|
||||
|
||||
const ptrdiff_t ndimens_alloc_pan[] = {N0_pan_grid, N0_pan_grid, N0_pan_grid+2};
|
||||
howmany = ncopy;
|
||||
|
||||
size_to_alloc_pan = howmany * local_n0_pan * N0_pan_grid * ( N0_pan_grid+2);
|
||||
|
||||
|
||||
//printf("size_to_alloc_fourier = %td\n",size_to_alloc_fourier);
|
||||
//printf("size_to_alloc_pan = %td\n",size_to_alloc_pan);
|
||||
|
||||
|
||||
};
|
||||
|
@ -225,7 +229,7 @@ if (error = PANPHASIA_compute_coefficients_(&xorigin,&yorigin,&zorigin,&xextent,
|
|||
|
||||
{
|
||||
|
||||
if (N0_pan_grid<128){
|
||||
if (N0_pan_grid<-1){
|
||||
|
||||
int rank;
|
||||
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
|
||||
|
@ -242,7 +246,7 @@ int rank;
|
|||
for (int iy=0; iy < npan_dim; iy++)
|
||||
for (int iz=0; iz < npan_dim; iz++){
|
||||
int index = ix*N0_pan_grid*(N0_pan_grid+2) + iy*(N0_pan_grid+2) + iz;
|
||||
fprintf(fp,"%6ld%6d%6d %14.8lf %d\n",ix+local_0_start_pan,iy,iz,
|
||||
fprintf(fp,"%6llu%6d%6d %14.8lf %d\n",ix+local_0_start_pan,iy,iz,
|
||||
ptr_panphasia_coefficients[index],index);
|
||||
};
|
||||
|
||||
|
@ -256,7 +260,7 @@ int rank;
|
|||
////////////// Set up FTTW plan //////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
||||
printf("Making the plan ... \n");
|
||||
// printf("Making the plan ... \n");
|
||||
|
||||
//////////////////// Make plan for ncopy interleaved FTs ///////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
@ -280,13 +284,14 @@ int rank;
|
|||
|
||||
};
|
||||
|
||||
printf("Plan completed ... \n");
|
||||
//printf("Plan completed ... \n");
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
//----------------------------------------------------------------------------------
|
||||
|
||||
memset(return_field, 0, local_n0_fourier_return*N0_fourier_grid *(N0_fourier_grid +2)/2 * sizeof(FFTW_COMPLEX));
|
||||
|
||||
memset(return_field, 0, local_n0_fourier_return*N0_fourier_grid *(N0_fourier_grid +2) * sizeof(FFTW_REAL));
|
||||
|
||||
|
||||
for (int iter = 0; iter < nsubdivide; iter++){
|
||||
|
@ -304,17 +309,17 @@ if (!SHARED_FOUR_PAN_SPACE){
|
|||
size_t index_p,index_f;
|
||||
int m;
|
||||
|
||||
for(int ix_p=0, ix_f = local_n0_fourier_xoffset; ix_p<local_n0_pan; ix_p++,ix_f+=fdim)
|
||||
for (int ix_f = local_fourier_x_start, ix_p=0; ix_f <=local_fourier_x_end; ix_f+=fdim,ix_p++)
|
||||
#ifdef USE_OPENMP
|
||||
#pragma omp parallel for collapse(2) private (index_p,index_f,m)
|
||||
#endif
|
||||
for(int iy_p=0, iy_f=0 ; iy_p<npan_dim;iy_p++,iy_f+=fdim)
|
||||
for(int iz_p=0, iz_f=0; iz_p<npan_dim;iz_p++,iz_f+=fdim){
|
||||
|
||||
for(int iy_f=0, iy_p=0 ; iy_f<nfft_dim; iy_f+=fdim, iy_p++)
|
||||
for(int iz_f=0, iz_p=0; iz_f<nfft_dim; iz_f+=fdim, iz_p++){
|
||||
index_p = ix_p*N0_pan_grid*(N0_pan_grid + 2) + iy_p*(N0_pan_grid + 2) + iz_p;
|
||||
index_f = ix_f*N0_fourier_grid*(N0_fourier_grid + 2) + iy_f*(N0_fourier_grid + 2) + iz_f;
|
||||
|
||||
for (m=0; m<nchunk; m++){
|
||||
|
||||
ptr_real_fourier_grid[nchunk*index_f + m] =
|
||||
ptr_panphasia_coefficients[ncopy*index_p + moffset + m];
|
||||
|
||||
|
@ -366,7 +371,7 @@ int m;
|
|||
|
||||
|
||||
|
||||
printf("Reached here 10!\n");
|
||||
// printf("Reached here 10!\n");
|
||||
|
||||
//Add phase shift and normalise field
|
||||
{
|
||||
|
@ -394,9 +399,10 @@ int m;
|
|||
if ( (kx==nfft_dim/2)||(ky==nfft_dim/2)||(kz==nfft_dim/2)){
|
||||
// Set Nyquist modes to zero - not used by IC_Gen anyway.
|
||||
phase_shift_and_scale = 0.0; //1.0/pow((double)nfft_dim,1.5); // No phase shift
|
||||
ptr_mode_weightings[index1] = 0.0; // Set squared weight to zero
|
||||
}else{
|
||||
phase_shift_and_scale = sqrt( (double)(fdim*fdim*fdim))*
|
||||
cexp( (double)fdim * (-I)*pi*(double)(kx + ky + kz)/
|
||||
cexp( (double)fdim * (-I)*pi*(double)(kx + ky + kz)/sqrt(ptr_mode_weightings[index1])/
|
||||
(double)nfft_dim)/pow((double)nfft_dim,1.5);
|
||||
};
|
||||
|
||||
|
@ -406,12 +412,12 @@ int m;
|
|||
|
||||
};
|
||||
|
||||
printf("Minimum weight %lf\n",sqrt(min_weight));
|
||||
// printf("Minimum weight %lf\n",sqrt(min_weight));
|
||||
|
||||
|
||||
};
|
||||
|
||||
printf("Reached here 11!\n");
|
||||
// printf("Reached here 11!\n");
|
||||
|
||||
|
||||
|
||||
|
@ -435,7 +441,7 @@ int m;
|
|||
ky = (iy > nfft_dim/2) ? iy-nfft_dim : iy;
|
||||
kz = (iz > nfft_dim/2) ? iz-nfft_dim : iz;
|
||||
ksquared = kx*kx + ky*ky + kz*kz;
|
||||
if (ksquared<=descriptor_kk_limit){
|
||||
if ((ksquared<=descriptor_kk_limit)&&(ksquared!=0)){
|
||||
index1 = ix*N0_fourier_grid*(N0_fourier_grid/2+1) + iy*(N0_fourier_grid/2+1) + iz;
|
||||
weight = cabs(return_field[index1]);
|
||||
return_field[index1] /= weight;
|
||||
|
@ -444,7 +450,7 @@ int m;
|
|||
|
||||
};
|
||||
|
||||
printf("Reached here 12!\n");
|
||||
//printf("Reached here 12!\n");
|
||||
|
||||
|
||||
int rank;
|
||||
|
@ -455,7 +461,7 @@ int rank;
|
|||
|
||||
|
||||
|
||||
if (nfft_dim <128){
|
||||
if (nfft_dim <-1){
|
||||
|
||||
|
||||
|
||||
|
@ -468,7 +474,7 @@ if (nfft_dim <128){
|
|||
for (int iz=0; iz <= nfft_dim/2; iz++){
|
||||
|
||||
int index = ix*N0_fourier_grid*(N0_fourier_grid/2+1) + iy*(N0_fourier_grid/2+1) + iz;
|
||||
fprintf(fp,"%6ld%6d%6d %14.8lf %14.8lf %14.8lf \n",ix+local_0_start_fourier_return,iy,iz,
|
||||
fprintf(fp,"%6llu%6d%6d %14.8lf %14.8lf %14.8lf \n",ix+local_0_start_fourier_return,iy,iz,
|
||||
creal(return_field[index]),cimag(return_field[index]),sqrt(ptr_mode_weightings[index]));
|
||||
// ptr_mode_weightings[index]);
|
||||
};
|
||||
|
@ -485,7 +491,7 @@ if (nfft_dim <128){
|
|||
for (int iz=0; iz <= nfft_dim/2; iz++){
|
||||
if (ix+iy+iz+local_0_start_fourier_return<100){
|
||||
int index = ix*N0_fourier_grid*(N0_fourier_grid/2+1) + iy*(N0_fourier_grid/2+1) + iz;
|
||||
fprintf(fp,"%6ld%6d%6d %14.8lf %14.8lf %14.8lf \n",ix+local_0_start_fourier_return,iy,iz,
|
||||
fprintf(fp,"%6llu%6d%6d %14.8lf %14.8lf %14.8lf \n",ix+local_0_start_fourier_return,iy,iz,
|
||||
creal(return_field[index]),cimag(return_field[index]),sqrt(ptr_mode_weightings[index]));
|
||||
// ptr_mode_weightings[index]);
|
||||
};
|
||||
|
@ -496,9 +502,46 @@ if (nfft_dim <128){
|
|||
};
|
||||
|
||||
|
||||
// Transpose output field
|
||||
|
||||
{
|
||||
|
||||
FFTW_PLAN transpose_output_plan;
|
||||
unsigned flags = FFTW_ESTIMATE;
|
||||
|
||||
void *ptr_inter = return_field;
|
||||
|
||||
FFTW_REAL *ptr_return_as_real_field;
|
||||
ptr_return_as_real_field = ptr_inter;
|
||||
|
||||
|
||||
int rank = 2;
|
||||
|
||||
ptrdiff_t size_to_transpose;
|
||||
ptrdiff_t howmany = N0_fourier_grid + 2;
|
||||
ptrdiff_t local_n0, local_0_start;
|
||||
ptrdiff_t local_n1, local_1_start;
|
||||
|
||||
const ptrdiff_t ndimens[] = {N0_fourier_grid, N0_fourier_grid};
|
||||
|
||||
|
||||
size_to_transpose = FFTW_MPI_LOCAL_SIZE_MANY_TRANSPOSED(rank, ndimens, howmany,
|
||||
FFTW_MPI_DEFAULT_BLOCK, FFTW_MPI_DEFAULT_BLOCK,
|
||||
MPI_COMM_WORLD,&local_n0, &local_0_start,&local_n1, &local_1_start);
|
||||
|
||||
// printf("size_to_transpose = %td\n",size_to_transpose);
|
||||
|
||||
transpose_output_plan = FFTW_MPI_PLAN_MANY_TRANSPOSE(N0_fourier_grid, N0_fourier_grid,
|
||||
howmany, FFTW_MPI_DEFAULT_BLOCK,FFTW_MPI_DEFAULT_BLOCK,
|
||||
ptr_return_as_real_field, ptr_return_as_real_field,
|
||||
MPI_COMM_WORLD, flags);
|
||||
|
||||
//printf("Transpose plan completed.\n");
|
||||
|
||||
FFTW_MPI_EXECUTE_R2R(transpose_output_plan,ptr_return_as_real_field,ptr_return_as_real_field);
|
||||
|
||||
//printf("Transpose completed.\n");
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
@ -516,7 +559,7 @@ if (!SHARED_FOUR_PAN_SPACE) FFTW_FREE(fourier_grids);
|
|||
|
||||
FFTW_DESTROY_PLAN(output_coeff_forward_plan);
|
||||
|
||||
printf("Reached end of PANPHASIA_compute_kspace_field_\n");
|
||||
//printf("Reached end of PANPHASIA_compute_kspace_field_\n");
|
||||
return(0);
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue