#ifdef HAVE_PANPHASIA #include #include #include #include "random.hh" const int maxdim = 60, maxlev = 50, maxpow = 3*maxdim; typedef int rand_offset_[5]; typedef struct { int state[133]; //Nstore = Nstate (=5) + Nbatch (=128) int need_fill; int pos; }rand_state_; /* pan_state_ struct -- corresponds to respective fortran module in panphasia_routines.f * data structure that contains all panphasia state variables * it needs to get passed between the fortran routines to enable * thread-safe execution. */ typedef struct { int base_state[5], base_lev_start[5][maxdim+1]; rand_offset_ poweroffset[maxpow+1], superjump; rand_state_ current_state[maxpow+2]; int layer_min,layer_max,indep_field; long long xorigin_store[2][2][2],yorigin_store[2][2][2],zorigin_store[2][2][2]; int lev_common, layer_min_store,layer_max_store; long long ix_abs_store,iy_abs_store,iz_abs_store, ix_per_store,iy_per_store,iz_per_store, ix_rel_store,iy_rel_store,iz_rel_store; double exp_coeffs[8][8][maxdim+2]; long long xcursor[maxdim+1],ycursor[maxdim+1],zcursor[maxdim+1]; int ixshift[2][2][2],iyshift[2][2][2],izshift[2][2][2]; double cell_data[9][8]; int ixh_last,iyh_last,izh_last; int init; int init_cell_props; int init_lecuyer_state; long long p_xcursor[62],p_ycursor[62],p_zcursor[62]; }pan_state_; extern "C"{ void start_panphasia_( pan_state_ *lstate, const char* descriptor, int *ngrid, int* bverbose ); void parse_descriptor_( const char* descriptor, int16_t* l, int32_t* ix, int32_t* iy, int32_t* iz, int16_t* side1, int16_t* side2, int16_t* side3, int32_t* check_int, char* name ); void panphasia_cell_properties_( pan_state_ *lstate, int* ixcell, int* iycell, int* izcell, double *cell_prop ); void adv_panphasia_cell_properties_( pan_state_ *lstate, int* ixcell, int* iycell, int* izcell, int* layer_min, int* layer_max, int* indep_field, double *cell_prop ); void set_phases_and_rel_origin_( pan_state_ *lstate, const char *descriptor, int *lev, long long *ix_rel, long long *iy_rel, long long *iz_rel, int *VERBOSE ); /*void set_local_box_( pan_state_ *lstate, int lev, int8_t ix_abs, int8_t iy_abs, int8_t iz_abs, int8_t ix_per, int8_t iy_per, int8_t iz_per, int8_t ix_rel, int8_t iy_rel, int8_t iz_rel, int wn_level_base, int8_t check_rand, char *phase_name, int MYID);*/ /*extern struct { int layer_min, layer_max, hoswitch; }oct_range_; */ } class RNG_panphasia : public RNG_plugin{ private: void forward_transform_field( real_t *field, int n0, int n1, int n2 ); void forward_transform_field( real_t *field, int n ){ forward_transform_field( field, n, n, n ); } void backward_transform_field( real_t *field, int n0, int n1, int n2 ); void backward_transform_field( real_t *field, int n ){ backward_transform_field( field, n, n, n ); } protected: std::string descriptor_string_; int levelmin_, levelmin_poisson_, levelmax_, ngrid_; bool incongruent_fields_; double phase_adjustment_; double translation_phase_; pan_state_ *lstate; double grid_rescale_fac_; int ix_abs_[3], ix_per_[3], ix_rel_[3], level_p_, lextra_; struct panphasia_descriptor{ int16_t wn_level_base; int32_t i_xorigin_base, i_yorigin_base, i_zorigin_base; int16_t i_base,i_base_y,i_base_z; int32_t check_rand; std::string name; explicit panphasia_descriptor( std::string dstring ) { char tmp[100]; memset(tmp,' ',100); parse_descriptor_( dstring.c_str(), &wn_level_base, &i_xorigin_base, &i_yorigin_base, &i_zorigin_base, &i_base, &i_base_y, &i_base_z, &check_rand, tmp ); for( int i=0; i<100; i++ ) if(tmp[i]==' ') {tmp[i] = '\0'; break; } name = tmp; name.erase(std::remove(name.begin(), name.end(), ' '), name.end() ); } }; void clear_panphasia_thread_states( void ) { for( int i=0; i("random","descriptor"); levelmin_ = prefh_->levelmin(); levelmin_poisson_ = cf.getValue("setup","levelmin"); levelmax_ = prefh_->levelmax(); lstate = new pan_state_[ omp_get_max_threads() ]; clear_panphasia_thread_states(); LOGINFO("PANPHASIA: running with %d threads",omp_get_max_threads()); // parse the descriptor for its properties panphasia_descriptor d( descriptor_string_ ); // if ngrid is not a multiple of i_base, then we need to enlarge and then sample down ngrid_ = 1< %d) * 2**ref compatible with PANPHASIA\n"\ " will Fourier interpolate after.",1<& R ); bool is_multiscale() const { return true; } }; void RNG_panphasia::forward_transform_field( real_t *field, int nx, int ny, int nz ) { fftw_real *rfield = reinterpret_cast(field); fftw_complex *cfield = reinterpret_cast(field); #ifdef FFTW3 #ifdef SINGLE_PRECISION fftwf_plan pf = fftwf_plan_dft_r2c_3d(nx, ny, nz, rfield, cfield, FFTW_ESTIMATE); #else fftw_plan pf = fftw_plan_dft_r2c_3d(nx, ny, nz, rfield, cfield, FFTW_ESTIMATE); #endif #else rfftwnd_plan pf = rfftw3d_create_plan( nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE|FFTW_IN_PLACE); #endif #ifdef FFTW3 #ifdef SINGLE_PRECISION fftwf_execute( pf ); #else fftw_execute( pf ); #endif #else #ifndef SINGLETHREAD_FFTW rfftwnd_threads_one_real_to_complex( omp_get_max_threads(), pf, rfield, NULL ); #else rfftwnd_one_real_to_complex( pf, rfield, NULL ); #endif #endif } void RNG_panphasia::backward_transform_field( real_t *field, int nx, int ny, int nz ) { fftw_real *rfield = reinterpret_cast(field); fftw_complex *cfield = reinterpret_cast(field); #ifdef FFTW3 #ifdef SINGLE_PRECISION fftwf_plan ipf = fftwf_plan_dft_c2r_3d(nx, ny, nz, cfield, rfield, FFTW_ESTIMATE); #else fftw_plan ipf = fftw_plan_dft_c2r_3d(nx, ny, nz, cfield, rfield, FFTW_ESTIMATE); #endif #else rfftwnd_plan ipf = rfftw3d_create_plan( nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE|FFTW_IN_PLACE); #endif #ifdef FFTW3 #ifdef SINGLE_PRECISION fftwf_execute( ipf ); #else fftw_execute( ipf ); #endif #else #ifndef SINGLETHREAD_FFTW rfftwnd_threads_one_complex_to_real( omp_get_max_threads(), ipf, cfield, NULL ); #else rfftwnd_one_complex_to_real( ipf, cfield, NULL ); #endif #endif } void RNG_panphasia::fill_grid( int level, DensityGrid& R ) { fftw_real *pr0, *pr1, *pr2, *pr3, *pr4; fftw_complex *pc0, *pc1, *pc2, *pc3, *pc4; // determine resolution and offset so that we can do proper resampling int ileft[3], iright[3], nx[3], nxremap[3], ishift[3]; double ileft_shift[3]; for( int k=0; k<3; ++k ) { // calculate coordinate of left corner of domain if( level == levelmin_ ) ileft[k] = 0; else ileft[k] = prefh_->offset_abs(level,k) - prefh_->size(level,k)/2; // global coordinate system shift ileft[k] -= (1<<(level-levelmin_poisson_)) * prefh_->get_shift(k); ishift[k] = (1<<(level-levelmin_poisson_)) * prefh_->get_shift(k); iright[k] = ileft[k] + R.size(k); nx[k] = iright[k] - ileft[k]; assert(nx[k]%2==0); // map to PANPHASIA descriptor's base resolution ileft_shift[k] = ileft[k] * grid_rescale_fac_; nxremap[k] = nx[k] * grid_rescale_fac_; //fprintf(stderr,"dim=%c : offset = %d, ileft = %d, ileftremap = %d, iright = %d, nx = %d, nxremap = %d, dileft = %d\n", //'x'+k,R.offset(k),ileft[k],ileftremap[k],iright[k],nx[k],nxremap[k],dileft[k]); //fprintf(stderr,"dim=%c : offset = %d, ileft = %d, ishift = %d, ileftremap = %d, iright = %d, nx = %d, nxremap = %d\n", // 'x'+k,R.offset(k),ileft[k],ishift[k],ileftremap[k],iright[k],nx[k],nxremap[k]); } int ng_level = ngrid_ * (1<<(level - levelmin_)); // full resolution of current level size_t ngp = nxremap[0] * nxremap[1] * (nxremap[2]+2); pr0 = new fftw_real[ngp]; pr1 = new fftw_real[ngp]; pr2 = new fftw_real[ngp]; pr3 = new fftw_real[ngp]; pr4 = new fftw_real[ngp]; pc0 = reinterpret_cast(pr0); pc1 = reinterpret_cast(pr1); pc2 = reinterpret_cast(pr2); pc3 = reinterpret_cast(pr3); pc4 = reinterpret_cast(pr4); LOGINFO("calculating PANPHASIA random numbers for level %d...",level); clear_panphasia_thread_states(); double t1 = omp_get_wtime(); #pragma omp parallel { const int mythread = omp_get_thread_num(); int verbosity = (mythread==0); char descriptor[100]; memset(descriptor,0,100); memcpy(descriptor,descriptor_string_.c_str(),descriptor_string_.size()); if( level == levelmin_ ){ start_panphasia_( &lstate[mythread], descriptor, &ng_level, &verbosity ); } { int level_p, lextra; long long ix_rel[3]; panphasia_descriptor d( descriptor_string_ ); lextra = (log10((double)ng_level/(double)d.i_base)+0.001)/log10(2.0); level_p = d.wn_level_base + lextra; int ratio = 1<layer_min, &ps->layer_max, &ps->indep_field, cell_prop ); pr0[idx] = cell_prop[0]; pr1[idx] = cell_prop[4]; pr2[idx] = cell_prop[2]; pr3[idx] = cell_prop[1]; pr4[idx] = cell_prop[8]; } } } LOGUSER("time for calculating PANPHASIA for level %d : %f s", level, omp_get_wtime() - t1); ///////////////////////////////////////////////////////////////////////// // transform and convolve with Legendres forward_transform_field( pr0, nxremap[0], nxremap[1], nxremap[2] ); forward_transform_field( pr1, nxremap[0], nxremap[1], nxremap[2] ); forward_transform_field( pr2, nxremap[0], nxremap[1], nxremap[2] ); forward_transform_field( pr3, nxremap[0], nxremap[1], nxremap[2] ); forward_transform_field( pr4, nxremap[0], nxremap[1], nxremap[2] ); #pragma omp parallel for for( int i=0; i nxremap[1]/2 ) ii -= nxremap[1]; if( j > nxremap[2]/2 ) jj -= nxremap[2]; //int kkmax = std::max(abs(ii),std::max(abs(jj),abs(kk))); int ktotal = ii+jj+kk; if( ii != 0 ){ arg = M_PI*(double)ii/(double)nxremap[0]; fx = sin(arg)/arg; gx = complex(0.0,(arg*cos(arg)-sin(arg))/(arg*arg)); }else{ fx = 1.0; gx = 0.0; } if( jj != 0 ){ arg = M_PI*(double)jj/(double)nxremap[1]; fy = sin(arg)/arg; gy = complex(0.0,(arg*cos(arg)-sin(arg))/(arg*arg)); }else{ fy = 1.0; gy = 0.0; } if( kk != 0 ){ arg = M_PI*(double)kk/(double)nxremap[2]; fz = sin(arg)/arg; gz = complex(0.0,(arg*cos(arg)-sin(arg))/(arg*arg)); }else{ fz = 1.0; gz = 0.0; } //complex temp_comp = (fx+sqrt(3.0)*gx)*(fy+sqrt(3.0)*gy)*(fz+sqrt(3.0)*gz); //double magnitude = sqrt(1.0-std::abs(temp_comp)); double fxyz= (fx*fy*fz)*(fx*fy*fz); double gxy = abs((fx*fy*gz)*conj(fx*fy*gz)); double gyz = abs((gx*fy*fz)*conj(gx*fy*fz)); double gxz = abs((fx*gy*fz)*conj(fx*gy*fz)); double magnitude=sqrt(1.0 - fxyz - 3.0*(gxy + gyz + gxz)); if( abs(ii) != nxremap[0]/2 && abs(jj) != nxremap[1]/2 && abs(kk) != nxremap[2]/2 ){ //kkmax != nxremap[2]/2 ){ complex x, y0(RE(pc0[idx]),IM(pc0[idx])), y1(RE(pc1[idx]),IM(pc1[idx])), y2(RE(pc2[idx]),IM(pc2[idx])), y3(RE(pc3[idx]),IM(pc3[idx])), y4(RE(pc4[idx]),IM(pc4[idx])); //OH:2015/05/20: should this be y1*gx*fy*fz or y3*gx*fy*fz? (and same for the last one) x = y0*fx*fy*fz + sqrt(3.0)*(y1*gx*fy*fz + y2*fx*gy*fz + y3*fx*fy*gz) + y4*magnitude; //ARJ The value of phase_adjustment_ is zero when incongruent_fields_ //ARJ is false. It is needed to ensure the phases of each mode //ARJ are preserved with respect to a fixed spatial location //ARJ when the modes are copied from a larger non-power-of-two Panphasia grid //ARJ to a smaller power-of-two grid needed by MUSIC. translation_phase_ = 2.0*M_PI * (ileft_shift[0]*(double)ii + ileft_shift[1]*(double)jj +ileft_shift[2]*(double)kk)/(double)ngrid_; x = x * exp(complex(0.0,translation_phase_ + phase_adjustment_*(double)ktotal)); RE(pc0[idx]) = x.real(); IM(pc0[idx]) = x.imag(); } } ///////////////////////////////////////////////////////////////////////// // do we need to cut off the small scales? // int nn = 1< nxremap[0]/2 ) ii -= nxremap[0]; if( j > nxremap[1]/2 ) jj -= nxremap[1]; int ia(abs(ii)),ja(abs(jj)),ka(abs(kk)); if( ia <= nx[0]/2 && ja <= nx[1]/2 && ka <= nx[2]/2 ) { size_t idx = ((size_t)(i) * nxremap[1] + (size_t)(j)) * (nxremap[2]/2+1) + (size_t)(k); ii = ii < 0 ? ii+nx[0] : ii; jj = jj < 0 ? jj+nx[1] : jj; size_t idx2 =((size_t)ii * nx[1] + (size_t)jj) * ((size_t)nx[2]/2+1) + (size_t)kk; RE(pc1[idx2]) = RE(pc0[idx]); if ( ia == nx[0]/2 || ja == nx[1]/2 || ka == nx[2]/2 ){ IM(pc1[idx2]) = 0.0; // Nquist modes are real }else{ IM(pc1[idx2]) = IM(pc0[idx]); } } } //std::swap(pc0,pc1); #pragma omp parallel for for( int i=0; i %d,%d,%d",nxremap[0],nxremap[1],nxremap[2],nx[0],nx[1],nx[2]); // n = 1< = %g | var(p) = %g", level, sum,sum2); } namespace{ RNG_plugin_creator_concrete< RNG_panphasia > creator("PANPHASIA"); } #endif