From 040324f3466045cb381b856740381052edd5cdd6 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sat, 11 Feb 2023 18:49:50 -0800 Subject: [PATCH 1/5] WIP added panphasia files. does not work yet --- CMakeLists.txt | 20 + ext/panphasia/generic_lecuyer.f90 | 683 ++++++ ext/panphasia/panphasia_routines.f | 3334 ++++++++++++++++++++++++++++ src/fft_operators.hh | 204 -- src/plugins/random_panphasia.cc | 821 +++++++ 5 files changed, 4858 insertions(+), 204 deletions(-) create mode 100644 ext/panphasia/generic_lecuyer.f90 create mode 100644 ext/panphasia/panphasia_routines.f delete mode 100644 src/fft_operators.hh create mode 100644 src/plugins/random_panphasia.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index e83719d..51ed9bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,22 @@ file( GLOB PLUGINS ${PROJECT_SOURCE_DIR}/src/plugins/*.cc ) +# PANPHASIA +option(ENABLE_PANPHASIA "Enable PANPHASIA random number generator" ON) +if(ENABLE_PANPHASIA) +enable_language(Fortran) +if ("${CMAKE_Fortran_COMPILER_ID}" MATCHES "Intel") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -132 -implicit-none") +elseif("${CMAKE_Fortran_COMPILER_ID}" MATCHES "GNU") + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -ffixed-line-length-132 -fimplicit-none") +endif() +list (APPEND SOURCES + ${PROJECT_SOURCE_DIR}/ext/panphasia/panphasia_routines.f + ${PROJECT_SOURCE_DIR}/ext/panphasia/generic_lecuyer.f90 +) +# target_include_directories(${PRGNAME} PRIVATE ${PROJECT_SOURCE_DIR}/external/panphasia_ho) +endif(ENABLE_PANPHASIA) + add_executable(${PRGNAME} ${SOURCES} ${PLUGINS}) set_target_properties(${PRGNAME} PROPERTIES CXX_STANDARD 11) @@ -116,6 +132,10 @@ if(TIRPC_FOUND) target_compile_options(${PRGNAME} PRIVATE "-DHAVE_TIRPC") endif(TIRPC_FOUND) +if(ENABLE_PANPHASIA) + target_compile_options(${PRGNAME} PRIVATE "-DHAVE_PANPHASIA") +endif(ENABLE_PANPHASIA) + target_link_libraries(${PRGNAME} ${FFTW3_LIBRARIES}) target_include_directories(${PRGNAME} PRIVATE ${FFTW3_INCLUDE_DIRS}) diff --git a/ext/panphasia/generic_lecuyer.f90 b/ext/panphasia/generic_lecuyer.f90 new file mode 100644 index 0000000..13f53ed --- /dev/null +++ b/ext/panphasia/generic_lecuyer.f90 @@ -0,0 +1,683 @@ +!=====================================================================================c +! +! The code below was written by: Stephen Booth +! Edinburgh Parallel Computing Centre +! The University of Edinburgh +! JCMB +! Mayfield Road +! Edinburgh EH9 3JZ +! United Kingdom +! +! This file is part of the software made public in +! Jenkins and Booth 2013 - arXiv:1306.XXXX +! +! The software computes the Panphasia Gaussian white noise field +! realisation described in detail in Jenkins 2013 - arXiv:1306.XXXX +! +! +! +! This software is free, subject to a agreeing licence conditions: +! +! +! (i) you will publish the phase descriptors and reference Jenkins (13) +! for any new simulations that use Panphasia phases. You will pass on this +! condition to others for any software or data you make available publically +! or privately that makes use of Panphasia. +! +! (ii) that you will ensure any publications using results derived from Panphasia +! will be submitted as a final version to arXiv prior to or coincident with +! publication in a journal. +! +! +! (iii) that you report any bugs in this software as soon as confirmed to +! A.R.Jenkins@durham.ac.uk +! +! (iv) that you understand that this software comes with no warranty and that is +! your responsibility to ensure that it is suitable for the purpose that +! you intend. +! +!=====================================================================================c +!{{{Rand_base (define kind types) +MODULE Rand_base +! This module just declares the base types +! we may have to edit this to match to the target machine +! we really need a power of 2 selected int kind in fortran-95 we could +! do this with a PURE function I think. + +! +! 10 decimal digits will hold 2^31 +! + + INTEGER, PARAMETER :: Sint = SELECTED_INT_KIND(9) +! INTEGER, PARAMETER :: Sint = SELECTED_INT_KIND(10) +! INTEGER, PARAMETER :: Sint = 4 + +! +! 18-19 decimal digits will hold 2^63 +! but all 19 digit numbers require 2^65 :-( +! + + INTEGER, PARAMETER :: Dint = SELECTED_INT_KIND(17) +! INTEGER, PARAMETER :: Dint = SELECTED_INT_KIND(18) +! INTEGER, PARAMETER :: Dint = 8 + +! type for index counters must hold Nstore + INTEGER, PARAMETER :: Ctype = SELECTED_INT_KIND(3) +END MODULE Rand_base +!}}} + +!{{{Rand_int (random integers mod 2^31-1) + +MODULE Rand_int + USE Rand_base + IMPLICIT NONE +! The general approach of this module is two have +! two types Sint and Dint +! +! Sint should have at least 31 bits +! dint shouldhave at least 63 + +!{{{constants + + INTEGER(KIND=Ctype), PARAMETER :: Nstate=5_Ctype + INTEGER(KIND=Ctype), PRIVATE, PARAMETER :: Nbatch=128_Ctype + INTEGER(KIND=Ctype), PRIVATE, PARAMETER :: Nstore=Nstate+Nbatch + + INTEGER(KIND=Sint), PRIVATE, PARAMETER :: M = 2147483647_Sint + INTEGER(KIND=Dint), PRIVATE, PARAMETER :: Mask = 2147483647_Dint + INTEGER(KIND=Dint), PRIVATE, PARAMETER :: A1 = 107374182_Dint + INTEGER(KIND=Dint), PRIVATE, PARAMETER :: A5 = 104480_Dint + LOGICAL, PARAMETER :: Can_step_int=.TRUE. + LOGICAL, PARAMETER :: Can_reverse_int=.TRUE. + +!}}} + +!{{{Types +! +! This type holds the state of the generator +! +!{{{TYPE RAND_state + +TYPE RAND_state + PRIVATE + INTEGER(KIND=Sint) :: state(Nstore) +! do we need to re-fill state table this is reset when we initialise state. + LOGICAL :: need_fill +! position of the next state variable to output + INTEGER(KIND=Ctype) :: pos +END TYPE RAND_state + +!}}} + +! +! This type defines the offset type used for stepping. +! +!{{{TYPE RAND_offset + +TYPE RAND_offset + PRIVATE + INTEGER(KIND=Sint) :: poly(Nstate) +END TYPE RAND_offset + +!}}} + +!}}} + +!{{{interface and overloads +! +! Allow automatic conversion between integers and offsets +! +INTERFACE ASSIGNMENT(=) + MODULE PROCEDURE Rand_set_offset + MODULE PROCEDURE Rand_load + MODULE PROCEDURE Rand_save + MODULE PROCEDURE Rand_seed +END INTERFACE +INTERFACE OPERATOR(+) + MODULE PROCEDURE Rand_add_offset +END INTERFACE +INTERFACE OPERATOR(*) + MODULE PROCEDURE Rand_mul_offset +END INTERFACE + +! +! overload + as the boost/stepping operator +! +INTERFACE OPERATOR(+) + MODULE PROCEDURE Rand_step + MODULE PROCEDURE Rand_boost +END INTERFACE +!}}} + + +!{{{PUBLIC/PRIVATE + PRIVATE reduce,mod_saxpy,mod_sdot,p_saxpy,p_sdot,poly_mult + PRIVATE poly_square, poly_power + PRIVATE fill_state, repack_state + + PUBLIC Rand_sint, Rand_sint_vec + + PUBLIC Rand_save, Rand_load + PUBLIC Rand_set_offset, Rand_add_offset, Rand_mul_offset + PUBLIC Rand_step, Rand_boost, Rand_seed +!}}} + +CONTAINS + !{{{Internals + !{{{RECURSIVE FUNCTION reduce(A) + RECURSIVE FUNCTION reduce(A) + ! + ! Take A Dint and reduce to Sint MOD M + ! + INTEGER(KIND=Dint), INTENT(IN) :: A + INTEGER(KIND=Sint) reduce + INTEGER(KIND=Dint) tmp + + tmp = A + DO WHILE( ISHFT(tmp, -31) .GT. 0 ) + tmp = IAND(tmp,Mask) + ISHFT(tmp, -31) + END DO + IF( tmp .GE. M ) THEN + reduce = tmp - M + ELSE + reduce = tmp + END IF + END FUNCTION reduce + !}}} + !{{{RECURSIVE SUBROUTINE fill_state(x) + RECURSIVE SUBROUTINE fill_state(x) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Ctype) i + INTRINSIC IAND, ISHFT + INTEGER(KIND=Dint) tmp + DO i=Nstate+1,Nstore + tmp = (x%state(i-5) * A5) + (x%state(i-1)*A1) + ! + ! now reduce down to mod M efficiently + ! really hope the compiler in-lines this + ! + ! x%state(i) = reduce(tmp) + DO WHILE( ISHFT(tmp, -31) .GT. 0 ) + tmp = IAND(tmp,Mask) + ISHFT(tmp, -31) + END DO + IF( tmp .GE. M ) THEN + x%state(i) = tmp - M + ELSE + x%state(i) = tmp + END IF + + END DO + x%need_fill = .FALSE. + END SUBROUTINE fill_state + !}}} + !{{{RECURSIVE SUBROUTINE repack_state(x) + RECURSIVE SUBROUTINE repack_state(x) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Ctype) i + DO i=1,Nstate + x%state(i) = x%state(i+x%pos-(Nstate+1)) + END DO + x%pos = Nstate + 1 + x%need_fill = .TRUE. + END SUBROUTINE repack_state + !}}} + !{{{RECURSIVE SUBROUTINE mod_saxpy(y,a,x) + RECURSIVE SUBROUTINE mod_saxpy(y,a,x) + INTEGER(KIND=Ctype) i + INTEGER(KIND=Sint) y(Nstate) + INTEGER(KIND=Sint) a + INTEGER(KIND=Sint) x(Nstate) + INTEGER(KIND=Dint) tx,ty,ta + + IF( a .EQ. 0_Sint ) RETURN + + ! We use KIND=Dint temporaries here to ensure + ! that we don't overflow in the expression + + ta = a + DO i=1,Nstate + ty=y(i) + tx=x(i) + y(i) = reduce(ty + ta * tx) + END DO + + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE mod_sdot(res,x,y) + RECURSIVE SUBROUTINE mod_sdot(res,x,y) + INTEGER(KIND=Sint), INTENT(OUT) :: res + INTEGER(KIND=Sint), INTENT(IN) :: x(Nstate) , y(Nstate) + INTEGER(KIND=Dint) dx, dy, dtmp + INTEGER(KIND=Sint) tmp + INTEGER(KIND=Ctype) i + + tmp = 0 + DO i=1,Nstate + dx = x(i) + dy = y(i) + dtmp = tmp + tmp = reduce(dtmp + dx * dy) + END DO + res = tmp + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE p_saxpy(y,a) + RECURSIVE SUBROUTINE p_saxpy(y,a) + ! Calculates mod_saxpy(y,a,P) + INTEGER(KIND=Sint), INTENT(INOUT) :: y(Nstate) + INTEGER(KIND=Sint), INTENT(IN) :: a + INTEGER(KIND=Dint) tmp, dy, da + dy = y(1) + da = a + tmp = dy + da*A5 + y(1) = reduce(tmp) + dy = y(5) + da = a + tmp = dy + da*A1 + y(5) = reduce(tmp) + + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE p_sdot(res,n,x) + RECURSIVE SUBROUTINE p_sdot(res,x) + INTEGER(KIND=Sint), INTENT(OUT) :: res + INTEGER(KIND=Sint), INTENT(IN) :: x(Nstate) + INTEGER(KIND=Dint) dx1, dx5, dtmp + dx1 = x(1) + dx5 = x(5) + + dtmp = A1*dx5 + A5*dx1 + res = reduce(dtmp) + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE poly_mult(a,b) + RECURSIVE SUBROUTINE poly_mult(a,b) + INTEGER(KIND=Sint), INTENT(INOUT) :: a(Nstate) + INTEGER(KIND=Sint), INTENT(IN) :: b(Nstate) + INTEGER(KIND=Sint) tmp((2*Nstate) - 1) + INTEGER(KIND=Ctype) i + + tmp = 0_Sint + + DO i=1,Nstate + CALL mod_saxpy(tmp(i:Nstate+i-1),a(i), b) + END DO + DO i=(2*Nstate)-1, Nstate+1, -1 + CALL P_SAXPY(tmp(i-Nstate:i-1),tmp(i)) + END DO + a = tmp(1:Nstate) + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE poly_square(a) + RECURSIVE SUBROUTINE poly_square(a) + INTEGER(KIND=Sint), INTENT(INOUT) :: a(Nstate) + INTEGER(KIND=Sint) tmp((2*Nstate) - 1) + INTEGER(KIND=Ctype) i + + tmp = 0_Sint + + DO i=1,Nstate + CALL mod_saxpy(tmp(i:Nstate+i-1),a(i), a) + END DO + DO i=(2*Nstate)-1, Nstate+1, -1 + CALL P_SAXPY(tmp(i-Nstate:i-1),tmp(i)) + END DO + a = tmp(1:Nstate) + END SUBROUTINE + !}}} + !{{{RECURSIVE SUBROUTINE poly_power(poly,n) + RECURSIVE SUBROUTINE poly_power(poly,n) + INTEGER(KIND=Sint), INTENT(INOUT) :: poly(Nstate) + INTEGER, INTENT(IN) :: n + INTEGER nn + INTEGER(KIND=Sint) x(Nstate), out(Nstate) + + IF( n .EQ. 0 )THEN + poly = 0_Sint + poly(1) = 1_Sint + RETURN + ELSE IF( n .LT. 0 )THEN + poly = 0_Sint + RETURN + END IF + + out = 0_sint + out(1) = 1_Sint + x = poly + nn = n + DO WHILE( nn .GT. 0 ) + IF( MOD(nn,2) .EQ. 1 )THEN + call poly_mult(out,x) + END IF + nn = nn/2 + IF( nn .GT. 0 )THEN + call poly_square(x) + END IF + END DO + poly = out + + END SUBROUTINE poly_power + !}}} + !}}} + + !{{{RECURSIVE SUBROUTINE Rand_seed( state, n ) + RECURSIVE SUBROUTINE Rand_seed( state, n ) + TYPE(Rand_state), INTENT(OUT) :: state + INTEGER, INTENT(IN) :: n + ! initialise the genrator using a single integer + ! fist initialise to an arbitrary state then boost by a multiple + ! of a long distance + ! + ! state is moved forward by P^n steps + ! we want this to be ok for seperating parallel sequences on MPP machines + ! P is taken as a prime number as this should prevent strong correlations + ! when the generators are operated in tight lockstep. + ! equivalent points on different processors will also be related by a + ! primative polynomial + ! P is 2^48-59 + TYPE(Rand_state) tmp + TYPE(Rand_offset), PARAMETER :: P = & + Rand_offset( (/ 1509238949_Sint ,2146167999_Sint ,1539340803_Sint , & + 1041407428_Sint ,666274987_Sint /) ) + + CALL Rand_load( tmp, (/ 5, 4, 3, 2, 1 /) ) + state = Rand_boost( tmp, Rand_mul_offset(P, n )) + + END SUBROUTINE Rand_seed + !}}} + !{{{RECURSIVE SUBROUTINE Rand_load( state, input ) + RECURSIVE SUBROUTINE Rand_load( state, input ) + TYPE(RAND_state), INTENT(OUT) :: state + INTEGER, INTENT(IN) :: input(Nstate) + + INTEGER(KIND=Ctype) i + + state%state = 0_Sint + DO i=1,Nstate + state%state(i) = MOD(INT(input(i),KIND=Sint),M) + END DO + state%need_fill = .TRUE. + state%pos = Nstate + 1 + END SUBROUTINE Rand_load + !}}} + !{{{RECURSIVE SUBROUTINE Rand_save( save_vec,state ) + RECURSIVE SUBROUTINE Rand_save( save_vec, x ) + INTEGER, INTENT(OUT) :: save_vec(Nstate) + TYPE(RAND_state), INTENT(IN) :: x + + INTEGER(KIND=Ctype) i + DO i=1,Nstate + save_vec(i) = x%state(x%pos-(Nstate+1) + i) + END DO + END SUBROUTINE Rand_save + !}}} + + !{{{RECURSIVE SUBROUTINE Rand_set_offset( offset, n ) + RECURSIVE SUBROUTINE Rand_set_offset( offset, n ) + TYPE(Rand_offset), INTENT(OUT) :: offset + INTEGER, INTENT(IN) :: n + + offset%poly = 0_Sint + IF ( n .GE. 0 ) THEN + offset%poly(2) = 1_Sint + call poly_power(offset%poly,n) + ELSE + ! + ! This is X^-1 + ! + offset%poly(4) = 858869107_Sint + offset%poly(5) = 1840344978_Sint + call poly_power(offset%poly,-n) + END IF + END SUBROUTINE Rand_set_offset + !}}} + !{{{TYPE(Rand_offset) RECURSIVE FUNCTION Rand_add_offset( a, b ) + TYPE(Rand_offset) RECURSIVE FUNCTION Rand_add_offset( a, b ) + TYPE(Rand_offset), INTENT(IN) :: a, b + + Rand_add_offset = a + CALL poly_mult(Rand_add_offset%poly,b%poly) + RETURN + END FUNCTION Rand_add_offset + !}}} + !{{{TYPE(Rand_offset) RECURSIVE FUNCTION Rand_mul_offset( a, n ) + TYPE(Rand_offset) RECURSIVE FUNCTION Rand_mul_offset( a, n ) + TYPE(Rand_offset), INTENT(IN) :: a + INTEGER, INTENT(IN) :: n + Rand_mul_offset = a + CALL poly_power(Rand_mul_offset%poly,n) + RETURN + END FUNCTION Rand_mul_offset + !}}} + !{{{RECURSIVE FUNCTION Rand_boost(x, offset) + RECURSIVE FUNCTION Rand_boost(x, offset) + TYPE(Rand_state) Rand_boost + TYPE(Rand_state), INTENT(IN) :: x + TYPE(Rand_offset), INTENT(IN) :: offset + INTEGER(KIND=Sint) tmp(2*Nstate-1), res(Nstate) + INTEGER(KIND=Ctype) i + + DO i=1,Nstate + tmp(i) = x%state(x%pos-(Nstate+1) + i) + END DO + tmp(Nstate+1:) = 0_Sint + + DO i=1,Nstate-1 + call P_SDOT(tmp(i+Nstate),tmp(i:Nstate+i-1)) + END DO + + DO i=1,Nstate + call mod_sdot(res(i),offset%poly,tmp(i:Nstate+i-1)) + END DO + Rand_boost%state = 0_Sint + DO i=1,Nstate + Rand_boost%state(i) = res(i) + END DO + Rand_boost%need_fill = .TRUE. + Rand_boost%pos = Nstate + 1 + + END FUNCTION Rand_boost + !}}} + !{{{RECURSIVE FUNCTION Rand_step(x, n) + RECURSIVE FUNCTION Rand_step(x, n) + TYPE(Rand_state) Rand_step + TYPE(RAND_state), INTENT(IN) :: x + INTEGER, INTENT(IN) :: n + TYPE(Rand_offset) tmp + + CALL Rand_set_offset(tmp,n) + Rand_step=Rand_boost(x,tmp) + + END FUNCTION + !}}} + + !{{{RECURSIVE FUNCTION Rand_sint(x) + RECURSIVE FUNCTION Rand_sint(x) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Sint) Rand_sint + IF( x%pos .GT. Nstore )THEN + CALL repack_state(x) + END IF + IF( x%need_fill ) CALL fill_state(x) + Rand_sint = x%state(x%pos) + x%pos = x%pos + 1 + RETURN + END FUNCTION Rand_sint + !}}} + !{{{RECURSIVE SUBROUTINE Rand_sint_vec(iv,x) + RECURSIVE SUBROUTINE Rand_sint_vec(iv,x) + INTEGER(KIND=Sint), INTENT(OUT) :: iv(:) + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER left,start, chunk, i + + start=1 + left=SIZE(iv) + DO WHILE( left .GT. 0 ) + IF( x%pos .GT. Nstore )THEN + CALL repack_state(x) + END IF + IF( x%need_fill ) CALL fill_state(x) + + chunk = MIN(left,Nstore-x%pos+1) + DO i=0,chunk-1 + iv(start+i) = x%state(x%pos+i) + END DO + start = start + chunk + x%pos = x%pos + chunk + left = left - chunk + END DO + + RETURN + END SUBROUTINE Rand_sint_vec + !}}} + + +END MODULE Rand_int + +!}}} + +!{{{Rand (use Rand_int to make random reals) + +MODULE Rand + USE Rand_int + IMPLICIT NONE + +!{{{Parameters + + INTEGER, PARAMETER :: RAND_kind1 = SELECTED_REAL_KIND(10) + INTEGER, PARAMETER :: RAND_kind2 = SELECTED_REAL_KIND(6) + + INTEGER, PARAMETER, PRIVATE :: Max_block=100 + INTEGER(KIND=Sint), PRIVATE, PARAMETER :: M = 2147483647 + REAL(KIND=RAND_kind1), PRIVATE, PARAMETER :: INVMP1_1 = ( 1.0_RAND_kind1 / 2147483647.0_RAND_kind1 ) + REAL(KIND=RAND_kind2), PRIVATE, PARAMETER :: INVMP1_2 = ( 1.0_RAND_kind2 / 2147483647.0_RAND_kind2 ) + + LOGICAL, PARAMETER :: Can_step = Can_step_int + LOGICAL, PARAMETER :: Can_reverse = Can_reverse_int + +!}}} + PUBLIC Rand_real + + +INTERFACE Rand_real + MODULE PROCEDURE Rand_real1 + MODULE PROCEDURE Rand_real2 + MODULE PROCEDURE Rand_real_vec1 + MODULE PROCEDURE Rand_real_vec2 +END INTERFACE + + +CONTAINS + + !{{{RECURSIVE SUBROUTINE Rand_real1(y,x) + RECURSIVE SUBROUTINE Rand_real1(y,x) + REAL(KIND=RAND_kind1), INTENT(OUT) :: y + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Sint) Z + + Z = Rand_sint(x) + IF (Z .EQ. 0) Z = M + + y = ((Z-0.5d0)*INVMP1_1) + RETURN + END SUBROUTINE Rand_real1 + !}}} + !{{{RECURSIVE SUBROUTINE Rand_real2(y,x) + RECURSIVE SUBROUTINE Rand_real2(y,x) + REAL(KIND=RAND_kind2), INTENT(OUT) :: y + TYPE(RAND_state), INTENT(INOUT) :: x + INTEGER(KIND=Sint) Z + + Z = Rand_sint(x) + IF (Z .EQ. 0) Z = M + + y = ((Z-0.5d0)*INVMP1_1) ! generate in double and truncate. + RETURN + END SUBROUTINE Rand_real2 + !}}} + + !{{{RECURSIVE SUBROUTINE Rand_real_vec1(rv,x) + RECURSIVE SUBROUTINE Rand_real_vec1(rv,x) + TYPE(RAND_state), INTENT(INOUT) :: x + REAL(KIND=RAND_kind1) rv(:) + INTEGER left,start, chunk, i + INTEGER(KIND=Sint) Z + INTEGER(KIND=Sint) temp(MIN(SIZE(rv),Max_block)) + + start=0 + left=SIZE(rv) + DO WHILE( left .GT. 0 ) + chunk = MIN(left,Max_block) + CALL Rand_sint_vec(temp(1:chunk),x) + DO i=1,chunk + Z = temp(i) + IF (Z .EQ. 0) Z = M + rv(start+i) = (Z-0.5d0)*INVMP1_1 + END DO + start = start + chunk + left = left - chunk + END DO + + RETURN + END SUBROUTINE Rand_real_vec1 + !}}} + !{{{RECURSIVE SUBROUTINE Rand_real_vec2(rv,x) + RECURSIVE SUBROUTINE Rand_real_vec2(rv,x) + TYPE(RAND_state), INTENT(INOUT) :: x + REAL(KIND=RAND_kind2) rv(:) + INTEGER left,start, chunk, i + INTEGER(KIND=Sint) Z + INTEGER(KIND=Sint) temp(MIN(SIZE(rv),Max_block)) + + start=0 + left=SIZE(rv) + DO WHILE( left .GT. 0 ) + chunk = MIN(left,Max_block) + CALL Rand_sint_vec(temp(1:chunk),x) + DO i=1,chunk + Z = temp(i) + IF (Z .EQ. 0) Z = M + rv(start+i) = (Z-0.5d0)*INVMP1_2 + END DO + start = start + chunk + left = left - chunk + END DO + + RETURN + END SUBROUTINE Rand_real_vec2 + !}}} +END MODULE Rand + +!}}} + +!{{{test program +! PROGRAM test_random +! use Rand +! TYPE(RAND_state) x +! REAL y +! CALL Rand_load(x,(/5,4,3,2,1/)) +! DO I=0,10 +! CALL Rand_real(y,x) +! WRITE(*,10) I,y +! END DO +! +!10 FORMAT(I10,E25.16) +! +! END + +! 0 0.5024326127022505E-01 +! 1 0.8260946767404675E-01 +! 2 0.2123264316469431E-01 +! 3 0.6926658791489899E+00 +! 4 0.2076155943796039E+00 +! 5 0.4327449947595596E-01 +! 6 0.2204052871093154E-01 +! 7 0.1288446951657534E+00 +! 8 0.4859915426932275E+00 +! 9 0.5721384193748236E-01 +! 10 0.7996825082227588E+00 +! + + +!}}} + diff --git a/ext/panphasia/panphasia_routines.f b/ext/panphasia/panphasia_routines.f new file mode 100644 index 0000000..2e1bfbd --- /dev/null +++ b/ext/panphasia/panphasia_routines.f @@ -0,0 +1,3334 @@ +c=====================================================================================c +c +c The code below was written by: Adrian Jenkins, +c Institute for Computational Cosmology +c Department of Physics +c South Road +c Durham, DH1 3LE +c United Kingdom +c +c This file is part of the software made public in +c Jenkins and Booth 2013 - arXiv:1306.XXXX +c +c The software computes the Panphasia Gaussian white noise field +c realisation described in detail in Jenkins 2013 - arXiv:1306.XXXX +c +c +c +c This software is free, subject to a agreeing licence conditions: +c +c +c (i) you will publish the phase descriptors and reference Jenkins (13) +c for any new simulations that use Panphasia phases. You will pass on this +c condition to others for any software or data you make available publically +c or privately that makes use of Panphasia. +c +c (ii) that you will ensure any publications using results derived from Panphasia +c will be submitted as a final version to arXiv prior to or coincident with +c publication in a journal. +c +c (iii) that you report any bugs in this software as soon as confirmed to +c A.R.Jenkins@durham.ac.uk +c +c (iv) that you understand that this software comes with no warranty and that is +c your responsibility to ensure that it is suitable for the purpose that +c you intend. +c +c=====================================================================================c + +c===================================================================================== +c List of subroutines and arguments. Each of these is documented in c +c arXiV/1306.XXXX c +c c +c Adrian Jenkins, 24/6/2013. c +c------------------------------------------------------------------------------------- +c Version 1.000 +c=================================================================================== + + module pan_state + use Rand + implicit none + integer maxdim_, maxlev_, maxpow_ + parameter (maxdim_=60,maxlev_=50, maxpow_ = 3*maxdim_) + integer nmulti_ + parameter (nmulti_=64) + integer range_max + parameter(range_max=10000) + integer indmin,indmax + parameter (indmin=-1, indmax=60) + + + type state_data + integer base_state(5), base_lev_start(5,0:maxdim_) + TYPE(Rand_offset) :: poweroffset(0:maxpow_) + TYPE(Rand_offset) :: superjump + TYPE(Rand_state) :: current_state(-1:maxpow_) + + integer layer_min,layer_max,indep_field + +! This module stores information needed to access the part of Panphasia +! selected by a particular descriptor. + integer*8 xorigin_store(0:1,0:1,0:1) + integer*8 yorigin_store(0:1,0:1,0:1) + integer*8 zorigin_store(0:1,0:1,0:1) + + integer*4 lev_common + integer*4 layer_min_store,layer_max_store + + integer*8 ix_abs_store,iy_abs_store,iz_abs_store + integer*8 ix_per_store,iy_per_store,iz_per_store + integer*8 ix_rel_store,iy_rel_store,iz_rel_store + + real*8 exp_coeffs(8,0:7,-1:maxdim_) + integer*8 xcursor(0:maxdim_),ycursor(0:maxdim_),zcursor(0:maxdim_) + +c Local box parameters + + integer*4 ixshift(0:1,0:1,0:1) + integer*4 iyshift(0:1,0:1,0:1) + integer*4 izshift(0:1,0:1,0:1) + + +c more state variables + real*8 cell_data(9,0:7) + integer*4 ixh_last,iyh_last,izh_last + integer init + + integer return_cell_props_init + integer reset_lecuyer_state_init + integer*8 p_xcursor(indmin:indmax),p_ycursor(indmin:indmax),p_zcursor(indmin:indmax) + + + + end type state_data + + + +c Switch for enabling custom spherical function +c Set isub_spherical_function = 1 to turn on the spherical function + integer*4 isub_spherical_function + parameter (isub_spherical_function=0) + + end module pan_state + + +c================================================================================ +c Begin white noise routines +c================================================================================ + recursive subroutine start_panphasia(ldata,descriptor,ngrid,VERBOSE) + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + character*100 descriptor + integer ngrid + integer VERBOSE + + + + integer*4 wn_level_base,i_base,i_base_y,i_base_z + integer*8 i_xorigin_base,i_yorigin_base,i_zorigin_base, check_rand + character*20 name + + integer ratio + integer lextra + integer level_p + + + integer*8 ix_abs,iy_abs,iz_abs + integer*8 ix_per,iy_per,iz_per + integer*8 ix_rel,iy_rel,iz_rel + + !integer layer_min,layer_max,indep_field + !common /oct_range/ layer_min,layer_max,indep_field + + call parse_descriptor(descriptor ,wn_level_base,i_xorigin_base,i_yorigin_base, + & i_zorigin_base,i_base,i_base_y,i_base_z,check_rand,name) + + + lextra = (log10(real(ngrid)/real(i_base))+0.001)/log10(2.0) + ratio = 2**lextra + + if (ratio*i_base.ne.ngrid) + &stop 'Value of ngrid inconsistent with dim of region in Panphasia' + + level_p = wn_level_base + lextra + + ix_abs = ishft(i_xorigin_base,lextra) + iy_abs = ishft(i_yorigin_base,lextra) + iz_abs = ishft(i_zorigin_base,lextra) + + ix_per = i_base*ratio + iy_per = i_base*ratio + iz_per = i_base*ratio + +c Set the refinement position at the origin. + + ix_rel = 0 + iy_rel = 0 + iz_rel = 0 + + call set_phases_and_rel_origin(ldata,descriptor,level_p,ix_rel,iy_rel,iz_rel,VERBOSE) + +c Finally set the octree functions required for making cosmological +c initial conditions. These are passed using a common block. + + ldata%layer_min = 0 + ldata%layer_max = level_p + ldata%indep_field = 1 + + end +c================================================================================= + recursive subroutine set_phases_and_rel_origin(ldata,descriptor,lev,ix_rel,iy_rel,iz_rel,VERBOSE) + use pan_state + !use descriptor_phases + implicit none + type(state_data), intent(inout) :: ldata + character*100 descriptor + integer lev + integer*8 ix_abs,iy_abs,iz_abs + integer*8 ix_per,iy_per,iz_per + integer*8 ix_rel,iy_rel,iz_rel + integer*8 xorigin,yorigin,zorigin + + integer VERBOSE + integer MYID + integer*8 maxco + integer i + integer px,py,pz + + integer lnblnk + integer*8 mconst + parameter(mconst = 2147483647_Dint) + + integer*4 wn_level_base,i_base,i_base_y,i_base_z + integer*8 i_xorigin_base,i_yorigin_base,i_zorigin_base, check_rand + integer lextra,ratio + character*20 phase_name + +c----------------------------------------------------------------------------------------------- + + call initialise_panphasia(ldata) + + call validate_descriptor(ldata, descriptor,-1,check_rand) + + call parse_descriptor(descriptor ,wn_level_base,i_xorigin_base,i_yorigin_base, + & i_zorigin_base,i_base,i_base_y,i_base_z,check_rand,phase_name) + lextra = lev - wn_level_base + ratio = 2**lextra + + ix_abs = ishft(i_xorigin_base,lextra) + iy_abs = ishft(i_yorigin_base,lextra) + iz_abs = ishft(i_zorigin_base,lextra) + + ix_per = i_base*ratio + iy_per = i_base*ratio + iz_per = i_base*ratio + +c------------------------------------------------------------------------- +c Error checking +c------------------------------------------------------------------------- + if ((lev.lt.0).or.(lev.gt.maxlev_)) stop 'Level out of range! (1)' + + + maxco = 2_dint**lev + + if (ix_abs.lt.0) stop 'Error: ix_abs negative (1)' + if (iy_abs.lt.0) stop 'Error: iy_abs negative (1)' + if (iz_abs.lt.0) stop 'Error: iz_abs negative (1)' + + if (ix_rel.lt.0) stop 'Error: ix_rel negative (1)' + if (iy_rel.lt.0) stop 'Error: iy_rel negative (1)' + if (iz_rel.lt.0) stop 'Error: iz_rel negative (1)' + + + if (ix_abs+ix_rel.ge.maxco) + & stop 'Error: ix_abs + ix_rel out of range. (1)' + if (iy_abs+iy_rel.ge.maxco) + & stop 'Error: iy_abs + iy_rel out of range. (1)' + if (iz_abs+iz_rel.ge.maxco) + & stop 'Error: iz_abs + iz_rel out of range. (1)' + +c---------------------------------------------------------------------------------------- +c To allow the local box to wrap around, if needed, define a series of eight +c 'origins'. For many purposes (ix,iy,iz) = (0,0,0) is the only origin needed. + + + do px=0,1 + do py=0,1 + do pz=0,1 + + xorigin = max(0,( ix_abs + ix_rel - px*ix_per )/2) + yorigin = max(0,( iy_abs + iy_rel - py*iy_per )/2) + zorigin = max(0,( iz_abs + iz_rel - pz*iz_per )/2) + + ldata%ixshift(px,py,pz) = max(0, ix_abs + ix_rel -px*ix_per) - 2*xorigin + ldata%iyshift(px,py,pz) = max(0, iy_abs + iy_rel -py*iy_per) - 2*yorigin + ldata%izshift(px,py,pz) = max(0, iz_abs + iz_rel -pz*iz_per) - 2*zorigin + + +c Store box details: store the positions at level lev-1 + + + ldata%xorigin_store(px,py,pz) = xorigin + ldata%yorigin_store(px,py,pz) = yorigin + ldata%zorigin_store(px,py,pz) = zorigin + + enddo + enddo + enddo + + ldata%lev_common = lev + + + ldata%ix_abs_store = ix_abs + ldata%iy_abs_store = iy_abs + ldata%iz_abs_store = iz_abs + + ldata%ix_per_store = ix_per + ldata%iy_per_store = iy_per + ldata%iz_per_store = iz_per + + ldata%ix_rel_store = ix_rel + ldata%iy_rel_store = iy_rel + ldata%iz_rel_store = iz_rel + + +c Reset all cursor values to negative numbers. + + do i=0,maxdim_ + ldata%xcursor(i) = -999 + ldata%ycursor(i) = -999 + ldata%zcursor(i) = -999 + enddo + if (VERBOSE.gt.1) then + if (MYID.lt.1) then + print*,'----------------------------------------------------------' + print*,'Successfully initialised Panphasia box at level ',lev + write (6,105) ix_abs,iy_abs,iz_abs + write (6,106) ix_rel,iy_rel,iz_rel + write (6,107) ix_per,iy_per,iz_per + write (6,*) 'Phases used: ',descriptor(1:lnblnk(descriptor)) + print*,'----------------------------------------------------------' + endif + endif + 105 format(' Abs origin: (',i12,',',i12,',',i12,')') + 106 format(' Rel origin: (',i12,',',i12,',',i12,')') + 107 format(' Periods : (',i12,',',i12,',',i12,')') + end +c================================================================================ + recursive subroutine initialise_panphasia( ldata ) + use Rand + use pan_state + implicit none + + type(state_data), intent(inout) :: ldata + + TYPE(Rand_state) :: state + TYPE(Rand_offset) :: offset + integer ninitialise + parameter (ninitialise=218) + integer i + real*8 rand_num + + + call Rand_seed(state,ninitialise) + + call Rand_save(ldata%base_state,state) + + call Rand_set_offset(offset,1) + +c Calculate offsets of powers of 2 times nmulti +c + + do i=0,maxpow_ + ldata%poweroffset(i) = Rand_mul_offset(offset,nmulti_) + offset = Rand_mul_offset(offset,2) + enddo + + +c Compute the base state for each level. + + call Rand_load(state,ldata%base_state) + state = Rand_step(state,8) + + do i=0,maxdim_ + call Rand_save(ldata%base_lev_start(1,i),state) + state = Rand_boost(state,ldata%poweroffset(3*i)) + enddo + +c Set superjump to value 2**137 - used occasionally in computing Gaussian variables +c when the value of the returned random number is less an 10-6. + + call Rand_set_offset(ldata%superjump,1) + + do i=1,137 + ldata%superjump = Rand_mul_offset(ldata%superjump,2) + enddo + + +c Run time test to see if one particular value can be recovered. + + call Rand_load(state,ldata%base_lev_start(1,34)) + call Rand_real(rand_num,state) + + if (abs(rand_num- 0.828481889948473d0).gt.1.e-14) then + print*,'Error in initialisation!' + print*,'Rand_num = ',rand_num + print*,'Target value = ', 0.828481889948473d0 + stop + endif + return + end +c================================================================================= + recursive subroutine panphasia_cell_properties(ldata,ixcell,iycell,izcell,cell_prop) + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + !integer layer_min,layer_max,indep_field + !common /oct_range/ layer_min,layer_max,indep_field + integer*4 ixcell,iycell,izcell + real*8 cell_prop(9) + + call adv_panphasia_cell_properties(ldata,ixcell,iycell,izcell,ldata%layer_min, + & ldata%layer_max,ldata%indep_field,cell_prop) + return + end +c================================================================================= + recursive subroutine adv_panphasia_cell_properties(ldata,ixcell,iycell,izcell,layer_min, + & layer_max,indep_field,cell_prop) + use pan_state + !use descriptor_phases + implicit none + + type(state_data), intent(inout) :: ldata + + integer*4 lev + integer*4 ixcell,iycell,izcell + integer layer_min,layer_max,indep_field + real*8 cell_prop(9) +c real*8 cell_data(9,0:7) + integer*4 j,l,lx,ly,lz + integer*4 px,py,pz + +c integer*4 ixh_last,iyh_last,izh_last + +c integer init +c data init/0/ +c save init,cell_data,ixh_last,iyh_last,izh_last ! Keep internal state + + integer*4 ixh,iyh,izh + + lev = ldata%lev_common + +c------- Error checking ----------------------------- + + if (layer_min.gt.layer_max) then + + if (layer_min-layer_max.eq.1) then ! Not necessarily bad. No octree basis functions + do j=1,9 ! required at this level and position. + cell_prop(j) = 0.0d0 ! Set returned cell_prop data to zero. + enddo + return + endif + + print*,'Warning: layer_min.gt.layer_max!' + print*,'layer_min = ',layer_min + print*,'layer_max = ',layer_max + print*,'ixcell,iycell,izcell',ixcell,iycell,izcell + + call flush(6) + stop 'Error: layer_min.gt.layer_max' + endif + + if (layer_max.gt.ldata%lev_common) then + print*,'lev_common = ',ldata%lev_common + print*,'layer_min = ',layer_min + print*,'layer_max = ',layer_max + stop 'Error: layer_max.gt.lev_common' + endif + if ((indep_field.lt.-1).or.(indep_field.gt.1)) + & stop 'Error: indep_field out of range' + +c---------------------------------------------------- +c Check which 'origin' to use. + + px = 0 + py = 0 + pz = 0 + + if (ldata%ix_rel_store+ixcell.ge.ldata%ix_per_store) px = 1 ! Crossed x-periodic bndy + if (ldata%iy_rel_store+iycell.ge.ldata%iy_per_store) py = 1 ! Crossed y-periodic bndy + if (ldata%iz_rel_store+izcell.ge.ldata%iz_per_store) pz = 1 ! Crossed z-periodic bndy +c---------------------------------------------------- + + + ixh = (ixcell+ldata%ixshift(px,py,pz) )/2 + iyh = (iycell+ldata%iyshift(px,py,pz) )/2 + izh = (izcell+ldata%izshift(px,py,pz) )/2 + + lx = mod(ixcell+ldata%ixshift(px,py,pz) ,2) + ly = mod(iycell+ldata%iyshift(px,py,pz) ,2) + lz = mod(izcell+ldata%izshift(px,py,pz) ,2) + + + l = 4*lx + 2*ly + lz ! Determine which cell is required + +cc------------------ If no new evalation is needed skip assignment ----- + if ((ldata%init.eq.1).and.(ixh.eq.ldata%ixh_last).and.(iyh.eq.ldata%iyh_last).and. + & (izh.eq.ldata%izh_last).and.(layer_min.eq.ldata%layer_min_store).and. + & (layer_max.eq.ldata%layer_max_store)) goto 24 +cc----------------------------------------------------------------------------- + + + call return_cell_props(ldata,lev,ixh,iyh,izh,px,py,pz,layer_min, + & layer_max,indep_field,ldata%cell_data) + +c Remember previous values. + + ldata%ixh_last = ixh + ldata%iyh_last = iyh + ldata%izh_last = izh + + + 24 continue + + + do j=1,9 + cell_prop(j) = ldata%cell_data(j,l) ! Copy the required data + enddo + + if (ldata%init.eq.0) ldata%init=1 + + return + end +c================================================================================= + recursive subroutine return_cell_props(ldata,lev_input,ix_half,iy_half,iz_half, + & px,py,pz,layer_min,layer_max,indep_field,cell_data) + use Rand + use pan_state + !use descriptor_phases + implicit none + type(state_data), intent(inout) :: ldata + integer lev_input,ix_half,iy_half,iz_half,px,py,pz + integer layer_min,layer_max,indep_field + real*8 cell_data(9,0:7) + + real*8 garray(0:63) + integer lev + integer*8 xarray,yarray,zarray + + integer i,istart,icell_name + + +c integer init +c data init/0/ +c save init + + + +c-------------------------------------------------------- +c--------------------------- Initialise level -1 -------- +c-------------------------------------------------------- + + if (ldata%return_cell_props_init.eq.0) then ! First time called. Set up the Legendre coefficients + ldata%return_cell_props_init = 1 ! for the root cell. This is the first term on the + call Rand_load(ldata%current_state(-1),ldata%base_state) ! right hand side of the equation in appendix C of + call return_gaussian_array(ldata,-1,8,garray) ! Jenkins 2013 that defines PANPHASIA. + ldata%exp_coeffs(1,0,-1) = garray(0) + ldata%exp_coeffs(2,0,-1) = garray(1) + ldata%exp_coeffs(3,0,-1) = garray(2) + ldata%exp_coeffs(4,0,-1) = garray(3) + ldata%exp_coeffs(5,0,-1) = garray(4) + ldata%exp_coeffs(6,0,-1) = garray(5) + ldata%exp_coeffs(7,0,-1) = garray(6) + ldata%exp_coeffs(8,0,-1) = garray(7) + + ldata%layer_min_store = layer_min + ldata%layer_max_store = layer_max + + endif + +c-------------------------------------------------------- +c---------------------------- Error checking ------------ +c-------------------------------------------------------- + + lev = lev_input-1 + + if (lev_input.ne.ldata%lev_common) stop 'Box initialised at a different level !' + if (ix_half.lt.0) then + print*,'ix_half negative',ix_half + stop 'ix_half out of range!' + endif + if (iy_half.lt.0) stop 'iy_half out of range!' + if (iz_half.lt.0) then + print*,'iz_half negative',iz_half + stop 'iz_half out of range!' + endif + + + xarray = ldata%xorigin_store(px,py,pz) + ix_half + yarray = ldata%yorigin_store(px,py,pz) + iy_half + zarray = ldata%zorigin_store(px,py,pz) + iz_half + + +c If layer_max or layer_min have changed, rebuild from the start and reset the +c recorded value of layer_max and layer_min + + if ((layer_max.ne.ldata%layer_max_store).or.(layer_min.ne.ldata%layer_min_store)) then + + if (layer_min.gt.layer_max) stop 'layer_min > layer_max : 2' + + istart = max(1,layer_min-1) + + ldata%layer_max_store = layer_max + ldata%layer_min_store = layer_min + + goto 10 + + endif + + + if ((xarray.eq.ldata%xcursor(lev)).and.(yarray.eq.ldata%ycursor(lev)).and.(zarray.eq.ldata%zcursor(lev))) return ! Nothing to do. + +c=========================================================================================================== +c------------- First determine which levels need to be (re)computed +c=========================================================================================================== + + istart = 0 + do i=lev-1,0,-1 + if ((ishft(xarray,i-lev).eq.ldata%xcursor(i)).and.(ishft(yarray,i-lev).eq.ldata%ycursor(i)).and. + & (ishft(zarray,i-lev).eq.ldata%zcursor(i))) then + istart = i+1 + goto 10 + endif + enddo + + 10 continue + + +c==================================================================================== +c------------- Now compute each level as required and update (x,y,z) cursor variables +c==================================================================================== + + do i=istart,lev + + icell_name = 0 + + ldata%xcursor(i) = ishft(xarray,i-lev) + ldata%ycursor(i) = ishft(yarray,i-lev) + ldata%zcursor(i) = ishft(zarray,i-lev) + + if (btest(ldata%xcursor(i),0)) icell_name = icell_name + 4 + if (btest(ldata%ycursor(i),0)) icell_name = icell_name + 2 + if (btest(ldata%zcursor(i),0)) icell_name = icell_name + 1 + + call reset_lecuyer_state(ldata,i,ldata%xcursor(i),ldata%ycursor(i),ldata%zcursor(i)) + + if (isub_spherical_function.ne.1) then + call return_gaussian_array(ldata,i,64,garray) + else + call return_oct_sf_expansion(ldata,i,lev,ldata%xcursor(i),ldata%ycursor(i),ldata%zcursor(i), + & 64,garray) + endif + + + call evaluate_panphasia(ldata,i,maxdim_,garray,layer_min, + & layer_max, indep_field, icell_name,cell_data,ldata%exp_coeffs) + + enddo + return + end +c================================================================================= + recursive subroutine evaluate_panphasia(ldata,nlev,maxdim,g, + & layer_min,layer_max,indep_field,icell_name,cell_data,leg_coeff) + use pan_state + implicit none +c--------------------------------------------------------------------------------- +c This subroutine calculates the Legendre block coefficients for the eight child +c cells of an octree cell. +c +c----------------- Define subroutine arguments ----------------------------------- + type(state_data), intent(inout) :: ldata + integer nlev,maxdim + integer layer_min,layer_max,indep_field + integer icell_name + real*8 leg_coeff(0:7,0:7,-1:maxdim),cell_data(0:8,0:7) + real*8 g(*) + +c----------------- Define constants using notation from appendix A of Jenkins 2013 + + real*8 a1,a2,b1,b2,b3,c1,c2,c3,c4 + + parameter(a1 = 0.5d0*sqrt(3.0d0), a2 = 0.5d0) + + parameter(b1 = 0.75d0, b2 = 0.25d0*sqrt(3.0d0)) + parameter(b3 = 0.25d0) + + parameter(c1 = sqrt(27.0d0/64.0d0), c2 = 0.375d0) + parameter(c3 = sqrt(3.0d0/64.0d0), c4 = 0.125d0) + +c----------------- Define octree variables -------------------------------- + + real*8 coeff_p000, coeff_p001, coeff_p010, coeff_p011 + real*8 coeff_p100, coeff_p101, coeff_p110, coeff_p111 + + real*8 positive_octant_lc(0:7,0:1,0:1,0:1),temp_value(0:7,0:7) + integer i,j,ix,iy,iz + integer icx,icy,icz + integer iox,ioy,ioz + real*8 parity,isig + real*8 usually_rooteighth_factor +c-------------------------------------------------------------------------- + +c------------- Set the Legendre block coefficients for the parent cell +c itself. These are either inherited from the octree above +c or set to zero depending on which levels of the octree +c have been selected to be populated with the octree +c basis functions. +c--------------------------------------------------------------------------- + if (nlev.ge.layer_min) then + coeff_p000 = leg_coeff(0,icell_name,nlev-1) + coeff_p001 = leg_coeff(1,icell_name,nlev-1) + coeff_p010 = leg_coeff(2,icell_name,nlev-1) + coeff_p011 = leg_coeff(3,icell_name,nlev-1) + coeff_p100 = leg_coeff(4,icell_name,nlev-1) + coeff_p101 = leg_coeff(5,icell_name,nlev-1) + coeff_p110 = leg_coeff(6,icell_name,nlev-1) + coeff_p111 = leg_coeff(7,icell_name,nlev-1) + else + coeff_p000 = 0.0d0 + coeff_p001 = 0.0d0 + coeff_p010 = 0.0d0 + coeff_p011 = 0.0d0 + coeff_p100 = 0.0d0 + coeff_p101 = 0.0d0 + coeff_p110 = 0.0d0 + coeff_p111 = 0.0d0 + endif + +c Apply layer_max and indep_field inputs --------------------------------- + + if (indep_field.ne.-1) then + usually_rooteighth_factor = sqrt(0.125d0) + else + usually_rooteighth_factor = 0.0d0 ! This option returns only the indep field. + endif ! For use in testing only. + + if (nlev.ge.layer_max) then + do i=1,56 + g(i) = 0.0d0 ! Set octree coefficients to zero as not required. + enddo + endif + + if (indep_field.eq.0) then ! Set the independent field to zero as not required. + do i=57,64 + g(i) = 0.0d0 + enddo + endif +c----------------------------------------------------------------------------- +c +c +c The calculations immediately below evalute the eight Legendre block coefficients for the +c child cell that is furthest from the absolute coordiate origin of the octree - we call +c this the positive octant cell. +c +c The coefficients are given by a set of matrix equations which combine the +c coefficients of the Legendre basis functions of the parent cell itself, with +c the coefficients from the octree basis functions that occupy the +c parent cell. +c +c The Legendre basis function coefficients of the parent cell are stored in +c the variables, coeff_p000 - coeff_p111 and are initialise above. +c +c The coefficients of the octree basis functions are determined by the +c first 56 entries of the array g, which is passed down into this +c subroutine. +c +c These two sources of information are combined using a set of linear equations. +c The coefficients of these linear equations are taken from the inverses or +c equivalently transposes of the matrices given in appendix A of Jenkins 2013. +c The matrices in appendix A define the PANPHASIA octree basis functions +c in terms of Legendre blocks. +c +c All of the Legendre block functions of the parent cell, and the octree basis +c functions of the parent cell share one of eight distinct symmetries with respect to +c reflection about the x1=0,x2=0,x3=0 planes (where the origin is taken as the parent +c cell centre and x1,x2,x3 are parallel to the cell edges). +c +c Each function has either purely reflectional symmetry (even parity) or +c reflectional symmetry with a sign change (odd parity) about each of the three principal +c planes through the cell centre. There are therefore 8 parity types. We can label each +c parity type with a binary triplet. So 000 is pure reflectional symmetry about +c all of the principal planes. +c +c In the code below the parent cell Legendre block functions, and octree functions are +c organised into eight groups each with eight members. Each group has a common +c parity type. +c +c We keep the contributions of each parity type to each of the eight Legendre basis +c functions occupying the positive octant cell separate. Once they have all been +c computed, we can apply the different symmetry operations and determine the +c Legendre block basis functions for all eight child cells at the same time. +c--------------------------------------------------------------------------------------- +c 000 parity + + positive_octant_lc(0, 0,0,0) = 1.0d0*coeff_p000 + positive_octant_lc(1, 0,0,0) = -1.0d0*g(1) + positive_octant_lc(2, 0,0,0) = -1.0d0*g(2) + positive_octant_lc(3, 0,0,0) = 1.0d0*g(3) + positive_octant_lc(4, 0,0,0) = -1.0d0*g(4) + positive_octant_lc(5, 0,0,0) = 1.0d0*g(5) + positive_octant_lc(6, 0,0,0) = 1.0d0*g(6) + positive_octant_lc(7, 0,0,0) = -1.0d0*g(7) + +c 100 parity + + positive_octant_lc(0, 1,0,0) = a1*coeff_p100 - a2*g(8) + positive_octant_lc(1, 1,0,0) = g(9) + positive_octant_lc(2, 1,0,0) = g(10) + positive_octant_lc(3, 1,0,0) = -g(11) + positive_octant_lc(4, 1,0,0) = a2*coeff_p100 + a1*g(8) + positive_octant_lc(5, 1,0,0) = -g(12) + positive_octant_lc(6, 1,0,0) = -g(13) + positive_octant_lc(7, 1,0,0) = g(14) + +c 010 parity + + positive_octant_lc(0, 0,1,0) = a1*coeff_p010 - a2*g(15) + positive_octant_lc(1, 0,1,0) = g(16) + positive_octant_lc(2, 0,1,0) = a2*coeff_p010 + a1*g(15) + positive_octant_lc(3, 0,1,0) = -g(17) + positive_octant_lc(4, 0,1,0) = g(18) + positive_octant_lc(5, 0,1,0) = -g(19) + positive_octant_lc(6, 0,1,0) = -g(20) + positive_octant_lc(7, 0,1,0) = g(21) + + +c 001 parity + + positive_octant_lc(0, 0,0,1) = a1*coeff_p001 - a2*g(22) + positive_octant_lc(1, 0,0,1) = a2*coeff_p001 + a1*g(22) + positive_octant_lc(2, 0,0,1) = g(23) + positive_octant_lc(3, 0,0,1) = -g(24) + positive_octant_lc(4, 0,0,1) = g(25) + positive_octant_lc(5, 0,0,1) = -g(26) + positive_octant_lc(6, 0,0,1) = -g(27) + positive_octant_lc(7, 0,0,1) = g(28) + +c 110 parity + + positive_octant_lc(0, 1,1,0) = b1*coeff_p110 - b2*g(29) + b3*g(30) - b2*g(31) + positive_octant_lc(1, 1,1,0) = -g(32) + positive_octant_lc(2, 1,1,0) = b2*coeff_p110 - b3*g(29) - b2*g(30) + b1*g(31) + positive_octant_lc(3, 1,1,0) = g(33) + positive_octant_lc(4, 1,1,0) = b2*coeff_p110 + b1*g(29) + b2*g(30) + b3*g(31) + positive_octant_lc(5, 1,1,0) = g(34) + positive_octant_lc(6, 1,1,0) = b3*coeff_p110 + b2*g(29) - b1*g(30) - b2*g(31) + positive_octant_lc(7, 1,1,0) = -g(35) + + +c 011 parity + + positive_octant_lc(0, 0,1,1) = b1*coeff_p011 - b2*g(36) + b3*g(37) - b2*g(38) + positive_octant_lc(1, 0,1,1) = b2*coeff_p011 - b3*g(36) - b2*g(37) + b1*g(38) + positive_octant_lc(2, 0,1,1) = b2*coeff_p011 + b1*g(36) + b2*g(37) + b3*g(38) + positive_octant_lc(3, 0,1,1) = b3*coeff_p011 + b2*g(36) - b1*g(37) - b2*g(38) + positive_octant_lc(4, 0,1,1) = -g(39) + positive_octant_lc(5, 0,1,1) = g(40) + positive_octant_lc(6, 0,1,1) = g(41) + positive_octant_lc(7, 0,1,1) = -g(42) + +c 101 parity + + positive_octant_lc(0, 1,0,1) = b1*coeff_p101 - b2*g(43) + b3*g(44) - b2*g(45) + positive_octant_lc(1, 1,0,1) = b2*coeff_p101 - b3*g(43) - b2*g(44) + b1*g(45) + positive_octant_lc(2, 1,0,1) = -g(46) + positive_octant_lc(3, 1,0,1) = g(47) + positive_octant_lc(4, 1,0,1) = b2*coeff_p101 + b1*g(43) + b2*g(44) + b3*g(45) + positive_octant_lc(5, 1,0,1) = b3*coeff_p101 + b2*g(43) - b1*g(44) - b2*g(45) + positive_octant_lc(6, 1,0,1) = g(48) + positive_octant_lc(7, 1,0,1) = -g(49) + +c 111 parity + + positive_octant_lc(0, 1,1,1) = c1*coeff_p111 - c2*g(50) - c2*g(51) - c2*g(52) + c3*g(53) + c3*g(54) + c3*g(55) - c4*g(56) + positive_octant_lc(1, 1,1,1) = c2*coeff_p111 + c1*g(50) - c2*g(51) + c2*g(52) - c3*g(53) + c3*g(54) + c4*g(55) + c3*g(56) + positive_octant_lc(2, 1,1,1) = c2*coeff_p111 + c2*g(50) + c1*g(51) - c2*g(52) - c3*g(53) - c4*g(54) + c3*g(55) - c3*g(56) + positive_octant_lc(3, 1,1,1) = c3*coeff_p111 - c3*g(50) - c3*g(51) + c4*g(52) - c1*g(53) - c2*g(54) - c2*g(55) - c2*g(56) + positive_octant_lc(4, 1,1,1) = c2*coeff_p111 - c2*g(50) + c2*g(51) + c1*g(52) + c4*g(53) - c3*g(54) + c3*g(55) + c3*g(56) + positive_octant_lc(5, 1,1,1) = c3*coeff_p111 + c3*g(50) - c4*g(51) - c3*g(52) + c2*g(53) - c1*g(54) - c2*g(55) + c2*g(56) + positive_octant_lc(6, 1,1,1) = c3*coeff_p111 + c4*g(50) + c3*g(51) + c3*g(52) + c2*g(53) + c2*g(54) - c1*g(55) - c2*g(56) + positive_octant_lc(7, 1,1,1) = c4*coeff_p111 - c3*g(50) + c3*g(51) - c3*g(52) - c2*g(53) + c2*g(54) - c2*g(55) + c1*g(56) +c-------------------------------------------------------------------------------------------- +c +c +c We now calculate the Legendre basis coefficients for all eight child cells +c by applying the appropriate reflectional parities to the coefficients +c calculated above for the positive octant child cell. +c +c See equations A2 and A3 in appendix A of Jenkins 2013. +c +c The reflectional parity is given by (ix,iy,iz) loops below. +c +c The (icx,icy,icz) loops below, loop over the eight child cells. +c +c The positive octant child cell is given below by (icx=icy=icz=0) or i=7. +c +c The combination ix*icx +iy*icy +iz*icz is either even or odd, depending +c on whether the parity change is even or odd. +c +c The variables iox,ioy,ioz are used to loop over the different +c types of Legendre basis function. +c +c The combination iox*icx + ioy*icy + ioz*icz is either even and odd +c and identifies which coefficients keep or change sign respectively +c due to a pure reflection about the principal planes. +c-------------------------------------------------------------------------------------------- + + do iz=0,7 + do iy=0,7 + temp_value(iy,iz) = 0.0d0 ! Zero temporary sums + enddo + enddo +c-------------------------------------------------------------------------------------------- + do iz=0,1 ! Loop over z parity (0=keep sign, 1=change sign) + do iy=0,1 ! Loop over y parity (0=keep sign, 1=change sign) + do ix=0,1 ! Loop over x parity (0=keep sign, 1=change sign) + + + do icx=0,1 ! Loop over x-child cells + do icy=0,1 ! Loop over y-child cells + do icz=0,1 ! Loop over z-child cells + + if (mod(ix*icx+iy*icy+iz*icz,2).eq.0) then + parity = 1.0d0 + else + parity =-1.0d0 + endif + + i = 7 - 4*icx -2*icy - icz ! Calculate which child cell this is. + + + do iox=0,1 ! Loop over Legendre basis function type + do ioy=0,1 ! Loop over Legendre basis function type + do ioz=0,1 ! Loop over Legendre basis function type + + j = 4*iox + 2*ioy + ioz + + if (mod(iox*icx + ioy*icy + ioz*icz,2).eq.0) then + isig = parity + else + isig = -parity + endif + + temp_value(j,i) = temp_value(j,i) + isig*positive_octant_lc(j,ix,iy,iz) + + enddo + enddo + enddo + + enddo + enddo + enddo + + enddo + enddo + enddo + + +c Assign values of the output variables + + do i=0,7 + do j=0,7 + leg_coeff(j,i,nlev) = temp_value(j,i)*usually_rooteighth_factor + cell_data(j,i) = leg_coeff(j,i,nlev) + enddo + enddo + +c Finally set the independent field values + + cell_data(8,0) = g(57) + cell_data(8,1) = g(58) + cell_data(8,2) = g(59) + cell_data(8,3) = g(60) + cell_data(8,4) = g(61) + cell_data(8,5) = g(62) + cell_data(8,6) = g(63) + cell_data(8,7) = g(64) + + + return + end +c================================================================================= + recursive subroutine reset_lecuyer_state(ldata,lev,xcursor,ycursor,zcursor) + use pan_state + implicit none + + type(state_data), intent(inout) :: ldata + integer lev + integer*8 xcursor,ycursor,zcursor + +c integer indmin,indmax +c parameter (indmin=-1, indmax=60) +c integer*8 p_xcursor(indmin:indmax),p_ycursor(indmin:indmax),p_zcursor(indmin:indmax) +c save p_xcursor,p_ycursor,p_zcursor + integer i +c integer init +c data init/0/ +c save init + + if (ldata%reset_lecuyer_state_init.eq.0) then ! Initialise p_cursor variables with + ldata%reset_lecuyer_state_init = 1 ! negative values. + do i=indmin,indmax + ldata%p_xcursor(i) = -9999 + ldata%p_ycursor(i) = -9999 + ldata%p_zcursor(i) = -9999 + enddo + endif + + if ( (xcursor.eq.ldata%p_xcursor(lev)).and.(ycursor.eq.ldata%p_ycursor(lev)).and. + & (zcursor.eq.ldata%p_zcursor(lev)+1)) then + ldata%p_xcursor(lev) = xcursor + ldata%p_ycursor(lev) = ycursor + ldata%p_zcursor(lev) = zcursor + return + endif + + call advance_current_state(ldata,lev,xcursor,ycursor,zcursor) + + ldata%p_xcursor(lev) = xcursor + ldata%p_ycursor(lev) = ycursor + ldata%p_zcursor(lev) = zcursor + + + return + end +c================================================================================= + recursive subroutine advance_current_state(ldata,lev,x,y,z) + use Rand + use pan_state + !use descriptor_phases + implicit none + + type(state_data), intent(inout) :: ldata + + integer lev + integer*8 x,y,z + + integer*8 lev_range + + TYPE(Rand_offset) :: offset1,offset2 + TYPE(Rand_offset) :: offset_x,offset_y,offset_z,offset_total + + integer ndiv,nrem + integer*8 ndiv8,nrem8 + integer nfactor + parameter (nfactor=291071) ! Value unimportant except has to be > 262144 + + +c----- First some error checking ------------------------------------------ + if ((lev.lt.0).or.(lev.gt.maxlev_)) stop 'Level out of range! (2)' + + lev_range = 2_dint**lev + + + if ((x.lt.0).or.(x.ge.lev_range)) then + print*,'x,lev,lev_range',x,lev,lev_range + call flush(6) + stop 'x out of range!' + endif + if ((y.lt.0).or.(y.ge.lev_range)) then + print*,'y,lev,lev_range',y,lev,lev_range + stop 'y out of range!' + endif + if ((z.lt.0).or.(z.ge.lev_range)) stop 'z out of range!' +c---------------------------------------------------------------------------- +c +c Note the Rand_set_offset subroutine takes an integer*4 value +c for the offset value. For this reason we need to use integer*4 +c values - ndiv,nrem. As a precaution an explicit check is made +c to be sure that these values are calculated correctly. +c--------------------------------------------------------------------------- + + + call Rand_load(ldata%current_state(lev),ldata%base_lev_start(1,lev)) + + if (lev.eq.0) return + +c Calculate z-offset + + ndiv = z/nfactor + nrem = z - ndiv*nfactor + ndiv8 = ndiv + nrem8 = nrem + + if (ndiv8*nfactor+nrem8.ne.z) stop 'Error in z ndiv nrem' + + call Rand_set_offset(offset1,ndiv) + offset1 = Rand_mul_offset(offset1,nfactor) + call Rand_set_offset(offset2,nrem) + offset2 = Rand_add_offset(offset1,offset2) + offset_z = Rand_mul_offset(offset2,nmulti_) + +c Calculate y-offset + + ndiv = y/nfactor + nrem = y - ndiv*nfactor + ndiv8 = ndiv + nrem8 = nrem + + if (ndiv8*nfactor+nrem8.ne.y) stop 'Error in y ndiv nrem' + + offset1 = Rand_mul_offset(ldata%poweroffset(lev),ndiv) + offset1 = Rand_mul_offset(offset1,nfactor) + offset2 = Rand_mul_offset(ldata%poweroffset(lev),nrem) + offset_y = Rand_add_offset(offset1,offset2) + +c Calculate x-offset + + ndiv = x/nfactor + nrem = x - ndiv*nfactor + ndiv8 = ndiv + nrem8 = nrem + + if (ndiv8*nfactor+nrem8.ne.x) then + print*,'ndiv,nfactor,nrem,x',ndiv,nfactor,nrem,x + print*,'ndiv*nfactor+nrem',ndiv*nfactor+nrem + print*,'x-ndiv*nfactor-nrem',x-ndiv*nfactor-nrem + stop 'Error in x ndiv nrem' + endif + + offset1 = Rand_mul_offset(ldata%poweroffset(2*lev),ndiv) + offset1 = Rand_mul_offset(offset1,nfactor) + offset2 = Rand_mul_offset(ldata%poweroffset(2*lev),nrem) + offset_x = Rand_add_offset(offset1,offset2) + + offset1 = Rand_add_offset(offset_x,offset_y) + offset_total = Rand_add_offset(offset1, offset_z) + + ldata%current_state(lev) = Rand_boost(ldata%current_state(lev),offset_total) + + return + end +c================================================================================= + recursive subroutine return_gaussian_array(ldata,lev,ngauss,garray) + use Rand + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + integer lev,ngauss + real*8 garray(0:*) + TYPE(Rand_state) :: state + real*8 PI + parameter (PI=3.1415926535897932384d0) + real*8 branch + parameter (branch=1.d-6) + integer iloop + + real*8 temp,mag,ang + integer i + + if (mod(ngauss,2).ne.0) + & stop 'Error in return_gaussian_array - even pairs only' + +c First obtain a set of uniformly distributed pseudorandom numbers +c between 0 and 1. The method used is described in detail in +c appendix B of Jenkins 2013. + + do i=0,ngauss-1 + call Rand_real(garray(i),ldata%current_state(lev)) + + if (garray(i).lt.branch) then + garray(i) = branch + state = Rand_boost(ldata%current_state(lev),ldata%superjump) + iloop = 0 + 10 continue + call Rand_real(temp,state) + iloop = iloop+1 + if (temp.lt.branch) then + garray(i) = garray(i)*branch + state = Rand_boost(state,ldata%superjump) + if (iloop.gt.100) then + print*,'Too may iterations in return_gaussian_array!' + call flush(6) + stop + endif + goto 10 + else + garray(i) = garray(i)*temp + endif + endif + enddo + +c Apply Box-Muller transformation to create pairs of Gaussian +c pseudorandom numbers. + + do i=0,ngauss/2-1 + + mag = sqrt(-2.0d0*log(garray(2*i))) + ang = 2.0d0*PI*garray(2*i+1) + + garray(2*i) = mag*cos(ang) + garray(2*i+1) = mag*sin(ang) + + enddo + end +c================================================================================= + recursive subroutine parse_descriptor(string,l,ix,iy,iz,side1,side2,side3,check_int,name) + implicit none + integer nchar + parameter(nchar=100) + character*100 string + integer*4 l,side1,side2,side3,ierror + integer*8 ix,iy,iz + integer*8 check_int + character*20 name + + + integer i,ip,iq,ir + + ierror = 0 + + ip = 1 + do while (string(ip:ip).eq.' ') + ip = ip + 1 + enddo + + if (string(ip:ip+7).ne.'[Panph1,') then + ierror = 1 + print*,string(ip:ip+7) + goto 10 + endif + + ip = ip+8 + if (string(ip:ip).ne.'L') then + ierror = 2 + goto 10 + endif + + ip = ip+1 + + iq = ip + scan( string(ip:nchar),',') -1 + + if (ip.eq.iq) then + ierror = 3 + goto 10 + endif + + + read (string(ip:iq),*) l + + ip = iq+1 + + if (string(ip:ip).ne.'(') then + ierror = 4 + goto 10 + endif + + ip = ip+1 + + iq = ip + scan( string(ip:nchar),')') -2 + + read(string(ip:iq),*) ix,iy,iz + + ip = iq+2 + + if (string(ip:ip).ne.',') then + ierror = 5 + goto 10 + endif + + ip = ip+1 + if ((string(ip:ip).ne.'S').and.(string(ip:ip).ne.'D')) then + ierror = 6 + goto 10 + endif + + if (string(ip:ip).eq.'S') then + ip = ip + 1 + iq = ip + scan( string(ip:nchar),',') -2 + read (string(ip:iq),*) side1 + side2 = side1 + side3 = side1 + iq = iq+1 + if (string(iq:iq+2).ne.',CH') then + print*,string(ip:iq),string(iq:iq+2) + ierror = 6 + goto 10 + endif + else + ip = ip + 1 + if (string(ip:ip).ne.'(') then + ierror = 7 + goto 10 + endif + + + ip = ip + 1 + iq = ip + scan( string(ip:nchar),')') -2 + read (string(ip:iq),*) side1,side2,side3 + + iq = iq + 1 + + if (string(iq:iq).ne.')') then + ierror = 8 + goto 10 + endif + + iq = iq + 1 + + if (string(iq:iq+2).ne.',CH') then + ierror = 9 + goto 10 + endif + + endif + + ip = iq + 3 + + iq = ip + scan( string(ip:nchar),',') -2 + + read (string(ip:iq),*) check_int + + ip = iq + 1 + + if (string(ip:ip).ne.',') then + ierror = 10 + goto 10 + endif + + ip = ip+1 + + ir = ip + scan( string(ip:nchar),']') -2 + + iq = min(ir,ip+19) + + do i=1,20 + name(i:i)=' ' + enddo + + do i=ip,iq + name(i-ip+1:i-ip+1) = string(i:i) + enddo + + iq = ir + 1 + + if (string(iq:iq).ne.']') then + ierror = 11 + goto 10 + endif + + + 10 continue + + if (ierror.eq.0) return + + print*,'Error reading panphasian descriptor. Error number:',ierror + stop + + return + end +c================================================================================= + recursive subroutine compose_descriptor(l,ix,iy,iz,side,check_int,name,string) + implicit none + integer nchar + parameter(nchar=100) + character*100,intent(out)::string + character*20 name + integer*4 l,ltemp + integer*8 side + integer*8 ix,iy,iz + integer*8 check_int + + character*50 temp1,temp2,temp3,temp4,temp5,temp6 + integer lnblnk + + integer ip1,ip2,ip3,ip4,ip5,ip6 + + ltemp = l + + 5 continue + if ((mod(ix,2).eq.0).and.(mod(iy,2).eq.0).and.(mod(iz,2).eq.0).and.(mod(side,2).eq.0)) then + ix = ix/2 + iy = iy/2 + iz = iz/2 + side = side/2 + ltemp = ltemp-1 + goto 5 + endif + + + write (temp1,*) ltemp + ip1= scan(temp1,'0123456789') + write (temp2,*) ix + ip2= scan(temp2,'0123456789') + write (temp3,*) iy + ip3= scan(temp3,'0123456789') + write (temp4,*) iz + ip4= scan(temp4,'0123456789') + write (temp5,*) side + ip5= scan(temp5,'0123456789') + write (temp6,*) check_int + ip6= scan(temp6,'-0123456789') + + + string='[Panph1,L'//temp1(ip1:lnblnk(temp1))//',('//temp2(ip2:lnblnk(temp2)) + & //','//temp3(ip3:lnblnk(temp3))//','//temp4(ip4:lnblnk(temp4))//'),S' + & // temp5(ip5:lnblnk(temp5))//',CH'//temp6(ip6:lnblnk(temp6))// + & ','//name(1:lnblnk(name))//']' + + return + + end +c================================================================================= + recursive subroutine validate_descriptor(ldata,string,MYID,check_number) + use pan_state + implicit none + + type(state_data), intent(inout) :: ldata + character*100 string + integer*8 check_number + integer MYID + + character*20 phase_name + integer*4 lev + + integer*8 ix_abs,iy_abs,iz_abs + integer*4 ix_base,iy_base,iz_base + + + integer*8 xval,yval,zval + integer val_state(5) + + TYPE(Rand_state) :: state + + real*8 rand_num + integer*8 mconst,check_total,check_rand + parameter(mconst = 2147483647_Dint) + integer ascii_list(0:255) + integer*8 maxco + integer i + integer*8 ii + integer lnblnk + + + + call parse_descriptor(string,lev,ix_abs,iy_abs,iz_abs, + & ix_base,iy_base,iz_base,check_rand,phase_name) + +c------------------------------------------------------------------------- +c Some basic checking +c------------------------------------------------------------------------- + if ((lev.lt.0).or.(lev.gt.maxlev_)) then + print*,'lev,maxlev',lev,maxlev_ + call flush(6) + stop 'Level out of range! (3)' + endif + + if ((mod(ix_abs,2).eq.0).and.(mod(iy_abs,2).eq.0).and.(mod(iz_abs,2).eq.0).and. + & (mod(ix_base,2).eq.0).and.(mod(iy_base,2).eq.0).and.(mod(iz_base,2).eq.0)) + & stop 'Parameters not at lowest level' + + + maxco = 2_dint**lev + + if (ix_abs.lt.0) stop 'Error: ix_abs negative (2)' + if (iy_abs.lt.0) stop 'Error: iy_abs negative (2)' + if (iz_abs.lt.0) stop 'Error: iz_abs negative (2)' + + + if (ix_abs+ix_base.ge.maxco) + & stop 'Error: ix_abs + ix_per out of range.' + if (iy_abs+iy_base.ge.maxco) + & stop 'Error: iy_abs + iy_per out of range.' + if (iz_abs+iz_base.ge.maxco) + & stop 'Error: iz_abs + iz_per out of range.' + + check_total = 0 + + call initialise_panphasia(ldata) +c First corner + xval = ix_abs + ix_base - 1 + yval = iy_abs + zval = iz_abs + call advance_current_state(ldata,lev,xval,yval,zval) + call Rand_real(rand_num,ldata%current_state(lev)) + call Rand_save(val_state,ldata%current_state(lev)) + check_total = check_total + val_state(5) + if (MYID.eq.0) print*,'--------------------------------------' + if (MYID.eq.0) print*,'X-corner rand = ',rand_num + if (MYID.eq.0) print*,'State:',val_state +c Second corner + xval = ix_abs + yval = iy_abs + iy_base - 1 + zval = iz_abs + call advance_current_state(ldata,lev,xval,yval,zval) + call Rand_real(rand_num,ldata%current_state(lev)) + call Rand_save(val_state,ldata%current_state(lev)) + check_total = check_total + val_state(5) + if (MYID.eq.0) print*,'Y-corner rand = ',rand_num + if (MYID.eq.0) print*,'State:',val_state +c Third corner + xval = ix_abs + yval = iy_abs + zval = iz_abs + iz_base - 1 + call advance_current_state(ldata,lev,xval,yval,zval) + call Rand_real(rand_num,ldata%current_state(lev)) + call Rand_save(val_state,ldata%current_state(lev)) + check_total = check_total + val_state(5) + if (MYID.eq.0) print*,'z-corner rand = ',rand_num + if (MYID.eq.0) print*,'State:',val_state + if (MYID.eq.0) print*,'--------------------------------------' + +c Now encode the name. An integer for each ascii character is generated +c starting from the state which gives r0 - the first random number in +c Panphasia. The integer is in the range 0 - m-1. +c After making the list, then loop over non-blank characters +c in the name and take the ascii value, and sum the associated numbers. +c To avoid simple anagrams giving the same score, weight the integer +c by position in the string. Finally take mod m - to give the +c check number. + + call Rand_load(state,ldata%base_state) + + do i=0,255 + call Rand_real(rand_num,state) + call Rand_save(val_state,state) + ascii_list(i) = val_state(5) + enddo + + + + do ii=1,lnblnk(phase_name) + check_total = check_total + ii*ascii_list(iachar(phase_name(ii:ii))) + enddo + + + check_total = mod(check_total,mconst) + if (check_rand.eq.-999) then ! override the safety check number. + check_number = check_total + return + else + if (check_rand.ne.check_total) then + print*,'Inconsistency in the input panphasia descriptor ',MYID + print*,'Check_rand = ',check_rand + print*,'val_state(5) =',val_state(5) + print*,'xval,yval,zval',xval,yval,zval + print*,'lev_val = ',lev + call flush(6) + stop + endif + endif + + + return + end +c================================================================================= + recursive subroutine generate_random_descriptor(ldata,string) + use Rand + use pan_state + implicit none + type(state_data), intent(inout) :: ldata + character*100 string + character*100 instring + character*20 name + integer*4 unix_timestamp + + real*8 lbox + real*8 lpanphasia + parameter (lpanphasia = 25000000.0) ! Units of Mpc/h + integer level + integer*8 cell_dim + integer val_state(5) + + TYPE(Rand_state) :: state + TYPE(Rand_offset) :: offset + + real*8 rand_num1,rand_num2 + integer*8 mconst,check_int + parameter(mconst = 2147483647_Dint) + integer*8 mfac,imajor,iminor + parameter(mfac=33554332_Dint) + integer ascii_list(0:255) + integer i,lnblnk + integer*8 ii + integer mult + + integer*8 ixco,iyco,izco,irange + + print*,'___________________________________________________________' + print* + print*,' Generate a random descriptor ' + print* + print*,'The code uses the time (the unix timestamp) plus some extra ' + print*,'information personal to the user to choose a random region ' + print*,'within PANPHASIA. The user must also specify the side length' + print*,'of the cosmological volume. The code assumes that the whole of' + print*,'PANPHASIA is 25000 Gpc/h on a side and selects an appropriate ' + print*,'level in the octree for the descriptor. ' + print*,'Assuming this scaling the small scale power is defined down ' + print*,'to a mass scale of around 10^{-12} solar masses.' + print* + print*,'The user must also specify a human readable label for the ' + print*,'descriptor of less than 21 characters.' + print*,'___________________________________________________________' + print* + print*,'Press return to continue ' + read (*,*) + print* + print*,'___________________________________________________________' + print*,'Enter the box side-length in Mpc/h units' + read (*,*) lbox + print*,'___________________________________________________________' + print* + print* + 5 continue + print*,'Enter up to 20 character name to label the descriptor (no spaces)' + read (*,'(a)') name + if ((len_trim(instring).lt.21).or.(scan(name,' ').le.len_trim(name))) goto 5 + print*,'___________________________________________________________' + print* + print* + print*,'___________________________________________________________' + print*,'The phases for the simulation are described by whole octree ' + print*,'cells. Enter an odd integer that defines the number of cells ' + print*,'you require in one dimension. Choose this number carefully ' + print*,'as it will limit the possible 1-D sizes of the of the Fourier ' + print*,'transforms that can be used to make initial conditions to a product ' + print*,'of this integer times any power of two. In which case the only' + print*,'choice is 1.)' + print*,'(I would recommend 3 unless the initial condition code is' + print*,'incapable of using grid sizes that are not purely powers of two.' + print*,'___________________________________________________________' + print* + 7 continue + print*,'Enter number of octree cells on an edge (positive odd number only) ' + read (*,*) cell_dim + if ((cell_dim.le.0).or.(mod(cell_dim,2).eq.0)) goto 7 + print*,'___________________________________________________________' + call system('date +%s>tempfile_42526037646') + open(16,file='tempfile_42526037646',status='old') + read (16,*) unix_timestamp + close(16) + call system('/bin/rm tempfile_42526037646') + + print*,'Unix_timestamp determined. Value: ',unix_timestamp + print*,'___________________________________________________________' + print* + print* + print* + print*,'___________________________________________________________' + print*,'The code has just read the unix timestamp and will use this' + print*,'to help choose a random region in PANPHASIA. Although it is' + print*,'perhaps unlikely that someone else is also running this code at ' + print*,'the same time to the nearest second, to make it more likely' + print*,' still that the desciptor to be generated is unique' + print*,'please enter your name or some other piece of information' + print*,'below that you think is unlikely to be used by anyone else' + print*,'___________________________________________________________' + + print* + + 10 continue + print*,'Please enter your name (a minimum of six characters)' + read (*,'(a)') instring !' + if (len_trim(instring).lt.6) goto 10 + + level = int(log10(dble(cell_dim)*lpanphasia/lbox)/log10(2.0d0)) + + if (level.gt.50) stop 'level >50 ' + + + +c 'd' lines allow the generation of a large set of +c descriptors. Use to check that they are randomly +c positioned over the available volume. + + +c First use the unix timestamp to initialises the +c random generator. + + call Rand_seed(state,unix_timestamp) + + call Rand_save(ldata%base_state,state) + + +c First generate an integer from the user data. + call Rand_load(state,ldata%base_state) + + do i=0,255 + call Rand_real(rand_num1,state) + call Rand_save(val_state,state) + ascii_list(i) = val_state(5) + enddo + + call Rand_set_offset(offset,1) + + do ii=1,lnblnk(instring) + mult = mod(ii*ascii_list(iachar(instring(ii:ii))),mconst) + offset = Rand_mul_offset(offset,mult) + enddo + + call Rand_load(state,ldata%base_state) + state = Rand_boost(state,offset) ! Starting point for choosing location. + + 20 continue + + irange = 2_Dint**level + imajor = irange/mfac + iminor = mod(irange,mfac) + + call Rand_real(rand_num1,state) + call Rand_real(rand_num2,state) + + ixco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor) + + if (ixco+cell_dim.ge.irange) goto 20 ! Invalid descriptor + + call Rand_real(rand_num1,state) + call Rand_real(rand_num2,state) + + iyco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor) + + if (iyco+cell_dim.ge.irange) goto 20 ! Invalid descriptor + + call Rand_real(rand_num1,state) + call Rand_real(rand_num2,state) + + izco = int(rand_num1*imajor)*mfac + int(rand_num2*iminor) + + if (izco+cell_dim.ge.irange) goto 20 ! Invalid descriptor + + +c Value of the check digit is not known. Use validate_descriptor to compute it. + + check_int = -999 ! Special value required to make validate_descriptor + ! return the check digit. + + call compose_descriptor(level,ixco,iyco,izco,cell_dim,check_int,name,string) + + call validate_descriptor(ldata,string,-1,check_int) + + call compose_descriptor(level,ixco,iyco,izco,cell_dim,check_int,name,string) + + + return + end +c================================================================================= + recursive subroutine demo_basis_function_allocator + + implicit none + integer nmax + parameter (nmax=10) + + integer*4 wn_level(nmax) + + integer*8 ix_abs(nmax),iy_abs(nmax),iz_abs(nmax) + integer*8 ix_per(nmax),iy_per(nmax),iz_per(nmax) + integer*8 ix_rel(nmax),iy_rel(nmax),iz_rel(nmax) + integer*8 ix_dim(nmax),iy_dim(nmax),iz_dim(nmax) + + integer ix,iy,iz,nref + integer layer_min,layer_max,indep_field + + + integer*8 itot_int,itot_ib + + integer inv_open + +c Assign some trial values + + nref = 3 + inv_open=9 + + wn_level(1) = 22 + + ix_abs(1) = 2000000 + iy_abs(1) = 1500032 + iz_abs(1) = 2500032 + + ix_per(1) = 768 + iy_per(1) = 768 + iz_per(1) = 768 + + ix_rel(1) = 0 + iy_rel(1) = 0 + iz_rel(1) = 0 + + ix_dim(1) = 768 + iy_dim(1) = 768 + iz_dim(1) = 768 + + + wn_level(2) = 23 + + ix_abs(2) = 4000000 + iy_abs(2) = 3000064 + iz_abs(2) = 5000064 + + ix_per(2) = 1536 + iy_per(2) = 1536 + iz_per(2) = 1536 + + ix_rel(2) = 256 + iy_rel(2) = 16 + iz_rel(2) = 720 + + ix_dim(2) = 768 + iy_dim(2) = 768 + iz_dim(2) = 768 + + + wn_level(3) = 24 + + ix_abs(3) = 8000000 + iy_abs(3) = 6000128 + iz_abs(3) = 10000128 + + ix_per(3) = 3072 + iy_per(3) = 3072 + iz_per(3) = 3072 + + ix_rel(3) = 896 + iy_rel(3) = 432 + iz_rel(3) = 1840 + + ix_dim(3) = 768 + iy_dim(3) = 768 + iz_dim(3) = 768 + + + itot_int = 0 + itot_ib = 0 + + + + + open(10,file='ascii_dump_r1',status='unknown') + + ix=320 + do iy=0,767 + do iz=0,767 + call layer_choice(ix,iy,iz,1,nref,ix_abs,iy_abs,iz_abs, + & ix_per,iy_per,iz_per,ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,inv_open,layer_min,layer_max,indep_field) + write(10,*) iy,iz,layer_min,layer_max,indep_field + enddo + enddo + close(10) + + open(10,file='ascii_dump_r2',status='unknown') + + ix=384 + do iy=0,767 + do iz=0,767 + call layer_choice(ix,iy,iz,2,nref,ix_abs,iy_abs,iz_abs, + & ix_per,iy_per,iz_per,ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,inv_open,layer_min,layer_max,indep_field) + write(10,*) iy,iz,layer_min,layer_max,indep_field + enddo + enddo + close(10) + + open(10,file='ascii_dump_r3',status='unknown') + + ix=384 + do iy=0,767 + do iz=0,767 + call layer_choice(ix,iy,iz,3,nref,ix_abs,iy_abs,iz_abs, + & ix_per,iy_per,iz_per,ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,inv_open,layer_min,layer_max,indep_field) + write(10,*) iy,iz,layer_min,layer_max,indep_field + enddo + enddo + close(10) + end +c================================================================================= + recursive subroutine layer_choice(ix0,iy0,iz0,iref,nref, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim, + & wn_level,x_fact,layer_min,layer_max,indep_field) + implicit none + + integer ix0,iy0,iz0,iref,nref,isize,ibase + integer ix,iy,iz,irefplus + integer ione + + integer*8 ix_abs(nref),iy_abs(nref),iz_abs(nref) + integer*8 ix_per(nref),iy_per(nref),iz_per(nref) + integer*8 ix_rel(nref),iy_rel(nref),iz_rel(nref) + integer*8 ix_dim(nref),iy_dim(nref),iz_dim(nref) + + integer wn_level(nref) + integer layer_min,layer_max,indep_field,x_fact + integer idebug + + + integer interior,iboundary + + if (iref.eq.9999) then + idebug = 1 + else + idebug = 0 + endif + + ione = 1 + + irefplus = min(iref+1,nref) + + if (nref.eq.1) then ! Deal with simplest case + layer_min = 0 + layer_max = wn_level(1) + indep_field = 1 + if (idebug.eq.1) print*,'return 1' + return + endif + +c----------- Case of the top periodic refinement. For this refinement layer_min=0 as +c----------- all the larger basis functions must be included. By default layer_max +c----------- is set to wn_level(1) so all basis functions are included. A check is +c----------- made to determine if the lowest basis function can be included in the +c----------- next refinement. If it can the same process is repeated for the next +c----------- largest basis function and this is repeated until a failure occurs. + + if ((iref.eq.1).and.(nref.gt.1)) then + ibase = 1 + 10 continue + + ix = ishft(ishft(ix_abs(iref)+ix_rel(iref)+ix0,-ibase),ibase)-ix_abs(iref)-ix_rel(iref) + iy = ishft(ishft(iy_abs(iref)+iy_rel(iref)+iy0,-ibase),ibase)-iy_abs(iref)-iy_rel(iref) + iz = ishft(ishft(iz_abs(iref)+iz_rel(iref)+iz0,-ibase),ibase)-iz_abs(iref)-iz_rel(iref) + isize = ishft(ione,ibase) + + call inref(ix,iy,iz,isize,iref,irefplus,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + + if ((interior.eq.1).and.(iboundary.eq.1)) then + ibase = ibase + 1 + goto 10 + endif + + layer_min = 0 + layer_max = wn_level(iref) - ibase + 1 + if (layer_max.ne.wn_level(iref)) then + indep_field = 0 + else + indep_field = 1 + endif + + if (idebug.eq.1) then + print*,'iref,wn_level(iref)',iref,wn_level(iref) + print*,'Return 2',layer_min,layer_max,indep_field + endif + + return + endif +c------------------------------------------------------------------------------------------ +c------------------------------------------------------------------------------------------ + + +c----------- For second or higher refinement determine layer_min by reference +c----------- to itself. In this case the loop continues until a basis function +c------------ is found which fits in a larger refinement + + ibase = 1 + + 20 continue + + + ix = ishft(ishft(ix_abs(iref)+ix_rel(iref)+ix0,-ibase),ibase)-ix_abs(iref)-ix_rel(iref) + iy = ishft(ishft(iy_abs(iref)+iy_rel(iref)+iy0,-ibase),ibase)-iy_abs(iref)-iy_rel(iref) + iz = ishft(ishft(iz_abs(iref)+iz_rel(iref)+iz0,-ibase),ibase)-iz_abs(iref)-iz_rel(iref) + isize = ishft(ione,ibase) + + call inref(ix,iy,iz,isize,iref,iref,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + + if ((interior.eq.1).and.(iboundary.eq.1)) then + ibase = ibase + 1 + goto 20 + endif + + layer_min = wn_level(iref) - max(ibase-2,0) ! Take last suitable refinement + + +c----------- For an intermediate refinement define layer_max by reference to +c----------- the next refinement + + if (iref.lt.nref) then + ibase = 1 + + 30 continue + + ix = ishft(ishft(ix_abs(iref)+ix_rel(iref)+ix0,-ibase),ibase)-ix_abs(iref)-ix_rel(iref) + iy = ishft(ishft(iy_abs(iref)+iy_rel(iref)+iy0,-ibase),ibase)-iy_abs(iref)-iy_rel(iref) + iz = ishft(ishft(iz_abs(iref)+iz_rel(iref)+iz0,-ibase),ibase)-iz_abs(iref)-iz_rel(iref) + isize = ishft(ione,ibase) + + call inref(ix,iy,iz,isize,iref,irefplus,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + + if ((interior.eq.1).and.(iboundary.eq.1)) then + ibase = ibase + 1 + goto 30 + endif + + layer_max = wn_level(iref) - ibase + 1 + + if (layer_min.eq.wn_level(iref)) then + indep_field = 1 + else + indep_field = 0 + endif + else + layer_max = wn_level(iref) + indep_field = 1 + endif + + if (idebug.eq.1) then + print*,'Return 3' + print*,'layer_min,layer_max,indep_field',layer_min,layer_max,indep_field + print*,'interior,iboundary',interior,iboundary + print*,'ibase = ',ibase + print*,'iref,nref,wn_level(iref)',iref,nref,wn_level(iref) + endif + + + return + + end + + + + +c The function takes a given basis function specified by a corner ixc,iyc,izc +c and a size isz at level wn_c in the oct-tree and returns two integer values. +c (i) interior: +c Value 1 if the basis function is completely within the given +c refinement. +c +c Value 0 if the basis function is without the refinement, or +c overlaps the edges of the refinement, or the edges of the +c primary white noise patch. +c +c (ii) iboundary: +c Value 1 if the basis function is sufficiently far from the +c refinement boundary. +c +c Value 0 otherwise. +c The given refinement is defined at level wn_r in the oct-tree and by the variables +c (ix_rel,iy_rel,iz_rel) which give the location of the refinement relative to +c corner of the white noise patch, (ix_per,iy_per,iz_per) which define the +c periodicity of the white noise patch, and (ix_dim,iy_dim,iz_dim) which +c define the size of the refinement. +c +c +c +c================================================================================= + recursive subroutine inref(ixc,iyc,izc,isz,ir1,ir2,nref,wn_level, + & ix_abs,iy_abs,iz_abs,ix_per,iy_per,iz_per, + & ix_rel,iy_rel,iz_rel,ix_dim,iy_dim,iz_dim,x_fact, + & interior,iboundary) + implicit none + + integer nref + integer ixc,iyc,izc,isz,ir1,ir2 + integer wn_level(nref) + integer*8 ix_abs(nref),iy_abs(nref),iz_abs(nref) + integer*8 ix_per(nref),iy_per(nref),iz_per(nref) + integer*8 ix_rel(nref),iy_rel(nref),iz_rel(nref) + integer*8 ix_dim(nref),iy_dim(nref),iz_dim(nref) + integer interior, iboundary + integer x_fact + + integer*8 ixco,iyco,izco,isize + integer*8 ixref0,iyref0,izref0 + integer*8 ixref1,iyref1,izref1 + integer*8 idist + + integer delta_wn + +c Error checking + if (ir2.lt.ir1) stop 'ir2 - void interpolate( m1& V, m2& v, bool fourier_splice = false ) const - { - int oxc = V.offset(0), oyc = V.offset(1), ozc = V.offset(2); - int oxf = v.offset(0), oyf = v.offset(1), ozf = v.offset(2); - - size_t nxf = v.size(0), nyf = v.size(1), nzf = v.size(2), nzfp = nzf+2; - - // cut out piece of coarse grid that overlaps the fine: - assert( nxf%2==0 && nyf%2==0 && nzf%2==0 ); - - size_t nxc = nxf/2, nyc = nyf/2, nzc = nzf/2, nzcp = nzf/2+2; - - fftw_real *rcoarse = new fftw_real[ nxc * nyc * nzcp ]; - fftw_complex *ccoarse = reinterpret_cast (rcoarse); - - fftw_real *rfine = new fftw_real[ nxf * nyf * nzfp]; - fftw_complex *cfine = reinterpret_cast (rfine); - - #pragma omp parallel for - for( int i=0; i<(int)nxc; ++i ) - for( int j=0; j<(int)nyc; ++j ) - for( int k=0; k<(int)nzc; ++k ) - { - size_t q = ((size_t)i*nyc+(size_t)j)*nzcp+(size_t)k; - rcoarse[q] = V( oxf+i, oyf+j, ozf+k ); - } - - if( fourier_splice ) - { - #pragma omp parallel for - for( int i=0; i<(int)nxf; ++i ) - for( int j=0; j<(int)nyf; ++j ) - for( int k=0; k<(int)nzf; ++k ) - { - size_t q = ((size_t)i*nyf+(size_t)j)*nzfp+(size_t)k; - rfine[q] = v(i,j,k); - } - } - else - { - #pragma omp parallel for - for( size_t i=0; i +#include +#include + +#include "densities.hh" +#include "HDF_IO.hh" + +const int maxdim = 60, maxlev = 50, maxpow = 3 * maxdim; +typedef int rand_offset_[5]; +typedef struct { + int state[133]; // Nstore = Nstate (=5) + Nbatch (=128) + int need_fill; + int pos; +} rand_state_; + +/* pan_state_ struct -- corresponds to respective fortran module in panphasia_routines.f + * data structure that contains all panphasia state variables + * it needs to get passed between the fortran routines to enable + * thread-safe execution. + */ +typedef struct { + int base_state[5], base_lev_start[5][maxdim + 1]; + rand_offset_ poweroffset[maxpow + 1], superjump; + rand_state_ current_state[maxpow + 2]; + + int layer_min, layer_max, indep_field; + + long long xorigin_store[2][2][2], yorigin_store[2][2][2], zorigin_store[2][2][2]; + int lev_common, layer_min_store, layer_max_store; + long long ix_abs_store, iy_abs_store, iz_abs_store, ix_per_store, iy_per_store, iz_per_store, ix_rel_store, + iy_rel_store, iz_rel_store; + double exp_coeffs[8][8][maxdim + 2]; + long long xcursor[maxdim + 1], ycursor[maxdim + 1], zcursor[maxdim + 1]; + int ixshift[2][2][2], iyshift[2][2][2], izshift[2][2][2]; + + double cell_data[9][8]; + int ixh_last, iyh_last, izh_last; + int init; + + int init_cell_props; + int init_lecuyer_state; + long long p_xcursor[62], p_ycursor[62], p_zcursor[62]; + +} pan_state_; + +extern "C" { +void start_panphasia_(pan_state_ *lstate, const char *descriptor, int *ngrid, int *bverbose); + +void parse_descriptor_(const char *descriptor, int16_t *l, int32_t *ix, int32_t *iy, int32_t *iz, int16_t *side1, + int16_t *side2, int16_t *side3, int32_t *check_int, char *name); + +void panphasia_cell_properties_(pan_state_ *lstate, int *ixcell, int *iycell, int *izcell, double *cell_prop); + +void adv_panphasia_cell_properties_(pan_state_ *lstate, int *ixcell, int *iycell, int *izcell, int *layer_min, + int *layer_max, int *indep_field, double *cell_prop); + +void set_phases_and_rel_origin_(pan_state_ *lstate, const char *descriptor, int *lev, long long *ix_rel, + long long *iy_rel, long long *iz_rel, int *VERBOSE); +/*void set_local_box_( pan_state_ *lstate, int lev, int8_t ix_abs, int8_t iy_abs, int8_t iz_abs, + int8_t ix_per, int8_t iy_per, int8_t iz_per, int8_t ix_rel, int8_t iy_rel, + int8_t iz_rel, int wn_level_base, int8_t check_rand, char *phase_name, int MYID);*/ +/*extern struct { + int layer_min, layer_max, hoswitch; + }oct_range_; +*/ +} + +class RNG_panphasia : public RNG_plugin { +private: + void forward_transform_field(real_t *field, int n0, int n1, int n2); + void forward_transform_field(real_t *field, int n) { forward_transform_field(field, n, n, n); } + + void backward_transform_field(real_t *field, int n0, int n1, int n2); + void backward_transform_field(real_t *field, int n) { backward_transform_field(field, n, n, n); } + +protected: + std::string descriptor_string_; + int num_threads_; + int levelmin_, levelmin_final_, levelmax_, ngrid_; + bool incongruent_fields_; + double inter_grid_phase_adjustment_; + // double translation_phase_; + pan_state_ *lstate; + int grid_p_,grid_m_; + double grid_rescale_fac_; + int coordinate_system_shift_[3]; + int ix_abs_[3], ix_per_[3], ix_rel_[3], level_p_, lextra_; + const refinement_hierarchy *prefh_; + + struct panphasia_descriptor { + int16_t wn_level_base; + int32_t i_xorigin_base, i_yorigin_base, i_zorigin_base; + int16_t i_base, i_base_y, i_base_z; + int32_t check_rand; + std::string name; + + explicit panphasia_descriptor(std::string dstring) { + char tmp[100]; + memset(tmp, ' ', 100); + parse_descriptor_(dstring.c_str(), &wn_level_base, &i_xorigin_base, &i_yorigin_base, &i_zorigin_base, &i_base, + &i_base_y, &i_base_z, &check_rand, tmp); + for (int i = 0; i < 100; i++) + if (tmp[i] == ' ') { + tmp[i] = '\0'; + break; + } + name = tmp; + name.erase(std::remove(name.begin(), name.end(), ' '), name.end()); + } + }; + + void clear_panphasia_thread_states(void) { + for (int i = 0; i < num_threads_; ++i) { + lstate[i].init = 0; + lstate[i].init_cell_props = 0; + lstate[i].init_lecuyer_state = 0; + } + } + + // greatest common divisor + int gcd(int a, int b) { + if (b == 0) + return a; + return gcd(b, a % b); + } + + // least common multiple + int lcm(int a, int b) { return abs(a * b) / gcd(a, b); } + +// Two or largest power of 2 less than the argument + int largest_power_two_lte(int b) { + int a = 1; + if (b<=a) return a; + while (2*a < b) a = 2*a; + return a; + } + + + panphasia_descriptor *pdescriptor_; + +public: + explicit RNG_panphasia(config_file &cf) : RNG_plugin(cf) { + descriptor_string_ = pcf_->getValue("random", "descriptor"); + +#ifdef _OPENMP + num_threads_ = omp_get_max_threads(); +#else + num_threads_ = 1; +#endif + + // create independent state descriptions for each thread + lstate = new pan_state_[num_threads_]; + + // parse the descriptor for its properties + pdescriptor_ = new panphasia_descriptor(descriptor_string_); + LOGINFO("PANPHASIA: descriptor \'%s\' is base %d,", pdescriptor_->name.c_str(), pdescriptor_->i_base); + + // write panphasia base size into config file for the grid construction + // as the gridding unit we use the least common multiple of 2 and i_base + std::stringstream ss; + //ARJ ss << lcm(2, pdescriptor_->i_base); + //ss << two_or_largest_power_two_less_than(pdescriptor_->i_base);//ARJ + ss << 2; //ARJ - set gridding unit to two + pcf_->insertValue("setup", "gridding_unit", ss.str()); + ss.str(std::string()); + ss << pdescriptor_->i_base ; + pcf_->insertValue("random","base_unit", ss.str()); + } + + void initialize_for_grid_structure(const refinement_hierarchy &refh) { + prefh_ = &refh; + levelmin_ = prefh_->levelmin(); + levelmin_final_ = pcf_->getValue("setup", "levelmin"); + levelmax_ = prefh_->levelmax(); + + clear_panphasia_thread_states(); + LOGINFO("PANPHASIA: running with %d threads", num_threads_); + + // if ngrid is not a multiple of i_base, then we need to enlarge and then sample down + ngrid_ = 1 << levelmin_; + + grid_p_ = pdescriptor_->i_base; + grid_m_ = largest_power_two_lte(grid_p_); + + lextra_ = (log10((double)ngrid_ / (double)pdescriptor_->i_base) + 0.001) / log10(2.0); + int ratio = 1 << lextra_; + grid_rescale_fac_ = 1.0; + + coordinate_system_shift_[0] = -pcf_->getValue("setup", "shift_x"); + coordinate_system_shift_[1] = -pcf_->getValue("setup", "shift_y"); + coordinate_system_shift_[2] = -pcf_->getValue("setup", "shift_z"); + + incongruent_fields_ = false; + if (ngrid_ != ratio * pdescriptor_->i_base) { + incongruent_fields_ = true; + ngrid_ = 2 * ratio * pdescriptor_->i_base; + grid_rescale_fac_ = (double)ngrid_ / (1 << levelmin_); + LOGINFO("PANPHASIA: will use a higher resolution:\n" + " (%d -> %d) * 2**ref compatible with PANPHASIA\n" + " will Fourier interpolate after.", + grid_m_, grid_p_); + } + } + + ~RNG_panphasia() { delete[] lstate; } + + void fill_grid(int level, DensityGrid &R); + + bool is_multiscale() const { return true; } +}; + +void RNG_panphasia::forward_transform_field(real_t *field, int nx, int ny, int nz) { + + fftw_real *rfield = reinterpret_cast(field); + fftw_complex *cfield = reinterpret_cast(field); + +#ifdef FFTW3 +#ifdef SINGLE_PRECISION + fftwf_plan pf = fftwf_plan_dft_r2c_3d(nx, ny, nz, rfield, cfield, FFTW_ESTIMATE); +#else + fftw_plan pf = fftw_plan_dft_r2c_3d(nx, ny, nz, rfield, cfield, FFTW_ESTIMATE); +#endif +#else + rfftwnd_plan pf = rfftw3d_create_plan(nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE); +#endif + +#ifdef FFTW3 +#ifdef SINGLE_PRECISION + fftwf_execute(pf); +#else + fftw_execute(pf); +#endif +#else +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_real_to_complex(num_threads_, pf, rfield, NULL); +#else + rfftwnd_one_real_to_complex(pf, rfield, NULL); +#endif +#endif +} + +void RNG_panphasia::backward_transform_field(real_t *field, int nx, int ny, int nz) { + + fftw_real *rfield = reinterpret_cast(field); + fftw_complex *cfield = reinterpret_cast(field); + +#ifdef FFTW3 +#ifdef SINGLE_PRECISION + fftwf_plan ipf = fftwf_plan_dft_c2r_3d(nx, ny, nz, cfield, rfield, FFTW_ESTIMATE); +#else + fftw_plan ipf = fftw_plan_dft_c2r_3d(nx, ny, nz, cfield, rfield, FFTW_ESTIMATE); +#endif +#else + rfftwnd_plan ipf = rfftw3d_create_plan(nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); +#endif + +#ifdef FFTW3 +#ifdef SINGLE_PRECISION + fftwf_execute(ipf); +#else + fftw_execute(ipf); +#endif +#else +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_complex_to_real(num_threads_, ipf, cfield, NULL); +#else + rfftwnd_one_complex_to_real(ipf, cfield, NULL); +#endif +#endif +} + +#include +inline double get_wtime(void) { +#ifdef _OPENMP + return omp_get_wtime(); +#else + return (double)clock() / CLOCKS_PER_SEC; +#endif +} + +void RNG_panphasia::fill_grid(int level, DensityGrid &R) { + fftw_real *pr0, *pr1, *pr2, *pr3, *pr4; + fftw_complex *pc0, *pc1, *pc2, *pc3, *pc4; + + + // determine resolution and offset so that we can do proper resampling + int ileft[3], ileft_corner[3], nx[3], nxremap[3]; + int iexpand_left[3]; + + for (int k = 0; k < 3; ++k) { + ileft[k] = prefh_->offset_abs(level, k); + nx[k] = R.size(k); + assert(nx[k] % 4 == 0); + if (level == levelmin_) { + ileft_corner[k] = ileft[k]; // Top level - periodic + }else{ + ileft_corner[k] = (ileft[k] - nx[k]/4 + (1 << level))%(1 << level); // Isolated + } + iexpand_left[k] = (ileft_corner[k]%grid_m_ ==0) ? 0 : ileft_corner[k]%grid_m_; + fprintf(stderr, "dim=%c : ileft = %d, ileft_corner %d, nx = %d\n", 'x' + k, ileft[k],ileft_corner[k],nx[k]); + }; + + int ileft_corner_m[3], ileft_corner_p[3],nx_m[3]; + int ileft_max_expand = std::max(iexpand_left[0],std::max(iexpand_left[1],iexpand_left[2])); + + for (int k = 0; k < 3; ++k) { + ileft_corner_m[k] = ((ileft_corner[k] - iexpand_left[k]) + + coordinate_system_shift_[k] * (1 << (level - levelmin_final_)) + (1 << level)) % (1 << level); + + ileft_corner_p[k] = grid_p_ * ileft_corner_m[k]/grid_m_; + nx_m[k] = (ileft_max_expand!=0)? nx[k] + ileft_max_expand: nx[k]; + if (nx_m[k]%grid_m_ !=0) nx_m[k] = nx_m[k] + grid_m_ - nx_m[k]%grid_m_; + nxremap[k] = grid_p_ * nx_m[k]/grid_m_; + if (nxremap[k]%2==1){ + nx_m[k] = nx_m[k] + grid_m_; + nxremap[k] = grid_p_ * nx_m[k]/grid_m_; + } + } + + + if ( (nx_m[0]!=nx_m[1]) || (nx_m[0]!=nx_m[2])) LOGERR("Fatal error: non-cubic refinement being requested"); + + inter_grid_phase_adjustment_ = M_PI * (1.0 / (double)nx_m[0] - 1.0 / (double)nxremap[0]); + LOGINFO("The value of the phase adjustement is %f\n", inter_grid_phase_adjustment_); + + + LOGINFO("ileft[0],ileft[1],ileft[2] %d %d %d", ileft[0], ileft[1], ileft[2]); + LOGINFO("ileft_corner[0,1,2] %d %d %d", ileft_corner[0], ileft_corner[1], ileft_corner[2]); + + LOGINFO("iexpand_left[1,2,3] = (%d, %d, %d) Max %d ",iexpand_left[0],iexpand_left[1],iexpand_left[2], + ileft_max_expand); + + LOGINFO("ileft_corner_m[0,1,2] = (%d,%d,%d)",ileft_corner_m[0],ileft_corner_m[1],ileft_corner_m[2]); + LOGINFO("grid_m_ %d grid_p_ %d",grid_m_,grid_p_); + LOGINFO("nx_m[0,1,2] = (%d,%d,%d)",nx_m[0],nx_m[1],nx_m[2]); + LOGINFO("ileft_corner_p[0,1,2] = (%d,%d,%d)",ileft_corner_p[0],ileft_corner_p[1],ileft_corner_p[2]); + LOGINFO("nxremap[0,1,2] = (%d,%d,%d)",nxremap[0],nxremap[1],nxremap[2]); + + size_t ngp = nxremap[0] * nxremap[1] * (nxremap[2] + 2); + + pr0 = new fftw_real[ngp]; + pr1 = new fftw_real[ngp]; + pr2 = new fftw_real[ngp]; + pr3 = new fftw_real[ngp]; + pr4 = new fftw_real[ngp]; + + pc0 = reinterpret_cast(pr0); + pc1 = reinterpret_cast(pr1); + pc2 = reinterpret_cast(pr2); + pc3 = reinterpret_cast(pr3); + pc4 = reinterpret_cast(pr4); + + LOGINFO("calculating PANPHASIA random numbers for level %d...", level); + clear_panphasia_thread_states(); + + double t1 = get_wtime(); + double tp = t1; + + +#pragma omp parallel + { +#ifdef _OPENMP + const int mythread = omp_get_thread_num(); +#else + const int mythread = 0; +#endif + int odd_x, odd_y, odd_z; + int ng_level = ngrid_ * (1 << (level - levelmin_)); // full resolution of current level + + int verbosity = (mythread == 0); + char descriptor[100]; + memset(descriptor, 0, 100); + memcpy(descriptor, descriptor_string_.c_str(), descriptor_string_.size()); + + if (level == levelmin_) { + start_panphasia_(&lstate[mythread], descriptor, &ng_level, &verbosity); + } + + { + int level_p, lextra; + long long ix_rel[3]; + panphasia_descriptor d(descriptor_string_); + + lextra = (log10((double)ng_level / (double)d.i_base) + 0.001) / log10(2.0); + level_p = d.wn_level_base + lextra; + int ratio = 1 << lextra; + assert(ng_level == ratio * d.i_base); + + + + ix_rel[0] = ileft_corner_p[0]; + ix_rel[1] = ileft_corner_p[1]; + ix_rel[2] = ileft_corner_p[2]; + + + +// Code above ignores the coordinate_system_shift_ - but currently this is set to zero // + + + lstate[mythread].layer_min = 0; + lstate[mythread].layer_max = level_p; + lstate[mythread].indep_field = 1; + + set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2], + &verbosity); + + LOGUSER(" called set_phases_and_rel_origin level %d ix_rel iy_rel iz_rel %d %d %d\n", level_p, ix_rel[0], + ix_rel[1], ix_rel[2]); + + odd_x = ix_rel[0] % 2; + odd_y = ix_rel[1] % 2; + odd_z = ix_rel[2] % 2; + } + + if (verbosity) + t1 = get_wtime(); + +//*************************************************************** +// Process Panphasia values: p000, p001, p010, p100 and indep field +//**************************************************************** +// START // + +#pragma omp for //nowait + for (int i = 0; i < nxremap[0] / 2 + odd_x; ++i) { + double cell_prop[9]; + pan_state_ *ps = &lstate[mythread]; + + for (int j = 0; j < nxremap[1] / 2 + odd_y; ++j) + for (int k = 0; k < nxremap[2] / 2 + odd_z; ++k) { + + // ARJ - added inner set of loops to speed up evaluation of Panphasia + + for (int ix = 0; ix < 2; ++ix) + for (int iy = 0; iy < 2; ++iy) + for (int iz = 0; iz < 2; ++iz) { + int ii = 2 * i + ix - odd_x; + int jj = 2 * j + iy - odd_y; + int kk = 2 * k + iz - odd_z; + + if (((ii >= 0) && (ii < nxremap[0])) && ((jj >= 0) && (jj < nxremap[1])) && + ((kk >= 0) && (kk < nxremap[2]))) { + + size_t idx = ((size_t)ii * nxremap[1] + (size_t)jj) * (nxremap[2] + 2) + (size_t)kk; + adv_panphasia_cell_properties_(ps, &ii, &jj, &kk, &ps->layer_min, &ps->layer_max, &ps->indep_field, + cell_prop); + + pr0[idx] = cell_prop[0]; + pr1[idx] = cell_prop[4]; + pr2[idx] = cell_prop[2]; + pr3[idx] = cell_prop[1]; + pr4[idx] = cell_prop[8]; + } + } + } + } + } + LOGINFO("time for calculating PANPHASIA for level %d : %f s, %f µs/cell", level, get_wtime() - t1, + 1e6 * (get_wtime() - t1) / ((double)nxremap[2] * (double)nxremap[1] * (double)nxremap[0])); + LOGINFO("time for calculating PANPHASIA for level %d : %f s, %f µs/cell", level, get_wtime() - t1, + 1e6 * (get_wtime() - t1) / ((double)nxremap[2] * (double)nxremap[1] * (double)nxremap[0])); + + ////////////////////////////////////////////////////////////////////////////////////////////// + + LOGINFO("\033[31mtiming level %d [adv_panphasia_cell_properties]: %f s\033[0m", level, get_wtime() - tp); + tp = get_wtime(); + + ///////////////////////////////////////////////////////////////////////// + // transform and convolve with Legendres + + forward_transform_field(pr0, nxremap[0], nxremap[1], nxremap[2]); + forward_transform_field(pr1, nxremap[0], nxremap[1], nxremap[2]); + forward_transform_field(pr2, nxremap[0], nxremap[1], nxremap[2]); + forward_transform_field(pr3, nxremap[0], nxremap[1], nxremap[2]); + forward_transform_field(pr4, nxremap[0], nxremap[1], nxremap[2]); + +#pragma omp parallel for + for (int i = 0; i < nxremap[0]; i++) + for (int j = 0; j < nxremap[1]; j++) + for (int k = 0; k < nxremap[2] / 2 + 1; k++) { + size_t idx = ((size_t)i * nxremap[1] + (size_t)j) * (nxremap[2] / 2 + 1) + (size_t)k; + + double fx(1.0), fy(1.0), fz(1.0), arg = 0.; + complex gx(0., 0.), gy(0., 0.), gz(0., 0.); + + int ii(i), jj(j), kk(k); + if (i > nxremap[0] / 2) + ii -= nxremap[0]; + if (j > nxremap[1] / 2) + jj -= nxremap[1]; + + // int kkmax = std::max(abs(ii),std::max(abs(jj),abs(kk))); + + + if (ii != 0) { + arg = M_PI * (double)ii / (double)nxremap[0]; + fx = sin(arg) / arg; + gx = complex(0.0, (arg * cos(arg) - sin(arg)) / (arg * arg)); + } else { + fx = 1.0; + gx = 0.0; + } + + if (jj != 0) { + arg = M_PI * (double)jj / (double)nxremap[1]; + fy = sin(arg) / arg; + gy = complex(0.0, (arg * cos(arg) - sin(arg)) / (arg * arg)); + } else { + fy = 1.0; + gy = 0.0; + } + + if (kk != 0) { + arg = M_PI * (double)kk / (double)nxremap[2]; + fz = sin(arg) / arg; + gz = complex(0.0, (arg * cos(arg) - sin(arg)) / (arg * arg)); + } else { + fz = 1.0; + gz = 0.0; + } + + complex temp_comp = (fx + sqrt(3.0) * gx) * (fy + sqrt(3.0) * gy) * (fz + sqrt(3.0) * gz); + double magnitude = sqrt(1.0 - std::abs(temp_comp * temp_comp)); + + if (abs(ii) != nxremap[0] / 2 && abs(jj) != nxremap[1] / 2 && + abs(kk) != nxremap[2] / 2) { // kkmax != nxremap[2]/2 ){ + complex x, y0(RE(pc0[idx]), IM(pc0[idx])), y1(RE(pc1[idx]), IM(pc1[idx])), y2(RE(pc2[idx]), IM(pc2[idx])), + y3(RE(pc3[idx]), IM(pc3[idx])), y4(RE(pc4[idx]), IM(pc4[idx])); + + x = y0 * fx * fy * fz + sqrt(3.0) * (y1 * gx * fy * fz + y2 * fx * gy * fz + y3 * fx * fy * gz) + + y4 * magnitude; + + RE(pc0[idx]) = x.real(); + IM(pc0[idx]) = x.imag(); + } + } + + // END + + LOGINFO("\033[31mtiming level %d [build panphasia field]: %f s\033[0m", level, get_wtime() - tp); + tp = get_wtime(); + +//*************************************************************** +// Process Panphasia values: p000, p001, p010, p100 and indep field +//**************************************************************** + +#pragma omp parallel + { +#ifdef _OPENMP + const int mythread = omp_get_thread_num(); +#else + const int mythread = 0; +#endif + int odd_x, odd_y, odd_z; + int ng_level = ngrid_ * (1 << (level - levelmin_)); // full resolution of current level + int verbosity = (mythread == 0); + char descriptor[100]; + memset(descriptor, 0, 100); + memcpy(descriptor, descriptor_string_.c_str(), descriptor_string_.size()); + + if (level == levelmin_) { + start_panphasia_(&lstate[mythread], descriptor, &ng_level, &verbosity); + } + + { + int level_p, lextra; + long long ix_rel[3]; + panphasia_descriptor d(descriptor_string_); + + lextra = (log10((double)ng_level / (double)d.i_base) + 0.001) / log10(2.0); + level_p = d.wn_level_base + lextra; + int ratio = 1 << lextra; + assert(ng_level == ratio * d.i_base); + + ix_rel[0] = ileft_corner_p[0]; + ix_rel[1] = ileft_corner_p[1]; + ix_rel[2] = ileft_corner_p[2]; + +// Code above ignores the coordinate_system_shift_ - but currently this is set to zero // + + lstate[mythread].layer_min = 0; + lstate[mythread].layer_max = level_p; + lstate[mythread].indep_field = 1; + + set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2], + &verbosity); + + LOGUSER(" called set_phases_and_rel_origin level %d ix_rel iy_rel iz_rel %d %d %d\n", level_p, ix_rel[0], + ix_rel[1], ix_rel[2]); + + odd_x = ix_rel[0] % 2; + odd_y = ix_rel[1] % 2; + odd_z = ix_rel[2] % 2; + } + + if (verbosity) + t1 = get_wtime(); + +// START // +//*************************************************************** +// Process Panphasia values: p110, p011, p101, p111 +//**************************************************************** +#pragma omp for //nowait + for (int i = 0; i < nxremap[0] / 2 + odd_x; ++i) { + double cell_prop[9]; + pan_state_ *ps = &lstate[mythread]; + + for (int j = 0; j < nxremap[1] / 2 + odd_y; ++j) + for (int k = 0; k < nxremap[2] / 2 + odd_z; ++k) { + + // ARJ - added inner set of loops to speed up evaluation of Panphasia + + for (int ix = 0; ix < 2; ++ix) + for (int iy = 0; iy < 2; ++iy) + for (int iz = 0; iz < 2; ++iz) { + int ii = 2 * i + ix - odd_x; + int jj = 2 * j + iy - odd_y; + int kk = 2 * k + iz - odd_z; + + if (((ii >= 0) && (ii < nxremap[0])) && ((jj >= 0) && (jj < nxremap[1])) && + ((kk >= 0) && (kk < nxremap[2]))) { + + size_t idx = ((size_t)ii * nxremap[1] + (size_t)jj) * (nxremap[2] + 2) + (size_t)kk; + adv_panphasia_cell_properties_(ps, &ii, &jj, &kk, &ps->layer_min, &ps->layer_max, &ps->indep_field, + cell_prop); + + pr1[idx] = cell_prop[6]; + pr2[idx] = cell_prop[3]; + pr3[idx] = cell_prop[5]; + pr4[idx] = cell_prop[7]; + } + } + } + } + } + LOGINFO("time for calculating PANPHASIA for level %d : %f s, %f µs/cell", level, get_wtime() - t1, + 1e6 * (get_wtime() - t1) / ((double)nxremap[2] * (double)nxremap[1] * (double)nxremap[0])); + + LOGINFO("\033[31mtiming level %d [adv_panphasia_cell_properties2]: %f s \033[0m", level, get_wtime() - tp); + tp = get_wtime(); + + ///////////////////////////////////////////////////////////////////////// + // transform and convolve with Legendres + + forward_transform_field(pr1, nxremap[0], nxremap[1], nxremap[2]); + forward_transform_field(pr2, nxremap[0], nxremap[1], nxremap[2]); + forward_transform_field(pr3, nxremap[0], nxremap[1], nxremap[2]); + forward_transform_field(pr4, nxremap[0], nxremap[1], nxremap[2]); + +#pragma omp parallel for + for (int i = 0; i < nxremap[0]; i++) + for (int j = 0; j < nxremap[1]; j++) + for (int k = 0; k < nxremap[2] / 2 + 1; k++) { + size_t idx = ((size_t)i * nxremap[1] + (size_t)j) * (nxremap[2] / 2 + 1) + (size_t)k; + + double fx(1.0), fy(1.0), fz(1.0), arg = 0.; + complex gx(0., 0.), gy(0., 0.), gz(0., 0.); + + int ii(i), jj(j), kk(k); + if (i > nxremap[0] / 2) + ii -= nxremap[0]; + if (j > nxremap[1] / 2) + jj -= nxremap[1]; + + // int kkmax = std::max(abs(ii),std::max(abs(jj),abs(kk))); + + if (ii != 0) { + arg = M_PI * (double)ii / (double)nxremap[0]; + fx = sin(arg) / arg; + gx = complex(0.0, (arg * cos(arg) - sin(arg)) / (arg * arg)); + } else { + fx = 1.0; + gx = 0.0; + } + + if (jj != 0) { + arg = M_PI * (double)jj / (double)nxremap[1]; + fy = sin(arg) / arg; + gy = complex(0.0, (arg * cos(arg) - sin(arg)) / (arg * arg)); + } else { + fy = 1.0; + gy = 0.0; + } + + if (kk != 0) { + arg = M_PI * (double)kk / (double)nxremap[2]; + fz = sin(arg) / arg; + gz = complex(0.0, (arg * cos(arg) - sin(arg)) / (arg * arg)); + } else { + fz = 1.0; + gz = 0.0; + } + + if (abs(ii) != nxremap[0] / 2 && abs(jj) != nxremap[1] / 2 && + abs(kk) != nxremap[2] / 2) { // kkmax != nxremap[2]/2 ){ + complex x, y1(RE(pc1[idx]), IM(pc1[idx])), y2(RE(pc2[idx]), IM(pc2[idx])), y3(RE(pc3[idx]), IM(pc3[idx])), + y4(RE(pc4[idx]), IM(pc4[idx])); + + x = 3.0 * (y1 * gx * gy * fz + y2 * fx * gy * gz + y3 * gx * fy * gz) + sqrt(27.0) * y4 * gx * gy * gz; + + RE(pc0[idx]) = RE(pc0[idx]) + x.real(); + IM(pc0[idx]) = IM(pc0[idx]) + x.imag(); + } + } + + LOGINFO("\033[31mtiming level %d [build panphasia field2]: %f s\033[0m", level, get_wtime() - tp); + tp = get_wtime(); + + // END + //*************************************************************** + // Compute Panphasia values of p011, p101, p110, p111 coefficients + // and combine with p000, p001, p010, p100 and indep field. + //**************************************************************** + + ///////////////////////////////////////////////////////////////////////// + // do we need to cut off the small scales? + // int nn = 1< %d",nxremap[0],nx_m[0]); + memset(pr1, 0, ngp * sizeof(fftw_real)); + +#pragma omp parallel for + for (int i = 0; i < nxremap[0]; i++) + for (int j = 0; j < nxremap[1]; j++) + for (int k = 0; k < nxremap[2] / 2 + 1; k++) { + + int ii = (i > nxremap[0] / 2) ? i - nxremap[0] : i, jj = (j > nxremap[1] / 2) ? j - nxremap[1] : j, kk = k; + + int ia(abs(ii)), ja(abs(jj)), ka(abs(kk)); + + if (ia < nx_m[0] / 2 && ja < nx_m[1] / 2 && ka < nx_m[2] / 2) { + + size_t idx = ((size_t)(i)*nxremap[1] + (size_t)(j)) * (nxremap[2] / 2 + 1) + (size_t)(k); + + int ir = (ii < 0) ? ii + nx_m[0] : ii, jr = (jj < 0) ? jj + nx_m[1] : jj, kr = kk; // never negative + + size_t idx2 = ((size_t)ir * nx_m[1] + (size_t)jr) * ((size_t)nx_m[2] / 2 + 1) + (size_t)kr; + + + complex x(RE(pc0[idx]),IM(pc0[idx])); + double total_phase_shift; + total_phase_shift = inter_grid_phase_adjustment_ * (double)(ii+jj+kk); + x = x * exp(complex(0.0, total_phase_shift)); + RE(pc1[idx2]) = x.real(); + IM(pc1[idx2]) = x.imag(); + } + } + + memcpy(pr0, pr1, ngp * sizeof(fftw_real)); + } + + + if( level == 9 ){ + LOGINFO("DC mode of level is %g",RE(pc0[0])); + //RE(pc0[0]) = 1e8; + //IM(pc0[0]) = 0.0; + } + + LOGINFO("\033[31mtiming level %d [remap noncongruent]: %f s\033[0m", level, get_wtime() - tp); + tp = get_wtime(); + ///////////////////////////////////////////////////////////////////////// + // transform back + + backward_transform_field(pr0, nx_m[0], nx_m[1], nx_m[2]); + + ///////////////////////////////////////////////////////////////////////// + // copy to random data structure + delete[] pr1; + delete[] pr2; + delete[] pr3; + delete[] pr4; + + LOGINFO("Copying random field data %d,%d,%d -> %d,%d,%d", nxremap[0], nxremap[1], nxremap[2], nx[0], nx[1], nx[2]); + + // n = 1< = %g | var(p) = %g", level, sum, sum2); +} + +namespace { +RNG_plugin_creator_concrete creator("PANPHASIA"); +} + +#endif From bd12e40e227facc2d53ae381bdc292821161408d Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 12 Feb 2023 14:42:51 -0800 Subject: [PATCH 2/5] WIP panphasia integration. compiles and runs, but output not tested --- src/convolution_kernel.hh | 3 +- src/densities.cc | 5 +- src/densities.hh | 284 +-- src/density_grid.hh | 297 +++ src/main.cc | 35 +- src/plugins/random_music.cc | 613 +++++- src/plugins/random_music_wnoise_generator.cc | 895 +++++++++ src/plugins/random_music_wnoise_generator.hh | 205 ++ src/plugins/random_panphasia.cc | 387 ++-- src/random.cc | 1837 +----------------- src/random.hh | 371 +--- 11 files changed, 2288 insertions(+), 2644 deletions(-) create mode 100644 src/density_grid.hh create mode 100644 src/plugins/random_music_wnoise_generator.cc create mode 100644 src/plugins/random_music_wnoise_generator.hh diff --git a/src/convolution_kernel.hh b/src/convolution_kernel.hh index 081c6e9..4fe13cd 100644 --- a/src/convolution_kernel.hh +++ b/src/convolution_kernel.hh @@ -4,7 +4,7 @@ a code to generate multi-scale initial conditions for cosmological simulations - Copyright (C) 2010-19 Oliver Hahn + Copyright (C) 2010-23 Oliver Hahn */ @@ -16,6 +16,7 @@ #include "config_file.hh" #include "densities.hh" +#include "density_grid.hh" #include "transfer_function.hh" #define ACC_RF(i, j, k) (((((size_t)(i) + nx) % nx) * ny + (((size_t)(j) + ny) % ny)) * 2 * (nz / 2 + 1) + (((size_t)(k) + nz) % nz)) diff --git a/src/densities.cc b/src/densities.cc index 6f68737..fde1938 100644 --- a/src/densities.cc +++ b/src/densities.cc @@ -11,6 +11,7 @@ #include #include "densities.hh" +#include "random.hh" #include "convolution_kernel.hh" //TODO: this should be a larger number by default, just to maintain consistency with old default @@ -335,7 +336,7 @@ void fft_interpolate(m1 &V, m2 &v, bool from_basegrid = false) /*******************************************************************************************/ void GenerateDensityUnigrid(config_file &cf, transfer_function *ptf, tf_type type, - refinement_hierarchy &refh, rand_gen &rand, grid_hierarchy &delta, bool smooth, bool shift) + refinement_hierarchy &refh, noise_generator &rand, grid_hierarchy &delta, bool smooth, bool shift) { unsigned levelmin, levelmax, levelminPoisson; @@ -416,7 +417,7 @@ void GenerateDensityUnigrid(config_file &cf, transfer_function *ptf, tf_type typ /*******************************************************************************************/ void GenerateDensityHierarchy(config_file &cf, transfer_function *ptf, tf_type type, - refinement_hierarchy &refh, rand_gen &rand, + refinement_hierarchy &refh, noise_generator &rand, grid_hierarchy &delta, bool smooth, bool shift) { unsigned levelmin, levelmax, levelminPoisson; diff --git a/src/densities.hh b/src/densities.hh index 6074dbe..a789515 100644 --- a/src/densities.hh +++ b/src/densities.hh @@ -15,298 +15,20 @@ #include "general.hh" #include "config_file.hh" +// #include "density_grid.hh" #include "random.hh" #include "cosmology.hh" #include "transfer_function.hh" #include "general.hh" void GenerateDensityHierarchy(config_file &cf, transfer_function *ptf, tf_type type, - refinement_hierarchy &refh, rand_gen &rand, grid_hierarchy &delta, bool smooth, bool shift); + refinement_hierarchy &refh, noise_generator &rand, grid_hierarchy &delta, bool smooth, bool shift); void GenerateDensityUnigrid(config_file &cf, transfer_function *ptf, tf_type type, - refinement_hierarchy &refh, rand_gen &rand, grid_hierarchy &delta, bool smooth, bool shift); + refinement_hierarchy &refh, noise_generator &rand, grid_hierarchy &delta, bool smooth, bool shift); void normalize_density(grid_hierarchy &delta); -/*! - * @class DensityGrid - * @brief provides infrastructure for computing the initial density field - * - * This class provides access and data management member functions that - * are used when computing the initial density field by convolution with - * transfer functions. - */ -template -class DensityGrid -{ -public: - size_t nx_; //!< number of grid cells in x-direction - size_t ny_; //!< number of grid cells in y-direction - size_t nz_; //!< number of grid cells in z-direction - size_t nzp_; //!< number of cells in memory (z-dir), used for Nyquist padding - - size_t nv_[3]; - - int ox_; //!< offset of grid in x-direction - int oy_; //!< offset of grid in y-direction - int oz_; //!< offset of grid in z-direction - - size_t ov_[3]; - - //! the actual data container in the form of a 1D array - std::vector data_; - - //! constructor - /*! constructs an instance given the dimensions of the density field - * @param nx the number of cells in x - * @param ny the number of cells in y - * @param nz the number of cells in z - */ - DensityGrid(unsigned nx, unsigned ny, unsigned nz) - : nx_(nx), ny_(ny), nz_(nz), nzp_(2 * (nz_ / 2 + 1)), ox_(0), oy_(0), oz_(0) - { - data_.assign((size_t)nx_ * (size_t)ny_ * (size_t)nzp_, 0.0); - nv_[0] = nx_; - nv_[1] = ny_; - nv_[2] = nz_; - ov_[0] = ox_; - ov_[1] = oy_; - ov_[2] = oz_; - } - - DensityGrid(unsigned nx, unsigned ny, unsigned nz, int ox, int oy, int oz) - : nx_(nx), ny_(ny), nz_(nz), nzp_(2 * (nz_ / 2 + 1)), ox_(ox), oy_(oy), oz_(oz) - { - data_.assign((size_t)nx_ * (size_t)ny_ * (size_t)nzp_, 0.0); - nv_[0] = nx_; - nv_[1] = ny_; - nv_[2] = nz_; - ov_[0] = ox_; - ov_[1] = oy_; - ov_[2] = oz_; - } - - //! copy constructor - explicit DensityGrid(const DensityGrid &g) - : nx_(g.nx_), ny_(g.ny_), nz_(g.nz_), nzp_(g.nzp_), - ox_(g.ox_), oy_(g.oy_), oz_(g.oz_) - { - data_ = g.data_; - nv_[0] = nx_; - nv_[1] = ny_; - nv_[2] = nz_; - ov_[0] = ox_; - ov_[1] = oy_; - ov_[2] = oz_; - } - - //! destructor - ~DensityGrid() - { - } - - //! clears the density object - /*! sets all dimensions to zero and frees the memory - */ - void clear(void) - { - nx_ = ny_ = nz_ = nzp_ = 0; - ox_ = oy_ = oz_ = 0; - nv_[0] = nv_[1] = nv_[2] = 0; - ov_[0] = ov_[1] = ov_[2] = 0; - - data_.clear(); - std::vector().swap(data_); - } - - //! query the 3D array sizes of the density object - /*! returns the size of the 3D density object along a specified dimension - * @param i the dimension for which size is to be returned - * @returns array size along dimension i - */ - size_t size(int i) - { - return nv_[i]; - } - - int offset(int i) - { - return ov_[i]; - } - - //! zeroes the density object - /*! sets all values to 0.0 - */ - void zero(void) - { - data_.assign(data_.size(), 0.0); - } - - //! assigns the contents of another DensityGrid to this - DensityGrid &operator=(const DensityGrid &g) - { - nx_ = g.nx_; - ny_ = g.ny_; - nz_ = g.nz_; - nzp_ = g.nzp_; - ox_ = g.ox_; - oy_ = g.oy_; - oz_ = g.oz_; - data_ = g.data_; - - return *this; - } - - //! 3D index based data access operator - inline real_t &operator()(size_t i, size_t j, size_t k) - { - return data_[((size_t)i * ny_ + (size_t)j) * nzp_ + (size_t)k]; - } - - //! 3D index based const data access operator - inline const real_t &operator()(size_t i, size_t j, size_t k) const - { - return data_[((size_t)i * ny_ + (size_t)j) * nzp_ + (size_t)k]; - } - - //! recover the pointer to the 1D data array - inline real_t *get_data_ptr(void) - { - return &data_[0]; - } - - //! fills the density field with random number values - /*! given a pointer to a random_numbers object, fills the field with random values - * @param prc pointer to a random_numbers object - * @param variance the variance of the random numbers (the values returned by prc are multiplied by this) - * @param i0 x-offset (shift) in cells of the density field with respect to the random number field - * @param j0 y-offset (shift) in cells of the density field with respect to the random number field - * @param k0 z-offset (shift) in cells of the density field with respect to the random number field - * @param setzero boolean, if true, the global mean will be subtracted - */ - void fill_rand(/*const*/ random_numbers *prc, real_t variance, int i0, int j0, int k0, bool setzero = false) - { - long double sum = 0.0; - -#pragma omp parallel for reduction(+ \ - : sum) - for (int i = 0; i < nx_; ++i) - for (int j = 0; j < ny_; ++j) - for (int k = 0; k < nz_; ++k) - { - (*this)(i, j, k) = (*prc)(i0 + i, j0 + j, k0 + k) * variance; - sum += (*this)(i, j, k); - } - - sum /= nx_ * ny_ * nz_; - - if (setzero) - { -#pragma omp parallel for - for (int i = 0; i < nx_; ++i) - for (int j = 0; j < ny_; ++j) - for (int k = 0; k < nz_; ++k) - (*this)(i, j, k) -= sum; - } - } - - //! copies the data from another field with 3D index-based access operator - template - void copy(array3 &v) - { -#pragma omp parallel for - for (int ix = 0; ix < (int)nx_; ++ix) - for (int iy = 0; iy < (int)ny_; ++iy) - for (int iz = 0; iz < (int)nz_; ++iz) - v(ix, iy, iz) = (*this)(ix, iy, iz); - } - - //! adds the data from another field with 3D index-based access operator - template - void copy_add(array3 &v) - { -#pragma omp parallel for - for (int ix = 0; ix < (int)nx_; ++ix) - for (int iy = 0; iy < (int)ny_; ++iy) - for (int iz = 0; iz < (int)nz_; ++iz) - v(ix, iy, iz) += (*this)(ix, iy, iz); - } -}; - -template -class PaddedDensitySubGrid : public DensityGrid -{ -public: - using DensityGrid::nx_; - using DensityGrid::ny_; - using DensityGrid::nz_; - using DensityGrid::ox_; - using DensityGrid::oy_; - using DensityGrid::oz_; - using DensityGrid::data_; - - std::array pad_; - - using DensityGrid::fill_rand; - using DensityGrid::get_data_ptr; - -public: - PaddedDensitySubGrid(int ox, int oy, int oz, unsigned nx, unsigned ny, unsigned nz ) - : DensityGrid(nx*2, ny*2, nz*2, ox, oy, oz), - pad_{{ nx / 2, ny / 2, nz / 2 }} - { } - - PaddedDensitySubGrid(int ox, int oy, int oz, unsigned nx, unsigned ny, unsigned nz, unsigned padx, unsigned pady, unsigned padz ) - : DensityGrid(nx + 2 * padx, ny + 2 * pady, nz + 2 * padz, ox, oy, oz), - pad_{{ padx, pady, padz }} - { } - - PaddedDensitySubGrid(const PaddedDensitySubGrid &o) - : DensityGrid(o) - { } - - size_t margin(int i) const - { - return pad_[i]; - } - - template - void copy_unpad(array3 &v) - { - #pragma omp parallel for - for (size_t ix = pad_[0]; ix < nx_-pad_[0]; ++ix){ - const size_t ixu = ix - pad_[0]; - for (size_t iy = pad_[1], iyu = 0; iy < ny_-pad_[1]; ++iy, ++iyu) - for (size_t iz = pad_[2], izu = 0; iz < nz_-pad_[2]; ++iz, ++izu) - v(ixu, iyu, izu) = (*this)(ix, iy, iz); - } - } - - template - void copy_add_unpad(array3 &v) - { - #pragma omp parallel for - for (size_t ix = pad_[0]; ix < nx_-pad_[0]; ++ix){ - const size_t ixu = ix - pad_[0]; - for (size_t iy = pad_[1], iyu = 0; iy < ny_-pad_[1]; ++iy, ++iyu) - for (size_t iz = pad_[2], izu = 0; iz < nz_-pad_[2]; ++iz, ++izu) - v(ixu, iyu, izu) += (*this)(ix, iy, iz); - } - } - - template - void copy_subtract_unpad(array3 &v) - { - #pragma omp parallel for - for (size_t ix = pad_[0]; ix < nx_-pad_[0]; ++ix){ - const size_t ixu = ix - pad_[0]; - for (size_t iy = pad_[1], iyu = 0; iy < ny_-pad_[1]; ++iy, ++iyu) - for (size_t iz = pad_[2], izu = 0; iz < nz_-pad_[2]; ++iz, ++izu) - v(ixu, iyu, izu) -= (*this)(ix, iy, iz); - } - } -}; - void coarsen_density(const refinement_hierarchy &rh, GridHierarchy &u, bool kspace); #endif diff --git a/src/density_grid.hh b/src/density_grid.hh new file mode 100644 index 0000000..d1119aa --- /dev/null +++ b/src/density_grid.hh @@ -0,0 +1,297 @@ +#pragma once + +#include +#include + +/*! + * @class DensityGrid + * @brief provides infrastructure for computing the initial density field + * + * This class provides access and data management member functions that + * are used when computing the initial density field by convolution with + * transfer functions. + */ +template +class DensityGrid +{ +public: + size_t nx_; //!< number of grid cells in x-direction + size_t ny_; //!< number of grid cells in y-direction + size_t nz_; //!< number of grid cells in z-direction + size_t nzp_; //!< number of cells in memory (z-dir), used for Nyquist padding + + std::array nv_; + + int ox_; //!< offset of grid in x-direction + int oy_; //!< offset of grid in y-direction + int oz_; //!< offset of grid in z-direction + + std::array ov_; + + //! the actual data container in the form of a 1D array + std::vector data_; + + //! constructor + /*! constructs an instance given the dimensions of the density field + * @param nx the number of cells in x + * @param ny the number of cells in y + * @param nz the number of cells in z + */ + DensityGrid(unsigned nx, unsigned ny, unsigned nz) + : nx_(nx), ny_(ny), nz_(nz), nzp_(2 * (nz_ / 2 + 1)), ox_(0), oy_(0), oz_(0) + { + data_.assign((size_t)nx_ * (size_t)ny_ * (size_t)nzp_, 0.0); + nv_ = {nx_,ny_,nz_}; + ov_ = {ox_,oy_,oz_}; + // nv_[0] = nx_; + // nv_[1] = ny_; + // nv_[2] = nz_; + // ov_[0] = ox_; + // ov_[1] = oy_; + // ov_[2] = oz_; + } + + DensityGrid(unsigned nx, unsigned ny, unsigned nz, int ox, int oy, int oz) + : nx_(nx), ny_(ny), nz_(nz), nzp_(2 * (nz_ / 2 + 1)), ox_(ox), oy_(oy), oz_(oz) + { + data_.assign((size_t)nx_ * (size_t)ny_ * (size_t)nzp_, 0.0); + nv_ = {nx_,ny_,nz_}; + ov_ = {ox_,oy_,oz_}; + // nv_[0] = nx_; + // nv_[1] = ny_; + // nv_[2] = nz_; + // ov_[0] = ox_; + // ov_[1] = oy_; + // ov_[2] = oz_; + } + + //! copy constructor + explicit DensityGrid(const DensityGrid &g) + : nx_(g.nx_), ny_(g.ny_), nz_(g.nz_), nzp_(g.nzp_), + ox_(g.ox_), oy_(g.oy_), oz_(g.oz_) + { + data_ = g.data_; + nv_ = {nx_,ny_,nz_}; + ov_ = {ox_,oy_,oz_}; + // nv_[0] = nx_; + // nv_[1] = ny_; + // nv_[2] = nz_; + // ov_[0] = ox_; + // ov_[1] = oy_; + // ov_[2] = oz_; + } + + //! destructor + ~DensityGrid() + { + } + + //! clears the density object + /*! sets all dimensions to zero and frees the memory + */ + void clear(void) + { + nx_ = ny_ = nz_ = nzp_ = 0; + ox_ = oy_ = oz_ = 0; + nv_[0] = nv_[1] = nv_[2] = 0; + ov_[0] = ov_[1] = ov_[2] = 0; + + data_.clear(); + std::vector().swap(data_); + } + + //! query the 3D array sizes of the density object + /*! returns the size of the 3D density object along a specified dimension + * @param i the dimension for which size is to be returned + * @returns array size along dimension i + */ + size_t size(int i) + { + return nv_[i]; + } + + int offset(int i) + { + return ov_[i]; + } + + //! zeroes the density object + /*! sets all values to 0.0 + */ + void zero(void) + { + data_.assign(data_.size(), 0.0); + } + + //! assigns the contents of another DensityGrid to this + DensityGrid &operator=(const DensityGrid &g) + { + nx_ = g.nx_; + ny_ = g.ny_; + nz_ = g.nz_; + nzp_ = g.nzp_; + ox_ = g.ox_; + oy_ = g.oy_; + oz_ = g.oz_; + data_ = g.data_; + + return *this; + } + + //! 3D index based data access operator + inline real_t &operator()(size_t i, size_t j, size_t k) + { + return data_[((size_t)i * ny_ + (size_t)j) * nzp_ + (size_t)k]; + } + + //! 3D index based const data access operator + inline const real_t &operator()(size_t i, size_t j, size_t k) const + { + return data_[((size_t)i * ny_ + (size_t)j) * nzp_ + (size_t)k]; + } + + //! recover the pointer to the 1D data array + inline real_t *get_data_ptr(void) + { + return &data_[0]; + } + +#if 0 + //! fills the density field with random number values + /*! given a pointer to a random_numbers object, fills the field with random values + * @param prc pointer to a random_numbers object + * @param variance the variance of the random numbers (the values returned by prc are multiplied by this) + * @param i0 x-offset (shift) in cells of the density field with respect to the random number field + * @param j0 y-offset (shift) in cells of the density field with respect to the random number field + * @param k0 z-offset (shift) in cells of the density field with respect to the random number field + * @param setzero boolean, if true, the global mean will be subtracted + */ + void fill_rand(/*const*/ random_numbers *prc, real_t variance, int i0, int j0, int k0, bool setzero = false) + { + long double sum = 0.0; + +#pragma omp parallel for reduction(+ \ + : sum) + for (int i = 0; i < nx_; ++i) + for (int j = 0; j < ny_; ++j) + for (int k = 0; k < nz_; ++k) + { + (*this)(i, j, k) = (*prc)(i0 + i, j0 + j, k0 + k) * variance; + sum += (*this)(i, j, k); + } + + sum /= nx_ * ny_ * nz_; + + if (setzero) + { +#pragma omp parallel for + for (int i = 0; i < nx_; ++i) + for (int j = 0; j < ny_; ++j) + for (int k = 0; k < nz_; ++k) + (*this)(i, j, k) -= sum; + } + } +#endif + + //! copies the data from another field with 3D index-based access operator + template + void copy(array3 &v) + { +#pragma omp parallel for + for (int ix = 0; ix < (int)nx_; ++ix) + for (int iy = 0; iy < (int)ny_; ++iy) + for (int iz = 0; iz < (int)nz_; ++iz) + v(ix, iy, iz) = (*this)(ix, iy, iz); + } + + //! adds the data from another field with 3D index-based access operator + template + void copy_add(array3 &v) + { +#pragma omp parallel for + for (int ix = 0; ix < (int)nx_; ++ix) + for (int iy = 0; iy < (int)ny_; ++iy) + for (int iz = 0; iz < (int)nz_; ++iz) + v(ix, iy, iz) += (*this)(ix, iy, iz); + } +}; + +template +class PaddedDensitySubGrid : public DensityGrid +{ +public: + using DensityGrid::nx_; + using DensityGrid::ny_; + using DensityGrid::nz_; + using DensityGrid::ox_; + using DensityGrid::oy_; + using DensityGrid::oz_; + using DensityGrid::data_; + + std::array pad_; + + // using DensityGrid::fill_rand; + using DensityGrid::get_data_ptr; + +public: + PaddedDensitySubGrid(int ox, int oy, int oz, unsigned nx, unsigned ny, unsigned nz) + : DensityGrid(nx * 2, ny * 2, nz * 2, ox, oy, oz), + pad_{{nx / 2, ny / 2, nz / 2}} + { + } + + PaddedDensitySubGrid(int ox, int oy, int oz, unsigned nx, unsigned ny, unsigned nz, unsigned padx, unsigned pady, unsigned padz) + : DensityGrid(nx + 2 * padx, ny + 2 * pady, nz + 2 * padz, ox, oy, oz), + pad_{{padx, pady, padz}} + { + } + + PaddedDensitySubGrid(const PaddedDensitySubGrid &o) + : DensityGrid(o) + { + } + + size_t margin(int i) const + { + return pad_[i]; + } + + template + void copy_unpad(array3 &v) + { +#pragma omp parallel for + for (size_t ix = pad_[0]; ix < nx_ - pad_[0]; ++ix) + { + const size_t ixu = ix - pad_[0]; + for (size_t iy = pad_[1], iyu = 0; iy < ny_ - pad_[1]; ++iy, ++iyu) + for (size_t iz = pad_[2], izu = 0; iz < nz_ - pad_[2]; ++iz, ++izu) + v(ixu, iyu, izu) = (*this)(ix, iy, iz); + } + } + + template + void copy_add_unpad(array3 &v) + { +#pragma omp parallel for + for (size_t ix = pad_[0]; ix < nx_ - pad_[0]; ++ix) + { + const size_t ixu = ix - pad_[0]; + for (size_t iy = pad_[1], iyu = 0; iy < ny_ - pad_[1]; ++iy, ++iyu) + for (size_t iz = pad_[2], izu = 0; iz < nz_ - pad_[2]; ++iz, ++izu) + v(ixu, iyu, izu) += (*this)(ix, iy, iz); + } + } + + template + void copy_subtract_unpad(array3 &v) + { +#pragma omp parallel for + for (size_t ix = pad_[0]; ix < nx_ - pad_[0]; ++ix) + { + const size_t ixu = ix - pad_[0]; + for (size_t iy = pad_[1], iyu = 0; iy < ny_ - pad_[1]; ++iy, ++iyu) + for (size_t iz = pad_[2], izu = 0; iz < nz_ - pad_[2]; ++iz, ++izu) + v(ixu, iyu, izu) -= (*this)(ix, iy, iz); + } + } +}; \ No newline at end of file diff --git a/src/main.cc b/src/main.cc index 77b095a..2f03d2f 100644 --- a/src/main.cc +++ b/src/main.cc @@ -43,7 +43,7 @@ extern "C" #include "transfer_function.hh" #define THE_CODE_NAME "music!" -#define THE_CODE_VERSION "1.53" +#define THE_CODE_VERSION "2.0a" namespace music { @@ -249,7 +249,7 @@ double compute_finest_sigma(grid_hierarchy &u) return sqrt(sum2 - sum * sum); } -double compute_finest_max(grid_hierarchy &u) +double compute_finest_absmax(grid_hierarchy &u) { double valmax = 0.0; #pragma omp parallel for reduction(max:valmax) @@ -257,8 +257,8 @@ double compute_finest_max(grid_hierarchy &u) for (int iy = 0; iy < (int)(*u.get_grid(u.levelmax())).size(1); ++iy) for (int iz = 0; iz < (int)(*u.get_grid(u.levelmax())).size(2); ++iz) { - if (fabs((*u.get_grid(u.levelmax()))(ix, iy, iz)) > fabs(valmax)) - valmax = (*u.get_grid(u.levelmax()))(ix, iy, iz); + if (std::fabs((*u.get_grid(u.levelmax()))(ix, iy, iz)) > valmax) + valmax = std::fabs((*u.get_grid(u.levelmax()))(ix, iy, iz)); } return valmax; @@ -299,7 +299,6 @@ void add_constant_value( grid_hierarchy &u, const double val ) /*****************************************************************************************************/ region_generator_plugin *the_region_generator; -RNG_plugin *the_random_number_generator; int main(int argc, const char *argv[]) { @@ -461,8 +460,7 @@ int main(int argc, const char *argv[]) } the_region_generator = select_region_generator_plugin(cf); - - the_random_number_generator = select_RNG_plugin(cf); + //------------------------------------------------------------------------------ //... determine run parameters //------------------------------------------------------------------------------ @@ -472,6 +470,12 @@ int main(int argc, const char *argv[]) << " distinct amplitudes for baryon and DM fields!\n" << " Perturbation amplitudes will be identical!" << std::endl; + + //------------------------------------------------------------------------------ + //... start up the random number generator plugin + //... see if we need to set some grid building constraints + noise_generator rand( cf, the_transfer_function_plugin ); + //------------------------------------------------------------------------------ //... determine the refinement hierarchy //------------------------------------------------------------------------------ @@ -505,7 +509,8 @@ int main(int argc, const char *argv[]) std::cout << " GENERATING WHITE NOISE\n"; std::cout << "-------------------------------------------------------------\n"; LOGUSER("Computing white noise..."); - rand_gen rand(cf, rh_TF, the_transfer_function_plugin); + // rand_gen rand(cf, rh_TF, the_transfer_function_plugin); + rand.initialize_for_grid_structure( rh_TF ); //------------------------------------------------------------------------------ //... initialize the Poisson solver @@ -608,7 +613,7 @@ int main(int argc, const char *argv[]) else //... displacement the_poisson_solver->gradient(icoord, u, data_forIO); - double dispmax = compute_finest_max(data_forIO); + double dispmax = compute_finest_absmax(data_forIO); LOGINFO("max. %c-displacement of HR particles is %f [mean dx]", 'x' + icoord, dispmax * (double)(1ll << data_forIO.levelmax())); coarsen_density(rh_Poisson, data_forIO, false); @@ -745,7 +750,7 @@ int main(int argc, const char *argv[]) LOGINFO("mean of %c-velocity of high-res particles is %f", 'x' + icoord, meanv); LOGUSER("mean of %c-velocity of high-res particles is %f", 'x' + icoord, meanv); - double maxv = compute_finest_max(data_forIO); + double maxv = compute_finest_absmax(data_forIO); LOGINFO("max of abs of %c-velocity of high-res particles is %f", 'x' + icoord, maxv); coarsen_density(rh_Poisson, data_forIO, false); @@ -815,7 +820,7 @@ int main(int argc, const char *argv[]) LOGINFO("mean of %c-velocity of high-res particles is %f", 'x' + icoord, meanv); LOGUSER("mean of %c-velocity of high-res particles is %f", 'x' + icoord, meanv); - double maxv = compute_finest_max(data_forIO); + double maxv = compute_finest_absmax(data_forIO); LOGINFO("max of abs of %c-velocity of high-res particles is %f", 'x' + icoord, maxv); coarsen_density(rh_Poisson, data_forIO, false); @@ -874,7 +879,7 @@ int main(int argc, const char *argv[]) LOGINFO("mean of %c-velocity of high-res baryons is %f", 'x' + icoord, meanv); LOGUSER("mean of %c-velocity of high-res baryons is %f", 'x' + icoord, meanv); - double maxv = compute_finest_max(data_forIO); + double maxv = compute_finest_absmax(data_forIO); LOGINFO("max of abs of %c-velocity of high-res baryons is %f", 'x' + icoord, maxv); coarsen_density(rh_Poisson, data_forIO, false); @@ -993,7 +998,7 @@ int main(int argc, const char *argv[]) LOGINFO("mean of %c-velocity of high-res particles is %f", 'x' + icoord, meanv); LOGUSER("mean of %c-velocity of high-res particles is %f", 'x' + icoord, meanv); - double maxv = compute_finest_max(data_forIO); + double maxv = compute_finest_absmax(data_forIO); LOGINFO("max of abs of %c-velocity of high-res particles is %f", 'x' + icoord, maxv); std::cerr << " - velocity component " << icoord << " : sigma = " << sigv << std::endl; @@ -1091,7 +1096,7 @@ int main(int argc, const char *argv[]) LOGINFO("mean of %c-velocity of high-res baryons is %f", 'x' + icoord, meanv); LOGUSER("mean of %c-velocity of high-res baryons is %f", 'x' + icoord, meanv); - double maxv = compute_finest_max(data_forIO); + double maxv = compute_finest_absmax(data_forIO); LOGINFO("max of abs of %c-velocity of high-res baryons is %f", 'x' + icoord, maxv); std::cerr << " - velocity component " << icoord << " : sigma = " << sigv << std::endl; @@ -1199,7 +1204,7 @@ int main(int argc, const char *argv[]) else the_poisson_solver->gradient(icoord, u1, data_forIO); - double dispmax = compute_finest_max(data_forIO); + double dispmax = compute_finest_absmax(data_forIO); LOGINFO("max. %c-displacement of HR particles is %f [mean dx]", 'x' + icoord, dispmax * (double)(1ll << data_forIO.levelmax())); coarsen_density(rh_Poisson, data_forIO, false); diff --git a/src/plugins/random_music.cc b/src/plugins/random_music.cc index 3ec1689..0bfdf6b 100644 --- a/src/plugins/random_music.cc +++ b/src/plugins/random_music.cc @@ -1,20 +1,607 @@ #include "random.hh" +#include "random_music_wnoise_generator.hh" + +typedef music_wnoise_generator rng; + +class RNG_music : public RNG_plugin +{ +protected: + std::vector rngseeds_; + std::vector rngfnames_; + unsigned ran_cube_size_; + + int levelmin_, levelmax_, levelmin_seed_; + + bool disk_cached_; + bool restart_; + bool initialized_; + + std::vector *> mem_cache_; + + //! checks if the specified string is numeric + bool is_number(const std::string &s); + + //! parses the random number parameters in the conf file + void parse_random_parameters(void); + + //! computes the white noise fields and keeps them either in memory or on disk + void compute_random_numbers(void); + + //! adjusts averages + void correct_avg(int icoarse, int ifine); + + //! store the white noise fields in memory or on disk + void store_rnd(int ilevel, rng *prng); -class RNG_music : public RNG_plugin{ public: - explicit RNG_music( config_file& cf ) - : RNG_plugin( cf ) - { } - - ~RNG_music() { } - - bool is_multiscale() const - { return true; } + explicit RNG_music(config_file &cf) : RNG_plugin(cf), initialized_(false) {} + + ~RNG_music() {} + + bool is_multiscale() const { return true; } + + void initialize_for_grid_structure(const refinement_hierarchy &refh) + { + prefh_ = &refh; + levelmin_ = prefh_->levelmin(); + levelmax_ = prefh_->levelmax(); + + ran_cube_size_ = pcf_->getValueSafe("random", "cubesize", DEF_RAN_CUBE_SIZE); + disk_cached_ = pcf_->getValueSafe("random", "disk_cached", true); + restart_ = pcf_->getValueSafe("random", "restart", false); + + mem_cache_.assign(levelmax_ - levelmin_ + 1, (std::vector *)NULL); + + if (restart_ && !disk_cached_) + { + LOGERR("Cannot restart from mem cached random numbers."); + throw std::runtime_error("Cannot restart from mem cached random numbers."); + } + + //... determine seed/white noise file data to be applied + parse_random_parameters(); + + if (!restart_) + { + //... compute the actual random numbers + compute_random_numbers(); + } + + initialized_ = true; + } + + void fill_grid(int level, DensityGrid &R); }; +bool RNG_music::is_number(const std::string &s) +{ + for (size_t i = 0; i < s.length(); i++) + if (!std::isdigit(s[i]) && s[i] != '-') + return false; -namespace{ - RNG_plugin_creator_concrete< RNG_music > creator("MUSIC"); - - + return true; +} + +void RNG_music::parse_random_parameters(void) +{ + //... parse random number options + for (int i = 0; i <= 100; ++i) + { + char seedstr[128]; + std::string tempstr; + bool noseed = false; + sprintf(seedstr, "seed[%d]", i); + if (pcf_->containsKey("random", seedstr)) + tempstr = pcf_->getValue("random", seedstr); + else + { + // "-2" means that no seed entry was found for that level + tempstr = std::string("-2"); + noseed = true; + } + + if (is_number(tempstr)) + { + long ltemp; + pcf_->convert(tempstr, ltemp); + rngfnames_.push_back(""); + if (noseed) // ltemp < 0 ) + //... generate some dummy seed which only depends on the level, negative so we know it's not + //... an actual seed and thus should not be used as a constraint for coarse levels + // rngseeds_.push_back( -abs((unsigned)(ltemp-i)%123+(unsigned)(ltemp+827342523521*i)%123456789) ); + rngseeds_.push_back(-abs((long)(ltemp - i) % 123 + (long)(ltemp + 7342523521 * i) % 123456789)); + else + { + if (ltemp <= 0) + { + LOGERR("Specified seed [random]/%s needs to be a number >0!", seedstr); + throw std::runtime_error("Seed values need to be >0"); + } + rngseeds_.push_back(ltemp); + } + } + else + { + rngfnames_.push_back(tempstr); + rngseeds_.push_back(-1); + LOGINFO("Random numbers for level %3d will be read from file.", i); + } + } + + //.. determine for which levels random seeds/random number files are given + levelmin_seed_ = -1; + for (unsigned ilevel = 0; ilevel < rngseeds_.size(); ++ilevel) + { + if (levelmin_seed_ < 0 && (rngfnames_[ilevel].size() > 0 || rngseeds_[ilevel] >= 0)) + levelmin_seed_ = ilevel; + } +} + +void RNG_music::compute_random_numbers(void) +{ + bool rndsign = pcf_->getValueSafe("random", "grafic_sign", false); + + std::vector randc(std::max(levelmax_, levelmin_seed_) + 1, (rng *)NULL); + + //--- FILL ALL WHITE NOISE ARRAYS FOR WHICH WE NEED THE FULL FIELD ---// + + //... seeds are given for a level coarser than levelmin + if (levelmin_seed_ < levelmin_) + { + if (rngfnames_[levelmin_seed_].size() > 0) + randc[levelmin_seed_] = new rng(1 << levelmin_seed_, rngfnames_[levelmin_seed_], rndsign); + else + randc[levelmin_seed_] = new rng(1 << levelmin_seed_, ran_cube_size_, rngseeds_[levelmin_seed_]); + + for (int i = levelmin_seed_ + 1; i <= levelmin_; ++i) + { + // #warning add possibility to read noise from file also here! + + if (rngfnames_[i].size() > 0) + LOGINFO("Warning: Cannot use filenames for higher levels currently! Ignoring!"); + + randc[i] = new rng(*randc[i - 1], ran_cube_size_, rngseeds_[i]); + delete randc[i - 1]; + randc[i - 1] = NULL; + } + } + + //... seeds are given for a level finer than levelmin, obtain by averaging + if (levelmin_seed_ > levelmin_) + { + if (rngfnames_[levelmin_seed_].size() > 0) + randc[levelmin_seed_] = new rng(1 << levelmin_seed_, rngfnames_[levelmin_seed_], rndsign); + else + randc[levelmin_seed_] = + new rng(1 << levelmin_seed_, ran_cube_size_, rngseeds_[levelmin_seed_]); //, x0, lx ); + + for (int ilevel = levelmin_seed_ - 1; ilevel >= (int)levelmin_; --ilevel) + { + if (rngseeds_[ilevel - levelmin_] > 0) + LOGINFO("Warning: random seed for level %d will be ignored.\n" + " consistency requires that it is obtained by restriction from level %d", + ilevel, levelmin_seed_); + + randc[ilevel] = new rng(*randc[ilevel + 1]); + + if (ilevel + 1 > levelmax_) + { + delete randc[ilevel + 1]; + randc[ilevel + 1] = NULL; + } + } + } + + //--- GENERATE AND STORE ALL LEVELS, INCLUDING REFINEMENTS ---// + + //... levelmin + if (randc[levelmin_] == NULL) + { + if (rngfnames_[levelmin_].size() > 0) + randc[levelmin_] = new rng(1 << levelmin_, rngfnames_[levelmin_], rndsign); + else + randc[levelmin_] = new rng(1 << levelmin_, ran_cube_size_, rngseeds_[levelmin_]); + } + + store_rnd(levelmin_, randc[levelmin_]); + + //... refinement levels + for (int ilevel = levelmin_ + 1; ilevel <= levelmax_; ++ilevel) + { + int lx[3], x0[3]; + int shift[3], levelmin_poisson; + shift[0] = pcf_->getValue("setup", "shift_x"); + shift[1] = pcf_->getValue("setup", "shift_y"); + shift[2] = pcf_->getValue("setup", "shift_z"); + + levelmin_poisson = pcf_->getValue("setup", "levelmin"); + + int lfac = 1 << (ilevel - levelmin_poisson); + + std::array margin; + if( prefh_->get_margin()>0 ){ + margin[0] = prefh_->get_margin(); + margin[1] = prefh_->get_margin(); + margin[2] = prefh_->get_margin(); + }else{ + margin[0] = prefh_->size(ilevel, 0)/2; + margin[1] = prefh_->size(ilevel, 1)/2; + margin[2] = prefh_->size(ilevel, 2)/2; + } + + lx[0] = prefh_->size(ilevel, 0) + 2 * margin[0]; + lx[1] = prefh_->size(ilevel, 1) + 2 * margin[1]; + lx[2] = prefh_->size(ilevel, 2) + 2 * margin[2]; + x0[0] = prefh_->offset_abs(ilevel, 0) - lfac * shift[0] - margin[0]; + x0[1] = prefh_->offset_abs(ilevel, 1) - lfac * shift[1] - margin[1]; + x0[2] = prefh_->offset_abs(ilevel, 2) - lfac * shift[2] - margin[2]; + + if (randc[ilevel] == NULL) + randc[ilevel] = + new rng(*randc[ilevel - 1], ran_cube_size_, rngseeds_[ilevel], x0, lx); + delete randc[ilevel - 1]; + randc[ilevel - 1] = NULL; + + //... store numbers + store_rnd(ilevel, randc[ilevel]); + } + + delete randc[levelmax_]; + randc[levelmax_] = NULL; +} + +void RNG_music::store_rnd(int ilevel, rng *prng) +{ + int shift[3], levelmin_poisson; + shift[0] = pcf_->getValue("setup", "shift_x"); + shift[1] = pcf_->getValue("setup", "shift_y"); + shift[2] = pcf_->getValue("setup", "shift_z"); + + levelmin_poisson = pcf_->getValue("setup", "levelmin"); + + int lfac = 1 << (ilevel - levelmin_poisson); + + bool grafic_out = false; + + if (grafic_out) + { + std::vector data; + if (ilevel == levelmin_) + { + int N = 1 << levelmin_; + int i0, j0, k0; + i0 = -lfac * shift[0]; + j0 = -lfac * shift[1]; + k0 = -lfac * shift[2]; + + char fname[128]; + sprintf(fname, "grafic_wnoise_%04d.bin", ilevel); + + LOGUSER("Storing white noise field for grafic in file \'%s\'...", fname); + + std::ofstream ofs(fname, std::ios::binary | std::ios::trunc); + data.assign(N * N, 0.0); + + int blksize = 4 * sizeof(int); + int iseed = 0; + + ofs.write(reinterpret_cast(&blksize), sizeof(int)); + ofs.write(reinterpret_cast(&N), sizeof(int)); + ofs.write(reinterpret_cast(&N), sizeof(int)); + ofs.write(reinterpret_cast(&N), sizeof(int)); + ofs.write(reinterpret_cast(&iseed), sizeof(int)); + ofs.write(reinterpret_cast(&blksize), sizeof(int)); + + for (int k = 0; k < N; ++k) + { + #pragma omp parallel for + for (int j = 0; j < N; ++j) + for (int i = 0; i < N; ++i) + data[j * N + i] = -(*prng)(i + i0, j + j0, k + k0); + + blksize = N * N * sizeof(float); + ofs.write(reinterpret_cast(&blksize), sizeof(int)); + ofs.write(reinterpret_cast(&data[0]), N * N * sizeof(float)); + ofs.write(reinterpret_cast(&blksize), sizeof(int)); + } + + ofs.close(); + } + else + { + + int nx, ny, nz; + int i0, j0, k0; + + nx = prefh_->size(ilevel, 0); + ny = prefh_->size(ilevel, 1); + nz = prefh_->size(ilevel, 2); + i0 = prefh_->offset_abs(ilevel, 0) - lfac * shift[0]; + j0 = prefh_->offset_abs(ilevel, 1) - lfac * shift[1]; + k0 = prefh_->offset_abs(ilevel, 2) - lfac * shift[2]; + + char fname[128]; + sprintf(fname, "grafic_wnoise_%04d.bin", ilevel); + + LOGUSER("Storing white noise field for grafic in file \'%s\'...", fname); + LOGDEBUG("(%d,%d,%d) -- (%d,%d,%d) -- lfac = %d", nx, ny, nz, i0, j0, k0, lfac); + + std::ofstream ofs(fname, std::ios::binary | std::ios::trunc); + data.assign(nx * ny, 0.0); + + int blksize = 4 * sizeof(int); + int iseed = 0; + + ofs.write(reinterpret_cast(&blksize), sizeof(int)); + ofs.write(reinterpret_cast(&nz), sizeof(unsigned)); + ofs.write(reinterpret_cast(&ny), sizeof(unsigned)); + ofs.write(reinterpret_cast(&nx), sizeof(unsigned)); + ofs.write(reinterpret_cast(&iseed), sizeof(int)); + ofs.write(reinterpret_cast(&blksize), sizeof(int)); + + for (int k = 0; k < nz; ++k) + { + #pragma omp parallel for + for (int j = 0; j < ny; ++j) + for (int i = 0; i < nx; ++i) + data[j * nx + i] = -(*prng)(i + i0, j + j0, k + k0); + + blksize = nx * ny * sizeof(float); + ofs.write(reinterpret_cast(&blksize), sizeof(int)); + ofs.write(reinterpret_cast(&data[0]), nx * ny * sizeof(float)); + ofs.write(reinterpret_cast(&blksize), sizeof(int)); + } + ofs.close(); + } + } + + if (disk_cached_) + { + std::vector data; + if (ilevel == levelmin_) + { + int N = 1 << levelmin_; + int i0, j0, k0; + + i0 = -lfac * shift[0]; + j0 = -lfac * shift[1]; + k0 = -lfac * shift[2]; + + char fname[128]; + sprintf(fname, "wnoise_%04d.bin", ilevel); + + LOGUSER("Storing white noise field in file \'%s\'...", fname); + + std::ofstream ofs(fname, std::ios::binary | std::ios::trunc); + + ofs.write(reinterpret_cast(&N), sizeof(unsigned)); + ofs.write(reinterpret_cast(&N), sizeof(unsigned)); + ofs.write(reinterpret_cast(&N), sizeof(unsigned)); + + data.assign(N * N, 0.0); + for (int i = 0; i < N; ++i) + { + #pragma omp parallel for + for (int j = 0; j < N; ++j) + for (int k = 0; k < N; ++k) + data[j * N + k] = (*prng)(i + i0, j + j0, k + k0); + + ofs.write(reinterpret_cast(&data[0]), N * N * sizeof(real_t)); + } + ofs.close(); + } + else + { + int nx, ny, nz; + int i0, j0, k0; + + std::array margin; + if( prefh_->get_margin()>0 ){ + margin[0] = prefh_->get_margin(); + margin[1] = prefh_->get_margin(); + margin[2] = prefh_->get_margin(); + }else{ + margin[0] = prefh_->size(ilevel, 0)/2; + margin[1] = prefh_->size(ilevel, 1)/2; + margin[2] = prefh_->size(ilevel, 2)/2; + } + + nx = prefh_->size(ilevel, 0) + 2 * margin[0]; + ny = prefh_->size(ilevel, 1) + 2 * margin[1]; + nz = prefh_->size(ilevel, 2) + 2 * margin[2]; + i0 = prefh_->offset_abs(ilevel, 0) - lfac * shift[0] - margin[0]; + j0 = prefh_->offset_abs(ilevel, 1) - lfac * shift[1] - margin[1]; + k0 = prefh_->offset_abs(ilevel, 2) - lfac * shift[2] - margin[2]; + + char fname[128]; + sprintf(fname, "wnoise_%04d.bin", ilevel); + + LOGUSER("Storing white noise field in file \'%s\'...", fname); + + std::ofstream ofs(fname, std::ios::binary | std::ios::trunc); + + ofs.write(reinterpret_cast(&nx), sizeof(unsigned)); + ofs.write(reinterpret_cast(&ny), sizeof(unsigned)); + ofs.write(reinterpret_cast(&nz), sizeof(unsigned)); + + data.assign(ny * nz, 0.0); + for (int i = 0; i < nx; ++i) + { + #pragma omp parallel for + for (int j = 0; j < ny; ++j) + for (int k = 0; k < nz; ++k) + data[j * nz + k] = (*prng)(i + i0, j + j0, k + k0); + + ofs.write(reinterpret_cast(&data[0]), ny * nz * sizeof(real_t)); + } + ofs.close(); + } + } + else + { + int nx, ny, nz; + int i0, j0, k0; + + if (ilevel == levelmin_) + { + i0 = -lfac * shift[0]; + j0 = -lfac * shift[1]; + k0 = -lfac * shift[2]; + + nx = ny = nz = 1 << levelmin_; + } + else + { + std::array margin; + if( prefh_->get_margin()>0 ){ + margin[0] = prefh_->get_margin(); + margin[1] = prefh_->get_margin(); + margin[2] = prefh_->get_margin(); + }else{ + margin[0] = prefh_->size(ilevel, 0)/2; + margin[1] = prefh_->size(ilevel, 1)/2; + margin[2] = prefh_->size(ilevel, 2)/2; + } + nx = prefh_->size(ilevel, 0) + 2 * margin[0]; + ny = prefh_->size(ilevel, 1) + 2 * margin[1]; + nz = prefh_->size(ilevel, 2) + 2 * margin[2]; + i0 = prefh_->offset_abs(ilevel, 0) - lfac * shift[0] - margin[0]; + j0 = prefh_->offset_abs(ilevel, 1) - lfac * shift[1] - margin[1]; + k0 = prefh_->offset_abs(ilevel, 2) - lfac * shift[2] - margin[2]; + } + + mem_cache_[ilevel - levelmin_] = new std::vector(nx * ny * nz, 0.0); + + LOGUSER("Copying white noise to mem cache..."); + + #pragma omp parallel for + for (int i = 0; i < nx; ++i) + for (int j = 0; j < ny; ++j) + for (int k = 0; k < nz; ++k) + (*mem_cache_[ilevel - levelmin_])[((size_t)i * ny + (size_t)j) * nz + (size_t)k] = + (*prng)(i + i0, j + j0, k + k0); + } +} + +void RNG_music::fill_grid(int ilevel, DensityGrid &A) +{ + if (!initialized_) + { + LOGERR("Call to RNG_music::fill_grid before call to RNG_music::initialize_for_grid_structure"); + throw std::runtime_error("invalid call order for random number generator"); + } + + if (restart_) + LOGINFO("Attempting to restart using random numbers for level %d\n from file \'wnoise_%04d.bin\'.", ilevel, + ilevel); + + if (disk_cached_) + { + char fname[128]; + sprintf(fname, "wnoise_%04d.bin", ilevel); + + LOGUSER("Loading white noise from file \'%s\'...", fname); + + std::ifstream ifs(fname, std::ios::binary); + if (!ifs.good()) + { + LOGERR("White noise file \'%s\'was not found.", fname); + throw std::runtime_error("A white noise file was not found. This is an internal inconsistency and bad."); + } + + int nx, ny, nz; + ifs.read(reinterpret_cast(&nx), sizeof(int)); + ifs.read(reinterpret_cast(&ny), sizeof(int)); + ifs.read(reinterpret_cast(&nz), sizeof(int)); + + if (nx != (int)A.size(0) || ny != (int)A.size(1) || nz != (int)A.size(2)) + { + std::array margin; + if( prefh_->get_margin()>0 ){ + margin[0] = prefh_->get_margin(); + margin[1] = prefh_->get_margin(); + margin[2] = prefh_->get_margin(); + }else{ + margin[0] = prefh_->size(ilevel, 0)/2; + margin[1] = prefh_->size(ilevel, 1)/2; + margin[2] = prefh_->size(ilevel, 2)/2; + } + + if (nx == (int)A.size(0) + 2 * margin[0] && ny == (int)A.size(1) + 2 * margin[1] && nz == (int)A.size(2) + 2 * margin[2]) + { + int ox = margin[0], oy = margin[1], oz = margin[2]; + std::vector slice(ny * nz, 0.0); + + for (int i = 0; i < nx; ++i) + { + ifs.read(reinterpret_cast(&slice[0]), ny * nz * sizeof(real_t)); + + if (i < ox) + continue; + if (i >= 3 * ox) + break; + + #pragma omp parallel for + for (int j = oy; j < 3 * oy; ++j) + for (int k = oz; k < 3 * oz; ++k) + A(i - ox, j - oy, k - oz) = slice[j * nz + k]; + } + + ifs.close(); + } + else + { + LOGERR("White noise file is not aligned with array. File: [%d,%d,%d]. Mem: [%d,%d,%d].", nx, ny, nz, A.size(0), + A.size(1), A.size(2)); + throw std::runtime_error( + "White noise file is not aligned with array. This is an internal inconsistency and bad."); + } + } + else + { + + for (int i = 0; i < nx; ++i) + { + std::vector slice(ny * nz, 0.0); + ifs.read(reinterpret_cast(&slice[0]), ny * nz * sizeof(real_t)); + + #pragma omp parallel for + for (int j = 0; j < ny; ++j) + for (int k = 0; k < nz; ++k) + A(i, j, k) = slice[j * nz + k]; + } + ifs.close(); + } + } + else + { + LOGUSER("Copying white noise from memory cache..."); + + if (mem_cache_[ilevel - levelmin_] == NULL) + LOGERR("Tried to access mem-cached random numbers for level %d. But these are not available!\n", ilevel); + + int nx(A.size(0)), ny(A.size(1)), nz(A.size(2)); + + if ((size_t)nx * (size_t)ny * (size_t)nz != mem_cache_[ilevel - levelmin_]->size()) + { + LOGERR("White noise file is not aligned with array. File: [%d,%d,%d]. Mem: [%d,%d,%d].", nx, ny, nz, A.size(0), + A.size(1), A.size(2)); + throw std::runtime_error("White noise file is not aligned with array. This is an internal inconsistency and bad"); + } + + #pragma omp parallel for + for (int i = 0; i < nx; ++i) + for (int j = 0; j < ny; ++j) + for (int k = 0; k < nz; ++k) + A(i, j, k) = (*mem_cache_[ilevel - levelmin_])[((size_t)i * ny + (size_t)j) * nz + (size_t)k]; + + std::vector().swap(*mem_cache_[ilevel - levelmin_]); + delete mem_cache_[ilevel - levelmin_]; + mem_cache_[ilevel - levelmin_] = NULL; + } +}; + +namespace +{ + RNG_plugin_creator_concrete creator("MUSIC"); } diff --git a/src/plugins/random_music_wnoise_generator.cc b/src/plugins/random_music_wnoise_generator.cc new file mode 100644 index 0000000..f6426f0 --- /dev/null +++ b/src/plugins/random_music_wnoise_generator.cc @@ -0,0 +1,895 @@ + +#include + +#include +#include + +#include "random.hh" +#include "random_music_wnoise_generator.hh" + +template +music_wnoise_generator::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx) + : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed) +{ + LOGINFO("Generating random numbers (1) with seed %ld", baseseed); + + initialize(); + fill_subvolume(x0, lx); +} + +template +music_wnoise_generator::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, bool zeromean) + : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed) +{ + LOGINFO("Generating random numbers (2) with seed %ld", baseseed); + + double mean = 0.0; + size_t res_l = res; + + bool musicnoise = true; + if (!musicnoise) + cubesize_ = res_; + + if (!musicnoise) + LOGERR("This currently breaks compatibility. Need to disable by hand! Make sure to not check into repo"); + + initialize(); + + if (musicnoise) + mean = fill_all(); + else + { + rnums_.push_back(new Meshvar(res, 0, 0, 0)); + cubemap_[0] = 0; // create dummy map index + register_cube(0, 0, 0); + // rapid_proto_ngenic_rng( res_, baseseed_, *this ); + } + + /* + + if( musicnoise ) + mean = fill_all(); + else + { + rnums_.push_back( new Meshvar( res, 0, 0, 0 ) ); + cubemap_[0] = 0; // create dummy map index + register_cube(0,0,0); + rapid_proto_ngenic_rng( res_, baseseed_, *this ); + } + + */ + + if (zeromean) + { + mean = 0.0; + +#pragma omp parallel for reduction(+ \ + : mean) + for (int i = 0; i < (int)res_; ++i) + for (unsigned j = 0; j < res_; ++j) + for (unsigned k = 0; k < res_; ++k) + mean += (*this)(i, j, k); + + mean *= 1.0 / (double)(res_l * res_l * res_l); + +#pragma omp parallel for + for (int i = 0; i < (int)res_; ++i) + for (unsigned j = 0; j < res_; ++j) + for (unsigned k = 0; k < res_; ++k) + (*this)(i, j, k) = (*this)(i, j, k) - mean; + } +} + +template +music_wnoise_generator::music_wnoise_generator(unsigned res, std::string randfname, bool randsign) + : res_(res), cubesize_(res), ncubes_(1) +{ + rnums_.push_back(new Meshvar(res, 0, 0, 0)); + cubemap_[0] = 0; // create dummy map index + + std::ifstream ifs(randfname.c_str(), std::ios::binary); + if (!ifs) + { + LOGERR("Could not open random number file \'%s\'!", randfname.c_str()); + throw std::runtime_error(std::string("Could not open random number file \'") + randfname + std::string("\'!")); + } + + unsigned vartype; + unsigned nx, ny, nz, blksz32; + size_t blksz64; + int iseed; + // long seed; + + float sign4 = -1.0f; + double sign8 = -1.0; + + int addrtype = 32; + + if (randsign) // use grafic2 sign convention + { + sign4 = 1.0f; + sign8 = 1.0; + } + + //... read header and check if 32bit or 64bit block size .../ + ifs.read(reinterpret_cast(&blksz32), sizeof(int)); + ifs.read(reinterpret_cast(&nx), sizeof(unsigned)); + if (blksz32 != 4 * sizeof(int) || nx != res_) + { + addrtype = 64; + + ifs.seekg(0); + ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); + ifs.read(reinterpret_cast(&nx), sizeof(unsigned)); + + if (blksz64 != 4 * sizeof(int) || nx != res_) + addrtype = -1; + } + ifs.seekg(0); + + if (addrtype < 0) + throw std::runtime_error("corrupt random number file"); + + if (addrtype == 32) + ifs.read(reinterpret_cast(&blksz32), sizeof(int)); + else + ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); + + ifs.read(reinterpret_cast(&nx), sizeof(unsigned)); + ifs.read(reinterpret_cast(&ny), sizeof(unsigned)); + ifs.read(reinterpret_cast(&nz), sizeof(unsigned)); + ifs.read(reinterpret_cast(&iseed), sizeof(int)); + // seed = (long)iseed; + + if (nx != res_ || ny != res_ || nz != res_) + { + char errmsg[128]; + sprintf(errmsg, "White noise file dimensions do not match level dimensions: %ux%ux%u vs. %u**3", nx, ny, nz, res_); + throw std::runtime_error(errmsg); + } + + if (addrtype == 32) + ifs.read(reinterpret_cast(&blksz32), sizeof(int)); + else + ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); + + //... read data ...// + // check whether random numbers are single or double precision numbers + if (addrtype == 32) + { + ifs.read(reinterpret_cast(&blksz32), sizeof(int)); + if (blksz32 == nx * ny * sizeof(float)) + vartype = 4; + else if (blksz32 == nx * ny * sizeof(double)) + vartype = 8; + else + throw std::runtime_error("corrupt random number file"); + } + else + { + + ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); + if (blksz64 == nx * ny * sizeof(float)) + vartype = 4; + else if (blksz64 == nx * ny * sizeof(double)) + vartype = 8; + else + throw std::runtime_error("corrupt random number file"); + } + + // rewind to beginning of block + if (addrtype == 32) + ifs.seekg(-sizeof(int), std::ios::cur); + else + ifs.seekg(-sizeof(size_t), std::ios::cur); + + std::vector in_float; + std::vector in_double; + + LOGINFO("Random number file \'%s\'\n contains %ld numbers. Reading...", randfname.c_str(), nx * ny * nz); + + long double sum = 0.0, sum2 = 0.0; + size_t count = 0; + + // perform actual reading + if (vartype == 4) + { + for (int ii = 0; ii < (int)nz; ++ii) + { + + if (addrtype == 32) + { + ifs.read(reinterpret_cast(&blksz32), sizeof(int)); + if (blksz32 != nx * ny * sizeof(float)) + throw std::runtime_error("corrupt random number file"); + } + else + { + ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); + if (blksz64 != nx * ny * sizeof(float)) + throw std::runtime_error("corrupt random number file"); + } + + in_float.assign(nx * ny, 0.0f); + ifs.read((char *)&in_float[0], nx * ny * sizeof(float)); + + for (int jj = 0, q = 0; jj < (int)ny; ++jj) + for (int kk = 0; kk < (int)nx; ++kk) + { + sum += in_float[q]; + sum2 += in_float[q] * in_float[q]; + ++count; + + (*rnums_[0])(kk, jj, ii) = sign4 * in_float[q++]; + } + + if (addrtype == 32) + { + ifs.read(reinterpret_cast(&blksz32), sizeof(int)); + if (blksz32 != nx * ny * sizeof(float)) + throw std::runtime_error("corrupt random number file"); + } + else + { + ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); + if (blksz64 != nx * ny * sizeof(float)) + throw std::runtime_error("corrupt random number file"); + } + } + } + else if (vartype == 8) + { + for (int ii = 0; ii < (int)nz; ++ii) + { + if (addrtype == 32) + { + ifs.read(reinterpret_cast(&blksz32), sizeof(int)); + if (blksz32 != nx * ny * sizeof(double)) + throw std::runtime_error("corrupt random number file"); + } + else + { + ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); + if (blksz64 != nx * ny * sizeof(double)) + throw std::runtime_error("corrupt random number file"); + } + + in_double.assign(nx * ny, 0.0f); + ifs.read((char *)&in_double[0], nx * ny * sizeof(double)); + + for (int jj = 0, q = 0; jj < (int)ny; ++jj) + for (int kk = 0; kk < (int)nx; ++kk) + { + sum += in_double[q]; + sum2 += in_double[q] * in_double[q]; + ++count; + (*rnums_[0])(kk, jj, ii) = sign8 * in_double[q++]; + } + + if (addrtype == 32) + { + ifs.read(reinterpret_cast(&blksz32), sizeof(int)); + if (blksz32 != nx * ny * sizeof(double)) + throw std::runtime_error("corrupt random number file"); + } + else + { + ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); + if (blksz64 != nx * ny * sizeof(double)) + throw std::runtime_error("corrupt random number file"); + } + } + } + + double mean, var; + mean = sum / count; + var = sum2 / count - mean * mean; + + LOGINFO("Random numbers in file have \n mean = %f and var = %f", mean, var); +} + +//... copy construct by averaging down +template +music_wnoise_generator::music_wnoise_generator(/*const*/ music_wnoise_generator &rc) +{ + // if( res > rc.m_res || (res/rc.m_res)%2 != 0 ) + // throw std::runtime_error("Invalid restriction in random number container copy constructor."); + + long double sum = 0.0, sum2 = 0.0; + size_t count = 0; + + + LOGINFO("Generating a coarse white noise field by k-space degrading"); + //... initialize properties of container + res_ = rc.res_ / 2; + cubesize_ = res_; + ncubes_ = 1; + baseseed_ = -2; + + if (sizeof(fftw_real) != sizeof(T)) + { + LOGERR("type mismatch with fftw_real in k-space averaging"); + throw std::runtime_error("type mismatch with fftw_real in k-space averaging"); + } + + fftw_real + *rfine = new fftw_real[(size_t)rc.res_ * (size_t)rc.res_ * 2 * ((size_t)rc.res_ / 2 + 1)], + *rcoarse = new fftw_real[(size_t)res_ * (size_t)res_ * 2 * ((size_t)res_ / 2 + 1)]; + + fftw_complex + *ccoarse = reinterpret_cast(rcoarse), + *cfine = reinterpret_cast(rfine); + + int nx(rc.res_), ny(rc.res_), nz(rc.res_), nxc(res_), nyc(res_), nzc(res_); +#ifdef FFTW3 +#ifdef SINGLE_PRECISION + fftwf_plan + pf = fftwf_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE), + ipc = fftwf_plan_dft_c2r_3d(nxc, nyc, nzc, ccoarse, rcoarse, FFTW_ESTIMATE); +#else + fftw_plan + pf = fftw_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE), + ipc = fftw_plan_dft_c2r_3d(nxc, nyc, nzc, ccoarse, rcoarse, FFTW_ESTIMATE); +#endif + +#else + rfftwnd_plan + pf = rfftw3d_create_plan(nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE), + ipc = rfftw3d_create_plan(nxc, nyc, nzc, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); +#endif + +#pragma omp parallel for + for (int i = 0; i < nx; i++) + for (int j = 0; j < ny; j++) + for (int k = 0; k < nz; k++) + { + size_t q = ((size_t)i * ny + (size_t)j) * (nz + 2) + (size_t)k; + rfine[q] = rc(i, j, k); + } + +#ifdef FFTW3 +#ifdef SINGLE_PRECISION + fftwf_execute(pf); +#else + fftw_execute(pf); +#endif +#else +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pf, rfine, NULL); +#else + rfftwnd_one_real_to_complex(pf, rfine, NULL); +#endif +#endif + + double fftnorm = 1.0 / ((double)nxc * (double)nyc * (double)nzc); + +#pragma omp parallel for + for (int i = 0; i < nxc; i++) + for (int j = 0; j < nyc; j++) + for (int k = 0; k < nzc / 2 + 1; k++) + { + int ii(i), jj(j), kk(k); + + if (i > nxc / 2) + ii += nx / 2; + if (j > nyc / 2) + jj += ny / 2; + + size_t qc, qf; + + double kx = (i <= (int)nxc / 2) ? (double)i : (double)(i - (int)nxc); + double ky = (j <= (int)nyc / 2) ? (double)j : (double)(j - (int)nyc); + double kz = (k <= (int)nzc / 2) ? (double)k : (double)(k - (int)nzc); + + qc = ((size_t)i * nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k; + qf = ((size_t)ii * ny + (size_t)jj) * (nz / 2 + 1) + (size_t)kk; + + std::complex val_fine(RE(cfine[qf]), IM(cfine[qf])); + double phase = (kx / nxc + ky / nyc + kz / nzc) * 0.5 * M_PI; + std::complex val_phas(cos(phase), sin(phase)); + + val_fine *= val_phas * fftnorm / sqrt(8.0); + + RE(ccoarse[qc]) = val_fine.real(); + IM(ccoarse[qc]) = val_fine.imag(); + } + + delete[] rfine; +#ifdef FFTW3 +#ifdef SINGLE_PRECISION + fftwf_execute(ipc); +#else + fftw_execute(ipc); +#endif +#else +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), ipc, ccoarse, NULL); +#else + rfftwnd_one_complex_to_real(ipc, ccoarse, NULL); +#endif +#endif + rnums_.push_back(new Meshvar(res_, 0, 0, 0)); + cubemap_[0] = 0; // map all to single array + +#pragma omp parallel for reduction(+ \ + : sum, sum2, count) + for (int i = 0; i < nxc; i++) + for (int j = 0; j < nyc; j++) + for (int k = 0; k < nzc; k++) + { + size_t q = ((size_t)i * nyc + (size_t)j) * (nzc + 2) + (size_t)k; + (*rnums_[0])(i, j, k) = rcoarse[q]; + sum += (*rnums_[0])(i, j, k); + sum2 += (*rnums_[0])(i, j, k) * (*rnums_[0])(i, j, k); + ++count; + } + + delete[] rcoarse; + +#ifdef FFTW3 +#ifdef SINGLE_PRECISION + fftwf_destroy_plan(pf); + fftwf_destroy_plan(ipc); +#else + fftw_destroy_plan(pf); + fftw_destroy_plan(ipc); +#endif +#else + rfftwnd_destroy_plan(pf); + rfftwnd_destroy_plan(ipc); +#endif + + double rmean, rvar; + rmean = sum / count; + rvar = sum2 / count - rmean * rmean; + + LOGINFO("Restricted random numbers have\n mean = %f, var = %f", rmean, rvar); +} + +template +music_wnoise_generator::music_wnoise_generator(music_wnoise_generator &rc, unsigned cubesize, long baseseed, int *x0_, int *lx_, bool zeromean) + : res_(2 * rc.res_), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed) +{ + initialize(); + + int x0[3], lx[3]; + if (x0_ == NULL || lx_ == NULL) + { + for (int i = 0; i < 3; ++i) + { + x0[i] = 0; + lx[i] = res_; + } + fill_all(); + } + else + { + for (int i = 0; i < 3; ++i) + { + x0[i] = x0_[i]; + lx[i] = lx_[i]; + } + fill_subvolume(x0, lx); + } + + + LOGINFO("Generating a constrained random number set with seed %ld\n using coarse mode replacement...", baseseed); + assert(lx[0] % 2 == 0 && lx[1] % 2 == 0 && lx[2] % 2 == 0); + size_t nx = lx[0], ny = lx[1], nz = lx[2], + nxc = lx[0] / 2, nyc = lx[1] / 2, nzc = lx[2] / 2; + + fftw_real *rfine = new fftw_real[nx * ny * (nz + 2l)]; + fftw_complex *cfine = reinterpret_cast(rfine); + +#ifdef FFTW3 +#ifdef SINGLE_PRECISION + fftwf_plan + pf = fftwf_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE), + ipf = fftwf_plan_dft_c2r_3d(nx, ny, nz, cfine, rfine, FFTW_ESTIMATE); +#else + fftw_plan + pf = fftw_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE), + ipf = fftw_plan_dft_c2r_3d(nx, ny, nz, cfine, rfine, FFTW_ESTIMATE); +#endif +#else + rfftwnd_plan + pf = rfftw3d_create_plan(nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE), + ipf = rfftw3d_create_plan(nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); +#endif + +#pragma omp parallel for + for (int i = 0; i < (int)nx; i++) + for (int j = 0; j < (int)ny; j++) + for (int k = 0; k < (int)nz; k++) + { + size_t q = ((size_t)i * (size_t)ny + (size_t)j) * (size_t)(nz + 2) + (size_t)k; + rfine[q] = (*this)(x0[0] + i, x0[1] + j, x0[2] + k); + } + // this->free_all_mem(); // temporarily free memory, allocate again later + + fftw_real *rcoarse = new fftw_real[nxc * nyc * (nzc + 2)]; + fftw_complex *ccoarse = reinterpret_cast(rcoarse); + +#ifdef FFTW3 +#ifdef SINGLE_PRECISION + fftwf_plan pc = fftwf_plan_dft_r2c_3d(nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE); +#else + fftw_plan pc = fftw_plan_dft_r2c_3d(nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE); +#endif +#else + rfftwnd_plan pc = rfftw3d_create_plan(nxc, nyc, nzc, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE); +#endif + +#pragma omp parallel for + for (int i = 0; i < (int)nxc; i++) + for (int j = 0; j < (int)nyc; j++) + for (int k = 0; k < (int)nzc; k++) + { + size_t q = ((size_t)i * (size_t)nyc + (size_t)j) * (size_t)(nzc + 2) + (size_t)k; + rcoarse[q] = rc(x0[0] / 2 + i, x0[1] / 2 + j, x0[2] / 2 + k); + } +#ifdef FFTW3 +#ifdef SINGLE_PRECISION + fftwf_execute(pc); + fftwf_execute(pf); +#else + fftw_execute(pc); + fftw_execute(pf); +#endif +#else +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pc, rcoarse, NULL); + rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pf, rfine, NULL); +#else + rfftwnd_one_real_to_complex(pc, rcoarse, NULL); + rfftwnd_one_real_to_complex(pf, rfine, NULL); +#endif +#endif + + double fftnorm = 1.0 / ((double)nx * (double)ny * (double)nz); + double sqrt8 = sqrt(8.0); + double phasefac = -0.5; + + // embedding of coarse white noise by fourier interpolation + +#pragma omp parallel for + for (int i = 0; i < (int)nxc; i++) + for (int j = 0; j < (int)nyc; j++) + for (int k = 0; k < (int)nzc / 2 + 1; k++) + { + int ii(i), jj(j), kk(k); + + // if( i==(int)nxc/2 ) continue; + // if( j==(int)nyc/2 ) continue; + + if (i > (int)nxc / 2) + ii += (int)nx / 2; + if (j > (int)nyc / 2) + jj += (int)ny / 2; + + size_t qc, qf; + + double kx = (i <= (int)nxc / 2) ? (double)i : (double)(i - (int)nxc); + double ky = (j <= (int)nyc / 2) ? (double)j : (double)(j - (int)nyc); + double kz = (k <= (int)nzc / 2) ? (double)k : (double)(k - (int)nzc); + + qc = ((size_t)i * nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k; + qf = ((size_t)ii * ny + (size_t)jj) * (nz / 2 + 1) + (size_t)kk; + + std::complex val(RE(ccoarse[qc]), IM(ccoarse[qc])); + double phase = (kx / nxc + ky / nyc + kz / nzc) * phasefac * M_PI; + + std::complex val_phas(cos(phase), sin(phase)); + + val *= val_phas * sqrt8; + + if (i != (int)nxc / 2 && j != (int)nyc / 2 && k != (int)nzc / 2) + { + RE(cfine[qf]) = val.real(); + IM(cfine[qf]) = val.imag(); + } + else + { + // RE(cfine[qf]) = val.real(); + // IM(cfine[qf]) = 0.0; + } + } + + delete[] rcoarse; + +#pragma omp parallel for + for (int i = 0; i < (int)nx; i++) + for (int j = 0; j < (int)ny; j++) + for (int k = 0; k < (int)nz / 2 + 1; k++) + { + size_t q = ((size_t)i * ny + (size_t)j) * (nz / 2 + 1) + (size_t)k; + + RE(cfine[q]) *= fftnorm; + IM(cfine[q]) *= fftnorm; + } + +#ifdef FFTW3 +#ifdef SINGLE_PRECISION + fftwf_execute(ipf); +#else + fftw_execute(ipf); +#endif +#else +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), ipf, cfine, NULL); +#else + rfftwnd_one_complex_to_real(ipf, cfine, NULL); +#endif +#endif + +#pragma omp parallel for + for (int i = 0; i < (int)nx; i++) + for (int j = 0; j < (int)ny; j++) + for (int k = 0; k < (int)nz; k++) + { + size_t q = ((size_t)i * ny + (size_t)j) * (nz + 2) + (size_t)k; + (*this)(x0[0] + i, x0[1] + j, x0[2] + k, false) = rfine[q]; + } + + delete[] rfine; + +#ifdef FFTW3 +#ifdef SINGLE_PRECISION + fftwf_destroy_plan(pf); + fftwf_destroy_plan(pc); + fftwf_destroy_plan(ipf); +#else + fftw_destroy_plan(pf); + fftw_destroy_plan(pc); + fftw_destroy_plan(ipf); +#endif +#else + fftwnd_destroy_plan(pf); + fftwnd_destroy_plan(pc); + fftwnd_destroy_plan(ipf); +#endif + +} + +template +void music_wnoise_generator::register_cube(int i, int j, int k) +{ + i = (i + ncubes_) % ncubes_; + j = (j + ncubes_) % ncubes_; + k = (k + ncubes_) % ncubes_; + size_t icube = ((size_t)i * ncubes_ + (size_t)j) * ncubes_ + (size_t)k; + + cubemap_iterator it = cubemap_.find(icube); + + if (it == cubemap_.end()) + { + rnums_.push_back(NULL); + cubemap_[icube] = rnums_.size() - 1; +#ifdef DEBUG + LOGDEBUG("registering new cube %d,%d,%d . ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]); +#endif + } +} + +template +double music_wnoise_generator::fill_cube(int i, int j, int k) +{ + + gsl_rng *RNG = gsl_rng_alloc(gsl_rng_mt19937); + + i = (i + ncubes_) % ncubes_; + j = (j + ncubes_) % ncubes_; + k = (k + ncubes_) % ncubes_; + + size_t icube = ((size_t)i * ncubes_ + (size_t)j) * ncubes_ + (size_t)k; + long cubeseed = baseseed_ + icube; //... each cube gets its unique seed + + gsl_rng_set(RNG, cubeseed); + + cubemap_iterator it = cubemap_.find(icube); + + if (it == cubemap_.end()) + { + LOGERR("Attempt to access non-registered random number cube!"); + throw std::runtime_error("Attempt to access non-registered random number cube!"); + } + + size_t cubeidx = it->second; + + if (rnums_[cubeidx] == NULL) + rnums_[cubeidx] = new Meshvar(cubesize_, 0, 0, 0); + + double mean = 0.0; + + for (int ii = 0; ii < (int)cubesize_; ++ii) + for (int jj = 0; jj < (int)cubesize_; ++jj) + for (int kk = 0; kk < (int)cubesize_; ++kk) + { + (*rnums_[cubeidx])(ii, jj, kk) = gsl_ran_ugaussian_ratio_method(RNG); + mean += (*rnums_[cubeidx])(ii, jj, kk); + } + + gsl_rng_free(RNG); + + return mean / (cubesize_ * cubesize_ * cubesize_); +} + +template +void music_wnoise_generator::subtract_from_cube(int i, int j, int k, double val) +{ + i = (i + ncubes_) % ncubes_; + j = (j + ncubes_) % ncubes_; + k = (k + ncubes_) % ncubes_; + + size_t icube = ((size_t)i * ncubes_ + (size_t)j) * ncubes_ + (size_t)k; + + cubemap_iterator it = cubemap_.find(icube); + + if (it == cubemap_.end()) + { + LOGERR("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::subtract_from_cube", i, j, k); + throw std::runtime_error("Attempt to access unallocated RND cube in music_wnoise_generator::subtract_from_cube"); + } + + size_t cubeidx = it->second; + + for (int ii = 0; ii < (int)cubesize_; ++ii) + for (int jj = 0; jj < (int)cubesize_; ++jj) + for (int kk = 0; kk < (int)cubesize_; ++kk) + (*rnums_[cubeidx])(ii, jj, kk) -= val; +} + +template +void music_wnoise_generator::free_cube(int i, int j, int k) +{ + + i = (i + ncubes_) % ncubes_; + j = (j + ncubes_) % ncubes_; + k = (k + ncubes_) % ncubes_; + + size_t icube = ((size_t)i * (size_t)ncubes_ + (size_t)j) * (size_t)ncubes_ + (size_t)k; + + cubemap_iterator it = cubemap_.find(icube); + + if (it == cubemap_.end()) + { + LOGERR("Attempt to access unallocated RND cube %d,%d,%d in music_wnoise_generator::free_cube", i, j, k); + throw std::runtime_error("Attempt to access unallocated RND cube in music_wnoise_generator::free_cube"); + } + + size_t cubeidx = it->second; + + if (rnums_[cubeidx] != NULL) + { + delete rnums_[cubeidx]; + rnums_[cubeidx] = NULL; + } +} + +template +void music_wnoise_generator::initialize(void) +{ + + ncubes_ = std::max((int)((double)res_ / cubesize_), 1); + if (res_ < cubesize_) + { + ncubes_ = 1; + cubesize_ = res_; + } + + LOGINFO("Generating random numbers w/ sample cube size of %d", cubesize_); +} + +template +double music_wnoise_generator::fill_subvolume(int *i0, int *n) +{ + int i0cube[3], ncube[3]; + + i0cube[0] = (int)((double)(res_ + i0[0]) / cubesize_); + i0cube[1] = (int)((double)(res_ + i0[1]) / cubesize_); + i0cube[2] = (int)((double)(res_ + i0[2]) / cubesize_); + + ncube[0] = (int)(n[0] / cubesize_) + 2; + ncube[1] = (int)(n[1] / cubesize_) + 2; + ncube[2] = (int)(n[2] / cubesize_) + 2; + +#ifdef DEBUG + LOGDEBUG("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]); + LOGDEBUG("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]); +#endif + + double mean = 0.0; + + for (int i = i0cube[0]; i < i0cube[0] + ncube[0]; ++i) + for (int j = i0cube[1]; j < i0cube[1] + ncube[1]; ++j) + for (int k = i0cube[2]; k < i0cube[2] + ncube[2]; ++k) + { + int ii(i), jj(j), kk(k); + + ii = (ii + ncubes_) % ncubes_; + jj = (jj + ncubes_) % ncubes_; + kk = (kk + ncubes_) % ncubes_; + + register_cube(ii, jj, kk); + } + + #pragma omp parallel for reduction(+ : mean) + for (int i = i0cube[0]; i < i0cube[0] + ncube[0]; ++i) + for (int j = i0cube[1]; j < i0cube[1] + ncube[1]; ++j) + for (int k = i0cube[2]; k < i0cube[2] + ncube[2]; ++k) + { + int ii(i), jj(j), kk(k); + + ii = (ii + ncubes_) % ncubes_; + jj = (jj + ncubes_) % ncubes_; + kk = (kk + ncubes_) % ncubes_; + + mean += fill_cube(ii, jj, kk); + } + return mean / (ncube[0] * ncube[1] * ncube[2]); +} + +template +double music_wnoise_generator::fill_all(void) +{ + double sum = 0.0; + + for (int i = 0; i < (int)ncubes_; ++i) + for (int j = 0; j < (int)ncubes_; ++j) + for (int k = 0; k < (int)ncubes_; ++k) + { + int ii(i), jj(j), kk(k); + + ii = (ii + ncubes_) % ncubes_; + jj = (jj + ncubes_) % ncubes_; + kk = (kk + ncubes_) % ncubes_; + + register_cube(ii, jj, kk); + } + +#pragma omp parallel for reduction(+ \ + : sum) + for (int i = 0; i < (int)ncubes_; ++i) + for (int j = 0; j < (int)ncubes_; ++j) + for (int k = 0; k < (int)ncubes_; ++k) + { + int ii(i), jj(j), kk(k); + + ii = (ii + ncubes_) % ncubes_; + jj = (jj + ncubes_) % ncubes_; + kk = (kk + ncubes_) % ncubes_; + + sum += fill_cube(ii, jj, kk); + } + +//... subtract mean +#pragma omp parallel for reduction(+ \ + : sum) + for (int i = 0; i < (int)ncubes_; ++i) + for (int j = 0; j < (int)ncubes_; ++j) + for (int k = 0; k < (int)ncubes_; ++k) + { + int ii(i), jj(j), kk(k); + + ii = (ii + ncubes_) % ncubes_; + jj = (jj + ncubes_) % ncubes_; + kk = (kk + ncubes_) % ncubes_; + subtract_from_cube(ii, jj, kk, sum / (ncubes_ * ncubes_ * ncubes_)); + } + + return sum / (ncubes_ * ncubes_ * ncubes_); +} + +template +void music_wnoise_generator::print_allocated(void) +{ + unsigned ncount = 0, ntot = rnums_.size(); + for (size_t i = 0; i < rnums_.size(); ++i) + if (rnums_[i] != NULL) + ncount++; + + LOGINFO(" -> %d of %d random number cubes currently allocated", ncount, ntot); +} + +template class music_wnoise_generator; +template class music_wnoise_generator; diff --git a/src/plugins/random_music_wnoise_generator.hh b/src/plugins/random_music_wnoise_generator.hh new file mode 100644 index 0000000..28a2977 --- /dev/null +++ b/src/plugins/random_music_wnoise_generator.hh @@ -0,0 +1,205 @@ +#ifndef __RANDOM_MUSIC_WNOISE_GENERATOR_HH +#define __RANDOM_MUSIC_WNOISE_GENERATOR_HH + +#include +#include +#include "general.hh" +#include "mesh.hh" + +#define DEF_RAN_CUBE_SIZE 32 + +/*! + * @brief encapsulates all things random number generator related + */ +template< typename T > +class music_wnoise_generator +{ +public: + unsigned + res_, //!< resolution of the full mesh + cubesize_, //!< size of one independent random number cube + ncubes_; //!< number of random number cubes to cover the full mesh + long baseseed_; //!< base seed from which cube seeds are computed + +protected: + //! vector of 3D meshes (the random number cubes) with random numbers + std::vector< Meshvar* > rnums_; + + //! map of 3D indices to cube index + std::map cubemap_; + + typedef std::map::iterator cubemap_iterator; + +protected: + + //! register a cube with the hash map + void register_cube( int i, int j, int k); + + //! fills a subcube with random numbers + double fill_cube( int i, int j, int k); + + //! subtract a constant from an entire cube + void subtract_from_cube( int i, int j, int k, double val ); + + //! copy random numbers from a cube to a full grid array + template< class C > + void copy_cube( int i, int j, int k, C& dat ) + { + int offi, offj, offk; + + offi = i*cubesize_; + offj = j*cubesize_; + offk = k*cubesize_; + + i = (i+ncubes_)%ncubes_; + j = (j+ncubes_)%ncubes_; + k = (k+ncubes_)%ncubes_; + + size_t icube = (i*ncubes_+j)*ncubes_+k; + cubemap_iterator it = cubemap_.find( icube ); + + if( it == cubemap_.end() ) + { + LOGERR("attempting to copy data from non-existing RND cube %d,%d,%d",i,j,k); + throw std::runtime_error("attempting to copy data from non-existing RND cube"); + } + + size_t cubeidx = it->second; + + for( int ii=0; ii<(int)cubesize_; ++ii ) + for( int jj=0; jj<(int)cubesize_; ++jj ) + for( int kk=0; kk<(int)cubesize_; ++kk ) + dat(offi+ii,offj+jj,offk+kk) = (*rnums_[cubeidx])(ii,jj,kk); + } + + //! free the memory associated with a subcube + void free_cube( int i, int j, int k ); + + //! initialize member variables and allocate memory + void initialize( void ); + + //! fill a cubic subvolume of the full grid with random numbers + double fill_subvolume( int *i0, int *n ); + + //! fill an entire grid with random numbers + double fill_all( void ); + + //! fill an external array instead of the internal field + template< class C > + double fill_all( C& dat ) + { + double sum = 0.0; + + for( int i=0; i<(int)ncubes_; ++i ) + for( int j=0; j<(int)ncubes_; ++j ) + for( int k=0; k<(int)ncubes_; ++k ) + { + int ii(i),jj(j),kk(k); + register_cube(ii,jj,kk); + } + + #pragma omp parallel for reduction(+:sum) + for( int i=0; i<(int)ncubes_; ++i ) + for( int j=0; j<(int)ncubes_; ++j ) + for( int k=0; k<(int)ncubes_; ++k ) + { + int ii(i),jj(j),kk(k); + + ii = (ii+ncubes_)%ncubes_; + jj = (jj+ncubes_)%ncubes_; + kk = (kk+ncubes_)%ncubes_; + + sum+=fill_cube(ii, jj, kk); + copy_cube(ii,jj,kk,dat); + free_cube(ii, jj, kk); + } + + return sum/(ncubes_*ncubes_*ncubes_); + } + + //! write the number of allocated random number cubes to stdout + void print_allocated( void ); + +public: + + //! constructor + music_wnoise_generator( unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx ); + + //! constructor for constrained fine field + music_wnoise_generator( music_wnoise_generator& rc, unsigned cubesize, long baseseed, int *x0_=NULL, int *lx_=NULL, bool zeromean=true ); + + //! constructor + music_wnoise_generator( unsigned res, unsigned cubesize, long baseseed, bool zeromean=true ); + + + //! constructor to read white noise from file + music_wnoise_generator( unsigned res, std::string randfname, bool rndsign ); + + + //! copy constructor for averaged field (not copying) hence explicit! + explicit music_wnoise_generator( /*const*/ music_wnoise_generator & rc ); + + //! destructor + ~music_wnoise_generator() + { + for( unsigned i=0; isecond; + + if( rnums_[ cubeidx ] == NULL ) + { + LOGERR("Attempting to access data from non-allocated RND cube %d,%d,%d",ic,jc,kc); + throw std::runtime_error("attempting to access data from non-allocated RND cube"); + } + + //... determine cell in cube + is = (i - ic * cubesize_ + cubesize_) % cubesize_; + js = (j - jc * cubesize_ + cubesize_) % cubesize_; + ks = (k - kc * cubesize_ + cubesize_) % cubesize_; + + return (*rnums_[ cubeidx ])(is,js,ks); + } + + //! free all cubes + void free_all_mem( void ) + { + for( unsigned i=0; igetValue("random", "descriptor"); #ifdef _OPENMP @@ -160,16 +173,17 @@ public: // write panphasia base size into config file for the grid construction // as the gridding unit we use the least common multiple of 2 and i_base std::stringstream ss; - //ARJ ss << lcm(2, pdescriptor_->i_base); - //ss << two_or_largest_power_two_less_than(pdescriptor_->i_base);//ARJ - ss << 2; //ARJ - set gridding unit to two + // ARJ ss << lcm(2, pdescriptor_->i_base); + // ss << two_or_largest_power_two_less_than(pdescriptor_->i_base);//ARJ + ss << 2; // ARJ - set gridding unit to two pcf_->insertValue("setup", "gridding_unit", ss.str()); ss.str(std::string()); - ss << pdescriptor_->i_base ; - pcf_->insertValue("random","base_unit", ss.str()); + ss << pdescriptor_->i_base; + pcf_->insertValue("random", "base_unit", ss.str()); } - void initialize_for_grid_structure(const refinement_hierarchy &refh) { + void initialize_for_grid_structure(const refinement_hierarchy &refh) + { prefh_ = &refh; levelmin_ = prefh_->levelmin(); levelmin_final_ = pcf_->getValue("setup", "levelmin"); @@ -180,20 +194,21 @@ public: // if ngrid is not a multiple of i_base, then we need to enlarge and then sample down ngrid_ = 1 << levelmin_; - + grid_p_ = pdescriptor_->i_base; grid_m_ = largest_power_two_lte(grid_p_); lextra_ = (log10((double)ngrid_ / (double)pdescriptor_->i_base) + 0.001) / log10(2.0); - int ratio = 1 << lextra_; + int ratio = 1 << lextra_; grid_rescale_fac_ = 1.0; - + coordinate_system_shift_[0] = -pcf_->getValue("setup", "shift_x"); coordinate_system_shift_[1] = -pcf_->getValue("setup", "shift_y"); coordinate_system_shift_[2] = -pcf_->getValue("setup", "shift_z"); incongruent_fields_ = false; - if (ngrid_ != ratio * pdescriptor_->i_base) { + if (ngrid_ != ratio * pdescriptor_->i_base) + { incongruent_fields_ = true; ngrid_ = 2 * ratio * pdescriptor_->i_base; grid_rescale_fac_ = (double)ngrid_ / (1 << levelmin_); @@ -201,7 +216,7 @@ public: " (%d -> %d) * 2**ref compatible with PANPHASIA\n" " will Fourier interpolate after.", grid_m_, grid_p_); - } + } } ~RNG_panphasia() { delete[] lstate; } @@ -211,7 +226,8 @@ public: bool is_multiscale() const { return true; } }; -void RNG_panphasia::forward_transform_field(real_t *field, int nx, int ny, int nz) { +void RNG_panphasia::forward_transform_field(real_t *field, int nx, int ny, int nz) +{ fftw_real *rfield = reinterpret_cast(field); fftw_complex *cfield = reinterpret_cast(field); @@ -241,7 +257,8 @@ void RNG_panphasia::forward_transform_field(real_t *field, int nx, int ny, int n #endif } -void RNG_panphasia::backward_transform_field(real_t *field, int nx, int ny, int nz) { +void RNG_panphasia::backward_transform_field(real_t *field, int nx, int ny, int nz) +{ fftw_real *rfield = reinterpret_cast(field); fftw_complex *cfield = reinterpret_cast(field); @@ -272,7 +289,8 @@ void RNG_panphasia::backward_transform_field(real_t *field, int nx, int ny, int } #include -inline double get_wtime(void) { +inline double get_wtime(void) +{ #ifdef _OPENMP return omp_get_wtime(); #else @@ -280,63 +298,69 @@ inline double get_wtime(void) { #endif } -void RNG_panphasia::fill_grid(int level, DensityGrid &R) { +void RNG_panphasia::fill_grid(int level, DensityGrid &R) +{ fftw_real *pr0, *pr1, *pr2, *pr3, *pr4; fftw_complex *pc0, *pc1, *pc2, *pc3, *pc4; - // determine resolution and offset so that we can do proper resampling int ileft[3], ileft_corner[3], nx[3], nxremap[3]; int iexpand_left[3]; - for (int k = 0; k < 3; ++k) { - ileft[k] = prefh_->offset_abs(level, k); + for (int k = 0; k < 3; ++k) + { + ileft[k] = prefh_->offset_abs(level, k); nx[k] = R.size(k); - assert(nx[k] % 4 == 0); - if (level == levelmin_) { - ileft_corner[k] = ileft[k]; // Top level - periodic - }else{ - ileft_corner[k] = (ileft[k] - nx[k]/4 + (1 << level))%(1 << level); // Isolated + assert(nx[k] % 4 == 0); + if (level == levelmin_) + { + ileft_corner[k] = ileft[k]; // Top level - periodic } - iexpand_left[k] = (ileft_corner[k]%grid_m_ ==0) ? 0 : ileft_corner[k]%grid_m_; - fprintf(stderr, "dim=%c : ileft = %d, ileft_corner %d, nx = %d\n", 'x' + k, ileft[k],ileft_corner[k],nx[k]); + else + { + ileft_corner[k] = (ileft[k] - nx[k] / 4 + (1 << level)) % (1 << level); // Isolated + } + iexpand_left[k] = (ileft_corner[k] % grid_m_ == 0) ? 0 : ileft_corner[k] % grid_m_; + // fprintf(stderr, "dim=%c : ileft = %d, ileft_corner %d, nx = %d\n", 'x' + k, ileft[k],ileft_corner[k],nx[k]); }; - int ileft_corner_m[3], ileft_corner_p[3],nx_m[3]; - int ileft_max_expand = std::max(iexpand_left[0],std::max(iexpand_left[1],iexpand_left[2])); + int ileft_corner_m[3], ileft_corner_p[3], nx_m[3]; + int ileft_max_expand = std::max(iexpand_left[0], std::max(iexpand_left[1], iexpand_left[2])); - for (int k = 0; k < 3; ++k) { - ileft_corner_m[k] = ((ileft_corner[k] - iexpand_left[k]) + - coordinate_system_shift_[k] * (1 << (level - levelmin_final_)) + (1 << level)) % (1 << level); + for (int k = 0; k < 3; ++k) + { + ileft_corner_m[k] = ((ileft_corner[k] - iexpand_left[k]) + + coordinate_system_shift_[k] * (1 << (level - levelmin_final_)) + (1 << level)) % + (1 << level); - ileft_corner_p[k] = grid_p_ * ileft_corner_m[k]/grid_m_; - nx_m[k] = (ileft_max_expand!=0)? nx[k] + ileft_max_expand: nx[k]; - if (nx_m[k]%grid_m_ !=0) nx_m[k] = nx_m[k] + grid_m_ - nx_m[k]%grid_m_; - nxremap[k] = grid_p_ * nx_m[k]/grid_m_; - if (nxremap[k]%2==1){ + ileft_corner_p[k] = grid_p_ * ileft_corner_m[k] / grid_m_; + nx_m[k] = (ileft_max_expand != 0) ? nx[k] + ileft_max_expand : nx[k]; + if (nx_m[k] % grid_m_ != 0) + nx_m[k] = nx_m[k] + grid_m_ - nx_m[k] % grid_m_; + nxremap[k] = grid_p_ * nx_m[k] / grid_m_; + if (nxremap[k] % 2 == 1) + { nx_m[k] = nx_m[k] + grid_m_; - nxremap[k] = grid_p_ * nx_m[k]/grid_m_; - } + nxremap[k] = grid_p_ * nx_m[k] / grid_m_; + } } - - if ( (nx_m[0]!=nx_m[1]) || (nx_m[0]!=nx_m[2])) LOGERR("Fatal error: non-cubic refinement being requested"); + if ((nx_m[0] != nx_m[1]) || (nx_m[0] != nx_m[2])) + LOGERR("Fatal error: non-cubic refinement being requested"); inter_grid_phase_adjustment_ = M_PI * (1.0 / (double)nx_m[0] - 1.0 / (double)nxremap[0]); - LOGINFO("The value of the phase adjustement is %f\n", inter_grid_phase_adjustment_); + LOGUSER("The value of the phase adjustement is %f\n", inter_grid_phase_adjustment_); + // LOGINFO("ileft[0],ileft[1],ileft[2] %d %d %d", ileft[0], ileft[1], ileft[2]); + // LOGINFO("ileft_corner[0,1,2] %d %d %d", ileft_corner[0], ileft_corner[1], ileft_corner[2]); - LOGINFO("ileft[0],ileft[1],ileft[2] %d %d %d", ileft[0], ileft[1], ileft[2]); - LOGINFO("ileft_corner[0,1,2] %d %d %d", ileft_corner[0], ileft_corner[1], ileft_corner[2]); + // LOGINFO("iexpand_left[1,2,3] = (%d, %d, %d) Max %d ",iexpand_left[0],iexpand_left[1],iexpand_left[2], ileft_max_expand); - LOGINFO("iexpand_left[1,2,3] = (%d, %d, %d) Max %d ",iexpand_left[0],iexpand_left[1],iexpand_left[2], - ileft_max_expand); - - LOGINFO("ileft_corner_m[0,1,2] = (%d,%d,%d)",ileft_corner_m[0],ileft_corner_m[1],ileft_corner_m[2]); - LOGINFO("grid_m_ %d grid_p_ %d",grid_m_,grid_p_); - LOGINFO("nx_m[0,1,2] = (%d,%d,%d)",nx_m[0],nx_m[1],nx_m[2]); - LOGINFO("ileft_corner_p[0,1,2] = (%d,%d,%d)",ileft_corner_p[0],ileft_corner_p[1],ileft_corner_p[2]); - LOGINFO("nxremap[0,1,2] = (%d,%d,%d)",nxremap[0],nxremap[1],nxremap[2]); + // LOGINFO("ileft_corner_m[0,1,2] = (%d,%d,%d)",ileft_corner_m[0],ileft_corner_m[1],ileft_corner_m[2]); + // LOGINFO("grid_m_ %d grid_p_ %d",grid_m_,grid_p_); + // LOGINFO("nx_m[0,1,2] = (%d,%d,%d)",nx_m[0],nx_m[1],nx_m[2]); + // LOGINFO("ileft_corner_p[0,1,2] = (%d,%d,%d)",ileft_corner_p[0],ileft_corner_p[1],ileft_corner_p[2]); + // LOGINFO("nxremap[0,1,2] = (%d,%d,%d)",nxremap[0],nxremap[1],nxremap[2]); size_t ngp = nxremap[0] * nxremap[1] * (nxremap[2] + 2); @@ -358,7 +382,6 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { double t1 = get_wtime(); double tp = t1; - #pragma omp parallel { #ifdef _OPENMP @@ -368,13 +391,14 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { #endif int odd_x, odd_y, odd_z; int ng_level = ngrid_ * (1 << (level - levelmin_)); // full resolution of current level - + int verbosity = (mythread == 0); char descriptor[100]; memset(descriptor, 0, 100); memcpy(descriptor, descriptor_string_.c_str(), descriptor_string_.size()); - if (level == levelmin_) { + if (level == levelmin_) + { start_panphasia_(&lstate[mythread], descriptor, &ng_level, &verbosity); } @@ -388,16 +412,11 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { int ratio = 1 << lextra; assert(ng_level == ratio * d.i_base); + ix_rel[0] = ileft_corner_p[0]; + ix_rel[1] = ileft_corner_p[1]; + ix_rel[2] = ileft_corner_p[2]; - - ix_rel[0] = ileft_corner_p[0]; - ix_rel[1] = ileft_corner_p[1]; - ix_rel[2] = ileft_corner_p[2]; - - - -// Code above ignores the coordinate_system_shift_ - but currently this is set to zero // - + // Code above ignores the coordinate_system_shift_ - but currently this is set to zero // lstate[mythread].layer_min = 0; lstate[mythread].layer_max = level_p; @@ -417,30 +436,34 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { if (verbosity) t1 = get_wtime(); -//*************************************************************** -// Process Panphasia values: p000, p001, p010, p100 and indep field -//**************************************************************** -// START // + //*************************************************************** + // Process Panphasia values: p000, p001, p010, p100 and indep field + //**************************************************************** + // START // -#pragma omp for //nowait - for (int i = 0; i < nxremap[0] / 2 + odd_x; ++i) { +#pragma omp for // nowait + for (int i = 0; i < nxremap[0] / 2 + odd_x; ++i) + { double cell_prop[9]; pan_state_ *ps = &lstate[mythread]; for (int j = 0; j < nxremap[1] / 2 + odd_y; ++j) - for (int k = 0; k < nxremap[2] / 2 + odd_z; ++k) { + for (int k = 0; k < nxremap[2] / 2 + odd_z; ++k) + { // ARJ - added inner set of loops to speed up evaluation of Panphasia for (int ix = 0; ix < 2; ++ix) for (int iy = 0; iy < 2; ++iy) - for (int iz = 0; iz < 2; ++iz) { + for (int iz = 0; iz < 2; ++iz) + { int ii = 2 * i + ix - odd_x; int jj = 2 * j + iy - odd_y; int kk = 2 * k + iz - odd_z; if (((ii >= 0) && (ii < nxremap[0])) && ((jj >= 0) && (jj < nxremap[1])) && - ((kk >= 0) && (kk < nxremap[2]))) { + ((kk >= 0) && (kk < nxremap[2]))) + { size_t idx = ((size_t)ii * nxremap[1] + (size_t)jj) * (nxremap[2] + 2) + (size_t)kk; adv_panphasia_cell_properties_(ps, &ii, &jj, &kk, &ps->layer_min, &ps->layer_max, &ps->indep_field, @@ -456,14 +479,14 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { } } } - LOGINFO("time for calculating PANPHASIA for level %d : %f s, %f µs/cell", level, get_wtime() - t1, + LOGUSER("time for calculating PANPHASIA for level %d : %f s, %f µs/cell", level, get_wtime() - t1, 1e6 * (get_wtime() - t1) / ((double)nxremap[2] * (double)nxremap[1] * (double)nxremap[0])); - LOGINFO("time for calculating PANPHASIA for level %d : %f s, %f µs/cell", level, get_wtime() - t1, + LOGUSER("time for calculating PANPHASIA for level %d : %f s, %f µs/cell", level, get_wtime() - t1, 1e6 * (get_wtime() - t1) / ((double)nxremap[2] * (double)nxremap[1] * (double)nxremap[0])); ////////////////////////////////////////////////////////////////////////////////////////////// - LOGINFO("\033[31mtiming level %d [adv_panphasia_cell_properties]: %f s\033[0m", level, get_wtime() - tp); + LOGUSER("\033[31mtiming level %d [adv_panphasia_cell_properties]: %f s\033[0m", level, get_wtime() - tp); tp = get_wtime(); ///////////////////////////////////////////////////////////////////////// @@ -478,7 +501,8 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { #pragma omp parallel for for (int i = 0; i < nxremap[0]; i++) for (int j = 0; j < nxremap[1]; j++) - for (int k = 0; k < nxremap[2] / 2 + 1; k++) { + for (int k = 0; k < nxremap[2] / 2 + 1; k++) + { size_t idx = ((size_t)i * nxremap[1] + (size_t)j) * (nxremap[2] / 2 + 1) + (size_t)k; double fx(1.0), fy(1.0), fz(1.0), arg = 0.; @@ -492,30 +516,38 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { // int kkmax = std::max(abs(ii),std::max(abs(jj),abs(kk))); - - if (ii != 0) { + if (ii != 0) + { arg = M_PI * (double)ii / (double)nxremap[0]; fx = sin(arg) / arg; gx = complex(0.0, (arg * cos(arg) - sin(arg)) / (arg * arg)); - } else { + } + else + { fx = 1.0; gx = 0.0; } - if (jj != 0) { + if (jj != 0) + { arg = M_PI * (double)jj / (double)nxremap[1]; fy = sin(arg) / arg; gy = complex(0.0, (arg * cos(arg) - sin(arg)) / (arg * arg)); - } else { + } + else + { fy = 1.0; gy = 0.0; } - if (kk != 0) { + if (kk != 0) + { arg = M_PI * (double)kk / (double)nxremap[2]; fz = sin(arg) / arg; gz = complex(0.0, (arg * cos(arg) - sin(arg)) / (arg * arg)); - } else { + } + else + { fz = 1.0; gz = 0.0; } @@ -524,7 +556,8 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { double magnitude = sqrt(1.0 - std::abs(temp_comp * temp_comp)); if (abs(ii) != nxremap[0] / 2 && abs(jj) != nxremap[1] / 2 && - abs(kk) != nxremap[2] / 2) { // kkmax != nxremap[2]/2 ){ + abs(kk) != nxremap[2] / 2) + { // kkmax != nxremap[2]/2 ){ complex x, y0(RE(pc0[idx]), IM(pc0[idx])), y1(RE(pc1[idx]), IM(pc1[idx])), y2(RE(pc2[idx]), IM(pc2[idx])), y3(RE(pc3[idx]), IM(pc3[idx])), y4(RE(pc4[idx]), IM(pc4[idx])); @@ -538,12 +571,12 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { // END - LOGINFO("\033[31mtiming level %d [build panphasia field]: %f s\033[0m", level, get_wtime() - tp); + LOGUSER("\033[31mtiming level %d [build panphasia field]: %f s\033[0m", level, get_wtime() - tp); tp = get_wtime(); -//*************************************************************** -// Process Panphasia values: p000, p001, p010, p100 and indep field -//**************************************************************** + //*************************************************************** + // Process Panphasia values: p000, p001, p010, p100 and indep field + //**************************************************************** #pragma omp parallel { @@ -559,7 +592,8 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { memset(descriptor, 0, 100); memcpy(descriptor, descriptor_string_.c_str(), descriptor_string_.size()); - if (level == levelmin_) { + if (level == levelmin_) + { start_panphasia_(&lstate[mythread], descriptor, &ng_level, &verbosity); } @@ -577,7 +611,7 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { ix_rel[1] = ileft_corner_p[1]; ix_rel[2] = ileft_corner_p[2]; -// Code above ignores the coordinate_system_shift_ - but currently this is set to zero // + // Code above ignores the coordinate_system_shift_ - but currently this is set to zero // lstate[mythread].layer_min = 0; lstate[mythread].layer_max = level_p; @@ -601,25 +635,29 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { //*************************************************************** // Process Panphasia values: p110, p011, p101, p111 //**************************************************************** -#pragma omp for //nowait - for (int i = 0; i < nxremap[0] / 2 + odd_x; ++i) { +#pragma omp for // nowait + for (int i = 0; i < nxremap[0] / 2 + odd_x; ++i) + { double cell_prop[9]; pan_state_ *ps = &lstate[mythread]; for (int j = 0; j < nxremap[1] / 2 + odd_y; ++j) - for (int k = 0; k < nxremap[2] / 2 + odd_z; ++k) { + for (int k = 0; k < nxremap[2] / 2 + odd_z; ++k) + { // ARJ - added inner set of loops to speed up evaluation of Panphasia for (int ix = 0; ix < 2; ++ix) for (int iy = 0; iy < 2; ++iy) - for (int iz = 0; iz < 2; ++iz) { + for (int iz = 0; iz < 2; ++iz) + { int ii = 2 * i + ix - odd_x; int jj = 2 * j + iy - odd_y; int kk = 2 * k + iz - odd_z; if (((ii >= 0) && (ii < nxremap[0])) && ((jj >= 0) && (jj < nxremap[1])) && - ((kk >= 0) && (kk < nxremap[2]))) { + ((kk >= 0) && (kk < nxremap[2]))) + { size_t idx = ((size_t)ii * nxremap[1] + (size_t)jj) * (nxremap[2] + 2) + (size_t)kk; adv_panphasia_cell_properties_(ps, &ii, &jj, &kk, &ps->layer_min, &ps->layer_max, &ps->indep_field, @@ -637,7 +675,7 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { LOGINFO("time for calculating PANPHASIA for level %d : %f s, %f µs/cell", level, get_wtime() - t1, 1e6 * (get_wtime() - t1) / ((double)nxremap[2] * (double)nxremap[1] * (double)nxremap[0])); - LOGINFO("\033[31mtiming level %d [adv_panphasia_cell_properties2]: %f s \033[0m", level, get_wtime() - tp); + LOGUSER("\033[31mtiming level %d [adv_panphasia_cell_properties2]: %f s \033[0m", level, get_wtime() - tp); tp = get_wtime(); ///////////////////////////////////////////////////////////////////////// @@ -651,7 +689,8 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { #pragma omp parallel for for (int i = 0; i < nxremap[0]; i++) for (int j = 0; j < nxremap[1]; j++) - for (int k = 0; k < nxremap[2] / 2 + 1; k++) { + for (int k = 0; k < nxremap[2] / 2 + 1; k++) + { size_t idx = ((size_t)i * nxremap[1] + (size_t)j) * (nxremap[2] / 2 + 1) + (size_t)k; double fx(1.0), fy(1.0), fz(1.0), arg = 0.; @@ -665,35 +704,45 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { // int kkmax = std::max(abs(ii),std::max(abs(jj),abs(kk))); - if (ii != 0) { + if (ii != 0) + { arg = M_PI * (double)ii / (double)nxremap[0]; fx = sin(arg) / arg; gx = complex(0.0, (arg * cos(arg) - sin(arg)) / (arg * arg)); - } else { + } + else + { fx = 1.0; gx = 0.0; } - if (jj != 0) { + if (jj != 0) + { arg = M_PI * (double)jj / (double)nxremap[1]; fy = sin(arg) / arg; gy = complex(0.0, (arg * cos(arg) - sin(arg)) / (arg * arg)); - } else { + } + else + { fy = 1.0; gy = 0.0; } - if (kk != 0) { + if (kk != 0) + { arg = M_PI * (double)kk / (double)nxremap[2]; fz = sin(arg) / arg; gz = complex(0.0, (arg * cos(arg) - sin(arg)) / (arg * arg)); - } else { + } + else + { fz = 1.0; gz = 0.0; } if (abs(ii) != nxremap[0] / 2 && abs(jj) != nxremap[1] / 2 && - abs(kk) != nxremap[2] / 2) { // kkmax != nxremap[2]/2 ){ + abs(kk) != nxremap[2] / 2) + { // kkmax != nxremap[2]/2 ){ complex x, y1(RE(pc1[idx]), IM(pc1[idx])), y2(RE(pc2[idx]), IM(pc2[idx])), y3(RE(pc3[idx]), IM(pc3[idx])), y4(RE(pc4[idx]), IM(pc4[idx])); @@ -704,7 +753,7 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { } } - LOGINFO("\033[31mtiming level %d [build panphasia field2]: %f s\033[0m", level, get_wtime() - tp); + LOGUSER("\033[31mtiming level %d [build panphasia field2]: %f s\033[0m", level, get_wtime() - tp); tp = get_wtime(); // END @@ -717,21 +766,24 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { // do we need to cut off the small scales? // int nn = 1< %d",nxremap[0],nx_m[0]); + LOGINFO("Remapping fields from dimension %d -> %d", nxremap[0], nx_m[0]); memset(pr1, 0, ngp * sizeof(fftw_real)); -#pragma omp parallel for + #pragma omp parallel for for (int i = 0; i < nxremap[0]; i++) for (int j = 0; j < nxremap[1]; j++) - for (int k = 0; k < nxremap[2] / 2 + 1; k++) { + for (int k = 0; k < nxremap[2] / 2 + 1; k++) + { int ii = (i > nxremap[0] / 2) ? i - nxremap[0] : i, jj = (j > nxremap[1] / 2) ? j - nxremap[1] : j, kk = k; int ia(abs(ii)), ja(abs(jj)), ka(abs(kk)); - if (ia < nx_m[0] / 2 && ja < nx_m[1] / 2 && ka < nx_m[2] / 2) { + if (ia < nx_m[0] / 2 && ja < nx_m[1] / 2 && ka < nx_m[2] / 2) + { size_t idx = ((size_t)(i)*nxremap[1] + (size_t)(j)) * (nxremap[2] / 2 + 1) + (size_t)(k); @@ -739,27 +791,26 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { size_t idx2 = ((size_t)ir * nx_m[1] + (size_t)jr) * ((size_t)nx_m[2] / 2 + 1) + (size_t)kr; - - complex x(RE(pc0[idx]),IM(pc0[idx])); - double total_phase_shift; - total_phase_shift = inter_grid_phase_adjustment_ * (double)(ii+jj+kk); - x = x * exp(complex(0.0, total_phase_shift)); - RE(pc1[idx2]) = x.real(); - IM(pc1[idx2]) = x.imag(); - } + complex x(RE(pc0[idx]), IM(pc0[idx])); + double total_phase_shift; + total_phase_shift = inter_grid_phase_adjustment_ * (double)(ii + jj + kk); + x = x * exp(complex(0.0, total_phase_shift)); + RE(pc1[idx2]) = x.real(); + IM(pc1[idx2]) = x.imag(); + } } memcpy(pr0, pr1, ngp * sizeof(fftw_real)); } + // if (level == 9) + // { + // LOGUSER("DC mode of level is %g", RE(pc0[0])); + // // RE(pc0[0]) = 1e8; + // // IM(pc0[0]) = 0.0; + // } - if( level == 9 ){ - LOGINFO("DC mode of level is %g",RE(pc0[0])); - //RE(pc0[0]) = 1e8; - //IM(pc0[0]) = 0.0; - } - - LOGINFO("\033[31mtiming level %d [remap noncongruent]: %f s\033[0m", level, get_wtime() - tp); + LOGUSER("\033[31mtiming level %d [remap noncongruent]: %f s\033[0m", level, get_wtime() - tp); tp = get_wtime(); ///////////////////////////////////////////////////////////////////////// // transform back @@ -774,7 +825,7 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { delete[] pr4; LOGINFO("Copying random field data %d,%d,%d -> %d,%d,%d", nxremap[0], nxremap[1], nxremap[2], nx[0], nx[1], nx[2]); - + // n = 1< &R) { /*double norm = 1.0 / sqrt((double)nxremap[0] * (double)nxremap[1] * (double)nxremap[2] * (double)nx[0] * (double)nx[1] * (double)nx[2]);*/ - double norm = 1.0 / sqrt((double)nxremap[0] * (double)nxremap[1] * (double)nxremap[2] * (double)nx_m[0] * - (double)nx_m[1] * (double)nx_m[2]); + double norm = 1.0 / sqrt((double)nxremap[0] * (double)nxremap[1] * (double)nxremap[2] * (double)nx_m[0] * + (double)nx_m[1] * (double)nx_m[2]); -#pragma omp parallel for reduction(+ : sum, sum2, count) +#pragma omp parallel for reduction(+ \ + : sum, sum2, count) for (int k = 0; k < nx[2]; ++k) // ARJ - swapped roles of i,k, and reverse ordered loops for (int j = 0; j < nx[1]; ++j) - for (int i = 0; i < nx[0]; ++i) { - size_t idx = ((size_t)(i+iexpand_left[0])*nx_m[1] + (size_t)(j+iexpand_left[1])) * (nx_m[2] + 2) - + (size_t)(k+iexpand_left[2]); + for (int i = 0; i < nx[0]; ++i) + { + size_t idx = ((size_t)(i + iexpand_left[0]) * nx_m[1] + (size_t)(j + iexpand_left[1])) * (nx_m[2] + 2) + (size_t)(k + iexpand_left[2]); R(i, j, k) = pr0[idx] * norm; sum += R(i, j, k); @@ -814,8 +866,9 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) { LOGINFO("PANPHASIA level %d mean and variance are\n

= %g | var(p) = %g", level, sum, sum2); } -namespace { -RNG_plugin_creator_concrete creator("PANPHASIA"); +namespace +{ + RNG_plugin_creator_concrete creator("PANPHASIA"); } #endif diff --git a/src/random.cc b/src/random.cc index f8564b6..f2b15c3 100644 --- a/src/random.cc +++ b/src/random.cc @@ -1,17 +1,14 @@ /* - + random.cc - This file is part of MUSIC - - a code to generate multi-scale initial conditions - for cosmological simulations - - Copyright (C) 2010 Oliver Hahn - + a code to generate multi-scale initial conditions for cosmological simulations + + Copyright (C) 2010-23 Oliver Hahn + */ #include "random.hh" -// TODO: move all this into a plugin!!! - std::map & get_RNG_plugin_map() { @@ -56,1827 +53,3 @@ RNG_plugin *select_RNG_plugin(config_file &cf) return the_RNG_plugin; } - -//////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#if defined(FFTW3) && defined(SINGLE_PRECISION) -//#define fftw_complex fftwf_complex -typedef fftw_complex fftwf_complex; -#endif - -template -void rapid_proto_ngenic_rng(size_t res, long baseseed, random_numbers &R) -{ - LOGUSER("Invoking the N-GenIC random number generator"); - - unsigned *seedtable = new unsigned[res * res]; - - gsl_rng *random_generator = gsl_rng_alloc(gsl_rng_ranlxd1); - - gsl_rng_set(random_generator, baseseed); - - for (size_t i = 0; i < res / 2; i++) - { - size_t j; - for (j = 0; j < i; j++) - seedtable[i * res + j] = 0x7fffffff * gsl_rng_uniform(random_generator); - for (j = 0; j < i + 1; j++) - seedtable[j * res + i] = 0x7fffffff * gsl_rng_uniform(random_generator); - for (j = 0; j < i; j++) - seedtable[(res - 1 - i) * res + j] = 0x7fffffff * gsl_rng_uniform(random_generator); - for (j = 0; j < i + 1; j++) - seedtable[(res - 1 - j) * res + i] = 0x7fffffff * gsl_rng_uniform(random_generator); - for (j = 0; j < i; j++) - seedtable[i * res + (res - 1 - j)] = 0x7fffffff * gsl_rng_uniform(random_generator); - for (j = 0; j < i + 1; j++) - seedtable[j * res + (res - 1 - i)] = 0x7fffffff * gsl_rng_uniform(random_generator); - for (j = 0; j < i; j++) - seedtable[(res - 1 - i) * res + (res - 1 - j)] = 0x7fffffff * gsl_rng_uniform(random_generator); - for (j = 0; j < i + 1; j++) - seedtable[(res - 1 - j) * res + (res - 1 - i)] = 0x7fffffff * gsl_rng_uniform(random_generator); - } - - fftw_real *rnoise = new fftw_real[res * res * (res + 2)]; - fftw_complex *knoise = reinterpret_cast(rnoise); - - double fnorm = 1. / sqrt(res * res * res); - -// #warning need to check for race conditions below - //#pragma omp parallel for - for (size_t i = 0; i < res; i++) - { - int ii = (int)res - (int)i; - if (ii == (int)res) - ii = 0; - - for (size_t j = 0; j < res; j++) - { - gsl_rng_set(random_generator, seedtable[i * res + j]); - - for (size_t k = 0; k < res / 2; k++) - { - double phase = gsl_rng_uniform(random_generator) * 2 * M_PI; - double ampl; - do - ampl = gsl_rng_uniform(random_generator); - while (ampl == 0); - - if (i == res / 2 || j == res / 2 || k == res / 2) - continue; - if (i == 0 && j == 0 && k == 0) - continue; - - T rp = -sqrt(-log(ampl)) * cos(phase) * fnorm; - T ip = -sqrt(-log(ampl)) * sin(phase) * fnorm; - - if (k > 0) - { - RE(knoise[(i * res + j) * (res / 2 + 1) + k]) = rp; - IM(knoise[(i * res + j) * (res / 2 + 1) + k]) = ip; - } - else /* k=0 plane needs special treatment */ - { - if (i == 0) - { - if (j >= res / 2) - continue; - else - { - int jj = (int)res - (int)j; /* note: j!=0 surely holds at this point */ - - RE(knoise[(i * res + j) * (res / 2 + 1) + k]) = rp; - IM(knoise[(i * res + j) * (res / 2 + 1) + k]) = ip; - - RE(knoise[(i * res + jj) * (res / 2 + 1) + k]) = rp; - IM(knoise[(i * res + jj) * (res / 2 + 1) + k]) = -ip; - } - } - else - { - if (i >= res / 2) - continue; - else - { - int ii = (int)res - (int)i; - if (ii == (int)res) - ii = 0; - int jj = (int)res - (int)j; - if (jj == (int)res) - jj = 0; - - RE(knoise[(i * res + j) * (res / 2 + 1) + k]) = rp; - IM(knoise[(i * res + j) * (res / 2 + 1) + k]) = ip; - - if (ii >= 0 && ii < (int)res) - { - RE(knoise[(ii * res + jj) * (res / 2 + 1) + k]) = rp; - IM(knoise[(ii * res + jj) * (res / 2 + 1) + k]) = -ip; - } - } - } - } - } - } - } - - delete[] seedtable; - - //... perform FT to real space - -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan plan = fftwf_plan_dft_c2r_3d(res, res, res, knoise, rnoise, FFTW_ESTIMATE); - fftwf_execute(plan); - fftwf_destroy_plan(plan); -#else - fftw_plan plan = fftw_plan_dft_c2r_3d(res, res, res, knoise, rnoise, FFTW_ESTIMATE); - fftw_execute(plan); - fftw_destroy_plan(plan); -#endif -#else - rfftwnd_plan plan = rfftw3d_create_plan(res, res, res, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), plan, knoise, NULL); -#else - rfftwnd_one_complex_to_real(plan, knoise, NULL); -#endif - rfftwnd_destroy_plan(plan); -#endif - - // copy to array that holds the random numbers - -#pragma omp parallel for - for (int i = 0; i < (int)res; ++i) - for (size_t j = 0; j < res; ++j) - for (size_t k = 0; k < res; ++k) - R(i, j, k) = rnoise[((size_t)i * res + j) * res + k]; - - delete[] rnoise; -} - -template -random_numbers::random_numbers(unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx) - : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed) -{ - LOGINFO("Generating random numbers (1) with seed %ld", baseseed); - - initialize(); - fill_subvolume(x0, lx); -} - -template -random_numbers::random_numbers(unsigned res, unsigned cubesize, long baseseed, bool zeromean) - : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed) -{ - LOGINFO("Generating random numbers (2) with seed %ld", baseseed); - - double mean = 0.0; - size_t res_l = res; - - bool musicnoise = true; - if (!musicnoise) - { - cubesize_ = res_; - LOGERR("This currently breaks compatibility. Need to disable by hand! Make sure to not check into repo"); - } - - initialize(); - - if (musicnoise) - mean = fill_all(); - else - { - rnums_.push_back(new Meshvar(res, 0, 0, 0)); - cubemap_[0] = 0; // create dummy map index - register_cube(0, 0, 0); - rapid_proto_ngenic_rng(res_, baseseed_, *this); - } - - if (zeromean) - { - mean = 0.0; - -#pragma omp parallel for reduction(+ \ - : mean) - for (int i = 0; i < (int)res_; ++i) - for (unsigned j = 0; j < res_; ++j) - for (unsigned k = 0; k < res_; ++k) - mean += (*this)(i, j, k); - - mean *= 1.0 / (double)(res_l * res_l * res_l); - -#pragma omp parallel for - for (int i = 0; i < (int)res_; ++i) - for (unsigned j = 0; j < res_; ++j) - for (unsigned k = 0; k < res_; ++k) - (*this)(i, j, k) = (*this)(i, j, k) - mean; - } -} - -template -random_numbers::random_numbers(unsigned res, std::string randfname, bool randsign) - : res_(res), cubesize_(res), ncubes_(1) -{ - rnums_.push_back(new Meshvar(res, 0, 0, 0)); - cubemap_[0] = 0; // create dummy map index - - std::ifstream ifs(randfname.c_str(), std::ios::binary); - if (!ifs) - { - LOGERR("Could not open random number file \'%s\'!", randfname.c_str()); - throw std::runtime_error(std::string("Could not open random number file \'") + randfname + std::string("\'!")); - } - - unsigned vartype; - unsigned nx, ny, nz, blksz32; - size_t blksz64; - int iseed; - //long seed; - - float sign4 = -1.0f; - double sign8 = -1.0; - - int addrtype = 32; - - if (randsign) // use grafic2 sign convention - { - sign4 = 1.0f; - sign8 = 1.0; - } - - //... read header and check if 32bit or 64bit block size .../ - ifs.read(reinterpret_cast(&blksz32), sizeof(int)); - ifs.read(reinterpret_cast(&nx), sizeof(unsigned)); - if (blksz32 != 4 * sizeof(int) || nx != res_) - { - addrtype = 64; - - ifs.seekg(0); - ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); - ifs.read(reinterpret_cast(&nx), sizeof(unsigned)); - - if (blksz64 != 4 * sizeof(int) || nx != res_) - addrtype = -1; - } - ifs.seekg(0); - - if (addrtype < 0) - { - throw std::runtime_error("corrupt random number file"); - } - - if (addrtype == 32) - { - ifs.read(reinterpret_cast(&blksz32), sizeof(int)); - } - else - { - ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); - } - ifs.read(reinterpret_cast(&nx), sizeof(unsigned)); - ifs.read(reinterpret_cast(&ny), sizeof(unsigned)); - ifs.read(reinterpret_cast(&nz), sizeof(unsigned)); - ifs.read(reinterpret_cast(&iseed), sizeof(int)); - //seed = (long)iseed; - - if (nx != res_ || ny != res_ || nz != res_) - { - char errmsg[128]; - sprintf(errmsg, "White noise file dimensions do not match level dimensions: %ux%ux%u vs. %u**3", nx, ny, nz, res_); - throw std::runtime_error(errmsg); - } - - if (addrtype == 32) - ifs.read(reinterpret_cast(&blksz32), sizeof(int)); - else - ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); - - //... read data ...// - //check whether random numbers are single or double precision numbers - if (addrtype == 32) - { - ifs.read(reinterpret_cast(&blksz32), sizeof(int)); - if (blksz32 == nx * ny * sizeof(float)) - vartype = 4; - else if (blksz32 == nx * ny * sizeof(double)) - vartype = 8; - else - throw std::runtime_error("corrupt random number file"); - } - else - { - - ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); - if (blksz64 == nx * ny * sizeof(float)) - vartype = 4; - else if (blksz64 == nx * ny * sizeof(double)) - vartype = 8; - else - throw std::runtime_error("corrupt random number file"); - } - - //rewind to beginning of block - if (addrtype == 32) - ifs.seekg(-sizeof(int), std::ios::cur); - else - ifs.seekg(-sizeof(size_t), std::ios::cur); - - std::vector in_float; - std::vector in_double; - - LOGINFO("Random number file \'%s\'\n contains %ld numbers. Reading...", randfname.c_str(), nx * ny * nz); - - long double sum = 0.0, sum2 = 0.0; - size_t count = 0; - - //perform actual reading - if (vartype == 4) - { - for (int ii = 0; ii < (int)nz; ++ii) - { - - if (addrtype == 32) - { - ifs.read(reinterpret_cast(&blksz32), sizeof(int)); - if (blksz32 != nx * ny * sizeof(float)) - throw std::runtime_error("corrupt random number file"); - } - else - { - ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); - if (blksz64 != nx * ny * sizeof(float)) - throw std::runtime_error("corrupt random number file"); - } - - in_float.assign(nx * ny, 0.0f); - ifs.read((char *)&in_float[0], nx * ny * sizeof(float)); - - for (int jj = 0, q = 0; jj < (int)ny; ++jj) - for (int kk = 0; kk < (int)nx; ++kk) - { - sum += in_float[q]; - sum2 += in_float[q] * in_float[q]; - ++count; - - (*rnums_[0])(kk, jj, ii) = sign4 * in_float[q++]; - } - - if (addrtype == 32) - { - ifs.read(reinterpret_cast(&blksz32), sizeof(int)); - if (blksz32 != nx * ny * sizeof(float)) - throw std::runtime_error("corrupt random number file"); - } - else - { - ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); - if (blksz64 != nx * ny * sizeof(float)) - throw std::runtime_error("corrupt random number file"); - } - } - } - else if (vartype == 8) - { - for (int ii = 0; ii < (int)nz; ++ii) - { - if (addrtype == 32) - { - ifs.read(reinterpret_cast(&blksz32), sizeof(int)); - if (blksz32 != nx * ny * sizeof(double)) - throw std::runtime_error("corrupt random number file"); - } - else - { - ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); - if (blksz64 != nx * ny * sizeof(double)) - throw std::runtime_error("corrupt random number file"); - } - - in_double.assign(nx * ny, 0.0f); - ifs.read((char *)&in_double[0], nx * ny * sizeof(double)); - - for (int jj = 0, q = 0; jj < (int)ny; ++jj) - for (int kk = 0; kk < (int)nx; ++kk) - { - sum += in_double[q]; - sum2 += in_double[q] * in_double[q]; - ++count; - (*rnums_[0])(kk, jj, ii) = sign8 * in_double[q++]; - } - - if (addrtype == 32) - { - ifs.read(reinterpret_cast(&blksz32), sizeof(int)); - if (blksz32 != nx * ny * sizeof(double)) - throw std::runtime_error("corrupt random number file"); - } - else - { - ifs.read(reinterpret_cast(&blksz64), sizeof(size_t)); - if (blksz64 != nx * ny * sizeof(double)) - throw std::runtime_error("corrupt random number file"); - } - } - } - - double mean, var; - mean = sum / count; - var = sum2 / count - mean * mean; - - LOGINFO("Random numbers in file have \n mean = %f and var = %f", mean, var); -} - -//... copy construct by averaging down -template -random_numbers::random_numbers(/*const*/ random_numbers &rc, bool kdegrade) -{ - //if( res > rc.m_res || (res/rc.m_res)%2 != 0 ) - // throw std::runtime_error("Invalid restriction in random number container copy constructor."); - - long double sum = 0.0, sum2 = 0.0; - size_t count = 0; - - if (kdegrade) - { - LOGINFO("Generating a coarse white noise field by k-space degrading"); - //... initialize properties of container - res_ = rc.res_ / 2; - cubesize_ = res_; - ncubes_ = 1; - baseseed_ = -2; - - if (sizeof(fftw_real) != sizeof(T)) - { - LOGERR("type mismatch with fftw_real in k-space averaging"); - throw std::runtime_error("type mismatch with fftw_real in k-space averaging"); - } - - fftw_real - *rfine = new fftw_real[(size_t)rc.res_ * (size_t)rc.res_ * 2 * ((size_t)rc.res_ / 2 + 1)], - *rcoarse = new fftw_real[(size_t)res_ * (size_t)res_ * 2 * ((size_t)res_ / 2 + 1)]; - - fftw_complex - *ccoarse = reinterpret_cast(rcoarse), - *cfine = reinterpret_cast(rfine); - - int nx(rc.res_), ny(rc.res_), nz(rc.res_), nxc(res_), nyc(res_), nzc(res_); -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan - pf = fftwf_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE), - ipc = fftwf_plan_dft_c2r_3d(nxc, nyc, nzc, ccoarse, rcoarse, FFTW_ESTIMATE); -#else - fftw_plan - pf = fftw_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE), - ipc = fftw_plan_dft_c2r_3d(nxc, nyc, nzc, ccoarse, rcoarse, FFTW_ESTIMATE); -#endif - -#else - rfftwnd_plan - pf = rfftw3d_create_plan(nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE), - ipc = rfftw3d_create_plan(nxc, nyc, nzc, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); -#endif - -#pragma omp parallel for - for (int i = 0; i < nx; i++) - for (int j = 0; j < ny; j++) - for (int k = 0; k < nz; k++) - { - size_t q = ((size_t)i * ny + (size_t)j) * (nz + 2) + (size_t)k; - rfine[q] = rc(i, j, k); - } - -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(pf); -#else - fftw_execute(pf); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pf, rfine, NULL); -#else - rfftwnd_one_real_to_complex(pf, rfine, NULL); -#endif -#endif - - double fftnorm = 1.0 / ((double)nxc * (double)nyc * (double)nzc); - -#pragma omp parallel for - for (int i = 0; i < nxc; i++) - for (int j = 0; j < nyc; j++) - for (int k = 0; k < nzc / 2 + 1; k++) - { - int ii(i), jj(j), kk(k); - - if (i > nxc / 2) - ii += nx / 2; - if (j > nyc / 2) - jj += ny / 2; - - size_t qc, qf; - - double kx = (i <= (int)nxc / 2) ? (double)i : (double)(i - (int)nxc); - double ky = (j <= (int)nyc / 2) ? (double)j : (double)(j - (int)nyc); - double kz = (k <= (int)nzc / 2) ? (double)k : (double)(k - (int)nzc); - - qc = ((size_t)i * nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k; - qf = ((size_t)ii * ny + (size_t)jj) * (nz / 2 + 1) + (size_t)kk; - - std::complex val_fine(RE(cfine[qf]), IM(cfine[qf])); - double phase = (kx / nxc + ky / nyc + kz / nzc) * 0.5 * M_PI; - std::complex val_phas(cos(phase), sin(phase)); - - val_fine *= val_phas * fftnorm / sqrt(8.0); - - RE(ccoarse[qc]) = val_fine.real(); - IM(ccoarse[qc]) = val_fine.imag(); - } - - delete[] rfine; -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(ipc); -#else - fftw_execute(ipc); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), ipc, ccoarse, NULL); -#else - rfftwnd_one_complex_to_real(ipc, ccoarse, NULL); -#endif -#endif - rnums_.push_back(new Meshvar(res_, 0, 0, 0)); - cubemap_[0] = 0; // map all to single array - -#pragma omp parallel for reduction(+ \ - : sum, sum2, count) - for (int i = 0; i < nxc; i++) - for (int j = 0; j < nyc; j++) - for (int k = 0; k < nzc; k++) - { - size_t q = ((size_t)i * nyc + (size_t)j) * (nzc + 2) + (size_t)k; - (*rnums_[0])(i, j, k) = rcoarse[q]; - sum += (*rnums_[0])(i, j, k); - sum2 += (*rnums_[0])(i, j, k) * (*rnums_[0])(i, j, k); - ++count; - } - - delete[] rcoarse; - -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_destroy_plan(pf); - fftwf_destroy_plan(ipc); -#else - fftw_destroy_plan(pf); - fftw_destroy_plan(ipc); -#endif -#else - rfftwnd_destroy_plan(pf); - rfftwnd_destroy_plan(ipc); -#endif - } - else - { - LOGINFO("Generating a coarse white noise field by averaging"); - if (rc.rnums_.size() == 1) - { - //... initialize properties of container - res_ = rc.res_ / 2; - cubesize_ = res_; - ncubes_ = 1; - baseseed_ = -2; - - //... use restriction to get consistent random numbers on coarser grid - mg_straight gop; - rnums_.push_back(new Meshvar(res_, 0, 0, 0)); - cubemap_[0] = 0; // map all to single array - gop.restrict(*rc.rnums_[0], *rnums_[0]); - -#pragma omp parallel for reduction(+ \ - : sum, sum2, count) - for (int i = 0; i < (int)rnums_[0]->size(0); ++i) - for (unsigned j = 0; j < rnums_[0]->size(1); ++j) - for (unsigned k = 0; k < rnums_[0]->size(2); ++k) - { - (*rnums_[0])(i, j, k) *= sqrt(8); //.. maintain that var(delta)=1 - sum += (*rnums_[0])(i, j, k); - sum2 += (*rnums_[0])(i, j, k) * (*rnums_[0])(i, j, k); - ++count; - } - } - else - { - //... initialize properties of container - res_ = rc.res_ / 2; - cubesize_ = res_; - ncubes_ = 1; - baseseed_ = -2; - - rnums_.push_back(new Meshvar(res_, 0, 0, 0)); - cubemap_[0] = 0; - double fac = 1.0 / sqrt(8); - -#pragma omp parallel for reduction(+ \ - : sum, sum2, count) - for (int ii = 0; ii < (int)rc.res_ / 2; ++ii) - { - unsigned i = 2 * ii; - - for (unsigned j = 0, jj = 0; j < rc.res_; j += 2, ++jj) - for (unsigned k = 0, kk = 0; k < rc.res_; k += 2, ++kk) - { - (*rnums_[0])(ii, jj, kk) = fac * - (rc(i, j, k) + rc(i + 1, j, k) + rc(i, j + 1, k) + rc(i, j, k + 1) + - rc(i + 1, j + 1, k) + rc(i + 1, j, k + 1) + rc(i, j + 1, k + 1) + rc(i + 1, j + 1, k + 1)); - - sum += (*rnums_[0])(ii, jj, kk); - sum2 += (*rnums_[0])(ii, jj, kk) * (*rnums_[0])(ii, jj, kk); - ++count; - } - } - } - } - - double rmean, rvar; - rmean = sum / count; - rvar = sum2 / count - rmean * rmean; - - LOGINFO("Restricted random numbers have\n mean = %f, var = %f", rmean, rvar); -} - -template -random_numbers::random_numbers(random_numbers &rc, unsigned cubesize, long baseseed, - bool kspace, bool isolated, int *x0_, int *lx_, bool zeromean) - : res_(2 * rc.res_), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed) -{ - initialize(); - - int x0[3], lx[3]; - if (x0_ == NULL || lx_ == NULL) - { - for (int i = 0; i < 3; ++i) - { - x0[i] = 0; - lx[i] = res_; - } - fill_all(); - } - else - { - for (int i = 0; i < 3; ++i) - { - x0[i] = x0_[i]; - lx[i] = lx_[i]; - } - fill_subvolume(x0, lx); - } - - if (kspace) - { - - LOGINFO("Generating a constrained random number set with seed %ld\n using coarse mode replacement...", baseseed); - assert(lx[0] % 2 == 0 && lx[1] % 2 == 0 && lx[2] % 2 == 0); - size_t nx = lx[0], ny = lx[1], nz = lx[2], - nxc = lx[0] / 2, nyc = lx[1] / 2, nzc = lx[2] / 2; - - fftw_real *rfine = new fftw_real[nx * ny * (nz + 2l)]; - fftw_complex *cfine = reinterpret_cast(rfine); - -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan - pf = fftwf_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE), - ipf = fftwf_plan_dft_c2r_3d(nx, ny, nz, cfine, rfine, FFTW_ESTIMATE); -#else - fftw_plan - pf = fftw_plan_dft_r2c_3d(nx, ny, nz, rfine, cfine, FFTW_ESTIMATE), - ipf = fftw_plan_dft_c2r_3d(nx, ny, nz, cfine, rfine, FFTW_ESTIMATE); -#endif -#else - rfftwnd_plan - pf = rfftw3d_create_plan(nx, ny, nz, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE), - ipf = rfftw3d_create_plan(nx, ny, nz, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); -#endif - -#pragma omp parallel for - for (int i = 0; i < (int)nx; i++) - for (int j = 0; j < (int)ny; j++) - for (int k = 0; k < (int)nz; k++) - { - size_t q = ((size_t)i * (size_t)ny + (size_t)j) * (size_t)(nz + 2) + (size_t)k; - rfine[q] = (*this)(x0[0] + i, x0[1] + j, x0[2] + k); - } - //this->free_all_mem(); // temporarily free memory, allocate again later - - fftw_real *rcoarse = new fftw_real[nxc * nyc * (nzc + 2)]; - fftw_complex *ccoarse = reinterpret_cast(rcoarse); - -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_plan pc = fftwf_plan_dft_r2c_3d(nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE); -#else - fftw_plan pc = fftw_plan_dft_r2c_3d(nxc, nyc, nzc, rcoarse, ccoarse, FFTW_ESTIMATE); -#endif -#else - rfftwnd_plan pc = rfftw3d_create_plan(nxc, nyc, nzc, FFTW_REAL_TO_COMPLEX, FFTW_ESTIMATE | FFTW_IN_PLACE); -#endif - -#pragma omp parallel for - for (int i = 0; i < (int)nxc; i++) - for (int j = 0; j < (int)nyc; j++) - for (int k = 0; k < (int)nzc; k++) - { - size_t q = ((size_t)i * (size_t)nyc + (size_t)j) * (size_t)(nzc + 2) + (size_t)k; - rcoarse[q] = rc(x0[0] / 2 + i, x0[1] / 2 + j, x0[2] / 2 + k); - } -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(pc); - fftwf_execute(pf); -#else - fftw_execute(pc); - fftw_execute(pf); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pc, rcoarse, NULL); - rfftwnd_threads_one_real_to_complex(omp_get_max_threads(), pf, rfine, NULL); -#else - rfftwnd_one_real_to_complex(pc, rcoarse, NULL); - rfftwnd_one_real_to_complex(pf, rfine, NULL); -#endif -#endif - - double fftnorm = 1.0 / ((double)nx * (double)ny * (double)nz); - double sqrt8 = sqrt(8.0); - double phasefac = -0.5; //-1.0;//-0.125; - - //if( isolated ) phasefac *= 1.5; - - // embedding of coarse white noise by fourier interpolation - #pragma omp parallel for - for (int i = 0; i < (int)nxc; i++) - for (int j = 0; j < (int)nyc; j++) - for (int k = 0; k < (int)nzc / 2 + 1; k++) - { - int ii(i), jj(j), kk(k); - - //if( i==(int)nxc/2 ) continue; - //if( j==(int)nyc/2 ) continue; - - if (i > (int)nxc / 2) - ii += (int)nx / 2; - if (j > (int)nyc / 2) - jj += (int)ny / 2; - - size_t qc, qf; - - double kx = (i <= (int)nxc / 2) ? (double)i : (double)(i - (int)nxc); - double ky = (j <= (int)nyc / 2) ? (double)j : (double)(j - (int)nyc); - double kz = (k <= (int)nzc / 2) ? (double)k : (double)(k - (int)nzc); - - qc = ((size_t)i * nyc + (size_t)j) * (nzc / 2 + 1) + (size_t)k; - qf = ((size_t)ii * ny + (size_t)jj) * (nz / 2 + 1) + (size_t)kk; - - std::complex val(RE(ccoarse[qc]), IM(ccoarse[qc])); - double phase = (kx / nxc + ky / nyc + kz / nzc) * phasefac * M_PI; - - std::complex val_phas(cos(phase), sin(phase)); - - val *= val_phas * sqrt8; - - if (i != (int)nxc / 2 && j != (int)nyc / 2 && k != (int)nzc / 2) - { - RE(cfine[qf]) = val.real(); - IM(cfine[qf]) = val.imag(); - } - else - { - // RE(cfine[qf]) = val.real(); - // IM(cfine[qf]) = 0.0; - } - } - - - delete[] rcoarse; - -#pragma omp parallel for - for (int i = 0; i < (int)nx; i++) - for (int j = 0; j < (int)ny; j++) - for (int k = 0; k < (int)nz / 2 + 1; k++) - { - size_t q = ((size_t)i * ny + (size_t)j) * (nz / 2 + 1) + (size_t)k; - - RE(cfine[q]) *= fftnorm; - IM(cfine[q]) *= fftnorm; - } - -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_execute(ipf); -#else - fftw_execute(ipf); -#endif -#else -#ifndef SINGLETHREAD_FFTW - rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), ipf, cfine, NULL); -#else - rfftwnd_one_complex_to_real(ipf, cfine, NULL); -#endif -#endif - -#pragma omp parallel for - for (int i = 0; i < (int)nx; i++) - for (int j = 0; j < (int)ny; j++) - for (int k = 0; k < (int)nz; k++) - { - size_t q = ((size_t)i * ny + (size_t)j) * (nz + 2) + (size_t)k; - (*this)(x0[0] + i, x0[1] + j, x0[2] + k, false) = rfine[q]; - } - - delete[] rfine; - -#ifdef FFTW3 -#ifdef SINGLE_PRECISION - fftwf_destroy_plan(pf); - fftwf_destroy_plan(pc); - fftwf_destroy_plan(ipf); -#else - fftw_destroy_plan(pf); - fftw_destroy_plan(pc); - fftw_destroy_plan(ipf); -#endif -#else - fftwnd_destroy_plan(pf); - fftwnd_destroy_plan(pc); - fftwnd_destroy_plan(ipf); -#endif - } - else - { - LOGINFO("Generating a constrained random number set with seed %ld\n using Hoffman-Ribak constraints...", baseseed); - - double fac = 1.0 / sqrt(8.0); //1./sqrt(8.0); - - for (int i = x0[0], ii = x0[0] / 2; i < x0[0] + lx[0]; i += 2, ++ii) - for (int j = x0[1], jj = x0[1] / 2; j < x0[1] + lx[1]; j += 2, ++jj) - for (int k = x0[2], kk = x0[2] / 2; k < x0[2] + lx[2]; k += 2, ++kk) - { - double topval = rc(ii, jj, kk); - double locmean = 0.125 * ((*this)(i, j, k) + (*this)(i + 1, j, k) + (*this)(i, j + 1, k) + (*this)(i, j, k + 1) + - (*this)(i + 1, j + 1, k) + (*this)(i + 1, j, k + 1) + (*this)(i, j + 1, k + 1) + (*this)(i + 1, j + 1, k + 1)); - double dif = fac * topval - locmean; - - (*this)(i, j, k) += dif; - (*this)(i + 1, j, k) += dif; - (*this)(i, j + 1, k) += dif; - (*this)(i, j, k + 1) += dif; - (*this)(i + 1, j + 1, k) += dif; - (*this)(i + 1, j, k + 1) += dif; - (*this)(i, j + 1, k + 1) += dif; - (*this)(i + 1, j + 1, k + 1) += dif; - } - } -} - -template -void random_numbers::register_cube(int i, int j, int k) -{ - i = (i + ncubes_) % ncubes_; - j = (j + ncubes_) % ncubes_; - k = (k + ncubes_) % ncubes_; - size_t icube = ((size_t)i * ncubes_ + (size_t)j) * ncubes_ + (size_t)k; - - cubemap_iterator it = cubemap_.find(icube); - - if (it == cubemap_.end()) - { - rnums_.push_back(NULL); - cubemap_[icube] = rnums_.size() - 1; -#ifdef DEBUG - LOGDEBUG("registering new cube %d,%d,%d . ID = %ld, memloc = %ld", i, j, k, icube, cubemap_[icube]); -#endif - } -} - -template -double random_numbers::fill_cube(int i, int j, int k) -{ - - gsl_rng *RNG = gsl_rng_alloc(gsl_rng_mt19937); - - i = (i + ncubes_) % ncubes_; - j = (j + ncubes_) % ncubes_; - k = (k + ncubes_) % ncubes_; - - size_t icube = ((size_t)i * ncubes_ + (size_t)j) * ncubes_ + (size_t)k; - long cubeseed = baseseed_ + icube; //... each cube gets its unique seed - - gsl_rng_set(RNG, cubeseed); - - cubemap_iterator it = cubemap_.find(icube); - - if (it == cubemap_.end()) - { - LOGERR("Attempt to access non-registered random number cube!"); - throw std::runtime_error("Attempt to access non-registered random number cube!"); - } - - size_t cubeidx = it->second; - - if (rnums_[cubeidx] == NULL) - rnums_[cubeidx] = new Meshvar(cubesize_, 0, 0, 0); - - double mean = 0.0; - - for (int ii = 0; ii < (int)cubesize_; ++ii) - for (int jj = 0; jj < (int)cubesize_; ++jj) - for (int kk = 0; kk < (int)cubesize_; ++kk) - { - (*rnums_[cubeidx])(ii, jj, kk) = gsl_ran_ugaussian_ratio_method(RNG); - mean += (*rnums_[cubeidx])(ii, jj, kk); - } - - gsl_rng_free(RNG); - - return mean / (cubesize_ * cubesize_ * cubesize_); -} - -template -void random_numbers::subtract_from_cube(int i, int j, int k, double val) -{ - i = (i + ncubes_) % ncubes_; - j = (j + ncubes_) % ncubes_; - k = (k + ncubes_) % ncubes_; - - size_t icube = ((size_t)i * ncubes_ + (size_t)j) * ncubes_ + (size_t)k; - - cubemap_iterator it = cubemap_.find(icube); - - if (it == cubemap_.end()) - { - LOGERR("Attempt to access unallocated RND cube %d,%d,%d in random_numbers::subtract_from_cube", i, j, k); - throw std::runtime_error("Attempt to access unallocated RND cube in random_numbers::subtract_from_cube"); - } - - size_t cubeidx = it->second; - - for (int ii = 0; ii < (int)cubesize_; ++ii) - for (int jj = 0; jj < (int)cubesize_; ++jj) - for (int kk = 0; kk < (int)cubesize_; ++kk) - (*rnums_[cubeidx])(ii, jj, kk) -= val; -} - -template -void random_numbers::free_cube(int i, int j, int k) -{ - - i = (i + ncubes_) % ncubes_; - j = (j + ncubes_) % ncubes_; - k = (k + ncubes_) % ncubes_; - - size_t icube = ((size_t)i * (size_t)ncubes_ + (size_t)j) * (size_t)ncubes_ + (size_t)k; - - cubemap_iterator it = cubemap_.find(icube); - - if (it == cubemap_.end()) - { - LOGERR("Attempt to access unallocated RND cube %d,%d,%d in random_numbers::free_cube", i, j, k); - throw std::runtime_error("Attempt to access unallocated RND cube in random_numbers::free_cube"); - } - - size_t cubeidx = it->second; - - if (rnums_[cubeidx] != NULL) - { - delete rnums_[cubeidx]; - rnums_[cubeidx] = NULL; - } -} - -template -void random_numbers::initialize(void) -{ - - ncubes_ = std::max((int)((double)res_ / cubesize_), 1); - if (res_ < cubesize_) - { - ncubes_ = 1; - cubesize_ = res_; - } - - LOGINFO("Generating random numbers w/ sample cube size of %d", cubesize_); -} - -template -double random_numbers::fill_subvolume(int *i0, int *n) -{ - int i0cube[3], ncube[3]; - - i0cube[0] = (int)((double)(res_ + i0[0]) / cubesize_); - i0cube[1] = (int)((double)(res_ + i0[1]) / cubesize_); - i0cube[2] = (int)((double)(res_ + i0[2]) / cubesize_); - - ncube[0] = (int)(n[0] / cubesize_) + 2; - ncube[1] = (int)(n[1] / cubesize_) + 2; - ncube[2] = (int)(n[2] / cubesize_) + 2; - -#ifdef DEBUG - LOGDEBUG("random numbers needed for region %d,%d,%d ..+ %d,%d,%d", i0[0], i0[1], i0[2], n[0], n[1], n[2]); - LOGDEBUG("filling cubes %d,%d,%d ..+ %d,%d,%d", i0cube[0], i0cube[1], i0cube[2], ncube[0], ncube[1], ncube[2]); -#endif - - double mean = 0.0; - - for (int i = i0cube[0]; i < i0cube[0] + ncube[0]; ++i) - for (int j = i0cube[1]; j < i0cube[1] + ncube[1]; ++j) - for (int k = i0cube[2]; k < i0cube[2] + ncube[2]; ++k) - { - int ii(i), jj(j), kk(k); - - ii = (ii + ncubes_) % ncubes_; - jj = (jj + ncubes_) % ncubes_; - kk = (kk + ncubes_) % ncubes_; - - register_cube(ii, jj, kk); - } - -#pragma omp parallel for reduction(+ \ - : mean) - for (int i = i0cube[0]; i < i0cube[0] + ncube[0]; ++i) - for (int j = i0cube[1]; j < i0cube[1] + ncube[1]; ++j) - for (int k = i0cube[2]; k < i0cube[2] + ncube[2]; ++k) - { - int ii(i), jj(j), kk(k); - - ii = (ii + ncubes_) % ncubes_; - jj = (jj + ncubes_) % ncubes_; - kk = (kk + ncubes_) % ncubes_; - - mean += fill_cube(ii, jj, kk); - } - return mean / (ncube[0] * ncube[1] * ncube[2]); -} - -template -double random_numbers::fill_all(void) -{ - double sum = 0.0; - - for (int i = 0; i < (int)ncubes_; ++i) - for (int j = 0; j < (int)ncubes_; ++j) - for (int k = 0; k < (int)ncubes_; ++k) - { - int ii(i), jj(j), kk(k); - - ii = (ii + ncubes_) % ncubes_; - jj = (jj + ncubes_) % ncubes_; - kk = (kk + ncubes_) % ncubes_; - - register_cube(ii, jj, kk); - } - -#pragma omp parallel for reduction(+ \ - : sum) - for (int i = 0; i < (int)ncubes_; ++i) - for (int j = 0; j < (int)ncubes_; ++j) - for (int k = 0; k < (int)ncubes_; ++k) - { - int ii(i), jj(j), kk(k); - - ii = (ii + ncubes_) % ncubes_; - jj = (jj + ncubes_) % ncubes_; - kk = (kk + ncubes_) % ncubes_; - - sum += fill_cube(ii, jj, kk); - } - - //... subtract mean -#pragma omp parallel for reduction(+ \ - : sum) - for (int i = 0; i < (int)ncubes_; ++i) - for (int j = 0; j < (int)ncubes_; ++j) - for (int k = 0; k < (int)ncubes_; ++k) - { - int ii(i), jj(j), kk(k); - - ii = (ii + ncubes_) % ncubes_; - jj = (jj + ncubes_) % ncubes_; - kk = (kk + ncubes_) % ncubes_; - subtract_from_cube(ii, jj, kk, sum / (ncubes_ * ncubes_ * ncubes_)); - } - - return sum / (ncubes_ * ncubes_ * ncubes_); -} - -template -void random_numbers::print_allocated(void) -{ - unsigned ncount = 0, ntot = rnums_.size(); - for (size_t i = 0; i < rnums_.size(); ++i) - if (rnums_[i] != NULL) - ncount++; - - LOGINFO(" -> %d of %d random number cubes currently allocated", ncount, ntot); -} - -////////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////////////////////// - -template -random_number_generator::random_number_generator(config_file &cf, refinement_hierarchy &refh, transfer_function *ptf) - : pcf_(&cf), prefh_(&refh), constraints(cf, ptf) -{ - levelmin_ = prefh_->levelmin(); - levelmax_ = prefh_->levelmax(); - - ran_cube_size_ = pcf_->getValueSafe("random", "cubesize", DEF_RAN_CUBE_SIZE); - disk_cached_ = pcf_->getValueSafe("random", "disk_cached", true); - restart_ = pcf_->getValueSafe("random", "restart", false); - - mem_cache_.assign(levelmax_ - levelmin_ + 1, (std::vector *)NULL); - - if (restart_ && !disk_cached_) - { - LOGERR("Cannot restart from mem cached random numbers."); - throw std::runtime_error("Cannot restart from mem cached random numbers."); - } - ////disk_cached_ = false; - - //... determine seed/white noise file data to be applied - parse_rand_parameters(); - - if (!restart_) - { - //... compute the actual random numbers - compute_random_numbers(); - } -} - -template -random_number_generator::~random_number_generator() -{ - - //... clear memory caches - for (unsigned i = 0; i < mem_cache_.size(); ++i) - if (mem_cache_[i] != NULL) - delete mem_cache_[i]; - - //... clear disk caches - if (disk_cached_) - { - for (int ilevel = levelmin_; ilevel <= levelmax_; ++ilevel) - { - char fname[128]; - sprintf(fname, "wnoise_%04d.bin", ilevel); - // unlink(fname); - } - } -} - -template -bool random_number_generator::is_number(const std::string &s) -{ - for (size_t i = 0; i < s.length(); i++) - if (!std::isdigit(s[i]) && s[i] != '-') - return false; - - return true; -} - -template -void random_number_generator::parse_rand_parameters(void) -{ - //... parse random number options - for (int i = 0; i <= 100; ++i) - { - char seedstr[128]; - std::string tempstr; - bool noseed = false; - sprintf(seedstr, "seed[%d]", i); - if (pcf_->containsKey("random", seedstr)) - tempstr = pcf_->getValue("random", seedstr); - else - { - // "-2" means that no seed entry was found for that level - tempstr = std::string("-2"); - noseed = true; - } - - if (is_number(tempstr)) - { - long ltemp; - pcf_->convert(tempstr, ltemp); - rngfnames_.push_back(""); - if (noseed) //ltemp < 0 ) - //... generate some dummy seed which only depends on the level, negative so we know it's not - //... an actual seed and thus should not be used as a constraint for coarse levels - //rngseeds_.push_back( -abs((unsigned)(ltemp-i)%123+(unsigned)(ltemp+827342523521*i)%123456789) ); - rngseeds_.push_back(-abs((long)(ltemp - i) % 123 + (long)(ltemp + 7342523521 * i) % 123456789)); - else - { - if (ltemp <= 0) - { - LOGERR("Specified seed [random]/%s needs to be a number >0!", seedstr); - throw std::runtime_error("Seed values need to be >0"); - } - rngseeds_.push_back(ltemp); - } - } - else - { - rngfnames_.push_back(tempstr); - rngseeds_.push_back(-1); - LOGINFO("Random numbers for level %3d will be read from file.", i); - } - } - - //.. determine for which levels random seeds/random number files are given - levelmin_seed_ = -1; - for (unsigned ilevel = 0; ilevel < rngseeds_.size(); ++ilevel) - { - if (levelmin_seed_ < 0 && (rngfnames_[ilevel].size() > 0 || rngseeds_[ilevel] >= 0)) - levelmin_seed_ = ilevel; - } -} - -template -void random_number_generator::correct_avg(int icoarse, int ifine) -{ - int shift[3], levelmin_poisson; - shift[0] = pcf_->getValue("setup", "shift_x"); - shift[1] = pcf_->getValue("setup", "shift_y"); - shift[2] = pcf_->getValue("setup", "shift_z"); - - levelmin_poisson = pcf_->getValue("setup", "levelmin"); - - int lfacc = 1 << (icoarse - levelmin_poisson); - - int nc[3], i0c[3], nf[3], i0f[3]; - if (icoarse != levelmin_) - { - nc[0] = 2 * prefh_->size(icoarse, 0); - nc[1] = 2 * prefh_->size(icoarse, 1); - nc[2] = 2 * prefh_->size(icoarse, 2); - i0c[0] = prefh_->offset_abs(icoarse, 0) - lfacc * shift[0] - nc[0] / 4; - i0c[1] = prefh_->offset_abs(icoarse, 1) - lfacc * shift[1] - nc[1] / 4; - i0c[2] = prefh_->offset_abs(icoarse, 2) - lfacc * shift[2] - nc[2] / 4; - } - else - { - nc[0] = prefh_->size(icoarse, 0); - nc[1] = prefh_->size(icoarse, 1); - nc[2] = prefh_->size(icoarse, 2); - i0c[0] = -lfacc * shift[0]; - i0c[1] = -lfacc * shift[1]; - i0c[2] = -lfacc * shift[2]; - } - nf[0] = 2 * prefh_->size(ifine, 0); - nf[1] = 2 * prefh_->size(ifine, 1); - nf[2] = 2 * prefh_->size(ifine, 2); - i0f[0] = prefh_->offset_abs(ifine, 0) - 2 * lfacc * shift[0] - nf[0] / 4; - i0f[1] = prefh_->offset_abs(ifine, 1) - 2 * lfacc * shift[1] - nf[1] / 4; - i0f[2] = prefh_->offset_abs(ifine, 2) - 2 * lfacc * shift[2] - nf[2] / 4; - - //................................. - if (disk_cached_) - { - char fncoarse[128], fnfine[128]; - sprintf(fncoarse, "wnoise_%04d.bin", icoarse); - sprintf(fnfine, "wnoise_%04d.bin", ifine); - - std::ifstream - iffine(fnfine, std::ios::binary), - ifcoarse(fncoarse, std::ios::binary); - - int nxc, nyc, nzc, nxf, nyf, nzf; - iffine.read(reinterpret_cast(&nxf), sizeof(unsigned)); - iffine.read(reinterpret_cast(&nyf), sizeof(unsigned)); - iffine.read(reinterpret_cast(&nzf), sizeof(unsigned)); - - ifcoarse.read(reinterpret_cast(&nxc), sizeof(unsigned)); - ifcoarse.read(reinterpret_cast(&nyc), sizeof(unsigned)); - ifcoarse.read(reinterpret_cast(&nzc), sizeof(unsigned)); - - if (nxf != nf[0] || nyf != nf[1] || nzf != nf[2] || nxc != nc[0] || nyc != nc[1] || nzc != nc[2]) - { - LOGERR("White noise file mismatch. This should not happen. Notify a developer!"); - throw std::runtime_error("White noise file mismatch. This should not happen. Notify a developer!"); - } - int nxd(nxf / 2), nyd(nyf / 2), nzd(nzf / 2); - std::vector deg_rand((size_t)nxd * (size_t)nyd * (size_t)nzd, 0.0); - double fac = 1.0 / sqrt(8.0); - - for (int i = 0, ic = 0; i < nxf; i += 2, ic++) - { - std::vector fine_rand(2 * nyf * nzf, 0.0); - iffine.read(reinterpret_cast(&fine_rand[0]), 2 * nyf * nzf * sizeof(T)); - -#pragma omp parallel for - for (int j = 0; j < nyf; j += 2) - for (int k = 0; k < nzf; k += 2) - { - int jc = j / 2, kc = k / 2; - //size_t qc = (((size_t)i/2)*(size_t)nyd+((size_t)j/2))*(size_t)nzd+((size_t)k/2); - size_t qc = ((size_t)(ic * nyd + jc)) * (size_t)nzd + (size_t)kc; - - size_t qf[8]; - qf[0] = (0 * (size_t)nyf + (size_t)j + 0) * (size_t)nzf + (size_t)k + 0; - qf[1] = (0 * (size_t)nyf + (size_t)j + 0) * (size_t)nzf + (size_t)k + 1; - qf[2] = (0 * (size_t)nyf + (size_t)j + 1) * (size_t)nzf + (size_t)k + 0; - qf[3] = (0 * (size_t)nyf + (size_t)j + 1) * (size_t)nzf + (size_t)k + 1; - qf[4] = (1 * (size_t)nyf + (size_t)j + 0) * (size_t)nzf + (size_t)k + 0; - qf[5] = (1 * (size_t)nyf + (size_t)j + 0) * (size_t)nzf + (size_t)k + 1; - qf[6] = (1 * (size_t)nyf + (size_t)j + 1) * (size_t)nzf + (size_t)k + 0; - qf[7] = (1 * (size_t)nyf + (size_t)j + 1) * (size_t)nzf + (size_t)k + 1; - - double d = 0.0; - for (int q = 0; q < 8; ++q) - d += fac * fine_rand[qf[q]]; - - //deg_rand[qc] += d; - deg_rand[qc] = d; - } - } - - //... now deg_rand holds the oct-averaged fine field, store this in the coarse field - std::vector coarse_rand(nxc * nyc * nzc, 0.0); - ifcoarse.read(reinterpret_cast(&coarse_rand[0]), nxc * nyc * nzc * sizeof(T)); - - int di, dj, dk; - - di = i0f[0] / 2 - i0c[0]; - dj = i0f[1] / 2 - i0c[1]; - dk = i0f[2] / 2 - i0c[2]; - -#pragma omp parallel for - for (int i = 0; i < nxd; i++) - for (int j = 0; j < nyd; j++) - for (int k = 0; k < nzd; k++) - { - //unsigned qc = (((i+di+nxc)%nxc)*nyc+(((j+dj+nyc)%nyc)))*nzc+((k+dk+nzc)%nzc); - - if (i + di < 0 || i + di >= nxc || j + dj < 0 || j + dj >= nyc || k + dk < 0 || k + dk >= nzc) - continue; - - size_t qc = (((size_t)i + (size_t)di) * (size_t)nyc + ((size_t)j + (size_t)dj)) * (size_t)nzc + (size_t)(k + dk); - size_t qcd = (size_t)(i * nyd + j) * (size_t)nzd + (size_t)k; - - coarse_rand[qc] = deg_rand[qcd]; - } - - deg_rand.clear(); - - ifcoarse.close(); - std::ofstream ofcoarse(fncoarse, std::ios::binary | std::ios::trunc); - ofcoarse.write(reinterpret_cast(&nxc), sizeof(unsigned)); - ofcoarse.write(reinterpret_cast(&nyc), sizeof(unsigned)); - ofcoarse.write(reinterpret_cast(&nzc), sizeof(unsigned)); - ofcoarse.write(reinterpret_cast(&coarse_rand[0]), nxc * nyc * nzc * sizeof(T)); - ofcoarse.close(); - } - else - { - int nxc, nyc, nzc, nxf, nyf, nzf; - nxc = nc[0]; - nyc = nc[1]; - nzc = nc[2]; - nxf = nf[0]; - nyf = nf[1]; - nzf = nf[2]; - int nxd(nxf / 2), nyd(nyf / 2), nzd(nzf / 2); - - int di, dj, dk; - - di = i0f[0] / 2 - i0c[0]; - dj = i0f[1] / 2 - i0c[1]; - dk = i0f[2] / 2 - i0c[2]; - - double fac = 1.0 / sqrt(8.0); - -#pragma omp parallel for - for (int i = 0; i < nxd; i++) - for (int j = 0; j < nyd; j++) - for (int k = 0; k < nzd; k++) - { - if (i + di < 0 || i + di >= nxc || j + dj < 0 || j + dj >= nyc || k + dk < 0 || k + dk >= nzc) - continue; - - size_t qf[8]; - qf[0] = (size_t)((2 * i + 0) * nyf + 2 * j + 0) * (size_t)nzf + (size_t)(2 * k + 0); - qf[1] = (size_t)((2 * i + 0) * nyf + 2 * j + 0) * (size_t)nzf + (size_t)(2 * k + 1); - qf[2] = (size_t)((2 * i + 0) * nyf + 2 * j + 1) * (size_t)nzf + (size_t)(2 * k + 0); - qf[3] = (size_t)((2 * i + 0) * nyf + 2 * j + 1) * (size_t)nzf + (size_t)(2 * k + 1); - qf[4] = (size_t)((2 * i + 1) * nyf + 2 * j + 0) * (size_t)nzf + (size_t)(2 * k + 0); - qf[5] = (size_t)((2 * i + 1) * nyf + 2 * j + 0) * (size_t)nzf + (size_t)(2 * k + 1); - qf[6] = (size_t)((2 * i + 1) * nyf + 2 * j + 1) * (size_t)nzf + (size_t)(2 * k + 0); - qf[7] = (size_t)((2 * i + 1) * nyf + 2 * j + 1) * (size_t)nzf + (size_t)(2 * k + 1); - - double finesum = 0.0; - for (int q = 0; q < 8; ++q) - finesum += fac * (*mem_cache_[ifine - levelmin_])[qf[q]]; - - size_t qc = ((size_t)(i + di) * nyc + (size_t)(j + dj)) * (size_t)nzc + (size_t)(k + dk); - - (*mem_cache_[icoarse - levelmin_])[qc] = finesum; - } - } -} - -template -void random_number_generator::compute_random_numbers(void) -{ - bool kavg = pcf_->getValueSafe("random", "kaveraging", true); - bool rndsign = pcf_->getValueSafe("random", "grafic_sign", false); - bool brealspace_tf = !pcf_->getValue("setup", "kspace_TF"); - - std::vector randc(std::max(levelmax_, levelmin_seed_) + 1, (rng *)NULL); - - //--- FILL ALL WHITE NOISE ARRAYS FOR WHICH WE NEED THE FULL FIELD ---// - - //... seeds are given for a level coarser than levelmin - if (levelmin_seed_ < levelmin_) - { - if (rngfnames_[levelmin_seed_].size() > 0) - randc[levelmin_seed_] = new rng(1 << levelmin_seed_, rngfnames_[levelmin_seed_], rndsign); - else - randc[levelmin_seed_] = new rng(1 << levelmin_seed_, ran_cube_size_, rngseeds_[levelmin_seed_], true); - - for (int i = levelmin_seed_ + 1; i <= levelmin_; ++i) - { - //#warning add possibility to read noise from file also here! - - if (rngfnames_[i].size() > 0) - LOGINFO("Warning: Cannot use filenames for higher levels currently! Ignoring!"); - - randc[i] = new rng(*randc[i - 1], ran_cube_size_, rngseeds_[i], kavg); - delete randc[i - 1]; - randc[i - 1] = NULL; - } - } - - //... seeds are given for a level finer than levelmin, obtain by averaging - if (levelmin_seed_ > levelmin_) - { - if (rngfnames_[levelmin_seed_].size() > 0) - randc[levelmin_seed_] = new rng(1 << levelmin_seed_, rngfnames_[levelmin_seed_], rndsign); - else - randc[levelmin_seed_] = new rng(1 << levelmin_seed_, ran_cube_size_, rngseeds_[levelmin_seed_], true); //, x0, lx ); - - for (int ilevel = levelmin_seed_ - 1; ilevel >= (int)levelmin_; --ilevel) - { - if (rngseeds_[ilevel - levelmin_] > 0) - LOGINFO("Warning: random seed for level %d will be ignored.\n" - " consistency requires that it is obtained by restriction from level %d", - ilevel, levelmin_seed_); - - //if( brealspace_tf && ilevel < levelmax_ ) - // randc[ilevel] = new rng( *randc[ilevel+1], false ); - //else // do k-space averaging - randc[ilevel] = new rng(*randc[ilevel + 1], kavg); - - if (ilevel + 1 > levelmax_) - { - delete randc[ilevel + 1]; - randc[ilevel + 1] = NULL; - } - } - } - - //--- GENERATE AND STORE ALL LEVELS, INCLUDING REFINEMENTS ---// - - //... levelmin - if (randc[levelmin_] == NULL) - { - if (rngfnames_[levelmin_].size() > 0) - randc[levelmin_] = new rng(1 << levelmin_, rngfnames_[levelmin_], rndsign); - else - randc[levelmin_] = new rng(1 << levelmin_, ran_cube_size_, rngseeds_[levelmin_], true); - } - - //if( levelmax_ == levelmin_ ) - { - //... apply constraints to coarse grid - //... if no constraints are specified, or not for this level - //... constraints.apply will return without doing anything - int x0[3] = {0, 0, 0}; - int lx[3] = {1 << levelmin_, 1 << levelmin_, 1 << levelmin_}; - constraints.apply(levelmin_, x0, lx, randc[levelmin_]); - } - - store_rnd(levelmin_, randc[levelmin_]); - - //... refinement levels - for (int ilevel = levelmin_ + 1; ilevel <= levelmax_; ++ilevel) - { - int lx[3], x0[3]; - int shift[3], levelmin_poisson; - shift[0] = pcf_->getValue("setup", "shift_x"); - shift[1] = pcf_->getValue("setup", "shift_y"); - shift[2] = pcf_->getValue("setup", "shift_z"); - - levelmin_poisson = pcf_->getValue("setup", "levelmin"); - - int lfac = 1 << (ilevel - levelmin_poisson); - - int margin[3]; - if (prefh_->get_margin()>0){ - margin[0] = prefh_->get_margin(); - margin[1] = prefh_->get_margin(); - margin[2] = prefh_->get_margin(); - }else{ - margin[0] = prefh_->size(ilevel, 0)/2; - margin[1] = prefh_->size(ilevel, 1)/2; - margin[2] = prefh_->size(ilevel, 2)/2; - } - - lx[0] = prefh_->size(ilevel, 0) + 2*margin[0]; - lx[1] = prefh_->size(ilevel, 1) + 2*margin[1]; - lx[2] = prefh_->size(ilevel, 2) + 2*margin[2]; - x0[0] = prefh_->offset_abs(ilevel, 0) - lfac * shift[0] - margin[0]; - x0[1] = prefh_->offset_abs(ilevel, 1) - lfac * shift[1] - margin[1]; - x0[2] = prefh_->offset_abs(ilevel, 2) - lfac * shift[2] - margin[2]; - - if (randc[ilevel] == NULL) - randc[ilevel] = new rng(*randc[ilevel - 1], ran_cube_size_, rngseeds_[ilevel], kavg, ilevel == levelmin_ + 1, x0, lx); - delete randc[ilevel - 1]; - randc[ilevel - 1] = NULL; - - //... apply constraints to this level, if any - //if( ilevel == levelmax_ ) - //constraints.apply( ilevel, x0, lx, randc[ilevel] ); - - //... store numbers - store_rnd(ilevel, randc[ilevel]); - } - - delete randc[levelmax_]; - randc[levelmax_] = NULL; - - //... make sure that the coarse grid contains oct averages where it overlaps with a fine grid - //... this also ensures that constraints enforced on fine grids are carried to the coarser grids - if (brealspace_tf) - { - for (int ilevel = levelmax_; ilevel > levelmin_; --ilevel) - correct_avg(ilevel - 1, ilevel); - } - - //.. we do not have random numbers for a coarse level, generate them - /*if( levelmax_rand_ >= (int)levelmin_ ) - { - std::cerr << "lmaxread >= (int)levelmin\n"; - randc[levelmax_rand_] = new rng( (unsigned)pow(2,levelmax_rand_), rngfnames_[levelmax_rand_] ); - for( int ilevel = levelmax_rand_-1; ilevel >= (int)levelmin_; --ilevel ) - randc[ilevel] = new rng( *randc[ilevel+1] ); - }*/ -} - -template -void random_number_generator::store_rnd(int ilevel, rng *prng) -{ - int shift[3], levelmin_poisson; - shift[0] = pcf_->getValue("setup", "shift_x"); - shift[1] = pcf_->getValue("setup", "shift_y"); - shift[2] = pcf_->getValue("setup", "shift_z"); - - levelmin_poisson = pcf_->getValue("setup", "levelmin"); - - int lfac = 1 << (ilevel - levelmin_poisson); - - bool grafic_out = false; - - if (grafic_out) - { - std::vector data; - if (ilevel == levelmin_) - { - int N = 1 << levelmin_; - int i0, j0, k0; - i0 = -lfac * shift[0]; - j0 = -lfac * shift[1]; - k0 = -lfac * shift[2]; - - char fname[128]; - sprintf(fname, "grafic_wnoise_%04d.bin", ilevel); - - LOGUSER("Storing white noise field for grafic in file \'%s\'...", fname); - - std::ofstream ofs(fname, std::ios::binary | std::ios::trunc); - data.assign(N * N, 0.0); - - int blksize = 4 * sizeof(int); - int iseed = 0; - - ofs.write(reinterpret_cast(&blksize), sizeof(int)); - ofs.write(reinterpret_cast(&N), sizeof(int)); - ofs.write(reinterpret_cast(&N), sizeof(int)); - ofs.write(reinterpret_cast(&N), sizeof(int)); - ofs.write(reinterpret_cast(&iseed), sizeof(int)); - ofs.write(reinterpret_cast(&blksize), sizeof(int)); - - for (int k = 0; k < N; ++k) - { -#pragma omp parallel for - for (int j = 0; j < N; ++j) - for (int i = 0; i < N; ++i) - data[j * N + i] = -(*prng)(i + i0, j + j0, k + k0); - - blksize = N * N * sizeof(float); - ofs.write(reinterpret_cast(&blksize), sizeof(int)); - ofs.write(reinterpret_cast(&data[0]), N * N * sizeof(float)); - ofs.write(reinterpret_cast(&blksize), sizeof(int)); - } - - ofs.close(); - } - else - { - - int nx, ny, nz; - int i0, j0, k0; - - nx = prefh_->size(ilevel, 0); - ny = prefh_->size(ilevel, 1); - nz = prefh_->size(ilevel, 2); - i0 = prefh_->offset_abs(ilevel, 0) - lfac * shift[0]; - j0 = prefh_->offset_abs(ilevel, 1) - lfac * shift[1]; - k0 = prefh_->offset_abs(ilevel, 2) - lfac * shift[2]; - - char fname[128]; - sprintf(fname, "grafic_wnoise_%04d.bin", ilevel); - - LOGINFO("Storing white noise field for grafic in file \'%s\'...", fname); - LOGDEBUG("(%d,%d,%d) -- (%d,%d,%d) -- lfac = %d", nx, ny, nz, i0, j0, k0, lfac); - - std::ofstream ofs(fname, std::ios::binary | std::ios::trunc); - data.assign(nx * ny, 0.0); - - int blksize = 4 * sizeof(int); - int iseed = 0; - - ofs.write(reinterpret_cast(&blksize), sizeof(int)); - ofs.write(reinterpret_cast(&nz), sizeof(unsigned)); - ofs.write(reinterpret_cast(&ny), sizeof(unsigned)); - ofs.write(reinterpret_cast(&nx), sizeof(unsigned)); - ofs.write(reinterpret_cast(&iseed), sizeof(int)); - ofs.write(reinterpret_cast(&blksize), sizeof(int)); - - for (int k = 0; k < nz; ++k) - { -#pragma omp parallel for - for (int j = 0; j < ny; ++j) - for (int i = 0; i < nx; ++i) - data[j * nx + i] = -(*prng)(i + i0, j + j0, k + k0); - - blksize = nx * ny * sizeof(float); - ofs.write(reinterpret_cast(&blksize), sizeof(int)); - ofs.write(reinterpret_cast(&data[0]), nx * ny * sizeof(float)); - ofs.write(reinterpret_cast(&blksize), sizeof(int)); - } - ofs.close(); - } - } - - if (disk_cached_) - { - std::vector data; - if (ilevel == levelmin_) - { - int N = 1 << levelmin_; - int i0, j0, k0; - - i0 = -lfac * shift[0]; - j0 = -lfac * shift[1]; - k0 = -lfac * shift[2]; - - char fname[128]; - sprintf(fname, "wnoise_%04d.bin", ilevel); - - LOGUSER("Storing white noise field in file \'%s\'...", fname); - - std::ofstream ofs(fname, std::ios::binary | std::ios::trunc); - - ofs.write(reinterpret_cast(&N), sizeof(unsigned)); - ofs.write(reinterpret_cast(&N), sizeof(unsigned)); - ofs.write(reinterpret_cast(&N), sizeof(unsigned)); - - data.assign(N * N, 0.0); - for (int i = 0; i < N; ++i) - { -#pragma omp parallel for - for (int j = 0; j < N; ++j) - for (int k = 0; k < N; ++k) - data[j * N + k] = (*prng)(i + i0, j + j0, k + k0); - - ofs.write(reinterpret_cast(&data[0]), N * N * sizeof(T)); - } - ofs.close(); - } - else - { - int nx, ny, nz; - int i0, j0, k0; - - int margin[3]; - if (prefh_->get_margin()>0){ - margin[0] = prefh_->get_margin(); - margin[1] = prefh_->get_margin(); - margin[2] = prefh_->get_margin(); - }else{ - margin[0] = prefh_->size(ilevel, 0)/2; - margin[1] = prefh_->size(ilevel, 1)/2; - margin[2] = prefh_->size(ilevel, 2)/2; - } - - nx = prefh_->size(ilevel, 0) + 2*margin[0]; - ny = prefh_->size(ilevel, 1) + 2*margin[1]; - nz = prefh_->size(ilevel, 2) + 2*margin[2]; - i0 = prefh_->offset_abs(ilevel, 0) - lfac * shift[0] - margin[0]; //nx / 4; - j0 = prefh_->offset_abs(ilevel, 1) - lfac * shift[1] - margin[1]; //ny / 4; // was nx/4 - k0 = prefh_->offset_abs(ilevel, 2) - lfac * shift[2] - margin[2]; //nz / 4; // was nx/4 - - char fname[128]; - sprintf(fname, "wnoise_%04d.bin", ilevel); - - LOGUSER("Storing white noise field in file \'%s\'...", fname); - - std::ofstream ofs(fname, std::ios::binary | std::ios::trunc); - - ofs.write(reinterpret_cast(&nx), sizeof(unsigned)); - ofs.write(reinterpret_cast(&ny), sizeof(unsigned)); - ofs.write(reinterpret_cast(&nz), sizeof(unsigned)); - - data.assign(ny * nz, 0.0); - for (int i = 0; i < nx; ++i) - { -#pragma omp parallel for - for (int j = 0; j < ny; ++j) - for (int k = 0; k < nz; ++k) - data[j * nz + k] = (*prng)(i + i0, j + j0, k + k0); - - ofs.write(reinterpret_cast(&data[0]), ny * nz * sizeof(T)); - } - ofs.close(); - } - } - else - { - int nx, ny, nz; - int i0, j0, k0; - - if (ilevel == levelmin_) - { - i0 = -lfac * shift[0]; - j0 = -lfac * shift[1]; - k0 = -lfac * shift[2]; - - nx = ny = nz = 1 << levelmin_; - } - else - { - int margin[3]; - if (prefh_->get_margin()>0){ - margin[0] = prefh_->get_margin(); - margin[1] = prefh_->get_margin(); - margin[2] = prefh_->get_margin(); - }else{ - margin[0] = prefh_->size(ilevel, 0)/2; - margin[1] = prefh_->size(ilevel, 1)/2; - margin[2] = prefh_->size(ilevel, 2)/2; - } - nx = prefh_->size(ilevel, 0) + 2*margin[0]; - ny = prefh_->size(ilevel, 1) + 2*margin[1]; - nz = prefh_->size(ilevel, 2) + 2*margin[2]; - i0 = prefh_->offset_abs(ilevel, 0) - lfac * shift[0] - margin[0];//nx / 4; - j0 = prefh_->offset_abs(ilevel, 1) - lfac * shift[1] - margin[1];//ny / 4; // was nx/4 - k0 = prefh_->offset_abs(ilevel, 2) - lfac * shift[2] - margin[2];//nz / 4; // was nx/4 - } - - mem_cache_[ilevel - levelmin_] = new std::vector(nx * ny * nz, 0.0); - - LOGUSER("Copying white noise to mem cache..."); - -#pragma omp parallel for - for (int i = 0; i < nx; ++i) - for (int j = 0; j < ny; ++j) - for (int k = 0; k < nz; ++k) - (*mem_cache_[ilevel - levelmin_])[((size_t)i * ny + (size_t)j) * nz + (size_t)k] = (*prng)(i + i0, j + j0, k + k0); - } -} - -////////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////////////////////// - -template class random_numbers; -template class random_numbers; -template class random_number_generator, float>; -template class random_number_generator, double>; diff --git a/src/random.hh b/src/random.hh index 409c8ea..6d428da 100644 --- a/src/random.hh +++ b/src/random.hh @@ -4,7 +4,7 @@ a code to generate multi-scale initial conditions for cosmological simulations - Copyright (C) 2010 Oliver Hahn + Copyright (C) 2010-23 by Oliver Hahn */ @@ -13,15 +13,17 @@ // #define DEGRADE_RAND2 //..................................... -#ifndef __RANDOM_HH -#define __RANDOM_HH +#pragma once #define DEF_RAN_CUBE_SIZE 32 #include #include #include + +#ifdef _OPENMP #include +#endif #include #include @@ -30,18 +32,23 @@ #include "mesh.hh" #include "mg_operators.hh" #include "constraints.hh" +// #include "convolution_kernel.hh" +#include "density_grid.hh" class RNG_plugin { protected: - config_file *pcf_; //!< pointer to config_file from which to read parameters + config_file *pcf_; //!< pointer to config_file from which to read parameters + const refinement_hierarchy *prefh_; //!< pointer to refinement hierarchy structure containing the grid sizes public: - explicit RNG_plugin(config_file &cf) - : pcf_(&cf) + explicit RNG_plugin(config_file &cf) //, const refinement_hierarchy& refh ) + : pcf_(&cf) //, prefh_( & refh ) { } virtual ~RNG_plugin() {} virtual bool is_multiscale() const = 0; + virtual void fill_grid(int level, DensityGrid &R) = 0; + virtual void initialize_for_grid_structure(const refinement_hierarchy &refh) = 0; }; struct RNG_plugin_creator @@ -65,356 +72,54 @@ struct RNG_plugin_creator_concrete : public RNG_plugin_creator } //! create an instance of the plugin - RNG_plugin *create(config_file &cf) const + RNG_plugin *create(config_file &cf) const //, const refinement_hierarchy& refh ) const { - return new Derived(cf); + return new Derived(cf); //, refh ); } }; typedef RNG_plugin RNG_instance; -RNG_plugin *select_RNG_plugin(config_file &cf); - -/*! - * @brief encapsulates all things random number generator related - */ -template -class random_numbers -{ -public: - unsigned - res_, //!< resolution of the full mesh - cubesize_, //!< size of one independent random number cube - ncubes_; //!< number of random number cubes to cover the full mesh - long baseseed_; //!< base seed from which cube seeds are computed - -protected: - //! vector of 3D meshes (the random number cubes) with random numbers - std::vector *> rnums_; - - //! map of 3D indices to cube index - std::map cubemap_; - - typedef std::map::iterator cubemap_iterator; - -protected: - //! register a cube with the hash map - void register_cube(int i, int j, int k); - - //! fills a subcube with random numbers - double fill_cube(int i, int j, int k); - - //! subtract a constant from an entire cube - void subtract_from_cube(int i, int j, int k, double val); - - //! copy random numbers from a cube to a full grid array - template - void copy_cube(int i, int j, int k, C &dat) - { - int offi, offj, offk; - - offi = i * cubesize_; - offj = j * cubesize_; - offk = k * cubesize_; - - i = (i + ncubes_) % ncubes_; - j = (j + ncubes_) % ncubes_; - k = (k + ncubes_) % ncubes_; - - size_t icube = (i * ncubes_ + j) * ncubes_ + k; - cubemap_iterator it = cubemap_.find(icube); - - if (it == cubemap_.end()) - { - LOGERR("attempting to copy data from non-existing RND cube %d,%d,%d", i, j, k); - throw std::runtime_error("attempting to copy data from non-existing RND cube"); - } - - size_t cubeidx = it->second; - - for (int ii = 0; ii < (int)cubesize_; ++ii) - for (int jj = 0; jj < (int)cubesize_; ++jj) - for (int kk = 0; kk < (int)cubesize_; ++kk) - dat(offi + ii, offj + jj, offk + kk) = (*rnums_[cubeidx])(ii, jj, kk); - } - - //! free the memory associated with a subcube - void free_cube(int i, int j, int k); - - //! initialize member variables and allocate memory - void initialize(void); - - //! fill a cubic subvolume of the full grid with random numbers - double fill_subvolume(int *i0, int *n); - - //! fill an entire grid with random numbers - double fill_all(void); - - //! fill an external array instead of the internal field - template - double fill_all(C &dat) - { - double sum = 0.0; - - for (int i = 0; i < (int)ncubes_; ++i) - for (int j = 0; j < (int)ncubes_; ++j) - for (int k = 0; k < (int)ncubes_; ++k) - { - int ii(i), jj(j), kk(k); - register_cube(ii, jj, kk); - } - -#pragma omp parallel for reduction(+ \ - : sum) - for (int i = 0; i < (int)ncubes_; ++i) - for (int j = 0; j < (int)ncubes_; ++j) - for (int k = 0; k < (int)ncubes_; ++k) - { - int ii(i), jj(j), kk(k); - - ii = (ii + ncubes_) % ncubes_; - jj = (jj + ncubes_) % ncubes_; - kk = (kk + ncubes_) % ncubes_; - - sum += fill_cube(ii, jj, kk); - copy_cube(ii, jj, kk, dat); - free_cube(ii, jj, kk); - } - - return sum / (ncubes_ * ncubes_ * ncubes_); - } - - //! write the number of allocated random number cubes to stdout - void print_allocated(void); - -public: - //! constructor - random_numbers(unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx); - - //! constructor for constrained fine field - random_numbers(random_numbers &rc, unsigned cubesize, long baseseed, - bool kspace = false, bool isolated = false, int *x0_ = NULL, int *lx_ = NULL, bool zeromean = true); - - //! constructor - random_numbers(unsigned res, unsigned cubesize, long baseseed, bool zeromean = true); - - //! constructor to read white noise from file - random_numbers(unsigned res, std::string randfname, bool rndsign); - - //! copy constructor for averaged field (not copying) hence explicit! - explicit random_numbers(/*const*/ random_numbers &rc, bool kdegrade = true); - - //! destructor - ~random_numbers() - { - for (unsigned i = 0; i < rnums_.size(); ++i) - if (rnums_[i] != NULL) - delete rnums_[i]; - rnums_.clear(); - } - - //! access a random number, this allocates a cube and fills it with consistent random numbers - inline T &operator()(int i, int j, int k, bool fillrand = true) - { - int ic, jc, kc, is, js, ks; - - if (ncubes_ == 0) - throw std::runtime_error("random_numbers: internal error, not properly initialized"); - - //... determine cube - ic = (int)((double)i / cubesize_ + ncubes_) % ncubes_; - jc = (int)((double)j / cubesize_ + ncubes_) % ncubes_; - kc = (int)((double)k / cubesize_ + ncubes_) % ncubes_; - - size_t icube = ((size_t)ic * ncubes_ + (size_t)jc) * ncubes_ + (size_t)kc; - - cubemap_iterator it = cubemap_.find(icube); - - if (it == cubemap_.end()) - { - LOGERR("Attempting to copy data from non-existing RND cube %d,%d,%d @ %d,%d,%d", ic, jc, kc, i, j, k); - throw std::runtime_error("attempting to copy data from non-existing RND cube"); - } - - size_t cubeidx = it->second; - - if (rnums_[cubeidx] == NULL) - { - LOGERR("Attempting to access data from non-allocated RND cube %d,%d,%d", ic, jc, kc); - throw std::runtime_error("attempting to access data from non-allocated RND cube"); - } - - //... determine cell in cube - is = (i - ic * cubesize_ + cubesize_) % cubesize_; - js = (j - jc * cubesize_ + cubesize_) % cubesize_; - ks = (k - kc * cubesize_ + cubesize_) % cubesize_; - - return (*rnums_[cubeidx])(is, js, ks); - } - - //! free all cubes - void free_all_mem(void) - { - for (unsigned i = 0; i < rnums_.size(); ++i) - if (rnums_[i] != NULL) - { - delete rnums_[i]; - rnums_[i] = NULL; - } - } -}; +RNG_plugin *select_RNG_plugin(config_file &cf); //, const refinement_hierarchy& refh ); /*! * @brief encapsulates all things for multi-scale white noise generation */ -template +template class random_number_generator { protected: config_file *pcf_; - refinement_hierarchy *prefh_; - constraint_set constraints; - - int levelmin_, - levelmax_, - levelmin_seed_; - std::vector rngseeds_; - std::vector rngfnames_; - - bool disk_cached_; - bool restart_; - std::vector *> mem_cache_; - - unsigned ran_cube_size_; - -protected: - //! checks if the specified string is numeric - bool is_number(const std::string &s); - - //! parses the random number parameters in the conf file - void parse_rand_parameters(void); - - //! correct coarse grid averages for the change in small scale when using Fourier interpolation - void correct_avg(int icoarse, int ifine); - - //! the main driver routine for multi-scale white noise generation - void compute_random_numbers(void); - - //! store the white noise fields in memory or on disk - void store_rnd(int ilevel, rng *prng); + // const refinement_hierarchy * prefh_; + RNG_plugin *generator_; + int levelmin_, levelmax_; public: //! constructor - random_number_generator(config_file &cf, refinement_hierarchy &refh, transfer_function *ptf = NULL); + random_number_generator(config_file &cf, transfer_function *ptf = NULL) + : pcf_(&cf) //, prefh_( &refh ) + { + levelmin_ = pcf_->getValue("setup", "levelmin"); + levelmax_ = pcf_->getValue("setup", "levelmax"); + generator_ = select_RNG_plugin(cf); + } //! destructor - ~random_number_generator(); + ~random_number_generator() + { + } + + //! initialize_for_grid_structure + void initialize_for_grid_structure(const refinement_hierarchy &refh) + { + generator_->initialize_for_grid_structure(refh); + } //! load random numbers to a new array template void load(array &A, int ilevel) { - if (restart_) - LOGINFO("Attempting to restart using random numbers for level %d\n from file \'wnoise_%04d.bin\'.", ilevel, ilevel); - - if (disk_cached_) - { - char fname[128]; - sprintf(fname, "wnoise_%04d.bin", ilevel); - - LOGUSER("Loading white noise from file \'%s\'...", fname); - - std::ifstream ifs(fname, std::ios::binary); - if (!ifs.good()) - { - LOGERR("White noise file \'%s\'was not found.", fname); - throw std::runtime_error("A white noise file was not found. This is an internal inconsistency and bad."); - } - - int nx, ny, nz; - ifs.read(reinterpret_cast(&nx), sizeof(int)); - ifs.read(reinterpret_cast(&ny), sizeof(int)); - ifs.read(reinterpret_cast(&nz), sizeof(int)); - - if (nx != (int)A.size(0) || ny != (int)A.size(1) || nz != (int)A.size(2)) - { - - if (nx == (int)A.size(0) * 2 && ny == (int)A.size(1) * 2 && nz == (int)A.size(2) * 2) - { - std::cerr << "CHECKPOINT" << std::endl; - - int ox = nx / 4, oy = ny / 4, oz = nz / 4; - std::vector slice(ny * nz, 0.0); - - for (int i = 0; i < nx; ++i) - { - ifs.read(reinterpret_cast(&slice[0]), ny * nz * sizeof(T)); - - if (i < ox) - continue; - if (i >= 3 * ox) - break; - -#pragma omp parallel for - for (int j = oy; j < 3 * oy; ++j) - for (int k = oz; k < 3 * oz; ++k) - A(i - ox, j - oy, k - oz) = slice[j * nz + k]; - } - - ifs.close(); - } - else - { - LOGERR("White noise file is not aligned with array. File: [%d,%d,%d]. Mem: [%d,%d,%d].", - nx, ny, nz, A.size(0), A.size(1), A.size(2)); - throw std::runtime_error("White noise file is not aligned with array. This is an internal inconsistency and bad."); - } - } - else - { - - for (int i = 0; i < nx; ++i) - { - std::vector slice(ny * nz, 0.0); - ifs.read(reinterpret_cast(&slice[0]), ny * nz * sizeof(T)); - -#pragma omp parallel for - for (int j = 0; j < ny; ++j) - for (int k = 0; k < nz; ++k) - A(i, j, k) = slice[j * nz + k]; - } - - ifs.close(); - } - } - else - { - LOGUSER("Copying white noise from memory cache..."); - - if (mem_cache_[ilevel - levelmin_] == NULL) - LOGERR("Tried to access mem-cached random numbers for level %d. But these are not available!\n", ilevel); - - int nx(A.size(0)), ny(A.size(1)), nz(A.size(2)); - - if ((size_t)nx * (size_t)ny * (size_t)nz != mem_cache_[ilevel - levelmin_]->size()) - { - LOGERR("White noise file is not aligned with array. File: [%d,%d,%d]. Mem: [%d,%d,%d].", nx, ny, nz, A.size(0), A.size(1), A.size(2)); - throw std::runtime_error("White noise file is not aligned with array. This is an internal inconsistency and bad"); - } - -#pragma omp parallel for - for (int i = 0; i < nx; ++i) - for (int j = 0; j < ny; ++j) - for (int k = 0; k < nz; ++k) - A(i, j, k) = (*mem_cache_[ilevel - levelmin_])[((size_t)i * ny + (size_t)j) * nz + (size_t)k]; - - std::vector().swap(*mem_cache_[ilevel - levelmin_]); - delete mem_cache_[ilevel - levelmin_]; - mem_cache_[ilevel - levelmin_] = NULL; - } + generator_->fill_grid(ilevel, A); } }; -typedef random_numbers rand_nums; -typedef random_number_generator rand_gen; - -#endif //__RANDOM_HH +using noise_generator = random_number_generator; From 244f11a821f5de7e690982791d02418fbc37efa6 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 12 Feb 2023 22:38:28 -0800 Subject: [PATCH 3/5] WIP: some more cleanup --- ics_example.conf | 3 +- src/convolution_kernel.cc | 113 ++++++++++++-------------------------- src/densities.cc | 48 ++++++---------- src/main.cc | 21 +++---- src/poisson.cc | 10 +++- 5 files changed, 69 insertions(+), 126 deletions(-) diff --git a/ics_example.conf b/ics_example.conf index aae7dec..9c8cd33 100644 --- a/ics_example.conf +++ b/ics_example.conf @@ -12,8 +12,7 @@ align_top = no baryons = no use_2LPT = no use_LLA = no -periodic_TF = yes - +zero_zoom_velocity = no [cosmology] Omega_m = 0.276 diff --git a/src/convolution_kernel.cc b/src/convolution_kernel.cc index 05b0750..3aa1f3d 100644 --- a/src/convolution_kernel.cc +++ b/src/convolution_kernel.cc @@ -1,10 +1,9 @@ /* convolution_kernel.cc - This file is part of MUSIC - - a code to generate multi-scale initial conditions - for cosmological simulations + a code to generate multi-scale initial conditions for cosmological simulations - Copyright (C) 2010-19 Oliver Hahn + Copyright (C) 2010-23 Oliver Hahn */ @@ -96,16 +95,23 @@ void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip) std::complex dcmode(RE(cdata[0]), IM(cdata[0])); - if (!pk->is_ksampled()) + + #pragma omp parallel { -#pragma omp parallel for + const size_t veclen = cparam_.nz / 2 + 1; + + double *kvec = new double[veclen]; + double *Tkvec = new double[veclen]; + double *argvec = new double[veclen]; + + #pragma omp for for (int i = 0; i < cparam_.nx; ++i) for (int j = 0; j < cparam_.ny; ++j) + { + for (int k = 0; k < cparam_.nz / 2 + 1; ++k) { - size_t ii = (size_t)(i * cparam_.ny + j) * (size_t)(cparam_.nz / 2 + 1) + (size_t)k; - double kx, ky, kz; kx = (double)i; @@ -117,92 +123,42 @@ void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip) if (ky > cparam_.ny / 2) ky -= cparam_.ny; - double arg = (kx + ky + kz) * dstag; - std::complex carg(cos(arg), sin(arg)); + kvec[k] = sqrt(kx * kx + ky * ky + kz * kz); + argvec[k] = (kx + ky + kz) * dstag; + } - std::complex - ccdata(RE(cdata[ii]), IM(cdata[ii])), - cckernel(RE(ckernel[ii]), IM(ckernel[ii])); + pk->at_k(veclen, kvec, Tkvec); + + for (int k = 0; k < cparam_.nz / 2 + 1; ++k) + { + size_t ii = (size_t)(i * cparam_.ny + j) * (size_t)(cparam_.nz / 2 + 1) + (size_t)k; + std::complex carg(cos(argvec[k]), sin(argvec[k])); + + std::complex ccdata(RE(cdata[ii]), IM(cdata[ii])); if( fix ){ - ccdata = ccdata / std::abs(ccdata); + ccdata = ccdata / std::abs(ccdata) / fftnormp; } if( flip ){ ccdata = -ccdata; } - ccdata = ccdata * cckernel * fftnorm * carg; + ccdata = ccdata * Tkvec[k] * fftnorm * carg; RE(cdata[ii]) = ccdata.real(); IM(cdata[ii]) = ccdata.imag(); } + } + + delete[] kvec; + delete[] Tkvec; + delete[] argvec; } - else - { -#pragma omp parallel - { + // we now set the correct DC mode below... + RE(cdata[0]) = 0.0; + IM(cdata[0]) = 0.0; - const size_t veclen = cparam_.nz / 2 + 1; - - double *kvec = new double[veclen]; - double *Tkvec = new double[veclen]; - double *argvec = new double[veclen]; - -#pragma omp for - for (int i = 0; i < cparam_.nx; ++i) - for (int j = 0; j < cparam_.ny; ++j) - { - - for (int k = 0; k < cparam_.nz / 2 + 1; ++k) - { - double kx, ky, kz; - - kx = (double)i; - ky = (double)j; - kz = (double)k; - - if (kx > cparam_.nx / 2) - kx -= cparam_.nx; - if (ky > cparam_.ny / 2) - ky -= cparam_.ny; - - kvec[k] = sqrt(kx * kx + ky * ky + kz * kz); - argvec[k] = (kx + ky + kz) * dstag; - } - - pk->at_k(veclen, kvec, Tkvec); - - for (int k = 0; k < cparam_.nz / 2 + 1; ++k) - { - size_t ii = (size_t)(i * cparam_.ny + j) * (size_t)(cparam_.nz / 2 + 1) + (size_t)k; - std::complex carg(cos(argvec[k]), sin(argvec[k])); - - std::complex ccdata(RE(cdata[ii]), IM(cdata[ii])); - - if( fix ){ - ccdata = ccdata / std::abs(ccdata) / fftnormp; - } - if( flip ){ - ccdata = -ccdata; - } - - ccdata = ccdata * Tkvec[k] * fftnorm * carg; - - RE(cdata[ii]) = ccdata.real(); - IM(cdata[ii]) = ccdata.imag(); - } - } - - delete[] kvec; - delete[] Tkvec; - delete[] argvec; - } - - // we now set the correct DC mode below... - RE(cdata[0]) = 0.0; - IM(cdata[0]) = 0.0; - } LOGUSER("Performing backward FFT..."); @@ -229,7 +185,6 @@ void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip) #endif // set the DC mode here to avoid a possible truncation error in single precision - if (pk->is_ksampled()) { size_t nelem = (size_t)cparam_.nx * (size_t)cparam_.ny * (size_t)cparam_.nz; real_t mean = dcmode.real() * fftnorm / (real_t)nelem; diff --git a/src/densities.cc b/src/densities.cc index fde1938..589fe33 100644 --- a/src/densities.cc +++ b/src/densities.cc @@ -114,8 +114,13 @@ void fft_coarsen(m1 &v, m2 &V) val_fine *= val_phas * fftnorm / 8.0; - RE(ccoarse[qc]) = val_fine.real(); - IM(ccoarse[qc]) = val_fine.imag(); + if( i!=(int)nxF/2 && j!=(int)nyF/2 && k!=(int)nzF/2 ){ + RE(ccoarse[qc]) = val_fine.real(); + IM(ccoarse[qc]) = val_fine.imag(); + }else{ + RE(ccoarse[qc]) = 0.0;//val_fine.real(); + IM(ccoarse[qc]) = 0.0;//val_fine.imag(); + } } delete[] rfine; @@ -449,27 +454,14 @@ void GenerateDensityHierarchy(config_file &cf, transfer_function *ptf, tf_type t convolution::kernel_creator *the_kernel_creator; - if (kspaceTF) - { - std::cout << " - Using k-space transfer function kernel.\n"; - LOGUSER("Using k-space transfer function kernel."); + std::cout << " - Using k-space transfer function kernel.\n"; + LOGUSER("Using k-space transfer function kernel."); #ifdef SINGLE_PRECISION - the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k_float"]; + the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k_float"]; #else - the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k_double"]; + the_kernel_creator = convolution::get_kernel_map()["tf_kernel_k_double"]; #endif - } - else - { - std::cout << " - Using real-space transfer function kernel.\n"; - LOGUSER("Using real-space transfer function kernel."); -#ifdef SINGLE_PRECISION - the_kernel_creator = convolution::get_kernel_map()["tf_kernel_real_float"]; -#else - the_kernel_creator = convolution::get_kernel_map()["tf_kernel_real_double"]; -#endif - } convolution::kernel *the_tf_kernel = the_kernel_creator->create(cf, ptf, refh, type); @@ -502,21 +494,13 @@ void GenerateDensityHierarchy(config_file &cf, transfer_function *ptf, tf_type t refh.size(levelmin + i, 1), refh.size(levelmin + i, 2)); if( refh.get_margin() > 0 ){ - fine = new PaddedDensitySubGrid(refh.offset(levelmin + i, 0), - refh.offset(levelmin + i, 1), - refh.offset(levelmin + i, 2), - refh.size(levelmin + i, 0), - refh.size(levelmin + i, 1), - refh.size(levelmin + i, 2), - refh.get_margin(), refh.get_margin(), refh.get_margin() ); + fine = new PaddedDensitySubGrid( refh.offset(levelmin + i, 0), refh.offset(levelmin + i, 1), refh.offset(levelmin + i, 2), + refh.size(levelmin + i, 0), refh.size(levelmin + i, 1), refh.size(levelmin + i, 2), + refh.get_margin(), refh.get_margin(), refh.get_margin() ); LOGUSER(" margin = %d",refh.get_margin()); }else{ - fine = new PaddedDensitySubGrid(refh.offset(levelmin + i, 0), - refh.offset(levelmin + i, 1), - refh.offset(levelmin + i, 2), - refh.size(levelmin + i, 0), - refh.size(levelmin + i, 1), - refh.size(levelmin + i, 2)); + fine = new PaddedDensitySubGrid( refh.offset(levelmin + i, 0), refh.offset(levelmin + i, 1), refh.offset(levelmin + i, 2), + refh.size(levelmin + i, 0), refh.size(levelmin + i, 1), refh.size(levelmin + i, 2)); LOGUSER(" margin = %d",refh.size(levelmin + i, 0)/2); } ///////////////////////////////////////////////////////////////////////// diff --git a/src/main.cc b/src/main.cc index 2f03d2f..3fe0f7c 100644 --- a/src/main.cc +++ b/src/main.cc @@ -421,7 +421,8 @@ int main(int argc, const char *argv[]) bool do_baryons = cf.getValue("setup", "baryons"), do_2LPT = cf.getValueSafe("setup", "use_2LPT", false), - do_LLA = cf.getValueSafe("setup", "use_LLA", false); + do_LLA = cf.getValueSafe("setup", "use_LLA", false), + do_counter_mode = cf.getValueSafe("setup", "zero_zoom_velocity", false); transfer_function_plugin *the_transfer_function_plugin = select_transfer_function_plugin(cf); @@ -460,7 +461,7 @@ int main(int argc, const char *argv[]) } the_region_generator = select_region_generator_plugin(cf); - + //------------------------------------------------------------------------------ //... determine run parameters //------------------------------------------------------------------------------ @@ -619,7 +620,7 @@ int main(int argc, const char *argv[]) //... compute counter-mode to minimize advection errors counter_mode_amp[icoord] = compute_finest_mean(data_forIO); - add_constant_value( data_forIO, -counter_mode_amp[icoord] ); + if( do_counter_mode ) add_constant_value( data_forIO, -counter_mode_amp[icoord] ); LOGUSER("Writing CDM displacements"); the_output_plugin->write_dm_position(icoord, data_forIO); @@ -756,7 +757,7 @@ int main(int argc, const char *argv[]) coarsen_density(rh_Poisson, data_forIO, false); // add counter velocity-mode - add_constant_value( data_forIO, -counter_mode_amp[icoord]*cosmo.vfact ); + if( do_counter_mode ) add_constant_value( data_forIO, -counter_mode_amp[icoord]*cosmo.vfact ); LOGUSER("Writing CDM velocities"); the_output_plugin->write_dm_velocity(icoord, data_forIO); @@ -826,7 +827,7 @@ int main(int argc, const char *argv[]) coarsen_density(rh_Poisson, data_forIO, false); // add counter velocity mode - add_constant_value( data_forIO, -counter_mode_amp[icoord]*cosmo.vfact ); + if( do_counter_mode ) add_constant_value( data_forIO, -counter_mode_amp[icoord]*cosmo.vfact ); LOGUSER("Writing CDM velocities"); the_output_plugin->write_dm_velocity(icoord, data_forIO); @@ -885,7 +886,7 @@ int main(int argc, const char *argv[]) coarsen_density(rh_Poisson, data_forIO, false); // add counter velocity mode - add_constant_value( data_forIO, -counter_mode_amp[icoord]*cosmo.vfact ); + if( do_counter_mode ) add_constant_value( data_forIO, -counter_mode_amp[icoord]*cosmo.vfact ); LOGUSER("Writing baryon velocities"); the_output_plugin->write_gas_velocity(icoord, data_forIO); @@ -1008,7 +1009,7 @@ int main(int argc, const char *argv[]) //... compute counter-mode to minimize advection errors counter_mode_amp[icoord] = compute_finest_mean(data_forIO); - add_constant_value( data_forIO, -counter_mode_amp[icoord] ); + if( do_counter_mode ) add_constant_value( data_forIO, -counter_mode_amp[icoord] ); LOGUSER("Writing CDM velocities"); the_output_plugin->write_dm_velocity(icoord, data_forIO); @@ -1105,7 +1106,7 @@ int main(int argc, const char *argv[]) coarsen_density(rh_Poisson, data_forIO, false); // add counter velocity mode - add_constant_value( data_forIO, -counter_mode_amp[icoord] ); + if( do_counter_mode ) add_constant_value( data_forIO, -counter_mode_amp[icoord] ); LOGUSER("Writing baryon velocities"); the_output_plugin->write_gas_velocity(icoord, data_forIO); @@ -1210,7 +1211,7 @@ int main(int argc, const char *argv[]) coarsen_density(rh_Poisson, data_forIO, false); // add counter mode - add_constant_value( data_forIO, -counter_mode_amp[icoord]/cosmo.vfact ); + if( do_counter_mode ) add_constant_value( data_forIO, -counter_mode_amp[icoord]/cosmo.vfact ); LOGUSER("Writing CDM displacements"); the_output_plugin->write_dm_position(icoord, data_forIO); @@ -1327,7 +1328,7 @@ int main(int argc, const char *argv[]) coarsen_density(rh_Poisson, data_forIO, false); // add counter mode - add_constant_value( data_forIO, -counter_mode_amp[icoord]/cosmo.vfact ); + if( do_counter_mode ) add_constant_value( data_forIO, -counter_mode_amp[icoord]/cosmo.vfact ); LOGUSER("Writing baryon displacements"); diff --git a/src/poisson.cc b/src/poisson.cc index c405f72..7569ecf 100644 --- a/src/poisson.cc +++ b/src/poisson.cc @@ -724,12 +724,16 @@ double fft_poisson_plugin::gradient(int dir, grid_hierarchy &u, grid_hierarchy & } #endif - /*double ktot = sqrt(ii*ii+jj*jj+k*k); - if( ktot >= nx/2 )//dir == 0 && i==nx/2 || dir == 1 && j==ny/2 || dir == 2 && k==nz/2 ) + if( (dir == 0 && i==nx/2) || (dir == 1 && j==ny/2) || (dir == 2 && k==nz/2) ) { +#ifdef FFTW3 + cdata[idx][0] = 0.0; + cdata[idx][1] = 0.0; +#else cdata[idx].re = 0.0; cdata[idx].im = 0.0; - }*/ +#endif + } } RE(cdata[0]) = 0.0; From ced852069e2e06f57fd12768f94db68b55240ed3 Mon Sep 17 00:00:00 2001 From: Oliver Hahn Date: Sun, 12 Feb 2023 22:38:50 -0800 Subject: [PATCH 4/5] WIP fixes/cleanup for MUSIC RNG --- src/plugins/random_music_wnoise_generator.cc | 175 ++++++++++-- src/plugins/random_music_wnoise_generator.hh | 282 +++++++++---------- 2 files changed, 292 insertions(+), 165 deletions(-) diff --git a/src/plugins/random_music_wnoise_generator.cc b/src/plugins/random_music_wnoise_generator.cc index f6426f0..7b19079 100644 --- a/src/plugins/random_music_wnoise_generator.cc +++ b/src/plugins/random_music_wnoise_generator.cc @@ -7,6 +7,157 @@ #include "random.hh" #include "random_music_wnoise_generator.hh" + +template +void rapid_proto_ngenic_rng(size_t res, long baseseed, music_wnoise_generator &R) +{ + LOGUSER("Invoking the N-GenIC random number generator"); + + unsigned *seedtable = new unsigned[res * res]; + + gsl_rng *random_generator = gsl_rng_alloc(gsl_rng_ranlxd1); + + gsl_rng_set(random_generator, baseseed); + + for (size_t i = 0; i < res / 2; i++) + { + size_t j; + for (j = 0; j < i; j++) + seedtable[i * res + j] = 0x7fffffff * gsl_rng_uniform(random_generator); + for (j = 0; j < i + 1; j++) + seedtable[j * res + i] = 0x7fffffff * gsl_rng_uniform(random_generator); + for (j = 0; j < i; j++) + seedtable[(res - 1 - i) * res + j] = 0x7fffffff * gsl_rng_uniform(random_generator); + for (j = 0; j < i + 1; j++) + seedtable[(res - 1 - j) * res + i] = 0x7fffffff * gsl_rng_uniform(random_generator); + for (j = 0; j < i; j++) + seedtable[i * res + (res - 1 - j)] = 0x7fffffff * gsl_rng_uniform(random_generator); + for (j = 0; j < i + 1; j++) + seedtable[j * res + (res - 1 - i)] = 0x7fffffff * gsl_rng_uniform(random_generator); + for (j = 0; j < i; j++) + seedtable[(res - 1 - i) * res + (res - 1 - j)] = 0x7fffffff * gsl_rng_uniform(random_generator); + for (j = 0; j < i + 1; j++) + seedtable[(res - 1 - j) * res + (res - 1 - i)] = 0x7fffffff * gsl_rng_uniform(random_generator); + } + + fftw_real *rnoise = new fftw_real[res * res * (res + 2)]; + fftw_complex *knoise = reinterpret_cast(rnoise); + + double fnorm = 1. / sqrt(res * res * res); + +// #warning need to check for race conditions below + //#pragma omp parallel for + for (size_t i = 0; i < res; i++) + { + int ii = (int)res - (int)i; + if (ii == (int)res) + ii = 0; + + for (size_t j = 0; j < res; j++) + { + gsl_rng_set(random_generator, seedtable[i * res + j]); + + for (size_t k = 0; k < res / 2; k++) + { + double phase = gsl_rng_uniform(random_generator) * 2 * M_PI; + double ampl; + do + ampl = gsl_rng_uniform(random_generator); + while (ampl == 0); + + if (i == res / 2 || j == res / 2 || k == res / 2) + continue; + if (i == 0 && j == 0 && k == 0) + continue; + + T rp = -sqrt(-log(ampl)) * cos(phase) * fnorm; + T ip = -sqrt(-log(ampl)) * sin(phase) * fnorm; + + if (k > 0) + { + RE(knoise[(i * res + j) * (res / 2 + 1) + k]) = rp; + IM(knoise[(i * res + j) * (res / 2 + 1) + k]) = ip; + } + else /* k=0 plane needs special treatment */ + { + if (i == 0) + { + if (j >= res / 2) + continue; + else + { + int jj = (int)res - (int)j; /* note: j!=0 surely holds at this point */ + + RE(knoise[(i * res + j) * (res / 2 + 1) + k]) = rp; + IM(knoise[(i * res + j) * (res / 2 + 1) + k]) = ip; + + RE(knoise[(i * res + jj) * (res / 2 + 1) + k]) = rp; + IM(knoise[(i * res + jj) * (res / 2 + 1) + k]) = -ip; + } + } + else + { + if (i >= res / 2) + continue; + else + { + int ii = (int)res - (int)i; + if (ii == (int)res) + ii = 0; + int jj = (int)res - (int)j; + if (jj == (int)res) + jj = 0; + + RE(knoise[(i * res + j) * (res / 2 + 1) + k]) = rp; + IM(knoise[(i * res + j) * (res / 2 + 1) + k]) = ip; + + if (ii >= 0 && ii < (int)res) + { + RE(knoise[(ii * res + jj) * (res / 2 + 1) + k]) = rp; + IM(knoise[(ii * res + jj) * (res / 2 + 1) + k]) = -ip; + } + } + } + } + } + } + } + + delete[] seedtable; + + //... perform FT to real space + +#ifdef FFTW3 +#ifdef SINGLE_PRECISION + fftwf_plan plan = fftwf_plan_dft_c2r_3d(res, res, res, knoise, rnoise, FFTW_ESTIMATE); + fftwf_execute(plan); + fftwf_destroy_plan(plan); +#else + fftw_plan plan = fftw_plan_dft_c2r_3d(res, res, res, knoise, rnoise, FFTW_ESTIMATE); + fftw_execute(plan); + fftw_destroy_plan(plan); +#endif +#else + rfftwnd_plan plan = rfftw3d_create_plan(res, res, res, FFTW_COMPLEX_TO_REAL, FFTW_ESTIMATE | FFTW_IN_PLACE); +#ifndef SINGLETHREAD_FFTW + rfftwnd_threads_one_complex_to_real(omp_get_max_threads(), plan, knoise, NULL); +#else + rfftwnd_one_complex_to_real(plan, knoise, NULL); +#endif + rfftwnd_destroy_plan(plan); +#endif + + // copy to array that holds the random numbers + +#pragma omp parallel for + for (int i = 0; i < (int)res; ++i) + for (size_t j = 0; j < res; ++j) + for (size_t k = 0; k < res; ++k) + R(i, j, k) = rnoise[((size_t)i * res + j) * res + k]; + + delete[] rnoise; +} + template music_wnoise_generator::music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx) : res_(res), cubesize_(cubesize), ncubes_(1), baseseed_(baseseed) @@ -42,22 +193,8 @@ music_wnoise_generator::music_wnoise_generator(unsigned res, unsigned cubesiz rnums_.push_back(new Meshvar(res, 0, 0, 0)); cubemap_[0] = 0; // create dummy map index register_cube(0, 0, 0); - // rapid_proto_ngenic_rng( res_, baseseed_, *this ); - } - - /* - - if( musicnoise ) - mean = fill_all(); - else - { - rnums_.push_back( new Meshvar( res, 0, 0, 0 ) ); - cubemap_[0] = 0; // create dummy map index - register_cube(0,0,0); rapid_proto_ngenic_rng( res_, baseseed_, *this ); - } - - */ + } if (zeromean) { @@ -847,8 +984,7 @@ double music_wnoise_generator::fill_all(void) register_cube(ii, jj, kk); } -#pragma omp parallel for reduction(+ \ - : sum) + #pragma omp parallel for reduction(+ : sum) for (int i = 0; i < (int)ncubes_; ++i) for (int j = 0; j < (int)ncubes_; ++j) for (int k = 0; k < (int)ncubes_; ++k) @@ -862,9 +998,8 @@ double music_wnoise_generator::fill_all(void) sum += fill_cube(ii, jj, kk); } -//... subtract mean -#pragma omp parallel for reduction(+ \ - : sum) + //... subtract mean + #pragma omp parallel for reduction(+ : sum) for (int i = 0; i < (int)ncubes_; ++i) for (int j = 0; j < (int)ncubes_; ++j) for (int k = 0; k < (int)ncubes_; ++k) diff --git a/src/plugins/random_music_wnoise_generator.hh b/src/plugins/random_music_wnoise_generator.hh index 28a2977..10b1555 100644 --- a/src/plugins/random_music_wnoise_generator.hh +++ b/src/plugins/random_music_wnoise_generator.hh @@ -1,205 +1,197 @@ -#ifndef __RANDOM_MUSIC_WNOISE_GENERATOR_HH -#define __RANDOM_MUSIC_WNOISE_GENERATOR_HH +#pragma once #include #include #include "general.hh" #include "mesh.hh" -#define DEF_RAN_CUBE_SIZE 32 +#define DEF_RAN_CUBE_SIZE 32 /*! * @brief encapsulates all things random number generator related */ -template< typename T > +template class music_wnoise_generator { public: - unsigned - res_, //!< resolution of the full mesh - cubesize_, //!< size of one independent random number cube - ncubes_; //!< number of random number cubes to cover the full mesh - long baseseed_; //!< base seed from which cube seeds are computed - + unsigned + res_, //!< resolution of the full mesh + cubesize_, //!< size of one independent random number cube + ncubes_; //!< number of random number cubes to cover the full mesh + long baseseed_; //!< base seed from which cube seeds are computed + protected: //! vector of 3D meshes (the random number cubes) with random numbers - std::vector< Meshvar* > rnums_; - + std::vector *> rnums_; + //! map of 3D indices to cube index - std::map cubemap_; - - typedef std::map::iterator cubemap_iterator; - + std::map cubemap_; + + typedef std::map::iterator cubemap_iterator; + protected: - //! register a cube with the hash map - void register_cube( int i, int j, int k); - + void register_cube(int i, int j, int k); + //! fills a subcube with random numbers - double fill_cube( int i, int j, int k); - + double fill_cube(int i, int j, int k); + //! subtract a constant from an entire cube - void subtract_from_cube( int i, int j, int k, double val ); - + void subtract_from_cube(int i, int j, int k, double val); + //! copy random numbers from a cube to a full grid array - template< class C > - void copy_cube( int i, int j, int k, C& dat ) + template + void copy_cube(int i, int j, int k, C &dat) { int offi, offj, offk; - - offi = i*cubesize_; - offj = j*cubesize_; - offk = k*cubesize_; - - i = (i+ncubes_)%ncubes_; - j = (j+ncubes_)%ncubes_; - k = (k+ncubes_)%ncubes_; - - size_t icube = (i*ncubes_+j)*ncubes_+k; - cubemap_iterator it = cubemap_.find( icube ); - - if( it == cubemap_.end() ) - { - LOGERR("attempting to copy data from non-existing RND cube %d,%d,%d",i,j,k); - throw std::runtime_error("attempting to copy data from non-existing RND cube"); - } - + + offi = i * cubesize_; + offj = j * cubesize_; + offk = k * cubesize_; + + i = (i + ncubes_) % ncubes_; + j = (j + ncubes_) % ncubes_; + k = (k + ncubes_) % ncubes_; + + size_t icube = (i * ncubes_ + j) * ncubes_ + k; + cubemap_iterator it = cubemap_.find(icube); + + if (it == cubemap_.end()) + { + LOGERR("attempting to copy data from non-existing RND cube %d,%d,%d", i, j, k); + throw std::runtime_error("attempting to copy data from non-existing RND cube"); + } + size_t cubeidx = it->second; - - for( int ii=0; ii<(int)cubesize_; ++ii ) - for( int jj=0; jj<(int)cubesize_; ++jj ) - for( int kk=0; kk<(int)cubesize_; ++kk ) - dat(offi+ii,offj+jj,offk+kk) = (*rnums_[cubeidx])(ii,jj,kk); + + for (int ii = 0; ii < (int)cubesize_; ++ii) + for (int jj = 0; jj < (int)cubesize_; ++jj) + for (int kk = 0; kk < (int)cubesize_; ++kk) + dat(offi + ii, offj + jj, offk + kk) = (*rnums_[cubeidx])(ii, jj, kk); } - + //! free the memory associated with a subcube - void free_cube( int i, int j, int k ); - + void free_cube(int i, int j, int k); + //! initialize member variables and allocate memory - void initialize( void ); - + void initialize(void); + //! fill a cubic subvolume of the full grid with random numbers - double fill_subvolume( int *i0, int *n ); - + double fill_subvolume(int *i0, int *n); + //! fill an entire grid with random numbers - double fill_all( void ); - + double fill_all(void); + //! fill an external array instead of the internal field - template< class C > - double fill_all( C& dat ) + template + double fill_all(C &dat) { double sum = 0.0; - - for( int i=0; i<(int)ncubes_; ++i ) - for( int j=0; j<(int)ncubes_; ++j ) - for( int k=0; k<(int)ncubes_; ++k ) - { - int ii(i),jj(j),kk(k); - register_cube(ii,jj,kk); - } - - #pragma omp parallel for reduction(+:sum) - for( int i=0; i<(int)ncubes_; ++i ) - for( int j=0; j<(int)ncubes_; ++j ) - for( int k=0; k<(int)ncubes_; ++k ) - { - int ii(i),jj(j),kk(k); - - ii = (ii+ncubes_)%ncubes_; - jj = (jj+ncubes_)%ncubes_; - kk = (kk+ncubes_)%ncubes_; - - sum+=fill_cube(ii, jj, kk); - copy_cube(ii,jj,kk,dat); - free_cube(ii, jj, kk); - } - - return sum/(ncubes_*ncubes_*ncubes_); + + for (int i = 0; i < (int)ncubes_; ++i) + for (int j = 0; j < (int)ncubes_; ++j) + for (int k = 0; k < (int)ncubes_; ++k) + { + int ii(i), jj(j), kk(k); + register_cube(ii, jj, kk); + } + +#pragma omp parallel for reduction(+ \ + : sum) + for (int i = 0; i < (int)ncubes_; ++i) + for (int j = 0; j < (int)ncubes_; ++j) + for (int k = 0; k < (int)ncubes_; ++k) + { + int ii(i), jj(j), kk(k); + + ii = (ii + ncubes_) % ncubes_; + jj = (jj + ncubes_) % ncubes_; + kk = (kk + ncubes_) % ncubes_; + + sum += fill_cube(ii, jj, kk); + copy_cube(ii, jj, kk, dat); + free_cube(ii, jj, kk); + } + + return sum / (ncubes_ * ncubes_ * ncubes_); } - + //! write the number of allocated random number cubes to stdout - void print_allocated( void ); - + void print_allocated(void); + public: - //! constructor - music_wnoise_generator( unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx ); - + music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, int *x0, int *lx); + //! constructor for constrained fine field - music_wnoise_generator( music_wnoise_generator& rc, unsigned cubesize, long baseseed, int *x0_=NULL, int *lx_=NULL, bool zeromean=true ); - + music_wnoise_generator(music_wnoise_generator &rc, unsigned cubesize, long baseseed, int *x0_ = NULL, int *lx_ = NULL, bool zeromean = true); + //! constructor - music_wnoise_generator( unsigned res, unsigned cubesize, long baseseed, bool zeromean=true ); - - + music_wnoise_generator(unsigned res, unsigned cubesize, long baseseed, bool zeromean = true); + //! constructor to read white noise from file - music_wnoise_generator( unsigned res, std::string randfname, bool rndsign ); - - + music_wnoise_generator(unsigned res, std::string randfname, bool rndsign); + //! copy constructor for averaged field (not copying) hence explicit! - explicit music_wnoise_generator( /*const*/ music_wnoise_generator & rc ); - + explicit music_wnoise_generator(/*const*/ music_wnoise_generator &rc); + //! destructor ~music_wnoise_generator() { - for( unsigned i=0; isecond; - - if( rnums_[ cubeidx ] == NULL ) - { - LOGERR("Attempting to access data from non-allocated RND cube %d,%d,%d",ic,jc,kc); - throw std::runtime_error("attempting to access data from non-allocated RND cube"); - } - + + if (rnums_[cubeidx] == NULL) + { + LOGERR("Attempting to access data from non-allocated RND cube %d,%d,%d", ic, jc, kc); + throw std::runtime_error("attempting to access data from non-allocated RND cube"); + } + //... determine cell in cube is = (i - ic * cubesize_ + cubesize_) % cubesize_; js = (j - jc * cubesize_ + cubesize_) % cubesize_; ks = (k - kc * cubesize_ + cubesize_) % cubesize_; - - return (*rnums_[ cubeidx ])(is,js,ks); + + return (*rnums_[cubeidx])(is, js, ks); } - + //! free all cubes - void free_all_mem( void ) + void free_all_mem(void) { - for( unsigned i=0; i Date: Mon, 13 Feb 2023 15:07:54 -0800 Subject: [PATCH 5/5] WIP panphasia field now agrees within some errors with the previous result. --- src/convolution_kernel.cc | 3 +- src/densities.cc | 14 ++++-- src/mesh.hh | 89 +++++++++++++++++++++++++-------- src/plugins/random_music.cc | 2 + src/plugins/random_panphasia.cc | 29 ++++++++--- 5 files changed, 103 insertions(+), 34 deletions(-) diff --git a/src/convolution_kernel.cc b/src/convolution_kernel.cc index 3aa1f3d..c766e37 100644 --- a/src/convolution_kernel.cc +++ b/src/convolution_kernel.cc @@ -34,7 +34,8 @@ void perform(kernel *pk, void *pd, bool shift, bool fix, bool flip) double fftnormp = 1.0/sqrt((double)cparam_.nx * (double)cparam_.ny * (double)cparam_.nz); double fftnorm = pow(2.0 * M_PI, 1.5) / sqrt(cparam_.lx * cparam_.ly * cparam_.lz) * fftnormp; - fftw_complex *cdata, *ckernel; + fftw_complex *cdata; + [[maybe_unused]] fftw_complex *ckernel; fftw_real *data; data = reinterpret_cast(pd); diff --git a/src/densities.cc b/src/densities.cc index 589fe33..5ce7a35 100644 --- a/src/densities.cc +++ b/src/densities.cc @@ -445,6 +445,7 @@ void GenerateDensityHierarchy(config_file &cf, transfer_function *ptf, tf_type t bool fix = cf.getValueSafe("setup","fix_mode_amplitude",false); bool flip = cf.getValueSafe("setup","flip_mode_amplitude",false); + bool fourier_splicing = cf.getValueSafe("setup","fourier_splicing",true); if( fix && levelmin != levelmax ){ LOGWARN("You have chosen mode fixing for a zoom. This is not well tested,\n please proceed at your own risk..."); @@ -511,10 +512,12 @@ void GenerateDensityHierarchy(config_file &cf, transfer_function *ptf, tf_type t convolution::perform(the_tf_kernel->fetch_kernel(levelmin + i, true), reinterpret_cast(fine->get_data_ptr()), shift, fix, flip); - if (i == 1) - fft_interpolate(*top, *fine, true); - else - fft_interpolate(*coarse, *fine, false); + if( fourier_splicing ){ + if (i == 1) + fft_interpolate(*top, *fine, true); + else + fft_interpolate(*coarse, *fine, false); + } delta.add_patch(refh.offset(levelmin + i, 0), refh.offset(levelmin + i, 1), @@ -548,6 +551,9 @@ void GenerateDensityHierarchy(config_file &cf, transfer_function *ptf, tf_type t std::cout << " - Density calculation took " << tend - tstart << "s." << std::endl; #endif + if( !fourier_splicing ){ + coarsen_density(refh,delta,false); + } LOGUSER("Finished computing the density field in %fs", tend - tstart); } diff --git a/src/mesh.hh b/src/mesh.hh index efb82a5..744be4d 100644 --- a/src/mesh.hh +++ b/src/mesh.hh @@ -1208,11 +1208,12 @@ class refinement_hierarchy std::vector len_; unsigned - levelmin_, //!< minimum grid level for Poisson solver - levelmax_, //!< maximum grid level for all operations - levelmin_tf_, //!< minimum grid level for density calculation - padding_, //!< padding in number of coarse cells between refinement levels - blocking_factor_; + levelmin_, //!< minimum grid level for Poisson solver + levelmax_, //!< maximum grid level for all operations + levelmin_tf_, //!< minimum grid level for density calculation + padding_, //!< padding in number of coarse cells between refinement levels + blocking_factor_, //!< blocking factor of grids, necessary fo BoxLib codes such as NyX + gridding_unit_; //!< internal blocking factor of grids, necessary for Panphasia int margin_; //!< number of cells used for additional padding for convolutions with isolated boundaries (-1 = double padding) @@ -1231,6 +1232,30 @@ class refinement_hierarchy index3_t xshift_; //!< shift of refinement region in coarse cells (in order to center it in the domain) double rshift_[3]; + //! calculates the greatest common divisor + int gcd(int a, int b) const { + return b == 0 ? a : gcd(b, a % b); + } + + // calculates the cell shift in units of levelmin grid cells if there is an additional constraint to be + // congruent with another grid that partitions the same space in multiples of "base_unit" + int get_shift_unit( int base_unit, int levelmin ) const { + /*int Lp = 0; + while( base_unit * (1<( 1, (1<<(levelmin+1)) / (2*gcd(U,1<<(levelmin+1) )) );*/ + + int level_m = 0; + while( base_unit * (1<( 1, (1<("setup", "no_shift", false); bool force_shift = cf_.getValueSafe("setup", "force_shift", false); + gridding_unit_ = cf.getValueSafe("setup", "gridding_unit", 2); + + if (gridding_unit_ != 2 && blocking_factor_==0) { + blocking_factor_ = gridding_unit_; // THIS WILL LIKELY CAUSE PROBLEMS WITH NYX + }else if (gridding_unit_ != 2 && blocking_factor_!=0 && gridding_unit_!=blocking_factor_ ) { + LOGERR("incompatible gridding unit %d and blocking factor specified", gridding_unit_, blocking_factor_ ); + throw std::runtime_error("Incompatible gridding unit and blocking factor!"); + } + + //... call the region generator if (levelmin_ != levelmax_) { @@ -1292,9 +1327,18 @@ public: if ((levelmin_ != levelmax_) && (!bnoshift || force_shift)) { - xshift_[0] = (int)((0.5 - xc[0]) * ncoarse); - xshift_[1] = (int)((0.5 - xc[1]) * ncoarse); - xshift_[2] = (int)((0.5 - xc[2]) * ncoarse); + int random_base_grid_unit = cf.getValueSafe("random","base_unit",1); + int shift_unit = get_shift_unit( random_base_grid_unit, levelmin_ ); + if( shift_unit != 1 ){ + LOGINFO("volume can only be shifted by multiples of %d coarse cells.",shift_unit); + } + xshift_[0] = (int)((0.5-xc[0]) * (double)ncoarse / shift_unit + 0.5) * shift_unit;//ARJ(int)((0.5 - xc[0]) * ncoarse); + xshift_[1] = (int)((0.5-xc[1]) * (double)ncoarse / shift_unit + 0.5) * shift_unit;//ARJ(int)((0.5 - xc[1]) * ncoarse); + xshift_[2] = (int)((0.5-xc[2]) * (double)ncoarse / shift_unit + 0.5) * shift_unit;//ARJ(int)((0.5 - xc[2]) * ncoarse); + + // xshift_[0] = (int)((0.5 - xc[0]) * ncoarse); + // xshift_[1] = (int)((0.5 - xc[1]) * ncoarse); + // xshift_[2] = (int)((0.5 - xc[2]) * ncoarse); } else { @@ -1414,12 +1458,15 @@ public: else { //... require alignment with coarser grid - il -= il % 2; - jl -= jl % 2; - kl -= kl % 2; - ir += ir % 2; - jr += jr % 2; - kr += kr % 2; + LOGINFO("Internal refinement bounding box error: [%d,%d]x[%d,%d]x[%d,%d]", il, ir, jl, jr, kl, kr); + + il -= il % gridding_unit_; + jl -= jl % gridding_unit_; + kl -= kl % gridding_unit_; + + ir = ((ir%gridding_unit_)!=0)? (ir/gridding_unit_ + 1)*gridding_unit_ : ir; + jr = ((jr%gridding_unit_)!=0)? (jr/gridding_unit_ + 1)*gridding_unit_ : jr; + kr = ((kr%gridding_unit_)!=0)? (kr/gridding_unit_ + 1)*gridding_unit_ : kr; } // if doing unigrid, set region to whole box @@ -1511,7 +1558,6 @@ public: jr = (int)((double)jr / nref + 1.0) * nref; kr = (int)((double)kr / nref + 1.0) * nref; } - else if (preserve_dims_) { //... require alignment with coarser grid @@ -1528,12 +1574,13 @@ public: else { //... require alignment with coarser grid - il -= il % 2; - jl -= jl % 2; - kl -= kl % 2; - ir += ir % 2; - jr += jr % 2; - kr += kr % 2; + il -= il % gridding_unit_; + jl -= jl % gridding_unit_; + kl -= kl % gridding_unit_; + + ir = ((ir%gridding_unit_)!=0)? (ir/gridding_unit_ + 1)*gridding_unit_ : ir; + jr = ((jr%gridding_unit_)!=0)? (jr/gridding_unit_ + 1)*gridding_unit_ : jr; + kr = ((kr%gridding_unit_)!=0)? (kr/gridding_unit_ + 1)*gridding_unit_ : kr; } if (il >= ir || jl >= jr || kl >= kr || il < 0 || jl < 0 || kl < 0) diff --git a/src/plugins/random_music.cc b/src/plugins/random_music.cc index 0bfdf6b..035288e 100644 --- a/src/plugins/random_music.cc +++ b/src/plugins/random_music.cc @@ -50,6 +50,8 @@ public: disk_cached_ = pcf_->getValueSafe("random", "disk_cached", true); restart_ = pcf_->getValueSafe("random", "restart", false); + pcf_->insertValue("setup","fourier_splicing","true"); + mem_cache_.assign(levelmax_ - levelmin_ + 1, (std::vector *)NULL); if (restart_ && !disk_cached_) diff --git a/src/plugins/random_panphasia.cc b/src/plugins/random_panphasia.cc index 53dc456..0fa08fd 100644 --- a/src/plugins/random_panphasia.cc +++ b/src/plugins/random_panphasia.cc @@ -92,6 +92,7 @@ protected: int coordinate_system_shift_[3]; int ix_abs_[3], ix_per_[3], ix_rel_[3], level_p_, lextra_; const refinement_hierarchy *prefh_; + std::array margins_; struct panphasia_descriptor { @@ -180,6 +181,8 @@ public: ss.str(std::string()); ss << pdescriptor_->i_base; pcf_->insertValue("random", "base_unit", ss.str()); + + pcf_->insertValue("setup","fourier_splicing","false"); } void initialize_for_grid_structure(const refinement_hierarchy &refh) @@ -189,6 +192,12 @@ public: levelmin_final_ = pcf_->getValue("setup", "levelmin"); levelmax_ = prefh_->levelmax(); + if( refh.get_margin() < 0 ){ + margins_ = {-1,-1,-1}; + }else{ + margins_ = { refh.get_margin(), refh.get_margin(), refh.get_margin() }; + } + clear_panphasia_thread_states(); LOGINFO("PANPHASIA: running with %d threads", num_threads_); @@ -318,7 +327,13 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) } else { - ileft_corner[k] = (ileft[k] - nx[k] / 4 + (1 << level)) % (1 << level); // Isolated + if( margins_[0] < 0 ){ + ileft_corner[k] = (ileft[k] - nx[k] / 4 + (1 << level)) % (1 << level); // Isolated + ileft_corner[k] = (ileft[k] - nx[k] / 4 + (1 << level)) % (1 << level); // Isolated + }else{ + ileft_corner[k] = (ileft[k] - margins_[k] + (1 << level)) % (1 << level); // Isolated + ileft_corner[k] = (ileft[k] - margins_[k] + (1 << level)) % (1 << level); // Isolated + } } iexpand_left[k] = (ileft_corner[k] % grid_m_ == 0) ? 0 : ileft_corner[k] % grid_m_; // fprintf(stderr, "dim=%c : ileft = %d, ileft_corner %d, nx = %d\n", 'x' + k, ileft[k],ileft_corner[k],nx[k]); @@ -349,7 +364,7 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) LOGERR("Fatal error: non-cubic refinement being requested"); inter_grid_phase_adjustment_ = M_PI * (1.0 / (double)nx_m[0] - 1.0 / (double)nxremap[0]); - LOGUSER("The value of the phase adjustement is %f\n", inter_grid_phase_adjustment_); + // LOGINFO("The value of the phase adjustement is %f\n", inter_grid_phase_adjustment_); // LOGINFO("ileft[0],ileft[1],ileft[2] %d %d %d", ileft[0], ileft[1], ileft[2]); // LOGINFO("ileft_corner[0,1,2] %d %d %d", ileft_corner[0], ileft_corner[1], ileft_corner[2]); @@ -425,8 +440,7 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2], &verbosity); - LOGUSER(" called set_phases_and_rel_origin level %d ix_rel iy_rel iz_rel %d %d %d\n", level_p, ix_rel[0], - ix_rel[1], ix_rel[2]); + // LOGUSER(" called set_phases_and_rel_origin level %d ix_rel iy_rel iz_rel %d %d %d\n", level_p, ix_rel[0], ix_rel[1], ix_rel[2]); odd_x = ix_rel[0] % 2; odd_y = ix_rel[1] % 2; @@ -620,8 +634,7 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) set_phases_and_rel_origin_(&lstate[mythread], descriptor, &level_p, &ix_rel[0], &ix_rel[1], &ix_rel[2], &verbosity); - LOGUSER(" called set_phases_and_rel_origin level %d ix_rel iy_rel iz_rel %d %d %d\n", level_p, ix_rel[0], - ix_rel[1], ix_rel[2]); + // LOGINFO(" called set_phases_and_rel_origin level %d ix_rel iy_rel iz_rel %d %d %d\n", level_p, ix_rel[0], ix_rel[1], ix_rel[2]); odd_x = ix_rel[0] % 2; odd_y = ix_rel[1] % 2; @@ -769,7 +782,7 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) if (incongruent_fields_) { - LOGINFO("Remapping fields from dimension %d -> %d", nxremap[0], nx_m[0]); + LOGUSER("Remapping fields from dimension %d -> %d", nxremap[0], nx_m[0]); memset(pr1, 0, ngp * sizeof(fftw_real)); #pragma omp parallel for @@ -824,7 +837,7 @@ void RNG_panphasia::fill_grid(int level, DensityGrid &R) delete[] pr3; delete[] pr4; - LOGINFO("Copying random field data %d,%d,%d -> %d,%d,%d", nxremap[0], nxremap[1], nxremap[2], nx[0], nx[1], nx[2]); + LOGUSER("Copying random field data %d,%d,%d -> %d,%d,%d", nxremap[0], nxremap[1], nxremap[2], nx[0], nx[1], nx[2]); // n = 1<