mirror of
https://github.com/cosmo-sims/monofonIC.git
synced 2024-09-18 15:53:45 +02:00
added reporting of memory high marks
This commit is contained in:
parent
c9cfbf3721
commit
8b267f07be
5 changed files with 176 additions and 15 deletions
|
@ -185,6 +185,8 @@ inline void multitask_sync_barrier(void)
|
|||
#endif
|
||||
}
|
||||
|
||||
extern size_t global_mem_high_mark, local_mem_high_mark;
|
||||
|
||||
namespace CONFIG
|
||||
{
|
||||
extern int MPI_thread_support;
|
||||
|
|
125
include/memory_stat.hh
Normal file
125
include/memory_stat.hh
Normal file
|
@ -0,0 +1,125 @@
|
|||
/*
|
||||
* Author: David Robert Nadeau
|
||||
* Site: http://NadeauSoftware.com/
|
||||
* License: Creative Commons Attribution 3.0 Unported License
|
||||
* http://creativecommons.org/licenses/by/3.0/deed.en_US
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>

// System headers are included at global scope, *before* namespace memory is
// opened. Including them inside the namespace would declare their contents as
// memory::... on first inclusion, and their include guards would then prevent
// a later, correct inclusion at global scope.
#if defined(_WIN32)
#include <windows.h>
#include <psapi.h>

#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__))
#include <unistd.h>
#include <sys/resource.h>

#if defined(__APPLE__) && defined(__MACH__)
#include <mach/mach.h>

#elif (defined(_AIX) || defined(__TOS__AIX__)) || ((defined(__sun__) || defined(__sun) || defined(sun)) && (defined(__SVR4) || defined(__svr4__)))
#include <fcntl.h>
#include <procfs.h>

#elif defined(__linux__) || defined(__linux) || defined(linux) || defined(__gnu_linux__)
#include <stdio.h>

#endif

#else
#error "Cannot define getPeakRSS( ) or getCurrentRSS( ) for an unknown OS."
#endif

namespace memory
{

/**
 * Returns the peak (maximum so far) resident set size (physical
 * memory use) measured in bytes, or zero if the value cannot be
 * determined on this OS or the underlying system query fails.
 */
inline std::size_t getPeakRSS()
{
#if defined(_WIN32)
    /* Windows -------------------------------------------------- */
    PROCESS_MEMORY_COUNTERS info;
    if (!GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)))
        return 0; /* Query failed; info is not valid. */
    return static_cast<std::size_t>(info.PeakWorkingSetSize);

#elif (defined(_AIX) || defined(__TOS__AIX__)) || ((defined(__sun__) || defined(__sun) || defined(sun)) && (defined(__SVR4) || defined(__svr4__)))
    /* AIX and Solaris ------------------------------------------ */
    struct psinfo psinfo;
    const int fd = open("/proc/self/psinfo", O_RDONLY);
    if (fd == -1)
        return 0; /* Can't open? */
    const bool complete = read(fd, &psinfo, sizeof(psinfo)) == static_cast<ssize_t>(sizeof(psinfo));
    close(fd);
    if (!complete)
        return 0; /* Can't read? */
    /* Convert before multiplying so the product is computed in size_t. */
    return static_cast<std::size_t>(psinfo.pr_rssize) * 1024u;

#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__))
    /* BSD, Linux, and OSX -------------------------------------- */
    struct rusage usage;
    if (getrusage(RUSAGE_SELF, &usage) != 0)
        return 0; /* Query failed; usage is not valid. */
#if defined(__APPLE__) && defined(__MACH__)
    /* ru_maxrss is taken as bytes on OSX. */
    return static_cast<std::size_t>(usage.ru_maxrss);
#else
    /* ru_maxrss is taken as kilobytes elsewhere; convert before
     * multiplying so the product is computed in size_t. */
    return static_cast<std::size_t>(usage.ru_maxrss) * 1024u;
#endif

#else
    /* Unknown OS ----------------------------------------------- */
    return 0; /* Unsupported. */
#endif
}

/**
 * Returns the current resident set size (physical memory use) measured
 * in bytes, or zero if the value cannot be determined on this OS or the
 * underlying system query fails.
 */
inline std::size_t getCurrentRSS()
{
#if defined(_WIN32)
    /* Windows -------------------------------------------------- */
    PROCESS_MEMORY_COUNTERS info;
    if (!GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)))
        return 0; /* Query failed; info is not valid. */
    return static_cast<std::size_t>(info.WorkingSetSize);

#elif defined(__APPLE__) && defined(__MACH__)
    /* OSX ------------------------------------------------------ */
    struct mach_task_basic_info info;
    mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
    if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
                  (task_info_t)&info, &infoCount) != KERN_SUCCESS)
        return 0; /* Can't access? */
    return static_cast<std::size_t>(info.resident_size);

#elif defined(__linux__) || defined(__linux) || defined(linux) || defined(__gnu_linux__)
    /* Linux ---------------------------------------------------- */
    FILE *fp = fopen("/proc/self/statm", "r");
    if (fp == nullptr)
        return 0; /* Can't open? */

    /* Skip the first field of statm and read the second one. */
    long rss_pages = 0L;
    const int fields_read = fscanf(fp, "%*s%ld", &rss_pages);
    fclose(fp);
    if (fields_read != 1 || rss_pages < 0)
        return 0; /* Can't read? */

    /* sysconf reports failure as -1, which must not be used as a size. */
    const long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0)
        return 0;
    return static_cast<std::size_t>(rss_pages) * static_cast<std::size_t>(page_size);

#else
    /* AIX, BSD, Solaris, and Unknown OS ------------------------ */
    return 0; /* Unsupported. */
#endif
}

} // namespace memory
|
|
@ -19,6 +19,24 @@
|
|||
#include <grid_fft.hh>
|
||||
#include <thread>
|
||||
|
||||
#include "memory_stat.hh"
|
||||
|
||||
void memory_report(void)
|
||||
{
|
||||
//... report memory usage
|
||||
size_t curr_mem_high_mark = 0;
|
||||
local_mem_high_mark = memory::getCurrentRSS();
|
||||
#if defined(USE_MPI)
|
||||
MPI_Allreduce(&local_mem_high_mark, &curr_mem_high_mark, 1, MPI_UNSIGNED_LONG_LONG, MPI_MAX, MPI_COMM_WORLD);
|
||||
#else
|
||||
curr_mem_high_mark = local_mem_high_mark;
|
||||
#endif
|
||||
if( curr_mem_high_mark > 1.1*global_mem_high_mark ){
|
||||
music::ilog << "----mem-> new memory high mark: " << curr_mem_high_mark/(1ull<<20) << " MBytes / task" << std::endl;
|
||||
global_mem_high_mark = curr_mem_high_mark;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename data_t, bool bdistributed>
|
||||
void Grid_FFT<data_t, bdistributed>::allocate(void)
|
||||
{
|
||||
|
@ -175,6 +193,7 @@ void Grid_FFT<data_t, bdistributed>::allocate(void)
|
|||
#endif //// of #ifdef #else USE_MPI ////////////////////////////////////////////////////////////////////////////////////
|
||||
}
|
||||
ballocated_ = true;
|
||||
memory_report();
|
||||
}
|
||||
|
||||
template <typename data_t, bool bdistributed>
|
||||
|
|
|
@ -354,10 +354,10 @@ int run( config_file& the_config )
|
|||
// phi = - delta / k^2
|
||||
|
||||
music::ilog << "-------------------------------------------------------------------------------" << std::endl;
|
||||
music::ilog << "Generating LPT fields...." << std::endl;
|
||||
music::ilog << "\n>>> Generating LPT fields.... <<<\n" << std::endl;
|
||||
|
||||
double wtime = get_wtime();
|
||||
music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush;
|
||||
music::ilog << std::setw(40) << std::setfill('.') << std::left << ">> Computing phi(1) term" << std::endl;
|
||||
|
||||
phi.FourierTransformForward(false);
|
||||
phi.assign_function_of_grids_kdep([&](auto k, auto wn) {
|
||||
|
@ -368,7 +368,7 @@ int run( config_file& the_config )
|
|||
|
||||
phi.zero_DC_mode();
|
||||
|
||||
music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
|
||||
music::ilog << "----cpu-> phi(1) took " << get_wtime() - wtime << "s" << std::endl;
|
||||
|
||||
//======================================================================
|
||||
//... compute 2LPT displacement potential ....
|
||||
|
@ -379,7 +379,7 @@ int run( config_file& the_config )
|
|||
phi2.FourierTransformForward(false);
|
||||
|
||||
wtime = get_wtime();
|
||||
music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush;
|
||||
music::ilog << std::setw(40) << std::setfill('.') << std::left << ">> Computing phi(2) term" << std::endl;
|
||||
Conv.convolve_SumOfHessians(phi, {0, 0}, phi, {1, 1}, {2, 2}, op::assign_to(phi2));
|
||||
Conv.convolve_Hessians(phi, {1, 1}, phi, {2, 2}, op::add_to(phi2));
|
||||
Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, op::subtract_from(phi2));
|
||||
|
@ -398,7 +398,7 @@ int run( config_file& the_config )
|
|||
}
|
||||
|
||||
phi2.apply_InverseLaplacian();
|
||||
music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
|
||||
music::ilog << "----cpu-> phi(2) took " << get_wtime() - wtime << "s" << std::endl;
|
||||
|
||||
if (bAddExternalTides)
|
||||
{
|
||||
|
@ -419,19 +419,18 @@ int run( config_file& the_config )
|
|||
//... phi3 = phi3a - 10/7 phi3b
|
||||
//... 3a term ...
|
||||
wtime = get_wtime();
|
||||
music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush;
|
||||
music::ilog << std::setw(40) << std::setfill('.') << std::left << ">> Computing phi(3a) term" << std::endl;
|
||||
Conv.convolve_Hessians(phi, {0, 0}, phi, {1, 1}, phi, {2, 2}, op::assign_to(phi3));
|
||||
Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::multiply_add_to(phi3,2.0));
|
||||
Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, phi, {0, 0}, op::subtract_from(phi3));
|
||||
Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, phi, {1, 1}, op::subtract_from(phi3));
|
||||
Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, phi, {2, 2}, op::subtract_from(phi3));
|
||||
// phi3a.apply_InverseLaplacian();
|
||||
music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
|
||||
music::ilog << "----cpu-> phi(3a) took " << get_wtime() - wtime << "s" << std::endl;
|
||||
|
||||
//... 3b term ...
|
||||
wtime = get_wtime();
|
||||
music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush;
|
||||
// phi3b.FourierTransformForward(false);
|
||||
music::ilog << std::setw(40) << std::setfill('.') << std::left << ">> Computing phi(3b) term" << std::endl;
|
||||
Conv.convolve_SumOfHessians(phi, {0, 0}, phi2, {1, 1}, {2, 2}, op::multiply_add_to(phi3,-5.0/7.0));
|
||||
Conv.convolve_SumOfHessians(phi, {1, 1}, phi2, {2, 2}, {0, 0}, op::multiply_add_to(phi3,-5.0/7.0));
|
||||
Conv.convolve_SumOfHessians(phi, {2, 2}, phi2, {0, 0}, {1, 1}, op::multiply_add_to(phi3,-5.0/7.0));
|
||||
|
@ -439,12 +438,11 @@ int run( config_file& the_config )
|
|||
Conv.convolve_Hessians(phi, {0, 2}, phi2, {0, 2}, op::multiply_add_to(phi3,+10.0/7.0));
|
||||
Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::multiply_add_to(phi3,+10.0/7.0));
|
||||
phi3.apply_InverseLaplacian();
|
||||
//phi3b *= 0.5; // factor 1/2 from definition of phi(3b)!
|
||||
music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
|
||||
music::ilog << "----cpu-> phi(3b) took " << get_wtime() - wtime << "s" << std::endl;
|
||||
|
||||
//... transversal term ...
|
||||
wtime = get_wtime();
|
||||
music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush;
|
||||
music::ilog << std::setw(40) << std::setfill('.') << std::left << ">> Computing A(3) term" << std::endl;
|
||||
for (int idim = 0; idim < 3; ++idim)
|
||||
{
|
||||
// cyclic rotations of indices
|
||||
|
@ -457,7 +455,7 @@ int run( config_file& the_config )
|
|||
Conv.convolve_DifferenceOfHessians(phi2, {idimp, idimpp}, phi, {idimp, idimp}, {idimpp, idimpp}, op::subtract_from(*A3[idim]));
|
||||
A3[idim]->apply_InverseLaplacian();
|
||||
}
|
||||
music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
|
||||
music::ilog << "----cpu-> A(3) took " << get_wtime() - wtime << "s" << std::endl;
|
||||
}
|
||||
|
||||
///... scale all potentials with respective growth factors
|
||||
|
|
21
src/main.cc
21
src/main.cc
|
@ -43,8 +43,11 @@ bool FFTW_threads_ok = false;
|
|||
int num_threads = 1;
|
||||
}
|
||||
|
||||
size_t global_mem_high_mark, local_mem_high_mark;
|
||||
|
||||
#include "system_stat.hh"
|
||||
#include "memory_stat.hh"
|
||||
|
||||
|
||||
#include <exception>
|
||||
#include <stdexcept>
|
||||
|
@ -76,6 +79,8 @@ int main( int argc, char** argv )
|
|||
music::logger::set_level(music::log_level::debug);
|
||||
#endif
|
||||
|
||||
global_mem_high_mark = local_mem_high_mark = 0;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// initialise MPI
|
||||
//------------------------------------------------------------------------------
|
||||
|
@ -259,13 +264,25 @@ int main( int argc, char** argv )
|
|||
ic_generator::reset();
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
music::ilog << "-------------------------------------------------------------------------------" << std::endl;
|
||||
size_t peak_mem = memory::getPeakRSS();
|
||||
#if defined(USE_MPI)
|
||||
size_t peak_mem_max{0};
|
||||
MPI_Allreduce(&peak_mem, &peak_mem_max, 1, MPI_UNSIGNED_LONG_LONG, MPI_MAX, MPI_COMM_WORLD);
|
||||
peak_mem = peak_mem_max;
|
||||
#endif
|
||||
|
||||
if( peak_mem > (1ull<<30) )
|
||||
music::ilog << "----mem-> peak memory usage was " << peak_mem /(1ull<<30) << " GBytes / task" << std::endl;
|
||||
else
|
||||
music::ilog << "----mem-> peak memory usage was " << peak_mem /(1ull<<20) << " MBytes / task" << std::endl;
|
||||
|
||||
|
||||
#if defined(USE_MPI)
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
MPI_Finalize();
|
||||
#endif
|
||||
|
||||
music::ilog << "-------------------------------------------------------------------------------" << std::endl;
|
||||
|
||||
music::ilog << "Done. Have a nice day!\n" << std::endl;
|
||||
|
||||
return 0;
|
||||
|
|
Loading…
Reference in a new issue