1
0
Fork 0
mirror of https://github.com/cosmo-sims/monofonIC.git synced 2024-09-19 17:03:45 +02:00

added reporting of memory high marks

This commit is contained in:
Oliver Hahn 2023-04-04 00:48:38 +02:00
parent c9cfbf3721
commit 8b267f07be
5 changed files with 176 additions and 15 deletions

View file

@ -185,6 +185,8 @@ inline void multitask_sync_barrier(void)
#endif
}
// Memory bookkeeping shared across translation units (values are in bytes).
// memory_report() stores this task's most recently sampled resident set size
// in local_mem_high_mark and the largest value it has reported so far in
// global_mem_high_mark.
extern size_t global_mem_high_mark, local_mem_high_mark;
namespace CONFIG
{
extern int MPI_thread_support;

125
include/memory_stat.hh Normal file
View file

@ -0,0 +1,125 @@
/*
* Author: David Robert Nadeau
* Site: http://NadeauSoftware.com/
* License: Creative Commons Attribution 3.0 Unported License
* http://creativecommons.org/licenses/by/3.0/deed.en_US
*/
#pragma once
// System headers are pulled in at global scope on purpose: including them
// inside `namespace memory` would declare the libc / OS symbols as members of
// that namespace whenever this header happens to be the first one to include
// them, and the include guards would then suppress the proper global
// declarations for the rest of the translation unit.
#include <cstddef>

#if defined(_WIN32)
#include <windows.h>
#include <psapi.h>
#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__))
#include <unistd.h>
#include <sys/resource.h>
#if defined(__APPLE__) && defined(__MACH__)
#include <mach/mach.h>
#elif (defined(_AIX) || defined(__TOS__AIX__)) || (defined(__sun__) || defined(__sun) || defined(sun) && (defined(__SVR4) || defined(__svr4__)))
#include <fcntl.h>
#include <procfs.h>
#elif defined(__linux__) || defined(__linux) || defined(linux) || defined(__gnu_linux__)
#include <stdio.h>
#endif
#else
#error "Cannot define getPeakRSS( ) or getCurrentRSS( ) for an unknown OS."
#endif

namespace memory
{

/**
 * Returns the peak (maximum so far) resident set size (physical
 * memory use) measured in bytes, or zero if the value cannot be
 * determined on this OS or the underlying system call fails.
 */
inline std::size_t getPeakRSS()
{
#if defined(_WIN32)
    /* Windows -------------------------------------------------- */
    PROCESS_MEMORY_COUNTERS info;
    if (!GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)))
        return 0; /* Can't query? Do not read the uninitialised struct. */
    return static_cast<std::size_t>(info.PeakWorkingSetSize);

#elif (defined(_AIX) || defined(__TOS__AIX__)) || (defined(__sun__) || defined(__sun) || defined(sun) && (defined(__SVR4) || defined(__svr4__)))
    /* AIX and Solaris ------------------------------------------ */
    /* NOTE(review): pr_rssize looks like the current rather than the peak
     * resident set size -- confirm against the platform's proc documentation. */
    struct psinfo psinfo;
    const int fd = open("/proc/self/psinfo", O_RDONLY);
    if (fd == -1)
        return 0; /* Can't open? */
    const bool complete = read(fd, &psinfo, sizeof(psinfo)) == static_cast<ssize_t>(sizeof(psinfo));
    close(fd);
    if (!complete)
        return 0; /* Can't read? */
    /* Widen before scaling so the multiplication is done in size_t. */
    return static_cast<std::size_t>(psinfo.pr_rssize) * 1024u;

#elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__))
    /* BSD, Linux, and OSX -------------------------------------- */
    struct rusage usage;
    if (getrusage(RUSAGE_SELF, &usage) != 0)
        return 0; /* Can't query? Do not read the uninitialised struct. */
#if defined(__APPLE__) && defined(__MACH__)
    /* ru_maxrss is taken as bytes here ... */
    return static_cast<std::size_t>(usage.ru_maxrss);
#else
    /* ... and as kilobytes everywhere else; widen before scaling. */
    return static_cast<std::size_t>(usage.ru_maxrss) * 1024u;
#endif

#else
    /* Unknown OS ----------------------------------------------- */
    return 0; /* Unsupported. */
#endif
}

/**
 * Returns the current resident set size (physical memory use) measured
 * in bytes, or zero if the value cannot be determined on this OS or the
 * underlying query fails.
 */
inline std::size_t getCurrentRSS()
{
#if defined(_WIN32)
    /* Windows -------------------------------------------------- */
    PROCESS_MEMORY_COUNTERS info;
    if (!GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)))
        return 0; /* Can't query? */
    return static_cast<std::size_t>(info.WorkingSetSize);

#elif defined(__APPLE__) && defined(__MACH__)
    /* OSX ------------------------------------------------------ */
    struct mach_task_basic_info info;
    mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT;
    if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
                  reinterpret_cast<task_info_t>(&info), &infoCount) != KERN_SUCCESS)
        return 0; /* Can't access? */
    return static_cast<std::size_t>(info.resident_size);

#elif defined(__linux__) || defined(__linux) || defined(linux) || defined(__gnu_linux__)
    /* Linux ---------------------------------------------------- */
    FILE *fp = fopen("/proc/self/statm", "r");
    if (fp == nullptr)
        return 0; /* Can't open? */
    /* Skip the first field, read the second one (resident pages). */
    long rss_pages = 0L;
    const int fields_read = fscanf(fp, "%*s%ld", &rss_pages);
    fclose(fp);
    if (fields_read != 1 || rss_pages < 0)
        return 0; /* Can't read? */
    /* sysconf reports failure as -1, which must not be cast to size_t. */
    const long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0)
        return 0;
    return static_cast<std::size_t>(rss_pages) * static_cast<std::size_t>(page_size);

#else
    /* AIX, BSD, Solaris, and Unknown OS ------------------------ */
    return 0; /* Unsupported. */
#endif
}

} // namespace memory

View file

@ -19,6 +19,24 @@
#include <grid_fft.hh>
#include <thread>
#include "memory_stat.hh"
void memory_report(void)
{
//... report memory usage
size_t curr_mem_high_mark = 0;
local_mem_high_mark = memory::getCurrentRSS();
#if defined(USE_MPI)
MPI_Allreduce(&local_mem_high_mark, &curr_mem_high_mark, 1, MPI_UNSIGNED_LONG_LONG, MPI_MAX, MPI_COMM_WORLD);
#else
curr_mem_high_mark = local_mem_high_mark;
#endif
if( curr_mem_high_mark > 1.1*global_mem_high_mark ){
music::ilog << "----mem-> new memory high mark: " << curr_mem_high_mark/(1ull<<20) << " MBytes / task" << std::endl;
global_mem_high_mark = curr_mem_high_mark;
}
}
template <typename data_t, bool bdistributed>
void Grid_FFT<data_t, bdistributed>::allocate(void)
{
@ -175,6 +193,7 @@ void Grid_FFT<data_t, bdistributed>::allocate(void)
#endif //// of #ifdef #else USE_MPI ////////////////////////////////////////////////////////////////////////////////////
}
ballocated_ = true;
memory_report();
}
template <typename data_t, bool bdistributed>

View file

@ -354,10 +354,10 @@ int run( config_file& the_config )
// phi = - delta / k^2
music::ilog << "-------------------------------------------------------------------------------" << std::endl;
music::ilog << "Generating LPT fields...." << std::endl;
music::ilog << "\n>>> Generating LPT fields.... <<<\n" << std::endl;
double wtime = get_wtime();
music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(1) term" << std::flush;
music::ilog << std::setw(40) << std::setfill('.') << std::left << ">> Computing phi(1) term" << std::endl;
phi.FourierTransformForward(false);
phi.assign_function_of_grids_kdep([&](auto k, auto wn) {
@ -368,7 +368,7 @@ int run( config_file& the_config )
phi.zero_DC_mode();
music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
music::ilog << "----cpu-> phi(1) took " << get_wtime() - wtime << "s" << std::endl;
//======================================================================
//... compute 2LPT displacement potential ....
@ -379,7 +379,7 @@ int run( config_file& the_config )
phi2.FourierTransformForward(false);
wtime = get_wtime();
music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(2) term" << std::flush;
music::ilog << std::setw(40) << std::setfill('.') << std::left << ">> Computing phi(2) term" << std::endl;
Conv.convolve_SumOfHessians(phi, {0, 0}, phi, {1, 1}, {2, 2}, op::assign_to(phi2));
Conv.convolve_Hessians(phi, {1, 1}, phi, {2, 2}, op::add_to(phi2));
Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, op::subtract_from(phi2));
@ -398,7 +398,7 @@ int run( config_file& the_config )
}
phi2.apply_InverseLaplacian();
music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
music::ilog << "----cpu-> phi(2) took " << get_wtime() - wtime << "s" << std::endl;
if (bAddExternalTides)
{
@ -419,19 +419,18 @@ int run( config_file& the_config )
//... phi3 = phi3a - 10/7 phi3b
//... 3a term ...
wtime = get_wtime();
music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3a) term" << std::flush;
music::ilog << std::setw(40) << std::setfill('.') << std::left << ">> Computing phi(3a) term" << std::endl;
Conv.convolve_Hessians(phi, {0, 0}, phi, {1, 1}, phi, {2, 2}, op::assign_to(phi3));
Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 2}, phi, {1, 2}, op::multiply_add_to(phi3,2.0));
Conv.convolve_Hessians(phi, {1, 2}, phi, {1, 2}, phi, {0, 0}, op::subtract_from(phi3));
Conv.convolve_Hessians(phi, {0, 2}, phi, {0, 2}, phi, {1, 1}, op::subtract_from(phi3));
Conv.convolve_Hessians(phi, {0, 1}, phi, {0, 1}, phi, {2, 2}, op::subtract_from(phi3));
// phi3a.apply_InverseLaplacian();
music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
music::ilog << "----cpu-> phi(3a) took " << get_wtime() - wtime << "s" << std::endl;
//... 3b term ...
wtime = get_wtime();
music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing phi(3b) term" << std::flush;
// phi3b.FourierTransformForward(false);
music::ilog << std::setw(40) << std::setfill('.') << std::left << ">> Computing phi(3b) term" << std::endl;
Conv.convolve_SumOfHessians(phi, {0, 0}, phi2, {1, 1}, {2, 2}, op::multiply_add_to(phi3,-5.0/7.0));
Conv.convolve_SumOfHessians(phi, {1, 1}, phi2, {2, 2}, {0, 0}, op::multiply_add_to(phi3,-5.0/7.0));
Conv.convolve_SumOfHessians(phi, {2, 2}, phi2, {0, 0}, {1, 1}, op::multiply_add_to(phi3,-5.0/7.0));
@ -439,12 +438,11 @@ int run( config_file& the_config )
Conv.convolve_Hessians(phi, {0, 2}, phi2, {0, 2}, op::multiply_add_to(phi3,+10.0/7.0));
Conv.convolve_Hessians(phi, {1, 2}, phi2, {1, 2}, op::multiply_add_to(phi3,+10.0/7.0));
phi3.apply_InverseLaplacian();
//phi3b *= 0.5; // factor 1/2 from definition of phi(3b)!
music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
music::ilog << "----cpu-> phi(3b) took " << get_wtime() - wtime << "s" << std::endl;
//... transversal term ...
wtime = get_wtime();
music::ilog << std::setw(40) << std::setfill('.') << std::left << "Computing A(3) term" << std::flush;
music::ilog << std::setw(40) << std::setfill('.') << std::left << ">> Computing A(3) term" << std::endl;
for (int idim = 0; idim < 3; ++idim)
{
// cyclic rotations of indices
@ -457,7 +455,7 @@ int run( config_file& the_config )
Conv.convolve_DifferenceOfHessians(phi2, {idimp, idimpp}, phi, {idimp, idimp}, {idimpp, idimpp}, op::subtract_from(*A3[idim]));
A3[idim]->apply_InverseLaplacian();
}
music::ilog << std::setw(20) << std::setfill(' ') << std::right << "took " << get_wtime() - wtime << "s" << std::endl;
music::ilog << "----cpu-> A(3) took " << get_wtime() - wtime << "s" << std::endl;
}
///... scale all potentials with respective growth factors

View file

@ -43,8 +43,11 @@ bool FFTW_threads_ok = false;
int num_threads = 1;
}
// Definitions of the memory bookkeeping counters (in bytes) used by
// memory_report(): local_mem_high_mark receives this task's most recently
// sampled resident set size, global_mem_high_mark the largest value reported
// so far. Both are reset to zero at the start of main().
size_t global_mem_high_mark, local_mem_high_mark;
#include "system_stat.hh"
#include "memory_stat.hh"
#include <exception>
#include <stdexcept>
@ -76,6 +79,8 @@ int main( int argc, char** argv )
music::logger::set_level(music::log_level::debug);
#endif
global_mem_high_mark = local_mem_high_mark = 0;
//------------------------------------------------------------------------------
// initialise MPI
//------------------------------------------------------------------------------
@ -259,13 +264,25 @@ int main( int argc, char** argv )
ic_generator::reset();
///////////////////////////////////////////////////////////////////////
music::ilog << "-------------------------------------------------------------------------------" << std::endl;
size_t peak_mem = memory::getPeakRSS();
#if defined(USE_MPI)
size_t peak_mem_max{0};
MPI_Allreduce(&peak_mem, &peak_mem_max, 1, MPI_UNSIGNED_LONG_LONG, MPI_MAX, MPI_COMM_WORLD);
peak_mem = peak_mem_max;
#endif
if( peak_mem > (1ull<<30) )
music::ilog << "----mem-> peak memory usage was " << peak_mem /(1ull<<30) << " GBytes / task" << std::endl;
else
music::ilog << "----mem-> peak memory usage was " << peak_mem /(1ull<<20) << " MBytes / task" << std::endl;
#if defined(USE_MPI)
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
#endif
music::ilog << "-------------------------------------------------------------------------------" << std::endl;
music::ilog << "Done. Have a nice day!\n" << std::endl;
return 0;