diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 2bbcfb424f90e8ef053b834b83d06a2dc77aee95..3e24708e205be14c1705aee348927c035e0021ed 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -3,6 +3,7 @@ set(BASE_SOURCES
     cell.cpp
     morphology.cpp
     parameter_list.cpp
+    profiling/memory_meter.cpp
     profiling/meter.cpp
     profiling/meter_manager.cpp
     profiling/profiler.cpp
@@ -10,6 +11,7 @@ set(BASE_SOURCES
     swcio.cpp
     threading/affinity.cpp
     util/debug.cpp
+    util/memory.cpp
     util/path.cpp
     util/unwind.cpp
     backends/multicore/fvm.cpp
diff --git a/src/profiling/memory_meter.cpp b/src/profiling/memory_meter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..afdc42a6607b375e84f495bb53653562339c8601
--- /dev/null
+++ b/src/profiling/memory_meter.cpp
@@ -0,0 +1,84 @@
+#include <cmath>
+#include <cstddef>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "memory_meter.hpp"
+#include <communication/global_policy.hpp>
+
+namespace nest {
+namespace mc {
+namespace util {
+
+namespace {
+    // Convert the absolute memory readings (in bytes) taken on the local domain
+    // into the change in allocated memory over each checkpoint interval, in kB,
+    // and gather those changes from every domain onto the root domain.
+    //
+    // The first entry is always 0: no interval precedes the first reading.
+    // Calls gcom::min, gcom::max and gcom::gather, which are presumably
+    // collective operations, so every domain is expected to call this function.
+    //
+    // Throws std::out_of_range if the number of readings differs across domains.
+    measurement collate(const std::vector<memory_size_type>& readings, std::string name) {
+        using gcom = communication::global_policy;
+
+        // Calculate the local change in allocated memory for each interval.
+        std::vector<memory_size_type> allocated;
+        allocated.reserve(readings.size());
+        allocated.push_back(0);
+        for (std::size_t i=1; i<readings.size(); ++i) {
+            allocated.push_back(readings[i] - readings[i-1]);
+        }
+
+        // Assert that the same number of readings were taken on every domain.
+        const auto num_readings = allocated.size();
+        if (gcom::min(num_readings)!=gcom::max(num_readings)) {
+            throw std::out_of_range(
+                "the number of checkpoints in the \"memory\" meter do not match across domains");
+        }
+
+        // Gather allocations from across all of the domains onto the root domain.
+        // Note: results are only valid on the root domain on completion.
+        measurement results;
+        results.name = std::move(name);
+        results.units = "kB";
+        for (auto m: allocated) {
+            // Convert bytes to kB, rounded to the nearest integer value.
+            results.measurements.push_back(gcom::gather(std::round(m/1e3), 0));
+        }
+
+        return results;
+    }
+} // anonymous namespace
+
+// Name used to identify this meter.
+std::string memory_meter::name() {
+    return "memory";
+}
+
+// Record the value of allocated_memory() and, when built with NMC_HAVE_GPU,
+// the value of gpu_allocated_memory().
+void memory_meter::take_reading() {
+    readings_.push_back(allocated_memory());
+    #ifdef NMC_HAVE_GPU
+    readings_gpu_.push_back(gpu_allocated_memory());
+    #endif
+}
+
+// Collate the readings into one measurement, followed by a second measurement
+// for the GPU readings if any were taken.
+std::vector<measurement> memory_meter::measurements() {
+    std::vector<measurement> results;
+    results.push_back(collate(readings_, "memory-allocated"));
+    if (!readings_gpu_.empty()) {
+        results.push_back(collate(readings_gpu_, "memory-allocated-gpu"));
+    }
+    return results;
+}
+
+} // namespace util
+} // namespace mc
+} // namespace nest
diff --git a/src/profiling/memory_meter.hpp b/src/profiling/memory_meter.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ee4e074e9afa016b7d2cf77c4d14a01769c65fdf
--- /dev/null
+++ b/src/profiling/memory_meter.hpp
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include <util/memory.hpp>
+
+#include "meter.hpp"
+
+namespace nest {
+namespace mc {
+namespace util {
+
+// Meter that records the amount of allocated memory each time a reading is taken.
+class memory_meter : public meter {
+    // values of allocated_memory(), one per call to take_reading()
+    std::vector<memory_size_type> readings_;
+
+    // only used if running on the GPU
+    std::vector<memory_size_type> readings_gpu_;
+
+public:
+    std::string name() override;
+    void take_reading() override;
+    std::vector<measurement> measurements() override;
+};
+
+} // namespace util
+} // namespace mc
+} // namespace nest
diff --git a/src/profiling/meter_manager.cpp b/src/profiling/meter_manager.cpp
index 01a01b0305f3391771bdf1b1142ef0cca02fe60f..f6c2efab2e7250de6d9a84c2589ae792780374aa 100644
--- a/src/profiling/meter_manager.cpp
+++ b/src/profiling/meter_manager.cpp
@@ -6,10 +6,12 @@ namespace util {
 
 meter_manager::meter_manager() {
     // add time-measurement meter
-    meters.emplace_back(new time_meter());
+    meters_.push_back(std::unique_ptr<meter>(new time_meter()));
 
     // add memory consumption meter
-    // TODO
+    if (has_memory_metering) {
+        meters_.push_back(std::unique_ptr<meter>(new memory_meter()));
+    }
 
     // add energy consumption meter
     // TODO
@@ -19,22 +21,30 @@ void meter_manager::checkpoint(std::string name) {
     // Enforce a global synchronization point the first time that the meters
     // are used, to ensure that times measured across all domains are
     // synchronised.
-    if (checkpoint_names.size()==0) {
+    if (checkpoint_names_.size()==0) {
         communication::global_policy::barrier();
     }
 
-    checkpoint_names.push_back(std::move(name));
-    for (auto& m: meters) {
+    checkpoint_names_.push_back(std::move(name));
+    for (auto& m: meters_) {
         m->take_reading();
     }
 }
 
+const std::vector<std::unique_ptr<meter>>& meter_manager::meters() const {
+    return meters_;
+}
+
+const std::vector<std::string>& meter_manager::checkpoint_names() const {
+    return checkpoint_names_;
+}
+
 nlohmann::json to_json(const meter_manager& manager) {
     using gcom = communication::global_policy;
 
     nlohmann::json meter_out;
-    for (const auto& m: manager.meters) {
-        for (const auto& measure: m->measurements()) {
+    for (auto& m: manager.meters()) {
+        for (auto& measure: m->measurements()) {
             meter_out.push_back(to_json(measure));
         }
     }
@@ -42,7 +52,7 @@ nlohmann::json to_json(const meter_manager& manager) {
     // Only the "root" process returns meter information
     if (gcom::id()==0) {
         return {
-            {"checkpoints", manager.checkpoint_names},
+            {"checkpoints", manager.checkpoint_names()},
             {"num_domains", gcom::size()},
             {"global_model", std::to_string(gcom::kind())},
             {"meters", meter_out},
diff --git a/src/profiling/meter_manager.hpp b/src/profiling/meter_manager.hpp
index a4dc97ec452f44b3d11e648e6d58d2674fd4263f..323665799dd3c4f47121ce0f51ba7a4f77deed48 100644
--- a/src/profiling/meter_manager.hpp
+++ b/src/profiling/meter_manager.hpp
@@ -8,18 +8,23 @@
 #include <json/json.hpp>
 
 #include "meter.hpp"
+#include "memory_meter.hpp"
 #include "time_meter.hpp"
 
 namespace nest {
 namespace mc {
 namespace util {
 
-struct meter_manager {
-    std::vector<std::unique_ptr<meter>> meters;
-    std::vector<std::string> checkpoint_names;
+class meter_manager {
+    std::vector<std::unique_ptr<meter>> meters_;
+    std::vector<std::string> checkpoint_names_;
 
+public:
     meter_manager();
     void checkpoint(std::string name);
+
+    const std::vector<std::unique_ptr<meter>>& meters() const;
+    const std::vector<std::string>& checkpoint_names() const;
 };
 
 nlohmann::json to_json(const meter_manager&);
diff --git a/src/profiling/time_meter.hpp b/src/profiling/time_meter.hpp
index f9522af00e8a9e9fddfe74d0cd03f146ef0f4a54..32709167cd7e965491c9d738f8fec783117c8ea8 100644
--- a/src/profiling/time_meter.hpp
+++ b/src/profiling/time_meter.hpp
@@ -2,7 +2,6 @@
 
 #include <string>
 #include <vector>
-#include <json/json.hpp>
 
 #include "meter.hpp"
 #include "profiler.hpp"
diff --git a/src/util/memory.cpp b/src/util/memory.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f6172d6e9f6675bfd8291345341e67cdefd17501
--- /dev/null
+++ b/src/util/memory.cpp
@@ -0,0 +1,52 @@
+#include <cstddef>
+
+#include "memory.hpp"
+
+#ifdef __linux__
+extern "C" {
+    #include <malloc.h>
+}
+#endif
+
+#ifdef NMC_HAVE_GPU
+    #include <cuda_runtime.h>
+#endif
+
+namespace nest {
+namespace mc {
+namespace util {
+
+#if defined(__linux__)
+memory_size_type allocated_memory() {
+    auto m = mallinfo();
+    // The mallinfo fields are of type int: widen before adding so that the
+    // sum is computed in 64 bit arithmetic and cannot overflow.
+    return static_cast<memory_size_type>(m.hblkhd)
+         + static_cast<memory_size_type>(m.uordblks);
+}
+#else
+memory_size_type allocated_memory() {
+    return -1;
+}
+#endif
+
+#ifdef NMC_HAVE_GPU
+memory_size_type gpu_allocated_memory() {
+    std::size_t free = 0;
+    std::size_t total = 0;
+    if (cudaMemGetInfo(&free, &total)!=cudaSuccess) {
+        return -1;
+    }
+
+    // Memory in use on the device is whatever is not reported as free.
+    return static_cast<memory_size_type>(total-free);
+}
+#else
+memory_size_type gpu_allocated_memory() {
+    return -1;
+}
+#endif
+
+} // namespace util
+} // namespace mc
+} // namespace nest
diff --git a/src/util/memory.hpp b/src/util/memory.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f231f8aeb3c574eb5c0e6330725f202c03fae8d7
--- /dev/null
+++ b/src/util/memory.hpp
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <cstdint>
+
+namespace nest {
+namespace mc {
+namespace util {
+
+#ifdef __linux__
+    constexpr bool has_memory_metering = true;
+#else
+    constexpr bool has_memory_metering = false;
+#endif
+
+// Use a signed type to store memory sizes because it can be used to store
+// the difference between two readings, which may be negative.
+// A 64 bit type is large enough to store any amount of memory that will
+// reasonably be used.
+using memory_size_type = std::int64_t;
+
+// Returns the amount of memory currently allocated in bytes.
+// Returns a negative value on error, or if the operation is not supported on
+// the target architecture.
+memory_size_type allocated_memory();
+
+// Returns the amount of memory currently allocated on the gpu in bytes.
+// Returns a negative value on error, or if not using the gpu.
+memory_size_type gpu_allocated_memory();
+
+} // namespace util
+} // namespace mc
+} // namespace nest