Skip to content
Snippets Groups Projects
Commit 3b293193 authored by Ben Cumming, committed by Sam Yates
Browse files

Add automatic memory usage meter.

Fixes #189.

* Add memory usage meter to profiling infrastructure.
* Include GPU memory usage also, when GPU support is enabled.
parent 829df8d6
No related branches found
No related tags found
No related merge requests found
...@@ -3,6 +3,7 @@ set(BASE_SOURCES ...@@ -3,6 +3,7 @@ set(BASE_SOURCES
cell.cpp cell.cpp
morphology.cpp morphology.cpp
parameter_list.cpp parameter_list.cpp
profiling/memory_meter.cpp
profiling/meter.cpp profiling/meter.cpp
profiling/meter_manager.cpp profiling/meter_manager.cpp
profiling/profiler.cpp profiling/profiler.cpp
...@@ -10,6 +11,7 @@ set(BASE_SOURCES ...@@ -10,6 +11,7 @@ set(BASE_SOURCES
swcio.cpp swcio.cpp
threading/affinity.cpp threading/affinity.cpp
util/debug.cpp util/debug.cpp
util/memory.cpp
util/path.cpp util/path.cpp
util/unwind.cpp util/unwind.cpp
backends/multicore/fvm.cpp backends/multicore/fvm.cpp
......
#include <cmath>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include "memory_meter.hpp"
#include <communication/global_policy.hpp>
namespace nest {
namespace mc {
namespace util {
namespace {
// Turn the raw memory readings taken on this domain into a measurement that
// holds, for every checkpoint, the change in allocated memory since the
// previous checkpoint, collected from all domains.
//
//   readings: one memory reading per checkpoint, in bytes.
//   name:     label stored in the returned measurement.
//
// The first checkpoint has no predecessor, so its change is reported as 0.
// Values are converted from bytes to kB (divided by 1e3 and rounded).
//
// Throws std::out_of_range if the number of readings differs between domains.
// Note: results are only valid on the root domain on completion.
measurement collate(const std::vector<memory_size_type>& readings, std::string name) {
    using gcom = communication::global_policy;
    using size_type = std::vector<memory_size_type>::size_type;

    // Assert that the same number of readings were taken on every domain.
    // The raw reading count is compared (not the number of intervals) so that
    // a domain with no readings is distinguished from one with a single reading.
    const auto num_readings = readings.size();
    if (gcom::min(num_readings)!=gcom::max(num_readings)) {
        throw std::out_of_range(
            "the number of checkpoints in the \"memory\" meter do not match across domains");
    }

    // Calculate the local change in allocated memory for each interval.
    std::vector<memory_size_type> allocated;
    allocated.reserve(num_readings);
    if (num_readings>0) {
        allocated.push_back(0);
    }
    for (size_type i=1; i<num_readings; ++i) {
        allocated.push_back(readings[i] - readings[i-1]);
    }

    // Gather allocations from across all of the domains onto the root domain.
    measurement results;
    results.name = std::move(name);
    results.units = "kB";
    results.measurements.reserve(allocated.size());
    for (auto m: allocated) {
        results.measurements.push_back(gcom::gather(std::round(m/1e3), 0));
    }

    return results;
}
} // anonymous namespace
// Identifier of this meter.
std::string memory_meter::name() {
    return std::string("memory");
}
// Append the current memory usage to the stored readings.
// The GPU reading is only taken when built with NMC_HAVE_GPU.
void memory_meter::take_reading() {
    const auto host_bytes = allocated_memory();
    readings_.push_back(host_bytes);

#ifdef NMC_HAVE_GPU
    const auto device_bytes = gpu_allocated_memory();
    readings_gpu_.push_back(device_bytes);
#endif
}
std::vector<measurement> memory_meter::measurements() {
std::vector<measurement> results;
results.push_back(collate(readings_, "memory-allocated"));
if (readings_gpu_.size()) {
results.push_back(collate(readings_gpu_, "memory-allocated-gpu"));
}
return results;
}
} // namespace util
} // namespace mc
} // namespace nest
#pragma once
#include <string>
#include <vector>
#include <util/memory.hpp>
#include "meter.hpp"
namespace nest {
namespace mc {
namespace util {
class memory_meter : public meter {
std::vector<memory_size_type> readings_;
// only used if running on the GPU
std::vector<memory_size_type> readings_gpu_;
public:
std::string name() override;
void take_reading() override;
virtual std::vector<measurement> measurements() override;
};
} // namespace util
} // namespace mc
} // namespace nest
...@@ -6,10 +6,12 @@ namespace util { ...@@ -6,10 +6,12 @@ namespace util {
meter_manager::meter_manager() { meter_manager::meter_manager() {
// add time-measurement meter // add time-measurement meter
meters.emplace_back(new time_meter()); meters_.emplace_back(new time_meter());
// add memory consumption meter // add memory consumption meter
// TODO if (has_memory_metering) {
meters_.emplace_back(new memory_meter());
}
// add energy consumption meter // add energy consumption meter
// TODO // TODO
...@@ -19,22 +21,30 @@ void meter_manager::checkpoint(std::string name) { ...@@ -19,22 +21,30 @@ void meter_manager::checkpoint(std::string name) {
// Enforce a global synchronization point the first time that the meters // Enforce a global synchronization point the first time that the meters
// are used, to ensure that times measured across all domains are // are used, to ensure that times measured across all domains are
// synchronised. // synchronised.
if (checkpoint_names.size()==0) { if (checkpoint_names_.size()==0) {
communication::global_policy::barrier(); communication::global_policy::barrier();
} }
checkpoint_names.push_back(std::move(name)); checkpoint_names_.push_back(std::move(name));
for (auto& m: meters) { for (auto& m: meters_) {
m->take_reading(); m->take_reading();
} }
} }
const std::vector<std::unique_ptr<meter>>& meter_manager::meters() const {
return meters_;
}
const std::vector<std::string>& meter_manager::checkpoint_names() const {
return checkpoint_names_;
}
nlohmann::json to_json(const meter_manager& manager) { nlohmann::json to_json(const meter_manager& manager) {
using gcom = communication::global_policy; using gcom = communication::global_policy;
nlohmann::json meter_out; nlohmann::json meter_out;
for (const auto& m: manager.meters) { for (auto& m: manager.meters()) {
for (const auto& measure: m->measurements()) { for (auto& measure: m->measurements()) {
meter_out.push_back(to_json(measure)); meter_out.push_back(to_json(measure));
} }
} }
...@@ -42,7 +52,7 @@ nlohmann::json to_json(const meter_manager& manager) { ...@@ -42,7 +52,7 @@ nlohmann::json to_json(const meter_manager& manager) {
// Only the "root" process returns meter information // Only the "root" process returns meter information
if (gcom::id()==0) { if (gcom::id()==0) {
return { return {
{"checkpoints", manager.checkpoint_names}, {"checkpoints", manager.checkpoint_names()},
{"num_domains", gcom::size()}, {"num_domains", gcom::size()},
{"global_model", std::to_string(gcom::kind())}, {"global_model", std::to_string(gcom::kind())},
{"meters", meter_out}, {"meters", meter_out},
......
...@@ -8,18 +8,23 @@ ...@@ -8,18 +8,23 @@
#include <json/json.hpp> #include <json/json.hpp>
#include "meter.hpp" #include "meter.hpp"
#include "memory_meter.hpp"
#include "time_meter.hpp" #include "time_meter.hpp"
namespace nest { namespace nest {
namespace mc { namespace mc {
namespace util { namespace util {
struct meter_manager { class meter_manager {
std::vector<std::unique_ptr<meter>> meters; std::vector<std::unique_ptr<meter>> meters_;
std::vector<std::string> checkpoint_names; std::vector<std::string> checkpoint_names_;
public:
meter_manager(); meter_manager();
void checkpoint(std::string name); void checkpoint(std::string name);
const std::vector<std::unique_ptr<meter>>& meters() const;
const std::vector<std::string>& checkpoint_names() const;
}; };
nlohmann::json to_json(const meter_manager&); nlohmann::json to_json(const meter_manager&);
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <json/json.hpp>
#include "meter.hpp" #include "meter.hpp"
#include "profiler.hpp" #include "profiler.hpp"
......
#include "memory.hpp"
#ifdef __linux__
extern "C" {
#include <malloc.h>
}
#endif
#ifdef NMC_HAVE_GPU
#include <cuda_runtime.h>
#endif
namespace nest {
namespace mc {
namespace util {
#if defined(__linux__)
// Returns the sum of the malloc statistics hblkhd (bytes in mmap-ed blocks)
// and uordblks (bytes in in-use allocations).
memory_size_type allocated_memory() {
#if defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 33))
    // mallinfo2 (glibc >= 2.33) reports its counters as size_t, so they do
    // not wrap once more than 2 GiB have been allocated.
    const auto m = mallinfo2();
    return m.hblkhd + m.uordblks;
#else
    // The legacy mallinfo counters are plain int: widen before adding so that
    // the sum itself cannot overflow. The individual counters may still have
    // wrapped inside the C library for very large heaps.
    const auto m = mallinfo();
    return static_cast<long long>(m.hblkhd) + m.uordblks;
#endif
}
#else
// Memory metering is not implemented on this platform: report an error value.
memory_size_type allocated_memory() {
    return -1;
}
#endif
#ifdef NMC_HAVE_GPU
// Returns total minus free device memory as reported by cudaMemGetInfo,
// or -1 if the query fails.
// NOTE(review): the figures come from cudaMemGetInfo as-is; whether they
// include memory used by other processes on the same device should be
// confirmed against the CUDA documentation.
memory_size_type gpu_allocated_memory() {
    std::size_t free_bytes = 0;
    std::size_t total_bytes = 0;

    // Return the error value from its own branch: mixing -1 with a size_t in
    // a conditional expression would convert it to an unsigned value first.
    if (cudaMemGetInfo(&free_bytes, &total_bytes)!=cudaSuccess) {
        return -1;
    }
    return static_cast<memory_size_type>(total_bytes - free_bytes);
}
#else
// Built without GPU support: report an error value.
memory_size_type gpu_allocated_memory() {
    return -1;
}
#endif
} // namespace util
} // namespace mc
} // namespace nest
#pragma once
#include <cstdint>
namespace nest {
namespace mc {
namespace util {
// Compile-time flag: true when building for Linux (__linux__ is defined),
// false otherwise.
#ifdef __linux__
constexpr bool has_memory_metering = true;
#else
constexpr bool has_memory_metering = false;
#endif
// Use a signed type to store memory sizes because it can be used to store
// the difference between two readings, which may be negative.
// A 64 bit type is large enough to store any amount of memory that will
// reasonably be used.
using memory_size_type = std::int64_t;
// Returns the amount of memory currently allocated in bytes.
// Returns a negative value on error, or if the operation is not supported on
// the target architecture.
memory_size_type allocated_memory();
// Returns the amount of memory currently allocated on the gpu in bytes.
// Returns a negative value on error, or if not using the gpu
memory_size_type gpu_allocated_memory();
} // namespace util
} // namespace mc
} // namespace nest
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment