Skip to content
Snippets Groups Projects
Commit 3b293193 authored by Ben Cumming's avatar Ben Cumming Committed by Sam Yates
Browse files

Add automatic memory usage meter.

Fixes #189.

* Add memory usage meter to profiling infrastructure.
* Include GPU memory usage also, when GPU support is enabled.
parent 829df8d6
No related branches found
No related tags found
No related merge requests found
......@@ -3,6 +3,7 @@ set(BASE_SOURCES
cell.cpp
morphology.cpp
parameter_list.cpp
profiling/memory_meter.cpp
profiling/meter.cpp
profiling/meter_manager.cpp
profiling/profiler.cpp
......@@ -10,6 +11,7 @@ set(BASE_SOURCES
swcio.cpp
threading/affinity.cpp
util/debug.cpp
util/memory.cpp
util/path.cpp
util/unwind.cpp
backends/multicore/fvm.cpp
......
#include <cmath>
#include <cstddef>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include "memory_meter.hpp"
#include <communication/global_policy.hpp>
namespace nest {
namespace mc {
namespace util {
namespace {
// Collate the local memory readings of one meter into a measurement.
//
// readings: local readings in bytes, one per checkpoint, in the order taken.
// name:     name assigned to the returned measurement.
//
// The returned measurement has units "kB". One entry is produced per interval:
// the first entry is always built from a zero change, and every later entry
// from the difference between two consecutive readings (which may be negative)
// divided by 1e3 and rounded. Each entry is whatever global_policy::gather
// returns for that value with root 0.
//
// Throws std::out_of_range if the number of readings differs between domains.
//
// NOTE(review): this calls global_policy::min/max/gather, which look like
// collective operations, so presumably every domain has to call collate the
// same number of times - confirm against the communication policy.
measurement collate(const std::vector<memory_size_type>& readings, std::string name) {
    using gcom = communication::global_policy;

    // Assert that the same number of readings were taken on every domain.
    // The raw reading count is compared, not the number of intervals: the
    // interval list below always holds at least one entry, so it can not tell
    // a domain with no readings from a domain with one reading.
    const auto num_readings = readings.size();
    if (gcom::min(num_readings)!=gcom::max(num_readings)) {
        throw std::out_of_range(
            "the number of checkpoints in the \"memory\" meter do not match across domains");
    }

    // Calculate the local change in allocated memory for each interval.
    // The first entry is zero because there is no earlier reading to compare to.
    std::vector<memory_size_type> allocated;
    allocated.reserve(num_readings>0? num_readings: 1);
    allocated.push_back(0);
    for (std::size_t i=1; i<num_readings; ++i) {
        allocated.push_back(readings[i] - readings[i-1]);
    }

    // Gather allocations from across all of the domains onto the root domain.
    // Note: results are only valid on the root domain on completion.
    measurement results;
    results.name = std::move(name);
    results.units = "kB";
    for (auto m: allocated) {
        // convert from bytes to kB before gathering
        results.measurements.push_back(gcom::gather(std::round(m/1e3), 0));
    }

    return results;
}
} // anonymous namespace
// Returns the name of this meter, which is always the string "memory".
std::string memory_meter::name() {
return "memory";
}
void memory_meter::take_reading() {
readings_.push_back(allocated_memory());
#ifdef NMC_HAVE_GPU
readings_gpu_.push_back(gpu_allocated_memory());
#endif
}
std::vector<measurement> memory_meter::measurements() {
std::vector<measurement> results;
results.push_back(collate(readings_, "memory-allocated"));
if (readings_gpu_.size()) {
results.push_back(collate(readings_gpu_, "memory-allocated-gpu"));
}
return results;
}
} // namespace util
} // namespace mc
} // namespace nest
#pragma once
#include <string>
#include <vector>
#include <util/memory.hpp>
#include "meter.hpp"
namespace nest {
namespace mc {
namespace util {
class memory_meter : public meter {
std::vector<memory_size_type> readings_;
// only used if running on the GPU
std::vector<memory_size_type> readings_gpu_;
public:
std::string name() override;
void take_reading() override;
virtual std::vector<measurement> measurements() override;
};
} // namespace util
} // namespace mc
} // namespace nest
......@@ -6,10 +6,12 @@ namespace util {
meter_manager::meter_manager() {
// add time-measurement meter
meters.emplace_back(new time_meter());
meters_.emplace_back(new time_meter());
// add memory consumption meter
// TODO
if (has_memory_metering) {
meters_.emplace_back(new memory_meter());
}
// add energy consumption meter
// TODO
......@@ -19,22 +21,30 @@ void meter_manager::checkpoint(std::string name) {
// Enforce a global synchronization point the first time that the meters
// are used, to ensure that times measured across all domains are
// synchronised.
if (checkpoint_names.size()==0) {
if (checkpoint_names_.size()==0) {
communication::global_policy::barrier();
}
checkpoint_names.push_back(std::move(name));
for (auto& m: meters) {
checkpoint_names_.push_back(std::move(name));
for (auto& m: meters_) {
m->take_reading();
}
}
// Read-only access to the meters owned by the manager.
// The reference refers to the manager's own container (meters_).
const std::vector<std::unique_ptr<meter>>& meter_manager::meters() const {
return meters_;
}
// Read-only access to the stored checkpoint names.
// The reference refers to the manager's own container (checkpoint_names_).
const std::vector<std::string>& meter_manager::checkpoint_names() const {
return checkpoint_names_;
}
nlohmann::json to_json(const meter_manager& manager) {
using gcom = communication::global_policy;
nlohmann::json meter_out;
for (const auto& m: manager.meters) {
for (const auto& measure: m->measurements()) {
for (auto& m: manager.meters()) {
for (auto& measure: m->measurements()) {
meter_out.push_back(to_json(measure));
}
}
......@@ -42,7 +52,7 @@ nlohmann::json to_json(const meter_manager& manager) {
// Only the "root" process returns meter information
if (gcom::id()==0) {
return {
{"checkpoints", manager.checkpoint_names},
{"checkpoints", manager.checkpoint_names()},
{"num_domains", gcom::size()},
{"global_model", std::to_string(gcom::kind())},
{"meters", meter_out},
......
......@@ -8,18 +8,23 @@
#include <json/json.hpp>
#include "meter.hpp"
#include "memory_meter.hpp"
#include "time_meter.hpp"
namespace nest {
namespace mc {
namespace util {
struct meter_manager {
std::vector<std::unique_ptr<meter>> meters;
std::vector<std::string> checkpoint_names;
class meter_manager {
std::vector<std::unique_ptr<meter>> meters_;
std::vector<std::string> checkpoint_names_;
public:
meter_manager();
void checkpoint(std::string name);
const std::vector<std::unique_ptr<meter>>& meters() const;
const std::vector<std::string>& checkpoint_names() const;
};
nlohmann::json to_json(const meter_manager&);
......
......@@ -2,7 +2,6 @@
#include <string>
#include <vector>
#include <json/json.hpp>
#include "meter.hpp"
#include "profiler.hpp"
......
#include "memory.hpp"
#ifdef __linux__
extern "C" {
#include <malloc.h>
}
#endif
#ifdef NMC_HAVE_GPU
#include <cuda_runtime.h>
#endif
namespace nest {
namespace mc {
namespace util {
#if defined(__linux__)
namespace {
// Sum the mmap-ed bytes (hblkhd) and the in-use heap bytes (uordblks) of a
// mallinfo-style struct. Both fields are widened before the addition because
// struct mallinfo stores them as int, and adding them as int can overflow.
template <typename MallocInfo>
memory_size_type bytes_in_use(const MallocInfo& info) {
    return static_cast<memory_size_type>(info.hblkhd)
         + static_cast<memory_size_type>(info.uordblks);
}
} // anonymous namespace

// Returns the number of bytes currently allocated through malloc, as reported
// by the C library.
//
// mallinfo2() is used when the C library provides it (glibc 2.33 and later),
// because the int fields of the older mallinfo() can not represent values
// above 2GB. With older libraries the reading is still limited by the range
// of those int fields.
memory_size_type allocated_memory() {
#if defined(__GLIBC_PREREQ)
#if __GLIBC_PREREQ(2, 33)
    return bytes_in_use(mallinfo2());
#else
    return bytes_in_use(mallinfo());
#endif
#else
    return bytes_in_use(mallinfo());
#endif
}
#else
// Memory metering is not implemented on this platform: always returns -1.
memory_size_type allocated_memory() {
    return -1;
}
#endif
#ifdef NMC_HAVE_GPU
// Returns the number of bytes in use on the GPU, calculated as the difference
// between the total and the free memory reported by cudaMemGetInfo.
// Returns -1 if the call to cudaMemGetInfo does not succeed.
memory_size_type gpu_allocated_memory() {
    std::size_t free_bytes = 0;
    std::size_t total_bytes = 0;

    // Return the error value explicitly: mixing -1 with the unsigned
    // difference in one conditional expression would convert it to an
    // unsigned value first, which is not negative if size_t is narrower than
    // memory_size_type.
    if (cudaMemGetInfo(&free_bytes, &total_bytes)!=cudaSuccess) {
        return -1;
    }
    return static_cast<memory_size_type>(total_bytes - free_bytes);
}
#else
// Compiled without GPU support: always returns -1.
memory_size_type gpu_allocated_memory() {
    return -1;
}
#endif
} // namespace util
} // namespace mc
} // namespace nest
#pragma once
#include <cstdint>
namespace nest {
namespace mc {
namespace util {
// Compile-time flag that indicates whether memory metering is available.
// It is true only when __linux__ is defined, and false on every other target.
#ifdef __linux__
constexpr bool has_memory_metering = true;
#else
constexpr bool has_memory_metering = false;
#endif
// Use a signed type to store memory sizes because it can be used to store
// the difference between two readings, which may be negative.
// A 64 bit type is large enough to store any amount of memory that will
// reasonably be used.
using memory_size_type = std::int64_t;
// Returns the amount of memory currently allocated in bytes.
// Returns a negative value on error, or if the operation is not supported on
// the target architecture.
memory_size_type allocated_memory();
// Returns the amount of memory currently allocated on the GPU, in bytes.
// Returns a negative value on error, or if built without GPU support.
memory_size_type gpu_allocated_memory();
} // namespace util
} // namespace mc
} // namespace nest
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment