From 99a0b1c8bbd084843f9b4da834d6d6d83e1dde72 Mon Sep 17 00:00:00 2001 From: Ben Cumming <louncharf@gmail.com> Date: Tue, 18 Apr 2017 11:25:45 +0200 Subject: [PATCH] Add power meter and refactor meter interfaces. Fixes #190. The final piece in the metering features. * Add a `power_meter` which currently records energy used on each node of Cray XC{30,40,50} systems, which all have a built-in `pm_counters` interface for power measurement. * Add information about which node each MPI rank runs on to the metering output in `meters.json`, which is needed to analyse energy recordings, which are per node, not per MPI rank. * Refactor collation of measurements: now the responsibility of the meter manager. * Add support for `gather` with `std::string` to the global communication policy, which required a back end MPI implementation and a corresponding unit test. * Add `src/util/config.hpp` that populates the `nest::mc::config` namespace with `constexpr bool` flags describing system or environment capabilities. 
--- CMakeLists.txt | 1 + miniapp/miniapp.cpp | 4 +- src/CMakeLists.txt | 5 +- src/communication/mpi.hpp | 35 ++++-- src/communication/serial_global_policy.hpp | 2 +- src/profiling/memory_meter.cpp | 91 ++++++++------- src/profiling/memory_meter.hpp | 13 +-- src/profiling/meter.cpp | 23 ---- src/profiling/meter.hpp | 47 +++----- src/profiling/meter_manager.cpp | 105 ++++++++++++++---- src/profiling/meter_manager.hpp | 34 +++++- src/profiling/meters.md | 14 +-- src/profiling/power_meter.cpp | 48 ++++++++ src/profiling/power_meter.hpp | 18 +++ src/profiling/time_meter.cpp | 67 ----------- src/profiling/time_meter.hpp | 25 ----- src/util/config.hpp | 41 +++++++ src/util/hostname.cpp | 37 ++++++ src/util/hostname.hpp | 16 +++ src/util/memory.hpp | 6 - src/util/power.cpp | 32 ++++++ src/util/power.hpp | 17 +++ .../test_mpi_gather_all.cpp | 48 ++++++++ 23 files changed, 480 insertions(+), 249 deletions(-) delete mode 100644 src/profiling/meter.cpp create mode 100644 src/profiling/power_meter.cpp create mode 100644 src/profiling/power_meter.hpp delete mode 100644 src/profiling/time_meter.cpp delete mode 100644 src/profiling/time_meter.hpp create mode 100644 src/util/config.hpp create mode 100644 src/util/hostname.cpp create mode 100644 src/util/hostname.hpp create mode 100644 src/util/power.cpp create mode 100644 src/util/power.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d569fb2..b08a739f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -122,6 +122,7 @@ set_property(CACHE NMC_SYSTEM_TYPE PROPERTY STRINGS Generic Cray BGQ ) # Cray specific flags if(${NMC_SYSTEM_TYPE} MATCHES "Cray") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -dynamic") + add_definitions(-DNMC_HAVE_CRAY) endif() #---------------------------------------------------------- diff --git a/miniapp/miniapp.cpp b/miniapp/miniapp.cpp index 3582df42..c9fb6b7c 100644 --- a/miniapp/miniapp.cpp +++ b/miniapp/miniapp.cpp @@ -51,7 +51,7 @@ int main(int argc, char** argv) { try { nest::mc::util::meter_manager 
meters; - meters.checkpoint("start"); + meters.start(); std::cout << util::mask_stream(global_policy::id()==0); // read parameters @@ -168,8 +168,6 @@ int main(int argc, char** argv) { write_trace_json(*trace.get(), options.trace_prefix); } - meters.checkpoint("output"); - util::save_to_file(meters, "meters.json"); } catch (io::usage_error& e) { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bbbff660..c42d2d13 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -5,15 +5,16 @@ set(BASE_SOURCES morphology.cpp parameter_list.cpp profiling/memory_meter.cpp - profiling/meter.cpp profiling/meter_manager.cpp + profiling/power_meter.cpp profiling/profiler.cpp - profiling/time_meter.cpp swcio.cpp threading/affinity.cpp util/debug.cpp + util/hostname.cpp util/memory.cpp util/path.cpp + util/power.cpp util/unwind.cpp backends/multicore/fvm.cpp ) diff --git a/src/communication/mpi.hpp b/src/communication/mpi.hpp index 34b08864..ad74b8bc 100644 --- a/src/communication/mpi.hpp +++ b/src/communication/mpi.hpp @@ -65,10 +65,6 @@ namespace mpi { // T must be trivially copyable template<typename T> std::vector<T> gather(T value, int root) { - static_assert( - true,//std::is_trivially_copyable<T>::value, - "gather can only be performed on trivally copyable types"); - using traits = mpi_traits<T>; auto buffer_size = (rank()==root) ? size() : 0; std::vector<T> buffer(buffer_size); @@ -87,9 +83,6 @@ namespace mpi { // T must be trivially copyable template <typename T> std::vector<T> gather_all(T value) { - static_assert( - true,//std::is_trivially_copyable<T>::value, - "gather_all can only be performed on trivally copyable types"); using traits = mpi_traits<T>; std::vector<T> buffer(size()); @@ -103,11 +96,33 @@ namespace mpi { return buffer; } + // Specialize gather for std::string. 
+ inline std::vector<std::string> gather(std::string str, int root) { + using traits = mpi_traits<char>; + + auto counts = gather_all(int(str.size())); + auto displs = algorithms::make_index(counts); + + std::vector<char> buffer(displs.back()); + + PE("MPI", "Gather"); + MPI_Gatherv(str.data(), counts[rank()], traits::mpi_type(), // send + buffer.data(), counts.data(), displs.data(), traits::mpi_type(), // receive + root, MPI_COMM_WORLD); + PL(2); + + // Unpack the raw string data into a vector of strings. + std::vector<std::string> result; + result.reserve(size()); + for (auto i=0; i<size(); ++i) { + result.push_back(std::string(buffer.data()+displs[i], counts[i])); + } + return result; + } + + template <typename T> std::vector<T> gather_all(const std::vector<T>& values) { - static_assert( - true,//std::is_trivially_copyable<T>::value, - "gather_all can only be performed on trivally copyable types"); using traits = mpi_traits<T>; auto counts = gather_all(int(values.size())); diff --git a/src/communication/serial_global_policy.hpp b/src/communication/serial_global_policy.hpp index 4fb88908..81d2f3bc 100644 --- a/src/communication/serial_global_policy.hpp +++ b/src/communication/serial_global_policy.hpp @@ -54,7 +54,7 @@ struct serial_global_policy { template <typename T> static std::vector<T> gather(T value, int) { - return {value}; + return {std::move(value)}; } static void barrier() {} diff --git a/src/profiling/memory_meter.cpp b/src/profiling/memory_meter.cpp index afdc42a6..c18c8f26 100644 --- a/src/profiling/memory_meter.cpp +++ b/src/profiling/memory_meter.cpp @@ -1,62 +1,75 @@ #include <string> #include <vector> +#include <util/config.hpp> + #include "memory_meter.hpp" -#include <communication/global_policy.hpp> namespace nest { namespace mc { namespace util { -namespace { - measurement collate(const std::vector<memory_size_type>& readings, std::string name) { - using gcom = communication::global_policy; +// +// memory_meter +// - // Calculate the local 
change in allocated memory for each interval. - std::vector<memory_size_type> allocated; - allocated.push_back(0); - for (auto i=1u; i<readings.size(); ++i) { - allocated.push_back(readings[i] - readings[i-1]); - } +class memory_meter: public meter { +protected: + std::vector<memory_size_type> readings_; - // Assert that the same number of readings were taken on every domain. - const auto num_readings = allocated.size(); - if (gcom::min(num_readings)!=gcom::max(num_readings)) { - throw std::out_of_range( - "the number of checkpoints in the \"memory\" meter do not match across domains"); - } +public: + std::string name() override { + return "memory-allocated"; + } + + std::string units() override { + return "B"; + } + + void take_reading() override { + readings_.push_back(allocated_memory()); + } - // Gather allocations from across all of the domains onto the root domain. - // Note: results are only valid on the root domain on completion. - measurement results; - results.name = std::move(name); - results.units = "kB"; - for (auto m: allocated) { - results.measurements.push_back(gcom::gather(std::round(m/1e3), 0)); + std::vector<double> measurements() override { + std::vector<double> diffs; + + for (auto i=1ul; i<readings_.size(); ++i) { + diffs.push_back(readings_[i]-readings_[i-1]); } - return results; + return diffs; } -} // anonymous namespace +}; -std::string memory_meter::name() { - return "memory"; +meter_ptr make_memory_meter() { + if (not config::has_memory_measurement) { + return nullptr; + } + return meter_ptr(new memory_meter()); } -void memory_meter::take_reading() { - readings_.push_back(allocated_memory()); - #ifdef NMC_HAVE_GPU - readings_gpu_.push_back(gpu_allocated_memory()); - #endif -} +// +// gpu_memory_meter +// + +// The gpu memory meter specializes the reading and name methods of the basic +// memory_meter. 
+class gpu_memory_meter: public memory_meter { +public: + std::string name() override { + return "gpu-memory-allocated"; + } + + void take_reading() override { + readings_.push_back(gpu_allocated_memory()); + } +}; -std::vector<measurement> memory_meter::measurements() { - std::vector<measurement> results; - results.push_back(collate(readings_, "memory-allocated")); - if (readings_gpu_.size()) { - results.push_back(collate(readings_gpu_, "memory-allocated-gpu")); +meter_ptr make_gpu_memory_meter() { + if (not config::has_cuda) { + return nullptr; } - return results; + return meter_ptr(new gpu_memory_meter()); } } // namespace util diff --git a/src/profiling/memory_meter.hpp b/src/profiling/memory_meter.hpp index ee4e074e..d19e1b84 100644 --- a/src/profiling/memory_meter.hpp +++ b/src/profiling/memory_meter.hpp @@ -11,17 +11,8 @@ namespace nest { namespace mc { namespace util { -class memory_meter : public meter { - std::vector<memory_size_type> readings_; - - // only used if running on the GPU - std::vector<memory_size_type> readings_gpu_; - -public: - std::string name() override; - void take_reading() override; - virtual std::vector<measurement> measurements() override; -}; +meter_ptr make_memory_meter(); +meter_ptr make_gpu_memory_meter(); } // namespace util } // namespace mc diff --git a/src/profiling/meter.cpp b/src/profiling/meter.cpp deleted file mode 100644 index be55bda2..00000000 --- a/src/profiling/meter.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include "meter.hpp" - -namespace nest { -namespace mc { -namespace util { - -nlohmann::json to_json(const measurement& mnt) { - nlohmann::json measurements; - for (const auto& m: mnt.measurements) { - measurements.push_back(m); - } - - return { - {"name", mnt.name}, - {"units", mnt.units}, - {"measurements", measurements} - }; -} - -} // namespace util -} // namespace mc -} // namespace nest - diff --git a/src/profiling/meter.hpp b/src/profiling/meter.hpp index 69874ba9..11afc7a6 100644 --- a/src/profiling/meter.hpp 
+++ b/src/profiling/meter.hpp @@ -1,43 +1,25 @@ #pragma once +#include <memory> #include <string> -#include <json/json.hpp> +#include <vector> namespace nest { namespace mc { namespace util { -// A measurement from a meter has the following: -// * name -// * e.g. walltime or allocated-memory -// * units -// * use SI units -// * e.g. s or MiB -// * measurements -// * a vector with one entry for each checkpoint -// * each entry is a std::vector<double> of measurements gathered across -// domains at one checkpoint. -// -struct measurement { - std::string name; - std::string units; - std::vector<std::vector<double>> measurements; -}; - -// Converts a measurement to a json type for serialization to file. -// See src/profiling/meters.md for more information about the json formating. -nlohmann::json to_json(const measurement& m); - // A meter can be used to take a measurement of resource consumption, for // example wall time, memory or energy consumption. // Each specialization must: // 1) Record the resource consumption on calling meter::take_reading. // * How and which information is recorded is implementation dependent. -// 2) Return a std::vector containing the measurements that are derived -// from the information recorded on calls to meter::take_reading. -// * The return value is a vector of measurements, because a meter -// may derive multiple measurements from the recorded checkpoint -// information. +// 2) Provide the name of the resource being measured via name() +// e.g. : energy +// 3) Provide the units of the resource being measured via units() +// e.g. : J +// 4) Return the resources consumed between each pair of readings as a +// std::vector<double> from measurements(). So, for n readings, there will +// be n-1 differences. class meter { public: meter() = default; @@ -48,15 +30,16 @@ public: // Take a reading/measurement of the resource virtual void take_reading() = 0; - // Return a summary of the recordings. 
- // May perform expensive operations to process and analyse the readings. - // Full output is expected only on the root domain, i.e. when - // global_policy::id()==0 - virtual std::vector<measurement> measurements() = 0; + // The units of the values returned in from the measurements method. + virtual std::string units() = 0; + + virtual std::vector<double> measurements() = 0; virtual ~meter() = default; }; +using meter_ptr = std::unique_ptr<meter>; + } // namespace util } // namespace mc } // namespace nest diff --git a/src/profiling/meter_manager.cpp b/src/profiling/meter_manager.cpp index f6c2efab..54b5bd90 100644 --- a/src/profiling/meter_manager.cpp +++ b/src/profiling/meter_manager.cpp @@ -1,34 +1,79 @@ +#include <communication/global_policy.hpp> +#include <util/hostname.hpp> +#include <json/json.hpp> + #include "meter_manager.hpp" +#include "memory_meter.hpp" +#include "power_meter.hpp" namespace nest { namespace mc { namespace util { +measurement::measurement( + std::string n, std::string u, const std::vector<double>& readings): + name(std::move(n)), units(std::move(u)) +{ + using gcom = communication::global_policy; + + // Assert that the same number of readings were taken on every domain. + const auto num_readings = readings.size(); + if (gcom::min(num_readings)!=gcom::max(num_readings)) { + throw std::out_of_range( + "the number of checkpoints in the \""+name+"\" meter do not match across domains"); + } + + // Gather across all of the domains onto the root domain. 
+ for (auto r: readings) { + measurements.push_back(gcom::gather(r, 0)); + } +} + meter_manager::meter_manager() { - // add time-measurement meter - meters_.emplace_back(new time_meter()); + if (auto m = make_memory_meter()) { + meters_.push_back(std::move(m)); + } + if (auto m = make_gpu_memory_meter()) { + meters_.push_back(std::move(m)); + } + if (auto m = make_power_meter()) { + meters_.push_back(std::move(m)); + } +}; + +void meter_manager::start() { + EXPECTS(!started_); + + started_ = true; - // add memory consumption meter - if (has_memory_metering) { - meters_.emplace_back(new memory_meter()); + // take readings for the start point + for (auto& m: meters_) { + m->take_reading(); } - // add energy consumption meter - // TODO + // Enforce a global barrier after taking the time stamp + communication::global_policy::barrier(); + + start_time_ = timer_type::tic(); }; + void meter_manager::checkpoint(std::string name) { - // Enforce a global synchronization point the first time that the meters - // are used, to ensure that times measured across all domains are - // synchronised. 
- if (checkpoint_names_.size()==0) { - communication::global_policy::barrier(); - } + EXPECTS(started_); + + // Record the time taken on this domain since the last checkpoint + auto end_time = timer_type::tic(); + times_.push_back(timer_type::difference(start_time_, end_time)); + // Update meters checkpoint_names_.push_back(std::move(name)); for (auto& m: meters_) { m->take_reading(); } + + // Synchronize all domains before setting start time for the next interval + communication::global_policy::barrier(); + start_time_ = timer_type::tic(); } const std::vector<std::unique_ptr<meter>>& meter_manager::meters() const { @@ -39,15 +84,39 @@ const std::vector<std::string>& meter_manager::checkpoint_names() const { return checkpoint_names_; } +const std::vector<double>& meter_manager::times() const { + return times_; +} + +nlohmann::json to_json(const measurement& mnt) { + nlohmann::json measurements; + for (const auto& m: mnt.measurements) { + measurements.push_back(m); + } + + return { + {"name", mnt.name}, + {"units", mnt.units}, + {"measurements", measurements} + }; +} + nlohmann::json to_json(const meter_manager& manager) { using gcom = communication::global_policy; + // Gather the meter outputs into a json Array nlohmann::json meter_out; for (auto& m: manager.meters()) { - for (auto& measure: m->measurements()) { - meter_out.push_back(to_json(measure)); - } + meter_out.push_back( + to_json(measurement(m->name(), m->units(), m->measurements())) + ); } + // Add the times to the meter outputs + meter_out.push_back(to_json(measurement("time", "s", manager.times()))); + + // Gather a vector with the names of the node that each rank is running on. + auto host = hostname(); + auto hosts = gcom::gather(host? 
*host: "unknown", 0); // Only the "root" process returns meter information if (gcom::id()==0) { @@ -56,9 +125,7 @@ nlohmann::json to_json(const meter_manager& manager) { {"num_domains", gcom::size()}, {"global_model", std::to_string(gcom::kind())}, {"meters", meter_out}, - // TODO mapping of domains to nodes, which will be required to - // calculate the total memory and energy consumption of a - // distributed simulation. + {"hosts", hosts}, }; } diff --git a/src/profiling/meter_manager.hpp b/src/profiling/meter_manager.hpp index 32366579..c3164b81 100644 --- a/src/profiling/meter_manager.hpp +++ b/src/profiling/meter_manager.hpp @@ -3,28 +3,54 @@ #include <memory> #include <vector> -#include <util/make_unique.hpp> -#include <communication/global_policy.hpp> #include <json/json.hpp> #include "meter.hpp" -#include "memory_meter.hpp" -#include "time_meter.hpp" +#include "profiler.hpp" namespace nest { namespace mc { namespace util { +// A measurement has the following: +// * name +// * e.g. walltime or allocated-memory +// * units +// * use SI units +// * e.g. s or MiB +// * measurements +// * a vector with one entry for each checkpoint +// * each entry is a std::vector<double> of measurements gathered across +// domains at one checkpoint. +struct measurement { + std::string name; + std::string units; + std::vector<std::vector<double>> measurements; + measurement(std::string, std::string, const std::vector<double>&); +}; + +// Converts a measurement to a json type for serialization to file. +// See src/profiling/meters.md for more information about the json formating. 
+nlohmann::json to_json(const measurement& m); + class meter_manager { +private: + bool started_ = false; + + timer_type::time_point start_time_; + std::vector<double> times_; + std::vector<std::unique_ptr<meter>> meters_; std::vector<std::string> checkpoint_names_; public: meter_manager(); + void start(); void checkpoint(std::string name); const std::vector<std::unique_ptr<meter>>& meters() const; const std::vector<std::string>& checkpoint_names() const; + const std::vector<double>& times() const; }; nlohmann::json to_json(const meter_manager&); diff --git a/src/profiling/meters.md b/src/profiling/meters.md index afe11da2..72cdc8e1 100644 --- a/src/profiling/meters.md +++ b/src/profiling/meters.md @@ -3,13 +3,14 @@ A json record for a meter measurement is a json object. Each Object corresponds to a derived measurement: * `name`: a string describing the measurement * `units`: a string with SI units for measurements - * `measurements`: a json Array of measurements, with one - entry per checkpoint (corresponding to a call to - meter::take_reading) - * each measurement is itself a numeric array, with one - recording for each domain in the global communicator + * `measurements`: a json Array of measurements, with one entry for + each checkpoint. The first entry is the measure of resources consumed + between the call to `meter_manager::start()` and the first checkpoint, the + second entry measure between the first and second checkpoints, and so on. 
+ * each measurement is itself a numeric array, with one recording for each + domain in the global communicator -For example, the output of a meter for measuring wall time where 5 readings +For example, the output of a meter for measuring wall time where 4 checkpoints were taken on 4 MPI ranks could be represented as follows: ```json @@ -17,7 +18,6 @@ were taken on 4 MPI ranks could be represented as follows: "name": "walltime", "units": "s", "measurements": [ - [ 0, 0, 0, 0, ], [ 0.001265837, 0.001344004, 0.001299362, 0.001195762, ], [ 0.014114013, 0.015045662, 0.015071675, 0.014209514, ], [ 1.491986631, 1.491121134, 1.490957219, 1.492064233, ], diff --git a/src/profiling/power_meter.cpp b/src/profiling/power_meter.cpp new file mode 100644 index 00000000..2601f114 --- /dev/null +++ b/src/profiling/power_meter.cpp @@ -0,0 +1,48 @@ +#include <string> +#include <vector> + +#include <util/config.hpp> + +#include "power_meter.hpp" + +namespace nest { +namespace mc { +namespace util { + +class power_meter: public meter { + std::vector<energy_size_type> readings_; + +public: + std::string name() override { + return "energy"; + } + + std::string units() override { + return "J"; + } + + std::vector<double> measurements() override { + std::vector<double> diffs; + + for (auto i=1ul; i<readings_.size(); ++i) { + diffs.push_back(readings_[i]-readings_[i-1]); + } + + return diffs; + } + + void take_reading() override { + readings_.push_back(energy()); + } +}; + +meter_ptr make_power_meter() { + if (not config::has_power_measurement) { + return nullptr; + } + return meter_ptr(new power_meter()); +} + +} // namespace util +} // namespace mc +} // namespace nest diff --git a/src/profiling/power_meter.hpp b/src/profiling/power_meter.hpp new file mode 100644 index 00000000..40682acb --- /dev/null +++ b/src/profiling/power_meter.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include <string> +#include <vector> + +#include <util/power.hpp> + +#include "meter.hpp" + +namespace nest { 
+namespace mc { +namespace util { + +meter_ptr make_power_meter(); + +} // namespace util +} // namespace mc +} // namespace nest diff --git a/src/profiling/time_meter.cpp b/src/profiling/time_meter.cpp deleted file mode 100644 index c29bf12b..00000000 --- a/src/profiling/time_meter.cpp +++ /dev/null @@ -1,67 +0,0 @@ -#include <string> -#include <vector> - -#ifdef NMC_HAVE_GPU - #include <cuda_runtime.h> -#endif - -#include "time_meter.hpp" -#include <communication/global_policy.hpp> - -namespace nest { -namespace mc { -namespace util { - -std::string time_meter::name() { - return "time"; -} - -void time_meter::take_reading() { - // Wait for execution on this global domain to finish before recording the - // time stamp. For now this means waiting for all work to finish executing - // on the GPU (if GPU support is enabled) -#ifdef NMC_HAVE_GPU - cudaDeviceSynchronize(); -#endif - - // Record the time stamp - readings_.push_back(timer_type::tic()); - - // Enforce a global barrier after taking the time stamp - communication::global_policy::barrier(); -} - -std::vector<measurement> time_meter::measurements() { - using gcom = communication::global_policy; - - // Calculate the elapsed time on the local domain for each interval, - // and store them in the times vector. - std::vector<double> times; - times.push_back(0); - for (auto i=1u; i<readings_.size(); ++i) { - double t = timer_type::difference(readings_[i-1], readings_[i]); - times.push_back(t); - } - - // Assert that the same number of readings were taken on every domain. - const auto num_readings = times.size(); - if (gcom::min(num_readings)!=gcom::max(num_readings)) { - throw std::out_of_range( - "the number of checkpoints in the \"time\" meter do not match across domains"); - } - - // Gather the timers from accross all of the domains onto the root domain. - // Note: results are only valid on the root domain on completion. 
- measurement results; - results.name = "walltime"; - results.units = "s"; - for (auto t: times) { - results.measurements.push_back(gcom::gather(t, 0)); - } - - return {results}; -} - -} // namespace util -} // namespace mc -} // namespace nest diff --git a/src/profiling/time_meter.hpp b/src/profiling/time_meter.hpp deleted file mode 100644 index 32709167..00000000 --- a/src/profiling/time_meter.hpp +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include <string> -#include <vector> - -#include "meter.hpp" -#include "profiler.hpp" - -namespace nest { -namespace mc { -namespace util { - -class time_meter : public meter { - std::vector<timer_type::time_point> readings_; - -public: - std::string name() override; - void take_reading() override; - virtual std::vector<measurement> measurements() override; -}; - -} // namespace util -} // namespace mc -} // namespace nest - diff --git a/src/util/config.hpp b/src/util/config.hpp new file mode 100644 index 00000000..4bd3adf3 --- /dev/null +++ b/src/util/config.hpp @@ -0,0 +1,41 @@ +#pragma once + +namespace nest { +namespace mc { +namespace config { + +// has_memory_measurement +// Support for measuring total allocated memory. +// * true: calls to util::allocated_memory() will return valid results +// * false: calls to util::allocated_memory() will return -1 +// +// has_power_measurement +// Support for measuring energy consumption. +// Currently only on Cray XC30/40/50 systems. 
+// * true: calls to util::energy() will return valid results +// * false: calls to util::energy() will return -1 +// +// has_cuda +// Has been compiled with CUDA back end support + +#ifdef __linux__ +constexpr bool has_memory_measurement = true; +#else +constexpr bool has_memory_measurement = false; +#endif + +#ifdef NMC_HAVE_CRAY +constexpr bool has_power_measurement = true; +#else +constexpr bool has_power_measurement = false; +#endif + +#ifdef NMC_HAVE_CUDA +constexpr bool has_cuda = true; +#else +constexpr bool has_cuda = false; +#endif + +} // namespace config +} // namespace mc +} // namespace nest diff --git a/src/util/hostname.cpp b/src/util/hostname.cpp new file mode 100644 index 00000000..c39f3084 --- /dev/null +++ b/src/util/hostname.cpp @@ -0,0 +1,37 @@ +#include <string> + +#include <util/optional.hpp> + +#include "hostname.hpp" + +#ifdef __linux__ +extern "C" { + #include <unistd.h> +} +#endif + +namespace nest { +namespace mc { +namespace util { + +#ifdef __linux__ +util::optional<std::string> hostname() { + // Hostnames can be up to 256 characters in length, however on many systems + // it is limitted to 64. + char name[256]; + auto result = gethostname(name, sizeof(name)); + if (result) { + return util::nothing; + } + return std::string(name); +} +#else +util::optional<std::string> hostname() { + return util::nothing; +} +#endif + +} // namespace util +} // namespace mc +} // namespace nest + diff --git a/src/util/hostname.hpp b/src/util/hostname.hpp new file mode 100644 index 00000000..ae735ef5 --- /dev/null +++ b/src/util/hostname.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include <string> + +#include <util/optional.hpp> + +namespace nest { +namespace mc { +namespace util { + +// Get the name of the host on which this process is running. 
+util::optional<std::string> hostname(); + +} // namespace util +} // namespace mc +} // namespace nest diff --git a/src/util/memory.hpp b/src/util/memory.hpp index f231f8ae..7dab29b8 100644 --- a/src/util/memory.hpp +++ b/src/util/memory.hpp @@ -6,12 +6,6 @@ namespace nest { namespace mc { namespace util { -#ifdef __linux__ - constexpr bool has_memory_metering = true; -#else - constexpr bool has_memory_metering = false; -#endif - // Use a signed type to store memory sizes because it can be used to store // the difference between two readings, which may be negative. // A 64 bit type is large enough to store any amount of memory that will diff --git a/src/util/power.cpp b/src/util/power.cpp new file mode 100644 index 00000000..07037ea8 --- /dev/null +++ b/src/util/power.cpp @@ -0,0 +1,32 @@ +#include <fstream> + +#include "power.hpp" + +namespace nest { +namespace mc { +namespace util { + +#ifdef NMC_HAVE_CRAY + +energy_size_type energy() { + energy_size_type result = -1; + + std::ifstream fid("/sys/cray/pm_counters/energy"); + if (fid) { + fid >> result; + } + + return result; +} + +#else + +energy_size_type energy() { + return -1; +} + +#endif + +} // namespace util +} // namespace mc +} // namespace nest diff --git a/src/util/power.hpp b/src/util/power.hpp new file mode 100644 index 00000000..7c136247 --- /dev/null +++ b/src/util/power.hpp @@ -0,0 +1,17 @@ +#pragma once + +#include <cstdint> + +namespace nest { +namespace mc { +namespace util { + +// Energy in Joules (J) +using energy_size_type = std::uint64_t; + +// Returns negative value if unable to read energy +energy_size_type energy(); + +} // namespace util +} // namespace mc +} // namespace nest diff --git a/tests/global_communication/test_mpi_gather_all.cpp b/tests/global_communication/test_mpi_gather_all.cpp index aa62b0d5..8d676a17 100644 --- a/tests/global_communication/test_mpi_gather_all.cpp +++ b/tests/global_communication/test_mpi_gather_all.cpp @@ -97,4 +97,52 @@ TEST(mpi, 
gather_all_with_partition) { EXPECT_EQ(expected_divisions, gathered.partition()); } +TEST(mpi, gather_string) { + using policy = mpi_global_policy; + + int id = policy::id(); + + // Make a string of variable length, with the character + // in the string distrubuted as follows + // rank string + // 0 a + // 1 bb + // 2 ccc + // 3 dddd + // ... + // 25 zzzz...zzz (26 times z) + // 26 aaaa...aaaa (27 times a) + auto make_string = [](int id) { + return std::string(id+1, 'a'+char(id%26));}; + + auto s = make_string(id); + + auto gathered = mpi::gather(s, 0); + + if (!id) { + ASSERT_TRUE(policy::size()==(int)gathered.size()); + for (std::size_t i=0; i<gathered.size(); ++i) { + EXPECT_EQ(make_string(i), gathered[i]); + } + } +} + +TEST(mpi, gather) { + using policy = mpi_global_policy; + + int id = policy::id(); + + auto gathered = mpi::gather(id, 0); + + if (!id) { + ASSERT_TRUE(policy::size()==(int)gathered.size()); + for (std::size_t i=0; i<gathered.size(); ++i) { + EXPECT_EQ(int(i), gathered[i]); + } + } + else { + EXPECT_EQ(0u, gathered.size()); + } +} + #endif // NMC_HAVE_MPI -- GitLab