diff --git a/miniapp/miniapp.cpp b/miniapp/miniapp.cpp index 2a6ef9b740a2f48921529de2fa90daaf403ce002..f21208874a9f78d30995f4fa590e37c8233a6bf1 100644 --- a/miniapp/miniapp.cpp +++ b/miniapp/miniapp.cpp @@ -16,6 +16,7 @@ #include <io/exporter_spike_file.hpp> #include <model.hpp> #include <profiling/profiler.hpp> +#include <profiling/meter_manager.hpp> #include <threading/threading.hpp> #include <util/debug.hpp> #include <util/ioutil.hpp> @@ -50,6 +51,9 @@ int main(int argc, char** argv) { nest::mc::communication::global_policy_guard global_guard(argc, argv); try { + nest::mc::util::meter_manager meters; + meters.checkpoint("start"); + std::cout << util::mask_stream(global_policy::id()==0); // read parameters io::cl_options options = io::read_options(argc, argv, global_policy::id()==0); @@ -72,6 +76,8 @@ int main(int argc, char** argv) { banner(); + meters.checkpoint("global setup"); + // determine what to attach probes to probe_distribution pdist; pdist.proportion = options.probe_ratio; @@ -110,10 +116,8 @@ int main(int argc, char** argv) { m.set_binning_policy(binning_policy, options.bin_dt); // Inject some artificial spikes, 1 per 20 neurons. 
- std::vector<cell_gid_type> local_sources; cell_gid_type first_spike_cell = 20*((cell_range.first+19)/20); for (auto c=first_spike_cell; c<cell_range.second; c+=20) { - local_sources.push_back(c); m.add_artificial_spike({c, 0}); } @@ -129,19 +133,7 @@ int main(int argc, char** argv) { m.attach_sampler(probe.id, make_trace_sampler(traces.back().get(), sample_dt)); } -#ifdef WITH_PROFILING - // dummy run of the model for one step to ensure that profiling is consistent - m.run(options.dt, options.dt); - // reset and add the source spikes once again - m.reset(); - for (auto source : local_sources) { - m.add_artificial_spike({source, 0}); - } -#endif - - // Initialize the spike exporting interface after the profiler dummy - // steps, to avoid having the initial seed spikes that are artificially - // injected at t=0 from being recorded and output twice. + // Initialize the spike exporting interface std::unique_ptr<file_export_type> file_exporter; if (options.spike_file_output) { if (options.single_file_per_rank) { @@ -160,9 +152,13 @@ int main(int argc, char** argv) { } } + meters.checkpoint("model initialization"); + // run model m.run(options.tfinal, options.dt); + meters.checkpoint("time stepping"); + // output profile and diagnostic feedback auto const num_steps = options.tfinal / options.dt; util::profiler_output(0.001, m.num_cells()*num_steps, options.profile_only_zero); @@ -172,6 +168,10 @@ int main(int argc, char** argv) { for (const auto& trace: traces) { write_trace_json(*trace.get(), options.trace_prefix); } + + meters.checkpoint("output"); + + util::save_to_file(meters, "meters.json"); } catch (io::usage_error& e) { // only print usage/startup errors on master diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d11ca436ed2aeb51c50d8fa11bf4e891c3e421df..2bbcfb424f90e8ef053b834b83d06a2dc77aee95 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,7 +3,10 @@ set(BASE_SOURCES cell.cpp morphology.cpp parameter_list.cpp + profiling/meter.cpp + 
profiling/meter_manager.cpp profiling/profiler.cpp + profiling/time_meter.cpp swcio.cpp threading/affinity.cpp util/debug.cpp diff --git a/src/communication/dryrun_global_policy.hpp b/src/communication/dryrun_global_policy.hpp index f67a603090628d4ef15c3ca287bf8c2beb7bb8fd..811c6796bda2d69804a065cd42ddbc010c9eeab7 100644 --- a/src/communication/dryrun_global_policy.hpp +++ b/src/communication/dryrun_global_policy.hpp @@ -74,6 +74,13 @@ struct dryrun_global_policy { return size()*value; } + template <typename T> + static std::vector<T> gather(T value, int) { + return std::vector<T>(size(), value); + } + + static void barrier() {} + static void setup(int& argc, char**& argv) {} static void teardown() {} diff --git a/src/communication/mpi_global_policy.hpp b/src/communication/mpi_global_policy.hpp index 38b2ce224b622a398a7c747687eb473c5099e6ae..178d52e60584af08382146257492b797f43ef3d5 100644 --- a/src/communication/mpi_global_policy.hpp +++ b/src/communication/mpi_global_policy.hpp @@ -51,6 +51,15 @@ struct mpi_global_policy { return nest::mc::mpi::reduce(value, MPI_SUM); } + template <typename T> + static std::vector<T> gather(T value, int root) { + return mpi::gather(value, root); + } + + static void barrier() { + mpi::barrier(); + } + static void setup(int& argc, char**& argv) { nest::mc::mpi::init(&argc, &argv); } diff --git a/src/communication/serial_global_policy.hpp b/src/communication/serial_global_policy.hpp index c4d2a356857168d91be5380733844505fd824e21..4fb88908111cf05b3edbd60183115c0f860d56eb 100644 --- a/src/communication/serial_global_policy.hpp +++ b/src/communication/serial_global_policy.hpp @@ -52,6 +52,13 @@ struct serial_global_policy { return value; } + template <typename T> + static std::vector<T> gather(T value, int) { + return {value}; + } + + static void barrier() {} + static void setup(int& argc, char**& argv) {} static void teardown() {} diff --git a/src/profiling/meter.cpp b/src/profiling/meter.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..be55bda27d64df31bddec1f58d0d0808777d8361
--- /dev/null
+++ b/src/profiling/meter.cpp
@@ -0,0 +1,28 @@
+#include "meter.hpp"
+
+namespace nest {
+namespace mc {
+namespace util {
+
+// Serialize a measurement as a json object with the fields "name", "units"
+// and "measurements", where "measurements" holds one array of readings for
+// each entry of mnt.measurements.
+nlohmann::json to_json(const measurement& mnt) {
+    // Start from an empty array, so that a measurement without any readings
+    // is written as [] instead of null.
+    nlohmann::json measurements = nlohmann::json::array();
+    for (const auto& m: mnt.measurements) {
+        measurements.push_back(m);
+    }
+
+    return {
+        {"name", mnt.name},
+        {"units", mnt.units},
+        {"measurements", measurements}
+    };
+}
+
+} // namespace util
+} // namespace mc
+} // namespace nest
+
diff --git a/src/profiling/meter.hpp b/src/profiling/meter.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..69874ba97c83a2c3498be20bf08c6c0cbb1b1aee
--- /dev/null
+++ b/src/profiling/meter.hpp
@@ -0,0 +1,63 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include <json/json.hpp>
+
+namespace nest {
+namespace mc {
+namespace util {
+
+// A measurement from a meter has the following:
+//  * name
+//      * e.g. walltime or allocated-memory
+//  * units
+//      * use SI units
+//      * e.g. s or MiB
+//  * measurements
+//      * a vector with one entry for each checkpoint
+//      * each entry is a std::vector<double> of measurements gathered across
+//        domains at one checkpoint.
+//
+struct measurement {
+    std::string name;
+    std::string units;
+    std::vector<std::vector<double>> measurements;
+};
+
+// Converts a measurement to a json type for serialization to file.
+// See src/profiling/meters.md for more information about the json formatting.
+nlohmann::json to_json(const measurement& m);
+
+// A meter can be used to take a measurement of resource consumption, for
+// example wall time, memory or energy consumption.
+// Each specialization must:
+//  1) Record the resource consumption on calling meter::take_reading.
+//      * How and which information is recorded is implementation dependent.
+//  2) Return a std::vector containing the measurements that are derived
+//     from the information recorded on calls to meter::take_reading.
+//      * The return value is a vector of measurements, because a meter
+//        may derive multiple measurements from the recorded checkpoint
+//        information.
+class meter {
+public:
+    meter() = default;
+
+    // Provide a human readable name for the meter
+    virtual std::string name() = 0;
+
+    // Take a reading/measurement of the resource
+    virtual void take_reading() = 0;
+
+    // Return a summary of the recordings.
+    // May perform expensive operations to process and analyse the readings.
+    // Full output is expected only on the root domain, i.e. when
+    // global_policy::id()==0
+    virtual std::vector<measurement> measurements() = 0;
+
+    virtual ~meter() = default;
+};
+
+} // namespace util
+} // namespace mc
+} // namespace nest
diff --git a/src/profiling/meter_manager.cpp b/src/profiling/meter_manager.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c84754f2c77b1d274850903974783dd1d1a30d0b
--- /dev/null
+++ b/src/profiling/meter_manager.cpp
@@ -0,0 +1,89 @@
+#include <fstream>
+#include <iomanip>
+#include <string>
+#include <utility>
+
+#include "meter_manager.hpp"
+
+namespace nest {
+namespace mc {
+namespace util {
+
+// Construct a manager that holds every meter that is currently implemented.
+meter_manager::meter_manager() {
+    // add time-measurement meter
+    // The meter is owned by a unique_ptr before it is handed to the vector, so
+    // that it is not leaked if the vector throws while growing.
+    meters.push_back(std::unique_ptr<meter>(new time_meter()));
+
+    // add memory consumption meter
+    // TODO
+
+    // add energy consumption meter
+    // TODO
+}
+
+// Record a checkpoint with the given name, and take a reading from every meter.
+void meter_manager::checkpoint(std::string name) {
+    // Enforce a global synchronization point the first time that the meters
+    // are used, to ensure that times measured across all domains are
+    // synchronised.
+    // The first use is detected by the absence of recorded checkpoints: the
+    // constructor always adds a meter, so the meter list can not be used.
+    if (checkpoint_names.empty()) {
+        communication::global_policy::barrier();
+    }
+
+    checkpoint_names.push_back(std::move(name));
+
+    for (auto& m: meters) {
+        m->take_reading();
+    }
+}
+
+// Collect the measurements of all meters in a single json object.
+// Has to be called on every domain, because meter::measurements() may use
+// global communication (time_meter gathers its readings onto the root domain).
+// Only the root domain (global_policy::id()==0) returns the meter information.
+nlohmann::json to_json(const meter_manager& manager) {
+    using gcom = communication::global_policy;
+
+    nlohmann::json meter_out;
+    for (const auto& m: manager.meters) {
+        for (const auto& measure: m->measurements()) {
+            meter_out.push_back(to_json(measure));
+        }
+    }
+
+    // Only the "root" process returns meter information
+    if (gcom::id()==0) {
+        return {
+            {"checkpoints", manager.checkpoint_names},
+            {"num_domains", gcom::size()},
+            {"global_model", std::to_string(gcom::kind())},
+            {"meters", meter_out},
+            // TODO mapping of domains to nodes, which will be required to
+            // calculate the total memory and energy consumption of a
+            // distributed simulation.
+        };
+    }
+
+    return {};
+}
+
+// Write the measurements of all meters to the file called name in json format.
+// Every domain takes part in collecting the measurements, but only the root
+// domain writes the file. Failures of the file stream are raised as exceptions.
+void save_to_file(const meter_manager& manager, const std::string& name) {
+    auto measurements = to_json(manager);
+    if (!communication::global_policy::id()) {
+        std::ofstream fid;
+        fid.exceptions(std::ios_base::badbit | std::ios_base::failbit);
+        fid.open(name);
+        fid << std::setw(1) << measurements << "\n";
+    }
+}
+
+} // namespace util
+} // namespace mc
+} // namespace nest
diff --git a/src/profiling/meter_manager.hpp b/src/profiling/meter_manager.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a4dc97ec452f44b3d11e648e6d58d2674fd4263f
--- /dev/null
+++ b/src/profiling/meter_manager.hpp
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <util/make_unique.hpp>
+#include <communication/global_policy.hpp>
+#include <json/json.hpp>
+
+#include "meter.hpp"
+#include "time_meter.hpp"
+
+namespace nest {
+namespace mc {
+namespace util {
+
+// Owns a set of meters, and stores the names of the checkpoints at which
+// readings were taken from them.
+struct meter_manager {
+    std::vector<std::unique_ptr<meter>> meters;
+    std::vector<std::string> checkpoint_names;
+
+    meter_manager();
+    void checkpoint(std::string name);
+};
+
+nlohmann::json to_json(const meter_manager&);
+void save_to_file(const meter_manager&
manager, const std::string& name); + +} // namespace util +} // namespace mc +} // namespace nest diff --git a/src/profiling/meters.md b/src/profiling/meters.md new file mode 100644 index 0000000000000000000000000000000000000000..afe11da26f333805cd8094058e947f36e07c09ed --- /dev/null +++ b/src/profiling/meters.md @@ -0,0 +1,27 @@ +A json record for a meter measurement is a json object. + +Each Object corresponds to a derived measurement: + * `name`: a string describing the measurement + * `units`: a string with SI units for measurements + * `measurements`: a json Array of measurements, with one + entry per checkpoint (corresponding to a call to + meter::take_reading) + * each measurement is itself a numeric array, with one + recording for each domain in the global communicator + +For example, the output of a meter for measuring wall time where 5 readings +were taken on 4 MPI ranks could be represented as follows: + +```json + { + "name": "walltime", + "units": "s", + "measurements": [ + [ 0, 0, 0, 0 ], + [ 0.001265837, 0.001344004, 0.001299362, 0.001195762 ], + [ 0.014114013, 0.015045662, 0.015071675, 0.014209514 ], + [ 1.491986631, 1.491121134, 1.490957219, 1.492064233 ], + [ 0.00565307, 0.004375347, 0.002228206, 0.002483978 ] + ] + } +``` diff --git a/src/profiling/profiler.cpp b/src/profiling/profiler.cpp index a6f08e96f579c142f579d550b450fe56e55d02b7..6452369e7945052e006089e80d5a74801887f823 100644 --- a/src/profiling/profiler.cpp +++ b/src/profiling/profiler.cpp @@ -15,14 +15,10 @@ namespace mc { namespace util { // Here we provide functionality that the profiler can use to control the CUDA -// profiler nvprof. The cudaStartProfiler and cudaStopProfiler API calls are -// provided to let a program control which parts of the program are to be -// profiled. -// Here are some wrappers that the NestMC profiler restrict nvprof to recording -// only the time intervals that the user requests when they start and stop the -// profiler. 
-// It is a simple wrapper around the API calls with a mutex to ensure correct -// behaviour when multiple threads attempt to start or stop the profiler. +// profiler nvprof. The start_nvprof and stop_nvprof calls are provided to let +// a program control which parts of the program are to be profiled. It is a +// simple wrapper around the API calls with a mutex to ensure correct behaviour +// when multiple threads attempt to start or stop the profiler. #ifdef NMC_HAVE_GPU namespace gpu { bool is_running_nvprof = false; @@ -51,9 +47,9 @@ namespace gpu { } #endif -///////////////////////////////////////////////////////// -// profiler_node -///////////////////////////////////////////////////////// +// +// profiler_node implementation +// void profiler_node::print(int indent) { std::string s = std::string(indent, ' ') + name; std::cout << s @@ -178,9 +174,9 @@ bool operator== (const profiler_node& lhs, const profiler_node& rhs) { return lhs.name == rhs.name; } -///////////////////////////////////////////////////////// -// region_type -///////////////////////////////////////////////////////// +// +// region_type implementation +// region_type* region_type::subregion(const char* n) { size_t hsh = impl::hash(n); auto s = subregions_.find(hsh); @@ -234,9 +230,9 @@ profiler_node region_type::populate_performance_tree() const { return tree; } -///////////////////////////////////////////////////////// -// region_type -///////////////////////////////////////////////////////// +// +// profiler implementation +// void profiler::enter(const char* name) { if (!is_activated()) return; current_region_ = current_region_->subregion(name); @@ -435,7 +431,7 @@ void profiler_output(double threshold, std::size_t num_local_work_items, bool pr as_json["regions"] = p.as_json(); if (output_this_rank) { - auto fname = std::string("profile_" + std::to_string(comm_rank)); + auto fname = std::string("profile_" + std::to_string(comm_rank) + ".json"); std::ofstream fid(fname); fid << std::setw(1) 
<< as_json;
     }
diff --git a/src/profiling/time_meter.cpp b/src/profiling/time_meter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c29bf12b157dd2bcd3730be307a4a6cab830bf95
--- /dev/null
+++ b/src/profiling/time_meter.cpp
@@ -0,0 +1,81 @@
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#ifdef NMC_HAVE_GPU
+    #include <cuda_runtime.h>
+#endif
+
+#include "time_meter.hpp"
+#include <communication/global_policy.hpp>
+
+namespace nest {
+namespace mc {
+namespace util {
+
+// Name of the meter.
+std::string time_meter::name() {
+    return "time";
+}
+
+// Record a time stamp for the local domain. Every call ends with a global
+// barrier, so it has to be made on all domains.
+void time_meter::take_reading() {
+    // Wait for execution on this global domain to finish before recording the
+    // time stamp. For now this means waiting for all work to finish executing
+    // on the GPU (if GPU support is enabled)
+#ifdef NMC_HAVE_GPU
+    cudaDeviceSynchronize();
+#endif
+
+    // Record the time stamp
+    readings_.push_back(timer_type::tic());
+
+    // Enforce a global barrier after taking the time stamp
+    communication::global_policy::barrier();
+}
+
+// Return a single measurement named "walltime" with units "s", that has one
+// entry for each reading: 0 for the first reading, and the time elapsed since
+// the previous reading for every later one. Each entry is gathered onto the
+// root domain, so this has to be called on all domains.
+// Throws std::out_of_range if the number of readings differs between domains.
+std::vector<measurement> time_meter::measurements() {
+    using gcom = communication::global_policy;
+
+    // Calculate the elapsed time on the local domain for each interval,
+    // and store them in the times vector.
+    // The first reading has no predecessor, so its elapsed time is 0. Nothing is
+    // stored when no reading was taken, so that there is always exactly one
+    // entry for each reading.
+    std::vector<double> times;
+    if (!readings_.empty()) {
+        times.push_back(0);
+    }
+    for (auto i=1u; i<readings_.size(); ++i) {
+        double t = timer_type::difference(readings_[i-1], readings_[i]);
+        times.push_back(t);
+    }
+
+    // Assert that the same number of readings were taken on every domain.
+    const auto num_readings = times.size();
+    if (gcom::min(num_readings)!=gcom::max(num_readings)) {
+        throw std::out_of_range(
+            "the number of checkpoints in the \"time\" meter do not match across domains");
+    }
+
+    // Gather the timers from across all of the domains onto the root domain.
+    // Note: results are only valid on the root domain on completion.
+    measurement results;
+    results.name = "walltime";
+    results.units = "s";
+    for (auto t: times) {
+        results.measurements.push_back(gcom::gather(t, 0));
+    }
+
+    return {results};
+}
+
+} // namespace util
+} // namespace mc
+} // namespace nest
diff --git a/src/profiling/time_meter.hpp b/src/profiling/time_meter.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f9522af00e8a9e9fddfe74d0cd03f146ef0f4a54
--- /dev/null
+++ b/src/profiling/time_meter.hpp
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include <json/json.hpp>
+
+#include "meter.hpp"
+#include "profiler.hpp"
+
+namespace nest {
+namespace mc {
+namespace util {
+
+// A meter that records a time stamp at every reading, and reports the wall
+// time between consecutive readings.
+class time_meter : public meter {
+    std::vector<timer_type::time_point> readings_;
+
+public:
+    std::string name() override;
+    void take_reading() override;
+    std::vector<measurement> measurements() override;
+};
+
+} // namespace util
+} // namespace mc
+} // namespace nest
+