diff --git a/miniapp/io.cpp b/miniapp/io.cpp index 97962f2931d19d673be5c0a35aa35c370bfd165f..887848b549b7bad46574b92ac9fcde74faae5d5c 100644 --- a/miniapp/io.cpp +++ b/miniapp/io.cpp @@ -8,7 +8,7 @@ namespace io { // for now this is just a placeholder options read_options(std::string fname) { // 10 cells, 1 synapses per cell, 10 compartments per segment - return {10, 1, 100}; + return {200, 1, 100}; } std::ostream& operator<<(std::ostream& o, const options& opt) { diff --git a/miniapp/miniapp.cpp b/miniapp/miniapp.cpp index 15aae8e349227d90f6cabcf0ae1aedbf070d9ccf..18326d8db4df56bece5f422efe05c9590e82a684 100644 --- a/miniapp/miniapp.cpp +++ b/miniapp/miniapp.cpp @@ -11,7 +11,6 @@ #include <profiling/profiler.hpp> #include <communication/communicator.hpp> #include <communication/serial_global_policy.hpp> -#include <communication/mpi_global_policy.hpp> using namespace nest; @@ -21,6 +20,7 @@ using id_type = uint32_t; using numeric_cell = mc::fvm::fvm_cell<real_type, index_type>; using cell_group = mc::cell_group<numeric_cell>; #ifdef WITH_MPI +#include <communication/mpi_global_policy.hpp> using communicator_type = mc::communication::communicator<mc::communication::mpi_global_policy>; #else @@ -32,17 +32,6 @@ struct model { communicator_type communicator; std::vector<cell_group> cell_groups; - double time_init; - double time_network; - double time_solve; - double time_comms; - void print_times() const { - std::cout << "initialization took " << time_init << " s\n"; - std::cout << "network took " << time_network << " s\n"; - std::cout << "solve took " << time_solve << " s\n"; - std::cout << "comms took " << time_comms << " s\n"; - } - int num_groups() const { return cell_groups.size(); } @@ -50,30 +39,32 @@ struct model { void run(double tfinal, double dt) { auto t = 0.; auto delta = communicator.min_delay(); - time_solve = 0.; - time_comms = 0.; while(t<tfinal) { - auto start_solve = mc::util::timer_type::tic(); mc::threading::parallel_for::apply( 0, num_groups(), [&](int i) { + mc::util::profiler_enter("stepping","events"); cell_groups[i].enqueue_events(communicator.queue(i)); + mc::util::profiler_leave(); cell_groups[i].advance(t+delta, dt); + mc::util::profiler_enter("events"); communicator.add_spikes(cell_groups[i].spikes()); cell_groups[i].clear_spikes(); + mc::util::profiler_leave(2); } ); - time_solve += mc::util::timer_type::toc(start_solve); - auto start_comms = mc::util::timer_type::tic(); + mc::util::profiler_enter("stepping", "exchange"); communicator.exchange(); - time_comms += mc::util::timer_type::toc(start_comms); + mc::util::profiler_leave(2); t += delta; } } void init_communicator() { + mc::util::profiler_enter("setup", "communicator"); + // calculate the source and synapse distribution serially std::vector<id_type> target_counts(num_groups()); std::vector<id_type> source_counts(num_groups()); @@ -87,9 +78,12 @@ struct model { // create connections communicator = communicator_type(num_groups(), target_counts); + + mc::util::profiler_leave(2); } void update_gids() { + mc::util::profiler_enter("setup", "globalize"); auto com_policy = communicator.communication_policy(); auto global_source_map = com_policy.make_map(source_map.back()); auto domain_idx = communicator.domain_id(); @@ -97,6 +91,7 @@ struct model { cell_groups[i].set_source_gids(source_map[i]+global_source_map[domain_idx]); cell_groups[i].set_target_gids(target_map[i]+communicator.target_gid_from_group_lid(0)); } + mc::util::profiler_leave(2); } // TODO : only stored here because init_communicator() and update_gids() are split @@ -162,7 +157,7 @@ int main(int argc, char** argv) { // // time stepping // - auto tfinal = 20.; + auto tfinal = 10.; auto dt = 0.01; auto id = m.communicator.domain_id(); @@ -174,7 +169,7 @@ int main(int argc, char** argv) { m.run(tfinal, dt); if (!id) { - //mc::util::data::profilers_.local().performance_tree().print(std::cout, 0.001); + mc::util::profiler_output(0.00001); std::cout << "there were " << m.communicator.num_spikes() << " spikes\n"; } @@ -206,7 +201,6 @@ void ring_model(nest::mc::io::options& opt, model& m) { auto basic_cell = make_cell(opt.compartments_per_segment, 1); // make a vector for storing all of the cells - auto start_init = mc::util::timer_type::tic(); m.cell_groups = std::vector<cell_group>(opt.cells); // initialize the cells in parallel @@ -214,15 +208,17 @@ void ring_model(nest::mc::io::options& opt, model& m) { 0, opt.cells, [&](int i) { // initialize cell + mc::util::profiler_enter("setup"); + mc::util::profiler_enter("make cell"); m.cell_groups[i] = make_lowered_cell(i, basic_cell); + mc::util::profiler_leave(); + mc::util::profiler_leave(); } ); - m.time_init = mc::util::timer_type::toc(start_init); // // network creation // - auto start_network = mc::util::timer_type::tic(); m.init_communicator(); for (auto i=0u; i<(id_type)opt.cells; ++i) { @@ -232,18 +228,13 @@ void ring_model(nest::mc::io::options& opt, model& m) { }); } - m.communicator.construct(); - m.update_gids(); - - m.time_network = mc::util::timer_type::toc(start_network); } void all_to_all_model(nest::mc::io::options& opt, model& m) { // // make cells // - auto timer = mc::util::timer_type(); // make a basic cell auto basic_cell = make_cell(opt.compartments_per_segment, opt.cells-1); @@ -262,16 +253,18 @@ void all_to_all_model(nest::mc::io::options& opt, model& m) { mc::threading::parallel_for::apply( 0, ncell_local, [&](int i) { + mc::util::profiler_enter("setup", "cells"); m.cell_groups[i] = make_lowered_cell(i, basic_cell); + mc::util::profiler_leave(2); } ); // // network creation // - auto start_network = timer.tic(); m.init_communicator(); + mc::util::profiler_enter("setup", "connections"); // lid is local cell/group id for (auto lid=0u; lid<ncell_local; ++lid) { auto target = m.communicator.target_gid_from_group_lid(lid); @@ -288,10 +281,9 @@ void all_to_all_model(nest::mc::io::options& opt, model& m) { } m.communicator.construct(); + mc::util::profiler_leave(2); m.update_gids(); - - m.time_network = timer.toc(start_network); } /////////////////////////////////////// @@ -333,8 +325,8 @@ mc::cell make_cell(int compartments_per_segment, int num_synapses) { // add dendrite of length 200 um and diameter 1 um with passive channel std::vector<mc::cable_segment*> dendrites; dendrites.push_back(cell.add_cable(0, mc::segmentKind::dendrite, 0.5, 0.5, 200)); - dendrites.push_back(cell.add_cable(1, mc::segmentKind::dendrite, 0.5, 0.25, 100)); - dendrites.push_back(cell.add_cable(1, mc::segmentKind::dendrite, 0.5, 0.25, 100)); + //dendrites.push_back(cell.add_cable(1, mc::segmentKind::dendrite, 0.5, 0.25, 100)); + //dendrites.push_back(cell.add_cable(1, mc::segmentKind::dendrite, 0.5, 0.25, 100)); for (auto d : dendrites) { d->add_mechanism(mc::pas_parameters()); diff --git a/src/cell_group.hpp b/src/cell_group.hpp index 85a0832bf24a5720951b35db392133e2d877a312..4964ad10a2328c96593f9e6b07b136baa7ca7fcd 100644 --- a/src/cell_group.hpp +++ b/src/cell_group.hpp @@ -7,6 +7,8 @@ #include <communication/spike.hpp> #include <communication/spike_source.hpp> +#include <profiling/profiler.hpp> + namespace nest { namespace mc { @@ -76,6 +78,7 @@ class cell_group { // integrate cell state cell_.advance(tnext - cell_.time()); + nest::mc::util::profiler_enter("events"); // check for new spikes for (auto& s : spike_sources_) { if (auto spike = s.source.test(cell_, cell_.time())) { @@ -93,6 +96,7 @@ class cell_group { cell_.apply_event(e.get()); } } + nest::mc::util::profiler_leave(); } } diff --git a/src/fvm_cell.hpp b/src/fvm_cell.hpp index 4bd215716b2b77471e8c488925f3c54f6105e165..c89dcdf0c3e920b4aa370f71a48767f4e20d1040 100644 --- a/src/fvm_cell.hpp +++ b/src/fvm_cell.hpp @@ -17,10 +17,12 @@ #include <segment.hpp> #include <stimulus.hpp> #include <util.hpp> +#include <profiling/profiler.hpp> #include <vector/include/Vector.hpp> #include <mechanisms/expsyn.hpp> + namespace nest { namespace mc { namespace fvm { @@ -113,9 +115,6 @@ class fvm_cell { /// make a time step void advance(value_type dt); - /// advance solution to target time tfinal with maximum step size dt - void advance_to(value_type tfinal, value_type dt); - /// pass an event to the appropriate synapse and call net_receive void apply_event(local_event e) { mechanisms_[synapse_index_]->net_receive(e.target, e.weight); @@ -479,12 +478,15 @@ void fvm_cell<T, I>::advance(T dt) { using memory::all; + mc::util::profiler_enter("current"); current_(all) = 0.; // update currents from ion channels for(auto& m : mechanisms_) { + mc::util::profiler_enter(m->name().c_str()); m->set_params(t_, dt); m->nrn_current(); + mc::util::profiler_leave(); } // add current contributions from stimulii @@ -495,42 +497,29 @@ void fvm_cell<T, I>::advance(T dt) // the factor of 100 scales the injected current to 10^2.nA current_[loc] -= 100.*ie/cv_areas_[loc]; } + mc::util::profiler_leave(); + mc::util::profiler_enter("matrix", "setup"); // solve the linear system setup_matrix(dt); + mc::util::profiler_leave(); mc::util::profiler_enter("solve"); matrix_.solve(); + mc::util::profiler_leave(); voltage_(all) = matrix_.rhs(); + mc::util::profiler_leave(); + mc::util::profiler_enter("state"); // integrate state of gating variables etc. for(auto& m : mechanisms_) { + mc::util::profiler_enter(m->name().c_str()); m->nrn_state(); + mc::util::profiler_leave(); } + mc::util::profiler_leave(); t_ += dt; } -/* -template <typename T, typename I> -void fvm_cell<T, I>::advance_to(T tfinal, T dt) -{ - if(t_>=tfinal) { - return; - } - - do { - auto tstep = std::min(tfinal, t_+dt); - auto next = events_.pop_if_before(tstep); - auto tnext = next? next->time: tstep; - - advance(tnext-t_); - t_ = tnext; - if (next) { // handle event - mechanisms_[synapse_index_]->net_receive(next->target, next->weight); - } - } while(t_<tfinal); -} -*/ - } // namespace fvm } // namespace mc } // namespace nest diff --git a/src/profiling/profiler.hpp b/src/profiling/profiler.hpp index 7b82f00323baf7a3797d06ce76e54fb357738c6b..8af4e334535f50e9b985a2ef2dd37da33c67106c 100644 --- a/src/profiling/profiler.hpp +++ b/src/profiling/profiler.hpp @@ -18,11 +18,11 @@ namespace nest { namespace mc { namespace util { -static inline std::string green(std::string s) { return s; } -static inline std::string yellow(std::string s) { return s; } -static inline std::string white(std::string s) { return s; } -static inline std::string red(std::string s) { return s; } -static inline std::string cyan(std::string s) { return s; } +inline std::string green(std::string s) { return s; } +inline std::string yellow(std::string s) { return s; } +inline std::string white(std::string s) { return s; } +inline std::string red(std::string s) { return s; } +inline std::string cyan(std::string s) { return s; } namespace impl { @@ -30,7 +30,7 @@ namespace impl { /// - for easy comparison of strings over MPI /// - for fast searching of regions named with strings static inline - size_t hash(char* s) { + size_t hash(const char* s) { size_t h = 5381; while (*s) { @@ -143,7 +143,7 @@ namespace impl { n.print_sub(stream, indent + 2, threshold, total); } } - if (other >= threshold && children.size()) { + if (other>=std::max(threshold, 0.01) && children.size()) { label = indent_str + " other"; percentage = 100.*other/total; snprintf(buffer, sizeof(buffer), "%-25s%10.3f%10.1f", @@ -188,14 +188,10 @@ public: using profiler_node = impl::profiler_node; explicit region_type(std::string n) : - //name_(std::move(n)) - name_(n) + name_(std::move(n)) { - std::cout << "creating region " << name_ << "\n"; start_time_ = timer_type::tic(); - std::cout << " started timer " << name_ << "\n"; hash_ = impl::hash(n); - std::cout << " hashed " << name_ << " -> " << hash_ << "\n"; } explicit region_type(const char* n) : @@ -304,21 +300,26 @@ public: void enter(const char* name) { if (!is_activated()) return; - auto start = timer_type::tic(); current_region_ = current_region_->subregion(name); current_region_->start_time(); - self_time_ += timer_type::toc(start); } void leave() { if (!is_activated()) return; - auto start = timer_type::tic(); if (current_region_->parent()==nullptr) { throw std::out_of_range("attempt to leave root memory tracing region"); } current_region_->end_time(); current_region_ = current_region_->parent(); - self_time_ += timer_type::toc(start); + } + + // step up multiple n levels in one call + void leave(int n) { + EXPECTS(n>=1); + + while(n--) { + leave(); + } } region_type& regions() { @@ -329,10 +330,6 @@ public: return current_region_; } - double self_time() const { - return self_time_; - } - bool is_in_root() const { return &root_region_ == current_region_; } @@ -348,19 +345,28 @@ public: ); } activate(); + start_time_ = timer_type::tic(); root_region_.start_time(); } void stop() { if (!is_in_root()) { throw std::out_of_range( - "attempt to profiler that is not in the root region" + "profiler must be in root region when stopped" ); } root_region_.end_time(); + stop_time_ = timer_type::tic(); + deactivate(); } + timer_type::time_point start_time() const { return start_time_; } + timer_type::time_point stop_time() const { return stop_time_; } + double wall_time() const { + return timer_type::difference(start_time_, stop_time_); + } + region_type::profiler_node performance_tree() { if (is_activated()) { stop(); @@ -372,18 +378,13 @@ private: void activate() { activated_ = true; } void deactivate() { activated_ = false; } + timer_type::time_point start_time_; + timer_type::time_point stop_time_; bool activated_ = false; region_type root_region_; region_type* current_region_ = &root_region_; - double self_time_ = 0.; }; -namespace data { - using profiler_wrapper = nest::mc::threading::enumerable_thread_specific<profiler>; - profiler_wrapper profilers_(profiler("root")); -} - -/* #ifdef WITH_PROFILING namespace data { using profiler_wrapper = nest::mc::threading::enumerable_thread_specific<profiler>; @@ -408,9 +409,19 @@ inline void profiler_stop() { inline void profiler_enter(const char* n) { get_profiler().enter(n); } + +template <class...Args> +void profiler_enter(const char* n, Args... args) { + get_profiler().enter(n); + profiler_enter(args...); +} + inline void profiler_leave() { get_profiler().leave(); } +inline void profiler_leave(int nlevels) { + get_profiler().leave(nlevels); +} // iterate over all profilers and ensure that they have the same start stop times inline void stop_profilers() { @@ -420,14 +431,28 @@ inline void stop_profilers() { } } +inline void profiler_output(double threshold) { + stop_profilers(); + auto p = impl::profiler_node(0, "results"); + for(auto& thread_profiler : data::profilers_) { + std::cout << "fusing profiler : " << thread_profiler.wall_time() << " s\n"; + p.fuse(thread_profiler.performance_tree()); + } + p.print(std::cout, threshold); +} + #else -*/ inline void profiler_start() {} inline void profiler_stop() {} -inline void profiler_enter(const char* n) {} +inline void profiler_enter(const char*) {} +template <class...Args> +void profiler_enter(const char*, Args... args) {} +inline void profiler_enter(const char*, const char*, const char*) {} inline void profiler_leave() {} +inline void profiler_leave(int) {} inline void stop_profilers() {} -//#endif +inline void profiler_output(double threshold) {} +#endif } // namespace util } // namespace mc diff --git a/src/threading/serial.hpp b/src/threading/serial.hpp index 0af901fbc16be94dabbbc3c159a5aae01159556f..3a2bc0ad13aa922dd28829f7c6114d1a14ccdefa 100644 --- a/src/threading/serial.hpp +++ b/src/threading/serial.hpp @@ -31,29 +31,16 @@ class enumerable_thread_specific { data{std::move(init)} {} - T& local() { - return data[0]; - } - const T& local() const { - return data[0]; - } + T& local() { return data[0]; } + const T& local() const { return data[0]; } - auto begin() -> decltype(data.begin()) - { - return data.begin(); - } - auto end() -> decltype(data.end()) - { - return data.end(); - } - auto cbegin() -> decltype(data.cbegin()) - { - return data.cbegin(); - } - auto cend() -> decltype(data.cend()) - { - return data.cend(); - } + auto size() -> decltype(data.size()) const { return data.size(); } + + auto begin() -> decltype(data.begin()) { return data.begin(); } + auto end() -> decltype(data.end()) { return data.end(); } + + auto cbegin() -> decltype(data.cbegin()) const { return data.cbegin(); } + auto cend() -> decltype(data.cend()) const { return data.cend(); } }; @@ -77,17 +64,17 @@ std::string description() { struct timer { using time_point = std::chrono::time_point<std::chrono::system_clock>; - static - inline time_point tic() - { + static inline time_point tic() { return std::chrono::system_clock::now(); } - static - inline double toc(time_point t) - { + static inline double toc(time_point t) { return std::chrono::duration<double>(tic() - t).count(); } + + static inline double difference(time_point b, time_point e) { + return std::chrono::duration<double>(e-b).count(); + } }; diff --git a/src/threading/tbb.hpp b/src/threading/tbb.hpp index 32097586169f794c90baafdbd948f8b60df025d6..a36f2b876b788a3d7c2e52477e652fc4d66c49c9 100644 --- a/src/threading/tbb.hpp +++ b/src/threading/tbb.hpp @@ -32,17 +32,17 @@ std::string description() { struct timer { using time_point = tbb::tick_count; - static - inline time_point tic() - { + static inline time_point tic() { return tbb::tick_count::now(); } - static - inline double toc(time_point t) - { + static inline double toc(time_point t) { return (tic() - t).seconds(); } + + static inline double difference(time_point b, time_point e) { + return (e-b).seconds(); + } }; } // threading