diff --git a/CMakeLists.txt b/CMakeLists.txt index f91e332eecdd77b327846951ed9271d740e926df..bc45593eb2451be9818e2b75671d91341647e4e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,8 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(WITH_TBB OFF CACHE BOOL "use TBB for on-node threading" ) if(WITH_TBB) find_package(TBB REQUIRED) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWITH_TBB ${TBB_DEFINITIONS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TBB_DEFINITIONS}") + add_definitions(-DWITH_TBB) endif() # MPI support @@ -37,6 +38,12 @@ if(WITH_MPI) set_property(DIRECTORY APPEND_STRING PROPERTY COMPILE_OPTIONS "${MPI_C_COMPILE_FLAGS}") endif() +# Profiler support +set(WITH_PROFILING OFF CACHE BOOL "use built in profiling of miniapp" ) +if(WITH_PROFILING) + add_definitions(-DWITH_PROFILING) +endif() + # Cray systems set(SYSTEM_CRAY OFF CACHE BOOL "add flags for compilation on Cray systems") if(SYSTEM_CRAY) diff --git a/miniapp/io.cpp b/miniapp/io.cpp index e1a059ae1c072275d43211c84550d7ccb6b5c74e..97962f2931d19d673be5c0a35aa35c370bfd165f 100644 --- a/miniapp/io.cpp +++ b/miniapp/io.cpp @@ -8,7 +8,7 @@ namespace io { // for now this is just a placeholder options read_options(std::string fname) { // 10 cells, 1 synapses per cell, 10 compartments per segment - return {100, 1, 100}; + return {10, 1, 100}; } std::ostream& operator<<(std::ostream& o, const options& opt) { diff --git a/miniapp/miniapp.cpp b/miniapp/miniapp.cpp index 5f2082e1b41945ecd0452a27984b6dc34bcb20b8..15aae8e349227d90f6cabcf0ae1aedbf070d9ccf 100644 --- a/miniapp/miniapp.cpp +++ b/miniapp/miniapp.cpp @@ -6,11 +6,12 @@ #include <mechanism_interface.hpp> #include "io.hpp" -#include "threading/threading.hpp" -#include "profiling/profiler.hpp" -#include "communication/communicator.hpp" -#include "communication/serial_global_policy.hpp" -#include "communication/mpi_global_policy.hpp" + +#include <threading/threading.hpp> +#include <profiling/profiler.hpp> +#include 
<communication/communicator.hpp> +#include <communication/serial_global_policy.hpp> +#include <communication/mpi_global_policy.hpp> using namespace nest; @@ -142,9 +143,13 @@ int main(int argc, char** argv) { mc::io::options opt; try { opt = mc::io::read_options(""); + #ifdef WITH_MPI if (mc::mpi::rank()==0) { std::cout << opt << "\n"; } + #else + std::cout << opt << "\n"; + #endif } catch (std::exception e) { std::cerr << e.what() << std::endl; @@ -152,13 +157,12 @@ int main(int argc, char** argv) { } model m; - //ring_model(opt, m); all_to_all_model(opt, m); - ///////////////////////////////////////////////////// + // // time stepping - ///////////////////////////////////////////////////// - auto tfinal = 50.; + // + auto tfinal = 20.; auto dt = 0.01; auto id = m.communicator.domain_id(); @@ -168,8 +172,9 @@ int main(int argc, char** argv) { } m.run(tfinal, dt); + if (!id) { - m.print_times(); + //mc::util::data::profilers_.local().performance_tree().print(std::cout, 0.001); std::cout << "there were " << m.communicator.num_spikes() << " spikes\n"; } @@ -244,7 +249,6 @@ void all_to_all_model(nest::mc::io::options& opt, model& m) { auto basic_cell = make_cell(opt.compartments_per_segment, opt.cells-1); // make a vector for storing all of the cells - auto start_init = timer.tic(); id_type ncell_global = opt.cells; id_type ncell_local = ncell_global / m.communicator.num_domains(); int remainder = ncell_global - (ncell_local*m.communicator.num_domains()); @@ -261,7 +265,6 @@ void all_to_all_model(nest::mc::io::options& opt, model& m) { m.cell_groups[i] = make_lowered_cell(i, basic_cell); } ); - m.time_init = timer.toc(start_init); // // network creation diff --git a/src/profiling/profiler.hpp b/src/profiling/profiler.hpp index 11fbfb4fe34c9894f87c5c6fb6ec4103f4d31c60..7b82f00323baf7a3797d06ce76e54fb357738c6b 100644 --- a/src/profiling/profiler.hpp +++ b/src/profiling/profiler.hpp @@ -26,54 +26,53 @@ static inline std::string cyan(std::string s) { return s; } namespace 
impl { + /// simple hashing function for strings + /// - for easy comparison of strings over MPI + /// - for fast searching of regions named with strings static inline - size_t hash(std::string const& s) - { - size_t h = 5381; - for(auto c: s) { - h = ((h << 5) + h) + int(c); - } - return h; - } - - static inline - size_t hash(char* s) - { + size_t hash(const char* s) { size_t h = 5381; - while(*s) { + while (*s) { h = ((h << 5) + h) + int(*s); ++s; } return h; } + /// std::string overload for hash + static inline + size_t hash(const std::string& s) { + return hash(s.c_str()); + } + struct profiler_node { double value; std::string name; std::vector<profiler_node> children; - profiler_node() - : value(0.), name("") + profiler_node() : + value(0.), name("") {} - profiler_node(double v, std::string const& n) - : value(v), name(n) + profiler_node(double v, const std::string& n) : + value(v), name(n) {} - void print(int indent=0) - { + void print(int indent=0) { std::string s = std::string(indent, ' ') + name; std::cout << s << std::string(60-s.size(), '.') << value << "\n"; - for(auto &n: children) { + for (auto& n : children) { n.print(indent+2); } } - friend profiler_node operator +(profiler_node const& lhs, profiler_node const& rhs) + friend profiler_node operator+ ( + const profiler_node& lhs, + const profiler_node& rhs) { assert(lhs.name == rhs.name); auto node = lhs; @@ -81,26 +80,28 @@ namespace impl { return node; } - friend bool operator ==(profiler_node const& lhs, profiler_node const& rhs) + friend bool operator== ( + const profiler_node& lhs, + const profiler_node& rhs) { return lhs.name == rhs.name; } - void print(std::ostream& stream, double threshold) - { + void print(std::ostream& stream, double threshold) { // convert threshold from proportion to time threshold *= value; print_sub(stream, 0, threshold, value); } - void print_sub(std::ostream& stream, - int indent, - double threshold, - double total) + void print_sub( + std::ostream& stream, + int indent, +
double threshold, + double total) { char buffer[512]; - if(value < threshold) { + if (value < threshold) { std::cout << green("not printing ") << name << std::endl; return; } @@ -108,7 +109,7 @@ namespace impl { auto max_contribution = std::accumulate( children.begin(), children.end(), -1., - [] (double lhs, profiler_node const& rhs) { + [] (double lhs, const profiler_node& rhs) { return lhs > rhs.value ? lhs : rhs.value; } ); @@ -125,24 +126,24 @@ namespace impl { threshold==0. ? children.size()>0 : max_contribution >= threshold; - if(print_children) { + if (print_children) { stream << white(buffer) << std::endl; } else { stream << buffer << std::endl; } - if(print_children) { + if (print_children) { auto other = 0.; - for(auto &n : children) { - if(n.value<threshold || n.name=="other") { + for (auto &n : children) { + if (n.value<threshold || n.name=="other") { other += n.value; } else { n.print_sub(stream, indent + 2, threshold, total); } } - if(other >= threshold && children.size()) { + if (other >= threshold && children.size()) { label = indent_str + " other"; percentage = 100.*other/total; snprintf(buffer, sizeof(buffer), "%-25s%10.3f%10.1f", @@ -152,12 +153,10 @@ namespace impl { } } - void fuse(profiler_node const& other) - { - for(auto const& n : other.children) { - // linear search isn't ideal... 
- auto const it = std::find(children.begin(), children.end(), n); - if(it!=children.end()) { + void fuse(const profiler_node& other) { + for (auto& n : other.children) { + auto it = std::find(children.begin(), children.end(), n); + if (it!=children.end()) { (*it).fuse(n); } else { @@ -167,10 +166,7 @@ namespace impl { value += other.value; } - }; - - } // namespace impl using timer_type = nest::mc::threading::timer; @@ -183,10 +179,7 @@ class region_type { region_type *parent_ = nullptr; std::string name_; size_t hash_; - std::unordered_map< - size_t, - std::unique_ptr<region_type> - > subregions_; + std::unordered_map<size_t, std::unique_ptr<region_type>> subregions_; timer_type::time_point start_time_; double total_time_ = 0; @@ -194,24 +187,33 @@ public: using profiler_node = impl::profiler_node; - explicit region_type(std::string const& n) - : name_(n) + explicit region_type(std::string n) : + //name_(std::move(n)) + name_(n) { + std::cout << "creating region " << name_ << "\n"; start_time_ = timer_type::tic(); + std::cout << " started timer " << name_ << "\n"; hash_ = impl::hash(n); + std::cout << " hashed " << name_ << " -> " << hash_ << "\n"; } - - explicit region_type(const char* n) - : region_type(std::string(n)) + explicit region_type(const char* n) : + region_type(std::string(n)) {} - std::string const& name() const { + region_type(std::string n, region_type* p) : + region_type(std::move(n)) + { + parent_ = p; + } + + const std::string& name() const { return name_; } - void name(std::string const& n) { - name_ = n; + void name(std::string n) { + name_ = std::move(n); } region_type* parent() { @@ -221,33 +223,25 @@ public: void start_time() { start_time_ = timer_type::tic(); } void end_time () { total_time_ += timer_type::toc(start_time_); } - region_type(std::string const& n, region_type* p) - : region_type(n) - { - parent_ = p; - } - bool has_subregions() const { return subregions_.size() > 0; } - size_t hash () const { + size_t hash() const { return 
hash_; } - region_type* subregion(const char* n) - { + region_type* subregion(const char* n) { size_t hsh = impl::hash(n); auto s = subregions_.find(hsh); - if(s == subregions_.end()) { + if (s == subregions_.end()) { subregions_[hsh] = util::make_unique<region_type>(n, this); return subregions_[hsh].get(); } return s->second.get(); } - double subregion_contributions() const - { + double subregion_contributions() const { return std::accumulate( subregions_.begin(), subregions_.end(), 0., @@ -257,27 +251,26 @@ public: ); } - double total() const - { + double total() const { return total_time_; } profiler_node populate_performance_tree() const { profiler_node tree(total(), name()); - for(auto &it : subregions_) { + for (auto &it : subregions_) { tree.children.push_back(it.second->populate_performance_tree()); } // sort the contributions in descending order std::stable_sort( tree.children.begin(), tree.children.end(), - [](profiler_node const& lhs, profiler_node const& rhs) { + [](const profiler_node& lhs, const profiler_node& rhs) { return lhs.value>rhs.value; } ); - if(tree.children.size()) { + if (tree.children.size()) { // find the contribution of parts of the code that were not explicitly profiled auto contributions = std::accumulate( @@ -296,34 +289,31 @@ public: } }; -class Profiler { +class profiler { public: - Profiler(std::string const& name) - : root_region_(name) - { } + profiler(std::string name) : + root_region_(std::move(name)) + {} // the copy constructor doesn't do a "deep copy" - // it simply creates a new Profiler with the same name + // it simply creates a new profiler with the same name // This is needed for tbb to create a list of thread local profilers - Profiler(Profiler const& other) - : Profiler(other.root_region_.name()) + profiler(const profiler& other) : + profiler(other.root_region_.name()) {} - void enter(const char* name) - { - if(!is_activated()) return; + void enter(const char* name) { + if (!is_activated()) return; auto start = 
timer_type::tic(); current_region_ = current_region_->subregion(name); current_region_->start_time(); self_time_ += timer_type::toc(start); } - void leave() - { - if(!is_activated()) return; + void leave() { + if (!is_activated()) return; auto start = timer_type::tic(); - if(current_region_->parent()==nullptr) { - std::cout << "error" << std::endl; + if (current_region_->parent()==nullptr) { throw std::out_of_range("attempt to leave root memory tracing region"); } current_region_->end_time(); @@ -331,23 +321,19 @@ public: self_time_ += timer_type::toc(start); } - region_type& regions() - { + region_type& regions() { return root_region_; } - region_type* current_region() - { + region_type* current_region() { return current_region_; } - double self_time() const - { + double self_time() const { return self_time_; } - bool is_in_root() const - { + bool is_in_root() const { return &root_region_ == current_region_; } @@ -356,7 +342,7 @@ public: } void start() { - if(is_activated()) { + if (is_activated()) { throw std::out_of_range( "attempt to start an already running profiler" ); @@ -366,25 +352,25 @@ public: } void stop() { - if(!is_in_root()) { + if (!is_in_root()) { throw std::out_of_range( "attempt to profiler that is not in the root region" ); } root_region_.end_time(); - disactivate(); + deactivate(); } region_type::profiler_node performance_tree() { - if(is_activated()) { + if (is_activated()) { stop(); } return root_region_.populate_performance_tree(); } private: - void activate() { activated_ = true; } - void disactivate() { activated_ = false; } + void activate() { activated_ = true; } + void deactivate() { activated_ = false; } bool activated_ = false; region_type root_region_; @@ -392,6 +378,57 @@ private: double self_time_ = 0.; }; +namespace data { + using profiler_wrapper = nest::mc::threading::enumerable_thread_specific<profiler>; + profiler_wrapper profilers_(profiler("root")); +} + +/* +#ifdef WITH_PROFILING +namespace data { + using profiler_wrapper = 
nest::mc::threading::enumerable_thread_specific<profiler>; + profiler_wrapper profilers_(profiler("root")); +} + +inline profiler& get_profiler() { + auto& p = data::profilers_.local(); + if (!p.is_activated()) { + p.start(); + } + return p; +} + +// this will throw an exception if the profiler has already been started +inline void profiler_start() { + data::profilers_.local().start(); +} +inline void profiler_stop() { + get_profiler().stop(); +} +inline void profiler_enter(const char* n) { + get_profiler().enter(n); +} +inline void profiler_leave() { + get_profiler().leave(); +} + +// iterate over all profilers and ensure that they have the same start stop times +inline void stop_profilers() { + std::cout << "::profiler : stopping " << data::profilers_.size() << " profilers\n"; + for (auto& p : data::profilers_) { + p.stop(); + } +} + +#else +*/ +inline void profiler_start() {} +inline void profiler_stop() {} +inline void profiler_enter(const char* n) {} +inline void profiler_leave() {} +inline void stop_profilers() {} +//#endif + } // namespace util } // namespace mc } // namespace nest diff --git a/src/threading/serial.hpp b/src/threading/serial.hpp index ebdede8426d2fa8a0ba0b7b327595b83af0970ee..0af901fbc16be94dabbbc3c159a5aae01159556f 100644 --- a/src/threading/serial.hpp +++ b/src/threading/serial.hpp @@ -21,6 +21,16 @@ class enumerable_thread_specific { public : + enumerable_thread_specific() = default; + + enumerable_thread_specific(const T& init) : + data{init} + {} + + enumerable_thread_specific(T&& init) : + data{std::move(init)} + {} + T& local() { return data[0]; }