From eee16c7898db2232340e350819a75f050c143e28 Mon Sep 17 00:00:00 2001 From: Thorsten Hater <24411438+thorstenhater@users.noreply.github.com> Date: Thu, 12 Jan 2023 18:44:50 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=8A=20=20Make=20profiler=20output=20a?= =?UTF-8?q?=20bit=20nicer.=20(#2078)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Profiler output is now correctly aligned regardless of widths. - Python (and C++) can now actually set a limit on the nodes printed. After (with a >=4% filter): ``` REGION CALLS WALL THREAD % root - 9.401 75.205 100.0 advance - 9.401 75.205 100.0 integrate - 9.400 75.201 100.0 current - 2.001 16.007 21.3 zero 29120728 0.518 4.142 5.5 Granule_98_3D__all 24000600 0.459 3.675 4.9 Granule_98_3D__soma_group 24000600 0.456 3.649 4.9 Gran_CaPool_98 24000600 0.410 3.282 4.4 state - 1.861 14.888 19.8 Granule_98_3D__soma_group 24000600 0.778 6.223 8.3 Gran_CaPool_98 24000600 0.498 3.981 5.3 Granule_98_3D__all 24000600 0.410 3.283 4.4 events 58241456 1.017 8.138 10.8 voltage 29120728 0.954 7.629 10.1 ionupdate 29120728 0.526 4.205 5.6 threshold 29120728 0.514 4.116 5.5 samples 29120728 0.513 4.103 5.5 stimuli 29120728 0.512 4.092 5.4 post 29120728 0.509 4.072 5.4 stepsupdate 29120728 0.500 4.003 5.3 diffusion 29120728 0.494 3.950 5.3 ``` Before: ``` _p_ REGION CALLS THREAD WALL % _p_ root - 56.635 7.079 100.0 _p_ advance - 56.635 7.079 100.0 _p_ integrate - 56.633 7.079 100.0 _p_ current - 11.836 1.480 20.9 _p_ zero 29120728 3.133 0.392 5.5 _p_ Granule_98_3D__soma_group 24000600 2.816 0.352 5 _p_ Granule_98_3D__all 24000600 2.650 0.331 4.7 _p_ Gran_CaPool_98 24000600 2.291 0.286 4.0 _p_ MossyFiber_all 3840096 0.501 0.063 0.9 _p_ Golgi_98_3D_all 1280032 0.123 0.015 0.2 _p_ Golgi_98_3D_soma_group 1280032 0.120 0.015 0.2 _p_ Golgi_CaPool_CML 1280032 0.103 0.013 0.2 _p_ nernst 1280032 0.100 0.013 0.2 _p_ state - 11.343 1.418 20.0 _p_ Granule_98_3D__soma_group 24000600 4.918 0.615 8 _p_ Gran_CaPool_98 24000600 3.086 0.386 5.4 _p_ Granule_98_3D__all 24000600 2.288 0.286 4.0 _p_ MossyFiber_all 3840096 0.479 0.060 0.8 _p_ Golgi_98_3D_all 1280032 0.252 0.031 0.4 _p_ Golgi_98_3D_soma_group 1280032 0.180 0.023 0.3 _p_ Golgi_CaPool_CML 1280032 0.139 0.017 0.2 _p_ voltage 29120728 7.124 0.891 12.6 _p_ events 58241456 6.003 0.750 10.6 _p_ threshold 29120728 3.015 0.377 5.3 _p_ samples 29120728 2.976 0.372 5.3 _p_ ionupdate 29120728 2.961 0.370 5.2 _p_ stimuli 29120728 2.890 0.361 5.1 _p_ stepsupdate 29120728 2.868 0.358 5.1 _p_ post 29120728 2.820 0.353 5.0 _p_ diffusion 29120728 2.794 0.349 4.9 _p_ setup 728 0.001 0.000 0.0 _p_ samplesetup 728 0.001 0.000 0.0 _p_ spikes 728 0.000 0.000 0.0 _p_ eventsetup 728 0.000 0.000 0.0 _p_ sampledeliver 728 0.000 0.000 0.0 _p_ communication - 0.000 0.000 0.0 _p_ enqueue - 0.000 0.000 0.0 _p_ sort 728 0.000 0.000 0.0 _p_ merge 728 0.000 0.000 0.0 _p_ setup 728 0.000 0.000 0.0 _p_ walkspikes 1 0.000 0.000 0.0 _p_ spikeio 1 0.000 0.000 0.0 _p_ exchange - 0.000 0.000 0.0 _p_ gather 1 0.000 0.000 0.0 _p_ sort 1 0.000 0.000 0.0 _p_ gatherlocal 1 0.000 0.000 0.0 ``` --- arbor/include/arbor/profile/profiler.hpp | 2 + arbor/profile/profiler.cpp | 92 +++++++++++++++++------- python/profiler.cpp | 13 ++-- 3 files changed, 77 insertions(+), 30 deletions(-) diff --git a/arbor/include/arbor/profile/profiler.hpp b/arbor/include/arbor/profile/profiler.hpp index fd8dd980..810600eb 100644 --- a/arbor/include/arbor/profile/profiler.hpp +++ b/arbor/include/arbor/profile/profiler.hpp @@ -41,6 +41,8 @@ ARB_ARBOR_API void profiler_enter(std::size_t region_id); ARB_ARBOR_API void profiler_leave(); ARB_ARBOR_API profile profiler_summary(); + +ARB_ARBOR_API std::ostream& print_profiler_summary(std::ostream&, double limit=0.0); ARB_ARBOR_API std::size_t profiler_region_id(const std::string& name); ARB_ARBOR_API std::ostream& operator<<(std::ostream&, const profile&); diff --git a/arbor/profile/profiler.cpp b/arbor/profile/profiler.cpp index 0a6af0a2..fccb33ba 100644 --- a/arbor/profile/profiler.cpp +++ b/arbor/profile/profiler.cpp @@ -306,35 +306,73 @@ const std::vector<std::string>& profiler::regions() const { return region_names_; } -void print(std::ostream& o, - profile_node& n, - float wall_time, - unsigned nthreads, - float thresh, - std::string indent="") -{ +struct prof_line { + std::string name; + std::string count; + std::string time; + std::string thread; + std::string percent; +}; + +void print_lines(std::vector<prof_line>& lines, + profile_node& n, + float wall_time, + unsigned nthreads, + float thresh, + std::string indent) { static char buf[80]; - auto name = indent + n.name; float per_thread_time = n.time/nthreads; float proportion = n.time/wall_time*100; // If the percentage of overall time for this region is below the // threashold, stop drawing this branch. - if (proportion<thresh) return; + if (proportion < thresh) return; + + prof_line res; + res.name = indent + n.name; + res.count = (n.count==profile_node::npos) ? "-" : std::to_string(n.count); + snprintf(buf, std::size(buf), "%.3f", float(n.time)); + res.time = buf; + snprintf(buf, std::size(buf), "%.3f", float(per_thread_time)); + res.thread = buf; + snprintf(buf, std::size(buf), "%.1f", float(proportion)); + res.percent = buf; + lines.push_back(res); + // print each of the children in turn + for (auto& c: n.children) print_lines(lines, c, wall_time, nthreads, thresh, indent + " "); +}; - if (n.count==profile_node::npos) { - snprintf(buf, std::size(buf), "_p_ %-20s%12s%12.3f%12.3f%8.1f", - name.c_str(), "-", float(n.time), per_thread_time, proportion); - } - else { - snprintf(buf, std::size(buf), "_p_ %-20s%12lu%12.3f%12.3f%8.1f", - name.c_str(), n.count, float(n.time), per_thread_time, proportion); +void print(std::ostream& os, + profile_node& n, + float wall_time, + unsigned nthreads, + float thresh) { + std::vector<prof_line> lines{{"REGION", "CALLS", "THREAD", "WALL", "\%"}}; + print_lines(lines, n, wall_time, nthreads, thresh, ""); + // fixing up lengths here + std::size_t max_len_name = 0; + std::size_t max_len_count = 0; + std::size_t max_len_thread = 0; + std::size_t max_len_time = 0; + std::size_t max_len_percent = 0; + for (const auto& line: lines) { + max_len_name = std::max(max_len_name, line.name.size()); + max_len_count = std::max(max_len_count, line.count.size()); + max_len_thread = std::max(max_len_thread, line.thread.size()); + max_len_time = std::max(max_len_time, line.time.size()); + max_len_percent = std::max(max_len_percent, line.percent.size()); } - o << "\n" << buf; - // print each of the children in turn - for (auto& c: n.children) print(o, c, wall_time, nthreads, thresh, indent+" "); + auto lpad = [](const std::string& s, std::size_t n) { return std::string(n - s.size(), ' ') + s + " "; }; + auto rpad = [](const std::string& s, std::size_t n) { return s + std::string(n - s.size(), ' ') + " "; }; + + for (const auto& line: lines) os << rpad(line.name, max_len_name) + << lpad(line.count, max_len_count) + << lpad(line.thread, max_len_thread) + << lpad(line.time, max_len_time) + << lpad(line.percent, max_len_percent) + << '\n'; }; // @@ -362,13 +400,8 @@ ARB_ARBOR_API void profiler_initialize(context ctx) { // Print profiler statistics to an ostream ARB_ARBOR_API std::ostream& operator<<(std::ostream& o, const profile& prof) { - char buf[80]; - auto tree = make_profile_tree(prof); - - snprintf(buf, std::size(buf), "_p_ %-20s%12s%12s%12s%8s", "REGION", "CALLS", "THREAD", "WALL", "\%"); - o << buf; - print(o, tree, tree.time, prof.num_threads, 0, ""); + print(o, tree, tree.time, prof.num_threads, 0); return o; } @@ -376,6 +409,13 @@ ARB_ARBOR_API profile profiler_summary() { return profiler::get_global_profiler().results(); } +ARB_ARBOR_API std::ostream& print_profiler_summary(std::ostream& os, double limit) { + auto prof = profiler_summary(); + auto tree = make_profile_tree(prof); + print(os, tree, tree.time, prof.num_threads, limit); + return os; +} + #else ARB_ARBOR_API void profiler_leave() {} @@ -384,6 +424,8 @@ ARB_ARBOR_API profile profiler_summary(); ARB_ARBOR_API profile profiler_summary() {return profile();} ARB_ARBOR_API region_id_type profiler_region_id(const std::string&) {return 0;} ARB_ARBOR_API std::ostream& operator<<(std::ostream& o, const profile&) {return o;} +ARB_ARBOR_API std::ostream& profiler_print_summary(std::ostream& os, double limit) { return os; } + #endif // ARB_HAVE_PROFILING diff --git a/python/profiler.cpp b/python/profiler.cpp index 31fe6b06..b3fe1011 100644 --- a/python/profiler.cpp +++ b/python/profiler.cpp @@ -59,11 +59,14 @@ void register_profiler(pybind11::module& m) { m.def("profiler_initialize", [](context_shim& ctx) { arb::profile::profiler_initialize(ctx.context); }); - m.def("profiler_summary", [](){ - std::stringstream stream; - stream << arb::profile::profiler_summary(); - return stream.str(); - }); + m.def("profiler_summary", + [](double limit){ + std::stringstream stream; + arb::profile::print_profiler_summary(stream, limit); + return stream.str(); + }, + pybind11::arg_v("limit", 0.0, "Print contributions above <limit> percent. Defaults to showing all.") + ); #endif } -- GitLab