Skip to content
Snippets Groups Projects
Unverified Commit eee16c78 authored by Thorsten Hater's avatar Thorsten Hater Committed by GitHub
Browse files

:bar_chart: Make profiler output a bit nicer. (#2078)

- Profiler output is now correctly aligned regardless of widths.
- Python (and C++) can now actually set a limit on the nodes printed.

After (with a >=4% filter):
```
REGION                                  CALLS     WALL    THREAD        %
root                                        -    9.401    75.205    100.0
  advance                                   -    9.401    75.205    100.0
    integrate                               -    9.400    75.201    100.0
      current                               -    2.001    16.007     21.3
        zero                         29120728    0.518     4.142      5.5
        Granule_98_3D__all           24000600    0.459     3.675      4.9
        Granule_98_3D__soma_group    24000600    0.456     3.649      4.9
        Gran_CaPool_98               24000600    0.410     3.282      4.4
      state                                 -    1.861    14.888     19.8
        Granule_98_3D__soma_group    24000600    0.778     6.223      8.3
        Gran_CaPool_98               24000600    0.498     3.981      5.3
        Granule_98_3D__all           24000600    0.410     3.283      4.4
      events                         58241456    1.017     8.138     10.8
      voltage                        29120728    0.954     7.629     10.1
      ionupdate                      29120728    0.526     4.205      5.6
      threshold                      29120728    0.514     4.116      5.5
      samples                        29120728    0.513     4.103      5.5
      stimuli                        29120728    0.512     4.092      5.4
      post                           29120728    0.509     4.072      5.4
      stepsupdate                    29120728    0.500     4.003      5.3
      diffusion                      29120728    0.494     3.950      5.3
```

Before:
```
_p_ REGION                     CALLS      THREAD        WALL       %
_p_ root                           -      56.635       7.079   100.0
_p_   advance                      -      56.635       7.079   100.0
_p_     integrate                  -      56.633       7.079   100.0
_p_       current                  -      11.836       1.480    20.9
_p_         zero            29120728       3.133       0.392     5.5
_p_         Granule_98_3D__soma_group    24000600       2.816       0.352     5
_p_         Granule_98_3D__all    24000600       2.650       0.331     4.7
_p_         Gran_CaPool_98    24000600       2.291       0.286     4.0
_p_         MossyFiber_all     3840096       0.501       0.063     0.9
_p_         Golgi_98_3D_all     1280032       0.123       0.015     0.2
_p_         Golgi_98_3D_soma_group     1280032       0.120       0.015     0.2
_p_         Golgi_CaPool_CML     1280032       0.103       0.013     0.2
_p_         nernst           1280032       0.100       0.013     0.2
_p_       state                    -      11.343       1.418    20.0
_p_         Granule_98_3D__soma_group    24000600       4.918       0.615     8
_p_         Gran_CaPool_98    24000600       3.086       0.386     5.4
_p_         Granule_98_3D__all    24000600       2.288       0.286     4.0
_p_         MossyFiber_all     3840096       0.479       0.060     0.8
_p_         Golgi_98_3D_all     1280032       0.252       0.031     0.4
_p_         Golgi_98_3D_soma_group     1280032       0.180       0.023     0.3
_p_         Golgi_CaPool_CML     1280032       0.139       0.017     0.2
_p_       voltage           29120728       7.124       0.891    12.6
_p_       events            58241456       6.003       0.750    10.6
_p_       threshold         29120728       3.015       0.377     5.3
_p_       samples           29120728       2.976       0.372     5.3
_p_       ionupdate         29120728       2.961       0.370     5.2
_p_       stimuli           29120728       2.890       0.361     5.1
_p_       stepsupdate       29120728       2.868       0.358     5.1
_p_       post              29120728       2.820       0.353     5.0
_p_       diffusion         29120728       2.794       0.349     4.9
_p_       setup                  728       0.001       0.000     0.0
_p_     samplesetup              728       0.001       0.000     0.0
_p_     spikes                   728       0.000       0.000     0.0
_p_     eventsetup               728       0.000       0.000     0.0
_p_     sampledeliver            728       0.000       0.000     0.0
_p_   communication                -       0.000       0.000     0.0
_p_     enqueue                    -       0.000       0.000     0.0
_p_       sort                   728       0.000       0.000     0.0
_p_       merge                  728       0.000       0.000     0.0
_p_       setup                  728       0.000       0.000     0.0
_p_     walkspikes                 1       0.000       0.000     0.0
_p_     spikeio                    1       0.000       0.000     0.0
_p_     exchange                   -       0.000       0.000     0.0
_p_       gather                   1       0.000       0.000     0.0
_p_       sort                     1       0.000       0.000     0.0
_p_       gatherlocal              1       0.000       0.000     0.0
```
parent 2dabd87a
No related branches found
No related tags found
No related merge requests found
......@@ -41,6 +41,8 @@ ARB_ARBOR_API void profiler_enter(std::size_t region_id);
ARB_ARBOR_API void profiler_leave();
ARB_ARBOR_API profile profiler_summary();
ARB_ARBOR_API std::ostream& print_profiler_summary(std::ostream&, double limit=0.0);
ARB_ARBOR_API std::size_t profiler_region_id(const std::string& name);
ARB_ARBOR_API std::ostream& operator<<(std::ostream&, const profile&);
......
......@@ -306,35 +306,73 @@ const std::vector<std::string>& profiler::regions() const {
return region_names_;
}
void print(std::ostream& o,
profile_node& n,
float wall_time,
unsigned nthreads,
float thresh,
std::string indent="")
{
struct prof_line {
std::string name;
std::string count;
std::string time;
std::string thread;
std::string percent;
};
void print_lines(std::vector<prof_line>& lines,
profile_node& n,
float wall_time,
unsigned nthreads,
float thresh,
std::string indent) {
static char buf[80];
auto name = indent + n.name;
float per_thread_time = n.time/nthreads;
float proportion = n.time/wall_time*100;
// If the percentage of overall time for this region is below the
// threashold, stop drawing this branch.
if (proportion<thresh) return;
if (proportion < thresh) return;
prof_line res;
res.name = indent + n.name;
res.count = (n.count==profile_node::npos) ? "-" : std::to_string(n.count);
snprintf(buf, std::size(buf), "%.3f", float(n.time));
res.time = buf;
snprintf(buf, std::size(buf), "%.3f", float(per_thread_time));
res.thread = buf;
snprintf(buf, std::size(buf), "%.1f", float(proportion));
res.percent = buf;
lines.push_back(res);
// print each of the children in turn
for (auto& c: n.children) print_lines(lines, c, wall_time, nthreads, thresh, indent + " ");
};
if (n.count==profile_node::npos) {
snprintf(buf, std::size(buf), "_p_ %-20s%12s%12.3f%12.3f%8.1f",
name.c_str(), "-", float(n.time), per_thread_time, proportion);
}
else {
snprintf(buf, std::size(buf), "_p_ %-20s%12lu%12.3f%12.3f%8.1f",
name.c_str(), n.count, float(n.time), per_thread_time, proportion);
void print(std::ostream& os,
profile_node& n,
float wall_time,
unsigned nthreads,
float thresh) {
std::vector<prof_line> lines{{"REGION", "CALLS", "THREAD", "WALL", "\%"}};
print_lines(lines, n, wall_time, nthreads, thresh, "");
// fixing up lengths here
std::size_t max_len_name = 0;
std::size_t max_len_count = 0;
std::size_t max_len_thread = 0;
std::size_t max_len_time = 0;
std::size_t max_len_percent = 0;
for (const auto& line: lines) {
max_len_name = std::max(max_len_name, line.name.size());
max_len_count = std::max(max_len_count, line.count.size());
max_len_thread = std::max(max_len_thread, line.thread.size());
max_len_time = std::max(max_len_time, line.time.size());
max_len_percent = std::max(max_len_percent, line.percent.size());
}
o << "\n" << buf;
// print each of the children in turn
for (auto& c: n.children) print(o, c, wall_time, nthreads, thresh, indent+" ");
auto lpad = [](const std::string& s, std::size_t n) { return std::string(n - s.size(), ' ') + s + " "; };
auto rpad = [](const std::string& s, std::size_t n) { return s + std::string(n - s.size(), ' ') + " "; };
for (const auto& line: lines) os << rpad(line.name, max_len_name)
<< lpad(line.count, max_len_count)
<< lpad(line.thread, max_len_thread)
<< lpad(line.time, max_len_time)
<< lpad(line.percent, max_len_percent)
<< '\n';
};
//
......@@ -362,13 +400,8 @@ ARB_ARBOR_API void profiler_initialize(context ctx) {
// Print profiler statistics to an ostream
ARB_ARBOR_API std::ostream& operator<<(std::ostream& o, const profile& prof) {
char buf[80];
auto tree = make_profile_tree(prof);
snprintf(buf, std::size(buf), "_p_ %-20s%12s%12s%12s%8s", "REGION", "CALLS", "THREAD", "WALL", "\%");
o << buf;
print(o, tree, tree.time, prof.num_threads, 0, "");
print(o, tree, tree.time, prof.num_threads, 0);
return o;
}
......@@ -376,6 +409,13 @@ ARB_ARBOR_API profile profiler_summary() {
return profiler::get_global_profiler().results();
}
ARB_ARBOR_API std::ostream& print_profiler_summary(std::ostream& os, double limit) {
auto prof = profiler_summary();
auto tree = make_profile_tree(prof);
print(os, tree, tree.time, prof.num_threads, limit);
return os;
}
#else
ARB_ARBOR_API void profiler_leave() {}
......@@ -384,6 +424,8 @@ ARB_ARBOR_API profile profiler_summary();
ARB_ARBOR_API profile profiler_summary() {return profile();}
ARB_ARBOR_API region_id_type profiler_region_id(const std::string&) {return 0;}
ARB_ARBOR_API std::ostream& operator<<(std::ostream& o, const profile&) {return o;}
ARB_ARBOR_API std::ostream& profiler_print_summary(std::ostream& os, double limit) { return os; }
#endif // ARB_HAVE_PROFILING
......
......@@ -59,11 +59,14 @@ void register_profiler(pybind11::module& m) {
m.def("profiler_initialize", [](context_shim& ctx) {
arb::profile::profiler_initialize(ctx.context);
});
m.def("profiler_summary", [](){
std::stringstream stream;
stream << arb::profile::profiler_summary();
return stream.str();
});
m.def("profiler_summary",
[](double limit){
std::stringstream stream;
arb::profile::print_profiler_summary(stream, limit);
return stream.str();
},
pybind11::arg_v("limit", 0.0, "Print contributions above <limit> percent. Defaults to showing all.")
);
#endif
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment