diff --git a/CMakeLists.txt b/CMakeLists.txt index d4cd87e44c0b4306f4bed59f10685b0bf723a0a6..f51b8f5324c6b1fef8caac5c83e3212d85801540 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,20 +127,23 @@ endif() #---------------------------------------------------------- # MPI support #---------------------------------------------------------- -option(NMC_WITH_MPI "use MPI for distrubuted parallelism" OFF) - +option(NMC_WITH_MPI "use MPI for distributed parallelism" OFF) if(NMC_WITH_MPI) - find_package(MPI REQUIRED) + # BGQ specific flags + if(${NMC_SYSTEM_TYPE} MATCHES "BGQ" ) + # On BGQ the MPI compiler wrapper is used as CXX, so skip MPI detection below and set the BGQ-specific flags directly + add_definitions(-DMPICH2_CONST=const) + set(MPI_FOUND TRUE) + endif() + + if (NOT MPI_FOUND) + find_package(MPI REQUIRED) + endif() include_directories(SYSTEM ${MPI_C_INCLUDE_PATH}) add_definitions(-DNMC_HAVE_MPI) # unfortunate workaround for C++ detection in system mpi.h add_definitions(-DMPICH_SKIP_MPICXX=1 -DOMPI_SKIP_MPICXX=1) set_property(DIRECTORY APPEND_STRING PROPERTY COMPILE_OPTIONS "${MPI_C_COMPILE_FLAGS}") - - # BGQ specific flags - if(${NMC_SYSTEM_TYPE} MATCHES "BGQ" ) - add_definitions(-DMPICH2_CONST=const) - endif() endif() #---------------------------------------------------------- diff --git a/miniapp/io.cpp b/miniapp/io.cpp index d2aaef48f36afeb943ef2fe6bb91b8e4ecb97242..ad07161c7da0e32a58c1cf7fc6a1c9195b707ef1 100644 --- a/miniapp/io.cpp +++ b/miniapp/io.cpp @@ -134,7 +134,10 @@ cl_options read_options(int argc, char** argv, bool allow_write) { true, // Overwrite outputfile if exists "./", // output path "spikes", // file name - "gdf" // file extension + "gdf", // file extension + + // If true, only rank 0 writes profiling output; default false (all ranks write) + false }; cl_options options; @@ -191,6 +194,9 @@ cl_options read_options(int argc, char** argv, bool allow_write) { TCLAP::SwitchArg spike_output_arg( "f","spike_file_output","save spikes to file", cmd, false); + TCLAP::SwitchArg profile_only_zero_arg( + "z", "profile-only-zero", "Only 
output profile information for rank 0", cmd, false); + cmd.reorder_arguments(); cmd.parse(argc, argv); @@ -230,6 +236,8 @@ cl_options read_options(int argc, char** argv, bool allow_write) { update_option(options.file_extension, fopts, "file_extension"); } + update_option(options.profile_only_zero, fopts, "profile_only_zero"); + } catch (std::exception& e) { throw model_description_error( @@ -255,6 +263,7 @@ cl_options read_options(int argc, char** argv, bool allow_write) { update_option(options.trace_prefix, trace_prefix_arg); update_option(options.trace_max_gid, trace_max_gid_arg); update_option(options.spike_file_output, spike_output_arg); + update_option(options.profile_only_zero, profile_only_zero_arg); if (options.all_to_all && options.ring) { throw usage_error("can specify at most one of --ring and --all-to-all"); diff --git a/miniapp/io.hpp b/miniapp/io.hpp index ac769d436b6a36550afe2e64b33e0b35a69f3f80..3100de17441d1fcc01dc5eb87e42918892d8a9ff 100644 --- a/miniapp/io.hpp +++ b/miniapp/io.hpp @@ -35,6 +35,9 @@ struct cl_options { std::string output_path; std::string file_name; std::string file_extension; + + // If true, only rank 0 writes profiling output (all ranks write when false) + bool profile_only_zero; }; class usage_error: public std::runtime_error { diff --git a/miniapp/miniapp.cpp b/miniapp/miniapp.cpp index 6094631b26ec449de143d8070c63171977c107bc..c52543004afc20f2e0143ad053623f4352d1e616 100644 --- a/miniapp/miniapp.cpp +++ b/miniapp/miniapp.cpp @@ -141,7 +141,7 @@ int main(int argc, char** argv) { // output profile and diagnostic feedback auto const num_steps = options.tfinal / options.dt; - util::profiler_output(0.001, m.num_cells()*num_steps); + util::profiler_output(0.001, m.num_cells()*num_steps, options.profile_only_zero); std::cout << "there were " << m.num_spikes() << " spikes\n"; // save traces diff --git a/scripts/profstats b/scripts/profstats deleted file mode 100755 index 88f68c72e6253fa4c0d240d051d7ce1be1960604..0000000000000000000000000000000000000000 --- 
a/scripts/profstats +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python2 -#coding: utf-8 - -import json -import argparse -import re -import numpy as np -from itertools import chain - -def parse_clargs(): - P = argparse.ArgumentParser(description='Aggregate and analyse MPI profile output.') - P.add_argument('inputs', metavar='FILE', nargs='+', - help='MPI profile output in JSON format') - P.add_argument('-r', '--raw', action='store_true', - help='emit raw times in csv table') - - return P.parse_args() - -def parse_profile_json(source): - j = json.load(source) - rank = j['rank'] - if rank is None: - raise ValueError('missing rank information in profile') - - tx = dict() - - def collect_times(j, prefix): - t = j['time'] - n = j['name'] - - if t is None or n is None: - return - - prefix = prefix + n - tx[prefix] = t - - try: - children = j['regions'] - # special case for top level - if prefix == 'total': - prefix = '' - else: - prefix = prefix + '/' - - for j in children: - collect_times(j, prefix) - except KeyError: - pass - - collect_times(j['regions'], '') - return rank, tx - -def csv_escape(x): - s = re.sub('"','""',str(x)) - if re.search('["\t\n,]',s): - s = '"'+s+'"' - return s - -def emit_csv(cols, rows): - print(",".join([csv_escape(c) for c in cols])) - for r in rows: - print(",".join([csv_escape(r[c]) if c in r else '' for c in cols])) - -args = parse_clargs() - -rank_times = dict() -for filename in args.inputs: - with open(filename) as f: - rank, times = parse_profile_json(f) - rank_times[rank] = times - -if args.raw: - rows = [rank_times[rank] for rank in sorted(rank_times.keys())] - cols = sorted({col for tbl in rows for col in tbl.keys()}) - emit_csv(cols, rows) -else: - rank_entry = [rank_times[rank] for rank in sorted(rank_times.keys())] - bins = sorted({col for tbl in rank_entry for col in tbl.keys()}) - - rows = [] - for b in bins: - qs = np.percentile([entry[b] for entry in rank_times.values() if b in entry], - [0., 0.25, 0.5, 0.75, 1.]) - 
rows.append({ - 'region': b, - 'min': qs[0], - 'q25': qs[1], - 'median': qs[2], - 'q75': qs[3], - 'max': qs[4] - }) - - emit_csv(['region','min','q25','median','q75','max'], rows) diff --git a/scripts/profstats b/scripts/profstats new file mode 120000 index 0000000000000000000000000000000000000000..8170d8312648ad82df61e58c3d3de18f02e0f3fb --- /dev/null +++ b/scripts/profstats @@ -0,0 +1 @@ +profstats.py \ No newline at end of file diff --git a/scripts/profstats.py b/scripts/profstats.py new file mode 100755 index 0000000000000000000000000000000000000000..86611e33a3b00e873b2698946e98ce4e19461789 --- /dev/null +++ b/scripts/profstats.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +#coding: utf-8 + +import json +import argparse +import re +import numpy as np +from itertools import chain + +def parse_clargs(): + P = argparse.ArgumentParser(description='Aggregate and analyse MPI profile output.') + P.add_argument('inputs', metavar='FILE', nargs='+', + help='MPI profile output in JSON format') + P.add_argument('-r', '--raw', action='store_true', + help='emit raw times in csv table') + + return P.parse_args() + +def parse_profile_json(source): + j = json.load(source) + rank = j['rank'] + if rank is None: + raise ValueError('missing rank information in profile') + + tx = dict() + + def collect_times(j, prefix): + t = j['time'] + n = j['name'] + + if t is None or n is None: + return + + prefix = prefix + n + tx[prefix] = t + + try: + children = j['regions'] + # special case for top level + if prefix == 'total': + prefix = '' + else: + prefix = prefix + '/' + + for j in children: + collect_times(j, prefix) + except KeyError: + pass + + collect_times(j['regions'], '') + return rank, tx + +def csv_escape(x): + s = re.sub('"','""',str(x)) + if re.search('["\t\n,]',s): + s = '"'+s+'"' + return s + +def emit_csv(cols, rows, stdout): + stdout.write(",".join([csv_escape(c) for c in cols])) + stdout.write("\n") + for r in rows: + stdout.write(",".join([csv_escape(r[c]) if c in r else 
'' for c in cols])) + stdout.write("\n") + +def main(raw, inputs, stdout): + rank_times = dict() + for filename in inputs: + with open(filename) as f: + rank, times = parse_profile_json(f) + rank_times[rank] = times + + if raw: + rows = [rank_times[rank] for rank in sorted(rank_times.keys())] + cols = sorted({col for tbl in rows for col in tbl.keys()}) + emit_csv(cols, rows, stdout) + else: + rank_entry = [rank_times[rank] for rank in sorted(rank_times.keys())] + bins = sorted({col for tbl in rank_entry for col in tbl.keys()}) + + rows = [] + for b in bins: + qs = np.percentile([entry[b] for entry in rank_times.values() if b in entry], + [0., 0.25, 0.5, 0.75, 1.]) + rows.append({ + 'region': b, + 'min': qs[0], + 'q25': qs[1], + 'median': qs[2], + 'q75': qs[3], + 'max': qs[4] + }) + + emit_csv(['region','min','q25','median','q75','max'], rows, stdout) + +if __name__ == "__main__": + import sys + args = parse_clargs() + main(args.raw, args.inputs, sys.stdout) diff --git a/src/profiling/profiler.cpp b/src/profiling/profiler.cpp index 896e7bcf6c11383bf21e2c2416149f1f1970527f..a6f08e96f579c142f579d550b450fe56e55d02b7 100644 --- a/src/profiling/profiler.cpp +++ b/src/profiling/profiler.cpp @@ -349,7 +349,7 @@ void profilers_restart() { } } -void profiler_output(double threshold, std::size_t num_local_work_items) { +void profiler_output(double threshold, std::size_t num_local_work_items, bool profile_only_zero) { profilers_stop(); // Find the earliest start time and latest stop time over all profilers @@ -385,6 +385,7 @@ void profiler_output(double threshold, std::size_t num_local_work_items) { auto ncomms = communication::global_policy::size(); auto comm_rank = communication::global_policy::id(); bool print = comm_rank==0 ? true : false; + bool output_this_rank = (comm_rank == 0) || ! 
profile_only_zero; // calculate the throughput in terms of work items per second auto local_throughput = num_local_work_items / wall_time; @@ -433,9 +434,11 @@ void profiler_output(double threshold, std::size_t num_local_work_items) { as_json["rank"] = comm_rank; as_json["regions"] = p.as_json(); - auto fname = std::string("profile_" + std::to_string(comm_rank)); - std::ofstream fid(fname); - fid << std::setw(1) << as_json; + if (output_this_rank) { + auto fname = std::string("profile_" + std::to_string(comm_rank)); + std::ofstream fid(fname); + fid << std::setw(1) << as_json; + } } #else @@ -445,7 +448,7 @@ void profiler_enter(const char*) {} void profiler_leave() {} void profiler_leave(int) {} void profilers_stop() {} -void profiler_output(double threshold, std::size_t num_local_work_items) {} +void profiler_output(double threshold, std::size_t num_local_work_items, bool profile_only_zero) {} void profilers_restart() {}; #endif diff --git a/src/profiling/profiler.hpp b/src/profiling/profiler.hpp index b000de671c96b80d5268d797af090b61a55b0f9d..0747fbdcf556b77503628c2d6caa3d238c040c17 100644 --- a/src/profiling/profiler.hpp +++ b/src/profiling/profiler.hpp @@ -245,7 +245,7 @@ void profilers_stop(); void profilers_restart(); /// print the collated profiler to std::cout -void profiler_output(double threshold, std::size_t num_local_work_items); +void profiler_output(double threshold, std::size_t num_local_work_items, bool profile_only_zero); } // namespace util } // namespace mc