Commit 546e2ad4 authored by Benjamin Cumming, committed by GitHub

Add dry run benchmark cell model for testing communication scaling (#1627)

Add a modified version of the benchmark cell example that uses dry run scaling.
parent 44b67f38
@@ -2,6 +2,7 @@
 # Example executable targets should be added to the 'examples' target as dependencies.
 add_custom_target(examples DEPENDS)
+add_subdirectory(drybench)
 add_subdirectory(dryrun)
 add_subdirectory(generators)
 add_subdirectory(brunel)
add_executable(drybench EXCLUDE_FROM_ALL drybench.cpp)
add_dependencies(examples drybench)
target_link_libraries(drybench PRIVATE arbor arborenv arbor-sup ext-json)
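The executable is excluded from the default build and attached to the umbrella `examples` target. Assuming the usual out-of-source CMake build of Arbor, it can therefore be built explicitly, for example:
```
cmake --build . --target drybench
```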
/*
* A miniapp that demonstrates how to use dry_run mode
*
*/
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <random>
#include <stdexcept>
#include <string>
#include <vector>

#include <nlohmann/json.hpp>

#include <arbor/assert_macro.hpp>
#include <arbor/cable_cell.hpp>
#include <arbor/common_types.hpp>
#include <arbor/context.hpp>
#include <arbor/benchmark_cell.hpp>
#include <arbor/load_balance.hpp>
#include <arbor/morph/primitives.hpp>
#include <arbor/profile/meter_manager.hpp>
#include <arbor/profile/profiler.hpp>
#include <arbor/simple_sampler.hpp>
#include <arbor/simulation.hpp>
#include <arbor/symmetric_recipe.hpp>
#include <arbor/recipe.hpp>
#include <arbor/version.hpp>

#include <arborenv/default_env.hpp>

#include <sup/ioutil.hpp>
#include <sup/json_meter.hpp>
#include <sup/json_params.hpp>

struct bench_params {
    struct cell_params {
        double spike_freq_hz = 20;   // Frequency in Hz at which each cell generates (Poisson) spikes.
        double realtime_ratio = 0.1; // Integration speed relative to real time, e.g. 10 implies
                                     // that a cell is integrated 10 times slower than real time.
    };
    struct network_params {
        unsigned fan_in = 5000;      // Number of incoming connections on each cell.
        double min_delay = 10;       // Used as the delay on all connections.
    };
    int num_ranks = 1;               // Number of simulated MPI ranks.
    int num_threads = 1;             // Number of threads per rank.
    std::string name = "default";    // Name of the model.
    unsigned num_cells = 100;        // Number of cells _per rank_.
    arb::time_type duration = 100;   // Simulation duration in ms.
    cell_params cell;                // Cell parameters for all cells in model.
    network_params network;          // Description of the network.

    // Expected simulation performance properties based on model parameters.

    // Time to finish simulation if only cell overheads are counted.
    double expected_advance_time() const {
        return cell.realtime_ratio * duration*1e-3 * num_cells;
    }
    // Total expected number of spikes generated by simulation.
    unsigned expected_spikes() const {
        return num_cells * duration*1e-3 * cell.spike_freq_hz * num_ranks;
    }
    // Expected number of spikes generated per min_delay/2 interval.
    unsigned expected_spikes_per_interval() const {
        return num_cells * network.min_delay*1e-3/2 * cell.spike_freq_hz;
    }
    // Expected number of post-synaptic events delivered over simulation.
    unsigned expected_events() const {
        return expected_spikes() * network.fan_in * num_ranks;
    }
    // Expected number of post-synaptic events delivered per min_delay/2 interval.
    unsigned expected_events_per_interval() const {
        return expected_spikes_per_interval() * network.fan_in * num_ranks;
    }
};

bench_params read_options(int argc, char** argv);
std::ostream& operator<<(std::ostream& o, const bench_params& p);
using arb::cell_gid_type;
using arb::cell_lid_type;
using arb::cell_size_type;
using arb::cell_member_type;
using arb::cell_kind;
using arb::time_type;

class tile_desc: public arb::tile {
public:
    tile_desc(bench_params params):
        params_(params),
        num_cells_(params.num_cells),
        num_tiles_(params.num_ranks)
    {}

    cell_size_type num_cells() const override {
        return num_cells_;
    }

    cell_size_type num_tiles() const override {
        return num_tiles_;
    }

    arb::util::unique_any get_cell_description(cell_gid_type gid) const override {
        using RNG = std::mt19937_64;
        auto gen = arb::poisson_schedule(params_.cell.spike_freq_hz/1000, RNG(gid));
        return arb::benchmark_cell("src", "tgt", std::move(gen), params_.cell.realtime_ratio);
    }

    cell_kind get_cell_kind(cell_gid_type gid) const override {
        return cell_kind::benchmark;
    }

    // Each cell has fan_in incoming connections, from any cell in the
    // network spanning all ranks, src gid in {0, ..., num_cells_*num_tiles_ - 1}.
    std::vector<arb::cell_connection> connections_on(cell_gid_type gid) const override {
        std::uniform_int_distribution<cell_gid_type>
            source_distribution(0, num_cells_*num_tiles_ - 2);

        std::vector<arb::cell_connection> conns;
        auto src_gen = std::mt19937(gid);
        for (unsigned i=0; i<params_.network.fan_in; ++i) {
            auto src = source_distribution(src_gen);
            if (src>=gid) ++src; // skip gid itself, so there are no self-connections
            conns.push_back(arb::cell_connection({src, "src"}, {"tgt"}, 1.f, params_.network.min_delay));
        }

        return conns;
    }

private:
    bench_params params_;
    cell_size_type num_cells_;
    cell_size_type num_tiles_;
};

int main(int argc, char** argv) {
    try {
        auto params = read_options(argc, argv);
        std::cout << params << "\n";

        auto resources = arb::proc_allocation();
        resources.num_threads = params.num_threads;

        // Create a dry-run context: a single process impersonates
        // params.num_ranks ranks, each with params.num_cells cells.
        auto ctx = arb::make_context(resources, arb::dry_run_info(params.num_ranks, params.num_cells));
        arb_assert(arb::num_ranks(ctx)==params.num_ranks);

#ifdef ARB_PROFILE_ENABLED
        arb::profile::profiler_initialize(ctx);
#endif

        arb::profile::meter_manager meters;
        meters.start(ctx);

        // Create an instance of our tile and use it to make a symmetric_recipe.
        auto tile = std::make_unique<tile_desc>(params);
        arb::symmetric_recipe recipe(std::move(tile));

        auto decomp = arb::partition_load_balance(recipe, ctx);

        // Construct the model.
        arb::simulation sim(recipe, decomp, ctx);
        meters.checkpoint("model-init", ctx);

        // Run the simulation with 0.025 ms time steps.
        sim.run(params.duration, 0.025);
        meters.checkpoint("model-run", ctx);

        auto ns = sim.num_spikes();
        auto total_cells = params.num_ranks*params.num_cells;
        std::cout << "\n" << ns << " spikes generated at rate of "
                  << ns/total_cells << " spikes per cell\n\n";

        auto profile = arb::profile::profiler_summary();
        std::cout << profile << "\n";

        auto report = arb::profile::make_meter_report(meters, ctx);
        std::cout << report;
    }
    catch (std::exception& e) {
        std::cerr << "exception caught in benchmark: \n" << e.what() << "\n";
        return 1;
    }
    return 0;
}

std::ostream& operator<<(std::ostream& o, const bench_params& p) {
    o << "benchmark parameters:\n"
      << "  name: " << p.name << "\n"
      << "  cells per rank: " << p.num_cells << "\n"
      << "  duration: " << p.duration << " ms\n"
      << "  fan in: " << p.network.fan_in << " connections/cell\n"
      << "  min delay: " << p.network.min_delay << " ms\n"
      << "  spike freq: " << p.cell.spike_freq_hz << " Hz\n"
      << "  cell overhead: " << p.cell.realtime_ratio << " ms to advance 1 ms\n";
    o << "expected:\n"
      << "  cell advance: " << p.expected_advance_time() << " s\n"
      << "  spikes: " << p.expected_spikes() << "\n"
      << "  events: " << p.expected_events() << "\n"
      << "  spikes: " << p.expected_spikes_per_interval() << " per interval\n"
      << "  events: " << p.expected_events_per_interval()/p.num_cells << " per cell per interval\n";
    o << "HW resources:\n"
      << "  threads: " << p.num_threads << "\n"
      << "  ranks: " << p.num_ranks;
    return o;
}

bench_params read_options(int argc, char** argv) {
    using sup::param_from_json;

    bench_params params;

    // Set default number of threads to that provided by the system.
    params.num_threads = arbenv::default_concurrency();

    if (argc<2) {
        std::cout << "Using default parameters.\n";
        return params;
    }
    if (argc>2) {
        throw std::runtime_error("More than one command line option is not permitted.");
    }

    std::string fname = argv[1];
    std::cout << "Loading parameters from file: " << fname << "\n";
    std::ifstream f(fname);

    if (!f.good()) {
        throw std::runtime_error("Unable to open input parameter file: "+fname);
    }

    nlohmann::json json;
    f >> json;

    param_from_json(params.name, "name", json);
    param_from_json(params.num_cells, "num-cells", json);
    param_from_json(params.duration, "duration", json);
    param_from_json(params.network.min_delay, "min-delay", json);
    param_from_json(params.network.fan_in, "fan-in", json);
    param_from_json(params.cell.realtime_ratio, "realtime-ratio", json);
    param_from_json(params.cell.spike_freq_hz, "spike-frequency", json);
    param_from_json(params.num_threads, "threads", json);
    param_from_json(params.num_ranks, "ranks", json);

    // Any keys still left in the json object were not recognised above.
    for (auto it=json.begin(); it!=json.end(); ++it) {
        std::cout << "  Warning: unused input parameter: \"" << it.key() << "\"\n";
    }
    std::cout << "\n";

    return params;
}
{
    "name": "test",
    "num-cells": 1000,
    "duration": 100,
    "min-delay": 10,
    "fan-in": 10000,
    "realtime-ratio": 0.1,
    "spike-frequency": 20,
    "threads": 4,
    "ranks": 1000
}
# Dryrun Example
A miniapp that demonstrates how to use dry-run mode to simulate the effect of communication scaling
with benchmark cells, useful for evaluating the communication overheads associated with
weak scaling a model.
The overheads of processing spikes and generating local events do not weak scale perfectly: the cost
of traversing the global spike list increases with global model size.
Whether these overheads contribute significantly to total run time depends on the computational complexity
of the local model and the size of the global model.
## How it works
The dry run mode mimics running a distributed model on a single node or laptop by creating a local model
and generating fake spike information for the other "ranks" in the larger distributed model.
The benchmark allows the user to tune three key parameters:
1. the number of ranks in the global model, which can be used to simulate weak scaling;
2. the number and computational complexity of cells in the local model;
3. the complexity of the network (fan-in and min-delay).
By tuning the parameters above to match those of a target distributed model, it is possible to replicate
the spike and event processing overheads without having to run resource-intensive benchmarks at scale.
**Note**: instead of performing MPI communication to gather the global spike list, the dry run mode
creates a fake global spike list using the local spike data as a template. As such, the scaling of the MPI library
is not captured; it would have to be benchmarked separately if it is relevant.
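For reference, the dry-run setup in `drybench.cpp` boils down to the following sketch. It is not a standalone program: `tile_desc` and `bench_params` are the types defined in this example, and the numeric values are placeholders.
```
// Sketch of the dry-run pattern used by drybench.cpp.
arb::proc_allocation resources;   // threads (and GPU) for the one real process
unsigned num_ranks = 4;           // simulated ranks (placeholder value)
unsigned cells_per_rank = 100;    // cells per simulated rank (placeholder value)

// One process impersonates num_ranks ranks, each owning cells_per_rank cells.
auto ctx = arb::make_context(resources, arb::dry_run_info(num_ranks, cells_per_rank));

// The local tile is replicated over all simulated ranks by symmetric_recipe.
auto tile = std::make_unique<tile_desc>(bench_params{});
arb::symmetric_recipe recipe(std::move(tile));

auto decomp = arb::partition_load_balance(recipe, ctx);
arb::simulation sim(recipe, decomp, ctx);
sim.run(100, 0.025);              // 100 ms with 0.025 ms time steps
```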
## Configuration
The benchmark uses an `arb::tile` to build a network of `num_cells` local cells of type
`arb::benchmark_cell`. The network is translated over `ranks` domains using `arb::symmetric_recipe`.
The model of the *tile* can be configured using a json configuration file:
```
./drybench params.json
```
An example parameter file for a dry-run is:
```
{
    "name": "test",
    "num-cells": 100,
    "duration": 100,
    "min-delay": 10,
    "fan-in": 10,
    "realtime-ratio": 0.1,
    "spike-frequency": 20,
    "ranks": 10000,
    "threads": 4
}
```
The parameters in the file:
* `name="default"`: a string with a name for the benchmark.
* `num-cells=100`: the number of cells on a single tile.
The total number of cells in the model = num-cells * ranks.
* `duration=100`: the length of the simulated time interval, in ms.
* `min-delay=10`: the minimum delay of the network.
* `fan-in=5000`: the number of incoming connections on each cell.
* `spike-frequency=20`: frequency (Hz) of the independent Poisson processes that
generate spikes for each cell.
* `realtime-ratio=0.1`: the ratio between time taken to advance a single cell in
the simulation and the simulated time. For example, a value of 1 indicates
that the cell is simulated in real time, while a value of 0.1 indicates
that 10s can be simulated in a single second.
* `ranks=1`: the number of domains to simulate.
* `threads=available-threads-on-system`: the number of threads per rank: default is automatically detected.
The network is randomly connected with no self-connections and `fan-in`
incoming connections on each cell, with every connection having a delay of `min-delay`,
and spikes on each cell are generated according to a unique Poisson sequence at `spike-frequency` Hz.
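As a rough sanity check on the communication volume, the expected spike counts can be estimated from the parameters alone, following `bench_params::expected_spikes()` in `drybench.cpp`. The values below are taken from the example parameter file above:
```
// Spike-count estimate for the example parameters above.
unsigned num_cells = 100;   // "num-cells": cells per tile
double   duration  = 100;   // "duration" in ms
double   freq_hz   = 20;    // "spike-frequency" in Hz
unsigned ranks     = 10000; // "ranks"

double spikes_per_tile = num_cells * duration*1e-3 * freq_hz; // = 200
double global_spikes   = spikes_per_tile * ranks;             // = 2,000,000
```
Every simulated rank contributes to the global spike list that is traversed during event generation, which is why these overheads do not weak scale perfectly.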