Commit a2b39382 authored by noraabiakar, committed by Benjamin Cumming

Dry-run mode (#582)

Dry-run mode: 
* An implementation of distributed_context that is used to mimic the performance of running an MPI distributed simulation with n ranks.
* Verifiable against an MPI run with the same parameters. 

Implementation: 
* Describe the model on a single domain (tile) and translate it to however many domains we want to mimic using arb::tile and arb::symmetric_recipe. This allows us to know the exact behavior of the entire system by only running the simulation on a single node.
* Mimic communication between domains using arb::dry_run_context

Example: 
* dryrun in example/ is a verifiable example of using dry-run mode with mc_cells

Other:
* Documentation of dry-run mode 
* unit test for dry_run_context 
parent 2ff590ea
Showing with 1007 additions and 1 deletion
...@@ -6,6 +6,7 @@ set(arbor_sources
    backends/multicore/mechanism.cpp
    backends/multicore/shared_state.cpp
    backends/multicore/stimulus.cpp
    communication/dry_run_context.cpp
    benchmark_cell_group.cpp
    builtin_mechanisms.cpp
    cell_group_factory.cpp
...
#include <algorithm>
#include <string>
#include <vector>
#include <arbor/spike.hpp>
#include <distributed_context.hpp>
#include <threading/threading.hpp>
namespace arb {
struct dry_run_context_impl {
explicit dry_run_context_impl(unsigned num_ranks, unsigned num_cells_per_tile):
num_ranks_(num_ranks), num_cells_per_tile_(num_cells_per_tile) {};
gathered_vector<arb::spike>
gather_spikes(const std::vector<arb::spike>& local_spikes) const {
using count_type = typename gathered_vector<arb::spike>::count_type;
count_type local_size = local_spikes.size();
std::vector<arb::spike> gathered_spikes;
gathered_spikes.reserve(local_size*num_ranks_);
for (count_type i = 0; i < num_ranks_; i++) {
gathered_spikes.insert(gathered_spikes.end(), local_spikes.begin(), local_spikes.end());
}
for (count_type i = 0; i < num_ranks_; i++) {
for (count_type j = i*local_size; j < (i+1)*local_size; j++){
gathered_spikes[j].source += num_cells_per_tile_*i;
}
}
std::vector<count_type> partition;
for(count_type i = 0; i <= num_ranks_; i++) {
partition.push_back(static_cast<count_type>(i*local_size));
}
return gathered_vector<arb::spike>(std::move(gathered_spikes), std::move(partition));
}
int id() const { return 0; }
int size() const { return num_ranks_; }
template <typename T>
T min(T value) const { return value; }
template <typename T>
T max(T value) const { return value; }
template <typename T>
T sum(T value) const { return value * num_ranks_; }
template <typename T>
std::vector<T> gather(T value, int) const {
return std::vector<T>(num_ranks_, value);
}
void barrier() const {}
std::string name() const { return "dryrun"; }
unsigned num_ranks_;
unsigned num_cells_per_tile_;
};
std::shared_ptr<distributed_context> make_dry_run_context(unsigned num_ranks, unsigned num_cells_per_tile) {
return std::make_shared<distributed_context>(dry_run_context_impl(num_ranks, num_cells_per_tile));
}
} // namespace arb
...@@ -171,6 +171,8 @@ distributed_context_handle make_local_context() {
    return std::make_shared<distributed_context>();
}

distributed_context_handle make_dry_run_context(unsigned num_ranks, unsigned num_cells_per_rank);

// MPI context creation functions only provided if built with MPI support.
template <typename MPICommType>
distributed_context_handle make_mpi_context(MPICommType);
...
...@@ -48,6 +48,24 @@ context make_context<MPI_Comm>(const proc_allocation& p, MPI_Comm comm) {
    return context(new execution_context(p, comm), [](execution_context* p){delete p;});
}
#endif
template <>
execution_context::execution_context<dry_run_info>(
const proc_allocation& resources,
dry_run_info d):
distributed(make_dry_run_context(d.num_ranks, d.num_cells_per_rank)),
thread_pool(std::make_shared<threading::task_system>(resources.num_threads)),
gpu(resources.has_gpu()? std::make_shared<gpu_context>(resources.gpu_id)
: std::make_shared<gpu_context>())
{}
template <>
context make_context(const proc_allocation& p, dry_run_info d) {
return context(new execution_context(p, d), [](execution_context* p){delete p;});
}
std::string distribution_type(const context& ctx) {
return ctx->distributed->name();
}
bool has_gpu(const context& ctx) {
    return ctx->gpu->has_gpu();
...
#include <arbor/domain_decomposition.hpp>
#include <arbor/load_balance.hpp>
#include <arbor/recipe.hpp>
#include <arbor/symmetric_recipe.hpp>
#include <arbor/context.hpp>

#include "cell_group_factory.hpp"
...
.. _cppdryrun:

.. Note::
    This is a developer feature for benchmarking, and is not useful for scientific use cases.
Dry-run Mode
===================
Dry-run mode is used to mimic the performance of running an MPI distributed simulation
without having access to an HPC cluster or even MPI support. It is verifiable against an MPI
run with the same parameters. In dry-run mode, we describe the model on a single domain and
translate it to however many domains we want to mimic. This allows us to know the exact
behavior of the entire system by only running the simulation on a single node.
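In practice, dry-run mode is enabled by creating an execution context from an
:cpp:class:`arb::dry_run_info` and wrapping the single-domain model description in an
:cpp:class:`arb::symmetric_recipe`. A condensed sketch, following the ``dryrun``
example added with this feature (``my_tile`` stands for any user-defined
:cpp:class:`arb::tile`):

.. code-block:: cpp

    auto resources = arb::proc_allocation();
    auto ctx = arb::make_context(resources, arb::dry_run_info(4, 100)); // 4 ranks, 100 cells each

    auto tile = std::make_unique<my_tile>(100, 4);
    arb::symmetric_recipe recipe(std::move(tile));

    auto decomp = arb::partition_load_balance(recipe, ctx);
    arb::simulation sim(recipe, decomp, ctx);
    sim.run(100, 0.025); // duration and dt in ms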
To support dry-run mode we use the following classes:
.. cpp:namespace:: arb
.. cpp:class:: dry_run_context
Implements the :cpp:class:`arb::distributed_context` interface for a fake distributed
simulation.
.. cpp:member:: unsigned num_ranks_
Number of domains we are mimicking.
.. cpp:member:: unsigned num_cells_per_tile_
Number of cells assigned to each domain.
**Constructor:**
.. cpp:function:: dry_run_context_impl(unsigned num_ranks, unsigned num_cells_per_tile)
Creates the dry run context and sets up the information needed to fake communication
between domains.
**Interface:**
.. cpp:function:: int id() const
Always 0: the simulation is only performed on the local domain, which acts as root.
.. cpp:function:: int size() const
Equal to :cpp:member:`num_ranks_`.
.. cpp:function:: std::string name() const
Returns ``"dryrun"``.
.. cpp:function:: std::vector<std::string> gather(std::string value, int root) const
Duplicates the string ``value`` from the local domain :cpp:member:`num_ranks_` times.
Returns the vector of copies.
.. cpp:function:: gathered_vector<arb::spike> gather_spikes(const std::vector<arb::spike>& local_spikes) const
The vector :cpp:var:`local_spikes` represents the spikes obtained from running a
simulation of :cpp:member:`num_cells_per_tile_` cells on the local domain.
The returned vector contains the spikes obtained from all domains in the dry-run.
The spikes from the non-simulated domains are obtained by copying
:cpp:var:`local_spikes` and shifting the gids of each copy to refer to the
corresponding gids on its domain. The copies are concatenated with the original
:cpp:var:`local_spikes` and returned.
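For example, with two mimicked domains of four cells each, a single local spike from
gid 1 is returned once for the simulated domain and once, shifted onto gid 5, for the
mimicked one. A sketch mirroring the unit test added in this commit:

.. code-block:: cpp

    auto ctx = arb::make_dry_run_context(2, 4); // 2 ranks, 4 cells per tile

    std::vector<arb::spike> local = {{{1u, 0u}, 10.f}}; // one spike from gid 1

    auto gathered = ctx->gather_spikes(local);
    // gathered.values() == {{{1u, 0u}, 10.f},   // rank 0
    //                       {{5u, 0u}, 10.f}};  // rank 1: gid 1 + 4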
.. cpp:function:: distributed_context_handle make_dry_run_context(unsigned num_ranks, unsigned num_cells_per_tile)
Convenience function that returns a handle to a :cpp:class:`dry_run_context`.
.. cpp:class:: tile: public recipe
.. Note::
    While this class inherits from :cpp:class:`arb::recipe`, it breaks one of its implicit
    rules: it allows connections from gids greater than or equal to the total number of
    cells in the recipe, :cpp:var:`ncells`.
:cpp:class:`arb::tile` describes the model on a single domain containing :cpp:var:`ncells` =
:cpp:var:`num_cells_per_tile` cells, which is to be duplicated over :cpp:var:`num_ranks`
domains in dry-run mode. It exposes the number of domains, :cpp:var:`num_ranks`,
through the following function:
.. cpp:function:: cell_size_type num_tiles() const
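Returns the number of domains to mimic; the default implementation returns 1.
A minimal tile might look as follows (a sketch modeled on the ``tile_desc`` class in
the ``dryrun`` example; the class name and constants are illustrative):

.. code-block:: cpp

    class my_tile: public arb::tile {
    public:
        cell_size_type num_cells() const override { return 10; } // cells per domain
        cell_size_type num_tiles() const override { return 4; }  // domains to mimic

        // Remaining recipe overrides (get_cell_description, get_cell_kind,
        // connections_on, ...) describe the model on the local domain.
    };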
Most of the overloaded functions in :cpp:class:`arb::tile` should describe a recipe on the local
domain, as if it were the only domain in the simulation. The exceptions are the following two functions:
.. cpp:function:: std::vector<cell_connection> connections_on(cell_gid_type i) const
Returns the connections on gid ``i``, where 0 <= i < :cpp:var:`ncells`, but allows
connections from gids outside the local domain (gid >= :cpp:var:`ncells`), in order
to create a realistic network with communication between domains.
.. cpp:function:: std::vector<event_generator> event_generators(cell_gid_type i) const
Describes event generators for all gids from all domains: 0 <= i <
:cpp:var:`ncells` * :cpp:var:`num_tiles()`. Unlike the other functions, it has
knowledge of the mimicked domains, namely their event generators.
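The ``dryrun`` miniapp added with this commit, for instance, attaches a generator to
every 20th gid over the whole network:

.. code-block:: cpp

    std::vector<arb::event_generator> event_generators(cell_gid_type gid) const override {
        std::vector<arb::event_generator> gens;
        if (gid%20 == 0) {
            gens.push_back(arb::explicit_generator(arb::pse_vector{{{gid, 0}, 0.1, 1.0}}));
        }
        return gens;
    }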
.. cpp:class:: symmetric_recipe: public recipe
:cpp:class:`symmetric_recipe` mimics a model containing :cpp:var:`num_tiles()`
instances of :cpp:class:`arb::tile` in a simulation of one tile per domain.
.. cpp:member:: std::unique_ptr<tile> tiled_recipe_
``symmetric_recipe`` owns a unique pointer to a :cpp:class:`arb::tile`, and uses
:cpp:member:`tiled_recipe_` to query information about the tiles on the local
and mimicked domains.

Most functions in ``symmetric_recipe`` only need to call the underlying functions
of ``tiled_recipe_`` for the corresponding gid in the simulated domain. This is
done with a simple modulo operation. For example:
.. code-block:: cpp
cell_kind get_cell_kind(cell_gid_type i) const override {
return tiled_recipe_->get_cell_kind(i % tiled_recipe_->num_cells());
}
The exceptions are again the following two functions:
.. cpp:function:: std::vector<cell_connection> connections_on(cell_gid_type i) const
Calls
.. code-block:: cpp
    tiled_recipe_->connections_on(i % tiled_recipe_->num_cells())
The obtained connections are then translated to refer to the corresponding gids
on each mimicked domain.
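The translation, lightly restyled from the implementation in
``arbor/symmetric_recipe.hpp`` added by this commit, shifts each endpoint by the
domain offset and wraps around the global cell count:

.. code-block:: cpp

    int n_local = tiled_recipe_->num_cells();
    int n_global = num_cells();
    int offset = (i / n_local) * n_local; // first gid of the domain containing i

    std::vector<cell_connection> conns = tiled_recipe_->connections_on(i % n_local);
    for (auto& c: conns) {
        c.source = (c.source + offset) % n_global;
        c.dest = (c.dest + offset) % n_global;
    }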
.. cpp:function:: std::vector<event_generator> event_generators(cell_gid_type i) const
Calls
.. code-block:: cpp
    tiled_recipe_->event_generators(i)
The call uses the global gid directly, without the modulo operation, because the
tile's event generators already describe the entire network.
...@@ -64,4 +64,5 @@ Some key features include:
    profiler
    sampling_api
    cpp_distributed_context
    cpp_dry_run
add_subdirectory(miniapp)
add_subdirectory(dryrun)
add_subdirectory(generators)
add_subdirectory(brunel)
add_subdirectory(bench)
...
add_executable(dryrun dryrun.cpp)
target_link_libraries(dryrun PRIVATE arbor arbor-aux ext-json)
/*
 * A miniapp that demonstrates how to use dry-run mode.
 */
#include <fstream>
#include <iomanip>
#include <iostream>
#include <nlohmann/json.hpp>
#include <arbor/assert_macro.hpp>
#include <arbor/common_types.hpp>
#include <arbor/context.hpp>
#include <arbor/load_balance.hpp>
#include <arbor/mc_cell.hpp>
#include <arbor/profile/meter_manager.hpp>
#include <arbor/profile/profiler.hpp>
#include <arbor/simple_sampler.hpp>
#include <arbor/simulation.hpp>
#include <arbor/symmetric_recipe.hpp>
#include <arbor/recipe.hpp>
#include <arbor/version.hpp>
#include <aux/ioutil.hpp>
#include <aux/json_meter.hpp>
#include "parameters.hpp"
#ifdef ARB_MPI_ENABLED
#include <mpi.h>
#include <aux/with_mpi.hpp>
#endif
using arb::cell_gid_type;
using arb::cell_lid_type;
using arb::cell_size_type;
using arb::cell_member_type;
using arb::cell_kind;
using arb::time_type;
using arb::cell_probe_address;
// Generate a cell.
arb::mc_cell branch_cell(arb::cell_gid_type gid, const cell_parameters& params);
class tile_desc: public arb::tile {
public:
tile_desc(unsigned num_cells, unsigned num_tiles, cell_parameters params, unsigned min_delay):
num_cells_(num_cells),
num_tiles_(num_tiles),
cell_params_(params),
min_delay_(min_delay)
{}
cell_size_type num_cells() const override {
return num_cells_;
}
cell_size_type num_tiles() const override {
return num_tiles_;
}
arb::util::unique_any get_cell_description(cell_gid_type gid) const override {
return branch_cell(gid, cell_params_);
}
cell_kind get_cell_kind(cell_gid_type gid) const override {
return cell_kind::cable1d_neuron;
}
// Each cell has one spike detector (at the soma).
cell_size_type num_sources(cell_gid_type gid) const override {
return 1;
}
// The cell has one target synapse
cell_size_type num_targets(cell_gid_type gid) const override {
return 1;
}
// Each cell has one incoming connection, from any cell in the network spanning all ranks:
// src gid in {0, ..., num_cells_*num_tiles_ - 1}.
std::vector<arb::cell_connection> connections_on(cell_gid_type gid) const override {
std::uniform_int_distribution<cell_gid_type> source_distribution(0, num_cells_*num_tiles_ - 2);
auto src_gen = std::mt19937(gid);
auto src = source_distribution(src_gen);
if (src>=gid) ++src;
return {arb::cell_connection({src, 0}, {gid, 0}, event_weight_, min_delay_)};
}
// Return an event generator on every 20th gid. This function needs to generate events
// for ALL cells on ALL ranks, because the symmetric recipe cannot easily
// translate the src gid of an event generator.
std::vector<arb::event_generator> event_generators(cell_gid_type gid) const override {
std::vector<arb::event_generator> gens;
if (gid%20 == 0) {
gens.push_back(arb::explicit_generator(arb::pse_vector{{{gid, 0}, 0.1, 1.0}}));
}
return gens;
}
// There is one probe (for measuring voltage at the soma) on the cell.
cell_size_type num_probes(cell_gid_type gid) const override {
return 1;
}
arb::probe_info get_probe(cell_member_type id) const override {
// Get the appropriate kind for measuring voltage.
cell_probe_address::probe_kind kind = cell_probe_address::membrane_voltage;
// Measure at the soma.
arb::segment_location loc(0, 0.0);
return arb::probe_info{id, kind, cell_probe_address{loc, kind}};
}
private:
cell_size_type num_cells_;
cell_size_type num_tiles_;
cell_parameters cell_params_;
double min_delay_;
float event_weight_ = 0.01;
};
struct cell_stats {
using size_type = unsigned;
size_type ncells = 0;
int nranks = 1;
size_type nsegs = 0;
size_type ncomp = 0;
cell_stats(arb::recipe& r, run_params params) {
#ifdef ARB_MPI_ENABLED
if(!params.dry_run) {
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nranks);
ncells = r.num_cells();
size_type cells_per_rank = ncells/nranks;
size_type b = rank*cells_per_rank;
size_type e = (rank+1)*cells_per_rank;
size_type nsegs_tmp = 0;
size_type ncomp_tmp = 0;
for (size_type i=b; i<e; ++i) {
auto c = arb::util::any_cast<arb::mc_cell>(r.get_cell_description(i));
nsegs_tmp += c.num_segments();
ncomp_tmp += c.num_compartments();
}
MPI_Allreduce(&nsegs_tmp, &nsegs, 1, MPI_UNSIGNED, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(&ncomp_tmp, &ncomp, 1, MPI_UNSIGNED, MPI_SUM, MPI_COMM_WORLD);
}
#else
if(!params.dry_run) {
nranks = 1;
ncells = r.num_cells();
for (size_type i = 0; i < ncells; ++i) {
auto c = arb::util::any_cast<arb::mc_cell>(r.get_cell_description(i));
nsegs += c.num_segments();
ncomp += c.num_compartments();
}
}
#endif
else {
nranks = params.num_ranks;
ncells = r.num_cells(); //total number of cells across all ranks
for (size_type i = 0; i < params.num_cells_per_rank; ++i) {
auto c = arb::util::any_cast<arb::mc_cell>(r.get_cell_description(i));
nsegs += c.num_segments();
ncomp += c.num_compartments();
}
nsegs *= params.num_ranks;
ncomp *= params.num_ranks;
}
}
friend std::ostream& operator<<(std::ostream& o, const cell_stats& s) {
return o << "cell stats: "
<< s.nranks << " ranks; "
<< s.ncells << " cells; "
<< s.nsegs << " segments; "
<< s.ncomp << " compartments.";
}
};
int main(int argc, char** argv) {
try {
#ifdef ARB_MPI_ENABLED
aux::with_mpi guard(argc, argv, false);
#endif
bool root = true;
auto params = read_options(argc, argv);
auto resources = arb::proc_allocation();
auto ctx = arb::make_context(resources);
if (params.dry_run) {
ctx = arb::make_context(resources, arb::dry_run_info(params.num_ranks, params.num_cells_per_rank));
}
#ifdef ARB_MPI_ENABLED
else {
ctx = arb::make_context(resources, MPI_COMM_WORLD);
{
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
root = rank==0;
}
}
#endif
arb_assert(arb::num_ranks(ctx)==params.num_ranks);
#ifdef ARB_PROFILE_ENABLED
arb::profile::profiler_initialize(ctx);
#endif
std::cout << aux::mask_stream(root);
// Print a banner with information about hardware configuration
std::cout << "gpu: " << (has_gpu(ctx)? "yes": "no") << "\n";
std::cout << "threads: " << num_threads(ctx) << "\n";
std::cout << "mpi: " << (has_mpi(ctx)? "yes": "no") << "\n";
std::cout << "ranks: " << num_ranks(ctx) << "\n" << std::endl;
std::cout << "run mode: " << distribution_type(ctx) << "\n";
arb::profile::meter_manager meters;
meters.start(ctx);
// Create an instance of our tile and use it to make a symmetric_recipe.
auto tile = std::make_unique<tile_desc>(params.num_cells_per_rank,
params.num_ranks, params.cell, params.min_delay);
arb::symmetric_recipe recipe(std::move(tile));
cell_stats stats(recipe, params);
std::cout << stats << "\n";
auto decomp = arb::partition_load_balance(recipe, ctx);
// Construct the model.
arb::simulation sim(recipe, decomp, ctx);
// Set up recording of spikes to a vector on the root process.
std::vector<arb::spike> recorded_spikes;
if (root) {
sim.set_global_spike_callback(
[&recorded_spikes](const std::vector<arb::spike>& spikes) {
recorded_spikes.insert(recorded_spikes.end(), spikes.begin(), spikes.end());
});
}
meters.checkpoint("model-init", ctx);
// Run the simulation for 100 ms, with time steps of 0.025 ms.
sim.run(params.duration, 0.025);
meters.checkpoint("model-run", ctx);
auto ns = sim.num_spikes();
std::cout << "\n" << ns << " spikes generated, with a mean of "
          << params.duration/ns << " ms between spikes\n\n";
// Write spikes to file
if (root) {
std::ofstream fid("spikes.gdf");
if (!fid.good()) {
std::cerr << "Warning: unable to open file spikes.gdf for spike output\n";
}
else {
char linebuf[45];
for (auto spike: recorded_spikes) {
auto n = std::snprintf(
linebuf, sizeof(linebuf), "%u %.4f\n",
unsigned{spike.source.gid}, float(spike.time));
fid.write(linebuf, n);
}
}
}
auto profile = arb::profile::profiler_summary();
std::cout << profile << "\n";
auto report = arb::profile::make_meter_report(meters, ctx);
std::cout << report;
}
catch (std::exception& e) {
std::cerr << "exception caught in dryrun miniapp:\n" << e.what() << "\n";
return 1;
}
return 0;
}
// Helper used to interpolate in branch_cell.
template <typename T>
double interp(const std::array<T,2>& r, unsigned i, unsigned n) {
double p = i * 1./(n-1);
double r0 = r[0];
double r1 = r[1];
return r[0] + p*(r1-r0);
}
arb::mc_cell branch_cell(arb::cell_gid_type gid, const cell_parameters& params) {
arb::mc_cell cell;
// Add soma.
auto soma = cell.add_soma(12.6157/2.0); // For area of 500 μm².
soma->rL = 100;
soma->add_mechanism("hh");
std::vector<std::vector<unsigned>> levels;
levels.push_back({0});
// Standard mersenne_twister_engine seeded with gid.
std::mt19937 gen(gid);
std::uniform_real_distribution<double> dis(0, 1);
double dend_radius = 0.5; // Diameter of 1 μm for each cable.
unsigned nsec = 1;
for (unsigned i=0; i<params.max_depth; ++i) {
// Branch prob at this level.
double bp = interp(params.branch_probs, i, params.max_depth);
// Length at this level.
double l = interp(params.lengths, i, params.max_depth);
// Number of compartments at this level.
unsigned nc = std::round(interp(params.compartments, i, params.max_depth));
std::vector<unsigned> sec_ids;
for (unsigned sec: levels[i]) {
for (unsigned j=0; j<2; ++j) {
if (dis(gen)<bp) {
sec_ids.push_back(nsec++);
auto dend = cell.add_cable(sec, arb::section_kind::dendrite, dend_radius, dend_radius, l);
dend->set_compartments(nc);
dend->add_mechanism("pas");
dend->rL = 100;
}
}
}
if (sec_ids.empty()) {
break;
}
levels.push_back(sec_ids);
}
// Add spike threshold detector at the soma.
cell.add_detector({0,0}, 10);
// Add a synapse to the mid point of the first dendrite.
cell.add_synapse({1, 0.5}, "expsyn");
return cell;
}
#include <iostream>
#include <array>
#include <cmath>
#include <fstream>
#include <random>
#include <arbor/mc_cell.hpp>
#include <aux/json_params.hpp>
// Parameters used to generate the random cell morphologies.
struct cell_parameters {
cell_parameters() = default;
// Maximum number of levels in the cell (not including the soma)
unsigned max_depth = 5;
// The following parameters are described as ranges.
// The first value is at the soma, and the last value is used on the last level.
// Values at levels in between are found by linear interpolation.
std::array<double,2> branch_probs = {1.0, 0.5}; // Probability of a branch occurring.
std::array<unsigned,2> compartments = {20, 2}; // Compartment count on a branch.
std::array<double,2> lengths = {200, 20}; // Length of branch in μm.
};
struct run_params {
run_params() = default;
std::string name = "default";
bool dry_run = false;
unsigned num_cells_per_rank = 10;
unsigned num_ranks = 1;
double min_delay = 10;
double duration = 100;
cell_parameters cell;
};
run_params read_options(int argc, char** argv) {
using aux::param_from_json;
run_params params;
if (argc<2) {
std::cout << "Using default parameters.\n";
return params;
}
if (argc>2) {
throw std::runtime_error("More than command line one option not permitted.");
}
std::string fname = argv[1];
std::cout << "Loading parameters from file: " << fname << "\n";
std::ifstream f(fname);
if (!f.good()) {
throw std::runtime_error("Unable to open input parameter file: "+fname);
}
nlohmann::json json;
json << f;
param_from_json(params.name, "name", json);
param_from_json(params.dry_run, "dry-run", json);
param_from_json(params.num_cells_per_rank, "num-cells-per-rank", json);
param_from_json(params.num_ranks, "num-ranks", json);
param_from_json(params.duration, "duration", json);
param_from_json(params.min_delay, "min-delay", json);
param_from_json(params.cell.max_depth, "depth", json);
param_from_json(params.cell.branch_probs, "branch-probs", json);
param_from_json(params.cell.compartments, "compartments", json);
param_from_json(params.cell.lengths, "lengths", json);
if (!json.empty()) {
for (auto it=json.begin(); it!=json.end(); ++it) {
std::cout << " Warning: unused input parameter: \"" << it.key() << "\"\n";
}
std::cout << "\n";
}
return params;
}
# Dryrun Example
A miniapp that demonstrates how to use dry-run mode on a simple network
duplicated across `num_ranks` domains.
It uses `arb::tile` to build a network of `num_cells_per_rank` cells of type
`arb::mc_cell`. The network is translated over `num_ranks` domains using
`arb::symmetric_recipe`.
Example:
```
num_cells_per_rank = 4;
num_ranks = 2;
gids = {0, ..., 7}
tile connections:
dest <- src:
0 <- 1;
1 <- 3;
2 <- 5;
3 <- 7;
symmetric_recipe inferred connections,
with the tile connections repeated and translated for the second domain:
dest <- src:
0 <- 1;
1 <- 3;
2 <- 5;
3 <- 7;
4 <- 5;
5 <- 7;
6 <- 1;
7 <- 3;
```
The model of the *tile* can be configured using a json configuration file:
```
./dryrun params.json
```
An example parameter file for a dry-run is:
```
{
"name": "dry run test",
"dry-run": true,
"num-cells-per-rank": 1000,
"num-ranks": 2,
"duration": 100,
"min-delay": 1,
"depth": 5,
"branch-probs": [1.0, 0.5],
"compartments": [100, 2]
}
```
The parameter file for the equivalent MPI run is:
```
{
"name": "MPI test",
"dry-run": false,
"num-cells-per-rank": 1000,
"duration": 100,
"min-delay": 1,
"depth": 5,
"branch-probs": [1.0, 0.5],
"compartments": [100, 2]
}
```
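To reproduce the results over real ranks, the same binary can be launched with an MPI
launcher (assuming, for illustration, that the file above is saved as `mpi.json` and
`mpirun` is available):

```
mpirun -n 2 ./dryrun mpi.json
```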
These two parameter files should produce exactly the same spikes.gdf output.
The parameters in the file:
* `name`: a string with a name for the run.
* `dry-run`: a bool indicating whether or not to use dry-run mode.
  If false, use MPI if available, and local distribution if not.
* `num-ranks`: the number of domains to simulate. Only used in dry-run mode.
* `num-cells-per-rank`: the number of cells on a single tile.
  The total number of cells in the model = num-cells-per-rank * num-ranks.
* `duration`: the length of the simulated time interval, in ms.
* `min-delay`: the minimum delay of the network, in ms.
* `depth`: the maximum number of levels in each cell's branching morphology,
  not including the soma.
* `branch-probs`: the probability of a branch occurring, given as a
  [first-level, deepest-level] pair; intermediate levels are linearly interpolated.
* `compartments`: the number of compartments per branch, given as the same
  kind of pair.
* `lengths`: the length of a branch in μm, given as the same kind of pair.

The network is randomly connected with no self-connections; each cell has one
incoming connection, and every connection has a delay of `min-delay`.
\ No newline at end of file
...@@ -47,6 +47,28 @@ using cell_local_size_type = std::make_unsigned_t<cell_lid_type>;
struct cell_member_type {
    cell_gid_type gid;
    cell_lid_type index;

    // Arithmetic on the gid member; these operators allow symmetric_recipe
    // to translate connection endpoints between tiles.
    cell_member_type& operator= (cell_gid_type inc) {
        gid = inc;
        return *this;
    }

    cell_member_type& operator+= (cell_gid_type inc) {
        gid += inc;
        return *this;
    }

    friend cell_member_type operator+ (const cell_member_type& lhs, const cell_gid_type rhs) {
        cell_member_type result = lhs;
        result.gid += rhs;
        return result;
    }

    friend cell_member_type operator% (const cell_member_type& lhs, const cell_gid_type rhs) {
        cell_member_type result = lhs;
        result.gid %= rhs;
        return result;
    }
};
ARB_DEFINE_LEXICOGRAPHIC_ORDERING(cell_member_type,(a.gid,a.index),(b.gid,b.index))
...
...@@ -15,6 +15,15 @@ struct local_resources {
    {}
};
/// Requested dry-run parameters
struct dry_run_info {
unsigned num_ranks;
unsigned num_cells_per_rank;
dry_run_info(unsigned ranks, unsigned cells_per_rank):
num_ranks(ranks),
num_cells_per_rank(cells_per_rank) {}
};
/// Determine available local domain resources.
local_resources get_local_resources();
...@@ -76,12 +85,13 @@ context make_context();
context make_context(const proc_allocation& resources);

// Distributed context that uses MPI communicator comm, and local resources
// described by resources. Or dry run context that uses dry_run_info.
template <typename Comm>
context make_context(const proc_allocation& resources, Comm comm);

// Queries for properties of execution resources in a context.
std::string distribution_type(const context&);
bool has_gpu(const context&);
unsigned num_threads(const context&);
bool has_mpi(const context&);
...
#pragma once
#include <cstddef>
#include <memory>
#include <unordered_map>
#include <stdexcept>
#include <arbor/recipe.hpp>
namespace arb {
// tile inherits from recipe but is not a regular recipe.
// It is used to describe a recipe on a single rank
// that will be translated to all other ranks using symmetric_recipe.
// This means it can allow connections from gid >= ncells
// which should not be allowed for other recipes.
class tile: public recipe {
public:
virtual cell_size_type num_tiles() const { return 1; }
};
// symmetric recipe takes a tile and duplicates it across
// as many ranks as tile indicates. Its functions call the
// underlying functions of tile and perform transformations
// on the results when needed.
class symmetric_recipe: public recipe {
public:
symmetric_recipe(std::unique_ptr<tile> rec): tiled_recipe_(std::move(rec)) {}
cell_size_type num_cells() const override {
return tiled_recipe_->num_cells() * tiled_recipe_->num_tiles();
}
util::unique_any get_cell_description(cell_gid_type i) const override {
return tiled_recipe_->get_cell_description(i % tiled_recipe_->num_cells());
}
cell_kind get_cell_kind(cell_gid_type i) const override {
return tiled_recipe_->get_cell_kind(i % tiled_recipe_->num_cells());
}
cell_size_type num_sources(cell_gid_type i) const override {
return tiled_recipe_->num_sources(i % tiled_recipe_->num_cells());
}
cell_size_type num_targets(cell_gid_type i) const override {
return tiled_recipe_->num_targets(i % tiled_recipe_->num_cells());
}
cell_size_type num_probes(cell_gid_type i) const override {
return tiled_recipe_->num_probes(i % tiled_recipe_->num_cells());
}
// Only function that calls the underlying tile's function on the same gid.
// This is because applying transformations to event generators is not straightforward.
std::vector<event_generator> event_generators(cell_gid_type i) const override {
return tiled_recipe_->event_generators(i);
}
// Take connections_on from the original tile recipe for the cell we are duplicating.
// Translate the source and destination gids
std::vector<cell_connection> connections_on(cell_gid_type i) const override {
int n_local = tiled_recipe_->num_cells();
int n_global = num_cells();
int offset = (i / n_local) * n_local;
std::vector<cell_connection> conns = tiled_recipe_->connections_on(i % n_local);
for (unsigned j = 0; j < conns.size(); j++) {
conns[j].source = (conns[j].source + offset) % n_global;
conns[j].dest = (conns[j].dest + offset) % n_global;
}
return conns;
}
probe_info get_probe(cell_member_type probe_id) const override {
probe_id.gid %= tiled_recipe_->num_cells();
return tiled_recipe_->get_probe(probe_id);
}
util::any get_global_properties(cell_kind ck) const override {
return tiled_recipe_->get_global_properties(ck);
};
std::unique_ptr<tile> tiled_recipe_;
};
} // namespace arb
...@@ -56,6 +56,7 @@ set(unit_sources
    test_any.cpp
    test_backend.cpp
    test_double_buffer.cpp
    test_dry_run_context.cpp
    test_compartments.cpp
    test_counter.cpp
    test_cycle.cpp
...
#include <vector>
#include <cstring>
#include "../gtest.h"
#include <distributed_context.hpp>
#include <arbor/spike.hpp>
// Test that there are no errors constructing a distributed_context from a dry_run_context
using distributed_context_handle = std::shared_ptr<arb::distributed_context>;
unsigned num_ranks = 100;
unsigned num_cells_per_rank = 1000;
TEST(dry_run_context, construct_distributed_context)
{
distributed_context_handle ctx = arb::make_dry_run_context(num_ranks, num_cells_per_rank);
}
TEST(dry_run_context, size_rank)
{
distributed_context_handle ctx = arb::make_dry_run_context(num_ranks, num_cells_per_rank);
EXPECT_EQ(ctx->size(), num_ranks);
EXPECT_EQ(ctx->id(), 0);
}
TEST(dry_run_context, minmax)
{
distributed_context_handle ctx = arb::make_dry_run_context(num_ranks, num_cells_per_rank);
EXPECT_EQ(1., ctx->min(1.));
EXPECT_EQ(1., ctx->max(1.));
EXPECT_EQ(1.f, ctx->min(1.f));
EXPECT_EQ(1.f, ctx->max(1.f));
int32_t one32 = 1;
EXPECT_EQ(one32, ctx->min(one32));
EXPECT_EQ(one32, ctx->max(one32));
int64_t one64 = 1;
EXPECT_EQ(one64, ctx->min(one64));
EXPECT_EQ(one64, ctx->max(one64));
}
TEST(dry_run_context, sum)
{
    distributed_context_handle ctx = arb::make_dry_run_context(num_ranks, num_cells_per_rank);

    EXPECT_EQ(42.*num_ranks,  ctx->sum(42.));
    EXPECT_EQ(42.f*num_ranks, ctx->sum(42.f));
    EXPECT_EQ(int(42*num_ranks), ctx->sum(42));
    EXPECT_EQ(42u*num_ranks,  ctx->sum(42u));
}
TEST(dry_run_context, gather_spikes)
{
distributed_context_handle ctx = arb::make_dry_run_context(4, 4);
using svec = std::vector<arb::spike>;
svec spikes = {
{{0u,3u}, 42.f},
{{1u,2u}, 42.f},
{{2u,1u}, 42.f},
{{3u,0u}, 42.f},
};
svec gathered_spikes = {
{{0u,3u}, 42.f},
{{1u,2u}, 42.f},
{{2u,1u}, 42.f},
{{3u,0u}, 42.f},
{{4u,3u}, 42.f},
{{5u,2u}, 42.f},
{{6u,1u}, 42.f},
{{7u,0u}, 42.f},
{{8u,3u}, 42.f},
{{9u,2u}, 42.f},
{{10u,1u}, 42.f},
{{11u,0u}, 42.f},
{{12u,3u}, 42.f},
{{13u,2u}, 42.f},
{{14u,1u}, 42.f},
{{15u,0u}, 42.f},
};
auto s = ctx->gather_spikes(spikes);
auto& part = s.partition();
EXPECT_EQ(s.values(), gathered_spikes);
EXPECT_EQ(part.size(), 5u);
EXPECT_EQ(part[0], 0u);
EXPECT_EQ(part[1], spikes.size());
EXPECT_EQ(part[2], spikes.size()*2);
EXPECT_EQ(part[3], spikes.size()*3);
EXPECT_EQ(part[4], spikes.size()*4);
}