diff --git a/cmake/CompilerOptions.cmake b/cmake/CompilerOptions.cmake
index b15e252acf81e3cb2338f45971c50582c5136a06..1db887eec3f02d68cf0e3e593a4e66ed8ced96b5 100644
--- a/cmake/CompilerOptions.cmake
+++ b/cmake/CompilerOptions.cmake
@@ -20,6 +20,16 @@ if(${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
     # flag initializations such as
     #     std::array<int,3> a={1,2,3};
     set(CXXOPT_WALL "${CXXOPT_WALL} -Wno-missing-braces")
+
+    # Clang is erroneously warning that T is an 'unused type alias' in code like this:
+    # struct X {
+    #     using T = decltype(expression);
+    #     T x;
+    # };
+    set(CXXOPT_WALL "${CXXOPT_WALL} -Wno-unused-local-typedef")
+
+    # Ignore warnings when the format string passed to snprintf is not a string literal.
+    set(CXXOPT_WALL "${CXXOPT_WALL} -Wno-format-security")
 endif()
 
 if(${CMAKE_CXX_COMPILER_ID} MATCHES "GNU")
@@ -37,14 +47,14 @@ if(${CMAKE_CXX_COMPILER_ID} MATCHES "GNU")
 endif()
 
 if(${CMAKE_CXX_COMPILER_ID} MATCHES "Intel")
-    # Disable warning for unused template parameter
-    # this is raised by a templated function in the json library.
-    set(CXXOPT_WALL "${CXXOPT_WALL} -wd488")
-
     # Compiler flags for generating KNL-specific AVX512 instructions.
     set(CXXOPT_KNL "-xMIC-AVX512")
     set(CXXOPT_AVX "-xAVX")
     set(CXXOPT_AVX2 "-xCORE-AVX2")
     set(CXXOPT_AVX512 "-xCORE-AVX512")
+
+    # Disable warning for unused template parameter;
+    # this is raised by a templated function in the json library.
+    set(CXXOPT_WALL "${CXXOPT_WALL} -wd488")
 endif()
 
diff --git a/miniapp/io.cpp b/miniapp/io.cpp
index 89a976b9a0da367c22508cceb07a235809bf63d5..631ff9e5aa7f4246719b82199d03ac7e2ccc7dc2 100644
--- a/miniapp/io.cpp
+++ b/miniapp/io.cpp
@@ -160,9 +160,6 @@ cl_options read_options(int argc, char** argv, bool allow_write) {
             "m","alltoall","all to all network", cmd, false);
         TCLAP::SwitchArg ring_arg(
             "r","ring","ring network", cmd, false);
-        TCLAP::ValueArg<uint32_t> group_size_arg(
-            "g", "group-size", "number of cells per cell group",
-            false, defopts.compartments_per_segment, "integer", cmd);
         TCLAP::ValueArg<double> sample_dt_arg(
             "", "sample-dt", "set sampling interval to <time> ms",
             false, defopts.bin_dt, "time", cmd);
@@ -229,7 +226,6 @@ cl_options read_options(int argc, char** argv, bool allow_write) {
                     update_option(options.tfinal, fopts, "tfinal");
                     update_option(options.all_to_all, fopts, "all_to_all");
                     update_option(options.ring, fopts, "ring");
-                    update_option(options.group_size, fopts, "group_size");
                     update_option(options.sample_dt, fopts, "sample_dt");
                     update_option(options.probe_ratio, fopts, "probe_ratio");
                     update_option(options.probe_soma_only, fopts, "probe_soma_only");
@@ -280,7 +276,6 @@ cl_options read_options(int argc, char** argv, bool allow_write) {
         update_option(options.bin_regular, bin_regular_arg);
         update_option(options.all_to_all, all_to_all_arg);
         update_option(options.ring, ring_arg);
-        update_option(options.group_size, group_size_arg);
         update_option(options.sample_dt, sample_dt_arg);
         update_option(options.probe_ratio, probe_ratio_arg);
         update_option(options.probe_soma_only, probe_soma_only_arg);
@@ -308,10 +303,6 @@ cl_options read_options(int argc, char** argv, bool allow_write) {
             throw usage_error("can specify at most one of --ring and --all-to-all");
         }
 
-        if (options.group_size<1) {
-            throw usage_error("minimum of one cell per group");
-        }
-
         save_file = ofile_arg.getValue();
     }
     catch (TCLAP::ArgException& e) {
@@ -335,7 +326,6 @@ cl_options read_options(int argc, char** argv, bool allow_write) {
                 fopts["tfinal"] = options.tfinal;
                 fopts["all_to_all"] = options.all_to_all;
                 fopts["ring"] = options.ring;
-                fopts["group_size"] = options.group_size;
                 fopts["sample_dt"] = options.sample_dt;
                 fopts["probe_ratio"] = options.probe_ratio;
                 fopts["probe_soma_only"] = options.probe_soma_only;
@@ -388,7 +378,6 @@ std::ostream& operator<<(std::ostream& o, const cl_options& options) {
         (options.bin_dt==0? "none": options.bin_regular? "regular": "following") << "\n";
     o << "  all to all network   : " << (options.all_to_all ? "yes" : "no") << "\n";
     o << "  ring network         : " << (options.ring ? "yes" : "no") << "\n";
-    o << "  group size           : " << options.group_size << "\n";
     o << "  sample dt            : " << options.sample_dt << "\n";
     o << "  probe ratio          : " << options.probe_ratio << "\n";
     o << "  probe soma only      : " << (options.probe_soma_only ? "yes" : "no") << "\n";
diff --git a/miniapp/io.hpp b/miniapp/io.hpp
index a2887d2cb344aa677c24673de9e162491e7e998a..8dbda791490b72e2eccda4a520852f5e77693624 100644
--- a/miniapp/io.hpp
+++ b/miniapp/io.hpp
@@ -34,7 +34,6 @@ struct cl_options {
     // Simulation running parameters:
     double tfinal = 100.;
     double dt = 0.025;
-    uint32_t group_size = 1;
     bool bin_regular = false; // False => use 'following' instead of 'regular'.
     double bin_dt = 0.0025;   // 0 => no binning.
 
diff --git a/miniapp/miniapp.cpp b/miniapp/miniapp.cpp
index d7db89deab84daa8d49e09b30b6c1d4b86e5f61c..b4af4cb4cee64330ad02a018e01d0666afee3422 100644
--- a/miniapp/miniapp.cpp
+++ b/miniapp/miniapp.cpp
@@ -12,6 +12,8 @@
 #include <communication/global_policy.hpp>
 #include <cell.hpp>
 #include <fvm_multicell.hpp>
+#include <hardware/gpu.hpp>
+#include <hardware/node_info.hpp>
 #include <io/exporter_spike_file.hpp>
 #include <model.hpp>
 #include <profiling/profiler.hpp>
@@ -31,7 +33,7 @@ using namespace nest::mc;
 using global_policy = communication::global_policy;
 using sample_trace_type = sample_trace<time_type, double>;
 using file_export_type = io::exporter_spike_file<global_policy>;
-void banner();
+void banner(hw::node_info);
 std::unique_ptr<recipe> make_recipe(const io::cl_options&, const probe_distribution&);
 std::unique_ptr<sample_trace_type> make_trace(probe_record probe);
 using communicator_type = communication::communicator<communication::global_policy>;
@@ -67,7 +69,12 @@ int main(int argc, char** argv) {
             global_policy::set_sizes(options.dry_run_ranks, cells_per_rank);
         }
 
-        banner();
+        // Construct a node description from the number of threads used by the
+        // threading back end, and one GPU if it is available.
+        hw::node_info nd;
+        nd.num_cpu_cores = threading::num_threads();
+        nd.num_gpus = hw::num_gpus()>0? 1: 0;
+        banner(nd);
 
         meters.checkpoint("setup");
 
@@ -85,11 +92,7 @@ int main(int argc, char** argv) {
                     options.file_extension, options.over_write);
         };
 
-        group_rules rules;
-        rules.policy = config::has_cuda?
-            backend_policy::prefer_gpu: backend_policy::use_multicore;
-        rules.target_group_size = options.group_size;
-        auto decomp = domain_decomposition(*recipe, rules);
+        auto decomp = domain_decomposition(*recipe, nd);
 
         model m(*recipe, decomp);
 
@@ -144,8 +147,7 @@ int main(int argc, char** argv) {
         meters.checkpoint("model-simulate");
 
         // output profile and diagnostic feedback
-        auto const num_steps = options.tfinal / options.dt;
-        util::profiler_output(0.001, m.num_cells()*num_steps, options.profile_only_zero);
+        util::profiler_output(0.001, options.profile_only_zero);
         std::cout << "there were " << m.num_spikes() << " spikes\n";
 
         // save traces
@@ -176,13 +178,15 @@ int main(int argc, char** argv) {
     return 0;
 }
 
-void banner() {
-    std::cout << "====================\n";
+void banner(hw::node_info nd) {
+    std::cout << "==========================================\n";
     std::cout << "  NestMC miniapp\n";
-    std::cout << "  - " << threading::description() << " threading support (" << threading::num_threads() << ")\n";
-    std::cout << "  - communication policy: " << std::to_string(global_policy::kind()) << " (" << global_policy::size() << ")\n";
-    std::cout << "  - gpu support: " << (config::has_cuda? "on": "off") << "\n";
-    std::cout << "====================\n";
+    std::cout << "  - distributed : " << global_policy::size()
+              << " (" << std::to_string(global_policy::kind()) << ")\n";
+    std::cout << "  - threads     : " << nd.num_cpu_cores
+              << " (" << threading::description() << ")\n";
+    std::cout << "  - gpus        : " << nd.num_gpus << "\n";
+    std::cout << "==========================================\n";
 }
 
 std::unique_ptr<recipe> make_recipe(const io::cl_options& options, const probe_distribution& pdist) {
diff --git a/miniapp/miniapp_recipes.cpp b/miniapp/miniapp_recipes.cpp
index 45cb86f4ef3ca012969e5ca978bdaf589d058ebe..6fe6896d97a05d2ea9d4f3e39e150605e2471c53 100644
--- a/miniapp/miniapp_recipes.cpp
+++ b/miniapp/miniapp_recipes.cpp
@@ -239,7 +239,14 @@ public:
     basic_rgraph_recipe(cell_gid_type ncell,
                       basic_recipe_param param,
                       probe_distribution pdist = probe_distribution{}):
-        basic_cell_recipe(ncell, std::move(param), std::move(pdist)) {}
+        basic_cell_recipe(ncell, std::move(param), std::move(pdist))
+    {
+        // Cells are not allowed to connect to themselves; hence there must be at least
+        // two cells to build a connected network.
+        if (ncell<2) {
+            throw std::runtime_error("A randomly connected network must have at least 2 cells.");
+        }
+    }
 
     std::vector<cell_connection> connections_on(cell_gid_type i) const override {
         std::vector<cell_connection> conns;
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 93643789a848522927570eaed4a6833d3c0b6d80..c12c49829df10687bcd690a5e7682c7e01988dee 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -14,6 +14,7 @@ set(BASE_SOURCES
     hardware/affinity.cpp
     hardware/gpu.cpp
     hardware/memory.cpp
+    hardware/node_info.cpp
     hardware/power.cpp
     threading/threading.cpp
     util/debug.cpp
diff --git a/src/backends.hpp b/src/backends.hpp
index 0941d5927a19ede939f3b02f93b89ce1276bf305..9f97d1d5b16062214d523debf4ebeee5127f3919 100644
--- a/src/backends.hpp
+++ b/src/backends.hpp
@@ -6,16 +6,19 @@
 namespace nest {
 namespace mc {
 
-enum class backend_policy {
-    use_multicore,      //  use multicore backend for all computation
-    prefer_gpu          //  use gpu back end when supported by cell_group type
+enum class backend_kind {
+    multicore,   //  use multicore backend for all computation
+    gpu          //  use gpu back end when supported by cell_group type
 };
 
-inline std::string to_string(backend_policy p) {
-    if (p==backend_policy::use_multicore) {
-        return "use_multicore";
+inline std::string to_string(backend_kind p) {
+    switch (p) {
+        case backend_kind::multicore:
+            return "multicore";
+        case backend_kind::gpu:
+            return "gpu";
     }
-    return "prefer_gpu";
+    return "unknown";
 }
 
 } // namespace mc
diff --git a/src/cell_group_factory.cpp b/src/cell_group_factory.cpp
index 77a604db1743b79777a7e4d3ad8d486d9e2fd4b3..857aa26035dc1f5a9ebc477c2f27968a62e95eae 100644
--- a/src/cell_group_factory.cpp
+++ b/src/cell_group_factory.cpp
@@ -2,9 +2,11 @@
 
 #include <backends.hpp>
 #include <cell_group.hpp>
+#include <domain_decomposition.hpp>
 #include <dss_cell_group.hpp>
 #include <fvm_multicell.hpp>
 #include <mc_cell_group.hpp>
+#include <recipe.hpp>
 #include <rss_cell_group.hpp>
 #include <util/unique_any.hpp>
 
@@ -14,26 +16,29 @@ namespace mc {
 using gpu_fvm_cell = mc_cell_group<fvm::fvm_multicell<gpu::backend>>;
 using mc_fvm_cell = mc_cell_group<fvm::fvm_multicell<multicore::backend>>;
 
-cell_group_ptr cell_group_factory(
-        cell_kind kind,
-        cell_gid_type first_gid,
-        const std::vector<util::unique_any>& cell_descriptions,
-        backend_policy backend)
-{
-    switch (kind) {
+cell_group_ptr cell_group_factory(const recipe& rec, const group_description& group) {
+    // Make a list of all the cell descriptions to be forwarded
+    // to the appropriate cell_group constructor.
+    std::vector<util::unique_any> descriptions;
+    descriptions.reserve(group.gids.size());
+    for (auto gid: group.gids) {
+        descriptions.push_back(rec.get_cell_description(gid));
+    }
+
+    switch (group.kind) {
     case cell_kind::cable1d_neuron:
-        if (backend == backend_policy::prefer_gpu) {
-            return make_cell_group<gpu_fvm_cell>(first_gid, cell_descriptions);
+        if (group.backend == backend_kind::gpu) {
+            return make_cell_group<gpu_fvm_cell>(group.gids, descriptions);
         }
         else {
-            return make_cell_group<mc_fvm_cell>(first_gid, cell_descriptions);
+            return make_cell_group<mc_fvm_cell>(group.gids, descriptions);
         }
 
     case cell_kind::regular_spike_source:
-        return make_cell_group<rss_cell_group>(first_gid, cell_descriptions);
+        return make_cell_group<rss_cell_group>(group.gids, descriptions);
 
     case cell_kind::data_spike_source:
-        return make_cell_group<dss_cell_group>(first_gid, cell_descriptions);
+        return make_cell_group<dss_cell_group>(group.gids, descriptions);
 
     default:
         throw std::runtime_error("unknown cell kind");
diff --git a/src/cell_group_factory.hpp b/src/cell_group_factory.hpp
index e5c4718532b3b9ec08b0388b4fc73b17dd8dfbb1..e639b2c848966da6392a6609956a48aeed12219c 100644
--- a/src/cell_group_factory.hpp
+++ b/src/cell_group_factory.hpp
@@ -4,17 +4,15 @@
 
 #include <backends.hpp>
 #include <cell_group.hpp>
+#include <domain_decomposition.hpp>
+#include <recipe.hpp>
 #include <util/unique_any.hpp>
 
 namespace nest {
 namespace mc {
 
 // Helper factory for building cell groups
-cell_group_ptr cell_group_factory(
-    cell_kind kind,
-    cell_gid_type first_gid,
-    const std::vector<util::unique_any>& cells,
-    backend_policy backend);
+cell_group_ptr cell_group_factory(const recipe& rec, const group_description& group);
 
 } // namespace mc
 } // namespace nest
diff --git a/src/communication/communicator.hpp b/src/communication/communicator.hpp
index ba671b4ce19dd05ba4d392b5aadbc2df5a1e62eb..553e1bd7c851b29ba5f44d76104c131d503460de 100644
--- a/src/communication/communicator.hpp
+++ b/src/communication/communicator.hpp
@@ -10,11 +10,14 @@
 #include <common_types.hpp>
 #include <connection.hpp>
 #include <communication/gathered_vector.hpp>
+#include <domain_decomposition.hpp>
 #include <event_queue.hpp>
+#include <recipe.hpp>
 #include <spike.hpp>
 #include <util/debug.hpp>
 #include <util/double_buffer.hpp>
 #include <util/partition.hpp>
+#include <util/rangeutil.hpp>
 
 namespace nest {
 namespace mc {
@@ -45,31 +48,71 @@ public:
 
     communicator() {}
 
-    explicit communicator(gid_partition_type cell_gid_partition):
-        cell_gid_partition_(cell_gid_partition)
-    {}
-
-    cell_local_size_type num_groups_local() const
-    {
-        return cell_gid_partition_.size();
-    }
-
-    void add_connection(connection con) {
-        EXPECTS(is_local_cell(con.destination().gid));
-        connections_.push_back(con);
-    }
-
-    /// returns true if the cell with gid is on the domain of the caller
-    bool is_local_cell(cell_gid_type gid) const {
-        return algorithms::in_interval(gid, cell_gid_partition_.bounds());
-    }
+    explicit communicator(const recipe& rec, const domain_decomposition& dom_dec) {
+        using util::make_span;
+        num_domains_ = comms_.size();
+        num_local_groups_ = dom_dec.num_local_groups();
+
+        // For caching information about each cell
+        struct gid_info {
+            using connection_list = decltype(rec.connections_on(0));
+            cell_gid_type gid;
+            cell_gid_type local_group;
+            connection_list conns;
+            gid_info(cell_gid_type g, cell_gid_type lg, connection_list c):
+                gid(g), local_group(lg), conns(std::move(c)) {}
+        };
+
+        // Make a list of local gids with their group index and connections
+        //   -> gid_infos
+        // Count the number of local connections (i.e. connections terminating on this domain)
+        //   -> n_cons: scalar
+        // Calculate and store the domain id of the presynaptic cell on each local connection
+        //   -> src_domains: array with one entry for every local connection
+        // Count the number of presynaptic sources from each domain
+        //   -> src_counts: array with one entry for each domain
+        std::vector<gid_info> gid_infos;
+        gid_infos.reserve(dom_dec.num_local_cells());
+
+        cell_local_size_type n_cons = 0;
+        std::vector<unsigned> src_domains;
+        std::vector<cell_size_type> src_counts(num_domains_);
+        for (auto i: make_span(0, num_local_groups_)) {
+            const auto& group = dom_dec.get_group(i);
+            for (auto gid: group.gids) {
+                gid_info info(gid, i, rec.connections_on(gid));
+                n_cons += info.conns.size();
+                for (auto con: info.conns) {
+                    const auto src = dom_dec.gid_domain(con.source.gid);
+                    src_domains.push_back(src);
+                    src_counts[src]++;
+                }
+                gid_infos.push_back(std::move(info));
+            }
+        }
 
-    /// builds the optimized data structure
-    /// must be called after all connections have been added
-    void construct() {
-        if (!std::is_sorted(connections_.begin(), connections_.end())) {
-            threading::sort(connections_);
+        // Construct the connections.
+        // The loop above gathered the information required to construct the
+        // connections in place, partitioned by the domain of their source gid.
+        connections_.resize(n_cons);
+        connection_part_ = algorithms::make_index(src_counts);
+        auto offsets = connection_part_;
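+        // offsets[d] is the next write position in connections_ for source domain d.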
+        std::size_t pos = 0;
+        for (const auto& cell: gid_infos) {
+            for (auto c: cell.conns) {
+                const auto i = offsets[src_domains[pos]]++;
+                connections_[i] = {c.source, c.dest, c.weight, c.delay, cell.local_group};
+                ++pos;
+            }
         }
+
+        // Sort the connections for each domain.
+        // This is num_domains_ independent sorts, so it can be parallelized trivially.
+        const auto& cp = connection_part_;
+        threading::parallel_for::apply(0, num_domains_,
+            [&](cell_gid_type i) {
+                util::sort(util::subrange_view(connections_, cp[i], cp[i+1]));
+            });
     }
 
     /// the minimum delay of all connections in the global network.
@@ -79,16 +122,19 @@ public:
             local_min = std::min(local_min, con.delay());
         }
 
-        return communication_policy_.min(local_min);
+        return comms_.min(local_min);
     }
 
     /// Perform exchange of spikes.
     ///
     /// Takes as input the list of local_spikes that were generated on the calling domain.
     /// Returns the full global set of vectors, along with meta data about their partition
-    gathered_vector<spike> exchange(const std::vector<spike>& local_spikes) {
+    gathered_vector<spike> exchange(std::vector<spike> local_spikes) {
+        // sort the spikes in ascending order of source gid
+        util::sort_by(local_spikes, [](spike s){return s.source;});
+
         // global all-to-all to gather a local copy of the global spike list on each node.
-        auto global_spikes = communication_policy_.gather_spikes( local_spikes );
+        auto global_spikes = comms_.gather_spikes(local_spikes);
         num_spikes_ += global_spikes.size();
         return global_spikes;
     }
@@ -101,15 +147,51 @@ public:
     /// events in each queue are all events that must be delivered to targets in that cell
     /// group as a result of the global spike exchange.
     std::vector<event_queue> make_event_queues(const gathered_vector<spike>& global_spikes) {
-        auto queues = std::vector<event_queue>(num_groups_local());
-        for (auto spike : global_spikes.values()) {
-            // search for targets
-            auto targets = std::equal_range(connections_.begin(), connections_.end(), spike.source);
-
-            // generate an event for each target
-            for (auto it=targets.first; it!=targets.second; ++it) {
-                auto gidx = cell_group_index(it->destination().gid);
-                queues[gidx].push_back(it->make_event(spike));
+        using util::subrange_view;
+        using util::make_span;
+        using util::make_range;
+
+        auto queues = std::vector<event_queue>(num_local_groups_);
+        const auto& sp = global_spikes.partition();
+        const auto& cp = connection_part_;
+        for (auto dom: make_span(0, num_domains_)) {
+            auto cons = subrange_view(connections_, cp[dom], cp[dom+1]);
+            auto spks = subrange_view(global_spikes.values(), sp[dom], sp[dom+1]);
+
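+            // Comparison predicate for equal_range: compares a spike against a
+            // connection's source id by the spike's source.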
+            struct spike_pred {
+                bool operator()(const spike& spk, const cell_member_type& src)
+                    {return spk.source<src;}
+                bool operator()(const cell_member_type& src, const spike& spk)
+                    {return src<spk.source;}
+            };
+
+            if (cons.size()<spks.size()) {
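+                // Fewer connections than spikes in this domain: for each connection,
+                // binary-search the sorted spikes for those with a matching source.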
+                auto sp = spks.begin();
+                auto cn = cons.begin();
+                while (cn!=cons.end() && sp!=spks.end()) {
+                    auto sources = std::equal_range(sp, spks.end(), cn->source(), spike_pred());
+
+                    for (auto s: make_range(sources.first, sources.second)) {
+                        queues[cn->group_index()].push_back(cn->make_event(s));
+                    }
+
+                    sp = sources.first;
+                    ++cn;
+                }
+            }
+            else {
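+                // Fewer spikes than connections: for each spike, binary-search
+                // the sorted connections for those with a matching source.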
+                auto cn = cons.begin();
+                auto sp = spks.begin();
+                while (cn!=cons.end() && sp!=spks.end()) {
+                    auto targets = std::equal_range(cn, cons.end(), sp->source);
+
+                    for (auto c: make_range(targets.first, targets.second)) {
+                        queues[c.group_index()].push_back(c.make_event(*sp));
+                    }
+
+                    cn = targets.first;
+                    ++sp;
+                }
             }
         }
 
@@ -117,33 +199,23 @@ public:
     }
 
     /// Returns the total number of global spikes over the duration of the simulation
-    uint64_t num_spikes() const { return num_spikes_; }
+    std::uint64_t num_spikes() const { return num_spikes_; }
 
     const std::vector<connection>& connections() const {
         return connections_;
     }
 
-    communication_policy_type communication_policy() const {
-        return communication_policy_;
-    }
-
     void reset() {
         num_spikes_ = 0;
     }
 
 private:
-    std::size_t cell_group_index(cell_gid_type cell_gid) const {
-        EXPECTS(is_local_cell(cell_gid));
-        return cell_gid_partition_.index(cell_gid);
-    }
-
+    cell_size_type num_local_groups_;
+    cell_size_type num_domains_;
     std::vector<connection> connections_;
-
-    communication_policy_type communication_policy_;
-
-    uint64_t num_spikes_ = 0u;
-
-    gid_partition_type cell_gid_partition_;
+    std::vector<cell_size_type> connection_part_;
+    communication_policy_type comms_;
+    std::uint64_t num_spikes_ = 0u;
 };
 
 } // namespace communication
diff --git a/src/connection.hpp b/src/connection.hpp
index e65100a97c01291ab37eeba4670710a4ff65ff0d..65d039151c1863ac39cea9d959fc498b0e5ab4eb 100644
--- a/src/connection.hpp
+++ b/src/connection.hpp
@@ -12,11 +12,16 @@ namespace mc {
 class connection {
 public:
     connection() = default;
-    connection(cell_member_type src, cell_member_type dest, float w, time_type d) :
+    connection( cell_member_type src,
+                cell_member_type dest,
+                float w,
+                time_type d,
+                cell_gid_type gidx=cell_gid_type(-1)):
         source_(src),
         destination_(dest),
         weight_(w),
-        delay_(d)
+        delay_(d),
+        group_index_(gidx)
     {}
 
     float weight() const { return weight_; }
@@ -24,6 +29,7 @@ public:
 
     cell_member_type source() const { return source_; }
     cell_member_type destination() const { return destination_; }
+    cell_gid_type group_index() const { return group_index_; }
 
     postsynaptic_spike_event make_event(const spike& s) {
         return {destination_, s.time + delay_, weight_};
@@ -34,6 +40,7 @@ private:
     cell_member_type destination_;
     float weight_;
     time_type delay_;
+    cell_gid_type group_index_;
 };
 
 // connections are sorted by source id
diff --git a/src/domain_decomposition.hpp b/src/domain_decomposition.hpp
index a4d5d406f794d4dab85e4f249870de8c33c4f495..796d5dc40e1d6fd8c917f00240bd2eb081d2ea36 100644
--- a/src/domain_decomposition.hpp
+++ b/src/domain_decomposition.hpp
@@ -1,10 +1,13 @@
 #pragma once
 
+#include <type_traits>
 #include <vector>
+#include <unordered_map>
 
 #include <backends.hpp>
 #include <common_types.hpp>
 #include <communication/global_policy.hpp>
+#include <hardware/node_info.hpp>
 #include <recipe.hpp>
 #include <util/optional.hpp>
 #include <util/partition.hpp>
@@ -13,97 +16,91 @@
 namespace nest {
 namespace mc {
 
-// Meta data used to guide the domain_decomposition in distributing
-// and grouping cells.
-struct group_rules {
-    cell_size_type target_group_size;
-    backend_policy policy;
+inline bool has_gpu_backend(cell_kind k) {
+    if (k==cell_kind::cable1d_neuron) {
+        return true;
+    }
+    return false;
+}
+
+/// Utility type for meta data for a local cell group.
+struct group_description {
+    const cell_kind kind;
+    const std::vector<cell_gid_type> gids;
+    const backend_kind backend;
+
+    group_description(cell_kind k, std::vector<cell_gid_type> g, backend_kind b):
+        kind(k), gids(std::move(g)), backend(b)
+    {}
 };
 
 class domain_decomposition {
-    using gid_partition_type =
-        util::partition_range<std::vector<cell_gid_type>::const_iterator>;
-
 public:
-    /// Utility type for meta data for a local cell group.
-    struct group_range_type {
-        cell_gid_type begin;
-        cell_gid_type end;
-        cell_kind kind;
-    };
-
-    domain_decomposition(const recipe& rec, const group_rules& rules):
-        backend_policy_(rules.policy)
+    domain_decomposition(const recipe& rec, hw::node_info nd):
+        node_(nd)
     {
-        EXPECTS(rules.target_group_size>0);
-
-        auto num_domains = communication::global_policy::size();
-        auto domain_id = communication::global_policy::id();
+        using kind_type = std::underlying_type<cell_kind>::type;
+        using util::make_span;
 
-        // Partition the cells globally across the domains.
+        num_domains_ = communication::global_policy::size();
+        domain_id_ = communication::global_policy::id();
         num_global_cells_ = rec.num_cells();
-        cell_begin_ = (cell_gid_type)(num_global_cells_*(domain_id/(double)num_domains));
-        cell_end_ = (cell_gid_type)(num_global_cells_*((domain_id+1)/(double)num_domains));
-
-        // Partition the local cells into cell groups that satisfy three
-        // criteria:
-        //  1. the cells in a group have contiguous gid
-        //  2. the size of a cell group does not exceed rules.target_group_size;
-        //  3. all cells in a cell group have the same cell_kind.
-        // This simple greedy algorithm appends contiguous cells to a cell
-        // group until either the target group size is reached, or a cell with a
-        // different kind is encountered.
-        // On completion, cell_starts_ partitions the local gid into cell
-        // groups, and group_kinds_ records the cell kind in each cell group.
-        if (num_local_cells()>0) {
-            cell_size_type group_size = 1;
-
-            // 1st group starts at cell_begin_
-            group_starts_.push_back(cell_begin_);
-            auto group_kind = rec.get_cell_kind(cell_begin_);
-
-            // set kind for 1st group
-            group_kinds_.push_back(group_kind);
-
-            cell_gid_type gid = cell_begin_+1;
-            while (gid<cell_end_) {
-                auto kind = rec.get_cell_kind(gid);
-
-                // Test if gid belongs to a new cell group, i.e. whether it has
-                // a new cell_kind or if the target group size has been reached.
-                if (kind!=group_kind || group_size>=rules.target_group_size) {
-                    group_starts_.push_back(gid);
-                    group_kinds_.push_back(kind);
-                    group_size = 0;
-                }
-                ++group_size;
-                ++gid;
-            }
-            group_starts_.push_back(cell_end_);
+
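+        // Distribute the global cells as evenly as possible over the domains:
+        // the first R domains are assigned B+1 cells and the rest B cells,
+        // e.g. 10 cells over 3 domains gives domain sizes {4, 3, 3}.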
+        auto dom_size = [this](unsigned dom) -> cell_gid_type {
+            const cell_gid_type B = num_global_cells_/num_domains_;
+            const cell_gid_type R = num_global_cells_ - num_domains_*B;
+            return B + (dom<R);
+        };
+
+        // TODO: load balancing logic will be refactored into its own class,
+        // and the domain decomposition will become a much simpler representation
+        // of the result distribution of cells over domains.
+
+        // Global load balance
+
+        gid_part_ = make_partition(
+            gid_divisions_, transform_view(make_span(0, num_domains_), dom_size));
+
+        // Local load balance
+
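+        // Group the local gids by cell kind.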
+        std::unordered_map<kind_type, std::vector<cell_gid_type>> kind_lists;
+        for (auto gid: make_span(gid_part_[domain_id_])) {
+            kind_lists[rec.get_cell_kind(gid)].push_back(gid);
         }
-    }
 
-    /// Returns the local index of the cell_group that contains a cell with
-    /// with gid.
-    /// If the cell is not on the local domain, the optional return value is
-    /// not set.
-    util::optional<cell_size_type>
-    local_group_from_gid(cell_gid_type gid) const {
-        // check if gid is a local cell
-        if (!is_local_gid(gid)) {
-            return util::nothing;
+        // Create a flat vector of the cell kinds present on this node,
+        // partitioned such that kinds for which a GPU implementation is
+        // available are listed before the others. This is a very primitive
+        // attempt at scheduling; the cell_groups that run on the GPU will be
+        // executed before other cell_groups, which is likely to be more efficient.
+        //
+        // TODO: This creates a dependency between the load balancer and
+        // the threading internals. We need support for setting the priority
+        // of cell group updates according to rules such as the back end on
+        // which the cell group is running.
+        std::vector<cell_kind> kinds;
+        for (auto l: kind_lists) {
+            kinds.push_back(cell_kind(l.first));
         }
-        return gid_group_partition().index(gid);
-    }
+        std::partition(kinds.begin(), kinds.end(), has_gpu_backend);
 
-    /// Returns the gid of the first cell on the local domain.
-    cell_gid_type cell_begin() const {
-        return cell_begin_;
+        for (auto k: kinds) {
+            // put all cells into a single cell group on the gpu if possible
+            if (node_.num_gpus && has_gpu_backend(k)) {
+                groups_.push_back({k, std::move(kind_lists[k]), backend_kind::gpu});
+            }
+            // otherwise place into cell groups of size 1 on the cpu cores
+            else {
+                for (auto gid: kind_lists[k]) {
+                    groups_.push_back({k, {gid}, backend_kind::multicore});
+                }
+            }
+        }
     }
 
-    /// Returns one past the gid of the last cell in the local domain.
-    cell_gid_type cell_end() const {
-        return cell_end_;
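+    /// Returns the index of the domain that contains the cell with the given gid.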
+    int gid_domain(cell_gid_type gid) const {
+        EXPECTS(gid<num_global_cells_);
+        return gid_part_.index(gid);
     }
 
     /// Returns the total number of cells in the global model.
@@ -113,42 +110,35 @@ public:
 
     /// Returns the number of cells on the local domain.
     cell_size_type num_local_cells() const {
-        return cell_end()-cell_begin();
+        auto rng = gid_part_[domain_id_];
+        return rng.second - rng.first;
     }
 
     /// Returns the number of cell groups on the local domain.
     cell_size_type num_local_groups() const {
-        return group_kinds_.size();
+        return groups_.size();
     }
 
     /// Returns meta data for a local cell group.
-    group_range_type get_group(cell_size_type i) const {
-        return {group_starts_[i], group_starts_[i+1], group_kinds_[i]};
+    const group_description& get_group(cell_size_type i) const {
+        EXPECTS(i<num_local_groups());
+        return groups_[i];
     }
 
     /// Tests whether a gid is on the local domain.
-    bool is_local_gid(cell_gid_type i) const {
-        return i>=cell_begin_ && i<cell_end_;
-    }
-
-    /// Return a partition of the cell gid over local cell groups.
-    gid_partition_type gid_group_partition() const {
-        return util::partition_view(group_starts_);
-    }
-
-    /// Returns the backend policy.
-    backend_policy backend() const {
-        return backend_policy_;
+    bool is_local_gid(cell_gid_type gid) const {
+        return algorithms::in_interval(gid, gid_part_[domain_id_]);
     }
 
 private:
-
-    backend_policy backend_policy_;
-    cell_gid_type cell_begin_;
-    cell_gid_type cell_end_;
+    int num_domains_;
+    int domain_id_;
+    hw::node_info node_;
     cell_size_type num_global_cells_;
-    std::vector<cell_size_type> group_starts_;
+    std::vector<cell_gid_type> gid_divisions_;
+    decltype(util::make_partition(gid_divisions_, gid_divisions_)) gid_part_;
     std::vector<cell_kind> group_kinds_;
+    std::vector<group_description> groups_;
 };
 
 } // namespace mc
diff --git a/src/dss_cell_group.hpp b/src/dss_cell_group.hpp
index c30d3f44adbadd8715b9d8f2b9f13e0fe776a461..d51a4e1f7bd847239a2682b8e7de46dd250f56d3 100644
--- a/src/dss_cell_group.hpp
+++ b/src/dss_cell_group.hpp
@@ -9,27 +9,21 @@ namespace nest {
 namespace mc {
 
 /// Cell_group to collect spike sources
-class dss_cell_group : public cell_group {
+class dss_cell_group: public cell_group {
 public:
-    using source_id_type = cell_member_type;
-
-    dss_cell_group(cell_gid_type first_gid, const std::vector<util::unique_any>& cell_descriptions):
-        gid_base_(first_gid)
+    dss_cell_group(std::vector<cell_gid_type> gids,
+                   const std::vector<util::unique_any>& cell_descriptions):
+        gids_(std::move(gids))
     {
         using util::make_span;
         for (cell_gid_type i: make_span(0, cell_descriptions.size())) {
             // store spike times from description
-            const auto times = util::any_cast<dss_cell_description>(cell_descriptions[i]).spike_times;
-            spike_times_.push_back(std::vector<time_type>(times));
-
-            // Assure the spike times are sorted
-            std::sort(spike_times_[i].begin(), spike_times_[i].end());
+            auto times = util::any_cast<dss_cell_description>(cell_descriptions[i]).spike_times;
+            util::sort(times);
+            spike_times_.push_back(std::move(times));
 
-            // Now we can grab the first spike time
+            // Store an iterator to the first spike time
             not_emit_it_.push_back(spike_times_[i].begin());
-
-            // create a lid to gid map
-            spike_sources_.push_back({gid_base_+i, 0});
         }
     }
 
@@ -40,12 +34,11 @@ public:
     }
 
     void reset() override {
-        // Declare both iterators outside of the for loop for consistency
+        // Reset the iterators to the next undelivered spike to the start
+        // of the input range.
         auto it = not_emit_it_.begin();
         auto times = spike_times_.begin();
-
         for (;it != not_emit_it_.end(); ++it, times++) {
-            // Point to the first not emitted spike.
             *it = times->begin();
         }
 
@@ -56,25 +49,25 @@ public:
     {}
 
     void advance(time_type tfinal, time_type dt) override {
-        for (auto cell_idx: util::make_span(0, not_emit_it_.size())) {
+        for (auto i: util::make_span(0, not_emit_it_.size())) {
             // The first potential spike_time to emit for this cell
-            auto spike_time_it = not_emit_it_[cell_idx];
+            auto spike_time_it = not_emit_it_[i];
 
-            // Find the first to not emit and store as iterator
-            not_emit_it_[cell_idx] = std::find_if(
-                spike_time_it, spike_times_[cell_idx].end(),
+            // Find the first spike past tfinal
+            not_emit_it_[i] = std::find_if(
+                spike_time_it, spike_times_[i].end(),
                 [tfinal](time_type t) {return t >= tfinal; }
             );
 
-            // loop over the range we now have (might be empty), and create spikes
-            for (; spike_time_it != not_emit_it_[cell_idx]; ++spike_time_it) {
-                spikes_.push_back({ spike_sources_[cell_idx], *spike_time_it });
+            // Loop over the range and create spikes
+            for (; spike_time_it != not_emit_it_[i]; ++spike_time_it) {
+                spikes_.push_back({ {gids_[i], 0u}, *spike_time_it });
             }
         }
     };
 
     void enqueue_events(const std::vector<postsynaptic_spike_event>& events) override {
-        std::logic_error("The dss_cells do not support incoming events!");
+        throw std::runtime_error("The dss_cells do not support incoming events!");
     }
 
     const std::vector<spike>& spikes() const override {
@@ -94,14 +87,11 @@ public:
     }
 
 private:
-    // gid of first cell in group.
-    cell_gid_type gid_base_;
-
     // Spikes that are generated.
     std::vector<spike> spikes_;
 
-    // Spike generators attached to the cell
-    std::vector<source_id_type> spike_sources_;
+    // Map of local index to gid
+    std::vector<cell_gid_type> gids_;
 
     std::vector<probe_record> probes_;
 
diff --git a/src/event_queue.hpp b/src/event_queue.hpp
index ea97e48fa0537b44580b547d513fcdc3c54d2d48..113ac319e8efa06e6f0c07d0716db45fd916097e 100644
--- a/src/event_queue.hpp
+++ b/src/event_queue.hpp
@@ -27,6 +27,15 @@ struct postsynaptic_spike_event {
     cell_member_type target;
     time_type time;
     float weight;
+
+    friend bool operator==(postsynaptic_spike_event l, postsynaptic_spike_event r) {
+        return l.target==r.target && l.time==r.time && l.weight==r.weight;
+    }
+
+    friend std::ostream& operator<<(std::ostream& o, const nest::mc::postsynaptic_spike_event& e)
+    {
+        return o << "E[tgt " << e.target << ", t " << e.time << ", w " << e.weight << "]";
+    }
 };
 
 struct sample_event {
@@ -140,9 +149,3 @@ private:
 
 } // namespace nest
 } // namespace mc
-
-inline std::ostream& operator<<(
-    std::ostream& o, const nest::mc::postsynaptic_spike_event& e)
-{
-    return o << "event[" << e.target << "," << e.time << "," << e.weight << "]";
-}
diff --git a/src/hardware/node_info.cpp b/src/hardware/node_info.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3a7e4c1631ee3ffb32e9688dad6c773dc8142219
--- /dev/null
+++ b/src/hardware/node_info.cpp
@@ -0,0 +1,22 @@
+#include <algorithm>
+
+#include "affinity.hpp"
+#include "gpu.hpp"
+#include "node_info.hpp"
+
+namespace nest {
+namespace mc {
+namespace hw {
+
+// Return a node_info that describes the hardware resources available on this node.
+// If unable to determine the number of available cores, assumes that there is one
+// core available.
+node_info get_node_info() {
+    auto res = num_cores();
+    unsigned ncpu = res? *res: 1u;
+    return {ncpu, num_gpus()};
+}
+
+} // namespace hw
+} // namespace mc
+} // namespace nest
diff --git a/src/hardware/node_info.hpp b/src/hardware/node_info.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b7a68b517b084af40618e17d7db79fc3877c18ef
--- /dev/null
+++ b/src/hardware/node_info.hpp
@@ -0,0 +1,24 @@
+#pragma once
+
+namespace nest {
+namespace mc {
+namespace hw {
+
+// Information about the computational resources available on a compute node.
+// Currently a simple count of the number of cpu cores and gpus; this
+// description will become richer over time.
+struct node_info {
+    node_info() = default;
+    node_info(unsigned c, unsigned g):
+        num_cpu_cores(c), num_gpus(g)
+    {}
+
+    unsigned num_cpu_cores = 1;
+    unsigned num_gpus = 0;
+};
+
+node_info get_node_info();
+
+} // namespace hw
+} // namespace mc
+} // namespace nest
diff --git a/src/hardware/power.cpp b/src/hardware/power.cpp
index 3c5c9306a84b94efa62f4b3c398f97967c6639b6..9f08af85283f0e1fe27cdfb9f8a6880e84eb65dc 100644
--- a/src/hardware/power.cpp
+++ b/src/hardware/power.cpp
@@ -9,7 +9,7 @@ namespace hw {
 #ifdef NMC_HAVE_CRAY
 
 energy_size_type energy() {
-    energy_size_type result = -1;
+    energy_size_type result = energy_size_type(-1);
 
     std::ifstream fid("/sys/cray/pm_counters/energy");
     if (fid) {
diff --git a/src/mc_cell_group.hpp b/src/mc_cell_group.hpp
index f6ef43da4435fc3cfd06f0828be0491683002c62..4e25d15c88a3768caa62d1efa2d9bdae3241bb0a 100644
--- a/src/mc_cell_group.hpp
+++ b/src/mc_cell_group.hpp
@@ -3,6 +3,7 @@
 #include <cstdint>
 #include <functional>
 #include <iterator>
+#include <unordered_map>
 #include <vector>
 
 #include <algorithms.hpp>
@@ -35,9 +36,14 @@ public:
     mc_cell_group() = default;
 
     template <typename Cells>
-    mc_cell_group(cell_gid_type first_gid, const Cells& cell_descriptions):
-        gid_base_{first_gid}
+    mc_cell_group(std::vector<cell_gid_type> gids, const Cells& cell_descriptions):
+        gids_(std::move(gids))
     {
+        // Build lookup table for gid to local index.
+        for (auto i: util::make_span(0, gids_.size())) {
+            gid2lid_[gids_[i]] = i;
+        }
+
         // Create lookup structure for probe and target ids.
         build_handle_partitions(cell_descriptions);
         std::size_t n_probes = probe_handle_divisions_.back();
@@ -51,41 +57,42 @@ public:
 
         lowered_.initialize(cell_descriptions, target_handles_, probe_handles_);
 
-        // Create a list of the global identifiers for the spike sources
-        auto source_gid = cell_gid_type{gid_base_};
+        // Create a list of the global identifiers for the spike sources.
+        auto source_gid = gids_.begin();
         for (const auto& cell: cell_descriptions) {
             for (cell_lid_type lid=0u; lid<cell.detectors().size(); ++lid) {
-                spike_sources_.push_back(source_id_type{source_gid, lid});
+                spike_sources_.push_back(source_id_type{*source_gid, lid});
             }
             ++source_gid;
         }
         EXPECTS(spike_sources_.size()==n_detectors);
 
-        // Create the enumeration of probes attached to cells in this cell group
+        // Create the enumeration of probes attached to cells in this cell group.
         probes_.reserve(n_probes);
         for (auto i: util::make_span(0, cell_descriptions.size())){
-            const cell_gid_type probe_gid = gid_base_ + i;
+            const cell_gid_type probe_gid = gids_[i];
             const auto probes_on_cell = cell_descriptions[i].probes();
             for (cell_lid_type lid: util::make_span(0, probes_on_cell.size())) {
-                // get the unique global identifier of this probe
+                // Get the unique global identifier of this probe.
                 cell_member_type id{probe_gid, lid};
 
-                // get the location and kind information of the probe
+                // Get the location and kind information of the probe.
                 const auto p = probes_on_cell[lid];
 
-                // record the combined identifier and probe details
+                // Record the combined identifier and probe details.
                 probes_.push_back(probe_record{id, p.location, p.kind});
             }
         }
     }
 
-    mc_cell_group(cell_gid_type first_gid, const std::vector<util::unique_any>& cell_descriptions):
+    mc_cell_group(std::vector<cell_gid_type> gids,
+                  const std::vector<util::unique_any>& cell_descriptions):
         mc_cell_group(
-            first_gid,
-            util::transform_view(
-                cell_descriptions,
-                [](const util::unique_any& c) -> const cell& {return util::any_cast<const cell&>(c);})
-        )
+            std::move(gids),
+            util::transform_view(cell_descriptions,
+                [](const util::unique_any& c) -> const cell& {
+                    return util::any_cast<const cell&>(c);
+                }))
     {}
 
     cell_kind get_cell_kind() const override {
@@ -132,7 +139,7 @@ public:
                     auto& s = samplers_[m->sampler_index];
                     EXPECTS((bool)s.sampler);
 
-                    time_type cell_time = lowered_.time(s.cell_gid-gid_base_);
+                    time_type cell_time = lowered_.time(gid2lid(s.cell_gid));
                     if (cell_time<m->time) {
                         // This cell hasn't reached this sample time yet.
                         requeue_sample_events.push_back(*m);
@@ -161,8 +168,8 @@ public:
             lowered_.step_integration();
 
             if (util::is_debug_mode() && !lowered_.is_physical_solution()) {
-                std::cerr << "warning: solution out of bounds for cell "
-                          << gid_base_ << " at (max) t " << lowered_.max_time() << " ms\n";
+                std::cerr << "warning: solution out of bounds at (max) t "
+                          << lowered_.max_time() << " ms\n";
             }
         }
 
@@ -213,8 +220,11 @@ public:
     }
 
 private:
-    // gid of first cell in group.
-    cell_gid_type gid_base_;
+    // List of the gids of the cells in the group
+    std::vector<cell_gid_type> gids_;
+
+    // Hash table for converting gid to local index
+    std::unordered_map<cell_gid_type, cell_gid_type> gid2lid_;
 
     // The lowered cell state (e.g. FVM) of the cell.
     lowered_cell_type lowered_;
@@ -275,19 +285,7 @@ private:
     // Use handle partition to get index from id.
     template <typename Divisions>
     std::size_t handle_partition_lookup(const Divisions& divisions, cell_member_type id) const {
-        // NB: without any assertion checking, this would just be:
-        // return divisions[id.gid-gid_base_]+id.index;
-
-        EXPECTS(id.gid>=gid_base_);
-
-        auto handle_partition = util::partition_view(divisions);
-        EXPECTS(id.gid-gid_base_<handle_partition.size());
-
-        auto ival = handle_partition[id.gid-gid_base_];
-        std::size_t i = ival.first + id.index;
-        EXPECTS(i<ival.second);
-
-        return i;
+        return divisions[gid2lid(id.gid)]+id.index;
     }
 
     // Get probe handle from probe id.
@@ -308,6 +306,12 @@ private:
             sample_events_.push({i, sampler_start_times_[i]});
         }
     }
+
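+    // Map a gid to its local index within this cell group.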
+    cell_gid_type gid2lid(cell_gid_type gid) const {
+        auto it = gid2lid_.find(gid);
+        EXPECTS(it!=gid2lid_.end());
+        return it->second;
+    }
 };
 
 } // namespace mc
diff --git a/src/model.cpp b/src/model.cpp
index 85c469f877c3414d9aad6f2ec3b69f40da6307e2..43e56ba56afc9d57170abc4a48f5d3b0eee3f138 100644
--- a/src/model.cpp
+++ b/src/model.cpp
@@ -15,48 +15,28 @@ namespace nest {
 namespace mc {
 
 model::model(const recipe& rec, const domain_decomposition& decomp):
-    domain_(decomp)
+    communicator_(rec, decomp)
 {
-    // set up communicator based on partition
-    communicator_ = communicator_type(domain_.gid_group_partition());
-
-    // generate the cell groups in parallel, with one task per cell group
-    cell_groups_.resize(domain_.num_local_groups());
-
-    // thread safe vector for constructing the list of probes in parallel
-    threading::parallel_vector<probe_record> probe_tmp;
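+    // Record the cell group index of each local gid; used by attach_sampler
+    // to find the group that contains a probe's cell.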
+    for (auto i: util::make_span(0, decomp.num_local_groups())) {
+        for (auto gid: decomp.get_group(i).gids) {
+            gid_groups_[gid] = i;
+        }
+    }
 
+    // Generate the cell groups in parallel, with one task per cell group.
+    cell_groups_.resize(decomp.num_local_groups());
     threading::parallel_for::apply(0, cell_groups_.size(),
         [&](cell_gid_type i) {
             PE("setup", "cells");
-
-            auto group = domain_.get_group(i);
-            std::vector<util::unique_any> cell_descriptions(group.end-group.begin);
-
-            for (auto gid: util::make_span(group.begin, group.end)) {
-                auto i = gid-group.begin;
-                cell_descriptions[i] = rec.get_cell_description(gid);
-            }
-
-            cell_groups_[i] = cell_group_factory(
-                    group.kind, group.begin, cell_descriptions, domain_.backend());
+            cell_groups_[i] = cell_group_factory(rec, decomp.get_group(i));
             PL(2);
         });
 
-    // store probes
+    // Store probes.
     for (const auto& c: cell_groups_) {
         util::append(probes_, c->probes());
     }
 
-    // generate the network connections
-    for (cell_gid_type i: util::make_span(domain_.cell_begin(), domain_.cell_end())) {
-        for (const auto& cc: rec.connections_on(i)) {
-            connection conn{cc.source, cc.dest, cc.weight, cc.delay};
-            communicator_.add_connection(conn);
-        }
-    }
-    communicator_.construct();
-
     // Allocate an empty queue buffer for each cell group
     // These must be set initially to ensure that a queue is available for each
     // cell group for the first time step.
@@ -72,10 +52,10 @@ void model::reset() {
 
     communicator_.reset();
 
-    for(auto& q : current_events()) {
+    for(auto& q: current_events()) {
         q.clear();
     }
-    for(auto& q : future_events()) {
+    for(auto& q: future_events()) {
         q.clear();
     }
 
@@ -169,11 +149,11 @@ time_type model::run(time_type tfinal, time_type dt) {
 }
 
 void model::attach_sampler(cell_member_type probe_id, sampler_function f, time_type tfrom) {
-    const auto idx = domain_.local_group_from_gid(probe_id.gid);
-
-    // only attach samplers for local cells
-    if (idx) {
-        cell_groups_[*idx]->add_sampler(probe_id, f, tfrom);
+    // TODO: remove the gid_groups data structure completely when/if no longer needed
+    // for the probe interface.
+    auto it = gid_groups_.find(probe_id.gid);
+    if (it!=gid_groups_.end()) {
+        cell_groups_[it->second]->add_sampler(probe_id, f, tfrom);
     }
 }
 
@@ -189,10 +169,6 @@ std::size_t model::num_groups() const {
     return cell_groups_.size();
 }
 
-std::size_t model::num_cells() const {
-    return domain_.num_local_cells();
-}
-
 void model::set_binning_policy(binning_kind policy, time_type bin_interval) {
     for (auto& group: cell_groups_) {
         group->set_binning_policy(policy, bin_interval);
diff --git a/src/model.hpp b/src/model.hpp
index 7f93e3b9dc43620bd317aa99575045f58f3a217c..6fe5a106dc07e6c3c8f7b76e9b13ce06e4940233 100644
--- a/src/model.hpp
+++ b/src/model.hpp
@@ -12,6 +12,7 @@
 #include <sampler_function.hpp>
 #include <thread_private_spike_store.hpp>
 #include <util/nop.hpp>
+#include <util/rangeutil.hpp>
 #include <util/unique_any.hpp>
 
 namespace nest {
@@ -34,14 +35,13 @@ public:
 
     std::size_t num_spikes() const;
 
-    std::size_t num_groups() const;
-
-    std::size_t num_cells() const;
-
     // Set event binning policy on all our groups.
     void set_binning_policy(binning_kind policy, time_type bin_interval);
 
     // access cell_group directly
+    // TODO: deprecate. Currently used in some validation tests to inject
+    // events directly into a cell group. This should be done with a spiking
+    // neuron.
     cell_group& group(int i);
 
     // register a callback that will perform a export of the global
@@ -53,11 +53,10 @@ public:
     void set_local_spike_callback(spike_export_function export_callback);
 
 private:
-    const domain_decomposition &domain_;
+    std::size_t num_groups() const;
 
     time_type t_ = 0.;
     std::vector<cell_group_ptr> cell_groups_;
-    communicator_type communicator_;
     std::vector<probe_record> probes_;
 
     using event_queue_type = typename communicator_type::event_queue;
@@ -69,6 +68,12 @@ private:
     spike_export_function global_export_callback_ = util::nop_function;
     spike_export_function local_export_callback_ = util::nop_function;
 
+    // Hash table for looking up the group index of the cell_group that
+    // contains gid
+    std::unordered_map<cell_gid_type, cell_gid_type> gid_groups_;
+
+    communicator_type communicator_;
+
     // Convenience functions that map the spike buffers and event queues onto
     // the appropriate integration interval.
     //
diff --git a/src/profiling/profiler.cpp b/src/profiling/profiler.cpp
index 6452369e7945052e006089e80d5a74801887f823..2a85c4645cfd1906d4e5fd0ee32ffe1d4adbd833 100644
--- a/src/profiling/profiler.cpp
+++ b/src/profiling/profiler.cpp
@@ -345,7 +345,7 @@ void profilers_restart() {
     }
 }
 
-void profiler_output(double threshold, std::size_t num_local_work_items, bool profile_only_zero) {
+void profiler_output(double threshold, bool profile_only_zero) {
     profilers_stop();
 
     // Find the earliest start time and latest stop time over all profilers
@@ -383,10 +383,6 @@ void profiler_output(double threshold, std::size_t num_local_work_items, bool pr
     bool print = comm_rank==0 ? true : false;
     bool output_this_rank = (comm_rank == 0) || ! profile_only_zero;
 
-    // calculate the throughput in terms of work items per second
-    auto local_throughput = num_local_work_items / wall_time;
-    auto global_throughput = communication::global_policy::sum(local_throughput);
-
     if(print) {
         std::cout << " ---------------------------------------------------- \n";
         std::cout << "|                      profiler                      |\n";
@@ -413,10 +409,6 @@ void profiler_output(double threshold, std::size_t num_local_work_items, bool pr
             line, sizeof(line), "%-18s%10s%10s\n",
             "", "local", "global");
         std::cout << line;
-        std::snprintf(
-            line, sizeof(line), "%-18s%10d%10d\n",
-            "throughput", int(local_throughput), int(global_throughput));
-        std::cout << line;
 
         std::cout << "\n\n";
     }
@@ -426,7 +418,6 @@ void profiler_output(double threshold, std::size_t num_local_work_items, bool pr
     as_json["threads"] = nthreads;
     as_json["efficiency"] = efficiency;
     as_json["communicators"] = ncomms;
-    as_json["throughput"] = unsigned(local_throughput);
     as_json["rank"] = comm_rank;
     as_json["regions"] = p.as_json();
 
@@ -444,7 +435,7 @@ void profiler_enter(const char*) {}
 void profiler_leave() {}
 void profiler_leave(int) {}
 void profilers_stop() {}
-void profiler_output(double threshold, std::size_t num_local_work_items, bool profile_only_zero) {}
+void profiler_output(double threshold, bool profile_only_zero) {}
 void profilers_restart() {};
 #endif
 
diff --git a/src/profiling/profiler.hpp b/src/profiling/profiler.hpp
index 0747fbdcf556b77503628c2d6caa3d238c040c17..271f49453414d14203ae8430d382ee634e783b28 100644
--- a/src/profiling/profiler.hpp
+++ b/src/profiling/profiler.hpp
@@ -245,7 +245,7 @@ void profilers_stop();
 void profilers_restart();
 
 /// print the collated profiler to std::cout
-void profiler_output(double threshold, std::size_t num_local_work_items, bool profile_only_zero);
+void profiler_output(double threshold, bool profile_only_zero);
 
 } // namespace util
 } // namespace mc
diff --git a/src/recipe.hpp b/src/recipe.hpp
index 8dd7565ad7f84b5dc5b99462b72d523227af6354..b2b4f6b04bf45e49914a8c04eb5a15880253cec4 100644
--- a/src/recipe.hpp
+++ b/src/recipe.hpp
@@ -42,6 +42,10 @@ struct cell_connection {
 
     float weight;
     float delay;
+
+    cell_connection(cell_connection_endpoint src, cell_connection_endpoint dst, float w, float d):
+        source(src), dest(dst), weight(w), delay(d)
+    {}
 };
 
 class recipe {
diff --git a/src/rss_cell_group.hpp b/src/rss_cell_group.hpp
index 8ec5b9cb10e7cbe8935996f579f7d92d64fdbc19..6fc50d3fc1dcd4f0c60395a7de855f6ac0371242 100644
--- a/src/rss_cell_group.hpp
+++ b/src/rss_cell_group.hpp
@@ -2,6 +2,7 @@
 
 #include <cell_group.hpp>
 #include <rss_cell.hpp>
+#include <util/optional.hpp>
 #include <util/span.hpp>
 #include <util/unique_any.hpp>
 
@@ -12,23 +13,20 @@ namespace mc {
 
 /// Cell_group to collect cells that spike at a set frequency
 /// Cell are lightweight and are not executed in anybackend implementation
-class rss_cell_group : public cell_group {
+class rss_cell_group: public cell_group {
 public:
     using source_id_type = cell_member_type;
 
-    rss_cell_group(cell_gid_type first_gid, const std::vector<util::unique_any>& cell_descriptions):
-        gid_base_(first_gid)
+    rss_cell_group(std::vector<cell_gid_type> gids,
+                   const std::vector<util::unique_any>& cell_descriptions):
+        gids_(gids)
     {
         using util::make_span;
 
         for (cell_gid_type i: make_span(0, cell_descriptions.size())) {
-            // Copy all the rss_cells
             cells_.push_back(rss_cell(
                 util::any_cast<rss_cell::rss_cell_description>(cell_descriptions[i])
             ));
-
-            // create a lid to gid map
-            spike_sources_.push_back({gid_base_+i, 0});
         }
     }
 
@@ -45,13 +43,13 @@ public:
     }
 
     void set_binning_policy(binning_kind policy, time_type bin_interval) override
-    {} // Nothing to do?
+    {}
 
     void advance(time_type tfinal, time_type dt) override {
-        // TODO: Move source information to rss_cell implementation
+        // TODO: Move rss_cell implementation into the rss_cell_group
         for (auto i: util::make_span(0, cells_.size())) {
             for (auto spike_time: cells_[i].spikes_until(tfinal)) {
-                spikes_.push_back({spike_sources_[i], spike_time});
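+                // Tag each spike with source id {gid, 0}: an rss cell exposes a
+                // single spike source, with index 0.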
+                spikes_.push_back({{gids_[i], 0}, spike_time});
             }
         }
     };
@@ -69,27 +67,22 @@ public:
     }
 
     std::vector<probe_record> probes() const override {
-        return probes_;
+        return {};
     }
 
-    void add_sampler(cell_member_type probe_id, sampler_function s, time_type start_time = 0) override {
+    void add_sampler(cell_member_type, sampler_function, time_type ts=0) override {
         std::logic_error("The rss_cells do not support sampling of internal state!");
     }
 
 private:
-    // gid of first cell in group.
-    cell_gid_type gid_base_;
+    // List of the gids of the cells in the group
+    std::vector<cell_gid_type> gids_;
 
     // Spikes that are generated.
     std::vector<spike> spikes_;
 
-    // Spike generators attached to the cell
-    std::vector<source_id_type> spike_sources_;
-
     // Store a reference to the cell actually implementing the spiking
     std::vector<rss_cell> cells_;
-
-    std::vector<probe_record> probes_;
 };
 
 } // namespace mc
diff --git a/src/spike.hpp b/src/spike.hpp
index d45ea97e74e1e412e08417eaa19a020689ca3143..d2343ed3ddcb41d8ffbcca1316abe78a3d4fda04 100644
--- a/src/spike.hpp
+++ b/src/spike.hpp
@@ -20,12 +20,6 @@ struct basic_spike {
     basic_spike(id_type s, time_type t):
         source(s), time(t)
     {}
-
-    /// Less than comparison operator for nest::mc::spike<> values:
-    /// spikes are ordered by spike time, for use in sorting and queueing.
-    friend bool operator<(basic_spike lhs, basic_spike rhs) {
-        return lhs.time < rhs.time;
-    }
 };
 
 /// Standard specialization:
@@ -37,5 +31,5 @@ using spike = basic_spike<cell_member_type>;
 // Custom stream operator for printing nest::mc::spike<> values.
 template <typename I>
 std::ostream& operator<<(std::ostream& o, nest::mc::basic_spike<I> s) {
-    return o << "spike[t " << s.time << ", src " << s.source << "]";
+    return o << "S[src " << s.source << ", t " << s.time << "]";
 }
diff --git a/src/threading/cthread.cpp b/src/threading/cthread.cpp
index 0495ffd51cfb1bf5565ad255ea247db9e19f475a..6a257f9d504f3b5597acff0c25b6f0c65e822d7b 100644
--- a/src/threading/cthread.cpp
+++ b/src/threading/cthread.cpp
@@ -1,3 +1,4 @@
+#include <atomic>
 #include <cassert>
 #include <cstring>
 #include <exception>
diff --git a/src/util/counter.hpp b/src/util/counter.hpp
index d071752807f53fc72324367d4e4ea87e0dc6c7be..5f31cc4810edbdb39b89aaf6b6ab7d07e548e8cf 100644
--- a/src/util/counter.hpp
+++ b/src/util/counter.hpp
@@ -74,6 +74,8 @@ struct counter {
 
     value_type operator*() const { return v_; }
 
+    pointer operator->() const { return &v_; }
+
     value_type operator[](difference_type n) const { return v_+n; }
 
     bool operator==(counter x) const { return v_==x.v_; }
diff --git a/tests/global_communication/CMakeLists.txt b/tests/global_communication/CMakeLists.txt
index 6a653a415bae8d160259e6e5ee277a9482b8d23e..b6e38f99c4c2a4f3b571907f6bb4c1a9b5109b64 100644
--- a/tests/global_communication/CMakeLists.txt
+++ b/tests/global_communication/CMakeLists.txt
@@ -5,7 +5,7 @@ set(COMMUNICATION_SOURCES
     test_domain_decomposition.cpp
     test_exporter_spike_file.cpp
     test_communicator.cpp
-    test_mpi_gather_all.cpp
+    test_mpi.cpp
 
     # unit test driver
     test.cpp
diff --git a/tests/global_communication/test_communicator.cpp b/tests/global_communication/test_communicator.cpp
index 5cbceec6b08a2f538e063b73e350010b80923460..5a423b932be3523e3154d13f8d41359a55bc7b2a 100644
--- a/tests/global_communication/test_communicator.cpp
+++ b/tests/global_communication/test_communicator.cpp
@@ -8,6 +8,10 @@
 
 #include <communication/communicator.hpp>
 #include <communication/global_policy.hpp>
+#include <hardware/node_info.hpp>
+#include <util/filter.hpp>
+#include <util/rangeutil.hpp>
+#include <util/span.hpp>
 
 using namespace nest::mc;
 
@@ -173,3 +177,318 @@ TEST(communicator, gather_spikes_variant) {
     }
 }
 
+namespace {
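+    // Helpers shared by the ring and all2all exchange tests below: test recipes,
+    // expected-event constructors, and utilities for querying the decomposition.
+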
+    // Population of cable and rss cells with ring connection topology.
+    // Even gid are rss, and odd gid are cable cells.
+    class ring_recipe: public recipe {
+    public:
+        ring_recipe(cell_size_type s):
+            size_(s),
+            ranks_(communication::global_policy::size())
+        {}
+
+        cell_size_type num_cells() const override {
+            return size_;
+        }
+
+        util::unique_any get_cell_description(cell_gid_type) const override {
+            return {};
+        }
+
+        cell_kind get_cell_kind(cell_gid_type gid) const override {
+            return gid%2? cell_kind::cable1d_neuron: cell_kind::regular_spike_source;
+        }
+
+        cell_count_info get_cell_count_info(cell_gid_type) const override {
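+            // One spike source, one synapse target, no probes per cell.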
+            return {1, 1, 0};
+        }
+
+        std::vector<cell_connection> connections_on(cell_gid_type gid) const override {
+            // a single connection from the preceding cell, i.e. a ring
+            // weight is the destination gid
+            // delay is 1
+            cell_member_type src = {gid==0? size_-1: gid-1, 0};
+            cell_member_type dst = {gid, 0};
+            return {cell_connection(
+                        src, dst,   // end points
+                        float(gid), // weight
+                        1.0f)};     // delay
+        }
+
+    private:
+        cell_size_type size_;
+        cell_size_type ranks_;
+    };
+
+
+    cell_gid_type source_of(cell_gid_type gid, cell_size_type num_cells) {
+        if (gid) {
+            return gid-1;
+        }
+        return num_cells-1;
+    }
+
+    // Cell gid expects an event of weight gid, generated by the spike that
+    // cell source_of(gid) fires at time source_of(gid).
+    postsynaptic_spike_event expected_event_ring(cell_gid_type gid, cell_size_type num_cells) {
+        auto sid = source_of(gid, num_cells);
+        return {
+            {gid, 0u},  // target
+            sid+1.0f,   // time (all conns have delay 1 ms)
+            float(gid)};// weight
+    }
+
+    // spike generated by cell gid
+    spike make_spike(cell_gid_type gid) {
+        return spike({gid, 0u}, time_type(gid));
+    }
+
+    // Population of cable and rss cells with all-to-all connection topology.
+    // Even gid are rss, and odd gid are cable cells.
+    class all2all_recipe: public recipe {
+    public:
+        all2all_recipe(cell_size_type s):
+            size_(s),
+            ranks_(communication::global_policy::size())
+        {}
+
+        cell_size_type num_cells() const override {
+            return size_;
+        }
+
+        util::unique_any get_cell_description(cell_gid_type) const override {
+            return {};
+        }
+        cell_kind get_cell_kind(cell_gid_type gid) const override {
+            return gid%2? cell_kind::cable1d_neuron: cell_kind::regular_spike_source;
+        }
+
+        cell_count_info get_cell_count_info(cell_gid_type) const override {
+            return {1, size_, 0};    // sources, targets, probes
+        }
+
+        std::vector<cell_connection> connections_on(cell_gid_type gid) const override {
+            std::vector<cell_connection> cons;
+            cons.reserve(size_);
+            for (auto sid: util::make_span(0, size_)) {
+                cell_connection con(
+                        {sid, 0},       // source
+                        {gid, sid},     // destination
+                        float(gid+sid), // weight
+                        1.0f);          // delay
+                cons.push_back(con);
+            }
+            return cons;
+        }
+
+    private:
+        cell_size_type size_;
+        cell_size_type ranks_;
+    };
+
+    postsynaptic_spike_event expected_event_all2all(cell_gid_type gid, cell_gid_type sid) {
+        return {
+            {gid, sid},      // target, event from sid goes to synapse with index sid
+            sid+1.0f,        // time (all conns have delay 1 ms)
+            float(gid+sid)}; // weight
+    }
+
+    // make a list of the gids on the local domain
+    std::vector<cell_gid_type> get_gids(const domain_decomposition& D) {
+        std::vector<cell_gid_type> gids;
+        for (auto i: util::make_span(0, D.num_local_groups())) {
+            util::append(gids, D.get_group(i).gids);
+        }
+        return gids;
+    }
+
+    // make a hash table mapping local gid to local cell_group index
+    std::unordered_map<cell_gid_type, cell_gid_type>
+    get_group_map(const domain_decomposition& D) {
+        std::unordered_map<cell_gid_type, cell_gid_type> map;
+        for (auto i: util::make_span(0, D.num_local_groups())) {
+            for (auto gid: D.get_group(i).gids) {
+                map[gid] = i;
+            }
+        }
+        return map;
+    }
+}
+
+using policy = communication::global_policy;
+using comm_type = communication::communicator<policy>;
+
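+// Test that the communicator exchanges spikes and generates the corresponding
+// post-synaptic events on the ring network. The predicate f selects, by gid,
+// which cells fire a spike in this round.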
+template <typename F>
+::testing::AssertionResult
+test_ring(const domain_decomposition& D, comm_type& C, F&& f) {
+    using util::transform_view;
+    using util::assign_from;
+    using util::filter;
+
+    auto gids = get_gids(D);
+    auto group_map = get_group_map(D);
+
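+    // Generate one spike for each local cell whose gid satisfies f.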
+    std::vector<spike> local_spikes = assign_from(transform_view(filter(gids, f), make_spike));
+    // Reverse the order of spikes so that they are "unsorted" in terms
+    // of source gid.
+    std::reverse(local_spikes.begin(), local_spikes.end());
+
+    // gather the global set of spikes
+    auto global_spikes = C.exchange(local_spikes);
+    if (global_spikes.size()!=policy::sum(local_spikes.size())) {
+        return ::testing::AssertionFailure() << " the number of gathered spikes "
+            << global_spikes.size() << " doesn't match the expected "
+            << policy::sum(local_spikes.size());
+    }
+
+    // generate the events
+    auto queues = C.make_event_queues(global_spikes);
+    if (queues.size() != D.num_local_groups()) { // one queue for each cell group
+        return ::testing::AssertionFailure()
+            << "expect one event queue for each cell group";
+    }
+
+    // Assert that all the correct events were generated.
+    // Iterate over each local gid, and test whether an event is expected for
+    // that gid. If so, look up the event queue of the cell_group of gid, and
+    // search for the expected event.
+    for (auto gid: gids) {
+        auto src = source_of(gid, D.num_global_cells());
+        if (f(src)) {
+            auto expected = expected_event_ring(gid, D.num_global_cells());
+            auto grp = group_map[gid];
+            auto& q = queues[grp];
+            if (std::find(q.begin(), q.end(), expected)==q.end()) {
+                return ::testing::AssertionFailure()
+                    << " expected event " << expected << " was not found";
+            }
+        }
+    }
+
+    // Assert that only the expected events were produced. The preceding test
+    // showed that all expected events were generated, so this only requires
+    // that the number of generated events is as expected.
+    int num_events = std::accumulate(queues.begin(), queues.end(), 0,
+            [](int l, decltype(queues.front())& r){return l + r.size();});
+
+    int expected_events = util::size(filter(gids, f));
+
+    EXPECT_EQ(policy::sum(expected_events), policy::sum(num_events));
+    return ::testing::AssertionSuccess();
+}
+
+TEST(communicator, ring)
+{
+    using util::make_span;
+
+    // construct a network of 10*n_domain cells, alternating rss and cable cells, in a ring
+    unsigned N = policy::size();
+
+    unsigned n_local = 10u;
+    unsigned n_global = n_local*N;
+
+    auto R = ring_recipe(n_global);
+    // use a node decomposition that reflects the resources available
+    // on the node that the test is running on, including gpus.
+    auto D = domain_decomposition(R, hw::node_info());
+    auto C = communication::communicator<policy>(R, D);
+
+    // every cell fires
+    EXPECT_TRUE(test_ring(D, C, [](cell_gid_type g){return true;}));
+    // last cell in each domain fires
+    EXPECT_TRUE(test_ring(D, C, [n_local](cell_gid_type g){return (g+1)%n_local == 0u;}));
+    // evenly numbered cells fire
+    EXPECT_TRUE(test_ring(D, C, [n_local](cell_gid_type g){return g%2==0;}));
+    // oddly numbered cells fire
+    EXPECT_TRUE(test_ring(D, C, [n_local](cell_gid_type g){return g%2==1;}));
+}
+
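+// Same as test_ring, but for the all-to-all network: every cell that fires,
+// as selected by the predicate f, should deliver an event to every cell.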
+template <typename F>
+::testing::AssertionResult
+test_all2all(const domain_decomposition& D, comm_type& C, F&& f) {
+    using util::transform_view;
+    using util::assign_from;
+    using util::filter;
+    using util::make_span;
+
+    auto gids = get_gids(D);
+    auto group_map = get_group_map(D);
+
+    std::vector<spike> local_spikes = assign_from(transform_view(filter(gids, f), make_spike));
+    // Reverse the order of spikes so that they are "unsorted" in terms
+    // of source gid.
+    std::reverse(local_spikes.begin(), local_spikes.end());
+
+    std::vector<cell_gid_type> spike_gids = assign_from(
+        filter(make_span(0, D.num_global_cells()), f));
+
+    // gather the global set of spikes
+    auto global_spikes = C.exchange(local_spikes);
+    if (global_spikes.size()!=policy::sum(local_spikes.size())) {
+        return ::testing::AssertionFailure() << " the number of gathered spikes "
+            << global_spikes.size() << " doesn't match the expected "
+            << policy::sum(local_spikes.size());
+    }
+
+    // generate the events
+    auto queues = C.make_event_queues(global_spikes);
+    if (queues.size() != D.num_local_groups()) { // one queue for each cell group
+        return ::testing::AssertionFailure()
+            << "expect one event queue for each cell group";
+    }
+
+    // Assert that all the correct events were generated.
+    // Iterate over each local gid, and test whether an event is expected for
+    // that gid. If so, look up the event queue of the cell_group of gid, and
+    // search for the expected event.
+    for (auto gid: gids) {
+        // get the event queue that this gid belongs to
+        auto& q = queues[group_map[gid]];
+        for (auto src: spike_gids) {
+            auto expected = expected_event_all2all(gid, src);
+            if (std::find(q.begin(), q.end(), expected)==q.end()) {
+                return ::testing::AssertionFailure()
+                    << "expected event " << expected
+                    << " from " << src << " was not found";
+            }
+        }
+    }
+
+    // Assert that only the expected events were produced. The preceding test
+    // showed that all expected events were generated, so this only requires
+    // that the number of generated events is as expected.
+    int num_events = std::accumulate(queues.begin(), queues.end(), 0,
+            [](int l, decltype(queues.front())& r){return l + r.size();});
+
+    int expected_events = D.num_global_cells()*spike_gids.size();
+
+    EXPECT_EQ(expected_events, policy::sum(num_events));
+    return ::testing::AssertionSuccess();
+}
+
+TEST(communicator, all2all)
+{
+    using util::make_span;
+
+    // construct a network of 10*n_domain cells, alternating rss and cable cells, connected all-to-all
+    unsigned N = policy::size();
+
+    unsigned n_local = 10u;
+    unsigned n_global = n_local*N;
+
+    auto R = all2all_recipe(n_global);
+    // use a node decomposition that reflects the resources available
+    // on the node that the test is running on, including gpus.
+    auto D = domain_decomposition(R, hw::node_info());
+    auto C = communication::communicator<policy>(R, D);
+
+    // every cell fires
+    EXPECT_TRUE(test_all2all(D, C, [](cell_gid_type g){return true;}));
+    // only cell 0 fires
+    EXPECT_TRUE(test_all2all(D, C, [n_local](cell_gid_type g){return g==0u;}));
+    // evenly numbered cells fire
+    EXPECT_TRUE(test_all2all(D, C, [n_local](cell_gid_type g){return g%2==0;}));
+    // oddly numbered cells fire
+    EXPECT_TRUE(test_all2all(D, C, [n_local](cell_gid_type g){return g%2==1;}));
+}
diff --git a/tests/global_communication/test_domain_decomposition.cpp b/tests/global_communication/test_domain_decomposition.cpp
index bd6eb35c3d84fe9694a794e6c8093af91436ee73..0e64b4f3a2384361e4d0276ba891276bb78f390c 100644
--- a/tests/global_communication/test_domain_decomposition.cpp
+++ b/tests/global_communication/test_domain_decomposition.cpp
@@ -8,23 +8,198 @@
 
 #include <communication/communicator.hpp>
 #include <communication/global_policy.hpp>
+#include <hardware/node_info.hpp>
 
 using namespace nest::mc;
 
 using communicator_type = communication::communicator<communication::global_policy>;
 
-inline bool is_dry_run() {
-    return communication::global_policy::kind() ==
-        communication::global_policy_kind::dryrun;
+namespace {
+    // Homogeneous cell population of cable cells.
+    class homo_recipe: public recipe {
+    public:
+        homo_recipe(cell_size_type s): size_(s)
+        {}
+
+        cell_size_type num_cells() const override {
+            return size_;
+        }
+
+        util::unique_any get_cell_description(cell_gid_type) const override {
+            return {};
+        }
+        cell_kind get_cell_kind(cell_gid_type) const override {
+            return cell_kind::cable1d_neuron;
+        }
+
+        cell_count_info get_cell_count_info(cell_gid_type) const override {
+            return {0, 0, 0};
+        }
+        std::vector<cell_connection> connections_on(cell_gid_type) const override {
+            return {};
+        }
+
+    private:
+        cell_size_type size_;
+    };
+
+    // Heterogeneous cell population of cable and rss cells.
+    // Interleaved so that cells with even gid are cable cells, and odd gid are
+    // rss cells.
+    class hetero_recipe: public recipe {
+    public:
+        hetero_recipe(cell_size_type s): size_(s)
+        {}
+
+        cell_size_type num_cells() const override {
+            return size_;
+        }
+
+        util::unique_any get_cell_description(cell_gid_type) const override {
+            return {};
+        }
+        cell_kind get_cell_kind(cell_gid_type gid) const override {
+            return gid%2?
+                cell_kind::regular_spike_source:
+                cell_kind::cable1d_neuron;
+        }
+
+        cell_count_info get_cell_count_info(cell_gid_type) const override {
+            return {0, 0, 0};
+        }
+        std::vector<cell_connection> connections_on(cell_gid_type) const override {
+            return {};
+        }
+
+    private:
+        cell_size_type size_;
+    };
 }
 
-TEST(domain_decomp, basic) {
-/*
-    using policy = communication::global_policy;
+TEST(domain_decomp, homogeneous) {
+    const auto N = communication::global_policy::size();
+    const auto I = communication::global_policy::id();
+
+    {   // Test on a node with 1 cpu core and no gpus.
+        // We assume that all cells will be put into cell groups of size 1.
+        // This assumption will not hold in the future, requiring an update to
+        // the test.
+        hw::node_info nd(1, 0);
+
+        // 10 cells per domain
+        unsigned n_local = 10;
+        unsigned n_global = n_local*N;
+        domain_decomposition D(homo_recipe(n_global), nd);
+
+        EXPECT_EQ(D.num_global_cells(), n_global);
+        EXPECT_EQ(D.num_local_cells(), n_local);
+        EXPECT_EQ(D.num_local_groups(), n_local);
+
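+        // Each rank should own the contiguous gid range [I*n_local, (I+1)*n_local).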
+        auto b = I*n_local;
+        auto e = (I+1)*n_local;
+        auto gids = util::make_span(b, e);
+        for (auto gid: gids) {
+            EXPECT_EQ(I, D.gid_domain(gid));
+            EXPECT_TRUE(D.is_local_gid(gid));
+        }
+        EXPECT_FALSE(D.is_local_gid(b-1));
+        EXPECT_FALSE(D.is_local_gid(e));
 
-    const auto num_domains = policy::size();
-    const auto rank = policy::id();
-*/
+        // Each cell group contains 1 cell of kind cable1d_neuron
+        // Each group should also be tagged for cpu execution
+        for (auto i: gids) {
+            auto local_group = i-b;
+            auto& grp = D.get_group(local_group);
+            EXPECT_EQ(grp.gids.size(), 1u);
+            EXPECT_EQ(grp.gids.front(), unsigned(i));
+            EXPECT_EQ(grp.backend, backend_kind::multicore);
+            EXPECT_EQ(grp.kind, cell_kind::cable1d_neuron);
+        }
+    }
+    {   // Test on a node with 1 gpu and 1 cpu core.
+        // Assumes that all cells will be placed on gpu in a single group.
+        hw::node_info nd(1, 1);
 
+        // 10 cells per domain
+        unsigned n_local = 10;
+        unsigned n_global = n_local*N;
+        domain_decomposition D(homo_recipe(n_global), nd);
 
+        EXPECT_EQ(D.num_global_cells(), n_global);
+        EXPECT_EQ(D.num_local_cells(), n_local);
+        EXPECT_EQ(D.num_local_groups(), 1u);
+
+        auto b = I*n_local;
+        auto e = (I+1)*n_local;
+        auto gids = util::make_span(b, e);
+        for (auto gid: gids) {
+            EXPECT_EQ(I, D.gid_domain(gid));
+            EXPECT_TRUE(D.is_local_gid(gid));
+        }
+        EXPECT_FALSE(D.is_local_gid(b-1));
+        EXPECT_FALSE(D.is_local_gid(e));
+
+        // All local cells should be placed in a single cell group of kind
+        // cable1d_neuron, tagged for gpu execution.
+        auto grp = D.get_group(0u);
+
+        EXPECT_EQ(grp.gids.size(), n_local);
+        EXPECT_EQ(grp.gids.front(), b);
+        EXPECT_EQ(grp.gids.back(), e-1);
+        EXPECT_EQ(grp.backend, backend_kind::gpu);
+        EXPECT_EQ(grp.kind, cell_kind::cable1d_neuron);
+    }
+}
+
+TEST(domain_decomp, heterogeneous) {
+    const auto N = communication::global_policy::size();
+    const auto I = communication::global_policy::id();
+
+    {   // Test on a node with 1 cpu core and no gpus.
+        // We assume that all cells will be put into cell groups of size 1.
+        // This assumption will not hold in the future, requiring an update to
+        // the test.
+        hw::node_info nd(1, 0);
+
+        // 10 cells per domain
+        const unsigned n_local = 10;
+        const unsigned n_global = n_local*N;
+        const unsigned n_local_grps = n_local; // 1 cell per group
+        auto R = hetero_recipe(n_global);
+        domain_decomposition D(R, nd);
+
+        EXPECT_EQ(D.num_global_cells(), n_global);
+        EXPECT_EQ(D.num_local_cells(), n_local);
+        EXPECT_EQ(D.num_local_groups(), n_local);
+
+        auto b = I*n_local;
+        auto e = (I+1)*n_local;
+        auto gids = util::make_span(b, e);
+        for (auto gid: gids) {
+            EXPECT_EQ(I, D.gid_domain(gid));
+            EXPECT_TRUE(D.is_local_gid(gid));
+        }
+        EXPECT_FALSE(D.is_local_gid(b-1));
+        EXPECT_FALSE(D.is_local_gid(e));
+
+        // Each cell group contains 1 cell, and should be tagged for cpu execution.
+        // Collect the gids of each group by cell kind for checking below.
+        auto grps = util::make_span(0, n_local_grps);
+        std::map<cell_kind, std::set<cell_gid_type>> kind_lists;
+        for (auto i: grps) {
+            auto& grp = D.get_group(i);
+            EXPECT_EQ(grp.gids.size(), 1u);
+            kind_lists[grp.kind].insert(grp.gids.front());
+            EXPECT_EQ(grp.backend, backend_kind::multicore);
+        }
+
+        for (auto k: {cell_kind::cable1d_neuron, cell_kind::regular_spike_source}) {
+            const auto& gids = kind_lists[k];
+            EXPECT_EQ(gids.size(), n_local/2);
+            for (auto gid: gids) {
+                EXPECT_EQ(k, R.get_cell_kind(gid));
+            }
+        }
+    }
 }
+
diff --git a/tests/global_communication/test_mpi_gather_all.cpp b/tests/global_communication/test_mpi.cpp
similarity index 100%
rename from tests/global_communication/test_mpi_gather_all.cpp
rename to tests/global_communication/test_mpi.cpp
diff --git a/tests/unit/test_domain_decomposition.cpp b/tests/unit/test_domain_decomposition.cpp
index 39e9a03837e2a765322bf71bec78550c0e5f161a..b7e313d8bbe3cd34f21f7b6fb8d907a6894d76cb 100644
--- a/tests/unit/test_domain_decomposition.cpp
+++ b/tests/unit/test_domain_decomposition.cpp
@@ -1,118 +1,226 @@
 #include "../gtest.h"
 
-#include <domain_decomposition.hpp>
 #include <backends.hpp>
+#include <domain_decomposition.hpp>
+#include <hardware/node_info.hpp>
 
 using namespace nest::mc;
 
 namespace {
+    //
+    // Dummy recipe types for testing.
+    //
+
+    // Homogeneous cell population of cable cells.
+    class homo_recipe: public recipe {
+    public:
+        homo_recipe(cell_size_type s): size_(s)
+        {}
+
+        cell_size_type num_cells() const override {
+            return size_;
+        }
 
-// dummy recipe type for testing
-class test_recipe: public recipe {
-public:
-    test_recipe(cell_size_type s): size_(s)
-    {}
+        util::unique_any get_cell_description(cell_gid_type) const override {
+            return {};
+        }
+        cell_kind get_cell_kind(cell_gid_type) const override {
+            return cell_kind::cable1d_neuron;
+        }
 
-    cell_size_type num_cells() const override {
-        return size_;
-    }
+        cell_count_info get_cell_count_info(cell_gid_type) const override {
+            return {0, 0, 0};
+        }
+        std::vector<cell_connection> connections_on(cell_gid_type) const override {
+            return {};
+        }
 
-    util::unique_any get_cell_description(cell_gid_type) const override {
-        return {};
-    }
-    cell_kind get_cell_kind(cell_gid_type) const override {
-        return cell_kind::cable1d_neuron;
-    }
+    private:
+        cell_size_type size_;
+    };
 
-    cell_count_info get_cell_count_info(cell_gid_type) const override {
-        return {0, 0, 0};
-    }
-    std::vector<cell_connection> connections_on(cell_gid_type) const override {
-        return {};
-    }
+    // Heterogeneous cell population of cable and rss cells.
+    // Interleaved so that cells with even gid are cable cells, and odd gid are
+    // rss cells.
+    class hetero_recipe: public recipe {
+    public:
+        hetero_recipe(cell_size_type s): size_(s)
+        {}
+
+        cell_size_type num_cells() const override {
+            return size_;
+        }
 
-private:
-    cell_size_type size_;
-};
+        util::unique_any get_cell_description(cell_gid_type) const override {
+            return {};
+        }
+        cell_kind get_cell_kind(cell_gid_type gid) const override {
+            return gid%2?
+                cell_kind::regular_spike_source:
+                cell_kind::cable1d_neuron;
+        }
+
+        cell_count_info get_cell_count_info(cell_gid_type) const override {
+            return {0, 0, 0};
+        }
+        std::vector<cell_connection> connections_on(cell_gid_type) const override {
+            return {};
+        }
 
+    private:
+        cell_size_type size_;
+    };
 }
 
-TEST(domain_decomposition, one_cell_groups)
+//  domain_decomposition interface:
+//      int gid_domain(cell_gid_type gid)
+//      cell_size_type num_global_cells()
+//      cell_size_type num_local_cells()
+//      cell_size_type num_local_groups()
+//      const group_description& get_group(cell_size_type i)
+//      bool is_local_gid(cell_gid_type i)
+
+TEST(domain_decomposition, homogenous_population)
 {
-    group_rules rules{1, backend_policy::use_multicore};
+    {   // Test on a node with 1 cpu core and no gpus.
+        // We assume that all cells will be put into cell groups of size 1.
+        // This assumption will not hold in the future, requiring an update to
+        // the test.
+        hw::node_info nd(1, 0);
+
+        unsigned num_cells = 10;
+        domain_decomposition D(homo_recipe(num_cells), nd);
+
+        EXPECT_EQ(D.num_global_cells(), num_cells);
+        EXPECT_EQ(D.num_local_cells(), num_cells);
+        EXPECT_EQ(D.num_local_groups(), num_cells);
+
+        auto gids = util::make_span(0, num_cells);
+        for (auto gid: gids) {
+            EXPECT_EQ(0, D.gid_domain(gid));
+            EXPECT_TRUE(D.is_local_gid(gid));
+        }
+        EXPECT_FALSE(D.is_local_gid(num_cells));
+
+        // Each cell group contains 1 cell of kind cable1d_neuron
+        // Each group should also be tagged for cpu execution
+        for (auto i: gids) {
+            auto& grp = D.get_group(i);
+            EXPECT_EQ(grp.gids.size(), 1u);
+            EXPECT_EQ(grp.gids.front(), unsigned(i));
+            EXPECT_EQ(grp.backend, backend_kind::multicore);
+            EXPECT_EQ(grp.kind, cell_kind::cable1d_neuron);
+        }
+    }
+    {   // Test on a node with 1 gpu and 1 cpu core.
+        // Assumes that all cells will be placed on gpu in a single group.
+        hw::node_info nd(1, 1);
 
-    unsigned num_cells = 10;
-    domain_decomposition decomp(test_recipe(num_cells), rules);
+        unsigned num_cells = 10;
+        domain_decomposition D(homo_recipe(num_cells), nd);
 
-    EXPECT_EQ(0u, decomp.cell_begin());
-    EXPECT_EQ(num_cells, decomp.cell_end());
+        EXPECT_EQ(D.num_global_cells(), num_cells);
+        EXPECT_EQ(D.num_local_cells(), num_cells);
+        EXPECT_EQ(D.num_local_groups(), 1u);
 
-    EXPECT_EQ(num_cells, decomp.num_local_cells());
-    EXPECT_EQ(num_cells, decomp.num_global_cells());
-    EXPECT_EQ(num_cells, decomp.num_local_groups());
+        auto gids = util::make_span(0, num_cells);
+        for (auto gid: gids) {
+            EXPECT_EQ(0, D.gid_domain(gid));
+            EXPECT_TRUE(D.is_local_gid(gid));
+        }
+        EXPECT_FALSE(D.is_local_gid(num_cells));
 
-    // cell group indexes are monotonically increasing
-    for (unsigned i=0u; i<num_cells; ++i) {
-        auto g = decomp.get_group(i);
-        EXPECT_LT(g.begin, g.end);
-        EXPECT_EQ(g.end-g.begin, 1u);
-    }
+        // All cells should be placed in a single cell group of kind
+        // cable1d_neuron, tagged for gpu execution.
+        auto grp = D.get_group(0u);
 
-    // check that local gid are identified as local
-    for (auto i=0u; i<num_cells; ++i) {
-        EXPECT_TRUE(decomp.is_local_gid(i));
+        EXPECT_EQ(grp.gids.size(), num_cells);
+        EXPECT_EQ(grp.gids.front(), 0u);
+        EXPECT_EQ(grp.gids.back(), num_cells-1);
+        EXPECT_EQ(grp.backend, backend_kind::gpu);
+        EXPECT_EQ(grp.kind, cell_kind::cable1d_neuron);
     }
-    EXPECT_FALSE(decomp.is_local_gid(num_cells));
 }
 
-TEST(domain_decomposition, multi_cell_groups)
+TEST(domain_decomposition, heterogenous_population)
 {
-    unsigned num_cells = 10;
-
-    // test group sizes from 1 up to 1 more than the total number of cells
-    for (unsigned group_size=1u; group_size<=num_cells+1; ++group_size) {
-        group_rules rules{group_size, backend_policy::use_multicore};
-        domain_decomposition decomp(test_recipe(num_cells), rules);
-
-        EXPECT_EQ(0u, decomp.cell_begin());
-        EXPECT_EQ(num_cells, decomp.cell_end());
-
-        EXPECT_EQ(num_cells, decomp.num_local_cells());
-        EXPECT_EQ(num_cells, decomp.num_global_cells());
-
-        unsigned num_groups = decomp.num_local_groups();
-
-        // check that cell group indexes are monotonically increasing
-        unsigned total_cells = 0;
-        std::vector<cell_size_type> bounds{decomp.cell_begin()};
-        for (auto i=0u; i<num_groups; ++i) {
-            auto g = decomp.get_group(i);
-            auto size = g.end-g.begin;
-
-            // assert that size of the group:
-            //   is nonzero
-            EXPECT_LT(g.begin, g.end);
-            //   is no larger than group_size
-            EXPECT_TRUE(size<=group_size);
-
-            // Check that the start of this group matches the end of
-            // the preceding group.
-            EXPECT_EQ(bounds.back(), g.begin);
-            bounds.push_back(g.end);
-
-            total_cells += size;
+    {   // Test on a node with 1 cpu core and no gpus.
+        // We assume that all cells will be put into cell groups of size 1.
+        // This assumption will not hold in the future, requiring an update to
+        // the test.
+        hw::node_info nd(1, 0);
+
+        unsigned num_cells = 10;
+        auto R = hetero_recipe(num_cells);
+        domain_decomposition D(R, nd);
+
+        EXPECT_EQ(D.num_global_cells(), num_cells);
+        EXPECT_EQ(D.num_local_cells(), num_cells);
+        EXPECT_EQ(D.num_local_groups(), num_cells);
+
+        auto gids = util::make_span(0, num_cells);
+        for (auto gid: gids) {
+            EXPECT_EQ(0, D.gid_domain(gid));
+            EXPECT_TRUE(D.is_local_gid(gid));
+        }
+        EXPECT_FALSE(D.is_local_gid(num_cells));
+
+        // Each cell group contains 1 cell, and should be tagged for cpu execution.
+        // Collect the gids of each group by cell kind for checking below.
+        auto grps = util::make_span(0, num_cells);
+        std::map<cell_kind, std::set<cell_gid_type>> kind_lists;
+        for (auto i: grps) {
+            auto& grp = D.get_group(i);
+            EXPECT_EQ(grp.gids.size(), 1u);
+            auto k = grp.kind;
+            kind_lists[k].insert(grp.gids.front());
+            EXPECT_EQ(grp.backend, backend_kind::multicore);
         }
-        // check that the sum of the group sizes euqals the total number of cells
-        EXPECT_EQ(total_cells, num_cells);
 
-        // check that local gid are identified as local
-        for (auto i=0u; i<num_cells; ++i) {
-            EXPECT_TRUE(decomp.is_local_gid(i));
-            auto group_id = decomp.local_group_from_gid(i);
-            EXPECT_TRUE(group_id);
-            EXPECT_LT(*group_id, num_groups);
+        for (auto k: {cell_kind::cable1d_neuron, cell_kind::regular_spike_source}) {
+            const auto& gids = kind_lists[k];
+            EXPECT_EQ(gids.size(), num_cells/2);
+            for (auto gid: gids) {
+                EXPECT_EQ(k, R.get_cell_kind(gid));
+            }
+        }
+    }
+    {   // Test on a node with 1 gpu and 1 cpu core.
+        // Assumes that calble cells are on gpu in a single group, and
+        // rff cells are on cpu in cell groups of size 1
+        hw::node_info nd(1, 1);
+
+        unsigned num_cells = 10;
+        auto R = hetero_recipe(num_cells);
+        domain_decomposition D(R, nd);
+
+        EXPECT_EQ(D.num_global_cells(), num_cells);
+        EXPECT_EQ(D.num_local_cells(), num_cells);
+        // one gpu cell group with num_cells/2 cells, and num_cells/2 single-cell groups on cpu
+        auto expected_groups = num_cells/2+1;
+        EXPECT_EQ(D.num_local_groups(), expected_groups);
+
+        auto grps = util::make_span(0, expected_groups);
+        unsigned ncells = 0;
+        // iterate over each group and test its properties
+        for (auto i: grps) {
+            auto& grp = D.get_group(i);
+            auto k = grp.kind;
+            if (k==cell_kind::cable1d_neuron) {
+                EXPECT_EQ(grp.backend, backend_kind::gpu);
+                EXPECT_EQ(grp.gids.size(), num_cells/2);
+                for (auto gid: grp.gids) {
+                    EXPECT_TRUE(gid%2==0);
+                    ++ncells;
+                }
+            }
+            else if (k==cell_kind::regular_spike_source){
+                EXPECT_EQ(grp.backend, backend_kind::multicore);
+                EXPECT_EQ(grp.gids.size(), 1u);
+                EXPECT_TRUE(grp.gids.front()%2);
+                ++ncells;
+            }
         }
-        EXPECT_FALSE(decomp.is_local_gid(num_cells));
-        EXPECT_FALSE(decomp.local_group_from_gid(num_cells));
+        EXPECT_EQ(num_cells, ncells);
     }
 }
diff --git a/tests/unit/test_dss_cell_group.cpp b/tests/unit/test_dss_cell_group.cpp
index 675487893551820ad0ae123003a94a0dccc9db24..5d853b10cb4f1e6d5b3ded429187805d46574321 100644
--- a/tests/unit/test_dss_cell_group.cpp
+++ b/tests/unit/test_dss_cell_group.cpp
@@ -14,7 +14,7 @@ TEST(dss_cell, constructor)
     std::vector<util::unique_any> cell_descriptions(1);
     cell_descriptions[0] = util::unique_any(dss_cell_description(spikes));
 
-    dss_cell_group sut(0, cell_descriptions);
+    dss_cell_group sut({0}, cell_descriptions);
 }
 
 TEST(dss_cell, basic_usage)
@@ -27,7 +27,7 @@ TEST(dss_cell, basic_usage)
     std::vector<util::unique_any> cell_descriptions(1);
     cell_descriptions[0] = util::unique_any(dss_cell_description(spikes_to_emit));
 
-    dss_cell_group sut(0, cell_descriptions);
+    dss_cell_group sut({0}, cell_descriptions);
 
     // no spikes in this time frame
     sut.advance(0.09, 0.01);   // The dt (0,01) is not used
@@ -69,7 +69,7 @@ TEST(dss_cell, cell_kind_correct)
     std::vector<util::unique_any> cell_descriptions(1);
     cell_descriptions[0] = util::unique_any(dss_cell_description(spikes_to_emit));
 
-    dss_cell_group sut(0, cell_descriptions);
+    dss_cell_group sut({0}, cell_descriptions);
 
     EXPECT_EQ(cell_kind::data_spike_source, sut.get_cell_kind());
 }
diff --git a/tests/unit/test_mc_cell_group.cpp b/tests/unit/test_mc_cell_group.cpp
index 75c6d355d6ef2ec9b8c798059785d3710c4049b2..4c157cb1e71578d7cc2f9982bde90c4ccd58f334 100644
--- a/tests/unit/test_mc_cell_group.cpp
+++ b/tests/unit/test_mc_cell_group.cpp
@@ -23,14 +23,14 @@ cell make_cell() {
 
 
 TEST(mc_cell_group, get_kind) {
-    mc_cell_group<fvm_cell> group{ 0, util::singleton_view(make_cell()) };
+    mc_cell_group<fvm_cell> group{ {0}, util::singleton_view(make_cell()) };
 
     // we are generating a mc_cell_group which should be of the correct type
     EXPECT_EQ(cell_kind::cable1d_neuron, group.get_cell_kind());
 }
 
 TEST(mc_cell_group, test) {
-    mc_cell_group<fvm_cell> group{0, util::singleton_view(make_cell())};
+    mc_cell_group<fvm_cell> group{{0}, util::singleton_view(make_cell())};
 
     group.advance(50, 0.01);
 
@@ -48,7 +48,7 @@ TEST(mc_cell_group, sources) {
     cell.add_detector({1, 0.3}, 2.3);
 
     cell_gid_type first_gid = 37u;
-    auto group = cell_group_type{first_gid, util::singleton_view(cell)};
+    auto group = cell_group_type{{first_gid}, util::singleton_view(cell)};
 
     // expect group sources to be lexicographically sorted by source id
     // with gids in cell group's range and indices starting from zero
diff --git a/tests/unit/test_mc_cell_group.cu b/tests/unit/test_mc_cell_group.cu
index 92f7af506db8c5480786e65764c6ca78c29c087d..118b97033ec4c6370dc265c2dc5782fc76da04ad 100644
--- a/tests/unit/test_mc_cell_group.cu
+++ b/tests/unit/test_mc_cell_group.cu
@@ -27,7 +27,7 @@ nest::mc::cell make_cell() {
 TEST(cell_group, test)
 {
     using cell_group_type = mc_cell_group<fvm_cell>;
-    auto group = cell_group_type{0, util::singleton_view(make_cell())};
+    auto group = cell_group_type({0u}, util::singleton_view(make_cell()));
 
     group.advance(50, 0.01);
 
diff --git a/tests/unit/test_spike_store.cpp b/tests/unit/test_spike_store.cpp
index a8039018572cdaecc2515e33fced63b30d330a0b..854e0eee00a6031a96d33f4b23d9f0e2a6e461e6 100644
--- a/tests/unit/test_spike_store.cpp
+++ b/tests/unit/test_spike_store.cpp
@@ -85,4 +85,3 @@ TEST(spike_store, gather)
         EXPECT_EQ(spikes[i].time, gathered_spikes[i].time);
     }
 }
-
diff --git a/tests/unit/test_spikes.cpp b/tests/unit/test_spikes.cpp
index 402305642bfbee6046481a96d20eff529a2f4161..5e759025138a493ebdc449b3ce7ac9c69c87e248 100644
--- a/tests/unit/test_spikes.cpp
+++ b/tests/unit/test_spikes.cpp
@@ -17,39 +17,6 @@ using backend = multicore::backend;
 using backend = USE_BACKEND;
 #endif
 
-TEST(spikes, ordering) {
-    {
-        spike s1({0,0}, 1), s2({0,0}, 2);
-        EXPECT_TRUE(s1<s2);
-    }
-    {
-        spike s1({0,0}, 1), s2({0,0}, 1);
-        EXPECT_FALSE(s1<s2);
-    }
-    {
-        spike s1({0,0}, 2), s2({0,0}, 1);
-        EXPECT_FALSE(s1<s2);
-    }
-}
-
-TEST(spikes, sorting) {
-    std::vector<spike> spikes = {
-        {{0,0}, 1},
-        {{0,0}, 2},
-        {{0,0}, 0},
-        {{0,0}, 3},
-        {{0,0}, 1},
-        {{0,0}, 2},
-    };
-
-    std::sort(spikes.begin(), spikes.end());
-
-    auto it = spikes.begin();
-    while (++it!=spikes.end()) {
-        EXPECT_TRUE(it->time >= (it-1)->time);
-    }
-}
-
 TEST(spikes, threshold_watcher) {
     using backend = multicore::backend;
     using size_type = backend::size_type;
diff --git a/tests/validation/validate_ball_and_stick.cpp b/tests/validation/validate_ball_and_stick.cpp
index c28530a1afbd0f81c663275bec8819d5ee9a2331..e4c22efabb1c75fb01c419971f66e92df5173d2b 100644
--- a/tests/validation/validate_ball_and_stick.cpp
+++ b/tests/validation/validate_ball_and_stick.cpp
@@ -3,7 +3,7 @@
 
 #include <fvm_multicell.hpp>
 
-const auto backend = nest::mc::backend_policy::use_multicore;
+const auto backend = nest::mc::backend_kind::multicore;
 
 TEST(ball_and_stick, neuron_ref) {
     validate_ball_and_stick(backend);
diff --git a/tests/validation/validate_ball_and_stick.cu b/tests/validation/validate_ball_and_stick.cu
index 3f8e442761e757c2096c2638426f3db08a71d7e7..1ad415c843c9c88143307dd100d6a424c0b7582a 100644
--- a/tests/validation/validate_ball_and_stick.cu
+++ b/tests/validation/validate_ball_and_stick.cu
@@ -3,7 +3,7 @@
 
 #include <fvm_multicell.hpp>
 
-const auto backend = nest::mc::backend_policy::prefer_gpu;
+const auto backend = nest::mc::backend_kind::gpu;
 
 TEST(ball_and_stick, neuron_ref) {
     validate_ball_and_stick(backend);
diff --git a/tests/validation/validate_ball_and_stick.hpp b/tests/validation/validate_ball_and_stick.hpp
index 786c6bc7b8c05592f81fb350a6d5e9c49b430ec8..9c288fce885c29b1a4cb1ddacaaad24b3bdd12d4 100644
--- a/tests/validation/validate_ball_and_stick.hpp
+++ b/tests/validation/validate_ball_and_stick.hpp
@@ -3,6 +3,7 @@
 #include <cell.hpp>
 #include <common_types.hpp>
 #include <fvm_multicell.hpp>
+#include <hardware/node_info.hpp>
 #include <model.hpp>
 #include <recipe.hpp>
 #include <simple_sampler.hpp>
@@ -19,7 +20,7 @@ template <typename SamplerInfoSeq>
 void run_ncomp_convergence_test(
     const char* model_name,
     const nest::mc::util::path& ref_data_path,
-    nest::mc::backend_policy backend,
+    nest::mc::backend_kind backend,
     const nest::mc::cell& c,
     SamplerInfoSeq& samplers,
     float t_end=100.f)
@@ -35,7 +36,7 @@ void run_ncomp_convergence_test(
         {"dt", dt},
         {"sim", "nestmc"},
         {"units", "mV"},
-        {"backend_policy", to_string(backend)}
+        {"backend_kind", to_string(backend)}
     };
 
     auto exclude = stimulus_ends(c);
@@ -49,7 +50,8 @@ void run_ncomp_convergence_test(
                 seg->set_compartments(ncomp);
             }
         }
-        domain_decomposition decomp(singleton_recipe{c}, {1u, backend});
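+        // Describe a node with one cpu core, plus one gpu when validating the gpu backend.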
+        hw::node_info nd(1, backend==backend_kind::gpu? 1: 0);
+        domain_decomposition decomp(singleton_recipe{c}, nd);
         model m(singleton_recipe{c}, decomp);
 
         runner.run(m, ncomp, t_end, dt, exclude);
@@ -58,7 +60,7 @@ void run_ncomp_convergence_test(
     runner.assert_all_convergence();
 }
 
-void validate_ball_and_stick(nest::mc::backend_policy backend) {
+void validate_ball_and_stick(nest::mc::backend_kind backend) {
     using namespace nest::mc;
 
     cell c = make_cell_ball_and_stick();
@@ -79,7 +81,7 @@ void validate_ball_and_stick(nest::mc::backend_policy backend) {
         samplers);
 }
 
-void validate_ball_and_taper(nest::mc::backend_policy backend) {
+void validate_ball_and_taper(nest::mc::backend_kind backend) {
     using namespace nest::mc;
 
     cell c = make_cell_ball_and_taper();
@@ -100,7 +102,7 @@ void validate_ball_and_taper(nest::mc::backend_policy backend) {
         samplers);
 }
 
-void validate_ball_and_3stick(nest::mc::backend_policy backend) {
+void validate_ball_and_3stick(nest::mc::backend_kind backend) {
     using namespace nest::mc;
 
     cell c = make_cell_ball_and_3stick();
@@ -125,7 +127,7 @@ void validate_ball_and_3stick(nest::mc::backend_policy backend) {
         samplers);
 }
 
-void validate_rallpack1(nest::mc::backend_policy backend) {
+void validate_rallpack1(nest::mc::backend_kind backend) {
     using namespace nest::mc;
 
     cell c = make_cell_simple_cable();
@@ -151,7 +153,7 @@ void validate_rallpack1(nest::mc::backend_policy backend) {
         250.f);
 }
 
-void validate_ball_and_squiggle(nest::mc::backend_policy backend) {
+void validate_ball_and_squiggle(nest::mc::backend_kind backend) {
     using namespace nest::mc;
 
     cell c = make_cell_ball_and_squiggle();
diff --git a/tests/validation/validate_kinetic.cpp b/tests/validation/validate_kinetic.cpp
index b5d97b23ffb87dd3a0a26e1a4dfe1da5f103af37..a9d95faab8dec854f03f24e24acb615e9c92337f 100644
--- a/tests/validation/validate_kinetic.cpp
+++ b/tests/validation/validate_kinetic.cpp
@@ -2,7 +2,7 @@
 
 #include "../gtest.h"
 
-const auto backend = nest::mc::backend_policy::use_multicore;
+const auto backend = nest::mc::backend_kind::multicore;
 
 TEST(kinetic, kin1_numeric_ref) {
     validate_kinetic_kin1(backend);
diff --git a/tests/validation/validate_kinetic.cu b/tests/validation/validate_kinetic.cu
index a86847f82bd12f0871c0443b3bf40e743f66af7f..afc510312bd02a81146eeb1665bee73b08006f6a 100644
--- a/tests/validation/validate_kinetic.cu
+++ b/tests/validation/validate_kinetic.cu
@@ -2,7 +2,7 @@
 
 #include "../gtest.h"
 
-const auto backend = nest::mc::backend_policy::prefer_gpu;
+const auto backend = nest::mc::backend_kind::gpu;
 
 TEST(kinetic, kin1_numeric_ref) {
     validate_kinetic_kin1(backend);
diff --git a/tests/validation/validate_kinetic.hpp b/tests/validation/validate_kinetic.hpp
index ba21d0697a5bce98b3cb30643576fbee489f6f24..f4bab4bb6215952517703bd4901c754483b59f8d 100644
--- a/tests/validation/validate_kinetic.hpp
+++ b/tests/validation/validate_kinetic.hpp
@@ -3,6 +3,7 @@
 #include <common_types.hpp>
 #include <cell.hpp>
 #include <fvm_multicell.hpp>
+#include <hardware/node_info.hpp>
 #include <model.hpp>
 #include <recipe.hpp>
 #include <simple_sampler.hpp>
@@ -14,7 +15,7 @@
 #include "validation_data.hpp"
 
 void run_kinetic_dt(
-    nest::mc::backend_policy backend,
+    nest::mc::backend_kind backend,
     nest::mc::cell& c,
     float t_end,
     nlohmann::json meta,
@@ -28,11 +29,12 @@ void run_kinetic_dt(
     };
 
     meta["sim"] = "nestmc";
-    meta["backend_policy"] = to_string(backend);
+    meta["backend_kind"] = to_string(backend);
     convergence_test_runner<float> runner("dt", samplers, meta);
     runner.load_reference_data(ref_file);
 
-    domain_decomposition decomp(singleton_recipe{c}, {1u, backend});
+    hw::node_info nd(1, backend==backend_kind::gpu? 1: 0);
+    domain_decomposition decomp(singleton_recipe{c}, nd);
     model model(singleton_recipe{c}, decomp);
 
     auto exclude = stimulus_ends(c);
@@ -55,7 +57,7 @@ end:
     runner.assert_all_convergence();
 }
 
-void validate_kinetic_kin1(nest::mc::backend_policy backend) {
+void validate_kinetic_kin1(nest::mc::backend_kind backend) {
     using namespace nest::mc;
 
     // 20 µm diameter soma with single mechanism, current probe
@@ -73,7 +75,7 @@ void validate_kinetic_kin1(nest::mc::backend_policy backend) {
     run_kinetic_dt(backend, c, 100.f, meta, "numeric_kin1.json");
 }
 
-void validate_kinetic_kinlva(nest::mc::backend_policy backend) {
+void validate_kinetic_kinlva(nest::mc::backend_kind backend) {
     using namespace nest::mc;
 
     // 20 µm diameter soma with single mechanism, current probe
diff --git a/tests/validation/validate_soma.cpp b/tests/validation/validate_soma.cpp
index 57b094b38a8026cf0ef589308aa3241dea5cbade..acb7486127eb6ee97668ee4b33382909b2696471 100644
--- a/tests/validation/validate_soma.cpp
+++ b/tests/validation/validate_soma.cpp
@@ -3,7 +3,7 @@
 #include "../gtest.h"
 
 
-const auto backend = nest::mc::backend_policy::use_multicore;
+const auto backend = nest::mc::backend_kind::multicore;
 
 TEST(soma, numeric_ref) {
     validate_soma(backend);
diff --git a/tests/validation/validate_soma.cu b/tests/validation/validate_soma.cu
index b31b91a42b9a24e8d343ec7a8a1a451177a9f45f..f726155ddc35b3fc71205c7a03ae4eec126e7fd4 100644
--- a/tests/validation/validate_soma.cu
+++ b/tests/validation/validate_soma.cu
@@ -2,7 +2,7 @@
 
 #include "../gtest.h"
 
-const auto backend = nest::mc::backend_policy::prefer_gpu;
+const auto backend = nest::mc::backend_kind::gpu;
 
 TEST(soma, numeric_ref) {
     validate_soma(backend);
diff --git a/tests/validation/validate_soma.hpp b/tests/validation/validate_soma.hpp
index 862e50304aaa776302f913fd3156772eb861d95e..d062af98b879f68a8a8da20eefad8f97db8c2297 100644
--- a/tests/validation/validate_soma.hpp
+++ b/tests/validation/validate_soma.hpp
@@ -3,6 +3,7 @@
 #include <common_types.hpp>
 #include <cell.hpp>
 #include <fvm_multicell.hpp>
+#include <hardware/node_info.hpp>
 #include <model.hpp>
 #include <recipe.hpp>
 #include <simple_sampler.hpp>
@@ -13,12 +14,14 @@
 #include "trace_analysis.hpp"
 #include "validation_data.hpp"
 
-void validate_soma(nest::mc::backend_policy backend) {
+void validate_soma(nest::mc::backend_kind backend) {
     using namespace nest::mc;
 
     cell c = make_cell_soma_only();
     add_common_voltage_probes(c);
-    domain_decomposition decomp(singleton_recipe{c}, {1u, backend});
+
+    hw::node_info nd(1, backend==backend_kind::gpu? 1: 0);
+    domain_decomposition decomp(singleton_recipe{c}, nd);
     model m(singleton_recipe{c}, decomp);
 
     float sample_dt = .025f;
@@ -29,7 +32,7 @@ void validate_soma(nest::mc::backend_policy backend) {
         {"model", "soma"},
         {"sim", "nestmc"},
         {"units", "mV"},
-        {"backend_policy", to_string(backend)}
+        {"backend_kind", to_string(backend)}
     };
 
     convergence_test_runner<float> runner("dt", samplers, meta);
diff --git a/tests/validation/validate_synapses.cpp b/tests/validation/validate_synapses.cpp
index d1331b70a10976fe234495bdb9a886b3f9b29602..1a8bbf03b9e389ad2fe0e8f46ee561e2987696e4 100644
--- a/tests/validation/validate_synapses.cpp
+++ b/tests/validation/validate_synapses.cpp
@@ -3,7 +3,7 @@
 #include "../gtest.h"
 #include "validate_synapses.hpp"
 
-const auto backend = nest::mc::backend_policy::use_multicore;
+const auto backend = nest::mc::backend_kind::multicore;
 
 TEST(simple_synapse, expsyn_neuron_ref) {
     SCOPED_TRACE("expsyn");
diff --git a/tests/validation/validate_synapses.cu b/tests/validation/validate_synapses.cu
index 47f0cc67230dd89e09660c66cbaa95f068664385..886bbdfb5ac095571783882e4fbbc408175c3d9e 100644
--- a/tests/validation/validate_synapses.cu
+++ b/tests/validation/validate_synapses.cu
@@ -3,7 +3,7 @@
 #include "../gtest.h"
 #include "validate_synapses.hpp"
 
-const auto backend = nest::mc::backend_policy::prefer_gpu;
+const auto backend = nest::mc::backend_kind::gpu;
 
 TEST(simple_synapse, expsyn_neuron_ref) {
     SCOPED_TRACE("expsyn");
diff --git a/tests/validation/validate_synapses.hpp b/tests/validation/validate_synapses.hpp
index 83a77d36eb73bfee8533055a90cc1c872aecc38b..7607e46885dd10c859a0e9ed9f1ee5227a48740d 100644
--- a/tests/validation/validate_synapses.hpp
+++ b/tests/validation/validate_synapses.hpp
@@ -3,6 +3,7 @@
 #include <cell.hpp>
 #include <cell_group.hpp>
 #include <fvm_multicell.hpp>
+#include <hardware/node_info.hpp>
 #include <model.hpp>
 #include <recipe.hpp>
 #include <simple_sampler.hpp>
@@ -18,7 +19,7 @@
 void run_synapse_test(
     const char* syn_type,
     const nest::mc::util::path& ref_data_path,
-    nest::mc::backend_policy backend,
+    nest::mc::backend_kind backend,
     float t_end=70.f,
     float dt=0.001)
 {
@@ -30,7 +31,7 @@ void run_synapse_test(
         {"model", syn_type},
         {"sim", "nestmc"},
         {"units", "mV"},
-        {"backend_policy", to_string(backend)}
+        {"backend_kind", to_string(backend)}
     };
 
     cell c = make_cell_ball_and_stick(false); // no stimuli
@@ -58,9 +59,10 @@ void run_synapse_test(
     convergence_test_runner<int> runner("ncomp", samplers, meta);
     runner.load_reference_data(ref_data_path);
 
+    hw::node_info nd(1, backend==backend_kind::gpu? 1: 0);
     for (int ncomp = 10; ncomp<max_ncomp; ncomp*=2) {
         c.cable(1)->set_compartments(ncomp);
-        domain_decomposition decomp(singleton_recipe{c}, {1u, backend});
+        domain_decomposition decomp(singleton_recipe{c}, nd);
         model m(singleton_recipe{c}, decomp);
         m.group(0).enqueue_events(synthetic_events);