diff --git a/arbor/CMakeLists.txt b/arbor/CMakeLists.txt
index 462ce9f6b48a2f5e2c7fc3a596199ad27d757ad2..db81edbbd89510d908d677f7dbc5f4b6843a5b8f 100644
--- a/arbor/CMakeLists.txt
+++ b/arbor/CMakeLists.txt
@@ -12,6 +12,8 @@ set(arbor_sources
     cell_group_factory.cpp
     common_types_io.cpp
     cv_policy.cpp
+    domdecexcept.cpp
+    domain_decomposition.cpp
     execution_context.cpp
     gpu_context.cpp
     event_binner.cpp
diff --git a/arbor/arbexcept.cpp b/arbor/arbexcept.cpp
index fc4ffe7ec585c20e6b7141d1e705acac35d35e27..36ec2a6397031219b4185df60e74e970fd321fdb 100644
--- a/arbor/arbexcept.cpp
+++ b/arbor/arbexcept.cpp
@@ -42,12 +42,6 @@ bad_probe_id::bad_probe_id(cell_member_type probe_id):
     probe_id(probe_id)
 {}
 
-gj_unsupported_domain_decomposition::gj_unsupported_domain_decomposition(cell_gid_type gid_0, cell_gid_type gid_1):
-    arbor_exception(pprintf("No support for gap junctions across domain decomposition groups for gid {} and {}", gid_0, gid_1)),
-    gid_0(gid_0),
-    gid_1(gid_1)
-{}
-
 gj_unsupported_lid_selection_policy::gj_unsupported_lid_selection_policy(cell_gid_type gid, cell_tag_type label):
     arbor_exception(pprintf("Model building error on cell {}: gap junction site label \"{}\" must be univalent.", gid, label)),
     gid(gid),
diff --git a/arbor/communication/communicator.cpp b/arbor/communication/communicator.cpp
index 30835fff384af68aed8838518ce1d3b5ee9d2ba7..8fa93e9b1b2504bb044b6c9c61c22bfeb6bc975e 100644
--- a/arbor/communication/communicator.cpp
+++ b/arbor/communication/communicator.cpp
@@ -32,8 +32,8 @@ communicator::communicator(const recipe& rec,
     thread_pool_ = ctx.thread_pool;
 
     num_domains_ = distributed_->size();
-    num_local_groups_ = dom_dec.groups.size();
-    num_local_cells_ = dom_dec.num_local_cells;
+    num_local_groups_ = dom_dec.num_groups();
+    num_local_cells_ = dom_dec.num_local_cells();
     auto num_total_cells = rec.num_cells();
 
     // For caching information about each cell
@@ -61,7 +61,7 @@ communicator::communicator(const recipe& rec,
     // that populates gid_infos.
     std::vector<cell_gid_type> gids;
     gids.reserve(num_local_cells_);
-    for (auto g: dom_dec.groups) {
+    for (auto g: dom_dec.groups()) {
         util::append(gids, g.gids);
     }
     // Build the connection information for local cells in parallel.
@@ -112,7 +112,7 @@ communicator::communicator(const recipe& rec,
     // Build cell partition by group for passing events to cell groups
     index_part_ = util::make_partition(index_divisions_,
         util::transform_view(
-            dom_dec.groups,
+            dom_dec.groups(),
             [](const group_description& g){return g.gids.size();}));
 
     // Sort the connections for each domain.
diff --git a/arbor/domain_decomposition.cpp b/arbor/domain_decomposition.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7a58d804d4784a186f652f70b25b07658646a19c
--- /dev/null
+++ b/arbor/domain_decomposition.cpp
@@ -0,0 +1,120 @@
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include <arbor/domdecexcept.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/recipe.hpp>
+#include <arbor/context.hpp>
+
+#include "execution_context.hpp"
+#include "util/partition.hpp"
+#include "util/rangeutil.hpp"
+#include "util/span.hpp"
+
+namespace arb {
+domain_decomposition::domain_decomposition(
+    const recipe& rec,
+    const context& ctx,
+    const std::vector<group_description>& groups)
+{
+    struct partition_gid_domain {
+        partition_gid_domain(const gathered_vector<cell_gid_type>& divs, unsigned domains) {
+            auto rank_part = util::partition_view(divs.partition());
+            for (auto rank: count_along(rank_part)) {
+                for (auto gid: util::subrange_view(divs.values(), rank_part[rank])) {
+                    gid_map[gid] = rank;
+                }
+            }
+        }
+        int operator()(cell_gid_type gid) const {
+            return gid_map.at(gid);
+        }
+        std::unordered_map<cell_gid_type, int> gid_map;
+    };
+
+    unsigned num_domains = ctx->distributed->size();
+    int domain_id = ctx->distributed->id();
+    cell_size_type num_global_cells = rec.num_cells();
+    const bool has_gpu = ctx->gpu->has_gpu();
+
+    std::vector<cell_gid_type> local_gids;
+    for (const auto& g: groups) {
+        if (g.backend == backend_kind::gpu && !has_gpu) {
+            throw invalid_backend(domain_id);
+        }
+        if (g.backend == backend_kind::gpu && g.kind != cell_kind::cable) {
+            throw incompatible_backend(domain_id, g.kind);
+        }
+
+        std::unordered_set<cell_gid_type> gid_set(g.gids.begin(), g.gids.end());
+        for (const auto& gid: g.gids) {
+            if (gid >= num_global_cells) {
+                throw out_of_bounds(gid, num_global_cells);
+            }
+            for (const auto& gj: rec.gap_junctions_on(gid)) {
+                if (!gid_set.count(gj.peer.gid)) {
+                    throw invalid_gj_cell_group(gid, gj.peer.gid);
+                }
+            }
+        }
+        local_gids.insert(local_gids.end(), g.gids.begin(), g.gids.end());
+    }
+    cell_size_type num_local_cells = local_gids.size();
+
+    auto global_gids = ctx->distributed->gather_gids(local_gids);
+    if (global_gids.size() != num_global_cells) {
+        throw invalid_sum_local_cells(global_gids.size(), num_global_cells);
+    }
+
+    auto global_gid_vals = global_gids.values();
+    util::sort(global_gid_vals);
+    for (unsigned i = 1; i < global_gid_vals.size(); ++i) {
+        if (global_gid_vals[i] == global_gid_vals[i-1]) {
+            throw duplicate_gid(global_gid_vals[i]);
+        }
+    }
+
+    num_domains_ = num_domains;
+    domain_id_ = domain_id;
+    num_local_cells_ = num_local_cells;
+    num_global_cells_ = num_global_cells;
+    groups_ = groups;
+    gid_domain_ = partition_gid_domain(global_gids, num_domains);
+}
+
+int domain_decomposition::gid_domain(cell_gid_type gid) const {
+    return gid_domain_(gid);
+}
+
+int domain_decomposition::num_domains() const {
+    return num_domains_;
+}
+
+int domain_decomposition::domain_id() const {
+    return domain_id_;
+}
+
+cell_size_type domain_decomposition::num_local_cells() const {
+    return num_local_cells_;
+}
+
+cell_size_type domain_decomposition::num_global_cells() const {
+    return num_global_cells_;
+}
+
+cell_size_type domain_decomposition::num_groups() const {
+    return groups_.size();
+}
+
+const std::vector<group_description>& domain_decomposition::groups() const {
+    return groups_;
+}
+
+const group_description& domain_decomposition::group(unsigned idx) const {
+    arb_assert(idx<num_groups());
+    return groups_[idx];
+}
+
+} // namespace arb
+
diff --git a/arbor/domdecexcept.cpp b/arbor/domdecexcept.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0c3770fb649b2d74ddf5c3736cdc20510b01f2cc
--- /dev/null
+++ b/arbor/domdecexcept.cpp
@@ -0,0 +1,52 @@
+#include <string>
+
+#include <arbor/domdecexcept.hpp>
+
+#include "util/strprintf.hpp"
+
+namespace arb {
+
+using arb::util::pprintf;
+
+invalid_gj_cell_group::invalid_gj_cell_group(cell_gid_type gid_0, cell_gid_type gid_1):
+    dom_dec_exception(pprintf("cell {} needs to be in the same group as cell {} because they are connected via gap-junction.",
+                              gid_0, gid_1)),
+    gid_0(gid_0),
+    gid_1(gid_1)
+{}
+
+invalid_sum_local_cells::invalid_sum_local_cells(unsigned gc_wrong, unsigned gc_right):
+    dom_dec_exception(pprintf("sum of local cells on the individual ranks ({}) is not equal to the total number of cells in the recipe ({}).",
+                              gc_wrong, gc_right)),
+    gc_wrong(gc_wrong),
+    gc_right(gc_right)
+{}
+
+duplicate_gid::duplicate_gid(cell_gid_type gid):
+    dom_dec_exception(pprintf("gid {} is present in multiple cell-groups or multiple times in the same cell group.",
+                              gid)),
+    gid(gid)
+{}
+
+out_of_bounds::out_of_bounds(cell_gid_type gid, unsigned num_cells):
+    dom_dec_exception(pprintf("cell {} is out-of-bounds of the allowed gids in the simulation which has {} total cells.",
+                              gid, num_cells)),
+    gid(gid),
+    num_cells(num_cells)
+{}
+
+invalid_backend::invalid_backend(int rank):
+    dom_dec_exception(pprintf("rank {} contains a group meant to run on GPU, but no GPU backend was detected in the context.",
+                              rank)),
+    rank(rank)
+{}
+
+incompatible_backend::incompatible_backend(int rank, cell_kind kind):
+    dom_dec_exception(pprintf("rank {} contains a group with cells of kind {} meant to run on the GPU backend, but no GPU backend support exists for {}",
+                              rank, kind, kind)),
+    rank(rank),
+    kind(kind)
+{}
+
+} // namespace arb
+
diff --git a/arbor/fvm_lowered_cell_impl.hpp b/arbor/fvm_lowered_cell_impl.hpp
index c3dbdfad4f8aeb5c013483f70432cdb8e4f47b0e..5af43287e0609af9afa7b9ac024d2d99e1a8d0dc 100644
--- a/arbor/fvm_lowered_cell_impl.hpp
+++ b/arbor/fvm_lowered_cell_impl.hpp
@@ -667,11 +667,7 @@ fvm_size_type fvm_lowered_cell_impl<Backend>::fvm_intdom(
 
             cell_to_intdom[gid_to_loc[g]] = intdom_id;
 
-            for (auto gj: rec.gap_junctions_on(g)) {
-                if (!gid_to_loc.count(gj.peer.gid)) {
-                    throw gj_unsupported_domain_decomposition(g, gj.peer.gid);
-                }
-
+            for (const auto& gj: rec.gap_junctions_on(g)) {
                 if (!visited.count(gj.peer.gid)) {
                     visited.insert(gj.peer.gid);
                     intdomq.push(gj.peer.gid);
diff --git a/arbor/include/arbor/arbexcept.hpp b/arbor/include/arbor/arbexcept.hpp
index 2185c5dfbfef704fded3e0090fa655aa649a0288..9851823d3cfd2c34b1a57da6bee92b9e9ca7f324 100644
--- a/arbor/include/arbor/arbexcept.hpp
+++ b/arbor/include/arbor/arbexcept.hpp
@@ -75,13 +75,6 @@ struct gj_unsupported_lid_selection_policy: arbor_exception {
     cell_tag_type label;
 };
 
-// Domain decomposition errors:
-
-struct gj_unsupported_domain_decomposition: arbor_exception {
-    gj_unsupported_domain_decomposition(cell_gid_type gid_0, cell_gid_type gid_1);
-    cell_gid_type gid_0, gid_1;
-};
-
 // Simulation errors:
 
 struct bad_event_time: arbor_exception {
diff --git a/arbor/include/arbor/domain_decomposition.hpp b/arbor/include/arbor/domain_decomposition.hpp
index 43aa6415a27e19de3663eb2b5a76463a5fb54240..c79800503542064977af8ca539f0b0d64a891b7c 100644
--- a/arbor/include/arbor/domain_decomposition.hpp
+++ b/arbor/include/arbor/domain_decomposition.hpp
@@ -7,6 +7,7 @@
 #include <arbor/assert.hpp>
 #include <arbor/common_types.hpp>
 #include <arbor/context.hpp>
+#include <arbor/recipe.hpp>
 
 namespace arb {
 
@@ -31,26 +32,39 @@ struct group_description {
 /// distribution of cells across cell_groups and domains.
 /// A load balancing algorithm is responsible for generating the
 /// domain_decomposition, e.g. arb::partitioned_load_balancer().
-struct domain_decomposition {
+class domain_decomposition {
+public:
+    domain_decomposition() = delete;
+    domain_decomposition(const recipe& rec, const context& ctx, const std::vector<group_description>& groups);
+
+    int gid_domain(cell_gid_type gid) const;
+    int num_domains() const;
+    int domain_id() const;
+    cell_size_type num_local_cells() const;
+    cell_size_type num_global_cells() const;
+    cell_size_type num_groups() const;
+    const std::vector<group_description>& groups() const;
+    const group_description& group(unsigned) const;
+
+private:
     /// Return the domain id of cell with gid.
     /// Supplied by the load balancing algorithm that generates the domain
     /// decomposition.
-    std::function<int(cell_gid_type)> gid_domain;
+    std::function<int(cell_gid_type)> gid_domain_;
 
     /// Number of distrubuted domains
-    int num_domains;
+    int num_domains_;
 
     /// The index of the local domain
-    int domain_id;
+    int domain_id_;
 
     /// Total number of cells in the local domain
-    cell_size_type num_local_cells;
+    cell_size_type num_local_cells_;
 
     /// Total number of cells in the global model (sum over all domains)
-    cell_size_type num_global_cells;
+    cell_size_type num_global_cells_;
 
     /// Descriptions of the cell groups on the local domain
-    std::vector<group_description> groups;
+    std::vector<group_description> groups_;
 };
-
 } // namespace arb
diff --git a/arbor/include/arbor/domdecexcept.hpp b/arbor/include/arbor/domdecexcept.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..206f1c7628c2dba7eee780ca5cc37c5627cd9085
--- /dev/null
+++ b/arbor/include/arbor/domdecexcept.hpp
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <string>
+
+#include <arbor/arbexcept.hpp>
+#include <arbor/domain_decomposition.hpp>
+
+namespace arb {
+struct dom_dec_exception: public arbor_exception {
+    dom_dec_exception(const std::string& what): arbor_exception("Invalid domain decomposition: " + what) {}
+};
+
+struct invalid_gj_cell_group: dom_dec_exception {
+    invalid_gj_cell_group(cell_gid_type gid_0, cell_gid_type gid_1);
+    cell_gid_type gid_0, gid_1;
+};
+
+struct invalid_sum_local_cells: dom_dec_exception {
+    invalid_sum_local_cells(unsigned gc_wrong, unsigned gc_right);
+    unsigned gc_wrong, gc_right;
+};
+
+struct duplicate_gid: dom_dec_exception {
+    duplicate_gid(cell_gid_type gid);
+    cell_gid_type gid;
+};
+
+struct out_of_bounds: dom_dec_exception {
+    out_of_bounds(cell_gid_type gid, unsigned num_cells);
+    cell_gid_type gid;
+    unsigned num_cells;
+};
+
+struct invalid_backend: dom_dec_exception {
+    invalid_backend(int rank);
+    int rank;
+};
+
+struct incompatible_backend: dom_dec_exception {
+    incompatible_backend(int rank, cell_kind kind);
+    int rank;
+    cell_kind kind;
+};
+} // namespace arb
+
diff --git a/arbor/include/arbor/load_balance.hpp b/arbor/include/arbor/load_balance.hpp
index 7a021f19192e076f7b4af744648ae6b25cf54a6e..44d966e4cf7d81f17d7a304f97f5136d6aef427c 100644
--- a/arbor/include/arbor/load_balance.hpp
+++ b/arbor/include/arbor/load_balance.hpp
@@ -23,5 +23,4 @@ domain_decomposition partition_load_balance(
     const recipe& rec,
     const context& ctx,
     partition_hint_map hint_map = {});
-
 } // namespace arb
diff --git a/arbor/partition_load_balance.cpp b/arbor/partition_load_balance.cpp
index 73227f9238517c92a6217b3beb1f52ea0c78ee7a..40b145b003bb04e5bfc7e5c3117eb622f8984a4d 100644
--- a/arbor/partition_load_balance.cpp
+++ b/arbor/partition_load_balance.cpp
@@ -3,7 +3,7 @@
 #include <unordered_set>
 #include <vector>
 
-#include <arbor/arbexcept.hpp>
+#include <arbor/domdecexcept.hpp>
 #include <arbor/domain_decomposition.hpp>
 #include <arbor/load_balance.hpp>
 #include <arbor/recipe.hpp>
@@ -25,34 +25,11 @@ domain_decomposition partition_load_balance(
     const context& ctx,
     partition_hint_map hint_map)
 {
-    const bool gpu_avail = ctx->gpu->has_gpu();
-
-    struct partition_gid_domain {
-        partition_gid_domain(const gathered_vector<cell_gid_type>& divs, unsigned domains) {
-            auto rank_part = util::partition_view(divs.partition());
-            for (auto rank: count_along(rank_part)) {
-                for (auto gid: util::subrange_view(divs.values(), rank_part[rank])) {
-                    gid_map[gid] = rank;
-                }
-            }
-        }
-
-        int operator()(cell_gid_type gid) const {
-            return gid_map.at(gid);
-        }
-
-        std::unordered_map<cell_gid_type, int> gid_map;
-    };
-
-    struct cell_identifier {
-        cell_gid_type id;
-        bool is_super_cell;
-    };
-
     using util::make_span;
 
     unsigned num_domains = ctx->distributed->size();
     unsigned domain_id = ctx->distributed->id();
+    const bool gpu_avail = ctx->gpu->has_gpu();
     auto num_global_cells = rec.num_cells();
 
     auto dom_size = [&](unsigned dom) -> cell_gid_type {
@@ -156,6 +133,10 @@ domain_decomposition partition_load_balance(
     // 1. gids of regular cells (in reg_cells)
     // 2. indices of supercells (in super_cells)
 
+    struct cell_identifier {
+        cell_gid_type id;
+        bool is_super_cell;
+    };
     std::vector<cell_gid_type> local_gids;
     std::unordered_map<cell_kind, std::vector<cell_identifier>> kind_lists;
     for (auto gid: reg_cells) {
@@ -220,11 +201,11 @@ domain_decomposition partition_load_balance(
         std::vector<cell_gid_type> group_elements;
         // group_elements are sorted such that the gids of all members of a super_cell are consecutive.
         for (auto cell: kind_lists[k]) {
-            if (cell.is_super_cell == false) {
+            if (!cell.is_super_cell) {
                 group_elements.push_back(cell.id);
             } else {
                 if (group_elements.size() + super_cells[cell.id].size() > group_size && !group_elements.empty()) {
-                    groups.push_back({k, std::move(group_elements), backend});
+                    groups.emplace_back(k, std::move(group_elements), backend);
                     group_elements.clear();
                 }
                 for (auto gid: super_cells[cell.id]) {
@@ -232,30 +213,20 @@ domain_decomposition partition_load_balance(
                 }
             }
             if (group_elements.size()>=group_size) {
-                groups.push_back({k, std::move(group_elements), backend});
+                groups.emplace_back(k, std::move(group_elements), backend);
                 group_elements.clear();
             }
         }
         if (!group_elements.empty()) {
-            groups.push_back({k, std::move(group_elements), backend});
+            groups.emplace_back(k, std::move(group_elements), backend);
         }
     }
 
-    cell_size_type num_local_cells = local_gids.size();
-
     // Exchange gid list with all other nodes
     // global all-to-all to gather a local copy of the global gid list on each node.
     auto global_gids = ctx->distributed->gather_gids(local_gids);
 
-    domain_decomposition d;
-    d.num_domains = num_domains;
-    d.domain_id = domain_id;
-    d.num_local_cells = num_local_cells;
-    d.num_global_cells = num_global_cells;
-    d.groups = std::move(groups);
-    d.gid_domain = partition_gid_domain(global_gids, num_domains);
-
-    return d;
+    return domain_decomposition(rec, ctx, groups);
 }
 
 } // namespace arb
diff --git a/arbor/simulation.cpp b/arbor/simulation.cpp
index 7284a3c41c537959f76bdf1406490269d7276304..b16e7a26e3ffd4cea3d1ec58e048ed322d28cf83 100644
--- a/arbor/simulation.cpp
+++ b/arbor/simulation.cpp
@@ -187,12 +187,12 @@ simulation_state::simulation_state(
     local_spikes_({thread_private_spike_store(ctx.thread_pool), thread_private_spike_store(ctx.thread_pool)})
 {
     // Generate the cell groups in parallel, with one task per cell group.
-    cell_groups_.resize(decomp.groups.size());
+    cell_groups_.resize(decomp.num_groups());
     std::vector<cell_labels_and_gids> cg_sources(cell_groups_.size());
     std::vector<cell_labels_and_gids> cg_targets(cell_groups_.size());
     foreach_group_index(
         [&](cell_group_ptr& group, int i) {
-          const auto& group_info = decomp.groups[i];
+          const auto& group_info = decomp.group(i);
           cell_label_range sources, targets;
           auto factory = cell_kind_implementation(group_info.kind, group_info.backend, ctx);
           group = factory(group_info.gids, rec, sources, targets);
@@ -226,7 +226,7 @@ simulation_state::simulation_state(
     cell_size_type grpidx = 0;
 
     auto target_resolution_map_ptr = std::make_shared<label_resolution_map>(std::move(target_resolution_map));
-    for (const auto& group_info: decomp.groups) {
+    for (const auto& group_info: decomp.groups()) {
         for (auto gid: group_info.gids) {
             // Store mapping of gid to local cell index.
             gid_to_local_[gid] = gid_local_info{lidx, grpidx};
diff --git a/doc/cpp/domdec.rst b/doc/cpp/domdec.rst
index 3ceddbd3fc836c2db02e3f9ecf5bcd503198db8d..cb091390d2155390b9cf05206fd8e130122e8958 100644
--- a/doc/cpp/domdec.rst
+++ b/doc/cpp/domdec.rst
@@ -7,57 +7,6 @@ Domain decomposition
 
 The C++ API for partitioning a model over distributed and local hardware is described here.
 
-Load balancers
---------------
-
-Load balancing generates a :cpp:class:`domain_decomposition` given an :cpp:class:`arb::recipe`
-and a description of the hardware on which the model will run. Currently Arbor provides
-one load balancer, :cpp:func:`partition_load_balance`, and more will be added over time.
-
-If the model is distributed with MPI, the partitioning algorithm for cells is
-distributed with MPI communication. The returned :cpp:class:`domain_decomposition`
-describes the cell groups on the local MPI rank.
-
-.. Note::
-    The :cpp:class:`domain_decomposition` type is
-    independent of any load balancing algorithm, so users can define a
-    domain decomposition directly, instead of generating it with a load balancer.
-    This is useful for cases where the provided load balancers are inadequate,
-    or when the user has specific insight into running their model on the
-    target computer.
-
-.. Important::
-    When users supply their own :cpp:class:`domain_decomposition`, if they have
-    **Gap Junction connections**, they have to be careful to place all cells that
-    are connected via gap junctions in the same group.
-    Example:
-    ``A -gj- B -gj- C``  and ``D -gj- E``.
-    Cells A, B and C need to be in a single group; and cells D and E need to be in a
-    single group. They may all be placed in the same group but not necessarily.
-    Be mindful that smaller cell groups perform better on multi-core systems and
-    try not to overcrowd cell groups if not needed.
-    Arbor provided load balancers such as :cpp:func:`partition_load_balance`
-    guarantee that this rule is obeyed.
-
-.. cpp:function:: domain_decomposition partition_load_balance(const recipe& rec, const arb::context& ctx)
-
-    Construct a :cpp:class:`domain_decomposition` that distributes the cells
-    in the model described by :cpp:any:`rec` over the distributed and local hardware
-    resources described by :cpp:any:`ctx`.
-
-    The algorithm counts the number of each cell type in the global model, then
-    partitions the cells of each type equally over the available nodes.
-    If a GPU is available, and if the cell type can be run on the GPU, the
-    cells on each node are put one large group to maximise the amount of fine
-    grained parallelism in the cell group.
-    Otherwise, cells are grouped into small groups that fit in cache, and can be
-    distributed over the available cores.
-
-    .. Note::
-        The partitioning assumes that all cells of the same kind have equal
-        computational cost, hence it may not produce a balanced partition for
-        models with cells that have a large variance in computational costs.
-
 Decomposition
 -------------
 
@@ -65,78 +14,129 @@ Documentation for the data structures used to describe domain decompositions.
 
 .. cpp:namespace:: arb
 
-.. cpp:enum-class:: backend_kind
-
-    Used to indicate which hardware backend to use for running a :cpp:class:`cell_group`.
-
-    .. cpp:enumerator:: multicore
-
-        Use multicore backend.
-
-    .. cpp:enumerator:: gpu
-
-        Use GPU back end.
-
-        .. Note::
-            Setting the GPU back end is only meaningful if the
-            :cpp:class:`cell_group` type supports the GPU backend.
-
 .. cpp:class:: domain_decomposition
 
     Describes a domain decomposition and is solely responsible for describing the
     distribution of cells across cell groups and domains.
     It holds cell group descriptions (:cpp:member:`groups`) for cells assigned to
-    the local domain, and a helper function (:cpp:member:`gid_domain`) used to
+    the local domain, and a helper member function (:cpp:member:`gid_domain`) used to
     look up which domain a cell has been assigned to.
     The :cpp:class:`domain_decomposition` object also has meta-data about the
     number of cells in the global model, and the number of domains over which
     the model is distributed.
 
     .. Note::
-        The domain decomposition represents a division **all** of the cells in
+        The domain decomposition represents a division of **all** of the cells in
         the model into non-overlapping sets, with one set of cells assigned to
         each domain.
         A domain decomposition is generated either by a load balancer or is
-        directly specified by a user, and it is a requirement that the
-        decomposition is correct:
-
-            * Every cell in the model appears once in one and only one cell
-              :cpp:member:`groups` on one and only one local
-              :cpp:class:`domain_decomposition` object.
-            * :cpp:member:`num_local_cells` is the sum of the number of cells in
-              each of the :cpp:member:`groups`.
-            * The sum of :cpp:member:`num_local_cells` over all domains matches
-              :cpp:member:`num_global_cells`.
-
-    .. cpp:member:: std::function<int(cell_gid_type)> gid_domain
-
-        A function for querying the domain id that a cell assigned to
-        (using global identifier :cpp:var:`gid`).
-        It must be a pure function, that is it has no side effects, and hence is
-        thread safe.
+        directly constructed by a user. In either case, the following
+        conditions must be met, otherwise an exception is thrown:
+
+        * Every cell in the model appears once in one and only one cell
+          :cpp:class:`group <group_description>` on one and only one local
+          :cpp:class:`domain_decomposition` object.
+        * The total number of cells across all cell
+          :cpp:class:`groups <group_description>` on all
+          :cpp:class:`domain_decomposition` objects must match the total
+          number of cells in the :cpp:class:`recipe`.
+        * Cells that are connected via gap-junction must be present in the
+          same cell :cpp:class:`group <group_description>`.
+
+    .. cpp:function:: domain_decomposition(const recipe& rec, const context& ctx, const std::vector<group_description>& groups)
+
+        The constructor takes:
+
+        *   a :cpp:class:`arb::recipe` that describes the model;
+        *   a :cpp:class:`arb::context` that describes the hardware resources;
+        *   a vector of :cpp:class:`arb::group_description` that contains the indices of the cells
+            to be executed on the local rank, categorized into groups.
+
+        It is expected that a different :cpp:class:`arb::domain_decomposition` object will be constructed on
+        each rank in a distributed simulation, containing the cell groups selected for that rank.
+        For example, in a simulation of 10 cells on 2 MPI ranks where cells {0, 2, 4, 6, 8} of kind
+        :class:`cable_cell` are meant to be in a single group executed on the GPU on rank 0;
+        and cells {1, 3, 5, 7, 9} of kind :class:`lif_cell` are expected to be in a single group executed
+        on the CPU on rank 1:
+
+        Rank 0 should run:
+
+        .. code-block:: c++
+
+            std::vector<arb::group_description> groups = {
+                {arb::cell_kind::cable, {0, 2, 4, 6, 8}, arb::backend_kind::gpu}
+            };
+            auto decomp = arb::domain_decomposition(recipe, context, groups);
+
+        And Rank 1 should run:
+
+        .. code-block:: c++
+
+            std::vector<arb::group_description> groups = {
+                {arb::cell_kind::lif,   {1, 3, 5, 7, 9}, arb::backend_kind::multicore}
+            };
+            auto decomp = arb::domain_decomposition(recipe, context, groups);
+
+        .. _domdecnotes:
+        .. Important::
+            Constructing a balanced :cpp:class:`domain_decomposition` quickly
+            becomes a difficult task for large and diverse networks. For this
+            reason, Arbor provides :ref:`load balancing algorithms<domdecloadbalance>`
+            that automatically generate a :cpp:class:`domain_decomposition` from
+            a :cpp:class:`recipe` and :cpp:class:`context`.
+            A user-defined :cpp:class:`domain_decomposition` using the constructor
+            is useful for cases where the provided load balancers are inadequate,
+            or when the user has specific insight into running their model on the
+            target computer.
+
+        .. Important::
+            When creating your own :cpp:class:`domain_decomposition` of a network
+            containing **Gap Junction connections**, be sure to place all cells that
+            are connected via gap junctions in the same :cpp:member:`group <groups>`.
+            Example:
+            ``A -gj- B -gj- C``  and ``D -gj- E``.
+            Cells A, B and C need to be in a single group; and cells D and E need to be in a
+            single group. They may all be placed in the same group but not necessarily.
+            Be mindful that smaller cell groups perform better on multi-core systems and
+            try not to overcrowd cell groups if not needed.
+            Arbor provided load balancers such as :cpp:func:`partition_load_balance`
+            guarantee that this rule is obeyed.
+
+    .. cpp:member:: int gid_domain(cell_gid_type gid)
+
+        Returns the domain id of the cell with id ``gid``.
+
+    .. cpp:member:: int num_domains()
+
+        Returns the number of domains that the model is distributed over.
+
+    .. cpp:member:: int domain_id()
+
+        Returns the index of the local domain.
+        Always 0 for non-distributed models, and corresponds to the MPI rank
+        for distributed runs.
 
-    .. cpp:member:: int num_domains
+    .. cpp:member:: cell_size_type num_local_cells()
 
-        Number of domains that the model is distributed over.
+        Returns the total number of cells in the local domain.
 
-    .. cpp:member:: int domain_id
+    .. cpp:member:: cell_size_type num_global_cells()
 
-        The index of the local domain.
-        Always 0 for non-distributed models, and corresponds to the MPI rank
-        for distributed runs.
+        Returns the total number of cells in the global model
+        (sum of :cpp:member:`num_local_cells` over all domains).
 
-    .. cpp:member:: cell_size_type num_local_cells
+    .. cpp:member:: cell_size_type num_groups()
 
-        Total number of cells in the local domain.
+        Returns the total number of cell groups on the local domain.
 
-    .. cpp:member:: cell_size_type num_global_cells
+    .. cpp:member:: const group_description& group(unsigned idx)
 
-        Total number of cells in the global model
-        (sum of :cpp:member:`num_local_cells` over all domains).
+        Returns the description of the cell group at index ``idx`` on the local domain.
+        See :cpp:class:`group_description`.
 
-    .. cpp:member:: std::vector<group_description> groups
+    .. cpp:member:: const std::vector<group_description>& groups()
 
-        Descriptions of the cell groups on the local domain.
+        Returns the descriptions of the cell groups on the local domain.
         See :cpp:class:`group_description`.
 
 .. cpp:class:: group_description
@@ -159,3 +159,51 @@ Documentation for the data structures used to describe domain decompositions.
     .. cpp:member:: const backend_kind backend
 
         The back end on which the cell group is to run.
+
+.. cpp:enum-class:: backend_kind
+
+    Used to indicate which hardware backend to use for running a :cpp:class:`cell_group`.
+
+    .. cpp:enumerator:: multicore
+
+        Use multicore backend.
+
+    .. cpp:enumerator:: gpu
+
+        Use GPU back end.
+
+        .. Note::
+            Setting the GPU back end is only meaningful if the
+            :cpp:class:`cell_group` type supports the GPU backend.
+
+.. _domdecloadbalance:
+
+Load balancers
+--------------
+
+Load balancing generates a :cpp:class:`domain_decomposition` given an :cpp:class:`arb::recipe`
+and a description of the hardware on which the model will run. Currently Arbor provides
+one load balancer, :cpp:func:`partition_load_balance`, and more will be added over time.
+
+If the model is distributed with MPI, the partitioning algorithm for cells is
+distributed with MPI communication. The returned :cpp:class:`domain_decomposition`
+describes the cell groups on the local MPI rank.
+
+.. cpp:function:: domain_decomposition partition_load_balance(const recipe& rec, const arb::context& ctx)
+
+    Construct a :cpp:class:`domain_decomposition` that distributes the cells
+    in the model described by :cpp:any:`rec` over the distributed and local hardware
+    resources described by :cpp:any:`ctx`.
+
+    The algorithm counts the number of each cell type in the global model, then
+    partitions the cells of each type equally over the available nodes.
+    If a GPU is available, and if the cell type can be run on the GPU, the
+    cells on each node are put in one large group to maximise the amount of fine
+    grained parallelism in the cell group.
+    Otherwise, cells are grouped into small groups that fit in cache, and can be
+    distributed over the available cores.
+
+    .. Note::
+        The partitioning assumes that all cells of the same kind have equal
+        computational cost, hence it may not produce a balanced partition for
+        models with cells that have a large variance in computational costs.
diff --git a/doc/python/domdec.rst b/doc/python/domdec.rst
index 8b9e03e300da18182ffc6d07b9206b6b636d97f0..e2fbac2256136b9d13af8dcae13ce20344891f0a 100644
--- a/doc/python/domdec.rst
+++ b/doc/python/domdec.rst
@@ -12,7 +12,9 @@ Load balancers
 
 Load balancing generates a :class:`domain_decomposition` given an :class:`arbor.recipe`
 and a description of the hardware on which the model will run. Currently Arbor provides
-one load balancer, :func:`partition_load_balance`, and more will be added over time.
+one load balancer, :func:`partition_load_balance`; and a function for creating
+custom decompositions, :func:`partition_by_group`.
+More load balancers will be added over time.
 
 If the model is distributed with MPI, the partitioning algorithm for cells is
 distributed with MPI communication. The returned :class:`domain_decomposition`
@@ -38,6 +40,68 @@ describes the cell groups on the local MPI rank.
         computational cost, hence it may not produce a balanced partition for
         models with cells that have a large variance in computational costs.
 
+.. function:: partition_by_group(recipe, context, groups)
+
+   Construct a :class:`domain_decomposition` that assigns the groups described by
+   the provided list of :class:`group_description` to the local hardware of the calling rank.
+
+   The function expects to be called by each rank in a distributed simulation with the
+   selected groups for that rank. For example, in a simulation of 10 cells on 2 MPI ranks
+   where cells {0, 2, 4, 6, 8} of kind :class:`cable_cell` are expected to be in a single group executed
+   on the GPU on rank 0; and cells {1, 3, 5, 7, 9} of kind :class:`lif_cell` are expected to be in a single
+   group executed on the CPU on rank 1:
+
+   Rank 0 should run:
+
+   .. code-block:: python
+
+        import arbor
+
+        # Get a communication context (with 4 threads, and 1 GPU with id 0)
+        context = arbor.context(threads=4, gpu_id=0)
+
+        # Initialise a recipe of user defined type my_recipe with 10 cells.
+        n_cells = 10
+        recipe = my_recipe(n_cells)
+
+        groups = [arbor.group_description(arbor.cell_kind.cable, [0, 2, 4, 6, 8], arbor.backend.gpu)]
+        decomp = arbor.partition_by_group(recipe, context, groups)
+
+   And Rank 1 should run:
+
+   .. code-block:: python
+
+        import arbor
+
+        # Get a communication context (with 4 threads, and no GPU)
+        context = arbor.context(threads=4, gpu_id=-1)
+
+        # Initialise a recipe of user defined type my_recipe with 10 cells.
+        n_cells = 10
+        recipe = my_recipe(n_cells)
+
+        groups = [arbor.group_description(arbor.cell_kind.lif, [1, 3, 5, 7, 9], arbor.backend.multicore)]
+        decomp = arbor.partition_by_group(recipe, context, groups)
+
+   The function expects that cells connected by gap-junction are in the same group. An exception will be raised
+   if this is not the case.
+
+   The function doesn't perform any checks on the validity of the generated :class:`domain_decomposition`.
+   The validity is only checked when a :class:`simulation` object is constructed using that :class:`domain_decomposition`.
+
+   .. Note::
+        This function is intended for users who have a good understanding of the computational
+        cost of simulating the cells in their network and want fine-grained control over the
+        partitioning of cells across ranks. It is recommended to start off by using
+        :func:`partition_load_balance` and switch to this function if the observed performance
+        across ranks is unbalanced (for example, if the performance of the network is not scaling
+        well with the number of nodes).
+
+   .. Note::
+        This function relies on the user to decide the size of the cell groups. It is therefore important
+        to keep in mind that smaller cell groups have better performance on the multicore backend and
+        larger cell groups have better performance on the GPU backend.
+
 .. class:: partition_hint
 
     Provide a hint on how the cell groups should be partitioned.
@@ -154,6 +218,10 @@ Therefore, the following data structures are used to describe domain decompositi
         The total number of cells in the global model
         (sum of :attr:`num_local_cells` over all domains).
 
+    .. attribute:: num_groups
+
+        The total number of cell groups on the local domain.
+
     .. attribute:: groups
 
         The descriptions of the cell groups on the local domain.
diff --git a/example/gap_junctions/gap_junctions.cpp b/example/gap_junctions/gap_junctions.cpp
index d1ea8f6fd87be141a20baa331a513bfe47c031cf..f61b835ea1d15337d0851a2e55da6e56a1f5572e 100644
--- a/example/gap_junctions/gap_junctions.cpp
+++ b/example/gap_junctions/gap_junctions.cpp
@@ -178,11 +178,11 @@ int main(int argc, char** argv) {
 
         auto sched = arb::regular_schedule(0.025);
         // This is where the voltage samples will be stored as (time, value) pairs
-        std::vector<arb::trace_vector<double>> voltage_traces(decomp.num_local_cells);
+        std::vector<arb::trace_vector<double>> voltage_traces(decomp.num_local_cells());
 
         // Now attach the sampler at probe_id, with sampling schedule sched, writing to voltage
         unsigned j=0;
-        for (auto g : decomp.groups) {
+        for (auto g : decomp.groups()) {
             for (auto i : g.gids) {
                 sim.add_sampler(arb::one_probe({i, 0}), sched, arb::make_simple_sampler(voltage_traces[j++]));
             }
diff --git a/python/domain_decomposition.cpp b/python/domain_decomposition.cpp
index a87710130b3f20a4fffbbe6b8ae400f564f8a3aa..1d7b5ea5e968f336fa8b9165ace6fa339d4c84ed 100644
--- a/python/domain_decomposition.cpp
+++ b/python/domain_decomposition.cpp
@@ -25,7 +25,7 @@ std::string gd_string(const arb::group_description& g) {
 std::string dd_string(const arb::domain_decomposition& d) {
     return util::pprintf(
         "<arbor.domain_decomposition: domain_id {}, num_domains {}, num_local_cells {}, num_global_cells {}, groups {}>",
-        d.domain_id, d.num_domains, d.num_local_cells, d.num_global_cells, d.groups.size());
+        d.domain_id(), d.num_domains(), d.num_local_cells(), d.num_global_cells(), d.num_groups());
 }
 
 std::string ph_string(const arb::partition_hint& h) {
@@ -80,23 +80,24 @@ void register_domain_decomposition(pybind11::module& m) {
     pybind11::class_<arb::domain_decomposition> domain_decomposition(m, "domain_decomposition",
         "The domain decomposition is responsible for describing the distribution of cells across cell groups and domains.");
     domain_decomposition
-        .def(pybind11::init<>())
         .def("gid_domain",
             [](const arb::domain_decomposition& d, arb::cell_gid_type gid) {
                 return d.gid_domain(gid);
             },
             "Query the domain id that a cell assigned to (using global identifier gid).",
             "gid"_a)
-        .def_readonly("num_domains", &arb::domain_decomposition::num_domains,
+        .def_property_readonly("num_domains", &arb::domain_decomposition::num_domains,
             "Number of domains that the model is distributed over.")
-        .def_readonly("domain_id", &arb::domain_decomposition::domain_id,
+        .def_property_readonly("domain_id", &arb::domain_decomposition::domain_id,
             "The index of the local domain.\n"
             "Always 0 for non-distributed models, and corresponds to the MPI rank for distributed runs.")
-        .def_readonly("num_local_cells", &arb::domain_decomposition::num_local_cells,
+        .def_property_readonly("num_local_cells", &arb::domain_decomposition::num_local_cells,
             "Total number of cells in the local domain.")
-        .def_readonly("num_global_cells", &arb::domain_decomposition::num_global_cells,
+        .def_property_readonly("num_global_cells", &arb::domain_decomposition::num_global_cells,
             "Total number of cells in the global model (sum of num_local_cells over all domains).")
-        .def_readonly("groups", &arb::domain_decomposition::groups,
+        .def_property_readonly("num_groups", &arb::domain_decomposition::num_groups,
+            "Total number of cell groups in the local domain.")
+        .def_property_readonly("groups", &arb::domain_decomposition::groups,
             "Descriptions of the cell groups on the local domain.")
         .def("__str__",  &dd_string)
         .def("__repr__", &dd_string);
@@ -117,6 +118,21 @@ void register_domain_decomposition(pybind11::module& m) {
         "over the distributed and local hardware resources described by context.\n"
         "Optionally, provide a dictionary of partition hints for certain cell kinds, by default empty.",
         "recipe"_a, "context"_a, "hints"_a=arb::partition_hint_map{});
+
+    m.def("partition_by_group",
+          [](std::shared_ptr<py_recipe>& recipe, const context_shim& ctx, const std::vector<arb::group_description>& groups) {
+              try {
+                  return arb::domain_decomposition(py_recipe_shim(recipe), ctx.context, groups);
+              }
+              catch (...) {
+                  py_reset_and_throw();
+                  throw;
+              }
+          },
+          "Construct a domain_decomposition that assigns the groups of cells provided as argument \n"
+          "to the local hardware resources described by context on the calling rank.\n"
+          "The cell_groups are guaranteed to be present on the calling rank.",
+          "recipe"_a, "context"_a, "groups"_a);
 }
 
 } // namespace pyarb
diff --git a/test/unit-distributed/test_communicator.cpp b/test/unit-distributed/test_communicator.cpp
index e71590ba69eb127c1dac5914c00079db85937f9c..eeefa66789ae543844d3b9392ec7d1e7f6880e65 100644
--- a/test/unit-distributed/test_communicator.cpp
+++ b/test/unit-distributed/test_communicator.cpp
@@ -23,7 +23,7 @@ using namespace arb;
 TEST(communicator, policy_basics) {
 
     const int num_domains = g_context->distributed->size();
-    const int rank = g_context->distributed->id();;
+    const int rank = g_context->distributed->id();
 
     EXPECT_EQ((int)arb::num_ranks(g_context), num_domains);
     EXPECT_EQ((int)arb::rank(g_context), rank);
@@ -314,8 +314,8 @@ namespace {
     // make a list of the gids on the local domain
     std::vector<cell_gid_type> get_gids(const domain_decomposition& D) {
         std::vector<cell_gid_type> gids;
-        for (auto i: util::make_span(0, D.groups.size())) {
-            util::append(gids, D.groups[i].gids);
+        for (auto i: util::make_span(0, D.num_groups())) {
+            util::append(gids, D.group(i).gids);
         }
         return gids;
     }
@@ -324,8 +324,8 @@ namespace {
     std::unordered_map<cell_gid_type, cell_gid_type>
     get_group_map(const domain_decomposition& D) {
         std::unordered_map<cell_gid_type, cell_gid_type> map;
-        for (auto i: util::make_span(0, D.groups.size())) {
-            for (auto gid: D.groups[i].gids) {
+        for (auto i: util::make_span(0, D.num_groups())) {
+            for (auto gid: D.group(i).gids) {
                 map[gid] = i;
             }
         }
@@ -465,9 +465,9 @@ test_ring(const domain_decomposition& D, communicator& C, F&& f) {
     // search for the expected event.
     int expected_count = 0;
     for (auto gid: gids) {
-        auto src = source_of(gid, D.num_global_cells);
+        auto src = source_of(gid, D.num_global_cells());
         if (f(src)) {
-            auto expected = expected_event_ring(gid, D.num_global_cells);
+            auto expected = expected_event_ring(gid, D.num_global_cells());
             auto grp = group_map[gid];
             auto& q = queues[grp];
             if (std::find(q.begin(), q.end(), expected)==q.end()) {
@@ -511,7 +511,7 @@ TEST(communicator, ring)
     // set up source and target label->lid resolvers
     // from mc_cell_group and lif_cell_group
     std::vector<cell_gid_type> mc_gids, lif_gids;
-    for (auto g: D.groups) {
+    for (auto g: D.groups()) {
         if (g.kind == cell_kind::cable) {
             mc_gids.insert(mc_gids.end(), g.gids.begin(), g.gids.end());
         }
@@ -560,7 +560,7 @@ test_all2all(const domain_decomposition& D, communicator& C, F&& f) {
     std::reverse(local_spikes.begin(), local_spikes.end());
 
     std::vector<cell_gid_type> spike_gids = assign_from(
-        filter(make_span(0, D.num_global_cells), f));
+        filter(make_span(0, D.num_global_cells()), f));
 
     // gather the global set of spikes
     auto global_spikes = C.exchange(local_spikes);
@@ -573,7 +573,7 @@ test_all2all(const domain_decomposition& D, communicator& C, F&& f) {
     // generate the events
     std::vector<arb::pse_vector> queues(C.num_local_cells());
     C.make_event_queues(global_spikes, queues);
-    if (queues.size() != D.groups.size()) { // one queue for each cell group
+    if (queues.size() != D.num_groups()) { // one queue for each cell group
         return ::testing::AssertionFailure()
             << "expect one event queue for each cell group";
     }
@@ -630,7 +630,7 @@ TEST(communicator, all2all)
     // set up source and target label->lid resolvers
     // from mc_cell_group
     std::vector<cell_gid_type> mc_gids;
-    for (auto g: D.groups) {
+    for (auto g: D.groups()) {
         mc_gids.insert(mc_gids.end(), g.gids.begin(), g.gids.end());
     }
     cell_label_range local_sources, local_targets;
@@ -676,7 +676,7 @@ TEST(communicator, mini_network)
     // set up source and target label->lid resolvers
     // from mc_cell_group
     std::vector<cell_gid_type> gids;
-    for (auto g: D.groups) {
+    for (auto g: D.groups()) {
         gids.insert(gids.end(), g.gids.begin(), g.gids.end());
     }
     cell_label_range local_sources, local_targets;
diff --git a/test/unit-distributed/test_domain_decomposition.cpp b/test/unit-distributed/test_domain_decomposition.cpp
index eb539ea54123ba35141f09dcfffb04d65f60f60b..752e00b941023136313c0d0d20a91d57e8d3d0f0 100644
--- a/test/unit-distributed/test_domain_decomposition.cpp
+++ b/test/unit-distributed/test_domain_decomposition.cpp
@@ -8,10 +8,13 @@
 #include <vector>
 
 #include <arbor/context.hpp>
+#include <arbor/domdecexcept.hpp>
 #include <arbor/domain_decomposition.hpp>
 #include <arbor/load_balance.hpp>
 #include <arbor/version.hpp>
 
+#include <arborenv/default_env.hpp>
+
 #include "util/span.hpp"
 
 #include "../simple_recipes.hpp"
@@ -70,6 +73,10 @@ namespace {
             ncopies_(num_ranks),
             fully_connected_(fully_connected) {}
 
+        cell_size_type num_cells_per_rank() const {
+            return size_;
+        }
+
         cell_size_type num_cells() const override {
             return size_*ncopies_;
         }
@@ -236,11 +243,13 @@ TEST(domain_decomposition, homogeneous_population_mc) {
     // 10 cells per domain
     unsigned n_local = 10;
     unsigned n_global = n_local*N;
-    const auto D = partition_load_balance(homo_recipe(n_global, dummy_cell{}), ctx);
 
-    EXPECT_EQ(D.num_global_cells, n_global);
-    EXPECT_EQ(D.num_local_cells, n_local);
-    EXPECT_EQ(D.groups.size(), n_local);
+    auto rec = homo_recipe(n_global, dummy_cell{});
+    const auto D = partition_load_balance(rec, ctx);
+
+    EXPECT_EQ(D.num_global_cells(), (unsigned)n_global);
+    EXPECT_EQ(D.num_local_cells(), n_local);
+    EXPECT_EQ(D.num_groups(), n_local);
 
     auto b = I*n_local;
     auto e = (I+1)*n_local;
@@ -253,7 +262,7 @@ TEST(domain_decomposition, homogeneous_population_mc) {
     // Each group should also be tagged for cpu execution
     for (auto i: gids) {
         auto local_group = i-b;
-        auto& grp = D.groups[local_group];
+        auto& grp = D.group(local_group);
         EXPECT_EQ(grp.gids.size(), 1u);
         EXPECT_EQ(grp.gids.front(), unsigned(i));
         EXPECT_EQ(grp.backend, backend_kind::multicore);
@@ -287,11 +296,12 @@ TEST(domain_decomposition, homogeneous_population_gpu) {
     // 10 cells per domain
     unsigned n_local = 10;
     unsigned n_global = n_local*N;
-    const auto D = partition_load_balance(homo_recipe(n_global, dummy_cell{}), ctx);
+    auto rec = homo_recipe(n_global, dummy_cell{});
+    const auto D = partition_load_balance(rec, ctx);
 
-    EXPECT_EQ(D.num_global_cells, n_global);
-    EXPECT_EQ(D.num_local_cells, n_local);
-    EXPECT_EQ(D.groups.size(), 1u);
+    EXPECT_EQ(D.num_global_cells(), n_global);
+    EXPECT_EQ(D.num_local_cells(), n_local);
+    EXPECT_EQ(D.num_groups(), 1u);
 
     auto b = I*n_local;
     auto e = (I+1)*n_local;
@@ -302,7 +312,7 @@ TEST(domain_decomposition, homogeneous_population_gpu) {
 
     // Each cell group contains 1 cell of kind cable
     // Each group should also be tagged for cpu execution
-    auto grp = D.groups[0u];
+    auto grp = D.group(0u);
 
     EXPECT_EQ(grp.gids.size(), n_local);
     EXPECT_EQ(grp.gids.front(), b);
@@ -335,9 +345,9 @@ TEST(domain_decomposition, heterogeneous_population) {
     auto R = hetero_recipe(n_global);
     const auto D = partition_load_balance(R, ctx);
 
-    EXPECT_EQ(D.num_global_cells, n_global);
-    EXPECT_EQ(D.num_local_cells, n_local);
-    EXPECT_EQ(D.groups.size(), n_local);
+    EXPECT_EQ(D.num_global_cells(), n_global);
+    EXPECT_EQ(D.num_local_cells(), n_local);
+    EXPECT_EQ(D.num_groups(), n_local);
 
     auto b = I*n_local;
     auto e = (I+1)*n_local;
@@ -351,7 +361,7 @@ TEST(domain_decomposition, heterogeneous_population) {
     auto grps = util::make_span(0, n_local_grps);
     std::map<cell_kind, std::set<cell_gid_type>> kind_lists;
     for (auto i: grps) {
-        auto& grp = D.groups[i];
+        auto& grp = D.group(i);
         EXPECT_EQ(grp.gids.size(), 1u);
         kind_lists[grp.kind].insert(grp.gids.front());
         EXPECT_EQ(grp.backend, backend_kind::multicore);
@@ -380,7 +390,7 @@ TEST(domain_decomposition, symmetric_groups) {
     std::vector<gj_symmetric> recipes = {gj_symmetric(nranks, true), gj_symmetric(nranks, false)};
     for (const auto& R: recipes) {
         const auto D0 = partition_load_balance(R, ctx);
-        EXPECT_EQ(6u, D0.groups.size());
+        EXPECT_EQ(6u, D0.num_groups());
 
         unsigned shift = rank * R.num_cells()/nranks;
         std::vector<std::vector<cell_gid_type>> expected_groups0 =
@@ -393,7 +403,7 @@ TEST(domain_decomposition, symmetric_groups) {
                 };
 
         for (unsigned i = 0; i < 6; i++) {
-            EXPECT_EQ(expected_groups0[i], D0.groups[i].gids);
+            EXPECT_EQ(expected_groups0[i], D0.group(i).gids);
         }
 
         unsigned cells_per_rank = R.num_cells()/nranks;
@@ -407,13 +417,13 @@ TEST(domain_decomposition, symmetric_groups) {
         hints[cell_kind::cable].prefer_gpu = false;
 
         const auto D1 = partition_load_balance(R, ctx, hints);
-        EXPECT_EQ(1u, D1.groups.size());
+        EXPECT_EQ(1u, D1.num_groups());
 
         std::vector<cell_gid_type> expected_groups1 =
                 {0 + shift, 3 + shift, 4 + shift, 5 + shift, 8 + shift,
                  1 + shift, 2 + shift, 6 + shift, 7 + shift, 9 + shift};
 
-        EXPECT_EQ(expected_groups1, D1.groups[0].gids);
+        EXPECT_EQ(expected_groups1, D1.group(0).gids);
 
         for (unsigned i = 0; i < R.num_cells(); i++) {
             EXPECT_EQ(i/cells_per_rank, (unsigned) D1.gid_domain(i));
@@ -423,14 +433,14 @@ TEST(domain_decomposition, symmetric_groups) {
         hints[cell_kind::cable].prefer_gpu = false;
 
         const auto D2 = partition_load_balance(R, ctx, hints);
-        EXPECT_EQ(2u, D2.groups.size());
+        EXPECT_EQ(2u, D2.num_groups());
 
         std::vector<std::vector<cell_gid_type>> expected_groups2 =
                 {{0 + shift, 3 + shift, 4 + shift, 5 + shift, 8 + shift},
                  {1 + shift, 2 + shift, 6 + shift, 7 + shift, 9 + shift}};
 
         for (unsigned i = 0; i < 2u; i++) {
-            EXPECT_EQ(expected_groups2[i], D2.groups[i].gids);
+            EXPECT_EQ(expected_groups2[i], D2.group(i).gids);
         }
         for (unsigned i = 0; i < R.num_cells(); i++) {
             EXPECT_EQ(i/cells_per_rank, (unsigned) D2.gid_domain(i));
@@ -461,10 +471,10 @@ TEST(domain_decomposition, gj_multi_distributed_groups) {
                 continue;
             } else if (gid % nranks == (unsigned) rank && rank != nranks - 1) {
                 std::vector<cell_gid_type> cg = {gid, gid + cells_per_rank};
-                EXPECT_EQ(cg, D.groups[D.groups.size() - 1].gids);
+                EXPECT_EQ(cg, D.group(D.num_groups() - 1).gids);
             } else {
                 std::vector<cell_gid_type> cg = {gid};
-                EXPECT_EQ(cg, D.groups[i++].gids);
+                EXPECT_EQ(cg, D.group(i++).gids);
             }
         }
         // check gid_domains
@@ -500,20 +510,20 @@ TEST(domain_decomposition, gj_single_distributed_group) {
     unsigned cells_per_rank = nranks;
     // check groups
     unsigned i = 0;
-    for (unsigned gid = rank * cells_per_rank; gid < (rank + 1) * cells_per_rank; gid++) {
+    for (unsigned gid = rank*cells_per_rank; gid < (rank + 1)*cells_per_rank; gid++) {
         if (gid%nranks == 1) {
             if (rank == 0) {
                 std::vector<cell_gid_type> cg;
                 for (int r = 0; r < nranks; ++r) {
-                    cg.push_back(gid + (r * nranks));
+                    cg.push_back(gid + (r*nranks));
                 }
-                EXPECT_EQ(cg, D.groups.back().gids);
+                EXPECT_EQ(cg, D.groups().back().gids);
             } else {
                 continue;
             }
         } else {
             std::vector<cell_gid_type> cg = {gid};
-            EXPECT_EQ(cg, D.groups[i++].gids);
+            EXPECT_EQ(cg, D.group(i++).gids);
         }
     }
     // check gid_domains
@@ -526,4 +536,120 @@ TEST(domain_decomposition, gj_single_distributed_group) {
             EXPECT_EQ(group, (unsigned) D.gid_domain(gid));
         }
     }
-}
\ No newline at end of file
+}
+
+TEST(domain_decomposition, partition_by_group)
+{
+    proc_allocation resources{1,  arbenv::default_gpu()};
+    int nranks = 1;
+    int rank = 0;
+#ifdef TEST_MPI
+    auto ctx = make_context(resources, MPI_COMM_WORLD);
+    MPI_Comm_size(MPI_COMM_WORLD, &nranks);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+#else
+    auto ctx = make_context(resources);
+#endif
+
+    {
+        const unsigned cells_per_rank = 10;
+        auto rec = homo_recipe(cells_per_rank*nranks, dummy_cell{});
+        std::vector<cell_gid_type> gids;
+        for (unsigned i = 0; i < cells_per_rank; ++i) {
+            gids.push_back(rank*cells_per_rank + i);
+        }
+#ifdef ARB_GPU_ENABLED
+        auto d = domain_decomposition(rec, ctx, {{cell_kind::cable, gids, backend_kind::gpu}});
+#else
+        auto d = domain_decomposition(rec, ctx, {{cell_kind::cable, gids, backend_kind::multicore}});
+#endif
+
+        EXPECT_EQ(nranks, d.num_domains());
+        EXPECT_EQ(rank, d.domain_id());
+        EXPECT_EQ(cells_per_rank, d.num_local_cells());
+        EXPECT_EQ(cells_per_rank*nranks, d.num_global_cells());
+        EXPECT_EQ(1u, d.num_groups());
+        EXPECT_EQ(gids, d.group(0).gids);
+        EXPECT_EQ(cell_kind::cable, d.group(0).kind);
+#ifdef ARB_GPU_ENABLED
+        EXPECT_EQ(backend_kind::gpu, d.group(0).backend);
+#else
+        EXPECT_EQ(backend_kind::multicore, d.group(0).backend);
+#endif
+        for (unsigned i = 0; i < cells_per_rank*nranks; ++i) {
+            EXPECT_EQ((int)(i/cells_per_rank), d.gid_domain(i));
+        }
+    }
+    {
+        auto rec = gj_symmetric(nranks, true);
+        const unsigned cells_per_rank = rec.num_cells_per_rank();
+        const unsigned shift = cells_per_rank*rank;
+
+        std::vector<cell_gid_type> gids0 = {1+shift, 2+shift, 6+shift, 7+shift, 9+shift};
+        std::vector<cell_gid_type> gids1 = {3+shift};
+        std::vector<cell_gid_type> gids2 = {0+shift, 4+shift, 5+shift, 8+shift};
+
+        group_description g0 = {cell_kind::lif,   gids0, backend_kind::multicore};
+        group_description g1 = {cell_kind::cable, gids1, backend_kind::multicore};
+#ifdef ARB_GPU_ENABLED
+        group_description g2 = {cell_kind::cable, gids2, backend_kind::gpu};
+#else
+        group_description g2 = {cell_kind::cable, gids2, backend_kind::multicore};
+#endif
+        auto d = domain_decomposition(rec, ctx, {g0, g1, g2});
+
+        EXPECT_EQ(nranks, d.num_domains());
+        EXPECT_EQ(rank, d.domain_id());
+        EXPECT_EQ(cells_per_rank, d.num_local_cells());
+        EXPECT_EQ(cells_per_rank*nranks, d.num_global_cells());
+        EXPECT_EQ(3u, d.num_groups());
+
+        EXPECT_EQ(gids0, d.group(0).gids);
+        EXPECT_EQ(gids1, d.group(1).gids);
+        EXPECT_EQ(gids2, d.group(2).gids);
+
+        EXPECT_EQ(cell_kind::lif,   d.group(0).kind);
+        EXPECT_EQ(cell_kind::cable, d.group(1).kind);
+        EXPECT_EQ(cell_kind::cable, d.group(2).kind);
+
+        EXPECT_EQ(backend_kind::multicore, d.group(0).backend);
+        EXPECT_EQ(backend_kind::multicore, d.group(1).backend);
+#ifdef ARB_GPU_ENABLED
+        EXPECT_EQ(backend_kind::gpu, d.group(2).backend);
+#else
+        EXPECT_EQ(backend_kind::multicore, d.group(2).backend);
+#endif
+        for (unsigned i = 0; i < cells_per_rank*nranks; ++i) {
+            EXPECT_EQ((int)(i/cells_per_rank), d.gid_domain(i));
+        }
+    }
+}
+
+TEST(domain_decomposition, invalid)
+{
+    proc_allocation resources{1, -1};
+    int nranks = 1;
+    int rank = 0;
+#ifdef TEST_MPI
+    auto ctx = make_context(resources, MPI_COMM_WORLD);
+    MPI_Comm_size(MPI_COMM_WORLD, &nranks);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+#else
+    auto ctx = make_context(resources);
+#endif
+
+    {
+        auto rec = homo_recipe(10*nranks, dummy_cell{});
+        std::vector<group_description> groups =
+                {{cell_kind::cable, {0, 1, 2, 3, 4, 5, 6, 7, 8, (unsigned)nranks*10}, backend_kind::multicore}};
+        EXPECT_THROW(domain_decomposition(rec, ctx, groups), out_of_bounds);
+
+        std::vector<cell_gid_type> gids;
+        for (unsigned i = 0; i < 10; ++i) {
+            gids.push_back(rank*10 + i);
+        }
+        if (rank == 0) gids.back() = 0;
+        groups = {{cell_kind::cable, gids, backend_kind::multicore}};
+        EXPECT_THROW(domain_decomposition(rec, ctx, groups), duplicate_gid);
+    }
+}
diff --git a/test/unit/test_domain_decomposition.cpp b/test/unit/test_domain_decomposition.cpp
index cae46a3cb0c7f60764dbc3948accd0516e21cf1d..0db031e1c621b7c923ccb6977a218c7ca905ea9e 100644
--- a/test/unit/test_domain_decomposition.cpp
+++ b/test/unit/test_domain_decomposition.cpp
@@ -3,6 +3,7 @@
 #include <stdexcept>
 
 #include <arbor/context.hpp>
+#include <arbor/domdecexcept.hpp>
 #include <arbor/domain_decomposition.hpp>
 #include <arbor/load_balance.hpp>
 
@@ -158,11 +159,12 @@ TEST(domain_decomposition, homogenous_population)
         auto ctx = make_context(resources);
 
         unsigned num_cells = 10;
-        const auto D = partition_load_balance(homo_recipe(num_cells, dummy_cell{}), ctx);
+        auto rec = homo_recipe(num_cells, dummy_cell{});
+        const auto D = partition_load_balance(rec, ctx);
 
-        EXPECT_EQ(D.num_global_cells, num_cells);
-        EXPECT_EQ(D.num_local_cells, num_cells);
-        EXPECT_EQ(D.groups.size(), 1u);
+        EXPECT_EQ(D.num_global_cells(), num_cells);
+        EXPECT_EQ(D.num_local_cells(), num_cells);
+        EXPECT_EQ(D.num_groups(), 1u);
 
         auto gids = make_span(num_cells);
         for (auto gid: gids) {
@@ -171,7 +173,7 @@ TEST(domain_decomposition, homogenous_population)
 
         // Each cell group contains 1 cell of kind cable
         // Each group should also be tagged for cpu execution
-        auto grp = D.groups[0u];
+        auto grp = D.group(0u);
 
         EXPECT_EQ(grp.gids.size(), num_cells);
         EXPECT_EQ(grp.gids.front(), 0u);
@@ -189,11 +191,12 @@ TEST(domain_decomposition, homogenous_population)
         // the test.
 
         unsigned num_cells = 10;
-        const auto D = partition_load_balance(homo_recipe(num_cells, dummy_cell{}), ctx);
+        auto rec = homo_recipe(num_cells, dummy_cell{});
+        const auto D = partition_load_balance(rec, ctx);
 
-        EXPECT_EQ(D.num_global_cells, num_cells);
-        EXPECT_EQ(D.num_local_cells, num_cells);
-        EXPECT_EQ(D.groups.size(), num_cells);
+        EXPECT_EQ(D.num_global_cells(), num_cells);
+        EXPECT_EQ(D.num_local_cells(), num_cells);
+        EXPECT_EQ(D.num_groups(), num_cells);
 
         auto gids = make_span(num_cells);
         for (auto gid: gids) {
@@ -203,7 +206,7 @@ TEST(domain_decomposition, homogenous_population)
         // Each cell group contains 1 cell of kind cable
         // Each group should also be tagged for cpu execution
         for (auto i: gids) {
-            auto& grp = D.groups[i];
+            auto& grp = D.group(i);
             EXPECT_EQ(grp.gids.size(), 1u);
             EXPECT_EQ(grp.gids.front(), unsigned(i));
             EXPECT_EQ(grp.backend, backend_kind::multicore);
@@ -227,17 +230,17 @@ TEST(domain_decomposition, heterogenous_population)
         auto R = hetero_recipe(num_cells);
         const auto D = partition_load_balance(R, ctx);
 
-        EXPECT_EQ(D.num_global_cells, num_cells);
-        EXPECT_EQ(D.num_local_cells, num_cells);
+        EXPECT_EQ(D.num_global_cells(), num_cells);
+        EXPECT_EQ(D.num_local_cells(), num_cells);
         // one cell group with num_cells/2 on gpu, and num_cells/2 groups on cpu
         auto expected_groups = num_cells/2+1;
-        EXPECT_EQ(D.groups.size(), expected_groups);
+        EXPECT_EQ(D.num_groups(), expected_groups);
 
         auto grps = make_span(expected_groups);
         unsigned ncells = 0;
         // iterate over each group and test its properties
         for (auto i: grps) {
-            auto& grp = D.groups[i];
+            auto& grp = D.group(i);
             auto k = grp.kind;
             if (k==cell_kind::cable) {
                 EXPECT_EQ(grp.backend, backend_kind::gpu);
@@ -269,9 +272,9 @@ TEST(domain_decomposition, heterogenous_population)
         auto R = hetero_recipe(num_cells);
         const auto D = partition_load_balance(R, ctx);
 
-        EXPECT_EQ(D.num_global_cells, num_cells);
-        EXPECT_EQ(D.num_local_cells, num_cells);
-        EXPECT_EQ(D.groups.size(), num_cells);
+        EXPECT_EQ(D.num_global_cells(), num_cells);
+        EXPECT_EQ(D.num_local_cells(), num_cells);
+        EXPECT_EQ(D.num_groups(), num_cells);
 
         auto gids = make_span(num_cells);
         for (auto gid: gids) {
@@ -283,7 +286,7 @@ TEST(domain_decomposition, heterogenous_population)
         auto grps = make_span(num_cells);
         std::map<cell_kind, std::set<cell_gid_type>> kind_lists;
         for (auto i: grps) {
-            auto& grp = D.groups[i];
+            auto& grp = D.group(i);
             EXPECT_EQ(grp.gids.size(), 1u);
             auto k = grp.kind;
             kind_lists[k].insert(grp.gids.front());
@@ -314,10 +317,8 @@ TEST(domain_decomposition, hints) {
     hints[cell_kind::cable].prefer_gpu = false;
     hints[cell_kind::spike_source].cpu_group_size = 4;
 
-    domain_decomposition D = partition_load_balance(
-        hetero_recipe(20),
-        ctx,
-        hints);
+    auto rec = hetero_recipe(20);
+    domain_decomposition D = partition_load_balance(rec, ctx, hints);
 
     std::vector<std::vector<cell_gid_type>> expected_c1d_groups =
         {{0, 2, 4}, {6, 8, 10}, {12, 14, 16}, {18}};
@@ -327,7 +328,7 @@ TEST(domain_decomposition, hints) {
 
     std::vector<std::vector<cell_gid_type>> c1d_groups, ss_groups;
 
-    for (auto& g: D.groups) {
+    for (auto& g: D.groups()) {
         EXPECT_TRUE(g.kind==cell_kind::cable || g.kind==cell_kind::spike_source);
 
         if (g.kind==cell_kind::cable) {
@@ -345,19 +346,19 @@ TEST(domain_decomposition, hints) {
 TEST(domain_decomposition, gj_recipe) {
     proc_allocation resources;
     resources.num_threads = 1;
-    resources.gpu_id = -1; // disable GPU if available
+    resources.gpu_id = -1;
     auto ctx = make_context(resources);
 
     auto recipes = {gap_recipe(false), gap_recipe(true)};
     for (const auto& R: recipes) {
         const auto D0 = partition_load_balance(R, ctx);
-        EXPECT_EQ(9u, D0.groups.size());
+        EXPECT_EQ(9u, D0.num_groups());
 
         std::vector<std::vector<cell_gid_type>> expected_groups0 =
             {{1}, {5}, {6}, {10}, {12}, {14}, {0, 13}, {2, 7, 11}, {3, 4, 8, 9}};
 
         for (unsigned i = 0; i < 9u; i++) {
-            EXPECT_EQ(expected_groups0[i], D0.groups[i].gids);
+            EXPECT_EQ(expected_groups0[i], D0.group(i).gids);
         }
 
         // Test different group_hints
@@ -366,25 +367,25 @@ TEST(domain_decomposition, gj_recipe) {
         hints[cell_kind::cable].prefer_gpu = false;
 
         const auto D1 = partition_load_balance(R, ctx, hints);
-        EXPECT_EQ(5u, D1.groups.size());
+        EXPECT_EQ(5u, D1.num_groups());
 
          std::vector<std::vector<cell_gid_type>> expected_groups1 =
             {{1, 5, 6}, {10, 12, 14}, {0, 13}, {2, 7, 11}, {3, 4, 8, 9}};
 
         for (unsigned i = 0; i < 5u; i++) {
-            EXPECT_EQ(expected_groups1[i], D1.groups[i].gids);
+            EXPECT_EQ(expected_groups1[i], D1.group(i).gids);
         }
 
         hints[cell_kind::cable].cpu_group_size = 20;
         hints[cell_kind::cable].prefer_gpu = false;
 
         const auto D2 = partition_load_balance(R, ctx, hints);
-        EXPECT_EQ(1u, D2.groups.size());
+        EXPECT_EQ(1u, D2.num_groups());
 
         std::vector<cell_gid_type> expected_groups2 =
             {1, 5, 6, 10, 12, 14, 0, 13, 2, 7, 11, 3, 4, 8, 9};
 
-        EXPECT_EQ(expected_groups2, D2.groups[0].gids);
+        EXPECT_EQ(expected_groups2, D2.group(0).gids);
     }
 }
 
@@ -408,8 +409,8 @@ TEST(domain_decomposition, unidirectional_gj_recipe) {
         const auto D = partition_load_balance(R, ctx);
         std::vector<cell_gid_type> expected_group = {0, 1, 2, 3, 4, 5, 6};
 
-        EXPECT_EQ(1u, D.groups.size());
-        EXPECT_EQ(expected_group, D.groups[0].gids);
+        EXPECT_EQ(1u, D.num_groups());
+        EXPECT_EQ(expected_group, D.group(0).gids);
     }
     {
         std::vector<std::vector<gap_junction_connection>> gj_conns =
@@ -429,9 +430,9 @@ TEST(domain_decomposition, unidirectional_gj_recipe) {
         const auto D = partition_load_balance(R, ctx);
         std::vector<std::vector<cell_gid_type>> expected_groups = {{6}, {0, 1, 2, 3}, {4, 5}, {7, 8, 9}};
 
-        EXPECT_EQ(expected_groups.size(), D.groups.size());
+        EXPECT_EQ(expected_groups.size(), D.num_groups());
         for (unsigned i=0; i < expected_groups.size(); ++i) {
-            EXPECT_EQ(expected_groups[i], D.groups[i].gids);
+            EXPECT_EQ(expected_groups[i], D.group(i).gids);
         }
     }
     {
@@ -452,9 +453,122 @@ TEST(domain_decomposition, unidirectional_gj_recipe) {
         const auto D = partition_load_balance(R, ctx);
         std::vector<std::vector<cell_gid_type>> expected_groups = {{1}, {2}, {9}, {0, 8}, {3, 4, 5, 6, 7}};
 
-        EXPECT_EQ(expected_groups.size(), D.groups.size());
+        EXPECT_EQ(expected_groups.size(), D.num_groups());
         for (unsigned i=0; i < expected_groups.size(); ++i) {
-            EXPECT_EQ(expected_groups[i], D.groups[i].gids);
+            EXPECT_EQ(expected_groups[i], D.group(i).gids);
         }
     }
 }
+
+TEST(domain_decomposition, partition_by_groups) {
+    proc_allocation resources;
+    resources.num_threads = 1;
+    resources.gpu_id = arbenv::default_gpu();
+    auto ctx = make_context(resources);
+
+    {
+        const unsigned ncells = 10;
+        auto rec = homo_recipe(ncells, dummy_cell{});
+        std::vector<cell_gid_type> gids(ncells);
+        std::iota(gids.begin(), gids.end(), 0);
+
+#ifdef ARB_GPU_ENABLED
+        auto d = domain_decomposition(rec, ctx, {{cell_kind::cable, gids, backend_kind::gpu}});
+#else
+        auto d = domain_decomposition(rec, ctx, {{cell_kind::cable, gids, backend_kind::multicore}});
+#endif
+
+        EXPECT_EQ(1, d.num_domains());
+        EXPECT_EQ(0, d.domain_id());
+        EXPECT_EQ(ncells, d.num_local_cells());
+        EXPECT_EQ(ncells, d.num_global_cells());
+        EXPECT_EQ(1u,     d.num_groups());
+        EXPECT_EQ(gids,   d.group(0).gids);
+        EXPECT_EQ(cell_kind::cable, d.group(0).kind);
+#ifdef ARB_GPU_ENABLED
+        EXPECT_EQ(backend_kind::gpu, d.group(0).backend);
+#else
+        EXPECT_EQ(backend_kind::multicore, d.group(0).backend);
+#endif
+        for (unsigned i = 0; i < ncells; ++i) {
+            EXPECT_EQ(0, d.gid_domain(i));
+        }
+    }
+    {
+        const unsigned ncells = 10;
+        auto rec = homo_recipe(ncells, dummy_cell{});
+        std::vector<group_description> groups;
+        for (unsigned i = 0; i < ncells; ++i) {
+            groups.push_back({cell_kind::cable, {i}, backend_kind::multicore});
+        }
+        auto d = domain_decomposition(rec, ctx, groups);
+
+        EXPECT_EQ(1, d.num_domains());
+        EXPECT_EQ(0, d.domain_id());
+        EXPECT_EQ(ncells, d.num_local_cells());
+        EXPECT_EQ(ncells, d.num_global_cells());
+        EXPECT_EQ(ncells, d.num_groups());
+        for (unsigned i = 0; i < ncells; ++i) {
+            EXPECT_EQ(std::vector<cell_gid_type>{i}, d.group(i).gids);
+            EXPECT_EQ(cell_kind::cable, d.group(i).kind);
+            EXPECT_EQ(backend_kind::multicore, d.group(i).backend);
+        }
+        for (unsigned i = 0; i < ncells; ++i) {
+            EXPECT_EQ(0, d.gid_domain(i));
+        }
+    }
+    {
+        auto rec = gap_recipe(true);
+        std::vector<cell_gid_type> gids(rec.num_cells());
+        std::iota(gids.begin(), gids.end(), 0);
+        EXPECT_NO_THROW(domain_decomposition(rec, ctx,  {{cell_kind::cable, gids, backend_kind::multicore}}));
+
+        EXPECT_NO_THROW(domain_decomposition(rec, ctx, {{cell_kind::cable, {0, 13}, backend_kind::multicore},
+                                                        {cell_kind::cable, {2, 7, 11}, backend_kind::multicore},
+                                                        {cell_kind::cable, {1, 3, 4, 5, 6, 8, 9, 10, 12, 14}, backend_kind::multicore}}));
+    }
+}
+
+TEST(domain_decomposition, invalid) {
+    proc_allocation resources;
+    resources.num_threads = 1;
+    resources.gpu_id = -1; // disable GPU if available
+    auto ctx = make_context(resources);
+
+    {
+        auto rec = homo_recipe(10, dummy_cell{});
+
+        std::vector<group_description> groups =
+                {{cell_kind::cable, {0, 1, 2, 3, 4, 5, 6, 7, 8, 10}, backend_kind::multicore}};
+        EXPECT_THROW(domain_decomposition(rec, ctx, groups), out_of_bounds);
+
+        groups = {{cell_kind::cable, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, backend_kind::gpu}};
+#ifdef ARB_GPU_ENABLED
+        EXPECT_NO_THROW(domain_decomposition(rec, ctx, groups));
+
+        groups = {{cell_kind::lif, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, backend_kind::gpu}};
+        EXPECT_THROW(domain_decomposition(rec, ctx, groups), incompatible_backend);
+#else
+        EXPECT_THROW(domain_decomposition(rec, ctx, groups), invalid_backend);
+#endif
+
+        groups = {{cell_kind::cable, {0, 1, 2, 3, 4, 5, 6, 7, 8, 8}, backend_kind::multicore}};
+        EXPECT_THROW(domain_decomposition(rec, ctx, groups), duplicate_gid);
+    }
+    {
+        auto rec = gap_recipe(true);
+        std::vector<group_description> groups =
+                {{cell_kind::cable, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}, backend_kind::multicore}};
+        EXPECT_NO_THROW(domain_decomposition(rec, ctx, groups));
+
+        groups = {{cell_kind::cable, {0, 13}, backend_kind::multicore},
+                  {cell_kind::cable, {2, 7, 11}, backend_kind::multicore},
+                  {cell_kind::cable, {1, 3, 4, 5, 6, 8, 9, 10, 12, 14}, backend_kind::multicore}};
+        EXPECT_NO_THROW(domain_decomposition(rec, ctx, groups));
+
+        groups = {{cell_kind::cable, {0}, backend_kind::multicore},
+                  {cell_kind::cable, {2, 7, 11, 13}, backend_kind::multicore},
+                  {cell_kind::cable, {1, 3, 4, 5, 6, 8, 9, 10, 12, 14}, backend_kind::multicore}};
+        EXPECT_THROW(domain_decomposition(rec, ctx, groups), invalid_gj_cell_group);
+    }
+}
\ No newline at end of file
diff --git a/test/unit/test_event_delivery.cpp b/test/unit/test_event_delivery.cpp
index caca91e3a79a5a3da9c25202dd0bc9e5bfdae233..1294240ce8f8d285684d90e814ca824115d88b5c 100644
--- a/test/unit/test_event_delivery.cpp
+++ b/test/unit/test_event_delivery.cpp
@@ -9,6 +9,7 @@
 #include <arbor/cable_cell.hpp>
 #include <arbor/common_types.hpp>
 #include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/morph/segment_tree.hpp>
 #include <arbor/simulation.hpp>
 #include <arbor/spike.hpp>
@@ -51,17 +52,11 @@ std::vector<cell_gid_type> run_test_sim(const recipe& R, const group_gids_type&
     arb::context ctx = make_context(proc_allocation{});
     unsigned n = R.num_cells();
 
-    domain_decomposition D;
-    D.gid_domain = [](cell_gid_type) { return 0; };
-    D.num_domains = 1;
-    D.num_local_cells = n;
-    D.num_global_cells = n;
-
+    std::vector<group_description> groups;
     for (const auto& gidvec: group_gids) {
-        group_description group{cell_kind::cable, gidvec, backend_kind::multicore};
-        D.groups.push_back(group);
+        groups.emplace_back(cell_kind::cable, gidvec, backend_kind::multicore);
     }
-
+    auto D = domain_decomposition(R, ctx, groups);
     std::vector<spike> spikes;
 
     simulation sim(R, D, ctx);
diff --git a/test/unit/test_piecewise.cpp b/test/unit/test_piecewise.cpp
index 0f65cf8933c09e5993b2ff082eabde76603db68c..43a8e702178b2c6d72320d995aacc697d9358d33 100644
--- a/test/unit/test_piecewise.cpp
+++ b/test/unit/test_piecewise.cpp
@@ -243,21 +243,21 @@ TEST(piecewise, equal_range) {
         ASSERT_EQ(er0.first, er0.second);
 
         auto er1 = p.equal_range(1.0);
-        ASSERT_EQ(1, er1.second-er1.first);
+        ASSERT_EQ(1u, er1.second-er1.first);
         EXPECT_EQ(10, er1.first->value);
 
         auto er2 = p.equal_range(2.0);
-        ASSERT_EQ(2, er2.second-er2.first);
+        ASSERT_EQ(2u, er2.second-er2.first);
         auto iter = er2.first;
         EXPECT_EQ(10, iter++->value);
         EXPECT_EQ(9, iter->value);
 
         auto er3_5 = p.equal_range(3.5);
-        ASSERT_EQ(1, er3_5.second-er3_5.first);
+        ASSERT_EQ(1u, er3_5.second-er3_5.first);
         EXPECT_EQ(8, er3_5.first->value);
 
         auto er4 = p.equal_range(4.0);
-        ASSERT_EQ(1, er4.second-er4.first);
+        ASSERT_EQ(1u, er4.second-er4.first);
         EXPECT_EQ(8, er4.first->value);
 
         auto er5 = p.equal_range(5.0);
@@ -271,13 +271,13 @@ TEST(piecewise, equal_range) {
         ASSERT_EQ(er0.first, er0.second);
 
         auto er1 = p.equal_range(1.0);
-        ASSERT_EQ(2, er1.second-er1.first);
+        ASSERT_EQ(2u, er1.second-er1.first);
         auto iter = er1.first;
         EXPECT_EQ(10, iter++->value);
         EXPECT_EQ(11, iter++->value);
 
         auto er2 = p.equal_range(2.0);
-        ASSERT_EQ(4, er2.second-er2.first);
+        ASSERT_EQ(4u, er2.second-er2.first);
         iter = er2.first;
         EXPECT_EQ(11, iter++->value);
         EXPECT_EQ(12, iter++->value);
@@ -285,7 +285,7 @@ TEST(piecewise, equal_range) {
         EXPECT_EQ(14, iter++->value);
 
         auto er3 = p.equal_range(3.0);
-        ASSERT_EQ(2, er3.second-er3.first);
+        ASSERT_EQ(2u, er3.second-er3.first);
         iter = er3.first;
         EXPECT_EQ(14, iter++->value);
         EXPECT_EQ(15, iter++->value);
diff --git a/test/unit/test_span.cpp b/test/unit/test_span.cpp
index 7447e93366ef8ee18184f1cb73c17c0c8068de7a..a2d484badf08b5a779767011ffdfef3c99e236ef 100644
--- a/test/unit/test_span.cpp
+++ b/test/unit/test_span.cpp
@@ -45,14 +45,15 @@ TEST(span, int_iterators) {
     int a = 3;
     int b = a+n;
 
-    span s(a, b);
-
-    EXPECT_TRUE(util::is_iterator<span::iterator>::value);
-    EXPECT_TRUE(util::is_random_access_iterator<span::iterator>::value);
+    {
+        span s(a, b);
 
-    EXPECT_EQ(n, std::distance(s.begin(), s.end()));
-    EXPECT_EQ(n, std::distance(s.cbegin(), s.cend()));
+        EXPECT_TRUE(util::is_iterator<span::iterator>::value);
+        EXPECT_TRUE(util::is_random_access_iterator<span::iterator>::value);
 
+        EXPECT_EQ(n, std::distance(s.begin(), s.end()));
+        EXPECT_EQ(n, std::distance(s.cbegin(), s.cend()));
+    }
     {
         int sum = 0;
         for (auto i: span(a, b)) {
@@ -64,7 +65,7 @@ TEST(span, int_iterators) {
     {
         auto touched = 0ul;
         auto s = uc_span(b, a);
-        for (auto _: s) ++touched;
+        for (auto it = s.begin(); it != s.end(); ++it) ++touched;
         EXPECT_EQ(s.size(), touched);
         EXPECT_GT(touched, 0ul); //
     }
@@ -72,7 +73,7 @@ TEST(span, int_iterators) {
     {
         auto touched = 0ul;
         auto s = uc_span(a, a);
-        for (auto _: s) ++touched;
+        for (auto it = s.begin(); it != s.end(); ++it) ++touched;
         EXPECT_EQ(s.size(), touched);
         EXPECT_EQ(0ul, touched);
     }