diff --git a/arbor/CMakeLists.txt b/arbor/CMakeLists.txt index 37917fb7b6638641b0f2c31c23e177744f0e4c1e..eeb0b9624a99897d4eafb29ed0204dc88b6323c2 100644 --- a/arbor/CMakeLists.txt +++ b/arbor/CMakeLists.txt @@ -10,6 +10,7 @@ set(arbor_sources builtin_mechanisms.cpp cell_group_factory.cpp common_types_io.cpp + execution_context.cpp gpu_context.cpp local_alloc.cpp event_binner.cpp diff --git a/arbor/backends/gpu/threshold_watcher.hpp b/arbor/backends/gpu/threshold_watcher.hpp index 68942b71652b50d4c191bb2bb8370ce25e74ca75..e0668a9fa1c86953ddef971e513dee4244d39ccb 100644 --- a/arbor/backends/gpu/threshold_watcher.hpp +++ b/arbor/backends/gpu/threshold_watcher.hpp @@ -4,6 +4,7 @@ #include <arbor/common_types.hpp> #include <arbor/fvm_types.hpp> +#include "execution_context.hpp" #include "memory/memory.hpp" #include "util/span.hpp" diff --git a/arbor/backends/multicore/fvm.hpp b/arbor/backends/multicore/fvm.hpp index 99dcd2b675de419f9d64d155578339756029d9fa..1cbae3eff1b2ed167e446b00ebfc6c7af7afc8ba 100644 --- a/arbor/backends/multicore/fvm.hpp +++ b/arbor/backends/multicore/fvm.hpp @@ -2,7 +2,6 @@ #include <string> #include <vector> -#include <arbor/execution_context.hpp> #include "backends/event.hpp" #include "backends/multicore/matrix_state.hpp" @@ -10,6 +9,7 @@ #include "backends/multicore/multicore_common.hpp" #include "backends/multicore/shared_state.hpp" #include "backends/multicore/threshold_watcher.hpp" +#include "execution_context.hpp" #include "util/padded_alloc.hpp" #include "util/range.hpp" #include "util/rangeutil.hpp" diff --git a/arbor/backends/multicore/threshold_watcher.hpp b/arbor/backends/multicore/threshold_watcher.hpp index 13e5525712be6114e9f471a774df0a5023a8037a..68b1d61d16dfde5af36a4265b35bd4b8713edabc 100644 --- a/arbor/backends/multicore/threshold_watcher.hpp +++ b/arbor/backends/multicore/threshold_watcher.hpp @@ -1,11 +1,11 @@ #pragma once #include <arbor/assert.hpp> -#include <arbor/execution_context.hpp> #include <arbor/fvm_types.hpp> #include <arbor/math.hpp> #include "backends/threshold_crossing.hpp" +#include "execution_context.hpp" #include "multicore_common.hpp" namespace arb { diff --git a/arbor/cell_group_factory.cpp b/arbor/cell_group_factory.cpp index 0fc0a08b8b7f1d8bfc81f6f1bb2f37d425884db7..e3dc6a749d9a26dbfaf8db855e81e0ccdbb81c9d 100644 --- a/arbor/cell_group_factory.cpp +++ b/arbor/cell_group_factory.cpp @@ -6,6 +6,7 @@ #include "benchmark_cell_group.hpp" #include "cell_group.hpp" #include "cell_group_factory.hpp" +#include "execution_context.hpp" #include "fvm_lowered_cell.hpp" #include "lif_cell_group.hpp" #include "mc_cell_group.hpp" @@ -18,7 +19,9 @@ cell_group_ptr make_cell_group(Args&&... 
args) { return cell_group_ptr(new Impl(std::forward<Args>(args)...)); } -cell_group_factory cell_kind_implementation(cell_kind ck, backend_kind bk, const execution_context& ctx) { +cell_group_factory cell_kind_implementation( + cell_kind ck, backend_kind bk, const execution_context& ctx) +{ using gid_vector = std::vector<cell_gid_type>; switch (ck) { diff --git a/arbor/cell_group_factory.hpp b/arbor/cell_group_factory.hpp index 0cd72de54aaa46b91b1fd394eb2652d1b9056eec..903526532457bf0dbcf256cf1c18c003f1b754ed 100644 --- a/arbor/cell_group_factory.hpp +++ b/arbor/cell_group_factory.hpp @@ -11,17 +11,21 @@ #include <arbor/common_types.hpp> #include <arbor/recipe.hpp> -#include <arbor/execution_context.hpp> #include "cell_group.hpp" +#include "execution_context.hpp" namespace arb { -using cell_group_factory = std::function<cell_group_ptr (const std::vector<cell_gid_type>&, const recipe&)>; +using cell_group_factory = std::function< + cell_group_ptr(const std::vector<cell_gid_type>&, const recipe&)>; -cell_group_factory cell_kind_implementation(cell_kind, backend_kind, const execution_context&); +cell_group_factory cell_kind_implementation( + cell_kind, backend_kind, const execution_context&); -inline bool cell_kind_supported(cell_kind c, backend_kind b, const execution_context& ctx) { +inline bool cell_kind_supported( + cell_kind c, backend_kind b, const execution_context& ctx) +{ return static_cast<bool>(cell_kind_implementation(c, b, ctx)); } diff --git a/arbor/communication/communicator.hpp b/arbor/communication/communicator.hpp index 4a0777bf73b8712401fa5852a520b1184bb9eb35..996e76da02f946b54abe5906a5f1e4e4247b53b0 100644 --- a/arbor/communication/communicator.hpp +++ b/arbor/communication/communicator.hpp @@ -9,15 +9,16 @@ #include <arbor/assert.hpp> #include <arbor/common_types.hpp> -#include <arbor/communication/gathered_vector.hpp> -#include <arbor/distributed_context.hpp> #include <arbor/domain_decomposition.hpp> #include <arbor/recipe.hpp> #include <arbor/spike.hpp> #include "algorithms.hpp" +#include "communication/gathered_vector.hpp" #include "connection.hpp" +#include "distributed_context.hpp" #include "event_queue.hpp" +#include "execution_context.hpp" #include "profile/profiler_macro.hpp" #include "threading/threading.hpp" #include "util/double_buffer.hpp" @@ -44,7 +45,7 @@ public: explicit communicator(const recipe& rec, const domain_decomposition& dom_dec, - execution_context ctx) + execution_context& ctx) { distributed_ = ctx.distributed; thread_pool_ = ctx.thread_pool; diff --git a/include/arbor/communication/gathered_vector.hpp b/arbor/communication/gathered_vector.hpp similarity index 100% rename from include/arbor/communication/gathered_vector.hpp rename to arbor/communication/gathered_vector.hpp diff --git a/arbor/communication/mpi.hpp b/arbor/communication/mpi.hpp index 3200dd9e2ee1c0620e4e7fe8d920e8e7dc8e748a..8d7ec7826cda5029c350d96ba3f03e22ec1dc227 100644 --- a/arbor/communication/mpi.hpp +++ b/arbor/communication/mpi.hpp @@ -8,10 +8,10 @@ #include <mpi.h> #include <arbor/assert.hpp> -#include <arbor/communication/gathered_vector.hpp> #include <arbor/communication/mpi_error.hpp> #include "algorithms.hpp" +#include "communication/gathered_vector.hpp" #include "profile/profiler_macro.hpp" diff --git a/arbor/communication/mpi_context.cpp b/arbor/communication/mpi_context.cpp index b1134b9a7334eb6f1b763ee898dcc50b6e2bac42..be80dca50966bddde9683d4b05d0b8c4cd1d54c7 100644 --- a/arbor/communication/mpi_context.cpp +++ b/arbor/communication/mpi_context.cpp @@ -10,10 +10,10 
@@ #include <mpi.h> -#include <arbor/distributed_context.hpp> #include <arbor/spike.hpp> #include "communication/mpi.hpp" +#include "distributed_context.hpp" namespace arb { @@ -62,12 +62,8 @@ struct mpi_context_impl { } }; -std::shared_ptr<distributed_context> mpi_context() { - return std::make_shared<distributed_context>(mpi_context_impl(MPI_COMM_WORLD)); -} - template <> -std::shared_ptr<distributed_context> mpi_context(MPI_Comm comm) { +std::shared_ptr<distributed_context> make_mpi_context(MPI_Comm comm) { return std::make_shared<distributed_context>(mpi_context_impl(comm)); } diff --git a/include/arbor/distributed_context.hpp b/arbor/distributed_context.hpp similarity index 94% rename from include/arbor/distributed_context.hpp rename to arbor/distributed_context.hpp index 4c95292ab127851e4a4420efa3de355d4ba66e9c..d4153a15755954d80692889cae48f28a25cf5b07 100644 --- a/include/arbor/distributed_context.hpp +++ b/arbor/distributed_context.hpp @@ -4,9 +4,10 @@ #include <string> #include <arbor/spike.hpp> -#include <arbor/communication/gathered_vector.hpp> #include <arbor/util/pp_util.hpp> +#include "communication/gathered_vector.hpp" + namespace arb { #define ARB_PUBLIC_COLLECTIVES_(T) \ @@ -163,12 +164,16 @@ inline distributed_context::distributed_context(): distributed_context(local_context()) {} -// MPI context creation functions only provided if built with MPI support. +using distributed_context_handle = std::shared_ptr<distributed_context>; -std::shared_ptr<distributed_context> mpi_context(); +inline +distributed_context_handle make_local_context() { + return std::make_shared<distributed_context>(); +} +// MPI context creation functions only provided if built with MPI support. template <typename MPICommType> -std::shared_ptr<distributed_context> mpi_context(MPICommType); +distributed_context_handle make_mpi_context(MPICommType); } // namespace arb diff --git a/arbor/execution_context.cpp b/arbor/execution_context.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5436332961b8157541992d30ca9e4787d24ff036 --- /dev/null +++ b/arbor/execution_context.cpp @@ -0,0 +1,73 @@ +#include <iostream> +#include <memory> + +#include <arbor/context.hpp> +#include <arbor/version.hpp> + +#include "gpu_context.hpp" +#include "distributed_context.hpp" +#include "execution_context.hpp" +#include "threading/threading.hpp" + +#ifdef ARB_MPI_ENABLED +#include <mpi.h> +#endif + +namespace arb { + +execution_context::execution_context(): + execution_context(proc_allocation()) +{} + +execution_context::execution_context(const proc_allocation& resources): + distributed(make_local_context()), + thread_pool(std::make_shared<threading::task_system>(resources.num_threads)), + gpu(resources.has_gpu()? std::make_shared<gpu_context>(resources.gpu_id) + : std::make_shared<gpu_context>()) +{} + +context make_context() { + return context(new execution_context(), [](execution_context* p){delete p;}); +} + +context make_context(const proc_allocation& p) { + return context(new execution_context(p), [](execution_context* p){delete p;}); +} + +#ifdef ARB_MPI_ENABLED +template <> +execution_context::execution_context<MPI_Comm>(const proc_allocation& resources, MPI_Comm comm): + distributed(make_mpi_context(comm)), + thread_pool(std::make_shared<threading::task_system>(resources.num_threads)), + gpu(resources.has_gpu()? 
std::make_shared<gpu_context>(resources.gpu_id) + : std::make_shared<gpu_context>()) +{} + +template <> +context make_context<MPI_Comm>(const proc_allocation& p, MPI_Comm comm) { + return context(new execution_context(p, comm), [](execution_context* p){delete p;}); +} +#endif + +bool has_gpu(const context& ctx) { + return ctx->gpu->has_gpu(); +} + +unsigned num_threads(const context& ctx) { + return ctx->thread_pool->get_num_threads(); +} + +unsigned num_ranks(const context& ctx) { + return ctx->distributed->size(); +} + +unsigned rank(const context& ctx) { + return ctx->distributed->id(); +} + +bool has_mpi(const context& ctx) { + return ctx->distributed->name() == "MPI"; +} + +} // namespace arb + diff --git a/arbor/execution_context.hpp b/arbor/execution_context.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ee1e8415e69cf0da710416bd352515fd5d9f3eb7 --- /dev/null +++ b/arbor/execution_context.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include <memory> + +#include <arbor/context.hpp> + +#include "distributed_context.hpp" +#include "threading/threading.hpp" +#include "gpu_context.hpp" + +namespace arb { + +// execution_context is a simple container for the state relating to +// execution resources. +// Specifically, it has handles for the distributed context, gpu +// context and thread pool. +// +// Note: the public API uses an opaque handle arb::context for +// execution_context, to hide implementation details of the +// container and its constituent contexts from the public API. + +struct execution_context { + distributed_context_handle distributed; + task_system_handle thread_pool; + gpu_context_handle gpu; + + execution_context(); + execution_context(const proc_allocation& resources); + + // Use a template for constructing with a specific distributed context. + // Specialised implementations are implemented in execution_context.cpp. 
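// --- Editor's illustrative sketch; not part of the patch. --------------------
// A minimal example of how the public wrappers defined in execution_context.cpp
// above are intended to be used. It assumes only what this diff introduces:
// arb::context, arb::proc_allocation and arb::make_context from <arbor/context.hpp>.
#include <iostream>
#include <arbor/context.hpp>

int main() {
    arb::proc_allocation resources;          // detected threads, first GPU if present
    auto ctx = arb::make_context(resources); // opaque handle to an execution_context
    std::cout << "threads: " << arb::num_threads(ctx) << "\n"
              << "gpu:     " << (arb::has_gpu(ctx)? "yes": "no") << "\n"
              << "ranks:   " << arb::num_ranks(ctx) << "\n";
}
// -----------------------------------------------------------------------------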
+ template <typename Comm> + execution_context(const proc_allocation& resources, Comm comm); +}; + +} // namespace arb diff --git a/arbor/fvm_lowered_cell.hpp b/arbor/fvm_lowered_cell.hpp index bb4184fcbce6b51cbf29a70f02cc6bb0cc02fc51..c710074cc18b4a096fa463993acc77b03118b901 100644 --- a/arbor/fvm_lowered_cell.hpp +++ b/arbor/fvm_lowered_cell.hpp @@ -4,12 +4,12 @@ #include <vector> #include <arbor/common_types.hpp> -#include <arbor/execution_context.hpp> #include <arbor/fvm_types.hpp> #include <arbor/recipe.hpp> #include "backends/event.hpp" #include "backends/threshold_crossing.hpp" +#include "execution_context.hpp" #include "sampler_map.hpp" #include "util/range.hpp" diff --git a/arbor/fvm_lowered_cell_impl.hpp b/arbor/fvm_lowered_cell_impl.hpp index fcc9670d2cc0597de39ebd9f04e4238cfb16973d..f2a454852c33d169d2013f09795251a5f56aff17 100644 --- a/arbor/fvm_lowered_cell_impl.hpp +++ b/arbor/fvm_lowered_cell_impl.hpp @@ -20,6 +20,7 @@ #include <arbor/recipe.hpp> #include "builtin_mechanisms.hpp" +#include "execution_context.hpp" #include "fvm_layout.hpp" #include "fvm_lowered_cell.hpp" #include "matrix.hpp" diff --git a/arbor/gpu_context.cpp b/arbor/gpu_context.cpp index 1fcbba074bb430c8117b096d131210dcd6cef939..34b078eab9fa066e70250e8ade4266a32cf42b2e 100644 --- a/arbor/gpu_context.cpp +++ b/arbor/gpu_context.cpp @@ -1,14 +1,29 @@ #include <memory> +#include <arbor/arbexcept.hpp> + +#include "gpu_context.hpp" + #ifdef ARB_HAVE_GPU #include <cuda.h> #include <cuda_runtime.h> #endif -#include "gpu_context.hpp" - namespace arb { +enum gpu_flags { + has_concurrent_managed_access = 1, + has_atomic_double = 2 +}; + +gpu_context_handle make_gpu_context(int id) { + return std::make_shared<gpu_context>(id); +} + +bool gpu_context_has_gpu(const gpu_context& ctx) { + return ctx.has_gpu(); +} + bool gpu_context::has_concurrent_managed_access() const { return attributes_ & gpu_flags::has_concurrent_managed_access; } @@ -17,23 +32,43 @@ bool gpu_context::has_atomic_double() const { return attributes_ & gpu_flags::has_atomic_double; } +bool gpu_context::has_gpu() const { + return id_ != -1; +} + #ifndef ARB_HAVE_GPU -gpu_context::gpu_context(): has_gpu_(false), attributes_(0) {} void gpu_context::synchronize_for_managed_access() const {} +gpu_context::gpu_context(int) { + throw arbor_exception("Arbor must be compiled with CUDA support to select a GPU."); +} #else -gpu_context::gpu_context(): has_gpu_(true), attributes_(0) { +gpu_context::gpu_context(int gpu_id) { cudaDeviceProp prop; - cudaGetDeviceProperties(&prop, 0); + auto status = cudaGetDeviceProperties(&prop, gpu_id); + if (status==cudaErrorInvalidDevice) { + throw arbor_exception("Invalid GPU id " + std::to_string(gpu_id)); + } + + // Set the device + status = cudaSetDevice(gpu_id); + if (status!=cudaSuccess) { + throw arbor_exception("Unable to select GPU id " + std::to_string(gpu_id)); + } + + id_ = gpu_id; + + // Record the device attributes + attributes_ = 0; if (prop.concurrentManagedAccess) { attributes_ |= gpu_flags::has_concurrent_managed_access; } if (prop.major*100 + prop.minor >= 600) { attributes_ |= gpu_flags::has_atomic_double; } -}; +} void gpu_context::synchronize_for_managed_access() const { if(!has_concurrent_managed_access()) { @@ -43,8 +78,4 @@ void gpu_context::synchronize_for_managed_access() const { #endif -std::shared_ptr<gpu_context> make_gpu_context() { - return std::make_shared<gpu_context>(); -} - -} +} // namespace arb diff --git a/arbor/gpu_context.hpp b/arbor/gpu_context.hpp index 
edcb9c933ccddbafd68e3a1412c80dc54e7ea512..2b1e4f496a2d47c3a848e2bd08f1c58a5f02ff0e 100644 --- a/arbor/gpu_context.hpp +++ b/arbor/gpu_context.hpp @@ -1,21 +1,25 @@ #pragma once -namespace arb { +#include <cstdlib> +#include <memory> -enum gpu_flags { - has_concurrent_managed_access = 1, - has_atomic_double = 2 -}; +namespace arb { -struct gpu_context { - bool has_gpu_; - size_t attributes_; +class gpu_context { + int id_ = -1; + std::size_t attributes_ = 0; - gpu_context(); +public: + gpu_context() = default; + gpu_context(int id); bool has_concurrent_managed_access() const; bool has_atomic_double() const; void synchronize_for_managed_access() const; + bool has_gpu() const; }; -} +using gpu_context_handle = std::shared_ptr<gpu_context>; +gpu_context_handle make_gpu_context(int id); + +} // namespace arb diff --git a/arbor/local_alloc.cpp b/arbor/local_alloc.cpp index dfe00d603d480b45227221455b56acc88bed6359..3320e22e5b53586d8e90adf56d95580821397e3e 100644 --- a/arbor/local_alloc.cpp +++ b/arbor/local_alloc.cpp @@ -1,17 +1,16 @@ -#include <arbor/domain_decomposition.hpp> -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> #include "hardware/node_info.hpp" +#include "threading/thread_info.hpp" #include "threading/threading.hpp" namespace arb { -proc_allocation local_allocation(const execution_context& ctx) { - proc_allocation info; - info.num_threads = ctx.thread_pool->get_num_threads(); - info.num_gpus = arb::hw::node_gpus(); +local_resources get_local_resources() { + auto avail_threads = threading::num_threads_init(); + auto avail_gpus = arb::hw::node_gpus(); - return info; + return local_resources(avail_threads, avail_gpus); } } // namespace arb diff --git a/arbor/partition_load_balance.cpp b/arbor/partition_load_balance.cpp index 0e4ffca339abe8ac1f51d47cdb97ae77d9dba8d6..fe1d79b27975dc0b97bbb1da51be94bf60bea094 100644 --- a/arbor/partition_load_balance.cpp +++ b/arbor/partition_load_balance.cpp @@ -1,9 +1,11 @@ #include <arbor/domain_decomposition.hpp> #include <arbor/load_balance.hpp> #include <arbor/recipe.hpp> -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> #include "cell_group_factory.hpp" +#include "execution_context.hpp" +#include "gpu_context.hpp" #include "util/maputil.hpp" #include "util/partition.hpp" #include "util/span.hpp" @@ -12,10 +14,11 @@ namespace arb { domain_decomposition partition_load_balance( const recipe& rec, - proc_allocation nd, - const execution_context& ctx, + const context& ctx, partition_hint_map hint_map) { + const bool gpu_avail = ctx->gpu->has_gpu(); + struct partition_gid_domain { partition_gid_domain(std::vector<cell_gid_type> divs): gid_divisions(std::move(divs)) @@ -31,8 +34,8 @@ domain_decomposition partition_load_balance( using util::make_span; - unsigned num_domains = ctx.distributed->size(); - unsigned domain_id = ctx.distributed->id(); + unsigned num_domains = ctx->distributed->size(); + unsigned domain_id = ctx->distributed->id(); auto num_global_cells = rec.num_cells(); auto dom_size = [&](unsigned dom) -> cell_gid_type { @@ -65,8 +68,8 @@ domain_decomposition partition_load_balance( // of cell group updates according to rules such as the back end on // which the cell group is running. 
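// --- Editor's illustrative sketch; not part of the patch. --------------------
// partition_load_balance() now takes the context handle instead of a
// proc_allocation; GPU availability is read from ctx->gpu as above. A sketch
// assuming a user-supplied recipe; the cell_kind enumerator is only an example.
#include <arbor/context.hpp>
#include <arbor/load_balance.hpp>
#include <arbor/recipe.hpp>

arb::domain_decomposition decompose(const arb::recipe& rec, const arb::context& ctx) {
    arb::partition_hint_map hints;
    hints[arb::cell_kind::cable1d_neuron].prefer_gpu = true;     // honoured only if ctx has a GPU
    hints[arb::cell_kind::cable1d_neuron].gpu_group_size = 1000;
    return arb::partition_load_balance(rec, ctx, hints);
}
// -----------------------------------------------------------------------------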
- auto has_gpu_backend = [ctx](cell_kind c) { - return cell_kind_supported(c, backend_kind::gpu, ctx); + auto has_gpu_backend = [&ctx](cell_kind c) { + return cell_kind_supported(c, backend_kind::gpu, *ctx); }; std::vector<cell_kind> kinds; @@ -85,7 +88,7 @@ domain_decomposition partition_load_balance( backend_kind backend = backend_kind::multicore; std::size_t group_size = hint.cpu_group_size; - if (hint.prefer_gpu && nd.num_gpus>0 && has_gpu_backend(k)) { + if (hint.prefer_gpu && gpu_avail && has_gpu_backend(k)) { backend = backend_kind::gpu; group_size = hint.gpu_group_size; } diff --git a/arbor/profile/meter_manager.cpp b/arbor/profile/meter_manager.cpp index e08b1c1a85d2202a80443413d3667584952bb551..ba5fd8dc6c0b8a4b5706817f5e16abdcc67401c3 100644 --- a/arbor/profile/meter_manager.cpp +++ b/arbor/profile/meter_manager.cpp @@ -1,12 +1,13 @@ #include <arbor/profile/timer.hpp> #include <arbor/profile/meter_manager.hpp> -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> #include "memory_meter.hpp" #include "power_meter.hpp" #include "algorithms.hpp" +#include "execution_context.hpp" #include "util/hostname.hpp" #include "util/strprintf.hpp" #include "util/rangeutil.hpp" @@ -19,23 +20,25 @@ using util::strprintf; measurement::measurement(std::string n, std::string u, const std::vector<double>& readings, - const distributed_context_handle& ctx): + const context& ctx): name(std::move(n)), units(std::move(u)) { + auto dist = ctx->distributed; + // Assert that the same number of readings were taken on every domain. const auto num_readings = readings.size(); - if (ctx->min(num_readings)!=ctx->max(num_readings)) { + if (dist->min(num_readings)!=dist->max(num_readings)) { throw std::out_of_range( "the number of checkpoints in the \""+name+"\" meter do not match across domains"); } // Gather across all of the domains onto the root domain. for (auto r: readings) { - measurements.push_back(ctx->gather(r, 0)); + measurements.push_back(dist->gather(r, 0)); } } -meter_manager::meter_manager(distributed_context_handle ctx): glob_ctx_(ctx) { +meter_manager::meter_manager() { if (auto m = make_memory_meter()) { meters_.push_back(std::move(m)); } @@ -47,7 +50,7 @@ meter_manager::meter_manager(distributed_context_handle ctx): glob_ctx_(ctx) { } }; -void meter_manager::start() { +void meter_manager::start(const context& ctx) { arb_assert(!started_); started_ = true; @@ -58,13 +61,13 @@ void meter_manager::start() { } // Enforce a global barrier after taking the time stamp - glob_ctx_->barrier(); + ctx->distributed->barrier(); start_time_ = timer_type::tic(); }; -void meter_manager::checkpoint(std::string name) { +void meter_manager::checkpoint(std::string name, const context& ctx) { arb_assert(started_); // Record the time taken on this domain since the last checkpoint @@ -77,7 +80,7 @@ void meter_manager::checkpoint(std::string name) { } // Synchronize all domains before setting start time for the next interval - glob_ctx_->barrier(); + ctx->distributed->barrier(); start_time_ = timer<>::tic(); } @@ -93,17 +96,11 @@ const std::vector<double>& meter_manager::times() const { return times_; } -distributed_context_handle meter_manager::context() const { - return glob_ctx_; -} - // Build a report of meters, for use at the end of a simulation // for output to file or analysis. 
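// --- Editor's illustrative sketch; not part of the patch. --------------------
// The meter_manager no longer owns a distributed context; the context handle is
// now passed explicitly to start(), checkpoint() and make_meter_report(), as the
// updated examples later in this diff do. Sketch assuming a valid arb::context.
#include <iostream>
#include <arbor/context.hpp>
#include <arbor/profile/meter_manager.hpp>

void profile_run(const arb::context& ctx) {
    arb::profile::meter_manager meters;
    meters.start(ctx);
    // ... build and run the model ...
    meters.checkpoint("model-run", ctx);
    std::cout << arb::profile::make_meter_report(meters, ctx) << "\n";
}
// -----------------------------------------------------------------------------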
-meter_report make_meter_report(const meter_manager& manager) { +meter_report make_meter_report(const meter_manager& manager, const context& ctx) { meter_report report; - auto ctx = manager.context(); - // Add the times to the meter outputs report.meters.push_back(measurement("time", "s", manager.times(), ctx)); @@ -115,7 +112,7 @@ meter_report make_meter_report(const meter_manager& manager) { // Gather a vector with the names of the node that each rank is running on. auto host = util::hostname(); - auto hosts = ctx->gather(host? *host: "unknown", 0); + auto hosts = ctx->distributed->gather(host? *host: "unknown", 0); report.hosts = hosts; // Count the number of unique hosts. @@ -124,7 +121,7 @@ meter_report make_meter_report(const meter_manager& manager) { auto num_hosts = std::distance(hosts.begin(), std::unique(hosts.begin(), hosts.end())); report.checkpoints = manager.checkpoint_names(); - report.num_domains = ctx->size(); + report.num_domains = ctx->distributed->size(); report.num_hosts = num_hosts; return report; diff --git a/arbor/profile/profiler.cpp b/arbor/profile/profiler.cpp index 9923438f8a7167d6a43bdf66e1a26d13a05cafc7..4ecd1efc5cc04cd7302ce29554df04ac7646edaf 100644 --- a/arbor/profile/profiler.cpp +++ b/arbor/profile/profiler.cpp @@ -2,8 +2,10 @@ #include <mutex> #include <ostream> +#include <arbor/context.hpp> #include <arbor/profile/profiler.hpp> +#include "execution_context.hpp" #include "threading/threading.hpp" #include "util/span.hpp" #include "util/rangeutil.hpp" @@ -341,8 +343,8 @@ void profiler_enter(region_id_type region_id) { profiler::get_global_profiler().enter(region_id); } -void profiler_initialize(task_system_handle& ts) { - profiler::get_global_profiler().initialize(ts); +void profiler_initialize(context& ctx) { + profiler::get_global_profiler().initialize(ctx->thread_pool); } // Print profiler statistics to an ostream diff --git a/arbor/simulation.cpp b/arbor/simulation.cpp index 87d88c28b594a99feac251d5b81c122486a44743..2e87f4a45cf4da9f19f52acfd0a158c2d6a4681b 100644 --- a/arbor/simulation.cpp +++ b/arbor/simulation.cpp @@ -2,6 +2,7 @@ #include <set> #include <vector> +#include <arbor/context.hpp> #include <arbor/domain_decomposition.hpp> #include <arbor/generic_event.hpp> #include <arbor/recipe.hpp> @@ -11,6 +12,7 @@ #include "cell_group.hpp" #include "cell_group_factory.hpp" #include "communication/communicator.hpp" +#include "execution_context.hpp" #include "merge_events.hpp" #include "thread_private_spike_store.hpp" #include "threading/threading.hpp" @@ -419,9 +421,9 @@ void simulation_state::inject_events(const pse_vector& events) { simulation::simulation( const recipe& rec, const domain_decomposition& decomp, - execution_context ctx) + const context& ctx) { - impl_.reset(new simulation_state(rec, decomp, ctx)); + impl_.reset(new simulation_state(rec, decomp, *ctx)); } void simulation::reset() { diff --git a/arbor/thread_private_spike_store.cpp b/arbor/thread_private_spike_store.cpp index e6a1f92eaa07682d3673d68aa54edacc058abee4..f7805350c486e7d0bbb619f430c9029e2a3494e5 100644 --- a/arbor/thread_private_spike_store.cpp +++ b/arbor/thread_private_spike_store.cpp @@ -3,6 +3,8 @@ #include <arbor/common_types.hpp> #include <arbor/spike.hpp> +#include "threading/enumerable_thread_specific.hpp" +#include "threading/threading.hpp" #include "thread_private_spike_store.hpp" namespace arb { @@ -13,11 +15,13 @@ struct local_spike_store_type { local_spike_store_type(const task_system_handle& ts): buffers_(ts) {}; }; 
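// --- Editor's illustrative sketch; not part of the patch. --------------------
// enumerable_thread_specific (moved below into threading/enumerable_thread_specific.hpp)
// keeps one T per thread of a task_system; local() returns the calling thread's slot.
// Internal API, so this is only a sketch; it assumes the calling thread is one of
// the threads registered with the task_system.
#include "threading/enumerable_thread_specific.hpp"
#include "threading/threading.hpp"

int count_with_ets(const arb::task_system_handle& ts) {
    arb::threading::enumerable_thread_specific<int> counts(0, ts);
    counts.local() += 1;                 // touches only this thread's entry
    int total = 0;
    for (int c: counts) total += c;      // reduce over all per-thread entries
    return total;
}
// -----------------------------------------------------------------------------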
-thread_private_spike_store::thread_private_spike_store(thread_private_spike_store&& t): impl_(std::move(t.impl_)) {}; +thread_private_spike_store::thread_private_spike_store(thread_private_spike_store&& t): + impl_(std::move(t.impl_)) +{} thread_private_spike_store::thread_private_spike_store(const task_system_handle& ts): - impl_(new local_spike_store_type(ts)) { -} + impl_(new local_spike_store_type(ts)) +{} thread_private_spike_store::~thread_private_spike_store() {} diff --git a/arbor/thread_private_spike_store.hpp b/arbor/thread_private_spike_store.hpp index eec507090ad6332ad430e78e6182d00ab463b8e2..1293de5b26521311d741785190dee0a8e2e66fcb 100644 --- a/arbor/thread_private_spike_store.hpp +++ b/arbor/thread_private_spike_store.hpp @@ -5,7 +5,6 @@ #include <arbor/common_types.hpp> #include <arbor/spike.hpp> -#include <arbor/execution_context.hpp> #include "threading/threading.hpp" diff --git a/arbor/threading/enumerable_thread_specific.hpp b/arbor/threading/enumerable_thread_specific.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1fbbfea407370c1a4cf81d3906a6bc6a012bbc04 --- /dev/null +++ b/arbor/threading/enumerable_thread_specific.hpp @@ -0,0 +1,52 @@ +#pragma once + +#include <vector> + +#include "threading.hpp" + +namespace arb { +namespace threading { + +template <typename T> +class enumerable_thread_specific { + std::unordered_map<std::thread::id, std::size_t> thread_ids_; + + using storage_class = std::vector<T>; + storage_class data; + +public: + using iterator = typename storage_class::iterator; + using const_iterator = typename storage_class::const_iterator; + + enumerable_thread_specific(const task_system_handle& ts): + thread_ids_{ts->get_thread_ids()}, + data{std::vector<T>(ts->get_num_threads())} + {} + + enumerable_thread_specific(const T& init, const task_system_handle& ts): + thread_ids_{ts->get_thread_ids()}, + data{std::vector<T>(ts->get_num_threads(), init)} + {} + + T& local() { + return data[thread_ids_.at(std::this_thread::get_id())]; + } + const T& local() const { + return data[thread_ids_.at(std::this_thread::get_id())]; + } + + auto size() const { return data.size(); } + + iterator begin() { return data.begin(); } + iterator end() { return data.end(); } + + const_iterator begin() const { return data.begin(); } + const_iterator end() const { return data.end(); } + + const_iterator cbegin() const { return data.cbegin(); } + const_iterator cend() const { return data.cend(); } +}; + +} // namespace threading +} // namespace arb + diff --git a/arbor/threading/thread_info.cpp b/arbor/threading/thread_info.cpp index 025de42097b31b66ea8352051491cb4d42c546ea..dce15dbb6eb819ef13cb9547d4f28d455c7c1169 100644 --- a/arbor/threading/thread_info.cpp +++ b/arbor/threading/thread_info.cpp @@ -45,6 +45,7 @@ util::optional<size_t> get_env_num_threads() { return util::nullopt; } + errno = 0; auto nthreads = std::strtoul(str, nullptr, 10); // check that the environment variable string describes a non-negative integer diff --git a/arbor/threading/threading.cpp b/arbor/threading/threading.cpp index 7c28cdc8cf9d69c5af78f7fc28cb3c0ba27bfbb9..ec116fad338132006f40cddec28cad884530084b 100644 --- a/arbor/threading/threading.cpp +++ b/arbor/threading/threading.cpp @@ -2,7 +2,6 @@ #include "threading.hpp" #include "thread_info.hpp" -#include <arbor/execution_context.hpp> using namespace arb::threading::impl; using namespace arb::threading; @@ -114,18 +113,10 @@ void task_system::async(task tsk) { q_[i % count_].push(std::move(tsk)); } -int 
task_system::get_num_threads() { +int task_system::get_num_threads() const { return threads_.size() + 1; } -std::unordered_map<std::thread::id, std::size_t> task_system::get_thread_ids() { +std::unordered_map<std::thread::id, std::size_t> task_system::get_thread_ids() const { return thread_ids_; }; - -task_system_handle arb::make_thread_pool() { - return arb::make_thread_pool(num_threads_init()); -} - -task_system_handle arb::make_thread_pool(int nthreads) { - return task_system_handle(new task_system(nthreads)); -} diff --git a/arbor/threading/threading.hpp b/arbor/threading/threading.hpp index ea084dcce9371417ab8df5379b38695b86fee0cd..5c9d8ca0f203d87a97d888cc66e73d1e22960eaa 100644 --- a/arbor/threading/threading.hpp +++ b/arbor/threading/threading.hpp @@ -14,8 +14,6 @@ #include <unordered_map> #include <utility> -#include <arbor/execution_context.hpp> - namespace arb { namespace threading { @@ -91,62 +89,12 @@ public: void try_run_task(); // Includes master thread. - int get_num_threads(); + int get_num_threads() const; // Returns the thread_id map - std::unordered_map<std::thread::id, std::size_t> get_thread_ids(); + std::unordered_map<std::thread::id, std::size_t> get_thread_ids() const; }; -/////////////////////////////////////////////////////////////////////// -// types -/////////////////////////////////////////////////////////////////////// - -template <typename T> -class enumerable_thread_specific { - std::unordered_map<std::thread::id, std::size_t> thread_ids_; - - using storage_class = std::vector<T>; - storage_class data; - -public: - using iterator = typename storage_class::iterator; - using const_iterator = typename storage_class::const_iterator; - - enumerable_thread_specific(const task_system_handle& ts): - thread_ids_{ts.get()->get_thread_ids()}, - data{std::vector<T>(ts.get()->get_num_threads())} - {} - - enumerable_thread_specific(const T& init, const task_system_handle& ts): - thread_ids_{ts.get()->get_thread_ids()}, - data{std::vector<T>(ts.get()->get_num_threads(), init)} - {} - - T& local() { - return data[thread_ids_.at(std::this_thread::get_id())]; - } - const T& local() const { - return data[thread_ids_.at(std::this_thread::get_id())]; - } - - auto size() const { return data.size(); } - - iterator begin() { return data.begin(); } - iterator end() { return data.end(); } - - const_iterator begin() const { return data.begin(); } - const_iterator end() const { return data.end(); } - - const_iterator cbegin() const { return data.cbegin(); } - const_iterator cend() const { return data.cend(); } -}; - -inline std::string description() { - return "CThread Pool"; -} - -constexpr bool multithreaded() { return true; } - class task_group { private: std::atomic<std::size_t> in_flight_{0}; @@ -235,4 +183,7 @@ struct parallel_for { } }; } // namespace threading + +using task_system_handle = std::shared_ptr<threading::task_system>; + } // namespace arb diff --git a/arbor/util/double_buffer.hpp b/arbor/util/double_buffer.hpp index 67afecdee7da1134c8d770106baaf9af904b5f82..845a5ce9457722e4688403007923d444002e75c3 100644 --- a/arbor/util/double_buffer.hpp +++ b/arbor/util/double_buffer.hpp @@ -1,10 +1,9 @@ #pragma once -#include <array> #include <atomic> +#include <vector> #include <arbor/assert.hpp> -#include <arbor/execution_context.hpp> namespace arb { namespace util { diff --git a/doc/cpp_distributed_context.rst b/doc/cpp_distributed_context.rst index 3eeeaee08964cdeb105763b25901bff6073648f0..b5878a1eafc420dff6221242046f2cfaaa57ac50 100644 --- a/doc/cpp_distributed_context.rst 
+++ b/doc/cpp_distributed_context.rst @@ -18,52 +18,55 @@ This means that if Arbor is compiled with support for MPI enabled, then at run t user can choose between using a non-distributed (local) context, or an distributed MPI context. -A global context is created by a user before building and running a simulation. -The context is then used to perform domain decomposition and initialize the simulation +An execution context is created by a user before building and running a simulation. +This context is then used to perform domain decomposition and initialize the simulation (see :ref:`cppsimulation` for more about the simulation building workflow). In the example below, a context that uses MPI is used to run a distributed simulation: +The public API does not directly expose :cpp:class:`arb::distributed_context` or any of its +implementations. +By default :cpp:class:`arb::context` uses only local "on-node" resources. To use an MPI +communicator for distributed communication, it can be initialised with the communicator: + .. container:: example-code .. code-block:: cpp - arb::hw::node_info node; + arb::proc_allocation resources; my_recipe recipe; - // Get an MPI communication context - arb::distributed_context context = arb::mpi_context(); + // Create a context that uses the local resources enumerated in resources, + // and that uses the standard MPI communicator MPI_COMM_WORLD for + // distributed communication. + arb::context context = arb::make_context(resources, MPI_COMM_WORLD); - // Partition model over the distributed system - arb::domain_decomposition decomp = arb::partition_load_balance(recipe, node, &context); + // Partition model over the distributed system. + arb::domain_decomposition decomp = arb::partition_load_balance(recipe, context); - // Instatitate the simulation over the distributed system - arb::simulation sim(recipe, decomp, &context); + // Instantiate the simulation over the distributed system. + arb::simulation sim(recipe, decomp, context); - // Run the simulation for 100ms over the distributed system + // Run the simulation for 100ms over the distributed system. sim.run(100, 0.01); -By default :cpp:class:`arb::distributed_context` uses an :cpp:class:`arb::local_context`, which -runs on the local computer or node, that is, it is not distributed. +In the back end :cpp:class:`arb::distributed_context` defines the interface for distributed contexts, +for which two implementations are provided: :cpp:class:`arb::local_context` and :cpp:class:`arb::mpi_context`. +Distributed contexts are wrapped in shared pointers: + +.. cpp:type:: distributed_context_handle = std::shared_ptr<distributed_context> -To run on a distributed system, use :cpp:class:`arb::mpi_context`, which uses -MPI for distributed communication. -By default the context will use the default MPI communicator ``MPI_COMM_WORLD``, -though it can be initialised with a user-supplied communicator. +A distributed context can then be generated using helper functions :cpp:func:`arb::make_local_context` and +:cpp:func:`arb::make_mpi_context`: .. container:: example-code .. code-block:: cpp - arb::distributed_context context; - - // This is equivelent to default constructed context above - arb::distributed_context context = arb::local_context(); + // Create a context that uses only local resources (is non-distributed).
+ auto dist_ctx = arb::make_local_context(); - // Create an MPI context that uses MPI_COMM_WORLD - arb::distributed_context context = arb::mpi_context(); - - // create an MPI context with a user-supplied MPI_Comm - arb::distributed_context context = arb::mpi_context(communicator); + // Create an MPI context that uses the standard MPI_COMM_WORLD communicator. + auto dist_ctx = arb::make_mpi_context(MPI_COMM_WORLD); Class Documentation @@ -170,6 +173,10 @@ Class Documentation Default constructor. +.. cpp:function:: distributed_context_handle make_local_context() + + Convenience function that returns a handle to a local context. + .. cpp:class:: mpi_context Implements the :cpp:class:`arb::distributed_context` interface for @@ -177,7 +184,12 @@ Class Documentation **Constructor:** - .. cpp:function:: mpi_context(MPI_Comm comm=MPI_COMM_WORLD) + .. cpp:function:: mpi_context(MPI_Comm comm) Create a context that will uses the MPI communicator :cpp:var:`comm`. - By default uses the global communicator ``MPI_COMM_WORLD``. + +.. cpp:function:: distributed_context_handle make_mpi_context(MPI_Comm comm) + + Convenience function that returns a handle to a :cpp:class:`arb::mpi_context` + that uses the MPI communicator comm. + diff --git a/doc/cpp_domdec.rst b/doc/cpp_domdec.rst index f4cd406436e37ff8567937e8b8aea6c04624f0f3..4ae78020fa2cc3c712cb896d2e78a3578f90aa0a 100644 --- a/doc/cpp_domdec.rst +++ b/doc/cpp_domdec.rst @@ -30,45 +30,141 @@ Load balancer Hardware -------- -.. cpp:namespace:: arb::hw +.. cpp:namespace:: arb + +.. cpp:class:: local_resources + + Enumerates the computational resources available locally, specifically the + number of hardware threads and the number of GPUs. + + The function :cpp:func:`arb::get_local_resources` can be used to automatically + detect the available resources, which are returned in a :cpp:class:`local_resources` instance: + + .. container:: example-code + + .. code-block:: cpp + + auto resources = arb::get_local_resources(); + std::cout << "This node supports " << resources.num_threads << " threads," + << " and " << resources.num_gpus << " gpus."; + + .. cpp:function:: local_resources(unsigned threads, unsigned gpus) + + Constructor. -.. cpp:class:: node_info + .. cpp:member:: const unsigned num_threads - Information about the computational resources available to a simulation, typically a + The number of threads available. + + .. cpp:member:: const unsigned num_gpus + + The number of GPUs available. + +.. cpp:function:: local_resources get_local_resources() + + Returns an instance of :cpp:class:`local_resources` with the following: + + * ``num_threads`` is determined from the ``ARB_NUM_THREADS`` environment variable if + set, otherwise Arbor attempts to detect the number of available hardware cores. + If Arbor can't determine the available threads it defaults to 1 thread. + * ``num_gpus`` is the number of GPUs detected using the CUDA + `cudaGetDeviceCount API call <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html>`_. + +.. cpp:class:: proc_allocation + + Enumerates the computational resources to be used for a simulation, typically a subset of the resources available on a physical hardware node. - When used for distributed simulations, where a model will be distributed over more than - one node, a :cpp:class:`hw::node_info` represents the resources available to the local - MPI rank. .. container:: example-code .. code-block:: cpp - // Make node that uses one thread for each available hardware thread, - // and one GPU if any GPUs are available.
- hw::node_info node; - node.num_cpu_cores = threading::num_threads(); - node.num_gpus = hw::num_gpus()>0? 1: 0; + // Default construction uses all detected cores/threads, and the first GPU, if available. + arb::proc_allocation resources; - .. cpp:function:: node_info() = default + // Remove any GPU from the resource description. + resources.gpu_id = -1; - Default constructor (sets 1 CPU core and 0 GPUs). - .. cpp:function:: node_info(unsigned cores, unsigned gpus) + .. cpp:function:: proc_allocation() = default - Constructor that sets the number of :cpp:var:`cores` and :cpp:var:`gpus` available. + Sets the number of threads to the number detected by :cpp:func:`get_local_resources`, and + chooses either the first available GPU, or no GPU if none are available. - .. cpp:member:: unsigned num_cpu_cores = 1 + .. cpp:function:: proc_allocation(unsigned threads, int gpu_id) - The number of CPU cores available. + Constructor that sets the number of threads to :cpp:var:`threads` and selects the GPU with id :cpp:var:`gpu_id`. - By default it is assumed that there is one core available. + .. cpp:member:: unsigned num_threads - .. cpp:member:: unsigned num_gpus = 0 + The number of CPU threads available. - The number of GPUs available. + .. cpp:member:: int gpu_id + + The identifier of the GPU to use. + The gpu id corresponds to the ``int device`` parameter used by CUDA API calls + to identify gpu devices. + Set to -1 to indicate that no GPU device is to be used. + See ``cudaSetDevice`` and ``cudaDeviceGetAttribute`` provided by the + `CUDA API <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html>`_. + + .. cpp:function:: bool has_gpu() const + + Indicates whether a GPU is selected (i.e. whether :cpp:member:`gpu_id` is not ``-1``). + +Execution Context +----------------- + +The :cpp:class:`proc_allocation` class enumerates the local hardware resources +to be used for a simulation. +A :cpp:class:`arb::context` combines these local resources with the distributed context used +for inter-node communication, behind an opaque handle that is passed to Arbor's interfaces. + +.. cpp:namespace:: arb + +.. cpp:class:: context + + A handle for the interfaces to the hardware resources used in a simulation. + A :cpp:class:`context` contains the local thread pool, and optionally the GPU state + and MPI communicator, if available. Users of the library do not directly use the functionality + provided by :cpp:class:`context`; instead they configure contexts, which are passed to + Arbor methods and types. + +.. cpp:function:: context make_context() + + Local context that uses all detected threads and a GPU if any are available. + +.. cpp:function:: context make_context(proc_allocation alloc) + + Local context that uses the local resources described by :cpp:var:`alloc`. + +.. cpp:function:: context make_context(proc_allocation alloc, MPI_Comm comm) + + A context that uses the local resources described by :cpp:var:`alloc`, and + uses the MPI communicator :cpp:var:`comm` for distributed calculation. + + +Here are some examples of how to create a :cpp:class:`arb::context`: + + .. container:: example-code + + .. code-block:: cpp + + #include <arbor/context.hpp> + + // Construct a non-distributed context that uses all detected available resources. + auto context = arb::make_context(); + + // Construct a context that: + // * does not use a GPU, regardless of whether one is available; + // * uses 8 threads in its thread pool. + arb::proc_allocation resources(8, -1); + auto context = arb::make_context(resources); - By default it is assumed that there are no GPUs.
+ // Construct a context that: + // * uses all available local hardware resources; + // * uses the standard MPI communicator MPI_COMM_WORLD for distributed computation. + arb::proc_allocation resources; // defaults to all detected local resources + auto context = arb::make_context(resources, MPI_COMM_WORLD); Load Balancers -------------- @@ -91,12 +187,11 @@ describes the cell groups on the local MPI rank. .. cpp:namespace:: arb -.. cpp:function:: domain_decomposition partition_load_balance(const recipe& rec, hw::node_info nd, const distributed_context* ctx) +.. cpp:function:: domain_decomposition partition_load_balance(const recipe& rec, const arb::context& ctx) Construct a :cpp:class:`domain_decomposition` that distributes the cells - in the model described by :cpp:any:`rec` over the set of distributed - compute nodes that communicate using :cpp:any:`ctx`, with hardware resources - on the calling node described by :cpp:any:`nd`. + in the model described by :cpp:any:`rec` over the distributed and local hardware + resources described by :cpp:any:`ctx`. The algorithm counts the number of each cell type in the global model, then partitions the cells of each type equally over the available nodes. diff --git a/doc/cpp_simulation.rst b/doc/cpp_simulation.rst index a1b5d5f8f442b6c7e47791922c588d8fd0a5c0d0..525314f798335d2289475858afe254352f660805 100644 --- a/doc/cpp_simulation.rst +++ b/doc/cpp_simulation.rst @@ -10,37 +10,33 @@ To build a simulation the following are needed: * An :cpp:class:`arb::recipe` that describes the cells and connections in the model. - * An :cpp:class:`arb::hw::node_info` that describes the CPU and GPU hardware - resources on which the model will be run. - * An :cpp:class:`arb::distributed_context` that describes the distributed system - on which the model will run. + * An :cpp:class:`arb::context` used to execute the simulation. The workflow to build a simulation is to first generate a :cpp:class:`arb::domain_decomposition` that describes the distribution of the model -over the local and distributed hardware resources (see :ref:`cppdomdec` and :ref:`cppdistcontext`), +over the local and distributed hardware resources (see :ref:`cppdomdec`), then build the simulation. .. container:: example-code .. code-block:: cpp - // Get a communication context - arb::distributed_context context; + #include <arbor/context.hpp> + #include <arbor/domain_decomposition.hpp> + #include <arbor/simulation.hpp> - // Make description of the hardware that the simulation will run on. - arb::hw::node_info node; - node.num_cpu_cores = arb::threading::num_threads(); - node.num_gpus = arb::hw::num_gpus()>0? 1: 0; // use 1 GPU if any available + // Construct an execution context. + arb::context context = arb::make_context(); // Make a recipe of user defined type my_recipe. my_recipe recipe; // Get a description of the partition the model over the cores // (and gpu if available) on node. - arb::domain_decomposition decomp = arb::partition_load_balance(recipe, node, &context); + arb::domain_decomposition decomp = arb::partition_load_balance(recipe, context); // Instatitate the simulation.
- arb::simulation sim(recipe, decomp, &context); + arb::simulation sim(recipe, decomp, context); Class Documentation @@ -59,8 +55,7 @@ Class Documentation * an :cpp:class:`arb::recipe` that describes the model; * an :cpp:class:`arb::domain_decomposition` that describes how the cells in the model are assigned to hardware resources; - * an :cpp:class:`arb::distributed_context` which performs communication - on distributed memory syustems. + * an :cpp:class:`arb::context` which is used to execute the simulation. * **Experimental inputs** that can change between model runs, such as external spike trains. @@ -81,7 +76,7 @@ Class Documentation **Constructor:** - .. cpp:function:: simulation(const recipe& rec, const domain_decomposition& decomp, const distributed_context* ctx) + .. cpp:function:: simulation(const recipe& rec, const domain_decomposition& decomp, const context& ctx) **Experimental inputs:** diff --git a/doc/index.rst b/doc/index.rst index f6de1c0fe9ce2e0292a61f3c07d5a168ba419ddf..0b4364d29e9be1044192ce4f068d837468698f1b 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -55,7 +55,6 @@ Some key features include: cpp_recipe cpp_domdec cpp_simulation - cpp_distributed_context .. toctree:: :caption: Developers: @@ -64,4 +63,5 @@ Some key features include: simd_api profiler sampling_api + cpp_distributed_context diff --git a/example/bench/bench.cpp b/example/bench/bench.cpp index a8c32991ecbf2ebde93e1e5586decef4218c915a..988b07b04c506f817609fe24bafd50959b1ca03b 100644 --- a/example/bench/bench.cpp +++ b/example/bench/bench.cpp @@ -9,8 +9,7 @@ #include <nlohmann/json.hpp> #include <arbor/profile/meter_manager.hpp> -#include <arbor/common_types.hpp> -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> #include <arbor/domain_decomposition.hpp> #include <arbor/load_balance.hpp> #include <arbor/profile/profiler.hpp> @@ -31,16 +30,23 @@ namespace profile = arb::profile; int main(int argc, char** argv) { + bool is_root = true; + try { - arb::execution_context context; #ifdef ARB_MPI_ENABLED aux::with_mpi guard(argc, argv, false); - context.distributed = arb::mpi_context(MPI_COMM_WORLD); + auto context = arb::make_context(arb::proc_allocation(), MPI_COMM_WORLD); + { + int rank = 0; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + is_root = rank==0; + } +#else + auto context = arb::make_context(); #endif #ifdef ARB_PROFILE_ENABLED - profile::profiler_initialize(context.thread_pool); + profile::profiler_initialize(context); #endif - const bool is_root = context.distributed->id()==0; std::cout << aux::mask_stream(is_root); @@ -48,31 +54,30 @@ int main(int argc, char** argv) { std::cout << params << "\n"; - profile::meter_manager meters(context.distributed); - meters.start(); + profile::meter_manager meters; + meters.start(context); // Create an instance of our recipe. bench_recipe recipe(params); - meters.checkpoint("recipe-build"); + meters.checkpoint("recipe-build", context); // Make the domain decomposition for the model - auto local = arb::local_allocation(context); - auto decomp = arb::partition_load_balance(recipe, local, context); - meters.checkpoint("domain-decomp"); + auto decomp = arb::partition_load_balance(recipe, context); + meters.checkpoint("domain-decomp", context); // Construct the model. arb::simulation sim(recipe, decomp, context); - meters.checkpoint("model-build"); + meters.checkpoint("model-build", context); // Run the simulation for 100 ms, with time steps of 0.01 ms. 
sim.run(params.duration, 0.01); - meters.checkpoint("model-run"); + meters.checkpoint("model-run", context); // write meters - auto report = profile::make_meter_report(meters); + auto report = profile::make_meter_report(meters, context); std::cout << report << "\n"; - if (is_root==0) { + if (is_root) { std::ofstream fid; fid.exceptions(std::ios_base::badbit | std::ios_base::failbit); fid.open("meters.json"); diff --git a/example/brunel/brunel_miniapp.cpp b/example/brunel/brunel_miniapp.cpp index 9a377a810446e9863904dc9ed36b1015f16893bf..0cf109ed488a25a5d64a703f511913ae25046766 100644 --- a/example/brunel/brunel_miniapp.cpp +++ b/example/brunel/brunel_miniapp.cpp @@ -6,6 +6,7 @@ #include <set> #include <vector> +#include <arbor/context.hpp> #include <arbor/common_types.hpp> #include <arbor/domain_decomposition.hpp> #include <arbor/event_generator.hpp> @@ -24,13 +25,14 @@ #include <aux/strsub.hpp> #ifdef ARB_MPI_ENABLED #include <aux/with_mpi.hpp> +#include <mpi.h> #endif #include "io.hpp" using namespace arb; -void banner(proc_allocation, const execution_context&); +void banner(const context& ctx); // Samples m unique values in interval [start, end) - gid. // We exclude gid because we don't want self-loops. @@ -183,25 +185,31 @@ private: }; int main(int argc, char** argv) { - execution_context context; + bool root = true; + int rank = 0; try { #ifdef ARB_MPI_ENABLED aux::with_mpi guard(argc, argv, false); - context.distributed = mpi_context(MPI_COMM_WORLD); -#endif -#ifdef ARB_PROFILE_ENABLED - profile::profiler_initialize(context.thread_pool); + auto context = arb::make_context(arb::proc_allocation(), MPI_COMM_WORLD); + { + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + root = rank==0; + } +#else + auto context = arb::make_context(); #endif - arb::profile::meter_manager meters(context.distributed); - meters.start(); - std::cout << aux::mask_stream(context.distributed->id()==0); + + std::cout << aux::mask_stream(root); + banner(context); + + arb::profile::meter_manager meters; + meters.start(context); + // read parameters - io::cl_options options = io::read_options(argc, argv, context.distributed->id()==0); - proc_allocation nd = local_allocation(context); - banner(nd, context); + io::cl_options options = io::read_options(argc, argv, root); - meters.checkpoint("setup"); + meters.checkpoint("setup", context); // The size of excitatory population. 
cell_size_type nexc = options.nexc; @@ -236,7 +244,7 @@ int main(int argc, char** argv) { partition_hint_map hints; hints[cell_kind::lif_neuron].cpu_group_size = group_size; - auto decomp = partition_load_balance(recipe, nd, context, hints); + auto decomp = partition_load_balance(recipe, context, hints); simulation sim(recipe, decomp, context); @@ -245,7 +253,6 @@ int main(int argc, char** argv) { if (options.spike_file_output) { using std::ios_base; - auto rank = context.distributed->id(); aux::path p = options.output_path; p /= aux::strsub("%_%.%", options.file_name, rank, options.file_extension); @@ -259,20 +266,20 @@ int main(int argc, char** argv) { } } - meters.checkpoint("model-init"); + meters.checkpoint("model-init", context); // run simulation sim.run(options.tfinal, options.dt); - meters.checkpoint("model-simulate"); + meters.checkpoint("model-simulate", context); // output profile and diagnostic feedback std::cout << profile::profiler_summary() << "\n"; std::cout << "\nThere were " << sim.num_spikes() << " spikes\n"; - auto report = profile::make_meter_report(meters); + auto report = profile::make_meter_report(meters, context); std::cout << report; - if (context.distributed->id()==0) { + if (root) { std::ofstream fid; fid.exceptions(std::ios_base::badbit | std::ios_base::failbit); fid.open("meters.json"); @@ -281,7 +288,7 @@ int main(int argc, char** argv) { } catch (io::usage_error& e) { // only print usage/startup errors on master - std::cerr << aux::mask_stream(context.distributed->id()==0); + std::cerr << aux::mask_stream(root); std::cerr << e.what() << "\n"; return 1; } @@ -292,13 +299,12 @@ int main(int argc, char** argv) { return 0; } -void banner(proc_allocation nd, const execution_context& ctx) { +void banner(const context& ctx) { std::cout << "==========================================\n"; - std::cout << " Arbor miniapp\n"; - std::cout << " - distributed : " << ctx.distributed->size() - << " (" << ctx.distributed->name() << ")\n"; - std::cout << " - threads : " << nd.num_threads << "\n"; - std::cout << " - gpus : " << nd.num_gpus << "\n"; + std::cout << " Brunel model miniapp\n"; + std::cout << " - distributed : " << arb::num_ranks(ctx) + << (arb::has_mpi(ctx)? " (mpi)": " (serial)") << "\n"; + std::cout << " - threads : " << arb::num_threads(ctx) << "\n"; + std::cout << " - gpus : " << (arb::has_gpu(ctx)? "yes": "no") << "\n"; std::cout << "==========================================\n"; } - diff --git a/example/generators/event_gen.cpp b/example/generators/event_gen.cpp index 6cd934d0f42e74682b5582b7ef76a95fe069e2dc..7ea031c489a00e3e922502eab4a71e41a2d40f53 100644 --- a/example/generators/event_gen.cpp +++ b/example/generators/event_gen.cpp @@ -12,8 +12,8 @@ #include <nlohmann/json.hpp> +#include <arbor/context.hpp> #include <arbor/common_types.hpp> -#include <arbor/distributed_context.hpp> #include <arbor/domain_decomposition.hpp> #include <arbor/event_generator.hpp> #include <arbor/load_balance.hpp> @@ -127,14 +127,13 @@ int main() { // A distributed_context is required for distributed computation (e.g. MPI). // For this simple one-cell example, non-distributed context is suitable, // which is what we get with a default-constructed distributed_context. - arb::execution_context context; + auto context = arb::make_context(); // Create an instance of our recipe. 
generator_recipe recipe; // Make the domain decomposition for the model - auto node = arb::local_allocation(context); - auto decomp = arb::partition_load_balance(recipe, node, context); + auto decomp = arb::partition_load_balance(recipe, context); // Construct the model. arb::simulation sim(recipe, decomp, context); diff --git a/example/miniapp/miniapp.cpp b/example/miniapp/miniapp.cpp index a84aedc18df4beab264bc55c78b0baf71a4c53cf..97530ed0cabff51a3d318ba6550581bf589becea 100644 --- a/example/miniapp/miniapp.cpp +++ b/example/miniapp/miniapp.cpp @@ -5,8 +5,7 @@ #include <memory> #include <vector> -#include <arbor/common_types.hpp> -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> #include <arbor/load_balance.hpp> #include <arbor/mc_cell.hpp> #include <arbor/profile/meter_manager.hpp> @@ -25,6 +24,7 @@ #include <aux/strsub.hpp> #ifdef ARB_MPI_ENABLED #include <aux/with_mpi.hpp> +#include <mpi.h> #endif #include "io.hpp" @@ -35,40 +35,45 @@ using namespace arb; using util::any_cast; -void banner(proc_allocation, const execution_context&); +void banner(const context&); std::unique_ptr<recipe> make_recipe(const io::cl_options&, const probe_distribution&); sample_trace make_trace(const probe_info& probe); std::fstream& open_or_throw(std::fstream& file, const aux::path& p, bool exclusive = false); void report_compartment_stats(const recipe&); int main(int argc, char** argv) { - // default serial context - execution_context context; + bool root = true; + int rank = 0; try { #ifdef ARB_MPI_ENABLED aux::with_mpi guard(argc, argv, false); - context.distributed = mpi_context(MPI_COMM_WORLD); + auto context = arb::make_context(arb::proc_allocation(), MPI_COMM_WORLD); + { + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + root = rank==0; + } +#else + auto context = arb::make_context(); #endif #ifdef ARB_PROFILE_ENABLED - profile::profiler_initialize(context.thread_pool); + profile::profiler_initialize(context); #endif - profile::meter_manager meters(context.distributed); - meters.start(); + profile::meter_manager meters; + meters.start(context); + + std::cout << aux::mask_stream(root); - std::cout << aux::mask_stream(context.distributed->id()==0); // read parameters - io::cl_options options = io::read_options(argc, argv, context.distributed->id()==0); + io::cl_options options = io::read_options(argc, argv, root); // TODO: add dry run mode // Use a node description that uses the number of threads used by the // threading back end, and 1 gpu if available. - proc_allocation nd = local_allocation(context); - nd.num_gpus = nd.num_gpus>=1? 
1: 0; - banner(nd, context); + banner(context); - meters.checkpoint("setup"); + meters.checkpoint("setup", context); // determine what to attach probes to probe_distribution pdist; @@ -80,7 +85,7 @@ int main(int argc, char** argv) { report_compartment_stats(*recipe); } - auto decomp = partition_load_balance(*recipe, nd, context); + auto decomp = partition_load_balance(*recipe, context); simulation sim(*recipe, decomp, context); // Set up samplers for probes on local cable cells, as requested @@ -118,7 +123,6 @@ int main(int argc, char** argv) { if (options.spike_file_output) { using std::ios_base; - auto rank = context.distributed->id(); aux::path p = options.output_path; p /= aux::strsub("%_%.%", options.file_name, rank, options.file_extension); @@ -132,12 +136,12 @@ int main(int argc, char** argv) { } } - meters.checkpoint("model-init"); + meters.checkpoint("model-init", context); // run model sim.run(options.tfinal, options.dt); - meters.checkpoint("model-simulate"); + meters.checkpoint("model-simulate", context); // output profile and diagnostic feedback auto profile = profile::profiler_summary(); @@ -150,9 +154,9 @@ int main(int argc, char** argv) { write_trace(trace, options.trace_prefix); } - auto report = profile::make_meter_report(meters); + auto report = profile::make_meter_report(meters, context); std::cout << report; - if (context.distributed->id()==0) { + if (root) { std::ofstream fid; fid.exceptions(std::ios_base::badbit | std::ios_base::failbit); fid.open("meters.json"); @@ -161,7 +165,7 @@ int main(int argc, char** argv) { } catch (io::usage_error& e) { // only print usage/startup errors on master - std::cerr << aux::mask_stream(context.distributed->id()==0); + std::cerr << aux::mask_stream(root); std::cerr << e.what() << "\n"; return 1; } @@ -172,13 +176,13 @@ int main(int argc, char** argv) { return 0; } -void banner(proc_allocation nd, const execution_context& ctx) { +void banner(const context& ctx) { std::cout << "==========================================\n"; std::cout << " Arbor miniapp\n"; - std::cout << " - distributed : " << ctx.distributed->size() - << " (" << ctx.distributed->name() << ")\n"; - std::cout << " - threads : " << nd.num_threads << "\n"; - std::cout << " - gpus : " << nd.num_gpus << "\n"; + std::cout << " - distributed : " << arb::num_ranks(ctx) + << (arb::has_mpi(ctx)? " (mpi)": " (serial)") << "\n"; + std::cout << " - threads : " << arb::num_threads(ctx) << "\n"; + std::cout << " - gpus : " << (arb::has_gpu(ctx)? 
"yes": "no") << "\n"; std::cout << "==========================================\n"; } diff --git a/example/ring/ring.cpp b/example/ring/ring.cpp index 11caa577bdd361491f8804d3982c03fbc6e85ff4..30cd1c54f7efcea1bf608ee3fb40d670c6c8bb0e 100644 --- a/example/ring/ring.cpp +++ b/example/ring/ring.cpp @@ -11,19 +11,26 @@ #include <arbor/assert_macro.hpp> #include <arbor/common_types.hpp> -#include <arbor/distributed_context.hpp> -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> #include <arbor/load_balance.hpp> #include <arbor/mc_cell.hpp> #include <arbor/profile/meter_manager.hpp> +#include <arbor/profile/profiler.hpp> #include <arbor/simple_sampler.hpp> #include <arbor/simulation.hpp> #include <arbor/recipe.hpp> +#include <arbor/version.hpp> +#include <aux/ioutil.hpp> #include <aux/json_meter.hpp> #include "parameters.hpp" +#ifdef ARB_MPI_ENABLED +#include <mpi.h> +#include <aux/with_mpi.hpp> +#endif + using arb::cell_gid_type; using arb::cell_lid_type; using arb::cell_size_type; @@ -106,25 +113,37 @@ private: }; struct cell_stats { - using size_type = std::uint64_t; + using size_type = unsigned; size_type ncells = 0; size_type nsegs = 0; size_type ncomp = 0; - cell_stats(arb::distributed_context_handle ctx, arb::recipe& r) { - size_type nranks = ctx->size(); - size_type rank = ctx->id(); + cell_stats(arb::recipe& r) { +#ifdef ARB_MPI_ENABLED + int nranks, rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nranks); ncells = r.num_cells(); size_type cells_per_rank = ncells/nranks; size_type b = rank*cells_per_rank; size_type e = (rank==nranks-1)? ncells: (rank+1)*cells_per_rank; + size_type nsegs_tmp = 0; + size_type ncomp_tmp = 0; for (size_type i=b; i<e; ++i) { + auto c = arb::util::any_cast<arb::mc_cell>(r.get_cell_description(i)); + nsegs_tmp += c.num_segments(); + ncomp_tmp += c.num_compartments(); + } + MPI_Allreduce(&nsegs_tmp, &nsegs, 1, MPI_UNSIGNED, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&ncomp_tmp, &ncomp, 1, MPI_UNSIGNED, MPI_SUM, MPI_COMM_WORLD); +#else + ncells = r.num_cells(); + for (size_type i=0; i<ncells; ++i) { auto c = arb::util::any_cast<arb::mc_cell>(r.get_cell_description(i)); nsegs += c.num_segments(); ncomp += c.num_compartments(); } - nsegs = ctx->sum(nsegs); - ncomp = ctx->sum(ncomp); +#endif } friend std::ostream& operator<<(std::ostream& o, const cell_stats& s) { @@ -137,36 +156,44 @@ struct cell_stats { int main(int argc, char** argv) { - // default serial context - arb::execution_context context; - try { + bool root = true; + #ifdef ARB_MPI_ENABLED aux::with_mpi guard(argc, argv, false); - context.distributed = mpi_context(MPI_COMM_WORLD); + auto context = arb::make_context(arb::proc_allocation(), MPI_COMM_WORLD); + { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + root = rank==0; + } +#else + auto context = arb::make_context(); #endif + #ifdef ARB_PROFILE_ENABLED - profile::profiler_initialize(context.thread_pool); + arb::profile::profiler_initialize(context); #endif - const bool root = context.distributed->id()==0; + std::cout << aux::mask_stream(root); + + // Print a banner with information about hardware configuration + std::cout << "gpu: " << (has_gpu(context)? "yes": "no") << "\n"; + std::cout << "threads: " << num_threads(context) << "\n"; + std::cout << "mpi: " << (has_mpi(context)? 
"yes": "no") << "\n"; + std::cout << "ranks: " << num_ranks(context) << "\n" << std::endl; auto params = read_options(argc, argv); - arb::profile::meter_manager meters(context.distributed); - meters.start(); + arb::profile::meter_manager meters; + meters.start(context); // Create an instance of our recipe. ring_recipe recipe(params.num_cells, params.cell, params.min_delay); - cell_stats stats(context.distributed, recipe); + cell_stats stats(recipe); std::cout << stats << "\n"; - // Use a node description that uses the number of threads used by the - // threading back end, and 1 gpu if available. - arb::proc_allocation nd = arb::local_allocation(context); - nd.num_gpus = nd.num_gpus>=1? 1: 0; - - auto decomp = arb::partition_load_balance(recipe, nd, context); + auto decomp = arb::partition_load_balance(recipe, context); // Construct the model. arb::simulation sim(recipe, decomp, context); @@ -191,19 +218,20 @@ int main(int argc, char** argv) { }); } - meters.checkpoint("model-init"); + meters.checkpoint("model-init", context); + std::cout << "running simulation" << std::endl; // Run the simulation for 100 ms, with time steps of 0.025 ms. sim.run(params.duration, 0.025); - meters.checkpoint("model-run"); + meters.checkpoint("model-run", context); auto ns = sim.num_spikes(); - std::cout << "\n" << ns << " spikes generated at rate of " - << params.duration/ns << " ms between spikes\n"; // Write spikes to file if (root) { + std::cout << "\n" << ns << " spikes generated at rate of " + << params.duration/ns << " ms between spikes\n"; std::ofstream fid("spikes.gdf"); if (!fid.good()) { std::cerr << "Warning: unable to open file spikes.gdf for spike output\n"; @@ -220,9 +248,9 @@ int main(int argc, char** argv) { } // Write the samples to a json file. - write_trace_json(voltage); + if (root) write_trace_json(voltage); - auto report = arb::profile::make_meter_report(meters); + auto report = arb::profile::make_meter_report(meters, context); std::cout << report; } catch (std::exception& e) { diff --git a/include/arbor/context.hpp b/include/arbor/context.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d781896c8ccc657aa1b88a3501e84264acee7fbc --- /dev/null +++ b/include/arbor/context.hpp @@ -0,0 +1,91 @@ +#pragma once + +#include <memory> + +namespace arb { + +/// Summary of all available local computation resource. +struct local_resources { + const unsigned num_threads; + const unsigned num_gpus; + + local_resources(unsigned threads, unsigned gpus): + num_threads(threads), + num_gpus(gpus) + {} +}; + +/// Determine available local domain resources. +local_resources get_local_resources(); + +/// A subset of local computation resources to use in a computation. +struct proc_allocation { + unsigned num_threads; + + // The gpu id corresponds to the `int device` parameter used by CUDA API calls + // to identify gpu devices. + // Set to -1 to indicate that no GPU device is to be used. + // see CUDA documenation for cudaSetDevice and cudaDeviceGetAttribute + int gpu_id; + + // By default a proc_allocation will take all available threads and the + // GPU with id 0, if available. + proc_allocation() { + auto avail = get_local_resources(); + + // By default take all available threads. + num_threads = avail.num_threads; + + // Take the first GPU, if available. + gpu_id = avail.num_gpus>0? 
0: -1; + } + + proc_allocation(unsigned threads, int gpu): + num_threads(threads), + gpu_id(gpu) + {} + + bool has_gpu() const { + return gpu_id>=0; + } +}; + +// arb::execution_context is a container defined in the implementation for state +// related to execution resources, specifically thread pools, gpus and MPI +// communicators. + +// Forward declare execution_context. +struct execution_context; + +// arb::context is an opaque handle for this container presented in the +// public API. +// It doesn't make sense to copy contexts, so we use a std::unique_ptr to +// implement the handle with lifetime management. +// +// Because execution_context is an incomplete type, a destructor prototype must +// be provided. +using context = std::unique_ptr<execution_context, void (*)(execution_context*)>; + + +// Helpers for creating contexts. These are implemented in the back end. + +// Non-distributed context that uses all detected threads and one GPU if available. +context make_context(); + +// Non-distributed context that uses resources described by resources +context make_context(const proc_allocation& resources); + +// Distributed context that uses MPI communicator comm, and local resources +// described by resources. +template <typename Comm> +context make_context(const proc_allocation& resources, Comm comm); + +// Queries for properties of execution resources in a context. + +bool has_gpu(const context&); +unsigned num_threads(const context&); +bool has_mpi(const context&); +unsigned num_ranks(const context&); +unsigned rank(const context&); + +} diff --git a/include/arbor/domain_decomposition.hpp b/include/arbor/domain_decomposition.hpp index 2aa14eafbb751401699e3cf7daf6493782017e0b..619526c5e6b55fbec6f2d1aca04c66b15780d148 100644 --- a/include/arbor/domain_decomposition.hpp +++ b/include/arbor/domain_decomposition.hpp @@ -6,19 +6,10 @@ #include <arbor/assert.hpp> #include <arbor/common_types.hpp> -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> namespace arb { -/// Local resource info for domain partitioning. -struct proc_allocation { - unsigned num_threads = 1; - unsigned num_gpus = 0; -}; - -/// Determine available local domain resources. -proc_allocation local_allocation(const execution_context& ctx); - /// Metadata for a local cell group. struct group_description { /// The kind of cell in the group. All cells in a cell_group have the same type. 
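For reference, a minimal sketch of how the public API declared in the new include/arbor/context.hpp is meant to be used; it relies only on the declarations above, and the CPU-only choice (gpu_id = -1) is just for illustration. The MPI overload make_context(resources, MPI_COMM_WORLD) used by the examples in this patch follows the same pattern.

    #include <iostream>
    #include <arbor/context.hpp>

    int main() {
        // Default proc_allocation: all detected threads, plus the first GPU if one is present.
        arb::proc_allocation resources;
        resources.gpu_id = -1;                   // run without a GPU for this sketch

        auto ctx = arb::make_context(resources); // arb::make_context() would use the defaults

        std::cout << "threads: " << arb::num_threads(ctx) << "\n"
                  << "gpu:     " << (arb::has_gpu(ctx)? "yes": "no") << "\n"
                  << "mpi:     " << (arb::has_mpi(ctx)? "yes": "no") << "\n"
                  << "ranks:   " << arb::num_ranks(ctx) << "\n";
    }
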
diff --git a/include/arbor/execution_context.hpp b/include/arbor/execution_context.hpp deleted file mode 100644 index 3a457a2a92c579b5a6a89e3802ddd4c84710bb44..0000000000000000000000000000000000000000 --- a/include/arbor/execution_context.hpp +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once - -#include <memory> -#include <string> - -#include <arbor/distributed_context.hpp> -#include <arbor/util/pp_util.hpp> - - -namespace arb { -namespace threading { - class task_system; -} -struct gpu_context; - -using task_system_handle = std::shared_ptr<threading::task_system>; -using distributed_context_handle = std::shared_ptr<distributed_context>; -using gpu_context_handle = std::shared_ptr<gpu_context>; - -task_system_handle make_thread_pool(); -task_system_handle make_thread_pool(int nthreads); - -gpu_context_handle make_gpu_context(); - -struct execution_context { - distributed_context_handle distributed; - task_system_handle thread_pool; - gpu_context_handle gpu; - - execution_context(): distributed(std::make_shared<distributed_context>()), - thread_pool(arb::make_thread_pool()), - gpu(arb::make_gpu_context()) {}; -}; - -} diff --git a/include/arbor/load_balance.hpp b/include/arbor/load_balance.hpp index 004445cceb35c78c6bc47487640dc6b0e46fd9ab..8849f45c5a2e075eb16702d45b2fe92c0faf0f9b 100644 --- a/include/arbor/load_balance.hpp +++ b/include/arbor/load_balance.hpp @@ -1,6 +1,6 @@ #pragma once -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> #include <arbor/domain_decomposition.hpp> #include <arbor/recipe.hpp> @@ -18,8 +18,7 @@ using partition_hint_map = std::unordered_map<cell_kind, partition_hint>; domain_decomposition partition_load_balance( const recipe& rec, - proc_allocation nd, - const execution_context& ctx, + const context& ctx, partition_hint_map hint_map = {}); } // namespace arb diff --git a/include/arbor/profile/meter_manager.hpp b/include/arbor/profile/meter_manager.hpp index 454d8dffdf6cd2d48f09176de2a93bdd3a026c5d..c264660e7f306cc94c5c342796f1ddbe3b1687d3 100644 --- a/include/arbor/profile/meter_manager.hpp +++ b/include/arbor/profile/meter_manager.hpp @@ -4,7 +4,7 @@ #include <string> #include <vector> -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> #include <arbor/profile/meter.hpp> #include <arbor/profile/timer.hpp> @@ -25,7 +25,7 @@ struct measurement { std::string name; std::string units; std::vector<std::vector<double>> measurements; - measurement(std::string, std::string, const std::vector<double>&, const distributed_context_handle&); + measurement(std::string, std::string, const std::vector<double>&, const context&); }; class meter_manager { @@ -38,17 +38,16 @@ private: std::vector<std::unique_ptr<meter>> meters_; std::vector<std::string> checkpoint_names_; - distributed_context_handle glob_ctx_; - public: - meter_manager(distributed_context_handle ctx); - void start(); - void checkpoint(std::string name); - distributed_context_handle context() const; + meter_manager(); + void start(const context& ctx); + void checkpoint(std::string name, const context& ctx); const std::vector<std::unique_ptr<meter>>& meters() const; const std::vector<std::string>& checkpoint_names() const; const std::vector<double>& times() const; + + const context& ctx() const; }; // Simple type for gathering distributed meter information @@ -60,7 +59,7 @@ struct meter_report { std::vector<std::string> hosts; }; -meter_report make_meter_report(const meter_manager& manager); +meter_report make_meter_report(const meter_manager& manager, const context& ctx); 
std::ostream& operator<<(std::ostream& o, const meter_report& report); } // namespace profile diff --git a/include/arbor/profile/profiler.hpp b/include/arbor/profile/profiler.hpp index fa8da040f7ae3ae4171afec667f510910b4b9970..9dbd2c61e910311f5b4fc4eb242d303957077109 100644 --- a/include/arbor/profile/profiler.hpp +++ b/include/arbor/profile/profiler.hpp @@ -5,10 +5,11 @@ #include <unordered_map> #include <vector> -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> #include <arbor/profile/timer.hpp> namespace arb { + namespace profile { // type used for region identifiers @@ -33,7 +34,7 @@ struct profile { }; void profiler_clear(); -void profiler_initialize(task_system_handle& ts); +void profiler_initialize(context& ctx); void profiler_enter(std::size_t region_id); void profiler_leave(); diff --git a/include/arbor/simulation.hpp b/include/arbor/simulation.hpp index 083409a655ee22e3d74f6f9b65da9761b7974883..bff327d1d4c2cbc2cb439eec843cb79bb1eda26b 100644 --- a/include/arbor/simulation.hpp +++ b/include/arbor/simulation.hpp @@ -6,11 +6,12 @@ #include <vector> #include <arbor/common_types.hpp> -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> #include <arbor/domain_decomposition.hpp> #include <arbor/recipe.hpp> #include <arbor/sampling.hpp> #include <arbor/schedule.hpp> +#include <arbor/spike.hpp> #include <arbor/util/handle_set.hpp> namespace arb { @@ -22,7 +23,7 @@ class simulation_state; class simulation { public: - simulation(const recipe& rec, const domain_decomposition& decomp, execution_context ctx); + simulation(const recipe& rec, const domain_decomposition& decomp, const context& ctx); void reset(); diff --git a/test/unit-distributed/distributed_listener.cpp b/test/unit-distributed/distributed_listener.cpp index 1110dfeba0dcdd30889c368968e8e45cb861b65e..075d00528867077ebccaca70cc72a7de2ec0e9b2 100644 --- a/test/unit-distributed/distributed_listener.cpp +++ b/test/unit-distributed/distributed_listener.cpp @@ -25,10 +25,11 @@ distributed_listener::printer& operator<<(distributed_listener::printer& p, cons return p; } -distributed_listener::distributed_listener(std::string f_base, arb::distributed_context_handle ctx): +distributed_listener::distributed_listener(std::string f_base, const arb::context& ctx): context_(ctx), - rank_(context_->id()), - size_(context_->size()), + rank_(arb::rank(ctx)), + size_(arb::num_ranks(ctx)), + mpi_(arb::has_mpi(ctx)), emit_(std::move(f_base), rank_) {} @@ -93,7 +94,13 @@ void distributed_listener::OnTestEnd(const TestInfo& test_info) { ++test_case_tests_; // count the number of ranks that had errors - int global_errors = context_->sum(test_failures_>0 ? 1 : 0); + int global_errors = test_failures_? 1: 0; +#ifdef ARB_MPI_ENABLED + if (mpi_) { + int local_error = test_failures_? 
1: 0; + MPI_Allreduce(&local_error, &global_errors, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + } +#endif if (global_errors>0) { ++test_case_failures_; emit_ << " GLOBAL_FAIL on " << global_errors << "ranks\n"; diff --git a/test/unit-distributed/distributed_listener.hpp b/test/unit-distributed/distributed_listener.hpp index b94acea908d19cd86387079dd99984122d581f07..48c9a48748a8bbd78982f08f1804170d55459f4b 100644 --- a/test/unit-distributed/distributed_listener.hpp +++ b/test/unit-distributed/distributed_listener.hpp @@ -4,7 +4,7 @@ #include <string> #include <utility> -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> #include "../gtest.h" @@ -29,7 +29,7 @@ class distributed_listener: public testing::EmptyTestEventListener { using TestPartResult = testing::TestPartResult; public: - distributed_listener(std::string f_base, arb::distributed_context_handle ctx); + distributed_listener(std::string f_base, const arb::context &ctx); /// Messages that are printed at the start and end of the test program. /// i.e. once only. @@ -57,16 +57,19 @@ private: std::ofstream fid_; bool cout_; + printer() = default; printer(std::string base_name, int rank); }; template <typename T> friend printer& operator<<(printer&, const T&); - arb::distributed_context_handle context_; + const arb::context& context_; int rank_; int size_; + bool mpi_; printer emit_; + int test_case_failures_; int test_case_tests_; int test_failures_; diff --git a/test/unit-distributed/test.cpp b/test/unit-distributed/test.cpp index 085a8990b2ee29e3e76df199a98ce7bd5816be89..4f116af81ee07a65b725447f79843f52ce82e03d 100644 --- a/test/unit-distributed/test.cpp +++ b/test/unit-distributed/test.cpp @@ -5,7 +5,7 @@ #include "../gtest.h" -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> #include <aux/ioutil.hpp> #include <aux/tinyopt.hpp> @@ -13,11 +13,16 @@ #include <aux/with_mpi.hpp> #endif +#include "distributed_context.hpp" +#include "execution_context.hpp" + #include "distributed_listener.hpp" +#include "test.hpp" + using namespace arb; -execution_context g_context; +context g_context = make_context(); const char* usage_str = "[OPTION]...\n" @@ -26,11 +31,14 @@ const char* usage_str = " -h, --help Display usage information and exit\n"; int main(int argc, char **argv) { + proc_allocation alloc; + alloc.gpu_id = -1; + #ifdef TEST_MPI aux::with_mpi guard(argc, argv, false); - g_context.distributed = mpi_context(MPI_COMM_WORLD); + g_context = arb::make_context(alloc, MPI_COMM_WORLD); #elif defined(TEST_LOCAL) - g_context.distributed = std::make_shared<distributed_context>(local_context()); + g_context = arb::make_context(alloc); #else #error "define TEST_MPI or TEST_LOCAL for distributed test" #endif @@ -42,7 +50,7 @@ int main(int argc, char **argv) { auto& listeners = testing::UnitTest::GetInstance()->listeners(); // replace original printer with our custom printer delete listeners.Release(listeners.default_result_printer()); - listeners.Append(new distributed_listener("run_"+g_context.distributed->name(), g_context.distributed)); + listeners.Append(new distributed_listener("run_"+g_context->distributed->name(), g_context)); int return_value = 0; try { @@ -84,5 +92,5 @@ int main(int argc, char **argv) { // perform global collective, to ensure that all ranks return // the same exit code - return g_context.distributed->max(return_value); + return g_context->distributed->max(return_value); } diff --git a/test/unit-distributed/test.hpp b/test/unit-distributed/test.hpp index
630bd188258e8fd5a57346be3e2182faa1203e8a..3cb3cb38eff57e85b0e1f1d5753936f8c3bef413 100644 --- a/test/unit-distributed/test.hpp +++ b/test/unit-distributed/test.hpp @@ -1,7 +1,7 @@ #pragma once -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> // Global context is a global variable, set in the main() funtion of the main // test driver test.cpp. -extern arb::execution_context g_context; +extern arb::context g_context; diff --git a/test/unit-distributed/test_communicator.cpp b/test/unit-distributed/test_communicator.cpp index 14a5bc0b36b7b63e2426340dd6cb46e844ddb738..e40a13acee3b8906b9440a60663832973a7c9429 100644 --- a/test/unit-distributed/test_communicator.cpp +++ b/test/unit-distributed/test_communicator.cpp @@ -10,25 +10,23 @@ #include <threading/threading.hpp> #include "communication/communicator.hpp" +#include "execution_context.hpp" #include "util/filter.hpp" #include "util/rangeutil.hpp" #include "util/span.hpp" using namespace arb; -static bool is_dry_run() { - //return global_policy::kind() == global_policy_kind::dryrun; - return false; -} - TEST(communicator, policy_basics) { - const auto num_domains = g_context.distributed->size(); - const auto rank = g_context.distributed->id(); - EXPECT_EQ(g_context.distributed->min(rank), 0); - if (!is_dry_run()) { - EXPECT_EQ(g_context.distributed->max(rank), num_domains-1); - } + const int num_domains = g_context->distributed->size(); + const int rank = g_context->distributed->id();; + + EXPECT_EQ((int)arb::num_ranks(g_context), num_domains); + EXPECT_EQ((int)arb::rank(g_context), rank); + + EXPECT_EQ(g_context->distributed->min(rank), 0); + EXPECT_EQ(g_context->distributed->max(rank), num_domains-1); } // Wrappers for creating and testing spikes used @@ -51,19 +49,11 @@ int get_value(const arb::spike& s) { // Test low level spike_gather function when each domain produces the same // number of spikes in the pattern used by dry run mode. TEST(communicator, gather_spikes_equal) { - const auto num_domains = g_context.distributed->size(); - const auto rank = g_context.distributed->id(); + const auto num_domains = g_context->distributed->size(); + const auto rank = g_context->distributed->id(); const auto n_local_spikes = 10; - /* - const auto n_local_cells = n_local_spikes; - // Important: set up meta-data in dry run back end. - if (is_dry_run()) { - g_context.set_sizes(g_context.size(), n_local_cells); - } - */ - // Create local spikes for communication. 
std::vector<spike> local_spikes; for (auto i=0; i<n_local_spikes; ++i) { @@ -71,7 +61,7 @@ TEST(communicator, gather_spikes_equal) { } // Perform exchange - const auto global_spikes = g_context.distributed->gather_spikes(local_spikes); + const auto global_spikes = g_context->distributed->gather_spikes(local_spikes); // Test that partition information is correct const auto& part = global_spikes.partition(); @@ -91,29 +81,19 @@ TEST(communicator, gather_spikes_equal) { // is a list of num_domains*n_local_spikes spikes that have // contiguous source gid const auto& spikes = global_spikes.values(); - EXPECT_EQ(n_local_spikes*g_context.distributed->size(), int(spikes.size())); + EXPECT_EQ(n_local_spikes*g_context->distributed->size(), int(spikes.size())); for (auto i=0u; i<spikes.size(); ++i) { const auto s = spikes[i]; EXPECT_EQ(i, unsigned(s.source.gid)); - if (is_dry_run()) { - EXPECT_EQ(0, get_value(s)); - } - else { - EXPECT_EQ(int(i)/n_local_spikes, get_value(s)); - } + EXPECT_EQ(int(i)/n_local_spikes, get_value(s)); } } // Test low level spike_gather function when the number of spikes per domain // are not equal. TEST(communicator, gather_spikes_variant) { - // This test does not apply if in dry run mode. - // Because dry run mode requires that each domain have the same - // number of spikes. - if (is_dry_run()) return; - - const auto num_domains = g_context.distributed->size(); - const auto rank = g_context.distributed->id(); + const auto num_domains = g_context->distributed->size(); + const auto rank = g_context->distributed->id(); // Parameter used to scale the number of spikes generated on successive // ranks. @@ -137,7 +117,7 @@ TEST(communicator, gather_spikes_variant) { } // Perform exchange - const auto global_spikes = g_context.distributed->gather_spikes(local_spikes); + const auto global_spikes = g_context->distributed->gather_spikes(local_spikes); // Test that partition information is correct const auto& part =global_spikes.partition(); @@ -167,7 +147,7 @@ namespace { public: ring_recipe(cell_size_type s): size_(s), - ranks_(g_context.distributed->size()) + ranks_(g_context->distributed->size()) {} cell_size_type num_cells() const override { @@ -231,7 +211,7 @@ namespace { public: all2all_recipe(cell_size_type s): size_(s), - ranks_(g_context.distributed->size()) + ranks_(g_context->distributed->size()) {} cell_size_type num_cells() const override { @@ -314,10 +294,10 @@ test_ring(const domain_decomposition& D, communicator& C, F&& f) { // gather the global set of spikes auto global_spikes = C.exchange(local_spikes); - if (global_spikes.size()!=g_context.distributed->sum(local_spikes.size())) { + if (global_spikes.size()!=g_context->distributed->sum(local_spikes.size())) { return ::testing::AssertionFailure() << "the number of gathered spikes " << global_spikes.size() << " doesn't match the expected " - << g_context.distributed->sum(local_spikes.size()); + << g_context->distributed->sum(local_spikes.size()); } // generate the events @@ -363,7 +343,7 @@ TEST(communicator, ring) using util::make_span; // construct a homogeneous network of 10*n_domain identical cells in a ring - unsigned N = g_context.distributed->size(); + unsigned N = g_context->distributed->size(); unsigned n_local = 10u; unsigned n_global = n_local*N; @@ -371,8 +351,8 @@ TEST(communicator, ring) auto R = ring_recipe(n_global); // use a node decomposition that reflects the resources available // on the node that the test is running on, including gpus. 
- const auto D = partition_load_balance(R, local_allocation(g_context), g_context); - auto C = communicator(R, D, g_context); + const auto D = partition_load_balance(R, g_context); + auto C = communicator(R, D, *g_context); // every cell fires EXPECT_TRUE(test_ring(D, C, [](cell_gid_type g){return true;})); @@ -405,10 +385,10 @@ test_all2all(const domain_decomposition& D, communicator& C, F&& f) { // gather the global set of spikes auto global_spikes = C.exchange(local_spikes); - if (global_spikes.size()!=g_context.distributed->sum(local_spikes.size())) { + if (global_spikes.size()!=g_context->distributed->sum(local_spikes.size())) { return ::testing::AssertionFailure() << "the number of gathered spikes " << global_spikes.size() << " doesn't match the expected " - << g_context.distributed->sum(local_spikes.size()); + << g_context->distributed->sum(local_spikes.size()); } // generate the events @@ -458,7 +438,7 @@ TEST(communicator, all2all) using util::make_span; // construct a homogeneous network of 10*n_domain identical cells in a ring - unsigned N = g_context.distributed->size(); + unsigned N = g_context->distributed->size(); unsigned n_local = 10u; unsigned n_global = n_local*N; @@ -466,8 +446,8 @@ TEST(communicator, all2all) auto R = all2all_recipe(n_global); // use a node decomposition that reflects the resources available // on the node that the test is running on, including gpus. - const auto D = partition_load_balance(R, local_allocation(g_context), g_context); - auto C = communicator(R, D, g_context); + const auto D = partition_load_balance(R, g_context); + auto C = communicator(R, D, *g_context); // every cell fires EXPECT_TRUE(test_all2all(D, C, [](cell_gid_type g){return true;})); diff --git a/test/unit-distributed/test_domain_decomposition.cpp b/test/unit-distributed/test_domain_decomposition.cpp index 2d513c1741040cd0c1c54070bcb0d6e998fc685e..43f3127f7772a680af3285333dfad03e10e198b7 100644 --- a/test/unit-distributed/test_domain_decomposition.cpp +++ b/test/unit-distributed/test_domain_decomposition.cpp @@ -7,14 +7,20 @@ #include <string> #include <vector> +#include <arbor/context.hpp> #include <arbor/domain_decomposition.hpp> #include <arbor/load_balance.hpp> +#include <arbor/version.hpp> #include "util/span.hpp" #include "../simple_recipes.hpp" #include "test.hpp" +#ifdef TEST_MPI +#include <mpi.h> +#endif + using namespace arb; namespace { @@ -63,120 +69,150 @@ namespace { }; } -TEST(domain_decomposition, homogeneous_population) { - const auto N = g_context.distributed->size(); - const auto I = g_context.distributed->id(); - - { // Test on a node with 1 cpu core and no gpus. - // We assume that all cells will be put into cell groups of size 1. - // This assumption will not hold in the future, requiring and update to - // the test. - proc_allocation nd{1, 0}; - - // 10 cells per domain - unsigned n_local = 10; - unsigned n_global = n_local*N; - const auto D = partition_load_balance(homo_recipe(n_global, dummy_cell{}), nd, g_context); - - EXPECT_EQ(D.num_global_cells, n_global); - EXPECT_EQ(D.num_local_cells, n_local); - EXPECT_EQ(D.groups.size(), n_local); +TEST(domain_decomposition, homogeneous_population_mc) { + // Test on a node with 1 cpu core and no gpus. + // We assume that all cells will be put into cell groups of size 1. + // This assumption will not hold in the future, requiring and update to + // the test. 
+ proc_allocation resources{1, -1}; +#ifdef ARB_TEST_MPI + auto ctx = make_context(resources, MPI_COMM_WORLD); +#else + auto ctx = make_context(resources); +#endif + + const unsigned N = arb::num_ranks(ctx); + const unsigned I = arb::rank(ctx); + + // 10 cells per domain + unsigned n_local = 10; + unsigned n_global = n_local*N; + const auto D = partition_load_balance(homo_recipe(n_global, dummy_cell{}), ctx); + + EXPECT_EQ(D.num_global_cells, n_global); + EXPECT_EQ(D.num_local_cells, n_local); + EXPECT_EQ(D.groups.size(), n_local); + + auto b = I*n_local; + auto e = (I+1)*n_local; + auto gids = util::make_span(b, e); + for (auto gid: gids) { + EXPECT_EQ(I, (unsigned)D.gid_domain(gid)); + } - auto b = I*n_local; - auto e = (I+1)*n_local; - auto gids = util::make_span(b, e); - for (auto gid: gids) { - EXPECT_EQ(I, D.gid_domain(gid)); - } + // Each cell group contains 1 cell of kind cable1d_neuron + // Each group should also be tagged for cpu execution + for (auto i: gids) { + auto local_group = i-b; + auto& grp = D.groups[local_group]; + EXPECT_EQ(grp.gids.size(), 1u); + EXPECT_EQ(grp.gids.front(), unsigned(i)); + EXPECT_EQ(grp.backend, backend_kind::multicore); + EXPECT_EQ(grp.kind, cell_kind::cable1d_neuron); + } +} - // Each cell group contains 1 cell of kind cable1d_neuron - // Each group should also be tagged for cpu execution - for (auto i: gids) { - auto local_group = i-b; - auto& grp = D.groups[local_group]; - EXPECT_EQ(grp.gids.size(), 1u); - EXPECT_EQ(grp.gids.front(), unsigned(i)); - EXPECT_EQ(grp.backend, backend_kind::multicore); - EXPECT_EQ(grp.kind, cell_kind::cable1d_neuron); - } +#ifdef ARB_GPU_ENABLED +TEST(domain_decomposition, homogeneous_population_gpu) { + // TODO: skip this test + // * when the ability to skip tests at runtime is added to Google Test. + // * when a GPU is not available + // https://github.com/google/googletest/pull/1544 + + // Test on a node with 1 gpu and 1 cpu core. + // Assumes that all cells will be placed on gpu in a single group. + + proc_allocation resources; + resources.num_threads = 1; +#ifdef ARB_TEST_MPI + auto ctx = make_context(resources, MPI_COMM_WORLD); +#else + auto ctx = make_context(resources); +#endif + + const unsigned N = arb::num_ranks(ctx); + const unsigned I = arb::rank(ctx); + + if (!resources.has_gpu()) return; // Skip if no gpu available. + + // 10 cells per domain + unsigned n_local = 10; + unsigned n_global = n_local*N; + const auto D = partition_load_balance(homo_recipe(n_global, dummy_cell{}), ctx); + + EXPECT_EQ(D.num_global_cells, n_global); + EXPECT_EQ(D.num_local_cells, n_local); + EXPECT_EQ(D.groups.size(), 1u); + + auto b = I*n_local; + auto e = (I+1)*n_local; + auto gids = util::make_span(b, e); + for (auto gid: gids) { + EXPECT_EQ(I, (unsigned)D.gid_domain(gid)); } - { // Test on a node with 1 gpu and 1 cpu core. - // Assumes that all cells will be placed on gpu in a single group. 
- proc_allocation nd{1, 1}; - - // 10 cells per domain - unsigned n_local = 10; - unsigned n_global = n_local*N; - const auto D = partition_load_balance(homo_recipe(n_global, dummy_cell{}), nd, g_context); - - EXPECT_EQ(D.num_global_cells, n_global); - EXPECT_EQ(D.num_local_cells, n_local); - EXPECT_EQ(D.groups.size(), 1u); - - auto b = I*n_local; - auto e = (I+1)*n_local; - auto gids = util::make_span(b, e); - for (auto gid: gids) { - EXPECT_EQ(I, D.gid_domain(gid)); - } - // Each cell group contains 1 cell of kind cable1d_neuron - // Each group should also be tagged for cpu execution - auto grp = D.groups[0u]; + // Each cell group contains 1 cell of kind cable1d_neuron + // Each group should also be tagged for cpu execution + auto grp = D.groups[0u]; - EXPECT_EQ(grp.gids.size(), n_local); - EXPECT_EQ(grp.gids.front(), b); - EXPECT_EQ(grp.gids.back(), e-1); - EXPECT_EQ(grp.backend, backend_kind::gpu); - EXPECT_EQ(grp.kind, cell_kind::cable1d_neuron); - } + EXPECT_EQ(grp.gids.size(), n_local); + EXPECT_EQ(grp.gids.front(), b); + EXPECT_EQ(grp.gids.back(), e-1); + EXPECT_EQ(grp.backend, backend_kind::gpu); + EXPECT_EQ(grp.kind, cell_kind::cable1d_neuron); } +#endif TEST(domain_decomposition, heterogeneous_population) { - const auto N = g_context.distributed->size(); - const auto I = g_context.distributed->id(); - - { // Test on a node with 1 cpu core and no gpus. - // We assume that all cells will be put into cell groups of size 1. - // This assumption will not hold in the future, requiring and update to - // the test. - proc_allocation nd{1, 0}; - - // 10 cells per domain - const unsigned n_local = 10; - const unsigned n_global = n_local*N; - const unsigned n_local_grps = n_local; // 1 cell per group - auto R = hetero_recipe(n_global); - const auto D = partition_load_balance(R, nd, g_context); - - EXPECT_EQ(D.num_global_cells, n_global); - EXPECT_EQ(D.num_local_cells, n_local); - EXPECT_EQ(D.groups.size(), n_local); - - auto b = I*n_local; - auto e = (I+1)*n_local; - auto gids = util::make_span(b, e); - for (auto gid: gids) { - EXPECT_EQ(I, D.gid_domain(gid)); - } + // Test on a node with 1 cpu core and no gpus. + // We assume that all cells will be put into cell groups of size 1. + // This assumption will not hold in the future, requiring and update to + // the test. 
+ proc_allocation resources{1, -1}; +#ifdef ARB_TEST_MPI + auto ctx = make_context(resources, MPI_COMM_WORLD); +#else + auto ctx = make_context(resources); +#endif + + const auto N = arb::num_ranks(ctx); + const auto I = arb::rank(ctx); + + + // 10 cells per domain + const unsigned n_local = 10; + const unsigned n_global = n_local*N; + const unsigned n_local_grps = n_local; // 1 cell per group + auto R = hetero_recipe(n_global); + const auto D = partition_load_balance(R, ctx); + + EXPECT_EQ(D.num_global_cells, n_global); + EXPECT_EQ(D.num_local_cells, n_local); + EXPECT_EQ(D.groups.size(), n_local); + + auto b = I*n_local; + auto e = (I+1)*n_local; + auto gids = util::make_span(b, e); + for (auto gid: gids) { + EXPECT_EQ(I, (unsigned)D.gid_domain(gid)); + } - // Each cell group contains 1 cell of kind cable1d_neuron - // Each group should also be tagged for cpu execution - auto grps = util::make_span(0, n_local_grps); - std::map<cell_kind, std::set<cell_gid_type>> kind_lists; - for (auto i: grps) { - auto& grp = D.groups[i]; - EXPECT_EQ(grp.gids.size(), 1u); - kind_lists[grp.kind].insert(grp.gids.front()); - EXPECT_EQ(grp.backend, backend_kind::multicore); - } + // Each cell group contains 1 cell of kind cable1d_neuron + // Each group should also be tagged for cpu execution + auto grps = util::make_span(0, n_local_grps); + std::map<cell_kind, std::set<cell_gid_type>> kind_lists; + for (auto i: grps) { + auto& grp = D.groups[i]; + EXPECT_EQ(grp.gids.size(), 1u); + kind_lists[grp.kind].insert(grp.gids.front()); + EXPECT_EQ(grp.backend, backend_kind::multicore); + } - for (auto k: {cell_kind::cable1d_neuron, cell_kind::spike_source}) { - const auto& gids = kind_lists[k]; - EXPECT_EQ(gids.size(), n_local/2); - for (auto gid: gids) { - EXPECT_EQ(k, R.get_cell_kind(gid)); - } + for (auto k: {cell_kind::cable1d_neuron, cell_kind::spike_source}) { + const auto& gids = kind_lists[k]; + EXPECT_EQ(gids.size(), n_local/2); + for (auto gid: gids) { + EXPECT_EQ(k, R.get_cell_kind(gid)); } } } diff --git a/test/unit/test_backend.cpp b/test/unit/test_backend.cpp index 48f5836c2e90cb1b979f636eda7bc6d3086aade6..b43b7554284b2031cc768fa910608737a1f9db5e 100644 --- a/test/unit/test_backend.cpp +++ b/test/unit/test_backend.cpp @@ -1,6 +1,7 @@ #include <arbor/common_types.hpp> #include <arbor/version.hpp> +#include "execution_context.hpp" #include "fvm_lowered_cell.hpp" #include "../gtest.h" diff --git a/test/unit/test_domain_decomposition.cpp b/test/unit/test_domain_decomposition.cpp index ac8b4a2d6252d4f6591674a1c0975a05251d3028..a35cef75753a0d221b65dd389ed8761e2095a749 100644 --- a/test/unit/test_domain_decomposition.cpp +++ b/test/unit/test_domain_decomposition.cpp @@ -2,7 +2,7 @@ #include <stdexcept> -#include <arbor/execution_context.hpp> +#include <arbor/context.hpp> #include <arbor/domain_decomposition.hpp> #include <arbor/load_balance.hpp> @@ -13,6 +13,17 @@ using namespace arb; using arb::util::make_span; +// TODO +// The tests here will only test domain decomposition with GPUs when compiled +// with CUDA support and run on a system with a GPU. +// Ideally the tests should test domain decompositions under all conditions, however +// to do that we have to refactor the partition_load_balance algorithm. +// The partition_load_balance performs the decomposition to distribute +// over resources described by the user-supplied arb::context, which is a +// provides an interface to resources available at runtime. 
+// The best way to test under all conditions, is probably to refactor the +// partition_load_balance into components that can be tested in isolation. + namespace { // Dummy recipes types for testing. @@ -48,42 +59,16 @@ namespace { // test assumes one domain TEST(domain_decomposition, homogenous_population) { - execution_context context; - - { // Test on a node with 1 cpu core and no gpus. - // We assume that all cells will be put into cell groups of size 1. - // This assumption will not hold in the future, requiring and update to - // the test. - proc_allocation nd{1, 0}; - - unsigned num_cells = 10; - const auto D = partition_load_balance(homo_recipe(num_cells, dummy_cell{}), nd, context); - - EXPECT_EQ(D.num_global_cells, num_cells); - EXPECT_EQ(D.num_local_cells, num_cells); - EXPECT_EQ(D.groups.size(), num_cells); + proc_allocation resources; + resources.num_threads = 1; - auto gids = make_span(num_cells); - for (auto gid: gids) { - EXPECT_EQ(0, D.gid_domain(gid)); - } - - // Each cell group contains 1 cell of kind cable1d_neuron - // Each group should also be tagged for cpu execution - for (auto i: gids) { - auto& grp = D.groups[i]; - EXPECT_EQ(grp.gids.size(), 1u); - EXPECT_EQ(grp.gids.front(), unsigned(i)); - EXPECT_EQ(grp.backend, backend_kind::multicore); - EXPECT_EQ(grp.kind, cell_kind::cable1d_neuron); - } - } - { // Test on a node with 1 gpu and 1 cpu core. + if (resources.has_gpu()) { + // Test on a node with 1 gpu and 1 cpu core. // Assumes that all cells will be placed on gpu in a single group. - proc_allocation nd{1, 1}; + auto ctx = make_context(resources); unsigned num_cells = 10; - const auto D = partition_load_balance(homo_recipe(num_cells, dummy_cell{}), nd, context); + const auto D = partition_load_balance(homo_recipe(num_cells, dummy_cell{}), ctx); EXPECT_EQ(D.num_global_cells, num_cells); EXPECT_EQ(D.num_local_cells, num_cells); @@ -104,21 +89,17 @@ TEST(domain_decomposition, homogenous_population) EXPECT_EQ(grp.backend, backend_kind::gpu); EXPECT_EQ(grp.kind, cell_kind::cable1d_neuron); } -} + { + resources.gpu_id = -1; // disable GPU if available + auto ctx = make_context(resources); -TEST(domain_decomposition, heterogenous_population) -{ - execution_context context; - - { // Test on a node with 1 cpu core and no gpus. + // Test on a node with 1 cpu core and no gpus. // We assume that all cells will be put into cell groups of size 1. // This assumption will not hold in the future, requiring and update to // the test. 
- proc_allocation nd{1, 0}; unsigned num_cells = 10; - auto R = hetero_recipe(num_cells); - const auto D = partition_load_balance(R, nd, context); + const auto D = partition_load_balance(homo_recipe(num_cells, dummy_cell{}), ctx); EXPECT_EQ(D.num_global_cells, num_cells); EXPECT_EQ(D.num_local_cells, num_cells); @@ -131,32 +112,30 @@ TEST(domain_decomposition, heterogenous_population) // Each cell group contains 1 cell of kind cable1d_neuron // Each group should also be tagged for cpu execution - auto grps = make_span(num_cells); - std::map<cell_kind, std::set<cell_gid_type>> kind_lists; - for (auto i: grps) { + for (auto i: gids) { auto& grp = D.groups[i]; EXPECT_EQ(grp.gids.size(), 1u); - auto k = grp.kind; - kind_lists[k].insert(grp.gids.front()); + EXPECT_EQ(grp.gids.front(), unsigned(i)); EXPECT_EQ(grp.backend, backend_kind::multicore); - } - - for (auto k: {cell_kind::cable1d_neuron, cell_kind::spike_source}) { - const auto& gids = kind_lists[k]; - EXPECT_EQ(gids.size(), num_cells/2); - for (auto gid: gids) { - EXPECT_EQ(k, R.get_cell_kind(gid)); - } + EXPECT_EQ(grp.kind, cell_kind::cable1d_neuron); } } - { // Test on a node with 1 gpu and 1 cpu core. +} + +TEST(domain_decomposition, heterogenous_population) +{ + proc_allocation resources; + resources.num_threads = 1; + + if (resources.has_gpu()) { + // Test on a node with 1 gpu and 1 cpu core. // Assumes that calble cells are on gpu in a single group, and // rff cells are on cpu in cell groups of size 1 - proc_allocation nd{1, 1}; + auto ctx = make_context(resources); unsigned num_cells = 10; auto R = hetero_recipe(num_cells); - const auto D = partition_load_balance(R, nd, context); + const auto D = partition_load_balance(R, ctx); EXPECT_EQ(D.num_global_cells, num_cells); EXPECT_EQ(D.num_local_cells, num_cells); @@ -187,13 +166,58 @@ TEST(domain_decomposition, heterogenous_population) } EXPECT_EQ(num_cells, ncells); } + { + // Test on a node with 1 cpu core and no gpus. + // We assume that all cells will be put into cell groups of size 1. + // This assumption will not hold in the future, requiring and update to + // the test. + + resources.gpu_id = -1; // disable GPU if available + auto ctx = make_context(resources); + + unsigned num_cells = 10; + auto R = hetero_recipe(num_cells); + const auto D = partition_load_balance(R, ctx); + + EXPECT_EQ(D.num_global_cells, num_cells); + EXPECT_EQ(D.num_local_cells, num_cells); + EXPECT_EQ(D.groups.size(), num_cells); + + auto gids = make_span(num_cells); + for (auto gid: gids) { + EXPECT_EQ(0, D.gid_domain(gid)); + } + + // Each cell group contains 1 cell of kind cable1d_neuron + // Each group should also be tagged for cpu execution + auto grps = make_span(num_cells); + std::map<cell_kind, std::set<cell_gid_type>> kind_lists; + for (auto i: grps) { + auto& grp = D.groups[i]; + EXPECT_EQ(grp.gids.size(), 1u); + auto k = grp.kind; + kind_lists[k].insert(grp.gids.front()); + EXPECT_EQ(grp.backend, backend_kind::multicore); + } + + for (auto k: {cell_kind::cable1d_neuron, cell_kind::spike_source}) { + const auto& gids = kind_lists[k]; + EXPECT_EQ(gids.size(), num_cells/2); + for (auto gid: gids) { + EXPECT_EQ(k, R.get_cell_kind(gid)); + } + } + } } TEST(domain_decomposition, hints) { // Check that we can provide group size hint and gpu/cpu preference // by cell kind. + // The hints perfer the multicore backend, so the decomposition is expected + // to never have cell groups on the GPU, regardless of whether a GPU is + // available or not. 
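Under the new interface the per-node resources travel inside the context, so partition_load_balance takes the context (plus optional hints) instead of a separate proc_allocation, and the same context is handed to the simulation. An illustrative fragment, assuming a recipe rec, along the lines of the test below:

    auto ctx = arb::make_context();            // describes the available threads and GPU

    arb::partition_hint_map hints;
    hints[arb::cell_kind::cable1d_neuron].cpu_group_size = 3;

    auto decomp = arb::partition_load_balance(rec, ctx, hints);
    arb::simulation sim(rec, decomp, ctx);
    sim.run(100, 0.025);                       // t_final and dt in ms
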
- execution_context context; + auto ctx = make_context(); partition_hint_map hints; hints[cell_kind::cable1d_neuron].cpu_group_size = 3; @@ -202,8 +226,7 @@ TEST(domain_decomposition, hints) { domain_decomposition D = partition_load_balance( hetero_recipe(20), - proc_allocation{16, 1}, // 16 threads, 1 gpu. - context, + ctx, hints); std::vector<std::vector<cell_gid_type>> expected_c1d_groups = diff --git a/test/unit/test_fvm_lowered.cpp b/test/unit/test_fvm_lowered.cpp index 9001ef0c3be38d5bb1f0a7e58e0cead6721d7f73..80e73927bb5483d3a551a784488eaf0c546eab72 100644 --- a/test/unit/test_fvm_lowered.cpp +++ b/test/unit/test_fvm_lowered.cpp @@ -4,7 +4,6 @@ #include "../gtest.h" #include <arbor/common_types.hpp> -#include <arbor/distributed_context.hpp> #include <arbor/domain_decomposition.hpp> #include <arbor/fvm_types.hpp> #include <arbor/load_balance.hpp> @@ -19,6 +18,7 @@ #include "algorithms.hpp" #include "backends/multicore/fvm.hpp" #include "backends/multicore/mechanism.hpp" +#include "execution_context.hpp" #include "fvm_lowered_cell.hpp" #include "fvm_lowered_cell_impl.hpp" #include "sampler_map.hpp" @@ -256,8 +256,6 @@ TEST(fvm_lowered, derived_mechs) { // // 3. Cell with both test_kin1 and custom_kin1. - execution_context context; - std::vector<mc_cell> cells(3); for (int i = 0; i<3; ++i) { mc_cell& c = cells[i]; @@ -298,6 +296,7 @@ TEST(fvm_lowered, derived_mechs) { std::vector<target_handle> targets; probe_association_map<probe_handle> probe_map; + execution_context context; fvm_cell fvcell(context); fvcell.initialize({0, 1, 2}, rec, targets, probe_map); @@ -318,7 +317,6 @@ TEST(fvm_lowered, derived_mechs) { util::sort(tau_values); EXPECT_EQ(fvec({10., 20.}), tau_values); } - { // Test dynamics: // 1. Current at same point on cell 0 at time 10 ms should equal that @@ -336,9 +334,9 @@ TEST(fvm_lowered, derived_mechs) { float times[] = {10.f, 20.f}; - execution_context context; - auto decomp = partition_load_balance(rec, proc_allocation{1, 0}, context); - simulation sim(rec, decomp, context); + auto ctx = make_context(); + auto decomp = partition_load_balance(rec, ctx); + simulation sim(rec, decomp, ctx); sim.add_sampler(all_probes, explicit_schedule(times), sampler); sim.run(30.0, 1.f/1024); @@ -354,6 +352,7 @@ TEST(fvm_lowered, derived_mechs) { } } + // Test area-weighted linear combination of ion species concentrations TEST(fvm_lowered, weighted_write_ion) { diff --git a/test/unit/test_gpu_stack.cu b/test/unit/test_gpu_stack.cu index 9e0235dd2c4a047c1b869575a29beddbfabace4a..e75f8c3b08f174a34f7f06976a32d5d43690d9ac 100644 --- a/test/unit/test_gpu_stack.cu +++ b/test/unit/test_gpu_stack.cu @@ -1,17 +1,19 @@ #include "../gtest.h" -#include <backends/gpu/stack.hpp> -#include <backends/gpu/stack_cu.hpp> -#include <backends/gpu/managed_ptr.hpp> -#include <arbor/execution_context.hpp> +#include "backends/gpu/stack.hpp" +#include "backends/gpu/stack_cu.hpp" +#include "backends/gpu/managed_ptr.hpp" +#include "gpu_context.hpp" using namespace arb; TEST(stack, construction) { using T = int; - execution_context context; - gpu::stack<T> s(10, context.gpu); + auto context = make_gpu_context(0); + if (!context->has_gpu()) return; + + gpu::stack<T> s(10, context); EXPECT_EQ(0u, s.size()); EXPECT_EQ(10u, s.capacity()); @@ -53,11 +55,12 @@ TEST(stack, push_back) { using T = int; using stack = gpu::stack<T>; - execution_context context; + auto context = make_gpu_context(0); + if (!context->has_gpu()) return; const unsigned n = 10; EXPECT_TRUE(n%2 == 0); // require n is even for tests to work - auto 
s = stack(n, context.gpu); + auto s = stack(n, context); auto& sstorage = s.storage(); kernels::push_back<<<1, n>>>(sstorage, kernels::all_ftor()); @@ -88,10 +91,11 @@ TEST(stack, overflow) { using T = int; using stack = gpu::stack<T>; - execution_context context; + auto context = make_gpu_context(0); + if (!context->has_gpu()) return; const unsigned n = 10; - auto s = stack(n, context.gpu); + auto s = stack(n, context); auto& sstorage = s.storage(); EXPECT_FALSE(s.overflow()); @@ -107,9 +111,10 @@ TEST(stack, empty) { using T = int; using stack = gpu::stack<T>; - execution_context context; + auto context = make_gpu_context(0); + if (!context->has_gpu()) return; - stack s(0u, context.gpu); + stack s(0u, context); EXPECT_EQ(s.size(), 0u); EXPECT_EQ(s.capacity(), 0u); diff --git a/test/unit/test_lif_cell_group.cpp b/test/unit/test_lif_cell_group.cpp index 51283529dbdcc50bb814e8740077e8872793b57d..7f0e8d839fbb3f93d47ad3155c1949e8635e4684 100644 --- a/test/unit/test_lif_cell_group.cpp +++ b/test/unit/test_lif_cell_group.cpp @@ -154,10 +154,9 @@ TEST(lif_cell_group, spikes) { // make two lif cells path_recipe recipe(2, 1000, 0.1); - execution_context context; - proc_allocation nd = local_allocation(context); + auto context = make_context(); - auto decomp = partition_load_balance(recipe, nd, context); + auto decomp = partition_load_balance(recipe, context); simulation sim(recipe, decomp, context); std::vector<spike_event> events; @@ -193,10 +192,9 @@ TEST(lif_cell_group, ring) // Total simulation time. time_type simulation_time = 100; - execution_context context; - proc_allocation nd = local_allocation(context); + auto context = make_context(); auto recipe = ring_recipe(num_lif_cells, weight, delay); - auto decomp = partition_load_balance(recipe, nd, context); + auto decomp = partition_load_balance(recipe, context); // Creates a simulation with a ring recipe of lif neurons simulation sim(recipe, decomp, context); diff --git a/test/unit/test_local_context.cpp b/test/unit/test_local_context.cpp index 7ac207d8950ca98526a6a21b15daa97bc693124b..0f42e206f22eceefb672039097c32a5e278aa50e 100644 --- a/test/unit/test_local_context.cpp +++ b/test/unit/test_local_context.cpp @@ -1,8 +1,8 @@ #include <vector> #include "../gtest.h" +#include "distributed_context.hpp" -#include <arbor/distributed_context.hpp> #include <arbor/spike.hpp> // Test that there are no errors constructing a distributed_context from a local_context diff --git a/test/unit/test_mc_cell_group_gpu.cpp b/test/unit/test_mc_cell_group_gpu.cpp index ad15d437467cfb915e2f41346adfdecb2157e180..7628dce147c61966aa77bab1cdbb24cbc62f6d65 100644 --- a/test/unit/test_mc_cell_group_gpu.cpp +++ b/test/unit/test_mc_cell_group_gpu.cpp @@ -3,6 +3,7 @@ #include <arbor/common_types.hpp> #include "epoch.hpp" +#include "execution_context.hpp" #include "fvm_lowered_cell.hpp" #include "mc_cell_group.hpp" diff --git a/test/unit/test_simd.cpp b/test/unit/test_simd.cpp index 8b37db6503e5e4b9dc71ee3384a7a27d69b3f9c2..40c05587b99f08fe0b3507c935933fd41406cfda 100644 --- a/test/unit/test_simd.cpp +++ b/test/unit/test_simd.cpp @@ -706,6 +706,11 @@ TYPED_TEST_P(simd_fp_value, fp_maths) { pow(simd(u), simd(v)).copy_to(r); EXPECT_TRUE(testing::seq_almost_eq<fp>(pow_u_v_int, r)); } + + // The tests can cause floating point exceptions, which may set errno to nonzero + // value. + // Reset errno so that subsequent tests are not affected. 
+ errno = 0; } // Check special function behaviour for specific values including diff --git a/test/unit/test_spike_store.cpp b/test/unit/test_spike_store.cpp index 11bd112344223793692ff93081cb4175dfe12dd6..61c01340d88308ff35e7e4454f5d17ef2e18e080 100644 --- a/test/unit/test_spike_store.cpp +++ b/test/unit/test_spike_store.cpp @@ -1,8 +1,8 @@ #include "../gtest.h" #include <arbor/spike.hpp> -#include <arbor/execution_context.hpp> +#include "execution_context.hpp" #include "thread_private_spike_store.hpp" using arb::spike; diff --git a/test/unit/test_thread.cpp b/test/unit/test_thread.cpp index ea3c0b4eb06a4120f7ec3e6524cb1d2cc04dcf14..e1f0b2ed1117244ba919b349976191acd8fc908b 100644 --- a/test/unit/test_thread.cpp +++ b/test/unit/test_thread.cpp @@ -1,6 +1,5 @@ #include "../gtest.h" #include "common.hpp" -#include <arbor/execution_context.hpp> #include <iostream> #include <ostream> @@ -8,6 +7,7 @@ #include <arbor/version.hpp> #include "threading/threading.hpp" +#include "threading/enumerable_thread_specific.hpp" using namespace arb::threading::impl; using namespace arb::threading; diff --git a/test/validation/validate_ball_and_stick.cpp b/test/validation/validate_ball_and_stick.cpp index 8d3691d2b9a25a9fee40daade18fbb8b092ccfc4..2a047354bf3efa80577a11c40e52955f4a087c8d 100644 --- a/test/validation/validate_ball_and_stick.cpp +++ b/test/validation/validate_ball_and_stick.cpp @@ -3,7 +3,9 @@ #include <nlohmann/json.hpp> #include <arbor/common_types.hpp> +#include <arbor/context.hpp> #include <arbor/domain_decomposition.hpp> +#include <arbor/context.hpp> #include <arbor/load_balance.hpp> #include <arbor/mc_cell.hpp> #include <arbor/recipe.hpp> @@ -32,7 +34,7 @@ template <typename ProbePointSeq> void run_ncomp_convergence_test( const char* model_name, const aux::path& ref_data_path, - backend_kind backend, + context& context, const mc_cell& c, ProbePointSeq& probe_points, float t_end=100.f) @@ -49,7 +51,7 @@ void run_ncomp_convergence_test( {"dt", dt}, {"sim", "arbor"}, {"units", "mV"}, - {"backend_kind", to_string(backend)} + {"backend_kind", (has_gpu(context)? 
"gpu": "multicore")} }; auto exclude = stimulus_ends(c); @@ -64,10 +66,6 @@ void run_ncomp_convergence_test( convergence_test_runner<int> runner("ncomp", plabels, meta); runner.load_reference_data(ref_data_path); - execution_context context; - proc_allocation nd; - nd.num_gpus = (backend==backend_kind::gpu); - for (int ncomp = 10; ncomp<max_ncomp; ncomp*=2) { for (auto& seg: c.segments()) { if (!seg->is_soma()) { @@ -79,7 +77,7 @@ void run_ncomp_convergence_test( rec.add_probe(0, 0, cell_probe_address{p.where, cell_probe_address::membrane_voltage}); } - auto decomp = partition_load_balance(rec, nd, context); + auto decomp = partition_load_balance(rec, context); simulation sim(rec, decomp, context); runner.run(sim, ncomp, sample_dt, t_end, dt, exclude); @@ -88,7 +86,7 @@ void run_ncomp_convergence_test( runner.assert_all_convergence(); } -void validate_ball_and_stick(arb::backend_kind backend) { +void validate_ball_and_stick(context& ctx) { using namespace arb; mc_cell c = make_cell_ball_and_stick(); @@ -101,12 +99,12 @@ void validate_ball_and_stick(arb::backend_kind backend) { run_ncomp_convergence_test( "ball_and_stick", "neuron_ball_and_stick.json", - backend, + ctx, c, points); } -void validate_ball_and_taper(arb::backend_kind backend) { +void validate_ball_and_taper(context& ctx) { using namespace arb; mc_cell c = make_cell_ball_and_taper(); @@ -119,12 +117,12 @@ void validate_ball_and_taper(arb::backend_kind backend) { run_ncomp_convergence_test( "ball_and_taper", "neuron_ball_and_taper.json", - backend, + ctx, c, points); } -void validate_ball_and_3stick(arb::backend_kind backend) { +void validate_ball_and_3stick(context& ctx) { using namespace arb; mc_cell c = make_cell_ball_and_3stick(); @@ -141,12 +139,12 @@ void validate_ball_and_3stick(arb::backend_kind backend) { run_ncomp_convergence_test( "ball_and_3stick", "neuron_ball_and_3stick.json", - backend, + ctx, c, points); } -void validate_rallpack1(arb::backend_kind backend) { +void validate_rallpack1(context& ctx) { using namespace arb; mc_cell c = make_cell_simple_cable(); @@ -159,13 +157,13 @@ void validate_rallpack1(arb::backend_kind backend) { run_ncomp_convergence_test( "rallpack1", "numeric_rallpack1.json", - backend, + ctx, c, points, 250.f); } -void validate_ball_and_squiggle(arb::backend_kind backend) { +void validate_ball_and_squiggle(context& ctx) { using namespace arb; mc_cell c = make_cell_ball_and_squiggle(); @@ -189,47 +187,72 @@ void validate_ball_and_squiggle(arb::backend_kind backend) { run_ncomp_convergence_test( "ball_and_squiggle_integrator", "neuron_ball_and_squiggle.json", - backend, + ctx, c, points); } TEST(ball_and_stick, neuron_ref) { - execution_context ctx; - validate_ball_and_stick(backend_kind::multicore); - if (local_allocation(ctx).num_gpus) { - validate_ball_and_stick(backend_kind::gpu); + proc_allocation resources; + { + auto ctx = make_context(resources); + validate_ball_and_stick(ctx); + } + if (resources.has_gpu()) { + resources.gpu_id = -1; + auto ctx = make_context(resources); + validate_ball_and_stick(ctx); } } TEST(ball_and_taper, neuron_ref) { - execution_context ctx; - validate_ball_and_taper(backend_kind::multicore); - if (local_allocation(ctx).num_gpus) { - validate_ball_and_taper(backend_kind::gpu); + proc_allocation resources; + { + auto ctx = make_context(resources); + validate_ball_and_taper(ctx); + } + if (resources.has_gpu()) { + resources.gpu_id = -1; + auto ctx = make_context(resources); + validate_ball_and_taper(ctx); } } TEST(ball_and_3stick, neuron_ref) { - 
diff --git a/test/validation/validate_kinetic.cpp b/test/validation/validate_kinetic.cpp
index b8bfb31094be5448be40f2bda5220661a967d907..9bf35611a6159e8d1ac9c8fdc53ffe2dcb622816 100644
--- a/test/validation/validate_kinetic.cpp
+++ b/test/validation/validate_kinetic.cpp
@@ -4,6 +4,7 @@

 #include <nlohmann/json.hpp>

+#include <arbor/context.hpp>
 #include <arbor/common_types.hpp>
 #include <arbor/domain_decomposition.hpp>
 #include <arbor/load_balance.hpp>
@@ -21,7 +22,7 @@
 #include "validation_data.hpp"

 void run_kinetic_dt(
-    arb::backend_kind backend,
+    const arb::context& context,
     arb::mc_cell& c,
     arb::cell_probe_address probe,
     float t_end,
@@ -38,16 +39,12 @@ void run_kinetic_dt(
     probe_label plabels[1] = {{"soma.mid", {0u, 0u}}};

     meta["sim"] = "arbor";
-    meta["backend_kind"] = to_string(backend);
+    meta["backend_kind"] = arb::has_gpu(context)? "gpu": "multicore";

     convergence_test_runner<float> runner("dt", plabels, meta);
     runner.load_reference_data(ref_file);

-    execution_context context;
-    proc_allocation nd;
-    nd.num_gpus = (backend==backend_kind::gpu);
-
-    auto decomp = partition_load_balance(rec, nd, context);
+    auto decomp = partition_load_balance(rec, context);
     simulation sim(rec, decomp, context);

     auto exclude = stimulus_ends(c);
@@ -70,7 +67,7 @@ end:
     runner.assert_all_convergence();
 }

-void validate_kinetic_kin1(arb::backend_kind backend) {
+void validate_kinetic_kin1(const arb::context& ctx) {
     using namespace arb;

     // 20 µm diameter soma with single mechanism, current probe
@@ -85,10 +82,10 @@ void validate_kinetic_kin1(arb::backend_kind backend) {
         {"units", "nA"}
     };

-    run_kinetic_dt(backend, c, probe, 100.f, meta, "numeric_kin1.json");
+    run_kinetic_dt(ctx, c, probe, 100.f, meta, "numeric_kin1.json");
 }

-void validate_kinetic_kinlva(arb::backend_kind backend) {
+void validate_kinetic_kinlva(const arb::context& ctx) {
     using namespace arb;

     // 20 µm diameter soma with single mechanism, current probe
@@ -104,24 +101,34 @@ void validate_kinetic_kinlva(arb::backend_kind backend) {
         {"units", "mV"}
     };

-    run_kinetic_dt(backend, c, probe, 300.f, meta, "numeric_kinlva.json");
+    run_kinetic_dt(ctx, c, probe, 300.f, meta, "numeric_kinlva.json");
 }

 using namespace arb;

 TEST(kinetic, kin1_numeric_ref) {
-    execution_context ctx;
-    validate_kinetic_kin1(backend_kind::multicore);
-    if (local_allocation(ctx).num_gpus) {
-        validate_kinetic_kin1(arb::backend_kind::gpu);
+    proc_allocation resources;
+    {
+        auto ctx = make_context(resources);
+        validate_kinetic_kin1(ctx);
+    }
+    if (resources.has_gpu()) {
+        resources.gpu_id = -1;
+        auto ctx = make_context(resources);
+        validate_kinetic_kin1(ctx);
     }
 }

 TEST(kinetic, kinlva_numeric_ref) {
-    execution_context ctx;
-    validate_kinetic_kinlva(backend_kind::multicore);
-    if (local_allocation(ctx).num_gpus) {
-        validate_kinetic_kinlva(arb::backend_kind::gpu);
+    proc_allocation resources;
+    {
+        auto ctx = make_context(resources);
+        validate_kinetic_kinlva(ctx);
+    }
+    if (resources.has_gpu()) {
+        resources.gpu_id = -1;
+        auto ctx = make_context(resources);
+        validate_kinetic_kinlva(ctx);
     }
 }
"gpu": "multicore"; convergence_test_runner<float> runner("dt", plabels, meta); runner.load_reference_data(ref_file); - execution_context context; - proc_allocation nd; - nd.num_gpus = (backend==backend_kind::gpu); - - auto decomp = partition_load_balance(rec, nd, context); + auto decomp = partition_load_balance(rec, context); simulation sim(rec, decomp, context); auto exclude = stimulus_ends(c); @@ -70,7 +67,7 @@ end: runner.assert_all_convergence(); } -void validate_kinetic_kin1(arb::backend_kind backend) { +void validate_kinetic_kin1(const arb::context& ctx) { using namespace arb; // 20 µm diameter soma with single mechanism, current probe @@ -85,10 +82,10 @@ void validate_kinetic_kin1(arb::backend_kind backend) { {"units", "nA"} }; - run_kinetic_dt(backend, c, probe, 100.f, meta, "numeric_kin1.json"); + run_kinetic_dt(ctx, c, probe, 100.f, meta, "numeric_kin1.json"); } -void validate_kinetic_kinlva(arb::backend_kind backend) { +void validate_kinetic_kinlva(const arb::context& ctx) { using namespace arb; // 20 µm diameter soma with single mechanism, current probe @@ -104,24 +101,34 @@ void validate_kinetic_kinlva(arb::backend_kind backend) { {"units", "mV"} }; - run_kinetic_dt(backend, c, probe, 300.f, meta, "numeric_kinlva.json"); + run_kinetic_dt(ctx, c, probe, 300.f, meta, "numeric_kinlva.json"); } using namespace arb; TEST(kinetic, kin1_numeric_ref) { - execution_context ctx; - validate_kinetic_kin1(backend_kind::multicore); - if (local_allocation(ctx).num_gpus) { - validate_kinetic_kin1(arb::backend_kind::gpu); + proc_allocation resources; + { + auto ctx = make_context(resources); + validate_kinetic_kin1(ctx); + } + if (resources.has_gpu()) { + resources.gpu_id = -1; + auto ctx = make_context(resources); + validate_kinetic_kin1(ctx); } } TEST(kinetic, kinlva_numeric_ref) { - execution_context ctx; - validate_kinetic_kinlva(backend_kind::multicore); - if (local_allocation(ctx).num_gpus) { - validate_kinetic_kinlva(arb::backend_kind::gpu); + proc_allocation resources; + { + auto ctx = make_context(resources); + validate_kinetic_kinlva(ctx); + } + if (resources.has_gpu()) { + resources.gpu_id = -1; + auto ctx = make_context(resources); + validate_kinetic_kinlva(ctx); } } diff --git a/test/validation/validate_soma.cpp b/test/validation/validate_soma.cpp index f61e3ab1c75e317ef6b18c0299cd020b4c9708e1..5e5fdd6e91db9079c9753944c10f410e5110991b 100644 --- a/test/validation/validate_soma.cpp +++ b/test/validation/validate_soma.cpp @@ -1,6 +1,7 @@ #include <nlohmann/json.hpp> #include <arbor/common_types.hpp> +#include <arbor/context.hpp> #include <arbor/domain_decomposition.hpp> #include <arbor/load_balance.hpp> #include <arbor/mc_cell.hpp> @@ -20,7 +21,7 @@ using namespace arb; -void validate_soma(backend_kind backend) { +void validate_soma(const context& context) { float sample_dt = g_trace_io.sample_dt(); mc_cell c = make_cell_soma_only(); @@ -29,11 +30,7 @@ void validate_soma(backend_kind backend) { rec.add_probe(0, 0, cell_probe_address{{0, 0.5}, cell_probe_address::membrane_voltage}); probe_label plabels[1] = {{"soma.mid", {0u, 0u}}}; - execution_context context; - proc_allocation nd; - nd.num_gpus = (backend==backend_kind::gpu); - - auto decomp = partition_load_balance(rec, nd, context); + auto decomp = partition_load_balance(rec, context); simulation sim(rec, decomp, context); nlohmann::json meta = { @@ -41,7 +38,7 @@ void validate_soma(backend_kind backend) { {"model", "soma"}, {"sim", "arbor"}, {"units", "mV"}, - {"backend_kind", to_string(backend)} + {"backend_kind", 
diff --git a/test/validation/validate_synapses.cpp b/test/validation/validate_synapses.cpp
index cd19a08dd3fd1ec4db6ecb851eb0cd46723b617b..92b43b9784740dd004bfd8916ca67568aa1a47bb 100644
--- a/test/validation/validate_synapses.cpp
+++ b/test/validation/validate_synapses.cpp
@@ -1,5 +1,6 @@
 #include <nlohmann/json.hpp>

+#include <arbor/context.hpp>
 #include <arbor/domain_decomposition.hpp>
 #include <arbor/load_balance.hpp>
 #include <arbor/mc_cell.hpp>
@@ -24,7 +25,7 @@ using namespace arb;
 void run_synapse_test(
     const char* syn_type,
     const aux::path& ref_data_path,
-    backend_kind backend,
+    const context& context,
     float t_end=70.f,
     float dt=0.001)
 {
@@ -34,7 +35,7 @@ void run_synapse_test(
         {"model", syn_type},
         {"sim", "arbor"},
         {"units", "mV"},
-        {"backend_kind", to_string(backend)}
+        {"backend_kind", arb::has_gpu(context)? "gpu": "multicore"}
     };

     mc_cell c = make_cell_ball_and_stick(false); // no stimuli
@@ -61,10 +62,6 @@ void run_synapse_test(
     convergence_test_runner<int> runner("ncomp", plabels, meta);
     runner.load_reference_data(ref_data_path);

-    execution_context context;
-    proc_allocation nd;
-    nd.num_gpus = (backend==backend_kind::gpu);
-
     for (int ncomp = 10; ncomp<max_ncomp; ncomp*=2) {
         c.cable(1)->set_compartments(ncomp);

@@ -76,7 +73,7 @@ void run_synapse_test(
         // dend.end
         rec.add_probe(0, 0, cell_probe_address{{1, 1.0}, cell_probe_address::membrane_voltage});

-        auto decomp = partition_load_balance(rec, nd, context);
+        auto decomp = partition_load_balance(rec, context);
         simulation sim(rec, decomp, context);
         sim.inject_events(synthetic_events);

@@ -88,21 +85,31 @@
 }

 TEST(simple_synapse, expsyn_neuron_ref) {
-    execution_context ctx;
-    SCOPED_TRACE("expsyn-multicore");
-    run_synapse_test("expsyn", "neuron_simple_exp_synapse.json", backend_kind::multicore);
-    if (local_allocation(ctx).num_gpus) {
+    proc_allocation resources;
+    {
+        auto ctx = make_context(resources);
+        SCOPED_TRACE("expsyn-multicore");
+        run_synapse_test("expsyn", "neuron_simple_exp_synapse.json", ctx);
+    }
+    if (resources.has_gpu()) {
+        resources.gpu_id = -1;
+        auto ctx = make_context(resources);
         SCOPED_TRACE("expsyn-gpu");
-        run_synapse_test("expsyn", "neuron_simple_exp_synapse.json", backend_kind::gpu);
+        run_synapse_test("expsyn", "neuron_simple_exp_synapse.json", ctx);
     }
 }

 TEST(simple_synapse, exp2syn_neuron_ref) {
-    execution_context ctx;
-    SCOPED_TRACE("exp2syn-multicore");
-    run_synapse_test("exp2syn", "neuron_simple_exp2_synapse.json", backend_kind::multicore);
-    if (local_allocation(ctx).num_gpus) {
+    proc_allocation resources;
+    {
+        auto ctx = make_context(resources);
+        SCOPED_TRACE("exp2syn-multicore");
+        run_synapse_test("exp2syn", "neuron_simple_exp2_synapse.json", ctx);
+    }
+    if (resources.has_gpu()) {
+        resources.gpu_id = -1;
+        auto ctx = make_context(resources);
         SCOPED_TRACE("exp2syn-gpu");
-        run_synapse_test("exp2syn", "neuron_simple_exp2_synapse.json", backend_kind::gpu);
+        run_synapse_test("exp2syn", "neuron_simple_exp2_synapse.json", ctx);
     }
 }
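On the helper side, the pattern after this patch is the one visible in run_synapse_test and validate_soma above: derive the backend label from the context with has_gpu, and pass the context straight to partition_load_balance and the simulation constructor. A minimal sketch follows, assuming the headers below and treating the recipe argument, t_end and dt as illustrative stand-ins.

// Minimal sketch mirroring the helper-side usage above; not part of the patch.
#include <arbor/context.hpp>
#include <arbor/domain_decomposition.hpp>
#include <arbor/load_balance.hpp>
#include <arbor/recipe.hpp>
#include <arbor/simulation.hpp>

void run_case(const arb::context& ctx, const arb::recipe& rec) {
    // Backend label derived from the context, as the updated meta fields do.
    const char* backend_label = arb::has_gpu(ctx)? "gpu": "multicore";
    (void)backend_label;

    // Both calls take the context directly; the separate proc_allocation and
    // execution_context objects built by the old code are no longer needed.
    auto decomp = arb::partition_load_balance(rec, ctx);
    arb::simulation sim(rec, decomp, ctx);      // argument order as used in this patch

    sim.run(100, 0.025);                        // t_end and dt are illustrative values
}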